Roll boringssl/src fc9c67599..76918d016

The roll_boringssl.py script is also adjusted to work with the latest
boringssl.

Disable ASM compilation on Windows x64, which fails to compile. This
also means there is no hardware-accelerated AES on Windows x64.

Issue #198.

Change-Id: Ib7e8ff506f014c8c733f1882eeeddbe34fa28511
KongQun Yang 2019-10-22 11:06:32 -07:00
parent 0342adb132
commit c80f053ba2
163 changed files with 65715 additions and 20886 deletions
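
For context on what a roll like this involves: the DEPS pin below is moved to the new upstream revision, and BoringSSL's generate_build_files.py is re-run, which regenerates the source lists and the per-platform assembly that make up most of this diff. The sketch below is only an illustration of that workflow, not the repository's actual roll_boringssl.py; the script path, the "gyp" argument, and the directory layout are assumptions, while the two revision hashes are the ones from the DEPS hunk below.

# Hypothetical sketch of a BoringSSL roll helper. This is NOT the actual
# roll_boringssl.py from this repository; the generate_build_files.py
# invocation and the paths are assumptions for illustration only.
import subprocess

OLD_REV = "fc9c67599d9bdeb2e0467085133b81a8e28f77a4"
NEW_REV = "76918d016414bf1d71a86d28239566fbcf8aacf0"

def update_deps_pin(deps_path="DEPS"):
    # Point the boringssl entry in DEPS at the new upstream revision.
    with open(deps_path) as f:
        contents = f.read()
    contents = contents.replace("boringssl@" + OLD_REV, "boringssl@" + NEW_REV)
    with open(deps_path, "w") as f:
        f.write(contents)

def regenerate_build_files(boringssl_dir="src/packager/third_party/boringssl"):
    # generate_build_files.py rewrites the generated file lists (the
    # crypto_sources/ssl_sources variables in this diff) and the checked-in
    # per-platform assembly under ios-*/linux-*/mac-*/win-*.
    subprocess.check_call(
        ["python", "src/util/generate_build_files.py", "gyp"],
        cwd=boringssl_dir)

if __name__ == "__main__":
    update_deps_pin()
    # A `gclient sync` would normally fetch the new src/ checkout here
    # before the build files can be regenerated.
    regenerate_build_files()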

DEPS

@ -43,7 +43,7 @@ deps = {
# Make sure the version matches the one in
# src/packager/third_party/boringssl, which contains perl generated files.
"src/packager/third_party/boringssl/src":
Var("github") + "/google/boringssl@fc9c67599d9bdeb2e0467085133b81a8e28f77a4",
Var("github") + "/google/boringssl@76918d016414bf1d71a86d28239566fbcf8aacf0",
"src/packager/third_party/curl/source":
Var("github") + "/curl/curl@62c07b5743490ce373910f469abc8cdc759bec2b", #7.57.0


@ -57,16 +57,18 @@ crypto_sources = [
"src/crypto/bytestring/cbb.c",
"src/crypto/bytestring/cbs.c",
"src/crypto/bytestring/internal.h",
"src/crypto/bytestring/unicode.c",
"src/crypto/chacha/chacha.c",
"src/crypto/chacha/internal.h",
"src/crypto/cipher_extra/cipher_extra.c",
"src/crypto/cipher_extra/derive_key.c",
"src/crypto/cipher_extra/e_aesccm.c",
"src/crypto/cipher_extra/e_aesctrhmac.c",
"src/crypto/cipher_extra/e_aesgcmsiv.c",
"src/crypto/cipher_extra/e_chacha20poly1305.c",
"src/crypto/cipher_extra/e_null.c",
"src/crypto/cipher_extra/e_rc2.c",
"src/crypto/cipher_extra/e_rc4.c",
"src/crypto/cipher_extra/e_ssl3.c",
"src/crypto/cipher_extra/e_tls.c",
"src/crypto/cipher_extra/internal.h",
"src/crypto/cipher_extra/tls_cbc.c",
@ -74,14 +76,15 @@ crypto_sources = [
"src/crypto/conf/conf.c",
"src/crypto/conf/conf_def.h",
"src/crypto/conf/internal.h",
"src/crypto/cpu-aarch64-fuchsia.c",
"src/crypto/cpu-aarch64-linux.c",
"src/crypto/cpu-arm-linux.c",
"src/crypto/cpu-arm-linux.h",
"src/crypto/cpu-arm.c",
"src/crypto/cpu-intel.c",
"src/crypto/cpu-ppc64le.c",
"src/crypto/crypto.c",
"src/crypto/curve25519/spake25519.c",
"src/crypto/curve25519/x25519-x86_64.c",
"src/crypto/dh/check.c",
"src/crypto/dh/dh.c",
"src/crypto/dh/dh_asn1.c",
@ -90,7 +93,8 @@ crypto_sources = [
"src/crypto/dsa/dsa.c",
"src/crypto/dsa/dsa_asn1.c",
"src/crypto/ec_extra/ec_asn1.c",
"src/crypto/ecdh/ecdh.c",
"src/crypto/ec_extra/ec_derive.c",
"src/crypto/ecdh_extra/ecdh_extra.c",
"src/crypto/ecdsa_extra/ecdsa_asn1.c",
"src/crypto/engine/engine.c",
"src/crypto/err/err.c",
@ -107,6 +111,8 @@ crypto_sources = [
"src/crypto/evp/p_ed25519_asn1.c",
"src/crypto/evp/p_rsa.c",
"src/crypto/evp/p_rsa_asn1.c",
"src/crypto/evp/p_x25519.c",
"src/crypto/evp/p_x25519_asn1.c",
"src/crypto/evp/pbkdf.c",
"src/crypto/evp/print.c",
"src/crypto/evp/scrypt.c",
@ -124,11 +130,17 @@ crypto_sources = [
"src/crypto/fipsmodule/ec/internal.h",
"src/crypto/fipsmodule/ec/p256-x86_64-table.h",
"src/crypto/fipsmodule/ec/p256-x86_64.h",
"src/crypto/fipsmodule/fips_shared_support.c",
"src/crypto/fipsmodule/is_fips.c",
"src/crypto/fipsmodule/md5/internal.h",
"src/crypto/fipsmodule/modes/internal.h",
"src/crypto/fipsmodule/rand/internal.h",
"src/crypto/fipsmodule/rsa/internal.h",
"src/crypto/fipsmodule/sha/internal.h",
"src/crypto/fipsmodule/tls/internal.h",
"src/crypto/hkdf/hkdf.c",
"src/crypto/hrss/hrss.c",
"src/crypto/hrss/internal.h",
"src/crypto/internal.h",
"src/crypto/lhash/lhash.c",
"src/crypto/mem.c",
@ -165,6 +177,8 @@ crypto_sources = [
"src/crypto/refcount_c11.c",
"src/crypto/refcount_lock.c",
"src/crypto/rsa_extra/rsa_asn1.c",
"src/crypto/rsa_extra/rsa_print.c",
"src/crypto/siphash/siphash.c",
"src/crypto/stack/stack.c",
"src/crypto/thread.c",
"src/crypto/thread_none.c",
@ -223,6 +237,7 @@ crypto_sources = [
"src/crypto/x509/x_x509.c",
"src/crypto/x509/x_x509a.c",
"src/crypto/x509v3/ext_dat.h",
"src/crypto/x509v3/internal.h",
"src/crypto/x509v3/pcy_cache.c",
"src/crypto/x509v3/pcy_data.c",
"src/crypto/x509v3/pcy_int.h",
@ -246,6 +261,7 @@ crypto_sources = [
"src/crypto/x509v3/v3_int.c",
"src/crypto/x509v3/v3_lib.c",
"src/crypto/x509v3/v3_ncons.c",
"src/crypto/x509v3/v3_ocsp.c",
"src/crypto/x509v3/v3_pci.c",
"src/crypto/x509v3/v3_pcia.c",
"src/crypto/x509v3/v3_pcons.c",
@ -256,6 +272,25 @@ crypto_sources = [
"src/crypto/x509v3/v3_skey.c",
"src/crypto/x509v3/v3_sxnet.c",
"src/crypto/x509v3/v3_utl.c",
"src/third_party/fiat/curve25519.c",
"src/third_party/fiat/curve25519_32.h",
"src/third_party/fiat/curve25519_64.h",
"src/third_party/fiat/curve25519_tables.h",
"src/third_party/fiat/internal.h",
"src/third_party/fiat/p256_32.h",
"src/third_party/fiat/p256_64.h",
"src/third_party/sike/asm/fp_generic.c",
"src/third_party/sike/curve_params.c",
"src/third_party/sike/fpx.c",
"src/third_party/sike/fpx.h",
"src/third_party/sike/isogeny.c",
"src/third_party/sike/isogeny.h",
"src/third_party/sike/sike.c",
"src/third_party/sike/sike.h",
"src/third_party/sike/utils.h",
]
crypto_headers = [
"src/include/openssl/aead.h",
"src/include/openssl/aes.h",
"src/include/openssl/arm_arch.h",
@ -282,6 +317,7 @@ crypto_sources = [
"src/include/openssl/dh.h",
"src/include/openssl/digest.h",
"src/include/openssl/dsa.h",
"src/include/openssl/e_os2.h",
"src/include/openssl/ec.h",
"src/include/openssl/ec_key.h",
"src/include/openssl/ecdh.h",
@ -292,9 +328,9 @@ crypto_sources = [
"src/include/openssl/ex_data.h",
"src/include/openssl/hkdf.h",
"src/include/openssl/hmac.h",
"src/include/openssl/hrss.h",
"src/include/openssl/is_boringssl.h",
"src/include/openssl/lhash.h",
"src/include/openssl/lhash_macros.h",
"src/include/openssl/md4.h",
"src/include/openssl/md5.h",
"src/include/openssl/mem.h",
@ -317,31 +353,25 @@ crypto_sources = [
"src/include/openssl/rsa.h",
"src/include/openssl/safestack.h",
"src/include/openssl/sha.h",
"src/include/openssl/siphash.h",
"src/include/openssl/span.h",
"src/include/openssl/srtp.h",
"src/include/openssl/stack.h",
"src/include/openssl/thread.h",
"src/include/openssl/type_check.h",
"src/include/openssl/x509.h",
"src/include/openssl/x509_vfy.h",
"src/include/openssl/x509v3.h",
"src/third_party/fiat/curve25519.c",
"src/third_party/fiat/internal.h",
]
ssl_sources = [
"src/include/openssl/dtls1.h",
"src/include/openssl/ssl.h",
"src/include/openssl/ssl3.h",
"src/include/openssl/tls1.h",
"src/ssl/bio_ssl.cc",
"src/ssl/custom_extensions.cc",
"src/ssl/d1_both.cc",
"src/ssl/d1_lib.cc",
"src/ssl/d1_pkt.cc",
"src/ssl/d1_srtp.cc",
"src/ssl/dtls_method.cc",
"src/ssl/dtls_record.cc",
"src/ssl/handoff.cc",
"src/ssl/handshake.cc",
"src/ssl/handshake_client.cc",
"src/ssl/handshake_server.cc",
@ -373,14 +403,26 @@ ssl_sources = [
"src/ssl/tls_record.cc",
]
ssl_headers = [
"src/include/openssl/dtls1.h",
"src/include/openssl/srtp.h",
"src/include/openssl/ssl.h",
"src/include/openssl/ssl3.h",
"src/include/openssl/tls1.h",
]
crypto_sources_ios_aarch64 = [
"ios-aarch64/crypto/chacha/chacha-armv8.S",
"ios-aarch64/crypto/fipsmodule/aesv8-armx64.S",
"ios-aarch64/crypto/fipsmodule/armv8-mont.S",
"ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S",
"ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S",
"ios-aarch64/crypto/fipsmodule/sha1-armv8.S",
"ios-aarch64/crypto/fipsmodule/sha256-armv8.S",
"ios-aarch64/crypto/fipsmodule/sha512-armv8.S",
"ios-aarch64/crypto/fipsmodule/vpaes-armv8.S",
"ios-aarch64/crypto/test/trampoline-armv8.S",
"ios-aarch64/crypto/third_party/sike/asm/fp-armv8.S",
]
crypto_sources_ios_arm = [
@ -394,16 +436,22 @@ crypto_sources_ios_arm = [
"ios-arm/crypto/fipsmodule/sha1-armv4-large.S",
"ios-arm/crypto/fipsmodule/sha256-armv4.S",
"ios-arm/crypto/fipsmodule/sha512-armv4.S",
"ios-arm/crypto/fipsmodule/vpaes-armv7.S",
"ios-arm/crypto/test/trampoline-armv4.S",
]
crypto_sources_linux_aarch64 = [
"linux-aarch64/crypto/chacha/chacha-armv8.S",
"linux-aarch64/crypto/fipsmodule/aesv8-armx64.S",
"linux-aarch64/crypto/fipsmodule/armv8-mont.S",
"linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S",
"linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S",
"linux-aarch64/crypto/fipsmodule/sha1-armv8.S",
"linux-aarch64/crypto/fipsmodule/sha256-armv8.S",
"linux-aarch64/crypto/fipsmodule/sha512-armv8.S",
"linux-aarch64/crypto/fipsmodule/vpaes-armv8.S",
"linux-aarch64/crypto/test/trampoline-armv8.S",
"linux-aarch64/crypto/third_party/sike/asm/fp-armv8.S",
]
crypto_sources_linux_arm = [
@ -417,6 +465,8 @@ crypto_sources_linux_arm = [
"linux-arm/crypto/fipsmodule/sha1-armv4-large.S",
"linux-arm/crypto/fipsmodule/sha256-armv4.S",
"linux-arm/crypto/fipsmodule/sha512-armv4.S",
"linux-arm/crypto/fipsmodule/vpaes-armv7.S",
"linux-arm/crypto/test/trampoline-armv4.S",
"src/crypto/curve25519/asm/x25519-asm-arm.S",
"src/crypto/poly1305/poly1305_arm_asm.S",
]
@ -432,6 +482,7 @@ crypto_sources_linux_x86 = [
"linux-x86/crypto/fipsmodule/aesni-x86.S",
"linux-x86/crypto/fipsmodule/bn-586.S",
"linux-x86/crypto/fipsmodule/co-586.S",
"linux-x86/crypto/fipsmodule/ghash-ssse3-x86.S",
"linux-x86/crypto/fipsmodule/ghash-x86.S",
"linux-x86/crypto/fipsmodule/md5-586.S",
"linux-x86/crypto/fipsmodule/sha1-586.S",
@ -439,6 +490,7 @@ crypto_sources_linux_x86 = [
"linux-x86/crypto/fipsmodule/sha512-586.S",
"linux-x86/crypto/fipsmodule/vpaes-x86.S",
"linux-x86/crypto/fipsmodule/x86-mont.S",
"linux-x86/crypto/test/trampoline-x86.S",
]
crypto_sources_linux_x86_64 = [
@ -448,10 +500,11 @@ crypto_sources_linux_x86_64 = [
"linux-x86_64/crypto/fipsmodule/aes-x86_64.S",
"linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S",
"linux-x86_64/crypto/fipsmodule/aesni-x86_64.S",
"linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S",
"linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S",
"linux-x86_64/crypto/fipsmodule/ghash-x86_64.S",
"linux-x86_64/crypto/fipsmodule/md5-x86_64.S",
"linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S",
"linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S",
"linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S",
"linux-x86_64/crypto/fipsmodule/rsaz-avx2.S",
"linux-x86_64/crypto/fipsmodule/sha1-x86_64.S",
@ -460,7 +513,9 @@ crypto_sources_linux_x86_64 = [
"linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S",
"linux-x86_64/crypto/fipsmodule/x86_64-mont.S",
"linux-x86_64/crypto/fipsmodule/x86_64-mont5.S",
"src/crypto/curve25519/asm/x25519-asm-x86_64.S",
"linux-x86_64/crypto/test/trampoline-x86_64.S",
"linux-x86_64/crypto/third_party/sike/asm/fp-x86_64.S",
"src/crypto/hrss/asm/poly_rq_mul.S",
]
crypto_sources_mac_x86 = [
@ -469,6 +524,7 @@ crypto_sources_mac_x86 = [
"mac-x86/crypto/fipsmodule/aesni-x86.S",
"mac-x86/crypto/fipsmodule/bn-586.S",
"mac-x86/crypto/fipsmodule/co-586.S",
"mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S",
"mac-x86/crypto/fipsmodule/ghash-x86.S",
"mac-x86/crypto/fipsmodule/md5-586.S",
"mac-x86/crypto/fipsmodule/sha1-586.S",
@ -476,6 +532,7 @@ crypto_sources_mac_x86 = [
"mac-x86/crypto/fipsmodule/sha512-586.S",
"mac-x86/crypto/fipsmodule/vpaes-x86.S",
"mac-x86/crypto/fipsmodule/x86-mont.S",
"mac-x86/crypto/test/trampoline-x86.S",
]
crypto_sources_mac_x86_64 = [
@ -485,10 +542,11 @@ crypto_sources_mac_x86_64 = [
"mac-x86_64/crypto/fipsmodule/aes-x86_64.S",
"mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S",
"mac-x86_64/crypto/fipsmodule/aesni-x86_64.S",
"mac-x86_64/crypto/fipsmodule/bsaes-x86_64.S",
"mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S",
"mac-x86_64/crypto/fipsmodule/ghash-x86_64.S",
"mac-x86_64/crypto/fipsmodule/md5-x86_64.S",
"mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S",
"mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S",
"mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S",
"mac-x86_64/crypto/fipsmodule/rsaz-avx2.S",
"mac-x86_64/crypto/fipsmodule/sha1-x86_64.S",
@ -497,7 +555,8 @@ crypto_sources_mac_x86_64 = [
"mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S",
"mac-x86_64/crypto/fipsmodule/x86_64-mont.S",
"mac-x86_64/crypto/fipsmodule/x86_64-mont5.S",
"src/crypto/curve25519/asm/x25519-asm-x86_64.S",
"mac-x86_64/crypto/test/trampoline-x86_64.S",
"mac-x86_64/crypto/third_party/sike/asm/fp-x86_64.S",
]
crypto_sources_win_x86 = [
@ -506,6 +565,7 @@ crypto_sources_win_x86 = [
"win-x86/crypto/fipsmodule/aesni-x86.asm",
"win-x86/crypto/fipsmodule/bn-586.asm",
"win-x86/crypto/fipsmodule/co-586.asm",
"win-x86/crypto/fipsmodule/ghash-ssse3-x86.asm",
"win-x86/crypto/fipsmodule/ghash-x86.asm",
"win-x86/crypto/fipsmodule/md5-586.asm",
"win-x86/crypto/fipsmodule/sha1-586.asm",
@ -513,6 +573,7 @@ crypto_sources_win_x86 = [
"win-x86/crypto/fipsmodule/sha512-586.asm",
"win-x86/crypto/fipsmodule/vpaes-x86.asm",
"win-x86/crypto/fipsmodule/x86-mont.asm",
"win-x86/crypto/test/trampoline-x86.asm",
]
crypto_sources_win_x86_64 = [
@ -522,10 +583,11 @@ crypto_sources_win_x86_64 = [
"win-x86_64/crypto/fipsmodule/aes-x86_64.asm",
"win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm",
"win-x86_64/crypto/fipsmodule/aesni-x86_64.asm",
"win-x86_64/crypto/fipsmodule/bsaes-x86_64.asm",
"win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm",
"win-x86_64/crypto/fipsmodule/ghash-x86_64.asm",
"win-x86_64/crypto/fipsmodule/md5-x86_64.asm",
"win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm",
"win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.asm",
"win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm",
"win-x86_64/crypto/fipsmodule/rsaz-avx2.asm",
"win-x86_64/crypto/fipsmodule/sha1-x86_64.asm",
@ -534,15 +596,19 @@ crypto_sources_win_x86_64 = [
"win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm",
"win-x86_64/crypto/fipsmodule/x86_64-mont.asm",
"win-x86_64/crypto/fipsmodule/x86_64-mont5.asm",
"win-x86_64/crypto/test/trampoline-x86_64.asm",
"win-x86_64/crypto/third_party/sike/asm/fp-x86_64.asm",
]
fuzzers = [
"arm_cpuinfo",
"bn_div",
"bn_mod_exp",
"cert",
"client",
"dtls_client",
"dtls_server",
"pkcs12",
"pkcs8",
"privkey",
"read_pem",


@ -5,21 +5,28 @@
# This file is created by generate_build_files.py. Do not edit manually.
test_support_sources = [
"src/crypto/test/abi_test.h",
"src/crypto/test/file_test.cc",
"src/crypto/test/file_test.h",
"src/crypto/test/gtest_main.h",
"src/crypto/test/malloc.cc",
"src/crypto/test/test_util.cc",
"src/crypto/test/test_util.h",
"src/crypto/test/wycheproof_util.cc",
"src/crypto/test/wycheproof_util.h",
"src/ssl/test/async_bio.h",
"src/ssl/test/fuzzer.h",
"src/ssl/test/fuzzer_tags.h",
"src/ssl/test/handshake_util.h",
"src/ssl/test/packeted_bio.h",
"src/ssl/test/settings_writer.h",
"src/ssl/test/test_config.h",
"src/ssl/test/test_state.h",
]
crypto_test_sources = [
"crypto_test_data.cc",
"src/crypto/abi_self_test.cc",
"src/crypto/asn1/asn1_test.cc",
"src/crypto/base64/base64_test.cc",
"src/crypto/bio/bio_test.cc",
@ -31,13 +38,14 @@ crypto_test_sources = [
"src/crypto/cmac/cmac_test.cc",
"src/crypto/compiler_test.cc",
"src/crypto/constant_time_test.cc",
"src/crypto/cpu-arm-linux_test.cc",
"src/crypto/curve25519/ed25519_test.cc",
"src/crypto/curve25519/spake25519_test.cc",
"src/crypto/curve25519/x25519_test.cc",
"src/crypto/dh/dh_test.cc",
"src/crypto/digest_extra/digest_test.cc",
"src/crypto/dsa/dsa_test.cc",
"src/crypto/ecdh/ecdh_test.cc",
"src/crypto/ecdh_extra/ecdh_test.cc",
"src/crypto/err/err_test.cc",
"src/crypto/evp/evp_extra_test.cc",
"src/crypto/evp/evp_test.cc",
@ -48,29 +56,133 @@ crypto_test_sources = [
"src/crypto/fipsmodule/ec/ec_test.cc",
"src/crypto/fipsmodule/ec/p256-x86_64_test.cc",
"src/crypto/fipsmodule/ecdsa/ecdsa_test.cc",
"src/crypto/fipsmodule/md5/md5_test.cc",
"src/crypto/fipsmodule/modes/gcm_test.cc",
"src/crypto/fipsmodule/rand/ctrdrbg_test.cc",
"src/crypto/fipsmodule/sha/sha_test.cc",
"src/crypto/hkdf/hkdf_test.cc",
"src/crypto/hmac_extra/hmac_test.cc",
"src/crypto/hrss/hrss_test.cc",
"src/crypto/impl_dispatch_test.cc",
"src/crypto/lhash/lhash_test.cc",
"src/crypto/obj/obj_test.cc",
"src/crypto/pem/pem_test.cc",
"src/crypto/pkcs7/pkcs7_test.cc",
"src/crypto/pkcs8/pkcs12_test.cc",
"src/crypto/pkcs8/pkcs8_test.cc",
"src/crypto/poly1305/poly1305_test.cc",
"src/crypto/pool/pool_test.cc",
"src/crypto/rand_extra/rand_test.cc",
"src/crypto/refcount_test.cc",
"src/crypto/rsa_extra/rsa_test.cc",
"src/crypto/self_test.cc",
"src/crypto/siphash/siphash_test.cc",
"src/crypto/stack/stack_test.cc",
"src/crypto/test/abi_test.cc",
"src/crypto/test/file_test_gtest.cc",
"src/crypto/test/gtest_main.cc",
"src/crypto/thread_test.cc",
"src/crypto/x509/x509_test.cc",
"src/crypto/x509/x509_time_test.cc",
"src/crypto/x509v3/tab_test.cc",
"src/crypto/x509v3/v3name_test.cc",
]
crypto_test_data = [
"src/crypto/cipher_extra/test/aes_128_cbc_sha1_tls_implicit_iv_tests.txt",
"src/crypto/cipher_extra/test/aes_128_cbc_sha1_tls_tests.txt",
"src/crypto/cipher_extra/test/aes_128_cbc_sha256_tls_tests.txt",
"src/crypto/cipher_extra/test/aes_128_ccm_bluetooth_8_tests.txt",
"src/crypto/cipher_extra/test/aes_128_ccm_bluetooth_tests.txt",
"src/crypto/cipher_extra/test/aes_128_ctr_hmac_sha256.txt",
"src/crypto/cipher_extra/test/aes_128_gcm_siv_tests.txt",
"src/crypto/cipher_extra/test/aes_128_gcm_tests.txt",
"src/crypto/cipher_extra/test/aes_192_gcm_tests.txt",
"src/crypto/cipher_extra/test/aes_256_cbc_sha1_tls_implicit_iv_tests.txt",
"src/crypto/cipher_extra/test/aes_256_cbc_sha1_tls_tests.txt",
"src/crypto/cipher_extra/test/aes_256_cbc_sha256_tls_tests.txt",
"src/crypto/cipher_extra/test/aes_256_cbc_sha384_tls_tests.txt",
"src/crypto/cipher_extra/test/aes_256_ctr_hmac_sha256.txt",
"src/crypto/cipher_extra/test/aes_256_gcm_siv_tests.txt",
"src/crypto/cipher_extra/test/aes_256_gcm_tests.txt",
"src/crypto/cipher_extra/test/chacha20_poly1305_tests.txt",
"src/crypto/cipher_extra/test/cipher_tests.txt",
"src/crypto/cipher_extra/test/des_ede3_cbc_sha1_tls_implicit_iv_tests.txt",
"src/crypto/cipher_extra/test/des_ede3_cbc_sha1_tls_tests.txt",
"src/crypto/cipher_extra/test/nist_cavp/aes_128_cbc.txt",
"src/crypto/cipher_extra/test/nist_cavp/aes_128_ctr.txt",
"src/crypto/cipher_extra/test/nist_cavp/aes_128_gcm.txt",
"src/crypto/cipher_extra/test/nist_cavp/aes_192_cbc.txt",
"src/crypto/cipher_extra/test/nist_cavp/aes_192_ctr.txt",
"src/crypto/cipher_extra/test/nist_cavp/aes_256_cbc.txt",
"src/crypto/cipher_extra/test/nist_cavp/aes_256_ctr.txt",
"src/crypto/cipher_extra/test/nist_cavp/aes_256_gcm.txt",
"src/crypto/cipher_extra/test/nist_cavp/tdes_cbc.txt",
"src/crypto/cipher_extra/test/nist_cavp/tdes_ecb.txt",
"src/crypto/cipher_extra/test/xchacha20_poly1305_tests.txt",
"src/crypto/cmac/cavp_3des_cmac_tests.txt",
"src/crypto/cmac/cavp_aes128_cmac_tests.txt",
"src/crypto/cmac/cavp_aes192_cmac_tests.txt",
"src/crypto/cmac/cavp_aes256_cmac_tests.txt",
"src/crypto/curve25519/ed25519_tests.txt",
"src/crypto/ecdh_extra/ecdh_tests.txt",
"src/crypto/evp/evp_tests.txt",
"src/crypto/evp/scrypt_tests.txt",
"src/crypto/fipsmodule/aes/aes_tests.txt",
"src/crypto/fipsmodule/bn/bn_tests.txt",
"src/crypto/fipsmodule/bn/miller_rabin_tests.txt",
"src/crypto/fipsmodule/ec/ec_scalar_base_mult_tests.txt",
"src/crypto/fipsmodule/ec/p256-x86_64_tests.txt",
"src/crypto/fipsmodule/ecdsa/ecdsa_sign_tests.txt",
"src/crypto/fipsmodule/ecdsa/ecdsa_verify_tests.txt",
"src/crypto/fipsmodule/modes/gcm_tests.txt",
"src/crypto/fipsmodule/rand/ctrdrbg_vectors.txt",
"src/crypto/hmac_extra/hmac_tests.txt",
"src/crypto/poly1305/poly1305_tests.txt",
"src/crypto/siphash/siphash_tests.txt",
"src/crypto/x509/many_constraints.pem",
"src/crypto/x509/many_names1.pem",
"src/crypto/x509/many_names2.pem",
"src/crypto/x509/many_names3.pem",
"src/crypto/x509/some_names1.pem",
"src/crypto/x509/some_names2.pem",
"src/crypto/x509/some_names3.pem",
"src/third_party/wycheproof_testvectors/aes_cbc_pkcs5_test.txt",
"src/third_party/wycheproof_testvectors/aes_cmac_test.txt",
"src/third_party/wycheproof_testvectors/aes_gcm_siv_test.txt",
"src/third_party/wycheproof_testvectors/aes_gcm_test.txt",
"src/third_party/wycheproof_testvectors/chacha20_poly1305_test.txt",
"src/third_party/wycheproof_testvectors/dsa_test.txt",
"src/third_party/wycheproof_testvectors/ecdh_secp224r1_test.txt",
"src/third_party/wycheproof_testvectors/ecdh_secp256r1_test.txt",
"src/third_party/wycheproof_testvectors/ecdh_secp384r1_test.txt",
"src/third_party/wycheproof_testvectors/ecdh_secp521r1_test.txt",
"src/third_party/wycheproof_testvectors/ecdsa_secp224r1_sha224_test.txt",
"src/third_party/wycheproof_testvectors/ecdsa_secp224r1_sha256_test.txt",
"src/third_party/wycheproof_testvectors/ecdsa_secp224r1_sha512_test.txt",
"src/third_party/wycheproof_testvectors/ecdsa_secp256r1_sha256_test.txt",
"src/third_party/wycheproof_testvectors/ecdsa_secp256r1_sha512_test.txt",
"src/third_party/wycheproof_testvectors/ecdsa_secp384r1_sha384_test.txt",
"src/third_party/wycheproof_testvectors/ecdsa_secp384r1_sha512_test.txt",
"src/third_party/wycheproof_testvectors/ecdsa_secp521r1_sha512_test.txt",
"src/third_party/wycheproof_testvectors/eddsa_test.txt",
"src/third_party/wycheproof_testvectors/kw_test.txt",
"src/third_party/wycheproof_testvectors/kwp_test.txt",
"src/third_party/wycheproof_testvectors/rsa_pss_2048_sha1_mgf1_20_test.txt",
"src/third_party/wycheproof_testvectors/rsa_pss_2048_sha256_mgf1_0_test.txt",
"src/third_party/wycheproof_testvectors/rsa_pss_2048_sha256_mgf1_32_test.txt",
"src/third_party/wycheproof_testvectors/rsa_pss_3072_sha256_mgf1_32_test.txt",
"src/third_party/wycheproof_testvectors/rsa_pss_4096_sha256_mgf1_32_test.txt",
"src/third_party/wycheproof_testvectors/rsa_pss_4096_sha512_mgf1_32_test.txt",
"src/third_party/wycheproof_testvectors/rsa_pss_misc_test.txt",
"src/third_party/wycheproof_testvectors/rsa_signature_test.txt",
"src/third_party/wycheproof_testvectors/x25519_test.txt",
]
ssl_test_sources = [
"src/crypto/test/abi_test.cc",
"src/crypto/test/gtest_main.cc",
"src/ssl/span_test.cc",
"src/ssl/ssl_c_test.c",
"src/ssl/ssl_test.cc",
]


@ -175,15 +175,10 @@
'sources': [ '<@(boringssl_linux_x86_64_sources)' ],
}],
['OS == "win"', {
'sources': [ '<@(boringssl_win_x86_64_sources)' ],
# Windows' assembly is built with Yasm. The other platforms use
# the platform assembler.
'variables': {
'yasm_output_path': '<(SHARED_INTERMEDIATE_DIR)/third_party/boringssl',
# NOTES(kqyang): Somehow ASM fails to compile. Disable ASM.
'direct_dependent_settings': {
'defines': [ 'OPENSSL_NO_ASM' ],
},
'includes': [
'../yasm/yasm_compile.gypi',
],
}],
['OS != "mac" and OS != "linux" and OS != "win" and OS != "android"', {
'direct_dependent_settings': {
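
Because the hunk above renders added and removed lines without +/- markers, a rough reconstruction may help: the Yasm plumbing (the yasm_output_path variable, the yasm_compile.gypi include, and the comment about Yasm) is the removed side, and the OPENSSL_NO_ASM define exported via direct_dependent_settings is the added side. A sketch of the added portion in GYP's Python-literal syntax follows; whether the '<@(boringssl_win_x86_64_sources)' reference is retained, and whether the library target also defines OPENSSL_NO_ASM for itself elsewhere, cannot be determined from this view and is not shown.

# Approximate shape of the Windows branch after this change, reconstructed
# from the hunk above; the enclosing target definition is omitted.
['OS == "win"', {
  # NOTES(kqyang): Somehow ASM fails to compile. Disable ASM.
  'direct_dependent_settings': {
    'defines': [ 'OPENSSL_NO_ASM' ],
  },
}],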


@ -8,17 +8,18 @@
'variables': {
'boringssl_ssl_sources': [
'src/include/openssl/dtls1.h',
'src/include/openssl/srtp.h',
'src/include/openssl/ssl.h',
'src/include/openssl/ssl3.h',
'src/include/openssl/tls1.h',
'src/ssl/bio_ssl.cc',
'src/ssl/custom_extensions.cc',
'src/ssl/d1_both.cc',
'src/ssl/d1_lib.cc',
'src/ssl/d1_pkt.cc',
'src/ssl/d1_srtp.cc',
'src/ssl/dtls_method.cc',
'src/ssl/dtls_record.cc',
'src/ssl/handoff.cc',
'src/ssl/handshake.cc',
'src/ssl/handshake_client.cc',
'src/ssl/handshake_server.cc',
@ -102,16 +103,18 @@
'src/crypto/bytestring/cbb.c',
'src/crypto/bytestring/cbs.c',
'src/crypto/bytestring/internal.h',
'src/crypto/bytestring/unicode.c',
'src/crypto/chacha/chacha.c',
'src/crypto/chacha/internal.h',
'src/crypto/cipher_extra/cipher_extra.c',
'src/crypto/cipher_extra/derive_key.c',
'src/crypto/cipher_extra/e_aesccm.c',
'src/crypto/cipher_extra/e_aesctrhmac.c',
'src/crypto/cipher_extra/e_aesgcmsiv.c',
'src/crypto/cipher_extra/e_chacha20poly1305.c',
'src/crypto/cipher_extra/e_null.c',
'src/crypto/cipher_extra/e_rc2.c',
'src/crypto/cipher_extra/e_rc4.c',
'src/crypto/cipher_extra/e_ssl3.c',
'src/crypto/cipher_extra/e_tls.c',
'src/crypto/cipher_extra/internal.h',
'src/crypto/cipher_extra/tls_cbc.c',
@ -119,14 +122,15 @@
'src/crypto/conf/conf.c',
'src/crypto/conf/conf_def.h',
'src/crypto/conf/internal.h',
'src/crypto/cpu-aarch64-fuchsia.c',
'src/crypto/cpu-aarch64-linux.c',
'src/crypto/cpu-arm-linux.c',
'src/crypto/cpu-arm-linux.h',
'src/crypto/cpu-arm.c',
'src/crypto/cpu-intel.c',
'src/crypto/cpu-ppc64le.c',
'src/crypto/crypto.c',
'src/crypto/curve25519/spake25519.c',
'src/crypto/curve25519/x25519-x86_64.c',
'src/crypto/dh/check.c',
'src/crypto/dh/dh.c',
'src/crypto/dh/dh_asn1.c',
@ -135,7 +139,8 @@
'src/crypto/dsa/dsa.c',
'src/crypto/dsa/dsa_asn1.c',
'src/crypto/ec_extra/ec_asn1.c',
'src/crypto/ecdh/ecdh.c',
'src/crypto/ec_extra/ec_derive.c',
'src/crypto/ecdh_extra/ecdh_extra.c',
'src/crypto/ecdsa_extra/ecdsa_asn1.c',
'src/crypto/engine/engine.c',
'src/crypto/err/err.c',
@ -152,6 +157,8 @@
'src/crypto/evp/p_ed25519_asn1.c',
'src/crypto/evp/p_rsa.c',
'src/crypto/evp/p_rsa_asn1.c',
'src/crypto/evp/p_x25519.c',
'src/crypto/evp/p_x25519_asn1.c',
'src/crypto/evp/pbkdf.c',
'src/crypto/evp/print.c',
'src/crypto/evp/scrypt.c',
@ -169,11 +176,17 @@
'src/crypto/fipsmodule/ec/internal.h',
'src/crypto/fipsmodule/ec/p256-x86_64-table.h',
'src/crypto/fipsmodule/ec/p256-x86_64.h',
'src/crypto/fipsmodule/fips_shared_support.c',
'src/crypto/fipsmodule/is_fips.c',
'src/crypto/fipsmodule/md5/internal.h',
'src/crypto/fipsmodule/modes/internal.h',
'src/crypto/fipsmodule/rand/internal.h',
'src/crypto/fipsmodule/rsa/internal.h',
'src/crypto/fipsmodule/sha/internal.h',
'src/crypto/fipsmodule/tls/internal.h',
'src/crypto/hkdf/hkdf.c',
'src/crypto/hrss/hrss.c',
'src/crypto/hrss/internal.h',
'src/crypto/internal.h',
'src/crypto/lhash/lhash.c',
'src/crypto/mem.c',
@ -210,6 +223,8 @@
'src/crypto/refcount_c11.c',
'src/crypto/refcount_lock.c',
'src/crypto/rsa_extra/rsa_asn1.c',
'src/crypto/rsa_extra/rsa_print.c',
'src/crypto/siphash/siphash.c',
'src/crypto/stack/stack.c',
'src/crypto/thread.c',
'src/crypto/thread_none.c',
@ -268,6 +283,7 @@
'src/crypto/x509/x_x509.c',
'src/crypto/x509/x_x509a.c',
'src/crypto/x509v3/ext_dat.h',
'src/crypto/x509v3/internal.h',
'src/crypto/x509v3/pcy_cache.c',
'src/crypto/x509v3/pcy_data.c',
'src/crypto/x509v3/pcy_int.h',
@ -291,6 +307,7 @@
'src/crypto/x509v3/v3_int.c',
'src/crypto/x509v3/v3_lib.c',
'src/crypto/x509v3/v3_ncons.c',
'src/crypto/x509v3/v3_ocsp.c',
'src/crypto/x509v3/v3_pci.c',
'src/crypto/x509v3/v3_pcia.c',
'src/crypto/x509v3/v3_pcons.c',
@ -327,6 +344,7 @@
'src/include/openssl/dh.h',
'src/include/openssl/digest.h',
'src/include/openssl/dsa.h',
'src/include/openssl/e_os2.h',
'src/include/openssl/ec.h',
'src/include/openssl/ec_key.h',
'src/include/openssl/ecdh.h',
@ -337,9 +355,9 @@
'src/include/openssl/ex_data.h',
'src/include/openssl/hkdf.h',
'src/include/openssl/hmac.h',
'src/include/openssl/hrss.h',
'src/include/openssl/is_boringssl.h',
'src/include/openssl/lhash.h',
'src/include/openssl/lhash_macros.h',
'src/include/openssl/md4.h',
'src/include/openssl/md5.h',
'src/include/openssl/mem.h',
@ -362,8 +380,8 @@
'src/include/openssl/rsa.h',
'src/include/openssl/safestack.h',
'src/include/openssl/sha.h',
'src/include/openssl/siphash.h',
'src/include/openssl/span.h',
'src/include/openssl/srtp.h',
'src/include/openssl/stack.h',
'src/include/openssl/thread.h',
'src/include/openssl/type_check.h',
@ -371,16 +389,34 @@
'src/include/openssl/x509_vfy.h',
'src/include/openssl/x509v3.h',
'src/third_party/fiat/curve25519.c',
'src/third_party/fiat/curve25519_32.h',
'src/third_party/fiat/curve25519_64.h',
'src/third_party/fiat/curve25519_tables.h',
'src/third_party/fiat/internal.h',
'src/third_party/fiat/p256_32.h',
'src/third_party/fiat/p256_64.h',
'src/third_party/sike/asm/fp_generic.c',
'src/third_party/sike/curve_params.c',
'src/third_party/sike/fpx.c',
'src/third_party/sike/fpx.h',
'src/third_party/sike/isogeny.c',
'src/third_party/sike/isogeny.h',
'src/third_party/sike/sike.c',
'src/third_party/sike/sike.h',
'src/third_party/sike/utils.h',
],
'boringssl_ios_aarch64_sources': [
'ios-aarch64/crypto/chacha/chacha-armv8.S',
'ios-aarch64/crypto/fipsmodule/aesv8-armx64.S',
'ios-aarch64/crypto/fipsmodule/armv8-mont.S',
'ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S',
'ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S',
'ios-aarch64/crypto/fipsmodule/sha1-armv8.S',
'ios-aarch64/crypto/fipsmodule/sha256-armv8.S',
'ios-aarch64/crypto/fipsmodule/sha512-armv8.S',
'ios-aarch64/crypto/fipsmodule/vpaes-armv8.S',
'ios-aarch64/crypto/test/trampoline-armv8.S',
'ios-aarch64/crypto/third_party/sike/asm/fp-armv8.S',
],
'boringssl_ios_arm_sources': [
'ios-arm/crypto/chacha/chacha-armv4.S',
@ -393,15 +429,21 @@
'ios-arm/crypto/fipsmodule/sha1-armv4-large.S',
'ios-arm/crypto/fipsmodule/sha256-armv4.S',
'ios-arm/crypto/fipsmodule/sha512-armv4.S',
'ios-arm/crypto/fipsmodule/vpaes-armv7.S',
'ios-arm/crypto/test/trampoline-armv4.S',
],
'boringssl_linux_aarch64_sources': [
'linux-aarch64/crypto/chacha/chacha-armv8.S',
'linux-aarch64/crypto/fipsmodule/aesv8-armx64.S',
'linux-aarch64/crypto/fipsmodule/armv8-mont.S',
'linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S',
'linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S',
'linux-aarch64/crypto/fipsmodule/sha1-armv8.S',
'linux-aarch64/crypto/fipsmodule/sha256-armv8.S',
'linux-aarch64/crypto/fipsmodule/sha512-armv8.S',
'linux-aarch64/crypto/fipsmodule/vpaes-armv8.S',
'linux-aarch64/crypto/test/trampoline-armv8.S',
'linux-aarch64/crypto/third_party/sike/asm/fp-armv8.S',
],
'boringssl_linux_arm_sources': [
'linux-arm/crypto/chacha/chacha-armv4.S',
@ -414,6 +456,8 @@
'linux-arm/crypto/fipsmodule/sha1-armv4-large.S',
'linux-arm/crypto/fipsmodule/sha256-armv4.S',
'linux-arm/crypto/fipsmodule/sha512-armv4.S',
'linux-arm/crypto/fipsmodule/vpaes-armv7.S',
'linux-arm/crypto/test/trampoline-armv4.S',
'src/crypto/curve25519/asm/x25519-asm-arm.S',
'src/crypto/poly1305/poly1305_arm_asm.S',
],
@ -427,6 +471,7 @@
'linux-x86/crypto/fipsmodule/aesni-x86.S',
'linux-x86/crypto/fipsmodule/bn-586.S',
'linux-x86/crypto/fipsmodule/co-586.S',
'linux-x86/crypto/fipsmodule/ghash-ssse3-x86.S',
'linux-x86/crypto/fipsmodule/ghash-x86.S',
'linux-x86/crypto/fipsmodule/md5-586.S',
'linux-x86/crypto/fipsmodule/sha1-586.S',
@ -434,6 +479,7 @@
'linux-x86/crypto/fipsmodule/sha512-586.S',
'linux-x86/crypto/fipsmodule/vpaes-x86.S',
'linux-x86/crypto/fipsmodule/x86-mont.S',
'linux-x86/crypto/test/trampoline-x86.S',
],
'boringssl_linux_x86_64_sources': [
'linux-x86_64/crypto/chacha/chacha-x86_64.S',
@ -442,10 +488,11 @@
'linux-x86_64/crypto/fipsmodule/aes-x86_64.S',
'linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S',
'linux-x86_64/crypto/fipsmodule/aesni-x86_64.S',
'linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S',
'linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S',
'linux-x86_64/crypto/fipsmodule/ghash-x86_64.S',
'linux-x86_64/crypto/fipsmodule/md5-x86_64.S',
'linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S',
'linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S',
'linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S',
'linux-x86_64/crypto/fipsmodule/rsaz-avx2.S',
'linux-x86_64/crypto/fipsmodule/sha1-x86_64.S',
@ -454,7 +501,9 @@
'linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S',
'linux-x86_64/crypto/fipsmodule/x86_64-mont.S',
'linux-x86_64/crypto/fipsmodule/x86_64-mont5.S',
'src/crypto/curve25519/asm/x25519-asm-x86_64.S',
'linux-x86_64/crypto/test/trampoline-x86_64.S',
'linux-x86_64/crypto/third_party/sike/asm/fp-x86_64.S',
'src/crypto/hrss/asm/poly_rq_mul.S',
],
'boringssl_mac_x86_sources': [
'mac-x86/crypto/chacha/chacha-x86.S',
@ -462,6 +511,7 @@
'mac-x86/crypto/fipsmodule/aesni-x86.S',
'mac-x86/crypto/fipsmodule/bn-586.S',
'mac-x86/crypto/fipsmodule/co-586.S',
'mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S',
'mac-x86/crypto/fipsmodule/ghash-x86.S',
'mac-x86/crypto/fipsmodule/md5-586.S',
'mac-x86/crypto/fipsmodule/sha1-586.S',
@ -469,6 +519,7 @@
'mac-x86/crypto/fipsmodule/sha512-586.S',
'mac-x86/crypto/fipsmodule/vpaes-x86.S',
'mac-x86/crypto/fipsmodule/x86-mont.S',
'mac-x86/crypto/test/trampoline-x86.S',
],
'boringssl_mac_x86_64_sources': [
'mac-x86_64/crypto/chacha/chacha-x86_64.S',
@ -477,10 +528,11 @@
'mac-x86_64/crypto/fipsmodule/aes-x86_64.S',
'mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S',
'mac-x86_64/crypto/fipsmodule/aesni-x86_64.S',
'mac-x86_64/crypto/fipsmodule/bsaes-x86_64.S',
'mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S',
'mac-x86_64/crypto/fipsmodule/ghash-x86_64.S',
'mac-x86_64/crypto/fipsmodule/md5-x86_64.S',
'mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S',
'mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S',
'mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S',
'mac-x86_64/crypto/fipsmodule/rsaz-avx2.S',
'mac-x86_64/crypto/fipsmodule/sha1-x86_64.S',
@ -489,7 +541,8 @@
'mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S',
'mac-x86_64/crypto/fipsmodule/x86_64-mont.S',
'mac-x86_64/crypto/fipsmodule/x86_64-mont5.S',
'src/crypto/curve25519/asm/x25519-asm-x86_64.S',
'mac-x86_64/crypto/test/trampoline-x86_64.S',
'mac-x86_64/crypto/third_party/sike/asm/fp-x86_64.S',
],
'boringssl_win_x86_sources': [
'win-x86/crypto/chacha/chacha-x86.asm',
@ -497,6 +550,7 @@
'win-x86/crypto/fipsmodule/aesni-x86.asm',
'win-x86/crypto/fipsmodule/bn-586.asm',
'win-x86/crypto/fipsmodule/co-586.asm',
'win-x86/crypto/fipsmodule/ghash-ssse3-x86.asm',
'win-x86/crypto/fipsmodule/ghash-x86.asm',
'win-x86/crypto/fipsmodule/md5-586.asm',
'win-x86/crypto/fipsmodule/sha1-586.asm',
@ -504,6 +558,7 @@
'win-x86/crypto/fipsmodule/sha512-586.asm',
'win-x86/crypto/fipsmodule/vpaes-x86.asm',
'win-x86/crypto/fipsmodule/x86-mont.asm',
'win-x86/crypto/test/trampoline-x86.asm',
],
'boringssl_win_x86_64_sources': [
'win-x86_64/crypto/chacha/chacha-x86_64.asm',
@ -512,10 +567,11 @@
'win-x86_64/crypto/fipsmodule/aes-x86_64.asm',
'win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm',
'win-x86_64/crypto/fipsmodule/aesni-x86_64.asm',
'win-x86_64/crypto/fipsmodule/bsaes-x86_64.asm',
'win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm',
'win-x86_64/crypto/fipsmodule/ghash-x86_64.asm',
'win-x86_64/crypto/fipsmodule/md5-x86_64.asm',
'win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm',
'win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.asm',
'win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm',
'win-x86_64/crypto/fipsmodule/rsaz-avx2.asm',
'win-x86_64/crypto/fipsmodule/sha1-x86_64.asm',
@ -524,6 +580,8 @@
'win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm',
'win-x86_64/crypto/fipsmodule/x86_64-mont.asm',
'win-x86_64/crypto/fipsmodule/x86_64-mont5.asm',
'win-x86_64/crypto/test/trampoline-x86_64.asm',
'win-x86_64/crypto/third_party/sike/asm/fp-x86_64.asm',
],
}
}

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@ -0,0 +1,772 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.section __TEXT,__const
.align 5
Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key
.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
mov x3,#-1
cmp x0,#0
b.eq Lenc_key_abort
cmp x2,#0
b.eq Lenc_key_abort
mov x3,#-2
cmp w1,#128
b.lt Lenc_key_abort
cmp w1,#256
b.gt Lenc_key_abort
tst w1,#0x3f
b.ne Lenc_key_abort
adrp x3,Lrcon@PAGE
add x3,x3,Lrcon@PAGEOFF
cmp w1,#192
eor v0.16b,v0.16b,v0.16b
ld1 {v3.16b},[x0],#16
mov w1,#8 // reuse w1
ld1 {v1.4s,v2.4s},[x3],#32
b.lt Loop128
b.eq L192
b L256
.align 4
Loop128:
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
b.ne Loop128
ld1 {v1.4s},[x3]
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2]
add x2,x2,#0x50
mov w12,#10
b Ldone
.align 4
L192:
ld1 {v4.8b},[x0],#8
movi v6.16b,#8 // borrow v6.16b
st1 {v3.4s},[x2],#16
sub v2.16b,v2.16b,v6.16b // adjust the mask
Loop192:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.8b},[x2],#8
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
dup v5.4s,v3.s[3]
eor v5.16b,v5.16b,v4.16b
eor v6.16b,v6.16b,v1.16b
ext v4.16b,v0.16b,v4.16b,#12
shl v1.16b,v1.16b,#1
eor v4.16b,v4.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
eor v4.16b,v4.16b,v6.16b
st1 {v3.4s},[x2],#16
b.ne Loop192
mov w12,#12
add x2,x2,#0x20
b Ldone
.align 4
L256:
ld1 {v4.16b},[x0]
mov w1,#7
mov w12,#14
st1 {v3.4s},[x2],#16
Loop256:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2],#16
b.eq Ldone
dup v6.4s,v3.s[3] // just splat
ext v5.16b,v0.16b,v4.16b,#12
aese v6.16b,v0.16b
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
eor v4.16b,v4.16b,v6.16b
b Loop256
Ldone:
str w12,[x2]
mov x3,#0
Lenc_key_abort:
mov x0,x3 // return value
ldr x29,[sp],#16
ret
.globl _aes_hw_set_decrypt_key
.private_extern _aes_hw_set_decrypt_key
.align 5
_aes_hw_set_decrypt_key:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl Lenc_key
cmp x0,#0
b.ne Ldec_key_abort
sub x2,x2,#240 // restore original x2
mov x4,#-16
add x0,x2,x12,lsl#4 // end of key schedule
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
Loop_imc:
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
aesimc v0.16b,v0.16b
aesimc v1.16b,v1.16b
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
cmp x0,x2
b.hi Loop_imc
ld1 {v0.4s},[x2]
aesimc v0.16b,v0.16b
st1 {v0.4s},[x0]
eor x0,x0,x0 // return value
Ldec_key_abort:
ldp x29,x30,[sp],#16
ret
.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt
.align 5
_aes_hw_encrypt:
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
Loop_enc:
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aese v2.16b,v1.16b
aesmc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt Loop_enc
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2]
aese v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.globl _aes_hw_decrypt
.private_extern _aes_hw_decrypt
.align 5
_aes_hw_decrypt:
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
Loop_dec:
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aesd v2.16b,v1.16b
aesimc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt Loop_dec
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2]
aesd v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.globl _aes_hw_cbc_encrypt
.private_extern _aes_hw_cbc_encrypt
.align 5
_aes_hw_cbc_encrypt:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
mov x8,#16
b.lo Lcbc_abort
csel x8,xzr,x8,eq
cmp w5,#0 // en- or decrypting?
ldr w5,[x3,#240]
and x2,x2,#-16
ld1 {v6.16b},[x4]
ld1 {v0.16b},[x0],x8
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#6
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
sub w5,w5,#2
ld1 {v18.4s,v19.4s},[x7],#32
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
b.eq Lcbc_dec
cmp w5,#2
eor v0.16b,v0.16b,v6.16b
eor v5.16b,v16.16b,v7.16b
b.eq Lcbc_enc128
ld1 {v2.4s,v3.4s},[x7]
add x7,x3,#16
add x6,x3,#16*4
add x12,x3,#16*5
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
add x14,x3,#16*6
add x3,x3,#16*7
b Lenter_cbc_enc
.align 4
Loop_cbc_enc:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
Lenter_cbc_enc:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x6]
cmp w5,#4
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x12]
b.eq Lcbc_enc192
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x14]
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x3]
nop
Lcbc_enc192:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs Loop_cbc_enc
st1 {v6.16b},[x1],#16
b Lcbc_done
.align 5
Lcbc_enc128:
ld1 {v2.4s,v3.4s},[x7]
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
b Lenter_cbc_enc128
Loop_cbc_enc128:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
Lenter_cbc_enc128:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs Loop_cbc_enc128
st1 {v6.16b},[x1],#16
b Lcbc_done
.align 5
Lcbc_dec:
ld1 {v18.16b},[x0],#16
subs x2,x2,#32 // bias
add w6,w5,#2
orr v3.16b,v0.16b,v0.16b
orr v1.16b,v0.16b,v0.16b
orr v19.16b,v18.16b,v18.16b
b.lo Lcbc_dec_tail
orr v1.16b,v18.16b,v18.16b
ld1 {v18.16b},[x0],#16
orr v2.16b,v0.16b,v0.16b
orr v3.16b,v1.16b,v1.16b
orr v19.16b,v18.16b,v18.16b
Loop3x_cbc_dec:
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt Loop3x_cbc_dec
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
eor v4.16b,v6.16b,v7.16b
subs x2,x2,#0x30
eor v5.16b,v2.16b,v7.16b
csel x6,x2,x6,lo // x6, w6, is zero at this point
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
add x0,x0,x6 // x0 is adjusted in such way that
// at exit from the loop v1.16b-v18.16b
// are loaded with last "words"
orr v6.16b,v19.16b,v19.16b
mov x7,x3
aesd v0.16b,v20.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
ld1 {v2.16b},[x0],#16
aesd v0.16b,v21.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
aesd v0.16b,v22.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
ld1 {v19.16b},[x0],#16
aesd v0.16b,v23.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
add w6,w5,#2
eor v4.16b,v4.16b,v0.16b
eor v5.16b,v5.16b,v1.16b
eor v18.16b,v18.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v4.16b},[x1],#16
orr v0.16b,v2.16b,v2.16b
st1 {v5.16b},[x1],#16
orr v1.16b,v3.16b,v3.16b
st1 {v18.16b},[x1],#16
orr v18.16b,v19.16b,v19.16b
b.hs Loop3x_cbc_dec
cmn x2,#0x30
b.eq Lcbc_done
nop
Lcbc_dec_tail:
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt Lcbc_dec_tail
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
cmn x2,#0x20
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
eor v5.16b,v6.16b,v7.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
b.eq Lcbc_dec_one
eor v5.16b,v5.16b,v1.16b
eor v17.16b,v17.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
st1 {v17.16b},[x1],#16
b Lcbc_done
Lcbc_dec_one:
eor v5.16b,v5.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
Lcbc_done:
st1 {v6.16b},[x4]
Lcbc_abort:
ldr x29,[sp],#16
ret
.globl _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks
.align 5
_aes_hw_ctr32_encrypt_blocks:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240]
ldr w8, [x4, #12]
ld1 {v0.4s},[x4]
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#4
mov x12,#16
cmp x2,#2
add x7,x3,x5,lsl#4 // pointer to last 5 round keys
sub w5,w5,#2
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
csel x12,xzr,x12,lo
#ifndef __ARMEB__
rev w8, w8
#endif
orr v1.16b,v0.16b,v0.16b
add w10, w8, #1
orr v18.16b,v0.16b,v0.16b
add w8, w8, #2
orr v6.16b,v0.16b,v0.16b
rev w10, w10
mov v1.s[3],w10
b.ls Lctr32_tail
rev w12, w8
sub x2,x2,#3 // bias
mov v18.s[3],w12
b Loop3x_ctr32
.align 4
Loop3x_ctr32:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
aese v18.16b,v17.16b
aesmc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt Loop3x_ctr32
aese v0.16b,v16.16b
aesmc v4.16b,v0.16b
aese v1.16b,v16.16b
aesmc v5.16b,v1.16b
ld1 {v2.16b},[x0],#16
orr v0.16b,v6.16b,v6.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
orr v1.16b,v6.16b,v6.16b
aese v4.16b,v17.16b
aesmc v4.16b,v4.16b
aese v5.16b,v17.16b
aesmc v5.16b,v5.16b
ld1 {v19.16b},[x0],#16
mov x7,x3
aese v18.16b,v17.16b
aesmc v17.16b,v18.16b
orr v18.16b,v6.16b,v6.16b
add w9,w8,#1
aese v4.16b,v20.16b
aesmc v4.16b,v4.16b
aese v5.16b,v20.16b
aesmc v5.16b,v5.16b
eor v2.16b,v2.16b,v7.16b
add w10,w8,#2
aese v17.16b,v20.16b
aesmc v17.16b,v17.16b
eor v3.16b,v3.16b,v7.16b
add w8,w8,#3
aese v4.16b,v21.16b
aesmc v4.16b,v4.16b
aese v5.16b,v21.16b
aesmc v5.16b,v5.16b
eor v19.16b,v19.16b,v7.16b
rev w9,w9
aese v17.16b,v21.16b
aesmc v17.16b,v17.16b
mov v0.s[3], w9
rev w10,w10
aese v4.16b,v22.16b
aesmc v4.16b,v4.16b
aese v5.16b,v22.16b
aesmc v5.16b,v5.16b
mov v1.s[3], w10
rev w12,w8
aese v17.16b,v22.16b
aesmc v17.16b,v17.16b
mov v18.s[3], w12
subs x2,x2,#3
aese v4.16b,v23.16b
aese v5.16b,v23.16b
aese v17.16b,v23.16b
eor v2.16b,v2.16b,v4.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
st1 {v2.16b},[x1],#16
eor v3.16b,v3.16b,v5.16b
mov w6,w5
st1 {v3.16b},[x1],#16
eor v19.16b,v19.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v19.16b},[x1],#16
b.hs Loop3x_ctr32
adds x2,x2,#3
b.eq Lctr32_done
cmp x2,#1
mov x12,#16
csel x12,xzr,x12,eq
Lctr32_tail:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v17.4s},[x7],#16
b.gt Lctr32_tail
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v2.16b},[x0],x12
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v1.16b,v20.16b
aesmc v1.16b,v1.16b
ld1 {v3.16b},[x0]
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v1.16b,v21.16b
aesmc v1.16b,v1.16b
eor v2.16b,v2.16b,v7.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v1.16b,v22.16b
aesmc v1.16b,v1.16b
eor v3.16b,v3.16b,v7.16b
aese v0.16b,v23.16b
aese v1.16b,v23.16b
cmp x2,#1
eor v2.16b,v2.16b,v0.16b
eor v3.16b,v3.16b,v1.16b
st1 {v2.16b},[x1],#16
b.eq Lctr32_done
st1 {v3.16b},[x1]
Lctr32_done:
ldr x29,[sp],#16
ret
#endif
#endif // !OPENSSL_NO_ASM

File diff suppressed because it is too large.


@ -0,0 +1,338 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _gcm_init_neon
.private_extern _gcm_init_neon
.align 4
_gcm_init_neon:
// This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1
shl v19.2d, v19.2d, #57 // 0xc2.0
ext v3.16b, v17.16b, v17.16b, #8
ushr v18.2d, v19.2d, #63
dup v17.4s, v17.s[1]
ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
ushr v18.2d, v3.2d, #63
sshr v17.4s, v17.4s, #31 // broadcast carry bit
and v18.16b, v18.16b, v16.16b
shl v3.2d, v3.2d, #1
ext v18.16b, v18.16b, v18.16b, #8
and v16.16b, v16.16b, v17.16b
orr v3.16b, v3.16b, v18.16b // H<<<=1
eor v5.16b, v3.16b, v16.16b // twisted H
st1 {v5.2d}, [x0] // store Htable[0]
ret
.globl _gcm_gmult_neon
.private_extern _gcm_gmult_neon
.align 4
_gcm_gmult_neon:
ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, Lmasks@PAGE // load constants
add x9, x9, Lmasks@PAGEOFF
ld1 {v24.2d, v25.2d}, [x9]
rev64 v3.16b, v3.16b // byteswap Xi
ext v3.16b, v3.16b, v3.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
mov x3, #16
b Lgmult_neon
.globl _gcm_ghash_neon
.private_extern _gcm_ghash_neon
.align 4
_gcm_ghash_neon:
ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, Lmasks@PAGE // load constants
add x9, x9, Lmasks@PAGEOFF
ld1 {v24.2d, v25.2d}, [x9]
rev64 v0.16b, v0.16b // byteswap Xi
ext v0.16b, v0.16b, v0.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
Loop_neon:
ld1 {v3.16b}, [x2], #16 // load inp
rev64 v3.16b, v3.16b // byteswap inp
ext v3.16b, v3.16b, v3.16b, #8
eor v3.16b, v3.16b, v0.16b // inp ^= Xi
Lgmult_neon:
// Split the input into v3 and v4. (The upper halves are unused,
// so it is okay to leave them alone.)
ins v4.d[0], v3.d[1]
ext v16.8b, v5.8b, v5.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v0.8b, v3.8b, v3.8b, #1 // B1
pmull v0.8h, v5.8b, v0.8b // E = A*B1
ext v17.8b, v5.8b, v5.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v5.8b, v19.8b // G = A*B2
ext v18.8b, v5.8b, v5.8b, #3 // A3
eor v16.16b, v16.16b, v0.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v0.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v0.8h, v5.8b, v0.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v0.16b // N = I + J
pmull v19.8h, v5.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v0.8h, v5.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v0.16b, v0.16b, v16.16b
eor v0.16b, v0.16b, v18.16b
eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
ext v16.8b, v7.8b, v7.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v1.8b, v3.8b, v3.8b, #1 // B1
pmull v1.8h, v7.8b, v1.8b // E = A*B1
ext v17.8b, v7.8b, v7.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v7.8b, v19.8b // G = A*B2
ext v18.8b, v7.8b, v7.8b, #3 // A3
eor v16.16b, v16.16b, v1.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v1.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v1.8h, v7.8b, v1.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v1.16b // N = I + J
pmull v19.8h, v7.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v1.8h, v7.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v1.16b, v1.16b, v16.16b
eor v1.16b, v1.16b, v18.16b
ext v16.8b, v6.8b, v6.8b, #1 // A1
pmull v16.8h, v16.8b, v4.8b // F = A1*B
ext v2.8b, v4.8b, v4.8b, #1 // B1
pmull v2.8h, v6.8b, v2.8b // E = A*B1
ext v17.8b, v6.8b, v6.8b, #2 // A2
pmull v17.8h, v17.8b, v4.8b // H = A2*B
ext v19.8b, v4.8b, v4.8b, #2 // B2
pmull v19.8h, v6.8b, v19.8b // G = A*B2
ext v18.8b, v6.8b, v6.8b, #3 // A3
eor v16.16b, v16.16b, v2.16b // L = E + F
pmull v18.8h, v18.8b, v4.8b // J = A3*B
ext v2.8b, v4.8b, v4.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v2.8h, v6.8b, v2.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v4.8b, v4.8b, #4 // B4
eor v18.16b, v18.16b, v2.16b // N = I + J
pmull v19.8h, v6.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v2.8h, v6.8b, v4.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v2.16b, v2.16b, v16.16b
eor v2.16b, v2.16b, v18.16b
ext v16.16b, v0.16b, v2.16b, #8
eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
eor v1.16b, v1.16b, v2.16b
eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result
// This is a no-op due to the ins instruction below.
// ins v2.d[0], v1.d[1]
// equivalent of reduction_avx from ghash-x86_64.pl
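// The 256-bit product is folded back to 128 bits modulo the GHASH
// polynomial x^128 + x^7 + x^2 + x + 1. In the bit-reflected
// representation used here, the left shifts by 63, 62 and 57 (and the
// matching right shifts in the 2nd phase) multiply by x, x^2 and x^7.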
shl v17.2d, v0.2d, #57 // 1st phase
shl v18.2d, v0.2d, #62
eor v18.16b, v18.16b, v17.16b //
shl v17.2d, v0.2d, #63
eor v18.16b, v18.16b, v17.16b //
// Note Xm contains {Xl.d[1], Xh.d[0]}.
eor v18.16b, v18.16b, v1.16b
ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0]
ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1]
ushr v18.2d, v0.2d, #1 // 2nd phase
eor v2.16b, v2.16b,v0.16b
eor v0.16b, v0.16b,v18.16b //
ushr v18.2d, v18.2d, #6
ushr v0.2d, v0.2d, #1 //
eor v0.16b, v0.16b, v2.16b //
eor v0.16b, v0.16b, v18.16b //
subs x3, x3, #16
bne Loop_neon
rev64 v0.16b, v0.16b // byteswap Xi and write
ext v0.16b, v0.16b, v0.16b, #8
st1 {v0.16b}, [x0]
ret
.section __TEXT,__const
.align 4
Lmasks:
.quad 0x0000ffffffffffff // k48
.quad 0x00000000ffffffff // k32
.quad 0x000000000000ffff // k16
.quad 0x0000000000000000 // k0
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM


@ -0,0 +1,246 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.globl _gcm_init_v8
.private_extern _gcm_init_v8
.align 4
_gcm_init_v8:
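// Computes the GHASH key schedule: H is loaded from [x1] and converted to
// the "twisted" form used by gcm_gmult_v8/gcm_ghash_v8; twisted H, a packed
// Karatsuba pre-processed value and H^2 are stored to Htable[0..2] at [x0].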
ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0
ext v3.16b,v17.16b,v17.16b,#8
ushr v18.2d,v19.2d,#63
dup v17.4s,v17.s[1]
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
ushr v18.2d,v3.2d,#63
sshr v17.4s,v17.4s,#31 //broadcast carry bit
and v18.16b,v18.16b,v16.16b
shl v3.2d,v3.2d,#1
ext v18.16b,v18.16b,v18.16b,#8
and v16.16b,v16.16b,v17.16b
orr v3.16b,v3.16b,v18.16b //H<<<=1
eor v20.16b,v3.16b,v16.16b //twisted H
st1 {v20.2d},[x0],#16 //store Htable[0]
//calculate H^2
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
pmull v0.1q,v20.1d,v20.1d
eor v16.16b,v16.16b,v20.16b
pmull2 v2.1q,v20.2d,v20.2d
pmull v1.1q,v16.1d,v16.1d
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v22.16b,v0.16b,v18.16b
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
ret
.globl _gcm_gmult_v8
.private_extern _gcm_gmult_v8
.align 4
_gcm_gmult_v8:
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
shl v19.2d,v19.2d,#57
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v3.16b,v17.16b,v17.16b,#8
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.globl _gcm_ghash_v8
.private_extern _gcm_ghash_v8
.align 4
_gcm_ghash_v8:
ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that
//loaded value would have
//to be rotated in order to
//make it appear as in
//algorithm specification
subs x3,x3,#32 //see if x3 is 32 or larger
mov x12,#16 //x12 is used as post-
//increment for input pointer;
//as loop is modulo-scheduled
//x12 is zeroed just in time
//to preclude overstepping
//inp[len], which means that
//last block[s] are actually
//loaded twice, but last
//copy is not processed
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
movi v19.16b,#0xe1
ld1 {v22.2d},[x1]
csel x12,xzr,x12,eq //is it time to zero x12?
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
rev64 v0.16b,v0.16b
#endif
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
b.lo Lodd_tail_v8 //x3 was less than 32
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v7.16b,v17.16b,v17.16b,#8
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
pmull2 v6.1q,v20.2d,v7.2d
b Loop_mod2x_v8
.align 4
Loop_mod2x_v8:
ext v18.16b,v3.16b,v3.16b,#8
subs x3,x3,#32 //is there more data?
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
csel x12,xzr,x12,lo //is it time to zero x12?
pmull v5.1q,v21.1d,v17.1d
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
eor v0.16b,v0.16b,v4.16b //accumulate
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
eor v2.16b,v2.16b,v6.16b
csel x12,xzr,x12,eq //is it time to zero x12?
eor v1.16b,v1.16b,v5.16b
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
#endif
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v7.16b,v17.16b,v17.16b,#8
ext v3.16b,v16.16b,v16.16b,#8
eor v0.16b,v1.16b,v18.16b
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v3.16b,v3.16b,v18.16b
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
eor v3.16b,v3.16b,v0.16b
pmull2 v6.1q,v20.2d,v7.2d
b.hs Loop_mod2x_v8 //there were at least 32 more bytes
eor v2.16b,v2.16b,v18.16b
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
adds x3,x3,#32 //re-construct x3
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
b.eq Ldone_v8 //is x3 zero?
Lodd_tail_v8:
ext v18.16b,v0.16b,v0.16b,#8
eor v3.16b,v3.16b,v0.16b //inp^=Xi
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
Ldone_v8:
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -0,0 +1,685 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
// with |argv|, then saves the callee-saved registers into |state|. It returns
// the result of |func|. The |unwind| argument is unused.
// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
// const uint64_t *argv, size_t argc,
// uint64_t unwind);
.globl _abi_test_trampoline
.private_extern _abi_test_trampoline
.align 4
_abi_test_trampoline:
Labi_test_trampoline_begin:
// Stack layout (low to high addresses)
// x29,x30 (16 bytes)
// d8-d15 (64 bytes)
// x19-x28 (80 bytes)
// x1 (8 bytes)
// padding (8 bytes)
stp x29, x30, [sp, #-176]!
mov x29, sp
// Saved callee-saved registers and |state|.
stp d8, d9, [sp, #16]
stp d10, d11, [sp, #32]
stp d12, d13, [sp, #48]
stp d14, d15, [sp, #64]
stp x19, x20, [sp, #80]
stp x21, x22, [sp, #96]
stp x23, x24, [sp, #112]
stp x25, x26, [sp, #128]
stp x27, x28, [sp, #144]
str x1, [sp, #160]
// Load registers from |state|, with the exception of x29. x29 is the
// frame pointer and also callee-saved, but AAPCS64 allows platforms to
// mandate that x29 always point to a frame. iOS64 does so, which means
// we cannot fill x29 with entropy without violating ABI rules
// ourselves. x29 is tested separately below.
ldp d8, d9, [x1], #16
ldp d10, d11, [x1], #16
ldp d12, d13, [x1], #16
ldp d14, d15, [x1], #16
ldp x19, x20, [x1], #16
ldp x21, x22, [x1], #16
ldp x23, x24, [x1], #16
ldp x25, x26, [x1], #16
ldp x27, x28, [x1], #16
// Move parameters into temporary registers.
mov x9, x0
mov x10, x2
mov x11, x3
// Load parameters into registers.
cbz x11, Largs_done
ldr x0, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x1, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x2, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x3, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x4, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x5, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x6, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x7, [x10], #8
Largs_done:
blr x9
// Reload |state| and store registers.
ldr x1, [sp, #160]
stp d8, d9, [x1], #16
stp d10, d11, [x1], #16
stp d12, d13, [x1], #16
stp d14, d15, [x1], #16
stp x19, x20, [x1], #16
stp x21, x22, [x1], #16
stp x23, x24, [x1], #16
stp x25, x26, [x1], #16
stp x27, x28, [x1], #16
// |func| is required to preserve x29, the frame pointer. We cannot load
// random values into x29 (see comment above), so compare it against the
// expected value and zero the field of |state| if corrupted.
mov x9, sp
cmp x29, x9
b.eq Lx29_ok
str xzr, [x1]
Lx29_ok:
// Restore callee-saved registers.
ldp d8, d9, [sp, #16]
ldp d10, d11, [sp, #32]
ldp d12, d13, [sp, #48]
ldp d14, d15, [sp, #64]
ldp x19, x20, [sp, #80]
ldp x21, x22, [sp, #96]
ldp x23, x24, [sp, #112]
ldp x25, x26, [sp, #128]
ldp x27, x28, [sp, #144]
ldp x29, x30, [sp], #176
ret
.globl _abi_test_clobber_x0
.private_extern _abi_test_clobber_x0
.align 4
_abi_test_clobber_x0:
mov x0, xzr
ret
.globl _abi_test_clobber_x1
.private_extern _abi_test_clobber_x1
.align 4
_abi_test_clobber_x1:
mov x1, xzr
ret
.globl _abi_test_clobber_x2
.private_extern _abi_test_clobber_x2
.align 4
_abi_test_clobber_x2:
mov x2, xzr
ret
.globl _abi_test_clobber_x3
.private_extern _abi_test_clobber_x3
.align 4
_abi_test_clobber_x3:
mov x3, xzr
ret
.globl _abi_test_clobber_x4
.private_extern _abi_test_clobber_x4
.align 4
_abi_test_clobber_x4:
mov x4, xzr
ret
.globl _abi_test_clobber_x5
.private_extern _abi_test_clobber_x5
.align 4
_abi_test_clobber_x5:
mov x5, xzr
ret
.globl _abi_test_clobber_x6
.private_extern _abi_test_clobber_x6
.align 4
_abi_test_clobber_x6:
mov x6, xzr
ret
.globl _abi_test_clobber_x7
.private_extern _abi_test_clobber_x7
.align 4
_abi_test_clobber_x7:
mov x7, xzr
ret
.globl _abi_test_clobber_x8
.private_extern _abi_test_clobber_x8
.align 4
_abi_test_clobber_x8:
mov x8, xzr
ret
.globl _abi_test_clobber_x9
.private_extern _abi_test_clobber_x9
.align 4
_abi_test_clobber_x9:
mov x9, xzr
ret
.globl _abi_test_clobber_x10
.private_extern _abi_test_clobber_x10
.align 4
_abi_test_clobber_x10:
mov x10, xzr
ret
.globl _abi_test_clobber_x11
.private_extern _abi_test_clobber_x11
.align 4
_abi_test_clobber_x11:
mov x11, xzr
ret
.globl _abi_test_clobber_x12
.private_extern _abi_test_clobber_x12
.align 4
_abi_test_clobber_x12:
mov x12, xzr
ret
.globl _abi_test_clobber_x13
.private_extern _abi_test_clobber_x13
.align 4
_abi_test_clobber_x13:
mov x13, xzr
ret
.globl _abi_test_clobber_x14
.private_extern _abi_test_clobber_x14
.align 4
_abi_test_clobber_x14:
mov x14, xzr
ret
.globl _abi_test_clobber_x15
.private_extern _abi_test_clobber_x15
.align 4
_abi_test_clobber_x15:
mov x15, xzr
ret
.globl _abi_test_clobber_x16
.private_extern _abi_test_clobber_x16
.align 4
_abi_test_clobber_x16:
mov x16, xzr
ret
.globl _abi_test_clobber_x17
.private_extern _abi_test_clobber_x17
.align 4
_abi_test_clobber_x17:
mov x17, xzr
ret
.globl _abi_test_clobber_x19
.private_extern _abi_test_clobber_x19
.align 4
_abi_test_clobber_x19:
mov x19, xzr
ret
.globl _abi_test_clobber_x20
.private_extern _abi_test_clobber_x20
.align 4
_abi_test_clobber_x20:
mov x20, xzr
ret
.globl _abi_test_clobber_x21
.private_extern _abi_test_clobber_x21
.align 4
_abi_test_clobber_x21:
mov x21, xzr
ret
.globl _abi_test_clobber_x22
.private_extern _abi_test_clobber_x22
.align 4
_abi_test_clobber_x22:
mov x22, xzr
ret
.globl _abi_test_clobber_x23
.private_extern _abi_test_clobber_x23
.align 4
_abi_test_clobber_x23:
mov x23, xzr
ret
.globl _abi_test_clobber_x24
.private_extern _abi_test_clobber_x24
.align 4
_abi_test_clobber_x24:
mov x24, xzr
ret
.globl _abi_test_clobber_x25
.private_extern _abi_test_clobber_x25
.align 4
_abi_test_clobber_x25:
mov x25, xzr
ret
.globl _abi_test_clobber_x26
.private_extern _abi_test_clobber_x26
.align 4
_abi_test_clobber_x26:
mov x26, xzr
ret
.globl _abi_test_clobber_x27
.private_extern _abi_test_clobber_x27
.align 4
_abi_test_clobber_x27:
mov x27, xzr
ret
.globl _abi_test_clobber_x28
.private_extern _abi_test_clobber_x28
.align 4
_abi_test_clobber_x28:
mov x28, xzr
ret
.globl _abi_test_clobber_x29
.private_extern _abi_test_clobber_x29
.align 4
_abi_test_clobber_x29:
mov x29, xzr
ret
.globl _abi_test_clobber_d0
.private_extern _abi_test_clobber_d0
.align 4
_abi_test_clobber_d0:
fmov d0, xzr
ret
.globl _abi_test_clobber_d1
.private_extern _abi_test_clobber_d1
.align 4
_abi_test_clobber_d1:
fmov d1, xzr
ret
.globl _abi_test_clobber_d2
.private_extern _abi_test_clobber_d2
.align 4
_abi_test_clobber_d2:
fmov d2, xzr
ret
.globl _abi_test_clobber_d3
.private_extern _abi_test_clobber_d3
.align 4
_abi_test_clobber_d3:
fmov d3, xzr
ret
.globl _abi_test_clobber_d4
.private_extern _abi_test_clobber_d4
.align 4
_abi_test_clobber_d4:
fmov d4, xzr
ret
.globl _abi_test_clobber_d5
.private_extern _abi_test_clobber_d5
.align 4
_abi_test_clobber_d5:
fmov d5, xzr
ret
.globl _abi_test_clobber_d6
.private_extern _abi_test_clobber_d6
.align 4
_abi_test_clobber_d6:
fmov d6, xzr
ret
.globl _abi_test_clobber_d7
.private_extern _abi_test_clobber_d7
.align 4
_abi_test_clobber_d7:
fmov d7, xzr
ret
.globl _abi_test_clobber_d8
.private_extern _abi_test_clobber_d8
.align 4
_abi_test_clobber_d8:
fmov d8, xzr
ret
.globl _abi_test_clobber_d9
.private_extern _abi_test_clobber_d9
.align 4
_abi_test_clobber_d9:
fmov d9, xzr
ret
.globl _abi_test_clobber_d10
.private_extern _abi_test_clobber_d10
.align 4
_abi_test_clobber_d10:
fmov d10, xzr
ret
.globl _abi_test_clobber_d11
.private_extern _abi_test_clobber_d11
.align 4
_abi_test_clobber_d11:
fmov d11, xzr
ret
.globl _abi_test_clobber_d12
.private_extern _abi_test_clobber_d12
.align 4
_abi_test_clobber_d12:
fmov d12, xzr
ret
.globl _abi_test_clobber_d13
.private_extern _abi_test_clobber_d13
.align 4
_abi_test_clobber_d13:
fmov d13, xzr
ret
.globl _abi_test_clobber_d14
.private_extern _abi_test_clobber_d14
.align 4
_abi_test_clobber_d14:
fmov d14, xzr
ret
.globl _abi_test_clobber_d15
.private_extern _abi_test_clobber_d15
.align 4
_abi_test_clobber_d15:
fmov d15, xzr
ret
.globl _abi_test_clobber_d16
.private_extern _abi_test_clobber_d16
.align 4
_abi_test_clobber_d16:
fmov d16, xzr
ret
.globl _abi_test_clobber_d17
.private_extern _abi_test_clobber_d17
.align 4
_abi_test_clobber_d17:
fmov d17, xzr
ret
.globl _abi_test_clobber_d18
.private_extern _abi_test_clobber_d18
.align 4
_abi_test_clobber_d18:
fmov d18, xzr
ret
.globl _abi_test_clobber_d19
.private_extern _abi_test_clobber_d19
.align 4
_abi_test_clobber_d19:
fmov d19, xzr
ret
.globl _abi_test_clobber_d20
.private_extern _abi_test_clobber_d20
.align 4
_abi_test_clobber_d20:
fmov d20, xzr
ret
.globl _abi_test_clobber_d21
.private_extern _abi_test_clobber_d21
.align 4
_abi_test_clobber_d21:
fmov d21, xzr
ret
.globl _abi_test_clobber_d22
.private_extern _abi_test_clobber_d22
.align 4
_abi_test_clobber_d22:
fmov d22, xzr
ret
.globl _abi_test_clobber_d23
.private_extern _abi_test_clobber_d23
.align 4
_abi_test_clobber_d23:
fmov d23, xzr
ret
.globl _abi_test_clobber_d24
.private_extern _abi_test_clobber_d24
.align 4
_abi_test_clobber_d24:
fmov d24, xzr
ret
.globl _abi_test_clobber_d25
.private_extern _abi_test_clobber_d25
.align 4
_abi_test_clobber_d25:
fmov d25, xzr
ret
.globl _abi_test_clobber_d26
.private_extern _abi_test_clobber_d26
.align 4
_abi_test_clobber_d26:
fmov d26, xzr
ret
.globl _abi_test_clobber_d27
.private_extern _abi_test_clobber_d27
.align 4
_abi_test_clobber_d27:
fmov d27, xzr
ret
.globl _abi_test_clobber_d28
.private_extern _abi_test_clobber_d28
.align 4
_abi_test_clobber_d28:
fmov d28, xzr
ret
.globl _abi_test_clobber_d29
.private_extern _abi_test_clobber_d29
.align 4
_abi_test_clobber_d29:
fmov d29, xzr
ret
.globl _abi_test_clobber_d30
.private_extern _abi_test_clobber_d30
.align 4
_abi_test_clobber_d30:
fmov d30, xzr
ret
.globl _abi_test_clobber_d31
.private_extern _abi_test_clobber_d31
.align 4
_abi_test_clobber_d31:
fmov d31, xzr
ret
.globl _abi_test_clobber_v8_upper
.private_extern _abi_test_clobber_v8_upper
.align 4
_abi_test_clobber_v8_upper:
fmov v8.d[1], xzr
ret
.globl _abi_test_clobber_v9_upper
.private_extern _abi_test_clobber_v9_upper
.align 4
_abi_test_clobber_v9_upper:
fmov v9.d[1], xzr
ret
.globl _abi_test_clobber_v10_upper
.private_extern _abi_test_clobber_v10_upper
.align 4
_abi_test_clobber_v10_upper:
fmov v10.d[1], xzr
ret
.globl _abi_test_clobber_v11_upper
.private_extern _abi_test_clobber_v11_upper
.align 4
_abi_test_clobber_v11_upper:
fmov v11.d[1], xzr
ret
.globl _abi_test_clobber_v12_upper
.private_extern _abi_test_clobber_v12_upper
.align 4
_abi_test_clobber_v12_upper:
fmov v12.d[1], xzr
ret
.globl _abi_test_clobber_v13_upper
.private_extern _abi_test_clobber_v13_upper
.align 4
_abi_test_clobber_v13_upper:
fmov v13.d[1], xzr
ret
.globl _abi_test_clobber_v14_upper
.private_extern _abi_test_clobber_v14_upper
.align 4
_abi_test_clobber_v14_upper:
fmov v14.d[1], xzr
ret
.globl _abi_test_clobber_v15_upper
.private_extern _abi_test_clobber_v15_upper
.align 4
_abi_test_clobber_v15_upper:
fmov v15.d[1], xzr
ret
#endif // !OPENSSL_NO_ASM


@ -0,0 +1,996 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.section __TEXT,__const
# p434 x 2
Lp434x2:
.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
.quad 0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
.quad 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
# p434 + 1
Lp434p1:
.quad 0xFDC1767AE3000000, 0x7BC65C783158AEA3
.quad 0x6CFC5FD681C52056, 0x0002341F27177344
.text
.globl _sike_mpmul
.private_extern _sike_mpmul
.align 4
_sike_mpmul:
stp x29, x30, [sp,#-96]!
add x29, sp, #0
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x10, x11, [x1,#0]
ldp x12, x13, [x1,#16]
ldp x14, x15, [x1,#32]
ldr x16, [x1,#48]
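// Karatsuba multiplication of the 7-limb (448-bit) operands: each is
// split into a 4-limb low half (AL, BL) and a 3-limb high half (AH, BH),
// and AL*BL, AH*BH and (AH+AL)*(BH+BL) are computed and recombined below.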
// x3-x7 <- AH + AL, x7 <- carry
adds x3, x3, x7
adcs x4, x4, x8
adcs x5, x5, x9
adcs x6, x6, xzr
adc x7, xzr, xzr
// x10-x13 <- BH + BL, x8 <- carry
adds x10, x10, x14
adcs x11, x11, x15
adcs x12, x12, x16
adcs x13, x13, xzr
adc x8, xzr, xzr
// x9 <- combined carry
and x9, x7, x8
// x7-x8 <- mask
sub x7, xzr, x7
sub x8, xzr, x8
// x15-x19 <- masked (BH + BL)
and x14, x10, x7
and x15, x11, x7
and x16, x12, x7
and x17, x13, x7
// x20-x23 <- masked (AH + AL)
and x20, x3, x8
and x21, x4, x8
and x22, x5, x8
and x23, x6, x8
// x15-x19, x7 <- masked (AH+AL) + masked (BH+BL), step 1
adds x14, x14, x20
adcs x15, x15, x21
adcs x16, x16, x22
adcs x17, x17, x23
adc x7, x9, xzr
// x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part
stp x3, x4, [x2,#0]
// A0-A1 <- AH + AL, T0 <- mask
adds x3, x3, x5
adcs x4, x4, x6
adc x25, xzr, xzr
// C6, T1 <- BH + BL, C7 <- mask
adds x23, x10, x12
adcs x26, x11, x13
adc x24, xzr, xzr
// C0-C1 <- masked (BH + BL)
sub x19, xzr, x25
sub x20, xzr, x24
and x8, x23, x19
and x9, x26, x19
// C4-C5 <- masked (AH + AL), T0 <- combined carry
and x21, x3, x20
and x22, x4, x20
mul x19, x3, x23
mul x20, x3, x26
and x25, x25, x24
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
adds x8, x21, x8
umulh x21, x3, x26
adcs x9, x22, x9
umulh x22, x3, x23
adc x25, x25, xzr
// C2-C5 <- (AH+AL) x (BH+BL), low part
mul x3, x4, x23
umulh x23, x4, x23
adds x20, x20, x22
adc x21, x21, xzr
mul x24, x4, x26
umulh x26, x4, x26
adds x20, x20, x3
adcs x21, x21, x23
adc x22, xzr, xzr
adds x21, x21, x24
adc x22, x22, x26
ldp x3, x4, [x2,#0]
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
adds x21, x8, x21
umulh x24, x3, x10
umulh x26, x3, x11
adcs x22, x9, x22
mul x8, x3, x10
mul x9, x3, x11
adc x25, x25, xzr
// C0-C1, T1, C7 <- AL x BL
mul x3, x4, x10
umulh x10, x4, x10
adds x9, x9, x24
adc x26, x26, xzr
mul x23, x4, x11
umulh x11, x4, x11
adds x9, x9, x3
adcs x26, x26, x10
adc x24, xzr, xzr
adds x26, x26, x23
adc x24, x24, x11
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
mul x3, x5, x12
umulh x10, x5, x12
subs x19, x19, x8
sbcs x20, x20, x9
sbcs x21, x21, x26
mul x4, x5, x13
umulh x23, x5, x13
sbcs x22, x22, x24
sbc x25, x25, xzr
// A0, A1, C6, B0 <- AH x BH
mul x5, x6, x12
umulh x12, x6, x12
adds x4, x4, x10
adc x23, x23, xzr
mul x11, x6, x13
umulh x13, x6, x13
adds x4, x4, x5
adcs x23, x23, x12
adc x10, xzr, xzr
adds x23, x23, x11
adc x10, x10, x13
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x19, x19, x3
sbcs x20, x20, x4
sbcs x21, x21, x23
sbcs x22, x22, x10
sbc x25, x25, xzr
adds x19, x19, x26
adcs x20, x20, x24
adcs x21, x21, x3
adcs x22, x22, x4
adcs x23, x25, x23
adc x24, x10, xzr
// x15-x19, x7 <- (AH+AL) x (BH+BL), final step
adds x14, x14, x21
adcs x15, x15, x22
adcs x16, x16, x23
adcs x17, x17, x24
adc x7, x7, xzr
// Load AL
ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
// Load BL
ldp x10, x11, [x1,#0]
ldp x12, x13, [x1,#16]
// Temporarily store x8 in x2
stp x8, x9, [x2,#0]
// x21-x28 <- AL x BL
// A0-A1 <- AH + AL, T0 <- mask
adds x3, x3, x5
adcs x4, x4, x6
adc x8, xzr, xzr
// C6, T1 <- BH + BL, C7 <- mask
adds x27, x10, x12
adcs x9, x11, x13
adc x28, xzr, xzr
// C0-C1 <- masked (BH + BL)
sub x23, xzr, x8
sub x24, xzr, x28
and x21, x27, x23
and x22, x9, x23
// C4-C5 <- masked (AH + AL), T0 <- combined carry
and x25, x3, x24
and x26, x4, x24
mul x23, x3, x27
mul x24, x3, x9
and x8, x8, x28
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
adds x21, x25, x21
umulh x25, x3, x9
adcs x22, x26, x22
umulh x26, x3, x27
adc x8, x8, xzr
// C2-C5 <- (AH+AL) x (BH+BL), low part
mul x3, x4, x27
umulh x27, x4, x27
adds x24, x24, x26
adc x25, x25, xzr
mul x28, x4, x9
umulh x9, x4, x9
adds x24, x24, x3
adcs x25, x25, x27
adc x26, xzr, xzr
adds x25, x25, x28
adc x26, x26, x9
ldp x3, x4, [x0,#0]
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
adds x25, x21, x25
umulh x28, x3, x10
umulh x9, x3, x11
adcs x26, x22, x26
mul x21, x3, x10
mul x22, x3, x11
adc x8, x8, xzr
// C0-C1, T1, C7 <- AL x BL
mul x3, x4, x10
umulh x10, x4, x10
adds x22, x22, x28
adc x9, x9, xzr
mul x27, x4, x11
umulh x11, x4, x11
adds x22, x22, x3
adcs x9, x9, x10
adc x28, xzr, xzr
adds x9, x9, x27
adc x28, x28, x11
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
mul x3, x5, x12
umulh x10, x5, x12
subs x23, x23, x21
sbcs x24, x24, x22
sbcs x25, x25, x9
mul x4, x5, x13
umulh x27, x5, x13
sbcs x26, x26, x28
sbc x8, x8, xzr
// A0, A1, C6, B0 <- AH x BH
mul x5, x6, x12
umulh x12, x6, x12
adds x4, x4, x10
adc x27, x27, xzr
mul x11, x6, x13
umulh x13, x6, x13
adds x4, x4, x5
adcs x27, x27, x12
adc x10, xzr, xzr
adds x27, x27, x11
adc x10, x10, x13
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x23, x23, x3
sbcs x24, x24, x4
sbcs x25, x25, x27
sbcs x26, x26, x10
sbc x8, x8, xzr
adds x23, x23, x9
adcs x24, x24, x28
adcs x25, x25, x3
adcs x26, x26, x4
adcs x27, x8, x27
adc x28, x10, xzr
// Restore x8
ldp x8, x9, [x2,#0]
// x8-x10,x20,x15-x17,x19 <- masked (AH+AL) x (BH+BL) - ALxBL
subs x8, x8, x21
sbcs x9, x9, x22
sbcs x19, x19, x23
sbcs x20, x20, x24
sbcs x14, x14, x25
sbcs x15, x15, x26
sbcs x16, x16, x27
sbcs x17, x17, x28
sbc x7, x7, xzr
// Store ALxBL, low
stp x21, x22, [x2]
stp x23, x24, [x2,#16]
// Load AH
ldp x3, x4, [x0,#32]
ldr x5, [x0,#48]
// Load BH
ldp x10, x11, [x1,#32]
ldr x12, [x1,#48]
adds x8, x8, x25
adcs x9, x9, x26
adcs x19, x19, x27
adcs x20, x20, x28
adc x1, xzr, xzr
add x0, x0, #32
// Temporarily store x8,x9 in x2
stp x8,x9, [x2,#32]
// x21-x28 <- AH x BH
// A0 * B0
mul x21, x3, x10 // C0
umulh x24, x3, x10
// A0 * B1
mul x22, x3, x11
umulh x23, x3, x11
// A1 * B0
mul x8, x4, x10
umulh x9, x4, x10
adds x22, x22, x24
adc x23, x23, xzr
// A0 * B2
mul x27, x3, x12
umulh x28, x3, x12
adds x22, x22, x8 // C1
adcs x23, x23, x9
adc x24, xzr, xzr
// A2 * B0
mul x8, x5, x10
umulh x25, x5, x10
adds x23, x23, x27
adcs x24, x24, x25
adc x25, xzr, xzr
// A1 * B1
mul x27, x4, x11
umulh x9, x4, x11
adds x23, x23, x8
adcs x24, x24, x28
adc x25, x25, xzr
// A1 * B2
mul x8, x4, x12
umulh x28, x4, x12
adds x23, x23, x27 // C2
adcs x24, x24, x9
adc x25, x25, xzr
// A2 * B1
mul x27, x5, x11
umulh x9, x5, x11
adds x24, x24, x8
adcs x25, x25, x28
adc x26, xzr, xzr
// A2 * B2
mul x8, x5, x12
umulh x28, x5, x12
adds x24, x24, x27 // C3
adcs x25, x25, x9
adc x26, x26, xzr
adds x25, x25, x8 // C4
adc x26, x26, x28 // C5
// Restore x8,x9
ldp x8,x9, [x2,#32]
neg x1, x1
// x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x8, x8, x21
sbcs x9, x9, x22
sbcs x19, x19, x23
sbcs x20, x20, x24
sbcs x14, x14, x25
sbcs x15, x15, x26
sbcs x16, x16, xzr
sbcs x17, x17, xzr
sbc x7, x7, xzr
// Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
stp x8, x9, [x2,#32]
stp x19, x20, [x2,#48]
adds x1, x1, #1
adcs x14, x14, x21
adcs x15, x15, x22
adcs x16, x16, x23
adcs x17, x17, x24
adcs x25, x7, x25
adc x26, x26, xzr
stp x14, x15, [x2,#64]
stp x16, x17, [x2,#80]
stp x25, x26, [x2,#96]
ldp x19, x20, [x29,#16]
ldp x21, x22, [x29,#32]
ldp x23, x24, [x29,#48]
ldp x25, x26, [x29,#64]
ldp x27, x28, [x29,#80]
ldp x29, x30, [sp],#96
ret
.globl _sike_fprdc
.private_extern _sike_fprdc
.align 4
_sike_fprdc:
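// Montgomery reduction of a double-width (14-limb) value modulo
// p434 = 2^216*3^137 - 1. The low limbs of p434 + 1 are zero, so only its
// four non-zero limbs (Lp434p1) take part in the multiplications below.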
stp x29, x30, [sp, #-96]!
add x29, sp, xzr
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
ldp x2, x3, [x0,#0] // a[0-1]
// Load the prime constant
adrp x26, Lp434p1@PAGE
add x26, x26, Lp434p1@PAGEOFF
ldp x23, x24, [x26, #0x0]
ldp x25, x26, [x26,#0x10]
// a[0-1] * p434+1
mul x4, x2, x23 // C0
umulh x7, x2, x23
mul x5, x2, x24
umulh x6, x2, x24
mul x10, x3, x23
umulh x11, x3, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x2, x25
umulh x28, x2, x25
adds x5, x5, x10 // C1
adcs x6, x6, x11
adc x7, xzr, xzr
mul x10, x3, x24
umulh x11, x3, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x2, x26
umulh x28, x2, x26
adds x6, x6, x10 // C2
adcs x7, x7, x11
adc x8, x8, xzr
mul x10, x3, x25
umulh x11, x3, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x3, x26
umulh x28, x3, x26
adds x7, x7, x10 // C3
adcs x8, x8, x11
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
ldp x10, x11, [x0, #0x18]
ldp x12, x13, [x0, #0x28]
ldp x14, x15, [x0, #0x38]
ldp x16, x17, [x0, #0x48]
ldp x19, x20, [x0, #0x58]
ldr x21, [x0, #0x68]
adds x10, x10, x4
adcs x11, x11, x5
adcs x12, x12, x6
adcs x13, x13, x7
adcs x14, x14, x8
adcs x15, x15, x9
adcs x22, x16, xzr
adcs x17, x17, xzr
adcs x19, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
ldr x2, [x0,#0x10] // a[2]
// a[2-3] * p434+1
mul x4, x2, x23 // C0
umulh x7, x2, x23
mul x5, x2, x24
umulh x6, x2, x24
mul x0, x10, x23
umulh x3, x10, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x2, x25
umulh x28, x2, x25
adds x5, x5, x0 // C1
adcs x6, x6, x3
adc x7, xzr, xzr
mul x0, x10, x24
umulh x3, x10, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x2, x26
umulh x28, x2, x26
adds x6, x6, x0 // C2
adcs x7, x7, x3
adc x8, x8, xzr
mul x0, x10, x25
umulh x3, x10, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x10, x26
umulh x28, x10, x26
adds x7, x7, x0 // C3
adcs x8, x8, x3
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
adds x12, x12, x4
adcs x13, x13, x5
adcs x14, x14, x6
adcs x15, x15, x7
adcs x16, x22, x8
adcs x17, x17, x9
adcs x22, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
mul x4, x11, x23 // C0
umulh x7, x11, x23
mul x5, x11, x24
umulh x6, x11, x24
mul x10, x12, x23
umulh x3, x12, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x11, x25
umulh x28, x11, x25
adds x5, x5, x10 // C1
adcs x6, x6, x3
adc x7, xzr, xzr
mul x10, x12, x24
umulh x3, x12, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x11, x26
umulh x28, x11, x26
adds x6, x6, x10 // C2
adcs x7, x7, x3
adc x8, x8, xzr
mul x10, x12, x25
umulh x3, x12, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x12, x26
umulh x28, x12, x26
adds x7, x7, x10 // C3
adcs x8, x8, x3
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
adds x14, x14, x4
adcs x15, x15, x5
adcs x16, x16, x6
adcs x17, x17, x7
adcs x19, x22, x8
adcs x20, x20, x9
adc x22, x21, xzr
stp x14, x15, [x1, #0x0] // C0, C1
mul x4, x13, x23 // C0
umulh x10, x13, x23
mul x5, x13, x24
umulh x27, x13, x24
adds x5, x5, x10 // C1
adc x10, xzr, xzr
mul x6, x13, x25
umulh x28, x13, x25
adds x27, x10, x27
adcs x6, x6, x27 // C2
adc x10, xzr, xzr
mul x7, x13, x26
umulh x8, x13, x26
adds x28, x10, x28
adcs x7, x7, x28 // C3
adc x8, x8, xzr // C4
adds x16, x16, x4
adcs x17, x17, x5
adcs x19, x19, x6
adcs x20, x20, x7
adc x21, x22, x8
str x16, [x1, #0x10]
stp x17, x19, [x1, #0x18]
stp x20, x21, [x1, #0x28]
ldp x19, x20, [x29,#16]
ldp x21, x22, [x29,#32]
ldp x23, x24, [x29,#48]
ldp x25, x26, [x29,#64]
ldp x27, x28, [x29,#80]
ldp x29, x30, [sp],#96
ret
.globl _sike_fpadd
.private_extern _sike_fpadd
.align 4
_sike_fpadd:
stp x29,x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Add a + b
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
adcs x7, x7, x15
adcs x8, x8, x16
adc x9, x9, x17
// Subtract 2xp434
adrp x17, Lp434x2@PAGE
add x17, x17, Lp434x2@PAGEOFF
ldp x11, x12, [x17, #0]
ldp x13, x14, [x17, #16]
ldp x15, x16, [x17, #32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x12
sbcs x6, x6, x13
sbcs x7, x7, x14
sbcs x8, x8, x15
sbcs x9, x9, x16
sbc x0, xzr, xzr // x0 can be reused now
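// x0 is all-ones exactly when the subtraction above borrowed; adding the
// masked 2*p434 back in keeps the reduction branch-free (constant time).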
// Add 2xp434 anded with the mask in x0
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adc x9, x9, x16
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
.globl _sike_fpsub
.private_extern _sike_fpsub
.align 4
_sike_fpsub:
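// Field subtraction: c = a - b, with 2*p434 added back under the borrow
// mask so the result stays non-negative. No branches, constant time.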
stp x29, x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Subtract a - b
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
sbcs x9, x9, x17
sbc x0, xzr, xzr
// Add 2xp434 anded with the mask in x0
adrp x17, Lp434x2@PAGE
add x17, x17, Lp434x2@PAGEOFF
// First half
ldp x11, x12, [x17, #0]
ldp x13, x14, [x17, #16]
ldp x15, x16, [x17, #32]
// Add 2xp434 anded with the mask in x0
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adc x9, x9, x16
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
.globl _sike_mpadd_asm
.private_extern _sike_mpadd_asm
.align 4
_sike_mpadd_asm:
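// Plain multiprecision addition of two 7-limb (448-bit) values; no
// modular reduction is performed.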
stp x29, x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
adcs x7, x7, x15
adcs x8, x8, x16
adc x9, x9, x17
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
.globl _sike_mpsubx2_asm
.private_extern _sike_mpsubx2_asm
.align 4
_sike_mpsubx2_asm:
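// Double-width (14-limb) multiprecision subtraction; x0 is left holding
// an all-ones mask if the subtraction borrowed and zero otherwise.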
stp x29, x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x11, x12, [x1,#32]
ldp x13, x14, [x1,#48]
sbcs x7, x7, x11
sbcs x8, x8, x12
sbcs x9, x9, x13
sbcs x10, x10, x14
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
ldp x3, x4, [x0,#64]
ldp x5, x6, [x0,#80]
ldp x11, x12, [x1,#64]
ldp x13, x14, [x1,#80]
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
ldp x7, x8, [x0,#96]
ldp x11, x12, [x1,#96]
sbcs x7, x7, x11
sbcs x8, x8, x12
sbc x0, xzr, xzr
stp x3, x4, [x2,#64]
stp x5, x6, [x2,#80]
stp x7, x8, [x2,#96]
ldp x29, x30, [sp],#16
ret
.globl _sike_mpdblsubx2_asm
.private_extern _sike_mpdblsubx2_asm
.align 4
_sike_mpdblsubx2_asm:
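// Computes x2[] = x2[] - x0[] - x1[] on double-width (14-limb) values,
// carrying the borrows between chunks in x9.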
stp x29, x30, [sp, #-16]!
add x29, sp, #0
ldp x3, x4, [x2, #0]
ldp x5, x6, [x2,#16]
ldp x7, x8, [x2,#32]
ldp x11, x12, [x0, #0]
ldp x13, x14, [x0,#16]
ldp x15, x16, [x0,#32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
// x9 stores carry
adc x9, xzr, xzr
ldp x11, x12, [x1, #0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, x9, xzr
stp x3, x4, [x2, #0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
ldp x3, x4, [x2,#48]
ldp x5, x6, [x2,#64]
ldp x7, x8, [x2,#80]
ldp x11, x12, [x0,#48]
ldp x13, x14, [x0,#64]
ldp x15, x16, [x0,#80]
// x9 = 2 - x9
neg x9, x9
add x9, x9, #2
subs x3, x3, x9
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, xzr, xzr
ldp x11, x12, [x1,#48]
ldp x13, x14, [x1,#64]
ldp x15, x16, [x1,#80]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, x9, xzr
stp x3, x4, [x2,#48]
stp x5, x6, [x2,#64]
stp x7, x8, [x2,#80]
ldp x3, x4, [x2,#96]
ldp x11, x12, [x0,#96]
ldp x13, x14, [x1,#96]
// x9 = 2 - x9
neg x9, x9
add x9, x9, #2
subs x3, x3, x9
sbcs x3, x3, x11
sbcs x4, x4, x12
subs x3, x3, x13
sbc x4, x4, x14
stp x3, x4, [x2,#96]
ldp x29, x30, [sp],#16
ret
#endif // !OPENSSL_NO_ASM

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -0,0 +1,790 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.code 32
#undef __thumb2__
.align 5
Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key
#ifdef __thumb2__
.thumb_func _aes_hw_set_encrypt_key
#endif
.align 5
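@ r0 = user-supplied key, r1 = key length in bits (128/192/256),
@ r2 = output key schedule. Returns 0 on success, -1 for a NULL pointer
@ and -2 for an unsupported key length. The AES instructions below are
@ emitted as raw .byte encodings; the trailing comments give the mnemonics.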
_aes_hw_set_encrypt_key:
Lenc_key:
mov r3,#-1
cmp r0,#0
beq Lenc_key_abort
cmp r2,#0
beq Lenc_key_abort
mov r3,#-2
cmp r1,#128
blt Lenc_key_abort
cmp r1,#256
bgt Lenc_key_abort
tst r1,#0x3f
bne Lenc_key_abort
adr r3,Lrcon
cmp r1,#192
veor q0,q0,q0
vld1.8 {q3},[r0]!
mov r1,#8 @ reuse r1
vld1.32 {q1,q2},[r3]!
blt Loop128
beq L192
b L256
.align 4
Loop128:
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
bne Loop128
vld1.32 {q1},[r3]
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
veor q3,q3,q10
vst1.32 {q3},[r2]
add r2,r2,#0x50
mov r12,#10
b Ldone
.align 4
L192:
vld1.8 {d16},[r0]!
vmov.i8 q10,#8 @ borrow q10
vst1.32 {q3},[r2]!
vsub.i8 q2,q2,q10 @ adjust the mask
Loop192:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {d16},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vdup.32 q9,d7[1]
veor q9,q9,q8
veor q10,q10,q1
vext.8 q8,q0,q8,#12
vshl.u8 q1,q1,#1
veor q8,q8,q9
veor q3,q3,q10
veor q8,q8,q10
vst1.32 {q3},[r2]!
bne Loop192
mov r12,#12
add r2,r2,#0x20
b Ldone
.align 4
L256:
vld1.8 {q8},[r0]
mov r1,#7
mov r12,#14
vst1.32 {q3},[r2]!
Loop256:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {q8},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vst1.32 {q3},[r2]!
beq Ldone
vdup.32 q10,d7[1]
vext.8 q9,q0,q8,#12
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
veor q8,q8,q10
b Loop256
Ldone:
str r12,[r2]
mov r3,#0
Lenc_key_abort:
mov r0,r3 @ return value
bx lr
.globl _aes_hw_set_decrypt_key
.private_extern _aes_hw_set_decrypt_key
#ifdef __thumb2__
.thumb_func _aes_hw_set_decrypt_key
#endif
.align 5
_aes_hw_set_decrypt_key:
stmdb sp!,{r4,lr}
bl Lenc_key
cmp r0,#0
bne Ldec_key_abort
sub r2,r2,#240 @ restore original r2
mov r4,#-16
add r0,r2,r12,lsl#4 @ end of key schedule
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
Loop_imc:
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
cmp r0,r2
bhi Loop_imc
vld1.32 {q0},[r2]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
vst1.32 {q0},[r0]
eor r0,r0,r0 @ return value
Ldec_key_abort:
ldmia sp!,{r4,pc}
.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt
#ifdef __thumb2__
.thumb_func _aes_hw_encrypt
#endif
.align 5
_aes_hw_encrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!
Loop_enc:
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q1},[r2]!
bgt Loop_enc
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
bx lr
.globl _aes_hw_decrypt
.private_extern _aes_hw_decrypt
#ifdef __thumb2__
.thumb_func _aes_hw_decrypt
#endif
.align 5
_aes_hw_decrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!
Loop_dec:
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q1},[r2]!
bgt Loop_dec
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
bx lr
.globl _aes_hw_cbc_encrypt
.private_extern _aes_hw_cbc_encrypt
#ifdef __thumb2__
.thumb_func _aes_hw_cbc_encrypt
#endif
.align 5
_aes_hw_cbc_encrypt:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load remaining args
subs r2,r2,#16
mov r8,#16
blo Lcbc_abort
moveq r8,#0
cmp r5,#0 @ en- or decrypting?
ldr r5,[r3,#240]
and r2,r2,#-16
vld1.8 {q6},[r4]
vld1.8 {q0},[r0],r8
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#6
add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
sub r5,r5,#2
vld1.32 {q10,q11},[r7]!
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
beq Lcbc_dec
cmp r5,#2
veor q0,q0,q6
veor q5,q8,q7
beq Lcbc_enc128
vld1.32 {q2,q3},[r7]
add r7,r3,#16
add r6,r3,#16*4
add r12,r3,#16*5
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
add r14,r3,#16*6
add r3,r3,#16*7
b Lenter_cbc_enc
.align 4
Loop_cbc_enc:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
Lenter_cbc_enc:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r6]
cmp r5,#4
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r12]
beq Lcbc_enc192
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r14]
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r3]
nop
Lcbc_enc192:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs Loop_cbc_enc
vst1.8 {q6},[r1]!
b Lcbc_done
.align 5
Lcbc_enc128:
vld1.32 {q2,q3},[r7]
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
b Lenter_cbc_enc128
Loop_cbc_enc128:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
Lenter_cbc_enc128:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs Loop_cbc_enc128
vst1.8 {q6},[r1]!
b Lcbc_done
.align 5
Lcbc_dec:
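@ CBC decryption processes three blocks per iteration of Loop3x_cbc_dec;
@ Lcbc_dec_tail handles the final one or two blocks.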
vld1.8 {q10},[r0]!
subs r2,r2,#32 @ bias
add r6,r5,#2
vorr q3,q0,q0
vorr q1,q0,q0
vorr q11,q10,q10
blo Lcbc_dec_tail
vorr q1,q10,q10
vld1.8 {q10},[r0]!
vorr q2,q0,q0
vorr q3,q1,q1
vorr q11,q10,q10
Loop3x_cbc_dec:
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt Loop3x_cbc_dec
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q4,q6,q7
subs r2,r2,#0x30
veor q5,q2,q7
movlo r6,r2 @ r6 is zero at this point
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
add r0,r0,r6 @ r0 is adjusted so that
@ at exit from the loop q1-q10
@ hold the last "words"
vorr q6,q11,q11
mov r7,r3
.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q2},[r0]!
.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q3},[r0]!
.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q11},[r0]!
.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
add r6,r5,#2
veor q4,q4,q0
veor q5,q5,q1
veor q10,q10,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q4},[r1]!
vorr q0,q2,q2
vst1.8 {q5},[r1]!
vorr q1,q3,q3
vst1.8 {q10},[r1]!
vorr q10,q11,q11
bhs Loop3x_cbc_dec
cmn r2,#0x30
beq Lcbc_done
nop
Lcbc_dec_tail:
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt Lcbc_dec_tail
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
cmn r2,#0x20
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q5,q6,q7
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
beq Lcbc_dec_one
veor q5,q5,q1
veor q9,q9,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
vst1.8 {q9},[r1]!
b Lcbc_done
Lcbc_dec_one:
veor q5,q5,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
Lcbc_done:
vst1.8 {q6},[r4]
Lcbc_abort:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,pc}
.globl _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks
#ifdef __thumb2__
.thumb_func _aes_hw_ctr32_encrypt_blocks
#endif
.align 5
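@ r0 = input, r1 = output, r2 = number of 16-byte blocks, r3 = key
@ schedule, [sp] = 16-byte counter block. The 32-bit big-endian counter in
@ the last word is incremented per block; three blocks are processed per
@ iteration of Loop3x_ctr32, with Lctr32_tail handling the final 1-2 blocks.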
_aes_hw_ctr32_encrypt_blocks:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldr r4, [ip] @ load remaining arg
ldr r5,[r3,#240]
ldr r8, [r4, #12]
vld1.32 {q0},[r4]
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#4
mov r12,#16
cmp r2,#2
add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
sub r5,r5,#2
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
movlo r12,#0
#ifndef __ARMEB__
rev r8, r8
#endif
vorr q1,q0,q0
add r10, r8, #1
vorr q10,q0,q0
add r8, r8, #2
vorr q6,q0,q0
rev r10, r10
vmov.32 d3[1],r10
bls Lctr32_tail
rev r12, r8
sub r2,r2,#3 @ bias
vmov.32 d21[1],r12
b Loop3x_ctr32
.align 4
Loop3x_ctr32:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q9},[r7]!
bgt Loop3x_ctr32
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
vld1.8 {q2},[r0]!
vorr q0,q6,q6
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.8 {q3},[r0]!
vorr q1,q6,q6
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vld1.8 {q11},[r0]!
mov r7,r3
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
vorr q10,q6,q6
add r9,r8,#1
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q2,q2,q7
add r10,r8,#2
.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
veor q3,q3,q7
add r8,r8,#3
.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q11,q11,q7
rev r9,r9
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d1[1], r9
rev r10,r10
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vmov.32 d3[1], r10
rev r12,r8
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d21[1], r12
subs r2,r2,#3
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15
veor q2,q2,q4
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
vst1.8 {q2},[r1]!
veor q3,q3,q5
mov r6,r5
vst1.8 {q3},[r1]!
veor q11,q11,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q11},[r1]!
bhs Loop3x_ctr32
adds r2,r2,#3
beq Lctr32_done
cmp r2,#1
mov r12,#16
moveq r12,#0
Lctr32_tail:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q9},[r7]!
bgt Lctr32_tail
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q2},[r0],r12
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q3},[r0]
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q2,q2,q7
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q3,q3,q7
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15
cmp r2,#1
veor q2,q2,q0
veor q3,q3,q1
vst1.8 {q2},[r1]!
beq Lctr32_done
vst1.8 {q3},[r1]
Lctr32_done:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
#endif
#endif // !OPENSSL_NO_ASM


@ -0,0 +1,982 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.text
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
.align 5
LOPENSSL_armcap:
.word OPENSSL_armcap_P-Lbn_mul_mont
#endif
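@ bn_mul_mont(rp, ap, bp, np, n0, num): Montgomery multiplication. Returns
@ 0 (failure) when num < 2. When num is a multiple of 8 and NEON is
@ reported in OPENSSL_armcap_P, control jumps to bn_mul8x_mont_neon;
@ otherwise the scalar Lialu path below is used.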
.globl _bn_mul_mont
.private_extern _bn_mul_mont
#ifdef __thumb2__
.thumb_func _bn_mul_mont
#endif
.align 5
_bn_mul_mont:
Lbn_mul_mont:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne Lialu
adr r0,Lbn_mul_mont
ldr r2,LOPENSSL_armcap
ldr r0,[r0,r2]
#ifdef __APPLE__
ldr r0,[r0]
#endif
tst r0,#ARMV7_NEON @ NEON available?
ldmia sp, {r0,r2}
beq Lialu
add sp,sp,#8
b bn_mul8x_mont_neon
.align 4
Lialu:
#endif
cmp ip,#2
mov r0,ip @ load num
#ifdef __thumb2__
ittt lt
#endif
movlt r0,#0
addlt sp,sp,#2*4
blt Labrt
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
mov r0,r0,lsl#2 @ rescale r0 for byte count
sub sp,sp,r0 @ alloca(4*num)
sub sp,sp,#4 @ +extra dword
sub r0,r0,#4 @ "num=num-1"
add r4,r2,r0 @ &bp[num-1]
add r0,sp,r0 @ r0 to point at &tp[num-1]
ldr r8,[r0,#14*4] @ &n0
ldr r2,[r2] @ bp[0]
ldr r5,[r1],#4 @ ap[0],ap++
ldr r6,[r3],#4 @ np[0],np++
ldr r8,[r8] @ *n0
str r4,[r0,#15*4] @ save &bp[num]
umull r10,r11,r5,r2 @ ap[0]*bp[0]
str r8,[r0,#14*4] @ save n0 value
mul r8,r10,r8 @ "tp[0]"*n0
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
mov r4,sp
L1st:
ldr r5,[r1],#4 @ ap[j],ap++
mov r10,r11
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[0]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne L1st
adds r12,r12,r11
ldr r4,[r0,#13*4] @ restore bp
mov r14,#0
ldr r8,[r0,#14*4] @ restore n0
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
mov r7,sp
str r14,[r0,#4] @ tp[num]=
Louter:
sub r7,r0,r7 @ "original" r0-1 value
sub r1,r1,r7 @ "rewind" ap to &ap[1]
ldr r2,[r4,#4]! @ *(++bp)
sub r3,r3,r7 @ "rewind" np to &np[1]
ldr r5,[r1,#-4] @ ap[0]
ldr r10,[sp] @ tp[0]
ldr r6,[r3,#-4] @ np[0]
ldr r7,[sp,#4] @ tp[1]
mov r11,#0
umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
str r4,[r0,#13*4] @ save bp
mul r8,r10,r8
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
mov r4,sp
Linner:
ldr r5,[r1],#4 @ ap[j],ap++
adds r10,r11,r7 @ +=tp[j]
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[i]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adc r11,r11,#0
ldr r7,[r4,#8] @ tp[j+1]
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne Linner
adds r12,r12,r11
mov r14,#0
ldr r4,[r0,#13*4] @ restore bp
adc r14,r14,#0
ldr r8,[r0,#14*4] @ restore n0
adds r12,r12,r7
ldr r7,[r0,#15*4] @ restore &bp[num]
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
str r14,[r0,#4] @ tp[num]=
cmp r4,r7
#ifdef __thumb2__
itt ne
#endif
movne r7,sp
bne Louter
ldr r2,[r0,#12*4] @ pull rp
mov r5,sp
add r0,r0,#4 @ r0 to point at &tp[num]
sub r5,r0,r5 @ "original" num value
mov r4,sp @ "rewind" r4
mov r1,r4 @ "borrow" r1
sub r3,r3,r5 @ "rewind" r3 to &np[0]
subs r7,r7,r7 @ "clear" carry flag
Lsub: ldr r7,[r4],#4
ldr r6,[r3],#4
sbcs r7,r7,r6 @ tp[j]-np[j]
str r7,[r2],#4 @ rp[j]=
teq r4,r0 @ preserve carry
bne Lsub
sbcs r14,r14,#0 @ upmost carry
mov r4,sp @ "rewind" r4
sub r2,r2,r5 @ "rewind" r2
Lcopy: ldr r7,[r4] @ conditional copy
ldr r5,[r2]
str sp,[r4],#4 @ zap tp
#ifdef __thumb2__
it cc
#endif
movcc r5,r7
str r5,[r2],#4
teq r4,r0 @ preserve carry
bne Lcopy
mov sp,r0
add sp,sp,#4 @ skip over tp[num+1]
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
Labrt:
#if __ARM_ARCH__>=5
bx lr @ bx lr
#else
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
#if __ARM_MAX_ARCH__>=7
#ifdef __thumb2__
.thumb_func bn_mul8x_mont_neon
#endif
.align 5
bn_mul8x_mont_neon:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load rest of parameter block
mov ip,sp
cmp r5,#8
bhi LNEON_8n
@ special case for r5==8, everything is in register bank...
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
sub r7,sp,r5,lsl#4
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
and r7,r7,#-64
vld1.32 {d30[0]}, [r4,:32]
mov sp,r7 @ alloca
vzip.16 d28,d8
vmull.u32 q6,d28,d0[0]
vmull.u32 q7,d28,d0[1]
vmull.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmull.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
vmul.u32 d29,d29,d30
vmull.u32 q10,d28,d2[0]
vld1.32 {d4,d5,d6,d7}, [r3]!
vmull.u32 q11,d28,d2[1]
vmull.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmull.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
sub r9,r5,#1
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
b LNEON_outer8
.align 4
LNEON_outer8:
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
vzip.16 d28,d8
vadd.u64 d12,d12,d10
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
subs r9,r9,#1
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
bne LNEON_outer8
vadd.u64 d12,d12,d10
mov r7,sp
vshr.u64 d10,d12,#16
mov r8,r5
vadd.u64 d13,d13,d10
add r6,sp,#96
vshr.u64 d10,d13,#16
vzip.16 d12,d13
b LNEON_tail_entry
.align 4
LNEON_8n:
veor q6,q6,q6
sub r7,sp,#128
veor q7,q7,q7
sub r7,r7,r5,lsl#4
veor q8,q8,q8
and r7,r7,#-64
veor q9,q9,q9
mov sp,r7 @ alloca
veor q10,q10,q10
add r7,r7,#256
veor q11,q11,q11
sub r8,r5,#8
veor q12,q12,q12
veor q13,q13,q13
LNEON_8n_init:
vst1.64 {q6,q7},[r7,:256]!
subs r8,r8,#8
vst1.64 {q8,q9},[r7,:256]!
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12,q13},[r7,:256]!
bne LNEON_8n_init
add r6,sp,#256
vld1.32 {d0,d1,d2,d3},[r1]!
add r10,sp,#8
vld1.32 {d30[0]},[r4,:32]
mov r9,r5
b LNEON_8n_outer
.align 4
LNEON_8n_outer:
vld1.32 {d28[0]},[r2,:32]! @ *b++
veor d8,d8,d8
vzip.16 d28,d8
add r7,sp,#128
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
vmlal.u32 q10,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q11,d28,d2[1]
vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q6,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q7,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q8,d29,d5[0]
vshr.u64 d12,d12,#16
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vadd.u64 d12,d12,d13
vmlal.u32 q11,d29,d6[1]
vshr.u64 d12,d12,#16
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vadd.u64 d14,d14,d12
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0]
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]!
vmlal.u32 q8,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q9,d28,d1[0]
vshl.i64 d29,d15,#16
vmlal.u32 q10,d28,d1[1]
vadd.u64 d29,d29,d14
vmlal.u32 q11,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q12,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1]
vmlal.u32 q13,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q7,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q8,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q9,d29,d5[0]
vshr.u64 d14,d14,#16
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vadd.u64 d14,d14,d15
vmlal.u32 q12,d29,d6[1]
vshr.u64 d14,d14,#16
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vadd.u64 d16,d16,d14
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1]
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]!
vmlal.u32 q9,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q10,d28,d1[0]
vshl.i64 d29,d17,#16
vmlal.u32 q11,d28,d1[1]
vadd.u64 d29,d29,d16
vmlal.u32 q12,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q13,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2]
vmlal.u32 q6,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q8,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q9,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q10,d29,d5[0]
vshr.u64 d16,d16,#16
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vadd.u64 d16,d16,d17
vmlal.u32 q13,d29,d6[1]
vshr.u64 d16,d16,#16
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vadd.u64 d18,d18,d16
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2]
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]!
vmlal.u32 q10,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q11,d28,d1[0]
vshl.i64 d29,d19,#16
vmlal.u32 q12,d28,d1[1]
vadd.u64 d29,d29,d18
vmlal.u32 q13,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q6,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3]
vmlal.u32 q7,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q9,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q10,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q11,d29,d5[0]
vshr.u64 d18,d18,#16
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vadd.u64 d18,d18,d19
vmlal.u32 q6,d29,d6[1]
vshr.u64 d18,d18,#16
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vadd.u64 d20,d20,d18
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3]
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]!
vmlal.u32 q11,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q12,d28,d1[0]
vshl.i64 d29,d21,#16
vmlal.u32 q13,d28,d1[1]
vadd.u64 d29,d29,d20
vmlal.u32 q6,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q7,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4]
vmlal.u32 q8,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q10,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q11,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q12,d29,d5[0]
vshr.u64 d20,d20,#16
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vadd.u64 d20,d20,d21
vmlal.u32 q7,d29,d6[1]
vshr.u64 d20,d20,#16
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vadd.u64 d22,d22,d20
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4]
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]!
vmlal.u32 q12,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q13,d28,d1[0]
vshl.i64 d29,d23,#16
vmlal.u32 q6,d28,d1[1]
vadd.u64 d29,d29,d22
vmlal.u32 q7,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q8,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5]
vmlal.u32 q9,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q11,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q12,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q13,d29,d5[0]
vshr.u64 d22,d22,#16
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vadd.u64 d22,d22,d23
vmlal.u32 q8,d29,d6[1]
vshr.u64 d22,d22,#16
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vadd.u64 d24,d24,d22
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5]
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]!
vmlal.u32 q13,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q6,d28,d1[0]
vshl.i64 d29,d25,#16
vmlal.u32 q7,d28,d1[1]
vadd.u64 d29,d29,d24
vmlal.u32 q8,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q9,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6]
vmlal.u32 q10,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q12,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q13,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q6,d29,d5[0]
vshr.u64 d24,d24,#16
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vadd.u64 d24,d24,d25
vmlal.u32 q9,d29,d6[1]
vshr.u64 d24,d24,#16
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vadd.u64 d26,d26,d24
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6]
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]!
vmlal.u32 q6,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q7,d28,d1[0]
vshl.i64 d29,d27,#16
vmlal.u32 q8,d28,d1[1]
vadd.u64 d29,d29,d26
vmlal.u32 q9,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7]
vmlal.u32 q11,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q12,d28,d3[1]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q13,d29,d4[0]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q6,d29,d4[1]
vmlal.u32 q7,d29,d5[0]
vshr.u64 d26,d26,#16
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vadd.u64 d26,d26,d27
vmlal.u32 q10,d29,d6[1]
vshr.u64 d26,d26,#16
vmlal.u32 q11,d29,d7[0]
vmlal.u32 q12,d29,d7[1]
vadd.u64 d12,d12,d26
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
add r10,sp,#8 @ rewind
sub r8,r5,#8
b LNEON_8n_inner
.align 4
LNEON_8n_inner:
subs r8,r8,#8
vmlal.u32 q6,d28,d0[0]
vld1.64 {q13},[r6,:128]
vmlal.u32 q7,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+0]
vmlal.u32 q8,d28,d1[0]
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q9,d28,d1[1]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmlal.u32 q11,d29,d6[1]
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vst1.64 {q6},[r7,:128]!
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]
vmlal.u32 q8,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1]
vmlal.u32 q9,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d1[1]
vmlal.u32 q11,d28,d2[0]
vmlal.u32 q12,d28,d2[1]
vmlal.u32 q13,d28,d3[0]
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2]
vmlal.u32 q7,d29,d4[0]
vmlal.u32 q8,d29,d4[1]
vmlal.u32 q9,d29,d5[0]
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vmlal.u32 q12,d29,d6[1]
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vst1.64 {q7},[r7,:128]!
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]
vmlal.u32 q9,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2]
vmlal.u32 q10,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q11,d28,d1[1]
vmlal.u32 q12,d28,d2[0]
vmlal.u32 q13,d28,d2[1]
vmlal.u32 q6,d28,d3[0]
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3]
vmlal.u32 q8,d29,d4[0]
vmlal.u32 q9,d29,d4[1]
vmlal.u32 q10,d29,d5[0]
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vmlal.u32 q13,d29,d6[1]
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vst1.64 {q8},[r7,:128]!
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]
vmlal.u32 q10,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3]
vmlal.u32 q11,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q12,d28,d1[1]
vmlal.u32 q13,d28,d2[0]
vmlal.u32 q6,d28,d2[1]
vmlal.u32 q7,d28,d3[0]
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4]
vmlal.u32 q9,d29,d4[0]
vmlal.u32 q10,d29,d4[1]
vmlal.u32 q11,d29,d5[0]
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vmlal.u32 q6,d29,d6[1]
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vst1.64 {q9},[r7,:128]!
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]
vmlal.u32 q11,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4]
vmlal.u32 q12,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q13,d28,d1[1]
vmlal.u32 q6,d28,d2[0]
vmlal.u32 q7,d28,d2[1]
vmlal.u32 q8,d28,d3[0]
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5]
vmlal.u32 q10,d29,d4[0]
vmlal.u32 q11,d29,d4[1]
vmlal.u32 q12,d29,d5[0]
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vmlal.u32 q7,d29,d6[1]
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vst1.64 {q10},[r7,:128]!
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]
vmlal.u32 q12,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5]
vmlal.u32 q13,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q6,d28,d1[1]
vmlal.u32 q7,d28,d2[0]
vmlal.u32 q8,d28,d2[1]
vmlal.u32 q9,d28,d3[0]
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+6]
vmlal.u32 q11,d29,d4[0]
vmlal.u32 q12,d29,d4[1]
vmlal.u32 q13,d29,d5[0]
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vmlal.u32 q8,d29,d6[1]
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vst1.64 {q11},[r7,:128]!
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]
vmlal.u32 q13,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6]
vmlal.u32 q6,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q7,d28,d1[1]
vmlal.u32 q8,d28,d2[0]
vmlal.u32 q9,d28,d2[1]
vmlal.u32 q10,d28,d3[0]
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7]
vmlal.u32 q12,d29,d4[0]
vmlal.u32 q13,d29,d4[1]
vmlal.u32 q6,d29,d5[0]
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vmlal.u32 q9,d29,d6[1]
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vst1.64 {q12},[r7,:128]!
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]
vmlal.u32 q6,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7]
vmlal.u32 q7,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q8,d28,d1[1]
vmlal.u32 q9,d28,d2[0]
vmlal.u32 q10,d28,d2[1]
vmlal.u32 q11,d28,d3[0]
vmlal.u32 q12,d28,d3[1]
it eq
subeq r1,r1,r5,lsl#2 @ rewind
vmlal.u32 q13,d29,d4[0]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q6,d29,d4[1]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q7,d29,d5[0]
add r10,sp,#8 @ rewind
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vmlal.u32 q10,d29,d6[1]
vmlal.u32 q11,d29,d7[0]
vst1.64 {q13},[r7,:128]!
vmlal.u32 q12,d29,d7[1]
bne LNEON_8n_inner
add r6,sp,#128
vst1.64 {q6,q7},[r7,:256]!
veor q2,q2,q2 @ d4-d5
vst1.64 {q8,q9},[r7,:256]!
veor q3,q3,q3 @ d6-d7
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12},[r7,:128]
subs r9,r9,#8
vld1.64 {q6,q7},[r6,:256]!
vld1.64 {q8,q9},[r6,:256]!
vld1.64 {q10,q11},[r6,:256]!
vld1.64 {q12,q13},[r6,:256]!
itt ne
subne r3,r3,r5,lsl#2 @ rewind
bne LNEON_8n_outer
add r7,sp,#128
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
vshr.u64 d10,d12,#16
vst1.64 {q2,q3},[sp,:256]!
vadd.u64 d13,d13,d10
vst1.64 {q2,q3}, [sp,:256]!
vshr.u64 d10,d13,#16
vst1.64 {q2,q3}, [sp,:256]!
vzip.16 d12,d13
mov r8,r5
b LNEON_tail_entry
.align 4
LNEON_tail:
vadd.u64 d12,d12,d10
vshr.u64 d10,d12,#16
vld1.64 {q8,q9}, [r6, :256]!
vadd.u64 d13,d13,d10
vld1.64 {q10,q11}, [r6, :256]!
vshr.u64 d10,d13,#16
vld1.64 {q12,q13}, [r6, :256]!
vzip.16 d12,d13
LNEON_tail_entry:
vadd.u64 d14,d14,d10
vst1.32 {d12[0]}, [r7, :32]!
vshr.u64 d10,d14,#16
vadd.u64 d15,d15,d10
vshr.u64 d10,d15,#16
vzip.16 d14,d15
vadd.u64 d16,d16,d10
vst1.32 {d14[0]}, [r7, :32]!
vshr.u64 d10,d16,#16
vadd.u64 d17,d17,d10
vshr.u64 d10,d17,#16
vzip.16 d16,d17
vadd.u64 d18,d18,d10
vst1.32 {d16[0]}, [r7, :32]!
vshr.u64 d10,d18,#16
vadd.u64 d19,d19,d10
vshr.u64 d10,d19,#16
vzip.16 d18,d19
vadd.u64 d20,d20,d10
vst1.32 {d18[0]}, [r7, :32]!
vshr.u64 d10,d20,#16
vadd.u64 d21,d21,d10
vshr.u64 d10,d21,#16
vzip.16 d20,d21
vadd.u64 d22,d22,d10
vst1.32 {d20[0]}, [r7, :32]!
vshr.u64 d10,d22,#16
vadd.u64 d23,d23,d10
vshr.u64 d10,d23,#16
vzip.16 d22,d23
vadd.u64 d24,d24,d10
vst1.32 {d22[0]}, [r7, :32]!
vshr.u64 d10,d24,#16
vadd.u64 d25,d25,d10
vshr.u64 d10,d25,#16
vzip.16 d24,d25
vadd.u64 d26,d26,d10
vst1.32 {d24[0]}, [r7, :32]!
vshr.u64 d10,d26,#16
vadd.u64 d27,d27,d10
vshr.u64 d10,d27,#16
vzip.16 d26,d27
vld1.64 {q6,q7}, [r6, :256]!
subs r8,r8,#8
vst1.32 {d26[0]}, [r7, :32]!
bne LNEON_tail
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
sub r3,r3,r5,lsl#2 @ rewind r3
subs r1,sp,#0 @ clear carry flag
add r2,sp,r5,lsl#2
LNEON_sub:
ldmia r1!, {r4,r5,r6,r7}
ldmia r3!, {r8,r9,r10,r11}
sbcs r8, r4,r8
sbcs r9, r5,r9
sbcs r10,r6,r10
sbcs r11,r7,r11
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne LNEON_sub
ldr r10, [r1] @ load top-most bit
mov r11,sp
veor q0,q0,q0
sub r11,r2,r11 @ this is num*4
veor q1,q1,q1
mov r1,sp
sub r0,r0,r11 @ rewind r0
mov r3,r2 @ second 3/4th of frame
sbcs r10,r10,#0 @ result is carry flag
LNEON_copy_n_zap:
ldmia r1!, {r4,r5,r6,r7}
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r3,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
ldmia r1, {r4,r5,r6,r7}
stmia r0!, {r8,r9,r10,r11}
sub r1,r1,#16
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r1,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne LNEON_copy_n_zap
mov sp,ip
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
bx lr @ bx lr
#endif
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#if __ARM_MAX_ARCH__>=7
.comm _OPENSSL_armcap_P,4
.non_lazy_symbol_pointer
OPENSSL_armcap_P:
.indirect_symbol _OPENSSL_armcap_P
.long 0
.private_extern _OPENSSL_armcap_P
#endif
#endif // !OPENSSL_NO_ASM

File diff suppressed because it is too large


@ -0,0 +1,600 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
@ instructions are in aesv8-armx.pl.)
.text
#if defined(__thumb2__) || defined(__clang__)
.syntax unified
#define ldrplb ldrbpl
#define ldrneb ldrbne
#endif
#if defined(__thumb2__)
.thumb
#else
.code 32
#endif
.align 5
rem_4bit:
.short 0x0000,0x1C20,0x3840,0x2460
.short 0x7080,0x6CA0,0x48C0,0x54E0
.short 0xE100,0xFD20,0xD940,0xC560
.short 0x9180,0x8DA0,0xA9C0,0xB5E0
#ifdef __thumb2__
.thumb_func rem_4bit_get
#endif
rem_4bit_get:
#if defined(__thumb2__)
adr r2,rem_4bit
#else
sub r2,pc,#8+32 @ &rem_4bit
#endif
b Lrem_4bit_got
nop
nop
.globl _gcm_ghash_4bit
.private_extern _gcm_ghash_4bit
#ifdef __thumb2__
.thumb_func _gcm_ghash_4bit
#endif
.align 4
_gcm_ghash_4bit:
#if defined(__thumb2__)
adr r12,rem_4bit
#else
sub r12,pc,#8+48 @ &rem_4bit
#endif
add r3,r2,r3 @ r3 to point at the end
stmdb sp!,{r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} @ save r3/end too
ldmia r12,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy rem_4bit ...
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ ... to stack
ldrb r12,[r2,#15]
ldrb r14,[r0,#15]
Louter:
eor r12,r12,r14
and r14,r12,#0xf0
and r12,r12,#0x0f
mov r3,#14
add r7,r1,r12,lsl#4
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
add r11,r1,r14
ldrb r12,[r2,#14]
and r14,r4,#0xf @ rem
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
add r14,r14,r14
eor r4,r8,r4,lsr#4
ldrh r8,[sp,r14] @ rem_4bit[rem]
eor r4,r4,r5,lsl#28
ldrb r14,[r0,#14]
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
eor r12,r12,r14
and r14,r12,#0xf0
and r12,r12,#0x0f
eor r7,r7,r8,lsl#16
Linner:
add r11,r1,r12,lsl#4
and r12,r4,#0xf @ rem
subs r3,r3,#1
add r12,r12,r12
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
ldrh r8,[sp,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r12,[r2,r3]
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
add r11,r1,r14
and r14,r4,#0xf @ rem
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r8,[r0,r3]
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
ldrh r9,[sp,r14]
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
#ifdef __thumb2__
it pl
#endif
eorpl r12,r12,r8
eor r7,r11,r7,lsr#4
#ifdef __thumb2__
itt pl
#endif
andpl r14,r12,#0xf0
andpl r12,r12,#0x0f
eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
bpl Linner
ldr r3,[sp,#32] @ re-load r3/end
add r2,r2,#16
mov r14,r4
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r4,r4
str r4,[r0,#12]
#elif defined(__ARMEB__)
str r4,[r0,#12]
#else
mov r9,r4,lsr#8
strb r4,[r0,#12+3]
mov r10,r4,lsr#16
strb r9,[r0,#12+2]
mov r11,r4,lsr#24
strb r10,[r0,#12+1]
strb r11,[r0,#12]
#endif
cmp r2,r3
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r5,r5
str r5,[r0,#8]
#elif defined(__ARMEB__)
str r5,[r0,#8]
#else
mov r9,r5,lsr#8
strb r5,[r0,#8+3]
mov r10,r5,lsr#16
strb r9,[r0,#8+2]
mov r11,r5,lsr#24
strb r10,[r0,#8+1]
strb r11,[r0,#8]
#endif
#ifdef __thumb2__
it ne
#endif
ldrneb r12,[r2,#15]
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6
str r6,[r0,#4]
#elif defined(__ARMEB__)
str r6,[r0,#4]
#else
mov r9,r6,lsr#8
strb r6,[r0,#4+3]
mov r10,r6,lsr#16
strb r9,[r0,#4+2]
mov r11,r6,lsr#24
strb r10,[r0,#4+1]
strb r11,[r0,#4]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r7,r7
str r7,[r0,#0]
#elif defined(__ARMEB__)
str r7,[r0,#0]
#else
mov r9,r7,lsr#8
strb r7,[r0,#0+3]
mov r10,r7,lsr#16
strb r9,[r0,#0+2]
mov r11,r7,lsr#24
strb r10,[r0,#0+1]
strb r11,[r0,#0]
#endif
bne Louter
add sp,sp,#36
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.globl _gcm_gmult_4bit
.private_extern _gcm_gmult_4bit
#ifdef __thumb2__
.thumb_func _gcm_gmult_4bit
#endif
_gcm_gmult_4bit:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
ldrb r12,[r0,#15]
b rem_4bit_get
Lrem_4bit_got:
and r14,r12,#0xf0
and r12,r12,#0x0f
mov r3,#14
add r7,r1,r12,lsl#4
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
ldrb r12,[r0,#14]
add r11,r1,r14
and r14,r4,#0xf @ rem
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
add r14,r14,r14
eor r4,r8,r4,lsr#4
ldrh r8,[r2,r14] @ rem_4bit[rem]
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
and r14,r12,#0xf0
eor r7,r7,r8,lsl#16
and r12,r12,#0x0f
Loop:
add r11,r1,r12,lsl#4
and r12,r4,#0xf @ rem
subs r3,r3,#1
add r12,r12,r12
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
ldrh r8,[r2,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r12,[r0,r3]
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
add r11,r1,r14
and r14,r4,#0xf @ rem
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
ldrh r8,[r2,r14] @ rem_4bit[rem]
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
#ifdef __thumb2__
itt pl
#endif
andpl r14,r12,#0xf0
andpl r12,r12,#0x0f
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
bpl Loop
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r4,r4
str r4,[r0,#12]
#elif defined(__ARMEB__)
str r4,[r0,#12]
#else
mov r9,r4,lsr#8
strb r4,[r0,#12+3]
mov r10,r4,lsr#16
strb r9,[r0,#12+2]
mov r11,r4,lsr#24
strb r10,[r0,#12+1]
strb r11,[r0,#12]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r5,r5
str r5,[r0,#8]
#elif defined(__ARMEB__)
str r5,[r0,#8]
#else
mov r9,r5,lsr#8
strb r5,[r0,#8+3]
mov r10,r5,lsr#16
strb r9,[r0,#8+2]
mov r11,r5,lsr#24
strb r10,[r0,#8+1]
strb r11,[r0,#8]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6
str r6,[r0,#4]
#elif defined(__ARMEB__)
str r6,[r0,#4]
#else
mov r9,r6,lsr#8
strb r6,[r0,#4+3]
mov r10,r6,lsr#16
strb r9,[r0,#4+2]
mov r11,r6,lsr#24
strb r10,[r0,#4+1]
strb r11,[r0,#4]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r7,r7
str r7,[r0,#0]
#elif defined(__ARMEB__)
str r7,[r0,#0]
#else
mov r9,r7,lsr#8
strb r7,[r0,#0+3]
mov r10,r7,lsr#16
strb r9,[r0,#0+2]
mov r11,r7,lsr#24
strb r10,[r0,#0+1]
strb r11,[r0,#0]
#endif
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
#if __ARM_MAX_ARCH__>=7
.globl _gcm_init_neon
.private_extern _gcm_init_neon
#ifdef __thumb2__
.thumb_func _gcm_init_neon
#endif
.align 4
_gcm_init_neon:
vld1.64 d7,[r1]! @ load H
vmov.i8 q8,#0xe1
vld1.64 d6,[r1]
vshl.i64 d17,#57
vshr.u64 d16,#63 @ t0=0xc2....01
vdup.8 q9,d7[7]
vshr.u64 d26,d6,#63
vshr.s8 q9,#7 @ broadcast carry bit
vshl.i64 q3,q3,#1
vand q8,q8,q9
vorr d7,d26 @ H<<<=1
veor q3,q3,q8 @ twisted H
vstmia r0,{q3}
bx lr @ bx lr
.globl _gcm_gmult_neon
.private_extern _gcm_gmult_neon
#ifdef __thumb2__
.thumb_func _gcm_gmult_neon
#endif
.align 4
_gcm_gmult_neon:
vld1.64 d7,[r0]! @ load Xi
vld1.64 d6,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
mov r3,#16
b Lgmult_neon
.globl _gcm_ghash_neon
.private_extern _gcm_ghash_neon
#ifdef __thumb2__
.thumb_func _gcm_ghash_neon
#endif
.align 4
_gcm_ghash_neon:
vld1.64 d1,[r0]! @ load Xi
vld1.64 d0,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
Loop_neon:
vld1.64 d7,[r2]! @ load inp
vld1.64 d6,[r2]!
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
veor q3,q0 @ inp^=Xi
Lgmult_neon:
vext.8 d16, d26, d26, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d0, d6, d6, #1 @ B1
vmull.p8 q0, d26, d0 @ E = A*B1
vext.8 d18, d26, d26, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d26, d22 @ G = A*B2
vext.8 d20, d26, d26, #3 @ A3
veor q8, q8, q0 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d0, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q0, d26, d0 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d26, d22 @ K = A*B4
veor q10, q10, q0 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q0, d26, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q0, q0, q8
veor q0, q0, q10
veor d6,d6,d7 @ Karatsuba pre-processing
vext.8 d16, d28, d28, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d2, d6, d6, #1 @ B1
vmull.p8 q1, d28, d2 @ E = A*B1
vext.8 d18, d28, d28, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d28, d22 @ G = A*B2
vext.8 d20, d28, d28, #3 @ A3
veor q8, q8, q1 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d2, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q1, d28, d2 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d28, d22 @ K = A*B4
veor q10, q10, q1 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q1, d28, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q1, q1, q8
veor q1, q1, q10
vext.8 d16, d27, d27, #1 @ A1
vmull.p8 q8, d16, d7 @ F = A1*B
vext.8 d4, d7, d7, #1 @ B1
vmull.p8 q2, d27, d4 @ E = A*B1
vext.8 d18, d27, d27, #2 @ A2
vmull.p8 q9, d18, d7 @ H = A2*B
vext.8 d22, d7, d7, #2 @ B2
vmull.p8 q11, d27, d22 @ G = A*B2
vext.8 d20, d27, d27, #3 @ A3
veor q8, q8, q2 @ L = E + F
vmull.p8 q10, d20, d7 @ J = A3*B
vext.8 d4, d7, d7, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q2, d27, d4 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d7, d7, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d27, d22 @ K = A*B4
veor q10, q10, q2 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q2, d27, d7 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q2, q2, q8
veor q2, q2, q10
veor q1,q1,q0 @ Karatsuba post-processing
veor q1,q1,q2
veor d1,d1,d2
veor d4,d4,d3 @ Xh|Xl - 256-bit result
@ equivalent of reduction_avx from ghash-x86_64.pl
vshl.i64 q9,q0,#57 @ 1st phase
vshl.i64 q10,q0,#62
veor q10,q10,q9 @
vshl.i64 q9,q0,#63
veor q10, q10, q9 @
veor d1,d1,d20 @
veor d4,d4,d21
vshr.u64 q10,q0,#1 @ 2nd phase
veor q2,q2,q0
veor q0,q0,q10 @
vshr.u64 q10,q10,#6
vshr.u64 q0,q0,#1 @
veor q0,q0,q2 @
veor q0,q0,q10 @
subs r3,#16
bne Loop_neon
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
sub r0,#16
vst1.64 d1,[r0]! @ write out Xi
vst1.64 d0,[r0]
bx lr @ bx lr
#endif
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM


@ -0,0 +1,256 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.code 32
#undef __thumb2__
.globl _gcm_init_v8
.private_extern _gcm_init_v8
#ifdef __thumb2__
.thumb_func _gcm_init_v8
#endif
.align 4
_gcm_init_v8:
vld1.64 {q9},[r1] @ load input H
vmov.i8 q11,#0xe1
vshl.i64 q11,q11,#57 @ 0xc2.0
vext.8 q3,q9,q9,#8
vshr.u64 q10,q11,#63
vdup.32 q9,d18[1]
vext.8 q8,q10,q11,#8 @ t0=0xc2....01
vshr.u64 q10,q3,#63
vshr.s32 q9,q9,#31 @ broadcast carry bit
vand q10,q10,q8
vshl.i64 q3,q3,#1
vext.8 q10,q10,q10,#8
vand q8,q8,q9
vorr q3,q3,q10 @ H<<<=1
veor q12,q3,q8 @ twisted H
vst1.64 {q12},[r0]! @ store Htable[0]
@ calculate H^2
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12
veor q8,q8,q12
.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12
.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q14,q0,q10
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
veor q9,q9,q14
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
vst1.64 {q13,q14},[r0] @ store Htable[1..2]
bx lr
.globl _gcm_gmult_v8
.private_extern _gcm_gmult_v8
#ifdef __thumb2__
.thumb_func _gcm_gmult_v8
#endif
.align 4
_gcm_gmult_v8:
vld1.64 {q9},[r0] @ load Xi
vmov.i8 q11,#0xe1
vld1.64 {q12,q13},[r1] @ load twisted H, ...
vshl.u64 q11,q11,#57
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q3,q9,q9,#8
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
bx lr
.globl _gcm_ghash_v8
.private_extern _gcm_ghash_v8
#ifdef __thumb2__
.thumb_func _gcm_ghash_v8
#endif
.align 4
_gcm_ghash_v8:
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
vld1.64 {q0},[r0] @ load [rotated] Xi
@ "[rotated]" means that
@ loaded value would have
@ to be rotated in order to
@ make it appear as in
@ algorithm specification
subs r3,r3,#32 @ see if r3 is 32 or larger
mov r12,#16 @ r12 is used as post-
@ increment for input pointer;
@ as loop is modulo-scheduled
@ r12 is zeroed just in time
@ to preclude overstepping
@ inp[len], which means that
@ last block[s] are actually
@ loaded twice, but last
@ copy is not processed
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
vmov.i8 q11,#0xe1
vld1.64 {q14},[r1]
moveq r12,#0 @ is it time to zero r12?
vext.8 q0,q0,q0,#8 @ rotate Xi
vld1.64 {q8},[r2]! @ load [rotated] I[0]
vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant
#ifndef __ARMEB__
vrev64.8 q8,q8
vrev64.8 q0,q0
#endif
vext.8 q3,q8,q8,#8 @ rotate I[0]
blo Lodd_tail_v8 @ r3 was less than 32
vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q7,q9,q9,#8
veor q3,q3,q0 @ I[i]^=Xi
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q9,q9,q7 @ Karatsuba pre-processing
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
b Loop_mod2x_v8
.align 4
Loop_mod2x_v8:
vext.8 q10,q3,q3,#8
subs r3,r3,#32 @ is there more data?
.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
movlo r12,#0 @ is it time to zero r12?
.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9
veor q10,q10,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
veor q0,q0,q4 @ accumulate
.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2]
veor q2,q2,q6
moveq r12,#0 @ is it time to zero r12?
veor q1,q1,q5
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3]
#ifndef __ARMEB__
vrev64.8 q8,q8
#endif
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
vext.8 q7,q9,q9,#8
vext.8 q3,q8,q8,#8
veor q0,q1,q10
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q3,q3,q2 @ accumulate q3 early
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q3,q3,q10
veor q9,q9,q7 @ Karatsuba pre-processing
veor q3,q3,q0
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
bhs Loop_mod2x_v8 @ there was at least 32 more bytes
veor q2,q2,q10
vext.8 q3,q8,q8,#8 @ re-construct q3
adds r3,r3,#32 @ re-construct r3
veor q0,q0,q2 @ re-construct q0
beq Ldone_v8 @ is r3 zero?
Lodd_tail_v8:
vext.8 q10,q0,q0,#8
veor q3,q3,q0 @ inp^=Xi
veor q9,q8,q10 @ q9 is rotated inp^Xi
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
Ldone_v8:
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
bx lr
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -0,0 +1,377 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.syntax unified
.text
@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
@ with |argv|, then saves the callee-saved registers into |state|. It returns
@ the result of |func|. The |unwind| argument is unused.
@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
@ const uint32_t *argv, size_t argc,
@ int unwind);
.globl _abi_test_trampoline
.private_extern _abi_test_trampoline
.align 4
_abi_test_trampoline:
Labi_test_trampoline_begin:
@ Save parameters and all callee-saved registers. For convenience, we
@ save r9 on iOS even though it's volatile.
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
stmdb sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
sub sp, sp, #28
@ Every register in AAPCS is either non-volatile or a parameter (except
@ r9 on iOS), so this code, by the actual call, loses all its scratch
@ registers. First fill in stack parameters while there are registers
@ to spare.
cmp r3, #4
bls Lstack_args_done
mov r4, sp @ r4 is the output pointer.
add r5, r2, r3, lsl #2 @ Set r5 to the end of argv.
add r2, r2, #16 @ Skip four arguments.
Lstack_args_loop:
ldr r6, [r2], #4
cmp r2, r5
str r6, [r4], #4
bne Lstack_args_loop
Lstack_args_done:
@ Load registers from |r1|.
vldmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
#if defined(__APPLE__)
@ r9 is not volatile on iOS.
ldmia r1!, {r4,r5,r6,r7,r8,r10-r11}
#else
ldmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
#endif
@ Load register parameters. This uses up our remaining registers, so we
@ repurpose lr as scratch space.
ldr r3, [sp, #40] @ Reload argc.
ldr lr, [sp, #36] @ Load argv into lr.
cmp r3, #3
bhi Larg_r3
beq Larg_r2
cmp r3, #1
bhi Larg_r1
beq Larg_r0
b Largs_done
Larg_r3:
ldr r3, [lr, #12] @ argv[3]
Larg_r2:
ldr r2, [lr, #8] @ argv[2]
Larg_r1:
ldr r1, [lr, #4] @ argv[1]
Larg_r0:
ldr r0, [lr] @ argv[0]
Largs_done:
@ With every other register in use, load the function pointer into lr
@ and call the function.
ldr lr, [sp, #28]
blx lr
@ r1-r3 are free for use again. The trampoline only supports
@ single-return functions. Pass r4-r11 to the caller.
ldr r1, [sp, #32]
vstmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
#if defined(__APPLE__)
@ r9 is not volatile on iOS.
stmia r1!, {r4,r5,r6,r7,r8,r10-r11}
#else
stmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
#endif
@ Unwind the stack and restore registers.
add sp, sp, #44 @ 44 = 28+16
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} @ Skip r0-r3 (see +16 above).
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
bx lr
.globl _abi_test_clobber_r0
.private_extern _abi_test_clobber_r0
.align 4
_abi_test_clobber_r0:
mov r0, #0
bx lr
.globl _abi_test_clobber_r1
.private_extern _abi_test_clobber_r1
.align 4
_abi_test_clobber_r1:
mov r1, #0
bx lr
.globl _abi_test_clobber_r2
.private_extern _abi_test_clobber_r2
.align 4
_abi_test_clobber_r2:
mov r2, #0
bx lr
.globl _abi_test_clobber_r3
.private_extern _abi_test_clobber_r3
.align 4
_abi_test_clobber_r3:
mov r3, #0
bx lr
.globl _abi_test_clobber_r4
.private_extern _abi_test_clobber_r4
.align 4
_abi_test_clobber_r4:
mov r4, #0
bx lr
.globl _abi_test_clobber_r5
.private_extern _abi_test_clobber_r5
.align 4
_abi_test_clobber_r5:
mov r5, #0
bx lr
.globl _abi_test_clobber_r6
.private_extern _abi_test_clobber_r6
.align 4
_abi_test_clobber_r6:
mov r6, #0
bx lr
.globl _abi_test_clobber_r7
.private_extern _abi_test_clobber_r7
.align 4
_abi_test_clobber_r7:
mov r7, #0
bx lr
.globl _abi_test_clobber_r8
.private_extern _abi_test_clobber_r8
.align 4
_abi_test_clobber_r8:
mov r8, #0
bx lr
.globl _abi_test_clobber_r9
.private_extern _abi_test_clobber_r9
.align 4
_abi_test_clobber_r9:
mov r9, #0
bx lr
.globl _abi_test_clobber_r10
.private_extern _abi_test_clobber_r10
.align 4
_abi_test_clobber_r10:
mov r10, #0
bx lr
.globl _abi_test_clobber_r11
.private_extern _abi_test_clobber_r11
.align 4
_abi_test_clobber_r11:
mov r11, #0
bx lr
.globl _abi_test_clobber_r12
.private_extern _abi_test_clobber_r12
.align 4
_abi_test_clobber_r12:
mov r12, #0
bx lr
.globl _abi_test_clobber_d0
.private_extern _abi_test_clobber_d0
.align 4
_abi_test_clobber_d0:
mov r0, #0
vmov s0, r0
vmov s1, r0
bx lr
.globl _abi_test_clobber_d1
.private_extern _abi_test_clobber_d1
.align 4
_abi_test_clobber_d1:
mov r0, #0
vmov s2, r0
vmov s3, r0
bx lr
.globl _abi_test_clobber_d2
.private_extern _abi_test_clobber_d2
.align 4
_abi_test_clobber_d2:
mov r0, #0
vmov s4, r0
vmov s5, r0
bx lr
.globl _abi_test_clobber_d3
.private_extern _abi_test_clobber_d3
.align 4
_abi_test_clobber_d3:
mov r0, #0
vmov s6, r0
vmov s7, r0
bx lr
.globl _abi_test_clobber_d4
.private_extern _abi_test_clobber_d4
.align 4
_abi_test_clobber_d4:
mov r0, #0
vmov s8, r0
vmov s9, r0
bx lr
.globl _abi_test_clobber_d5
.private_extern _abi_test_clobber_d5
.align 4
_abi_test_clobber_d5:
mov r0, #0
vmov s10, r0
vmov s11, r0
bx lr
.globl _abi_test_clobber_d6
.private_extern _abi_test_clobber_d6
.align 4
_abi_test_clobber_d6:
mov r0, #0
vmov s12, r0
vmov s13, r0
bx lr
.globl _abi_test_clobber_d7
.private_extern _abi_test_clobber_d7
.align 4
_abi_test_clobber_d7:
mov r0, #0
vmov s14, r0
vmov s15, r0
bx lr
.globl _abi_test_clobber_d8
.private_extern _abi_test_clobber_d8
.align 4
_abi_test_clobber_d8:
mov r0, #0
vmov s16, r0
vmov s17, r0
bx lr
.globl _abi_test_clobber_d9
.private_extern _abi_test_clobber_d9
.align 4
_abi_test_clobber_d9:
mov r0, #0
vmov s18, r0
vmov s19, r0
bx lr
.globl _abi_test_clobber_d10
.private_extern _abi_test_clobber_d10
.align 4
_abi_test_clobber_d10:
mov r0, #0
vmov s20, r0
vmov s21, r0
bx lr
.globl _abi_test_clobber_d11
.private_extern _abi_test_clobber_d11
.align 4
_abi_test_clobber_d11:
mov r0, #0
vmov s22, r0
vmov s23, r0
bx lr
.globl _abi_test_clobber_d12
.private_extern _abi_test_clobber_d12
.align 4
_abi_test_clobber_d12:
mov r0, #0
vmov s24, r0
vmov s25, r0
bx lr
.globl _abi_test_clobber_d13
.private_extern _abi_test_clobber_d13
.align 4
_abi_test_clobber_d13:
mov r0, #0
vmov s26, r0
vmov s27, r0
bx lr
.globl _abi_test_clobber_d14
.private_extern _abi_test_clobber_d14
.align 4
_abi_test_clobber_d14:
mov r0, #0
vmov s28, r0
vmov s29, r0
bx lr
.globl _abi_test_clobber_d15
.private_extern _abi_test_clobber_d15
.align 4
_abi_test_clobber_d15:
mov r0, #0
vmov s30, r0
vmov s31, r0
bx lr
#endif // !OPENSSL_NO_ASM


@ -1,39 +1,48 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.section .rodata
.align 5
.Lsigma:
.quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral
.Lone:
.long 1,0,0,0
.LOPENSSL_armcap_P:
#ifdef __ILP32__
.long OPENSSL_armcap_P-.
#else
.quad OPENSSL_armcap_P-.
#endif
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.text
.globl ChaCha20_ctr32
.hidden ChaCha20_ctr32
.type ChaCha20_ctr32,%function
.align 5
ChaCha20_ctr32:
cbz x2,.Labort
adr x5,.LOPENSSL_armcap_P
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x5,:pg_hi21_nc:OPENSSL_armcap_P
#else
adrp x5,OPENSSL_armcap_P
#endif
cmp x2,#192
b.lo .Lshort
#ifdef __ILP32__
ldrsw x6,[x5]
#else
ldr x6,[x5]
#endif
ldr w17,[x6,x5]
ldr w17,[x5,:lo12:OPENSSL_armcap_P]
tst w17,#ARMV7_NEON
b.ne ChaCha20_neon
@ -41,7 +50,8 @@ ChaCha20_ctr32:
stp x29,x30,[sp,#-96]!
add x29,sp,#0
adr x5,.Lsigma
adrp x5,.Lsigma
add x5,x5,:lo12:.Lsigma
stp x19,x20,[sp,#16]
stp x21,x22,[sp,#32]
stp x23,x24,[sp,#48]
@ -314,7 +324,8 @@ ChaCha20_neon:
stp x29,x30,[sp,#-96]!
add x29,sp,#0
adr x5,.Lsigma
adrp x5,.Lsigma
add x5,x5,:lo12:.Lsigma
stp x19,x20,[sp,#16]
stp x21,x22,[sp,#32]
stp x23,x24,[sp,#48]
@ -807,7 +818,8 @@ ChaCha20_512_neon:
stp x29,x30,[sp,#-96]!
add x29,sp,#0
adr x5,.Lsigma
adrp x5,.Lsigma
add x5,x5,:lo12:.Lsigma
stp x19,x20,[sp,#16]
stp x21,x22,[sp,#32]
stp x23,x24,[sp,#48]
@ -1969,3 +1981,5 @@ ChaCha20_512_neon:
ret
.size ChaCha20_512_neon,.-ChaCha20_512_neon
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits


@ -1,17 +1,32 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
#if !defined(__clang__) || defined(BORINGSSL_CLANG_SUPPORTS_DOT_ARCH)
.arch armv8-a+crypto
#endif
.section .rodata
.align 5
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,%function
@ -33,7 +48,8 @@ aes_hw_set_encrypt_key:
tst w1,#0x3f
b.ne .Lenc_key_abort
adr x3,.Lrcon
adrp x3,.Lrcon
add x3,x3,:lo12:.Lrcon
cmp w1,#192
eor v0.16b,v0.16b,v0.16b
@ -755,3 +771,5 @@ aes_hw_ctr32_encrypt_blocks:
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits


@ -1,4 +1,18 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl bn_mul_mont
@ -1405,3 +1419,5 @@ __bn_mul4x_mont:
.align 2
.align 4
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits


@ -0,0 +1,341 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl gcm_init_neon
.hidden gcm_init_neon
.type gcm_init_neon,%function
.align 4
gcm_init_neon:
// This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1
shl v19.2d, v19.2d, #57 // 0xc2.0
ext v3.16b, v17.16b, v17.16b, #8
ushr v18.2d, v19.2d, #63
dup v17.4s, v17.s[1]
ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
ushr v18.2d, v3.2d, #63
sshr v17.4s, v17.4s, #31 // broadcast carry bit
and v18.16b, v18.16b, v16.16b
shl v3.2d, v3.2d, #1
ext v18.16b, v18.16b, v18.16b, #8
and v16.16b, v16.16b, v17.16b
orr v3.16b, v3.16b, v18.16b // H<<<=1
eor v5.16b, v3.16b, v16.16b // twisted H
st1 {v5.2d}, [x0] // store Htable[0]
ret
.size gcm_init_neon,.-gcm_init_neon
.globl gcm_gmult_neon
.hidden gcm_gmult_neon
.type gcm_gmult_neon,%function
.align 4
gcm_gmult_neon:
ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, .Lmasks // load constants
add x9, x9, :lo12:.Lmasks
ld1 {v24.2d, v25.2d}, [x9]
rev64 v3.16b, v3.16b // byteswap Xi
ext v3.16b, v3.16b, v3.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
mov x3, #16
b .Lgmult_neon
.size gcm_gmult_neon,.-gcm_gmult_neon
.globl gcm_ghash_neon
.hidden gcm_ghash_neon
.type gcm_ghash_neon,%function
.align 4
gcm_ghash_neon:
ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, .Lmasks // load constants
add x9, x9, :lo12:.Lmasks
ld1 {v24.2d, v25.2d}, [x9]
rev64 v0.16b, v0.16b // byteswap Xi
ext v0.16b, v0.16b, v0.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
.Loop_neon:
ld1 {v3.16b}, [x2], #16 // load inp
rev64 v3.16b, v3.16b // byteswap inp
ext v3.16b, v3.16b, v3.16b, #8
eor v3.16b, v3.16b, v0.16b // inp ^= Xi
.Lgmult_neon:
// Split the input into v3 and v4. (The upper halves are unused,
// so it is okay to leave them alone.)
ins v4.d[0], v3.d[1]
ext v16.8b, v5.8b, v5.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v0.8b, v3.8b, v3.8b, #1 // B1
pmull v0.8h, v5.8b, v0.8b // E = A*B1
ext v17.8b, v5.8b, v5.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v5.8b, v19.8b // G = A*B2
ext v18.8b, v5.8b, v5.8b, #3 // A3
eor v16.16b, v16.16b, v0.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v0.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v0.8h, v5.8b, v0.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v0.16b // N = I + J
pmull v19.8h, v5.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v0.8h, v5.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v0.16b, v0.16b, v16.16b
eor v0.16b, v0.16b, v18.16b
eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
ext v16.8b, v7.8b, v7.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v1.8b, v3.8b, v3.8b, #1 // B1
pmull v1.8h, v7.8b, v1.8b // E = A*B1
ext v17.8b, v7.8b, v7.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v7.8b, v19.8b // G = A*B2
ext v18.8b, v7.8b, v7.8b, #3 // A3
eor v16.16b, v16.16b, v1.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v1.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v1.8h, v7.8b, v1.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v1.16b // N = I + J
pmull v19.8h, v7.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v1.8h, v7.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v1.16b, v1.16b, v16.16b
eor v1.16b, v1.16b, v18.16b
ext v16.8b, v6.8b, v6.8b, #1 // A1
pmull v16.8h, v16.8b, v4.8b // F = A1*B
ext v2.8b, v4.8b, v4.8b, #1 // B1
pmull v2.8h, v6.8b, v2.8b // E = A*B1
ext v17.8b, v6.8b, v6.8b, #2 // A2
pmull v17.8h, v17.8b, v4.8b // H = A2*B
ext v19.8b, v4.8b, v4.8b, #2 // B2
pmull v19.8h, v6.8b, v19.8b // G = A*B2
ext v18.8b, v6.8b, v6.8b, #3 // A3
eor v16.16b, v16.16b, v2.16b // L = E + F
pmull v18.8h, v18.8b, v4.8b // J = A3*B
ext v2.8b, v4.8b, v4.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v2.8h, v6.8b, v2.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v4.8b, v4.8b, #4 // B4
eor v18.16b, v18.16b, v2.16b // N = I + J
pmull v19.8h, v6.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v2.8h, v6.8b, v4.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v2.16b, v2.16b, v16.16b
eor v2.16b, v2.16b, v18.16b
ext v16.16b, v0.16b, v2.16b, #8
eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
eor v1.16b, v1.16b, v2.16b
eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result
// This is a no-op due to the ins instruction below.
// ins v2.d[0], v1.d[1]
// equivalent of reduction_avx from ghash-x86_64.pl
shl v17.2d, v0.2d, #57 // 1st phase
shl v18.2d, v0.2d, #62
eor v18.16b, v18.16b, v17.16b //
shl v17.2d, v0.2d, #63
eor v18.16b, v18.16b, v17.16b //
// Note Xm contains {Xl.d[1], Xh.d[0]}.
eor v18.16b, v18.16b, v1.16b
ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0]
ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1]
ushr v18.2d, v0.2d, #1 // 2nd phase
eor v2.16b, v2.16b,v0.16b
eor v0.16b, v0.16b,v18.16b //
ushr v18.2d, v18.2d, #6
ushr v0.2d, v0.2d, #1 //
eor v0.16b, v0.16b, v2.16b //
eor v0.16b, v0.16b, v18.16b //
subs x3, x3, #16
bne .Loop_neon
rev64 v0.16b, v0.16b // byteswap Xi and write
ext v0.16b, v0.16b, v0.16b, #8
st1 {v0.16b}, [x0]
ret
.size gcm_ghash_neon,.-gcm_ghash_neon
.section .rodata
.align 4
.Lmasks:
.quad 0x0000ffffffffffff // k48
.quad 0x00000000ffffffff // k32
.quad 0x000000000000ffff // k16
.quad 0x0000000000000000 // k0
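// These are the $k48/$k32/$k16 masks referenced in the comments above;
// judging by the and instructions in the loop, they are presumably loaded
// pairwise as v24 = {k48,k32} and v25 = {k16,k0} before entering it.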
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

@ -1,10 +1,22 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
#if !defined(__clang__) || defined(BORINGSSL_CLANG_SUPPORTS_DOT_ARCH)
.arch armv8-a+crypto
#endif
.globl gcm_init_v8
.hidden gcm_init_v8
.type gcm_init_v8,%function
@ -108,13 +120,13 @@ gcm_ghash_v8:
//loaded value would have
//to be rotated in order to
//make it appear as in
//alorithm specification
//algorithm specification
subs x3,x3,#32 //see if x3 is 32 or larger
mov x12,#16 //x12 is used as post-
//increment for input pointer;
//as loop is modulo-scheduled
//x12 is zeroed just in time
//to preclude oversteping
//to preclude overstepping
//inp[len], which means that
//last block[s] are actually
//loaded twice, but last
@ -233,3 +245,5 @@ gcm_ghash_v8:
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

@ -1,4 +1,18 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
@ -9,14 +23,12 @@
.type sha1_block_data_order,%function
.align 6
sha1_block_data_order:
#ifdef __ILP32__
ldrsw x16,.LOPENSSL_armcap_P
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P
#else
ldr x16,.LOPENSSL_armcap_P
adrp x16,OPENSSL_armcap_P
#endif
adr x17,.LOPENSSL_armcap_P
add x16,x16,x17
ldr w16,[x16]
ldr w16,[x16,:lo12:OPENSSL_armcap_P]
tst w16,#ARMV8_SHA1
b.ne .Lv8_entry
@ -1082,7 +1094,8 @@ sha1_block_armv8:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
adr x4,.Lconst
adrp x4,.Lconst
add x4,x4,:lo12:.Lconst
eor v1.16b,v1.16b,v1.16b
ld1 {v0.4s},[x0],#16
ld1 {v1.s}[0],[x0]
@ -1205,20 +1218,18 @@ sha1_block_armv8:
ldr x29,[sp],#16
ret
.size sha1_block_armv8,.-sha1_block_armv8
.section .rodata
.align 6
.Lconst:
.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19
.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39
.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59
.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79
.LOPENSSL_armcap_P:
#ifdef __ILP32__
.long OPENSSL_armcap_P-.
#else
.quad OPENSSL_armcap_P-.
#endif
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
.comm OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

@ -1,4 +1,18 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
@ -51,14 +65,12 @@
.align 6
sha256_block_data_order:
#ifndef __KERNEL__
# ifdef __ILP32__
ldrsw x16,.LOPENSSL_armcap_P
# else
ldr x16,.LOPENSSL_armcap_P
# endif
adr x17,.LOPENSSL_armcap_P
add x16,x16,x17
ldr w16,[x16]
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P
#else
adrp x16,OPENSSL_armcap_P
#endif
ldr w16,[x16,:lo12:OPENSSL_armcap_P]
tst w16,#ARMV8_SHA256
b.ne .Lv8_entry
#endif
@ -77,7 +89,8 @@ sha256_block_data_order:
ldp w24,w25,[x0,#4*4]
add x2,x1,x2,lsl#6 // end of input
ldp w26,w27,[x0,#6*4]
adr x30,.LK256
adrp x30,.LK256
add x30,x30,:lo12:.LK256
stp x0,x2,[x29,#96]
.Loop:
@ -1024,6 +1037,7 @@ sha256_block_data_order:
ret
.size sha256_block_data_order,.-sha256_block_data_order
.section .rodata
.align 6
.type .LK256,%object
.LK256:
@ -1045,18 +1059,10 @@ sha256_block_data_order:
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.long 0 //terminator
.size .LK256,.-.LK256
#ifndef __KERNEL__
.align 3
.LOPENSSL_armcap_P:
# ifdef __ILP32__
.long OPENSSL_armcap_P-.
# else
.quad OPENSSL_armcap_P-.
# endif
#endif
.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
.text
#ifndef __KERNEL__
.type sha256_block_armv8,%function
.align 6
@ -1066,7 +1072,8 @@ sha256_block_armv8:
add x29,sp,#0
ld1 {v0.4s,v1.4s},[x0]
adr x3,.LK256
adrp x3,.LK256
add x3,x3,:lo12:.LK256
.Loop_hw:
ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64
@ -1199,5 +1206,8 @@ sha256_block_armv8:
#endif
#ifndef __KERNEL__
.comm OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

@ -1,4 +1,18 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
@ -65,7 +79,8 @@ sha512_block_data_order:
ldp x24,x25,[x0,#4*8]
add x2,x1,x2,lsl#7 // end of input
ldp x26,x27,[x0,#6*8]
adr x30,.LK512
adrp x30,.LK512
add x30,x30,:lo12:.LK512
stp x0,x2,[x29,#96]
.Loop:
@ -1012,6 +1027,7 @@ sha512_block_data_order:
ret
.size sha512_block_data_order,.-sha512_block_data_order
.section .rodata
.align 6
.type .LK512,%object
.LK512:
@ -1057,19 +1073,13 @@ sha512_block_data_order:
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad 0 // terminator
.size .LK512,.-.LK512
#ifndef __KERNEL__
.align 3
.LOPENSSL_armcap_P:
# ifdef __ILP32__
.long OPENSSL_armcap_P-.
# else
.quad OPENSSL_armcap_P-.
# endif
#endif
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#ifndef __KERNEL__
.comm OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

File diff suppressed because it is too large.

@ -0,0 +1,688 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
// with |argv|, then saves the callee-saved registers into |state|. It returns
// the result of |func|. The |unwind| argument is unused.
// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
// const uint64_t *argv, size_t argc,
// uint64_t unwind);
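// A hedged caller-side sketch in C (the CallerState layout is assumed from
// the loads below: d8-d15 followed by x19-x28; |func_under_test| is a
// placeholder name):
//
//   CallerState state = ...;   // filled with test values by the harness
//   uint64_t argv[2] = {1, 2};
//   uint64_t ret = abi_test_trampoline((void (*)(...))func_under_test,
//                                      &state, argv, 2, 0);
//   // |state| now holds the callee-saved registers as the callee left
//   // them, so the harness can verify they were preserved.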
.type abi_test_trampoline, %function
.globl abi_test_trampoline
.hidden abi_test_trampoline
.align 4
abi_test_trampoline:
.Labi_test_trampoline_begin:
// Stack layout (low to high addresses)
// x29,x30 (16 bytes)
// d8-d15 (64 bytes)
// x19-x28 (80 bytes)
// x1 (8 bytes)
// padding (8 bytes)
stp x29, x30, [sp, #-176]!
mov x29, sp
// Saved callee-saved registers and |state|.
stp d8, d9, [sp, #16]
stp d10, d11, [sp, #32]
stp d12, d13, [sp, #48]
stp d14, d15, [sp, #64]
stp x19, x20, [sp, #80]
stp x21, x22, [sp, #96]
stp x23, x24, [sp, #112]
stp x25, x26, [sp, #128]
stp x27, x28, [sp, #144]
str x1, [sp, #160]
// Load registers from |state|, with the exception of x29. x29 is the
// frame pointer and also callee-saved, but AAPCS64 allows platforms to
// mandate that x29 always point to a frame. iOS64 does so, which means
// we cannot fill x29 with entropy without violating ABI rules
// ourselves. x29 is tested separately below.
ldp d8, d9, [x1], #16
ldp d10, d11, [x1], #16
ldp d12, d13, [x1], #16
ldp d14, d15, [x1], #16
ldp x19, x20, [x1], #16
ldp x21, x22, [x1], #16
ldp x23, x24, [x1], #16
ldp x25, x26, [x1], #16
ldp x27, x28, [x1], #16
// Move parameters into temporary registers.
mov x9, x0
mov x10, x2
mov x11, x3
// Load parameters into registers.
cbz x11, .Largs_done
ldr x0, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x1, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x2, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x3, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x4, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x5, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x6, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x7, [x10], #8
.Largs_done:
blr x9
// Reload |state| and store registers.
ldr x1, [sp, #160]
stp d8, d9, [x1], #16
stp d10, d11, [x1], #16
stp d12, d13, [x1], #16
stp d14, d15, [x1], #16
stp x19, x20, [x1], #16
stp x21, x22, [x1], #16
stp x23, x24, [x1], #16
stp x25, x26, [x1], #16
stp x27, x28, [x1], #16
// |func| is required to preserve x29, the frame pointer. We cannot load
// random values into x29 (see comment above), so compare it against the
// expected value and zero the field of |state| if corrupted.
mov x9, sp
cmp x29, x9
b.eq .Lx29_ok
str xzr, [x1]
.Lx29_ok:
// Restore callee-saved registers.
ldp d8, d9, [sp, #16]
ldp d10, d11, [sp, #32]
ldp d12, d13, [sp, #48]
ldp d14, d15, [sp, #64]
ldp x19, x20, [sp, #80]
ldp x21, x22, [sp, #96]
ldp x23, x24, [sp, #112]
ldp x25, x26, [sp, #128]
ldp x27, x28, [sp, #144]
ldp x29, x30, [sp], #176
ret
.size abi_test_trampoline,.-abi_test_trampoline
.type abi_test_clobber_x0, %function
.globl abi_test_clobber_x0
.hidden abi_test_clobber_x0
.align 4
abi_test_clobber_x0:
mov x0, xzr
ret
.size abi_test_clobber_x0,.-abi_test_clobber_x0
.type abi_test_clobber_x1, %function
.globl abi_test_clobber_x1
.hidden abi_test_clobber_x1
.align 4
abi_test_clobber_x1:
mov x1, xzr
ret
.size abi_test_clobber_x1,.-abi_test_clobber_x1
.type abi_test_clobber_x2, %function
.globl abi_test_clobber_x2
.hidden abi_test_clobber_x2
.align 4
abi_test_clobber_x2:
mov x2, xzr
ret
.size abi_test_clobber_x2,.-abi_test_clobber_x2
.type abi_test_clobber_x3, %function
.globl abi_test_clobber_x3
.hidden abi_test_clobber_x3
.align 4
abi_test_clobber_x3:
mov x3, xzr
ret
.size abi_test_clobber_x3,.-abi_test_clobber_x3
.type abi_test_clobber_x4, %function
.globl abi_test_clobber_x4
.hidden abi_test_clobber_x4
.align 4
abi_test_clobber_x4:
mov x4, xzr
ret
.size abi_test_clobber_x4,.-abi_test_clobber_x4
.type abi_test_clobber_x5, %function
.globl abi_test_clobber_x5
.hidden abi_test_clobber_x5
.align 4
abi_test_clobber_x5:
mov x5, xzr
ret
.size abi_test_clobber_x5,.-abi_test_clobber_x5
.type abi_test_clobber_x6, %function
.globl abi_test_clobber_x6
.hidden abi_test_clobber_x6
.align 4
abi_test_clobber_x6:
mov x6, xzr
ret
.size abi_test_clobber_x6,.-abi_test_clobber_x6
.type abi_test_clobber_x7, %function
.globl abi_test_clobber_x7
.hidden abi_test_clobber_x7
.align 4
abi_test_clobber_x7:
mov x7, xzr
ret
.size abi_test_clobber_x7,.-abi_test_clobber_x7
.type abi_test_clobber_x8, %function
.globl abi_test_clobber_x8
.hidden abi_test_clobber_x8
.align 4
abi_test_clobber_x8:
mov x8, xzr
ret
.size abi_test_clobber_x8,.-abi_test_clobber_x8
.type abi_test_clobber_x9, %function
.globl abi_test_clobber_x9
.hidden abi_test_clobber_x9
.align 4
abi_test_clobber_x9:
mov x9, xzr
ret
.size abi_test_clobber_x9,.-abi_test_clobber_x9
.type abi_test_clobber_x10, %function
.globl abi_test_clobber_x10
.hidden abi_test_clobber_x10
.align 4
abi_test_clobber_x10:
mov x10, xzr
ret
.size abi_test_clobber_x10,.-abi_test_clobber_x10
.type abi_test_clobber_x11, %function
.globl abi_test_clobber_x11
.hidden abi_test_clobber_x11
.align 4
abi_test_clobber_x11:
mov x11, xzr
ret
.size abi_test_clobber_x11,.-abi_test_clobber_x11
.type abi_test_clobber_x12, %function
.globl abi_test_clobber_x12
.hidden abi_test_clobber_x12
.align 4
abi_test_clobber_x12:
mov x12, xzr
ret
.size abi_test_clobber_x12,.-abi_test_clobber_x12
.type abi_test_clobber_x13, %function
.globl abi_test_clobber_x13
.hidden abi_test_clobber_x13
.align 4
abi_test_clobber_x13:
mov x13, xzr
ret
.size abi_test_clobber_x13,.-abi_test_clobber_x13
.type abi_test_clobber_x14, %function
.globl abi_test_clobber_x14
.hidden abi_test_clobber_x14
.align 4
abi_test_clobber_x14:
mov x14, xzr
ret
.size abi_test_clobber_x14,.-abi_test_clobber_x14
.type abi_test_clobber_x15, %function
.globl abi_test_clobber_x15
.hidden abi_test_clobber_x15
.align 4
abi_test_clobber_x15:
mov x15, xzr
ret
.size abi_test_clobber_x15,.-abi_test_clobber_x15
.type abi_test_clobber_x16, %function
.globl abi_test_clobber_x16
.hidden abi_test_clobber_x16
.align 4
abi_test_clobber_x16:
mov x16, xzr
ret
.size abi_test_clobber_x16,.-abi_test_clobber_x16
.type abi_test_clobber_x17, %function
.globl abi_test_clobber_x17
.hidden abi_test_clobber_x17
.align 4
abi_test_clobber_x17:
mov x17, xzr
ret
.size abi_test_clobber_x17,.-abi_test_clobber_x17
.type abi_test_clobber_x19, %function
.globl abi_test_clobber_x19
.hidden abi_test_clobber_x19
.align 4
abi_test_clobber_x19:
mov x19, xzr
ret
.size abi_test_clobber_x19,.-abi_test_clobber_x19
.type abi_test_clobber_x20, %function
.globl abi_test_clobber_x20
.hidden abi_test_clobber_x20
.align 4
abi_test_clobber_x20:
mov x20, xzr
ret
.size abi_test_clobber_x20,.-abi_test_clobber_x20
.type abi_test_clobber_x21, %function
.globl abi_test_clobber_x21
.hidden abi_test_clobber_x21
.align 4
abi_test_clobber_x21:
mov x21, xzr
ret
.size abi_test_clobber_x21,.-abi_test_clobber_x21
.type abi_test_clobber_x22, %function
.globl abi_test_clobber_x22
.hidden abi_test_clobber_x22
.align 4
abi_test_clobber_x22:
mov x22, xzr
ret
.size abi_test_clobber_x22,.-abi_test_clobber_x22
.type abi_test_clobber_x23, %function
.globl abi_test_clobber_x23
.hidden abi_test_clobber_x23
.align 4
abi_test_clobber_x23:
mov x23, xzr
ret
.size abi_test_clobber_x23,.-abi_test_clobber_x23
.type abi_test_clobber_x24, %function
.globl abi_test_clobber_x24
.hidden abi_test_clobber_x24
.align 4
abi_test_clobber_x24:
mov x24, xzr
ret
.size abi_test_clobber_x24,.-abi_test_clobber_x24
.type abi_test_clobber_x25, %function
.globl abi_test_clobber_x25
.hidden abi_test_clobber_x25
.align 4
abi_test_clobber_x25:
mov x25, xzr
ret
.size abi_test_clobber_x25,.-abi_test_clobber_x25
.type abi_test_clobber_x26, %function
.globl abi_test_clobber_x26
.hidden abi_test_clobber_x26
.align 4
abi_test_clobber_x26:
mov x26, xzr
ret
.size abi_test_clobber_x26,.-abi_test_clobber_x26
.type abi_test_clobber_x27, %function
.globl abi_test_clobber_x27
.hidden abi_test_clobber_x27
.align 4
abi_test_clobber_x27:
mov x27, xzr
ret
.size abi_test_clobber_x27,.-abi_test_clobber_x27
.type abi_test_clobber_x28, %function
.globl abi_test_clobber_x28
.hidden abi_test_clobber_x28
.align 4
abi_test_clobber_x28:
mov x28, xzr
ret
.size abi_test_clobber_x28,.-abi_test_clobber_x28
.type abi_test_clobber_x29, %function
.globl abi_test_clobber_x29
.hidden abi_test_clobber_x29
.align 4
abi_test_clobber_x29:
mov x29, xzr
ret
.size abi_test_clobber_x29,.-abi_test_clobber_x29
.type abi_test_clobber_d0, %function
.globl abi_test_clobber_d0
.hidden abi_test_clobber_d0
.align 4
abi_test_clobber_d0:
fmov d0, xzr
ret
.size abi_test_clobber_d0,.-abi_test_clobber_d0
.type abi_test_clobber_d1, %function
.globl abi_test_clobber_d1
.hidden abi_test_clobber_d1
.align 4
abi_test_clobber_d1:
fmov d1, xzr
ret
.size abi_test_clobber_d1,.-abi_test_clobber_d1
.type abi_test_clobber_d2, %function
.globl abi_test_clobber_d2
.hidden abi_test_clobber_d2
.align 4
abi_test_clobber_d2:
fmov d2, xzr
ret
.size abi_test_clobber_d2,.-abi_test_clobber_d2
.type abi_test_clobber_d3, %function
.globl abi_test_clobber_d3
.hidden abi_test_clobber_d3
.align 4
abi_test_clobber_d3:
fmov d3, xzr
ret
.size abi_test_clobber_d3,.-abi_test_clobber_d3
.type abi_test_clobber_d4, %function
.globl abi_test_clobber_d4
.hidden abi_test_clobber_d4
.align 4
abi_test_clobber_d4:
fmov d4, xzr
ret
.size abi_test_clobber_d4,.-abi_test_clobber_d4
.type abi_test_clobber_d5, %function
.globl abi_test_clobber_d5
.hidden abi_test_clobber_d5
.align 4
abi_test_clobber_d5:
fmov d5, xzr
ret
.size abi_test_clobber_d5,.-abi_test_clobber_d5
.type abi_test_clobber_d6, %function
.globl abi_test_clobber_d6
.hidden abi_test_clobber_d6
.align 4
abi_test_clobber_d6:
fmov d6, xzr
ret
.size abi_test_clobber_d6,.-abi_test_clobber_d6
.type abi_test_clobber_d7, %function
.globl abi_test_clobber_d7
.hidden abi_test_clobber_d7
.align 4
abi_test_clobber_d7:
fmov d7, xzr
ret
.size abi_test_clobber_d7,.-abi_test_clobber_d7
.type abi_test_clobber_d8, %function
.globl abi_test_clobber_d8
.hidden abi_test_clobber_d8
.align 4
abi_test_clobber_d8:
fmov d8, xzr
ret
.size abi_test_clobber_d8,.-abi_test_clobber_d8
.type abi_test_clobber_d9, %function
.globl abi_test_clobber_d9
.hidden abi_test_clobber_d9
.align 4
abi_test_clobber_d9:
fmov d9, xzr
ret
.size abi_test_clobber_d9,.-abi_test_clobber_d9
.type abi_test_clobber_d10, %function
.globl abi_test_clobber_d10
.hidden abi_test_clobber_d10
.align 4
abi_test_clobber_d10:
fmov d10, xzr
ret
.size abi_test_clobber_d10,.-abi_test_clobber_d10
.type abi_test_clobber_d11, %function
.globl abi_test_clobber_d11
.hidden abi_test_clobber_d11
.align 4
abi_test_clobber_d11:
fmov d11, xzr
ret
.size abi_test_clobber_d11,.-abi_test_clobber_d11
.type abi_test_clobber_d12, %function
.globl abi_test_clobber_d12
.hidden abi_test_clobber_d12
.align 4
abi_test_clobber_d12:
fmov d12, xzr
ret
.size abi_test_clobber_d12,.-abi_test_clobber_d12
.type abi_test_clobber_d13, %function
.globl abi_test_clobber_d13
.hidden abi_test_clobber_d13
.align 4
abi_test_clobber_d13:
fmov d13, xzr
ret
.size abi_test_clobber_d13,.-abi_test_clobber_d13
.type abi_test_clobber_d14, %function
.globl abi_test_clobber_d14
.hidden abi_test_clobber_d14
.align 4
abi_test_clobber_d14:
fmov d14, xzr
ret
.size abi_test_clobber_d14,.-abi_test_clobber_d14
.type abi_test_clobber_d15, %function
.globl abi_test_clobber_d15
.hidden abi_test_clobber_d15
.align 4
abi_test_clobber_d15:
fmov d15, xzr
ret
.size abi_test_clobber_d15,.-abi_test_clobber_d15
.type abi_test_clobber_d16, %function
.globl abi_test_clobber_d16
.hidden abi_test_clobber_d16
.align 4
abi_test_clobber_d16:
fmov d16, xzr
ret
.size abi_test_clobber_d16,.-abi_test_clobber_d16
.type abi_test_clobber_d17, %function
.globl abi_test_clobber_d17
.hidden abi_test_clobber_d17
.align 4
abi_test_clobber_d17:
fmov d17, xzr
ret
.size abi_test_clobber_d17,.-abi_test_clobber_d17
.type abi_test_clobber_d18, %function
.globl abi_test_clobber_d18
.hidden abi_test_clobber_d18
.align 4
abi_test_clobber_d18:
fmov d18, xzr
ret
.size abi_test_clobber_d18,.-abi_test_clobber_d18
.type abi_test_clobber_d19, %function
.globl abi_test_clobber_d19
.hidden abi_test_clobber_d19
.align 4
abi_test_clobber_d19:
fmov d19, xzr
ret
.size abi_test_clobber_d19,.-abi_test_clobber_d19
.type abi_test_clobber_d20, %function
.globl abi_test_clobber_d20
.hidden abi_test_clobber_d20
.align 4
abi_test_clobber_d20:
fmov d20, xzr
ret
.size abi_test_clobber_d20,.-abi_test_clobber_d20
.type abi_test_clobber_d21, %function
.globl abi_test_clobber_d21
.hidden abi_test_clobber_d21
.align 4
abi_test_clobber_d21:
fmov d21, xzr
ret
.size abi_test_clobber_d21,.-abi_test_clobber_d21
.type abi_test_clobber_d22, %function
.globl abi_test_clobber_d22
.hidden abi_test_clobber_d22
.align 4
abi_test_clobber_d22:
fmov d22, xzr
ret
.size abi_test_clobber_d22,.-abi_test_clobber_d22
.type abi_test_clobber_d23, %function
.globl abi_test_clobber_d23
.hidden abi_test_clobber_d23
.align 4
abi_test_clobber_d23:
fmov d23, xzr
ret
.size abi_test_clobber_d23,.-abi_test_clobber_d23
.type abi_test_clobber_d24, %function
.globl abi_test_clobber_d24
.hidden abi_test_clobber_d24
.align 4
abi_test_clobber_d24:
fmov d24, xzr
ret
.size abi_test_clobber_d24,.-abi_test_clobber_d24
.type abi_test_clobber_d25, %function
.globl abi_test_clobber_d25
.hidden abi_test_clobber_d25
.align 4
abi_test_clobber_d25:
fmov d25, xzr
ret
.size abi_test_clobber_d25,.-abi_test_clobber_d25
.type abi_test_clobber_d26, %function
.globl abi_test_clobber_d26
.hidden abi_test_clobber_d26
.align 4
abi_test_clobber_d26:
fmov d26, xzr
ret
.size abi_test_clobber_d26,.-abi_test_clobber_d26
.type abi_test_clobber_d27, %function
.globl abi_test_clobber_d27
.hidden abi_test_clobber_d27
.align 4
abi_test_clobber_d27:
fmov d27, xzr
ret
.size abi_test_clobber_d27,.-abi_test_clobber_d27
.type abi_test_clobber_d28, %function
.globl abi_test_clobber_d28
.hidden abi_test_clobber_d28
.align 4
abi_test_clobber_d28:
fmov d28, xzr
ret
.size abi_test_clobber_d28,.-abi_test_clobber_d28
.type abi_test_clobber_d29, %function
.globl abi_test_clobber_d29
.hidden abi_test_clobber_d29
.align 4
abi_test_clobber_d29:
fmov d29, xzr
ret
.size abi_test_clobber_d29,.-abi_test_clobber_d29
.type abi_test_clobber_d30, %function
.globl abi_test_clobber_d30
.hidden abi_test_clobber_d30
.align 4
abi_test_clobber_d30:
fmov d30, xzr
ret
.size abi_test_clobber_d30,.-abi_test_clobber_d30
.type abi_test_clobber_d31, %function
.globl abi_test_clobber_d31
.hidden abi_test_clobber_d31
.align 4
abi_test_clobber_d31:
fmov d31, xzr
ret
.size abi_test_clobber_d31,.-abi_test_clobber_d31
.type abi_test_clobber_v8_upper, %function
.globl abi_test_clobber_v8_upper
.hidden abi_test_clobber_v8_upper
.align 4
abi_test_clobber_v8_upper:
fmov v8.d[1], xzr
ret
.size abi_test_clobber_v8_upper,.-abi_test_clobber_v8_upper
.type abi_test_clobber_v9_upper, %function
.globl abi_test_clobber_v9_upper
.hidden abi_test_clobber_v9_upper
.align 4
abi_test_clobber_v9_upper:
fmov v9.d[1], xzr
ret
.size abi_test_clobber_v9_upper,.-abi_test_clobber_v9_upper
.type abi_test_clobber_v10_upper, %function
.globl abi_test_clobber_v10_upper
.hidden abi_test_clobber_v10_upper
.align 4
abi_test_clobber_v10_upper:
fmov v10.d[1], xzr
ret
.size abi_test_clobber_v10_upper,.-abi_test_clobber_v10_upper
.type abi_test_clobber_v11_upper, %function
.globl abi_test_clobber_v11_upper
.hidden abi_test_clobber_v11_upper
.align 4
abi_test_clobber_v11_upper:
fmov v11.d[1], xzr
ret
.size abi_test_clobber_v11_upper,.-abi_test_clobber_v11_upper
.type abi_test_clobber_v12_upper, %function
.globl abi_test_clobber_v12_upper
.hidden abi_test_clobber_v12_upper
.align 4
abi_test_clobber_v12_upper:
fmov v12.d[1], xzr
ret
.size abi_test_clobber_v12_upper,.-abi_test_clobber_v12_upper
.type abi_test_clobber_v13_upper, %function
.globl abi_test_clobber_v13_upper
.hidden abi_test_clobber_v13_upper
.align 4
abi_test_clobber_v13_upper:
fmov v13.d[1], xzr
ret
.size abi_test_clobber_v13_upper,.-abi_test_clobber_v13_upper
.type abi_test_clobber_v14_upper, %function
.globl abi_test_clobber_v14_upper
.hidden abi_test_clobber_v14_upper
.align 4
abi_test_clobber_v14_upper:
fmov v14.d[1], xzr
ret
.size abi_test_clobber_v14_upper,.-abi_test_clobber_v14_upper
.type abi_test_clobber_v15_upper, %function
.globl abi_test_clobber_v15_upper
.hidden abi_test_clobber_v15_upper
.align 4
abi_test_clobber_v15_upper:
fmov v15.d[1], xzr
ret
.size abi_test_clobber_v15_upper,.-abi_test_clobber_v15_upper
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

@ -0,0 +1,999 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.section .rodata
# p434 x 2
.Lp434x2:
.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
.quad 0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
.quad 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
# p434 + 1
.Lp434p1:
.quad 0xFDC1767AE3000000, 0x7BC65C783158AEA3
.quad 0x6CFC5FD681C52056, 0x0002341F27177344
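# Assuming these are the standard SIKEp434 parameters, the underlying prime
# is p434 = 2^216 * 3^137 - 1: .Lp434x2 stores 2*p434 (the repeated all-ones
# limb appears once and is reused), and .Lp434p1 stores the nonzero high
# limbs of p434 + 1, which the reduction routine below multiplies by.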
.text
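// sike_mpmul appears to be the multi-precision multiply behind the SIKE
// field arithmetic: it reads seven 64-bit limbs of |a| from x0 and of |b|
// from x1 and writes the 14-limb double-width product to x2, using one
// level of Karatsuba (AL/AH x BL/BH) as the step comments below indicate.
// Hypothetical C-level shape (limb type assumed to be uint64_t):
//   void sike_mpmul(const uint64_t a[7], const uint64_t b[7], uint64_t c[14]);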
.globl sike_mpmul
.hidden sike_mpmul
.align 4
sike_mpmul:
stp x29, x30, [sp,#-96]!
add x29, sp, #0
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x10, x11, [x1,#0]
ldp x12, x13, [x1,#16]
ldp x14, x15, [x1,#32]
ldr x16, [x1,#48]
// x3-x7 <- AH + AL, x7 <- carry
adds x3, x3, x7
adcs x4, x4, x8
adcs x5, x5, x9
adcs x6, x6, xzr
adc x7, xzr, xzr
// x10-x13 <- BH + BL, x8 <- carry
adds x10, x10, x14
adcs x11, x11, x15
adcs x12, x12, x16
adcs x13, x13, xzr
adc x8, xzr, xzr
// x9 <- combined carry
and x9, x7, x8
// x7-x8 <- mask
sub x7, xzr, x7
sub x8, xzr, x8
// x15-x19 <- masked (BH + BL)
and x14, x10, x7
and x15, x11, x7
and x16, x12, x7
and x17, x13, x7
// x20-x23 <- masked (AH + AL)
and x20, x3, x8
and x21, x4, x8
and x22, x5, x8
and x23, x6, x8
// x15-x19, x7 <- masked (AH+AL) + masked (BH+BL), step 1
adds x14, x14, x20
adcs x15, x15, x21
adcs x16, x16, x22
adcs x17, x17, x23
adc x7, x9, xzr
// x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part
stp x3, x4, [x2,#0]
// A0-A1 <- AH + AL, T0 <- mask
adds x3, x3, x5
adcs x4, x4, x6
adc x25, xzr, xzr
// C6, T1 <- BH + BL, C7 <- mask
adds x23, x10, x12
adcs x26, x11, x13
adc x24, xzr, xzr
// C0-C1 <- masked (BH + BL)
sub x19, xzr, x25
sub x20, xzr, x24
and x8, x23, x19
and x9, x26, x19
// C4-C5 <- masked (AH + AL), T0 <- combined carry
and x21, x3, x20
and x22, x4, x20
mul x19, x3, x23
mul x20, x3, x26
and x25, x25, x24
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
adds x8, x21, x8
umulh x21, x3, x26
adcs x9, x22, x9
umulh x22, x3, x23
adc x25, x25, xzr
// C2-C5 <- (AH+AL) x (BH+BL), low part
mul x3, x4, x23
umulh x23, x4, x23
adds x20, x20, x22
adc x21, x21, xzr
mul x24, x4, x26
umulh x26, x4, x26
adds x20, x20, x3
adcs x21, x21, x23
adc x22, xzr, xzr
adds x21, x21, x24
adc x22, x22, x26
ldp x3, x4, [x2,#0]
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
adds x21, x8, x21
umulh x24, x3, x10
umulh x26, x3, x11
adcs x22, x9, x22
mul x8, x3, x10
mul x9, x3, x11
adc x25, x25, xzr
// C0-C1, T1, C7 <- AL x BL
mul x3, x4, x10
umulh x10, x4, x10
adds x9, x9, x24
adc x26, x26, xzr
mul x23, x4, x11
umulh x11, x4, x11
adds x9, x9, x3
adcs x26, x26, x10
adc x24, xzr, xzr
adds x26, x26, x23
adc x24, x24, x11
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
mul x3, x5, x12
umulh x10, x5, x12
subs x19, x19, x8
sbcs x20, x20, x9
sbcs x21, x21, x26
mul x4, x5, x13
umulh x23, x5, x13
sbcs x22, x22, x24
sbc x25, x25, xzr
// A0, A1, C6, B0 <- AH x BH
mul x5, x6, x12
umulh x12, x6, x12
adds x4, x4, x10
adc x23, x23, xzr
mul x11, x6, x13
umulh x13, x6, x13
adds x4, x4, x5
adcs x23, x23, x12
adc x10, xzr, xzr
adds x23, x23, x11
adc x10, x10, x13
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x19, x19, x3
sbcs x20, x20, x4
sbcs x21, x21, x23
sbcs x22, x22, x10
sbc x25, x25, xzr
adds x19, x19, x26
adcs x20, x20, x24
adcs x21, x21, x3
adcs x22, x22, x4
adcs x23, x25, x23
adc x24, x10, xzr
// x15-x19, x7 <- (AH+AL) x (BH+BL), final step
adds x14, x14, x21
adcs x15, x15, x22
adcs x16, x16, x23
adcs x17, x17, x24
adc x7, x7, xzr
// Load AL
ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
// Load BL
ldp x10, x11, [x1,#0]
ldp x12, x13, [x1,#16]
// Temporarily store x8 in x2
stp x8, x9, [x2,#0]
// x21-x28 <- AL x BL
// A0-A1 <- AH + AL, T0 <- mask
adds x3, x3, x5
adcs x4, x4, x6
adc x8, xzr, xzr
// C6, T1 <- BH + BL, C7 <- mask
adds x27, x10, x12
adcs x9, x11, x13
adc x28, xzr, xzr
// C0-C1 <- masked (BH + BL)
sub x23, xzr, x8
sub x24, xzr, x28
and x21, x27, x23
and x22, x9, x23
// C4-C5 <- masked (AH + AL), T0 <- combined carry
and x25, x3, x24
and x26, x4, x24
mul x23, x3, x27
mul x24, x3, x9
and x8, x8, x28
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
adds x21, x25, x21
umulh x25, x3, x9
adcs x22, x26, x22
umulh x26, x3, x27
adc x8, x8, xzr
// C2-C5 <- (AH+AL) x (BH+BL), low part
mul x3, x4, x27
umulh x27, x4, x27
adds x24, x24, x26
adc x25, x25, xzr
mul x28, x4, x9
umulh x9, x4, x9
adds x24, x24, x3
adcs x25, x25, x27
adc x26, xzr, xzr
adds x25, x25, x28
adc x26, x26, x9
ldp x3, x4, [x0,#0]
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
adds x25, x21, x25
umulh x28, x3, x10
umulh x9, x3, x11
adcs x26, x22, x26
mul x21, x3, x10
mul x22, x3, x11
adc x8, x8, xzr
// C0-C1, T1, C7 <- AL x BL
mul x3, x4, x10
umulh x10, x4, x10
adds x22, x22, x28
adc x9, x9, xzr
mul x27, x4, x11
umulh x11, x4, x11
adds x22, x22, x3
adcs x9, x9, x10
adc x28, xzr, xzr
adds x9, x9, x27
adc x28, x28, x11
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
mul x3, x5, x12
umulh x10, x5, x12
subs x23, x23, x21
sbcs x24, x24, x22
sbcs x25, x25, x9
mul x4, x5, x13
umulh x27, x5, x13
sbcs x26, x26, x28
sbc x8, x8, xzr
// A0, A1, C6, B0 <- AH x BH
mul x5, x6, x12
umulh x12, x6, x12
adds x4, x4, x10
adc x27, x27, xzr
mul x11, x6, x13
umulh x13, x6, x13
adds x4, x4, x5
adcs x27, x27, x12
adc x10, xzr, xzr
adds x27, x27, x11
adc x10, x10, x13
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x23, x23, x3
sbcs x24, x24, x4
sbcs x25, x25, x27
sbcs x26, x26, x10
sbc x8, x8, xzr
adds x23, x23, x9
adcs x24, x24, x28
adcs x25, x25, x3
adcs x26, x26, x4
adcs x27, x8, x27
adc x28, x10, xzr
// Restore x8
ldp x8, x9, [x2,#0]
// x8-x10,x20,x15-x17,x19 <- masked (AH+AL) x (BH+BL) - ALxBL
subs x8, x8, x21
sbcs x9, x9, x22
sbcs x19, x19, x23
sbcs x20, x20, x24
sbcs x14, x14, x25
sbcs x15, x15, x26
sbcs x16, x16, x27
sbcs x17, x17, x28
sbc x7, x7, xzr
// Store ALxBL, low
stp x21, x22, [x2]
stp x23, x24, [x2,#16]
// Load AH
ldp x3, x4, [x0,#32]
ldr x5, [x0,#48]
// Load BH
ldp x10, x11, [x1,#32]
ldr x12, [x1,#48]
adds x8, x8, x25
adcs x9, x9, x26
adcs x19, x19, x27
adcs x20, x20, x28
adc x1, xzr, xzr
add x0, x0, #32
// Temporarily store x8,x9 in x2
stp x8,x9, [x2,#32]
// x21-x28 <- AH x BH
// A0 * B0
mul x21, x3, x10 // C0
umulh x24, x3, x10
// A0 * B1
mul x22, x3, x11
umulh x23, x3, x11
// A1 * B0
mul x8, x4, x10
umulh x9, x4, x10
adds x22, x22, x24
adc x23, x23, xzr
// A0 * B2
mul x27, x3, x12
umulh x28, x3, x12
adds x22, x22, x8 // C1
adcs x23, x23, x9
adc x24, xzr, xzr
// A2 * B0
mul x8, x5, x10
umulh x25, x5, x10
adds x23, x23, x27
adcs x24, x24, x25
adc x25, xzr, xzr
// A1 * B1
mul x27, x4, x11
umulh x9, x4, x11
adds x23, x23, x8
adcs x24, x24, x28
adc x25, x25, xzr
// A1 * B2
mul x8, x4, x12
umulh x28, x4, x12
adds x23, x23, x27 // C2
adcs x24, x24, x9
adc x25, x25, xzr
// A2 * B1
mul x27, x5, x11
umulh x9, x5, x11
adds x24, x24, x8
adcs x25, x25, x28
adc x26, xzr, xzr
// A2 * B2
mul x8, x5, x12
umulh x28, x5, x12
adds x24, x24, x27 // C3
adcs x25, x25, x9
adc x26, x26, xzr
adds x25, x25, x8 // C4
adc x26, x26, x28 // C5
// Restore x8,x9
ldp x8,x9, [x2,#32]
neg x1, x1
// x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x8, x8, x21
sbcs x9, x9, x22
sbcs x19, x19, x23
sbcs x20, x20, x24
sbcs x14, x14, x25
sbcs x15, x15, x26
sbcs x16, x16, xzr
sbcs x17, x17, xzr
sbc x7, x7, xzr
// Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
stp x8, x9, [x2,#32]
stp x19, x20, [x2,#48]
adds x1, x1, #1
adcs x14, x14, x21
adcs x15, x15, x22
adcs x16, x16, x23
adcs x17, x17, x24
adcs x25, x7, x25
adc x26, x26, xzr
stp x14, x15, [x2,#64]
stp x16, x17, [x2,#80]
stp x25, x26, [x2,#96]
ldp x19, x20, [x29,#16]
ldp x21, x22, [x29,#32]
ldp x23, x24, [x29,#48]
ldp x25, x26, [x29,#64]
ldp x27, x28, [x29,#80]
ldp x29, x30, [sp],#96
ret
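// sike_fprdc appears to be the matching field reduction: a Montgomery-style
// reduction that folds the 14-limb value at x0 down to seven limbs at x1 by
// repeatedly multiplying low limbs by the p434+1 constant from .Lp434p1.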
.globl sike_fprdc
.hidden sike_fprdc
.align 4
sike_fprdc:
stp x29, x30, [sp, #-96]!
add x29, sp, xzr
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
ldp x2, x3, [x0,#0] // a[0-1]
// Load the prime constant
adrp x26, .Lp434p1
add x26, x26, :lo12:.Lp434p1
ldp x23, x24, [x26, #0x0]
ldp x25, x26, [x26,#0x10]
// a[0-1] * p434+1
mul x4, x2, x23 // C0
umulh x7, x2, x23
mul x5, x2, x24
umulh x6, x2, x24
mul x10, x3, x23
umulh x11, x3, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x2, x25
umulh x28, x2, x25
adds x5, x5, x10 // C1
adcs x6, x6, x11
adc x7, xzr, xzr
mul x10, x3, x24
umulh x11, x3, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x2, x26
umulh x28, x2, x26
adds x6, x6, x10 // C2
adcs x7, x7, x11
adc x8, x8, xzr
mul x10, x3, x25
umulh x11, x3, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x3, x26
umulh x28, x3, x26
adds x7, x7, x10 // C3
adcs x8, x8, x11
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
ldp x10, x11, [x0, #0x18]
ldp x12, x13, [x0, #0x28]
ldp x14, x15, [x0, #0x38]
ldp x16, x17, [x0, #0x48]
ldp x19, x20, [x0, #0x58]
ldr x21, [x0, #0x68]
adds x10, x10, x4
adcs x11, x11, x5
adcs x12, x12, x6
adcs x13, x13, x7
adcs x14, x14, x8
adcs x15, x15, x9
adcs x22, x16, xzr
adcs x17, x17, xzr
adcs x19, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
ldr x2, [x0,#0x10] // a[2]
// a[2-3] * p434+1
mul x4, x2, x23 // C0
umulh x7, x2, x23
mul x5, x2, x24
umulh x6, x2, x24
mul x0, x10, x23
umulh x3, x10, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x2, x25
umulh x28, x2, x25
adds x5, x5, x0 // C1
adcs x6, x6, x3
adc x7, xzr, xzr
mul x0, x10, x24
umulh x3, x10, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x2, x26
umulh x28, x2, x26
adds x6, x6, x0 // C2
adcs x7, x7, x3
adc x8, x8, xzr
mul x0, x10, x25
umulh x3, x10, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x10, x26
umulh x28, x10, x26
adds x7, x7, x0 // C3
adcs x8, x8, x3
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
adds x12, x12, x4
adcs x13, x13, x5
adcs x14, x14, x6
adcs x15, x15, x7
adcs x16, x22, x8
adcs x17, x17, x9
adcs x22, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
mul x4, x11, x23 // C0
umulh x7, x11, x23
mul x5, x11, x24
umulh x6, x11, x24
mul x10, x12, x23
umulh x3, x12, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x11, x25
umulh x28, x11, x25
adds x5, x5, x10 // C1
adcs x6, x6, x3
adc x7, xzr, xzr
mul x10, x12, x24
umulh x3, x12, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x11, x26
umulh x28, x11, x26
adds x6, x6, x10 // C2
adcs x7, x7, x3
adc x8, x8, xzr
mul x10, x12, x25
umulh x3, x12, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x12, x26
umulh x28, x12, x26
adds x7, x7, x10 // C3
adcs x8, x8, x3
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
adds x14, x14, x4
adcs x15, x15, x5
adcs x16, x16, x6
adcs x17, x17, x7
adcs x19, x22, x8
adcs x20, x20, x9
adc x22, x21, xzr
stp x14, x15, [x1, #0x0] // C0, C1
mul x4, x13, x23 // C0
umulh x10, x13, x23
mul x5, x13, x24
umulh x27, x13, x24
adds x5, x5, x10 // C1
adc x10, xzr, xzr
mul x6, x13, x25
umulh x28, x13, x25
adds x27, x10, x27
adcs x6, x6, x27 // C2
adc x10, xzr, xzr
mul x7, x13, x26
umulh x8, x13, x26
adds x28, x10, x28
adcs x7, x7, x28 // C3
adc x8, x8, xzr // C4
adds x16, x16, x4
adcs x17, x17, x5
adcs x19, x19, x6
adcs x20, x20, x7
adc x21, x22, x8
str x16, [x1, #0x10]
stp x17, x19, [x1, #0x18]
stp x20, x21, [x1, #0x28]
ldp x19, x20, [x29,#16]
ldp x21, x22, [x29,#32]
ldp x23, x24, [x29,#48]
ldp x25, x26, [x29,#64]
ldp x27, x28, [x29,#80]
ldp x29, x30, [sp],#96
ret
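// sike_fpadd appears to compute c = a + b reduced below 2*p434 without
// branching: it adds the seven limbs, subtracts 2*p434, and then adds
// 2*p434 back under a mask derived from the final borrow.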
.globl sike_fpadd
.hidden sike_fpadd
.align 4
sike_fpadd:
stp x29,x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Add a + b
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
adcs x7, x7, x15
adcs x8, x8, x16
adc x9, x9, x17
// Subtract 2xp434
adrp x17, .Lp434x2
add x17, x17, :lo12:.Lp434x2
ldp x11, x12, [x17, #0]
ldp x13, x14, [x17, #16]
ldp x15, x16, [x17, #32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x12
sbcs x6, x6, x13
sbcs x7, x7, x14
sbcs x8, x8, x15
sbcs x9, x9, x16
sbc x0, xzr, xzr // x0 can be reused now
// Add 2xp434 anded with the mask in x0
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adc x9, x9, x16
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
.globl sike_fpsub
.hidden sike_fpsub
.align 4
sike_fpsub:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Subtract a - b
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
sbcs x9, x9, x17
sbc x0, xzr, xzr
// Add 2xp434 anded with the mask in x0
adrp x17, .Lp434x2
add x17, x17, :lo12:.Lp434x2
// First half
ldp x11, x12, [x17, #0]
ldp x13, x14, [x17, #16]
ldp x15, x16, [x17, #32]
// Add 2xp434 anded with the mask in x0
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adc x9, x9, x16
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
.globl sike_mpadd_asm
.hidden sike_mpadd_asm
.align 4
sike_mpadd_asm:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
adcs x7, x7, x15
adcs x8, x8, x16
adc x9, x9, x17
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
.globl sike_mpsubx2_asm
.hidden sike_mpsubx2_asm
.align 4
sike_mpsubx2_asm:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x11, x12, [x1,#32]
ldp x13, x14, [x1,#48]
sbcs x7, x7, x11
sbcs x8, x8, x12
sbcs x9, x9, x13
sbcs x10, x10, x14
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
ldp x3, x4, [x0,#64]
ldp x5, x6, [x0,#80]
ldp x11, x12, [x1,#64]
ldp x13, x14, [x1,#80]
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
ldp x7, x8, [x0,#96]
ldp x11, x12, [x1,#96]
sbcs x7, x7, x11
sbcs x8, x8, x12
sbc x0, xzr, xzr
stp x3, x4, [x2,#64]
stp x5, x6, [x2,#80]
stp x7, x8, [x2,#96]
ldp x29, x30, [sp],#16
ret
.globl sike_mpdblsubx2_asm
.hidden sike_mpdblsubx2_asm
.align 4
sike_mpdblsubx2_asm:
stp x29, x30, [sp, #-16]!
add x29, sp, #0
ldp x3, x4, [x2, #0]
ldp x5, x6, [x2,#16]
ldp x7, x8, [x2,#32]
ldp x11, x12, [x0, #0]
ldp x13, x14, [x0,#16]
ldp x15, x16, [x0,#32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
// x9 stores carry
adc x9, xzr, xzr
ldp x11, x12, [x1, #0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, x9, xzr
stp x3, x4, [x2, #0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
ldp x3, x4, [x2,#48]
ldp x5, x6, [x2,#64]
ldp x7, x8, [x2,#80]
ldp x11, x12, [x0,#48]
ldp x13, x14, [x0,#64]
ldp x15, x16, [x0,#80]
// x9 = 2 - x9
neg x9, x9
add x9, x9, #2
subs x3, x3, x9
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, xzr, xzr
ldp x11, x12, [x1,#48]
ldp x13, x14, [x1,#64]
ldp x15, x16, [x1,#80]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, x9, xzr
stp x3, x4, [x2,#48]
stp x5, x6, [x2,#64]
stp x7, x8, [x2,#80]
ldp x3, x4, [x2,#96]
ldp x11, x12, [x0,#96]
ldp x13, x14, [x1,#96]
// x9 = 2 - x9
neg x9, x9
add x9, x9, #2
subs x3, x3, x9
sbcs x3, x3, x11
sbcs x4, x4, x12
subs x3, x3, x13
sbc x4, x4, x14
stp x3, x4, [x2,#96]
ldp x29, x30, [sp],#16
ret
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

@ -1,6 +1,24 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.arch armv7-a
.text
#if defined(__thumb2__) || defined(__clang__)
.syntax unified
@ -1471,3 +1489,5 @@ ChaCha20_neon:
.comm OPENSSL_armcap_P,4,4
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

@ -1,4 +1,18 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use
@ -45,6 +59,11 @@
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
#endif
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 AES
@ instructions are in aesv8-armx.pl.)
.arch armv7-a
.text
#if defined(__thumb2__) && !defined(__APPLE__)
.syntax unified
@ -160,23 +179,23 @@ AES_Te:
.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
.size AES_Te,.-AES_Te
@ void asm_AES_encrypt(const unsigned char *in, unsigned char *out,
@ void aes_nohw_encrypt(const unsigned char *in, unsigned char *out,
@ const AES_KEY *key) {
.globl asm_AES_encrypt
.hidden asm_AES_encrypt
.type asm_AES_encrypt,%function
.globl aes_nohw_encrypt
.hidden aes_nohw_encrypt
.type aes_nohw_encrypt,%function
.align 5
asm_AES_encrypt:
aes_nohw_encrypt:
#ifndef __thumb2__
sub r3,pc,#8 @ asm_AES_encrypt
sub r3,pc,#8 @ aes_nohw_encrypt
#else
adr r3,.
#endif
stmdb sp!,{r1,r4-r12,lr}
#ifdef __APPLE__
#if defined(__thumb2__) || defined(__APPLE__)
adr r10,AES_Te
#else
sub r10,r3,#asm_AES_encrypt-AES_Te @ Te
sub r10,r3,#aes_nohw_encrypt-AES_Te @ Te
#endif
mov r12,r0 @ inp
mov r11,r2
@ -273,7 +292,7 @@ asm_AES_encrypt:
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size asm_AES_encrypt,.-asm_AES_encrypt
.size aes_nohw_encrypt,.-aes_nohw_encrypt
.type _armv4_AES_encrypt,%function
.align 2
@ -412,14 +431,14 @@ _armv4_AES_encrypt:
ldr pc,[sp],#4 @ pop and return
.size _armv4_AES_encrypt,.-_armv4_AES_encrypt
.globl asm_AES_set_encrypt_key
.hidden asm_AES_set_encrypt_key
.type asm_AES_set_encrypt_key,%function
.globl aes_nohw_set_encrypt_key
.hidden aes_nohw_set_encrypt_key
.type aes_nohw_set_encrypt_key,%function
.align 5
asm_AES_set_encrypt_key:
aes_nohw_set_encrypt_key:
_armv4_AES_set_encrypt_key:
#ifndef __thumb2__
sub r3,pc,#8 @ asm_AES_set_encrypt_key
sub r3,pc,#8 @ aes_nohw_set_encrypt_key
#else
adr r3,.
#endif
@ -452,7 +471,7 @@ _armv4_AES_set_encrypt_key:
mov lr,r1 @ bits
mov r11,r2 @ key
#ifdef __APPLE__
#if defined(__thumb2__) || defined(__APPLE__)
adr r10,AES_Te+1024 @ Te4
#else
sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4
@ -717,23 +736,23 @@ _armv4_AES_set_encrypt_key:
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size asm_AES_set_encrypt_key,.-asm_AES_set_encrypt_key
.size aes_nohw_set_encrypt_key,.-aes_nohw_set_encrypt_key
.globl asm_AES_set_decrypt_key
.hidden asm_AES_set_decrypt_key
.type asm_AES_set_decrypt_key,%function
.globl aes_nohw_set_decrypt_key
.hidden aes_nohw_set_decrypt_key
.type aes_nohw_set_decrypt_key,%function
.align 5
asm_AES_set_decrypt_key:
aes_nohw_set_decrypt_key:
str lr,[sp,#-4]! @ push lr
bl _armv4_AES_set_encrypt_key
teq r0,#0
ldr lr,[sp],#4 @ pop lr
bne .Labrt
mov r0,r2 @ asm_AES_set_encrypt_key preserves r2,
mov r0,r2 @ aes_nohw_set_encrypt_key preserves r2,
mov r1,r2 @ which is AES_KEY *key
b _armv4_AES_set_enc2dec_key
.size asm_AES_set_decrypt_key,.-asm_AES_set_decrypt_key
.size aes_nohw_set_decrypt_key,.-aes_nohw_set_decrypt_key
@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
.globl AES_set_enc2dec_key
@ -935,23 +954,23 @@ AES_Td:
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.size AES_Td,.-AES_Td
@ void asm_AES_decrypt(const unsigned char *in, unsigned char *out,
@ void aes_nohw_decrypt(const unsigned char *in, unsigned char *out,
@ const AES_KEY *key) {
.globl asm_AES_decrypt
.hidden asm_AES_decrypt
.type asm_AES_decrypt,%function
.globl aes_nohw_decrypt
.hidden aes_nohw_decrypt
.type aes_nohw_decrypt,%function
.align 5
asm_AES_decrypt:
aes_nohw_decrypt:
#ifndef __thumb2__
sub r3,pc,#8 @ asm_AES_decrypt
sub r3,pc,#8 @ aes_nohw_decrypt
#else
adr r3,.
#endif
stmdb sp!,{r1,r4-r12,lr}
#ifdef __APPLE__
#if defined(__thumb2__) || defined(__APPLE__)
adr r10,AES_Td
#else
sub r10,r3,#asm_AES_decrypt-AES_Td @ Td
sub r10,r3,#aes_nohw_decrypt-AES_Td @ Td
#endif
mov r12,r0 @ inp
mov r11,r2
@ -1048,7 +1067,7 @@ asm_AES_decrypt:
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size asm_AES_decrypt,.-asm_AES_decrypt
.size aes_nohw_decrypt,.-aes_nohw_decrypt
.type _armv4_AES_decrypt,%function
.align 2
@ -1199,3 +1218,5 @@ _armv4_AES_decrypt:
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

@ -1,4 +1,18 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
@ -13,6 +27,8 @@
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,%function
@ -761,3 +777,5 @@ aes_hw_ctr32_encrypt_blocks:
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

@ -1,6 +1,24 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.arch armv7-a
.text
#if defined(__thumb2__)
.syntax unified
@ -167,14 +185,15 @@ bn_mul_mont:
mov r4,sp @ "rewind" r4
sub r2,r2,r5 @ "rewind" r2
and r1,r4,r14
bic r3,r2,r14
orr r1,r1,r3 @ ap=borrow?tp:rp
.Lcopy: ldr r7,[r1],#4 @ copy or in-place refresh
.Lcopy: ldr r7,[r4] @ conditional copy
ldr r5,[r2]
str sp,[r4],#4 @ zap tp
str r7,[r2],#4
cmp r4,r0
#ifdef __thumb2__
it cc
#endif
movcc r5,r7
str r5,[r2],#4
teq r4,r0 @ preserve carry
bne .Lcopy
mov sp,r0
@ -954,3 +973,5 @@ bn_mul8x_mont_neon:
.hidden OPENSSL_armcap_P
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

File diff suppressed because it is too large.

@ -1,9 +1,30 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
@ instructions are in aesv8-armx.pl.)
.arch armv7-a
.text
#if defined(__thumb2__) || defined(__clang__)
.syntax unified
#define ldrplb ldrbpl
#define ldrneb ldrbne
#endif
#if defined(__thumb2__)
.thumb
@ -11,11 +32,6 @@
.code 32
#endif
#ifdef __clang__
#define ldrplb ldrbpl
#define ldrneb ldrbne
#endif
.type rem_4bit,%object
.align 5
rem_4bit:
@ -571,3 +587,5 @@ gcm_ghash_neon:
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

@ -1,4 +1,18 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
@ -109,13 +123,13 @@ gcm_ghash_v8:
@ loaded value would have
@ to be rotated in order to
@ make it appear as in
@ alorithm specification
@ algorithm specification
subs r3,r3,#32 @ see if r3 is 32 or larger
mov r12,#16 @ r12 is used as post-
@ increment for input pointer;
@ as loop is modulo-scheduled
@ r12 is zeroed just in time
@ to preclude oversteping
@ to preclude overstepping
@ inp[len], which means that
@ last block[s] are actually
@ loaded twice, but last
@ -235,3 +249,5 @@ gcm_ghash_v8:
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

@ -1,4 +1,18 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
@ -1493,3 +1507,5 @@ sha1_block_data_order_armv8:
.hidden OPENSSL_armcap_P
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

@ -1,4 +1,18 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use
@ -51,6 +65,11 @@
# define __ARM_MAX_ARCH__ 7
#endif
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those
@ instructions are manually-encoded. (See unsha256.)
.arch armv7-a
.text
#if defined(__thumb2__)
.syntax unified
@ -2816,3 +2835,5 @@ sha256_block_data_order_armv8:
.hidden OPENSSL_armcap_P
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

@ -1,4 +1,18 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use
@ -64,6 +78,10 @@
# define VFP_ABI_POP
#endif
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.arch armv7-a
#ifdef __ARMEL__
# define LO 0
# define HI 4
@ -1872,3 +1890,5 @@ sha512_block_data_order_neon:
.hidden OPENSSL_armcap_P
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

File diff suppressed because it is too large.

@ -0,0 +1,380 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.syntax unified
.arch armv7-a
.fpu vfp
.text
@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
@ with |argv|, then saves the callee-saved registers into |state|. It returns
@ the result of |func|. The |unwind| argument is unused.
@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
@ const uint32_t *argv, size_t argc,
@ int unwind);
.type abi_test_trampoline, %function
.globl abi_test_trampoline
.hidden abi_test_trampoline
.align 4
abi_test_trampoline:
.Labi_test_trampoline_begin:
@ Save parameters and all callee-saved registers. For convenience, we
@ save r9 on iOS even though it's volatile.
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
stmdb sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
sub sp, sp, #28
@ Every register in AAPCS is either non-volatile or a parameter (except
@ r9 on iOS), so this code, by the actual call, loses all its scratch
@ registers. First fill in stack parameters while there are registers
@ to spare.
cmp r3, #4
bls .Lstack_args_done
mov r4, sp @ r4 is the output pointer.
add r5, r2, r3, lsl #2 @ Set r5 to the end of argv.
add r2, r2, #16 @ Skip four arguments.
.Lstack_args_loop:
ldr r6, [r2], #4
cmp r2, r5
str r6, [r4], #4
bne .Lstack_args_loop
.Lstack_args_done:
@ Load registers from |r1|.
vldmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
#if defined(__APPLE__)
@ r9 is not volatile on iOS.
ldmia r1!, {r4,r5,r6,r7,r8,r10-r11}
#else
ldmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
#endif
@ Load register parameters. This uses up our remaining registers, so we
@ repurpose lr as scratch space.
ldr r3, [sp, #40] @ Reload argc.
ldr lr, [sp, #36] @ Load argv into lr.
cmp r3, #3
bhi .Larg_r3
beq .Larg_r2
cmp r3, #1
bhi .Larg_r1
beq .Larg_r0
b .Largs_done
.Larg_r3:
ldr r3, [lr, #12] @ argv[3]
.Larg_r2:
ldr r2, [lr, #8] @ argv[2]
.Larg_r1:
ldr r1, [lr, #4] @ argv[1]
.Larg_r0:
ldr r0, [lr] @ argv[0]
.Largs_done:
@ With every other register in use, load the function pointer into lr
@ and call the function.
ldr lr, [sp, #28]
blx lr
@ r1-r3 are free for use again. The trampoline only supports
@ single-return functions. Pass r4-r11 to the caller.
ldr r1, [sp, #32]
vstmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
#if defined(__APPLE__)
@ r9 is not volatile on iOS.
stmia r1!, {r4,r5,r6,r7,r8,r10-r11}
#else
stmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
#endif
@ Unwind the stack and restore registers.
add sp, sp, #44 @ 44 = 28+16
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} @ Skip r0-r3 (see +16 above).
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
bx lr
.size abi_test_trampoline,.-abi_test_trampoline
.type abi_test_clobber_r0, %function
.globl abi_test_clobber_r0
.hidden abi_test_clobber_r0
.align 4
abi_test_clobber_r0:
mov r0, #0
bx lr
.size abi_test_clobber_r0,.-abi_test_clobber_r0
.type abi_test_clobber_r1, %function
.globl abi_test_clobber_r1
.hidden abi_test_clobber_r1
.align 4
abi_test_clobber_r1:
mov r1, #0
bx lr
.size abi_test_clobber_r1,.-abi_test_clobber_r1
.type abi_test_clobber_r2, %function
.globl abi_test_clobber_r2
.hidden abi_test_clobber_r2
.align 4
abi_test_clobber_r2:
mov r2, #0
bx lr
.size abi_test_clobber_r2,.-abi_test_clobber_r2
.type abi_test_clobber_r3, %function
.globl abi_test_clobber_r3
.hidden abi_test_clobber_r3
.align 4
abi_test_clobber_r3:
mov r3, #0
bx lr
.size abi_test_clobber_r3,.-abi_test_clobber_r3
.type abi_test_clobber_r4, %function
.globl abi_test_clobber_r4
.hidden abi_test_clobber_r4
.align 4
abi_test_clobber_r4:
mov r4, #0
bx lr
.size abi_test_clobber_r4,.-abi_test_clobber_r4
.type abi_test_clobber_r5, %function
.globl abi_test_clobber_r5
.hidden abi_test_clobber_r5
.align 4
abi_test_clobber_r5:
mov r5, #0
bx lr
.size abi_test_clobber_r5,.-abi_test_clobber_r5
.type abi_test_clobber_r6, %function
.globl abi_test_clobber_r6
.hidden abi_test_clobber_r6
.align 4
abi_test_clobber_r6:
mov r6, #0
bx lr
.size abi_test_clobber_r6,.-abi_test_clobber_r6
.type abi_test_clobber_r7, %function
.globl abi_test_clobber_r7
.hidden abi_test_clobber_r7
.align 4
abi_test_clobber_r7:
mov r7, #0
bx lr
.size abi_test_clobber_r7,.-abi_test_clobber_r7
.type abi_test_clobber_r8, %function
.globl abi_test_clobber_r8
.hidden abi_test_clobber_r8
.align 4
abi_test_clobber_r8:
mov r8, #0
bx lr
.size abi_test_clobber_r8,.-abi_test_clobber_r8
.type abi_test_clobber_r9, %function
.globl abi_test_clobber_r9
.hidden abi_test_clobber_r9
.align 4
abi_test_clobber_r9:
mov r9, #0
bx lr
.size abi_test_clobber_r9,.-abi_test_clobber_r9
.type abi_test_clobber_r10, %function
.globl abi_test_clobber_r10
.hidden abi_test_clobber_r10
.align 4
abi_test_clobber_r10:
mov r10, #0
bx lr
.size abi_test_clobber_r10,.-abi_test_clobber_r10
.type abi_test_clobber_r11, %function
.globl abi_test_clobber_r11
.hidden abi_test_clobber_r11
.align 4
abi_test_clobber_r11:
mov r11, #0
bx lr
.size abi_test_clobber_r11,.-abi_test_clobber_r11
.type abi_test_clobber_r12, %function
.globl abi_test_clobber_r12
.hidden abi_test_clobber_r12
.align 4
abi_test_clobber_r12:
mov r12, #0
bx lr
.size abi_test_clobber_r12,.-abi_test_clobber_r12
.type abi_test_clobber_d0, %function
.globl abi_test_clobber_d0
.hidden abi_test_clobber_d0
.align 4
abi_test_clobber_d0:
mov r0, #0
vmov s0, r0
vmov s1, r0
bx lr
.size abi_test_clobber_d0,.-abi_test_clobber_d0
.type abi_test_clobber_d1, %function
.globl abi_test_clobber_d1
.hidden abi_test_clobber_d1
.align 4
abi_test_clobber_d1:
mov r0, #0
vmov s2, r0
vmov s3, r0
bx lr
.size abi_test_clobber_d1,.-abi_test_clobber_d1
.type abi_test_clobber_d2, %function
.globl abi_test_clobber_d2
.hidden abi_test_clobber_d2
.align 4
abi_test_clobber_d2:
mov r0, #0
vmov s4, r0
vmov s5, r0
bx lr
.size abi_test_clobber_d2,.-abi_test_clobber_d2
.type abi_test_clobber_d3, %function
.globl abi_test_clobber_d3
.hidden abi_test_clobber_d3
.align 4
abi_test_clobber_d3:
mov r0, #0
vmov s6, r0
vmov s7, r0
bx lr
.size abi_test_clobber_d3,.-abi_test_clobber_d3
.type abi_test_clobber_d4, %function
.globl abi_test_clobber_d4
.hidden abi_test_clobber_d4
.align 4
abi_test_clobber_d4:
mov r0, #0
vmov s8, r0
vmov s9, r0
bx lr
.size abi_test_clobber_d4,.-abi_test_clobber_d4
.type abi_test_clobber_d5, %function
.globl abi_test_clobber_d5
.hidden abi_test_clobber_d5
.align 4
abi_test_clobber_d5:
mov r0, #0
vmov s10, r0
vmov s11, r0
bx lr
.size abi_test_clobber_d5,.-abi_test_clobber_d5
.type abi_test_clobber_d6, %function
.globl abi_test_clobber_d6
.hidden abi_test_clobber_d6
.align 4
abi_test_clobber_d6:
mov r0, #0
vmov s12, r0
vmov s13, r0
bx lr
.size abi_test_clobber_d6,.-abi_test_clobber_d6
.type abi_test_clobber_d7, %function
.globl abi_test_clobber_d7
.hidden abi_test_clobber_d7
.align 4
abi_test_clobber_d7:
mov r0, #0
vmov s14, r0
vmov s15, r0
bx lr
.size abi_test_clobber_d7,.-abi_test_clobber_d7
.type abi_test_clobber_d8, %function
.globl abi_test_clobber_d8
.hidden abi_test_clobber_d8
.align 4
abi_test_clobber_d8:
mov r0, #0
vmov s16, r0
vmov s17, r0
bx lr
.size abi_test_clobber_d8,.-abi_test_clobber_d8
.type abi_test_clobber_d9, %function
.globl abi_test_clobber_d9
.hidden abi_test_clobber_d9
.align 4
abi_test_clobber_d9:
mov r0, #0
vmov s18, r0
vmov s19, r0
bx lr
.size abi_test_clobber_d9,.-abi_test_clobber_d9
.type abi_test_clobber_d10, %function
.globl abi_test_clobber_d10
.hidden abi_test_clobber_d10
.align 4
abi_test_clobber_d10:
mov r0, #0
vmov s20, r0
vmov s21, r0
bx lr
.size abi_test_clobber_d10,.-abi_test_clobber_d10
.type abi_test_clobber_d11, %function
.globl abi_test_clobber_d11
.hidden abi_test_clobber_d11
.align 4
abi_test_clobber_d11:
mov r0, #0
vmov s22, r0
vmov s23, r0
bx lr
.size abi_test_clobber_d11,.-abi_test_clobber_d11
.type abi_test_clobber_d12, %function
.globl abi_test_clobber_d12
.hidden abi_test_clobber_d12
.align 4
abi_test_clobber_d12:
mov r0, #0
vmov s24, r0
vmov s25, r0
bx lr
.size abi_test_clobber_d12,.-abi_test_clobber_d12
.type abi_test_clobber_d13, %function
.globl abi_test_clobber_d13
.hidden abi_test_clobber_d13
.align 4
abi_test_clobber_d13:
mov r0, #0
vmov s26, r0
vmov s27, r0
bx lr
.size abi_test_clobber_d13,.-abi_test_clobber_d13
.type abi_test_clobber_d14, %function
.globl abi_test_clobber_d14
.hidden abi_test_clobber_d14
.align 4
abi_test_clobber_d14:
mov r0, #0
vmov s28, r0
vmov s29, r0
bx lr
.size abi_test_clobber_d14,.-abi_test_clobber_d14
.type abi_test_clobber_d15, %function
.globl abi_test_clobber_d15
.hidden abi_test_clobber_d15
.align 4
abi_test_clobber_d15:
mov r0, #0
vmov s30, r0
vmov s31, r0
bx lr
.size abi_test_clobber_d15,.-abi_test_clobber_d15
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
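The comments at the top of the trampoline above describe how |argv| is distributed per AAPCS: the first four 32-bit arguments travel in r0-r3 and the remainder are copied into the stack area reserved by the prologue (kept 8-byte aligned). A minimal C sketch of that split, under the same convention; the struct and names here are illustrative stand-ins, not BoringSSL types.

#include <stddef.h>
#include <stdint.h>

/* Illustrative mirror of the argument split the ARM trampoline performs:
 * argv[0..3] are destined for r0-r3, anything beyond that is copied to
 * the reserved stack area in call order. */
typedef struct {
  uint32_t regs[4];       /* values that would be loaded into r0-r3 */
  const uint32_t *stack;  /* arguments 5..argc, in the order they are pushed */
  size_t num_stack;
} arg_split;

static arg_split split_args(const uint32_t *argv, size_t argc) {
  arg_split s = {{0, 0, 0, 0}, NULL, 0};
  size_t in_regs = argc < 4 ? argc : 4;
  for (size_t i = 0; i < in_regs; i++) {
    s.regs[i] = argv[i];
  }
  if (argc > 4) {
    s.stack = argv + 4;      /* what .Lstack_args_loop copies word by word */
    s.num_stack = argc - 4;
  }
  return s;
}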

File diff suppressed because it is too large

View File

@ -0,0 +1,587 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if !defined(OPENSSL_NO_ASM) && defined(__powerpc64__)
.machine "any"
.abiversion 2
.text
.globl gcm_init_p8
.type gcm_init_p8,@function
.align 5
gcm_init_p8:
.localentry gcm_init_p8,0
li 0,-4096
li 8,0x10
li 12,-1
li 9,0x20
or 0,0,0
li 10,0x30
.long 0x7D202699
vspltisb 8,-16
vspltisb 5,1
vaddubm 8,8,8
vxor 4,4,4
vor 8,8,5
vsldoi 8,8,4,15
vsldoi 6,4,5,1
vaddubm 8,8,8
vspltisb 7,7
vor 8,8,6
vspltb 6,9,0
vsl 9,9,5
vsrab 6,6,7
vand 6,6,8
vxor 3,9,6
vsldoi 9,3,3,8
vsldoi 8,4,8,8
vsldoi 11,4,9,8
vsldoi 10,9,4,8
.long 0x7D001F99
.long 0x7D681F99
li 8,0x40
.long 0x7D291F99
li 9,0x50
.long 0x7D4A1F99
li 10,0x60
.long 0x10035CC8
.long 0x10234CC8
.long 0x104354C8
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x100044C8
vxor 6,6,2
vxor 16,0,6
vsldoi 17,16,16,8
vsldoi 19,4,17,8
vsldoi 18,17,4,8
.long 0x7E681F99
li 8,0x70
.long 0x7E291F99
li 9,0x80
.long 0x7E4A1F99
li 10,0x90
.long 0x10039CC8
.long 0x11B09CC8
.long 0x10238CC8
.long 0x11D08CC8
.long 0x104394C8
.long 0x11F094C8
.long 0x10E044C8
.long 0x114D44C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vsldoi 11,14,4,8
vsldoi 9,4,14,8
vxor 0,0,5
vxor 2,2,6
vxor 13,13,11
vxor 15,15,9
vsldoi 0,0,0,8
vsldoi 13,13,13,8
vxor 0,0,7
vxor 13,13,10
vsldoi 6,0,0,8
vsldoi 9,13,13,8
.long 0x100044C8
.long 0x11AD44C8
vxor 6,6,2
vxor 9,9,15
vxor 0,0,6
vxor 13,13,9
vsldoi 9,0,0,8
vsldoi 17,13,13,8
vsldoi 11,4,9,8
vsldoi 10,9,4,8
vsldoi 19,4,17,8
vsldoi 18,17,4,8
.long 0x7D681F99
li 8,0xa0
.long 0x7D291F99
li 9,0xb0
.long 0x7D4A1F99
li 10,0xc0
.long 0x7E681F99
.long 0x7E291F99
.long 0x7E4A1F99
or 12,12,12
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size gcm_init_p8,.-gcm_init_p8
.globl gcm_gmult_p8
.type gcm_gmult_p8,@function
.align 5
gcm_gmult_p8:
.localentry gcm_gmult_p8,0
lis 0,0xfff8
li 8,0x10
li 12,-1
li 9,0x20
or 0,0,0
li 10,0x30
.long 0x7C601E99
.long 0x7D682699
lvsl 12,0,0
.long 0x7D292699
vspltisb 5,0x07
.long 0x7D4A2699
vxor 12,12,5
.long 0x7D002699
vperm 3,3,3,12
vxor 4,4,4
.long 0x10035CC8
.long 0x10234CC8
.long 0x104354C8
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x100044C8
vxor 6,6,2
vxor 0,0,6
vperm 0,0,0,12
.long 0x7C001F99
or 12,12,12
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size gcm_gmult_p8,.-gcm_gmult_p8
.globl gcm_ghash_p8
.type gcm_ghash_p8,@function
.align 5
gcm_ghash_p8:
.localentry gcm_ghash_p8,0
li 0,-4096
li 8,0x10
li 12,-1
li 9,0x20
or 0,0,0
li 10,0x30
.long 0x7C001E99
.long 0x7D682699
li 8,0x40
lvsl 12,0,0
.long 0x7D292699
li 9,0x50
vspltisb 5,0x07
.long 0x7D4A2699
li 10,0x60
vxor 12,12,5
.long 0x7D002699
vperm 0,0,0,12
vxor 4,4,4
cmpldi 6,64
bge .Lgcm_ghash_p8_4x
.long 0x7C602E99
addi 5,5,16
subic. 6,6,16
vperm 3,3,3,12
vxor 3,3,0
beq .Lshort
.long 0x7E682699
li 8,16
.long 0x7E292699
add 9,5,6
.long 0x7E4A2699
.align 5
.Loop_2x:
.long 0x7E002E99
vperm 16,16,16,12
subic 6,6,32
.long 0x10039CC8
.long 0x11B05CC8
subfe 0,0,0
.long 0x10238CC8
.long 0x11D04CC8
and 0,0,6
.long 0x104394C8
.long 0x11F054C8
add 5,5,0
vxor 0,0,13
vxor 1,1,14
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 2,2,15
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
.long 0x7C682E99
addi 5,5,32
vsldoi 6,0,0,8
.long 0x100044C8
vperm 3,3,3,12
vxor 6,6,2
vxor 3,3,6
vxor 3,3,0
cmpld 9,5
bgt .Loop_2x
cmplwi 6,0
bne .Leven
.Lshort:
.long 0x10035CC8
.long 0x10234CC8
.long 0x104354C8
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x100044C8
vxor 6,6,2
.Leven:
vxor 0,0,6
vperm 0,0,0,12
.long 0x7C001F99
or 12,12,12
blr
.long 0
.byte 0,12,0x14,0,0,0,4,0
.long 0
.align 5
.gcm_ghash_p8_4x:
.Lgcm_ghash_p8_4x:
stdu 1,-256(1)
li 10,63
li 11,79
stvx 20,10,1
addi 10,10,32
stvx 21,11,1
addi 11,11,32
stvx 22,10,1
addi 10,10,32
stvx 23,11,1
addi 11,11,32
stvx 24,10,1
addi 10,10,32
stvx 25,11,1
addi 11,11,32
stvx 26,10,1
addi 10,10,32
stvx 27,11,1
addi 11,11,32
stvx 28,10,1
addi 10,10,32
stvx 29,11,1
addi 11,11,32
stvx 30,10,1
li 10,0x60
stvx 31,11,1
li 0,-1
stw 12,252(1)
or 0,0,0
lvsl 5,0,8
li 8,0x70
.long 0x7E292699
li 9,0x80
vspltisb 6,8
li 10,0x90
.long 0x7EE82699
li 8,0xa0
.long 0x7F092699
li 9,0xb0
.long 0x7F2A2699
li 10,0xc0
.long 0x7FA82699
li 8,0x10
.long 0x7FC92699
li 9,0x20
.long 0x7FEA2699
li 10,0x30
vsldoi 7,4,6,8
vaddubm 18,5,7
vaddubm 19,6,18
srdi 6,6,4
.long 0x7C602E99
.long 0x7E082E99
subic. 6,6,8
.long 0x7EC92E99
.long 0x7F8A2E99
addi 5,5,0x40
vperm 3,3,3,12
vperm 16,16,16,12
vperm 22,22,22,12
vperm 28,28,28,12
vxor 2,3,0
.long 0x11B0BCC8
.long 0x11D0C4C8
.long 0x11F0CCC8
vperm 11,17,9,18
vperm 5,22,28,19
vperm 10,17,9,19
vperm 6,22,28,18
.long 0x12B68CC8
.long 0x12855CC8
.long 0x137C4CC8
.long 0x134654C8
vxor 21,21,14
vxor 20,20,13
vxor 27,27,21
vxor 26,26,15
blt .Ltail_4x
.Loop_4x:
.long 0x7C602E99
.long 0x7E082E99
subic. 6,6,4
.long 0x7EC92E99
.long 0x7F8A2E99
addi 5,5,0x40
vperm 16,16,16,12
vperm 22,22,22,12
vperm 28,28,28,12
vperm 3,3,3,12
.long 0x1002ECC8
.long 0x1022F4C8
.long 0x1042FCC8
.long 0x11B0BCC8
.long 0x11D0C4C8
.long 0x11F0CCC8
vxor 0,0,20
vxor 1,1,27
vxor 2,2,26
vperm 5,22,28,19
vperm 6,22,28,18
.long 0x10E044C8
.long 0x12855CC8
.long 0x134654C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x12B68CC8
.long 0x137C4CC8
.long 0x100044C8
vxor 20,20,13
vxor 26,26,15
vxor 2,2,3
vxor 21,21,14
vxor 2,2,6
vxor 27,27,21
vxor 2,2,0
bge .Loop_4x
.Ltail_4x:
.long 0x1002ECC8
.long 0x1022F4C8
.long 0x1042FCC8
vxor 0,0,20
vxor 1,1,27
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 2,2,26
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x100044C8
vxor 6,6,2
vxor 0,0,6
addic. 6,6,4
beq .Ldone_4x
.long 0x7C602E99
cmpldi 6,2
li 6,-4
blt .Lone
.long 0x7E082E99
beq .Ltwo
.Lthree:
.long 0x7EC92E99
vperm 3,3,3,12
vperm 16,16,16,12
vperm 22,22,22,12
vxor 2,3,0
vor 29,23,23
vor 30, 24, 24
vor 31,25,25
vperm 5,16,22,19
vperm 6,16,22,18
.long 0x12B08CC8
.long 0x13764CC8
.long 0x12855CC8
.long 0x134654C8
vxor 27,27,21
b .Ltail_4x
.align 4
.Ltwo:
vperm 3,3,3,12
vperm 16,16,16,12
vxor 2,3,0
vperm 5,4,16,19
vperm 6,4,16,18
vsldoi 29,4,17,8
vor 30, 17, 17
vsldoi 31,17,4,8
.long 0x12855CC8
.long 0x13704CC8
.long 0x134654C8
b .Ltail_4x
.align 4
.Lone:
vperm 3,3,3,12
vsldoi 29,4,9,8
vor 30, 9, 9
vsldoi 31,9,4,8
vxor 2,3,0
vxor 20,20,20
vxor 27,27,27
vxor 26,26,26
b .Ltail_4x
.Ldone_4x:
vperm 0,0,0,12
.long 0x7C001F99
li 10,63
li 11,79
or 12,12,12
lvx 20,10,1
addi 10,10,32
lvx 21,11,1
addi 11,11,32
lvx 22,10,1
addi 10,10,32
lvx 23,11,1
addi 11,11,32
lvx 24,10,1
addi 10,10,32
lvx 25,11,1
addi 11,11,32
lvx 26,10,1
addi 10,10,32
lvx 27,11,1
addi 11,11,32
lvx 28,10,1
addi 10,10,32
lvx 29,11,1
addi 11,11,32
lvx 30,10,1
lvx 31,11,1
addi 1,1,256
blr
.long 0
.byte 0,12,0x04,0,0x80,0,4,0
.long 0
.size gcm_ghash_p8,.-gcm_ghash_p8
.byte 71,72,65,83,72,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM && __powerpc64__
.section .note.GNU-stack,"",@progbits

View File

@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl ChaCha20_ctr32
.hidden ChaCha20_ctr32
@ -966,3 +972,4 @@ ChaCha20_ssse3:
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.hidden _x86_AES_encrypt_compact
.type _x86_AES_encrypt_compact,@function
@ -979,12 +985,12 @@ _x86_AES_encrypt:
.long 27,54,0,0
.long 0,0,0,0
.size _x86_AES_encrypt,.-_x86_AES_encrypt
.globl asm_AES_encrypt
.hidden asm_AES_encrypt
.type asm_AES_encrypt,@function
.globl aes_nohw_encrypt
.hidden aes_nohw_encrypt
.type aes_nohw_encrypt,@function
.align 16
asm_AES_encrypt:
.L_asm_AES_encrypt_begin:
aes_nohw_encrypt:
.L_aes_nohw_encrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
@ -1044,7 +1050,7 @@ asm_AES_encrypt:
popl %ebx
popl %ebp
ret
.size asm_AES_encrypt,.-.L_asm_AES_encrypt_begin
.size aes_nohw_encrypt,.-.L_aes_nohw_encrypt_begin
.hidden _x86_AES_decrypt_compact
.type _x86_AES_decrypt_compact,@function
.align 16
@ -2175,12 +2181,12 @@ _x86_AES_decrypt:
.byte 23,43,4,126,186,119,214,38
.byte 225,105,20,99,85,33,12,125
.size _x86_AES_decrypt,.-_x86_AES_decrypt
.globl asm_AES_decrypt
.hidden asm_AES_decrypt
.type asm_AES_decrypt,@function
.globl aes_nohw_decrypt
.hidden aes_nohw_decrypt
.type aes_nohw_decrypt,@function
.align 16
asm_AES_decrypt:
.L_asm_AES_decrypt_begin:
aes_nohw_decrypt:
.L_aes_nohw_decrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
@ -2240,13 +2246,13 @@ asm_AES_decrypt:
popl %ebx
popl %ebp
ret
.size asm_AES_decrypt,.-.L_asm_AES_decrypt_begin
.globl asm_AES_cbc_encrypt
.hidden asm_AES_cbc_encrypt
.type asm_AES_cbc_encrypt,@function
.size aes_nohw_decrypt,.-.L_aes_nohw_decrypt_begin
.globl aes_nohw_cbc_encrypt
.hidden aes_nohw_cbc_encrypt
.type aes_nohw_cbc_encrypt,@function
.align 16
asm_AES_cbc_encrypt:
.L_asm_AES_cbc_encrypt_begin:
aes_nohw_cbc_encrypt:
.L_aes_nohw_cbc_encrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
@ -2774,7 +2780,7 @@ asm_AES_cbc_encrypt:
popl %ebx
popl %ebp
ret
.size asm_AES_cbc_encrypt,.-.L_asm_AES_cbc_encrypt_begin
.size aes_nohw_cbc_encrypt,.-.L_aes_nohw_cbc_encrypt_begin
.hidden _x86_AES_set_encrypt_key
.type _x86_AES_set_encrypt_key,@function
.align 16
@ -3006,21 +3012,21 @@ _x86_AES_set_encrypt_key:
popl %ebp
ret
.size _x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key
.globl asm_AES_set_encrypt_key
.hidden asm_AES_set_encrypt_key
.type asm_AES_set_encrypt_key,@function
.globl aes_nohw_set_encrypt_key
.hidden aes_nohw_set_encrypt_key
.type aes_nohw_set_encrypt_key,@function
.align 16
asm_AES_set_encrypt_key:
.L_asm_AES_set_encrypt_key_begin:
aes_nohw_set_encrypt_key:
.L_aes_nohw_set_encrypt_key_begin:
call _x86_AES_set_encrypt_key
ret
.size asm_AES_set_encrypt_key,.-.L_asm_AES_set_encrypt_key_begin
.globl asm_AES_set_decrypt_key
.hidden asm_AES_set_decrypt_key
.type asm_AES_set_decrypt_key,@function
.size aes_nohw_set_encrypt_key,.-.L_aes_nohw_set_encrypt_key_begin
.globl aes_nohw_set_decrypt_key
.hidden aes_nohw_set_decrypt_key
.type aes_nohw_set_decrypt_key,@function
.align 16
asm_AES_set_decrypt_key:
.L_asm_AES_set_decrypt_key_begin:
aes_nohw_set_decrypt_key:
.L_aes_nohw_set_decrypt_key_begin:
call _x86_AES_set_encrypt_key
cmpl $0,%eax
je .L054proceed
@ -3249,8 +3255,9 @@ asm_AES_set_decrypt_key:
popl %ebx
popl %ebp
ret
.size asm_AES_set_decrypt_key,.-.L_asm_AES_set_decrypt_key_begin
.size aes_nohw_set_decrypt_key,.-.L_aes_nohw_set_decrypt_key_begin
.byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89
.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large

View File

@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl bn_mul_add_words
.hidden bn_mul_add_words
@ -1535,3 +1541,4 @@ bn_sub_part_words:
ret
.size bn_sub_part_words,.-.L_bn_sub_part_words_begin
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl bn_mul_comba8
.hidden bn_mul_comba8
@ -1257,3 +1263,4 @@ bn_sqr_comba4:
ret
.size bn_sqr_comba4,.-.L_bn_sqr_comba4_begin
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -0,0 +1,294 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl gcm_gmult_ssse3
.hidden gcm_gmult_ssse3
.type gcm_gmult_ssse3,@function
.align 16
gcm_gmult_ssse3:
.L_gcm_gmult_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
movdqu (%edi),%xmm0
call .L000pic_point
.L000pic_point:
popl %eax
movdqa .Lreverse_bytes-.L000pic_point(%eax),%xmm7
movdqa .Llow4_mask-.L000pic_point(%eax),%xmm2
.byte 102,15,56,0,199
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L001loop_row_1:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L001loop_row_1
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L002loop_row_2:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L002loop_row_2
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax
.L003loop_row_3:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L003loop_row_3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,0,215
movdqu %xmm2,(%edi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_gmult_ssse3,.-.L_gcm_gmult_ssse3_begin
.globl gcm_ghash_ssse3
.hidden gcm_ghash_ssse3
.type gcm_ghash_ssse3,@function
.align 16
gcm_ghash_ssse3:
.L_gcm_ghash_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%edx
movl 32(%esp),%ecx
movdqu (%edi),%xmm0
call .L004pic_point
.L004pic_point:
popl %ebx
movdqa .Lreverse_bytes-.L004pic_point(%ebx),%xmm7
andl $-16,%ecx
.byte 102,15,56,0,199
pxor %xmm3,%xmm3
.L005loop_ghash:
movdqa .Llow4_mask-.L004pic_point(%ebx),%xmm2
movdqu (%edx),%xmm1
.byte 102,15,56,0,207
pxor %xmm1,%xmm0
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
movl $5,%eax
.L006loop_row_4:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L006loop_row_4
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L007loop_row_5:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L007loop_row_5
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax
.L008loop_row_6:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L008loop_row_6
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movdqa %xmm2,%xmm0
leal -256(%esi),%esi
leal 16(%edx),%edx
subl $16,%ecx
jnz .L005loop_ghash
.byte 102,15,56,0,199
movdqu %xmm0,(%edi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_ssse3,.-.L_gcm_ghash_ssse3_begin
.align 16
.Lreverse_bytes:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.align 16
.Llow4_mask:
.long 252645135,252645135,252645135,252645135
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl gcm_gmult_4bit_mmx
.hidden gcm_gmult_4bit_mmx
@ -1066,3 +1072,4 @@ gcm_ghash_clmul:
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl md5_block_asm_data_order
.hidden md5_block_asm_data_order
@ -679,3 +685,4 @@ md5_block_asm_data_order:
ret
.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl sha1_block_data_order
.hidden sha1_block_data_order
@ -3799,3 +3805,4 @@ _sha1_block_data_order_avx:
.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl sha256_block_data_order
.hidden sha256_block_data_order
@ -5558,3 +5564,4 @@ sha256_block_data_order:
ret
.size sha256_block_data_order,.-.L_sha256_block_data_order_begin
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl sha512_block_data_order
.hidden sha512_block_data_order
@ -2828,3 +2834,4 @@ sha512_block_data_order:
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1,5 +1,13 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
#ifdef BORINGSSL_DISPATCH_TEST
#endif
.align 64
.L_vpaes_consts:
.long 218628480,235210255,168496130,67568393
@ -477,6 +485,18 @@ vpaes_set_encrypt_key:
pushl %ebx
pushl %esi
pushl %edi
#ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx
pushl %edx
call .L016pic
.L016pic:
popl %ebx
leal BORINGSSL_function_hit+5-.L016pic(%ebx),%ebx
movl $1,%edx
movb %dl,(%ebx)
popl %edx
popl %ebx
#endif
movl 20(%esp),%esi
leal -56(%esp),%ebx
movl 24(%esp),%eax
@ -490,9 +510,9 @@ vpaes_set_encrypt_key:
movl %ebx,240(%edx)
movl $48,%ecx
movl $0,%edi
leal .L_vpaes_consts+0x30-.L016pic_point,%ebp
leal .L_vpaes_consts+0x30-.L017pic_point,%ebp
call _vpaes_schedule_core
.L016pic_point:
.L017pic_point:
movl 48(%esp),%esp
xorl %eax,%eax
popl %edi
@ -529,9 +549,9 @@ vpaes_set_decrypt_key:
shrl $1,%ecx
andl $32,%ecx
xorl $32,%ecx
leal .L_vpaes_consts+0x30-.L017pic_point,%ebp
leal .L_vpaes_consts+0x30-.L018pic_point,%ebp
call _vpaes_schedule_core
.L017pic_point:
.L018pic_point:
movl 48(%esp),%esp
xorl %eax,%eax
popl %edi
@ -550,9 +570,21 @@ vpaes_encrypt:
pushl %ebx
pushl %esi
pushl %edi
leal .L_vpaes_consts+0x30-.L018pic_point,%ebp
#ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx
pushl %edx
call .L019pic
.L019pic:
popl %ebx
leal BORINGSSL_function_hit+4-.L019pic(%ebx),%ebx
movl $1,%edx
movb %dl,(%ebx)
popl %edx
popl %ebx
#endif
leal .L_vpaes_consts+0x30-.L020pic_point,%ebp
call _vpaes_preheat
.L018pic_point:
.L020pic_point:
movl 20(%esp),%esi
leal -56(%esp),%ebx
movl 24(%esp),%edi
@ -580,9 +612,9 @@ vpaes_decrypt:
pushl %ebx
pushl %esi
pushl %edi
leal .L_vpaes_consts+0x30-.L019pic_point,%ebp
leal .L_vpaes_consts+0x30-.L021pic_point,%ebp
call _vpaes_preheat
.L019pic_point:
.L021pic_point:
movl 20(%esp),%esi
leal -56(%esp),%ebx
movl 24(%esp),%edi
@ -615,7 +647,7 @@ vpaes_cbc_encrypt:
movl 28(%esp),%eax
movl 32(%esp),%edx
subl $16,%eax
jc .L020cbc_abort
jc .L022cbc_abort
leal -56(%esp),%ebx
movl 36(%esp),%ebp
andl $-16,%ebx
@ -628,14 +660,14 @@ vpaes_cbc_encrypt:
movl %edx,4(%esp)
movl %ebp,8(%esp)
movl %eax,%edi
leal .L_vpaes_consts+0x30-.L021pic_point,%ebp
leal .L_vpaes_consts+0x30-.L023pic_point,%ebp
call _vpaes_preheat
.L021pic_point:
.L023pic_point:
cmpl $0,%ecx
je .L022cbc_dec_loop
jmp .L023cbc_enc_loop
je .L024cbc_dec_loop
jmp .L025cbc_enc_loop
.align 16
.L023cbc_enc_loop:
.L025cbc_enc_loop:
movdqu (%esi),%xmm0
pxor %xmm1,%xmm0
call _vpaes_encrypt_core
@ -645,10 +677,10 @@ vpaes_cbc_encrypt:
movdqu %xmm0,(%ebx,%esi,1)
leal 16(%esi),%esi
subl $16,%edi
jnc .L023cbc_enc_loop
jmp .L024cbc_done
jnc .L025cbc_enc_loop
jmp .L026cbc_done
.align 16
.L022cbc_dec_loop:
.L024cbc_dec_loop:
movdqu (%esi),%xmm0
movdqa %xmm1,16(%esp)
movdqa %xmm0,32(%esp)
@ -660,12 +692,12 @@ vpaes_cbc_encrypt:
movdqu %xmm0,(%ebx,%esi,1)
leal 16(%esi),%esi
subl $16,%edi
jnc .L022cbc_dec_loop
.L024cbc_done:
jnc .L024cbc_dec_loop
.L026cbc_done:
movl 8(%esp),%ebx
movl 48(%esp),%esp
movdqu %xmm1,(%ebx)
.L020cbc_abort:
.L022cbc_abort:
popl %edi
popl %esi
popl %ebx
@ -673,3 +705,4 @@ vpaes_cbc_encrypt:
ret
.size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
#endif
.section .note.GNU-stack,"",@progbits
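The BORINGSSL_DISPATCH_TEST stubs added above store a 1 into a byte of BORINGSSL_function_hit when a given assembly path runs, so a test can verify which implementation was dispatched. A self-contained C sketch of that pattern; the array below is a local stand-in rather than the real library symbol, and index 5 simply matches the BORINGSSL_function_hit+5 offset used by the vpaes_set_encrypt_key stub in this listing.

#include <assert.h>
#include <stdint.h>

/* Stand-in for the flag array the assembly writes to; in BoringSSL the
 * real array lives in the library and is only present in dispatch-test
 * builds. */
static uint8_t function_hit[8];

static void mark_hit(unsigned index) {
  function_hit[index] = 1;  /* mirrors "movb %dl,(%ebx)" in the stub */
}

int main(void) {
  mark_hit(5);                   /* the vpaes_set_encrypt_key path ran */
  assert(function_hit[5] == 1);  /* what a dispatch test would check */
  return 0;
}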

View File

@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl bn_mul_mont
.hidden bn_mul_mont
@ -446,16 +452,18 @@ bn_mul_mont:
leal 1(%edx),%edx
jge .L017sub
sbbl $0,%eax
andl %eax,%esi
notl %eax
movl %edi,%ebp
andl %eax,%ebp
orl %ebp,%esi
movl $-1,%edx
xorl %eax,%edx
jmp .L018copy
.align 16
.L018copy:
movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4)
movl 32(%esp,%ebx,4),%esi
movl (%edi,%ebx,4),%ebp
movl %ecx,32(%esp,%ebx,4)
andl %eax,%esi
andl %edx,%ebp
orl %esi,%ebp
movl %ebp,(%edi,%ebx,4)
decl %ebx
jge .L018copy
movl 24(%esp),%esp
@ -473,3 +481,4 @@ bn_mul_mont:
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits
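The bn_mul_mont change above replaces the old select-a-source sequence before .L018copy with a word-by-word masked select inside the copy loop, so the final reduction no longer makes a data-dependent choice of source pointer; in the listing, %eax holds the mask derived from the final borrow and %edx its complement. A small C sketch of that select under the same idea, with illustrative names.

#include <stddef.h>
#include <stdint.h>

/* Constant-time word select: for each word, take a[i] when mask is
 * all-ones and b[i] when mask is zero, with no branch on the mask. */
static void select_words(uint32_t *out, const uint32_t *a,
                         const uint32_t *b, uint32_t mask, size_t n) {
  for (size_t i = 0; i < n; i++) {
    out[i] = (a[i] & mask) | (b[i] & ~mask);
  }
}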

View File

@ -0,0 +1,206 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl abi_test_trampoline
.hidden abi_test_trampoline
.type abi_test_trampoline,@function
.align 16
abi_test_trampoline:
.L_abi_test_trampoline_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 24(%esp),%ecx
movl (%ecx),%esi
movl 4(%ecx),%edi
movl 8(%ecx),%ebx
movl 12(%ecx),%ebp
subl $44,%esp
movl 72(%esp),%eax
xorl %ecx,%ecx
.L000loop:
cmpl 76(%esp),%ecx
jae .L001loop_done
movl (%eax,%ecx,4),%edx
movl %edx,(%esp,%ecx,4)
addl $1,%ecx
jmp .L000loop
.L001loop_done:
call *64(%esp)
addl $44,%esp
movl 24(%esp),%ecx
movl %esi,(%ecx)
movl %edi,4(%ecx)
movl %ebx,8(%ecx)
movl %ebp,12(%ecx)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size abi_test_trampoline,.-.L_abi_test_trampoline_begin
.globl abi_test_get_and_clear_direction_flag
.hidden abi_test_get_and_clear_direction_flag
.type abi_test_get_and_clear_direction_flag,@function
.align 16
abi_test_get_and_clear_direction_flag:
.L_abi_test_get_and_clear_direction_flag_begin:
pushfl
popl %eax
andl $1024,%eax
shrl $10,%eax
cld
ret
.size abi_test_get_and_clear_direction_flag,.-.L_abi_test_get_and_clear_direction_flag_begin
.globl abi_test_set_direction_flag
.hidden abi_test_set_direction_flag
.type abi_test_set_direction_flag,@function
.align 16
abi_test_set_direction_flag:
.L_abi_test_set_direction_flag_begin:
std
ret
.size abi_test_set_direction_flag,.-.L_abi_test_set_direction_flag_begin
.globl abi_test_clobber_eax
.hidden abi_test_clobber_eax
.type abi_test_clobber_eax,@function
.align 16
abi_test_clobber_eax:
.L_abi_test_clobber_eax_begin:
xorl %eax,%eax
ret
.size abi_test_clobber_eax,.-.L_abi_test_clobber_eax_begin
.globl abi_test_clobber_ebx
.hidden abi_test_clobber_ebx
.type abi_test_clobber_ebx,@function
.align 16
abi_test_clobber_ebx:
.L_abi_test_clobber_ebx_begin:
xorl %ebx,%ebx
ret
.size abi_test_clobber_ebx,.-.L_abi_test_clobber_ebx_begin
.globl abi_test_clobber_ecx
.hidden abi_test_clobber_ecx
.type abi_test_clobber_ecx,@function
.align 16
abi_test_clobber_ecx:
.L_abi_test_clobber_ecx_begin:
xorl %ecx,%ecx
ret
.size abi_test_clobber_ecx,.-.L_abi_test_clobber_ecx_begin
.globl abi_test_clobber_edx
.hidden abi_test_clobber_edx
.type abi_test_clobber_edx,@function
.align 16
abi_test_clobber_edx:
.L_abi_test_clobber_edx_begin:
xorl %edx,%edx
ret
.size abi_test_clobber_edx,.-.L_abi_test_clobber_edx_begin
.globl abi_test_clobber_edi
.hidden abi_test_clobber_edi
.type abi_test_clobber_edi,@function
.align 16
abi_test_clobber_edi:
.L_abi_test_clobber_edi_begin:
xorl %edi,%edi
ret
.size abi_test_clobber_edi,.-.L_abi_test_clobber_edi_begin
.globl abi_test_clobber_esi
.hidden abi_test_clobber_esi
.type abi_test_clobber_esi,@function
.align 16
abi_test_clobber_esi:
.L_abi_test_clobber_esi_begin:
xorl %esi,%esi
ret
.size abi_test_clobber_esi,.-.L_abi_test_clobber_esi_begin
.globl abi_test_clobber_ebp
.hidden abi_test_clobber_ebp
.type abi_test_clobber_ebp,@function
.align 16
abi_test_clobber_ebp:
.L_abi_test_clobber_ebp_begin:
xorl %ebp,%ebp
ret
.size abi_test_clobber_ebp,.-.L_abi_test_clobber_ebp_begin
.globl abi_test_clobber_xmm0
.hidden abi_test_clobber_xmm0
.type abi_test_clobber_xmm0,@function
.align 16
abi_test_clobber_xmm0:
.L_abi_test_clobber_xmm0_begin:
pxor %xmm0,%xmm0
ret
.size abi_test_clobber_xmm0,.-.L_abi_test_clobber_xmm0_begin
.globl abi_test_clobber_xmm1
.hidden abi_test_clobber_xmm1
.type abi_test_clobber_xmm1,@function
.align 16
abi_test_clobber_xmm1:
.L_abi_test_clobber_xmm1_begin:
pxor %xmm1,%xmm1
ret
.size abi_test_clobber_xmm1,.-.L_abi_test_clobber_xmm1_begin
.globl abi_test_clobber_xmm2
.hidden abi_test_clobber_xmm2
.type abi_test_clobber_xmm2,@function
.align 16
abi_test_clobber_xmm2:
.L_abi_test_clobber_xmm2_begin:
pxor %xmm2,%xmm2
ret
.size abi_test_clobber_xmm2,.-.L_abi_test_clobber_xmm2_begin
.globl abi_test_clobber_xmm3
.hidden abi_test_clobber_xmm3
.type abi_test_clobber_xmm3,@function
.align 16
abi_test_clobber_xmm3:
.L_abi_test_clobber_xmm3_begin:
pxor %xmm3,%xmm3
ret
.size abi_test_clobber_xmm3,.-.L_abi_test_clobber_xmm3_begin
.globl abi_test_clobber_xmm4
.hidden abi_test_clobber_xmm4
.type abi_test_clobber_xmm4,@function
.align 16
abi_test_clobber_xmm4:
.L_abi_test_clobber_xmm4_begin:
pxor %xmm4,%xmm4
ret
.size abi_test_clobber_xmm4,.-.L_abi_test_clobber_xmm4_begin
.globl abi_test_clobber_xmm5
.hidden abi_test_clobber_xmm5
.type abi_test_clobber_xmm5,@function
.align 16
abi_test_clobber_xmm5:
.L_abi_test_clobber_xmm5_begin:
pxor %xmm5,%xmm5
ret
.size abi_test_clobber_xmm5,.-.L_abi_test_clobber_xmm5_begin
.globl abi_test_clobber_xmm6
.hidden abi_test_clobber_xmm6
.type abi_test_clobber_xmm6,@function
.align 16
abi_test_clobber_xmm6:
.L_abi_test_clobber_xmm6_begin:
pxor %xmm6,%xmm6
ret
.size abi_test_clobber_xmm6,.-.L_abi_test_clobber_xmm6_begin
.globl abi_test_clobber_xmm7
.hidden abi_test_clobber_xmm7
.type abi_test_clobber_xmm7,@function
.align 16
abi_test_clobber_xmm7:
.L_abi_test_clobber_xmm7_begin:
pxor %xmm7,%xmm7
ret
.size abi_test_clobber_xmm7,.-.L_abi_test_clobber_xmm7_begin
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.extern OPENSSL_ia32cap_P
@ -38,6 +50,7 @@
.type ChaCha20_ctr32,@function
.align 64
ChaCha20_ctr32:
.cfi_startproc
cmpq $0,%rdx
je .Lno_data
movq OPENSSL_ia32cap_P+4(%rip),%r10
@ -45,12 +58,25 @@ ChaCha20_ctr32:
jnz .LChaCha20_ssse3
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset r15,-56
subq $64+24,%rsp
.cfi_adjust_cfa_offset 88
.Lctr32_body:
@ -291,20 +317,30 @@ ChaCha20_ctr32:
.Ldone:
leaq 64+24+48(%rsp),%rsi
movq -48(%rsi),%r15
.cfi_restore r15
movq -40(%rsi),%r14
.cfi_restore r14
movq -32(%rsi),%r13
.cfi_restore r13
movq -24(%rsi),%r12
.cfi_restore r12
movq -16(%rsi),%rbp
.cfi_restore rbp
movq -8(%rsi),%rbx
.cfi_restore rbx
leaq (%rsi),%rsp
.cfi_adjust_cfa_offset -136
.Lno_data:
.byte 0xf3,0xc3
.cfi_endproc
.size ChaCha20_ctr32,.-ChaCha20_ctr32
.type ChaCha20_ssse3,@function
.align 32
ChaCha20_ssse3:
.LChaCha20_ssse3:
.cfi_startproc
movq %rsp,%r9
.cfi_def_cfa_register r9
cmpq $128,%rdx
ja .LChaCha20_4x
@ -430,14 +466,18 @@ ChaCha20_ssse3:
.Ldone_ssse3:
leaq (%r9),%rsp
.cfi_def_cfa_register rsp
.Lssse3_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size ChaCha20_ssse3,.-ChaCha20_ssse3
.type ChaCha20_4x,@function
.align 32
ChaCha20_4x:
.LChaCha20_4x:
.cfi_startproc
movq %rsp,%r9
.cfi_def_cfa_register r9
movq %r10,%r11
shrq $32,%r10
testq $32,%r10
@ -978,14 +1018,18 @@ ChaCha20_4x:
.Ldone4x:
leaq (%r9),%rsp
.cfi_def_cfa_register rsp
.L4x_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size ChaCha20_4x,.-ChaCha20_4x
.type ChaCha20_8x,@function
.align 32
ChaCha20_8x:
.LChaCha20_8x:
.cfi_startproc
movq %rsp,%r9
.cfi_def_cfa_register r9
subq $0x280+8,%rsp
andq $-32,%rsp
vzeroupper
@ -1580,7 +1624,10 @@ ChaCha20_8x:
.Ldone8x:
vzeroall
leaq (%r9),%rsp
.cfi_def_cfa_register rsp
.L8x_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size ChaCha20_8x,.-ChaCha20_8x
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.data
.align 16
@ -3064,3 +3076,4 @@ aes256gcmsiv_kdf:
.cfi_endproc
.size aes256gcmsiv_kdf, .-aes256gcmsiv_kdf
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
@ -8972,3 +8984,4 @@ seal_avx2_short_tail:
jmp seal_sse_tail_16
.cfi_endproc
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.type _x86_64_AES_encrypt,@function
.align 16
@ -156,6 +168,7 @@ _x86_64_AES_encrypt:
.type _x86_64_AES_encrypt_compact,@function
.align 16
_x86_64_AES_encrypt_compact:
.cfi_startproc
leaq 128(%r14),%r8
movl 0-128(%r8),%edi
movl 32-128(%r8),%ebp
@ -325,20 +338,29 @@ _x86_64_AES_encrypt_compact:
xorl 8(%r15),%ecx
xorl 12(%r15),%edx
.byte 0xf3,0xc3
.cfi_endproc
.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
.align 16
.globl asm_AES_encrypt
.hidden asm_AES_encrypt
.type asm_AES_encrypt,@function
.hidden asm_AES_encrypt
asm_AES_encrypt:
.globl aes_nohw_encrypt
.hidden aes_nohw_encrypt
.type aes_nohw_encrypt,@function
.hidden aes_nohw_encrypt
aes_nohw_encrypt:
.cfi_startproc
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
leaq -63(%rdx),%rcx
@ -351,6 +373,7 @@ asm_AES_encrypt:
movq %rsi,16(%rsp)
movq %rax,24(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x18,0x06,0x23,0x08
.Lenc_prologue:
movq %rdx,%r15
@ -377,21 +400,30 @@ asm_AES_encrypt:
movq 16(%rsp),%r9
movq 24(%rsp),%rsi
.cfi_def_cfa %rsi,8
movl %eax,0(%r9)
movl %ebx,4(%r9)
movl %ecx,8(%r9)
movl %edx,12(%r9)
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lenc_epilogue:
.byte 0xf3,0xc3
.size asm_AES_encrypt,.-asm_AES_encrypt
.cfi_endproc
.size aes_nohw_encrypt,.-aes_nohw_encrypt
.type _x86_64_AES_decrypt,@function
.align 16
_x86_64_AES_decrypt:
@ -550,6 +582,7 @@ _x86_64_AES_decrypt:
.type _x86_64_AES_decrypt_compact,@function
.align 16
_x86_64_AES_decrypt_compact:
.cfi_startproc
leaq 128(%r14),%r8
movl 0-128(%r8),%edi
movl 32-128(%r8),%ebp
@ -771,20 +804,29 @@ _x86_64_AES_decrypt_compact:
xorl 8(%r15),%ecx
xorl 12(%r15),%edx
.byte 0xf3,0xc3
.cfi_endproc
.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
.align 16
.globl asm_AES_decrypt
.hidden asm_AES_decrypt
.type asm_AES_decrypt,@function
.hidden asm_AES_decrypt
asm_AES_decrypt:
.globl aes_nohw_decrypt
.hidden aes_nohw_decrypt
.type aes_nohw_decrypt,@function
.hidden aes_nohw_decrypt
aes_nohw_decrypt:
.cfi_startproc
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
leaq -63(%rdx),%rcx
@ -797,6 +839,7 @@ asm_AES_decrypt:
movq %rsi,16(%rsp)
movq %rax,24(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x18,0x06,0x23,0x08
.Ldec_prologue:
movq %rdx,%r15
@ -825,47 +868,75 @@ asm_AES_decrypt:
movq 16(%rsp),%r9
movq 24(%rsp),%rsi
.cfi_def_cfa %rsi,8
movl %eax,0(%r9)
movl %ebx,4(%r9)
movl %ecx,8(%r9)
movl %edx,12(%r9)
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Ldec_epilogue:
.byte 0xf3,0xc3
.size asm_AES_decrypt,.-asm_AES_decrypt
.cfi_endproc
.size aes_nohw_decrypt,.-aes_nohw_decrypt
.align 16
.globl asm_AES_set_encrypt_key
.hidden asm_AES_set_encrypt_key
.type asm_AES_set_encrypt_key,@function
asm_AES_set_encrypt_key:
.globl aes_nohw_set_encrypt_key
.hidden aes_nohw_set_encrypt_key
.type aes_nohw_set_encrypt_key,@function
aes_nohw_set_encrypt_key:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $8,%rsp
.cfi_adjust_cfa_offset 8
.Lenc_key_prologue:
call _x86_64_AES_set_encrypt_key
movq 40(%rsp),%rbp
.cfi_restore %rbp
movq 48(%rsp),%rbx
.cfi_restore %rbx
addq $56,%rsp
.cfi_adjust_cfa_offset -56
.Lenc_key_epilogue:
.byte 0xf3,0xc3
.size asm_AES_set_encrypt_key,.-asm_AES_set_encrypt_key
.cfi_endproc
.size aes_nohw_set_encrypt_key,.-aes_nohw_set_encrypt_key
.type _x86_64_AES_set_encrypt_key,@function
.align 16
_x86_64_AES_set_encrypt_key:
.cfi_startproc
movl %esi,%ecx
movq %rdi,%rsi
movq %rdx,%rdi
@ -1101,19 +1172,34 @@ _x86_64_AES_set_encrypt_key:
movq $-1,%rax
.Lexit:
.byte 0xf3,0xc3
.cfi_endproc
.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
.align 16
.globl asm_AES_set_decrypt_key
.hidden asm_AES_set_decrypt_key
.type asm_AES_set_decrypt_key,@function
asm_AES_set_decrypt_key:
.globl aes_nohw_set_decrypt_key
.hidden aes_nohw_set_decrypt_key
.type aes_nohw_set_decrypt_key,@function
aes_nohw_set_decrypt_key:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
pushq %rdx
.cfi_adjust_cfa_offset 8
.Ldec_key_prologue:
call _x86_64_AES_set_encrypt_key
@ -1281,32 +1367,56 @@ asm_AES_set_decrypt_key:
xorq %rax,%rax
.Labort:
movq 8(%rsp),%r15
.cfi_restore %r15
movq 16(%rsp),%r14
.cfi_restore %r14
movq 24(%rsp),%r13
.cfi_restore %r13
movq 32(%rsp),%r12
.cfi_restore %r12
movq 40(%rsp),%rbp
.cfi_restore %rbp
movq 48(%rsp),%rbx
.cfi_restore %rbx
addq $56,%rsp
.cfi_adjust_cfa_offset -56
.Ldec_key_epilogue:
.byte 0xf3,0xc3
.size asm_AES_set_decrypt_key,.-asm_AES_set_decrypt_key
.cfi_endproc
.size aes_nohw_set_decrypt_key,.-aes_nohw_set_decrypt_key
.align 16
.globl asm_AES_cbc_encrypt
.hidden asm_AES_cbc_encrypt
.type asm_AES_cbc_encrypt,@function
.globl aes_nohw_cbc_encrypt
.hidden aes_nohw_cbc_encrypt
.type aes_nohw_cbc_encrypt,@function
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
.hidden asm_AES_cbc_encrypt
asm_AES_cbc_encrypt:
.hidden aes_nohw_cbc_encrypt
aes_nohw_cbc_encrypt:
.cfi_startproc
cmpq $0,%rdx
je .Lcbc_epilogue
pushfq
.cfi_adjust_cfa_offset 8
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-32
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-40
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-48
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-56
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-64
.Lcbc_prologue:
cld
@ -1317,6 +1427,7 @@ asm_AES_cbc_encrypt:
cmpq $0,%r9
cmoveq %r10,%r14
.cfi_remember_state
leaq OPENSSL_ia32cap_P(%rip),%r10
movl (%r10),%r10d
cmpq $512,%rdx
@ -1352,8 +1463,10 @@ asm_AES_cbc_encrypt:
.Lcbc_te_ok:
xchgq %rsp,%r15
.cfi_def_cfa_register %r15
movq %r15,16(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x40
.Lcbc_fast_body:
movq %rdi,24(%rsp)
movq %rsi,32(%rsp)
@ -1551,6 +1664,7 @@ asm_AES_cbc_encrypt:
.align 16
.Lcbc_slow_prologue:
.cfi_restore_state
leaq -88(%rsp),%rbp
andq $-64,%rbp
@ -1562,8 +1676,10 @@ asm_AES_cbc_encrypt:
subq %r10,%rbp
xchgq %rsp,%rbp
.cfi_def_cfa_register %rbp
movq %rbp,16(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x40
.Lcbc_slow_body:
@ -1735,18 +1851,30 @@ asm_AES_cbc_encrypt:
.align 16
.Lcbc_exit:
movq 16(%rsp),%rsi
.cfi_def_cfa %rsi,64
movq (%rsi),%r15
.cfi_restore %r15
movq 8(%rsi),%r14
.cfi_restore %r14
movq 16(%rsi),%r13
.cfi_restore %r13
movq 24(%rsi),%r12
.cfi_restore %r12
movq 32(%rsi),%rbp
.cfi_restore %rbp
movq 40(%rsi),%rbx
.cfi_restore %rbx
leaq 48(%rsi),%rsp
.cfi_def_cfa %rsp,16
.Lcbc_popfq:
popfq
.cfi_adjust_cfa_offset -8
.Lcbc_epilogue:
.byte 0xf3,0xc3
.size asm_AES_cbc_encrypt,.-asm_AES_cbc_encrypt
.cfi_endproc
.size aes_nohw_cbc_encrypt,.-aes_nohw_cbc_encrypt
.align 64
.LAES_Te:
.long 0xa56363c6,0xa56363c6
@ -2534,3 +2662,4 @@ asm_AES_cbc_encrypt:
.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.type _aesni_ctr32_ghash_6x,@function
@ -544,6 +556,11 @@ _aesni_ctr32_6x:
.align 32
aesni_gcm_encrypt:
.cfi_startproc
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+2(%rip)
#endif
xorq %r10,%r10
@ -832,3 +849,4 @@ aesni_gcm_encrypt:
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,427 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.type gcm_gmult_ssse3, @function
.globl gcm_gmult_ssse3
.hidden gcm_gmult_ssse3
.align 16
gcm_gmult_ssse3:
.cfi_startproc
.Lgmult_seh_begin:
movdqu (%rdi),%xmm0
movdqa .Lreverse_bytes(%rip),%xmm10
movdqa .Llow4_mask(%rip),%xmm2
.byte 102,65,15,56,0,194
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
.Loop_row_1:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_1
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
.Loop_row_2:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_2
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $6,%rax
.Loop_row_3:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
.byte 102,65,15,56,0,210
movdqu %xmm2,(%rdi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
.Lgmult_seh_end:
.cfi_endproc
.size gcm_gmult_ssse3,.-gcm_gmult_ssse3
.type gcm_ghash_ssse3, @function
.globl gcm_ghash_ssse3
.hidden gcm_ghash_ssse3
.align 16
gcm_ghash_ssse3:
.Lghash_seh_begin:
.cfi_startproc
movdqu (%rdi),%xmm0
movdqa .Lreverse_bytes(%rip),%xmm10
movdqa .Llow4_mask(%rip),%xmm11
andq $-16,%rcx
.byte 102,65,15,56,0,194
pxor %xmm3,%xmm3
.Loop_ghash:
movdqu (%rdx),%xmm1
.byte 102,65,15,56,0,202
pxor %xmm1,%xmm0
movdqa %xmm11,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm11,%xmm0
pxor %xmm2,%xmm2
movq $5,%rax
.Loop_row_4:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_4
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
.Loop_row_5:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_5
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $6,%rax
.Loop_row_6:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_6
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movdqa %xmm2,%xmm0
leaq -256(%rsi),%rsi
leaq 16(%rdx),%rdx
subq $16,%rcx
jnz .Loop_ghash
.byte 102,65,15,56,0,194
movdqu %xmm0,(%rdi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
.Lghash_seh_end:
.cfi_endproc
.size gcm_ghash_ssse3,.-gcm_ghash_ssse3
.align 16
.Lreverse_bytes:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.Llow4_mask:
.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
@ -8,13 +20,27 @@
.type gcm_gmult_4bit,@function
.align 16
gcm_gmult_4bit:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lgmult_prologue:
movzbq 15(%rdi),%r8
@ -92,23 +118,41 @@ gcm_gmult_4bit:
movq %r9,(%rdi)
leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lgmult_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_4bit,.-gcm_gmult_4bit
.globl gcm_ghash_4bit
.hidden gcm_ghash_4bit
.type gcm_ghash_4bit,@function
.align 16
gcm_ghash_4bit:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lghash_prologue:
movq %rdx,%r14
movq %rcx,%r15
@ -654,21 +698,31 @@ gcm_ghash_4bit:
movq %r9,(%rdi)
leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq 0(%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lghash_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_4bit,.-gcm_ghash_4bit
.globl gcm_init_clmul
.hidden gcm_init_clmul
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
.cfi_startproc
.L_init_clmul:
movdqu (%rsi),%xmm2
pshufd $78,%xmm2,%xmm2
@ -820,12 +874,14 @@ gcm_init_clmul:
.byte 102,15,58,15,227,8
movdqu %xmm4,80(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_init_clmul,.-gcm_init_clmul
.globl gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
.cfi_startproc
.L_gmult_clmul:
movdqu (%rdi),%xmm0
movdqa .Lbswap_mask(%rip),%xmm5
@ -872,12 +928,14 @@ gcm_gmult_clmul:
.byte 102,15,56,0,197
movdqu %xmm0,(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_clmul,.-gcm_gmult_clmul
.globl gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 32
gcm_ghash_clmul:
.cfi_startproc
.L_ghash_clmul:
movdqa .Lbswap_mask(%rip),%xmm10
@ -1257,12 +1315,14 @@ gcm_ghash_clmul:
.byte 102,65,15,56,0,194
movdqu %xmm0,(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_clmul,.-gcm_ghash_clmul
.globl gcm_init_avx
.hidden gcm_init_avx
.type gcm_init_avx,@function
.align 32
gcm_init_avx:
.cfi_startproc
vzeroupper
vmovdqu (%rsi),%xmm2
@ -1365,19 +1425,23 @@ gcm_init_avx:
vzeroupper
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_init_avx,.-gcm_init_avx
.globl gcm_gmult_avx
.hidden gcm_gmult_avx
.type gcm_gmult_avx,@function
.align 32
gcm_gmult_avx:
.cfi_startproc
jmp .L_gmult_clmul
.cfi_endproc
.size gcm_gmult_avx,.-gcm_gmult_avx
.globl gcm_ghash_avx
.hidden gcm_ghash_avx
.type gcm_ghash_avx,@function
.align 32
gcm_ghash_avx:
.cfi_startproc
vzeroupper
vmovdqu (%rdi),%xmm10
@ -1749,6 +1813,7 @@ gcm_ghash_avx:
vmovdqu %xmm10,(%rdi)
vzeroupper
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_avx,.-gcm_ghash_avx
.align 64
.Lbswap_mask:
@ -1804,3 +1869,4 @@ gcm_ghash_avx:
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
.section .note.GNU-stack,"",@progbits


@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.align 16
@ -6,11 +18,22 @@
.hidden md5_block_asm_data_order
.type md5_block_asm_data_order,@function
md5_block_asm_data_order:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12,-32
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset r14,-40
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset r15,-48
.Lprologue:
@ -660,12 +683,20 @@ md5_block_asm_data_order:
movl %edx,12(%rbp)
movq (%rsp),%r15
.cfi_restore r15
movq 8(%rsp),%r14
.cfi_restore r14
movq 16(%rsp),%r12
.cfi_restore r12
movq 24(%rsp),%rbx
.cfi_restore rbx
movq 32(%rsp),%rbp
.cfi_restore rbp
addq $40,%rsp
.cfi_adjust_cfa_offset -40
.Lepilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size md5_block_asm_data_order,.-md5_block_asm_data_order
#endif
.section .note.GNU-stack,"",@progbits


@ -0,0 +1,343 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.type beeu_mod_inverse_vartime,@function
.hidden beeu_mod_inverse_vartime
.globl beeu_mod_inverse_vartime
.hidden beeu_mod_inverse_vartime
.align 32
beeu_mod_inverse_vartime:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset rbp,-16
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12,-24
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset r13,-32
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset r14,-40
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset r15,-48
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset rbx,-56
pushq %rsi
.cfi_adjust_cfa_offset 8
.cfi_offset rsi,-64
subq $80,%rsp
.cfi_adjust_cfa_offset 80
movq %rdi,0(%rsp)
movq $1,%r8
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
xorq %rdi,%rdi
xorq %r12,%r12
xorq %r13,%r13
xorq %r14,%r14
xorq %r15,%r15
xorq %rbp,%rbp
vmovdqu 0(%rsi),%xmm0
vmovdqu 16(%rsi),%xmm1
vmovdqu %xmm0,48(%rsp)
vmovdqu %xmm1,64(%rsp)
vmovdqu 0(%rdx),%xmm0
vmovdqu 16(%rdx),%xmm1
vmovdqu %xmm0,16(%rsp)
vmovdqu %xmm1,32(%rsp)
.Lbeeu_loop:
xorq %rbx,%rbx
orq 48(%rsp),%rbx
orq 56(%rsp),%rbx
orq 64(%rsp),%rbx
orq 72(%rsp),%rbx
jz .Lbeeu_loop_end
movq $1,%rcx
.Lbeeu_shift_loop_XB:
movq %rcx,%rbx
andq 48(%rsp),%rbx
jnz .Lbeeu_shift_loop_end_XB
movq $1,%rbx
andq %r8,%rbx
jz .Lshift1_0
addq 0(%rdx),%r8
adcq 8(%rdx),%r9
adcq 16(%rdx),%r10
adcq 24(%rdx),%r11
adcq $0,%rdi
.Lshift1_0:
shrdq $1,%r9,%r8
shrdq $1,%r10,%r9
shrdq $1,%r11,%r10
shrdq $1,%rdi,%r11
shrq $1,%rdi
shlq $1,%rcx
cmpq $0x8000000,%rcx
jne .Lbeeu_shift_loop_XB
.Lbeeu_shift_loop_end_XB:
bsfq %rcx,%rcx
testq %rcx,%rcx
jz .Lbeeu_no_shift_XB
movq 8+48(%rsp),%rax
movq 16+48(%rsp),%rbx
movq 24+48(%rsp),%rsi
shrdq %cl,%rax,0+48(%rsp)
shrdq %cl,%rbx,8+48(%rsp)
shrdq %cl,%rsi,16+48(%rsp)
shrq %cl,%rsi
movq %rsi,24+48(%rsp)
.Lbeeu_no_shift_XB:
movq $1,%rcx
.Lbeeu_shift_loop_YA:
movq %rcx,%rbx
andq 16(%rsp),%rbx
jnz .Lbeeu_shift_loop_end_YA
movq $1,%rbx
andq %r12,%rbx
jz .Lshift1_1
addq 0(%rdx),%r12
adcq 8(%rdx),%r13
adcq 16(%rdx),%r14
adcq 24(%rdx),%r15
adcq $0,%rbp
.Lshift1_1:
shrdq $1,%r13,%r12
shrdq $1,%r14,%r13
shrdq $1,%r15,%r14
shrdq $1,%rbp,%r15
shrq $1,%rbp
shlq $1,%rcx
cmpq $0x8000000,%rcx
jne .Lbeeu_shift_loop_YA
.Lbeeu_shift_loop_end_YA:
bsfq %rcx,%rcx
testq %rcx,%rcx
jz .Lbeeu_no_shift_YA
movq 8+16(%rsp),%rax
movq 16+16(%rsp),%rbx
movq 24+16(%rsp),%rsi
shrdq %cl,%rax,0+16(%rsp)
shrdq %cl,%rbx,8+16(%rsp)
shrdq %cl,%rsi,16+16(%rsp)
shrq %cl,%rsi
movq %rsi,24+16(%rsp)
.Lbeeu_no_shift_YA:
movq 48(%rsp),%rax
movq 56(%rsp),%rbx
movq 64(%rsp),%rsi
movq 72(%rsp),%rcx
subq 16(%rsp),%rax
sbbq 24(%rsp),%rbx
sbbq 32(%rsp),%rsi
sbbq 40(%rsp),%rcx
jnc .Lbeeu_B_bigger_than_A
movq 16(%rsp),%rax
movq 24(%rsp),%rbx
movq 32(%rsp),%rsi
movq 40(%rsp),%rcx
subq 48(%rsp),%rax
sbbq 56(%rsp),%rbx
sbbq 64(%rsp),%rsi
sbbq 72(%rsp),%rcx
movq %rax,16(%rsp)
movq %rbx,24(%rsp)
movq %rsi,32(%rsp)
movq %rcx,40(%rsp)
addq %r8,%r12
adcq %r9,%r13
adcq %r10,%r14
adcq %r11,%r15
adcq %rdi,%rbp
jmp .Lbeeu_loop
.Lbeeu_B_bigger_than_A:
movq %rax,48(%rsp)
movq %rbx,56(%rsp)
movq %rsi,64(%rsp)
movq %rcx,72(%rsp)
addq %r12,%r8
adcq %r13,%r9
adcq %r14,%r10
adcq %r15,%r11
adcq %rbp,%rdi
jmp .Lbeeu_loop
.Lbeeu_loop_end:
movq 16(%rsp),%rbx
subq $1,%rbx
orq 24(%rsp),%rbx
orq 32(%rsp),%rbx
orq 40(%rsp),%rbx
jnz .Lbeeu_err
movq 0(%rdx),%r8
movq 8(%rdx),%r9
movq 16(%rdx),%r10
movq 24(%rdx),%r11
xorq %rdi,%rdi
.Lbeeu_reduction_loop:
movq %r12,16(%rsp)
movq %r13,24(%rsp)
movq %r14,32(%rsp)
movq %r15,40(%rsp)
movq %rbp,48(%rsp)
subq %r8,%r12
sbbq %r9,%r13
sbbq %r10,%r14
sbbq %r11,%r15
sbbq $0,%rbp
cmovcq 16(%rsp),%r12
cmovcq 24(%rsp),%r13
cmovcq 32(%rsp),%r14
cmovcq 40(%rsp),%r15
jnc .Lbeeu_reduction_loop
subq %r12,%r8
sbbq %r13,%r9
sbbq %r14,%r10
sbbq %r15,%r11
.Lbeeu_save:
movq 0(%rsp),%rdi
movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq $1,%rax
jmp .Lbeeu_finish
.Lbeeu_err:
xorq %rax,%rax
.Lbeeu_finish:
addq $80,%rsp
.cfi_adjust_cfa_offset -80
popq %rsi
.cfi_adjust_cfa_offset -8
.cfi_restore rsi
popq %rbx
.cfi_adjust_cfa_offset -8
.cfi_restore rbx
popq %r15
.cfi_adjust_cfa_offset -8
.cfi_restore r15
popq %r14
.cfi_adjust_cfa_offset -8
.cfi_restore r14
popq %r13
.cfi_adjust_cfa_offset -8
.cfi_restore r13
popq %r12
.cfi_adjust_cfa_offset -8
.cfi_restore r12
popq %rbp
.cfi_adjust_cfa_offset -8
.cfi_restore rbp
.byte 0xf3,0xc3
.cfi_endproc
.size beeu_mod_inverse_vartime, .-beeu_mod_inverse_vartime
#endif
.section .note.GNU-stack,"",@progbits


@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
@ -9,14 +21,15 @@
.type CRYPTO_rdrand,@function
.align 16
CRYPTO_rdrand:
.cfi_startproc
xorq %rax,%rax
.byte 0x48, 0x0f, 0xc7, 0xf1
.byte 72,15,199,242
adcq %rax,%rax
movq %rcx,0(%rdi)
movq %rdx,0(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size CRYPTO_rdrand,.-CRYPTO_rdrand
@ -27,13 +40,12 @@ CRYPTO_rdrand:
.type CRYPTO_rdrand_multiple8_buf,@function
.align 16
CRYPTO_rdrand_multiple8_buf:
.cfi_startproc
testq %rsi,%rsi
jz .Lout
movq $8,%rdx
.Lloop:
.byte 0x48, 0x0f, 0xc7, 0xf1
.byte 72,15,199,241
jnc .Lerr
movq %rcx,0(%rdi)
addq %rdx,%rdi
@ -45,4 +57,7 @@ CRYPTO_rdrand_multiple8_buf:
.Lerr:
xorq %rax,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size CRYPTO_rdrand_multiple8_buf,.-CRYPTO_rdrand_multiple8_buf
#endif
.section .note.GNU-stack,"",@progbits


@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl rsaz_1024_sqr_avx2
@ -77,7 +89,7 @@ rsaz_1024_sqr_avx2:
vmovdqu 256-128(%rsi),%ymm8
leaq 192(%rsp),%rbx
vpbroadcastq .Land_mask(%rip),%ymm15
vmovdqu .Land_mask(%rip),%ymm15
jmp .LOOP_GRANDE_SQR_1024
.align 32
@ -829,10 +841,10 @@ rsaz_1024_mul_avx2:
vpmuludq 192-128(%rcx),%ymm11,%ymm12
vpaddq %ymm12,%ymm6,%ymm6
vpmuludq 224-128(%rcx),%ymm11,%ymm13
vpblendd $3,%ymm14,%ymm9,%ymm9
vpblendd $3,%ymm14,%ymm9,%ymm12
vpaddq %ymm13,%ymm7,%ymm7
vpmuludq 256-128(%rcx),%ymm11,%ymm0
vpaddq %ymm9,%ymm3,%ymm3
vpaddq %ymm12,%ymm3,%ymm3
vpaddq %ymm0,%ymm8,%ymm8
movq %rbx,%rax
@ -845,7 +857,9 @@ rsaz_1024_mul_avx2:
vmovdqu -8+64-128(%rsi),%ymm13
movq %r10,%rax
vpblendd $0xfc,%ymm14,%ymm9,%ymm9
imull %r8d,%eax
vpaddq %ymm9,%ymm4,%ymm4
andl $0x1fffffff,%eax
imulq 16-128(%rsi),%rbx
@ -1074,7 +1088,6 @@ rsaz_1024_mul_avx2:
decl %r14d
jnz .Loop_mul_1024
vpermq $0,%ymm15,%ymm15
vpaddq (%rsp),%ymm12,%ymm0
vpsrlq $29,%ymm0,%ymm12
@ -1215,6 +1228,7 @@ rsaz_1024_mul_avx2:
.type rsaz_1024_red2norm_avx2,@function
.align 32
rsaz_1024_red2norm_avx2:
.cfi_startproc
subq $-128,%rsi
xorq %rax,%rax
movq -128(%rsi),%r8
@ -1406,6 +1420,7 @@ rsaz_1024_red2norm_avx2:
movq %rax,120(%rdi)
movq %r11,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_1024_red2norm_avx2,.-rsaz_1024_red2norm_avx2
.globl rsaz_1024_norm2red_avx2
@ -1413,6 +1428,7 @@ rsaz_1024_red2norm_avx2:
.type rsaz_1024_norm2red_avx2,@function
.align 32
rsaz_1024_norm2red_avx2:
.cfi_startproc
subq $-128,%rdi
movq (%rsi),%r8
movl $0x1fffffff,%eax
@ -1565,12 +1581,14 @@ rsaz_1024_norm2red_avx2:
movq %r8,176(%rdi)
movq %r8,184(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_1024_norm2red_avx2,.-rsaz_1024_norm2red_avx2
.globl rsaz_1024_scatter5_avx2
.hidden rsaz_1024_scatter5_avx2
.type rsaz_1024_scatter5_avx2,@function
.align 32
rsaz_1024_scatter5_avx2:
.cfi_startproc
vzeroupper
vmovdqu .Lscatter_permd(%rip),%ymm5
shll $4,%edx
@ -1590,6 +1608,7 @@ rsaz_1024_scatter5_avx2:
vzeroupper
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_1024_scatter5_avx2,.-rsaz_1024_scatter5_avx2
.globl rsaz_1024_gather5_avx2
@ -1714,23 +1733,9 @@ rsaz_1024_gather5_avx2:
.cfi_endproc
.LSEH_end_rsaz_1024_gather5:
.size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
.globl rsaz_avx2_eligible
.hidden rsaz_avx2_eligible
.type rsaz_avx2_eligible,@function
.align 32
rsaz_avx2_eligible:
leaq OPENSSL_ia32cap_P(%rip),%rax
movl 8(%rax),%eax
andl $32,%eax
shrl $5,%eax
.byte 0xf3,0xc3
.size rsaz_avx2_eligible,.-rsaz_avx2_eligible
.align 64
.Land_mask:
.quad 0x1fffffff,0x1fffffff,0x1fffffff,-1
.quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff
.Lscatter_permd:
.long 0,2,4,6,7,7,7,7
.Lgather_permd:
@ -1741,3 +1746,4 @@ rsaz_avx2_eligible:
.long 4,4,4,4, 4,4,4,4
.align 64
#endif
.section .note.GNU-stack,"",@progbits


@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
@ -8,6 +20,7 @@
.type sha1_block_data_order,@function
.align 16
sha1_block_data_order:
.cfi_startproc
leaq OPENSSL_ia32cap_P(%rip),%r10
movl 0(%r10),%r9d
movl 4(%r10),%r8d
@ -24,17 +37,24 @@ sha1_block_data_order:
.align 16
.Lialu:
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
movq %rdi,%r8
subq $72,%rsp
movq %rsi,%r9
andq $-64,%rsp
movq %rdx,%r10
movq %rax,64(%rsp)
.cfi_escape 0x0f,0x06,0x77,0xc0,0x00,0x06,0x23,0x08
.Lprologue:
movl 0(%r8),%esi
@ -1229,25 +1249,40 @@ sha1_block_data_order:
jnz .Lloop
movq 64(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size sha1_block_data_order,.-sha1_block_data_order
.type sha1_block_data_order_ssse3,@function
.align 16
sha1_block_data_order_ssse3:
_ssse3_shortcut:
.cfi_startproc
movq %rsp,%r11
.cfi_def_cfa_register %r11
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
leaq -64(%rsp),%rsp
andq $-64,%rsp
movq %rdi,%r8
@ -2404,24 +2439,38 @@ _ssse3_shortcut:
movl %edx,12(%r8)
movl %ebp,16(%r8)
movq -40(%r11),%r14
.cfi_restore %r14
movq -32(%r11),%r13
.cfi_restore %r13
movq -24(%r11),%r12
.cfi_restore %r12
movq -16(%r11),%rbp
.cfi_restore %rbp
movq -8(%r11),%rbx
.cfi_restore %rbx
leaq (%r11),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_ssse3:
.byte 0xf3,0xc3
.cfi_endproc
.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
.type sha1_block_data_order_avx,@function
.align 16
sha1_block_data_order_avx:
_avx_shortcut:
.cfi_startproc
movq %rsp,%r11
.cfi_def_cfa_register %r11
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
leaq -64(%rsp),%rsp
vzeroupper
andq $-64,%rsp
@ -3518,13 +3567,20 @@ _avx_shortcut:
movl %edx,12(%r8)
movl %ebp,16(%r8)
movq -40(%r11),%r14
.cfi_restore %r14
movq -32(%r11),%r13
.cfi_restore %r13
movq -24(%r11),%r12
.cfi_restore %r12
movq -16(%r11),%rbp
.cfi_restore %rbp
movq -8(%r11),%rbx
.cfi_restore %rbx
leaq (%r11),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_avx:
.byte 0xf3,0xc3
.cfi_endproc
.size sha1_block_data_order_avx,.-sha1_block_data_order_avx
.align 64
K_XX_XX:
@ -3542,3 +3598,4 @@ K_XX_XX:
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.extern OPENSSL_ia32cap_P
@ -8,6 +20,7 @@
.type sha256_block_data_order,@function
.align 16
sha256_block_data_order:
.cfi_startproc
leaq OPENSSL_ia32cap_P(%rip),%r11
movl 0(%r11),%r9d
movl 4(%r11),%r10d
@ -20,12 +33,19 @@ sha256_block_data_order:
testl $512,%r10d
jnz .Lssse3_shortcut
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
shlq $4,%rdx
subq $64+32,%rsp
leaq (%rsi,%rdx,4),%rdx
@ -33,7 +53,8 @@ sha256_block_data_order:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
movq %rax,64+24(%rsp)
movq %rax,88(%rsp)
.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
.Lprologue:
movl 0(%rdi),%eax
@ -1697,16 +1718,25 @@ sha256_block_data_order:
movl %r11d,28(%rdi)
jb .Lloop
movq 64+24(%rsp),%rsi
movq 88(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size sha256_block_data_order,.-sha256_block_data_order
.align 64
.type K256,@object
@ -1754,14 +1784,22 @@ K256:
.type sha256_block_data_order_ssse3,@function
.align 64
sha256_block_data_order_ssse3:
.cfi_startproc
.Lssse3_shortcut:
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
shlq $4,%rdx
subq $96,%rsp
leaq (%rsi,%rdx,4),%rdx
@ -1769,7 +1807,8 @@ sha256_block_data_order_ssse3:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
movq %rax,64+24(%rsp)
movq %rax,88(%rsp)
.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
.Lprologue_ssse3:
movl 0(%rdi),%eax
@ -2835,28 +2874,45 @@ sha256_block_data_order_ssse3:
movl %r11d,28(%rdi)
jb .Lloop_ssse3
movq 64+24(%rsp),%rsi
movq 88(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_ssse3:
.byte 0xf3,0xc3
.cfi_endproc
.size sha256_block_data_order_ssse3,.-sha256_block_data_order_ssse3
.type sha256_block_data_order_avx,@function
.align 64
sha256_block_data_order_avx:
.cfi_startproc
.Lavx_shortcut:
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
shlq $4,%rdx
subq $96,%rsp
leaq (%rsi,%rdx,4),%rdx
@ -2864,7 +2920,8 @@ sha256_block_data_order_avx:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
movq %rax,64+24(%rsp)
movq %rax,88(%rsp)
.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
.Lprologue_avx:
vzeroupper
@ -3891,16 +3948,26 @@ sha256_block_data_order_avx:
movl %r11d,28(%rdi)
jb .Lloop_avx
movq 64+24(%rsp),%rsi
movq 88(%rsp),%rsi
.cfi_def_cfa %rsi,8
vzeroupper
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lepilogue_avx:
.byte 0xf3,0xc3
.cfi_endproc
.size sha256_block_data_order_avx,.-sha256_block_data_order_avx
#endif
.section .note.GNU-stack,"",@progbits


@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
@ -19,6 +31,7 @@
.type _vpaes_encrypt_core,@function
.align 16
_vpaes_encrypt_core:
.cfi_startproc
movq %rdx,%r9
movq $16,%r11
movl 240(%rdx),%eax
@ -99,6 +112,7 @@ _vpaes_encrypt_core:
pxor %xmm4,%xmm0
.byte 102,15,56,0,193
.byte 0xf3,0xc3
.cfi_endproc
.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
@ -106,9 +120,185 @@ _vpaes_encrypt_core:
.type _vpaes_encrypt_core_2x,@function
.align 16
_vpaes_encrypt_core_2x:
.cfi_startproc
movq %rdx,%r9
movq $16,%r11
movl 240(%rdx),%eax
movdqa %xmm9,%xmm1
movdqa %xmm9,%xmm7
movdqa .Lk_ipt(%rip),%xmm2
movdqa %xmm2,%xmm8
pandn %xmm0,%xmm1
pandn %xmm6,%xmm7
movdqu (%r9),%xmm5
psrld $4,%xmm1
psrld $4,%xmm7
pand %xmm9,%xmm0
pand %xmm9,%xmm6
.byte 102,15,56,0,208
.byte 102,68,15,56,0,198
movdqa .Lk_ipt+16(%rip),%xmm0
movdqa %xmm0,%xmm6
.byte 102,15,56,0,193
.byte 102,15,56,0,247
pxor %xmm5,%xmm2
pxor %xmm5,%xmm8
addq $16,%r9
pxor %xmm2,%xmm0
pxor %xmm8,%xmm6
leaq .Lk_mc_backward(%rip),%r10
jmp .Lenc2x_entry
.align 16
.Lenc2x_loop:
movdqa .Lk_sb1(%rip),%xmm4
movdqa .Lk_sb1+16(%rip),%xmm0
movdqa %xmm4,%xmm12
movdqa %xmm0,%xmm6
.byte 102,15,56,0,226
.byte 102,69,15,56,0,224
.byte 102,15,56,0,195
.byte 102,65,15,56,0,243
pxor %xmm5,%xmm4
pxor %xmm5,%xmm12
movdqa .Lk_sb2(%rip),%xmm5
movdqa %xmm5,%xmm13
pxor %xmm4,%xmm0
pxor %xmm12,%xmm6
movdqa -64(%r11,%r10,1),%xmm1
.byte 102,15,56,0,234
.byte 102,69,15,56,0,232
movdqa (%r11,%r10,1),%xmm4
movdqa .Lk_sb2+16(%rip),%xmm2
movdqa %xmm2,%xmm8
.byte 102,15,56,0,211
.byte 102,69,15,56,0,195
movdqa %xmm0,%xmm3
movdqa %xmm6,%xmm11
pxor %xmm5,%xmm2
pxor %xmm13,%xmm8
.byte 102,15,56,0,193
.byte 102,15,56,0,241
addq $16,%r9
pxor %xmm2,%xmm0
pxor %xmm8,%xmm6
.byte 102,15,56,0,220
.byte 102,68,15,56,0,220
addq $16,%r11
pxor %xmm0,%xmm3
pxor %xmm6,%xmm11
.byte 102,15,56,0,193
.byte 102,15,56,0,241
andq $0x30,%r11
subq $1,%rax
pxor %xmm3,%xmm0
pxor %xmm11,%xmm6
.Lenc2x_entry:
movdqa %xmm9,%xmm1
movdqa %xmm9,%xmm7
movdqa .Lk_inv+16(%rip),%xmm5
movdqa %xmm5,%xmm13
pandn %xmm0,%xmm1
pandn %xmm6,%xmm7
psrld $4,%xmm1
psrld $4,%xmm7
pand %xmm9,%xmm0
pand %xmm9,%xmm6
.byte 102,15,56,0,232
.byte 102,68,15,56,0,238
movdqa %xmm10,%xmm3
movdqa %xmm10,%xmm11
pxor %xmm1,%xmm0
pxor %xmm7,%xmm6
.byte 102,15,56,0,217
.byte 102,68,15,56,0,223
movdqa %xmm10,%xmm4
movdqa %xmm10,%xmm12
pxor %xmm5,%xmm3
pxor %xmm13,%xmm11
.byte 102,15,56,0,224
.byte 102,68,15,56,0,230
movdqa %xmm10,%xmm2
movdqa %xmm10,%xmm8
pxor %xmm5,%xmm4
pxor %xmm13,%xmm12
.byte 102,15,56,0,211
.byte 102,69,15,56,0,195
movdqa %xmm10,%xmm3
movdqa %xmm10,%xmm11
pxor %xmm0,%xmm2
pxor %xmm6,%xmm8
.byte 102,15,56,0,220
.byte 102,69,15,56,0,220
movdqu (%r9),%xmm5
pxor %xmm1,%xmm3
pxor %xmm7,%xmm11
jnz .Lenc2x_loop
movdqa -96(%r10),%xmm4
movdqa -80(%r10),%xmm0
movdqa %xmm4,%xmm12
movdqa %xmm0,%xmm6
.byte 102,15,56,0,226
.byte 102,69,15,56,0,224
pxor %xmm5,%xmm4
pxor %xmm5,%xmm12
.byte 102,15,56,0,195
.byte 102,65,15,56,0,243
movdqa 64(%r11,%r10,1),%xmm1
pxor %xmm4,%xmm0
pxor %xmm12,%xmm6
.byte 102,15,56,0,193
.byte 102,15,56,0,241
.byte 0xf3,0xc3
.cfi_endproc
.size _vpaes_encrypt_core_2x,.-_vpaes_encrypt_core_2x
.type _vpaes_decrypt_core,@function
.align 16
_vpaes_decrypt_core:
.cfi_startproc
movq %rdx,%r9
movl 240(%rdx),%eax
movdqa %xmm9,%xmm1
@ -205,6 +395,7 @@ _vpaes_decrypt_core:
pxor %xmm4,%xmm0
.byte 102,15,56,0,194
.byte 0xf3,0xc3
.cfi_endproc
.size _vpaes_decrypt_core,.-_vpaes_decrypt_core
@ -215,6 +406,7 @@ _vpaes_decrypt_core:
.type _vpaes_schedule_core,@function
.align 16
_vpaes_schedule_core:
.cfi_startproc
@ -381,6 +573,7 @@ _vpaes_schedule_core:
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
.byte 0xf3,0xc3
.cfi_endproc
.size _vpaes_schedule_core,.-_vpaes_schedule_core
@ -400,6 +593,7 @@ _vpaes_schedule_core:
.type _vpaes_schedule_192_smear,@function
.align 16
_vpaes_schedule_192_smear:
.cfi_startproc
pshufd $0x80,%xmm6,%xmm1
pshufd $0xFE,%xmm7,%xmm0
pxor %xmm1,%xmm6
@ -408,6 +602,7 @@ _vpaes_schedule_192_smear:
movdqa %xmm6,%xmm0
movhlps %xmm1,%xmm6
.byte 0xf3,0xc3
.cfi_endproc
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
@ -431,6 +626,7 @@ _vpaes_schedule_192_smear:
.type _vpaes_schedule_round,@function
.align 16
_vpaes_schedule_round:
.cfi_startproc
pxor %xmm1,%xmm1
.byte 102,65,15,58,15,200,15
@ -484,6 +680,7 @@ _vpaes_schedule_low_round:
pxor %xmm7,%xmm0
movdqa %xmm0,%xmm7
.byte 0xf3,0xc3
.cfi_endproc
.size _vpaes_schedule_round,.-_vpaes_schedule_round
@ -498,6 +695,7 @@ _vpaes_schedule_low_round:
.type _vpaes_schedule_transform,@function
.align 16
_vpaes_schedule_transform:
.cfi_startproc
movdqa %xmm9,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
@ -508,6 +706,7 @@ _vpaes_schedule_transform:
.byte 102,15,56,0,193
pxor %xmm2,%xmm0
.byte 0xf3,0xc3
.cfi_endproc
.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
@ -536,6 +735,7 @@ _vpaes_schedule_transform:
.type _vpaes_schedule_mangle,@function
.align 16
_vpaes_schedule_mangle:
.cfi_startproc
movdqa %xmm0,%xmm4
movdqa .Lk_mc_forward(%rip),%xmm5
testq %rcx,%rcx
@ -600,6 +800,7 @@ _vpaes_schedule_mangle:
andq $0x30,%r8
movdqu %xmm3,(%rdx)
.byte 0xf3,0xc3
.cfi_endproc
.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
@ -610,6 +811,13 @@ _vpaes_schedule_mangle:
.type vpaes_set_encrypt_key,@function
.align 16
vpaes_set_encrypt_key:
.cfi_startproc
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+5(%rip)
#endif
movl %esi,%eax
shrl $5,%eax
addl $5,%eax
@ -620,6 +828,7 @@ vpaes_set_encrypt_key:
call _vpaes_schedule_core
xorl %eax,%eax
.byte 0xf3,0xc3
.cfi_endproc
.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
.globl vpaes_set_decrypt_key
@ -627,6 +836,7 @@ vpaes_set_encrypt_key:
.type vpaes_set_decrypt_key,@function
.align 16
vpaes_set_decrypt_key:
.cfi_startproc
movl %esi,%eax
shrl $5,%eax
addl $5,%eax
@ -642,6 +852,7 @@ vpaes_set_decrypt_key:
call _vpaes_schedule_core
xorl %eax,%eax
.byte 0xf3,0xc3
.cfi_endproc
.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
.globl vpaes_encrypt
@ -649,11 +860,18 @@ vpaes_set_decrypt_key:
.type vpaes_encrypt,@function
.align 16
vpaes_encrypt:
.cfi_startproc
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+4(%rip)
#endif
movdqu (%rdi),%xmm0
call _vpaes_preheat
call _vpaes_encrypt_core
movdqu %xmm0,(%rsi)
.byte 0xf3,0xc3
.cfi_endproc
.size vpaes_encrypt,.-vpaes_encrypt
.globl vpaes_decrypt
@ -661,17 +879,20 @@ vpaes_encrypt:
.type vpaes_decrypt,@function
.align 16
vpaes_decrypt:
.cfi_startproc
movdqu (%rdi),%xmm0
call _vpaes_preheat
call _vpaes_decrypt_core
movdqu %xmm0,(%rsi)
.byte 0xf3,0xc3
.cfi_endproc
.size vpaes_decrypt,.-vpaes_decrypt
.globl vpaes_cbc_encrypt
.hidden vpaes_cbc_encrypt
.type vpaes_cbc_encrypt,@function
.align 16
vpaes_cbc_encrypt:
.cfi_startproc
xchgq %rcx,%rdx
subq $16,%rcx
jc .Lcbc_abort
@ -707,7 +928,71 @@ vpaes_cbc_encrypt:
movdqu %xmm6,(%r8)
.Lcbc_abort:
.byte 0xf3,0xc3
.cfi_endproc
.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
.globl vpaes_ctr32_encrypt_blocks
.hidden vpaes_ctr32_encrypt_blocks
.type vpaes_ctr32_encrypt_blocks,@function
.align 16
vpaes_ctr32_encrypt_blocks:
.cfi_startproc
xchgq %rcx,%rdx
testq %rcx,%rcx
jz .Lctr32_abort
movdqu (%r8),%xmm0
movdqa .Lctr_add_one(%rip),%xmm8
subq %rdi,%rsi
call _vpaes_preheat
movdqa %xmm0,%xmm6
pshufb .Lrev_ctr(%rip),%xmm6
testq $1,%rcx
jz .Lctr32_prep_loop
movdqu (%rdi),%xmm7
call _vpaes_encrypt_core
pxor %xmm7,%xmm0
paddd %xmm8,%xmm6
movdqu %xmm0,(%rsi,%rdi,1)
subq $1,%rcx
leaq 16(%rdi),%rdi
jz .Lctr32_done
.Lctr32_prep_loop:
movdqa %xmm6,%xmm14
movdqa %xmm6,%xmm15
paddd %xmm8,%xmm15
.Lctr32_loop:
movdqa .Lrev_ctr(%rip),%xmm1
movdqa %xmm14,%xmm0
movdqa %xmm15,%xmm6
.byte 102,15,56,0,193
.byte 102,15,56,0,241
call _vpaes_encrypt_core_2x
movdqu (%rdi),%xmm1
movdqu 16(%rdi),%xmm2
movdqa .Lctr_add_two(%rip),%xmm3
pxor %xmm1,%xmm0
pxor %xmm2,%xmm6
paddd %xmm3,%xmm14
paddd %xmm3,%xmm15
movdqu %xmm0,(%rsi,%rdi,1)
movdqu %xmm6,16(%rsi,%rdi,1)
subq $2,%rcx
leaq 32(%rdi),%rdi
jnz .Lctr32_loop
.Lctr32_done:
.Lctr32_abort:
.byte 0xf3,0xc3
.cfi_endproc
.size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks
@ -717,6 +1002,7 @@ vpaes_cbc_encrypt:
.type _vpaes_preheat,@function
.align 16
_vpaes_preheat:
.cfi_startproc
leaq .Lk_s0F(%rip),%r10
movdqa -32(%r10),%xmm10
movdqa -16(%r10),%xmm11
@ -726,6 +1012,7 @@ _vpaes_preheat:
movdqa 80(%r10),%xmm15
movdqa 96(%r10),%xmm14
.byte 0xf3,0xc3
.cfi_endproc
.size _vpaes_preheat,.-_vpaes_preheat
@ -828,7 +1115,19 @@ _vpaes_consts:
.Lk_dsbo:
.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
.Lrev_ctr:
.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
.Lctr_add_one:
.quad 0x0000000000000000, 0x0000000100000000
.Lctr_add_two:
.quad 0x0000000000000000, 0x0000000200000000
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.align 64
.size _vpaes_consts,.-_vpaes_consts
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.extern OPENSSL_ia32cap_P
@ -17,6 +29,8 @@ bn_mul_mont:
jnz .Lmul_enter
cmpl $8,%r9d
jb .Lmul_enter
leaq OPENSSL_ia32cap_P(%rip),%r11
movl 8(%r11),%r11d
cmpq %rsi,%rdx
jne .Lmul4x_enter
testl $7,%r9d
@ -208,31 +222,30 @@ bn_mul_mont:
xorq %r14,%r14
movq (%rsp),%rax
leaq (%rsp),%rsi
movq %r9,%r15
jmp .Lsub
.align 16
.Lsub:
sbbq (%rcx,%r14,8),%rax
.Lsub: sbbq (%rcx,%r14,8),%rax
movq %rax,(%rdi,%r14,8)
movq 8(%rsi,%r14,8),%rax
movq 8(%rsp,%r14,8),%rax
leaq 1(%r14),%r14
decq %r15
jnz .Lsub
sbbq $0,%rax
movq $-1,%rbx
xorq %rax,%rbx
xorq %r14,%r14
andq %rax,%rsi
notq %rax
movq %rdi,%rcx
andq %rax,%rcx
movq %r9,%r15
orq %rcx,%rsi
.align 16
.Lcopy:
movq (%rsi,%r14,8),%rax
movq %r14,(%rsp,%r14,8)
movq %rax,(%rdi,%r14,8)
movq (%rdi,%r14,8),%rcx
movq (%rsp,%r14,8),%rdx
andq %rbx,%rcx
andq %rax,%rdx
movq %r9,(%rsp,%r14,8)
orq %rcx,%rdx
movq %rdx,(%rdi,%r14,8)
leaq 1(%r14),%r14
subq $1,%r15
jnz .Lcopy
@ -266,6 +279,9 @@ bn_mul4x_mont:
movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter:
andl $0x80100,%r11d
cmpl $0x80100,%r11d
je .Lmulx4x_enter
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
@ -603,7 +619,6 @@ bn_mul4x_mont:
movq 16(%rsp,%r9,8),%rdi
leaq -4(%r9),%r15
movq 0(%rsp),%rax
pxor %xmm0,%xmm0
movq 8(%rsp),%rdx
shrq $2,%r15
leaq (%rsp),%rsi
@ -613,8 +628,7 @@ bn_mul4x_mont:
movq 16(%rsi),%rbx
movq 24(%rsi),%rbp
sbbq 8(%rcx),%rdx
jmp .Lsub4x
.align 16
.Lsub4x:
movq %rax,0(%rdi,%r14,8)
movq %rdx,8(%rdi,%r14,8)
@ -641,34 +655,35 @@ bn_mul4x_mont:
sbbq $0,%rax
movq %rbp,24(%rdi,%r14,8)
xorq %r14,%r14
andq %rax,%rsi
notq %rax
movq %rdi,%rcx
andq %rax,%rcx
leaq -4(%r9),%r15
orq %rcx,%rsi
pxor %xmm0,%xmm0
.byte 102,72,15,110,224
pcmpeqd %xmm5,%xmm5
pshufd $0,%xmm4,%xmm4
movq %r9,%r15
pxor %xmm4,%xmm5
shrq $2,%r15
xorl %eax,%eax
movdqu (%rsi),%xmm1
movdqa %xmm0,(%rsp)
movdqu %xmm1,(%rdi)
jmp .Lcopy4x
.align 16
.Lcopy4x:
movdqu 16(%rsi,%r14,1),%xmm2
movdqu 32(%rsi,%r14,1),%xmm1
movdqa %xmm0,16(%rsp,%r14,1)
movdqu %xmm2,16(%rdi,%r14,1)
movdqa %xmm0,32(%rsp,%r14,1)
movdqu %xmm1,32(%rdi,%r14,1)
leaq 32(%r14),%r14
movdqa (%rsp,%rax,1),%xmm1
movdqu (%rdi,%rax,1),%xmm2
pand %xmm4,%xmm1
pand %xmm5,%xmm2
movdqa 16(%rsp,%rax,1),%xmm3
movdqa %xmm0,(%rsp,%rax,1)
por %xmm2,%xmm1
movdqu 16(%rdi,%rax,1),%xmm2
movdqu %xmm1,(%rdi,%rax,1)
pand %xmm4,%xmm3
pand %xmm5,%xmm2
movdqa %xmm0,16(%rsp,%rax,1)
por %xmm2,%xmm3
movdqu %xmm3,16(%rdi,%rax,1)
leaq 32(%rax),%rax
decq %r15
jnz .Lcopy4x
movdqu 16(%rsi,%r14,1),%xmm2
movdqa %xmm0,16(%rsp,%r14,1)
movdqu %xmm2,16(%rdi,%r14,1)
movq 8(%rsp,%r9,8),%rsi
.cfi_def_cfa %rsi, 8
movq $1,%rax
@ -690,6 +705,8 @@ bn_mul4x_mont:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_mul4x_mont,.-bn_mul4x_mont
.extern bn_sqrx8x_internal
.hidden bn_sqrx8x_internal
.extern bn_sqr8x_internal
.hidden bn_sqr8x_internal
@ -774,6 +791,26 @@ bn_sqr8x_mont:
pxor %xmm0,%xmm0
.byte 102,72,15,110,207
.byte 102,73,15,110,218
leaq OPENSSL_ia32cap_P(%rip),%rax
movl 8(%rax),%eax
andl $0x80100,%eax
cmpl $0x80100,%eax
jne .Lsqr8x_nox
call bn_sqrx8x_internal
leaq (%r8,%rcx,1),%rbx
movq %rcx,%r9
movq %rcx,%rdx
.byte 102,72,15,126,207
sarq $3+2,%rcx
jmp .Lsqr8x_sub
.align 32
.Lsqr8x_nox:
call bn_sqr8x_internal
@ -861,6 +898,363 @@ bn_sqr8x_mont:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_sqr8x_mont,.-bn_sqr8x_mont
.type bn_mulx4x_mont,@function
.align 32
bn_mulx4x_mont:
.cfi_startproc
movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lmulx4x_enter:
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
.Lmulx4x_prologue:
shll $3,%r9d
xorq %r10,%r10
subq %r9,%r10
movq (%r8),%r8
leaq -72(%rsp,%r10,1),%rbp
andq $-128,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmulx4x_page_walk
jmp .Lmulx4x_page_walk_done
.align 16
.Lmulx4x_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmulx4x_page_walk
.Lmulx4x_page_walk_done:
leaq (%rdx,%r9,1),%r10
movq %r9,0(%rsp)
shrq $5,%r9
movq %r10,16(%rsp)
subq $1,%r9
movq %r8,24(%rsp)
movq %rdi,32(%rsp)
movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
movq %r9,48(%rsp)
jmp .Lmulx4x_body
.align 32
.Lmulx4x_body:
leaq 8(%rdx),%rdi
movq (%rdx),%rdx
leaq 64+32(%rsp),%rbx
movq %rdx,%r9
mulxq 0(%rsi),%r8,%rax
mulxq 8(%rsi),%r11,%r14
addq %rax,%r11
movq %rdi,8(%rsp)
mulxq 16(%rsi),%r12,%r13
adcq %r14,%r12
adcq $0,%r13
movq %r8,%rdi
imulq 24(%rsp),%r8
xorq %rbp,%rbp
mulxq 24(%rsi),%rax,%r14
movq %r8,%rdx
leaq 32(%rsi),%rsi
adcxq %rax,%r13
adcxq %rbp,%r14
mulxq 0(%rcx),%rax,%r10
adcxq %rax,%rdi
adoxq %r11,%r10
mulxq 8(%rcx),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11
.byte 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00
movq 48(%rsp),%rdi
movq %r10,-32(%rbx)
adcxq %rax,%r11
adoxq %r13,%r12
mulxq 24(%rcx),%rax,%r15
movq %r9,%rdx
movq %r11,-24(%rbx)
adcxq %rax,%r12
adoxq %rbp,%r15
leaq 32(%rcx),%rcx
movq %r12,-16(%rbx)
jmp .Lmulx4x_1st
.align 32
.Lmulx4x_1st:
adcxq %rbp,%r15
mulxq 0(%rsi),%r10,%rax
adcxq %r14,%r10
mulxq 8(%rsi),%r11,%r14
adcxq %rax,%r11
mulxq 16(%rsi),%r12,%rax
adcxq %r14,%r12
mulxq 24(%rsi),%r13,%r14
.byte 0x67,0x67
movq %r8,%rdx
adcxq %rax,%r13
adcxq %rbp,%r14
leaq 32(%rsi),%rsi
leaq 32(%rbx),%rbx
adoxq %r15,%r10
mulxq 0(%rcx),%rax,%r15
adcxq %rax,%r10
adoxq %r15,%r11
mulxq 8(%rcx),%rax,%r15
adcxq %rax,%r11
adoxq %r15,%r12
mulxq 16(%rcx),%rax,%r15
movq %r10,-40(%rbx)
adcxq %rax,%r12
movq %r11,-32(%rbx)
adoxq %r15,%r13
mulxq 24(%rcx),%rax,%r15
movq %r9,%rdx
movq %r12,-24(%rbx)
adcxq %rax,%r13
adoxq %rbp,%r15
leaq 32(%rcx),%rcx
movq %r13,-16(%rbx)
decq %rdi
jnz .Lmulx4x_1st
movq 0(%rsp),%rax
movq 8(%rsp),%rdi
adcq %rbp,%r15
addq %r15,%r14
sbbq %r15,%r15
movq %r14,-8(%rbx)
jmp .Lmulx4x_outer
.align 32
.Lmulx4x_outer:
movq (%rdi),%rdx
leaq 8(%rdi),%rdi
subq %rax,%rsi
movq %r15,(%rbx)
leaq 64+32(%rsp),%rbx
subq %rax,%rcx
mulxq 0(%rsi),%r8,%r11
xorl %ebp,%ebp
movq %rdx,%r9
mulxq 8(%rsi),%r14,%r12
adoxq -32(%rbx),%r8
adcxq %r14,%r11
mulxq 16(%rsi),%r15,%r13
adoxq -24(%rbx),%r11
adcxq %r15,%r12
adoxq -16(%rbx),%r12
adcxq %rbp,%r13
adoxq %rbp,%r13
movq %rdi,8(%rsp)
movq %r8,%r15
imulq 24(%rsp),%r8
xorl %ebp,%ebp
mulxq 24(%rsi),%rax,%r14
movq %r8,%rdx
adcxq %rax,%r13
adoxq -8(%rbx),%r13
adcxq %rbp,%r14
leaq 32(%rsi),%rsi
adoxq %rbp,%r14
mulxq 0(%rcx),%rax,%r10
adcxq %rax,%r15
adoxq %r11,%r10
mulxq 8(%rcx),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11
mulxq 16(%rcx),%rax,%r12
movq %r10,-32(%rbx)
adcxq %rax,%r11
adoxq %r13,%r12
mulxq 24(%rcx),%rax,%r15
movq %r9,%rdx
movq %r11,-24(%rbx)
leaq 32(%rcx),%rcx
adcxq %rax,%r12
adoxq %rbp,%r15
movq 48(%rsp),%rdi
movq %r12,-16(%rbx)
jmp .Lmulx4x_inner
.align 32
.Lmulx4x_inner:
mulxq 0(%rsi),%r10,%rax
adcxq %rbp,%r15
adoxq %r14,%r10
mulxq 8(%rsi),%r11,%r14
adcxq 0(%rbx),%r10
adoxq %rax,%r11
mulxq 16(%rsi),%r12,%rax
adcxq 8(%rbx),%r11
adoxq %r14,%r12
mulxq 24(%rsi),%r13,%r14
movq %r8,%rdx
adcxq 16(%rbx),%r12
adoxq %rax,%r13
adcxq 24(%rbx),%r13
adoxq %rbp,%r14
leaq 32(%rsi),%rsi
leaq 32(%rbx),%rbx
adcxq %rbp,%r14
adoxq %r15,%r10
mulxq 0(%rcx),%rax,%r15
adcxq %rax,%r10
adoxq %r15,%r11
mulxq 8(%rcx),%rax,%r15
adcxq %rax,%r11
adoxq %r15,%r12
mulxq 16(%rcx),%rax,%r15
movq %r10,-40(%rbx)
adcxq %rax,%r12
adoxq %r15,%r13
mulxq 24(%rcx),%rax,%r15
movq %r9,%rdx
movq %r11,-32(%rbx)
movq %r12,-24(%rbx)
adcxq %rax,%r13
adoxq %rbp,%r15
leaq 32(%rcx),%rcx
movq %r13,-16(%rbx)
decq %rdi
jnz .Lmulx4x_inner
movq 0(%rsp),%rax
movq 8(%rsp),%rdi
adcq %rbp,%r15
subq 0(%rbx),%rbp
adcq %r15,%r14
sbbq %r15,%r15
movq %r14,-8(%rbx)
cmpq 16(%rsp),%rdi
jne .Lmulx4x_outer
leaq 64(%rsp),%rbx
subq %rax,%rcx
negq %r15
movq %rax,%rdx
shrq $3+2,%rax
movq 32(%rsp),%rdi
jmp .Lmulx4x_sub
.align 32
.Lmulx4x_sub:
movq 0(%rbx),%r11
movq 8(%rbx),%r12
movq 16(%rbx),%r13
movq 24(%rbx),%r14
leaq 32(%rbx),%rbx
sbbq 0(%rcx),%r11
sbbq 8(%rcx),%r12
sbbq 16(%rcx),%r13
sbbq 24(%rcx),%r14
leaq 32(%rcx),%rcx
movq %r11,0(%rdi)
movq %r12,8(%rdi)
movq %r13,16(%rdi)
movq %r14,24(%rdi)
leaq 32(%rdi),%rdi
decq %rax
jnz .Lmulx4x_sub
sbbq $0,%r15
leaq 64(%rsp),%rbx
subq %rdx,%rdi
.byte 102,73,15,110,207
pxor %xmm0,%xmm0
pshufd $0,%xmm1,%xmm1
movq 40(%rsp),%rsi
.cfi_def_cfa %rsi,8
jmp .Lmulx4x_cond_copy
.align 32
.Lmulx4x_cond_copy:
movdqa 0(%rbx),%xmm2
movdqa 16(%rbx),%xmm3
leaq 32(%rbx),%rbx
movdqu 0(%rdi),%xmm4
movdqu 16(%rdi),%xmm5
leaq 32(%rdi),%rdi
movdqa %xmm0,-32(%rbx)
movdqa %xmm0,-16(%rbx)
pcmpeqd %xmm1,%xmm0
pand %xmm1,%xmm2
pand %xmm1,%xmm3
pand %xmm0,%xmm4
pand %xmm0,%xmm5
pxor %xmm0,%xmm0
por %xmm2,%xmm4
por %xmm3,%xmm5
movdqu %xmm4,-32(%rdi)
movdqu %xmm5,-16(%rdi)
subq $32,%rdx
jnz .Lmulx4x_cond_copy
movq %rdx,(%rbx)
movq $1,%rax
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmulx4x_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_mulx4x_mont,.-bn_mulx4x_mont
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16
#endif
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large


@ -0,0 +1,518 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.type abi_test_trampoline, @function
.globl abi_test_trampoline
.hidden abi_test_trampoline
.align 16
abi_test_trampoline:
.Labi_test_trampoline_seh_begin:
.cfi_startproc
subq $120,%rsp
.cfi_adjust_cfa_offset 120
.Labi_test_trampoline_seh_prolog_alloc:
movq %r8,48(%rsp)
movq %rbx,64(%rsp)
.cfi_offset rbx, -64
.Labi_test_trampoline_seh_prolog_rbx:
movq %rbp,72(%rsp)
.cfi_offset rbp, -56
.Labi_test_trampoline_seh_prolog_rbp:
movq %r12,80(%rsp)
.cfi_offset r12, -48
.Labi_test_trampoline_seh_prolog_r12:
movq %r13,88(%rsp)
.cfi_offset r13, -40
.Labi_test_trampoline_seh_prolog_r13:
movq %r14,96(%rsp)
.cfi_offset r14, -32
.Labi_test_trampoline_seh_prolog_r14:
movq %r15,104(%rsp)
.cfi_offset r15, -24
.Labi_test_trampoline_seh_prolog_r15:
.Labi_test_trampoline_seh_prolog_end:
movq 0(%rsi),%rbx
movq 8(%rsi),%rbp
movq 16(%rsi),%r12
movq 24(%rsi),%r13
movq 32(%rsi),%r14
movq 40(%rsi),%r15
movq %rdi,32(%rsp)
movq %rsi,40(%rsp)
movq %rdx,%r10
movq %rcx,%r11
decq %r11
js .Largs_done
movq (%r10),%rdi
addq $8,%r10
decq %r11
js .Largs_done
movq (%r10),%rsi
addq $8,%r10
decq %r11
js .Largs_done
movq (%r10),%rdx
addq $8,%r10
decq %r11
js .Largs_done
movq (%r10),%rcx
addq $8,%r10
decq %r11
js .Largs_done
movq (%r10),%r8
addq $8,%r10
decq %r11
js .Largs_done
movq (%r10),%r9
addq $8,%r10
leaq 0(%rsp),%rax
.Largs_loop:
decq %r11
js .Largs_done
movq %r11,56(%rsp)
movq (%r10),%r11
movq %r11,(%rax)
movq 56(%rsp),%r11
addq $8,%r10
addq $8,%rax
jmp .Largs_loop
.Largs_done:
movq 32(%rsp),%rax
movq 48(%rsp),%r10
testq %r10,%r10
jz .Lno_unwind
pushfq
orq $0x100,0(%rsp)
popfq
nop
.globl abi_test_unwind_start
.hidden abi_test_unwind_start
abi_test_unwind_start:
call *%rax
.globl abi_test_unwind_return
.hidden abi_test_unwind_return
abi_test_unwind_return:
pushfq
andq $-0x101,0(%rsp)
popfq
.globl abi_test_unwind_stop
.hidden abi_test_unwind_stop
abi_test_unwind_stop:
jmp .Lcall_done
.Lno_unwind:
call *%rax
.Lcall_done:
movq 40(%rsp),%rsi
movq %rbx,0(%rsi)
movq %rbp,8(%rsi)
movq %r12,16(%rsi)
movq %r13,24(%rsi)
movq %r14,32(%rsi)
movq %r15,40(%rsi)
movq 64(%rsp),%rbx
.cfi_restore rbx
movq 72(%rsp),%rbp
.cfi_restore rbp
movq 80(%rsp),%r12
.cfi_restore r12
movq 88(%rsp),%r13
.cfi_restore r13
movq 96(%rsp),%r14
.cfi_restore r14
movq 104(%rsp),%r15
.cfi_restore r15
addq $120,%rsp
.cfi_adjust_cfa_offset -120
.byte 0xf3,0xc3
.cfi_endproc
.Labi_test_trampoline_seh_end:
.size abi_test_trampoline,.-abi_test_trampoline
.type abi_test_clobber_rax, @function
.globl abi_test_clobber_rax
.hidden abi_test_clobber_rax
.align 16
abi_test_clobber_rax:
xorq %rax,%rax
.byte 0xf3,0xc3
.size abi_test_clobber_rax,.-abi_test_clobber_rax
.type abi_test_clobber_rbx, @function
.globl abi_test_clobber_rbx
.hidden abi_test_clobber_rbx
.align 16
abi_test_clobber_rbx:
xorq %rbx,%rbx
.byte 0xf3,0xc3
.size abi_test_clobber_rbx,.-abi_test_clobber_rbx
.type abi_test_clobber_rcx, @function
.globl abi_test_clobber_rcx
.hidden abi_test_clobber_rcx
.align 16
abi_test_clobber_rcx:
xorq %rcx,%rcx
.byte 0xf3,0xc3
.size abi_test_clobber_rcx,.-abi_test_clobber_rcx
.type abi_test_clobber_rdx, @function
.globl abi_test_clobber_rdx
.hidden abi_test_clobber_rdx
.align 16
abi_test_clobber_rdx:
xorq %rdx,%rdx
.byte 0xf3,0xc3
.size abi_test_clobber_rdx,.-abi_test_clobber_rdx
.type abi_test_clobber_rdi, @function
.globl abi_test_clobber_rdi
.hidden abi_test_clobber_rdi
.align 16
abi_test_clobber_rdi:
xorq %rdi,%rdi
.byte 0xf3,0xc3
.size abi_test_clobber_rdi,.-abi_test_clobber_rdi
.type abi_test_clobber_rsi, @function
.globl abi_test_clobber_rsi
.hidden abi_test_clobber_rsi
.align 16
abi_test_clobber_rsi:
xorq %rsi,%rsi
.byte 0xf3,0xc3
.size abi_test_clobber_rsi,.-abi_test_clobber_rsi
.type abi_test_clobber_rbp, @function
.globl abi_test_clobber_rbp
.hidden abi_test_clobber_rbp
.align 16
abi_test_clobber_rbp:
xorq %rbp,%rbp
.byte 0xf3,0xc3
.size abi_test_clobber_rbp,.-abi_test_clobber_rbp
.type abi_test_clobber_r8, @function
.globl abi_test_clobber_r8
.hidden abi_test_clobber_r8
.align 16
abi_test_clobber_r8:
xorq %r8,%r8
.byte 0xf3,0xc3
.size abi_test_clobber_r8,.-abi_test_clobber_r8
.type abi_test_clobber_r9, @function
.globl abi_test_clobber_r9
.hidden abi_test_clobber_r9
.align 16
abi_test_clobber_r9:
xorq %r9,%r9
.byte 0xf3,0xc3
.size abi_test_clobber_r9,.-abi_test_clobber_r9
.type abi_test_clobber_r10, @function
.globl abi_test_clobber_r10
.hidden abi_test_clobber_r10
.align 16
abi_test_clobber_r10:
xorq %r10,%r10
.byte 0xf3,0xc3
.size abi_test_clobber_r10,.-abi_test_clobber_r10
.type abi_test_clobber_r11, @function
.globl abi_test_clobber_r11
.hidden abi_test_clobber_r11
.align 16
abi_test_clobber_r11:
xorq %r11,%r11
.byte 0xf3,0xc3
.size abi_test_clobber_r11,.-abi_test_clobber_r11
.type abi_test_clobber_r12, @function
.globl abi_test_clobber_r12
.hidden abi_test_clobber_r12
.align 16
abi_test_clobber_r12:
xorq %r12,%r12
.byte 0xf3,0xc3
.size abi_test_clobber_r12,.-abi_test_clobber_r12
.type abi_test_clobber_r13, @function
.globl abi_test_clobber_r13
.hidden abi_test_clobber_r13
.align 16
abi_test_clobber_r13:
xorq %r13,%r13
.byte 0xf3,0xc3
.size abi_test_clobber_r13,.-abi_test_clobber_r13
.type abi_test_clobber_r14, @function
.globl abi_test_clobber_r14
.hidden abi_test_clobber_r14
.align 16
abi_test_clobber_r14:
xorq %r14,%r14
.byte 0xf3,0xc3
.size abi_test_clobber_r14,.-abi_test_clobber_r14
.type abi_test_clobber_r15, @function
.globl abi_test_clobber_r15
.hidden abi_test_clobber_r15
.align 16
abi_test_clobber_r15:
xorq %r15,%r15
.byte 0xf3,0xc3
.size abi_test_clobber_r15,.-abi_test_clobber_r15
.type abi_test_clobber_xmm0, @function
.globl abi_test_clobber_xmm0
.hidden abi_test_clobber_xmm0
.align 16
abi_test_clobber_xmm0:
pxor %xmm0,%xmm0
.byte 0xf3,0xc3
.size abi_test_clobber_xmm0,.-abi_test_clobber_xmm0
.type abi_test_clobber_xmm1, @function
.globl abi_test_clobber_xmm1
.hidden abi_test_clobber_xmm1
.align 16
abi_test_clobber_xmm1:
pxor %xmm1,%xmm1
.byte 0xf3,0xc3
.size abi_test_clobber_xmm1,.-abi_test_clobber_xmm1
.type abi_test_clobber_xmm2, @function
.globl abi_test_clobber_xmm2
.hidden abi_test_clobber_xmm2
.align 16
abi_test_clobber_xmm2:
pxor %xmm2,%xmm2
.byte 0xf3,0xc3
.size abi_test_clobber_xmm2,.-abi_test_clobber_xmm2
.type abi_test_clobber_xmm3, @function
.globl abi_test_clobber_xmm3
.hidden abi_test_clobber_xmm3
.align 16
abi_test_clobber_xmm3:
pxor %xmm3,%xmm3
.byte 0xf3,0xc3
.size abi_test_clobber_xmm3,.-abi_test_clobber_xmm3
.type abi_test_clobber_xmm4, @function
.globl abi_test_clobber_xmm4
.hidden abi_test_clobber_xmm4
.align 16
abi_test_clobber_xmm4:
pxor %xmm4,%xmm4
.byte 0xf3,0xc3
.size abi_test_clobber_xmm4,.-abi_test_clobber_xmm4
.type abi_test_clobber_xmm5, @function
.globl abi_test_clobber_xmm5
.hidden abi_test_clobber_xmm5
.align 16
abi_test_clobber_xmm5:
pxor %xmm5,%xmm5
.byte 0xf3,0xc3
.size abi_test_clobber_xmm5,.-abi_test_clobber_xmm5
.type abi_test_clobber_xmm6, @function
.globl abi_test_clobber_xmm6
.hidden abi_test_clobber_xmm6
.align 16
abi_test_clobber_xmm6:
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
.size abi_test_clobber_xmm6,.-abi_test_clobber_xmm6
.type abi_test_clobber_xmm7, @function
.globl abi_test_clobber_xmm7
.hidden abi_test_clobber_xmm7
.align 16
abi_test_clobber_xmm7:
pxor %xmm7,%xmm7
.byte 0xf3,0xc3
.size abi_test_clobber_xmm7,.-abi_test_clobber_xmm7
.type abi_test_clobber_xmm8, @function
.globl abi_test_clobber_xmm8
.hidden abi_test_clobber_xmm8
.align 16
abi_test_clobber_xmm8:
pxor %xmm8,%xmm8
.byte 0xf3,0xc3
.size abi_test_clobber_xmm8,.-abi_test_clobber_xmm8
.type abi_test_clobber_xmm9, @function
.globl abi_test_clobber_xmm9
.hidden abi_test_clobber_xmm9
.align 16
abi_test_clobber_xmm9:
pxor %xmm9,%xmm9
.byte 0xf3,0xc3
.size abi_test_clobber_xmm9,.-abi_test_clobber_xmm9
.type abi_test_clobber_xmm10, @function
.globl abi_test_clobber_xmm10
.hidden abi_test_clobber_xmm10
.align 16
abi_test_clobber_xmm10:
pxor %xmm10,%xmm10
.byte 0xf3,0xc3
.size abi_test_clobber_xmm10,.-abi_test_clobber_xmm10
.type abi_test_clobber_xmm11, @function
.globl abi_test_clobber_xmm11
.hidden abi_test_clobber_xmm11
.align 16
abi_test_clobber_xmm11:
pxor %xmm11,%xmm11
.byte 0xf3,0xc3
.size abi_test_clobber_xmm11,.-abi_test_clobber_xmm11
.type abi_test_clobber_xmm12, @function
.globl abi_test_clobber_xmm12
.hidden abi_test_clobber_xmm12
.align 16
abi_test_clobber_xmm12:
pxor %xmm12,%xmm12
.byte 0xf3,0xc3
.size abi_test_clobber_xmm12,.-abi_test_clobber_xmm12
.type abi_test_clobber_xmm13, @function
.globl abi_test_clobber_xmm13
.hidden abi_test_clobber_xmm13
.align 16
abi_test_clobber_xmm13:
pxor %xmm13,%xmm13
.byte 0xf3,0xc3
.size abi_test_clobber_xmm13,.-abi_test_clobber_xmm13
.type abi_test_clobber_xmm14, @function
.globl abi_test_clobber_xmm14
.hidden abi_test_clobber_xmm14
.align 16
abi_test_clobber_xmm14:
pxor %xmm14,%xmm14
.byte 0xf3,0xc3
.size abi_test_clobber_xmm14,.-abi_test_clobber_xmm14
.type abi_test_clobber_xmm15, @function
.globl abi_test_clobber_xmm15
.hidden abi_test_clobber_xmm15
.align 16
abi_test_clobber_xmm15:
pxor %xmm15,%xmm15
.byte 0xf3,0xc3
.size abi_test_clobber_xmm15,.-abi_test_clobber_xmm15
.type abi_test_bad_unwind_wrong_register, @function
.globl abi_test_bad_unwind_wrong_register
.hidden abi_test_bad_unwind_wrong_register
.align 16
abi_test_bad_unwind_wrong_register:
.cfi_startproc
.Labi_test_bad_unwind_wrong_register_seh_begin:
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-16
.Labi_test_bad_unwind_wrong_register_seh_push_r13:
nop
popq %r12
.cfi_adjust_cfa_offset -8
.cfi_restore %r12
.byte 0xf3,0xc3
.Labi_test_bad_unwind_wrong_register_seh_end:
.cfi_endproc
.size abi_test_bad_unwind_wrong_register,.-abi_test_bad_unwind_wrong_register
.type abi_test_bad_unwind_temporary, @function
.globl abi_test_bad_unwind_temporary
.hidden abi_test_bad_unwind_temporary
.align 16
abi_test_bad_unwind_temporary:
.cfi_startproc
.Labi_test_bad_unwind_temporary_seh_begin:
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-16
.Labi_test_bad_unwind_temporary_seh_push_r12:
movq %r12,%rax
incq %rax
movq %rax,(%rsp)
movq %r12,(%rsp)
popq %r12
.cfi_adjust_cfa_offset -8
.cfi_restore %r12
.byte 0xf3,0xc3
.Labi_test_bad_unwind_temporary_seh_end:
.cfi_endproc
.size abi_test_bad_unwind_temporary,.-abi_test_bad_unwind_temporary
.type abi_test_set_direction_flag, @function
.globl abi_test_get_and_clear_direction_flag
.hidden abi_test_get_and_clear_direction_flag
abi_test_get_and_clear_direction_flag:
pushfq
popq %rax
andq $0x400,%rax
shrq $10,%rax
cld
.byte 0xf3,0xc3
.size abi_test_get_and_clear_direction_flag,.-abi_test_get_and_clear_direction_flag
.type abi_test_set_direction_flag, @function
.globl abi_test_set_direction_flag
.hidden abi_test_set_direction_flag
abi_test_set_direction_flag:
std
.byte 0xf3,0xc3
.size abi_test_set_direction_flag,.-abi_test_set_direction_flag
#endif
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large


@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _ChaCha20_ctr32
.private_extern _ChaCha20_ctr32


@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.private_extern __x86_AES_encrypt_compact
.align 4
@ -961,11 +967,11 @@ LAES_Te:
.long 16,32,64,128
.long 27,54,0,0
.long 0,0,0,0
.globl _asm_AES_encrypt
.private_extern _asm_AES_encrypt
.globl _aes_nohw_encrypt
.private_extern _aes_nohw_encrypt
.align 4
_asm_AES_encrypt:
L_asm_AES_encrypt_begin:
_aes_nohw_encrypt:
L_aes_nohw_encrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
@ -2145,11 +2151,11 @@ LAES_Td:
.byte 200,235,187,60,131,83,153,97
.byte 23,43,4,126,186,119,214,38
.byte 225,105,20,99,85,33,12,125
.globl _asm_AES_decrypt
.private_extern _asm_AES_decrypt
.globl _aes_nohw_decrypt
.private_extern _aes_nohw_decrypt
.align 4
_asm_AES_decrypt:
L_asm_AES_decrypt_begin:
_aes_nohw_decrypt:
L_aes_nohw_decrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
@ -2209,11 +2215,11 @@ L011x86:
popl %ebx
popl %ebp
ret
.globl _asm_AES_cbc_encrypt
.private_extern _asm_AES_cbc_encrypt
.globl _aes_nohw_cbc_encrypt
.private_extern _aes_nohw_cbc_encrypt
.align 4
_asm_AES_cbc_encrypt:
L_asm_AES_cbc_encrypt_begin:
_aes_nohw_cbc_encrypt:
L_aes_nohw_cbc_encrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
@ -2970,18 +2976,18 @@ L045exit:
popl %ebx
popl %ebp
ret
.globl _asm_AES_set_encrypt_key
.private_extern _asm_AES_set_encrypt_key
.globl _aes_nohw_set_encrypt_key
.private_extern _aes_nohw_set_encrypt_key
.align 4
_asm_AES_set_encrypt_key:
L_asm_AES_set_encrypt_key_begin:
_aes_nohw_set_encrypt_key:
L_aes_nohw_set_encrypt_key_begin:
call __x86_AES_set_encrypt_key
ret
.globl _asm_AES_set_decrypt_key
.private_extern _asm_AES_set_decrypt_key
.globl _aes_nohw_set_decrypt_key
.private_extern _aes_nohw_set_decrypt_key
.align 4
_asm_AES_set_decrypt_key:
L_asm_AES_set_decrypt_key_begin:
_aes_nohw_set_decrypt_key:
L_aes_nohw_set_decrypt_key_begin:
call __x86_AES_set_encrypt_key
cmpl $0,%eax
je L054proceed

File diff suppressed because it is too large


@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _bn_mul_add_words
.private_extern _bn_mul_add_words


@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _bn_mul_comba8
.private_extern _bn_mul_comba8


@ -0,0 +1,289 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _gcm_gmult_ssse3
.private_extern _gcm_gmult_ssse3
.align 4
_gcm_gmult_ssse3:
L_gcm_gmult_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
movdqu (%edi),%xmm0
call L000pic_point
L000pic_point:
popl %eax
movdqa Lreverse_bytes-L000pic_point(%eax),%xmm7
movdqa Llow4_mask-L000pic_point(%eax),%xmm2
.byte 102,15,56,0,199
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
L001loop_row_1:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz L001loop_row_1
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
L002loop_row_2:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz L002loop_row_2
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax
L003loop_row_3:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz L003loop_row_3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,0,215
movdqu %xmm2,(%edi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _gcm_ghash_ssse3
.private_extern _gcm_ghash_ssse3
.align 4
_gcm_ghash_ssse3:
L_gcm_ghash_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%edx
movl 32(%esp),%ecx
movdqu (%edi),%xmm0
call L004pic_point
L004pic_point:
popl %ebx
movdqa Lreverse_bytes-L004pic_point(%ebx),%xmm7
andl $-16,%ecx
.byte 102,15,56,0,199
pxor %xmm3,%xmm3
L005loop_ghash:
movdqa Llow4_mask-L004pic_point(%ebx),%xmm2
movdqu (%edx),%xmm1
.byte 102,15,56,0,207
pxor %xmm1,%xmm0
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
movl $5,%eax
L006loop_row_4:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz L006loop_row_4
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
L007loop_row_5:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz L007loop_row_5
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax
L008loop_row_6:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz L008loop_row_6
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movdqa %xmm2,%xmm0
leal -256(%esi),%esi
leal 16(%edx),%edx
subl $16,%ecx
jnz L005loop_ghash
.byte 102,15,56,0,199
movdqu %xmm0,(%edi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.align 4,0x90
Lreverse_bytes:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.align 4,0x90
Llow4_mask:
.long 252645135,252645135,252645135,252645135
#endif
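
The newly added file above provides SSSE3 implementations of the two GHASH primitives. Judging from the stack-argument loads (Xi at 20(%esp), Htable at 24(%esp), plus an input pointer and a byte count that is masked down to a multiple of 16 in the ghash variant), the corresponding C declarations would look roughly like the sketch below; the u128 table-entry type and the parameter names are assumptions drawn from BoringSSL's GCM internals, not from this diff.

/* Sketch only: inferred prototypes for the SSSE3 GHASH routines above.
 * Xi is the 16-byte GHASH accumulator, Htable a 16-entry table derived from
 * the hash key H, and len is consumed in whole 16-byte blocks. */
#include <stddef.h>
#include <stdint.h>

typedef struct { uint64_t hi, lo; } u128; /* assumed layout of one table row */

void gcm_gmult_ssse3(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_ssse3(uint64_t Xi[2], const u128 Htable[16],
                     const uint8_t *in, size_t len);

Both routines walk the 16 table rows in three batches of 5, 5, and 6 iterations (the loop_row loops), and gcm_ghash_ssse3 rewinds Htable by 256 bytes (leal -256(%esi),%esi) before starting the next 16-byte block.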

@@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _gcm_gmult_4bit_mmx
.private_extern _gcm_gmult_4bit_mmx

@@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _md5_block_asm_data_order
.private_extern _md5_block_asm_data_order

@@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _sha1_block_data_order
.private_extern _sha1_block_data_order

@@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _sha256_block_data_order
.private_extern _sha256_block_data_order

Some files were not shown because too many files have changed in this diff.