Roll boringssl/src fc9c67599..76918d016

The roll_boringssl.py script is also adjusted to work with the latest
BoringSSL. ASM compilation is disabled on Windows x64, which fails to
compile; this also means there is no hardware AES on Windows x64.

Issue #198.
Change-Id: Ib7e8ff506f014c8c733f1882eeeddbe34fa28511

parent 0342adb132
commit c80f053ba2
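
For context on why a single define suffices: the generated .S files in this
roll guard their entire bodies with OPENSSL_NO_ASM (see the vendored
aesv8-armx64.S below), and BoringSSL's C sources use the same macro to select
portable fallbacks. Defining it (together with dropping the yasm step, as the
.gyp hunk below does) removes all assembly, including hardware AES. A minimal
sketch of the guard pattern, not tied to any one file:

    /* Guard used by BoringSSL's generated assembly files. When the build
     * defines OPENSSL_NO_ASM, the assembly body compiles to nothing and the
     * portable C implementations in crypto/ take over -- hence no hardware
     * AES on Windows x64 after this change. */
    #if !defined(OPENSSL_NO_ASM)
    /* ... platform assembly ... */
    #endif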
DEPS (2 lines changed)
@@ -43,7 +43,7 @@ deps = {
   # Make sure the version matches the one in
   # src/packager/third_party/boringssl, which contains perl generated files.
   "src/packager/third_party/boringssl/src":
-    Var("github") + "/google/boringssl@fc9c67599d9bdeb2e0467085133b81a8e28f77a4",
+    Var("github") + "/google/boringssl@76918d016414bf1d71a86d28239566fbcf8aacf0",

   "src/packager/third_party/curl/source":
     Var("github") + "/curl/curl@62c07b5743490ce373910f469abc8cdc759bec2b", #7.57.0
@@ -57,16 +57,18 @@ crypto_sources = [
"src/crypto/bytestring/cbb.c",
"src/crypto/bytestring/cbs.c",
"src/crypto/bytestring/internal.h",
"src/crypto/bytestring/unicode.c",
"src/crypto/chacha/chacha.c",
"src/crypto/chacha/internal.h",
"src/crypto/cipher_extra/cipher_extra.c",
"src/crypto/cipher_extra/derive_key.c",
"src/crypto/cipher_extra/e_aesccm.c",
"src/crypto/cipher_extra/e_aesctrhmac.c",
"src/crypto/cipher_extra/e_aesgcmsiv.c",
"src/crypto/cipher_extra/e_chacha20poly1305.c",
"src/crypto/cipher_extra/e_null.c",
"src/crypto/cipher_extra/e_rc2.c",
"src/crypto/cipher_extra/e_rc4.c",
"src/crypto/cipher_extra/e_ssl3.c",
"src/crypto/cipher_extra/e_tls.c",
"src/crypto/cipher_extra/internal.h",
"src/crypto/cipher_extra/tls_cbc.c",

@@ -74,14 +76,15 @@ crypto_sources = [
"src/crypto/conf/conf.c",
"src/crypto/conf/conf_def.h",
"src/crypto/conf/internal.h",
"src/crypto/cpu-aarch64-fuchsia.c",
"src/crypto/cpu-aarch64-linux.c",
"src/crypto/cpu-arm-linux.c",
"src/crypto/cpu-arm-linux.h",
"src/crypto/cpu-arm.c",
"src/crypto/cpu-intel.c",
"src/crypto/cpu-ppc64le.c",
"src/crypto/crypto.c",
"src/crypto/curve25519/spake25519.c",
"src/crypto/curve25519/x25519-x86_64.c",
"src/crypto/dh/check.c",
"src/crypto/dh/dh.c",
"src/crypto/dh/dh_asn1.c",

@@ -90,7 +93,8 @@ crypto_sources = [
"src/crypto/dsa/dsa.c",
"src/crypto/dsa/dsa_asn1.c",
"src/crypto/ec_extra/ec_asn1.c",
"src/crypto/ecdh/ecdh.c",
"src/crypto/ec_extra/ec_derive.c",
"src/crypto/ecdh_extra/ecdh_extra.c",
"src/crypto/ecdsa_extra/ecdsa_asn1.c",
"src/crypto/engine/engine.c",
"src/crypto/err/err.c",

@@ -107,6 +111,8 @@ crypto_sources = [
"src/crypto/evp/p_ed25519_asn1.c",
"src/crypto/evp/p_rsa.c",
"src/crypto/evp/p_rsa_asn1.c",
"src/crypto/evp/p_x25519.c",
"src/crypto/evp/p_x25519_asn1.c",
"src/crypto/evp/pbkdf.c",
"src/crypto/evp/print.c",
"src/crypto/evp/scrypt.c",

@@ -124,11 +130,17 @@ crypto_sources = [
"src/crypto/fipsmodule/ec/internal.h",
"src/crypto/fipsmodule/ec/p256-x86_64-table.h",
"src/crypto/fipsmodule/ec/p256-x86_64.h",
"src/crypto/fipsmodule/fips_shared_support.c",
"src/crypto/fipsmodule/is_fips.c",
"src/crypto/fipsmodule/md5/internal.h",
"src/crypto/fipsmodule/modes/internal.h",
"src/crypto/fipsmodule/rand/internal.h",
"src/crypto/fipsmodule/rsa/internal.h",
"src/crypto/fipsmodule/sha/internal.h",
"src/crypto/fipsmodule/tls/internal.h",
"src/crypto/hkdf/hkdf.c",
"src/crypto/hrss/hrss.c",
"src/crypto/hrss/internal.h",
"src/crypto/internal.h",
"src/crypto/lhash/lhash.c",
"src/crypto/mem.c",

@@ -165,6 +177,8 @@ crypto_sources = [
"src/crypto/refcount_c11.c",
"src/crypto/refcount_lock.c",
"src/crypto/rsa_extra/rsa_asn1.c",
"src/crypto/rsa_extra/rsa_print.c",
"src/crypto/siphash/siphash.c",
"src/crypto/stack/stack.c",
"src/crypto/thread.c",
"src/crypto/thread_none.c",

@@ -223,6 +237,7 @@ crypto_sources = [
"src/crypto/x509/x_x509.c",
"src/crypto/x509/x_x509a.c",
"src/crypto/x509v3/ext_dat.h",
"src/crypto/x509v3/internal.h",
"src/crypto/x509v3/pcy_cache.c",
"src/crypto/x509v3/pcy_data.c",
"src/crypto/x509v3/pcy_int.h",

@@ -246,6 +261,7 @@ crypto_sources = [
"src/crypto/x509v3/v3_int.c",
"src/crypto/x509v3/v3_lib.c",
"src/crypto/x509v3/v3_ncons.c",
"src/crypto/x509v3/v3_ocsp.c",
"src/crypto/x509v3/v3_pci.c",
"src/crypto/x509v3/v3_pcia.c",
"src/crypto/x509v3/v3_pcons.c",

@@ -256,6 +272,25 @@ crypto_sources = [
"src/crypto/x509v3/v3_skey.c",
"src/crypto/x509v3/v3_sxnet.c",
"src/crypto/x509v3/v3_utl.c",
"src/third_party/fiat/curve25519.c",
"src/third_party/fiat/curve25519_32.h",
"src/third_party/fiat/curve25519_64.h",
"src/third_party/fiat/curve25519_tables.h",
"src/third_party/fiat/internal.h",
"src/third_party/fiat/p256_32.h",
"src/third_party/fiat/p256_64.h",
"src/third_party/sike/asm/fp_generic.c",
"src/third_party/sike/curve_params.c",
"src/third_party/sike/fpx.c",
"src/third_party/sike/fpx.h",
"src/third_party/sike/isogeny.c",
"src/third_party/sike/isogeny.h",
"src/third_party/sike/sike.c",
"src/third_party/sike/sike.h",
"src/third_party/sike/utils.h",
]

crypto_headers = [
"src/include/openssl/aead.h",
"src/include/openssl/aes.h",
"src/include/openssl/arm_arch.h",

@@ -282,6 +317,7 @@ crypto_sources = [
"src/include/openssl/dh.h",
"src/include/openssl/digest.h",
"src/include/openssl/dsa.h",
"src/include/openssl/e_os2.h",
"src/include/openssl/ec.h",
"src/include/openssl/ec_key.h",
"src/include/openssl/ecdh.h",

@@ -292,9 +328,9 @@ crypto_sources = [
"src/include/openssl/ex_data.h",
"src/include/openssl/hkdf.h",
"src/include/openssl/hmac.h",
"src/include/openssl/hrss.h",
"src/include/openssl/is_boringssl.h",
"src/include/openssl/lhash.h",
"src/include/openssl/lhash_macros.h",
"src/include/openssl/md4.h",
"src/include/openssl/md5.h",
"src/include/openssl/mem.h",

@@ -317,31 +353,25 @@ crypto_sources = [
"src/include/openssl/rsa.h",
"src/include/openssl/safestack.h",
"src/include/openssl/sha.h",
"src/include/openssl/siphash.h",
"src/include/openssl/span.h",
"src/include/openssl/srtp.h",
"src/include/openssl/stack.h",
"src/include/openssl/thread.h",
"src/include/openssl/type_check.h",
"src/include/openssl/x509.h",
"src/include/openssl/x509_vfy.h",
"src/include/openssl/x509v3.h",
"src/third_party/fiat/curve25519.c",
"src/third_party/fiat/internal.h",
]

ssl_sources = [
"src/include/openssl/dtls1.h",
"src/include/openssl/ssl.h",
"src/include/openssl/ssl3.h",
"src/include/openssl/tls1.h",
"src/ssl/bio_ssl.cc",
"src/ssl/custom_extensions.cc",
"src/ssl/d1_both.cc",
"src/ssl/d1_lib.cc",
"src/ssl/d1_pkt.cc",
"src/ssl/d1_srtp.cc",
"src/ssl/dtls_method.cc",
"src/ssl/dtls_record.cc",
"src/ssl/handoff.cc",
"src/ssl/handshake.cc",
"src/ssl/handshake_client.cc",
"src/ssl/handshake_server.cc",

@@ -373,14 +403,26 @@ ssl_sources = [
"src/ssl/tls_record.cc",
]

ssl_headers = [
"src/include/openssl/dtls1.h",
"src/include/openssl/srtp.h",
"src/include/openssl/ssl.h",
"src/include/openssl/ssl3.h",
"src/include/openssl/tls1.h",
]

crypto_sources_ios_aarch64 = [
"ios-aarch64/crypto/chacha/chacha-armv8.S",
"ios-aarch64/crypto/fipsmodule/aesv8-armx64.S",
"ios-aarch64/crypto/fipsmodule/armv8-mont.S",
"ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S",
"ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S",
"ios-aarch64/crypto/fipsmodule/sha1-armv8.S",
"ios-aarch64/crypto/fipsmodule/sha256-armv8.S",
"ios-aarch64/crypto/fipsmodule/sha512-armv8.S",
"ios-aarch64/crypto/fipsmodule/vpaes-armv8.S",
"ios-aarch64/crypto/test/trampoline-armv8.S",
"ios-aarch64/crypto/third_party/sike/asm/fp-armv8.S",
]

crypto_sources_ios_arm = [

@@ -394,16 +436,22 @@ crypto_sources_ios_arm = [
"ios-arm/crypto/fipsmodule/sha1-armv4-large.S",
"ios-arm/crypto/fipsmodule/sha256-armv4.S",
"ios-arm/crypto/fipsmodule/sha512-armv4.S",
"ios-arm/crypto/fipsmodule/vpaes-armv7.S",
"ios-arm/crypto/test/trampoline-armv4.S",
]

crypto_sources_linux_aarch64 = [
"linux-aarch64/crypto/chacha/chacha-armv8.S",
"linux-aarch64/crypto/fipsmodule/aesv8-armx64.S",
"linux-aarch64/crypto/fipsmodule/armv8-mont.S",
"linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S",
"linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S",
"linux-aarch64/crypto/fipsmodule/sha1-armv8.S",
"linux-aarch64/crypto/fipsmodule/sha256-armv8.S",
"linux-aarch64/crypto/fipsmodule/sha512-armv8.S",
"linux-aarch64/crypto/fipsmodule/vpaes-armv8.S",
"linux-aarch64/crypto/test/trampoline-armv8.S",
"linux-aarch64/crypto/third_party/sike/asm/fp-armv8.S",
]

crypto_sources_linux_arm = [

@@ -417,6 +465,8 @@ crypto_sources_linux_arm = [
"linux-arm/crypto/fipsmodule/sha1-armv4-large.S",
"linux-arm/crypto/fipsmodule/sha256-armv4.S",
"linux-arm/crypto/fipsmodule/sha512-armv4.S",
"linux-arm/crypto/fipsmodule/vpaes-armv7.S",
"linux-arm/crypto/test/trampoline-armv4.S",
"src/crypto/curve25519/asm/x25519-asm-arm.S",
"src/crypto/poly1305/poly1305_arm_asm.S",
]

@@ -432,6 +482,7 @@ crypto_sources_linux_x86 = [
"linux-x86/crypto/fipsmodule/aesni-x86.S",
"linux-x86/crypto/fipsmodule/bn-586.S",
"linux-x86/crypto/fipsmodule/co-586.S",
"linux-x86/crypto/fipsmodule/ghash-ssse3-x86.S",
"linux-x86/crypto/fipsmodule/ghash-x86.S",
"linux-x86/crypto/fipsmodule/md5-586.S",
"linux-x86/crypto/fipsmodule/sha1-586.S",

@@ -439,6 +490,7 @@ crypto_sources_linux_x86 = [
"linux-x86/crypto/fipsmodule/sha512-586.S",
"linux-x86/crypto/fipsmodule/vpaes-x86.S",
"linux-x86/crypto/fipsmodule/x86-mont.S",
"linux-x86/crypto/test/trampoline-x86.S",
]

crypto_sources_linux_x86_64 = [

@@ -448,10 +500,11 @@ crypto_sources_linux_x86_64 = [
"linux-x86_64/crypto/fipsmodule/aes-x86_64.S",
"linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S",
"linux-x86_64/crypto/fipsmodule/aesni-x86_64.S",
"linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S",
"linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S",
"linux-x86_64/crypto/fipsmodule/ghash-x86_64.S",
"linux-x86_64/crypto/fipsmodule/md5-x86_64.S",
"linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S",
"linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S",
"linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S",
"linux-x86_64/crypto/fipsmodule/rsaz-avx2.S",
"linux-x86_64/crypto/fipsmodule/sha1-x86_64.S",

@@ -460,7 +513,9 @@ crypto_sources_linux_x86_64 = [
"linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S",
"linux-x86_64/crypto/fipsmodule/x86_64-mont.S",
"linux-x86_64/crypto/fipsmodule/x86_64-mont5.S",
"src/crypto/curve25519/asm/x25519-asm-x86_64.S",
"linux-x86_64/crypto/test/trampoline-x86_64.S",
"linux-x86_64/crypto/third_party/sike/asm/fp-x86_64.S",
"src/crypto/hrss/asm/poly_rq_mul.S",
]

crypto_sources_mac_x86 = [

@@ -469,6 +524,7 @@ crypto_sources_mac_x86 = [
"mac-x86/crypto/fipsmodule/aesni-x86.S",
"mac-x86/crypto/fipsmodule/bn-586.S",
"mac-x86/crypto/fipsmodule/co-586.S",
"mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S",
"mac-x86/crypto/fipsmodule/ghash-x86.S",
"mac-x86/crypto/fipsmodule/md5-586.S",
"mac-x86/crypto/fipsmodule/sha1-586.S",

@@ -476,6 +532,7 @@ crypto_sources_mac_x86 = [
"mac-x86/crypto/fipsmodule/sha512-586.S",
"mac-x86/crypto/fipsmodule/vpaes-x86.S",
"mac-x86/crypto/fipsmodule/x86-mont.S",
"mac-x86/crypto/test/trampoline-x86.S",
]

crypto_sources_mac_x86_64 = [

@@ -485,10 +542,11 @@ crypto_sources_mac_x86_64 = [
"mac-x86_64/crypto/fipsmodule/aes-x86_64.S",
"mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S",
"mac-x86_64/crypto/fipsmodule/aesni-x86_64.S",
"mac-x86_64/crypto/fipsmodule/bsaes-x86_64.S",
"mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S",
"mac-x86_64/crypto/fipsmodule/ghash-x86_64.S",
"mac-x86_64/crypto/fipsmodule/md5-x86_64.S",
"mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S",
"mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S",
"mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S",
"mac-x86_64/crypto/fipsmodule/rsaz-avx2.S",
"mac-x86_64/crypto/fipsmodule/sha1-x86_64.S",

@@ -497,7 +555,8 @@ crypto_sources_mac_x86_64 = [
"mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S",
"mac-x86_64/crypto/fipsmodule/x86_64-mont.S",
"mac-x86_64/crypto/fipsmodule/x86_64-mont5.S",
"src/crypto/curve25519/asm/x25519-asm-x86_64.S",
"mac-x86_64/crypto/test/trampoline-x86_64.S",
"mac-x86_64/crypto/third_party/sike/asm/fp-x86_64.S",
]

crypto_sources_win_x86 = [

@@ -506,6 +565,7 @@ crypto_sources_win_x86 = [
"win-x86/crypto/fipsmodule/aesni-x86.asm",
"win-x86/crypto/fipsmodule/bn-586.asm",
"win-x86/crypto/fipsmodule/co-586.asm",
"win-x86/crypto/fipsmodule/ghash-ssse3-x86.asm",
"win-x86/crypto/fipsmodule/ghash-x86.asm",
"win-x86/crypto/fipsmodule/md5-586.asm",
"win-x86/crypto/fipsmodule/sha1-586.asm",

@@ -513,6 +573,7 @@ crypto_sources_win_x86 = [
"win-x86/crypto/fipsmodule/sha512-586.asm",
"win-x86/crypto/fipsmodule/vpaes-x86.asm",
"win-x86/crypto/fipsmodule/x86-mont.asm",
"win-x86/crypto/test/trampoline-x86.asm",
]

crypto_sources_win_x86_64 = [

@@ -522,10 +583,11 @@ crypto_sources_win_x86_64 = [
"win-x86_64/crypto/fipsmodule/aes-x86_64.asm",
"win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm",
"win-x86_64/crypto/fipsmodule/aesni-x86_64.asm",
"win-x86_64/crypto/fipsmodule/bsaes-x86_64.asm",
"win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm",
"win-x86_64/crypto/fipsmodule/ghash-x86_64.asm",
"win-x86_64/crypto/fipsmodule/md5-x86_64.asm",
"win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm",
"win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.asm",
"win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm",
"win-x86_64/crypto/fipsmodule/rsaz-avx2.asm",
"win-x86_64/crypto/fipsmodule/sha1-x86_64.asm",

@@ -534,15 +596,19 @@ crypto_sources_win_x86_64 = [
"win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm",
"win-x86_64/crypto/fipsmodule/x86_64-mont.asm",
"win-x86_64/crypto/fipsmodule/x86_64-mont5.asm",
"win-x86_64/crypto/test/trampoline-x86_64.asm",
"win-x86_64/crypto/third_party/sike/asm/fp-x86_64.asm",
]

fuzzers = [
"arm_cpuinfo",
"bn_div",
"bn_mod_exp",
"cert",
"client",
"dtls_client",
"dtls_server",
"pkcs12",
"pkcs8",
"privkey",
"read_pem",
@@ -5,21 +5,28 @@
# This file is created by generate_build_files.py. Do not edit manually.

test_support_sources = [
"src/crypto/test/abi_test.h",
"src/crypto/test/file_test.cc",
"src/crypto/test/file_test.h",
"src/crypto/test/gtest_main.h",
"src/crypto/test/malloc.cc",
"src/crypto/test/test_util.cc",
"src/crypto/test/test_util.h",
"src/crypto/test/wycheproof_util.cc",
"src/crypto/test/wycheproof_util.h",
"src/ssl/test/async_bio.h",
"src/ssl/test/fuzzer.h",
"src/ssl/test/fuzzer_tags.h",
"src/ssl/test/handshake_util.h",
"src/ssl/test/packeted_bio.h",
"src/ssl/test/settings_writer.h",
"src/ssl/test/test_config.h",
"src/ssl/test/test_state.h",
]

crypto_test_sources = [
"crypto_test_data.cc",
"src/crypto/abi_self_test.cc",
"src/crypto/asn1/asn1_test.cc",
"src/crypto/base64/base64_test.cc",
"src/crypto/bio/bio_test.cc",

@@ -31,13 +38,14 @@ crypto_test_sources = [
"src/crypto/cmac/cmac_test.cc",
"src/crypto/compiler_test.cc",
"src/crypto/constant_time_test.cc",
"src/crypto/cpu-arm-linux_test.cc",
"src/crypto/curve25519/ed25519_test.cc",
"src/crypto/curve25519/spake25519_test.cc",
"src/crypto/curve25519/x25519_test.cc",
"src/crypto/dh/dh_test.cc",
"src/crypto/digest_extra/digest_test.cc",
"src/crypto/dsa/dsa_test.cc",
"src/crypto/ecdh/ecdh_test.cc",
"src/crypto/ecdh_extra/ecdh_test.cc",
"src/crypto/err/err_test.cc",
"src/crypto/evp/evp_extra_test.cc",
"src/crypto/evp/evp_test.cc",

@@ -48,29 +56,133 @@ crypto_test_sources = [
"src/crypto/fipsmodule/ec/ec_test.cc",
"src/crypto/fipsmodule/ec/p256-x86_64_test.cc",
"src/crypto/fipsmodule/ecdsa/ecdsa_test.cc",
"src/crypto/fipsmodule/md5/md5_test.cc",
"src/crypto/fipsmodule/modes/gcm_test.cc",
"src/crypto/fipsmodule/rand/ctrdrbg_test.cc",
"src/crypto/fipsmodule/sha/sha_test.cc",
"src/crypto/hkdf/hkdf_test.cc",
"src/crypto/hmac_extra/hmac_test.cc",
"src/crypto/hrss/hrss_test.cc",
"src/crypto/impl_dispatch_test.cc",
"src/crypto/lhash/lhash_test.cc",
"src/crypto/obj/obj_test.cc",
"src/crypto/pem/pem_test.cc",
"src/crypto/pkcs7/pkcs7_test.cc",
"src/crypto/pkcs8/pkcs12_test.cc",
"src/crypto/pkcs8/pkcs8_test.cc",
"src/crypto/poly1305/poly1305_test.cc",
"src/crypto/pool/pool_test.cc",
"src/crypto/rand_extra/rand_test.cc",
"src/crypto/refcount_test.cc",
"src/crypto/rsa_extra/rsa_test.cc",
"src/crypto/self_test.cc",
"src/crypto/siphash/siphash_test.cc",
"src/crypto/stack/stack_test.cc",
"src/crypto/test/abi_test.cc",
"src/crypto/test/file_test_gtest.cc",
"src/crypto/test/gtest_main.cc",
"src/crypto/thread_test.cc",
"src/crypto/x509/x509_test.cc",
"src/crypto/x509/x509_time_test.cc",
"src/crypto/x509v3/tab_test.cc",
"src/crypto/x509v3/v3name_test.cc",
]

crypto_test_data = [
"src/crypto/cipher_extra/test/aes_128_cbc_sha1_tls_implicit_iv_tests.txt",
"src/crypto/cipher_extra/test/aes_128_cbc_sha1_tls_tests.txt",
"src/crypto/cipher_extra/test/aes_128_cbc_sha256_tls_tests.txt",
"src/crypto/cipher_extra/test/aes_128_ccm_bluetooth_8_tests.txt",
"src/crypto/cipher_extra/test/aes_128_ccm_bluetooth_tests.txt",
"src/crypto/cipher_extra/test/aes_128_ctr_hmac_sha256.txt",
"src/crypto/cipher_extra/test/aes_128_gcm_siv_tests.txt",
"src/crypto/cipher_extra/test/aes_128_gcm_tests.txt",
"src/crypto/cipher_extra/test/aes_192_gcm_tests.txt",
"src/crypto/cipher_extra/test/aes_256_cbc_sha1_tls_implicit_iv_tests.txt",
"src/crypto/cipher_extra/test/aes_256_cbc_sha1_tls_tests.txt",
"src/crypto/cipher_extra/test/aes_256_cbc_sha256_tls_tests.txt",
"src/crypto/cipher_extra/test/aes_256_cbc_sha384_tls_tests.txt",
"src/crypto/cipher_extra/test/aes_256_ctr_hmac_sha256.txt",
"src/crypto/cipher_extra/test/aes_256_gcm_siv_tests.txt",
"src/crypto/cipher_extra/test/aes_256_gcm_tests.txt",
"src/crypto/cipher_extra/test/chacha20_poly1305_tests.txt",
"src/crypto/cipher_extra/test/cipher_tests.txt",
"src/crypto/cipher_extra/test/des_ede3_cbc_sha1_tls_implicit_iv_tests.txt",
"src/crypto/cipher_extra/test/des_ede3_cbc_sha1_tls_tests.txt",
"src/crypto/cipher_extra/test/nist_cavp/aes_128_cbc.txt",
"src/crypto/cipher_extra/test/nist_cavp/aes_128_ctr.txt",
"src/crypto/cipher_extra/test/nist_cavp/aes_128_gcm.txt",
"src/crypto/cipher_extra/test/nist_cavp/aes_192_cbc.txt",
"src/crypto/cipher_extra/test/nist_cavp/aes_192_ctr.txt",
"src/crypto/cipher_extra/test/nist_cavp/aes_256_cbc.txt",
"src/crypto/cipher_extra/test/nist_cavp/aes_256_ctr.txt",
"src/crypto/cipher_extra/test/nist_cavp/aes_256_gcm.txt",
"src/crypto/cipher_extra/test/nist_cavp/tdes_cbc.txt",
"src/crypto/cipher_extra/test/nist_cavp/tdes_ecb.txt",
"src/crypto/cipher_extra/test/xchacha20_poly1305_tests.txt",
"src/crypto/cmac/cavp_3des_cmac_tests.txt",
"src/crypto/cmac/cavp_aes128_cmac_tests.txt",
"src/crypto/cmac/cavp_aes192_cmac_tests.txt",
"src/crypto/cmac/cavp_aes256_cmac_tests.txt",
"src/crypto/curve25519/ed25519_tests.txt",
"src/crypto/ecdh_extra/ecdh_tests.txt",
"src/crypto/evp/evp_tests.txt",
"src/crypto/evp/scrypt_tests.txt",
"src/crypto/fipsmodule/aes/aes_tests.txt",
"src/crypto/fipsmodule/bn/bn_tests.txt",
"src/crypto/fipsmodule/bn/miller_rabin_tests.txt",
"src/crypto/fipsmodule/ec/ec_scalar_base_mult_tests.txt",
"src/crypto/fipsmodule/ec/p256-x86_64_tests.txt",
"src/crypto/fipsmodule/ecdsa/ecdsa_sign_tests.txt",
"src/crypto/fipsmodule/ecdsa/ecdsa_verify_tests.txt",
"src/crypto/fipsmodule/modes/gcm_tests.txt",
"src/crypto/fipsmodule/rand/ctrdrbg_vectors.txt",
"src/crypto/hmac_extra/hmac_tests.txt",
"src/crypto/poly1305/poly1305_tests.txt",
"src/crypto/siphash/siphash_tests.txt",
"src/crypto/x509/many_constraints.pem",
"src/crypto/x509/many_names1.pem",
"src/crypto/x509/many_names2.pem",
"src/crypto/x509/many_names3.pem",
"src/crypto/x509/some_names1.pem",
"src/crypto/x509/some_names2.pem",
"src/crypto/x509/some_names3.pem",
"src/third_party/wycheproof_testvectors/aes_cbc_pkcs5_test.txt",
"src/third_party/wycheproof_testvectors/aes_cmac_test.txt",
"src/third_party/wycheproof_testvectors/aes_gcm_siv_test.txt",
"src/third_party/wycheproof_testvectors/aes_gcm_test.txt",
"src/third_party/wycheproof_testvectors/chacha20_poly1305_test.txt",
"src/third_party/wycheproof_testvectors/dsa_test.txt",
"src/third_party/wycheproof_testvectors/ecdh_secp224r1_test.txt",
"src/third_party/wycheproof_testvectors/ecdh_secp256r1_test.txt",
"src/third_party/wycheproof_testvectors/ecdh_secp384r1_test.txt",
"src/third_party/wycheproof_testvectors/ecdh_secp521r1_test.txt",
"src/third_party/wycheproof_testvectors/ecdsa_secp224r1_sha224_test.txt",
"src/third_party/wycheproof_testvectors/ecdsa_secp224r1_sha256_test.txt",
"src/third_party/wycheproof_testvectors/ecdsa_secp224r1_sha512_test.txt",
"src/third_party/wycheproof_testvectors/ecdsa_secp256r1_sha256_test.txt",
"src/third_party/wycheproof_testvectors/ecdsa_secp256r1_sha512_test.txt",
"src/third_party/wycheproof_testvectors/ecdsa_secp384r1_sha384_test.txt",
"src/third_party/wycheproof_testvectors/ecdsa_secp384r1_sha512_test.txt",
"src/third_party/wycheproof_testvectors/ecdsa_secp521r1_sha512_test.txt",
"src/third_party/wycheproof_testvectors/eddsa_test.txt",
"src/third_party/wycheproof_testvectors/kw_test.txt",
"src/third_party/wycheproof_testvectors/kwp_test.txt",
"src/third_party/wycheproof_testvectors/rsa_pss_2048_sha1_mgf1_20_test.txt",
"src/third_party/wycheproof_testvectors/rsa_pss_2048_sha256_mgf1_0_test.txt",
"src/third_party/wycheproof_testvectors/rsa_pss_2048_sha256_mgf1_32_test.txt",
"src/third_party/wycheproof_testvectors/rsa_pss_3072_sha256_mgf1_32_test.txt",
"src/third_party/wycheproof_testvectors/rsa_pss_4096_sha256_mgf1_32_test.txt",
"src/third_party/wycheproof_testvectors/rsa_pss_4096_sha512_mgf1_32_test.txt",
"src/third_party/wycheproof_testvectors/rsa_pss_misc_test.txt",
"src/third_party/wycheproof_testvectors/rsa_signature_test.txt",
"src/third_party/wycheproof_testvectors/x25519_test.txt",
]

ssl_test_sources = [
"src/crypto/test/abi_test.cc",
"src/crypto/test/gtest_main.cc",
"src/ssl/span_test.cc",
"src/ssl/ssl_c_test.c",
"src/ssl/ssl_test.cc",
]
@@ -175,15 +175,10 @@
'sources': [ '<@(boringssl_linux_x86_64_sources)' ],
}],
['OS == "win"', {
'sources': [ '<@(boringssl_win_x86_64_sources)' ],
# Windows' assembly is built with Yasm. The other platforms use
# the platform assembler.
'variables': {
'yasm_output_path': '<(SHARED_INTERMEDIATE_DIR)/third_party/boringssl',
# NOTES(kqyang): Somehow ASM fails to compile. Disable ASM.
'direct_dependent_settings': {
'defines': [ 'OPENSSL_NO_ASM' ],
},
'includes': [
'../yasm/yasm_compile.gypi',
],
}],
['OS != "mac" and OS != "linux" and OS != "win" and OS != "android"', {
'direct_dependent_settings': {
@@ -8,17 +8,18 @@
'variables': {
'boringssl_ssl_sources': [
'src/include/openssl/dtls1.h',
'src/include/openssl/srtp.h',
'src/include/openssl/ssl.h',
'src/include/openssl/ssl3.h',
'src/include/openssl/tls1.h',
'src/ssl/bio_ssl.cc',
'src/ssl/custom_extensions.cc',
'src/ssl/d1_both.cc',
'src/ssl/d1_lib.cc',
'src/ssl/d1_pkt.cc',
'src/ssl/d1_srtp.cc',
'src/ssl/dtls_method.cc',
'src/ssl/dtls_record.cc',
'src/ssl/handoff.cc',
'src/ssl/handshake.cc',
'src/ssl/handshake_client.cc',
'src/ssl/handshake_server.cc',

@@ -102,16 +103,18 @@
'src/crypto/bytestring/cbb.c',
'src/crypto/bytestring/cbs.c',
'src/crypto/bytestring/internal.h',
'src/crypto/bytestring/unicode.c',
'src/crypto/chacha/chacha.c',
'src/crypto/chacha/internal.h',
'src/crypto/cipher_extra/cipher_extra.c',
'src/crypto/cipher_extra/derive_key.c',
'src/crypto/cipher_extra/e_aesccm.c',
'src/crypto/cipher_extra/e_aesctrhmac.c',
'src/crypto/cipher_extra/e_aesgcmsiv.c',
'src/crypto/cipher_extra/e_chacha20poly1305.c',
'src/crypto/cipher_extra/e_null.c',
'src/crypto/cipher_extra/e_rc2.c',
'src/crypto/cipher_extra/e_rc4.c',
'src/crypto/cipher_extra/e_ssl3.c',
'src/crypto/cipher_extra/e_tls.c',
'src/crypto/cipher_extra/internal.h',
'src/crypto/cipher_extra/tls_cbc.c',

@@ -119,14 +122,15 @@
'src/crypto/conf/conf.c',
'src/crypto/conf/conf_def.h',
'src/crypto/conf/internal.h',
'src/crypto/cpu-aarch64-fuchsia.c',
'src/crypto/cpu-aarch64-linux.c',
'src/crypto/cpu-arm-linux.c',
'src/crypto/cpu-arm-linux.h',
'src/crypto/cpu-arm.c',
'src/crypto/cpu-intel.c',
'src/crypto/cpu-ppc64le.c',
'src/crypto/crypto.c',
'src/crypto/curve25519/spake25519.c',
'src/crypto/curve25519/x25519-x86_64.c',
'src/crypto/dh/check.c',
'src/crypto/dh/dh.c',
'src/crypto/dh/dh_asn1.c',

@@ -135,7 +139,8 @@
'src/crypto/dsa/dsa.c',
'src/crypto/dsa/dsa_asn1.c',
'src/crypto/ec_extra/ec_asn1.c',
'src/crypto/ecdh/ecdh.c',
'src/crypto/ec_extra/ec_derive.c',
'src/crypto/ecdh_extra/ecdh_extra.c',
'src/crypto/ecdsa_extra/ecdsa_asn1.c',
'src/crypto/engine/engine.c',
'src/crypto/err/err.c',

@@ -152,6 +157,8 @@
'src/crypto/evp/p_ed25519_asn1.c',
'src/crypto/evp/p_rsa.c',
'src/crypto/evp/p_rsa_asn1.c',
'src/crypto/evp/p_x25519.c',
'src/crypto/evp/p_x25519_asn1.c',
'src/crypto/evp/pbkdf.c',
'src/crypto/evp/print.c',
'src/crypto/evp/scrypt.c',

@@ -169,11 +176,17 @@
'src/crypto/fipsmodule/ec/internal.h',
'src/crypto/fipsmodule/ec/p256-x86_64-table.h',
'src/crypto/fipsmodule/ec/p256-x86_64.h',
'src/crypto/fipsmodule/fips_shared_support.c',
'src/crypto/fipsmodule/is_fips.c',
'src/crypto/fipsmodule/md5/internal.h',
'src/crypto/fipsmodule/modes/internal.h',
'src/crypto/fipsmodule/rand/internal.h',
'src/crypto/fipsmodule/rsa/internal.h',
'src/crypto/fipsmodule/sha/internal.h',
'src/crypto/fipsmodule/tls/internal.h',
'src/crypto/hkdf/hkdf.c',
'src/crypto/hrss/hrss.c',
'src/crypto/hrss/internal.h',
'src/crypto/internal.h',
'src/crypto/lhash/lhash.c',
'src/crypto/mem.c',

@@ -210,6 +223,8 @@
'src/crypto/refcount_c11.c',
'src/crypto/refcount_lock.c',
'src/crypto/rsa_extra/rsa_asn1.c',
'src/crypto/rsa_extra/rsa_print.c',
'src/crypto/siphash/siphash.c',
'src/crypto/stack/stack.c',
'src/crypto/thread.c',
'src/crypto/thread_none.c',

@@ -268,6 +283,7 @@
'src/crypto/x509/x_x509.c',
'src/crypto/x509/x_x509a.c',
'src/crypto/x509v3/ext_dat.h',
'src/crypto/x509v3/internal.h',
'src/crypto/x509v3/pcy_cache.c',
'src/crypto/x509v3/pcy_data.c',
'src/crypto/x509v3/pcy_int.h',

@@ -291,6 +307,7 @@
'src/crypto/x509v3/v3_int.c',
'src/crypto/x509v3/v3_lib.c',
'src/crypto/x509v3/v3_ncons.c',
'src/crypto/x509v3/v3_ocsp.c',
'src/crypto/x509v3/v3_pci.c',
'src/crypto/x509v3/v3_pcia.c',
'src/crypto/x509v3/v3_pcons.c',

@@ -327,6 +344,7 @@
'src/include/openssl/dh.h',
'src/include/openssl/digest.h',
'src/include/openssl/dsa.h',
'src/include/openssl/e_os2.h',
'src/include/openssl/ec.h',
'src/include/openssl/ec_key.h',
'src/include/openssl/ecdh.h',

@@ -337,9 +355,9 @@
'src/include/openssl/ex_data.h',
'src/include/openssl/hkdf.h',
'src/include/openssl/hmac.h',
'src/include/openssl/hrss.h',
'src/include/openssl/is_boringssl.h',
'src/include/openssl/lhash.h',
'src/include/openssl/lhash_macros.h',
'src/include/openssl/md4.h',
'src/include/openssl/md5.h',
'src/include/openssl/mem.h',

@@ -362,8 +380,8 @@
'src/include/openssl/rsa.h',
'src/include/openssl/safestack.h',
'src/include/openssl/sha.h',
'src/include/openssl/siphash.h',
'src/include/openssl/span.h',
'src/include/openssl/srtp.h',
'src/include/openssl/stack.h',
'src/include/openssl/thread.h',
'src/include/openssl/type_check.h',

@@ -371,16 +389,34 @@
'src/include/openssl/x509_vfy.h',
'src/include/openssl/x509v3.h',
'src/third_party/fiat/curve25519.c',
'src/third_party/fiat/curve25519_32.h',
'src/third_party/fiat/curve25519_64.h',
'src/third_party/fiat/curve25519_tables.h',
'src/third_party/fiat/internal.h',
'src/third_party/fiat/p256_32.h',
'src/third_party/fiat/p256_64.h',
'src/third_party/sike/asm/fp_generic.c',
'src/third_party/sike/curve_params.c',
'src/third_party/sike/fpx.c',
'src/third_party/sike/fpx.h',
'src/third_party/sike/isogeny.c',
'src/third_party/sike/isogeny.h',
'src/third_party/sike/sike.c',
'src/third_party/sike/sike.h',
'src/third_party/sike/utils.h',
],
'boringssl_ios_aarch64_sources': [
'ios-aarch64/crypto/chacha/chacha-armv8.S',
'ios-aarch64/crypto/fipsmodule/aesv8-armx64.S',
'ios-aarch64/crypto/fipsmodule/armv8-mont.S',
'ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S',
'ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S',
'ios-aarch64/crypto/fipsmodule/sha1-armv8.S',
'ios-aarch64/crypto/fipsmodule/sha256-armv8.S',
'ios-aarch64/crypto/fipsmodule/sha512-armv8.S',
'ios-aarch64/crypto/fipsmodule/vpaes-armv8.S',
'ios-aarch64/crypto/test/trampoline-armv8.S',
'ios-aarch64/crypto/third_party/sike/asm/fp-armv8.S',
],
'boringssl_ios_arm_sources': [
'ios-arm/crypto/chacha/chacha-armv4.S',

@@ -393,15 +429,21 @@
'ios-arm/crypto/fipsmodule/sha1-armv4-large.S',
'ios-arm/crypto/fipsmodule/sha256-armv4.S',
'ios-arm/crypto/fipsmodule/sha512-armv4.S',
'ios-arm/crypto/fipsmodule/vpaes-armv7.S',
'ios-arm/crypto/test/trampoline-armv4.S',
],
'boringssl_linux_aarch64_sources': [
'linux-aarch64/crypto/chacha/chacha-armv8.S',
'linux-aarch64/crypto/fipsmodule/aesv8-armx64.S',
'linux-aarch64/crypto/fipsmodule/armv8-mont.S',
'linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S',
'linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S',
'linux-aarch64/crypto/fipsmodule/sha1-armv8.S',
'linux-aarch64/crypto/fipsmodule/sha256-armv8.S',
'linux-aarch64/crypto/fipsmodule/sha512-armv8.S',
'linux-aarch64/crypto/fipsmodule/vpaes-armv8.S',
'linux-aarch64/crypto/test/trampoline-armv8.S',
'linux-aarch64/crypto/third_party/sike/asm/fp-armv8.S',
],
'boringssl_linux_arm_sources': [
'linux-arm/crypto/chacha/chacha-armv4.S',

@@ -414,6 +456,8 @@
'linux-arm/crypto/fipsmodule/sha1-armv4-large.S',
'linux-arm/crypto/fipsmodule/sha256-armv4.S',
'linux-arm/crypto/fipsmodule/sha512-armv4.S',
'linux-arm/crypto/fipsmodule/vpaes-armv7.S',
'linux-arm/crypto/test/trampoline-armv4.S',
'src/crypto/curve25519/asm/x25519-asm-arm.S',
'src/crypto/poly1305/poly1305_arm_asm.S',
],

@@ -427,6 +471,7 @@
'linux-x86/crypto/fipsmodule/aesni-x86.S',
'linux-x86/crypto/fipsmodule/bn-586.S',
'linux-x86/crypto/fipsmodule/co-586.S',
'linux-x86/crypto/fipsmodule/ghash-ssse3-x86.S',
'linux-x86/crypto/fipsmodule/ghash-x86.S',
'linux-x86/crypto/fipsmodule/md5-586.S',
'linux-x86/crypto/fipsmodule/sha1-586.S',

@@ -434,6 +479,7 @@
'linux-x86/crypto/fipsmodule/sha512-586.S',
'linux-x86/crypto/fipsmodule/vpaes-x86.S',
'linux-x86/crypto/fipsmodule/x86-mont.S',
'linux-x86/crypto/test/trampoline-x86.S',
],
'boringssl_linux_x86_64_sources': [
'linux-x86_64/crypto/chacha/chacha-x86_64.S',

@@ -442,10 +488,11 @@
'linux-x86_64/crypto/fipsmodule/aes-x86_64.S',
'linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S',
'linux-x86_64/crypto/fipsmodule/aesni-x86_64.S',
'linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S',
'linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S',
'linux-x86_64/crypto/fipsmodule/ghash-x86_64.S',
'linux-x86_64/crypto/fipsmodule/md5-x86_64.S',
'linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S',
'linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S',
'linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S',
'linux-x86_64/crypto/fipsmodule/rsaz-avx2.S',
'linux-x86_64/crypto/fipsmodule/sha1-x86_64.S',

@@ -454,7 +501,9 @@
'linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S',
'linux-x86_64/crypto/fipsmodule/x86_64-mont.S',
'linux-x86_64/crypto/fipsmodule/x86_64-mont5.S',
'src/crypto/curve25519/asm/x25519-asm-x86_64.S',
'linux-x86_64/crypto/test/trampoline-x86_64.S',
'linux-x86_64/crypto/third_party/sike/asm/fp-x86_64.S',
'src/crypto/hrss/asm/poly_rq_mul.S',
],
'boringssl_mac_x86_sources': [
'mac-x86/crypto/chacha/chacha-x86.S',

@@ -462,6 +511,7 @@
'mac-x86/crypto/fipsmodule/aesni-x86.S',
'mac-x86/crypto/fipsmodule/bn-586.S',
'mac-x86/crypto/fipsmodule/co-586.S',
'mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S',
'mac-x86/crypto/fipsmodule/ghash-x86.S',
'mac-x86/crypto/fipsmodule/md5-586.S',
'mac-x86/crypto/fipsmodule/sha1-586.S',

@@ -469,6 +519,7 @@
'mac-x86/crypto/fipsmodule/sha512-586.S',
'mac-x86/crypto/fipsmodule/vpaes-x86.S',
'mac-x86/crypto/fipsmodule/x86-mont.S',
'mac-x86/crypto/test/trampoline-x86.S',
],
'boringssl_mac_x86_64_sources': [
'mac-x86_64/crypto/chacha/chacha-x86_64.S',

@@ -477,10 +528,11 @@
'mac-x86_64/crypto/fipsmodule/aes-x86_64.S',
'mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S',
'mac-x86_64/crypto/fipsmodule/aesni-x86_64.S',
'mac-x86_64/crypto/fipsmodule/bsaes-x86_64.S',
'mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S',
'mac-x86_64/crypto/fipsmodule/ghash-x86_64.S',
'mac-x86_64/crypto/fipsmodule/md5-x86_64.S',
'mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S',
'mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S',
'mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S',
'mac-x86_64/crypto/fipsmodule/rsaz-avx2.S',
'mac-x86_64/crypto/fipsmodule/sha1-x86_64.S',

@@ -489,7 +541,8 @@
'mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S',
'mac-x86_64/crypto/fipsmodule/x86_64-mont.S',
'mac-x86_64/crypto/fipsmodule/x86_64-mont5.S',
'src/crypto/curve25519/asm/x25519-asm-x86_64.S',
'mac-x86_64/crypto/test/trampoline-x86_64.S',
'mac-x86_64/crypto/third_party/sike/asm/fp-x86_64.S',
],
'boringssl_win_x86_sources': [
'win-x86/crypto/chacha/chacha-x86.asm',

@@ -497,6 +550,7 @@
'win-x86/crypto/fipsmodule/aesni-x86.asm',
'win-x86/crypto/fipsmodule/bn-586.asm',
'win-x86/crypto/fipsmodule/co-586.asm',
'win-x86/crypto/fipsmodule/ghash-ssse3-x86.asm',
'win-x86/crypto/fipsmodule/ghash-x86.asm',
'win-x86/crypto/fipsmodule/md5-586.asm',
'win-x86/crypto/fipsmodule/sha1-586.asm',

@@ -504,6 +558,7 @@
'win-x86/crypto/fipsmodule/sha512-586.asm',
'win-x86/crypto/fipsmodule/vpaes-x86.asm',
'win-x86/crypto/fipsmodule/x86-mont.asm',
'win-x86/crypto/test/trampoline-x86.asm',
],
'boringssl_win_x86_64_sources': [
'win-x86_64/crypto/chacha/chacha-x86_64.asm',

@@ -512,10 +567,11 @@
'win-x86_64/crypto/fipsmodule/aes-x86_64.asm',
'win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm',
'win-x86_64/crypto/fipsmodule/aesni-x86_64.asm',
'win-x86_64/crypto/fipsmodule/bsaes-x86_64.asm',
'win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm',
'win-x86_64/crypto/fipsmodule/ghash-x86_64.asm',
'win-x86_64/crypto/fipsmodule/md5-x86_64.asm',
'win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm',
'win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.asm',
'win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm',
'win-x86_64/crypto/fipsmodule/rsaz-avx2.asm',
'win-x86_64/crypto/fipsmodule/sha1-x86_64.asm',

@@ -524,6 +580,8 @@
'win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm',
'win-x86_64/crypto/fipsmodule/x86_64-mont.asm',
'win-x86_64/crypto/fipsmodule/x86_64-mont5.asm',
'win-x86_64/crypto/test/trampoline-x86_64.asm',
'win-x86_64/crypto/third_party/sike/asm/fp-x86_64.asm',
],
}
}
Two file diffs suppressed because they are too large.

packager/third_party/boringssl/ios-aarch64/crypto/fipsmodule/aesv8-armx64.S (vendored, new file, 772 lines)
@@ -0,0 +1,772 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text

.section __TEXT,__const
.align 5
Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b

.text

.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key

.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
mov x3,#-1
cmp x0,#0
b.eq Lenc_key_abort
cmp x2,#0
b.eq Lenc_key_abort
mov x3,#-2
cmp w1,#128
b.lt Lenc_key_abort
cmp w1,#256
b.gt Lenc_key_abort
tst w1,#0x3f
b.ne Lenc_key_abort

adrp x3,Lrcon@PAGE
add x3,x3,Lrcon@PAGEOFF
cmp w1,#192

eor v0.16b,v0.16b,v0.16b
ld1 {v3.16b},[x0],#16
mov w1,#8 // reuse w1
ld1 {v1.4s,v2.4s},[x3],#32

b.lt Loop128
b.eq L192
b L256

.align 4
Loop128:
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1

eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
b.ne Loop128

ld1 {v1.4s},[x3]

tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b

eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b

tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b

eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2]
add x2,x2,#0x50

mov w12,#10
b Ldone

.align 4
L192:
ld1 {v4.8b},[x0],#8
movi v6.16b,#8 // borrow v6.16b
st1 {v3.4s},[x2],#16
sub v2.16b,v2.16b,v6.16b // adjust the mask

Loop192:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.8b},[x2],#8
aese v6.16b,v0.16b
subs w1,w1,#1

eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b

dup v5.4s,v3.s[3]
eor v5.16b,v5.16b,v4.16b
eor v6.16b,v6.16b,v1.16b
ext v4.16b,v0.16b,v4.16b,#12
shl v1.16b,v1.16b,#1
eor v4.16b,v4.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
eor v4.16b,v4.16b,v6.16b
st1 {v3.4s},[x2],#16
b.ne Loop192

mov w12,#12
add x2,x2,#0x20
b Ldone

.align 4
L256:
ld1 {v4.16b},[x0]
mov w1,#7
mov w12,#14
st1 {v3.4s},[x2],#16

Loop256:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1

eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2],#16
b.eq Ldone

dup v6.4s,v3.s[3] // just splat
ext v5.16b,v0.16b,v4.16b,#12
aese v6.16b,v0.16b

eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b

eor v4.16b,v4.16b,v6.16b
b Loop256

Ldone:
str w12,[x2]
mov x3,#0

Lenc_key_abort:
mov x0,x3 // return value
ldr x29,[sp],#16
ret


.globl _aes_hw_set_decrypt_key
.private_extern _aes_hw_set_decrypt_key

.align 5
_aes_hw_set_decrypt_key:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl Lenc_key

cmp x0,#0
b.ne Ldec_key_abort

sub x2,x2,#240 // restore original x2
mov x4,#-16
add x0,x2,x12,lsl#4 // end of key schedule

ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16

Loop_imc:
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
aesimc v0.16b,v0.16b
aesimc v1.16b,v1.16b
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
cmp x0,x2
b.hi Loop_imc

ld1 {v0.4s},[x2]
aesimc v0.16b,v0.16b
st1 {v0.4s},[x0]

eor x0,x0,x0 // return value
Ldec_key_abort:
ldp x29,x30,[sp],#16
ret

.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt

.align 5
_aes_hw_encrypt:
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
sub w3,w3,#2
ld1 {v1.4s},[x2],#16

Loop_enc:
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aese v2.16b,v1.16b
aesmc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt Loop_enc

aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2]
aese v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b

st1 {v2.16b},[x1]
ret

.globl _aes_hw_decrypt
.private_extern _aes_hw_decrypt

.align 5
_aes_hw_decrypt:
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
sub w3,w3,#2
ld1 {v1.4s},[x2],#16

Loop_dec:
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aesd v2.16b,v1.16b
aesimc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt Loop_dec

aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2]
aesd v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b

st1 {v2.16b},[x1]
ret

.globl _aes_hw_cbc_encrypt
.private_extern _aes_hw_cbc_encrypt

.align 5
_aes_hw_cbc_encrypt:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
mov x8,#16
b.lo Lcbc_abort
csel x8,xzr,x8,eq

cmp w5,#0 // en- or decrypting?
ldr w5,[x3,#240]
and x2,x2,#-16
ld1 {v6.16b},[x4]
ld1 {v0.16b},[x0],x8

ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#6
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
sub w5,w5,#2
ld1 {v18.4s,v19.4s},[x7],#32
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]

add x7,x3,#32
mov w6,w5
b.eq Lcbc_dec

cmp w5,#2
eor v0.16b,v0.16b,v6.16b
eor v5.16b,v16.16b,v7.16b
b.eq Lcbc_enc128

ld1 {v2.4s,v3.4s},[x7]
add x7,x3,#16
add x6,x3,#16*4
add x12,x3,#16*5
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
add x14,x3,#16*6
add x3,x3,#16*7
b Lenter_cbc_enc

.align 4
Loop_cbc_enc:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
Lenter_cbc_enc:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x6]
cmp w5,#4
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x12]
b.eq Lcbc_enc192

aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x14]
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x3]
nop

Lcbc_enc192:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs Loop_cbc_enc

st1 {v6.16b},[x1],#16
b Lcbc_done

.align 5
Lcbc_enc128:
ld1 {v2.4s,v3.4s},[x7]
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
b Lenter_cbc_enc128
Loop_cbc_enc128:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
Lenter_cbc_enc128:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs Loop_cbc_enc128

st1 {v6.16b},[x1],#16
b Lcbc_done
.align 5
Lcbc_dec:
ld1 {v18.16b},[x0],#16
subs x2,x2,#32 // bias
add w6,w5,#2
orr v3.16b,v0.16b,v0.16b
orr v1.16b,v0.16b,v0.16b
orr v19.16b,v18.16b,v18.16b
b.lo Lcbc_dec_tail

orr v1.16b,v18.16b,v18.16b
ld1 {v18.16b},[x0],#16
orr v2.16b,v0.16b,v0.16b
orr v3.16b,v1.16b,v1.16b
orr v19.16b,v18.16b,v18.16b

Loop3x_cbc_dec:
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt Loop3x_cbc_dec

aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
eor v4.16b,v6.16b,v7.16b
subs x2,x2,#0x30
eor v5.16b,v2.16b,v7.16b
csel x6,x2,x6,lo // x6, w6, is zero at this point
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
add x0,x0,x6 // x0 is adjusted in such way that
// at exit from the loop v1.16b-v18.16b
// are loaded with last "words"
orr v6.16b,v19.16b,v19.16b
mov x7,x3
aesd v0.16b,v20.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
ld1 {v2.16b},[x0],#16
aesd v0.16b,v21.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
aesd v0.16b,v22.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
ld1 {v19.16b},[x0],#16
aesd v0.16b,v23.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
add w6,w5,#2
eor v4.16b,v4.16b,v0.16b
eor v5.16b,v5.16b,v1.16b
eor v18.16b,v18.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v4.16b},[x1],#16
orr v0.16b,v2.16b,v2.16b
st1 {v5.16b},[x1],#16
orr v1.16b,v3.16b,v3.16b
st1 {v18.16b},[x1],#16
orr v18.16b,v19.16b,v19.16b
b.hs Loop3x_cbc_dec

cmn x2,#0x30
b.eq Lcbc_done
nop

Lcbc_dec_tail:
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt Lcbc_dec_tail

aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
cmn x2,#0x20
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
eor v5.16b,v6.16b,v7.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
b.eq Lcbc_dec_one
eor v5.16b,v5.16b,v1.16b
eor v17.16b,v17.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
st1 {v17.16b},[x1],#16
b Lcbc_done

Lcbc_dec_one:
eor v5.16b,v5.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16

Lcbc_done:
st1 {v6.16b},[x4]
Lcbc_abort:
ldr x29,[sp],#16
ret

.globl _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks

.align 5
_aes_hw_ctr32_encrypt_blocks:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240]

ldr w8, [x4, #12]
ld1 {v0.4s},[x4]

ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#4
mov x12,#16
cmp x2,#2
add x7,x3,x5,lsl#4 // pointer to last 5 round keys
sub w5,w5,#2
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
csel x12,xzr,x12,lo
#ifndef __ARMEB__
rev w8, w8
#endif
orr v1.16b,v0.16b,v0.16b
add w10, w8, #1
orr v18.16b,v0.16b,v0.16b
add w8, w8, #2
orr v6.16b,v0.16b,v0.16b
rev w10, w10
mov v1.s[3],w10
b.ls Lctr32_tail
rev w12, w8
sub x2,x2,#3 // bias
mov v18.s[3],w12
b Loop3x_ctr32

.align 4
Loop3x_ctr32:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
aese v18.16b,v17.16b
aesmc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt Loop3x_ctr32

aese v0.16b,v16.16b
aesmc v4.16b,v0.16b
aese v1.16b,v16.16b
aesmc v5.16b,v1.16b
ld1 {v2.16b},[x0],#16
orr v0.16b,v6.16b,v6.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
orr v1.16b,v6.16b,v6.16b
aese v4.16b,v17.16b
aesmc v4.16b,v4.16b
aese v5.16b,v17.16b
aesmc v5.16b,v5.16b
ld1 {v19.16b},[x0],#16
mov x7,x3
aese v18.16b,v17.16b
aesmc v17.16b,v18.16b
orr v18.16b,v6.16b,v6.16b
add w9,w8,#1
aese v4.16b,v20.16b
aesmc v4.16b,v4.16b
aese v5.16b,v20.16b
aesmc v5.16b,v5.16b
eor v2.16b,v2.16b,v7.16b
add w10,w8,#2
aese v17.16b,v20.16b
aesmc v17.16b,v17.16b
eor v3.16b,v3.16b,v7.16b
add w8,w8,#3
aese v4.16b,v21.16b
aesmc v4.16b,v4.16b
aese v5.16b,v21.16b
aesmc v5.16b,v5.16b
eor v19.16b,v19.16b,v7.16b
rev w9,w9
aese v17.16b,v21.16b
aesmc v17.16b,v17.16b
mov v0.s[3], w9
rev w10,w10
aese v4.16b,v22.16b
aesmc v4.16b,v4.16b
aese v5.16b,v22.16b
aesmc v5.16b,v5.16b
mov v1.s[3], w10
rev w12,w8
aese v17.16b,v22.16b
aesmc v17.16b,v17.16b
mov v18.s[3], w12
subs x2,x2,#3
aese v4.16b,v23.16b
aese v5.16b,v23.16b
aese v17.16b,v23.16b

eor v2.16b,v2.16b,v4.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
st1 {v2.16b},[x1],#16
eor v3.16b,v3.16b,v5.16b
mov w6,w5
st1 {v3.16b},[x1],#16
eor v19.16b,v19.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v19.16b},[x1],#16
b.hs Loop3x_ctr32

adds x2,x2,#3
b.eq Lctr32_done
cmp x2,#1
mov x12,#16
csel x12,xzr,x12,eq

Lctr32_tail:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v17.4s},[x7],#16
b.gt Lctr32_tail

aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v2.16b},[x0],x12
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v1.16b,v20.16b
aesmc v1.16b,v1.16b
ld1 {v3.16b},[x0]
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v1.16b,v21.16b
aesmc v1.16b,v1.16b
eor v2.16b,v2.16b,v7.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v1.16b,v22.16b
aesmc v1.16b,v1.16b
eor v3.16b,v3.16b,v7.16b
aese v0.16b,v23.16b
aese v1.16b,v23.16b

cmp x2,#1
eor v2.16b,v2.16b,v0.16b
eor v3.16b,v3.16b,v1.16b
st1 {v2.16b},[x1],#16
b.eq Lctr32_done
st1 {v3.16b},[x1]

Lctr32_done:
ldr x29,[sp],#16
ret

#endif
#endif // !OPENSSL_NO_ASM
File diff suppressed because it is too large
338
packager/third_party/boringssl/ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
vendored
Normal file
@@ -0,0 +1,338 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text

.globl	_gcm_init_neon
.private_extern	_gcm_init_neon

.align	4
_gcm_init_neon:
	// This function is adapted from gcm_init_v8. xC2 is t3.
	ld1	{v17.2d}, [x1]	// load H
	movi	v19.16b, #0xe1
	shl	v19.2d, v19.2d, #57	// 0xc2.0
	ext	v3.16b, v17.16b, v17.16b, #8
	ushr	v18.2d, v19.2d, #63
	dup	v17.4s, v17.s[1]
	ext	v16.16b, v18.16b, v19.16b, #8	// t0=0xc2....01
	ushr	v18.2d, v3.2d, #63
	sshr	v17.4s, v17.4s, #31	// broadcast carry bit
	and	v18.16b, v18.16b, v16.16b
	shl	v3.2d, v3.2d, #1
	ext	v18.16b, v18.16b, v18.16b, #8
	and	v16.16b, v16.16b, v17.16b
	orr	v3.16b, v3.16b, v18.16b	// H<<<=1
	eor	v5.16b, v3.16b, v16.16b	// twisted H
	st1	{v5.2d}, [x0]	// store Htable[0]
	ret

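// A rough sketch of the "twisted H" computed above (inferred from the
// inline comments, not text from the generated file): with the reduction
// constant encoded as 0xc2...01,
//   carry = msb(H);  H' = H << 1;  if (carry) H' ^= 0xc2...01;
// i.e. H is pre-multiplied by x so the reduction constant can be folded
// into the multiply loops below.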
.globl	_gcm_gmult_neon
.private_extern	_gcm_gmult_neon

.align	4
_gcm_gmult_neon:
	ld1	{v3.16b}, [x0]	// load Xi
	ld1	{v5.1d}, [x1], #8	// load twisted H
	ld1	{v6.1d}, [x1]
	adrp	x9, Lmasks@PAGE	// load constants
	add	x9, x9, Lmasks@PAGEOFF
	ld1	{v24.2d, v25.2d}, [x9]
	rev64	v3.16b, v3.16b	// byteswap Xi
	ext	v3.16b, v3.16b, v3.16b, #8
	eor	v7.8b, v5.8b, v6.8b	// Karatsuba pre-processing

	mov	x3, #16
	b	Lgmult_neon

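// _gcm_gmult_neon performs a single Xi*H multiply: it sets the remaining-
// byte counter x3 to one block (16) and branches into the shared
// Lgmult_neon body inside _gcm_ghash_neon below, so Loop_neon exits after
// one iteration and the common store/ret tail writes Xi back.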
.globl	_gcm_ghash_neon
.private_extern	_gcm_ghash_neon

.align	4
_gcm_ghash_neon:
	ld1	{v0.16b}, [x0]	// load Xi
	ld1	{v5.1d}, [x1], #8	// load twisted H
	ld1	{v6.1d}, [x1]
	adrp	x9, Lmasks@PAGE	// load constants
	add	x9, x9, Lmasks@PAGEOFF
	ld1	{v24.2d, v25.2d}, [x9]
	rev64	v0.16b, v0.16b	// byteswap Xi
	ext	v0.16b, v0.16b, v0.16b, #8
	eor	v7.8b, v5.8b, v6.8b	// Karatsuba pre-processing

Loop_neon:
	ld1	{v3.16b}, [x2], #16	// load inp
	rev64	v3.16b, v3.16b	// byteswap inp
	ext	v3.16b, v3.16b, v3.16b, #8
	eor	v3.16b, v3.16b, v0.16b	// inp ^= Xi

Lgmult_neon:
	// Split the input into v3 and v4. (The upper halves are unused,
	// so it is okay to leave them alone.)
	ins	v4.d[0], v3.d[1]
	ext	v16.8b, v5.8b, v5.8b, #1	// A1
	pmull	v16.8h, v16.8b, v3.8b	// F = A1*B
	ext	v0.8b, v3.8b, v3.8b, #1	// B1
	pmull	v0.8h, v5.8b, v0.8b	// E = A*B1
	ext	v17.8b, v5.8b, v5.8b, #2	// A2
	pmull	v17.8h, v17.8b, v3.8b	// H = A2*B
	ext	v19.8b, v3.8b, v3.8b, #2	// B2
	pmull	v19.8h, v5.8b, v19.8b	// G = A*B2
	ext	v18.8b, v5.8b, v5.8b, #3	// A3
	eor	v16.16b, v16.16b, v0.16b	// L = E + F
	pmull	v18.8h, v18.8b, v3.8b	// J = A3*B
	ext	v0.8b, v3.8b, v3.8b, #3	// B3
	eor	v17.16b, v17.16b, v19.16b	// M = G + H
	pmull	v0.8h, v5.8b, v0.8b	// I = A*B3

	// Here we diverge from the 32-bit version. It computes the following
	// (instructions reordered for clarity):
	//
	// veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = P0 + P1 (L)
	// vand	$t0#hi, $t0#hi, $k48
	// veor	$t0#lo, $t0#lo, $t0#hi
	//
	// veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = P2 + P3 (M)
	// vand	$t1#hi, $t1#hi, $k32
	// veor	$t1#lo, $t1#lo, $t1#hi
	//
	// veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = P4 + P5 (N)
	// vand	$t2#hi, $t2#hi, $k16
	// veor	$t2#lo, $t2#lo, $t2#hi
	//
	// veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = P6 + P7 (K)
	// vmov.i64	$t3#hi, #0
	//
	// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
	// upper halves of SIMD registers, so we must split each half into
	// separate registers. To compensate, we pair computations up and
	// parallelize.

	ext	v19.8b, v3.8b, v3.8b, #4	// B4
	eor	v18.16b, v18.16b, v0.16b	// N = I + J
	pmull	v19.8h, v5.8b, v19.8b	// K = A*B4

	// This can probably be scheduled more efficiently. For now, we just
	// pair up independent instructions.
	zip1	v20.2d, v16.2d, v17.2d
	zip1	v22.2d, v18.2d, v19.2d
	zip2	v21.2d, v16.2d, v17.2d
	zip2	v23.2d, v18.2d, v19.2d
	eor	v20.16b, v20.16b, v21.16b
	eor	v22.16b, v22.16b, v23.16b
	and	v21.16b, v21.16b, v24.16b
	and	v23.16b, v23.16b, v25.16b
	eor	v20.16b, v20.16b, v21.16b
	eor	v22.16b, v22.16b, v23.16b
	zip1	v16.2d, v20.2d, v21.2d
	zip1	v18.2d, v22.2d, v23.2d
	zip2	v17.2d, v20.2d, v21.2d
	zip2	v19.2d, v22.2d, v23.2d

	ext	v16.16b, v16.16b, v16.16b, #15	// t0 = t0 << 8
	ext	v17.16b, v17.16b, v17.16b, #14	// t1 = t1 << 16
	pmull	v0.8h, v5.8b, v3.8b	// D = A*B
	ext	v19.16b, v19.16b, v19.16b, #12	// t3 = t3 << 32
	ext	v18.16b, v18.16b, v18.16b, #13	// t2 = t2 << 24
	eor	v16.16b, v16.16b, v17.16b
	eor	v18.16b, v18.16b, v19.16b
	eor	v0.16b, v0.16b, v16.16b
	eor	v0.16b, v0.16b, v18.16b
	eor	v3.8b, v3.8b, v4.8b	// Karatsuba pre-processing
	ext	v16.8b, v7.8b, v7.8b, #1	// A1
	pmull	v16.8h, v16.8b, v3.8b	// F = A1*B
	ext	v1.8b, v3.8b, v3.8b, #1	// B1
	pmull	v1.8h, v7.8b, v1.8b	// E = A*B1
	ext	v17.8b, v7.8b, v7.8b, #2	// A2
	pmull	v17.8h, v17.8b, v3.8b	// H = A2*B
	ext	v19.8b, v3.8b, v3.8b, #2	// B2
	pmull	v19.8h, v7.8b, v19.8b	// G = A*B2
	ext	v18.8b, v7.8b, v7.8b, #3	// A3
	eor	v16.16b, v16.16b, v1.16b	// L = E + F
	pmull	v18.8h, v18.8b, v3.8b	// J = A3*B
	ext	v1.8b, v3.8b, v3.8b, #3	// B3
	eor	v17.16b, v17.16b, v19.16b	// M = G + H
	pmull	v1.8h, v7.8b, v1.8b	// I = A*B3

	// Here we diverge from the 32-bit version. It computes the following
	// (instructions reordered for clarity):
	//
	// veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = P0 + P1 (L)
	// vand	$t0#hi, $t0#hi, $k48
	// veor	$t0#lo, $t0#lo, $t0#hi
	//
	// veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = P2 + P3 (M)
	// vand	$t1#hi, $t1#hi, $k32
	// veor	$t1#lo, $t1#lo, $t1#hi
	//
	// veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = P4 + P5 (N)
	// vand	$t2#hi, $t2#hi, $k16
	// veor	$t2#lo, $t2#lo, $t2#hi
	//
	// veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = P6 + P7 (K)
	// vmov.i64	$t3#hi, #0
	//
	// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
	// upper halves of SIMD registers, so we must split each half into
	// separate registers. To compensate, we pair computations up and
	// parallelize.

	ext	v19.8b, v3.8b, v3.8b, #4	// B4
	eor	v18.16b, v18.16b, v1.16b	// N = I + J
	pmull	v19.8h, v7.8b, v19.8b	// K = A*B4

	// This can probably be scheduled more efficiently. For now, we just
	// pair up independent instructions.
	zip1	v20.2d, v16.2d, v17.2d
	zip1	v22.2d, v18.2d, v19.2d
	zip2	v21.2d, v16.2d, v17.2d
	zip2	v23.2d, v18.2d, v19.2d
	eor	v20.16b, v20.16b, v21.16b
	eor	v22.16b, v22.16b, v23.16b
	and	v21.16b, v21.16b, v24.16b
	and	v23.16b, v23.16b, v25.16b
	eor	v20.16b, v20.16b, v21.16b
	eor	v22.16b, v22.16b, v23.16b
	zip1	v16.2d, v20.2d, v21.2d
	zip1	v18.2d, v22.2d, v23.2d
	zip2	v17.2d, v20.2d, v21.2d
	zip2	v19.2d, v22.2d, v23.2d

	ext	v16.16b, v16.16b, v16.16b, #15	// t0 = t0 << 8
	ext	v17.16b, v17.16b, v17.16b, #14	// t1 = t1 << 16
	pmull	v1.8h, v7.8b, v3.8b	// D = A*B
	ext	v19.16b, v19.16b, v19.16b, #12	// t3 = t3 << 32
	ext	v18.16b, v18.16b, v18.16b, #13	// t2 = t2 << 24
	eor	v16.16b, v16.16b, v17.16b
	eor	v18.16b, v18.16b, v19.16b
	eor	v1.16b, v1.16b, v16.16b
	eor	v1.16b, v1.16b, v18.16b
	ext	v16.8b, v6.8b, v6.8b, #1	// A1
	pmull	v16.8h, v16.8b, v4.8b	// F = A1*B
	ext	v2.8b, v4.8b, v4.8b, #1	// B1
	pmull	v2.8h, v6.8b, v2.8b	// E = A*B1
	ext	v17.8b, v6.8b, v6.8b, #2	// A2
	pmull	v17.8h, v17.8b, v4.8b	// H = A2*B
	ext	v19.8b, v4.8b, v4.8b, #2	// B2
	pmull	v19.8h, v6.8b, v19.8b	// G = A*B2
	ext	v18.8b, v6.8b, v6.8b, #3	// A3
	eor	v16.16b, v16.16b, v2.16b	// L = E + F
	pmull	v18.8h, v18.8b, v4.8b	// J = A3*B
	ext	v2.8b, v4.8b, v4.8b, #3	// B3
	eor	v17.16b, v17.16b, v19.16b	// M = G + H
	pmull	v2.8h, v6.8b, v2.8b	// I = A*B3

	// Here we diverge from the 32-bit version. It computes the following
	// (instructions reordered for clarity):
	//
	// veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = P0 + P1 (L)
	// vand	$t0#hi, $t0#hi, $k48
	// veor	$t0#lo, $t0#lo, $t0#hi
	//
	// veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = P2 + P3 (M)
	// vand	$t1#hi, $t1#hi, $k32
	// veor	$t1#lo, $t1#lo, $t1#hi
	//
	// veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = P4 + P5 (N)
	// vand	$t2#hi, $t2#hi, $k16
	// veor	$t2#lo, $t2#lo, $t2#hi
	//
	// veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = P6 + P7 (K)
	// vmov.i64	$t3#hi, #0
	//
	// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
	// upper halves of SIMD registers, so we must split each half into
	// separate registers. To compensate, we pair computations up and
	// parallelize.

	ext	v19.8b, v4.8b, v4.8b, #4	// B4
	eor	v18.16b, v18.16b, v2.16b	// N = I + J
	pmull	v19.8h, v6.8b, v19.8b	// K = A*B4

	// This can probably be scheduled more efficiently. For now, we just
	// pair up independent instructions.
	zip1	v20.2d, v16.2d, v17.2d
	zip1	v22.2d, v18.2d, v19.2d
	zip2	v21.2d, v16.2d, v17.2d
	zip2	v23.2d, v18.2d, v19.2d
	eor	v20.16b, v20.16b, v21.16b
	eor	v22.16b, v22.16b, v23.16b
	and	v21.16b, v21.16b, v24.16b
	and	v23.16b, v23.16b, v25.16b
	eor	v20.16b, v20.16b, v21.16b
	eor	v22.16b, v22.16b, v23.16b
	zip1	v16.2d, v20.2d, v21.2d
	zip1	v18.2d, v22.2d, v23.2d
	zip2	v17.2d, v20.2d, v21.2d
	zip2	v19.2d, v22.2d, v23.2d

	ext	v16.16b, v16.16b, v16.16b, #15	// t0 = t0 << 8
	ext	v17.16b, v17.16b, v17.16b, #14	// t1 = t1 << 16
	pmull	v2.8h, v6.8b, v4.8b	// D = A*B
	ext	v19.16b, v19.16b, v19.16b, #12	// t3 = t3 << 32
	ext	v18.16b, v18.16b, v18.16b, #13	// t2 = t2 << 24
	eor	v16.16b, v16.16b, v17.16b
	eor	v18.16b, v18.16b, v19.16b
	eor	v2.16b, v2.16b, v16.16b
	eor	v2.16b, v2.16b, v18.16b
	ext	v16.16b, v0.16b, v2.16b, #8
	eor	v1.16b, v1.16b, v0.16b	// Karatsuba post-processing
	eor	v1.16b, v1.16b, v2.16b
	eor	v1.16b, v1.16b, v16.16b	// Xm overlaps Xh.lo and Xl.hi
	ins	v0.d[1], v1.d[0]	// Xh|Xl - 256-bit result
	// This is a no-op due to the ins instruction below.
	// ins	v2.d[0], v1.d[1]

	// equivalent of reduction_avx from ghash-x86_64.pl
	shl	v17.2d, v0.2d, #57	// 1st phase
	shl	v18.2d, v0.2d, #62
	eor	v18.16b, v18.16b, v17.16b	//
	shl	v17.2d, v0.2d, #63
	eor	v18.16b, v18.16b, v17.16b	//
	// Note Xm contains {Xl.d[1], Xh.d[0]}.
	eor	v18.16b, v18.16b, v1.16b
	ins	v0.d[1], v18.d[0]	// Xl.d[1] ^= t2.d[0]
	ins	v2.d[0], v18.d[1]	// Xh.d[0] ^= t2.d[1]

	ushr	v18.2d, v0.2d, #1	// 2nd phase
	eor	v2.16b, v2.16b,v0.16b
	eor	v0.16b, v0.16b,v18.16b	//
	ushr	v18.2d, v18.2d, #6
	ushr	v0.2d, v0.2d, #1	//
	eor	v0.16b, v0.16b, v2.16b	//
	eor	v0.16b, v0.16b, v18.16b	//

	subs	x3, x3, #16
	bne	Loop_neon

	rev64	v0.16b, v0.16b	// byteswap Xi and write
	ext	v0.16b, v0.16b, v0.16b, #8
	st1	{v0.16b}, [x0]

	ret

.section	__TEXT,__const
.align	4
Lmasks:
.quad	0x0000ffffffffffff	// k48
.quad	0x00000000ffffffff	// k32
.quad	0x000000000000ffff	// k16
.quad	0x0000000000000000	// k0
.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2
#endif  // !OPENSSL_NO_ASM
246
packager/third_party/boringssl/ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S
vendored
Normal file
@@ -0,0 +1,246 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>

.text

.globl	_gcm_init_v8
.private_extern	_gcm_init_v8

.align	4
_gcm_init_v8:
	ld1	{v17.2d},[x1]	//load input H
	movi	v19.16b,#0xe1
	shl	v19.2d,v19.2d,#57	//0xc2.0
	ext	v3.16b,v17.16b,v17.16b,#8
	ushr	v18.2d,v19.2d,#63
	dup	v17.4s,v17.s[1]
	ext	v16.16b,v18.16b,v19.16b,#8	//t0=0xc2....01
	ushr	v18.2d,v3.2d,#63
	sshr	v17.4s,v17.4s,#31	//broadcast carry bit
	and	v18.16b,v18.16b,v16.16b
	shl	v3.2d,v3.2d,#1
	ext	v18.16b,v18.16b,v18.16b,#8
	and	v16.16b,v16.16b,v17.16b
	orr	v3.16b,v3.16b,v18.16b	//H<<<=1
	eor	v20.16b,v3.16b,v16.16b	//twisted H
	st1	{v20.2d},[x0],#16	//store Htable[0]

	//calculate H^2
	ext	v16.16b,v20.16b,v20.16b,#8	//Karatsuba pre-processing
	pmull	v0.1q,v20.1d,v20.1d
	eor	v16.16b,v16.16b,v20.16b
	pmull2	v2.1q,v20.2d,v20.2d
	pmull	v1.1q,v16.1d,v16.1d

	ext	v17.16b,v0.16b,v2.16b,#8	//Karatsuba post-processing
	eor	v18.16b,v0.16b,v2.16b
	eor	v1.16b,v1.16b,v17.16b
	eor	v1.16b,v1.16b,v18.16b
	pmull	v18.1q,v0.1d,v19.1d	//1st phase

	ins	v2.d[0],v1.d[1]
	ins	v1.d[1],v0.d[0]
	eor	v0.16b,v1.16b,v18.16b

	ext	v18.16b,v0.16b,v0.16b,#8	//2nd phase
	pmull	v0.1q,v0.1d,v19.1d
	eor	v18.16b,v18.16b,v2.16b
	eor	v22.16b,v0.16b,v18.16b

	ext	v17.16b,v22.16b,v22.16b,#8	//Karatsuba pre-processing
	eor	v17.16b,v17.16b,v22.16b
	ext	v21.16b,v16.16b,v17.16b,#8	//pack Karatsuba pre-processed
	st1	{v21.2d,v22.2d},[x0]	//store Htable[1..2]

	ret

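// Table layout produced above: Htable[0] holds twisted H, Htable[1] the
// packed Karatsuba pre-processed halves (for H and H^2), and Htable[2]
// twisted H^2. Keeping both H and H^2 is what lets _gcm_ghash_v8 process
// two input blocks per Loop_mod2x_v8 iteration further below.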
.globl	_gcm_gmult_v8
.private_extern	_gcm_gmult_v8

.align	4
_gcm_gmult_v8:
	ld1	{v17.2d},[x0]	//load Xi
	movi	v19.16b,#0xe1
	ld1	{v20.2d,v21.2d},[x1]	//load twisted H, ...
	shl	v19.2d,v19.2d,#57
#ifndef __ARMEB__
	rev64	v17.16b,v17.16b
#endif
	ext	v3.16b,v17.16b,v17.16b,#8

	pmull	v0.1q,v20.1d,v3.1d	//H.lo·Xi.lo
	eor	v17.16b,v17.16b,v3.16b	//Karatsuba pre-processing
	pmull2	v2.1q,v20.2d,v3.2d	//H.hi·Xi.hi
	pmull	v1.1q,v21.1d,v17.1d	//(H.lo+H.hi)·(Xi.lo+Xi.hi)

	ext	v17.16b,v0.16b,v2.16b,#8	//Karatsuba post-processing
	eor	v18.16b,v0.16b,v2.16b
	eor	v1.16b,v1.16b,v17.16b
	eor	v1.16b,v1.16b,v18.16b
	pmull	v18.1q,v0.1d,v19.1d	//1st phase of reduction

	ins	v2.d[0],v1.d[1]
	ins	v1.d[1],v0.d[0]
	eor	v0.16b,v1.16b,v18.16b

	ext	v18.16b,v0.16b,v0.16b,#8	//2nd phase of reduction
	pmull	v0.1q,v0.1d,v19.1d
	eor	v18.16b,v18.16b,v2.16b
	eor	v0.16b,v0.16b,v18.16b

#ifndef __ARMEB__
	rev64	v0.16b,v0.16b
#endif
	ext	v0.16b,v0.16b,v0.16b,#8
	st1	{v0.2d},[x0]	//write out Xi

	ret

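// What the block above computed, as a sketch (· is carry-less multiply):
//   Xl = H.lo·Xi.lo
//   Xh = H.hi·Xi.hi
//   Xm = (H.lo^H.hi)·(Xi.lo^Xi.hi) ^ Xl ^ Xh    (Karatsuba middle term)
// followed by the two pmull-by-0xc2 reduction phases before Xi is stored.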
.globl	_gcm_ghash_v8
.private_extern	_gcm_ghash_v8

.align	4
_gcm_ghash_v8:
	ld1	{v0.2d},[x0]	//load [rotated] Xi
				//"[rotated]" means that
				//loaded value would have
				//to be rotated in order to
				//make it appear as in
				//algorithm specification
	subs	x3,x3,#32	//see if x3 is 32 or larger
	mov	x12,#16		//x12 is used as post-
				//increment for input pointer;
				//as loop is modulo-scheduled
				//x12 is zeroed just in time
				//to preclude overstepping
				//inp[len], which means that
				//last block[s] are actually
				//loaded twice, but last
				//copy is not processed
	ld1	{v20.2d,v21.2d},[x1],#32	//load twisted H, ..., H^2
	movi	v19.16b,#0xe1
	ld1	{v22.2d},[x1]
	csel	x12,xzr,x12,eq	//is it time to zero x12?
	ext	v0.16b,v0.16b,v0.16b,#8	//rotate Xi
	ld1	{v16.2d},[x2],#16	//load [rotated] I[0]
	shl	v19.2d,v19.2d,#57	//compose 0xc2.0 constant
#ifndef __ARMEB__
	rev64	v16.16b,v16.16b
	rev64	v0.16b,v0.16b
#endif
	ext	v3.16b,v16.16b,v16.16b,#8	//rotate I[0]
	b.lo	Lodd_tail_v8	//x3 was less than 32
	ld1	{v17.2d},[x2],x12	//load [rotated] I[1]
#ifndef __ARMEB__
	rev64	v17.16b,v17.16b
#endif
	ext	v7.16b,v17.16b,v17.16b,#8
	eor	v3.16b,v3.16b,v0.16b	//I[i]^=Xi
	pmull	v4.1q,v20.1d,v7.1d	//H·Ii+1
	eor	v17.16b,v17.16b,v7.16b	//Karatsuba pre-processing
	pmull2	v6.1q,v20.2d,v7.2d
	b	Loop_mod2x_v8

.align	4
Loop_mod2x_v8:
	ext	v18.16b,v3.16b,v3.16b,#8
	subs	x3,x3,#32	//is there more data?
	pmull	v0.1q,v22.1d,v3.1d	//H^2.lo·Xi.lo
	csel	x12,xzr,x12,lo	//is it time to zero x12?

	pmull	v5.1q,v21.1d,v17.1d
	eor	v18.16b,v18.16b,v3.16b	//Karatsuba pre-processing
	pmull2	v2.1q,v22.2d,v3.2d	//H^2.hi·Xi.hi
	eor	v0.16b,v0.16b,v4.16b	//accumulate
	pmull2	v1.1q,v21.2d,v18.2d	//(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
	ld1	{v16.2d},[x2],x12	//load [rotated] I[i+2]

	eor	v2.16b,v2.16b,v6.16b
	csel	x12,xzr,x12,eq	//is it time to zero x12?
	eor	v1.16b,v1.16b,v5.16b

	ext	v17.16b,v0.16b,v2.16b,#8	//Karatsuba post-processing
	eor	v18.16b,v0.16b,v2.16b
	eor	v1.16b,v1.16b,v17.16b
	ld1	{v17.2d},[x2],x12	//load [rotated] I[i+3]
#ifndef __ARMEB__
	rev64	v16.16b,v16.16b
#endif
	eor	v1.16b,v1.16b,v18.16b
	pmull	v18.1q,v0.1d,v19.1d	//1st phase of reduction

#ifndef __ARMEB__
	rev64	v17.16b,v17.16b
#endif
	ins	v2.d[0],v1.d[1]
	ins	v1.d[1],v0.d[0]
	ext	v7.16b,v17.16b,v17.16b,#8
	ext	v3.16b,v16.16b,v16.16b,#8
	eor	v0.16b,v1.16b,v18.16b
	pmull	v4.1q,v20.1d,v7.1d	//H·Ii+1
	eor	v3.16b,v3.16b,v2.16b	//accumulate v3.16b early

	ext	v18.16b,v0.16b,v0.16b,#8	//2nd phase of reduction
	pmull	v0.1q,v0.1d,v19.1d
	eor	v3.16b,v3.16b,v18.16b
	eor	v17.16b,v17.16b,v7.16b	//Karatsuba pre-processing
	eor	v3.16b,v3.16b,v0.16b
	pmull2	v6.1q,v20.2d,v7.2d
	b.hs	Loop_mod2x_v8	//there was at least 32 more bytes

	eor	v2.16b,v2.16b,v18.16b
	ext	v3.16b,v16.16b,v16.16b,#8	//re-construct v3.16b
	adds	x3,x3,#32	//re-construct x3
	eor	v0.16b,v0.16b,v2.16b	//re-construct v0.16b
	b.eq	Ldone_v8	//is x3 zero?
Lodd_tail_v8:
	ext	v18.16b,v0.16b,v0.16b,#8
	eor	v3.16b,v3.16b,v0.16b	//inp^=Xi
	eor	v17.16b,v16.16b,v18.16b	//v17.16b is rotated inp^Xi

	pmull	v0.1q,v20.1d,v3.1d	//H.lo·Xi.lo
	eor	v17.16b,v17.16b,v3.16b	//Karatsuba pre-processing
	pmull2	v2.1q,v20.2d,v3.2d	//H.hi·Xi.hi
	pmull	v1.1q,v21.1d,v17.1d	//(H.lo+H.hi)·(Xi.lo+Xi.hi)

	ext	v17.16b,v0.16b,v2.16b,#8	//Karatsuba post-processing
	eor	v18.16b,v0.16b,v2.16b
	eor	v1.16b,v1.16b,v17.16b
	eor	v1.16b,v1.16b,v18.16b
	pmull	v18.1q,v0.1d,v19.1d	//1st phase of reduction

	ins	v2.d[0],v1.d[1]
	ins	v1.d[1],v0.d[0]
	eor	v0.16b,v1.16b,v18.16b

	ext	v18.16b,v0.16b,v0.16b,#8	//2nd phase of reduction
	pmull	v0.1q,v0.1d,v19.1d
	eor	v18.16b,v18.16b,v2.16b
	eor	v0.16b,v0.16b,v18.16b

Ldone_v8:
#ifndef __ARMEB__
	rev64	v0.16b,v0.16b
#endif
	ext	v0.16b,v0.16b,v0.16b,#8
	st1	{v0.2d},[x0]	//write out Xi

	ret

.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2
#endif  // !OPENSSL_NO_ASM
File diff suppressed because it is too large
1210
packager/third_party/boringssl/ios-aarch64/crypto/fipsmodule/sha256-armv8.S
vendored
Normal file
File diff suppressed because it is too large
1082
packager/third_party/boringssl/ios-aarch64/crypto/fipsmodule/sha512-armv8.S
vendored
Normal file
File diff suppressed because it is too large
1213
packager/third_party/boringssl/ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
vendored
Normal file
File diff suppressed because it is too large
@@ -0,0 +1,685 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text

// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
// with |argv|, then saves the callee-saved registers into |state|. It returns
// the result of |func|. The |unwind| argument is unused.
// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
//                              const uint64_t *argv, size_t argc,
//                              uint64_t unwind);

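// Illustrative call from the C side (a sketch only; CallerState is the
// AArch64 variant from BoringSSL's crypto/test/abi_test.h holding x19-x28
// and d8-d15, and fill_registers is a hypothetical helper):
//
//   CallerState state;
//   fill_registers(&state);   // e.g. random values
//   uint64_t argv[] = {1, 2, 3};
//   abi_test_trampoline((void (*)(...))func_under_test, &state, argv, 3, 0);
//   // |state| now reflects the callee-saved registers as func_under_test
//   // left them; diffing against the values loaded detects clobbering.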
.globl	_abi_test_trampoline
.private_extern	_abi_test_trampoline
.align	4
_abi_test_trampoline:
Labi_test_trampoline_begin:
	// Stack layout (low to high addresses)
	//   x29,x30 (16 bytes)
	//   d8-d15 (64 bytes)
	//   x19-x28 (80 bytes)
	//   x1 (8 bytes)
	//   padding (8 bytes)
	stp	x29, x30, [sp, #-176]!
	mov	x29, sp

	// Saved callee-saved registers and |state|.
	stp	d8, d9, [sp, #16]
	stp	d10, d11, [sp, #32]
	stp	d12, d13, [sp, #48]
	stp	d14, d15, [sp, #64]
	stp	x19, x20, [sp, #80]
	stp	x21, x22, [sp, #96]
	stp	x23, x24, [sp, #112]
	stp	x25, x26, [sp, #128]
	stp	x27, x28, [sp, #144]
	str	x1, [sp, #160]

	// Load registers from |state|, with the exception of x29. x29 is the
	// frame pointer and also callee-saved, but AAPCS64 allows platforms to
	// mandate that x29 always point to a frame. iOS64 does so, which means
	// we cannot fill x29 with entropy without violating ABI rules
	// ourselves. x29 is tested separately below.
	ldp	d8, d9, [x1], #16
	ldp	d10, d11, [x1], #16
	ldp	d12, d13, [x1], #16
	ldp	d14, d15, [x1], #16
	ldp	x19, x20, [x1], #16
	ldp	x21, x22, [x1], #16
	ldp	x23, x24, [x1], #16
	ldp	x25, x26, [x1], #16
	ldp	x27, x28, [x1], #16

	// Move parameters into temporary registers.
	mov	x9, x0
	mov	x10, x2
	mov	x11, x3

	// Load parameters into registers.
	cbz	x11, Largs_done
	ldr	x0, [x10], #8
	subs	x11, x11, #1
	b.eq	Largs_done
	ldr	x1, [x10], #8
	subs	x11, x11, #1
	b.eq	Largs_done
	ldr	x2, [x10], #8
	subs	x11, x11, #1
	b.eq	Largs_done
	ldr	x3, [x10], #8
	subs	x11, x11, #1
	b.eq	Largs_done
	ldr	x4, [x10], #8
	subs	x11, x11, #1
	b.eq	Largs_done
	ldr	x5, [x10], #8
	subs	x11, x11, #1
	b.eq	Largs_done
	ldr	x6, [x10], #8
	subs	x11, x11, #1
	b.eq	Largs_done
	ldr	x7, [x10], #8

Largs_done:
	blr	x9

	// Reload |state| and store registers.
	ldr	x1, [sp, #160]
	stp	d8, d9, [x1], #16
	stp	d10, d11, [x1], #16
	stp	d12, d13, [x1], #16
	stp	d14, d15, [x1], #16
	stp	x19, x20, [x1], #16
	stp	x21, x22, [x1], #16
	stp	x23, x24, [x1], #16
	stp	x25, x26, [x1], #16
	stp	x27, x28, [x1], #16

	// |func| is required to preserve x29, the frame pointer. We cannot load
	// random values into x29 (see comment above), so compare it against the
	// expected value and zero the field of |state| if corrupted.
	mov	x9, sp
	cmp	x29, x9
	b.eq	Lx29_ok
	str	xzr, [x1]

Lx29_ok:
	// Restore callee-saved registers.
	ldp	d8, d9, [sp, #16]
	ldp	d10, d11, [sp, #32]
	ldp	d12, d13, [sp, #48]
	ldp	d14, d15, [sp, #64]
	ldp	x19, x20, [sp, #80]
	ldp	x21, x22, [sp, #96]
	ldp	x23, x24, [sp, #112]
	ldp	x25, x26, [sp, #128]
	ldp	x27, x28, [sp, #144]

	ldp	x29, x30, [sp], #176
	ret

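// Each _abi_test_clobber_* helper below zeroes exactly one register and
// returns. The test harness calls them to confirm it can detect when a
// supposedly callee-saved register is not preserved.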
.globl	_abi_test_clobber_x0
.private_extern	_abi_test_clobber_x0
.align	4
_abi_test_clobber_x0:
	mov	x0, xzr
	ret

.globl	_abi_test_clobber_x1
.private_extern	_abi_test_clobber_x1
.align	4
_abi_test_clobber_x1:
	mov	x1, xzr
	ret

.globl	_abi_test_clobber_x2
.private_extern	_abi_test_clobber_x2
.align	4
_abi_test_clobber_x2:
	mov	x2, xzr
	ret

.globl	_abi_test_clobber_x3
.private_extern	_abi_test_clobber_x3
.align	4
_abi_test_clobber_x3:
	mov	x3, xzr
	ret

.globl	_abi_test_clobber_x4
.private_extern	_abi_test_clobber_x4
.align	4
_abi_test_clobber_x4:
	mov	x4, xzr
	ret

.globl	_abi_test_clobber_x5
.private_extern	_abi_test_clobber_x5
.align	4
_abi_test_clobber_x5:
	mov	x5, xzr
	ret

.globl	_abi_test_clobber_x6
.private_extern	_abi_test_clobber_x6
.align	4
_abi_test_clobber_x6:
	mov	x6, xzr
	ret

.globl	_abi_test_clobber_x7
.private_extern	_abi_test_clobber_x7
.align	4
_abi_test_clobber_x7:
	mov	x7, xzr
	ret

.globl	_abi_test_clobber_x8
.private_extern	_abi_test_clobber_x8
.align	4
_abi_test_clobber_x8:
	mov	x8, xzr
	ret

.globl	_abi_test_clobber_x9
.private_extern	_abi_test_clobber_x9
.align	4
_abi_test_clobber_x9:
	mov	x9, xzr
	ret

.globl	_abi_test_clobber_x10
.private_extern	_abi_test_clobber_x10
.align	4
_abi_test_clobber_x10:
	mov	x10, xzr
	ret

.globl	_abi_test_clobber_x11
.private_extern	_abi_test_clobber_x11
.align	4
_abi_test_clobber_x11:
	mov	x11, xzr
	ret

.globl	_abi_test_clobber_x12
.private_extern	_abi_test_clobber_x12
.align	4
_abi_test_clobber_x12:
	mov	x12, xzr
	ret

.globl	_abi_test_clobber_x13
.private_extern	_abi_test_clobber_x13
.align	4
_abi_test_clobber_x13:
	mov	x13, xzr
	ret

.globl	_abi_test_clobber_x14
.private_extern	_abi_test_clobber_x14
.align	4
_abi_test_clobber_x14:
	mov	x14, xzr
	ret

.globl	_abi_test_clobber_x15
.private_extern	_abi_test_clobber_x15
.align	4
_abi_test_clobber_x15:
	mov	x15, xzr
	ret

.globl	_abi_test_clobber_x16
.private_extern	_abi_test_clobber_x16
.align	4
_abi_test_clobber_x16:
	mov	x16, xzr
	ret

.globl	_abi_test_clobber_x17
.private_extern	_abi_test_clobber_x17
.align	4
_abi_test_clobber_x17:
	mov	x17, xzr
	ret

.globl	_abi_test_clobber_x19
.private_extern	_abi_test_clobber_x19
.align	4
_abi_test_clobber_x19:
	mov	x19, xzr
	ret

.globl	_abi_test_clobber_x20
.private_extern	_abi_test_clobber_x20
.align	4
_abi_test_clobber_x20:
	mov	x20, xzr
	ret

.globl	_abi_test_clobber_x21
.private_extern	_abi_test_clobber_x21
.align	4
_abi_test_clobber_x21:
	mov	x21, xzr
	ret

.globl	_abi_test_clobber_x22
.private_extern	_abi_test_clobber_x22
.align	4
_abi_test_clobber_x22:
	mov	x22, xzr
	ret

.globl	_abi_test_clobber_x23
.private_extern	_abi_test_clobber_x23
.align	4
_abi_test_clobber_x23:
	mov	x23, xzr
	ret

.globl	_abi_test_clobber_x24
.private_extern	_abi_test_clobber_x24
.align	4
_abi_test_clobber_x24:
	mov	x24, xzr
	ret

.globl	_abi_test_clobber_x25
.private_extern	_abi_test_clobber_x25
.align	4
_abi_test_clobber_x25:
	mov	x25, xzr
	ret

.globl	_abi_test_clobber_x26
.private_extern	_abi_test_clobber_x26
.align	4
_abi_test_clobber_x26:
	mov	x26, xzr
	ret

.globl	_abi_test_clobber_x27
.private_extern	_abi_test_clobber_x27
.align	4
_abi_test_clobber_x27:
	mov	x27, xzr
	ret

.globl	_abi_test_clobber_x28
.private_extern	_abi_test_clobber_x28
.align	4
_abi_test_clobber_x28:
	mov	x28, xzr
	ret

.globl	_abi_test_clobber_x29
.private_extern	_abi_test_clobber_x29
.align	4
_abi_test_clobber_x29:
	mov	x29, xzr
	ret

.globl	_abi_test_clobber_d0
.private_extern	_abi_test_clobber_d0
.align	4
_abi_test_clobber_d0:
	fmov	d0, xzr
	ret

.globl	_abi_test_clobber_d1
.private_extern	_abi_test_clobber_d1
.align	4
_abi_test_clobber_d1:
	fmov	d1, xzr
	ret

.globl	_abi_test_clobber_d2
.private_extern	_abi_test_clobber_d2
.align	4
_abi_test_clobber_d2:
	fmov	d2, xzr
	ret

.globl	_abi_test_clobber_d3
.private_extern	_abi_test_clobber_d3
.align	4
_abi_test_clobber_d3:
	fmov	d3, xzr
	ret

.globl	_abi_test_clobber_d4
.private_extern	_abi_test_clobber_d4
.align	4
_abi_test_clobber_d4:
	fmov	d4, xzr
	ret

.globl	_abi_test_clobber_d5
.private_extern	_abi_test_clobber_d5
.align	4
_abi_test_clobber_d5:
	fmov	d5, xzr
	ret

.globl	_abi_test_clobber_d6
.private_extern	_abi_test_clobber_d6
.align	4
_abi_test_clobber_d6:
	fmov	d6, xzr
	ret

.globl	_abi_test_clobber_d7
.private_extern	_abi_test_clobber_d7
.align	4
_abi_test_clobber_d7:
	fmov	d7, xzr
	ret

.globl	_abi_test_clobber_d8
.private_extern	_abi_test_clobber_d8
.align	4
_abi_test_clobber_d8:
	fmov	d8, xzr
	ret

.globl	_abi_test_clobber_d9
.private_extern	_abi_test_clobber_d9
.align	4
_abi_test_clobber_d9:
	fmov	d9, xzr
	ret

.globl	_abi_test_clobber_d10
.private_extern	_abi_test_clobber_d10
.align	4
_abi_test_clobber_d10:
	fmov	d10, xzr
	ret

.globl	_abi_test_clobber_d11
.private_extern	_abi_test_clobber_d11
.align	4
_abi_test_clobber_d11:
	fmov	d11, xzr
	ret

.globl	_abi_test_clobber_d12
.private_extern	_abi_test_clobber_d12
.align	4
_abi_test_clobber_d12:
	fmov	d12, xzr
	ret

.globl	_abi_test_clobber_d13
.private_extern	_abi_test_clobber_d13
.align	4
_abi_test_clobber_d13:
	fmov	d13, xzr
	ret

.globl	_abi_test_clobber_d14
.private_extern	_abi_test_clobber_d14
.align	4
_abi_test_clobber_d14:
	fmov	d14, xzr
	ret

.globl	_abi_test_clobber_d15
.private_extern	_abi_test_clobber_d15
.align	4
_abi_test_clobber_d15:
	fmov	d15, xzr
	ret

.globl	_abi_test_clobber_d16
.private_extern	_abi_test_clobber_d16
.align	4
_abi_test_clobber_d16:
	fmov	d16, xzr
	ret

.globl	_abi_test_clobber_d17
.private_extern	_abi_test_clobber_d17
.align	4
_abi_test_clobber_d17:
	fmov	d17, xzr
	ret

.globl	_abi_test_clobber_d18
.private_extern	_abi_test_clobber_d18
.align	4
_abi_test_clobber_d18:
	fmov	d18, xzr
	ret

.globl	_abi_test_clobber_d19
.private_extern	_abi_test_clobber_d19
.align	4
_abi_test_clobber_d19:
	fmov	d19, xzr
	ret

.globl	_abi_test_clobber_d20
.private_extern	_abi_test_clobber_d20
.align	4
_abi_test_clobber_d20:
	fmov	d20, xzr
	ret

.globl	_abi_test_clobber_d21
.private_extern	_abi_test_clobber_d21
.align	4
_abi_test_clobber_d21:
	fmov	d21, xzr
	ret

.globl	_abi_test_clobber_d22
.private_extern	_abi_test_clobber_d22
.align	4
_abi_test_clobber_d22:
	fmov	d22, xzr
	ret

.globl	_abi_test_clobber_d23
.private_extern	_abi_test_clobber_d23
.align	4
_abi_test_clobber_d23:
	fmov	d23, xzr
	ret

.globl	_abi_test_clobber_d24
.private_extern	_abi_test_clobber_d24
.align	4
_abi_test_clobber_d24:
	fmov	d24, xzr
	ret

.globl	_abi_test_clobber_d25
.private_extern	_abi_test_clobber_d25
.align	4
_abi_test_clobber_d25:
	fmov	d25, xzr
	ret

.globl	_abi_test_clobber_d26
.private_extern	_abi_test_clobber_d26
.align	4
_abi_test_clobber_d26:
	fmov	d26, xzr
	ret

.globl	_abi_test_clobber_d27
.private_extern	_abi_test_clobber_d27
.align	4
_abi_test_clobber_d27:
	fmov	d27, xzr
	ret

.globl	_abi_test_clobber_d28
.private_extern	_abi_test_clobber_d28
.align	4
_abi_test_clobber_d28:
	fmov	d28, xzr
	ret

.globl	_abi_test_clobber_d29
.private_extern	_abi_test_clobber_d29
.align	4
_abi_test_clobber_d29:
	fmov	d29, xzr
	ret

.globl	_abi_test_clobber_d30
.private_extern	_abi_test_clobber_d30
.align	4
_abi_test_clobber_d30:
	fmov	d30, xzr
	ret

.globl	_abi_test_clobber_d31
.private_extern	_abi_test_clobber_d31
.align	4
_abi_test_clobber_d31:
	fmov	d31, xzr
	ret

.globl	_abi_test_clobber_v8_upper
.private_extern	_abi_test_clobber_v8_upper
.align	4
_abi_test_clobber_v8_upper:
	fmov	v8.d[1], xzr
	ret

.globl	_abi_test_clobber_v9_upper
.private_extern	_abi_test_clobber_v9_upper
.align	4
_abi_test_clobber_v9_upper:
	fmov	v9.d[1], xzr
	ret

.globl	_abi_test_clobber_v10_upper
.private_extern	_abi_test_clobber_v10_upper
.align	4
_abi_test_clobber_v10_upper:
	fmov	v10.d[1], xzr
	ret

.globl	_abi_test_clobber_v11_upper
.private_extern	_abi_test_clobber_v11_upper
.align	4
_abi_test_clobber_v11_upper:
	fmov	v11.d[1], xzr
	ret

.globl	_abi_test_clobber_v12_upper
.private_extern	_abi_test_clobber_v12_upper
.align	4
_abi_test_clobber_v12_upper:
	fmov	v12.d[1], xzr
	ret

.globl	_abi_test_clobber_v13_upper
.private_extern	_abi_test_clobber_v13_upper
.align	4
_abi_test_clobber_v13_upper:
	fmov	v13.d[1], xzr
	ret

.globl	_abi_test_clobber_v14_upper
.private_extern	_abi_test_clobber_v14_upper
.align	4
_abi_test_clobber_v14_upper:
	fmov	v14.d[1], xzr
	ret

.globl	_abi_test_clobber_v15_upper
.private_extern	_abi_test_clobber_v15_upper
.align	4
_abi_test_clobber_v15_upper:
	fmov	v15.d[1], xzr
	ret

#endif  // !OPENSSL_NO_ASM
996
packager/third_party/boringssl/ios-aarch64/crypto/third_party/sike/asm/fp-armv8.S
vendored
Normal file
@@ -0,0 +1,996 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.section	__TEXT,__const

# p434 x 2
Lp434x2:
.quad	0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
.quad	0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
.quad	0xD9F8BFAD038A40AC, 0x0004683E4E2EE688

# p434 + 1
Lp434p1:
.quad	0xFDC1767AE3000000, 0x7BC65C783158AEA3
.quad	0x6CFC5FD681C52056, 0x0002341F27177344

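// Editorial note (inferred from the labels, not part of the generated
// file): p434 is the SIKEp434 prime 2^216*3^137 - 1. Lp434x2 stores 2*p434,
// used to keep add/sub results in [0, 2*p434); Lp434p1 stores the nonzero
// words of p434 + 1, whose low 192+ bits are zero, so the reduction below
// needs only four word multiplies per folded limb.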
.text
.globl	_sike_mpmul
.private_extern	_sike_mpmul
.align	4
_sike_mpmul:
	stp	x29, x30, [sp,#-96]!
	add	x29, sp, #0
	stp	x19, x20, [sp,#16]
	stp	x21, x22, [sp,#32]
	stp	x23, x24, [sp,#48]
	stp	x25, x26, [sp,#64]
	stp	x27, x28, [sp,#80]

	ldp	x3, x4, [x0]
	ldp	x5, x6, [x0,#16]
	ldp	x7, x8, [x0,#32]
	ldr	x9, [x0,#48]
	ldp	x10, x11, [x1,#0]
	ldp	x12, x13, [x1,#16]
	ldp	x14, x15, [x1,#32]
	ldr	x16, [x1,#48]

	// x3-x7 <- AH + AL, x7 <- carry
	adds	x3, x3, x7
	adcs	x4, x4, x8
	adcs	x5, x5, x9
	adcs	x6, x6, xzr
	adc	x7, xzr, xzr

	// x10-x13 <- BH + BL, x8 <- carry
	adds	x10, x10, x14
	adcs	x11, x11, x15
	adcs	x12, x12, x16
	adcs	x13, x13, xzr
	adc	x8, xzr, xzr

	// x9 <- combined carry
	and	x9, x7, x8
	// x7-x8 <- mask
	sub	x7, xzr, x7
	sub	x8, xzr, x8

	// x15-x19 <- masked (BH + BL)
	and	x14, x10, x7
	and	x15, x11, x7
	and	x16, x12, x7
	and	x17, x13, x7

	// x20-x23 <- masked (AH + AL)
	and	x20, x3, x8
	and	x21, x4, x8
	and	x22, x5, x8
	and	x23, x6, x8

	// x15-x19, x7 <- masked (AH+AL) + masked (BH+BL), step 1
	adds	x14, x14, x20
	adcs	x15, x15, x21
	adcs	x16, x16, x22
	adcs	x17, x17, x23
	adc	x7, x9, xzr

	// x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part
	stp	x3, x4, [x2,#0]
	// A0-A1 <- AH + AL, T0 <- mask
	adds	x3, x3, x5
	adcs	x4, x4, x6
	adc	x25, xzr, xzr

	// C6, T1 <- BH + BL, C7 <- mask
	adds	x23, x10, x12
	adcs	x26, x11, x13
	adc	x24, xzr, xzr

	// C0-C1 <- masked (BH + BL)
	sub	x19, xzr, x25
	sub	x20, xzr, x24
	and	x8, x23, x19
	and	x9, x26, x19

	// C4-C5 <- masked (AH + AL), T0 <- combined carry
	and	x21, x3, x20
	and	x22, x4, x20
	mul	x19, x3, x23
	mul	x20, x3, x26
	and	x25, x25, x24

	// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
	adds	x8, x21, x8
	umulh	x21, x3, x26
	adcs	x9, x22, x9
	umulh	x22, x3, x23
	adc	x25, x25, xzr

	// C2-C5 <- (AH+AL) x (BH+BL), low part
	mul	x3, x4, x23
	umulh	x23, x4, x23
	adds	x20, x20, x22
	adc	x21, x21, xzr

	mul	x24, x4, x26
	umulh	x26, x4, x26
	adds	x20, x20, x3
	adcs	x21, x21, x23
	adc	x22, xzr, xzr

	adds	x21, x21, x24
	adc	x22, x22, x26

	ldp	x3, x4, [x2,#0]

	// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
	adds	x21, x8, x21
	umulh	x24, x3, x10
	umulh	x26, x3, x11
	adcs	x22, x9, x22
	mul	x8, x3, x10
	mul	x9, x3, x11
	adc	x25, x25, xzr

	// C0-C1, T1, C7 <- AL x BL
	mul	x3, x4, x10
	umulh	x10, x4, x10
	adds	x9, x9, x24
	adc	x26, x26, xzr

	mul	x23, x4, x11
	umulh	x11, x4, x11
	adds	x9, x9, x3
	adcs	x26, x26, x10
	adc	x24, xzr, xzr

	adds	x26, x26, x23
	adc	x24, x24, x11

	// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
	mul	x3, x5, x12
	umulh	x10, x5, x12
	subs	x19, x19, x8
	sbcs	x20, x20, x9
	sbcs	x21, x21, x26
	mul	x4, x5, x13
	umulh	x23, x5, x13
	sbcs	x22, x22, x24
	sbc	x25, x25, xzr

	// A0, A1, C6, B0 <- AH x BH
	mul	x5, x6, x12
	umulh	x12, x6, x12
	adds	x4, x4, x10
	adc	x23, x23, xzr

	mul	x11, x6, x13
	umulh	x13, x6, x13
	adds	x4, x4, x5
	adcs	x23, x23, x12
	adc	x10, xzr, xzr

	adds	x23, x23, x11
	adc	x10, x10, x13

	// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
	subs	x19, x19, x3
	sbcs	x20, x20, x4
	sbcs	x21, x21, x23
	sbcs	x22, x22, x10
	sbc	x25, x25, xzr

	adds	x19, x19, x26
	adcs	x20, x20, x24
	adcs	x21, x21, x3
	adcs	x22, x22, x4
	adcs	x23, x25, x23
	adc	x24, x10, xzr

	// x15-x19, x7 <- (AH+AL) x (BH+BL), final step
	adds	x14, x14, x21
	adcs	x15, x15, x22
	adcs	x16, x16, x23
	adcs	x17, x17, x24
	adc	x7, x7, xzr

	// Load AL
	ldp	x3, x4, [x0]
	ldp	x5, x6, [x0,#16]
	// Load BL
	ldp	x10, x11, [x1,#0]
	ldp	x12, x13, [x1,#16]

	// Temporarily store x8 in x2
	stp	x8, x9, [x2,#0]
	// x21-x28 <- AL x BL
	// A0-A1 <- AH + AL, T0 <- mask
	adds	x3, x3, x5
	adcs	x4, x4, x6
	adc	x8, xzr, xzr

	// C6, T1 <- BH + BL, C7 <- mask
	adds	x27, x10, x12
	adcs	x9, x11, x13
	adc	x28, xzr, xzr

	// C0-C1 <- masked (BH + BL)
	sub	x23, xzr, x8
	sub	x24, xzr, x28
	and	x21, x27, x23
	and	x22, x9, x23

	// C4-C5 <- masked (AH + AL), T0 <- combined carry
	and	x25, x3, x24
	and	x26, x4, x24
	mul	x23, x3, x27
	mul	x24, x3, x9
	and	x8, x8, x28

	// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
	adds	x21, x25, x21
	umulh	x25, x3, x9
	adcs	x22, x26, x22
	umulh	x26, x3, x27
	adc	x8, x8, xzr

	// C2-C5 <- (AH+AL) x (BH+BL), low part
	mul	x3, x4, x27
	umulh	x27, x4, x27
	adds	x24, x24, x26
	adc	x25, x25, xzr

	mul	x28, x4, x9
	umulh	x9, x4, x9
	adds	x24, x24, x3
	adcs	x25, x25, x27
	adc	x26, xzr, xzr

	adds	x25, x25, x28
	adc	x26, x26, x9

	ldp	x3, x4, [x0,#0]

	// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
	adds	x25, x21, x25
	umulh	x28, x3, x10
	umulh	x9, x3, x11
	adcs	x26, x22, x26
	mul	x21, x3, x10
	mul	x22, x3, x11
	adc	x8, x8, xzr

	// C0-C1, T1, C7 <- AL x BL
	mul	x3, x4, x10
	umulh	x10, x4, x10
	adds	x22, x22, x28
	adc	x9, x9, xzr

	mul	x27, x4, x11
	umulh	x11, x4, x11
	adds	x22, x22, x3
	adcs	x9, x9, x10
	adc	x28, xzr, xzr

	adds	x9, x9, x27
	adc	x28, x28, x11

	// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
	mul	x3, x5, x12
	umulh	x10, x5, x12
	subs	x23, x23, x21
	sbcs	x24, x24, x22
	sbcs	x25, x25, x9
	mul	x4, x5, x13
	umulh	x27, x5, x13
	sbcs	x26, x26, x28
	sbc	x8, x8, xzr

	// A0, A1, C6, B0 <- AH x BH
	mul	x5, x6, x12
	umulh	x12, x6, x12
	adds	x4, x4, x10
	adc	x27, x27, xzr

	mul	x11, x6, x13
	umulh	x13, x6, x13
	adds	x4, x4, x5
	adcs	x27, x27, x12
	adc	x10, xzr, xzr

	adds	x27, x27, x11
	adc	x10, x10, x13

	// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
	subs	x23, x23, x3
	sbcs	x24, x24, x4
	sbcs	x25, x25, x27
	sbcs	x26, x26, x10
	sbc	x8, x8, xzr

	adds	x23, x23, x9
	adcs	x24, x24, x28
	adcs	x25, x25, x3
	adcs	x26, x26, x4
	adcs	x27, x8, x27
	adc	x28, x10, xzr

	// Restore x8
	ldp	x8, x9, [x2,#0]

	// x8-x10,x20,x15-x17,x19 <- masked (AH+AL) x (BH+BL) - ALxBL
	subs	x8, x8, x21
	sbcs	x9, x9, x22
	sbcs	x19, x19, x23
	sbcs	x20, x20, x24
	sbcs	x14, x14, x25
	sbcs	x15, x15, x26
	sbcs	x16, x16, x27
	sbcs	x17, x17, x28
	sbc	x7, x7, xzr

	// Store ALxBL, low
	stp	x21, x22, [x2]
	stp	x23, x24, [x2,#16]

	// Load AH
	ldp	x3, x4, [x0,#32]
	ldr	x5, [x0,#48]
	// Load BH
	ldp	x10, x11, [x1,#32]
	ldr	x12, [x1,#48]

	adds	x8, x8, x25
	adcs	x9, x9, x26
	adcs	x19, x19, x27
	adcs	x20, x20, x28
	adc	x1, xzr, xzr

	add	x0, x0, #32
	// Temporarily store x8,x9 in x2
	stp	x8,x9, [x2,#32]
	// x21-x28 <- AH x BH

	// A0 * B0
	mul	x21, x3, x10	// C0
	umulh	x24, x3, x10

	// A0 * B1
	mul	x22, x3, x11
	umulh	x23, x3, x11

	// A1 * B0
	mul	x8, x4, x10
	umulh	x9, x4, x10
	adds	x22, x22, x24
	adc	x23, x23, xzr

	// A0 * B2
	mul	x27, x3, x12
	umulh	x28, x3, x12
	adds	x22, x22, x8	// C1
	adcs	x23, x23, x9
	adc	x24, xzr, xzr

	// A2 * B0
	mul	x8, x5, x10
	umulh	x25, x5, x10
	adds	x23, x23, x27
	adcs	x24, x24, x25
	adc	x25, xzr, xzr

	// A1 * B1
	mul	x27, x4, x11
	umulh	x9, x4, x11
	adds	x23, x23, x8
	adcs	x24, x24, x28
	adc	x25, x25, xzr

	// A1 * B2
	mul	x8, x4, x12
	umulh	x28, x4, x12
	adds	x23, x23, x27	// C2
	adcs	x24, x24, x9
	adc	x25, x25, xzr

	// A2 * B1
	mul	x27, x5, x11
	umulh	x9, x5, x11
	adds	x24, x24, x8
	adcs	x25, x25, x28
	adc	x26, xzr, xzr

	// A2 * B2
	mul	x8, x5, x12
	umulh	x28, x5, x12
	adds	x24, x24, x27	// C3
	adcs	x25, x25, x9
	adc	x26, x26, xzr

	adds	x25, x25, x8	// C4
	adc	x26, x26, x28	// C5

	// Restore x8,x9
	ldp	x8,x9, [x2,#32]

	neg	x1, x1

	// x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
	subs	x8, x8, x21
	sbcs	x9, x9, x22
	sbcs	x19, x19, x23
	sbcs	x20, x20, x24
	sbcs	x14, x14, x25
	sbcs	x15, x15, x26
	sbcs	x16, x16, xzr
	sbcs	x17, x17, xzr
	sbc	x7, x7, xzr

	// Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
	stp	x8, x9, [x2,#32]
	stp	x19, x20, [x2,#48]

	adds	x1, x1, #1
	adcs	x14, x14, x21
	adcs	x15, x15, x22
	adcs	x16, x16, x23
	adcs	x17, x17, x24
	adcs	x25, x7, x25
	adc	x26, x26, xzr

	stp	x14, x15, [x2,#64]
	stp	x16, x17, [x2,#80]
	stp	x25, x26, [x2,#96]

	ldp	x19, x20, [x29,#16]
	ldp	x21, x22, [x29,#32]
	ldp	x23, x24, [x29,#48]
	ldp	x25, x26, [x29,#64]
	ldp	x27, x28, [x29,#80]
	ldp	x29, x30, [sp],#96
	ret
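// _sike_fprdc below reduces a double-width product to a 7-word field
// element (Montgomery reduction): each pass multiplies low limbs by the
// Lp434p1 words loaded at entry and folds the result into the high half,
// relying on the zero low words of p434 + 1 noted above.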
.globl	_sike_fprdc
.private_extern	_sike_fprdc
.align	4
_sike_fprdc:
	stp	x29, x30, [sp, #-96]!
	add	x29, sp, xzr
	stp	x19, x20, [sp,#16]
	stp	x21, x22, [sp,#32]
	stp	x23, x24, [sp,#48]
	stp	x25, x26, [sp,#64]
	stp	x27, x28, [sp,#80]

	ldp	x2, x3, [x0,#0]	// a[0-1]

	// Load the prime constant
	adrp	x26, Lp434p1@PAGE
	add	x26, x26, Lp434p1@PAGEOFF
	ldp	x23, x24, [x26, #0x0]
	ldp	x25, x26, [x26,#0x10]

	// a[0-1] * p434+1
	mul	x4, x2, x23	// C0
	umulh	x7, x2, x23

	mul	x5, x2, x24
	umulh	x6, x2, x24

	mul	x10, x3, x23
	umulh	x11, x3, x23
	adds	x5, x5, x7
	adc	x6, x6, xzr

	mul	x27, x2, x25
	umulh	x28, x2, x25
	adds	x5, x5, x10	// C1
	adcs	x6, x6, x11
	adc	x7, xzr, xzr

	mul	x10, x3, x24
	umulh	x11, x3, x24
	adds	x6, x6, x27
	adcs	x7, x7, x28
	adc	x8, xzr, xzr

	mul	x27, x2, x26
	umulh	x28, x2, x26
	adds	x6, x6, x10	// C2
	adcs	x7, x7, x11
	adc	x8, x8, xzr

	mul	x10, x3, x25
	umulh	x11, x3, x25
	adds	x7, x7, x27
	adcs	x8, x8, x28
	adc	x9, xzr, xzr

	mul	x27, x3, x26
	umulh	x28, x3, x26
	adds	x7, x7, x10	// C3
	adcs	x8, x8, x11
	adc	x9, x9, xzr
	adds	x8, x8, x27	// C4
	adc	x9, x9, x28	// C5

	ldp	x10, x11, [x0, #0x18]
	ldp	x12, x13, [x0, #0x28]
	ldp	x14, x15, [x0, #0x38]
	ldp	x16, x17, [x0, #0x48]
	ldp	x19, x20, [x0, #0x58]
	ldr	x21, [x0, #0x68]

	adds	x10, x10, x4
	adcs	x11, x11, x5
	adcs	x12, x12, x6
	adcs	x13, x13, x7
	adcs	x14, x14, x8
	adcs	x15, x15, x9
	adcs	x22, x16, xzr
	adcs	x17, x17, xzr
	adcs	x19, x19, xzr
	adcs	x20, x20, xzr
	adc	x21, x21, xzr

	ldr	x2, [x0,#0x10]	// a[2]
	// a[2-3] * p434+1
	mul	x4, x2, x23	// C0
	umulh	x7, x2, x23

	mul	x5, x2, x24
	umulh	x6, x2, x24

	mul	x0, x10, x23
	umulh	x3, x10, x23
	adds	x5, x5, x7
	adc	x6, x6, xzr

	mul	x27, x2, x25
	umulh	x28, x2, x25
	adds	x5, x5, x0	// C1
	adcs	x6, x6, x3
	adc	x7, xzr, xzr

	mul	x0, x10, x24
	umulh	x3, x10, x24
	adds	x6, x6, x27
	adcs	x7, x7, x28
	adc	x8, xzr, xzr

	mul	x27, x2, x26
	umulh	x28, x2, x26
	adds	x6, x6, x0	// C2
	adcs	x7, x7, x3
	adc	x8, x8, xzr

	mul	x0, x10, x25
	umulh	x3, x10, x25
	adds	x7, x7, x27
	adcs	x8, x8, x28
	adc	x9, xzr, xzr

	mul	x27, x10, x26
	umulh	x28, x10, x26
	adds	x7, x7, x0	// C3
	adcs	x8, x8, x3
	adc	x9, x9, xzr
	adds	x8, x8, x27	// C4
	adc	x9, x9, x28	// C5

	adds	x12, x12, x4
	adcs	x13, x13, x5
	adcs	x14, x14, x6
	adcs	x15, x15, x7
	adcs	x16, x22, x8
	adcs	x17, x17, x9
	adcs	x22, x19, xzr
	adcs	x20, x20, xzr
	adc	x21, x21, xzr

	mul	x4, x11, x23	// C0
	umulh	x7, x11, x23

	mul	x5, x11, x24
	umulh	x6, x11, x24

	mul	x10, x12, x23
	umulh	x3, x12, x23
	adds	x5, x5, x7
	adc	x6, x6, xzr

	mul	x27, x11, x25
	umulh	x28, x11, x25
	adds	x5, x5, x10	// C1
	adcs	x6, x6, x3
	adc	x7, xzr, xzr

	mul	x10, x12, x24
	umulh	x3, x12, x24
	adds	x6, x6, x27
	adcs	x7, x7, x28
	adc	x8, xzr, xzr

	mul	x27, x11, x26
	umulh	x28, x11, x26
	adds	x6, x6, x10	// C2
	adcs	x7, x7, x3
	adc	x8, x8, xzr

	mul	x10, x12, x25
	umulh	x3, x12, x25
	adds	x7, x7, x27
	adcs	x8, x8, x28
	adc	x9, xzr, xzr

	mul	x27, x12, x26
	umulh	x28, x12, x26
	adds	x7, x7, x10	// C3
	adcs	x8, x8, x3
	adc	x9, x9, xzr
	adds	x8, x8, x27	// C4
	adc	x9, x9, x28	// C5

	adds	x14, x14, x4
	adcs	x15, x15, x5
	adcs	x16, x16, x6
	adcs	x17, x17, x7
	adcs	x19, x22, x8
	adcs	x20, x20, x9
	adc	x22, x21, xzr

	stp	x14, x15, [x1, #0x0]	// C0, C1

	mul	x4, x13, x23	// C0
	umulh	x10, x13, x23

	mul	x5, x13, x24
	umulh	x27, x13, x24
	adds	x5, x5, x10	// C1
	adc	x10, xzr, xzr

	mul	x6, x13, x25
	umulh	x28, x13, x25
	adds	x27, x10, x27
	adcs	x6, x6, x27	// C2
	adc	x10, xzr, xzr

	mul	x7, x13, x26
	umulh	x8, x13, x26
	adds	x28, x10, x28
	adcs	x7, x7, x28	// C3
	adc	x8, x8, xzr	// C4

	adds	x16, x16, x4
	adcs	x17, x17, x5
	adcs	x19, x19, x6
	adcs	x20, x20, x7
	adc	x21, x22, x8

	str	x16, [x1, #0x10]
	stp	x17, x19, [x1, #0x18]
	stp	x20, x21, [x1, #0x28]

	ldp	x19, x20, [x29,#16]
	ldp	x21, x22, [x29,#32]
	ldp	x23, x24, [x29,#48]
	ldp	x25, x26, [x29,#64]
	ldp	x27, x28, [x29,#80]
	ldp	x29, x30, [sp],#96
	ret
.globl _sike_fpadd
|
||||
.private_extern _sike_fpadd
|
||||
.align 4
|
||||
_sike_fpadd:
|
||||
stp x29,x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldr x17, [x1,#48]
|
||||
|
||||
// Add a + b
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adc x9, x9, x17
|
||||
|
||||
// Subtract 2xp434
|
||||
adrp x17, Lp434x2@PAGE
|
||||
add x17, x17, Lp434x2@PAGEOFF
|
||||
ldp x11, x12, [x17, #0]
|
||||
ldp x13, x14, [x17, #16]
|
||||
ldp x15, x16, [x17, #32]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x12
|
||||
sbcs x6, x6, x13
|
||||
sbcs x7, x7, x14
|
||||
sbcs x8, x8, x15
|
||||
sbcs x9, x9, x16
|
||||
sbc x0, xzr, xzr // x0 can be reused now
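// The sbc above broadcasts the final borrow into a mask: x0 is all-ones
// when a + b - 2*p434 went negative and zero otherwise, so the correction
// below re-adds 2*p434 without a branch, keeping the routine constant-time.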

// Add 2xp434 anded with the mask in x0
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0

adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adc x9, x9, x16

stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]

ldp x29, x30, [sp],#16
ret
.globl _sike_fpsub
.private_extern _sike_fpsub
.align 4
_sike_fpsub:
stp x29, x30, [sp,#-16]!
add x29, sp, #0

ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]

// Subtract a - b
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
sbcs x9, x9, x17
sbc x0, xzr, xzr
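// As in _sike_fpadd, x0 is now an all-ones mask exactly when a - b
// borrowed; it gates the branch-free addition of 2*p434 that follows.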

// Add 2xp434 anded with the mask in x0
adrp x17, Lp434x2@PAGE
add x17, x17, Lp434x2@PAGEOFF

// First half
ldp x11, x12, [x17, #0]
ldp x13, x14, [x17, #16]
ldp x15, x16, [x17, #32]

// Add 2xp434 anded with the mask in x0
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0

adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adc x9, x9, x16

stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]

ldp x29, x30, [sp],#16
ret
.globl _sike_mpadd_asm
.private_extern _sike_mpadd_asm
.align 4
_sike_mpadd_asm:
stp x29, x30, [sp,#-16]!
add x29, sp, #0

ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]

adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
adcs x7, x7, x15
adcs x8, x8, x16
adc x9, x9, x17

stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]

ldp x29, x30, [sp],#16
ret
.globl _sike_mpsubx2_asm
.private_extern _sike_mpsubx2_asm
.align 4
_sike_mpsubx2_asm:
stp x29, x30, [sp,#-16]!
add x29, sp, #0

ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x11, x12, [x1,#32]
ldp x13, x14, [x1,#48]
sbcs x7, x7, x11
sbcs x8, x8, x12
sbcs x9, x9, x13
sbcs x10, x10, x14

stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]

ldp x3, x4, [x0,#64]
ldp x5, x6, [x0,#80]
ldp x11, x12, [x1,#64]
ldp x13, x14, [x1,#80]
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
ldp x7, x8, [x0,#96]
ldp x11, x12, [x1,#96]
sbcs x7, x7, x11
sbcs x8, x8, x12
sbc x0, xzr, xzr

stp x3, x4, [x2,#64]
stp x5, x6, [x2,#80]
stp x7, x8, [x2,#96]

ldp x29, x30, [sp],#16
ret
.globl _sike_mpdblsubx2_asm
.private_extern _sike_mpdblsubx2_asm
.align 4
_sike_mpdblsubx2_asm:
stp x29, x30, [sp, #-16]!
add x29, sp, #0

ldp x3, x4, [x2, #0]
ldp x5, x6, [x2,#16]
ldp x7, x8, [x2,#32]

ldp x11, x12, [x0, #0]
ldp x13, x14, [x0,#16]
ldp x15, x16, [x0,#32]

subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16

// x9 stores carry
adc x9, xzr, xzr

ldp x11, x12, [x1, #0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, x9, xzr

stp x3, x4, [x2, #0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]

ldp x3, x4, [x2,#48]
ldp x5, x6, [x2,#64]
ldp x7, x8, [x2,#80]

ldp x11, x12, [x0,#48]
ldp x13, x14, [x0,#64]
ldp x15, x16, [x0,#80]

// x9 = 2 - x9
neg x9, x9
add x9, x9, #2
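// The two adc instructions above left x9 holding the number of chains that
// did NOT borrow (0..2), so 2 - x9 is the total borrow count, which the
// subs below propagates into the next group of limbs.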

subs x3, x3, x9
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, xzr, xzr

ldp x11, x12, [x1,#48]
ldp x13, x14, [x1,#64]
ldp x15, x16, [x1,#80]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, x9, xzr

stp x3, x4, [x2,#48]
stp x5, x6, [x2,#64]
stp x7, x8, [x2,#80]

ldp x3, x4, [x2,#96]
ldp x11, x12, [x0,#96]
ldp x13, x14, [x1,#96]

// x9 = 2 - x9
neg x9, x9
add x9, x9, #2

subs x3, x3, x9
sbcs x3, x3, x11
sbcs x4, x4, x12
subs x3, x3, x13
sbc x4, x4, x14
stp x3, x4, [x2,#96]

ldp x29, x30, [sp],#16
ret
#endif // !OPENSSL_NO_ASM
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -0,0 +1,790 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text

.code 32
#undef __thumb2__
.align 5
Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b

.text

.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key
#ifdef __thumb2__
.thumb_func _aes_hw_set_encrypt_key
#endif
.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
mov r3,#-1
cmp r0,#0
beq Lenc_key_abort
cmp r2,#0
beq Lenc_key_abort
mov r3,#-2
cmp r1,#128
blt Lenc_key_abort
cmp r1,#256
bgt Lenc_key_abort
tst r1,#0x3f
bne Lenc_key_abort
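@ Argument checks above: a NULL key or key-schedule pointer returns -1;
@ a bit length outside 128..256, or one that is not a multiple of 64,
@ returns -2.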

adr r3,Lrcon
cmp r1,#192

veor q0,q0,q0
vld1.8 {q3},[r0]!
mov r1,#8 @ reuse r1
vld1.32 {q1,q2},[r3]!

blt Loop128
beq L192
b L256

.align 4
Loop128:
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1

veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
bne Loop128

vld1.32 {q1},[r3]

vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0

veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10

vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0

veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
veor q3,q3,q10
vst1.32 {q3},[r2]
add r2,r2,#0x50

mov r12,#10
b Ldone

.align 4
L192:
vld1.8 {d16},[r0]!
vmov.i8 q10,#8 @ borrow q10
vst1.32 {q3},[r2]!
vsub.i8 q2,q2,q10 @ adjust the mask

Loop192:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {d16},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1

veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9

vdup.32 q9,d7[1]
veor q9,q9,q8
veor q10,q10,q1
vext.8 q8,q0,q8,#12
vshl.u8 q1,q1,#1
veor q8,q8,q9
veor q3,q3,q10
veor q8,q8,q10
vst1.32 {q3},[r2]!
bne Loop192

mov r12,#12
add r2,r2,#0x20
b Ldone

.align 4
L256:
vld1.8 {q8},[r0]
mov r1,#7
mov r12,#14
vst1.32 {q3},[r2]!

Loop256:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {q8},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1

veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vst1.32 {q3},[r2]!
beq Ldone

vdup.32 q10,d7[1]
vext.8 q9,q0,q8,#12
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0

veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9

veor q8,q8,q10
b Loop256

Ldone:
str r12,[r2]
mov r3,#0

Lenc_key_abort:
mov r0,r3 @ return value

bx lr

.globl _aes_hw_set_decrypt_key
.private_extern _aes_hw_set_decrypt_key
#ifdef __thumb2__
.thumb_func _aes_hw_set_decrypt_key
#endif
.align 5
_aes_hw_set_decrypt_key:
stmdb sp!,{r4,lr}
bl Lenc_key

cmp r0,#0
bne Ldec_key_abort

sub r2,r2,#240 @ restore original r2
mov r4,#-16
add r0,r2,r12,lsl#4 @ end of key schedule

vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!

Loop_imc:
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
cmp r0,r2
bhi Loop_imc

vld1.32 {q0},[r2]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
vst1.32 {q0},[r0]

eor r0,r0,r0 @ return value
Ldec_key_abort:
ldmia sp!,{r4,pc}

.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt
#ifdef __thumb2__
.thumb_func _aes_hw_encrypt
#endif
.align 5
_aes_hw_encrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!

Loop_enc:
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q1},[r2]!
bgt Loop_enc

.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
veor q2,q2,q0

vst1.8 {q2},[r1]
bx lr

.globl _aes_hw_decrypt
.private_extern _aes_hw_decrypt
#ifdef __thumb2__
.thumb_func _aes_hw_decrypt
#endif
.align 5
_aes_hw_decrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!

Loop_dec:
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q1},[r2]!
bgt Loop_dec

.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
veor q2,q2,q0

vst1.8 {q2},[r1]
bx lr

.globl _aes_hw_cbc_encrypt
.private_extern _aes_hw_cbc_encrypt
#ifdef __thumb2__
.thumb_func _aes_hw_cbc_encrypt
#endif
.align 5
_aes_hw_cbc_encrypt:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load remaining args
subs r2,r2,#16
mov r8,#16
blo Lcbc_abort
moveq r8,#0

cmp r5,#0 @ en- or decrypting?
ldr r5,[r3,#240]
and r2,r2,#-16
vld1.8 {q6},[r4]
vld1.8 {q0},[r0],r8

vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#6
add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
sub r5,r5,#2
vld1.32 {q10,q11},[r7]!
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]

add r7,r3,#32
mov r6,r5
beq Lcbc_dec

cmp r5,#2
veor q0,q0,q6
veor q5,q8,q7
beq Lcbc_enc128

vld1.32 {q2,q3},[r7]
add r7,r3,#16
add r6,r3,#16*4
add r12,r3,#16*5
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
add r14,r3,#16*6
add r3,r3,#16*7
b Lenter_cbc_enc

.align 4
Loop_cbc_enc:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
Lenter_cbc_enc:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r6]
cmp r5,#4
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r12]
beq Lcbc_enc192

.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r14]
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r3]
nop

Lcbc_enc192:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs Loop_cbc_enc

vst1.8 {q6},[r1]!
b Lcbc_done

.align 5
Lcbc_enc128:
vld1.32 {q2,q3},[r7]
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
b Lenter_cbc_enc128
Loop_cbc_enc128:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
Lenter_cbc_enc128:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs Loop_cbc_enc128

vst1.8 {q6},[r1]!
b Lcbc_done
.align 5
Lcbc_dec:
vld1.8 {q10},[r0]!
subs r2,r2,#32 @ bias
add r6,r5,#2
vorr q3,q0,q0
vorr q1,q0,q0
vorr q11,q10,q10
blo Lcbc_dec_tail

vorr q1,q10,q10
vld1.8 {q10},[r0]!
vorr q2,q0,q0
vorr q3,q1,q1
vorr q11,q10,q10

Loop3x_cbc_dec:
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt Loop3x_cbc_dec

.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q4,q6,q7
subs r2,r2,#0x30
veor q5,q2,q7
movlo r6,r2 @ r6, r6, is zero at this point
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
add r0,r0,r6 @ r0 is adjusted in such way that
@ at exit from the loop q1-q10
@ are loaded with last "words"
vorr q6,q11,q11
mov r7,r3
.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q2},[r0]!
.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q3},[r0]!
.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q11},[r0]!
.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
add r6,r5,#2
veor q4,q4,q0
veor q5,q5,q1
veor q10,q10,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q4},[r1]!
vorr q0,q2,q2
vst1.8 {q5},[r1]!
vorr q1,q3,q3
vst1.8 {q10},[r1]!
vorr q10,q11,q11
bhs Loop3x_cbc_dec

cmn r2,#0x30
beq Lcbc_done
nop

Lcbc_dec_tail:
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt Lcbc_dec_tail

.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
cmn r2,#0x20
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q5,q6,q7
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
beq Lcbc_dec_one
veor q5,q5,q1
veor q9,q9,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
vst1.8 {q9},[r1]!
b Lcbc_done

Lcbc_dec_one:
veor q5,q5,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!

Lcbc_done:
vst1.8 {q6},[r4]
Lcbc_abort:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,pc}

.globl _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks
#ifdef __thumb2__
.thumb_func _aes_hw_ctr32_encrypt_blocks
#endif
.align 5
_aes_hw_ctr32_encrypt_blocks:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldr r4, [ip] @ load remaining arg
ldr r5,[r3,#240]

ldr r8, [r4, #12]
vld1.32 {q0},[r4]

vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#4
mov r12,#16
cmp r2,#2
add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
sub r5,r5,#2
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
movlo r12,#0
#ifndef __ARMEB__
rev r8, r8
#endif

vorr q1,q0,q0
add r10, r8, #1
vorr q10,q0,q0
add r8, r8, #2
vorr q6,q0,q0
rev r10, r10
vmov.32 d3[1],r10
bls Lctr32_tail
rev r12, r8
sub r2,r2,#3 @ bias
vmov.32 d21[1],r12
b Loop3x_ctr32

.align 4
Loop3x_ctr32:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q9},[r7]!
bgt Loop3x_ctr32

.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
vld1.8 {q2},[r0]!
vorr q0,q6,q6
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.8 {q3},[r0]!
vorr q1,q6,q6
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vld1.8 {q11},[r0]!
mov r7,r3
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
vorr q10,q6,q6
add r9,r8,#1
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q2,q2,q7
add r10,r8,#2
.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
veor q3,q3,q7
add r8,r8,#3
.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q11,q11,q7
rev r9,r9
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d1[1], r9
rev r10,r10
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vmov.32 d3[1], r10
rev r12,r8
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d21[1], r12
subs r2,r2,#3
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15

veor q2,q2,q4
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
vst1.8 {q2},[r1]!
veor q3,q3,q5
mov r6,r5
vst1.8 {q3},[r1]!
veor q11,q11,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q11},[r1]!
bhs Loop3x_ctr32

adds r2,r2,#3
beq Lctr32_done
cmp r2,#1
mov r12,#16
moveq r12,#0

Lctr32_tail:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q9},[r7]!
bgt Lctr32_tail

.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q2},[r0],r12
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q3},[r0]
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q2,q2,q7
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q3,q3,q7
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15

cmp r2,#1
veor q2,q2,q0
veor q3,q3,q1
vst1.8 {q2},[r1]!
beq Lctr32_done
vst1.8 {q3},[r1]

Lctr32_done:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}

#endif
#endif // !OPENSSL_NO_ASM
@ -0,0 +1,982 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.

.text
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif

#if __ARM_MAX_ARCH__>=7
.align 5
LOPENSSL_armcap:
.word OPENSSL_armcap_P-Lbn_mul_mont
#endif

.globl _bn_mul_mont
.private_extern _bn_mul_mont
#ifdef __thumb2__
.thumb_func _bn_mul_mont
#endif

.align 5
_bn_mul_mont:
Lbn_mul_mont:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne Lialu
adr r0,Lbn_mul_mont
ldr r2,LOPENSSL_armcap
ldr r0,[r0,r2]
#ifdef __APPLE__
ldr r0,[r0]
#endif
tst r0,#ARMV7_NEON @ NEON available?
ldmia sp, {r0,r2}
beq Lialu
add sp,sp,#8
b bn_mul8x_mont_neon
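@ The NEON path above is taken only when num is a multiple of 8 (tst ip,#7)
@ and OPENSSL_armcap_P advertises NEON; every other case falls through to
@ the scalar Lialu Montgomery loop below.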
|
||||
.align 4
|
||||
Lialu:
|
||||
#endif
|
||||
cmp ip,#2
|
||||
mov r0,ip @ load num
|
||||
#ifdef __thumb2__
|
||||
ittt lt
|
||||
#endif
|
||||
movlt r0,#0
|
||||
addlt sp,sp,#2*4
|
||||
blt Labrt
|
||||
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
|
||||
|
||||
mov r0,r0,lsl#2 @ rescale r0 for byte count
|
||||
sub sp,sp,r0 @ alloca(4*num)
|
||||
sub sp,sp,#4 @ +extra dword
|
||||
sub r0,r0,#4 @ "num=num-1"
|
||||
add r4,r2,r0 @ &bp[num-1]
|
||||
|
||||
add r0,sp,r0 @ r0 to point at &tp[num-1]
|
||||
ldr r8,[r0,#14*4] @ &n0
|
||||
ldr r2,[r2] @ bp[0]
|
||||
ldr r5,[r1],#4 @ ap[0],ap++
|
||||
ldr r6,[r3],#4 @ np[0],np++
|
||||
ldr r8,[r8] @ *n0
|
||||
str r4,[r0,#15*4] @ save &bp[num]
|
||||
|
||||
umull r10,r11,r5,r2 @ ap[0]*bp[0]
|
||||
str r8,[r0,#14*4] @ save n0 value
|
||||
mul r8,r10,r8 @ "tp[0]"*n0
|
||||
mov r12,#0
|
||||
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
|
||||
mov r4,sp
|
||||
|
||||
L1st:
|
||||
ldr r5,[r1],#4 @ ap[j],ap++
|
||||
mov r10,r11
|
||||
ldr r6,[r3],#4 @ np[j],np++
|
||||
mov r11,#0
|
||||
umlal r10,r11,r5,r2 @ ap[j]*bp[0]
|
||||
mov r14,#0
|
||||
umlal r12,r14,r6,r8 @ np[j]*n0
|
||||
adds r12,r12,r10
|
||||
str r12,[r4],#4 @ tp[j-1]=,tp++
|
||||
adc r12,r14,#0
|
||||
cmp r4,r0
|
||||
bne L1st
|
||||
|
||||
adds r12,r12,r11
|
||||
ldr r4,[r0,#13*4] @ restore bp
|
||||
mov r14,#0
|
||||
ldr r8,[r0,#14*4] @ restore n0
|
||||
adc r14,r14,#0
|
||||
str r12,[r0] @ tp[num-1]=
|
||||
mov r7,sp
|
||||
str r14,[r0,#4] @ tp[num]=
|
||||
|
||||
Louter:
|
||||
sub r7,r0,r7 @ "original" r0-1 value
|
||||
sub r1,r1,r7 @ "rewind" ap to &ap[1]
|
||||
ldr r2,[r4,#4]! @ *(++bp)
|
||||
sub r3,r3,r7 @ "rewind" np to &np[1]
|
||||
ldr r5,[r1,#-4] @ ap[0]
|
||||
ldr r10,[sp] @ tp[0]
|
||||
ldr r6,[r3,#-4] @ np[0]
|
||||
ldr r7,[sp,#4] @ tp[1]
|
||||
|
||||
mov r11,#0
|
||||
umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
|
||||
str r4,[r0,#13*4] @ save bp
|
||||
mul r8,r10,r8
|
||||
mov r12,#0
|
||||
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
|
||||
mov r4,sp
|
||||
|
||||
Linner:
|
||||
ldr r5,[r1],#4 @ ap[j],ap++
|
||||
adds r10,r11,r7 @ +=tp[j]
|
||||
ldr r6,[r3],#4 @ np[j],np++
|
||||
mov r11,#0
|
||||
umlal r10,r11,r5,r2 @ ap[j]*bp[i]
|
||||
mov r14,#0
|
||||
umlal r12,r14,r6,r8 @ np[j]*n0
|
||||
adc r11,r11,#0
|
||||
ldr r7,[r4,#8] @ tp[j+1]
|
||||
adds r12,r12,r10
|
||||
str r12,[r4],#4 @ tp[j-1]=,tp++
|
||||
adc r12,r14,#0
|
||||
cmp r4,r0
|
||||
bne Linner
|
||||
|
||||
adds r12,r12,r11
|
||||
mov r14,#0
|
||||
ldr r4,[r0,#13*4] @ restore bp
|
||||
adc r14,r14,#0
|
||||
ldr r8,[r0,#14*4] @ restore n0
|
||||
adds r12,r12,r7
|
||||
ldr r7,[r0,#15*4] @ restore &bp[num]
|
||||
adc r14,r14,#0
|
||||
str r12,[r0] @ tp[num-1]=
|
||||
str r14,[r0,#4] @ tp[num]=
|
||||
|
||||
cmp r4,r7
|
||||
#ifdef __thumb2__
|
||||
itt ne
|
||||
#endif
|
||||
movne r7,sp
|
||||
bne Louter
|
||||
|
||||
ldr r2,[r0,#12*4] @ pull rp
|
||||
mov r5,sp
|
||||
add r0,r0,#4 @ r0 to point at &tp[num]
|
||||
sub r5,r0,r5 @ "original" num value
|
||||
mov r4,sp @ "rewind" r4
|
||||
mov r1,r4 @ "borrow" r1
|
||||
sub r3,r3,r5 @ "rewind" r3 to &np[0]
|
||||
|
||||
subs r7,r7,r7 @ "clear" carry flag
|
||||
Lsub: ldr r7,[r4],#4
|
||||
ldr r6,[r3],#4
|
||||
sbcs r7,r7,r6 @ tp[j]-np[j]
|
||||
str r7,[r2],#4 @ rp[j]=
|
||||
teq r4,r0 @ preserve carry
|
||||
bne Lsub
|
||||
sbcs r14,r14,#0 @ upmost carry
|
||||
mov r4,sp @ "rewind" r4
|
||||
sub r2,r2,r5 @ "rewind" r2
|
||||
|
||||
Lcopy: ldr r7,[r4] @ conditional copy
|
||||
ldr r5,[r2]
|
||||
str sp,[r4],#4 @ zap tp
|
||||
#ifdef __thumb2__
|
||||
it cc
|
||||
#endif
|
||||
movcc r5,r7
|
||||
str r5,[r2],#4
|
||||
teq r4,r0 @ preserve carry
|
||||
bne Lcopy
|
||||
|
||||
mov sp,r0
|
||||
add sp,sp,#4 @ skip over tp[num+1]
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
|
||||
add sp,sp,#2*4 @ skip over {r0,r2}
|
||||
mov r0,#1
|
||||
Labrt:
|
||||
#if __ARM_ARCH__>=5
|
||||
bx lr @ bx lr
|
||||
#else
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
|
||||
|
||||
|
||||
#ifdef __thumb2__
|
||||
.thumb_func bn_mul8x_mont_neon
|
||||
#endif
|
||||
.align 5
|
||||
bn_mul8x_mont_neon:
|
||||
mov ip,sp
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
|
||||
ldmia ip,{r4,r5} @ load rest of parameter block
|
||||
mov ip,sp
|
||||
|
||||
cmp r5,#8
|
||||
bhi LNEON_8n
|
||||
|
||||
@ special case for r5==8, everything is in register bank...
|
||||
|
||||
vld1.32 {d28[0]}, [r2,:32]!
|
||||
veor d8,d8,d8
|
||||
sub r7,sp,r5,lsl#4
|
||||
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
|
||||
and r7,r7,#-64
|
||||
vld1.32 {d30[0]}, [r4,:32]
|
||||
mov sp,r7 @ alloca
|
||||
vzip.16 d28,d8
|
||||
|
||||
vmull.u32 q6,d28,d0[0]
|
||||
vmull.u32 q7,d28,d0[1]
|
||||
vmull.u32 q8,d28,d1[0]
|
||||
vshl.i64 d29,d13,#16
|
||||
vmull.u32 q9,d28,d1[1]
|
||||
|
||||
vadd.u64 d29,d29,d12
|
||||
veor d8,d8,d8
|
||||
vmul.u32 d29,d29,d30
|
||||
|
||||
vmull.u32 q10,d28,d2[0]
|
||||
vld1.32 {d4,d5,d6,d7}, [r3]!
|
||||
vmull.u32 q11,d28,d2[1]
|
||||
vmull.u32 q12,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmull.u32 q13,d28,d3[1]
|
||||
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
sub r9,r5,#1
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vmov q5,q6
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vmov q6,q7
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmov q7,q8
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vmov q8,q9
|
||||
vmov q9,q10
|
||||
vshr.u64 d10,d10,#16
|
||||
vmov q10,q11
|
||||
vmov q11,q12
|
||||
vadd.u64 d10,d10,d11
|
||||
vmov q12,q13
|
||||
veor q13,q13
|
||||
vshr.u64 d10,d10,#16
|
||||
|
||||
b LNEON_outer8
|
||||
|
||||
.align 4
|
||||
LNEON_outer8:
|
||||
vld1.32 {d28[0]}, [r2,:32]!
|
||||
veor d8,d8,d8
|
||||
vzip.16 d28,d8
|
||||
vadd.u64 d12,d12,d10
|
||||
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vmlal.u32 q7,d28,d0[1]
|
||||
vmlal.u32 q8,d28,d1[0]
|
||||
vshl.i64 d29,d13,#16
|
||||
vmlal.u32 q9,d28,d1[1]
|
||||
|
||||
vadd.u64 d29,d29,d12
|
||||
veor d8,d8,d8
|
||||
subs r9,r9,#1
|
||||
vmul.u32 d29,d29,d30
|
||||
|
||||
vmlal.u32 q10,d28,d2[0]
|
||||
vmlal.u32 q11,d28,d2[1]
|
||||
vmlal.u32 q12,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q13,d28,d3[1]
|
||||
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vmov q5,q6
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vmov q6,q7
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmov q7,q8
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vmov q8,q9
|
||||
vmov q9,q10
|
||||
vshr.u64 d10,d10,#16
|
||||
vmov q10,q11
|
||||
vmov q11,q12
|
||||
vadd.u64 d10,d10,d11
|
||||
vmov q12,q13
|
||||
veor q13,q13
|
||||
vshr.u64 d10,d10,#16
|
||||
|
||||
bne LNEON_outer8
|
||||
|
||||
vadd.u64 d12,d12,d10
|
||||
mov r7,sp
|
||||
vshr.u64 d10,d12,#16
|
||||
mov r8,r5
|
||||
vadd.u64 d13,d13,d10
|
||||
add r6,sp,#96
|
||||
vshr.u64 d10,d13,#16
|
||||
vzip.16 d12,d13
|
||||
|
||||
b LNEON_tail_entry
|
||||
|
||||
.align 4
|
||||
LNEON_8n:
|
||||
veor q6,q6,q6
|
||||
sub r7,sp,#128
|
||||
veor q7,q7,q7
|
||||
sub r7,r7,r5,lsl#4
|
||||
veor q8,q8,q8
|
||||
and r7,r7,#-64
|
||||
veor q9,q9,q9
|
||||
mov sp,r7 @ alloca
|
||||
veor q10,q10,q10
|
||||
add r7,r7,#256
|
||||
veor q11,q11,q11
|
||||
sub r8,r5,#8
|
||||
veor q12,q12,q12
|
||||
veor q13,q13,q13
|
||||
|
||||
LNEON_8n_init:
|
||||
vst1.64 {q6,q7},[r7,:256]!
|
||||
subs r8,r8,#8
|
||||
vst1.64 {q8,q9},[r7,:256]!
|
||||
vst1.64 {q10,q11},[r7,:256]!
|
||||
vst1.64 {q12,q13},[r7,:256]!
|
||||
bne LNEON_8n_init
|
||||
|
||||
add r6,sp,#256
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
add r10,sp,#8
|
||||
vld1.32 {d30[0]},[r4,:32]
|
||||
mov r9,r5
|
||||
b LNEON_8n_outer
|
||||
|
||||
.align 4
|
||||
LNEON_8n_outer:
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
veor d8,d8,d8
|
||||
vzip.16 d28,d8
|
||||
add r7,sp,#128
|
||||
vld1.32 {d4,d5,d6,d7},[r3]!
|
||||
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vmlal.u32 q7,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q8,d28,d1[0]
|
||||
vshl.i64 d29,d13,#16
|
||||
vmlal.u32 q9,d28,d1[1]
|
||||
vadd.u64 d29,d29,d12
|
||||
vmlal.u32 q10,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q11,d28,d2[1]
|
||||
vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0]
|
||||
vmlal.u32 q12,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q13,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vshr.u64 d12,d12,#16
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vadd.u64 d12,d12,d13
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vshr.u64 d12,d12,#16
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vadd.u64 d14,d14,d12
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0]
|
||||
vmlal.u32 q7,d28,d0[0]
|
||||
vld1.64 {q6},[r6,:128]!
|
||||
vmlal.u32 q8,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q9,d28,d1[0]
|
||||
vshl.i64 d29,d15,#16
|
||||
vmlal.u32 q10,d28,d1[1]
|
||||
vadd.u64 d29,d29,d14
|
||||
vmlal.u32 q11,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q12,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1]
|
||||
vmlal.u32 q13,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q6,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q7,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q8,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q9,d29,d5[0]
|
||||
vshr.u64 d14,d14,#16
|
||||
vmlal.u32 q10,d29,d5[1]
|
||||
vmlal.u32 q11,d29,d6[0]
|
||||
vadd.u64 d14,d14,d15
|
||||
vmlal.u32 q12,d29,d6[1]
|
||||
vshr.u64 d14,d14,#16
|
||||
vmlal.u32 q13,d29,d7[0]
|
||||
vmlal.u32 q6,d29,d7[1]
|
||||
vadd.u64 d16,d16,d14
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1]
|
||||
vmlal.u32 q8,d28,d0[0]
|
||||
vld1.64 {q7},[r6,:128]!
|
||||
vmlal.u32 q9,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q10,d28,d1[0]
|
||||
vshl.i64 d29,d17,#16
|
||||
vmlal.u32 q11,d28,d1[1]
|
||||
vadd.u64 d29,d29,d16
|
||||
vmlal.u32 q12,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q13,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2]
|
||||
vmlal.u32 q6,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q7,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q8,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q9,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q10,d29,d5[0]
|
||||
vshr.u64 d16,d16,#16
|
||||
vmlal.u32 q11,d29,d5[1]
|
||||
vmlal.u32 q12,d29,d6[0]
|
||||
vadd.u64 d16,d16,d17
|
||||
vmlal.u32 q13,d29,d6[1]
|
||||
vshr.u64 d16,d16,#16
|
||||
vmlal.u32 q6,d29,d7[0]
|
||||
vmlal.u32 q7,d29,d7[1]
|
||||
vadd.u64 d18,d18,d16
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2]
|
||||
vmlal.u32 q9,d28,d0[0]
|
||||
vld1.64 {q8},[r6,:128]!
|
||||
vmlal.u32 q10,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q11,d28,d1[0]
|
||||
vshl.i64 d29,d19,#16
|
||||
vmlal.u32 q12,d28,d1[1]
|
||||
vadd.u64 d29,d29,d18
|
||||
vmlal.u32 q13,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q6,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3]
|
||||
vmlal.u32 q7,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q8,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q9,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q10,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q11,d29,d5[0]
|
||||
vshr.u64 d18,d18,#16
|
||||
vmlal.u32 q12,d29,d5[1]
|
||||
vmlal.u32 q13,d29,d6[0]
|
||||
vadd.u64 d18,d18,d19
|
||||
vmlal.u32 q6,d29,d6[1]
|
||||
vshr.u64 d18,d18,#16
|
||||
vmlal.u32 q7,d29,d7[0]
|
||||
vmlal.u32 q8,d29,d7[1]
|
||||
vadd.u64 d20,d20,d18
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3]
|
||||
vmlal.u32 q10,d28,d0[0]
|
||||
vld1.64 {q9},[r6,:128]!
|
||||
vmlal.u32 q11,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q12,d28,d1[0]
|
||||
vshl.i64 d29,d21,#16
|
||||
vmlal.u32 q13,d28,d1[1]
|
||||
vadd.u64 d29,d29,d20
|
||||
vmlal.u32 q6,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q7,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4]
|
||||
vmlal.u32 q8,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q9,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q10,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q11,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q12,d29,d5[0]
|
||||
vshr.u64 d20,d20,#16
|
||||
vmlal.u32 q13,d29,d5[1]
|
||||
vmlal.u32 q6,d29,d6[0]
|
||||
vadd.u64 d20,d20,d21
|
||||
vmlal.u32 q7,d29,d6[1]
|
||||
vshr.u64 d20,d20,#16
|
||||
vmlal.u32 q8,d29,d7[0]
|
||||
vmlal.u32 q9,d29,d7[1]
|
||||
vadd.u64 d22,d22,d20
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4]
|
||||
vmlal.u32 q11,d28,d0[0]
|
||||
vld1.64 {q10},[r6,:128]!
|
||||
vmlal.u32 q12,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q13,d28,d1[0]
|
||||
vshl.i64 d29,d23,#16
|
||||
vmlal.u32 q6,d28,d1[1]
|
||||
vadd.u64 d29,d29,d22
|
||||
vmlal.u32 q7,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q8,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5]
|
||||
vmlal.u32 q9,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q10,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q11,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q12,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q13,d29,d5[0]
|
||||
vshr.u64 d22,d22,#16
|
||||
vmlal.u32 q6,d29,d5[1]
|
||||
vmlal.u32 q7,d29,d6[0]
|
||||
vadd.u64 d22,d22,d23
|
||||
vmlal.u32 q8,d29,d6[1]
|
||||
vshr.u64 d22,d22,#16
|
||||
vmlal.u32 q9,d29,d7[0]
|
||||
vmlal.u32 q10,d29,d7[1]
|
||||
vadd.u64 d24,d24,d22
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5]
|
||||
vmlal.u32 q12,d28,d0[0]
|
||||
vld1.64 {q11},[r6,:128]!
|
||||
vmlal.u32 q13,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q6,d28,d1[0]
|
||||
vshl.i64 d29,d25,#16
|
||||
vmlal.u32 q7,d28,d1[1]
|
||||
vadd.u64 d29,d29,d24
|
||||
vmlal.u32 q8,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q9,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6]
|
||||
vmlal.u32 q10,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q11,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q12,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q13,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q6,d29,d5[0]
|
||||
vshr.u64 d24,d24,#16
|
||||
vmlal.u32 q7,d29,d5[1]
|
||||
vmlal.u32 q8,d29,d6[0]
|
||||
vadd.u64 d24,d24,d25
|
||||
vmlal.u32 q9,d29,d6[1]
|
||||
vshr.u64 d24,d24,#16
|
||||
vmlal.u32 q10,d29,d7[0]
|
||||
vmlal.u32 q11,d29,d7[1]
|
||||
vadd.u64 d26,d26,d24
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6]
|
||||
vmlal.u32 q13,d28,d0[0]
|
||||
vld1.64 {q12},[r6,:128]!
|
||||
vmlal.u32 q6,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q7,d28,d1[0]
|
||||
vshl.i64 d29,d27,#16
|
||||
vmlal.u32 q8,d28,d1[1]
|
||||
vadd.u64 d29,d29,d26
|
||||
vmlal.u32 q9,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q10,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7]
|
||||
vmlal.u32 q11,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q12,d28,d3[1]
|
||||
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
|
||||
vmlal.u32 q13,d29,d4[0]
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
vmlal.u32 q6,d29,d4[1]
|
||||
vmlal.u32 q7,d29,d5[0]
|
||||
vshr.u64 d26,d26,#16
|
||||
vmlal.u32 q8,d29,d5[1]
|
||||
vmlal.u32 q9,d29,d6[0]
|
||||
vadd.u64 d26,d26,d27
|
||||
vmlal.u32 q10,d29,d6[1]
|
||||
vshr.u64 d26,d26,#16
|
||||
vmlal.u32 q11,d29,d7[0]
|
||||
vmlal.u32 q12,d29,d7[1]
|
||||
vadd.u64 d12,d12,d26
|
||||
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
|
||||
add r10,sp,#8 @ rewind
|
||||
sub r8,r5,#8
|
||||
b LNEON_8n_inner
|
||||
|
||||
.align 4
|
||||
LNEON_8n_inner:
|
||||
subs r8,r8,#8
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vld1.64 {q13},[r6,:128]
|
||||
vmlal.u32 q7,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+0]
|
||||
vmlal.u32 q8,d28,d1[0]
|
||||
vld1.32 {d4,d5,d6,d7},[r3]!
|
||||
vmlal.u32 q9,d28,d1[1]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q10,d28,d2[0]
|
||||
vmlal.u32 q11,d28,d2[1]
|
||||
vmlal.u32 q12,d28,d3[0]
|
||||
vmlal.u32 q13,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1]
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vst1.64 {q6},[r7,:128]!
|
||||
vmlal.u32 q7,d28,d0[0]
|
||||
vld1.64 {q6},[r6,:128]
|
||||
vmlal.u32 q8,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1]
|
||||
vmlal.u32 q9,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q10,d28,d1[1]
|
||||
vmlal.u32 q11,d28,d2[0]
|
||||
vmlal.u32 q12,d28,d2[1]
|
||||
vmlal.u32 q13,d28,d3[0]
|
||||
vmlal.u32 q6,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2]
|
||||
vmlal.u32 q7,d29,d4[0]
|
||||
vmlal.u32 q8,d29,d4[1]
|
||||
vmlal.u32 q9,d29,d5[0]
|
||||
vmlal.u32 q10,d29,d5[1]
|
||||
vmlal.u32 q11,d29,d6[0]
|
||||
vmlal.u32 q12,d29,d6[1]
|
||||
vmlal.u32 q13,d29,d7[0]
|
||||
vmlal.u32 q6,d29,d7[1]
|
||||
vst1.64 {q7},[r7,:128]!
|
||||
vmlal.u32 q8,d28,d0[0]
|
||||
vld1.64 {q7},[r6,:128]
|
||||
vmlal.u32 q9,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2]
|
||||
vmlal.u32 q10,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q11,d28,d1[1]
|
||||
vmlal.u32 q12,d28,d2[0]
|
||||
vmlal.u32 q13,d28,d2[1]
|
||||
vmlal.u32 q6,d28,d3[0]
|
||||
vmlal.u32 q7,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3]
|
||||
vmlal.u32 q8,d29,d4[0]
|
||||
vmlal.u32 q9,d29,d4[1]
|
||||
vmlal.u32 q10,d29,d5[0]
|
||||
vmlal.u32 q11,d29,d5[1]
|
||||
vmlal.u32 q12,d29,d6[0]
|
||||
vmlal.u32 q13,d29,d6[1]
|
||||
vmlal.u32 q6,d29,d7[0]
|
||||
vmlal.u32 q7,d29,d7[1]
|
||||
vst1.64 {q8},[r7,:128]!
|
||||
vmlal.u32 q9,d28,d0[0]
|
||||
vld1.64 {q8},[r6,:128]
|
||||
vmlal.u32 q10,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3]
|
||||
vmlal.u32 q11,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q12,d28,d1[1]
|
||||
vmlal.u32 q13,d28,d2[0]
|
||||
vmlal.u32 q6,d28,d2[1]
|
||||
vmlal.u32 q7,d28,d3[0]
|
||||
vmlal.u32 q8,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4]
|
||||
vmlal.u32 q9,d29,d4[0]
|
||||
vmlal.u32 q10,d29,d4[1]
|
||||
vmlal.u32 q11,d29,d5[0]
|
||||
vmlal.u32 q12,d29,d5[1]
|
||||
vmlal.u32 q13,d29,d6[0]
|
||||
vmlal.u32 q6,d29,d6[1]
|
||||
vmlal.u32 q7,d29,d7[0]
|
||||
vmlal.u32 q8,d29,d7[1]
|
||||
vst1.64 {q9},[r7,:128]!
|
||||
vmlal.u32 q10,d28,d0[0]
|
||||
vld1.64 {q9},[r6,:128]
|
||||
vmlal.u32 q11,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4]
|
||||
vmlal.u32 q12,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q13,d28,d1[1]
|
||||
vmlal.u32 q6,d28,d2[0]
|
||||
vmlal.u32 q7,d28,d2[1]
|
||||
vmlal.u32 q8,d28,d3[0]
|
||||
vmlal.u32 q9,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5]
|
||||
vmlal.u32 q10,d29,d4[0]
|
||||
vmlal.u32 q11,d29,d4[1]
|
||||
vmlal.u32 q12,d29,d5[0]
|
||||
vmlal.u32 q13,d29,d5[1]
|
||||
vmlal.u32 q6,d29,d6[0]
|
||||
vmlal.u32 q7,d29,d6[1]
|
||||
vmlal.u32 q8,d29,d7[0]
|
||||
vmlal.u32 q9,d29,d7[1]
|
||||
vst1.64 {q10},[r7,:128]!
|
||||
vmlal.u32 q11,d28,d0[0]
|
||||
vld1.64 {q10},[r6,:128]
|
||||
vmlal.u32 q12,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5]
|
||||
vmlal.u32 q13,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q6,d28,d1[1]
|
||||
vmlal.u32 q7,d28,d2[0]
|
||||
vmlal.u32 q8,d28,d2[1]
|
||||
vmlal.u32 q9,d28,d3[0]
|
||||
vmlal.u32 q10,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+6]
|
||||
vmlal.u32 q11,d29,d4[0]
|
||||
vmlal.u32 q12,d29,d4[1]
|
||||
vmlal.u32 q13,d29,d5[0]
|
||||
vmlal.u32 q6,d29,d5[1]
|
||||
vmlal.u32 q7,d29,d6[0]
|
||||
vmlal.u32 q8,d29,d6[1]
|
||||
vmlal.u32 q9,d29,d7[0]
|
||||
vmlal.u32 q10,d29,d7[1]
|
||||
vst1.64 {q11},[r7,:128]!
|
||||
vmlal.u32 q12,d28,d0[0]
|
||||
vld1.64 {q11},[r6,:128]
|
||||
vmlal.u32 q13,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6]
|
||||
vmlal.u32 q6,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q7,d28,d1[1]
|
||||
vmlal.u32 q8,d28,d2[0]
|
||||
vmlal.u32 q9,d28,d2[1]
|
||||
vmlal.u32 q10,d28,d3[0]
|
||||
vmlal.u32 q11,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7]
|
||||
vmlal.u32 q12,d29,d4[0]
|
||||
vmlal.u32 q13,d29,d4[1]
|
||||
vmlal.u32 q6,d29,d5[0]
|
||||
vmlal.u32 q7,d29,d5[1]
|
||||
vmlal.u32 q8,d29,d6[0]
|
||||
vmlal.u32 q9,d29,d6[1]
|
||||
vmlal.u32 q10,d29,d7[0]
|
||||
vmlal.u32 q11,d29,d7[1]
|
||||
vst1.64 {q12},[r7,:128]!
|
||||
vmlal.u32 q13,d28,d0[0]
|
||||
vld1.64 {q12},[r6,:128]
|
||||
vmlal.u32 q6,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7]
|
||||
vmlal.u32 q7,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q8,d28,d1[1]
|
||||
vmlal.u32 q9,d28,d2[0]
|
||||
vmlal.u32 q10,d28,d2[1]
|
||||
vmlal.u32 q11,d28,d3[0]
|
||||
vmlal.u32 q12,d28,d3[1]
|
||||
it eq
|
||||
subeq r1,r1,r5,lsl#2 @ rewind
|
||||
vmlal.u32 q13,d29,d4[0]
|
||||
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
|
||||
vmlal.u32 q6,d29,d4[1]
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
vmlal.u32 q7,d29,d5[0]
|
||||
add r10,sp,#8 @ rewind
|
||||
vmlal.u32 q8,d29,d5[1]
|
||||
vmlal.u32 q9,d29,d6[0]
|
||||
vmlal.u32 q10,d29,d6[1]
|
||||
vmlal.u32 q11,d29,d7[0]
|
||||
vst1.64 {q13},[r7,:128]!
|
||||
vmlal.u32 q12,d29,d7[1]
|
||||
|
||||
bne LNEON_8n_inner
|
||||
add r6,sp,#128
|
||||
vst1.64 {q6,q7},[r7,:256]!
|
||||
veor q2,q2,q2 @ d4-d5
|
||||
vst1.64 {q8,q9},[r7,:256]!
|
||||
veor q3,q3,q3 @ d6-d7
|
||||
vst1.64 {q10,q11},[r7,:256]!
|
||||
vst1.64 {q12},[r7,:128]
|
||||
|
||||
subs r9,r9,#8
|
||||
vld1.64 {q6,q7},[r6,:256]!
|
||||
vld1.64 {q8,q9},[r6,:256]!
|
||||
vld1.64 {q10,q11},[r6,:256]!
|
||||
vld1.64 {q12,q13},[r6,:256]!
|
||||
|
||||
itt ne
|
||||
subne r3,r3,r5,lsl#2 @ rewind
|
||||
bne LNEON_8n_outer
|
||||
|
||||
add r7,sp,#128
|
||||
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
|
||||
vshr.u64 d10,d12,#16
|
||||
vst1.64 {q2,q3},[sp,:256]!
|
||||
vadd.u64 d13,d13,d10
|
||||
vst1.64 {q2,q3}, [sp,:256]!
|
||||
vshr.u64 d10,d13,#16
|
||||
vst1.64 {q2,q3}, [sp,:256]!
|
||||
vzip.16 d12,d13
|
||||
|
||||
mov r8,r5
|
||||
b LNEON_tail_entry
|
||||
|
||||
.align 4
|
||||
LNEON_tail:
|
||||
vadd.u64 d12,d12,d10
|
||||
vshr.u64 d10,d12,#16
|
||||
vld1.64 {q8,q9}, [r6, :256]!
|
||||
vadd.u64 d13,d13,d10
|
||||
vld1.64 {q10,q11}, [r6, :256]!
|
||||
vshr.u64 d10,d13,#16
|
||||
vld1.64 {q12,q13}, [r6, :256]!
|
||||
vzip.16 d12,d13
|
||||
|
||||
LNEON_tail_entry:
|
||||
vadd.u64 d14,d14,d10
|
||||
vst1.32 {d12[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d14,#16
|
||||
vadd.u64 d15,d15,d10
|
||||
vshr.u64 d10,d15,#16
|
||||
vzip.16 d14,d15
|
||||
vadd.u64 d16,d16,d10
|
||||
vst1.32 {d14[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d16,#16
|
||||
vadd.u64 d17,d17,d10
|
||||
vshr.u64 d10,d17,#16
|
||||
vzip.16 d16,d17
|
||||
vadd.u64 d18,d18,d10
|
||||
vst1.32 {d16[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d18,#16
|
||||
vadd.u64 d19,d19,d10
|
||||
vshr.u64 d10,d19,#16
|
||||
vzip.16 d18,d19
|
||||
vadd.u64 d20,d20,d10
|
||||
vst1.32 {d18[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d20,#16
|
||||
vadd.u64 d21,d21,d10
|
||||
vshr.u64 d10,d21,#16
|
||||
vzip.16 d20,d21
|
||||
vadd.u64 d22,d22,d10
|
||||
vst1.32 {d20[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d22,#16
|
||||
vadd.u64 d23,d23,d10
|
||||
vshr.u64 d10,d23,#16
|
||||
vzip.16 d22,d23
|
||||
vadd.u64 d24,d24,d10
|
||||
vst1.32 {d22[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d24,#16
|
||||
vadd.u64 d25,d25,d10
|
||||
vshr.u64 d10,d25,#16
|
||||
vzip.16 d24,d25
|
||||
vadd.u64 d26,d26,d10
|
||||
vst1.32 {d24[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d26,#16
|
||||
vadd.u64 d27,d27,d10
|
||||
vshr.u64 d10,d27,#16
|
||||
vzip.16 d26,d27
|
||||
vld1.64 {q6,q7}, [r6, :256]!
|
||||
subs r8,r8,#8
|
||||
vst1.32 {d26[0]}, [r7, :32]!
|
||||
bne LNEON_tail
|
||||
|
||||
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
|
||||
sub r3,r3,r5,lsl#2 @ rewind r3
|
||||
subs r1,sp,#0 @ clear carry flag
|
||||
add r2,sp,r5,lsl#2
|
||||
|
||||
LNEON_sub:
|
||||
ldmia r1!, {r4,r5,r6,r7}
|
||||
ldmia r3!, {r8,r9,r10,r11}
|
||||
sbcs r8, r4,r8
|
||||
sbcs r9, r5,r9
|
||||
sbcs r10,r6,r10
|
||||
sbcs r11,r7,r11
|
||||
teq r1,r2 @ preserves carry
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
bne LNEON_sub
|
||||
|
||||
ldr r10, [r1] @ load top-most bit
|
||||
mov r11,sp
|
||||
veor q0,q0,q0
|
||||
sub r11,r2,r11 @ this is num*4
|
||||
veor q1,q1,q1
|
||||
mov r1,sp
|
||||
sub r0,r0,r11 @ rewind r0
|
||||
mov r3,r2 @ second 3/4th of frame
|
||||
sbcs r10,r10,#0 @ result is carry flag
|
||||
|
||||
LNEON_copy_n_zap:
|
||||
ldmia r1!, {r4,r5,r6,r7}
|
||||
ldmia r0, {r8,r9,r10,r11}
|
||||
it cc
|
||||
movcc r8, r4
|
||||
vst1.64 {q0,q1}, [r3,:256]! @ wipe
|
||||
itt cc
|
||||
movcc r9, r5
|
||||
movcc r10,r6
|
||||
vst1.64 {q0,q1}, [r3,:256]! @ wipe
|
||||
it cc
|
||||
movcc r11,r7
|
||||
ldmia r1, {r4,r5,r6,r7}
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
sub r1,r1,#16
|
||||
ldmia r0, {r8,r9,r10,r11}
|
||||
it cc
|
||||
movcc r8, r4
|
||||
vst1.64 {q0,q1}, [r1,:256]! @ wipe
|
||||
itt cc
|
||||
movcc r9, r5
|
||||
movcc r10,r6
|
||||
vst1.64 {q0,q1}, [r3,:256]! @ wipe
|
||||
it cc
|
||||
movcc r11,r7
|
||||
teq r1,r2 @ preserves carry
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
bne LNEON_copy_n_zap
|
||||
|
||||
mov sp,ip
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
bx lr @ bx lr
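The epilogue above (LNEON_sub, then LNEON_copy_n_zap) is the standard constant-time finish of a Montgomery multiplication: subtract the modulus word by word with sbcs, fold in the top carry bit, then use movcc to select either the difference or the original value without branching on secret data, wiping the stack frame along the way. A minimal C sketch of that selection, assuming 32-bit limbs plus a separate top carry word as in the code above; the function and parameter names are mine, and the stack wiping is omitted:

#include <stddef.h>
#include <stdint.h>

/* Sketch only: r = t mod N, given t < 2N, where t has num 32-bit limbs
 * plus a top carry word t_top (the "top-most bit" stored above). */
static void mont_cond_sub(uint32_t *r, const uint32_t *t, uint32_t t_top,
                          const uint32_t *n, size_t num) {
  uint64_t borrow = 0;
  for (size_t i = 0; i < num; i++) {
    uint64_t d = (uint64_t)t[i] - n[i] - borrow;    /* like sbcs */
    r[i] = (uint32_t)d;
    borrow = (d >> 32) & 1;
  }
  /* Like "sbcs r10,r10,#0": the subtraction underflowed iff the top
   * word cannot absorb the final borrow, i.e. t < N. */
  uint32_t keep_t = (uint32_t)0 - (uint32_t)(t_top < borrow);
  for (size_t i = 0; i < num; i++) {
    r[i] = (t[i] & keep_t) | (r[i] & ~keep_t);      /* like movcc */
  }
}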
|
||||
|
||||
#endif
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.comm _OPENSSL_armcap_P,4
|
||||
.non_lazy_symbol_pointer
|
||||
OPENSSL_armcap_P:
|
||||
.indirect_symbol _OPENSSL_armcap_P
|
||||
.long 0
|
||||
.private_extern _OPENSSL_armcap_P
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
File diff suppressed because it is too large
|
@@ -0,0 +1,600 @@
|
|||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
|
||||
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
|
||||
@ instructions are in aesv8-armx.pl.)
|
||||
|
||||
|
||||
.text
|
||||
#if defined(__thumb2__) || defined(__clang__)
|
||||
.syntax unified
|
||||
#define ldrplb ldrbpl
|
||||
#define ldrneb ldrbne
|
||||
#endif
|
||||
#if defined(__thumb2__)
|
||||
.thumb
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
|
||||
|
||||
.align 5
|
||||
rem_4bit:
|
||||
.short 0x0000,0x1C20,0x3840,0x2460
|
||||
.short 0x7080,0x6CA0,0x48C0,0x54E0
|
||||
.short 0xE100,0xFD20,0xD940,0xC560
|
||||
.short 0x9180,0x8DA0,0xA9C0,0xB5E0
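rem_4bit holds the sixteen precomputed reductions that the 4-bit GHASH loops below fold in via "eor r7,r7,r8,lsl#16". If I read the constants right, entry i is the carry-less (GF(2)) product of the 4-bit index with 0xE1, the top byte of the GHASH reduction polynomial, shifted left by five bits. A throwaway C check of that reading (my code, not BoringSSL's):

#include <stdint.h>
#include <stdio.h>

/* Regenerates the .short table above as rem_4bit[i] = clmul(i, 0xE1) << 5. */
static uint32_t clmul4(uint32_t a, uint32_t b) {
  uint32_t r = 0;
  for (int i = 0; i < 4; i++) {
    if (a & (1u << i)) r ^= b << i;   /* xor instead of add: GF(2) */
  }
  return r;
}

int main(void) {
  for (uint32_t i = 0; i < 16; i++) {
    printf("0x%04X%s", clmul4(i, 0xE1) << 5, (i % 4 == 3) ? "\n" : ",");
  }
  return 0;
}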
|
||||
|
||||
|
||||
#ifdef __thumb2__
|
||||
.thumb_func rem_4bit_get
|
||||
#endif
|
||||
rem_4bit_get:
|
||||
#if defined(__thumb2__)
|
||||
adr r2,rem_4bit
|
||||
#else
|
||||
sub r2,pc,#8+32 @ &rem_4bit
|
||||
#endif
|
||||
b Lrem_4bit_got
|
||||
nop
|
||||
nop
|
||||
|
||||
|
||||
.globl _gcm_ghash_4bit
|
||||
.private_extern _gcm_ghash_4bit
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_ghash_4bit
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_ghash_4bit:
|
||||
#if defined(__thumb2__)
|
||||
adr r12,rem_4bit
|
||||
#else
|
||||
sub r12,pc,#8+48 @ &rem_4bit
|
||||
#endif
|
||||
add r3,r2,r3 @ r3 to point at the end
|
||||
stmdb sp!,{r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} @ save r3/end too
|
||||
|
||||
ldmia r12,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy rem_4bit ...
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ ... to stack
|
||||
|
||||
ldrb r12,[r2,#15]
|
||||
ldrb r14,[r0,#15]
|
||||
Louter:
|
||||
eor r12,r12,r14
|
||||
and r14,r12,#0xf0
|
||||
and r12,r12,#0x0f
|
||||
mov r3,#14
|
||||
|
||||
add r7,r1,r12,lsl#4
|
||||
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
|
||||
add r11,r1,r14
|
||||
ldrb r12,[r2,#14]
|
||||
|
||||
and r14,r4,#0xf @ rem
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
add r14,r14,r14
|
||||
eor r4,r8,r4,lsr#4
|
||||
ldrh r8,[sp,r14] @ rem_4bit[rem]
|
||||
eor r4,r4,r5,lsl#28
|
||||
ldrb r14,[r0,#14]
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
eor r12,r12,r14
|
||||
and r14,r12,#0xf0
|
||||
and r12,r12,#0x0f
|
||||
eor r7,r7,r8,lsl#16
|
||||
|
||||
Linner:
|
||||
add r11,r1,r12,lsl#4
|
||||
and r12,r4,#0xf @ rem
|
||||
subs r3,r3,#1
|
||||
add r12,r12,r12
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
|
||||
eor r4,r8,r4,lsr#4
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
ldrh r8,[sp,r12] @ rem_4bit[rem]
|
||||
eor r6,r10,r6,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
ldrplb r12,[r2,r3]
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
|
||||
add r11,r1,r14
|
||||
and r14,r4,#0xf @ rem
|
||||
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
|
||||
add r14,r14,r14
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
eor r4,r8,r4,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
ldrplb r8,[r0,r3]
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
ldrh r9,[sp,r14]
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
eorpl r12,r12,r8
|
||||
eor r7,r11,r7,lsr#4
|
||||
#ifdef __thumb2__
|
||||
itt pl
|
||||
#endif
|
||||
andpl r14,r12,#0xf0
|
||||
andpl r12,r12,#0x0f
|
||||
eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
|
||||
bpl Linner
|
||||
|
||||
ldr r3,[sp,#32] @ re-load r3/end
|
||||
add r2,r2,#16
|
||||
mov r14,r4
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r4,r4
|
||||
str r4,[r0,#12]
|
||||
#elif defined(__ARMEB__)
|
||||
str r4,[r0,#12]
|
||||
#else
|
||||
mov r9,r4,lsr#8
|
||||
strb r4,[r0,#12+3]
|
||||
mov r10,r4,lsr#16
|
||||
strb r9,[r0,#12+2]
|
||||
mov r11,r4,lsr#24
|
||||
strb r10,[r0,#12+1]
|
||||
strb r11,[r0,#12]
|
||||
#endif
|
||||
cmp r2,r3
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r5,r5
|
||||
str r5,[r0,#8]
|
||||
#elif defined(__ARMEB__)
|
||||
str r5,[r0,#8]
|
||||
#else
|
||||
mov r9,r5,lsr#8
|
||||
strb r5,[r0,#8+3]
|
||||
mov r10,r5,lsr#16
|
||||
strb r9,[r0,#8+2]
|
||||
mov r11,r5,lsr#24
|
||||
strb r10,[r0,#8+1]
|
||||
strb r11,[r0,#8]
|
||||
#endif
|
||||
|
||||
#ifdef __thumb2__
|
||||
it ne
|
||||
#endif
|
||||
ldrneb r12,[r2,#15]
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r6,r6
|
||||
str r6,[r0,#4]
|
||||
#elif defined(__ARMEB__)
|
||||
str r6,[r0,#4]
|
||||
#else
|
||||
mov r9,r6,lsr#8
|
||||
strb r6,[r0,#4+3]
|
||||
mov r10,r6,lsr#16
|
||||
strb r9,[r0,#4+2]
|
||||
mov r11,r6,lsr#24
|
||||
strb r10,[r0,#4+1]
|
||||
strb r11,[r0,#4]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r7,r7
|
||||
str r7,[r0,#0]
|
||||
#elif defined(__ARMEB__)
|
||||
str r7,[r0,#0]
|
||||
#else
|
||||
mov r9,r7,lsr#8
|
||||
strb r7,[r0,#0+3]
|
||||
mov r10,r7,lsr#16
|
||||
strb r9,[r0,#0+2]
|
||||
mov r11,r7,lsr#24
|
||||
strb r10,[r0,#0+1]
|
||||
strb r11,[r0,#0]
|
||||
#endif
|
||||
|
||||
bne Louter
|
||||
|
||||
add sp,sp,#36
|
||||
#if __ARM_ARCH__>=5
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
||||
#else
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
|
||||
|
||||
.globl _gcm_gmult_4bit
|
||||
.private_extern _gcm_gmult_4bit
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_gmult_4bit
|
||||
#endif
|
||||
_gcm_gmult_4bit:
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
ldrb r12,[r0,#15]
|
||||
b rem_4bit_get
|
||||
Lrem_4bit_got:
|
||||
and r14,r12,#0xf0
|
||||
and r12,r12,#0x0f
|
||||
mov r3,#14
|
||||
|
||||
add r7,r1,r12,lsl#4
|
||||
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
|
||||
ldrb r12,[r0,#14]
|
||||
|
||||
add r11,r1,r14
|
||||
and r14,r4,#0xf @ rem
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
add r14,r14,r14
|
||||
eor r4,r8,r4,lsr#4
|
||||
ldrh r8,[r2,r14] @ rem_4bit[rem]
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
and r14,r12,#0xf0
|
||||
eor r7,r7,r8,lsl#16
|
||||
and r12,r12,#0x0f
|
||||
|
||||
Loop:
|
||||
add r11,r1,r12,lsl#4
|
||||
and r12,r4,#0xf @ rem
|
||||
subs r3,r3,#1
|
||||
add r12,r12,r12
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
|
||||
eor r4,r8,r4,lsr#4
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
ldrh r8,[r2,r12] @ rem_4bit[rem]
|
||||
eor r6,r10,r6,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
ldrplb r12,[r0,r3]
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
|
||||
add r11,r1,r14
|
||||
and r14,r4,#0xf @ rem
|
||||
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
|
||||
add r14,r14,r14
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
eor r4,r8,r4,lsr#4
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
ldrh r8,[r2,r14] @ rem_4bit[rem]
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
#ifdef __thumb2__
|
||||
itt pl
|
||||
#endif
|
||||
andpl r14,r12,#0xf0
|
||||
andpl r12,r12,#0x0f
|
||||
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
|
||||
bpl Loop
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r4,r4
|
||||
str r4,[r0,#12]
|
||||
#elif defined(__ARMEB__)
|
||||
str r4,[r0,#12]
|
||||
#else
|
||||
mov r9,r4,lsr#8
|
||||
strb r4,[r0,#12+3]
|
||||
mov r10,r4,lsr#16
|
||||
strb r9,[r0,#12+2]
|
||||
mov r11,r4,lsr#24
|
||||
strb r10,[r0,#12+1]
|
||||
strb r11,[r0,#12]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r5,r5
|
||||
str r5,[r0,#8]
|
||||
#elif defined(__ARMEB__)
|
||||
str r5,[r0,#8]
|
||||
#else
|
||||
mov r9,r5,lsr#8
|
||||
strb r5,[r0,#8+3]
|
||||
mov r10,r5,lsr#16
|
||||
strb r9,[r0,#8+2]
|
||||
mov r11,r5,lsr#24
|
||||
strb r10,[r0,#8+1]
|
||||
strb r11,[r0,#8]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r6,r6
|
||||
str r6,[r0,#4]
|
||||
#elif defined(__ARMEB__)
|
||||
str r6,[r0,#4]
|
||||
#else
|
||||
mov r9,r6,lsr#8
|
||||
strb r6,[r0,#4+3]
|
||||
mov r10,r6,lsr#16
|
||||
strb r9,[r0,#4+2]
|
||||
mov r11,r6,lsr#24
|
||||
strb r10,[r0,#4+1]
|
||||
strb r11,[r0,#4]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r7,r7
|
||||
str r7,[r0,#0]
|
||||
#elif defined(__ARMEB__)
|
||||
str r7,[r0,#0]
|
||||
#else
|
||||
mov r9,r7,lsr#8
|
||||
strb r7,[r0,#0+3]
|
||||
mov r10,r7,lsr#16
|
||||
strb r9,[r0,#0+2]
|
||||
mov r11,r7,lsr#24
|
||||
strb r10,[r0,#0+1]
|
||||
strb r11,[r0,#0]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=5
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
||||
#else
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
|
||||
|
||||
|
||||
.globl _gcm_init_neon
|
||||
.private_extern _gcm_init_neon
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_init_neon
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_init_neon:
|
||||
vld1.64 d7,[r1]! @ load H
|
||||
vmov.i8 q8,#0xe1
|
||||
vld1.64 d6,[r1]
|
||||
vshl.i64 d17,#57
|
||||
vshr.u64 d16,#63 @ t0=0xc2....01
|
||||
vdup.8 q9,d7[7]
|
||||
vshr.u64 d26,d6,#63
|
||||
vshr.s8 q9,#7 @ broadcast carry bit
|
||||
vshl.i64 q3,q3,#1
|
||||
vand q8,q8,q9
|
||||
vorr d7,d26 @ H<<<=1
|
||||
veor q3,q3,q8 @ twisted H
|
||||
vstmia r0,{q3}
|
||||
|
||||
bx lr @ bx lr
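_gcm_init_neon stores a "twisted" copy of H: the 128-bit key is shifted left one bit across both halves (the vshr.u64/vorr pair carries the bit from d6 into d7), and when a bit drops off the top the 0xc2....01 constant is xored back in. A compact sketch of the same computation, assuming a compiler with unsigned __int128; the name is mine:

/* Sketch of the "twisted H" computed above: double H, folding the
 * dropped top bit back in with the 0xc2....01 constant built in q8. */
typedef unsigned __int128 u128;

static u128 gcm_twist_h(u128 h) {
  u128 t0 = ((u128)0xC2 << 120) | 1;    /* 0xc2....01 */
  u128 carry = (u128)0 - (h >> 127);    /* all-ones iff the top bit is set */
  return (h << 1) ^ (t0 & carry);       /* H<<<=1, then twist */
}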
|
||||
|
||||
|
||||
.globl _gcm_gmult_neon
|
||||
.private_extern _gcm_gmult_neon
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_gmult_neon
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_gmult_neon:
|
||||
vld1.64 d7,[r0]! @ load Xi
|
||||
vld1.64 d6,[r0]!
|
||||
vmov.i64 d29,#0x0000ffffffffffff
|
||||
vldmia r1,{d26,d27} @ load twisted H
|
||||
vmov.i64 d30,#0x00000000ffffffff
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q3,q3
|
||||
#endif
|
||||
vmov.i64 d31,#0x000000000000ffff
|
||||
veor d28,d26,d27 @ Karatsuba pre-processing
|
||||
mov r3,#16
|
||||
b Lgmult_neon
|
||||
|
||||
|
||||
.globl _gcm_ghash_neon
|
||||
.private_extern _gcm_ghash_neon
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_ghash_neon
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_ghash_neon:
|
||||
vld1.64 d1,[r0]! @ load Xi
|
||||
vld1.64 d0,[r0]!
|
||||
vmov.i64 d29,#0x0000ffffffffffff
|
||||
vldmia r1,{d26,d27} @ load twisted H
|
||||
vmov.i64 d30,#0x00000000ffffffff
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vmov.i64 d31,#0x000000000000ffff
|
||||
veor d28,d26,d27 @ Karatsuba pre-processing
|
||||
|
||||
Loop_neon:
|
||||
vld1.64 d7,[r2]! @ load inp
|
||||
vld1.64 d6,[r2]!
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q3,q3
|
||||
#endif
|
||||
veor q3,q0 @ inp^=Xi
|
||||
Lgmult_neon:
|
||||
vext.8 d16, d26, d26, #1 @ A1
|
||||
vmull.p8 q8, d16, d6 @ F = A1*B
|
||||
vext.8 d0, d6, d6, #1 @ B1
|
||||
vmull.p8 q0, d26, d0 @ E = A*B1
|
||||
vext.8 d18, d26, d26, #2 @ A2
|
||||
vmull.p8 q9, d18, d6 @ H = A2*B
|
||||
vext.8 d22, d6, d6, #2 @ B2
|
||||
vmull.p8 q11, d26, d22 @ G = A*B2
|
||||
vext.8 d20, d26, d26, #3 @ A3
|
||||
veor q8, q8, q0 @ L = E + F
|
||||
vmull.p8 q10, d20, d6 @ J = A3*B
|
||||
vext.8 d0, d6, d6, #3 @ B3
|
||||
veor q9, q9, q11 @ M = G + H
|
||||
vmull.p8 q0, d26, d0 @ I = A*B3
|
||||
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
|
||||
vand d17, d17, d29
|
||||
vext.8 d22, d6, d6, #4 @ B4
|
||||
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
|
||||
vand d19, d19, d30
|
||||
vmull.p8 q11, d26, d22 @ K = A*B4
|
||||
veor q10, q10, q0 @ N = I + J
|
||||
veor d16, d16, d17
|
||||
veor d18, d18, d19
|
||||
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
|
||||
vand d21, d21, d31
|
||||
vext.8 q8, q8, q8, #15
|
||||
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
|
||||
vmov.i64 d23, #0
|
||||
vext.8 q9, q9, q9, #14
|
||||
veor d20, d20, d21
|
||||
vmull.p8 q0, d26, d6 @ D = A*B
|
||||
vext.8 q11, q11, q11, #12
|
||||
vext.8 q10, q10, q10, #13
|
||||
veor q8, q8, q9
|
||||
veor q10, q10, q11
|
||||
veor q0, q0, q8
|
||||
veor q0, q0, q10
|
||||
veor d6,d6,d7 @ Karatsuba pre-processing
|
||||
vext.8 d16, d28, d28, #1 @ A1
|
||||
vmull.p8 q8, d16, d6 @ F = A1*B
|
||||
vext.8 d2, d6, d6, #1 @ B1
|
||||
vmull.p8 q1, d28, d2 @ E = A*B1
|
||||
vext.8 d18, d28, d28, #2 @ A2
|
||||
vmull.p8 q9, d18, d6 @ H = A2*B
|
||||
vext.8 d22, d6, d6, #2 @ B2
|
||||
vmull.p8 q11, d28, d22 @ G = A*B2
|
||||
vext.8 d20, d28, d28, #3 @ A3
|
||||
veor q8, q8, q1 @ L = E + F
|
||||
vmull.p8 q10, d20, d6 @ J = A3*B
|
||||
vext.8 d2, d6, d6, #3 @ B3
|
||||
veor q9, q9, q11 @ M = G + H
|
||||
vmull.p8 q1, d28, d2 @ I = A*B3
|
||||
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
|
||||
vand d17, d17, d29
|
||||
vext.8 d22, d6, d6, #4 @ B4
|
||||
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
|
||||
vand d19, d19, d30
|
||||
vmull.p8 q11, d28, d22 @ K = A*B4
|
||||
veor q10, q10, q1 @ N = I + J
|
||||
veor d16, d16, d17
|
||||
veor d18, d18, d19
|
||||
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
|
||||
vand d21, d21, d31
|
||||
vext.8 q8, q8, q8, #15
|
||||
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
|
||||
vmov.i64 d23, #0
|
||||
vext.8 q9, q9, q9, #14
|
||||
veor d20, d20, d21
|
||||
vmull.p8 q1, d28, d6 @ D = A*B
|
||||
vext.8 q11, q11, q11, #12
|
||||
vext.8 q10, q10, q10, #13
|
||||
veor q8, q8, q9
|
||||
veor q10, q10, q11
|
||||
veor q1, q1, q8
|
||||
veor q1, q1, q10
|
||||
vext.8 d16, d27, d27, #1 @ A1
|
||||
vmull.p8 q8, d16, d7 @ F = A1*B
|
||||
vext.8 d4, d7, d7, #1 @ B1
|
||||
vmull.p8 q2, d27, d4 @ E = A*B1
|
||||
vext.8 d18, d27, d27, #2 @ A2
|
||||
vmull.p8 q9, d18, d7 @ H = A2*B
|
||||
vext.8 d22, d7, d7, #2 @ B2
|
||||
vmull.p8 q11, d27, d22 @ G = A*B2
|
||||
vext.8 d20, d27, d27, #3 @ A3
|
||||
veor q8, q8, q2 @ L = E + F
|
||||
vmull.p8 q10, d20, d7 @ J = A3*B
|
||||
vext.8 d4, d7, d7, #3 @ B3
|
||||
veor q9, q9, q11 @ M = G + H
|
||||
vmull.p8 q2, d27, d4 @ I = A*B3
|
||||
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
|
||||
vand d17, d17, d29
|
||||
vext.8 d22, d7, d7, #4 @ B4
|
||||
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
|
||||
vand d19, d19, d30
|
||||
vmull.p8 q11, d27, d22 @ K = A*B4
|
||||
veor q10, q10, q2 @ N = I + J
|
||||
veor d16, d16, d17
|
||||
veor d18, d18, d19
|
||||
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
|
||||
vand d21, d21, d31
|
||||
vext.8 q8, q8, q8, #15
|
||||
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
|
||||
vmov.i64 d23, #0
|
||||
vext.8 q9, q9, q9, #14
|
||||
veor d20, d20, d21
|
||||
vmull.p8 q2, d27, d7 @ D = A*B
|
||||
vext.8 q11, q11, q11, #12
|
||||
vext.8 q10, q10, q10, #13
|
||||
veor q8, q8, q9
|
||||
veor q10, q10, q11
|
||||
veor q2, q2, q8
|
||||
veor q2, q2, q10
|
||||
veor q1,q1,q0 @ Karatsuba post-processing
|
||||
veor q1,q1,q2
|
||||
veor d1,d1,d2
|
||||
veor d4,d4,d3 @ Xh|Xl - 256-bit result
|
||||
|
||||
@ equivalent of reduction_avx from ghash-x86_64.pl
|
||||
vshl.i64 q9,q0,#57 @ 1st phase
|
||||
vshl.i64 q10,q0,#62
|
||||
veor q10,q10,q9 @
|
||||
vshl.i64 q9,q0,#63
|
||||
veor q10, q10, q9 @
|
||||
veor d1,d1,d20 @
|
||||
veor d4,d4,d21
|
||||
|
||||
vshr.u64 q10,q0,#1 @ 2nd phase
|
||||
veor q2,q2,q0
|
||||
veor q0,q0,q10 @
|
||||
vshr.u64 q10,q10,#6
|
||||
vshr.u64 q0,q0,#1 @
|
||||
veor q0,q0,q2 @
|
||||
veor q0,q0,q10 @
|
||||
|
||||
subs r3,#16
|
||||
bne Loop_neon
|
||||
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
sub r0,#16
|
||||
vst1.64 d1,[r0]! @ write out Xi
|
||||
vst1.64 d0,[r0]
|
||||
|
||||
bx lr @ bx lr
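Everything from Lgmult_neon to the reduction is one GF(2^128) multiply: the three vmull.p8 cascades compute the low 64x64 half, the Karatsuba middle term (the xor of each operand's halves multiplied together, which is why only three 64x64 products are needed), and the high 64x64 half, and the two "phase" blocks then fold the 256-bit product back modulo the GHASH polynomial. For reference, the textbook bit-at-a-time multiply from NIST SP 800-38D computes the same product; a hedged C version in the bit-reflected convention, requiring unsigned __int128:

typedef unsigned __int128 u128;

/* Reference GHASH multiply (NIST SP 800-38D): the integer MSB is bit 0
 * of the block. Slow, but intended to match what the NEON path computes. */
static u128 gf128_mul(u128 x, u128 y) {
  const u128 r = (u128)0xE1 << 120;     /* x^128 + x^7 + x^2 + x + 1 */
  u128 z = 0, v = y;
  for (int i = 127; i >= 0; i--) {
    if ((x >> i) & 1) z ^= v;           /* conditionally accumulate */
    v = (v >> 1) ^ ((v & 1) ? r : 0);   /* multiply v by x */
  }
  return z;
}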
|
||||
|
||||
#endif
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM
|
|
@@ -0,0 +1,256 @@
|
|||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.code 32
|
||||
#undef __thumb2__
|
||||
.globl _gcm_init_v8
|
||||
.private_extern _gcm_init_v8
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_init_v8
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_init_v8:
|
||||
vld1.64 {q9},[r1] @ load input H
|
||||
vmov.i8 q11,#0xe1
|
||||
vshl.i64 q11,q11,#57 @ 0xc2.0
|
||||
vext.8 q3,q9,q9,#8
|
||||
vshr.u64 q10,q11,#63
|
||||
vdup.32 q9,d18[1]
|
||||
vext.8 q8,q10,q11,#8 @ t0=0xc2....01
|
||||
vshr.u64 q10,q3,#63
|
||||
vshr.s32 q9,q9,#31 @ broadcast carry bit
|
||||
vand q10,q10,q8
|
||||
vshl.i64 q3,q3,#1
|
||||
vext.8 q10,q10,q10,#8
|
||||
vand q8,q8,q9
|
||||
vorr q3,q3,q10 @ H<<<=1
|
||||
veor q12,q3,q8 @ twisted H
|
||||
vst1.64 {q12},[r0]! @ store Htable[0]
|
||||
|
||||
@ calculate H^2
|
||||
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
|
||||
.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12
|
||||
veor q8,q8,q12
|
||||
.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12
|
||||
.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase
|
||||
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
veor q0,q1,q10
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q10,q10,q2
|
||||
veor q14,q0,q10
|
||||
|
||||
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
|
||||
veor q9,q9,q14
|
||||
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
|
||||
vst1.64 {q13,q14},[r0] @ store Htable[1..2]
|
||||
|
||||
bx lr
|
||||
|
||||
.globl _gcm_gmult_v8
|
||||
.private_extern _gcm_gmult_v8
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_gmult_v8
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_gmult_v8:
|
||||
vld1.64 {q9},[r0] @ load Xi
|
||||
vmov.i8 q11,#0xe1
|
||||
vld1.64 {q12,q13},[r1] @ load twisted H, ...
|
||||
vshl.u64 q11,q11,#57
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q9,q9
|
||||
#endif
|
||||
vext.8 q3,q9,q9,#8
|
||||
|
||||
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
|
||||
veor q9,q9,q3 @ Karatsuba pre-processing
|
||||
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
|
||||
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
|
||||
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
veor q0,q1,q10
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q10,q10,q2
|
||||
veor q0,q0,q10
|
||||
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vext.8 q0,q0,q0,#8
|
||||
vst1.64 {q0},[r0] @ write out Xi
|
||||
|
||||
bx lr
|
||||
|
||||
.globl _gcm_ghash_v8
|
||||
.private_extern _gcm_ghash_v8
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_ghash_v8
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_ghash_v8:
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
|
||||
vld1.64 {q0},[r0] @ load [rotated] Xi
|
||||
@ "[rotated]" means that
|
||||
@ loaded value would have
|
||||
@ to be rotated in order to
|
||||
@ make it appear as in
|
||||
@ algorithm specification
|
||||
subs r3,r3,#32 @ see if r3 is 32 or larger
|
||||
mov r12,#16 @ r12 is used as post-
|
||||
@ increment for input pointer;
|
||||
@ as loop is modulo-scheduled
|
||||
@ r12 is zeroed just in time
|
||||
@ to preclude overstepping
|
||||
@ inp[len], which means that
|
||||
@ last block[s] are actually
|
||||
@ loaded twice, but last
|
||||
@ copy is not processed
|
||||
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
|
||||
vmov.i8 q11,#0xe1
|
||||
vld1.64 {q14},[r1]
|
||||
moveq r12,#0 @ is it time to zero r12?
|
||||
vext.8 q0,q0,q0,#8 @ rotate Xi
|
||||
vld1.64 {q8},[r2]! @ load [rotated] I[0]
|
||||
vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q8,q8
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vext.8 q3,q8,q8,#8 @ rotate I[0]
|
||||
blo Lodd_tail_v8 @ r3 was less than 32
|
||||
vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q9,q9
|
||||
#endif
|
||||
vext.8 q7,q9,q9,#8
|
||||
veor q3,q3,q0 @ I[i]^=Xi
|
||||
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
|
||||
veor q9,q9,q7 @ Karatsuba pre-processing
|
||||
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
|
||||
b Loop_mod2x_v8
|
||||
|
||||
.align 4
|
||||
Loop_mod2x_v8:
|
||||
vext.8 q10,q3,q3,#8
|
||||
subs r3,r3,#32 @ is there more data?
|
||||
.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
|
||||
movlo r12,#0 @ is it time to zero r12?
|
||||
|
||||
.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9
|
||||
veor q10,q10,q3 @ Karatsuba pre-processing
|
||||
.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
|
||||
veor q0,q0,q4 @ accumulate
|
||||
.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
|
||||
vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2]
|
||||
|
||||
veor q2,q2,q6
|
||||
moveq r12,#0 @ is it time to zero r12?
|
||||
veor q1,q1,q5
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3]
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q8,q8
|
||||
#endif
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
|
||||
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q9,q9
|
||||
#endif
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
vext.8 q7,q9,q9,#8
|
||||
vext.8 q3,q8,q8,#8
|
||||
veor q0,q1,q10
|
||||
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
|
||||
veor q3,q3,q2 @ accumulate q3 early
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q3,q3,q10
|
||||
veor q9,q9,q7 @ Karatsuba pre-processing
|
||||
veor q3,q3,q0
|
||||
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
|
||||
bhs Loop_mod2x_v8 @ there was at least 32 more bytes
|
||||
|
||||
veor q2,q2,q10
|
||||
vext.8 q3,q8,q8,#8 @ re-construct q3
|
||||
adds r3,r3,#32 @ re-construct r3
|
||||
veor q0,q0,q2 @ re-construct q0
|
||||
beq Ldone_v8 @ is r3 zero?
|
||||
Lodd_tail_v8:
|
||||
vext.8 q10,q0,q0,#8
|
||||
veor q3,q3,q0 @ inp^=Xi
|
||||
veor q9,q8,q10 @ q9 is rotated inp^Xi
|
||||
|
||||
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
|
||||
veor q9,q9,q3 @ Karatsuba pre-processing
|
||||
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
|
||||
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
|
||||
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
veor q0,q1,q10
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q10,q10,q2
|
||||
veor q0,q0,q10
|
||||
|
||||
Ldone_v8:
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vext.8 q0,q0,q0,#8
|
||||
vst1.64 {q0},[r0] @ write out Xi
|
||||
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
|
||||
bx lr
|
||||
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM
|
1518 packager/third_party/boringssl/ios-arm/crypto/fipsmodule/sha1-armv4-large.S (vendored, new file)
File diff suppressed because it is too large
|
@@ -0,0 +1,377 @@
|
|||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.syntax unified
|
||||
|
||||
|
||||
|
||||
|
||||
.text
|
||||
|
||||
@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
@ with |argv|, then saves the callee-saved registers into |state|. It returns
|
||||
@ the result of |func|. The |unwind| argument is unused.
|
||||
@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
|
||||
@ const uint32_t *argv, size_t argc,
|
||||
@ int unwind);
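A sketch of how a harness might drive this trampoline. The CallerState layout is inferred from the load order below (d8-d15, then r4-r11); the real definition lives in BoringSSL's abi_test harness and may differ, so treat both the struct and the wrapper as illustrative:

#include <stddef.h>
#include <stdint.h>

/* Assumed layout, matching the vldmia/ldmia order in the code below.
 * On iOS r9 is skipped, so the integer part would hold 7 words there. */
typedef struct {
  uint64_t d[8];   /* d8..d15 */
  uint32_t r[8];   /* r4..r11 */
} CallerState;

extern uint32_t abi_test_trampoline(void (*func)(void), CallerState *state,
                                    const uint32_t *argv, size_t argc,
                                    int unwind);

/* Example: call a two-argument function and capture the callee-saved
 * registers it leaves behind, so the test can check they were preserved. */
static uint32_t check_abi_2(uint32_t (*func)(uint32_t, uint32_t),
                            CallerState *state, uint32_t a, uint32_t b) {
  uint32_t argv[2] = {a, b};
  return abi_test_trampoline((void (*)(void))func, state, argv, 2, 0);
}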
|
||||
|
||||
.globl _abi_test_trampoline
|
||||
.private_extern _abi_test_trampoline
|
||||
.align 4
|
||||
_abi_test_trampoline:
|
||||
Labi_test_trampoline_begin:
|
||||
@ Save parameters and all callee-saved registers. For convenience, we
|
||||
@ save r9 on iOS even though it's volatile.
|
||||
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
stmdb sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
|
||||
@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
|
||||
@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
|
||||
sub sp, sp, #28
|
||||
|
||||
@ Every register in AAPCS is either non-volatile or a parameter (except
|
||||
@ r9 on iOS), so this code, by the actual call, loses all its scratch
|
||||
@ registers. First fill in stack parameters while there are registers
|
||||
@ to spare.
|
||||
cmp r3, #4
|
||||
bls Lstack_args_done
|
||||
mov r4, sp @ r4 is the output pointer.
|
||||
add r5, r2, r3, lsl #2 @ Set r5 to the end of argv.
|
||||
add r2, r2, #16 @ Skip four arguments.
|
||||
Lstack_args_loop:
|
||||
ldr r6, [r2], #4
|
||||
cmp r2, r5
|
||||
str r6, [r4], #4
|
||||
bne Lstack_args_loop
|
||||
|
||||
Lstack_args_done:
|
||||
@ Load registers from |r1|.
|
||||
vldmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
#if defined(__APPLE__)
|
||||
@ r9 is not volatile on iOS.
|
||||
ldmia r1!, {r4,r5,r6,r7,r8,r10-r11}
|
||||
#else
|
||||
ldmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
#endif
|
||||
|
||||
@ Load register parameters. This uses up our remaining registers, so we
|
||||
@ repurpose lr as scratch space.
|
||||
ldr r3, [sp, #40] @ Reload argc.
|
||||
ldr lr, [sp, #36] @ Load argv into lr.
|
||||
cmp r3, #3
|
||||
bhi Larg_r3
|
||||
beq Larg_r2
|
||||
cmp r3, #1
|
||||
bhi Larg_r1
|
||||
beq Larg_r0
|
||||
b Largs_done
|
||||
|
||||
Larg_r3:
|
||||
ldr r3, [lr, #12] @ argv[3]
|
||||
Larg_r2:
|
||||
ldr r2, [lr, #8] @ argv[2]
|
||||
Larg_r1:
|
||||
ldr r1, [lr, #4] @ argv[1]
|
||||
Larg_r0:
|
||||
ldr r0, [lr] @ argv[0]
|
||||
Largs_done:
|
||||
|
||||
@ With every other register in use, load the function pointer into lr
|
||||
@ and call the function.
|
||||
ldr lr, [sp, #28]
|
||||
blx lr
|
||||
|
||||
@ r1-r3 are free for use again. The trampoline only supports
|
||||
@ single-return functions. Pass r4-r11 to the caller.
|
||||
ldr r1, [sp, #32]
|
||||
vstmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
#if defined(__APPLE__)
|
||||
@ r9 is not volatile on iOS.
|
||||
stmia r1!, {r4,r5,r6,r7,r8,r10-r11}
|
||||
#else
|
||||
stmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
#endif
|
||||
|
||||
@ Unwind the stack and restore registers.
|
||||
add sp, sp, #44 @ 44 = 28+16
|
||||
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} @ Skip r0-r3 (see +16 above).
|
||||
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r0
|
||||
.private_extern _abi_test_clobber_r0
|
||||
.align 4
|
||||
_abi_test_clobber_r0:
|
||||
mov r0, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r1
|
||||
.private_extern _abi_test_clobber_r1
|
||||
.align 4
|
||||
_abi_test_clobber_r1:
|
||||
mov r1, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r2
|
||||
.private_extern _abi_test_clobber_r2
|
||||
.align 4
|
||||
_abi_test_clobber_r2:
|
||||
mov r2, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r3
|
||||
.private_extern _abi_test_clobber_r3
|
||||
.align 4
|
||||
_abi_test_clobber_r3:
|
||||
mov r3, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r4
|
||||
.private_extern _abi_test_clobber_r4
|
||||
.align 4
|
||||
_abi_test_clobber_r4:
|
||||
mov r4, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r5
|
||||
.private_extern _abi_test_clobber_r5
|
||||
.align 4
|
||||
_abi_test_clobber_r5:
|
||||
mov r5, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r6
|
||||
.private_extern _abi_test_clobber_r6
|
||||
.align 4
|
||||
_abi_test_clobber_r6:
|
||||
mov r6, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r7
|
||||
.private_extern _abi_test_clobber_r7
|
||||
.align 4
|
||||
_abi_test_clobber_r7:
|
||||
mov r7, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r8
|
||||
.private_extern _abi_test_clobber_r8
|
||||
.align 4
|
||||
_abi_test_clobber_r8:
|
||||
mov r8, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r9
|
||||
.private_extern _abi_test_clobber_r9
|
||||
.align 4
|
||||
_abi_test_clobber_r9:
|
||||
mov r9, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r10
|
||||
.private_extern _abi_test_clobber_r10
|
||||
.align 4
|
||||
_abi_test_clobber_r10:
|
||||
mov r10, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r11
|
||||
.private_extern _abi_test_clobber_r11
|
||||
.align 4
|
||||
_abi_test_clobber_r11:
|
||||
mov r11, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r12
|
||||
.private_extern _abi_test_clobber_r12
|
||||
.align 4
|
||||
_abi_test_clobber_r12:
|
||||
mov r12, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d0
|
||||
.private_extern _abi_test_clobber_d0
|
||||
.align 4
|
||||
_abi_test_clobber_d0:
|
||||
mov r0, #0
|
||||
vmov s0, r0
|
||||
vmov s1, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d1
|
||||
.private_extern _abi_test_clobber_d1
|
||||
.align 4
|
||||
_abi_test_clobber_d1:
|
||||
mov r0, #0
|
||||
vmov s2, r0
|
||||
vmov s3, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d2
|
||||
.private_extern _abi_test_clobber_d2
|
||||
.align 4
|
||||
_abi_test_clobber_d2:
|
||||
mov r0, #0
|
||||
vmov s4, r0
|
||||
vmov s5, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d3
|
||||
.private_extern _abi_test_clobber_d3
|
||||
.align 4
|
||||
_abi_test_clobber_d3:
|
||||
mov r0, #0
|
||||
vmov s6, r0
|
||||
vmov s7, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d4
|
||||
.private_extern _abi_test_clobber_d4
|
||||
.align 4
|
||||
_abi_test_clobber_d4:
|
||||
mov r0, #0
|
||||
vmov s8, r0
|
||||
vmov s9, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d5
|
||||
.private_extern _abi_test_clobber_d5
|
||||
.align 4
|
||||
_abi_test_clobber_d5:
|
||||
mov r0, #0
|
||||
vmov s10, r0
|
||||
vmov s11, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d6
|
||||
.private_extern _abi_test_clobber_d6
|
||||
.align 4
|
||||
_abi_test_clobber_d6:
|
||||
mov r0, #0
|
||||
vmov s12, r0
|
||||
vmov s13, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d7
|
||||
.private_extern _abi_test_clobber_d7
|
||||
.align 4
|
||||
_abi_test_clobber_d7:
|
||||
mov r0, #0
|
||||
vmov s14, r0
|
||||
vmov s15, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d8
|
||||
.private_extern _abi_test_clobber_d8
|
||||
.align 4
|
||||
_abi_test_clobber_d8:
|
||||
mov r0, #0
|
||||
vmov s16, r0
|
||||
vmov s17, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d9
|
||||
.private_extern _abi_test_clobber_d9
|
||||
.align 4
|
||||
_abi_test_clobber_d9:
|
||||
mov r0, #0
|
||||
vmov s18, r0
|
||||
vmov s19, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d10
|
||||
.private_extern _abi_test_clobber_d10
|
||||
.align 4
|
||||
_abi_test_clobber_d10:
|
||||
mov r0, #0
|
||||
vmov s20, r0
|
||||
vmov s21, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d11
|
||||
.private_extern _abi_test_clobber_d11
|
||||
.align 4
|
||||
_abi_test_clobber_d11:
|
||||
mov r0, #0
|
||||
vmov s22, r0
|
||||
vmov s23, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d12
|
||||
.private_extern _abi_test_clobber_d12
|
||||
.align 4
|
||||
_abi_test_clobber_d12:
|
||||
mov r0, #0
|
||||
vmov s24, r0
|
||||
vmov s25, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d13
|
||||
.private_extern _abi_test_clobber_d13
|
||||
.align 4
|
||||
_abi_test_clobber_d13:
|
||||
mov r0, #0
|
||||
vmov s26, r0
|
||||
vmov s27, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d14
|
||||
.private_extern _abi_test_clobber_d14
|
||||
.align 4
|
||||
_abi_test_clobber_d14:
|
||||
mov r0, #0
|
||||
vmov s28, r0
|
||||
vmov s29, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d15
|
||||
.private_extern _abi_test_clobber_d15
|
||||
.align 4
|
||||
_abi_test_clobber_d15:
|
||||
mov r0, #0
|
||||
vmov s30, r0
|
||||
vmov s31, r0
|
||||
bx lr
|
||||
|
||||
#endif // !OPENSSL_NO_ASM
|
|
@@ -1,39 +1,48 @@
|
|||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
|
||||
.section .rodata
|
||||
|
||||
.align 5
|
||||
.Lsigma:
|
||||
.quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral
|
||||
.Lone:
|
||||
.long 1,0,0,0
|
||||
.LOPENSSL_armcap_P:
|
||||
#ifdef __ILP32__
|
||||
.long OPENSSL_armcap_P-.
|
||||
#else
|
||||
.quad OPENSSL_armcap_P-.
|
||||
#endif
|
||||
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
|
||||
.text
|
||||
|
||||
.globl ChaCha20_ctr32
|
||||
.hidden ChaCha20_ctr32
|
||||
.type ChaCha20_ctr32,%function
|
||||
.align 5
|
||||
ChaCha20_ctr32:
|
||||
cbz x2,.Labort
|
||||
adr x5,.LOPENSSL_armcap_P
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x5,:pg_hi21_nc:OPENSSL_armcap_P
|
||||
#else
|
||||
adrp x5,OPENSSL_armcap_P
|
||||
#endif
|
||||
cmp x2,#192
|
||||
b.lo .Lshort
|
||||
#ifdef __ILP32__
|
||||
ldrsw x6,[x5]
|
||||
#else
|
||||
ldr x6,[x5]
|
||||
#endif
|
||||
ldr w17,[x6,x5]
|
||||
ldr w17,[x5,:lo12:OPENSSL_armcap_P]
|
||||
tst w17,#ARMV7_NEON
|
||||
b.ne ChaCha20_neon
|
||||
|
||||
|
@@ -41,7 +50,8 @@ ChaCha20_ctr32:
|
|||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
adr x5,.Lsigma
|
||||
adrp x5,.Lsigma
|
||||
add x5,x5,:lo12:.Lsigma
|
||||
stp x19,x20,[sp,#16]
|
||||
stp x21,x22,[sp,#32]
|
||||
stp x23,x24,[sp,#48]
|
||||
|
@@ -314,7 +324,8 @@ ChaCha20_neon:
|
|||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
adr x5,.Lsigma
|
||||
adrp x5,.Lsigma
|
||||
add x5,x5,:lo12:.Lsigma
|
||||
stp x19,x20,[sp,#16]
|
||||
stp x21,x22,[sp,#32]
|
||||
stp x23,x24,[sp,#48]
|
||||
|
@@ -807,7 +818,8 @@ ChaCha20_512_neon:
|
|||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
adr x5,.Lsigma
|
||||
adrp x5,.Lsigma
|
||||
add x5,x5,:lo12:.Lsigma
|
||||
stp x19,x20,[sp,#16]
|
||||
stp x21,x22,[sp,#32]
|
||||
stp x23,x24,[sp,#48]
|
||||
|
@@ -1969,3 +1981,5 @@ ChaCha20_512_neon:
|
|||
ret
|
||||
.size ChaCha20_512_neon,.-ChaCha20_512_neon
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
|
|
@@ -1,17 +1,32 @@
|
|||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.text
|
||||
#if !defined(__clang__) || defined(BORINGSSL_CLANG_SUPPORTS_DOT_ARCH)
|
||||
.arch armv8-a+crypto
|
||||
#endif
|
||||
.section .rodata
|
||||
.align 5
|
||||
.Lrcon:
|
||||
.long 0x01,0x01,0x01,0x01
|
||||
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
|
||||
.long 0x1b,0x1b,0x1b,0x1b
|
||||
|
||||
.text
|
||||
|
||||
.globl aes_hw_set_encrypt_key
|
||||
.hidden aes_hw_set_encrypt_key
|
||||
.type aes_hw_set_encrypt_key,%function
|
||||
|
@@ -33,7 +48,8 @@ aes_hw_set_encrypt_key:
|
|||
tst w1,#0x3f
|
||||
b.ne .Lenc_key_abort
|
||||
|
||||
adr x3,.Lrcon
|
||||
adrp x3,.Lrcon
|
||||
add x3,x3,:lo12:.Lrcon
|
||||
cmp w1,#192
|
||||
|
||||
eor v0.16b,v0.16b,v0.16b
|
||||
|
@@ -755,3 +771,5 @@ aes_hw_ctr32_encrypt_blocks:
|
|||
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
|
|
@@ -1,4 +1,18 @@
|
|||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
|
||||
.globl bn_mul_mont
|
||||
|
@ -1405,3 +1419,5 @@ __bn_mul4x_mont:
|
|||
.align 2
|
||||
.align 4
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
|
341 packager/third_party/boringssl/linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S (vendored, new file)
|
@@ -0,0 +1,341 @@
|
|||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
|
||||
.globl gcm_init_neon
|
||||
.hidden gcm_init_neon
|
||||
.type gcm_init_neon,%function
|
||||
.align 4
|
||||
gcm_init_neon:
|
||||
// This function is adapted from gcm_init_v8. xC2 is t3.
|
||||
ld1 {v17.2d}, [x1] // load H
|
||||
movi v19.16b, #0xe1
|
||||
shl v19.2d, v19.2d, #57 // 0xc2.0
|
||||
ext v3.16b, v17.16b, v17.16b, #8
|
||||
ushr v18.2d, v19.2d, #63
|
||||
dup v17.4s, v17.s[1]
|
||||
ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
|
||||
ushr v18.2d, v3.2d, #63
|
||||
sshr v17.4s, v17.4s, #31 // broadcast carry bit
|
||||
and v18.16b, v18.16b, v16.16b
|
||||
shl v3.2d, v3.2d, #1
|
||||
ext v18.16b, v18.16b, v18.16b, #8
|
||||
and v16.16b, v16.16b, v17.16b
|
||||
orr v3.16b, v3.16b, v18.16b // H<<<=1
|
||||
eor v5.16b, v3.16b, v16.16b // twisted H
|
||||
st1 {v5.2d}, [x0] // store Htable[0]
|
||||
ret
|
||||
.size gcm_init_neon,.-gcm_init_neon
|
||||
|
||||
.globl gcm_gmult_neon
|
||||
.hidden gcm_gmult_neon
|
||||
.type gcm_gmult_neon,%function
|
||||
.align 4
|
||||
gcm_gmult_neon:
|
||||
ld1 {v3.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
adrp x9, .Lmasks // load constants
|
||||
add x9, x9, :lo12:.Lmasks
|
||||
ld1 {v24.2d, v25.2d}, [x9]
|
||||
rev64 v3.16b, v3.16b // byteswap Xi
|
||||
ext v3.16b, v3.16b, v3.16b, #8
|
||||
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
|
||||
|
||||
mov x3, #16
|
||||
b .Lgmult_neon
|
||||
.size gcm_gmult_neon,.-gcm_gmult_neon
|
||||
|
||||
.globl gcm_ghash_neon
|
||||
.hidden gcm_ghash_neon
|
||||
.type gcm_ghash_neon,%function
|
||||
.align 4
|
||||
gcm_ghash_neon:
|
||||
ld1 {v0.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
adrp x9, .Lmasks // load constants
|
||||
add x9, x9, :lo12:.Lmasks
|
||||
ld1 {v24.2d, v25.2d}, [x9]
|
||||
rev64 v0.16b, v0.16b // byteswap Xi
|
||||
ext v0.16b, v0.16b, v0.16b, #8
|
||||
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
|
||||
|
||||
.Loop_neon:
|
||||
ld1 {v3.16b}, [x2], #16 // load inp
|
||||
rev64 v3.16b, v3.16b // byteswap inp
|
||||
ext v3.16b, v3.16b, v3.16b, #8
|
||||
eor v3.16b, v3.16b, v0.16b // inp ^= Xi
|
||||
|
||||
.Lgmult_neon:
|
||||
// Split the input into v3 and v4. (The upper halves are unused,
|
||||
// so it is okay to leave them alone.)
|
||||
ins v4.d[0], v3.d[1]
|
||||
ext v16.8b, v5.8b, v5.8b, #1 // A1
|
||||
pmull v16.8h, v16.8b, v3.8b // F = A1*B
|
||||
ext v0.8b, v3.8b, v3.8b, #1 // B1
|
||||
pmull v0.8h, v5.8b, v0.8b // E = A*B1
|
||||
ext v17.8b, v5.8b, v5.8b, #2 // A2
|
||||
pmull v17.8h, v17.8b, v3.8b // H = A2*B
|
||||
ext v19.8b, v3.8b, v3.8b, #2 // B2
|
||||
pmull v19.8h, v5.8b, v19.8b // G = A*B2
|
||||
ext v18.8b, v5.8b, v5.8b, #3 // A3
|
||||
eor v16.16b, v16.16b, v0.16b // L = E + F
|
||||
pmull v18.8h, v18.8b, v3.8b // J = A3*B
|
||||
ext v0.8b, v3.8b, v3.8b, #3 // B3
|
||||
eor v17.16b, v17.16b, v19.16b // M = G + H
|
||||
pmull v0.8h, v5.8b, v0.8b // I = A*B3
|
||||
|
||||
// Here we diverge from the 32-bit version. It computes the following
|
||||
// (instructions reordered for clarity):
|
||||
//
|
||||
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
|
||||
// vand $t0#hi, $t0#hi, $k48
|
||||
// veor $t0#lo, $t0#lo, $t0#hi
|
||||
//
|
||||
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
|
||||
// vand $t1#hi, $t1#hi, $k32
|
||||
// veor $t1#lo, $t1#lo, $t1#hi
|
||||
//
|
||||
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
|
||||
// vand $t2#hi, $t2#hi, $k16
|
||||
// veor $t2#lo, $t2#lo, $t2#hi
|
||||
//
|
||||
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
|
||||
// vmov.i64 $t3#hi, #0
|
||||
//
|
||||
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
|
||||
// upper halves of SIMD registers, so we must split each half into
|
||||
// separate registers. To compensate, we pair computations up and
|
||||
// parallelize.
|
||||
|
||||
ext v19.8b, v3.8b, v3.8b, #4 // B4
|
||||
eor v18.16b, v18.16b, v0.16b // N = I + J
|
||||
pmull v19.8h, v5.8b, v19.8b // K = A*B4
|
||||
|
||||
// This can probably be scheduled more efficiently. For now, we just
|
||||
// pair up independent instructions.
|
||||
zip1 v20.2d, v16.2d, v17.2d
|
||||
zip1 v22.2d, v18.2d, v19.2d
|
||||
zip2 v21.2d, v16.2d, v17.2d
|
||||
zip2 v23.2d, v18.2d, v19.2d
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
and v21.16b, v21.16b, v24.16b
|
||||
and v23.16b, v23.16b, v25.16b
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
zip1 v16.2d, v20.2d, v21.2d
|
||||
zip1 v18.2d, v22.2d, v23.2d
|
||||
zip2 v17.2d, v20.2d, v21.2d
|
||||
zip2 v19.2d, v22.2d, v23.2d
|
||||
|
||||
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
|
||||
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
|
||||
pmull v0.8h, v5.8b, v3.8b // D = A*B
|
||||
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
|
||||
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
|
||||
eor v16.16b, v16.16b, v17.16b
|
||||
eor v18.16b, v18.16b, v19.16b
|
||||
eor v0.16b, v0.16b, v16.16b
|
||||
eor v0.16b, v0.16b, v18.16b
|
||||
eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
|
||||
ext v16.8b, v7.8b, v7.8b, #1 // A1
|
||||
pmull v16.8h, v16.8b, v3.8b // F = A1*B
|
||||
ext v1.8b, v3.8b, v3.8b, #1 // B1
|
||||
pmull v1.8h, v7.8b, v1.8b // E = A*B1
|
||||
ext v17.8b, v7.8b, v7.8b, #2 // A2
|
||||
pmull v17.8h, v17.8b, v3.8b // H = A2*B
|
||||
ext v19.8b, v3.8b, v3.8b, #2 // B2
|
||||
pmull v19.8h, v7.8b, v19.8b // G = A*B2
|
||||
ext v18.8b, v7.8b, v7.8b, #3 // A3
|
||||
eor v16.16b, v16.16b, v1.16b // L = E + F
|
||||
pmull v18.8h, v18.8b, v3.8b // J = A3*B
|
||||
ext v1.8b, v3.8b, v3.8b, #3 // B3
|
||||
eor v17.16b, v17.16b, v19.16b // M = G + H
|
||||
pmull v1.8h, v7.8b, v1.8b // I = A*B3
|
||||
|
||||
// Here we diverge from the 32-bit version. It computes the following
|
||||
// (instructions reordered for clarity):
|
||||
//
|
||||
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
|
||||
// vand $t0#hi, $t0#hi, $k48
|
||||
// veor $t0#lo, $t0#lo, $t0#hi
|
||||
//
|
||||
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
|
||||
// vand $t1#hi, $t1#hi, $k32
|
||||
// veor $t1#lo, $t1#lo, $t1#hi
|
||||
//
|
||||
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
|
||||
// vand $t2#hi, $t2#hi, $k16
|
||||
// veor $t2#lo, $t2#lo, $t2#hi
|
||||
//
|
||||
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
|
||||
// vmov.i64 $t3#hi, #0
|
||||
//
|
||||
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
|
||||
// upper halves of SIMD registers, so we must split each half into
|
||||
// separate registers. To compensate, we pair computations up and
|
||||
// parallelize.
|
||||
|
||||
ext v19.8b, v3.8b, v3.8b, #4 // B4
|
||||
eor v18.16b, v18.16b, v1.16b // N = I + J
|
||||
pmull v19.8h, v7.8b, v19.8b // K = A*B4
|
||||
|
||||
// This can probably be scheduled more efficiently. For now, we just
|
||||
// pair up independent instructions.
|
||||
zip1 v20.2d, v16.2d, v17.2d
|
||||
zip1 v22.2d, v18.2d, v19.2d
|
||||
zip2 v21.2d, v16.2d, v17.2d
|
||||
zip2 v23.2d, v18.2d, v19.2d
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
and v21.16b, v21.16b, v24.16b
|
||||
and v23.16b, v23.16b, v25.16b
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
zip1 v16.2d, v20.2d, v21.2d
|
||||
zip1 v18.2d, v22.2d, v23.2d
|
||||
zip2 v17.2d, v20.2d, v21.2d
|
||||
zip2 v19.2d, v22.2d, v23.2d
|
||||
|
||||
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
|
||||
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
|
||||
pmull v1.8h, v7.8b, v3.8b // D = A*B
|
||||
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
|
||||
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
|
||||
eor v16.16b, v16.16b, v17.16b
|
||||
eor v18.16b, v18.16b, v19.16b
|
||||
eor v1.16b, v1.16b, v16.16b
|
||||
eor v1.16b, v1.16b, v18.16b
|
||||
ext v16.8b, v6.8b, v6.8b, #1 // A1
|
||||
pmull v16.8h, v16.8b, v4.8b // F = A1*B
|
||||
ext v2.8b, v4.8b, v4.8b, #1 // B1
|
||||
pmull v2.8h, v6.8b, v2.8b // E = A*B1
|
||||
ext v17.8b, v6.8b, v6.8b, #2 // A2
|
||||
pmull v17.8h, v17.8b, v4.8b // H = A2*B
|
||||
ext v19.8b, v4.8b, v4.8b, #2 // B2
|
||||
pmull v19.8h, v6.8b, v19.8b // G = A*B2
|
||||
ext v18.8b, v6.8b, v6.8b, #3 // A3
|
||||
eor v16.16b, v16.16b, v2.16b // L = E + F
|
||||
pmull v18.8h, v18.8b, v4.8b // J = A3*B
|
||||
ext v2.8b, v4.8b, v4.8b, #3 // B3
|
||||
eor v17.16b, v17.16b, v19.16b // M = G + H
|
||||
pmull v2.8h, v6.8b, v2.8b // I = A*B3
|
||||
|
||||
// Here we diverge from the 32-bit version. It computes the following
|
||||
// (instructions reordered for clarity):
|
||||
//
|
||||
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
|
||||
// vand $t0#hi, $t0#hi, $k48
|
||||
// veor $t0#lo, $t0#lo, $t0#hi
|
||||
//
|
||||
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
|
||||
// vand $t1#hi, $t1#hi, $k32
|
||||
// veor $t1#lo, $t1#lo, $t1#hi
|
||||
//
|
||||
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
|
||||
// vand $t2#hi, $t2#hi, $k16
|
||||
// veor $t2#lo, $t2#lo, $t2#hi
|
||||
//
|
||||
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
|
||||
// vmov.i64 $t3#hi, #0
|
||||
//
|
||||
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
|
||||
// upper halves of SIMD registers, so we must split each half into
|
||||
// separate registers. To compensate, we pair computations up and
|
||||
// parallelize.
|
||||
|
||||
ext v19.8b, v4.8b, v4.8b, #4 // B4
|
||||
eor v18.16b, v18.16b, v2.16b // N = I + J
|
||||
pmull v19.8h, v6.8b, v19.8b // K = A*B4
|
||||
|
||||
// This can probably be scheduled more efficiently. For now, we just
|
||||
// pair up independent instructions.
|
||||
zip1 v20.2d, v16.2d, v17.2d
|
||||
zip1 v22.2d, v18.2d, v19.2d
|
||||
zip2 v21.2d, v16.2d, v17.2d
|
||||
zip2 v23.2d, v18.2d, v19.2d
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
and v21.16b, v21.16b, v24.16b
|
||||
and v23.16b, v23.16b, v25.16b
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
zip1 v16.2d, v20.2d, v21.2d
|
||||
zip1 v18.2d, v22.2d, v23.2d
|
||||
zip2 v17.2d, v20.2d, v21.2d
|
||||
zip2 v19.2d, v22.2d, v23.2d
|
||||
|
||||
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
|
||||
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
|
||||
pmull v2.8h, v6.8b, v4.8b // D = A*B
|
||||
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
|
||||
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
|
||||
eor v16.16b, v16.16b, v17.16b
|
||||
eor v18.16b, v18.16b, v19.16b
|
||||
eor v2.16b, v2.16b, v16.16b
|
||||
eor v2.16b, v2.16b, v18.16b
|
||||
ext v16.16b, v0.16b, v2.16b, #8
|
||||
eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
|
||||
eor v1.16b, v1.16b, v2.16b
|
||||
eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
|
||||
ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result
|
||||
// This is a no-op due to the ins instruction below.
|
||||
// ins v2.d[0], v1.d[1]
|
||||
|
||||
// equivalent of reduction_avx from ghash-x86_64.pl
|
||||
shl v17.2d, v0.2d, #57 // 1st phase
|
||||
shl v18.2d, v0.2d, #62
|
||||
eor v18.16b, v18.16b, v17.16b //
|
||||
shl v17.2d, v0.2d, #63
|
||||
eor v18.16b, v18.16b, v17.16b //
|
||||
// Note Xm contains {Xl.d[1], Xh.d[0]}.
|
||||
eor v18.16b, v18.16b, v1.16b
|
||||
ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0]
|
||||
ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1]
|
||||
|
||||
ushr v18.2d, v0.2d, #1 // 2nd phase
|
||||
eor v2.16b, v2.16b,v0.16b
|
||||
eor v0.16b, v0.16b,v18.16b //
|
||||
ushr v18.2d, v18.2d, #6
|
||||
ushr v0.2d, v0.2d, #1 //
|
||||
eor v0.16b, v0.16b, v2.16b //
|
||||
eor v0.16b, v0.16b, v18.16b //
|
||||
|
||||
subs x3, x3, #16
|
||||
bne .Loop_neon
|
||||
|
||||
rev64 v0.16b, v0.16b // byteswap Xi and write
|
||||
ext v0.16b, v0.16b, v0.16b, #8
|
||||
st1 {v0.16b}, [x0]
|
||||
|
||||
ret
|
||||
.size gcm_ghash_neon,.-gcm_ghash_neon
|
||||
|
||||
.section .rodata
|
||||
.align 4
|
||||
.Lmasks:
|
||||
.quad 0x0000ffffffffffff // k48
|
||||
.quad 0x00000000ffffffff // k32
|
||||
.quad 0x000000000000ffff // k16
|
||||
.quad 0x0000000000000000 // k0
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
|
@@ -1,10 +1,22 @@
|
|||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
#if !defined(__clang__) || defined(BORINGSSL_CLANG_SUPPORTS_DOT_ARCH)
|
||||
.arch armv8-a+crypto
|
||||
#endif
|
||||
.globl gcm_init_v8
|
||||
.hidden gcm_init_v8
|
||||
.type gcm_init_v8,%function
|
||||
|
@ -108,13 +120,13 @@ gcm_ghash_v8:
|
|||
//loaded value would have
|
||||
//to be rotated in order to
|
||||
//make it appear as in
|
||||
//alorithm specification
|
||||
//algorithm specification
|
||||
subs x3,x3,#32 //see if x3 is 32 or larger
|
||||
mov x12,#16 //x12 is used as post-
|
||||
//increment for input pointer;
|
||||
//as loop is modulo-scheduled
|
||||
//x12 is zeroed just in time
|
||||
//to preclude oversteping
|
||||
//to preclude overstepping
|
||||
//inp[len], which means that
|
||||
//last block[s] are actually
|
||||
//loaded twice, but last
|
||||
|
@ -233,3 +245,5 @@ gcm_ghash_v8:
|
|||
.align 2
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
|
|
@ -1,4 +1,18 @@
|
|||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
@ -9,14 +23,12 @@
|
|||
.type sha1_block_data_order,%function
|
||||
.align 6
|
||||
sha1_block_data_order:
|
||||
#ifdef __ILP32__
|
||||
ldrsw x16,.LOPENSSL_armcap_P
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P
|
||||
#else
|
||||
ldr x16,.LOPENSSL_armcap_P
|
||||
adrp x16,OPENSSL_armcap_P
|
||||
#endif
|
||||
adr x17,.LOPENSSL_armcap_P
|
||||
add x16,x16,x17
|
||||
ldr w16,[x16]
|
||||
ldr w16,[x16,:lo12:OPENSSL_armcap_P]
|
||||
tst w16,#ARMV8_SHA1
|
||||
b.ne .Lv8_entry
|
||||
|
||||
|
@ -1082,7 +1094,8 @@ sha1_block_armv8:
|
|||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
adr x4,.Lconst
|
||||
adrp x4,.Lconst
|
||||
add x4,x4,:lo12:.Lconst
|
||||
eor v1.16b,v1.16b,v1.16b
|
||||
ld1 {v0.4s},[x0],#16
|
||||
ld1 {v1.s}[0],[x0]
|
||||
|
@ -1205,20 +1218,18 @@ sha1_block_armv8:
|
|||
ldr x29,[sp],#16
|
||||
ret
|
||||
.size sha1_block_armv8,.-sha1_block_armv8
|
||||
.section .rodata
|
||||
.align 6
|
||||
.Lconst:
|
||||
.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19
|
||||
.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39
|
||||
.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59
|
||||
.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79
|
||||
.LOPENSSL_armcap_P:
|
||||
#ifdef __ILP32__
|
||||
.long OPENSSL_armcap_P-.
|
||||
#else
|
||||
.quad OPENSSL_armcap_P-.
|
||||
#endif
|
||||
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
.comm OPENSSL_armcap_P,4,4
|
||||
.hidden OPENSSL_armcap_P
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
|
|
@ -1,4 +1,18 @@
|
|||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the OpenSSL license (the "License"). You may not use
|
||||
|
@ -51,14 +65,12 @@
|
|||
.align 6
|
||||
sha256_block_data_order:
|
||||
#ifndef __KERNEL__
|
||||
# ifdef __ILP32__
|
||||
ldrsw x16,.LOPENSSL_armcap_P
|
||||
# else
|
||||
ldr x16,.LOPENSSL_armcap_P
|
||||
# endif
|
||||
adr x17,.LOPENSSL_armcap_P
|
||||
add x16,x16,x17
|
||||
ldr w16,[x16]
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P
|
||||
#else
|
||||
adrp x16,OPENSSL_armcap_P
|
||||
#endif
|
||||
ldr w16,[x16,:lo12:OPENSSL_armcap_P]
|
||||
tst w16,#ARMV8_SHA256
|
||||
b.ne .Lv8_entry
|
||||
#endif
|
||||
|
@ -77,7 +89,8 @@ sha256_block_data_order:
|
|||
ldp w24,w25,[x0,#4*4]
|
||||
add x2,x1,x2,lsl#6 // end of input
|
||||
ldp w26,w27,[x0,#6*4]
|
||||
adr x30,.LK256
|
||||
adrp x30,.LK256
|
||||
add x30,x30,:lo12:.LK256
|
||||
stp x0,x2,[x29,#96]
|
||||
|
||||
.Loop:
|
||||
|
@ -1024,6 +1037,7 @@ sha256_block_data_order:
|
|||
ret
|
||||
.size sha256_block_data_order,.-sha256_block_data_order
|
||||
|
||||
.section .rodata
|
||||
.align 6
|
||||
.type .LK256,%object
|
||||
.LK256:
|
||||
|
@ -1045,18 +1059,10 @@ sha256_block_data_order:
|
|||
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
||||
.long 0 //terminator
|
||||
.size .LK256,.-.LK256
|
||||
#ifndef __KERNEL__
|
||||
.align 3
|
||||
.LOPENSSL_armcap_P:
|
||||
# ifdef __ILP32__
|
||||
.long OPENSSL_armcap_P-.
|
||||
# else
|
||||
.quad OPENSSL_armcap_P-.
|
||||
# endif
|
||||
#endif
|
||||
.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
.text
|
||||
#ifndef __KERNEL__
|
||||
.type sha256_block_armv8,%function
|
||||
.align 6
|
||||
|
@ -1066,7 +1072,8 @@ sha256_block_armv8:
|
|||
add x29,sp,#0
|
||||
|
||||
ld1 {v0.4s,v1.4s},[x0]
|
||||
adr x3,.LK256
|
||||
adrp x3,.LK256
|
||||
add x3,x3,:lo12:.LK256
|
||||
|
||||
.Loop_hw:
|
||||
ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64
|
||||
|
@ -1199,5 +1206,8 @@ sha256_block_armv8:
|
|||
#endif
|
||||
#ifndef __KERNEL__
|
||||
.comm OPENSSL_armcap_P,4,4
|
||||
.hidden OPENSSL_armcap_P
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
|
|
@ -1,4 +1,18 @@
|
|||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the OpenSSL license (the "License"). You may not use
|
||||
|
@ -65,7 +79,8 @@ sha512_block_data_order:
|
|||
ldp x24,x25,[x0,#4*8]
|
||||
add x2,x1,x2,lsl#7 // end of input
|
||||
ldp x26,x27,[x0,#6*8]
|
||||
adr x30,.LK512
|
||||
adrp x30,.LK512
|
||||
add x30,x30,:lo12:.LK512
|
||||
stp x0,x2,[x29,#96]
|
||||
|
||||
.Loop:
|
||||
|
@ -1012,6 +1027,7 @@ sha512_block_data_order:
|
|||
ret
|
||||
.size sha512_block_data_order,.-sha512_block_data_order
|
||||
|
||||
.section .rodata
|
||||
.align 6
|
||||
.type .LK512,%object
|
||||
.LK512:
|
||||
|
@ -1057,19 +1073,13 @@ sha512_block_data_order:
|
|||
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
|
||||
.quad 0 // terminator
|
||||
.size .LK512,.-.LK512
|
||||
#ifndef __KERNEL__
|
||||
.align 3
|
||||
.LOPENSSL_armcap_P:
|
||||
# ifdef __ILP32__
|
||||
.long OPENSSL_armcap_P-.
|
||||
# else
|
||||
.quad OPENSSL_armcap_P-.
|
||||
# endif
|
||||
#endif
|
||||
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#ifndef __KERNEL__
|
||||
.comm OPENSSL_armcap_P,4,4
|
||||
.hidden OPENSSL_armcap_P
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
|
1216
packager/third_party/boringssl/linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
vendored
Normal file
1216
packager/third_party/boringssl/linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
688
packager/third_party/boringssl/linux-aarch64/crypto/test/trampoline-armv8.S
vendored
Normal file
688
packager/third_party/boringssl/linux-aarch64/crypto/test/trampoline-armv8.S
vendored
Normal file
|
@ -0,0 +1,688 @@
|
|||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
|
||||
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
// with |argv|, then saves the callee-saved registers into |state|. It returns
|
||||
// the result of |func|. The |unwind| argument is unused.
|
||||
// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
|
||||
// const uint64_t *argv, size_t argc,
|
||||
// uint64_t unwind);
|
||||
.type abi_test_trampoline, %function
|
||||
.globl abi_test_trampoline
|
||||
.hidden abi_test_trampoline
|
||||
.align 4
|
||||
abi_test_trampoline:
|
||||
.Labi_test_trampoline_begin:
|
||||
// Stack layout (low to high addresses)
|
||||
// x29,x30 (16 bytes)
|
||||
// d8-d15 (64 bytes)
|
||||
// x19-x28 (80 bytes)
|
||||
// x1 (8 bytes)
|
||||
// padding (8 bytes)
|
||||
stp x29, x30, [sp, #-176]!
|
||||
mov x29, sp
|
||||
|
||||
// Saved callee-saved registers and |state|.
|
||||
stp d8, d9, [sp, #16]
|
||||
stp d10, d11, [sp, #32]
|
||||
stp d12, d13, [sp, #48]
|
||||
stp d14, d15, [sp, #64]
|
||||
stp x19, x20, [sp, #80]
|
||||
stp x21, x22, [sp, #96]
|
||||
stp x23, x24, [sp, #112]
|
||||
stp x25, x26, [sp, #128]
|
||||
stp x27, x28, [sp, #144]
|
||||
str x1, [sp, #160]
|
||||
|
||||
// Load registers from |state|, with the exception of x29. x29 is the
|
||||
// frame pointer and also callee-saved, but AAPCS64 allows platforms to
|
||||
// mandate that x29 always point to a frame. iOS64 does so, which means
|
||||
// we cannot fill x29 with entropy without violating ABI rules
|
||||
// ourselves. x29 is tested separately below.
|
||||
ldp d8, d9, [x1], #16
|
||||
ldp d10, d11, [x1], #16
|
||||
ldp d12, d13, [x1], #16
|
||||
ldp d14, d15, [x1], #16
|
||||
ldp x19, x20, [x1], #16
|
||||
ldp x21, x22, [x1], #16
|
||||
ldp x23, x24, [x1], #16
|
||||
ldp x25, x26, [x1], #16
|
||||
ldp x27, x28, [x1], #16
|
||||
|
||||
// Move parameters into temporary registers.
|
||||
mov x9, x0
|
||||
mov x10, x2
|
||||
mov x11, x3
|
||||
|
||||
// Load parameters into registers.
|
||||
cbz x11, .Largs_done
|
||||
ldr x0, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x1, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x2, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x3, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x4, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x5, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x6, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x7, [x10], #8
|
||||
|
||||
.Largs_done:
|
||||
blr x9
|
||||
|
||||
// Reload |state| and store registers.
|
||||
ldr x1, [sp, #160]
|
||||
stp d8, d9, [x1], #16
|
||||
stp d10, d11, [x1], #16
|
||||
stp d12, d13, [x1], #16
|
||||
stp d14, d15, [x1], #16
|
||||
stp x19, x20, [x1], #16
|
||||
stp x21, x22, [x1], #16
|
||||
stp x23, x24, [x1], #16
|
||||
stp x25, x26, [x1], #16
|
||||
stp x27, x28, [x1], #16
|
||||
|
||||
// |func| is required to preserve x29, the frame pointer. We cannot load
|
||||
// random values into x29 (see comment above), so compare it against the
|
||||
// expected value and zero the field of |state| if corrupted.
|
||||
mov x9, sp
|
||||
cmp x29, x9
|
||||
b.eq .Lx29_ok
|
||||
str xzr, [x1]
|
||||
|
||||
.Lx29_ok:
|
||||
// Restore callee-saved registers.
|
||||
ldp d8, d9, [sp, #16]
|
||||
ldp d10, d11, [sp, #32]
|
||||
ldp d12, d13, [sp, #48]
|
||||
ldp d14, d15, [sp, #64]
|
||||
ldp x19, x20, [sp, #80]
|
||||
ldp x21, x22, [sp, #96]
|
||||
ldp x23, x24, [sp, #112]
|
||||
ldp x25, x26, [sp, #128]
|
||||
ldp x27, x28, [sp, #144]
|
||||
|
||||
ldp x29, x30, [sp], #176
|
||||
ret
|
||||
.size abi_test_trampoline,.-abi_test_trampoline
|
||||
.type abi_test_clobber_x0, %function
|
||||
.globl abi_test_clobber_x0
|
||||
.hidden abi_test_clobber_x0
|
||||
.align 4
|
||||
abi_test_clobber_x0:
|
||||
mov x0, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x0,.-abi_test_clobber_x0
|
||||
.type abi_test_clobber_x1, %function
|
||||
.globl abi_test_clobber_x1
|
||||
.hidden abi_test_clobber_x1
|
||||
.align 4
|
||||
abi_test_clobber_x1:
|
||||
mov x1, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x1,.-abi_test_clobber_x1
|
||||
.type abi_test_clobber_x2, %function
|
||||
.globl abi_test_clobber_x2
|
||||
.hidden abi_test_clobber_x2
|
||||
.align 4
|
||||
abi_test_clobber_x2:
|
||||
mov x2, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x2,.-abi_test_clobber_x2
|
||||
.type abi_test_clobber_x3, %function
|
||||
.globl abi_test_clobber_x3
|
||||
.hidden abi_test_clobber_x3
|
||||
.align 4
|
||||
abi_test_clobber_x3:
|
||||
mov x3, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x3,.-abi_test_clobber_x3
|
||||
.type abi_test_clobber_x4, %function
|
||||
.globl abi_test_clobber_x4
|
||||
.hidden abi_test_clobber_x4
|
||||
.align 4
|
||||
abi_test_clobber_x4:
|
||||
mov x4, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x4,.-abi_test_clobber_x4
|
||||
.type abi_test_clobber_x5, %function
|
||||
.globl abi_test_clobber_x5
|
||||
.hidden abi_test_clobber_x5
|
||||
.align 4
|
||||
abi_test_clobber_x5:
|
||||
mov x5, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x5,.-abi_test_clobber_x5
|
||||
.type abi_test_clobber_x6, %function
|
||||
.globl abi_test_clobber_x6
|
||||
.hidden abi_test_clobber_x6
|
||||
.align 4
|
||||
abi_test_clobber_x6:
|
||||
mov x6, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x6,.-abi_test_clobber_x6
|
||||
.type abi_test_clobber_x7, %function
|
||||
.globl abi_test_clobber_x7
|
||||
.hidden abi_test_clobber_x7
|
||||
.align 4
|
||||
abi_test_clobber_x7:
|
||||
mov x7, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x7,.-abi_test_clobber_x7
|
||||
.type abi_test_clobber_x8, %function
|
||||
.globl abi_test_clobber_x8
|
||||
.hidden abi_test_clobber_x8
|
||||
.align 4
|
||||
abi_test_clobber_x8:
|
||||
mov x8, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x8,.-abi_test_clobber_x8
|
||||
.type abi_test_clobber_x9, %function
|
||||
.globl abi_test_clobber_x9
|
||||
.hidden abi_test_clobber_x9
|
||||
.align 4
|
||||
abi_test_clobber_x9:
|
||||
mov x9, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x9,.-abi_test_clobber_x9
|
||||
.type abi_test_clobber_x10, %function
|
||||
.globl abi_test_clobber_x10
|
||||
.hidden abi_test_clobber_x10
|
||||
.align 4
|
||||
abi_test_clobber_x10:
|
||||
mov x10, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x10,.-abi_test_clobber_x10
|
||||
.type abi_test_clobber_x11, %function
|
||||
.globl abi_test_clobber_x11
|
||||
.hidden abi_test_clobber_x11
|
||||
.align 4
|
||||
abi_test_clobber_x11:
|
||||
mov x11, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x11,.-abi_test_clobber_x11
|
||||
.type abi_test_clobber_x12, %function
|
||||
.globl abi_test_clobber_x12
|
||||
.hidden abi_test_clobber_x12
|
||||
.align 4
|
||||
abi_test_clobber_x12:
|
||||
mov x12, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x12,.-abi_test_clobber_x12
|
||||
.type abi_test_clobber_x13, %function
|
||||
.globl abi_test_clobber_x13
|
||||
.hidden abi_test_clobber_x13
|
||||
.align 4
|
||||
abi_test_clobber_x13:
|
||||
mov x13, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x13,.-abi_test_clobber_x13
|
||||
.type abi_test_clobber_x14, %function
|
||||
.globl abi_test_clobber_x14
|
||||
.hidden abi_test_clobber_x14
|
||||
.align 4
|
||||
abi_test_clobber_x14:
|
||||
mov x14, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x14,.-abi_test_clobber_x14
|
||||
.type abi_test_clobber_x15, %function
|
||||
.globl abi_test_clobber_x15
|
||||
.hidden abi_test_clobber_x15
|
||||
.align 4
|
||||
abi_test_clobber_x15:
|
||||
mov x15, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x15,.-abi_test_clobber_x15
|
||||
.type abi_test_clobber_x16, %function
|
||||
.globl abi_test_clobber_x16
|
||||
.hidden abi_test_clobber_x16
|
||||
.align 4
|
||||
abi_test_clobber_x16:
|
||||
mov x16, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x16,.-abi_test_clobber_x16
|
||||
.type abi_test_clobber_x17, %function
|
||||
.globl abi_test_clobber_x17
|
||||
.hidden abi_test_clobber_x17
|
||||
.align 4
|
||||
abi_test_clobber_x17:
|
||||
mov x17, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x17,.-abi_test_clobber_x17
|
||||
.type abi_test_clobber_x19, %function
|
||||
.globl abi_test_clobber_x19
|
||||
.hidden abi_test_clobber_x19
|
||||
.align 4
|
||||
abi_test_clobber_x19:
|
||||
mov x19, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x19,.-abi_test_clobber_x19
|
||||
.type abi_test_clobber_x20, %function
|
||||
.globl abi_test_clobber_x20
|
||||
.hidden abi_test_clobber_x20
|
||||
.align 4
|
||||
abi_test_clobber_x20:
|
||||
mov x20, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x20,.-abi_test_clobber_x20
|
||||
.type abi_test_clobber_x21, %function
|
||||
.globl abi_test_clobber_x21
|
||||
.hidden abi_test_clobber_x21
|
||||
.align 4
|
||||
abi_test_clobber_x21:
|
||||
mov x21, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x21,.-abi_test_clobber_x21
|
||||
.type abi_test_clobber_x22, %function
|
||||
.globl abi_test_clobber_x22
|
||||
.hidden abi_test_clobber_x22
|
||||
.align 4
|
||||
abi_test_clobber_x22:
|
||||
mov x22, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x22,.-abi_test_clobber_x22
|
||||
.type abi_test_clobber_x23, %function
|
||||
.globl abi_test_clobber_x23
|
||||
.hidden abi_test_clobber_x23
|
||||
.align 4
|
||||
abi_test_clobber_x23:
|
||||
mov x23, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x23,.-abi_test_clobber_x23
|
||||
.type abi_test_clobber_x24, %function
|
||||
.globl abi_test_clobber_x24
|
||||
.hidden abi_test_clobber_x24
|
||||
.align 4
|
||||
abi_test_clobber_x24:
|
||||
mov x24, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x24,.-abi_test_clobber_x24
|
||||
.type abi_test_clobber_x25, %function
|
||||
.globl abi_test_clobber_x25
|
||||
.hidden abi_test_clobber_x25
|
||||
.align 4
|
||||
abi_test_clobber_x25:
|
||||
mov x25, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x25,.-abi_test_clobber_x25
|
||||
.type abi_test_clobber_x26, %function
|
||||
.globl abi_test_clobber_x26
|
||||
.hidden abi_test_clobber_x26
|
||||
.align 4
|
||||
abi_test_clobber_x26:
|
||||
mov x26, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x26,.-abi_test_clobber_x26
|
||||
.type abi_test_clobber_x27, %function
|
||||
.globl abi_test_clobber_x27
|
||||
.hidden abi_test_clobber_x27
|
||||
.align 4
|
||||
abi_test_clobber_x27:
|
||||
mov x27, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x27,.-abi_test_clobber_x27
|
||||
.type abi_test_clobber_x28, %function
|
||||
.globl abi_test_clobber_x28
|
||||
.hidden abi_test_clobber_x28
|
||||
.align 4
|
||||
abi_test_clobber_x28:
|
||||
mov x28, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x28,.-abi_test_clobber_x28
|
||||
.type abi_test_clobber_x29, %function
|
||||
.globl abi_test_clobber_x29
|
||||
.hidden abi_test_clobber_x29
|
||||
.align 4
|
||||
abi_test_clobber_x29:
|
||||
mov x29, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x29,.-abi_test_clobber_x29
|
||||
.type abi_test_clobber_d0, %function
|
||||
.globl abi_test_clobber_d0
|
||||
.hidden abi_test_clobber_d0
|
||||
.align 4
|
||||
abi_test_clobber_d0:
|
||||
fmov d0, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d0,.-abi_test_clobber_d0
|
||||
.type abi_test_clobber_d1, %function
|
||||
.globl abi_test_clobber_d1
|
||||
.hidden abi_test_clobber_d1
|
||||
.align 4
|
||||
abi_test_clobber_d1:
|
||||
fmov d1, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d1,.-abi_test_clobber_d1
|
||||
.type abi_test_clobber_d2, %function
|
||||
.globl abi_test_clobber_d2
|
||||
.hidden abi_test_clobber_d2
|
||||
.align 4
|
||||
abi_test_clobber_d2:
|
||||
fmov d2, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d2,.-abi_test_clobber_d2
|
||||
.type abi_test_clobber_d3, %function
|
||||
.globl abi_test_clobber_d3
|
||||
.hidden abi_test_clobber_d3
|
||||
.align 4
|
||||
abi_test_clobber_d3:
|
||||
fmov d3, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d3,.-abi_test_clobber_d3
|
||||
.type abi_test_clobber_d4, %function
|
||||
.globl abi_test_clobber_d4
|
||||
.hidden abi_test_clobber_d4
|
||||
.align 4
|
||||
abi_test_clobber_d4:
|
||||
fmov d4, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d4,.-abi_test_clobber_d4
|
||||
.type abi_test_clobber_d5, %function
|
||||
.globl abi_test_clobber_d5
|
||||
.hidden abi_test_clobber_d5
|
||||
.align 4
|
||||
abi_test_clobber_d5:
|
||||
fmov d5, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d5,.-abi_test_clobber_d5
|
||||
.type abi_test_clobber_d6, %function
|
||||
.globl abi_test_clobber_d6
|
||||
.hidden abi_test_clobber_d6
|
||||
.align 4
|
||||
abi_test_clobber_d6:
|
||||
fmov d6, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d6,.-abi_test_clobber_d6
|
||||
.type abi_test_clobber_d7, %function
|
||||
.globl abi_test_clobber_d7
|
||||
.hidden abi_test_clobber_d7
|
||||
.align 4
|
||||
abi_test_clobber_d7:
|
||||
fmov d7, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d7,.-abi_test_clobber_d7
|
||||
.type abi_test_clobber_d8, %function
|
||||
.globl abi_test_clobber_d8
|
||||
.hidden abi_test_clobber_d8
|
||||
.align 4
|
||||
abi_test_clobber_d8:
|
||||
fmov d8, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d8,.-abi_test_clobber_d8
|
||||
.type abi_test_clobber_d9, %function
|
||||
.globl abi_test_clobber_d9
|
||||
.hidden abi_test_clobber_d9
|
||||
.align 4
|
||||
abi_test_clobber_d9:
|
||||
fmov d9, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d9,.-abi_test_clobber_d9
|
||||
.type abi_test_clobber_d10, %function
|
||||
.globl abi_test_clobber_d10
|
||||
.hidden abi_test_clobber_d10
|
||||
.align 4
|
||||
abi_test_clobber_d10:
|
||||
fmov d10, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d10,.-abi_test_clobber_d10
|
||||
.type abi_test_clobber_d11, %function
|
||||
.globl abi_test_clobber_d11
|
||||
.hidden abi_test_clobber_d11
|
||||
.align 4
|
||||
abi_test_clobber_d11:
|
||||
fmov d11, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d11,.-abi_test_clobber_d11
|
||||
.type abi_test_clobber_d12, %function
|
||||
.globl abi_test_clobber_d12
|
||||
.hidden abi_test_clobber_d12
|
||||
.align 4
|
||||
abi_test_clobber_d12:
|
||||
fmov d12, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d12,.-abi_test_clobber_d12
|
||||
.type abi_test_clobber_d13, %function
|
||||
.globl abi_test_clobber_d13
|
||||
.hidden abi_test_clobber_d13
|
||||
.align 4
|
||||
abi_test_clobber_d13:
|
||||
fmov d13, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d13,.-abi_test_clobber_d13
|
||||
.type abi_test_clobber_d14, %function
|
||||
.globl abi_test_clobber_d14
|
||||
.hidden abi_test_clobber_d14
|
||||
.align 4
|
||||
abi_test_clobber_d14:
|
||||
fmov d14, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d14,.-abi_test_clobber_d14
|
||||
.type abi_test_clobber_d15, %function
|
||||
.globl abi_test_clobber_d15
|
||||
.hidden abi_test_clobber_d15
|
||||
.align 4
|
||||
abi_test_clobber_d15:
|
||||
fmov d15, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d15,.-abi_test_clobber_d15
|
||||
.type abi_test_clobber_d16, %function
|
||||
.globl abi_test_clobber_d16
|
||||
.hidden abi_test_clobber_d16
|
||||
.align 4
|
||||
abi_test_clobber_d16:
|
||||
fmov d16, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d16,.-abi_test_clobber_d16
|
||||
.type abi_test_clobber_d17, %function
|
||||
.globl abi_test_clobber_d17
|
||||
.hidden abi_test_clobber_d17
|
||||
.align 4
|
||||
abi_test_clobber_d17:
|
||||
fmov d17, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d17,.-abi_test_clobber_d17
|
||||
.type abi_test_clobber_d18, %function
|
||||
.globl abi_test_clobber_d18
|
||||
.hidden abi_test_clobber_d18
|
||||
.align 4
|
||||
abi_test_clobber_d18:
|
||||
fmov d18, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d18,.-abi_test_clobber_d18
|
||||
.type abi_test_clobber_d19, %function
|
||||
.globl abi_test_clobber_d19
|
||||
.hidden abi_test_clobber_d19
|
||||
.align 4
|
||||
abi_test_clobber_d19:
|
||||
fmov d19, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d19,.-abi_test_clobber_d19
|
||||
.type abi_test_clobber_d20, %function
|
||||
.globl abi_test_clobber_d20
|
||||
.hidden abi_test_clobber_d20
|
||||
.align 4
|
||||
abi_test_clobber_d20:
|
||||
fmov d20, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d20,.-abi_test_clobber_d20
|
||||
.type abi_test_clobber_d21, %function
|
||||
.globl abi_test_clobber_d21
|
||||
.hidden abi_test_clobber_d21
|
||||
.align 4
|
||||
abi_test_clobber_d21:
|
||||
fmov d21, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d21,.-abi_test_clobber_d21
|
||||
.type abi_test_clobber_d22, %function
|
||||
.globl abi_test_clobber_d22
|
||||
.hidden abi_test_clobber_d22
|
||||
.align 4
|
||||
abi_test_clobber_d22:
|
||||
fmov d22, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d22,.-abi_test_clobber_d22
|
||||
.type abi_test_clobber_d23, %function
|
||||
.globl abi_test_clobber_d23
|
||||
.hidden abi_test_clobber_d23
|
||||
.align 4
|
||||
abi_test_clobber_d23:
|
||||
fmov d23, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d23,.-abi_test_clobber_d23
|
||||
.type abi_test_clobber_d24, %function
|
||||
.globl abi_test_clobber_d24
|
||||
.hidden abi_test_clobber_d24
|
||||
.align 4
|
||||
abi_test_clobber_d24:
|
||||
fmov d24, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d24,.-abi_test_clobber_d24
|
||||
.type abi_test_clobber_d25, %function
|
||||
.globl abi_test_clobber_d25
|
||||
.hidden abi_test_clobber_d25
|
||||
.align 4
|
||||
abi_test_clobber_d25:
|
||||
fmov d25, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d25,.-abi_test_clobber_d25
|
||||
.type abi_test_clobber_d26, %function
|
||||
.globl abi_test_clobber_d26
|
||||
.hidden abi_test_clobber_d26
|
||||
.align 4
|
||||
abi_test_clobber_d26:
|
||||
fmov d26, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d26,.-abi_test_clobber_d26
|
||||
.type abi_test_clobber_d27, %function
|
||||
.globl abi_test_clobber_d27
|
||||
.hidden abi_test_clobber_d27
|
||||
.align 4
|
||||
abi_test_clobber_d27:
|
||||
fmov d27, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d27,.-abi_test_clobber_d27
|
||||
.type abi_test_clobber_d28, %function
|
||||
.globl abi_test_clobber_d28
|
||||
.hidden abi_test_clobber_d28
|
||||
.align 4
|
||||
abi_test_clobber_d28:
|
||||
fmov d28, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d28,.-abi_test_clobber_d28
|
||||
.type abi_test_clobber_d29, %function
|
||||
.globl abi_test_clobber_d29
|
||||
.hidden abi_test_clobber_d29
|
||||
.align 4
|
||||
abi_test_clobber_d29:
|
||||
fmov d29, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d29,.-abi_test_clobber_d29
|
||||
.type abi_test_clobber_d30, %function
|
||||
.globl abi_test_clobber_d30
|
||||
.hidden abi_test_clobber_d30
|
||||
.align 4
|
||||
abi_test_clobber_d30:
|
||||
fmov d30, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d30,.-abi_test_clobber_d30
|
||||
.type abi_test_clobber_d31, %function
|
||||
.globl abi_test_clobber_d31
|
||||
.hidden abi_test_clobber_d31
|
||||
.align 4
|
||||
abi_test_clobber_d31:
|
||||
fmov d31, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d31,.-abi_test_clobber_d31
|
||||
.type abi_test_clobber_v8_upper, %function
|
||||
.globl abi_test_clobber_v8_upper
|
||||
.hidden abi_test_clobber_v8_upper
|
||||
.align 4
|
||||
abi_test_clobber_v8_upper:
|
||||
fmov v8.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v8_upper,.-abi_test_clobber_v8_upper
|
||||
.type abi_test_clobber_v9_upper, %function
|
||||
.globl abi_test_clobber_v9_upper
|
||||
.hidden abi_test_clobber_v9_upper
|
||||
.align 4
|
||||
abi_test_clobber_v9_upper:
|
||||
fmov v9.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v9_upper,.-abi_test_clobber_v9_upper
|
||||
.type abi_test_clobber_v10_upper, %function
|
||||
.globl abi_test_clobber_v10_upper
|
||||
.hidden abi_test_clobber_v10_upper
|
||||
.align 4
|
||||
abi_test_clobber_v10_upper:
|
||||
fmov v10.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v10_upper,.-abi_test_clobber_v10_upper
|
||||
.type abi_test_clobber_v11_upper, %function
|
||||
.globl abi_test_clobber_v11_upper
|
||||
.hidden abi_test_clobber_v11_upper
|
||||
.align 4
|
||||
abi_test_clobber_v11_upper:
|
||||
fmov v11.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v11_upper,.-abi_test_clobber_v11_upper
|
||||
.type abi_test_clobber_v12_upper, %function
|
||||
.globl abi_test_clobber_v12_upper
|
||||
.hidden abi_test_clobber_v12_upper
|
||||
.align 4
|
||||
abi_test_clobber_v12_upper:
|
||||
fmov v12.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v12_upper,.-abi_test_clobber_v12_upper
|
||||
.type abi_test_clobber_v13_upper, %function
|
||||
.globl abi_test_clobber_v13_upper
|
||||
.hidden abi_test_clobber_v13_upper
|
||||
.align 4
|
||||
abi_test_clobber_v13_upper:
|
||||
fmov v13.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v13_upper,.-abi_test_clobber_v13_upper
|
||||
.type abi_test_clobber_v14_upper, %function
|
||||
.globl abi_test_clobber_v14_upper
|
||||
.hidden abi_test_clobber_v14_upper
|
||||
.align 4
|
||||
abi_test_clobber_v14_upper:
|
||||
fmov v14.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v14_upper,.-abi_test_clobber_v14_upper
|
||||
.type abi_test_clobber_v15_upper, %function
|
||||
.globl abi_test_clobber_v15_upper
|
||||
.hidden abi_test_clobber_v15_upper
|
||||
.align 4
|
||||
abi_test_clobber_v15_upper:
|
||||
fmov v15.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v15_upper,.-abi_test_clobber_v15_upper
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
999
packager/third_party/boringssl/linux-aarch64/crypto/third_party/sike/asm/fp-armv8.S
vendored
Normal file
999
packager/third_party/boringssl/linux-aarch64/crypto/third_party/sike/asm/fp-armv8.S
vendored
Normal file
|
@ -0,0 +1,999 @@
|
|||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.section .rodata
|
||||
|
||||
# p434 x 2
|
||||
.Lp434x2:
|
||||
.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
|
||||
.quad 0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
|
||||
.quad 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
|
||||
|
||||
# p434 + 1
|
||||
.Lp434p1:
|
||||
.quad 0xFDC1767AE3000000, 0x7BC65C783158AEA3
|
||||
.quad 0x6CFC5FD681C52056, 0x0002341F27177344
|
||||
|
||||
.text
|
||||
.globl sike_mpmul
|
||||
.hidden sike_mpmul
|
||||
.align 4
|
||||
sike_mpmul:
|
||||
stp x29, x30, [sp,#-96]!
|
||||
add x29, sp, #0
|
||||
stp x19, x20, [sp,#16]
|
||||
stp x21, x22, [sp,#32]
|
||||
stp x23, x24, [sp,#48]
|
||||
stp x25, x26, [sp,#64]
|
||||
stp x27, x28, [sp,#80]
|
||||
|
||||
ldp x3, x4, [x0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x10, x11, [x1,#0]
|
||||
ldp x12, x13, [x1,#16]
|
||||
ldp x14, x15, [x1,#32]
|
||||
ldr x16, [x1,#48]
|
||||
|
||||
// x3-x7 <- AH + AL, x7 <- carry
|
||||
adds x3, x3, x7
|
||||
adcs x4, x4, x8
|
||||
adcs x5, x5, x9
|
||||
adcs x6, x6, xzr
|
||||
adc x7, xzr, xzr
|
||||
|
||||
// x10-x13 <- BH + BL, x8 <- carry
|
||||
adds x10, x10, x14
|
||||
adcs x11, x11, x15
|
||||
adcs x12, x12, x16
|
||||
adcs x13, x13, xzr
|
||||
adc x8, xzr, xzr
|
||||
|
||||
// x9 <- combined carry
|
||||
and x9, x7, x8
|
||||
// x7-x8 <- mask
|
||||
sub x7, xzr, x7
|
||||
sub x8, xzr, x8
|
||||
|
||||
// x15-x19 <- masked (BH + BL)
|
||||
and x14, x10, x7
|
||||
and x15, x11, x7
|
||||
and x16, x12, x7
|
||||
and x17, x13, x7
|
||||
|
||||
// x20-x23 <- masked (AH + AL)
|
||||
and x20, x3, x8
|
||||
and x21, x4, x8
|
||||
and x22, x5, x8
|
||||
and x23, x6, x8
|
||||
|
||||
// x15-x19, x7 <- masked (AH+AL) + masked (BH+BL), step 1
|
||||
adds x14, x14, x20
|
||||
adcs x15, x15, x21
|
||||
adcs x16, x16, x22
|
||||
adcs x17, x17, x23
|
||||
adc x7, x9, xzr
|
||||
|
||||
// x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part
|
||||
stp x3, x4, [x2,#0]
|
||||
// A0-A1 <- AH + AL, T0 <- mask
|
||||
adds x3, x3, x5
|
||||
adcs x4, x4, x6
|
||||
adc x25, xzr, xzr
|
||||
|
||||
// C6, T1 <- BH + BL, C7 <- mask
|
||||
adds x23, x10, x12
|
||||
adcs x26, x11, x13
|
||||
adc x24, xzr, xzr
|
||||
|
||||
// C0-C1 <- masked (BH + BL)
|
||||
sub x19, xzr, x25
|
||||
sub x20, xzr, x24
|
||||
and x8, x23, x19
|
||||
and x9, x26, x19
|
||||
|
||||
// C4-C5 <- masked (AH + AL), T0 <- combined carry
|
||||
and x21, x3, x20
|
||||
and x22, x4, x20
|
||||
mul x19, x3, x23
|
||||
mul x20, x3, x26
|
||||
and x25, x25, x24
|
||||
|
||||
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
|
||||
adds x8, x21, x8
|
||||
umulh x21, x3, x26
|
||||
adcs x9, x22, x9
|
||||
umulh x22, x3, x23
|
||||
adc x25, x25, xzr
|
||||
|
||||
// C2-C5 <- (AH+AL) x (BH+BL), low part
|
||||
mul x3, x4, x23
|
||||
umulh x23, x4, x23
|
||||
adds x20, x20, x22
|
||||
adc x21, x21, xzr
|
||||
|
||||
mul x24, x4, x26
|
||||
umulh x26, x4, x26
|
||||
adds x20, x20, x3
|
||||
adcs x21, x21, x23
|
||||
adc x22, xzr, xzr
|
||||
|
||||
adds x21, x21, x24
|
||||
adc x22, x22, x26
|
||||
|
||||
ldp x3, x4, [x2,#0]
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
|
||||
adds x21, x8, x21
|
||||
umulh x24, x3, x10
|
||||
umulh x26, x3, x11
|
||||
adcs x22, x9, x22
|
||||
mul x8, x3, x10
|
||||
mul x9, x3, x11
|
||||
adc x25, x25, xzr
|
||||
|
||||
// C0-C1, T1, C7 <- AL x BL
|
||||
mul x3, x4, x10
|
||||
umulh x10, x4, x10
|
||||
adds x9, x9, x24
|
||||
adc x26, x26, xzr
|
||||
|
||||
mul x23, x4, x11
|
||||
umulh x11, x4, x11
|
||||
adds x9, x9, x3
|
||||
adcs x26, x26, x10
|
||||
adc x24, xzr, xzr
|
||||
|
||||
adds x26, x26, x23
|
||||
adc x24, x24, x11
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
|
||||
mul x3, x5, x12
|
||||
umulh x10, x5, x12
|
||||
subs x19, x19, x8
|
||||
sbcs x20, x20, x9
|
||||
sbcs x21, x21, x26
|
||||
mul x4, x5, x13
|
||||
umulh x23, x5, x13
|
||||
sbcs x22, x22, x24
|
||||
sbc x25, x25, xzr
|
||||
|
||||
// A0, A1, C6, B0 <- AH x BH
|
||||
mul x5, x6, x12
|
||||
umulh x12, x6, x12
|
||||
adds x4, x4, x10
|
||||
adc x23, x23, xzr
|
||||
|
||||
mul x11, x6, x13
|
||||
umulh x13, x6, x13
|
||||
adds x4, x4, x5
|
||||
adcs x23, x23, x12
|
||||
adc x10, xzr, xzr
|
||||
|
||||
adds x23, x23, x11
|
||||
adc x10, x10, x13
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
|
||||
subs x19, x19, x3
|
||||
sbcs x20, x20, x4
|
||||
sbcs x21, x21, x23
|
||||
sbcs x22, x22, x10
|
||||
sbc x25, x25, xzr
|
||||
|
||||
adds x19, x19, x26
|
||||
adcs x20, x20, x24
|
||||
adcs x21, x21, x3
|
||||
adcs x22, x22, x4
|
||||
adcs x23, x25, x23
|
||||
adc x24, x10, xzr
|
||||
|
||||
|
||||
// x15-x19, x7 <- (AH+AL) x (BH+BL), final step
|
||||
adds x14, x14, x21
|
||||
adcs x15, x15, x22
|
||||
adcs x16, x16, x23
|
||||
adcs x17, x17, x24
|
||||
adc x7, x7, xzr
|
||||
|
||||
// Load AL
|
||||
ldp x3, x4, [x0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
// Load BL
|
||||
ldp x10, x11, [x1,#0]
|
||||
ldp x12, x13, [x1,#16]
|
||||
|
||||
// Temporarily store x8 in x2
|
||||
stp x8, x9, [x2,#0]
|
||||
// x21-x28 <- AL x BL
|
||||
// A0-A1 <- AH + AL, T0 <- mask
|
||||
adds x3, x3, x5
|
||||
adcs x4, x4, x6
|
||||
adc x8, xzr, xzr
|
||||
|
||||
// C6, T1 <- BH + BL, C7 <- mask
|
||||
adds x27, x10, x12
|
||||
adcs x9, x11, x13
|
||||
adc x28, xzr, xzr
|
||||
|
||||
// C0-C1 <- masked (BH + BL)
|
||||
sub x23, xzr, x8
|
||||
sub x24, xzr, x28
|
||||
and x21, x27, x23
|
||||
and x22, x9, x23
|
||||
|
||||
// C4-C5 <- masked (AH + AL), T0 <- combined carry
|
||||
and x25, x3, x24
|
||||
and x26, x4, x24
|
||||
mul x23, x3, x27
|
||||
mul x24, x3, x9
|
||||
and x8, x8, x28
|
||||
|
||||
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
|
||||
adds x21, x25, x21
|
||||
umulh x25, x3, x9
|
||||
adcs x22, x26, x22
|
||||
umulh x26, x3, x27
|
||||
adc x8, x8, xzr
|
||||
|
||||
// C2-C5 <- (AH+AL) x (BH+BL), low part
|
||||
mul x3, x4, x27
|
||||
umulh x27, x4, x27
|
||||
adds x24, x24, x26
|
||||
adc x25, x25, xzr
|
||||
|
||||
mul x28, x4, x9
|
||||
umulh x9, x4, x9
|
||||
adds x24, x24, x3
|
||||
adcs x25, x25, x27
|
||||
adc x26, xzr, xzr
|
||||
|
||||
adds x25, x25, x28
|
||||
adc x26, x26, x9
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
|
||||
adds x25, x21, x25
|
||||
umulh x28, x3, x10
|
||||
umulh x9, x3, x11
|
||||
adcs x26, x22, x26
|
||||
mul x21, x3, x10
|
||||
mul x22, x3, x11
|
||||
adc x8, x8, xzr
|
||||
|
||||
// C0-C1, T1, C7 <- AL x BL
|
||||
mul x3, x4, x10
|
||||
umulh x10, x4, x10
|
||||
adds x22, x22, x28
|
||||
adc x9, x9, xzr
|
||||
|
||||
mul x27, x4, x11
|
||||
umulh x11, x4, x11
|
||||
adds x22, x22, x3
|
||||
adcs x9, x9, x10
|
||||
adc x28, xzr, xzr
|
||||
|
||||
adds x9, x9, x27
|
||||
adc x28, x28, x11
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
|
||||
mul x3, x5, x12
|
||||
umulh x10, x5, x12
|
||||
subs x23, x23, x21
|
||||
sbcs x24, x24, x22
|
||||
sbcs x25, x25, x9
|
||||
mul x4, x5, x13
|
||||
umulh x27, x5, x13
|
||||
sbcs x26, x26, x28
|
||||
sbc x8, x8, xzr
|
||||
|
||||
// A0, A1, C6, B0 <- AH x BH
|
||||
mul x5, x6, x12
|
||||
umulh x12, x6, x12
|
||||
adds x4, x4, x10
|
||||
adc x27, x27, xzr
|
||||
|
||||
mul x11, x6, x13
|
||||
umulh x13, x6, x13
|
||||
adds x4, x4, x5
|
||||
adcs x27, x27, x12
|
||||
adc x10, xzr, xzr
|
||||
|
||||
adds x27, x27, x11
|
||||
adc x10, x10, x13
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
|
||||
subs x23, x23, x3
|
||||
sbcs x24, x24, x4
|
||||
sbcs x25, x25, x27
|
||||
sbcs x26, x26, x10
|
||||
sbc x8, x8, xzr
|
||||
|
||||
adds x23, x23, x9
|
||||
adcs x24, x24, x28
|
||||
adcs x25, x25, x3
|
||||
adcs x26, x26, x4
|
||||
adcs x27, x8, x27
|
||||
adc x28, x10, xzr
|
||||
|
||||
// Restore x8
|
||||
ldp x8, x9, [x2,#0]
|
||||
|
||||
// x8-x10,x20,x15-x17,x19 <- maskd (AH+AL) x (BH+BL) - ALxBL
|
||||
subs x8, x8, x21
|
||||
sbcs x9, x9, x22
|
||||
sbcs x19, x19, x23
|
||||
sbcs x20, x20, x24
|
||||
sbcs x14, x14, x25
|
||||
sbcs x15, x15, x26
|
||||
sbcs x16, x16, x27
|
||||
sbcs x17, x17, x28
|
||||
sbc x7, x7, xzr
|
||||
|
||||
// Store ALxBL, low
|
||||
stp x21, x22, [x2]
|
||||
stp x23, x24, [x2,#16]
|
||||
|
||||
// Load AH
|
||||
ldp x3, x4, [x0,#32]
|
||||
ldr x5, [x0,#48]
|
||||
// Load BH
|
||||
ldp x10, x11, [x1,#32]
|
||||
ldr x12, [x1,#48]
|
||||
|
||||
adds x8, x8, x25
|
||||
adcs x9, x9, x26
|
||||
adcs x19, x19, x27
|
||||
adcs x20, x20, x28
|
||||
adc x1, xzr, xzr
|
||||
|
||||
add x0, x0, #32
|
||||
// Temporarily store x8,x9 in x2
|
||||
stp x8,x9, [x2,#32]
|
||||
// x21-x28 <- AH x BH
|
||||
|
||||
// A0 * B0
|
||||
mul x21, x3, x10 // C0
|
||||
umulh x24, x3, x10
|
||||
|
||||
// A0 * B1
|
||||
mul x22, x3, x11
|
||||
umulh x23, x3, x11
|
||||
|
||||
// A1 * B0
|
||||
mul x8, x4, x10
|
||||
umulh x9, x4, x10
|
||||
adds x22, x22, x24
|
||||
adc x23, x23, xzr
|
||||
|
||||
// A0 * B2
|
||||
mul x27, x3, x12
|
||||
umulh x28, x3, x12
|
||||
adds x22, x22, x8 // C1
|
||||
adcs x23, x23, x9
|
||||
adc x24, xzr, xzr
|
||||
|
||||
// A2 * B0
|
||||
mul x8, x5, x10
|
||||
umulh x25, x5, x10
|
||||
adds x23, x23, x27
|
||||
adcs x24, x24, x25
|
||||
adc x25, xzr, xzr
|
||||
|
||||
// A1 * B1
|
||||
mul x27, x4, x11
|
||||
umulh x9, x4, x11
|
||||
adds x23, x23, x8
|
||||
adcs x24, x24, x28
|
||||
adc x25, x25, xzr
|
||||
|
||||
// A1 * B2
|
||||
mul x8, x4, x12
|
||||
umulh x28, x4, x12
|
||||
adds x23, x23, x27 // C2
|
||||
adcs x24, x24, x9
|
||||
adc x25, x25, xzr
|
||||
|
||||
// A2 * B1
|
||||
mul x27, x5, x11
|
||||
umulh x9, x5, x11
|
||||
adds x24, x24, x8
|
||||
adcs x25, x25, x28
|
||||
adc x26, xzr, xzr
|
||||
|
||||
// A2 * B2
|
||||
mul x8, x5, x12
|
||||
umulh x28, x5, x12
|
||||
adds x24, x24, x27 // C3
|
||||
adcs x25, x25, x9
|
||||
adc x26, x26, xzr
|
||||
|
||||
adds x25, x25, x8 // C4
|
||||
adc x26, x26, x28 // C5
|
||||
|
||||
// Restore x8,x9
|
||||
ldp x8,x9, [x2,#32]
|
||||
|
||||
neg x1, x1
|
||||
|
||||
// x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
|
||||
subs x8, x8, x21
|
||||
sbcs x9, x9, x22
|
||||
sbcs x19, x19, x23
|
||||
sbcs x20, x20, x24
|
||||
sbcs x14, x14, x25
|
||||
sbcs x15, x15, x26
|
||||
sbcs x16, x16, xzr
|
||||
sbcs x17, x17, xzr
|
||||
sbc x7, x7, xzr
|
||||
|
||||
// Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
|
||||
stp x8, x9, [x2,#32]
|
||||
stp x19, x20, [x2,#48]
|
||||
|
||||
adds x1, x1, #1
|
||||
adcs x14, x14, x21
|
||||
adcs x15, x15, x22
|
||||
adcs x16, x16, x23
|
||||
adcs x17, x17, x24
|
||||
adcs x25, x7, x25
|
||||
adc x26, x26, xzr
|
||||
|
||||
stp x14, x15, [x2,#64]
|
||||
stp x16, x17, [x2,#80]
|
||||
stp x25, x26, [x2,#96]
|
||||
|
||||
ldp x19, x20, [x29,#16]
|
||||
ldp x21, x22, [x29,#32]
|
||||
ldp x23, x24, [x29,#48]
|
||||
ldp x25, x26, [x29,#64]
|
||||
ldp x27, x28, [x29,#80]
|
||||
ldp x29, x30, [sp],#96
|
||||
ret
|
||||
.globl sike_fprdc
|
||||
.hidden sike_fprdc
|
||||
.align 4
|
||||
sike_fprdc:
|
||||
stp x29, x30, [sp, #-96]!
|
||||
add x29, sp, xzr
|
||||
stp x19, x20, [sp,#16]
|
||||
stp x21, x22, [sp,#32]
|
||||
stp x23, x24, [sp,#48]
|
||||
stp x25, x26, [sp,#64]
|
||||
stp x27, x28, [sp,#80]
|
||||
|
||||
ldp x2, x3, [x0,#0] // a[0-1]
|
||||
|
||||
// Load the prime constant
|
||||
adrp x26, .Lp434p1
|
||||
add x26, x26, :lo12:.Lp434p1
|
||||
ldp x23, x24, [x26, #0x0]
|
||||
ldp x25, x26, [x26,#0x10]
|
||||
|
||||
// a[0-1] * p434+1
|
||||
mul x4, x2, x23 // C0
|
||||
umulh x7, x2, x23
|
||||
|
||||
mul x5, x2, x24
|
||||
umulh x6, x2, x24
|
||||
|
||||
mul x10, x3, x23
|
||||
umulh x11, x3, x23
|
||||
adds x5, x5, x7
|
||||
adc x6, x6, xzr
|
||||
|
||||
mul x27, x2, x25
|
||||
umulh x28, x2, x25
|
||||
adds x5, x5, x10 // C1
|
||||
adcs x6, x6, x11
|
||||
adc x7, xzr, xzr
|
||||
|
||||
mul x10, x3, x24
|
||||
umulh x11, x3, x24
|
||||
adds x6, x6, x27
|
||||
adcs x7, x7, x28
|
||||
adc x8, xzr, xzr
|
||||
|
||||
mul x27, x2, x26
|
||||
umulh x28, x2, x26
|
||||
adds x6, x6, x10 // C2
|
||||
adcs x7, x7, x11
|
||||
adc x8, x8, xzr
|
||||
|
||||
mul x10, x3, x25
|
||||
umulh x11, x3, x25
|
||||
adds x7, x7, x27
|
||||
adcs x8, x8, x28
|
||||
adc x9, xzr, xzr
|
||||
|
||||
mul x27, x3, x26
|
||||
umulh x28, x3, x26
|
||||
adds x7, x7, x10 // C3
|
||||
adcs x8, x8, x11
|
||||
adc x9, x9, xzr
|
||||
adds x8, x8, x27 // C4
|
||||
adc x9, x9, x28 // C5
|
||||
|
||||
|
||||
|
||||
ldp x10, x11, [x0, #0x18]
|
||||
ldp x12, x13, [x0, #0x28]
|
||||
ldp x14, x15, [x0, #0x38]
|
||||
ldp x16, x17, [x0, #0x48]
|
||||
ldp x19, x20, [x0, #0x58]
|
||||
ldr x21, [x0, #0x68]
|
||||
|
||||
adds x10, x10, x4
|
||||
adcs x11, x11, x5
|
||||
adcs x12, x12, x6
|
||||
adcs x13, x13, x7
|
||||
adcs x14, x14, x8
|
||||
adcs x15, x15, x9
|
||||
adcs x22, x16, xzr
|
||||
adcs x17, x17, xzr
|
||||
adcs x19, x19, xzr
|
||||
adcs x20, x20, xzr
|
||||
adc x21, x21, xzr
|
||||
|
||||
ldr x2, [x0,#0x10] // a[2]
|
||||
// a[2-3] * p434+1
|
||||
mul x4, x2, x23 // C0
|
||||
umulh x7, x2, x23
|
||||
|
||||
mul x5, x2, x24
|
||||
umulh x6, x2, x24
|
||||
|
||||
mul x0, x10, x23
|
||||
umulh x3, x10, x23
|
||||
adds x5, x5, x7
|
||||
adc x6, x6, xzr
|
||||
|
||||
mul x27, x2, x25
|
||||
umulh x28, x2, x25
|
||||
adds x5, x5, x0 // C1
|
||||
adcs x6, x6, x3
|
||||
adc x7, xzr, xzr
|
||||
|
||||
mul x0, x10, x24
|
||||
umulh x3, x10, x24
|
||||
adds x6, x6, x27
|
||||
adcs x7, x7, x28
|
||||
adc x8, xzr, xzr
|
||||
|
||||
mul x27, x2, x26
|
||||
umulh x28, x2, x26
|
||||
adds x6, x6, x0 // C2
|
||||
adcs x7, x7, x3
|
||||
adc x8, x8, xzr
|
||||
|
||||
mul x0, x10, x25
|
||||
umulh x3, x10, x25
|
||||
adds x7, x7, x27
|
||||
adcs x8, x8, x28
|
||||
adc x9, xzr, xzr
|
||||
|
||||
mul x27, x10, x26
|
||||
umulh x28, x10, x26
|
||||
adds x7, x7, x0 // C3
|
||||
adcs x8, x8, x3
|
||||
adc x9, x9, xzr
|
||||
adds x8, x8, x27 // C4
|
||||
adc x9, x9, x28 // C5
|
||||
|
||||
|
||||
|
||||
adds x12, x12, x4
|
||||
adcs x13, x13, x5
|
||||
adcs x14, x14, x6
|
||||
adcs x15, x15, x7
|
||||
adcs x16, x22, x8
|
||||
adcs x17, x17, x9
|
||||
adcs x22, x19, xzr
|
||||
adcs x20, x20, xzr
|
||||
adc x21, x21, xzr
|
||||
|
||||
mul x4, x11, x23 // C0
|
||||
umulh x7, x11, x23
|
||||
|
||||
mul x5, x11, x24
|
||||
umulh x6, x11, x24
|
||||
|
||||
mul x10, x12, x23
|
||||
umulh x3, x12, x23
|
||||
adds x5, x5, x7
|
||||
adc x6, x6, xzr
|
||||
|
||||
mul x27, x11, x25
|
||||
umulh x28, x11, x25
|
||||
adds x5, x5, x10 // C1
|
||||
adcs x6, x6, x3
|
||||
adc x7, xzr, xzr
|
||||
|
||||
mul x10, x12, x24
|
||||
umulh x3, x12, x24
|
||||
adds x6, x6, x27
|
||||
adcs x7, x7, x28
|
||||
adc x8, xzr, xzr
|
||||
|
||||
mul x27, x11, x26
|
||||
umulh x28, x11, x26
|
||||
adds x6, x6, x10 // C2
|
||||
adcs x7, x7, x3
|
||||
adc x8, x8, xzr
|
||||
|
||||
mul x10, x12, x25
|
||||
umulh x3, x12, x25
|
||||
adds x7, x7, x27
|
||||
adcs x8, x8, x28
|
||||
adc x9, xzr, xzr
|
||||
|
||||
mul x27, x12, x26
|
||||
umulh x28, x12, x26
|
||||
adds x7, x7, x10 // C3
|
||||
adcs x8, x8, x3
|
||||
adc x9, x9, xzr
|
||||
adds x8, x8, x27 // C4
|
||||
adc x9, x9, x28 // C5
|
||||
|
||||
|
||||
adds x14, x14, x4
|
||||
adcs x15, x15, x5
|
||||
adcs x16, x16, x6
|
||||
adcs x17, x17, x7
|
||||
adcs x19, x22, x8
|
||||
adcs x20, x20, x9
|
||||
adc x22, x21, xzr
|
||||
|
||||
stp x14, x15, [x1, #0x0] // C0, C1
|
||||
|
||||
mul x4, x13, x23 // C0
|
||||
umulh x10, x13, x23
|
||||
|
||||
mul x5, x13, x24
|
||||
umulh x27, x13, x24
|
||||
adds x5, x5, x10 // C1
|
||||
adc x10, xzr, xzr
|
||||
|
||||
mul x6, x13, x25
|
||||
umulh x28, x13, x25
|
||||
adds x27, x10, x27
|
||||
adcs x6, x6, x27 // C2
|
||||
adc x10, xzr, xzr
|
||||
|
||||
mul x7, x13, x26
|
||||
umulh x8, x13, x26
|
||||
adds x28, x10, x28
|
||||
adcs x7, x7, x28 // C3
|
||||
adc x8, x8, xzr // C4
|
||||
|
||||
adds x16, x16, x4
|
||||
adcs x17, x17, x5
|
||||
adcs x19, x19, x6
|
||||
adcs x20, x20, x7
|
||||
adc x21, x22, x8
|
||||
|
||||
str x16, [x1, #0x10]
|
||||
stp x17, x19, [x1, #0x18]
|
||||
stp x20, x21, [x1, #0x28]
|
||||
|
||||
ldp x19, x20, [x29,#16]
|
||||
ldp x21, x22, [x29,#32]
|
||||
ldp x23, x24, [x29,#48]
|
||||
ldp x25, x26, [x29,#64]
|
||||
ldp x27, x28, [x29,#80]
|
||||
ldp x29, x30, [sp],#96
|
||||
ret
|
||||
.globl sike_fpadd
|
||||
.hidden sike_fpadd
|
||||
.align 4
|
||||
sike_fpadd:
|
||||
stp x29,x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldr x17, [x1,#48]
|
||||
|
||||
// Add a + b
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adc x9, x9, x17
|
||||
|
||||
// Subtract 2xp434
|
||||
adrp x17, .Lp434x2
|
||||
add x17, x17, :lo12:.Lp434x2
|
||||
ldp x11, x12, [x17, #0]
|
||||
ldp x13, x14, [x17, #16]
|
||||
ldp x15, x16, [x17, #32]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x12
|
||||
sbcs x6, x6, x13
|
||||
sbcs x7, x7, x14
|
||||
sbcs x8, x8, x15
|
||||
sbcs x9, x9, x16
|
||||
sbc x0, xzr, xzr // x0 can be reused now
|
||||
|
||||
// Add 2xp434 anded with the mask in x0
|
||||
and x11, x11, x0
|
||||
and x12, x12, x0
|
||||
and x13, x13, x0
|
||||
and x14, x14, x0
|
||||
and x15, x15, x0
|
||||
and x16, x16, x0
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x12
|
||||
adcs x6, x6, x13
|
||||
adcs x7, x7, x14
|
||||
adcs x8, x8, x15
|
||||
adc x9, x9, x16
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
str x9, [x2,#48]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl sike_fpsub
|
||||
.hidden sike_fpsub
|
||||
.align 4
|
||||
sike_fpsub:
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldr x17, [x1,#48]
|
||||
|
||||
// Subtract a - b
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
sbcs x9, x9, x17
|
||||
sbc x0, xzr, xzr
|
||||
|
||||
// Add 2xp434 anded with the mask in x0
|
||||
adrp x17, .Lp434x2
|
||||
add x17, x17, :lo12:.Lp434x2
|
||||
|
||||
// First half
|
||||
ldp x11, x12, [x17, #0]
|
||||
ldp x13, x14, [x17, #16]
|
||||
ldp x15, x16, [x17, #32]
|
||||
|
||||
// Add 2xp434 anded with the mask in x0
|
||||
and x11, x11, x0
|
||||
and x12, x12, x0
|
||||
and x13, x13, x0
|
||||
and x14, x14, x0
|
||||
and x15, x15, x0
|
||||
and x16, x16, x0
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x12
|
||||
adcs x6, x6, x13
|
||||
adcs x7, x7, x14
|
||||
adcs x8, x8, x15
|
||||
adc x9, x9, x16
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
str x9, [x2,#48]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl sike_mpadd_asm
|
||||
.hidden sike_mpadd_asm
|
||||
.align 4
|
||||
sike_mpadd_asm:
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldr x17, [x1,#48]
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adc x9, x9, x17
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
str x9, [x2,#48]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl sike_mpsubx2_asm
|
||||
.hidden sike_mpsubx2_asm
|
||||
.align 4
|
||||
sike_mpsubx2_asm:
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldp x9, x10, [x0,#48]
|
||||
ldp x11, x12, [x1,#32]
|
||||
ldp x13, x14, [x1,#48]
|
||||
sbcs x7, x7, x11
|
||||
sbcs x8, x8, x12
|
||||
sbcs x9, x9, x13
|
||||
sbcs x10, x10, x14
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
stp x9, x10, [x2,#48]
|
||||
|
||||
ldp x3, x4, [x0,#64]
|
||||
ldp x5, x6, [x0,#80]
|
||||
ldp x11, x12, [x1,#64]
|
||||
ldp x13, x14, [x1,#80]
|
||||
sbcs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
ldp x7, x8, [x0,#96]
|
||||
ldp x11, x12, [x1,#96]
|
||||
sbcs x7, x7, x11
|
||||
sbcs x8, x8, x12
|
||||
sbc x0, xzr, xzr
|
||||
|
||||
stp x3, x4, [x2,#64]
|
||||
stp x5, x6, [x2,#80]
|
||||
stp x7, x8, [x2,#96]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl sike_mpdblsubx2_asm
|
||||
.hidden sike_mpdblsubx2_asm
|
||||
.align 4
|
||||
sike_mpdblsubx2_asm:
|
||||
stp x29, x30, [sp, #-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x2, #0]
|
||||
ldp x5, x6, [x2,#16]
|
||||
ldp x7, x8, [x2,#32]
|
||||
|
||||
ldp x11, x12, [x0, #0]
|
||||
ldp x13, x14, [x0,#16]
|
||||
ldp x15, x16, [x0,#32]
|
||||
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
|
||||
// x9 stores carry
|
||||
adc x9, xzr, xzr
|
||||
|
||||
ldp x11, x12, [x1, #0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
adc x9, x9, xzr
|
||||
|
||||
stp x3, x4, [x2, #0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
|
||||
ldp x3, x4, [x2,#48]
|
||||
ldp x5, x6, [x2,#64]
|
||||
ldp x7, x8, [x2,#80]
|
||||
|
||||
ldp x11, x12, [x0,#48]
|
||||
ldp x13, x14, [x0,#64]
|
||||
ldp x15, x16, [x0,#80]
|
||||
|
||||
// x9 = 2 - x9
|
||||
neg x9, x9
|
||||
add x9, x9, #2
|
||||
|
||||
subs x3, x3, x9
|
||||
sbcs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
adc x9, xzr, xzr
|
||||
|
||||
ldp x11, x12, [x1,#48]
|
||||
ldp x13, x14, [x1,#64]
|
||||
ldp x15, x16, [x1,#80]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
adc x9, x9, xzr
|
||||
|
||||
stp x3, x4, [x2,#48]
|
||||
stp x5, x6, [x2,#64]
|
||||
stp x7, x8, [x2,#80]
|
||||
|
||||
ldp x3, x4, [x2,#96]
|
||||
ldp x11, x12, [x0,#96]
|
||||
ldp x13, x14, [x1,#96]
|
||||
|
||||
// x9 = 2 - x9
|
||||
neg x9, x9
|
||||
add x9, x9, #2
|
||||
|
||||
subs x3, x3, x9
|
||||
sbcs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
subs x3, x3, x13
|
||||
sbc x4, x4, x14
|
||||
stp x3, x4, [x2,#96]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
|
@ -1,6 +1,24 @@
|
|||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__arm__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
|
||||
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
|
||||
.arch armv7-a
|
||||
|
||||
.text
|
||||
#if defined(__thumb2__) || defined(__clang__)
|
||||
.syntax unified
|
||||
|
@ -1471,3 +1489,5 @@ ChaCha20_neon:
|
|||
.comm OPENSSL_armcap_P,4,4
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
|
|
@@ -1,4 +1,18 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use

@@ -45,6 +59,11 @@
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
#endif

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 AES
@ instructions are in aesv8-armx.pl.)
.arch armv7-a

.text
#if defined(__thumb2__) && !defined(__APPLE__)
.syntax unified

@@ -160,23 +179,23 @@ AES_Te:
.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
.size AES_Te,.-AES_Te

@ void asm_AES_encrypt(const unsigned char *in, unsigned char *out,
@ const AES_KEY *key) {
.globl asm_AES_encrypt
.hidden asm_AES_encrypt
.type asm_AES_encrypt,%function
@ void aes_nohw_encrypt(const unsigned char *in, unsigned char *out,
@ const AES_KEY *key) {
.globl aes_nohw_encrypt
.hidden aes_nohw_encrypt
.type aes_nohw_encrypt,%function
.align 5
asm_AES_encrypt:
aes_nohw_encrypt:
#ifndef __thumb2__
sub r3,pc,#8 @ asm_AES_encrypt
sub r3,pc,#8 @ aes_nohw_encrypt
#else
adr r3,.
#endif
stmdb sp!,{r1,r4-r12,lr}
#ifdef __APPLE__
#if defined(__thumb2__) || defined(__APPLE__)
adr r10,AES_Te
#else
sub r10,r3,#asm_AES_encrypt-AES_Te @ Te
sub r10,r3,#aes_nohw_encrypt-AES_Te @ Te
#endif
mov r12,r0 @ inp
mov r11,r2

@@ -273,7 +292,7 @@ asm_AES_encrypt:
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size asm_AES_encrypt,.-asm_AES_encrypt
.size aes_nohw_encrypt,.-aes_nohw_encrypt

.type _armv4_AES_encrypt,%function
.align 2

@@ -412,14 +431,14 @@ _armv4_AES_encrypt:
ldr pc,[sp],#4 @ pop and return
.size _armv4_AES_encrypt,.-_armv4_AES_encrypt

.globl asm_AES_set_encrypt_key
.hidden asm_AES_set_encrypt_key
.type asm_AES_set_encrypt_key,%function
.globl aes_nohw_set_encrypt_key
.hidden aes_nohw_set_encrypt_key
.type aes_nohw_set_encrypt_key,%function
.align 5
asm_AES_set_encrypt_key:
aes_nohw_set_encrypt_key:
_armv4_AES_set_encrypt_key:
#ifndef __thumb2__
sub r3,pc,#8 @ asm_AES_set_encrypt_key
sub r3,pc,#8 @ aes_nohw_set_encrypt_key
#else
adr r3,.
#endif

@@ -452,7 +471,7 @@ _armv4_AES_set_encrypt_key:
mov lr,r1 @ bits
mov r11,r2 @ key

#ifdef __APPLE__
#if defined(__thumb2__) || defined(__APPLE__)
adr r10,AES_Te+1024 @ Te4
#else
sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4

@@ -717,23 +736,23 @@ _armv4_AES_set_encrypt_key:
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size asm_AES_set_encrypt_key,.-asm_AES_set_encrypt_key
.size aes_nohw_set_encrypt_key,.-aes_nohw_set_encrypt_key

.globl asm_AES_set_decrypt_key
.hidden asm_AES_set_decrypt_key
.type asm_AES_set_decrypt_key,%function
.globl aes_nohw_set_decrypt_key
.hidden aes_nohw_set_decrypt_key
.type aes_nohw_set_decrypt_key,%function
.align 5
asm_AES_set_decrypt_key:
aes_nohw_set_decrypt_key:
str lr,[sp,#-4]! @ push lr
bl _armv4_AES_set_encrypt_key
teq r0,#0
ldr lr,[sp],#4 @ pop lr
bne .Labrt

mov r0,r2 @ asm_AES_set_encrypt_key preserves r2,
mov r0,r2 @ aes_nohw_set_encrypt_key preserves r2,
mov r1,r2 @ which is AES_KEY *key
b _armv4_AES_set_enc2dec_key
.size asm_AES_set_decrypt_key,.-asm_AES_set_decrypt_key
.size aes_nohw_set_decrypt_key,.-aes_nohw_set_decrypt_key

@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
.globl AES_set_enc2dec_key

@@ -935,23 +954,23 @@ AES_Td:
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.size AES_Td,.-AES_Td

@ void asm_AES_decrypt(const unsigned char *in, unsigned char *out,
@ const AES_KEY *key) {
.globl asm_AES_decrypt
.hidden asm_AES_decrypt
.type asm_AES_decrypt,%function
@ void aes_nohw_decrypt(const unsigned char *in, unsigned char *out,
@ const AES_KEY *key) {
.globl aes_nohw_decrypt
.hidden aes_nohw_decrypt
.type aes_nohw_decrypt,%function
.align 5
asm_AES_decrypt:
aes_nohw_decrypt:
#ifndef __thumb2__
sub r3,pc,#8 @ asm_AES_decrypt
sub r3,pc,#8 @ aes_nohw_decrypt
#else
adr r3,.
#endif
stmdb sp!,{r1,r4-r12,lr}
#ifdef __APPLE__
#if defined(__thumb2__) || defined(__APPLE__)
adr r10,AES_Td
#else
sub r10,r3,#asm_AES_decrypt-AES_Td @ Td
sub r10,r3,#aes_nohw_decrypt-AES_Td @ Td
#endif
mov r12,r0 @ inp
mov r11,r2

@@ -1048,7 +1067,7 @@ asm_AES_decrypt:
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size asm_AES_decrypt,.-asm_AES_decrypt
.size aes_nohw_decrypt,.-aes_nohw_decrypt

.type _armv4_AES_decrypt,%function
.align 2

@@ -1199,3 +1218,5 @@ _armv4_AES_decrypt:
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
@@ -1,4 +1,18 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7

@@ -13,6 +27,8 @@
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b

.text

.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,%function

@@ -761,3 +777,5 @@ aes_hw_ctr32_encrypt_blocks:
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
@@ -1,6 +1,24 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.arch armv7-a

.text
#if defined(__thumb2__)
.syntax unified

@@ -167,14 +185,15 @@ bn_mul_mont:
mov r4,sp @ "rewind" r4
sub r2,r2,r5 @ "rewind" r2

and r1,r4,r14
bic r3,r2,r14
orr r1,r1,r3 @ ap=borrow?tp:rp

.Lcopy: ldr r7,[r1],#4 @ copy or in-place refresh
.Lcopy: ldr r7,[r4] @ conditional copy
ldr r5,[r2]
str sp,[r4],#4 @ zap tp
str r7,[r2],#4
cmp r4,r0
#ifdef __thumb2__
it cc
#endif
movcc r5,r7
str r5,[r2],#4
teq r4,r0 @ preserve carry
bne .Lcopy

mov sp,r0

@@ -954,3 +973,5 @@ bn_mul8x_mont_neon:
.hidden OPENSSL_armcap_P
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
File diff suppressed because it is too large
@@ -1,9 +1,30 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
@ instructions are in aesv8-armx.pl.)
.arch armv7-a

.text
#if defined(__thumb2__) || defined(__clang__)
.syntax unified
#define ldrplb ldrbpl
#define ldrneb ldrbne
#endif
#if defined(__thumb2__)
.thumb

@@ -11,11 +32,6 @@
.code 32
#endif

#ifdef __clang__
#define ldrplb ldrbpl
#define ldrneb ldrbne
#endif

.type rem_4bit,%object
.align 5
rem_4bit:

@@ -571,3 +587,5 @@ gcm_ghash_neon:
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
@@ -1,4 +1,18 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>

.text

@@ -109,13 +123,13 @@ gcm_ghash_v8:
@ loaded value would have
@ to be rotated in order to
@ make it appear as in
@ alorithm specification
@ algorithm specification
subs r3,r3,#32 @ see if r3 is 32 or larger
mov r12,#16 @ r12 is used as post-
@ increment for input pointer;
@ as loop is modulo-scheduled
@ r12 is zeroed just in time
@ to preclude oversteping
@ to preclude overstepping
@ inp[len], which means that
@ last block[s] are actually
@ loaded twice, but last

@@ -235,3 +249,5 @@ gcm_ghash_v8:
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
@@ -1,4 +1,18 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>

.text

@@ -1493,3 +1507,5 @@ sha1_block_data_order_armv8:
.hidden OPENSSL_armcap_P
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
@@ -1,4 +1,18 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use

@@ -51,6 +65,11 @@
# define __ARM_MAX_ARCH__ 7
#endif

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those
@ instructions are manually-encoded. (See unsha256.)
.arch armv7-a

.text
#if defined(__thumb2__)
.syntax unified

@@ -2816,3 +2835,5 @@ sha256_block_data_order_armv8:
.hidden OPENSSL_armcap_P
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
@@ -1,4 +1,18 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use

@@ -64,6 +78,10 @@
# define VFP_ABI_POP
#endif

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.arch armv7-a

#ifdef __ARMEL__
# define LO 0
# define HI 4

@@ -1872,3 +1890,5 @@ sha512_block_data_order_neon:
.hidden OPENSSL_armcap_P
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
File diff suppressed because it is too large
@@ -0,0 +1,380 @@
|
|||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__arm__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.syntax unified
|
||||
|
||||
.arch armv7-a
|
||||
.fpu vfp
|
||||
|
||||
.text
|
||||
|
||||
@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
@ with |argv|, then saves the callee-saved registers into |state|. It returns
|
||||
@ the result of |func|. The |unwind| argument is unused.
|
||||
@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
|
||||
@ const uint32_t *argv, size_t argc,
|
||||
@ int unwind);
|
||||
.type abi_test_trampoline, %function
|
||||
.globl abi_test_trampoline
|
||||
.hidden abi_test_trampoline
|
||||
.align 4
|
||||
abi_test_trampoline:
|
||||
.Labi_test_trampoline_begin:
|
||||
@ Save parameters and all callee-saved registers. For convenience, we
|
||||
@ save r9 on iOS even though it's volatile.
|
||||
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
stmdb sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
|
||||
@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
|
||||
@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
|
||||
sub sp, sp, #28
|
||||
|
||||
@ Every register in AAPCS is either non-volatile or a parameter (except
|
||||
@ r9 on iOS), so this code, by the actual call, loses all its scratch
|
||||
@ registers. First fill in stack parameters while there are registers
|
||||
@ to spare.
|
||||
cmp r3, #4
|
||||
bls .Lstack_args_done
|
||||
mov r4, sp @ r4 is the output pointer.
|
||||
add r5, r2, r3, lsl #2 @ Set r5 to the end of argv.
|
||||
add r2, r2, #16 @ Skip four arguments.
|
||||
.Lstack_args_loop:
|
||||
ldr r6, [r2], #4
|
||||
cmp r2, r5
|
||||
str r6, [r4], #4
|
||||
bne .Lstack_args_loop
|
||||
|
||||
.Lstack_args_done:
|
||||
@ Load registers from |r1|.
|
||||
vldmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
#if defined(__APPLE__)
|
||||
@ r9 is not volatile on iOS.
|
||||
ldmia r1!, {r4,r5,r6,r7,r8,r10-r11}
|
||||
#else
|
||||
ldmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
#endif
|
||||
|
||||
@ Load register parameters. This uses up our remaining registers, so we
|
||||
@ repurpose lr as scratch space.
|
||||
ldr r3, [sp, #40] @ Reload argc.
|
||||
ldr lr, [sp, #36] @ Load argv into lr.
|
||||
cmp r3, #3
|
||||
bhi .Larg_r3
|
||||
beq .Larg_r2
|
||||
cmp r3, #1
|
||||
bhi .Larg_r1
|
||||
beq .Larg_r0
|
||||
b .Largs_done
|
||||
|
||||
.Larg_r3:
|
||||
ldr r3, [lr, #12] @ argv[3]
|
||||
.Larg_r2:
|
||||
ldr r2, [lr, #8] @ argv[2]
|
||||
.Larg_r1:
|
||||
ldr r1, [lr, #4] @ argv[1]
|
||||
.Larg_r0:
|
||||
ldr r0, [lr] @ argv[0]
|
||||
.Largs_done:
|
||||
|
||||
@ With every other register in use, load the function pointer into lr
|
||||
@ and call the function.
|
||||
ldr lr, [sp, #28]
|
||||
blx lr
|
||||
|
||||
@ r1-r3 are free for use again. The trampoline only supports
|
||||
@ single-return functions. Pass r4-r11 to the caller.
|
||||
ldr r1, [sp, #32]
|
||||
vstmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
#if defined(__APPLE__)
|
||||
@ r9 is not volatile on iOS.
|
||||
stmia r1!, {r4,r5,r6,r7,r8,r10-r11}
|
||||
#else
|
||||
stmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
#endif
|
||||
|
||||
@ Unwind the stack and restore registers.
|
||||
add sp, sp, #44 @ 44 = 28+16
|
||||
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} @ Skip r0-r3 (see +16 above).
|
||||
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
|
||||
bx lr
|
||||
.size abi_test_trampoline,.-abi_test_trampoline
|
||||
.type abi_test_clobber_r0, %function
|
||||
.globl abi_test_clobber_r0
|
||||
.hidden abi_test_clobber_r0
|
||||
.align 4
|
||||
abi_test_clobber_r0:
|
||||
mov r0, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r0,.-abi_test_clobber_r0
|
||||
.type abi_test_clobber_r1, %function
|
||||
.globl abi_test_clobber_r1
|
||||
.hidden abi_test_clobber_r1
|
||||
.align 4
|
||||
abi_test_clobber_r1:
|
||||
mov r1, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r1,.-abi_test_clobber_r1
|
||||
.type abi_test_clobber_r2, %function
|
||||
.globl abi_test_clobber_r2
|
||||
.hidden abi_test_clobber_r2
|
||||
.align 4
|
||||
abi_test_clobber_r2:
|
||||
mov r2, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r2,.-abi_test_clobber_r2
|
||||
.type abi_test_clobber_r3, %function
|
||||
.globl abi_test_clobber_r3
|
||||
.hidden abi_test_clobber_r3
|
||||
.align 4
|
||||
abi_test_clobber_r3:
|
||||
mov r3, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r3,.-abi_test_clobber_r3
|
||||
.type abi_test_clobber_r4, %function
|
||||
.globl abi_test_clobber_r4
|
||||
.hidden abi_test_clobber_r4
|
||||
.align 4
|
||||
abi_test_clobber_r4:
|
||||
mov r4, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r4,.-abi_test_clobber_r4
|
||||
.type abi_test_clobber_r5, %function
|
||||
.globl abi_test_clobber_r5
|
||||
.hidden abi_test_clobber_r5
|
||||
.align 4
|
||||
abi_test_clobber_r5:
|
||||
mov r5, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r5,.-abi_test_clobber_r5
|
||||
.type abi_test_clobber_r6, %function
|
||||
.globl abi_test_clobber_r6
|
||||
.hidden abi_test_clobber_r6
|
||||
.align 4
|
||||
abi_test_clobber_r6:
|
||||
mov r6, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r6,.-abi_test_clobber_r6
|
||||
.type abi_test_clobber_r7, %function
|
||||
.globl abi_test_clobber_r7
|
||||
.hidden abi_test_clobber_r7
|
||||
.align 4
|
||||
abi_test_clobber_r7:
|
||||
mov r7, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r7,.-abi_test_clobber_r7
|
||||
.type abi_test_clobber_r8, %function
|
||||
.globl abi_test_clobber_r8
|
||||
.hidden abi_test_clobber_r8
|
||||
.align 4
|
||||
abi_test_clobber_r8:
|
||||
mov r8, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r8,.-abi_test_clobber_r8
|
||||
.type abi_test_clobber_r9, %function
|
||||
.globl abi_test_clobber_r9
|
||||
.hidden abi_test_clobber_r9
|
||||
.align 4
|
||||
abi_test_clobber_r9:
|
||||
mov r9, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r9,.-abi_test_clobber_r9
|
||||
.type abi_test_clobber_r10, %function
|
||||
.globl abi_test_clobber_r10
|
||||
.hidden abi_test_clobber_r10
|
||||
.align 4
|
||||
abi_test_clobber_r10:
|
||||
mov r10, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r10,.-abi_test_clobber_r10
|
||||
.type abi_test_clobber_r11, %function
|
||||
.globl abi_test_clobber_r11
|
||||
.hidden abi_test_clobber_r11
|
||||
.align 4
|
||||
abi_test_clobber_r11:
|
||||
mov r11, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r11,.-abi_test_clobber_r11
|
||||
.type abi_test_clobber_r12, %function
|
||||
.globl abi_test_clobber_r12
|
||||
.hidden abi_test_clobber_r12
|
||||
.align 4
|
||||
abi_test_clobber_r12:
|
||||
mov r12, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r12,.-abi_test_clobber_r12
|
||||
.type abi_test_clobber_d0, %function
|
||||
.globl abi_test_clobber_d0
|
||||
.hidden abi_test_clobber_d0
|
||||
.align 4
|
||||
abi_test_clobber_d0:
|
||||
mov r0, #0
|
||||
vmov s0, r0
|
||||
vmov s1, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d0,.-abi_test_clobber_d0
|
||||
.type abi_test_clobber_d1, %function
|
||||
.globl abi_test_clobber_d1
|
||||
.hidden abi_test_clobber_d1
|
||||
.align 4
|
||||
abi_test_clobber_d1:
|
||||
mov r0, #0
|
||||
vmov s2, r0
|
||||
vmov s3, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d1,.-abi_test_clobber_d1
|
||||
.type abi_test_clobber_d2, %function
|
||||
.globl abi_test_clobber_d2
|
||||
.hidden abi_test_clobber_d2
|
||||
.align 4
|
||||
abi_test_clobber_d2:
|
||||
mov r0, #0
|
||||
vmov s4, r0
|
||||
vmov s5, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d2,.-abi_test_clobber_d2
|
||||
.type abi_test_clobber_d3, %function
|
||||
.globl abi_test_clobber_d3
|
||||
.hidden abi_test_clobber_d3
|
||||
.align 4
|
||||
abi_test_clobber_d3:
|
||||
mov r0, #0
|
||||
vmov s6, r0
|
||||
vmov s7, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d3,.-abi_test_clobber_d3
|
||||
.type abi_test_clobber_d4, %function
|
||||
.globl abi_test_clobber_d4
|
||||
.hidden abi_test_clobber_d4
|
||||
.align 4
|
||||
abi_test_clobber_d4:
|
||||
mov r0, #0
|
||||
vmov s8, r0
|
||||
vmov s9, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d4,.-abi_test_clobber_d4
|
||||
.type abi_test_clobber_d5, %function
|
||||
.globl abi_test_clobber_d5
|
||||
.hidden abi_test_clobber_d5
|
||||
.align 4
|
||||
abi_test_clobber_d5:
|
||||
mov r0, #0
|
||||
vmov s10, r0
|
||||
vmov s11, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d5,.-abi_test_clobber_d5
|
||||
.type abi_test_clobber_d6, %function
|
||||
.globl abi_test_clobber_d6
|
||||
.hidden abi_test_clobber_d6
|
||||
.align 4
|
||||
abi_test_clobber_d6:
|
||||
mov r0, #0
|
||||
vmov s12, r0
|
||||
vmov s13, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d6,.-abi_test_clobber_d6
|
||||
.type abi_test_clobber_d7, %function
|
||||
.globl abi_test_clobber_d7
|
||||
.hidden abi_test_clobber_d7
|
||||
.align 4
|
||||
abi_test_clobber_d7:
|
||||
mov r0, #0
|
||||
vmov s14, r0
|
||||
vmov s15, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d7,.-abi_test_clobber_d7
|
||||
.type abi_test_clobber_d8, %function
|
||||
.globl abi_test_clobber_d8
|
||||
.hidden abi_test_clobber_d8
|
||||
.align 4
|
||||
abi_test_clobber_d8:
|
||||
mov r0, #0
|
||||
vmov s16, r0
|
||||
vmov s17, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d8,.-abi_test_clobber_d8
|
||||
.type abi_test_clobber_d9, %function
|
||||
.globl abi_test_clobber_d9
|
||||
.hidden abi_test_clobber_d9
|
||||
.align 4
|
||||
abi_test_clobber_d9:
|
||||
mov r0, #0
|
||||
vmov s18, r0
|
||||
vmov s19, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d9,.-abi_test_clobber_d9
|
||||
.type abi_test_clobber_d10, %function
|
||||
.globl abi_test_clobber_d10
|
||||
.hidden abi_test_clobber_d10
|
||||
.align 4
|
||||
abi_test_clobber_d10:
|
||||
mov r0, #0
|
||||
vmov s20, r0
|
||||
vmov s21, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d10,.-abi_test_clobber_d10
|
||||
.type abi_test_clobber_d11, %function
|
||||
.globl abi_test_clobber_d11
|
||||
.hidden abi_test_clobber_d11
|
||||
.align 4
|
||||
abi_test_clobber_d11:
|
||||
mov r0, #0
|
||||
vmov s22, r0
|
||||
vmov s23, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d11,.-abi_test_clobber_d11
|
||||
.type abi_test_clobber_d12, %function
|
||||
.globl abi_test_clobber_d12
|
||||
.hidden abi_test_clobber_d12
|
||||
.align 4
|
||||
abi_test_clobber_d12:
|
||||
mov r0, #0
|
||||
vmov s24, r0
|
||||
vmov s25, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d12,.-abi_test_clobber_d12
|
||||
.type abi_test_clobber_d13, %function
|
||||
.globl abi_test_clobber_d13
|
||||
.hidden abi_test_clobber_d13
|
||||
.align 4
|
||||
abi_test_clobber_d13:
|
||||
mov r0, #0
|
||||
vmov s26, r0
|
||||
vmov s27, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d13,.-abi_test_clobber_d13
|
||||
.type abi_test_clobber_d14, %function
|
||||
.globl abi_test_clobber_d14
|
||||
.hidden abi_test_clobber_d14
|
||||
.align 4
|
||||
abi_test_clobber_d14:
|
||||
mov r0, #0
|
||||
vmov s28, r0
|
||||
vmov s29, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d14,.-abi_test_clobber_d14
|
||||
.type abi_test_clobber_d15, %function
|
||||
.globl abi_test_clobber_d15
|
||||
.hidden abi_test_clobber_d15
|
||||
.align 4
|
||||
abi_test_clobber_d15:
|
||||
mov r0, #0
|
||||
vmov s30, r0
|
||||
vmov s31, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d15,.-abi_test_clobber_d15
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
packager/third_party/boringssl/linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S (vendored, new file, 3670 lines): file diff suppressed because it is too large
packager/third_party/boringssl/linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S (vendored, new file, 587 lines)
|
@@ -0,0 +1,587 @@
|
|||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(__powerpc64__)
|
||||
.machine "any"
|
||||
|
||||
.abiversion 2
|
||||
.text
|
||||
|
||||
.globl gcm_init_p8
|
||||
.type gcm_init_p8,@function
|
||||
.align 5
|
||||
gcm_init_p8:
|
||||
.localentry gcm_init_p8,0
|
||||
|
||||
li 0,-4096
|
||||
li 8,0x10
|
||||
li 12,-1
|
||||
li 9,0x20
|
||||
or 0,0,0
|
||||
li 10,0x30
|
||||
.long 0x7D202699
|
||||
|
||||
vspltisb 8,-16
|
||||
vspltisb 5,1
|
||||
vaddubm 8,8,8
|
||||
vxor 4,4,4
|
||||
vor 8,8,5
|
||||
vsldoi 8,8,4,15
|
||||
vsldoi 6,4,5,1
|
||||
vaddubm 8,8,8
|
||||
vspltisb 7,7
|
||||
vor 8,8,6
|
||||
vspltb 6,9,0
|
||||
vsl 9,9,5
|
||||
vsrab 6,6,7
|
||||
vand 6,6,8
|
||||
vxor 3,9,6
|
||||
|
||||
vsldoi 9,3,3,8
|
||||
vsldoi 8,4,8,8
|
||||
vsldoi 11,4,9,8
|
||||
vsldoi 10,9,4,8
|
||||
|
||||
.long 0x7D001F99
|
||||
.long 0x7D681F99
|
||||
li 8,0x40
|
||||
.long 0x7D291F99
|
||||
li 9,0x50
|
||||
.long 0x7D4A1F99
|
||||
li 10,0x60
|
||||
|
||||
.long 0x10035CC8
|
||||
.long 0x10234CC8
|
||||
.long 0x104354C8
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vxor 6,6,2
|
||||
vxor 16,0,6
|
||||
|
||||
vsldoi 17,16,16,8
|
||||
vsldoi 19,4,17,8
|
||||
vsldoi 18,17,4,8
|
||||
|
||||
.long 0x7E681F99
|
||||
li 8,0x70
|
||||
.long 0x7E291F99
|
||||
li 9,0x80
|
||||
.long 0x7E4A1F99
|
||||
li 10,0x90
|
||||
.long 0x10039CC8
|
||||
.long 0x11B09CC8
|
||||
.long 0x10238CC8
|
||||
.long 0x11D08CC8
|
||||
.long 0x104394C8
|
||||
.long 0x11F094C8
|
||||
|
||||
.long 0x10E044C8
|
||||
.long 0x114D44C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vsldoi 11,14,4,8
|
||||
vsldoi 9,4,14,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
vxor 13,13,11
|
||||
vxor 15,15,9
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vsldoi 13,13,13,8
|
||||
vxor 0,0,7
|
||||
vxor 13,13,10
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
vsldoi 9,13,13,8
|
||||
.long 0x100044C8
|
||||
.long 0x11AD44C8
|
||||
vxor 6,6,2
|
||||
vxor 9,9,15
|
||||
vxor 0,0,6
|
||||
vxor 13,13,9
|
||||
|
||||
vsldoi 9,0,0,8
|
||||
vsldoi 17,13,13,8
|
||||
vsldoi 11,4,9,8
|
||||
vsldoi 10,9,4,8
|
||||
vsldoi 19,4,17,8
|
||||
vsldoi 18,17,4,8
|
||||
|
||||
.long 0x7D681F99
|
||||
li 8,0xa0
|
||||
.long 0x7D291F99
|
||||
li 9,0xb0
|
||||
.long 0x7D4A1F99
|
||||
li 10,0xc0
|
||||
.long 0x7E681F99
|
||||
.long 0x7E291F99
|
||||
.long 0x7E4A1F99
|
||||
|
||||
or 12,12,12
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,2,0
|
||||
.long 0
|
||||
.size gcm_init_p8,.-gcm_init_p8
|
||||
.globl gcm_gmult_p8
|
||||
.type gcm_gmult_p8,@function
|
||||
.align 5
|
||||
gcm_gmult_p8:
|
||||
.localentry gcm_gmult_p8,0
|
||||
|
||||
lis 0,0xfff8
|
||||
li 8,0x10
|
||||
li 12,-1
|
||||
li 9,0x20
|
||||
or 0,0,0
|
||||
li 10,0x30
|
||||
.long 0x7C601E99
|
||||
|
||||
.long 0x7D682699
|
||||
lvsl 12,0,0
|
||||
.long 0x7D292699
|
||||
vspltisb 5,0x07
|
||||
.long 0x7D4A2699
|
||||
vxor 12,12,5
|
||||
.long 0x7D002699
|
||||
vperm 3,3,3,12
|
||||
vxor 4,4,4
|
||||
|
||||
.long 0x10035CC8
|
||||
.long 0x10234CC8
|
||||
.long 0x104354C8
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vxor 6,6,2
|
||||
vxor 0,0,6
|
||||
|
||||
vperm 0,0,0,12
|
||||
.long 0x7C001F99
|
||||
|
||||
or 12,12,12
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,2,0
|
||||
.long 0
|
||||
.size gcm_gmult_p8,.-gcm_gmult_p8
|
||||
|
||||
.globl gcm_ghash_p8
|
||||
.type gcm_ghash_p8,@function
|
||||
.align 5
|
||||
gcm_ghash_p8:
|
||||
.localentry gcm_ghash_p8,0
|
||||
|
||||
li 0,-4096
|
||||
li 8,0x10
|
||||
li 12,-1
|
||||
li 9,0x20
|
||||
or 0,0,0
|
||||
li 10,0x30
|
||||
.long 0x7C001E99
|
||||
|
||||
.long 0x7D682699
|
||||
li 8,0x40
|
||||
lvsl 12,0,0
|
||||
.long 0x7D292699
|
||||
li 9,0x50
|
||||
vspltisb 5,0x07
|
||||
.long 0x7D4A2699
|
||||
li 10,0x60
|
||||
vxor 12,12,5
|
||||
.long 0x7D002699
|
||||
vperm 0,0,0,12
|
||||
vxor 4,4,4
|
||||
|
||||
cmpldi 6,64
|
||||
bge .Lgcm_ghash_p8_4x
|
||||
|
||||
.long 0x7C602E99
|
||||
addi 5,5,16
|
||||
subic. 6,6,16
|
||||
vperm 3,3,3,12
|
||||
vxor 3,3,0
|
||||
beq .Lshort
|
||||
|
||||
.long 0x7E682699
|
||||
li 8,16
|
||||
.long 0x7E292699
|
||||
add 9,5,6
|
||||
.long 0x7E4A2699
|
||||
|
||||
|
||||
.align 5
|
||||
.Loop_2x:
|
||||
.long 0x7E002E99
|
||||
vperm 16,16,16,12
|
||||
|
||||
subic 6,6,32
|
||||
.long 0x10039CC8
|
||||
.long 0x11B05CC8
|
||||
subfe 0,0,0
|
||||
.long 0x10238CC8
|
||||
.long 0x11D04CC8
|
||||
and 0,0,6
|
||||
.long 0x104394C8
|
||||
.long 0x11F054C8
|
||||
add 5,5,0
|
||||
|
||||
vxor 0,0,13
|
||||
vxor 1,1,14
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 2,2,15
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
.long 0x7C682E99
|
||||
addi 5,5,32
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vperm 3,3,3,12
|
||||
vxor 6,6,2
|
||||
vxor 3,3,6
|
||||
vxor 3,3,0
|
||||
cmpld 9,5
|
||||
bgt .Loop_2x
|
||||
|
||||
cmplwi 6,0
|
||||
bne .Leven
|
||||
|
||||
.Lshort:
|
||||
.long 0x10035CC8
|
||||
.long 0x10234CC8
|
||||
.long 0x104354C8
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vxor 6,6,2
|
||||
|
||||
.Leven:
|
||||
vxor 0,0,6
|
||||
vperm 0,0,0,12
|
||||
.long 0x7C001F99
|
||||
|
||||
or 12,12,12
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,4,0
|
||||
.long 0
|
||||
.align 5
|
||||
.gcm_ghash_p8_4x:
|
||||
.Lgcm_ghash_p8_4x:
|
||||
stdu 1,-256(1)
|
||||
li 10,63
|
||||
li 11,79
|
||||
stvx 20,10,1
|
||||
addi 10,10,32
|
||||
stvx 21,11,1
|
||||
addi 11,11,32
|
||||
stvx 22,10,1
|
||||
addi 10,10,32
|
||||
stvx 23,11,1
|
||||
addi 11,11,32
|
||||
stvx 24,10,1
|
||||
addi 10,10,32
|
||||
stvx 25,11,1
|
||||
addi 11,11,32
|
||||
stvx 26,10,1
|
||||
addi 10,10,32
|
||||
stvx 27,11,1
|
||||
addi 11,11,32
|
||||
stvx 28,10,1
|
||||
addi 10,10,32
|
||||
stvx 29,11,1
|
||||
addi 11,11,32
|
||||
stvx 30,10,1
|
||||
li 10,0x60
|
||||
stvx 31,11,1
|
||||
li 0,-1
|
||||
stw 12,252(1)
|
||||
or 0,0,0
|
||||
|
||||
lvsl 5,0,8
|
||||
|
||||
li 8,0x70
|
||||
.long 0x7E292699
|
||||
li 9,0x80
|
||||
vspltisb 6,8
|
||||
|
||||
li 10,0x90
|
||||
.long 0x7EE82699
|
||||
li 8,0xa0
|
||||
.long 0x7F092699
|
||||
li 9,0xb0
|
||||
.long 0x7F2A2699
|
||||
li 10,0xc0
|
||||
.long 0x7FA82699
|
||||
li 8,0x10
|
||||
.long 0x7FC92699
|
||||
li 9,0x20
|
||||
.long 0x7FEA2699
|
||||
li 10,0x30
|
||||
|
||||
vsldoi 7,4,6,8
|
||||
vaddubm 18,5,7
|
||||
vaddubm 19,6,18
|
||||
|
||||
srdi 6,6,4
|
||||
|
||||
.long 0x7C602E99
|
||||
.long 0x7E082E99
|
||||
subic. 6,6,8
|
||||
.long 0x7EC92E99
|
||||
.long 0x7F8A2E99
|
||||
addi 5,5,0x40
|
||||
vperm 3,3,3,12
|
||||
vperm 16,16,16,12
|
||||
vperm 22,22,22,12
|
||||
vperm 28,28,28,12
|
||||
|
||||
vxor 2,3,0
|
||||
|
||||
.long 0x11B0BCC8
|
||||
.long 0x11D0C4C8
|
||||
.long 0x11F0CCC8
|
||||
|
||||
vperm 11,17,9,18
|
||||
vperm 5,22,28,19
|
||||
vperm 10,17,9,19
|
||||
vperm 6,22,28,18
|
||||
.long 0x12B68CC8
|
||||
.long 0x12855CC8
|
||||
.long 0x137C4CC8
|
||||
.long 0x134654C8
|
||||
|
||||
vxor 21,21,14
|
||||
vxor 20,20,13
|
||||
vxor 27,27,21
|
||||
vxor 26,26,15
|
||||
|
||||
blt .Ltail_4x
|
||||
|
||||
.Loop_4x:
|
||||
.long 0x7C602E99
|
||||
.long 0x7E082E99
|
||||
subic. 6,6,4
|
||||
.long 0x7EC92E99
|
||||
.long 0x7F8A2E99
|
||||
addi 5,5,0x40
|
||||
vperm 16,16,16,12
|
||||
vperm 22,22,22,12
|
||||
vperm 28,28,28,12
|
||||
vperm 3,3,3,12
|
||||
|
||||
.long 0x1002ECC8
|
||||
.long 0x1022F4C8
|
||||
.long 0x1042FCC8
|
||||
.long 0x11B0BCC8
|
||||
.long 0x11D0C4C8
|
||||
.long 0x11F0CCC8
|
||||
|
||||
vxor 0,0,20
|
||||
vxor 1,1,27
|
||||
vxor 2,2,26
|
||||
vperm 5,22,28,19
|
||||
vperm 6,22,28,18
|
||||
|
||||
.long 0x10E044C8
|
||||
.long 0x12855CC8
|
||||
.long 0x134654C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x12B68CC8
|
||||
.long 0x137C4CC8
|
||||
.long 0x100044C8
|
||||
|
||||
vxor 20,20,13
|
||||
vxor 26,26,15
|
||||
vxor 2,2,3
|
||||
vxor 21,21,14
|
||||
vxor 2,2,6
|
||||
vxor 27,27,21
|
||||
vxor 2,2,0
|
||||
bge .Loop_4x
|
||||
|
||||
.Ltail_4x:
|
||||
.long 0x1002ECC8
|
||||
.long 0x1022F4C8
|
||||
.long 0x1042FCC8
|
||||
|
||||
vxor 0,0,20
|
||||
vxor 1,1,27
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 2,2,26
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vxor 6,6,2
|
||||
vxor 0,0,6
|
||||
|
||||
addic. 6,6,4
|
||||
beq .Ldone_4x
|
||||
|
||||
.long 0x7C602E99
|
||||
cmpldi 6,2
|
||||
li 6,-4
|
||||
blt .Lone
|
||||
.long 0x7E082E99
|
||||
beq .Ltwo
|
||||
|
||||
.Lthree:
|
||||
.long 0x7EC92E99
|
||||
vperm 3,3,3,12
|
||||
vperm 16,16,16,12
|
||||
vperm 22,22,22,12
|
||||
|
||||
vxor 2,3,0
|
||||
vor 29,23,23
|
||||
vor 30, 24, 24
|
||||
vor 31,25,25
|
||||
|
||||
vperm 5,16,22,19
|
||||
vperm 6,16,22,18
|
||||
.long 0x12B08CC8
|
||||
.long 0x13764CC8
|
||||
.long 0x12855CC8
|
||||
.long 0x134654C8
|
||||
|
||||
vxor 27,27,21
|
||||
b .Ltail_4x
|
||||
|
||||
.align 4
|
||||
.Ltwo:
|
||||
vperm 3,3,3,12
|
||||
vperm 16,16,16,12
|
||||
|
||||
vxor 2,3,0
|
||||
vperm 5,4,16,19
|
||||
vperm 6,4,16,18
|
||||
|
||||
vsldoi 29,4,17,8
|
||||
vor 30, 17, 17
|
||||
vsldoi 31,17,4,8
|
||||
|
||||
.long 0x12855CC8
|
||||
.long 0x13704CC8
|
||||
.long 0x134654C8
|
||||
|
||||
b .Ltail_4x
|
||||
|
||||
.align 4
|
||||
.Lone:
|
||||
vperm 3,3,3,12
|
||||
|
||||
vsldoi 29,4,9,8
|
||||
vor 30, 9, 9
|
||||
vsldoi 31,9,4,8
|
||||
|
||||
vxor 2,3,0
|
||||
vxor 20,20,20
|
||||
vxor 27,27,27
|
||||
vxor 26,26,26
|
||||
|
||||
b .Ltail_4x
|
||||
|
||||
.Ldone_4x:
|
||||
vperm 0,0,0,12
|
||||
.long 0x7C001F99
|
||||
|
||||
li 10,63
|
||||
li 11,79
|
||||
or 12,12,12
|
||||
lvx 20,10,1
|
||||
addi 10,10,32
|
||||
lvx 21,11,1
|
||||
addi 11,11,32
|
||||
lvx 22,10,1
|
||||
addi 10,10,32
|
||||
lvx 23,11,1
|
||||
addi 11,11,32
|
||||
lvx 24,10,1
|
||||
addi 10,10,32
|
||||
lvx 25,11,1
|
||||
addi 11,11,32
|
||||
lvx 26,10,1
|
||||
addi 10,10,32
|
||||
lvx 27,11,1
|
||||
addi 11,11,32
|
||||
lvx 28,10,1
|
||||
addi 10,10,32
|
||||
lvx 29,11,1
|
||||
addi 11,11,32
|
||||
lvx 30,10,1
|
||||
lvx 31,11,1
|
||||
addi 1,1,256
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x04,0,0x80,0,4,0
|
||||
.long 0
|
||||
.size gcm_ghash_p8,.-gcm_ghash_p8
|
||||
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM && __powerpc64__
|
||||
.section .note.GNU-stack,"",@progbits
|
|
@@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl ChaCha20_ctr32
.hidden ChaCha20_ctr32

@@ -966,3 +972,4 @@ ChaCha20_ssse3:
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits
@@ -1,4 +1,10 @@
|
|||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__i386__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
.hidden _x86_AES_encrypt_compact
|
||||
.type _x86_AES_encrypt_compact,@function
|
||||
|
@@ -979,12 +985,12 @@ _x86_AES_encrypt:
|
|||
.long 27,54,0,0
|
||||
.long 0,0,0,0
|
||||
.size _x86_AES_encrypt,.-_x86_AES_encrypt
|
||||
.globl asm_AES_encrypt
|
||||
.hidden asm_AES_encrypt
|
||||
.type asm_AES_encrypt,@function
|
||||
.globl aes_nohw_encrypt
|
||||
.hidden aes_nohw_encrypt
|
||||
.type aes_nohw_encrypt,@function
|
||||
.align 16
|
||||
asm_AES_encrypt:
|
||||
.L_asm_AES_encrypt_begin:
|
||||
aes_nohw_encrypt:
|
||||
.L_aes_nohw_encrypt_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
|
@@ -1044,7 +1050,7 @@ asm_AES_encrypt:
|
|||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size asm_AES_encrypt,.-.L_asm_AES_encrypt_begin
|
||||
.size aes_nohw_encrypt,.-.L_aes_nohw_encrypt_begin
|
||||
.hidden _x86_AES_decrypt_compact
|
||||
.type _x86_AES_decrypt_compact,@function
|
||||
.align 16
|
||||
|
@@ -2175,12 +2181,12 @@ _x86_AES_decrypt:
|
|||
.byte 23,43,4,126,186,119,214,38
|
||||
.byte 225,105,20,99,85,33,12,125
|
||||
.size _x86_AES_decrypt,.-_x86_AES_decrypt
|
||||
.globl asm_AES_decrypt
|
||||
.hidden asm_AES_decrypt
|
||||
.type asm_AES_decrypt,@function
|
||||
.globl aes_nohw_decrypt
|
||||
.hidden aes_nohw_decrypt
|
||||
.type aes_nohw_decrypt,@function
|
||||
.align 16
|
||||
asm_AES_decrypt:
|
||||
.L_asm_AES_decrypt_begin:
|
||||
aes_nohw_decrypt:
|
||||
.L_aes_nohw_decrypt_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
|
@@ -2240,13 +2246,13 @@ asm_AES_decrypt:
|
|||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size asm_AES_decrypt,.-.L_asm_AES_decrypt_begin
|
||||
.globl asm_AES_cbc_encrypt
|
||||
.hidden asm_AES_cbc_encrypt
|
||||
.type asm_AES_cbc_encrypt,@function
|
||||
.size aes_nohw_decrypt,.-.L_aes_nohw_decrypt_begin
|
||||
.globl aes_nohw_cbc_encrypt
|
||||
.hidden aes_nohw_cbc_encrypt
|
||||
.type aes_nohw_cbc_encrypt,@function
|
||||
.align 16
|
||||
asm_AES_cbc_encrypt:
|
||||
.L_asm_AES_cbc_encrypt_begin:
|
||||
aes_nohw_cbc_encrypt:
|
||||
.L_aes_nohw_cbc_encrypt_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
|
@@ -2774,7 +2780,7 @@ asm_AES_cbc_encrypt:
|
|||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size asm_AES_cbc_encrypt,.-.L_asm_AES_cbc_encrypt_begin
|
||||
.size aes_nohw_cbc_encrypt,.-.L_aes_nohw_cbc_encrypt_begin
|
||||
.hidden _x86_AES_set_encrypt_key
|
||||
.type _x86_AES_set_encrypt_key,@function
|
||||
.align 16
|
||||
|
@@ -3006,21 +3012,21 @@ _x86_AES_set_encrypt_key:
|
|||
popl %ebp
|
||||
ret
|
||||
.size _x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key
|
||||
.globl asm_AES_set_encrypt_key
|
||||
.hidden asm_AES_set_encrypt_key
|
||||
.type asm_AES_set_encrypt_key,@function
|
||||
.globl aes_nohw_set_encrypt_key
|
||||
.hidden aes_nohw_set_encrypt_key
|
||||
.type aes_nohw_set_encrypt_key,@function
|
||||
.align 16
|
||||
asm_AES_set_encrypt_key:
|
||||
.L_asm_AES_set_encrypt_key_begin:
|
||||
aes_nohw_set_encrypt_key:
|
||||
.L_aes_nohw_set_encrypt_key_begin:
|
||||
call _x86_AES_set_encrypt_key
|
||||
ret
|
||||
.size asm_AES_set_encrypt_key,.-.L_asm_AES_set_encrypt_key_begin
|
||||
.globl asm_AES_set_decrypt_key
|
||||
.hidden asm_AES_set_decrypt_key
|
||||
.type asm_AES_set_decrypt_key,@function
|
||||
.size aes_nohw_set_encrypt_key,.-.L_aes_nohw_set_encrypt_key_begin
|
||||
.globl aes_nohw_set_decrypt_key
|
||||
.hidden aes_nohw_set_decrypt_key
|
||||
.type aes_nohw_set_decrypt_key,@function
|
||||
.align 16
|
||||
asm_AES_set_decrypt_key:
|
||||
.L_asm_AES_set_decrypt_key_begin:
|
||||
aes_nohw_set_decrypt_key:
|
||||
.L_aes_nohw_set_decrypt_key_begin:
|
||||
call _x86_AES_set_encrypt_key
|
||||
cmpl $0,%eax
|
||||
je .L054proceed
|
||||
|
@@ -3249,8 +3255,9 @@ asm_AES_set_decrypt_key:
|
|||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size asm_AES_set_decrypt_key,.-.L_asm_AES_set_decrypt_key_begin
|
||||
.size aes_nohw_set_decrypt_key,.-.L_aes_nohw_set_decrypt_key_begin
|
||||
.byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89
|
||||
.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
|
||||
.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
|
File diff suppressed because it is too large
@@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl bn_mul_add_words
.hidden bn_mul_add_words

@@ -1535,3 +1541,4 @@ bn_sub_part_words:
ret
.size bn_sub_part_words,.-.L_bn_sub_part_words_begin
#endif
.section .note.GNU-stack,"",@progbits
@@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl bn_mul_comba8
.hidden bn_mul_comba8

@@ -1257,3 +1263,4 @@ bn_sqr_comba4:
ret
.size bn_sqr_comba4,.-.L_bn_sqr_comba4_begin
#endif
.section .note.GNU-stack,"",@progbits
packager/third_party/boringssl/linux-x86/crypto/fipsmodule/ghash-ssse3-x86.S (vendored, new file, 294 lines)
@@ -0,0 +1,294 @@
|
|||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__i386__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
.globl gcm_gmult_ssse3
|
||||
.hidden gcm_gmult_ssse3
|
||||
.type gcm_gmult_ssse3,@function
|
||||
.align 16
|
||||
gcm_gmult_ssse3:
|
||||
.L_gcm_gmult_ssse3_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%esi
|
||||
movdqu (%edi),%xmm0
|
||||
call .L000pic_point
|
||||
.L000pic_point:
|
||||
popl %eax
|
||||
movdqa .Lreverse_bytes-.L000pic_point(%eax),%xmm7
|
||||
movdqa .Llow4_mask-.L000pic_point(%eax),%xmm2
|
||||
.byte 102,15,56,0,199
|
||||
movdqa %xmm2,%xmm1
|
||||
pandn %xmm0,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm2,%xmm0
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $5,%eax
|
||||
.L001loop_row_1:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz .L001loop_row_1
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $5,%eax
|
||||
.L002loop_row_2:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz .L002loop_row_2
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $6,%eax
|
||||
.L003loop_row_3:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz .L003loop_row_3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
.byte 102,15,56,0,215
|
||||
movdqu %xmm2,(%edi)
|
||||
pxor %xmm0,%xmm0
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size gcm_gmult_ssse3,.-.L_gcm_gmult_ssse3_begin
|
||||
.globl gcm_ghash_ssse3
|
||||
.hidden gcm_ghash_ssse3
|
||||
.type gcm_ghash_ssse3,@function
|
||||
.align 16
|
||||
gcm_ghash_ssse3:
|
||||
.L_gcm_ghash_ssse3_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%esi
|
||||
movl 28(%esp),%edx
|
||||
movl 32(%esp),%ecx
|
||||
movdqu (%edi),%xmm0
|
||||
call .L004pic_point
|
||||
.L004pic_point:
|
||||
popl %ebx
|
||||
movdqa .Lreverse_bytes-.L004pic_point(%ebx),%xmm7
|
||||
andl $-16,%ecx
|
||||
.byte 102,15,56,0,199
|
||||
pxor %xmm3,%xmm3
|
||||
.L005loop_ghash:
|
||||
movdqa .Llow4_mask-.L004pic_point(%ebx),%xmm2
|
||||
movdqu (%edx),%xmm1
|
||||
.byte 102,15,56,0,207
|
||||
pxor %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm1
|
||||
pandn %xmm0,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm2,%xmm0
|
||||
pxor %xmm2,%xmm2
|
||||
movl $5,%eax
|
||||
.L006loop_row_4:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz .L006loop_row_4
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $5,%eax
|
||||
.L007loop_row_5:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz .L007loop_row_5
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $6,%eax
|
||||
.L008loop_row_6:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz .L008loop_row_6
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movdqa %xmm2,%xmm0
|
||||
leal -256(%esi),%esi
|
||||
leal 16(%edx),%edx
|
||||
subl $16,%ecx
|
||||
jnz .L005loop_ghash
|
||||
.byte 102,15,56,0,199
|
||||
movdqu %xmm0,(%edi)
|
||||
pxor %xmm0,%xmm0
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size gcm_ghash_ssse3,.-.L_gcm_ghash_ssse3_begin
|
||||
.align 16
|
||||
.Lreverse_bytes:
|
||||
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
||||
.align 16
|
||||
.Llow4_mask:
|
||||
.long 252645135,252645135,252645135,252645135
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
|
@@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl gcm_gmult_4bit_mmx
.hidden gcm_gmult_4bit_mmx

@@ -1066,3 +1072,4 @@ gcm_ghash_clmul:
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif
.section .note.GNU-stack,"",@progbits
@@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl md5_block_asm_data_order
.hidden md5_block_asm_data_order

@@ -679,3 +685,4 @@ md5_block_asm_data_order:
ret
.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
#endif
.section .note.GNU-stack,"",@progbits
@@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl sha1_block_data_order
.hidden sha1_block_data_order

@@ -3799,3 +3805,4 @@ _sha1_block_data_order_avx:
.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits
@@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl sha256_block_data_order
.hidden sha256_block_data_order

@@ -5558,3 +5564,4 @@ sha256_block_data_order:
ret
.size sha256_block_data_order,.-.L_sha256_block_data_order_begin
#endif
.section .note.GNU-stack,"",@progbits
@@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl sha512_block_data_order
.hidden sha512_block_data_order

@@ -2828,3 +2834,4 @@ sha512_block_data_order:
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
#endif
.section .note.GNU-stack,"",@progbits
@@ -1,5 +1,13 @@
|
|||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__i386__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
#endif
|
||||
.align 64
|
||||
.L_vpaes_consts:
|
||||
.long 218628480,235210255,168496130,67568393
|
||||
|
@@ -477,6 +485,18 @@ vpaes_set_encrypt_key:
|
|||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call .L016pic
|
||||
.L016pic:
|
||||
popl %ebx
|
||||
leal BORINGSSL_function_hit+5-.L016pic(%ebx),%ebx
|
||||
movl $1,%edx
|
||||
movb %dl,(%ebx)
|
||||
popl %edx
|
||||
popl %ebx
|
||||
#endif
|
||||
movl 20(%esp),%esi
|
||||
leal -56(%esp),%ebx
|
||||
movl 24(%esp),%eax
|
||||
|
@@ -490,9 +510,9 @@ vpaes_set_encrypt_key:
|
|||
movl %ebx,240(%edx)
|
||||
movl $48,%ecx
|
||||
movl $0,%edi
|
||||
leal .L_vpaes_consts+0x30-.L016pic_point,%ebp
|
||||
leal .L_vpaes_consts+0x30-.L017pic_point,%ebp
|
||||
call _vpaes_schedule_core
|
||||
.L016pic_point:
|
||||
.L017pic_point:
|
||||
movl 48(%esp),%esp
|
||||
xorl %eax,%eax
|
||||
popl %edi
|
||||
|
@@ -529,9 +549,9 @@ vpaes_set_decrypt_key:
|
|||
shrl $1,%ecx
|
||||
andl $32,%ecx
|
||||
xorl $32,%ecx
|
||||
leal .L_vpaes_consts+0x30-.L017pic_point,%ebp
|
||||
leal .L_vpaes_consts+0x30-.L018pic_point,%ebp
|
||||
call _vpaes_schedule_core
|
||||
.L017pic_point:
|
||||
.L018pic_point:
|
||||
movl 48(%esp),%esp
|
||||
xorl %eax,%eax
|
||||
popl %edi
|
||||
|
@@ -550,9 +570,21 @@ vpaes_encrypt:
|
|||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
leal .L_vpaes_consts+0x30-.L018pic_point,%ebp
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call .L019pic
|
||||
.L019pic:
|
||||
popl %ebx
|
||||
leal BORINGSSL_function_hit+4-.L019pic(%ebx),%ebx
|
||||
movl $1,%edx
|
||||
movb %dl,(%ebx)
|
||||
popl %edx
|
||||
popl %ebx
|
||||
#endif
|
||||
leal .L_vpaes_consts+0x30-.L020pic_point,%ebp
|
||||
call _vpaes_preheat
|
||||
.L018pic_point:
|
||||
.L020pic_point:
|
||||
movl 20(%esp),%esi
|
||||
leal -56(%esp),%ebx
|
||||
movl 24(%esp),%edi
|
||||
|
@@ -580,9 +612,9 @@ vpaes_decrypt:
|
|||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
leal .L_vpaes_consts+0x30-.L019pic_point,%ebp
|
||||
leal .L_vpaes_consts+0x30-.L021pic_point,%ebp
|
||||
call _vpaes_preheat
|
||||
.L019pic_point:
|
||||
.L021pic_point:
|
||||
movl 20(%esp),%esi
|
||||
leal -56(%esp),%ebx
|
||||
movl 24(%esp),%edi
|
||||
|
@@ -615,7 +647,7 @@ vpaes_cbc_encrypt:
|
|||
movl 28(%esp),%eax
|
||||
movl 32(%esp),%edx
|
||||
subl $16,%eax
|
||||
jc .L020cbc_abort
|
||||
jc .L022cbc_abort
|
||||
leal -56(%esp),%ebx
|
||||
movl 36(%esp),%ebp
|
||||
andl $-16,%ebx
|
||||
|
@ -628,14 +660,14 @@ vpaes_cbc_encrypt:
|
|||
movl %edx,4(%esp)
|
||||
movl %ebp,8(%esp)
|
||||
movl %eax,%edi
|
||||
leal .L_vpaes_consts+0x30-.L021pic_point,%ebp
|
||||
leal .L_vpaes_consts+0x30-.L023pic_point,%ebp
|
||||
call _vpaes_preheat
|
||||
.L021pic_point:
|
||||
.L023pic_point:
|
||||
cmpl $0,%ecx
|
||||
je .L022cbc_dec_loop
|
||||
jmp .L023cbc_enc_loop
|
||||
je .L024cbc_dec_loop
|
||||
jmp .L025cbc_enc_loop
|
||||
.align 16
|
||||
.L023cbc_enc_loop:
|
||||
.L025cbc_enc_loop:
|
||||
movdqu (%esi),%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
call _vpaes_encrypt_core
|
||||
|
@ -645,10 +677,10 @@ vpaes_cbc_encrypt:
|
|||
movdqu %xmm0,(%ebx,%esi,1)
|
||||
leal 16(%esi),%esi
|
||||
subl $16,%edi
|
||||
jnc .L023cbc_enc_loop
|
||||
jmp .L024cbc_done
|
||||
jnc .L025cbc_enc_loop
|
||||
jmp .L026cbc_done
|
||||
.align 16
|
||||
.L022cbc_dec_loop:
|
||||
.L024cbc_dec_loop:
|
||||
movdqu (%esi),%xmm0
|
||||
movdqa %xmm1,16(%esp)
|
||||
movdqa %xmm0,32(%esp)
|
||||
|
@ -660,12 +692,12 @@ vpaes_cbc_encrypt:
|
|||
movdqu %xmm0,(%ebx,%esi,1)
|
||||
leal 16(%esi),%esi
|
||||
subl $16,%edi
|
||||
jnc .L022cbc_dec_loop
|
||||
.L024cbc_done:
|
||||
jnc .L024cbc_dec_loop
|
||||
.L026cbc_done:
|
||||
movl 8(%esp),%ebx
|
||||
movl 48(%esp),%esp
|
||||
movdqu %xmm1,(%ebx)
|
||||
.L020cbc_abort:
|
||||
.L022cbc_abort:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
|
@ -673,3 +705,4 @@ vpaes_cbc_encrypt:
|
|||
ret
|
||||
.size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
|
|
@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl bn_mul_mont
.hidden bn_mul_mont

@ -446,16 +452,18 @@ bn_mul_mont:
leal 1(%edx),%edx
jge .L017sub
sbbl $0,%eax
andl %eax,%esi
notl %eax
movl %edi,%ebp
andl %eax,%ebp
orl %ebp,%esi
movl $-1,%edx
xorl %eax,%edx
jmp .L018copy
.align 16
.L018copy:
movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4)
movl 32(%esp,%ebx,4),%esi
movl (%edi,%ebx,4),%ebp
movl %ecx,32(%esp,%ebx,4)
andl %eax,%esi
andl %edx,%ebp
orl %esi,%ebp
movl %ebp,(%edi,%ebx,4)
decl %ebx
jge .L018copy
movl 24(%esp),%esp

@ -473,3 +481,4 @@ bn_mul_mont:
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits

@ -0,0 +1,206 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl abi_test_trampoline
.hidden abi_test_trampoline
.type abi_test_trampoline,@function
.align 16
abi_test_trampoline:
.L_abi_test_trampoline_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 24(%esp),%ecx
movl (%ecx),%esi
movl 4(%ecx),%edi
movl 8(%ecx),%ebx
movl 12(%ecx),%ebp
subl $44,%esp
movl 72(%esp),%eax
xorl %ecx,%ecx
.L000loop:
cmpl 76(%esp),%ecx
jae .L001loop_done
movl (%eax,%ecx,4),%edx
movl %edx,(%esp,%ecx,4)
addl $1,%ecx
jmp .L000loop
.L001loop_done:
call *64(%esp)
addl $44,%esp
movl 24(%esp),%ecx
movl %esi,(%ecx)
movl %edi,4(%ecx)
movl %ebx,8(%ecx)
movl %ebp,12(%ecx)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size abi_test_trampoline,.-.L_abi_test_trampoline_begin
.globl abi_test_get_and_clear_direction_flag
.hidden abi_test_get_and_clear_direction_flag
.type abi_test_get_and_clear_direction_flag,@function
.align 16
abi_test_get_and_clear_direction_flag:
.L_abi_test_get_and_clear_direction_flag_begin:
pushfl
popl %eax
andl $1024,%eax
shrl $10,%eax
cld
ret
.size abi_test_get_and_clear_direction_flag,.-.L_abi_test_get_and_clear_direction_flag_begin
.globl abi_test_set_direction_flag
.hidden abi_test_set_direction_flag
.type abi_test_set_direction_flag,@function
.align 16
abi_test_set_direction_flag:
.L_abi_test_set_direction_flag_begin:
std
ret
.size abi_test_set_direction_flag,.-.L_abi_test_set_direction_flag_begin
.globl abi_test_clobber_eax
.hidden abi_test_clobber_eax
.type abi_test_clobber_eax,@function
.align 16
abi_test_clobber_eax:
.L_abi_test_clobber_eax_begin:
xorl %eax,%eax
ret
.size abi_test_clobber_eax,.-.L_abi_test_clobber_eax_begin
.globl abi_test_clobber_ebx
.hidden abi_test_clobber_ebx
.type abi_test_clobber_ebx,@function
.align 16
abi_test_clobber_ebx:
.L_abi_test_clobber_ebx_begin:
xorl %ebx,%ebx
ret
.size abi_test_clobber_ebx,.-.L_abi_test_clobber_ebx_begin
.globl abi_test_clobber_ecx
.hidden abi_test_clobber_ecx
.type abi_test_clobber_ecx,@function
.align 16
abi_test_clobber_ecx:
.L_abi_test_clobber_ecx_begin:
xorl %ecx,%ecx
ret
.size abi_test_clobber_ecx,.-.L_abi_test_clobber_ecx_begin
.globl abi_test_clobber_edx
.hidden abi_test_clobber_edx
.type abi_test_clobber_edx,@function
.align 16
abi_test_clobber_edx:
.L_abi_test_clobber_edx_begin:
xorl %edx,%edx
ret
.size abi_test_clobber_edx,.-.L_abi_test_clobber_edx_begin
.globl abi_test_clobber_edi
.hidden abi_test_clobber_edi
.type abi_test_clobber_edi,@function
.align 16
abi_test_clobber_edi:
.L_abi_test_clobber_edi_begin:
xorl %edi,%edi
ret
.size abi_test_clobber_edi,.-.L_abi_test_clobber_edi_begin
.globl abi_test_clobber_esi
.hidden abi_test_clobber_esi
.type abi_test_clobber_esi,@function
.align 16
abi_test_clobber_esi:
.L_abi_test_clobber_esi_begin:
xorl %esi,%esi
ret
.size abi_test_clobber_esi,.-.L_abi_test_clobber_esi_begin
.globl abi_test_clobber_ebp
.hidden abi_test_clobber_ebp
.type abi_test_clobber_ebp,@function
.align 16
abi_test_clobber_ebp:
.L_abi_test_clobber_ebp_begin:
xorl %ebp,%ebp
ret
.size abi_test_clobber_ebp,.-.L_abi_test_clobber_ebp_begin
.globl abi_test_clobber_xmm0
.hidden abi_test_clobber_xmm0
.type abi_test_clobber_xmm0,@function
.align 16
abi_test_clobber_xmm0:
.L_abi_test_clobber_xmm0_begin:
pxor %xmm0,%xmm0
ret
.size abi_test_clobber_xmm0,.-.L_abi_test_clobber_xmm0_begin
.globl abi_test_clobber_xmm1
.hidden abi_test_clobber_xmm1
.type abi_test_clobber_xmm1,@function
.align 16
abi_test_clobber_xmm1:
.L_abi_test_clobber_xmm1_begin:
pxor %xmm1,%xmm1
ret
.size abi_test_clobber_xmm1,.-.L_abi_test_clobber_xmm1_begin
.globl abi_test_clobber_xmm2
.hidden abi_test_clobber_xmm2
.type abi_test_clobber_xmm2,@function
.align 16
abi_test_clobber_xmm2:
.L_abi_test_clobber_xmm2_begin:
pxor %xmm2,%xmm2
ret
.size abi_test_clobber_xmm2,.-.L_abi_test_clobber_xmm2_begin
.globl abi_test_clobber_xmm3
.hidden abi_test_clobber_xmm3
.type abi_test_clobber_xmm3,@function
.align 16
abi_test_clobber_xmm3:
.L_abi_test_clobber_xmm3_begin:
pxor %xmm3,%xmm3
ret
.size abi_test_clobber_xmm3,.-.L_abi_test_clobber_xmm3_begin
.globl abi_test_clobber_xmm4
.hidden abi_test_clobber_xmm4
.type abi_test_clobber_xmm4,@function
.align 16
abi_test_clobber_xmm4:
.L_abi_test_clobber_xmm4_begin:
pxor %xmm4,%xmm4
ret
.size abi_test_clobber_xmm4,.-.L_abi_test_clobber_xmm4_begin
.globl abi_test_clobber_xmm5
.hidden abi_test_clobber_xmm5
.type abi_test_clobber_xmm5,@function
.align 16
abi_test_clobber_xmm5:
.L_abi_test_clobber_xmm5_begin:
pxor %xmm5,%xmm5
ret
.size abi_test_clobber_xmm5,.-.L_abi_test_clobber_xmm5_begin
.globl abi_test_clobber_xmm6
.hidden abi_test_clobber_xmm6
.type abi_test_clobber_xmm6,@function
.align 16
abi_test_clobber_xmm6:
.L_abi_test_clobber_xmm6_begin:
pxor %xmm6,%xmm6
ret
.size abi_test_clobber_xmm6,.-.L_abi_test_clobber_xmm6_begin
.globl abi_test_clobber_xmm7
.hidden abi_test_clobber_xmm7
.type abi_test_clobber_xmm7,@function
.align 16
abi_test_clobber_xmm7:
.L_abi_test_clobber_xmm7_begin:
pxor %xmm7,%xmm7
ret
.size abi_test_clobber_xmm7,.-.L_abi_test_clobber_xmm7_begin
#endif
.section .note.GNU-stack,"",@progbits

@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text

.extern OPENSSL_ia32cap_P

@ -38,6 +50,7 @@
.type ChaCha20_ctr32,@function
.align 64
ChaCha20_ctr32:
.cfi_startproc
cmpq $0,%rdx
je .Lno_data
movq OPENSSL_ia32cap_P+4(%rip),%r10

@ -45,12 +58,25 @@ ChaCha20_ctr32:
jnz .LChaCha20_ssse3

pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset r15,-56
subq $64+24,%rsp
.cfi_adjust_cfa_offset 88
.Lctr32_body:

@ -291,20 +317,30 @@ ChaCha20_ctr32:
.Ldone:
leaq 64+24+48(%rsp),%rsi
movq -48(%rsi),%r15
.cfi_restore r15
movq -40(%rsi),%r14
.cfi_restore r14
movq -32(%rsi),%r13
.cfi_restore r13
movq -24(%rsi),%r12
.cfi_restore r12
movq -16(%rsi),%rbp
.cfi_restore rbp
movq -8(%rsi),%rbx
.cfi_restore rbx
leaq (%rsi),%rsp
.cfi_adjust_cfa_offset -136
.Lno_data:
.byte 0xf3,0xc3
.cfi_endproc
.size ChaCha20_ctr32,.-ChaCha20_ctr32
.type ChaCha20_ssse3,@function
.align 32
ChaCha20_ssse3:
.LChaCha20_ssse3:
.cfi_startproc
movq %rsp,%r9
.cfi_def_cfa_register r9
cmpq $128,%rdx
ja .LChaCha20_4x

@ -430,14 +466,18 @@ ChaCha20_ssse3:
.Ldone_ssse3:
leaq (%r9),%rsp
.cfi_def_cfa_register rsp
.Lssse3_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size ChaCha20_ssse3,.-ChaCha20_ssse3
.type ChaCha20_4x,@function
.align 32
ChaCha20_4x:
.LChaCha20_4x:
.cfi_startproc
movq %rsp,%r9
.cfi_def_cfa_register r9
movq %r10,%r11
shrq $32,%r10
testq $32,%r10

@ -978,14 +1018,18 @@ ChaCha20_4x:
.Ldone4x:
leaq (%r9),%rsp
.cfi_def_cfa_register rsp
.L4x_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size ChaCha20_4x,.-ChaCha20_4x
.type ChaCha20_8x,@function
.align 32
ChaCha20_8x:
.LChaCha20_8x:
.cfi_startproc
movq %rsp,%r9
.cfi_def_cfa_register r9
subq $0x280+8,%rsp
andq $-32,%rsp
vzeroupper

@ -1580,7 +1624,10 @@ ChaCha20_8x:
.Ldone8x:
vzeroall
leaq (%r9),%rsp
.cfi_def_cfa_register rsp
.L8x_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size ChaCha20_8x,.-ChaCha20_8x
#endif
.section .note.GNU-stack,"",@progbits

@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.data

.align 16

@ -3064,3 +3076,4 @@ aes256gcmsiv_kdf:
.cfi_endproc
.size aes256gcmsiv_kdf, .-aes256gcmsiv_kdf
#endif
.section .note.GNU-stack,"",@progbits

@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

@ -8972,3 +8984,4 @@ seal_avx2_short_tail:
jmp seal_sse_tail_16
.cfi_endproc
#endif
.section .note.GNU-stack,"",@progbits

@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.type _x86_64_AES_encrypt,@function
.align 16

@ -156,6 +168,7 @@ _x86_64_AES_encrypt:
.type _x86_64_AES_encrypt_compact,@function
.align 16
_x86_64_AES_encrypt_compact:
.cfi_startproc
leaq 128(%r14),%r8
movl 0-128(%r8),%edi
movl 32-128(%r8),%ebp

@ -325,20 +338,29 @@ _x86_64_AES_encrypt_compact:
xorl 8(%r15),%ecx
xorl 12(%r15),%edx
.byte 0xf3,0xc3
.cfi_endproc
.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
.align 16
.globl asm_AES_encrypt
.hidden asm_AES_encrypt
.type asm_AES_encrypt,@function
.hidden asm_AES_encrypt
asm_AES_encrypt:
.globl aes_nohw_encrypt
.hidden aes_nohw_encrypt
.type aes_nohw_encrypt,@function
.hidden aes_nohw_encrypt
aes_nohw_encrypt:
.cfi_startproc
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56

leaq -63(%rdx),%rcx

@ -351,6 +373,7 @@ asm_AES_encrypt:
movq %rsi,16(%rsp)
movq %rax,24(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x18,0x06,0x23,0x08
.Lenc_prologue:

movq %rdx,%r15

@ -377,21 +400,30 @@ asm_AES_encrypt:
movq 16(%rsp),%r9
movq 24(%rsp),%rsi
.cfi_def_cfa %rsi,8
movl %eax,0(%r9)
movl %ebx,4(%r9)
movl %ecx,8(%r9)
movl %edx,12(%r9)

movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lenc_epilogue:
.byte 0xf3,0xc3
.size asm_AES_encrypt,.-asm_AES_encrypt
.cfi_endproc
.size aes_nohw_encrypt,.-aes_nohw_encrypt
.type _x86_64_AES_decrypt,@function
.align 16
_x86_64_AES_decrypt:

@ -550,6 +582,7 @@ _x86_64_AES_decrypt:
.type _x86_64_AES_decrypt_compact,@function
.align 16
_x86_64_AES_decrypt_compact:
.cfi_startproc
leaq 128(%r14),%r8
movl 0-128(%r8),%edi
movl 32-128(%r8),%ebp

@ -771,20 +804,29 @@ _x86_64_AES_decrypt_compact:
xorl 8(%r15),%ecx
xorl 12(%r15),%edx
.byte 0xf3,0xc3
.cfi_endproc
.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
.align 16
.globl asm_AES_decrypt
.hidden asm_AES_decrypt
.type asm_AES_decrypt,@function
.hidden asm_AES_decrypt
asm_AES_decrypt:
.globl aes_nohw_decrypt
.hidden aes_nohw_decrypt
.type aes_nohw_decrypt,@function
.hidden aes_nohw_decrypt
aes_nohw_decrypt:
.cfi_startproc
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56

leaq -63(%rdx),%rcx

@ -797,6 +839,7 @@ asm_AES_decrypt:
movq %rsi,16(%rsp)
movq %rax,24(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x18,0x06,0x23,0x08
.Ldec_prologue:

movq %rdx,%r15

@ -825,47 +868,75 @@ asm_AES_decrypt:
movq 16(%rsp),%r9
movq 24(%rsp),%rsi
.cfi_def_cfa %rsi,8
movl %eax,0(%r9)
movl %ebx,4(%r9)
movl %ecx,8(%r9)
movl %edx,12(%r9)

movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Ldec_epilogue:
.byte 0xf3,0xc3
.size asm_AES_decrypt,.-asm_AES_decrypt
.cfi_endproc
.size aes_nohw_decrypt,.-aes_nohw_decrypt
.align 16
.globl asm_AES_set_encrypt_key
.hidden asm_AES_set_encrypt_key
.type asm_AES_set_encrypt_key,@function
asm_AES_set_encrypt_key:
.globl aes_nohw_set_encrypt_key
.hidden aes_nohw_set_encrypt_key
.type aes_nohw_set_encrypt_key,@function
aes_nohw_set_encrypt_key:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $8,%rsp
.cfi_adjust_cfa_offset 8
.Lenc_key_prologue:

call _x86_64_AES_set_encrypt_key

movq 40(%rsp),%rbp
.cfi_restore %rbp
movq 48(%rsp),%rbx
.cfi_restore %rbx
addq $56,%rsp
.cfi_adjust_cfa_offset -56
.Lenc_key_epilogue:
.byte 0xf3,0xc3
.size asm_AES_set_encrypt_key,.-asm_AES_set_encrypt_key
.cfi_endproc
.size aes_nohw_set_encrypt_key,.-aes_nohw_set_encrypt_key

.type _x86_64_AES_set_encrypt_key,@function
.align 16
_x86_64_AES_set_encrypt_key:
.cfi_startproc
movl %esi,%ecx
movq %rdi,%rsi
movq %rdx,%rdi

@ -1101,19 +1172,34 @@ _x86_64_AES_set_encrypt_key:
movq $-1,%rax
.Lexit:
.byte 0xf3,0xc3
.cfi_endproc
.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
.align 16
.globl asm_AES_set_decrypt_key
.hidden asm_AES_set_decrypt_key
.type asm_AES_set_decrypt_key,@function
asm_AES_set_decrypt_key:
.globl aes_nohw_set_decrypt_key
.hidden aes_nohw_set_decrypt_key
.type aes_nohw_set_decrypt_key,@function
aes_nohw_set_decrypt_key:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
pushq %rdx
.cfi_adjust_cfa_offset 8
.Ldec_key_prologue:

call _x86_64_AES_set_encrypt_key

@ -1281,32 +1367,56 @@ asm_AES_set_decrypt_key:
xorq %rax,%rax
.Labort:
movq 8(%rsp),%r15
.cfi_restore %r15
movq 16(%rsp),%r14
.cfi_restore %r14
movq 24(%rsp),%r13
.cfi_restore %r13
movq 32(%rsp),%r12
.cfi_restore %r12
movq 40(%rsp),%rbp
.cfi_restore %rbp
movq 48(%rsp),%rbx
.cfi_restore %rbx
addq $56,%rsp
.cfi_adjust_cfa_offset -56
.Ldec_key_epilogue:
.byte 0xf3,0xc3
.size asm_AES_set_decrypt_key,.-asm_AES_set_decrypt_key
.cfi_endproc
.size aes_nohw_set_decrypt_key,.-aes_nohw_set_decrypt_key
.align 16
.globl asm_AES_cbc_encrypt
.hidden asm_AES_cbc_encrypt
.type asm_AES_cbc_encrypt,@function
.globl aes_nohw_cbc_encrypt
.hidden aes_nohw_cbc_encrypt
.type aes_nohw_cbc_encrypt,@function
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
.hidden asm_AES_cbc_encrypt
asm_AES_cbc_encrypt:
.hidden aes_nohw_cbc_encrypt
aes_nohw_cbc_encrypt:
.cfi_startproc
cmpq $0,%rdx
je .Lcbc_epilogue
pushfq

.cfi_adjust_cfa_offset 8
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-32
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-40
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-48
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-56
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-64
.Lcbc_prologue:

cld

@ -1317,6 +1427,7 @@ asm_AES_cbc_encrypt:
cmpq $0,%r9
cmoveq %r10,%r14

.cfi_remember_state
leaq OPENSSL_ia32cap_P(%rip),%r10
movl (%r10),%r10d
cmpq $512,%rdx

@ -1352,8 +1463,10 @@ asm_AES_cbc_encrypt:
.Lcbc_te_ok:

xchgq %rsp,%r15
.cfi_def_cfa_register %r15

movq %r15,16(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x40
.Lcbc_fast_body:
movq %rdi,24(%rsp)
movq %rsi,32(%rsp)

@ -1551,6 +1664,7 @@ asm_AES_cbc_encrypt:
.align 16
.Lcbc_slow_prologue:
.cfi_restore_state

leaq -88(%rsp),%rbp
andq $-64,%rbp

@ -1562,8 +1676,10 @@ asm_AES_cbc_encrypt:
subq %r10,%rbp

xchgq %rsp,%rbp
.cfi_def_cfa_register %rbp

movq %rbp,16(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x40
.Lcbc_slow_body:

@ -1735,18 +1851,30 @@ asm_AES_cbc_encrypt:
.align 16
.Lcbc_exit:
movq 16(%rsp),%rsi
.cfi_def_cfa %rsi,64
movq (%rsi),%r15
.cfi_restore %r15
movq 8(%rsi),%r14
.cfi_restore %r14
movq 16(%rsi),%r13
.cfi_restore %r13
movq 24(%rsi),%r12
.cfi_restore %r12
movq 32(%rsi),%rbp
.cfi_restore %rbp
movq 40(%rsi),%rbx
.cfi_restore %rbx
leaq 48(%rsi),%rsp
.cfi_def_cfa %rsp,16
.Lcbc_popfq:
popfq

.cfi_adjust_cfa_offset -8
.Lcbc_epilogue:
.byte 0xf3,0xc3
.size asm_AES_cbc_encrypt,.-asm_AES_cbc_encrypt
.cfi_endproc
.size aes_nohw_cbc_encrypt,.-aes_nohw_cbc_encrypt
.align 64
.LAES_Te:
.long 0xa56363c6,0xa56363c6

@ -2534,3 +2662,4 @@ asm_AES_cbc_encrypt:
.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
.section .note.GNU-stack,"",@progbits

@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text

.type _aesni_ctr32_ghash_6x,@function

@ -544,6 +556,11 @@ _aesni_ctr32_6x:
.align 32
aesni_gcm_encrypt:
.cfi_startproc
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+2(%rip)
#endif
xorq %r10,%r10

@ -832,3 +849,4 @@ aesni_gcm_encrypt:
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large
File diff suppressed because it is too large

427 packager/third_party/boringssl/linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S vendored Normal file

@ -0,0 +1,427 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text

.type gcm_gmult_ssse3, @function
.globl gcm_gmult_ssse3
.hidden gcm_gmult_ssse3
.align 16
gcm_gmult_ssse3:
.cfi_startproc
.Lgmult_seh_begin:
movdqu (%rdi),%xmm0
movdqa .Lreverse_bytes(%rip),%xmm10
movdqa .Llow4_mask(%rip),%xmm2

.byte 102,65,15,56,0,194

movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0

pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
.Loop_row_1:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi

movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2

movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233

pxor %xmm5,%xmm2

movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3

psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2

subq $1,%rax
jnz .Loop_row_1

pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
.Loop_row_2:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi

movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2

movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233

pxor %xmm5,%xmm2

movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3

psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2

subq $1,%rax
jnz .Loop_row_2

pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $6,%rax
.Loop_row_3:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi

movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2

movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233

pxor %xmm5,%xmm2

movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3

psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2

subq $1,%rax
jnz .Loop_row_3

pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3

.byte 102,65,15,56,0,210
movdqu %xmm2,(%rdi)

pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
.Lgmult_seh_end:
.cfi_endproc
.size gcm_gmult_ssse3,.-gcm_gmult_ssse3

.type gcm_ghash_ssse3, @function
.globl gcm_ghash_ssse3
.hidden gcm_ghash_ssse3
.align 16
gcm_ghash_ssse3:
.Lghash_seh_begin:
.cfi_startproc
movdqu (%rdi),%xmm0
movdqa .Lreverse_bytes(%rip),%xmm10
movdqa .Llow4_mask(%rip),%xmm11

andq $-16,%rcx

.byte 102,65,15,56,0,194

pxor %xmm3,%xmm3
.Loop_ghash:

movdqu (%rdx),%xmm1
.byte 102,65,15,56,0,202
pxor %xmm1,%xmm0

movdqa %xmm11,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm11,%xmm0

pxor %xmm2,%xmm2

movq $5,%rax
.Loop_row_4:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi

movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2

movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233

pxor %xmm5,%xmm2

movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3

psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2

subq $1,%rax
jnz .Loop_row_4

pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
.Loop_row_5:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi

movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2

movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233

pxor %xmm5,%xmm2

movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3

psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2

subq $1,%rax
jnz .Loop_row_5

pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $6,%rax
.Loop_row_6:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi

movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2

movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233

pxor %xmm5,%xmm2

movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3

psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2

subq $1,%rax
jnz .Loop_row_6

pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movdqa %xmm2,%xmm0

leaq -256(%rsi),%rsi

leaq 16(%rdx),%rdx
subq $16,%rcx
jnz .Loop_ghash

.byte 102,65,15,56,0,194
movdqu %xmm0,(%rdi)

pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
.Lghash_seh_end:
.cfi_endproc
.size gcm_ghash_ssse3,.-gcm_ghash_ssse3

.align 16

.Lreverse_bytes:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0

.Llow4_mask:
.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
#endif
.section .note.GNU-stack,"",@progbits

@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

@ -8,13 +20,27 @@
.type gcm_gmult_4bit,@function
.align 16
gcm_gmult_4bit:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lgmult_prologue:

movzbq 15(%rdi),%r8

@ -92,23 +118,41 @@ gcm_gmult_4bit:
movq %r9,(%rdi)

leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lgmult_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_4bit,.-gcm_gmult_4bit
.globl gcm_ghash_4bit
.hidden gcm_ghash_4bit
.type gcm_ghash_4bit,@function
.align 16
gcm_ghash_4bit:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lghash_prologue:
movq %rdx,%r14
movq %rcx,%r15

@ -654,21 +698,31 @@ gcm_ghash_4bit:
movq %r9,(%rdi)

leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq 0(%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lghash_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_4bit,.-gcm_ghash_4bit
.globl gcm_init_clmul
.hidden gcm_init_clmul
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
.cfi_startproc
.L_init_clmul:
movdqu (%rsi),%xmm2
pshufd $78,%xmm2,%xmm2

@ -820,12 +874,14 @@ gcm_init_clmul:
.byte 102,15,58,15,227,8
movdqu %xmm4,80(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_init_clmul,.-gcm_init_clmul
.globl gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
.cfi_startproc
.L_gmult_clmul:
movdqu (%rdi),%xmm0
movdqa .Lbswap_mask(%rip),%xmm5

@ -872,12 +928,14 @@ gcm_gmult_clmul:
.byte 102,15,56,0,197
movdqu %xmm0,(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_clmul,.-gcm_gmult_clmul
.globl gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 32
gcm_ghash_clmul:
.cfi_startproc
.L_ghash_clmul:
movdqa .Lbswap_mask(%rip),%xmm10

@ -1257,12 +1315,14 @@ gcm_ghash_clmul:
.byte 102,65,15,56,0,194
movdqu %xmm0,(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_clmul,.-gcm_ghash_clmul
.globl gcm_init_avx
.hidden gcm_init_avx
.type gcm_init_avx,@function
.align 32
gcm_init_avx:
.cfi_startproc
vzeroupper

vmovdqu (%rsi),%xmm2

@ -1365,19 +1425,23 @@ gcm_init_avx:
vzeroupper
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_init_avx,.-gcm_init_avx
.globl gcm_gmult_avx
.hidden gcm_gmult_avx
.type gcm_gmult_avx,@function
.align 32
gcm_gmult_avx:
.cfi_startproc
jmp .L_gmult_clmul
.cfi_endproc
.size gcm_gmult_avx,.-gcm_gmult_avx
.globl gcm_ghash_avx
.hidden gcm_ghash_avx
.type gcm_ghash_avx,@function
.align 32
gcm_ghash_avx:
.cfi_startproc
vzeroupper

vmovdqu (%rdi),%xmm10

@ -1749,6 +1813,7 @@ gcm_ghash_avx:
vmovdqu %xmm10,(%rdi)
vzeroupper
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_avx,.-gcm_ghash_avx
.align 64
.Lbswap_mask:

@ -1804,3 +1869,4 @@ gcm_ghash_avx:
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
.section .note.GNU-stack,"",@progbits

@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.align 16

@ -6,11 +18,22 @@
.hidden md5_block_asm_data_order
.type md5_block_asm_data_order,@function
md5_block_asm_data_order:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12,-32
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset r14,-40
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset r15,-48
.Lprologue:

@ -660,12 +683,20 @@ md5_block_asm_data_order:
movl %edx,12(%rbp)

movq (%rsp),%r15
.cfi_restore r15
movq 8(%rsp),%r14
.cfi_restore r14
movq 16(%rsp),%r12
.cfi_restore r12
movq 24(%rsp),%rbx
.cfi_restore rbx
movq 32(%rsp),%rbp
.cfi_restore rbp
addq $40,%rsp
.cfi_adjust_cfa_offset -40
.Lepilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size md5_block_asm_data_order,.-md5_block_asm_data_order
#endif
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large

343 packager/third_party/boringssl/linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S vendored Normal file

@ -0,0 +1,343 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text

.type beeu_mod_inverse_vartime,@function
.hidden beeu_mod_inverse_vartime
.globl beeu_mod_inverse_vartime
.hidden beeu_mod_inverse_vartime
.align 32
beeu_mod_inverse_vartime:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset rbp,-16
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12,-24
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset r13,-32
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset r14,-40
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset r15,-48
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset rbx,-56
pushq %rsi
.cfi_adjust_cfa_offset 8
.cfi_offset rsi,-64

subq $80,%rsp
.cfi_adjust_cfa_offset 80
movq %rdi,0(%rsp)

movq $1,%r8
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
xorq %rdi,%rdi

xorq %r12,%r12
xorq %r13,%r13
xorq %r14,%r14
xorq %r15,%r15
xorq %rbp,%rbp

vmovdqu 0(%rsi),%xmm0
vmovdqu 16(%rsi),%xmm1
vmovdqu %xmm0,48(%rsp)
vmovdqu %xmm1,64(%rsp)

vmovdqu 0(%rdx),%xmm0
vmovdqu 16(%rdx),%xmm1
vmovdqu %xmm0,16(%rsp)
vmovdqu %xmm1,32(%rsp)

.Lbeeu_loop:
xorq %rbx,%rbx
orq 48(%rsp),%rbx
orq 56(%rsp),%rbx
orq 64(%rsp),%rbx
orq 72(%rsp),%rbx
jz .Lbeeu_loop_end

movq $1,%rcx

.Lbeeu_shift_loop_XB:
movq %rcx,%rbx
andq 48(%rsp),%rbx
jnz .Lbeeu_shift_loop_end_XB

movq $1,%rbx
andq %r8,%rbx
jz .Lshift1_0
addq 0(%rdx),%r8
adcq 8(%rdx),%r9
adcq 16(%rdx),%r10
adcq 24(%rdx),%r11
adcq $0,%rdi

.Lshift1_0:
shrdq $1,%r9,%r8
shrdq $1,%r10,%r9
shrdq $1,%r11,%r10
shrdq $1,%rdi,%r11
shrq $1,%rdi

shlq $1,%rcx

cmpq $0x8000000,%rcx
jne .Lbeeu_shift_loop_XB

.Lbeeu_shift_loop_end_XB:
bsfq %rcx,%rcx
testq %rcx,%rcx
jz .Lbeeu_no_shift_XB

movq 8+48(%rsp),%rax
movq 16+48(%rsp),%rbx
movq 24+48(%rsp),%rsi

shrdq %cl,%rax,0+48(%rsp)
shrdq %cl,%rbx,8+48(%rsp)
shrdq %cl,%rsi,16+48(%rsp)

shrq %cl,%rsi
movq %rsi,24+48(%rsp)

.Lbeeu_no_shift_XB:

movq $1,%rcx

.Lbeeu_shift_loop_YA:
movq %rcx,%rbx
andq 16(%rsp),%rbx
jnz .Lbeeu_shift_loop_end_YA

movq $1,%rbx
andq %r12,%rbx
jz .Lshift1_1
addq 0(%rdx),%r12
adcq 8(%rdx),%r13
adcq 16(%rdx),%r14
adcq 24(%rdx),%r15
adcq $0,%rbp

.Lshift1_1:
shrdq $1,%r13,%r12
shrdq $1,%r14,%r13
shrdq $1,%r15,%r14
shrdq $1,%rbp,%r15
shrq $1,%rbp

shlq $1,%rcx

cmpq $0x8000000,%rcx
jne .Lbeeu_shift_loop_YA

.Lbeeu_shift_loop_end_YA:
bsfq %rcx,%rcx
testq %rcx,%rcx
jz .Lbeeu_no_shift_YA

movq 8+16(%rsp),%rax
movq 16+16(%rsp),%rbx
movq 24+16(%rsp),%rsi

shrdq %cl,%rax,0+16(%rsp)
shrdq %cl,%rbx,8+16(%rsp)
shrdq %cl,%rsi,16+16(%rsp)

shrq %cl,%rsi
movq %rsi,24+16(%rsp)

.Lbeeu_no_shift_YA:

movq 48(%rsp),%rax
movq 56(%rsp),%rbx
movq 64(%rsp),%rsi
movq 72(%rsp),%rcx
subq 16(%rsp),%rax
sbbq 24(%rsp),%rbx
sbbq 32(%rsp),%rsi
sbbq 40(%rsp),%rcx
jnc .Lbeeu_B_bigger_than_A

movq 16(%rsp),%rax
movq 24(%rsp),%rbx
movq 32(%rsp),%rsi
movq 40(%rsp),%rcx
subq 48(%rsp),%rax
sbbq 56(%rsp),%rbx
sbbq 64(%rsp),%rsi
sbbq 72(%rsp),%rcx
movq %rax,16(%rsp)
movq %rbx,24(%rsp)
movq %rsi,32(%rsp)
movq %rcx,40(%rsp)

addq %r8,%r12
adcq %r9,%r13
adcq %r10,%r14
adcq %r11,%r15
adcq %rdi,%rbp
jmp .Lbeeu_loop

.Lbeeu_B_bigger_than_A:

movq %rax,48(%rsp)
movq %rbx,56(%rsp)
movq %rsi,64(%rsp)
movq %rcx,72(%rsp)

addq %r12,%r8
adcq %r13,%r9
adcq %r14,%r10
adcq %r15,%r11
adcq %rbp,%rdi

jmp .Lbeeu_loop

.Lbeeu_loop_end:

movq 16(%rsp),%rbx
subq $1,%rbx
orq 24(%rsp),%rbx
orq 32(%rsp),%rbx
orq 40(%rsp),%rbx

jnz .Lbeeu_err

movq 0(%rdx),%r8
movq 8(%rdx),%r9
movq 16(%rdx),%r10
movq 24(%rdx),%r11
xorq %rdi,%rdi

.Lbeeu_reduction_loop:
movq %r12,16(%rsp)
movq %r13,24(%rsp)
movq %r14,32(%rsp)
movq %r15,40(%rsp)
movq %rbp,48(%rsp)

subq %r8,%r12
sbbq %r9,%r13
sbbq %r10,%r14
sbbq %r11,%r15
sbbq $0,%rbp

cmovcq 16(%rsp),%r12
cmovcq 24(%rsp),%r13
cmovcq 32(%rsp),%r14
cmovcq 40(%rsp),%r15
jnc .Lbeeu_reduction_loop

subq %r12,%r8
sbbq %r13,%r9
sbbq %r14,%r10
sbbq %r15,%r11

.Lbeeu_save:

movq 0(%rsp),%rdi

movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)

movq $1,%rax
jmp .Lbeeu_finish

.Lbeeu_err:

xorq %rax,%rax

.Lbeeu_finish:
addq $80,%rsp
.cfi_adjust_cfa_offset -80
popq %rsi
.cfi_adjust_cfa_offset -8
.cfi_restore rsi
popq %rbx
.cfi_adjust_cfa_offset -8
.cfi_restore rbx
popq %r15
.cfi_adjust_cfa_offset -8
.cfi_restore r15
popq %r14
.cfi_adjust_cfa_offset -8
.cfi_restore r14
popq %r13
.cfi_adjust_cfa_offset -8
.cfi_restore r13
popq %r12
.cfi_adjust_cfa_offset -8
.cfi_restore r12
popq %rbp
.cfi_adjust_cfa_offset -8
.cfi_restore rbp
.byte 0xf3,0xc3
.cfi_endproc

.size beeu_mod_inverse_vartime, .-beeu_mod_inverse_vartime
#endif
.section .note.GNU-stack,"",@progbits

@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text

@ -9,14 +21,15 @@
.type CRYPTO_rdrand,@function
.align 16
CRYPTO_rdrand:
.cfi_startproc
xorq %rax,%rax

.byte 0x48, 0x0f, 0xc7, 0xf1
.byte 72,15,199,242

adcq %rax,%rax
movq %rcx,0(%rdi)
movq %rdx,0(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size CRYPTO_rdrand,.-CRYPTO_rdrand

@ -27,13 +40,12 @@ CRYPTO_rdrand:
.type CRYPTO_rdrand_multiple8_buf,@function
.align 16
CRYPTO_rdrand_multiple8_buf:
.cfi_startproc
testq %rsi,%rsi
jz .Lout
movq $8,%rdx
.Lloop:

.byte 0x48, 0x0f, 0xc7, 0xf1
.byte 72,15,199,241
jnc .Lerr
movq %rcx,0(%rdi)
addq %rdx,%rdi

@ -45,4 +57,7 @@ CRYPTO_rdrand_multiple8_buf:
.Lerr:
xorq %rax,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size CRYPTO_rdrand_multiple8_buf,.-CRYPTO_rdrand_multiple8_buf
#endif
.section .note.GNU-stack,"",@progbits

@ -1,4 +1,16 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text

.globl rsaz_1024_sqr_avx2

@ -77,7 +89,7 @@ rsaz_1024_sqr_avx2:
vmovdqu 256-128(%rsi),%ymm8

leaq 192(%rsp),%rbx
vpbroadcastq .Land_mask(%rip),%ymm15
vmovdqu .Land_mask(%rip),%ymm15
jmp .LOOP_GRANDE_SQR_1024

.align 32

@ -829,10 +841,10 @@ rsaz_1024_mul_avx2:
vpmuludq 192-128(%rcx),%ymm11,%ymm12
vpaddq %ymm12,%ymm6,%ymm6
vpmuludq 224-128(%rcx),%ymm11,%ymm13
vpblendd $3,%ymm14,%ymm9,%ymm9
vpblendd $3,%ymm14,%ymm9,%ymm12
vpaddq %ymm13,%ymm7,%ymm7
vpmuludq 256-128(%rcx),%ymm11,%ymm0
vpaddq %ymm9,%ymm3,%ymm3
vpaddq %ymm12,%ymm3,%ymm3
vpaddq %ymm0,%ymm8,%ymm8

movq %rbx,%rax

@ -845,7 +857,9 @@ rsaz_1024_mul_avx2:
vmovdqu -8+64-128(%rsi),%ymm13

movq %r10,%rax
vpblendd $0xfc,%ymm14,%ymm9,%ymm9
imull %r8d,%eax
vpaddq %ymm9,%ymm4,%ymm4
andl $0x1fffffff,%eax

imulq 16-128(%rsi),%rbx

@ -1074,7 +1088,6 @@ rsaz_1024_mul_avx2:
decl %r14d
jnz .Loop_mul_1024
vpermq $0,%ymm15,%ymm15
vpaddq (%rsp),%ymm12,%ymm0

vpsrlq $29,%ymm0,%ymm12

@ -1215,6 +1228,7 @@ rsaz_1024_mul_avx2:
.type rsaz_1024_red2norm_avx2,@function
.align 32
rsaz_1024_red2norm_avx2:
.cfi_startproc
subq $-128,%rsi
xorq %rax,%rax
movq -128(%rsi),%r8

@ -1406,6 +1420,7 @@ rsaz_1024_red2norm_avx2:
movq %rax,120(%rdi)
movq %r11,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_1024_red2norm_avx2,.-rsaz_1024_red2norm_avx2

.globl rsaz_1024_norm2red_avx2

@ -1413,6 +1428,7 @@ rsaz_1024_red2norm_avx2:
.type rsaz_1024_norm2red_avx2,@function
.align 32
rsaz_1024_norm2red_avx2:
.cfi_startproc
subq $-128,%rdi
movq (%rsi),%r8
movl $0x1fffffff,%eax

@ -1565,12 +1581,14 @@ rsaz_1024_norm2red_avx2:
movq %r8,176(%rdi)
movq %r8,184(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_1024_norm2red_avx2,.-rsaz_1024_norm2red_avx2
.globl rsaz_1024_scatter5_avx2
.hidden rsaz_1024_scatter5_avx2
.type rsaz_1024_scatter5_avx2,@function
.align 32
rsaz_1024_scatter5_avx2:
.cfi_startproc
vzeroupper
vmovdqu .Lscatter_permd(%rip),%ymm5
shll $4,%edx

@ -1590,6 +1608,7 @@ rsaz_1024_scatter5_avx2:
vzeroupper
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_1024_scatter5_avx2,.-rsaz_1024_scatter5_avx2

.globl rsaz_1024_gather5_avx2

@ -1714,23 +1733,9 @@ rsaz_1024_gather5_avx2:
.cfi_endproc
.LSEH_end_rsaz_1024_gather5:
.size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
.globl rsaz_avx2_eligible
.hidden rsaz_avx2_eligible
.type rsaz_avx2_eligible,@function
.align 32
rsaz_avx2_eligible:
leaq OPENSSL_ia32cap_P(%rip),%rax
movl 8(%rax),%eax
andl $32,%eax
shrl $5,%eax
.byte 0xf3,0xc3
.size rsaz_avx2_eligible,.-rsaz_avx2_eligible

.align 64
.Land_mask:
.quad 0x1fffffff,0x1fffffff,0x1fffffff,-1
.quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff
.Lscatter_permd:
.long 0,2,4,6,7,7,7,7
.Lgather_permd:

@ -1741,3 +1746,4 @@ rsaz_avx2_eligible:
.long 4,4,4,4, 4,4,4,4
.align 64
#endif
.section .note.GNU-stack,"",@progbits

@ -1,4 +1,16 @@
|
|||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
.extern OPENSSL_ia32cap_P
|
||||
.hidden OPENSSL_ia32cap_P
|
||||
|
@ -8,6 +20,7 @@
|
|||
.type sha1_block_data_order,@function
|
||||
.align 16
|
||||
sha1_block_data_order:
|
||||
.cfi_startproc
|
||||
leaq OPENSSL_ia32cap_P(%rip),%r10
|
||||
movl 0(%r10),%r9d
|
||||
movl 4(%r10),%r8d
|
||||
|
@@ -24,17 +37,24 @@ sha1_block_data_order:
|
|||
.align 16
|
||||
.Lialu:
|
||||
movq %rsp,%rax
|
||||
.cfi_def_cfa_register %rax
|
||||
pushq %rbx
|
||||
.cfi_offset %rbx,-16
|
||||
pushq %rbp
|
||||
.cfi_offset %rbp,-24
|
||||
pushq %r12
|
||||
.cfi_offset %r12,-32
|
||||
pushq %r13
|
||||
.cfi_offset %r13,-40
|
||||
pushq %r14
|
||||
.cfi_offset %r14,-48
|
||||
movq %rdi,%r8
|
||||
subq $72,%rsp
|
||||
movq %rsi,%r9
|
||||
andq $-64,%rsp
|
||||
movq %rdx,%r10
|
||||
movq %rax,64(%rsp)
|
||||
.cfi_escape 0x0f,0x06,0x77,0xc0,0x00,0x06,0x23,0x08
|
||||
.Lprologue:
|
||||
|
||||
movl 0(%r8),%esi
|
||||
|
@@ -1229,25 +1249,40 @@ sha1_block_data_order:
|
|||
jnz .Lloop
|
||||
|
||||
movq 64(%rsp),%rsi
|
||||
.cfi_def_cfa %rsi,8
|
||||
movq -40(%rsi),%r14
|
||||
.cfi_restore %r14
|
||||
movq -32(%rsi),%r13
|
||||
.cfi_restore %r13
|
||||
movq -24(%rsi),%r12
|
||||
.cfi_restore %r12
|
||||
movq -16(%rsi),%rbp
|
||||
.cfi_restore %rbp
|
||||
movq -8(%rsi),%rbx
|
||||
.cfi_restore %rbx
|
||||
leaq (%rsi),%rsp
|
||||
.cfi_def_cfa_register %rsp
|
||||
.Lepilogue:
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size sha1_block_data_order,.-sha1_block_data_order
|
||||
.type sha1_block_data_order_ssse3,@function
|
||||
.align 16
|
||||
sha1_block_data_order_ssse3:
|
||||
_ssse3_shortcut:
|
||||
.cfi_startproc
|
||||
movq %rsp,%r11
|
||||
.cfi_def_cfa_register %r11
|
||||
pushq %rbx
|
||||
.cfi_offset %rbx,-16
|
||||
pushq %rbp
|
||||
.cfi_offset %rbp,-24
|
||||
pushq %r12
|
||||
.cfi_offset %r12,-32
|
||||
pushq %r13
|
||||
.cfi_offset %r13,-40
|
||||
pushq %r14
|
||||
.cfi_offset %r14,-48
|
||||
leaq -64(%rsp),%rsp
|
||||
andq $-64,%rsp
|
||||
movq %rdi,%r8
|
||||
|
@@ -2404,24 +2439,38 @@ _ssse3_shortcut:
|
|||
movl %edx,12(%r8)
|
||||
movl %ebp,16(%r8)
|
||||
movq -40(%r11),%r14
|
||||
.cfi_restore %r14
|
||||
movq -32(%r11),%r13
|
||||
.cfi_restore %r13
|
||||
movq -24(%r11),%r12
|
||||
.cfi_restore %r12
|
||||
movq -16(%r11),%rbp
|
||||
.cfi_restore %rbp
|
||||
movq -8(%r11),%rbx
|
||||
.cfi_restore %rbx
|
||||
leaq (%r11),%rsp
|
||||
.cfi_def_cfa_register %rsp
|
||||
.Lepilogue_ssse3:
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
|
||||
.type sha1_block_data_order_avx,@function
|
||||
.align 16
|
||||
sha1_block_data_order_avx:
|
||||
_avx_shortcut:
|
||||
.cfi_startproc
|
||||
movq %rsp,%r11
|
||||
.cfi_def_cfa_register %r11
|
||||
pushq %rbx
|
||||
.cfi_offset %rbx,-16
|
||||
pushq %rbp
|
||||
.cfi_offset %rbp,-24
|
||||
pushq %r12
|
||||
.cfi_offset %r12,-32
|
||||
pushq %r13
|
||||
.cfi_offset %r13,-40
|
||||
pushq %r14
|
||||
.cfi_offset %r14,-48
|
||||
leaq -64(%rsp),%rsp
|
||||
vzeroupper
|
||||
andq $-64,%rsp
|
||||
|
@@ -3518,13 +3567,20 @@ _avx_shortcut:
|
|||
movl %edx,12(%r8)
|
||||
movl %ebp,16(%r8)
|
||||
movq -40(%r11),%r14
|
||||
.cfi_restore %r14
|
||||
movq -32(%r11),%r13
|
||||
.cfi_restore %r13
|
||||
movq -24(%r11),%r12
|
||||
.cfi_restore %r12
|
||||
movq -16(%r11),%rbp
|
||||
.cfi_restore %rbp
|
||||
movq -8(%r11),%rbx
|
||||
.cfi_restore %rbx
|
||||
leaq (%r11),%rsp
|
||||
.cfi_def_cfa_register %rsp
|
||||
.Lepilogue_avx:
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size sha1_block_data_order_avx,.-sha1_block_data_order_avx
|
||||
.align 64
|
||||
K_XX_XX:
|
||||
|
@@ -3542,3 +3598,4 @@ K_XX_XX:
|
|||
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 64
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
|
|
@@ -1,4 +1,16 @@
|
|||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
|
||||
.extern OPENSSL_ia32cap_P
|
||||
|
@@ -8,6 +20,7 @@
|
|||
.type sha256_block_data_order,@function
|
||||
.align 16
|
||||
sha256_block_data_order:
|
||||
.cfi_startproc
|
||||
leaq OPENSSL_ia32cap_P(%rip),%r11
|
||||
movl 0(%r11),%r9d
|
||||
movl 4(%r11),%r10d
|
||||
|
@@ -20,12 +33,19 @@ sha256_block_data_order:
|
|||
testl $512,%r10d
|
||||
jnz .Lssse3_shortcut
|
||||
movq %rsp,%rax
|
||||
.cfi_def_cfa_register %rax
|
||||
pushq %rbx
|
||||
.cfi_offset %rbx,-16
|
||||
pushq %rbp
|
||||
.cfi_offset %rbp,-24
|
||||
pushq %r12
|
||||
.cfi_offset %r12,-32
|
||||
pushq %r13
|
||||
.cfi_offset %r13,-40
|
||||
pushq %r14
|
||||
.cfi_offset %r14,-48
|
||||
pushq %r15
|
||||
.cfi_offset %r15,-56
|
||||
shlq $4,%rdx
|
||||
subq $64+32,%rsp
|
||||
leaq (%rsi,%rdx,4),%rdx
|
||||
|
@@ -33,7 +53,8 @@ sha256_block_data_order:
|
|||
movq %rdi,64+0(%rsp)
|
||||
movq %rsi,64+8(%rsp)
|
||||
movq %rdx,64+16(%rsp)
|
||||
movq %rax,64+24(%rsp)
|
||||
movq %rax,88(%rsp)
|
||||
.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
|
||||
.Lprologue:
|
||||
|
||||
movl 0(%rdi),%eax
|
||||
|
@@ -1697,16 +1718,25 @@ sha256_block_data_order:
|
|||
movl %r11d,28(%rdi)
|
||||
jb .Lloop
|
||||
|
||||
movq 64+24(%rsp),%rsi
|
||||
movq 88(%rsp),%rsi
|
||||
.cfi_def_cfa %rsi,8
|
||||
movq -48(%rsi),%r15
|
||||
.cfi_restore %r15
|
||||
movq -40(%rsi),%r14
|
||||
.cfi_restore %r14
|
||||
movq -32(%rsi),%r13
|
||||
.cfi_restore %r13
|
||||
movq -24(%rsi),%r12
|
||||
.cfi_restore %r12
|
||||
movq -16(%rsi),%rbp
|
||||
.cfi_restore %rbp
|
||||
movq -8(%rsi),%rbx
|
||||
.cfi_restore %rbx
|
||||
leaq (%rsi),%rsp
|
||||
.cfi_def_cfa_register %rsp
|
||||
.Lepilogue:
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size sha256_block_data_order,.-sha256_block_data_order
|
||||
.align 64
|
||||
.type K256,@object
|
||||
|
@@ -1754,14 +1784,22 @@ K256:
|
|||
.type sha256_block_data_order_ssse3,@function
|
||||
.align 64
|
||||
sha256_block_data_order_ssse3:
|
||||
.cfi_startproc
|
||||
.Lssse3_shortcut:
|
||||
movq %rsp,%rax
|
||||
.cfi_def_cfa_register %rax
|
||||
pushq %rbx
|
||||
.cfi_offset %rbx,-16
|
||||
pushq %rbp
|
||||
.cfi_offset %rbp,-24
|
||||
pushq %r12
|
||||
.cfi_offset %r12,-32
|
||||
pushq %r13
|
||||
.cfi_offset %r13,-40
|
||||
pushq %r14
|
||||
.cfi_offset %r14,-48
|
||||
pushq %r15
|
||||
.cfi_offset %r15,-56
|
||||
shlq $4,%rdx
|
||||
subq $96,%rsp
|
||||
leaq (%rsi,%rdx,4),%rdx
|
||||
|
@@ -1769,7 +1807,8 @@ sha256_block_data_order_ssse3:
|
|||
movq %rdi,64+0(%rsp)
|
||||
movq %rsi,64+8(%rsp)
|
||||
movq %rdx,64+16(%rsp)
|
||||
movq %rax,64+24(%rsp)
|
||||
movq %rax,88(%rsp)
|
||||
.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
|
||||
.Lprologue_ssse3:
|
||||
|
||||
movl 0(%rdi),%eax
|
||||
|
@@ -2835,28 +2874,45 @@ sha256_block_data_order_ssse3:
|
|||
movl %r11d,28(%rdi)
|
||||
jb .Lloop_ssse3
|
||||
|
||||
movq 64+24(%rsp),%rsi
|
||||
movq 88(%rsp),%rsi
|
||||
.cfi_def_cfa %rsi,8
|
||||
movq -48(%rsi),%r15
|
||||
.cfi_restore %r15
|
||||
movq -40(%rsi),%r14
|
||||
.cfi_restore %r14
|
||||
movq -32(%rsi),%r13
|
||||
.cfi_restore %r13
|
||||
movq -24(%rsi),%r12
|
||||
.cfi_restore %r12
|
||||
movq -16(%rsi),%rbp
|
||||
.cfi_restore %rbp
|
||||
movq -8(%rsi),%rbx
|
||||
.cfi_restore %rbx
|
||||
leaq (%rsi),%rsp
|
||||
.cfi_def_cfa_register %rsp
|
||||
.Lepilogue_ssse3:
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size sha256_block_data_order_ssse3,.-sha256_block_data_order_ssse3
|
||||
.type sha256_block_data_order_avx,@function
|
||||
.align 64
|
||||
sha256_block_data_order_avx:
|
||||
.cfi_startproc
|
||||
.Lavx_shortcut:
|
||||
movq %rsp,%rax
|
||||
.cfi_def_cfa_register %rax
|
||||
pushq %rbx
|
||||
.cfi_offset %rbx,-16
|
||||
pushq %rbp
|
||||
.cfi_offset %rbp,-24
|
||||
pushq %r12
|
||||
.cfi_offset %r12,-32
|
||||
pushq %r13
|
||||
.cfi_offset %r13,-40
|
||||
pushq %r14
|
||||
.cfi_offset %r14,-48
|
||||
pushq %r15
|
||||
.cfi_offset %r15,-56
|
||||
shlq $4,%rdx
|
||||
subq $96,%rsp
|
||||
leaq (%rsi,%rdx,4),%rdx
|
||||
|
@@ -2864,7 +2920,8 @@ sha256_block_data_order_avx:
|
|||
movq %rdi,64+0(%rsp)
|
||||
movq %rsi,64+8(%rsp)
|
||||
movq %rdx,64+16(%rsp)
|
||||
movq %rax,64+24(%rsp)
|
||||
movq %rax,88(%rsp)
|
||||
.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
|
||||
.Lprologue_avx:
|
||||
|
||||
vzeroupper
|
||||
|
@@ -3891,16 +3948,26 @@ sha256_block_data_order_avx:
|
|||
movl %r11d,28(%rdi)
|
||||
jb .Lloop_avx
|
||||
|
||||
movq 64+24(%rsp),%rsi
|
||||
movq 88(%rsp),%rsi
|
||||
.cfi_def_cfa %rsi,8
|
||||
vzeroupper
|
||||
movq -48(%rsi),%r15
|
||||
.cfi_restore %r15
|
||||
movq -40(%rsi),%r14
|
||||
.cfi_restore %r14
|
||||
movq -32(%rsi),%r13
|
||||
.cfi_restore %r13
|
||||
movq -24(%rsi),%r12
|
||||
.cfi_restore %r12
|
||||
movq -16(%rsi),%rbp
|
||||
.cfi_restore %rbp
|
||||
movq -8(%rsi),%rbx
|
||||
.cfi_restore %rbx
|
||||
leaq (%rsi),%rsp
|
||||
.cfi_def_cfa_register %rsp
|
||||
.Lepilogue_avx:
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size sha256_block_data_order_avx,.-sha256_block_data_order_avx
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
|
File diff suppressed because it is too large
|
@@ -1,4 +1,16 @@
|
|||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
|
||||
|
||||
|
@@ -19,6 +31,7 @@
|
|||
.type _vpaes_encrypt_core,@function
|
||||
.align 16
|
||||
_vpaes_encrypt_core:
|
||||
.cfi_startproc
|
||||
movq %rdx,%r9
|
||||
movq $16,%r11
|
||||
movl 240(%rdx),%eax
|
||||
|
@@ -99,6 +112,7 @@ _vpaes_encrypt_core:
|
|||
pxor %xmm4,%xmm0
|
||||
.byte 102,15,56,0,193
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
|
||||
|
||||
|
||||
|
@@ -106,9 +120,185 @@ _vpaes_encrypt_core:
|
|||
.type _vpaes_encrypt_core_2x,@function
|
||||
.align 16
|
||||
_vpaes_encrypt_core_2x:
|
||||
.cfi_startproc
|
||||
movq %rdx,%r9
|
||||
movq $16,%r11
|
||||
movl 240(%rdx),%eax
|
||||
movdqa %xmm9,%xmm1
|
||||
movdqa %xmm9,%xmm7
|
||||
movdqa .Lk_ipt(%rip),%xmm2
|
||||
movdqa %xmm2,%xmm8
|
||||
pandn %xmm0,%xmm1
|
||||
pandn %xmm6,%xmm7
|
||||
movdqu (%r9),%xmm5
|
||||
|
||||
psrld $4,%xmm1
|
||||
psrld $4,%xmm7
|
||||
pand %xmm9,%xmm0
|
||||
pand %xmm9,%xmm6
|
||||
.byte 102,15,56,0,208
|
||||
.byte 102,68,15,56,0,198
|
||||
movdqa .Lk_ipt+16(%rip),%xmm0
|
||||
movdqa %xmm0,%xmm6
|
||||
.byte 102,15,56,0,193
|
||||
.byte 102,15,56,0,247
|
||||
pxor %xmm5,%xmm2
|
||||
pxor %xmm5,%xmm8
|
||||
addq $16,%r9
|
||||
pxor %xmm2,%xmm0
|
||||
pxor %xmm8,%xmm6
|
||||
leaq .Lk_mc_backward(%rip),%r10
|
||||
jmp .Lenc2x_entry
|
||||
|
||||
.align 16
|
||||
.Lenc2x_loop:
|
||||
|
||||
movdqa .Lk_sb1(%rip),%xmm4
|
||||
movdqa .Lk_sb1+16(%rip),%xmm0
|
||||
movdqa %xmm4,%xmm12
|
||||
movdqa %xmm0,%xmm6
|
||||
.byte 102,15,56,0,226
|
||||
.byte 102,69,15,56,0,224
|
||||
.byte 102,15,56,0,195
|
||||
.byte 102,65,15,56,0,243
|
||||
pxor %xmm5,%xmm4
|
||||
pxor %xmm5,%xmm12
|
||||
movdqa .Lk_sb2(%rip),%xmm5
|
||||
movdqa %xmm5,%xmm13
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm12,%xmm6
|
||||
movdqa -64(%r11,%r10,1),%xmm1
|
||||
|
||||
.byte 102,15,56,0,234
|
||||
.byte 102,69,15,56,0,232
|
||||
movdqa (%r11,%r10,1),%xmm4
|
||||
|
||||
movdqa .Lk_sb2+16(%rip),%xmm2
|
||||
movdqa %xmm2,%xmm8
|
||||
.byte 102,15,56,0,211
|
||||
.byte 102,69,15,56,0,195
|
||||
movdqa %xmm0,%xmm3
|
||||
movdqa %xmm6,%xmm11
|
||||
pxor %xmm5,%xmm2
|
||||
pxor %xmm13,%xmm8
|
||||
.byte 102,15,56,0,193
|
||||
.byte 102,15,56,0,241
|
||||
addq $16,%r9
|
||||
pxor %xmm2,%xmm0
|
||||
pxor %xmm8,%xmm6
|
||||
.byte 102,15,56,0,220
|
||||
.byte 102,68,15,56,0,220
|
||||
addq $16,%r11
|
||||
pxor %xmm0,%xmm3
|
||||
pxor %xmm6,%xmm11
|
||||
.byte 102,15,56,0,193
|
||||
.byte 102,15,56,0,241
|
||||
andq $0x30,%r11
|
||||
subq $1,%rax
|
||||
pxor %xmm3,%xmm0
|
||||
pxor %xmm11,%xmm6
|
||||
|
||||
.Lenc2x_entry:
|
||||
|
||||
movdqa %xmm9,%xmm1
|
||||
movdqa %xmm9,%xmm7
|
||||
movdqa .Lk_inv+16(%rip),%xmm5
|
||||
movdqa %xmm5,%xmm13
|
||||
pandn %xmm0,%xmm1
|
||||
pandn %xmm6,%xmm7
|
||||
psrld $4,%xmm1
|
||||
psrld $4,%xmm7
|
||||
pand %xmm9,%xmm0
|
||||
pand %xmm9,%xmm6
|
||||
.byte 102,15,56,0,232
|
||||
.byte 102,68,15,56,0,238
|
||||
movdqa %xmm10,%xmm3
|
||||
movdqa %xmm10,%xmm11
|
||||
pxor %xmm1,%xmm0
|
||||
pxor %xmm7,%xmm6
|
||||
.byte 102,15,56,0,217
|
||||
.byte 102,68,15,56,0,223
|
||||
movdqa %xmm10,%xmm4
|
||||
movdqa %xmm10,%xmm12
|
||||
pxor %xmm5,%xmm3
|
||||
pxor %xmm13,%xmm11
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,68,15,56,0,230
|
||||
movdqa %xmm10,%xmm2
|
||||
movdqa %xmm10,%xmm8
|
||||
pxor %xmm5,%xmm4
|
||||
pxor %xmm13,%xmm12
|
||||
.byte 102,15,56,0,211
|
||||
.byte 102,69,15,56,0,195
|
||||
movdqa %xmm10,%xmm3
|
||||
movdqa %xmm10,%xmm11
|
||||
pxor %xmm0,%xmm2
|
||||
pxor %xmm6,%xmm8
|
||||
.byte 102,15,56,0,220
|
||||
.byte 102,69,15,56,0,220
|
||||
movdqu (%r9),%xmm5
|
||||
|
||||
pxor %xmm1,%xmm3
|
||||
pxor %xmm7,%xmm11
|
||||
jnz .Lenc2x_loop
|
||||
|
||||
|
||||
movdqa -96(%r10),%xmm4
|
||||
movdqa -80(%r10),%xmm0
|
||||
movdqa %xmm4,%xmm12
|
||||
movdqa %xmm0,%xmm6
|
||||
.byte 102,15,56,0,226
|
||||
.byte 102,69,15,56,0,224
|
||||
pxor %xmm5,%xmm4
|
||||
pxor %xmm5,%xmm12
|
||||
.byte 102,15,56,0,195
|
||||
.byte 102,65,15,56,0,243
|
||||
movdqa 64(%r11,%r10,1),%xmm1
|
||||
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm12,%xmm6
|
||||
.byte 102,15,56,0,193
|
||||
.byte 102,15,56,0,241
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size _vpaes_encrypt_core_2x,.-_vpaes_encrypt_core_2x
|
||||
.type _vpaes_decrypt_core,@function
|
||||
.align 16
|
||||
_vpaes_decrypt_core:
|
||||
.cfi_startproc
|
||||
movq %rdx,%r9
|
||||
movl 240(%rdx),%eax
|
||||
movdqa %xmm9,%xmm1
|
||||
|
@@ -205,6 +395,7 @@ _vpaes_decrypt_core:
|
|||
pxor %xmm4,%xmm0
|
||||
.byte 102,15,56,0,194
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size _vpaes_decrypt_core,.-_vpaes_decrypt_core
|
||||
|
||||
|
||||
|
@@ -215,6 +406,7 @@ _vpaes_decrypt_core:
|
|||
.type _vpaes_schedule_core,@function
|
||||
.align 16
|
||||
_vpaes_schedule_core:
|
||||
.cfi_startproc
|
||||
|
||||
|
||||
|
||||
|
@@ -381,6 +573,7 @@ _vpaes_schedule_core:
|
|||
pxor %xmm6,%xmm6
|
||||
pxor %xmm7,%xmm7
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size _vpaes_schedule_core,.-_vpaes_schedule_core
|
||||
|
||||
|
||||
|
@@ -400,6 +593,7 @@ _vpaes_schedule_core:
|
|||
.type _vpaes_schedule_192_smear,@function
|
||||
.align 16
|
||||
_vpaes_schedule_192_smear:
|
||||
.cfi_startproc
|
||||
pshufd $0x80,%xmm6,%xmm1
|
||||
pshufd $0xFE,%xmm7,%xmm0
|
||||
pxor %xmm1,%xmm6
|
||||
|
@@ -408,6 +602,7 @@ _vpaes_schedule_192_smear:
|
|||
movdqa %xmm6,%xmm0
|
||||
movhlps %xmm1,%xmm6
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
|
||||
|
||||
|
||||
|
@@ -431,6 +626,7 @@ _vpaes_schedule_192_smear:
|
|||
.type _vpaes_schedule_round,@function
|
||||
.align 16
|
||||
_vpaes_schedule_round:
|
||||
.cfi_startproc
|
||||
|
||||
pxor %xmm1,%xmm1
|
||||
.byte 102,65,15,58,15,200,15
|
||||
|
@@ -484,6 +680,7 @@ _vpaes_schedule_low_round:
|
|||
pxor %xmm7,%xmm0
|
||||
movdqa %xmm0,%xmm7
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size _vpaes_schedule_round,.-_vpaes_schedule_round
|
||||
|
||||
|
||||
|
@@ -498,6 +695,7 @@ _vpaes_schedule_low_round:
|
|||
.type _vpaes_schedule_transform,@function
|
||||
.align 16
|
||||
_vpaes_schedule_transform:
|
||||
.cfi_startproc
|
||||
movdqa %xmm9,%xmm1
|
||||
pandn %xmm0,%xmm1
|
||||
psrld $4,%xmm1
|
||||
|
@@ -508,6 +706,7 @@ _vpaes_schedule_transform:
|
|||
.byte 102,15,56,0,193
|
||||
pxor %xmm2,%xmm0
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
|
||||
|
||||
|
||||
|
@@ -536,6 +735,7 @@ _vpaes_schedule_transform:
|
|||
.type _vpaes_schedule_mangle,@function
|
||||
.align 16
|
||||
_vpaes_schedule_mangle:
|
||||
.cfi_startproc
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa .Lk_mc_forward(%rip),%xmm5
|
||||
testq %rcx,%rcx
|
||||
|
@@ -600,6 +800,7 @@ _vpaes_schedule_mangle:
|
|||
andq $0x30,%r8
|
||||
movdqu %xmm3,(%rdx)
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
|
||||
|
||||
|
||||
|
@@ -610,6 +811,13 @@ _vpaes_schedule_mangle:
|
|||
.type vpaes_set_encrypt_key,@function
|
||||
.align 16
|
||||
vpaes_set_encrypt_key:
|
||||
.cfi_startproc
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
.extern BORINGSSL_function_hit
|
||||
.hidden BORINGSSL_function_hit
|
||||
movb $1,BORINGSSL_function_hit+5(%rip)
|
||||
#endif
|
||||
|
||||
movl %esi,%eax
|
||||
shrl $5,%eax
|
||||
addl $5,%eax
|
||||
|
@@ -620,6 +828,7 @@ vpaes_set_encrypt_key:
|
|||
call _vpaes_schedule_core
|
||||
xorl %eax,%eax
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
|
||||
|
||||
.globl vpaes_set_decrypt_key
|
||||
|
@@ -627,6 +836,7 @@ vpaes_set_encrypt_key:
|
|||
.type vpaes_set_decrypt_key,@function
|
||||
.align 16
|
||||
vpaes_set_decrypt_key:
|
||||
.cfi_startproc
|
||||
movl %esi,%eax
|
||||
shrl $5,%eax
|
||||
addl $5,%eax
|
||||
|
@@ -642,6 +852,7 @@ vpaes_set_decrypt_key:
|
|||
call _vpaes_schedule_core
|
||||
xorl %eax,%eax
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
|
||||
|
||||
.globl vpaes_encrypt
|
||||
|
@@ -649,11 +860,18 @@ vpaes_set_decrypt_key:
|
|||
.type vpaes_encrypt,@function
|
||||
.align 16
|
||||
vpaes_encrypt:
|
||||
.cfi_startproc
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
.extern BORINGSSL_function_hit
|
||||
.hidden BORINGSSL_function_hit
|
||||
movb $1,BORINGSSL_function_hit+4(%rip)
|
||||
#endif
|
||||
movdqu (%rdi),%xmm0
|
||||
call _vpaes_preheat
|
||||
call _vpaes_encrypt_core
|
||||
movdqu %xmm0,(%rsi)
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size vpaes_encrypt,.-vpaes_encrypt
|
||||
|
||||
.globl vpaes_decrypt
|
||||
|
@@ -661,17 +879,20 @@ vpaes_encrypt:
|
|||
.type vpaes_decrypt,@function
|
||||
.align 16
|
||||
vpaes_decrypt:
|
||||
.cfi_startproc
|
||||
movdqu (%rdi),%xmm0
|
||||
call _vpaes_preheat
|
||||
call _vpaes_decrypt_core
|
||||
movdqu %xmm0,(%rsi)
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size vpaes_decrypt,.-vpaes_decrypt
|
||||
.globl vpaes_cbc_encrypt
|
||||
.hidden vpaes_cbc_encrypt
|
||||
.type vpaes_cbc_encrypt,@function
|
||||
.align 16
|
||||
vpaes_cbc_encrypt:
|
||||
.cfi_startproc
|
||||
xchgq %rcx,%rdx
|
||||
subq $16,%rcx
|
||||
jc .Lcbc_abort
|
||||
|
@@ -707,7 +928,71 @@ vpaes_cbc_encrypt:
|
|||
movdqu %xmm6,(%r8)
|
||||
.Lcbc_abort:
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
|
||||
.globl vpaes_ctr32_encrypt_blocks
|
||||
.hidden vpaes_ctr32_encrypt_blocks
|
||||
.type vpaes_ctr32_encrypt_blocks,@function
|
||||
.align 16
|
||||
vpaes_ctr32_encrypt_blocks:
|
||||
.cfi_startproc
|
||||
|
||||
xchgq %rcx,%rdx
|
||||
testq %rcx,%rcx
|
||||
jz .Lctr32_abort
|
||||
movdqu (%r8),%xmm0
|
||||
movdqa .Lctr_add_one(%rip),%xmm8
|
||||
subq %rdi,%rsi
|
||||
call _vpaes_preheat
|
||||
movdqa %xmm0,%xmm6
|
||||
pshufb .Lrev_ctr(%rip),%xmm6
|
||||
|
||||
testq $1,%rcx
|
||||
jz .Lctr32_prep_loop
|
||||
|
||||
|
||||
|
||||
movdqu (%rdi),%xmm7
|
||||
call _vpaes_encrypt_core
|
||||
pxor %xmm7,%xmm0
|
||||
paddd %xmm8,%xmm6
|
||||
movdqu %xmm0,(%rsi,%rdi,1)
|
||||
subq $1,%rcx
|
||||
leaq 16(%rdi),%rdi
|
||||
jz .Lctr32_done
|
||||
|
||||
.Lctr32_prep_loop:
|
||||
|
||||
|
||||
movdqa %xmm6,%xmm14
|
||||
movdqa %xmm6,%xmm15
|
||||
paddd %xmm8,%xmm15
|
||||
|
||||
.Lctr32_loop:
|
||||
movdqa .Lrev_ctr(%rip),%xmm1
|
||||
movdqa %xmm14,%xmm0
|
||||
movdqa %xmm15,%xmm6
|
||||
.byte 102,15,56,0,193
|
||||
.byte 102,15,56,0,241
|
||||
call _vpaes_encrypt_core_2x
|
||||
movdqu (%rdi),%xmm1
|
||||
movdqu 16(%rdi),%xmm2
|
||||
movdqa .Lctr_add_two(%rip),%xmm3
|
||||
pxor %xmm1,%xmm0
|
||||
pxor %xmm2,%xmm6
|
||||
paddd %xmm3,%xmm14
|
||||
paddd %xmm3,%xmm15
|
||||
movdqu %xmm0,(%rsi,%rdi,1)
|
||||
movdqu %xmm6,16(%rsi,%rdi,1)
|
||||
subq $2,%rcx
|
||||
leaq 32(%rdi),%rdi
|
||||
jnz .Lctr32_loop
|
||||
|
||||
.Lctr32_done:
|
||||
.Lctr32_abort:
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks
|
||||
|
||||
|
||||
|
||||
|
@@ -717,6 +1002,7 @@ vpaes_cbc_encrypt:
|
|||
.type _vpaes_preheat,@function
|
||||
.align 16
|
||||
_vpaes_preheat:
|
||||
.cfi_startproc
|
||||
leaq .Lk_s0F(%rip),%r10
|
||||
movdqa -32(%r10),%xmm10
|
||||
movdqa -16(%r10),%xmm11
|
||||
|
@@ -726,6 +1012,7 @@ _vpaes_preheat:
|
|||
movdqa 80(%r10),%xmm15
|
||||
movdqa 96(%r10),%xmm14
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size _vpaes_preheat,.-_vpaes_preheat
|
||||
|
||||
|
||||
|
@@ -828,7 +1115,19 @@ _vpaes_consts:
|
|||
.Lk_dsbo:
|
||||
.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
|
||||
.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
|
||||
|
||||
|
||||
.Lrev_ctr:
|
||||
.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
|
||||
|
||||
|
||||
.Lctr_add_one:
|
||||
.quad 0x0000000000000000, 0x0000000100000000
|
||||
.Lctr_add_two:
|
||||
.quad 0x0000000000000000, 0x0000000200000000
|
||||
|
||||
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
|
||||
.align 64
|
||||
.size _vpaes_consts,.-_vpaes_consts
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
|
|
@@ -1,4 +1,16 @@
|
|||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
|
||||
.extern OPENSSL_ia32cap_P
|
||||
|
@@ -17,6 +29,8 @@ bn_mul_mont:
|
|||
jnz .Lmul_enter
|
||||
cmpl $8,%r9d
|
||||
jb .Lmul_enter
|
||||
leaq OPENSSL_ia32cap_P(%rip),%r11
|
||||
movl 8(%r11),%r11d
|
||||
cmpq %rsi,%rdx
|
||||
jne .Lmul4x_enter
|
||||
testl $7,%r9d
|
||||
|
@@ -208,31 +222,30 @@ bn_mul_mont:
|
|||
|
||||
xorq %r14,%r14
|
||||
movq (%rsp),%rax
|
||||
leaq (%rsp),%rsi
|
||||
movq %r9,%r15
|
||||
jmp .Lsub
|
||||
|
||||
.align 16
|
||||
.Lsub:
|
||||
sbbq (%rcx,%r14,8),%rax
|
||||
.Lsub: sbbq (%rcx,%r14,8),%rax
|
||||
movq %rax,(%rdi,%r14,8)
|
||||
movq 8(%rsi,%r14,8),%rax
|
||||
movq 8(%rsp,%r14,8),%rax
|
||||
leaq 1(%r14),%r14
|
||||
decq %r15
|
||||
jnz .Lsub
|
||||
|
||||
sbbq $0,%rax
|
||||
movq $-1,%rbx
|
||||
xorq %rax,%rbx
|
||||
xorq %r14,%r14
|
||||
andq %rax,%rsi
|
||||
notq %rax
|
||||
movq %rdi,%rcx
|
||||
andq %rax,%rcx
|
||||
movq %r9,%r15
|
||||
orq %rcx,%rsi
|
||||
.align 16
|
||||
|
||||
.Lcopy:
|
||||
movq (%rsi,%r14,8),%rax
|
||||
movq %r14,(%rsp,%r14,8)
|
||||
movq %rax,(%rdi,%r14,8)
|
||||
movq (%rdi,%r14,8),%rcx
|
||||
movq (%rsp,%r14,8),%rdx
|
||||
andq %rbx,%rcx
|
||||
andq %rax,%rdx
|
||||
movq %r9,(%rsp,%r14,8)
|
||||
orq %rcx,%rdx
|
||||
movq %rdx,(%rdi,%r14,8)
|
||||
leaq 1(%r14),%r14
|
||||
subq $1,%r15
|
||||
jnz .Lcopy
|
||||
|
@ -266,6 +279,9 @@ bn_mul4x_mont:
|
|||
movq %rsp,%rax
|
||||
.cfi_def_cfa_register %rax
|
||||
.Lmul4x_enter:
|
||||
andl $0x80100,%r11d
|
||||
cmpl $0x80100,%r11d
|
||||
je .Lmulx4x_enter
|
||||
pushq %rbx
|
||||
.cfi_offset %rbx,-16
|
||||
pushq %rbp
|
||||
|
@@ -603,7 +619,6 @@ bn_mul4x_mont:
|
|||
movq 16(%rsp,%r9,8),%rdi
|
||||
leaq -4(%r9),%r15
|
||||
movq 0(%rsp),%rax
|
||||
pxor %xmm0,%xmm0
|
||||
movq 8(%rsp),%rdx
|
||||
shrq $2,%r15
|
||||
leaq (%rsp),%rsi
|
||||
|
@@ -613,8 +628,7 @@ bn_mul4x_mont:
|
|||
movq 16(%rsi),%rbx
|
||||
movq 24(%rsi),%rbp
|
||||
sbbq 8(%rcx),%rdx
|
||||
jmp .Lsub4x
|
||||
.align 16
|
||||
|
||||
.Lsub4x:
|
||||
movq %rax,0(%rdi,%r14,8)
|
||||
movq %rdx,8(%rdi,%r14,8)
|
||||
|
@@ -641,34 +655,35 @@ bn_mul4x_mont:
|
|||
|
||||
sbbq $0,%rax
|
||||
movq %rbp,24(%rdi,%r14,8)
|
||||
xorq %r14,%r14
|
||||
andq %rax,%rsi
|
||||
notq %rax
|
||||
movq %rdi,%rcx
|
||||
andq %rax,%rcx
|
||||
leaq -4(%r9),%r15
|
||||
orq %rcx,%rsi
|
||||
pxor %xmm0,%xmm0
|
||||
.byte 102,72,15,110,224
|
||||
pcmpeqd %xmm5,%xmm5
|
||||
pshufd $0,%xmm4,%xmm4
|
||||
movq %r9,%r15
|
||||
pxor %xmm4,%xmm5
|
||||
shrq $2,%r15
|
||||
xorl %eax,%eax
|
||||
|
||||
movdqu (%rsi),%xmm1
|
||||
movdqa %xmm0,(%rsp)
|
||||
movdqu %xmm1,(%rdi)
|
||||
jmp .Lcopy4x
|
||||
.align 16
|
||||
.Lcopy4x:
|
||||
movdqu 16(%rsi,%r14,1),%xmm2
|
||||
movdqu 32(%rsi,%r14,1),%xmm1
|
||||
movdqa %xmm0,16(%rsp,%r14,1)
|
||||
movdqu %xmm2,16(%rdi,%r14,1)
|
||||
movdqa %xmm0,32(%rsp,%r14,1)
|
||||
movdqu %xmm1,32(%rdi,%r14,1)
|
||||
leaq 32(%r14),%r14
|
||||
movdqa (%rsp,%rax,1),%xmm1
|
||||
movdqu (%rdi,%rax,1),%xmm2
|
||||
pand %xmm4,%xmm1
|
||||
pand %xmm5,%xmm2
|
||||
movdqa 16(%rsp,%rax,1),%xmm3
|
||||
movdqa %xmm0,(%rsp,%rax,1)
|
||||
por %xmm2,%xmm1
|
||||
movdqu 16(%rdi,%rax,1),%xmm2
|
||||
movdqu %xmm1,(%rdi,%rax,1)
|
||||
pand %xmm4,%xmm3
|
||||
pand %xmm5,%xmm2
|
||||
movdqa %xmm0,16(%rsp,%rax,1)
|
||||
por %xmm2,%xmm3
|
||||
movdqu %xmm3,16(%rdi,%rax,1)
|
||||
leaq 32(%rax),%rax
|
||||
decq %r15
|
||||
jnz .Lcopy4x
|
||||
|
||||
movdqu 16(%rsi,%r14,1),%xmm2
|
||||
movdqa %xmm0,16(%rsp,%r14,1)
|
||||
movdqu %xmm2,16(%rdi,%r14,1)
|
||||
movq 8(%rsp,%r9,8),%rsi
|
||||
.cfi_def_cfa %rsi, 8
|
||||
movq $1,%rax
|
||||
|
@@ -690,6 +705,8 @@ bn_mul4x_mont:
|
|||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size bn_mul4x_mont,.-bn_mul4x_mont
|
||||
.extern bn_sqrx8x_internal
|
||||
.hidden bn_sqrx8x_internal
|
||||
.extern bn_sqr8x_internal
|
||||
.hidden bn_sqr8x_internal
|
||||
|
||||
|
@@ -774,6 +791,26 @@ bn_sqr8x_mont:
|
|||
pxor %xmm0,%xmm0
|
||||
.byte 102,72,15,110,207
|
||||
.byte 102,73,15,110,218
|
||||
leaq OPENSSL_ia32cap_P(%rip),%rax
|
||||
movl 8(%rax),%eax
|
||||
andl $0x80100,%eax
|
||||
cmpl $0x80100,%eax
|
||||
jne .Lsqr8x_nox
|
||||
|
||||
call bn_sqrx8x_internal
|
||||
leaq (%r8,%rcx,1),%rbx
|
||||
movq %rcx,%r9
|
||||
movq %rcx,%rdx
|
||||
.byte 102,72,15,126,207
|
||||
sarq $3+2,%rcx
|
||||
jmp .Lsqr8x_sub
|
||||
|
||||
.align 32
|
||||
.Lsqr8x_nox:
|
||||
call bn_sqr8x_internal
|
||||
|
||||
|
||||
|
@@ -861,6 +898,363 @@ bn_sqr8x_mont:
|
|||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size bn_sqr8x_mont,.-bn_sqr8x_mont
|
||||
.type bn_mulx4x_mont,@function
|
||||
.align 32
|
||||
bn_mulx4x_mont:
|
||||
.cfi_startproc
|
||||
movq %rsp,%rax
|
||||
.cfi_def_cfa_register %rax
|
||||
.Lmulx4x_enter:
|
||||
pushq %rbx
|
||||
.cfi_offset %rbx,-16
|
||||
pushq %rbp
|
||||
.cfi_offset %rbp,-24
|
||||
pushq %r12
|
||||
.cfi_offset %r12,-32
|
||||
pushq %r13
|
||||
.cfi_offset %r13,-40
|
||||
pushq %r14
|
||||
.cfi_offset %r14,-48
|
||||
pushq %r15
|
||||
.cfi_offset %r15,-56
|
||||
.Lmulx4x_prologue:
|
||||
|
||||
shll $3,%r9d
|
||||
xorq %r10,%r10
|
||||
subq %r9,%r10
|
||||
movq (%r8),%r8
|
||||
leaq -72(%rsp,%r10,1),%rbp
|
||||
andq $-128,%rbp
|
||||
movq %rsp,%r11
|
||||
subq %rbp,%r11
|
||||
andq $-4096,%r11
|
||||
leaq (%r11,%rbp,1),%rsp
|
||||
movq (%rsp),%r10
|
||||
cmpq %rbp,%rsp
|
||||
ja .Lmulx4x_page_walk
|
||||
jmp .Lmulx4x_page_walk_done
|
||||
|
||||
.align 16
|
||||
.Lmulx4x_page_walk:
|
||||
leaq -4096(%rsp),%rsp
|
||||
movq (%rsp),%r10
|
||||
cmpq %rbp,%rsp
|
||||
ja .Lmulx4x_page_walk
|
||||
.Lmulx4x_page_walk_done:
|
||||
|
||||
leaq (%rdx,%r9,1),%r10
|
||||
movq %r9,0(%rsp)
|
||||
shrq $5,%r9
|
||||
movq %r10,16(%rsp)
|
||||
subq $1,%r9
|
||||
movq %r8,24(%rsp)
|
||||
movq %rdi,32(%rsp)
|
||||
movq %rax,40(%rsp)
|
||||
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
|
||||
movq %r9,48(%rsp)
|
||||
jmp .Lmulx4x_body
|
||||
|
||||
.align 32
|
||||
.Lmulx4x_body:
|
||||
leaq 8(%rdx),%rdi
|
||||
movq (%rdx),%rdx
|
||||
leaq 64+32(%rsp),%rbx
|
||||
movq %rdx,%r9
|
||||
|
||||
mulxq 0(%rsi),%r8,%rax
|
||||
mulxq 8(%rsi),%r11,%r14
|
||||
addq %rax,%r11
|
||||
movq %rdi,8(%rsp)
|
||||
mulxq 16(%rsi),%r12,%r13
|
||||
adcq %r14,%r12
|
||||
adcq $0,%r13
|
||||
|
||||
movq %r8,%rdi
|
||||
imulq 24(%rsp),%r8
|
||||
xorq %rbp,%rbp
|
||||
|
||||
mulxq 24(%rsi),%rax,%r14
|
||||
movq %r8,%rdx
|
||||
leaq 32(%rsi),%rsi
|
||||
adcxq %rax,%r13
|
||||
adcxq %rbp,%r14
|
||||
|
||||
mulxq 0(%rcx),%rax,%r10
|
||||
adcxq %rax,%rdi
|
||||
adoxq %r11,%r10
|
||||
mulxq 8(%rcx),%rax,%r11
|
||||
adcxq %rax,%r10
|
||||
adoxq %r12,%r11
|
||||
.byte 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00
|
||||
movq 48(%rsp),%rdi
|
||||
movq %r10,-32(%rbx)
|
||||
adcxq %rax,%r11
|
||||
adoxq %r13,%r12
|
||||
mulxq 24(%rcx),%rax,%r15
|
||||
movq %r9,%rdx
|
||||
movq %r11,-24(%rbx)
|
||||
adcxq %rax,%r12
|
||||
adoxq %rbp,%r15
|
||||
leaq 32(%rcx),%rcx
|
||||
movq %r12,-16(%rbx)
|
||||
|
||||
jmp .Lmulx4x_1st
|
||||
|
||||
.align 32
|
||||
.Lmulx4x_1st:
|
||||
adcxq %rbp,%r15
|
||||
mulxq 0(%rsi),%r10,%rax
|
||||
adcxq %r14,%r10
|
||||
mulxq 8(%rsi),%r11,%r14
|
||||
adcxq %rax,%r11
|
||||
mulxq 16(%rsi),%r12,%rax
|
||||
adcxq %r14,%r12
|
||||
mulxq 24(%rsi),%r13,%r14
|
||||
.byte 0x67,0x67
|
||||
movq %r8,%rdx
|
||||
adcxq %rax,%r13
|
||||
adcxq %rbp,%r14
|
||||
leaq 32(%rsi),%rsi
|
||||
leaq 32(%rbx),%rbx
|
||||
|
||||
adoxq %r15,%r10
|
||||
mulxq 0(%rcx),%rax,%r15
|
||||
adcxq %rax,%r10
|
||||
adoxq %r15,%r11
|
||||
mulxq 8(%rcx),%rax,%r15
|
||||
adcxq %rax,%r11
|
||||
adoxq %r15,%r12
|
||||
mulxq 16(%rcx),%rax,%r15
|
||||
movq %r10,-40(%rbx)
|
||||
adcxq %rax,%r12
|
||||
movq %r11,-32(%rbx)
|
||||
adoxq %r15,%r13
|
||||
mulxq 24(%rcx),%rax,%r15
|
||||
movq %r9,%rdx
|
||||
movq %r12,-24(%rbx)
|
||||
adcxq %rax,%r13
|
||||
adoxq %rbp,%r15
|
||||
leaq 32(%rcx),%rcx
|
||||
movq %r13,-16(%rbx)
|
||||
|
||||
decq %rdi
|
||||
jnz .Lmulx4x_1st
|
||||
|
||||
movq 0(%rsp),%rax
|
||||
movq 8(%rsp),%rdi
|
||||
adcq %rbp,%r15
|
||||
addq %r15,%r14
|
||||
sbbq %r15,%r15
|
||||
movq %r14,-8(%rbx)
|
||||
jmp .Lmulx4x_outer
|
||||
|
||||
.align 32
|
||||
.Lmulx4x_outer:
|
||||
movq (%rdi),%rdx
|
||||
leaq 8(%rdi),%rdi
|
||||
subq %rax,%rsi
|
||||
movq %r15,(%rbx)
|
||||
leaq 64+32(%rsp),%rbx
|
||||
subq %rax,%rcx
|
||||
|
||||
mulxq 0(%rsi),%r8,%r11
|
||||
xorl %ebp,%ebp
|
||||
movq %rdx,%r9
|
||||
mulxq 8(%rsi),%r14,%r12
|
||||
adoxq -32(%rbx),%r8
|
||||
adcxq %r14,%r11
|
||||
mulxq 16(%rsi),%r15,%r13
|
||||
adoxq -24(%rbx),%r11
|
||||
adcxq %r15,%r12
|
||||
adoxq -16(%rbx),%r12
|
||||
adcxq %rbp,%r13
|
||||
adoxq %rbp,%r13
|
||||
|
||||
movq %rdi,8(%rsp)
|
||||
movq %r8,%r15
|
||||
imulq 24(%rsp),%r8
|
||||
xorl %ebp,%ebp
|
||||
|
||||
mulxq 24(%rsi),%rax,%r14
|
||||
movq %r8,%rdx
|
||||
adcxq %rax,%r13
|
||||
adoxq -8(%rbx),%r13
|
||||
adcxq %rbp,%r14
|
||||
leaq 32(%rsi),%rsi
|
||||
adoxq %rbp,%r14
|
||||
|
||||
mulxq 0(%rcx),%rax,%r10
|
||||
adcxq %rax,%r15
|
||||
adoxq %r11,%r10
|
||||
mulxq 8(%rcx),%rax,%r11
|
||||
adcxq %rax,%r10
|
||||
adoxq %r12,%r11
|
||||
mulxq 16(%rcx),%rax,%r12
|
||||
movq %r10,-32(%rbx)
|
||||
adcxq %rax,%r11
|
||||
adoxq %r13,%r12
|
||||
mulxq 24(%rcx),%rax,%r15
|
||||
movq %r9,%rdx
|
||||
movq %r11,-24(%rbx)
|
||||
leaq 32(%rcx),%rcx
|
||||
adcxq %rax,%r12
|
||||
adoxq %rbp,%r15
|
||||
movq 48(%rsp),%rdi
|
||||
movq %r12,-16(%rbx)
|
||||
|
||||
jmp .Lmulx4x_inner
|
||||
|
||||
.align 32
|
||||
.Lmulx4x_inner:
|
||||
mulxq 0(%rsi),%r10,%rax
|
||||
adcxq %rbp,%r15
|
||||
adoxq %r14,%r10
|
||||
mulxq 8(%rsi),%r11,%r14
|
||||
adcxq 0(%rbx),%r10
|
||||
adoxq %rax,%r11
|
||||
mulxq 16(%rsi),%r12,%rax
|
||||
adcxq 8(%rbx),%r11
|
||||
adoxq %r14,%r12
|
||||
mulxq 24(%rsi),%r13,%r14
|
||||
movq %r8,%rdx
|
||||
adcxq 16(%rbx),%r12
|
||||
adoxq %rax,%r13
|
||||
adcxq 24(%rbx),%r13
|
||||
adoxq %rbp,%r14
|
||||
leaq 32(%rsi),%rsi
|
||||
leaq 32(%rbx),%rbx
|
||||
adcxq %rbp,%r14
|
||||
|
||||
adoxq %r15,%r10
|
||||
mulxq 0(%rcx),%rax,%r15
|
||||
adcxq %rax,%r10
|
||||
adoxq %r15,%r11
|
||||
mulxq 8(%rcx),%rax,%r15
|
||||
adcxq %rax,%r11
|
||||
adoxq %r15,%r12
|
||||
mulxq 16(%rcx),%rax,%r15
|
||||
movq %r10,-40(%rbx)
|
||||
adcxq %rax,%r12
|
||||
adoxq %r15,%r13
|
||||
mulxq 24(%rcx),%rax,%r15
|
||||
movq %r9,%rdx
|
||||
movq %r11,-32(%rbx)
|
||||
movq %r12,-24(%rbx)
|
||||
adcxq %rax,%r13
|
||||
adoxq %rbp,%r15
|
||||
leaq 32(%rcx),%rcx
|
||||
movq %r13,-16(%rbx)
|
||||
|
||||
decq %rdi
|
||||
jnz .Lmulx4x_inner
|
||||
|
||||
movq 0(%rsp),%rax
|
||||
movq 8(%rsp),%rdi
|
||||
adcq %rbp,%r15
|
||||
subq 0(%rbx),%rbp
|
||||
adcq %r15,%r14
|
||||
sbbq %r15,%r15
|
||||
movq %r14,-8(%rbx)
|
||||
|
||||
cmpq 16(%rsp),%rdi
|
||||
jne .Lmulx4x_outer
|
||||
|
||||
leaq 64(%rsp),%rbx
|
||||
subq %rax,%rcx
|
||||
negq %r15
|
||||
movq %rax,%rdx
|
||||
shrq $3+2,%rax
|
||||
movq 32(%rsp),%rdi
|
||||
jmp .Lmulx4x_sub
|
||||
|
||||
.align 32
|
||||
.Lmulx4x_sub:
|
||||
movq 0(%rbx),%r11
|
||||
movq 8(%rbx),%r12
|
||||
movq 16(%rbx),%r13
|
||||
movq 24(%rbx),%r14
|
||||
leaq 32(%rbx),%rbx
|
||||
sbbq 0(%rcx),%r11
|
||||
sbbq 8(%rcx),%r12
|
||||
sbbq 16(%rcx),%r13
|
||||
sbbq 24(%rcx),%r14
|
||||
leaq 32(%rcx),%rcx
|
||||
movq %r11,0(%rdi)
|
||||
movq %r12,8(%rdi)
|
||||
movq %r13,16(%rdi)
|
||||
movq %r14,24(%rdi)
|
||||
leaq 32(%rdi),%rdi
|
||||
decq %rax
|
||||
jnz .Lmulx4x_sub
|
||||
|
||||
sbbq $0,%r15
|
||||
leaq 64(%rsp),%rbx
|
||||
subq %rdx,%rdi
|
||||
|
||||
.byte 102,73,15,110,207
|
||||
pxor %xmm0,%xmm0
|
||||
pshufd $0,%xmm1,%xmm1
|
||||
movq 40(%rsp),%rsi
|
||||
.cfi_def_cfa %rsi,8
|
||||
jmp .Lmulx4x_cond_copy
|
||||
|
||||
.align 32
|
||||
.Lmulx4x_cond_copy:
|
||||
movdqa 0(%rbx),%xmm2
|
||||
movdqa 16(%rbx),%xmm3
|
||||
leaq 32(%rbx),%rbx
|
||||
movdqu 0(%rdi),%xmm4
|
||||
movdqu 16(%rdi),%xmm5
|
||||
leaq 32(%rdi),%rdi
|
||||
movdqa %xmm0,-32(%rbx)
|
||||
movdqa %xmm0,-16(%rbx)
|
||||
pcmpeqd %xmm1,%xmm0
|
||||
pand %xmm1,%xmm2
|
||||
pand %xmm1,%xmm3
|
||||
pand %xmm0,%xmm4
|
||||
pand %xmm0,%xmm5
|
||||
pxor %xmm0,%xmm0
|
||||
por %xmm2,%xmm4
|
||||
por %xmm3,%xmm5
|
||||
movdqu %xmm4,-32(%rdi)
|
||||
movdqu %xmm5,-16(%rdi)
|
||||
subq $32,%rdx
|
||||
jnz .Lmulx4x_cond_copy
|
||||
|
||||
movq %rdx,(%rbx)
|
||||
|
||||
movq $1,%rax
|
||||
movq -48(%rsi),%r15
|
||||
.cfi_restore %r15
|
||||
movq -40(%rsi),%r14
|
||||
.cfi_restore %r14
|
||||
movq -32(%rsi),%r13
|
||||
.cfi_restore %r13
|
||||
movq -24(%rsi),%r12
|
||||
.cfi_restore %r12
|
||||
movq -16(%rsi),%rbp
|
||||
.cfi_restore %rbp
|
||||
movq -8(%rsi),%rbx
|
||||
.cfi_restore %rbx
|
||||
leaq (%rsi),%rsp
|
||||
.cfi_def_cfa_register %rsp
|
||||
.Lmulx4x_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size bn_mulx4x_mont,.-bn_mulx4x_mont
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 16
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
|
File diff suppressed because it is too large
518
packager/third_party/boringssl/linux-x86_64/crypto/test/trampoline-x86_64.S
vendored
Normal file
|
@@ -0,0 +1,518 @@
|
|||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
.type abi_test_trampoline, @function
|
||||
.globl abi_test_trampoline
|
||||
.hidden abi_test_trampoline
|
||||
.align 16
|
||||
abi_test_trampoline:
|
||||
.Labi_test_trampoline_seh_begin:
|
||||
.cfi_startproc
|
||||
subq $120,%rsp
|
||||
.cfi_adjust_cfa_offset 120
|
||||
.Labi_test_trampoline_seh_prolog_alloc:
|
||||
movq %r8,48(%rsp)
|
||||
movq %rbx,64(%rsp)
|
||||
.cfi_offset rbx, -64
|
||||
.Labi_test_trampoline_seh_prolog_rbx:
|
||||
movq %rbp,72(%rsp)
|
||||
.cfi_offset rbp, -56
|
||||
.Labi_test_trampoline_seh_prolog_rbp:
|
||||
movq %r12,80(%rsp)
|
||||
.cfi_offset r12, -48
|
||||
.Labi_test_trampoline_seh_prolog_r12:
|
||||
movq %r13,88(%rsp)
|
||||
.cfi_offset r13, -40
|
||||
.Labi_test_trampoline_seh_prolog_r13:
|
||||
movq %r14,96(%rsp)
|
||||
.cfi_offset r14, -32
|
||||
.Labi_test_trampoline_seh_prolog_r14:
|
||||
movq %r15,104(%rsp)
|
||||
.cfi_offset r15, -24
|
||||
.Labi_test_trampoline_seh_prolog_r15:
|
||||
.Labi_test_trampoline_seh_prolog_end:
|
||||
movq 0(%rsi),%rbx
|
||||
movq 8(%rsi),%rbp
|
||||
movq 16(%rsi),%r12
|
||||
movq 24(%rsi),%r13
|
||||
movq 32(%rsi),%r14
|
||||
movq 40(%rsi),%r15
|
||||
|
||||
movq %rdi,32(%rsp)
|
||||
movq %rsi,40(%rsp)
|
||||
movq %rdx,%r10
|
||||
movq %rcx,%r11
|
||||
decq %r11
|
||||
js .Largs_done
|
||||
movq (%r10),%rdi
|
||||
addq $8,%r10
|
||||
decq %r11
|
||||
js .Largs_done
|
||||
movq (%r10),%rsi
|
||||
addq $8,%r10
|
||||
decq %r11
|
||||
js .Largs_done
|
||||
movq (%r10),%rdx
|
||||
addq $8,%r10
|
||||
decq %r11
|
||||
js .Largs_done
|
||||
movq (%r10),%rcx
|
||||
addq $8,%r10
|
||||
decq %r11
|
||||
js .Largs_done
|
||||
movq (%r10),%r8
|
||||
addq $8,%r10
|
||||
decq %r11
|
||||
js .Largs_done
|
||||
movq (%r10),%r9
|
||||
addq $8,%r10
|
||||
leaq 0(%rsp),%rax
|
||||
.Largs_loop:
|
||||
decq %r11
|
||||
js .Largs_done
|
||||
movq %r11,56(%rsp)
|
||||
movq (%r10),%r11
|
||||
movq %r11,(%rax)
|
||||
movq 56(%rsp),%r11
|
||||
|
||||
addq $8,%r10
|
||||
addq $8,%rax
|
||||
jmp .Largs_loop
|
||||
|
||||
.Largs_done:
|
||||
movq 32(%rsp),%rax
|
||||
movq 48(%rsp),%r10
|
||||
testq %r10,%r10
|
||||
jz .Lno_unwind
|
||||
|
||||
|
||||
pushfq
|
||||
orq $0x100,0(%rsp)
|
||||
popfq
|
||||
|
||||
|
||||
|
||||
nop
|
||||
.globl abi_test_unwind_start
|
||||
.hidden abi_test_unwind_start
|
||||
abi_test_unwind_start:
|
||||
|
||||
call *%rax
|
||||
.globl abi_test_unwind_return
|
||||
.hidden abi_test_unwind_return
|
||||
abi_test_unwind_return:
|
||||
pushfq
|
||||
andq $-0x101,0(%rsp)
|
||||
popfq
|
||||
.globl abi_test_unwind_stop
|
||||
.hidden abi_test_unwind_stop
|
||||
abi_test_unwind_stop:
|
||||
|
||||
jmp .Lcall_done
|
||||
|
||||
.Lno_unwind:
|
||||
call *%rax
|
||||
|
||||
.Lcall_done:
|
||||
|
||||
movq 40(%rsp),%rsi
|
||||
movq %rbx,0(%rsi)
|
||||
movq %rbp,8(%rsi)
|
||||
movq %r12,16(%rsi)
|
||||
movq %r13,24(%rsi)
|
||||
movq %r14,32(%rsi)
|
||||
movq %r15,40(%rsi)
|
||||
movq 64(%rsp),%rbx
|
||||
.cfi_restore rbx
|
||||
movq 72(%rsp),%rbp
|
||||
.cfi_restore rbp
|
||||
movq 80(%rsp),%r12
|
||||
.cfi_restore r12
|
||||
movq 88(%rsp),%r13
|
||||
.cfi_restore r13
|
||||
movq 96(%rsp),%r14
|
||||
.cfi_restore r14
|
||||
movq 104(%rsp),%r15
|
||||
.cfi_restore r15
|
||||
addq $120,%rsp
|
||||
.cfi_adjust_cfa_offset -120
|
||||
|
||||
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.Labi_test_trampoline_seh_end:
|
||||
.size abi_test_trampoline,.-abi_test_trampoline
|
||||
.type abi_test_clobber_rax, @function
|
||||
.globl abi_test_clobber_rax
|
||||
.hidden abi_test_clobber_rax
|
||||
.align 16
|
||||
abi_test_clobber_rax:
|
||||
xorq %rax,%rax
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_rax,.-abi_test_clobber_rax
|
||||
.type abi_test_clobber_rbx, @function
|
||||
.globl abi_test_clobber_rbx
|
||||
.hidden abi_test_clobber_rbx
|
||||
.align 16
|
||||
abi_test_clobber_rbx:
|
||||
xorq %rbx,%rbx
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_rbx,.-abi_test_clobber_rbx
|
||||
.type abi_test_clobber_rcx, @function
|
||||
.globl abi_test_clobber_rcx
|
||||
.hidden abi_test_clobber_rcx
|
||||
.align 16
|
||||
abi_test_clobber_rcx:
|
||||
xorq %rcx,%rcx
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_rcx,.-abi_test_clobber_rcx
|
||||
.type abi_test_clobber_rdx, @function
|
||||
.globl abi_test_clobber_rdx
|
||||
.hidden abi_test_clobber_rdx
|
||||
.align 16
|
||||
abi_test_clobber_rdx:
|
||||
xorq %rdx,%rdx
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_rdx,.-abi_test_clobber_rdx
|
||||
.type abi_test_clobber_rdi, @function
|
||||
.globl abi_test_clobber_rdi
|
||||
.hidden abi_test_clobber_rdi
|
||||
.align 16
|
||||
abi_test_clobber_rdi:
|
||||
xorq %rdi,%rdi
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_rdi,.-abi_test_clobber_rdi
|
||||
.type abi_test_clobber_rsi, @function
|
||||
.globl abi_test_clobber_rsi
|
||||
.hidden abi_test_clobber_rsi
|
||||
.align 16
|
||||
abi_test_clobber_rsi:
|
||||
xorq %rsi,%rsi
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_rsi,.-abi_test_clobber_rsi
|
||||
.type abi_test_clobber_rbp, @function
|
||||
.globl abi_test_clobber_rbp
|
||||
.hidden abi_test_clobber_rbp
|
||||
.align 16
|
||||
abi_test_clobber_rbp:
|
||||
xorq %rbp,%rbp
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_rbp,.-abi_test_clobber_rbp
|
||||
.type abi_test_clobber_r8, @function
|
||||
.globl abi_test_clobber_r8
|
||||
.hidden abi_test_clobber_r8
|
||||
.align 16
|
||||
abi_test_clobber_r8:
|
||||
xorq %r8,%r8
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_r8,.-abi_test_clobber_r8
|
||||
.type abi_test_clobber_r9, @function
|
||||
.globl abi_test_clobber_r9
|
||||
.hidden abi_test_clobber_r9
|
||||
.align 16
|
||||
abi_test_clobber_r9:
|
||||
xorq %r9,%r9
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_r9,.-abi_test_clobber_r9
|
||||
.type abi_test_clobber_r10, @function
|
||||
.globl abi_test_clobber_r10
|
||||
.hidden abi_test_clobber_r10
|
||||
.align 16
|
||||
abi_test_clobber_r10:
|
||||
xorq %r10,%r10
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_r10,.-abi_test_clobber_r10
|
||||
.type abi_test_clobber_r11, @function
|
||||
.globl abi_test_clobber_r11
|
||||
.hidden abi_test_clobber_r11
|
||||
.align 16
|
||||
abi_test_clobber_r11:
|
||||
xorq %r11,%r11
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_r11,.-abi_test_clobber_r11
|
||||
.type abi_test_clobber_r12, @function
|
||||
.globl abi_test_clobber_r12
|
||||
.hidden abi_test_clobber_r12
|
||||
.align 16
|
||||
abi_test_clobber_r12:
|
||||
xorq %r12,%r12
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_r12,.-abi_test_clobber_r12
|
||||
.type abi_test_clobber_r13, @function
|
||||
.globl abi_test_clobber_r13
|
||||
.hidden abi_test_clobber_r13
|
||||
.align 16
|
||||
abi_test_clobber_r13:
|
||||
xorq %r13,%r13
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_r13,.-abi_test_clobber_r13
|
||||
.type abi_test_clobber_r14, @function
|
||||
.globl abi_test_clobber_r14
|
||||
.hidden abi_test_clobber_r14
|
||||
.align 16
|
||||
abi_test_clobber_r14:
|
||||
xorq %r14,%r14
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_r14,.-abi_test_clobber_r14
|
||||
.type abi_test_clobber_r15, @function
|
||||
.globl abi_test_clobber_r15
|
||||
.hidden abi_test_clobber_r15
|
||||
.align 16
|
||||
abi_test_clobber_r15:
|
||||
xorq %r15,%r15
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_r15,.-abi_test_clobber_r15
|
||||
.type abi_test_clobber_xmm0, @function
|
||||
.globl abi_test_clobber_xmm0
|
||||
.hidden abi_test_clobber_xmm0
|
||||
.align 16
|
||||
abi_test_clobber_xmm0:
|
||||
pxor %xmm0,%xmm0
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_xmm0,.-abi_test_clobber_xmm0
|
||||
.type abi_test_clobber_xmm1, @function
|
||||
.globl abi_test_clobber_xmm1
|
||||
.hidden abi_test_clobber_xmm1
|
||||
.align 16
|
||||
abi_test_clobber_xmm1:
|
||||
pxor %xmm1,%xmm1
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_xmm1,.-abi_test_clobber_xmm1
|
||||
.type abi_test_clobber_xmm2, @function
|
||||
.globl abi_test_clobber_xmm2
|
||||
.hidden abi_test_clobber_xmm2
|
||||
.align 16
|
||||
abi_test_clobber_xmm2:
|
||||
pxor %xmm2,%xmm2
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_xmm2,.-abi_test_clobber_xmm2
|
||||
.type abi_test_clobber_xmm3, @function
|
||||
.globl abi_test_clobber_xmm3
|
||||
.hidden abi_test_clobber_xmm3
|
||||
.align 16
|
||||
abi_test_clobber_xmm3:
|
||||
pxor %xmm3,%xmm3
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_xmm3,.-abi_test_clobber_xmm3
|
||||
.type abi_test_clobber_xmm4, @function
|
||||
.globl abi_test_clobber_xmm4
|
||||
.hidden abi_test_clobber_xmm4
|
||||
.align 16
|
||||
abi_test_clobber_xmm4:
|
||||
pxor %xmm4,%xmm4
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_xmm4,.-abi_test_clobber_xmm4
|
||||
.type abi_test_clobber_xmm5, @function
|
||||
.globl abi_test_clobber_xmm5
|
||||
.hidden abi_test_clobber_xmm5
|
||||
.align 16
|
||||
abi_test_clobber_xmm5:
|
||||
pxor %xmm5,%xmm5
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_xmm5,.-abi_test_clobber_xmm5
|
||||
.type abi_test_clobber_xmm6, @function
|
||||
.globl abi_test_clobber_xmm6
|
||||
.hidden abi_test_clobber_xmm6
|
||||
.align 16
|
||||
abi_test_clobber_xmm6:
|
||||
pxor %xmm6,%xmm6
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_xmm6,.-abi_test_clobber_xmm6
|
||||
.type abi_test_clobber_xmm7, @function
|
||||
.globl abi_test_clobber_xmm7
|
||||
.hidden abi_test_clobber_xmm7
|
||||
.align 16
|
||||
abi_test_clobber_xmm7:
|
||||
pxor %xmm7,%xmm7
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_xmm7,.-abi_test_clobber_xmm7
|
||||
.type abi_test_clobber_xmm8, @function
|
||||
.globl abi_test_clobber_xmm8
|
||||
.hidden abi_test_clobber_xmm8
|
||||
.align 16
|
||||
abi_test_clobber_xmm8:
|
||||
pxor %xmm8,%xmm8
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_xmm8,.-abi_test_clobber_xmm8
|
||||
.type abi_test_clobber_xmm9, @function
|
||||
.globl abi_test_clobber_xmm9
|
||||
.hidden abi_test_clobber_xmm9
|
||||
.align 16
|
||||
abi_test_clobber_xmm9:
|
||||
pxor %xmm9,%xmm9
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_xmm9,.-abi_test_clobber_xmm9
|
||||
.type abi_test_clobber_xmm10, @function
|
||||
.globl abi_test_clobber_xmm10
|
||||
.hidden abi_test_clobber_xmm10
|
||||
.align 16
|
||||
abi_test_clobber_xmm10:
|
||||
pxor %xmm10,%xmm10
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_xmm10,.-abi_test_clobber_xmm10
|
||||
.type abi_test_clobber_xmm11, @function
|
||||
.globl abi_test_clobber_xmm11
|
||||
.hidden abi_test_clobber_xmm11
|
||||
.align 16
|
||||
abi_test_clobber_xmm11:
|
||||
pxor %xmm11,%xmm11
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_xmm11,.-abi_test_clobber_xmm11
|
||||
.type abi_test_clobber_xmm12, @function
|
||||
.globl abi_test_clobber_xmm12
|
||||
.hidden abi_test_clobber_xmm12
|
||||
.align 16
|
||||
abi_test_clobber_xmm12:
|
||||
pxor %xmm12,%xmm12
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_xmm12,.-abi_test_clobber_xmm12
|
||||
.type abi_test_clobber_xmm13, @function
|
||||
.globl abi_test_clobber_xmm13
|
||||
.hidden abi_test_clobber_xmm13
|
||||
.align 16
|
||||
abi_test_clobber_xmm13:
|
||||
pxor %xmm13,%xmm13
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_xmm13,.-abi_test_clobber_xmm13
|
||||
.type abi_test_clobber_xmm14, @function
|
||||
.globl abi_test_clobber_xmm14
|
||||
.hidden abi_test_clobber_xmm14
|
||||
.align 16
|
||||
abi_test_clobber_xmm14:
|
||||
pxor %xmm14,%xmm14
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_xmm14,.-abi_test_clobber_xmm14
|
||||
.type abi_test_clobber_xmm15, @function
|
||||
.globl abi_test_clobber_xmm15
|
||||
.hidden abi_test_clobber_xmm15
|
||||
.align 16
|
||||
abi_test_clobber_xmm15:
|
||||
pxor %xmm15,%xmm15
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_clobber_xmm15,.-abi_test_clobber_xmm15
|
||||
|
||||
|
||||
|
||||
.type abi_test_bad_unwind_wrong_register, @function
|
||||
.globl abi_test_bad_unwind_wrong_register
|
||||
.hidden abi_test_bad_unwind_wrong_register
|
||||
.align 16
|
||||
abi_test_bad_unwind_wrong_register:
|
||||
.cfi_startproc
|
||||
.Labi_test_bad_unwind_wrong_register_seh_begin:
|
||||
pushq %r12
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r13,-16
|
||||
.Labi_test_bad_unwind_wrong_register_seh_push_r13:
|
||||
|
||||
|
||||
|
||||
nop
|
||||
popq %r12
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.cfi_restore %r12
|
||||
.byte 0xf3,0xc3
|
||||
.Labi_test_bad_unwind_wrong_register_seh_end:
|
||||
.cfi_endproc
|
||||
.size abi_test_bad_unwind_wrong_register,.-abi_test_bad_unwind_wrong_register
|
||||
.type abi_test_bad_unwind_temporary, @function
|
||||
.globl abi_test_bad_unwind_temporary
|
||||
.hidden abi_test_bad_unwind_temporary
|
||||
.align 16
|
||||
abi_test_bad_unwind_temporary:
|
||||
.cfi_startproc
|
||||
.Labi_test_bad_unwind_temporary_seh_begin:
|
||||
pushq %r12
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r12,-16
|
||||
.Labi_test_bad_unwind_temporary_seh_push_r12:
|
||||
|
||||
movq %r12,%rax
|
||||
incq %rax
|
||||
movq %rax,(%rsp)
|
||||
|
||||
|
||||
|
||||
movq %r12,(%rsp)
|
||||
|
||||
|
||||
popq %r12
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.cfi_restore %r12
|
||||
.byte 0xf3,0xc3
|
||||
.Labi_test_bad_unwind_temporary_seh_end:
|
||||
.cfi_endproc
|
||||
.size abi_test_bad_unwind_temporary,.-abi_test_bad_unwind_temporary
|
||||
.type abi_test_set_direction_flag, @function
|
||||
.globl abi_test_get_and_clear_direction_flag
|
||||
.hidden abi_test_get_and_clear_direction_flag
|
||||
abi_test_get_and_clear_direction_flag:
|
||||
pushfq
|
||||
popq %rax
|
||||
andq $0x400,%rax
|
||||
shrq $10,%rax
|
||||
cld
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_get_and_clear_direction_flag,.-abi_test_get_and_clear_direction_flag
|
||||
|
||||
|
||||
|
||||
.type abi_test_set_direction_flag, @function
|
||||
.globl abi_test_set_direction_flag
|
||||
.hidden abi_test_set_direction_flag
|
||||
abi_test_set_direction_flag:
|
||||
std
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_set_direction_flag,.-abi_test_set_direction_flag
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
1871
packager/third_party/boringssl/linux-x86_64/crypto/third_party/sike/asm/fp-x86_64.S
vendored
Normal file
File diff suppressed because it is too large
|
@@ -1,4 +1,10 @@
|
|||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__i386__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
.globl _ChaCha20_ctr32
|
||||
.private_extern _ChaCha20_ctr32
|
||||
|
|
|
@@ -1,4 +1,10 @@
|
|||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__i386__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
.private_extern __x86_AES_encrypt_compact
|
||||
.align 4
|
||||
|
@@ -961,11 +967,11 @@ LAES_Te:
|
|||
.long 16,32,64,128
|
||||
.long 27,54,0,0
|
||||
.long 0,0,0,0
|
||||
.globl _asm_AES_encrypt
|
||||
.private_extern _asm_AES_encrypt
|
||||
.globl _aes_nohw_encrypt
|
||||
.private_extern _aes_nohw_encrypt
|
||||
.align 4
|
||||
_asm_AES_encrypt:
|
||||
L_asm_AES_encrypt_begin:
|
||||
_aes_nohw_encrypt:
|
||||
L_aes_nohw_encrypt_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
|
@@ -2145,11 +2151,11 @@ LAES_Td:
|
|||
.byte 200,235,187,60,131,83,153,97
|
||||
.byte 23,43,4,126,186,119,214,38
|
||||
.byte 225,105,20,99,85,33,12,125
|
||||
.globl _asm_AES_decrypt
|
||||
.private_extern _asm_AES_decrypt
|
||||
.globl _aes_nohw_decrypt
|
||||
.private_extern _aes_nohw_decrypt
|
||||
.align 4
|
||||
_asm_AES_decrypt:
|
||||
L_asm_AES_decrypt_begin:
|
||||
_aes_nohw_decrypt:
|
||||
L_aes_nohw_decrypt_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
|
@@ -2209,11 +2215,11 @@ L011x86:
|
|||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.globl _asm_AES_cbc_encrypt
|
||||
.private_extern _asm_AES_cbc_encrypt
|
||||
.globl _aes_nohw_cbc_encrypt
|
||||
.private_extern _aes_nohw_cbc_encrypt
|
||||
.align 4
|
||||
_asm_AES_cbc_encrypt:
|
||||
L_asm_AES_cbc_encrypt_begin:
|
||||
_aes_nohw_cbc_encrypt:
|
||||
L_aes_nohw_cbc_encrypt_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
|
@@ -2970,18 +2976,18 @@ L045exit:
|
|||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.globl _asm_AES_set_encrypt_key
|
||||
.private_extern _asm_AES_set_encrypt_key
|
||||
.globl _aes_nohw_set_encrypt_key
|
||||
.private_extern _aes_nohw_set_encrypt_key
|
||||
.align 4
|
||||
_asm_AES_set_encrypt_key:
|
||||
L_asm_AES_set_encrypt_key_begin:
|
||||
_aes_nohw_set_encrypt_key:
|
||||
L_aes_nohw_set_encrypt_key_begin:
|
||||
call __x86_AES_set_encrypt_key
|
||||
ret
|
||||
.globl _asm_AES_set_decrypt_key
|
||||
.private_extern _asm_AES_set_decrypt_key
|
||||
.globl _aes_nohw_set_decrypt_key
|
||||
.private_extern _aes_nohw_set_decrypt_key
|
||||
.align 4
|
||||
_asm_AES_set_decrypt_key:
|
||||
L_asm_AES_set_decrypt_key_begin:
|
||||
_aes_nohw_set_decrypt_key:
|
||||
L_aes_nohw_set_decrypt_key_begin:
|
||||
call __x86_AES_set_encrypt_key
|
||||
cmpl $0,%eax
|
||||
je L054proceed
|
||||
|
|
File diff suppressed because it is too large
|
@@ -1,4 +1,10 @@
|
|||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__i386__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
.globl _bn_mul_add_words
|
||||
.private_extern _bn_mul_add_words
|
||||
|
|
|
@@ -1,4 +1,10 @@
|
|||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__i386__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
.globl _bn_mul_comba8
|
||||
.private_extern _bn_mul_comba8
|
||||
|
|
|
@@ -0,0 +1,289 @@
|
|||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__i386__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
.globl _gcm_gmult_ssse3
|
||||
.private_extern _gcm_gmult_ssse3
|
||||
.align 4
|
||||
_gcm_gmult_ssse3:
|
||||
L_gcm_gmult_ssse3_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%esi
|
||||
movdqu (%edi),%xmm0
|
||||
call L000pic_point
|
||||
L000pic_point:
|
||||
popl %eax
|
||||
movdqa Lreverse_bytes-L000pic_point(%eax),%xmm7
|
||||
movdqa Llow4_mask-L000pic_point(%eax),%xmm2
|
||||
.byte 102,15,56,0,199
|
||||
movdqa %xmm2,%xmm1
|
||||
pandn %xmm0,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm2,%xmm0
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $5,%eax
|
||||
L001loop_row_1:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz L001loop_row_1
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $5,%eax
|
||||
L002loop_row_2:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz L002loop_row_2
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $6,%eax
|
||||
L003loop_row_3:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz L003loop_row_3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
.byte 102,15,56,0,215
|
||||
movdqu %xmm2,(%edi)
|
||||
pxor %xmm0,%xmm0
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.globl _gcm_ghash_ssse3
|
||||
.private_extern _gcm_ghash_ssse3
|
||||
.align 4
|
||||
_gcm_ghash_ssse3:
|
||||
L_gcm_ghash_ssse3_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%esi
|
||||
movl 28(%esp),%edx
|
||||
movl 32(%esp),%ecx
|
||||
movdqu (%edi),%xmm0
|
||||
call L004pic_point
|
||||
L004pic_point:
|
||||
popl %ebx
|
||||
movdqa Lreverse_bytes-L004pic_point(%ebx),%xmm7
|
||||
andl $-16,%ecx
|
||||
.byte 102,15,56,0,199
|
||||
pxor %xmm3,%xmm3
|
||||
L005loop_ghash:
|
||||
movdqa Llow4_mask-L004pic_point(%ebx),%xmm2
|
||||
movdqu (%edx),%xmm1
|
||||
.byte 102,15,56,0,207
|
||||
pxor %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm1
|
||||
pandn %xmm0,%xmm1
|
||||
psrld $4,%xmm1
|
||||
pand %xmm2,%xmm0
|
||||
pxor %xmm2,%xmm2
|
||||
movl $5,%eax
|
||||
L006loop_row_4:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz L006loop_row_4
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $5,%eax
|
||||
L007loop_row_5:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz L007loop_row_5
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movl $6,%eax
|
||||
L008loop_row_6:
|
||||
movdqa (%esi),%xmm4
|
||||
leal 16(%esi),%esi
|
||||
movdqa %xmm2,%xmm6
|
||||
.byte 102,15,58,15,243,1
|
||||
movdqa %xmm6,%xmm3
|
||||
psrldq $1,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
.byte 102,15,56,0,224
|
||||
.byte 102,15,56,0,233
|
||||
pxor %xmm5,%xmm2
|
||||
movdqa %xmm4,%xmm5
|
||||
psllq $60,%xmm5
|
||||
movdqa %xmm5,%xmm6
|
||||
pslldq $8,%xmm6
|
||||
pxor %xmm6,%xmm3
|
||||
psrldq $8,%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
psrlq $4,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
subl $1,%eax
|
||||
jnz L008loop_row_6
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $1,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
psrlq $5,%xmm3
|
||||
pxor %xmm3,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
movdqa %xmm2,%xmm0
|
||||
leal -256(%esi),%esi
|
||||
leal 16(%edx),%edx
|
||||
subl $16,%ecx
|
||||
jnz L005loop_ghash
|
||||
.byte 102,15,56,0,199
|
||||
movdqu %xmm0,(%edi)
|
||||
pxor %xmm0,%xmm0
|
||||
pxor %xmm1,%xmm1
|
||||
pxor %xmm2,%xmm2
|
||||
pxor %xmm3,%xmm3
|
||||
pxor %xmm4,%xmm4
|
||||
pxor %xmm5,%xmm5
|
||||
pxor %xmm6,%xmm6
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.align 4,0x90
|
||||
Lreverse_bytes:
|
||||
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
||||
.align 4,0x90
|
||||
Llow4_mask:
|
||||
.long 252645135,252645135,252645135,252645135
|
||||
#endif
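For orientation: in this new ghash-ssse3 file, gcm_gmult_ssse3 performs one GHASH field multiplication, X <- X * H in GF(2^128), and gcm_ghash_ssse3 folds a buffer into the accumulator one 16-byte block at a time. The fixed 5-, 5-, and 6-iteration row loops walk all 16 rows (256 bytes, note the leal -256(%esi),%esi reset) of the precomputed key table every time, so the memory access pattern does not depend on secret nibbles. Below is a reference sketch of the underlying field multiplication, the bit-serial algorithm from NIST SP 800-38D, in C; the names u128 and gf128_mul are hypothetical, and its secret-dependent branches are kept for clarity, being exactly what the table-scanning assembly exists to avoid:

    #include <stdint.h>

    /* 128-bit GHASH field element; bit 0 of the field is the MSB of hi. */
    typedef struct { uint64_t hi, lo; } u128;

    static u128 gf128_mul(u128 x, u128 h) {
        /* Top byte 0xe1 encodes the polynomial x^128 + x^7 + x^2 + x + 1. */
        static const uint64_t R_HI = 0xe100000000000000ULL;
        u128 z = {0, 0}, v = h;
        for (int i = 0; i < 128; i++) {
            /* Walk the bits of x from the GHASH-first (most significant) end. */
            uint64_t bit = (i < 64) ? (x.hi >> (63 - i)) & 1
                                    : (x.lo >> (127 - i)) & 1;
            if (bit) {                 /* z ^= v -- secret-dependent branch */
                z.hi ^= v.hi;
                z.lo ^= v.lo;
            }
            uint64_t lsb = v.lo & 1;
            v.lo = (v.lo >> 1) | (v.hi << 63);  /* v >>= 1 */
            v.hi >>= 1;
            if (lsb)
                v.hi ^= R_HI;          /* reduce modulo the field polynomial */
        }
        return z;
    }

In these terms gcm_ghash_ssse3 is, per block B, X = gf128_mul(X xor B, H): the pshufb against Lreverse_bytes (the .byte 102,15,56,0,... sequences) converts between buffer byte order and the field representation, and the trailing run of pxor instructions scrubs the XMM registers before returning.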
@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _gcm_gmult_4bit_mmx
.private_extern _gcm_gmult_4bit_mmx
@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _md5_block_asm_data_order
.private_extern _md5_block_asm_data_order
@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _sha1_block_data_order
.private_extern _sha1_block_data_order
@ -1,4 +1,10 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _sha256_block_data_order
.private_extern _sha256_block_data_order
Some files were not shown because too many files have changed in this diff.