Roll boringssl to fc9c67599d9bdeb2e0467085133b81a8e28f77a4

Change-Id: I8ff75ede6c5b6a51719163d208c96d6ec0be61bc
Author: KongQun Yang
Date:   2017-11-30 11:57:15 -08:00
parent 82e0fd2095
commit 7097270f13
131 changed files with 45468 additions and 16819 deletions
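The substantive change in this roll is the one-line DEPS pin; the rest of the diff is regenerated build metadata (the file lists below are marked as created by generate_build_files.py). As a minimal sketch, the roll is typically produced along these lines, assuming a gclient-managed checkout and BoringSSL's generate_build_files.py helper; the exact commands are not recorded in the commit:

# Hypothetical roll recipe, for illustration only; not part of this commit.
# Assumes a gclient-managed checkout rooted one level above src/ and the
# generate_build_files.py helper that ships with BoringSSL.
import subprocess

# Step 1: DEPS is edited by hand to pin the new BoringSSL revision
# (that one-line change is the DEPS hunk shown below).

# Step 2: fetch the newly pinned BoringSSL sources.
subprocess.check_call(["gclient", "sync"])

# Step 3: regenerate the checked-in GN and GYP source lists so they match the
# new revision; this is what produces the large generated-file diff below.
boringssl_dir = "src/packager/third_party/boringssl"
subprocess.check_call(["python", "src/util/generate_build_files.py", "gn"],
                      cwd=boringssl_dir)
subprocess.check_call(["python", "src/util/generate_build_files.py", "gyp"],
                      cwd=boringssl_dir)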

DEPS

@@ -36,7 +36,7 @@ deps = {
  # Make sure the version matches the one in
  # src/packager/third_party/boringssl, which contains perl generated files.
  "src/packager/third_party/boringssl/src":
-    Var("github") + "/google/boringssl@3cab5572b1fcf5a8f6018529dc30dc8d21b2a4bd",
+    Var("github") + "/google/boringssl@fc9c67599d9bdeb2e0467085133b81a8e28f77a4",
  "src/packager/third_party/curl/source":
    Var("github") + "/curl/curl@79e63a53bb9598af863b0afe49ad662795faeef4", #7.50.0


@@ -6,11 +6,8 @@
crypto_sources = [ crypto_sources = [
"err_data.c", "err_data.c",
"src/crypto/aes/aes.c",
"src/crypto/aes/mode_wrappers.c",
"src/crypto/asn1/a_bitstr.c", "src/crypto/asn1/a_bitstr.c",
"src/crypto/asn1/a_bool.c", "src/crypto/asn1/a_bool.c",
"src/crypto/asn1/a_bytes.c",
"src/crypto/asn1/a_d2i_fp.c", "src/crypto/asn1/a_d2i_fp.c",
"src/crypto/asn1/a_dup.c", "src/crypto/asn1/a_dup.c",
"src/crypto/asn1/a_enum.c", "src/crypto/asn1/a_enum.c",
@@ -27,135 +24,116 @@ crypto_sources = [
"src/crypto/asn1/a_utctm.c", "src/crypto/asn1/a_utctm.c",
"src/crypto/asn1/a_utf8.c", "src/crypto/asn1/a_utf8.c",
"src/crypto/asn1/asn1_lib.c", "src/crypto/asn1/asn1_lib.c",
"src/crypto/asn1/asn1_locl.h",
"src/crypto/asn1/asn1_par.c", "src/crypto/asn1/asn1_par.c",
"src/crypto/asn1/asn_pack.c", "src/crypto/asn1/asn_pack.c",
"src/crypto/asn1/f_enum.c", "src/crypto/asn1/f_enum.c",
"src/crypto/asn1/f_int.c", "src/crypto/asn1/f_int.c",
"src/crypto/asn1/f_string.c", "src/crypto/asn1/f_string.c",
"src/crypto/asn1/t_bitst.c",
"src/crypto/asn1/tasn_dec.c", "src/crypto/asn1/tasn_dec.c",
"src/crypto/asn1/tasn_enc.c", "src/crypto/asn1/tasn_enc.c",
"src/crypto/asn1/tasn_fre.c", "src/crypto/asn1/tasn_fre.c",
"src/crypto/asn1/tasn_new.c", "src/crypto/asn1/tasn_new.c",
"src/crypto/asn1/tasn_typ.c", "src/crypto/asn1/tasn_typ.c",
"src/crypto/asn1/tasn_utl.c", "src/crypto/asn1/tasn_utl.c",
"src/crypto/asn1/x_bignum.c", "src/crypto/asn1/time_support.c",
"src/crypto/asn1/x_long.c",
"src/crypto/base64/base64.c", "src/crypto/base64/base64.c",
"src/crypto/bio/bio.c", "src/crypto/bio/bio.c",
"src/crypto/bio/bio_mem.c", "src/crypto/bio/bio_mem.c",
"src/crypto/bio/buffer.c",
"src/crypto/bio/connect.c", "src/crypto/bio/connect.c",
"src/crypto/bio/fd.c", "src/crypto/bio/fd.c",
"src/crypto/bio/file.c", "src/crypto/bio/file.c",
"src/crypto/bio/hexdump.c", "src/crypto/bio/hexdump.c",
"src/crypto/bio/internal.h",
"src/crypto/bio/pair.c", "src/crypto/bio/pair.c",
"src/crypto/bio/printf.c", "src/crypto/bio/printf.c",
"src/crypto/bio/socket.c", "src/crypto/bio/socket.c",
"src/crypto/bio/socket_helper.c", "src/crypto/bio/socket_helper.c",
"src/crypto/bn/add.c", "src/crypto/bn_extra/bn_asn1.c",
"src/crypto/bn/asm/x86_64-gcc.c", "src/crypto/bn_extra/convert.c",
"src/crypto/bn/bn.c",
"src/crypto/bn/bn_asn1.c",
"src/crypto/bn/cmp.c",
"src/crypto/bn/convert.c",
"src/crypto/bn/ctx.c",
"src/crypto/bn/div.c",
"src/crypto/bn/exponentiation.c",
"src/crypto/bn/gcd.c",
"src/crypto/bn/generic.c",
"src/crypto/bn/kronecker.c",
"src/crypto/bn/montgomery.c",
"src/crypto/bn/mul.c",
"src/crypto/bn/prime.c",
"src/crypto/bn/random.c",
"src/crypto/bn/rsaz_exp.c",
"src/crypto/bn/shift.c",
"src/crypto/bn/sqrt.c",
"src/crypto/buf/buf.c", "src/crypto/buf/buf.c",
"src/crypto/bytestring/asn1_compat.c", "src/crypto/bytestring/asn1_compat.c",
"src/crypto/bytestring/ber.c", "src/crypto/bytestring/ber.c",
"src/crypto/bytestring/cbb.c", "src/crypto/bytestring/cbb.c",
"src/crypto/bytestring/cbs.c", "src/crypto/bytestring/cbs.c",
"src/crypto/bytestring/internal.h",
"src/crypto/chacha/chacha.c", "src/crypto/chacha/chacha.c",
"src/crypto/cipher/aead.c", "src/crypto/cipher_extra/cipher_extra.c",
"src/crypto/cipher/cipher.c", "src/crypto/cipher_extra/derive_key.c",
"src/crypto/cipher/derive_key.c", "src/crypto/cipher_extra/e_aesctrhmac.c",
"src/crypto/cipher/e_aes.c", "src/crypto/cipher_extra/e_aesgcmsiv.c",
"src/crypto/cipher/e_chacha20poly1305.c", "src/crypto/cipher_extra/e_chacha20poly1305.c",
"src/crypto/cipher/e_des.c", "src/crypto/cipher_extra/e_null.c",
"src/crypto/cipher/e_null.c", "src/crypto/cipher_extra/e_rc2.c",
"src/crypto/cipher/e_rc2.c", "src/crypto/cipher_extra/e_rc4.c",
"src/crypto/cipher/e_rc4.c", "src/crypto/cipher_extra/e_ssl3.c",
"src/crypto/cipher/e_ssl3.c", "src/crypto/cipher_extra/e_tls.c",
"src/crypto/cipher/e_tls.c", "src/crypto/cipher_extra/internal.h",
"src/crypto/cipher/tls_cbc.c", "src/crypto/cipher_extra/tls_cbc.c",
"src/crypto/cmac/cmac.c", "src/crypto/cmac/cmac.c",
"src/crypto/conf/conf.c", "src/crypto/conf/conf.c",
"src/crypto/conf/conf_def.h",
"src/crypto/conf/internal.h",
"src/crypto/cpu-aarch64-linux.c", "src/crypto/cpu-aarch64-linux.c",
"src/crypto/cpu-arm-linux.c", "src/crypto/cpu-arm-linux.c",
"src/crypto/cpu-arm.c", "src/crypto/cpu-arm.c",
"src/crypto/cpu-intel.c", "src/crypto/cpu-intel.c",
"src/crypto/cpu-ppc64le.c",
"src/crypto/crypto.c", "src/crypto/crypto.c",
"src/crypto/curve25519/curve25519.c",
"src/crypto/curve25519/spake25519.c", "src/crypto/curve25519/spake25519.c",
"src/crypto/curve25519/x25519-x86_64.c", "src/crypto/curve25519/x25519-x86_64.c",
"src/crypto/des/des.c",
"src/crypto/dh/check.c", "src/crypto/dh/check.c",
"src/crypto/dh/dh.c", "src/crypto/dh/dh.c",
"src/crypto/dh/dh_asn1.c", "src/crypto/dh/dh_asn1.c",
"src/crypto/dh/params.c", "src/crypto/dh/params.c",
"src/crypto/digest/digest.c", "src/crypto/digest_extra/digest_extra.c",
"src/crypto/digest/digests.c",
"src/crypto/dsa/dsa.c", "src/crypto/dsa/dsa.c",
"src/crypto/dsa/dsa_asn1.c", "src/crypto/dsa/dsa_asn1.c",
"src/crypto/ec/ec.c", "src/crypto/ec_extra/ec_asn1.c",
"src/crypto/ec/ec_asn1.c",
"src/crypto/ec/ec_key.c",
"src/crypto/ec/ec_montgomery.c",
"src/crypto/ec/oct.c",
"src/crypto/ec/p224-64.c",
"src/crypto/ec/p256-64.c",
"src/crypto/ec/p256-x86_64.c",
"src/crypto/ec/simple.c",
"src/crypto/ec/util-64.c",
"src/crypto/ec/wnaf.c",
"src/crypto/ecdh/ecdh.c", "src/crypto/ecdh/ecdh.c",
"src/crypto/ecdsa/ecdsa.c", "src/crypto/ecdsa_extra/ecdsa_asn1.c",
"src/crypto/ecdsa/ecdsa_asn1.c",
"src/crypto/engine/engine.c", "src/crypto/engine/engine.c",
"src/crypto/err/err.c", "src/crypto/err/err.c",
"src/crypto/err/internal.h",
"src/crypto/evp/digestsign.c", "src/crypto/evp/digestsign.c",
"src/crypto/evp/evp.c", "src/crypto/evp/evp.c",
"src/crypto/evp/evp_asn1.c", "src/crypto/evp/evp_asn1.c",
"src/crypto/evp/evp_ctx.c", "src/crypto/evp/evp_ctx.c",
"src/crypto/evp/internal.h",
"src/crypto/evp/p_dsa_asn1.c", "src/crypto/evp/p_dsa_asn1.c",
"src/crypto/evp/p_ec.c", "src/crypto/evp/p_ec.c",
"src/crypto/evp/p_ec_asn1.c", "src/crypto/evp/p_ec_asn1.c",
"src/crypto/evp/p_ed25519.c",
"src/crypto/evp/p_ed25519_asn1.c",
"src/crypto/evp/p_rsa.c", "src/crypto/evp/p_rsa.c",
"src/crypto/evp/p_rsa_asn1.c", "src/crypto/evp/p_rsa_asn1.c",
"src/crypto/evp/pbkdf.c", "src/crypto/evp/pbkdf.c",
"src/crypto/evp/print.c", "src/crypto/evp/print.c",
"src/crypto/evp/scrypt.c",
"src/crypto/evp/sign.c", "src/crypto/evp/sign.c",
"src/crypto/ex_data.c", "src/crypto/ex_data.c",
"src/crypto/fipsmodule/aes/internal.h",
"src/crypto/fipsmodule/bcm.c",
"src/crypto/fipsmodule/bn/internal.h",
"src/crypto/fipsmodule/bn/rsaz_exp.h",
"src/crypto/fipsmodule/cipher/internal.h",
"src/crypto/fipsmodule/delocate.h",
"src/crypto/fipsmodule/des/internal.h",
"src/crypto/fipsmodule/digest/internal.h",
"src/crypto/fipsmodule/digest/md32_common.h",
"src/crypto/fipsmodule/ec/internal.h",
"src/crypto/fipsmodule/ec/p256-x86_64-table.h",
"src/crypto/fipsmodule/ec/p256-x86_64.h",
"src/crypto/fipsmodule/is_fips.c",
"src/crypto/fipsmodule/modes/internal.h",
"src/crypto/fipsmodule/rand/internal.h",
"src/crypto/fipsmodule/rsa/internal.h",
"src/crypto/hkdf/hkdf.c", "src/crypto/hkdf/hkdf.c",
"src/crypto/hmac/hmac.c", "src/crypto/internal.h",
"src/crypto/lhash/lhash.c", "src/crypto/lhash/lhash.c",
"src/crypto/md4/md4.c",
"src/crypto/md5/md5.c",
"src/crypto/mem.c", "src/crypto/mem.c",
"src/crypto/modes/cbc.c",
"src/crypto/modes/cfb.c",
"src/crypto/modes/ctr.c",
"src/crypto/modes/gcm.c",
"src/crypto/modes/ofb.c",
"src/crypto/newhope/error_correction.c",
"src/crypto/newhope/newhope.c",
"src/crypto/newhope/ntt.c",
"src/crypto/newhope/poly.c",
"src/crypto/newhope/precomp.c",
"src/crypto/newhope/reduce.c",
"src/crypto/obj/obj.c", "src/crypto/obj/obj.c",
"src/crypto/obj/obj_dat.h",
"src/crypto/obj/obj_xref.c", "src/crypto/obj/obj_xref.c",
"src/crypto/pem/pem_all.c", "src/crypto/pem/pem_all.c",
"src/crypto/pem/pem_info.c", "src/crypto/pem/pem_info.c",
@@ -165,34 +143,33 @@ crypto_sources = [
"src/crypto/pem/pem_pkey.c", "src/crypto/pem/pem_pkey.c",
"src/crypto/pem/pem_x509.c", "src/crypto/pem/pem_x509.c",
"src/crypto/pem/pem_xaux.c", "src/crypto/pem/pem_xaux.c",
"src/crypto/pkcs8/p5_pbe.c", "src/crypto/pkcs7/internal.h",
"src/crypto/pkcs7/pkcs7.c",
"src/crypto/pkcs7/pkcs7_x509.c",
"src/crypto/pkcs8/internal.h",
"src/crypto/pkcs8/p5_pbev2.c", "src/crypto/pkcs8/p5_pbev2.c",
"src/crypto/pkcs8/p8_pkey.c",
"src/crypto/pkcs8/pkcs8.c", "src/crypto/pkcs8/pkcs8.c",
"src/crypto/pkcs8/pkcs8_x509.c",
"src/crypto/poly1305/internal.h",
"src/crypto/poly1305/poly1305.c", "src/crypto/poly1305/poly1305.c",
"src/crypto/poly1305/poly1305_arm.c", "src/crypto/poly1305/poly1305_arm.c",
"src/crypto/poly1305/poly1305_vec.c", "src/crypto/poly1305/poly1305_vec.c",
"src/crypto/rand/deterministic.c", "src/crypto/pool/internal.h",
"src/crypto/rand/rand.c", "src/crypto/pool/pool.c",
"src/crypto/rand/urandom.c", "src/crypto/rand_extra/deterministic.c",
"src/crypto/rand/windows.c", "src/crypto/rand_extra/forkunsafe.c",
"src/crypto/rand_extra/fuchsia.c",
"src/crypto/rand_extra/rand_extra.c",
"src/crypto/rand_extra/windows.c",
"src/crypto/rc4/rc4.c", "src/crypto/rc4/rc4.c",
"src/crypto/refcount_c11.c", "src/crypto/refcount_c11.c",
"src/crypto/refcount_lock.c", "src/crypto/refcount_lock.c",
"src/crypto/rsa/blinding.c", "src/crypto/rsa_extra/rsa_asn1.c",
"src/crypto/rsa/padding.c",
"src/crypto/rsa/rsa.c",
"src/crypto/rsa/rsa_asn1.c",
"src/crypto/rsa/rsa_impl.c",
"src/crypto/sha/sha1.c",
"src/crypto/sha/sha256.c",
"src/crypto/sha/sha512.c",
"src/crypto/stack/stack.c", "src/crypto/stack/stack.c",
"src/crypto/thread.c", "src/crypto/thread.c",
"src/crypto/thread_none.c", "src/crypto/thread_none.c",
"src/crypto/thread_pthread.c", "src/crypto/thread_pthread.c",
"src/crypto/thread_win.c", "src/crypto/thread_win.c",
"src/crypto/time_support.c",
"src/crypto/x509/a_digest.c", "src/crypto/x509/a_digest.c",
"src/crypto/x509/a_sign.c", "src/crypto/x509/a_sign.c",
"src/crypto/x509/a_strex.c", "src/crypto/x509/a_strex.c",
@@ -201,13 +178,15 @@ crypto_sources = [
"src/crypto/x509/asn1_gen.c", "src/crypto/x509/asn1_gen.c",
"src/crypto/x509/by_dir.c", "src/crypto/x509/by_dir.c",
"src/crypto/x509/by_file.c", "src/crypto/x509/by_file.c",
"src/crypto/x509/charmap.h",
"src/crypto/x509/i2d_pr.c", "src/crypto/x509/i2d_pr.c",
"src/crypto/x509/pkcs7.c", "src/crypto/x509/internal.h",
"src/crypto/x509/rsa_pss.c", "src/crypto/x509/rsa_pss.c",
"src/crypto/x509/t_crl.c", "src/crypto/x509/t_crl.c",
"src/crypto/x509/t_req.c", "src/crypto/x509/t_req.c",
"src/crypto/x509/t_x509.c", "src/crypto/x509/t_x509.c",
"src/crypto/x509/t_x509a.c", "src/crypto/x509/t_x509a.c",
"src/crypto/x509/vpm_int.h",
"src/crypto/x509/x509.c", "src/crypto/x509/x509.c",
"src/crypto/x509/x509_att.c", "src/crypto/x509/x509_att.c",
"src/crypto/x509/x509_cmp.c", "src/crypto/x509/x509_cmp.c",
@@ -228,7 +207,6 @@ crypto_sources = [
"src/crypto/x509/x509name.c", "src/crypto/x509/x509name.c",
"src/crypto/x509/x509rset.c", "src/crypto/x509/x509rset.c",
"src/crypto/x509/x509spki.c", "src/crypto/x509/x509spki.c",
"src/crypto/x509/x509type.c",
"src/crypto/x509/x_algor.c", "src/crypto/x509/x_algor.c",
"src/crypto/x509/x_all.c", "src/crypto/x509/x_all.c",
"src/crypto/x509/x_attrib.c", "src/crypto/x509/x_attrib.c",
@@ -244,8 +222,10 @@ crypto_sources = [
"src/crypto/x509/x_val.c", "src/crypto/x509/x_val.c",
"src/crypto/x509/x_x509.c", "src/crypto/x509/x_x509.c",
"src/crypto/x509/x_x509a.c", "src/crypto/x509/x_x509a.c",
"src/crypto/x509v3/ext_dat.h",
"src/crypto/x509v3/pcy_cache.c", "src/crypto/x509v3/pcy_cache.c",
"src/crypto/x509v3/pcy_data.c", "src/crypto/x509v3/pcy_data.c",
"src/crypto/x509v3/pcy_int.h",
"src/crypto/x509v3/pcy_lib.c", "src/crypto/x509v3/pcy_lib.c",
"src/crypto/x509v3/pcy_map.c", "src/crypto/x509v3/pcy_map.c",
"src/crypto/x509v3/pcy_node.c", "src/crypto/x509v3/pcy_node.c",
@@ -276,184 +256,298 @@ crypto_sources = [
"src/crypto/x509v3/v3_skey.c", "src/crypto/x509v3/v3_skey.c",
"src/crypto/x509v3/v3_sxnet.c", "src/crypto/x509v3/v3_sxnet.c",
"src/crypto/x509v3/v3_utl.c", "src/crypto/x509v3/v3_utl.c",
"src/include/openssl/aead.h",
"src/include/openssl/aes.h",
"src/include/openssl/arm_arch.h",
"src/include/openssl/asn1.h",
"src/include/openssl/asn1_mac.h",
"src/include/openssl/asn1t.h",
"src/include/openssl/base.h",
"src/include/openssl/base64.h",
"src/include/openssl/bio.h",
"src/include/openssl/blowfish.h",
"src/include/openssl/bn.h",
"src/include/openssl/buf.h",
"src/include/openssl/buffer.h",
"src/include/openssl/bytestring.h",
"src/include/openssl/cast.h",
"src/include/openssl/chacha.h",
"src/include/openssl/cipher.h",
"src/include/openssl/cmac.h",
"src/include/openssl/conf.h",
"src/include/openssl/cpu.h",
"src/include/openssl/crypto.h",
"src/include/openssl/curve25519.h",
"src/include/openssl/des.h",
"src/include/openssl/dh.h",
"src/include/openssl/digest.h",
"src/include/openssl/dsa.h",
"src/include/openssl/ec.h",
"src/include/openssl/ec_key.h",
"src/include/openssl/ecdh.h",
"src/include/openssl/ecdsa.h",
"src/include/openssl/engine.h",
"src/include/openssl/err.h",
"src/include/openssl/evp.h",
"src/include/openssl/ex_data.h",
"src/include/openssl/hkdf.h",
"src/include/openssl/hmac.h",
"src/include/openssl/is_boringssl.h",
"src/include/openssl/lhash.h",
"src/include/openssl/lhash_macros.h",
"src/include/openssl/md4.h",
"src/include/openssl/md5.h",
"src/include/openssl/mem.h",
"src/include/openssl/nid.h",
"src/include/openssl/obj.h",
"src/include/openssl/obj_mac.h",
"src/include/openssl/objects.h",
"src/include/openssl/opensslconf.h",
"src/include/openssl/opensslv.h",
"src/include/openssl/ossl_typ.h",
"src/include/openssl/pem.h",
"src/include/openssl/pkcs12.h",
"src/include/openssl/pkcs7.h",
"src/include/openssl/pkcs8.h",
"src/include/openssl/poly1305.h",
"src/include/openssl/pool.h",
"src/include/openssl/rand.h",
"src/include/openssl/rc4.h",
"src/include/openssl/ripemd.h",
"src/include/openssl/rsa.h",
"src/include/openssl/safestack.h",
"src/include/openssl/sha.h",
"src/include/openssl/span.h",
"src/include/openssl/srtp.h",
"src/include/openssl/stack.h",
"src/include/openssl/thread.h",
"src/include/openssl/type_check.h",
"src/include/openssl/x509.h",
"src/include/openssl/x509_vfy.h",
"src/include/openssl/x509v3.h",
"src/third_party/fiat/curve25519.c",
"src/third_party/fiat/internal.h",
] ]
ssl_sources = [ ssl_sources = [
"src/ssl/custom_extensions.c", "src/include/openssl/dtls1.h",
"src/ssl/d1_both.c", "src/include/openssl/ssl.h",
"src/ssl/d1_lib.c", "src/include/openssl/ssl3.h",
"src/ssl/d1_meth.c", "src/include/openssl/tls1.h",
"src/ssl/d1_pkt.c", "src/ssl/bio_ssl.cc",
"src/ssl/d1_srtp.c", "src/ssl/custom_extensions.cc",
"src/ssl/dtls_record.c", "src/ssl/d1_both.cc",
"src/ssl/handshake_client.c", "src/ssl/d1_lib.cc",
"src/ssl/handshake_server.c", "src/ssl/d1_pkt.cc",
"src/ssl/pqueue/pqueue.c", "src/ssl/d1_srtp.cc",
"src/ssl/s3_both.c", "src/ssl/dtls_method.cc",
"src/ssl/s3_enc.c", "src/ssl/dtls_record.cc",
"src/ssl/s3_lib.c", "src/ssl/handshake.cc",
"src/ssl/s3_meth.c", "src/ssl/handshake_client.cc",
"src/ssl/s3_pkt.c", "src/ssl/handshake_server.cc",
"src/ssl/ssl_aead_ctx.c", "src/ssl/internal.h",
"src/ssl/ssl_asn1.c", "src/ssl/s3_both.cc",
"src/ssl/ssl_buffer.c", "src/ssl/s3_lib.cc",
"src/ssl/ssl_cert.c", "src/ssl/s3_pkt.cc",
"src/ssl/ssl_cipher.c", "src/ssl/ssl_aead_ctx.cc",
"src/ssl/ssl_ecdh.c", "src/ssl/ssl_asn1.cc",
"src/ssl/ssl_file.c", "src/ssl/ssl_buffer.cc",
"src/ssl/ssl_lib.c", "src/ssl/ssl_cert.cc",
"src/ssl/ssl_rsa.c", "src/ssl/ssl_cipher.cc",
"src/ssl/ssl_session.c", "src/ssl/ssl_file.cc",
"src/ssl/ssl_stat.c", "src/ssl/ssl_key_share.cc",
"src/ssl/t1_enc.c", "src/ssl/ssl_lib.cc",
"src/ssl/t1_lib.c", "src/ssl/ssl_privkey.cc",
"src/ssl/tls_record.c", "src/ssl/ssl_session.cc",
"src/ssl/ssl_stat.cc",
"src/ssl/ssl_transcript.cc",
"src/ssl/ssl_versions.cc",
"src/ssl/ssl_x509.cc",
"src/ssl/t1_enc.cc",
"src/ssl/t1_lib.cc",
"src/ssl/tls13_both.cc",
"src/ssl/tls13_client.cc",
"src/ssl/tls13_enc.cc",
"src/ssl/tls13_server.cc",
"src/ssl/tls_method.cc",
"src/ssl/tls_record.cc",
]
crypto_sources_ios_aarch64 = [
"ios-aarch64/crypto/chacha/chacha-armv8.S",
"ios-aarch64/crypto/fipsmodule/aesv8-armx64.S",
"ios-aarch64/crypto/fipsmodule/armv8-mont.S",
"ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S",
"ios-aarch64/crypto/fipsmodule/sha1-armv8.S",
"ios-aarch64/crypto/fipsmodule/sha256-armv8.S",
"ios-aarch64/crypto/fipsmodule/sha512-armv8.S",
]
crypto_sources_ios_arm = [
"ios-arm/crypto/chacha/chacha-armv4.S",
"ios-arm/crypto/fipsmodule/aes-armv4.S",
"ios-arm/crypto/fipsmodule/aesv8-armx32.S",
"ios-arm/crypto/fipsmodule/armv4-mont.S",
"ios-arm/crypto/fipsmodule/bsaes-armv7.S",
"ios-arm/crypto/fipsmodule/ghash-armv4.S",
"ios-arm/crypto/fipsmodule/ghashv8-armx32.S",
"ios-arm/crypto/fipsmodule/sha1-armv4-large.S",
"ios-arm/crypto/fipsmodule/sha256-armv4.S",
"ios-arm/crypto/fipsmodule/sha512-armv4.S",
] ]
crypto_sources_linux_aarch64 = [ crypto_sources_linux_aarch64 = [
"linux-aarch64/crypto/aes/aesv8-armx64.S",
"linux-aarch64/crypto/bn/armv8-mont.S",
"linux-aarch64/crypto/chacha/chacha-armv8.S", "linux-aarch64/crypto/chacha/chacha-armv8.S",
"linux-aarch64/crypto/modes/ghashv8-armx64.S", "linux-aarch64/crypto/fipsmodule/aesv8-armx64.S",
"linux-aarch64/crypto/sha/sha1-armv8.S", "linux-aarch64/crypto/fipsmodule/armv8-mont.S",
"linux-aarch64/crypto/sha/sha256-armv8.S", "linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S",
"linux-aarch64/crypto/sha/sha512-armv8.S", "linux-aarch64/crypto/fipsmodule/sha1-armv8.S",
"linux-aarch64/crypto/fipsmodule/sha256-armv8.S",
"linux-aarch64/crypto/fipsmodule/sha512-armv8.S",
] ]
crypto_sources_linux_arm = [ crypto_sources_linux_arm = [
"linux-arm/crypto/aes/aes-armv4.S",
"linux-arm/crypto/aes/aesv8-armx32.S",
"linux-arm/crypto/aes/bsaes-armv7.S",
"linux-arm/crypto/bn/armv4-mont.S",
"linux-arm/crypto/chacha/chacha-armv4.S", "linux-arm/crypto/chacha/chacha-armv4.S",
"linux-arm/crypto/modes/ghash-armv4.S", "linux-arm/crypto/fipsmodule/aes-armv4.S",
"linux-arm/crypto/modes/ghashv8-armx32.S", "linux-arm/crypto/fipsmodule/aesv8-armx32.S",
"linux-arm/crypto/sha/sha1-armv4-large.S", "linux-arm/crypto/fipsmodule/armv4-mont.S",
"linux-arm/crypto/sha/sha256-armv4.S", "linux-arm/crypto/fipsmodule/bsaes-armv7.S",
"linux-arm/crypto/sha/sha512-armv4.S", "linux-arm/crypto/fipsmodule/ghash-armv4.S",
"linux-arm/crypto/fipsmodule/ghashv8-armx32.S",
"linux-arm/crypto/fipsmodule/sha1-armv4-large.S",
"linux-arm/crypto/fipsmodule/sha256-armv4.S",
"linux-arm/crypto/fipsmodule/sha512-armv4.S",
"src/crypto/curve25519/asm/x25519-asm-arm.S", "src/crypto/curve25519/asm/x25519-asm-arm.S",
"src/crypto/poly1305/poly1305_arm_asm.S", "src/crypto/poly1305/poly1305_arm_asm.S",
] ]
crypto_sources_linux_ppc64le = [
"linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S",
"linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S",
]
crypto_sources_linux_x86 = [ crypto_sources_linux_x86 = [
"linux-x86/crypto/aes/aes-586.S",
"linux-x86/crypto/aes/aesni-x86.S",
"linux-x86/crypto/aes/vpaes-x86.S",
"linux-x86/crypto/bn/bn-586.S",
"linux-x86/crypto/bn/co-586.S",
"linux-x86/crypto/bn/x86-mont.S",
"linux-x86/crypto/chacha/chacha-x86.S", "linux-x86/crypto/chacha/chacha-x86.S",
"linux-x86/crypto/md5/md5-586.S", "linux-x86/crypto/fipsmodule/aes-586.S",
"linux-x86/crypto/modes/ghash-x86.S", "linux-x86/crypto/fipsmodule/aesni-x86.S",
"linux-x86/crypto/rc4/rc4-586.S", "linux-x86/crypto/fipsmodule/bn-586.S",
"linux-x86/crypto/sha/sha1-586.S", "linux-x86/crypto/fipsmodule/co-586.S",
"linux-x86/crypto/sha/sha256-586.S", "linux-x86/crypto/fipsmodule/ghash-x86.S",
"linux-x86/crypto/sha/sha512-586.S", "linux-x86/crypto/fipsmodule/md5-586.S",
"linux-x86/crypto/fipsmodule/sha1-586.S",
"linux-x86/crypto/fipsmodule/sha256-586.S",
"linux-x86/crypto/fipsmodule/sha512-586.S",
"linux-x86/crypto/fipsmodule/vpaes-x86.S",
"linux-x86/crypto/fipsmodule/x86-mont.S",
] ]
crypto_sources_linux_x86_64 = [ crypto_sources_linux_x86_64 = [
"linux-x86_64/crypto/aes/aes-x86_64.S",
"linux-x86_64/crypto/aes/aesni-x86_64.S",
"linux-x86_64/crypto/aes/bsaes-x86_64.S",
"linux-x86_64/crypto/aes/vpaes-x86_64.S",
"linux-x86_64/crypto/bn/rsaz-avx2.S",
"linux-x86_64/crypto/bn/rsaz-x86_64.S",
"linux-x86_64/crypto/bn/x86_64-mont.S",
"linux-x86_64/crypto/bn/x86_64-mont5.S",
"linux-x86_64/crypto/chacha/chacha-x86_64.S", "linux-x86_64/crypto/chacha/chacha-x86_64.S",
"linux-x86_64/crypto/ec/p256-x86_64-asm.S", "linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S",
"linux-x86_64/crypto/md5/md5-x86_64.S", "linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S",
"linux-x86_64/crypto/modes/aesni-gcm-x86_64.S", "linux-x86_64/crypto/fipsmodule/aes-x86_64.S",
"linux-x86_64/crypto/modes/ghash-x86_64.S", "linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S",
"linux-x86_64/crypto/rand/rdrand-x86_64.S", "linux-x86_64/crypto/fipsmodule/aesni-x86_64.S",
"linux-x86_64/crypto/rc4/rc4-x86_64.S", "linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S",
"linux-x86_64/crypto/sha/sha1-x86_64.S", "linux-x86_64/crypto/fipsmodule/ghash-x86_64.S",
"linux-x86_64/crypto/sha/sha256-x86_64.S", "linux-x86_64/crypto/fipsmodule/md5-x86_64.S",
"linux-x86_64/crypto/sha/sha512-x86_64.S", "linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S",
"linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S",
"linux-x86_64/crypto/fipsmodule/rsaz-avx2.S",
"linux-x86_64/crypto/fipsmodule/sha1-x86_64.S",
"linux-x86_64/crypto/fipsmodule/sha256-x86_64.S",
"linux-x86_64/crypto/fipsmodule/sha512-x86_64.S",
"linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S",
"linux-x86_64/crypto/fipsmodule/x86_64-mont.S",
"linux-x86_64/crypto/fipsmodule/x86_64-mont5.S",
"src/crypto/curve25519/asm/x25519-asm-x86_64.S", "src/crypto/curve25519/asm/x25519-asm-x86_64.S",
] ]
crypto_sources_mac_x86 = [ crypto_sources_mac_x86 = [
"mac-x86/crypto/aes/aes-586.S",
"mac-x86/crypto/aes/aesni-x86.S",
"mac-x86/crypto/aes/vpaes-x86.S",
"mac-x86/crypto/bn/bn-586.S",
"mac-x86/crypto/bn/co-586.S",
"mac-x86/crypto/bn/x86-mont.S",
"mac-x86/crypto/chacha/chacha-x86.S", "mac-x86/crypto/chacha/chacha-x86.S",
"mac-x86/crypto/md5/md5-586.S", "mac-x86/crypto/fipsmodule/aes-586.S",
"mac-x86/crypto/modes/ghash-x86.S", "mac-x86/crypto/fipsmodule/aesni-x86.S",
"mac-x86/crypto/rc4/rc4-586.S", "mac-x86/crypto/fipsmodule/bn-586.S",
"mac-x86/crypto/sha/sha1-586.S", "mac-x86/crypto/fipsmodule/co-586.S",
"mac-x86/crypto/sha/sha256-586.S", "mac-x86/crypto/fipsmodule/ghash-x86.S",
"mac-x86/crypto/sha/sha512-586.S", "mac-x86/crypto/fipsmodule/md5-586.S",
"mac-x86/crypto/fipsmodule/sha1-586.S",
"mac-x86/crypto/fipsmodule/sha256-586.S",
"mac-x86/crypto/fipsmodule/sha512-586.S",
"mac-x86/crypto/fipsmodule/vpaes-x86.S",
"mac-x86/crypto/fipsmodule/x86-mont.S",
] ]
crypto_sources_mac_x86_64 = [ crypto_sources_mac_x86_64 = [
"mac-x86_64/crypto/aes/aes-x86_64.S",
"mac-x86_64/crypto/aes/aesni-x86_64.S",
"mac-x86_64/crypto/aes/bsaes-x86_64.S",
"mac-x86_64/crypto/aes/vpaes-x86_64.S",
"mac-x86_64/crypto/bn/rsaz-avx2.S",
"mac-x86_64/crypto/bn/rsaz-x86_64.S",
"mac-x86_64/crypto/bn/x86_64-mont.S",
"mac-x86_64/crypto/bn/x86_64-mont5.S",
"mac-x86_64/crypto/chacha/chacha-x86_64.S", "mac-x86_64/crypto/chacha/chacha-x86_64.S",
"mac-x86_64/crypto/ec/p256-x86_64-asm.S", "mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S",
"mac-x86_64/crypto/md5/md5-x86_64.S", "mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S",
"mac-x86_64/crypto/modes/aesni-gcm-x86_64.S", "mac-x86_64/crypto/fipsmodule/aes-x86_64.S",
"mac-x86_64/crypto/modes/ghash-x86_64.S", "mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S",
"mac-x86_64/crypto/rand/rdrand-x86_64.S", "mac-x86_64/crypto/fipsmodule/aesni-x86_64.S",
"mac-x86_64/crypto/rc4/rc4-x86_64.S", "mac-x86_64/crypto/fipsmodule/bsaes-x86_64.S",
"mac-x86_64/crypto/sha/sha1-x86_64.S", "mac-x86_64/crypto/fipsmodule/ghash-x86_64.S",
"mac-x86_64/crypto/sha/sha256-x86_64.S", "mac-x86_64/crypto/fipsmodule/md5-x86_64.S",
"mac-x86_64/crypto/sha/sha512-x86_64.S", "mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S",
"mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S",
"mac-x86_64/crypto/fipsmodule/rsaz-avx2.S",
"mac-x86_64/crypto/fipsmodule/sha1-x86_64.S",
"mac-x86_64/crypto/fipsmodule/sha256-x86_64.S",
"mac-x86_64/crypto/fipsmodule/sha512-x86_64.S",
"mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S",
"mac-x86_64/crypto/fipsmodule/x86_64-mont.S",
"mac-x86_64/crypto/fipsmodule/x86_64-mont5.S",
"src/crypto/curve25519/asm/x25519-asm-x86_64.S", "src/crypto/curve25519/asm/x25519-asm-x86_64.S",
] ]
crypto_sources_win_x86 = [ crypto_sources_win_x86 = [
"win-x86/crypto/aes/aes-586.asm",
"win-x86/crypto/aes/aesni-x86.asm",
"win-x86/crypto/aes/vpaes-x86.asm",
"win-x86/crypto/bn/bn-586.asm",
"win-x86/crypto/bn/co-586.asm",
"win-x86/crypto/bn/x86-mont.asm",
"win-x86/crypto/chacha/chacha-x86.asm", "win-x86/crypto/chacha/chacha-x86.asm",
"win-x86/crypto/md5/md5-586.asm", "win-x86/crypto/fipsmodule/aes-586.asm",
"win-x86/crypto/modes/ghash-x86.asm", "win-x86/crypto/fipsmodule/aesni-x86.asm",
"win-x86/crypto/rc4/rc4-586.asm", "win-x86/crypto/fipsmodule/bn-586.asm",
"win-x86/crypto/sha/sha1-586.asm", "win-x86/crypto/fipsmodule/co-586.asm",
"win-x86/crypto/sha/sha256-586.asm", "win-x86/crypto/fipsmodule/ghash-x86.asm",
"win-x86/crypto/sha/sha512-586.asm", "win-x86/crypto/fipsmodule/md5-586.asm",
"win-x86/crypto/fipsmodule/sha1-586.asm",
"win-x86/crypto/fipsmodule/sha256-586.asm",
"win-x86/crypto/fipsmodule/sha512-586.asm",
"win-x86/crypto/fipsmodule/vpaes-x86.asm",
"win-x86/crypto/fipsmodule/x86-mont.asm",
] ]
crypto_sources_win_x86_64 = [ crypto_sources_win_x86_64 = [
"win-x86_64/crypto/aes/aes-x86_64.asm",
"win-x86_64/crypto/aes/aesni-x86_64.asm",
"win-x86_64/crypto/aes/bsaes-x86_64.asm",
"win-x86_64/crypto/aes/vpaes-x86_64.asm",
"win-x86_64/crypto/bn/rsaz-avx2.asm",
"win-x86_64/crypto/bn/rsaz-x86_64.asm",
"win-x86_64/crypto/bn/x86_64-mont.asm",
"win-x86_64/crypto/bn/x86_64-mont5.asm",
"win-x86_64/crypto/chacha/chacha-x86_64.asm", "win-x86_64/crypto/chacha/chacha-x86_64.asm",
"win-x86_64/crypto/ec/p256-x86_64-asm.asm", "win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm",
"win-x86_64/crypto/md5/md5-x86_64.asm", "win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm",
"win-x86_64/crypto/modes/aesni-gcm-x86_64.asm", "win-x86_64/crypto/fipsmodule/aes-x86_64.asm",
"win-x86_64/crypto/modes/ghash-x86_64.asm", "win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm",
"win-x86_64/crypto/rand/rdrand-x86_64.asm", "win-x86_64/crypto/fipsmodule/aesni-x86_64.asm",
"win-x86_64/crypto/rc4/rc4-x86_64.asm", "win-x86_64/crypto/fipsmodule/bsaes-x86_64.asm",
"win-x86_64/crypto/sha/sha1-x86_64.asm", "win-x86_64/crypto/fipsmodule/ghash-x86_64.asm",
"win-x86_64/crypto/sha/sha256-x86_64.asm", "win-x86_64/crypto/fipsmodule/md5-x86_64.asm",
"win-x86_64/crypto/sha/sha512-x86_64.asm", "win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm",
"win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm",
"win-x86_64/crypto/fipsmodule/rsaz-avx2.asm",
"win-x86_64/crypto/fipsmodule/sha1-x86_64.asm",
"win-x86_64/crypto/fipsmodule/sha256-x86_64.asm",
"win-x86_64/crypto/fipsmodule/sha512-x86_64.asm",
"win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm",
"win-x86_64/crypto/fipsmodule/x86_64-mont.asm",
"win-x86_64/crypto/fipsmodule/x86_64-mont5.asm",
] ]
fuzzers = [ fuzzers = [
"bn_div",
"bn_mod_exp",
"cert", "cert",
"client", "client",
"dtls_client",
"dtls_server",
"pkcs8", "pkcs8",
"privkey", "privkey",
"read_pem", "read_pem",
"server", "server",
"session",
"spki", "spki",
"ssl_ctx_api",
] ]


@@ -4,594 +4,73 @@
# This file is created by generate_build_files.py. Do not edit manually. # This file is created by generate_build_files.py. Do not edit manually.
_test_support_sources = [ test_support_sources = [
"src/crypto/test/file_test.cc", "src/crypto/test/file_test.cc",
"src/crypto/test/file_test.h", "src/crypto/test/file_test.h",
"src/crypto/test/gtest_main.h",
"src/crypto/test/malloc.cc", "src/crypto/test/malloc.cc",
"src/crypto/test/scoped_types.h",
"src/crypto/test/test_util.cc", "src/crypto/test/test_util.cc",
"src/crypto/test/test_util.h", "src/crypto/test/test_util.h",
"src/ssl/test/async_bio.h", "src/ssl/test/async_bio.h",
"src/ssl/test/fuzzer.h",
"src/ssl/test/fuzzer_tags.h",
"src/ssl/test/packeted_bio.h", "src/ssl/test/packeted_bio.h",
"src/ssl/test/scoped_types.h",
"src/ssl/test/test_config.h", "src/ssl/test/test_config.h",
] ]
template("create_tests") { crypto_test_sources = [
executable("boringssl_aes_test") { "crypto_test_data.cc",
sources = [
"src/crypto/aes/aes_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_asn1_test") {
sources = [
"src/crypto/asn1/asn1_test.cc", "src/crypto/asn1/asn1_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_base64_test") {
sources = [
"src/crypto/base64/base64_test.cc", "src/crypto/base64/base64_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_bio_test") {
sources = [
"src/crypto/bio/bio_test.cc", "src/crypto/bio/bio_test.cc",
] "src/crypto/buf/buf_test.cc",
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_bn_test") {
sources = [
"src/crypto/bn/bn_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_bytestring_test") {
sources = [
"src/crypto/bytestring/bytestring_test.cc", "src/crypto/bytestring/bytestring_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_chacha_test") {
sources = [
"src/crypto/chacha/chacha_test.cc", "src/crypto/chacha/chacha_test.cc",
] "src/crypto/cipher_extra/aead_test.cc",
sources += _test_support_sources "src/crypto/cipher_extra/cipher_test.cc",
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_aead_test") {
sources = [
"src/crypto/cipher/aead_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_cipher_test") {
sources = [
"src/crypto/cipher/cipher_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_cmac_test") {
sources = [
"src/crypto/cmac/cmac_test.cc", "src/crypto/cmac/cmac_test.cc",
] "src/crypto/compiler_test.cc",
sources += _test_support_sources "src/crypto/constant_time_test.cc",
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_constant_time_test") {
sources = [
"src/crypto/constant_time_test.c",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_ed25519_test") {
sources = [
"src/crypto/curve25519/ed25519_test.cc", "src/crypto/curve25519/ed25519_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_spake25519_test") {
sources = [
"src/crypto/curve25519/spake25519_test.cc", "src/crypto/curve25519/spake25519_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_x25519_test") {
sources = [
"src/crypto/curve25519/x25519_test.cc", "src/crypto/curve25519/x25519_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_dh_test") {
sources = [
"src/crypto/dh/dh_test.cc", "src/crypto/dh/dh_test.cc",
] "src/crypto/digest_extra/digest_test.cc",
sources += _test_support_sources "src/crypto/dsa/dsa_test.cc",
if (defined(invoker.configs_exclude)) { "src/crypto/ecdh/ecdh_test.cc",
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_digest_test") {
sources = [
"src/crypto/digest/digest_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_dsa_test") {
sources = [
"src/crypto/dsa/dsa_test.c",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_ec_test") {
sources = [
"src/crypto/ec/ec_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_example_mul") {
sources = [
"src/crypto/ec/example_mul.c",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_ecdsa_test") {
sources = [
"src/crypto/ecdsa/ecdsa_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_err_test") {
sources = [
"src/crypto/err/err_test.cc", "src/crypto/err/err_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_evp_extra_test") {
sources = [
"src/crypto/evp/evp_extra_test.cc", "src/crypto/evp/evp_extra_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_evp_test") {
sources = [
"src/crypto/evp/evp_test.cc", "src/crypto/evp/evp_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_pbkdf_test") {
sources = [
"src/crypto/evp/pbkdf_test.cc", "src/crypto/evp/pbkdf_test.cc",
] "src/crypto/evp/scrypt_test.cc",
sources += _test_support_sources "src/crypto/fipsmodule/aes/aes_test.cc",
if (defined(invoker.configs_exclude)) { "src/crypto/fipsmodule/bn/bn_test.cc",
configs -= invoker.configs_exclude "src/crypto/fipsmodule/ec/ec_test.cc",
} "src/crypto/fipsmodule/ec/p256-x86_64_test.cc",
configs += invoker.configs "src/crypto/fipsmodule/ecdsa/ecdsa_test.cc",
deps = invoker.deps "src/crypto/fipsmodule/modes/gcm_test.cc",
} "src/crypto/fipsmodule/rand/ctrdrbg_test.cc",
"src/crypto/hkdf/hkdf_test.cc",
executable("boringssl_hkdf_test") { "src/crypto/hmac_extra/hmac_test.cc",
sources = [ "src/crypto/lhash/lhash_test.cc",
"src/crypto/hkdf/hkdf_test.c",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_hmac_test") {
sources = [
"src/crypto/hmac/hmac_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_lhash_test") {
sources = [
"src/crypto/lhash/lhash_test.c",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_gcm_test") {
sources = [
"src/crypto/modes/gcm_test.c",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_newhope_statistical_test") {
sources = [
"src/crypto/newhope/newhope_statistical_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_newhope_test") {
sources = [
"src/crypto/newhope/newhope_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_newhope_vectors_test") {
sources = [
"src/crypto/newhope/newhope_vectors_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_obj_test") {
sources = [
"src/crypto/obj/obj_test.cc", "src/crypto/obj/obj_test.cc",
] "src/crypto/pkcs7/pkcs7_test.cc",
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_pkcs12_test") {
sources = [
"src/crypto/pkcs8/pkcs12_test.cc", "src/crypto/pkcs8/pkcs12_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_pkcs8_test") {
sources = [
"src/crypto/pkcs8/pkcs8_test.cc", "src/crypto/pkcs8/pkcs8_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_poly1305_test") {
sources = [
"src/crypto/poly1305/poly1305_test.cc", "src/crypto/poly1305/poly1305_test.cc",
] "src/crypto/pool/pool_test.cc",
sources += _test_support_sources "src/crypto/refcount_test.cc",
if (defined(invoker.configs_exclude)) { "src/crypto/rsa_extra/rsa_test.cc",
configs -= invoker.configs_exclude "src/crypto/test/file_test_gtest.cc",
} "src/crypto/test/gtest_main.cc",
configs += invoker.configs "src/crypto/thread_test.cc",
deps = invoker.deps
}
executable("boringssl_refcount_test") {
sources = [
"src/crypto/refcount_test.c",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_rsa_test") {
sources = [
"src/crypto/rsa/rsa_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_thread_test") {
sources = [
"src/crypto/thread_test.c",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_pkcs7_test") {
sources = [
"src/crypto/x509/pkcs7_test.c",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_x509_test") {
sources = [
"src/crypto/x509/x509_test.cc", "src/crypto/x509/x509_test.cc",
"src/crypto/x509v3/tab_test.cc",
"src/crypto/x509v3/v3name_test.cc",
] ]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_tab_test") { ssl_test_sources = [
sources = [ "src/crypto/test/gtest_main.cc",
"src/crypto/x509v3/tab_test.c", "src/ssl/span_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_v3name_test") {
sources = [
"src/crypto/x509v3/v3name_test.c",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_pqueue_test") {
sources = [
"src/ssl/pqueue/pqueue_test.c",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
executable("boringssl_ssl_test") {
sources = [
"src/ssl/ssl_test.cc", "src/ssl/ssl_test.cc",
] ]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
configs -= invoker.configs_exclude
}
configs += invoker.configs
deps = invoker.deps
}
group(target_name) {
deps = [
":boringssl_aead_test",
":boringssl_aes_test",
":boringssl_asn1_test",
":boringssl_base64_test",
":boringssl_bio_test",
":boringssl_bn_test",
":boringssl_bytestring_test",
":boringssl_chacha_test",
":boringssl_cipher_test",
":boringssl_cmac_test",
":boringssl_constant_time_test",
":boringssl_dh_test",
":boringssl_digest_test",
":boringssl_dsa_test",
":boringssl_ec_test",
":boringssl_ecdsa_test",
":boringssl_ed25519_test",
":boringssl_err_test",
":boringssl_evp_extra_test",
":boringssl_evp_test",
":boringssl_example_mul",
":boringssl_gcm_test",
":boringssl_hkdf_test",
":boringssl_hmac_test",
":boringssl_lhash_test",
":boringssl_newhope_statistical_test",
":boringssl_newhope_test",
":boringssl_newhope_vectors_test",
":boringssl_obj_test",
":boringssl_pbkdf_test",
":boringssl_pkcs12_test",
":boringssl_pkcs7_test",
":boringssl_pkcs8_test",
":boringssl_poly1305_test",
":boringssl_pqueue_test",
":boringssl_refcount_test",
":boringssl_rsa_test",
":boringssl_spake25519_test",
":boringssl_ssl_test",
":boringssl_tab_test",
":boringssl_thread_test",
":boringssl_v3name_test",
":boringssl_x25519_test",
":boringssl_x509_test",
]
}
}


@@ -7,43 +7,52 @@
{ {
'variables': { 'variables': {
'boringssl_ssl_sources': [ 'boringssl_ssl_sources': [
'src/ssl/custom_extensions.c', 'src/include/openssl/dtls1.h',
'src/ssl/d1_both.c', 'src/include/openssl/ssl.h',
'src/ssl/d1_lib.c', 'src/include/openssl/ssl3.h',
'src/ssl/d1_meth.c', 'src/include/openssl/tls1.h',
'src/ssl/d1_pkt.c', 'src/ssl/bio_ssl.cc',
'src/ssl/d1_srtp.c', 'src/ssl/custom_extensions.cc',
'src/ssl/dtls_record.c', 'src/ssl/d1_both.cc',
'src/ssl/handshake_client.c', 'src/ssl/d1_lib.cc',
'src/ssl/handshake_server.c', 'src/ssl/d1_pkt.cc',
'src/ssl/pqueue/pqueue.c', 'src/ssl/d1_srtp.cc',
'src/ssl/s3_both.c', 'src/ssl/dtls_method.cc',
'src/ssl/s3_enc.c', 'src/ssl/dtls_record.cc',
'src/ssl/s3_lib.c', 'src/ssl/handshake.cc',
'src/ssl/s3_meth.c', 'src/ssl/handshake_client.cc',
'src/ssl/s3_pkt.c', 'src/ssl/handshake_server.cc',
'src/ssl/ssl_aead_ctx.c', 'src/ssl/internal.h',
'src/ssl/ssl_asn1.c', 'src/ssl/s3_both.cc',
'src/ssl/ssl_buffer.c', 'src/ssl/s3_lib.cc',
'src/ssl/ssl_cert.c', 'src/ssl/s3_pkt.cc',
'src/ssl/ssl_cipher.c', 'src/ssl/ssl_aead_ctx.cc',
'src/ssl/ssl_ecdh.c', 'src/ssl/ssl_asn1.cc',
'src/ssl/ssl_file.c', 'src/ssl/ssl_buffer.cc',
'src/ssl/ssl_lib.c', 'src/ssl/ssl_cert.cc',
'src/ssl/ssl_rsa.c', 'src/ssl/ssl_cipher.cc',
'src/ssl/ssl_session.c', 'src/ssl/ssl_file.cc',
'src/ssl/ssl_stat.c', 'src/ssl/ssl_key_share.cc',
'src/ssl/t1_enc.c', 'src/ssl/ssl_lib.cc',
'src/ssl/t1_lib.c', 'src/ssl/ssl_privkey.cc',
'src/ssl/tls_record.c', 'src/ssl/ssl_session.cc',
'src/ssl/ssl_stat.cc',
'src/ssl/ssl_transcript.cc',
'src/ssl/ssl_versions.cc',
'src/ssl/ssl_x509.cc',
'src/ssl/t1_enc.cc',
'src/ssl/t1_lib.cc',
'src/ssl/tls13_both.cc',
'src/ssl/tls13_client.cc',
'src/ssl/tls13_enc.cc',
'src/ssl/tls13_server.cc',
'src/ssl/tls_method.cc',
'src/ssl/tls_record.cc',
], ],
'boringssl_crypto_sources': [ 'boringssl_crypto_sources': [
'err_data.c', 'err_data.c',
'src/crypto/aes/aes.c',
'src/crypto/aes/mode_wrappers.c',
'src/crypto/asn1/a_bitstr.c', 'src/crypto/asn1/a_bitstr.c',
'src/crypto/asn1/a_bool.c', 'src/crypto/asn1/a_bool.c',
'src/crypto/asn1/a_bytes.c',
'src/crypto/asn1/a_d2i_fp.c', 'src/crypto/asn1/a_d2i_fp.c',
'src/crypto/asn1/a_dup.c', 'src/crypto/asn1/a_dup.c',
'src/crypto/asn1/a_enum.c', 'src/crypto/asn1/a_enum.c',
@@ -60,135 +69,116 @@
'src/crypto/asn1/a_utctm.c', 'src/crypto/asn1/a_utctm.c',
'src/crypto/asn1/a_utf8.c', 'src/crypto/asn1/a_utf8.c',
'src/crypto/asn1/asn1_lib.c', 'src/crypto/asn1/asn1_lib.c',
'src/crypto/asn1/asn1_locl.h',
'src/crypto/asn1/asn1_par.c', 'src/crypto/asn1/asn1_par.c',
'src/crypto/asn1/asn_pack.c', 'src/crypto/asn1/asn_pack.c',
'src/crypto/asn1/f_enum.c', 'src/crypto/asn1/f_enum.c',
'src/crypto/asn1/f_int.c', 'src/crypto/asn1/f_int.c',
'src/crypto/asn1/f_string.c', 'src/crypto/asn1/f_string.c',
'src/crypto/asn1/t_bitst.c',
'src/crypto/asn1/tasn_dec.c', 'src/crypto/asn1/tasn_dec.c',
'src/crypto/asn1/tasn_enc.c', 'src/crypto/asn1/tasn_enc.c',
'src/crypto/asn1/tasn_fre.c', 'src/crypto/asn1/tasn_fre.c',
'src/crypto/asn1/tasn_new.c', 'src/crypto/asn1/tasn_new.c',
'src/crypto/asn1/tasn_typ.c', 'src/crypto/asn1/tasn_typ.c',
'src/crypto/asn1/tasn_utl.c', 'src/crypto/asn1/tasn_utl.c',
'src/crypto/asn1/x_bignum.c', 'src/crypto/asn1/time_support.c',
'src/crypto/asn1/x_long.c',
'src/crypto/base64/base64.c', 'src/crypto/base64/base64.c',
'src/crypto/bio/bio.c', 'src/crypto/bio/bio.c',
'src/crypto/bio/bio_mem.c', 'src/crypto/bio/bio_mem.c',
'src/crypto/bio/buffer.c',
'src/crypto/bio/connect.c', 'src/crypto/bio/connect.c',
'src/crypto/bio/fd.c', 'src/crypto/bio/fd.c',
'src/crypto/bio/file.c', 'src/crypto/bio/file.c',
'src/crypto/bio/hexdump.c', 'src/crypto/bio/hexdump.c',
'src/crypto/bio/internal.h',
'src/crypto/bio/pair.c', 'src/crypto/bio/pair.c',
'src/crypto/bio/printf.c', 'src/crypto/bio/printf.c',
'src/crypto/bio/socket.c', 'src/crypto/bio/socket.c',
'src/crypto/bio/socket_helper.c', 'src/crypto/bio/socket_helper.c',
'src/crypto/bn/add.c', 'src/crypto/bn_extra/bn_asn1.c',
'src/crypto/bn/asm/x86_64-gcc.c', 'src/crypto/bn_extra/convert.c',
'src/crypto/bn/bn.c',
'src/crypto/bn/bn_asn1.c',
'src/crypto/bn/cmp.c',
'src/crypto/bn/convert.c',
'src/crypto/bn/ctx.c',
'src/crypto/bn/div.c',
'src/crypto/bn/exponentiation.c',
'src/crypto/bn/gcd.c',
'src/crypto/bn/generic.c',
'src/crypto/bn/kronecker.c',
'src/crypto/bn/montgomery.c',
'src/crypto/bn/mul.c',
'src/crypto/bn/prime.c',
'src/crypto/bn/random.c',
'src/crypto/bn/rsaz_exp.c',
'src/crypto/bn/shift.c',
'src/crypto/bn/sqrt.c',
'src/crypto/buf/buf.c', 'src/crypto/buf/buf.c',
'src/crypto/bytestring/asn1_compat.c', 'src/crypto/bytestring/asn1_compat.c',
'src/crypto/bytestring/ber.c', 'src/crypto/bytestring/ber.c',
'src/crypto/bytestring/cbb.c', 'src/crypto/bytestring/cbb.c',
'src/crypto/bytestring/cbs.c', 'src/crypto/bytestring/cbs.c',
'src/crypto/bytestring/internal.h',
'src/crypto/chacha/chacha.c', 'src/crypto/chacha/chacha.c',
'src/crypto/cipher/aead.c', 'src/crypto/cipher_extra/cipher_extra.c',
'src/crypto/cipher/cipher.c', 'src/crypto/cipher_extra/derive_key.c',
'src/crypto/cipher/derive_key.c', 'src/crypto/cipher_extra/e_aesctrhmac.c',
'src/crypto/cipher/e_aes.c', 'src/crypto/cipher_extra/e_aesgcmsiv.c',
'src/crypto/cipher/e_chacha20poly1305.c', 'src/crypto/cipher_extra/e_chacha20poly1305.c',
'src/crypto/cipher/e_des.c', 'src/crypto/cipher_extra/e_null.c',
'src/crypto/cipher/e_null.c', 'src/crypto/cipher_extra/e_rc2.c',
'src/crypto/cipher/e_rc2.c', 'src/crypto/cipher_extra/e_rc4.c',
'src/crypto/cipher/e_rc4.c', 'src/crypto/cipher_extra/e_ssl3.c',
'src/crypto/cipher/e_ssl3.c', 'src/crypto/cipher_extra/e_tls.c',
'src/crypto/cipher/e_tls.c', 'src/crypto/cipher_extra/internal.h',
'src/crypto/cipher/tls_cbc.c', 'src/crypto/cipher_extra/tls_cbc.c',
'src/crypto/cmac/cmac.c', 'src/crypto/cmac/cmac.c',
'src/crypto/conf/conf.c', 'src/crypto/conf/conf.c',
'src/crypto/conf/conf_def.h',
'src/crypto/conf/internal.h',
'src/crypto/cpu-aarch64-linux.c', 'src/crypto/cpu-aarch64-linux.c',
'src/crypto/cpu-arm-linux.c', 'src/crypto/cpu-arm-linux.c',
'src/crypto/cpu-arm.c', 'src/crypto/cpu-arm.c',
'src/crypto/cpu-intel.c', 'src/crypto/cpu-intel.c',
'src/crypto/cpu-ppc64le.c',
'src/crypto/crypto.c', 'src/crypto/crypto.c',
'src/crypto/curve25519/curve25519.c',
'src/crypto/curve25519/spake25519.c', 'src/crypto/curve25519/spake25519.c',
'src/crypto/curve25519/x25519-x86_64.c', 'src/crypto/curve25519/x25519-x86_64.c',
'src/crypto/des/des.c',
'src/crypto/dh/check.c', 'src/crypto/dh/check.c',
'src/crypto/dh/dh.c', 'src/crypto/dh/dh.c',
'src/crypto/dh/dh_asn1.c', 'src/crypto/dh/dh_asn1.c',
'src/crypto/dh/params.c', 'src/crypto/dh/params.c',
'src/crypto/digest/digest.c', 'src/crypto/digest_extra/digest_extra.c',
'src/crypto/digest/digests.c',
'src/crypto/dsa/dsa.c', 'src/crypto/dsa/dsa.c',
'src/crypto/dsa/dsa_asn1.c', 'src/crypto/dsa/dsa_asn1.c',
'src/crypto/ec/ec.c', 'src/crypto/ec_extra/ec_asn1.c',
'src/crypto/ec/ec_asn1.c',
'src/crypto/ec/ec_key.c',
'src/crypto/ec/ec_montgomery.c',
'src/crypto/ec/oct.c',
'src/crypto/ec/p224-64.c',
'src/crypto/ec/p256-64.c',
'src/crypto/ec/p256-x86_64.c',
'src/crypto/ec/simple.c',
'src/crypto/ec/util-64.c',
'src/crypto/ec/wnaf.c',
'src/crypto/ecdh/ecdh.c', 'src/crypto/ecdh/ecdh.c',
'src/crypto/ecdsa/ecdsa.c', 'src/crypto/ecdsa_extra/ecdsa_asn1.c',
'src/crypto/ecdsa/ecdsa_asn1.c',
'src/crypto/engine/engine.c', 'src/crypto/engine/engine.c',
'src/crypto/err/err.c', 'src/crypto/err/err.c',
'src/crypto/err/internal.h',
'src/crypto/evp/digestsign.c', 'src/crypto/evp/digestsign.c',
'src/crypto/evp/evp.c', 'src/crypto/evp/evp.c',
'src/crypto/evp/evp_asn1.c', 'src/crypto/evp/evp_asn1.c',
'src/crypto/evp/evp_ctx.c', 'src/crypto/evp/evp_ctx.c',
'src/crypto/evp/internal.h',
'src/crypto/evp/p_dsa_asn1.c', 'src/crypto/evp/p_dsa_asn1.c',
'src/crypto/evp/p_ec.c', 'src/crypto/evp/p_ec.c',
'src/crypto/evp/p_ec_asn1.c', 'src/crypto/evp/p_ec_asn1.c',
'src/crypto/evp/p_ed25519.c',
'src/crypto/evp/p_ed25519_asn1.c',
'src/crypto/evp/p_rsa.c', 'src/crypto/evp/p_rsa.c',
'src/crypto/evp/p_rsa_asn1.c', 'src/crypto/evp/p_rsa_asn1.c',
'src/crypto/evp/pbkdf.c', 'src/crypto/evp/pbkdf.c',
'src/crypto/evp/print.c', 'src/crypto/evp/print.c',
'src/crypto/evp/scrypt.c',
'src/crypto/evp/sign.c', 'src/crypto/evp/sign.c',
'src/crypto/ex_data.c', 'src/crypto/ex_data.c',
'src/crypto/fipsmodule/aes/internal.h',
'src/crypto/fipsmodule/bcm.c',
'src/crypto/fipsmodule/bn/internal.h',
'src/crypto/fipsmodule/bn/rsaz_exp.h',
'src/crypto/fipsmodule/cipher/internal.h',
'src/crypto/fipsmodule/delocate.h',
'src/crypto/fipsmodule/des/internal.h',
'src/crypto/fipsmodule/digest/internal.h',
'src/crypto/fipsmodule/digest/md32_common.h',
'src/crypto/fipsmodule/ec/internal.h',
'src/crypto/fipsmodule/ec/p256-x86_64-table.h',
'src/crypto/fipsmodule/ec/p256-x86_64.h',
'src/crypto/fipsmodule/is_fips.c',
'src/crypto/fipsmodule/modes/internal.h',
'src/crypto/fipsmodule/rand/internal.h',
'src/crypto/fipsmodule/rsa/internal.h',
'src/crypto/hkdf/hkdf.c', 'src/crypto/hkdf/hkdf.c',
'src/crypto/hmac/hmac.c', 'src/crypto/internal.h',
'src/crypto/lhash/lhash.c', 'src/crypto/lhash/lhash.c',
'src/crypto/md4/md4.c',
'src/crypto/md5/md5.c',
'src/crypto/mem.c', 'src/crypto/mem.c',
'src/crypto/modes/cbc.c',
'src/crypto/modes/cfb.c',
'src/crypto/modes/ctr.c',
'src/crypto/modes/gcm.c',
'src/crypto/modes/ofb.c',
'src/crypto/newhope/error_correction.c',
'src/crypto/newhope/newhope.c',
'src/crypto/newhope/ntt.c',
'src/crypto/newhope/poly.c',
'src/crypto/newhope/precomp.c',
'src/crypto/newhope/reduce.c',
'src/crypto/obj/obj.c', 'src/crypto/obj/obj.c',
'src/crypto/obj/obj_dat.h',
'src/crypto/obj/obj_xref.c', 'src/crypto/obj/obj_xref.c',
'src/crypto/pem/pem_all.c', 'src/crypto/pem/pem_all.c',
'src/crypto/pem/pem_info.c', 'src/crypto/pem/pem_info.c',
@@ -198,34 +188,33 @@
'src/crypto/pem/pem_pkey.c', 'src/crypto/pem/pem_pkey.c',
'src/crypto/pem/pem_x509.c', 'src/crypto/pem/pem_x509.c',
'src/crypto/pem/pem_xaux.c', 'src/crypto/pem/pem_xaux.c',
'src/crypto/pkcs8/p5_pbe.c', 'src/crypto/pkcs7/internal.h',
'src/crypto/pkcs7/pkcs7.c',
'src/crypto/pkcs7/pkcs7_x509.c',
'src/crypto/pkcs8/internal.h',
'src/crypto/pkcs8/p5_pbev2.c', 'src/crypto/pkcs8/p5_pbev2.c',
'src/crypto/pkcs8/p8_pkey.c',
'src/crypto/pkcs8/pkcs8.c', 'src/crypto/pkcs8/pkcs8.c',
'src/crypto/pkcs8/pkcs8_x509.c',
'src/crypto/poly1305/internal.h',
'src/crypto/poly1305/poly1305.c', 'src/crypto/poly1305/poly1305.c',
'src/crypto/poly1305/poly1305_arm.c', 'src/crypto/poly1305/poly1305_arm.c',
'src/crypto/poly1305/poly1305_vec.c', 'src/crypto/poly1305/poly1305_vec.c',
'src/crypto/rand/deterministic.c', 'src/crypto/pool/internal.h',
'src/crypto/rand/rand.c', 'src/crypto/pool/pool.c',
'src/crypto/rand/urandom.c', 'src/crypto/rand_extra/deterministic.c',
'src/crypto/rand/windows.c', 'src/crypto/rand_extra/forkunsafe.c',
'src/crypto/rand_extra/fuchsia.c',
'src/crypto/rand_extra/rand_extra.c',
'src/crypto/rand_extra/windows.c',
'src/crypto/rc4/rc4.c', 'src/crypto/rc4/rc4.c',
'src/crypto/refcount_c11.c', 'src/crypto/refcount_c11.c',
'src/crypto/refcount_lock.c', 'src/crypto/refcount_lock.c',
'src/crypto/rsa/blinding.c', 'src/crypto/rsa_extra/rsa_asn1.c',
'src/crypto/rsa/padding.c',
'src/crypto/rsa/rsa.c',
'src/crypto/rsa/rsa_asn1.c',
'src/crypto/rsa/rsa_impl.c',
'src/crypto/sha/sha1.c',
'src/crypto/sha/sha256.c',
'src/crypto/sha/sha512.c',
'src/crypto/stack/stack.c', 'src/crypto/stack/stack.c',
'src/crypto/thread.c', 'src/crypto/thread.c',
'src/crypto/thread_none.c', 'src/crypto/thread_none.c',
'src/crypto/thread_pthread.c', 'src/crypto/thread_pthread.c',
'src/crypto/thread_win.c', 'src/crypto/thread_win.c',
'src/crypto/time_support.c',
'src/crypto/x509/a_digest.c', 'src/crypto/x509/a_digest.c',
'src/crypto/x509/a_sign.c', 'src/crypto/x509/a_sign.c',
'src/crypto/x509/a_strex.c', 'src/crypto/x509/a_strex.c',
@@ -234,13 +223,15 @@
'src/crypto/x509/asn1_gen.c', 'src/crypto/x509/asn1_gen.c',
'src/crypto/x509/by_dir.c', 'src/crypto/x509/by_dir.c',
'src/crypto/x509/by_file.c', 'src/crypto/x509/by_file.c',
'src/crypto/x509/charmap.h',
'src/crypto/x509/i2d_pr.c', 'src/crypto/x509/i2d_pr.c',
'src/crypto/x509/pkcs7.c', 'src/crypto/x509/internal.h',
'src/crypto/x509/rsa_pss.c', 'src/crypto/x509/rsa_pss.c',
'src/crypto/x509/t_crl.c', 'src/crypto/x509/t_crl.c',
'src/crypto/x509/t_req.c', 'src/crypto/x509/t_req.c',
'src/crypto/x509/t_x509.c', 'src/crypto/x509/t_x509.c',
'src/crypto/x509/t_x509a.c', 'src/crypto/x509/t_x509a.c',
'src/crypto/x509/vpm_int.h',
'src/crypto/x509/x509.c', 'src/crypto/x509/x509.c',
'src/crypto/x509/x509_att.c', 'src/crypto/x509/x509_att.c',
'src/crypto/x509/x509_cmp.c', 'src/crypto/x509/x509_cmp.c',
@@ -261,7 +252,6 @@
'src/crypto/x509/x509name.c', 'src/crypto/x509/x509name.c',
'src/crypto/x509/x509rset.c', 'src/crypto/x509/x509rset.c',
'src/crypto/x509/x509spki.c', 'src/crypto/x509/x509spki.c',
'src/crypto/x509/x509type.c',
'src/crypto/x509/x_algor.c', 'src/crypto/x509/x_algor.c',
'src/crypto/x509/x_all.c', 'src/crypto/x509/x_all.c',
'src/crypto/x509/x_attrib.c', 'src/crypto/x509/x_attrib.c',
@@ -277,8 +267,10 @@
'src/crypto/x509/x_val.c', 'src/crypto/x509/x_val.c',
'src/crypto/x509/x_x509.c', 'src/crypto/x509/x_x509.c',
'src/crypto/x509/x_x509a.c', 'src/crypto/x509/x_x509a.c',
'src/crypto/x509v3/ext_dat.h',
'src/crypto/x509v3/pcy_cache.c', 'src/crypto/x509v3/pcy_cache.c',
'src/crypto/x509v3/pcy_data.c', 'src/crypto/x509v3/pcy_data.c',
'src/crypto/x509v3/pcy_int.h',
'src/crypto/x509v3/pcy_lib.c', 'src/crypto/x509v3/pcy_lib.c',
'src/crypto/x509v3/pcy_map.c', 'src/crypto/x509v3/pcy_map.c',
'src/crypto/x509v3/pcy_node.c', 'src/crypto/x509v3/pcy_node.c',
@@ -309,136 +301,229 @@
'src/crypto/x509v3/v3_skey.c', 'src/crypto/x509v3/v3_skey.c',
'src/crypto/x509v3/v3_sxnet.c', 'src/crypto/x509v3/v3_sxnet.c',
'src/crypto/x509v3/v3_utl.c', 'src/crypto/x509v3/v3_utl.c',
'src/include/openssl/aead.h',
'src/include/openssl/aes.h',
'src/include/openssl/arm_arch.h',
'src/include/openssl/asn1.h',
'src/include/openssl/asn1_mac.h',
'src/include/openssl/asn1t.h',
'src/include/openssl/base.h',
'src/include/openssl/base64.h',
'src/include/openssl/bio.h',
'src/include/openssl/blowfish.h',
'src/include/openssl/bn.h',
'src/include/openssl/buf.h',
'src/include/openssl/buffer.h',
'src/include/openssl/bytestring.h',
'src/include/openssl/cast.h',
'src/include/openssl/chacha.h',
'src/include/openssl/cipher.h',
'src/include/openssl/cmac.h',
'src/include/openssl/conf.h',
'src/include/openssl/cpu.h',
'src/include/openssl/crypto.h',
'src/include/openssl/curve25519.h',
'src/include/openssl/des.h',
'src/include/openssl/dh.h',
'src/include/openssl/digest.h',
'src/include/openssl/dsa.h',
'src/include/openssl/ec.h',
'src/include/openssl/ec_key.h',
'src/include/openssl/ecdh.h',
'src/include/openssl/ecdsa.h',
'src/include/openssl/engine.h',
'src/include/openssl/err.h',
'src/include/openssl/evp.h',
'src/include/openssl/ex_data.h',
'src/include/openssl/hkdf.h',
'src/include/openssl/hmac.h',
'src/include/openssl/is_boringssl.h',
'src/include/openssl/lhash.h',
'src/include/openssl/lhash_macros.h',
'src/include/openssl/md4.h',
'src/include/openssl/md5.h',
'src/include/openssl/mem.h',
'src/include/openssl/nid.h',
'src/include/openssl/obj.h',
'src/include/openssl/obj_mac.h',
'src/include/openssl/objects.h',
'src/include/openssl/opensslconf.h',
'src/include/openssl/opensslv.h',
'src/include/openssl/ossl_typ.h',
'src/include/openssl/pem.h',
'src/include/openssl/pkcs12.h',
'src/include/openssl/pkcs7.h',
'src/include/openssl/pkcs8.h',
'src/include/openssl/poly1305.h',
'src/include/openssl/pool.h',
'src/include/openssl/rand.h',
'src/include/openssl/rc4.h',
'src/include/openssl/ripemd.h',
'src/include/openssl/rsa.h',
'src/include/openssl/safestack.h',
'src/include/openssl/sha.h',
'src/include/openssl/span.h',
'src/include/openssl/srtp.h',
'src/include/openssl/stack.h',
'src/include/openssl/thread.h',
'src/include/openssl/type_check.h',
'src/include/openssl/x509.h',
'src/include/openssl/x509_vfy.h',
'src/include/openssl/x509v3.h',
'src/third_party/fiat/curve25519.c',
'src/third_party/fiat/internal.h',
],
'boringssl_ios_aarch64_sources': [
'ios-aarch64/crypto/chacha/chacha-armv8.S',
'ios-aarch64/crypto/fipsmodule/aesv8-armx64.S',
'ios-aarch64/crypto/fipsmodule/armv8-mont.S',
'ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S',
'ios-aarch64/crypto/fipsmodule/sha1-armv8.S',
'ios-aarch64/crypto/fipsmodule/sha256-armv8.S',
'ios-aarch64/crypto/fipsmodule/sha512-armv8.S',
],
'boringssl_ios_arm_sources': [
'ios-arm/crypto/chacha/chacha-armv4.S',
'ios-arm/crypto/fipsmodule/aes-armv4.S',
'ios-arm/crypto/fipsmodule/aesv8-armx32.S',
'ios-arm/crypto/fipsmodule/armv4-mont.S',
'ios-arm/crypto/fipsmodule/bsaes-armv7.S',
'ios-arm/crypto/fipsmodule/ghash-armv4.S',
'ios-arm/crypto/fipsmodule/ghashv8-armx32.S',
'ios-arm/crypto/fipsmodule/sha1-armv4-large.S',
'ios-arm/crypto/fipsmodule/sha256-armv4.S',
'ios-arm/crypto/fipsmodule/sha512-armv4.S',
], ],
'boringssl_linux_aarch64_sources': [ 'boringssl_linux_aarch64_sources': [
'linux-aarch64/crypto/aes/aesv8-armx64.S',
'linux-aarch64/crypto/bn/armv8-mont.S',
'linux-aarch64/crypto/chacha/chacha-armv8.S', 'linux-aarch64/crypto/chacha/chacha-armv8.S',
'linux-aarch64/crypto/modes/ghashv8-armx64.S', 'linux-aarch64/crypto/fipsmodule/aesv8-armx64.S',
'linux-aarch64/crypto/sha/sha1-armv8.S', 'linux-aarch64/crypto/fipsmodule/armv8-mont.S',
'linux-aarch64/crypto/sha/sha256-armv8.S', 'linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S',
'linux-aarch64/crypto/sha/sha512-armv8.S', 'linux-aarch64/crypto/fipsmodule/sha1-armv8.S',
'linux-aarch64/crypto/fipsmodule/sha256-armv8.S',
'linux-aarch64/crypto/fipsmodule/sha512-armv8.S',
], ],
'boringssl_linux_arm_sources': [ 'boringssl_linux_arm_sources': [
'linux-arm/crypto/aes/aes-armv4.S',
'linux-arm/crypto/aes/aesv8-armx32.S',
'linux-arm/crypto/aes/bsaes-armv7.S',
'linux-arm/crypto/bn/armv4-mont.S',
'linux-arm/crypto/chacha/chacha-armv4.S', 'linux-arm/crypto/chacha/chacha-armv4.S',
'linux-arm/crypto/modes/ghash-armv4.S', 'linux-arm/crypto/fipsmodule/aes-armv4.S',
'linux-arm/crypto/modes/ghashv8-armx32.S', 'linux-arm/crypto/fipsmodule/aesv8-armx32.S',
'linux-arm/crypto/sha/sha1-armv4-large.S', 'linux-arm/crypto/fipsmodule/armv4-mont.S',
'linux-arm/crypto/sha/sha256-armv4.S', 'linux-arm/crypto/fipsmodule/bsaes-armv7.S',
'linux-arm/crypto/sha/sha512-armv4.S', 'linux-arm/crypto/fipsmodule/ghash-armv4.S',
'linux-arm/crypto/fipsmodule/ghashv8-armx32.S',
'linux-arm/crypto/fipsmodule/sha1-armv4-large.S',
'linux-arm/crypto/fipsmodule/sha256-armv4.S',
'linux-arm/crypto/fipsmodule/sha512-armv4.S',
'src/crypto/curve25519/asm/x25519-asm-arm.S', 'src/crypto/curve25519/asm/x25519-asm-arm.S',
'src/crypto/poly1305/poly1305_arm_asm.S', 'src/crypto/poly1305/poly1305_arm_asm.S',
], ],
'boringssl_linux_ppc64le_sources': [
'linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S',
'linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S',
],
'boringssl_linux_x86_sources': [ 'boringssl_linux_x86_sources': [
'linux-x86/crypto/aes/aes-586.S',
'linux-x86/crypto/aes/aesni-x86.S',
'linux-x86/crypto/aes/vpaes-x86.S',
'linux-x86/crypto/bn/bn-586.S',
'linux-x86/crypto/bn/co-586.S',
'linux-x86/crypto/bn/x86-mont.S',
'linux-x86/crypto/chacha/chacha-x86.S', 'linux-x86/crypto/chacha/chacha-x86.S',
'linux-x86/crypto/md5/md5-586.S', 'linux-x86/crypto/fipsmodule/aes-586.S',
'linux-x86/crypto/modes/ghash-x86.S', 'linux-x86/crypto/fipsmodule/aesni-x86.S',
'linux-x86/crypto/rc4/rc4-586.S', 'linux-x86/crypto/fipsmodule/bn-586.S',
'linux-x86/crypto/sha/sha1-586.S', 'linux-x86/crypto/fipsmodule/co-586.S',
'linux-x86/crypto/sha/sha256-586.S', 'linux-x86/crypto/fipsmodule/ghash-x86.S',
'linux-x86/crypto/sha/sha512-586.S', 'linux-x86/crypto/fipsmodule/md5-586.S',
'linux-x86/crypto/fipsmodule/sha1-586.S',
'linux-x86/crypto/fipsmodule/sha256-586.S',
'linux-x86/crypto/fipsmodule/sha512-586.S',
'linux-x86/crypto/fipsmodule/vpaes-x86.S',
'linux-x86/crypto/fipsmodule/x86-mont.S',
], ],
'boringssl_linux_x86_64_sources': [ 'boringssl_linux_x86_64_sources': [
'linux-x86_64/crypto/aes/aes-x86_64.S',
'linux-x86_64/crypto/aes/aesni-x86_64.S',
'linux-x86_64/crypto/aes/bsaes-x86_64.S',
'linux-x86_64/crypto/aes/vpaes-x86_64.S',
'linux-x86_64/crypto/bn/rsaz-avx2.S',
'linux-x86_64/crypto/bn/rsaz-x86_64.S',
'linux-x86_64/crypto/bn/x86_64-mont.S',
'linux-x86_64/crypto/bn/x86_64-mont5.S',
'linux-x86_64/crypto/chacha/chacha-x86_64.S', 'linux-x86_64/crypto/chacha/chacha-x86_64.S',
'linux-x86_64/crypto/ec/p256-x86_64-asm.S', 'linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S',
'linux-x86_64/crypto/md5/md5-x86_64.S', 'linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S',
'linux-x86_64/crypto/modes/aesni-gcm-x86_64.S', 'linux-x86_64/crypto/fipsmodule/aes-x86_64.S',
'linux-x86_64/crypto/modes/ghash-x86_64.S', 'linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S',
'linux-x86_64/crypto/rand/rdrand-x86_64.S', 'linux-x86_64/crypto/fipsmodule/aesni-x86_64.S',
'linux-x86_64/crypto/rc4/rc4-x86_64.S', 'linux-x86_64/crypto/fipsmodule/bsaes-x86_64.S',
'linux-x86_64/crypto/sha/sha1-x86_64.S', 'linux-x86_64/crypto/fipsmodule/ghash-x86_64.S',
'linux-x86_64/crypto/sha/sha256-x86_64.S', 'linux-x86_64/crypto/fipsmodule/md5-x86_64.S',
'linux-x86_64/crypto/sha/sha512-x86_64.S', 'linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S',
'linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S',
'linux-x86_64/crypto/fipsmodule/rsaz-avx2.S',
'linux-x86_64/crypto/fipsmodule/sha1-x86_64.S',
'linux-x86_64/crypto/fipsmodule/sha256-x86_64.S',
'linux-x86_64/crypto/fipsmodule/sha512-x86_64.S',
'linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S',
'linux-x86_64/crypto/fipsmodule/x86_64-mont.S',
'linux-x86_64/crypto/fipsmodule/x86_64-mont5.S',
'src/crypto/curve25519/asm/x25519-asm-x86_64.S', 'src/crypto/curve25519/asm/x25519-asm-x86_64.S',
], ],
'boringssl_mac_x86_sources': [ 'boringssl_mac_x86_sources': [
'mac-x86/crypto/aes/aes-586.S',
'mac-x86/crypto/aes/aesni-x86.S',
'mac-x86/crypto/aes/vpaes-x86.S',
'mac-x86/crypto/bn/bn-586.S',
'mac-x86/crypto/bn/co-586.S',
'mac-x86/crypto/bn/x86-mont.S',
'mac-x86/crypto/chacha/chacha-x86.S', 'mac-x86/crypto/chacha/chacha-x86.S',
'mac-x86/crypto/md5/md5-586.S', 'mac-x86/crypto/fipsmodule/aes-586.S',
'mac-x86/crypto/modes/ghash-x86.S', 'mac-x86/crypto/fipsmodule/aesni-x86.S',
'mac-x86/crypto/rc4/rc4-586.S', 'mac-x86/crypto/fipsmodule/bn-586.S',
'mac-x86/crypto/sha/sha1-586.S', 'mac-x86/crypto/fipsmodule/co-586.S',
'mac-x86/crypto/sha/sha256-586.S', 'mac-x86/crypto/fipsmodule/ghash-x86.S',
'mac-x86/crypto/sha/sha512-586.S', 'mac-x86/crypto/fipsmodule/md5-586.S',
'mac-x86/crypto/fipsmodule/sha1-586.S',
'mac-x86/crypto/fipsmodule/sha256-586.S',
'mac-x86/crypto/fipsmodule/sha512-586.S',
'mac-x86/crypto/fipsmodule/vpaes-x86.S',
'mac-x86/crypto/fipsmodule/x86-mont.S',
], ],
'boringssl_mac_x86_64_sources': [ 'boringssl_mac_x86_64_sources': [
'mac-x86_64/crypto/aes/aes-x86_64.S',
'mac-x86_64/crypto/aes/aesni-x86_64.S',
'mac-x86_64/crypto/aes/bsaes-x86_64.S',
'mac-x86_64/crypto/aes/vpaes-x86_64.S',
'mac-x86_64/crypto/bn/rsaz-avx2.S',
'mac-x86_64/crypto/bn/rsaz-x86_64.S',
'mac-x86_64/crypto/bn/x86_64-mont.S',
'mac-x86_64/crypto/bn/x86_64-mont5.S',
'mac-x86_64/crypto/chacha/chacha-x86_64.S', 'mac-x86_64/crypto/chacha/chacha-x86_64.S',
'mac-x86_64/crypto/ec/p256-x86_64-asm.S', 'mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S',
'mac-x86_64/crypto/md5/md5-x86_64.S', 'mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S',
'mac-x86_64/crypto/modes/aesni-gcm-x86_64.S', 'mac-x86_64/crypto/fipsmodule/aes-x86_64.S',
'mac-x86_64/crypto/modes/ghash-x86_64.S', 'mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S',
'mac-x86_64/crypto/rand/rdrand-x86_64.S', 'mac-x86_64/crypto/fipsmodule/aesni-x86_64.S',
'mac-x86_64/crypto/rc4/rc4-x86_64.S', 'mac-x86_64/crypto/fipsmodule/bsaes-x86_64.S',
'mac-x86_64/crypto/sha/sha1-x86_64.S', 'mac-x86_64/crypto/fipsmodule/ghash-x86_64.S',
'mac-x86_64/crypto/sha/sha256-x86_64.S', 'mac-x86_64/crypto/fipsmodule/md5-x86_64.S',
'mac-x86_64/crypto/sha/sha512-x86_64.S', 'mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S',
'mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S',
'mac-x86_64/crypto/fipsmodule/rsaz-avx2.S',
'mac-x86_64/crypto/fipsmodule/sha1-x86_64.S',
'mac-x86_64/crypto/fipsmodule/sha256-x86_64.S',
'mac-x86_64/crypto/fipsmodule/sha512-x86_64.S',
'mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S',
'mac-x86_64/crypto/fipsmodule/x86_64-mont.S',
'mac-x86_64/crypto/fipsmodule/x86_64-mont5.S',
'src/crypto/curve25519/asm/x25519-asm-x86_64.S', 'src/crypto/curve25519/asm/x25519-asm-x86_64.S',
], ],
'boringssl_win_x86_sources': [ 'boringssl_win_x86_sources': [
'win-x86/crypto/aes/aes-586.asm',
'win-x86/crypto/aes/aesni-x86.asm',
'win-x86/crypto/aes/vpaes-x86.asm',
'win-x86/crypto/bn/bn-586.asm',
'win-x86/crypto/bn/co-586.asm',
'win-x86/crypto/bn/x86-mont.asm',
'win-x86/crypto/chacha/chacha-x86.asm', 'win-x86/crypto/chacha/chacha-x86.asm',
'win-x86/crypto/md5/md5-586.asm', 'win-x86/crypto/fipsmodule/aes-586.asm',
'win-x86/crypto/modes/ghash-x86.asm', 'win-x86/crypto/fipsmodule/aesni-x86.asm',
'win-x86/crypto/rc4/rc4-586.asm', 'win-x86/crypto/fipsmodule/bn-586.asm',
'win-x86/crypto/sha/sha1-586.asm', 'win-x86/crypto/fipsmodule/co-586.asm',
'win-x86/crypto/sha/sha256-586.asm', 'win-x86/crypto/fipsmodule/ghash-x86.asm',
'win-x86/crypto/sha/sha512-586.asm', 'win-x86/crypto/fipsmodule/md5-586.asm',
'win-x86/crypto/fipsmodule/sha1-586.asm',
'win-x86/crypto/fipsmodule/sha256-586.asm',
'win-x86/crypto/fipsmodule/sha512-586.asm',
'win-x86/crypto/fipsmodule/vpaes-x86.asm',
'win-x86/crypto/fipsmodule/x86-mont.asm',
], ],
'boringssl_win_x86_64_sources': [ 'boringssl_win_x86_64_sources': [
'win-x86_64/crypto/aes/aes-x86_64.asm',
'win-x86_64/crypto/aes/aesni-x86_64.asm',
'win-x86_64/crypto/aes/bsaes-x86_64.asm',
'win-x86_64/crypto/aes/vpaes-x86_64.asm',
'win-x86_64/crypto/bn/rsaz-avx2.asm',
'win-x86_64/crypto/bn/rsaz-x86_64.asm',
'win-x86_64/crypto/bn/x86_64-mont.asm',
'win-x86_64/crypto/bn/x86_64-mont5.asm',
'win-x86_64/crypto/chacha/chacha-x86_64.asm', 'win-x86_64/crypto/chacha/chacha-x86_64.asm',
'win-x86_64/crypto/ec/p256-x86_64-asm.asm', 'win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm',
'win-x86_64/crypto/md5/md5-x86_64.asm', 'win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm',
'win-x86_64/crypto/modes/aesni-gcm-x86_64.asm', 'win-x86_64/crypto/fipsmodule/aes-x86_64.asm',
'win-x86_64/crypto/modes/ghash-x86_64.asm', 'win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm',
'win-x86_64/crypto/rand/rdrand-x86_64.asm', 'win-x86_64/crypto/fipsmodule/aesni-x86_64.asm',
'win-x86_64/crypto/rc4/rc4-x86_64.asm', 'win-x86_64/crypto/fipsmodule/bsaes-x86_64.asm',
'win-x86_64/crypto/sha/sha1-x86_64.asm', 'win-x86_64/crypto/fipsmodule/ghash-x86_64.asm',
'win-x86_64/crypto/sha/sha256-x86_64.asm', 'win-x86_64/crypto/fipsmodule/md5-x86_64.asm',
'win-x86_64/crypto/sha/sha512-x86_64.asm', 'win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm',
'win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm',
'win-x86_64/crypto/fipsmodule/rsaz-avx2.asm',
'win-x86_64/crypto/fipsmodule/sha1-x86_64.asm',
'win-x86_64/crypto/fipsmodule/sha256-x86_64.asm',
'win-x86_64/crypto/fipsmodule/sha512-x86_64.asm',
'win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm',
'win-x86_64/crypto/fipsmodule/x86_64-mont.asm',
'win-x86_64/crypto/fipsmodule/x86_64-mont5.asm',
], ],
} }
} }
@@ -1,686 +0,0 @@
# Copyright (c) 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# This file is created by generate_build_files.py. Do not edit manually.
{
'targets': [
{
'target_name': 'boringssl_aes_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/aes/aes_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_asn1_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/asn1/asn1_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_base64_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/base64/base64_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_bio_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/bio/bio_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_bn_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/bn/bn_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_bytestring_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/bytestring/bytestring_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_chacha_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/chacha/chacha_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_aead_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/cipher/aead_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_cipher_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/cipher/cipher_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_cmac_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/cmac/cmac_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_constant_time_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/constant_time_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_ed25519_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/curve25519/ed25519_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_spake25519_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/curve25519/spake25519_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_x25519_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/curve25519/x25519_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_dh_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/dh/dh_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_digest_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/digest/digest_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_dsa_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/dsa/dsa_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_ec_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/ec/ec_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_example_mul',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/ec/example_mul.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_ecdsa_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/ecdsa/ecdsa_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_err_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/err/err_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_evp_extra_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/evp/evp_extra_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_evp_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/evp/evp_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_pbkdf_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/evp/pbkdf_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_hkdf_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/hkdf/hkdf_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_hmac_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/hmac/hmac_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_lhash_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/lhash/lhash_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_gcm_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/modes/gcm_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_newhope_statistical_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/newhope/newhope_statistical_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_newhope_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/newhope/newhope_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_newhope_vectors_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/newhope/newhope_vectors_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_obj_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/obj/obj_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_pkcs12_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/pkcs8/pkcs12_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_pkcs8_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/pkcs8/pkcs8_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_poly1305_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/poly1305/poly1305_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_refcount_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/refcount_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_rsa_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/rsa/rsa_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_thread_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/thread_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_pkcs7_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/x509/pkcs7_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_x509_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/x509/x509_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_tab_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/x509v3/tab_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_v3name_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/x509v3/v3name_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_pqueue_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/ssl/pqueue/pqueue_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_ssl_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/ssl/ssl_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
],
'variables': {
'boringssl_test_support_sources': [
'src/crypto/test/file_test.cc',
'src/crypto/test/file_test.h',
'src/crypto/test/malloc.cc',
'src/crypto/test/scoped_types.h',
'src/crypto/test/test_util.cc',
'src/crypto/test/test_util.h',
'src/ssl/test/async_bio.h',
'src/ssl/test/packeted_bio.h',
'src/ssl/test/scoped_types.h',
'src/ssl/test/test_config.h',
],
'boringssl_test_targets': [
'boringssl_aead_test',
'boringssl_aes_test',
'boringssl_asn1_test',
'boringssl_base64_test',
'boringssl_bio_test',
'boringssl_bn_test',
'boringssl_bytestring_test',
'boringssl_chacha_test',
'boringssl_cipher_test',
'boringssl_cmac_test',
'boringssl_constant_time_test',
'boringssl_dh_test',
'boringssl_digest_test',
'boringssl_dsa_test',
'boringssl_ec_test',
'boringssl_ecdsa_test',
'boringssl_ed25519_test',
'boringssl_err_test',
'boringssl_evp_extra_test',
'boringssl_evp_test',
'boringssl_example_mul',
'boringssl_gcm_test',
'boringssl_hkdf_test',
'boringssl_hmac_test',
'boringssl_lhash_test',
'boringssl_newhope_statistical_test',
'boringssl_newhope_test',
'boringssl_newhope_vectors_test',
'boringssl_obj_test',
'boringssl_pbkdf_test',
'boringssl_pkcs12_test',
'boringssl_pkcs7_test',
'boringssl_pkcs8_test',
'boringssl_poly1305_test',
'boringssl_pqueue_test',
'boringssl_refcount_test',
'boringssl_rsa_test',
'boringssl_spake25519_test',
'boringssl_ssl_test',
'boringssl_tab_test',
'boringssl_thread_test',
'boringssl_v3name_test',
'boringssl_x25519_test',
'boringssl_x509_test',
],
}
}
File diff suppressed because it is too large
@@ -3,7 +3,7 @@
#if __ARM_MAX_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.text .text
#if !defined(__clang__) #if !defined(__clang__) || defined(BORINGSSL_CLANG_SUPPORTS_DOT_ARCH)
.arch armv8-a+crypto .arch armv8-a+crypto
#endif #endif
.align 5 .align 5
@@ -12,11 +12,11 @@
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b .long 0x1b,0x1b,0x1b,0x1b
.globl aes_v8_set_encrypt_key .globl aes_hw_set_encrypt_key
.hidden aes_v8_set_encrypt_key .hidden aes_hw_set_encrypt_key
.type aes_v8_set_encrypt_key,%function .type aes_hw_set_encrypt_key,%function
.align 5 .align 5
aes_v8_set_encrypt_key: aes_hw_set_encrypt_key:
.Lenc_key: .Lenc_key:
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
@@ -178,13 +178,13 @@ aes_v8_set_encrypt_key:
mov x0,x3 // return value mov x0,x3 // return value
ldr x29,[sp],#16 ldr x29,[sp],#16
ret ret
.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key .size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
.globl aes_v8_set_decrypt_key .globl aes_hw_set_decrypt_key
.hidden aes_v8_set_decrypt_key .hidden aes_hw_set_decrypt_key
.type aes_v8_set_decrypt_key,%function .type aes_hw_set_decrypt_key,%function
.align 5 .align 5
aes_v8_set_decrypt_key: aes_hw_set_decrypt_key:
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
bl .Lenc_key bl .Lenc_key
@@ -219,12 +219,12 @@ aes_v8_set_decrypt_key:
.Ldec_key_abort: .Ldec_key_abort:
ldp x29,x30,[sp],#16 ldp x29,x30,[sp],#16
ret ret
.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key .size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
.globl aes_v8_encrypt .globl aes_hw_encrypt
.hidden aes_v8_encrypt .hidden aes_hw_encrypt
.type aes_v8_encrypt,%function .type aes_hw_encrypt,%function
.align 5 .align 5
aes_v8_encrypt: aes_hw_encrypt:
ldr w3,[x2,#240] ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16 ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0] ld1 {v2.16b},[x0]
@@ -249,12 +249,12 @@ aes_v8_encrypt:
st1 {v2.16b},[x1] st1 {v2.16b},[x1]
ret ret
.size aes_v8_encrypt,.-aes_v8_encrypt .size aes_hw_encrypt,.-aes_hw_encrypt
.globl aes_v8_decrypt .globl aes_hw_decrypt
.hidden aes_v8_decrypt .hidden aes_hw_decrypt
.type aes_v8_decrypt,%function .type aes_hw_decrypt,%function
.align 5 .align 5
aes_v8_decrypt: aes_hw_decrypt:
ldr w3,[x2,#240] ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16 ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0] ld1 {v2.16b},[x0]
@@ -279,12 +279,12 @@ aes_v8_decrypt:
st1 {v2.16b},[x1] st1 {v2.16b},[x1]
ret ret
.size aes_v8_decrypt,.-aes_v8_decrypt .size aes_hw_decrypt,.-aes_hw_decrypt
.globl aes_v8_cbc_encrypt .globl aes_hw_cbc_encrypt
.hidden aes_v8_cbc_encrypt .hidden aes_hw_cbc_encrypt
.type aes_v8_cbc_encrypt,%function .type aes_hw_cbc_encrypt,%function
.align 5 .align 5
aes_v8_cbc_encrypt: aes_hw_cbc_encrypt:
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
subs x2,x2,#16 subs x2,x2,#16
@@ -570,12 +570,12 @@ aes_v8_cbc_encrypt:
.Lcbc_abort: .Lcbc_abort:
ldr x29,[sp],#16 ldr x29,[sp],#16
ret ret
.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt .size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
.globl aes_v8_ctr32_encrypt_blocks .globl aes_hw_ctr32_encrypt_blocks
.hidden aes_v8_ctr32_encrypt_blocks .hidden aes_hw_ctr32_encrypt_blocks
.type aes_v8_ctr32_encrypt_blocks,%function .type aes_hw_ctr32_encrypt_blocks,%function
.align 5 .align 5
aes_v8_ctr32_encrypt_blocks: aes_hw_ctr32_encrypt_blocks:
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
ldr w5,[x3,#240] ldr w5,[x3,#240]
@@ -752,6 +752,6 @@ aes_v8_ctr32_encrypt_blocks:
.Lctr32_done: .Lctr32_done:
ldr x29,[sp],#16 ldr x29,[sp],#16
ret ret
.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks .size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif #endif
#endif #endif
@@ -2,7 +2,7 @@
#include <openssl/arm_arch.h> #include <openssl/arm_arch.h>
.text .text
#if !defined(__clang__) #if !defined(__clang__) || defined(BORINGSSL_CLANG_SUPPORTS_DOT_ARCH)
.arch armv8-a+crypto .arch armv8-a+crypto
#endif #endif
.globl gcm_init_v8 .globl gcm_init_v8
@@ -9,7 +9,11 @@
.type sha1_block_data_order,%function .type sha1_block_data_order,%function
.align 6 .align 6
sha1_block_data_order: sha1_block_data_order:
#ifdef __ILP32__
ldrsw x16,.LOPENSSL_armcap_P
#else
ldr x16,.LOPENSSL_armcap_P ldr x16,.LOPENSSL_armcap_P
#endif
adr x17,.LOPENSSL_armcap_P adr x17,.LOPENSSL_armcap_P
add x16,x16,x17 add x16,x16,x17
ldr w16,[x16] ldr w16,[x16]
@@ -1208,7 +1212,11 @@ sha1_block_armv8:
.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59 .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59
.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79 .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79
.LOPENSSL_armcap_P: .LOPENSSL_armcap_P:
#ifdef __ILP32__
.long OPENSSL_armcap_P-.
#else
.quad OPENSSL_armcap_P-. .quad OPENSSL_armcap_P-.
#endif
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2 .align 2
.align 2 .align 2
@@ -1,5 +1,46 @@
#if defined(__aarch64__) #if defined(__aarch64__)
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
// ====================================================================
// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// project. The module is, however, dual licensed under OpenSSL and
// CRYPTOGAMS licenses depending on where you obtain it. For further
// details see http://www.openssl.org/~appro/cryptogams/.
//
// Permission to use under GPLv2 terms is granted.
// ====================================================================
//
// SHA256/512 for ARMv8.
//
// Performance in cycles per processed byte and improvement coefficient
// over code generated with "default" compiler:
//
// SHA256-hw SHA256(*) SHA512
// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**))
// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***))
// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***))
// Denver 2.01 10.5 (+26%) 6.70 (+8%)
// X-Gene 20.0 (+100%) 12.8 (+300%(***))
// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
//
// (*) Software SHA256 results are of lesser relevance, presented
// mostly for informational purposes.
// (**) The result is a trade-off: it's possible to improve it by
// 10% (or by 1 cycle per round), but at the cost of 20% loss
// on Cortex-A53 (or by 4 cycles per round).
// (***) Super-impressive coefficients over gcc-generated code are
// indication of some compiler "pathology", most notably code
// generated with -mgeneral-regs-only is significanty faster
// and the gap is only 40-90%.
#ifndef __KERNEL__
# include <openssl/arm_arch.h> # include <openssl/arm_arch.h>
#endif
.text .text
@@ -9,12 +50,18 @@
.type sha256_block_data_order,%function .type sha256_block_data_order,%function
.align 6 .align 6
sha256_block_data_order: sha256_block_data_order:
#ifndef __KERNEL__
# ifdef __ILP32__
ldrsw x16,.LOPENSSL_armcap_P
# else
ldr x16,.LOPENSSL_armcap_P ldr x16,.LOPENSSL_armcap_P
# endif
adr x17,.LOPENSSL_armcap_P adr x17,.LOPENSSL_armcap_P
add x16,x16,x17 add x16,x16,x17
ldr w16,[x16] ldr w16,[x16]
tst w16,#ARMV8_SHA256 tst w16,#ARMV8_SHA256
b.ne .Lv8_entry b.ne .Lv8_entry
#endif
stp x29,x30,[sp,#-128]! stp x29,x30,[sp,#-128]!
add x29,sp,#0 add x29,sp,#0
@@ -998,12 +1045,19 @@ sha256_block_data_order:
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.long 0 //terminator .long 0 //terminator
.size .LK256,.-.LK256 .size .LK256,.-.LK256
#ifndef __KERNEL__
.align 3 .align 3
.LOPENSSL_armcap_P: .LOPENSSL_armcap_P:
# ifdef __ILP32__
.long OPENSSL_armcap_P-.
# else
.quad OPENSSL_armcap_P-. .quad OPENSSL_armcap_P-.
# endif
#endif
.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2 .align 2
.align 2 .align 2
#ifndef __KERNEL__
.type sha256_block_armv8,%function .type sha256_block_armv8,%function
.align 6 .align 6
sha256_block_armv8: sha256_block_armv8:
@@ -1142,5 +1196,8 @@ sha256_block_armv8:
ldr x29,[sp],#16 ldr x29,[sp],#16
ret ret
.size sha256_block_armv8,.-sha256_block_armv8 .size sha256_block_armv8,.-sha256_block_armv8
#endif
#ifndef __KERNEL__
.comm OPENSSL_armcap_P,4,4 .comm OPENSSL_armcap_P,4,4
#endif #endif
#endif
@@ -1,5 +1,46 @@
#if defined(__aarch64__) #if defined(__aarch64__)
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
// ====================================================================
// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// project. The module is, however, dual licensed under OpenSSL and
// CRYPTOGAMS licenses depending on where you obtain it. For further
// details see http://www.openssl.org/~appro/cryptogams/.
//
// Permission to use under GPLv2 terms is granted.
// ====================================================================
//
// SHA256/512 for ARMv8.
//
// Performance in cycles per processed byte and improvement coefficient
// over code generated with "default" compiler:
//
// SHA256-hw SHA256(*) SHA512
// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**))
// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***))
// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***))
// Denver 2.01 10.5 (+26%) 6.70 (+8%)
// X-Gene 20.0 (+100%) 12.8 (+300%(***))
// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
//
// (*) Software SHA256 results are of lesser relevance, presented
// mostly for informational purposes.
// (**) The result is a trade-off: it's possible to improve it by
// 10% (or by 1 cycle per round), but at the cost of 20% loss
// on Cortex-A53 (or by 4 cycles per round).
// (***) Super-impressive coefficients over gcc-generated code are
// indication of some compiler "pathology", most notably code
// generated with -mgeneral-regs-only is significanty faster
// and the gap is only 40-90%.
#ifndef __KERNEL__
# include <openssl/arm_arch.h> # include <openssl/arm_arch.h>
#endif
.text .text
@@ -1016,11 +1057,19 @@ sha512_block_data_order:
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad 0 // terminator .quad 0 // terminator
.size .LK512,.-.LK512 .size .LK512,.-.LK512
#ifndef __KERNEL__
.align 3 .align 3
.LOPENSSL_armcap_P: .LOPENSSL_armcap_P:
# ifdef __ILP32__
.long OPENSSL_armcap_P-.
# else
.quad OPENSSL_armcap_P-. .quad OPENSSL_armcap_P-.
# endif
#endif
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2 .align 2
.align 2 .align 2
#ifndef __KERNEL__
.comm OPENSSL_armcap_P,4,4 .comm OPENSSL_armcap_P,4,4
#endif #endif
#endif
@@ -1,589 +0,0 @@
#if defined(__arm__)
#include <openssl/arm_arch.h>
.text
.code 32
#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-.Lbn_mul_mont
#endif
.globl bn_mul_mont
.hidden bn_mul_mont
.type bn_mul_mont,%function
.align 5
bn_mul_mont:
.Lbn_mul_mont:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne .Lialu
adr r0,bn_mul_mont
ldr r2,.LOPENSSL_armcap
ldr r0,[r0,r2]
#ifdef __APPLE__
ldr r0,[r0]
#endif
tst r0,#ARMV7_NEON @ NEON available?
ldmia sp, {r0,r2}
beq .Lialu
add sp,sp,#8
b bn_mul8x_mont_neon
.align 4
.Lialu:
#endif
cmp ip,#2
mov r0,ip @ load num
movlt r0,#0
addlt sp,sp,#2*4
blt .Labrt
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
mov r0,r0,lsl#2 @ rescale r0 for byte count
sub sp,sp,r0 @ alloca(4*num)
sub sp,sp,#4 @ +extra dword
sub r0,r0,#4 @ "num=num-1"
add r4,r2,r0 @ &bp[num-1]
add r0,sp,r0 @ r0 to point at &tp[num-1]
ldr r8,[r0,#14*4] @ &n0
ldr r2,[r2] @ bp[0]
ldr r5,[r1],#4 @ ap[0],ap++
ldr r6,[r3],#4 @ np[0],np++
ldr r8,[r8] @ *n0
str r4,[r0,#15*4] @ save &bp[num]
umull r10,r11,r5,r2 @ ap[0]*bp[0]
str r8,[r0,#14*4] @ save n0 value
mul r8,r10,r8 @ "tp[0]"*n0
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
mov r4,sp
.L1st:
ldr r5,[r1],#4 @ ap[j],ap++
mov r10,r11
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[0]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne .L1st
adds r12,r12,r11
ldr r4,[r0,#13*4] @ restore bp
mov r14,#0
ldr r8,[r0,#14*4] @ restore n0
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
str r14,[r0,#4] @ tp[num]=
.Louter:
sub r7,r0,sp @ "original" r0-1 value
sub r1,r1,r7 @ "rewind" ap to &ap[1]
ldr r2,[r4,#4]! @ *(++bp)
sub r3,r3,r7 @ "rewind" np to &np[1]
ldr r5,[r1,#-4] @ ap[0]
ldr r10,[sp] @ tp[0]
ldr r6,[r3,#-4] @ np[0]
ldr r7,[sp,#4] @ tp[1]
mov r11,#0
umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
str r4,[r0,#13*4] @ save bp
mul r8,r10,r8
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
mov r4,sp
.Linner:
ldr r5,[r1],#4 @ ap[j],ap++
adds r10,r11,r7 @ +=tp[j]
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[i]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adc r11,r11,#0
ldr r7,[r4,#8] @ tp[j+1]
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne .Linner
adds r12,r12,r11
mov r14,#0
ldr r4,[r0,#13*4] @ restore bp
adc r14,r14,#0
ldr r8,[r0,#14*4] @ restore n0
adds r12,r12,r7
ldr r7,[r0,#15*4] @ restore &bp[num]
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
str r14,[r0,#4] @ tp[num]=
cmp r4,r7
bne .Louter
ldr r2,[r0,#12*4] @ pull rp
add r0,r0,#4 @ r0 to point at &tp[num]
sub r5,r0,sp @ "original" num value
mov r4,sp @ "rewind" r4
mov r1,r4 @ "borrow" r1
sub r3,r3,r5 @ "rewind" r3 to &np[0]
subs r7,r7,r7 @ "clear" carry flag
.Lsub: ldr r7,[r4],#4
ldr r6,[r3],#4
sbcs r7,r7,r6 @ tp[j]-np[j]
str r7,[r2],#4 @ rp[j]=
teq r4,r0 @ preserve carry
bne .Lsub
sbcs r14,r14,#0 @ upmost carry
mov r4,sp @ "rewind" r4
sub r2,r2,r5 @ "rewind" r2
and r1,r4,r14
bic r3,r2,r14
orr r1,r1,r3 @ ap=borrow?tp:rp
.Lcopy: ldr r7,[r1],#4 @ copy or in-place refresh
str sp,[r4],#4 @ zap tp
str r7,[r2],#4
cmp r4,r0
bne .Lcopy
add sp,r0,#4 @ skip over tp[num+1]
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
.Labrt:
#if __ARM_ARCH__>=5
bx lr @ .word 0xe12fff1e
#else
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size bn_mul_mont,.-bn_mul_mont
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.type bn_mul8x_mont_neon,%function
.align 5
bn_mul8x_mont_neon:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load rest of parameter block
sub r7,sp,#16
vld1.32 {d28[0]}, [r2,:32]!
sub r7,r7,r5,lsl#4
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
and r7,r7,#-64
vld1.32 {d30[0]}, [r4,:32]
mov sp,r7 @ alloca
veor d8,d8,d8
subs r8,r5,#8
vzip.16 d28,d8
vmull.u32 q6,d28,d0[0]
vmull.u32 q7,d28,d0[1]
vmull.u32 q8,d28,d1[0]
vshl.i64 d10,d13,#16
vmull.u32 q9,d28,d1[1]
vadd.u64 d10,d10,d12
veor d8,d8,d8
vmul.u32 d29,d10,d30
vmull.u32 q10,d28,d2[0]
vld1.32 {d4,d5,d6,d7}, [r3]!
vmull.u32 q11,d28,d2[1]
vmull.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmull.u32 q13,d28,d3[1]
bne .LNEON_1st
@ special case for num=8, everything is in register bank...
vmlal.u32 q6,d29,d4[0]
sub r9,r5,#1
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
b .LNEON_outer8
.align 4
.LNEON_outer8:
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
vzip.16 d28,d8
vadd.u64 d12,d12,d10
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
vmlal.u32 q8,d28,d1[0]
vshl.i64 d10,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d10,d10,d12
veor d8,d8,d8
subs r9,r9,#1
vmul.u32 d29,d10,d30
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
bne .LNEON_outer8
vadd.u64 d12,d12,d10
mov r7,sp
vshr.u64 d10,d12,#16
mov r8,r5
vadd.u64 d13,d13,d10
add r6,sp,#16
vshr.u64 d10,d13,#16
vzip.16 d12,d13
b .LNEON_tail2
.align 4
.LNEON_1st:
vmlal.u32 q6,d29,d4[0]
vld1.32 {d0,d1,d2,d3}, [r1]!
vmlal.u32 q7,d29,d4[1]
subs r8,r8,#8
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vld1.32 {d4,d5}, [r3]!
vmlal.u32 q11,d29,d6[1]
vst1.64 {q6,q7}, [r7,:256]!
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vst1.64 {q8,q9}, [r7,:256]!
vmull.u32 q6,d28,d0[0]
vld1.32 {d6,d7}, [r3]!
vmull.u32 q7,d28,d0[1]
vst1.64 {q10,q11}, [r7,:256]!
vmull.u32 q8,d28,d1[0]
vmull.u32 q9,d28,d1[1]
vst1.64 {q12,q13}, [r7,:256]!
vmull.u32 q10,d28,d2[0]
vmull.u32 q11,d28,d2[1]
vmull.u32 q12,d28,d3[0]
vmull.u32 q13,d28,d3[1]
bne .LNEON_1st
vmlal.u32 q6,d29,d4[0]
add r6,sp,#16
vmlal.u32 q7,d29,d4[1]
sub r1,r1,r5,lsl#2 @ rewind r1
vmlal.u32 q8,d29,d5[0]
vld1.64 {q5}, [sp,:128]
vmlal.u32 q9,d29,d5[1]
sub r9,r5,#1
vmlal.u32 q10,d29,d6[0]
vst1.64 {q6,q7}, [r7,:256]!
vmlal.u32 q11,d29,d6[1]
vshr.u64 d10,d10,#16
vld1.64 {q6}, [r6, :128]!
vmlal.u32 q12,d29,d7[0]
vst1.64 {q8,q9}, [r7,:256]!
vmlal.u32 q13,d29,d7[1]
vst1.64 {q10,q11}, [r7,:256]!
vadd.u64 d10,d10,d11
veor q4,q4,q4
vst1.64 {q12,q13}, [r7,:256]!
vld1.64 {q7,q8}, [r6, :256]!
vst1.64 {q4}, [r7,:128]
vshr.u64 d10,d10,#16
b .LNEON_outer
.align 4
.LNEON_outer:
vld1.32 {d28[0]}, [r2,:32]!
sub r3,r3,r5,lsl#2 @ rewind r3
vld1.32 {d0,d1,d2,d3}, [r1]!
veor d8,d8,d8
mov r7,sp
vzip.16 d28,d8
sub r8,r5,#8
vadd.u64 d12,d12,d10
vmlal.u32 q6,d28,d0[0]
vld1.64 {q9,q10},[r6,:256]!
vmlal.u32 q7,d28,d0[1]
vmlal.u32 q8,d28,d1[0]
vld1.64 {q11,q12},[r6,:256]!
vmlal.u32 q9,d28,d1[1]
vshl.i64 d10,d13,#16
veor d8,d8,d8
vadd.u64 d10,d10,d12
vld1.64 {q13},[r6,:128]!
vmul.u32 d29,d10,d30
vmlal.u32 q10,d28,d2[0]
vld1.32 {d4,d5,d6,d7}, [r3]!
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
.LNEON_inner:
vmlal.u32 q6,d29,d4[0]
vld1.32 {d0,d1,d2,d3}, [r1]!
vmlal.u32 q7,d29,d4[1]
subs r8,r8,#8
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vst1.64 {q6,q7}, [r7,:256]!
vmlal.u32 q10,d29,d6[0]
vld1.64 {q6}, [r6, :128]!
vmlal.u32 q11,d29,d6[1]
vst1.64 {q8,q9}, [r7,:256]!
vmlal.u32 q12,d29,d7[0]
vld1.64 {q7,q8}, [r6, :256]!
vmlal.u32 q13,d29,d7[1]
vst1.64 {q10,q11}, [r7,:256]!
vmlal.u32 q6,d28,d0[0]
vld1.64 {q9,q10}, [r6, :256]!
vmlal.u32 q7,d28,d0[1]
vst1.64 {q12,q13}, [r7,:256]!
vmlal.u32 q8,d28,d1[0]
vld1.64 {q11,q12}, [r6, :256]!
vmlal.u32 q9,d28,d1[1]
vld1.32 {d4,d5,d6,d7}, [r3]!
vmlal.u32 q10,d28,d2[0]
vld1.64 {q13}, [r6, :128]!
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vmlal.u32 q13,d28,d3[1]
bne .LNEON_inner
vmlal.u32 q6,d29,d4[0]
add r6,sp,#16
vmlal.u32 q7,d29,d4[1]
sub r1,r1,r5,lsl#2 @ rewind r1
vmlal.u32 q8,d29,d5[0]
vld1.64 {q5}, [sp,:128]
vmlal.u32 q9,d29,d5[1]
subs r9,r9,#1
vmlal.u32 q10,d29,d6[0]
vst1.64 {q6,q7}, [r7,:256]!
vmlal.u32 q11,d29,d6[1]
vld1.64 {q6}, [r6, :128]!
vshr.u64 d10,d10,#16
vst1.64 {q8,q9}, [r7,:256]!
vmlal.u32 q12,d29,d7[0]
vld1.64 {q7,q8}, [r6, :256]!
vmlal.u32 q13,d29,d7[1]
vst1.64 {q10,q11}, [r7,:256]!
vadd.u64 d10,d10,d11
vst1.64 {q12,q13}, [r7,:256]!
vshr.u64 d10,d10,#16
bne .LNEON_outer
mov r7,sp
mov r8,r5
.LNEON_tail:
vadd.u64 d12,d12,d10
vld1.64 {q9,q10}, [r6, :256]!
vshr.u64 d10,d12,#16
vadd.u64 d13,d13,d10
vld1.64 {q11,q12}, [r6, :256]!
vshr.u64 d10,d13,#16
vld1.64 {q13}, [r6, :128]!
vzip.16 d12,d13
.LNEON_tail2:
vadd.u64 d14,d14,d10
vst1.32 {d12[0]}, [r7, :32]!
vshr.u64 d10,d14,#16
vadd.u64 d15,d15,d10
vshr.u64 d10,d15,#16
vzip.16 d14,d15
vadd.u64 d16,d16,d10
vst1.32 {d14[0]}, [r7, :32]!
vshr.u64 d10,d16,#16
vadd.u64 d17,d17,d10
vshr.u64 d10,d17,#16
vzip.16 d16,d17
vadd.u64 d18,d18,d10
vst1.32 {d16[0]}, [r7, :32]!
vshr.u64 d10,d18,#16
vadd.u64 d19,d19,d10
vshr.u64 d10,d19,#16
vzip.16 d18,d19
vadd.u64 d20,d20,d10
vst1.32 {d18[0]}, [r7, :32]!
vshr.u64 d10,d20,#16
vadd.u64 d21,d21,d10
vshr.u64 d10,d21,#16
vzip.16 d20,d21
vadd.u64 d22,d22,d10
vst1.32 {d20[0]}, [r7, :32]!
vshr.u64 d10,d22,#16
vadd.u64 d23,d23,d10
vshr.u64 d10,d23,#16
vzip.16 d22,d23
vadd.u64 d24,d24,d10
vst1.32 {d22[0]}, [r7, :32]!
vshr.u64 d10,d24,#16
vadd.u64 d25,d25,d10
vld1.64 {q6}, [r6, :128]!
vshr.u64 d10,d25,#16
vzip.16 d24,d25
vadd.u64 d26,d26,d10
vst1.32 {d24[0]}, [r7, :32]!
vshr.u64 d10,d26,#16
vadd.u64 d27,d27,d10
vld1.64 {q7,q8}, [r6, :256]!
vshr.u64 d10,d27,#16
vzip.16 d26,d27
subs r8,r8,#8
vst1.32 {d26[0]}, [r7, :32]!
bne .LNEON_tail
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
sub r3,r3,r5,lsl#2 @ rewind r3
subs r1,sp,#0 @ clear carry flag
add r2,sp,r5,lsl#2
.LNEON_sub:
ldmia r1!, {r4,r5,r6,r7}
ldmia r3!, {r8,r9,r10,r11}
sbcs r8, r4,r8
sbcs r9, r5,r9
sbcs r10,r6,r10
sbcs r11,r7,r11
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne .LNEON_sub
ldr r10, [r1] @ load top-most bit
veor q0,q0,q0
sub r11,r2,sp @ this is num*4
veor q1,q1,q1
mov r1,sp
sub r0,r0,r11 @ rewind r0
mov r3,r2 @ second 3/4th of frame
sbcs r10,r10,#0 @ result is carry flag
.LNEON_copy_n_zap:
ldmia r1!, {r4,r5,r6,r7}
ldmia r0, {r8,r9,r10,r11}
movcc r8, r4
vst1.64 {q0,q1}, [r3,:256]! @ wipe
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
movcc r11,r7
ldmia r1, {r4,r5,r6,r7}
stmia r0!, {r8,r9,r10,r11}
sub r1,r1,#16
ldmia r0, {r8,r9,r10,r11}
movcc r8, r4
vst1.64 {q0,q1}, [r1,:256]! @ wipe
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
movcc r11,r7
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne .LNEON_copy_n_zap
sub sp,ip,#96
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
bx lr @ .word 0xe12fff1e
.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
#endif
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
#endif
#endif
@@ -2,8 +2,10 @@
#include <openssl/arm_arch.h> #include <openssl/arm_arch.h>
.text .text
#if defined(__thumb2__) #if defined(__thumb2__) || defined(__clang__)
.syntax unified .syntax unified
#endif
#if defined(__thumb2__)
.thumb .thumb
#else #else
.code 32 .code 32
@@ -1457,7 +1459,7 @@ ChaCha20_neon:
ldrb r9,[r12],#1 @ read input ldrb r9,[r12],#1 @ read input
subs r11,r11,#1 subs r11,r11,#1
eor r8,r8,r9 eor r8,r8,r9
strb r8,[r14],#1 @ store ouput strb r8,[r14],#1 @ store output
bne .Loop_tail_neon bne .Loop_tail_neon
.Ldone_neon: .Ldone_neon:
@@ -1,4 +1,11 @@
#if defined(__arm__) #if defined(__arm__)
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use
@ this file except in compliance with the License. You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html
@ ==================================================================== @ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -32,7 +39,6 @@
@ Profiler-assisted and platform-specific optimization resulted in 16% @ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~21.5 cycles per byte. @ improvement on Cortex A8 core and ~21.5 cycles per byte.
#if defined(__arm__)
#ifndef __KERNEL__ #ifndef __KERNEL__
# include <openssl/arm_arch.h> # include <openssl/arm_arch.h>
#else #else
@@ -40,15 +46,12 @@
#endif #endif
.text .text
#if __ARM_ARCH__<7
.code 32
#else
.syntax unified
#if defined(__thumb2__) && !defined(__APPLE__) #if defined(__thumb2__) && !defined(__APPLE__)
.syntax unified
.thumb .thumb
#else #else
.code 32 .code 32
# endif #undef __thumb2__
#endif #endif
.type AES_Te,%object .type AES_Te,%object
@@ -164,10 +167,10 @@ AES_Te:
.type asm_AES_encrypt,%function .type asm_AES_encrypt,%function
.align 5 .align 5
asm_AES_encrypt: asm_AES_encrypt:
#if __ARM_ARCH__<7 #ifndef __thumb2__
sub r3,pc,#8 @ asm_AES_encrypt sub r3,pc,#8 @ asm_AES_encrypt
#else #else
adr r3,asm_AES_encrypt adr r3,.
#endif #endif
stmdb sp!,{r1,r4-r12,lr} stmdb sp!,{r1,r4-r12,lr}
#ifdef __APPLE__ #ifdef __APPLE__
@@ -415,19 +418,19 @@ _armv4_AES_encrypt:
.align 5 .align 5
asm_AES_set_encrypt_key: asm_AES_set_encrypt_key:
_armv4_AES_set_encrypt_key: _armv4_AES_set_encrypt_key:
#if __ARM_ARCH__<7 #ifndef __thumb2__
sub r3,pc,#8 @ asm_AES_set_encrypt_key sub r3,pc,#8 @ asm_AES_set_encrypt_key
#else #else
adr r3,asm_AES_set_encrypt_key adr r3,.
#endif #endif
teq r0,#0 teq r0,#0
#if __ARM_ARCH__>=7 #ifdef __thumb2__
itt eq @ Thumb2 thing, sanity check in ARM itt eq @ Thumb2 thing, sanity check in ARM
#endif #endif
moveq r0,#-1 moveq r0,#-1
beq .Labrt beq .Labrt
teq r2,#0 teq r2,#0
#if __ARM_ARCH__>=7 #ifdef __thumb2__
itt eq @ Thumb2 thing, sanity check in ARM itt eq @ Thumb2 thing, sanity check in ARM
#endif #endif
moveq r0,#-1 moveq r0,#-1
@@ -438,7 +441,7 @@ _armv4_AES_set_encrypt_key:
teq r1,#192 teq r1,#192
beq .Lok beq .Lok
teq r1,#256 teq r1,#256
#if __ARM_ARCH__>=7 #ifdef __thumb2__
itt ne @ Thumb2 thing, sanity check in ARM itt ne @ Thumb2 thing, sanity check in ARM
#endif #endif
movne r0,#-1 movne r0,#-1
@@ -599,7 +602,7 @@ _armv4_AES_set_encrypt_key:
str r2,[r11,#-16] str r2,[r11,#-16]
subs r12,r12,#1 subs r12,r12,#1
str r3,[r11,#-12] str r3,[r11,#-12]
#if __ARM_ARCH__>=7 #ifdef __thumb2__
itt eq @ Thumb2 thing, sanity check in ARM itt eq @ Thumb2 thing, sanity check in ARM
#endif #endif
subeq r2,r11,#216 subeq r2,r11,#216
@@ -671,7 +674,7 @@ _armv4_AES_set_encrypt_key:
str r2,[r11,#-24] str r2,[r11,#-24]
subs r12,r12,#1 subs r12,r12,#1
str r3,[r11,#-20] str r3,[r11,#-20]
#if __ARM_ARCH__>=7 #ifdef __thumb2__
itt eq @ Thumb2 thing, sanity check in ARM itt eq @ Thumb2 thing, sanity check in ARM
#endif #endif
subeq r2,r11,#256 subeq r2,r11,#256
@ -744,7 +747,7 @@ _armv4_AES_set_enc2dec_key:
ldr r12,[r0,#240] ldr r12,[r0,#240]
mov r7,r0 @ input mov r7,r0 @ input
add r8,r0,r12,lsl#4 add r8,r0,r12,lsl#4
mov r11,r1 @ ouput mov r11,r1 @ output
add r10,r1,r12,lsl#4 add r10,r1,r12,lsl#4
str r12,[r1,#240] str r12,[r1,#240]
@ -939,10 +942,10 @@ AES_Td:
.type asm_AES_decrypt,%function .type asm_AES_decrypt,%function
.align 5 .align 5
asm_AES_decrypt: asm_AES_decrypt:
#if __ARM_ARCH__<7 #ifndef __thumb2__
sub r3,pc,#8 @ asm_AES_decrypt sub r3,pc,#8 @ asm_AES_decrypt
#else #else
adr r3,asm_AES_decrypt adr r3,.
#endif #endif
stmdb sp!,{r1,r4-r12,lr} stmdb sp!,{r1,r4-r12,lr}
#ifdef __APPLE__ #ifdef __APPLE__
@ -1195,6 +1198,4 @@ _armv4_AES_decrypt:
.byte 65,69,83,32,102,111,114,32,65,82,77,118,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .byte 65,69,83,32,102,111,114,32,65,82,77,118,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2 .align 2
.align 2 .align 2
#endif
#endif #endif
@ -3,20 +3,21 @@
#if __ARM_MAX_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.text .text
.arch armv7-a .arch armv7-a @ don't confuse not-so-latest binutils with argv8 :-)
.fpu neon .fpu neon
.code 32 .code 32
#undef __thumb2__
.align 5 .align 5
.Lrcon: .Lrcon:
.long 0x01,0x01,0x01,0x01 .long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b .long 0x1b,0x1b,0x1b,0x1b
.globl aes_v8_set_encrypt_key .globl aes_hw_set_encrypt_key
.hidden aes_v8_set_encrypt_key .hidden aes_hw_set_encrypt_key
.type aes_v8_set_encrypt_key,%function .type aes_hw_set_encrypt_key,%function
.align 5 .align 5
aes_v8_set_encrypt_key: aes_hw_set_encrypt_key:
.Lenc_key: .Lenc_key:
mov r3,#-1 mov r3,#-1
cmp r0,#0 cmp r0,#0
@ -181,13 +182,13 @@ aes_v8_set_encrypt_key:
mov r0,r3 @ return value mov r0,r3 @ return value
bx lr bx lr
.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key .size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
.globl aes_v8_set_decrypt_key .globl aes_hw_set_decrypt_key
.hidden aes_v8_set_decrypt_key .hidden aes_hw_set_decrypt_key
.type aes_v8_set_decrypt_key,%function .type aes_hw_set_decrypt_key,%function
.align 5 .align 5
aes_v8_set_decrypt_key: aes_hw_set_decrypt_key:
stmdb sp!,{r4,lr} stmdb sp!,{r4,lr}
bl .Lenc_key bl .Lenc_key
@ -220,12 +221,12 @@ aes_v8_set_decrypt_key:
eor r0,r0,r0 @ return value eor r0,r0,r0 @ return value
.Ldec_key_abort: .Ldec_key_abort:
ldmia sp!,{r4,pc} ldmia sp!,{r4,pc}
.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key .size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
.globl aes_v8_encrypt .globl aes_hw_encrypt
.hidden aes_v8_encrypt .hidden aes_hw_encrypt
.type aes_v8_encrypt,%function .type aes_hw_encrypt,%function
.align 5 .align 5
aes_v8_encrypt: aes_hw_encrypt:
ldr r3,[r2,#240] ldr r3,[r2,#240]
vld1.32 {q0},[r2]! vld1.32 {q0},[r2]!
vld1.8 {q2},[r0] vld1.8 {q2},[r0]
@ -250,12 +251,12 @@ aes_v8_encrypt:
vst1.8 {q2},[r1] vst1.8 {q2},[r1]
bx lr bx lr
.size aes_v8_encrypt,.-aes_v8_encrypt .size aes_hw_encrypt,.-aes_hw_encrypt
.globl aes_v8_decrypt .globl aes_hw_decrypt
.hidden aes_v8_decrypt .hidden aes_hw_decrypt
.type aes_v8_decrypt,%function .type aes_hw_decrypt,%function
.align 5 .align 5
aes_v8_decrypt: aes_hw_decrypt:
ldr r3,[r2,#240] ldr r3,[r2,#240]
vld1.32 {q0},[r2]! vld1.32 {q0},[r2]!
vld1.8 {q2},[r0] vld1.8 {q2},[r0]
@ -280,12 +281,12 @@ aes_v8_decrypt:
vst1.8 {q2},[r1] vst1.8 {q2},[r1]
bx lr bx lr
.size aes_v8_decrypt,.-aes_v8_decrypt .size aes_hw_decrypt,.-aes_hw_decrypt
.globl aes_v8_cbc_encrypt .globl aes_hw_cbc_encrypt
.hidden aes_v8_cbc_encrypt .hidden aes_hw_cbc_encrypt
.type aes_v8_cbc_encrypt,%function .type aes_hw_cbc_encrypt,%function
.align 5 .align 5
aes_v8_cbc_encrypt: aes_hw_cbc_encrypt:
mov ip,sp mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,lr} stmdb sp!,{r4,r5,r6,r7,r8,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
@ -573,12 +574,12 @@ aes_v8_cbc_encrypt:
.Lcbc_abort: .Lcbc_abort:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,pc} ldmia sp!,{r4,r5,r6,r7,r8,pc}
.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt .size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
.globl aes_v8_ctr32_encrypt_blocks .globl aes_hw_ctr32_encrypt_blocks
.hidden aes_v8_ctr32_encrypt_blocks .hidden aes_hw_ctr32_encrypt_blocks
.type aes_v8_ctr32_encrypt_blocks,%function .type aes_hw_ctr32_encrypt_blocks,%function
.align 5 .align 5
aes_v8_ctr32_encrypt_blocks: aes_hw_ctr32_encrypt_blocks:
mov ip,sp mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr} stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
@ -757,6 +758,6 @@ aes_v8_ctr32_encrypt_blocks:
.Lctr32_done: .Lctr32_done:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc} ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks .size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif #endif
#endif #endif
@ -0,0 +1,956 @@
#if defined(__arm__)
#include <openssl/arm_arch.h>
.text
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-.Lbn_mul_mont
#endif
.globl bn_mul_mont
.hidden bn_mul_mont
.type bn_mul_mont,%function
.align 5
bn_mul_mont:
.Lbn_mul_mont:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne .Lialu
adr r0,.Lbn_mul_mont
ldr r2,.LOPENSSL_armcap
ldr r0,[r0,r2]
#ifdef __APPLE__
ldr r0,[r0]
#endif
tst r0,#ARMV7_NEON @ NEON available?
ldmia sp, {r0,r2}
beq .Lialu
add sp,sp,#8
b bn_mul8x_mont_neon
.align 4
.Lialu:
#endif
cmp ip,#2
mov r0,ip @ load num
#ifdef __thumb2__
ittt lt
#endif
movlt r0,#0
addlt sp,sp,#2*4
blt .Labrt
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
mov r0,r0,lsl#2 @ rescale r0 for byte count
sub sp,sp,r0 @ alloca(4*num)
sub sp,sp,#4 @ +extra dword
sub r0,r0,#4 @ "num=num-1"
add r4,r2,r0 @ &bp[num-1]
add r0,sp,r0 @ r0 to point at &tp[num-1]
ldr r8,[r0,#14*4] @ &n0
ldr r2,[r2] @ bp[0]
ldr r5,[r1],#4 @ ap[0],ap++
ldr r6,[r3],#4 @ np[0],np++
ldr r8,[r8] @ *n0
str r4,[r0,#15*4] @ save &bp[num]
umull r10,r11,r5,r2 @ ap[0]*bp[0]
str r8,[r0,#14*4] @ save n0 value
mul r8,r10,r8 @ "tp[0]"*n0
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
mov r4,sp
.L1st:
ldr r5,[r1],#4 @ ap[j],ap++
mov r10,r11
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[0]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne .L1st
adds r12,r12,r11
ldr r4,[r0,#13*4] @ restore bp
mov r14,#0
ldr r8,[r0,#14*4] @ restore n0
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
mov r7,sp
str r14,[r0,#4] @ tp[num]=
.Louter:
sub r7,r0,r7 @ "original" r0-1 value
sub r1,r1,r7 @ "rewind" ap to &ap[1]
ldr r2,[r4,#4]! @ *(++bp)
sub r3,r3,r7 @ "rewind" np to &np[1]
ldr r5,[r1,#-4] @ ap[0]
ldr r10,[sp] @ tp[0]
ldr r6,[r3,#-4] @ np[0]
ldr r7,[sp,#4] @ tp[1]
mov r11,#0
umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
str r4,[r0,#13*4] @ save bp
mul r8,r10,r8
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
mov r4,sp
.Linner:
ldr r5,[r1],#4 @ ap[j],ap++
adds r10,r11,r7 @ +=tp[j]
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[i]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adc r11,r11,#0
ldr r7,[r4,#8] @ tp[j+1]
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne .Linner
adds r12,r12,r11
mov r14,#0
ldr r4,[r0,#13*4] @ restore bp
adc r14,r14,#0
ldr r8,[r0,#14*4] @ restore n0
adds r12,r12,r7
ldr r7,[r0,#15*4] @ restore &bp[num]
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
str r14,[r0,#4] @ tp[num]=
cmp r4,r7
#ifdef __thumb2__
itt ne
#endif
movne r7,sp
bne .Louter
ldr r2,[r0,#12*4] @ pull rp
mov r5,sp
add r0,r0,#4 @ r0 to point at &tp[num]
sub r5,r0,r5 @ "original" num value
mov r4,sp @ "rewind" r4
mov r1,r4 @ "borrow" r1
sub r3,r3,r5 @ "rewind" r3 to &np[0]
subs r7,r7,r7 @ "clear" carry flag
.Lsub: ldr r7,[r4],#4
ldr r6,[r3],#4
sbcs r7,r7,r6 @ tp[j]-np[j]
str r7,[r2],#4 @ rp[j]=
teq r4,r0 @ preserve carry
bne .Lsub
sbcs r14,r14,#0 @ upmost carry
mov r4,sp @ "rewind" r4
sub r2,r2,r5 @ "rewind" r2
and r1,r4,r14
bic r3,r2,r14
orr r1,r1,r3 @ ap=borrow?tp:rp
.Lcopy: ldr r7,[r1],#4 @ copy or in-place refresh
str sp,[r4],#4 @ zap tp
str r7,[r2],#4
cmp r4,r0
bne .Lcopy
mov sp,r0
add sp,sp,#4 @ skip over tp[num+1]
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
.Labrt:
#if __ARM_ARCH__>=5
bx lr @ bx lr
#else
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size bn_mul_mont,.-bn_mul_mont
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.type bn_mul8x_mont_neon,%function
.align 5
bn_mul8x_mont_neon:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load rest of parameter block
mov ip,sp
cmp r5,#8
bhi .LNEON_8n
@ special case for r5==8, everything is in register bank...
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
sub r7,sp,r5,lsl#4
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
and r7,r7,#-64
vld1.32 {d30[0]}, [r4,:32]
mov sp,r7 @ alloca
vzip.16 d28,d8
vmull.u32 q6,d28,d0[0]
vmull.u32 q7,d28,d0[1]
vmull.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmull.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
vmul.u32 d29,d29,d30
vmull.u32 q10,d28,d2[0]
vld1.32 {d4,d5,d6,d7}, [r3]!
vmull.u32 q11,d28,d2[1]
vmull.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmull.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
sub r9,r5,#1
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
b .LNEON_outer8
.align 4
.LNEON_outer8:
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
vzip.16 d28,d8
vadd.u64 d12,d12,d10
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
subs r9,r9,#1
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
bne .LNEON_outer8
vadd.u64 d12,d12,d10
mov r7,sp
vshr.u64 d10,d12,#16
mov r8,r5
vadd.u64 d13,d13,d10
add r6,sp,#96
vshr.u64 d10,d13,#16
vzip.16 d12,d13
b .LNEON_tail_entry
.align 4
.LNEON_8n:
veor q6,q6,q6
sub r7,sp,#128
veor q7,q7,q7
sub r7,r7,r5,lsl#4
veor q8,q8,q8
and r7,r7,#-64
veor q9,q9,q9
mov sp,r7 @ alloca
veor q10,q10,q10
add r7,r7,#256
veor q11,q11,q11
sub r8,r5,#8
veor q12,q12,q12
veor q13,q13,q13
.LNEON_8n_init:
vst1.64 {q6,q7},[r7,:256]!
subs r8,r8,#8
vst1.64 {q8,q9},[r7,:256]!
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12,q13},[r7,:256]!
bne .LNEON_8n_init
add r6,sp,#256
vld1.32 {d0,d1,d2,d3},[r1]!
add r10,sp,#8
vld1.32 {d30[0]},[r4,:32]
mov r9,r5
b .LNEON_8n_outer
.align 4
.LNEON_8n_outer:
vld1.32 {d28[0]},[r2,:32]! @ *b++
veor d8,d8,d8
vzip.16 d28,d8
add r7,sp,#128
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
vmlal.u32 q10,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q11,d28,d2[1]
vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q6,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q7,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q8,d29,d5[0]
vshr.u64 d12,d12,#16
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vadd.u64 d12,d12,d13
vmlal.u32 q11,d29,d6[1]
vshr.u64 d12,d12,#16
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vadd.u64 d14,d14,d12
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0]
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]!
vmlal.u32 q8,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q9,d28,d1[0]
vshl.i64 d29,d15,#16
vmlal.u32 q10,d28,d1[1]
vadd.u64 d29,d29,d14
vmlal.u32 q11,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q12,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1]
vmlal.u32 q13,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q7,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q8,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q9,d29,d5[0]
vshr.u64 d14,d14,#16
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vadd.u64 d14,d14,d15
vmlal.u32 q12,d29,d6[1]
vshr.u64 d14,d14,#16
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vadd.u64 d16,d16,d14
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1]
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]!
vmlal.u32 q9,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q10,d28,d1[0]
vshl.i64 d29,d17,#16
vmlal.u32 q11,d28,d1[1]
vadd.u64 d29,d29,d16
vmlal.u32 q12,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q13,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2]
vmlal.u32 q6,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q8,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q9,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q10,d29,d5[0]
vshr.u64 d16,d16,#16
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vadd.u64 d16,d16,d17
vmlal.u32 q13,d29,d6[1]
vshr.u64 d16,d16,#16
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vadd.u64 d18,d18,d16
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2]
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]!
vmlal.u32 q10,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q11,d28,d1[0]
vshl.i64 d29,d19,#16
vmlal.u32 q12,d28,d1[1]
vadd.u64 d29,d29,d18
vmlal.u32 q13,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q6,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3]
vmlal.u32 q7,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q9,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q10,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q11,d29,d5[0]
vshr.u64 d18,d18,#16
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vadd.u64 d18,d18,d19
vmlal.u32 q6,d29,d6[1]
vshr.u64 d18,d18,#16
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vadd.u64 d20,d20,d18
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3]
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]!
vmlal.u32 q11,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q12,d28,d1[0]
vshl.i64 d29,d21,#16
vmlal.u32 q13,d28,d1[1]
vadd.u64 d29,d29,d20
vmlal.u32 q6,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q7,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4]
vmlal.u32 q8,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q10,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q11,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q12,d29,d5[0]
vshr.u64 d20,d20,#16
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vadd.u64 d20,d20,d21
vmlal.u32 q7,d29,d6[1]
vshr.u64 d20,d20,#16
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vadd.u64 d22,d22,d20
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4]
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]!
vmlal.u32 q12,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q13,d28,d1[0]
vshl.i64 d29,d23,#16
vmlal.u32 q6,d28,d1[1]
vadd.u64 d29,d29,d22
vmlal.u32 q7,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q8,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5]
vmlal.u32 q9,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q11,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q12,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q13,d29,d5[0]
vshr.u64 d22,d22,#16
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vadd.u64 d22,d22,d23
vmlal.u32 q8,d29,d6[1]
vshr.u64 d22,d22,#16
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vadd.u64 d24,d24,d22
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5]
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]!
vmlal.u32 q13,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q6,d28,d1[0]
vshl.i64 d29,d25,#16
vmlal.u32 q7,d28,d1[1]
vadd.u64 d29,d29,d24
vmlal.u32 q8,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q9,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6]
vmlal.u32 q10,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q12,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q13,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q6,d29,d5[0]
vshr.u64 d24,d24,#16
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vadd.u64 d24,d24,d25
vmlal.u32 q9,d29,d6[1]
vshr.u64 d24,d24,#16
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vadd.u64 d26,d26,d24
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6]
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]!
vmlal.u32 q6,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q7,d28,d1[0]
vshl.i64 d29,d27,#16
vmlal.u32 q8,d28,d1[1]
vadd.u64 d29,d29,d26
vmlal.u32 q9,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7]
vmlal.u32 q11,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q12,d28,d3[1]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q13,d29,d4[0]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q6,d29,d4[1]
vmlal.u32 q7,d29,d5[0]
vshr.u64 d26,d26,#16
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vadd.u64 d26,d26,d27
vmlal.u32 q10,d29,d6[1]
vshr.u64 d26,d26,#16
vmlal.u32 q11,d29,d7[0]
vmlal.u32 q12,d29,d7[1]
vadd.u64 d12,d12,d26
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
add r10,sp,#8 @ rewind
sub r8,r5,#8
b .LNEON_8n_inner
.align 4
.LNEON_8n_inner:
subs r8,r8,#8
vmlal.u32 q6,d28,d0[0]
vld1.64 {q13},[r6,:128]
vmlal.u32 q7,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+0]
vmlal.u32 q8,d28,d1[0]
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q9,d28,d1[1]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmlal.u32 q11,d29,d6[1]
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vst1.64 {q6},[r7,:128]!
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]
vmlal.u32 q8,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1]
vmlal.u32 q9,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d1[1]
vmlal.u32 q11,d28,d2[0]
vmlal.u32 q12,d28,d2[1]
vmlal.u32 q13,d28,d3[0]
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2]
vmlal.u32 q7,d29,d4[0]
vmlal.u32 q8,d29,d4[1]
vmlal.u32 q9,d29,d5[0]
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vmlal.u32 q12,d29,d6[1]
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vst1.64 {q7},[r7,:128]!
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]
vmlal.u32 q9,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2]
vmlal.u32 q10,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q11,d28,d1[1]
vmlal.u32 q12,d28,d2[0]
vmlal.u32 q13,d28,d2[1]
vmlal.u32 q6,d28,d3[0]
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3]
vmlal.u32 q8,d29,d4[0]
vmlal.u32 q9,d29,d4[1]
vmlal.u32 q10,d29,d5[0]
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vmlal.u32 q13,d29,d6[1]
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vst1.64 {q8},[r7,:128]!
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]
vmlal.u32 q10,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3]
vmlal.u32 q11,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q12,d28,d1[1]
vmlal.u32 q13,d28,d2[0]
vmlal.u32 q6,d28,d2[1]
vmlal.u32 q7,d28,d3[0]
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4]
vmlal.u32 q9,d29,d4[0]
vmlal.u32 q10,d29,d4[1]
vmlal.u32 q11,d29,d5[0]
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vmlal.u32 q6,d29,d6[1]
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vst1.64 {q9},[r7,:128]!
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]
vmlal.u32 q11,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4]
vmlal.u32 q12,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q13,d28,d1[1]
vmlal.u32 q6,d28,d2[0]
vmlal.u32 q7,d28,d2[1]
vmlal.u32 q8,d28,d3[0]
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5]
vmlal.u32 q10,d29,d4[0]
vmlal.u32 q11,d29,d4[1]
vmlal.u32 q12,d29,d5[0]
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vmlal.u32 q7,d29,d6[1]
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vst1.64 {q10},[r7,:128]!
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]
vmlal.u32 q12,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5]
vmlal.u32 q13,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q6,d28,d1[1]
vmlal.u32 q7,d28,d2[0]
vmlal.u32 q8,d28,d2[1]
vmlal.u32 q9,d28,d3[0]
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+6]
vmlal.u32 q11,d29,d4[0]
vmlal.u32 q12,d29,d4[1]
vmlal.u32 q13,d29,d5[0]
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vmlal.u32 q8,d29,d6[1]
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vst1.64 {q11},[r7,:128]!
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]
vmlal.u32 q13,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6]
vmlal.u32 q6,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q7,d28,d1[1]
vmlal.u32 q8,d28,d2[0]
vmlal.u32 q9,d28,d2[1]
vmlal.u32 q10,d28,d3[0]
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7]
vmlal.u32 q12,d29,d4[0]
vmlal.u32 q13,d29,d4[1]
vmlal.u32 q6,d29,d5[0]
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vmlal.u32 q9,d29,d6[1]
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vst1.64 {q12},[r7,:128]!
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]
vmlal.u32 q6,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7]
vmlal.u32 q7,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q8,d28,d1[1]
vmlal.u32 q9,d28,d2[0]
vmlal.u32 q10,d28,d2[1]
vmlal.u32 q11,d28,d3[0]
vmlal.u32 q12,d28,d3[1]
it eq
subeq r1,r1,r5,lsl#2 @ rewind
vmlal.u32 q13,d29,d4[0]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q6,d29,d4[1]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q7,d29,d5[0]
add r10,sp,#8 @ rewind
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vmlal.u32 q10,d29,d6[1]
vmlal.u32 q11,d29,d7[0]
vst1.64 {q13},[r7,:128]!
vmlal.u32 q12,d29,d7[1]
bne .LNEON_8n_inner
add r6,sp,#128
vst1.64 {q6,q7},[r7,:256]!
veor q2,q2,q2 @ d4-d5
vst1.64 {q8,q9},[r7,:256]!
veor q3,q3,q3 @ d6-d7
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12},[r7,:128]
subs r9,r9,#8
vld1.64 {q6,q7},[r6,:256]!
vld1.64 {q8,q9},[r6,:256]!
vld1.64 {q10,q11},[r6,:256]!
vld1.64 {q12,q13},[r6,:256]!
itt ne
subne r3,r3,r5,lsl#2 @ rewind
bne .LNEON_8n_outer
add r7,sp,#128
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
vshr.u64 d10,d12,#16
vst1.64 {q2,q3},[sp,:256]!
vadd.u64 d13,d13,d10
vst1.64 {q2,q3}, [sp,:256]!
vshr.u64 d10,d13,#16
vst1.64 {q2,q3}, [sp,:256]!
vzip.16 d12,d13
mov r8,r5
b .LNEON_tail_entry
.align 4
.LNEON_tail:
vadd.u64 d12,d12,d10
vshr.u64 d10,d12,#16
vld1.64 {q8,q9}, [r6, :256]!
vadd.u64 d13,d13,d10
vld1.64 {q10,q11}, [r6, :256]!
vshr.u64 d10,d13,#16
vld1.64 {q12,q13}, [r6, :256]!
vzip.16 d12,d13
.LNEON_tail_entry:
vadd.u64 d14,d14,d10
vst1.32 {d12[0]}, [r7, :32]!
vshr.u64 d10,d14,#16
vadd.u64 d15,d15,d10
vshr.u64 d10,d15,#16
vzip.16 d14,d15
vadd.u64 d16,d16,d10
vst1.32 {d14[0]}, [r7, :32]!
vshr.u64 d10,d16,#16
vadd.u64 d17,d17,d10
vshr.u64 d10,d17,#16
vzip.16 d16,d17
vadd.u64 d18,d18,d10
vst1.32 {d16[0]}, [r7, :32]!
vshr.u64 d10,d18,#16
vadd.u64 d19,d19,d10
vshr.u64 d10,d19,#16
vzip.16 d18,d19
vadd.u64 d20,d20,d10
vst1.32 {d18[0]}, [r7, :32]!
vshr.u64 d10,d20,#16
vadd.u64 d21,d21,d10
vshr.u64 d10,d21,#16
vzip.16 d20,d21
vadd.u64 d22,d22,d10
vst1.32 {d20[0]}, [r7, :32]!
vshr.u64 d10,d22,#16
vadd.u64 d23,d23,d10
vshr.u64 d10,d23,#16
vzip.16 d22,d23
vadd.u64 d24,d24,d10
vst1.32 {d22[0]}, [r7, :32]!
vshr.u64 d10,d24,#16
vadd.u64 d25,d25,d10
vshr.u64 d10,d25,#16
vzip.16 d24,d25
vadd.u64 d26,d26,d10
vst1.32 {d24[0]}, [r7, :32]!
vshr.u64 d10,d26,#16
vadd.u64 d27,d27,d10
vshr.u64 d10,d27,#16
vzip.16 d26,d27
vld1.64 {q6,q7}, [r6, :256]!
subs r8,r8,#8
vst1.32 {d26[0]}, [r7, :32]!
bne .LNEON_tail
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
sub r3,r3,r5,lsl#2 @ rewind r3
subs r1,sp,#0 @ clear carry flag
add r2,sp,r5,lsl#2
.LNEON_sub:
ldmia r1!, {r4,r5,r6,r7}
ldmia r3!, {r8,r9,r10,r11}
sbcs r8, r4,r8
sbcs r9, r5,r9
sbcs r10,r6,r10
sbcs r11,r7,r11
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne .LNEON_sub
ldr r10, [r1] @ load top-most bit
mov r11,sp
veor q0,q0,q0
sub r11,r2,r11 @ this is num*4
veor q1,q1,q1
mov r1,sp
sub r0,r0,r11 @ rewind r0
mov r3,r2 @ second 3/4th of frame
sbcs r10,r10,#0 @ result is carry flag
.LNEON_copy_n_zap:
ldmia r1!, {r4,r5,r6,r7}
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r3,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
ldmia r1, {r4,r5,r6,r7}
stmia r0!, {r8,r9,r10,r11}
sub r1,r1,#16
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r1,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne .LNEON_copy_n_zap
mov sp,ip
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
bx lr @ bx lr
.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
#endif
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
#endif
#endif
@ -1,4 +1,11 @@
#if defined(__arm__) #if defined(__arm__)
@ Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use
@ this file except in compliance with the License. You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html
@ ==================================================================== @ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ -77,12 +84,13 @@
.thumb .thumb
#else #else
.code 32 .code 32
# undef __thumb2__
#endif #endif
.type _bsaes_decrypt8,%function .type _bsaes_decrypt8,%function
.align 4 .align 4
_bsaes_decrypt8: _bsaes_decrypt8:
adr r6,_bsaes_decrypt8 adr r6,.
vldmia r4!, {q9} @ round 0 key vldmia r4!, {q9} @ round 0 key
#ifdef __APPLE__ #ifdef __APPLE__
adr r6,.LM0ISR adr r6,.LM0ISR
@ -573,7 +581,7 @@ _bsaes_const:
.type _bsaes_encrypt8,%function .type _bsaes_encrypt8,%function
.align 4 .align 4
_bsaes_encrypt8: _bsaes_encrypt8:
adr r6,_bsaes_encrypt8 adr r6,.
vldmia r4!, {q9} @ round 0 key vldmia r4!, {q9} @ round 0 key
#ifdef __APPLE__ #ifdef __APPLE__
adr r6,.LM0SR adr r6,.LM0SR
@ -1008,7 +1016,7 @@ _bsaes_encrypt8_bitslice:
.type _bsaes_key_convert,%function .type _bsaes_key_convert,%function
.align 4 .align 4
_bsaes_key_convert: _bsaes_key_convert:
adr r6,_bsaes_key_convert adr r6,.
vld1.8 {q7}, [r4]! @ load round 0 key vld1.8 {q7}, [r4]! @ load round 0 key
#ifdef __APPLE__ #ifdef __APPLE__
adr r6,.LM0 adr r6,.LM0
@ -1313,7 +1321,7 @@ bsaes_cbc_encrypt:
vmov q4,q15 @ just in case ensure that IV vmov q4,q15 @ just in case ensure that IV
vmov q5,q0 @ and input are preserved vmov q5,q0 @ and input are preserved
bl AES_decrypt bl AES_decrypt
vld1.8 {q0}, [r9,:64] @ load result vld1.8 {q0}, [r9] @ load result
veor q0, q0, q4 @ ^= IV veor q0, q0, q4 @ ^= IV
vmov q15, q5 @ q5 holds input vmov q15, q5 @ q5 holds input
vst1.8 {q0}, [r10] @ write output vst1.8 {q0}, [r10] @ write output
@ -1843,8 +1851,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done b .Lxts_enc_done
.align 4 .align 4
.Lxts_enc_6: .Lxts_enc_6:
vst1.64 {q14}, [r0,:128] @ next round tweak
veor q4, q4, q12 veor q4, q4, q12
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -1880,8 +1886,6 @@ bsaes_xts_encrypt:
.align 5 .align 5
.Lxts_enc_5: .Lxts_enc_5:
vst1.64 {q13}, [r0,:128] @ next round tweak
veor q3, q3, q11 veor q3, q3, q11
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -1910,8 +1914,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done b .Lxts_enc_done
.align 4 .align 4
.Lxts_enc_4: .Lxts_enc_4:
vst1.64 {q12}, [r0,:128] @ next round tweak
veor q2, q2, q10 veor q2, q2, q10
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -1937,8 +1939,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done b .Lxts_enc_done
.align 4 .align 4
.Lxts_enc_3: .Lxts_enc_3:
vst1.64 {q11}, [r0,:128] @ next round tweak
veor q1, q1, q9 veor q1, q1, q9
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -1963,8 +1963,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done b .Lxts_enc_done
.align 4 .align 4
.Lxts_enc_2: .Lxts_enc_2:
vst1.64 {q10}, [r0,:128] @ next round tweak
veor q0, q0, q8 veor q0, q0, q8
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -1987,7 +1985,7 @@ bsaes_xts_encrypt:
.align 4 .align 4
.Lxts_enc_1: .Lxts_enc_1:
mov r0, sp mov r0, sp
veor q0, q8 veor q0, q0, q8
mov r1, sp mov r1, sp
vst1.8 {q0}, [sp,:128] vst1.8 {q0}, [sp,:128]
mov r2, r10 mov r2, r10
@ -2376,8 +2374,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done b .Lxts_dec_done
.align 4 .align 4
.Lxts_dec_5: .Lxts_dec_5:
vst1.64 {q13}, [r0,:128] @ next round tweak
veor q3, q3, q11 veor q3, q3, q11
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -2406,8 +2402,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done b .Lxts_dec_done
.align 4 .align 4
.Lxts_dec_4: .Lxts_dec_4:
vst1.64 {q12}, [r0,:128] @ next round tweak
veor q2, q2, q10 veor q2, q2, q10
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -2433,8 +2427,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done b .Lxts_dec_done
.align 4 .align 4
.Lxts_dec_3: .Lxts_dec_3:
vst1.64 {q11}, [r0,:128] @ next round tweak
veor q1, q1, q9 veor q1, q1, q9
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -2459,8 +2451,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done b .Lxts_dec_done
.align 4 .align 4
.Lxts_dec_2: .Lxts_dec_2:
vst1.64 {q10}, [r0,:128] @ next round tweak
veor q0, q0, q8 veor q0, q0, q8
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule add r4, sp, #0x90 @ pass key schedule
@ -2483,12 +2473,12 @@ bsaes_xts_decrypt:
.align 4 .align 4
.Lxts_dec_1: .Lxts_dec_1:
mov r0, sp mov r0, sp
veor q0, q8 veor q0, q0, q8
mov r1, sp mov r1, sp
vst1.8 {q0}, [sp,:128] vst1.8 {q0}, [sp,:128]
mov r5, r2 @ preserve magic
mov r2, r10 mov r2, r10
mov r4, r3 @ preserve fp mov r4, r3 @ preserve fp
mov r5, r2 @ preserve magic
bl AES_decrypt bl AES_decrypt
@ -1,10 +1,15 @@
#if defined(__arm__) #if defined(__arm__)
#include <openssl/arm_arch.h> #include <openssl/arm_arch.h>
.syntax unified
.text .text
#if defined(__thumb2__) || defined(__clang__)
.syntax unified
#endif
#if defined(__thumb2__)
.thumb
#else
.code 32 .code 32
#endif
#ifdef __clang__ #ifdef __clang__
#define ldrplb ldrbpl #define ldrplb ldrbpl
@ -22,20 +27,28 @@ rem_4bit:
.type rem_4bit_get,%function .type rem_4bit_get,%function
rem_4bit_get: rem_4bit_get:
sub r2,pc,#8 #if defined(__thumb2__)
sub r2,r2,#32 @ &rem_4bit adr r2,rem_4bit
#else
sub r2,pc,#8+32 @ &rem_4bit
#endif
b .Lrem_4bit_got b .Lrem_4bit_got
nop nop
nop
.size rem_4bit_get,.-rem_4bit_get .size rem_4bit_get,.-rem_4bit_get
.globl gcm_ghash_4bit .globl gcm_ghash_4bit
.hidden gcm_ghash_4bit .hidden gcm_ghash_4bit
.type gcm_ghash_4bit,%function .type gcm_ghash_4bit,%function
.align 4
gcm_ghash_4bit: gcm_ghash_4bit:
sub r12,pc,#8 #if defined(__thumb2__)
adr r12,rem_4bit
#else
sub r12,pc,#8+48 @ &rem_4bit
#endif
add r3,r2,r3 @ r3 to point at the end add r3,r2,r3 @ r3 to point at the end
stmdb sp!,{r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} @ save r3/end too stmdb sp!,{r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} @ save r3/end too
sub r12,r12,#48 @ &rem_4bit
ldmia r12,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy rem_4bit ... ldmia r12,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy rem_4bit ...
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ ... to stack stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ ... to stack
@ -82,7 +95,10 @@ gcm_ghash_4bit:
eor r5,r5,r6,lsl#28 eor r5,r5,r6,lsl#28
ldrh r8,[sp,r12] @ rem_4bit[rem] ldrh r8,[sp,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4 eor r6,r10,r6,lsr#4
ldrbpl r12,[r2,r3] #ifdef __thumb2__
it pl
#endif
ldrplb r12,[r2,r3]
eor r6,r6,r7,lsl#28 eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4 eor r7,r11,r7,lsr#4
@ -92,15 +108,24 @@ gcm_ghash_4bit:
add r14,r14,r14 add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi] ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4 eor r4,r8,r4,lsr#4
ldrbpl r8,[r0,r3] #ifdef __thumb2__
it pl
#endif
ldrplb r8,[r0,r3]
eor r4,r4,r5,lsl#28 eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4 eor r5,r9,r5,lsr#4
ldrh r9,[sp,r14] ldrh r9,[sp,r14]
eor r5,r5,r6,lsl#28 eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4 eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28 eor r6,r6,r7,lsl#28
#ifdef __thumb2__
it pl
#endif
eorpl r12,r12,r8 eorpl r12,r12,r8
eor r7,r11,r7,lsr#4 eor r7,r11,r7,lsr#4
#ifdef __thumb2__
itt pl
#endif
andpl r14,r12,#0xf0 andpl r14,r12,#0xf0
andpl r12,r12,#0x0f andpl r12,r12,#0x0f
eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem] eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
@ -138,7 +163,11 @@ gcm_ghash_4bit:
strb r10,[r0,#8+1] strb r10,[r0,#8+1]
strb r11,[r0,#8] strb r11,[r0,#8]
#endif #endif
ldrbne r12,[r2,#15]
#ifdef __thumb2__
it ne
#endif
ldrneb r12,[r2,#15]
#if __ARM_ARCH__>=7 && defined(__ARMEL__) #if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6 rev r6,r6
str r6,[r0,#4] str r6,[r0,#4]
@ -226,7 +255,10 @@ gcm_gmult_4bit:
eor r5,r5,r6,lsl#28 eor r5,r5,r6,lsl#28
ldrh r8,[r2,r12] @ rem_4bit[rem] ldrh r8,[r2,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4 eor r6,r10,r6,lsr#4
ldrbpl r12,[r0,r3] #ifdef __thumb2__
it pl
#endif
ldrplb r12,[r0,r3]
eor r6,r6,r7,lsl#28 eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4 eor r7,r11,r7,lsr#4
@ -243,6 +275,9 @@ gcm_gmult_4bit:
eor r6,r10,r6,lsr#4 eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28 eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4 eor r7,r11,r7,lsr#4
#ifdef __thumb2__
itt pl
#endif
andpl r14,r12,#0xf0 andpl r14,r12,#0xf0
andpl r12,r12,#0x0f andpl r12,r12,#0x0f
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem] eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
@ -4,6 +4,7 @@
.text .text
.fpu neon .fpu neon
.code 32 .code 32
#undef __thumb2__
.globl gcm_init_v8 .globl gcm_init_v8
.hidden gcm_init_v8 .hidden gcm_init_v8
.type gcm_init_v8,%function .type gcm_init_v8,%function
@ -2,7 +2,12 @@
#include <openssl/arm_arch.h> #include <openssl/arm_arch.h>
.text .text
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32 .code 32
#endif
.globl sha1_block_data_order .globl sha1_block_data_order
.hidden sha1_block_data_order .hidden sha1_block_data_order
@ -11,7 +16,8 @@
.align 5 .align 5
sha1_block_data_order: sha1_block_data_order:
#if __ARM_MAX_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
sub r3,pc,#8 @ sha1_block_data_order .Lsha1_block:
adr r3,.Lsha1_block
ldr r12,.LOPENSSL_armcap ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P ldr r12,[r3,r12] @ OPENSSL_armcap_P
#ifdef __APPLE__ #ifdef __APPLE__
@ -158,7 +164,12 @@ sha1_block_data_order:
eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
str r9,[r14,#-4]! str r9,[r14,#-4]!
add r3,r3,r10 @ E+=F_00_19(B,C,D) add r3,r3,r10 @ E+=F_00_19(B,C,D)
#if defined(__thumb2__)
mov r12,sp
teq r14,r12
#else
teq r14,sp teq r14,sp
#endif
bne .L_00_15 @ [((11+4)*5+2)*3] bne .L_00_15 @ [((11+4)*5+2)*3]
sub sp,sp,#25*4 sub sp,sp,#25*4
#if __ARM_ARCH__<7 #if __ARM_ARCH__<7
@ -338,7 +349,12 @@ sha1_block_data_order:
@ F_xx_xx @ F_xx_xx
add r3,r3,r9 @ E+=X[i] add r3,r3,r9 @ E+=X[i]
add r3,r3,r10 @ E+=F_20_39(B,C,D) add r3,r3,r10 @ E+=F_20_39(B,C,D)
#if defined(__thumb2__)
mov r12,sp
teq r14,r12
#else
teq r14,sp @ preserve carry teq r14,sp @ preserve carry
#endif
bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
@ -430,7 +446,12 @@ sha1_block_data_order:
add r3,r3,r9 @ E+=X[i] add r3,r3,r9 @ E+=X[i]
add r3,r3,r10 @ E+=F_40_59(B,C,D) add r3,r3,r10 @ E+=F_40_59(B,C,D)
add r3,r3,r11,ror#2 add r3,r3,r11,ror#2
#if defined(__thumb2__)
mov r12,sp
teq r14,r12
#else
teq r14,sp teq r14,sp
#endif
bne .L_40_59 @ [+((12+5)*5+2)*4] bne .L_40_59 @ [+((12+5)*5+2)*4]
ldr r8,.LK_60_79 ldr r8,.LK_60_79
@ -466,7 +487,7 @@ sha1_block_data_order:
.LK_60_79:.word 0xca62c1d6 .LK_60_79:.word 0xca62c1d6
#if __ARM_MAX_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap: .LOPENSSL_armcap:
.word OPENSSL_armcap_P-sha1_block_data_order .word OPENSSL_armcap_P-.Lsha1_block
#endif #endif
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2 .align 2
@ -484,12 +505,12 @@ sha1_block_data_order_neon:
@ dmb @ errata #451034 on early Cortex A8 @ dmb @ errata #451034 on early Cortex A8
@ vstmdb sp!,{d8-d15} @ ABI specification says so @ vstmdb sp!,{d8-d15} @ ABI specification says so
mov r14,sp mov r14,sp
sub sp,sp,#64 @ alloca sub r12,sp,#64
adr r8,.LK_00_19 adr r8,.LK_00_19
bic sp,sp,#15 @ align for 128-bit stores bic r12,r12,#15 @ align for 128-bit stores
ldmia r0,{r3,r4,r5,r6,r7} @ load context ldmia r0,{r3,r4,r5,r6,r7} @ load context
mov r12,sp mov sp,r12 @ alloca
vld1.8 {q0,q1},[r1]! @ handles unaligned vld1.8 {q0,q1},[r1]! @ handles unaligned
veor q15,q15,q15 veor q15,q15,q15
@ -1182,6 +1203,7 @@ sha1_block_data_order_neon:
sub r12,r12,#64 sub r12,r12,#64
teq r1,r2 teq r1,r2
sub r8,r8,#16 sub r8,r8,#16
it eq
subeq r1,r1,#64 subeq r1,r1,#64
vld1.8 {q0,q1},[r1]! vld1.8 {q0,q1},[r1]!
ldr r9,[sp,#4] ldr r9,[sp,#4]
@ -1311,10 +1333,13 @@ sha1_block_data_order_neon:
add r4,r4,r10 add r4,r4,r10
add r5,r5,r11 add r5,r5,r11
add r6,r6,r12 add r6,r6,r12
it eq
moveq sp,r14 moveq sp,r14
add r7,r7,r9 add r7,r7,r9
it ne
ldrne r9,[sp] ldrne r9,[sp]
stmia r0,{r3,r4,r5,r6,r7} stmia r0,{r3,r4,r5,r6,r7}
itt ne
addne r12,sp,#3*16 addne r12,sp,#3*16
bne .Loop_neon bne .Loop_neon
@ -1323,6 +1348,13 @@ sha1_block_data_order_neon:
.size sha1_block_data_order_neon,.-sha1_block_data_order_neon .size sha1_block_data_order_neon,.-sha1_block_data_order_neon
#endif #endif
#if __ARM_MAX_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
# if defined(__thumb2__)
# define INST(a,b,c,d) .byte c,d|0xf,a,b
# else
# define INST(a,b,c,d) .byte a,b,c,d|0x10
# endif
.type sha1_block_data_order_armv8,%function .type sha1_block_data_order_armv8,%function
.align 5 .align 5
sha1_block_data_order_armv8: sha1_block_data_order_armv8:
@ -1352,98 +1384,98 @@ sha1_block_data_order_armv8:
vadd.i32 q13,q8,q5 vadd.i32 q13,q8,q5
vrev32.8 q7,q7 vrev32.8 q7,q7
.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 0 INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 0
.byte 0x68,0x0c,0x02,0xf2 @ sha1c q0,q1,q12 INST(0x68,0x0c,0x02,0xe2) @ sha1c q0,q1,q12
vadd.i32 q12,q8,q6 vadd.i32 q12,q8,q6
.byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 INST(0x4c,0x8c,0x3a,0xe2) @ sha1su0 q4,q5,q6
.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 1 INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 1
.byte 0x6a,0x0c,0x06,0xf2 @ sha1c q0,q3,q13 INST(0x6a,0x0c,0x06,0xe2) @ sha1c q0,q3,q13
vadd.i32 q13,q8,q7 vadd.i32 q13,q8,q7
.byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 INST(0x8e,0x83,0xba,0xf3) @ sha1su1 q4,q7
.byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 INST(0x4e,0xac,0x3c,0xe2) @ sha1su0 q5,q6,q7
.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 2 INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 2
.byte 0x68,0x0c,0x04,0xf2 @ sha1c q0,q2,q12 INST(0x68,0x0c,0x04,0xe2) @ sha1c q0,q2,q12
vadd.i32 q12,q8,q4 vadd.i32 q12,q8,q4
.byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 INST(0x88,0xa3,0xba,0xf3) @ sha1su1 q5,q4
.byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 INST(0x48,0xcc,0x3e,0xe2) @ sha1su0 q6,q7,q4
.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 3 INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 3
.byte 0x6a,0x0c,0x06,0xf2 @ sha1c q0,q3,q13 INST(0x6a,0x0c,0x06,0xe2) @ sha1c q0,q3,q13
vadd.i32 q13,q9,q5 vadd.i32 q13,q9,q5
.byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 INST(0x8a,0xc3,0xba,0xf3) @ sha1su1 q6,q5
.byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 INST(0x4a,0xec,0x38,0xe2) @ sha1su0 q7,q4,q5
.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 4 INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 4
.byte 0x68,0x0c,0x04,0xf2 @ sha1c q0,q2,q12 INST(0x68,0x0c,0x04,0xe2) @ sha1c q0,q2,q12
vadd.i32 q12,q9,q6 vadd.i32 q12,q9,q6
.byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 INST(0x8c,0xe3,0xba,0xf3) @ sha1su1 q7,q6
.byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 INST(0x4c,0x8c,0x3a,0xe2) @ sha1su0 q4,q5,q6
.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 5 INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 5
.byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13
vadd.i32 q13,q9,q7 vadd.i32 q13,q9,q7
.byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 INST(0x8e,0x83,0xba,0xf3) @ sha1su1 q4,q7
.byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 INST(0x4e,0xac,0x3c,0xe2) @ sha1su0 q5,q6,q7
.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 6 INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 6
.byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 INST(0x68,0x0c,0x14,0xe2) @ sha1p q0,q2,q12
vadd.i32 q12,q9,q4 vadd.i32 q12,q9,q4
.byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 INST(0x88,0xa3,0xba,0xf3) @ sha1su1 q5,q4
.byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 INST(0x48,0xcc,0x3e,0xe2) @ sha1su0 q6,q7,q4
.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 7 INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 7
.byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13
vadd.i32 q13,q9,q5 vadd.i32 q13,q9,q5
.byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 INST(0x8a,0xc3,0xba,0xf3) @ sha1su1 q6,q5
.byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 INST(0x4a,0xec,0x38,0xe2) @ sha1su0 q7,q4,q5
.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 8 INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 8
.byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 INST(0x68,0x0c,0x14,0xe2) @ sha1p q0,q2,q12
vadd.i32 q12,q10,q6 vadd.i32 q12,q10,q6
.byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 INST(0x8c,0xe3,0xba,0xf3) @ sha1su1 q7,q6
.byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 INST(0x4c,0x8c,0x3a,0xe2) @ sha1su0 q4,q5,q6
.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 9 INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 9
.byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13
vadd.i32 q13,q10,q7 vadd.i32 q13,q10,q7
.byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 INST(0x8e,0x83,0xba,0xf3) @ sha1su1 q4,q7
.byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 INST(0x4e,0xac,0x3c,0xe2) @ sha1su0 q5,q6,q7
.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 10 INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 10
.byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12 INST(0x68,0x0c,0x24,0xe2) @ sha1m q0,q2,q12
vadd.i32 q12,q10,q4 vadd.i32 q12,q10,q4
.byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 INST(0x88,0xa3,0xba,0xf3) @ sha1su1 q5,q4
.byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 INST(0x48,0xcc,0x3e,0xe2) @ sha1su0 q6,q7,q4
.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 11 INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 11
.byte 0x6a,0x0c,0x26,0xf2 @ sha1m q0,q3,q13 INST(0x6a,0x0c,0x26,0xe2) @ sha1m q0,q3,q13
vadd.i32 q13,q10,q5 vadd.i32 q13,q10,q5
.byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 INST(0x8a,0xc3,0xba,0xf3) @ sha1su1 q6,q5
.byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 INST(0x4a,0xec,0x38,0xe2) @ sha1su0 q7,q4,q5
.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 12 INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 12
.byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12 INST(0x68,0x0c,0x24,0xe2) @ sha1m q0,q2,q12
vadd.i32 q12,q10,q6 vadd.i32 q12,q10,q6
.byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 INST(0x8c,0xe3,0xba,0xf3) @ sha1su1 q7,q6
.byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 INST(0x4c,0x8c,0x3a,0xe2) @ sha1su0 q4,q5,q6
.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 13 INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 13
.byte 0x6a,0x0c,0x26,0xf2 @ sha1m q0,q3,q13 INST(0x6a,0x0c,0x26,0xe2) @ sha1m q0,q3,q13
vadd.i32 q13,q11,q7 vadd.i32 q13,q11,q7
.byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 INST(0x8e,0x83,0xba,0xf3) @ sha1su1 q4,q7
.byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 INST(0x4e,0xac,0x3c,0xe2) @ sha1su0 q5,q6,q7
.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 14 INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 14
.byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12 INST(0x68,0x0c,0x24,0xe2) @ sha1m q0,q2,q12
vadd.i32 q12,q11,q4 vadd.i32 q12,q11,q4
.byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 INST(0x88,0xa3,0xba,0xf3) @ sha1su1 q5,q4
.byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 INST(0x48,0xcc,0x3e,0xe2) @ sha1su0 q6,q7,q4
.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 15 INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 15
.byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13
vadd.i32 q13,q11,q5 vadd.i32 q13,q11,q5
.byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 INST(0x8a,0xc3,0xba,0xf3) @ sha1su1 q6,q5
.byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 INST(0x4a,0xec,0x38,0xe2) @ sha1su0 q7,q4,q5
.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 16 INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 16
.byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 INST(0x68,0x0c,0x14,0xe2) @ sha1p q0,q2,q12
vadd.i32 q12,q11,q6 vadd.i32 q12,q11,q6
.byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 INST(0x8c,0xe3,0xba,0xf3) @ sha1su1 q7,q6
.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 17 INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 17
.byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13
vadd.i32 q13,q11,q7 vadd.i32 q13,q11,q7
.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 18 INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 18
.byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 INST(0x68,0x0c,0x14,0xe2) @ sha1p q0,q2,q12
.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 19 INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 19
.byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13
vadd.i32 q1,q1,q2 vadd.i32 q1,q1,q2
vadd.i32 q0,q0,q14 vadd.i32 q0,q0,q14
@ -1,4 +1,11 @@
#if defined(__arm__) #if defined(__arm__)
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use
@ this file except in compliance with the License. You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html
@ ==================================================================== @ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ -45,17 +52,12 @@
#endif #endif
.text .text
#if __ARM_ARCH__<7 #if defined(__thumb2__)
.code 32
#else
.syntax unified .syntax unified
# if defined(__thumb2__) && !defined(__APPLE__)
# define adrl adr
.thumb .thumb
#else #else
.code 32 .code 32
#endif #endif
#endif
.type K256,%object .type K256,%object
.align 5 .align 5
@ -89,10 +91,10 @@ K256:
.type sha256_block_data_order,%function .type sha256_block_data_order,%function
sha256_block_data_order: sha256_block_data_order:
.Lsha256_block_data_order: .Lsha256_block_data_order:
#if __ARM_ARCH__<7 #if __ARM_ARCH__<7 && !defined(__thumb2__)
sub r3,pc,#8 @ sha256_block_data_order sub r3,pc,#8 @ sha256_block_data_order
#else #else
adr r3,sha256_block_data_order adr r3,.Lsha256_block_data_order
#endif #endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
ldr r12,.LOPENSSL_armcap ldr r12,.LOPENSSL_armcap
@ -1878,13 +1880,14 @@ sha256_block_data_order:
.globl sha256_block_data_order_neon .globl sha256_block_data_order_neon
.hidden sha256_block_data_order_neon .hidden sha256_block_data_order_neon
.type sha256_block_data_order_neon,%function .type sha256_block_data_order_neon,%function
.align 4 .align 5
.skip 16
sha256_block_data_order_neon: sha256_block_data_order_neon:
.LNEON: .LNEON:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
sub r11,sp,#16*4+16 sub r11,sp,#16*4+16
adrl r14,K256 adr r14,K256
bic r11,r11,#15 @ align for 128-bit stores bic r11,r11,#15 @ align for 128-bit stores
mov r12,sp mov r12,sp
mov sp,r11 @ alloca mov sp,r11 @ alloca
@ -2660,7 +2663,7 @@ sha256_block_data_order_neon:
#endif #endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
# if defined(__thumb2__) && !defined(__APPLE__) # if defined(__thumb2__)
# define INST(a,b,c,d) .byte c,d|0xc,a,b # define INST(a,b,c,d) .byte c,d|0xc,a,b
# else # else
# define INST(a,b,c,d) .byte a,b,c,d # define INST(a,b,c,d) .byte a,b,c,d
@ -2671,16 +2674,11 @@ sha256_block_data_order_neon:
sha256_block_data_order_armv8: sha256_block_data_order_armv8:
.LARMv8: .LARMv8:
vld1.32 {q0,q1},[r0] vld1.32 {q0,q1},[r0]
# ifdef __APPLE__
sub r3,r3,#256+32 sub r3,r3,#256+32
# elif defined(__thumb2__)
adr r3,.LARMv8
sub r3,r3,#.LARMv8-K256
# else
adrl r3,K256
# endif
add r2,r1,r2,lsl#6 @ len to point at the end of inp add r2,r1,r2,lsl#6 @ len to point at the end of inp
b .Loop_v8
.align 4
.Loop_v8: .Loop_v8:
vld1.8 {q8,q9},[r1]! vld1.8 {q8,q9},[r1]!
vld1.8 {q10,q11},[r1]! vld1.8 {q10,q11},[r1]!
@ -1,4 +1,11 @@
#if defined(__arm__) #if defined(__arm__)
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use
@ this file except in compliance with the License. You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html
@ ==================================================================== @ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ -68,17 +75,13 @@
#endif #endif
.text .text
#if __ARM_ARCH__<7 || defined(__APPLE__) #if defined(__thumb2__)
.code 32
#else
.syntax unified .syntax unified
# ifdef __thumb2__
# define adrl adr
.thumb .thumb
# define adrl adr
#else #else
.code 32 .code 32
#endif #endif
#endif
.type K512,%object .type K512,%object
.align 5 .align 5
@ -137,10 +140,10 @@ K512:
.type sha512_block_data_order,%function .type sha512_block_data_order,%function
sha512_block_data_order: sha512_block_data_order:
.Lsha512_block_data_order: .Lsha512_block_data_order:
#if __ARM_ARCH__<7 #if __ARM_ARCH__<7 && !defined(__thumb2__)
sub r3,pc,#8 @ sha512_block_data_order sub r3,pc,#8 @ sha512_block_data_order
#else #else
adr r3,sha512_block_data_order adr r3,.Lsha512_block_data_order
#endif #endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
ldr r12,.LOPENSSL_armcap ldr r12,.LOPENSSL_armcap
@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "chacha-x86.S"
.text .text
.globl ChaCha20_ctr32 .globl ChaCha20_ctr32
.hidden ChaCha20_ctr32 .hidden ChaCha20_ctr32
@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "aes-586.S"
.text .text
.hidden _x86_AES_encrypt_compact .hidden _x86_AES_encrypt_compact
.type _x86_AES_encrypt_compact,@function .type _x86_AES_encrypt_compact,@function
@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "src/crypto/aes/asm/aesni-x86.S"
.text .text
.globl aesni_encrypt .globl aesni_encrypt
.hidden aesni_encrypt .hidden aesni_encrypt
@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "src/crypto/bn/asm/bn-586.S"
.text .text
.globl bn_mul_add_words .globl bn_mul_add_words
.hidden bn_mul_add_words .hidden bn_mul_add_words
@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "src/crypto/bn/asm/co-586.S"
.text .text
.globl bn_mul_comba8 .globl bn_mul_comba8
.hidden bn_mul_comba8 .hidden bn_mul_comba8
@ -1,211 +1,5 @@
#if defined(__i386__) #if defined(__i386__)
.file "ghash-x86.S"
.text .text
.globl gcm_gmult_4bit_x86
.hidden gcm_gmult_4bit_x86
.type gcm_gmult_4bit_x86,@function
.align 16
gcm_gmult_4bit_x86:
.L_gcm_gmult_4bit_x86_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
subl $84,%esp
movl 104(%esp),%edi
movl 108(%esp),%esi
movl (%edi),%ebp
movl 4(%edi),%edx
movl 8(%edi),%ecx
movl 12(%edi),%ebx
movl $0,16(%esp)
movl $471859200,20(%esp)
movl $943718400,24(%esp)
movl $610271232,28(%esp)
movl $1887436800,32(%esp)
movl $1822425088,36(%esp)
movl $1220542464,40(%esp)
movl $1423966208,44(%esp)
movl $3774873600,48(%esp)
movl $4246732800,52(%esp)
movl $3644850176,56(%esp)
movl $3311403008,60(%esp)
movl $2441084928,64(%esp)
movl $2376073216,68(%esp)
movl $2847932416,72(%esp)
movl $3051356160,76(%esp)
movl %ebp,(%esp)
movl %edx,4(%esp)
movl %ecx,8(%esp)
movl %ebx,12(%esp)
shrl $20,%ebx
andl $240,%ebx
movl 4(%esi,%ebx,1),%ebp
movl (%esi,%ebx,1),%edx
movl 12(%esi,%ebx,1),%ecx
movl 8(%esi,%ebx,1),%ebx
xorl %eax,%eax
movl $15,%edi
jmp .L000x86_loop
.align 16
.L000x86_loop:
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
andb $240,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
decl %edi
js .L001x86_break
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
shlb $4,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
jmp .L000x86_loop
.align 16
.L001x86_break:
bswap %ebx
bswap %ecx
bswap %edx
bswap %ebp
movl 104(%esp),%edi
movl %ebx,12(%edi)
movl %ecx,8(%edi)
movl %edx,4(%edi)
movl %ebp,(%edi)
addl $84,%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
.globl gcm_ghash_4bit_x86
.hidden gcm_ghash_4bit_x86
.type gcm_ghash_4bit_x86,@function
.align 16
gcm_ghash_4bit_x86:
.L_gcm_ghash_4bit_x86_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
subl $84,%esp
movl 104(%esp),%ebx
movl 108(%esp),%esi
movl 112(%esp),%edi
movl 116(%esp),%ecx
addl %edi,%ecx
movl %ecx,116(%esp)
movl (%ebx),%ebp
movl 4(%ebx),%edx
movl 8(%ebx),%ecx
movl 12(%ebx),%ebx
movl $0,16(%esp)
movl $471859200,20(%esp)
movl $943718400,24(%esp)
movl $610271232,28(%esp)
movl $1887436800,32(%esp)
movl $1822425088,36(%esp)
movl $1220542464,40(%esp)
movl $1423966208,44(%esp)
movl $3774873600,48(%esp)
movl $4246732800,52(%esp)
movl $3644850176,56(%esp)
movl $3311403008,60(%esp)
movl $2441084928,64(%esp)
movl $2376073216,68(%esp)
movl $2847932416,72(%esp)
movl $3051356160,76(%esp)
.align 16
.L002x86_outer_loop:
xorl 12(%edi),%ebx
xorl 8(%edi),%ecx
xorl 4(%edi),%edx
xorl (%edi),%ebp
movl %ebx,12(%esp)
movl %ecx,8(%esp)
movl %edx,4(%esp)
movl %ebp,(%esp)
shrl $20,%ebx
andl $240,%ebx
movl 4(%esi,%ebx,1),%ebp
movl (%esi,%ebx,1),%edx
movl 12(%esi,%ebx,1),%ecx
movl 8(%esi,%ebx,1),%ebx
xorl %eax,%eax
movl $15,%edi
jmp .L003x86_loop
.align 16
.L003x86_loop:
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
andb $240,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
decl %edi
js .L004x86_break
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
shlb $4,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
jmp .L003x86_loop
.align 16
.L004x86_break:
bswap %ebx
bswap %ecx
bswap %edx
bswap %ebp
movl 112(%esp),%edi
leal 16(%edi),%edi
cmpl 116(%esp),%edi
movl %edi,112(%esp)
jb .L002x86_outer_loop
movl 104(%esp),%edi
movl %ebx,12(%edi)
movl %ecx,8(%edi)
movl %edx,4(%edi)
movl %ebp,(%edi)
addl $84,%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
.globl gcm_gmult_4bit_mmx .globl gcm_gmult_4bit_mmx
.hidden gcm_gmult_4bit_mmx .hidden gcm_gmult_4bit_mmx
.type gcm_gmult_4bit_mmx,@function .type gcm_gmult_4bit_mmx,@function
@ -218,10 +12,10 @@ gcm_gmult_4bit_mmx:
pushl %edi pushl %edi
movl 20(%esp),%edi movl 20(%esp),%edi
movl 24(%esp),%esi movl 24(%esp),%esi
call .L005pic_point call .L000pic_point
.L005pic_point: .L000pic_point:
popl %eax popl %eax
leal .Lrem_4bit-.L005pic_point(%eax),%eax leal .Lrem_4bit-.L000pic_point(%eax),%eax
movzbl 15(%edi),%ebx movzbl 15(%edi),%ebx
xorl %ecx,%ecx xorl %ecx,%ecx
movl %ebx,%edx movl %ebx,%edx
@ -232,9 +26,9 @@ gcm_gmult_4bit_mmx:
movq 8(%esi,%ecx,1),%mm0 movq 8(%esi,%ecx,1),%mm0
movq (%esi,%ecx,1),%mm1 movq (%esi,%ecx,1),%mm1
movd %mm0,%ebx movd %mm0,%ebx
jmp .L006mmx_loop jmp .L001mmx_loop
.align 16 .align 16
.L006mmx_loop: .L001mmx_loop:
psrlq $4,%mm0 psrlq $4,%mm0
andl $15,%ebx andl $15,%ebx
movq %mm1,%mm2 movq %mm1,%mm2
@ -248,7 +42,7 @@ gcm_gmult_4bit_mmx:
pxor (%esi,%edx,1),%mm1 pxor (%esi,%edx,1),%mm1
movl %ecx,%edx movl %ecx,%edx
pxor %mm2,%mm0 pxor %mm2,%mm0
js .L007mmx_break js .L002mmx_break
shlb $4,%cl shlb $4,%cl
andl $15,%ebx andl $15,%ebx
psrlq $4,%mm0 psrlq $4,%mm0
@ -261,9 +55,9 @@ gcm_gmult_4bit_mmx:
movd %mm0,%ebx movd %mm0,%ebx
pxor (%esi,%ecx,1),%mm1 pxor (%esi,%ecx,1),%mm1
pxor %mm2,%mm0 pxor %mm2,%mm0
jmp .L006mmx_loop jmp .L001mmx_loop
.align 16 .align 16
.L007mmx_break: .L002mmx_break:
shlb $4,%cl shlb $4,%cl
andl $15,%ebx andl $15,%ebx
psrlq $4,%mm0 psrlq $4,%mm0
@ -321,10 +115,10 @@ gcm_ghash_4bit_mmx:
movl 28(%esp),%ecx movl 28(%esp),%ecx
movl 32(%esp),%edx movl 32(%esp),%edx
movl %esp,%ebp movl %esp,%ebp
call .L008pic_point call .L003pic_point
.L008pic_point: .L003pic_point:
popl %esi popl %esi
leal .Lrem_8bit-.L008pic_point(%esi),%esi leal .Lrem_8bit-.L003pic_point(%esi),%esi
subl $544,%esp subl $544,%esp
andl $-64,%esp andl $-64,%esp
subl $16,%esp subl $16,%esp
@ -563,7 +357,7 @@ gcm_ghash_4bit_mmx:
movl 8(%eax),%ebx movl 8(%eax),%ebx
movl 12(%eax),%edx movl 12(%eax),%edx
.align 16 .align 16
.L009outer: .L004outer:
xorl 12(%ecx),%edx xorl 12(%ecx),%edx
xorl 8(%ecx),%ebx xorl 8(%ecx),%ebx
pxor (%ecx),%mm6 pxor (%ecx),%mm6
@ -898,7 +692,7 @@ gcm_ghash_4bit_mmx:
pshufw $27,%mm6,%mm6 pshufw $27,%mm6,%mm6
bswap %ebx bswap %ebx
cmpl 552(%esp),%ecx cmpl 552(%esp),%ecx
jne .L009outer jne .L004outer
movl 544(%esp),%eax movl 544(%esp),%eax
movl %edx,12(%eax) movl %edx,12(%eax)
movl %ebx,8(%eax) movl %ebx,8(%eax)
@ -919,10 +713,10 @@ gcm_init_clmul:
.L_gcm_init_clmul_begin: .L_gcm_init_clmul_begin:
movl 4(%esp),%edx movl 4(%esp),%edx
movl 8(%esp),%eax movl 8(%esp),%eax
call .L010pic call .L005pic
.L010pic: .L005pic:
popl %ecx popl %ecx
leal .Lbswap-.L010pic(%ecx),%ecx leal .Lbswap-.L005pic(%ecx),%ecx
movdqu (%eax),%xmm2 movdqu (%eax),%xmm2
pshufd $78,%xmm2,%xmm2 pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4 pshufd $255,%xmm2,%xmm4
@ -989,10 +783,10 @@ gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin: .L_gcm_gmult_clmul_begin:
movl 4(%esp),%eax movl 4(%esp),%eax
movl 8(%esp),%edx movl 8(%esp),%edx
call .L011pic call .L006pic
.L011pic: .L006pic:
popl %ecx popl %ecx
leal .Lbswap-.L011pic(%ecx),%ecx leal .Lbswap-.L006pic(%ecx),%ecx
movdqu (%eax),%xmm0 movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5 movdqa (%ecx),%xmm5
movups (%edx),%xmm2 movups (%edx),%xmm2
@ -1049,16 +843,16 @@ gcm_ghash_clmul:
movl 24(%esp),%edx movl 24(%esp),%edx
movl 28(%esp),%esi movl 28(%esp),%esi
movl 32(%esp),%ebx movl 32(%esp),%ebx
call .L012pic call .L007pic
.L012pic: .L007pic:
popl %ecx popl %ecx
leal .Lbswap-.L012pic(%ecx),%ecx leal .Lbswap-.L007pic(%ecx),%ecx
movdqu (%eax),%xmm0 movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5 movdqa (%ecx),%xmm5
movdqu (%edx),%xmm2 movdqu (%edx),%xmm2
.byte 102,15,56,0,197 .byte 102,15,56,0,197
subl $16,%ebx subl $16,%ebx
jz .L013odd_tail jz .L008odd_tail
movdqu (%esi),%xmm3 movdqu (%esi),%xmm3
movdqu 16(%esi),%xmm6 movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221 .byte 102,15,56,0,221
@ -1075,10 +869,10 @@ gcm_ghash_clmul:
movups 16(%edx),%xmm2 movups 16(%edx),%xmm2
nop nop
subl $32,%ebx subl $32,%ebx
jbe .L014even_tail jbe .L009even_tail
jmp .L015mod_loop jmp .L010mod_loop
.align 32 .align 32
.L015mod_loop: .L010mod_loop:
pshufd $78,%xmm0,%xmm4 pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1 movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4 pxor %xmm0,%xmm4
@ -1133,8 +927,8 @@ gcm_ghash_clmul:
.byte 102,15,58,68,221,0 .byte 102,15,58,68,221,0
leal 32(%esi),%esi leal 32(%esi),%esi
subl $32,%ebx subl $32,%ebx
ja .L015mod_loop ja .L010mod_loop
.L014even_tail: .L009even_tail:
pshufd $78,%xmm0,%xmm4 pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1 movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4 pxor %xmm0,%xmm4
@ -1173,9 +967,9 @@ gcm_ghash_clmul:
psrlq $1,%xmm0 psrlq $1,%xmm0
pxor %xmm1,%xmm0 pxor %xmm1,%xmm0
testl %ebx,%ebx testl %ebx,%ebx
jnz .L016done jnz .L011done
movups (%edx),%xmm2 movups (%edx),%xmm2
.L013odd_tail: .L008odd_tail:
movdqu (%esi),%xmm3 movdqu (%esi),%xmm3
.byte 102,15,56,0,221 .byte 102,15,56,0,221
pxor %xmm3,%xmm0 pxor %xmm3,%xmm0
@ -1214,7 +1008,7 @@ gcm_ghash_clmul:
pxor %xmm4,%xmm0 pxor %xmm4,%xmm0
psrlq $1,%xmm0 psrlq $1,%xmm0
pxor %xmm1,%xmm0 pxor %xmm1,%xmm0
.L016done: .L011done:
.byte 102,15,56,0,197 .byte 102,15,56,0,197
movdqu %xmm0,(%eax) movdqu %xmm0,(%eax)
popl %edi popl %edi


@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "src/crypto/md5/asm/md5-586.S"
.text .text
.globl md5_block_asm_data_order .globl md5_block_asm_data_order
.hidden md5_block_asm_data_order .hidden md5_block_asm_data_order


@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "sha1-586.S"
.text .text
.globl sha1_block_data_order .globl sha1_block_data_order
.hidden sha1_block_data_order .hidden sha1_block_data_order


@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "sha512-586.S"
.text .text
.globl sha256_block_data_order .globl sha256_block_data_order
.hidden sha256_block_data_order .hidden sha256_block_data_order


@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "sha512-586.S"
.text .text
.globl sha512_block_data_order .globl sha512_block_data_order
.hidden sha512_block_data_order .hidden sha512_block_data_order


@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "vpaes-x86.S"
.text .text
.align 64 .align 64
.L_vpaes_consts: .L_vpaes_consts:


@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "src/crypto/bn/asm/x86-mont.S"
.text .text
.globl bn_mul_mont .globl bn_mul_mont
.hidden bn_mul_mont .hidden bn_mul_mont
@ -17,39 +16,54 @@ bn_mul_mont:
jl .L000just_leave jl .L000just_leave
leal 20(%esp),%esi leal 20(%esp),%esi
leal 24(%esp),%edx leal 24(%esp),%edx
movl %esp,%ebp
addl $2,%edi addl $2,%edi
negl %edi negl %edi
leal -32(%esp,%edi,4),%esp leal -32(%esp,%edi,4),%ebp
negl %edi negl %edi
movl %esp,%eax movl %ebp,%eax
subl %edx,%eax subl %edx,%eax
andl $2047,%eax andl $2047,%eax
subl %eax,%esp subl %eax,%ebp
xorl %esp,%edx xorl %ebp,%edx
andl $2048,%edx andl $2048,%edx
xorl $2048,%edx xorl $2048,%edx
subl %edx,%esp subl %edx,%ebp
andl $-64,%esp andl $-64,%ebp
movl %esp,%eax
subl %ebp,%eax
andl $-4096,%eax
movl %esp,%edx
leal (%ebp,%eax,1),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja .L001page_walk
jmp .L002page_walk_done
.align 16
.L001page_walk:
leal -4096(%esp),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja .L001page_walk
.L002page_walk_done:
movl (%esi),%eax movl (%esi),%eax
movl 4(%esi),%ebx movl 4(%esi),%ebx
movl 8(%esi),%ecx movl 8(%esi),%ecx
movl 12(%esi),%edx movl 12(%esi),%ebp
movl 16(%esi),%esi movl 16(%esi),%esi
movl (%esi),%esi movl (%esi),%esi
movl %eax,4(%esp) movl %eax,4(%esp)
movl %ebx,8(%esp) movl %ebx,8(%esp)
movl %ecx,12(%esp) movl %ecx,12(%esp)
movl %edx,16(%esp) movl %ebp,16(%esp)
movl %esi,20(%esp) movl %esi,20(%esp)
leal -3(%edi),%ebx leal -3(%edi),%ebx
movl %ebp,24(%esp) movl %edx,24(%esp)
call .L001PIC_me_up call .L003PIC_me_up
.L001PIC_me_up: .L003PIC_me_up:
popl %eax popl %eax
leal OPENSSL_ia32cap_P-.L001PIC_me_up(%eax),%eax leal OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
btl $26,(%eax) btl $26,(%eax)
jnc .L002non_sse2 jnc .L004non_sse2
movl $-1,%eax movl $-1,%eax
movd %eax,%mm7 movd %eax,%mm7
movl 8(%esp),%esi movl 8(%esp),%esi
@ -73,7 +87,7 @@ bn_mul_mont:
psrlq $32,%mm3 psrlq $32,%mm3
incl %ecx incl %ecx
.align 16 .align 16
.L0031st: .L0051st:
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
paddq %mm0,%mm2 paddq %mm0,%mm2
@ -88,7 +102,7 @@ bn_mul_mont:
psrlq $32,%mm3 psrlq $32,%mm3
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
cmpl %ebx,%ecx cmpl %ebx,%ecx
jl .L0031st jl .L0051st
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
paddq %mm0,%mm2 paddq %mm0,%mm2
@ -102,7 +116,7 @@ bn_mul_mont:
paddq %mm2,%mm3 paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4) movq %mm3,32(%esp,%ebx,4)
incl %edx incl %edx
.L004outer: .L006outer:
xorl %ecx,%ecx xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4 movd (%edi,%edx,4),%mm4
movd (%esi),%mm5 movd (%esi),%mm5
@ -124,7 +138,7 @@ bn_mul_mont:
paddq %mm6,%mm2 paddq %mm6,%mm2
incl %ecx incl %ecx
decl %ebx decl %ebx
.L005inner: .L007inner:
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
paddq %mm0,%mm2 paddq %mm0,%mm2
@ -141,7 +155,7 @@ bn_mul_mont:
paddq %mm6,%mm2 paddq %mm6,%mm2
decl %ebx decl %ebx
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
jnz .L005inner jnz .L007inner
movl %ecx,%ebx movl %ecx,%ebx
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
@ -159,11 +173,11 @@ bn_mul_mont:
movq %mm3,32(%esp,%ebx,4) movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx leal 1(%edx),%edx
cmpl %ebx,%edx cmpl %ebx,%edx
jle .L004outer jle .L006outer
emms emms
jmp .L006common_tail jmp .L008common_tail
.align 16 .align 16
.L002non_sse2: .L004non_sse2:
movl 8(%esp),%esi movl 8(%esp),%esi
leal 1(%ebx),%ebp leal 1(%ebx),%ebp
movl 12(%esp),%edi movl 12(%esp),%edi
@ -174,12 +188,12 @@ bn_mul_mont:
leal 4(%edi,%ebx,4),%eax leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp orl %edx,%ebp
movl (%edi),%edi movl (%edi),%edi
jz .L007bn_sqr_mont jz .L009bn_sqr_mont
movl %eax,28(%esp) movl %eax,28(%esp)
movl (%esi),%eax movl (%esi),%eax
xorl %edx,%edx xorl %edx,%edx
.align 16 .align 16
.L008mull: .L010mull:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl %eax,%ebp addl %eax,%ebp
@ -188,7 +202,7 @@ bn_mul_mont:
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
jl .L008mull jl .L010mull
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
movl 20(%esp),%edi movl 20(%esp),%edi
@ -206,9 +220,9 @@ bn_mul_mont:
movl 4(%esi),%eax movl 4(%esi),%eax
adcl $0,%edx adcl $0,%edx
incl %ecx incl %ecx
jmp .L0092ndmadd jmp .L0112ndmadd
.align 16 .align 16
.L0101stmadd: .L0121stmadd:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ecx,4),%ebp addl 32(%esp,%ecx,4),%ebp
@ -219,7 +233,7 @@ bn_mul_mont:
adcl $0,%edx adcl $0,%edx
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
jl .L0101stmadd jl .L0121stmadd
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ebx,4),%eax addl 32(%esp,%ebx,4),%eax
@ -242,7 +256,7 @@ bn_mul_mont:
adcl $0,%edx adcl $0,%edx
movl $1,%ecx movl $1,%ecx
.align 16 .align 16
.L0092ndmadd: .L0112ndmadd:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ecx,4),%ebp addl 32(%esp,%ecx,4),%ebp
@ -253,7 +267,7 @@ bn_mul_mont:
adcl $0,%edx adcl $0,%edx
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4) movl %ebp,24(%esp,%ecx,4)
jl .L0092ndmadd jl .L0112ndmadd
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ebx,4),%ebp addl 32(%esp,%ebx,4),%ebp
@ -269,16 +283,16 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4) movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4) movl %eax,36(%esp,%ebx,4)
je .L006common_tail je .L008common_tail
movl (%ecx),%edi movl (%ecx),%edi
movl 8(%esp),%esi movl 8(%esp),%esi
movl %ecx,12(%esp) movl %ecx,12(%esp)
xorl %ecx,%ecx xorl %ecx,%ecx
xorl %edx,%edx xorl %edx,%edx
movl (%esi),%eax movl (%esi),%eax
jmp .L0101stmadd jmp .L0121stmadd
.align 16 .align 16
.L007bn_sqr_mont: .L009bn_sqr_mont:
movl %ebx,(%esp) movl %ebx,(%esp)
movl %ecx,12(%esp) movl %ecx,12(%esp)
movl %edi,%eax movl %edi,%eax
@ -289,7 +303,7 @@ bn_mul_mont:
andl $1,%ebx andl $1,%ebx
incl %ecx incl %ecx
.align 16 .align 16
.L011sqr: .L013sqr:
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
@ -301,7 +315,7 @@ bn_mul_mont:
cmpl (%esp),%ecx cmpl (%esp),%ecx
movl %eax,%ebx movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
jl .L011sqr jl .L013sqr
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
@ -325,7 +339,7 @@ bn_mul_mont:
movl 4(%esi),%eax movl 4(%esi),%eax
movl $1,%ecx movl $1,%ecx
.align 16 .align 16
.L0123rdmadd: .L0143rdmadd:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ecx,4),%ebp addl 32(%esp,%ecx,4),%ebp
@ -344,7 +358,7 @@ bn_mul_mont:
adcl $0,%edx adcl $0,%edx
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4) movl %ebp,24(%esp,%ecx,4)
jl .L0123rdmadd jl .L0143rdmadd
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ebx,4),%ebp addl 32(%esp,%ebx,4),%ebp
@ -360,7 +374,7 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4) movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4) movl %eax,36(%esp,%ebx,4)
je .L006common_tail je .L008common_tail
movl 4(%esi,%ecx,4),%edi movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
movl %edi,%eax movl %edi,%eax
@ -372,12 +386,12 @@ bn_mul_mont:
xorl %ebp,%ebp xorl %ebp,%ebp
cmpl %ebx,%ecx cmpl %ebx,%ecx
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
je .L013sqrlast je .L015sqrlast
movl %edx,%ebx movl %edx,%ebx
shrl $1,%edx shrl $1,%edx
andl $1,%ebx andl $1,%ebx
.align 16 .align 16
.L014sqradd: .L016sqradd:
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
@ -393,13 +407,13 @@ bn_mul_mont:
cmpl (%esp),%ecx cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx movl %eax,%ebx
jle .L014sqradd jle .L016sqradd
movl %edx,%ebp movl %edx,%ebp
addl %edx,%edx addl %edx,%edx
shrl $31,%ebp shrl $31,%ebp
addl %ebx,%edx addl %ebx,%edx
adcl $0,%ebp adcl $0,%ebp
.L013sqrlast: .L015sqrlast:
movl 20(%esp),%edi movl 20(%esp),%edi
movl 16(%esp),%esi movl 16(%esp),%esi
imull 32(%esp),%edi imull 32(%esp),%edi
@ -414,9 +428,9 @@ bn_mul_mont:
adcl $0,%edx adcl $0,%edx
movl $1,%ecx movl $1,%ecx
movl 4(%esi),%eax movl 4(%esi),%eax
jmp .L0123rdmadd jmp .L0143rdmadd
.align 16 .align 16
.L006common_tail: .L008common_tail:
movl 16(%esp),%ebp movl 16(%esp),%ebp
movl 4(%esp),%edi movl 4(%esp),%edi
leal 32(%esp),%esi leal 32(%esp),%esi
@ -424,25 +438,26 @@ bn_mul_mont:
movl %ebx,%ecx movl %ebx,%ecx
xorl %edx,%edx xorl %edx,%edx
.align 16 .align 16
.L015sub: .L017sub:
sbbl (%ebp,%edx,4),%eax sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4) movl %eax,(%edi,%edx,4)
decl %ecx decl %ecx
movl 4(%esi,%edx,4),%eax movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx leal 1(%edx),%edx
jge .L015sub jge .L017sub
sbbl $0,%eax sbbl $0,%eax
andl %eax,%esi
notl %eax
movl %edi,%ebp
andl %eax,%ebp
orl %ebp,%esi
.align 16 .align 16
.L016copy: .L018copy:
movl (%esi,%ebx,4),%edx movl (%esi,%ebx,4),%eax
movl (%edi,%ebx,4),%ebp movl %eax,(%edi,%ebx,4)
xorl %ebp,%edx movl %ecx,32(%esp,%ebx,4)
andl %eax,%edx
xorl %ebp,%edx
movl %ecx,(%esi,%ebx,4)
movl %edx,(%edi,%ebx,4)
decl %ebx decl %ebx
jge .L016copy jge .L018copy
movl 24(%esp),%esp movl 24(%esp),%esp
movl $1,%eax movl $1,%eax
.L000just_leave: .L000just_leave:

View File

@ -1,350 +0,0 @@
#if defined(__i386__)
.file "rc4-586.S"
.text
.globl asm_RC4
.hidden asm_RC4
.type asm_RC4,@function
.align 16
asm_RC4:
.L_asm_RC4_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebp
xorl %eax,%eax
xorl %ebx,%ebx
cmpl $0,%edx
je .L000abort
movb (%edi),%al
movb 4(%edi),%bl
addl $8,%edi
leal (%esi,%edx,1),%ecx
subl %esi,%ebp
movl %ecx,24(%esp)
incb %al
cmpl $-1,256(%edi)
je .L001RC4_CHAR
movl (%edi,%eax,4),%ecx
andl $-4,%edx
jz .L002loop1
movl %ebp,32(%esp)
testl $-8,%edx
jz .L003go4loop4
call .L004PIC_me_up
.L004PIC_me_up:
popl %ebp
leal OPENSSL_ia32cap_P-.L004PIC_me_up(%ebp),%ebp
btl $26,(%ebp)
jnc .L003go4loop4
movl 32(%esp),%ebp
andl $-8,%edx
leal -8(%esi,%edx,1),%edx
movl %edx,-4(%edi)
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
movq (%esi),%mm0
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm2
jmp .L005loop_mmx_enter
.align 16
.L006loop_mmx:
addb %cl,%bl
psllq $56,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movq (%esi),%mm0
movq %mm2,-8(%ebp,%esi,1)
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm2
.L005loop_mmx_enter:
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm0,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $8,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $16,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $24,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $32,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $40,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $48,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
movl %ebx,%edx
xorl %ebx,%ebx
movb %dl,%bl
cmpl -4(%edi),%esi
leal 8(%esi),%esi
jb .L006loop_mmx
psllq $56,%mm1
pxor %mm1,%mm2
movq %mm2,-8(%ebp,%esi,1)
emms
cmpl 24(%esp),%esi
je .L007done
jmp .L002loop1
.align 16
.L003go4loop4:
leal -4(%esi,%edx,1),%edx
movl %edx,28(%esp)
.L008loop4:
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
movl (%edi,%eax,4),%ecx
movl (%edi,%edx,4),%ebp
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
rorl $8,%ebp
movl (%edi,%eax,4),%ecx
orl (%edi,%edx,4),%ebp
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
rorl $8,%ebp
movl (%edi,%eax,4),%ecx
orl (%edi,%edx,4),%ebp
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
rorl $8,%ebp
movl 32(%esp),%ecx
orl (%edi,%edx,4),%ebp
rorl $8,%ebp
xorl (%esi),%ebp
cmpl 28(%esp),%esi
movl %ebp,(%ecx,%esi,1)
leal 4(%esi),%esi
movl (%edi,%eax,4),%ecx
jb .L008loop4
cmpl 24(%esp),%esi
je .L007done
movl 32(%esp),%ebp
.align 16
.L002loop1:
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
movl (%edi,%edx,4),%edx
xorb (%esi),%dl
leal 1(%esi),%esi
movl (%edi,%eax,4),%ecx
cmpl 24(%esp),%esi
movb %dl,-1(%ebp,%esi,1)
jb .L002loop1
jmp .L007done
.align 16
.L001RC4_CHAR:
movzbl (%edi,%eax,1),%ecx
.L009cloop1:
addb %cl,%bl
movzbl (%edi,%ebx,1),%edx
movb %cl,(%edi,%ebx,1)
movb %dl,(%edi,%eax,1)
addb %cl,%dl
movzbl (%edi,%edx,1),%edx
addb $1,%al
xorb (%esi),%dl
leal 1(%esi),%esi
movzbl (%edi,%eax,1),%ecx
cmpl 24(%esp),%esi
movb %dl,-1(%ebp,%esi,1)
jb .L009cloop1
.L007done:
decb %al
movl %ebx,-4(%edi)
movb %al,-8(%edi)
.L000abort:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size asm_RC4,.-.L_asm_RC4_begin
.globl asm_RC4_set_key
.hidden asm_RC4_set_key
.type asm_RC4_set_key,@function
.align 16
asm_RC4_set_key:
.L_asm_RC4_set_key_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%ebp
movl 28(%esp),%esi
call .L010PIC_me_up
.L010PIC_me_up:
popl %edx
leal OPENSSL_ia32cap_P-.L010PIC_me_up(%edx),%edx
leal 8(%edi),%edi
leal (%esi,%ebp,1),%esi
negl %ebp
xorl %eax,%eax
movl %ebp,-4(%edi)
btl $20,(%edx)
jc .L011c1stloop
.align 16
.L012w1stloop:
movl %eax,(%edi,%eax,4)
addb $1,%al
jnc .L012w1stloop
xorl %ecx,%ecx
xorl %edx,%edx
.align 16
.L013w2ndloop:
movl (%edi,%ecx,4),%eax
addb (%esi,%ebp,1),%dl
addb %al,%dl
addl $1,%ebp
movl (%edi,%edx,4),%ebx
jnz .L014wnowrap
movl -4(%edi),%ebp
.L014wnowrap:
movl %eax,(%edi,%edx,4)
movl %ebx,(%edi,%ecx,4)
addb $1,%cl
jnc .L013w2ndloop
jmp .L015exit
.align 16
.L011c1stloop:
movb %al,(%edi,%eax,1)
addb $1,%al
jnc .L011c1stloop
xorl %ecx,%ecx
xorl %edx,%edx
xorl %ebx,%ebx
.align 16
.L016c2ndloop:
movb (%edi,%ecx,1),%al
addb (%esi,%ebp,1),%dl
addb %al,%dl
addl $1,%ebp
movb (%edi,%edx,1),%bl
jnz .L017cnowrap
movl -4(%edi),%ebp
.L017cnowrap:
movb %al,(%edi,%edx,1)
movb %bl,(%edi,%ecx,1)
addb $1,%cl
jnc .L016c2ndloop
movl $-1,256(%edi)
.L015exit:
xorl %eax,%eax
movl %eax,-8(%edi)
movl %eax,-4(%edi)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size asm_RC4_set_key,.-.L_asm_RC4_set_key_begin
#endif


@ -1,34 +0,0 @@
#if defined(__x86_64__)
.text
.globl rsaz_avx2_eligible
.hidden rsaz_avx2_eligible
.type rsaz_avx2_eligible,@function
rsaz_avx2_eligible:
xorl %eax,%eax
.byte 0xf3,0xc3
.size rsaz_avx2_eligible,.-rsaz_avx2_eligible
.globl rsaz_1024_sqr_avx2
.hidden rsaz_1024_sqr_avx2
.globl rsaz_1024_mul_avx2
.hidden rsaz_1024_mul_avx2
.globl rsaz_1024_norm2red_avx2
.hidden rsaz_1024_norm2red_avx2
.globl rsaz_1024_red2norm_avx2
.hidden rsaz_1024_red2norm_avx2
.globl rsaz_1024_scatter5_avx2
.hidden rsaz_1024_scatter5_avx2
.globl rsaz_1024_gather5_avx2
.hidden rsaz_1024_gather5_avx2
.type rsaz_1024_sqr_avx2,@function
rsaz_1024_sqr_avx2:
rsaz_1024_mul_avx2:
rsaz_1024_norm2red_avx2:
rsaz_1024_red2norm_avx2:
rsaz_1024_scatter5_avx2:
rsaz_1024_gather5_avx2:
.byte 0x0f,0x0b
.byte 0xf3,0xc3
.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2
#endif

File diff suppressed because it is too large


@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
.extern OPENSSL_ia32cap_P .extern OPENSSL_ia32cap_P
@ -23,6 +23,15 @@
.byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe .byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe
.Lsigma: .Lsigma:
.byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0 .byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0
.align 64
.Lzeroz:
.long 0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0
.Lfourz:
.long 4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0
.Lincz:
.long 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
.Lsixteen:
.long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.globl ChaCha20_ctr32 .globl ChaCha20_ctr32
.hidden ChaCha20_ctr32 .hidden ChaCha20_ctr32
@ -42,6 +51,7 @@ ChaCha20_ctr32:
pushq %r14 pushq %r14
pushq %r15 pushq %r15
subq $64+24,%rsp subq $64+24,%rsp
.Lctr32_body:
movdqu (%rcx),%xmm1 movdqu (%rcx),%xmm1
@ -279,13 +289,14 @@ ChaCha20_ctr32:
jnz .Loop_tail jnz .Loop_tail
.Ldone: .Ldone:
addq $64+24,%rsp leaq 64+24+48(%rsp),%rsi
popq %r15 movq -48(%rsi),%r15
popq %r14 movq -40(%rsi),%r14
popq %r13 movq -32(%rsi),%r13
popq %r12 movq -24(%rsi),%r12
popq %rbp movq -16(%rsi),%rbp
popq %rbx movq -8(%rsi),%rbx
leaq (%rsi),%rsp
.Lno_data: .Lno_data:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size ChaCha20_ctr32,.-ChaCha20_ctr32 .size ChaCha20_ctr32,.-ChaCha20_ctr32
@ -293,18 +304,12 @@ ChaCha20_ctr32:
.align 32 .align 32
ChaCha20_ssse3: ChaCha20_ssse3:
.LChaCha20_ssse3: .LChaCha20_ssse3:
movq %rsp,%r9
cmpq $128,%rdx cmpq $128,%rdx
ja .LChaCha20_4x ja .LChaCha20_4x
.Ldo_sse3_after_all: .Ldo_sse3_after_all:
pushq %rbx subq $64+8,%rsp
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $64+24,%rsp
movdqa .Lsigma(%rip),%xmm0 movdqa .Lsigma(%rip),%xmm0
movdqu (%rcx),%xmm1 movdqu (%rcx),%xmm1
movdqu 16(%rcx),%xmm2 movdqu 16(%rcx),%xmm2
@ -316,7 +321,7 @@ ChaCha20_ssse3:
movdqa %xmm1,16(%rsp) movdqa %xmm1,16(%rsp)
movdqa %xmm2,32(%rsp) movdqa %xmm2,32(%rsp)
movdqa %xmm3,48(%rsp) movdqa %xmm3,48(%rsp)
movl $10,%ebp movq $10,%r8
jmp .Loop_ssse3 jmp .Loop_ssse3
.align 32 .align 32
@ -326,7 +331,7 @@ ChaCha20_ssse3:
movdqa 16(%rsp),%xmm1 movdqa 16(%rsp),%xmm1
movdqa 32(%rsp),%xmm2 movdqa 32(%rsp),%xmm2
paddd 48(%rsp),%xmm3 paddd 48(%rsp),%xmm3
movl $10,%ebp movq $10,%r8
movdqa %xmm3,48(%rsp) movdqa %xmm3,48(%rsp)
jmp .Loop_ssse3 jmp .Loop_ssse3
@ -375,7 +380,7 @@ ChaCha20_ssse3:
pshufd $78,%xmm2,%xmm2 pshufd $78,%xmm2,%xmm2
pshufd $147,%xmm1,%xmm1 pshufd $147,%xmm1,%xmm1
pshufd $57,%xmm3,%xmm3 pshufd $57,%xmm3,%xmm3
decl %ebp decq %r8
jnz .Loop_ssse3 jnz .Loop_ssse3
paddd 0(%rsp),%xmm0 paddd 0(%rsp),%xmm0
paddd 16(%rsp),%xmm1 paddd 16(%rsp),%xmm1
@ -412,31 +417,27 @@ ChaCha20_ssse3:
movdqa %xmm1,16(%rsp) movdqa %xmm1,16(%rsp)
movdqa %xmm2,32(%rsp) movdqa %xmm2,32(%rsp)
movdqa %xmm3,48(%rsp) movdqa %xmm3,48(%rsp)
xorq %rbx,%rbx xorq %r8,%r8
.Loop_tail_ssse3: .Loop_tail_ssse3:
movzbl (%rsi,%rbx,1),%eax movzbl (%rsi,%r8,1),%eax
movzbl (%rsp,%rbx,1),%ecx movzbl (%rsp,%r8,1),%ecx
leaq 1(%rbx),%rbx leaq 1(%r8),%r8
xorl %ecx,%eax xorl %ecx,%eax
movb %al,-1(%rdi,%rbx,1) movb %al,-1(%rdi,%r8,1)
decq %rdx decq %rdx
jnz .Loop_tail_ssse3 jnz .Loop_tail_ssse3
.Ldone_ssse3: .Ldone_ssse3:
addq $64+24,%rsp leaq (%r9),%rsp
popq %r15 .Lssse3_epilogue:
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size ChaCha20_ssse3,.-ChaCha20_ssse3 .size ChaCha20_ssse3,.-ChaCha20_ssse3
.type ChaCha20_4x,@function .type ChaCha20_4x,@function
.align 32 .align 32
ChaCha20_4x: ChaCha20_4x:
.LChaCha20_4x: .LChaCha20_4x:
movq %rsp,%r9
movq %r10,%r11 movq %r10,%r11
shrq $32,%r10 shrq $32,%r10
testq $32,%r10 testq $32,%r10
@ -449,8 +450,7 @@ ChaCha20_4x:
je .Ldo_sse3_after_all je .Ldo_sse3_after_all
.Lproceed4x: .Lproceed4x:
leaq -120(%rsp),%r11 subq $0x140+8,%rsp
subq $0x148+0,%rsp
movdqa .Lsigma(%rip),%xmm11 movdqa .Lsigma(%rip),%xmm11
movdqu (%rcx),%xmm15 movdqu (%rcx),%xmm15
movdqu 16(%rcx),%xmm7 movdqu 16(%rcx),%xmm7
@ -977,18 +977,18 @@ ChaCha20_4x:
jnz .Loop_tail4x jnz .Loop_tail4x
.Ldone4x: .Ldone4x:
addq $0x148+0,%rsp leaq (%r9),%rsp
.L4x_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size ChaCha20_4x,.-ChaCha20_4x .size ChaCha20_4x,.-ChaCha20_4x
.type ChaCha20_8x,@function .type ChaCha20_8x,@function
.align 32 .align 32
ChaCha20_8x: ChaCha20_8x:
.LChaCha20_8x: .LChaCha20_8x:
movq %rsp,%r10 movq %rsp,%r9
subq $0x280+8,%rsp subq $0x280+8,%rsp
andq $-32,%rsp andq $-32,%rsp
vzeroupper vzeroupper
movq %r10,640(%rsp)
@ -1579,7 +1579,8 @@ ChaCha20_8x:
.Ldone8x: .Ldone8x:
vzeroall vzeroall
movq 640(%rsp),%rsp leaq (%r9),%rsp
.L8x_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size ChaCha20_8x,.-ChaCha20_8x .size ChaCha20_8x,.-ChaCha20_8x
#endif #endif

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
.type _x86_64_AES_encrypt,@function .type _x86_64_AES_encrypt,@function
.align 16 .align 16
@ -332,6 +332,7 @@ _x86_64_AES_encrypt_compact:
.type asm_AES_encrypt,@function .type asm_AES_encrypt,@function
.hidden asm_AES_encrypt .hidden asm_AES_encrypt
asm_AES_encrypt: asm_AES_encrypt:
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
@ -340,7 +341,6 @@ asm_AES_encrypt:
pushq %r15 pushq %r15
movq %rsp,%r10
leaq -63(%rdx),%rcx leaq -63(%rdx),%rcx
andq $-64,%rsp andq $-64,%rsp
subq %rsp,%rcx subq %rsp,%rcx
@ -350,7 +350,7 @@ asm_AES_encrypt:
subq $32,%rsp subq $32,%rsp
movq %rsi,16(%rsp) movq %rsi,16(%rsp)
movq %r10,24(%rsp) movq %rax,24(%rsp)
.Lenc_prologue: .Lenc_prologue:
movq %rdx,%r15 movq %rdx,%r15
@ -382,13 +382,13 @@ asm_AES_encrypt:
movl %ecx,8(%r9) movl %ecx,8(%r9)
movl %edx,12(%r9) movl %edx,12(%r9)
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
.Lenc_epilogue: .Lenc_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size asm_AES_encrypt,.-asm_AES_encrypt .size asm_AES_encrypt,.-asm_AES_encrypt
@ -778,6 +778,7 @@ _x86_64_AES_decrypt_compact:
.type asm_AES_decrypt,@function .type asm_AES_decrypt,@function
.hidden asm_AES_decrypt .hidden asm_AES_decrypt
asm_AES_decrypt: asm_AES_decrypt:
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
@ -786,7 +787,6 @@ asm_AES_decrypt:
pushq %r15 pushq %r15
movq %rsp,%r10
leaq -63(%rdx),%rcx leaq -63(%rdx),%rcx
andq $-64,%rsp andq $-64,%rsp
subq %rsp,%rcx subq %rsp,%rcx
@ -796,7 +796,7 @@ asm_AES_decrypt:
subq $32,%rsp subq $32,%rsp
movq %rsi,16(%rsp) movq %rsi,16(%rsp)
movq %r10,24(%rsp) movq %rax,24(%rsp)
.Ldec_prologue: .Ldec_prologue:
movq %rdx,%r15 movq %rdx,%r15
@ -830,13 +830,13 @@ asm_AES_decrypt:
movl %ecx,8(%r9) movl %ecx,8(%r9)
movl %edx,12(%r9) movl %edx,12(%r9)
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
.Ldec_epilogue: .Ldec_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size asm_AES_decrypt,.-asm_AES_decrypt .size asm_AES_decrypt,.-asm_AES_decrypt
@ -1313,12 +1313,12 @@ asm_AES_cbc_encrypt:
movl %r9d,%r9d movl %r9d,%r9d
leaq .LAES_Te(%rip),%r14 leaq .LAES_Te(%rip),%r14
leaq .LAES_Td(%rip),%r10
cmpq $0,%r9 cmpq $0,%r9
jne .Lcbc_picked_te cmoveq %r10,%r14
leaq .LAES_Td(%rip),%r14
.Lcbc_picked_te:
movl OPENSSL_ia32cap_P(%rip),%r10d leaq OPENSSL_ia32cap_P(%rip),%r10
movl (%r10),%r10d
cmpq $512,%rdx cmpq $512,%rdx
jb .Lcbc_slow_prologue jb .Lcbc_slow_prologue
testq $15,%rdx testq $15,%rdx


@ -0,0 +1,834 @@
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text
.type _aesni_ctr32_ghash_6x,@function
.align 32
_aesni_ctr32_ghash_6x:
.cfi_startproc
vmovdqu 32(%r11),%xmm2
subq $6,%rdx
vpxor %xmm4,%xmm4,%xmm4
vmovdqu 0-128(%rcx),%xmm15
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpaddb %xmm2,%xmm11,%xmm12
vpaddb %xmm2,%xmm12,%xmm13
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm15,%xmm1,%xmm9
vmovdqu %xmm4,16+8(%rsp)
jmp .Loop6x
.align 32
.Loop6x:
addl $100663296,%ebx
jc .Lhandle_ctr32
vmovdqu 0-32(%r9),%xmm3
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm15,%xmm10,%xmm10
vpxor %xmm15,%xmm11,%xmm11
.Lresume_ctr32:
vmovdqu %xmm1,(%r8)
vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5
vpxor %xmm15,%xmm12,%xmm12
vmovups 16-128(%rcx),%xmm2
vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
xorq %r12,%r12
cmpq %r14,%r15
vaesenc %xmm2,%xmm9,%xmm9
vmovdqu 48+8(%rsp),%xmm0
vpxor %xmm15,%xmm13,%xmm13
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1
vaesenc %xmm2,%xmm10,%xmm10
vpxor %xmm15,%xmm14,%xmm14
setnc %r12b
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vaesenc %xmm2,%xmm11,%xmm11
vmovdqu 16-32(%r9),%xmm3
negq %r12
vaesenc %xmm2,%xmm12,%xmm12
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5
vpxor %xmm4,%xmm8,%xmm8
vaesenc %xmm2,%xmm13,%xmm13
vpxor %xmm5,%xmm1,%xmm4
andq $0x60,%r12
vmovups 32-128(%rcx),%xmm15
vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1
vaesenc %xmm2,%xmm14,%xmm14
vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2
leaq (%r14,%r12,1),%r14
vaesenc %xmm15,%xmm9,%xmm9
vpxor 16+8(%rsp),%xmm8,%xmm8
vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3
vmovdqu 64+8(%rsp),%xmm0
vaesenc %xmm15,%xmm10,%xmm10
movbeq 88(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 80(%r14),%r12
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,32+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,40+8(%rsp)
vmovdqu 48-32(%r9),%xmm5
vaesenc %xmm15,%xmm14,%xmm14
vmovups 48-128(%rcx),%xmm15
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm3,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3
vaesenc %xmm15,%xmm11,%xmm11
vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5
vmovdqu 80+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vpxor %xmm1,%xmm4,%xmm4
vmovdqu 64-32(%r9),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vmovups 64-128(%rcx),%xmm15
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3
vaesenc %xmm15,%xmm10,%xmm10
movbeq 72(%r14),%r13
vpxor %xmm5,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
movbeq 64(%r14),%r12
vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1
vmovdqu 96+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,48+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,56+8(%rsp)
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 96-32(%r9),%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vmovups 80-128(%rcx),%xmm15
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5
vaesenc %xmm15,%xmm10,%xmm10
movbeq 56(%r14),%r13
vpxor %xmm1,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1
vpxor 112+8(%rsp),%xmm8,%xmm8
vaesenc %xmm15,%xmm11,%xmm11
movbeq 48(%r14),%r12
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,64+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,72+8(%rsp)
vpxor %xmm3,%xmm4,%xmm4
vmovdqu 112-32(%r9),%xmm3
vaesenc %xmm15,%xmm14,%xmm14
vmovups 96-128(%rcx),%xmm15
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1
vaesenc %xmm15,%xmm10,%xmm10
movbeq 40(%r14),%r13
vpxor %xmm2,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
movbeq 32(%r14),%r12
vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,80+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,88+8(%rsp)
vpxor %xmm5,%xmm6,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor %xmm1,%xmm6,%xmm6
vmovups 112-128(%rcx),%xmm15
vpslldq $8,%xmm6,%xmm5
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 16(%r11),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm8,%xmm7,%xmm7
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm5,%xmm4,%xmm4
movbeq 24(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 16(%r14),%r12
vpalignr $8,%xmm4,%xmm4,%xmm0
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
movq %r13,96+8(%rsp)
vaesenc %xmm15,%xmm12,%xmm12
movq %r12,104+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
vmovups 128-128(%rcx),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vmovups 144-128(%rcx),%xmm15
vaesenc %xmm1,%xmm10,%xmm10
vpsrldq $8,%xmm6,%xmm6
vaesenc %xmm1,%xmm11,%xmm11
vpxor %xmm6,%xmm7,%xmm7
vaesenc %xmm1,%xmm12,%xmm12
vpxor %xmm0,%xmm4,%xmm4
movbeq 8(%r14),%r13
vaesenc %xmm1,%xmm13,%xmm13
movbeq 0(%r14),%r12
vaesenc %xmm1,%xmm14,%xmm14
vmovups 160-128(%rcx),%xmm1
cmpl $11,%ebp
jb .Lenc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 176-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 192-128(%rcx),%xmm1
je .Lenc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 208-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 224-128(%rcx),%xmm1
jmp .Lenc_tail
.align 32
.Lhandle_ctr32:
vmovdqu (%r11),%xmm0
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vmovdqu 0-32(%r9),%xmm3
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm15,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm15,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpshufb %xmm0,%xmm14,%xmm14
vpshufb %xmm0,%xmm1,%xmm1
jmp .Lresume_ctr32
.align 32
.Lenc_tail:
vaesenc %xmm15,%xmm9,%xmm9
vmovdqu %xmm7,16+8(%rsp)
vpalignr $8,%xmm4,%xmm4,%xmm8
vaesenc %xmm15,%xmm10,%xmm10
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
vpxor 0(%rdi),%xmm1,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
vpxor 16(%rdi),%xmm1,%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vpxor 32(%rdi),%xmm1,%xmm5
vaesenc %xmm15,%xmm13,%xmm13
vpxor 48(%rdi),%xmm1,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor 64(%rdi),%xmm1,%xmm7
vpxor 80(%rdi),%xmm1,%xmm3
vmovdqu (%r8),%xmm1
vaesenclast %xmm2,%xmm9,%xmm9
vmovdqu 32(%r11),%xmm2
vaesenclast %xmm0,%xmm10,%xmm10
vpaddb %xmm2,%xmm1,%xmm0
movq %r13,112+8(%rsp)
leaq 96(%rdi),%rdi
vaesenclast %xmm5,%xmm11,%xmm11
vpaddb %xmm2,%xmm0,%xmm5
movq %r12,120+8(%rsp)
leaq 96(%rsi),%rsi
vmovdqu 0-128(%rcx),%xmm15
vaesenclast %xmm6,%xmm12,%xmm12
vpaddb %xmm2,%xmm5,%xmm6
vaesenclast %xmm7,%xmm13,%xmm13
vpaddb %xmm2,%xmm6,%xmm7
vaesenclast %xmm3,%xmm14,%xmm14
vpaddb %xmm2,%xmm7,%xmm3
addq $0x60,%r10
subq $0x6,%rdx
jc .L6x_done
vmovups %xmm9,-96(%rsi)
vpxor %xmm15,%xmm1,%xmm9
vmovups %xmm10,-80(%rsi)
vmovdqa %xmm0,%xmm10
vmovups %xmm11,-64(%rsi)
vmovdqa %xmm5,%xmm11
vmovups %xmm12,-48(%rsi)
vmovdqa %xmm6,%xmm12
vmovups %xmm13,-32(%rsi)
vmovdqa %xmm7,%xmm13
vmovups %xmm14,-16(%rsi)
vmovdqa %xmm3,%xmm14
vmovdqu 32+8(%rsp),%xmm7
jmp .Loop6x
.L6x_done:
vpxor 16+8(%rsp),%xmm8,%xmm8
vpxor %xmm4,%xmm8,%xmm8
.byte 0xf3,0xc3
.cfi_endproc
.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
.globl aesni_gcm_decrypt
.hidden aesni_gcm_decrypt
.type aesni_gcm_decrypt,@function
.align 32
aesni_gcm_decrypt:
.cfi_startproc
xorq %r10,%r10
cmpq $0x60,%rdx
jb .Lgcm_dec_abort
leaq (%rsp),%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq .Lbswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
vmovdqu (%r9),%xmm8
andq $-128,%rsp
vmovdqu (%r11),%xmm0
leaq 128(%rcx),%rcx
leaq 32+32(%r9),%r9
movl 240-128(%rcx),%ebp
vpshufb %xmm0,%xmm8,%xmm8
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc .Ldec_no_key_aliasing
cmpq $768,%r15
jnc .Ldec_no_key_aliasing
subq %r15,%rsp
.Ldec_no_key_aliasing:
vmovdqu 80(%rdi),%xmm7
leaq (%rdi),%r14
vmovdqu 64(%rdi),%xmm4
leaq -192(%rdi,%rdx,1),%r15
vmovdqu 48(%rdi),%xmm5
shrq $4,%rdx
xorq %r10,%r10
vmovdqu 32(%rdi),%xmm6
vpshufb %xmm0,%xmm7,%xmm7
vmovdqu 16(%rdi),%xmm2
vpshufb %xmm0,%xmm4,%xmm4
vmovdqu (%rdi),%xmm3
vpshufb %xmm0,%xmm5,%xmm5
vmovdqu %xmm4,48(%rsp)
vpshufb %xmm0,%xmm6,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm2,%xmm2
vmovdqu %xmm6,80(%rsp)
vpshufb %xmm0,%xmm3,%xmm3
vmovdqu %xmm2,96(%rsp)
vmovdqu %xmm3,112(%rsp)
call _aesni_ctr32_ghash_6x
vmovups %xmm9,-96(%rsi)
vmovups %xmm10,-80(%rsi)
vmovups %xmm11,-64(%rsi)
vmovups %xmm12,-48(%rsi)
vmovups %xmm13,-32(%rsi)
vmovups %xmm14,-16(%rsi)
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lgcm_dec_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
.type _aesni_ctr32_6x,@function
.align 32
_aesni_ctr32_6x:
.cfi_startproc
vmovdqu 0-128(%rcx),%xmm4
vmovdqu 32(%r11),%xmm2
leaq -1(%rbp),%r13
vmovups 16-128(%rcx),%xmm15
leaq 32-128(%rcx),%r12
vpxor %xmm4,%xmm1,%xmm9
addl $100663296,%ebx
jc .Lhandle_ctr32_2
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddb %xmm2,%xmm11,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddb %xmm2,%xmm12,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32
.align 16
.Loop_ctr32:
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vmovups (%r12),%xmm15
leaq 16(%r12),%r12
decl %r13d
jnz .Loop_ctr32
vmovdqu (%r12),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor 0(%rdi),%xmm3,%xmm4
vaesenc %xmm15,%xmm10,%xmm10
vpxor 16(%rdi),%xmm3,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
vpxor 32(%rdi),%xmm3,%xmm6
vaesenc %xmm15,%xmm12,%xmm12
vpxor 48(%rdi),%xmm3,%xmm8
vaesenc %xmm15,%xmm13,%xmm13
vpxor 64(%rdi),%xmm3,%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vpxor 80(%rdi),%xmm3,%xmm3
leaq 96(%rdi),%rdi
vaesenclast %xmm4,%xmm9,%xmm9
vaesenclast %xmm5,%xmm10,%xmm10
vaesenclast %xmm6,%xmm11,%xmm11
vaesenclast %xmm8,%xmm12,%xmm12
vaesenclast %xmm2,%xmm13,%xmm13
vaesenclast %xmm3,%xmm14,%xmm14
vmovups %xmm9,0(%rsi)
vmovups %xmm10,16(%rsi)
vmovups %xmm11,32(%rsi)
vmovups %xmm12,48(%rsi)
vmovups %xmm13,64(%rsi)
vmovups %xmm14,80(%rsi)
leaq 96(%rsi),%rsi
.byte 0xf3,0xc3
.align 32
.Lhandle_ctr32_2:
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpshufb %xmm0,%xmm14,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpshufb %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32
.cfi_endproc
.size _aesni_ctr32_6x,.-_aesni_ctr32_6x
.globl aesni_gcm_encrypt
.hidden aesni_gcm_encrypt
.type aesni_gcm_encrypt,@function
.align 32
aesni_gcm_encrypt:
.cfi_startproc
xorq %r10,%r10
cmpq $288,%rdx
jb .Lgcm_enc_abort
leaq (%rsp),%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq .Lbswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
leaq 128(%rcx),%rcx
vmovdqu (%r11),%xmm0
andq $-128,%rsp
movl 240-128(%rcx),%ebp
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc .Lenc_no_key_aliasing
cmpq $768,%r15
jnc .Lenc_no_key_aliasing
subq %r15,%rsp
.Lenc_no_key_aliasing:
leaq (%rsi),%r14
leaq -192(%rsi,%rdx,1),%r15
shrq $4,%rdx
call _aesni_ctr32_6x
vpshufb %xmm0,%xmm9,%xmm8
vpshufb %xmm0,%xmm10,%xmm2
vmovdqu %xmm8,112(%rsp)
vpshufb %xmm0,%xmm11,%xmm4
vmovdqu %xmm2,96(%rsp)
vpshufb %xmm0,%xmm12,%xmm5
vmovdqu %xmm4,80(%rsp)
vpshufb %xmm0,%xmm13,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm14,%xmm7
vmovdqu %xmm6,48(%rsp)
call _aesni_ctr32_6x
vmovdqu (%r9),%xmm8
leaq 32+32(%r9),%r9
subq $12,%rdx
movq $192,%r10
vpshufb %xmm0,%xmm8,%xmm8
call _aesni_ctr32_ghash_6x
vmovdqu 32(%rsp),%xmm7
vmovdqu (%r11),%xmm0
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm7,%xmm7,%xmm1
vmovdqu 32-32(%r9),%xmm15
vmovups %xmm9,-96(%rsi)
vpshufb %xmm0,%xmm9,%xmm9
vpxor %xmm7,%xmm1,%xmm1
vmovups %xmm10,-80(%rsi)
vpshufb %xmm0,%xmm10,%xmm10
vmovups %xmm11,-64(%rsi)
vpshufb %xmm0,%xmm11,%xmm11
vmovups %xmm12,-48(%rsi)
vpshufb %xmm0,%xmm12,%xmm12
vmovups %xmm13,-32(%rsi)
vpshufb %xmm0,%xmm13,%xmm13
vmovups %xmm14,-16(%rsi)
vpshufb %xmm0,%xmm14,%xmm14
vmovdqu %xmm9,16(%rsp)
vmovdqu 48(%rsp),%xmm6
vmovdqu 16-32(%r9),%xmm0
vpunpckhqdq %xmm6,%xmm6,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5
vpxor %xmm6,%xmm2,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vmovdqu 64(%rsp),%xmm9
vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm9,%xmm9,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6
vpxor %xmm9,%xmm5,%xmm5
vpxor %xmm7,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vmovdqu 80(%rsp),%xmm1
vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm4,%xmm7,%xmm7
vpunpckhqdq %xmm1,%xmm1,%xmm4
vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpxor %xmm6,%xmm9,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 96(%rsp),%xmm2
vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm7,%xmm6,%xmm6
vpunpckhqdq %xmm2,%xmm2,%xmm7
vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1
vpxor %xmm2,%xmm7,%xmm7
vpxor %xmm9,%xmm1,%xmm1
vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm5,%xmm4,%xmm4
vpxor 112(%rsp),%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5
vmovdqu 112-32(%r9),%xmm0
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpxor %xmm6,%xmm5,%xmm5
vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7
vpxor %xmm4,%xmm7,%xmm4
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm14,%xmm14,%xmm1
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8
vpxor %xmm14,%xmm1,%xmm1
vpxor %xmm5,%xmm6,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9
vmovdqu 32-32(%r9),%xmm15
vpxor %xmm2,%xmm8,%xmm7
vpxor %xmm4,%xmm9,%xmm6
vmovdqu 16-32(%r9),%xmm0
vpxor %xmm5,%xmm7,%xmm9
vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4
vpxor %xmm9,%xmm6,%xmm6
vpunpckhqdq %xmm13,%xmm13,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14
vpxor %xmm13,%xmm2,%xmm2
vpslldq $8,%xmm6,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vpxor %xmm9,%xmm5,%xmm8
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm6,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm12,%xmm12,%xmm9
vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13
vpxor %xmm12,%xmm9,%xmm9
vpxor %xmm14,%xmm13,%xmm13
vpalignr $8,%xmm8,%xmm8,%xmm14
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm11,%xmm11,%xmm1
vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12
vpxor %xmm11,%xmm1,%xmm1
vpxor %xmm13,%xmm12,%xmm12
vxorps 16(%rsp),%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm9,%xmm9
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm10,%xmm10,%xmm2
vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11
vpxor %xmm10,%xmm2,%xmm2
vpalignr $8,%xmm8,%xmm8,%xmm14
vpxor %xmm12,%xmm11,%xmm11
vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm9,%xmm1,%xmm1
vxorps %xmm7,%xmm14,%xmm14
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4
vmovdqu 112-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm11,%xmm10,%xmm10
vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7
vpxor %xmm4,%xmm5,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6
vpxor %xmm10,%xmm7,%xmm7
vpxor %xmm2,%xmm6,%xmm6
vpxor %xmm5,%xmm7,%xmm4
vpxor %xmm4,%xmm6,%xmm6
vpslldq $8,%xmm6,%xmm1
vmovdqu 16(%r11),%xmm3
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm1,%xmm5,%xmm8
vpxor %xmm6,%xmm7,%xmm7
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm2,%xmm8,%xmm8
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm7,%xmm2,%xmm2
vpxor %xmm2,%xmm8,%xmm8
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lgcm_enc_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lpoly:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.Lone_msb:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Ltwo_lsb:
.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.Lone_lsb:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif


@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
.extern asm_AES_encrypt .extern asm_AES_encrypt
@ -1305,15 +1305,14 @@ bsaes_cbc_encrypt:
cmpq %rax,%rbp cmpq %rax,%rbp
ja .Lcbc_dec_bzero ja .Lcbc_dec_bzero
leaq (%rbp),%rsp leaq 120(%rbp),%rax
movq 72(%rsp),%r15 movq -48(%rax),%r15
movq 80(%rsp),%r14 movq -40(%rax),%r14
movq 88(%rsp),%r13 movq -32(%rax),%r13
movq 96(%rsp),%r12 movq -24(%rax),%r12
movq 104(%rsp),%rbx movq -16(%rax),%rbx
movq 112(%rsp),%rax movq -8(%rax),%rbp
leaq 120(%rsp),%rsp leaq (%rax),%rsp
movq %rax,%rbp
.Lcbc_dec_epilogue: .Lcbc_dec_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt .size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
@ -1506,15 +1505,14 @@ bsaes_ctr32_encrypt_blocks:
cmpq %rax,%rbp cmpq %rax,%rbp
ja .Lctr_enc_bzero ja .Lctr_enc_bzero
leaq (%rbp),%rsp leaq 120(%rbp),%rax
movq 72(%rsp),%r15 movq -48(%rax),%r15
movq 80(%rsp),%r14 movq -40(%rax),%r14
movq 88(%rsp),%r13 movq -32(%rax),%r13
movq 96(%rsp),%r12 movq -24(%rax),%r12
movq 104(%rsp),%rbx movq -16(%rax),%rbx
movq 112(%rsp),%rax movq -8(%rax),%rbp
leaq 120(%rsp),%rsp leaq (%rax),%rsp
movq %rax,%rbp
.Lctr_enc_epilogue: .Lctr_enc_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks .size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
@ -1958,15 +1956,14 @@ bsaes_xts_encrypt:
cmpq %rax,%rbp cmpq %rax,%rbp
ja .Lxts_enc_bzero ja .Lxts_enc_bzero
leaq (%rbp),%rsp leaq 120(%rbp),%rax
movq 72(%rsp),%r15 movq -48(%rax),%r15
movq 80(%rsp),%r14 movq -40(%rax),%r14
movq 88(%rsp),%r13 movq -32(%rax),%r13
movq 96(%rsp),%r12 movq -24(%rax),%r12
movq 104(%rsp),%rbx movq -16(%rax),%rbx
movq 112(%rsp),%rax movq -8(%rax),%rbp
leaq 120(%rsp),%rsp leaq (%rax),%rsp
movq %rax,%rbp
.Lxts_enc_epilogue: .Lxts_enc_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size bsaes_xts_encrypt,.-bsaes_xts_encrypt .size bsaes_xts_encrypt,.-bsaes_xts_encrypt
@ -2437,15 +2434,14 @@ bsaes_xts_decrypt:
cmpq %rax,%rbp cmpq %rax,%rbp
ja .Lxts_dec_bzero ja .Lxts_dec_bzero
leaq (%rbp),%rsp leaq 120(%rbp),%rax
movq 72(%rsp),%r15 movq -48(%rax),%r15
movq 80(%rsp),%r14 movq -40(%rax),%r14
movq 88(%rsp),%r13 movq -32(%rax),%r13
movq 96(%rsp),%r12 movq -24(%rax),%r12
movq 104(%rsp),%rbx movq -16(%rax),%rbx
movq 112(%rsp),%rax movq -8(%rax),%rbp
leaq 120(%rsp),%rsp leaq (%rax),%rsp
movq %rax,%rbp
.Lxts_dec_epilogue: .Lxts_dec_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size bsaes_xts_decrypt,.-bsaes_xts_decrypt .size bsaes_xts_decrypt,.-bsaes_xts_decrypt


@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
.extern OPENSSL_ia32cap_P .extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P .hidden OPENSSL_ia32cap_P
@ -11,6 +11,10 @@ gcm_gmult_4bit:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $280,%rsp
.Lgmult_prologue: .Lgmult_prologue:
movzbq 15(%rdi),%r8 movzbq 15(%rdi),%r8
@ -87,8 +91,9 @@ gcm_gmult_4bit:
movq %r8,8(%rdi) movq %r8,8(%rdi)
movq %r9,(%rdi) movq %r9,(%rdi)
movq 16(%rsp),%rbx leaq 280+48(%rsp),%rsi
leaq 24(%rsp),%rsp movq -8(%rsi),%rbx
leaq (%rsi),%rsp
.Lgmult_epilogue: .Lgmult_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size gcm_gmult_4bit,.-gcm_gmult_4bit .size gcm_gmult_4bit,.-gcm_gmult_4bit
@ -648,14 +653,14 @@ gcm_ghash_4bit:
movq %r8,8(%rdi) movq %r8,8(%rdi)
movq %r9,(%rdi) movq %r9,(%rdi)
leaq 280(%rsp),%rsi leaq 280+48(%rsp),%rsi
movq 0(%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq 0(%rsi),%rsp
.Lghash_epilogue: .Lghash_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size gcm_ghash_4bit,.-gcm_ghash_4bit .size gcm_ghash_4bit,.-gcm_ghash_4bit
@ -885,7 +890,8 @@ gcm_ghash_clmul:
jz .Lodd_tail jz .Lodd_tail
movdqu 16(%rsi),%xmm6 movdqu 16(%rsi),%xmm6
movl OPENSSL_ia32cap_P+4(%rip),%eax leaq OPENSSL_ia32cap_P(%rip),%rax
movl 4(%rax),%eax
cmpq $0x30,%rcx cmpq $0x30,%rcx
jb .Lskip4x jb .Lskip4x
@ -1257,7 +1263,108 @@ gcm_ghash_clmul:
.type gcm_init_avx,@function .type gcm_init_avx,@function
.align 32 .align 32
gcm_init_avx: gcm_init_avx:
jmp .L_init_clmul vzeroupper
vmovdqu (%rsi),%xmm2
vpshufd $78,%xmm2,%xmm2
vpshufd $255,%xmm2,%xmm4
vpsrlq $63,%xmm2,%xmm3
vpsllq $1,%xmm2,%xmm2
vpxor %xmm5,%xmm5,%xmm5
vpcmpgtd %xmm4,%xmm5,%xmm5
vpslldq $8,%xmm3,%xmm3
vpor %xmm3,%xmm2,%xmm2
vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5
vpxor %xmm5,%xmm2,%xmm2
vpunpckhqdq %xmm2,%xmm2,%xmm6
vmovdqa %xmm2,%xmm0
vpxor %xmm2,%xmm6,%xmm6
movq $4,%r10
jmp .Linit_start_avx
.align 32
.Linit_loop_avx:
vpalignr $8,%xmm3,%xmm4,%xmm5
vmovdqu %xmm5,-16(%rdi)
vpunpckhqdq %xmm0,%xmm0,%xmm3
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
vpxor %xmm0,%xmm1,%xmm4
vpxor %xmm4,%xmm3,%xmm3
vpslldq $8,%xmm3,%xmm4
vpsrldq $8,%xmm3,%xmm3
vpxor %xmm4,%xmm0,%xmm0
vpxor %xmm3,%xmm1,%xmm1
vpsllq $57,%xmm0,%xmm3
vpsllq $62,%xmm0,%xmm4
vpxor %xmm3,%xmm4,%xmm4
vpsllq $63,%xmm0,%xmm3
vpxor %xmm3,%xmm4,%xmm4
vpslldq $8,%xmm4,%xmm3
vpsrldq $8,%xmm4,%xmm4
vpxor %xmm3,%xmm0,%xmm0
vpxor %xmm4,%xmm1,%xmm1
vpsrlq $1,%xmm0,%xmm4
vpxor %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $5,%xmm4,%xmm4
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $1,%xmm0,%xmm0
vpxor %xmm1,%xmm0,%xmm0
.Linit_start_avx:
vmovdqa %xmm0,%xmm5
vpunpckhqdq %xmm0,%xmm0,%xmm3
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
vpxor %xmm0,%xmm1,%xmm4
vpxor %xmm4,%xmm3,%xmm3
vpslldq $8,%xmm3,%xmm4
vpsrldq $8,%xmm3,%xmm3
vpxor %xmm4,%xmm0,%xmm0
vpxor %xmm3,%xmm1,%xmm1
vpsllq $57,%xmm0,%xmm3
vpsllq $62,%xmm0,%xmm4
vpxor %xmm3,%xmm4,%xmm4
vpsllq $63,%xmm0,%xmm3
vpxor %xmm3,%xmm4,%xmm4
vpslldq $8,%xmm4,%xmm3
vpsrldq $8,%xmm4,%xmm4
vpxor %xmm3,%xmm0,%xmm0
vpxor %xmm4,%xmm1,%xmm1
vpsrlq $1,%xmm0,%xmm4
vpxor %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $5,%xmm4,%xmm4
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $1,%xmm0,%xmm0
vpxor %xmm1,%xmm0,%xmm0
vpshufd $78,%xmm5,%xmm3
vpshufd $78,%xmm0,%xmm4
vpxor %xmm5,%xmm3,%xmm3
vmovdqu %xmm5,0(%rdi)
vpxor %xmm0,%xmm4,%xmm4
vmovdqu %xmm0,16(%rdi)
leaq 48(%rdi),%rdi
subq $1,%r10
jnz .Linit_loop_avx
vpalignr $8,%xmm4,%xmm3,%xmm5
vmovdqu %xmm5,-16(%rdi)
vzeroupper
.byte 0xf3,0xc3
.size gcm_init_avx,.-gcm_init_avx .size gcm_init_avx,.-gcm_init_avx
.globl gcm_gmult_avx .globl gcm_gmult_avx
.hidden gcm_gmult_avx .hidden gcm_gmult_avx
@ -1271,7 +1378,377 @@ gcm_gmult_avx:
.type gcm_ghash_avx,@function .type gcm_ghash_avx,@function
.align 32 .align 32
gcm_ghash_avx: gcm_ghash_avx:
jmp .L_ghash_clmul vzeroupper
vmovdqu (%rdi),%xmm10
leaq .L0x1c2_polynomial(%rip),%r10
leaq 64(%rsi),%rsi
vmovdqu .Lbswap_mask(%rip),%xmm13
vpshufb %xmm13,%xmm10,%xmm10
cmpq $0x80,%rcx
jb .Lshort_avx
subq $0x80,%rcx
vmovdqu 112(%rdx),%xmm14
vmovdqu 0-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm14
vmovdqu 32-64(%rsi),%xmm7
vpunpckhqdq %xmm14,%xmm14,%xmm9
vmovdqu 96(%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm14,%xmm9,%xmm9
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 16-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vmovdqu 80(%rdx),%xmm14
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vpshufb %xmm13,%xmm14,%xmm14
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 48-64(%rsi),%xmm6
vpxor %xmm14,%xmm9,%xmm9
vmovdqu 64(%rdx),%xmm15
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 80-64(%rsi),%xmm7
vpshufb %xmm13,%xmm15,%xmm15
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm1,%xmm4,%xmm4
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 64-64(%rsi),%xmm6
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vmovdqu 48(%rdx),%xmm14
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpxor %xmm4,%xmm1,%xmm1
vpshufb %xmm13,%xmm14,%xmm14
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 96-64(%rsi),%xmm6
vpxor %xmm5,%xmm2,%xmm2
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 128-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vmovdqu 32(%rdx),%xmm15
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm1,%xmm4,%xmm4
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 112-64(%rsi),%xmm6
vpxor %xmm2,%xmm5,%xmm5
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vmovdqu 16(%rdx),%xmm14
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpxor %xmm4,%xmm1,%xmm1
vpshufb %xmm13,%xmm14,%xmm14
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 144-64(%rsi),%xmm6
vpxor %xmm5,%xmm2,%xmm2
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 176-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vmovdqu (%rdx),%xmm15
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm1,%xmm4,%xmm4
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 160-64(%rsi),%xmm6
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
leaq 128(%rdx),%rdx
cmpq $0x80,%rcx
jb .Ltail_avx
vpxor %xmm10,%xmm15,%xmm15
subq $0x80,%rcx
jmp .Loop8x_avx
.align 32
.Loop8x_avx:
vpunpckhqdq %xmm15,%xmm15,%xmm8
vmovdqu 112(%rdx),%xmm14
vpxor %xmm0,%xmm3,%xmm3
vpxor %xmm15,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10
vpshufb %xmm13,%xmm14,%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11
vmovdqu 0-64(%rsi),%xmm6
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12
vmovdqu 32-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vmovdqu 96(%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm3,%xmm10,%xmm10
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vxorps %xmm4,%xmm11,%xmm11
vmovdqu 16-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm5,%xmm12,%xmm12
vxorps %xmm15,%xmm8,%xmm8
vmovdqu 80(%rdx),%xmm14
vpxor %xmm10,%xmm12,%xmm12
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpxor %xmm11,%xmm12,%xmm12
vpslldq $8,%xmm12,%xmm9
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vpsrldq $8,%xmm12,%xmm12
vpxor %xmm9,%xmm10,%xmm10
vmovdqu 48-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm14
vxorps %xmm12,%xmm11,%xmm11
vpxor %xmm1,%xmm4,%xmm4
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 80-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 64(%rdx),%xmm15
vpalignr $8,%xmm10,%xmm10,%xmm12
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpshufb %xmm13,%xmm15,%xmm15
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 64-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm4,%xmm1,%xmm1
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vxorps %xmm15,%xmm8,%xmm8
vpxor %xmm5,%xmm2,%xmm2
vmovdqu 48(%rdx),%xmm14
vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpshufb %xmm13,%xmm14,%xmm14
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 96-64(%rsi),%xmm6
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 128-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 32(%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpshufb %xmm13,%xmm15,%xmm15
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 112-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm4,%xmm1,%xmm1
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vpxor %xmm5,%xmm2,%xmm2
vxorps %xmm12,%xmm10,%xmm10
vmovdqu 16(%rdx),%xmm14
vpalignr $8,%xmm10,%xmm10,%xmm12
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpshufb %xmm13,%xmm14,%xmm14
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 144-64(%rsi),%xmm6
vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
vxorps %xmm11,%xmm12,%xmm12
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 176-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vmovdqu (%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 160-64(%rsi),%xmm6
vpxor %xmm12,%xmm15,%xmm15
vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
vpxor %xmm10,%xmm15,%xmm15
leaq 128(%rdx),%rdx
subq $0x80,%rcx
jnc .Loop8x_avx
addq $0x80,%rcx
jmp .Ltail_no_xor_avx
.align 32
.Lshort_avx:
vmovdqu -16(%rdx,%rcx,1),%xmm14
leaq (%rdx,%rcx,1),%rdx
vmovdqu 0-64(%rsi),%xmm6
vmovdqu 32-64(%rsi),%xmm7
vpshufb %xmm13,%xmm14,%xmm15
vmovdqa %xmm0,%xmm3
vmovdqa %xmm1,%xmm4
vmovdqa %xmm2,%xmm5
subq $0x10,%rcx
jz .Ltail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -32(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 16-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vpsrldq $8,%xmm7,%xmm7
subq $0x10,%rcx
jz .Ltail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -48(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 48-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovdqu 80-64(%rsi),%xmm7
subq $0x10,%rcx
jz .Ltail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -64(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 64-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vpsrldq $8,%xmm7,%xmm7
subq $0x10,%rcx
jz .Ltail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -80(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 96-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovdqu 128-64(%rsi),%xmm7
subq $0x10,%rcx
jz .Ltail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -96(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 112-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vpsrldq $8,%xmm7,%xmm7
subq $0x10,%rcx
jz .Ltail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -112(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 144-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovq 184-64(%rsi),%xmm7
subq $0x10,%rcx
jmp .Ltail_avx
.align 32
.Ltail_avx:
vpxor %xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovdqu (%r10),%xmm12
vpxor %xmm0,%xmm3,%xmm10
vpxor %xmm1,%xmm4,%xmm11
vpxor %xmm2,%xmm5,%xmm5
vpxor %xmm10,%xmm5,%xmm5
vpxor %xmm11,%xmm5,%xmm5
vpslldq $8,%xmm5,%xmm9
vpsrldq $8,%xmm5,%xmm5
vpxor %xmm9,%xmm10,%xmm10
vpxor %xmm5,%xmm11,%xmm11
vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
vpalignr $8,%xmm10,%xmm10,%xmm10
vpxor %xmm9,%xmm10,%xmm10
vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
vpalignr $8,%xmm10,%xmm10,%xmm10
vpxor %xmm11,%xmm10,%xmm10
vpxor %xmm9,%xmm10,%xmm10
cmpq $0,%rcx
jne .Lshort_avx
vpshufb %xmm13,%xmm10,%xmm10
vmovdqu %xmm10,(%rdi)
vzeroupper
.byte 0xf3,0xc3
.size gcm_ghash_avx,.-gcm_ghash_avx .size gcm_ghash_avx,.-gcm_ghash_avx
.align 64 .align 64
.Lbswap_mask: .Lbswap_mask:
@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
.align 16 .align 16
@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
.extern OPENSSL_ia32cap_P .extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P .hidden OPENSSL_ia32cap_P
@ -17,47 +17,6 @@
.LONE_mont: .LONE_mont:
.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe .quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
.type ecp_nistz256_mul_by_2,@function
.align 64
ecp_nistz256_mul_by_2:
pushq %r12
pushq %r13
movq 0(%rsi),%r8
movq 8(%rsi),%r9
addq %r8,%r8
movq 16(%rsi),%r10
adcq %r9,%r9
movq 24(%rsi),%r11
leaq .Lpoly(%rip),%rsi
movq %r8,%rax
adcq %r10,%r10
adcq %r11,%r11
movq %r9,%rdx
sbbq %r13,%r13
subq 0(%rsi),%r8
movq %r10,%rcx
sbbq 8(%rsi),%r9
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
testq %r13,%r13
cmovzq %rax,%r8
cmovzq %rdx,%r9
movq %r8,0(%rdi)
cmovzq %rcx,%r10
movq %r9,8(%rdi)
cmovzq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
popq %r13
popq %r12
.byte 0xf3,0xc3
.size ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2
.globl ecp_nistz256_neg .globl ecp_nistz256_neg
@ -553,106 +512,15 @@ __ecp_nistz256_sqr_montq:
.size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq .size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
.globl ecp_nistz256_from_mont
.hidden ecp_nistz256_from_mont
.type ecp_nistz256_from_mont,@function
.align 32
ecp_nistz256_from_mont:
pushq %r12
pushq %r13
movq 0(%rsi),%rax
movq .Lpoly+24(%rip),%r13
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
movq %rax,%r8
movq .Lpoly+8(%rip),%r12
movq %rax,%rcx
shlq $32,%r8
mulq %r13
shrq $32,%rcx
addq %r8,%r9
adcq %rcx,%r10
adcq %rax,%r11
movq %r9,%rax
adcq $0,%rdx
movq %r9,%rcx
shlq $32,%r9
movq %rdx,%r8
mulq %r13
shrq $32,%rcx
addq %r9,%r10
adcq %rcx,%r11
adcq %rax,%r8
movq %r10,%rax
adcq $0,%rdx
movq %r10,%rcx
shlq $32,%r10
movq %rdx,%r9
mulq %r13
shrq $32,%rcx
addq %r10,%r11
adcq %rcx,%r8
adcq %rax,%r9
movq %r11,%rax
adcq $0,%rdx
movq %r11,%rcx
shlq $32,%r11
movq %rdx,%r10
mulq %r13
shrq $32,%rcx
addq %r11,%r8
adcq %rcx,%r9
movq %r8,%rcx
adcq %rax,%r10
movq %r9,%rsi
adcq $0,%rdx
subq $-1,%r8
movq %r10,%rax
sbbq %r12,%r9
sbbq $0,%r10
movq %rdx,%r11
sbbq %r13,%rdx
sbbq %r13,%r13
cmovnzq %rcx,%r8
cmovnzq %rsi,%r9
movq %r8,0(%rdi)
cmovnzq %rax,%r10
movq %r9,8(%rdi)
cmovzq %rdx,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
popq %r13
popq %r12
.byte 0xf3,0xc3
.size ecp_nistz256_from_mont,.-ecp_nistz256_from_mont
.globl ecp_nistz256_select_w5 .globl ecp_nistz256_select_w5
.hidden ecp_nistz256_select_w5 .hidden ecp_nistz256_select_w5
.type ecp_nistz256_select_w5,@function .type ecp_nistz256_select_w5,@function
.align 32 .align 32
ecp_nistz256_select_w5: ecp_nistz256_select_w5:
leaq OPENSSL_ia32cap_P(%rip),%rax
movq 8(%rax),%rax
testl $32,%eax
jnz .Lavx2_select_w5
movdqa .LOne(%rip),%xmm0 movdqa .LOne(%rip),%xmm0
movd %edx,%xmm1 movd %edx,%xmm1
@ -713,6 +581,10 @@ ecp_nistz256_select_w5:
.type ecp_nistz256_select_w7,@function .type ecp_nistz256_select_w7,@function
.align 32 .align 32
ecp_nistz256_select_w7: ecp_nistz256_select_w7:
leaq OPENSSL_ia32cap_P(%rip),%rax
movq 8(%rax),%rax
testl $32,%eax
jnz .Lavx2_select_w7
movdqa .LOne(%rip),%xmm8 movdqa .LOne(%rip),%xmm8
movd %edx,%xmm1 movd %edx,%xmm1
@ -754,24 +626,155 @@ ecp_nistz256_select_w7:
movdqu %xmm5,48(%rdi) movdqu %xmm5,48(%rdi)
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7 .size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
.type ecp_nistz256_avx2_select_w5,@function
.align 32
ecp_nistz256_avx2_select_w5:
.Lavx2_select_w5:
vzeroupper
vmovdqa .LTwo(%rip),%ymm0
vpxor %ymm2,%ymm2,%ymm2
vpxor %ymm3,%ymm3,%ymm3
vpxor %ymm4,%ymm4,%ymm4
vmovdqa .LOne(%rip),%ymm5
vmovdqa .LTwo(%rip),%ymm10
vmovd %edx,%xmm1
vpermd %ymm1,%ymm2,%ymm1
movq $8,%rax
.Lselect_loop_avx2_w5:
vmovdqa 0(%rsi),%ymm6
vmovdqa 32(%rsi),%ymm7
vmovdqa 64(%rsi),%ymm8
vmovdqa 96(%rsi),%ymm11
vmovdqa 128(%rsi),%ymm12
vmovdqa 160(%rsi),%ymm13
vpcmpeqd %ymm1,%ymm5,%ymm9
vpcmpeqd %ymm1,%ymm10,%ymm14
vpaddd %ymm0,%ymm5,%ymm5
vpaddd %ymm0,%ymm10,%ymm10
leaq 192(%rsi),%rsi
vpand %ymm9,%ymm6,%ymm6
vpand %ymm9,%ymm7,%ymm7
vpand %ymm9,%ymm8,%ymm8
vpand %ymm14,%ymm11,%ymm11
vpand %ymm14,%ymm12,%ymm12
vpand %ymm14,%ymm13,%ymm13
vpxor %ymm6,%ymm2,%ymm2
vpxor %ymm7,%ymm3,%ymm3
vpxor %ymm8,%ymm4,%ymm4
vpxor %ymm11,%ymm2,%ymm2
vpxor %ymm12,%ymm3,%ymm3
vpxor %ymm13,%ymm4,%ymm4
decq %rax
jnz .Lselect_loop_avx2_w5
vmovdqu %ymm2,0(%rdi)
vmovdqu %ymm3,32(%rdi)
vmovdqu %ymm4,64(%rdi)
vzeroupper
.byte 0xf3,0xc3
.size ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5
.globl ecp_nistz256_avx2_select_w7 .globl ecp_nistz256_avx2_select_w7
.hidden ecp_nistz256_avx2_select_w7 .hidden ecp_nistz256_avx2_select_w7
.type ecp_nistz256_avx2_select_w7,@function .type ecp_nistz256_avx2_select_w7,@function
.align 32 .align 32
ecp_nistz256_avx2_select_w7: ecp_nistz256_avx2_select_w7:
.byte 0x0f,0x0b .Lavx2_select_w7:
vzeroupper
vmovdqa .LThree(%rip),%ymm0
vpxor %ymm2,%ymm2,%ymm2
vpxor %ymm3,%ymm3,%ymm3
vmovdqa .LOne(%rip),%ymm4
vmovdqa .LTwo(%rip),%ymm8
vmovdqa .LThree(%rip),%ymm12
vmovd %edx,%xmm1
vpermd %ymm1,%ymm2,%ymm1
movq $21,%rax
.Lselect_loop_avx2_w7:
vmovdqa 0(%rsi),%ymm5
vmovdqa 32(%rsi),%ymm6
vmovdqa 64(%rsi),%ymm9
vmovdqa 96(%rsi),%ymm10
vmovdqa 128(%rsi),%ymm13
vmovdqa 160(%rsi),%ymm14
vpcmpeqd %ymm1,%ymm4,%ymm7
vpcmpeqd %ymm1,%ymm8,%ymm11
vpcmpeqd %ymm1,%ymm12,%ymm15
vpaddd %ymm0,%ymm4,%ymm4
vpaddd %ymm0,%ymm8,%ymm8
vpaddd %ymm0,%ymm12,%ymm12
leaq 192(%rsi),%rsi
vpand %ymm7,%ymm5,%ymm5
vpand %ymm7,%ymm6,%ymm6
vpand %ymm11,%ymm9,%ymm9
vpand %ymm11,%ymm10,%ymm10
vpand %ymm15,%ymm13,%ymm13
vpand %ymm15,%ymm14,%ymm14
vpxor %ymm5,%ymm2,%ymm2
vpxor %ymm6,%ymm3,%ymm3
vpxor %ymm9,%ymm2,%ymm2
vpxor %ymm10,%ymm3,%ymm3
vpxor %ymm13,%ymm2,%ymm2
vpxor %ymm14,%ymm3,%ymm3
decq %rax
jnz .Lselect_loop_avx2_w7
vmovdqa 0(%rsi),%ymm5
vmovdqa 32(%rsi),%ymm6
vpcmpeqd %ymm1,%ymm4,%ymm7
vpand %ymm7,%ymm5,%ymm5
vpand %ymm7,%ymm6,%ymm6
vpxor %ymm5,%ymm2,%ymm2
vpxor %ymm6,%ymm3,%ymm3
vmovdqu %ymm2,0(%rdi)
vmovdqu %ymm3,32(%rdi)
vzeroupper
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7 .size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
.type __ecp_nistz256_add_toq,@function .type __ecp_nistz256_add_toq,@function
.align 32 .align 32
__ecp_nistz256_add_toq: __ecp_nistz256_add_toq:
xorq %r11,%r11
addq 0(%rbx),%r12 addq 0(%rbx),%r12
adcq 8(%rbx),%r13 adcq 8(%rbx),%r13
movq %r12,%rax movq %r12,%rax
adcq 16(%rbx),%r8 adcq 16(%rbx),%r8
adcq 24(%rbx),%r9 adcq 24(%rbx),%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -779,14 +782,14 @@ __ecp_nistz256_add_toq:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq %r12,0(%rdi) movq %r12,0(%rdi)
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq %r13,8(%rdi) movq %r13,8(%rdi)
cmovzq %r10,%r9 cmovcq %r10,%r9
movq %r8,16(%rdi) movq %r8,16(%rdi)
movq %r9,24(%rdi) movq %r9,24(%rdi)
@ -854,13 +857,14 @@ __ecp_nistz256_subq:
.type __ecp_nistz256_mul_by_2q,@function .type __ecp_nistz256_mul_by_2q,@function
.align 32 .align 32
__ecp_nistz256_mul_by_2q: __ecp_nistz256_mul_by_2q:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
adcq %r13,%r13 adcq %r13,%r13
movq %r12,%rax movq %r12,%rax
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -868,14 +872,14 @@ __ecp_nistz256_mul_by_2q:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq %r12,0(%rdi) movq %r12,0(%rdi)
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq %r13,8(%rdi) movq %r13,8(%rdi)
cmovzq %r10,%r9 cmovcq %r10,%r9
movq %r8,16(%rdi) movq %r8,16(%rdi)
movq %r9,24(%rdi) movq %r9,24(%rdi)
@ -1107,16 +1111,14 @@ ecp_nistz256_point_add:
movq %rdx,%rsi movq %rdx,%rsi
movdqa %xmm0,384(%rsp) movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp) movdqa %xmm1,384+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,416(%rsp) movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp) movdqa %xmm3,416+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,448(%rsp) movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp) movdqa %xmm5,448+16(%rsp)
por %xmm1,%xmm3 por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0 movdqu 0(%rsi),%xmm0
pshufd $0xb1,%xmm3,%xmm5 pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1 movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2 movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5 por %xmm3,%xmm5
@ -1128,14 +1130,14 @@ ecp_nistz256_point_add:
movdqa %xmm0,480(%rsp) movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4 pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp) movdqa %xmm1,480+16(%rsp)
por %xmm0,%xmm1 movdqu 64(%rsi),%xmm0
.byte 102,72,15,110,199 movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp) movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp) movdqa %xmm3,512+16(%rsp)
por %xmm2,%xmm3
por %xmm4,%xmm5 por %xmm4,%xmm5
pxor %xmm4,%xmm4 pxor %xmm4,%xmm4
por %xmm1,%xmm3 por %xmm0,%xmm1
.byte 102,72,15,110,199
leaq 64-0(%rsi),%rsi leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp) movq %rax,544+0(%rsp)
@ -1146,8 +1148,8 @@ ecp_nistz256_point_add:
call __ecp_nistz256_sqr_montq call __ecp_nistz256_sqr_montq
pcmpeqd %xmm4,%xmm5 pcmpeqd %xmm4,%xmm5
pshufd $0xb1,%xmm3,%xmm4 pshufd $0xb1,%xmm1,%xmm4
por %xmm3,%xmm4 por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5 pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3 pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4 por %xmm3,%xmm4
@ -1330,6 +1332,7 @@ ecp_nistz256_point_add:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
leaq 96(%rsp),%rsi leaq 96(%rsp),%rsi
adcq %r13,%r13 adcq %r13,%r13
@ -1337,7 +1340,7 @@ ecp_nistz256_point_add:
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -1345,15 +1348,15 @@ ecp_nistz256_point_add:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
movq 0(%rsi),%rax movq 0(%rsi),%rax
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq 8(%rsi),%rbp movq 8(%rsi),%rbp
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq 16(%rsi),%rcx movq 16(%rsi),%rcx
cmovzq %r10,%r9 cmovcq %r10,%r9
movq 24(%rsi),%r10 movq 24(%rsi),%r10
call __ecp_nistz256_subq call __ecp_nistz256_subq
@ -1508,16 +1511,14 @@ ecp_nistz256_point_add_affine:
movq 64+24(%rsi),%r8 movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp) movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp) movdqa %xmm1,320+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,352(%rsp) movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp) movdqa %xmm3,352+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,384(%rsp) movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp) movdqa %xmm5,384+16(%rsp)
por %xmm1,%xmm3 por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0 movdqu 0(%rbx),%xmm0
pshufd $0xb1,%xmm3,%xmm5 pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1 movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2 movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5 por %xmm3,%xmm5
@ -1635,6 +1636,7 @@ ecp_nistz256_point_add_affine:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
leaq 192(%rsp),%rsi leaq 192(%rsp),%rsi
adcq %r13,%r13 adcq %r13,%r13
@ -1642,7 +1644,7 @@ ecp_nistz256_point_add_affine:
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -1650,15 +1652,15 @@ ecp_nistz256_point_add_affine:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
movq 0(%rsi),%rax movq 0(%rsi),%rax
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq 8(%rsi),%rbp movq 8(%rsi),%rbp
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq 16(%rsi),%rcx movq 16(%rsi),%rcx
cmovzq %r10,%r9 cmovcq %r10,%r9
movq 24(%rsi),%r10 movq 24(%rsi),%r10
call __ecp_nistz256_subq call __ecp_nistz256_subq
@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
File diff suppressed because it is too large
@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
.extern OPENSSL_ia32cap_P .extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P .hidden OPENSSL_ia32cap_P
@ -8,9 +8,10 @@
.type sha1_block_data_order,@function .type sha1_block_data_order,@function
.align 16 .align 16
sha1_block_data_order: sha1_block_data_order:
movl OPENSSL_ia32cap_P+0(%rip),%r9d leaq OPENSSL_ia32cap_P(%rip),%r10
movl OPENSSL_ia32cap_P+4(%rip),%r8d movl 0(%r10),%r9d
movl OPENSSL_ia32cap_P+8(%rip),%r10d movl 4(%r10),%r8d
movl 8(%r10),%r10d
testl $512,%r8d testl $512,%r8d
jz .Lialu jz .Lialu
andl $268435456,%r8d andl $268435456,%r8d
@ -1241,14 +1242,13 @@ sha1_block_data_order:
.align 16 .align 16
sha1_block_data_order_ssse3: sha1_block_data_order_ssse3:
_ssse3_shortcut: _ssse3_shortcut:
movq %rsp,%rax movq %rsp,%r11
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
leaq -64(%rsp),%rsp leaq -64(%rsp),%rsp
movq %rax,%r14
andq $-64,%rsp andq $-64,%rsp
movq %rdi,%r8 movq %rdi,%r8
movq %rsi,%r9 movq %rsi,%r9
@ -1256,7 +1256,7 @@ _ssse3_shortcut:
shlq $6,%r10 shlq $6,%r10
addq %r9,%r10 addq %r9,%r10
leaq K_XX_XX+64(%rip),%r11 leaq K_XX_XX+64(%rip),%r14
movl 0(%r8),%eax movl 0(%r8),%eax
movl 4(%r8),%ebx movl 4(%r8),%ebx
@ -1268,8 +1268,8 @@ _ssse3_shortcut:
xorl %edx,%edi xorl %edx,%edi
andl %edi,%esi andl %edi,%esi
movdqa 64(%r11),%xmm6 movdqa 64(%r14),%xmm6
movdqa -64(%r11),%xmm9 movdqa -64(%r14),%xmm9
movdqu 0(%r9),%xmm0 movdqu 0(%r9),%xmm0
movdqu 16(%r9),%xmm1 movdqu 16(%r9),%xmm1
movdqu 32(%r9),%xmm2 movdqu 32(%r9),%xmm2
@ -1345,7 +1345,7 @@ _ssse3_shortcut:
pslld $2,%xmm9 pslld $2,%xmm9
pxor %xmm10,%xmm4 pxor %xmm10,%xmm4
xorl %ebp,%edx xorl %ebp,%edx
movdqa -64(%r11),%xmm10 movdqa -64(%r14),%xmm10
roll $5,%ecx roll $5,%ecx
addl %edi,%ebx addl %edi,%ebx
andl %edx,%esi andl %edx,%esi
@ -1406,7 +1406,7 @@ _ssse3_shortcut:
pslld $2,%xmm10 pslld $2,%xmm10
pxor %xmm8,%xmm5 pxor %xmm8,%xmm5
xorl %eax,%ebp xorl %eax,%ebp
movdqa -32(%r11),%xmm8 movdqa -32(%r14),%xmm8
roll $5,%edx roll $5,%edx
addl %edi,%ecx addl %edi,%ecx
andl %ebp,%esi andl %ebp,%esi
@ -1467,7 +1467,7 @@ _ssse3_shortcut:
pslld $2,%xmm8 pslld $2,%xmm8
pxor %xmm9,%xmm6 pxor %xmm9,%xmm6
xorl %ebx,%eax xorl %ebx,%eax
movdqa -32(%r11),%xmm9 movdqa -32(%r14),%xmm9
roll $5,%ebp roll $5,%ebp
addl %edi,%edx addl %edi,%edx
andl %eax,%esi andl %eax,%esi
@ -1528,7 +1528,7 @@ _ssse3_shortcut:
pslld $2,%xmm9 pslld $2,%xmm9
pxor %xmm10,%xmm7 pxor %xmm10,%xmm7
xorl %ecx,%ebx xorl %ecx,%ebx
movdqa -32(%r11),%xmm10 movdqa -32(%r14),%xmm10
roll $5,%eax roll $5,%eax
addl %edi,%ebp addl %edi,%ebp
andl %ebx,%esi andl %ebx,%esi
@ -1639,7 +1639,7 @@ _ssse3_shortcut:
pxor %xmm3,%xmm2 pxor %xmm3,%xmm2
addl %esi,%eax addl %esi,%eax
xorl %edx,%edi xorl %edx,%edi
movdqa 0(%r11),%xmm10 movdqa 0(%r14),%xmm10
rorl $7,%ecx rorl $7,%ecx
paddd %xmm1,%xmm9 paddd %xmm1,%xmm9
addl %ebx,%eax addl %ebx,%eax
@ -1874,7 +1874,7 @@ _ssse3_shortcut:
pxor %xmm0,%xmm7 pxor %xmm0,%xmm7
roll $5,%ebx roll $5,%ebx
addl %esi,%eax addl %esi,%eax
movdqa 32(%r11),%xmm9 movdqa 32(%r14),%xmm9
xorl %ecx,%edi xorl %ecx,%edi
paddd %xmm6,%xmm8 paddd %xmm6,%xmm8
xorl %edx,%ecx xorl %edx,%ecx
@ -2165,8 +2165,8 @@ _ssse3_shortcut:
addl %edx,%ecx addl %edx,%ecx
cmpq %r10,%r9 cmpq %r10,%r9
je .Ldone_ssse3 je .Ldone_ssse3
movdqa 64(%r11),%xmm6 movdqa 64(%r14),%xmm6
movdqa -64(%r11),%xmm9 movdqa -64(%r14),%xmm9
movdqu 0(%r9),%xmm0 movdqu 0(%r9),%xmm0
movdqu 16(%r9),%xmm1 movdqu 16(%r9),%xmm1
movdqu 32(%r9),%xmm2 movdqu 32(%r9),%xmm2
@ -2403,13 +2403,12 @@ _ssse3_shortcut:
movl %ecx,8(%r8) movl %ecx,8(%r8)
movl %edx,12(%r8) movl %edx,12(%r8)
movl %ebp,16(%r8) movl %ebp,16(%r8)
leaq (%r14),%rsi movq -40(%r11),%r14
movq -40(%rsi),%r14 movq -32(%r11),%r13
movq -32(%rsi),%r13 movq -24(%r11),%r12
movq -24(%rsi),%r12 movq -16(%r11),%rbp
movq -16(%rsi),%rbp movq -8(%r11),%rbx
movq -8(%rsi),%rbx leaq (%r11),%rsp
leaq (%rsi),%rsp
.Lepilogue_ssse3: .Lepilogue_ssse3:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 .size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
@ -2417,7 +2416,7 @@ _ssse3_shortcut:
.align 16 .align 16
sha1_block_data_order_avx: sha1_block_data_order_avx:
_avx_shortcut: _avx_shortcut:
movq %rsp,%rax movq %rsp,%r11
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
@ -2425,7 +2424,6 @@ _avx_shortcut:
pushq %r14 pushq %r14
leaq -64(%rsp),%rsp leaq -64(%rsp),%rsp
vzeroupper vzeroupper
movq %rax,%r14
andq $-64,%rsp andq $-64,%rsp
movq %rdi,%r8 movq %rdi,%r8
movq %rsi,%r9 movq %rsi,%r9
@ -2433,7 +2431,7 @@ _avx_shortcut:
shlq $6,%r10 shlq $6,%r10
addq %r9,%r10 addq %r9,%r10
leaq K_XX_XX+64(%rip),%r11 leaq K_XX_XX+64(%rip),%r14
movl 0(%r8),%eax movl 0(%r8),%eax
movl 4(%r8),%ebx movl 4(%r8),%ebx
@ -2445,8 +2443,8 @@ _avx_shortcut:
xorl %edx,%edi xorl %edx,%edi
andl %edi,%esi andl %edi,%esi
vmovdqa 64(%r11),%xmm6 vmovdqa 64(%r14),%xmm6
vmovdqa -64(%r11),%xmm11 vmovdqa -64(%r14),%xmm11
vmovdqu 0(%r9),%xmm0 vmovdqu 0(%r9),%xmm0
vmovdqu 16(%r9),%xmm1 vmovdqu 16(%r9),%xmm1
vmovdqu 32(%r9),%xmm2 vmovdqu 32(%r9),%xmm2
@ -2571,7 +2569,7 @@ _avx_shortcut:
vpxor %xmm10,%xmm5,%xmm5 vpxor %xmm10,%xmm5,%xmm5
xorl %eax,%ebp xorl %eax,%ebp
shldl $5,%edx,%edx shldl $5,%edx,%edx
vmovdqa -32(%r11),%xmm11 vmovdqa -32(%r14),%xmm11
addl %edi,%ecx addl %edi,%ecx
andl %ebp,%esi andl %ebp,%esi
xorl %eax,%ebp xorl %eax,%ebp
@ -2784,7 +2782,7 @@ _avx_shortcut:
addl %esi,%eax addl %esi,%eax
xorl %edx,%edi xorl %edx,%edi
vpaddd %xmm1,%xmm11,%xmm9 vpaddd %xmm1,%xmm11,%xmm9
vmovdqa 0(%r11),%xmm11 vmovdqa 0(%r14),%xmm11
shrdl $7,%ecx,%ecx shrdl $7,%ecx,%ecx
addl %ebx,%eax addl %ebx,%eax
vpxor %xmm8,%xmm2,%xmm2 vpxor %xmm8,%xmm2,%xmm2
@ -3003,7 +3001,7 @@ _avx_shortcut:
movl %ebx,%edi movl %ebx,%edi
xorl %edx,%esi xorl %edx,%esi
vpaddd %xmm6,%xmm11,%xmm9 vpaddd %xmm6,%xmm11,%xmm9
vmovdqa 32(%r11),%xmm11 vmovdqa 32(%r14),%xmm11
shldl $5,%ebx,%ebx shldl $5,%ebx,%ebx
addl %esi,%eax addl %esi,%eax
vpxor %xmm8,%xmm7,%xmm7 vpxor %xmm8,%xmm7,%xmm7
@ -3282,8 +3280,8 @@ _avx_shortcut:
addl %edx,%ecx addl %edx,%ecx
cmpq %r10,%r9 cmpq %r10,%r9
je .Ldone_avx je .Ldone_avx
vmovdqa 64(%r11),%xmm6 vmovdqa 64(%r14),%xmm6
vmovdqa -64(%r11),%xmm11 vmovdqa -64(%r14),%xmm11
vmovdqu 0(%r9),%xmm0 vmovdqu 0(%r9),%xmm0
vmovdqu 16(%r9),%xmm1 vmovdqu 16(%r9),%xmm1
vmovdqu 32(%r9),%xmm2 vmovdqu 32(%r9),%xmm2
@ -3519,13 +3517,12 @@ _avx_shortcut:
movl %ecx,8(%r8) movl %ecx,8(%r8)
movl %edx,12(%r8) movl %edx,12(%r8)
movl %ebp,16(%r8) movl %ebp,16(%r8)
leaq (%r14),%rsi movq -40(%r11),%r14
movq -40(%rsi),%r14 movq -32(%r11),%r13
movq -32(%rsi),%r13 movq -24(%r11),%r12
movq -24(%rsi),%r12 movq -16(%r11),%rbp
movq -16(%rsi),%rbp movq -8(%r11),%rbx
movq -8(%rsi),%rbx leaq (%r11),%rsp
leaq (%rsi),%rsp
.Lepilogue_avx: .Lepilogue_avx:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size sha1_block_data_order_avx,.-sha1_block_data_order_avx .size sha1_block_data_order_avx,.-sha1_block_data_order_avx
@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
.extern OPENSSL_ia32cap_P .extern OPENSSL_ia32cap_P
@ -19,13 +19,13 @@ sha256_block_data_order:
je .Lavx_shortcut je .Lavx_shortcut
testl $512,%r10d testl $512,%r10d
jnz .Lssse3_shortcut jnz .Lssse3_shortcut
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movq %rsp,%r11
shlq $4,%rdx shlq $4,%rdx
subq $64+32,%rsp subq $64+32,%rsp
leaq (%rsi,%rdx,4),%rdx leaq (%rsi,%rdx,4),%rdx
@ -33,7 +33,7 @@ sha256_block_data_order:
movq %rdi,64+0(%rsp) movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp) movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp) movq %rdx,64+16(%rsp)
movq %r11,64+24(%rsp) movq %rax,64+24(%rsp)
.Lprologue: .Lprologue:
movl 0(%rdi),%eax movl 0(%rdi),%eax
@ -1698,13 +1698,13 @@ sha256_block_data_order:
jb .Lloop jb .Lloop
movq 64+24(%rsp),%rsi movq 64+24(%rsp),%rsi
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
.Lepilogue: .Lepilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size sha256_block_data_order,.-sha256_block_data_order .size sha256_block_data_order,.-sha256_block_data_order
@ -1755,13 +1755,13 @@ K256:
.align 64 .align 64
sha256_block_data_order_ssse3: sha256_block_data_order_ssse3:
.Lssse3_shortcut: .Lssse3_shortcut:
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movq %rsp,%r11
shlq $4,%rdx shlq $4,%rdx
subq $96,%rsp subq $96,%rsp
leaq (%rsi,%rdx,4),%rdx leaq (%rsi,%rdx,4),%rdx
@ -1769,7 +1769,7 @@ sha256_block_data_order_ssse3:
movq %rdi,64+0(%rsp) movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp) movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp) movq %rdx,64+16(%rsp)
movq %r11,64+24(%rsp) movq %rax,64+24(%rsp)
.Lprologue_ssse3: .Lprologue_ssse3:
movl 0(%rdi),%eax movl 0(%rdi),%eax
@ -2836,13 +2836,13 @@ sha256_block_data_order_ssse3:
jb .Lloop_ssse3 jb .Lloop_ssse3
movq 64+24(%rsp),%rsi movq 64+24(%rsp),%rsi
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
.Lepilogue_ssse3: .Lepilogue_ssse3:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size sha256_block_data_order_ssse3,.-sha256_block_data_order_ssse3 .size sha256_block_data_order_ssse3,.-sha256_block_data_order_ssse3
@ -2850,13 +2850,13 @@ sha256_block_data_order_ssse3:
.align 64 .align 64
sha256_block_data_order_avx: sha256_block_data_order_avx:
.Lavx_shortcut: .Lavx_shortcut:
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movq %rsp,%r11
shlq $4,%rdx shlq $4,%rdx
subq $96,%rsp subq $96,%rsp
leaq (%rsi,%rdx,4),%rdx leaq (%rsi,%rdx,4),%rdx
@ -2864,7 +2864,7 @@ sha256_block_data_order_avx:
movq %rdi,64+0(%rsp) movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp) movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp) movq %rdx,64+16(%rsp)
movq %r11,64+24(%rsp) movq %rax,64+24(%rsp)
.Lprologue_avx: .Lprologue_avx:
vzeroupper vzeroupper
@ -3893,13 +3893,13 @@ sha256_block_data_order_avx:
movq 64+24(%rsp),%rsi movq 64+24(%rsp),%rsi
vzeroupper vzeroupper
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
.Lepilogue_avx: .Lepilogue_avx:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size sha256_block_data_order_avx,.-sha256_block_data_order_avx .size sha256_block_data_order_avx,.-sha256_block_data_order_avx
@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
.extern OPENSSL_ia32cap_P .extern OPENSSL_ia32cap_P
@ -19,13 +19,13 @@ sha512_block_data_order:
orl %r9d,%r10d orl %r9d,%r10d
cmpl $1342177792,%r10d cmpl $1342177792,%r10d
je .Lavx_shortcut je .Lavx_shortcut
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movq %rsp,%r11
shlq $4,%rdx shlq $4,%rdx
subq $128+32,%rsp subq $128+32,%rsp
leaq (%rsi,%rdx,8),%rdx leaq (%rsi,%rdx,8),%rdx
@ -33,7 +33,7 @@ sha512_block_data_order:
movq %rdi,128+0(%rsp) movq %rdi,128+0(%rsp)
movq %rsi,128+8(%rsp) movq %rsi,128+8(%rsp)
movq %rdx,128+16(%rsp) movq %rdx,128+16(%rsp)
movq %r11,128+24(%rsp) movq %rax,128+24(%rsp)
.Lprologue: .Lprologue:
movq 0(%rdi),%rax movq 0(%rdi),%rax
@ -1698,13 +1698,13 @@ sha512_block_data_order:
jb .Lloop jb .Lloop
movq 128+24(%rsp),%rsi movq 128+24(%rsp),%rsi
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
.Lepilogue: .Lepilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size sha512_block_data_order,.-sha512_block_data_order .size sha512_block_data_order,.-sha512_block_data_order
@ -1799,13 +1799,13 @@ K512:
.align 64 .align 64
sha512_block_data_order_xop: sha512_block_data_order_xop:
.Lxop_shortcut: .Lxop_shortcut:
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movq %rsp,%r11
shlq $4,%rdx shlq $4,%rdx
subq $160,%rsp subq $160,%rsp
leaq (%rsi,%rdx,8),%rdx leaq (%rsi,%rdx,8),%rdx
@ -1813,7 +1813,7 @@ sha512_block_data_order_xop:
movq %rdi,128+0(%rsp) movq %rdi,128+0(%rsp)
movq %rsi,128+8(%rsp) movq %rsi,128+8(%rsp)
movq %rdx,128+16(%rsp) movq %rdx,128+16(%rsp)
movq %r11,128+24(%rsp) movq %rax,128+24(%rsp)
.Lprologue_xop: .Lprologue_xop:
vzeroupper vzeroupper
@ -2868,13 +2868,13 @@ sha512_block_data_order_xop:
movq 128+24(%rsp),%rsi movq 128+24(%rsp),%rsi
vzeroupper vzeroupper
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
.Lepilogue_xop: .Lepilogue_xop:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size sha512_block_data_order_xop,.-sha512_block_data_order_xop .size sha512_block_data_order_xop,.-sha512_block_data_order_xop
@ -2882,13 +2882,13 @@ sha512_block_data_order_xop:
.align 64 .align 64
sha512_block_data_order_avx: sha512_block_data_order_avx:
.Lavx_shortcut: .Lavx_shortcut:
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movq %rsp,%r11
shlq $4,%rdx shlq $4,%rdx
subq $160,%rsp subq $160,%rsp
leaq (%rsi,%rdx,8),%rdx leaq (%rsi,%rdx,8),%rdx
@ -2896,7 +2896,7 @@ sha512_block_data_order_avx:
movq %rdi,128+0(%rsp) movq %rdi,128+0(%rsp)
movq %rsi,128+8(%rsp) movq %rsi,128+8(%rsp)
movq %rdx,128+16(%rsp) movq %rdx,128+16(%rsp)
movq %r11,128+24(%rsp) movq %rax,128+24(%rsp)
.Lprologue_avx: .Lprologue_avx:
vzeroupper vzeroupper
@ -4015,13 +4015,13 @@ sha512_block_data_order_avx:
movq 128+24(%rsp),%rsi movq 128+24(%rsp),%rsi
vzeroupper vzeroupper
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
.Lepilogue_avx: .Lepilogue_avx:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.size sha512_block_data_order_avx,.-sha512_block_data_order_avx .size sha512_block_data_order_avx,.-sha512_block_data_order_avx
@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
.extern OPENSSL_ia32cap_P .extern OPENSSL_ia32cap_P
@ -9,6 +9,10 @@
.type bn_mul_mont,@function .type bn_mul_mont,@function
.align 16 .align 16
bn_mul_mont: bn_mul_mont:
.cfi_startproc
movl %r9d,%r9d
movq %rsp,%rax
.cfi_def_cfa_register %rax
testl $3,%r9d testl $3,%r9d
jnz .Lmul_enter jnz .Lmul_enter
cmpl $8,%r9d cmpl $8,%r9d
@ -22,20 +26,50 @@ bn_mul_mont:
.align 16 .align 16
.Lmul_enter: .Lmul_enter:
pushq %rbx pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp pushq %rbp
.cfi_offset %rbp,-24
pushq %r12 pushq %r12
.cfi_offset %r12,-32
pushq %r13 pushq %r13
.cfi_offset %r13,-40
pushq %r14 pushq %r14
.cfi_offset %r14,-48
pushq %r15 pushq %r15
.cfi_offset %r15,-56
movl %r9d,%r9d negq %r9
leaq 2(%r9),%r10
movq %rsp,%r11 movq %rsp,%r11
negq %r10 leaq -16(%rsp,%r9,8),%r10
leaq (%rsp,%r10,8),%rsp negq %r9
andq $-1024,%rsp andq $-1024,%r10
movq %r11,8(%rsp,%r9,8)
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul_page_walk
jmp .Lmul_page_walk_done
.align 16
.Lmul_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul_page_walk
.Lmul_page_walk_done:
movq %rax,8(%rsp,%r9,8)
.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body: .Lmul_body:
movq %rdx,%r12 movq %rdx,%r12
movq (%r8),%r8 movq (%r8),%r8
@ -178,7 +212,8 @@ bn_mul_mont:
movq %r9,%r15 movq %r9,%r15
jmp .Lsub jmp .Lsub
.align 16 .align 16
.Lsub: sbbq (%rcx,%r14,8),%rax .Lsub:
sbbq (%rcx,%r14,8),%rax
movq %rax,(%rdi,%r14,8) movq %rax,(%rdi,%r14,8)
movq 8(%rsi,%r14,8),%rax movq 8(%rsi,%r14,8),%rax
leaq 1(%r14),%r14 leaq 1(%r14),%r14
@ -187,51 +222,86 @@ bn_mul_mont:
sbbq $0,%rax sbbq $0,%rax
xorq %r14,%r14 xorq %r14,%r14
andq %rax,%rsi
notq %rax
movq %rdi,%rcx
andq %rax,%rcx
movq %r9,%r15 movq %r9,%r15
orq %rcx,%rsi
.align 16 .align 16
.Lcopy: .Lcopy:
movq (%rsp,%r14,8),%rsi movq (%rsi,%r14,8),%rax
movq (%rdi,%r14,8),%rcx
xorq %rcx,%rsi
andq %rax,%rsi
xorq %rcx,%rsi
movq %r14,(%rsp,%r14,8) movq %r14,(%rsp,%r14,8)
movq %rsi,(%rdi,%r14,8) movq %rax,(%rdi,%r14,8)
leaq 1(%r14),%r14 leaq 1(%r14),%r14
subq $1,%r15 subq $1,%r15
jnz .Lcopy jnz .Lcopy
movq 8(%rsp,%r9,8),%rsi movq 8(%rsp,%r9,8),%rsi
.cfi_def_cfa %rsi,8
movq $1,%rax movq $1,%rax
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 .cfi_restore %r15
movq 16(%rsi),%r13 movq -40(%rsi),%r14
movq 24(%rsi),%r12 .cfi_restore %r14
movq 32(%rsi),%rbp movq -32(%rsi),%r13
movq 40(%rsi),%rbx .cfi_restore %r13
leaq 48(%rsi),%rsp movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul_epilogue: .Lmul_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.cfi_endproc
.size bn_mul_mont,.-bn_mul_mont .size bn_mul_mont,.-bn_mul_mont
.type bn_mul4x_mont,@function .type bn_mul4x_mont,@function
.align 16 .align 16
bn_mul4x_mont: bn_mul4x_mont:
.cfi_startproc
movl %r9d,%r9d
movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter: .Lmul4x_enter:
pushq %rbx pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp pushq %rbp
.cfi_offset %rbp,-24
pushq %r12 pushq %r12
.cfi_offset %r12,-32
pushq %r13 pushq %r13
.cfi_offset %r13,-40
pushq %r14 pushq %r14
.cfi_offset %r14,-48
pushq %r15 pushq %r15
.cfi_offset %r15,-56
movl %r9d,%r9d negq %r9
leaq 4(%r9),%r10
movq %rsp,%r11 movq %rsp,%r11
negq %r10 leaq -32(%rsp,%r9,8),%r10
leaq (%rsp,%r10,8),%rsp negq %r9
andq $-1024,%rsp andq $-1024,%r10
movq %r11,8(%rsp,%r9,8) subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul4x_page_walk
jmp .Lmul4x_page_walk_done
.Lmul4x_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul4x_page_walk
.Lmul4x_page_walk_done:
movq %rax,8(%rsp,%r9,8)
.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul4x_body: .Lmul4x_body:
movq %rdi,16(%rsp,%r9,8) movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12 movq %rdx,%r12
@ -531,9 +601,11 @@ bn_mul4x_mont:
cmpq %r9,%r14 cmpq %r9,%r14
jb .Louter4x jb .Louter4x
movq 16(%rsp,%r9,8),%rdi movq 16(%rsp,%r9,8),%rdi
leaq -4(%r9),%r15
movq 0(%rsp),%rax movq 0(%rsp),%rax
pxor %xmm0,%xmm0
movq 8(%rsp),%rdx movq 8(%rsp),%rdx
shrq $2,%r9 shrq $2,%r15
leaq (%rsp),%rsi leaq (%rsp),%rsi
xorq %r14,%r14 xorq %r14,%r14
@ -541,7 +613,6 @@ bn_mul4x_mont:
movq 16(%rsi),%rbx movq 16(%rsi),%rbx
movq 24(%rsi),%rbp movq 24(%rsi),%rbp
sbbq 8(%rcx),%rdx sbbq 8(%rcx),%rdx
leaq -1(%r9),%r15
jmp .Lsub4x jmp .Lsub4x
.align 16 .align 16
.Lsub4x: .Lsub4x:
@ -569,47 +640,55 @@ bn_mul4x_mont:
movq %rbx,16(%rdi,%r14,8) movq %rbx,16(%rdi,%r14,8)
sbbq $0,%rax sbbq $0,%rax
movq %rax,%xmm0
punpcklqdq %xmm0,%xmm0
movq %rbp,24(%rdi,%r14,8) movq %rbp,24(%rdi,%r14,8)
xorq %r14,%r14 xorq %r14,%r14
andq %rax,%rsi
notq %rax
movq %rdi,%rcx
andq %rax,%rcx
leaq -4(%r9),%r15
orq %rcx,%rsi
shrq $2,%r15
movq %r9,%r15 movdqu (%rsi),%xmm1
pxor %xmm5,%xmm5 movdqa %xmm0,(%rsp)
movdqu %xmm1,(%rdi)
jmp .Lcopy4x jmp .Lcopy4x
.align 16 .align 16
.Lcopy4x: .Lcopy4x:
movdqu (%rsp,%r14,1),%xmm2 movdqu 16(%rsi,%r14,1),%xmm2
movdqu 16(%rsp,%r14,1),%xmm4 movdqu 32(%rsi,%r14,1),%xmm1
movdqu (%rdi,%r14,1),%xmm1 movdqa %xmm0,16(%rsp,%r14,1)
movdqu 16(%rdi,%r14,1),%xmm3 movdqu %xmm2,16(%rdi,%r14,1)
pxor %xmm1,%xmm2 movdqa %xmm0,32(%rsp,%r14,1)
pxor %xmm3,%xmm4 movdqu %xmm1,32(%rdi,%r14,1)
pand %xmm0,%xmm2
pand %xmm0,%xmm4
pxor %xmm1,%xmm2
pxor %xmm3,%xmm4
movdqu %xmm2,(%rdi,%r14,1)
movdqu %xmm4,16(%rdi,%r14,1)
movdqa %xmm5,(%rsp,%r14,1)
movdqa %xmm5,16(%rsp,%r14,1)
leaq 32(%r14),%r14 leaq 32(%r14),%r14
decq %r15 decq %r15
jnz .Lcopy4x jnz .Lcopy4x
shlq $2,%r9 movdqu 16(%rsi,%r14,1),%xmm2
movdqa %xmm0,16(%rsp,%r14,1)
movdqu %xmm2,16(%rdi,%r14,1)
movq 8(%rsp,%r9,8),%rsi movq 8(%rsp,%r9,8),%rsi
.cfi_def_cfa %rsi, 8
movq $1,%rax movq $1,%rax
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 .cfi_restore %r15
movq 16(%rsi),%r13 movq -40(%rsi),%r14
movq 24(%rsi),%r12 .cfi_restore %r14
movq 32(%rsi),%rbp movq -32(%rsi),%r13
movq 40(%rsi),%rbx .cfi_restore %r13
leaq 48(%rsi),%rsp movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul4x_epilogue: .Lmul4x_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.cfi_endproc
.size bn_mul4x_mont,.-bn_mul4x_mont .size bn_mul4x_mont,.-bn_mul4x_mont
.extern bn_sqr8x_internal .extern bn_sqr8x_internal
.hidden bn_sqr8x_internal .hidden bn_sqr8x_internal
@ -617,14 +696,23 @@ bn_mul4x_mont:
.type bn_sqr8x_mont,@function .type bn_sqr8x_mont,@function
.align 32 .align 32
bn_sqr8x_mont: bn_sqr8x_mont:
.Lsqr8x_enter: .cfi_startproc
movq %rsp,%rax movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lsqr8x_enter:
pushq %rbx pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp pushq %rbp
.cfi_offset %rbp,-24
pushq %r12 pushq %r12
.cfi_offset %r12,-32
pushq %r13 pushq %r13
.cfi_offset %r13,-40
pushq %r14 pushq %r14
.cfi_offset %r14,-48
pushq %r15 pushq %r15
.cfi_offset %r15,-56
.Lsqr8x_prologue:
movl %r9d,%r10d movl %r9d,%r10d
shll $3,%r9d shll $3,%r9d
@ -637,30 +725,49 @@ bn_sqr8x_mont:
leaq -64(%rsp,%r9,2),%r11 leaq -64(%rsp,%r9,2),%r11
movq %rsp,%rbp
movq (%r8),%r8 movq (%r8),%r8
subq %rsi,%r11 subq %rsi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb .Lsqr8x_sp_alt jb .Lsqr8x_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -64(%rsp,%r9,2),%rsp leaq -64(%rbp,%r9,2),%rbp
jmp .Lsqr8x_sp_done jmp .Lsqr8x_sp_done
.align 32 .align 32
.Lsqr8x_sp_alt: .Lsqr8x_sp_alt:
leaq 4096-64(,%r9,2),%r10 leaq 4096-64(,%r9,2),%r10
leaq -64(%rsp,%r9,2),%rsp leaq -64(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
.Lsqr8x_sp_done: .Lsqr8x_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lsqr8x_page_walk
jmp .Lsqr8x_page_walk_done
.align 16
.Lsqr8x_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lsqr8x_page_walk
.Lsqr8x_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9
movq %r8,32(%rsp) movq %r8,32(%rsp)
movq %rax,40(%rsp) movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lsqr8x_body: .Lsqr8x_body:
.byte 102,72,15,110,209 .byte 102,72,15,110,209
@ -707,6 +814,7 @@ bn_sqr8x_mont:
pxor %xmm0,%xmm0 pxor %xmm0,%xmm0
pshufd $0,%xmm1,%xmm1 pshufd $0,%xmm1,%xmm1
movq 40(%rsp),%rsi movq 40(%rsp),%rsi
.cfi_def_cfa %rsi,8
jmp .Lsqr8x_cond_copy jmp .Lsqr8x_cond_copy
.align 32 .align 32
@ -736,14 +844,22 @@ bn_sqr8x_mont:
movq $1,%rax movq $1,%rax
movq -48(%rsi),%r15 movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14 movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13 movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12 movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lsqr8x_epilogue: .Lsqr8x_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.cfi_endproc
.size bn_sqr8x_mont,.-bn_sqr8x_mont .size bn_sqr8x_mont,.-bn_sqr8x_mont
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16 .align 16
@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
.extern OPENSSL_ia32cap_P .extern OPENSSL_ia32cap_P
@ -9,30 +9,64 @@
.type bn_mul_mont_gather5,@function .type bn_mul_mont_gather5,@function
.align 64 .align 64
bn_mul_mont_gather5: bn_mul_mont_gather5:
.cfi_startproc
movl %r9d,%r9d
movq %rsp,%rax
.cfi_def_cfa_register %rax
testl $7,%r9d testl $7,%r9d
jnz .Lmul_enter jnz .Lmul_enter
jmp .Lmul4x_enter jmp .Lmul4x_enter
.align 16 .align 16
.Lmul_enter: .Lmul_enter:
movl %r9d,%r9d
movq %rsp,%rax
movd 8(%rsp),%xmm5 movd 8(%rsp),%xmm5
leaq .Linc(%rip),%r10
pushq %rbx pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp pushq %rbp
.cfi_offset %rbp,-24
pushq %r12 pushq %r12
.cfi_offset %r12,-32
pushq %r13 pushq %r13
.cfi_offset %r13,-40
pushq %r14 pushq %r14
.cfi_offset %r14,-48
pushq %r15 pushq %r15
.cfi_offset %r15,-56
leaq 2(%r9),%r11 negq %r9
negq %r11 movq %rsp,%r11
leaq -264(%rsp,%r11,8),%rsp leaq -280(%rsp,%r9,8),%r10
andq $-1024,%rsp negq %r9
andq $-1024,%r10
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul_page_walk
jmp .Lmul_page_walk_done
.Lmul_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul_page_walk
.Lmul_page_walk_done:
leaq .Linc(%rip),%r10
movq %rax,8(%rsp,%r9,8) movq %rax,8(%rsp,%r9,8)
.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body: .Lmul_body:
leaq 128(%rdx),%r12 leaq 128(%rdx),%r12
movdqa 0(%r10),%xmm0 movdqa 0(%r10),%xmm0
movdqa 16(%r10),%xmm1 movdqa 16(%r10),%xmm1
@ -362,7 +396,8 @@ bn_mul_mont_gather5:
movq %r9,%r15 movq %r9,%r15
jmp .Lsub jmp .Lsub
.align 16 .align 16
.Lsub: sbbq (%rcx,%r14,8),%rax .Lsub:
sbbq (%rcx,%r14,8),%rax
movq %rax,(%rdi,%r14,8) movq %rax,(%rdi,%r14,8)
movq 8(%rsi,%r14,8),%rax movq 8(%rsi,%r14,8),%rax
leaq 1(%r14),%r14 leaq 1(%r14),%r14
@ -371,45 +406,64 @@ bn_mul_mont_gather5:
sbbq $0,%rax sbbq $0,%rax
xorq %r14,%r14 xorq %r14,%r14
andq %rax,%rsi
notq %rax
movq %rdi,%rcx
andq %rax,%rcx
movq %r9,%r15 movq %r9,%r15
orq %rcx,%rsi
.align 16 .align 16
.Lcopy: .Lcopy:
movq (%rsp,%r14,8),%rsi movq (%rsi,%r14,8),%rax
movq (%rdi,%r14,8),%rcx
xorq %rcx,%rsi
andq %rax,%rsi
xorq %rcx,%rsi
movq %r14,(%rsp,%r14,8) movq %r14,(%rsp,%r14,8)
movq %rsi,(%rdi,%r14,8) movq %rax,(%rdi,%r14,8)
leaq 1(%r14),%r14 leaq 1(%r14),%r14
subq $1,%r15 subq $1,%r15
jnz .Lcopy jnz .Lcopy
movq 8(%rsp,%r9,8),%rsi movq 8(%rsp,%r9,8),%rsi
.cfi_def_cfa %rsi,8
movq $1,%rax movq $1,%rax
movq -48(%rsi),%r15 movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14 movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13 movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12 movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul_epilogue: .Lmul_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.cfi_endproc
.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5
.type bn_mul4x_mont_gather5,@function .type bn_mul4x_mont_gather5,@function
.align 32 .align 32
bn_mul4x_mont_gather5: bn_mul4x_mont_gather5:
.Lmul4x_enter: .cfi_startproc
.byte 0x67 .byte 0x67
movq %rsp,%rax movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter:
pushq %rbx pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp pushq %rbp
.cfi_offset %rbp,-24
pushq %r12 pushq %r12
.cfi_offset %r12,-32
pushq %r13 pushq %r13
.cfi_offset %r13,-40
pushq %r14 pushq %r14
.cfi_offset %r14,-48
pushq %r15 pushq %r15
.cfi_offset %r15,-56
.Lmul4x_prologue:
.byte 0x67 .byte 0x67
shll $3,%r9d shll $3,%r9d
@ -426,43 +480,70 @@ bn_mul4x_mont_gather5:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb .Lmul4xsp_alt jb .Lmul4xsp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp .Lmul4xsp_done jmp .Lmul4xsp_done
.align 32 .align 32
.Lmul4xsp_alt: .Lmul4xsp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
.Lmul4xsp_done: .Lmul4xsp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmul4x_page_walk
jmp .Lmul4x_page_walk_done
.Lmul4x_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmul4x_page_walk
.Lmul4x_page_walk_done:
negq %r9 negq %r9
movq %rax,40(%rsp) movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lmul4x_body: .Lmul4x_body:
call mul4x_internal call mul4x_internal
movq 40(%rsp),%rsi movq 40(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq $1,%rax movq $1,%rax
movq -48(%rsi),%r15 movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14 movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13 movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12 movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul4x_epilogue: .Lmul4x_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.cfi_endproc
.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
.type mul4x_internal,@function .type mul4x_internal,@function
@ -995,13 +1076,22 @@ mul4x_internal:
.type bn_power5,@function .type bn_power5,@function
.align 32 .align 32
bn_power5: bn_power5:
.cfi_startproc
movq %rsp,%rax movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp pushq %rbp
.cfi_offset %rbp,-24
pushq %r12 pushq %r12
.cfi_offset %r12,-32
pushq %r13 pushq %r13
.cfi_offset %r13,-40
pushq %r14 pushq %r14
.cfi_offset %r14,-48
pushq %r15 pushq %r15
.cfi_offset %r15,-56
.Lpower5_prologue:
shll $3,%r9d shll $3,%r9d
leal (%r9,%r9,2),%r10d leal (%r9,%r9,2),%r10d
@ -1016,24 +1106,41 @@ bn_power5:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb .Lpwr_sp_alt jb .Lpwr_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp .Lpwr_sp_done jmp .Lpwr_sp_done
.align 32 .align 32
.Lpwr_sp_alt: .Lpwr_sp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
.Lpwr_sp_done: .Lpwr_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lpwr_page_walk
jmp .Lpwr_page_walk_done
.Lpwr_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lpwr_page_walk
.Lpwr_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9
@ -1048,6 +1155,7 @@ bn_power5:
movq %r8,32(%rsp) movq %r8,32(%rsp)
movq %rax,40(%rsp) movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lpower5_body: .Lpower5_body:
.byte 102,72,15,110,207 .byte 102,72,15,110,207
.byte 102,72,15,110,209 .byte 102,72,15,110,209
@ -1074,16 +1182,25 @@ bn_power5:
call mul4x_internal call mul4x_internal
movq 40(%rsp),%rsi movq 40(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq $1,%rax movq $1,%rax
movq -48(%rsi),%r15 movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14 movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13 movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12 movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpower5_epilogue: .Lpower5_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.cfi_endproc
.size bn_power5,.-bn_power5 .size bn_power5,.-bn_power5
.globl bn_sqr8x_internal .globl bn_sqr8x_internal
@ -1826,6 +1943,7 @@ __bn_sqr8x_reduction:
.align 32 .align 32
.L8x_tail_done: .L8x_tail_done:
xorq %rax,%rax
addq (%rdx),%r8 addq (%rdx),%r8
adcq $0,%r9 adcq $0,%r9
adcq $0,%r10 adcq $0,%r10
@ -1834,9 +1952,7 @@ __bn_sqr8x_reduction:
adcq $0,%r13 adcq $0,%r13
adcq $0,%r14 adcq $0,%r14
adcq $0,%r15 adcq $0,%r15
adcq $0,%rax
xorq %rax,%rax
negq %rsi negq %rsi
.L8x_no_tail: .L8x_no_tail:
@ -1937,14 +2053,23 @@ bn_from_montgomery:
.type bn_from_mont8x,@function .type bn_from_mont8x,@function
.align 32 .align 32
bn_from_mont8x: bn_from_mont8x:
.cfi_startproc
.byte 0x67 .byte 0x67
movq %rsp,%rax movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp pushq %rbp
.cfi_offset %rbp,-24
pushq %r12 pushq %r12
.cfi_offset %r12,-32
pushq %r13 pushq %r13
.cfi_offset %r13,-40
pushq %r14 pushq %r14
.cfi_offset %r14,-48
pushq %r15 pushq %r15
.cfi_offset %r15,-56
.Lfrom_prologue:
shll $3,%r9d shll $3,%r9d
leaq (%r9,%r9,2),%r10 leaq (%r9,%r9,2),%r10
@ -1959,24 +2084,41 @@ bn_from_mont8x:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb .Lfrom_sp_alt jb .Lfrom_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp .Lfrom_sp_done jmp .Lfrom_sp_done
.align 32 .align 32
.Lfrom_sp_alt: .Lfrom_sp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
.Lfrom_sp_done: .Lfrom_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lfrom_page_walk
jmp .Lfrom_page_walk_done
.Lfrom_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lfrom_page_walk
.Lfrom_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9
@ -1991,6 +2133,7 @@ bn_from_mont8x:
movq %r8,32(%rsp) movq %r8,32(%rsp)
movq %rax,40(%rsp) movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lfrom_body: .Lfrom_body:
movq %r9,%r11 movq %r9,%r11
leaq 48(%rsp),%rax leaq 48(%rsp),%rax
@ -2026,11 +2169,12 @@ bn_from_mont8x:
pxor %xmm0,%xmm0 pxor %xmm0,%xmm0
leaq 48(%rsp),%rax leaq 48(%rsp),%rax
movq 40(%rsp),%rsi
jmp .Lfrom_mont_zero jmp .Lfrom_mont_zero
.align 32 .align 32
.Lfrom_mont_zero: .Lfrom_mont_zero:
movq 40(%rsp),%rsi
.cfi_def_cfa %rsi,8
movdqa %xmm0,0(%rax) movdqa %xmm0,0(%rax)
movdqa %xmm0,16(%rax) movdqa %xmm0,16(%rax)
movdqa %xmm0,32(%rax) movdqa %xmm0,32(%rax)
@ -2041,14 +2185,22 @@ bn_from_mont8x:
movq $1,%rax movq $1,%rax
movq -48(%rsi),%r15 movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14 movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13 movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12 movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lfrom_epilogue: .Lfrom_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.cfi_endproc
.size bn_from_mont8x,.-bn_from_mont8x .size bn_from_mont8x,.-bn_from_mont8x
.globl bn_scatter5 .globl bn_scatter5
.hidden bn_scatter5 .hidden bn_scatter5

View File

@ -1,19 +0,0 @@
#if defined(__x86_64__)
.text
.globl aesni_gcm_encrypt
.hidden aesni_gcm_encrypt
.type aesni_gcm_encrypt,@function
aesni_gcm_encrypt:
xorl %eax,%eax
.byte 0xf3,0xc3
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
.globl aesni_gcm_decrypt
.hidden aesni_gcm_decrypt
.type aesni_gcm_decrypt,@function
aesni_gcm_decrypt:
xorl %eax,%eax
.byte 0xf3,0xc3
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
#endif

File diff suppressed because it is too large

View File

@ -1,596 +0,0 @@
#if defined(__x86_64__)
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
.globl asm_RC4
.hidden asm_RC4
.type asm_RC4,@function
.align 16
asm_RC4:
orq %rsi,%rsi
jne .Lentry
.byte 0xf3,0xc3
.Lentry:
pushq %rbx
pushq %r12
pushq %r13
.Lprologue:
movq %rsi,%r11
movq %rdx,%r12
movq %rcx,%r13
xorq %r10,%r10
xorq %rcx,%rcx
leaq 8(%rdi),%rdi
movb -8(%rdi),%r10b
movb -4(%rdi),%cl
cmpl $-1,256(%rdi)
je .LRC4_CHAR
movl OPENSSL_ia32cap_P(%rip),%r8d
xorq %rbx,%rbx
incb %r10b
subq %r10,%rbx
subq %r12,%r13
movl (%rdi,%r10,4),%eax
testq $-16,%r11
jz .Lloop1
btl $30,%r8d
jc .Lintel
andq $7,%rbx
leaq 1(%r10),%rsi
jz .Loop8
subq %rbx,%r11
.Loop8_warmup:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r12,%r13,1)
leaq 1(%r12),%r12
decq %rbx
jnz .Loop8_warmup
leaq 1(%r10),%rsi
jmp .Loop8
.align 16
.Loop8:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 0(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,0(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 4(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,4(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 8(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,8(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 12(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,12(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 16(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,16(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 20(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,20(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 24(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,24(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb $8,%sil
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl -4(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,28(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb $8,%r10b
rorq $8,%r8
subq $8,%r11
xorq (%r12),%r8
movq %r8,(%r12,%r13,1)
leaq 8(%r12),%r12
testq $-8,%r11
jnz .Loop8
cmpq $0,%r11
jne .Lloop1
jmp .Lexit
.align 16
.Lintel:
testq $-32,%r11
jz .Lloop1
andq $15,%rbx
jz .Loop16_is_hot
subq %rbx,%r11
.Loop16_warmup:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r12,%r13,1)
leaq 1(%r12),%r12
decq %rbx
jnz .Loop16_warmup
movq %rcx,%rbx
xorq %rcx,%rcx
movb %bl,%cl
.Loop16_is_hot:
leaq (%rdi,%r10,4),%rsi
addb %al,%cl
movl (%rdi,%rcx,4),%edx
pxor %xmm0,%xmm0
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 4(%rsi),%ebx
movzbl %al,%eax
movl %edx,0(%rsi)
addb %bl,%cl
pinsrw $0,(%rdi,%rax,4),%xmm0
jmp .Loop16_enter
.align 16
.Loop16:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
pxor %xmm0,%xmm2
psllq $8,%xmm1
pxor %xmm0,%xmm0
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 4(%rsi),%ebx
movzbl %al,%eax
movl %edx,0(%rsi)
pxor %xmm1,%xmm2
addb %bl,%cl
pinsrw $0,(%rdi,%rax,4),%xmm0
movdqu %xmm2,(%r12,%r13,1)
leaq 16(%r12),%r12
.Loop16_enter:
movl (%rdi,%rcx,4),%edx
pxor %xmm1,%xmm1
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 8(%rsi),%eax
movzbl %bl,%ebx
movl %edx,4(%rsi)
addb %al,%cl
pinsrw $0,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 12(%rsi),%ebx
movzbl %al,%eax
movl %edx,8(%rsi)
addb %bl,%cl
pinsrw $1,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 16(%rsi),%eax
movzbl %bl,%ebx
movl %edx,12(%rsi)
addb %al,%cl
pinsrw $1,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 20(%rsi),%ebx
movzbl %al,%eax
movl %edx,16(%rsi)
addb %bl,%cl
pinsrw $2,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 24(%rsi),%eax
movzbl %bl,%ebx
movl %edx,20(%rsi)
addb %al,%cl
pinsrw $2,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 28(%rsi),%ebx
movzbl %al,%eax
movl %edx,24(%rsi)
addb %bl,%cl
pinsrw $3,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 32(%rsi),%eax
movzbl %bl,%ebx
movl %edx,28(%rsi)
addb %al,%cl
pinsrw $3,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 36(%rsi),%ebx
movzbl %al,%eax
movl %edx,32(%rsi)
addb %bl,%cl
pinsrw $4,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 40(%rsi),%eax
movzbl %bl,%ebx
movl %edx,36(%rsi)
addb %al,%cl
pinsrw $4,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 44(%rsi),%ebx
movzbl %al,%eax
movl %edx,40(%rsi)
addb %bl,%cl
pinsrw $5,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 48(%rsi),%eax
movzbl %bl,%ebx
movl %edx,44(%rsi)
addb %al,%cl
pinsrw $5,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 52(%rsi),%ebx
movzbl %al,%eax
movl %edx,48(%rsi)
addb %bl,%cl
pinsrw $6,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 56(%rsi),%eax
movzbl %bl,%ebx
movl %edx,52(%rsi)
addb %al,%cl
pinsrw $6,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 60(%rsi),%ebx
movzbl %al,%eax
movl %edx,56(%rsi)
addb %bl,%cl
pinsrw $7,(%rdi,%rax,4),%xmm0
addb $16,%r10b
movdqu (%r12),%xmm2
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movzbl %bl,%ebx
movl %edx,60(%rsi)
leaq (%rdi,%r10,4),%rsi
pinsrw $7,(%rdi,%rbx,4),%xmm1
movl (%rsi),%eax
movq %rcx,%rbx
xorq %rcx,%rcx
subq $16,%r11
movb %bl,%cl
testq $-16,%r11
jnz .Loop16
psllq $8,%xmm1
pxor %xmm0,%xmm2
pxor %xmm1,%xmm2
movdqu %xmm2,(%r12,%r13,1)
leaq 16(%r12),%r12
cmpq $0,%r11
jne .Lloop1
jmp .Lexit
.align 16
.Lloop1:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r12,%r13,1)
leaq 1(%r12),%r12
decq %r11
jnz .Lloop1
jmp .Lexit
.align 16
.LRC4_CHAR:
addb $1,%r10b
movzbl (%rdi,%r10,1),%eax
testq $-8,%r11
jz .Lcloop1
jmp .Lcloop8
.align 16
.Lcloop8:
movl (%r12),%r8d
movl 4(%r12),%r9d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov0
movq %rax,%rbx
.Lcmov0:
addb %al,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov1
movq %rbx,%rax
.Lcmov1:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov2
movq %rax,%rbx
.Lcmov2:
addb %al,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov3
movq %rbx,%rax
.Lcmov3:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov4
movq %rax,%rbx
.Lcmov4:
addb %al,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov5
movq %rbx,%rax
.Lcmov5:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov6
movq %rax,%rbx
.Lcmov6:
addb %al,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov7
movq %rbx,%rax
.Lcmov7:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
leaq -8(%r11),%r11
movl %r8d,(%r13)
leaq 8(%r12),%r12
movl %r9d,4(%r13)
leaq 8(%r13),%r13
testq $-8,%r11
jnz .Lcloop8
cmpq $0,%r11
jne .Lcloop1
jmp .Lexit
.align 16
.Lcloop1:
addb %al,%cl
movzbl %cl,%ecx
movzbl (%rdi,%rcx,1),%edx
movb %al,(%rdi,%rcx,1)
movb %dl,(%rdi,%r10,1)
addb %al,%dl
addb $1,%r10b
movzbl %dl,%edx
movzbl %r10b,%r10d
movzbl (%rdi,%rdx,1),%edx
movzbl (%rdi,%r10,1),%eax
xorb (%r12),%dl
leaq 1(%r12),%r12
movb %dl,(%r13)
leaq 1(%r13),%r13
subq $1,%r11
jnz .Lcloop1
jmp .Lexit
.align 16
.Lexit:
subb $1,%r10b
movl %r10d,-8(%rdi)
movl %ecx,-4(%rdi)
movq (%rsp),%r13
movq 8(%rsp),%r12
movq 16(%rsp),%rbx
addq $24,%rsp
.Lepilogue:
.byte 0xf3,0xc3
.size asm_RC4,.-asm_RC4
.globl asm_RC4_set_key
.hidden asm_RC4_set_key
.type asm_RC4_set_key,@function
.align 16
asm_RC4_set_key:
leaq 8(%rdi),%rdi
leaq (%rdx,%rsi,1),%rdx
negq %rsi
movq %rsi,%rcx
xorl %eax,%eax
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
movl OPENSSL_ia32cap_P(%rip),%r8d
btl $20,%r8d
jc .Lc1stloop
jmp .Lw1stloop
.align 16
.Lw1stloop:
movl %eax,(%rdi,%rax,4)
addb $1,%al
jnc .Lw1stloop
xorq %r9,%r9
xorq %r8,%r8
.align 16
.Lw2ndloop:
movl (%rdi,%r9,4),%r10d
addb (%rdx,%rsi,1),%r8b
addb %r10b,%r8b
addq $1,%rsi
movl (%rdi,%r8,4),%r11d
cmovzq %rcx,%rsi
movl %r10d,(%rdi,%r8,4)
movl %r11d,(%rdi,%r9,4)
addb $1,%r9b
jnc .Lw2ndloop
jmp .Lexit_key
.align 16
.Lc1stloop:
movb %al,(%rdi,%rax,1)
addb $1,%al
jnc .Lc1stloop
xorq %r9,%r9
xorq %r8,%r8
.align 16
.Lc2ndloop:
movb (%rdi,%r9,1),%r10b
addb (%rdx,%rsi,1),%r8b
addb %r10b,%r8b
addq $1,%rsi
movb (%rdi,%r8,1),%r11b
jnz .Lcnowrap
movq %rcx,%rsi
.Lcnowrap:
movb %r10b,(%rdi,%r8,1)
movb %r11b,(%rdi,%r9,1)
addb $1,%r9b
jnc .Lc2ndloop
movl $-1,256(%rdi)
.align 16
.Lexit_key:
xorl %eax,%eax
movl %eax,-8(%rdi)
movl %eax,-4(%rdi)
.byte 0xf3,0xc3
.size asm_RC4_set_key,.-asm_RC4_set_key
#endif

View File

@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "chacha-x86.S"
.text .text
.globl _ChaCha20_ctr32 .globl _ChaCha20_ctr32
.private_extern _ChaCha20_ctr32 .private_extern _ChaCha20_ctr32

View File

@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "aes-586.S"
.text .text
.private_extern __x86_AES_encrypt_compact .private_extern __x86_AES_encrypt_compact
.align 4 .align 4

View File

@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "src/crypto/aes/asm/aesni-x86.S"
.text .text
.globl _aesni_encrypt .globl _aesni_encrypt
.private_extern _aesni_encrypt .private_extern _aesni_encrypt

View File

@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "src/crypto/bn/asm/bn-586.S"
.text .text
.globl _bn_mul_add_words .globl _bn_mul_add_words
.private_extern _bn_mul_add_words .private_extern _bn_mul_add_words

View File

@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "src/crypto/bn/asm/co-586.S"
.text .text
.globl _bn_mul_comba8 .globl _bn_mul_comba8
.private_extern _bn_mul_comba8 .private_extern _bn_mul_comba8

View File

@ -1,207 +1,5 @@
#if defined(__i386__) #if defined(__i386__)
.file "ghash-x86.S"
.text .text
.globl _gcm_gmult_4bit_x86
.private_extern _gcm_gmult_4bit_x86
.align 4
_gcm_gmult_4bit_x86:
L_gcm_gmult_4bit_x86_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
subl $84,%esp
movl 104(%esp),%edi
movl 108(%esp),%esi
movl (%edi),%ebp
movl 4(%edi),%edx
movl 8(%edi),%ecx
movl 12(%edi),%ebx
movl $0,16(%esp)
movl $471859200,20(%esp)
movl $943718400,24(%esp)
movl $610271232,28(%esp)
movl $1887436800,32(%esp)
movl $1822425088,36(%esp)
movl $1220542464,40(%esp)
movl $1423966208,44(%esp)
movl $3774873600,48(%esp)
movl $4246732800,52(%esp)
movl $3644850176,56(%esp)
movl $3311403008,60(%esp)
movl $2441084928,64(%esp)
movl $2376073216,68(%esp)
movl $2847932416,72(%esp)
movl $3051356160,76(%esp)
movl %ebp,(%esp)
movl %edx,4(%esp)
movl %ecx,8(%esp)
movl %ebx,12(%esp)
shrl $20,%ebx
andl $240,%ebx
movl 4(%esi,%ebx,1),%ebp
movl (%esi,%ebx,1),%edx
movl 12(%esi,%ebx,1),%ecx
movl 8(%esi,%ebx,1),%ebx
xorl %eax,%eax
movl $15,%edi
jmp L000x86_loop
.align 4,0x90
L000x86_loop:
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
andb $240,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
decl %edi
js L001x86_break
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
shlb $4,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
jmp L000x86_loop
.align 4,0x90
L001x86_break:
bswap %ebx
bswap %ecx
bswap %edx
bswap %ebp
movl 104(%esp),%edi
movl %ebx,12(%edi)
movl %ecx,8(%edi)
movl %edx,4(%edi)
movl %ebp,(%edi)
addl $84,%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _gcm_ghash_4bit_x86
.private_extern _gcm_ghash_4bit_x86
.align 4
_gcm_ghash_4bit_x86:
L_gcm_ghash_4bit_x86_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
subl $84,%esp
movl 104(%esp),%ebx
movl 108(%esp),%esi
movl 112(%esp),%edi
movl 116(%esp),%ecx
addl %edi,%ecx
movl %ecx,116(%esp)
movl (%ebx),%ebp
movl 4(%ebx),%edx
movl 8(%ebx),%ecx
movl 12(%ebx),%ebx
movl $0,16(%esp)
movl $471859200,20(%esp)
movl $943718400,24(%esp)
movl $610271232,28(%esp)
movl $1887436800,32(%esp)
movl $1822425088,36(%esp)
movl $1220542464,40(%esp)
movl $1423966208,44(%esp)
movl $3774873600,48(%esp)
movl $4246732800,52(%esp)
movl $3644850176,56(%esp)
movl $3311403008,60(%esp)
movl $2441084928,64(%esp)
movl $2376073216,68(%esp)
movl $2847932416,72(%esp)
movl $3051356160,76(%esp)
.align 4,0x90
L002x86_outer_loop:
xorl 12(%edi),%ebx
xorl 8(%edi),%ecx
xorl 4(%edi),%edx
xorl (%edi),%ebp
movl %ebx,12(%esp)
movl %ecx,8(%esp)
movl %edx,4(%esp)
movl %ebp,(%esp)
shrl $20,%ebx
andl $240,%ebx
movl 4(%esi,%ebx,1),%ebp
movl (%esi,%ebx,1),%edx
movl 12(%esi,%ebx,1),%ecx
movl 8(%esi,%ebx,1),%ebx
xorl %eax,%eax
movl $15,%edi
jmp L003x86_loop
.align 4,0x90
L003x86_loop:
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
andb $240,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
decl %edi
js L004x86_break
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
shlb $4,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
jmp L003x86_loop
.align 4,0x90
L004x86_break:
bswap %ebx
bswap %ecx
bswap %edx
bswap %ebp
movl 112(%esp),%edi
leal 16(%edi),%edi
cmpl 116(%esp),%edi
movl %edi,112(%esp)
jb L002x86_outer_loop
movl 104(%esp),%edi
movl %ebx,12(%edi)
movl %ecx,8(%edi)
movl %edx,4(%edi)
movl %ebp,(%edi)
addl $84,%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _gcm_gmult_4bit_mmx .globl _gcm_gmult_4bit_mmx
.private_extern _gcm_gmult_4bit_mmx .private_extern _gcm_gmult_4bit_mmx
.align 4 .align 4
@ -213,10 +11,10 @@ L_gcm_gmult_4bit_mmx_begin:
pushl %edi pushl %edi
movl 20(%esp),%edi movl 20(%esp),%edi
movl 24(%esp),%esi movl 24(%esp),%esi
call L005pic_point call L000pic_point
L005pic_point: L000pic_point:
popl %eax popl %eax
leal Lrem_4bit-L005pic_point(%eax),%eax leal Lrem_4bit-L000pic_point(%eax),%eax
movzbl 15(%edi),%ebx movzbl 15(%edi),%ebx
xorl %ecx,%ecx xorl %ecx,%ecx
movl %ebx,%edx movl %ebx,%edx
@ -227,9 +25,9 @@ L005pic_point:
movq 8(%esi,%ecx,1),%mm0 movq 8(%esi,%ecx,1),%mm0
movq (%esi,%ecx,1),%mm1 movq (%esi,%ecx,1),%mm1
movd %mm0,%ebx movd %mm0,%ebx
jmp L006mmx_loop jmp L001mmx_loop
.align 4,0x90 .align 4,0x90
L006mmx_loop: L001mmx_loop:
psrlq $4,%mm0 psrlq $4,%mm0
andl $15,%ebx andl $15,%ebx
movq %mm1,%mm2 movq %mm1,%mm2
@ -243,7 +41,7 @@ L006mmx_loop:
pxor (%esi,%edx,1),%mm1 pxor (%esi,%edx,1),%mm1
movl %ecx,%edx movl %ecx,%edx
pxor %mm2,%mm0 pxor %mm2,%mm0
js L007mmx_break js L002mmx_break
shlb $4,%cl shlb $4,%cl
andl $15,%ebx andl $15,%ebx
psrlq $4,%mm0 psrlq $4,%mm0
@ -256,9 +54,9 @@ L006mmx_loop:
movd %mm0,%ebx movd %mm0,%ebx
pxor (%esi,%ecx,1),%mm1 pxor (%esi,%ecx,1),%mm1
pxor %mm2,%mm0 pxor %mm2,%mm0
jmp L006mmx_loop jmp L001mmx_loop
.align 4,0x90 .align 4,0x90
L007mmx_break: L002mmx_break:
shlb $4,%cl shlb $4,%cl
andl $15,%ebx andl $15,%ebx
psrlq $4,%mm0 psrlq $4,%mm0
@ -314,10 +112,10 @@ L_gcm_ghash_4bit_mmx_begin:
movl 28(%esp),%ecx movl 28(%esp),%ecx
movl 32(%esp),%edx movl 32(%esp),%edx
movl %esp,%ebp movl %esp,%ebp
call L008pic_point call L003pic_point
L008pic_point: L003pic_point:
popl %esi popl %esi
leal Lrem_8bit-L008pic_point(%esi),%esi leal Lrem_8bit-L003pic_point(%esi),%esi
subl $544,%esp subl $544,%esp
andl $-64,%esp andl $-64,%esp
subl $16,%esp subl $16,%esp
@ -556,7 +354,7 @@ L008pic_point:
movl 8(%eax),%ebx movl 8(%eax),%ebx
movl 12(%eax),%edx movl 12(%eax),%edx
.align 4,0x90 .align 4,0x90
L009outer: L004outer:
xorl 12(%ecx),%edx xorl 12(%ecx),%edx
xorl 8(%ecx),%ebx xorl 8(%ecx),%ebx
pxor (%ecx),%mm6 pxor (%ecx),%mm6
@ -891,7 +689,7 @@ L009outer:
pshufw $27,%mm6,%mm6 pshufw $27,%mm6,%mm6
bswap %ebx bswap %ebx
cmpl 552(%esp),%ecx cmpl 552(%esp),%ecx
jne L009outer jne L004outer
movl 544(%esp),%eax movl 544(%esp),%eax
movl %edx,12(%eax) movl %edx,12(%eax)
movl %ebx,8(%eax) movl %ebx,8(%eax)
@ -910,10 +708,10 @@ _gcm_init_clmul:
L_gcm_init_clmul_begin: L_gcm_init_clmul_begin:
movl 4(%esp),%edx movl 4(%esp),%edx
movl 8(%esp),%eax movl 8(%esp),%eax
call L010pic call L005pic
L010pic: L005pic:
popl %ecx popl %ecx
leal Lbswap-L010pic(%ecx),%ecx leal Lbswap-L005pic(%ecx),%ecx
movdqu (%eax),%xmm2 movdqu (%eax),%xmm2
pshufd $78,%xmm2,%xmm2 pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4 pshufd $255,%xmm2,%xmm4
@ -978,10 +776,10 @@ _gcm_gmult_clmul:
L_gcm_gmult_clmul_begin: L_gcm_gmult_clmul_begin:
movl 4(%esp),%eax movl 4(%esp),%eax
movl 8(%esp),%edx movl 8(%esp),%edx
call L011pic call L006pic
L011pic: L006pic:
popl %ecx popl %ecx
leal Lbswap-L011pic(%ecx),%ecx leal Lbswap-L006pic(%ecx),%ecx
movdqu (%eax),%xmm0 movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5 movdqa (%ecx),%xmm5
movups (%edx),%xmm2 movups (%edx),%xmm2
@ -1036,16 +834,16 @@ L_gcm_ghash_clmul_begin:
movl 24(%esp),%edx movl 24(%esp),%edx
movl 28(%esp),%esi movl 28(%esp),%esi
movl 32(%esp),%ebx movl 32(%esp),%ebx
call L012pic call L007pic
L012pic: L007pic:
popl %ecx popl %ecx
leal Lbswap-L012pic(%ecx),%ecx leal Lbswap-L007pic(%ecx),%ecx
movdqu (%eax),%xmm0 movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5 movdqa (%ecx),%xmm5
movdqu (%edx),%xmm2 movdqu (%edx),%xmm2
.byte 102,15,56,0,197 .byte 102,15,56,0,197
subl $16,%ebx subl $16,%ebx
jz L013odd_tail jz L008odd_tail
movdqu (%esi),%xmm3 movdqu (%esi),%xmm3
movdqu 16(%esi),%xmm6 movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221 .byte 102,15,56,0,221
@ -1062,10 +860,10 @@ L012pic:
movups 16(%edx),%xmm2 movups 16(%edx),%xmm2
nop nop
subl $32,%ebx subl $32,%ebx
jbe L014even_tail jbe L009even_tail
jmp L015mod_loop jmp L010mod_loop
.align 5,0x90 .align 5,0x90
L015mod_loop: L010mod_loop:
pshufd $78,%xmm0,%xmm4 pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1 movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4 pxor %xmm0,%xmm4
@ -1120,8 +918,8 @@ L015mod_loop:
.byte 102,15,58,68,221,0 .byte 102,15,58,68,221,0
leal 32(%esi),%esi leal 32(%esi),%esi
subl $32,%ebx subl $32,%ebx
ja L015mod_loop ja L010mod_loop
L014even_tail: L009even_tail:
pshufd $78,%xmm0,%xmm4 pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1 movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4 pxor %xmm0,%xmm4
@ -1160,9 +958,9 @@ L014even_tail:
psrlq $1,%xmm0 psrlq $1,%xmm0
pxor %xmm1,%xmm0 pxor %xmm1,%xmm0
testl %ebx,%ebx testl %ebx,%ebx
jnz L016done jnz L011done
movups (%edx),%xmm2 movups (%edx),%xmm2
L013odd_tail: L008odd_tail:
movdqu (%esi),%xmm3 movdqu (%esi),%xmm3
.byte 102,15,56,0,221 .byte 102,15,56,0,221
pxor %xmm3,%xmm0 pxor %xmm3,%xmm0
@ -1201,7 +999,7 @@ L013odd_tail:
pxor %xmm4,%xmm0 pxor %xmm4,%xmm0
psrlq $1,%xmm0 psrlq $1,%xmm0
pxor %xmm1,%xmm0 pxor %xmm1,%xmm0
L016done: L011done:
.byte 102,15,56,0,197 .byte 102,15,56,0,197
movdqu %xmm0,(%eax) movdqu %xmm0,(%eax)
popl %edi popl %edi

View File

@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "src/crypto/md5/asm/md5-586.S"
.text .text
.globl _md5_block_asm_data_order .globl _md5_block_asm_data_order
.private_extern _md5_block_asm_data_order .private_extern _md5_block_asm_data_order

View File

@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "sha1-586.S"
.text .text
.globl _sha1_block_data_order .globl _sha1_block_data_order
.private_extern _sha1_block_data_order .private_extern _sha1_block_data_order

View File

@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "sha512-586.S"
.text .text
.globl _sha256_block_data_order .globl _sha256_block_data_order
.private_extern _sha256_block_data_order .private_extern _sha256_block_data_order

View File

@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "sha512-586.S"
.text .text
.globl _sha512_block_data_order .globl _sha512_block_data_order
.private_extern _sha512_block_data_order .private_extern _sha512_block_data_order

View File

@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "vpaes-x86.S"
.text .text
.align 6,0x90 .align 6,0x90
L_vpaes_consts: L_vpaes_consts:

View File

@ -1,5 +1,4 @@
#if defined(__i386__) #if defined(__i386__)
.file "src/crypto/bn/asm/x86-mont.S"
.text .text
.globl _bn_mul_mont .globl _bn_mul_mont
.private_extern _bn_mul_mont .private_extern _bn_mul_mont
@ -16,39 +15,54 @@ L_bn_mul_mont_begin:
jl L000just_leave jl L000just_leave
leal 20(%esp),%esi leal 20(%esp),%esi
leal 24(%esp),%edx leal 24(%esp),%edx
movl %esp,%ebp
addl $2,%edi addl $2,%edi
negl %edi negl %edi
leal -32(%esp,%edi,4),%esp leal -32(%esp,%edi,4),%ebp
negl %edi negl %edi
movl %esp,%eax movl %ebp,%eax
subl %edx,%eax subl %edx,%eax
andl $2047,%eax andl $2047,%eax
subl %eax,%esp subl %eax,%ebp
xorl %esp,%edx xorl %ebp,%edx
andl $2048,%edx andl $2048,%edx
xorl $2048,%edx xorl $2048,%edx
subl %edx,%esp subl %edx,%ebp
andl $-64,%esp andl $-64,%ebp
movl %esp,%eax
subl %ebp,%eax
andl $-4096,%eax
movl %esp,%edx
leal (%ebp,%eax,1),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja L001page_walk
jmp L002page_walk_done
.align 4,0x90
L001page_walk:
leal -4096(%esp),%esp
movl (%esp),%eax
cmpl %ebp,%esp
ja L001page_walk
L002page_walk_done:
movl (%esi),%eax movl (%esi),%eax
movl 4(%esi),%ebx movl 4(%esi),%ebx
movl 8(%esi),%ecx movl 8(%esi),%ecx
movl 12(%esi),%edx movl 12(%esi),%ebp
movl 16(%esi),%esi movl 16(%esi),%esi
movl (%esi),%esi movl (%esi),%esi
movl %eax,4(%esp) movl %eax,4(%esp)
movl %ebx,8(%esp) movl %ebx,8(%esp)
movl %ecx,12(%esp) movl %ecx,12(%esp)
movl %edx,16(%esp) movl %ebp,16(%esp)
movl %esi,20(%esp) movl %esi,20(%esp)
leal -3(%edi),%ebx leal -3(%edi),%ebx
movl %ebp,24(%esp) movl %edx,24(%esp)
call L001PIC_me_up call L003PIC_me_up
L001PIC_me_up: L003PIC_me_up:
popl %eax popl %eax
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001PIC_me_up(%eax),%eax movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax
btl $26,(%eax) btl $26,(%eax)
jnc L002non_sse2 jnc L004non_sse2
movl $-1,%eax movl $-1,%eax
movd %eax,%mm7 movd %eax,%mm7
movl 8(%esp),%esi movl 8(%esp),%esi
@ -72,7 +86,7 @@ L001PIC_me_up:
psrlq $32,%mm3 psrlq $32,%mm3
incl %ecx incl %ecx
.align 4,0x90 .align 4,0x90
L0031st: L0051st:
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
paddq %mm0,%mm2 paddq %mm0,%mm2
@ -87,7 +101,7 @@ L0031st:
psrlq $32,%mm3 psrlq $32,%mm3
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
cmpl %ebx,%ecx cmpl %ebx,%ecx
jl L0031st jl L0051st
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
paddq %mm0,%mm2 paddq %mm0,%mm2
@ -101,7 +115,7 @@ L0031st:
paddq %mm2,%mm3 paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4) movq %mm3,32(%esp,%ebx,4)
incl %edx incl %edx
L004outer: L006outer:
xorl %ecx,%ecx xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4 movd (%edi,%edx,4),%mm4
movd (%esi),%mm5 movd (%esi),%mm5
@ -123,7 +137,7 @@ L004outer:
paddq %mm6,%mm2 paddq %mm6,%mm2
incl %ecx incl %ecx
decl %ebx decl %ebx
L005inner: L007inner:
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
paddq %mm0,%mm2 paddq %mm0,%mm2
@ -140,7 +154,7 @@ L005inner:
paddq %mm6,%mm2 paddq %mm6,%mm2
decl %ebx decl %ebx
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
jnz L005inner jnz L007inner
movl %ecx,%ebx movl %ecx,%ebx
pmuludq %mm4,%mm0 pmuludq %mm4,%mm0
pmuludq %mm5,%mm1 pmuludq %mm5,%mm1
@ -158,11 +172,11 @@ L005inner:
movq %mm3,32(%esp,%ebx,4) movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx leal 1(%edx),%edx
cmpl %ebx,%edx cmpl %ebx,%edx
jle L004outer jle L006outer
emms emms
jmp L006common_tail jmp L008common_tail
.align 4,0x90 .align 4,0x90
L002non_sse2: L004non_sse2:
movl 8(%esp),%esi movl 8(%esp),%esi
leal 1(%ebx),%ebp leal 1(%ebx),%ebp
movl 12(%esp),%edi movl 12(%esp),%edi
@ -173,12 +187,12 @@ L002non_sse2:
leal 4(%edi,%ebx,4),%eax leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp orl %edx,%ebp
movl (%edi),%edi movl (%edi),%edi
jz L007bn_sqr_mont jz L009bn_sqr_mont
movl %eax,28(%esp) movl %eax,28(%esp)
movl (%esi),%eax movl (%esi),%eax
xorl %edx,%edx xorl %edx,%edx
.align 4,0x90 .align 4,0x90
L008mull: L010mull:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl %eax,%ebp addl %eax,%ebp
@ -187,7 +201,7 @@ L008mull:
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
jl L008mull jl L010mull
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
movl 20(%esp),%edi movl 20(%esp),%edi
@ -205,9 +219,9 @@ L008mull:
movl 4(%esi),%eax movl 4(%esi),%eax
adcl $0,%edx adcl $0,%edx
incl %ecx incl %ecx
jmp L0092ndmadd jmp L0112ndmadd
.align 4,0x90 .align 4,0x90
L0101stmadd: L0121stmadd:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ecx,4),%ebp addl 32(%esp,%ecx,4),%ebp
@ -218,7 +232,7 @@ L0101stmadd:
adcl $0,%edx adcl $0,%edx
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
jl L0101stmadd jl L0121stmadd
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ebx,4),%eax addl 32(%esp,%ebx,4),%eax
@ -241,7 +255,7 @@ L0101stmadd:
adcl $0,%edx adcl $0,%edx
movl $1,%ecx movl $1,%ecx
.align 4,0x90 .align 4,0x90
L0092ndmadd: L0112ndmadd:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ecx,4),%ebp addl 32(%esp,%ecx,4),%ebp
@ -252,7 +266,7 @@ L0092ndmadd:
adcl $0,%edx adcl $0,%edx
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4) movl %ebp,24(%esp,%ecx,4)
jl L0092ndmadd jl L0112ndmadd
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ebx,4),%ebp addl 32(%esp,%ebx,4),%ebp
@ -268,16 +282,16 @@ L0092ndmadd:
movl %edx,32(%esp,%ebx,4) movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4) movl %eax,36(%esp,%ebx,4)
je L006common_tail je L008common_tail
movl (%ecx),%edi movl (%ecx),%edi
movl 8(%esp),%esi movl 8(%esp),%esi
movl %ecx,12(%esp) movl %ecx,12(%esp)
xorl %ecx,%ecx xorl %ecx,%ecx
xorl %edx,%edx xorl %edx,%edx
movl (%esi),%eax movl (%esi),%eax
jmp L0101stmadd jmp L0121stmadd
.align 4,0x90 .align 4,0x90
L007bn_sqr_mont: L009bn_sqr_mont:
movl %ebx,(%esp) movl %ebx,(%esp)
movl %ecx,12(%esp) movl %ecx,12(%esp)
movl %edi,%eax movl %edi,%eax
@ -288,7 +302,7 @@ L007bn_sqr_mont:
andl $1,%ebx andl $1,%ebx
incl %ecx incl %ecx
.align 4,0x90 .align 4,0x90
L011sqr: L013sqr:
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
@ -300,7 +314,7 @@ L011sqr:
cmpl (%esp),%ecx cmpl (%esp),%ecx
movl %eax,%ebx movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
jl L011sqr jl L013sqr
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
@ -324,7 +338,7 @@ L011sqr:
movl 4(%esi),%eax movl 4(%esi),%eax
movl $1,%ecx movl $1,%ecx
.align 4,0x90 .align 4,0x90
L0123rdmadd: L0143rdmadd:
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ecx,4),%ebp addl 32(%esp,%ecx,4),%ebp
@ -343,7 +357,7 @@ L0123rdmadd:
adcl $0,%edx adcl $0,%edx
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4) movl %ebp,24(%esp,%ecx,4)
jl L0123rdmadd jl L0143rdmadd
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
addl 32(%esp,%ebx,4),%ebp addl 32(%esp,%ebx,4),%ebp
@ -359,7 +373,7 @@ L0123rdmadd:
movl %edx,32(%esp,%ebx,4) movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4) movl %eax,36(%esp,%ebx,4)
je L006common_tail je L008common_tail
movl 4(%esi,%ecx,4),%edi movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
movl %edi,%eax movl %edi,%eax
@ -371,12 +385,12 @@ L0123rdmadd:
xorl %ebp,%ebp xorl %ebp,%ebp
cmpl %ebx,%ecx cmpl %ebx,%ecx
leal 1(%ecx),%ecx leal 1(%ecx),%ecx
je L013sqrlast je L015sqrlast
movl %edx,%ebx movl %edx,%ebx
shrl $1,%edx shrl $1,%edx
andl $1,%ebx andl $1,%ebx
.align 4,0x90 .align 4,0x90
L014sqradd: L016sqradd:
movl (%esi,%ecx,4),%eax movl (%esi,%ecx,4),%eax
movl %edx,%ebp movl %edx,%ebp
mull %edi mull %edi
@ -392,13 +406,13 @@ L014sqradd:
cmpl (%esp),%ecx cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4) movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx movl %eax,%ebx
jle L014sqradd jle L016sqradd
movl %edx,%ebp movl %edx,%ebp
addl %edx,%edx addl %edx,%edx
shrl $31,%ebp shrl $31,%ebp
addl %ebx,%edx addl %ebx,%edx
adcl $0,%ebp adcl $0,%ebp
L013sqrlast: L015sqrlast:
movl 20(%esp),%edi movl 20(%esp),%edi
movl 16(%esp),%esi movl 16(%esp),%esi
imull 32(%esp),%edi imull 32(%esp),%edi
@ -413,9 +427,9 @@ L013sqrlast:
adcl $0,%edx adcl $0,%edx
movl $1,%ecx movl $1,%ecx
movl 4(%esi),%eax movl 4(%esi),%eax
jmp L0123rdmadd jmp L0143rdmadd
.align 4,0x90 .align 4,0x90
L006common_tail: L008common_tail:
movl 16(%esp),%ebp movl 16(%esp),%ebp
movl 4(%esp),%edi movl 4(%esp),%edi
leal 32(%esp),%esi leal 32(%esp),%esi
@ -423,25 +437,26 @@ L006common_tail:
movl %ebx,%ecx movl %ebx,%ecx
xorl %edx,%edx xorl %edx,%edx
.align 4,0x90 .align 4,0x90
L015sub: L017sub:
sbbl (%ebp,%edx,4),%eax sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4) movl %eax,(%edi,%edx,4)
decl %ecx decl %ecx
movl 4(%esi,%edx,4),%eax movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx leal 1(%edx),%edx
jge L015sub jge L017sub
sbbl $0,%eax sbbl $0,%eax
andl %eax,%esi
notl %eax
movl %edi,%ebp
andl %eax,%ebp
orl %ebp,%esi
.align 4,0x90 .align 4,0x90
L016copy: L018copy:
movl (%esi,%ebx,4),%edx movl (%esi,%ebx,4),%eax
movl (%edi,%ebx,4),%ebp movl %eax,(%edi,%ebx,4)
xorl %ebp,%edx movl %ecx,32(%esp,%ebx,4)
andl %eax,%edx
xorl %ebp,%edx
movl %ecx,(%esi,%ebx,4)
movl %edx,(%edi,%ebx,4)
decl %ebx decl %ebx
jge L016copy jge L018copy
movl 24(%esp),%esp movl 24(%esp),%esp
movl $1,%eax movl $1,%eax
L000just_leave: L000just_leave:

View File

@ -1,350 +0,0 @@
#if defined(__i386__)
.file "rc4-586.S"
.text
.globl _asm_RC4
.private_extern _asm_RC4
.align 4
_asm_RC4:
L_asm_RC4_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebp
xorl %eax,%eax
xorl %ebx,%ebx
cmpl $0,%edx
je L000abort
movb (%edi),%al
movb 4(%edi),%bl
addl $8,%edi
leal (%esi,%edx,1),%ecx
subl %esi,%ebp
movl %ecx,24(%esp)
incb %al
cmpl $-1,256(%edi)
je L001RC4_CHAR
movl (%edi,%eax,4),%ecx
andl $-4,%edx
jz L002loop1
movl %ebp,32(%esp)
testl $-8,%edx
jz L003go4loop4
call L004PIC_me_up
L004PIC_me_up:
popl %ebp
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L004PIC_me_up(%ebp),%ebp
btl $26,(%ebp)
jnc L003go4loop4
movl 32(%esp),%ebp
andl $-8,%edx
leal -8(%esi,%edx,1),%edx
movl %edx,-4(%edi)
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
movq (%esi),%mm0
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm2
jmp L005loop_mmx_enter
.align 4,0x90
L006loop_mmx:
addb %cl,%bl
psllq $56,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movq (%esi),%mm0
movq %mm2,-8(%ebp,%esi,1)
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm2
L005loop_mmx_enter:
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm0,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $8,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $16,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $24,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $32,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $40,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $48,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
movl %ebx,%edx
xorl %ebx,%ebx
movb %dl,%bl
cmpl -4(%edi),%esi
leal 8(%esi),%esi
jb L006loop_mmx
psllq $56,%mm1
pxor %mm1,%mm2
movq %mm2,-8(%ebp,%esi,1)
emms
cmpl 24(%esp),%esi
je L007done
jmp L002loop1
.align 4,0x90
L003go4loop4:
leal -4(%esi,%edx,1),%edx
movl %edx,28(%esp)
L008loop4:
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
movl (%edi,%eax,4),%ecx
movl (%edi,%edx,4),%ebp
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
rorl $8,%ebp
movl (%edi,%eax,4),%ecx
orl (%edi,%edx,4),%ebp
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
rorl $8,%ebp
movl (%edi,%eax,4),%ecx
orl (%edi,%edx,4),%ebp
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
rorl $8,%ebp
movl 32(%esp),%ecx
orl (%edi,%edx,4),%ebp
rorl $8,%ebp
xorl (%esi),%ebp
cmpl 28(%esp),%esi
movl %ebp,(%ecx,%esi,1)
leal 4(%esi),%esi
movl (%edi,%eax,4),%ecx
jb L008loop4
cmpl 24(%esp),%esi
je L007done
movl 32(%esp),%ebp
.align 4,0x90
L002loop1:
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
movl (%edi,%edx,4),%edx
xorb (%esi),%dl
leal 1(%esi),%esi
movl (%edi,%eax,4),%ecx
cmpl 24(%esp),%esi
movb %dl,-1(%ebp,%esi,1)
jb L002loop1
jmp L007done
.align 4,0x90
L001RC4_CHAR:
movzbl (%edi,%eax,1),%ecx
L009cloop1:
addb %cl,%bl
movzbl (%edi,%ebx,1),%edx
movb %cl,(%edi,%ebx,1)
movb %dl,(%edi,%eax,1)
addb %cl,%dl
movzbl (%edi,%edx,1),%edx
addb $1,%al
xorb (%esi),%dl
leal 1(%esi),%esi
movzbl (%edi,%eax,1),%ecx
cmpl 24(%esp),%esi
movb %dl,-1(%ebp,%esi,1)
jb L009cloop1
L007done:
decb %al
movl %ebx,-4(%edi)
movb %al,-8(%edi)
L000abort:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _asm_RC4_set_key
.private_extern _asm_RC4_set_key
.align 4
_asm_RC4_set_key:
L_asm_RC4_set_key_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%ebp
movl 28(%esp),%esi
call L010PIC_me_up
L010PIC_me_up:
popl %edx
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010PIC_me_up(%edx),%edx
leal 8(%edi),%edi
leal (%esi,%ebp,1),%esi
negl %ebp
xorl %eax,%eax
movl %ebp,-4(%edi)
btl $20,(%edx)
jc L011c1stloop
.align 4,0x90
L012w1stloop:
movl %eax,(%edi,%eax,4)
addb $1,%al
jnc L012w1stloop
xorl %ecx,%ecx
xorl %edx,%edx
.align 4,0x90
L013w2ndloop:
movl (%edi,%ecx,4),%eax
addb (%esi,%ebp,1),%dl
addb %al,%dl
addl $1,%ebp
movl (%edi,%edx,4),%ebx
jnz L014wnowrap
movl -4(%edi),%ebp
L014wnowrap:
movl %eax,(%edi,%edx,4)
movl %ebx,(%edi,%ecx,4)
addb $1,%cl
jnc L013w2ndloop
jmp L015exit
.align 4,0x90
L011c1stloop:
movb %al,(%edi,%eax,1)
addb $1,%al
jnc L011c1stloop
xorl %ecx,%ecx
xorl %edx,%edx
xorl %ebx,%ebx
.align 4,0x90
L016c2ndloop:
movb (%edi,%ecx,1),%al
addb (%esi,%ebp,1),%dl
addb %al,%dl
addl $1,%ebp
movb (%edi,%edx,1),%bl
jnz L017cnowrap
movl -4(%edi),%ebp
L017cnowrap:
movb %al,(%edi,%edx,1)
movb %bl,(%edi,%ecx,1)
addb $1,%cl
jnc L016c2ndloop
movl $-1,256(%edi)
L015exit:
xorl %eax,%eax
movl %eax,-8(%edi)
movl %eax,-4(%edi)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol _OPENSSL_ia32cap_P
.long 0
#endif

View File

@ -1,34 +0,0 @@
#if defined(__x86_64__)
.text
.globl _rsaz_avx2_eligible
.private_extern _rsaz_avx2_eligible
_rsaz_avx2_eligible:
xorl %eax,%eax
.byte 0xf3,0xc3
.globl _rsaz_1024_sqr_avx2
.private_extern _rsaz_1024_sqr_avx2
.globl _rsaz_1024_mul_avx2
.private_extern _rsaz_1024_mul_avx2
.globl _rsaz_1024_norm2red_avx2
.private_extern _rsaz_1024_norm2red_avx2
.globl _rsaz_1024_red2norm_avx2
.private_extern _rsaz_1024_red2norm_avx2
.globl _rsaz_1024_scatter5_avx2
.private_extern _rsaz_1024_scatter5_avx2
.globl _rsaz_1024_gather5_avx2
.private_extern _rsaz_1024_gather5_avx2
_rsaz_1024_sqr_avx2:
_rsaz_1024_mul_avx2:
_rsaz_1024_norm2red_avx2:
_rsaz_1024_red2norm_avx2:
_rsaz_1024_scatter5_avx2:
_rsaz_1024_gather5_avx2:
.byte 0x0f,0x0b
.byte 0xf3,0xc3
#endif

File diff suppressed because it is too large

View File

@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
@ -22,6 +22,15 @@ L$rot24:
.byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe .byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe
L$sigma: L$sigma:
.byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0 .byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0
.p2align 6
L$zeroz:
.long 0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0
L$fourz:
.long 4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0
L$incz:
.long 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
L$sixteen:
.long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.globl _ChaCha20_ctr32 .globl _ChaCha20_ctr32
.private_extern _ChaCha20_ctr32 .private_extern _ChaCha20_ctr32
@ -41,6 +50,7 @@ _ChaCha20_ctr32:
pushq %r14 pushq %r14
pushq %r15 pushq %r15
subq $64+24,%rsp subq $64+24,%rsp
L$ctr32_body:
movdqu (%rcx),%xmm1 movdqu (%rcx),%xmm1
@ -278,13 +288,14 @@ L$oop_tail:
jnz L$oop_tail jnz L$oop_tail
L$done: L$done:
addq $64+24,%rsp leaq 64+24+48(%rsp),%rsi
popq %r15 movq -48(%rsi),%r15
popq %r14 movq -40(%rsi),%r14
popq %r13 movq -32(%rsi),%r13
popq %r12 movq -24(%rsi),%r12
popq %rbp movq -16(%rsi),%rbp
popq %rbx movq -8(%rsi),%rbx
leaq (%rsi),%rsp
L$no_data: L$no_data:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
@ -292,18 +303,12 @@ L$no_data:
.p2align 5 .p2align 5
ChaCha20_ssse3: ChaCha20_ssse3:
L$ChaCha20_ssse3: L$ChaCha20_ssse3:
movq %rsp,%r9
cmpq $128,%rdx cmpq $128,%rdx
ja L$ChaCha20_4x ja L$ChaCha20_4x
L$do_sse3_after_all: L$do_sse3_after_all:
pushq %rbx subq $64+8,%rsp
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $64+24,%rsp
movdqa L$sigma(%rip),%xmm0 movdqa L$sigma(%rip),%xmm0
movdqu (%rcx),%xmm1 movdqu (%rcx),%xmm1
movdqu 16(%rcx),%xmm2 movdqu 16(%rcx),%xmm2
@ -315,7 +320,7 @@ L$do_sse3_after_all:
movdqa %xmm1,16(%rsp) movdqa %xmm1,16(%rsp)
movdqa %xmm2,32(%rsp) movdqa %xmm2,32(%rsp)
movdqa %xmm3,48(%rsp) movdqa %xmm3,48(%rsp)
movl $10,%ebp movq $10,%r8
jmp L$oop_ssse3 jmp L$oop_ssse3
.p2align 5 .p2align 5
@ -325,7 +330,7 @@ L$oop_outer_ssse3:
movdqa 16(%rsp),%xmm1 movdqa 16(%rsp),%xmm1
movdqa 32(%rsp),%xmm2 movdqa 32(%rsp),%xmm2
paddd 48(%rsp),%xmm3 paddd 48(%rsp),%xmm3
movl $10,%ebp movq $10,%r8
movdqa %xmm3,48(%rsp) movdqa %xmm3,48(%rsp)
jmp L$oop_ssse3 jmp L$oop_ssse3
@ -374,7 +379,7 @@ L$oop_ssse3:
pshufd $78,%xmm2,%xmm2 pshufd $78,%xmm2,%xmm2
pshufd $147,%xmm1,%xmm1 pshufd $147,%xmm1,%xmm1
pshufd $57,%xmm3,%xmm3 pshufd $57,%xmm3,%xmm3
decl %ebp decq %r8
jnz L$oop_ssse3 jnz L$oop_ssse3
paddd 0(%rsp),%xmm0 paddd 0(%rsp),%xmm0
paddd 16(%rsp),%xmm1 paddd 16(%rsp),%xmm1
@ -411,31 +416,27 @@ L$tail_ssse3:
movdqa %xmm1,16(%rsp) movdqa %xmm1,16(%rsp)
movdqa %xmm2,32(%rsp) movdqa %xmm2,32(%rsp)
movdqa %xmm3,48(%rsp) movdqa %xmm3,48(%rsp)
xorq %rbx,%rbx xorq %r8,%r8
L$oop_tail_ssse3: L$oop_tail_ssse3:
movzbl (%rsi,%rbx,1),%eax movzbl (%rsi,%r8,1),%eax
movzbl (%rsp,%rbx,1),%ecx movzbl (%rsp,%r8,1),%ecx
leaq 1(%rbx),%rbx leaq 1(%r8),%r8
xorl %ecx,%eax xorl %ecx,%eax
movb %al,-1(%rdi,%rbx,1) movb %al,-1(%rdi,%r8,1)
decq %rdx decq %rdx
jnz L$oop_tail_ssse3 jnz L$oop_tail_ssse3
L$done_ssse3: L$done_ssse3:
addq $64+24,%rsp leaq (%r9),%rsp
popq %r15 L$ssse3_epilogue:
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.p2align 5 .p2align 5
ChaCha20_4x: ChaCha20_4x:
L$ChaCha20_4x: L$ChaCha20_4x:
movq %rsp,%r9
movq %r10,%r11 movq %r10,%r11
shrq $32,%r10 shrq $32,%r10
testq $32,%r10 testq $32,%r10
@ -448,8 +449,7 @@ L$ChaCha20_4x:
je L$do_sse3_after_all je L$do_sse3_after_all
L$proceed4x: L$proceed4x:
leaq -120(%rsp),%r11 subq $0x140+8,%rsp
subq $0x148+0,%rsp
movdqa L$sigma(%rip),%xmm11 movdqa L$sigma(%rip),%xmm11
movdqu (%rcx),%xmm15 movdqu (%rcx),%xmm15
movdqu 16(%rcx),%xmm7 movdqu 16(%rcx),%xmm7
@ -976,18 +976,18 @@ L$oop_tail4x:
jnz L$oop_tail4x jnz L$oop_tail4x
L$done4x: L$done4x:
addq $0x148+0,%rsp leaq (%r9),%rsp
L$4x_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.p2align 5 .p2align 5
ChaCha20_8x: ChaCha20_8x:
L$ChaCha20_8x: L$ChaCha20_8x:
movq %rsp,%r10 movq %rsp,%r9
subq $0x280+8,%rsp subq $0x280+8,%rsp
andq $-32,%rsp andq $-32,%rsp
vzeroupper vzeroupper
movq %r10,640(%rsp)
@ -1578,7 +1578,8 @@ L$oop_tail8x:
L$done8x: L$done8x:
vzeroall vzeroall
movq 640(%rsp),%rsp leaq (%r9),%rsp
L$8x_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
#endif #endif

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
.p2align 4 .p2align 4
@ -332,6 +332,7 @@ L$enc_compact_done:
.private_extern _asm_AES_encrypt .private_extern _asm_AES_encrypt
_asm_AES_encrypt: _asm_AES_encrypt:
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
@ -340,7 +341,6 @@ _asm_AES_encrypt:
pushq %r15 pushq %r15
movq %rsp,%r10
leaq -63(%rdx),%rcx leaq -63(%rdx),%rcx
andq $-64,%rsp andq $-64,%rsp
subq %rsp,%rcx subq %rsp,%rcx
@ -350,7 +350,7 @@ _asm_AES_encrypt:
subq $32,%rsp subq $32,%rsp
movq %rsi,16(%rsp) movq %rsi,16(%rsp)
movq %r10,24(%rsp) movq %rax,24(%rsp)
L$enc_prologue: L$enc_prologue:
movq %rdx,%r15 movq %rdx,%r15
@ -382,13 +382,13 @@ L$enc_prologue:
movl %ecx,8(%r9) movl %ecx,8(%r9)
movl %edx,12(%r9) movl %edx,12(%r9)
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
L$enc_epilogue: L$enc_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
@ -778,6 +778,7 @@ L$dec_compact_done:
.private_extern _asm_AES_decrypt .private_extern _asm_AES_decrypt
_asm_AES_decrypt: _asm_AES_decrypt:
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
@ -786,7 +787,6 @@ _asm_AES_decrypt:
pushq %r15 pushq %r15
movq %rsp,%r10
leaq -63(%rdx),%rcx leaq -63(%rdx),%rcx
andq $-64,%rsp andq $-64,%rsp
subq %rsp,%rcx subq %rsp,%rcx
@ -796,7 +796,7 @@ _asm_AES_decrypt:
subq $32,%rsp subq $32,%rsp
movq %rsi,16(%rsp) movq %rsi,16(%rsp)
movq %r10,24(%rsp) movq %rax,24(%rsp)
L$dec_prologue: L$dec_prologue:
movq %rdx,%r15 movq %rdx,%r15
@ -830,13 +830,13 @@ L$dec_prologue:
movl %ecx,8(%r9) movl %ecx,8(%r9)
movl %edx,12(%r9) movl %edx,12(%r9)
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
L$dec_epilogue: L$dec_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
@ -1312,12 +1312,12 @@ L$cbc_prologue:
movl %r9d,%r9d movl %r9d,%r9d
leaq L$AES_Te(%rip),%r14 leaq L$AES_Te(%rip),%r14
leaq L$AES_Td(%rip),%r10
cmpq $0,%r9 cmpq $0,%r9
jne L$cbc_picked_te cmoveq %r10,%r14
leaq L$AES_Td(%rip),%r14
L$cbc_picked_te:
movl _OPENSSL_ia32cap_P(%rip),%r10d leaq _OPENSSL_ia32cap_P(%rip),%r10
movl (%r10),%r10d
cmpq $512,%rdx cmpq $512,%rdx
jb L$cbc_slow_prologue jb L$cbc_slow_prologue
testq $15,%rdx testq $15,%rdx

View File

@ -0,0 +1,834 @@
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text
.p2align 5
_aesni_ctr32_ghash_6x:
vmovdqu 32(%r11),%xmm2
subq $6,%rdx
vpxor %xmm4,%xmm4,%xmm4
vmovdqu 0-128(%rcx),%xmm15
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpaddb %xmm2,%xmm11,%xmm12
vpaddb %xmm2,%xmm12,%xmm13
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm15,%xmm1,%xmm9
vmovdqu %xmm4,16+8(%rsp)
jmp L$oop6x
.p2align 5
L$oop6x:
addl $100663296,%ebx
jc L$handle_ctr32
vmovdqu 0-32(%r9),%xmm3
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm15,%xmm10,%xmm10
vpxor %xmm15,%xmm11,%xmm11
L$resume_ctr32:
vmovdqu %xmm1,(%r8)
vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5
vpxor %xmm15,%xmm12,%xmm12
vmovups 16-128(%rcx),%xmm2
vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
xorq %r12,%r12
cmpq %r14,%r15
vaesenc %xmm2,%xmm9,%xmm9
vmovdqu 48+8(%rsp),%xmm0
vpxor %xmm15,%xmm13,%xmm13
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1
vaesenc %xmm2,%xmm10,%xmm10
vpxor %xmm15,%xmm14,%xmm14
setnc %r12b
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vaesenc %xmm2,%xmm11,%xmm11
vmovdqu 16-32(%r9),%xmm3
negq %r12
vaesenc %xmm2,%xmm12,%xmm12
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5
vpxor %xmm4,%xmm8,%xmm8
vaesenc %xmm2,%xmm13,%xmm13
vpxor %xmm5,%xmm1,%xmm4
andq $0x60,%r12
vmovups 32-128(%rcx),%xmm15
vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1
vaesenc %xmm2,%xmm14,%xmm14
vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2
leaq (%r14,%r12,1),%r14
vaesenc %xmm15,%xmm9,%xmm9
vpxor 16+8(%rsp),%xmm8,%xmm8
vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3
vmovdqu 64+8(%rsp),%xmm0
vaesenc %xmm15,%xmm10,%xmm10
movbeq 88(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 80(%r14),%r12
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,32+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,40+8(%rsp)
vmovdqu 48-32(%r9),%xmm5
vaesenc %xmm15,%xmm14,%xmm14
vmovups 48-128(%rcx),%xmm15
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm3,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3
vaesenc %xmm15,%xmm11,%xmm11
vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5
vmovdqu 80+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vpxor %xmm1,%xmm4,%xmm4
vmovdqu 64-32(%r9),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vmovups 64-128(%rcx),%xmm15
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3
vaesenc %xmm15,%xmm10,%xmm10
movbeq 72(%r14),%r13
vpxor %xmm5,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
movbeq 64(%r14),%r12
vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1
vmovdqu 96+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,48+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,56+8(%rsp)
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 96-32(%r9),%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vmovups 80-128(%rcx),%xmm15
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5
vaesenc %xmm15,%xmm10,%xmm10
movbeq 56(%r14),%r13
vpxor %xmm1,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1
vpxor 112+8(%rsp),%xmm8,%xmm8
vaesenc %xmm15,%xmm11,%xmm11
movbeq 48(%r14),%r12
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,64+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,72+8(%rsp)
vpxor %xmm3,%xmm4,%xmm4
vmovdqu 112-32(%r9),%xmm3
vaesenc %xmm15,%xmm14,%xmm14
vmovups 96-128(%rcx),%xmm15
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1
vaesenc %xmm15,%xmm10,%xmm10
movbeq 40(%r14),%r13
vpxor %xmm2,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
movbeq 32(%r14),%r12
vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,80+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,88+8(%rsp)
vpxor %xmm5,%xmm6,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor %xmm1,%xmm6,%xmm6
vmovups 112-128(%rcx),%xmm15
vpslldq $8,%xmm6,%xmm5
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 16(%r11),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm8,%xmm7,%xmm7
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm5,%xmm4,%xmm4
movbeq 24(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 16(%r14),%r12
vpalignr $8,%xmm4,%xmm4,%xmm0
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
movq %r13,96+8(%rsp)
vaesenc %xmm15,%xmm12,%xmm12
movq %r12,104+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
vmovups 128-128(%rcx),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vmovups 144-128(%rcx),%xmm15
vaesenc %xmm1,%xmm10,%xmm10
vpsrldq $8,%xmm6,%xmm6
vaesenc %xmm1,%xmm11,%xmm11
vpxor %xmm6,%xmm7,%xmm7
vaesenc %xmm1,%xmm12,%xmm12
vpxor %xmm0,%xmm4,%xmm4
movbeq 8(%r14),%r13
vaesenc %xmm1,%xmm13,%xmm13
movbeq 0(%r14),%r12
vaesenc %xmm1,%xmm14,%xmm14
vmovups 160-128(%rcx),%xmm1
cmpl $11,%ebp
jb L$enc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 176-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 192-128(%rcx),%xmm1
je L$enc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 208-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 224-128(%rcx),%xmm1
jmp L$enc_tail
.p2align 5
L$handle_ctr32:
vmovdqu (%r11),%xmm0
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vmovdqu 0-32(%r9),%xmm3
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm15,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm15,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpshufb %xmm0,%xmm14,%xmm14
vpshufb %xmm0,%xmm1,%xmm1
jmp L$resume_ctr32
.p2align 5
L$enc_tail:
vaesenc %xmm15,%xmm9,%xmm9
vmovdqu %xmm7,16+8(%rsp)
vpalignr $8,%xmm4,%xmm4,%xmm8
vaesenc %xmm15,%xmm10,%xmm10
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
vpxor 0(%rdi),%xmm1,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
vpxor 16(%rdi),%xmm1,%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vpxor 32(%rdi),%xmm1,%xmm5
vaesenc %xmm15,%xmm13,%xmm13
vpxor 48(%rdi),%xmm1,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor 64(%rdi),%xmm1,%xmm7
vpxor 80(%rdi),%xmm1,%xmm3
vmovdqu (%r8),%xmm1
vaesenclast %xmm2,%xmm9,%xmm9
vmovdqu 32(%r11),%xmm2
vaesenclast %xmm0,%xmm10,%xmm10
vpaddb %xmm2,%xmm1,%xmm0
movq %r13,112+8(%rsp)
leaq 96(%rdi),%rdi
vaesenclast %xmm5,%xmm11,%xmm11
vpaddb %xmm2,%xmm0,%xmm5
movq %r12,120+8(%rsp)
leaq 96(%rsi),%rsi
vmovdqu 0-128(%rcx),%xmm15
vaesenclast %xmm6,%xmm12,%xmm12
vpaddb %xmm2,%xmm5,%xmm6
vaesenclast %xmm7,%xmm13,%xmm13
vpaddb %xmm2,%xmm6,%xmm7
vaesenclast %xmm3,%xmm14,%xmm14
vpaddb %xmm2,%xmm7,%xmm3
addq $0x60,%r10
subq $0x6,%rdx
jc L$6x_done
vmovups %xmm9,-96(%rsi)
vpxor %xmm15,%xmm1,%xmm9
vmovups %xmm10,-80(%rsi)
vmovdqa %xmm0,%xmm10
vmovups %xmm11,-64(%rsi)
vmovdqa %xmm5,%xmm11
vmovups %xmm12,-48(%rsi)
vmovdqa %xmm6,%xmm12
vmovups %xmm13,-32(%rsi)
vmovdqa %xmm7,%xmm13
vmovups %xmm14,-16(%rsi)
vmovdqa %xmm3,%xmm14
vmovdqu 32+8(%rsp),%xmm7
jmp L$oop6x
L$6x_done:
vpxor 16+8(%rsp),%xmm8,%xmm8
vpxor %xmm4,%xmm8,%xmm8
.byte 0xf3,0xc3
.globl _aesni_gcm_decrypt
.private_extern _aesni_gcm_decrypt
.p2align 5
_aesni_gcm_decrypt:
xorq %r10,%r10
cmpq $0x60,%rdx
jb L$gcm_dec_abort
leaq (%rsp),%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq L$bswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
vmovdqu (%r9),%xmm8
andq $-128,%rsp
vmovdqu (%r11),%xmm0
leaq 128(%rcx),%rcx
leaq 32+32(%r9),%r9
movl 240-128(%rcx),%ebp
vpshufb %xmm0,%xmm8,%xmm8
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc L$dec_no_key_aliasing
cmpq $768,%r15
jnc L$dec_no_key_aliasing
subq %r15,%rsp
L$dec_no_key_aliasing:
vmovdqu 80(%rdi),%xmm7
leaq (%rdi),%r14
vmovdqu 64(%rdi),%xmm4
leaq -192(%rdi,%rdx,1),%r15
vmovdqu 48(%rdi),%xmm5
shrq $4,%rdx
xorq %r10,%r10
vmovdqu 32(%rdi),%xmm6
vpshufb %xmm0,%xmm7,%xmm7
vmovdqu 16(%rdi),%xmm2
vpshufb %xmm0,%xmm4,%xmm4
vmovdqu (%rdi),%xmm3
vpshufb %xmm0,%xmm5,%xmm5
vmovdqu %xmm4,48(%rsp)
vpshufb %xmm0,%xmm6,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm2,%xmm2
vmovdqu %xmm6,80(%rsp)
vpshufb %xmm0,%xmm3,%xmm3
vmovdqu %xmm2,96(%rsp)
vmovdqu %xmm3,112(%rsp)
call _aesni_ctr32_ghash_6x
vmovups %xmm9,-96(%rsi)
vmovups %xmm10,-80(%rsi)
vmovups %xmm11,-64(%rsi)
vmovups %xmm12,-48(%rsi)
vmovups %xmm13,-32(%rsi)
vmovups %xmm14,-16(%rsi)
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
L$gcm_dec_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.p2align 5
_aesni_ctr32_6x:
vmovdqu 0-128(%rcx),%xmm4
vmovdqu 32(%r11),%xmm2
leaq -1(%rbp),%r13
vmovups 16-128(%rcx),%xmm15
leaq 32-128(%rcx),%r12
vpxor %xmm4,%xmm1,%xmm9
addl $100663296,%ebx
jc L$handle_ctr32_2
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddb %xmm2,%xmm11,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddb %xmm2,%xmm12,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp L$oop_ctr32
.p2align 4
L$oop_ctr32:
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vmovups (%r12),%xmm15
leaq 16(%r12),%r12
decl %r13d
jnz L$oop_ctr32
vmovdqu (%r12),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor 0(%rdi),%xmm3,%xmm4
vaesenc %xmm15,%xmm10,%xmm10
vpxor 16(%rdi),%xmm3,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
vpxor 32(%rdi),%xmm3,%xmm6
vaesenc %xmm15,%xmm12,%xmm12
vpxor 48(%rdi),%xmm3,%xmm8
vaesenc %xmm15,%xmm13,%xmm13
vpxor 64(%rdi),%xmm3,%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vpxor 80(%rdi),%xmm3,%xmm3
leaq 96(%rdi),%rdi
vaesenclast %xmm4,%xmm9,%xmm9
vaesenclast %xmm5,%xmm10,%xmm10
vaesenclast %xmm6,%xmm11,%xmm11
vaesenclast %xmm8,%xmm12,%xmm12
vaesenclast %xmm2,%xmm13,%xmm13
vaesenclast %xmm3,%xmm14,%xmm14
vmovups %xmm9,0(%rsi)
vmovups %xmm10,16(%rsi)
vmovups %xmm11,32(%rsi)
vmovups %xmm12,48(%rsi)
vmovups %xmm13,64(%rsi)
vmovups %xmm14,80(%rsi)
leaq 96(%rsi),%rsi
.byte 0xf3,0xc3
.p2align 5
L$handle_ctr32_2:
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpshufb %xmm0,%xmm14,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpshufb %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp L$oop_ctr32
.globl _aesni_gcm_encrypt
.private_extern _aesni_gcm_encrypt
.p2align 5
_aesni_gcm_encrypt:
xorq %r10,%r10
cmpq $288,%rdx
jb L$gcm_enc_abort
leaq (%rsp),%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq L$bswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
leaq 128(%rcx),%rcx
vmovdqu (%r11),%xmm0
andq $-128,%rsp
movl 240-128(%rcx),%ebp
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc L$enc_no_key_aliasing
cmpq $768,%r15
jnc L$enc_no_key_aliasing
subq %r15,%rsp
L$enc_no_key_aliasing:
leaq (%rsi),%r14
leaq -192(%rsi,%rdx,1),%r15
shrq $4,%rdx
call _aesni_ctr32_6x
vpshufb %xmm0,%xmm9,%xmm8
vpshufb %xmm0,%xmm10,%xmm2
vmovdqu %xmm8,112(%rsp)
vpshufb %xmm0,%xmm11,%xmm4
vmovdqu %xmm2,96(%rsp)
vpshufb %xmm0,%xmm12,%xmm5
vmovdqu %xmm4,80(%rsp)
vpshufb %xmm0,%xmm13,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm14,%xmm7
vmovdqu %xmm6,48(%rsp)
call _aesni_ctr32_6x
vmovdqu (%r9),%xmm8
leaq 32+32(%r9),%r9
subq $12,%rdx
movq $192,%r10
vpshufb %xmm0,%xmm8,%xmm8
call _aesni_ctr32_ghash_6x
vmovdqu 32(%rsp),%xmm7
vmovdqu (%r11),%xmm0
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm7,%xmm7,%xmm1
vmovdqu 32-32(%r9),%xmm15
vmovups %xmm9,-96(%rsi)
vpshufb %xmm0,%xmm9,%xmm9
vpxor %xmm7,%xmm1,%xmm1
vmovups %xmm10,-80(%rsi)
vpshufb %xmm0,%xmm10,%xmm10
vmovups %xmm11,-64(%rsi)
vpshufb %xmm0,%xmm11,%xmm11
vmovups %xmm12,-48(%rsi)
vpshufb %xmm0,%xmm12,%xmm12
vmovups %xmm13,-32(%rsi)
vpshufb %xmm0,%xmm13,%xmm13
vmovups %xmm14,-16(%rsi)
vpshufb %xmm0,%xmm14,%xmm14
vmovdqu %xmm9,16(%rsp)
vmovdqu 48(%rsp),%xmm6
vmovdqu 16-32(%r9),%xmm0
vpunpckhqdq %xmm6,%xmm6,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5
vpxor %xmm6,%xmm2,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vmovdqu 64(%rsp),%xmm9
vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm9,%xmm9,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6
vpxor %xmm9,%xmm5,%xmm5
vpxor %xmm7,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vmovdqu 80(%rsp),%xmm1
vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm4,%xmm7,%xmm7
vpunpckhqdq %xmm1,%xmm1,%xmm4
vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpxor %xmm6,%xmm9,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 96(%rsp),%xmm2
vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm7,%xmm6,%xmm6
vpunpckhqdq %xmm2,%xmm2,%xmm7
vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1
vpxor %xmm2,%xmm7,%xmm7
vpxor %xmm9,%xmm1,%xmm1
vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm5,%xmm4,%xmm4
vpxor 112(%rsp),%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5
vmovdqu 112-32(%r9),%xmm0
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpxor %xmm6,%xmm5,%xmm5
vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7
vpxor %xmm4,%xmm7,%xmm4
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm14,%xmm14,%xmm1
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8
vpxor %xmm14,%xmm1,%xmm1
vpxor %xmm5,%xmm6,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9
vmovdqu 32-32(%r9),%xmm15
vpxor %xmm2,%xmm8,%xmm7
vpxor %xmm4,%xmm9,%xmm6
vmovdqu 16-32(%r9),%xmm0
vpxor %xmm5,%xmm7,%xmm9
vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4
vpxor %xmm9,%xmm6,%xmm6
vpunpckhqdq %xmm13,%xmm13,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14
vpxor %xmm13,%xmm2,%xmm2
vpslldq $8,%xmm6,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vpxor %xmm9,%xmm5,%xmm8
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm6,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm12,%xmm12,%xmm9
vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13
vpxor %xmm12,%xmm9,%xmm9
vpxor %xmm14,%xmm13,%xmm13
vpalignr $8,%xmm8,%xmm8,%xmm14
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm11,%xmm11,%xmm1
vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12
vpxor %xmm11,%xmm1,%xmm1
vpxor %xmm13,%xmm12,%xmm12
vxorps 16(%rsp),%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm9,%xmm9
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm10,%xmm10,%xmm2
vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11
vpxor %xmm10,%xmm2,%xmm2
vpalignr $8,%xmm8,%xmm8,%xmm14
vpxor %xmm12,%xmm11,%xmm11
vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm9,%xmm1,%xmm1
vxorps %xmm7,%xmm14,%xmm14
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4
vmovdqu 112-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm11,%xmm10,%xmm10
vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7
vpxor %xmm4,%xmm5,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6
vpxor %xmm10,%xmm7,%xmm7
vpxor %xmm2,%xmm6,%xmm6
vpxor %xmm5,%xmm7,%xmm4
vpxor %xmm4,%xmm6,%xmm6
vpslldq $8,%xmm6,%xmm1
vmovdqu 16(%r11),%xmm3
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm1,%xmm5,%xmm8
vpxor %xmm6,%xmm7,%xmm7
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm2,%xmm8,%xmm8
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm7,%xmm2,%xmm2
vpxor %xmm2,%xmm8,%xmm8
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
L$gcm_enc_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.p2align 6
L$bswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
L$poly:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
L$one_msb:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
L$two_lsb:
.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
L$one_lsb:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
#endif

@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
@ -1302,15 +1302,14 @@ L$cbc_dec_bzero:
cmpq %rax,%rbp cmpq %rax,%rbp
ja L$cbc_dec_bzero ja L$cbc_dec_bzero
leaq (%rbp),%rsp leaq 120(%rbp),%rax
movq 72(%rsp),%r15 movq -48(%rax),%r15
movq 80(%rsp),%r14 movq -40(%rax),%r14
movq 88(%rsp),%r13 movq -32(%rax),%r13
movq 96(%rsp),%r12 movq -24(%rax),%r12
movq 104(%rsp),%rbx movq -16(%rax),%rbx
movq 112(%rsp),%rax movq -8(%rax),%rbp
leaq 120(%rsp),%rsp leaq (%rax),%rsp
movq %rax,%rbp
L$cbc_dec_epilogue: L$cbc_dec_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
@ -1503,15 +1502,14 @@ L$ctr_enc_bzero:
cmpq %rax,%rbp cmpq %rax,%rbp
ja L$ctr_enc_bzero ja L$ctr_enc_bzero
leaq (%rbp),%rsp leaq 120(%rbp),%rax
movq 72(%rsp),%r15 movq -48(%rax),%r15
movq 80(%rsp),%r14 movq -40(%rax),%r14
movq 88(%rsp),%r13 movq -32(%rax),%r13
movq 96(%rsp),%r12 movq -24(%rax),%r12
movq 104(%rsp),%rbx movq -16(%rax),%rbx
movq 112(%rsp),%rax movq -8(%rax),%rbp
leaq 120(%rsp),%rsp leaq (%rax),%rsp
movq %rax,%rbp
L$ctr_enc_epilogue: L$ctr_enc_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
@ -1955,15 +1953,14 @@ L$xts_enc_bzero:
cmpq %rax,%rbp cmpq %rax,%rbp
ja L$xts_enc_bzero ja L$xts_enc_bzero
leaq (%rbp),%rsp leaq 120(%rbp),%rax
movq 72(%rsp),%r15 movq -48(%rax),%r15
movq 80(%rsp),%r14 movq -40(%rax),%r14
movq 88(%rsp),%r13 movq -32(%rax),%r13
movq 96(%rsp),%r12 movq -24(%rax),%r12
movq 104(%rsp),%rbx movq -16(%rax),%rbx
movq 112(%rsp),%rax movq -8(%rax),%rbp
leaq 120(%rsp),%rsp leaq (%rax),%rsp
movq %rax,%rbp
L$xts_enc_epilogue: L$xts_enc_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
@ -2434,15 +2431,14 @@ L$xts_dec_bzero:
cmpq %rax,%rbp cmpq %rax,%rbp
ja L$xts_dec_bzero ja L$xts_dec_bzero
leaq (%rbp),%rsp leaq 120(%rbp),%rax
movq 72(%rsp),%r15 movq -48(%rax),%r15
movq 80(%rsp),%r14 movq -40(%rax),%r14
movq 88(%rsp),%r13 movq -32(%rax),%r13
movq 96(%rsp),%r12 movq -24(%rax),%r12
movq 104(%rsp),%rbx movq -16(%rax),%rbx
movq 112(%rsp),%rax movq -8(%rax),%rbp
leaq 120(%rsp),%rsp leaq (%rax),%rsp
movq %rax,%rbp
L$xts_dec_epilogue: L$xts_dec_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3

@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
@ -10,6 +10,10 @@ _gcm_gmult_4bit:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $280,%rsp
L$gmult_prologue: L$gmult_prologue:
movzbq 15(%rdi),%r8 movzbq 15(%rdi),%r8
@ -86,8 +90,9 @@ L$break1:
movq %r8,8(%rdi) movq %r8,8(%rdi)
movq %r9,(%rdi) movq %r9,(%rdi)
movq 16(%rsp),%rbx leaq 280+48(%rsp),%rsi
leaq 24(%rsp),%rsp movq -8(%rsi),%rbx
leaq (%rsi),%rsp
L$gmult_epilogue: L$gmult_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
@ -647,14 +652,14 @@ L$outer_loop:
movq %r8,8(%rdi) movq %r8,8(%rdi)
movq %r9,(%rdi) movq %r9,(%rdi)
leaq 280(%rsp),%rsi leaq 280+48(%rsp),%rsi
movq 0(%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq 0(%rsi),%rsp
L$ghash_epilogue: L$ghash_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
@ -884,7 +889,8 @@ L$_ghash_clmul:
jz L$odd_tail jz L$odd_tail
movdqu 16(%rsi),%xmm6 movdqu 16(%rsi),%xmm6
movl _OPENSSL_ia32cap_P+4(%rip),%eax leaq _OPENSSL_ia32cap_P(%rip),%rax
movl 4(%rax),%eax
cmpq $0x30,%rcx cmpq $0x30,%rcx
jb L$skip4x jb L$skip4x
@ -1256,7 +1262,108 @@ L$done:
.p2align 5 .p2align 5
_gcm_init_avx: _gcm_init_avx:
jmp L$_init_clmul vzeroupper
vmovdqu (%rsi),%xmm2
vpshufd $78,%xmm2,%xmm2
vpshufd $255,%xmm2,%xmm4
vpsrlq $63,%xmm2,%xmm3
vpsllq $1,%xmm2,%xmm2
vpxor %xmm5,%xmm5,%xmm5
vpcmpgtd %xmm4,%xmm5,%xmm5
vpslldq $8,%xmm3,%xmm3
vpor %xmm3,%xmm2,%xmm2
vpand L$0x1c2_polynomial(%rip),%xmm5,%xmm5
vpxor %xmm5,%xmm2,%xmm2
vpunpckhqdq %xmm2,%xmm2,%xmm6
vmovdqa %xmm2,%xmm0
vpxor %xmm2,%xmm6,%xmm6
movq $4,%r10
jmp L$init_start_avx
.p2align 5
L$init_loop_avx:
vpalignr $8,%xmm3,%xmm4,%xmm5
vmovdqu %xmm5,-16(%rdi)
vpunpckhqdq %xmm0,%xmm0,%xmm3
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
vpxor %xmm0,%xmm1,%xmm4
vpxor %xmm4,%xmm3,%xmm3
vpslldq $8,%xmm3,%xmm4
vpsrldq $8,%xmm3,%xmm3
vpxor %xmm4,%xmm0,%xmm0
vpxor %xmm3,%xmm1,%xmm1
vpsllq $57,%xmm0,%xmm3
vpsllq $62,%xmm0,%xmm4
vpxor %xmm3,%xmm4,%xmm4
vpsllq $63,%xmm0,%xmm3
vpxor %xmm3,%xmm4,%xmm4
vpslldq $8,%xmm4,%xmm3
vpsrldq $8,%xmm4,%xmm4
vpxor %xmm3,%xmm0,%xmm0
vpxor %xmm4,%xmm1,%xmm1
vpsrlq $1,%xmm0,%xmm4
vpxor %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $5,%xmm4,%xmm4
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $1,%xmm0,%xmm0
vpxor %xmm1,%xmm0,%xmm0
L$init_start_avx:
vmovdqa %xmm0,%xmm5
vpunpckhqdq %xmm0,%xmm0,%xmm3
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
vpxor %xmm0,%xmm1,%xmm4
vpxor %xmm4,%xmm3,%xmm3
vpslldq $8,%xmm3,%xmm4
vpsrldq $8,%xmm3,%xmm3
vpxor %xmm4,%xmm0,%xmm0
vpxor %xmm3,%xmm1,%xmm1
vpsllq $57,%xmm0,%xmm3
vpsllq $62,%xmm0,%xmm4
vpxor %xmm3,%xmm4,%xmm4
vpsllq $63,%xmm0,%xmm3
vpxor %xmm3,%xmm4,%xmm4
vpslldq $8,%xmm4,%xmm3
vpsrldq $8,%xmm4,%xmm4
vpxor %xmm3,%xmm0,%xmm0
vpxor %xmm4,%xmm1,%xmm1
vpsrlq $1,%xmm0,%xmm4
vpxor %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $5,%xmm4,%xmm4
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $1,%xmm0,%xmm0
vpxor %xmm1,%xmm0,%xmm0
vpshufd $78,%xmm5,%xmm3
vpshufd $78,%xmm0,%xmm4
vpxor %xmm5,%xmm3,%xmm3
vmovdqu %xmm5,0(%rdi)
vpxor %xmm0,%xmm4,%xmm4
vmovdqu %xmm0,16(%rdi)
leaq 48(%rdi),%rdi
subq $1,%r10
jnz L$init_loop_avx
vpalignr $8,%xmm4,%xmm3,%xmm5
vmovdqu %xmm5,-16(%rdi)
vzeroupper
.byte 0xf3,0xc3
.globl _gcm_gmult_avx .globl _gcm_gmult_avx
.private_extern _gcm_gmult_avx .private_extern _gcm_gmult_avx
@ -1270,7 +1377,377 @@ _gcm_gmult_avx:
.p2align 5 .p2align 5
_gcm_ghash_avx: _gcm_ghash_avx:
jmp L$_ghash_clmul vzeroupper
vmovdqu (%rdi),%xmm10
leaq L$0x1c2_polynomial(%rip),%r10
leaq 64(%rsi),%rsi
vmovdqu L$bswap_mask(%rip),%xmm13
vpshufb %xmm13,%xmm10,%xmm10
cmpq $0x80,%rcx
jb L$short_avx
subq $0x80,%rcx
vmovdqu 112(%rdx),%xmm14
vmovdqu 0-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm14
vmovdqu 32-64(%rsi),%xmm7
vpunpckhqdq %xmm14,%xmm14,%xmm9
vmovdqu 96(%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm14,%xmm9,%xmm9
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 16-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vmovdqu 80(%rdx),%xmm14
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vpshufb %xmm13,%xmm14,%xmm14
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 48-64(%rsi),%xmm6
vpxor %xmm14,%xmm9,%xmm9
vmovdqu 64(%rdx),%xmm15
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 80-64(%rsi),%xmm7
vpshufb %xmm13,%xmm15,%xmm15
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm1,%xmm4,%xmm4
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 64-64(%rsi),%xmm6
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vmovdqu 48(%rdx),%xmm14
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpxor %xmm4,%xmm1,%xmm1
vpshufb %xmm13,%xmm14,%xmm14
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 96-64(%rsi),%xmm6
vpxor %xmm5,%xmm2,%xmm2
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 128-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vmovdqu 32(%rdx),%xmm15
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm1,%xmm4,%xmm4
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 112-64(%rsi),%xmm6
vpxor %xmm2,%xmm5,%xmm5
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vmovdqu 16(%rdx),%xmm14
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpxor %xmm4,%xmm1,%xmm1
vpshufb %xmm13,%xmm14,%xmm14
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 144-64(%rsi),%xmm6
vpxor %xmm5,%xmm2,%xmm2
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 176-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vmovdqu (%rdx),%xmm15
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm1,%xmm4,%xmm4
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 160-64(%rsi),%xmm6
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
leaq 128(%rdx),%rdx
cmpq $0x80,%rcx
jb L$tail_avx
vpxor %xmm10,%xmm15,%xmm15
subq $0x80,%rcx
jmp L$oop8x_avx
.p2align 5
L$oop8x_avx:
vpunpckhqdq %xmm15,%xmm15,%xmm8
vmovdqu 112(%rdx),%xmm14
vpxor %xmm0,%xmm3,%xmm3
vpxor %xmm15,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10
vpshufb %xmm13,%xmm14,%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11
vmovdqu 0-64(%rsi),%xmm6
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12
vmovdqu 32-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vmovdqu 96(%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm3,%xmm10,%xmm10
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vxorps %xmm4,%xmm11,%xmm11
vmovdqu 16-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm5,%xmm12,%xmm12
vxorps %xmm15,%xmm8,%xmm8
vmovdqu 80(%rdx),%xmm14
vpxor %xmm10,%xmm12,%xmm12
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpxor %xmm11,%xmm12,%xmm12
vpslldq $8,%xmm12,%xmm9
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vpsrldq $8,%xmm12,%xmm12
vpxor %xmm9,%xmm10,%xmm10
vmovdqu 48-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm14
vxorps %xmm12,%xmm11,%xmm11
vpxor %xmm1,%xmm4,%xmm4
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 80-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 64(%rdx),%xmm15
vpalignr $8,%xmm10,%xmm10,%xmm12
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpshufb %xmm13,%xmm15,%xmm15
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 64-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm4,%xmm1,%xmm1
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vxorps %xmm15,%xmm8,%xmm8
vpxor %xmm5,%xmm2,%xmm2
vmovdqu 48(%rdx),%xmm14
vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpshufb %xmm13,%xmm14,%xmm14
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 96-64(%rsi),%xmm6
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 128-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 32(%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpshufb %xmm13,%xmm15,%xmm15
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 112-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm4,%xmm1,%xmm1
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vpxor %xmm5,%xmm2,%xmm2
vxorps %xmm12,%xmm10,%xmm10
vmovdqu 16(%rdx),%xmm14
vpalignr $8,%xmm10,%xmm10,%xmm12
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpshufb %xmm13,%xmm14,%xmm14
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 144-64(%rsi),%xmm6
vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
vxorps %xmm11,%xmm12,%xmm12
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 176-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vmovdqu (%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 160-64(%rsi),%xmm6
vpxor %xmm12,%xmm15,%xmm15
vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
vpxor %xmm10,%xmm15,%xmm15
leaq 128(%rdx),%rdx
subq $0x80,%rcx
jnc L$oop8x_avx
addq $0x80,%rcx
jmp L$tail_no_xor_avx
.p2align 5
L$short_avx:
vmovdqu -16(%rdx,%rcx,1),%xmm14
leaq (%rdx,%rcx,1),%rdx
vmovdqu 0-64(%rsi),%xmm6
vmovdqu 32-64(%rsi),%xmm7
vpshufb %xmm13,%xmm14,%xmm15
vmovdqa %xmm0,%xmm3
vmovdqa %xmm1,%xmm4
vmovdqa %xmm2,%xmm5
subq $0x10,%rcx
jz L$tail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -32(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 16-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vpsrldq $8,%xmm7,%xmm7
subq $0x10,%rcx
jz L$tail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -48(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 48-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovdqu 80-64(%rsi),%xmm7
subq $0x10,%rcx
jz L$tail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -64(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 64-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vpsrldq $8,%xmm7,%xmm7
subq $0x10,%rcx
jz L$tail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -80(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 96-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovdqu 128-64(%rsi),%xmm7
subq $0x10,%rcx
jz L$tail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -96(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 112-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vpsrldq $8,%xmm7,%xmm7
subq $0x10,%rcx
jz L$tail_avx
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -112(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 144-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovq 184-64(%rsi),%xmm7
subq $0x10,%rcx
jmp L$tail_avx
.p2align 5
L$tail_avx:
vpxor %xmm10,%xmm15,%xmm15
L$tail_no_xor_avx:
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovdqu (%r10),%xmm12
vpxor %xmm0,%xmm3,%xmm10
vpxor %xmm1,%xmm4,%xmm11
vpxor %xmm2,%xmm5,%xmm5
vpxor %xmm10,%xmm5,%xmm5
vpxor %xmm11,%xmm5,%xmm5
vpslldq $8,%xmm5,%xmm9
vpsrldq $8,%xmm5,%xmm5
vpxor %xmm9,%xmm10,%xmm10
vpxor %xmm5,%xmm11,%xmm11
vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
vpalignr $8,%xmm10,%xmm10,%xmm10
vpxor %xmm9,%xmm10,%xmm10
vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
vpalignr $8,%xmm10,%xmm10,%xmm10
vpxor %xmm11,%xmm10,%xmm10
vpxor %xmm9,%xmm10,%xmm10
cmpq $0,%rcx
jne L$short_avx
vpshufb %xmm13,%xmm10,%xmm10
vmovdqu %xmm10,(%rdi)
vzeroupper
.byte 0xf3,0xc3
.p2align 6 .p2align 6
L$bswap_mask: L$bswap_mask:

@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
.p2align 4 .p2align 4

@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
@ -17,47 +17,6 @@ L$ONE_mont:
.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe .quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
.p2align 6
ecp_nistz256_mul_by_2:
pushq %r12
pushq %r13
movq 0(%rsi),%r8
movq 8(%rsi),%r9
addq %r8,%r8
movq 16(%rsi),%r10
adcq %r9,%r9
movq 24(%rsi),%r11
leaq L$poly(%rip),%rsi
movq %r8,%rax
adcq %r10,%r10
adcq %r11,%r11
movq %r9,%rdx
sbbq %r13,%r13
subq 0(%rsi),%r8
movq %r10,%rcx
sbbq 8(%rsi),%r9
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
testq %r13,%r13
cmovzq %rax,%r8
cmovzq %rdx,%r9
movq %r8,0(%rdi)
cmovzq %rcx,%r10
movq %r9,8(%rdi)
cmovzq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
popq %r13
popq %r12
.byte 0xf3,0xc3
.globl _ecp_nistz256_neg .globl _ecp_nistz256_neg
.private_extern _ecp_nistz256_neg .private_extern _ecp_nistz256_neg
@ -552,106 +511,15 @@ __ecp_nistz256_sqr_montq:
.globl _ecp_nistz256_from_mont
.private_extern _ecp_nistz256_from_mont
.p2align 5
_ecp_nistz256_from_mont:
pushq %r12
pushq %r13
movq 0(%rsi),%rax
movq L$poly+24(%rip),%r13
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
movq %rax,%r8
movq L$poly+8(%rip),%r12
movq %rax,%rcx
shlq $32,%r8
mulq %r13
shrq $32,%rcx
addq %r8,%r9
adcq %rcx,%r10
adcq %rax,%r11
movq %r9,%rax
adcq $0,%rdx
movq %r9,%rcx
shlq $32,%r9
movq %rdx,%r8
mulq %r13
shrq $32,%rcx
addq %r9,%r10
adcq %rcx,%r11
adcq %rax,%r8
movq %r10,%rax
adcq $0,%rdx
movq %r10,%rcx
shlq $32,%r10
movq %rdx,%r9
mulq %r13
shrq $32,%rcx
addq %r10,%r11
adcq %rcx,%r8
adcq %rax,%r9
movq %r11,%rax
adcq $0,%rdx
movq %r11,%rcx
shlq $32,%r11
movq %rdx,%r10
mulq %r13
shrq $32,%rcx
addq %r11,%r8
adcq %rcx,%r9
movq %r8,%rcx
adcq %rax,%r10
movq %r9,%rsi
adcq $0,%rdx
subq $-1,%r8
movq %r10,%rax
sbbq %r12,%r9
sbbq $0,%r10
movq %rdx,%r11
sbbq %r13,%rdx
sbbq %r13,%r13
cmovnzq %rcx,%r8
cmovnzq %rsi,%r9
movq %r8,0(%rdi)
cmovnzq %rax,%r10
movq %r9,8(%rdi)
cmovzq %rdx,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
popq %r13
popq %r12
.byte 0xf3,0xc3
.globl _ecp_nistz256_select_w5 .globl _ecp_nistz256_select_w5
.private_extern _ecp_nistz256_select_w5 .private_extern _ecp_nistz256_select_w5
.p2align 5 .p2align 5
_ecp_nistz256_select_w5: _ecp_nistz256_select_w5:
leaq _OPENSSL_ia32cap_P(%rip),%rax
movq 8(%rax),%rax
testl $32,%eax
jnz L$avx2_select_w5
movdqa L$One(%rip),%xmm0 movdqa L$One(%rip),%xmm0
movd %edx,%xmm1 movd %edx,%xmm1
@ -712,6 +580,10 @@ L$select_loop_sse_w5:
.p2align 5 .p2align 5
_ecp_nistz256_select_w7: _ecp_nistz256_select_w7:
leaq _OPENSSL_ia32cap_P(%rip),%rax
movq 8(%rax),%rax
testl $32,%eax
jnz L$avx2_select_w7
movdqa L$One(%rip),%xmm8 movdqa L$One(%rip),%xmm8
movd %edx,%xmm1 movd %edx,%xmm1
@ -753,24 +625,155 @@ L$select_loop_sse_w7:
movdqu %xmm5,48(%rdi) movdqu %xmm5,48(%rdi)
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.p2align 5
ecp_nistz256_avx2_select_w5:
L$avx2_select_w5:
vzeroupper
vmovdqa L$Two(%rip),%ymm0
vpxor %ymm2,%ymm2,%ymm2
vpxor %ymm3,%ymm3,%ymm3
vpxor %ymm4,%ymm4,%ymm4
vmovdqa L$One(%rip),%ymm5
vmovdqa L$Two(%rip),%ymm10
vmovd %edx,%xmm1
vpermd %ymm1,%ymm2,%ymm1
movq $8,%rax
L$select_loop_avx2_w5:
vmovdqa 0(%rsi),%ymm6
vmovdqa 32(%rsi),%ymm7
vmovdqa 64(%rsi),%ymm8
vmovdqa 96(%rsi),%ymm11
vmovdqa 128(%rsi),%ymm12
vmovdqa 160(%rsi),%ymm13
vpcmpeqd %ymm1,%ymm5,%ymm9
vpcmpeqd %ymm1,%ymm10,%ymm14
vpaddd %ymm0,%ymm5,%ymm5
vpaddd %ymm0,%ymm10,%ymm10
leaq 192(%rsi),%rsi
vpand %ymm9,%ymm6,%ymm6
vpand %ymm9,%ymm7,%ymm7
vpand %ymm9,%ymm8,%ymm8
vpand %ymm14,%ymm11,%ymm11
vpand %ymm14,%ymm12,%ymm12
vpand %ymm14,%ymm13,%ymm13
vpxor %ymm6,%ymm2,%ymm2
vpxor %ymm7,%ymm3,%ymm3
vpxor %ymm8,%ymm4,%ymm4
vpxor %ymm11,%ymm2,%ymm2
vpxor %ymm12,%ymm3,%ymm3
vpxor %ymm13,%ymm4,%ymm4
decq %rax
jnz L$select_loop_avx2_w5
vmovdqu %ymm2,0(%rdi)
vmovdqu %ymm3,32(%rdi)
vmovdqu %ymm4,64(%rdi)
vzeroupper
.byte 0xf3,0xc3
.globl _ecp_nistz256_avx2_select_w7 .globl _ecp_nistz256_avx2_select_w7
.private_extern _ecp_nistz256_avx2_select_w7 .private_extern _ecp_nistz256_avx2_select_w7
.p2align 5 .p2align 5
_ecp_nistz256_avx2_select_w7: _ecp_nistz256_avx2_select_w7:
.byte 0x0f,0x0b L$avx2_select_w7:
vzeroupper
vmovdqa L$Three(%rip),%ymm0
vpxor %ymm2,%ymm2,%ymm2
vpxor %ymm3,%ymm3,%ymm3
vmovdqa L$One(%rip),%ymm4
vmovdqa L$Two(%rip),%ymm8
vmovdqa L$Three(%rip),%ymm12
vmovd %edx,%xmm1
vpermd %ymm1,%ymm2,%ymm1
movq $21,%rax
L$select_loop_avx2_w7:
vmovdqa 0(%rsi),%ymm5
vmovdqa 32(%rsi),%ymm6
vmovdqa 64(%rsi),%ymm9
vmovdqa 96(%rsi),%ymm10
vmovdqa 128(%rsi),%ymm13
vmovdqa 160(%rsi),%ymm14
vpcmpeqd %ymm1,%ymm4,%ymm7
vpcmpeqd %ymm1,%ymm8,%ymm11
vpcmpeqd %ymm1,%ymm12,%ymm15
vpaddd %ymm0,%ymm4,%ymm4
vpaddd %ymm0,%ymm8,%ymm8
vpaddd %ymm0,%ymm12,%ymm12
leaq 192(%rsi),%rsi
vpand %ymm7,%ymm5,%ymm5
vpand %ymm7,%ymm6,%ymm6
vpand %ymm11,%ymm9,%ymm9
vpand %ymm11,%ymm10,%ymm10
vpand %ymm15,%ymm13,%ymm13
vpand %ymm15,%ymm14,%ymm14
vpxor %ymm5,%ymm2,%ymm2
vpxor %ymm6,%ymm3,%ymm3
vpxor %ymm9,%ymm2,%ymm2
vpxor %ymm10,%ymm3,%ymm3
vpxor %ymm13,%ymm2,%ymm2
vpxor %ymm14,%ymm3,%ymm3
decq %rax
jnz L$select_loop_avx2_w7
vmovdqa 0(%rsi),%ymm5
vmovdqa 32(%rsi),%ymm6
vpcmpeqd %ymm1,%ymm4,%ymm7
vpand %ymm7,%ymm5,%ymm5
vpand %ymm7,%ymm6,%ymm6
vpxor %ymm5,%ymm2,%ymm2
vpxor %ymm6,%ymm3,%ymm3
vmovdqu %ymm2,0(%rdi)
vmovdqu %ymm3,32(%rdi)
vzeroupper
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.p2align 5 .p2align 5
__ecp_nistz256_add_toq: __ecp_nistz256_add_toq:
xorq %r11,%r11
addq 0(%rbx),%r12 addq 0(%rbx),%r12
adcq 8(%rbx),%r13 adcq 8(%rbx),%r13
movq %r12,%rax movq %r12,%rax
adcq 16(%rbx),%r8 adcq 16(%rbx),%r8
adcq 24(%rbx),%r9 adcq 24(%rbx),%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -778,14 +781,14 @@ __ecp_nistz256_add_toq:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq %r12,0(%rdi) movq %r12,0(%rdi)
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq %r13,8(%rdi) movq %r13,8(%rdi)
cmovzq %r10,%r9 cmovcq %r10,%r9
movq %r8,16(%rdi) movq %r8,16(%rdi)
movq %r9,24(%rdi) movq %r9,24(%rdi)
@ -853,13 +856,14 @@ __ecp_nistz256_subq:
.p2align 5 .p2align 5
__ecp_nistz256_mul_by_2q: __ecp_nistz256_mul_by_2q:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
adcq %r13,%r13 adcq %r13,%r13
movq %r12,%rax movq %r12,%rax
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -867,14 +871,14 @@ __ecp_nistz256_mul_by_2q:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq %r12,0(%rdi) movq %r12,0(%rdi)
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq %r13,8(%rdi) movq %r13,8(%rdi)
cmovzq %r10,%r9 cmovcq %r10,%r9
movq %r8,16(%rdi) movq %r8,16(%rdi)
movq %r9,24(%rdi) movq %r9,24(%rdi)
@ -1106,16 +1110,14 @@ _ecp_nistz256_point_add:
movq %rdx,%rsi movq %rdx,%rsi
movdqa %xmm0,384(%rsp) movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp) movdqa %xmm1,384+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,416(%rsp) movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp) movdqa %xmm3,416+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,448(%rsp) movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp) movdqa %xmm5,448+16(%rsp)
por %xmm1,%xmm3 por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0 movdqu 0(%rsi),%xmm0
pshufd $0xb1,%xmm3,%xmm5 pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1 movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2 movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5 por %xmm3,%xmm5
@ -1127,14 +1129,14 @@ _ecp_nistz256_point_add:
movdqa %xmm0,480(%rsp) movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4 pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp) movdqa %xmm1,480+16(%rsp)
por %xmm0,%xmm1 movdqu 64(%rsi),%xmm0
.byte 102,72,15,110,199 movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp) movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp) movdqa %xmm3,512+16(%rsp)
por %xmm2,%xmm3
por %xmm4,%xmm5 por %xmm4,%xmm5
pxor %xmm4,%xmm4 pxor %xmm4,%xmm4
por %xmm1,%xmm3 por %xmm0,%xmm1
.byte 102,72,15,110,199
leaq 64-0(%rsi),%rsi leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp) movq %rax,544+0(%rsp)
@ -1145,8 +1147,8 @@ _ecp_nistz256_point_add:
call __ecp_nistz256_sqr_montq call __ecp_nistz256_sqr_montq
pcmpeqd %xmm4,%xmm5 pcmpeqd %xmm4,%xmm5
pshufd $0xb1,%xmm3,%xmm4 pshufd $0xb1,%xmm1,%xmm4
por %xmm3,%xmm4 por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5 pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3 pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4 por %xmm3,%xmm4
@ -1329,6 +1331,7 @@ L$add_proceedq:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
leaq 96(%rsp),%rsi leaq 96(%rsp),%rsi
adcq %r13,%r13 adcq %r13,%r13
@ -1336,7 +1339,7 @@ L$add_proceedq:
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -1344,15 +1347,15 @@ L$add_proceedq:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
movq 0(%rsi),%rax movq 0(%rsi),%rax
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq 8(%rsi),%rbp movq 8(%rsi),%rbp
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq 16(%rsi),%rcx movq 16(%rsi),%rcx
cmovzq %r10,%r9 cmovcq %r10,%r9
movq 24(%rsi),%r10 movq 24(%rsi),%r10
call __ecp_nistz256_subq call __ecp_nistz256_subq
@ -1507,16 +1510,14 @@ _ecp_nistz256_point_add_affine:
movq 64+24(%rsi),%r8 movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp) movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp) movdqa %xmm1,320+16(%rsp)
por %xmm0,%xmm1
movdqa %xmm2,352(%rsp) movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp) movdqa %xmm3,352+16(%rsp)
por %xmm2,%xmm3
movdqa %xmm4,384(%rsp) movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp) movdqa %xmm5,384+16(%rsp)
por %xmm1,%xmm3 por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0 movdqu 0(%rbx),%xmm0
pshufd $0xb1,%xmm3,%xmm5 pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1 movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2 movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5 por %xmm3,%xmm5
@ -1634,6 +1635,7 @@ _ecp_nistz256_point_add_affine:
xorq %r11,%r11
addq %r12,%r12 addq %r12,%r12
leaq 192(%rsp),%rsi leaq 192(%rsp),%rsi
adcq %r13,%r13 adcq %r13,%r13
@ -1641,7 +1643,7 @@ _ecp_nistz256_point_add_affine:
adcq %r8,%r8 adcq %r8,%r8
adcq %r9,%r9 adcq %r9,%r9
movq %r13,%rbp movq %r13,%rbp
sbbq %r11,%r11 adcq $0,%r11
subq $-1,%r12 subq $-1,%r12
movq %r8,%rcx movq %r8,%rcx
@ -1649,15 +1651,15 @@ _ecp_nistz256_point_add_affine:
sbbq $0,%r8 sbbq $0,%r8
movq %r9,%r10 movq %r9,%r10
sbbq %r15,%r9 sbbq %r15,%r9
testq %r11,%r11 sbbq $0,%r11
cmovzq %rax,%r12 cmovcq %rax,%r12
movq 0(%rsi),%rax movq 0(%rsi),%rax
cmovzq %rbp,%r13 cmovcq %rbp,%r13
movq 8(%rsi),%rbp movq 8(%rsi),%rbp
cmovzq %rcx,%r8 cmovcq %rcx,%r8
movq 16(%rsi),%rcx movq 16(%rsi),%rcx
cmovzq %r10,%r9 cmovcq %r10,%r9
movq 24(%rsi),%r10 movq 24(%rsi),%r10
call __ecp_nistz256_subq call __ecp_nistz256_subq

@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text

File diff suppressed because it is too large

@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
@ -7,9 +7,10 @@
.p2align 4 .p2align 4
_sha1_block_data_order: _sha1_block_data_order:
movl _OPENSSL_ia32cap_P+0(%rip),%r9d leaq _OPENSSL_ia32cap_P(%rip),%r10
movl _OPENSSL_ia32cap_P+4(%rip),%r8d movl 0(%r10),%r9d
movl _OPENSSL_ia32cap_P+8(%rip),%r10d movl 4(%r10),%r8d
movl 8(%r10),%r10d
testl $512,%r8d testl $512,%r8d
jz L$ialu jz L$ialu
andl $268435456,%r8d andl $268435456,%r8d
@ -1240,14 +1241,13 @@ L$epilogue:
.p2align 4 .p2align 4
sha1_block_data_order_ssse3: sha1_block_data_order_ssse3:
_ssse3_shortcut: _ssse3_shortcut:
movq %rsp,%rax movq %rsp,%r11
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
leaq -64(%rsp),%rsp leaq -64(%rsp),%rsp
movq %rax,%r14
andq $-64,%rsp andq $-64,%rsp
movq %rdi,%r8 movq %rdi,%r8
movq %rsi,%r9 movq %rsi,%r9
@ -1255,7 +1255,7 @@ _ssse3_shortcut:
shlq $6,%r10 shlq $6,%r10
addq %r9,%r10 addq %r9,%r10
leaq K_XX_XX+64(%rip),%r11 leaq K_XX_XX+64(%rip),%r14
movl 0(%r8),%eax movl 0(%r8),%eax
movl 4(%r8),%ebx movl 4(%r8),%ebx
@ -1267,8 +1267,8 @@ _ssse3_shortcut:
xorl %edx,%edi xorl %edx,%edi
andl %edi,%esi andl %edi,%esi
movdqa 64(%r11),%xmm6 movdqa 64(%r14),%xmm6
movdqa -64(%r11),%xmm9 movdqa -64(%r14),%xmm9
movdqu 0(%r9),%xmm0 movdqu 0(%r9),%xmm0
movdqu 16(%r9),%xmm1 movdqu 16(%r9),%xmm1
movdqu 32(%r9),%xmm2 movdqu 32(%r9),%xmm2
@ -1344,7 +1344,7 @@ L$oop_ssse3:
pslld $2,%xmm9 pslld $2,%xmm9
pxor %xmm10,%xmm4 pxor %xmm10,%xmm4
xorl %ebp,%edx xorl %ebp,%edx
movdqa -64(%r11),%xmm10 movdqa -64(%r14),%xmm10
roll $5,%ecx roll $5,%ecx
addl %edi,%ebx addl %edi,%ebx
andl %edx,%esi andl %edx,%esi
@ -1405,7 +1405,7 @@ L$oop_ssse3:
pslld $2,%xmm10 pslld $2,%xmm10
pxor %xmm8,%xmm5 pxor %xmm8,%xmm5
xorl %eax,%ebp xorl %eax,%ebp
movdqa -32(%r11),%xmm8 movdqa -32(%r14),%xmm8
roll $5,%edx roll $5,%edx
addl %edi,%ecx addl %edi,%ecx
andl %ebp,%esi andl %ebp,%esi
@ -1466,7 +1466,7 @@ L$oop_ssse3:
pslld $2,%xmm8 pslld $2,%xmm8
pxor %xmm9,%xmm6 pxor %xmm9,%xmm6
xorl %ebx,%eax xorl %ebx,%eax
movdqa -32(%r11),%xmm9 movdqa -32(%r14),%xmm9
roll $5,%ebp roll $5,%ebp
addl %edi,%edx addl %edi,%edx
andl %eax,%esi andl %eax,%esi
@ -1527,7 +1527,7 @@ L$oop_ssse3:
pslld $2,%xmm9 pslld $2,%xmm9
pxor %xmm10,%xmm7 pxor %xmm10,%xmm7
xorl %ecx,%ebx xorl %ecx,%ebx
movdqa -32(%r11),%xmm10 movdqa -32(%r14),%xmm10
roll $5,%eax roll $5,%eax
addl %edi,%ebp addl %edi,%ebp
andl %ebx,%esi andl %ebx,%esi
@ -1638,7 +1638,7 @@ L$oop_ssse3:
pxor %xmm3,%xmm2 pxor %xmm3,%xmm2
addl %esi,%eax addl %esi,%eax
xorl %edx,%edi xorl %edx,%edi
movdqa 0(%r11),%xmm10 movdqa 0(%r14),%xmm10
rorl $7,%ecx rorl $7,%ecx
paddd %xmm1,%xmm9 paddd %xmm1,%xmm9
addl %ebx,%eax addl %ebx,%eax
@ -1873,7 +1873,7 @@ L$oop_ssse3:
pxor %xmm0,%xmm7 pxor %xmm0,%xmm7
roll $5,%ebx roll $5,%ebx
addl %esi,%eax addl %esi,%eax
movdqa 32(%r11),%xmm9 movdqa 32(%r14),%xmm9
xorl %ecx,%edi xorl %ecx,%edi
paddd %xmm6,%xmm8 paddd %xmm6,%xmm8
xorl %edx,%ecx xorl %edx,%ecx
@ -2164,8 +2164,8 @@ L$oop_ssse3:
addl %edx,%ecx addl %edx,%ecx
cmpq %r10,%r9 cmpq %r10,%r9
je L$done_ssse3 je L$done_ssse3
movdqa 64(%r11),%xmm6 movdqa 64(%r14),%xmm6
movdqa -64(%r11),%xmm9 movdqa -64(%r14),%xmm9
movdqu 0(%r9),%xmm0 movdqu 0(%r9),%xmm0
movdqu 16(%r9),%xmm1 movdqu 16(%r9),%xmm1
movdqu 32(%r9),%xmm2 movdqu 32(%r9),%xmm2
@ -2402,13 +2402,12 @@ L$done_ssse3:
movl %ecx,8(%r8) movl %ecx,8(%r8)
movl %edx,12(%r8) movl %edx,12(%r8)
movl %ebp,16(%r8) movl %ebp,16(%r8)
leaq (%r14),%rsi movq -40(%r11),%r14
movq -40(%rsi),%r14 movq -32(%r11),%r13
movq -32(%rsi),%r13 movq -24(%r11),%r12
movq -24(%rsi),%r12 movq -16(%r11),%rbp
movq -16(%rsi),%rbp movq -8(%r11),%rbx
movq -8(%rsi),%rbx leaq (%r11),%rsp
leaq (%rsi),%rsp
L$epilogue_ssse3: L$epilogue_ssse3:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
@ -2416,7 +2415,7 @@ L$epilogue_ssse3:
.p2align 4 .p2align 4
sha1_block_data_order_avx: sha1_block_data_order_avx:
_avx_shortcut: _avx_shortcut:
movq %rsp,%rax movq %rsp,%r11
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
@ -2424,7 +2423,6 @@ _avx_shortcut:
pushq %r14 pushq %r14
leaq -64(%rsp),%rsp leaq -64(%rsp),%rsp
vzeroupper vzeroupper
movq %rax,%r14
andq $-64,%rsp andq $-64,%rsp
movq %rdi,%r8 movq %rdi,%r8
movq %rsi,%r9 movq %rsi,%r9
@ -2432,7 +2430,7 @@ _avx_shortcut:
shlq $6,%r10 shlq $6,%r10
addq %r9,%r10 addq %r9,%r10
leaq K_XX_XX+64(%rip),%r11 leaq K_XX_XX+64(%rip),%r14
movl 0(%r8),%eax movl 0(%r8),%eax
movl 4(%r8),%ebx movl 4(%r8),%ebx
@ -2444,8 +2442,8 @@ _avx_shortcut:
xorl %edx,%edi xorl %edx,%edi
andl %edi,%esi andl %edi,%esi
vmovdqa 64(%r11),%xmm6 vmovdqa 64(%r14),%xmm6
vmovdqa -64(%r11),%xmm11 vmovdqa -64(%r14),%xmm11
vmovdqu 0(%r9),%xmm0 vmovdqu 0(%r9),%xmm0
vmovdqu 16(%r9),%xmm1 vmovdqu 16(%r9),%xmm1
vmovdqu 32(%r9),%xmm2 vmovdqu 32(%r9),%xmm2
@ -2570,7 +2568,7 @@ L$oop_avx:
vpxor %xmm10,%xmm5,%xmm5 vpxor %xmm10,%xmm5,%xmm5
xorl %eax,%ebp xorl %eax,%ebp
shldl $5,%edx,%edx shldl $5,%edx,%edx
vmovdqa -32(%r11),%xmm11 vmovdqa -32(%r14),%xmm11
addl %edi,%ecx addl %edi,%ecx
andl %ebp,%esi andl %ebp,%esi
xorl %eax,%ebp xorl %eax,%ebp
@ -2783,7 +2781,7 @@ L$oop_avx:
addl %esi,%eax addl %esi,%eax
xorl %edx,%edi xorl %edx,%edi
vpaddd %xmm1,%xmm11,%xmm9 vpaddd %xmm1,%xmm11,%xmm9
vmovdqa 0(%r11),%xmm11 vmovdqa 0(%r14),%xmm11
shrdl $7,%ecx,%ecx shrdl $7,%ecx,%ecx
addl %ebx,%eax addl %ebx,%eax
vpxor %xmm8,%xmm2,%xmm2 vpxor %xmm8,%xmm2,%xmm2
@ -3002,7 +3000,7 @@ L$oop_avx:
movl %ebx,%edi movl %ebx,%edi
xorl %edx,%esi xorl %edx,%esi
vpaddd %xmm6,%xmm11,%xmm9 vpaddd %xmm6,%xmm11,%xmm9
vmovdqa 32(%r11),%xmm11 vmovdqa 32(%r14),%xmm11
shldl $5,%ebx,%ebx shldl $5,%ebx,%ebx
addl %esi,%eax addl %esi,%eax
vpxor %xmm8,%xmm7,%xmm7 vpxor %xmm8,%xmm7,%xmm7
@ -3281,8 +3279,8 @@ L$oop_avx:
addl %edx,%ecx addl %edx,%ecx
cmpq %r10,%r9 cmpq %r10,%r9
je L$done_avx je L$done_avx
vmovdqa 64(%r11),%xmm6 vmovdqa 64(%r14),%xmm6
vmovdqa -64(%r11),%xmm11 vmovdqa -64(%r14),%xmm11
vmovdqu 0(%r9),%xmm0 vmovdqu 0(%r9),%xmm0
vmovdqu 16(%r9),%xmm1 vmovdqu 16(%r9),%xmm1
vmovdqu 32(%r9),%xmm2 vmovdqu 32(%r9),%xmm2
@ -3518,13 +3516,12 @@ L$done_avx:
movl %ecx,8(%r8) movl %ecx,8(%r8)
movl %edx,12(%r8) movl %edx,12(%r8)
movl %ebp,16(%r8) movl %ebp,16(%r8)
leaq (%r14),%rsi movq -40(%r11),%r14
movq -40(%rsi),%r14 movq -32(%r11),%r13
movq -32(%rsi),%r13 movq -24(%r11),%r12
movq -24(%rsi),%r12 movq -16(%r11),%rbp
movq -16(%rsi),%rbp movq -8(%r11),%rbx
movq -8(%rsi),%rbx leaq (%r11),%rsp
leaq (%rsi),%rsp
L$epilogue_avx: L$epilogue_avx:
.byte 0xf3,0xc3 .byte 0xf3,0xc3

@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
@ -18,13 +18,13 @@ _sha256_block_data_order:
je L$avx_shortcut je L$avx_shortcut
testl $512,%r10d testl $512,%r10d
jnz L$ssse3_shortcut jnz L$ssse3_shortcut
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movq %rsp,%r11
shlq $4,%rdx shlq $4,%rdx
subq $64+32,%rsp subq $64+32,%rsp
leaq (%rsi,%rdx,4),%rdx leaq (%rsi,%rdx,4),%rdx
@ -32,7 +32,7 @@ _sha256_block_data_order:
movq %rdi,64+0(%rsp) movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp) movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp) movq %rdx,64+16(%rsp)
movq %r11,64+24(%rsp) movq %rax,64+24(%rsp)
L$prologue: L$prologue:
movl 0(%rdi),%eax movl 0(%rdi),%eax
@ -1697,13 +1697,13 @@ L$rounds_16_xx:
jb L$loop jb L$loop
movq 64+24(%rsp),%rsi movq 64+24(%rsp),%rsi
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
L$epilogue: L$epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
@ -1754,13 +1754,13 @@ K256:
.p2align 6 .p2align 6
sha256_block_data_order_ssse3: sha256_block_data_order_ssse3:
L$ssse3_shortcut: L$ssse3_shortcut:
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movq %rsp,%r11
shlq $4,%rdx shlq $4,%rdx
subq $96,%rsp subq $96,%rsp
leaq (%rsi,%rdx,4),%rdx leaq (%rsi,%rdx,4),%rdx
@ -1768,7 +1768,7 @@ L$ssse3_shortcut:
movq %rdi,64+0(%rsp) movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp) movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp) movq %rdx,64+16(%rsp)
movq %r11,64+24(%rsp) movq %rax,64+24(%rsp)
L$prologue_ssse3: L$prologue_ssse3:
movl 0(%rdi),%eax movl 0(%rdi),%eax
@ -2835,13 +2835,13 @@ L$ssse3_00_47:
jb L$loop_ssse3 jb L$loop_ssse3
movq 64+24(%rsp),%rsi movq 64+24(%rsp),%rsi
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
L$epilogue_ssse3: L$epilogue_ssse3:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
@ -2849,13 +2849,13 @@ L$epilogue_ssse3:
.p2align 6 .p2align 6
sha256_block_data_order_avx: sha256_block_data_order_avx:
L$avx_shortcut: L$avx_shortcut:
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movq %rsp,%r11
shlq $4,%rdx shlq $4,%rdx
subq $96,%rsp subq $96,%rsp
leaq (%rsi,%rdx,4),%rdx leaq (%rsi,%rdx,4),%rdx
@ -2863,7 +2863,7 @@ L$avx_shortcut:
movq %rdi,64+0(%rsp) movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp) movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp) movq %rdx,64+16(%rsp)
movq %r11,64+24(%rsp) movq %rax,64+24(%rsp)
L$prologue_avx: L$prologue_avx:
vzeroupper vzeroupper
@ -3892,13 +3892,13 @@ L$avx_00_47:
movq 64+24(%rsp),%rsi movq 64+24(%rsp),%rsi
vzeroupper vzeroupper
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
L$epilogue_avx: L$epilogue_avx:
.byte 0xf3,0xc3 .byte 0xf3,0xc3

@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
@ -18,13 +18,13 @@ _sha512_block_data_order:
orl %r9d,%r10d orl %r9d,%r10d
cmpl $1342177792,%r10d cmpl $1342177792,%r10d
je L$avx_shortcut je L$avx_shortcut
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movq %rsp,%r11
shlq $4,%rdx shlq $4,%rdx
subq $128+32,%rsp subq $128+32,%rsp
leaq (%rsi,%rdx,8),%rdx leaq (%rsi,%rdx,8),%rdx
@ -32,7 +32,7 @@ _sha512_block_data_order:
movq %rdi,128+0(%rsp) movq %rdi,128+0(%rsp)
movq %rsi,128+8(%rsp) movq %rsi,128+8(%rsp)
movq %rdx,128+16(%rsp) movq %rdx,128+16(%rsp)
movq %r11,128+24(%rsp) movq %rax,128+24(%rsp)
L$prologue: L$prologue:
movq 0(%rdi),%rax movq 0(%rdi),%rax
@ -1697,13 +1697,13 @@ L$rounds_16_xx:
jb L$loop jb L$loop
movq 128+24(%rsp),%rsi movq 128+24(%rsp),%rsi
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
L$epilogue: L$epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
@ -1798,13 +1798,13 @@ K512:
.p2align 6 .p2align 6
sha512_block_data_order_xop: sha512_block_data_order_xop:
L$xop_shortcut: L$xop_shortcut:
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movq %rsp,%r11
shlq $4,%rdx shlq $4,%rdx
subq $160,%rsp subq $160,%rsp
leaq (%rsi,%rdx,8),%rdx leaq (%rsi,%rdx,8),%rdx
@ -1812,7 +1812,7 @@ L$xop_shortcut:
movq %rdi,128+0(%rsp) movq %rdi,128+0(%rsp)
movq %rsi,128+8(%rsp) movq %rsi,128+8(%rsp)
movq %rdx,128+16(%rsp) movq %rdx,128+16(%rsp)
movq %r11,128+24(%rsp) movq %rax,128+24(%rsp)
L$prologue_xop: L$prologue_xop:
vzeroupper vzeroupper
@ -2867,13 +2867,13 @@ L$xop_00_47:
movq 128+24(%rsp),%rsi movq 128+24(%rsp),%rsi
vzeroupper vzeroupper
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
L$epilogue_xop: L$epilogue_xop:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
@ -2881,13 +2881,13 @@ L$epilogue_xop:
.p2align 6 .p2align 6
sha512_block_data_order_avx: sha512_block_data_order_avx:
L$avx_shortcut: L$avx_shortcut:
movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movq %rsp,%r11
shlq $4,%rdx shlq $4,%rdx
subq $160,%rsp subq $160,%rsp
leaq (%rsi,%rdx,8),%rdx leaq (%rsi,%rdx,8),%rdx
@ -2895,7 +2895,7 @@ L$avx_shortcut:
movq %rdi,128+0(%rsp) movq %rdi,128+0(%rsp)
movq %rsi,128+8(%rsp) movq %rsi,128+8(%rsp)
movq %rdx,128+16(%rsp) movq %rdx,128+16(%rsp)
movq %r11,128+24(%rsp) movq %rax,128+24(%rsp)
L$prologue_avx: L$prologue_avx:
vzeroupper vzeroupper
@ -4014,13 +4014,13 @@ L$avx_00_47:
movq 128+24(%rsp),%rsi movq 128+24(%rsp),%rsi
vzeroupper vzeroupper
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14 movq -40(%rsi),%r14
movq 16(%rsi),%r13 movq -32(%rsi),%r13
movq 24(%rsi),%r12 movq -24(%rsi),%r12
movq 32(%rsi),%rbp movq -16(%rsi),%rbp
movq 40(%rsi),%rbx movq -8(%rsi),%rbx
leaq 48(%rsi),%rsp leaq (%rsi),%rsp
L$epilogue_avx: L$epilogue_avx:
.byte 0xf3,0xc3 .byte 0xf3,0xc3

@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text

@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
@ -8,6 +8,10 @@
.p2align 4 .p2align 4
_bn_mul_mont: _bn_mul_mont:
movl %r9d,%r9d
movq %rsp,%rax
testl $3,%r9d testl $3,%r9d
jnz L$mul_enter jnz L$mul_enter
cmpl $8,%r9d cmpl $8,%r9d
@ -21,20 +25,50 @@ _bn_mul_mont:
.p2align 4 .p2align 4
L$mul_enter: L$mul_enter:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
movl %r9d,%r9d
leaq 2(%r9),%r10
movq %rsp,%r11
negq %r10
leaq (%rsp,%r10,8),%rsp
andq $-1024,%rsp
movq %r11,8(%rsp,%r9,8) negq %r9
movq %rsp,%r11
leaq -16(%rsp,%r9,8),%r10
negq %r9
andq $-1024,%r10
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul_page_walk
jmp L$mul_page_walk_done
.p2align 4
L$mul_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul_page_walk
L$mul_page_walk_done:
movq %rax,8(%rsp,%r9,8)
L$mul_body: L$mul_body:
movq %rdx,%r12 movq %rdx,%r12
movq (%r8),%r8 movq (%r8),%r8
@ -177,7 +211,8 @@ L$inner_enter:
movq %r9,%r15 movq %r9,%r15
jmp L$sub jmp L$sub
.p2align 4 .p2align 4
L$sub: sbbq (%rcx,%r14,8),%rax L$sub:
sbbq (%rcx,%r14,8),%rax
movq %rax,(%rdi,%r14,8) movq %rax,(%rdi,%r14,8)
movq 8(%rsi,%r14,8),%rax movq 8(%rsi,%r14,8),%rax
leaq 1(%r14),%r14 leaq 1(%r14),%r14
@ -186,51 +221,86 @@ L$sub: sbbq (%rcx,%r14,8),%rax
sbbq $0,%rax sbbq $0,%rax
xorq %r14,%r14 xorq %r14,%r14
andq %rax,%rsi
notq %rax
movq %rdi,%rcx
andq %rax,%rcx
movq %r9,%r15 movq %r9,%r15
orq %rcx,%rsi
.p2align 4 .p2align 4
L$copy: L$copy:
movq (%rsp,%r14,8),%rsi movq (%rsi,%r14,8),%rax
movq (%rdi,%r14,8),%rcx
xorq %rcx,%rsi
andq %rax,%rsi
xorq %rcx,%rsi
movq %r14,(%rsp,%r14,8) movq %r14,(%rsp,%r14,8)
movq %rsi,(%rdi,%r14,8) movq %rax,(%rdi,%r14,8)
leaq 1(%r14),%r14 leaq 1(%r14),%r14
subq $1,%r15 subq $1,%r15
jnz L$copy jnz L$copy
movq 8(%rsp,%r9,8),%rsi movq 8(%rsp,%r9,8),%rsi
movq $1,%rax movq $1,%rax
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13 movq -40(%rsi),%r14
movq 24(%rsi),%r12
movq 32(%rsi),%rbp movq -32(%rsi),%r13
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
L$mul_epilogue: L$mul_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.p2align 4 .p2align 4
bn_mul4x_mont: bn_mul4x_mont:
L$mul4x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
movl %r9d,%r9d movl %r9d,%r9d
leaq 4(%r9),%r10 movq %rsp,%rax
movq %rsp,%r11
negq %r10 L$mul4x_enter:
leaq (%rsp,%r10,8),%rsp pushq %rbx
andq $-1024,%rsp
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
negq %r9
movq %rsp,%r11
leaq -32(%rsp,%r9,8),%r10
negq %r9
andq $-1024,%r10
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul4x_page_walk
jmp L$mul4x_page_walk_done
L$mul4x_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul4x_page_walk
L$mul4x_page_walk_done:
movq %rax,8(%rsp,%r9,8)
movq %r11,8(%rsp,%r9,8)
L$mul4x_body: L$mul4x_body:
movq %rdi,16(%rsp,%r9,8) movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12 movq %rdx,%r12
@ -530,9 +600,11 @@ L$inner4x:
cmpq %r9,%r14 cmpq %r9,%r14
jb L$outer4x jb L$outer4x
movq 16(%rsp,%r9,8),%rdi movq 16(%rsp,%r9,8),%rdi
leaq -4(%r9),%r15
movq 0(%rsp),%rax movq 0(%rsp),%rax
pxor %xmm0,%xmm0
movq 8(%rsp),%rdx movq 8(%rsp),%rdx
shrq $2,%r9 shrq $2,%r15
leaq (%rsp),%rsi leaq (%rsp),%rsi
xorq %r14,%r14 xorq %r14,%r14
@ -540,7 +612,6 @@ L$inner4x:
movq 16(%rsi),%rbx movq 16(%rsi),%rbx
movq 24(%rsi),%rbp movq 24(%rsi),%rbp
sbbq 8(%rcx),%rdx sbbq 8(%rcx),%rdx
leaq -1(%r9),%r15
jmp L$sub4x jmp L$sub4x
.p2align 4 .p2align 4
L$sub4x: L$sub4x:
@ -568,62 +639,79 @@ L$sub4x:
movq %rbx,16(%rdi,%r14,8) movq %rbx,16(%rdi,%r14,8)
sbbq $0,%rax sbbq $0,%rax
movq %rax,%xmm0
punpcklqdq %xmm0,%xmm0
movq %rbp,24(%rdi,%r14,8) movq %rbp,24(%rdi,%r14,8)
xorq %r14,%r14 xorq %r14,%r14
andq %rax,%rsi
notq %rax
movq %rdi,%rcx
andq %rax,%rcx
leaq -4(%r9),%r15
orq %rcx,%rsi
shrq $2,%r15
movq %r9,%r15 movdqu (%rsi),%xmm1
pxor %xmm5,%xmm5 movdqa %xmm0,(%rsp)
movdqu %xmm1,(%rdi)
jmp L$copy4x jmp L$copy4x
.p2align 4 .p2align 4
L$copy4x: L$copy4x:
movdqu (%rsp,%r14,1),%xmm2 movdqu 16(%rsi,%r14,1),%xmm2
movdqu 16(%rsp,%r14,1),%xmm4 movdqu 32(%rsi,%r14,1),%xmm1
movdqu (%rdi,%r14,1),%xmm1 movdqa %xmm0,16(%rsp,%r14,1)
movdqu 16(%rdi,%r14,1),%xmm3 movdqu %xmm2,16(%rdi,%r14,1)
pxor %xmm1,%xmm2 movdqa %xmm0,32(%rsp,%r14,1)
pxor %xmm3,%xmm4 movdqu %xmm1,32(%rdi,%r14,1)
pand %xmm0,%xmm2
pand %xmm0,%xmm4
pxor %xmm1,%xmm2
pxor %xmm3,%xmm4
movdqu %xmm2,(%rdi,%r14,1)
movdqu %xmm4,16(%rdi,%r14,1)
movdqa %xmm5,(%rsp,%r14,1)
movdqa %xmm5,16(%rsp,%r14,1)
leaq 32(%r14),%r14 leaq 32(%r14),%r14
decq %r15 decq %r15
jnz L$copy4x jnz L$copy4x
shlq $2,%r9 movdqu 16(%rsi,%r14,1),%xmm2
movdqa %xmm0,16(%rsp,%r14,1)
movdqu %xmm2,16(%rdi,%r14,1)
movq 8(%rsp,%r9,8),%rsi movq 8(%rsp,%r9,8),%rsi
movq $1,%rax movq $1,%rax
movq (%rsi),%r15 movq -48(%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13 movq -40(%rsi),%r14
movq 24(%rsi),%r12
movq 32(%rsi),%rbp movq -32(%rsi),%r13
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
L$mul4x_epilogue: L$mul4x_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.p2align 5 .p2align 5
bn_sqr8x_mont: bn_sqr8x_mont:
L$sqr8x_enter:
movq %rsp,%rax movq %rsp,%rax
L$sqr8x_enter:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
L$sqr8x_prologue:
movl %r9d,%r10d movl %r9d,%r10d
shll $3,%r9d shll $3,%r9d
shlq $3+2,%r10 shlq $3+2,%r10
@ -635,30 +723,49 @@ L$sqr8x_enter:
leaq -64(%rsp,%r9,2),%r11 leaq -64(%rsp,%r9,2),%r11
movq %rsp,%rbp
movq (%r8),%r8 movq (%r8),%r8
subq %rsi,%r11 subq %rsi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb L$sqr8x_sp_alt jb L$sqr8x_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -64(%rsp,%r9,2),%rsp leaq -64(%rbp,%r9,2),%rbp
jmp L$sqr8x_sp_done jmp L$sqr8x_sp_done
.p2align 5 .p2align 5
L$sqr8x_sp_alt: L$sqr8x_sp_alt:
leaq 4096-64(,%r9,2),%r10 leaq 4096-64(,%r9,2),%r10
leaq -64(%rsp,%r9,2),%rsp leaq -64(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
L$sqr8x_sp_done: L$sqr8x_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$sqr8x_page_walk
jmp L$sqr8x_page_walk_done
.p2align 4
L$sqr8x_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$sqr8x_page_walk
L$sqr8x_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9
movq %r8,32(%rsp) movq %r8,32(%rsp)
movq %rax,40(%rsp) movq %rax,40(%rsp)
L$sqr8x_body: L$sqr8x_body:
.byte 102,72,15,110,209 .byte 102,72,15,110,209
@ -705,6 +812,7 @@ L$sqr8x_sub:
pxor %xmm0,%xmm0 pxor %xmm0,%xmm0
pshufd $0,%xmm1,%xmm1 pshufd $0,%xmm1,%xmm1
movq 40(%rsp),%rsi movq 40(%rsp),%rsi
jmp L$sqr8x_cond_copy jmp L$sqr8x_cond_copy
.p2align 5 .p2align 5
@ -734,15 +842,23 @@ L$sqr8x_cond_copy:
movq $1,%rax movq $1,%rax
movq -48(%rsi),%r15 movq -48(%rsi),%r15
movq -40(%rsi),%r14 movq -40(%rsi),%r14
movq -32(%rsi),%r13 movq -32(%rsi),%r13
movq -24(%rsi),%r12 movq -24(%rsi),%r12
movq -16(%rsi),%rbp movq -16(%rsi),%rbp
movq -8(%rsi),%rbx movq -8(%rsi),%rbx
leaq (%rsi),%rsp leaq (%rsi),%rsp
L$sqr8x_epilogue: L$sqr8x_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 4 .p2align 4
#endif #endif

@ -1,4 +1,4 @@
#if defined(__x86_64__) #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text .text
@ -8,30 +8,64 @@
.p2align 6 .p2align 6
_bn_mul_mont_gather5: _bn_mul_mont_gather5:
movl %r9d,%r9d
movq %rsp,%rax
testl $7,%r9d testl $7,%r9d
jnz L$mul_enter jnz L$mul_enter
jmp L$mul4x_enter jmp L$mul4x_enter
.p2align 4 .p2align 4
L$mul_enter: L$mul_enter:
movl %r9d,%r9d
movq %rsp,%rax
movd 8(%rsp),%xmm5 movd 8(%rsp),%xmm5
leaq L$inc(%rip),%r10
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
leaq 2(%r9),%r11
negq %r11
leaq -264(%rsp,%r11,8),%rsp
andq $-1024,%rsp
negq %r9
movq %rsp,%r11
leaq -280(%rsp,%r9,8),%r10
negq %r9
andq $-1024,%r10
subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul_page_walk
jmp L$mul_page_walk_done
L$mul_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja L$mul_page_walk
L$mul_page_walk_done:
leaq L$inc(%rip),%r10
movq %rax,8(%rsp,%r9,8) movq %rax,8(%rsp,%r9,8)
L$mul_body: L$mul_body:
leaq 128(%rdx),%r12 leaq 128(%rdx),%r12
movdqa 0(%r10),%xmm0 movdqa 0(%r10),%xmm0
movdqa 16(%r10),%xmm1 movdqa 16(%r10),%xmm1
@ -361,7 +395,8 @@ L$inner_enter:
movq %r9,%r15 movq %r9,%r15
jmp L$sub jmp L$sub
.p2align 4 .p2align 4
L$sub: sbbq (%rcx,%r14,8),%rax L$sub:
sbbq (%rcx,%r14,8),%rax
movq %rax,(%rdi,%r14,8) movq %rax,(%rdi,%r14,8)
movq 8(%rsi,%r14,8),%rax movq 8(%rsi,%r14,8),%rax
leaq 1(%r14),%r14 leaq 1(%r14),%r14
@ -370,46 +405,65 @@ L$sub: sbbq (%rcx,%r14,8),%rax
sbbq $0,%rax sbbq $0,%rax
xorq %r14,%r14 xorq %r14,%r14
andq %rax,%rsi
notq %rax
movq %rdi,%rcx
andq %rax,%rcx
movq %r9,%r15 movq %r9,%r15
orq %rcx,%rsi
.p2align 4 .p2align 4
L$copy: L$copy:
movq (%rsp,%r14,8),%rsi movq (%rsi,%r14,8),%rax
movq (%rdi,%r14,8),%rcx
xorq %rcx,%rsi
andq %rax,%rsi
xorq %rcx,%rsi
movq %r14,(%rsp,%r14,8) movq %r14,(%rsp,%r14,8)
movq %rsi,(%rdi,%r14,8) movq %rax,(%rdi,%r14,8)
leaq 1(%r14),%r14 leaq 1(%r14),%r14
subq $1,%r15 subq $1,%r15
jnz L$copy jnz L$copy
movq 8(%rsp,%r9,8),%rsi movq 8(%rsp,%r9,8),%rsi
movq $1,%rax movq $1,%rax
movq -48(%rsi),%r15 movq -48(%rsi),%r15
movq -40(%rsi),%r14 movq -40(%rsi),%r14
movq -32(%rsi),%r13 movq -32(%rsi),%r13
movq -24(%rsi),%r12 movq -24(%rsi),%r12
movq -16(%rsi),%rbp movq -16(%rsi),%rbp
movq -8(%rsi),%rbx movq -8(%rsi),%rbx
leaq (%rsi),%rsp leaq (%rsi),%rsp
L$mul_epilogue: L$mul_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.p2align 5 .p2align 5
bn_mul4x_mont_gather5: bn_mul4x_mont_gather5:
L$mul4x_enter:
.byte 0x67 .byte 0x67
movq %rsp,%rax movq %rsp,%rax
L$mul4x_enter:
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
L$mul4x_prologue:
.byte 0x67 .byte 0x67
shll $3,%r9d shll $3,%r9d
leaq (%r9,%r9,2),%r10 leaq (%r9,%r9,2),%r10
@ -425,46 +479,73 @@ L$mul4x_enter:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb L$mul4xsp_alt jb L$mul4xsp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp L$mul4xsp_done jmp L$mul4xsp_done
.p2align 5 .p2align 5
L$mul4xsp_alt: L$mul4xsp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
L$mul4xsp_done: L$mul4xsp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$mul4x_page_walk
jmp L$mul4x_page_walk_done
L$mul4x_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$mul4x_page_walk
L$mul4x_page_walk_done:
negq %r9 negq %r9
movq %rax,40(%rsp) movq %rax,40(%rsp)
L$mul4x_body: L$mul4x_body:
call mul4x_internal call mul4x_internal
movq 40(%rsp),%rsi movq 40(%rsp),%rsi
movq $1,%rax movq $1,%rax
movq -48(%rsi),%r15 movq -48(%rsi),%r15
movq -40(%rsi),%r14 movq -40(%rsi),%r14
movq -32(%rsi),%r13 movq -32(%rsi),%r13
movq -24(%rsi),%r12 movq -24(%rsi),%r12
movq -16(%rsi),%rbp movq -16(%rsi),%rbp
movq -8(%rsi),%rbx movq -8(%rsi),%rbx
leaq (%rsi),%rsp leaq (%rsi),%rsp
L$mul4x_epilogue: L$mul4x_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.p2align 5 .p2align 5
mul4x_internal: mul4x_internal:
shlq $5,%r9 shlq $5,%r9
@ -994,14 +1075,23 @@ L$inner4x:
.p2align 5 .p2align 5
_bn_power5: _bn_power5:
movq %rsp,%rax movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
L$power5_prologue:
shll $3,%r9d shll $3,%r9d
leal (%r9,%r9,2),%r10d leal (%r9,%r9,2),%r10d
negq %r9 negq %r9
@ -1015,24 +1105,41 @@ _bn_power5:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb L$pwr_sp_alt jb L$pwr_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp L$pwr_sp_done jmp L$pwr_sp_done
.p2align 5 .p2align 5
L$pwr_sp_alt: L$pwr_sp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
L$pwr_sp_done: L$pwr_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$pwr_page_walk
jmp L$pwr_page_walk_done
L$pwr_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$pwr_page_walk
L$pwr_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9
@ -1047,6 +1154,7 @@ L$pwr_sp_done:
movq %r8,32(%rsp) movq %r8,32(%rsp)
movq %rax,40(%rsp) movq %rax,40(%rsp)
L$power5_body: L$power5_body:
.byte 102,72,15,110,207 .byte 102,72,15,110,207
.byte 102,72,15,110,209 .byte 102,72,15,110,209
@ -1073,18 +1181,27 @@ L$power5_body:
call mul4x_internal call mul4x_internal
movq 40(%rsp),%rsi movq 40(%rsp),%rsi
movq $1,%rax movq $1,%rax
movq -48(%rsi),%r15 movq -48(%rsi),%r15
movq -40(%rsi),%r14 movq -40(%rsi),%r14
movq -32(%rsi),%r13 movq -32(%rsi),%r13
movq -24(%rsi),%r12 movq -24(%rsi),%r12
movq -16(%rsi),%rbp movq -16(%rsi),%rbp
movq -8(%rsi),%rbx movq -8(%rsi),%rbx
leaq (%rsi),%rsp leaq (%rsi),%rsp
L$power5_epilogue: L$power5_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.globl _bn_sqr8x_internal .globl _bn_sqr8x_internal
.private_extern _bn_sqr8x_internal .private_extern _bn_sqr8x_internal
.private_extern _bn_sqr8x_internal .private_extern _bn_sqr8x_internal
@ -1825,6 +1942,7 @@ L$8x_tail:
.p2align 5 .p2align 5
L$8x_tail_done: L$8x_tail_done:
xorq %rax,%rax
addq (%rdx),%r8 addq (%rdx),%r8
adcq $0,%r9 adcq $0,%r9
adcq $0,%r10 adcq $0,%r10
@ -1833,9 +1951,7 @@ L$8x_tail_done:
adcq $0,%r13 adcq $0,%r13
adcq $0,%r14 adcq $0,%r14
adcq $0,%r15 adcq $0,%r15
adcq $0,%rax
xorq %rax,%rax
negq %rsi negq %rsi
L$8x_no_tail: L$8x_no_tail:
@ -1936,15 +2052,24 @@ _bn_from_montgomery:
.p2align 5 .p2align 5
bn_from_mont8x: bn_from_mont8x:
.byte 0x67 .byte 0x67
movq %rsp,%rax movq %rsp,%rax
pushq %rbx pushq %rbx
pushq %rbp pushq %rbp
pushq %r12 pushq %r12
pushq %r13 pushq %r13
pushq %r14 pushq %r14
pushq %r15 pushq %r15
L$from_prologue:
shll $3,%r9d shll $3,%r9d
leaq (%r9,%r9,2),%r10 leaq (%r9,%r9,2),%r10
negq %r9 negq %r9
@ -1958,24 +2083,41 @@ bn_from_mont8x:
leaq -320(%rsp,%r9,2),%r11 leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11 subq %rdi,%r11
andq $4095,%r11 andq $4095,%r11
cmpq %r11,%r10 cmpq %r11,%r10
jb L$from_sp_alt jb L$from_sp_alt
subq %r11,%rsp subq %r11,%rbp
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
jmp L$from_sp_done jmp L$from_sp_done
.p2align 5 .p2align 5
L$from_sp_alt: L$from_sp_alt:
leaq 4096-320(,%r9,2),%r10 leaq 4096-320(,%r9,2),%r10
leaq -320(%rsp,%r9,2),%rsp leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11 subq %r10,%r11
movq $0,%r10 movq $0,%r10
cmovcq %r10,%r11 cmovcq %r10,%r11
subq %r11,%rsp subq %r11,%rbp
L$from_sp_done: L$from_sp_done:
andq $-64,%rsp andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$from_page_walk
jmp L$from_page_walk_done
L$from_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja L$from_page_walk
L$from_page_walk_done:
movq %r9,%r10 movq %r9,%r10
negq %r9 negq %r9
@ -1990,6 +2132,7 @@ L$from_sp_done:
movq %r8,32(%rsp) movq %r8,32(%rsp)
movq %rax,40(%rsp) movq %rax,40(%rsp)
L$from_body: L$from_body:
movq %r9,%r11 movq %r9,%r11
leaq 48(%rsp),%rax leaq 48(%rsp),%rax
@ -2025,11 +2168,12 @@ L$mul_by_1:
pxor %xmm0,%xmm0 pxor %xmm0,%xmm0
leaq 48(%rsp),%rax leaq 48(%rsp),%rax
movq 40(%rsp),%rsi
jmp L$from_mont_zero jmp L$from_mont_zero
.p2align 5 .p2align 5
L$from_mont_zero: L$from_mont_zero:
movq 40(%rsp),%rsi
movdqa %xmm0,0(%rax) movdqa %xmm0,0(%rax)
movdqa %xmm0,16(%rax) movdqa %xmm0,16(%rax)
movdqa %xmm0,32(%rax) movdqa %xmm0,32(%rax)
@ -2040,15 +2184,23 @@ L$from_mont_zero:
movq $1,%rax movq $1,%rax
movq -48(%rsi),%r15 movq -48(%rsi),%r15
movq -40(%rsi),%r14 movq -40(%rsi),%r14
movq -32(%rsi),%r13 movq -32(%rsi),%r13
movq -24(%rsi),%r12 movq -24(%rsi),%r12
movq -16(%rsi),%rbp movq -16(%rsi),%rbp
movq -8(%rsi),%rbx movq -8(%rsi),%rbx
leaq (%rsi),%rsp leaq (%rsi),%rsp
L$from_epilogue: L$from_epilogue:
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.globl _bn_scatter5 .globl _bn_scatter5
.private_extern _bn_scatter5 .private_extern _bn_scatter5

View File

@ -1,19 +0,0 @@
#if defined(__x86_64__)
.text
.globl _aesni_gcm_encrypt
.private_extern _aesni_gcm_encrypt
_aesni_gcm_encrypt:
xorl %eax,%eax
.byte 0xf3,0xc3
.globl _aesni_gcm_decrypt
.private_extern _aesni_gcm_decrypt
_aesni_gcm_decrypt:
xorl %eax,%eax
.byte 0xf3,0xc3
#endif

File diff suppressed because it is too large

View File

@ -1,595 +0,0 @@
#if defined(__x86_64__)
.text
.globl _asm_RC4
.private_extern _asm_RC4
.p2align 4
_asm_RC4:
orq %rsi,%rsi
jne L$entry
.byte 0xf3,0xc3
L$entry:
pushq %rbx
pushq %r12
pushq %r13
L$prologue:
movq %rsi,%r11
movq %rdx,%r12
movq %rcx,%r13
xorq %r10,%r10
xorq %rcx,%rcx
leaq 8(%rdi),%rdi
movb -8(%rdi),%r10b
movb -4(%rdi),%cl
cmpl $-1,256(%rdi)
je L$RC4_CHAR
movl _OPENSSL_ia32cap_P(%rip),%r8d
xorq %rbx,%rbx
incb %r10b
subq %r10,%rbx
subq %r12,%r13
movl (%rdi,%r10,4),%eax
testq $-16,%r11
jz L$loop1
btl $30,%r8d
jc L$intel
andq $7,%rbx
leaq 1(%r10),%rsi
jz L$oop8
subq %rbx,%r11
L$oop8_warmup:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r12,%r13,1)
leaq 1(%r12),%r12
decq %rbx
jnz L$oop8_warmup
leaq 1(%r10),%rsi
jmp L$oop8
.p2align 4
L$oop8:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 0(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,0(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 4(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,4(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 8(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,8(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 12(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,12(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 16(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,16(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 20(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,20(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 24(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,24(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb $8,%sil
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl -4(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,28(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb $8,%r10b
rorq $8,%r8
subq $8,%r11
xorq (%r12),%r8
movq %r8,(%r12,%r13,1)
leaq 8(%r12),%r12
testq $-8,%r11
jnz L$oop8
cmpq $0,%r11
jne L$loop1
jmp L$exit
.p2align 4
L$intel:
testq $-32,%r11
jz L$loop1
andq $15,%rbx
jz L$oop16_is_hot
subq %rbx,%r11
L$oop16_warmup:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r12,%r13,1)
leaq 1(%r12),%r12
decq %rbx
jnz L$oop16_warmup
movq %rcx,%rbx
xorq %rcx,%rcx
movb %bl,%cl
L$oop16_is_hot:
leaq (%rdi,%r10,4),%rsi
addb %al,%cl
movl (%rdi,%rcx,4),%edx
pxor %xmm0,%xmm0
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 4(%rsi),%ebx
movzbl %al,%eax
movl %edx,0(%rsi)
addb %bl,%cl
pinsrw $0,(%rdi,%rax,4),%xmm0
jmp L$oop16_enter
.p2align 4
L$oop16:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
pxor %xmm0,%xmm2
psllq $8,%xmm1
pxor %xmm0,%xmm0
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 4(%rsi),%ebx
movzbl %al,%eax
movl %edx,0(%rsi)
pxor %xmm1,%xmm2
addb %bl,%cl
pinsrw $0,(%rdi,%rax,4),%xmm0
movdqu %xmm2,(%r12,%r13,1)
leaq 16(%r12),%r12
L$oop16_enter:
movl (%rdi,%rcx,4),%edx
pxor %xmm1,%xmm1
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 8(%rsi),%eax
movzbl %bl,%ebx
movl %edx,4(%rsi)
addb %al,%cl
pinsrw $0,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 12(%rsi),%ebx
movzbl %al,%eax
movl %edx,8(%rsi)
addb %bl,%cl
pinsrw $1,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 16(%rsi),%eax
movzbl %bl,%ebx
movl %edx,12(%rsi)
addb %al,%cl
pinsrw $1,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 20(%rsi),%ebx
movzbl %al,%eax
movl %edx,16(%rsi)
addb %bl,%cl
pinsrw $2,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 24(%rsi),%eax
movzbl %bl,%ebx
movl %edx,20(%rsi)
addb %al,%cl
pinsrw $2,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 28(%rsi),%ebx
movzbl %al,%eax
movl %edx,24(%rsi)
addb %bl,%cl
pinsrw $3,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 32(%rsi),%eax
movzbl %bl,%ebx
movl %edx,28(%rsi)
addb %al,%cl
pinsrw $3,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 36(%rsi),%ebx
movzbl %al,%eax
movl %edx,32(%rsi)
addb %bl,%cl
pinsrw $4,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 40(%rsi),%eax
movzbl %bl,%ebx
movl %edx,36(%rsi)
addb %al,%cl
pinsrw $4,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 44(%rsi),%ebx
movzbl %al,%eax
movl %edx,40(%rsi)
addb %bl,%cl
pinsrw $5,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 48(%rsi),%eax
movzbl %bl,%ebx
movl %edx,44(%rsi)
addb %al,%cl
pinsrw $5,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 52(%rsi),%ebx
movzbl %al,%eax
movl %edx,48(%rsi)
addb %bl,%cl
pinsrw $6,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 56(%rsi),%eax
movzbl %bl,%ebx
movl %edx,52(%rsi)
addb %al,%cl
pinsrw $6,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 60(%rsi),%ebx
movzbl %al,%eax
movl %edx,56(%rsi)
addb %bl,%cl
pinsrw $7,(%rdi,%rax,4),%xmm0
addb $16,%r10b
movdqu (%r12),%xmm2
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movzbl %bl,%ebx
movl %edx,60(%rsi)
leaq (%rdi,%r10,4),%rsi
pinsrw $7,(%rdi,%rbx,4),%xmm1
movl (%rsi),%eax
movq %rcx,%rbx
xorq %rcx,%rcx
subq $16,%r11
movb %bl,%cl
testq $-16,%r11
jnz L$oop16
psllq $8,%xmm1
pxor %xmm0,%xmm2
pxor %xmm1,%xmm2
movdqu %xmm2,(%r12,%r13,1)
leaq 16(%r12),%r12
cmpq $0,%r11
jne L$loop1
jmp L$exit
.p2align 4
L$loop1:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r12,%r13,1)
leaq 1(%r12),%r12
decq %r11
jnz L$loop1
jmp L$exit
.p2align 4
L$RC4_CHAR:
addb $1,%r10b
movzbl (%rdi,%r10,1),%eax
testq $-8,%r11
jz L$cloop1
jmp L$cloop8
.p2align 4
L$cloop8:
movl (%r12),%r8d
movl 4(%r12),%r9d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne L$cmov0
movq %rax,%rbx
L$cmov0:
addb %al,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne L$cmov1
movq %rbx,%rax
L$cmov1:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne L$cmov2
movq %rax,%rbx
L$cmov2:
addb %al,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne L$cmov3
movq %rbx,%rax
L$cmov3:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne L$cmov4
movq %rax,%rbx
L$cmov4:
addb %al,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne L$cmov5
movq %rbx,%rax
L$cmov5:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne L$cmov6
movq %rax,%rbx
L$cmov6:
addb %al,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne L$cmov7
movq %rbx,%rax
L$cmov7:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
leaq -8(%r11),%r11
movl %r8d,(%r13)
leaq 8(%r12),%r12
movl %r9d,4(%r13)
leaq 8(%r13),%r13
testq $-8,%r11
jnz L$cloop8
cmpq $0,%r11
jne L$cloop1
jmp L$exit
.p2align 4
L$cloop1:
addb %al,%cl
movzbl %cl,%ecx
movzbl (%rdi,%rcx,1),%edx
movb %al,(%rdi,%rcx,1)
movb %dl,(%rdi,%r10,1)
addb %al,%dl
addb $1,%r10b
movzbl %dl,%edx
movzbl %r10b,%r10d
movzbl (%rdi,%rdx,1),%edx
movzbl (%rdi,%r10,1),%eax
xorb (%r12),%dl
leaq 1(%r12),%r12
movb %dl,(%r13)
leaq 1(%r13),%r13
subq $1,%r11
jnz L$cloop1
jmp L$exit
.p2align 4
L$exit:
subb $1,%r10b
movl %r10d,-8(%rdi)
movl %ecx,-4(%rdi)
movq (%rsp),%r13
movq 8(%rsp),%r12
movq 16(%rsp),%rbx
addq $24,%rsp
L$epilogue:
.byte 0xf3,0xc3
.globl _asm_RC4_set_key
.private_extern _asm_RC4_set_key
.p2align 4
_asm_RC4_set_key:
leaq 8(%rdi),%rdi
leaq (%rdx,%rsi,1),%rdx
negq %rsi
movq %rsi,%rcx
xorl %eax,%eax
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
movl _OPENSSL_ia32cap_P(%rip),%r8d
btl $20,%r8d
jc L$c1stloop
jmp L$w1stloop
.p2align 4
L$w1stloop:
movl %eax,(%rdi,%rax,4)
addb $1,%al
jnc L$w1stloop
xorq %r9,%r9
xorq %r8,%r8
.p2align 4
L$w2ndloop:
movl (%rdi,%r9,4),%r10d
addb (%rdx,%rsi,1),%r8b
addb %r10b,%r8b
addq $1,%rsi
movl (%rdi,%r8,4),%r11d
cmovzq %rcx,%rsi
movl %r10d,(%rdi,%r8,4)
movl %r11d,(%rdi,%r9,4)
addb $1,%r9b
jnc L$w2ndloop
jmp L$exit_key
.p2align 4
L$c1stloop:
movb %al,(%rdi,%rax,1)
addb $1,%al
jnc L$c1stloop
xorq %r9,%r9
xorq %r8,%r8
.p2align 4
L$c2ndloop:
movb (%rdi,%r9,1),%r10b
addb (%rdx,%rsi,1),%r8b
addb %r10b,%r8b
addq $1,%rsi
movb (%rdi,%r8,1),%r11b
jnz L$cnowrap
movq %rcx,%rsi
L$cnowrap:
movb %r10b,(%rdi,%r8,1)
movb %r11b,(%rdi,%r9,1)
addb $1,%r9b
jnc L$c2ndloop
movl $-1,256(%rdi)
.p2align 4
L$exit_key:
xorl %eax,%eax
movl %eax,-8(%rdi)
movl %eax,-4(%rdi)
.byte 0xf3,0xc3
#endif

View File

@ -13,13 +13,14 @@ import sys
SCRIPT_PATH = os.path.abspath(__file__) SCRIPT_PATH = os.path.abspath(__file__)
SRC_PATH = os.path.dirname(os.path.dirname(os.path.dirname(SCRIPT_PATH))) SRC_PATH = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.dirname(SCRIPT_PATH))))
DEPS_PATH = os.path.join(SRC_PATH, 'DEPS') DEPS_PATH = os.path.join(SRC_PATH, 'DEPS')
BORINGSSL_PATH = os.path.join(SRC_PATH, 'third_party', 'boringssl') BORINGSSL_PATH = os.path.join(SRC_PATH, 'packager', 'third_party', 'boringssl')
BORINGSSL_SRC_PATH = os.path.join(BORINGSSL_PATH, 'src') BORINGSSL_SRC_PATH = os.path.join(BORINGSSL_PATH, 'src')
if not os.path.isfile(DEPS_PATH) or not os.path.isdir(BORINGSSL_SRC_PATH): if not os.path.isfile(DEPS_PATH) or not os.path.isdir(BORINGSSL_SRC_PATH):
raise Exception('Could not find Chromium checkout') raise Exception('Could not find packager checkout')
# Pull OS_ARCH_COMBOS out of the BoringSSL script. # Pull OS_ARCH_COMBOS out of the BoringSSL script.
sys.path.append(os.path.join(BORINGSSL_SRC_PATH, 'util')) sys.path.append(os.path.join(BORINGSSL_SRC_PATH, 'util'))
@ -63,7 +64,7 @@ def main():
return 1 return 1
if not IsPristine(SRC_PATH): if not IsPristine(SRC_PATH):
print >>sys.stderr, 'Chromium checkout not pristine.' print >>sys.stderr, 'Packager checkout not pristine.'
return 0 return 0
if not IsPristine(BORINGSSL_SRC_PATH): if not IsPristine(BORINGSSL_SRC_PATH):
print >>sys.stderr, 'BoringSSL checkout not pristine.' print >>sys.stderr, 'BoringSSL checkout not pristine.'

Some files were not shown because too many files have changed in this diff