Boringssl gyp and patches from Chromium

Bug: 22463853

Change-Id: Ib108016160d2c6ecdd56e369b585ab3c3ae82601
This commit is contained in:
Kongqun Yang 2015-07-24 11:46:54 -07:00 committed by KongQun Yang
parent 9c0ae378bc
commit 571d713fec
140 changed files with 186841 additions and 0 deletions

103
packager/third_party/boringssl/BUILD.gn vendored Normal file
View File

@ -0,0 +1,103 @@
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# Public configuration: listed in the boringssl target's public_configs, so it
# applies to that target and to everything that depends on it.
config("openssl_config") {
  include_dirs = [ "src/include" ]

  # Component builds additionally define BORINGSSL_SHARED_LIBRARY.
  if (is_component_build) {
    defines = [ "BORINGSSL_SHARED_LIBRARY" ]
  }
}
# Private configuration for this build file. The visibility list below
# restricts it to targets declared in this file.
config("openssl_internal_config") {
  visibility = [ ":*" ]
}
# The list of BoringSSL files is kept in boringssl.gypi.
#
# exec_script() runs //build/gypi_to_gn.py on that file while GN is generating
# the build. The "scope" input conversion makes GN read the script's output as
# a scope, which is why the lists are referenced below as
# gypi_values.<list name>. The final argument names boringssl.gypi as a file
# dependency of the script call (see GN's exec_script reference).
gypi_values =
exec_script("//build/gypi_to_gn.py",
[ rebase_path("//third_party/boringssl/boringssl.gypi") ],
"scope",
[ "//third_party/boringssl/boringssl.gypi" ])
# Windows' assembly is built with Yasm. The other platforms use the platform
# assembler.
#
# The target is declared only for the CPUs that have a Windows assembly source
# list (x64 and x86). These are also the only CPUs for which the boringssl
# component depends on ":boringssl_asm". Without the CPU check, a Windows build
# for any other CPU would invoke yasm_assemble() with `sources` never assigned.
if (is_win && !is_msan && (current_cpu == "x64" || current_cpu == "x86")) {
  import("//third_party/yasm/yasm_assemble.gni")
  yasm_assemble("boringssl_asm") {
    if (current_cpu == "x64") {
      sources = gypi_values.boringssl_win_x86_64_sources
    } else {
      # current_cpu == "x86" is guaranteed by the enclosing condition.
      sources = gypi_values.boringssl_win_x86_sources
    }
  }
}
component("boringssl") {
  # C sources built on every platform; both lists come from boringssl.gypi via
  # gypi_values.
  sources =
      gypi_values.boringssl_crypto_sources + gypi_values.boringssl_ssl_sources

  public_configs = [ ":openssl_config" ]

  cflags = []
  deps = []

  defines = [
    "BORINGSSL_IMPLEMENTATION",
    "BORINGSSL_NO_STATIC_INITIALIZER",
  ]
  if (is_component_build) {
    defines += [ "BORINGSSL_SHARED_LIBRARY" ]
  }

  # Swap the chromium_code compiler config for no_chromium_code.
  configs -= [ "//build/config/compiler:chromium_code" ]
  configs += [
    "//build/config/compiler:no_chromium_code",

    # https://crbug.com/429039
    # TODO(davidben): Fix size_t truncations in BoringSSL.
    "//build/config/compiler:no_size_t_to_int_warning",
  ]

  include_dirs = [
    # Also supplied through :openssl_config.
    "src/include",

    # Some asm files need arm_arch.h. The asm files are generated and kept in
    # a different directory, so a relative include cannot reach the header;
    # src/crypto is put on the include path instead.
    "src/crypto",
  ]

  # Assembly selection. Exactly one branch is taken, and the branches are
  # tested in the order written. Every combination without a matching
  # assembly source list ends up defining OPENSSL_NO_ASM.
  if (is_msan) {
    # MSan builds define OPENSSL_NO_ASM and add no assembly.
    defines += [ "OPENSSL_NO_ASM" ]
  } else if (current_cpu == "x64" && (is_mac || is_ios)) {
    sources += gypi_values.boringssl_mac_x86_64_sources
  } else if (current_cpu == "x64" && (is_linux || is_android)) {
    sources += gypi_values.boringssl_linux_x86_64_sources
  } else if (current_cpu == "x86" && (is_mac || is_ios)) {
    sources += gypi_values.boringssl_mac_x86_sources
  } else if (current_cpu == "x86" && (is_linux || is_android)) {
    sources += gypi_values.boringssl_linux_x86_sources
  } else if ((current_cpu == "x64" || current_cpu == "x86") && is_win) {
    # Windows assembly comes from the separate Yasm target.
    deps += [ ":boringssl_asm" ]
  } else if (current_cpu == "arm" && (is_linux || is_android)) {
    sources += gypi_values.boringssl_linux_arm_sources
  } else if (current_cpu == "arm64" && (is_linux || is_android)) {
    sources += gypi_values.boringssl_linux_aarch64_sources
  } else {
    defines += [ "OPENSSL_NO_ASM" ]
  }
}

6
packager/third_party/boringssl/DEPS vendored Normal file
View File

@ -0,0 +1,6 @@
# Additional include rules that apply only to files matching the key.
# NOTE(review): the key is presumably treated as a regular expression by the
# DEPS checker (hence the escaped dot) - confirm against the checkdeps docs.
specific_include_rules = {
    # Raw string: "\." is not a valid escape in an ordinary Python string
    # literal and triggers an invalid-escape warning. The raw literal yields
    # the same key text (backslash followed by a dot).
    r"boringssl_unittest\.cc": [
        "+base",
        "+testing",
    ],
}

127
packager/third_party/boringssl/NOTICE vendored Normal file
View File

@ -0,0 +1,127 @@
LICENSE ISSUES
==============
The OpenSSL toolkit stays under a dual license, i.e. both the conditions of
the OpenSSL License and the original SSLeay license apply to the toolkit.
See below for the actual license texts. Actually both licenses are BSD-style
Open Source licenses. In case of any license issues related to OpenSSL
please contact openssl-core@openssl.org.
OpenSSL License
---------------
/* ====================================================================
* Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com).
*
*/
Original SSLeay License
-----------------------
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.]
*/

3
packager/third_party/boringssl/OWNERS vendored Normal file
View File

@ -0,0 +1,3 @@
agl@chromium.org
davidben@chromium.org
rsleevi@chromium.org

View File

@ -0,0 +1,18 @@
Name: boringssl
URL: https://boringssl.googlesource.com/boringssl
Version: git
License: BSDish
License File: NOTICE
License Android Compatible: yes
Security Critical: yes
Description:
This is BoringSSL, a fork of OpenSSL. See
https://www.imperialviolet.org/2014/06/20/boringssl.html
Note: when rolling DEPS forward, remember to run the following commands from a
system with both Perl and Go installed:

  cd third_party/boringssl
  python src/util/generate_build_files.py chromium

View File

@ -0,0 +1,121 @@
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
{
  'targets': [
    {
      # The BoringSSL library. Its type follows the 'component' variable,
      # which the conditions below compare against "shared_library".
      'target_name': 'boringssl',
      'type': '<(component)',

      # boringssl.gypi defines the boringssl_*_sources lists expanded below.
      'includes': [
        'boringssl.gypi',
      ],
      'sources': [
        '<@(boringssl_crypto_sources)',
        '<@(boringssl_ssl_sources)',
      ],
      'defines': [
        'BORINGSSL_IMPLEMENTATION',
        'BORINGSSL_NO_STATIC_INITIALIZER',
      ],
      'include_dirs': [
        'src/include',
        # Some asm files need arm_arch.h. The asm files are generated and kept
        # in a different directory, so they cannot reach the header through a
        # relative path; src/crypto is put on the include path instead.
        'src/crypto',
      ],

      # https://crbug.com/429039
      # TODO(davidben): Fix size_t truncations in BoringSSL.
      'msvs_disabled_warnings': [ 4267, ],

      # Settings exported to targets that depend directly on this one.
      'direct_dependent_settings': {
        'include_dirs': [
          'src/include',
        ],
        'conditions': [
          ['component == "shared_library"', {
            'defines': [
              'BORINGSSL_SHARED_LIBRARY',
            ],
          }],
        ],
      },

      'conditions': [
        ['component == "shared_library"', {
          'defines': [
            'BORINGSSL_SHARED_LIBRARY',
          ],
        }],

        # Assembly selection. Each supported architecture picks a per-OS
        # source list when msan is 0; combinations without a list define
        # OPENSSL_NO_ASM instead.
        ['target_arch == "arm" and msan == 0', {
          'conditions': [
            ['OS == "linux" or OS == "android"', {
              'sources': [ '<@(boringssl_linux_arm_sources)' ],
            }, {
              'defines': [ 'OPENSSL_NO_ASM' ],
            }],
          ],
        }],
        ['target_arch == "arm64" and msan == 0', {
          'conditions': [
            ['OS == "linux" or OS == "android"', {
              'sources': [ '<@(boringssl_linux_aarch64_sources)' ],
            }, {
              'defines': [ 'OPENSSL_NO_ASM' ],
            }],
          ],
        }],
        ['target_arch == "ia32" and msan == 0', {
          'conditions': [
            ['OS == "mac" or OS == "ios"', {
              'sources': [ '<@(boringssl_mac_x86_sources)' ],
            }],
            ['OS == "linux" or OS == "android"', {
              'sources': [ '<@(boringssl_linux_x86_sources)' ],
            }],
            ['OS == "win"', {
              'sources': [ '<@(boringssl_win_x86_sources)' ],
              # On Windows the assembly goes through Yasm (via the included
              # yasm_compile.gypi); other platforms use the platform
              # assembler.
              'variables': {
                'yasm_output_path': '<(SHARED_INTERMEDIATE_DIR)/third_party/boringssl',
              },
              'includes': [
                '../yasm/yasm_compile.gypi',
              ],
            }],
            # Any OS not handled above gets no assembly.
            ['OS != "mac" and OS != "ios" and OS != "linux" and OS != "win" and OS != "android"', {
              'defines': [ 'OPENSSL_NO_ASM' ],
            }],
          ]
        }],
        ['target_arch == "x64" and msan == 0', {
          'conditions': [
            ['OS == "mac" or OS == "ios"', {
              'sources': [ '<@(boringssl_mac_x86_64_sources)' ],
            }],
            ['OS == "linux" or OS == "android"', {
              'sources': [ '<@(boringssl_linux_x86_64_sources)' ],
            }],
            ['OS == "win"', {
              'sources': [ '<@(boringssl_win_x86_64_sources)' ],
              # On Windows the assembly goes through Yasm (via the included
              # yasm_compile.gypi); other platforms use the platform
              # assembler.
              'variables': {
                'yasm_output_path': '<(SHARED_INTERMEDIATE_DIR)/third_party/boringssl',
              },
              'includes': [
                '../yasm/yasm_compile.gypi',
              ],
            }],
            # Any OS not handled above gets no assembly.
            ['OS != "mac" and OS != "ios" and OS != "linux" and OS != "win" and OS != "android"', {
              'defines': [ 'OPENSSL_NO_ASM' ],
            }],
          ]
        }],

        # MSan builds, and architectures with no branch above, get no
        # assembly.
        ['msan == 1 or (target_arch != "arm" and target_arch != "ia32" and target_arch != "x64" and target_arch != "arm64")', {
          'defines': [ 'OPENSSL_NO_ASM' ],
        }],
      ],
    },
  ],
}

View File

@ -0,0 +1,425 @@
# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# This file is created by generate_build_files.py. Do not edit manually.
{
'variables': {
'boringssl_ssl_sources': [
'src/ssl/d1_both.c',
'src/ssl/d1_clnt.c',
'src/ssl/d1_lib.c',
'src/ssl/d1_meth.c',
'src/ssl/d1_pkt.c',
'src/ssl/d1_srtp.c',
'src/ssl/d1_srvr.c',
'src/ssl/pqueue/pqueue.c',
'src/ssl/s3_both.c',
'src/ssl/s3_clnt.c',
'src/ssl/s3_enc.c',
'src/ssl/s3_lib.c',
'src/ssl/s3_meth.c',
'src/ssl/s3_pkt.c',
'src/ssl/s3_srvr.c',
'src/ssl/ssl_aead_ctx.c',
'src/ssl/ssl_algs.c',
'src/ssl/ssl_asn1.c',
'src/ssl/ssl_cert.c',
'src/ssl/ssl_cipher.c',
'src/ssl/ssl_lib.c',
'src/ssl/ssl_rsa.c',
'src/ssl/ssl_sess.c',
'src/ssl/ssl_stat.c',
'src/ssl/ssl_txt.c',
'src/ssl/t1_enc.c',
'src/ssl/t1_lib.c',
],
'boringssl_crypto_sources': [
'err_data.c',
'src/crypto/aes/aes.c',
'src/crypto/aes/mode_wrappers.c',
'src/crypto/asn1/a_bitstr.c',
'src/crypto/asn1/a_bool.c',
'src/crypto/asn1/a_bytes.c',
'src/crypto/asn1/a_d2i_fp.c',
'src/crypto/asn1/a_dup.c',
'src/crypto/asn1/a_enum.c',
'src/crypto/asn1/a_gentm.c',
'src/crypto/asn1/a_i2d_fp.c',
'src/crypto/asn1/a_int.c',
'src/crypto/asn1/a_mbstr.c',
'src/crypto/asn1/a_object.c',
'src/crypto/asn1/a_octet.c',
'src/crypto/asn1/a_print.c',
'src/crypto/asn1/a_strnid.c',
'src/crypto/asn1/a_time.c',
'src/crypto/asn1/a_type.c',
'src/crypto/asn1/a_utctm.c',
'src/crypto/asn1/a_utf8.c',
'src/crypto/asn1/asn1_lib.c',
'src/crypto/asn1/asn1_par.c',
'src/crypto/asn1/asn_pack.c',
'src/crypto/asn1/bio_asn1.c',
'src/crypto/asn1/bio_ndef.c',
'src/crypto/asn1/f_enum.c',
'src/crypto/asn1/f_int.c',
'src/crypto/asn1/f_string.c',
'src/crypto/asn1/t_bitst.c',
'src/crypto/asn1/t_pkey.c',
'src/crypto/asn1/tasn_dec.c',
'src/crypto/asn1/tasn_enc.c',
'src/crypto/asn1/tasn_fre.c',
'src/crypto/asn1/tasn_new.c',
'src/crypto/asn1/tasn_prn.c',
'src/crypto/asn1/tasn_typ.c',
'src/crypto/asn1/tasn_utl.c',
'src/crypto/asn1/x_bignum.c',
'src/crypto/asn1/x_long.c',
'src/crypto/base64/base64.c',
'src/crypto/bio/bio.c',
'src/crypto/bio/bio_mem.c',
'src/crypto/bio/buffer.c',
'src/crypto/bio/connect.c',
'src/crypto/bio/fd.c',
'src/crypto/bio/file.c',
'src/crypto/bio/hexdump.c',
'src/crypto/bio/pair.c',
'src/crypto/bio/printf.c',
'src/crypto/bio/socket.c',
'src/crypto/bio/socket_helper.c',
'src/crypto/bn/add.c',
'src/crypto/bn/asm/x86_64-gcc.c',
'src/crypto/bn/bn.c',
'src/crypto/bn/bn_asn1.c',
'src/crypto/bn/cmp.c',
'src/crypto/bn/convert.c',
'src/crypto/bn/ctx.c',
'src/crypto/bn/div.c',
'src/crypto/bn/exponentiation.c',
'src/crypto/bn/gcd.c',
'src/crypto/bn/generic.c',
'src/crypto/bn/kronecker.c',
'src/crypto/bn/montgomery.c',
'src/crypto/bn/mul.c',
'src/crypto/bn/prime.c',
'src/crypto/bn/random.c',
'src/crypto/bn/rsaz_exp.c',
'src/crypto/bn/shift.c',
'src/crypto/bn/sqrt.c',
'src/crypto/buf/buf.c',
'src/crypto/bytestring/ber.c',
'src/crypto/bytestring/cbb.c',
'src/crypto/bytestring/cbs.c',
'src/crypto/chacha/chacha_generic.c',
'src/crypto/chacha/chacha_vec.c',
'src/crypto/cipher/aead.c',
'src/crypto/cipher/cipher.c',
'src/crypto/cipher/derive_key.c',
'src/crypto/cipher/e_aes.c',
'src/crypto/cipher/e_chacha20poly1305.c',
'src/crypto/cipher/e_des.c',
'src/crypto/cipher/e_null.c',
'src/crypto/cipher/e_rc2.c',
'src/crypto/cipher/e_rc4.c',
'src/crypto/cipher/e_ssl3.c',
'src/crypto/cipher/e_tls.c',
'src/crypto/cipher/tls_cbc.c',
'src/crypto/cmac/cmac.c',
'src/crypto/conf/conf.c',
'src/crypto/cpu-arm.c',
'src/crypto/cpu-intel.c',
'src/crypto/crypto.c',
'src/crypto/des/des.c',
'src/crypto/dh/check.c',
'src/crypto/dh/dh.c',
'src/crypto/dh/dh_asn1.c',
'src/crypto/dh/dh_impl.c',
'src/crypto/dh/params.c',
'src/crypto/digest/digest.c',
'src/crypto/digest/digests.c',
'src/crypto/directory_posix.c',
'src/crypto/directory_win.c',
'src/crypto/dsa/dsa.c',
'src/crypto/dsa/dsa_asn1.c',
'src/crypto/dsa/dsa_impl.c',
'src/crypto/ec/ec.c',
'src/crypto/ec/ec_asn1.c',
'src/crypto/ec/ec_key.c',
'src/crypto/ec/ec_montgomery.c',
'src/crypto/ec/oct.c',
'src/crypto/ec/p256-64.c',
'src/crypto/ec/simple.c',
'src/crypto/ec/util-64.c',
'src/crypto/ec/wnaf.c',
'src/crypto/ecdh/ecdh.c',
'src/crypto/ecdsa/ecdsa.c',
'src/crypto/ecdsa/ecdsa_asn1.c',
'src/crypto/engine/engine.c',
'src/crypto/err/err.c',
'src/crypto/evp/algorithm.c',
'src/crypto/evp/digestsign.c',
'src/crypto/evp/evp.c',
'src/crypto/evp/evp_asn1.c',
'src/crypto/evp/evp_ctx.c',
'src/crypto/evp/p_dsa_asn1.c',
'src/crypto/evp/p_ec.c',
'src/crypto/evp/p_ec_asn1.c',
'src/crypto/evp/p_rsa.c',
'src/crypto/evp/p_rsa_asn1.c',
'src/crypto/evp/pbkdf.c',
'src/crypto/evp/sign.c',
'src/crypto/ex_data.c',
'src/crypto/hkdf/hkdf.c',
'src/crypto/hmac/hmac.c',
'src/crypto/lhash/lhash.c',
'src/crypto/md4/md4.c',
'src/crypto/md5/md5.c',
'src/crypto/mem.c',
'src/crypto/modes/cbc.c',
'src/crypto/modes/cfb.c',
'src/crypto/modes/ctr.c',
'src/crypto/modes/gcm.c',
'src/crypto/modes/ofb.c',
'src/crypto/obj/obj.c',
'src/crypto/obj/obj_xref.c',
'src/crypto/pem/pem_all.c',
'src/crypto/pem/pem_info.c',
'src/crypto/pem/pem_lib.c',
'src/crypto/pem/pem_oth.c',
'src/crypto/pem/pem_pk8.c',
'src/crypto/pem/pem_pkey.c',
'src/crypto/pem/pem_x509.c',
'src/crypto/pem/pem_xaux.c',
'src/crypto/pkcs8/p5_pbe.c',
'src/crypto/pkcs8/p5_pbev2.c',
'src/crypto/pkcs8/p8_pkey.c',
'src/crypto/pkcs8/pkcs8.c',
'src/crypto/poly1305/poly1305.c',
'src/crypto/poly1305/poly1305_arm.c',
'src/crypto/poly1305/poly1305_vec.c',
'src/crypto/rand/hwrand.c',
'src/crypto/rand/rand.c',
'src/crypto/rand/urandom.c',
'src/crypto/rand/windows.c',
'src/crypto/rc4/rc4.c',
'src/crypto/refcount_c11.c',
'src/crypto/refcount_lock.c',
'src/crypto/rsa/blinding.c',
'src/crypto/rsa/padding.c',
'src/crypto/rsa/rsa.c',
'src/crypto/rsa/rsa_asn1.c',
'src/crypto/rsa/rsa_impl.c',
'src/crypto/sha/sha1.c',
'src/crypto/sha/sha256.c',
'src/crypto/sha/sha512.c',
'src/crypto/stack/stack.c',
'src/crypto/thread.c',
'src/crypto/thread_none.c',
'src/crypto/thread_pthread.c',
'src/crypto/thread_win.c',
'src/crypto/time_support.c',
'src/crypto/x509/a_digest.c',
'src/crypto/x509/a_sign.c',
'src/crypto/x509/a_strex.c',
'src/crypto/x509/a_verify.c',
'src/crypto/x509/asn1_gen.c',
'src/crypto/x509/by_dir.c',
'src/crypto/x509/by_file.c',
'src/crypto/x509/i2d_pr.c',
'src/crypto/x509/pkcs7.c',
'src/crypto/x509/t_crl.c',
'src/crypto/x509/t_req.c',
'src/crypto/x509/t_x509.c',
'src/crypto/x509/t_x509a.c',
'src/crypto/x509/x509.c',
'src/crypto/x509/x509_att.c',
'src/crypto/x509/x509_cmp.c',
'src/crypto/x509/x509_d2.c',
'src/crypto/x509/x509_def.c',
'src/crypto/x509/x509_ext.c',
'src/crypto/x509/x509_lu.c',
'src/crypto/x509/x509_obj.c',
'src/crypto/x509/x509_r2x.c',
'src/crypto/x509/x509_req.c',
'src/crypto/x509/x509_set.c',
'src/crypto/x509/x509_trs.c',
'src/crypto/x509/x509_txt.c',
'src/crypto/x509/x509_v3.c',
'src/crypto/x509/x509_vfy.c',
'src/crypto/x509/x509_vpm.c',
'src/crypto/x509/x509cset.c',
'src/crypto/x509/x509name.c',
'src/crypto/x509/x509rset.c',
'src/crypto/x509/x509spki.c',
'src/crypto/x509/x509type.c',
'src/crypto/x509/x_algor.c',
'src/crypto/x509/x_all.c',
'src/crypto/x509/x_attrib.c',
'src/crypto/x509/x_crl.c',
'src/crypto/x509/x_exten.c',
'src/crypto/x509/x_info.c',
'src/crypto/x509/x_name.c',
'src/crypto/x509/x_pkey.c',
'src/crypto/x509/x_pubkey.c',
'src/crypto/x509/x_req.c',
'src/crypto/x509/x_sig.c',
'src/crypto/x509/x_spki.c',
'src/crypto/x509/x_val.c',
'src/crypto/x509/x_x509.c',
'src/crypto/x509/x_x509a.c',
'src/crypto/x509v3/pcy_cache.c',
'src/crypto/x509v3/pcy_data.c',
'src/crypto/x509v3/pcy_lib.c',
'src/crypto/x509v3/pcy_map.c',
'src/crypto/x509v3/pcy_node.c',
'src/crypto/x509v3/pcy_tree.c',
'src/crypto/x509v3/v3_akey.c',
'src/crypto/x509v3/v3_akeya.c',
'src/crypto/x509v3/v3_alt.c',
'src/crypto/x509v3/v3_bcons.c',
'src/crypto/x509v3/v3_bitst.c',
'src/crypto/x509v3/v3_conf.c',
'src/crypto/x509v3/v3_cpols.c',
'src/crypto/x509v3/v3_crld.c',
'src/crypto/x509v3/v3_enum.c',
'src/crypto/x509v3/v3_extku.c',
'src/crypto/x509v3/v3_genn.c',
'src/crypto/x509v3/v3_ia5.c',
'src/crypto/x509v3/v3_info.c',
'src/crypto/x509v3/v3_int.c',
'src/crypto/x509v3/v3_lib.c',
'src/crypto/x509v3/v3_ncons.c',
'src/crypto/x509v3/v3_pci.c',
'src/crypto/x509v3/v3_pcia.c',
'src/crypto/x509v3/v3_pcons.c',
'src/crypto/x509v3/v3_pku.c',
'src/crypto/x509v3/v3_pmaps.c',
'src/crypto/x509v3/v3_prn.c',
'src/crypto/x509v3/v3_purp.c',
'src/crypto/x509v3/v3_skey.c',
'src/crypto/x509v3/v3_sxnet.c',
'src/crypto/x509v3/v3_utl.c',
],
'boringssl_linux_aarch64_sources': [
'linux-aarch64/crypto/aes/aesv8-armx64.S',
'linux-aarch64/crypto/modes/ghashv8-armx64.S',
'linux-aarch64/crypto/sha/sha1-armv8.S',
'linux-aarch64/crypto/sha/sha256-armv8.S',
'linux-aarch64/crypto/sha/sha512-armv8.S',
],
'boringssl_linux_arm_sources': [
'linux-arm/crypto/aes/aes-armv4.S',
'linux-arm/crypto/aes/aesv8-armx32.S',
'linux-arm/crypto/aes/bsaes-armv7.S',
'linux-arm/crypto/bn/armv4-mont.S',
'linux-arm/crypto/modes/ghash-armv4.S',
'linux-arm/crypto/modes/ghashv8-armx32.S',
'linux-arm/crypto/sha/sha1-armv4-large.S',
'linux-arm/crypto/sha/sha256-armv4.S',
'linux-arm/crypto/sha/sha512-armv4.S',
'src/crypto/chacha/chacha_vec_arm.S',
'src/crypto/cpu-arm-asm.S',
'src/crypto/poly1305/poly1305_arm_asm.S',
],
'boringssl_linux_x86_sources': [
'linux-x86/crypto/aes/aes-586.S',
'linux-x86/crypto/aes/aesni-x86.S',
'linux-x86/crypto/aes/vpaes-x86.S',
'linux-x86/crypto/bn/bn-586.S',
'linux-x86/crypto/bn/co-586.S',
'linux-x86/crypto/bn/x86-mont.S',
'linux-x86/crypto/md5/md5-586.S',
'linux-x86/crypto/modes/ghash-x86.S',
'linux-x86/crypto/rc4/rc4-586.S',
'linux-x86/crypto/sha/sha1-586.S',
'linux-x86/crypto/sha/sha256-586.S',
'linux-x86/crypto/sha/sha512-586.S',
],
'boringssl_linux_x86_64_sources': [
'linux-x86_64/crypto/aes/aes-x86_64.S',
'linux-x86_64/crypto/aes/aesni-x86_64.S',
'linux-x86_64/crypto/aes/bsaes-x86_64.S',
'linux-x86_64/crypto/aes/vpaes-x86_64.S',
'linux-x86_64/crypto/bn/rsaz-avx2.S',
'linux-x86_64/crypto/bn/rsaz-x86_64.S',
'linux-x86_64/crypto/bn/x86_64-mont.S',
'linux-x86_64/crypto/bn/x86_64-mont5.S',
'linux-x86_64/crypto/md5/md5-x86_64.S',
'linux-x86_64/crypto/modes/aesni-gcm-x86_64.S',
'linux-x86_64/crypto/modes/ghash-x86_64.S',
'linux-x86_64/crypto/rand/rdrand-x86_64.S',
'linux-x86_64/crypto/rc4/rc4-md5-x86_64.S',
'linux-x86_64/crypto/rc4/rc4-x86_64.S',
'linux-x86_64/crypto/sha/sha1-x86_64.S',
'linux-x86_64/crypto/sha/sha256-x86_64.S',
'linux-x86_64/crypto/sha/sha512-x86_64.S',
],
'boringssl_mac_x86_sources': [
'mac-x86/crypto/aes/aes-586.S',
'mac-x86/crypto/aes/aesni-x86.S',
'mac-x86/crypto/aes/vpaes-x86.S',
'mac-x86/crypto/bn/bn-586.S',
'mac-x86/crypto/bn/co-586.S',
'mac-x86/crypto/bn/x86-mont.S',
'mac-x86/crypto/md5/md5-586.S',
'mac-x86/crypto/modes/ghash-x86.S',
'mac-x86/crypto/rc4/rc4-586.S',
'mac-x86/crypto/sha/sha1-586.S',
'mac-x86/crypto/sha/sha256-586.S',
'mac-x86/crypto/sha/sha512-586.S',
],
'boringssl_mac_x86_64_sources': [
'mac-x86_64/crypto/aes/aes-x86_64.S',
'mac-x86_64/crypto/aes/aesni-x86_64.S',
'mac-x86_64/crypto/aes/bsaes-x86_64.S',
'mac-x86_64/crypto/aes/vpaes-x86_64.S',
'mac-x86_64/crypto/bn/rsaz-avx2.S',
'mac-x86_64/crypto/bn/rsaz-x86_64.S',
'mac-x86_64/crypto/bn/x86_64-mont.S',
'mac-x86_64/crypto/bn/x86_64-mont5.S',
'mac-x86_64/crypto/md5/md5-x86_64.S',
'mac-x86_64/crypto/modes/aesni-gcm-x86_64.S',
'mac-x86_64/crypto/modes/ghash-x86_64.S',
'mac-x86_64/crypto/rand/rdrand-x86_64.S',
'mac-x86_64/crypto/rc4/rc4-md5-x86_64.S',
'mac-x86_64/crypto/rc4/rc4-x86_64.S',
'mac-x86_64/crypto/sha/sha1-x86_64.S',
'mac-x86_64/crypto/sha/sha256-x86_64.S',
'mac-x86_64/crypto/sha/sha512-x86_64.S',
],
'boringssl_win_x86_sources': [
'win-x86/crypto/aes/aes-586.asm',
'win-x86/crypto/aes/aesni-x86.asm',
'win-x86/crypto/aes/vpaes-x86.asm',
'win-x86/crypto/bn/bn-586.asm',
'win-x86/crypto/bn/co-586.asm',
'win-x86/crypto/bn/x86-mont.asm',
'win-x86/crypto/md5/md5-586.asm',
'win-x86/crypto/modes/ghash-x86.asm',
'win-x86/crypto/rc4/rc4-586.asm',
'win-x86/crypto/sha/sha1-586.asm',
'win-x86/crypto/sha/sha256-586.asm',
'win-x86/crypto/sha/sha512-586.asm',
],
'boringssl_win_x86_64_sources': [
'win-x86_64/crypto/aes/aes-x86_64.asm',
'win-x86_64/crypto/aes/aesni-x86_64.asm',
'win-x86_64/crypto/aes/bsaes-x86_64.asm',
'win-x86_64/crypto/aes/vpaes-x86_64.asm',
'win-x86_64/crypto/bn/rsaz-avx2.asm',
'win-x86_64/crypto/bn/rsaz-x86_64.asm',
'win-x86_64/crypto/bn/x86_64-mont.asm',
'win-x86_64/crypto/bn/x86_64-mont5.asm',
'win-x86_64/crypto/md5/md5-x86_64.asm',
'win-x86_64/crypto/modes/aesni-gcm-x86_64.asm',
'win-x86_64/crypto/modes/ghash-x86_64.asm',
'win-x86_64/crypto/rand/rdrand-x86_64.asm',
'win-x86_64/crypto/rc4/rc4-md5-x86_64.asm',
'win-x86_64/crypto/rc4/rc4-x86_64.asm',
'win-x86_64/crypto/sha/sha1-x86_64.asm',
'win-x86_64/crypto/sha/sha256-x86_64.asm',
'win-x86_64/crypto/sha/sha512-x86_64.asm',
],
}
}

View File

@ -0,0 +1,50 @@
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
{
  'includes': [
    '../../native_client/build/untrusted.gypi',
  ],
  'targets': [
    {
      # BoringSSL built as the library named by 'nlib_target'.
      'target_name': 'boringssl_nacl',
      'type': 'none',
      'variables': {
        'nlib_target': 'libboringssl_nacl.a',
        # Of the three build_* switches only pnacl_newlib is enabled.
        'build_glibc': 0,
        'build_newlib': 0,
        'build_pnacl_newlib': 1,
      },
      'dependencies': [
        '<(DEPTH)/native_client_sdk/native_client_sdk_untrusted.gyp:nacl_io_untrusted',
      ],

      # The auto-generated gypi provides the source lists expanded in
      # 'sources'.
      'includes': [
        'boringssl.gypi'
      ],
      'sources': [
        '<@(boringssl_crypto_sources)',
        '<@(boringssl_ssl_sources)',
      ],

      # This target is always built without assembly.
      'defines': [
        'OPENSSL_NO_ASM',
      ],
      'include_dirs': [
        'src/include',
        # Some asm files need arm_arch.h. The asm files are generated and kept
        # in a different directory, so they cannot reach the header through a
        # relative path; src/crypto is put on the include path instead.
        'src/crypto',
      ],
      'pnacl_compile_flags': [
        '-Wno-sometimes-uninitialized',
        '-Wno-unused-variable',
      ],

      # Settings exported to targets that depend directly on this one.
      'direct_dependent_settings': {
        'include_dirs': [
          'src/include',
        ],
      },
    },  # end of target boringssl_nacl
  ],
}

View File

@ -0,0 +1,25 @@
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
{
  # boringssl_tests.gypi declares the individual BoringSSL test executables.
  # NOTE(review): boringssl_test_targets is presumably defined there as well -
  # confirm.
  'includes': [
    'boringssl_tests.gypi',
  ],
  'targets': [
    {
      # Single executable built from boringssl_unittest.cc that depends on
      # every target in boringssl_test_targets plus the base and gtest
      # targets listed below.
      'target_name': 'boringssl_unittests',
      'type': 'executable',
      'dependencies': [
        '<@(boringssl_test_targets)',
        '../../base/base.gyp:base',
        '../../base/base.gyp:run_all_unittests',
        '../../base/base.gyp:test_support_base',
        '../../testing/gtest.gyp:gtest',
      ],
      'sources': [
        'boringssl_unittest.cc',
      ],
    },
  ],
}

View File

@ -0,0 +1,513 @@
# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# This file is created by generate_build_files.py. Do not edit manually.
{
'targets': [
{
'target_name': 'boringssl_aes_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/aes/aes_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_base64_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/base64/base64_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_bio_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/bio/bio_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_bn_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/bn/bn_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_bytestring_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/bytestring/bytestring_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_aead_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/cipher/aead_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_cipher_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/cipher/cipher_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_cmac_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/cmac/cmac_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_constant_time_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/constant_time_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_dh_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/dh/dh_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_digest_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/digest/digest_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_dsa_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/dsa/dsa_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_ec_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/ec/ec_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_example_mul',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/ec/example_mul.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_ecdsa_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/ecdsa/ecdsa_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_err_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/err/err_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_evp_extra_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/evp/evp_extra_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_evp_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/evp/evp_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_pbkdf_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/evp/pbkdf_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_hkdf_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/hkdf/hkdf_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_hmac_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/hmac/hmac_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_lhash_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/lhash/lhash_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_gcm_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/modes/gcm_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_pkcs12_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/pkcs8/pkcs12_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_poly1305_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/poly1305/poly1305_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_refcount_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/refcount_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_rsa_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/rsa/rsa_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_thread_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/thread_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_pkcs7_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/x509/pkcs7_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_tab_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/x509v3/tab_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_v3name_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/crypto/x509v3/v3name_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_pqueue_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/ssl/pqueue/pqueue_test.c',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
{
'target_name': 'boringssl_ssl_test',
'type': 'executable',
'dependencies': [
'boringssl.gyp:boringssl',
],
'sources': [
'src/ssl/ssl_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
# https://crbug.com/429039
'msvs_disabled_warnings': [ 4267, ],
},
],
'variables': {
'boringssl_test_support_sources': [
'src/crypto/test/file_test.cc',
'src/crypto/test/malloc.cc',
],
'boringssl_test_targets': [
'boringssl_aead_test',
'boringssl_aes_test',
'boringssl_base64_test',
'boringssl_bio_test',
'boringssl_bn_test',
'boringssl_bytestring_test',
'boringssl_cipher_test',
'boringssl_cmac_test',
'boringssl_constant_time_test',
'boringssl_dh_test',
'boringssl_digest_test',
'boringssl_dsa_test',
'boringssl_ec_test',
'boringssl_ecdsa_test',
'boringssl_err_test',
'boringssl_evp_extra_test',
'boringssl_evp_test',
'boringssl_example_mul',
'boringssl_gcm_test',
'boringssl_hkdf_test',
'boringssl_hmac_test',
'boringssl_lhash_test',
'boringssl_pbkdf_test',
'boringssl_pkcs12_test',
'boringssl_pkcs7_test',
'boringssl_poly1305_test',
'boringssl_pqueue_test',
'boringssl_refcount_test',
'boringssl_rsa_test',
'boringssl_ssl_test',
'boringssl_tab_test',
'boringssl_thread_test',
'boringssl_v3name_test',
],
}
}

View File

@ -0,0 +1,304 @@
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <stdarg.h>
#include <string>
#include <vector>
#include "base/base_paths.h"
#include "base/command_line.h"
#include "base/files/file_path.h"
#include "base/logging.h"
#include "base/path_service.h"
#include "base/process/launch.h"
#include "base/strings/string_util.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace {
void TestProcess(const std::string& name,
const std::vector<base::CommandLine::StringType>& args) {
base::FilePath exe_dir;
ASSERT_TRUE(PathService::Get(base::DIR_EXE, &exe_dir));
base::FilePath test_binary =
exe_dir.AppendASCII("boringssl_" + name);
base::CommandLine cmd(test_binary);
for (size_t i = 0; i < args.size(); ++i) {
cmd.AppendArgNative(args[i]);
}
std::string output;
EXPECT_TRUE(base::GetAppOutput(cmd, &output));
// Account for Windows line endings.
base::ReplaceSubstringsAfterOffset(&output, 0, "\r\n", "\n");
const bool ok = output.size() >= 5 &&
memcmp("PASS\n", &output[output.size() - 5], 5) == 0 &&
(output.size() == 5 || output[output.size() - 6] == '\n');
EXPECT_TRUE(ok) << output;
}
void TestSimple(const std::string& name) {
std::vector<base::CommandLine::StringType> empty;
TestProcess(name, empty);
}
// Writes <source root>/third_party/boringssl/src to |*result|.  Returns false
// when the source root cannot be looked up, true otherwise.
bool BoringSSLPath(base::FilePath* result) {
  if (!PathService::Get(base::DIR_SOURCE_ROOT, result))
    return false;

  static const base::FilePath::CharType* const kComponents[] = {
      FILE_PATH_LITERAL("third_party"),
      FILE_PATH_LITERAL("boringssl"),
      FILE_PATH_LITERAL("src"),
  };
  for (size_t i = 0; i < sizeof(kComponents) / sizeof(kComponents[0]); ++i) {
    *result = result->Append(kComponents[i]);
  }
  return true;
}
// Writes the BoringSSL path followed by crypto/cipher/test to |*result|.
// Returns false when BoringSSLPath() fails, true otherwise.
bool CryptoCipherTestPath(base::FilePath *result) {
  const bool have_root = BoringSSLPath(result);
  if (have_root) {
    *result = result->Append(FILE_PATH_LITERAL("crypto"))
                  .Append(FILE_PATH_LITERAL("cipher"))
                  .Append(FILE_PATH_LITERAL("test"));
  }
  return have_root;
}
} // anonymous namespace
// One row of the kAEADTests table: the two values the AEADs test passes to
// the aead_test binary for a single AEAD.
struct AEADTest {
// AEAD name; pushed as the first command-line argument.
const base::CommandLine::CharType *name;
// Test-vector file name; appended to the crypto/cipher/test directory to
// form the second command-line argument.
const base::FilePath::CharType *test_vector_filename;
};
// Every AEAD exercised by TEST(BoringSSL, AEADs), as {name, vector file}
// pairs.  Both members are written with FILE_PATH_LITERAL.
// NOTE(review): this relies on base::CommandLine::CharType and
// base::FilePath::CharType being the same character type - confirm.
static const AEADTest kAEADTests[] = {
{FILE_PATH_LITERAL("aes-128-gcm"),
FILE_PATH_LITERAL("aes_128_gcm_tests.txt")},
{FILE_PATH_LITERAL("aes-128-key-wrap"),
FILE_PATH_LITERAL("aes_128_key_wrap_tests.txt")},
{FILE_PATH_LITERAL("aes-256-gcm"),
FILE_PATH_LITERAL("aes_256_gcm_tests.txt")},
{FILE_PATH_LITERAL("aes-256-key-wrap"),
FILE_PATH_LITERAL("aes_256_key_wrap_tests.txt")},
{FILE_PATH_LITERAL("chacha20-poly1305"),
FILE_PATH_LITERAL("chacha20_poly1305_tests.txt")},
{FILE_PATH_LITERAL("rc4-md5-tls"),
FILE_PATH_LITERAL("rc4_md5_tls_tests.txt")},
{FILE_PATH_LITERAL("rc4-sha1-tls"),
FILE_PATH_LITERAL("rc4_sha1_tls_tests.txt")},
{FILE_PATH_LITERAL("aes-128-cbc-sha1-tls"),
FILE_PATH_LITERAL("aes_128_cbc_sha1_tls_tests.txt")},
{FILE_PATH_LITERAL("aes-128-cbc-sha1-tls-implicit-iv"),
FILE_PATH_LITERAL("aes_128_cbc_sha1_tls_implicit_iv_tests.txt")},
{FILE_PATH_LITERAL("aes-128-cbc-sha256-tls"),
FILE_PATH_LITERAL("aes_128_cbc_sha256_tls_tests.txt")},
{FILE_PATH_LITERAL("aes-256-cbc-sha1-tls"),
FILE_PATH_LITERAL("aes_256_cbc_sha1_tls_tests.txt")},
{FILE_PATH_LITERAL("aes-256-cbc-sha1-tls-implicit-iv"),
FILE_PATH_LITERAL("aes_256_cbc_sha1_tls_implicit_iv_tests.txt")},
{FILE_PATH_LITERAL("aes-256-cbc-sha256-tls"),
FILE_PATH_LITERAL("aes_256_cbc_sha256_tls_tests.txt")},
{FILE_PATH_LITERAL("aes-256-cbc-sha384-tls"),
FILE_PATH_LITERAL("aes_256_cbc_sha384_tls_tests.txt")},
{FILE_PATH_LITERAL("des-ede3-cbc-sha1-tls"),
FILE_PATH_LITERAL("des_ede3_cbc_sha1_tls_tests.txt")},
{FILE_PATH_LITERAL("des-ede3-cbc-sha1-tls-implicit-iv"),
FILE_PATH_LITERAL("des_ede3_cbc_sha1_tls_implicit_iv_tests.txt")},
{FILE_PATH_LITERAL("rc4-md5-ssl3"),
FILE_PATH_LITERAL("rc4_md5_ssl3_tests.txt")},
{FILE_PATH_LITERAL("rc4-sha1-ssl3"),
FILE_PATH_LITERAL("rc4_sha1_ssl3_tests.txt")},
{FILE_PATH_LITERAL("aes-128-cbc-sha1-ssl3"),
FILE_PATH_LITERAL("aes_128_cbc_sha1_ssl3_tests.txt")},
{FILE_PATH_LITERAL("aes-256-cbc-sha1-ssl3"),
FILE_PATH_LITERAL("aes_256_cbc_sha1_ssl3_tests.txt")},
{FILE_PATH_LITERAL("des-ede3-cbc-sha1-ssl3"),
FILE_PATH_LITERAL("des_ede3_cbc_sha1_ssl3_tests.txt")},
// The two entries below use file names without the "_tests" suffix that the
// rows above carry.
{FILE_PATH_LITERAL("aes-128-ctr-hmac-sha256"),
FILE_PATH_LITERAL("aes_128_ctr_hmac_sha256.txt")},
{FILE_PATH_LITERAL("aes-256-ctr-hmac-sha256"),
FILE_PATH_LITERAL("aes_256_ctr_hmac_sha256.txt")},
};
// Runs boringssl_aead_test once for every kAEADTests row, passing the AEAD
// name and the full path of its test-vector file.
TEST(BoringSSL, AEADs) {
  base::FilePath vectors_dir;
  ASSERT_TRUE(CryptoCipherTestPath(&vectors_dir));

  const AEADTest* const end = kAEADTests + arraysize(kAEADTests);
  for (const AEADTest* aead = kAEADTests; aead != end; ++aead) {
    // Tag any failure reported by TestProcess with the AEAD being run.
    SCOPED_TRACE(aead->name);

    std::vector<base::CommandLine::StringType> args;
    args.push_back(aead->name);
    args.push_back(vectors_dir.Append(aead->test_vector_filename).value());
    TestProcess("aead_test", args);
  }
}
// Each test below runs one argument-less boringssl_<name> binary through
// TestSimple and passes only if that binary ends its output with "PASS".
TEST(BoringSSL, AES) {
TestSimple("aes_test");
}
TEST(BoringSSL, Base64) {
TestSimple("base64_test");
}
TEST(BoringSSL, BIO) {
TestSimple("bio_test");
}
TEST(BoringSSL, BN) {
TestSimple("bn_test");
}
TEST(BoringSSL, ByteString) {
TestSimple("bytestring_test");
}
// Runs boringssl_cipher_test with the path of
// crypto/cipher/test/cipher_test.txt as its only argument.
TEST(BoringSSL, Cipher) {
  base::FilePath test_dir;
  ASSERT_TRUE(CryptoCipherTestPath(&test_dir));

  const base::FilePath vectors =
      test_dir.Append(FILE_PATH_LITERAL("cipher_test.txt"));
  std::vector<base::CommandLine::StringType> args(1, vectors.value());
  TestProcess("cipher_test", args);
}
// Argument-less test binaries, one gtest case per binary; see TestSimple.
TEST(BoringSSL, CMAC) {
TestSimple("cmac_test");
}
TEST(BoringSSL, ConstantTime) {
TestSimple("constant_time_test");
}
TEST(BoringSSL, DH) {
TestSimple("dh_test");
}
TEST(BoringSSL, Digest) {
TestSimple("digest_test");
}
TEST(BoringSSL, DSA) {
TestSimple("dsa_test");
}
TEST(BoringSSL, EC) {
TestSimple("ec_test");
}
TEST(BoringSSL, ECDSA) {
TestSimple("ecdsa_test");
}
TEST(BoringSSL, ERR) {
TestSimple("err_test");
}
// Runs boringssl_evp_test with the path of crypto/evp/evp_tests.txt as its
// only argument.
TEST(BoringSSL, EVP) {
  base::FilePath src_root;
  ASSERT_TRUE(BoringSSLPath(&src_root));

  const base::FilePath vectors =
      src_root.Append(FILE_PATH_LITERAL("crypto"))
          .Append(FILE_PATH_LITERAL("evp"))
          .Append(FILE_PATH_LITERAL("evp_tests.txt"));
  std::vector<base::CommandLine::StringType> args(1, vectors.value());
  TestProcess("evp_test", args);
}
// Argument-less test binaries, one gtest case per binary; see TestSimple.
TEST(BoringSSL, EVPExtra) {
TestSimple("evp_extra_test");
}
// Unlike its neighbours this binary has no "_test" suffix: the executable is
// boringssl_example_mul.
TEST(BoringSSL, ExampleMul) {
TestSimple("example_mul");
}
TEST(BoringSSL, GCM) {
TestSimple("gcm_test");
}
TEST(BoringSSL, HKDF) {
TestSimple("hkdf_test");
}
// Runs boringssl_hmac_test with the path of crypto/hmac/hmac_tests.txt as its
// only argument.
TEST(BoringSSL, HMAC) {
  base::FilePath src_root;
  ASSERT_TRUE(BoringSSLPath(&src_root));

  const base::FilePath vectors =
      src_root.Append(FILE_PATH_LITERAL("crypto"))
          .Append(FILE_PATH_LITERAL("hmac"))
          .Append(FILE_PATH_LITERAL("hmac_tests.txt"));
  std::vector<base::CommandLine::StringType> args(1, vectors.value());
  TestProcess("hmac_test", args);
}
// Argument-less test binaries, one gtest case per binary; see TestSimple.
TEST(BoringSSL, LH) {
TestSimple("lhash_test");
}
TEST(BoringSSL, PBKDF) {
TestSimple("pbkdf_test");
}
// Runs boringssl_poly1305_test with the path of
// crypto/poly1305/poly1305_test.txt as its only argument.
TEST(BoringSSL, Poly1305) {
  base::FilePath src_root;
  ASSERT_TRUE(BoringSSLPath(&src_root));

  const base::FilePath vectors =
      src_root.Append(FILE_PATH_LITERAL("crypto"))
          .Append(FILE_PATH_LITERAL("poly1305"))
          .Append(FILE_PATH_LITERAL("poly1305_test.txt"));
  std::vector<base::CommandLine::StringType> args(1, vectors.value());
  TestProcess("poly1305_test", args);
}
// Argument-less test binaries, one gtest case per binary; see TestSimple.
TEST(BoringSSL, PKCS7) {
TestSimple("pkcs7_test");
}
TEST(BoringSSL, PKCS12) {
TestSimple("pkcs12_test");
}
TEST(BoringSSL, PQueue) {
TestSimple("pqueue_test");
}
TEST(BoringSSL, RefcountTest) {
TestSimple("refcount_test");
}
TEST(BoringSSL, RSA) {
TestSimple("rsa_test");
}
TEST(BoringSSL, SSL) {
TestSimple("ssl_test");
}
TEST(BoringSSL, TabTest) {
TestSimple("tab_test");
}
TEST(BoringSSL, Thread) {
TestSimple("thread_test");
}
TEST(BoringSSL, V3NameTest) {
TestSimple("v3name_test");
}

1314
packager/third_party/boringssl/err_data.c vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,751 @@
#if defined(__aarch64__)
#include "arm_arch.h"
#if __ARM_MAX_ARCH__>=7
.text
#if !defined(__clang__)
.arch armv8-a+crypto
#endif
// Constant table read by aes_v8_set_encrypt_key through `adr x3,.Lrcon`.
// It holds three 16-byte rows, each one 32-bit value repeated four times:
//   row 0: initial round constant, loaded into v1 and doubled with shl
//          after every key-expansion step
//   row 1: byte-index mask loaded into v2 and used as the tbl selector
//   row 2: round constant reloaded into v1 by the 128-bit path once its
//          eight-iteration loop has finished
.align 5
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
// aes_v8_set_encrypt_key: expand a user key into an AES encryption key
// schedule.
//
// Inputs, as used by the code below:
//   x0 = pointer to the user key bytes (rejected if NULL)
//   w1 = key length in bits; only 128, 192 and 256 pass the checks
//   x2 = pointer to the output key schedule (rejected if NULL)
// Output:
//   x0 = 0 on success, -1 if x0 or x2 is NULL, -2 if the bit length is
//        rejected
// On success the round keys are written from x2 upwards and the round count
// (10, 12 or 14) is stored as a 32-bit word at offset 240 of the schedule.
// On the success path x2 is left pointing at that offset and w12 holds the
// round count; aes_v8_set_decrypt_key depends on both.
// Scratch: w1, x3, w12, v0-v6 and the condition flags.
//
// .Lenc_key is a local alias for the entry point so that
// aes_v8_set_decrypt_key can enter with `bl`.
.globl aes_v8_set_encrypt_key
.type aes_v8_set_encrypt_key,%function
.align 5
aes_v8_set_encrypt_key:
.Lenc_key:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
// x3 carries the pending return code through the argument checks.
mov x3,#-1
cmp x0,#0
b.eq .Lenc_key_abort
cmp x2,#0
b.eq .Lenc_key_abort
mov x3,#-2
// Accept only multiples of 64 in the range 128..256.
cmp w1,#128
b.lt .Lenc_key_abort
cmp w1,#256
b.gt .Lenc_key_abort
tst w1,#0x3f
b.ne .Lenc_key_abort
adr x3,.Lrcon
// The flags from this compare select the path at the three branches below;
// none of the instructions in between writes the flags.
cmp w1,#192
// v0 = 0: zero round key for aese and zero filler for the ext shifts.
eor v0.16b,v0.16b,v0.16b
ld1 {v3.16b},[x0],#16
mov w1,#8 // reuse w1
// v1 = round constant, v2 = tbl mask; x3 now points at the third row.
ld1 {v1.4s,v2.4s},[x3],#32
b.lt .Loop128
b.eq .L192
b .L256
.align 4
// 128-bit key: eight loop iterations followed by two unrolled steps.
// Each step stores the current round key (v3) and derives the next one.
.Loop128:
// v6 = the last word of v3, byte-permuted by the v2 mask, in every lane.
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
// aese with the all-zero key in v0 serves as the S-box substitution.
aese v6.16b,v0.16b
subs w1,w1,#1
// The three ext/eor pairs XOR each word of v3 with all the words before it.
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
// Double the round constant for the next step.
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
b.ne .Loop128
// Switch to the third .Lrcon row for the last two steps.
ld1 {v1.4s},[x3]
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
// Final round key, stored without post-increment at offset 160; adding
// 0x50 then leaves x2 at offset 240.
st1 {v3.4s},[x2]
add x2,x2,#0x50
mov w12,#10
b .Ldone
.align 4
// 192-bit key: v3 holds the first 16 key bytes, v4 the remaining 8.
// Each of the eight iterations stores 8 + 16 bytes.
.L192:
ld1 {v4.8b},[x0],#8
movi v6.16b,#8 // borrow v6.16b
st1 {v3.4s},[x2],#16
sub v2.16b,v2.16b,v6.16b // adjust the mask
.Loop192:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.8b},[x2],#8
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
dup v5.4s,v3.s[3]
eor v5.16b,v5.16b,v4.16b
eor v6.16b,v6.16b,v1.16b
ext v4.16b,v0.16b,v4.16b,#12
shl v1.16b,v1.16b,#1
eor v4.16b,v4.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
eor v4.16b,v4.16b,v6.16b
st1 {v3.4s},[x2],#16
b.ne .Loop192
mov w12,#12
// 16 + 8*24 = 208 bytes written so far; advance x2 to offset 240.
add x2,x2,#0x20
b .Ldone
.align 4
// 256-bit key: v3 and v4 hold the two key halves.  The loop exits from its
// middle (b.eq .Ldone) on the seventh pass, when x2 has reached offset 240.
.L256:
ld1 {v4.16b},[x0]
mov w1,#7
mov w12,#14
st1 {v3.4s},[x2],#16
.Loop256:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2],#16
b.eq .Ldone
// Second half of the step: update v4 from the new v3 (no byte rotation and
// no round constant here).
dup v6.4s,v3.s[3] // just splat
ext v5.16b,v0.16b,v4.16b,#12
aese v6.16b,v0.16b
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
eor v4.16b,v4.16b,v6.16b
b .Loop256
.Ldone:
// x2 is at offset 240 here on every path: record the round count.
str w12,[x2]
mov x3,#0
.Lenc_key_abort:
mov x0,x3 // return value
// Only x29 is reloaded.  x30 was saved by the prologue but is never written
// inside this function, so it still holds the return address.
ldr x29,[sp],#16
ret
.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
// aes_v8_set_decrypt_key: build an AES decryption key schedule.
//
// Takes the same x0/w1/x2 arguments as aes_v8_set_encrypt_key.  It first
// runs the encryption key expansion through .Lenc_key and, if that returns
// non-zero, returns the same value.  Otherwise it converts the schedule in
// place: the round keys are reversed end-for-end and every key except the
// two outermost ones is passed through aesimc.
// Output: x0 = 0 on success.
// Scratch (beyond what .Lenc_key uses): x4, v0, v1.
.globl aes_v8_set_decrypt_key
.type aes_v8_set_decrypt_key,%function
.align 5
aes_v8_set_decrypt_key:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl .Lenc_key
cmp x0,#0
b.ne .Ldec_key_abort
// On success .Lenc_key leaves x2 at schedule+240 and the round count in w12.
sub x2,x2,#240 // restore original x2
// x4 = post-index step that moves x0 backwards one round key at a time.
mov x4,#-16
add x0,x2,x12,lsl#4 // end of key schedule
// Swap the first and last round keys unchanged.
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
// Walk inwards from both ends, swapping pairs and applying aesimc to each.
.Loop_imc:
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
aesimc v0.16b,v0.16b
aesimc v1.16b,v1.16b
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
cmp x0,x2
b.hi .Loop_imc
// The pointers have met on the middle round key: transform it in place.
ld1 {v0.4s},[x2]
aesimc v0.16b,v0.16b
st1 {v0.4s},[x0]
eor x0,x0,x0 // return value
.Ldec_key_abort:
// x30 was overwritten by the bl above, so both registers are restored.
ldp x29,x30,[sp],#16
ret
.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
// aes_v8_encrypt: encrypt one 16-byte block.
//
// Inputs, as used by the code below:
//   x0 = pointer to the 16-byte input block
//   x1 = pointer to the 16-byte output block
//   x2 = pointer to the key schedule; the round count is read from offset 240
// Leaf routine: no stack frame and no return value is set.
// Scratch: x2 (advanced through the schedule), w3, v0-v2, condition flags.
.globl aes_v8_encrypt
.type aes_v8_encrypt,%function
.align 5
aes_v8_encrypt:
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
// Bias the counter: the last two rounds are handled after the loop.
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
// Two rounds per iteration, with v0 and v1 alternating as the round key
// while the next key is loaded behind the round that is in flight.
.Loop_enc:
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aese v2.16b,v1.16b
aesmc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt .Loop_enc
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2]
// Final round: no aesmc, then XOR with the last round key.
aese v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.size aes_v8_encrypt,.-aes_v8_encrypt
// aes_v8_decrypt: decrypt one 16-byte block.
//
// Same structure and register use as aes_v8_encrypt, with aesd/aesimc in
// place of aese/aesmc:
//   x0 = pointer to the 16-byte input block
//   x1 = pointer to the 16-byte output block
//   x2 = pointer to the key schedule; the round count is read from offset 240
// NOTE(review): presumably x2 must be a schedule produced by
// aes_v8_set_decrypt_key - confirm against callers.
// Leaf routine: no stack frame and no return value is set.
// Scratch: x2 (advanced through the schedule), w3, v0-v2, condition flags.
.globl aes_v8_decrypt
.type aes_v8_decrypt,%function
.align 5
aes_v8_decrypt:
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
// Bias the counter: the last two rounds are handled after the loop.
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
// Two rounds per iteration, with v0 and v1 alternating as the round key.
.Loop_dec:
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aesd v2.16b,v1.16b
aesimc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt .Loop_dec
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2]
// Final round: no aesimc, then XOR with the last round key.
aesd v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.size aes_v8_decrypt,.-aes_v8_decrypt
// aes_v8_cbc_encrypt: AES-CBC encryption or decryption of a buffer.
//
// Inputs, as used by the code below:
//   x0 = input pointer
//   x1 = output pointer
//   x2 = length in bytes; fewer than 16 returns immediately, and the length
//        is otherwise rounded down to a multiple of 16
//   x3 = key schedule pointer; the round count is read from offset 240
//   x4 = pointer to the 16-byte IV, read on entry and rewritten at
//        .Lcbc_done
//   w5 = direction: zero takes the decrypt path (.Lcbc_dec), non-zero the
//        encrypt path
// No return value is set.
// Scratch: x0-x3, w5, x6-x8, x12, x14, v0-v7, v16-v23, condition flags.
//
// Register roles shared by both directions:
//   v6       running IV / previous ciphertext block
//   v7       last round key
//   v16,v17  rndkey[0] and rndkey[1], reloaded as the round loops consume them
//   v18-v23  the six round keys that precede the last one (the decrypt path
//            reuses v18 and v19 for data and takes those two keys from v16
//            and v17 instead)
//   x8       input post-increment, forced to 0 on the final block so the
//            look-ahead load does not step past the end of the input
.globl aes_v8_cbc_encrypt
.type aes_v8_cbc_encrypt,%function
.align 5
aes_v8_cbc_encrypt:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
mov x8,#16
b.lo .Lcbc_abort
csel x8,xzr,x8,eq
// The flags set here are consumed by `b.eq .Lcbc_dec` further down; none of
// the instructions in between writes the flags.
cmp w5,#0 // en- or decrypting?
ldr w5,[x3,#240]
and x2,x2,#-16
ld1 {v6.16b},[x4]
ld1 {v0.16b},[x0],x8
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#6
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
sub w5,w5,#2
ld1 {v18.4s,v19.4s},[x7],#32
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
// x7 = address of rndkey[2]; w5 and w6 = round count minus 8.
add x7,x3,#32
mov w6,w5
b.eq .Lcbc_dec
// ---- Encrypt ----
// w5 == 2 means 10 rounds (128-bit key), which has its own loop.
cmp w5,#2
eor v0.16b,v0.16b,v6.16b
// v5 = rndkey[0] ^ last round key.  XORing it into the next plaintext block
// lets the first aese of the next block supply the final AddRoundKey of the
// previous block, the CBC chaining XOR and the first AddRoundKey together.
eor v5.16b,v16.16b,v7.16b
b.eq .Lcbc_enc128
// 192/256-bit keys: keep rndkey[2..3] in v2/v3 and set up pointers to the
// round keys that are streamed through v16/v17 inside the loop.
ld1 {v2.4s,v3.4s},[x7]
add x7,x3,#16
add x6,x3,#16*4
add x12,x3,#16*5
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
add x14,x3,#16*6
add x3,x3,#16*7
b .Lenter_cbc_enc
.align 4
.Loop_cbc_enc:
// v16 holds the next plaintext block already XORed with v5.
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
// Store the ciphertext block finished by the previous iteration.
st1 {v6.16b},[x1],#16
.Lenter_cbc_enc:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x6]
// w5 == 4 means 12 rounds: skip the two extra rounds of a 256-bit key.
cmp w5,#4
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x12]
b.eq .Lcbc_enc192
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x14]
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x3]
nop
.Lcbc_enc192:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
// Count down the remaining length; these flags drive the csel below and the
// b.hs at the end of the loop.
subs x2,x2,#16
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
// Load the next plaintext block while the current one finishes.
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v0.16b,v23.16b
// v6 = finished ciphertext block; v0 is kept without the last round key.
eor v6.16b,v0.16b,v7.16b
b.hs .Loop_cbc_enc
st1 {v6.16b},[x1],#16
b .Lcbc_done
.align 5
// 128-bit key variant: every round key stays resident in registers.
.Lcbc_enc128:
ld1 {v2.4s,v3.4s},[x7]
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
b .Lenter_cbc_enc128
.Loop_cbc_enc128:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
.Lenter_cbc_enc128:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs .Loop_cbc_enc128
st1 {v6.16b},[x1],#16
b .Lcbc_done
.align 5
// ---- Decrypt ----
// Three blocks are processed per main-loop iteration in v0, v1 and v18.
// v2, v3 and v19 keep copies of the ciphertext blocks for the chaining XOR.
.Lcbc_dec:
ld1 {v18.16b},[x0],#16
subs x2,x2,#32 // bias
add w6,w5,#2
orr v3.16b,v0.16b,v0.16b
orr v1.16b,v0.16b,v0.16b
orr v19.16b,v18.16b,v18.16b
b.lo .Lcbc_dec_tail
orr v1.16b,v18.16b,v18.16b
ld1 {v18.16b},[x0],#16
orr v2.16b,v0.16b,v0.16b
orr v3.16b,v1.16b,v1.16b
orr v19.16b,v18.16b,v18.16b
.Loop3x_cbc_dec:
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Loop3x_cbc_dec
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
// Fold the last round key into each block's chaining value ahead of time:
// v4 = IV ^ last key, v5 and v17 = previous ciphertext ^ last key.
eor v4.16b,v6.16b,v7.16b
subs x2,x2,#0x30
eor v5.16b,v2.16b,v7.16b
csel x6,x2,x6,lo // x6, w6, is zero at this point
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
add x0,x0,x6 // x0 is adjusted in such way that
// at exit from the loop v1.16b-v18.16b
// are loaded with last "words"
// The last ciphertext block of this group becomes the next IV.
orr v6.16b,v19.16b,v19.16b
mov x7,x3
aesd v0.16b,v20.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
ld1 {v2.16b},[x0],#16
aesd v0.16b,v21.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
aesd v0.16b,v22.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
ld1 {v19.16b},[x0],#16
aesd v0.16b,v23.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
add w6,w5,#2
eor v4.16b,v4.16b,v0.16b
eor v5.16b,v5.16b,v1.16b
eor v18.16b,v18.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v4.16b},[x1],#16
orr v0.16b,v2.16b,v2.16b
st1 {v5.16b},[x1],#16
orr v1.16b,v3.16b,v3.16b
st1 {v18.16b},[x1],#16
orr v18.16b,v19.16b,v19.16b
// Uses the flags from `subs x2,x2,#0x30` above; none of the instructions in
// between writes the flags.
b.hs .Loop3x_cbc_dec
// x2 == -0x30 here means no blocks are left over.
cmn x2,#0x30
b.eq .Lcbc_done
nop
// One or two blocks remain; they are processed in v1 and v18.
.Lcbc_dec_tail:
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Lcbc_dec_tail
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
// x2 == -0x20 selects the single-block case; the flags are consumed by
// `b.eq .Lcbc_dec_one` below.
cmn x2,#0x20
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
eor v5.16b,v6.16b,v7.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
b.eq .Lcbc_dec_one
// Two blocks left.
eor v5.16b,v5.16b,v1.16b
eor v17.16b,v17.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
st1 {v17.16b},[x1],#16
b .Lcbc_done
.Lcbc_dec_one:
// One block left: only the v18 lane carries real data.
eor v5.16b,v5.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
.Lcbc_done:
// Write the updated IV back for the caller.
st1 {v6.16b},[x4]
.Lcbc_abort:
// Only x29 is reloaded.  x30 was saved by the prologue but is never written
// inside this function, so it still holds the return address.
ldr x29,[sp],#16
ret
.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
// aes_v8_ctr32_encrypt_blocks -- AES in counter mode with a 32-bit counter,
// built on the ARMv8 aese/aesmc instructions.
// NOTE(review): the C prototype is not visible here; the argument roles
// below are read off the register usage and should be confirmed.
//   x0 = input pointer (post-incremented by the ld1 loads)
//   x1 = output pointer (post-incremented by the st1 stores)
//   x2 = number of 16-byte blocks
//   x3 = key schedule; the round count is read from [x3,#240]
//   x4 = 16-byte counter block; it is only read, never written back here
// Register roles inside the function:
//   w8         = running 32-bit counter in native byte order
//   v6         = copy of the initial counter block, used as a template
//   v0,v1,v18  = the (up to) three counter blocks being encrypted
//   v16,v17    = current pair of round keys, reloaded through x7
//   v20-v23,v7 = the last five round keys, v7 being the final one
//   x12        = post-increment used by the tail input load (0 or 16)
// The main loop handles three blocks per iteration; .Lctr32_tail handles
// a remainder of one or two blocks.
.globl aes_v8_ctr32_encrypt_blocks
.type aes_v8_ctr32_encrypt_blocks,%function
.align 5
aes_v8_ctr32_encrypt_blocks:
 stp x29,x30,[sp,#-16]!
 add x29,sp,#0
 ldr w5,[x3,#240]
 ldr w8, [x4, #12]
 ld1 {v0.4s},[x4]
 ld1 {v16.4s,v17.4s},[x3] // load key schedule...
 sub w5,w5,#4
 mov x12,#16
 cmp x2,#2
 add x7,x3,x5,lsl#4 // pointer to last 5 round keys
 sub w5,w5,#2
 ld1 {v20.4s,v21.4s},[x7],#32
 ld1 {v22.4s,v23.4s},[x7],#32
 ld1 {v7.4s},[x7]
 add x7,x3,#32
 mov w6,w5
 // fewer than two blocks: make the tail input stride zero so that the
 // second tail load re-reads the same block instead of reading past it
 csel x12,xzr,x12,lo
#ifndef __ARMEB__
 rev w8, w8
#endif
 orr v1.16b,v0.16b,v0.16b
 add w10, w8, #1
 orr v18.16b,v0.16b,v0.16b
 add w8, w8, #2
 orr v6.16b,v0.16b,v0.16b
 rev w10, w10
 mov v1.s[3],w10
 // flags still come from "cmp x2,#2": one or two blocks go to the tail
 b.ls .Lctr32_tail
 rev w12, w8
 sub x2,x2,#3 // bias
 mov v18.s[3],w12
 b .Loop3x_ctr32
.align 4
.Loop3x_ctr32:
 aese v0.16b,v16.16b
 aesmc v0.16b,v0.16b
 aese v1.16b,v16.16b
 aesmc v1.16b,v1.16b
 aese v18.16b,v16.16b
 aesmc v18.16b,v18.16b
 ld1 {v16.4s},[x7],#16
 subs w6,w6,#2
 aese v0.16b,v17.16b
 aesmc v0.16b,v0.16b
 aese v1.16b,v17.16b
 aesmc v1.16b,v1.16b
 aese v18.16b,v17.16b
 aesmc v18.16b,v18.16b
 ld1 {v17.4s},[x7],#16
 b.gt .Loop3x_ctr32
 // Remaining rounds. The results move to v4, v5 and v17 so that v0, v1
 // and v18 can already be refilled with the next three counter blocks
 // while the input blocks are loaded into v2, v3 and v19.
 aese v0.16b,v16.16b
 aesmc v4.16b,v0.16b
 aese v1.16b,v16.16b
 aesmc v5.16b,v1.16b
 ld1 {v2.16b},[x0],#16
 orr v0.16b,v6.16b,v6.16b
 aese v18.16b,v16.16b
 aesmc v18.16b,v18.16b
 ld1 {v3.16b},[x0],#16
 orr v1.16b,v6.16b,v6.16b
 aese v4.16b,v17.16b
 aesmc v4.16b,v4.16b
 aese v5.16b,v17.16b
 aesmc v5.16b,v5.16b
 ld1 {v19.16b},[x0],#16
 mov x7,x3
 aese v18.16b,v17.16b
 aesmc v17.16b,v18.16b
 orr v18.16b,v6.16b,v6.16b
 add w9,w8,#1
 aese v4.16b,v20.16b
 aesmc v4.16b,v4.16b
 aese v5.16b,v20.16b
 aesmc v5.16b,v5.16b
 // the last round key is folded into the input here, so one eor with the
 // aese result below finishes the block
 eor v2.16b,v2.16b,v7.16b
 add w10,w8,#2
 aese v17.16b,v20.16b
 aesmc v17.16b,v17.16b
 eor v3.16b,v3.16b,v7.16b
 add w8,w8,#3
 aese v4.16b,v21.16b
 aesmc v4.16b,v4.16b
 aese v5.16b,v21.16b
 aesmc v5.16b,v5.16b
 eor v19.16b,v19.16b,v7.16b
 rev w9,w9
 aese v17.16b,v21.16b
 aesmc v17.16b,v17.16b
 mov v0.s[3], w9
 rev w10,w10
 aese v4.16b,v22.16b
 aesmc v4.16b,v4.16b
 aese v5.16b,v22.16b
 aesmc v5.16b,v5.16b
 mov v1.s[3], w10
 rev w12,w8
 aese v17.16b,v22.16b
 aesmc v17.16b,v17.16b
 mov v18.s[3], w12
 subs x2,x2,#3
 aese v4.16b,v23.16b
 aese v5.16b,v23.16b
 aese v17.16b,v23.16b
 eor v2.16b,v2.16b,v4.16b
 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
 st1 {v2.16b},[x1],#16
 eor v3.16b,v3.16b,v5.16b
 mov w6,w5
 st1 {v3.16b},[x1],#16
 eor v19.16b,v19.16b,v17.16b
 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
 st1 {v19.16b},[x1],#16
 b.hs .Loop3x_ctr32
 // undo the bias: x2 is now the number of blocks left (0, 1 or 2)
 adds x2,x2,#3
 b.eq .Lctr32_done
 cmp x2,#1
 mov x12,#16
 csel x12,xzr,x12,eq
.Lctr32_tail:
 // Encrypts two counter blocks (v0, v1) regardless; only the first
 // result is stored when a single block remains.
 aese v0.16b,v16.16b
 aesmc v0.16b,v0.16b
 aese v1.16b,v16.16b
 aesmc v1.16b,v1.16b
 ld1 {v16.4s},[x7],#16
 subs w6,w6,#2
 aese v0.16b,v17.16b
 aesmc v0.16b,v0.16b
 aese v1.16b,v17.16b
 aesmc v1.16b,v1.16b
 ld1 {v17.4s},[x7],#16
 b.gt .Lctr32_tail
 aese v0.16b,v16.16b
 aesmc v0.16b,v0.16b
 aese v1.16b,v16.16b
 aesmc v1.16b,v1.16b
 aese v0.16b,v17.16b
 aesmc v0.16b,v0.16b
 aese v1.16b,v17.16b
 aesmc v1.16b,v1.16b
 ld1 {v2.16b},[x0],x12
 aese v0.16b,v20.16b
 aesmc v0.16b,v0.16b
 aese v1.16b,v20.16b
 aesmc v1.16b,v1.16b
 ld1 {v3.16b},[x0]
 aese v0.16b,v21.16b
 aesmc v0.16b,v0.16b
 aese v1.16b,v21.16b
 aesmc v1.16b,v1.16b
 eor v2.16b,v2.16b,v7.16b
 aese v0.16b,v22.16b
 aesmc v0.16b,v0.16b
 aese v1.16b,v22.16b
 aesmc v1.16b,v1.16b
 eor v3.16b,v3.16b,v7.16b
 aese v0.16b,v23.16b
 aese v1.16b,v23.16b
 cmp x2,#1
 eor v2.16b,v2.16b,v0.16b
 eor v3.16b,v3.16b,v1.16b
 st1 {v2.16b},[x1],#16
 b.eq .Lctr32_done
 st1 {v3.16b},[x1]
.Lctr32_done:
 // x30 is never modified in this function, so only x29 is reloaded
 ldr x29,[sp],#16
 ret
.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
#endif
#endif

View File

@ -0,0 +1,232 @@
#if defined(__aarch64__)
#include "arm_arch.h"
.text
#if !defined(__clang__)
.arch armv8-a+crypto
#endif
// gcm_init_v8 -- precompute the GHASH key table.
//   x0 = output table (Htable); three 16-byte entries are written
//   x1 = input hash key H
// Htable[0] receives the "twisted" H, Htable[2] receives H^2, and
// Htable[1] packs the Karatsuba pre-processed halves of both values.
// v19 holds the 0xc2.0 reduction constant throughout.
.globl gcm_init_v8
.type gcm_init_v8,%function
.align 4
gcm_init_v8:
 ld1 {v17.2d},[x1] //load input H
 movi v19.16b,#0xe1
 shl v19.2d,v19.2d,#57 //0xc2.0
 ext v3.16b,v17.16b,v17.16b,#8
 ushr v18.2d,v19.2d,#63
 dup v17.4s,v17.s[1]
 ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
 ushr v18.2d,v3.2d,#63
 sshr v17.4s,v17.4s,#31 //broadcast carry bit
 and v18.16b,v18.16b,v16.16b
 shl v3.2d,v3.2d,#1
 ext v18.16b,v18.16b,v18.16b,#8
 and v16.16b,v16.16b,v17.16b
 orr v3.16b,v3.16b,v18.16b //H<<<=1
 eor v20.16b,v3.16b,v16.16b //twisted H
 st1 {v20.2d},[x0],#16 //store Htable[0]
 //calculate H^2
 ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
 pmull v0.1q,v20.1d,v20.1d
 eor v16.16b,v16.16b,v20.16b
 pmull2 v2.1q,v20.2d,v20.2d
 pmull v1.1q,v16.1d,v16.1d
 ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
 eor v18.16b,v0.16b,v2.16b
 eor v1.16b,v1.16b,v17.16b
 eor v1.16b,v1.16b,v18.16b
 pmull v18.1q,v0.1d,v19.1d //1st phase
 ins v2.d[0],v1.d[1]
 ins v1.d[1],v0.d[0]
 eor v0.16b,v1.16b,v18.16b
 ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
 pmull v0.1q,v0.1d,v19.1d
 eor v18.16b,v18.16b,v2.16b
 eor v22.16b,v0.16b,v18.16b
 ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
 eor v17.16b,v17.16b,v22.16b
 ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
 st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
 ret
.size gcm_init_v8,.-gcm_init_v8
// gcm_gmult_v8 -- multiply the hash state Xi by H in place.
//   x0 = Xi (16 bytes, read and then overwritten with the product)
//   x1 = table written by gcm_init_v8; only the first two entries
//        (twisted H and the packed Karatsuba values) are loaded
// One Karatsuba multiplication (three pmull) followed by a two-phase
// reduction with the 0xc2.0 constant in v19.
.globl gcm_gmult_v8
.type gcm_gmult_v8,%function
.align 4
gcm_gmult_v8:
 ld1 {v17.2d},[x0] //load Xi
 movi v19.16b,#0xe1
 ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
 shl v19.2d,v19.2d,#57
#ifndef __ARMEB__
 rev64 v17.16b,v17.16b
#endif
 ext v3.16b,v17.16b,v17.16b,#8
 pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
 eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
 pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
 pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
 ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
 eor v18.16b,v0.16b,v2.16b
 eor v1.16b,v1.16b,v17.16b
 eor v1.16b,v1.16b,v18.16b
 pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
 ins v2.d[0],v1.d[1]
 ins v1.d[1],v0.d[0]
 eor v0.16b,v1.16b,v18.16b
 ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
 pmull v0.1q,v0.1d,v19.1d
 eor v18.16b,v18.16b,v2.16b
 eor v0.16b,v0.16b,v18.16b
#ifndef __ARMEB__
 rev64 v0.16b,v0.16b
#endif
 ext v0.16b,v0.16b,v0.16b,#8
 st1 {v0.2d},[x0] //write out Xi
 ret
.size gcm_gmult_v8,.-gcm_gmult_v8
// gcm_ghash_v8 -- fold a buffer into the GHASH state.
//   x0 = Xi (16 bytes, read at entry and written back at .Ldone_v8)
//   x1 = table written by gcm_init_v8 (twisted H, packed Karatsuba
//        values, H^2)
//   x2 = input pointer
//   x3 = input length in bytes
// NOTE(review): the code steps in units of 32 and 16 bytes, so the length
// is presumably required to be a non-zero multiple of 16 -- confirm
// against the caller.
// The main loop consumes two blocks per iteration using H^2 and H;
// .Lodd_tail_v8 handles a single remaining block.
.globl gcm_ghash_v8
.type gcm_ghash_v8,%function
.align 4
gcm_ghash_v8:
 ld1 {v0.2d},[x0] //load [rotated] Xi
 //"[rotated]" means that
 //loaded value would have
 //to be rotated in order to
 //make it appear as in
 //algorithm specification
 subs x3,x3,#32 //see if x3 is 32 or larger
 mov x12,#16 //x12 is used as post-
 //increment for input pointer;
 //as loop is modulo-scheduled
 //x12 is zeroed just in time
 //to preclude overstepping
 //inp[len], which means that
 //last block[s] are actually
 //loaded twice, but last
 //copy is not processed
 ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
 movi v19.16b,#0xe1
 ld1 {v22.2d},[x1]
 csel x12,xzr,x12,eq //is it time to zero x12?
 ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
 ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
 shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
#ifndef __ARMEB__
 rev64 v16.16b,v16.16b
 rev64 v0.16b,v0.16b
#endif
 ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
 b.lo .Lodd_tail_v8 //x3 was less than 32
 ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
#ifndef __ARMEB__
 rev64 v17.16b,v17.16b
#endif
 ext v7.16b,v17.16b,v17.16b,#8
 eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
 pmull v4.1q,v20.1d,v7.1d //H·Ii+1
 eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
 pmull2 v6.1q,v20.2d,v7.2d
 b .Loop_mod2x_v8
.align 4
.Loop_mod2x_v8:
 ext v18.16b,v3.16b,v3.16b,#8
 subs x3,x3,#32 //is there more data?
 pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
 csel x12,xzr,x12,lo //is it time to zero x12?
 pmull v5.1q,v21.1d,v17.1d
 eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
 pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
 eor v0.16b,v0.16b,v4.16b //accumulate
 pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
 ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
 eor v2.16b,v2.16b,v6.16b
 csel x12,xzr,x12,eq //is it time to zero x12?
 eor v1.16b,v1.16b,v5.16b
 ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
 eor v18.16b,v0.16b,v2.16b
 eor v1.16b,v1.16b,v17.16b
 ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
#ifndef __ARMEB__
 rev64 v16.16b,v16.16b
#endif
 eor v1.16b,v1.16b,v18.16b
 pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
#ifndef __ARMEB__
 rev64 v17.16b,v17.16b
#endif
 ins v2.d[0],v1.d[1]
 ins v1.d[1],v0.d[0]
 ext v7.16b,v17.16b,v17.16b,#8
 ext v3.16b,v16.16b,v16.16b,#8
 eor v0.16b,v1.16b,v18.16b
 pmull v4.1q,v20.1d,v7.1d //H·Ii+1
 eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
 ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
 pmull v0.1q,v0.1d,v19.1d
 eor v3.16b,v3.16b,v18.16b
 eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
 eor v3.16b,v3.16b,v0.16b
 pmull2 v6.1q,v20.2d,v7.2d
 b.hs .Loop_mod2x_v8 //there was at least 32 more bytes
 eor v2.16b,v2.16b,v18.16b
 ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
 adds x3,x3,#32 //re-construct x3
 eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
 b.eq .Ldone_v8 //is x3 zero?
.Lodd_tail_v8:
 ext v18.16b,v0.16b,v0.16b,#8
 eor v3.16b,v3.16b,v0.16b //inp^=Xi
 eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
 pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
 eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
 pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
 pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
 ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
 eor v18.16b,v0.16b,v2.16b
 eor v1.16b,v1.16b,v17.16b
 eor v1.16b,v1.16b,v18.16b
 pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
 ins v2.d[0],v1.d[1]
 ins v1.d[1],v0.d[0]
 eor v0.16b,v1.16b,v18.16b
 ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
 pmull v0.1q,v0.1d,v19.1d
 eor v18.16b,v18.16b,v2.16b
 eor v0.16b,v0.16b,v18.16b
.Ldone_v8:
#ifndef __ARMEB__
 rev64 v0.16b,v0.16b
#endif
 ext v0.16b,v0.16b,v0.16b,#8
 st1 {v0.2d},[x0] //write out Xi
 ret
.size gcm_ghash_v8,.-gcm_ghash_v8
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,756 @@
#if defined(__arm__)
#include "arm_arch.h"
#if __ARM_MAX_ARCH__>=7
.text
.arch armv7-a
.fpu neon
.code 32
.align 5
@ Constants for the key-expansion code that follows, three rows of 16 bytes:
@   row 0: initial round constant 0x01 in every 32-bit lane (loaded into q1)
@   row 1: byte-index mask for vtbl (loaded into q2)
@   row 2: round constant 0x1b, loaded by the 128-bit path after its loop
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
@ aes_v8_set_encrypt_key -- expand a user key into an AES encryption key
@ schedule. AES instructions are emitted as .byte sequences, each with the
@ decoded mnemonic in its trailing comment.
@   r0 = user key (input)
@   r1 = key length in bits
@   r2 = key schedule (output); the round count (10, 12 or 14) is stored
@        at byte offset 240 from the start of the schedule
@ Returns in r0:
@    0  on success
@   -1  if r0 or r2 is zero
@   -2  if r1 is below 128, above 256, or not a multiple of 64
@ .Lenc_key is a second entry point used by aes_v8_set_decrypt_key. On a
@ successful return r2 points 240 bytes past the start of the schedule and
@ r12 holds the round count; that function relies on both.
@ q0 is kept all-zero for the whole expansion, q1 holds the current round
@ constant and q2 the vtbl mask from .Lrcon.
.globl aes_v8_set_encrypt_key
.type aes_v8_set_encrypt_key,%function
.align 5
aes_v8_set_encrypt_key:
.Lenc_key:
 mov r3,#-1
 cmp r0,#0
 beq .Lenc_key_abort
 cmp r2,#0
 beq .Lenc_key_abort
 mov r3,#-2
 cmp r1,#128
 blt .Lenc_key_abort
 cmp r1,#256
 bgt .Lenc_key_abort
 tst r1,#0x3f
 bne .Lenc_key_abort
 adr r3,.Lrcon
 cmp r1,#192
 veor q0,q0,q0
 vld1.8 {q3},[r0]!
 mov r1,#8 @ reuse r1
 vld1.32 {q1,q2},[r3]!
 @ flags still come from "cmp r1,#192": dispatch on the key size
 blt .Loop128
 beq .L192
 b .L256
.align 4
.Loop128:
 vtbl.8 d20,{q3},d4
 vtbl.8 d21,{q3},d5
 vext.8 q9,q0,q3,#12
 vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
 subs r1,r1,#1
 veor q3,q3,q9
 vext.8 q9,q0,q9,#12
 veor q3,q3,q9
 vext.8 q9,q0,q9,#12
 veor q10,q10,q1
 veor q3,q3,q9
 vshl.u8 q1,q1,#1
 veor q3,q3,q10
 bne .Loop128
 @ the last two rounds take their constant from the third .Lrcon row
 vld1.32 {q1},[r3]
 vtbl.8 d20,{q3},d4
 vtbl.8 d21,{q3},d5
 vext.8 q9,q0,q3,#12
 vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
 veor q3,q3,q9
 vext.8 q9,q0,q9,#12
 veor q3,q3,q9
 vext.8 q9,q0,q9,#12
 veor q10,q10,q1
 veor q3,q3,q9
 vshl.u8 q1,q1,#1
 veor q3,q3,q10
 vtbl.8 d20,{q3},d4
 vtbl.8 d21,{q3},d5
 vext.8 q9,q0,q3,#12
 vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
 veor q3,q3,q9
 vext.8 q9,q0,q9,#12
 veor q3,q3,q9
 vext.8 q9,q0,q9,#12
 veor q10,q10,q1
 veor q3,q3,q9
 veor q3,q3,q10
 vst1.32 {q3},[r2]
 @ r2 is at offset 160 here; advance it to offset 240
 add r2,r2,#0x50
 mov r12,#10
 b .Ldone
.align 4
.L192:
 vld1.8 {d16},[r0]!
 vmov.i8 q10,#8 @ borrow q10
 vst1.32 {q3},[r2]!
 vsub.i8 q2,q2,q10 @ adjust the mask
.Loop192:
 vtbl.8 d20,{q8},d4
 vtbl.8 d21,{q8},d5
 vext.8 q9,q0,q3,#12
 vst1.32 {d16},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
 subs r1,r1,#1
 veor q3,q3,q9
 vext.8 q9,q0,q9,#12
 veor q3,q3,q9
 vext.8 q9,q0,q9,#12
 veor q3,q3,q9
 vdup.32 q9,d7[1]
 veor q9,q9,q8
 veor q10,q10,q1
 vext.8 q8,q0,q8,#12
 vshl.u8 q1,q1,#1
 veor q8,q8,q9
 veor q3,q3,q10
 veor q8,q8,q10
 vst1.32 {q3},[r2]!
 bne .Loop192
 mov r12,#12
 @ r2 is at offset 208 here; advance it to offset 240
 add r2,r2,#0x20
 b .Ldone
.align 4
.L256:
 vld1.8 {q8},[r0]
 mov r1,#7
 mov r12,#14
 vst1.32 {q3},[r2]!
.Loop256:
 vtbl.8 d20,{q8},d4
 vtbl.8 d21,{q8},d5
 vext.8 q9,q0,q3,#12
 vst1.32 {q8},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
 subs r1,r1,#1
 veor q3,q3,q9
 vext.8 q9,q0,q9,#12
 veor q3,q3,q9
 vext.8 q9,q0,q9,#12
 veor q10,q10,q1
 veor q3,q3,q9
 vshl.u8 q1,q1,#1
 veor q3,q3,q10
 vst1.32 {q3},[r2]!
 @ flags come from "subs r1,r1,#1"; after the seventh pass r2 is at 240
 beq .Ldone
 vdup.32 q10,d7[1]
 vext.8 q9,q0,q8,#12
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
 veor q8,q8,q9
 vext.8 q9,q0,q9,#12
 veor q8,q8,q9
 vext.8 q9,q0,q9,#12
 veor q8,q8,q9
 veor q8,q8,q10
 b .Loop256
.Ldone:
 str r12,[r2]
 mov r3,#0
.Lenc_key_abort:
 mov r0,r3 @ return value
 bx lr
.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
@ aes_v8_set_decrypt_key -- build an AES decryption key schedule.
@ Takes the same arguments as aes_v8_set_encrypt_key (r0 = user key,
@ r1 = bits, r2 = schedule). It first runs the encryption expansion via
@ .Lenc_key; a non-zero result from that call is returned unchanged.
@ Otherwise the schedule is converted in place: the round keys are
@ swapped end for end, and aesimc is applied to every key except the two
@ outermost ones. Returns 0 in r0 on success.
.globl aes_v8_set_decrypt_key
.type aes_v8_set_decrypt_key,%function
.align 5
aes_v8_set_decrypt_key:
 stmdb sp!,{r4,lr}
 bl .Lenc_key
 cmp r0,#0
 bne .Ldec_key_abort
 sub r2,r2,#240 @ restore original r2
 mov r4,#-16
 add r0,r2,r12,lsl#4 @ end of key schedule
 @ swap the first and last round keys without transforming them
 vld1.32 {q0},[r2]
 vld1.32 {q1},[r0]
 vst1.32 {q0},[r0],r4
 vst1.32 {q1},[r2]!
.Loop_imc:
 @ r2 walks up and r0 walks down; each pair is transformed and swapped
 vld1.32 {q0},[r2]
 vld1.32 {q1},[r0]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
 vst1.32 {q0},[r0],r4
 vst1.32 {q1},[r2]!
 cmp r0,r2
 bhi .Loop_imc
 @ the pointers have met: transform the middle round key in place
 vld1.32 {q0},[r2]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
 vst1.32 {q0},[r0]
 eor r0,r0,r0 @ return value
.Ldec_key_abort:
 ldmia sp!,{r4,pc}
.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
@ aes_v8_encrypt -- encrypt one 16-byte block.
@   r0 = input block
@   r1 = output block
@   r2 = key schedule; the round count is read from [r2,#240]
@ The loop applies two rounds per iteration with q0 and q1 holding the
@ current pair of round keys. The final round is aese without aesmc,
@ followed by an eor with the last round key.
.globl aes_v8_encrypt
.type aes_v8_encrypt,%function
.align 5
aes_v8_encrypt:
 ldr r3,[r2,#240]
 vld1.32 {q0},[r2]!
 vld1.8 {q2},[r0]
 sub r3,r3,#2
 vld1.32 {q1},[r2]!
.Loop_enc:
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
 vld1.32 {q0},[r2]!
 subs r3,r3,#2
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
 vld1.32 {q1},[r2]!
 bgt .Loop_enc
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
 vld1.32 {q0},[r2]
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
 veor q2,q2,q0
 vst1.8 {q2},[r1]
 bx lr
.size aes_v8_encrypt,.-aes_v8_encrypt
@ aes_v8_decrypt -- decrypt one 16-byte block.
@   r0 = input block
@   r1 = output block
@   r2 = key schedule; the round count is read from [r2,#240]
@ Same structure as aes_v8_encrypt with aesd/aesimc in place of
@ aese/aesmc: two rounds per loop iteration, then a final aesd without
@ aesimc and an eor with the last round key.
.globl aes_v8_decrypt
.type aes_v8_decrypt,%function
.align 5
aes_v8_decrypt:
 ldr r3,[r2,#240]
 vld1.32 {q0},[r2]!
 vld1.8 {q2},[r0]
 sub r3,r3,#2
 vld1.32 {q1},[r2]!
.Loop_dec:
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
 vld1.32 {q0},[r2]!
 subs r3,r3,#2
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
 vld1.32 {q1},[r2]!
 bgt .Loop_dec
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
 vld1.32 {q0},[r2]
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
 veor q2,q2,q0
 vst1.8 {q2},[r1]
 bx lr
.size aes_v8_decrypt,.-aes_v8_decrypt
@ aes_v8_cbc_encrypt -- AES-CBC encryption or decryption.
@ NOTE(review): the C prototype is not visible here; the argument roles
@ below are read off the register usage and should be confirmed.
@   r0      = input pointer
@   r1      = output pointer
@   r2      = length in bytes
@   r3      = key schedule; the round count is read from [r3,#240]
@   [sp,#0] = IV pointer (loaded into r4); read at entry and overwritten
@             with the final chaining value at .Lcbc_done
@   [sp,#4] = direction (loaded into r5); zero selects .Lcbc_dec,
@             anything else encrypts
@ A length below 16 returns at once without touching the IV. Otherwise
@ the length is rounded down to a multiple of 16.
@ Register roles:
@   q6        = current chaining value (IV or previous ciphertext block)
@   q7        = last round key
@   q10-q15   = the six round keys before the last one on the encrypt
@               paths; the decrypt path reuses q10 and q11 for data
@   q8,q9     = current pair of round keys
@   r8        = post-increment for the input load, zeroed for the last
@               block so the look-ahead load does not step past the input
@ Encryption is serial, one block per iteration, with a separate loop for
@ 128-bit keys. Decryption handles three blocks per iteration, with
@ .Lcbc_dec_tail taking a remainder of one or two blocks.
.globl aes_v8_cbc_encrypt
.type aes_v8_cbc_encrypt,%function
.align 5
aes_v8_cbc_encrypt:
 mov ip,sp
 stmdb sp!,{r4,r5,r6,r7,r8,lr}
 vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
 ldmia ip,{r4,r5} @ load remaining args
 subs r2,r2,#16
 mov r8,#16
 blo .Lcbc_abort
 moveq r8,#0
 cmp r5,#0 @ en- or decrypting?
 ldr r5,[r3,#240]
 and r2,r2,#-16
 vld1.8 {q6},[r4]
 vld1.8 {q0},[r0],r8
 vld1.32 {q8,q9},[r3] @ load key schedule...
 sub r5,r5,#6
 add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
 sub r5,r5,#2
 vld1.32 {q10,q11},[r7]!
 vld1.32 {q12,q13},[r7]!
 vld1.32 {q14,q15},[r7]!
 vld1.32 {q7},[r7]
 add r7,r3,#32
 mov r6,r5
 @ flags still come from "cmp r5,#0" above
 beq .Lcbc_dec
 @ r5 is now rounds-8, so 2 means a 128-bit key
 cmp r5,#2
 veor q0,q0,q6
 @ q5 = rndkey[0]^rndkey[last], applied to each following input block
 veor q5,q8,q7
 beq .Lcbc_enc128
 vld1.32 {q2,q3},[r7]
 add r7,r3,#16
 add r6,r3,#16*4
 add r12,r3,#16*5
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
 add r14,r3,#16*6
 add r3,r3,#16*7
 b .Lenter_cbc_enc
.align 4
.Loop_cbc_enc:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
 vst1.8 {q6},[r1]!
.Lenter_cbc_enc:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
 vld1.32 {q8},[r6]
 @ r5 == 4 means 12 rounds: skip the two extra rounds used for 14
 cmp r5,#4
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
 vld1.32 {q9},[r12]
 beq .Lcbc_enc192
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
 vld1.32 {q8},[r14]
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
 vld1.32 {q9},[r3]
 nop
.Lcbc_enc192:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
 subs r2,r2,#16
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
 moveq r8,#0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
 vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
 veor q8,q8,q5
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
 vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
 veor q6,q0,q7
 bhs .Loop_cbc_enc
 vst1.8 {q6},[r1]!
 b .Lcbc_done
.align 5
.Lcbc_enc128:
 vld1.32 {q2,q3},[r7]
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
 b .Lenter_cbc_enc128
.Loop_cbc_enc128:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
 vst1.8 {q6},[r1]!
.Lenter_cbc_enc128:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
 subs r2,r2,#16
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
 moveq r8,#0
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
 vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
 veor q8,q8,q5
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
 veor q6,q0,q7
 bhs .Loop_cbc_enc128
 vst1.8 {q6},[r1]!
 b .Lcbc_done
.align 5
.Lcbc_dec:
 @ The decrypt path reuses q10 and q11 for data, so the rounds that the
 @ encrypt path runs with q10/q11 as keys are covered by the q8/q9 loop
 @ here (r6 = r5+2 gives that loop one more iteration).
 vld1.8 {q10},[r0]!
 subs r2,r2,#32 @ bias
 add r6,r5,#2
 vorr q3,q0,q0
 vorr q1,q0,q0
 vorr q11,q10,q10
 blo .Lcbc_dec_tail
 vorr q1,q10,q10
 vld1.8 {q10},[r0]!
 vorr q2,q0,q0
 vorr q3,q1,q1
 vorr q11,q10,q10
.Loop3x_cbc_dec:
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
 vld1.32 {q8},[r7]!
 subs r6,r6,#2
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
 vld1.32 {q9},[r7]!
 bgt .Loop3x_cbc_dec
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
 @ q4, q5 and q9 become (previous ciphertext block ^ last round key),
 @ so a single eor after the final aesd yields the plaintext
 veor q4,q6,q7
 subs r2,r2,#0x30
 veor q5,q2,q7
 movlo r6,r2 @ r6 is zero at this point
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
 veor q9,q3,q7
 add r0,r0,r6 @ r0 is adjusted in such way that
 @ at exit from the loop q1-q10
 @ are loaded with last "words"
 vorr q6,q11,q11
 mov r7,r3
.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
 vld1.8 {q2},[r0]!
.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
 vld1.8 {q3},[r0]!
.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
 vld1.8 {q11},[r0]!
.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
 vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
 add r6,r5,#2
 veor q4,q4,q0
 veor q5,q5,q1
 veor q10,q10,q9
 vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
 vst1.8 {q4},[r1]!
 vorr q0,q2,q2
 vst1.8 {q5},[r1]!
 vorr q1,q3,q3
 vst1.8 {q10},[r1]!
 vorr q10,q11,q11
 bhs .Loop3x_cbc_dec
 @ r2 == -0x30 means every block has been consumed
 cmn r2,#0x30
 beq .Lcbc_done
 nop
.Lcbc_dec_tail:
 @ One or two blocks left. q1 and q10 are both run through the cipher;
 @ with a single block left only the q10 result is used.
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
 vld1.32 {q8},[r7]!
 subs r6,r6,#2
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
 vld1.32 {q9},[r7]!
 bgt .Lcbc_dec_tail
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
 @ r2 == -0x20 means exactly one block is left; tested by the beq below
 cmn r2,#0x20
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
 veor q5,q6,q7
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
 veor q9,q3,q7
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
 beq .Lcbc_dec_one
 veor q5,q5,q1
 veor q9,q9,q10
 vorr q6,q11,q11
 vst1.8 {q5},[r1]!
 vst1.8 {q9},[r1]!
 b .Lcbc_done
.Lcbc_dec_one:
 veor q5,q5,q10
 vorr q6,q11,q11
 vst1.8 {q5},[r1]!
.Lcbc_done:
 @ write the final chaining value back through the IV pointer
 vst1.8 {q6},[r4]
.Lcbc_abort:
 vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
 ldmia sp!,{r4,r5,r6,r7,r8,pc}
.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
@ aes_v8_ctr32_encrypt_blocks -- AES in counter mode with a 32-bit counter.
@ NOTE(review): the C prototype is not visible here; the argument roles
@ below are read off the register usage and should be confirmed.
@   r0      = input pointer
@   r1      = output pointer
@   r2      = number of 16-byte blocks
@   r3      = key schedule; the round count is read from [r3,#240]
@   [sp,#0] = pointer to the 16-byte counter block (loaded into r4); it
@             is only read, never written back here
@ Register roles:
@   r8          = running 32-bit counter in native byte order
@   q6          = copy of the initial counter block, used as a template
@   q0,q1,q10   = the (up to) three counter blocks being encrypted
@   q8,q9       = current pair of round keys, reloaded through r7
@   q12-q15,q7  = the last five round keys, q7 being the final one
@   r12         = post-increment used by the tail input load (0 or 16);
@                 it is also used as scratch for the byte-reversed counter
@ The main loop handles three blocks per iteration; .Lctr32_tail handles
@ a remainder of one or two blocks.
.globl aes_v8_ctr32_encrypt_blocks
.type aes_v8_ctr32_encrypt_blocks,%function
.align 5
aes_v8_ctr32_encrypt_blocks:
 mov ip,sp
 stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
 vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
 ldr r4, [ip] @ load remaining arg
 ldr r5,[r3,#240]
 ldr r8, [r4, #12]
 vld1.32 {q0},[r4]
 vld1.32 {q8,q9},[r3] @ load key schedule...
 sub r5,r5,#4
 mov r12,#16
 cmp r2,#2
 add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
 sub r5,r5,#2
 vld1.32 {q12,q13},[r7]!
 vld1.32 {q14,q15},[r7]!
 vld1.32 {q7},[r7]
 add r7,r3,#32
 mov r6,r5
 @ fewer than two blocks: make the tail input stride zero so that the
 @ second tail load re-reads the same block instead of reading past it
 movlo r12,#0
#ifndef __ARMEB__
 rev r8, r8
#endif
 vorr q1,q0,q0
 add r10, r8, #1
 vorr q10,q0,q0
 add r8, r8, #2
 vorr q6,q0,q0
 rev r10, r10
 vmov.32 d3[1],r10
 @ flags still come from "cmp r2,#2": one or two blocks go to the tail
 bls .Lctr32_tail
 rev r12, r8
 sub r2,r2,#3 @ bias
 vmov.32 d21[1],r12
 b .Loop3x_ctr32
.align 4
.Loop3x_ctr32:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
 vld1.32 {q8},[r7]!
 subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
 vld1.32 {q9},[r7]!
 bgt .Loop3x_ctr32
 @ Remaining rounds. The results move to q4, q5 and q9 so that q0, q1
 @ and q10 can already be refilled with the next three counter blocks
 @ while the input blocks are loaded into q2, q3 and q11.
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
 vld1.8 {q2},[r0]!
 vorr q0,q6,q6
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
 vld1.8 {q3},[r0]!
 vorr q1,q6,q6
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
 vld1.8 {q11},[r0]!
 mov r7,r3
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
 vorr q10,q6,q6
 add r9,r8,#1
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
 @ the last round key is folded into the input here, so one veor with
 @ the aese result below finishes the block
 veor q2,q2,q7
 add r10,r8,#2
.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
 veor q3,q3,q7
 add r8,r8,#3
.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
 veor q11,q11,q7
 rev r9,r9
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
 vmov.32 d1[1], r9
 rev r10,r10
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
 vmov.32 d3[1], r10
 rev r12,r8
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
 vmov.32 d21[1], r12
 subs r2,r2,#3
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15
 veor q2,q2,q4
 vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
 vst1.8 {q2},[r1]!
 veor q3,q3,q5
 mov r6,r5
 vst1.8 {q3},[r1]!
 veor q11,q11,q9
 vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
 vst1.8 {q11},[r1]!
 bhs .Loop3x_ctr32
 @ undo the bias: r2 is now the number of blocks left (0, 1 or 2)
 adds r2,r2,#3
 beq .Lctr32_done
 cmp r2,#1
 mov r12,#16
 moveq r12,#0
.Lctr32_tail:
 @ Encrypts two counter blocks (q0, q1) regardless; only the first
 @ result is stored when a single block remains.
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
 vld1.32 {q8},[r7]!
 subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
 vld1.32 {q9},[r7]!
 bgt .Lctr32_tail
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
 vld1.8 {q2},[r0],r12
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
 vld1.8 {q3},[r0]
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
 veor q2,q2,q7
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
 veor q3,q3,q7
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15
 cmp r2,#1
 veor q2,q2,q0
 veor q3,q3,q1
 vst1.8 {q2},[r1]!
 beq .Lctr32_done
 vst1.8 {q3},[r1]
.Lctr32_done:
 vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,589 @@
#if defined(__arm__)
#include "arm_arch.h"
.text
.code 32
#if __ARM_MAX_ARCH__>=7
@ Link-time offset from .Lbn_mul_mont to OPENSSL_armcap_P. bn_mul_mont adds
@ it to its own run-time address to reach the capability word without an
@ absolute address.
.align 5
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-.Lbn_mul_mont
#endif
@ bn_mul_mont -- word-by-word Montgomery multiplication, integer-only path.
@   r0      = rp, result
@   r1      = ap
@   r2      = bp
@   r3      = np, modulus
@   [sp,#0] = pointer to n0
@   [sp,#4] = num, number of 32-bit words
@ Returns 1 in r0 on success and 0 when num is below 2.
@ When built with __ARM_MAX_ARCH__>=7, num is a multiple of 8 and bit 0 of
@ the capability word is set, the call is forwarded to bn_mul8x_mont_neon
@ with the original arguments.
@
@ Stack frame of the integer path, relative to r0 = &tp[num-1]:
@   sp .. r0+4   tp[0..num], the temporary product (num+1 words)
@   r0+2*4 ..    ten saved registers r4-r12,lr
@   r0+12*4      saved rp (original r0)
@   r0+13*4      saved bp (original r2), reused as the running bp pointer
@   r0+14*4      caller slot that held &n0, overwritten with the n0 value
@   r0+15*4      caller slot that held num, overwritten with the loop-end
@                bp pointer
.globl bn_mul_mont
.hidden bn_mul_mont
.type bn_mul_mont,%function
.align 5
bn_mul_mont:
.Lbn_mul_mont:
 ldr ip,[sp,#4] @ load num
 stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_MAX_ARCH__>=7
 tst ip,#7
 bne .Lialu
 adr r0,bn_mul_mont
 ldr r2,.LOPENSSL_armcap
 ldr r0,[r0,r2]
#ifdef __APPLE__
 ldr r0,[r0]
#endif
 tst r0,#1 @ NEON available?
 ldmia sp, {r0,r2}
 beq .Lialu
 add sp,sp,#8
 b bn_mul8x_mont_neon
.align 4
.Lialu:
#endif
 cmp ip,#2
 mov r0,ip @ load num
 movlt r0,#0
 addlt sp,sp,#2*4
 blt .Labrt
 stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
 mov r0,r0,lsl#2 @ rescale r0 for byte count
 sub sp,sp,r0 @ alloca(4*num)
 sub sp,sp,#4 @ +extra dword
 sub r0,r0,#4 @ "num=num-1"
 add r4,r2,r0 @ &bp[num-1]
 add r0,sp,r0 @ r0 to point at &tp[num-1]
 ldr r8,[r0,#14*4] @ &n0
 ldr r2,[r2] @ bp[0]
 ldr r5,[r1],#4 @ ap[0],ap++
 ldr r6,[r3],#4 @ np[0],np++
 ldr r8,[r8] @ *n0
 str r4,[r0,#15*4] @ save &bp[num]
 umull r10,r11,r5,r2 @ ap[0]*bp[0]
 str r8,[r0,#14*4] @ save n0 value
 mul r8,r10,r8 @ "tp[0]"*n0
 mov r12,#0
 umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
 mov r4,sp
@ First pass: tp = (ap*bp[0] + np*m) >> 32, with m in r8.
.L1st:
 ldr r5,[r1],#4 @ ap[j],ap++
 mov r10,r11
 ldr r6,[r3],#4 @ np[j],np++
 mov r11,#0
 umlal r10,r11,r5,r2 @ ap[j]*bp[0]
 mov r14,#0
 umlal r12,r14,r6,r8 @ np[j]*n0
 adds r12,r12,r10
 str r12,[r4],#4 @ tp[j-1]=,tp++
 adc r12,r14,#0
 cmp r4,r0
 bne .L1st
 adds r12,r12,r11
 ldr r4,[r0,#13*4] @ restore bp
 mov r14,#0
 ldr r8,[r0,#14*4] @ restore n0
 adc r14,r14,#0
 str r12,[r0] @ tp[num-1]=
 str r14,[r0,#4] @ tp[num]=
@ Remaining passes: for each further word of bp, tp = (tp + ap*bp[i] + np*m) >> 32.
.Louter:
 sub r7,r0,sp @ "original" r0-1 value
 sub r1,r1,r7 @ "rewind" ap to &ap[1]
 ldr r2,[r4,#4]! @ *(++bp)
 sub r3,r3,r7 @ "rewind" np to &np[1]
 ldr r5,[r1,#-4] @ ap[0]
 ldr r10,[sp] @ tp[0]
 ldr r6,[r3,#-4] @ np[0]
 ldr r7,[sp,#4] @ tp[1]
 mov r11,#0
 umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
 str r4,[r0,#13*4] @ save bp
 mul r8,r10,r8
 mov r12,#0
 umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
 mov r4,sp
.Linner:
 ldr r5,[r1],#4 @ ap[j],ap++
 adds r10,r11,r7 @ +=tp[j]
 ldr r6,[r3],#4 @ np[j],np++
 mov r11,#0
 umlal r10,r11,r5,r2 @ ap[j]*bp[i]
 mov r14,#0
 umlal r12,r14,r6,r8 @ np[j]*n0
 adc r11,r11,#0
 ldr r7,[r4,#8] @ tp[j+1]
 adds r12,r12,r10
 str r12,[r4],#4 @ tp[j-1]=,tp++
 adc r12,r14,#0
 cmp r4,r0
 bne .Linner
 adds r12,r12,r11
 mov r14,#0
 ldr r4,[r0,#13*4] @ restore bp
 adc r14,r14,#0
 ldr r8,[r0,#14*4] @ restore n0
 adds r12,r12,r7
 ldr r7,[r0,#15*4] @ restore &bp[num]
 adc r14,r14,#0
 str r12,[r0] @ tp[num-1]=
 str r14,[r0,#4] @ tp[num]=
 cmp r4,r7
 bne .Louter
 ldr r2,[r0,#12*4] @ pull rp
 add r0,r0,#4 @ r0 to point at &tp[num]
 sub r5,r0,sp @ "original" num value
 mov r4,sp @ "rewind" r4
 mov r1,r4 @ "borrow" r1
 sub r3,r3,r5 @ "rewind" r3 to &np[0]
 subs r7,r7,r7 @ "clear" carry flag
@ Final reduction: rp = tp - np, keeping the borrow in the carry flag.
.Lsub: ldr r7,[r4],#4
 ldr r6,[r3],#4
 sbcs r7,r7,r6 @ tp[j]-np[j]
 str r7,[r2],#4 @ rp[j]=
 teq r4,r0 @ preserve carry
 bne .Lsub
 sbcs r14,r14,#0 @ upmost carry
 mov r4,sp @ "rewind" r4
 sub r2,r2,r5 @ "rewind" r2
 @ r14 is now used as a mask to pick the copy source without branching
 and r1,r4,r14
 bic r3,r2,r14
 orr r1,r1,r3 @ ap=borrow?tp:rp
@ Copy the selected value into rp while overwriting every word of tp.
.Lcopy: ldr r7,[r1],#4 @ copy or in-place refresh
 str sp,[r4],#4 @ zap tp
 str r7,[r2],#4
 cmp r4,r0
 bne .Lcopy
 add sp,r0,#4 @ skip over tp[num+1]
 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
 add sp,sp,#2*4 @ skip over {r0,r2}
 mov r0,#1
.Labrt:
#if __ARM_ARCH__>=5
 bx lr @ .word 0xe12fff1e
#else
 tst lr,#1
 moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size bn_mul_mont,.-bn_mul_mont
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
@ ---------------------------------------------------------------------
@ bn_mul8x_mont_neon
@
@ NEON code path of the Montgomery multiplication in this file (the
@ identification string after this function reads "Montgomery
@ multiplication for ARMv4/NEON").  No .globl is attached to the symbol
@ here, so it is file-local.
@
@ Register use as established by the loads/stores below:
@   r0      - result pointer (written by stmia in .LNEON_sub and
@             .LNEON_copy_n_zap)
@   r1      - first multiplicand, read 8 words at a time
@   r2      - second multiplicand, read one word per outer pass
@   r3      - modulus, read 8 words at a time
@   [sp]    - pointer to the n0 word (loaded into r4, then d30)
@   [sp+4]  - word count (loaded into r5)
@ NOTE(review): presumably this matches the C prototype
@   bn_mul_mont(rp, ap, bp, np, n0, num) - confirm against the caller.
@ The loops step by 8 words and there is a dedicated path for a word
@ count of exactly 8, so the count is assumed to be a multiple of 8.
@
@ Saves r4-r11 and d8-d15 (32 + 64 = 96 bytes), which is why the
@ epilogue recovers sp as ip-96.
@ ---------------------------------------------------------------------
.type bn_mul8x_mont_neon,%function
.align 5
bn_mul8x_mont_neon:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load rest of parameter block
@ Carve a scratch frame below the saved registers: 16 bytes plus
@ 16 bytes per word of the count, rounded down to a 64-byte boundary.
sub r7,sp,#16
vld1.32 {d28[0]}, [r2,:32]!
sub r7,r7,r5,lsl#4
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
and r7,r7,#-64
vld1.32 {d30[0]}, [r4,:32]
mov sp,r7 @ alloca
veor d8,d8,d8
subs r8,r5,#8
@ vzip.16 against the zeroed d8 spreads the 32-bit word in d28 into
@ 16-bit halves, one per 32-bit lane, so the 64-bit accumulators have
@ headroom for deferred carries.
vzip.16 d28,d8
vmull.u32 q6,d28,d0[0]
vmull.u32 q7,d28,d0[1]
vmull.u32 q8,d28,d1[0]
vshl.i64 d10,d13,#16
vmull.u32 q9,d28,d1[1]
vadd.u64 d10,d10,d12
veor d8,d8,d8
@ d29 = low word of the accumulator times the n0 word held in d30;
@ it is the per-pass multiplier applied to the modulus words below.
vmul.u32 d29,d10,d30
vmull.u32 q10,d28,d2[0]
vld1.32 {d4,d5,d6,d7}, [r3]!
vmull.u32 q11,d28,d2[1]
vmull.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmull.u32 q13,d28,d3[1]
bne .LNEON_1st
@ special case for num=8, everything is in register bank...
vmlal.u32 q6,d29,d4[0]
sub r9,r5,#1
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
b .LNEON_outer8
.align 4
@ Outer loop for the 8-word case: r9 counts the remaining passes
@ (initialised to count-1 above); each pass consumes one more word
@ from r2 and shifts the accumulator window down by one register pair.
.LNEON_outer8:
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
vzip.16 d28,d8
vadd.u64 d12,d12,d10
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
vmlal.u32 q8,d28,d1[0]
vshl.i64 d10,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d10,d10,d12
veor d8,d8,d8
subs r9,r9,#1
vmul.u32 d29,d10,d30
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
bne .LNEON_outer8
@ All passes done for the 8-word case: start carry propagation here
@ and join the common tail at .LNEON_tail2 (the accumulators are
@ already in registers, so the loads at .LNEON_tail are skipped).
vadd.u64 d12,d12,d10
mov r7,sp
vshr.u64 d10,d12,#16
mov r8,r5
vadd.u64 d13,d13,d10
add r6,sp,#16
vshr.u64 d10,d13,#16
vzip.16 d12,d13
b .LNEON_tail2
.align 4
@ General case, first pass: accumulators do not fit in registers, so
@ each group of 8 is spilled to the scratch frame through r7 while the
@ next 8 words are loaded from r1 and r3.  r8 counts remaining words.
.LNEON_1st:
vmlal.u32 q6,d29,d4[0]
vld1.32 {d0,d1,d2,d3}, [r1]!
vmlal.u32 q7,d29,d4[1]
subs r8,r8,#8
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vld1.32 {d4,d5}, [r3]!
vmlal.u32 q11,d29,d6[1]
vst1.64 {q6,q7}, [r7,:256]!
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vst1.64 {q8,q9}, [r7,:256]!
vmull.u32 q6,d28,d0[0]
vld1.32 {d6,d7}, [r3]!
vmull.u32 q7,d28,d0[1]
vst1.64 {q10,q11}, [r7,:256]!
vmull.u32 q8,d28,d1[0]
vmull.u32 q9,d28,d1[1]
vst1.64 {q12,q13}, [r7,:256]!
vmull.u32 q10,d28,d2[0]
vmull.u32 q11,d28,d2[1]
vmull.u32 q12,d28,d3[0]
vmull.u32 q13,d28,d3[1]
bne .LNEON_1st
@ Last group of the first pass: finish the modulus products, spill,
@ append a zeroed top slot (q4) and reload the start of the frame
@ through r6 (sp+16) ready for the next outer pass.
vmlal.u32 q6,d29,d4[0]
add r6,sp,#16
vmlal.u32 q7,d29,d4[1]
sub r1,r1,r5,lsl#2 @ rewind r1
vmlal.u32 q8,d29,d5[0]
vld1.64 {q5}, [sp,:128]
vmlal.u32 q9,d29,d5[1]
sub r9,r5,#1
vmlal.u32 q10,d29,d6[0]
vst1.64 {q6,q7}, [r7,:256]!
vmlal.u32 q11,d29,d6[1]
vshr.u64 d10,d10,#16
vld1.64 {q6}, [r6, :128]!
vmlal.u32 q12,d29,d7[0]
vst1.64 {q8,q9}, [r7,:256]!
vmlal.u32 q13,d29,d7[1]
vst1.64 {q10,q11}, [r7,:256]!
vadd.u64 d10,d10,d11
veor q4,q4,q4
vst1.64 {q12,q13}, [r7,:256]!
vld1.64 {q7,q8}, [r6, :256]!
vst1.64 {q4}, [r7,:128]
vshr.u64 d10,d10,#16
b .LNEON_outer
.align 4
@ General case, remaining passes: r9 counts passes (count-1 of them).
@ r6 is the read cursor into the scratch frame, r7 the write cursor
@ (it trails r6 by one slot, which performs the one-word shift).
.LNEON_outer:
vld1.32 {d28[0]}, [r2,:32]!
sub r3,r3,r5,lsl#2 @ rewind r3
vld1.32 {d0,d1,d2,d3}, [r1]!
veor d8,d8,d8
mov r7,sp
vzip.16 d28,d8
sub r8,r5,#8
vadd.u64 d12,d12,d10
vmlal.u32 q6,d28,d0[0]
vld1.64 {q9,q10},[r6,:256]!
vmlal.u32 q7,d28,d0[1]
vmlal.u32 q8,d28,d1[0]
vld1.64 {q11,q12},[r6,:256]!
vmlal.u32 q9,d28,d1[1]
vshl.i64 d10,d13,#16
veor d8,d8,d8
vadd.u64 d10,d10,d12
vld1.64 {q13},[r6,:128]!
vmul.u32 d29,d10,d30
vmlal.u32 q10,d28,d2[0]
vld1.32 {d4,d5,d6,d7}, [r3]!
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
@ Inner loop of a general pass: 8 words per iteration, r8 counts the
@ remaining words; stores of finished accumulators are interleaved
@ with loads of the next ones.
.LNEON_inner:
vmlal.u32 q6,d29,d4[0]
vld1.32 {d0,d1,d2,d3}, [r1]!
vmlal.u32 q7,d29,d4[1]
subs r8,r8,#8
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vst1.64 {q6,q7}, [r7,:256]!
vmlal.u32 q10,d29,d6[0]
vld1.64 {q6}, [r6, :128]!
vmlal.u32 q11,d29,d6[1]
vst1.64 {q8,q9}, [r7,:256]!
vmlal.u32 q12,d29,d7[0]
vld1.64 {q7,q8}, [r6, :256]!
vmlal.u32 q13,d29,d7[1]
vst1.64 {q10,q11}, [r7,:256]!
vmlal.u32 q6,d28,d0[0]
vld1.64 {q9,q10}, [r6, :256]!
vmlal.u32 q7,d28,d0[1]
vst1.64 {q12,q13}, [r7,:256]!
vmlal.u32 q8,d28,d1[0]
vld1.64 {q11,q12}, [r6, :256]!
vmlal.u32 q9,d28,d1[1]
vld1.32 {d4,d5,d6,d7}, [r3]!
vmlal.u32 q10,d28,d2[0]
vld1.64 {q13}, [r6, :128]!
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vmlal.u32 q13,d28,d3[1]
bne .LNEON_inner
@ End of a general pass: finish the last group, spill it, reset the
@ read cursor r6 and decrement the pass counter r9.
vmlal.u32 q6,d29,d4[0]
add r6,sp,#16
vmlal.u32 q7,d29,d4[1]
sub r1,r1,r5,lsl#2 @ rewind r1
vmlal.u32 q8,d29,d5[0]
vld1.64 {q5}, [sp,:128]
vmlal.u32 q9,d29,d5[1]
subs r9,r9,#1
vmlal.u32 q10,d29,d6[0]
vst1.64 {q6,q7}, [r7,:256]!
vmlal.u32 q11,d29,d6[1]
vld1.64 {q6}, [r6, :128]!
vshr.u64 d10,d10,#16
vst1.64 {q8,q9}, [r7,:256]!
vmlal.u32 q12,d29,d7[0]
vld1.64 {q7,q8}, [r6, :256]!
vmlal.u32 q13,d29,d7[1]
vst1.64 {q10,q11}, [r7,:256]!
vadd.u64 d10,d10,d11
vst1.64 {q12,q13}, [r7,:256]!
vshr.u64 d10,d10,#16
bne .LNEON_outer
mov r7,sp
mov r8,r5
@ Tail: convert the wide accumulators into packed 32-bit words.  Each
@ step adds the running carry in d10, shifts out the next carry by 16
@ bits, re-packs the two 16-bit halves with vzip.16 and stores one
@ 32-bit word through r7.  r8 counts words, 8 per iteration.
.LNEON_tail:
vadd.u64 d12,d12,d10
vld1.64 {q9,q10}, [r6, :256]!
vshr.u64 d10,d12,#16
vadd.u64 d13,d13,d10
vld1.64 {q11,q12}, [r6, :256]!
vshr.u64 d10,d13,#16
vld1.64 {q13}, [r6, :128]!
vzip.16 d12,d13
.LNEON_tail2:
vadd.u64 d14,d14,d10
vst1.32 {d12[0]}, [r7, :32]!
vshr.u64 d10,d14,#16
vadd.u64 d15,d15,d10
vshr.u64 d10,d15,#16
vzip.16 d14,d15
vadd.u64 d16,d16,d10
vst1.32 {d14[0]}, [r7, :32]!
vshr.u64 d10,d16,#16
vadd.u64 d17,d17,d10
vshr.u64 d10,d17,#16
vzip.16 d16,d17
vadd.u64 d18,d18,d10
vst1.32 {d16[0]}, [r7, :32]!
vshr.u64 d10,d18,#16
vadd.u64 d19,d19,d10
vshr.u64 d10,d19,#16
vzip.16 d18,d19
vadd.u64 d20,d20,d10
vst1.32 {d18[0]}, [r7, :32]!
vshr.u64 d10,d20,#16
vadd.u64 d21,d21,d10
vshr.u64 d10,d21,#16
vzip.16 d20,d21
vadd.u64 d22,d22,d10
vst1.32 {d20[0]}, [r7, :32]!
vshr.u64 d10,d22,#16
vadd.u64 d23,d23,d10
vshr.u64 d10,d23,#16
vzip.16 d22,d23
vadd.u64 d24,d24,d10
vst1.32 {d22[0]}, [r7, :32]!
vshr.u64 d10,d24,#16
vadd.u64 d25,d25,d10
vld1.64 {q6}, [r6, :128]!
vshr.u64 d10,d25,#16
vzip.16 d24,d25
vadd.u64 d26,d26,d10
vst1.32 {d24[0]}, [r7, :32]!
vshr.u64 d10,d26,#16
vadd.u64 d27,d27,d10
vld1.64 {q7,q8}, [r6, :256]!
vshr.u64 d10,d27,#16
vzip.16 d26,d27
subs r8,r8,#8
vst1.32 {d26[0]}, [r7, :32]!
bne .LNEON_tail
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
sub r3,r3,r5,lsl#2 @ rewind r3
subs r1,sp,#0 @ clear carry flag
add r2,sp,r5,lsl#2
@ Subtract the modulus (r3) from the packed result on the stack (r1)
@ four words at a time, writing the difference to the output (r0).
@ The borrow travels in the carry flag, so the loop test uses teq,
@ which leaves carry untouched.  r2 marks the end of the packed words.
.LNEON_sub:
ldmia r1!, {r4,r5,r6,r7}
ldmia r3!, {r8,r9,r10,r11}
sbcs r8, r4,r8
sbcs r9, r5,r9
sbcs r10,r6,r10
sbcs r11,r7,r11
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne .LNEON_sub
ldr r10, [r1] @ load top-most bit
veor q0,q0,q0
sub r11,r2,sp @ this is num*4
veor q1,q1,q1
mov r1,sp
sub r0,r0,r11 @ rewind r0
mov r3,r2 @ second 3/4th of frame
sbcs r10,r10,#0 @ result is carry flag
@ Branch-free selection: with carry clear (the subtraction borrowed)
@ movcc replaces the stored difference with the unreduced words read
@ from the stack; with carry set the difference is written back as is.
@ The zeroed q0/q1 are stored over the scratch frame as it is consumed
@ so no intermediate values are left on the stack.
.LNEON_copy_n_zap:
ldmia r1!, {r4,r5,r6,r7}
ldmia r0, {r8,r9,r10,r11}
movcc r8, r4
vst1.64 {q0,q1}, [r3,:256]! @ wipe
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
movcc r11,r7
ldmia r1, {r4,r5,r6,r7}
stmia r0!, {r8,r9,r10,r11}
sub r1,r1,#16
ldmia r0, {r8,r9,r10,r11}
movcc r8, r4
vst1.64 {q0,q1}, [r1,:256]! @ wipe
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
movcc r11,r7
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne .LNEON_copy_n_zap
@ Epilogue: ip still holds the entry sp; 96 bytes below it is where
@ d8-d15 (64 bytes) and r4-r11 (32 bytes) were saved.
sub sp,ip,#96
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
bx lr @ .word 0xe12fff1e
.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
#endif
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
#endif
#endif

View File

@ -0,0 +1,541 @@
#if defined(__arm__)
#if defined(__arm__)
#include "arm_arch.h"
.syntax unified
.text
.code 32
#ifdef __APPLE__
#define ldrplb ldrbpl
#define ldrneb ldrbne
#endif
@ rem_4bit: 16-entry table of 16-bit constants (32 bytes in total),
@ indexed by the 4 bits shifted out of the low end of the running
@ value in the 4-bit GHASH routines below; the selected entry is
@ XORed into the top word shifted left by 16.
@ The table lives in .text immediately before rem_4bit_get and
@ gcm_ghash_4bit, both of which locate it with fixed pc-relative
@ offsets - do not insert or remove anything between them.
.type rem_4bit,%object
.align 5
rem_4bit:
.short 0x0000,0x1C20,0x3840,0x2460
.short 0x7080,0x6CA0,0x48C0,0x54E0
.short 0xE100,0xFD20,0xD940,0xC560
.short 0x9180,0x8DA0,0xA9C0,0xB5E0
.size rem_4bit,.-rem_4bit
@ rem_4bit_get: position-independent lookup of &rem_4bit for
@ gcm_gmult_4bit.  This is not a callable function: it is entered by a
@ plain branch from gcm_gmult_4bit and leaves by branching back to
@ .Lrem_4bit_got inside that routine, with the table address in r2.
@ In ARM state pc reads as the current instruction address plus 8, so
@ pc-8 is the address of this stub; the 32-byte table sits directly
@ before it.
@ The stub is four instructions (16 bytes) long; gcm_ghash_4bit relies
@ on that size for its own offset of 48 to the table, which appears to
@ be the reason for the trailing nop.
.type rem_4bit_get,%function
rem_4bit_get:
sub r2,pc,#8
sub r2,r2,#32 @ &rem_4bit
b .Lrem_4bit_got
nop
.size rem_4bit_get,.-rem_4bit_get
@ ---------------------------------------------------------------------
@ gcm_ghash_4bit
@
@ Table-driven (4 bits per lookup) GHASH over a run of 16-byte blocks.
@ Register use as established by the code below:
@   r0 - Xi, 16-byte hash state; read byte-wise and written back after
@        every block
@   r1 - Htbl, table of 16-byte entries indexed by nibble*16
@   r2 - input pointer, advanced by 16 per block
@   r3 - input length in bytes; turned into an end pointer on entry
@ NOTE(review): the outer loop ends only when r2 equals the end
@ pointer, so the length is assumed to be a non-zero multiple of 16 -
@ confirm against callers.
@
@ Stack frame after the two pushes (low to high address):
@   sp+0  .. sp+31  copy of rem_4bit (indexed with ldrh [sp,rem*2])
@   sp+32           saved r3 (end pointer)
@   sp+36 ..        saved r4-r11, lr
@ ---------------------------------------------------------------------
.globl gcm_ghash_4bit
.hidden gcm_ghash_4bit
.type gcm_ghash_4bit,%function
gcm_ghash_4bit:
@ pc-8 is the address of this entry point; the table is 48 bytes
@ before it (32 bytes of rem_4bit plus the 16-byte rem_4bit_get stub).
sub r12,pc,#8
add r3,r2,r3 @ r3 to point at the end
stmdb sp!,{r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} @ save r3/end too
sub r12,r12,#48 @ &rem_4bit
ldmia r12,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy rem_4bit ...
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ ... to stack
ldrb r12,[r2,#15]
ldrb r14,[r0,#15]
@ One iteration per 16-byte block.  On entry r12 holds input byte 15
@ and r14 holds Xi byte 15; their XOR is split into a low nibble
@ (r12, used scaled by 16) and a high nibble already scaled by 16
@ (r14).  r4-r7 carry the 128-bit running value.
.Louter:
eor r12,r12,r14
and r14,r12,#0xf0
and r12,r12,#0x0f
mov r3,#14
add r7,r1,r12,lsl#4
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
add r11,r1,r14
ldrb r12,[r2,#14]
and r14,r4,#0xf @ rem
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
add r14,r14,r14
eor r4,r8,r4,lsr#4
ldrh r8,[sp,r14] @ rem_4bit[rem]
eor r4,r4,r5,lsl#28
ldrb r14,[r0,#14]
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
eor r12,r12,r14
and r14,r12,#0xf0
and r12,r12,#0x0f
eor r7,r7,r8,lsl#16
@ Processes bytes 14 down to 0: r3 is the byte index, decremented
@ first; the pl-conditional instructions prefetch and split the next
@ byte only while the index is still non-negative.  Each half of the
@ loop shifts the 128-bit value right by 4, XORs in a table entry and
@ folds the shifted-out nibble back in through rem_4bit.
.Linner:
add r11,r1,r12,lsl#4
and r12,r4,#0xf @ rem
subs r3,r3,#1
add r12,r12,r12
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
ldrh r8,[sp,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4
ldrbpl r12,[r2,r3]
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
add r11,r1,r14
and r14,r4,#0xf @ rem
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4
ldrbpl r8,[r0,r3]
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
ldrh r9,[sp,r14]
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eorpl r12,r12,r8
eor r7,r11,r7,lsr#4
andpl r14,r12,#0xf0
andpl r12,r12,#0x0f
eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
bpl .Linner
ldr r3,[sp,#32] @ re-load r3/end
add r2,r2,#16
mov r14,r4
@ Write r4-r7 back to Xi most-significant byte first: a byte-reversed
@ word store on little-endian ARMv7+, a plain word store on
@ big-endian, byte stores otherwise.  r14 keeps the unswapped r4,
@ whose low byte is Xi byte 15 for the next block.  The cmp and the
@ conditional load of the next input byte are interleaved with the
@ stores; none of the store sequences changes the flags, so the final
@ bne still tests r2 against the end pointer.
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r4,r4
str r4,[r0,#12]
#elif defined(__ARMEB__)
str r4,[r0,#12]
#else
mov r9,r4,lsr#8
strb r4,[r0,#12+3]
mov r10,r4,lsr#16
strb r9,[r0,#12+2]
mov r11,r4,lsr#24
strb r10,[r0,#12+1]
strb r11,[r0,#12]
#endif
cmp r2,r3
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r5,r5
str r5,[r0,#8]
#elif defined(__ARMEB__)
str r5,[r0,#8]
#else
mov r9,r5,lsr#8
strb r5,[r0,#8+3]
mov r10,r5,lsr#16
strb r9,[r0,#8+2]
mov r11,r5,lsr#24
strb r10,[r0,#8+1]
strb r11,[r0,#8]
#endif
ldrbne r12,[r2,#15]
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6
str r6,[r0,#4]
#elif defined(__ARMEB__)
str r6,[r0,#4]
#else
mov r9,r6,lsr#8
strb r6,[r0,#4+3]
mov r10,r6,lsr#16
strb r9,[r0,#4+2]
mov r11,r6,lsr#24
strb r10,[r0,#4+1]
strb r11,[r0,#4]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r7,r7
str r7,[r0,#0]
#elif defined(__ARMEB__)
str r7,[r0,#0]
#else
mov r9,r7,lsr#8
strb r7,[r0,#0+3]
mov r10,r7,lsr#16
strb r9,[r0,#0+2]
mov r11,r7,lsr#24
strb r10,[r0,#0+1]
strb r11,[r0,#0]
#endif
bne .Louter
@ Drop the 32-byte table copy and the saved end pointer (4 bytes).
add sp,sp,#36
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size gcm_ghash_4bit,.-gcm_ghash_4bit
@ ---------------------------------------------------------------------
@ gcm_gmult_4bit
@
@ Table-driven (4 bits per lookup) multiplication of the 16-byte state
@ by the hash key, in place.  Register use as established by the code:
@   r0 - Xi, 16 bytes; read byte-wise from offset 15 down to 0 and
@        overwritten with the product
@   r1 - Htbl, table of 16-byte entries indexed by nibble*16
@ Unlike gcm_ghash_4bit this routine reads rem_4bit in place through
@ r2 instead of copying it to the stack.
@ ---------------------------------------------------------------------
.globl gcm_gmult_4bit
.hidden gcm_gmult_4bit
.type gcm_gmult_4bit,%function
gcm_gmult_4bit:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
ldrb r12,[r0,#15]
@ Detour through the rem_4bit_get stub, which sets r2 = &rem_4bit and
@ branches straight back to the label below.
b rem_4bit_get
.Lrem_4bit_got:
and r14,r12,#0xf0
and r12,r12,#0x0f
mov r3,#14
add r7,r1,r12,lsl#4
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
ldrb r12,[r0,#14]
add r11,r1,r14
and r14,r4,#0xf @ rem
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
add r14,r14,r14
eor r4,r8,r4,lsr#4
ldrh r8,[r2,r14] @ rem_4bit[rem]
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
and r14,r12,#0xf0
eor r7,r7,r8,lsl#16
and r12,r12,#0x0f
@ Bytes 14 down to 0 of Xi: r3 is the byte index, decremented first;
@ the pl-conditional instructions fetch and split the next byte only
@ while the index is still non-negative.  r4-r7 hold the 128-bit
@ running value, shifted right by 4 bits per table lookup.
.Loop:
add r11,r1,r12,lsl#4
and r12,r4,#0xf @ rem
subs r3,r3,#1
add r12,r12,r12
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
ldrh r8,[r2,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4
ldrbpl r12,[r0,r3]
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
add r11,r1,r14
and r14,r4,#0xf @ rem
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
ldrh r8,[r2,r14] @ rem_4bit[rem]
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
andpl r14,r12,#0xf0
andpl r12,r12,#0x0f
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
bpl .Loop
@ Write r4-r7 back to Xi most-significant byte first: byte-reversed
@ word store on little-endian ARMv7+, plain word store on big-endian,
@ individual byte stores otherwise.
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r4,r4
str r4,[r0,#12]
#elif defined(__ARMEB__)
str r4,[r0,#12]
#else
mov r9,r4,lsr#8
strb r4,[r0,#12+3]
mov r10,r4,lsr#16
strb r9,[r0,#12+2]
mov r11,r4,lsr#24
strb r10,[r0,#12+1]
strb r11,[r0,#12]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r5,r5
str r5,[r0,#8]
#elif defined(__ARMEB__)
str r5,[r0,#8]
#else
mov r9,r5,lsr#8
strb r5,[r0,#8+3]
mov r10,r5,lsr#16
strb r9,[r0,#8+2]
mov r11,r5,lsr#24
strb r10,[r0,#8+1]
strb r11,[r0,#8]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6
str r6,[r0,#4]
#elif defined(__ARMEB__)
str r6,[r0,#4]
#else
mov r9,r6,lsr#8
strb r6,[r0,#4+3]
mov r10,r6,lsr#16
strb r9,[r0,#4+2]
mov r11,r6,lsr#24
strb r10,[r0,#4+1]
strb r11,[r0,#4]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r7,r7
str r7,[r0,#0]
#elif defined(__ARMEB__)
str r7,[r0,#0]
#else
mov r9,r7,lsr#8
strb r7,[r0,#0+3]
mov r10,r7,lsr#16
strb r9,[r0,#0+2]
mov r11,r7,lsr#24
strb r10,[r0,#0+1]
strb r11,[r0,#0]
#endif
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size gcm_gmult_4bit,.-gcm_gmult_4bit
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
@ ---------------------------------------------------------------------
@ gcm_init_neon
@
@ Pre-computes the "twisted H" key form used by gcm_gmult_neon and
@ gcm_ghash_neon.  Register use as established by the code:
@   r0 - output, 16 bytes written (twisted H)
@   r1 - input, 16 bytes read as two 64-bit halves (H)
@ The value is rotated left by one bit across 128 bits and, when the
@ bit rotated out is set, XORed with the constant 0xc2....01 built in
@ q8.  Uses only q3, q8, q9 and d26, so nothing needs saving.
@ ---------------------------------------------------------------------
.globl gcm_init_neon
.hidden gcm_init_neon
.type gcm_init_neon,%function
.align 4
gcm_init_neon:
vld1.64 d7,[r1]! @ load H
vmov.i8 q8,#0xe1
vld1.64 d6,[r1]
vshl.i64 d17,#57
vshr.u64 d16,#63 @ t0=0xc2....01
vdup.8 q9,d7[7]
vshr.u64 d26,d6,#63
vshr.s8 q9,#7 @ broadcast carry bit
vshl.i64 q3,q3,#1
vand q8,q8,q9
vorr d7,d26 @ H<<<=1
veor q3,q3,q8 @ twisted H
vstmia r0,{q3}
bx lr @ bx lr
.size gcm_init_neon,.-gcm_init_neon
@ ---------------------------------------------------------------------
@ gcm_gmult_neon
@
@ Single-block front end to the NEON GHASH core.  Register use as
@ established by the code:
@   r0 - Xi, 16 bytes, loaded here (r0 ends up advanced by 16)
@   r1 - twisted H as produced by gcm_init_neon
@ This routine only sets up state and then branches to .Lgmult_neon,
@ which is a label INSIDE gcm_ghash_neon; the multiplication, the
@ write-back of Xi and the return all happen there.  Setting r3 to 16
@ makes the shared loop counter reach zero after one pass.  The two
@ functions must therefore stay in the same section and keep their
@ register assignments in sync (d26-d31 for key and masks, q3 for the
@ operand).
@ ---------------------------------------------------------------------
.globl gcm_gmult_neon
.hidden gcm_gmult_neon
.type gcm_gmult_neon,%function
.align 4
gcm_gmult_neon:
vld1.64 d7,[r0]! @ load Xi
vld1.64 d6,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
mov r3,#16
b .Lgmult_neon
.size gcm_gmult_neon,.-gcm_gmult_neon
@ ---------------------------------------------------------------------
@ gcm_ghash_neon
@
@ NEON GHASH over a run of 16-byte blocks, built from 8-bit polynomial
@ multiplies (vmull.p8).  Register use as established by the code:
@   r0 - Xi, 16 bytes; loaded on entry (advancing r0 by 16), rewound
@        and written back at the end
@   r1 - twisted H as produced by gcm_init_neon
@   r2 - input pointer, advanced by 16 per block
@   r3 - remaining length in bytes, decremented by 16 per block
@ NOTE(review): the loop exits only when r3 reaches exactly zero, so
@ the length is assumed to be a non-zero multiple of 16 - confirm
@ against callers.
@
@ Fixed vector registers: d26/d27 = the two halves of twisted H,
@ d28 = their XOR (Karatsuba middle operand), d29/d30/d31 = 48-, 32-
@ and 16-bit masks, q3 = current operand, q0 = Xi / result.
@ Only q0-q3 and q8-q15 are touched, so no VFP registers are saved.
@
@ .Lgmult_neon is also the entry point used by gcm_gmult_neon.
@ ---------------------------------------------------------------------
.globl gcm_ghash_neon
.hidden gcm_ghash_neon
.type gcm_ghash_neon,%function
.align 4
gcm_ghash_neon:
vld1.64 d1,[r0]! @ load Xi
vld1.64 d0,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
.Loop_neon:
vld1.64 d7,[r2]! @ load inp
vld1.64 d6,[r2]!
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
veor q3,q0 @ inp^=Xi
.Lgmult_neon:
@ First of three 64x64-bit carry-less multiplies, each assembled from
@ byte-rotated vmull.p8 partial products: d26 times d6, result in q0.
vext.8 d16, d26, d26, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d0, d6, d6, #1 @ B1
vmull.p8 q0, d26, d0 @ E = A*B1
vext.8 d18, d26, d26, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d26, d22 @ G = A*B2
vext.8 d20, d26, d26, #3 @ A3
veor q8, q8, q0 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d0, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q0, d26, d0 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d26, d22 @ K = A*B4
veor q10, q10, q0 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q0, d26, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q0, q0, q8
veor q0, q0, q10
@ Second multiply: d28 (XOR of the key halves) times the XOR of the
@ operand halves (d6 is overwritten with d6^d7), result in q1.
veor d6,d6,d7 @ Karatsuba pre-processing
vext.8 d16, d28, d28, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d2, d6, d6, #1 @ B1
vmull.p8 q1, d28, d2 @ E = A*B1
vext.8 d18, d28, d28, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d28, d22 @ G = A*B2
vext.8 d20, d28, d28, #3 @ A3
veor q8, q8, q1 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d2, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q1, d28, d2 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d28, d22 @ K = A*B4
veor q10, q10, q1 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q1, d28, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q1, q1, q8
veor q1, q1, q10
@ Third multiply: d27 times d7, result in q2.
vext.8 d16, d27, d27, #1 @ A1
vmull.p8 q8, d16, d7 @ F = A1*B
vext.8 d4, d7, d7, #1 @ B1
vmull.p8 q2, d27, d4 @ E = A*B1
vext.8 d18, d27, d27, #2 @ A2
vmull.p8 q9, d18, d7 @ H = A2*B
vext.8 d22, d7, d7, #2 @ B2
vmull.p8 q11, d27, d22 @ G = A*B2
vext.8 d20, d27, d27, #3 @ A3
veor q8, q8, q2 @ L = E + F
vmull.p8 q10, d20, d7 @ J = A3*B
vext.8 d4, d7, d7, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q2, d27, d4 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d7, d7, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d27, d22 @ K = A*B4
veor q10, q10, q2 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q2, d27, d7 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q2, q2, q8
veor q2, q2, q10
@ Combine the three products into one 256-bit value held in q2:q0.
veor q1,q1,q0 @ Karatsuba post-processing
veor q1,q1,q2
veor d1,d1,d2
veor d4,d4,d3 @ Xh|Xl - 256-bit result
@ equivalent of reduction_avx from ghash-x86_64.pl
@ Two-phase shift-and-XOR reduction of q2:q0 back to 128 bits in q0.
vshl.i64 q9,q0,#57 @ 1st phase
vshl.i64 q10,q0,#62
veor q10,q10,q9 @
vshl.i64 q9,q0,#63
veor q10, q10, q9 @
veor d1,d1,d20 @
veor d4,d4,d21
vshr.u64 q10,q0,#1 @ 2nd phase
veor q2,q2,q0
veor q0,q0,q10 @
vshr.u64 q10,q10,#6
vshr.u64 q0,q0,#1 @
veor q0,q0,q2 @
veor q0,q0,q10 @
subs r3,#16
bne .Loop_neon
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
@ r0 was advanced by 16 while loading Xi (here or in gcm_gmult_neon);
@ step back before storing the new state.
sub r0,#16
vst1.64 d1,[r0]! @ write out Xi
vst1.64 d0,[r0]
bx lr @ bx lr
.size gcm_ghash_neon,.-gcm_ghash_neon
#endif
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif

View File

@ -0,0 +1,233 @@
#if defined(__arm__)
#include "arm_arch.h"
.text
.fpu neon
.code 32
@ ---------------------------------------------------------------------
@ gcm_init_v8
@
@ Builds the key table consumed by gcm_gmult_v8 and gcm_ghash_v8.
@ Register use as established by the code:
@   r0 - output table; three 16-byte entries are written:
@        Htable[0] = twisted H
@        Htable[1] = packed XOR-of-halves values for H and H^2
@                    (Karatsuba pre-processed)
@        Htable[2] = H^2
@   r1 - input, 16 bytes (H)
@ The 64-bit polynomial multiplies are emitted as raw .byte encodings;
@ the mnemonic each one stands for is given in the trailing comment.
@ Touches q0-q3 and q8-q14 only.
@ ---------------------------------------------------------------------
.globl gcm_init_v8
.type gcm_init_v8,%function
.align 4
gcm_init_v8:
vld1.64 {q9},[r1] @ load input H
vmov.i8 q11,#0xe1
vshl.i64 q11,q11,#57 @ 0xc2.0
vext.8 q3,q9,q9,#8
vshr.u64 q10,q11,#63
vdup.32 q9,d18[1]
vext.8 q8,q10,q11,#8 @ t0=0xc2....01
vshr.u64 q10,q3,#63
vshr.s32 q9,q9,#31 @ broadcast carry bit
vand q10,q10,q8
vshl.i64 q3,q3,#1
vext.8 q10,q10,q10,#8
vand q8,q8,q9
vorr q3,q3,q10 @ H<<<=1
veor q12,q3,q8 @ twisted H
vst1.64 {q12},[r0]! @ store Htable[0]
@ calculate H^2
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12
veor q8,q8,q12
.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12
.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
@ Reduce the 256-bit square back to 128 bits using the constant in q11.
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q14,q0,q10
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
veor q9,q9,q14
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
vst1.64 {q13,q14},[r0] @ store Htable[1..2]
bx lr
.size gcm_init_v8,.-gcm_init_v8
@ ---------------------------------------------------------------------
@ gcm_gmult_v8
@
@ Multiplies the 16-byte state by the hash key in place, using 64-bit
@ polynomial multiplies (emitted as raw .byte encodings; see the
@ trailing comments for the mnemonics).  Register use as established
@ by the code:
@   r0 - Xi, 16 bytes, read and then overwritten with the product
@   r1 - key table from gcm_init_v8; only the first two entries are
@        loaded (q12 = twisted H, q13 = packed Karatsuba operand)
@ Touches q0-q3 and q8-q13 only.
@ ---------------------------------------------------------------------
.globl gcm_gmult_v8
.type gcm_gmult_v8,%function
.align 4
gcm_gmult_v8:
vld1.64 {q9},[r0] @ load Xi
vmov.i8 q11,#0xe1
vld1.64 {q12,q13},[r1] @ load twisted H, ...
vshl.u64 q11,q11,#57
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q3,q9,q9,#8
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
@ Undo the byte reversal and half rotation applied on load before
@ storing the state back.
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
bx lr
.size gcm_gmult_v8,.-gcm_gmult_v8
@ ---------------------------------------------------------------------
@ gcm_ghash_v8
@
@ GHASH over a run of 16-byte blocks using 64-bit polynomial
@ multiplies (emitted as raw .byte encodings; see the trailing
@ comments for the mnemonics).  The main loop handles two blocks per
@ iteration using H and H^2; a single leftover block is handled at
@ .Lodd_tail_v8.  Register use as established by the code:
@   r0 - Xi, 16 bytes, read on entry and written back on exit
@   r1 - key table from gcm_init_v8 (three 16-byte entries loaded
@        into q12, q13, q14)
@   r2 - input pointer
@   r3 - input length in bytes
@   r12 - post-increment for r2 (16, forced to 0 near the end)
@ NOTE(review): the first input block is loaded unconditionally and
@ the exit tests compare against multiples of 16/32, so the length is
@ assumed to be a non-zero multiple of 16 - confirm against callers.
@
@ q4-q7 are used as scratch, hence d8-d15 are saved and restored.
@ ---------------------------------------------------------------------
.globl gcm_ghash_v8
.type gcm_ghash_v8,%function
.align 4
gcm_ghash_v8:
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
vld1.64 {q0},[r0] @ load [rotated] Xi
@ "[rotated]" means that
@ loaded value would have
@ to be rotated in order to
@ make it appear as in
@ algorithm specification
subs r3,r3,#32 @ see if r3 is 32 or larger
mov r12,#16 @ r12 is used as post-
@ increment for input pointer;
@ as loop is modulo-scheduled
@ r12 is zeroed just in time
@ to preclude overstepping
@ inp[len], which means that
@ last block[s] are actually
@ loaded twice, but last
@ copy is not processed
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
vmov.i8 q11,#0xe1
vld1.64 {q14},[r1]
moveq r12,#0 @ is it time to zero r12?
vext.8 q0,q0,q0,#8 @ rotate Xi
vld1.64 {q8},[r2]! @ load [rotated] I[0]
vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant
#ifndef __ARMEB__
vrev64.8 q8,q8
vrev64.8 q0,q0
#endif
vext.8 q3,q8,q8,#8 @ rotate I[0]
@ The flags tested here are still those of the subs at the top: none
@ of the NEON instructions in between modifies them.
blo .Lodd_tail_v8 @ r3 was less than 32
vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q7,q9,q9,#8
veor q3,q3,q0 @ I[i]^=Xi
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q9,q9,q7 @ Karatsuba pre-processing
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
b .Loop_mod2x_v8
.align 4
@ Two blocks per iteration: the older block (already XORed with Xi,
@ in q3) is multiplied by H^2 (q14) and the newer one by H (q12, the
@ products q4/q5/q6 are started in the previous iteration), and both
@ are accumulated before a single reduction.
.Loop_mod2x_v8:
vext.8 q10,q3,q3,#8
subs r3,r3,#32 @ is there more data?
.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
movlo r12,#0 @ is it time to zero r12?
.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9
veor q10,q10,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
veor q0,q0,q4 @ accumulate
.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2]
veor q2,q2,q6
moveq r12,#0 @ is it time to zero r12?
veor q1,q1,q5
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3]
#ifndef __ARMEB__
vrev64.8 q8,q8
#endif
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
vext.8 q7,q9,q9,#8
vext.8 q3,q8,q8,#8
veor q0,q1,q10
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q3,q3,q2 @ accumulate q3 early
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q3,q3,q10
veor q9,q9,q7 @ Karatsuba pre-processing
veor q3,q3,q0
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
bhs .Loop_mod2x_v8 @ there was at least 32 more bytes
@ Loop exit: the loop folded the next block into q3 ahead of time, so
@ rebuild the plain state in q0, the plain block in q3 and the true
@ remaining length in r3 before deciding whether one block is left.
veor q2,q2,q10
vext.8 q3,q8,q8,#8 @ re-construct q3
adds r3,r3,#32 @ re-construct r3
veor q0,q0,q2 @ re-construct q0
beq .Ldone_v8 @ is r3 zero?
@ Single remaining block: multiply (block XOR Xi) by H and reduce.
.Lodd_tail_v8:
vext.8 q10,q0,q0,#8
veor q3,q3,q0 @ inp^=Xi
veor q9,q8,q10 @ q9 is rotated inp^Xi
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
.Ldone_v8:
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
bx lr
.size gcm_ghash_v8,.-gcm_ghash_v8
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,676 @@
#if defined(__i386__)
.file "vpaes-x86.S"
.text
.align 64
/*
 * Constant pool for the vpaes routines in this file.  The byte string at
 * the end of the table decodes to
 * "Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)".
 *
 * Every routine addresses the pool through %ebp = .L_vpaes_consts+0x30
 * (see the "leal .L_vpaes_consts+0x30-.Lxxxpic_point,%ebp" and
 * "addl (%esp),%ebp" pairs), so a displacement D(%ebp) refers to byte
 * offset D+0x30 from this label and the first three rows are reached
 * with negative displacements:
 *   -48(%ebp)  row 0, kept in %xmm7 by _vpaes_preheat
 *   -32(%ebp)  row 1, reloaded by the cores on every round
 *   -16(%ebp)  row 2, 0x0F in every byte (252645135 = 0x0F0F0F0F), kept
 *              in %xmm6 and used as the low-nibble mask
 * Rows are 16 bytes each and are read with movdqa, which requires the
 * 16-byte alignment guaranteed by the .align directive before the label.
 */
.L_vpaes_consts:
.long 218628480,235210255,168496130,67568393
.long 252381056,17041926,33884169,51187212
.long 252645135,252645135,252645135,252645135
.long 1512730624,3266504856,1377990664,3401244816
.long 830229760,1275146365,2969422977,3447763452
.long 3411033600,2979783055,338359620,2782886510
.long 4209124096,907596821,221174255,1006095553
.long 191964160,3799684038,3164090317,1589111125
.long 182528256,1777043520,2877432650,3265356744
.long 1874708224,3503451415,3305285752,363511674
.long 1606117888,3487855781,1093350906,2384367825
.long 197121,67569157,134941193,202313229
.long 67569157,134941193,202313229,197121
.long 134941193,202313229,197121,67569157
.long 202313229,197121,67569157,134941193
.long 33619971,100992007,168364043,235736079
.long 235736079,33619971,100992007,168364043
.long 168364043,235736079,33619971,100992007
.long 100992007,168364043,235736079,33619971
.long 50462976,117835012,185207048,252579084
.long 252314880,51251460,117574920,184942860
.long 184682752,252054788,50987272,118359308
.long 118099200,185467140,251790600,50727180
.long 2946363062,528716217,1300004225,1881839624
.long 1532713819,1532713819,1532713819,1532713819
.long 3602276352,4288629033,3737020424,4153884961
.long 1354558464,32357713,2958822624,3775749553
.long 1201988352,132424512,1572796698,503232858
.long 2213177600,1597421020,4103937655,675398315
.long 2749646592,4273543773,1511898873,121693092
.long 3040248576,1103263732,2871565598,1608280554
.long 2236667136,2588920351,482954393,64377734
.long 3069987328,291237287,2117370568,3650299247
.long 533321216,3573750986,2572112006,1401264716
.long 1339849704,2721158661,548607111,3445553514
.long 2128193280,3054596040,2183486460,1257083700
.long 655635200,1165381986,3923443150,2344132524
.long 190078720,256924420,290342170,357187870
.long 1610966272,2263057382,4103205268,309794674
.long 2592527872,2233205587,1335446729,3402964816
.long 3973531904,3225098121,3002836325,1918774430
.long 3870401024,2102906079,2284471353,4117666579
.long 617007872,1021508343,366931923,691083277
.long 2528395776,3491914898,2968704004,1613121270
.long 3445188352,3247741094,844474987,4093578302
.long 651481088,1190302358,1689581232,574775300
.long 4289380608,206939853,2555985458,2489840491
.long 2130264064,327674451,3566485037,3349835193
.long 2470714624,316102159,3636825756,3393945945
/* ASCII identification string, NUL terminated (decoded in the header above). */
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
.byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
.byte 118,101,114,115,105,116,121,41,0
.align 64
/*
 * _vpaes_preheat: finish the position-independent address computation and
 * load the two constants that stay resident across rounds.
 * In:    %ebp   = .L_vpaes_consts+0x30 minus the address of the label that
 *                 directly follows the call (set up by every caller)
 *        (%esp) = return address, i.e. that same label
 * Out:   %ebp   = absolute address of .L_vpaes_consts+0x30
 *        %xmm7  = the 16 bytes at -48(%ebp) (first row of the pool)
 *        %xmm6  = the 16 bytes at -16(%ebp) (0x0F in every byte)
 * Clobbers EFLAGS (addl).
 */
.hidden _vpaes_preheat
.type _vpaes_preheat,@function
.align 16
_vpaes_preheat:
/* offset + return address = absolute address of the constant pool base */
addl (%esp),%ebp
movdqa -48(%ebp),%xmm7
movdqa -16(%ebp),%xmm6
ret
.size _vpaes_preheat,.-_vpaes_preheat
/*
 * _vpaes_encrypt_core: encrypt the single 16-byte block held in %xmm0.
 * In:    %xmm0 = input block
 *        %edx  = key schedule; the round counter is read from 240(%edx)
 *        %ebp  = .L_vpaes_consts+0x30
 *        %xmm6, %xmm7 = constants as left by _vpaes_preheat
 * Out:   %xmm0 = result block
 * Clobbers %eax, %ebx, %ecx, %edx (advanced by 16 per round key consumed),
 *        %xmm1-%xmm5 and EFLAGS.  %xmm6 and %xmm7 are only read.
 * The ".byte 102,15,56,0,NN" sequences are SSSE3 pshufb instructions
 * emitted as raw opcodes (66 0F 38 00 /r); each is decoded in a trailing
 * comment using AT&T operand order.
 */
.hidden _vpaes_encrypt_core
.type _vpaes_encrypt_core,@function
.align 16
_vpaes_encrypt_core:
/* %ecx: byte offset (kept in 0..48 by the andl below) into the rows at %ebx */
movl $16,%ecx
movl 240(%edx),%eax
/* split every input byte into high nibble (%xmm1) and low nibble (%xmm0) */
movdqa %xmm6,%xmm1
movdqa (%ebp),%xmm2
pandn %xmm0,%xmm1
pand %xmm6,%xmm0
movdqu (%edx),%xmm5
.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
movdqa 16(%ebp),%xmm0
pxor %xmm5,%xmm2
psrld $4,%xmm1
addl $16,%edx
.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
leal 192(%ebp),%ebx
pxor %xmm2,%xmm0
/* The jnz at the end of .L000enc_entry sees the flags of the addl above */
/* on this first pass (non-zero result), so the loop body is entered. */
jmp .L000enc_entry
.align 16
.L001enc_loop:
movdqa 32(%ebp),%xmm4
movdqa 48(%ebp),%xmm0
.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
pxor %xmm5,%xmm4
movdqa 64(%ebp),%xmm5
pxor %xmm4,%xmm0
movdqa -64(%ebx,%ecx,1),%xmm1
.byte 102,15,56,0,234 /* pshufb %xmm2,%xmm5 */
movdqa 80(%ebp),%xmm2
movdqa (%ebx,%ecx,1),%xmm4
.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
movdqa %xmm0,%xmm3
pxor %xmm5,%xmm2
.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
addl $16,%edx
pxor %xmm2,%xmm0
.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
addl $16,%ecx
pxor %xmm0,%xmm3
.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
andl $48,%ecx
/* round counter; its zero flag survives the SSE code up to the jnz below */
subl $1,%eax
pxor %xmm3,%xmm0
.L000enc_entry:
/* nibble split of the current state, then table lookups via pshufb */
movdqa %xmm6,%xmm1
movdqa -32(%ebp),%xmm5
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm6,%xmm0
.byte 102,15,56,0,232 /* pshufb %xmm0,%xmm5 */
movdqa %xmm7,%xmm3
pxor %xmm1,%xmm0
.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
movdqa %xmm7,%xmm4
pxor %xmm5,%xmm3
.byte 102,15,56,0,224 /* pshufb %xmm0,%xmm4 */
movdqa %xmm7,%xmm2
pxor %xmm5,%xmm4
.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
movdqa %xmm7,%xmm3
pxor %xmm0,%xmm2
.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
/* next round key */
movdqu (%edx),%xmm5
pxor %xmm1,%xmm3
/* no instruction since the last integer ALU op has modified EFLAGS */
jnz .L001enc_loop
/* final round: different tables (96/112(%ebp)) and a last byte permutation */
movdqa 96(%ebp),%xmm4
movdqa 112(%ebp),%xmm0
.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
pxor %xmm5,%xmm4
.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
movdqa 64(%ebx,%ecx,1),%xmm1
pxor %xmm4,%xmm0
.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
ret
.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
/*
 * _vpaes_decrypt_core: decrypt the single 16-byte block held in %xmm0.
 * In:    %xmm0 = input block
 *        %edx  = key schedule; the round counter is read from 240(%edx)
 *        %ebp  = .L_vpaes_consts+0x30
 *        %xmm6, %xmm7 = constants as left by _vpaes_preheat
 * Out:   %xmm0 = result block
 * Clobbers %eax, %ebx, %ecx, %edx (advanced by 16 per round key consumed),
 *        %xmm1-%xmm5 and EFLAGS.  %xmm6 and %xmm7 are only read.
 * ".byte 102,15,56,0,NN" is pshufb and ".byte 102,15,58,15,NN,imm" is
 * palignr, both emitted as raw opcodes and decoded in trailing comments
 * (AT&T operand order).
 */
.hidden _vpaes_decrypt_core
.type _vpaes_decrypt_core,@function
.align 16
_vpaes_decrypt_core:
/* %ebx: base for the decryption tables, addressed at -64..112(%ebx) */
leal 608(%ebp),%ebx
movl 240(%edx),%eax
movdqa %xmm6,%xmm1
movdqa -64(%ebx),%xmm2
pandn %xmm0,%xmm1
movl %eax,%ecx
psrld $4,%xmm1
movdqu (%edx),%xmm5
/* %ecx = ((rounds*16) ^ 48) & 48 selects one of four 16-byte rows */
shll $4,%ecx
pand %xmm6,%xmm0
.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
movdqa -48(%ebx),%xmm0
xorl $48,%ecx
.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
andl $48,%ecx
pxor %xmm5,%xmm2
movdqa 176(%ebp),%xmm5
pxor %xmm2,%xmm0
addl $16,%edx
/* %ecx becomes the address of the row used for the final permutation */
leal -352(%ebx,%ecx,1),%ecx
/* The jnz in .L002dec_entry sees the flags of the addl above on the */
/* first pass (leal and SSE instructions leave EFLAGS untouched). */
jmp .L002dec_entry
.align 16
.L003dec_loop:
movdqa -32(%ebx),%xmm4
movdqa -16(%ebx),%xmm1
.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
.byte 102,15,56,0,203 /* pshufb %xmm3,%xmm1 */
pxor %xmm4,%xmm0
movdqa (%ebx),%xmm4
pxor %xmm1,%xmm0
movdqa 16(%ebx),%xmm1
.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
.byte 102,15,56,0,203 /* pshufb %xmm3,%xmm1 */
pxor %xmm4,%xmm0
movdqa 32(%ebx),%xmm4
pxor %xmm1,%xmm0
movdqa 48(%ebx),%xmm1
.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
.byte 102,15,56,0,203 /* pshufb %xmm3,%xmm1 */
pxor %xmm4,%xmm0
movdqa 64(%ebx),%xmm4
pxor %xmm1,%xmm0
movdqa 80(%ebx),%xmm1
.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
.byte 102,15,56,0,203 /* pshufb %xmm3,%xmm1 */
pxor %xmm4,%xmm0
addl $16,%edx
.byte 102,15,58,15,237,12 /* palignr $12,%xmm5,%xmm5 (byte rotate of %xmm5) */
pxor %xmm1,%xmm0
/* round counter; its zero flag survives the SSE code up to the jnz below */
subl $1,%eax
.L002dec_entry:
/* nibble split of the current state, then table lookups via pshufb */
movdqa %xmm6,%xmm1
movdqa -32(%ebp),%xmm2
pandn %xmm0,%xmm1
pand %xmm6,%xmm0
psrld $4,%xmm1
.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
movdqa %xmm7,%xmm3
pxor %xmm1,%xmm0
.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
movdqa %xmm7,%xmm4
pxor %xmm2,%xmm3
.byte 102,15,56,0,224 /* pshufb %xmm0,%xmm4 */
pxor %xmm2,%xmm4
movdqa %xmm7,%xmm2
.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
movdqa %xmm7,%xmm3
pxor %xmm0,%xmm2
.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
/* next round key */
movdqu (%edx),%xmm0
pxor %xmm1,%xmm3
jnz .L003dec_loop
/* final round: tables at 96/112(%ebx), then the permutation row at (%ecx) */
movdqa 96(%ebx),%xmm4
.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
pxor %xmm0,%xmm4
movdqa 112(%ebx),%xmm0
movdqa (%ecx),%xmm2
.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
pxor %xmm4,%xmm0
.byte 102,15,56,0,194 /* pshufb %xmm2,%xmm0 */
ret
.size _vpaes_decrypt_core,.-_vpaes_decrypt_core
/*
 * _vpaes_schedule_core: expand a user key into a vpaes key schedule.
 * In:    %esi = user key bytes
 *        %eax = key length in bits (compared against 192 below)
 *        %edx = where the first round key is stored
 *        %edi = 0 when building an encryption schedule, non-zero for a
 *               decryption schedule (tested with testl %edi,%edi)
 *        %ecx = byte offset selecting a 16-byte row at 256(%ebp)
 *        %ebp = PIC offset, completed by the addl (%esp),%ebp below
 *        %esp = return address on top of a 16-byte aligned scratch frame
 *               (both callers align %esp before the call); this is what
 *               makes the movdqa accesses to 4(%esp) and 20(%esp) aligned
 * Out:   round keys written through %edx; %xmm0-%xmm7 are zeroed before
 *        returning so no key material is left in vector registers.
 * Clobbers %eax, %ebx, %ecx, %edx, EFLAGS, and %esi on the decryption
 *        path (inside _vpaes_schedule_mangle).
 */
.hidden _vpaes_schedule_core
.type _vpaes_schedule_core,@function
.align 16
_vpaes_schedule_core:
addl (%esp),%ebp
movdqu (%esi),%xmm0
movdqa 320(%ebp),%xmm2
movdqa %xmm0,%xmm3
leal (%ebp),%ebx
/* scratch slot; _vpaes_schedule_round reads and updates it as 8(%esp) */
movdqa %xmm2,4(%esp)
call _vpaes_schedule_transform
movdqa %xmm0,%xmm7
testl %edi,%edi
jnz .L004schedule_am_decrypting
/* encrypting: the transformed key is the first round key */
movdqu %xmm0,(%edx)
jmp .L005schedule_go
.L004schedule_am_decrypting:
/* decrypting: store the untransformed key, byte-permuted by row %ecx */
movdqa 256(%ebp,%ecx,1),%xmm1
.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
movdqu %xmm3,(%edx)
xorl $48,%ecx
.L005schedule_go:
/* dispatch on key size: above 192 -> 256 path, equal -> 192, else 128 */
cmpl $192,%eax
ja .L006schedule_256
je .L007schedule_192
.L008schedule_128:
/* %eax is reused as the iteration counter from here on */
movl $10,%eax
.L009loop_schedule_128:
call _vpaes_schedule_round
decl %eax
jz .L010schedule_mangle_last
call _vpaes_schedule_mangle
jmp .L009loop_schedule_128
.align 16
.L007schedule_192:
/* bytes 8..23 of the user key; the low quadword of %xmm6 is then cleared */
movdqu 8(%esi),%xmm0
call _vpaes_schedule_transform
movdqa %xmm0,%xmm6
pxor %xmm4,%xmm4
movhlps %xmm4,%xmm6
movl $4,%eax
.L011loop_schedule_192:
call _vpaes_schedule_round
.byte 102,15,58,15,198,8 /* palignr $8,%xmm6,%xmm0 */
call _vpaes_schedule_mangle
call _vpaes_schedule_192_smear
call _vpaes_schedule_mangle
call _vpaes_schedule_round
decl %eax
jz .L010schedule_mangle_last
call _vpaes_schedule_mangle
call _vpaes_schedule_192_smear
jmp .L011loop_schedule_192
.align 16
.L006schedule_256:
/* second half (bytes 16..31) of the user key */
movdqu 16(%esi),%xmm0
call _vpaes_schedule_transform
movl $7,%eax
.L012loop_schedule_256:
call _vpaes_schedule_mangle
movdqa %xmm0,%xmm6
call _vpaes_schedule_round
decl %eax
jz .L010schedule_mangle_last
call _vpaes_schedule_mangle
pshufd $255,%xmm0,%xmm0
/* park %xmm7 in the frame while the low round runs on %xmm6 instead */
movdqa %xmm7,20(%esp)
movdqa %xmm6,%xmm7
call .L_vpaes_schedule_low_round
movdqa 20(%esp),%xmm7
jmp .L012loop_schedule_256
.align 16
.L010schedule_mangle_last:
/* last round key: choose the output transform table by direction */
leal 384(%ebp),%ebx
testl %edi,%edi
jnz .L013schedule_mangle_last_dec
movdqa 256(%ebp,%ecx,1),%xmm1
.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
leal 352(%ebp),%ebx
/* net effect with the addl $-16 below: %edx advances by 16 when encrypting */
addl $32,%edx
.L013schedule_mangle_last_dec:
addl $-16,%edx
pxor 336(%ebp),%xmm0
call _vpaes_schedule_transform
movdqu %xmm0,(%edx)
/* scrub all vector registers */
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
ret
.size _vpaes_schedule_core,.-_vpaes_schedule_core
/*
 * _vpaes_schedule_192_smear: helper used only by the 192-bit path of
 * _vpaes_schedule_core.
 * In:    %xmm6, %xmm7
 * Out:   %xmm0 = %xmm6 ^ pshufd($128,%xmm6) ^ pshufd($254,%xmm7)
 *        %xmm6 = the same value with its low quadword cleared
 * Clobbers %xmm1 (left as zero).  No integer registers or flags are touched.
 */
.hidden _vpaes_schedule_192_smear
.type _vpaes_schedule_192_smear,@function
.align 16
_vpaes_schedule_192_smear:
pshufd $128,%xmm6,%xmm1
pshufd $254,%xmm7,%xmm0
pxor %xmm1,%xmm6
pxor %xmm1,%xmm1
pxor %xmm0,%xmm6
movdqa %xmm6,%xmm0
/* low quadword of %xmm6 := high quadword of %xmm1, which is zero */
movhlps %xmm1,%xmm6
ret
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
/*
 * _vpaes_schedule_round: one round of the key schedule.
 * It has two entry points:
 *   _vpaes_schedule_round        full round; also consumes and rotates the
 *                                16-byte value kept in the caller frame
 *   .L_vpaes_schedule_low_round  second half only (called directly by the
 *                                256-bit path of _vpaes_schedule_core)
 * In:    %xmm0 = input words, %xmm7 = previous round key
 *        %ebp  = .L_vpaes_consts+0x30
 *        8(%esp) = the slot _vpaes_schedule_core initialised as 4(%esp)
 *                  (one more return address is on the stack here)
 * Out:   %xmm0 = %xmm7 = next round key
 * Clobbers %xmm1-%xmm5.  No integer registers or flags are touched.
 */
.hidden _vpaes_schedule_round
.type _vpaes_schedule_round,@function
.align 16
_vpaes_schedule_round:
movdqa 8(%esp),%xmm2
pxor %xmm1,%xmm1
/* %xmm1 = top byte of %xmm2 in byte 0, zero elsewhere */
.byte 102,15,58,15,202,15 /* palignr $15,%xmm2,%xmm1 */
/* rotate %xmm2 by one byte so the next call uses the next byte */
.byte 102,15,58,15,210,15 /* palignr $15,%xmm2,%xmm2 */
pxor %xmm1,%xmm7
/* broadcast the top dword of %xmm0, then rotate it by one byte */
pshufd $255,%xmm0,%xmm0
.byte 102,15,58,15,192,1 /* palignr $1,%xmm0,%xmm0 */
movdqa %xmm2,8(%esp)
.L_vpaes_schedule_low_round:
/* "smear" %xmm7: xor it with itself shifted by 4 and then by 8 bytes */
movdqa %xmm7,%xmm1
pslldq $4,%xmm7
pxor %xmm1,%xmm7
movdqa %xmm7,%xmm1
pslldq $8,%xmm7
pxor %xmm1,%xmm7
pxor 336(%ebp),%xmm7
/* table-driven byte substitution of %xmm0; %xmm4 = 0x0F mask, %xmm5 = row 0 */
movdqa -16(%ebp),%xmm4
movdqa -48(%ebp),%xmm5
movdqa %xmm4,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm4,%xmm0
movdqa -32(%ebp),%xmm2
.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
pxor %xmm1,%xmm0
movdqa %xmm5,%xmm3
.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
pxor %xmm2,%xmm3
movdqa %xmm5,%xmm4
.byte 102,15,56,0,224 /* pshufb %xmm0,%xmm4 */
pxor %xmm2,%xmm4
movdqa %xmm5,%xmm2
.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
pxor %xmm0,%xmm2
movdqa %xmm5,%xmm3
.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
pxor %xmm1,%xmm3
movdqa 32(%ebp),%xmm4
.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
movdqa 48(%ebp),%xmm0
.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
pxor %xmm4,%xmm0
/* combine with the smeared previous key; result returned in both registers */
pxor %xmm7,%xmm0
movdqa %xmm0,%xmm7
ret
.size _vpaes_schedule_round,.-_vpaes_schedule_round
/*
 * _vpaes_schedule_transform: per-byte transform of %xmm0 using a pair of
 * 16-entry lookup tables.
 * In:    %xmm0 = input
 *        %ebx  = table pair: (%ebx) is indexed by the low nibble of each
 *                byte, 16(%ebx) by the high nibble
 *        %ebp  = .L_vpaes_consts+0x30 (for the 0x0F mask at -16(%ebp))
 * Out:   %xmm0 = lookup(low nibbles) ^ lookup(high nibbles)
 * Clobbers %xmm1, %xmm2.  No integer registers or flags are touched.
 */
.hidden _vpaes_schedule_transform
.type _vpaes_schedule_transform,@function
.align 16
_vpaes_schedule_transform:
movdqa -16(%ebp),%xmm2
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
movdqa (%ebx),%xmm2
.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
movdqa 16(%ebx),%xmm0
.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
pxor %xmm2,%xmm0
ret
.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
/*
 * _vpaes_schedule_mangle: convert the round key in %xmm0 to its stored
 * form and write it to the schedule.
 * In:    %xmm0 = round key (left unchanged; the work is done on a copy)
 *        %edi  = 0 for an encryption schedule, non-zero for decryption
 *        %edx  = schedule cursor
 *        %ecx  = byte offset of the permutation row at 256(%ebp)
 *        %ebp  = .L_vpaes_consts+0x30
 * Out:   %edx  = moved by +16 (encrypt) or -16 (decrypt) before the store
 *        (%edx) = mangled key
 *        %ecx  = (%ecx - 16) & 48, i.e. the next row
 * Clobbers %xmm1-%xmm5 and EFLAGS; the decryption path also clobbers %esi.
 */
.hidden _vpaes_schedule_mangle
.type _vpaes_schedule_mangle,@function
.align 16
_vpaes_schedule_mangle:
movdqa %xmm0,%xmm4
movdqa 128(%ebp),%xmm5
testl %edi,%edi
jnz .L014schedule_mangle_dec
/* encryption: xor with the constant at 336(%ebp), then accumulate three */
/* successive byte permutations of the result */
addl $16,%edx
pxor 336(%ebp),%xmm4
.byte 102,15,56,0,229 /* pshufb %xmm5,%xmm4 */
movdqa %xmm4,%xmm3
.byte 102,15,56,0,229 /* pshufb %xmm5,%xmm4 */
pxor %xmm4,%xmm3
.byte 102,15,56,0,229 /* pshufb %xmm5,%xmm4 */
pxor %xmm4,%xmm3
jmp .L015schedule_mangle_both
.align 16
.L014schedule_mangle_dec:
/* decryption: four low/high nibble table pairs at 416(%ebp), with a byte */
/* permutation by %xmm5 between consecutive pairs */
movdqa -16(%ebp),%xmm2
leal 416(%ebp),%esi
movdqa %xmm2,%xmm1
pandn %xmm4,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm4
movdqa (%esi),%xmm2
.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
movdqa 16(%esi),%xmm3
.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
pxor %xmm2,%xmm3
.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
movdqa 32(%esi),%xmm2
.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
pxor %xmm3,%xmm2
movdqa 48(%esi),%xmm3
.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
pxor %xmm2,%xmm3
.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
movdqa 64(%esi),%xmm2
.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
pxor %xmm3,%xmm2
movdqa 80(%esi),%xmm3
.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
pxor %xmm2,%xmm3
.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
movdqa 96(%esi),%xmm2
.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
pxor %xmm3,%xmm2
movdqa 112(%esi),%xmm3
.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
pxor %xmm2,%xmm3
addl $-16,%edx
.L015schedule_mangle_both:
/* final byte permutation by the current row, then step to the next row */
movdqa 256(%ebp,%ecx,1),%xmm1
.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
addl $-16,%ecx
andl $48,%ecx
movdqu %xmm3,(%edx)
ret
.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
/*
 * vpaes_set_encrypt_key: build an encryption key schedule (i386 cdecl).
 * Stack arguments as read below (after the four register pushes):
 *   arg1 20(%esp) -> %esi  user key bytes
 *   arg2 24(%esp) -> %eax  key length in bits
 *   arg3 28(%esp) -> %edx  key schedule to fill; (bits >> 5) + 5 is
 *                          stored at offset 240 of it
 * Returns %eax = 0 unconditionally.
 * A 16-byte aligned scratch frame is carved out below the saved
 * registers; the original %esp is kept at 48(%esp) of that frame.
 */
.globl vpaes_set_encrypt_key
.hidden vpaes_set_encrypt_key
.type vpaes_set_encrypt_key,@function
.align 16
vpaes_set_encrypt_key:
.L_vpaes_set_encrypt_key_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
/* %ebx = future aligned stack pointer; swapped into %esp by the xchgl */
leal -56(%esp),%ebx
movl 24(%esp),%eax
andl $-16,%ebx
movl 28(%esp),%edx
xchgl %esp,%ebx
movl %ebx,48(%esp)
movl %eax,%ebx
shrl $5,%ebx
addl $5,%ebx
movl %ebx,240(%edx)
/* inputs for _vpaes_schedule_core: row offset 48, direction 0 = encrypt */
movl $48,%ecx
movl $0,%edi
/* link-time offset; the callee adds its return address (.L016pic_point) */
leal .L_vpaes_consts+0x30-.L016pic_point,%ebp
call _vpaes_schedule_core
.L016pic_point:
/* drop the scratch frame */
movl 48(%esp),%esp
xorl %eax,%eax
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size vpaes_set_encrypt_key,.-.L_vpaes_set_encrypt_key_begin
/*
 * vpaes_set_decrypt_key: build a decryption key schedule (i386 cdecl).
 * Stack arguments as read below (after the four register pushes):
 *   arg1 20(%esp) -> %esi  user key bytes
 *   arg2 24(%esp) -> %eax  key length in bits
 *   arg3 28(%esp) -> %edx  key schedule to fill; (bits >> 5) + 5 is
 *                          stored at offset 240 of it
 * Returns %eax = 0 unconditionally.
 * The schedule is written back to front: %edx is moved to
 * schedule + 16 + 16 * ((bits >> 5) + 5) before the core is called, and
 * the core steps it downwards on the decryption path.
 */
.globl vpaes_set_decrypt_key
.hidden vpaes_set_decrypt_key
.type vpaes_set_decrypt_key,@function
.align 16
vpaes_set_decrypt_key:
.L_vpaes_set_decrypt_key_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
/* carve a 16-byte aligned scratch frame; old %esp is saved at 48(%esp) */
leal -56(%esp),%ebx
movl 24(%esp),%eax
andl $-16,%ebx
movl 28(%esp),%edx
xchgl %esp,%ebx
movl %ebx,48(%esp)
movl %eax,%ebx
shrl $5,%ebx
addl $5,%ebx
movl %ebx,240(%edx)
shll $4,%ebx
leal 16(%edx,%ebx,1),%edx
/* direction flag for _vpaes_schedule_core: non-zero = decrypt */
movl $1,%edi
/* %ecx = ((bits >> 1) & 32) ^ 32: starting row offset, 0 or 32 */
movl %eax,%ecx
shrl $1,%ecx
andl $32,%ecx
xorl $32,%ecx
/* link-time offset; the callee adds its return address (.L017pic_point) */
leal .L_vpaes_consts+0x30-.L017pic_point,%ebp
call _vpaes_schedule_core
.L017pic_point:
movl 48(%esp),%esp
xorl %eax,%eax
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size vpaes_set_decrypt_key,.-.L_vpaes_set_decrypt_key_begin
/*
 * vpaes_encrypt: encrypt one 16-byte block (i386 cdecl).
 * Stack arguments as read below (after the four register pushes):
 *   arg1 20(%esp) -> %esi  input block (read with an unaligned load)
 *   arg2 24(%esp) -> %edi  output block (written with an unaligned store)
 *   arg3 28(%esp) -> %edx  key schedule
 * No value is placed in %eax.  %ebp, %ebx, %esi and %edi are saved and
 * restored; %eax, %ecx, %edx and %xmm0-%xmm7 are clobbered.
 */
.globl vpaes_encrypt
.hidden vpaes_encrypt
.type vpaes_encrypt,@function
.align 16
vpaes_encrypt:
.L_vpaes_encrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
/* link-time offset; _vpaes_preheat adds its return address (.L018pic_point) */
leal .L_vpaes_consts+0x30-.L018pic_point,%ebp
call _vpaes_preheat
.L018pic_point:
movl 20(%esp),%esi
/* switch to a 16-byte aligned scratch frame; old %esp saved at 48(%esp) */
leal -56(%esp),%ebx
movl 24(%esp),%edi
andl $-16,%ebx
movl 28(%esp),%edx
xchgl %esp,%ebx
movl %ebx,48(%esp)
movdqu (%esi),%xmm0
call _vpaes_encrypt_core
movdqu %xmm0,(%edi)
movl 48(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size vpaes_encrypt,.-.L_vpaes_encrypt_begin
/*
 * vpaes_decrypt: decrypt one 16-byte block (i386 cdecl).
 * Stack arguments as read below (after the four register pushes):
 *   arg1 20(%esp) -> %esi  input block (read with an unaligned load)
 *   arg2 24(%esp) -> %edi  output block (written with an unaligned store)
 *   arg3 28(%esp) -> %edx  key schedule
 * No value is placed in %eax.  %ebp, %ebx, %esi and %edi are saved and
 * restored; %eax, %ecx, %edx and %xmm0-%xmm7 are clobbered.
 */
.globl vpaes_decrypt
.hidden vpaes_decrypt
.type vpaes_decrypt,@function
.align 16
vpaes_decrypt:
.L_vpaes_decrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
/* link-time offset; _vpaes_preheat adds its return address (.L019pic_point) */
leal .L_vpaes_consts+0x30-.L019pic_point,%ebp
call _vpaes_preheat
.L019pic_point:
movl 20(%esp),%esi
/* switch to a 16-byte aligned scratch frame; old %esp saved at 48(%esp) */
leal -56(%esp),%ebx
movl 24(%esp),%edi
andl $-16,%ebx
movl 28(%esp),%edx
xchgl %esp,%ebx
movl %ebx,48(%esp)
movdqu (%esi),%xmm0
call _vpaes_decrypt_core
movdqu %xmm0,(%edi)
movl 48(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size vpaes_decrypt,.-.L_vpaes_decrypt_begin
/*
 * vpaes_cbc_encrypt: CBC-mode encryption or decryption (i386 cdecl).
 * Stack arguments as read below (after the four register pushes):
 *   arg1 20(%esp) -> %esi  input
 *   arg2 24(%esp) -> %edi  output
 *   arg3 28(%esp) -> %eax  length in bytes
 *   arg4 32(%esp) -> %edx  key schedule
 *   arg5 36(%esp) -> %ebp  16-byte IV, updated in place on exit
 *   arg6 40(%esp) -> %ecx  0 selects decryption, anything else encryption
 * Only whole 16-byte blocks are processed: a length below 16 returns
 * immediately without touching the IV, and a trailing partial block is
 * ignored.
 * Scratch frame (16-byte aligned, offsets from the new %esp):
 *   0   output minus input (so output is addressed as (%ebx,%esi))
 *   4   key schedule pointer (the cores advance %edx, so it is reloaded)
 *   8   IV pointer
 *   16  decryption: chaining value for the current block
 *   32  decryption: copy of the current ciphertext block
 *   48  original %esp
 */
.globl vpaes_cbc_encrypt
.hidden vpaes_cbc_encrypt
.type vpaes_cbc_encrypt,@function
.align 16
vpaes_cbc_encrypt:
.L_vpaes_cbc_encrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
movl 24(%esp),%edi
movl 28(%esp),%eax
movl 32(%esp),%edx
/* %eax = length - 16; a borrow means there is not even one full block */
subl $16,%eax
jc .L020cbc_abort
leal -56(%esp),%ebx
movl 36(%esp),%ebp
andl $-16,%ebx
movl 40(%esp),%ecx
xchgl %esp,%ebx
/* %xmm1 carries the chaining value throughout */
movdqu (%ebp),%xmm1
subl %esi,%edi
movl %ebx,48(%esp)
movl %edi,(%esp)
movl %edx,4(%esp)
movl %ebp,8(%esp)
/* %edi = bytes remaining after the current block */
movl %eax,%edi
leal .L_vpaes_consts+0x30-.L021pic_point,%ebp
call _vpaes_preheat
.L021pic_point:
cmpl $0,%ecx
je .L022cbc_dec_loop
jmp .L023cbc_enc_loop
.align 16
.L023cbc_enc_loop:
/* C[i] = E(P[i] ^ C[i-1]) */
movdqu (%esi),%xmm0
pxor %xmm1,%xmm0
call _vpaes_encrypt_core
movl (%esp),%ebx
movl 4(%esp),%edx
movdqa %xmm0,%xmm1
movdqu %xmm0,(%ebx,%esi,1)
leal 16(%esi),%esi
/* continue while another full block remains (no borrow) */
subl $16,%edi
jnc .L023cbc_enc_loop
jmp .L024cbc_done
.align 16
.L022cbc_dec_loop:
/* P[i] = D(C[i]) ^ C[i-1]; C[i] is saved first so in-place use works */
movdqu (%esi),%xmm0
movdqa %xmm1,16(%esp)
movdqa %xmm0,32(%esp)
call _vpaes_decrypt_core
movl (%esp),%ebx
movl 4(%esp),%edx
pxor 16(%esp),%xmm0
movdqa 32(%esp),%xmm1
movdqu %xmm0,(%ebx,%esi,1)
leal 16(%esi),%esi
subl $16,%edi
jnc .L022cbc_dec_loop
.L024cbc_done:
/* write the final chaining value back to the IV buffer */
movl 8(%esp),%ebx
movl 48(%esp),%esp
movdqu %xmm1,(%ebx)
.L020cbc_abort:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,460 @@
#if defined(__i386__)
.file "src/crypto/bn/asm/x86-mont.S"
.text
/*
 * bn_mul_mont: Montgomery multiplication for 32-bit x86 (cdecl).
 * Stack arguments, numbered in the order they sit on the stack (after
 * the four register pushes the first one is at 20(%esp)):
 *   arg1  result vector (written in .L015sub / .L016copy)
 *   arg2  first operand
 *   arg3  second operand
 *   arg4  modulus (subtracted in .L015sub)
 *   arg5  pointer to a word; only the first word it points to is used
 *   arg6  vector length in 32-bit words
 * NOTE(review): these presumably correspond to rp, ap, bp, np, n0 and num
 * of the C prototype - confirm against the declaring header.
 * Returns %eax = 0 without doing any work when the length is below 4
 * (signed compare), otherwise %eax = 1.
 *
 * Frame built below (offsets from the new, 64-byte aligned %esp):
 *   0    squaring path only: length - 1
 *   4    arg1     8   arg2     12  arg3 (reused as a cursor / index)
 *   16   arg4     20  word loaded through arg5
 *   24   caller %esp
 *   28   multiplication path: end pointer of the arg3 vector
 *   32.. temporary vector of length + 2 words
 *
 * Three code paths share .L006common_tail:
 *   - an MMX/SSE2 pmuludq path when bit 26 of the first word of
 *     OPENSSL_ia32cap_P is set,
 *   - a generic mull path (.L008mull, .L0101stmadd, .L0092ndmadd),
 *   - a squaring path (.L007bn_sqr_mont) when arg2 == arg3 and the
 *     length is even.
 */
.globl bn_mul_mont
.hidden bn_mul_mont
.type bn_mul_mont,@function
.align 16
bn_mul_mont:
.L_bn_mul_mont_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
/* %eax = 0 is the return value of the early exit */
xorl %eax,%eax
movl 40(%esp),%edi
cmpl $4,%edi
jl .L000just_leave
/* %esi = address of the argument block, %ebp = %esp to restore on exit */
leal 20(%esp),%esi
leal 24(%esp),%edx
movl %esp,%ebp
/* reserve 32 + 4 * (length + 2) bytes; %edi ends up as length + 2 */
addl $2,%edi
negl %edi
leal -32(%esp,%edi,4),%esp
negl %edi
/* Shift the frame by the low 11 bits of its distance from the argument */
/* area and conditionally by a further 2048 bytes, then align it to 64. */
/* NOTE(review): presumably this controls how the scratch vector aliases */
/* with the caller data in the cache - confirm against the generator. */
movl %esp,%eax
subl %edx,%eax
andl $2047,%eax
subl %eax,%esp
xorl %esp,%edx
andl $2048,%edx
xorl $2048,%edx
subl %edx,%esp
andl $-64,%esp
/* copy the arguments into the frame; arg5 is dereferenced once here */
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
movl %edx,16(%esp)
movl %esi,20(%esp)
/* %ebx = length - 1 */
leal -3(%edi),%ebx
movl %ebp,24(%esp)
/* position-independent address of OPENSSL_ia32cap_P via call/pop */
call .L001PIC_me_up
.L001PIC_me_up:
popl %eax
leal OPENSSL_ia32cap_P-.L001PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc .L002non_sse2
/* ---- pmuludq path ---- */
/* %mm7 = 0x00000000FFFFFFFF mask, %esi = arg2, %edi = arg3, %ebp = arg4, */
/* %edx = outer index i, %ecx = inner index j */
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
movl 12(%esp),%edi
movl 16(%esp),%ebp
xorl %edx,%edx
xorl %ecx,%ecx
/* %mm4 = arg3[0]; %mm5 = arg2[0] * arg3[0], then times the arg5 word to */
/* give the reduction multiplier; %mm2 / %mm3 carry the two running sums */
movd (%edi),%mm4
movd (%esi),%mm5
movd (%ebp),%mm3
pmuludq %mm4,%mm5
movq %mm5,%mm2
movq %mm5,%mm0
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
incl %ecx
.align 16
.L0031st:
/* first pass: temporary[j-1] = low word of the combined products */
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl .L0031st
/* last word of the first pass plus the two top carry words */
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
.L004outer:
/* one pass per remaining word arg3[i], accumulating into the temporary */
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
movd 32(%esp),%mm6
movd (%ebp),%mm3
pmuludq %mm4,%mm5
paddq %mm6,%mm5
movq %mm5,%mm0
movq %mm5,%mm2
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 36(%esp),%mm6
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm6,%mm2
incl %ecx
/* %ebx counts the inner loop down to zero and is restored from %ecx after */
decl %ebx
.L005inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
movd 36(%esp,%ecx,4),%mm6
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
paddq %mm6,%mm2
/* the jnz below tests this decl; leal does not modify EFLAGS */
decl %ebx
leal 1(%ecx),%ecx
jnz .L005inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
movd 36(%esp,%ebx,4),%mm6
paddq %mm2,%mm3
paddq %mm6,%mm3
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle .L004outer
/* leave MMX state before returning to code that may use x87 */
emms
jmp .L006common_tail
.align 16
.L002non_sse2:
/* ---- integer mull path ---- */
/* %ebp = ((length - 1) + 1) & 1 | (arg2 - arg3): zero only when the two */
/* operands are the same vector and the length is even.  The jz below */
/* tests the orl; the movl in between leaves EFLAGS alone. */
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
xorl %ecx,%ecx
movl %esi,%edx
andl $1,%ebp
subl %edi,%edx
/* %eax = address one past the last word of the arg3 vector */
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
jz .L007bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 16
.L008mull:
/* temporary[] = arg2[] * arg3[0]; %edi = multiplier, %edx = carry word */
movl %edx,%ebp
mull %edi
addl %eax,%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L008mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
addl %ebp,%eax
movl 16(%esp),%esi
adcl $0,%edx
/* %edi = temporary[0] * (arg5 word): multiplier for the reduction pass */
imull 32(%esp),%edi
movl %eax,32(%esp,%ebx,4)
xorl %ecx,%ecx
movl %edx,36(%esp,%ebx,4)
movl %ecx,40(%esp,%ebx,4)
movl (%esi),%eax
mull %edi
/* only the carry of this addition is needed; %eax is reloaded next */
addl 32(%esp),%eax
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp .L0092ndmadd
.align 16
.L0101stmadd:
/* temporary[] += arg2[] * arg3[i] */
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl .L0101stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
movl 20(%esp),%edi
adcl $0,%edx
movl 16(%esp),%esi
addl %eax,%ebp
adcl $0,%edx
imull 32(%esp),%edi
xorl %ecx,%ecx
addl 36(%esp,%ebx,4),%edx
movl %ebp,32(%esp,%ebx,4)
adcl $0,%ecx
movl (%esi),%eax
movl %edx,36(%esp,%ebx,4)
movl %ecx,40(%esp,%ebx,4)
mull %edi
addl 32(%esp),%eax
movl 4(%esi),%eax
adcl $0,%edx
movl $1,%ecx
.align 16
.L0092ndmadd:
/* temporary[] = (temporary[] + arg4[] * multiplier) shifted down one word */
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0092ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
adcl $0,%edx
movl %ebp,28(%esp,%ebx,4)
xorl %eax,%eax
/* advance the arg3 cursor kept at 12(%esp); stop at the end pointer */
movl 12(%esp),%ecx
addl 36(%esp,%ebx,4),%edx
adcl 40(%esp,%ebx,4),%eax
leal 4(%ecx),%ecx
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
je .L006common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp .L0101stmadd
.align 16
.L007bn_sqr_mont:
/* ---- squaring path ---- */
/* (%esp) = length - 1, 12(%esp) = outer index i (starts at 0), */
/* %edi = arg2[i]; cross products are doubled on the fly via lea/shr */
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
mull %edi
movl %eax,32(%esp)
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
incl %ecx
.align 16
.L011sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
leal 1(%ecx),%ecx
adcl $0,%edx
/* %ebp = 2 * low word + bit carried from the previous word */
leal (%ebx,%eax,2),%ebp
shrl $31,%eax
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl .L011sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
movl 20(%esp),%edi
adcl $0,%edx
movl 16(%esp),%esi
leal (%ebx,%eax,2),%ebp
imull 32(%esp),%edi
shrl $31,%eax
movl %ebp,32(%esp,%ecx,4)
leal (%eax,%edx,2),%ebp
movl (%esi),%eax
shrl $31,%edx
movl %ebp,36(%esp,%ecx,4)
movl %edx,40(%esp,%ecx,4)
mull %edi
addl 32(%esp),%eax
movl %ecx,%ebx
adcl $0,%edx
movl 4(%esi),%eax
movl $1,%ecx
.align 16
.L0123rdmadd:
/* reduction pass for the squaring path, unrolled two words per iteration */
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
movl 4(%esi,%ecx,4),%eax
adcl $0,%edx
movl %ebp,28(%esp,%ecx,4)
movl %edx,%ebp
mull %edi
addl 36(%esp,%ecx,4),%ebp
leal 2(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl .L0123rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
adcl $0,%edx
movl %ebp,28(%esp,%ebx,4)
movl 12(%esp),%ecx
xorl %eax,%eax
movl 8(%esp),%esi
addl 36(%esp,%ebx,4),%edx
adcl 40(%esp,%ebx,4),%eax
movl %edx,32(%esp,%ebx,4)
/* finished once the outer index equals length - 1 */
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
je .L006common_tail
/* next outer index: add the square of arg2[i] to temporary[i] */
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
movl %ecx,12(%esp)
mull %edi
addl 32(%esp,%ecx,4),%eax
adcl $0,%edx
movl %eax,32(%esp,%ecx,4)
xorl %ebp,%ebp
/* the je below tests this cmpl; leal does not modify EFLAGS */
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je .L013sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 16
.L014sqradd:
/* temporary[j] += 2 * arg2[j] * arg2[i] for j above i */
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
leal (%eax,%eax,1),%ebp
adcl $0,%edx
shrl $31,%eax
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%eax
addl %ebx,%ebp
adcl $0,%eax
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle .L014sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
.L013sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
addl 32(%esp,%ecx,4),%edx
movl (%esi),%eax
adcl $0,%ebp
movl %edx,32(%esp,%ecx,4)
movl %ebp,36(%esp,%ecx,4)
mull %edi
addl 32(%esp),%eax
leal -1(%ecx),%ebx
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp .L0123rdmadd
.align 16
.L006common_tail:
/* ---- final conditional subtraction, shared by all paths ---- */
/* %ebp = arg4, %edi = arg1, %esi = temporary vector, %ebx = length - 1 */
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
movl (%esi),%eax
movl %ebx,%ecx
/* clears %edx (word index) and CF for the first sbbl */
xorl %edx,%edx
.align 16
.L015sub:
/* arg1[] = temporary[] - arg4[]; the borrow chain lives in CF, which */
/* movl, leal and decl all preserve */
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge .L015sub
/* %eax = top temporary word minus the final borrow: used as a select mask */
sbbl $0,%eax
.align 16
.L016copy:
/* branch-free select per word: arg1[i] = ((temporary[i] ^ arg1[i]) & mask) */
/* ^ arg1[i], i.e. temporary[i] where mask bits are set, else the */
/* difference already stored; the temporary word is overwritten with the */
/* leftover loop counter in %ecx so no intermediate value stays on the stack */
movl (%esi,%ebx,4),%edx
movl (%edi,%ebx,4),%ebp
xorl %ebp,%edx
andl %eax,%edx
xorl %ebp,%edx
movl %ecx,(%esi,%ebx,4)
movl %edx,(%edi,%ebx,4)
decl %ebx
jge .L016copy
/* restore the caller stack pointer saved at 24(%esp); return 1 */
movl 24(%esp),%esp
movl $1,%eax
.L000just_leave:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_mul_mont,.-.L_bn_mul_mont_begin
/* ASCII: "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
#endif

View File

@ -0,0 +1,682 @@
#if defined(__i386__)
.file "src/crypto/md5/asm/md5-586.S"
.text
/*
 * md5_block_asm_data_order: MD5 compression function for 32-bit x86
 * (cdecl).
 * Stack arguments as read below:
 *   arg1 -> %edi (reloaded from 24(%esp) at the end of each block)
 *           four-word chaining state, updated in place
 *   arg2 -> %esi  input, consumed 64 bytes per iteration
 *   arg3 -> %ecx  number of 64-byte blocks (shll $6 converts to bytes)
 * No value is placed in %eax for the caller; %esi, %edi, %ebp and %ebx
 * are saved and restored.
 *
 * Register roles inside the loop: %eax, %ebx, %ecx, %edx hold state
 * words 0..3 (A, B, C, D); %ebp holds the current message word, loaded
 * straight from memory as a 32-bit little-endian value; %edi is scratch
 * for the round function.  Loads and the scratch setup for a step are
 * interleaved with the tail of the previous step.
 *
 * The address of the last block (arg2 + 64 * arg3 - 64) is pushed and
 * kept at (%esp); the loop repeats while %esi has not moved past it.
 * The loop body runs before that test, so one block is always processed:
 * callers must pass a block count of at least 1.
 *
 * The 64 leal displacements are the RFC 1321 additive constants T[1..64]
 * in order, and the roll counts follow the RFC rotation schedule
 * (7,12,17,22 / 5,9,14,20 / 4,11,16,23 / 6,10,15,21).
 */
.globl md5_block_asm_data_order
.hidden md5_block_asm_data_order
.type md5_block_asm_data_order,@function
.align 16
md5_block_asm_data_order:
.L_md5_block_asm_data_order_begin:
pushl %esi
pushl %edi
movl 12(%esp),%edi
movl 16(%esp),%esi
movl 20(%esp),%ecx
pushl %ebp
shll $6,%ecx
pushl %ebx
addl %esi,%ecx
subl $64,%ecx
movl (%edi),%eax
/* (%esp) = address of the last block to process */
pushl %ecx
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
.L000start:
/* Round 1, steps 1-16: F(x,y,z) = ((y ^ z) & x) ^ z, message words 0..15 */
movl %ecx,%edi
movl (%esi),%ebp
xorl %edx,%edi
andl %ebx,%edi
leal 3614090360(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 4(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 3905402710(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 8(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 606105819(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 12(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 3250441966(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 16(%esi),%ebp
addl %ecx,%ebx
xorl %edx,%edi
andl %ebx,%edi
leal 4118548399(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 20(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 1200080426(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 24(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 2821735955(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 28(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 4249261313(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 32(%esi),%ebp
addl %ecx,%ebx
xorl %edx,%edi
andl %ebx,%edi
leal 1770035416(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 36(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 2336552879(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 40(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 4294925233(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 44(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 2304563134(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 48(%esi),%ebp
addl %ecx,%ebx
xorl %edx,%edi
andl %ebx,%edi
leal 1804603682(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 52(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 4254626195(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 56(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 2792965006(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 60(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 1236535329(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 4(%esi),%ebp
addl %ecx,%ebx
/* Round 2, steps 17-32: G(x,y,z) = ((x ^ y) & z) ^ y, message word index */
/* advances by 5 modulo 16 starting at 1 (byte offsets 4, 24, 44, 0, ...) */
leal 4129170786(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 24(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 3225465664(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 44(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 643717713(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl (%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 3921069994(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 20(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
leal 3593408605(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 40(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 38016083(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 60(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 3634488961(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 16(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 3889429448(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 36(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
leal 568446438(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 56(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 3275163606(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 12(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 4107603335(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 32(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 1163531501(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 52(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
leal 2850285829(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 8(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 4243563512(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 28(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 1735328473(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 48(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 2368359562(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 20(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
/* Round 3, steps 33-48: H(x,y,z) = x ^ y ^ z, message word index */
/* advances by 3 modulo 16 starting at 5 (byte offsets 20, 32, 44, 56, ...) */
xorl %edx,%edi
xorl %ebx,%edi
leal 4294588738(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 32(%esi),%ebp
movl %ebx,%edi
leal 2272392833(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 44(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 1839030562(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 56(%esi),%ebp
movl %edx,%edi
leal 4259657740(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 4(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 2763975236(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 16(%esi),%ebp
movl %ebx,%edi
leal 1272893353(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 28(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 4139469664(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 40(%esi),%ebp
movl %edx,%edi
leal 3200236656(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 52(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 681279174(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl (%esi),%ebp
movl %ebx,%edi
leal 3936430074(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 12(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 3572445317(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 24(%esi),%ebp
movl %edx,%edi
leal 76029189(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 36(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 3654602809(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 48(%esi),%ebp
movl %ebx,%edi
leal 3873151461(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 60(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 530742520(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 8(%esi),%ebp
movl %edx,%edi
leal 3299628645(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl (%esi),%ebp
addl %edi,%ebx
/* Round 4, steps 49-64: I(x,y,z) = (x | ~z) ^ y; ~z is formed as */
/* -1 ^ z in %edi.  Message word index advances by 7 modulo 16 from 0. */
movl $-1,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
orl %ebx,%edi
leal 4096336452(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 28(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 1126891415(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 56(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 2878612391(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 20(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 4237533241(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 48(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
orl %ebx,%edi
leal 1700485571(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 12(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 2399980690(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 40(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 4293915773(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 4(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 2240044497(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 32(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
orl %ebx,%edi
leal 1873313359(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 60(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 4264355552(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 24(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 2734768916(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 52(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 1309151649(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 16(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
orl %ebx,%edi
leal 4149444226(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 44(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 3174756917(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 8(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 718787259(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 36(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 3951481745(%ebx,%ebp,1),%ebx
xorl %edx,%edi
/* 24(%esp) is arg1 again: three pushes since entry plus the two saved */
/* registers and the return address lie below it */
movl 24(%esp),%ebp
addl %edi,%ebx
addl $64,%esi
roll $21,%ebx
/* add the previous chaining state and store the new one */
movl (%ebp),%edi
addl %ecx,%ebx
addl %edi,%eax
movl 4(%ebp),%edi
addl %edi,%ebx
movl 8(%ebp),%edi
addl %edi,%ecx
movl 12(%ebp),%edi
addl %edi,%edx
movl %eax,(%ebp)
movl %ebx,4(%ebp)
movl (%esp),%edi
movl %ecx,8(%ebp)
movl %edx,12(%ebp)
/* unsigned compare: loop while last-block address >= next input address */
cmpl %esi,%edi
jae .L000start
/* discard the pushed end pointer, then restore the saved registers */
popl %eax
popl %ebx
popl %ebp
popl %edi
popl %esi
ret
.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,385 @@
#if defined(__i386__)
.file "rc4-586.S"
.text
/*
 * asm_RC4 -- XOR a buffer with the RC4 keystream (32-bit x86, stack args).
 *
 * Arguments as read after the four register pushes below:
 *   20(%esp)  key state: x index at +0, y index at +4, S table at +8
 *   24(%esp)  byte count (zero returns immediately without touching state)
 *   28(%esp)  input pointer
 *   32(%esp)  output pointer
 *
 * The argument slots at 24/28/32(%esp) are reused as scratch: 24(%esp)
 * becomes the end-of-input pointer, 28(%esp) the end pointer of the 4-byte
 * loop, and 32(%esp) the value (out - in).
 *
 * Three code paths are selected at run time:
 *   - .L001RC4_CHAR  when the dword at table+256 is -1 (byte-wide S table,
 *                    the marker written by asm_RC4_set_key);
 *   - .L006loop_mmx  dword S table, at least 8 bytes, and bit 26 of the first
 *                    dword of OPENSSL_ia32cap_P set: 8 bytes per iteration;
 *   - .L008loop4     dword S table otherwise: 4 bytes per iteration.
 * Leftover bytes are handled one at a time by .L002loop1.
 *
 * Register roles in the loops: al = x, bl = y, ecx = S[x], edi = S table,
 * esi = current input pointer.
 */
.globl asm_RC4
.hidden asm_RC4
.type asm_RC4,@function
.align 16
asm_RC4:
.L_asm_RC4_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebp
xorl %eax,%eax
xorl %ebx,%ebx
cmpl $0,%edx
je .L000abort
movb (%edi),%al /* al = saved x */
movb 4(%edi),%bl /* bl = saved y */
addl $8,%edi /* edi now points at the S table */
leal (%esi,%edx,1),%ecx
subl %esi,%ebp /* ebp = out - in, so out is addressed as (%ebp,%esi) */
movl %ecx,24(%esp) /* end-of-input pointer */
incb %al
cmpl $-1,256(%edi) /* -1 marker means the byte-wide table layout */
je .L001RC4_CHAR
movl (%edi,%eax,4),%ecx
andl $-4,%edx
jz .L002loop1 /* fewer than 4 bytes: byte loop only */
movl %ebp,32(%esp)
testl $-8,%edx
jz .L003go4loop4 /* fewer than 8 bytes: skip the MMX path */
/* call/pop obtains the current address for position-independent access */
call .L004PIC_me_up
.L004PIC_me_up:
popl %ebp
leal OPENSSL_ia32cap_P-.L004PIC_me_up(%ebp),%ebp
btl $26,(%ebp)
jnc .L003go4loop4
movl 32(%esp),%ebp /* restore out - in */
andl $-8,%edx
leal -8(%esi,%edx,1),%edx
movl %edx,-4(%edi) /* y slot temporarily holds the MMX loop end pointer */
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
movq (%esi),%mm0
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm2
jmp .L005loop_mmx_enter
.align 16
/*
 * MMX loop: eight keystream entries are gathered into mm2, each shifted to
 * its byte lane (8, 16, ... 56 bits), XORed with the 8 input bytes in mm0
 * and stored with one movq.
 */
.L006loop_mmx:
addb %cl,%bl
psllq $56,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movq (%esi),%mm0
movq %mm2,-8(%ebp,%esi,1)
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm2
.L005loop_mmx_enter:
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm0,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $8,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $16,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $24,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $32,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $40,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $48,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
/* keep only the low byte of ebx (y) for the next iteration */
movl %ebx,%edx
xorl %ebx,%ebx
movb %dl,%bl
cmpl -4(%edi),%esi /* compare against the saved loop end pointer */
leal 8(%esi),%esi /* lea advances the pointer without changing flags */
jb .L006loop_mmx
psllq $56,%mm1
pxor %mm1,%mm2
movq %mm2,-8(%ebp,%esi,1)
emms /* leave MMX state before returning to the caller */
cmpl 24(%esp),%esi
je .L007done
jmp .L002loop1 /* remaining 1..7 bytes */
.align 16
/*
 * 4-byte loop: four keystream entries are rotated into ebp, XORed with one
 * input dword and stored. ebp is the accumulator here, so (out - in) is
 * re-read from 32(%esp) each iteration.
 */
.L003go4loop4:
leal -4(%esi,%edx,1),%edx
movl %edx,28(%esp) /* end pointer for the 4-byte loop */
.L008loop4:
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
movl (%edi,%eax,4),%ecx
movl (%edi,%edx,4),%ebp
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
rorl $8,%ebp
movl (%edi,%eax,4),%ecx
orl (%edi,%edx,4),%ebp
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
rorl $8,%ebp
movl (%edi,%eax,4),%ecx
orl (%edi,%edx,4),%ebp
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
rorl $8,%ebp
movl 32(%esp),%ecx /* ecx = out - in */
orl (%edi,%edx,4),%ebp
rorl $8,%ebp
xorl (%esi),%ebp
cmpl 28(%esp),%esi
movl %ebp,(%ecx,%esi,1)
leal 4(%esi),%esi
movl (%edi,%eax,4),%ecx
jb .L008loop4 /* flags still come from the cmpl above */
cmpl 24(%esp),%esi
je .L007done
movl 32(%esp),%ebp /* restore out - in for the byte loop */
.align 16
/* Byte-at-a-time loop over the dword S table. */
.L002loop1:
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
movl (%edi,%edx,4),%edx
xorb (%esi),%dl
leal 1(%esi),%esi
movl (%edi,%eax,4),%ecx
cmpl 24(%esp),%esi
movb %dl,-1(%ebp,%esi,1)
jb .L002loop1
jmp .L007done
.align 16
/* Byte-at-a-time loop over the byte-wide S table. */
.L001RC4_CHAR:
movzbl (%edi,%eax,1),%ecx
.L009cloop1:
addb %cl,%bl
movzbl (%edi,%ebx,1),%edx
movb %cl,(%edi,%ebx,1)
movb %dl,(%edi,%eax,1)
addb %cl,%dl
movzbl (%edi,%edx,1),%edx
addb $1,%al
xorb (%esi),%dl
leal 1(%esi),%esi
movzbl (%edi,%eax,1),%ecx
cmpl 24(%esp),%esi
movb %dl,-1(%ebp,%esi,1)
jb .L009cloop1
/* Write the indices back; x was pre-incremented on entry, so undo that. */
.L007done:
decb %al
movl %ebx,-4(%edi)
movb %al,-8(%edi)
.L000abort:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size asm_RC4,.-.L_asm_RC4_begin
/*
 * asm_RC4_set_key -- initialise an RC4 key state (32-bit x86, stack args).
 *
 * Arguments as read after the four register pushes below:
 *   20(%esp)  key state to fill: x at +0, y at +4, S table at +8
 *   24(%esp)  key length in bytes
 *   28(%esp)  key bytes
 *
 * Bit 20 of the first dword of OPENSSL_ia32cap_P selects the table layout:
 *   clear -> 256 dword entries (.L012w1stloop / .L013w2ndloop)
 *   set   -> 256 byte entries (.L011c1stloop / .L016c2ndloop), with the
 *            dword at table+256 set to -1 as the marker asm_RC4 tests for.
 *
 * On exit the x and y slots are both zero.
 * NOTE(review): a key length of zero is not checked here -- confirm that
 * callers never pass it.
 */
.globl asm_RC4_set_key
.hidden asm_RC4_set_key
.type asm_RC4_set_key,@function
.align 16
asm_RC4_set_key:
.L_asm_RC4_set_key_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%ebp
movl 28(%esp),%esi
/* call/pop obtains the current address for position-independent access */
call .L010PIC_me_up
.L010PIC_me_up:
popl %edx
leal OPENSSL_ia32cap_P-.L010PIC_me_up(%edx),%edx
leal 8(%edi),%edi /* edi = S table */
leal (%esi,%ebp,1),%esi /* esi = one past the last key byte */
negl %ebp /* ebp = -len, counts up towards zero as the key index */
xorl %eax,%eax
movl %ebp,-4(%edi) /* park -len in the y slot to reload on wrap-around */
btl $20,(%edx)
jc .L011c1stloop
.align 16
/* Dword table: S[i] = i for i = 0..255 (loop ends when al carries out). */
.L012w1stloop:
movl %eax,(%edi,%eax,4)
addb $1,%al
jnc .L012w1stloop
xorl %ecx,%ecx
xorl %edx,%edx
.align 16
/* Dword table: dl += key[k] + S[cl]; swap S[cl] and S[dl]; for cl = 0..255. */
.L013w2ndloop:
movl (%edi,%ecx,4),%eax
addb (%esi,%ebp,1),%dl
addb %al,%dl
addl $1,%ebp
movl (%edi,%edx,4),%ebx /* mov keeps the flags from addl for the jnz */
jnz .L014wnowrap
movl -4(%edi),%ebp /* key exhausted: restart from its first byte */
.L014wnowrap:
movl %eax,(%edi,%edx,4)
movl %ebx,(%edi,%ecx,4)
addb $1,%cl
jnc .L013w2ndloop
jmp .L015exit
.align 16
/* Byte table: same two passes using byte-wide entries. */
.L011c1stloop:
movb %al,(%edi,%eax,1)
addb $1,%al
jnc .L011c1stloop
xorl %ecx,%ecx
xorl %edx,%edx
xorl %ebx,%ebx
.align 16
.L016c2ndloop:
movb (%edi,%ecx,1),%al
addb (%esi,%ebp,1),%dl
addb %al,%dl
addl $1,%ebp
movb (%edi,%edx,1),%bl /* mov keeps the flags from addl for the jnz */
jnz .L017cnowrap
movl -4(%edi),%ebp
.L017cnowrap:
movb %al,(%edi,%edx,1)
movb %bl,(%edi,%ecx,1)
addb $1,%cl
jnc .L016c2ndloop
movl $-1,256(%edi) /* marker: byte-wide table layout */
.L015exit:
xorl %eax,%eax
movl %eax,-8(%edi) /* x = 0 */
movl %eax,-4(%edi) /* y = 0 (also discards the parked -len) */
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size asm_RC4_set_key,.-.L_asm_RC4_set_key_begin
/*
 * RC4_options -- return in %eax the address of a NUL-terminated string that
 * names the RC4 code path selected by OPENSSL_ia32cap_P:
 *   bit 20 set              -> "rc4(1x,char)"  (table offset 12)
 *   bit 20 clear, 26 set    -> "rc4(8x,mmx)"   (table offset 25)
 *   both clear              -> "rc4(4x,int)"   (table offset 0)
 * Takes no arguments; clobbers %edx and flags.
 */
.globl RC4_options
.hidden RC4_options
.type RC4_options,@function
.align 16
RC4_options:
.L_RC4_options_begin:
/* eax = address of the string table (call/pop gives the current address) */
call .L018pic_point
.L018pic_point:
popl %eax
leal .L019opts-.L018pic_point(%eax),%eax
/* edx = first dword of OPENSSL_ia32cap_P, located the same way */
call .L020PIC_me_up
.L020PIC_me_up:
popl %edx
leal OPENSSL_ia32cap_P-.L020PIC_me_up(%edx),%edx
movl (%edx),%edx
btl $20,%edx
jc .L0211xchar
btl $26,%edx
jnc .L022ret
addl $25,%eax /* skip the 12-byte and 13-byte entries */
ret
.L0211xchar:
addl $12,%eax /* skip the 12-byte first entry */
.L022ret:
ret
.align 64
/* String table; each .asciz appends the terminating NUL byte. */
.L019opts:
.asciz "rc4(4x,int)"
.asciz "rc4(1x,char)"
.asciz "rc4(8x,mmx)"
.asciz "RC4 for x86, CRYPTOGAMS by <appro@openssl.org>"
.align 64
.size RC4_options,.-.L_RC4_options_begin
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,834 @@
#if defined(__x86_64__)
.text
/*
 * _vpaes_encrypt_core -- encrypt the single block held in %xmm0.
 *
 * In:   %xmm0        input block
 *       %rdx         key schedule; round count is read from 240(%rdx)
 *       %xmm9-%xmm15 constants loaded by _vpaes_preheat
 * Out:  %xmm0        result block
 * Uses: %xmm1-%xmm5, %rax, %r9 (round-key pointer), %r10, %r11, flags
 *
 * The .byte sequences are hand-encoded SSSE3 pshufb instructions; each is
 * decoded in the trailing comment.
 */
.type _vpaes_encrypt_core,@function
.align 16
_vpaes_encrypt_core:
movq %rdx,%r9
movq $16,%r11
movl 240(%rdx),%eax
movdqa %xmm9,%xmm1
movdqa .Lk_ipt(%rip),%xmm2
pandn %xmm0,%xmm1
movdqu (%r9),%xmm5
psrld $4,%xmm1
pand %xmm9,%xmm0
.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
movdqa .Lk_ipt+16(%rip),%xmm0
.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
pxor %xmm5,%xmm2
addq $16,%r9 /* non-zero result leaves ZF clear for the first jnz below */
pxor %xmm2,%xmm0
leaq .Lk_mc_backward(%rip),%r10
jmp .Lenc_entry
.align 16
.Lenc_loop:
movdqa %xmm13,%xmm4
movdqa %xmm12,%xmm0
.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
pxor %xmm5,%xmm4
movdqa %xmm15,%xmm5
pxor %xmm4,%xmm0
movdqa -64(%r11,%r10,1),%xmm1
.byte 102,15,56,0,234 /* pshufb %xmm2,%xmm5 */
movdqa (%r11,%r10,1),%xmm4
movdqa %xmm14,%xmm2
.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
movdqa %xmm0,%xmm3
pxor %xmm5,%xmm2
.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
addq $16,%r9
pxor %xmm2,%xmm0
.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
addq $16,%r11
pxor %xmm0,%xmm3
.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
andq $48,%r11
subq $1,%rax /* round counter; sets the flags tested by jnz .Lenc_loop */
pxor %xmm3,%xmm0
.Lenc_entry:
/* split each byte into low and high nibbles (mask in %xmm9) */
movdqa %xmm9,%xmm1
movdqa %xmm11,%xmm5
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm9,%xmm0
.byte 102,15,56,0,232 /* pshufb %xmm0,%xmm5 */
movdqa %xmm10,%xmm3
pxor %xmm1,%xmm0
.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
movdqa %xmm10,%xmm4
pxor %xmm5,%xmm3
.byte 102,15,56,0,224 /* pshufb %xmm0,%xmm4 */
movdqa %xmm10,%xmm2
pxor %xmm5,%xmm4
.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
movdqa %xmm10,%xmm3
pxor %xmm0,%xmm2
.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
movdqu (%r9),%xmm5 /* next round key */
pxor %xmm1,%xmm3
jnz .Lenc_loop /* none of the SSE instructions above modify flags */
/* final round */
movdqa -96(%r10),%xmm4
movdqa -80(%r10),%xmm0
.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
pxor %xmm5,%xmm4
.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
movdqa 64(%r11,%r10,1),%xmm1
pxor %xmm4,%xmm0
.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
.byte 0xf3,0xc3 /* rep ret */
.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
/*
 * _vpaes_decrypt_core -- decrypt the single block held in %xmm0.
 *
 * In:   %xmm0        input block
 *       %rdx         key schedule; round count is read from 240(%rdx)
 *       %xmm9-%xmm11 constants loaded by _vpaes_preheat
 * Out:  %xmm0        result block
 * Uses: %xmm1-%xmm5, %rax, %r9 (round-key pointer), %r10, %r11, flags
 *
 * The .byte sequences are hand-encoded SSSE3 pshufb/palignr instructions;
 * each is decoded in the trailing comment.
 */
.type _vpaes_decrypt_core,@function
.align 16
_vpaes_decrypt_core:
movq %rdx,%r9
movl 240(%rdx),%eax
movdqa %xmm9,%xmm1
movdqa .Lk_dipt(%rip),%xmm2
pandn %xmm0,%xmm1
movq %rax,%r11
psrld $4,%xmm1
movdqu (%r9),%xmm5
shlq $4,%r11
pand %xmm9,%xmm0
.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
movdqa .Lk_dipt+16(%rip),%xmm0
xorq $48,%r11
leaq .Lk_dsbd(%rip),%r10
.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
andq $48,%r11
pxor %xmm5,%xmm2
movdqa .Lk_mc_forward+48(%rip),%xmm5
pxor %xmm2,%xmm0
addq $16,%r9
addq %r10,%r11 /* last flag-setting instruction before the first jnz below */
jmp .Ldec_entry
.align 16
.Ldec_loop:
movdqa -32(%r10),%xmm4
movdqa -16(%r10),%xmm1
.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
.byte 102,15,56,0,203 /* pshufb %xmm3,%xmm1 */
pxor %xmm4,%xmm0
movdqa 0(%r10),%xmm4
pxor %xmm1,%xmm0
movdqa 16(%r10),%xmm1
.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
.byte 102,15,56,0,203 /* pshufb %xmm3,%xmm1 */
pxor %xmm4,%xmm0
movdqa 32(%r10),%xmm4
pxor %xmm1,%xmm0
movdqa 48(%r10),%xmm1
.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
.byte 102,15,56,0,203 /* pshufb %xmm3,%xmm1 */
pxor %xmm4,%xmm0
movdqa 64(%r10),%xmm4
pxor %xmm1,%xmm0
movdqa 80(%r10),%xmm1
.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
.byte 102,15,56,0,203 /* pshufb %xmm3,%xmm1 */
pxor %xmm4,%xmm0
addq $16,%r9
.byte 102,15,58,15,237,12 /* palignr $12,%xmm5,%xmm5 */
pxor %xmm1,%xmm0
subq $1,%rax /* round counter; sets the flags tested by jnz .Ldec_loop */
.Ldec_entry:
/* split each byte into low and high nibbles (mask in %xmm9) */
movdqa %xmm9,%xmm1
pandn %xmm0,%xmm1
movdqa %xmm11,%xmm2
psrld $4,%xmm1
pand %xmm9,%xmm0
.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
movdqa %xmm10,%xmm3
pxor %xmm1,%xmm0
.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
movdqa %xmm10,%xmm4
pxor %xmm2,%xmm3
.byte 102,15,56,0,224 /* pshufb %xmm0,%xmm4 */
pxor %xmm2,%xmm4
movdqa %xmm10,%xmm2
.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
movdqa %xmm10,%xmm3
pxor %xmm0,%xmm2
.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
movdqu (%r9),%xmm0 /* next round key */
pxor %xmm1,%xmm3
jnz .Ldec_loop /* none of the SSE instructions above modify flags */
/* final round */
movdqa 96(%r10),%xmm4
.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
pxor %xmm0,%xmm4
movdqa 112(%r10),%xmm0
movdqa -352(%r11),%xmm2
.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
pxor %xmm4,%xmm0
.byte 102,15,56,0,194 /* pshufb %xmm2,%xmm0 */
.byte 0xf3,0xc3 /* rep ret */
.size _vpaes_decrypt_core,.-_vpaes_decrypt_core
/*
 * _vpaes_schedule_core -- expand a user key into a round-key schedule.
 *
 * In:   %rdi  user key bytes
 *       %esi  key size in bits (192 and above-192 take dedicated paths,
 *             everything else takes the 128-bit path)
 *       %rdx  where the first round key is written; moved by +/-16 per
 *             round key by _vpaes_schedule_mangle
 *       %rcx  zero when building an encryption schedule, non-zero for
 *             decryption
 *       %r8   byte offset into .Lk_sr used to select a permutation
 * Uses: %xmm0-%xmm8, %r10, %r11, %rsi, flags; calls _vpaes_preheat, which
 *       loads %xmm9-%xmm15.
 *
 * %xmm0-%xmm7 are cleared before returning.
 */
.type _vpaes_schedule_core,@function
.align 16
_vpaes_schedule_core:
call _vpaes_preheat
movdqa .Lk_rcon(%rip),%xmm8
movdqu (%rdi),%xmm0
movdqa %xmm0,%xmm3
leaq .Lk_ipt(%rip),%r11
call _vpaes_schedule_transform
movdqa %xmm0,%xmm7
leaq .Lk_sr(%rip),%r10
testq %rcx,%rcx
jnz .Lschedule_am_decrypting
movdqu %xmm0,(%rdx)
jmp .Lschedule_go
.Lschedule_am_decrypting:
movdqa (%r8,%r10,1),%xmm1
.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
movdqu %xmm3,(%rdx)
xorq $48,%r8
.Lschedule_go:
cmpl $192,%esi
ja .Lschedule_256
je .Lschedule_192
/* 128-bit path: 10 rounds */
.Lschedule_128:
movl $10,%esi
.Loop_schedule_128:
call _vpaes_schedule_round
decq %rsi
jz .Lschedule_mangle_last
call _vpaes_schedule_mangle
jmp .Loop_schedule_128
.align 16
/* 192-bit path: 4 iterations, each writing up to three round keys */
.Lschedule_192:
movdqu 8(%rdi),%xmm0
call _vpaes_schedule_transform
movdqa %xmm0,%xmm6
pxor %xmm4,%xmm4
movhlps %xmm4,%xmm6
movl $4,%esi
.Loop_schedule_192:
call _vpaes_schedule_round
.byte 102,15,58,15,198,8 /* palignr $8,%xmm6,%xmm0 */
call _vpaes_schedule_mangle
call _vpaes_schedule_192_smear
call _vpaes_schedule_mangle
call _vpaes_schedule_round
decq %rsi
jz .Lschedule_mangle_last
call _vpaes_schedule_mangle
call _vpaes_schedule_192_smear
jmp .Loop_schedule_192
.align 16
/* 256-bit path: 7 iterations alternating the full and the low round */
.Lschedule_256:
movdqu 16(%rdi),%xmm0
call _vpaes_schedule_transform
movl $7,%esi
.Loop_schedule_256:
call _vpaes_schedule_mangle
movdqa %xmm0,%xmm6
call _vpaes_schedule_round
decq %rsi
jz .Lschedule_mangle_last
call _vpaes_schedule_mangle
pshufd $255,%xmm0,%xmm0
movdqa %xmm7,%xmm5 /* save %xmm7 around the low round */
movdqa %xmm6,%xmm7
call _vpaes_schedule_low_round
movdqa %xmm5,%xmm7
jmp .Loop_schedule_256
.align 16
/* Write the last round key, using .Lk_deskew (decrypt) or .Lk_opt (encrypt). */
.Lschedule_mangle_last:
leaq .Lk_deskew(%rip),%r11
testq %rcx,%rcx
jnz .Lschedule_mangle_last_dec
movdqa (%r8,%r10,1),%xmm1
.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
leaq .Lk_opt(%rip),%r11
addq $32,%rdx
.Lschedule_mangle_last_dec:
addq $-16,%rdx
pxor .Lk_s63(%rip),%xmm0
call _vpaes_schedule_transform
movdqu %xmm0,(%rdx)
/* clear the registers that held key material */
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
.byte 0xf3,0xc3 /* rep ret */
.size _vpaes_schedule_core,.-_vpaes_schedule_core
/*
 * _vpaes_schedule_192_smear -- helper for the 192-bit key schedule path.
 *
 * In:   %xmm6, %xmm7
 * Out:  %xmm0 = %xmm6 XOR dword-shuffles of the old %xmm6 and of %xmm7;
 *       %xmm6 = the same value with its low 64 bits cleared
 *       %xmm1 = 0
 */
.type _vpaes_schedule_192_smear,@function
.align 16
_vpaes_schedule_192_smear:
pshufd $128,%xmm6,%xmm1
pshufd $254,%xmm7,%xmm0
pxor %xmm1,%xmm6
pxor %xmm1,%xmm1
pxor %xmm0,%xmm6
movdqa %xmm6,%xmm0
movhlps %xmm1,%xmm6 /* low qword of %xmm6 = high qword of zeroed %xmm1 */
.byte 0xf3,0xc3 /* rep ret */
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
/*
 * _vpaes_schedule_round -- one key-schedule round.
 *
 * In:   %xmm0        input to the round
 *       %xmm7        previous round key
 *       %xmm8        round-constant state (rotated here by one byte)
 *       %xmm9-%xmm13 constants loaded by _vpaes_preheat
 * Out:  %xmm0 = %xmm7 = new round key
 * Uses: %xmm1-%xmm4
 *
 * _vpaes_schedule_low_round is a second entry point that skips the
 * round-constant and rotation steps at the top.
 */
.type _vpaes_schedule_round,@function
.align 16
_vpaes_schedule_round:
pxor %xmm1,%xmm1
.byte 102,65,15,58,15,200,15 /* palignr $15,%xmm8,%xmm1 */
.byte 102,69,15,58,15,192,15 /* palignr $15,%xmm8,%xmm8 */
pxor %xmm1,%xmm7
pshufd $255,%xmm0,%xmm0
.byte 102,15,58,15,192,1 /* palignr $1,%xmm0,%xmm0 */
_vpaes_schedule_low_round:
/* XOR %xmm7 with copies of itself shifted by 4, 8 and 12 bytes */
movdqa %xmm7,%xmm1
pslldq $4,%xmm7
pxor %xmm1,%xmm7
movdqa %xmm7,%xmm1
pslldq $8,%xmm7
pxor %xmm1,%xmm7
pxor .Lk_s63(%rip),%xmm7
/* nibble split and table lookups, same shape as .Lenc_entry */
movdqa %xmm9,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm9,%xmm0
movdqa %xmm11,%xmm2
.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
pxor %xmm1,%xmm0
movdqa %xmm10,%xmm3
.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
pxor %xmm2,%xmm3
movdqa %xmm10,%xmm4
.byte 102,15,56,0,224 /* pshufb %xmm0,%xmm4 */
pxor %xmm2,%xmm4
movdqa %xmm10,%xmm2
.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
pxor %xmm0,%xmm2
movdqa %xmm10,%xmm3
.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
pxor %xmm1,%xmm3
movdqa %xmm13,%xmm4
.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
movdqa %xmm12,%xmm0
.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
pxor %xmm4,%xmm0
pxor %xmm7,%xmm0
movdqa %xmm0,%xmm7
.byte 0xf3,0xc3 /* rep ret */
.size _vpaes_schedule_round,.-_vpaes_schedule_round
/*
 * _vpaes_schedule_transform -- apply a two-table nibble transform to %xmm0.
 *
 * In:   %xmm0  value to transform
 *       %r11   address of a 32-byte table: (%r11) is indexed by the low
 *              nibble of each byte, 16(%r11) by the high nibble
 *       %xmm9  nibble mask loaded by _vpaes_preheat
 * Out:  %xmm0  XOR of the two table lookups
 * Uses: %xmm1, %xmm2
 */
.type _vpaes_schedule_transform,@function
.align 16
_vpaes_schedule_transform:
movdqa %xmm9,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1 /* %xmm1 = high nibbles */
pand %xmm9,%xmm0 /* %xmm0 = low nibbles */
movdqa (%r11),%xmm2
.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
movdqa 16(%r11),%xmm0
.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
pxor %xmm2,%xmm0
.byte 0xf3,0xc3 /* rep ret */
.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
/*
 * _vpaes_schedule_mangle -- convert the round key in %xmm0 to its stored
 * form and write it to the schedule.
 *
 * In:   %xmm0  round key (left unchanged; a copy is worked on in %xmm4)
 *       %rcx   zero for encryption, non-zero for decryption
 *       %rdx   schedule pointer: advanced by 16 (encrypt) or moved back by
 *              16 (decrypt), then written
 *       %r8    byte offset into the table at %r10; stepped by -16 modulo 64
 *       %r10   table base (set to .Lk_sr by _vpaes_schedule_core)
 *       %xmm9  nibble mask loaded by _vpaes_preheat
 * Uses: %xmm1-%xmm5, %r11 (decrypt path only), flags
 */
.type _vpaes_schedule_mangle,@function
.align 16
_vpaes_schedule_mangle:
movdqa %xmm0,%xmm4
movdqa .Lk_mc_forward(%rip),%xmm5
testq %rcx,%rcx
jnz .Lschedule_mangle_dec
/* encrypt: XOR with .Lk_s63, then combine three successive permutations */
addq $16,%rdx
pxor .Lk_s63(%rip),%xmm4
.byte 102,15,56,0,229 /* pshufb %xmm5,%xmm4 */
movdqa %xmm4,%xmm3
.byte 102,15,56,0,229 /* pshufb %xmm5,%xmm4 */
pxor %xmm4,%xmm3
.byte 102,15,56,0,229 /* pshufb %xmm5,%xmm4 */
pxor %xmm4,%xmm3
jmp .Lschedule_mangle_both
.align 16
/* decrypt: four nibble-table passes over the 128 bytes starting at .Lk_dksd */
.Lschedule_mangle_dec:
leaq .Lk_dksd(%rip),%r11
movdqa %xmm9,%xmm1
pandn %xmm4,%xmm1
psrld $4,%xmm1
pand %xmm9,%xmm4
movdqa 0(%r11),%xmm2
.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
movdqa 16(%r11),%xmm3
.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
pxor %xmm2,%xmm3
.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
movdqa 32(%r11),%xmm2
.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
pxor %xmm3,%xmm2
movdqa 48(%r11),%xmm3
.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
pxor %xmm2,%xmm3
.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
movdqa 64(%r11),%xmm2
.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
pxor %xmm3,%xmm2
movdqa 80(%r11),%xmm3
.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
pxor %xmm2,%xmm3
.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
movdqa 96(%r11),%xmm2
.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
pxor %xmm3,%xmm2
movdqa 112(%r11),%xmm3
.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
pxor %xmm2,%xmm3
addq $-16,%rdx
.Lschedule_mangle_both:
movdqa (%r8,%r10,1),%xmm1
.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
addq $-16,%r8
andq $48,%r8 /* keep the offset within 0, 16, 32, 48 */
movdqu %xmm3,(%rdx)
.byte 0xf3,0xc3 /* rep ret */
.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
/*
 * vpaes_set_encrypt_key -- build an encryption key schedule.
 *
 * In:   %rdi  user key bytes (consumed by _vpaes_schedule_core)
 *       %esi  key size in bits
 *       %rdx  key schedule to fill; the value bits/32 + 5 is stored at
 *             offset 240 (read back as the loop count by the core routines)
 * Out:  %eax = 0
 */
.globl vpaes_set_encrypt_key
.hidden vpaes_set_encrypt_key
.type vpaes_set_encrypt_key,@function
.align 16
vpaes_set_encrypt_key:
/* fixed inputs for the scheduler: direction flag and table offset */
movl $0,%ecx
movl $48,%r8d
/* 240(%rdx) = bits / 32 + 5 */
movl %esi,%eax
shrl $5,%eax
addl $5,%eax
movl %eax,240(%rdx)
call _vpaes_schedule_core
xorl %eax,%eax
.byte 0xf3,0xc3 /* rep ret */
.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
/*
 * vpaes_set_decrypt_key -- build a decryption key schedule.
 *
 * In:   %rdi  user key bytes (consumed by _vpaes_schedule_core)
 *       %esi  key size in bits
 *       %rdx  key schedule to fill; the value bits/32 + 5 is stored at
 *             offset 240
 * Out:  %eax = 0
 *
 * The scheduler is started at schedule + 16 + 16 * (bits/32 + 5) and, for
 * decryption, walks %rdx downwards (see _vpaes_schedule_mangle).
 */
.globl vpaes_set_decrypt_key
.hidden vpaes_set_decrypt_key
.type vpaes_set_decrypt_key,@function
.align 16
vpaes_set_decrypt_key:
movl %esi,%eax
shrl $5,%eax
addl $5,%eax
movl %eax,240(%rdx)
shll $4,%eax
leaq 16(%rdx,%rax,1),%rdx
movl $1,%ecx /* non-zero %rcx selects the decrypt paths */
/* %r8d = ((bits >> 1) & 32) ^ 32: 32 unless bit 6 of the bit count is set */
movl %esi,%r8d
shrl $1,%r8d
andl $32,%r8d
xorl $32,%r8d
call _vpaes_schedule_core
xorl %eax,%eax
.byte 0xf3,0xc3 /* rep ret */
.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
/*
 * vpaes_encrypt -- encrypt one 16-byte block.
 *
 * In:   %rdi  input block (unaligned load)
 *       %rsi  output block (unaligned store)
 *       %rdx  key schedule, passed through to _vpaes_encrypt_core
 */
.globl vpaes_encrypt
.hidden vpaes_encrypt
.type vpaes_encrypt,@function
.align 16
vpaes_encrypt:
/* _vpaes_preheat only writes %r10 and %xmm9-%xmm15, so the input block
   can be loaded after it */
call _vpaes_preheat
movdqu (%rdi),%xmm0
call _vpaes_encrypt_core
movdqu %xmm0,(%rsi)
.byte 0xf3,0xc3 /* rep ret */
.size vpaes_encrypt,.-vpaes_encrypt
/*
 * vpaes_decrypt -- decrypt one 16-byte block.
 *
 * In:   %rdi  input block (unaligned load)
 *       %rsi  output block (unaligned store)
 *       %rdx  key schedule, passed through to _vpaes_decrypt_core
 */
.globl vpaes_decrypt
.hidden vpaes_decrypt
.type vpaes_decrypt,@function
.align 16
vpaes_decrypt:
/* _vpaes_preheat only writes %r10 and %xmm9-%xmm15, so the input block
   can be loaded after it */
call _vpaes_preheat
movdqu (%rdi),%xmm0
call _vpaes_decrypt_core
movdqu %xmm0,(%rsi)
.byte 0xf3,0xc3 /* rep ret */
.size vpaes_decrypt,.-vpaes_decrypt
/*
 * vpaes_cbc_encrypt -- CBC-mode encryption or decryption of whole blocks.
 *
 * In:   %rdi  input pointer
 *       %rsi  output pointer
 *       %rdx  length in bytes (swapped into %rcx below)
 *       %rcx  key schedule (swapped into %rdx, where the core routines
 *             expect it)
 *       %r8   16-byte chaining value; read on entry, updated on exit
 *       %r9d  non-zero to encrypt, zero to decrypt
 *
 * A length below 16 returns without touching anything. Only whole 16-byte
 * blocks are processed; trailing bytes are ignored.
 * Uses: %xmm0-%xmm7 and the registers used by the core routines.
 */
.globl vpaes_cbc_encrypt
.hidden vpaes_cbc_encrypt
.type vpaes_cbc_encrypt,@function
.align 16
vpaes_cbc_encrypt:
xchgq %rcx,%rdx
subq $16,%rcx
jc .Lcbc_abort
movdqu (%r8),%xmm6 /* %xmm6 = chaining value */
subq %rdi,%rsi /* %rsi = out - in, so out is addressed as (%rsi,%rdi) */
call _vpaes_preheat
cmpl $0,%r9d
je .Lcbc_dec_loop
jmp .Lcbc_enc_loop
.align 16
.Lcbc_enc_loop:
movdqu (%rdi),%xmm0
pxor %xmm6,%xmm0
call _vpaes_encrypt_core
movdqa %xmm0,%xmm6 /* the output block is the next chaining value */
movdqu %xmm0,(%rsi,%rdi,1)
leaq 16(%rdi),%rdi
subq $16,%rcx
jnc .Lcbc_enc_loop
jmp .Lcbc_done
.align 16
.Lcbc_dec_loop:
movdqu (%rdi),%xmm0
movdqa %xmm0,%xmm7 /* keep the input block: it is the next chaining value */
call _vpaes_decrypt_core
pxor %xmm6,%xmm0
movdqa %xmm7,%xmm6
movdqu %xmm0,(%rsi,%rdi,1)
leaq 16(%rdi),%rdi
subq $16,%rcx
jnc .Lcbc_dec_loop
.Lcbc_done:
movdqu %xmm6,(%r8)
.Lcbc_abort:
.byte 0xf3,0xc3 /* rep ret */
.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
/*
 * _vpaes_preheat -- load the constants the vpaes core routines keep in
 * registers. Offsets are relative to .Lk_s0F inside _vpaes_consts.
 *
 * Out:  %xmm9  = .Lk_s0F
 *       %xmm10 = .Lk_inv        %xmm11 = .Lk_inv+16
 *       %xmm13 = .Lk_sb1        %xmm12 = .Lk_sb1+16
 *       %xmm15 = .Lk_sb2        %xmm14 = .Lk_sb2+16
 *       %r10   = address of .Lk_s0F
 */
.type _vpaes_preheat,@function
.align 16
_vpaes_preheat:
leaq .Lk_s0F(%rip),%r10
/* nibble mask */
movdqa 0(%r10),%xmm9
/* .Lk_inv, the 32 bytes just below .Lk_s0F */
movdqa -32(%r10),%xmm10
movdqa -16(%r10),%xmm11
/* .Lk_sb1 and .Lk_sb2, which follow the 32-byte .Lk_ipt */
movdqa 64(%r10),%xmm12
movdqa 48(%r10),%xmm13
movdqa 96(%r10),%xmm14
movdqa 80(%r10),%xmm15
.byte 0xf3,0xc3 /* rep ret */
.size _vpaes_preheat,.-_vpaes_preheat
/*
 * _vpaes_consts -- constant tables for the vpaes routines, 64-byte aligned.
 *
 * Each .quad line is one 16-byte vector. The layout is position-sensitive:
 * _vpaes_preheat and the core routines address neighbouring tables by fixed
 * byte offsets from .Lk_s0F, .Lk_mc_backward and .Lk_dsbd, so entries must
 * not be reordered or resized.
 */
.type _vpaes_consts,@object
.align 64
_vpaes_consts:
.Lk_inv:
.quad 0x0E05060F0D080180, 0x040703090A0B0C02
.quad 0x01040A060F0B0780, 0x030D0E0C02050809
/* low-nibble mask */
.Lk_s0F:
.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
.Lk_ipt:
.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
.Lk_sb1:
.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
.Lk_sb2:
.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
.Lk_sbo:
.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
/* four byte-permutation vectors, selected with a 0/16/32/48 offset */
.Lk_mc_forward:
.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
.quad 0x080B0A0904070605, 0x000302010C0F0E0D
.quad 0x0C0F0E0D080B0A09, 0x0407060500030201
.quad 0x000302010C0F0E0D, 0x080B0A0904070605
.Lk_mc_backward:
.quad 0x0605040702010003, 0x0E0D0C0F0A09080B
.quad 0x020100030E0D0C0F, 0x0A09080B06050407
.quad 0x0E0D0C0F0A09080B, 0x0605040702010003
.quad 0x0A09080B06050407, 0x020100030E0D0C0F
.Lk_sr:
.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
.quad 0x030E09040F0A0500, 0x0B06010C07020D08
.quad 0x0F060D040B020900, 0x070E050C030A0108
.quad 0x0B0E0104070A0D00, 0x0306090C0F020508
.Lk_rcon:
.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
.Lk_s63:
.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
.Lk_opt:
.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
.Lk_deskew:
.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
/* .Lk_dksd through .Lk_dks9 are read as one 128-byte run by the decrypt
   path of _vpaes_schedule_mangle */
.Lk_dksd:
.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
.Lk_dksb:
.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
.Lk_dkse:
.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
.Lk_dks9:
.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
.Lk_dipt:
.quad 0x0F505B040B545F00, 0x154A411E114E451A
.quad 0x86E383E660056500, 0x12771772F491F194
/* .Lk_dsb9 through .Lk_dsbo are addressed by _vpaes_decrypt_core at offsets
   -32 .. 112 from .Lk_dsbd */
.Lk_dsb9:
.quad 0x851C03539A86D600, 0xCAD51F504F994CC9
.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
.Lk_dsbd:
.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
.Lk_dsbb:
.quad 0xD022649296B44200, 0x602646F6B0F2D404
.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
.Lk_dsbe:
.quad 0x46F2929626D4D000, 0x2242600464B4F6B0
.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
.Lk_dsbo:
.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
/* ASCII: "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" plus NUL */
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.align 64
.size _vpaes_consts,.-_vpaes_consts
#endif

View File

@ -0,0 +1,34 @@
#if defined(__x86_64__)
.text
/*
 * rsaz_avx2_eligible -- fallback build of the AVX2 capability probe.
 * Takes no arguments and always returns 0 in %eax.
 */
.type rsaz_avx2_eligible,@function
.globl rsaz_avx2_eligible
.hidden rsaz_avx2_eligible
rsaz_avx2_eligible:
xorl %eax,%eax /* return 0 */
.byte 0xf3,0xc3 /* rep ret */
.size rsaz_avx2_eligible,.-rsaz_avx2_eligible
/*
 * Fallback stubs for the rsaz_1024_*_avx2 entry points. All six symbols
 * share one address whose first instruction is ud2, so reaching any of them
 * raises an invalid-opcode fault. rsaz_avx2_eligible in this file returns 0.
 * Only rsaz_1024_sqr_avx2 carries .type/.size information.
 */
.globl rsaz_1024_sqr_avx2
.globl rsaz_1024_mul_avx2
.globl rsaz_1024_norm2red_avx2
.globl rsaz_1024_red2norm_avx2
.globl rsaz_1024_scatter5_avx2
.globl rsaz_1024_gather5_avx2
.hidden rsaz_1024_sqr_avx2
.hidden rsaz_1024_mul_avx2
.hidden rsaz_1024_norm2red_avx2
.hidden rsaz_1024_red2norm_avx2
.hidden rsaz_1024_scatter5_avx2
.hidden rsaz_1024_gather5_avx2
.type rsaz_1024_sqr_avx2,@function
rsaz_1024_sqr_avx2:
rsaz_1024_mul_avx2:
rsaz_1024_norm2red_avx2:
rsaz_1024_red2norm_avx2:
rsaz_1024_scatter5_avx2:
rsaz_1024_gather5_avx2:
ud2 /* encodes as 0x0f,0x0b */
.byte 0xf3,0xc3 /* rep ret */
.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,728 @@
#if defined(__x86_64__)
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
/*
 * bn_mul_mont -- Montgomery multiplication, generic word-at-a-time path
 * plus the dispatcher for the 4x and 8x variants.
 *
 * In:   %rdi  rp: result, num words
 *       %rsi  ap: first operand
 *       %rdx  bp: second operand
 *       %rcx  np: modulus
 *       %r8   pointer to the per-modulus constant (one word is loaded)
 *       %r9d  num: operand length in 64-bit words
 * Out:  %rax = 1
 *
 * Dispatch on entry:
 *   num not a multiple of 4, or num < 8  -> generic path below
 *   bp != ap                             -> .Lmul4x_enter
 *   bp == ap and num a multiple of 8     -> .Lsqr8x_enter
 *   otherwise                            -> .Lmul4x_enter
 *
 * The generic path keeps a temporary vector tp of num+1 words at (%rsp).
 * The stack frame is num+2 words, aligned down to 1024 bytes, and the
 * caller's %rsp is saved at 8(%rsp,%r9,8) for the epilogue.
 */
.globl bn_mul_mont
.hidden bn_mul_mont
.type bn_mul_mont,@function
.align 16
bn_mul_mont:
testl $3,%r9d
jnz .Lmul_enter
cmpl $8,%r9d
jb .Lmul_enter
cmpq %rsi,%rdx
jne .Lmul4x_enter
testl $7,%r9d
jz .Lsqr8x_enter
jmp .Lmul4x_enter
.align 16
.Lmul_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
movl %r9d,%r9d /* zero-extend num to 64 bits */
leaq 2(%r9),%r10
movq %rsp,%r11
negq %r10
leaq (%rsp,%r10,8),%rsp
andq $-1024,%rsp
movq %r11,8(%rsp,%r9,8) /* save the pre-allocation %rsp */
.Lmul_body:
movq %rdx,%r12 /* %r12 = bp; %rdx is needed by mulq */
movq (%r8),%r8
movq (%r12),%rbx /* %rbx = bp[0] */
movq (%rsi),%rax
xorq %r14,%r14 /* %r14 = outer index i */
xorq %r15,%r15 /* %r15 = inner index j */
movq %r8,%rbp
mulq %rbx
movq %rax,%r10
movq (%rcx),%rax
imulq %r10,%rbp /* %rbp = multiplier applied to the modulus this pass */
movq %rdx,%r11
mulq %rbp
addq %rax,%r10
movq 8(%rsi),%rax
adcq $0,%rdx
movq %rdx,%r13
leaq 1(%r15),%r15
jmp .L1st_enter
.align 16
/* First pass (i = 0): tp = ap * bp[0] + np * %rbp, shifted down one word. */
.L1st:
addq %rax,%r13
movq (%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%r13
movq %r10,%r11
adcq $0,%rdx
movq %r13,-16(%rsp,%r15,8)
movq %rdx,%r13
.L1st_enter:
mulq %rbx
addq %rax,%r11
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
leaq 1(%r15),%r15
movq %rdx,%r10
mulq %rbp
cmpq %r9,%r15
jne .L1st
addq %rax,%r13
movq (%rsi),%rax
adcq $0,%rdx
addq %r11,%r13
adcq $0,%rdx
movq %r13,-16(%rsp,%r15,8)
movq %rdx,%r13
movq %r10,%r11
xorq %rdx,%rdx
addq %r11,%r13
adcq $0,%rdx
movq %r13,-8(%rsp,%r9,8)
movq %rdx,(%rsp,%r9,8) /* top (carry) word of tp */
leaq 1(%r14),%r14
jmp .Louter
.align 16
/* Remaining passes (i = 1..num-1): tp = (tp + ap * bp[i] + np * %rbp) >> 64. */
.Louter:
movq (%r12,%r14,8),%rbx
xorq %r15,%r15
movq %r8,%rbp
movq (%rsp),%r10
mulq %rbx
addq %rax,%r10
movq (%rcx),%rax
adcq $0,%rdx
imulq %r10,%rbp
movq %rdx,%r11
mulq %rbp
addq %rax,%r10
movq 8(%rsi),%rax
adcq $0,%rdx
movq 8(%rsp),%r10
movq %rdx,%r13
leaq 1(%r15),%r15
jmp .Linner_enter
.align 16
.Linner:
addq %rax,%r13
movq (%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
movq (%rsp,%r15,8),%r10
adcq $0,%rdx
movq %r13,-16(%rsp,%r15,8)
movq %rdx,%r13
.Linner_enter:
mulq %rbx
addq %rax,%r11
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
addq %r11,%r10
movq %rdx,%r11
adcq $0,%r11
leaq 1(%r15),%r15
mulq %rbp
cmpq %r9,%r15
jne .Linner
addq %rax,%r13
movq (%rsi),%rax
adcq $0,%rdx
addq %r10,%r13
movq (%rsp,%r15,8),%r10
adcq $0,%rdx
movq %r13,-16(%rsp,%r15,8)
movq %rdx,%r13
xorq %rdx,%rdx
addq %r11,%r13
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-8(%rsp,%r9,8)
movq %rdx,(%rsp,%r9,8)
leaq 1(%r14),%r14
cmpq %r9,%r14
jb .Louter
/* Final reduction: rp = tp - np, with the borrow chained through sbbq. */
xorq %r14,%r14 /* also clears CF for the first sbbq */
movq (%rsp),%rax
leaq (%rsp),%rsi
movq %r9,%r15
jmp .Lsub
.align 16
.Lsub: sbbq (%rcx,%r14,8),%rax
movq %rax,(%rdi,%r14,8)
movq 8(%rsi,%r14,8),%rax
leaq 1(%r14),%r14 /* lea and dec leave CF intact for the next sbbq */
decq %r15
jnz .Lsub
sbbq $0,%rax /* %rax = top word of tp minus the final borrow; used as the select mask */
xorq %r14,%r14
movq %r9,%r15
.align 16
/*
 * Branch-free select: rp[i] = ((tp[i] ^ rp[i]) & %rax) ^ rp[i], i.e. tp[i]
 * where the mask bits are set and the subtracted value otherwise. Each tp[i]
 * is overwritten (with the index value) after it has been read.
 */
.Lcopy:
movq (%rsp,%r14,8),%rsi
movq (%rdi,%r14,8),%rcx
xorq %rcx,%rsi
andq %rax,%rsi
xorq %rcx,%rsi
movq %r14,(%rsp,%r14,8)
movq %rsi,(%rdi,%r14,8)
leaq 1(%r14),%r14
subq $1,%r15
jnz .Lcopy
/* Epilogue: reload callee-saved registers relative to the saved %rsp. */
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
.Lmul_epilogue:
.byte 0xf3,0xc3 /* rep ret */
.size bn_mul_mont,.-bn_mul_mont
/*
 * bn_mul4x_mont -- Montgomery multiplication with the inner loops unrolled
 * four words per iteration. Entered through .Lmul4x_enter from bn_mul_mont
 * with the same register arguments:
 *   %rdi rp, %rsi ap, %rdx bp, %rcx np, %r8 pointer to the per-modulus
 *   constant, %r9d num (a multiple of 4 and at least 8 per the dispatcher).
 * Out:  %rax = 1
 *
 * Stack frame: num+4 words aligned down to 1024 bytes, holding the temporary
 * vector tp at (%rsp), the caller's %rsp at 8(%rsp,%r9,8) and rp at
 * 16(%rsp,%r9,8), since %rdi is reused as an accumulator.
 */
.type bn_mul4x_mont,@function
.align 16
bn_mul4x_mont:
.Lmul4x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
movl %r9d,%r9d /* zero-extend num to 64 bits */
leaq 4(%r9),%r10
movq %rsp,%r11
negq %r10
leaq (%rsp,%r10,8),%rsp
andq $-1024,%rsp
movq %r11,8(%rsp,%r9,8) /* save the pre-allocation %rsp */
.Lmul4x_body:
movq %rdi,16(%rsp,%r9,8) /* save rp */
movq %rdx,%r12 /* %r12 = bp; %rdx is needed by mulq */
movq (%r8),%r8
movq (%r12),%rbx /* %rbx = bp[0] */
movq (%rsi),%rax
xorq %r14,%r14 /* %r14 = outer index i */
xorq %r15,%r15 /* %r15 = inner index j */
movq %r8,%rbp
mulq %rbx
movq %rax,%r10
movq (%rcx),%rax
imulq %r10,%rbp /* %rbp = multiplier applied to the modulus this pass */
movq %rdx,%r11
mulq %rbp
addq %rax,%r10
movq 8(%rsi),%rax
adcq $0,%rdx
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq 8(%rcx),%rax
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq 16(%rsi),%rax
adcq $0,%rdx
addq %r11,%rdi
leaq 4(%r15),%r15
adcq $0,%rdx
movq %rdi,(%rsp)
movq %rdx,%r13
jmp .L1st4x
.align 16
/* First pass (i = 0), four words per iteration. */
.L1st4x:
mulq %rbx
addq %rax,%r10
movq -16(%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq -8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-24(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq -8(%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq (%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-16(%rsp,%r15,8)
movq %rdx,%r13
mulq %rbx
addq %rax,%r10
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq 8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-8(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq 8(%rcx,%r15,8),%rax
adcq $0,%rdx
leaq 4(%r15),%r15
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq -16(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-32(%rsp,%r15,8)
movq %rdx,%r13
cmpq %r9,%r15
jb .L1st4x
/* tail of the first pass: last two words and the carry word */
mulq %rbx
addq %rax,%r10
movq -16(%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq -8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-24(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq -8(%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq (%rsi),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-16(%rsp,%r15,8)
movq %rdx,%r13
xorq %rdi,%rdi
addq %r10,%r13
adcq $0,%rdi
movq %r13,-8(%rsp,%r15,8)
movq %rdi,(%rsp,%r15,8)
leaq 1(%r14),%r14
.align 4
/* Remaining passes (i = 1..num-1); each also adds the previous tp words. */
.Louter4x:
movq (%r12,%r14,8),%rbx
xorq %r15,%r15
movq (%rsp),%r10
movq %r8,%rbp
mulq %rbx
addq %rax,%r10
movq (%rcx),%rax
adcq $0,%rdx
imulq %r10,%rbp
movq %rdx,%r11
mulq %rbp
addq %rax,%r10
movq 8(%rsi),%rax
adcq $0,%rdx
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq 8(%rcx),%rax
adcq $0,%rdx
addq 8(%rsp),%r11
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq 16(%rsi),%rax
adcq $0,%rdx
addq %r11,%rdi
leaq 4(%r15),%r15
adcq $0,%rdx
movq %rdi,(%rsp)
movq %rdx,%r13
jmp .Linner4x
.align 16
.Linner4x:
mulq %rbx
addq %rax,%r10
movq -16(%rcx,%r15,8),%rax
adcq $0,%rdx
addq -16(%rsp,%r15,8),%r10
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq -8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-24(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq -8(%rcx,%r15,8),%rax
adcq $0,%rdx
addq -8(%rsp,%r15,8),%r11
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq (%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-16(%rsp,%r15,8)
movq %rdx,%r13
mulq %rbx
addq %rax,%r10
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
addq (%rsp,%r15,8),%r10
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq 8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-8(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq 8(%rcx,%r15,8),%rax
adcq $0,%rdx
addq 8(%rsp,%r15,8),%r11
adcq $0,%rdx
leaq 4(%r15),%r15
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq -16(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-32(%rsp,%r15,8)
movq %rdx,%r13
cmpq %r9,%r15
jb .Linner4x
/* tail of the pass: last two words, then fold in the previous carry word */
mulq %rbx
addq %rax,%r10
movq -16(%rcx,%r15,8),%rax
adcq $0,%rdx
addq -16(%rsp,%r15,8),%r10
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq -8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-24(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq -8(%rcx,%r15,8),%rax
adcq $0,%rdx
addq -8(%rsp,%r15,8),%r11
adcq $0,%rdx
leaq 1(%r14),%r14
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq (%rsi),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-16(%rsp,%r15,8)
movq %rdx,%r13
xorq %rdi,%rdi
addq %r10,%r13
adcq $0,%rdi
addq (%rsp,%r9,8),%r13
adcq $0,%rdi
movq %r13,-8(%rsp,%r15,8)
movq %rdi,(%rsp,%r15,8)
cmpq %r9,%r14
jb .Louter4x
/* Final reduction: rp = tp - np, four words per iteration. */
movq 16(%rsp,%r9,8),%rdi /* reload rp */
movq 0(%rsp),%rax
movq 8(%rsp),%rdx
shrq $2,%r9 /* %r9 = num / 4 until the shlq before the epilogue */
leaq (%rsp),%rsi
xorq %r14,%r14
subq 0(%rcx),%rax
movq 16(%rsi),%rbx
movq 24(%rsi),%rbp
sbbq 8(%rcx),%rdx
leaq -1(%r9),%r15
jmp .Lsub4x
.align 16
.Lsub4x:
movq %rax,0(%rdi,%r14,8)
movq %rdx,8(%rdi,%r14,8)
sbbq 16(%rcx,%r14,8),%rbx
movq 32(%rsi,%r14,8),%rax
movq 40(%rsi,%r14,8),%rdx
sbbq 24(%rcx,%r14,8),%rbp
movq %rbx,16(%rdi,%r14,8)
movq %rbp,24(%rdi,%r14,8)
sbbq 32(%rcx,%r14,8),%rax
movq 48(%rsi,%r14,8),%rbx
movq 56(%rsi,%r14,8),%rbp
sbbq 40(%rcx,%r14,8),%rdx
leaq 4(%r14),%r14 /* lea and dec leave CF intact for the next sbbq */
decq %r15
jnz .Lsub4x
movq %rax,0(%rdi,%r14,8)
movq 32(%rsi,%r14,8),%rax
sbbq 16(%rcx,%r14,8),%rbx
movq %rdx,8(%rdi,%r14,8)
sbbq 24(%rcx,%r14,8),%rbp
movq %rbx,16(%rdi,%r14,8)
sbbq $0,%rax /* %rax = top word of tp minus the final borrow; used as the select mask */
movq %rax,%xmm0
punpcklqdq %xmm0,%xmm0 /* broadcast the mask to both qwords */
movq %rbp,24(%rdi,%r14,8)
xorq %r14,%r14
movq %r9,%r15
pxor %xmm5,%xmm5
jmp .Lcopy4x
.align 16
/*
 * Branch-free select, 32 bytes per iteration:
 * rp = ((tp ^ rp) & mask) ^ rp, then the tp words just read are zeroed.
 * Here %r14 is a byte offset.
 */
.Lcopy4x:
movdqu (%rsp,%r14,1),%xmm2
movdqu 16(%rsp,%r14,1),%xmm4
movdqu (%rdi,%r14,1),%xmm1
movdqu 16(%rdi,%r14,1),%xmm3
pxor %xmm1,%xmm2
pxor %xmm3,%xmm4
pand %xmm0,%xmm2
pand %xmm0,%xmm4
pxor %xmm1,%xmm2
pxor %xmm3,%xmm4
movdqu %xmm2,(%rdi,%r14,1)
movdqu %xmm4,16(%rdi,%r14,1)
movdqa %xmm5,(%rsp,%r14,1)
movdqa %xmm5,16(%rsp,%r14,1)
leaq 32(%r14),%r14
decq %r15
jnz .Lcopy4x
/* Epilogue: reload callee-saved registers relative to the saved %rsp. */
shlq $2,%r9 /* back to num */
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
.Lmul4x_epilogue:
.byte 0xf3,0xc3 /* rep ret */
.size bn_mul4x_mont,.-bn_mul4x_mont
/*
 * bn_sqr8x_mont -- Montgomery squaring front end. Entered through
 * .Lsqr8x_enter from bn_mul_mont when bp == ap and num is a multiple of 8,
 * with the same register arguments:
 *   %rdi rp, %rsi ap, %rcx np, %r8 pointer to the per-modulus constant,
 *   %r9d num.
 * Out:  %rax = 1
 *
 * This routine only builds the stack frame, copies the modulus into it and
 * cleans up afterwards; the arithmetic is done by bn_sqr8x_internal, which
 * is defined outside this file.
 *
 * Frame slots written here: 32(%rsp) = the word loaded from (%r8),
 * 40(%rsp) = the caller's %rsp as it was on entry (before the pushes).
 */
.extern bn_sqr8x_internal
.hidden bn_sqr8x_internal
.type bn_sqr8x_mont,@function
.align 32
bn_sqr8x_mont:
.Lsqr8x_enter:
movq %rsp,%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
movl %r9d,%r10d
shll $3,%r9d /* %r9 = num * 8 (bytes) */
shlq $3+2,%r10 /* %r10 = num * 32 */
negq %r9
/*
 * Choose the frame address from the low 12 bits of its distance to ap.
 * NOTE(review): presumably this avoids 4 KiB aliasing between the frame and
 * the input operand -- confirm against the upstream perlasm source.
 */
leaq -64(%rsp,%r9,4),%r11
movq (%r8),%r8
subq %rsi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lsqr8x_sp_alt
subq %r11,%rsp
leaq -64(%rsp,%r9,4),%rsp
jmp .Lsqr8x_sp_done
.align 32
.Lsqr8x_sp_alt:
leaq 4096-64(,%r9,4),%r10
leaq -64(%rsp,%r9,4),%rsp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
.Lsqr8x_sp_done:
andq $-64,%rsp
movq %r9,%r10 /* %r10 = -num * 8 */
negq %r9 /* %r9 = num * 8 */
leaq 64(%rsp,%r9,2),%r11
movq %r8,32(%rsp)
movq %rax,40(%rsp)
.Lsqr8x_body:
movq %r9,%rbp
.byte 102,73,15,110,211 /* movq %r11,%xmm2 */
shrq $3+2,%rbp /* %rbp = num / 4: copy-loop iteration count */
/* NOTE(review): %eax is loaded here but not read again in this routine;
   presumably an input to bn_sqr8x_internal -- confirm. */
movl OPENSSL_ia32cap_P+8(%rip),%eax
jmp .Lsqr8x_copy_n
.align 32
/* Copy the modulus to the frame, widening each 64-bit word to a
   zero-extended 16-byte slot (movq to an xmm register clears the top half). */
.Lsqr8x_copy_n:
movq 0(%rcx),%xmm0
movq 8(%rcx),%xmm1
movq 16(%rcx),%xmm3
movq 24(%rcx),%xmm4
leaq 32(%rcx),%rcx
movdqa %xmm0,0(%r11)
movdqa %xmm1,16(%r11)
movdqa %xmm3,32(%r11)
movdqa %xmm4,48(%r11)
leaq 64(%r11),%r11
decq %rbp
jnz .Lsqr8x_copy_n
pxor %xmm0,%xmm0
.byte 102,72,15,110,207 /* movq %rdi,%xmm1 */
.byte 102,73,15,110,218 /* movq %r10,%xmm3 */
call bn_sqr8x_internal
/* Zero two frame regions, starting at 48(%rsp) and at 64(%rsp,%r9,2),
   64 bytes of each per iteration. */
pxor %xmm0,%xmm0
leaq 48(%rsp),%rax
leaq 64(%rsp,%r9,2),%rdx
shrq $3+2,%r9
movq 40(%rsp),%rsi /* caller's %rsp, read before its slot is zeroed */
jmp .Lsqr8x_zero
.align 32
.Lsqr8x_zero:
movdqa %xmm0,0(%rax)
movdqa %xmm0,16(%rax)
movdqa %xmm0,32(%rax)
movdqa %xmm0,48(%rax)
leaq 64(%rax),%rax
movdqa %xmm0,0(%rdx)
movdqa %xmm0,16(%rdx)
movdqa %xmm0,32(%rdx)
movdqa %xmm0,48(%rdx)
leaq 64(%rdx),%rdx
decq %r9
jnz .Lsqr8x_zero
/* Epilogue: callee-saved registers sit just below the caller's %rsp. */
movq $1,%rax
movq -48(%rsi),%r15
movq -40(%rsi),%r14
movq -32(%rsi),%r13
movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
.Lsqr8x_epilogue:
.byte 0xf3,0xc3 /* rep ret */
.size bn_sqr8x_mont,.-bn_sqr8x_mont
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,671 @@
#if defined(__x86_64__)
.text
.align 16
/*
 * md5_block_asm_data_order -- MD5 compression over whole 64-byte blocks.
 *
 * In (as used by the code below):
 *   rdi  pointer to four 32-bit state words (read at entry, written at exit)
 *   rsi  input data pointer
 *   rdx  number of 64-byte blocks (shifted left by 6 to form a byte count)
 * Saves and restores rbp, rbx, r12, r14, r15.
 *
 * Register roles inside the loop:
 *   eax, ebx, ecx, edx   working state A, B, C, D
 *   r8d, r9d, r14d, r15d copy of the state at the start of the block
 *   r10d                 next message word, loaded one step ahead
 *   r11d, r12d           scratch for the round function
 *   rbp                  state pointer, rdi = end of input
 * Each step is: a = b + rol(a + f(b,c,d) + X[k] + T, s); the leal folds the
 * X[k] + T addition. Because words are loaded one step ahead, the last load
 * of each round is overwritten before use.
 */
.globl md5_block_asm_data_order
.hidden md5_block_asm_data_order
.type md5_block_asm_data_order,@function
md5_block_asm_data_order:
pushq %rbp
pushq %rbx
pushq %r12
pushq %r14
pushq %r15
.Lprologue:
movq %rdi,%rbp
shlq $6,%rdx
leaq (%rsi,%rdx,1),%rdi
movl 0(%rbp),%eax
movl 4(%rbp),%ebx
movl 8(%rbp),%ecx
movl 12(%rbp),%edx
/* Zero blocks: skip the loop and just write the state back. */
cmpq %rdi,%rsi
je .Lend
.Lloop:
movl %eax,%r8d
movl %ebx,%r9d
movl %ecx,%r14d
movl %edx,%r15d
/* Round 1: f = ((c ^ d) & b) ^ d, rotations 7/12/17/22. */
movl 0(%rsi),%r10d
movl %edx,%r11d
xorl %ecx,%r11d
leal -680876936(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 4(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -389564586(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 8(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal 606105819(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 12(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -1044525330(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 16(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal -176418897(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 20(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal 1200080426(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 24(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -1473231341(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 28(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -45705983(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 32(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal 1770035416(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 36(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -1958414417(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 40(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -42063(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 44(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -1990404162(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 48(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal 1804603682(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 52(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -40341101(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 56(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -1502002290(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 60(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal 1236535329(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 0(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
/* Round 2: f = (b & d) | (c & ~d), rotations 5/9/14/20. */
movl 4(%rsi),%r10d
movl %edx,%r11d
movl %edx,%r12d
notl %r11d
leal -165796510(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 24(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -1069501632(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 44(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal 643717713(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 0(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -373897302(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 20(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal -701558691(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 40(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal 38016083(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 60(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal -660478335(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 16(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -405537848(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 36(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal 568446438(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 56(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -1019803690(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 12(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal -187363961(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 32(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal 1163531501(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 52(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal -1444681467(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 8(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -51403784(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 28(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal 1735328473(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 48(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -1926607734(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 0(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
/* Round 3: f = b ^ c ^ d, rotations 4/11/16/23. */
movl 20(%rsi),%r10d
movl %ecx,%r11d
leal -378558(%rax,%r10,1),%eax
movl 32(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal -2022574463(%rdx,%r10,1),%edx
movl 44(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal 1839030562(%rcx,%r10,1),%ecx
movl 56(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal -35309556(%rbx,%r10,1),%ebx
movl 4(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal -1530992060(%rax,%r10,1),%eax
movl 16(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal 1272893353(%rdx,%r10,1),%edx
movl 28(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal -155497632(%rcx,%r10,1),%ecx
movl 40(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal -1094730640(%rbx,%r10,1),%ebx
movl 52(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal 681279174(%rax,%r10,1),%eax
movl 0(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal -358537222(%rdx,%r10,1),%edx
movl 12(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal -722521979(%rcx,%r10,1),%ecx
movl 24(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal 76029189(%rbx,%r10,1),%ebx
movl 36(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal -640364487(%rax,%r10,1),%eax
movl 48(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal -421815835(%rdx,%r10,1),%edx
movl 60(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal 530742520(%rcx,%r10,1),%ecx
movl 8(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal -995338651(%rbx,%r10,1),%ebx
movl 0(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
/* Round 4: f = c ^ (b | ~d), rotations 6/10/15/21; ~d is formed as 0xffffffff ^ d. */
movl 0(%rsi),%r10d
movl $4294967295,%r11d
xorl %edx,%r11d
leal -198630844(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 28(%rsi),%r10d
movl $4294967295,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal 1126891415(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 56(%rsi),%r10d
movl $4294967295,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1416354905(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 20(%rsi),%r10d
movl $4294967295,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -57434055(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 48(%rsi),%r10d
movl $4294967295,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal 1700485571(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 12(%rsi),%r10d
movl $4294967295,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -1894986606(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 40(%rsi),%r10d
movl $4294967295,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1051523(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 4(%rsi),%r10d
movl $4294967295,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -2054922799(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 32(%rsi),%r10d
movl $4294967295,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal 1873313359(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 60(%rsi),%r10d
movl $4294967295,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -30611744(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 24(%rsi),%r10d
movl $4294967295,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1560198380(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 52(%rsi),%r10d
movl $4294967295,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal 1309151649(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 16(%rsi),%r10d
movl $4294967295,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal -145523070(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 44(%rsi),%r10d
movl $4294967295,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -1120210379(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 8(%rsi),%r10d
movl $4294967295,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal 718787259(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 36(%rsi),%r10d
movl $4294967295,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -343485551(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 0(%rsi),%r10d
movl $4294967295,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
/* Add the state saved at the top of the block, then advance to the next block. */
addl %r8d,%eax
addl %r9d,%ebx
addl %r14d,%ecx
addl %r15d,%edx
addq $64,%rsi
cmpq %rdi,%rsi
jb .Lloop
.Lend:
movl %eax,0(%rbp)
movl %ebx,4(%rbp)
movl %ecx,8(%rbp)
movl %edx,12(%rbp)
/* Reload the five registers pushed in the prologue, last pushed first. */
movq (%rsp),%r15
movq 8(%rsp),%r14
movq 16(%rsp),%r12
movq 24(%rsp),%rbx
movq 32(%rsp),%rbp
addq $40,%rsp
.Lepilogue:
/* 0xf3,0xc3 is the two-byte "rep ret" encoding. */
.byte 0xf3,0xc3
.size md5_block_asm_data_order,.-md5_block_asm_data_order
#endif

View File

@ -0,0 +1,19 @@
#if defined(__x86_64__)
.text
/*
 * aesni_gcm_encrypt -- stub: ignores its arguments and returns 0 in eax.
 * NOTE(review): presumably 0 tells the caller that no bytes were processed,
 * so it falls back to another implementation -- confirm against the C caller.
 */
.globl aesni_gcm_encrypt
.hidden aesni_gcm_encrypt
.type aesni_gcm_encrypt,@function
aesni_gcm_encrypt:
xorl %eax,%eax
/* 0xf3,0xc3 is the two-byte "rep ret" encoding. */
.byte 0xf3,0xc3
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
/*
 * aesni_gcm_decrypt -- stub: ignores its arguments and returns 0 in eax.
 * NOTE(review): presumably 0 tells the caller that no bytes were processed,
 * so it falls back to another implementation -- confirm against the C caller.
 */
.globl aesni_gcm_decrypt
.hidden aesni_gcm_decrypt
.type aesni_gcm_decrypt,@function
aesni_gcm_decrypt:
xorl %eax,%eax
/* 0xf3,0xc3 is the two-byte "rep ret" encoding. */
.byte 0xf3,0xc3
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,48 @@
#if defined(__x86_64__)
.text
/*
 * CRYPTO_rdrand -- fetch one 64-bit value from the RDRAND instruction.
 *
 * In:    rdi = address that receives the 8-byte result
 * Out:   rax = 1 if RDRAND reported success (CF set), 0 otherwise
 * Clobb: rcx, flags
 *
 * The value left in rcx is stored to (rdi) on both paths; callers must
 * check the return value before using it.
 */
.globl CRYPTO_rdrand
.hidden CRYPTO_rdrand
.type CRYPTO_rdrand,@function
.align 16
CRYPTO_rdrand:
/* Clear rax (and CF) so the adc below yields exactly the carry flag. */
xorq %rax,%rax
/* rdrand %rcx, emitted as raw bytes for assemblers without the mnemonic. */
.byte 0x48, 0x0f, 0xc7, 0xf1
/* rax = 0 + 0 + CF. */
adcq %rax,%rax
movq %rcx,0(%rdi)
/* 0xf3,0xc3 is the two-byte "rep ret" encoding. */
.byte 0xf3,0xc3
/* Record the symbol size so ELF tools can attribute addresses to it. */
.size CRYPTO_rdrand,.-CRYPTO_rdrand
/*
 * CRYPTO_rdrand_multiple8_buf -- fill a buffer with RDRAND output,
 * eight bytes at a time.
 *
 * In:    rdi = destination buffer
 *        rsi = length in bytes; must be a multiple of 8, because the loop
 *              only stops when the remaining count reaches exactly zero
 * Out:   rax = 1 on success (also for a zero length), 0 as soon as one
 *              RDRAND invocation reports failure (CF clear)
 * Clobb: rcx, rdx, rsi, rdi, flags
 */
.globl CRYPTO_rdrand_multiple8_buf
.hidden CRYPTO_rdrand_multiple8_buf
.type CRYPTO_rdrand_multiple8_buf,@function
.align 16
CRYPTO_rdrand_multiple8_buf:
testq %rsi,%rsi
jz .Lout
/* rdx = step size in bytes. */
movq $8,%rdx
.Lloop:
/* rdrand %rcx, emitted as raw bytes for assemblers without the mnemonic. */
.byte 0x48, 0x0f, 0xc7, 0xf1
jnc .Lerr
movq %rcx,0(%rdi)
addq %rdx,%rdi
subq %rdx,%rsi
jnz .Lloop
.Lout:
movq $1,%rax
/* 0xf3,0xc3 is the two-byte "rep ret" encoding. */
.byte 0xf3,0xc3
.Lerr:
xorq %rax,%rax
.byte 0xf3,0xc3
/* Record the symbol size so ELF tools can attribute addresses to it. */
.size CRYPTO_rdrand_multiple8_buf,.-CRYPTO_rdrand_multiple8_buf
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,596 @@
#if defined(__x86_64__)
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
/*
 * asm_RC4 -- RC4 stream cipher: XOR the keystream into a buffer.
 *
 * In (as used by the code below):
 *   rdi  key structure: two 32-bit indices at offsets 0 and 4, followed
 *        by the 256-entry permutation table at offset 8
 *   rsi  length in bytes (returns immediately when zero)
 *   rdx  input pointer
 *   rcx  output pointer
 * Saves and restores rbx, r12, r13.
 *
 * Register roles after the prologue:
 *   rdi  table base (key + 8); -8(%rdi) and -4(%rdi) are the saved indices
 *   r10  index x (byte), rcx index y (byte)
 *   r11  bytes remaining, r12 input pointer
 *   r13  output pointer; on the 32-bit-entry paths it holds (out - in)
 *   eax  cached table[x]; edx/ebx scratch table values
 *
 * Paths:
 *   .LRC4_CHAR            table has byte entries (marker -1 at 256(%rdi))
 *   .Loop8 / .Lintel      table has 32-bit entries; 8 or 16 bytes per
 *                         iteration, chosen by bit 30 of OPENSSL_ia32cap_P
 *   .Lloop1 / .Lcloop1    one byte per iteration (tails and short inputs)
 */
.globl asm_RC4
.hidden asm_RC4
.type asm_RC4,@function
.align 16
asm_RC4:
orq %rsi,%rsi
jne .Lentry
.byte 0xf3,0xc3
.Lentry:
pushq %rbx
pushq %r12
pushq %r13
.Lprologue:
movq %rsi,%r11
movq %rdx,%r12
movq %rcx,%r13
xorq %r10,%r10
xorq %rcx,%rcx
leaq 8(%rdi),%rdi
movb -8(%rdi),%r10b
movb -4(%rdi),%cl
/* A -1 stored at table offset 256 marks the byte-entry table layout. */
cmpl $-1,256(%rdi)
je .LRC4_CHAR
movl OPENSSL_ia32cap_P(%rip),%r8d
xorq %rbx,%rbx
incb %r10b
/* rbx = -x, used below to count warm-up bytes up to the next 8/16 boundary of x. */
subq %r10,%rbx
/* From here r13 = out - in, so (%r12,%r13,1) addresses the output. */
subq %r12,%r13
movl (%rdi,%r10,4),%eax
/* Fewer than 16 bytes: use the byte loop. */
testq $-16,%r11
jz .Lloop1
btl $30,%r8d
jc .Lintel
andq $7,%rbx
leaq 1(%r10),%rsi
jz .Loop8
subq %rbx,%r11
.Loop8_warmup:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r12,%r13,1)
leaq 1(%r12),%r12
decq %rbx
jnz .Loop8_warmup
leaq 1(%r10),%rsi
jmp .Loop8
.align 16
.Loop8:
/*
 * Eight keystream bytes per iteration. Each byte is placed in r8b and
 * r8 is rotated right by 8, so after eight steps r8 holds them in
 * memory order for a single 64-bit XOR.
 */
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 0(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,0(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 4(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,4(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 8(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,8(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 12(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,12(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 16(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,16(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 20(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,20(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 24(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,24(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb $8,%sil
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl -4(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,28(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb $8,%r10b
rorq $8,%r8
subq $8,%r11
xorq (%r12),%r8
movq %r8,(%r12,%r13,1)
leaq 8(%r12),%r12
testq $-8,%r11
jnz .Loop8
cmpq $0,%r11
jne .Lloop1
jmp .Lexit
.align 16
.Lintel:
/* 16-byte path needs at least 32 bytes; otherwise use the byte loop. */
testq $-32,%r11
jz .Lloop1
andq $15,%rbx
jz .Loop16_is_hot
subq %rbx,%r11
.Loop16_warmup:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r12,%r13,1)
leaq 1(%r12),%r12
decq %rbx
jnz .Loop16_warmup
/* Re-create rcx with only its low byte set (upper bits cleared). */
movq %rcx,%rbx
xorq %rcx,%rcx
movb %bl,%cl
.Loop16_is_hot:
leaq (%rdi,%r10,4),%rsi
addb %al,%cl
movl (%rdi,%rcx,4),%edx
pxor %xmm0,%xmm0
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 4(%rsi),%ebx
movzbl %al,%eax
movl %edx,0(%rsi)
addb %bl,%cl
pinsrw $0,(%rdi,%rax,4),%xmm0
jmp .Loop16_enter
.align 16
.Loop16:
/*
 * Sixteen keystream bytes per iteration. Even-numbered bytes are
 * gathered into the words of xmm0 and odd-numbered bytes into xmm1;
 * xmm1 is shifted left by 8 bits and both are XORed into the input
 * block held in xmm2. The store for one block is issued at the top of
 * the next iteration (or after the loop for the last block).
 */
addb %al,%cl
movl (%rdi,%rcx,4),%edx
pxor %xmm0,%xmm2
psllq $8,%xmm1
pxor %xmm0,%xmm0
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 4(%rsi),%ebx
movzbl %al,%eax
movl %edx,0(%rsi)
pxor %xmm1,%xmm2
addb %bl,%cl
pinsrw $0,(%rdi,%rax,4),%xmm0
movdqu %xmm2,(%r12,%r13,1)
leaq 16(%r12),%r12
.Loop16_enter:
movl (%rdi,%rcx,4),%edx
pxor %xmm1,%xmm1
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 8(%rsi),%eax
movzbl %bl,%ebx
movl %edx,4(%rsi)
addb %al,%cl
pinsrw $0,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 12(%rsi),%ebx
movzbl %al,%eax
movl %edx,8(%rsi)
addb %bl,%cl
pinsrw $1,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 16(%rsi),%eax
movzbl %bl,%ebx
movl %edx,12(%rsi)
addb %al,%cl
pinsrw $1,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 20(%rsi),%ebx
movzbl %al,%eax
movl %edx,16(%rsi)
addb %bl,%cl
pinsrw $2,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 24(%rsi),%eax
movzbl %bl,%ebx
movl %edx,20(%rsi)
addb %al,%cl
pinsrw $2,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 28(%rsi),%ebx
movzbl %al,%eax
movl %edx,24(%rsi)
addb %bl,%cl
pinsrw $3,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 32(%rsi),%eax
movzbl %bl,%ebx
movl %edx,28(%rsi)
addb %al,%cl
pinsrw $3,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 36(%rsi),%ebx
movzbl %al,%eax
movl %edx,32(%rsi)
addb %bl,%cl
pinsrw $4,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 40(%rsi),%eax
movzbl %bl,%ebx
movl %edx,36(%rsi)
addb %al,%cl
pinsrw $4,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 44(%rsi),%ebx
movzbl %al,%eax
movl %edx,40(%rsi)
addb %bl,%cl
pinsrw $5,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 48(%rsi),%eax
movzbl %bl,%ebx
movl %edx,44(%rsi)
addb %al,%cl
pinsrw $5,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 52(%rsi),%ebx
movzbl %al,%eax
movl %edx,48(%rsi)
addb %bl,%cl
pinsrw $6,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 56(%rsi),%eax
movzbl %bl,%ebx
movl %edx,52(%rsi)
addb %al,%cl
pinsrw $6,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 60(%rsi),%ebx
movzbl %al,%eax
movl %edx,56(%rsi)
addb %bl,%cl
pinsrw $7,(%rdi,%rax,4),%xmm0
addb $16,%r10b
/* Load the 16 input bytes for this block. */
movdqu (%r12),%xmm2
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movzbl %bl,%ebx
movl %edx,60(%rsi)
leaq (%rdi,%r10,4),%rsi
pinsrw $7,(%rdi,%rbx,4),%xmm1
movl (%rsi),%eax
movq %rcx,%rbx
xorq %rcx,%rcx
subq $16,%r11
movb %bl,%cl
testq $-16,%r11
jnz .Loop16
/* Finish and store the last 16-byte block. */
psllq $8,%xmm1
pxor %xmm0,%xmm2
pxor %xmm1,%xmm2
movdqu %xmm2,(%r12,%r13,1)
leaq 16(%r12),%r12
cmpq $0,%r11
jne .Lloop1
jmp .Lexit
.align 16
.Lloop1:
/* One byte per iteration, 32-bit table entries. */
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r12,%r13,1)
leaq 1(%r12),%r12
decq %r11
jnz .Lloop1
jmp .Lexit
.align 16
.LRC4_CHAR:
/* Byte-entry table: r13 stays an absolute output pointer on this path. */
addb $1,%r10b
movzbl (%rdi,%r10,1),%eax
testq $-8,%r11
jz .Lcloop1
jmp .Lcloop8
.align 16
.Lcloop8:
/*
 * Eight bytes per iteration: input is loaded into r8d/r9d and each
 * keystream byte is XORed into the low byte, followed by a rotate.
 * The next table[x+1] value is loaded before the swap; the .Lcmov*
 * fix-ups replace it with the swapped value when y equals x+1.
 */
movl (%r12),%r8d
movl 4(%r12),%r9d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov0
movq %rax,%rbx
.Lcmov0:
addb %al,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov1
movq %rbx,%rax
.Lcmov1:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov2
movq %rax,%rbx
.Lcmov2:
addb %al,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov3
movq %rbx,%rax
.Lcmov3:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov4
movq %rax,%rbx
.Lcmov4:
addb %al,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov5
movq %rbx,%rax
.Lcmov5:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov6
movq %rax,%rbx
.Lcmov6:
addb %al,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov7
movq %rbx,%rax
.Lcmov7:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
leaq -8(%r11),%r11
movl %r8d,(%r13)
leaq 8(%r12),%r12
movl %r9d,4(%r13)
leaq 8(%r13),%r13
testq $-8,%r11
jnz .Lcloop8
cmpq $0,%r11
jne .Lcloop1
jmp .Lexit
.align 16
.Lcloop1:
/* One byte per iteration, byte table entries. */
addb %al,%cl
movzbl %cl,%ecx
movzbl (%rdi,%rcx,1),%edx
movb %al,(%rdi,%rcx,1)
movb %dl,(%rdi,%r10,1)
addb %al,%dl
addb $1,%r10b
movzbl %dl,%edx
movzbl %r10b,%r10d
movzbl (%rdi,%rdx,1),%edx
movzbl (%rdi,%r10,1),%eax
xorb (%r12),%dl
leaq 1(%r12),%r12
movb %dl,(%r13)
leaq 1(%r13),%r13
subq $1,%r11
jnz .Lcloop1
jmp .Lexit
.align 16
.Lexit:
/* x was pre-incremented; undo that, then store both indices back. */
subb $1,%r10b
movl %r10d,-8(%rdi)
movl %ecx,-4(%rdi)
movq (%rsp),%r13
movq 8(%rsp),%r12
movq 16(%rsp),%rbx
addq $24,%rsp
.Lepilogue:
/* 0xf3,0xc3 is the two-byte "rep ret" encoding. */
.byte 0xf3,0xc3
.size asm_RC4,.-asm_RC4
/*
 * asm_RC4_set_key -- RC4 key schedule.
 *
 * In (as used by the code below):
 *   rdi  key structure (two 32-bit indices, then the table at offset 8)
 *   rsi  key length in bytes
 *   rdx  key bytes
 * Out:  eax = 0; both indices in the structure are set to 0.
 *
 * Bit 20 of OPENSSL_ia32cap_P selects the table layout: byte entries
 * (.Lc* loops, which also store -1 at table offset 256 as a marker) or
 * 32-bit entries (.Lw* loops).
 *
 * rdx is advanced to the end of the key and rsi negated, so
 * (%rdx,%rsi,1) walks the key and rsi reaching zero signals wrap-around;
 * rcx keeps the negated length for reloading.
 * NOTE(review): a zero key length would make the wrap logic read past
 * the key -- presumably callers never pass 0; confirm.
 */
.globl asm_RC4_set_key
.hidden asm_RC4_set_key
.type asm_RC4_set_key,@function
.align 16
asm_RC4_set_key:
leaq 8(%rdi),%rdi
leaq (%rdx,%rsi,1),%rdx
negq %rsi
movq %rsi,%rcx
xorl %eax,%eax
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
movl OPENSSL_ia32cap_P(%rip),%r8d
btl $20,%r8d
jc .Lc1stloop
jmp .Lw1stloop
.align 16
.Lw1stloop:
/* Identity fill: table[i] = i for i = 0..255 (loop ends when al wraps). */
movl %eax,(%rdi,%rax,4)
addb $1,%al
jnc .Lw1stloop
xorq %r9,%r9
xorq %r8,%r8
.align 16
.Lw2ndloop:
/* j (r8b) += key[k] + table[i]; swap table[i] and table[j]. */
movl (%rdi,%r9,4),%r10d
addb (%rdx,%rsi,1),%r8b
addb %r10b,%r8b
addq $1,%rsi
movl (%rdi,%r8,4),%r11d
/* Key index wrapped to zero: restart from the negated length. */
cmovzq %rcx,%rsi
movl %r10d,(%rdi,%r8,4)
movl %r11d,(%rdi,%r9,4)
addb $1,%r9b
jnc .Lw2ndloop
jmp .Lexit_key
.align 16
.Lc1stloop:
/* Same identity fill with byte entries. */
movb %al,(%rdi,%rax,1)
addb $1,%al
jnc .Lc1stloop
xorq %r9,%r9
xorq %r8,%r8
.align 16
.Lc2ndloop:
movb (%rdi,%r9,1),%r10b
addb (%rdx,%rsi,1),%r8b
addb %r10b,%r8b
addq $1,%rsi
movb (%rdi,%r8,1),%r11b
/* The flags tested here are still those of the addq above. */
jnz .Lcnowrap
movq %rcx,%rsi
.Lcnowrap:
movb %r10b,(%rdi,%r8,1)
movb %r11b,(%rdi,%r9,1)
addb $1,%r9b
jnc .Lc2ndloop
/* Marker read by asm_RC4 to detect the byte-entry layout. */
movl $-1,256(%rdi)
.align 16
.Lexit_key:
xorl %eax,%eax
movl %eax,-8(%rdi)
movl %eax,-4(%rdi)
/* 0xf3,0xc3 is the two-byte "rep ret" encoding. */
.byte 0xf3,0xc3
.size asm_RC4_set_key,.-asm_RC4_set_key
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,650 @@
#if defined(__i386__)
.file "vpaes-x86.S"
.text
/*
 * L_vpaes_consts -- constant tables for the vpaes routines in this file.
 *
 * Each .long line is one 16-byte row, and the table is aligned to 64
 * bytes. The routines address rows relative to ebp; they read offsets
 * from -48 upward, so ebp presumably points 48 bytes into this table --
 * TODO confirm against the public entry points, which are outside this
 * view. The third row (0x0F0F0F0F repeated) is the nibble mask.
 *
 * The trailing .byte run is the NUL-terminated ASCII string
 * "Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)".
 */
.align 6,0x90
L_vpaes_consts:
.long 218628480,235210255,168496130,67568393
.long 252381056,17041926,33884169,51187212
.long 252645135,252645135,252645135,252645135
.long 1512730624,3266504856,1377990664,3401244816
.long 830229760,1275146365,2969422977,3447763452
.long 3411033600,2979783055,338359620,2782886510
.long 4209124096,907596821,221174255,1006095553
.long 191964160,3799684038,3164090317,1589111125
.long 182528256,1777043520,2877432650,3265356744
.long 1874708224,3503451415,3305285752,363511674
.long 1606117888,3487855781,1093350906,2384367825
.long 197121,67569157,134941193,202313229
.long 67569157,134941193,202313229,197121
.long 134941193,202313229,197121,67569157
.long 202313229,197121,67569157,134941193
.long 33619971,100992007,168364043,235736079
.long 235736079,33619971,100992007,168364043
.long 168364043,235736079,33619971,100992007
.long 100992007,168364043,235736079,33619971
.long 50462976,117835012,185207048,252579084
.long 252314880,51251460,117574920,184942860
.long 184682752,252054788,50987272,118359308
.long 118099200,185467140,251790600,50727180
.long 2946363062,528716217,1300004225,1881839624
.long 1532713819,1532713819,1532713819,1532713819
.long 3602276352,4288629033,3737020424,4153884961
.long 1354558464,32357713,2958822624,3775749553
.long 1201988352,132424512,1572796698,503232858
.long 2213177600,1597421020,4103937655,675398315
.long 2749646592,4273543773,1511898873,121693092
.long 3040248576,1103263732,2871565598,1608280554
.long 2236667136,2588920351,482954393,64377734
.long 3069987328,291237287,2117370568,3650299247
.long 533321216,3573750986,2572112006,1401264716
.long 1339849704,2721158661,548607111,3445553514
.long 2128193280,3054596040,2183486460,1257083700
.long 655635200,1165381986,3923443150,2344132524
.long 190078720,256924420,290342170,357187870
.long 1610966272,2263057382,4103205268,309794674
.long 2592527872,2233205587,1335446729,3402964816
.long 3973531904,3225098121,3002836325,1918774430
.long 3870401024,2102906079,2284471353,4117666579
.long 617007872,1021508343,366931923,691083277
.long 2528395776,3491914898,2968704004,1613121270
.long 3445188352,3247741094,844474987,4093578302
.long 651481088,1190302358,1689581232,574775300
.long 4289380608,206939853,2555985458,2489840491
.long 2130264064,327674451,3566485037,3349835193
.long 2470714624,316102159,3636825756,3393945945
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
.byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
.byte 118,101,114,115,105,116,121,41,0
.align 6,0x90
/*
 * __vpaes_preheat -- finish the constant-table base address and load the
 * two constants kept in registers.
 *
 * Adds the dword at the top of the stack (this function's return
 * address) to ebp, then loads xmm7 from -48(%ebp) and xmm6 from
 * -16(%ebp).
 * NOTE(review): presumably the caller preloads ebp with a link-time
 * offset from the call site to the table, making this a
 * position-independent address computation -- confirm at the callers.
 */
.private_extern __vpaes_preheat
.align 4
__vpaes_preheat:
addl (%esp),%ebp
movdqa -48(%ebp),%xmm7
movdqa -16(%ebp),%xmm6
ret
/*
 * __vpaes_encrypt_core -- transform the 16-byte block in xmm0 using the
 * key schedule at edx.
 *
 * In:   xmm0 = input block, edx = key schedule (round count read from
 *       240(%edx)), ebp = constant table base, xmm6/xmm7 = constants
 *       loaded by __vpaes_preheat
 * Out:  xmm0 = result
 * Uses: eax (round counter), ebx, ecx, edx, xmm1-xmm5
 *
 * ".byte 102,15,56,0,M" is an SSSE3 pshufb with ModRM byte M, e.g.
 * 208 = pshufb %xmm0,%xmm2 and 193 = pshufb %xmm1,%xmm0.
 *
 * The loop condition at "jnz L001enc_loop" uses the flags of
 * "subl $1,%eax" in the loop body; no instruction in between writes
 * flags. On the initial jump to L000enc_entry the flags are those of
 * "addl $16,%edx".
 */
.private_extern __vpaes_encrypt_core
.align 4
__vpaes_encrypt_core:
movl $16,%ecx
movl 240(%edx),%eax
movdqa %xmm6,%xmm1
movdqa (%ebp),%xmm2
/* Split the input bytes into high nibbles (xmm1) and low nibbles (xmm0). */
pandn %xmm0,%xmm1
pand %xmm6,%xmm0
movdqu (%edx),%xmm5
.byte 102,15,56,0,208
movdqa 16(%ebp),%xmm0
pxor %xmm5,%xmm2
psrld $4,%xmm1
addl $16,%edx
.byte 102,15,56,0,193
leal 192(%ebp),%ebx
pxor %xmm2,%xmm0
jmp L000enc_entry
.align 4,0x90
L001enc_loop:
movdqa 32(%ebp),%xmm4
movdqa 48(%ebp),%xmm0
.byte 102,15,56,0,226
.byte 102,15,56,0,195
pxor %xmm5,%xmm4
movdqa 64(%ebp),%xmm5
pxor %xmm4,%xmm0
movdqa -64(%ebx,%ecx,1),%xmm1
.byte 102,15,56,0,234
movdqa 80(%ebp),%xmm2
movdqa (%ebx,%ecx,1),%xmm4
.byte 102,15,56,0,211
movdqa %xmm0,%xmm3
pxor %xmm5,%xmm2
.byte 102,15,56,0,193
addl $16,%edx
pxor %xmm2,%xmm0
.byte 102,15,56,0,220
/* ecx cycles through 0, 16, 32, 48 to select a table row. */
addl $16,%ecx
pxor %xmm0,%xmm3
.byte 102,15,56,0,193
andl $48,%ecx
subl $1,%eax
pxor %xmm3,%xmm0
L000enc_entry:
movdqa %xmm6,%xmm1
movdqa -32(%ebp),%xmm5
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm6,%xmm0
.byte 102,15,56,0,232
movdqa %xmm7,%xmm3
pxor %xmm1,%xmm0
.byte 102,15,56,0,217
movdqa %xmm7,%xmm4
pxor %xmm5,%xmm3
.byte 102,15,56,0,224
movdqa %xmm7,%xmm2
pxor %xmm5,%xmm4
.byte 102,15,56,0,211
movdqa %xmm7,%xmm3
pxor %xmm0,%xmm2
.byte 102,15,56,0,220
/* Next round key. */
movdqu (%edx),%xmm5
pxor %xmm1,%xmm3
jnz L001enc_loop
/* Final step: uses the rows at 96/112(%ebp) and a last byte shuffle. */
movdqa 96(%ebp),%xmm4
movdqa 112(%ebp),%xmm0
.byte 102,15,56,0,226
pxor %xmm5,%xmm4
.byte 102,15,56,0,195
movdqa 64(%ebx,%ecx,1),%xmm1
pxor %xmm4,%xmm0
.byte 102,15,56,0,193
ret
/*
 * __vpaes_decrypt_core -- inverse transform of the 16-byte block in xmm0
 * using the key schedule at edx.
 *
 * In:   xmm0 = input block, edx = key schedule (round count read from
 *       240(%edx)), ebp = constant table base, xmm6/xmm7 = constants
 *       loaded by __vpaes_preheat
 * Out:  xmm0 = result
 * Uses: eax (round counter), ebx, ecx, edx, xmm1-xmm5
 *
 * ".byte 102,15,56,0,M" is pshufb with ModRM byte M;
 * ".byte 102,15,58,15,237,12" is palignr $12,%xmm5,%xmm5.
 *
 * The loop condition at "jnz L003dec_loop" uses the flags of
 * "subl $1,%eax"; on the initial jump to L002dec_entry the flags are
 * those of "addl $16,%edx" (the leal after it does not write flags).
 */
.private_extern __vpaes_decrypt_core
.align 4
__vpaes_decrypt_core:
leal 608(%ebp),%ebx
movl 240(%edx),%eax
movdqa %xmm6,%xmm1
movdqa -64(%ebx),%xmm2
pandn %xmm0,%xmm1
movl %eax,%ecx
psrld $4,%xmm1
movdqu (%edx),%xmm5
/* ecx = ((rounds * 16) ^ 48) & 48: row offset for the final shuffle. */
shll $4,%ecx
pand %xmm6,%xmm0
.byte 102,15,56,0,208
movdqa -48(%ebx),%xmm0
xorl $48,%ecx
.byte 102,15,56,0,193
andl $48,%ecx
pxor %xmm5,%xmm2
movdqa 176(%ebp),%xmm5
pxor %xmm2,%xmm0
addl $16,%edx
/* ecx becomes an absolute address: ebx - 352 + row offset. */
leal -352(%ebx,%ecx,1),%ecx
jmp L002dec_entry
.align 4,0x90
L003dec_loop:
movdqa -32(%ebx),%xmm4
movdqa -16(%ebx),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
movdqa (%ebx),%xmm4
pxor %xmm1,%xmm0
movdqa 16(%ebx),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,197
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
movdqa 32(%ebx),%xmm4
pxor %xmm1,%xmm0
movdqa 48(%ebx),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,197
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
movdqa 64(%ebx),%xmm4
pxor %xmm1,%xmm0
movdqa 80(%ebx),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,197
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
addl $16,%edx
.byte 102,15,58,15,237,12
pxor %xmm1,%xmm0
subl $1,%eax
L002dec_entry:
movdqa %xmm6,%xmm1
movdqa -32(%ebp),%xmm2
pandn %xmm0,%xmm1
pand %xmm6,%xmm0
psrld $4,%xmm1
.byte 102,15,56,0,208
movdqa %xmm7,%xmm3
pxor %xmm1,%xmm0
.byte 102,15,56,0,217
movdqa %xmm7,%xmm4
pxor %xmm2,%xmm3
.byte 102,15,56,0,224
pxor %xmm2,%xmm4
movdqa %xmm7,%xmm2
.byte 102,15,56,0,211
movdqa %xmm7,%xmm3
pxor %xmm0,%xmm2
.byte 102,15,56,0,220
/* Next round key. */
movdqu (%edx),%xmm0
pxor %xmm1,%xmm3
jnz L003dec_loop
/* Final step: rows at 96/112(%ebx), then the shuffle row selected via ecx. */
movdqa 96(%ebx),%xmm4
.byte 102,15,56,0,226
pxor %xmm0,%xmm4
movdqa 112(%ebx),%xmm0
movdqa (%ecx),%xmm2
.byte 102,15,56,0,195
pxor %xmm4,%xmm0
.byte 102,15,56,0,194
ret
/*
 * __vpaes_schedule_core -- expand a user key into a round-key schedule.
 *
 * In (as used by the code below):
 *   esi  user key bytes
 *   edx  output schedule pointer (advanced by the mangle helpers)
 *   eax  key size selector: above 192 takes the 256 path, equal to 192
 *        the 192 path, anything else the 128 path
 *   edi  zero for an encryption schedule, non-zero for decryption
 *   ecx  row offset into the table at 256(%ebp); toggled with 48
 *   ebp  offset completed by adding the return address (same scheme as
 *        __vpaes_preheat)
 * All xmm registers are cleared before returning.
 *
 * Stack use: the aligned stores to 4(%esp) and 20(%esp) require the
 * caller to have set up esp so that those addresses are 16-byte aligned
 * -- NOTE(review): confirm at the callers, which are outside this view.
 * __vpaes_schedule_round reads the 4(%esp) slot as 8(%esp), one call
 * level deeper.
 *
 * ".byte 102,15,56,0,M" is pshufb with ModRM byte M;
 * ".byte 102,15,58,15,198,8" is palignr $8,%xmm6,%xmm0.
 */
.private_extern __vpaes_schedule_core
.align 4
__vpaes_schedule_core:
addl (%esp),%ebp
movdqu (%esi),%xmm0
movdqa 320(%ebp),%xmm2
movdqa %xmm0,%xmm3
leal (%ebp),%ebx
movdqa %xmm2,4(%esp)
call __vpaes_schedule_transform
movdqa %xmm0,%xmm7
testl %edi,%edi
jnz L004schedule_am_decrypting
/* Encrypting: first output block is the transformed key. */
movdqu %xmm0,(%edx)
jmp L005schedule_go
L004schedule_am_decrypting:
/* Decrypting: first output block is the original key bytes, shuffled. */
movdqa 256(%ebp,%ecx,1),%xmm1
.byte 102,15,56,0,217
movdqu %xmm3,(%edx)
xorl $48,%ecx
L005schedule_go:
cmpl $192,%eax
ja L006schedule_256
je L007schedule_192
L008schedule_128:
/* 10 rounds; every round but the last is followed by a mangle. */
movl $10,%eax
L009loop_schedule_128:
call __vpaes_schedule_round
decl %eax
jz L010schedule_mangle_last
call __vpaes_schedule_mangle
jmp L009loop_schedule_128
.align 4,0x90
L007schedule_192:
/* Bytes 8..23 of the key; after movhlps, xmm6 keeps only the upper half (in its low qword). */
movdqu 8(%esi),%xmm0
call __vpaes_schedule_transform
movdqa %xmm0,%xmm6
pxor %xmm4,%xmm4
movhlps %xmm4,%xmm6
movl $4,%eax
L011loop_schedule_192:
call __vpaes_schedule_round
.byte 102,15,58,15,198,8
call __vpaes_schedule_mangle
call __vpaes_schedule_192_smear
call __vpaes_schedule_mangle
call __vpaes_schedule_round
decl %eax
jz L010schedule_mangle_last
call __vpaes_schedule_mangle
call __vpaes_schedule_192_smear
jmp L011loop_schedule_192
.align 4,0x90
L006schedule_256:
/* Second 16 bytes of the key. */
movdqu 16(%esi),%xmm0
call __vpaes_schedule_transform
movl $7,%eax
L012loop_schedule_256:
call __vpaes_schedule_mangle
movdqa %xmm0,%xmm6
call __vpaes_schedule_round
decl %eax
jz L010schedule_mangle_last
call __vpaes_schedule_mangle
pshufd $255,%xmm0,%xmm0
/* Park xmm7 on the stack while the low round runs on xmm6. */
movdqa %xmm7,20(%esp)
movdqa %xmm6,%xmm7
call L_vpaes_schedule_low_round
movdqa 20(%esp),%xmm7
jmp L012loop_schedule_256
.align 4,0x90
L010schedule_mangle_last:
/* Pick the output transform row: 384(%ebp) when decrypting, 352(%ebp) otherwise. */
leal 384(%ebp),%ebx
testl %edi,%edi
jnz L013schedule_mangle_last_dec
movdqa 256(%ebp,%ecx,1),%xmm1
.byte 102,15,56,0,193
leal 352(%ebp),%ebx
addl $32,%edx
L013schedule_mangle_last_dec:
addl $-16,%edx
pxor 336(%ebp),%xmm0
call __vpaes_schedule_transform
movdqu %xmm0,(%edx)
/* Clear every xmm register so no key material is left behind. */
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
ret
/*
 * __vpaes_schedule_192_smear -- helper for the 192-bit key schedule.
 *
 * In:   xmm6, xmm7
 * Out:  xmm0 = xmm6 ^ pshufd(xmm6, 0x80) ^ pshufd(xmm7, 0xFE);
 *       xmm6 = the same value with its low 64 bits cleared
 * Uses: xmm1 (left zero on return)
 */
.private_extern __vpaes_schedule_192_smear
.align 4
__vpaes_schedule_192_smear:
pshufd $128,%xmm6,%xmm1
pshufd $254,%xmm7,%xmm0
pxor %xmm1,%xmm6
pxor %xmm1,%xmm1
pxor %xmm0,%xmm6
movdqa %xmm6,%xmm0
/* Copy the zero high half of xmm1 into the low half of xmm6. */
movhlps %xmm1,%xmm6
ret
/*
 * __vpaes_schedule_round -- one key-schedule round.
 *
 * In:   xmm0 = current input word block, xmm7 = previous round key,
 *       ebp = constant table base, 8(%esp) = 16-byte rotating constant
 *       (the slot written by __vpaes_schedule_core at its 4(%esp))
 * Out:  xmm0 = xmm7 = new round key; the constant at 8(%esp) is rotated
 * Uses: xmm1-xmm5
 *
 * L_vpaes_schedule_low_round is a second entry point that skips the
 * constant and rotation steps; __vpaes_schedule_core calls it directly
 * on the 256-bit path.
 *
 * Raw opcode bytes:
 *   102,15,58,15,202,15  palignr $15,%xmm2,%xmm1
 *   102,15,58,15,210,15  palignr $15,%xmm2,%xmm2
 *   102,15,58,15,192,1   palignr $1,%xmm0,%xmm0
 *   102,15,56,0,M        pshufb with ModRM byte M
 */
.private_extern __vpaes_schedule_round
.align 4
__vpaes_schedule_round:
/* Take one byte from the rotating constant into xmm1 and rotate the constant. */
movdqa 8(%esp),%xmm2
pxor %xmm1,%xmm1
.byte 102,15,58,15,202,15
.byte 102,15,58,15,210,15
pxor %xmm1,%xmm7
/* Broadcast the top dword of xmm0, then rotate it by one byte. */
pshufd $255,%xmm0,%xmm0
.byte 102,15,58,15,192,1
movdqa %xmm2,8(%esp)
L_vpaes_schedule_low_round:
/* xmm7 ^= xmm7 << 32, then xmm7 ^= xmm7 << 64 (byte shifts). */
movdqa %xmm7,%xmm1
pslldq $4,%xmm7
pxor %xmm1,%xmm7
movdqa %xmm7,%xmm1
pslldq $8,%xmm7
pxor %xmm1,%xmm7
pxor 336(%ebp),%xmm7
/* Table-lookup substitution of xmm0, using the nibble mask at -16(%ebp). */
movdqa -16(%ebp),%xmm4
movdqa -48(%ebp),%xmm5
movdqa %xmm4,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm4,%xmm0
movdqa -32(%ebp),%xmm2
.byte 102,15,56,0,208
pxor %xmm1,%xmm0
movdqa %xmm5,%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
movdqa %xmm5,%xmm4
.byte 102,15,56,0,224
pxor %xmm2,%xmm4
movdqa %xmm5,%xmm2
.byte 102,15,56,0,211
pxor %xmm0,%xmm2
movdqa %xmm5,%xmm3
.byte 102,15,56,0,220
pxor %xmm1,%xmm3
movdqa 32(%ebp),%xmm4
.byte 102,15,56,0,226
movdqa 48(%ebp),%xmm0
.byte 102,15,56,0,195
pxor %xmm4,%xmm0
/* Combine with the smeared previous key; result goes to both xmm0 and xmm7. */
pxor %xmm7,%xmm0
movdqa %xmm0,%xmm7
ret
.private_extern __vpaes_schedule_transform
.align 4
# __vpaes_schedule_transform - table-driven byte transform of xmm0.
#   Inputs : xmm0 (data), ebx (address of two consecutive 16-byte tables),
#            ebp (constant base; the split mask is read from -16(%ebp)).
#   Output : xmm0 = pshufb(table at (%ebx), masked part)
#                 ^ pshufb(table at 16(%ebx), remaining part >> 4)
#   Clobbers: xmm1, xmm2.
__vpaes_schedule_transform:
movdqa -16(%ebp),%xmm2
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
movdqa (%ebx),%xmm2
# pshufb %xmm0,%xmm2 (hand-encoded 66 0F 38 00 /r)
.byte 102,15,56,0,208
movdqa 16(%ebx),%xmm0
# pshufb %xmm1,%xmm0
.byte 102,15,56,0,193
pxor %xmm2,%xmm0
ret
.private_extern __vpaes_schedule_mangle
.align 4
# __vpaes_schedule_mangle - transform the round key in xmm0 and store it.
#   Inputs : xmm0 (round key, not modified here), edx (output pointer),
#            ecx (byte offset into the table at 256(%ebp)),
#            edi (zero selects the first path, nonzero the second; the
#            public wrappers pass 0 for encrypt keys and 1 for decrypt keys),
#            ebp (constant-table base).
#   Effects: the first path advances edx by 16 before storing, the second
#            path moves it back by 16; ecx becomes (ecx - 16) & 48;
#            16 bytes are written (unaligned store) at the updated edx.
#   Clobbers: xmm1-xmm5, and esi on the second path.
# Every .byte 102,15,56,0,M below is a hand-encoded pshufb.
__vpaes_schedule_mangle:
movdqa %xmm0,%xmm4
movdqa 128(%ebp),%xmm5
testl %edi,%edi
jnz L014schedule_mangle_dec
# First path (edi == 0): xor with the constant at 336(%ebp), then combine
# three successive pshufb-by-xmm5 results into xmm3.
addl $16,%edx
pxor 336(%ebp),%xmm4
# pshufb %xmm5,%xmm4
.byte 102,15,56,0,229
movdqa %xmm4,%xmm3
# pshufb %xmm5,%xmm4
.byte 102,15,56,0,229
pxor %xmm4,%xmm3
# pshufb %xmm5,%xmm4
.byte 102,15,56,0,229
pxor %xmm4,%xmm3
jmp L015schedule_mangle_both
.align 4,0x90
L014schedule_mangle_dec:
# Second path (edi != 0): four table pairs starting at 416(%ebp) are
# applied to the two halves split by the mask at -16(%ebp).
movdqa -16(%ebp),%xmm2
leal 416(%ebp),%esi
movdqa %xmm2,%xmm1
pandn %xmm4,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm4
movdqa (%esi),%xmm2
# pshufb %xmm4,%xmm2
.byte 102,15,56,0,212
movdqa 16(%esi),%xmm3
# pshufb %xmm1,%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
# pshufb %xmm5,%xmm3
.byte 102,15,56,0,221
movdqa 32(%esi),%xmm2
# pshufb %xmm4,%xmm2
.byte 102,15,56,0,212
pxor %xmm3,%xmm2
movdqa 48(%esi),%xmm3
# pshufb %xmm1,%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
# pshufb %xmm5,%xmm3
.byte 102,15,56,0,221
movdqa 64(%esi),%xmm2
# pshufb %xmm4,%xmm2
.byte 102,15,56,0,212
pxor %xmm3,%xmm2
movdqa 80(%esi),%xmm3
# pshufb %xmm1,%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
# pshufb %xmm5,%xmm3
.byte 102,15,56,0,221
movdqa 96(%esi),%xmm2
# pshufb %xmm4,%xmm2
.byte 102,15,56,0,212
pxor %xmm3,%xmm2
movdqa 112(%esi),%xmm3
# pshufb %xmm1,%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
addl $-16,%edx
L015schedule_mangle_both:
# Final permutation selected by ecx, then step ecx to the previous
# 16-byte table entry modulo 64 and store the result.
movdqa 256(%ebp,%ecx,1),%xmm1
# pshufb %xmm1,%xmm3
.byte 102,15,56,0,217
addl $-16,%ecx
andl $48,%ecx
movdqu %xmm3,(%edx)
ret
.globl _vpaes_set_encrypt_key
.private_extern _vpaes_set_encrypt_key
.align 4
# _vpaes_set_encrypt_key - public cdecl entry that expands an encryption key.
#   Stack arguments (after the four pushes below):
#     20(%esp) -> esi, 24(%esp) -> eax, 28(%esp) -> edx
#   presumably (user key pointer, key length in bits, key schedule pointer);
#   confirm against the C prototype.
#   Writes (arg2 >> 5) + 5 to offset 240 of the third argument, runs
#   __vpaes_schedule_core with edi = 0 and ecx = 48, and returns 0 in eax.
#   ebx, esi, edi and ebp are saved and restored.
_vpaes_set_encrypt_key:
L_vpaes_set_encrypt_key_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
# Carve a 16-byte aligned scratch frame; the old esp is parked at 48(%esp).
leal -56(%esp),%ebx
movl 24(%esp),%eax
andl $-16,%ebx
movl 28(%esp),%edx
xchgl %esp,%ebx
movl %ebx,48(%esp)
# Value stored at offset 240: (bits >> 5) + 5.
movl %eax,%ebx
shrl $5,%ebx
addl $5,%ebx
movl %ebx,240(%edx)
movl $48,%ecx
movl $0,%edi
# ebp is loaded with a link-time offset relative to the return address of
# the call below; presumably the callee adds that return address to form
# the absolute constant-table address (callee entry is not visible here).
leal L_vpaes_consts+0x30-L016pic_point,%ebp
call __vpaes_schedule_core
L016pic_point:
# Drop the aligned frame and report success.
movl 48(%esp),%esp
xorl %eax,%eax
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _vpaes_set_decrypt_key
.private_extern _vpaes_set_decrypt_key
.align 4
# _vpaes_set_decrypt_key - public cdecl entry that expands a decryption key.
#   Stack arguments (after the four pushes below):
#     20(%esp) -> esi, 24(%esp) -> eax, 28(%esp) -> edx
#   presumably (user key pointer, key length in bits, key schedule pointer);
#   confirm against the C prototype.
#   Writes r = (arg2 >> 5) + 5 to offset 240 of the third argument, then
#   points edx at schedule + 16 + 16*r so the core, called with edi = 1,
#   fills the schedule from the far end. Returns 0 in eax.
#   ebx, esi, edi and ebp are saved and restored.
_vpaes_set_decrypt_key:
L_vpaes_set_decrypt_key_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
# Carve a 16-byte aligned scratch frame; the old esp is parked at 48(%esp).
leal -56(%esp),%ebx
movl 24(%esp),%eax
andl $-16,%ebx
movl 28(%esp),%edx
xchgl %esp,%ebx
movl %ebx,48(%esp)
movl %eax,%ebx
shrl $5,%ebx
addl $5,%ebx
movl %ebx,240(%edx)
# edx = schedule + 16 + 16 * r
shll $4,%ebx
leal 16(%edx,%ebx,1),%edx
movl $1,%edi
# Starting table offset for the mangle step: ((bits >> 1) & 32) ^ 32.
movl %eax,%ecx
shrl $1,%ecx
andl $32,%ecx
xorl $32,%ecx
# Link-time offset relative to the return address of the call below;
# presumably made absolute inside the callee (not visible here).
leal L_vpaes_consts+0x30-L017pic_point,%ebp
call __vpaes_schedule_core
L017pic_point:
movl 48(%esp),%esp
xorl %eax,%eax
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _vpaes_encrypt
.private_extern _vpaes_encrypt
.align 4
# _vpaes_encrypt - public cdecl entry that encrypts one 16-byte block.
#   Stack arguments (after the four pushes below):
#     20(%esp) -> esi (input block), 24(%esp) -> edi (output block),
#     28(%esp) -> edx (passed to the core; presumably the key schedule).
#   Input and output are accessed with unaligned loads and stores.
#   eax is not set by this wrapper; ebx, esi, edi, ebp are restored.
_vpaes_encrypt:
L_vpaes_encrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
# Link-time offset relative to the return address of the preheat call;
# presumably __vpaes_preheat turns it into the absolute table address.
leal L_vpaes_consts+0x30-L018pic_point,%ebp
call __vpaes_preheat
L018pic_point:
movl 20(%esp),%esi
# Carve a 16-byte aligned scratch frame; the old esp is parked at 48(%esp).
leal -56(%esp),%ebx
movl 24(%esp),%edi
andl $-16,%ebx
movl 28(%esp),%edx
xchgl %esp,%ebx
movl %ebx,48(%esp)
movdqu (%esi),%xmm0
call __vpaes_encrypt_core
movdqu %xmm0,(%edi)
movl 48(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _vpaes_decrypt
.private_extern _vpaes_decrypt
.align 4
# _vpaes_decrypt - public cdecl entry that decrypts one 16-byte block.
#   Stack arguments (after the four pushes below):
#     20(%esp) -> esi (input block), 24(%esp) -> edi (output block),
#     28(%esp) -> edx (passed to the core; presumably the key schedule).
#   Input and output are accessed with unaligned loads and stores.
#   eax is not set by this wrapper; ebx, esi, edi, ebp are restored.
_vpaes_decrypt:
L_vpaes_decrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
# Link-time offset relative to the return address of the preheat call;
# presumably __vpaes_preheat turns it into the absolute table address.
leal L_vpaes_consts+0x30-L019pic_point,%ebp
call __vpaes_preheat
L019pic_point:
movl 20(%esp),%esi
# Carve a 16-byte aligned scratch frame; the old esp is parked at 48(%esp).
leal -56(%esp),%ebx
movl 24(%esp),%edi
andl $-16,%ebx
movl 28(%esp),%edx
xchgl %esp,%ebx
movl %ebx,48(%esp)
movdqu (%esi),%xmm0
call __vpaes_decrypt_core
movdqu %xmm0,(%edi)
movl 48(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _vpaes_cbc_encrypt
.private_extern _vpaes_cbc_encrypt
.align 4
# _vpaes_cbc_encrypt - public cdecl entry for CBC encryption/decryption.
#   Stack arguments (after the four pushes below):
#     20(%esp) -> esi  input pointer
#     24(%esp) -> edi  output pointer
#     28(%esp) -> eax  length in bytes
#     32(%esp) -> edx  passed to the cores (presumably the key schedule)
#     36(%esp) -> ebp  16-byte chaining value (IV), read and written back
#     40(%esp) -> ecx  zero selects decryption, nonzero encryption
#   A length below 16 returns immediately without touching anything.
#   Only whole 16-byte blocks are processed; a trailing partial block is
#   left alone. Aligned frame layout: (%esp) = out - in, 4(%esp) = key,
#   8(%esp) = IV pointer, 16(%esp) and 32(%esp) = decrypt scratch,
#   48(%esp) = original esp.
_vpaes_cbc_encrypt:
L_vpaes_cbc_encrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
movl 24(%esp),%edi
movl 28(%esp),%eax
movl 32(%esp),%edx
# eax = length - 16; a borrow means there is not even one full block.
subl $16,%eax
jc L020cbc_abort
leal -56(%esp),%ebx
movl 36(%esp),%ebp
andl $-16,%ebx
movl 40(%esp),%ecx
xchgl %esp,%ebx
# xmm1 carries the chaining value across blocks.
movdqu (%ebp),%xmm1
# Keep out - in so the output address can be formed as (delta + esi).
subl %esi,%edi
movl %ebx,48(%esp)
movl %edi,(%esp)
movl %edx,4(%esp)
movl %ebp,8(%esp)
# edi now counts remaining bytes minus 16.
movl %eax,%edi
leal L_vpaes_consts+0x30-L021pic_point,%ebp
call __vpaes_preheat
L021pic_point:
cmpl $0,%ecx
je L022cbc_dec_loop
jmp L023cbc_enc_loop
.align 4,0x90
L023cbc_enc_loop:
# Encrypt: C = E(P ^ previous C); the new C becomes the chaining value.
movdqu (%esi),%xmm0
pxor %xmm1,%xmm0
call __vpaes_encrypt_core
# Reload values kept in the frame (ebx and edx are not assumed to survive
# the core call).
movl (%esp),%ebx
movl 4(%esp),%edx
movdqa %xmm0,%xmm1
movdqu %xmm0,(%ebx,%esi,1)
leal 16(%esi),%esi
subl $16,%edi
jnc L023cbc_enc_loop
jmp L024cbc_done
.align 4,0x90
L022cbc_dec_loop:
# Decrypt: P = D(C) ^ previous C; this C is saved as the next chaining
# value before the core overwrites xmm0.
movdqu (%esi),%xmm0
movdqa %xmm1,16(%esp)
movdqa %xmm0,32(%esp)
call __vpaes_decrypt_core
movl (%esp),%ebx
movl 4(%esp),%edx
pxor 16(%esp),%xmm0
movdqa 32(%esp),%xmm1
movdqu %xmm0,(%ebx,%esi,1)
leal 16(%esi),%esi
subl $16,%edi
jnc L022cbc_dec_loop
L024cbc_done:
# Write the final chaining value back through the IV pointer.
movl 8(%esp),%ebx
movl 48(%esp),%esp
movdqu %xmm1,(%ebx)
L020cbc_abort:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,462 @@
#if defined(__i386__)
.file "src/crypto/bn/asm/x86-mont.S"
.text
.globl _bn_mul_mont
.private_extern _bn_mul_mont
.align 4
# _bn_mul_mont - Montgomery multiplication for 32-bit x86 (cdecl).
#   Six stack arguments; after the four pushes below they sit at
#   20..40(%esp). Presumably (rp, ap, bp, np, n0 pointer, num) as in the
#   C prototype; the code itself shows: the fifth argument is a pointer
#   that is dereferenced once, and the sixth is a word count.
#   Returns eax = 0 without doing any work when the count is below 4
#   (signed compare), otherwise eax = 1 after writing the result through
#   the first argument.
#   Scratch frame built on the stack (all offsets from the new esp):
#      4 = arg1 (result)     8 = arg2        12 = arg3
#     16 = arg4 (modulus)   20 = *arg5       24 = caller esp
#     28 / 32 onward = temporary vector, count + 2 words
#   ebx, esi, edi and ebp are saved and restored.
_bn_mul_mont:
L_bn_mul_mont_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %eax,%eax
movl 40(%esp),%edi
cmpl $4,%edi
jl L000just_leave
# esi and edx point at the argument slots; ebp remembers the caller esp.
leal 20(%esp),%esi
leal 24(%esp),%edx
movl %esp,%ebp
# Reserve 32 + 4*(count + 2) bytes below the current esp.
addl $2,%edi
negl %edi
leal -32(%esp,%edi,4),%esp
negl %edi
# Adjust the frame position relative to the argument area modulo 2048
# and 4096, then align it to 64 bytes (presumably to avoid cache or
# page aliasing between frame and arguments; rationale not shown here).
movl %esp,%eax
subl %edx,%eax
andl $2047,%eax
subl %eax,%esp
xorl %esp,%edx
andl $2048,%edx
xorl $2048,%edx
subl %edx,%esp
andl $-64,%esp
# Copy the arguments into the frame; the fifth is dereferenced first.
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
movl %edx,16(%esp)
movl %esi,20(%esp)
# ebx = count - 1 (edi still holds count + 2).
leal -3(%edi),%ebx
movl %ebp,24(%esp)
# Position-independent load of the capability word; bit 26 selects the
# pmuludq path below, otherwise the integer-only path is used.
call L001PIC_me_up
L001PIC_me_up:
popl %eax
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc L002non_sse2
# ---- pmuludq path ----
# mm7 = low-32-bit mask; esi = arg2, edi = arg3, ebp = arg4;
# edx = outer index, ecx = inner index.
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
movl 12(%esp),%edi
movl 16(%esp),%ebp
xorl %edx,%edx
xorl %ecx,%ecx
movd (%edi),%mm4
movd (%esi),%mm5
movd (%ebp),%mm3
pmuludq %mm4,%mm5
movq %mm5,%mm2
movq %mm5,%mm0
pand %mm7,%mm0
# mm5 = low product word times the value at 20(%esp): the per-pass
# reduction multiplier.
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
incl %ecx
.align 4,0x90
L0031st:
# First pass: mm2 and mm3 carry the two running 64-bit sums; the low
# word of their total is stored into the temporary vector each step.
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl L0031st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
L004outer:
# Remaining passes: same as above but the previous temporary vector
# contents (via mm6) are accumulated as well.
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
movd 32(%esp),%mm6
movd (%ebp),%mm3
pmuludq %mm4,%mm5
paddq %mm6,%mm5
movq %mm5,%mm0
movq %mm5,%mm2
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 36(%esp),%mm6
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm6,%mm2
incl %ecx
decl %ebx
L005inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
movd 36(%esp,%ecx,4),%mm6
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
paddq %mm6,%mm2
# ebx counts down here; the lea keeps the flags from decl for the jnz.
decl %ebx
leal 1(%ecx),%ecx
jnz L005inner
# Restore ebx = count - 1 from the loop index.
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
movd 36(%esp,%ebx,4),%mm6
paddq %mm2,%mm3
paddq %mm6,%mm3
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle L004outer
# Leave MMX state before returning to integer code.
emms
jmp L006common_tail
.align 4,0x90
L002non_sse2:
# ---- integer path ----
# ebp = (count & 1) | (arg2 - arg3): zero only when both operands are
# the same pointer and the count is even, which selects the squaring
# code. The orl result is still in the flags at the jz because the
# movl in between does not modify flags.
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
xorl %ecx,%ecx
movl %esi,%edx
andl $1,%ebp
subl %edi,%edx
# eax = address one past the last word of arg3; kept at 28(%esp) as the
# outer-loop end marker.
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
jz L007bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 4,0x90
L008mull:
# Temporary vector = arg2 * first word of arg3 (edi), ebp/edx = carry.
movl %edx,%ebp
mull %edi
addl %eax,%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl L008mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
addl %ebp,%eax
movl 16(%esp),%esi
adcl $0,%edx
# edi = reduction multiplier: value at 20(%esp) times the low word of
# the temporary vector.
imull 32(%esp),%edi
movl %eax,32(%esp,%ebx,4)
xorl %ecx,%ecx
movl %edx,36(%esp,%ebx,4)
movl %ecx,40(%esp,%ebx,4)
movl (%esi),%eax
mull %edi
addl 32(%esp),%eax
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp L0092ndmadd
.align 4,0x90
L0101stmadd:
# Multiply-accumulate: temporary vector += arg2 * current word of arg3.
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl L0101stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
movl 20(%esp),%edi
adcl $0,%edx
movl 16(%esp),%esi
addl %eax,%ebp
adcl $0,%edx
imull 32(%esp),%edi
xorl %ecx,%ecx
addl 36(%esp,%ebx,4),%edx
movl %ebp,32(%esp,%ebx,4)
adcl $0,%ecx
movl (%esi),%eax
movl %edx,36(%esp,%ebx,4)
movl %ecx,40(%esp,%ebx,4)
mull %edi
addl 32(%esp),%eax
movl 4(%esi),%eax
adcl $0,%edx
movl $1,%ecx
.align 4,0x90
L0092ndmadd:
# Reduction: temporary vector += modulus * edi, stored shifted down by
# one word (note the 24 versus 32 displacement).
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl L0092ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
adcl $0,%edx
movl %ebp,28(%esp,%ebx,4)
xorl %eax,%eax
# Advance the arg3 word pointer kept at 12(%esp); finish when it reaches
# the end marker at 28(%esp).
movl 12(%esp),%ecx
addl 36(%esp,%ebx,4),%edx
adcl 40(%esp,%ebx,4),%eax
leal 4(%ecx),%ecx
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
je L006common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp L0101stmadd
.align 4,0x90
L007bn_sqr_mont:
# ---- squaring path ----
# (%esp) = count - 1, 12(%esp) = current outer index (starts at 0).
# Cross products are doubled on the fly; ebx carries the bit shifted out.
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
mull %edi
movl %eax,32(%esp)
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
incl %ecx
.align 4,0x90
L011sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
leal 1(%ecx),%ecx
adcl $0,%edx
leal (%ebx,%eax,2),%ebp
shrl $31,%eax
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl L011sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
movl 20(%esp),%edi
adcl $0,%edx
movl 16(%esp),%esi
leal (%ebx,%eax,2),%ebp
imull 32(%esp),%edi
shrl $31,%eax
movl %ebp,32(%esp,%ecx,4)
leal (%eax,%edx,2),%ebp
movl (%esi),%eax
shrl $31,%edx
movl %ebp,36(%esp,%ecx,4)
movl %edx,40(%esp,%ecx,4)
mull %edi
addl 32(%esp),%eax
movl %ecx,%ebx
adcl $0,%edx
movl 4(%esi),%eax
movl $1,%ecx
.align 4,0x90
L0123rdmadd:
# Reduction for the squaring path, two modulus words per iteration.
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
movl 4(%esi,%ecx,4),%eax
adcl $0,%edx
movl %ebp,28(%esp,%ecx,4)
movl %edx,%ebp
mull %edi
addl 36(%esp,%ecx,4),%ebp
leal 2(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl L0123rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
adcl $0,%edx
movl %ebp,28(%esp,%ebx,4)
movl 12(%esp),%ecx
xorl %eax,%eax
movl 8(%esp),%esi
addl 36(%esp,%ebx,4),%edx
adcl 40(%esp,%ebx,4),%eax
movl %edx,32(%esp,%ebx,4)
# Finished once the outer index has reached count - 1.
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
je L006common_tail
# Next outer index: square the next word and add it on the diagonal.
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
movl %ecx,12(%esp)
mull %edi
addl 32(%esp,%ecx,4),%eax
adcl $0,%edx
movl %eax,32(%esp,%ecx,4)
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je L013sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 4,0x90
L014sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
leal (%eax,%eax,1),%ebp
adcl $0,%edx
shrl $31,%eax
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%eax
addl %ebx,%ebp
adcl $0,%eax
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle L014sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
L013sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
addl 32(%esp,%ecx,4),%edx
movl (%esi),%eax
adcl $0,%ebp
movl %edx,32(%esp,%ecx,4)
movl %ebp,36(%esp,%ecx,4)
mull %edi
addl 32(%esp),%eax
leal -1(%ecx),%ebx
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp L0123rdmadd
.align 4,0x90
L006common_tail:
# ---- final subtraction and copy-out ----
# ebp = arg4 (modulus), edi = arg1 (result), esi = temporary vector.
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
movl (%esi),%eax
movl %ebx,%ecx
# The xorl also clears the carry flag for the sbbl chain below.
xorl %edx,%edx
.align 4,0x90
L015sub:
# result = temporary - modulus, word by word. decl, movl and leal leave
# the carry flag alone, so the borrow survives between iterations.
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge L015sub
# eax = top temporary word minus the final borrow; used as the select
# mask in the copy loop.
sbbl $0,%eax
.align 4,0x90
L016copy:
# Branch-free select per word: result = ((tmp ^ res) & mask) ^ res, so
# the unsubtracted value is restored when the subtraction borrowed.
# The temporary word is then overwritten with the value left in ecx.
movl (%esi,%ebx,4),%edx
movl (%edi,%ebx,4),%ebp
xorl %ebp,%edx
andl %eax,%edx
xorl %ebp,%edx
movl %ecx,(%esi,%ebx,4)
movl %edx,(%edi,%ebx,4)
decl %ebx
jge L016copy
# Back to the caller stack; report success.
movl 24(%esp),%esp
movl $1,%eax
L000just_leave:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
# ASCII identification string:
# Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
# Mach-O non-lazy symbol pointer for _OPENSSL_ia32cap_P. The code above
# loads this slot position-independently to reach the capability word.
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol _OPENSSL_ia32cap_P
.long 0
#endif

View File

@ -0,0 +1,680 @@
#if defined(__i386__)
.file "src/crypto/md5/asm/md5-586.S"
.text
.globl _md5_block_asm_data_order
.private_extern _md5_block_asm_data_order
.align 4
# _md5_block_asm_data_order - MD5 compression over whole 64-byte blocks.
#   cdecl stack arguments: state pointer (four 32-bit words A,B,C,D),
#   data pointer, block count.
#   Register roles inside the loop: eax,ebx,ecx,edx = A,B,C,D working
#   values, esi = current block, edi = round-function scratch,
#   ebp = current message word. The address of the last block is kept
#   on top of the stack.
#   A block count of zero returns at once without touching the state
#   or reading the data.
#   ebx, esi, edi and ebp are saved and restored; no return value.
_md5_block_asm_data_order:
L_md5_block_asm_data_order_begin:
pushl %esi
pushl %edi
movl 12(%esp),%edi
movl 16(%esp),%esi
movl 20(%esp),%ecx
# The loop below is bottom-tested, so without this guard a zero block
# count would still hash one block. Only esi and edi are on the stack
# at this point, which is exactly what the exit label pops.
testl %ecx,%ecx
jz L001no_blocks
pushl %ebp
shll $6,%ecx
pushl %ebx
# ecx = data + 64*count - 64 = address of the last block.
addl %esi,%ecx
subl $64,%ecx
movl (%edi),%eax
pushl %ecx
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
L000start:
# R0 section
# Round function here: ((c ^ d) & b) ^ d
movl %ecx,%edi
movl (%esi),%ebp
# R0 0
xorl %edx,%edi
andl %ebx,%edi
leal 3614090360(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 4(%esi),%ebp
addl %ebx,%eax
# R0 1
xorl %ecx,%edi
andl %eax,%edi
leal 3905402710(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 8(%esi),%ebp
addl %eax,%edx
# R0 2
xorl %ebx,%edi
andl %edx,%edi
leal 606105819(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 12(%esi),%ebp
addl %edx,%ecx
# R0 3
xorl %eax,%edi
andl %ecx,%edi
leal 3250441966(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 16(%esi),%ebp
addl %ecx,%ebx
# R0 4
xorl %edx,%edi
andl %ebx,%edi
leal 4118548399(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 20(%esi),%ebp
addl %ebx,%eax
# R0 5
xorl %ecx,%edi
andl %eax,%edi
leal 1200080426(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 24(%esi),%ebp
addl %eax,%edx
# R0 6
xorl %ebx,%edi
andl %edx,%edi
leal 2821735955(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 28(%esi),%ebp
addl %edx,%ecx
# R0 7
xorl %eax,%edi
andl %ecx,%edi
leal 4249261313(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 32(%esi),%ebp
addl %ecx,%ebx
# R0 8
xorl %edx,%edi
andl %ebx,%edi
leal 1770035416(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 36(%esi),%ebp
addl %ebx,%eax
# R0 9
xorl %ecx,%edi
andl %eax,%edi
leal 2336552879(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 40(%esi),%ebp
addl %eax,%edx
# R0 10
xorl %ebx,%edi
andl %edx,%edi
leal 4294925233(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 44(%esi),%ebp
addl %edx,%ecx
# R0 11
xorl %eax,%edi
andl %ecx,%edi
leal 2304563134(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 48(%esi),%ebp
addl %ecx,%ebx
# R0 12
xorl %edx,%edi
andl %ebx,%edi
leal 1804603682(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 52(%esi),%ebp
addl %ebx,%eax
# R0 13
xorl %ecx,%edi
andl %eax,%edi
leal 4254626195(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 56(%esi),%ebp
addl %eax,%edx
# R0 14
xorl %ebx,%edi
andl %edx,%edi
leal 2792965006(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 60(%esi),%ebp
addl %edx,%ecx
# R0 15
xorl %eax,%edi
andl %ecx,%edi
leal 1236535329(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 4(%esi),%ebp
addl %ecx,%ebx
# R1 section
# Round function here: ((b ^ c) & d) ^ c
# R1 16
leal 4129170786(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 24(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
# R1 17
leal 3225465664(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 44(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
# R1 18
leal 643717713(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl (%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
# R1 19
leal 3921069994(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 20(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
# R1 20
leal 3593408605(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 40(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
# R1 21
leal 38016083(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 60(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
# R1 22
leal 3634488961(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 16(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
# R1 23
leal 3889429448(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 36(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
# R1 24
leal 568446438(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 56(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
# R1 25
leal 3275163606(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 12(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
# R1 26
leal 4107603335(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 32(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
# R1 27
leal 1163531501(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 52(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
# R1 28
leal 2850285829(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 8(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
# R1 29
leal 4243563512(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 28(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
# R1 30
leal 1735328473(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 48(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
# R1 31
leal 2368359562(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 20(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
# R2 section
# Round function here: b ^ c ^ d
# R2 32
xorl %edx,%edi
xorl %ebx,%edi
leal 4294588738(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 32(%esi),%ebp
movl %ebx,%edi
# R2 33
leal 2272392833(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 44(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
# R2 34
xorl %ebx,%edi
xorl %edx,%edi
leal 1839030562(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 56(%esi),%ebp
movl %edx,%edi
# R2 35
leal 4259657740(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 4(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
# R2 36
xorl %edx,%edi
xorl %ebx,%edi
leal 2763975236(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 16(%esi),%ebp
movl %ebx,%edi
# R2 37
leal 1272893353(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 28(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
# R2 38
xorl %ebx,%edi
xorl %edx,%edi
leal 4139469664(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 40(%esi),%ebp
movl %edx,%edi
# R2 39
leal 3200236656(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 52(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
# R2 40
xorl %edx,%edi
xorl %ebx,%edi
leal 681279174(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl (%esi),%ebp
movl %ebx,%edi
# R2 41
leal 3936430074(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 12(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
# R2 42
xorl %ebx,%edi
xorl %edx,%edi
leal 3572445317(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 24(%esi),%ebp
movl %edx,%edi
# R2 43
leal 76029189(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 36(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
# R2 44
xorl %edx,%edi
xorl %ebx,%edi
leal 3654602809(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 48(%esi),%ebp
movl %ebx,%edi
# R2 45
leal 3873151461(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 60(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
# R2 46
xorl %ebx,%edi
xorl %edx,%edi
leal 530742520(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 8(%esi),%ebp
movl %edx,%edi
# R2 47
leal 3299628645(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl (%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $23,%ebx
addl %ecx,%ebx
# R3 section
# Round function here: c ^ (b | ~d); the -1 loaded into edi supplies
# the complement via xor.
# R3 48
xorl %edx,%edi
orl %ebx,%edi
leal 4096336452(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 28(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
# R3 49
orl %eax,%edi
leal 1126891415(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 56(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
# R3 50
orl %edx,%edi
leal 2878612391(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 20(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
# R3 51
orl %ecx,%edi
leal 4237533241(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 48(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
# R3 52
orl %ebx,%edi
leal 1700485571(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 12(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
# R3 53
orl %eax,%edi
leal 2399980690(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 40(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
# R3 54
orl %edx,%edi
leal 4293915773(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 4(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
# R3 55
orl %ecx,%edi
leal 2240044497(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 32(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
# R3 56
orl %ebx,%edi
leal 1873313359(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 60(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
# R3 57
orl %eax,%edi
leal 4264355552(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 24(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
# R3 58
orl %edx,%edi
leal 2734768916(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 52(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
# R3 59
orl %ecx,%edi
leal 1309151649(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 16(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
# R3 60
orl %ebx,%edi
leal 4149444226(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 44(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
# R3 61
orl %eax,%edi
leal 3174756917(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 8(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
# R3 62
orl %edx,%edi
leal 718787259(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 36(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
# R3 63
orl %ecx,%edi
leal 3951481745(%ebx,%ebp,1),%ebx
xorl %edx,%edi
# With five words pushed, 24(%esp) is the first argument: the state
# pointer. The last round is interleaved with the state update below.
movl 24(%esp),%ebp
addl %edi,%ebx
addl $64,%esi
roll $21,%ebx
movl (%ebp),%edi
addl %ecx,%ebx
# Add the previous state into the working values and store it back.
addl %edi,%eax
movl 4(%ebp),%edi
addl %edi,%ebx
movl 8(%ebp),%edi
addl %edi,%ecx
movl 12(%ebp),%edi
addl %edi,%edx
movl %eax,(%ebp)
movl %ebx,4(%ebp)
# (%esp) holds the address of the last block; keep looping while it is
# not below the advanced data pointer.
movl (%esp),%edi
movl %ecx,8(%ebp)
movl %edx,12(%ebp)
cmpl %esi,%edi
jae L000start
# Discard the saved end pointer, then restore callee-saved registers.
popl %eax
popl %ebx
popl %ebp
L001no_blocks:
popl %edi
popl %esi
ret
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,383 @@
#if defined(__i386__)
.file "rc4-586.S"
.text
.globl _asm_RC4
.private_extern _asm_RC4
.align 4
# _asm_RC4 - RC4 stream cipher, cdecl.
#   Stack arguments (after the four pushes below):
#     20(%esp) -> edi  key structure
#     24(%esp) -> edx  length in bytes
#     28(%esp) -> esi  input pointer
#     32(%esp) -> ebp  output pointer
#   Key structure as used here: byte at offset 0 = index x, byte at
#   offset 4 = index y, state table from offset 8. A value of -1 in the
#   dword at table offset 256 marks a byte-wide table; otherwise entries
#   are dwords.
#   Register roles: eax = x, ebx = y, ecx = table[x] prefetched,
#   ebp = out - in so the output address is (ebp + esi).
#   The argument slots at 24, 28 and 32(%esp) are reused as scratch.
#   A zero length returns immediately. No return value.
_asm_RC4:
L_asm_RC4_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebp
xorl %eax,%eax
xorl %ebx,%ebx
cmpl $0,%edx
je L000abort
movb (%edi),%al
movb 4(%edi),%bl
# edi now points at the state table itself.
addl $8,%edi
# 24(%esp) = end-of-input pointer; ebp = out - in.
leal (%esi,%edx,1),%ecx
subl %esi,%ebp
movl %ecx,24(%esp)
incb %al
cmpl $-1,256(%edi)
je L001RC4_CHAR
movl (%edi,%eax,4),%ecx
# Fewer than 4 bytes: straight to the byte-at-a-time loop.
andl $-4,%edx
jz L002loop1
movl %ebp,32(%esp)
# Fewer than 8 bytes: use the 4-byte loop.
testl $-8,%edx
jz L003go4loop4
# Position-independent load of the capability word; bit 26 enables the
# 8-byte MMX loop.
call L004PIC_me_up
L004PIC_me_up:
popl %ebp
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L004PIC_me_up(%ebp),%ebp
btl $26,(%ebp)
jnc L003go4loop4
movl 32(%esp),%ebp
# The slot at -4(%edi) (the y field) temporarily holds the address of
# the last full 8-byte chunk; y is written back at L007done.
andl $-8,%edx
leal -8(%esi,%edx,1),%edx
movl %edx,-4(%edi)
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
movq (%esi),%mm0
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm2
jmp L005loop_mmx_enter
.align 4,0x90
L006loop_mmx:
# Eight keystream bytes per iteration: each byte is shifted into place
# in mm1 and merged into mm2, which already holds the input xor byte 0.
addb %cl,%bl
psllq $56,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movq (%esi),%mm0
movq %mm2,-8(%ebp,%esi,1)
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm2
L005loop_mmx_enter:
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm0,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $8,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $16,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $24,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $32,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $40,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
addb %cl,%bl
psllq $48,%mm1
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
incl %eax
addl %ecx,%edx
movzbl %al,%eax
movzbl %dl,%edx
pxor %mm1,%mm2
movl (%edi,%eax,4),%ecx
movd (%edi,%edx,4),%mm1
# Reduce ebx back to its low byte.
movl %ebx,%edx
xorl %ebx,%ebx
movb %dl,%bl
# The lea keeps the compare flags for the jb.
cmpl -4(%edi),%esi
leal 8(%esi),%esi
jb L006loop_mmx
psllq $56,%mm1
pxor %mm1,%mm2
movq %mm2,-8(%ebp,%esi,1)
emms
cmpl 24(%esp),%esi
je L007done
jmp L002loop1
.align 4,0x90
L003go4loop4:
# 28(%esp) = address of the last full 4-byte chunk.
leal -4(%esi,%edx,1),%edx
movl %edx,28(%esp)
L008loop4:
# Four keystream bytes are rotated into ebp, xored with one input dword
# and stored. ebp is scratch here; out - in is reloaded from 32(%esp).
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
movl (%edi,%eax,4),%ecx
movl (%edi,%edx,4),%ebp
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
rorl $8,%ebp
movl (%edi,%eax,4),%ecx
orl (%edi,%edx,4),%ebp
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
rorl $8,%ebp
movl (%edi,%eax,4),%ecx
orl (%edi,%edx,4),%ebp
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
rorl $8,%ebp
movl 32(%esp),%ecx
orl (%edi,%edx,4),%ebp
rorl $8,%ebp
xorl (%esi),%ebp
cmpl 28(%esp),%esi
movl %ebp,(%ecx,%esi,1)
leal 4(%esi),%esi
movl (%edi,%eax,4),%ecx
jb L008loop4
cmpl 24(%esp),%esi
je L007done
movl 32(%esp),%ebp
.align 4,0x90
L002loop1:
# Byte-at-a-time loop over a dword table; also handles the tail left by
# the wider loops.
addb %cl,%bl
movl (%edi,%ebx,4),%edx
movl %ecx,(%edi,%ebx,4)
movl %edx,(%edi,%eax,4)
addl %ecx,%edx
incb %al
andl $255,%edx
movl (%edi,%edx,4),%edx
xorb (%esi),%dl
leal 1(%esi),%esi
movl (%edi,%eax,4),%ecx
cmpl 24(%esp),%esi
movb %dl,-1(%ebp,%esi,1)
jb L002loop1
jmp L007done
.align 4,0x90
L001RC4_CHAR:
# Byte-wide table variant.
movzbl (%edi,%eax,1),%ecx
L009cloop1:
addb %cl,%bl
movzbl (%edi,%ebx,1),%edx
movb %cl,(%edi,%ebx,1)
movb %dl,(%edi,%eax,1)
addb %cl,%dl
movzbl (%edi,%edx,1),%edx
addb $1,%al
xorb (%esi),%dl
leal 1(%esi),%esi
movzbl (%edi,%eax,1),%ecx
cmpl 24(%esp),%esi
movb %dl,-1(%ebp,%esi,1)
jb L009cloop1
L007done:
# Undo the early x increment and write x and y back into the key.
decb %al
movl %ebx,-4(%edi)
movb %al,-8(%edi)
L000abort:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _asm_RC4_set_key
.private_extern _asm_RC4_set_key
.align 4
# _asm_RC4_set_key - RC4 key setup, cdecl.
#   Stack arguments (after the four pushes below):
#     20(%esp) -> edi  key structure
#     24(%esp) -> ebp  key length in bytes
#     28(%esp) -> esi  key bytes
#   Fills the state table at offset 8 of the key structure and zeroes
#   the two index fields at offsets 0 and 4. Bit 20 of the capability
#   word selects a byte-wide table (marked by -1 in the dword at table
#   offset 256); otherwise the table holds dwords.
#   NOTE(review): a zero key length is not guarded here; callers are
#   presumably required to pass a positive length.
_asm_RC4_set_key:
L_asm_RC4_set_key_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%ebp
movl 28(%esp),%esi
call L010PIC_me_up
L010PIC_me_up:
popl %edx
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010PIC_me_up(%edx),%edx
# edi = table base; esi = one past the key end; ebp = -length, so
# (esi + ebp) walks the key and reaches zero exactly at the end.
leal 8(%edi),%edi
leal (%esi,%ebp,1),%esi
negl %ebp
xorl %eax,%eax
# Park -length in the y slot for reloading on wrap-around.
movl %ebp,-4(%edi)
btl $20,(%edx)
jc L011c1stloop
.align 4,0x90
L012w1stloop:
# Identity fill of 256 dword entries; the loop ends when al wraps.
movl %eax,(%edi,%eax,4)
addb $1,%al
jnc L012w1stloop
xorl %ecx,%ecx
xorl %edx,%edx
.align 4,0x90
L013w2ndloop:
# Key mixing: dl += key byte + table[cl], then swap table[cl] and
# table[dl]. The movl between addl and jnz leaves the flags alone.
movl (%edi,%ecx,4),%eax
addb (%esi,%ebp,1),%dl
addb %al,%dl
addl $1,%ebp
movl (%edi,%edx,4),%ebx
jnz L014wnowrap
movl -4(%edi),%ebp
L014wnowrap:
movl %eax,(%edi,%edx,4)
movl %ebx,(%edi,%ecx,4)
addb $1,%cl
jnc L013w2ndloop
jmp L015exit
.align 4,0x90
L011c1stloop:
# Same two phases for the byte-wide table.
movb %al,(%edi,%eax,1)
addb $1,%al
jnc L011c1stloop
xorl %ecx,%ecx
xorl %edx,%edx
xorl %ebx,%ebx
.align 4,0x90
L016c2ndloop:
movb (%edi,%ecx,1),%al
addb (%esi,%ebp,1),%dl
addb %al,%dl
addl $1,%ebp
movb (%edi,%edx,1),%bl
jnz L017cnowrap
movl -4(%edi),%ebp
L017cnowrap:
movb %al,(%edi,%edx,1)
movb %bl,(%edi,%ecx,1)
addb $1,%cl
jnc L016c2ndloop
# Marker that tells _asm_RC4 the table is byte-wide.
movl $-1,256(%edi)
L015exit:
# Reset both indices; eax is zero on return.
xorl %eax,%eax
movl %eax,-8(%edi)
movl %eax,-4(%edi)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _RC4_options
.private_extern _RC4_options
.align 4
# _RC4_options - return in eax the address of a NUL-terminated string
# naming the RC4 code path chosen from the capability word:
#   bit 20 set          -> rc4(1x,char)   (table offset 12)
#   bit 26 set          -> rc4(8x,mmx)    (table offset 25)
#   neither             -> rc4(4x,int)    (table offset 0)
# Bit 20 takes priority over bit 26. Clobbers edx; no stack arguments.
_RC4_options:
L_RC4_options_begin:
# One position-independent anchor serves both address computations.
call L018pic_point
L018pic_point:
popl %edx
leal L019opts-L018pic_point(%edx),%eax
movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L018pic_point(%edx),%edx
movl (%edx),%edx
testl $0x100000,%edx
jnz L0211xchar
testl $0x4000000,%edx
jz L022ret
addl $25,%eax
L022ret:
ret
L0211xchar:
addl $12,%eax
ret
.align 6,0x90
L019opts:
# rc4(4x,int)
.byte 114,99,52,40,52,120,44,105,110,116,41,0
# rc4(1x,char)
.byte 114,99,52,40,49,120,44,99,104,97,114,41,0
# rc4(8x,mmx)
.byte 114,99,52,40,56,120,44,109,109,120,41,0
# RC4 for x86, CRYPTOGAMS by <appro@openssl.org>
.byte 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89
.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 6,0x90
# Mach-O non-lazy symbol pointer for _OPENSSL_ia32cap_P. The RC4 routines
# above load this slot position-independently to reach the capability word.
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol _OPENSSL_ia32cap_P
.long 0
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,834 @@
#if defined(__x86_64__)
.text
.p2align 4
/*
 * _vpaes_encrypt_core: transform the single 16-byte block held in %xmm0.
 *
 * In:   %xmm0        = input block
 *       %rdx         = key schedule; a 32-bit round counter is read from
 *                      240(%rdx) and one 16-byte round key is consumed
 *                      per iteration starting at (%rdx)
 *       %xmm9-%xmm15 = constants, as loaded by _vpaes_preheat
 * Out:  %xmm0        = result
 * Uses: %rax, %r9, %r10, %r11, %xmm1-%xmm5, flags
 *
 * Every ".byte 102,15,56,0,NN" is a hand-encoded SSSE3 pshufb
 * (66 0F 38 00 /r); ".byte 0xf3,0xc3" is "rep ret".
 *
 * Table accesses are relative to L$k_mc_backward (%r10) and therefore
 * depend on the order of the tables in _vpaes_consts.
 */
_vpaes_encrypt_core:
movq %rdx,%r9
movq $16,%r11
movl 240(%rdx),%eax
/* Split each byte of the input into low nibble (%xmm0) and high nibble (%xmm1). */
movdqa %xmm9,%xmm1
movdqa L$k_ipt(%rip),%xmm2
pandn %xmm0,%xmm1
movdqu (%r9),%xmm5
psrld $4,%xmm1
pand %xmm9,%xmm0
.byte 102,15,56,0,208
movdqa L$k_ipt+16(%rip),%xmm0
.byte 102,15,56,0,193
pxor %xmm5,%xmm2
addq $16,%r9
pxor %xmm2,%xmm0
leaq L$k_mc_backward(%rip),%r10
/* ZF is clear here (result of the addq on the key pointer), so the jnz at the
   bottom of L$enc_entry is taken on the first pass. */
jmp L$enc_entry
.p2align 4
L$enc_loop:
movdqa %xmm13,%xmm4
movdqa %xmm12,%xmm0
.byte 102,15,56,0,226
.byte 102,15,56,0,195
pxor %xmm5,%xmm4
movdqa %xmm15,%xmm5
pxor %xmm4,%xmm0
/* -64(%r11,%r10) indexes L$k_mc_forward, (%r11,%r10) indexes L$k_mc_backward. */
movdqa -64(%r11,%r10,1),%xmm1
.byte 102,15,56,0,234
movdqa (%r11,%r10,1),%xmm4
movdqa %xmm14,%xmm2
.byte 102,15,56,0,211
movdqa %xmm0,%xmm3
pxor %xmm5,%xmm2
.byte 102,15,56,0,193
addq $16,%r9
pxor %xmm2,%xmm0
.byte 102,15,56,0,220
/* %r11 cycles through 0,16,32,48. */
addq $16,%r11
pxor %xmm0,%xmm3
.byte 102,15,56,0,193
andq $48,%r11
/* Round counter; the resulting ZF is consumed by the jnz below because only
   SSE instructions (which leave EFLAGS alone) execute in between. */
subq $1,%rax
pxor %xmm3,%xmm0
L$enc_entry:
movdqa %xmm9,%xmm1
movdqa %xmm11,%xmm5
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm9,%xmm0
.byte 102,15,56,0,232
movdqa %xmm10,%xmm3
pxor %xmm1,%xmm0
.byte 102,15,56,0,217
movdqa %xmm10,%xmm4
pxor %xmm5,%xmm3
.byte 102,15,56,0,224
movdqa %xmm10,%xmm2
pxor %xmm5,%xmm4
.byte 102,15,56,0,211
movdqa %xmm10,%xmm3
pxor %xmm0,%xmm2
.byte 102,15,56,0,220
/* Next round key. */
movdqu (%r9),%xmm5
pxor %xmm1,%xmm3
jnz L$enc_loop
/* Final step: -96(%r10)/-80(%r10) are the two halves of L$k_sbo,
   64(%r11,%r10) indexes L$k_sr. */
movdqa -96(%r10),%xmm4
movdqa -80(%r10),%xmm0
.byte 102,15,56,0,226
pxor %xmm5,%xmm4
.byte 102,15,56,0,195
movdqa 64(%r11,%r10,1),%xmm1
pxor %xmm4,%xmm0
.byte 102,15,56,0,193
.byte 0xf3,0xc3
.p2align 4
/*
 * _vpaes_decrypt_core: inverse transform of the 16-byte block in %xmm0.
 *
 * In:   %xmm0        = input block
 *       %rdx         = key schedule; round counter read from 240(%rdx),
 *                      round keys consumed from (%rdx) upward
 *       %xmm9-%xmm11 = constants, as loaded by _vpaes_preheat
 * Out:  %xmm0        = result
 * Uses: %rax, %r9, %r10, %r11, %xmm1-%xmm5, flags
 *
 * ".byte 102,15,56,0,NN" is pshufb (66 0F 38 00 /r),
 * ".byte 102,15,58,15,237,12" is palignr $12 (66 0F 3A 0F /r ib),
 * ".byte 0xf3,0xc3" is "rep ret".
 *
 * Table accesses are relative to L$k_dsbd (%r10) and depend on the order
 * of the tables in _vpaes_consts.
 */
_vpaes_decrypt_core:
movq %rdx,%r9
movl 240(%rdx),%eax
movdqa %xmm9,%xmm1
movdqa L$k_dipt(%rip),%xmm2
pandn %xmm0,%xmm1
movq %rax,%r11
psrld $4,%xmm1
movdqu (%r9),%xmm5
/* %r11 = ((rounds << 4) ^ 48) & 48, later biased by %r10 for the final table lookup. */
shlq $4,%r11
pand %xmm9,%xmm0
.byte 102,15,56,0,208
movdqa L$k_dipt+16(%rip),%xmm0
xorq $48,%r11
leaq L$k_dsbd(%rip),%r10
.byte 102,15,56,0,193
andq $48,%r11
pxor %xmm5,%xmm2
movdqa L$k_mc_forward+48(%rip),%xmm5
pxor %xmm2,%xmm0
addq $16,%r9
/* The addq leaves ZF clear (non-zero address), so the jnz at the bottom of
   L$dec_entry is taken on the first pass. */
addq %r10,%r11
jmp L$dec_entry
.p2align 4
L$dec_loop:
/* -32(%r10)/-16(%r10): L$k_dsb9 */
movdqa -32(%r10),%xmm4
movdqa -16(%r10),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
/* 0(%r10)/16(%r10): L$k_dsbd */
movdqa 0(%r10),%xmm4
pxor %xmm1,%xmm0
movdqa 16(%r10),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,197
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
/* 32(%r10)/48(%r10): L$k_dsbb */
movdqa 32(%r10),%xmm4
pxor %xmm1,%xmm0
movdqa 48(%r10),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,197
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
/* 64(%r10)/80(%r10): L$k_dsbe */
movdqa 64(%r10),%xmm4
pxor %xmm1,%xmm0
movdqa 80(%r10),%xmm1
.byte 102,15,56,0,226
.byte 102,15,56,0,197
.byte 102,15,56,0,203
pxor %xmm4,%xmm0
addq $16,%r9
.byte 102,15,58,15,237,12
pxor %xmm1,%xmm0
/* Round counter; ZF survives to the jnz below because only SSE instructions follow. */
subq $1,%rax
L$dec_entry:
movdqa %xmm9,%xmm1
pandn %xmm0,%xmm1
movdqa %xmm11,%xmm2
psrld $4,%xmm1
pand %xmm9,%xmm0
.byte 102,15,56,0,208
movdqa %xmm10,%xmm3
pxor %xmm1,%xmm0
.byte 102,15,56,0,217
movdqa %xmm10,%xmm4
pxor %xmm2,%xmm3
.byte 102,15,56,0,224
pxor %xmm2,%xmm4
movdqa %xmm10,%xmm2
.byte 102,15,56,0,211
movdqa %xmm10,%xmm3
pxor %xmm0,%xmm2
.byte 102,15,56,0,220
/* Next round key (into %xmm0 here, unlike the encrypt path). */
movdqu (%r9),%xmm0
pxor %xmm1,%xmm3
jnz L$dec_loop
/* Final step: 96(%r10)/112(%r10) are L$k_dsbo; -352(%r11) lands inside L$k_sr. */
movdqa 96(%r10),%xmm4
.byte 102,15,56,0,226
pxor %xmm0,%xmm4
movdqa 112(%r10),%xmm0
movdqa -352(%r11),%xmm2
.byte 102,15,56,0,195
pxor %xmm4,%xmm0
.byte 102,15,56,0,194
.byte 0xf3,0xc3
.p2align 4
/*
 * _vpaes_schedule_core: expand a user key into a round-key schedule.
 *
 * In:   %rdi = user key bytes
 *       %esi = key size in bits (compared against 192: below -> 128 path,
 *              equal -> 192 path, above -> 256 path)
 *       %rdx = where the first round key is stored; advanced by +16
 *              (encrypt) or -16 (decrypt) by _vpaes_schedule_mangle
 *       %rcx = 0 for an encryption schedule, non-zero for decryption
 *       %r8  = byte offset (0/16/32/48) into L$k_sr
 * Out:  schedule written through %rdx; %xmm0-%xmm7 are zeroed on exit.
 * Uses: %rsi (as loop counter), %r10, %r11, %xmm8, flags, plus whatever
 *       the helper routines touch.
 *
 * ".byte 102,15,56,0,NN" is pshufb, ".byte 102,15,58,15,198,8" is
 * palignr $8, ".byte 0xf3,0xc3" is "rep ret".
 */
_vpaes_schedule_core:
call _vpaes_preheat
movdqa L$k_rcon(%rip),%xmm8
movdqu (%rdi),%xmm0
/* Keep the untransformed key in %xmm3 for the decrypt case below. */
movdqa %xmm0,%xmm3
leaq L$k_ipt(%rip),%r11
call _vpaes_schedule_transform
movdqa %xmm0,%xmm7
leaq L$k_sr(%rip),%r10
testq %rcx,%rcx
jnz L$schedule_am_decrypting
movdqu %xmm0,(%rdx)
jmp L$schedule_go
L$schedule_am_decrypting:
movdqa (%r8,%r10,1),%xmm1
.byte 102,15,56,0,217
movdqu %xmm3,(%rdx)
xorq $48,%r8
L$schedule_go:
cmpl $192,%esi
ja L$schedule_256
je L$schedule_192
L$schedule_128:
/* 10 calls to _vpaes_schedule_round; the last result goes through
   L$schedule_mangle_last instead of _vpaes_schedule_mangle. */
movl $10,%esi
L$oop_schedule_128:
call _vpaes_schedule_round
decq %rsi
jz L$schedule_mangle_last
call _vpaes_schedule_mangle
jmp L$oop_schedule_128
.p2align 4
L$schedule_192:
/* Load key bytes 8..23 and keep the transformed value in %xmm6 with its
   low 64 bits cleared. */
movdqu 8(%rdi),%xmm0
call _vpaes_schedule_transform
movdqa %xmm0,%xmm6
pxor %xmm4,%xmm4
movhlps %xmm4,%xmm6
movl $4,%esi
L$oop_schedule_192:
call _vpaes_schedule_round
.byte 102,15,58,15,198,8
call _vpaes_schedule_mangle
call _vpaes_schedule_192_smear
call _vpaes_schedule_mangle
call _vpaes_schedule_round
decq %rsi
jz L$schedule_mangle_last
call _vpaes_schedule_mangle
call _vpaes_schedule_192_smear
jmp L$oop_schedule_192
.p2align 4
L$schedule_256:
/* Load key bytes 16..31. */
movdqu 16(%rdi),%xmm0
call _vpaes_schedule_transform
movl $7,%esi
L$oop_schedule_256:
call _vpaes_schedule_mangle
movdqa %xmm0,%xmm6
call _vpaes_schedule_round
decq %rsi
jz L$schedule_mangle_last
call _vpaes_schedule_mangle
/* Second half-step: broadcast the top dword and run only the "low round"
   part on the other key half, temporarily swapping %xmm7 with %xmm6. */
pshufd $255,%xmm0,%xmm0
movdqa %xmm7,%xmm5
movdqa %xmm6,%xmm7
call _vpaes_schedule_low_round
movdqa %xmm5,%xmm7
jmp L$oop_schedule_256
.p2align 4
L$schedule_mangle_last:
/* Decrypt: transform with L$k_deskew and store at %rdx-16.
   Encrypt: permute via L$k_sr, transform with L$k_opt and store at %rdx+16. */
leaq L$k_deskew(%rip),%r11
testq %rcx,%rcx
jnz L$schedule_mangle_last_dec
movdqa (%r8,%r10,1),%xmm1
.byte 102,15,56,0,193
leaq L$k_opt(%rip),%r11
addq $32,%rdx
L$schedule_mangle_last_dec:
addq $-16,%rdx
pxor L$k_s63(%rip),%xmm0
call _vpaes_schedule_transform
movdqu %xmm0,(%rdx)
/* Clear every XMM register that held key material. */
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
.byte 0xf3,0xc3
.p2align 4
/*
 * _vpaes_schedule_192_smear: helper for the 192-bit key schedule.
 *
 * In:   %xmm6, %xmm7 = schedule state kept by _vpaes_schedule_core
 * Out:  %xmm0 = %xmm6 ^ pshufd($128,%xmm6) ^ pshufd($254,%xmm7)
 *       %xmm6 = the same value with its low 64 bits cleared
 * Uses: %xmm1 (left zero)
 */
_vpaes_schedule_192_smear:
pshufd $128,%xmm6,%xmm1
pshufd $254,%xmm7,%xmm0
pxor %xmm1,%xmm6
pxor %xmm1,%xmm1
pxor %xmm0,%xmm6
movdqa %xmm6,%xmm0
/* movhlps copies the (zero) high half of %xmm1 into the low half of %xmm6. */
movhlps %xmm1,%xmm6
.byte 0xf3,0xc3
.p2align 4
/*
 * _vpaes_schedule_round / _vpaes_schedule_low_round: one key-schedule step.
 *
 * _vpaes_schedule_round first folds one byte of the round-constant
 * register %xmm8 into %xmm7 (rotating %xmm8 for the next call) and
 * rotates/broadcasts the top dword of %xmm0, then FALLS THROUGH into
 * _vpaes_schedule_low_round. _vpaes_schedule_low_round is also called
 * directly (256-bit schedule) to skip that first part.
 *
 * In:   %xmm0 = input word(s), %xmm7 = previous round key,
 *       %xmm8 = round-constant state, %xmm9-%xmm13 = _vpaes_preheat constants
 * Out:  %xmm0 = %xmm7 = new round key
 * Uses: %xmm1-%xmm4
 *
 * ".byte 102,[65|69,]15,58,15,..." are palignr encodings (with a REX
 * prefix when %xmm8 is involved); ".byte 102,15,56,0,NN" is pshufb;
 * ".byte 0xf3,0xc3" is "rep ret".
 */
_vpaes_schedule_round:
pxor %xmm1,%xmm1
.byte 102,65,15,58,15,200,15
.byte 102,69,15,58,15,192,15
pxor %xmm1,%xmm7
pshufd $255,%xmm0,%xmm0
.byte 102,15,58,15,192,1
_vpaes_schedule_low_round:
/* Prefix-XOR the four dwords of %xmm7 (shift by 4 then by 8 bytes), then add L$k_s63. */
movdqa %xmm7,%xmm1
pslldq $4,%xmm7
pxor %xmm1,%xmm7
movdqa %xmm7,%xmm1
pslldq $8,%xmm7
pxor %xmm1,%xmm7
pxor L$k_s63(%rip),%xmm7
/* Nibble-split %xmm0 and run the table lookups through %xmm10/%xmm11. */
movdqa %xmm9,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm9,%xmm0
movdqa %xmm11,%xmm2
.byte 102,15,56,0,208
pxor %xmm1,%xmm0
movdqa %xmm10,%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
movdqa %xmm10,%xmm4
.byte 102,15,56,0,224
pxor %xmm2,%xmm4
movdqa %xmm10,%xmm2
.byte 102,15,56,0,211
pxor %xmm0,%xmm2
movdqa %xmm10,%xmm3
.byte 102,15,56,0,220
pxor %xmm1,%xmm3
/* Output lookups through %xmm13/%xmm12, then combine with %xmm7. */
movdqa %xmm13,%xmm4
.byte 102,15,56,0,226
movdqa %xmm12,%xmm0
.byte 102,15,56,0,195
pxor %xmm4,%xmm0
pxor %xmm7,%xmm0
movdqa %xmm0,%xmm7
.byte 0xf3,0xc3
.p2align 4
/*
 * _vpaes_schedule_transform: apply a two-table nibble lookup to %xmm0.
 *
 * In:   %xmm0 = value, %r11 = pointer to a 32-byte table pair
 *       (low-nibble table at (%r11), high-nibble table at 16(%r11)),
 *       %xmm9 = nibble mask from _vpaes_preheat
 * Out:  %xmm0 = table_lo[low nibbles] ^ table_hi[high nibbles]
 * Uses: %xmm1, %xmm2
 *
 * ".byte 102,15,56,0,NN" is pshufb; ".byte 0xf3,0xc3" is "rep ret".
 */
_vpaes_schedule_transform:
movdqa %xmm9,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm9,%xmm0
movdqa (%r11),%xmm2
.byte 102,15,56,0,208
movdqa 16(%r11),%xmm0
.byte 102,15,56,0,193
pxor %xmm2,%xmm0
.byte 0xf3,0xc3
.p2align 4
/*
 * _vpaes_schedule_mangle: convert the round key in %xmm0 to its stored
 * form and write it to the schedule.
 *
 * In:   %xmm0 = round key (left unmodified)
 *       %rcx  = 0 for encryption, non-zero for decryption
 *       %rdx  = schedule cursor; moved +16 (encrypt) or -16 (decrypt)
 *               before the store
 *       %r8   = byte offset into L$k_sr; decremented by 16 modulo 64
 *       %r10  = address of L$k_sr
 *       %xmm9 = nibble mask from _vpaes_preheat
 * Out:  16 bytes stored at the updated (%rdx)
 * Uses: %r11 (decrypt path), %xmm1-%xmm5, flags
 *
 * ".byte 102,15,56,0,NN" is pshufb; ".byte 0xf3,0xc3" is "rep ret".
 */
_vpaes_schedule_mangle:
movdqa %xmm0,%xmm4
movdqa L$k_mc_forward(%rip),%xmm5
testq %rcx,%rcx
jnz L$schedule_mangle_dec
/* Encrypt: XOR with L$k_s63, then accumulate three successive
   L$k_mc_forward rotations of the value. */
addq $16,%rdx
pxor L$k_s63(%rip),%xmm4
.byte 102,15,56,0,229
movdqa %xmm4,%xmm3
.byte 102,15,56,0,229
pxor %xmm4,%xmm3
.byte 102,15,56,0,229
pxor %xmm4,%xmm3
jmp L$schedule_mangle_both
.p2align 4
L$schedule_mangle_dec:
/* Decrypt: four table pairs starting at L$k_dksd (offsets 0..112 cover
   L$k_dksd, L$k_dksb, L$k_dkse, L$k_dks9), each followed by a
   L$k_mc_forward rotation except the last. */
leaq L$k_dksd(%rip),%r11
movdqa %xmm9,%xmm1
pandn %xmm4,%xmm1
psrld $4,%xmm1
pand %xmm9,%xmm4
movdqa 0(%r11),%xmm2
.byte 102,15,56,0,212
movdqa 16(%r11),%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
.byte 102,15,56,0,221
movdqa 32(%r11),%xmm2
.byte 102,15,56,0,212
pxor %xmm3,%xmm2
movdqa 48(%r11),%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
.byte 102,15,56,0,221
movdqa 64(%r11),%xmm2
.byte 102,15,56,0,212
pxor %xmm3,%xmm2
movdqa 80(%r11),%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
.byte 102,15,56,0,221
movdqa 96(%r11),%xmm2
.byte 102,15,56,0,212
pxor %xmm3,%xmm2
movdqa 112(%r11),%xmm3
.byte 102,15,56,0,217
pxor %xmm2,%xmm3
addq $-16,%rdx
L$schedule_mangle_both:
/* Final byte permutation selected by %r8, then step %r8 to the previous entry. */
movdqa (%r8,%r10,1),%xmm1
.byte 102,15,56,0,217
addq $-16,%r8
andq $48,%r8
movdqu %xmm3,(%rdx)
.byte 0xf3,0xc3
/*
 * vpaes_set_encrypt_key
 *
 * In:   %rdi = user key, %esi = key size in bits, %rdx = key structure
 * Out:  %eax = 0; (bits >> 5) + 5 is stored at 240(%rdx) and the schedule
 *       is written starting at (%rdx) by _vpaes_schedule_core.
 * NOTE(review): presumably int vpaes_set_encrypt_key(const uint8_t *key,
 * int bits, AES_KEY *out) -- confirm against the C prototype.
 */
.globl _vpaes_set_encrypt_key
.private_extern _vpaes_set_encrypt_key
.p2align 4
_vpaes_set_encrypt_key:
/* 128 -> 9, 192 -> 11, 256 -> 13 */
movl %esi,%eax
shrl $5,%eax
addl $5,%eax
movl %eax,240(%rdx)
/* %ecx = 0: encryption schedule; %r8d = 48: initial L$k_sr offset. */
movl $0,%ecx
movl $48,%r8d
call _vpaes_schedule_core
xorl %eax,%eax
.byte 0xf3,0xc3
/*
 * vpaes_set_decrypt_key
 *
 * In:   %rdi = user key, %esi = key size in bits, %rdx = key structure
 * Out:  %eax = 0; (bits >> 5) + 5 is stored at 240(%rdx). The schedule is
 *       written backwards: %rdx is first advanced to 16 + 16*count bytes
 *       past the start and _vpaes_schedule_core steps it down.
 * NOTE(review): presumably int vpaes_set_decrypt_key(const uint8_t *key,
 * int bits, AES_KEY *out) -- confirm against the C prototype.
 */
.globl _vpaes_set_decrypt_key
.private_extern _vpaes_set_decrypt_key
.p2align 4
_vpaes_set_decrypt_key:
movl %esi,%eax
shrl $5,%eax
addl $5,%eax
movl %eax,240(%rdx)
shll $4,%eax
leaq 16(%rdx,%rax,1),%rdx
/* %ecx = 1: decryption schedule. */
movl $1,%ecx
/* %r8d = ((bits >> 1) & 32) ^ 32: 32 for 128/256-bit keys, 0 for 192-bit. */
movl %esi,%r8d
shrl $1,%r8d
andl $32,%r8d
xorl $32,%r8d
call _vpaes_schedule_core
xorl %eax,%eax
.byte 0xf3,0xc3
/*
 * vpaes_encrypt: process one 16-byte block.
 *
 * In:   %rdi = input (16 bytes, may be unaligned), %rsi = output,
 *       %rdx = key schedule (consumed by _vpaes_encrypt_core)
 * Out:  16 bytes written to (%rsi)
 * Uses: everything _vpaes_preheat and _vpaes_encrypt_core touch.
 */
.globl _vpaes_encrypt
.private_extern _vpaes_encrypt
.p2align 4
_vpaes_encrypt:
movdqu (%rdi),%xmm0
call _vpaes_preheat
call _vpaes_encrypt_core
movdqu %xmm0,(%rsi)
.byte 0xf3,0xc3
/*
 * vpaes_decrypt: process one 16-byte block.
 *
 * In:   %rdi = input (16 bytes, may be unaligned), %rsi = output,
 *       %rdx = key schedule (consumed by _vpaes_decrypt_core)
 * Out:  16 bytes written to (%rsi)
 * Uses: everything _vpaes_preheat and _vpaes_decrypt_core touch.
 */
.globl _vpaes_decrypt
.private_extern _vpaes_decrypt
.p2align 4
_vpaes_decrypt:
movdqu (%rdi),%xmm0
call _vpaes_preheat
call _vpaes_decrypt_core
movdqu %xmm0,(%rsi)
.byte 0xf3,0xc3
/*
 * vpaes_cbc_encrypt: CBC-chain whole 16-byte blocks.
 *
 * In:   %rdi = input, %rsi = output, %rdx = length in bytes,
 *       %rcx = key schedule, %r8 = 16-byte IV (read, then updated),
 *       %r9d = non-zero to encrypt, zero to decrypt
 * Out:  output written; (%r8) receives the chaining value for a
 *       following call.
 *
 * Returns without touching anything when length < 16. Processing stops
 * once fewer than 16 bytes remain, so a trailing partial block is ignored.
 * Uses %xmm6/%xmm7 for the chaining value in addition to what the core
 * routines touch.
 */
.globl _vpaes_cbc_encrypt
.private_extern _vpaes_cbc_encrypt
.p2align 4
_vpaes_cbc_encrypt:
/* The core routines expect the key in %rdx; the length moves to %rcx. */
xchgq %rcx,%rdx
subq $16,%rcx
jc L$cbc_abort
movdqu (%r8),%xmm6
/* %rsi becomes (out - in) so one advancing pointer addresses both buffers. */
subq %rdi,%rsi
call _vpaes_preheat
cmpl $0,%r9d
je L$cbc_dec_loop
jmp L$cbc_enc_loop
.p2align 4
L$cbc_enc_loop:
movdqu (%rdi),%xmm0
pxor %xmm6,%xmm0
call _vpaes_encrypt_core
movdqa %xmm0,%xmm6
movdqu %xmm0,(%rsi,%rdi,1)
leaq 16(%rdi),%rdi
subq $16,%rcx
jnc L$cbc_enc_loop
jmp L$cbc_done
.p2align 4
L$cbc_dec_loop:
movdqu (%rdi),%xmm0
/* Keep the ciphertext block: it is the chaining value for the next block. */
movdqa %xmm0,%xmm7
call _vpaes_decrypt_core
pxor %xmm6,%xmm0
movdqa %xmm7,%xmm6
movdqu %xmm0,(%rsi,%rdi,1)
leaq 16(%rdi),%rdi
subq $16,%rcx
jnc L$cbc_dec_loop
L$cbc_done:
movdqu %xmm6,(%r8)
L$cbc_abort:
.byte 0xf3,0xc3
.p2align 4
/*
 * _vpaes_preheat: load the constants shared by the vpaes routines.
 *
 * Out:  %r10   = address of L$k_s0F
 *       %xmm10 = L$k_inv,    %xmm11 = L$k_inv+16
 *       %xmm9  = L$k_s0F
 *       %xmm13 = L$k_sb1,    %xmm12 = L$k_sb1+16
 *       %xmm15 = L$k_sb2,    %xmm14 = L$k_sb2+16
 *
 * The offsets are relative to L$k_s0F and rely on the table order in
 * _vpaes_consts (L$k_inv, L$k_s0F, L$k_ipt, L$k_sb1, L$k_sb2).
 */
_vpaes_preheat:
leaq L$k_s0F(%rip),%r10
movdqa -32(%r10),%xmm10
movdqa -16(%r10),%xmm11
movdqa 0(%r10),%xmm9
movdqa 48(%r10),%xmm13
movdqa 64(%r10),%xmm12
movdqa 80(%r10),%xmm15
movdqa 96(%r10),%xmm14
.byte 0xf3,0xc3
.p2align 6
/*
 * Constant tables for the vpaes routines (64-byte aligned by the
 * preceding .p2align 6; all loads use movdqa).
 *
 * WARNING: the ORDER and SIZE of these tables is load-bearing. Several
 * routines take the address of one table and reach its neighbours with
 * fixed displacements:
 *   _vpaes_preheat        : L$k_s0F  -32 .. +96  (k_inv .. k_sb2)
 *   _vpaes_encrypt_core   : L$k_mc_backward -96 / -80 (k_sbo),
 *                           -64+idx (k_mc_forward), +64+idx (k_sr)
 *   _vpaes_decrypt_core   : L$k_dsbd -32 .. +112 (k_dsb9 .. k_dsbo),
 *                           -352+idx (k_sr)
 *   _vpaes_schedule_mangle: L$k_dksd 0 .. +112 (k_dksd .. k_dks9)
 * Do not insert, remove or reorder entries without updating those offsets.
 */
_vpaes_consts:
L$k_inv:
.quad 0x0E05060F0D080180, 0x040703090A0B0C02
.quad 0x01040A060F0B0780, 0x030D0E0C02050809
/* Low-nibble mask. */
L$k_s0F:
.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
/* Input transform (lo, hi). */
L$k_ipt:
.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
L$k_sb1:
.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
L$k_sb2:
.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
L$k_sbo:
.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
/* Four 16-byte pshufb masks each, indexed by 0/16/32/48. */
L$k_mc_forward:
.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
.quad 0x080B0A0904070605, 0x000302010C0F0E0D
.quad 0x0C0F0E0D080B0A09, 0x0407060500030201
.quad 0x000302010C0F0E0D, 0x080B0A0904070605
L$k_mc_backward:
.quad 0x0605040702010003, 0x0E0D0C0F0A09080B
.quad 0x020100030E0D0C0F, 0x0A09080B06050407
.quad 0x0E0D0C0F0A09080B, 0x0605040702010003
.quad 0x0A09080B06050407, 0x020100030E0D0C0F
L$k_sr:
.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
.quad 0x030E09040F0A0500, 0x0B06010C07020D08
.quad 0x0F060D040B020900, 0x070E050C030A0108
.quad 0x0B0E0104070A0D00, 0x0306090C0F020508
/* Key-schedule constants. */
L$k_rcon:
.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
L$k_s63:
.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
L$k_opt:
.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
L$k_deskew:
.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
/* Decryption key-schedule tables (read as one 128-byte run from L$k_dksd). */
L$k_dksd:
.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
L$k_dksb:
.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
L$k_dkse:
.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
L$k_dks9:
.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
/* Decryption tables (L$k_dsb9 .. L$k_dsbo are read relative to L$k_dsbd). */
L$k_dipt:
.quad 0x0F505B040B545F00, 0x154A411E114E451A
.quad 0x86E383E660056500, 0x12771772F491F194
L$k_dsb9:
.quad 0x851C03539A86D600, 0xCAD51F504F994CC9
.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
L$k_dsbd:
.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
L$k_dsbb:
.quad 0xD022649296B44200, 0x602646F6B0F2D404
.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
L$k_dsbe:
.quad 0x46F2929626D4D000, 0x2242600464B4F6B0
.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
L$k_dsbo:
.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.p2align 6
#endif

View File

@ -0,0 +1,34 @@
#if defined(__x86_64__)
.text
/*
 * rsaz_avx2_eligible: stub build -- always returns 0 in %eax, i.e. the
 * AVX2 RSAZ code path is reported as unavailable.
 */
.globl _rsaz_avx2_eligible
.private_extern _rsaz_avx2_eligible
_rsaz_avx2_eligible:
xorl %eax,%eax
.byte 0xf3,0xc3
/*
 * Placeholder entry points for the six rsaz_1024_*_avx2 routines.
 * All six symbols alias the same two instructions: ".byte 0x0f,0x0b" is
 * ud2 (raises an invalid-opcode exception), followed by an unreachable
 * "rep ret". Calling any of them therefore traps; callers are expected
 * to check rsaz_avx2_eligible first.
 */
.globl _rsaz_1024_sqr_avx2
.private_extern _rsaz_1024_sqr_avx2
.globl _rsaz_1024_mul_avx2
.private_extern _rsaz_1024_mul_avx2
.globl _rsaz_1024_norm2red_avx2
.private_extern _rsaz_1024_norm2red_avx2
.globl _rsaz_1024_red2norm_avx2
.private_extern _rsaz_1024_red2norm_avx2
.globl _rsaz_1024_scatter5_avx2
.private_extern _rsaz_1024_scatter5_avx2
.globl _rsaz_1024_gather5_avx2
.private_extern _rsaz_1024_gather5_avx2
_rsaz_1024_sqr_avx2:
_rsaz_1024_mul_avx2:
_rsaz_1024_norm2red_avx2:
_rsaz_1024_red2norm_avx2:
_rsaz_1024_scatter5_avx2:
_rsaz_1024_gather5_avx2:
.byte 0x0f,0x0b
.byte 0xf3,0xc3
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,726 @@
#if defined(__x86_64__)
.text
/*
 * bn_mul_mont: Montgomery multiplication entry point and dispatcher.
 *
 * In:   %rdi = result, %rsi = first operand, %rdx = second operand,
 *       %rcx = modulus, %r8 = pointer to the n0 word, %r9d = word count
 * Out:  %rax = 1 (set by whichever path is taken)
 *
 * Path selection on the word count (num):
 *   num % 4 != 0  or  num < 8          -> L$mul_enter   (generic loop)
 *   operands differ (%rdx != %rsi)     -> L$mul4x_enter (4-way unrolled)
 *   same operand and num % 8 == 0      -> L$sqr8x_enter (squaring)
 *   same operand otherwise             -> L$mul4x_enter
 */
.globl _bn_mul_mont
.private_extern _bn_mul_mont
.p2align 4
_bn_mul_mont:
testl $3,%r9d
jnz L$mul_enter
cmpl $8,%r9d
jb L$mul_enter
cmpq %rsi,%rdx
jne L$mul4x_enter
testl $7,%r9d
jz L$sqr8x_enter
jmp L$mul4x_enter
.p2align 4
/*
 * L$mul_enter: generic (one word per iteration) Montgomery multiply.
 *
 * Registers on entry are those of bn_mul_mont: %rdi = result,
 * %rsi = a, %rdx = b, %rcx = modulus n, %r8 = &n0, %r9d = num.
 *
 * Stack frame: num+2 qwords are reserved below the saved registers and
 * %rsp is rounded down to a 1024-byte boundary. tp[0..num] lives at
 * (%rsp); the caller's %rsp (after the six pushes) is kept at
 * 8(%rsp,%r9,8) and used to unwind in the epilogue.
 *
 * Register roles in the loops: %r12 = b, %rbx = b[i], %r8 = n0,
 * %rbp = per-row multiplier m = tp[0]*n0, %r14 = outer index i,
 * %r15 = inner index j, %r10/%r11/%r13 = running partial sums/carries.
 */
L$mul_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
/* Zero-extend num to 64 bits. */
movl %r9d,%r9d
leaq 2(%r9),%r10
movq %rsp,%r11
negq %r10
leaq (%rsp,%r10,8),%rsp
andq $-1024,%rsp
movq %r11,8(%rsp,%r9,8)
L$mul_body:
/* mulq clobbers %rdx, so b moves to %r12. */
movq %rdx,%r12
movq (%r8),%r8
movq (%r12),%rbx
movq (%rsi),%rax
xorq %r14,%r14
xorq %r15,%r15
/* First row (i = 0): tp = a*b[0] + m*n, shifted down one word. */
movq %r8,%rbp
mulq %rbx
movq %rax,%r10
movq (%rcx),%rax
imulq %r10,%rbp
movq %rdx,%r11
mulq %rbp
addq %rax,%r10
movq 8(%rsi),%rax
adcq $0,%rdx
movq %rdx,%r13
leaq 1(%r15),%r15
jmp L$1st_enter
.p2align 4
L$1st:
addq %rax,%r13
movq (%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%r13
movq %r10,%r11
adcq $0,%rdx
movq %r13,-16(%rsp,%r15,8)
movq %rdx,%r13
L$1st_enter:
mulq %rbx
addq %rax,%r11
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
leaq 1(%r15),%r15
movq %rdx,%r10
mulq %rbp
cmpq %r9,%r15
jne L$1st
addq %rax,%r13
/* Reload a[0] for the next row. */
movq (%rsi),%rax
adcq $0,%rdx
addq %r11,%r13
adcq $0,%rdx
movq %r13,-16(%rsp,%r15,8)
movq %rdx,%r13
movq %r10,%r11
xorq %rdx,%rdx
addq %r11,%r13
adcq $0,%rdx
/* Top two words of the row: tp[num-1] and the carry word tp[num]. */
movq %r13,-8(%rsp,%r9,8)
movq %rdx,(%rsp,%r9,8)
leaq 1(%r14),%r14
jmp L$outer
.p2align 4
L$outer:
/* Rows i = 1 .. num-1: tp = (tp + a*b[i] + m*n) shifted down one word. */
movq (%r12,%r14,8),%rbx
xorq %r15,%r15
movq %r8,%rbp
movq (%rsp),%r10
mulq %rbx
addq %rax,%r10
movq (%rcx),%rax
adcq $0,%rdx
imulq %r10,%rbp
movq %rdx,%r11
mulq %rbp
addq %rax,%r10
movq 8(%rsi),%rax
adcq $0,%rdx
movq 8(%rsp),%r10
movq %rdx,%r13
leaq 1(%r15),%r15
jmp L$inner_enter
.p2align 4
L$inner:
addq %rax,%r13
movq (%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
movq (%rsp,%r15,8),%r10
adcq $0,%rdx
movq %r13,-16(%rsp,%r15,8)
movq %rdx,%r13
L$inner_enter:
mulq %rbx
addq %rax,%r11
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
addq %r11,%r10
movq %rdx,%r11
adcq $0,%r11
leaq 1(%r15),%r15
mulq %rbp
cmpq %r9,%r15
jne L$inner
addq %rax,%r13
movq (%rsi),%rax
adcq $0,%rdx
addq %r10,%r13
movq (%rsp,%r15,8),%r10
adcq $0,%rdx
movq %r13,-16(%rsp,%r15,8)
movq %rdx,%r13
xorq %rdx,%rdx
addq %r11,%r13
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-8(%rsp,%r9,8)
movq %rdx,(%rsp,%r9,8)
leaq 1(%r14),%r14
cmpq %r9,%r14
jb L$outer
/* Final reduction, step 1: result = tp - n, word by word.
   The xorq also clears CF for the first sbbq; leaq/decq/movq inside the
   loop preserve CF, so the borrow chain runs across iterations. */
xorq %r14,%r14
movq (%rsp),%rax
leaq (%rsp),%rsi
movq %r9,%r15
jmp L$sub
.p2align 4
L$sub: sbbq (%rcx,%r14,8),%rax
movq %rax,(%rdi,%r14,8)
movq 8(%rsi,%r14,8),%rax
leaq 1(%r14),%r14
decq %r15
jnz L$sub
/* %rax = tp[num] - borrow, used below as an AND mask.
   NOTE(review): this presumes the value is 0 or all-ones, i.e. tp[num] is
   1 only when the subtraction borrowed -- confirm against the algorithm's
   bounds. */
sbbq $0,%rax
xorq %r14,%r14
movq %r9,%r15
.p2align 4
L$copy:
/* Branch-free select: result[i] = mask ? tp[i] : (tp - n)[i]. */
movq (%rsp,%r14,8),%rsi
movq (%rdi,%r14,8),%rcx
xorq %rcx,%rsi
andq %rax,%rsi
xorq %rcx,%rsi
/* Overwrite the scratch word with the loop index so no intermediate
   value is left on the stack. */
movq %r14,(%rsp,%r14,8)
movq %rsi,(%rdi,%r14,8)
leaq 1(%r14),%r14
subq $1,%r15
jnz L$copy
/* Epilogue: recover the saved stack pointer and callee-saved registers. */
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
L$mul_epilogue:
.byte 0xf3,0xc3
.p2align 4
/*
 * bn_mul4x_mont / L$mul4x_enter: Montgomery multiply, inner loops
 * unrolled four words per iteration. Reached from bn_mul_mont when the
 * word count is a multiple of 4 and at least 8. (The bn_mul4x_mont label
 * has no leading underscore and no .globl, so it is file-local.)
 *
 * Registers on entry: %rdi = result, %rsi = a, %rdx = b, %rcx = n,
 * %r8 = &n0, %r9d = num.
 *
 * Stack frame: num+4 qwords below the saved registers, %rsp rounded down
 * to 1024 bytes. tp[] at (%rsp); caller's %rsp at 8(%rsp,%r9,8); the
 * result pointer is parked at 16(%rsp,%r9,8) because %rdi is reused as
 * an accumulator.
 *
 * This part covers the prologue and the first row (b[0]).
 */
bn_mul4x_mont:
L$mul4x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
/* Zero-extend num to 64 bits. */
movl %r9d,%r9d
leaq 4(%r9),%r10
movq %rsp,%r11
negq %r10
leaq (%rsp,%r10,8),%rsp
andq $-1024,%rsp
movq %r11,8(%rsp,%r9,8)
L$mul4x_body:
movq %rdi,16(%rsp,%r9,8)
/* mulq clobbers %rdx, so b moves to %r12. */
movq %rdx,%r12
movq (%r8),%r8
movq (%r12),%rbx
movq (%rsi),%rax
xorq %r14,%r14
xorq %r15,%r15
/* m = (a[0]*b[0]) * n0; words 0 and 1 are handled before the loop. */
movq %r8,%rbp
mulq %rbx
movq %rax,%r10
movq (%rcx),%rax
imulq %r10,%rbp
movq %rdx,%r11
mulq %rbp
addq %rax,%r10
movq 8(%rsi),%rax
adcq $0,%rdx
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq 8(%rcx),%rax
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq 16(%rsi),%rax
adcq $0,%rdx
addq %r11,%rdi
leaq 4(%r15),%r15
adcq $0,%rdx
movq %rdi,(%rsp)
movq %rdx,%r13
jmp L$1st4x
.p2align 4
L$1st4x:
/* Four words per pass; %r15 runs 4, 8, ... and indexes are biased by it. */
mulq %rbx
addq %rax,%r10
movq -16(%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq -8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-24(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq -8(%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq (%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-16(%rsp,%r15,8)
movq %rdx,%r13
mulq %rbx
addq %rax,%r10
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq 8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-8(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq 8(%rcx,%r15,8),%rax
adcq $0,%rdx
leaq 4(%r15),%r15
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq -16(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-32(%rsp,%r15,8)
movq %rdx,%r13
cmpq %r9,%r15
jb L$1st4x
/* Tail of the first row: last two words plus the carry word. */
mulq %rbx
addq %rax,%r10
movq -16(%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq -8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-24(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq -8(%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
/* Reload a[0] for the next row. */
movq (%rsi),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-16(%rsp,%r15,8)
movq %rdx,%r13
xorq %rdi,%rdi
addq %r10,%r13
adcq $0,%rdi
movq %r13,-8(%rsp,%r15,8)
movq %rdi,(%rsp,%r15,8)
leaq 1(%r14),%r14
.p2align 2
/*
 * L$outer4x: rows i = 1 .. num-1 of the 4-way unrolled Montgomery
 * multiply. Same structure as the first row, except that every partial
 * sum also adds the previous row's tp[] word read from the stack.
 *
 * Live registers: %r12 = b, %r14 = i, %r15 = j (steps of 4), %r8 = n0,
 * %rbx = b[i], %rbp = m, %rsi = a, %rcx = n, %r9 = num,
 * %rax = a[0] on entry to each row.
 */
L$outer4x:
movq (%r12,%r14,8),%rbx
xorq %r15,%r15
movq (%rsp),%r10
movq %r8,%rbp
mulq %rbx
addq %rax,%r10
movq (%rcx),%rax
adcq $0,%rdx
/* m = (tp[0] + a[0]*b[i]) * n0 */
imulq %r10,%rbp
movq %rdx,%r11
mulq %rbp
addq %rax,%r10
movq 8(%rsi),%rax
adcq $0,%rdx
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq 8(%rcx),%rax
adcq $0,%rdx
addq 8(%rsp),%r11
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq 16(%rsi),%rax
adcq $0,%rdx
addq %r11,%rdi
leaq 4(%r15),%r15
adcq $0,%rdx
movq %rdi,(%rsp)
movq %rdx,%r13
jmp L$inner4x
.p2align 4
L$inner4x:
mulq %rbx
addq %rax,%r10
movq -16(%rcx,%r15,8),%rax
adcq $0,%rdx
addq -16(%rsp,%r15,8),%r10
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq -8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-24(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq -8(%rcx,%r15,8),%rax
adcq $0,%rdx
addq -8(%rsp,%r15,8),%r11
adcq $0,%rdx
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq (%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-16(%rsp,%r15,8)
movq %rdx,%r13
mulq %rbx
addq %rax,%r10
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
addq (%rsp,%r15,8),%r10
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq 8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-8(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq 8(%rcx,%r15,8),%rax
adcq $0,%rdx
addq 8(%rsp,%r15,8),%r11
adcq $0,%rdx
leaq 4(%r15),%r15
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
movq -16(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-32(%rsp,%r15,8)
movq %rdx,%r13
cmpq %r9,%r15
jb L$inner4x
/* Tail of the row: last two words, then fold in the previous carry word
   tp[num] and store the new one. */
mulq %rbx
addq %rax,%r10
movq -16(%rcx,%r15,8),%rax
adcq $0,%rdx
addq -16(%rsp,%r15,8),%r10
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %rax,%r13
movq -8(%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-24(%rsp,%r15,8)
movq %rdx,%rdi
mulq %rbx
addq %rax,%r11
movq -8(%rcx,%r15,8),%rax
adcq $0,%rdx
addq -8(%rsp,%r15,8),%r11
adcq $0,%rdx
/* i++ (leaq leaves the flags alone). */
leaq 1(%r14),%r14
movq %rdx,%r10
mulq %rbp
addq %rax,%rdi
/* Reload a[0] for the next row. */
movq (%rsi),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-16(%rsp,%r15,8)
movq %rdx,%r13
xorq %rdi,%rdi
addq %r10,%r13
adcq $0,%rdi
addq (%rsp,%r9,8),%r13
adcq $0,%rdi
movq %r13,-8(%rsp,%r15,8)
movq %rdi,(%rsp,%r15,8)
cmpq %r9,%r14
jb L$outer4x
/*
 * Final reduction and epilogue of the 4-way Montgomery multiply.
 *
 * 1. result = tp - n, four words per iteration (L$sub4x). The borrow is
 *    carried in CF across the loop; only mov/lea/dec (CF-preserving)
 *    instructions sit between the sbbq's.
 * 2. L$copy4x: branch-free select between tp and (tp - n) using a
 *    128-bit mask built from the final borrow, while zeroing tp on the
 *    stack.
 * 3. Restore callee-saved registers and %rsp; return 1 in %rax.
 */
/* Recover the result pointer parked by the prologue. */
movq 16(%rsp,%r9,8),%rdi
movq 0(%rsp),%rax
movq 8(%rsp),%rdx
/* %r9 = num/4 = number of 4-word groups. */
shrq $2,%r9
leaq (%rsp),%rsi
xorq %r14,%r14
subq 0(%rcx),%rax
movq 16(%rsi),%rbx
movq 24(%rsi),%rbp
sbbq 8(%rcx),%rdx
leaq -1(%r9),%r15
jmp L$sub4x
.p2align 4
L$sub4x:
movq %rax,0(%rdi,%r14,8)
movq %rdx,8(%rdi,%r14,8)
sbbq 16(%rcx,%r14,8),%rbx
movq 32(%rsi,%r14,8),%rax
movq 40(%rsi,%r14,8),%rdx
sbbq 24(%rcx,%r14,8),%rbp
movq %rbx,16(%rdi,%r14,8)
movq %rbp,24(%rdi,%r14,8)
sbbq 32(%rcx,%r14,8),%rax
movq 48(%rsi,%r14,8),%rbx
movq 56(%rsi,%r14,8),%rbp
sbbq 40(%rcx,%r14,8),%rdx
leaq 4(%r14),%r14
decq %r15
jnz L$sub4x
movq %rax,0(%rdi,%r14,8)
/* Load the carry word tp[num]. */
movq 32(%rsi,%r14,8),%rax
sbbq 16(%rcx,%r14,8),%rbx
movq %rdx,8(%rdi,%r14,8)
sbbq 24(%rcx,%r14,8),%rbp
movq %rbx,16(%rdi,%r14,8)
/* %rax = tp[num] - borrow; broadcast to both halves of %xmm0 as the select mask.
   NOTE(review): presumed to be 0 or all-ones -- confirm against the
   algorithm's bounds. */
sbbq $0,%rax
movq %rax,%xmm0
punpcklqdq %xmm0,%xmm0
movq %rbp,24(%rdi,%r14,8)
xorq %r14,%r14
movq %r9,%r15
/* %xmm5 = 0, used to wipe the scratch area. */
pxor %xmm5,%xmm5
jmp L$copy4x
.p2align 4
L$copy4x:
/* 32 bytes per iteration; %r14 is a byte offset here. */
movdqu (%rsp,%r14,1),%xmm2
movdqu 16(%rsp,%r14,1),%xmm4
movdqu (%rdi,%r14,1),%xmm1
movdqu 16(%rdi,%r14,1),%xmm3
pxor %xmm1,%xmm2
pxor %xmm3,%xmm4
pand %xmm0,%xmm2
pand %xmm0,%xmm4
pxor %xmm1,%xmm2
pxor %xmm3,%xmm4
movdqu %xmm2,(%rdi,%r14,1)
movdqu %xmm4,16(%rdi,%r14,1)
/* Aligned stores are safe: %rsp is 1024-byte aligned and %r14 is a multiple of 32. */
movdqa %xmm5,(%rsp,%r14,1)
movdqa %xmm5,16(%rsp,%r14,1)
leaq 32(%r14),%r14
decq %r15
jnz L$copy4x
/* Undo the earlier shrq so %r9 = num again for the frame lookup. */
shlq $2,%r9
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
L$mul4x_epilogue:
.byte 0xf3,0xc3
.p2align 5
/*
 * bn_sqr8x_mont / L$sqr8x_enter: Montgomery squaring wrapper. Reached
 * from bn_mul_mont when both operands are the same pointer and the word
 * count is a multiple of 8. (File-local label: no underscore, no .globl.)
 *
 * Registers on entry: %rdi = result, %rsi = a, %rcx = n, %r8 = &n0,
 * %r9d = num.
 *
 * This routine only sets up the frame, copies the modulus into it,
 * calls _bn_sqr8x_internal (defined elsewhere) and then wipes the
 * scratch area. Frame slots: n0 at 32(%rsp), caller's original %rsp
 * (value before the pushes) at 40(%rsp).
 *
 * ".byte 102,73,15,110,211" = movq %r11,%xmm2;
 * ".byte 102,72,15,110,207" = movq %rdi,%xmm1;
 * ".byte 102,73,15,110,218" = movq %r10,%xmm3;
 * ".byte 0xf3,0xc3" = "rep ret".
 */
bn_sqr8x_mont:
L$sqr8x_enter:
movq %rsp,%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
/* %r10 = num*32 bytes, %r9 = -(num*8) bytes. */
movl %r9d,%r10d
shll $3,%r9d
shlq $3+2,%r10
negq %r9
/* Choose where to place the frame based on the distance between the
   prospective frame and the input pointer modulo 4096.
   NOTE(review): presumably this avoids page-offset aliasing between the
   frame and the input -- confirm against the perlasm source comments. */
leaq -64(%rsp,%r9,4),%r11
movq (%r8),%r8
subq %rsi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$sqr8x_sp_alt
subq %r11,%rsp
leaq -64(%rsp,%r9,4),%rsp
jmp L$sqr8x_sp_done
.p2align 5
L$sqr8x_sp_alt:
leaq 4096-64(,%r9,4),%r10
leaq -64(%rsp,%r9,4),%rsp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rsp
L$sqr8x_sp_done:
andq $-64,%rsp
movq %r9,%r10
/* %r9 = +num*8 from here on. */
negq %r9
leaq 64(%rsp,%r9,2),%r11
movq %r8,32(%rsp)
movq %rax,40(%rsp)
L$sqr8x_body:
movq %r9,%rbp
.byte 102,73,15,110,211
/* %rbp = num/4 iterations of the copy loop below. */
shrq $3+2,%rbp
/* NOTE(review): %eax is not read again in this routine; presumably it is
   an input to _bn_sqr8x_internal -- confirm. */
movl _OPENSSL_ia32cap_P+8(%rip),%eax
jmp L$sqr8x_copy_n
.p2align 5
L$sqr8x_copy_n:
/* Copy the modulus into the frame, one qword per 16-byte slot (the upper
   half of each slot is zero because movq to an XMM register zero-extends). */
movq 0(%rcx),%xmm0
movq 8(%rcx),%xmm1
movq 16(%rcx),%xmm3
movq 24(%rcx),%xmm4
leaq 32(%rcx),%rcx
movdqa %xmm0,0(%r11)
movdqa %xmm1,16(%r11)
movdqa %xmm3,32(%r11)
movdqa %xmm4,48(%r11)
leaq 64(%r11),%r11
decq %rbp
jnz L$sqr8x_copy_n
pxor %xmm0,%xmm0
.byte 102,72,15,110,207
.byte 102,73,15,110,218
call _bn_sqr8x_internal
/* Wipe two scratch regions of the frame with zeros, 64 bytes each per pass. */
pxor %xmm0,%xmm0
leaq 48(%rsp),%rax
leaq 64(%rsp,%r9,2),%rdx
shrq $3+2,%r9
movq 40(%rsp),%rsi
jmp L$sqr8x_zero
.p2align 5
L$sqr8x_zero:
movdqa %xmm0,0(%rax)
movdqa %xmm0,16(%rax)
movdqa %xmm0,32(%rax)
movdqa %xmm0,48(%rax)
leaq 64(%rax),%rax
movdqa %xmm0,0(%rdx)
movdqa %xmm0,16(%rdx)
movdqa %xmm0,32(%rdx)
movdqa %xmm0,48(%rdx)
leaq 64(%rdx),%rdx
decq %r9
jnz L$sqr8x_zero
/* Epilogue: %rsi = original %rsp, so the pushed registers sit just below it. */
movq $1,%rax
movq -48(%rsi),%r15
movq -40(%rsi),%r14
movq -32(%rsi),%r13
movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
L$sqr8x_epilogue:
.byte 0xf3,0xc3
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 4
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,671 @@
#if defined(__x86_64__)
.text
.p2align 4
/*
 * md5_block_asm_data_order: run the MD5 compression function over a
 * sequence of 64-byte blocks.
 *
 * In:   %rdi = state (four 32-bit words A,B,C,D at offsets 0,4,8,12)
 *       %rsi = input data
 *       %rdx = number of 64-byte blocks (shifted left by 6 to get bytes)
 * Out:  state updated in place
 * Saves/restores: %rbp, %rbx, %r12, %r14, %r15
 *
 * Register roles for the whole routine:
 *   %eax,%ebx,%ecx,%edx = working A,B,C,D
 *   %rbp = state pointer, %rsi = current block, %rdi = end of input
 *   %r8d,%r9d,%r14d,%r15d = A,B,C,D at block start
 *   %r10d = next message word, %r11d/%r12d = scratch
 */
.globl _md5_block_asm_data_order
.private_extern _md5_block_asm_data_order
_md5_block_asm_data_order:
pushq %rbp
pushq %rbx
pushq %r12
pushq %r14
pushq %r15
L$prologue:
movq %rdi,%rbp
shlq $6,%rdx
leaq (%rsi,%rdx,1),%rdi
movl 0(%rbp),%eax
movl 4(%rbp),%ebx
movl 8(%rbp),%ecx
movl 12(%rbp),%edx
/* Nothing to do for zero blocks. */
cmpq %rdi,%rsi
je L$end
/*
 * L$loop: start of the per-block loop, followed by MD5 round 1
 * (16 steps, rotations 7/12/17/22).
 *
 * Each step computes, for the rotating register assignment (a,b,c,d):
 *     a = b + rol(a + (((c ^ d) & b) ^ d) + X[k] + T, s)
 * with T folded into the leal displacement and X[k] preloaded in %r10d
 * one step ahead. %r11d carries the boolean-function scratch and is
 * pre-seeded with the next step's "c" at the end of each step.
 */
L$loop:
/* Remember the state at block start for the final addition. */
movl %eax,%r8d
movl %ebx,%r9d
movl %ecx,%r14d
movl %edx,%r15d
movl 0(%rsi),%r10d
movl %edx,%r11d
xorl %ecx,%r11d
leal -680876936(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 4(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -389564586(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 8(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal 606105819(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 12(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -1044525330(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 16(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal -176418897(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 20(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal 1200080426(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 24(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -1473231341(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 28(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -45705983(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 32(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal 1770035416(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 36(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -1958414417(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 40(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -42063(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 44(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -1990404162(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 48(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal 1804603682(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 52(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -40341101(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 56(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -1502002290(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 60(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal 1236535329(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
/* The word and %r11d value prepared by this last step are overwritten by
   the next round's setup before being used. */
movl 0(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
/*
 * MD5 round 2 (16 steps, rotations 5/9/14/20).
 *
 * Each step computes, for the rotating register assignment (a,b,c,d):
 *     a = b + rol(a + ((b & d) | (c & ~d)) + X[k] + T, s)
 * %r12d holds the (b & d) term, %r11d the (c & ~d) term. The first three
 * instructions are the round setup: load the first message word (X[1])
 * and seed both scratch registers with d.
 */
movl 4(%rsi),%r10d
movl %edx,%r11d
movl %edx,%r12d
notl %r11d
leal -165796510(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 24(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -1069501632(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 44(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal 643717713(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 0(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -373897302(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 20(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal -701558691(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 40(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal 38016083(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 60(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal -660478335(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 16(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -405537848(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 36(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal 568446438(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 56(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -1019803690(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 12(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal -187363961(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 32(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal 1163531501(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 52(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal -1444681467(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 8(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -51403784(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 28(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal 1735328473(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 48(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -1926607734(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
/* The values prepared by this last step are overwritten by the next
   round's setup before being used. */
movl 0(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
/*
 * MD5 round 3 (16 steps, rotations 4/11/16/23).
 *
 * Each step computes, for the rotating register assignment (a,b,c,d):
 *     a = b + rol(a + (b ^ c ^ d) + X[k] + T, s)
 * %r11d is seeded with the next step's "c" at the end of each step and
 * then XORed with the other two words. The first two instructions are
 * the round setup: load X[5] and seed %r11d with c.
 */
movl 20(%rsi),%r10d
movl %ecx,%r11d
leal -378558(%rax,%r10,1),%eax
movl 32(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal -2022574463(%rdx,%r10,1),%edx
movl 44(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal 1839030562(%rcx,%r10,1),%ecx
movl 56(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal -35309556(%rbx,%r10,1),%ebx
movl 4(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal -1530992060(%rax,%r10,1),%eax
movl 16(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal 1272893353(%rdx,%r10,1),%edx
movl 28(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal -155497632(%rcx,%r10,1),%ecx
movl 40(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal -1094730640(%rbx,%r10,1),%ebx
movl 52(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal 681279174(%rax,%r10,1),%eax
movl 0(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal -358537222(%rdx,%r10,1),%edx
movl 12(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal -722521979(%rcx,%r10,1),%ecx
movl 24(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal 76029189(%rbx,%r10,1),%ebx
movl 36(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal -640364487(%rax,%r10,1),%eax
movl 48(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal -421815835(%rdx,%r10,1),%edx
movl 60(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal 530742520(%rcx,%r10,1),%ecx
movl 8(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal -995338651(%rbx,%r10,1),%ebx
/* The values prepared by this last step are overwritten by the next
   round's setup before being used. */
movl 0(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
/*
 * MD5 round 4 (16 steps, rotations 6/10/15/21), followed by the state
 * update and the loop back-edge.
 *
 * Each step computes, for the rotating register assignment (a,b,c,d):
 *     a = b + rol(a + ((~d | b) ^ c) + X[k] + T, s)
 * ~d is formed as 0xFFFFFFFF ^ d (movl $4294967295 then xorl), which
 * lets the next step's constant load be scheduled early. The first three
 * instructions are the round setup: load X[0] and form ~d.
 */
movl 0(%rsi),%r10d
movl $4294967295,%r11d
xorl %edx,%r11d
leal -198630844(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 28(%rsi),%r10d
movl $4294967295,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal 1126891415(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 56(%rsi),%r10d
movl $4294967295,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1416354905(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 20(%rsi),%r10d
movl $4294967295,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -57434055(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 48(%rsi),%r10d
movl $4294967295,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal 1700485571(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 12(%rsi),%r10d
movl $4294967295,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -1894986606(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 40(%rsi),%r10d
movl $4294967295,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1051523(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 4(%rsi),%r10d
movl $4294967295,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -2054922799(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 32(%rsi),%r10d
movl $4294967295,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal 1873313359(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 60(%rsi),%r10d
movl $4294967295,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -30611744(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 24(%rsi),%r10d
movl $4294967295,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1560198380(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 52(%rsi),%r10d
movl $4294967295,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal 1309151649(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 16(%rsi),%r10d
movl $4294967295,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal -145523070(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 44(%rsi),%r10d
movl $4294967295,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -1120210379(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 8(%rsi),%r10d
movl $4294967295,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal 718787259(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 36(%rsi),%r10d
movl $4294967295,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -343485551(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
/* Prepared for a following step that does not exist; the values are
   reloaded at the top of the loop. */
movl 0(%rsi),%r10d
movl $4294967295,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
/* Add the state saved at block start, advance to the next block, loop. */
addl %r8d,%eax
addl %r9d,%ebx
addl %r14d,%ecx
addl %r15d,%edx
addq $64,%rsi
cmpq %rdi,%rsi
jb L$loop
/*
 * L$end: write the working state back and return.
 * The five registers pushed in the prologue (rbp, rbx, r12, r14, r15)
 * are reloaded with moves in reverse push order and the 40 bytes are
 * released in one addq. ".byte 0xf3,0xc3" is "rep ret".
 */
L$end:
movl %eax,0(%rbp)
movl %ebx,4(%rbp)
movl %ecx,8(%rbp)
movl %edx,12(%rbp)
movq (%rsp),%r15
movq 8(%rsp),%r14
movq 16(%rsp),%r12
movq 24(%rsp),%rbx
movq 32(%rsp),%rbp
addq $40,%rsp
L$epilogue:
.byte 0xf3,0xc3
#endif

View File

@ -0,0 +1,19 @@
#if defined(__x86_64__)
.text
/*
 * _aesni_gcm_encrypt: placeholder entry point. It touches no memory and
 * returns 0 in %eax.
 * NOTE(review): callers presumably interpret the return value as the number
 * of bytes processed and fall back to another code path - confirm.
 */
.globl _aesni_gcm_encrypt
.private_extern _aesni_gcm_encrypt
_aesni_gcm_encrypt:
xorl %eax,%eax /* return value = 0 */
.byte 0xf3,0xc3 /* raw encoding of rep ret */
/*
 * _aesni_gcm_decrypt: placeholder entry point. It touches no memory and
 * returns 0 in %eax.
 * NOTE(review): callers presumably interpret the return value as the number
 * of bytes processed and fall back to another code path - confirm.
 */
.globl _aesni_gcm_decrypt
.private_extern _aesni_gcm_decrypt
_aesni_gcm_decrypt:
xorl %eax,%eax /* return value = 0 */
.byte 0xf3,0xc3 /* raw encoding of rep ret */
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,48 @@
#if defined(__x86_64__)
.text
/*
 * _CRYPTO_rdrand: fetch one 64-bit value from the RDRAND instruction.
 * In:  %rdi = address that receives the 8-byte result.
 * Out: %rax = 1 if RDRAND reported success (carry flag set), 0 otherwise.
 * The value in %rcx is stored to (%rdi) in both cases, so the caller must
 * check the return value before trusting the output.
 * Clobbers: %rcx, flags.
 */
.globl _CRYPTO_rdrand
.private_extern _CRYPTO_rdrand
.p2align 4
_CRYPTO_rdrand:
xorq %rax,%rax /* rax = 0, also clears CF */
.byte 0x48, 0x0f, 0xc7, 0xf1 /* raw encoding of rdrand %rcx */
adcq %rax,%rax /* rax = 0 + 0 + CF, i.e. the RDRAND success flag */
movq %rcx,0(%rdi)
.byte 0xf3,0xc3 /* raw encoding of rep ret */
/*
 * _CRYPTO_rdrand_multiple8_buf: fill a buffer with RDRAND output, eight
 * bytes per iteration.
 * In:  %rdi = buffer, %rsi = length in bytes.
 * Out: %rax = 1 on success (including length 0), 0 as soon as one RDRAND
 *      invocation reports failure (carry flag clear).
 * The loop only terminates when %rsi reaches exactly zero after repeated
 * subtraction of 8, so the length must be a multiple of 8.
 * Clobbers: %rcx, %rdx, %rsi, %rdi, flags.
 */
.globl _CRYPTO_rdrand_multiple8_buf
.private_extern _CRYPTO_rdrand_multiple8_buf
.p2align 4
_CRYPTO_rdrand_multiple8_buf:
testq %rsi,%rsi
jz L$out /* nothing to do for an empty buffer */
movq $8,%rdx /* step size */
L$loop:
.byte 0x48, 0x0f, 0xc7, 0xf1 /* raw encoding of rdrand %rcx */
jnc L$err /* CF clear means no random data was returned */
movq %rcx,0(%rdi)
addq %rdx,%rdi
subq %rdx,%rsi
jnz L$loop
L$out:
movq $1,%rax
.byte 0xf3,0xc3 /* raw encoding of rep ret */
L$err:
xorq %rax,%rax
.byte 0xf3,0xc3 /* raw encoding of rep ret */
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,595 @@
#if defined(__x86_64__)
.text
/*
 * _asm_RC4: RC4 stream cipher, entry and path selection.
 * In:  %rdi = key state, %rsi = byte count, %rdx = input, %rcx = output.
 * Key state layout as used below: byte at offset 0 = x index, byte at
 * offset 4 = y index, permutation table starting at offset 8.
 * Returns immediately when the byte count is zero.
 * Register roles after the prologue:
 *   %r11 = bytes remaining, %r12 = input pointer, %r13 = output pointer
 *   (rewritten to output-minus-input on the dword-table paths),
 *   %r10 = x index, %cl = y index, %rdi = table base.
 * Path selection:
 *   - dword at 256(table) equal to -1  -> byte-table code (L$RC4_CHAR)
 *   - fewer than 16 bytes              -> byte-at-a-time loop (L$loop1)
 *   - bit 30 of OPENSSL_ia32cap_P set  -> 16-byte SSE loop (L$intel)
 *   - otherwise                        -> 8-byte loop (L$oop8)
 */
.globl _asm_RC4
.private_extern _asm_RC4
.p2align 4
_asm_RC4:
orq %rsi,%rsi
jne L$entry
.byte 0xf3,0xc3 /* raw encoding of rep ret */
L$entry:
pushq %rbx
pushq %r12
pushq %r13
L$prologue:
movq %rsi,%r11
movq %rdx,%r12
movq %rcx,%r13
xorq %r10,%r10
xorq %rcx,%rcx
leaq 8(%rdi),%rdi /* rdi now points at the table itself */
movb -8(%rdi),%r10b /* x */
movb -4(%rdi),%cl /* y */
cmpl $-1,256(%rdi)
je L$RC4_CHAR
movl _OPENSSL_ia32cap_P(%rip),%r8d
xorq %rbx,%rbx
incb %r10b /* x is kept pre-incremented; L$exit undoes this */
subq %r10,%rbx /* rbx = -(x+1), masked below into a warm-up count */
subq %r12,%r13 /* r13 = out - in, so (%r12,%r13,1) addresses the output */
movl (%rdi,%r10,4),%eax /* eax = S[x] */
testq $-16,%r11
jz L$loop1
btl $30,%r8d
jc L$intel
andq $7,%rbx /* bytes needed to bring x to a multiple of 8 */
leaq 1(%r10),%rsi /* lea leaves the flags of the andq intact for the jz */
jz L$oop8
subq %rbx,%r11
/*
 * L$oop8_warmup: process %rbx single bytes with the dword table so that
 * the x index in %r10 becomes a multiple of 8 before the unrolled 8-byte
 * loop starts. On exit %rsi = %r10 + 1.
 */
L$oop8_warmup:
addb %al,%cl /* y += S[x] */
movl (%rdi,%rcx,4),%edx /* edx = S[y] */
movl %eax,(%rdi,%rcx,4) /* S[y] = old S[x] */
movl %edx,(%rdi,%r10,4) /* S[x] = old S[y] */
addb %dl,%al /* index of the keystream byte */
incb %r10b
movl (%rdi,%rax,4),%edx /* keystream byte */
movl (%rdi,%r10,4),%eax /* preload S[x] for the next byte */
xorb (%r12),%dl
movb %dl,(%r12,%r13,1)
leaq 1(%r12),%r12
decq %rbx
jnz L$oop8_warmup
leaq 1(%r10),%rsi
jmp L$oop8
/*
 * L$oop8: unrolled 8-byte loop on the dword table.
 * Each of the eight steps swaps S[x] and S[y], then rotates %r8 right by
 * one byte and drops the next keystream byte into %r8b. After the final
 * rotate %r8 holds eight keystream bytes in memory order; they are XORed
 * with eight input bytes and stored with a single 8-byte write.
 * %r10 = x index (a multiple of 8 here, so the fixed displacements 0..28
 * never run past the table), %rsi = x + 1 for preloading the next entry,
 * %eax and %ebx alternate as the current S[x] value.
 * Leaves for L$loop1 when fewer than 8 bytes remain, or L$exit when none.
 */
.p2align 4
L$oop8:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 0(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,0(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 4(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,4(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 8(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,8(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 12(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,12(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 16(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,16(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 20(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,20(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 24(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,24(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb $8,%sil /* advance the preload index with 8-bit wrap-around */
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl -4(%rdi,%rsi,4),%eax /* rsi already advanced, so -4 selects the next x */
rorq $8,%r8
movl %edx,28(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb $8,%r10b
rorq $8,%r8
subq $8,%r11
xorq (%r12),%r8
movq %r8,(%r12,%r13,1)
leaq 8(%r12),%r12
testq $-8,%r11
jnz L$oop8 /* at least 8 bytes left */
cmpq $0,%r11
jne L$loop1 /* 1..7 bytes left */
jmp L$exit
/*
 * L$intel: taken when bit 30 of the first OPENSSL_ia32cap_P word is set.
 * Inputs shorter than 32 bytes go to the byte loop; otherwise %rbx is
 * reduced to the number of warm-up bytes needed to bring the x index to a
 * multiple of 16 (zero means the 16-byte loop can start immediately).
 */
.p2align 4
L$intel:
testq $-32,%r11
jz L$loop1
andq $15,%rbx
jz L$oop16_is_hot
subq %rbx,%r11
/*
 * L$oop16_warmup: process %rbx single bytes with the dword table so that
 * the x index in %r10 becomes a multiple of 16. The three instructions
 * after the loop rebuild %rcx with only its low byte (the y index) kept
 * and all upper bits cleared.
 */
L$oop16_warmup:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r12,%r13,1)
leaq 1(%r12),%r12
decq %rbx
jnz L$oop16_warmup
movq %rcx,%rbx
xorq %rcx,%rcx
movb %bl,%cl
/*
 * L$oop16_is_hot: first step of the first 16-byte block. Sets %rsi to the
 * address of S[x], performs the swap for byte 0, inserts its keystream
 * value into word 0 of %xmm0 and jumps into the middle of the loop body.
 */
L$oop16_is_hot:
leaq (%rdi,%r10,4),%rsi /* rsi = &S[x] */
addb %al,%cl
movl (%rdi,%rcx,4),%edx
pxor %xmm0,%xmm0
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 4(%rsi),%ebx /* preload S[x+1] */
movzbl %al,%eax
movl %edx,0(%rsi)
addb %bl,%cl
pinsrw $0,(%rdi,%rax,4),%xmm0
jmp L$oop16_enter
/*
 * L$oop16: loop head for every 16-byte block after the first. It finishes
 * the previous block (input in %xmm2 XOR %xmm0 XOR %xmm1 shifted left by
 * one byte, stored to the output) interleaved with step 0 of the next
 * block, then falls through into L$oop16_enter.
 */
.p2align 4
L$oop16:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
pxor %xmm0,%xmm2
psllq $8,%xmm1
pxor %xmm0,%xmm0
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 4(%rsi),%ebx
movzbl %al,%eax
movl %edx,0(%rsi)
pxor %xmm1,%xmm2
addb %bl,%cl
pinsrw $0,(%rdi,%rax,4),%xmm0
movdqu %xmm2,(%r12,%r13,1)
leaq 16(%r12),%r12
/*
 * L$oop16_enter: steps 1..15 of a 16-byte block.
 * Keystream values for even byte positions are inserted into words 0..7
 * of %xmm0 and values for odd positions into words 0..7 of %xmm1; shifting
 * %xmm1 left by 8 bits and XORing both into the input block (%xmm2) yields
 * 16 output bytes. %rsi = &S[x] for the current block, %eax and %ebx
 * alternate as the current S[x] value, %cl = y index.
 * After the last block the tail below the jnz flushes the pending output,
 * then leaves for L$loop1 (1..15 bytes left) or L$exit (none left).
 */
L$oop16_enter:
movl (%rdi,%rcx,4),%edx
pxor %xmm1,%xmm1
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 8(%rsi),%eax
movzbl %bl,%ebx
movl %edx,4(%rsi)
addb %al,%cl
pinsrw $0,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 12(%rsi),%ebx
movzbl %al,%eax
movl %edx,8(%rsi)
addb %bl,%cl
pinsrw $1,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 16(%rsi),%eax
movzbl %bl,%ebx
movl %edx,12(%rsi)
addb %al,%cl
pinsrw $1,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 20(%rsi),%ebx
movzbl %al,%eax
movl %edx,16(%rsi)
addb %bl,%cl
pinsrw $2,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 24(%rsi),%eax
movzbl %bl,%ebx
movl %edx,20(%rsi)
addb %al,%cl
pinsrw $2,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 28(%rsi),%ebx
movzbl %al,%eax
movl %edx,24(%rsi)
addb %bl,%cl
pinsrw $3,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 32(%rsi),%eax
movzbl %bl,%ebx
movl %edx,28(%rsi)
addb %al,%cl
pinsrw $3,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 36(%rsi),%ebx
movzbl %al,%eax
movl %edx,32(%rsi)
addb %bl,%cl
pinsrw $4,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 40(%rsi),%eax
movzbl %bl,%ebx
movl %edx,36(%rsi)
addb %al,%cl
pinsrw $4,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 44(%rsi),%ebx
movzbl %al,%eax
movl %edx,40(%rsi)
addb %bl,%cl
pinsrw $5,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 48(%rsi),%eax
movzbl %bl,%ebx
movl %edx,44(%rsi)
addb %al,%cl
pinsrw $5,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 52(%rsi),%ebx
movzbl %al,%eax
movl %edx,48(%rsi)
addb %bl,%cl
pinsrw $6,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 56(%rsi),%eax
movzbl %bl,%ebx
movl %edx,52(%rsi)
addb %al,%cl
pinsrw $6,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 60(%rsi),%ebx
movzbl %al,%eax
movl %edx,56(%rsi)
addb %bl,%cl
pinsrw $7,(%rdi,%rax,4),%xmm0
addb $16,%r10b /* x index advances by one block, with 8-bit wrap */
movdqu (%r12),%xmm2 /* load the 16 input bytes of this block */
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movzbl %bl,%ebx
movl %edx,60(%rsi)
leaq (%rdi,%r10,4),%rsi /* rsi = &S[x] for the next block */
pinsrw $7,(%rdi,%rbx,4),%xmm1
movl (%rsi),%eax
movq %rcx,%rbx
xorq %rcx,%rcx
subq $16,%r11
movb %bl,%cl /* rcx rebuilt from its low byte only */
testq $-16,%r11
jnz L$oop16
psllq $8,%xmm1
pxor %xmm0,%xmm2
pxor %xmm1,%xmm2
movdqu %xmm2,(%r12,%r13,1)
leaq 16(%r12),%r12
cmpq $0,%r11
jne L$loop1
jmp L$exit
/*
 * L$loop1: byte-at-a-time loop on the dword table; handles short inputs
 * and the tail left over by the unrolled loops. %r11 = bytes remaining
 * (non-zero on entry), %r10 = pre-incremented x index, %eax = S[x],
 * %cl = y index, %r12 = input pointer, %r13 = output minus input.
 */
.p2align 4
L$loop1:
addb %al,%cl /* y += S[x] */
movl (%rdi,%rcx,4),%edx /* edx = S[y] */
movl %eax,(%rdi,%rcx,4) /* S[y] = old S[x] */
movl %edx,(%rdi,%r10,4) /* S[x] = old S[y] */
addb %dl,%al /* index of the keystream byte */
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax /* preload S[x] for the next byte */
xorb (%r12),%dl
movb %dl,(%r12,%r13,1)
leaq 1(%r12),%r12
decq %r11
jnz L$loop1
jmp L$exit
/*
 * L$RC4_CHAR: entry of the byte-table variant, selected when the dword at
 * offset 256 of the table is -1. Pre-increments x, loads S[x] as a byte
 * and picks the 8-byte loop or, for fewer than 8 bytes, the single-byte
 * loop.
 */
.p2align 4
L$RC4_CHAR:
addb $1,%r10b
movzbl (%rdi,%r10,1),%eax
testq $-8,%r11
jz L$cloop1
jmp L$cloop8
/*
 * L$cloop8: unrolled 8-byte loop on the byte table.
 * %r8d and %r9d each hold four input bytes; every step XORs one keystream
 * byte into the low byte and rotates right by 8, so after four steps each
 * register holds four output bytes in memory order.
 * %r10 and %rsi alternate as x and x+1, %eax and %ebx alternate as S[x]
 * and the preloaded S[x+1]. The next entry is preloaded before the swap
 * stores into S[y]; when y equals x+1 that preload is stale, so the
 * instruction guarded by each L$cmovN label replaces it with the value
 * just written.
 * Here %r13 is a real output pointer (not output minus input).
 */
.p2align 4
L$cloop8:
movl (%r12),%r8d
movl 4(%r12),%r9d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne L$cmov0
movq %rax,%rbx
L$cmov0:
addb %al,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne L$cmov1
movq %rbx,%rax
L$cmov1:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne L$cmov2
movq %rax,%rbx
L$cmov2:
addb %al,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne L$cmov3
movq %rbx,%rax
L$cmov3:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne L$cmov4
movq %rax,%rbx
L$cmov4:
addb %al,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne L$cmov5
movq %rbx,%rax
L$cmov5:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne L$cmov6
movq %rax,%rbx
L$cmov6:
addb %al,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne L$cmov7
movq %rbx,%rax
L$cmov7:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
leaq -8(%r11),%r11
movl %r8d,(%r13)
leaq 8(%r12),%r12
movl %r9d,4(%r13)
leaq 8(%r13),%r13
testq $-8,%r11
jnz L$cloop8 /* at least 8 bytes left */
cmpq $0,%r11
jne L$cloop1 /* 1..7 bytes left */
jmp L$exit
/*
 * L$cloop1: byte-at-a-time loop on the byte table; handles short inputs
 * and the tail left by L$cloop8. %r11 = bytes remaining (non-zero on
 * entry), %r10 = pre-incremented x index, %eax = S[x], %cl = y index,
 * %r12 = input pointer, %r13 = output pointer.
 */
.p2align 4
L$cloop1:
addb %al,%cl /* y += S[x] */
movzbl %cl,%ecx
movzbl (%rdi,%rcx,1),%edx /* edx = S[y] */
movb %al,(%rdi,%rcx,1) /* S[y] = old S[x] */
movb %dl,(%rdi,%r10,1) /* S[x] = old S[y] */
addb %al,%dl /* index of the keystream byte */
addb $1,%r10b
movzbl %dl,%edx
movzbl %r10b,%r10d
movzbl (%rdi,%rdx,1),%edx
movzbl (%rdi,%r10,1),%eax /* preload S[x] for the next byte */
xorb (%r12),%dl
leaq 1(%r12),%r12
movb %dl,(%r13)
leaq 1(%r13),%r13
subq $1,%r11
jnz L$cloop1
jmp L$exit
/*
 * L$exit: common exit of _asm_RC4. Undoes the pre-increment of x, writes
 * x and y back into the key state (offsets -8 and -4 from the table
 * base), restores the three callee-saved registers pushed at entry and
 * returns.
 */
.p2align 4
L$exit:
subb $1,%r10b
movl %r10d,-8(%rdi)
movl %ecx,-4(%rdi)
movq (%rsp),%r13
movq 8(%rsp),%r12
movq 16(%rsp),%rbx
addq $24,%rsp
L$epilogue:
.byte 0xf3,0xc3 /* raw encoding of rep ret */
/*
 * _asm_RC4_set_key: RC4 key schedule.
 * In:  %rdi = key state, %rsi = key length in bytes, %rdx = key bytes.
 * The permutation table starts at offset 8 of the key state; the x and y
 * indices in front of it are zeroed on exit.
 * Two table layouts are produced, chosen by bit 20 of the first
 * OPENSSL_ia32cap_P word:
 *   bit clear -> 256 dword entries (L$w1stloop / L$w2ndloop)
 *   bit set   -> 256 byte entries, with the dword at offset 256 of the
 *                table set to -1 as a marker (L$c1stloop / L$c2ndloop)
 * Key indexing: %rdx points one past the key and %rsi counts from -len up
 * to 0; when it reaches 0 it is reset to -len (kept in %rcx), so the key
 * is used cyclically.
 * NOTE(review): a key length of 0 is not handled here; the first key read
 * would be at (%rdx) and the index would not wrap as intended - confirm
 * that callers never pass 0.
 * Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11, flags.
 */
.globl _asm_RC4_set_key
.private_extern _asm_RC4_set_key
.p2align 4
_asm_RC4_set_key:
leaq 8(%rdi),%rdi /* rdi = table base */
leaq (%rdx,%rsi,1),%rdx /* rdx = end of key */
negq %rsi /* rsi = -len, running key index */
movq %rsi,%rcx /* rcx = -len, reload value */
xorl %eax,%eax
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
movl _OPENSSL_ia32cap_P(%rip),%r8d
btl $20,%r8d
jc L$c1stloop
jmp L$w1stloop
.p2align 4
L$w1stloop:
movl %eax,(%rdi,%rax,4) /* S[i] = i */
addb $1,%al
jnc L$w1stloop /* carry out of %al after i = 255 ends the loop */
xorq %r9,%r9 /* i = 0 */
xorq %r8,%r8 /* j = 0 */
.p2align 4
L$w2ndloop:
movl (%rdi,%r9,4),%r10d /* r10 = S[i] */
addb (%rdx,%rsi,1),%r8b /* j += key byte */
addb %r10b,%r8b /* j += S[i] */
addq $1,%rsi /* sets ZF when the key index reaches 0 */
movl (%rdi,%r8,4),%r11d /* r11 = S[j]; mov leaves ZF untouched */
cmovzq %rcx,%rsi /* wrap the key index */
movl %r10d,(%rdi,%r8,4)
movl %r11d,(%rdi,%r9,4)
addb $1,%r9b
jnc L$w2ndloop
jmp L$exit_key
.p2align 4
L$c1stloop:
movb %al,(%rdi,%rax,1) /* S[i] = i */
addb $1,%al
jnc L$c1stloop
xorq %r9,%r9 /* i = 0 */
xorq %r8,%r8 /* j = 0 */
.p2align 4
L$c2ndloop:
movb (%rdi,%r9,1),%r10b /* r10 = S[i] */
addb (%rdx,%rsi,1),%r8b /* j += key byte */
addb %r10b,%r8b /* j += S[i] */
addq $1,%rsi /* sets ZF when the key index reaches 0 */
movb (%rdi,%r8,1),%r11b /* r11 = S[j]; mov leaves ZF untouched */
jnz L$cnowrap
movq %rcx,%rsi /* wrap the key index */
L$cnowrap:
movb %r10b,(%rdi,%r8,1)
movb %r11b,(%rdi,%r9,1)
addb $1,%r9b
jnc L$c2ndloop
movl $-1,256(%rdi) /* marker that _asm_RC4 tests to select the byte-table code */
.p2align 4
L$exit_key:
xorl %eax,%eax
movl %eax,-8(%rdi) /* x = 0 */
movl %eax,-4(%rdi) /* y = 0 */
.byte 0xf3,0xc3 /* raw encoding of rep ret */
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,649 @@
%ifidn __OUTPUT_FORMAT__,obj
section code use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01010000h
%error yasm version 1.1.0 or later needed.
%endif
; Yasm automatically includes .00 and complains about redefining it.
; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
%else
$@feat.00 equ 1
%endif
section .text code align=64
%else
section .text code
%endif
; L$_vpaes_consts: constant tables for the vpaes routines, 16 bytes per row.
; The code in this file addresses the tables through ebp = L$_vpaes_consts+0x30,
; so [ebp-48] is the first row, [ebp-32] the second and [ebp-16] the third
; (four copies of 0x0F0F0F0F, used as the nibble mask).
align 64
L$_vpaes_consts:
dd 218628480,235210255,168496130,67568393
dd 252381056,17041926,33884169,51187212
dd 252645135,252645135,252645135,252645135
dd 1512730624,3266504856,1377990664,3401244816
dd 830229760,1275146365,2969422977,3447763452
dd 3411033600,2979783055,338359620,2782886510
dd 4209124096,907596821,221174255,1006095553
dd 191964160,3799684038,3164090317,1589111125
dd 182528256,1777043520,2877432650,3265356744
dd 1874708224,3503451415,3305285752,363511674
dd 1606117888,3487855781,1093350906,2384367825
dd 197121,67569157,134941193,202313229
dd 67569157,134941193,202313229,197121
dd 134941193,202313229,197121,67569157
dd 202313229,197121,67569157,134941193
dd 33619971,100992007,168364043,235736079
dd 235736079,33619971,100992007,168364043
dd 168364043,235736079,33619971,100992007
dd 100992007,168364043,235736079,33619971
dd 50462976,117835012,185207048,252579084
dd 252314880,51251460,117574920,184942860
dd 184682752,252054788,50987272,118359308
dd 118099200,185467140,251790600,50727180
dd 2946363062,528716217,1300004225,1881839624
dd 1532713819,1532713819,1532713819,1532713819
dd 3602276352,4288629033,3737020424,4153884961
dd 1354558464,32357713,2958822624,3775749553
dd 1201988352,132424512,1572796698,503232858
dd 2213177600,1597421020,4103937655,675398315
dd 2749646592,4273543773,1511898873,121693092
dd 3040248576,1103263732,2871565598,1608280554
dd 2236667136,2588920351,482954393,64377734
dd 3069987328,291237287,2117370568,3650299247
dd 533321216,3573750986,2572112006,1401264716
dd 1339849704,2721158661,548607111,3445553514
dd 2128193280,3054596040,2183486460,1257083700
dd 655635200,1165381986,3923443150,2344132524
dd 190078720,256924420,290342170,357187870
dd 1610966272,2263057382,4103205268,309794674
dd 2592527872,2233205587,1335446729,3402964816
dd 3973531904,3225098121,3002836325,1918774430
dd 3870401024,2102906079,2284471353,4117666579
dd 617007872,1021508343,366931923,691083277
dd 2528395776,3491914898,2968704004,1613121270
dd 3445188352,3247741094,844474987,4093578302
dd 651481088,1190302358,1689581232,574775300
dd 4289380608,206939853,2555985458,2489840491
dd 2130264064,327674451,3566485037,3349835193
dd 2470714624,316102159,3636825756,3393945945
; NUL-terminated ASCII identification string:
; "Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)"
db 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
db 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
db 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
db 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
db 118,101,114,115,105,116,121,41,0
align 64
; __vpaes_preheat: finish the position-independent address computation and
; load the two constants that the encrypt/decrypt cores keep in registers.
; In:  ebp = (L$_vpaes_consts+0x30) minus the address of the label placed
;      directly after the caller's call instruction.
; Out: ebp = absolute address of L$_vpaes_consts+0x30,
;      xmm7 = row at [ebp-48], xmm6 = row at [ebp-16].
align 16
__vpaes_preheat:
add ebp,DWORD [esp]    ; [esp] is the return address, i.e. the caller's pic_point label
movdqa xmm7,[ebp-48]
movdqa xmm6,[ebp-16]
ret
; __vpaes_encrypt_core: encrypt one 16-byte block held in xmm0.
; In:  xmm0 = input block, edx = key schedule (round count is read from
;      [240+edx]), ebp/xmm6/xmm7 as set up by __vpaes_preheat.
; Out: xmm0 = output block.
; Clobbers: eax, ebx, ecx, edx (advanced through the schedule), xmm1-xmm5.
; The "db 102,15,56,0,NN" sequences are hand-encoded pshufb instructions;
; the decoded form is given next to each one.
; Loop control: eax counts rounds. The jnz at the bottom of L$000enc_entry
; consumes the flags of the last integer instruction executed before it:
; "add edx,16" on the first pass and "sub eax,1" on later passes. The SSE
; instructions and lea in between do not modify EFLAGS.
align 16
__vpaes_encrypt_core:
mov ecx,16
mov eax,DWORD [240+edx]
movdqa xmm1,xmm6
movdqa xmm2,[ebp]
pandn xmm1,xmm0
pand xmm0,xmm6
movdqu xmm5,[edx]
db 102,15,56,0,208    ; pshufb xmm2,xmm0
movdqa xmm0,[16+ebp]
pxor xmm2,xmm5
psrld xmm1,4
add edx,16
db 102,15,56,0,193    ; pshufb xmm0,xmm1
lea ebx,[192+ebp]
pxor xmm0,xmm2
jmp NEAR L$000enc_entry
align 16
L$001enc_loop:
movdqa xmm4,[32+ebp]
movdqa xmm0,[48+ebp]
db 102,15,56,0,226    ; pshufb xmm4,xmm2
db 102,15,56,0,195    ; pshufb xmm0,xmm3
pxor xmm4,xmm5
movdqa xmm5,[64+ebp]
pxor xmm0,xmm4
movdqa xmm1,[ecx*1+ebx-64]
db 102,15,56,0,234    ; pshufb xmm5,xmm2
movdqa xmm2,[80+ebp]
movdqa xmm4,[ecx*1+ebx]
db 102,15,56,0,211    ; pshufb xmm2,xmm3
movdqa xmm3,xmm0
pxor xmm2,xmm5
db 102,15,56,0,193    ; pshufb xmm0,xmm1
add edx,16
pxor xmm0,xmm2
db 102,15,56,0,220    ; pshufb xmm3,xmm4
add ecx,16
pxor xmm3,xmm0
db 102,15,56,0,193    ; pshufb xmm0,xmm1
and ecx,48            ; ecx cycles through 0,16,32,48
sub eax,1             ; sets the flags tested by the jnz below
pxor xmm0,xmm3
L$000enc_entry:
movdqa xmm1,xmm6
movdqa xmm5,[ebp-32]
pandn xmm1,xmm0
psrld xmm1,4
pand xmm0,xmm6
db 102,15,56,0,232    ; pshufb xmm5,xmm0
movdqa xmm3,xmm7
pxor xmm0,xmm1
db 102,15,56,0,217    ; pshufb xmm3,xmm1
movdqa xmm4,xmm7
pxor xmm3,xmm5
db 102,15,56,0,224    ; pshufb xmm4,xmm0
movdqa xmm2,xmm7
pxor xmm4,xmm5
db 102,15,56,0,211    ; pshufb xmm2,xmm3
movdqa xmm3,xmm7
pxor xmm2,xmm0
db 102,15,56,0,220    ; pshufb xmm3,xmm4
movdqu xmm5,[edx]     ; next round key
pxor xmm3,xmm1
jnz NEAR L$001enc_loop
movdqa xmm4,[96+ebp]
movdqa xmm0,[112+ebp]
db 102,15,56,0,226    ; pshufb xmm4,xmm2
pxor xmm4,xmm5
db 102,15,56,0,195    ; pshufb xmm0,xmm3
movdqa xmm1,[64+ecx*1+ebx]
pxor xmm0,xmm4
db 102,15,56,0,193    ; pshufb xmm0,xmm1
ret
; __vpaes_decrypt_core: decrypt one 16-byte block held in xmm0.
; In:  xmm0 = input block, edx = key schedule (round count is read from
;      [240+edx]), ebp/xmm6/xmm7 as set up by __vpaes_preheat.
; Out: xmm0 = output block.
; Clobbers: eax, ebx, ecx, edx (advanced through the schedule), xmm1-xmm5.
; "db 102,15,56,0,NN" is a hand-encoded pshufb and "db 102,15,58,15,NN,imm"
; a hand-encoded palignr; the decoded form is given next to each one.
; Loop control: eax counts rounds. The jnz at the bottom of L$002dec_entry
; consumes the flags of "add edx,16" on the first pass and of "sub eax,1"
; on later passes; the SSE instructions and lea in between do not modify
; EFLAGS.
align 16
__vpaes_decrypt_core:
lea ebx,[608+ebp]
mov eax,DWORD [240+edx]
movdqa xmm1,xmm6
movdqa xmm2,[ebx-64]
pandn xmm1,xmm0
mov ecx,eax
psrld xmm1,4
movdqu xmm5,[edx]
shl ecx,4
pand xmm0,xmm6
db 102,15,56,0,208    ; pshufb xmm2,xmm0
movdqa xmm0,[ebx-48]
xor ecx,48
db 102,15,56,0,193    ; pshufb xmm0,xmm1
and ecx,48
pxor xmm2,xmm5
movdqa xmm5,[176+ebp]
pxor xmm0,xmm2
add edx,16
lea ecx,[ecx*1+ebx-352]    ; ecx = address of the final output permutation row
jmp NEAR L$002dec_entry
align 16
L$003dec_loop:
movdqa xmm4,[ebx-32]
movdqa xmm1,[ebx-16]
db 102,15,56,0,226    ; pshufb xmm4,xmm2
db 102,15,56,0,203    ; pshufb xmm1,xmm3
pxor xmm0,xmm4
movdqa xmm4,[ebx]
pxor xmm0,xmm1
movdqa xmm1,[16+ebx]
db 102,15,56,0,226    ; pshufb xmm4,xmm2
db 102,15,56,0,197    ; pshufb xmm0,xmm5
db 102,15,56,0,203    ; pshufb xmm1,xmm3
pxor xmm0,xmm4
movdqa xmm4,[32+ebx]
pxor xmm0,xmm1
movdqa xmm1,[48+ebx]
db 102,15,56,0,226    ; pshufb xmm4,xmm2
db 102,15,56,0,197    ; pshufb xmm0,xmm5
db 102,15,56,0,203    ; pshufb xmm1,xmm3
pxor xmm0,xmm4
movdqa xmm4,[64+ebx]
pxor xmm0,xmm1
movdqa xmm1,[80+ebx]
db 102,15,56,0,226    ; pshufb xmm4,xmm2
db 102,15,56,0,197    ; pshufb xmm0,xmm5
db 102,15,56,0,203    ; pshufb xmm1,xmm3
pxor xmm0,xmm4
add edx,16
db 102,15,58,15,237,12    ; palignr xmm5,xmm5,12
pxor xmm0,xmm1
sub eax,1             ; sets the flags tested by the jnz below
L$002dec_entry:
movdqa xmm1,xmm6
movdqa xmm2,[ebp-32]
pandn xmm1,xmm0
pand xmm0,xmm6
psrld xmm1,4
db 102,15,56,0,208    ; pshufb xmm2,xmm0
movdqa xmm3,xmm7
pxor xmm0,xmm1
db 102,15,56,0,217    ; pshufb xmm3,xmm1
movdqa xmm4,xmm7
pxor xmm3,xmm2
db 102,15,56,0,224    ; pshufb xmm4,xmm0
pxor xmm4,xmm2
movdqa xmm2,xmm7
db 102,15,56,0,211    ; pshufb xmm2,xmm3
movdqa xmm3,xmm7
pxor xmm2,xmm0
db 102,15,56,0,220    ; pshufb xmm3,xmm4
movdqu xmm0,[edx]     ; next round key
pxor xmm3,xmm1
jnz NEAR L$003dec_loop
movdqa xmm4,[96+ebx]
db 102,15,56,0,226    ; pshufb xmm4,xmm2
pxor xmm4,xmm0
movdqa xmm0,[112+ebx]
movdqa xmm2,[ecx]
db 102,15,56,0,195    ; pshufb xmm0,xmm3
pxor xmm0,xmm4
db 102,15,56,0,194    ; pshufb xmm0,xmm2
ret
; __vpaes_schedule_core: expand a user key into a vpaes key schedule.
; In:  esi = user key, eax = key size in bits, edx = where the first
;      schedule entry is written, edi = 0 for an encryption schedule and
;      non-zero for a decryption schedule, ecx = offset (0/16/32/48) into
;      the permutation rows at [256+ebp],
;      ebp = (L$_vpaes_consts+0x30) minus the address of the label placed
;      directly after the caller's call instruction.
; The caller must provide a 16-byte-aligned scratch frame: with the return
; address at [esp], [4+esp] and [20+esp] are used as aligned 16-byte slots.
; Key sizes: eax > 192 takes the 256-bit path, eax == 192 the 192-bit path,
; anything lower the 128-bit path.
; On exit xmm0-xmm7 are zeroed so no key material is left in registers.
; Clobbers: eax, ebx, ecx, edx, esi, ebp, xmm0-xmm7.
align 16
__vpaes_schedule_core:
add ebp,DWORD [esp]    ; [esp] is the return address, i.e. the caller's pic_point label
movdqu xmm0,[esi]
movdqa xmm2,[320+ebp]
movdqa xmm3,xmm0
lea ebx,[ebp]
movdqa [4+esp],xmm2    ; slot later read and updated by __vpaes_schedule_round as [8+esp]
call __vpaes_schedule_transform
movdqa xmm7,xmm0
test edi,edi
jnz NEAR L$004schedule_am_decrypting
movdqu [edx],xmm0
jmp NEAR L$005schedule_go
L$004schedule_am_decrypting:
movdqa xmm1,[256+ecx*1+ebp]
db 102,15,56,0,217    ; pshufb xmm3,xmm1
movdqu [edx],xmm3
xor ecx,48
L$005schedule_go:
cmp eax,192
ja NEAR L$006schedule_256
je NEAR L$007schedule_192
L$008schedule_128:
mov eax,10            ; iteration count for the 128-bit schedule
L$009loop_schedule_128:
call __vpaes_schedule_round
dec eax
jz NEAR L$010schedule_mangle_last
call __vpaes_schedule_mangle
jmp NEAR L$009loop_schedule_128
align 16
L$007schedule_192:
movdqu xmm0,[8+esi]   ; bytes 8..23 of the user key
call __vpaes_schedule_transform
movdqa xmm6,xmm0
pxor xmm4,xmm4
movhlps xmm6,xmm4     ; clear the low 64 bits of xmm6
mov eax,4             ; iteration count for the 192-bit schedule
L$011loop_schedule_192:
call __vpaes_schedule_round
db 102,15,58,15,198,8    ; palignr xmm0,xmm6,8
call __vpaes_schedule_mangle
call __vpaes_schedule_192_smear
call __vpaes_schedule_mangle
call __vpaes_schedule_round
dec eax
jz NEAR L$010schedule_mangle_last
call __vpaes_schedule_mangle
call __vpaes_schedule_192_smear
jmp NEAR L$011loop_schedule_192
align 16
L$006schedule_256:
movdqu xmm0,[16+esi]  ; bytes 16..31 of the user key
call __vpaes_schedule_transform
mov eax,7             ; iteration count for the 256-bit schedule
L$012loop_schedule_256:
call __vpaes_schedule_mangle
movdqa xmm6,xmm0
call __vpaes_schedule_round
dec eax
jz NEAR L$010schedule_mangle_last
call __vpaes_schedule_mangle
pshufd xmm0,xmm0,255
movdqa [20+esp],xmm7  ; park xmm7 while the low round runs on xmm6
movdqa xmm7,xmm6
call L$_vpaes_schedule_low_round
movdqa xmm7,[20+esp]
jmp NEAR L$012loop_schedule_256
align 16
L$010schedule_mangle_last:
lea ebx,[384+ebp]
test edi,edi
jnz NEAR L$013schedule_mangle_last_dec
movdqa xmm1,[256+ecx*1+ebp]
db 102,15,56,0,193    ; pshufb xmm0,xmm1
lea ebx,[352+ebp]
add edx,32
L$013schedule_mangle_last_dec:
add edx,-16
pxor xmm0,[336+ebp]
call __vpaes_schedule_transform
movdqu [edx],xmm0
pxor xmm0,xmm0
pxor xmm1,xmm1
pxor xmm2,xmm2
pxor xmm3,xmm3
pxor xmm4,xmm4
pxor xmm5,xmm5
pxor xmm6,xmm6
pxor xmm7,xmm7
ret
; __vpaes_schedule_192_smear: helper of the 192-bit key schedule.
; In:  xmm6, xmm7.
; Out: xmm0 = xmm6 XOR pshufd(xmm6,0x80) XOR pshufd(xmm7,0xFE);
;      xmm6 = the same value with its low 64 bits cleared; xmm1 = 0.
align 16
__vpaes_schedule_192_smear:
pshufd xmm1,xmm6,128
pshufd xmm0,xmm7,254
pxor xmm6,xmm1
pxor xmm1,xmm1
pxor xmm6,xmm0
movdqa xmm0,xmm6
movhlps xmm6,xmm1     ; low qword of xmm6 = high qword of xmm1 = 0
ret
; __vpaes_schedule_round: one full round of the key schedule.
; In:  xmm0 = input word(s), xmm7 = previous schedule value, ebp = address
;      of L$_vpaes_consts+0x30. [8+esp] is a 16-byte value kept in the
;      scratch frame of the caller of __vpaes_schedule_core (stored there
;      as [4+esp] before this routine's return address was pushed); it is
;      rotated by one byte on every call and one byte of it is folded into
;      xmm7.
; Out: xmm0 = xmm7 = next schedule value.
; L$_vpaes_schedule_low_round is a second entry point that skips the
; first part and is called directly by the 256-bit schedule.
; Clobbers: xmm1-xmm5.
; "db 102,15,58,15,NN,imm" is a hand-encoded palignr and
; "db 102,15,56,0,NN" a hand-encoded pshufb.
align 16
__vpaes_schedule_round:
movdqa xmm2,[8+esp]
pxor xmm1,xmm1
db 102,15,58,15,202,15    ; palignr xmm1,xmm2,15
db 102,15,58,15,210,15    ; palignr xmm2,xmm2,15
pxor xmm7,xmm1
pshufd xmm0,xmm0,255
db 102,15,58,15,192,1     ; palignr xmm0,xmm0,1
movdqa [8+esp],xmm2
L$_vpaes_schedule_low_round:
movdqa xmm1,xmm7
pslldq xmm7,4
pxor xmm7,xmm1
movdqa xmm1,xmm7
pslldq xmm7,8
pxor xmm7,xmm1
pxor xmm7,[336+ebp]
movdqa xmm4,[ebp-16]
movdqa xmm5,[ebp-48]
movdqa xmm1,xmm4
pandn xmm1,xmm0
psrld xmm1,4
pand xmm0,xmm4
movdqa xmm2,[ebp-32]
db 102,15,56,0,208    ; pshufb xmm2,xmm0
pxor xmm0,xmm1
movdqa xmm3,xmm5
db 102,15,56,0,217    ; pshufb xmm3,xmm1
pxor xmm3,xmm2
movdqa xmm4,xmm5
db 102,15,56,0,224    ; pshufb xmm4,xmm0
pxor xmm4,xmm2
movdqa xmm2,xmm5
db 102,15,56,0,211    ; pshufb xmm2,xmm3
pxor xmm2,xmm0
movdqa xmm3,xmm5
db 102,15,56,0,220    ; pshufb xmm3,xmm4
pxor xmm3,xmm1
movdqa xmm4,[32+ebp]
db 102,15,56,0,226    ; pshufb xmm4,xmm2
movdqa xmm0,[48+ebp]
db 102,15,56,0,195    ; pshufb xmm0,xmm3
pxor xmm0,xmm4
pxor xmm0,xmm7
movdqa xmm7,xmm0
ret
; __vpaes_schedule_transform: apply a nibble-wise table transform to xmm0.
; In:  xmm0 = value, ebx = address of two consecutive 16-byte tables
;      ([ebx] is indexed by the low nibbles, [16+ebx] by the high nibbles),
;      ebp = address of L$_vpaes_consts+0x30 ([ebp-16] is the nibble mask).
; Out: xmm0 = table_hi[high nibbles] XOR table_lo[low nibbles].
; Clobbers: xmm1, xmm2.
align 16
__vpaes_schedule_transform:
movdqa xmm2,[ebp-16]
movdqa xmm1,xmm2
pandn xmm1,xmm0       ; xmm1 = bits of xmm0 outside the mask
psrld xmm1,4          ; ... shifted down to become indices
pand xmm0,xmm2        ; xmm0 = low nibbles
movdqa xmm2,[ebx]
db 102,15,56,0,208    ; pshufb xmm2,xmm0
movdqa xmm0,[16+ebx]
db 102,15,56,0,193    ; pshufb xmm0,xmm1
pxor xmm0,xmm2
ret
; __vpaes_schedule_mangle: convert the schedule value in xmm0 to its stored
; form and write it to the key schedule.
; In:  xmm0 = schedule value (preserved), edx = current schedule position,
;      edi = 0 for encryption / non-zero for decryption, ecx = offset into
;      the permutation rows at [256+ebp], ebp = L$_vpaes_consts+0x30.
; Encryption: edx advances by 16 before the store.
; Decryption: edx moves back by 16 before the store, and the value goes
;             through the four table pairs starting at [416+ebp].
; In both cases the result is permuted with the row at [256+ecx+ebp] and
; ecx is stepped to (ecx-16) AND 48.
; Clobbers: esi (decryption path only), xmm1-xmm5.
align 16
__vpaes_schedule_mangle:
movdqa xmm4,xmm0
movdqa xmm5,[128+ebp]
test edi,edi
jnz NEAR L$014schedule_mangle_dec
add edx,16
pxor xmm4,[336+ebp]
db 102,15,56,0,229    ; pshufb xmm4,xmm5
movdqa xmm3,xmm4
db 102,15,56,0,229    ; pshufb xmm4,xmm5
pxor xmm3,xmm4
db 102,15,56,0,229    ; pshufb xmm4,xmm5
pxor xmm3,xmm4
jmp NEAR L$015schedule_mangle_both
align 16
L$014schedule_mangle_dec:
movdqa xmm2,[ebp-16]
lea esi,[416+ebp]
movdqa xmm1,xmm2
pandn xmm1,xmm4
psrld xmm1,4          ; xmm1 = high nibbles
pand xmm4,xmm2        ; xmm4 = low nibbles
movdqa xmm2,[esi]
db 102,15,56,0,212    ; pshufb xmm2,xmm4
movdqa xmm3,[16+esi]
db 102,15,56,0,217    ; pshufb xmm3,xmm1
pxor xmm3,xmm2
db 102,15,56,0,221    ; pshufb xmm3,xmm5
movdqa xmm2,[32+esi]
db 102,15,56,0,212    ; pshufb xmm2,xmm4
pxor xmm2,xmm3
movdqa xmm3,[48+esi]
db 102,15,56,0,217    ; pshufb xmm3,xmm1
pxor xmm3,xmm2
db 102,15,56,0,221    ; pshufb xmm3,xmm5
movdqa xmm2,[64+esi]
db 102,15,56,0,212    ; pshufb xmm2,xmm4
pxor xmm2,xmm3
movdqa xmm3,[80+esi]
db 102,15,56,0,217    ; pshufb xmm3,xmm1
pxor xmm3,xmm2
db 102,15,56,0,221    ; pshufb xmm3,xmm5
movdqa xmm2,[96+esi]
db 102,15,56,0,212    ; pshufb xmm2,xmm4
pxor xmm2,xmm3
movdqa xmm3,[112+esi]
db 102,15,56,0,217    ; pshufb xmm3,xmm1
pxor xmm3,xmm2
add edx,-16
L$015schedule_mangle_both:
movdqa xmm1,[256+ecx*1+ebp]
db 102,15,56,0,217    ; pshufb xmm3,xmm1
add ecx,-16
and ecx,48
movdqu [edx],xmm3
ret
; _vpaes_set_encrypt_key: build an encryption key schedule.
; Stack arguments (offsets valid after the four pushes below):
;   [20+esp] = user key, [24+esp] = key size in bits, [28+esp] = key
;   schedule to fill.
; Stores bits/32 + 5 at offset 240 of the schedule as the round count used
; by the cores, then runs __vpaes_schedule_core with edi = 0 and ecx = 48.
; Returns 0 in eax. Preserves ebp, ebx, esi, edi.
global _vpaes_set_encrypt_key
align 16
_vpaes_set_encrypt_key:
L$_vpaes_set_encrypt_key_begin:
push ebp
push ebx
push esi
push edi
mov esi,DWORD [20+esp]
lea ebx,[esp-56]
mov eax,DWORD [24+esp]
and ebx,-16           ; 16-byte-aligned scratch frame below the current stack
mov edx,DWORD [28+esp]
xchg ebx,esp          ; switch to the scratch frame, old esp now in ebx
mov DWORD [48+esp],ebx    ; remember the original stack pointer
mov ebx,eax
shr ebx,5
add ebx,5
mov DWORD [240+edx],ebx
mov ecx,48
mov edi,0
lea ebp,[(L$_vpaes_consts+0x30-L$016pic_point)]    ; made absolute inside the callee
call __vpaes_schedule_core
L$016pic_point:
mov esp,DWORD [48+esp]
xor eax,eax
pop edi
pop esi
pop ebx
pop ebp
ret
; _vpaes_set_decrypt_key: build a decryption key schedule.
; Stack arguments (offsets valid after the four pushes below):
;   [20+esp] = user key, [24+esp] = key size in bits, [28+esp] = key
;   schedule to fill.
; Stores bits/32 + 5 at offset 240 of the schedule as the round count, then
; runs __vpaes_schedule_core with edi = 1, edx pointing at schedule offset
; 16*rounds + 16 (the schedule is written from the end towards the start)
; and ecx = ((bits >> 1) AND 32) XOR 32.
; Returns 0 in eax. Preserves ebp, ebx, esi, edi.
global _vpaes_set_decrypt_key
align 16
_vpaes_set_decrypt_key:
L$_vpaes_set_decrypt_key_begin:
push ebp
push ebx
push esi
push edi
mov esi,DWORD [20+esp]
lea ebx,[esp-56]
mov eax,DWORD [24+esp]
and ebx,-16           ; 16-byte-aligned scratch frame below the current stack
mov edx,DWORD [28+esp]
xchg ebx,esp          ; switch to the scratch frame, old esp now in ebx
mov DWORD [48+esp],ebx    ; remember the original stack pointer
mov ebx,eax
shr ebx,5
add ebx,5
mov DWORD [240+edx],ebx
shl ebx,4
lea edx,[16+ebx*1+edx]
mov edi,1
mov ecx,eax
shr ecx,1
and ecx,32
xor ecx,32
lea ebp,[(L$_vpaes_consts+0x30-L$017pic_point)]    ; made absolute inside the callee
call __vpaes_schedule_core
L$017pic_point:
mov esp,DWORD [48+esp]
xor eax,eax
pop edi
pop esi
pop ebx
pop ebp
ret
; _vpaes_encrypt: encrypt a single 16-byte block.
; Stack arguments (offsets valid after the four pushes below):
;   [20+esp] = source block, [24+esp] = destination block,
;   [28+esp] = key schedule.
; Source and destination are accessed with unaligned loads/stores.
; Preserves ebp, ebx, esi, edi; no return value is set.
global _vpaes_encrypt
align 16
_vpaes_encrypt:
L$_vpaes_encrypt_begin:
push ebp
push ebx
push esi
push edi
lea ebp,[(L$_vpaes_consts+0x30-L$018pic_point)]    ; made absolute by __vpaes_preheat
call __vpaes_preheat
L$018pic_point:
mov esi,DWORD [20+esp]
lea ebx,[esp-56]
mov edi,DWORD [24+esp]
and ebx,-16           ; 16-byte-aligned scratch frame below the current stack
mov edx,DWORD [28+esp]
xchg ebx,esp          ; switch to the scratch frame, old esp now in ebx
mov DWORD [48+esp],ebx    ; remember the original stack pointer
movdqu xmm0,[esi]
call __vpaes_encrypt_core
movdqu [edi],xmm0
mov esp,DWORD [48+esp]
pop edi
pop esi
pop ebx
pop ebp
ret
; _vpaes_decrypt: decrypt a single 16-byte block.
; Stack arguments (offsets valid after the four pushes below):
;   [20+esp] = source block, [24+esp] = destination block,
;   [28+esp] = key schedule.
; Source and destination are accessed with unaligned loads/stores.
; Preserves ebp, ebx, esi, edi; no return value is set.
global _vpaes_decrypt
align 16
_vpaes_decrypt:
L$_vpaes_decrypt_begin:
push ebp
push ebx
push esi
push edi
lea ebp,[(L$_vpaes_consts+0x30-L$019pic_point)]    ; made absolute by __vpaes_preheat
call __vpaes_preheat
L$019pic_point:
mov esi,DWORD [20+esp]
lea ebx,[esp-56]
mov edi,DWORD [24+esp]
and ebx,-16           ; 16-byte-aligned scratch frame below the current stack
mov edx,DWORD [28+esp]
xchg ebx,esp          ; switch to the scratch frame, old esp now in ebx
mov DWORD [48+esp],ebx    ; remember the original stack pointer
movdqu xmm0,[esi]
call __vpaes_decrypt_core
movdqu [edi],xmm0
mov esp,DWORD [48+esp]
pop edi
pop esi
pop ebx
pop ebp
ret
; _vpaes_cbc_encrypt: CBC-mode encryption or decryption.
; Stack arguments (offsets valid after the four pushes below):
;   [20+esp] = input, [24+esp] = output, [28+esp] = length in bytes,
;   [32+esp] = key schedule, [36+esp] = 16-byte chaining value (IV),
;   [40+esp] = direction flag (0 selects decryption, anything else
;   encryption).
; A length below 16 returns immediately without touching anything. Only
; whole 16-byte blocks are processed; a trailing partial block is ignored.
; The chaining value is written back to the IV buffer on exit.
; Scratch frame layout (16-byte aligned): [esp] = output minus input,
; [4+esp] = key schedule, [8+esp] = IV pointer, [16+esp] and [32+esp] =
; 16-byte temporaries for decryption, [48+esp] = original stack pointer.
; Preserves ebp, ebx, esi, edi.
global _vpaes_cbc_encrypt
align 16
_vpaes_cbc_encrypt:
L$_vpaes_cbc_encrypt_begin:
push ebp
push ebx
push esi
push edi
mov esi,DWORD [20+esp]
mov edi,DWORD [24+esp]
mov eax,DWORD [28+esp]
mov edx,DWORD [32+esp]
sub eax,16
jc NEAR L$020cbc_abort    ; fewer than 16 bytes
lea ebx,[esp-56]
mov ebp,DWORD [36+esp]
and ebx,-16
mov ecx,DWORD [40+esp]
xchg ebx,esp          ; switch to the scratch frame, old esp now in ebx
movdqu xmm1,[ebp]     ; xmm1 = chaining value
sub edi,esi           ; edi = output minus input
mov DWORD [48+esp],ebx
mov DWORD [esp],edi
mov DWORD [4+esp],edx
mov DWORD [8+esp],ebp
mov edi,eax           ; edi = length - 16, used as the loop counter
lea ebp,[(L$_vpaes_consts+0x30-L$021pic_point)]    ; made absolute by __vpaes_preheat
call __vpaes_preheat
L$021pic_point:
cmp ecx,0
je NEAR L$022cbc_dec_loop
jmp NEAR L$023cbc_enc_loop
align 16
L$023cbc_enc_loop:
movdqu xmm0,[esi]
pxor xmm0,xmm1        ; plaintext XOR previous ciphertext (or IV)
call __vpaes_encrypt_core
mov ebx,DWORD [esp]   ; the core clobbers ebx and edx, so reload both
mov edx,DWORD [4+esp]
movdqa xmm1,xmm0      ; new chaining value
movdqu [esi*1+ebx],xmm0
lea esi,[16+esi]
sub edi,16
jnc NEAR L$023cbc_enc_loop    ; continue while another full block remains
jmp NEAR L$024cbc_done
align 16
L$022cbc_dec_loop:
movdqu xmm0,[esi]
movdqa [16+esp],xmm1  ; previous ciphertext (or IV)
movdqa [32+esp],xmm0  ; current ciphertext, becomes the next chaining value
call __vpaes_decrypt_core
mov ebx,DWORD [esp]   ; the core clobbers ebx and edx, so reload both
mov edx,DWORD [4+esp]
pxor xmm0,[16+esp]
movdqa xmm1,[32+esp]
movdqu [esi*1+ebx],xmm0
lea esi,[16+esi]
sub edi,16
jnc NEAR L$022cbc_dec_loop    ; continue while another full block remains
L$024cbc_done:
mov ebx,DWORD [8+esp]
mov esp,DWORD [48+esp]
movdqu [ebx],xmm1     ; write the final chaining value back to the IV buffer
L$020cbc_abort:
pop edi
pop esi
pop ebx
pop ebp
ret

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,469 @@
%ifidn __OUTPUT_FORMAT__,obj
section code use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01010000h
%error yasm version 1.1.0 or later needed.
%endif
; Yasm automatically includes .00 and complains about redefining it.
; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
%else
$@feat.00 equ 1
%endif
section .text code align=64
%else
section .text code
%endif
;extern _OPENSSL_ia32cap_P
; _bn_mul_mont: Montgomery multiplication, entry, frame setup and start of
; the SSE2 path.
; Stack arguments (offsets valid after the four pushes below):
;   [20+esp] = rp (result), [24+esp] = ap, [28+esp] = bp, [32+esp] = np
;   (modulus), [36+esp] = pointer to n0, [40+esp] = num (word count).
; Returns 0 in eax without doing any work when num < 4 (signed compare);
; the completed paths return 1.
; Local frame after setup (esp is 64-byte aligned):
;   [4+esp] = rp, [8+esp] = ap, [12+esp] = bp, [16+esp] = np,
;   [20+esp] = n0 value, [24+esp] = original esp,
;   [32+esp] onwards = temporary vector of num+2 words.
; ebx = num-1 for the code that follows.
; Dispatch: bit 26 of the first OPENSSL_ia32cap_P word clear jumps to
; L$001non_sse2; otherwise execution continues below with MMX registers
; and pmuludq.
global _bn_mul_mont
align 16
_bn_mul_mont:
L$_bn_mul_mont_begin:
push ebp
push ebx
push esi
push edi
xor eax,eax           ; return value for the early exit
mov edi,DWORD [40+esp]
cmp edi,4
jl NEAR L$000just_leave
lea esi,[20+esp]      ; esi = address of the argument block
lea edx,[24+esp]
mov ebp,esp           ; keep the original stack pointer
add edi,2
neg edi
lea esp,[edi*4+esp-32]    ; room for num+2 words plus 32 bytes
neg edi
; NOTE(review): the next nine instructions move esp down relative to the
; argument-block address (modulo 2048, then bit 11) before aligning it to
; 64 bytes - presumably to avoid cache aliasing between the temporary
; vector and the caller's data; confirm against the upstream perlasm.
mov eax,esp
sub eax,edx
and eax,2047
sub esp,eax
xor edx,esp
and edx,2048
xor edx,2048
sub esp,edx
and esp,-64
mov eax,DWORD [esi]
mov ebx,DWORD [4+esi]
mov ecx,DWORD [8+esi]
mov edx,DWORD [12+esi]
mov esi,DWORD [16+esi]
mov esi,DWORD [esi]   ; dereference the n0 pointer
mov DWORD [4+esp],eax
mov DWORD [8+esp],ebx
mov DWORD [12+esp],ecx
mov DWORD [16+esp],edx
mov DWORD [20+esp],esi
lea ebx,[edi-3]       ; edi = num+2 here, so ebx = num-1
mov DWORD [24+esp],ebp
lea eax,[_OPENSSL_ia32cap_P]
bt DWORD [eax],26
jnc NEAR L$001non_sse2
mov eax,-1
movd mm7,eax          ; mm7 = 0x00000000FFFFFFFF, low-dword mask
mov esi,DWORD [8+esp] ; esi = ap
mov edi,DWORD [12+esp]    ; edi = bp
mov ebp,DWORD [16+esp]    ; ebp = np
xor edx,edx           ; edx = index into bp (outer loop)
xor ecx,ecx           ; ecx = index into ap/np (inner loop)
movd mm4,DWORD [edi]  ; mm4 = bp[0]
movd mm5,DWORD [esi]
movd mm3,DWORD [ebp]
pmuludq mm5,mm4       ; ap[0]*bp[0]
movq mm2,mm5
movq mm0,mm5
pand mm0,mm7
pmuludq mm5,[20+esp]  ; multiply the low dword by n0
pmuludq mm3,mm5       ; np[0] times that factor
paddq mm3,mm0
movd mm1,DWORD [4+ebp]
movd mm0,DWORD [4+esi]
psrlq mm2,32
psrlq mm3,32
inc ecx
; L$0021st: first pass of the SSE2 path (multiplier word bp[0]).
; mm4 = bp[0], mm5 = the per-pass factor multiplied into np, mm7 =
; low-dword mask, mm2 and mm3 = running carries of the ap*bp and np*factor
; products, mm0/mm1 = preloaded ap[ecx] / np[ecx]. Each iteration stores
; one result word at [28+ecx*4+esp], i.e. one slot below the word index.
; After the loop the final word and the 64-bit carry are stored and edx
; (the bp index) becomes 1.
align 16
L$0021st:
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
paddq mm3,mm1
movq mm0,mm2
pand mm0,mm7
movd mm1,DWORD [4+ecx*4+ebp]
paddq mm3,mm0
movd mm0,DWORD [4+ecx*4+esi]
psrlq mm2,32
movd DWORD [28+ecx*4+esp],mm3
psrlq mm3,32
lea ecx,[1+ecx]
cmp ecx,ebx
jl NEAR L$0021st
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
paddq mm3,mm1
movq mm0,mm2
pand mm0,mm7
paddq mm3,mm0
movd DWORD [28+ecx*4+esp],mm3
psrlq mm2,32
psrlq mm3,32
paddq mm3,mm2
movq [32+ebx*4+esp],mm3   ; top word and carry word of the temporary vector
inc edx
; L$003outer: start of one outer iteration of the SSE2 path for multiplier
; word bp[edx]. Handles word 0: adds ap[0]*bp[edx] to the first word of
; the temporary vector at [32+esp], derives the per-pass factor by
; multiplying the low dword of that sum with n0 ([20+esp]), and primes the
; carries (mm2, mm3) and preloads (mm0, mm1, mm6) for L$004inner.
; ebx is decremented once here and used as the inner loop counter.
L$003outer:
xor ecx,ecx
movd mm4,DWORD [edx*4+edi]
movd mm5,DWORD [esi]
movd mm6,DWORD [32+esp]
movd mm3,DWORD [ebp]
pmuludq mm5,mm4
paddq mm5,mm6
movq mm0,mm5
movq mm2,mm5
pand mm0,mm7
pmuludq mm5,[20+esp]
pmuludq mm3,mm5
paddq mm3,mm0
movd mm6,DWORD [36+esp]
movd mm1,DWORD [4+ebp]
movd mm0,DWORD [4+esi]
psrlq mm2,32
psrlq mm3,32
paddq mm2,mm6
inc ecx
dec ebx
; L$004inner: inner loop of the SSE2 path. For each word it accumulates
; ap[ecx]*bp[edx] (mm0*mm4) and np[ecx]*factor (mm1*mm5) together with the
; previous temporary-vector word (mm6), and stores the result one slot
; lower at [28+ecx*4+esp]. ebx counts down to zero; the lea between the
; dec and the jnz does not modify the flags. Afterwards ebx is restored
; from ecx, the top words are written, and the outer loop repeats while
; edx <= ebx. emms clears the MMX state before the common tail.
L$004inner:
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
paddq mm3,mm1
movq mm0,mm2
movd mm6,DWORD [36+ecx*4+esp]
pand mm0,mm7
movd mm1,DWORD [4+ecx*4+ebp]
paddq mm3,mm0
movd mm0,DWORD [4+ecx*4+esi]
psrlq mm2,32
movd DWORD [28+ecx*4+esp],mm3
psrlq mm3,32
paddq mm2,mm6
dec ebx
lea ecx,[1+ecx]
jnz NEAR L$004inner
mov ebx,ecx
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
paddq mm3,mm1
movq mm0,mm2
pand mm0,mm7
paddq mm3,mm0
movd DWORD [28+ecx*4+esp],mm3
psrlq mm2,32
psrlq mm3,32
movd mm6,DWORD [36+ebx*4+esp]
paddq mm3,mm2
paddq mm3,mm6
movq [32+ebx*4+esp],mm3
lea edx,[1+edx]
cmp edx,ebx
jle NEAR L$003outer
emms
jmp NEAR L$005common_tail
; L$001non_sse2: integer-only path.
; esi = ap, edi = bp, ebx = num-1. When ap == bp and num is even the
; dedicated squaring code at L$006bn_sqr_mont is used; the jz tests the
; result of "or ebp,edx" because the mov in between leaves the flags alone.
; Otherwise [28+esp] is set to the address one past the last word of bp
; (end marker for the outer loop) and L$007mull computes ap[] * bp[0] into
; the temporary vector at [32+esp]. The code after the loop stores the top
; words, computes the reduction factor edi = n0 * tp[0] and starts the
; first reduction pass with np[0] before jumping into L$0082ndmadd.
align 16
L$001non_sse2:
mov esi,DWORD [8+esp]
lea ebp,[1+ebx]       ; ebp = num
mov edi,DWORD [12+esp]
xor ecx,ecx
mov edx,esi
and ebp,1             ; ebp = num AND 1
sub edx,edi           ; edx = ap - bp
lea eax,[4+ebx*4+edi] ; eax = &bp[num]
or ebp,edx            ; zero only if num is even and ap == bp
mov edi,DWORD [edi]   ; edi = bp[0]
jz NEAR L$006bn_sqr_mont
mov DWORD [28+esp],eax
mov eax,DWORD [esi]
xor edx,edx
align 16
L$007mull:
mov ebp,edx           ; carry from the previous word
mul edi               ; edx:eax = ap[j] * bp[0]
add ebp,eax
lea ecx,[1+ecx]
adc edx,0
mov eax,DWORD [ecx*4+esi]
cmp ecx,ebx
mov DWORD [28+ecx*4+esp],ebp
jl NEAR L$007mull
mov ebp,edx
mul edi
mov edi,DWORD [20+esp]    ; edi = n0
add eax,ebp
mov esi,DWORD [16+esp]    ; esi = np
adc edx,0
imul edi,DWORD [32+esp]   ; edi = n0 * tp[0]
mov DWORD [32+ebx*4+esp],eax
xor ecx,ecx
mov DWORD [36+ebx*4+esp],edx
mov DWORD [40+ebx*4+esp],ecx
mov eax,DWORD [esi]
mul edi
add eax,DWORD [32+esp]    ; only the carry of this addition is kept
mov eax,DWORD [4+esi]
adc edx,0
inc ecx
jmp NEAR L$0082ndmadd
; L$0091stmadd: multiply-accumulate pass of the integer path for one
; multiplier word: tp[] += ap[] * edi, where edi = the current bp word,
; esi = ap and ebx = num-1. The code after the loop handles the last word,
; updates the two top words of the temporary vector, computes the
; reduction factor edi = n0 * tp[0], switches esi to np and starts the
; reduction with np[0]; execution then falls through into L$0082ndmadd
; with ecx = 1.
align 16
L$0091stmadd:
mov ebp,edx
mul edi
add ebp,DWORD [32+ecx*4+esp]
lea ecx,[1+ecx]
adc edx,0
add ebp,eax
mov eax,DWORD [ecx*4+esi]
adc edx,0
cmp ecx,ebx
mov DWORD [28+ecx*4+esp],ebp
jl NEAR L$0091stmadd
mov ebp,edx
mul edi
add eax,DWORD [32+ebx*4+esp]
mov edi,DWORD [20+esp]    ; edi = n0
adc edx,0
mov esi,DWORD [16+esp]    ; esi = np
add ebp,eax
adc edx,0
imul edi,DWORD [32+esp]   ; edi = n0 * tp[0]
xor ecx,ecx
add edx,DWORD [36+ebx*4+esp]
mov DWORD [32+ebx*4+esp],ebp
adc ecx,0
mov eax,DWORD [esi]
mov DWORD [36+ebx*4+esp],edx
mov DWORD [40+ebx*4+esp],ecx
mul edi
add eax,DWORD [32+esp]    ; only the carry of this addition is kept
mov eax,DWORD [4+esi]
adc edx,0
mov ecx,1
; L$0082ndmadd: reduction pass of the integer path: tp[] += np[] * edi
; with the result stored one word lower ([24+ecx*4+esp] after ecx has been
; incremented), where edi = reduction factor, esi = np, ebx = num-1.
; After the loop the top words are folded down, the bp pointer kept at
; [12+esp] is advanced by one word and compared with the end marker at
; [28+esp]; when they match the multiplication is complete and control
; goes to L$005common_tail, otherwise the next bp word is loaded and
; L$0091stmadd runs again.
align 16
L$0082ndmadd:
mov ebp,edx
mul edi
add ebp,DWORD [32+ecx*4+esp]
lea ecx,[1+ecx]
adc edx,0
add ebp,eax
mov eax,DWORD [ecx*4+esi]
adc edx,0
cmp ecx,ebx
mov DWORD [24+ecx*4+esp],ebp
jl NEAR L$0082ndmadd
mov ebp,edx
mul edi
add ebp,DWORD [32+ebx*4+esp]
adc edx,0
add ebp,eax
adc edx,0
mov DWORD [28+ebx*4+esp],ebp
xor eax,eax
mov ecx,DWORD [12+esp]
add edx,DWORD [36+ebx*4+esp]
adc eax,DWORD [40+ebx*4+esp]
lea ecx,[4+ecx]       ; next bp word; lea keeps the carry chain intact
mov DWORD [32+ebx*4+esp],edx
cmp ecx,DWORD [28+esp]
mov DWORD [36+ebx*4+esp],eax
je NEAR L$005common_tail
mov edi,DWORD [ecx]
mov esi,DWORD [8+esp]
mov DWORD [12+esp],ecx
xor ecx,ecx
xor edx,edx
mov eax,DWORD [esi]
jmp NEAR L$0091stmadd
; L$006bn_sqr_mont: entry of the squaring path (reached when ap == bp and
; num is even). On entry ebx = num-1, ecx = 0, edi = ap[0], esi = ap.
; [esp] keeps num-1 and [12+esp] is reused as the outer word index
; (starting at 0). Computes ap[0]^2, stores its low word as tp[0] and
; splits the high word into edx = high >> 1 and ebx = high AND 1, the form
; expected by the doubling loop that follows.
align 16
L$006bn_sqr_mont:
mov DWORD [esp],ebx
mov DWORD [12+esp],ecx
mov eax,edi
mul edi
mov DWORD [32+esp],eax
mov ebx,edx
shr edx,1
and ebx,1
inc ecx
; L$010sqr: first row of the squaring path. For each j it forms
; ap[j]*ap[0] plus carry, stores twice the low word plus the bit carried
; in ebx, and keeps the bit shifted out (eax >> 31) for the next word.
; After the loop the last product and the two top words are written, the
; reduction factor edi = n0 * tp[0] is computed, esi is switched to np and
; the first reduction step with np[0] is started; ebx takes the final ecx
; value and ecx is set to 1 for L$0113rdmadd.
align 16
L$010sqr:
mov eax,DWORD [ecx*4+esi]
mov ebp,edx
mul edi
add eax,ebp
lea ecx,[1+ecx]
adc edx,0
lea ebp,[eax*2+ebx]   ; doubled low word plus the bit from the previous word
shr eax,31            ; bit shifted out by the doubling
cmp ecx,DWORD [esp]
mov ebx,eax
mov DWORD [28+ecx*4+esp],ebp
jl NEAR L$010sqr
mov eax,DWORD [ecx*4+esi]
mov ebp,edx
mul edi
add eax,ebp
mov edi,DWORD [20+esp]    ; edi = n0
adc edx,0
mov esi,DWORD [16+esp]    ; esi = np
lea ebp,[eax*2+ebx]
imul edi,DWORD [32+esp]   ; edi = n0 * tp[0]
shr eax,31
mov DWORD [32+ecx*4+esp],ebp
lea ebp,[edx*2+eax]
mov eax,DWORD [esi]
shr edx,31
mov DWORD [36+ecx*4+esp],ebp
mov DWORD [40+ecx*4+esp],edx
mul edi
add eax,DWORD [32+esp]    ; only the carry of this addition is kept
mov ebx,ecx
adc edx,0
mov eax,DWORD [4+esi]
mov ecx,1
; L$0113rdmadd: reduction pass of the squaring path, unrolled to handle
; two words per iteration: tp[] += np[] * edi with the result stored one
; word lower, where edi = reduction factor, esi = np, ebx = num-1.
; After the loop the top words are folded down. If the outer index kept at
; [12+esp] equals ebx the squaring is complete and control goes to
; L$005common_tail. Otherwise the next word ap[i] is loaded, its square is
; added into tp[i], and either L$013sqradd (more cross products to add) or
; L$012sqrlast (i was the last index) follows.
align 16
L$0113rdmadd:
mov ebp,edx
mul edi
add ebp,DWORD [32+ecx*4+esp]
adc edx,0
add ebp,eax
mov eax,DWORD [4+ecx*4+esi]
adc edx,0
mov DWORD [28+ecx*4+esp],ebp
mov ebp,edx
mul edi
add ebp,DWORD [36+ecx*4+esp]
lea ecx,[2+ecx]
adc edx,0
add ebp,eax
mov eax,DWORD [ecx*4+esi]
adc edx,0
cmp ecx,ebx
mov DWORD [24+ecx*4+esp],ebp
jl NEAR L$0113rdmadd
mov ebp,edx
mul edi
add ebp,DWORD [32+ebx*4+esp]
adc edx,0
add ebp,eax
adc edx,0
mov DWORD [28+ebx*4+esp],ebp
mov ecx,DWORD [12+esp]    ; ecx = outer index
xor eax,eax
mov esi,DWORD [8+esp]     ; esi = ap
add edx,DWORD [36+ebx*4+esp]
adc eax,DWORD [40+ebx*4+esp]
mov DWORD [32+ebx*4+esp],edx
cmp ecx,ebx
mov DWORD [36+ebx*4+esp],eax
je NEAR L$005common_tail
mov edi,DWORD [4+ecx*4+esi]   ; edi = ap[i+1]
lea ecx,[1+ecx]
mov eax,edi
mov DWORD [12+esp],ecx
mul edi               ; square of the new word
add eax,DWORD [32+ecx*4+esp]
adc edx,0
mov DWORD [32+ecx*4+esp],eax
xor ebp,ebp
cmp ecx,ebx
lea ecx,[1+ecx]       ; lea keeps the flags of the cmp for the je
je NEAR L$012sqrlast
mov ebx,edx
shr edx,1
and ebx,1
; L$013sqradd: adds the doubled cross products ap[j]*ap[i] for j > i into
; the temporary vector. edi = ap[i], esi = ap, edx = running carry, ebx =
; bit carried between words by the doubling, [esp] = num-1 (loop bound).
; After the loop edx is doubled with the pending bit added and ebp
; receives the bits that overflow out of it.
align 16
L$013sqradd:
mov eax,DWORD [ecx*4+esi]
mov ebp,edx
mul edi
add eax,ebp
lea ebp,[eax*1+eax]   ; doubled low word; lea keeps the carry of the add for adc
adc edx,0
shr eax,31            ; bit shifted out by the doubling
add ebp,DWORD [32+ecx*4+esp]
lea ecx,[1+ecx]
adc eax,0
add ebp,ebx
adc eax,0
cmp ecx,DWORD [esp]
mov DWORD [28+ecx*4+esp],ebp
mov ebx,eax
jle NEAR L$013sqradd
mov ebp,edx
add edx,edx
shr ebp,31
add edx,ebx
adc ebp,0
; L$012sqrlast: closes one row of the squaring path. Adds the pending
; carry (edx, with overflow in ebp) into the two top words of the
; temporary vector, computes the reduction factor edi = n0 * tp[0],
; switches esi to np, starts the reduction with np[0] and re-enters
; L$0113rdmadd with ebx = ecx-1 and ecx = 1.
L$012sqrlast:
mov edi,DWORD [20+esp]    ; edi = n0
mov esi,DWORD [16+esp]    ; esi = np
imul edi,DWORD [32+esp]   ; edi = n0 * tp[0]
add edx,DWORD [32+ecx*4+esp]
mov eax,DWORD [esi]
adc ebp,0
mov DWORD [32+ecx*4+esp],edx
mov DWORD [36+ecx*4+esp],ebp
mul edi
add eax,DWORD [32+esp]    ; only the carry of this addition is kept
lea ebx,[ecx-1]
adc edx,0
mov ecx,1
mov eax,DWORD [4+esi]
jmp NEAR L$0113rdmadd
; L$005common_tail: final conditional subtraction and exit, shared by all
; paths. ebx = num-1 on entry.
; L$014sub writes tp - np (with borrow) into rp; the carry flag starts
; cleared by "xor edx,edx" and survives the loop because dec, mov and lea
; do not modify it. "sbb eax,0" then applies the final borrow to the top
; word of tp, giving the selection mask in eax.
; L$015copy picks, word by word and without branching on the data,
; rp[i] = ((tp[i] XOR rp[i]) AND mask) XOR rp[i], i.e. tp[i] when the mask
; is all ones and the already stored difference when it is zero. Each
; temporary word is overwritten with ecx on the way (ecx is -1 after the
; subtraction loop), so the intermediate result is not left on the stack.
; Restores the original stack pointer and returns 1; L$000just_leave is
; the shared epilogue, also reached by the early exit with eax = 0.
align 16
L$005common_tail:
mov ebp,DWORD [16+esp]    ; ebp = np
mov edi,DWORD [4+esp]     ; edi = rp
lea esi,[32+esp]          ; esi = tp
mov eax,DWORD [esi]
mov ecx,ebx
xor edx,edx               ; index = 0, CF = 0
align 16
L$014sub:
sbb eax,DWORD [edx*4+ebp]
mov DWORD [edx*4+edi],eax
dec ecx
mov eax,DWORD [4+edx*4+esi]
lea edx,[1+edx]
jge NEAR L$014sub         ; tests the sign/overflow flags of the dec
sbb eax,0
align 16
L$015copy:
mov edx,DWORD [ebx*4+esi]
mov ebp,DWORD [ebx*4+edi]
xor edx,ebp
and edx,eax
xor edx,ebp
mov DWORD [ebx*4+esi],ecx
mov DWORD [ebx*4+edi],edx
dec ebx
jge NEAR L$015copy
mov esp,DWORD [24+esp]
mov eax,1
L$000just_leave:
pop edi
pop esi
pop ebx
pop ebp
ret
; NUL-terminated ASCII identification string:
; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
db 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
db 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
db 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
db 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
db 111,114,103,62,0
; CPU capability vector consulted by _bn_mul_mont, declared here as a
; 16-byte common symbol.
; NOTE(review): presumably merged by the linker with the definition in the
; CPU-detection code - confirm.
segment .bss
common _OPENSSL_ia32cap_P 16

View File

@ -0,0 +1,691 @@
%ifidn __OUTPUT_FORMAT__,obj
section code use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01010000h
%error yasm version 1.1.0 or later needed.
%endif
; Yasm automatically includes @feat.00 and complains about redefining it.
; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
%else
$@feat.00 equ 1
%endif
section .text code align=64
%else
section .text code
%endif
; ---------------------------------------------------------------------------
; _md5_block_asm_data_order -- MD5 block transform over consecutive 64-byte
; input blocks (32-bit x86, arguments on the stack).
;
; Stack arguments, read relative to esp after the first two pushes:
;   arg1 -> edi : pointer to four 32-bit state words (read at +0/+4/+8/+12)
;   arg2 -> esi : pointer to the input data
;   arg3 -> ecx : block count; shifted left by 6 to obtain a byte length
; NOTE(review): presumably matches a C prototype of the form
;   (uint32_t *state, const void *data, size_t num) -- confirm against the
;   C declaration.
;
; Register roles inside the loop:
;   eax, ebx, ecx, edx : the four working state words
;   ebp                : message word for the upcoming step
;   edi                : scratch for the per-round boolean function
;   esi                : address of the current input block
;   [esp]              : address of the last input block (loop bound)
;
; esi, edi, ebp and ebx are saved and restored.  eax receives the discarded
; loop bound on exit, so no meaningful value is returned in it.
; The loop bound is tested only after a block has been processed, so at
; least one block is always consumed.
; ---------------------------------------------------------------------------
global _md5_block_asm_data_order
align 16
_md5_block_asm_data_order:
L$_md5_block_asm_data_order_begin:
push esi
push edi
mov edi,DWORD [12+esp]
mov esi,DWORD [16+esp]
mov ecx,DWORD [20+esp]
push ebp
shl ecx,6
push ebx
; ecx = data + 64*count - 64, i.e. the address of the last block.
add ecx,esi
sub ecx,64
mov eax,DWORD [edi]
; Keep the last-block address at [esp] for the loop test at the bottom.
push ecx
mov ebx,DWORD [4+edi]
mov ecx,DWORD [8+edi]
mov edx,DWORD [12+edi]
L$000start:
;
; R0 section
; Steps 0-15.  Step 0 computes edi = ((ecx ^ edx) & ebx) ^ edx; the following
; steps use the same form with the register roles rotated.  Rotate counts
; cycle through 7, 12, 17, 22.  Message words are read in order from
; [esi+0] up to [esi+60]; each step preloads the word for the next one.
mov edi,ecx
mov ebp,DWORD [esi]
; R0 0
xor edi,edx
and edi,ebx
lea eax,[3614090360+ebp*1+eax]
xor edi,edx
add eax,edi
mov edi,ebx
rol eax,7
mov ebp,DWORD [4+esi]
add eax,ebx
; R0 1
xor edi,ecx
and edi,eax
lea edx,[3905402710+ebp*1+edx]
xor edi,ecx
add edx,edi
mov edi,eax
rol edx,12
mov ebp,DWORD [8+esi]
add edx,eax
; R0 2
xor edi,ebx
and edi,edx
lea ecx,[606105819+ebp*1+ecx]
xor edi,ebx
add ecx,edi
mov edi,edx
rol ecx,17
mov ebp,DWORD [12+esi]
add ecx,edx
; R0 3
xor edi,eax
and edi,ecx
lea ebx,[3250441966+ebp*1+ebx]
xor edi,eax
add ebx,edi
mov edi,ecx
rol ebx,22
mov ebp,DWORD [16+esi]
add ebx,ecx
; R0 4
xor edi,edx
and edi,ebx
lea eax,[4118548399+ebp*1+eax]
xor edi,edx
add eax,edi
mov edi,ebx
rol eax,7
mov ebp,DWORD [20+esi]
add eax,ebx
; R0 5
xor edi,ecx
and edi,eax
lea edx,[1200080426+ebp*1+edx]
xor edi,ecx
add edx,edi
mov edi,eax
rol edx,12
mov ebp,DWORD [24+esi]
add edx,eax
; R0 6
xor edi,ebx
and edi,edx
lea ecx,[2821735955+ebp*1+ecx]
xor edi,ebx
add ecx,edi
mov edi,edx
rol ecx,17
mov ebp,DWORD [28+esi]
add ecx,edx
; R0 7
xor edi,eax
and edi,ecx
lea ebx,[4249261313+ebp*1+ebx]
xor edi,eax
add ebx,edi
mov edi,ecx
rol ebx,22
mov ebp,DWORD [32+esi]
add ebx,ecx
; R0 8
xor edi,edx
and edi,ebx
lea eax,[1770035416+ebp*1+eax]
xor edi,edx
add eax,edi
mov edi,ebx
rol eax,7
mov ebp,DWORD [36+esi]
add eax,ebx
; R0 9
xor edi,ecx
and edi,eax
lea edx,[2336552879+ebp*1+edx]
xor edi,ecx
add edx,edi
mov edi,eax
rol edx,12
mov ebp,DWORD [40+esi]
add edx,eax
; R0 10
xor edi,ebx
and edi,edx
lea ecx,[4294925233+ebp*1+ecx]
xor edi,ebx
add ecx,edi
mov edi,edx
rol ecx,17
mov ebp,DWORD [44+esi]
add ecx,edx
; R0 11
xor edi,eax
and edi,ecx
lea ebx,[2304563134+ebp*1+ebx]
xor edi,eax
add ebx,edi
mov edi,ecx
rol ebx,22
mov ebp,DWORD [48+esi]
add ebx,ecx
; R0 12
xor edi,edx
and edi,ebx
lea eax,[1804603682+ebp*1+eax]
xor edi,edx
add eax,edi
mov edi,ebx
rol eax,7
mov ebp,DWORD [52+esi]
add eax,ebx
; R0 13
xor edi,ecx
and edi,eax
lea edx,[4254626195+ebp*1+edx]
xor edi,ecx
add edx,edi
mov edi,eax
rol edx,12
mov ebp,DWORD [56+esi]
add edx,eax
; R0 14
xor edi,ebx
and edi,edx
lea ecx,[2792965006+ebp*1+ecx]
xor edi,ebx
add ecx,edi
mov edi,edx
rol ecx,17
mov ebp,DWORD [60+esi]
add ecx,edx
; R0 15
xor edi,eax
and edi,ecx
lea ebx,[1236535329+ebp*1+ebx]
xor edi,eax
add ebx,edi
mov edi,ecx
rol ebx,22
mov ebp,DWORD [4+esi]
add ebx,ecx
;
; R1 section
; Steps 16-31.  Step 16 enters with edi = ecx and computes
; edi = ((ecx ^ ebx) & edx) ^ ecx; later steps rotate the register roles.
; Rotate counts cycle through 5, 9, 14, 20.  The message-word offset
; advances by 20 bytes modulo 64 from one step to the next, starting
; at [esi+4].
; R1 16
lea eax,[4129170786+ebp*1+eax]
xor edi,ebx
and edi,edx
mov ebp,DWORD [24+esi]
xor edi,ecx
add eax,edi
mov edi,ebx
rol eax,5
add eax,ebx
; R1 17
lea edx,[3225465664+ebp*1+edx]
xor edi,eax
and edi,ecx
mov ebp,DWORD [44+esi]
xor edi,ebx
add edx,edi
mov edi,eax
rol edx,9
add edx,eax
; R1 18
lea ecx,[643717713+ebp*1+ecx]
xor edi,edx
and edi,ebx
mov ebp,DWORD [esi]
xor edi,eax
add ecx,edi
mov edi,edx
rol ecx,14
add ecx,edx
; R1 19
lea ebx,[3921069994+ebp*1+ebx]
xor edi,ecx
and edi,eax
mov ebp,DWORD [20+esi]
xor edi,edx
add ebx,edi
mov edi,ecx
rol ebx,20
add ebx,ecx
; R1 20
lea eax,[3593408605+ebp*1+eax]
xor edi,ebx
and edi,edx
mov ebp,DWORD [40+esi]
xor edi,ecx
add eax,edi
mov edi,ebx
rol eax,5
add eax,ebx
; R1 21
lea edx,[38016083+ebp*1+edx]
xor edi,eax
and edi,ecx
mov ebp,DWORD [60+esi]
xor edi,ebx
add edx,edi
mov edi,eax
rol edx,9
add edx,eax
; R1 22
lea ecx,[3634488961+ebp*1+ecx]
xor edi,edx
and edi,ebx
mov ebp,DWORD [16+esi]
xor edi,eax
add ecx,edi
mov edi,edx
rol ecx,14
add ecx,edx
; R1 23
lea ebx,[3889429448+ebp*1+ebx]
xor edi,ecx
and edi,eax
mov ebp,DWORD [36+esi]
xor edi,edx
add ebx,edi
mov edi,ecx
rol ebx,20
add ebx,ecx
; R1 24
lea eax,[568446438+ebp*1+eax]
xor edi,ebx
and edi,edx
mov ebp,DWORD [56+esi]
xor edi,ecx
add eax,edi
mov edi,ebx
rol eax,5
add eax,ebx
; R1 25
lea edx,[3275163606+ebp*1+edx]
xor edi,eax
and edi,ecx
mov ebp,DWORD [12+esi]
xor edi,ebx
add edx,edi
mov edi,eax
rol edx,9
add edx,eax
; R1 26
lea ecx,[4107603335+ebp*1+ecx]
xor edi,edx
and edi,ebx
mov ebp,DWORD [32+esi]
xor edi,eax
add ecx,edi
mov edi,edx
rol ecx,14
add ecx,edx
; R1 27
lea ebx,[1163531501+ebp*1+ebx]
xor edi,ecx
and edi,eax
mov ebp,DWORD [52+esi]
xor edi,edx
add ebx,edi
mov edi,ecx
rol ebx,20
add ebx,ecx
; R1 28
lea eax,[2850285829+ebp*1+eax]
xor edi,ebx
and edi,edx
mov ebp,DWORD [8+esi]
xor edi,ecx
add eax,edi
mov edi,ebx
rol eax,5
add eax,ebx
; R1 29
lea edx,[4243563512+ebp*1+edx]
xor edi,eax
and edi,ecx
mov ebp,DWORD [28+esi]
xor edi,ebx
add edx,edi
mov edi,eax
rol edx,9
add edx,eax
; R1 30
lea ecx,[1735328473+ebp*1+ecx]
xor edi,edx
and edi,ebx
mov ebp,DWORD [48+esi]
xor edi,eax
add ecx,edi
mov edi,edx
rol ecx,14
add ecx,edx
; R1 31
lea ebx,[2368359562+ebp*1+ebx]
xor edi,ecx
and edi,eax
mov ebp,DWORD [20+esi]
xor edi,edx
add ebx,edi
mov edi,ecx
rol ebx,20
add ebx,ecx
;
; R2 section
; Steps 32-47.  Step 32 enters with edi = ecx and computes
; edi = ecx ^ edx ^ ebx; later steps rotate the register roles.
; Rotate counts cycle through 4, 11, 16, 23.  The message-word offset
; advances by 12 bytes modulo 64, starting at [esi+20].
; In the even-numbered steps the final "add <word>,<previous word>" is
; deferred: it appears as the second instruction of the following odd step.
; R2 32
xor edi,edx
xor edi,ebx
lea eax,[4294588738+ebp*1+eax]
add eax,edi
rol eax,4
mov ebp,DWORD [32+esi]
mov edi,ebx
; R2 33
lea edx,[2272392833+ebp*1+edx]
add eax,ebx
xor edi,ecx
xor edi,eax
mov ebp,DWORD [44+esi]
add edx,edi
mov edi,eax
rol edx,11
add edx,eax
; R2 34
xor edi,ebx
xor edi,edx
lea ecx,[1839030562+ebp*1+ecx]
add ecx,edi
rol ecx,16
mov ebp,DWORD [56+esi]
mov edi,edx
; R2 35
lea ebx,[4259657740+ebp*1+ebx]
add ecx,edx
xor edi,eax
xor edi,ecx
mov ebp,DWORD [4+esi]
add ebx,edi
mov edi,ecx
rol ebx,23
add ebx,ecx
; R2 36
xor edi,edx
xor edi,ebx
lea eax,[2763975236+ebp*1+eax]
add eax,edi
rol eax,4
mov ebp,DWORD [16+esi]
mov edi,ebx
; R2 37
lea edx,[1272893353+ebp*1+edx]
add eax,ebx
xor edi,ecx
xor edi,eax
mov ebp,DWORD [28+esi]
add edx,edi
mov edi,eax
rol edx,11
add edx,eax
; R2 38
xor edi,ebx
xor edi,edx
lea ecx,[4139469664+ebp*1+ecx]
add ecx,edi
rol ecx,16
mov ebp,DWORD [40+esi]
mov edi,edx
; R2 39
lea ebx,[3200236656+ebp*1+ebx]
add ecx,edx
xor edi,eax
xor edi,ecx
mov ebp,DWORD [52+esi]
add ebx,edi
mov edi,ecx
rol ebx,23
add ebx,ecx
; R2 40
xor edi,edx
xor edi,ebx
lea eax,[681279174+ebp*1+eax]
add eax,edi
rol eax,4
mov ebp,DWORD [esi]
mov edi,ebx
; R2 41
lea edx,[3936430074+ebp*1+edx]
add eax,ebx
xor edi,ecx
xor edi,eax
mov ebp,DWORD [12+esi]
add edx,edi
mov edi,eax
rol edx,11
add edx,eax
; R2 42
xor edi,ebx
xor edi,edx
lea ecx,[3572445317+ebp*1+ecx]
add ecx,edi
rol ecx,16
mov ebp,DWORD [24+esi]
mov edi,edx
; R2 43
lea ebx,[76029189+ebp*1+ebx]
add ecx,edx
xor edi,eax
xor edi,ecx
mov ebp,DWORD [36+esi]
add ebx,edi
mov edi,ecx
rol ebx,23
add ebx,ecx
; R2 44
xor edi,edx
xor edi,ebx
lea eax,[3654602809+ebp*1+eax]
add eax,edi
rol eax,4
mov ebp,DWORD [48+esi]
mov edi,ebx
; R2 45
lea edx,[3873151461+ebp*1+edx]
add eax,ebx
xor edi,ecx
xor edi,eax
mov ebp,DWORD [60+esi]
add edx,edi
mov edi,eax
rol edx,11
add edx,eax
; R2 46
xor edi,ebx
xor edi,edx
lea ecx,[530742520+ebp*1+ecx]
add ecx,edi
rol ecx,16
mov ebp,DWORD [8+esi]
mov edi,edx
; R2 47
lea ebx,[3299628645+ebp*1+ebx]
add ecx,edx
xor edi,eax
xor edi,ecx
mov ebp,DWORD [esi]
add ebx,edi
mov edi,-1
rol ebx,23
add ebx,ecx
;
; R3 section
; Steps 48-63.  Step 48 enters with edi = -1 and computes
; edi = ((~edx) | ebx) ^ ecx (the xor with -1 is the bitwise NOT); later
; steps rotate the register roles.  Rotate counts cycle through
; 6, 10, 15, 21.  The message-word offset advances by 28 bytes modulo 64,
; starting at [esi+0].
; R3 48
xor edi,edx
or edi,ebx
lea eax,[4096336452+ebp*1+eax]
xor edi,ecx
mov ebp,DWORD [28+esi]
add eax,edi
mov edi,-1
rol eax,6
xor edi,ecx
add eax,ebx
; R3 49
or edi,eax
lea edx,[1126891415+ebp*1+edx]
xor edi,ebx
mov ebp,DWORD [56+esi]
add edx,edi
mov edi,-1
rol edx,10
xor edi,ebx
add edx,eax
; R3 50
or edi,edx
lea ecx,[2878612391+ebp*1+ecx]
xor edi,eax
mov ebp,DWORD [20+esi]
add ecx,edi
mov edi,-1
rol ecx,15
xor edi,eax
add ecx,edx
; R3 51
or edi,ecx
lea ebx,[4237533241+ebp*1+ebx]
xor edi,edx
mov ebp,DWORD [48+esi]
add ebx,edi
mov edi,-1
rol ebx,21
xor edi,edx
add ebx,ecx
; R3 52
or edi,ebx
lea eax,[1700485571+ebp*1+eax]
xor edi,ecx
mov ebp,DWORD [12+esi]
add eax,edi
mov edi,-1
rol eax,6
xor edi,ecx
add eax,ebx
; R3 53
or edi,eax
lea edx,[2399980690+ebp*1+edx]
xor edi,ebx
mov ebp,DWORD [40+esi]
add edx,edi
mov edi,-1
rol edx,10
xor edi,ebx
add edx,eax
; R3 54
or edi,edx
lea ecx,[4293915773+ebp*1+ecx]
xor edi,eax
mov ebp,DWORD [4+esi]
add ecx,edi
mov edi,-1
rol ecx,15
xor edi,eax
add ecx,edx
; R3 55
or edi,ecx
lea ebx,[2240044497+ebp*1+ebx]
xor edi,edx
mov ebp,DWORD [32+esi]
add ebx,edi
mov edi,-1
rol ebx,21
xor edi,edx
add ebx,ecx
; R3 56
or edi,ebx
lea eax,[1873313359+ebp*1+eax]
xor edi,ecx
mov ebp,DWORD [60+esi]
add eax,edi
mov edi,-1
rol eax,6
xor edi,ecx
add eax,ebx
; R3 57
or edi,eax
lea edx,[4264355552+ebp*1+edx]
xor edi,ebx
mov ebp,DWORD [24+esi]
add edx,edi
mov edi,-1
rol edx,10
xor edi,ebx
add edx,eax
; R3 58
or edi,edx
lea ecx,[2734768916+ebp*1+ecx]
xor edi,eax
mov ebp,DWORD [52+esi]
add ecx,edi
mov edi,-1
rol ecx,15
xor edi,eax
add ecx,edx
; R3 59
or edi,ecx
lea ebx,[1309151649+ebp*1+ebx]
xor edi,edx
mov ebp,DWORD [16+esi]
add ebx,edi
mov edi,-1
rol ebx,21
xor edi,edx
add ebx,ecx
; R3 60
or edi,ebx
lea eax,[4149444226+ebp*1+eax]
xor edi,ecx
mov ebp,DWORD [44+esi]
add eax,edi
mov edi,-1
rol eax,6
xor edi,ecx
add eax,ebx
; R3 61
or edi,eax
lea edx,[3174756917+ebp*1+edx]
xor edi,ebx
mov ebp,DWORD [8+esi]
add edx,edi
mov edi,-1
rol edx,10
xor edi,ebx
add edx,eax
; R3 62
or edi,edx
lea ecx,[718787259+ebp*1+ecx]
xor edi,eax
mov ebp,DWORD [36+esi]
add ecx,edi
mov edi,-1
rol ecx,15
xor edi,eax
add ecx,edx
; R3 63
; The last step is interleaved with the block epilogue: ebp is reloaded with
; the state pointer (arg1 sits at [24+esp] after five pushes plus the return
; address) and esi is advanced to the next block.
or edi,ecx
lea ebx,[3951481745+ebp*1+ebx]
xor edi,edx
mov ebp,DWORD [24+esp]
add ebx,edi
add esi,64
rol ebx,21
; Add the state words from before this block to the working words and
; write the sums back through ebp.
mov edi,DWORD [ebp]
add ebx,ecx
add eax,edi
mov edi,DWORD [4+ebp]
add ebx,edi
mov edi,DWORD [8+ebp]
add ecx,edi
mov edi,DWORD [12+ebp]
add edx,edi
mov DWORD [ebp],eax
mov DWORD [4+ebp],ebx
; edi = address of the last block (saved at [esp] in the prologue).
mov edi,DWORD [esp]
mov DWORD [8+ebp],ecx
mov DWORD [12+ebp],edx
; Unsigned test: run another iteration while last-block address >= esi.
cmp edi,esi
jae NEAR L$000start
; Discard the saved loop bound, then restore the callee-saved registers.
pop eax
pop ebx
pop ebp
pop edi
pop esi
ret

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,382 @@
%ifidn __OUTPUT_FORMAT__,obj
section code use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01010000h
%error yasm version 1.1.0 or later needed.
%endif
; Yasm automatically includes @feat.00 and complains about redefining it.
; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
%else
$@feat.00 equ 1
%endif
section .text code align=64
%else
section .text code
%endif
;extern _OPENSSL_ia32cap_P
; ---------------------------------------------------------------------------
; _asm_RC4 -- generate RC4 keystream and XOR it over a buffer (32-bit x86,
; arguments on the stack).
;
; Stack arguments, read relative to esp after the four pushes:
;   arg1 -> edi : key structure; byte [+0] is loaded into al and byte [+4]
;                 into bl (the two stream indices), the table starts at +8
;   arg2 -> edx : length in bytes; a length of zero returns immediately
;   arg3 -> esi : input pointer
;   arg4 -> ebp : output pointer, converted to (output - input) so that
;                 [esi + ebp] is the output location for input byte [esi]
;
; The argument slots are reused as scratch once their values are loaded:
;   [24+esp] : end of input (input + length)
;   [28+esp] : limit for L$007loop4
;   [32+esp] : saved (output - input) while ebp is needed for something else
;
; Code paths, selected at run time:
;   L$001RC4_CHAR : table of byte entries, taken when the dword at
;                   table+256 equals -1
;   L$005loop_mmx : table of dword entries, 8 bytes per iteration with MMX,
;                   taken when (length & -4) has a bit at or above 8 set and
;                   bit 26 of the first dword of _OPENSSL_ia32cap_P is set
;                   (presumably the SSE2 feature bit -- confirm against the
;                   CPU detection code)
;   L$007loop4    : table of dword entries, 4 bytes per iteration
;   L$002loop1    : table of dword entries, one byte per iteration; also
;                   finishes whatever the wider loops leave over
;
; On exit the two indices are written back to the key structure.  The four
; pushed registers are restored; no return value is set up.
; ---------------------------------------------------------------------------
global _asm_RC4
align 16
_asm_RC4:
L$_asm_RC4_begin:
push ebp
push ebx
push esi
push edi
mov edi,DWORD [20+esp]
mov edx,DWORD [24+esp]
mov esi,DWORD [28+esp]
mov ebp,DWORD [32+esp]
; eax and ebx are cleared in full so that al/bl can be used as table
; indices through the 32-bit registers.
xor eax,eax
xor ebx,ebx
cmp edx,0
je NEAR L$000abort
mov al,BYTE [edi]
mov bl,BYTE [4+edi]
; edi now points at the table; the two index fields sit at [edi-8], [edi-4].
add edi,8
lea ecx,[edx*1+esi]
sub ebp,esi
mov DWORD [24+esp],ecx
; The first index is advanced before its first use; L$006done undoes the
; final advance with "dec al" before storing it back.
inc al
; A -1 in the dword just past 256 bytes of table marks the byte-entry layout
; (written by the byte-table branch of _asm_RC4_set_key).
cmp DWORD [256+edi],-1
je NEAR L$001RC4_CHAR
; ecx always carries the table entry at index eax for the upcoming step.
mov ecx,DWORD [eax*4+edi]
and edx,-4
jz NEAR L$002loop1
mov DWORD [32+esp],ebp
test edx,-8
jz NEAR L$003go4loop4
lea ebp,[_OPENSSL_ia32cap_P]
bt DWORD [ebp],26
jnc NEAR L$003go4loop4
; MMX path.  ebp gets the (output - input) delta back.  The loop limit,
; input + (length & -8) - 8, is parked in [edi-4]; that slot is rewritten
; with ebx at L$006done.
mov ebp,DWORD [32+esp]
and edx,-8
lea edx,[edx*1+esi-8]
mov DWORD [edi-4],edx
; Each unrolled step below has the same shape: bl += cl, exchange the table
; entries at eax and ebx, advance eax modulo 256, and use the low byte of
; the sum of the two exchanged entries as the index of the keystream entry.
; mm0 holds 8 input bytes; mm2 collects the result, with each further
; keystream entry passing through mm1 and being shifted into its byte lane.
add bl,cl
mov edx,DWORD [ebx*4+edi]
mov DWORD [ebx*4+edi],ecx
mov DWORD [eax*4+edi],edx
inc eax
add edx,ecx
movzx eax,al
movzx edx,dl
movq mm0,[esi]
mov ecx,DWORD [eax*4+edi]
movd mm2,DWORD [edx*4+edi]
jmp NEAR L$004loop_mmx_enter
align 16
L$005loop_mmx:
; Top of the pipelined loop: this first step both starts the next 8-byte
; group and finishes the previous one (shift its last entry into lane 7,
; merge it and store the 8 result bytes at [esi + ebp - 8]).
add bl,cl
psllq mm1,56
mov edx,DWORD [ebx*4+edi]
mov DWORD [ebx*4+edi],ecx
mov DWORD [eax*4+edi],edx
inc eax
add edx,ecx
movzx eax,al
movzx edx,dl
pxor mm2,mm1
movq mm0,[esi]
movq [esi*1+ebp-8],mm2
mov ecx,DWORD [eax*4+edi]
movd mm2,DWORD [edx*4+edi]
L$004loop_mmx_enter:
add bl,cl
mov edx,DWORD [ebx*4+edi]
mov DWORD [ebx*4+edi],ecx
mov DWORD [eax*4+edi],edx
inc eax
add edx,ecx
movzx eax,al
movzx edx,dl
pxor mm2,mm0
mov ecx,DWORD [eax*4+edi]
movd mm1,DWORD [edx*4+edi]
add bl,cl
psllq mm1,8
mov edx,DWORD [ebx*4+edi]
mov DWORD [ebx*4+edi],ecx
mov DWORD [eax*4+edi],edx
inc eax
add edx,ecx
movzx eax,al
movzx edx,dl
pxor mm2,mm1
mov ecx,DWORD [eax*4+edi]
movd mm1,DWORD [edx*4+edi]
add bl,cl
psllq mm1,16
mov edx,DWORD [ebx*4+edi]
mov DWORD [ebx*4+edi],ecx
mov DWORD [eax*4+edi],edx
inc eax
add edx,ecx
movzx eax,al
movzx edx,dl
pxor mm2,mm1
mov ecx,DWORD [eax*4+edi]
movd mm1,DWORD [edx*4+edi]
add bl,cl
psllq mm1,24
mov edx,DWORD [ebx*4+edi]
mov DWORD [ebx*4+edi],ecx
mov DWORD [eax*4+edi],edx
inc eax
add edx,ecx
movzx eax,al
movzx edx,dl
pxor mm2,mm1
mov ecx,DWORD [eax*4+edi]
movd mm1,DWORD [edx*4+edi]
add bl,cl
psllq mm1,32
mov edx,DWORD [ebx*4+edi]
mov DWORD [ebx*4+edi],ecx
mov DWORD [eax*4+edi],edx
inc eax
add edx,ecx
movzx eax,al
movzx edx,dl
pxor mm2,mm1
mov ecx,DWORD [eax*4+edi]
movd mm1,DWORD [edx*4+edi]
add bl,cl
psllq mm1,40
mov edx,DWORD [ebx*4+edi]
mov DWORD [ebx*4+edi],ecx
mov DWORD [eax*4+edi],edx
inc eax
add edx,ecx
movzx eax,al
movzx edx,dl
pxor mm2,mm1
mov ecx,DWORD [eax*4+edi]
movd mm1,DWORD [edx*4+edi]
add bl,cl
psllq mm1,48
mov edx,DWORD [ebx*4+edi]
mov DWORD [ebx*4+edi],ecx
mov DWORD [eax*4+edi],edx
inc eax
add edx,ecx
movzx eax,al
movzx edx,dl
pxor mm2,mm1
mov ecx,DWORD [eax*4+edi]
movd mm1,DWORD [edx*4+edi]
; Rebuild ebx from its low byte through a fresh zeroed register
; (presumably to avoid a partial-register dependency on bl -- not
; established by the code itself).
mov edx,ebx
xor ebx,ebx
mov bl,dl
; The compare uses esi before it is advanced; lea leaves the flags intact
; for the jb that follows.
cmp esi,DWORD [edi-4]
lea esi,[8+esi]
jb NEAR L$005loop_mmx
; Drain the pipeline: finish and store the last 8-byte group.
psllq mm1,56
pxor mm2,mm1
movq [esi*1+ebp-8],mm2
; Leave MMX state before returning to code that may use x87.
emms
cmp esi,DWORD [24+esp]
je NEAR L$006done
jmp NEAR L$002loop1
align 16
L$003go4loop4:
; Limit for the 4-byte loop: input + (length & -4) - 4.
lea edx,[edx*1+esi-4]
mov DWORD [28+esp],edx
L$007loop4:
; Four steps per iteration.  ebp collects the four keystream entries: each
; new entry is OR-ed into the low byte after rotating the previous ones
; right by 8, so after the final rotate the first entry is in the low byte.
; This relies on the dword table entries having zero upper bytes.
add bl,cl
mov edx,DWORD [ebx*4+edi]
mov DWORD [ebx*4+edi],ecx
mov DWORD [eax*4+edi],edx
add edx,ecx
inc al
and edx,255
mov ecx,DWORD [eax*4+edi]
mov ebp,DWORD [edx*4+edi]
add bl,cl
mov edx,DWORD [ebx*4+edi]
mov DWORD [ebx*4+edi],ecx
mov DWORD [eax*4+edi],edx
add edx,ecx
inc al
and edx,255
ror ebp,8
mov ecx,DWORD [eax*4+edi]
or ebp,DWORD [edx*4+edi]
add bl,cl
mov edx,DWORD [ebx*4+edi]
mov DWORD [ebx*4+edi],ecx
mov DWORD [eax*4+edi],edx
add edx,ecx
inc al
and edx,255
ror ebp,8
mov ecx,DWORD [eax*4+edi]
or ebp,DWORD [edx*4+edi]
add bl,cl
mov edx,DWORD [ebx*4+edi]
mov DWORD [ebx*4+edi],ecx
mov DWORD [eax*4+edi],edx
add edx,ecx
inc al
and edx,255
ror ebp,8
; ecx temporarily holds the (output - input) delta for the store below;
; it is reloaded with the next table entry before the loop branch.
mov ecx,DWORD [32+esp]
or ebp,DWORD [edx*4+edi]
ror ebp,8
xor ebp,DWORD [esi]
; The flags from this compare are consumed by the jb four instructions
; later; the mov and lea in between do not modify them.
cmp esi,DWORD [28+esp]
mov DWORD [esi*1+ecx],ebp
lea esi,[4+esi]
mov ecx,DWORD [eax*4+edi]
jb NEAR L$007loop4
cmp esi,DWORD [24+esp]
je NEAR L$006done
; Put the (output - input) delta back in ebp for the byte loop.
mov ebp,DWORD [32+esp]
align 16
L$002loop1:
; One byte per iteration with dword table entries.  The body runs before
; the bound is tested, so every jump here has at least one byte left.
add bl,cl
mov edx,DWORD [ebx*4+edi]
mov DWORD [ebx*4+edi],ecx
mov DWORD [eax*4+edi],edx
add edx,ecx
inc al
and edx,255
mov edx,DWORD [edx*4+edi]
xor dl,BYTE [esi]
lea esi,[1+esi]
mov ecx,DWORD [eax*4+edi]
cmp esi,DWORD [24+esp]
; esi has already been advanced, hence the -1 in the output address.
mov BYTE [esi*1+ebp-1],dl
jb NEAR L$002loop1
jmp NEAR L$006done
align 16
L$001RC4_CHAR:
; Same algorithm as L$002loop1, with one-byte table entries.
movzx ecx,BYTE [eax*1+edi]
L$008cloop1:
add bl,cl
movzx edx,BYTE [ebx*1+edi]
mov BYTE [ebx*1+edi],cl
mov BYTE [eax*1+edi],dl
add dl,cl
movzx edx,BYTE [edx*1+edi]
add al,1
xor dl,BYTE [esi]
lea esi,[1+esi]
movzx ecx,BYTE [eax*1+edi]
cmp esi,DWORD [24+esp]
mov BYTE [esi*1+ebp-1],dl
jb NEAR L$008cloop1
L$006done:
; Undo the look-ahead increment and write both indices back to the key
; structure (the second one as a full dword, the first as a single byte).
dec al
mov DWORD [edi-4],ebx
mov BYTE [edi-8],al
L$000abort:
pop edi
pop esi
pop ebx
pop ebp
ret
; ---------------------------------------------------------------------------
; _asm_RC4_set_key -- initialise the RC4 table in a key structure from a
; byte string (32-bit x86, arguments on the stack).
;
; Stack arguments, read relative to esp after the four pushes:
;   arg1 -> edi : key structure; the table starts at +8, the two index
;                 dwords sit at +0 and +4
;   arg2 -> ebp : key length in bytes
;   arg3 -> esi : key bytes
;
; The table is built with dword entries unless bit 20 of the first dword of
; _OPENSSL_ia32cap_P is set, in which case byte entries are used and the
; dword at table+256 is set to -1 as a marker that _asm_RC4 tests for.
; NOTE(review): the meaning assigned to bit 20 is defined by the capability
; setup code, which is not visible here.
; NOTE(review): there is no guard for a key length of zero; the key walk
; below assumes at least one key byte.
; ---------------------------------------------------------------------------
global _asm_RC4_set_key
align 16
_asm_RC4_set_key:
L$_asm_RC4_set_key_begin:
push ebp
push ebx
push esi
push edi
mov edi,DWORD [20+esp]
mov ebp,DWORD [24+esp]
mov esi,DWORD [28+esp]
lea edx,[_OPENSSL_ia32cap_P]
lea edi,[8+edi]
; esi = one past the last key byte and ebp = -length, so [ebp+esi] walks the
; key from its first byte and ebp reaches zero exactly at the end.
lea esi,[ebp*1+esi]
neg ebp
xor eax,eax
; -length is parked in the second index slot so it can be reloaded each time
; the key wraps; the slot is cleared again at L$013exit.
mov DWORD [edi-4],ebp
bt DWORD [edx],20
jc NEAR L$009c1stloop
align 16
L$010w1stloop:
; Identity fill: entry i = i for i = 0..255.  The loop ends when al wraps
; to zero and the add sets the carry flag.
mov DWORD [eax*4+edi],eax
add al,1
jnc NEAR L$010w1stloop
xor ecx,ecx
xor edx,edx
align 16
L$011w2ndloop:
; ecx is the running position, dl the accumulated index:
; dl += key byte + entry[ecx], then entry[ecx] and entry[edx] are exchanged.
mov eax,DWORD [ecx*4+edi]
add dl,BYTE [ebp*1+esi]
add dl,al
; The zero flag from this add is consumed by the jnz after the mov below
; (mov does not modify flags).
add ebp,1
mov ebx,DWORD [edx*4+edi]
jnz NEAR L$012wnowrap
; End of key reached: restart from its first byte.
mov ebp,DWORD [edi-4]
L$012wnowrap:
mov DWORD [edx*4+edi],eax
mov DWORD [ecx*4+edi],ebx
add cl,1
jnc NEAR L$011w2ndloop
jmp NEAR L$013exit
align 16
L$009c1stloop:
; Byte-entry variant of the identity fill.
mov BYTE [eax*1+edi],al
add al,1
jnc NEAR L$009c1stloop
xor ecx,ecx
xor edx,edx
xor ebx,ebx
align 16
L$014c2ndloop:
; Byte-entry variant of the mixing loop above.
mov al,BYTE [ecx*1+edi]
add dl,BYTE [ebp*1+esi]
add dl,al
add ebp,1
mov bl,BYTE [edx*1+edi]
jnz NEAR L$015cnowrap
mov ebp,DWORD [edi-4]
L$015cnowrap:
mov BYTE [edx*1+edi],al
mov BYTE [ecx*1+edi],bl
add cl,1
jnc NEAR L$014c2ndloop
; Mark the table as byte-sized for _asm_RC4.
mov DWORD [256+edi],-1
L$013exit:
; Both stream indices start at zero.
xor eax,eax
mov DWORD [edi-8],eax
mov DWORD [edi-4],eax
pop edi
pop esi
pop ebx
pop ebp
ret
; ---------------------------------------------------------------------------
; _RC4_options -- return in eax the address of a NUL-terminated string that
; names the RC4 code path selected by the first dword of _OPENSSL_ia32cap_P:
;   bit 20 set               -> "rc4(1x,char)"  (table offset 12)
;   bit 20 clear, bit 26 set -> "rc4(8x,mmx)"   (table offset 25)
;   neither                  -> "rc4(4x,int)"   (table offset 0)
; Takes no arguments.  Clobbers edx and the flags.
; ---------------------------------------------------------------------------
global _RC4_options
align 16
_RC4_options:
L$_RC4_options_begin:
        ; call/pop yields the run-time address of L$016pic_point, from which
        ; the string table is reached by a link-time constant displacement.
        call    L$016pic_point
L$016pic_point:
        pop     eax
        mov     edx,DWORD [_OPENSSL_ia32cap_P]
        lea     eax,[(L$017opts-L$016pic_point)+eax]
        test    edx,0x00100000          ; bit 20
        jnz     NEAR L$0181xchar
        test    edx,0x04000000          ; bit 26
        jz      NEAR L$019ret
        add     eax,25                  ; skip "rc4(4x,int)",0 and "rc4(1x,char)",0
        ret
L$0181xchar:
        add     eax,12                  ; skip "rc4(4x,int)",0
L$019ret:
        ret
align 64
L$017opts:
        db      'rc4(4x,int)',0
        db      'rc4(1x,char)',0
        db      'rc4(8x,mmx)',0
        ; Attribution string, kept in its original numeric form.
        db      82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89
        db      80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
        db      111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
align 64
segment .bss
common _OPENSSL_ia32cap_P 16

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More