diff --git a/configure.ac b/configure.ac
index 23c00e573..c61505198 100644
--- a/configure.ac
+++ b/configure.ac
@@ -576,6 +576,15 @@ then
     ENABLED_FASTMATH="yes"
 fi
 
+if test "$host_cpu" = "x86_64" || test "$host_cpu" = "amd64"
+then
+    AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_X86_64_BUILD"
+fi
+if test "$host_cpu" = "x86"
+then
+    AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_X86_BUILD"
+fi
+
 # if sp-math-all is not set, then enable fast math
 if test "x$ENABLED_FASTMATH" = "xyes" && test "$enable_sp_math_all" = "" && test "$enable_sp_math" = ""
 then
@@ -594,11 +603,6 @@ then
         ENABLED_HEAPMATH="no"
         ENABLED_SP_MATH_ALL="no"
     fi
-    if test "$host_cpu" = "x86_64" || test "$host_cpu" = "amd64"
-    then
-        # Have settings.h set FP_MAX_BITS higher if user didn't set directly
-        AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_X86_64_BUILD"
-    fi
     AS_IF([test "x$host_cpu" = "xaarch64"],[AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_AARCH64_BUILD"])
 
     if test "$ENABLED_SAKKE" = "yes" && test "$ENABLED_SAKKE_SMALL" != "yes"
@@ -2313,6 +2317,16 @@ then
             AM_CFLAGS="$AM_CFLAGS -DUSE_INTEL_SPEEDUP"
             ENABLED_AESNI=yes
         fi
+
+        if test "$host_cpu" = "x86_64" || test "$host_cpu" = "amd64"
+        then
+            AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_X86_64_BUILD"
+        fi
+        if test "$host_cpu" = "x86"
+        then
+            AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_X86_BUILD"
+            ENABLED_X86_ASM=yes
+        fi
     fi
 
 AC_ARG_ENABLE([aligndata],
@@ -6732,7 +6746,7 @@ if test "$ENABLED_SP_MATH_ALL" = "yes" && test "$ENABLED_ASM" != "no"; then
 
     case $host_cpu in
         *x86_64* | *amd64*)
-            AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_X86_64 -DWOLFSSL_X86_64_BUILD"
+            AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_X86_64"
            ;;
         *x86*)
             AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_X86"
@@ -6817,10 +6831,6 @@ if test "$ENABLED_SP_ASM" = "yes" && test "$ENABLED_SP" = "yes"; then
             AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_X86_64_ASM"
             AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_X86_64_ASM"
             ENABLED_SP_X86_64_ASM=yes
-            if test "x$ENABLED_FASTMATH" = "xno"
-            then
-                AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_X86_64_BUILD"
-            fi
            ;;
         *)
            AC_MSG_ERROR([ASM not available for CPU.
Supported CPUs: x86_64, aarch64, arm]) @@ -8097,6 +8107,7 @@ AM_CONDITIONAL([BUILD_ARMASM_CRYPTO],[test "x$ENABLED_ARMASM_CRYPTO" = "xyes"]) AM_CONDITIONAL([BUILD_XILINX],[test "x$ENABLED_XILINX" = "xyes"]) AM_CONDITIONAL([BUILD_AESNI],[test "x$ENABLED_AESNI" = "xyes"]) AM_CONDITIONAL([BUILD_INTELASM],[test "x$ENABLED_INTELASM" = "xyes"]) +AM_CONDITIONAL([BUILD_X86_ASM],[test "x$ENABLED_X86_ASM" = "xyes"]) AM_CONDITIONAL([BUILD_AFALG],[test "x$ENABLED_AFALG" = "xyes"]) AM_CONDITIONAL([BUILD_KCAPI],[test "x$ENABLED_KCAPI" = "xyes"]) AM_CONDITIONAL([BUILD_DEVCRYPTO],[test "x$ENABLED_DEVCRYPTO" = "xyes"]) diff --git a/src/include.am b/src/include.am index 807f43e41..bfb0ed3f7 100644 --- a/src/include.am +++ b/src/include.am @@ -118,8 +118,12 @@ endif if BUILD_AESNI src_libwolfssl_la_SOURCES += wolfcrypt/src/aes_asm.S +if BUILD_X86_ASM +src_libwolfssl_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S +else src_libwolfssl_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S endif +endif if BUILD_DES3 src_libwolfssl_la_SOURCES += wolfcrypt/src/des3.c @@ -210,8 +214,12 @@ endif if BUILD_AESNI src_libwolfssl_la_SOURCES += wolfcrypt/src/aes_asm.S +if BUILD_X86_ASM +src_libwolfssl_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S +else src_libwolfssl_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S endif +endif if BUILD_SHA src_libwolfssl_la_SOURCES += wolfcrypt/src/sha.c @@ -552,9 +560,13 @@ endif if !BUILD_FIPS_CURRENT if BUILD_AESNI src_libwolfssl_la_SOURCES += wolfcrypt/src/aes_asm.S +if BUILD_X86_ASM +src_libwolfssl_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S +else src_libwolfssl_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S endif endif +endif if BUILD_CAMELLIA src_libwolfssl_la_SOURCES += wolfcrypt/src/camellia.c diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index a4ab8c1e0..8cf8d7184 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -738,7 +738,7 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits XASM_LINK("AES_CBC_encrypt"); #ifdef HAVE_AES_DECRYPT - #if defined(WOLFSSL_AESNI_BY4) + #if defined(WOLFSSL_AESNI_BY4) || defined(WOLFSSL_X86_BUILD) void AES_CBC_decrypt_by4(const unsigned char* in, unsigned char* out, unsigned char* ivec, unsigned long length, const unsigned char* KS, int nr) @@ -4191,7 +4191,7 @@ int wc_AesSetIV(Aes* aes, const byte* iv) /* if input and output same will overwrite input iv */ XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); SAVE_VECTOR_REGISTERS(return _svr_ret;); - #if defined(WOLFSSL_AESNI_BY4) + #if defined(WOLFSSL_AESNI_BY4) || defined(WOLFSSL_X86_BUILD) AES_CBC_decrypt_by4(in, out, (byte*)aes->reg, sz, (byte*)aes->key, aes->rounds); #elif defined(WOLFSSL_AESNI_BY6) @@ -7867,7 +7867,7 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, } else #endif - #ifdef HAVE_INTEL_AVX1 + #if defined(HAVE_INTEL_AVX1) if (IS_INTEL_AVX1(intel_flags)) { SAVE_VECTOR_REGISTERS(return _svr_ret;); AES_GCM_encrypt_avx1(in, out, authIn, iv, authTag, sz, authInSz, ivSz, @@ -8414,7 +8414,7 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, } else #endif - #ifdef HAVE_INTEL_AVX1 + #if defined(HAVE_INTEL_AVX1) if (IS_INTEL_AVX1(intel_flags)) { SAVE_VECTOR_REGISTERS(return _svr_ret;); AES_GCM_decrypt_avx1(in, out, authIn, iv, authTag, sz, authInSz, ivSz, @@ -9035,7 +9035,7 @@ static WARN_UNUSED_RESULT int AesGcmEncryptFinal_aesni( extern "C" { #endif -/* Assembly code implementations in: aes_gcm_asm.S */ +/* Assembly code implementations in: aes_gcm_asm.S and aes_gcm_x86_asm.S */ #ifdef HAVE_INTEL_AVX2 extern 
void AES_GCM_decrypt_update_avx2(const unsigned char* key, int nr, unsigned char* out, const unsigned char* in, unsigned int nbytes, diff --git a/wolfcrypt/src/aes_asm.S b/wolfcrypt/src/aes_asm.S index e12f4d27c..f5037f209 100644 --- a/wolfcrypt/src/aes_asm.S +++ b/wolfcrypt/src/aes_asm.S @@ -27,6 +27,7 @@ * by Intel Mobility Group, Israel Development Center, Israel Shay Gueron */ +#ifdef WOLFSSL_X86_64_BUILD /* AES_CBC_encrypt (const unsigned char *in, @@ -1333,6 +1334,893 @@ pxor %xmm4, %xmm3 pxor %xmm2, %xmm3 ret +#elif defined WOLFSSL_X86_BUILD + +/* +AES_CBC_encrypt (const unsigned char *in, + unsigned char *out, + unsigned char ivec[16], + unsigned long length, + const unsigned char *KS, + int nr) +*/ +#ifndef __APPLE__ +.globl AES_CBC_encrypt +AES_CBC_encrypt: +#else +.globl _AES_CBC_encrypt +_AES_CBC_encrypt: +#endif + # parameter 1: stack[4] => %edi + # parameter 2: stack[8] => %esi + # parameter 3: stack[12] => %edx + # parameter 4: stack[16] => %ecx + # parameter 5: stack[20] => %eax + # parameter 6: stack[24] => %ebx + push %edi + push %esi + push %ebx + push %ebp + movl 20(%esp), %edi + movl 24(%esp), %esi + movl 28(%esp), %edx + movl 32(%esp), %ecx + movl 36(%esp), %eax + movl 40(%esp), %ebx + + movl %ecx, %ebp + shrl $4, %ecx + shll $60, %ebp + je NO_PARTS + addl $1, %ecx + NO_PARTS: + subl $16, %esi + movdqa (%edx), %xmm1 + LOOP: + pxor (%edi), %xmm1 + pxor (%eax), %xmm1 + addl $16,%esi + addl $16,%edi + cmpl $12, %ebx + aesenc 16(%eax),%xmm1 + aesenc 32(%eax),%xmm1 + aesenc 48(%eax),%xmm1 + aesenc 64(%eax),%xmm1 + aesenc 80(%eax),%xmm1 + aesenc 96(%eax),%xmm1 + aesenc 112(%eax),%xmm1 + aesenc 128(%eax),%xmm1 + aesenc 144(%eax),%xmm1 + movdqa 160(%eax),%xmm2 + jb LAST + cmpl $14, %ebx + + aesenc 160(%eax),%xmm1 + aesenc 176(%eax),%xmm1 + movdqa 192(%eax),%xmm2 + jb LAST + aesenc 192(%eax),%xmm1 + aesenc 208(%eax),%xmm1 + movdqa 224(%eax),%xmm2 + LAST: + decl %ecx + aesenclast %xmm2,%xmm1 + movdqu %xmm1,(%esi) + jne LOOP + + pop %ebp + pop %ebx + pop %esi + pop %edi + ret + + +/* +AES_CBC_decrypt_by4 (const unsigned char *in, + unsigned char *out, + unsigned char ivec[16], + unsigned long length, + const unsigned char *KS, + int nr) +*/ +#ifndef __APPLE__ +.globl AES_CBC_decrypt_by4 +AES_CBC_decrypt_by4: +#else +.globl _AES_CBC_decrypt_by4 +_AES_CBC_decrypt_by4: +#endif +# parameter 1: stack[4] => %edi +# parameter 2: stack[8] => %esi +# parameter 3: stack[12] => %edx +# parameter 4: stack[16] => %ecx +# parameter 5: stack[20] => %eax +# parameter 6: stack[24] => %ebx + push %edi + push %esi + push %ebx + push %ebp + movl 20(%esp), %edi + movl 24(%esp), %esi + movl 28(%esp), %edx + movl 32(%esp), %ecx + movl 36(%esp), %eax + movl 40(%esp), %ebx + subl $16, %esp + + movdqu (%edx), %xmm0 + movl %ecx, %ebp + shrl $4, %ecx + shll $60, %ebp + movdqu %xmm0, (%esp) + je DNO_PARTS_4 + addl $1, %ecx +DNO_PARTS_4: + movl %ecx, %ebp + shll $62, %ebp + shrl $62, %ebp + shrl $2, %ecx + je DREMAINDER_4 + subl $64, %esi +DLOOP_4: + movdqu (%edi), %xmm1 + movdqu 16(%edi), %xmm2 + movdqu 32(%edi), %xmm3 + movdqu 48(%edi), %xmm4 + movdqa (%eax), %xmm5 + movdqa 16(%eax), %xmm6 + movdqa 32(%eax), %xmm7 + movdqa 48(%eax), %xmm0 + pxor %xmm5, %xmm1 + pxor %xmm5, %xmm2 + pxor %xmm5, %xmm3 + pxor %xmm5, %xmm4 + aesdec %xmm6, %xmm1 + aesdec %xmm6, %xmm2 + aesdec %xmm6, %xmm3 + aesdec %xmm6, %xmm4 + aesdec %xmm7, %xmm1 + aesdec %xmm7, %xmm2 + aesdec %xmm7, %xmm3 + aesdec %xmm7, %xmm4 + aesdec %xmm0, %xmm1 + aesdec %xmm0, %xmm2 + aesdec %xmm0, %xmm3 + aesdec %xmm0, %xmm4 + movdqa 64(%eax), %xmm5 + 
movdqa 80(%eax), %xmm6 + movdqa 96(%eax), %xmm7 + movdqa 112(%eax), %xmm0 + aesdec %xmm5, %xmm1 + aesdec %xmm5, %xmm2 + aesdec %xmm5, %xmm3 + aesdec %xmm5, %xmm4 + aesdec %xmm6, %xmm1 + aesdec %xmm6, %xmm2 + aesdec %xmm6, %xmm3 + aesdec %xmm6, %xmm4 + aesdec %xmm7, %xmm1 + aesdec %xmm7, %xmm2 + aesdec %xmm7, %xmm3 + aesdec %xmm7, %xmm4 + aesdec %xmm0, %xmm1 + aesdec %xmm0, %xmm2 + aesdec %xmm0, %xmm3 + aesdec %xmm0, %xmm4 + movdqa 128(%eax), %xmm5 + movdqa 144(%eax), %xmm6 + movdqa 160(%eax), %xmm7 + cmpl $12, %ebx + aesdec %xmm5, %xmm1 + aesdec %xmm5, %xmm2 + aesdec %xmm5, %xmm3 + aesdec %xmm5, %xmm4 + aesdec %xmm6, %xmm1 + aesdec %xmm6, %xmm2 + aesdec %xmm6, %xmm3 + aesdec %xmm6, %xmm4 + jb DLAST_4 + movdqa 160(%eax), %xmm5 + movdqa 176(%eax), %xmm6 + movdqa 192(%eax), %xmm7 + cmpl $14, %ebx + aesdec %xmm5, %xmm1 + aesdec %xmm5, %xmm2 + aesdec %xmm5, %xmm3 + aesdec %xmm5, %xmm4 + aesdec %xmm6, %xmm1 + aesdec %xmm6, %xmm2 + aesdec %xmm6, %xmm3 + aesdec %xmm6, %xmm4 + jb DLAST_4 + movdqa 192(%eax), %xmm5 + movdqa 208(%eax), %xmm6 + movdqa 224(%eax), %xmm7 + aesdec %xmm5, %xmm1 + aesdec %xmm5, %xmm2 + aesdec %xmm5, %xmm3 + aesdec %xmm5, %xmm4 + aesdec %xmm6, %xmm1 + aesdec %xmm6, %xmm2 + aesdec %xmm6, %xmm3 + aesdec %xmm6, %xmm4 +DLAST_4: + addl $64, %esi + aesdeclast %xmm7, %xmm1 + aesdeclast %xmm7, %xmm2 + aesdeclast %xmm7, %xmm3 + aesdeclast %xmm7, %xmm4 + movdqu (%esp), %xmm0 + movdqu (%edi), %xmm5 + movdqu 16(%edi), %xmm6 + movdqu 32(%edi), %xmm7 + pxor %xmm0, %xmm1 + pxor %xmm5, %xmm2 + pxor %xmm6, %xmm3 + pxor %xmm7, %xmm4 + movdqu 48(%edi), %xmm0 + movdqu %xmm1, (%esi) + movdqu %xmm2, 16(%esi) + movdqu %xmm3, 32(%esi) + movdqu %xmm4, 48(%esi) + movdqu %xmm0, (%esp) + addl $64, %edi + decl %ecx + jne DLOOP_4 + addl $64, %esi +DREMAINDER_4: + cmpl $0, %ebp + je DEND_4 +DLOOP_4_2: + movdqu (%edi), %xmm1 + movdqa %xmm1, %xmm5 + addl $16, %edi + pxor (%eax), %xmm1 + movdqu 160(%eax), %xmm2 + cmpl $12, %ebx + aesdec 16(%eax), %xmm1 + aesdec 32(%eax), %xmm1 + aesdec 48(%eax), %xmm1 + aesdec 64(%eax), %xmm1 + aesdec 80(%eax), %xmm1 + aesdec 96(%eax), %xmm1 + aesdec 112(%eax), %xmm1 + aesdec 128(%eax), %xmm1 + aesdec 144(%eax), %xmm1 + jb DLAST_4_2 + movdqu 192(%eax), %xmm2 + cmpl $14, %ebx + aesdec 160(%eax), %xmm1 + aesdec 176(%eax), %xmm1 + jb DLAST_4_2 + movdqu 224(%eax), %xmm2 + aesdec 192(%eax), %xmm1 + aesdec 208(%eax), %xmm1 +DLAST_4_2: + aesdeclast %xmm2, %xmm1 + pxor %xmm0, %xmm1 + movdqa %xmm5, %xmm0 + movdqu %xmm1, (%esi) + addl $16, %esi + decl %ebp + jne DLOOP_4_2 +DEND_4: + + addl $16, %esp + pop %ebp + pop %ebx + pop %esi + pop %edi + ret + +/* +AES_ECB_encrypt (const unsigned char *in, + unsigned char *out, + unsigned long length, + const unsigned char *KS, + int nr) +*/ +#ifndef __APPLE__ +.globl AES_ECB_encrypt +AES_ECB_encrypt: +#else +.globl _AES_ECB_encrypt +_AES_ECB_encrypt: +#endif +# parameter 1: stack[4] => %edi +# parameter 2: stack[8] => %esi +# parameter 3: stack[12] => %edx +# parameter 4: stack[16] => %ecx +# parameter 5: stack[20] => %eax + push %edi + push %esi + push %ebx + movl 16(%esp), %edi + movl 20(%esp), %esi + movl 24(%esp), %edx + movl 28(%esp), %ecx + movl 32(%esp), %eax + + movl %edx, %ebx + shrl $4, %edx + shll $60, %ebx + je EECB_NO_PARTS_4 + addl $1, %edx +EECB_NO_PARTS_4: + movl %edx, %ebx + shll $62, %ebx + shrl $62, %ebx + shrl $2, %edx + je EECB_REMAINDER_4 + subl $64, %esi +EECB_LOOP_4: + movdqu (%edi), %xmm1 + movdqu 16(%edi), %xmm2 + movdqu 32(%edi), %xmm3 + movdqu 48(%edi), %xmm4 + movdqa (%ecx), %xmm5 + movdqa 16(%ecx), %xmm6 + movdqa 
32(%ecx), %xmm7 + movdqa 48(%ecx), %xmm0 + pxor %xmm5, %xmm1 + pxor %xmm5, %xmm2 + pxor %xmm5, %xmm3 + pxor %xmm5, %xmm4 + aesenc %xmm6, %xmm1 + aesenc %xmm6, %xmm2 + aesenc %xmm6, %xmm3 + aesenc %xmm6, %xmm4 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + aesenc %xmm7, %xmm4 + aesenc %xmm0, %xmm1 + aesenc %xmm0, %xmm2 + aesenc %xmm0, %xmm3 + aesenc %xmm0, %xmm4 + movdqa 64(%ecx), %xmm5 + movdqa 80(%ecx), %xmm6 + movdqa 96(%ecx), %xmm7 + movdqa 112(%ecx), %xmm0 + aesenc %xmm5, %xmm1 + aesenc %xmm5, %xmm2 + aesenc %xmm5, %xmm3 + aesenc %xmm5, %xmm4 + aesenc %xmm6, %xmm1 + aesenc %xmm6, %xmm2 + aesenc %xmm6, %xmm3 + aesenc %xmm6, %xmm4 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + aesenc %xmm7, %xmm4 + aesenc %xmm0, %xmm1 + aesenc %xmm0, %xmm2 + aesenc %xmm0, %xmm3 + aesenc %xmm0, %xmm4 + movdqa 128(%ecx), %xmm5 + movdqa 144(%ecx), %xmm6 + movdqa 160(%ecx), %xmm7 + cmpl $12, %eax + aesenc %xmm5, %xmm1 + aesenc %xmm5, %xmm2 + aesenc %xmm5, %xmm3 + aesenc %xmm5, %xmm4 + aesenc %xmm6, %xmm1 + aesenc %xmm6, %xmm2 + aesenc %xmm6, %xmm3 + aesenc %xmm6, %xmm4 + jb EECB_LAST_4 + movdqa 160(%ecx), %xmm5 + movdqa 176(%ecx), %xmm6 + movdqa 192(%ecx), %xmm7 + cmpl $14, %eax + aesenc %xmm5, %xmm1 + aesenc %xmm5, %xmm2 + aesenc %xmm5, %xmm3 + aesenc %xmm5, %xmm4 + aesenc %xmm6, %xmm1 + aesenc %xmm6, %xmm2 + aesenc %xmm6, %xmm3 + aesenc %xmm6, %xmm4 + jb EECB_LAST_4 + movdqa 192(%ecx), %xmm5 + movdqa 208(%ecx), %xmm6 + movdqa 224(%ecx), %xmm7 + aesenc %xmm5, %xmm1 + aesenc %xmm5, %xmm2 + aesenc %xmm5, %xmm3 + aesenc %xmm5, %xmm4 + aesenc %xmm6, %xmm1 + aesenc %xmm6, %xmm2 + aesenc %xmm6, %xmm3 + aesenc %xmm6, %xmm4 +EECB_LAST_4: + addl $64, %edi + addl $64, %esi + decl %edx + aesenclast %xmm7, %xmm1 + aesenclast %xmm7, %xmm2 + aesenclast %xmm7, %xmm3 + aesenclast %xmm7, %xmm4 + movdqu %xmm1, (%esi) + movdqu %xmm2, 16(%esi) + movdqu %xmm3, 32(%esi) + movdqu %xmm4, 48(%esi) + jne EECB_LOOP_4 + addl $64, %esi +EECB_REMAINDER_4: + cmpl $0, %ebx + je EECB_END_4 +EECB_LOOP_4_2: + movdqu (%edi), %xmm1 + addl $16, %edi + pxor (%ecx), %xmm1 + movdqu 160(%ecx), %xmm2 + aesenc 16(%ecx), %xmm1 + aesenc 32(%ecx), %xmm1 + aesenc 48(%ecx), %xmm1 + aesenc 64(%ecx), %xmm1 + aesenc 80(%ecx), %xmm1 + aesenc 96(%ecx), %xmm1 + aesenc 112(%ecx), %xmm1 + aesenc 128(%ecx), %xmm1 + aesenc 144(%ecx), %xmm1 + cmpl $12, %eax + jb EECB_LAST_4_2 + movdqu 192(%ecx), %xmm2 + aesenc 160(%ecx), %xmm1 + aesenc 176(%ecx), %xmm1 + cmpl $14, %eax + jb EECB_LAST_4_2 + movdqu 224(%ecx), %xmm2 + aesenc 192(%ecx), %xmm1 + aesenc 208(%ecx), %xmm1 +EECB_LAST_4_2: + aesenclast %xmm2, %xmm1 + movdqu %xmm1, (%esi) + addl $16, %esi + decl %ebx + jne EECB_LOOP_4_2 +EECB_END_4: + + pop %ebx + pop %esi + pop %edi + ret + + +/* +AES_ECB_decrypt (const unsigned char *in, + unsigned char *out, + unsigned long length, + const unsigned char *KS, + int nr) +*/ +#ifndef __APPLE__ +.globl AES_ECB_decrypt +AES_ECB_decrypt: +#else +.globl _AES_ECB_decrypt +_AES_ECB_decrypt: +#endif +# parameter 1: stack[4] => %edi +# parameter 2: stack[8] => %esi +# parameter 3: stack[12] => %edx +# parameter 4: stack[16] => %ecx +# parameter 5: stack[20] => %eax + push %edi + push %esi + push %ebx + movl 20(%esp), %edi + movl 24(%esp), %esi + movl 28(%esp), %edx + movl 32(%esp), %ecx + movl 36(%esp), %eax + + + movl %edx, %ebx + shrl $4, %edx + shll $60, %ebx + je DECB_NO_PARTS_4 + addl $1, %edx +DECB_NO_PARTS_4: + movl %edx, %ebx + shll $62, %ebx + shrl $62, %ebx + shrl $2, %edx + je DECB_REMAINDER_4 + subl $64, %esi +DECB_LOOP_4: + movdqu 
(%edi), %xmm1 + movdqu 16(%edi), %xmm2 + movdqu 32(%edi), %xmm3 + movdqu 48(%edi), %xmm4 + movdqa (%ecx), %xmm5 + movdqa 16(%ecx), %xmm6 + movdqa 32(%ecx), %xmm7 + movdqa 48(%ecx), %xmm0 + pxor %xmm5, %xmm1 + pxor %xmm5, %xmm2 + pxor %xmm5, %xmm3 + pxor %xmm5, %xmm4 + aesdec %xmm6, %xmm1 + aesdec %xmm6, %xmm2 + aesdec %xmm6, %xmm3 + aesdec %xmm6, %xmm4 + aesdec %xmm7, %xmm1 + aesdec %xmm7, %xmm2 + aesdec %xmm7, %xmm3 + aesdec %xmm7, %xmm4 + aesdec %xmm0, %xmm1 + aesdec %xmm0, %xmm2 + aesdec %xmm0, %xmm3 + aesdec %xmm0, %xmm4 + movdqa 64(%ecx), %xmm5 + movdqa 80(%ecx), %xmm6 + movdqa 96(%ecx), %xmm7 + movdqa 112(%ecx), %xmm0 + aesdec %xmm5, %xmm1 + aesdec %xmm5, %xmm2 + aesdec %xmm5, %xmm3 + aesdec %xmm5, %xmm4 + aesdec %xmm6, %xmm1 + aesdec %xmm6, %xmm2 + aesdec %xmm6, %xmm3 + aesdec %xmm6, %xmm4 + aesdec %xmm7, %xmm1 + aesdec %xmm7, %xmm2 + aesdec %xmm7, %xmm3 + aesdec %xmm7, %xmm4 + aesdec %xmm0, %xmm1 + aesdec %xmm0, %xmm2 + aesdec %xmm0, %xmm3 + aesdec %xmm0, %xmm4 + movdqa 128(%ecx), %xmm5 + movdqa 144(%ecx), %xmm6 + movdqa 160(%ecx), %xmm7 + cmpl $12, %eax + aesdec %xmm5, %xmm1 + aesdec %xmm5, %xmm2 + aesdec %xmm5, %xmm3 + aesdec %xmm5, %xmm4 + aesdec %xmm6, %xmm1 + aesdec %xmm6, %xmm2 + aesdec %xmm6, %xmm3 + aesdec %xmm6, %xmm4 + jb DECB_LAST_4 + movdqa 160(%ecx), %xmm5 + movdqa 176(%ecx), %xmm6 + movdqa 192(%ecx), %xmm7 + cmpl $14, %eax + aesdec %xmm5, %xmm1 + aesdec %xmm5, %xmm2 + aesdec %xmm5, %xmm3 + aesdec %xmm5, %xmm4 + aesdec %xmm6, %xmm1 + aesdec %xmm6, %xmm2 + aesdec %xmm6, %xmm3 + aesdec %xmm6, %xmm4 + jb DECB_LAST_4 + movdqa 192(%ecx), %xmm5 + movdqa 208(%ecx), %xmm6 + movdqa 224(%ecx), %xmm7 + aesdec %xmm5, %xmm1 + aesdec %xmm5, %xmm2 + aesdec %xmm5, %xmm3 + aesdec %xmm5, %xmm4 + aesdec %xmm6, %xmm1 + aesdec %xmm6, %xmm2 + aesdec %xmm6, %xmm3 + aesdec %xmm6, %xmm4 +DECB_LAST_4: + addl $64, %edi + addl $64, %esi + decl %edx + aesdeclast %xmm7, %xmm1 + aesdeclast %xmm7, %xmm2 + aesdeclast %xmm7, %xmm3 + aesdeclast %xmm7, %xmm4 + movdqu %xmm1, (%esi) + movdqu %xmm2, 16(%esi) + movdqu %xmm3, 32(%esi) + movdqu %xmm4, 48(%esi) + jne DECB_LOOP_4 + addl $64, %esi +DECB_REMAINDER_4: + cmpl $0, %ebx + je DECB_END_4 +DECB_LOOP_4_2: + movdqu (%edi), %xmm1 + addl $16, %edi + pxor (%ecx), %xmm1 + movdqu 160(%ecx), %xmm2 + cmpl $12, %eax + aesdec 16(%ecx), %xmm1 + aesdec 32(%ecx), %xmm1 + aesdec 48(%ecx), %xmm1 + aesdec 64(%ecx), %xmm1 + aesdec 80(%ecx), %xmm1 + aesdec 96(%ecx), %xmm1 + aesdec 112(%ecx), %xmm1 + aesdec 128(%ecx), %xmm1 + aesdec 144(%ecx), %xmm1 + jb DECB_LAST_4_2 + cmpl $14, %eax + movdqu 192(%ecx), %xmm2 + aesdec 160(%ecx), %xmm1 + aesdec 176(%ecx), %xmm1 + jb DECB_LAST_4_2 + movdqu 224(%ecx), %xmm2 + aesdec 192(%ecx), %xmm1 + aesdec 208(%ecx), %xmm1 +DECB_LAST_4_2: + aesdeclast %xmm2, %xmm1 + movdqu %xmm1, (%esi) + addl $16, %esi + decl %ebx + jne DECB_LOOP_4_2 +DECB_END_4: + pop %ebx + pop %esi + pop %edi + ret + + + +/* +void AES_128_Key_Expansion(const unsigned char* userkey, + unsigned char* key_schedule); +*/ +.align 16,0x90 +#ifndef __APPLE__ +.globl AES_128_Key_Expansion +AES_128_Key_Expansion: +#else +.globl _AES_128_Key_Expansion +_AES_128_Key_Expansion: +#endif + # parameter 1: stack[4] => %eax + # parameter 2: stack[8] => %edx + movl 4(%esp), %eax + movl 8(%esp), %edx + + movl $10, 240(%edx) + + movdqu (%eax), %xmm1 + movdqa %xmm1, (%edx) + + +ASSISTS: + aeskeygenassist $1, %xmm1, %xmm2 + call PREPARE_ROUNDKEY_128 + movdqa %xmm1, 16(%edx) + aeskeygenassist $2, %xmm1, %xmm2 + call PREPARE_ROUNDKEY_128 + movdqa %xmm1, 32(%edx) + aeskeygenassist $4, %xmm1, 
%xmm2 + call PREPARE_ROUNDKEY_128 + movdqa %xmm1, 48(%edx) + aeskeygenassist $8, %xmm1, %xmm2 + call PREPARE_ROUNDKEY_128 + movdqa %xmm1, 64(%edx) + aeskeygenassist $16, %xmm1, %xmm2 + call PREPARE_ROUNDKEY_128 + movdqa %xmm1, 80(%edx) + aeskeygenassist $32, %xmm1, %xmm2 + call PREPARE_ROUNDKEY_128 + movdqa %xmm1, 96(%edx) + aeskeygenassist $64, %xmm1, %xmm2 + call PREPARE_ROUNDKEY_128 + movdqa %xmm1, 112(%edx) + aeskeygenassist $0x80, %xmm1, %xmm2 + call PREPARE_ROUNDKEY_128 + movdqa %xmm1, 128(%edx) + aeskeygenassist $0x1b, %xmm1, %xmm2 + call PREPARE_ROUNDKEY_128 + movdqa %xmm1, 144(%edx) + aeskeygenassist $0x36, %xmm1, %xmm2 + call PREPARE_ROUNDKEY_128 + movdqa %xmm1, 160(%edx) + ret + +PREPARE_ROUNDKEY_128: + pshufd $255, %xmm2, %xmm2 + movdqa %xmm1, %xmm3 + pslldq $4, %xmm3 + pxor %xmm3, %xmm1 + pslldq $4, %xmm3 + pxor %xmm3, %xmm1 + pslldq $4, %xmm3 + pxor %xmm3, %xmm1 + pxor %xmm2, %xmm1 + ret + + +/* +void AES_192_Key_Expansion (const unsigned char *userkey, + unsigned char *key) +*/ +#ifndef __APPLE__ +.globl AES_192_Key_Expansion +AES_192_Key_Expansion: +#else +.globl _AES_192_Key_Expansion +_AES_192_Key_Expansion: +#endif + # parameter 1: stack[4] => %eax + # parameter 2: stack[8] => %edx + movl 4(%esp), %eax + movl 8(%esp), %edx + + movdqu (%eax), %xmm1 + movq 16(%eax), %xmm3 + movdqa %xmm1, (%edx) + movdqa %xmm3, %xmm5 + + aeskeygenassist $0x1, %xmm3, %xmm2 + call PREPARE_ROUNDKEY_192 + shufpd $0, %xmm1, %xmm5 + movdqa %xmm5, 16(%edx) + movdqa %xmm1, %xmm6 + shufpd $1, %xmm3, %xmm6 + movdqa %xmm6, 32(%edx) + + aeskeygenassist $0x2, %xmm3, %xmm2 + call PREPARE_ROUNDKEY_192 + movdqa %xmm1, 48(%edx) + movdqa %xmm3, %xmm5 + + aeskeygenassist $0x4, %xmm3, %xmm2 + call PREPARE_ROUNDKEY_192 + shufpd $0, %xmm1, %xmm5 + movdqa %xmm5, 64(%edx) + movdqa %xmm1, %xmm6 + shufpd $1, %xmm3, %xmm6 + movdqa %xmm6, 80(%edx) + + aeskeygenassist $0x8, %xmm3, %xmm2 + call PREPARE_ROUNDKEY_192 + movdqa %xmm1, 96(%edx) + movdqa %xmm3, %xmm5 + + aeskeygenassist $0x10, %xmm3, %xmm2 + call PREPARE_ROUNDKEY_192 + shufpd $0, %xmm1, %xmm5 + movdqa %xmm5, 112(%edx) + movdqa %xmm1, %xmm6 + shufpd $1, %xmm3, %xmm6 + movdqa %xmm6, 128(%edx) + + aeskeygenassist $0x20, %xmm3, %xmm2 + call PREPARE_ROUNDKEY_192 + movdqa %xmm1, 144(%edx) + movdqa %xmm3, %xmm5 + + aeskeygenassist $0x40, %xmm3, %xmm2 + call PREPARE_ROUNDKEY_192 + shufpd $0, %xmm1, %xmm5 + movdqa %xmm5, 160(%edx) + movdqa %xmm1, %xmm6 + shufpd $1, %xmm3, %xmm6 + movdqa %xmm6, 176(%edx) + + aeskeygenassist $0x80, %xmm3, %xmm2 + call PREPARE_ROUNDKEY_192 + movdqa %xmm1, 192(%edx) + movdqa %xmm3, 208(%edx) + ret + +PREPARE_ROUNDKEY_192: + pshufd $0x55, %xmm2, %xmm2 + movdqu %xmm1, %xmm4 + pslldq $4, %xmm4 + pxor %xmm4, %xmm1 + + pslldq $4, %xmm4 + pxor %xmm4, %xmm1 + pslldq $4, %xmm4 + pxor %xmm4, %xmm1 + pxor %xmm2, %xmm1 + pshufd $0xff, %xmm1, %xmm2 + movdqu %xmm3, %xmm4 + pslldq $4, %xmm4 + pxor %xmm4, %xmm3 + pxor %xmm2, %xmm3 + ret + + +/* +void AES_256_Key_Expansion (const unsigned char *userkey, + unsigned char *key) +*/ +#ifndef __APPLE__ +.globl AES_256_Key_Expansion +AES_256_Key_Expansion: +#else +.globl _AES_256_Key_Expansion +_AES_256_Key_Expansion: +#endif + # parameter 1: stack[4] => %eax + # parameter 2: stack[8] => %edx + movl 4(%esp), %eax + movl 8(%esp), %edx + + movdqu (%eax), %xmm1 + movdqu 16(%eax), %xmm3 + movdqa %xmm1, (%edx) + movdqa %xmm3, 16(%edx) + + aeskeygenassist $0x1, %xmm3, %xmm2 + call MAKE_RK256_a + movdqa %xmm1, 32(%edx) + aeskeygenassist $0x0, %xmm1, %xmm2 + call MAKE_RK256_b + movdqa %xmm3, 48(%edx) + aeskeygenassist 
$0x2, %xmm3, %xmm2 + call MAKE_RK256_a + movdqa %xmm1, 64(%edx) + aeskeygenassist $0x0, %xmm1, %xmm2 + call MAKE_RK256_b + movdqa %xmm3, 80(%edx) + aeskeygenassist $0x4, %xmm3, %xmm2 + call MAKE_RK256_a + movdqa %xmm1, 96(%edx) + aeskeygenassist $0x0, %xmm1, %xmm2 + call MAKE_RK256_b + movdqa %xmm3, 112(%edx) + aeskeygenassist $0x8, %xmm3, %xmm2 + call MAKE_RK256_a + movdqa %xmm1, 128(%edx) + aeskeygenassist $0x0, %xmm1, %xmm2 + call MAKE_RK256_b + movdqa %xmm3, 144(%edx) + aeskeygenassist $0x10, %xmm3, %xmm2 + call MAKE_RK256_a + movdqa %xmm1, 160(%edx) + aeskeygenassist $0x0, %xmm1, %xmm2 + call MAKE_RK256_b + movdqa %xmm3, 176(%edx) + aeskeygenassist $0x20, %xmm3, %xmm2 + call MAKE_RK256_a + movdqa %xmm1, 192(%edx) + + aeskeygenassist $0x0, %xmm1, %xmm2 + call MAKE_RK256_b + movdqa %xmm3, 208(%edx) + aeskeygenassist $0x40, %xmm3, %xmm2 + call MAKE_RK256_a + movdqa %xmm1, 224(%edx) + + ret + +MAKE_RK256_a: + pshufd $0xff, %xmm2, %xmm2 + movdqa %xmm1, %xmm4 + pslldq $4, %xmm4 + pxor %xmm4, %xmm1 + pslldq $4, %xmm4 + pxor %xmm4, %xmm1 + pslldq $4, %xmm4 + pxor %xmm4, %xmm1 + pxor %xmm2, %xmm1 + ret + +MAKE_RK256_b: + pshufd $0xaa, %xmm2, %xmm2 + movdqa %xmm3, %xmm4 + pslldq $4, %xmm4 + pxor %xmm4, %xmm3 + pslldq $4, %xmm4 + pxor %xmm4, %xmm3 + pslldq $4, %xmm4 + pxor %xmm4, %xmm3 + pxor %xmm2, %xmm3 + ret + +#endif /* WOLFSSL_X86_64_BUILD */ + #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits #endif + diff --git a/wolfcrypt/src/aes_gcm_asm.S b/wolfcrypt/src/aes_gcm_asm.S index ad3b6be6f..d217597e7 100644 --- a/wolfcrypt/src/aes_gcm_asm.S +++ b/wolfcrypt/src/aes_gcm_asm.S @@ -30,6 +30,7 @@ #define HAVE_INTEL_AVX2 #endif /* NO_AVX2_SUPPORT */ +#ifdef WOLFSSL_X86_64_BUILD #ifndef __APPLE__ .data #else @@ -15833,6 +15834,7 @@ L_AES_GCM_decrypt_final_avx2_cmp_tag_done: #endif /* __APPLE__ */ #endif /* WOLFSSL_AESGCM_STREAM */ #endif /* HAVE_INTEL_AVX2 */ +#endif /* WOLFSSL_X86_64_BUILD */ #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/wolfcrypt/src/aes_gcm_x86_asm.S b/wolfcrypt/src/aes_gcm_x86_asm.S new file mode 100644 index 000000000..611875bcf --- /dev/null +++ b/wolfcrypt/src/aes_gcm_x86_asm.S @@ -0,0 +1,12962 @@ +/* aes_gcm_x86_asm + * + * Copyright (C) 2006-2022 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifdef WOLFSSL_USER_SETTINGS +#include "wolfssl/wolfcrypt/settings.h" +#endif + +#ifndef HAVE_INTEL_AVX1 +#define HAVE_INTEL_AVX1 +#endif /* HAVE_INTEL_AVX1 */ +#ifndef NO_AVX2_SUPPORT +#define HAVE_INTEL_AVX2 +#endif /* NO_AVX2_SUPPORT */ + +.type data, @object +L_aes_gcm_one: +.long 0x0,0x0,0x1,0x0 +.type data, @object +L_aes_gcm_two: +.long 0x0,0x0,0x2,0x0 +.type data, @object +L_aes_gcm_three: +.long 0x0,0x0,0x3,0x0 +.type data, @object +L_aes_gcm_four: +.long 0x0,0x0,0x4,0x0 +.type data, @object +L_aes_gcm_bswap_epi64: +.long 0x4050607,0x10203,0xc0d0e0f,0x8090a0b +.type data, @object +L_aes_gcm_bswap_mask: +.long 0xc0d0e0f,0x8090a0b,0x4050607,0x10203 +.type data, @object +L_aes_gcm_mod2_128: +.long 0x1,0x0,0x0,0xc2000000 +.type data, @object +L_aes_gcm_avx1_one: +.long 0x0,0x0,0x1,0x0 +.type data, @object +L_aes_gcm_avx1_two: +.long 0x0,0x0,0x2,0x0 +.type data, @object +L_aes_gcm_avx1_three: +.long 0x0,0x0,0x3,0x0 +.type data, @object +L_aes_gcm_avx1_four: +.long 0x0,0x0,0x4,0x0 +.type data, @object +L_aes_gcm_avx1_bswap_epi64: +.long 0x4050607,0x10203,0xc0d0e0f,0x8090a0b +.type data, @object +L_aes_gcm_avx1_bswap_mask: +.long 0xc0d0e0f,0x8090a0b,0x4050607,0x10203 +.type data, @object +L_aes_gcm_avx1_mod2_128: +.long 0x1,0x0,0x0,0xc2000000 +.type data, @object +L_aes_gcm_avx2_one: +.long 0x0,0x0,0x1,0x0 +.type data, @object +L_aes_gcm_avx2_two: +.long 0x0,0x0,0x2,0x0 +.type data, @object +L_aes_gcm_avx2_three: +.long 0x0,0x0,0x3,0x0 +.type data, @object +L_aes_gcm_avx2_four: +.long 0x0,0x0,0x4,0x0 +.type data, @object +L_avx2_aes_gcm_bswap_one: +.long 0x0,0x0,0x0,0x1000000 +.type data, @object +L_aes_gcm_avx2_bswap_epi64: +.long 0x4050607,0x10203,0xc0d0e0f,0x8090a0b +.type data, @object +L_aes_gcm_avx2_bswap_mask: +.long 0xc0d0e0f,0x8090a0b,0x4050607,0x10203 +.type data, @object +L_aes_gcm_avx2_mod2_128: +.long 0x1,0x0,0x0,0xc2000000 +.text +.globl AES_GCM_encrypt +.type AES_GCM_encrypt,@function +.align 16 +AES_GCM_encrypt: + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + subl $0x70, %esp + movl 144(%esp), %esi + movl 168(%esp), %ebp + movl 160(%esp), %edx + pxor %xmm0, %xmm0 + pxor %xmm2, %xmm2 + cmpl $12, %edx + jne L_AES_GCM_encrypt_iv_not_12 + # # Calculate values when IV is 12 bytes + # Set counter based on IV + movl $0x1000000, %ecx + pinsrd $0x00, (%esi), %xmm0 + pinsrd $0x01, 4(%esi), %xmm0 + pinsrd $2, 8(%esi), %xmm0 + pinsrd $3, %ecx, %xmm0 + # H = Encrypt X(=0) and T = Encrypt counter + movdqa %xmm0, %xmm5 + movdqa (%ebp), %xmm1 + pxor %xmm1, %xmm5 + movdqa 16(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 32(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 48(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 64(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 80(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 96(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 112(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 128(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 144(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + cmpl $11, 172(%esp) + movdqa 160(%ebp), %xmm3 + jl L_AES_GCM_encrypt_calc_iv_12_last + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 176(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc 
%xmm3, %xmm5 + cmpl $13, 172(%esp) + movdqa 192(%ebp), %xmm3 + jl L_AES_GCM_encrypt_calc_iv_12_last + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 208(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 224(%ebp), %xmm3 +L_AES_GCM_encrypt_calc_iv_12_last: + aesenclast %xmm3, %xmm1 + aesenclast %xmm3, %xmm5 + pshufb L_aes_gcm_bswap_mask, %xmm1 + movdqu %xmm5, 80(%esp) + jmp L_AES_GCM_encrypt_iv_done +L_AES_GCM_encrypt_iv_not_12: + # Calculate values when IV is not 12 bytes + # H = Encrypt X(=0) + movdqa (%ebp), %xmm1 + aesenc 16(%ebp), %xmm1 + aesenc 32(%ebp), %xmm1 + aesenc 48(%ebp), %xmm1 + aesenc 64(%ebp), %xmm1 + aesenc 80(%ebp), %xmm1 + aesenc 96(%ebp), %xmm1 + aesenc 112(%ebp), %xmm1 + aesenc 128(%ebp), %xmm1 + aesenc 144(%ebp), %xmm1 + cmpl $11, 172(%esp) + movdqa 160(%ebp), %xmm5 + jl L_AES_GCM_encrypt_calc_iv_1_aesenc_avx_last + aesenc %xmm5, %xmm1 + aesenc 176(%ebp), %xmm1 + cmpl $13, 172(%esp) + movdqa 192(%ebp), %xmm5 + jl L_AES_GCM_encrypt_calc_iv_1_aesenc_avx_last + aesenc %xmm5, %xmm1 + aesenc 208(%ebp), %xmm1 + movdqa 224(%ebp), %xmm5 +L_AES_GCM_encrypt_calc_iv_1_aesenc_avx_last: + aesenclast %xmm5, %xmm1 + pshufb L_aes_gcm_bswap_mask, %xmm1 + # Calc counter + # Initialization vector + cmpl $0x00, %edx + movl $0x00, %ecx + je L_AES_GCM_encrypt_calc_iv_done + cmpl $16, %edx + jl L_AES_GCM_encrypt_calc_iv_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_encrypt_calc_iv_16_loop: + movdqu (%esi,%ecx,1), %xmm4 + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm4, %xmm0 + pshufd $0x4e, %xmm0, %xmm5 + pshufd $0x4e, %xmm1, %xmm6 + movdqa %xmm1, %xmm7 + movdqa %xmm1, %xmm4 + pclmulqdq $0x11, %xmm0, %xmm7 + pclmulqdq $0x00, %xmm0, %xmm4 + pxor %xmm0, %xmm5 + pxor %xmm1, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm4, %xmm3 + movdqa %xmm7, %xmm0 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm3 + pxor %xmm5, %xmm0 + movdqa %xmm3, %xmm4 + movdqa %xmm0, %xmm5 + psrld $31, %xmm4 + psrld $31, %xmm5 + pslld $0x01, %xmm3 + pslld $0x01, %xmm0 + movdqa %xmm4, %xmm6 + pslldq $4, %xmm4 + psrldq $12, %xmm6 + pslldq $4, %xmm5 + por %xmm6, %xmm0 + por %xmm4, %xmm3 + por %xmm5, %xmm0 + movdqa %xmm3, %xmm4 + movdqa %xmm3, %xmm5 + movdqa %xmm3, %xmm6 + pslld $31, %xmm4 + pslld $30, %xmm5 + pslld $25, %xmm6 + pxor %xmm5, %xmm4 + pxor %xmm6, %xmm4 + movdqa %xmm4, %xmm5 + psrldq $4, %xmm5 + pslldq $12, %xmm4 + pxor %xmm4, %xmm3 + movdqa %xmm3, %xmm6 + movdqa %xmm3, %xmm7 + movdqa %xmm3, %xmm4 + psrld $0x01, %xmm6 + psrld $2, %xmm7 + psrld $7, %xmm4 + pxor %xmm7, %xmm6 + pxor %xmm4, %xmm6 + pxor %xmm5, %xmm6 + pxor %xmm3, %xmm6 + pxor %xmm6, %xmm0 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_calc_iv_16_loop + movl 160(%esp), %edx + cmpl %edx, %ecx + je L_AES_GCM_encrypt_calc_iv_done +L_AES_GCM_encrypt_calc_iv_lt16: + subl $16, %esp + pxor %xmm4, %xmm4 + xorl %ebx, %ebx + movdqu %xmm4, (%esp) +L_AES_GCM_encrypt_calc_iv_loop: + movzbl (%esi,%ecx,1), %eax + movb %al, (%esp,%ebx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_calc_iv_loop + movdqu (%esp), %xmm4 + addl $16, %esp + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm4, %xmm0 + pshufd $0x4e, %xmm0, %xmm5 + pshufd $0x4e, %xmm1, %xmm6 + movdqa %xmm1, %xmm7 + movdqa %xmm1, %xmm4 + pclmulqdq $0x11, %xmm0, %xmm7 + pclmulqdq $0x00, %xmm0, %xmm4 + pxor %xmm0, %xmm5 + pxor %xmm1, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm4, %xmm3 + movdqa %xmm7, %xmm0 + pslldq $8, %xmm6 + 
psrldq $8, %xmm5 + pxor %xmm6, %xmm3 + pxor %xmm5, %xmm0 + movdqa %xmm3, %xmm4 + movdqa %xmm0, %xmm5 + psrld $31, %xmm4 + psrld $31, %xmm5 + pslld $0x01, %xmm3 + pslld $0x01, %xmm0 + movdqa %xmm4, %xmm6 + pslldq $4, %xmm4 + psrldq $12, %xmm6 + pslldq $4, %xmm5 + por %xmm6, %xmm0 + por %xmm4, %xmm3 + por %xmm5, %xmm0 + movdqa %xmm3, %xmm4 + movdqa %xmm3, %xmm5 + movdqa %xmm3, %xmm6 + pslld $31, %xmm4 + pslld $30, %xmm5 + pslld $25, %xmm6 + pxor %xmm5, %xmm4 + pxor %xmm6, %xmm4 + movdqa %xmm4, %xmm5 + psrldq $4, %xmm5 + pslldq $12, %xmm4 + pxor %xmm4, %xmm3 + movdqa %xmm3, %xmm6 + movdqa %xmm3, %xmm7 + movdqa %xmm3, %xmm4 + psrld $0x01, %xmm6 + psrld $2, %xmm7 + psrld $7, %xmm4 + pxor %xmm7, %xmm6 + pxor %xmm4, %xmm6 + pxor %xmm5, %xmm6 + pxor %xmm3, %xmm6 + pxor %xmm6, %xmm0 +L_AES_GCM_encrypt_calc_iv_done: + # T = Encrypt counter + pxor %xmm4, %xmm4 + shll $3, %edx + pinsrd $0x00, %edx, %xmm4 + pxor %xmm4, %xmm0 + pshufd $0x4e, %xmm0, %xmm5 + pshufd $0x4e, %xmm1, %xmm6 + movdqa %xmm1, %xmm7 + movdqa %xmm1, %xmm4 + pclmulqdq $0x11, %xmm0, %xmm7 + pclmulqdq $0x00, %xmm0, %xmm4 + pxor %xmm0, %xmm5 + pxor %xmm1, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm4, %xmm3 + movdqa %xmm7, %xmm0 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm3 + pxor %xmm5, %xmm0 + movdqa %xmm3, %xmm4 + movdqa %xmm0, %xmm5 + psrld $31, %xmm4 + psrld $31, %xmm5 + pslld $0x01, %xmm3 + pslld $0x01, %xmm0 + movdqa %xmm4, %xmm6 + pslldq $4, %xmm4 + psrldq $12, %xmm6 + pslldq $4, %xmm5 + por %xmm6, %xmm0 + por %xmm4, %xmm3 + por %xmm5, %xmm0 + movdqa %xmm3, %xmm4 + movdqa %xmm3, %xmm5 + movdqa %xmm3, %xmm6 + pslld $31, %xmm4 + pslld $30, %xmm5 + pslld $25, %xmm6 + pxor %xmm5, %xmm4 + pxor %xmm6, %xmm4 + movdqa %xmm4, %xmm5 + psrldq $4, %xmm5 + pslldq $12, %xmm4 + pxor %xmm4, %xmm3 + movdqa %xmm3, %xmm6 + movdqa %xmm3, %xmm7 + movdqa %xmm3, %xmm4 + psrld $0x01, %xmm6 + psrld $2, %xmm7 + psrld $7, %xmm4 + pxor %xmm7, %xmm6 + pxor %xmm4, %xmm6 + pxor %xmm5, %xmm6 + pxor %xmm3, %xmm6 + pxor %xmm6, %xmm0 + pshufb L_aes_gcm_bswap_mask, %xmm0 + # Encrypt counter + movdqa (%ebp), %xmm4 + pxor %xmm0, %xmm4 + aesenc 16(%ebp), %xmm4 + aesenc 32(%ebp), %xmm4 + aesenc 48(%ebp), %xmm4 + aesenc 64(%ebp), %xmm4 + aesenc 80(%ebp), %xmm4 + aesenc 96(%ebp), %xmm4 + aesenc 112(%ebp), %xmm4 + aesenc 128(%ebp), %xmm4 + aesenc 144(%ebp), %xmm4 + cmpl $11, 172(%esp) + movdqa 160(%ebp), %xmm5 + jl L_AES_GCM_encrypt_calc_iv_2_aesenc_avx_last + aesenc %xmm5, %xmm4 + aesenc 176(%ebp), %xmm4 + cmpl $13, 172(%esp) + movdqa 192(%ebp), %xmm5 + jl L_AES_GCM_encrypt_calc_iv_2_aesenc_avx_last + aesenc %xmm5, %xmm4 + aesenc 208(%ebp), %xmm4 + movdqa 224(%ebp), %xmm5 +L_AES_GCM_encrypt_calc_iv_2_aesenc_avx_last: + aesenclast %xmm5, %xmm4 + movdqu %xmm4, 80(%esp) +L_AES_GCM_encrypt_iv_done: + movl 140(%esp), %esi + # Additional authentication data + movl 156(%esp), %edx + cmpl $0x00, %edx + je L_AES_GCM_encrypt_calc_aad_done + xorl %ecx, %ecx + cmpl $16, %edx + jl L_AES_GCM_encrypt_calc_aad_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_encrypt_calc_aad_16_loop: + movdqu (%esi,%ecx,1), %xmm4 + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm4, %xmm2 + pshufd $0x4e, %xmm2, %xmm5 + pshufd $0x4e, %xmm1, %xmm6 + movdqa %xmm1, %xmm7 + movdqa %xmm1, %xmm4 + pclmulqdq $0x11, %xmm2, %xmm7 + pclmulqdq $0x00, %xmm2, %xmm4 + pxor %xmm2, %xmm5 + pxor %xmm1, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm4, %xmm3 + movdqa %xmm7, %xmm2 + 
pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm3 + pxor %xmm5, %xmm2 + movdqa %xmm3, %xmm4 + movdqa %xmm2, %xmm5 + psrld $31, %xmm4 + psrld $31, %xmm5 + pslld $0x01, %xmm3 + pslld $0x01, %xmm2 + movdqa %xmm4, %xmm6 + pslldq $4, %xmm4 + psrldq $12, %xmm6 + pslldq $4, %xmm5 + por %xmm6, %xmm2 + por %xmm4, %xmm3 + por %xmm5, %xmm2 + movdqa %xmm3, %xmm4 + movdqa %xmm3, %xmm5 + movdqa %xmm3, %xmm6 + pslld $31, %xmm4 + pslld $30, %xmm5 + pslld $25, %xmm6 + pxor %xmm5, %xmm4 + pxor %xmm6, %xmm4 + movdqa %xmm4, %xmm5 + psrldq $4, %xmm5 + pslldq $12, %xmm4 + pxor %xmm4, %xmm3 + movdqa %xmm3, %xmm6 + movdqa %xmm3, %xmm7 + movdqa %xmm3, %xmm4 + psrld $0x01, %xmm6 + psrld $2, %xmm7 + psrld $7, %xmm4 + pxor %xmm7, %xmm6 + pxor %xmm4, %xmm6 + pxor %xmm5, %xmm6 + pxor %xmm3, %xmm6 + pxor %xmm6, %xmm2 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_calc_aad_16_loop + movl 156(%esp), %edx + cmpl %edx, %ecx + je L_AES_GCM_encrypt_calc_aad_done +L_AES_GCM_encrypt_calc_aad_lt16: + subl $16, %esp + pxor %xmm4, %xmm4 + xorl %ebx, %ebx + movdqu %xmm4, (%esp) +L_AES_GCM_encrypt_calc_aad_loop: + movzbl (%esi,%ecx,1), %eax + movb %al, (%esp,%ebx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_calc_aad_loop + movdqu (%esp), %xmm4 + addl $16, %esp + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm4, %xmm2 + pshufd $0x4e, %xmm2, %xmm5 + pshufd $0x4e, %xmm1, %xmm6 + movdqa %xmm1, %xmm7 + movdqa %xmm1, %xmm4 + pclmulqdq $0x11, %xmm2, %xmm7 + pclmulqdq $0x00, %xmm2, %xmm4 + pxor %xmm2, %xmm5 + pxor %xmm1, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm4, %xmm3 + movdqa %xmm7, %xmm2 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm3 + pxor %xmm5, %xmm2 + movdqa %xmm3, %xmm4 + movdqa %xmm2, %xmm5 + psrld $31, %xmm4 + psrld $31, %xmm5 + pslld $0x01, %xmm3 + pslld $0x01, %xmm2 + movdqa %xmm4, %xmm6 + pslldq $4, %xmm4 + psrldq $12, %xmm6 + pslldq $4, %xmm5 + por %xmm6, %xmm2 + por %xmm4, %xmm3 + por %xmm5, %xmm2 + movdqa %xmm3, %xmm4 + movdqa %xmm3, %xmm5 + movdqa %xmm3, %xmm6 + pslld $31, %xmm4 + pslld $30, %xmm5 + pslld $25, %xmm6 + pxor %xmm5, %xmm4 + pxor %xmm6, %xmm4 + movdqa %xmm4, %xmm5 + psrldq $4, %xmm5 + pslldq $12, %xmm4 + pxor %xmm4, %xmm3 + movdqa %xmm3, %xmm6 + movdqa %xmm3, %xmm7 + movdqa %xmm3, %xmm4 + psrld $0x01, %xmm6 + psrld $2, %xmm7 + psrld $7, %xmm4 + pxor %xmm7, %xmm6 + pxor %xmm4, %xmm6 + pxor %xmm5, %xmm6 + pxor %xmm3, %xmm6 + pxor %xmm6, %xmm2 +L_AES_GCM_encrypt_calc_aad_done: + movdqu %xmm2, 96(%esp) + movl 132(%esp), %esi + movl 136(%esp), %edi + # Calculate counter and H + pshufb L_aes_gcm_bswap_epi64, %xmm0 + movdqa %xmm1, %xmm5 + paddd L_aes_gcm_one, %xmm0 + movdqa %xmm1, %xmm4 + movdqu %xmm0, 64(%esp) + psrlq $63, %xmm5 + psllq $0x01, %xmm4 + pslldq $8, %xmm5 + por %xmm5, %xmm4 + pshufd $0xff, %xmm1, %xmm1 + psrad $31, %xmm1 + pand L_aes_gcm_mod2_128, %xmm1 + pxor %xmm4, %xmm1 + xorl %ebx, %ebx + movl 152(%esp), %eax + cmpl $0x40, %eax + jl L_AES_GCM_encrypt_done_64 + andl $0xffffffc0, %eax + movdqa %xmm2, %xmm6 + # H ^ 1 + movdqu %xmm1, (%esp) + # H ^ 2 + pshufd $0x4e, %xmm1, %xmm5 + pshufd $0x4e, %xmm1, %xmm6 + movdqa %xmm1, %xmm7 + movdqa %xmm1, %xmm4 + pclmulqdq $0x11, %xmm1, %xmm7 + pclmulqdq $0x00, %xmm1, %xmm4 + pxor %xmm1, %xmm5 + pxor %xmm1, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm7, %xmm0 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm4 + pxor %xmm5, %xmm0 + movdqa %xmm4, %xmm5 + movdqa %xmm4, 
%xmm6 + movdqa %xmm4, %xmm7 + pslld $31, %xmm5 + pslld $30, %xmm6 + pslld $25, %xmm7 + pxor %xmm6, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm7 + psrldq $4, %xmm7 + pslldq $12, %xmm5 + pxor %xmm5, %xmm4 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + psrld $0x01, %xmm5 + psrld $2, %xmm6 + pxor %xmm6, %xmm5 + pxor %xmm4, %xmm5 + psrld $7, %xmm4 + pxor %xmm7, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm5, %xmm0 + movdqu %xmm0, 16(%esp) + # H ^ 3 + pshufd $0x4e, %xmm1, %xmm5 + pshufd $0x4e, %xmm0, %xmm6 + movdqa %xmm0, %xmm7 + movdqa %xmm0, %xmm4 + pclmulqdq $0x11, %xmm1, %xmm7 + pclmulqdq $0x00, %xmm1, %xmm4 + pxor %xmm1, %xmm5 + pxor %xmm0, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm7, %xmm3 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm4 + pxor %xmm5, %xmm3 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + movdqa %xmm4, %xmm7 + pslld $31, %xmm5 + pslld $30, %xmm6 + pslld $25, %xmm7 + pxor %xmm6, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm7 + psrldq $4, %xmm7 + pslldq $12, %xmm5 + pxor %xmm5, %xmm4 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + psrld $0x01, %xmm5 + psrld $2, %xmm6 + pxor %xmm6, %xmm5 + pxor %xmm4, %xmm5 + psrld $7, %xmm4 + pxor %xmm7, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm5, %xmm3 + movdqu %xmm3, 32(%esp) + # H ^ 4 + pshufd $0x4e, %xmm0, %xmm5 + pshufd $0x4e, %xmm0, %xmm6 + movdqa %xmm0, %xmm7 + movdqa %xmm0, %xmm4 + pclmulqdq $0x11, %xmm0, %xmm7 + pclmulqdq $0x00, %xmm0, %xmm4 + pxor %xmm0, %xmm5 + pxor %xmm0, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm7, %xmm3 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm4 + pxor %xmm5, %xmm3 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + movdqa %xmm4, %xmm7 + pslld $31, %xmm5 + pslld $30, %xmm6 + pslld $25, %xmm7 + pxor %xmm6, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm7 + psrldq $4, %xmm7 + pslldq $12, %xmm5 + pxor %xmm5, %xmm4 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + psrld $0x01, %xmm5 + psrld $2, %xmm6 + pxor %xmm6, %xmm5 + pxor %xmm4, %xmm5 + psrld $7, %xmm4 + pxor %xmm7, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm5, %xmm3 + movdqu %xmm3, 48(%esp) + # First 64 bytes of input + # Encrypt 64 bytes of counter + movdqu 64(%esp), %xmm4 + movdqa L_aes_gcm_bswap_epi64, %xmm3 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + movdqa %xmm4, %xmm7 + pshufb %xmm3, %xmm4 + paddd L_aes_gcm_one, %xmm5 + pshufb %xmm3, %xmm5 + paddd L_aes_gcm_two, %xmm6 + pshufb %xmm3, %xmm6 + paddd L_aes_gcm_three, %xmm7 + pshufb %xmm3, %xmm7 + movdqu 64(%esp), %xmm3 + paddd L_aes_gcm_four, %xmm3 + movdqu %xmm3, 64(%esp) + movdqa (%ebp), %xmm3 + pxor %xmm3, %xmm4 + pxor %xmm3, %xmm5 + pxor %xmm3, %xmm6 + pxor %xmm3, %xmm7 + movdqa 16(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 32(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 48(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 64(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 80(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 96(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 112(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 128(%ebp), 
%xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 144(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + cmpl $11, 172(%esp) + movdqa 160(%ebp), %xmm3 + jl L_AES_GCM_encrypt_enc_done + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 176(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + cmpl $13, 172(%esp) + movdqa 192(%ebp), %xmm3 + jl L_AES_GCM_encrypt_enc_done + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 208(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 224(%ebp), %xmm3 +L_AES_GCM_encrypt_enc_done: + aesenclast %xmm3, %xmm4 + aesenclast %xmm3, %xmm5 + movdqu (%esi), %xmm0 + movdqu 16(%esi), %xmm1 + pxor %xmm0, %xmm4 + pxor %xmm1, %xmm5 + movdqu %xmm4, (%edi) + movdqu %xmm5, 16(%edi) + aesenclast %xmm3, %xmm6 + aesenclast %xmm3, %xmm7 + movdqu 32(%esi), %xmm0 + movdqu 48(%esi), %xmm1 + pxor %xmm0, %xmm6 + pxor %xmm1, %xmm7 + movdqu %xmm6, 32(%edi) + movdqu %xmm7, 48(%edi) + cmpl $0x40, %eax + movl $0x40, %ebx + movl %esi, %ecx + movl %edi, %edx + jle L_AES_GCM_encrypt_end_64 + # More 64 bytes of input +L_AES_GCM_encrypt_ghash_64: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + # Encrypt 64 bytes of counter + movdqu 64(%esp), %xmm4 + movdqa L_aes_gcm_bswap_epi64, %xmm3 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + movdqa %xmm4, %xmm7 + pshufb %xmm3, %xmm4 + paddd L_aes_gcm_one, %xmm5 + pshufb %xmm3, %xmm5 + paddd L_aes_gcm_two, %xmm6 + pshufb %xmm3, %xmm6 + paddd L_aes_gcm_three, %xmm7 + pshufb %xmm3, %xmm7 + movdqu 64(%esp), %xmm3 + paddd L_aes_gcm_four, %xmm3 + movdqu %xmm3, 64(%esp) + movdqa (%ebp), %xmm3 + pxor %xmm3, %xmm4 + pxor %xmm3, %xmm5 + pxor %xmm3, %xmm6 + pxor %xmm3, %xmm7 + movdqa 16(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 32(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 48(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 64(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 80(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 96(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 112(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 128(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 144(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + cmpl $11, 172(%esp) + movdqa 160(%ebp), %xmm3 + jl L_AES_GCM_encrypt_aesenc_64_ghash_avx_done + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 176(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + cmpl $13, 172(%esp) + movdqa 192(%ebp), %xmm3 + jl L_AES_GCM_encrypt_aesenc_64_ghash_avx_done + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 208(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, 
%xmm7 + movdqa 224(%ebp), %xmm3 +L_AES_GCM_encrypt_aesenc_64_ghash_avx_done: + aesenclast %xmm3, %xmm4 + aesenclast %xmm3, %xmm5 + movdqu (%ecx), %xmm0 + movdqu 16(%ecx), %xmm1 + pxor %xmm0, %xmm4 + pxor %xmm1, %xmm5 + movdqu %xmm4, (%edx) + movdqu %xmm5, 16(%edx) + aesenclast %xmm3, %xmm6 + aesenclast %xmm3, %xmm7 + movdqu 32(%ecx), %xmm0 + movdqu 48(%ecx), %xmm1 + pxor %xmm0, %xmm6 + pxor %xmm1, %xmm7 + movdqu %xmm6, 32(%edx) + movdqu %xmm7, 48(%edx) + # ghash encrypted counter + movdqu 96(%esp), %xmm6 + movdqu 48(%esp), %xmm3 + movdqu -64(%edx), %xmm4 + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm6, %xmm4 + pshufd $0x4e, %xmm3, %xmm5 + pshufd $0x4e, %xmm4, %xmm1 + pxor %xmm3, %xmm5 + pxor %xmm4, %xmm1 + movdqa %xmm4, %xmm7 + pclmulqdq $0x11, %xmm3, %xmm7 + movdqa %xmm4, %xmm6 + pclmulqdq $0x00, %xmm3, %xmm6 + pclmulqdq $0x00, %xmm1, %xmm5 + pxor %xmm6, %xmm5 + pxor %xmm7, %xmm5 + movdqu 32(%esp), %xmm3 + movdqu -48(%edx), %xmm4 + pshufd $0x4e, %xmm3, %xmm0 + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm3, %xmm0 + pshufd $0x4e, %xmm4, %xmm1 + pxor %xmm4, %xmm1 + movdqa %xmm4, %xmm2 + pclmulqdq $0x11, %xmm3, %xmm2 + pclmulqdq $0x00, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm1, %xmm0 + pxor %xmm3, %xmm5 + pxor %xmm3, %xmm6 + pxor %xmm2, %xmm5 + pxor %xmm2, %xmm7 + pxor %xmm0, %xmm5 + movdqu 16(%esp), %xmm3 + movdqu -32(%edx), %xmm4 + pshufd $0x4e, %xmm3, %xmm0 + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm3, %xmm0 + pshufd $0x4e, %xmm4, %xmm1 + pxor %xmm4, %xmm1 + movdqa %xmm4, %xmm2 + pclmulqdq $0x11, %xmm3, %xmm2 + pclmulqdq $0x00, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm1, %xmm0 + pxor %xmm3, %xmm5 + pxor %xmm3, %xmm6 + pxor %xmm2, %xmm5 + pxor %xmm2, %xmm7 + pxor %xmm0, %xmm5 + movdqu (%esp), %xmm3 + movdqu -16(%edx), %xmm4 + pshufd $0x4e, %xmm3, %xmm0 + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm3, %xmm0 + pshufd $0x4e, %xmm4, %xmm1 + pxor %xmm4, %xmm1 + movdqa %xmm4, %xmm2 + pclmulqdq $0x11, %xmm3, %xmm2 + pclmulqdq $0x00, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm1, %xmm0 + pxor %xmm3, %xmm5 + pxor %xmm3, %xmm6 + pxor %xmm2, %xmm5 + pxor %xmm2, %xmm7 + pxor %xmm0, %xmm5 + movdqa %xmm5, %xmm1 + psrldq $8, %xmm5 + pslldq $8, %xmm1 + pxor %xmm1, %xmm6 + pxor %xmm5, %xmm7 + movdqa %xmm6, %xmm3 + movdqa %xmm6, %xmm0 + movdqa %xmm6, %xmm1 + pslld $31, %xmm3 + pslld $30, %xmm0 + pslld $25, %xmm1 + pxor %xmm0, %xmm3 + pxor %xmm1, %xmm3 + movdqa %xmm3, %xmm0 + pslldq $12, %xmm3 + psrldq $4, %xmm0 + pxor %xmm3, %xmm6 + movdqa %xmm6, %xmm1 + movdqa %xmm6, %xmm5 + movdqa %xmm6, %xmm4 + psrld $0x01, %xmm1 + psrld $2, %xmm5 + psrld $7, %xmm4 + pxor %xmm5, %xmm1 + pxor %xmm4, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm1, %xmm6 + pxor %xmm7, %xmm6 + movdqu %xmm6, 96(%esp) + addl $0x40, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_encrypt_ghash_64 +L_AES_GCM_encrypt_end_64: + movdqu 96(%esp), %xmm2 + # Block 1 + movdqa L_aes_gcm_bswap_mask, %xmm4 + movdqu (%edx), %xmm1 + pshufb %xmm4, %xmm1 + movdqu 48(%esp), %xmm3 + pxor %xmm2, %xmm1 + pshufd $0x4e, %xmm1, %xmm5 + pshufd $0x4e, %xmm3, %xmm6 + movdqa %xmm3, %xmm7 + movdqa %xmm3, %xmm4 + pclmulqdq $0x11, %xmm1, %xmm7 + pclmulqdq $0x00, %xmm1, %xmm4 + pxor %xmm1, %xmm5 + pxor %xmm3, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm4, %xmm0 + movdqa %xmm7, %xmm2 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm0 + pxor %xmm5, %xmm2 + # Block 2 + movdqa L_aes_gcm_bswap_mask, %xmm4 + movdqu 16(%edx), %xmm1 + pshufb %xmm4, %xmm1 + movdqu 32(%esp), %xmm3 + pshufd $0x4e, %xmm1, %xmm5 + pshufd $0x4e, %xmm3, 
%xmm6 + movdqa %xmm3, %xmm7 + movdqa %xmm3, %xmm4 + pclmulqdq $0x11, %xmm1, %xmm7 + pclmulqdq $0x00, %xmm1, %xmm4 + pxor %xmm1, %xmm5 + pxor %xmm3, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + pxor %xmm4, %xmm0 + pxor %xmm7, %xmm2 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm0 + pxor %xmm5, %xmm2 + # Block 3 + movdqa L_aes_gcm_bswap_mask, %xmm4 + movdqu 32(%edx), %xmm1 + pshufb %xmm4, %xmm1 + movdqu 16(%esp), %xmm3 + pshufd $0x4e, %xmm1, %xmm5 + pshufd $0x4e, %xmm3, %xmm6 + movdqa %xmm3, %xmm7 + movdqa %xmm3, %xmm4 + pclmulqdq $0x11, %xmm1, %xmm7 + pclmulqdq $0x00, %xmm1, %xmm4 + pxor %xmm1, %xmm5 + pxor %xmm3, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + pxor %xmm4, %xmm0 + pxor %xmm7, %xmm2 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm0 + pxor %xmm5, %xmm2 + # Block 4 + movdqa L_aes_gcm_bswap_mask, %xmm4 + movdqu 48(%edx), %xmm1 + pshufb %xmm4, %xmm1 + movdqu (%esp), %xmm3 + pshufd $0x4e, %xmm1, %xmm5 + pshufd $0x4e, %xmm3, %xmm6 + movdqa %xmm3, %xmm7 + movdqa %xmm3, %xmm4 + pclmulqdq $0x11, %xmm1, %xmm7 + pclmulqdq $0x00, %xmm1, %xmm4 + pxor %xmm1, %xmm5 + pxor %xmm3, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + pxor %xmm4, %xmm0 + pxor %xmm7, %xmm2 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm0 + pxor %xmm5, %xmm2 + movdqa %xmm0, %xmm4 + movdqa %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pslld $31, %xmm4 + pslld $30, %xmm5 + pslld $25, %xmm6 + pxor %xmm5, %xmm4 + pxor %xmm6, %xmm4 + movdqa %xmm4, %xmm5 + psrldq $4, %xmm5 + pslldq $12, %xmm4 + pxor %xmm4, %xmm0 + movdqa %xmm0, %xmm6 + movdqa %xmm0, %xmm7 + movdqa %xmm0, %xmm4 + psrld $0x01, %xmm6 + psrld $2, %xmm7 + psrld $7, %xmm4 + pxor %xmm7, %xmm6 + pxor %xmm4, %xmm6 + pxor %xmm5, %xmm6 + pxor %xmm0, %xmm6 + pxor %xmm6, %xmm2 + movdqu (%esp), %xmm1 +L_AES_GCM_encrypt_done_64: + movl 152(%esp), %edx + cmpl %edx, %ebx + jge L_AES_GCM_encrypt_done_enc + movl 152(%esp), %eax + andl $0xfffffff0, %eax + cmpl %eax, %ebx + jge L_AES_GCM_encrypt_last_block_done + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + movdqu 64(%esp), %xmm4 + movdqa %xmm4, %xmm5 + pshufb L_aes_gcm_bswap_epi64, %xmm4 + paddd L_aes_gcm_one, %xmm5 + pxor (%ebp), %xmm4 + movdqu %xmm5, 64(%esp) + aesenc 16(%ebp), %xmm4 + aesenc 32(%ebp), %xmm4 + aesenc 48(%ebp), %xmm4 + aesenc 64(%ebp), %xmm4 + aesenc 80(%ebp), %xmm4 + aesenc 96(%ebp), %xmm4 + aesenc 112(%ebp), %xmm4 + aesenc 128(%ebp), %xmm4 + aesenc 144(%ebp), %xmm4 + cmpl $11, 172(%esp) + movdqa 160(%ebp), %xmm5 + jl L_AES_GCM_encrypt_aesenc_block_aesenc_avx_last + aesenc %xmm5, %xmm4 + aesenc 176(%ebp), %xmm4 + cmpl $13, 172(%esp) + movdqa 192(%ebp), %xmm5 + jl L_AES_GCM_encrypt_aesenc_block_aesenc_avx_last + aesenc %xmm5, %xmm4 + aesenc 208(%ebp), %xmm4 + movdqa 224(%ebp), %xmm5 +L_AES_GCM_encrypt_aesenc_block_aesenc_avx_last: + aesenclast %xmm5, %xmm4 + movdqu (%ecx), %xmm5 + pxor %xmm5, %xmm4 + movdqu %xmm4, (%edx) + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm4, %xmm2 + addl $16, %ebx + cmpl %eax, %ebx + jge L_AES_GCM_encrypt_last_block_ghash +L_AES_GCM_encrypt_last_block_start: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + movdqu 64(%esp), %xmm4 + movdqa %xmm4, %xmm5 + pshufb L_aes_gcm_bswap_epi64, %xmm4 + paddd L_aes_gcm_one, %xmm5 + pxor (%ebp), %xmm4 + movdqu %xmm5, 64(%esp) + movdqu %xmm2, %xmm0 + pclmulqdq $16, %xmm1, %xmm0 + aesenc 16(%ebp), %xmm4 + aesenc 32(%ebp), %xmm4 + movdqu %xmm2, %xmm3 + 
pclmulqdq $0x01, %xmm1, %xmm3 + aesenc 48(%ebp), %xmm4 + aesenc 64(%ebp), %xmm4 + aesenc 80(%ebp), %xmm4 + movdqu %xmm2, %xmm5 + pclmulqdq $0x11, %xmm1, %xmm5 + aesenc 96(%ebp), %xmm4 + pxor %xmm3, %xmm0 + movdqa %xmm0, %xmm6 + psrldq $8, %xmm0 + pslldq $8, %xmm6 + aesenc 112(%ebp), %xmm4 + movdqu %xmm2, %xmm3 + pclmulqdq $0x00, %xmm1, %xmm3 + pxor %xmm3, %xmm6 + pxor %xmm0, %xmm5 + movdqa L_aes_gcm_mod2_128, %xmm7 + movdqa %xmm6, %xmm3 + pclmulqdq $16, %xmm7, %xmm3 + aesenc 128(%ebp), %xmm4 + pshufd $0x4e, %xmm6, %xmm0 + pxor %xmm3, %xmm0 + movdqa %xmm0, %xmm3 + pclmulqdq $16, %xmm7, %xmm3 + aesenc 144(%ebp), %xmm4 + pshufd $0x4e, %xmm0, %xmm2 + pxor %xmm3, %xmm2 + pxor %xmm5, %xmm2 + cmpl $11, 172(%esp) + movdqa 160(%ebp), %xmm5 + jl L_AES_GCM_encrypt_aesenc_gfmul_last + aesenc %xmm5, %xmm4 + aesenc 176(%ebp), %xmm4 + cmpl $13, 172(%esp) + movdqa 192(%ebp), %xmm5 + jl L_AES_GCM_encrypt_aesenc_gfmul_last + aesenc %xmm5, %xmm4 + aesenc 208(%ebp), %xmm4 + movdqa 224(%ebp), %xmm5 +L_AES_GCM_encrypt_aesenc_gfmul_last: + aesenclast %xmm5, %xmm4 + movdqu (%ecx), %xmm5 + pxor %xmm5, %xmm4 + movdqu %xmm4, (%edx) + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm4, %xmm2 + addl $16, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_encrypt_last_block_start +L_AES_GCM_encrypt_last_block_ghash: + pshufd $0x4e, %xmm1, %xmm5 + pshufd $0x4e, %xmm2, %xmm6 + movdqa %xmm2, %xmm7 + movdqa %xmm2, %xmm4 + pclmulqdq $0x11, %xmm1, %xmm7 + pclmulqdq $0x00, %xmm1, %xmm4 + pxor %xmm1, %xmm5 + pxor %xmm2, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm7, %xmm2 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm4 + pxor %xmm5, %xmm2 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + movdqa %xmm4, %xmm7 + pslld $31, %xmm5 + pslld $30, %xmm6 + pslld $25, %xmm7 + pxor %xmm6, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm7 + psrldq $4, %xmm7 + pslldq $12, %xmm5 + pxor %xmm5, %xmm4 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + psrld $0x01, %xmm5 + psrld $2, %xmm6 + pxor %xmm6, %xmm5 + pxor %xmm4, %xmm5 + psrld $7, %xmm4 + pxor %xmm7, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm5, %xmm2 +L_AES_GCM_encrypt_last_block_done: + movl 152(%esp), %ecx + movl %ecx, %edx + andl $15, %ecx + jz L_AES_GCM_encrypt_aesenc_last15_enc_avx_done + movdqu 64(%esp), %xmm0 + pshufb L_aes_gcm_bswap_epi64, %xmm0 + pxor (%ebp), %xmm0 + aesenc 16(%ebp), %xmm0 + aesenc 32(%ebp), %xmm0 + aesenc 48(%ebp), %xmm0 + aesenc 64(%ebp), %xmm0 + aesenc 80(%ebp), %xmm0 + aesenc 96(%ebp), %xmm0 + aesenc 112(%ebp), %xmm0 + aesenc 128(%ebp), %xmm0 + aesenc 144(%ebp), %xmm0 + cmpl $11, 172(%esp) + movdqa 160(%ebp), %xmm5 + jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_aesenc_avx_last + aesenc %xmm5, %xmm0 + aesenc 176(%ebp), %xmm0 + cmpl $13, 172(%esp) + movdqa 192(%ebp), %xmm5 + jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_aesenc_avx_last + aesenc %xmm5, %xmm0 + aesenc 208(%ebp), %xmm0 + movdqa 224(%ebp), %xmm5 +L_AES_GCM_encrypt_aesenc_last15_enc_avx_aesenc_avx_last: + aesenclast %xmm5, %xmm0 + subl $16, %esp + xorl %ecx, %ecx + movdqu %xmm0, (%esp) +L_AES_GCM_encrypt_aesenc_last15_enc_avx_loop: + movzbl (%esi,%ebx,1), %eax + xorb (%esp,%ecx,1), %al + movb %al, (%edi,%ebx,1) + movb %al, (%esp,%ecx,1) + incl %ebx + incl %ecx + cmpl %edx, %ebx + jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_loop + xorl %eax, %eax + cmpl $16, %ecx + je L_AES_GCM_encrypt_aesenc_last15_enc_avx_finish_enc +L_AES_GCM_encrypt_aesenc_last15_enc_avx_byte_loop: + movb %al, (%esp,%ecx,1) + incl %ecx + cmpl $16, %ecx + jl 
L_AES_GCM_encrypt_aesenc_last15_enc_avx_byte_loop +L_AES_GCM_encrypt_aesenc_last15_enc_avx_finish_enc: + movdqu (%esp), %xmm0 + addl $16, %esp + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm0, %xmm2 + pshufd $0x4e, %xmm1, %xmm5 + pshufd $0x4e, %xmm2, %xmm6 + movdqa %xmm2, %xmm7 + movdqa %xmm2, %xmm4 + pclmulqdq $0x11, %xmm1, %xmm7 + pclmulqdq $0x00, %xmm1, %xmm4 + pxor %xmm1, %xmm5 + pxor %xmm2, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm7, %xmm2 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm4 + pxor %xmm5, %xmm2 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + movdqa %xmm4, %xmm7 + pslld $31, %xmm5 + pslld $30, %xmm6 + pslld $25, %xmm7 + pxor %xmm6, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm7 + psrldq $4, %xmm7 + pslldq $12, %xmm5 + pxor %xmm5, %xmm4 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + psrld $0x01, %xmm5 + psrld $2, %xmm6 + pxor %xmm6, %xmm5 + pxor %xmm4, %xmm5 + psrld $7, %xmm4 + pxor %xmm7, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm5, %xmm2 +L_AES_GCM_encrypt_aesenc_last15_enc_avx_done: +L_AES_GCM_encrypt_done_enc: + movl 148(%esp), %edi + movl 164(%esp), %ebx + movl 152(%esp), %edx + movl 156(%esp), %ecx + shll $3, %edx + shll $3, %ecx + pinsrd $0x00, %edx, %xmm4 + pinsrd $2, %ecx, %xmm4 + movl 152(%esp), %edx + movl 156(%esp), %ecx + shrl $29, %edx + shrl $29, %ecx + pinsrd $0x01, %edx, %xmm4 + pinsrd $3, %ecx, %xmm4 + pxor %xmm4, %xmm2 + pshufd $0x4e, %xmm1, %xmm5 + pshufd $0x4e, %xmm2, %xmm6 + movdqa %xmm2, %xmm7 + movdqa %xmm2, %xmm4 + pclmulqdq $0x11, %xmm1, %xmm7 + pclmulqdq $0x00, %xmm1, %xmm4 + pxor %xmm1, %xmm5 + pxor %xmm2, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm7, %xmm2 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm4 + pxor %xmm5, %xmm2 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + movdqa %xmm4, %xmm7 + pslld $31, %xmm5 + pslld $30, %xmm6 + pslld $25, %xmm7 + pxor %xmm6, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm7 + psrldq $4, %xmm7 + pslldq $12, %xmm5 + pxor %xmm5, %xmm4 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + psrld $0x01, %xmm5 + psrld $2, %xmm6 + pxor %xmm6, %xmm5 + pxor %xmm4, %xmm5 + psrld $7, %xmm4 + pxor %xmm7, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm5, %xmm2 + pshufb L_aes_gcm_bswap_mask, %xmm2 + movdqu 80(%esp), %xmm4 + pxor %xmm2, %xmm4 + cmpl $16, %ebx + je L_AES_GCM_encrypt_store_tag_16 + xorl %ecx, %ecx + movdqu %xmm4, (%esp) +L_AES_GCM_encrypt_store_tag_loop: + movzbl (%esp,%ecx,1), %eax + movb %al, (%edi,%ecx,1) + incl %ecx + cmpl %ebx, %ecx + jne L_AES_GCM_encrypt_store_tag_loop + jmp L_AES_GCM_encrypt_store_tag_done +L_AES_GCM_encrypt_store_tag_16: + movdqu %xmm4, (%edi) +L_AES_GCM_encrypt_store_tag_done: + addl $0x70, %esp + popl %ebp + popl %edi + popl %esi + popl %ebx + ret +.size AES_GCM_encrypt,.-AES_GCM_encrypt +.text +.globl AES_GCM_decrypt +.type AES_GCM_decrypt,@function +.align 16 +AES_GCM_decrypt: + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + subl $0xb0, %esp + movl 208(%esp), %esi + movl 232(%esp), %ebp + movl 224(%esp), %edx + pxor %xmm0, %xmm0 + pxor %xmm2, %xmm2 + cmpl $12, %edx + jne L_AES_GCM_decrypt_iv_not_12 + # # Calculate values when IV is 12 bytes + # Set counter based on IV + movl $0x1000000, %ecx + pinsrd $0x00, (%esi), %xmm0 + pinsrd $0x01, 4(%esi), %xmm0 + pinsrd $2, 8(%esi), %xmm0 + pinsrd $3, %ecx, %xmm0 + # H = Encrypt X(=0) and T = Encrypt counter + movdqa %xmm0, %xmm5 + movdqa (%ebp), %xmm1 + pxor %xmm1, %xmm5 + movdqa 16(%ebp), %xmm3 + aesenc 
%xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 32(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 48(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 64(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 80(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 96(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 112(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 128(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 144(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + cmpl $11, 236(%esp) + movdqa 160(%ebp), %xmm3 + jl L_AES_GCM_decrypt_calc_iv_12_last + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 176(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + cmpl $13, 236(%esp) + movdqa 192(%ebp), %xmm3 + jl L_AES_GCM_decrypt_calc_iv_12_last + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 208(%ebp), %xmm3 + aesenc %xmm3, %xmm1 + aesenc %xmm3, %xmm5 + movdqa 224(%ebp), %xmm3 +L_AES_GCM_decrypt_calc_iv_12_last: + aesenclast %xmm3, %xmm1 + aesenclast %xmm3, %xmm5 + pshufb L_aes_gcm_bswap_mask, %xmm1 + movdqu %xmm5, 80(%esp) + jmp L_AES_GCM_decrypt_iv_done +L_AES_GCM_decrypt_iv_not_12: + # Calculate values when IV is not 12 bytes + # H = Encrypt X(=0) + movdqa (%ebp), %xmm1 + aesenc 16(%ebp), %xmm1 + aesenc 32(%ebp), %xmm1 + aesenc 48(%ebp), %xmm1 + aesenc 64(%ebp), %xmm1 + aesenc 80(%ebp), %xmm1 + aesenc 96(%ebp), %xmm1 + aesenc 112(%ebp), %xmm1 + aesenc 128(%ebp), %xmm1 + aesenc 144(%ebp), %xmm1 + cmpl $11, 236(%esp) + movdqa 160(%ebp), %xmm5 + jl L_AES_GCM_decrypt_calc_iv_1_aesenc_avx_last + aesenc %xmm5, %xmm1 + aesenc 176(%ebp), %xmm1 + cmpl $13, 236(%esp) + movdqa 192(%ebp), %xmm5 + jl L_AES_GCM_decrypt_calc_iv_1_aesenc_avx_last + aesenc %xmm5, %xmm1 + aesenc 208(%ebp), %xmm1 + movdqa 224(%ebp), %xmm5 +L_AES_GCM_decrypt_calc_iv_1_aesenc_avx_last: + aesenclast %xmm5, %xmm1 + pshufb L_aes_gcm_bswap_mask, %xmm1 + # Calc counter + # Initialization vector + cmpl $0x00, %edx + movl $0x00, %ecx + je L_AES_GCM_decrypt_calc_iv_done + cmpl $16, %edx + jl L_AES_GCM_decrypt_calc_iv_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_decrypt_calc_iv_16_loop: + movdqu (%esi,%ecx,1), %xmm4 + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm4, %xmm0 + pshufd $0x4e, %xmm0, %xmm5 + pshufd $0x4e, %xmm1, %xmm6 + movdqa %xmm1, %xmm7 + movdqa %xmm1, %xmm4 + pclmulqdq $0x11, %xmm0, %xmm7 + pclmulqdq $0x00, %xmm0, %xmm4 + pxor %xmm0, %xmm5 + pxor %xmm1, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm4, %xmm3 + movdqa %xmm7, %xmm0 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm3 + pxor %xmm5, %xmm0 + movdqa %xmm3, %xmm4 + movdqa %xmm0, %xmm5 + psrld $31, %xmm4 + psrld $31, %xmm5 + pslld $0x01, %xmm3 + pslld $0x01, %xmm0 + movdqa %xmm4, %xmm6 + pslldq $4, %xmm4 + psrldq $12, %xmm6 + pslldq $4, %xmm5 + por %xmm6, %xmm0 + por %xmm4, %xmm3 + por %xmm5, %xmm0 + movdqa %xmm3, %xmm4 + movdqa %xmm3, %xmm5 + movdqa %xmm3, %xmm6 + pslld $31, %xmm4 + pslld $30, %xmm5 + pslld $25, %xmm6 + pxor %xmm5, %xmm4 + pxor %xmm6, %xmm4 + movdqa %xmm4, %xmm5 + psrldq $4, %xmm5 + pslldq $12, %xmm4 + pxor %xmm4, %xmm3 + movdqa %xmm3, %xmm6 + movdqa %xmm3, %xmm7 + movdqa %xmm3, %xmm4 + psrld $0x01, %xmm6 + psrld $2, %xmm7 + psrld $7, %xmm4 + pxor %xmm7, %xmm6 + pxor %xmm4, %xmm6 + pxor %xmm5, %xmm6 + pxor %xmm3, %xmm6 + pxor %xmm6, %xmm0 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_calc_iv_16_loop + movl 224(%esp), %edx + cmpl %edx, 
%ecx + je L_AES_GCM_decrypt_calc_iv_done +L_AES_GCM_decrypt_calc_iv_lt16: + subl $16, %esp + pxor %xmm4, %xmm4 + xorl %ebx, %ebx + movdqu %xmm4, (%esp) +L_AES_GCM_decrypt_calc_iv_loop: + movzbl (%esi,%ecx,1), %eax + movb %al, (%esp,%ebx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_calc_iv_loop + movdqu (%esp), %xmm4 + addl $16, %esp + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm4, %xmm0 + pshufd $0x4e, %xmm0, %xmm5 + pshufd $0x4e, %xmm1, %xmm6 + movdqa %xmm1, %xmm7 + movdqa %xmm1, %xmm4 + pclmulqdq $0x11, %xmm0, %xmm7 + pclmulqdq $0x00, %xmm0, %xmm4 + pxor %xmm0, %xmm5 + pxor %xmm1, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm4, %xmm3 + movdqa %xmm7, %xmm0 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm3 + pxor %xmm5, %xmm0 + movdqa %xmm3, %xmm4 + movdqa %xmm0, %xmm5 + psrld $31, %xmm4 + psrld $31, %xmm5 + pslld $0x01, %xmm3 + pslld $0x01, %xmm0 + movdqa %xmm4, %xmm6 + pslldq $4, %xmm4 + psrldq $12, %xmm6 + pslldq $4, %xmm5 + por %xmm6, %xmm0 + por %xmm4, %xmm3 + por %xmm5, %xmm0 + movdqa %xmm3, %xmm4 + movdqa %xmm3, %xmm5 + movdqa %xmm3, %xmm6 + pslld $31, %xmm4 + pslld $30, %xmm5 + pslld $25, %xmm6 + pxor %xmm5, %xmm4 + pxor %xmm6, %xmm4 + movdqa %xmm4, %xmm5 + psrldq $4, %xmm5 + pslldq $12, %xmm4 + pxor %xmm4, %xmm3 + movdqa %xmm3, %xmm6 + movdqa %xmm3, %xmm7 + movdqa %xmm3, %xmm4 + psrld $0x01, %xmm6 + psrld $2, %xmm7 + psrld $7, %xmm4 + pxor %xmm7, %xmm6 + pxor %xmm4, %xmm6 + pxor %xmm5, %xmm6 + pxor %xmm3, %xmm6 + pxor %xmm6, %xmm0 +L_AES_GCM_decrypt_calc_iv_done: + # T = Encrypt counter + pxor %xmm4, %xmm4 + shll $3, %edx + pinsrd $0x00, %edx, %xmm4 + pxor %xmm4, %xmm0 + pshufd $0x4e, %xmm0, %xmm5 + pshufd $0x4e, %xmm1, %xmm6 + movdqa %xmm1, %xmm7 + movdqa %xmm1, %xmm4 + pclmulqdq $0x11, %xmm0, %xmm7 + pclmulqdq $0x00, %xmm0, %xmm4 + pxor %xmm0, %xmm5 + pxor %xmm1, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm4, %xmm3 + movdqa %xmm7, %xmm0 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm3 + pxor %xmm5, %xmm0 + movdqa %xmm3, %xmm4 + movdqa %xmm0, %xmm5 + psrld $31, %xmm4 + psrld $31, %xmm5 + pslld $0x01, %xmm3 + pslld $0x01, %xmm0 + movdqa %xmm4, %xmm6 + pslldq $4, %xmm4 + psrldq $12, %xmm6 + pslldq $4, %xmm5 + por %xmm6, %xmm0 + por %xmm4, %xmm3 + por %xmm5, %xmm0 + movdqa %xmm3, %xmm4 + movdqa %xmm3, %xmm5 + movdqa %xmm3, %xmm6 + pslld $31, %xmm4 + pslld $30, %xmm5 + pslld $25, %xmm6 + pxor %xmm5, %xmm4 + pxor %xmm6, %xmm4 + movdqa %xmm4, %xmm5 + psrldq $4, %xmm5 + pslldq $12, %xmm4 + pxor %xmm4, %xmm3 + movdqa %xmm3, %xmm6 + movdqa %xmm3, %xmm7 + movdqa %xmm3, %xmm4 + psrld $0x01, %xmm6 + psrld $2, %xmm7 + psrld $7, %xmm4 + pxor %xmm7, %xmm6 + pxor %xmm4, %xmm6 + pxor %xmm5, %xmm6 + pxor %xmm3, %xmm6 + pxor %xmm6, %xmm0 + pshufb L_aes_gcm_bswap_mask, %xmm0 + # Encrypt counter + movdqa (%ebp), %xmm4 + pxor %xmm0, %xmm4 + aesenc 16(%ebp), %xmm4 + aesenc 32(%ebp), %xmm4 + aesenc 48(%ebp), %xmm4 + aesenc 64(%ebp), %xmm4 + aesenc 80(%ebp), %xmm4 + aesenc 96(%ebp), %xmm4 + aesenc 112(%ebp), %xmm4 + aesenc 128(%ebp), %xmm4 + aesenc 144(%ebp), %xmm4 + cmpl $11, 236(%esp) + movdqa 160(%ebp), %xmm5 + jl L_AES_GCM_decrypt_calc_iv_2_aesenc_avx_last + aesenc %xmm5, %xmm4 + aesenc 176(%ebp), %xmm4 + cmpl $13, 236(%esp) + movdqa 192(%ebp), %xmm5 + jl L_AES_GCM_decrypt_calc_iv_2_aesenc_avx_last + aesenc %xmm5, %xmm4 + aesenc 208(%ebp), %xmm4 + movdqa 224(%ebp), %xmm5 +L_AES_GCM_decrypt_calc_iv_2_aesenc_avx_last: + 
aesenclast %xmm5, %xmm4 + movdqu %xmm4, 80(%esp) +L_AES_GCM_decrypt_iv_done: + movl 204(%esp), %esi + # Additional authentication data + movl 220(%esp), %edx + cmpl $0x00, %edx + je L_AES_GCM_decrypt_calc_aad_done + xorl %ecx, %ecx + cmpl $16, %edx + jl L_AES_GCM_decrypt_calc_aad_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_decrypt_calc_aad_16_loop: + movdqu (%esi,%ecx,1), %xmm4 + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm4, %xmm2 + pshufd $0x4e, %xmm2, %xmm5 + pshufd $0x4e, %xmm1, %xmm6 + movdqa %xmm1, %xmm7 + movdqa %xmm1, %xmm4 + pclmulqdq $0x11, %xmm2, %xmm7 + pclmulqdq $0x00, %xmm2, %xmm4 + pxor %xmm2, %xmm5 + pxor %xmm1, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm4, %xmm3 + movdqa %xmm7, %xmm2 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm3 + pxor %xmm5, %xmm2 + movdqa %xmm3, %xmm4 + movdqa %xmm2, %xmm5 + psrld $31, %xmm4 + psrld $31, %xmm5 + pslld $0x01, %xmm3 + pslld $0x01, %xmm2 + movdqa %xmm4, %xmm6 + pslldq $4, %xmm4 + psrldq $12, %xmm6 + pslldq $4, %xmm5 + por %xmm6, %xmm2 + por %xmm4, %xmm3 + por %xmm5, %xmm2 + movdqa %xmm3, %xmm4 + movdqa %xmm3, %xmm5 + movdqa %xmm3, %xmm6 + pslld $31, %xmm4 + pslld $30, %xmm5 + pslld $25, %xmm6 + pxor %xmm5, %xmm4 + pxor %xmm6, %xmm4 + movdqa %xmm4, %xmm5 + psrldq $4, %xmm5 + pslldq $12, %xmm4 + pxor %xmm4, %xmm3 + movdqa %xmm3, %xmm6 + movdqa %xmm3, %xmm7 + movdqa %xmm3, %xmm4 + psrld $0x01, %xmm6 + psrld $2, %xmm7 + psrld $7, %xmm4 + pxor %xmm7, %xmm6 + pxor %xmm4, %xmm6 + pxor %xmm5, %xmm6 + pxor %xmm3, %xmm6 + pxor %xmm6, %xmm2 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_calc_aad_16_loop + movl 220(%esp), %edx + cmpl %edx, %ecx + je L_AES_GCM_decrypt_calc_aad_done +L_AES_GCM_decrypt_calc_aad_lt16: + subl $16, %esp + pxor %xmm4, %xmm4 + xorl %ebx, %ebx + movdqu %xmm4, (%esp) +L_AES_GCM_decrypt_calc_aad_loop: + movzbl (%esi,%ecx,1), %eax + movb %al, (%esp,%ebx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_calc_aad_loop + movdqu (%esp), %xmm4 + addl $16, %esp + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm4, %xmm2 + pshufd $0x4e, %xmm2, %xmm5 + pshufd $0x4e, %xmm1, %xmm6 + movdqa %xmm1, %xmm7 + movdqa %xmm1, %xmm4 + pclmulqdq $0x11, %xmm2, %xmm7 + pclmulqdq $0x00, %xmm2, %xmm4 + pxor %xmm2, %xmm5 + pxor %xmm1, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm4, %xmm3 + movdqa %xmm7, %xmm2 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm3 + pxor %xmm5, %xmm2 + movdqa %xmm3, %xmm4 + movdqa %xmm2, %xmm5 + psrld $31, %xmm4 + psrld $31, %xmm5 + pslld $0x01, %xmm3 + pslld $0x01, %xmm2 + movdqa %xmm4, %xmm6 + pslldq $4, %xmm4 + psrldq $12, %xmm6 + pslldq $4, %xmm5 + por %xmm6, %xmm2 + por %xmm4, %xmm3 + por %xmm5, %xmm2 + movdqa %xmm3, %xmm4 + movdqa %xmm3, %xmm5 + movdqa %xmm3, %xmm6 + pslld $31, %xmm4 + pslld $30, %xmm5 + pslld $25, %xmm6 + pxor %xmm5, %xmm4 + pxor %xmm6, %xmm4 + movdqa %xmm4, %xmm5 + psrldq $4, %xmm5 + pslldq $12, %xmm4 + pxor %xmm4, %xmm3 + movdqa %xmm3, %xmm6 + movdqa %xmm3, %xmm7 + movdqa %xmm3, %xmm4 + psrld $0x01, %xmm6 + psrld $2, %xmm7 + psrld $7, %xmm4 + pxor %xmm7, %xmm6 + pxor %xmm4, %xmm6 + pxor %xmm5, %xmm6 + pxor %xmm3, %xmm6 + pxor %xmm6, %xmm2 +L_AES_GCM_decrypt_calc_aad_done: + movdqu %xmm2, 96(%esp) + movl 196(%esp), %esi + movl 200(%esp), %edi + # Calculate counter and H + pshufb L_aes_gcm_bswap_epi64, %xmm0 + movdqa %xmm1, %xmm5 + paddd L_aes_gcm_one, %xmm0 + movdqa %xmm1, %xmm4 + movdqu %xmm0, 64(%esp) + psrlq $63, %xmm5 + 
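+        # Double the hash key H: 128-bit left shift by one bit with conditional xor of L_aes_gcm_mod2_128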
psllq $0x01, %xmm4 + pslldq $8, %xmm5 + por %xmm5, %xmm4 + pshufd $0xff, %xmm1, %xmm1 + psrad $31, %xmm1 + pand L_aes_gcm_mod2_128, %xmm1 + pxor %xmm4, %xmm1 + xorl %ebx, %ebx + cmpl $0x40, 216(%esp) + movl 216(%esp), %eax + jl L_AES_GCM_decrypt_done_64 + andl $0xffffffc0, %eax + movdqa %xmm2, %xmm6 + # H ^ 1 + movdqu %xmm1, (%esp) + # H ^ 2 + pshufd $0x4e, %xmm1, %xmm5 + pshufd $0x4e, %xmm1, %xmm6 + movdqa %xmm1, %xmm7 + movdqa %xmm1, %xmm4 + pclmulqdq $0x11, %xmm1, %xmm7 + pclmulqdq $0x00, %xmm1, %xmm4 + pxor %xmm1, %xmm5 + pxor %xmm1, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm7, %xmm0 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm4 + pxor %xmm5, %xmm0 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + movdqa %xmm4, %xmm7 + pslld $31, %xmm5 + pslld $30, %xmm6 + pslld $25, %xmm7 + pxor %xmm6, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm7 + psrldq $4, %xmm7 + pslldq $12, %xmm5 + pxor %xmm5, %xmm4 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + psrld $0x01, %xmm5 + psrld $2, %xmm6 + pxor %xmm6, %xmm5 + pxor %xmm4, %xmm5 + psrld $7, %xmm4 + pxor %xmm7, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm5, %xmm0 + movdqu %xmm0, 16(%esp) + # H ^ 3 + pshufd $0x4e, %xmm1, %xmm5 + pshufd $0x4e, %xmm0, %xmm6 + movdqa %xmm0, %xmm7 + movdqa %xmm0, %xmm4 + pclmulqdq $0x11, %xmm1, %xmm7 + pclmulqdq $0x00, %xmm1, %xmm4 + pxor %xmm1, %xmm5 + pxor %xmm0, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm7, %xmm3 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm4 + pxor %xmm5, %xmm3 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + movdqa %xmm4, %xmm7 + pslld $31, %xmm5 + pslld $30, %xmm6 + pslld $25, %xmm7 + pxor %xmm6, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm7 + psrldq $4, %xmm7 + pslldq $12, %xmm5 + pxor %xmm5, %xmm4 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + psrld $0x01, %xmm5 + psrld $2, %xmm6 + pxor %xmm6, %xmm5 + pxor %xmm4, %xmm5 + psrld $7, %xmm4 + pxor %xmm7, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm5, %xmm3 + movdqu %xmm3, 32(%esp) + # H ^ 4 + pshufd $0x4e, %xmm0, %xmm5 + pshufd $0x4e, %xmm0, %xmm6 + movdqa %xmm0, %xmm7 + movdqa %xmm0, %xmm4 + pclmulqdq $0x11, %xmm0, %xmm7 + pclmulqdq $0x00, %xmm0, %xmm4 + pxor %xmm0, %xmm5 + pxor %xmm0, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm7, %xmm3 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm4 + pxor %xmm5, %xmm3 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + movdqa %xmm4, %xmm7 + pslld $31, %xmm5 + pslld $30, %xmm6 + pslld $25, %xmm7 + pxor %xmm6, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm7 + psrldq $4, %xmm7 + pslldq $12, %xmm5 + pxor %xmm5, %xmm4 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + psrld $0x01, %xmm5 + psrld $2, %xmm6 + pxor %xmm6, %xmm5 + pxor %xmm4, %xmm5 + psrld $7, %xmm4 + pxor %xmm7, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm5, %xmm3 + movdqu %xmm3, 48(%esp) + cmpl %esi, %edi + jne L_AES_GCM_decrypt_ghash_64 +L_AES_GCM_decrypt_ghash_64_inplace: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + # Encrypt 64 bytes of counter + movdqu 64(%esp), %xmm4 + movdqa L_aes_gcm_bswap_epi64, %xmm3 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + movdqa %xmm4, %xmm7 + pshufb %xmm3, %xmm4 + paddd L_aes_gcm_one, %xmm5 + pshufb %xmm3, %xmm5 + paddd L_aes_gcm_two, %xmm6 + pshufb %xmm3, %xmm6 + paddd L_aes_gcm_three, %xmm7 + pshufb %xmm3, %xmm7 + movdqu 64(%esp), %xmm3 + paddd L_aes_gcm_four, %xmm3 + movdqu %xmm3, 64(%esp) + movdqa (%ebp), 
%xmm3 + pxor %xmm3, %xmm4 + pxor %xmm3, %xmm5 + pxor %xmm3, %xmm6 + pxor %xmm3, %xmm7 + movdqa 16(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 32(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 48(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 64(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 80(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 96(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 112(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 128(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 144(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + cmpl $11, 236(%esp) + movdqa 160(%ebp), %xmm3 + jl L_AES_GCM_decryptinplace_aesenc_64_ghash_avx_done + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 176(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + cmpl $13, 236(%esp) + movdqa 192(%ebp), %xmm3 + jl L_AES_GCM_decryptinplace_aesenc_64_ghash_avx_done + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 208(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 224(%ebp), %xmm3 +L_AES_GCM_decryptinplace_aesenc_64_ghash_avx_done: + aesenclast %xmm3, %xmm4 + aesenclast %xmm3, %xmm5 + movdqu (%ecx), %xmm0 + movdqu 16(%ecx), %xmm1 + pxor %xmm0, %xmm4 + pxor %xmm1, %xmm5 + movdqu %xmm0, 112(%esp) + movdqu %xmm1, 128(%esp) + movdqu %xmm4, (%edx) + movdqu %xmm5, 16(%edx) + aesenclast %xmm3, %xmm6 + aesenclast %xmm3, %xmm7 + movdqu 32(%ecx), %xmm0 + movdqu 48(%ecx), %xmm1 + pxor %xmm0, %xmm6 + pxor %xmm1, %xmm7 + movdqu %xmm0, 144(%esp) + movdqu %xmm1, 160(%esp) + movdqu %xmm6, 32(%edx) + movdqu %xmm7, 48(%edx) + # ghash encrypted counter + movdqu 96(%esp), %xmm6 + movdqu 48(%esp), %xmm3 + movdqu 112(%esp), %xmm4 + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm6, %xmm4 + pshufd $0x4e, %xmm3, %xmm5 + pshufd $0x4e, %xmm4, %xmm1 + pxor %xmm3, %xmm5 + pxor %xmm4, %xmm1 + movdqa %xmm4, %xmm7 + pclmulqdq $0x11, %xmm3, %xmm7 + movdqa %xmm4, %xmm6 + pclmulqdq $0x00, %xmm3, %xmm6 + pclmulqdq $0x00, %xmm1, %xmm5 + pxor %xmm6, %xmm5 + pxor %xmm7, %xmm5 + movdqu 32(%esp), %xmm3 + movdqu 128(%esp), %xmm4 + pshufd $0x4e, %xmm3, %xmm0 + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm3, %xmm0 + pshufd $0x4e, %xmm4, %xmm1 + pxor %xmm4, %xmm1 + movdqa %xmm4, %xmm2 + pclmulqdq $0x11, %xmm3, %xmm2 + pclmulqdq $0x00, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm1, %xmm0 + pxor %xmm3, %xmm5 + pxor %xmm3, %xmm6 + pxor %xmm2, %xmm5 + pxor %xmm2, %xmm7 + pxor %xmm0, %xmm5 + movdqu 16(%esp), %xmm3 + movdqu 144(%esp), %xmm4 + pshufd $0x4e, %xmm3, %xmm0 + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm3, %xmm0 + pshufd $0x4e, %xmm4, %xmm1 + pxor %xmm4, %xmm1 + movdqa %xmm4, %xmm2 + pclmulqdq $0x11, %xmm3, %xmm2 + pclmulqdq $0x00, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm1, %xmm0 + pxor %xmm3, %xmm5 + pxor %xmm3, %xmm6 + pxor %xmm2, %xmm5 + pxor %xmm2, %xmm7 + pxor %xmm0, %xmm5 + movdqu (%esp), %xmm3 + 
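+        # Fold the fourth saved ciphertext block (160(%esp)) into the GHASH accumulators using H^1 from (%esp)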
movdqu 160(%esp), %xmm4 + pshufd $0x4e, %xmm3, %xmm0 + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm3, %xmm0 + pshufd $0x4e, %xmm4, %xmm1 + pxor %xmm4, %xmm1 + movdqa %xmm4, %xmm2 + pclmulqdq $0x11, %xmm3, %xmm2 + pclmulqdq $0x00, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm1, %xmm0 + pxor %xmm3, %xmm5 + pxor %xmm3, %xmm6 + pxor %xmm2, %xmm5 + pxor %xmm2, %xmm7 + pxor %xmm0, %xmm5 + movdqa %xmm5, %xmm1 + psrldq $8, %xmm5 + pslldq $8, %xmm1 + pxor %xmm1, %xmm6 + pxor %xmm5, %xmm7 + movdqa %xmm6, %xmm3 + movdqa %xmm6, %xmm0 + movdqa %xmm6, %xmm1 + pslld $31, %xmm3 + pslld $30, %xmm0 + pslld $25, %xmm1 + pxor %xmm0, %xmm3 + pxor %xmm1, %xmm3 + movdqa %xmm3, %xmm0 + pslldq $12, %xmm3 + psrldq $4, %xmm0 + pxor %xmm3, %xmm6 + movdqa %xmm6, %xmm1 + movdqa %xmm6, %xmm5 + movdqa %xmm6, %xmm4 + psrld $0x01, %xmm1 + psrld $2, %xmm5 + psrld $7, %xmm4 + pxor %xmm5, %xmm1 + pxor %xmm4, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm1, %xmm6 + pxor %xmm7, %xmm6 + movdqu %xmm6, 96(%esp) + addl $0x40, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_decrypt_ghash_64_inplace + jmp L_AES_GCM_decrypt_ghash_64_done +L_AES_GCM_decrypt_ghash_64: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + # Encrypt 64 bytes of counter + movdqu 64(%esp), %xmm4 + movdqa L_aes_gcm_bswap_epi64, %xmm3 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + movdqa %xmm4, %xmm7 + pshufb %xmm3, %xmm4 + paddd L_aes_gcm_one, %xmm5 + pshufb %xmm3, %xmm5 + paddd L_aes_gcm_two, %xmm6 + pshufb %xmm3, %xmm6 + paddd L_aes_gcm_three, %xmm7 + pshufb %xmm3, %xmm7 + movdqu 64(%esp), %xmm3 + paddd L_aes_gcm_four, %xmm3 + movdqu %xmm3, 64(%esp) + movdqa (%ebp), %xmm3 + pxor %xmm3, %xmm4 + pxor %xmm3, %xmm5 + pxor %xmm3, %xmm6 + pxor %xmm3, %xmm7 + movdqa 16(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 32(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 48(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 64(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 80(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 96(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 112(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 128(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 144(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + cmpl $11, 236(%esp) + movdqa 160(%ebp), %xmm3 + jl L_AES_GCM_decrypt_aesenc_64_ghash_avx_done + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 176(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + cmpl $13, 236(%esp) + movdqa 192(%ebp), %xmm3 + jl L_AES_GCM_decrypt_aesenc_64_ghash_avx_done + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 208(%ebp), %xmm3 + aesenc %xmm3, %xmm4 + aesenc %xmm3, %xmm5 + aesenc %xmm3, %xmm6 + aesenc %xmm3, %xmm7 + movdqa 224(%ebp), %xmm3 +L_AES_GCM_decrypt_aesenc_64_ghash_avx_done: + aesenclast %xmm3, %xmm4 + aesenclast %xmm3, %xmm5 + movdqu (%ecx), %xmm0 + movdqu 16(%ecx), %xmm1 + pxor %xmm0, %xmm4 + pxor %xmm1, %xmm5 + movdqu %xmm0, (%ecx) + movdqu %xmm1, 
16(%ecx) + movdqu %xmm4, (%edx) + movdqu %xmm5, 16(%edx) + aesenclast %xmm3, %xmm6 + aesenclast %xmm3, %xmm7 + movdqu 32(%ecx), %xmm0 + movdqu 48(%ecx), %xmm1 + pxor %xmm0, %xmm6 + pxor %xmm1, %xmm7 + movdqu %xmm0, 32(%ecx) + movdqu %xmm1, 48(%ecx) + movdqu %xmm6, 32(%edx) + movdqu %xmm7, 48(%edx) + # ghash encrypted counter + movdqu 96(%esp), %xmm6 + movdqu 48(%esp), %xmm3 + movdqu (%ecx), %xmm4 + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm6, %xmm4 + pshufd $0x4e, %xmm3, %xmm5 + pshufd $0x4e, %xmm4, %xmm1 + pxor %xmm3, %xmm5 + pxor %xmm4, %xmm1 + movdqa %xmm4, %xmm7 + pclmulqdq $0x11, %xmm3, %xmm7 + movdqa %xmm4, %xmm6 + pclmulqdq $0x00, %xmm3, %xmm6 + pclmulqdq $0x00, %xmm1, %xmm5 + pxor %xmm6, %xmm5 + pxor %xmm7, %xmm5 + movdqu 32(%esp), %xmm3 + movdqu 16(%ecx), %xmm4 + pshufd $0x4e, %xmm3, %xmm0 + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm3, %xmm0 + pshufd $0x4e, %xmm4, %xmm1 + pxor %xmm4, %xmm1 + movdqa %xmm4, %xmm2 + pclmulqdq $0x11, %xmm3, %xmm2 + pclmulqdq $0x00, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm1, %xmm0 + pxor %xmm3, %xmm5 + pxor %xmm3, %xmm6 + pxor %xmm2, %xmm5 + pxor %xmm2, %xmm7 + pxor %xmm0, %xmm5 + movdqu 16(%esp), %xmm3 + movdqu 32(%ecx), %xmm4 + pshufd $0x4e, %xmm3, %xmm0 + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm3, %xmm0 + pshufd $0x4e, %xmm4, %xmm1 + pxor %xmm4, %xmm1 + movdqa %xmm4, %xmm2 + pclmulqdq $0x11, %xmm3, %xmm2 + pclmulqdq $0x00, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm1, %xmm0 + pxor %xmm3, %xmm5 + pxor %xmm3, %xmm6 + pxor %xmm2, %xmm5 + pxor %xmm2, %xmm7 + pxor %xmm0, %xmm5 + movdqu (%esp), %xmm3 + movdqu 48(%ecx), %xmm4 + pshufd $0x4e, %xmm3, %xmm0 + pshufb L_aes_gcm_bswap_mask, %xmm4 + pxor %xmm3, %xmm0 + pshufd $0x4e, %xmm4, %xmm1 + pxor %xmm4, %xmm1 + movdqa %xmm4, %xmm2 + pclmulqdq $0x11, %xmm3, %xmm2 + pclmulqdq $0x00, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm1, %xmm0 + pxor %xmm3, %xmm5 + pxor %xmm3, %xmm6 + pxor %xmm2, %xmm5 + pxor %xmm2, %xmm7 + pxor %xmm0, %xmm5 + movdqa %xmm5, %xmm1 + psrldq $8, %xmm5 + pslldq $8, %xmm1 + pxor %xmm1, %xmm6 + pxor %xmm5, %xmm7 + movdqa %xmm6, %xmm3 + movdqa %xmm6, %xmm0 + movdqa %xmm6, %xmm1 + pslld $31, %xmm3 + pslld $30, %xmm0 + pslld $25, %xmm1 + pxor %xmm0, %xmm3 + pxor %xmm1, %xmm3 + movdqa %xmm3, %xmm0 + pslldq $12, %xmm3 + psrldq $4, %xmm0 + pxor %xmm3, %xmm6 + movdqa %xmm6, %xmm1 + movdqa %xmm6, %xmm5 + movdqa %xmm6, %xmm4 + psrld $0x01, %xmm1 + psrld $2, %xmm5 + psrld $7, %xmm4 + pxor %xmm5, %xmm1 + pxor %xmm4, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm1, %xmm6 + pxor %xmm7, %xmm6 + movdqu %xmm6, 96(%esp) + addl $0x40, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_decrypt_ghash_64 +L_AES_GCM_decrypt_ghash_64_done: + movdqa %xmm6, %xmm2 + movdqu (%esp), %xmm1 +L_AES_GCM_decrypt_done_64: + movl 216(%esp), %edx + cmpl %edx, %ebx + jge L_AES_GCM_decrypt_done_dec + movl 216(%esp), %eax + andl $0xfffffff0, %eax + cmpl %eax, %ebx + jge L_AES_GCM_decrypt_last_block_done +L_AES_GCM_decrypt_last_block_start: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + movdqu (%ecx), %xmm5 + pshufb L_aes_gcm_bswap_mask, %xmm5 + pxor %xmm2, %xmm5 + movdqu %xmm5, (%esp) + movdqu 64(%esp), %xmm4 + movdqa %xmm4, %xmm5 + pshufb L_aes_gcm_bswap_epi64, %xmm4 + paddd L_aes_gcm_one, %xmm5 + pxor (%ebp), %xmm4 + movdqu %xmm5, 64(%esp) + movdqu (%esp), %xmm0 + pclmulqdq $16, %xmm1, %xmm0 + aesenc 16(%ebp), %xmm4 + aesenc 32(%ebp), %xmm4 + movdqu (%esp), %xmm3 + pclmulqdq $0x01, %xmm1, %xmm3 + aesenc 48(%ebp), %xmm4 + aesenc 64(%ebp), %xmm4 + aesenc 80(%ebp), %xmm4 + movdqu (%esp), %xmm5 + pclmulqdq $0x11, %xmm1, %xmm5 + aesenc 96(%ebp), 
%xmm4 + pxor %xmm3, %xmm0 + movdqa %xmm0, %xmm6 + psrldq $8, %xmm0 + pslldq $8, %xmm6 + aesenc 112(%ebp), %xmm4 + movdqu (%esp), %xmm3 + pclmulqdq $0x00, %xmm1, %xmm3 + pxor %xmm3, %xmm6 + pxor %xmm0, %xmm5 + movdqa L_aes_gcm_mod2_128, %xmm7 + movdqa %xmm6, %xmm3 + pclmulqdq $16, %xmm7, %xmm3 + aesenc 128(%ebp), %xmm4 + pshufd $0x4e, %xmm6, %xmm0 + pxor %xmm3, %xmm0 + movdqa %xmm0, %xmm3 + pclmulqdq $16, %xmm7, %xmm3 + aesenc 144(%ebp), %xmm4 + pshufd $0x4e, %xmm0, %xmm2 + pxor %xmm3, %xmm2 + pxor %xmm5, %xmm2 + cmpl $11, 236(%esp) + movdqa 160(%ebp), %xmm5 + jl L_AES_GCM_decrypt_aesenc_gfmul_last + aesenc %xmm5, %xmm4 + aesenc 176(%ebp), %xmm4 + cmpl $13, 236(%esp) + movdqa 192(%ebp), %xmm5 + jl L_AES_GCM_decrypt_aesenc_gfmul_last + aesenc %xmm5, %xmm4 + aesenc 208(%ebp), %xmm4 + movdqa 224(%ebp), %xmm5 +L_AES_GCM_decrypt_aesenc_gfmul_last: + aesenclast %xmm5, %xmm4 + movdqu (%ecx), %xmm5 + pxor %xmm5, %xmm4 + movdqu %xmm4, (%edx) + addl $16, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_decrypt_last_block_start +L_AES_GCM_decrypt_last_block_done: + movl 216(%esp), %ecx + movl %ecx, %edx + andl $15, %ecx + jz L_AES_GCM_decrypt_aesenc_last15_dec_avx_done + movdqu 64(%esp), %xmm0 + pshufb L_aes_gcm_bswap_epi64, %xmm0 + pxor (%ebp), %xmm0 + aesenc 16(%ebp), %xmm0 + aesenc 32(%ebp), %xmm0 + aesenc 48(%ebp), %xmm0 + aesenc 64(%ebp), %xmm0 + aesenc 80(%ebp), %xmm0 + aesenc 96(%ebp), %xmm0 + aesenc 112(%ebp), %xmm0 + aesenc 128(%ebp), %xmm0 + aesenc 144(%ebp), %xmm0 + cmpl $11, 236(%esp) + movdqa 160(%ebp), %xmm5 + jl L_AES_GCM_decrypt_aesenc_last15_dec_avx_aesenc_avx_last + aesenc %xmm5, %xmm0 + aesenc 176(%ebp), %xmm0 + cmpl $13, 236(%esp) + movdqa 192(%ebp), %xmm5 + jl L_AES_GCM_decrypt_aesenc_last15_dec_avx_aesenc_avx_last + aesenc %xmm5, %xmm0 + aesenc 208(%ebp), %xmm0 + movdqa 224(%ebp), %xmm5 +L_AES_GCM_decrypt_aesenc_last15_dec_avx_aesenc_avx_last: + aesenclast %xmm5, %xmm0 + subl $32, %esp + xorl %ecx, %ecx + movdqu %xmm0, (%esp) + pxor %xmm4, %xmm4 + movdqu %xmm4, 16(%esp) +L_AES_GCM_decrypt_aesenc_last15_dec_avx_loop: + movzbl (%esi,%ebx,1), %eax + movb %al, 16(%esp,%ecx,1) + xorb (%esp,%ecx,1), %al + movb %al, (%edi,%ebx,1) + incl %ebx + incl %ecx + cmpl %edx, %ebx + jl L_AES_GCM_decrypt_aesenc_last15_dec_avx_loop + movdqu 16(%esp), %xmm0 + addl $32, %esp + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm0, %xmm2 + pshufd $0x4e, %xmm1, %xmm5 + pshufd $0x4e, %xmm2, %xmm6 + movdqa %xmm2, %xmm7 + movdqa %xmm2, %xmm4 + pclmulqdq $0x11, %xmm1, %xmm7 + pclmulqdq $0x00, %xmm1, %xmm4 + pxor %xmm1, %xmm5 + pxor %xmm2, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm7, %xmm2 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm4 + pxor %xmm5, %xmm2 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + movdqa %xmm4, %xmm7 + pslld $31, %xmm5 + pslld $30, %xmm6 + pslld $25, %xmm7 + pxor %xmm6, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm7 + psrldq $4, %xmm7 + pslldq $12, %xmm5 + pxor %xmm5, %xmm4 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + psrld $0x01, %xmm5 + psrld $2, %xmm6 + pxor %xmm6, %xmm5 + pxor %xmm4, %xmm5 + psrld $7, %xmm4 + pxor %xmm7, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm5, %xmm2 +L_AES_GCM_decrypt_aesenc_last15_dec_avx_done: +L_AES_GCM_decrypt_done_dec: + movl 212(%esp), %esi + movl 228(%esp), %ebp + movl 216(%esp), %edx + movl 220(%esp), %ecx + shll $3, %edx + shll $3, %ecx + pinsrd $0x00, %edx, %xmm4 + pinsrd $2, %ecx, %xmm4 + movl 216(%esp), %edx + movl 220(%esp), %ecx + shrl $29, %edx + shrl $29, %ecx + pinsrd $0x01, %edx, 
%xmm4 + pinsrd $3, %ecx, %xmm4 + pxor %xmm4, %xmm2 + pshufd $0x4e, %xmm1, %xmm5 + pshufd $0x4e, %xmm2, %xmm6 + movdqa %xmm2, %xmm7 + movdqa %xmm2, %xmm4 + pclmulqdq $0x11, %xmm1, %xmm7 + pclmulqdq $0x00, %xmm1, %xmm4 + pxor %xmm1, %xmm5 + pxor %xmm2, %xmm6 + pclmulqdq $0x00, %xmm6, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm6 + movdqa %xmm7, %xmm2 + pslldq $8, %xmm6 + psrldq $8, %xmm5 + pxor %xmm6, %xmm4 + pxor %xmm5, %xmm2 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + movdqa %xmm4, %xmm7 + pslld $31, %xmm5 + pslld $30, %xmm6 + pslld $25, %xmm7 + pxor %xmm6, %xmm5 + pxor %xmm7, %xmm5 + movdqa %xmm5, %xmm7 + psrldq $4, %xmm7 + pslldq $12, %xmm5 + pxor %xmm5, %xmm4 + movdqa %xmm4, %xmm5 + movdqa %xmm4, %xmm6 + psrld $0x01, %xmm5 + psrld $2, %xmm6 + pxor %xmm6, %xmm5 + pxor %xmm4, %xmm5 + psrld $7, %xmm4 + pxor %xmm7, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm5, %xmm2 + pshufb L_aes_gcm_bswap_mask, %xmm2 + movdqu 80(%esp), %xmm4 + pxor %xmm2, %xmm4 + movl 240(%esp), %edi + cmpl $16, %ebp + je L_AES_GCM_decrypt_cmp_tag_16 + subl $16, %esp + xorl %ecx, %ecx + xorl %ebx, %ebx + movdqu %xmm4, (%esp) +L_AES_GCM_decrypt_cmp_tag_loop: + movzbl (%esp,%ecx,1), %eax + xorb (%esi,%ecx,1), %al + orb %al, %bl + incl %ecx + cmpl %ebp, %ecx + jne L_AES_GCM_decrypt_cmp_tag_loop + cmpb $0x00, %bl + sete %bl + addl $16, %esp + xorl %ecx, %ecx + jmp L_AES_GCM_decrypt_cmp_tag_done +L_AES_GCM_decrypt_cmp_tag_16: + movdqu (%esi), %xmm5 + pcmpeqb %xmm5, %xmm4 + pmovmskb %xmm4, %edx + # %%edx == 0xFFFF then return 1 else => return 0 + xorl %ebx, %ebx + cmpl $0xffff, %edx + sete %bl +L_AES_GCM_decrypt_cmp_tag_done: + movl %ebx, (%edi) + addl $0xb0, %esp + popl %ebp + popl %edi + popl %esi + popl %ebx + ret +.size AES_GCM_decrypt,.-AES_GCM_decrypt +#ifdef WOLFSSL_AESGCM_STREAM +.text +.globl AES_GCM_init_aesni +.type AES_GCM_init_aesni,@function +.align 16 +AES_GCM_init_aesni: + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + subl $16, %esp + movl 36(%esp), %ebp + movl 44(%esp), %esi + movl 60(%esp), %edi + pxor %xmm4, %xmm4 + movl 48(%esp), %edx + cmpl $12, %edx + jne L_AES_GCM_init_aesni_iv_not_12 + # # Calculate values when IV is 12 bytes + # Set counter based on IV + movl $0x1000000, %ecx + pinsrd $0x00, (%esi), %xmm4 + pinsrd $0x01, 4(%esi), %xmm4 + pinsrd $2, 8(%esi), %xmm4 + pinsrd $3, %ecx, %xmm4 + # H = Encrypt X(=0) and T = Encrypt counter + movdqa %xmm4, %xmm1 + movdqa (%ebp), %xmm5 + pxor %xmm5, %xmm1 + movdqa 16(%ebp), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 32(%ebp), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 48(%ebp), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 64(%ebp), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 80(%ebp), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 96(%ebp), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 112(%ebp), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 128(%ebp), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 144(%ebp), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + cmpl $11, 40(%esp) + movdqa 160(%ebp), %xmm7 + jl L_AES_GCM_init_aesni_calc_iv_12_last + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 176(%ebp), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + cmpl $13, 40(%esp) + movdqa 192(%ebp), %xmm7 + jl L_AES_GCM_init_aesni_calc_iv_12_last + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 208(%ebp), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 224(%ebp), %xmm7 +L_AES_GCM_init_aesni_calc_iv_12_last: + 
aesenclast %xmm7, %xmm5 + aesenclast %xmm7, %xmm1 + pshufb L_aes_gcm_bswap_mask, %xmm5 + movdqu %xmm1, (%edi) + jmp L_AES_GCM_init_aesni_iv_done +L_AES_GCM_init_aesni_iv_not_12: + # Calculate values when IV is not 12 bytes + # H = Encrypt X(=0) + movdqa (%ebp), %xmm5 + aesenc 16(%ebp), %xmm5 + aesenc 32(%ebp), %xmm5 + aesenc 48(%ebp), %xmm5 + aesenc 64(%ebp), %xmm5 + aesenc 80(%ebp), %xmm5 + aesenc 96(%ebp), %xmm5 + aesenc 112(%ebp), %xmm5 + aesenc 128(%ebp), %xmm5 + aesenc 144(%ebp), %xmm5 + cmpl $11, 40(%esp) + movdqa 160(%ebp), %xmm1 + jl L_AES_GCM_init_aesni_calc_iv_1_aesenc_avx_last + aesenc %xmm1, %xmm5 + aesenc 176(%ebp), %xmm5 + cmpl $13, 40(%esp) + movdqa 192(%ebp), %xmm1 + jl L_AES_GCM_init_aesni_calc_iv_1_aesenc_avx_last + aesenc %xmm1, %xmm5 + aesenc 208(%ebp), %xmm5 + movdqa 224(%ebp), %xmm1 +L_AES_GCM_init_aesni_calc_iv_1_aesenc_avx_last: + aesenclast %xmm1, %xmm5 + pshufb L_aes_gcm_bswap_mask, %xmm5 + # Calc counter + # Initialization vector + cmpl $0x00, %edx + movl $0x00, %ecx + je L_AES_GCM_init_aesni_calc_iv_done + cmpl $16, %edx + jl L_AES_GCM_init_aesni_calc_iv_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_init_aesni_calc_iv_16_loop: + movdqu (%esi,%ecx,1), %xmm0 + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm0, %xmm4 + pshufd $0x4e, %xmm4, %xmm1 + pshufd $0x4e, %xmm5, %xmm2 + movdqa %xmm5, %xmm3 + movdqa %xmm5, %xmm0 + pclmulqdq $0x11, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm4, %xmm0 + pxor %xmm4, %xmm1 + pxor %xmm5, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm0, %xmm7 + movdqa %xmm3, %xmm4 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm7 + pxor %xmm1, %xmm4 + movdqa %xmm7, %xmm0 + movdqa %xmm4, %xmm1 + psrld $31, %xmm0 + psrld $31, %xmm1 + pslld $0x01, %xmm7 + pslld $0x01, %xmm4 + movdqa %xmm0, %xmm2 + pslldq $4, %xmm0 + psrldq $12, %xmm2 + pslldq $4, %xmm1 + por %xmm2, %xmm4 + por %xmm0, %xmm7 + por %xmm1, %xmm4 + movdqa %xmm7, %xmm0 + movdqa %xmm7, %xmm1 + movdqa %xmm7, %xmm2 + pslld $31, %xmm0 + pslld $30, %xmm1 + pslld $25, %xmm2 + pxor %xmm1, %xmm0 + pxor %xmm2, %xmm0 + movdqa %xmm0, %xmm1 + psrldq $4, %xmm1 + pslldq $12, %xmm0 + pxor %xmm0, %xmm7 + movdqa %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + psrld $0x01, %xmm2 + psrld $2, %xmm3 + psrld $7, %xmm0 + pxor %xmm3, %xmm2 + pxor %xmm0, %xmm2 + pxor %xmm1, %xmm2 + pxor %xmm7, %xmm2 + pxor %xmm2, %xmm4 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_init_aesni_calc_iv_16_loop + movl 48(%esp), %edx + cmpl %edx, %ecx + je L_AES_GCM_init_aesni_calc_iv_done +L_AES_GCM_init_aesni_calc_iv_lt16: + subl $16, %esp + pxor %xmm0, %xmm0 + xorl %ebx, %ebx + movdqu %xmm0, (%esp) +L_AES_GCM_init_aesni_calc_iv_loop: + movzbl (%esi,%ecx,1), %eax + movb %al, (%esp,%ebx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_init_aesni_calc_iv_loop + movdqu (%esp), %xmm0 + addl $16, %esp + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm0, %xmm4 + pshufd $0x4e, %xmm4, %xmm1 + pshufd $0x4e, %xmm5, %xmm2 + movdqa %xmm5, %xmm3 + movdqa %xmm5, %xmm0 + pclmulqdq $0x11, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm4, %xmm0 + pxor %xmm4, %xmm1 + pxor %xmm5, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm0, %xmm7 + movdqa %xmm3, %xmm4 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm7 + pxor %xmm1, %xmm4 + movdqa %xmm7, %xmm0 + movdqa %xmm4, %xmm1 + psrld $31, %xmm0 + psrld $31, %xmm1 + pslld $0x01, %xmm7 + pslld $0x01, %xmm4 + movdqa %xmm0, %xmm2 + pslldq $4, %xmm0 + psrldq $12, %xmm2 + 
pslldq $4, %xmm1 + por %xmm2, %xmm4 + por %xmm0, %xmm7 + por %xmm1, %xmm4 + movdqa %xmm7, %xmm0 + movdqa %xmm7, %xmm1 + movdqa %xmm7, %xmm2 + pslld $31, %xmm0 + pslld $30, %xmm1 + pslld $25, %xmm2 + pxor %xmm1, %xmm0 + pxor %xmm2, %xmm0 + movdqa %xmm0, %xmm1 + psrldq $4, %xmm1 + pslldq $12, %xmm0 + pxor %xmm0, %xmm7 + movdqa %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + psrld $0x01, %xmm2 + psrld $2, %xmm3 + psrld $7, %xmm0 + pxor %xmm3, %xmm2 + pxor %xmm0, %xmm2 + pxor %xmm1, %xmm2 + pxor %xmm7, %xmm2 + pxor %xmm2, %xmm4 +L_AES_GCM_init_aesni_calc_iv_done: + # T = Encrypt counter + pxor %xmm0, %xmm0 + shll $3, %edx + pinsrd $0x00, %edx, %xmm0 + pxor %xmm0, %xmm4 + pshufd $0x4e, %xmm4, %xmm1 + pshufd $0x4e, %xmm5, %xmm2 + movdqa %xmm5, %xmm3 + movdqa %xmm5, %xmm0 + pclmulqdq $0x11, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm4, %xmm0 + pxor %xmm4, %xmm1 + pxor %xmm5, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm0, %xmm7 + movdqa %xmm3, %xmm4 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm7 + pxor %xmm1, %xmm4 + movdqa %xmm7, %xmm0 + movdqa %xmm4, %xmm1 + psrld $31, %xmm0 + psrld $31, %xmm1 + pslld $0x01, %xmm7 + pslld $0x01, %xmm4 + movdqa %xmm0, %xmm2 + pslldq $4, %xmm0 + psrldq $12, %xmm2 + pslldq $4, %xmm1 + por %xmm2, %xmm4 + por %xmm0, %xmm7 + por %xmm1, %xmm4 + movdqa %xmm7, %xmm0 + movdqa %xmm7, %xmm1 + movdqa %xmm7, %xmm2 + pslld $31, %xmm0 + pslld $30, %xmm1 + pslld $25, %xmm2 + pxor %xmm1, %xmm0 + pxor %xmm2, %xmm0 + movdqa %xmm0, %xmm1 + psrldq $4, %xmm1 + pslldq $12, %xmm0 + pxor %xmm0, %xmm7 + movdqa %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + psrld $0x01, %xmm2 + psrld $2, %xmm3 + psrld $7, %xmm0 + pxor %xmm3, %xmm2 + pxor %xmm0, %xmm2 + pxor %xmm1, %xmm2 + pxor %xmm7, %xmm2 + pxor %xmm2, %xmm4 + pshufb L_aes_gcm_bswap_mask, %xmm4 + # Encrypt counter + movdqa (%ebp), %xmm0 + pxor %xmm4, %xmm0 + aesenc 16(%ebp), %xmm0 + aesenc 32(%ebp), %xmm0 + aesenc 48(%ebp), %xmm0 + aesenc 64(%ebp), %xmm0 + aesenc 80(%ebp), %xmm0 + aesenc 96(%ebp), %xmm0 + aesenc 112(%ebp), %xmm0 + aesenc 128(%ebp), %xmm0 + aesenc 144(%ebp), %xmm0 + cmpl $11, 40(%esp) + movdqa 160(%ebp), %xmm1 + jl L_AES_GCM_init_aesni_calc_iv_2_aesenc_avx_last + aesenc %xmm1, %xmm0 + aesenc 176(%ebp), %xmm0 + cmpl $13, 40(%esp) + movdqa 192(%ebp), %xmm1 + jl L_AES_GCM_init_aesni_calc_iv_2_aesenc_avx_last + aesenc %xmm1, %xmm0 + aesenc 208(%ebp), %xmm0 + movdqa 224(%ebp), %xmm1 +L_AES_GCM_init_aesni_calc_iv_2_aesenc_avx_last: + aesenclast %xmm1, %xmm0 + movdqu %xmm0, (%edi) +L_AES_GCM_init_aesni_iv_done: + movl 52(%esp), %ebp + movl 56(%esp), %edi + pshufb L_aes_gcm_bswap_epi64, %xmm4 + paddd L_aes_gcm_one, %xmm4 + movdqa %xmm5, (%ebp) + movdqa %xmm4, (%edi) + addl $16, %esp + popl %ebp + popl %edi + popl %esi + popl %ebx + ret +.size AES_GCM_init_aesni,.-AES_GCM_init_aesni +.text +.globl AES_GCM_aad_update_aesni +.type AES_GCM_aad_update_aesni,@function +.align 16 +AES_GCM_aad_update_aesni: + pushl %esi + pushl %edi + movl 12(%esp), %esi + movl 16(%esp), %edx + movl 20(%esp), %edi + movl 24(%esp), %eax + movdqa (%edi), %xmm5 + movdqa (%eax), %xmm6 + xorl %ecx, %ecx +L_AES_GCM_aad_update_aesni_16_loop: + movdqu (%esi,%ecx,1), %xmm0 + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm0, %xmm5 + pshufd $0x4e, %xmm5, %xmm1 + pshufd $0x4e, %xmm6, %xmm2 + movdqa %xmm6, %xmm3 + movdqa %xmm6, %xmm0 + pclmulqdq $0x11, %xmm5, %xmm3 + pclmulqdq $0x00, %xmm5, %xmm0 + pxor %xmm5, %xmm1 + pxor %xmm6, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor 
%xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm0, %xmm4 + movdqa %xmm3, %xmm5 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm4 + pxor %xmm1, %xmm5 + movdqa %xmm4, %xmm0 + movdqa %xmm5, %xmm1 + psrld $31, %xmm0 + psrld $31, %xmm1 + pslld $0x01, %xmm4 + pslld $0x01, %xmm5 + movdqa %xmm0, %xmm2 + pslldq $4, %xmm0 + psrldq $12, %xmm2 + pslldq $4, %xmm1 + por %xmm2, %xmm5 + por %xmm0, %xmm4 + por %xmm1, %xmm5 + movdqa %xmm4, %xmm0 + movdqa %xmm4, %xmm1 + movdqa %xmm4, %xmm2 + pslld $31, %xmm0 + pslld $30, %xmm1 + pslld $25, %xmm2 + pxor %xmm1, %xmm0 + pxor %xmm2, %xmm0 + movdqa %xmm0, %xmm1 + psrldq $4, %xmm1 + pslldq $12, %xmm0 + pxor %xmm0, %xmm4 + movdqa %xmm4, %xmm2 + movdqa %xmm4, %xmm3 + movdqa %xmm4, %xmm0 + psrld $0x01, %xmm2 + psrld $2, %xmm3 + psrld $7, %xmm0 + pxor %xmm3, %xmm2 + pxor %xmm0, %xmm2 + pxor %xmm1, %xmm2 + pxor %xmm4, %xmm2 + pxor %xmm2, %xmm5 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_aad_update_aesni_16_loop + movdqa %xmm5, (%edi) + popl %edi + popl %esi + ret +.size AES_GCM_aad_update_aesni,.-AES_GCM_aad_update_aesni +.text +.globl AES_GCM_encrypt_block_aesni +.type AES_GCM_encrypt_block_aesni,@function +.align 16 +AES_GCM_encrypt_block_aesni: + pushl %esi + pushl %edi + movl 12(%esp), %ecx + movl 16(%esp), %eax + movl 20(%esp), %edi + movl 24(%esp), %esi + movl 28(%esp), %edx + movdqu (%edx), %xmm0 + movdqa %xmm0, %xmm1 + pshufb L_aes_gcm_bswap_epi64, %xmm0 + paddd L_aes_gcm_one, %xmm1 + pxor (%ecx), %xmm0 + movdqu %xmm1, (%edx) + aesenc 16(%ecx), %xmm0 + aesenc 32(%ecx), %xmm0 + aesenc 48(%ecx), %xmm0 + aesenc 64(%ecx), %xmm0 + aesenc 80(%ecx), %xmm0 + aesenc 96(%ecx), %xmm0 + aesenc 112(%ecx), %xmm0 + aesenc 128(%ecx), %xmm0 + aesenc 144(%ecx), %xmm0 + cmpl $11, %eax + movdqa 160(%ecx), %xmm1 + jl L_AES_GCM_encrypt_block_aesni_aesenc_block_aesenc_avx_last + aesenc %xmm1, %xmm0 + aesenc 176(%ecx), %xmm0 + cmpl $13, %eax + movdqa 192(%ecx), %xmm1 + jl L_AES_GCM_encrypt_block_aesni_aesenc_block_aesenc_avx_last + aesenc %xmm1, %xmm0 + aesenc 208(%ecx), %xmm0 + movdqa 224(%ecx), %xmm1 +L_AES_GCM_encrypt_block_aesni_aesenc_block_aesenc_avx_last: + aesenclast %xmm1, %xmm0 + movdqu (%esi), %xmm1 + pxor %xmm1, %xmm0 + movdqu %xmm0, (%edi) + pshufb L_aes_gcm_bswap_mask, %xmm0 + popl %edi + popl %esi + ret +.size AES_GCM_encrypt_block_aesni,.-AES_GCM_encrypt_block_aesni +.text +.globl AES_GCM_ghash_block_aesni +.type AES_GCM_ghash_block_aesni,@function +.align 16 +AES_GCM_ghash_block_aesni: + movl 4(%esp), %edx + movl 8(%esp), %eax + movl 12(%esp), %ecx + movdqa (%eax), %xmm4 + movdqa (%ecx), %xmm5 + movdqu (%edx), %xmm0 + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm0, %xmm4 + pshufd $0x4e, %xmm4, %xmm1 + pshufd $0x4e, %xmm5, %xmm2 + movdqa %xmm5, %xmm3 + movdqa %xmm5, %xmm0 + pclmulqdq $0x11, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm4, %xmm0 + pxor %xmm4, %xmm1 + pxor %xmm5, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm0, %xmm6 + movdqa %xmm3, %xmm4 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm6 + pxor %xmm1, %xmm4 + movdqa %xmm6, %xmm0 + movdqa %xmm4, %xmm1 + psrld $31, %xmm0 + psrld $31, %xmm1 + pslld $0x01, %xmm6 + pslld $0x01, %xmm4 + movdqa %xmm0, %xmm2 + pslldq $4, %xmm0 + psrldq $12, %xmm2 + pslldq $4, %xmm1 + por %xmm2, %xmm4 + por %xmm0, %xmm6 + por %xmm1, %xmm4 + movdqa %xmm6, %xmm0 + movdqa %xmm6, %xmm1 + movdqa %xmm6, %xmm2 + pslld $31, %xmm0 + pslld $30, %xmm1 + pslld $25, %xmm2 + pxor %xmm1, %xmm0 + pxor %xmm2, %xmm0 + movdqa %xmm0, %xmm1 + psrldq 
$4, %xmm1 + pslldq $12, %xmm0 + pxor %xmm0, %xmm6 + movdqa %xmm6, %xmm2 + movdqa %xmm6, %xmm3 + movdqa %xmm6, %xmm0 + psrld $0x01, %xmm2 + psrld $2, %xmm3 + psrld $7, %xmm0 + pxor %xmm3, %xmm2 + pxor %xmm0, %xmm2 + pxor %xmm1, %xmm2 + pxor %xmm6, %xmm2 + pxor %xmm2, %xmm4 + movdqa %xmm4, (%eax) + ret +.size AES_GCM_ghash_block_aesni,.-AES_GCM_ghash_block_aesni +.text +.globl AES_GCM_encrypt_update_aesni +.type AES_GCM_encrypt_update_aesni,@function +.align 16 +AES_GCM_encrypt_update_aesni: + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + subl $0x60, %esp + movl 144(%esp), %esi + movdqa (%esi), %xmm4 + movdqu %xmm4, 64(%esp) + movl 136(%esp), %esi + movl 140(%esp), %ebp + movdqa (%esi), %xmm6 + movdqa (%ebp), %xmm5 + movdqu %xmm6, 80(%esp) + movl 116(%esp), %ebp + movl 124(%esp), %edi + movl 128(%esp), %esi + movdqa %xmm5, %xmm1 + movdqa %xmm5, %xmm0 + psrlq $63, %xmm1 + psllq $0x01, %xmm0 + pslldq $8, %xmm1 + por %xmm1, %xmm0 + pshufd $0xff, %xmm5, %xmm5 + psrad $31, %xmm5 + pand L_aes_gcm_mod2_128, %xmm5 + pxor %xmm0, %xmm5 + xorl %ebx, %ebx + cmpl $0x40, 132(%esp) + movl 132(%esp), %eax + jl L_AES_GCM_encrypt_update_aesni_done_64 + andl $0xffffffc0, %eax + movdqa %xmm6, %xmm2 + # H ^ 1 + movdqu %xmm5, (%esp) + # H ^ 2 + pshufd $0x4e, %xmm5, %xmm1 + pshufd $0x4e, %xmm5, %xmm2 + movdqa %xmm5, %xmm3 + movdqa %xmm5, %xmm0 + pclmulqdq $0x11, %xmm5, %xmm3 + pclmulqdq $0x00, %xmm5, %xmm0 + pxor %xmm5, %xmm1 + pxor %xmm5, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm3, %xmm4 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm0 + pxor %xmm1, %xmm4 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + movdqa %xmm0, %xmm3 + pslld $31, %xmm1 + pslld $30, %xmm2 + pslld $25, %xmm3 + pxor %xmm2, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm3 + psrldq $4, %xmm3 + pslldq $12, %xmm1 + pxor %xmm1, %xmm0 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + psrld $0x01, %xmm1 + psrld $2, %xmm2 + pxor %xmm2, %xmm1 + pxor %xmm0, %xmm1 + psrld $7, %xmm0 + pxor %xmm3, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm1, %xmm4 + movdqu %xmm4, 16(%esp) + # H ^ 3 + pshufd $0x4e, %xmm5, %xmm1 + pshufd $0x4e, %xmm4, %xmm2 + movdqa %xmm4, %xmm3 + movdqa %xmm4, %xmm0 + pclmulqdq $0x11, %xmm5, %xmm3 + pclmulqdq $0x00, %xmm5, %xmm0 + pxor %xmm5, %xmm1 + pxor %xmm4, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm3, %xmm7 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm0 + pxor %xmm1, %xmm7 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + movdqa %xmm0, %xmm3 + pslld $31, %xmm1 + pslld $30, %xmm2 + pslld $25, %xmm3 + pxor %xmm2, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm3 + psrldq $4, %xmm3 + pslldq $12, %xmm1 + pxor %xmm1, %xmm0 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + psrld $0x01, %xmm1 + psrld $2, %xmm2 + pxor %xmm2, %xmm1 + pxor %xmm0, %xmm1 + psrld $7, %xmm0 + pxor %xmm3, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm1, %xmm7 + movdqu %xmm7, 32(%esp) + # H ^ 4 + pshufd $0x4e, %xmm4, %xmm1 + pshufd $0x4e, %xmm4, %xmm2 + movdqa %xmm4, %xmm3 + movdqa %xmm4, %xmm0 + pclmulqdq $0x11, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm4, %xmm0 + pxor %xmm4, %xmm1 + pxor %xmm4, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm3, %xmm7 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm0 + pxor %xmm1, %xmm7 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + movdqa %xmm0, %xmm3 + pslld $31, %xmm1 + pslld $30, %xmm2 + pslld $25, %xmm3 + pxor %xmm2, %xmm1 + pxor 
%xmm3, %xmm1 + movdqa %xmm1, %xmm3 + psrldq $4, %xmm3 + pslldq $12, %xmm1 + pxor %xmm1, %xmm0 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + psrld $0x01, %xmm1 + psrld $2, %xmm2 + pxor %xmm2, %xmm1 + pxor %xmm0, %xmm1 + psrld $7, %xmm0 + pxor %xmm3, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm1, %xmm7 + movdqu %xmm7, 48(%esp) + # First 64 bytes of input + # Encrypt 64 bytes of counter + movdqu 64(%esp), %xmm0 + movdqa L_aes_gcm_bswap_epi64, %xmm7 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + movdqa %xmm0, %xmm3 + pshufb %xmm7, %xmm0 + paddd L_aes_gcm_one, %xmm1 + pshufb %xmm7, %xmm1 + paddd L_aes_gcm_two, %xmm2 + pshufb %xmm7, %xmm2 + paddd L_aes_gcm_three, %xmm3 + pshufb %xmm7, %xmm3 + movdqu 64(%esp), %xmm7 + paddd L_aes_gcm_four, %xmm7 + movdqu %xmm7, 64(%esp) + movdqa (%ebp), %xmm7 + pxor %xmm7, %xmm0 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm7, %xmm3 + movdqa 16(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 32(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 48(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 64(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 80(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 96(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 112(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 128(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 144(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + cmpl $11, 120(%esp) + movdqa 160(%ebp), %xmm7 + jl L_AES_GCM_encrypt_update_aesni_enc_done + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 176(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + cmpl $13, 120(%esp) + movdqa 192(%ebp), %xmm7 + jl L_AES_GCM_encrypt_update_aesni_enc_done + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 208(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 224(%ebp), %xmm7 +L_AES_GCM_encrypt_update_aesni_enc_done: + aesenclast %xmm7, %xmm0 + aesenclast %xmm7, %xmm1 + movdqu (%esi), %xmm4 + movdqu 16(%esi), %xmm5 + pxor %xmm4, %xmm0 + pxor %xmm5, %xmm1 + movdqu %xmm0, (%edi) + movdqu %xmm1, 16(%edi) + aesenclast %xmm7, %xmm2 + aesenclast %xmm7, %xmm3 + movdqu 32(%esi), %xmm4 + movdqu 48(%esi), %xmm5 + pxor %xmm4, %xmm2 + pxor %xmm5, %xmm3 + movdqu %xmm2, 32(%edi) + movdqu %xmm3, 48(%edi) + cmpl $0x40, %eax + movl $0x40, %ebx + jle L_AES_GCM_encrypt_update_aesni_end_64 + # More 64 bytes of input +L_AES_GCM_encrypt_update_aesni_ghash_64: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + # Encrypt 64 bytes of counter + movdqu 64(%esp), %xmm0 + movdqa L_aes_gcm_bswap_epi64, %xmm7 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + movdqa %xmm0, %xmm3 + pshufb %xmm7, %xmm0 + paddd L_aes_gcm_one, %xmm1 + pshufb %xmm7, %xmm1 + paddd L_aes_gcm_two, %xmm2 + pshufb %xmm7, %xmm2 + paddd L_aes_gcm_three, %xmm3 + pshufb %xmm7, %xmm3 + movdqu 64(%esp), %xmm7 + paddd L_aes_gcm_four, %xmm7 + movdqu %xmm7, 64(%esp) + 
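+        # xor the first round key into the four counter blocks, then run the remaining AES rounds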
movdqa (%ebp), %xmm7 + pxor %xmm7, %xmm0 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm7, %xmm3 + movdqa 16(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 32(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 48(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 64(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 80(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 96(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 112(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 128(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 144(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + cmpl $11, 120(%esp) + movdqa 160(%ebp), %xmm7 + jl L_AES_GCM_encrypt_update_aesni_aesenc_64_ghash_avx_done + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 176(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + cmpl $13, 120(%esp) + movdqa 192(%ebp), %xmm7 + jl L_AES_GCM_encrypt_update_aesni_aesenc_64_ghash_avx_done + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 208(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 224(%ebp), %xmm7 +L_AES_GCM_encrypt_update_aesni_aesenc_64_ghash_avx_done: + aesenclast %xmm7, %xmm0 + aesenclast %xmm7, %xmm1 + movdqu (%ecx), %xmm4 + movdqu 16(%ecx), %xmm5 + pxor %xmm4, %xmm0 + pxor %xmm5, %xmm1 + movdqu %xmm0, (%edx) + movdqu %xmm1, 16(%edx) + aesenclast %xmm7, %xmm2 + aesenclast %xmm7, %xmm3 + movdqu 32(%ecx), %xmm4 + movdqu 48(%ecx), %xmm5 + pxor %xmm4, %xmm2 + pxor %xmm5, %xmm3 + movdqu %xmm2, 32(%edx) + movdqu %xmm3, 48(%edx) + # ghash encrypted counter + movdqu 80(%esp), %xmm2 + movdqu 48(%esp), %xmm7 + movdqu -64(%edx), %xmm0 + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm2, %xmm0 + pshufd $0x4e, %xmm7, %xmm1 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm7, %xmm1 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm3 + pclmulqdq $0x11, %xmm7, %xmm3 + movdqa %xmm0, %xmm2 + pclmulqdq $0x00, %xmm7, %xmm2 + pclmulqdq $0x00, %xmm5, %xmm1 + pxor %xmm2, %xmm1 + pxor %xmm3, %xmm1 + movdqu 32(%esp), %xmm7 + movdqu -48(%edx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + pclmulqdq $0x00, %xmm0, %xmm7 + pclmulqdq $0x00, %xmm5, %xmm4 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqu 16(%esp), %xmm7 + movdqu -32(%edx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + pclmulqdq $0x00, %xmm0, %xmm7 + pclmulqdq $0x00, %xmm5, %xmm4 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqu (%esp), %xmm7 + movdqu -16(%edx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb 
L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + pclmulqdq $0x00, %xmm0, %xmm7 + pclmulqdq $0x00, %xmm5, %xmm4 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa %xmm1, %xmm5 + psrldq $8, %xmm1 + pslldq $8, %xmm5 + pxor %xmm5, %xmm2 + pxor %xmm1, %xmm3 + movdqa %xmm2, %xmm7 + movdqa %xmm2, %xmm4 + movdqa %xmm2, %xmm5 + pslld $31, %xmm7 + pslld $30, %xmm4 + pslld $25, %xmm5 + pxor %xmm4, %xmm7 + pxor %xmm5, %xmm7 + movdqa %xmm7, %xmm4 + pslldq $12, %xmm7 + psrldq $4, %xmm4 + pxor %xmm7, %xmm2 + movdqa %xmm2, %xmm5 + movdqa %xmm2, %xmm1 + movdqa %xmm2, %xmm0 + psrld $0x01, %xmm5 + psrld $2, %xmm1 + psrld $7, %xmm0 + pxor %xmm1, %xmm5 + pxor %xmm0, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm5, %xmm2 + pxor %xmm3, %xmm2 + movdqu %xmm2, 80(%esp) + addl $0x40, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_encrypt_update_aesni_ghash_64 +L_AES_GCM_encrypt_update_aesni_end_64: + movdqu 80(%esp), %xmm6 + # Block 1 + movdqa L_aes_gcm_bswap_mask, %xmm0 + movdqu (%edx), %xmm5 + pshufb %xmm0, %xmm5 + movdqu 48(%esp), %xmm7 + pxor %xmm6, %xmm5 + pshufd $0x4e, %xmm5, %xmm1 + pshufd $0x4e, %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + pclmulqdq $0x11, %xmm5, %xmm3 + pclmulqdq $0x00, %xmm5, %xmm0 + pxor %xmm5, %xmm1 + pxor %xmm7, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm0, %xmm4 + movdqa %xmm3, %xmm6 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm4 + pxor %xmm1, %xmm6 + # Block 2 + movdqa L_aes_gcm_bswap_mask, %xmm0 + movdqu 16(%edx), %xmm5 + pshufb %xmm0, %xmm5 + movdqu 32(%esp), %xmm7 + pshufd $0x4e, %xmm5, %xmm1 + pshufd $0x4e, %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + pclmulqdq $0x11, %xmm5, %xmm3 + pclmulqdq $0x00, %xmm5, %xmm0 + pxor %xmm5, %xmm1 + pxor %xmm7, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + pxor %xmm0, %xmm4 + pxor %xmm3, %xmm6 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm4 + pxor %xmm1, %xmm6 + # Block 3 + movdqa L_aes_gcm_bswap_mask, %xmm0 + movdqu 32(%edx), %xmm5 + pshufb %xmm0, %xmm5 + movdqu 16(%esp), %xmm7 + pshufd $0x4e, %xmm5, %xmm1 + pshufd $0x4e, %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + pclmulqdq $0x11, %xmm5, %xmm3 + pclmulqdq $0x00, %xmm5, %xmm0 + pxor %xmm5, %xmm1 + pxor %xmm7, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + pxor %xmm0, %xmm4 + pxor %xmm3, %xmm6 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm4 + pxor %xmm1, %xmm6 + # Block 4 + movdqa L_aes_gcm_bswap_mask, %xmm0 + movdqu 48(%edx), %xmm5 + pshufb %xmm0, %xmm5 + movdqu (%esp), %xmm7 + pshufd $0x4e, %xmm5, %xmm1 + pshufd $0x4e, %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + pclmulqdq $0x11, %xmm5, %xmm3 + pclmulqdq $0x00, %xmm5, %xmm0 + pxor %xmm5, %xmm1 + pxor %xmm7, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + pxor %xmm0, %xmm4 + pxor %xmm3, %xmm6 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm4 + pxor %xmm1, %xmm6 + movdqa %xmm4, %xmm0 + movdqa %xmm4, %xmm1 + movdqa %xmm4, %xmm2 + pslld $31, %xmm0 + pslld $30, %xmm1 + pslld $25, %xmm2 + pxor %xmm1, %xmm0 + pxor %xmm2, %xmm0 + movdqa %xmm0, %xmm1 + psrldq $4, %xmm1 + pslldq $12, %xmm0 + pxor %xmm0, %xmm4 + movdqa %xmm4, %xmm2 + movdqa %xmm4, %xmm3 + movdqa %xmm4, %xmm0 + psrld $0x01, 
%xmm2 + psrld $2, %xmm3 + psrld $7, %xmm0 + pxor %xmm3, %xmm2 + pxor %xmm0, %xmm2 + pxor %xmm1, %xmm2 + pxor %xmm4, %xmm2 + pxor %xmm2, %xmm6 + movdqu (%esp), %xmm5 +L_AES_GCM_encrypt_update_aesni_done_64: + movl 132(%esp), %edx + cmpl %edx, %ebx + jge L_AES_GCM_encrypt_update_aesni_done_enc + movl 132(%esp), %eax + andl $0xfffffff0, %eax + cmpl %eax, %ebx + jge L_AES_GCM_encrypt_update_aesni_last_block_done + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + movdqu 64(%esp), %xmm0 + movdqa %xmm0, %xmm1 + pshufb L_aes_gcm_bswap_epi64, %xmm0 + paddd L_aes_gcm_one, %xmm1 + pxor (%ebp), %xmm0 + movdqu %xmm1, 64(%esp) + aesenc 16(%ebp), %xmm0 + aesenc 32(%ebp), %xmm0 + aesenc 48(%ebp), %xmm0 + aesenc 64(%ebp), %xmm0 + aesenc 80(%ebp), %xmm0 + aesenc 96(%ebp), %xmm0 + aesenc 112(%ebp), %xmm0 + aesenc 128(%ebp), %xmm0 + aesenc 144(%ebp), %xmm0 + cmpl $11, 120(%esp) + movdqa 160(%ebp), %xmm1 + jl L_AES_GCM_encrypt_update_aesni_aesenc_block_aesenc_avx_last + aesenc %xmm1, %xmm0 + aesenc 176(%ebp), %xmm0 + cmpl $13, 120(%esp) + movdqa 192(%ebp), %xmm1 + jl L_AES_GCM_encrypt_update_aesni_aesenc_block_aesenc_avx_last + aesenc %xmm1, %xmm0 + aesenc 208(%ebp), %xmm0 + movdqa 224(%ebp), %xmm1 +L_AES_GCM_encrypt_update_aesni_aesenc_block_aesenc_avx_last: + aesenclast %xmm1, %xmm0 + movdqu (%ecx), %xmm1 + pxor %xmm1, %xmm0 + movdqu %xmm0, (%edx) + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm0, %xmm6 + addl $16, %ebx + cmpl %eax, %ebx + jge L_AES_GCM_encrypt_update_aesni_last_block_ghash +L_AES_GCM_encrypt_update_aesni_last_block_start: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + movdqu 64(%esp), %xmm0 + movdqa %xmm0, %xmm1 + pshufb L_aes_gcm_bswap_epi64, %xmm0 + paddd L_aes_gcm_one, %xmm1 + pxor (%ebp), %xmm0 + movdqu %xmm1, 64(%esp) + movdqu %xmm6, %xmm4 + pclmulqdq $16, %xmm5, %xmm4 + aesenc 16(%ebp), %xmm0 + aesenc 32(%ebp), %xmm0 + movdqu %xmm6, %xmm7 + pclmulqdq $0x01, %xmm5, %xmm7 + aesenc 48(%ebp), %xmm0 + aesenc 64(%ebp), %xmm0 + aesenc 80(%ebp), %xmm0 + movdqu %xmm6, %xmm1 + pclmulqdq $0x11, %xmm5, %xmm1 + aesenc 96(%ebp), %xmm0 + pxor %xmm7, %xmm4 + movdqa %xmm4, %xmm2 + psrldq $8, %xmm4 + pslldq $8, %xmm2 + aesenc 112(%ebp), %xmm0 + movdqu %xmm6, %xmm7 + pclmulqdq $0x00, %xmm5, %xmm7 + pxor %xmm7, %xmm2 + pxor %xmm4, %xmm1 + movdqa L_aes_gcm_mod2_128, %xmm3 + movdqa %xmm2, %xmm7 + pclmulqdq $16, %xmm3, %xmm7 + aesenc 128(%ebp), %xmm0 + pshufd $0x4e, %xmm2, %xmm4 + pxor %xmm7, %xmm4 + movdqa %xmm4, %xmm7 + pclmulqdq $16, %xmm3, %xmm7 + aesenc 144(%ebp), %xmm0 + pshufd $0x4e, %xmm4, %xmm6 + pxor %xmm7, %xmm6 + pxor %xmm1, %xmm6 + cmpl $11, 120(%esp) + movdqa 160(%ebp), %xmm1 + jl L_AES_GCM_encrypt_update_aesni_aesenc_gfmul_last + aesenc %xmm1, %xmm0 + aesenc 176(%ebp), %xmm0 + cmpl $13, 120(%esp) + movdqa 192(%ebp), %xmm1 + jl L_AES_GCM_encrypt_update_aesni_aesenc_gfmul_last + aesenc %xmm1, %xmm0 + aesenc 208(%ebp), %xmm0 + movdqa 224(%ebp), %xmm1 +L_AES_GCM_encrypt_update_aesni_aesenc_gfmul_last: + aesenclast %xmm1, %xmm0 + movdqu (%ecx), %xmm1 + pxor %xmm1, %xmm0 + movdqu %xmm0, (%edx) + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm0, %xmm6 + addl $16, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_encrypt_update_aesni_last_block_start +L_AES_GCM_encrypt_update_aesni_last_block_ghash: + pshufd $0x4e, %xmm5, %xmm1 + pshufd $0x4e, %xmm6, %xmm2 + movdqa %xmm6, %xmm3 + movdqa %xmm6, %xmm0 + pclmulqdq $0x11, %xmm5, %xmm3 + pclmulqdq $0x00, %xmm5, %xmm0 + pxor %xmm5, %xmm1 + pxor %xmm6, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + 
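For reference, the pclmulqdq/Karatsuba sequences and the pslld $31/$30/$25, psrld $1/$2/$7 steps above implement multiplication in the GHASH field GF(2^128) from NIST SP 800-38D. A slow, easy-to-check C formulation of that multiply (a sketch for cross-checking, not the code path added here) is:

#include <stdint.h>
#include <string.h>

/* out = x * y in the GHASH field of SP 800-38D.  The assembly works on
 * byte-reflected blocks so pclmulqdq can be used; this version works
 * directly on the wire byte order. */
static void ghash_gfmul_ref(const uint8_t x[16], const uint8_t y[16],
                            uint8_t out[16])
{
    uint8_t z[16] = {0};        /* accumulator Z */
    uint8_t v[16];              /* V, repeatedly halved */
    memcpy(v, y, 16);

    for (int i = 0; i < 128; i++) {
        /* bit i of x, most significant bit of byte 0 first */
        if ((x[i / 8] >> (7 - (i % 8))) & 1) {
            for (int j = 0; j < 16; j++)
                z[j] ^= v[j];
        }
        /* V >>= 1; if a bit fell off, reduce by R = 0xE1 || 0^120,
         * i.e. x^128 = x^7 + x^2 + x + 1, the same polynomial behind
         * the shift-based reduction in the assembly. */
        int carry = v[15] & 1;
        for (int j = 15; j > 0; j--)
            v[j] = (uint8_t)((v[j] >> 1) | (v[j - 1] << 7));
        v[0] >>= 1;
        if (carry)
            v[0] ^= 0xe1;
    }
    memcpy(out, z, 16);
}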
movdqa %xmm3, %xmm6 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm0 + pxor %xmm1, %xmm6 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + movdqa %xmm0, %xmm3 + pslld $31, %xmm1 + pslld $30, %xmm2 + pslld $25, %xmm3 + pxor %xmm2, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm3 + psrldq $4, %xmm3 + pslldq $12, %xmm1 + pxor %xmm1, %xmm0 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + psrld $0x01, %xmm1 + psrld $2, %xmm2 + pxor %xmm2, %xmm1 + pxor %xmm0, %xmm1 + psrld $7, %xmm0 + pxor %xmm3, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm1, %xmm6 +L_AES_GCM_encrypt_update_aesni_last_block_done: +L_AES_GCM_encrypt_update_aesni_done_enc: + movl 136(%esp), %esi + movl 144(%esp), %edi + movdqu 64(%esp), %xmm4 + movdqa %xmm6, (%esi) + movdqu %xmm4, (%edi) + addl $0x60, %esp + popl %ebp + popl %edi + popl %esi + popl %ebx + ret +.size AES_GCM_encrypt_update_aesni,.-AES_GCM_encrypt_update_aesni +.text +.globl AES_GCM_encrypt_final_aesni +.type AES_GCM_encrypt_final_aesni,@function +.align 16 +AES_GCM_encrypt_final_aesni: + pushl %esi + pushl %edi + pushl %ebp + subl $16, %esp + movl 32(%esp), %ebp + movl 52(%esp), %esi + movl 56(%esp), %edi + movdqa (%ebp), %xmm4 + movdqa (%esi), %xmm5 + movdqa (%edi), %xmm6 + movdqa %xmm5, %xmm1 + movdqa %xmm5, %xmm0 + psrlq $63, %xmm1 + psllq $0x01, %xmm0 + pslldq $8, %xmm1 + por %xmm1, %xmm0 + pshufd $0xff, %xmm5, %xmm5 + psrad $31, %xmm5 + pand L_aes_gcm_mod2_128, %xmm5 + pxor %xmm0, %xmm5 + movl 44(%esp), %edx + movl 48(%esp), %ecx + shll $3, %edx + shll $3, %ecx + pinsrd $0x00, %edx, %xmm0 + pinsrd $2, %ecx, %xmm0 + movl 44(%esp), %edx + movl 48(%esp), %ecx + shrl $29, %edx + shrl $29, %ecx + pinsrd $0x01, %edx, %xmm0 + pinsrd $3, %ecx, %xmm0 + pxor %xmm0, %xmm4 + pshufd $0x4e, %xmm5, %xmm1 + pshufd $0x4e, %xmm4, %xmm2 + movdqa %xmm4, %xmm3 + movdqa %xmm4, %xmm0 + pclmulqdq $0x11, %xmm5, %xmm3 + pclmulqdq $0x00, %xmm5, %xmm0 + pxor %xmm5, %xmm1 + pxor %xmm4, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm3, %xmm4 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm0 + pxor %xmm1, %xmm4 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + movdqa %xmm0, %xmm3 + pslld $31, %xmm1 + pslld $30, %xmm2 + pslld $25, %xmm3 + pxor %xmm2, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm3 + psrldq $4, %xmm3 + pslldq $12, %xmm1 + pxor %xmm1, %xmm0 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + psrld $0x01, %xmm1 + psrld $2, %xmm2 + pxor %xmm2, %xmm1 + pxor %xmm0, %xmm1 + psrld $7, %xmm0 + pxor %xmm3, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm1, %xmm4 + pshufb L_aes_gcm_bswap_mask, %xmm4 + movdqu %xmm6, %xmm0 + pxor %xmm4, %xmm0 + movl 36(%esp), %edi + cmpl $16, 40(%esp) + je L_AES_GCM_encrypt_final_aesni_store_tag_16 + xorl %ecx, %ecx + movdqu %xmm0, (%esp) +L_AES_GCM_encrypt_final_aesni_store_tag_loop: + movzbl (%esp,%ecx,1), %eax + movb %al, (%edi,%ecx,1) + incl %ecx + cmpl 40(%esp), %ecx + jne L_AES_GCM_encrypt_final_aesni_store_tag_loop + jmp L_AES_GCM_encrypt_final_aesni_store_tag_done +L_AES_GCM_encrypt_final_aesni_store_tag_16: + movdqu %xmm0, (%edi) +L_AES_GCM_encrypt_final_aesni_store_tag_done: + addl $16, %esp + popl %ebp + popl %edi + popl %esi + ret +.size AES_GCM_encrypt_final_aesni,.-AES_GCM_encrypt_final_aesni +.text +.globl AES_GCM_decrypt_update_aesni +.type AES_GCM_decrypt_update_aesni,@function +.align 16 +AES_GCM_decrypt_update_aesni: + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + subl $0xa0, %esp + movl 208(%esp), %esi + movdqa (%esi), %xmm4 + movdqu %xmm4, 64(%esp) + movl 200(%esp), %esi + movl 
204(%esp), %ebp + movdqa (%esi), %xmm6 + movdqa (%ebp), %xmm5 + movdqu %xmm6, 80(%esp) + movl 180(%esp), %ebp + movl 188(%esp), %edi + movl 192(%esp), %esi + movdqa %xmm5, %xmm1 + movdqa %xmm5, %xmm0 + psrlq $63, %xmm1 + psllq $0x01, %xmm0 + pslldq $8, %xmm1 + por %xmm1, %xmm0 + pshufd $0xff, %xmm5, %xmm5 + psrad $31, %xmm5 + pand L_aes_gcm_mod2_128, %xmm5 + pxor %xmm0, %xmm5 + xorl %ebx, %ebx + cmpl $0x40, 196(%esp) + movl 196(%esp), %eax + jl L_AES_GCM_decrypt_update_aesni_done_64 + andl $0xffffffc0, %eax + movdqa %xmm6, %xmm2 + # H ^ 1 + movdqu %xmm5, (%esp) + # H ^ 2 + pshufd $0x4e, %xmm5, %xmm1 + pshufd $0x4e, %xmm5, %xmm2 + movdqa %xmm5, %xmm3 + movdqa %xmm5, %xmm0 + pclmulqdq $0x11, %xmm5, %xmm3 + pclmulqdq $0x00, %xmm5, %xmm0 + pxor %xmm5, %xmm1 + pxor %xmm5, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm3, %xmm4 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm0 + pxor %xmm1, %xmm4 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + movdqa %xmm0, %xmm3 + pslld $31, %xmm1 + pslld $30, %xmm2 + pslld $25, %xmm3 + pxor %xmm2, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm3 + psrldq $4, %xmm3 + pslldq $12, %xmm1 + pxor %xmm1, %xmm0 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + psrld $0x01, %xmm1 + psrld $2, %xmm2 + pxor %xmm2, %xmm1 + pxor %xmm0, %xmm1 + psrld $7, %xmm0 + pxor %xmm3, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm1, %xmm4 + movdqu %xmm4, 16(%esp) + # H ^ 3 + pshufd $0x4e, %xmm5, %xmm1 + pshufd $0x4e, %xmm4, %xmm2 + movdqa %xmm4, %xmm3 + movdqa %xmm4, %xmm0 + pclmulqdq $0x11, %xmm5, %xmm3 + pclmulqdq $0x00, %xmm5, %xmm0 + pxor %xmm5, %xmm1 + pxor %xmm4, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm3, %xmm7 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm0 + pxor %xmm1, %xmm7 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + movdqa %xmm0, %xmm3 + pslld $31, %xmm1 + pslld $30, %xmm2 + pslld $25, %xmm3 + pxor %xmm2, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm3 + psrldq $4, %xmm3 + pslldq $12, %xmm1 + pxor %xmm1, %xmm0 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + psrld $0x01, %xmm1 + psrld $2, %xmm2 + pxor %xmm2, %xmm1 + pxor %xmm0, %xmm1 + psrld $7, %xmm0 + pxor %xmm3, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm1, %xmm7 + movdqu %xmm7, 32(%esp) + # H ^ 4 + pshufd $0x4e, %xmm4, %xmm1 + pshufd $0x4e, %xmm4, %xmm2 + movdqa %xmm4, %xmm3 + movdqa %xmm4, %xmm0 + pclmulqdq $0x11, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm4, %xmm0 + pxor %xmm4, %xmm1 + pxor %xmm4, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm3, %xmm7 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm0 + pxor %xmm1, %xmm7 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + movdqa %xmm0, %xmm3 + pslld $31, %xmm1 + pslld $30, %xmm2 + pslld $25, %xmm3 + pxor %xmm2, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm3 + psrldq $4, %xmm3 + pslldq $12, %xmm1 + pxor %xmm1, %xmm0 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + psrld $0x01, %xmm1 + psrld $2, %xmm2 + pxor %xmm2, %xmm1 + pxor %xmm0, %xmm1 + psrld $7, %xmm0 + pxor %xmm3, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm1, %xmm7 + movdqu %xmm7, 48(%esp) + cmpl %esi, %edi + je L_AES_GCM_decrypt_update_aesni_ghash_64 +L_AES_GCM_decrypt_update_aesni_ghash_64_inplace: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + # Encrypt 64 bytes of counter + movdqu 64(%esp), %xmm0 + movdqa L_aes_gcm_bswap_epi64, %xmm7 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + movdqa %xmm0, %xmm3 + pshufb %xmm7, 
%xmm0 + paddd L_aes_gcm_one, %xmm1 + pshufb %xmm7, %xmm1 + paddd L_aes_gcm_two, %xmm2 + pshufb %xmm7, %xmm2 + paddd L_aes_gcm_three, %xmm3 + pshufb %xmm7, %xmm3 + movdqu 64(%esp), %xmm7 + paddd L_aes_gcm_four, %xmm7 + movdqu %xmm7, 64(%esp) + movdqa (%ebp), %xmm7 + pxor %xmm7, %xmm0 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm7, %xmm3 + movdqa 16(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 32(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 48(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 64(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 80(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 96(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 112(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 128(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 144(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + cmpl $11, 184(%esp) + movdqa 160(%ebp), %xmm7 + jl L_AES_GCM_decrypt_update_aesniinplace_aesenc_64_ghash_avx_done + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 176(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + cmpl $13, 184(%esp) + movdqa 192(%ebp), %xmm7 + jl L_AES_GCM_decrypt_update_aesniinplace_aesenc_64_ghash_avx_done + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 208(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 224(%ebp), %xmm7 +L_AES_GCM_decrypt_update_aesniinplace_aesenc_64_ghash_avx_done: + aesenclast %xmm7, %xmm0 + aesenclast %xmm7, %xmm1 + movdqu (%ecx), %xmm4 + movdqu 16(%ecx), %xmm5 + pxor %xmm4, %xmm0 + pxor %xmm5, %xmm1 + movdqu %xmm4, 96(%esp) + movdqu %xmm5, 112(%esp) + movdqu %xmm0, (%edx) + movdqu %xmm1, 16(%edx) + aesenclast %xmm7, %xmm2 + aesenclast %xmm7, %xmm3 + movdqu 32(%ecx), %xmm4 + movdqu 48(%ecx), %xmm5 + pxor %xmm4, %xmm2 + pxor %xmm5, %xmm3 + movdqu %xmm4, 128(%esp) + movdqu %xmm5, 144(%esp) + movdqu %xmm2, 32(%edx) + movdqu %xmm3, 48(%edx) + # ghash encrypted counter + movdqu 80(%esp), %xmm2 + movdqu 48(%esp), %xmm7 + movdqu 96(%esp), %xmm0 + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm2, %xmm0 + pshufd $0x4e, %xmm7, %xmm1 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm7, %xmm1 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm3 + pclmulqdq $0x11, %xmm7, %xmm3 + movdqa %xmm0, %xmm2 + pclmulqdq $0x00, %xmm7, %xmm2 + pclmulqdq $0x00, %xmm5, %xmm1 + pxor %xmm2, %xmm1 + pxor %xmm3, %xmm1 + movdqu 32(%esp), %xmm7 + movdqu 112(%esp), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + pclmulqdq $0x00, %xmm0, %xmm7 + pclmulqdq $0x00, %xmm5, %xmm4 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqu 16(%esp), %xmm7 + movdqu 128(%esp), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm7, %xmm4 + 
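The "Encrypt 64 bytes of counter" steps above add L_aes_gcm_one..L_aes_gcm_four to a byte-reflected counter with paddd and shuffle the result back before the AES rounds; only the low 32 bits of the counter are ever incremented (inc32 in SP 800-38D). A portable sketch of one such iteration, with aes_encrypt_block a hypothetical stand-in for the aesenc chain:

#include <stdint.h>

typedef void (*aes_block_fn)(const uint8_t in[16], uint8_t out[16],
                             const void *key_schedule);

/* Increment the low 32 bits of the counter block, big-endian. */
static void inc32(uint8_t ctr[16])
{
    for (int i = 15; i >= 12; i--)
        if (++ctr[i] != 0)
            break;
}

/* Produce four consecutive keystream blocks (64 bytes), the unit the
 * unrolled loop above works in, and advance the counter past them. */
static void ctr_keystream4(uint8_t ctr[16], uint8_t ks[4][16],
                           aes_block_fn aes_encrypt_block, const void *key)
{
    for (int i = 0; i < 4; i++) {
        aes_encrypt_block(ctr, ks[i], key);
        inc32(ctr);
    }
}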
pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + pclmulqdq $0x00, %xmm0, %xmm7 + pclmulqdq $0x00, %xmm5, %xmm4 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqu (%esp), %xmm7 + movdqu 144(%esp), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + pclmulqdq $0x00, %xmm0, %xmm7 + pclmulqdq $0x00, %xmm5, %xmm4 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa %xmm1, %xmm5 + psrldq $8, %xmm1 + pslldq $8, %xmm5 + pxor %xmm5, %xmm2 + pxor %xmm1, %xmm3 + movdqa %xmm2, %xmm7 + movdqa %xmm2, %xmm4 + movdqa %xmm2, %xmm5 + pslld $31, %xmm7 + pslld $30, %xmm4 + pslld $25, %xmm5 + pxor %xmm4, %xmm7 + pxor %xmm5, %xmm7 + movdqa %xmm7, %xmm4 + pslldq $12, %xmm7 + psrldq $4, %xmm4 + pxor %xmm7, %xmm2 + movdqa %xmm2, %xmm5 + movdqa %xmm2, %xmm1 + movdqa %xmm2, %xmm0 + psrld $0x01, %xmm5 + psrld $2, %xmm1 + psrld $7, %xmm0 + pxor %xmm1, %xmm5 + pxor %xmm0, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm5, %xmm2 + pxor %xmm3, %xmm2 + movdqu %xmm2, 80(%esp) + addl $0x40, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_decrypt_update_aesni_ghash_64_inplace + jmp L_AES_GCM_decrypt_update_aesni_ghash_64_done +L_AES_GCM_decrypt_update_aesni_ghash_64: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + # Encrypt 64 bytes of counter + movdqu 64(%esp), %xmm0 + movdqa L_aes_gcm_bswap_epi64, %xmm7 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + movdqa %xmm0, %xmm3 + pshufb %xmm7, %xmm0 + paddd L_aes_gcm_one, %xmm1 + pshufb %xmm7, %xmm1 + paddd L_aes_gcm_two, %xmm2 + pshufb %xmm7, %xmm2 + paddd L_aes_gcm_three, %xmm3 + pshufb %xmm7, %xmm3 + movdqu 64(%esp), %xmm7 + paddd L_aes_gcm_four, %xmm7 + movdqu %xmm7, 64(%esp) + movdqa (%ebp), %xmm7 + pxor %xmm7, %xmm0 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm7, %xmm3 + movdqa 16(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 32(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 48(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 64(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 80(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 96(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 112(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 128(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 144(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + cmpl $11, 184(%esp) + movdqa 160(%ebp), %xmm7 + jl L_AES_GCM_decrypt_update_aesni_aesenc_64_ghash_avx_done + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 176(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + cmpl $13, 184(%esp) + movdqa 192(%ebp), %xmm7 + jl L_AES_GCM_decrypt_update_aesni_aesenc_64_ghash_avx_done + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 
208(%ebp), %xmm7 + aesenc %xmm7, %xmm0 + aesenc %xmm7, %xmm1 + aesenc %xmm7, %xmm2 + aesenc %xmm7, %xmm3 + movdqa 224(%ebp), %xmm7 +L_AES_GCM_decrypt_update_aesni_aesenc_64_ghash_avx_done: + aesenclast %xmm7, %xmm0 + aesenclast %xmm7, %xmm1 + movdqu (%ecx), %xmm4 + movdqu 16(%ecx), %xmm5 + pxor %xmm4, %xmm0 + pxor %xmm5, %xmm1 + movdqu %xmm4, (%ecx) + movdqu %xmm5, 16(%ecx) + movdqu %xmm0, (%edx) + movdqu %xmm1, 16(%edx) + aesenclast %xmm7, %xmm2 + aesenclast %xmm7, %xmm3 + movdqu 32(%ecx), %xmm4 + movdqu 48(%ecx), %xmm5 + pxor %xmm4, %xmm2 + pxor %xmm5, %xmm3 + movdqu %xmm4, 32(%ecx) + movdqu %xmm5, 48(%ecx) + movdqu %xmm2, 32(%edx) + movdqu %xmm3, 48(%edx) + # ghash encrypted counter + movdqu 80(%esp), %xmm2 + movdqu 48(%esp), %xmm7 + movdqu (%ecx), %xmm0 + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm2, %xmm0 + pshufd $0x4e, %xmm7, %xmm1 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm7, %xmm1 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm3 + pclmulqdq $0x11, %xmm7, %xmm3 + movdqa %xmm0, %xmm2 + pclmulqdq $0x00, %xmm7, %xmm2 + pclmulqdq $0x00, %xmm5, %xmm1 + pxor %xmm2, %xmm1 + pxor %xmm3, %xmm1 + movdqu 32(%esp), %xmm7 + movdqu 16(%ecx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + pclmulqdq $0x00, %xmm0, %xmm7 + pclmulqdq $0x00, %xmm5, %xmm4 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqu 16(%esp), %xmm7 + movdqu 32(%ecx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + pclmulqdq $0x00, %xmm0, %xmm7 + pclmulqdq $0x00, %xmm5, %xmm4 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqu (%esp), %xmm7 + movdqu 48(%ecx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask, %xmm0 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + pclmulqdq $0x00, %xmm0, %xmm7 + pclmulqdq $0x00, %xmm5, %xmm4 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa %xmm1, %xmm5 + psrldq $8, %xmm1 + pslldq $8, %xmm5 + pxor %xmm5, %xmm2 + pxor %xmm1, %xmm3 + movdqa %xmm2, %xmm7 + movdqa %xmm2, %xmm4 + movdqa %xmm2, %xmm5 + pslld $31, %xmm7 + pslld $30, %xmm4 + pslld $25, %xmm5 + pxor %xmm4, %xmm7 + pxor %xmm5, %xmm7 + movdqa %xmm7, %xmm4 + pslldq $12, %xmm7 + psrldq $4, %xmm4 + pxor %xmm7, %xmm2 + movdqa %xmm2, %xmm5 + movdqa %xmm2, %xmm1 + movdqa %xmm2, %xmm0 + psrld $0x01, %xmm5 + psrld $2, %xmm1 + psrld $7, %xmm0 + pxor %xmm1, %xmm5 + pxor %xmm0, %xmm5 + pxor %xmm4, %xmm5 + pxor %xmm5, %xmm2 + pxor %xmm3, %xmm2 + movdqu %xmm2, 80(%esp) + addl $0x40, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_decrypt_update_aesni_ghash_64 +L_AES_GCM_decrypt_update_aesni_ghash_64_done: + movdqa %xmm2, %xmm6 + movdqu (%esp), %xmm5 +L_AES_GCM_decrypt_update_aesni_done_64: + movl 196(%esp), %edx + cmpl %edx, %ebx + jge L_AES_GCM_decrypt_update_aesni_done_dec + movl 196(%esp), %eax + andl $0xfffffff0, %eax + cmpl %eax, %ebx + jge L_AES_GCM_decrypt_update_aesni_last_block_done +L_AES_GCM_decrypt_update_aesni_last_block_start: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + movdqu (%ecx), %xmm1 + pshufb L_aes_gcm_bswap_mask, %xmm1 + pxor %xmm6, %xmm1 + movdqu %xmm1, (%esp) + movdqu 64(%esp), %xmm0 
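In the decrypt update path the GHASH input is the ciphertext, taken before the keystream is XORed in, which is what allows decryption to run in place over the input buffer. A spec-level sketch of that per-block ordering, with aes_enc and gf_mul as hypothetical stand-ins for the aesenc chain and the carry-less multiply:

#include <stdint.h>
#include <stddef.h>
#include <string.h>

typedef void (*aes_block_fn)(const uint8_t in[16], uint8_t out[16],
                             const void *key_schedule);
typedef void (*gf_mul_fn)(const uint8_t x[16], const uint8_t y[16],
                          uint8_t out[16]);

static void gcm_decrypt_blocks(uint8_t *out, const uint8_t *in, size_t nblocks,
                               uint8_t ctr[16], uint8_t ghash[16],
                               const uint8_t h[16], const void *key,
                               aes_block_fn aes_enc, gf_mul_fn gf_mul)
{
    uint8_t ks[16], t[16];
    for (size_t n = 0; n < nblocks; n++, in += 16, out += 16) {
        for (int i = 0; i < 16; i++)      /* GHASH state ^= ciphertext block */
            ghash[i] ^= in[i];
        gf_mul(ghash, h, t);              /* ... then multiply by H */
        memcpy(ghash, t, 16);
        aes_enc(ctr, ks, key);            /* keystream = E_K(counter) */
        for (int i = 15; i >= 12; i--)    /* inc32 */
            if (++ctr[i] != 0)
                break;
        for (int i = 0; i < 16; i++)      /* plaintext = ciphertext ^ keystream */
            out[i] = in[i] ^ ks[i];
    }
}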
+ movdqa %xmm0, %xmm1 + pshufb L_aes_gcm_bswap_epi64, %xmm0 + paddd L_aes_gcm_one, %xmm1 + pxor (%ebp), %xmm0 + movdqu %xmm1, 64(%esp) + movdqu (%esp), %xmm4 + pclmulqdq $16, %xmm5, %xmm4 + aesenc 16(%ebp), %xmm0 + aesenc 32(%ebp), %xmm0 + movdqu (%esp), %xmm7 + pclmulqdq $0x01, %xmm5, %xmm7 + aesenc 48(%ebp), %xmm0 + aesenc 64(%ebp), %xmm0 + aesenc 80(%ebp), %xmm0 + movdqu (%esp), %xmm1 + pclmulqdq $0x11, %xmm5, %xmm1 + aesenc 96(%ebp), %xmm0 + pxor %xmm7, %xmm4 + movdqa %xmm4, %xmm2 + psrldq $8, %xmm4 + pslldq $8, %xmm2 + aesenc 112(%ebp), %xmm0 + movdqu (%esp), %xmm7 + pclmulqdq $0x00, %xmm5, %xmm7 + pxor %xmm7, %xmm2 + pxor %xmm4, %xmm1 + movdqa L_aes_gcm_mod2_128, %xmm3 + movdqa %xmm2, %xmm7 + pclmulqdq $16, %xmm3, %xmm7 + aesenc 128(%ebp), %xmm0 + pshufd $0x4e, %xmm2, %xmm4 + pxor %xmm7, %xmm4 + movdqa %xmm4, %xmm7 + pclmulqdq $16, %xmm3, %xmm7 + aesenc 144(%ebp), %xmm0 + pshufd $0x4e, %xmm4, %xmm6 + pxor %xmm7, %xmm6 + pxor %xmm1, %xmm6 + cmpl $11, 184(%esp) + movdqa 160(%ebp), %xmm1 + jl L_AES_GCM_decrypt_update_aesni_aesenc_gfmul_last + aesenc %xmm1, %xmm0 + aesenc 176(%ebp), %xmm0 + cmpl $13, 184(%esp) + movdqa 192(%ebp), %xmm1 + jl L_AES_GCM_decrypt_update_aesni_aesenc_gfmul_last + aesenc %xmm1, %xmm0 + aesenc 208(%ebp), %xmm0 + movdqa 224(%ebp), %xmm1 +L_AES_GCM_decrypt_update_aesni_aesenc_gfmul_last: + aesenclast %xmm1, %xmm0 + movdqu (%ecx), %xmm1 + pxor %xmm1, %xmm0 + movdqu %xmm0, (%edx) + addl $16, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_decrypt_update_aesni_last_block_start +L_AES_GCM_decrypt_update_aesni_last_block_done: +L_AES_GCM_decrypt_update_aesni_done_dec: + movl 200(%esp), %esi + movl 208(%esp), %edi + movdqu 64(%esp), %xmm4 + movdqa %xmm6, (%esi) + movdqu %xmm4, (%edi) + addl $0xa0, %esp + popl %ebp + popl %edi + popl %esi + popl %ebx + ret +.size AES_GCM_decrypt_update_aesni,.-AES_GCM_decrypt_update_aesni +.text +.globl AES_GCM_decrypt_final_aesni +.type AES_GCM_decrypt_final_aesni,@function +.align 16 +AES_GCM_decrypt_final_aesni: + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + subl $16, %esp + movl 36(%esp), %ebp + movl 56(%esp), %esi + movl 60(%esp), %edi + movdqa (%ebp), %xmm6 + movdqa (%esi), %xmm5 + movdqa (%edi), %xmm7 + movdqa %xmm5, %xmm1 + movdqa %xmm5, %xmm0 + psrlq $63, %xmm1 + psllq $0x01, %xmm0 + pslldq $8, %xmm1 + por %xmm1, %xmm0 + pshufd $0xff, %xmm5, %xmm5 + psrad $31, %xmm5 + pand L_aes_gcm_mod2_128, %xmm5 + pxor %xmm0, %xmm5 + movl 48(%esp), %edx + movl 52(%esp), %ecx + shll $3, %edx + shll $3, %ecx + pinsrd $0x00, %edx, %xmm0 + pinsrd $2, %ecx, %xmm0 + movl 48(%esp), %edx + movl 52(%esp), %ecx + shrl $29, %edx + shrl $29, %ecx + pinsrd $0x01, %edx, %xmm0 + pinsrd $3, %ecx, %xmm0 + pxor %xmm0, %xmm6 + pshufd $0x4e, %xmm5, %xmm1 + pshufd $0x4e, %xmm6, %xmm2 + movdqa %xmm6, %xmm3 + movdqa %xmm6, %xmm0 + pclmulqdq $0x11, %xmm5, %xmm3 + pclmulqdq $0x00, %xmm5, %xmm0 + pxor %xmm5, %xmm1 + pxor %xmm6, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm3, %xmm6 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm0 + pxor %xmm1, %xmm6 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + movdqa %xmm0, %xmm3 + pslld $31, %xmm1 + pslld $30, %xmm2 + pslld $25, %xmm3 + pxor %xmm2, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm3 + psrldq $4, %xmm3 + pslldq $12, %xmm1 + pxor %xmm1, %xmm0 + movdqa %xmm0, %xmm1 + movdqa %xmm0, %xmm2 + psrld $0x01, %xmm1 + psrld $2, %xmm2 + pxor %xmm2, %xmm1 + pxor %xmm0, %xmm1 + psrld $7, %xmm0 + pxor %xmm3, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm1, %xmm6 + pshufb 
L_aes_gcm_bswap_mask, %xmm6 + movdqu %xmm7, %xmm0 + pxor %xmm6, %xmm0 + movl 40(%esp), %esi + movl 64(%esp), %edi + cmpl $16, 44(%esp) + je L_AES_GCM_decrypt_final_aesni_cmp_tag_16 + subl $16, %esp + xorl %ecx, %ecx + xorl %ebx, %ebx + movdqu %xmm0, (%esp) +L_AES_GCM_decrypt_final_aesni_cmp_tag_loop: + movzbl (%esp,%ecx,1), %eax + xorb (%esi,%ecx,1), %al + orb %al, %bl + incl %ecx + cmpl 44(%esp), %ecx + jne L_AES_GCM_decrypt_final_aesni_cmp_tag_loop + cmpb $0x00, %bl + sete %bl + addl $16, %esp + xorl %ecx, %ecx + jmp L_AES_GCM_decrypt_final_aesni_cmp_tag_done +L_AES_GCM_decrypt_final_aesni_cmp_tag_16: + movdqu (%esi), %xmm1 + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm0, %edx + # %%edx == 0xFFFF then return 1 else => return 0 + xorl %ebx, %ebx + cmpl $0xffff, %edx + sete %bl +L_AES_GCM_decrypt_final_aesni_cmp_tag_done: + movl %ebx, (%edi) + addl $16, %esp + popl %ebp + popl %edi + popl %esi + popl %ebx + ret +.size AES_GCM_decrypt_final_aesni,.-AES_GCM_decrypt_final_aesni +#endif /* WOLFSSL_AESGCM_STREAM */ +#ifdef HAVE_INTEL_AVX1 +.text +.globl AES_GCM_encrypt_avx1 +.type AES_GCM_encrypt_avx1,@function +.align 16 +AES_GCM_encrypt_avx1: + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + subl $0x70, %esp + movl 144(%esp), %esi + movl 168(%esp), %ebp + movl 160(%esp), %edx + vpxor %xmm0, %xmm0, %xmm0 + vpxor %xmm2, %xmm2, %xmm2 + cmpl $12, %edx + jne L_AES_GCM_encrypt_avx1_iv_not_12 + # # Calculate values when IV is 12 bytes + # Set counter based on IV + movl $0x1000000, %ecx + vpinsrd $0x00, (%esi), %xmm0, %xmm0 + vpinsrd $0x01, 4(%esi), %xmm0, %xmm0 + vpinsrd $2, 8(%esi), %xmm0, %xmm0 + vpinsrd $3, %ecx, %xmm0, %xmm0 + # H = Encrypt X(=0) and T = Encrypt counter + vmovdqa (%ebp), %xmm1 + vpxor %xmm1, %xmm0, %xmm5 + vmovdqa 16(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 32(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 48(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 64(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 80(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 96(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 112(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 128(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 144(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + cmpl $11, 172(%esp) + vmovdqa 160(%ebp), %xmm3 + jl L_AES_GCM_encrypt_avx1_calc_iv_12_last + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 176(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + cmpl $13, 172(%esp) + vmovdqa 192(%ebp), %xmm3 + jl L_AES_GCM_encrypt_avx1_calc_iv_12_last + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 208(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 224(%ebp), %xmm3 +L_AES_GCM_encrypt_avx1_calc_iv_12_last: + vaesenclast %xmm3, %xmm1, %xmm1 + vaesenclast %xmm3, %xmm5, %xmm5 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm1, %xmm1 + vmovdqu %xmm5, 80(%esp) + jmp L_AES_GCM_encrypt_avx1_iv_done +L_AES_GCM_encrypt_avx1_iv_not_12: + # Calculate values when IV is not 12 bytes + # H = Encrypt X(=0) + vmovdqa (%ebp), %xmm1 + vaesenc 16(%ebp), %xmm1, %xmm1 + vaesenc 32(%ebp), %xmm1, %xmm1 + vaesenc 48(%ebp), %xmm1, %xmm1 + vaesenc 64(%ebp), %xmm1, %xmm1 + vaesenc 80(%ebp), %xmm1, %xmm1 + vaesenc 
96(%ebp), %xmm1, %xmm1 + vaesenc 112(%ebp), %xmm1, %xmm1 + vaesenc 128(%ebp), %xmm1, %xmm1 + vaesenc 144(%ebp), %xmm1, %xmm1 + cmpl $11, 172(%esp) + vmovdqa 160(%ebp), %xmm5 + jl L_AES_GCM_encrypt_avx1_calc_iv_1_aesenc_avx_last + vaesenc %xmm5, %xmm1, %xmm1 + vaesenc 176(%ebp), %xmm1, %xmm1 + cmpl $13, 172(%esp) + vmovdqa 192(%ebp), %xmm5 + jl L_AES_GCM_encrypt_avx1_calc_iv_1_aesenc_avx_last + vaesenc %xmm5, %xmm1, %xmm1 + vaesenc 208(%ebp), %xmm1, %xmm1 + vmovdqa 224(%ebp), %xmm5 +L_AES_GCM_encrypt_avx1_calc_iv_1_aesenc_avx_last: + vaesenclast %xmm5, %xmm1, %xmm1 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm1, %xmm1 + # Calc counter + # Initialization vector + cmpl $0x00, %edx + movl $0x00, %ecx + je L_AES_GCM_encrypt_avx1_calc_iv_done + cmpl $16, %edx + jl L_AES_GCM_encrypt_avx1_calc_iv_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_encrypt_avx1_calc_iv_16_loop: + vmovdqu (%esi,%ecx,1), %xmm4 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm4, %xmm0, %xmm0 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm0, %xmm5 + vpshufd $0x4e, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm7 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpxor %xmm0, %xmm5, %xmm5 + vpxor %xmm1, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vmovdqa %xmm4, %xmm3 + vmovdqa %xmm7, %xmm0 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm5, %xmm0, %xmm0 + vpsrld $31, %xmm3, %xmm4 + vpsrld $31, %xmm0, %xmm5 + vpslld $0x01, %xmm3, %xmm3 + vpslld $0x01, %xmm0, %xmm0 + vpsrldq $12, %xmm4, %xmm6 + vpslldq $4, %xmm4, %xmm4 + vpslldq $4, %xmm5, %xmm5 + vpor %xmm6, %xmm0, %xmm0 + vpor %xmm4, %xmm3, %xmm3 + vpor %xmm5, %xmm0, %xmm0 + vpslld $31, %xmm3, %xmm4 + vpslld $30, %xmm3, %xmm5 + vpslld $25, %xmm3, %xmm6 + vpxor %xmm5, %xmm4, %xmm4 + vpxor %xmm6, %xmm4, %xmm4 + vmovdqa %xmm4, %xmm5 + vpsrldq $4, %xmm5, %xmm5 + vpslldq $12, %xmm4, %xmm4 + vpxor %xmm4, %xmm3, %xmm3 + vpsrld $0x01, %xmm3, %xmm6 + vpsrld $2, %xmm3, %xmm7 + vpsrld $7, %xmm3, %xmm4 + vpxor %xmm7, %xmm6, %xmm6 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm5, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm6, %xmm0, %xmm0 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_avx1_calc_iv_16_loop + movl 160(%esp), %edx + cmpl %edx, %ecx + je L_AES_GCM_encrypt_avx1_calc_iv_done +L_AES_GCM_encrypt_avx1_calc_iv_lt16: + subl $16, %esp + vpxor %xmm4, %xmm4, %xmm4 + xorl %ebx, %ebx + vmovdqu %xmm4, (%esp) +L_AES_GCM_encrypt_avx1_calc_iv_loop: + movzbl (%esi,%ecx,1), %eax + movb %al, (%esp,%ebx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_avx1_calc_iv_loop + vmovdqu (%esp), %xmm4 + addl $16, %esp + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm4, %xmm0, %xmm0 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm0, %xmm5 + vpshufd $0x4e, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm7 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpxor %xmm0, %xmm5, %xmm5 + vpxor %xmm1, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vmovdqa %xmm4, %xmm3 + vmovdqa %xmm7, %xmm0 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm5, %xmm0, %xmm0 + vpsrld $31, %xmm3, %xmm4 + vpsrld $31, %xmm0, %xmm5 + vpslld $0x01, %xmm3, %xmm3 + vpslld $0x01, %xmm0, %xmm0 + vpsrldq $12, %xmm4, %xmm6 + vpslldq $4, %xmm4, %xmm4 + vpslldq $4, %xmm5, %xmm5 + vpor %xmm6, %xmm0, %xmm0 + vpor %xmm4, %xmm3, %xmm3 + vpor %xmm5, %xmm0, %xmm0 + vpslld $31, %xmm3, %xmm4 + vpslld $30, %xmm3, 
%xmm5 + vpslld $25, %xmm3, %xmm6 + vpxor %xmm5, %xmm4, %xmm4 + vpxor %xmm6, %xmm4, %xmm4 + vmovdqa %xmm4, %xmm5 + vpsrldq $4, %xmm5, %xmm5 + vpslldq $12, %xmm4, %xmm4 + vpxor %xmm4, %xmm3, %xmm3 + vpsrld $0x01, %xmm3, %xmm6 + vpsrld $2, %xmm3, %xmm7 + vpsrld $7, %xmm3, %xmm4 + vpxor %xmm7, %xmm6, %xmm6 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm5, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm6, %xmm0, %xmm0 +L_AES_GCM_encrypt_avx1_calc_iv_done: + # T = Encrypt counter + vpxor %xmm4, %xmm4, %xmm4 + shll $3, %edx + vpinsrd $0x00, %edx, %xmm4, %xmm4 + vpxor %xmm4, %xmm0, %xmm0 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm0, %xmm5 + vpshufd $0x4e, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm7 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpxor %xmm0, %xmm5, %xmm5 + vpxor %xmm1, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vmovdqa %xmm4, %xmm3 + vmovdqa %xmm7, %xmm0 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm5, %xmm0, %xmm0 + vpsrld $31, %xmm3, %xmm4 + vpsrld $31, %xmm0, %xmm5 + vpslld $0x01, %xmm3, %xmm3 + vpslld $0x01, %xmm0, %xmm0 + vpsrldq $12, %xmm4, %xmm6 + vpslldq $4, %xmm4, %xmm4 + vpslldq $4, %xmm5, %xmm5 + vpor %xmm6, %xmm0, %xmm0 + vpor %xmm4, %xmm3, %xmm3 + vpor %xmm5, %xmm0, %xmm0 + vpslld $31, %xmm3, %xmm4 + vpslld $30, %xmm3, %xmm5 + vpslld $25, %xmm3, %xmm6 + vpxor %xmm5, %xmm4, %xmm4 + vpxor %xmm6, %xmm4, %xmm4 + vmovdqa %xmm4, %xmm5 + vpsrldq $4, %xmm5, %xmm5 + vpslldq $12, %xmm4, %xmm4 + vpxor %xmm4, %xmm3, %xmm3 + vpsrld $0x01, %xmm3, %xmm6 + vpsrld $2, %xmm3, %xmm7 + vpsrld $7, %xmm3, %xmm4 + vpxor %xmm7, %xmm6, %xmm6 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm5, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm6, %xmm0, %xmm0 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + # Encrypt counter + vmovdqa (%ebp), %xmm4 + vpxor %xmm0, %xmm4, %xmm4 + vaesenc 16(%ebp), %xmm4, %xmm4 + vaesenc 32(%ebp), %xmm4, %xmm4 + vaesenc 48(%ebp), %xmm4, %xmm4 + vaesenc 64(%ebp), %xmm4, %xmm4 + vaesenc 80(%ebp), %xmm4, %xmm4 + vaesenc 96(%ebp), %xmm4, %xmm4 + vaesenc 112(%ebp), %xmm4, %xmm4 + vaesenc 128(%ebp), %xmm4, %xmm4 + vaesenc 144(%ebp), %xmm4, %xmm4 + cmpl $11, 172(%esp) + vmovdqa 160(%ebp), %xmm5 + jl L_AES_GCM_encrypt_avx1_calc_iv_2_aesenc_avx_last + vaesenc %xmm5, %xmm4, %xmm4 + vaesenc 176(%ebp), %xmm4, %xmm4 + cmpl $13, 172(%esp) + vmovdqa 192(%ebp), %xmm5 + jl L_AES_GCM_encrypt_avx1_calc_iv_2_aesenc_avx_last + vaesenc %xmm5, %xmm4, %xmm4 + vaesenc 208(%ebp), %xmm4, %xmm4 + vmovdqa 224(%ebp), %xmm5 +L_AES_GCM_encrypt_avx1_calc_iv_2_aesenc_avx_last: + vaesenclast %xmm5, %xmm4, %xmm4 + vmovdqu %xmm4, 80(%esp) +L_AES_GCM_encrypt_avx1_iv_done: + movl 140(%esp), %esi + # Additional authentication data + movl 156(%esp), %edx + cmpl $0x00, %edx + je L_AES_GCM_encrypt_avx1_calc_aad_done + xorl %ecx, %ecx + cmpl $16, %edx + jl L_AES_GCM_encrypt_avx1_calc_aad_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_encrypt_avx1_calc_aad_16_loop: + vmovdqu (%esi,%ecx,1), %xmm4 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm4, %xmm2, %xmm2 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm2, %xmm5 + vpshufd $0x4e, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm2, %xmm1, %xmm7 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm4 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm1, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vmovdqa %xmm4, %xmm3 + vmovdqa %xmm7, %xmm2 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm3, %xmm3 
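The *_calc_iv_lt16 and *_calc_aad_lt16 byte loops copy any trailing partial IV or AAD block into a zeroed 16-byte stack buffer before folding it into the GHASH state, so GHASH only ever sees full blocks. The same zero-padding in C (a sketch of the padding step only):

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Left-align up to 16 bytes of trailing data in a zeroed block,
 * per the zero padding in SP 800-38D. */
static void ghash_pad_partial(uint8_t padded[16], const uint8_t *data,
                              size_t len)
{
    memset(padded, 0, 16);
    if (len > 16)
        len = 16;
    memcpy(padded, data, len);
}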
+ vpxor %xmm5, %xmm2, %xmm2 + vpsrld $31, %xmm3, %xmm4 + vpsrld $31, %xmm2, %xmm5 + vpslld $0x01, %xmm3, %xmm3 + vpslld $0x01, %xmm2, %xmm2 + vpsrldq $12, %xmm4, %xmm6 + vpslldq $4, %xmm4, %xmm4 + vpslldq $4, %xmm5, %xmm5 + vpor %xmm6, %xmm2, %xmm2 + vpor %xmm4, %xmm3, %xmm3 + vpor %xmm5, %xmm2, %xmm2 + vpslld $31, %xmm3, %xmm4 + vpslld $30, %xmm3, %xmm5 + vpslld $25, %xmm3, %xmm6 + vpxor %xmm5, %xmm4, %xmm4 + vpxor %xmm6, %xmm4, %xmm4 + vmovdqa %xmm4, %xmm5 + vpsrldq $4, %xmm5, %xmm5 + vpslldq $12, %xmm4, %xmm4 + vpxor %xmm4, %xmm3, %xmm3 + vpsrld $0x01, %xmm3, %xmm6 + vpsrld $2, %xmm3, %xmm7 + vpsrld $7, %xmm3, %xmm4 + vpxor %xmm7, %xmm6, %xmm6 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm5, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm6, %xmm2, %xmm2 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_avx1_calc_aad_16_loop + movl 156(%esp), %edx + cmpl %edx, %ecx + je L_AES_GCM_encrypt_avx1_calc_aad_done +L_AES_GCM_encrypt_avx1_calc_aad_lt16: + subl $16, %esp + vpxor %xmm4, %xmm4, %xmm4 + xorl %ebx, %ebx + vmovdqu %xmm4, (%esp) +L_AES_GCM_encrypt_avx1_calc_aad_loop: + movzbl (%esi,%ecx,1), %eax + movb %al, (%esp,%ebx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_avx1_calc_aad_loop + vmovdqu (%esp), %xmm4 + addl $16, %esp + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm4, %xmm2, %xmm2 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm2, %xmm5 + vpshufd $0x4e, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm2, %xmm1, %xmm7 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm4 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm1, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vmovdqa %xmm4, %xmm3 + vmovdqa %xmm7, %xmm2 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm5, %xmm2, %xmm2 + vpsrld $31, %xmm3, %xmm4 + vpsrld $31, %xmm2, %xmm5 + vpslld $0x01, %xmm3, %xmm3 + vpslld $0x01, %xmm2, %xmm2 + vpsrldq $12, %xmm4, %xmm6 + vpslldq $4, %xmm4, %xmm4 + vpslldq $4, %xmm5, %xmm5 + vpor %xmm6, %xmm2, %xmm2 + vpor %xmm4, %xmm3, %xmm3 + vpor %xmm5, %xmm2, %xmm2 + vpslld $31, %xmm3, %xmm4 + vpslld $30, %xmm3, %xmm5 + vpslld $25, %xmm3, %xmm6 + vpxor %xmm5, %xmm4, %xmm4 + vpxor %xmm6, %xmm4, %xmm4 + vmovdqa %xmm4, %xmm5 + vpsrldq $4, %xmm5, %xmm5 + vpslldq $12, %xmm4, %xmm4 + vpxor %xmm4, %xmm3, %xmm3 + vpsrld $0x01, %xmm3, %xmm6 + vpsrld $2, %xmm3, %xmm7 + vpsrld $7, %xmm3, %xmm4 + vpxor %xmm7, %xmm6, %xmm6 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm5, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm6, %xmm2, %xmm2 +L_AES_GCM_encrypt_avx1_calc_aad_done: + vmovdqu %xmm2, 96(%esp) + movl 132(%esp), %esi + movl 136(%esp), %edi + # Calculate counter and H + vpsrlq $63, %xmm1, %xmm5 + vpsllq $0x01, %xmm1, %xmm4 + vpslldq $8, %xmm5, %xmm5 + vpor %xmm5, %xmm4, %xmm4 + vpshufd $0xff, %xmm1, %xmm1 + vpsrad $31, %xmm1, %xmm1 + vpshufb L_aes_gcm_avx1_bswap_epi64, %xmm0, %xmm0 + vpand L_aes_gcm_avx1_mod2_128, %xmm1, %xmm1 + vpaddd L_aes_gcm_avx1_one, %xmm0, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu %xmm0, 64(%esp) + xorl %ebx, %ebx + cmpl $0x40, 152(%esp) + movl 152(%esp), %eax + jl L_AES_GCM_encrypt_avx1_done_64 + andl $0xffffffc0, %eax + vmovdqa %xmm2, %xmm6 + # H ^ 1 + vmovdqu %xmm1, (%esp) + # H ^ 2 + vpclmulqdq $0x00, %xmm1, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm1, %xmm1, %xmm0 + vpslld $31, %xmm4, %xmm5 + vpslld $30, %xmm4, %xmm6 + vpslld $25, %xmm4, %xmm7 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpsrldq $4, %xmm5, %xmm7 + vpslldq $12, %xmm5, %xmm5 + vpxor %xmm5, %xmm4, %xmm4 + 
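The "# H ^ 1" through "# H ^ 4" blocks precompute powers of the hash key so the 64-byte loop can absorb four blocks per pass, using Y' = (Y ^ X1)*H^4 ^ X2*H^3 ^ X3*H^2 ^ X4*H, which equals four sequential GHASH updates. A sketch of that precomputation, with gf_mul standing in for the carry-less multiply-and-reduce routine:

#include <stdint.h>
#include <string.h>

typedef void (*gf_mul_fn)(const uint8_t x[16], const uint8_t y[16],
                          uint8_t out[16]);

static void ghash_precompute_h(const uint8_t h[16], uint8_t hpow[4][16],
                               gf_mul_fn gf_mul)
{
    memcpy(hpow[0], h, 16);               /* H^1 */
    gf_mul(h, h, hpow[1]);                /* H^2 = H*H (squaring) */
    gf_mul(hpow[1], h, hpow[2]);          /* H^3 = H^2*H */
    gf_mul(hpow[1], hpow[1], hpow[3]);    /* H^4 = (H^2)^2 */
}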
vpsrld $0x01, %xmm4, %xmm5 + vpsrld $2, %xmm4, %xmm6 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpsrld $7, %xmm4, %xmm4 + vpxor %xmm7, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm0, %xmm0 + vmovdqu %xmm0, 16(%esp) + # H ^ 3 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm1, %xmm5 + vpshufd $0x4e, %xmm0, %xmm6 + vpclmulqdq $0x11, %xmm1, %xmm0, %xmm7 + vpclmulqdq $0x00, %xmm1, %xmm0, %xmm4 + vpxor %xmm1, %xmm5, %xmm5 + vpxor %xmm0, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm4, %xmm4 + vpxor %xmm5, %xmm7, %xmm3 + vpslld $31, %xmm4, %xmm5 + vpslld $30, %xmm4, %xmm6 + vpslld $25, %xmm4, %xmm7 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpsrldq $4, %xmm5, %xmm7 + vpslldq $12, %xmm5, %xmm5 + vpxor %xmm5, %xmm4, %xmm4 + vpsrld $0x01, %xmm4, %xmm5 + vpsrld $2, %xmm4, %xmm6 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpsrld $7, %xmm4, %xmm4 + vpxor %xmm7, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm3, %xmm3 + vmovdqu %xmm3, 32(%esp) + # H ^ 4 + vpclmulqdq $0x00, %xmm0, %xmm0, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm0, %xmm3 + vpslld $31, %xmm4, %xmm5 + vpslld $30, %xmm4, %xmm6 + vpslld $25, %xmm4, %xmm7 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpsrldq $4, %xmm5, %xmm7 + vpslldq $12, %xmm5, %xmm5 + vpxor %xmm5, %xmm4, %xmm4 + vpsrld $0x01, %xmm4, %xmm5 + vpsrld $2, %xmm4, %xmm6 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpsrld $7, %xmm4, %xmm4 + vpxor %xmm7, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm3, %xmm3 + vmovdqu %xmm3, 48(%esp) + # First 64 bytes of input + vmovdqu 64(%esp), %xmm4 + vmovdqa L_aes_gcm_avx1_bswap_epi64, %xmm3 + vpaddd L_aes_gcm_avx1_one, %xmm4, %xmm5 + vpshufb %xmm3, %xmm5, %xmm5 + vpaddd L_aes_gcm_avx1_two, %xmm4, %xmm6 + vpshufb %xmm3, %xmm6, %xmm6 + vpaddd L_aes_gcm_avx1_three, %xmm4, %xmm7 + vpshufb %xmm3, %xmm7, %xmm7 + vpshufb %xmm3, %xmm4, %xmm4 + vmovdqu 64(%esp), %xmm3 + vpaddd L_aes_gcm_avx1_four, %xmm3, %xmm3 + vmovdqu %xmm3, 64(%esp) + vmovdqa (%ebp), %xmm3 + vpxor %xmm3, %xmm4, %xmm4 + vpxor %xmm3, %xmm5, %xmm5 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm3, %xmm7, %xmm7 + vmovdqa 16(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 32(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 48(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 64(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 80(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 96(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 112(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 128(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 144(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc 
%xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + cmpl $11, 172(%esp) + vmovdqa 160(%ebp), %xmm3 + jl L_AES_GCM_encrypt_avx1_aesenc_64_enc_done + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 176(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + cmpl $13, 172(%esp) + vmovdqa 192(%ebp), %xmm3 + jl L_AES_GCM_encrypt_avx1_aesenc_64_enc_done + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 208(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 224(%ebp), %xmm3 +L_AES_GCM_encrypt_avx1_aesenc_64_enc_done: + vaesenclast %xmm3, %xmm4, %xmm4 + vaesenclast %xmm3, %xmm5, %xmm5 + vmovdqu (%esi), %xmm0 + vmovdqu 16(%esi), %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm1, %xmm5, %xmm5 + vmovdqu %xmm0, (%esi) + vmovdqu %xmm1, 16(%esi) + vmovdqu %xmm4, (%edi) + vmovdqu %xmm5, 16(%edi) + vaesenclast %xmm3, %xmm6, %xmm6 + vaesenclast %xmm3, %xmm7, %xmm7 + vmovdqu 32(%esi), %xmm0 + vmovdqu 48(%esi), %xmm1 + vpxor %xmm0, %xmm6, %xmm6 + vpxor %xmm1, %xmm7, %xmm7 + vmovdqu %xmm0, 32(%esi) + vmovdqu %xmm1, 48(%esi) + vmovdqu %xmm6, 32(%edi) + vmovdqu %xmm7, 48(%edi) + cmpl $0x40, %eax + movl $0x40, %ebx + movl %esi, %ecx + movl %edi, %edx + jle L_AES_GCM_encrypt_avx1_end_64 + # More 64 bytes of input +L_AES_GCM_encrypt_avx1_ghash_64: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + vmovdqu 64(%esp), %xmm4 + vmovdqa L_aes_gcm_avx1_bswap_epi64, %xmm3 + vpaddd L_aes_gcm_avx1_one, %xmm4, %xmm5 + vpshufb %xmm3, %xmm5, %xmm5 + vpaddd L_aes_gcm_avx1_two, %xmm4, %xmm6 + vpshufb %xmm3, %xmm6, %xmm6 + vpaddd L_aes_gcm_avx1_three, %xmm4, %xmm7 + vpshufb %xmm3, %xmm7, %xmm7 + vpshufb %xmm3, %xmm4, %xmm4 + vmovdqu 64(%esp), %xmm3 + vpaddd L_aes_gcm_avx1_four, %xmm3, %xmm3 + vmovdqu %xmm3, 64(%esp) + vmovdqa (%ebp), %xmm3 + vpxor %xmm3, %xmm4, %xmm4 + vpxor %xmm3, %xmm5, %xmm5 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm3, %xmm7, %xmm7 + vmovdqa 16(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 32(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 48(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 64(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 80(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 96(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 112(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 128(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 144(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + cmpl $11, 172(%esp) + vmovdqa 160(%ebp), %xmm3 + jl 
L_AES_GCM_encrypt_avx1_aesenc_64_ghash_avx_aesenc_64_enc_done + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 176(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + cmpl $13, 172(%esp) + vmovdqa 192(%ebp), %xmm3 + jl L_AES_GCM_encrypt_avx1_aesenc_64_ghash_avx_aesenc_64_enc_done + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 208(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 224(%ebp), %xmm3 +L_AES_GCM_encrypt_avx1_aesenc_64_ghash_avx_aesenc_64_enc_done: + vaesenclast %xmm3, %xmm4, %xmm4 + vaesenclast %xmm3, %xmm5, %xmm5 + vmovdqu (%ecx), %xmm0 + vmovdqu 16(%ecx), %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm1, %xmm5, %xmm5 + vmovdqu %xmm4, (%edx) + vmovdqu %xmm5, 16(%edx) + vaesenclast %xmm3, %xmm6, %xmm6 + vaesenclast %xmm3, %xmm7, %xmm7 + vmovdqu 32(%ecx), %xmm0 + vmovdqu 48(%ecx), %xmm1 + vpxor %xmm0, %xmm6, %xmm6 + vpxor %xmm1, %xmm7, %xmm7 + vmovdqu %xmm6, 32(%edx) + vmovdqu %xmm7, 48(%edx) + # ghash encrypted counter + vmovdqu 96(%esp), %xmm6 + vmovdqu 48(%esp), %xmm3 + vmovdqu -64(%edx), %xmm4 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm6, %xmm4, %xmm4 + vpshufd $0x4e, %xmm3, %xmm5 + vpshufd $0x4e, %xmm4, %xmm1 + vpxor %xmm3, %xmm5, %xmm5 + vpxor %xmm4, %xmm1, %xmm1 + vpclmulqdq $0x11, %xmm3, %xmm4, %xmm7 + vpclmulqdq $0x00, %xmm3, %xmm4, %xmm6 + vpclmulqdq $0x00, %xmm1, %xmm5, %xmm5 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vmovdqu 32(%esp), %xmm3 + vmovdqu -48(%edx), %xmm4 + vpshufd $0x4e, %xmm3, %xmm0 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm3, %xmm0, %xmm0 + vpshufd $0x4e, %xmm4, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpclmulqdq $0x11, %xmm3, %xmm4, %xmm2 + vpclmulqdq $0x00, %xmm3, %xmm4, %xmm3 + vpclmulqdq $0x00, %xmm1, %xmm0, %xmm0 + vpxor %xmm3, %xmm5, %xmm5 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm0, %xmm5, %xmm5 + vmovdqu 16(%esp), %xmm3 + vmovdqu -32(%edx), %xmm4 + vpshufd $0x4e, %xmm3, %xmm0 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm3, %xmm0, %xmm0 + vpshufd $0x4e, %xmm4, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpclmulqdq $0x11, %xmm3, %xmm4, %xmm2 + vpclmulqdq $0x00, %xmm3, %xmm4, %xmm3 + vpclmulqdq $0x00, %xmm1, %xmm0, %xmm0 + vpxor %xmm3, %xmm5, %xmm5 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm0, %xmm5, %xmm5 + vmovdqu (%esp), %xmm3 + vmovdqu -16(%edx), %xmm4 + vpshufd $0x4e, %xmm3, %xmm0 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm3, %xmm0, %xmm0 + vpshufd $0x4e, %xmm4, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpclmulqdq $0x11, %xmm3, %xmm4, %xmm2 + vpclmulqdq $0x00, %xmm3, %xmm4, %xmm3 + vpclmulqdq $0x00, %xmm1, %xmm0, %xmm0 + vpxor %xmm3, %xmm5, %xmm5 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm0, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm1 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm5, %xmm7, %xmm7 + vpslld $31, %xmm6, %xmm3 + vpslld $30, %xmm6, %xmm0 + vpslld $25, %xmm6, %xmm1 + vpxor %xmm0, %xmm3, %xmm3 + vpxor %xmm1, %xmm3, %xmm3 + vpsrldq $4, %xmm3, %xmm0 + vpslldq $12, %xmm3, %xmm3 + vpxor %xmm3, %xmm6, %xmm6 + vpsrld $0x01, %xmm6, %xmm1 + vpsrld $2, %xmm6, %xmm5 + 
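The cmpl $11 / cmpl $13 ladders decide how many extra rounds to run: the rounds argument is 10, 12 or 14 for AES-128/-192/-256, and the round keys sit at consecutive 16-byte offsets in the schedule, with the last key applied by aesenclast. The same control flow in C, with aes_round and aes_last_round as hypothetical stand-ins for aesenc/aesenclast:

#include <stdint.h>

typedef void (*aes_round_fn)(uint8_t state[16], const uint8_t round_key[16]);

static void aes_encrypt_block_ref(uint8_t state[16], const uint8_t *ks,
                                  int rounds, aes_round_fn aes_round,
                                  aes_round_fn aes_last_round)
{
    for (int i = 0; i < 16; i++)              /* initial AddRoundKey */
        state[i] ^= ks[i];
    for (int r = 1; r < rounds; r++)          /* rounds-1 full rounds */
        aes_round(state, ks + 16 * r);
    aes_last_round(state, ks + 16 * rounds);  /* final round, no MixColumns */
}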
vpsrld $7, %xmm6, %xmm4 + vpxor %xmm5, %xmm1, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm7, %xmm6, %xmm6 + vmovdqu %xmm6, 96(%esp) + addl $0x40, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_encrypt_avx1_ghash_64 +L_AES_GCM_encrypt_avx1_end_64: + vmovdqu 96(%esp), %xmm2 + # Block 1 + vmovdqa L_aes_gcm_avx1_bswap_mask, %xmm4 + vmovdqa (%edx), %xmm1 + vpshufb %xmm4, %xmm1, %xmm1 + vmovdqu 48(%esp), %xmm3 + vpxor %xmm2, %xmm1, %xmm1 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm1, %xmm5 + vpshufd $0x4e, %xmm3, %xmm6 + vpclmulqdq $0x11, %xmm1, %xmm3, %xmm7 + vpclmulqdq $0x00, %xmm1, %xmm3, %xmm4 + vpxor %xmm1, %xmm5, %xmm5 + vpxor %xmm3, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vmovdqa %xmm4, %xmm0 + vmovdqa %xmm7, %xmm2 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm0, %xmm0 + vpxor %xmm5, %xmm2, %xmm2 + # Block 2 + vmovdqa L_aes_gcm_avx1_bswap_mask, %xmm4 + vmovdqa 16(%edx), %xmm1 + vpshufb %xmm4, %xmm1, %xmm1 + vmovdqu 32(%esp), %xmm3 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm1, %xmm5 + vpshufd $0x4e, %xmm3, %xmm6 + vpclmulqdq $0x11, %xmm1, %xmm3, %xmm7 + vpclmulqdq $0x00, %xmm1, %xmm3, %xmm4 + vpxor %xmm1, %xmm5, %xmm5 + vpxor %xmm3, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpxor %xmm4, %xmm0, %xmm0 + vpxor %xmm7, %xmm2, %xmm2 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm0, %xmm0 + vpxor %xmm5, %xmm2, %xmm2 + # Block 3 + vmovdqa L_aes_gcm_avx1_bswap_mask, %xmm4 + vmovdqa 32(%edx), %xmm1 + vpshufb %xmm4, %xmm1, %xmm1 + vmovdqu 16(%esp), %xmm3 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm1, %xmm5 + vpshufd $0x4e, %xmm3, %xmm6 + vpclmulqdq $0x11, %xmm1, %xmm3, %xmm7 + vpclmulqdq $0x00, %xmm1, %xmm3, %xmm4 + vpxor %xmm1, %xmm5, %xmm5 + vpxor %xmm3, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpxor %xmm4, %xmm0, %xmm0 + vpxor %xmm7, %xmm2, %xmm2 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm0, %xmm0 + vpxor %xmm5, %xmm2, %xmm2 + # Block 4 + vmovdqa L_aes_gcm_avx1_bswap_mask, %xmm4 + vmovdqa 48(%edx), %xmm1 + vpshufb %xmm4, %xmm1, %xmm1 + vmovdqu (%esp), %xmm3 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm1, %xmm5 + vpshufd $0x4e, %xmm3, %xmm6 + vpclmulqdq $0x11, %xmm1, %xmm3, %xmm7 + vpclmulqdq $0x00, %xmm1, %xmm3, %xmm4 + vpxor %xmm1, %xmm5, %xmm5 + vpxor %xmm3, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpxor %xmm4, %xmm0, %xmm0 + vpxor %xmm7, %xmm2, %xmm2 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm0, %xmm0 + vpxor %xmm5, %xmm2, %xmm2 + vpslld $31, %xmm0, %xmm4 + vpslld $30, %xmm0, %xmm5 + vpslld $25, %xmm0, %xmm6 + vpxor %xmm5, %xmm4, %xmm4 + vpxor %xmm6, %xmm4, %xmm4 + vmovdqa %xmm4, %xmm5 + vpsrldq $4, %xmm5, %xmm5 + vpslldq $12, %xmm4, %xmm4 + vpxor %xmm4, %xmm0, %xmm0 + vpsrld $0x01, %xmm0, %xmm6 + vpsrld $2, %xmm0, %xmm7 + vpsrld $7, %xmm0, %xmm4 + vpxor %xmm7, %xmm6, %xmm6 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm5, %xmm6, %xmm6 + vpxor %xmm0, %xmm6, %xmm6 + vpxor %xmm6, %xmm2, %xmm2 + vmovdqu (%esp), %xmm1 +L_AES_GCM_encrypt_avx1_done_64: + movl 152(%esp), %edx + cmpl %edx, %ebx + jge L_AES_GCM_encrypt_avx1_done_enc + movl 152(%esp), %eax + andl $0xfffffff0, %eax + cmpl %eax, %ebx + jge L_AES_GCM_encrypt_avx1_last_block_done + leal (%esi,%ebx,1), %ecx + leal 
(%edi,%ebx,1), %edx + vmovdqu 64(%esp), %xmm5 + vpshufb L_aes_gcm_avx1_bswap_epi64, %xmm5, %xmm4 + vpaddd L_aes_gcm_avx1_one, %xmm5, %xmm5 + vmovdqu %xmm5, 64(%esp) + vpxor (%ebp), %xmm4, %xmm4 + vaesenc 16(%ebp), %xmm4, %xmm4 + vaesenc 32(%ebp), %xmm4, %xmm4 + vaesenc 48(%ebp), %xmm4, %xmm4 + vaesenc 64(%ebp), %xmm4, %xmm4 + vaesenc 80(%ebp), %xmm4, %xmm4 + vaesenc 96(%ebp), %xmm4, %xmm4 + vaesenc 112(%ebp), %xmm4, %xmm4 + vaesenc 128(%ebp), %xmm4, %xmm4 + vaesenc 144(%ebp), %xmm4, %xmm4 + cmpl $11, 172(%esp) + vmovdqa 160(%ebp), %xmm5 + jl L_AES_GCM_encrypt_avx1_aesenc_block_aesenc_avx_last + vaesenc %xmm5, %xmm4, %xmm4 + vaesenc 176(%ebp), %xmm4, %xmm4 + cmpl $13, 172(%esp) + vmovdqa 192(%ebp), %xmm5 + jl L_AES_GCM_encrypt_avx1_aesenc_block_aesenc_avx_last + vaesenc %xmm5, %xmm4, %xmm4 + vaesenc 208(%ebp), %xmm4, %xmm4 + vmovdqa 224(%ebp), %xmm5 +L_AES_GCM_encrypt_avx1_aesenc_block_aesenc_avx_last: + vaesenclast %xmm5, %xmm4, %xmm4 + vmovdqu (%ecx), %xmm5 + vpxor %xmm5, %xmm4, %xmm4 + vmovdqu %xmm4, (%edx) + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm4, %xmm2, %xmm2 + addl $16, %ebx + cmpl %eax, %ebx + jge L_AES_GCM_encrypt_avx1_last_block_ghash +L_AES_GCM_encrypt_avx1_last_block_start: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + vmovdqu 64(%esp), %xmm5 + vmovdqu %xmm2, %xmm7 + vpshufb L_aes_gcm_avx1_bswap_epi64, %xmm5, %xmm4 + vpaddd L_aes_gcm_avx1_one, %xmm5, %xmm5 + vmovdqu %xmm5, 64(%esp) + vpxor (%ebp), %xmm4, %xmm4 + vpclmulqdq $16, %xmm1, %xmm7, %xmm0 + vaesenc 16(%ebp), %xmm4, %xmm4 + vaesenc 32(%ebp), %xmm4, %xmm4 + vpclmulqdq $0x01, %xmm1, %xmm7, %xmm3 + vaesenc 48(%ebp), %xmm4, %xmm4 + vaesenc 64(%ebp), %xmm4, %xmm4 + vaesenc 80(%ebp), %xmm4, %xmm4 + vpclmulqdq $0x11, %xmm1, %xmm7, %xmm5 + vaesenc 96(%ebp), %xmm4, %xmm4 + vpxor %xmm3, %xmm0, %xmm0 + vpslldq $8, %xmm0, %xmm6 + vpsrldq $8, %xmm0, %xmm0 + vaesenc 112(%ebp), %xmm4, %xmm4 + vpclmulqdq $0x00, %xmm1, %xmm7, %xmm3 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm0, %xmm5, %xmm5 + vmovdqa L_aes_gcm_avx1_mod2_128, %xmm7 + vpclmulqdq $16, %xmm7, %xmm6, %xmm3 + vaesenc 128(%ebp), %xmm4, %xmm4 + vpshufd $0x4e, %xmm6, %xmm0 + vpxor %xmm3, %xmm0, %xmm0 + vpclmulqdq $16, %xmm7, %xmm0, %xmm3 + vaesenc 144(%ebp), %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm2 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm5, %xmm2, %xmm2 + cmpl $11, 172(%esp) + vmovdqa 160(%ebp), %xmm5 + jl L_AES_GCM_encrypt_avx1_aesenc_gfmul_last + vaesenc %xmm5, %xmm4, %xmm4 + vaesenc 176(%ebp), %xmm4, %xmm4 + cmpl $13, 172(%esp) + vmovdqa 192(%ebp), %xmm5 + jl L_AES_GCM_encrypt_avx1_aesenc_gfmul_last + vaesenc %xmm5, %xmm4, %xmm4 + vaesenc 208(%ebp), %xmm4, %xmm4 + vmovdqa 224(%ebp), %xmm5 +L_AES_GCM_encrypt_avx1_aesenc_gfmul_last: + vaesenclast %xmm5, %xmm4, %xmm4 + vmovdqu (%ecx), %xmm5 + vpxor %xmm5, %xmm4, %xmm4 + vmovdqu %xmm4, (%edx) + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + addl $16, %ebx + vpxor %xmm4, %xmm2, %xmm2 + cmpl %eax, %ebx + jl L_AES_GCM_encrypt_avx1_last_block_start +L_AES_GCM_encrypt_avx1_last_block_ghash: + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm1, %xmm5 + vpshufd $0x4e, %xmm2, %xmm6 + vpclmulqdq $0x11, %xmm1, %xmm2, %xmm7 + vpclmulqdq $0x00, %xmm1, %xmm2, %xmm4 + vpxor %xmm1, %xmm5, %xmm5 + vpxor %xmm2, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm4, %xmm4 + vpxor %xmm5, %xmm7, %xmm2 + vpslld $31, %xmm4, %xmm5 + vpslld $30, %xmm4, %xmm6 + vpslld $25, %xmm4, %xmm7 + vpxor %xmm6, 
%xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpsrldq $4, %xmm5, %xmm7 + vpslldq $12, %xmm5, %xmm5 + vpxor %xmm5, %xmm4, %xmm4 + vpsrld $0x01, %xmm4, %xmm5 + vpsrld $2, %xmm4, %xmm6 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpsrld $7, %xmm4, %xmm4 + vpxor %xmm7, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm2, %xmm2 +L_AES_GCM_encrypt_avx1_last_block_done: + movl 152(%esp), %ecx + movl %ecx, %edx + andl $15, %ecx + jz L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_done + vmovdqu 64(%esp), %xmm0 + vpshufb L_aes_gcm_avx1_bswap_epi64, %xmm0, %xmm0 + vpxor (%ebp), %xmm0, %xmm0 + vaesenc 16(%ebp), %xmm0, %xmm0 + vaesenc 32(%ebp), %xmm0, %xmm0 + vaesenc 48(%ebp), %xmm0, %xmm0 + vaesenc 64(%ebp), %xmm0, %xmm0 + vaesenc 80(%ebp), %xmm0, %xmm0 + vaesenc 96(%ebp), %xmm0, %xmm0 + vaesenc 112(%ebp), %xmm0, %xmm0 + vaesenc 128(%ebp), %xmm0, %xmm0 + vaesenc 144(%ebp), %xmm0, %xmm0 + cmpl $11, 172(%esp) + vmovdqa 160(%ebp), %xmm5 + jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last + vaesenc %xmm5, %xmm0, %xmm0 + vaesenc 176(%ebp), %xmm0, %xmm0 + cmpl $13, 172(%esp) + vmovdqa 192(%ebp), %xmm5 + jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last + vaesenc %xmm5, %xmm0, %xmm0 + vaesenc 208(%ebp), %xmm0, %xmm0 + vmovdqa 224(%ebp), %xmm5 +L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last: + vaesenclast %xmm5, %xmm0, %xmm0 + subl $16, %esp + xorl %ecx, %ecx + vmovdqu %xmm0, (%esp) +L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_loop: + movzbl (%esi,%ebx,1), %eax + xorb (%esp,%ecx,1), %al + movb %al, (%edi,%ebx,1) + movb %al, (%esp,%ecx,1) + incl %ebx + incl %ecx + cmpl %edx, %ebx + jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_loop + xorl %eax, %eax + cmpl $16, %ecx + je L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_finish_enc +L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_byte_loop: + movb %al, (%esp,%ecx,1) + incl %ecx + cmpl $16, %ecx + jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_byte_loop +L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_finish_enc: + vmovdqu (%esp), %xmm0 + addl $16, %esp + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm2, %xmm2 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm1, %xmm5 + vpshufd $0x4e, %xmm2, %xmm6 + vpclmulqdq $0x11, %xmm1, %xmm2, %xmm7 + vpclmulqdq $0x00, %xmm1, %xmm2, %xmm4 + vpxor %xmm1, %xmm5, %xmm5 + vpxor %xmm2, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm4, %xmm4 + vpxor %xmm5, %xmm7, %xmm2 + vpslld $31, %xmm4, %xmm5 + vpslld $30, %xmm4, %xmm6 + vpslld $25, %xmm4, %xmm7 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpsrldq $4, %xmm5, %xmm7 + vpslldq $12, %xmm5, %xmm5 + vpxor %xmm5, %xmm4, %xmm4 + vpsrld $0x01, %xmm4, %xmm5 + vpsrld $2, %xmm4, %xmm6 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpsrld $7, %xmm4, %xmm4 + vpxor %xmm7, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm2, %xmm2 +L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_done: +L_AES_GCM_encrypt_avx1_done_enc: + movl 148(%esp), %edi + movl 164(%esp), %ebx + movl 152(%esp), %edx + movl 156(%esp), %ecx + shll $3, %edx + shll $3, %ecx + vpinsrd $0x00, %edx, %xmm4, %xmm4 + vpinsrd $2, %ecx, %xmm4, %xmm4 + movl 152(%esp), %edx + movl 156(%esp), %ecx + shrl $29, %edx + shrl $29, %ecx + vpinsrd $0x01, %edx, %xmm4, %xmm4 + vpinsrd $3, %ecx, %xmm4, %xmm4 + vpxor %xmm4, %xmm2, %xmm2 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm1, %xmm5 + vpshufd $0x4e, %xmm2, %xmm6 
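The shll $3 / shrl $29 pairs being assembled here build the final GHASH block holding the AAD and message lengths in bits; that block is hashed, XORed with the encrypted initial counter computed earlier, and the result is truncated to the requested tag size (the byte loop handles tags shorter than 16 bytes). A spec-level sketch of this tail, with ghash_update and the E_K(J0) value as assumed inputs:

#include <stdint.h>
#include <stddef.h>
#include <string.h>

static void gcm_final_tag(uint8_t ghash[16], const uint8_t ek_j0[16],
                          uint64_t aad_len, uint64_t msg_len,
                          uint8_t *tag, size_t tag_len,
                          void (*ghash_update)(uint8_t state[16],
                                               const uint8_t block[16]))
{
    uint8_t len_block[16];
    uint64_t abits = aad_len * 8, cbits = msg_len * 8;
    for (int i = 0; i < 8; i++) {                 /* 64-bit big-endian lengths */
        len_block[i]     = (uint8_t)(abits >> (56 - 8 * i));
        len_block[8 + i] = (uint8_t)(cbits >> (56 - 8 * i));
    }
    ghash_update(ghash, len_block);               /* S = GHASH(..., lenA||lenC) */

    uint8_t full_tag[16];
    for (int i = 0; i < 16; i++)                  /* T = E_K(J0) xor S */
        full_tag[i] = ghash[i] ^ ek_j0[i];
    if (tag_len > 16)
        tag_len = 16;
    memcpy(tag, full_tag, tag_len);               /* truncate to tag length */
}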
+ vpclmulqdq $0x11, %xmm1, %xmm2, %xmm7 + vpclmulqdq $0x00, %xmm1, %xmm2, %xmm4 + vpxor %xmm1, %xmm5, %xmm5 + vpxor %xmm2, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm4, %xmm4 + vpxor %xmm5, %xmm7, %xmm2 + vpslld $31, %xmm4, %xmm5 + vpslld $30, %xmm4, %xmm6 + vpslld $25, %xmm4, %xmm7 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpsrldq $4, %xmm5, %xmm7 + vpslldq $12, %xmm5, %xmm5 + vpxor %xmm5, %xmm4, %xmm4 + vpsrld $0x01, %xmm4, %xmm5 + vpsrld $2, %xmm4, %xmm6 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpsrld $7, %xmm4, %xmm4 + vpxor %xmm7, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm2, %xmm2 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm2, %xmm2 + vpxor 80(%esp), %xmm2, %xmm4 + cmpl $16, %ebx + je L_AES_GCM_encrypt_avx1_store_tag_16 + xorl %ecx, %ecx + vmovdqu %xmm4, (%esp) +L_AES_GCM_encrypt_avx1_store_tag_loop: + movzbl (%esp,%ecx,1), %eax + movb %al, (%edi,%ecx,1) + incl %ecx + cmpl %ebx, %ecx + jne L_AES_GCM_encrypt_avx1_store_tag_loop + jmp L_AES_GCM_encrypt_avx1_store_tag_done +L_AES_GCM_encrypt_avx1_store_tag_16: + vmovdqu %xmm4, (%edi) +L_AES_GCM_encrypt_avx1_store_tag_done: + addl $0x70, %esp + popl %ebp + popl %edi + popl %esi + popl %ebx + ret +.size AES_GCM_encrypt_avx1,.-AES_GCM_encrypt_avx1 +.text +.globl AES_GCM_decrypt_avx1 +.type AES_GCM_decrypt_avx1,@function +.align 16 +AES_GCM_decrypt_avx1: + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + subl $0xb0, %esp + movl 208(%esp), %esi + movl 232(%esp), %ebp + movl 224(%esp), %edx + vpxor %xmm0, %xmm0, %xmm0 + vpxor %xmm2, %xmm2, %xmm2 + cmpl $12, %edx + jne L_AES_GCM_decrypt_avx1_iv_not_12 + # # Calculate values when IV is 12 bytes + # Set counter based on IV + movl $0x1000000, %ecx + vpinsrd $0x00, (%esi), %xmm0, %xmm0 + vpinsrd $0x01, 4(%esi), %xmm0, %xmm0 + vpinsrd $2, 8(%esi), %xmm0, %xmm0 + vpinsrd $3, %ecx, %xmm0, %xmm0 + # H = Encrypt X(=0) and T = Encrypt counter + vmovdqa (%ebp), %xmm1 + vpxor %xmm1, %xmm0, %xmm5 + vmovdqa 16(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 32(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 48(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 64(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 80(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 96(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 112(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 128(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 144(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + cmpl $11, 236(%esp) + vmovdqa 160(%ebp), %xmm3 + jl L_AES_GCM_decrypt_avx1_calc_iv_12_last + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 176(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + cmpl $13, 236(%esp) + vmovdqa 192(%ebp), %xmm3 + jl L_AES_GCM_decrypt_avx1_calc_iv_12_last + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 208(%ebp), %xmm3 + vaesenc %xmm3, %xmm1, %xmm1 + vaesenc %xmm3, %xmm5, %xmm5 + vmovdqa 224(%ebp), %xmm3 +L_AES_GCM_decrypt_avx1_calc_iv_12_last: + vaesenclast %xmm3, %xmm1, %xmm1 + vaesenclast %xmm3, %xmm5, %xmm5 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm1, %xmm1 + vmovdqu 
%xmm5, 80(%esp) + jmp L_AES_GCM_decrypt_avx1_iv_done +L_AES_GCM_decrypt_avx1_iv_not_12: + # Calculate values when IV is not 12 bytes + # H = Encrypt X(=0) + vmovdqa (%ebp), %xmm1 + vaesenc 16(%ebp), %xmm1, %xmm1 + vaesenc 32(%ebp), %xmm1, %xmm1 + vaesenc 48(%ebp), %xmm1, %xmm1 + vaesenc 64(%ebp), %xmm1, %xmm1 + vaesenc 80(%ebp), %xmm1, %xmm1 + vaesenc 96(%ebp), %xmm1, %xmm1 + vaesenc 112(%ebp), %xmm1, %xmm1 + vaesenc 128(%ebp), %xmm1, %xmm1 + vaesenc 144(%ebp), %xmm1, %xmm1 + cmpl $11, 236(%esp) + vmovdqa 160(%ebp), %xmm5 + jl L_AES_GCM_decrypt_avx1_calc_iv_1_aesenc_avx_last + vaesenc %xmm5, %xmm1, %xmm1 + vaesenc 176(%ebp), %xmm1, %xmm1 + cmpl $13, 236(%esp) + vmovdqa 192(%ebp), %xmm5 + jl L_AES_GCM_decrypt_avx1_calc_iv_1_aesenc_avx_last + vaesenc %xmm5, %xmm1, %xmm1 + vaesenc 208(%ebp), %xmm1, %xmm1 + vmovdqa 224(%ebp), %xmm5 +L_AES_GCM_decrypt_avx1_calc_iv_1_aesenc_avx_last: + vaesenclast %xmm5, %xmm1, %xmm1 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm1, %xmm1 + # Calc counter + # Initialization vector + cmpl $0x00, %edx + movl $0x00, %ecx + je L_AES_GCM_decrypt_avx1_calc_iv_done + cmpl $16, %edx + jl L_AES_GCM_decrypt_avx1_calc_iv_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_decrypt_avx1_calc_iv_16_loop: + vmovdqu (%esi,%ecx,1), %xmm4 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm4, %xmm0, %xmm0 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm0, %xmm5 + vpshufd $0x4e, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm7 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpxor %xmm0, %xmm5, %xmm5 + vpxor %xmm1, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vmovdqa %xmm4, %xmm3 + vmovdqa %xmm7, %xmm0 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm5, %xmm0, %xmm0 + vpsrld $31, %xmm3, %xmm4 + vpsrld $31, %xmm0, %xmm5 + vpslld $0x01, %xmm3, %xmm3 + vpslld $0x01, %xmm0, %xmm0 + vpsrldq $12, %xmm4, %xmm6 + vpslldq $4, %xmm4, %xmm4 + vpslldq $4, %xmm5, %xmm5 + vpor %xmm6, %xmm0, %xmm0 + vpor %xmm4, %xmm3, %xmm3 + vpor %xmm5, %xmm0, %xmm0 + vpslld $31, %xmm3, %xmm4 + vpslld $30, %xmm3, %xmm5 + vpslld $25, %xmm3, %xmm6 + vpxor %xmm5, %xmm4, %xmm4 + vpxor %xmm6, %xmm4, %xmm4 + vmovdqa %xmm4, %xmm5 + vpsrldq $4, %xmm5, %xmm5 + vpslldq $12, %xmm4, %xmm4 + vpxor %xmm4, %xmm3, %xmm3 + vpsrld $0x01, %xmm3, %xmm6 + vpsrld $2, %xmm3, %xmm7 + vpsrld $7, %xmm3, %xmm4 + vpxor %xmm7, %xmm6, %xmm6 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm5, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm6, %xmm0, %xmm0 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_avx1_calc_iv_16_loop + movl 224(%esp), %edx + cmpl %edx, %ecx + je L_AES_GCM_decrypt_avx1_calc_iv_done +L_AES_GCM_decrypt_avx1_calc_iv_lt16: + subl $16, %esp + vpxor %xmm4, %xmm4, %xmm4 + xorl %ebx, %ebx + vmovdqu %xmm4, (%esp) +L_AES_GCM_decrypt_avx1_calc_iv_loop: + movzbl (%esi,%ecx,1), %eax + movb %al, (%esp,%ebx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_avx1_calc_iv_loop + vmovdqu (%esp), %xmm4 + addl $16, %esp + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm4, %xmm0, %xmm0 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm0, %xmm5 + vpshufd $0x4e, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm7 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpxor %xmm0, %xmm5, %xmm5 + vpxor %xmm1, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vmovdqa %xmm4, %xmm3 + vmovdqa %xmm7, %xmm0 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm3, 
%xmm3 + vpxor %xmm5, %xmm0, %xmm0 + vpsrld $31, %xmm3, %xmm4 + vpsrld $31, %xmm0, %xmm5 + vpslld $0x01, %xmm3, %xmm3 + vpslld $0x01, %xmm0, %xmm0 + vpsrldq $12, %xmm4, %xmm6 + vpslldq $4, %xmm4, %xmm4 + vpslldq $4, %xmm5, %xmm5 + vpor %xmm6, %xmm0, %xmm0 + vpor %xmm4, %xmm3, %xmm3 + vpor %xmm5, %xmm0, %xmm0 + vpslld $31, %xmm3, %xmm4 + vpslld $30, %xmm3, %xmm5 + vpslld $25, %xmm3, %xmm6 + vpxor %xmm5, %xmm4, %xmm4 + vpxor %xmm6, %xmm4, %xmm4 + vmovdqa %xmm4, %xmm5 + vpsrldq $4, %xmm5, %xmm5 + vpslldq $12, %xmm4, %xmm4 + vpxor %xmm4, %xmm3, %xmm3 + vpsrld $0x01, %xmm3, %xmm6 + vpsrld $2, %xmm3, %xmm7 + vpsrld $7, %xmm3, %xmm4 + vpxor %xmm7, %xmm6, %xmm6 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm5, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm6, %xmm0, %xmm0 +L_AES_GCM_decrypt_avx1_calc_iv_done: + # T = Encrypt counter + vpxor %xmm4, %xmm4, %xmm4 + shll $3, %edx + vpinsrd $0x00, %edx, %xmm4, %xmm4 + vpxor %xmm4, %xmm0, %xmm0 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm0, %xmm5 + vpshufd $0x4e, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm7 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpxor %xmm0, %xmm5, %xmm5 + vpxor %xmm1, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vmovdqa %xmm4, %xmm3 + vmovdqa %xmm7, %xmm0 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm5, %xmm0, %xmm0 + vpsrld $31, %xmm3, %xmm4 + vpsrld $31, %xmm0, %xmm5 + vpslld $0x01, %xmm3, %xmm3 + vpslld $0x01, %xmm0, %xmm0 + vpsrldq $12, %xmm4, %xmm6 + vpslldq $4, %xmm4, %xmm4 + vpslldq $4, %xmm5, %xmm5 + vpor %xmm6, %xmm0, %xmm0 + vpor %xmm4, %xmm3, %xmm3 + vpor %xmm5, %xmm0, %xmm0 + vpslld $31, %xmm3, %xmm4 + vpslld $30, %xmm3, %xmm5 + vpslld $25, %xmm3, %xmm6 + vpxor %xmm5, %xmm4, %xmm4 + vpxor %xmm6, %xmm4, %xmm4 + vmovdqa %xmm4, %xmm5 + vpsrldq $4, %xmm5, %xmm5 + vpslldq $12, %xmm4, %xmm4 + vpxor %xmm4, %xmm3, %xmm3 + vpsrld $0x01, %xmm3, %xmm6 + vpsrld $2, %xmm3, %xmm7 + vpsrld $7, %xmm3, %xmm4 + vpxor %xmm7, %xmm6, %xmm6 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm5, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm6, %xmm0, %xmm0 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + # Encrypt counter + vmovdqa (%ebp), %xmm4 + vpxor %xmm0, %xmm4, %xmm4 + vaesenc 16(%ebp), %xmm4, %xmm4 + vaesenc 32(%ebp), %xmm4, %xmm4 + vaesenc 48(%ebp), %xmm4, %xmm4 + vaesenc 64(%ebp), %xmm4, %xmm4 + vaesenc 80(%ebp), %xmm4, %xmm4 + vaesenc 96(%ebp), %xmm4, %xmm4 + vaesenc 112(%ebp), %xmm4, %xmm4 + vaesenc 128(%ebp), %xmm4, %xmm4 + vaesenc 144(%ebp), %xmm4, %xmm4 + cmpl $11, 236(%esp) + vmovdqa 160(%ebp), %xmm5 + jl L_AES_GCM_decrypt_avx1_calc_iv_2_aesenc_avx_last + vaesenc %xmm5, %xmm4, %xmm4 + vaesenc 176(%ebp), %xmm4, %xmm4 + cmpl $13, 236(%esp) + vmovdqa 192(%ebp), %xmm5 + jl L_AES_GCM_decrypt_avx1_calc_iv_2_aesenc_avx_last + vaesenc %xmm5, %xmm4, %xmm4 + vaesenc 208(%ebp), %xmm4, %xmm4 + vmovdqa 224(%ebp), %xmm5 +L_AES_GCM_decrypt_avx1_calc_iv_2_aesenc_avx_last: + vaesenclast %xmm5, %xmm4, %xmm4 + vmovdqu %xmm4, 80(%esp) +L_AES_GCM_decrypt_avx1_iv_done: + movl 204(%esp), %esi + # Additional authentication data + movl 220(%esp), %edx + cmpl $0x00, %edx + je L_AES_GCM_decrypt_avx1_calc_aad_done + xorl %ecx, %ecx + cmpl $16, %edx + jl L_AES_GCM_decrypt_avx1_calc_aad_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_decrypt_avx1_calc_aad_16_loop: + vmovdqu (%esi,%ecx,1), %xmm4 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm4, %xmm2, %xmm2 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm2, %xmm5 + vpshufd $0x4e, %xmm1, %xmm6 + 
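Every "# ghash_gfmul_avx" / "# ghash_gfmul_red_avx" block in this file is one carry-less multiply of a block by H followed by reduction modulo x^128 + x^7 + x^2 + x + 1. As a reference point only, a portable bit-at-a-time equivalent in C (helper names gf128_mul/ghash_update are illustrative, not wolfSSL functions); it works on blocks in normal GCM wire byte order, whereas the assembly keeps the state byte-reflected in registers and byte-swaps at the boundaries:

#include <stdint.h>
#include <string.h>

/* z = x * h in GF(2^128), reduced by x^128 + x^7 + x^2 + x + 1,
 * bit-by-bit as described in NIST SP 800-38D. */
static void gf128_mul(uint8_t z[16], const uint8_t x[16], const uint8_t h[16])
{
    uint8_t v[16];
    int i, j, k;

    memcpy(v, x, 16);
    memset(z, 0, 16);

    for (i = 0; i < 16; i++) {
        for (j = 7; j >= 0; j--) {
            int carry = v[15] & 1;

            if ((h[i] >> j) & 1) {           /* z ^= v when this bit of h is set */
                for (k = 0; k < 16; k++)
                    z[k] ^= v[k];
            }
            for (k = 15; k > 0; k--)         /* v = v >> 1 in GCM bit order */
                v[k] = (uint8_t)((v[k] >> 1) | (v[k - 1] << 7));
            v[0] >>= 1;
            if (carry)                       /* reduce: xor in 0xE1 << 120 */
                v[0] ^= 0xE1;
        }
    }
}

/* GHASH update: state = (state ^ block) * H. */
static void ghash_update(uint8_t state[16], const uint8_t h[16],
                         const uint8_t block[16])
{
    uint8_t t[16];
    int i;

    for (i = 0; i < 16; i++)
        t[i] = (uint8_t)(state[i] ^ block[i]);
    gf128_mul(state, t, h);
}

The vpclmulqdq plus shift/XOR sequences above compute the same result far faster; the loop is only meant to state what those sequences compute.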
vpclmulqdq $0x11, %xmm2, %xmm1, %xmm7 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm4 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm1, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vmovdqa %xmm4, %xmm3 + vmovdqa %xmm7, %xmm2 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm5, %xmm2, %xmm2 + vpsrld $31, %xmm3, %xmm4 + vpsrld $31, %xmm2, %xmm5 + vpslld $0x01, %xmm3, %xmm3 + vpslld $0x01, %xmm2, %xmm2 + vpsrldq $12, %xmm4, %xmm6 + vpslldq $4, %xmm4, %xmm4 + vpslldq $4, %xmm5, %xmm5 + vpor %xmm6, %xmm2, %xmm2 + vpor %xmm4, %xmm3, %xmm3 + vpor %xmm5, %xmm2, %xmm2 + vpslld $31, %xmm3, %xmm4 + vpslld $30, %xmm3, %xmm5 + vpslld $25, %xmm3, %xmm6 + vpxor %xmm5, %xmm4, %xmm4 + vpxor %xmm6, %xmm4, %xmm4 + vmovdqa %xmm4, %xmm5 + vpsrldq $4, %xmm5, %xmm5 + vpslldq $12, %xmm4, %xmm4 + vpxor %xmm4, %xmm3, %xmm3 + vpsrld $0x01, %xmm3, %xmm6 + vpsrld $2, %xmm3, %xmm7 + vpsrld $7, %xmm3, %xmm4 + vpxor %xmm7, %xmm6, %xmm6 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm5, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm6, %xmm2, %xmm2 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_avx1_calc_aad_16_loop + movl 220(%esp), %edx + cmpl %edx, %ecx + je L_AES_GCM_decrypt_avx1_calc_aad_done +L_AES_GCM_decrypt_avx1_calc_aad_lt16: + subl $16, %esp + vpxor %xmm4, %xmm4, %xmm4 + xorl %ebx, %ebx + vmovdqu %xmm4, (%esp) +L_AES_GCM_decrypt_avx1_calc_aad_loop: + movzbl (%esi,%ecx,1), %eax + movb %al, (%esp,%ebx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_avx1_calc_aad_loop + vmovdqu (%esp), %xmm4 + addl $16, %esp + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm4, %xmm2, %xmm2 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm2, %xmm5 + vpshufd $0x4e, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm2, %xmm1, %xmm7 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm4 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm1, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vmovdqa %xmm4, %xmm3 + vmovdqa %xmm7, %xmm2 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm5, %xmm2, %xmm2 + vpsrld $31, %xmm3, %xmm4 + vpsrld $31, %xmm2, %xmm5 + vpslld $0x01, %xmm3, %xmm3 + vpslld $0x01, %xmm2, %xmm2 + vpsrldq $12, %xmm4, %xmm6 + vpslldq $4, %xmm4, %xmm4 + vpslldq $4, %xmm5, %xmm5 + vpor %xmm6, %xmm2, %xmm2 + vpor %xmm4, %xmm3, %xmm3 + vpor %xmm5, %xmm2, %xmm2 + vpslld $31, %xmm3, %xmm4 + vpslld $30, %xmm3, %xmm5 + vpslld $25, %xmm3, %xmm6 + vpxor %xmm5, %xmm4, %xmm4 + vpxor %xmm6, %xmm4, %xmm4 + vmovdqa %xmm4, %xmm5 + vpsrldq $4, %xmm5, %xmm5 + vpslldq $12, %xmm4, %xmm4 + vpxor %xmm4, %xmm3, %xmm3 + vpsrld $0x01, %xmm3, %xmm6 + vpsrld $2, %xmm3, %xmm7 + vpsrld $7, %xmm3, %xmm4 + vpxor %xmm7, %xmm6, %xmm6 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm5, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm6, %xmm2, %xmm2 +L_AES_GCM_decrypt_avx1_calc_aad_done: + vmovdqu %xmm2, 96(%esp) + movl 196(%esp), %esi + movl 200(%esp), %edi + # Calculate counter and H + vpsrlq $63, %xmm1, %xmm5 + vpsllq $0x01, %xmm1, %xmm4 + vpslldq $8, %xmm5, %xmm5 + vpor %xmm5, %xmm4, %xmm4 + vpshufd $0xff, %xmm1, %xmm1 + vpsrad $31, %xmm1, %xmm1 + vpshufb L_aes_gcm_avx1_bswap_epi64, %xmm0, %xmm0 + vpand L_aes_gcm_avx1_mod2_128, %xmm1, %xmm1 + vpaddd L_aes_gcm_avx1_one, %xmm0, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu %xmm0, 64(%esp) + xorl %ebx, %ebx + cmpl $0x40, 216(%esp) + movl 216(%esp), %eax + jl L_AES_GCM_decrypt_avx1_done_64 + andl $0xffffffc0, %eax + vmovdqa 
%xmm2, %xmm6 + # H ^ 1 + vmovdqu %xmm1, (%esp) + # H ^ 2 + vpclmulqdq $0x00, %xmm1, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm1, %xmm1, %xmm0 + vpslld $31, %xmm4, %xmm5 + vpslld $30, %xmm4, %xmm6 + vpslld $25, %xmm4, %xmm7 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpsrldq $4, %xmm5, %xmm7 + vpslldq $12, %xmm5, %xmm5 + vpxor %xmm5, %xmm4, %xmm4 + vpsrld $0x01, %xmm4, %xmm5 + vpsrld $2, %xmm4, %xmm6 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpsrld $7, %xmm4, %xmm4 + vpxor %xmm7, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm0, %xmm0 + vmovdqu %xmm0, 16(%esp) + # H ^ 3 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm1, %xmm5 + vpshufd $0x4e, %xmm0, %xmm6 + vpclmulqdq $0x11, %xmm1, %xmm0, %xmm7 + vpclmulqdq $0x00, %xmm1, %xmm0, %xmm4 + vpxor %xmm1, %xmm5, %xmm5 + vpxor %xmm0, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm4, %xmm4 + vpxor %xmm5, %xmm7, %xmm3 + vpslld $31, %xmm4, %xmm5 + vpslld $30, %xmm4, %xmm6 + vpslld $25, %xmm4, %xmm7 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpsrldq $4, %xmm5, %xmm7 + vpslldq $12, %xmm5, %xmm5 + vpxor %xmm5, %xmm4, %xmm4 + vpsrld $0x01, %xmm4, %xmm5 + vpsrld $2, %xmm4, %xmm6 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpsrld $7, %xmm4, %xmm4 + vpxor %xmm7, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm3, %xmm3 + vmovdqu %xmm3, 32(%esp) + # H ^ 4 + vpclmulqdq $0x00, %xmm0, %xmm0, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm0, %xmm3 + vpslld $31, %xmm4, %xmm5 + vpslld $30, %xmm4, %xmm6 + vpslld $25, %xmm4, %xmm7 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpsrldq $4, %xmm5, %xmm7 + vpslldq $12, %xmm5, %xmm5 + vpxor %xmm5, %xmm4, %xmm4 + vpsrld $0x01, %xmm4, %xmm5 + vpsrld $2, %xmm4, %xmm6 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpsrld $7, %xmm4, %xmm4 + vpxor %xmm7, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm3, %xmm3 + vmovdqu %xmm3, 48(%esp) + cmpl %esi, %edi + jne L_AES_GCM_decrypt_avx1_ghash_64 +L_AES_GCM_decrypt_avx1_ghash_64_inplace: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + vmovdqu 64(%esp), %xmm4 + vmovdqa L_aes_gcm_avx1_bswap_epi64, %xmm3 + vpaddd L_aes_gcm_avx1_one, %xmm4, %xmm5 + vpshufb %xmm3, %xmm5, %xmm5 + vpaddd L_aes_gcm_avx1_two, %xmm4, %xmm6 + vpshufb %xmm3, %xmm6, %xmm6 + vpaddd L_aes_gcm_avx1_three, %xmm4, %xmm7 + vpshufb %xmm3, %xmm7, %xmm7 + vpshufb %xmm3, %xmm4, %xmm4 + vmovdqu 64(%esp), %xmm3 + vpaddd L_aes_gcm_avx1_four, %xmm3, %xmm3 + vmovdqu %xmm3, 64(%esp) + vmovdqa (%ebp), %xmm3 + vpxor %xmm3, %xmm4, %xmm4 + vpxor %xmm3, %xmm5, %xmm5 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm3, %xmm7, %xmm7 + vmovdqa 16(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 32(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 48(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 64(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 80(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 96(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + 
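The "# H ^ 1" through "# H ^ 4" section above precomputes H, H^2, H^3 and H^4 at (%esp), 16(%esp), 32(%esp) and 48(%esp) so that the 64-byte loops can fold four ciphertext blocks into the GHASH state with a single reduction, using the identity state' = (state ^ c0)*H^4 ^ c1*H^3 ^ c2*H^2 ^ c3*H. A sketch of that aggregation, reusing gf128_mul (and its headers) from the previous sketch; for decrypt the blocks are the input ciphertext, for encrypt the ciphertext just produced:

/* Fold four 16-byte blocks into the GHASH state using precomputed powers
 * of H.  hpow[0..3] hold H^1..H^4, matching the table built above. */
static void ghash_4blocks(uint8_t state[16], const uint8_t hpow[4][16],
                          const uint8_t c[64])
{
    uint8_t acc[16] = {0};
    uint8_t t[16], prod[16];
    int i, k;

    for (i = 0; i < 4; i++) {
        for (k = 0; k < 16; k++)             /* oldest block also folds in state */
            t[k] = (uint8_t)(c[16 * i + k] ^ (i == 0 ? state[k] : 0));
        gf128_mul(prod, t, hpow[3 - i]);     /* oldest block gets H^4 */
        for (k = 0; k < 16; k++)
            acc[k] ^= prod[k];
    }
    memcpy(state, acc, 16);
}

Deferring the reduction this way is what lets the loops above interleave four vpclmulqdq chains with the AES rounds for the next four counter blocks.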
vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 112(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 128(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 144(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + cmpl $11, 236(%esp) + vmovdqa 160(%ebp), %xmm3 + jl L_AES_GCM_decrypt_avx1inplace_aesenc_64_ghash_avx_aesenc_64_enc_done + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 176(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + cmpl $13, 236(%esp) + vmovdqa 192(%ebp), %xmm3 + jl L_AES_GCM_decrypt_avx1inplace_aesenc_64_ghash_avx_aesenc_64_enc_done + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 208(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 224(%ebp), %xmm3 +L_AES_GCM_decrypt_avx1inplace_aesenc_64_ghash_avx_aesenc_64_enc_done: + vaesenclast %xmm3, %xmm4, %xmm4 + vaesenclast %xmm3, %xmm5, %xmm5 + vmovdqu (%ecx), %xmm0 + vmovdqu 16(%ecx), %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm1, %xmm5, %xmm5 + vmovdqu %xmm0, 112(%esp) + vmovdqu %xmm1, 128(%esp) + vmovdqu %xmm4, (%edx) + vmovdqu %xmm5, 16(%edx) + vaesenclast %xmm3, %xmm6, %xmm6 + vaesenclast %xmm3, %xmm7, %xmm7 + vmovdqu 32(%ecx), %xmm0 + vmovdqu 48(%ecx), %xmm1 + vpxor %xmm0, %xmm6, %xmm6 + vpxor %xmm1, %xmm7, %xmm7 + vmovdqu %xmm0, 144(%esp) + vmovdqu %xmm1, 160(%esp) + vmovdqu %xmm6, 32(%edx) + vmovdqu %xmm7, 48(%edx) + # ghash encrypted counter + vmovdqu 96(%esp), %xmm6 + vmovdqu 48(%esp), %xmm3 + vmovdqu 112(%esp), %xmm4 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm6, %xmm4, %xmm4 + vpshufd $0x4e, %xmm3, %xmm5 + vpshufd $0x4e, %xmm4, %xmm1 + vpxor %xmm3, %xmm5, %xmm5 + vpxor %xmm4, %xmm1, %xmm1 + vpclmulqdq $0x11, %xmm3, %xmm4, %xmm7 + vpclmulqdq $0x00, %xmm3, %xmm4, %xmm6 + vpclmulqdq $0x00, %xmm1, %xmm5, %xmm5 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vmovdqu 32(%esp), %xmm3 + vmovdqu 128(%esp), %xmm4 + vpshufd $0x4e, %xmm3, %xmm0 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm3, %xmm0, %xmm0 + vpshufd $0x4e, %xmm4, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpclmulqdq $0x11, %xmm3, %xmm4, %xmm2 + vpclmulqdq $0x00, %xmm3, %xmm4, %xmm3 + vpclmulqdq $0x00, %xmm1, %xmm0, %xmm0 + vpxor %xmm3, %xmm5, %xmm5 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm0, %xmm5, %xmm5 + vmovdqu 16(%esp), %xmm3 + vmovdqu 144(%esp), %xmm4 + vpshufd $0x4e, %xmm3, %xmm0 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm3, %xmm0, %xmm0 + vpshufd $0x4e, %xmm4, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpclmulqdq $0x11, %xmm3, %xmm4, %xmm2 + vpclmulqdq $0x00, %xmm3, %xmm4, %xmm3 + vpclmulqdq $0x00, %xmm1, %xmm0, %xmm0 + vpxor %xmm3, %xmm5, %xmm5 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm0, %xmm5, %xmm5 + vmovdqu (%esp), %xmm3 + vmovdqu 160(%esp), %xmm4 + vpshufd $0x4e, %xmm3, %xmm0 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm3, %xmm0, 
%xmm0 + vpshufd $0x4e, %xmm4, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpclmulqdq $0x11, %xmm3, %xmm4, %xmm2 + vpclmulqdq $0x00, %xmm3, %xmm4, %xmm3 + vpclmulqdq $0x00, %xmm1, %xmm0, %xmm0 + vpxor %xmm3, %xmm5, %xmm5 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm0, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm1 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm5, %xmm7, %xmm7 + vpslld $31, %xmm6, %xmm3 + vpslld $30, %xmm6, %xmm0 + vpslld $25, %xmm6, %xmm1 + vpxor %xmm0, %xmm3, %xmm3 + vpxor %xmm1, %xmm3, %xmm3 + vpsrldq $4, %xmm3, %xmm0 + vpslldq $12, %xmm3, %xmm3 + vpxor %xmm3, %xmm6, %xmm6 + vpsrld $0x01, %xmm6, %xmm1 + vpsrld $2, %xmm6, %xmm5 + vpsrld $7, %xmm6, %xmm4 + vpxor %xmm5, %xmm1, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm7, %xmm6, %xmm6 + vmovdqu %xmm6, 96(%esp) + addl $0x40, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_decrypt_avx1_ghash_64_inplace + jmp L_AES_GCM_decrypt_avx1_ghash_64_done +L_AES_GCM_decrypt_avx1_ghash_64: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + vmovdqu 64(%esp), %xmm4 + vmovdqa L_aes_gcm_avx1_bswap_epi64, %xmm3 + vpaddd L_aes_gcm_avx1_one, %xmm4, %xmm5 + vpshufb %xmm3, %xmm5, %xmm5 + vpaddd L_aes_gcm_avx1_two, %xmm4, %xmm6 + vpshufb %xmm3, %xmm6, %xmm6 + vpaddd L_aes_gcm_avx1_three, %xmm4, %xmm7 + vpshufb %xmm3, %xmm7, %xmm7 + vpshufb %xmm3, %xmm4, %xmm4 + vmovdqu 64(%esp), %xmm3 + vpaddd L_aes_gcm_avx1_four, %xmm3, %xmm3 + vmovdqu %xmm3, 64(%esp) + vmovdqa (%ebp), %xmm3 + vpxor %xmm3, %xmm4, %xmm4 + vpxor %xmm3, %xmm5, %xmm5 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm3, %xmm7, %xmm7 + vmovdqa 16(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 32(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 48(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 64(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 80(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 96(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 112(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 128(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 144(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + cmpl $11, 236(%esp) + vmovdqa 160(%ebp), %xmm3 + jl L_AES_GCM_decrypt_avx1_aesenc_64_ghash_avx_aesenc_64_enc_done + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 176(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + cmpl $13, 236(%esp) + vmovdqa 192(%ebp), %xmm3 + jl L_AES_GCM_decrypt_avx1_aesenc_64_ghash_avx_aesenc_64_enc_done + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc 
%xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 208(%ebp), %xmm3 + vaesenc %xmm3, %xmm4, %xmm4 + vaesenc %xmm3, %xmm5, %xmm5 + vaesenc %xmm3, %xmm6, %xmm6 + vaesenc %xmm3, %xmm7, %xmm7 + vmovdqa 224(%ebp), %xmm3 +L_AES_GCM_decrypt_avx1_aesenc_64_ghash_avx_aesenc_64_enc_done: + vaesenclast %xmm3, %xmm4, %xmm4 + vaesenclast %xmm3, %xmm5, %xmm5 + vmovdqu (%ecx), %xmm0 + vmovdqu 16(%ecx), %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm1, %xmm5, %xmm5 + vmovdqu %xmm0, (%ecx) + vmovdqu %xmm1, 16(%ecx) + vmovdqu %xmm4, (%edx) + vmovdqu %xmm5, 16(%edx) + vaesenclast %xmm3, %xmm6, %xmm6 + vaesenclast %xmm3, %xmm7, %xmm7 + vmovdqu 32(%ecx), %xmm0 + vmovdqu 48(%ecx), %xmm1 + vpxor %xmm0, %xmm6, %xmm6 + vpxor %xmm1, %xmm7, %xmm7 + vmovdqu %xmm0, 32(%ecx) + vmovdqu %xmm1, 48(%ecx) + vmovdqu %xmm6, 32(%edx) + vmovdqu %xmm7, 48(%edx) + # ghash encrypted counter + vmovdqu 96(%esp), %xmm6 + vmovdqu 48(%esp), %xmm3 + vmovdqu (%ecx), %xmm4 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm6, %xmm4, %xmm4 + vpshufd $0x4e, %xmm3, %xmm5 + vpshufd $0x4e, %xmm4, %xmm1 + vpxor %xmm3, %xmm5, %xmm5 + vpxor %xmm4, %xmm1, %xmm1 + vpclmulqdq $0x11, %xmm3, %xmm4, %xmm7 + vpclmulqdq $0x00, %xmm3, %xmm4, %xmm6 + vpclmulqdq $0x00, %xmm1, %xmm5, %xmm5 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vmovdqu 32(%esp), %xmm3 + vmovdqu 16(%ecx), %xmm4 + vpshufd $0x4e, %xmm3, %xmm0 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm3, %xmm0, %xmm0 + vpshufd $0x4e, %xmm4, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpclmulqdq $0x11, %xmm3, %xmm4, %xmm2 + vpclmulqdq $0x00, %xmm3, %xmm4, %xmm3 + vpclmulqdq $0x00, %xmm1, %xmm0, %xmm0 + vpxor %xmm3, %xmm5, %xmm5 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm0, %xmm5, %xmm5 + vmovdqu 16(%esp), %xmm3 + vmovdqu 32(%ecx), %xmm4 + vpshufd $0x4e, %xmm3, %xmm0 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm3, %xmm0, %xmm0 + vpshufd $0x4e, %xmm4, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpclmulqdq $0x11, %xmm3, %xmm4, %xmm2 + vpclmulqdq $0x00, %xmm3, %xmm4, %xmm3 + vpclmulqdq $0x00, %xmm1, %xmm0, %xmm0 + vpxor %xmm3, %xmm5, %xmm5 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm0, %xmm5, %xmm5 + vmovdqu (%esp), %xmm3 + vmovdqu 48(%ecx), %xmm4 + vpshufd $0x4e, %xmm3, %xmm0 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm3, %xmm0, %xmm0 + vpshufd $0x4e, %xmm4, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpclmulqdq $0x11, %xmm3, %xmm4, %xmm2 + vpclmulqdq $0x00, %xmm3, %xmm4, %xmm3 + vpclmulqdq $0x00, %xmm1, %xmm0, %xmm0 + vpxor %xmm3, %xmm5, %xmm5 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm0, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm1 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm5, %xmm7, %xmm7 + vpslld $31, %xmm6, %xmm3 + vpslld $30, %xmm6, %xmm0 + vpslld $25, %xmm6, %xmm1 + vpxor %xmm0, %xmm3, %xmm3 + vpxor %xmm1, %xmm3, %xmm3 + vpsrldq $4, %xmm3, %xmm0 + vpslldq $12, %xmm3, %xmm3 + vpxor %xmm3, %xmm6, %xmm6 + vpsrld $0x01, %xmm6, %xmm1 + vpsrld $2, %xmm6, %xmm5 + vpsrld $7, %xmm6, %xmm4 + vpxor %xmm5, %xmm1, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm7, %xmm6, %xmm6 + vmovdqu %xmm6, 96(%esp) + addl $0x40, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_decrypt_avx1_ghash_64 +L_AES_GCM_decrypt_avx1_ghash_64_done: + vmovdqa %xmm6, %xmm2 + vmovdqu (%esp), %xmm1 +L_AES_GCM_decrypt_avx1_done_64: + movl 216(%esp), %edx + cmpl %edx, 
%ebx + jge L_AES_GCM_decrypt_avx1_done_dec + movl 216(%esp), %eax + andl $0xfffffff0, %eax + cmpl %eax, %ebx + jge L_AES_GCM_decrypt_avx1_last_block_done +L_AES_GCM_decrypt_avx1_last_block_start: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + vmovdqu (%ecx), %xmm7 + pshufb L_aes_gcm_avx1_bswap_mask, %xmm7 + pxor %xmm2, %xmm7 + vmovdqu 64(%esp), %xmm5 + vmovdqu %xmm7, %xmm7 + vpshufb L_aes_gcm_avx1_bswap_epi64, %xmm5, %xmm4 + vpaddd L_aes_gcm_avx1_one, %xmm5, %xmm5 + vmovdqu %xmm5, 64(%esp) + vpxor (%ebp), %xmm4, %xmm4 + vpclmulqdq $16, %xmm1, %xmm7, %xmm0 + vaesenc 16(%ebp), %xmm4, %xmm4 + vaesenc 32(%ebp), %xmm4, %xmm4 + vpclmulqdq $0x01, %xmm1, %xmm7, %xmm3 + vaesenc 48(%ebp), %xmm4, %xmm4 + vaesenc 64(%ebp), %xmm4, %xmm4 + vaesenc 80(%ebp), %xmm4, %xmm4 + vpclmulqdq $0x11, %xmm1, %xmm7, %xmm5 + vaesenc 96(%ebp), %xmm4, %xmm4 + vpxor %xmm3, %xmm0, %xmm0 + vpslldq $8, %xmm0, %xmm6 + vpsrldq $8, %xmm0, %xmm0 + vaesenc 112(%ebp), %xmm4, %xmm4 + vpclmulqdq $0x00, %xmm1, %xmm7, %xmm3 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm0, %xmm5, %xmm5 + vmovdqa L_aes_gcm_avx1_mod2_128, %xmm7 + vpclmulqdq $16, %xmm7, %xmm6, %xmm3 + vaesenc 128(%ebp), %xmm4, %xmm4 + vpshufd $0x4e, %xmm6, %xmm0 + vpxor %xmm3, %xmm0, %xmm0 + vpclmulqdq $16, %xmm7, %xmm0, %xmm3 + vaesenc 144(%ebp), %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm2 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm5, %xmm2, %xmm2 + cmpl $11, 236(%esp) + vmovdqa 160(%ebp), %xmm5 + jl L_AES_GCM_decrypt_avx1_aesenc_gfmul_last + vaesenc %xmm5, %xmm4, %xmm4 + vaesenc 176(%ebp), %xmm4, %xmm4 + cmpl $13, 236(%esp) + vmovdqa 192(%ebp), %xmm5 + jl L_AES_GCM_decrypt_avx1_aesenc_gfmul_last + vaesenc %xmm5, %xmm4, %xmm4 + vaesenc 208(%ebp), %xmm4, %xmm4 + vmovdqa 224(%ebp), %xmm5 +L_AES_GCM_decrypt_avx1_aesenc_gfmul_last: + vaesenclast %xmm5, %xmm4, %xmm4 + vmovdqu (%ecx), %xmm5 + vpxor %xmm5, %xmm4, %xmm4 + vmovdqu %xmm4, (%edx) + addl $16, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_decrypt_avx1_last_block_start +L_AES_GCM_decrypt_avx1_last_block_done: + movl 216(%esp), %ecx + movl %ecx, %edx + andl $15, %ecx + jz L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_done + vmovdqu 64(%esp), %xmm0 + vpshufb L_aes_gcm_avx1_bswap_epi64, %xmm0, %xmm0 + vpxor (%ebp), %xmm0, %xmm0 + vaesenc 16(%ebp), %xmm0, %xmm0 + vaesenc 32(%ebp), %xmm0, %xmm0 + vaesenc 48(%ebp), %xmm0, %xmm0 + vaesenc 64(%ebp), %xmm0, %xmm0 + vaesenc 80(%ebp), %xmm0, %xmm0 + vaesenc 96(%ebp), %xmm0, %xmm0 + vaesenc 112(%ebp), %xmm0, %xmm0 + vaesenc 128(%ebp), %xmm0, %xmm0 + vaesenc 144(%ebp), %xmm0, %xmm0 + cmpl $11, 236(%esp) + vmovdqa 160(%ebp), %xmm5 + jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last + vaesenc %xmm5, %xmm0, %xmm0 + vaesenc 176(%ebp), %xmm0, %xmm0 + cmpl $13, 236(%esp) + vmovdqa 192(%ebp), %xmm5 + jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last + vaesenc %xmm5, %xmm0, %xmm0 + vaesenc 208(%ebp), %xmm0, %xmm0 + vmovdqa 224(%ebp), %xmm5 +L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last: + vaesenclast %xmm5, %xmm0, %xmm0 + subl $32, %esp + xorl %ecx, %ecx + vmovdqu %xmm0, (%esp) + vpxor %xmm4, %xmm4, %xmm4 + vmovdqu %xmm4, 16(%esp) +L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_loop: + movzbl (%esi,%ebx,1), %eax + movb %al, 16(%esp,%ecx,1) + xorb (%esp,%ecx,1), %al + movb %al, (%edi,%ebx,1) + incl %ebx + incl %ecx + cmpl %edx, %ebx + jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_loop + vmovdqu 16(%esp), %xmm0 + addl $32, %esp + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm2, %xmm2 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm1, 
%xmm5 + vpshufd $0x4e, %xmm2, %xmm6 + vpclmulqdq $0x11, %xmm1, %xmm2, %xmm7 + vpclmulqdq $0x00, %xmm1, %xmm2, %xmm4 + vpxor %xmm1, %xmm5, %xmm5 + vpxor %xmm2, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm4, %xmm4 + vpxor %xmm5, %xmm7, %xmm2 + vpslld $31, %xmm4, %xmm5 + vpslld $30, %xmm4, %xmm6 + vpslld $25, %xmm4, %xmm7 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpsrldq $4, %xmm5, %xmm7 + vpslldq $12, %xmm5, %xmm5 + vpxor %xmm5, %xmm4, %xmm4 + vpsrld $0x01, %xmm4, %xmm5 + vpsrld $2, %xmm4, %xmm6 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpsrld $7, %xmm4, %xmm4 + vpxor %xmm7, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm2, %xmm2 +L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_done: +L_AES_GCM_decrypt_avx1_done_dec: + movl 212(%esp), %esi + movl 228(%esp), %ebp + movl 216(%esp), %edx + movl 220(%esp), %ecx + shll $3, %edx + shll $3, %ecx + vpinsrd $0x00, %edx, %xmm4, %xmm4 + vpinsrd $2, %ecx, %xmm4, %xmm4 + movl 216(%esp), %edx + movl 220(%esp), %ecx + shrl $29, %edx + shrl $29, %ecx + vpinsrd $0x01, %edx, %xmm4, %xmm4 + vpinsrd $3, %ecx, %xmm4, %xmm4 + vpxor %xmm4, %xmm2, %xmm2 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm1, %xmm5 + vpshufd $0x4e, %xmm2, %xmm6 + vpclmulqdq $0x11, %xmm1, %xmm2, %xmm7 + vpclmulqdq $0x00, %xmm1, %xmm2, %xmm4 + vpxor %xmm1, %xmm5, %xmm5 + vpxor %xmm2, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm6 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm6, %xmm4, %xmm4 + vpxor %xmm5, %xmm7, %xmm2 + vpslld $31, %xmm4, %xmm5 + vpslld $30, %xmm4, %xmm6 + vpslld $25, %xmm4, %xmm7 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm7, %xmm5, %xmm5 + vpsrldq $4, %xmm5, %xmm7 + vpslldq $12, %xmm5, %xmm5 + vpxor %xmm5, %xmm4, %xmm4 + vpsrld $0x01, %xmm4, %xmm5 + vpsrld $2, %xmm4, %xmm6 + vpxor %xmm6, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpsrld $7, %xmm4, %xmm4 + vpxor %xmm7, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm2, %xmm2 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm2, %xmm2 + vpxor 80(%esp), %xmm2, %xmm4 + movl 240(%esp), %edi + cmpl $16, %ebp + je L_AES_GCM_decrypt_avx1_cmp_tag_16 + subl $16, %esp + xorl %ecx, %ecx + xorl %ebx, %ebx + vmovdqu %xmm4, (%esp) +L_AES_GCM_decrypt_avx1_cmp_tag_loop: + movzbl (%esp,%ecx,1), %eax + xorb (%esi,%ecx,1), %al + orb %al, %bl + incl %ecx + cmpl %ebp, %ecx + jne L_AES_GCM_decrypt_avx1_cmp_tag_loop + cmpb $0x00, %bl + sete %bl + addl $16, %esp + xorl %ecx, %ecx + jmp L_AES_GCM_decrypt_avx1_cmp_tag_done +L_AES_GCM_decrypt_avx1_cmp_tag_16: + vmovdqu (%esi), %xmm5 + vpcmpeqb %xmm5, %xmm4, %xmm4 + vpmovmskb %xmm4, %edx + # %%edx == 0xFFFF then return 1 else => return 0 + xorl %ebx, %ebx + cmpl $0xffff, %edx + sete %bl +L_AES_GCM_decrypt_avx1_cmp_tag_done: + movl %ebx, (%edi) + addl $0xb0, %esp + popl %ebp + popl %edi + popl %esi + popl %ebx + ret +.size AES_GCM_decrypt_avx1,.-AES_GCM_decrypt_avx1 +#ifdef WOLFSSL_AESGCM_STREAM +.text +.globl AES_GCM_init_avx1 +.type AES_GCM_init_avx1,@function +.align 16 +AES_GCM_init_avx1: + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + subl $16, %esp + movl 36(%esp), %ebp + movl 44(%esp), %esi + movl 60(%esp), %edi + vpxor %xmm4, %xmm4, %xmm4 + movl 48(%esp), %edx + cmpl $12, %edx + jne L_AES_GCM_init_avx1_iv_not_12 + # # Calculate values when IV is 12 bytes + # Set counter based on IV + movl $0x1000000, %ecx + vpinsrd $0x00, (%esi), 
%xmm4, %xmm4 + vpinsrd $0x01, 4(%esi), %xmm4, %xmm4 + vpinsrd $2, 8(%esi), %xmm4, %xmm4 + vpinsrd $3, %ecx, %xmm4, %xmm4 + # H = Encrypt X(=0) and T = Encrypt counter + vmovdqa (%ebp), %xmm5 + vpxor %xmm5, %xmm4, %xmm1 + vmovdqa 16(%ebp), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 32(%ebp), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 48(%ebp), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 64(%ebp), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 80(%ebp), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 96(%ebp), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 112(%ebp), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 128(%ebp), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 144(%ebp), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + cmpl $11, 40(%esp) + vmovdqa 160(%ebp), %xmm7 + jl L_AES_GCM_init_avx1_calc_iv_12_last + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 176(%ebp), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + cmpl $13, 40(%esp) + vmovdqa 192(%ebp), %xmm7 + jl L_AES_GCM_init_avx1_calc_iv_12_last + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 208(%ebp), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 224(%ebp), %xmm7 +L_AES_GCM_init_avx1_calc_iv_12_last: + vaesenclast %xmm7, %xmm5, %xmm5 + vaesenclast %xmm7, %xmm1, %xmm1 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm5, %xmm5 + vmovdqu %xmm1, (%edi) + jmp L_AES_GCM_init_avx1_iv_done +L_AES_GCM_init_avx1_iv_not_12: + # Calculate values when IV is not 12 bytes + # H = Encrypt X(=0) + vmovdqa (%ebp), %xmm5 + vaesenc 16(%ebp), %xmm5, %xmm5 + vaesenc 32(%ebp), %xmm5, %xmm5 + vaesenc 48(%ebp), %xmm5, %xmm5 + vaesenc 64(%ebp), %xmm5, %xmm5 + vaesenc 80(%ebp), %xmm5, %xmm5 + vaesenc 96(%ebp), %xmm5, %xmm5 + vaesenc 112(%ebp), %xmm5, %xmm5 + vaesenc 128(%ebp), %xmm5, %xmm5 + vaesenc 144(%ebp), %xmm5, %xmm5 + cmpl $11, 40(%esp) + vmovdqa 160(%ebp), %xmm1 + jl L_AES_GCM_init_avx1_calc_iv_1_aesenc_avx_last + vaesenc %xmm1, %xmm5, %xmm5 + vaesenc 176(%ebp), %xmm5, %xmm5 + cmpl $13, 40(%esp) + vmovdqa 192(%ebp), %xmm1 + jl L_AES_GCM_init_avx1_calc_iv_1_aesenc_avx_last + vaesenc %xmm1, %xmm5, %xmm5 + vaesenc 208(%ebp), %xmm5, %xmm5 + vmovdqa 224(%ebp), %xmm1 +L_AES_GCM_init_avx1_calc_iv_1_aesenc_avx_last: + vaesenclast %xmm1, %xmm5, %xmm5 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm5, %xmm5 + # Calc counter + # Initialization vector + cmpl $0x00, %edx + movl $0x00, %ecx + je L_AES_GCM_init_avx1_calc_iv_done + cmpl $16, %edx + jl L_AES_GCM_init_avx1_calc_iv_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_init_avx1_calc_iv_16_loop: + vmovdqu (%esi,%ecx,1), %xmm0 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm4, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm7 + vmovdqa %xmm3, %xmm4 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm1, %xmm4, %xmm4 + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq 
$12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + vpslld $31, %xmm7, %xmm0 + vpslld $30, %xmm7, %xmm1 + vpslld $25, %xmm7, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm7, %xmm7 + vpsrld $0x01, %xmm7, %xmm2 + vpsrld $2, %xmm7, %xmm3 + vpsrld $7, %xmm7, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm2, %xmm4, %xmm4 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_init_avx1_calc_iv_16_loop + movl 48(%esp), %edx + cmpl %edx, %ecx + je L_AES_GCM_init_avx1_calc_iv_done +L_AES_GCM_init_avx1_calc_iv_lt16: + subl $16, %esp + vpxor %xmm0, %xmm0, %xmm0 + xorl %ebx, %ebx + vmovdqu %xmm0, (%esp) +L_AES_GCM_init_avx1_calc_iv_loop: + movzbl (%esi,%ecx,1), %eax + movb %al, (%esp,%ebx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_init_avx1_calc_iv_loop + vmovdqu (%esp), %xmm0 + addl $16, %esp + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm4, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm7 + vmovdqa %xmm3, %xmm4 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm1, %xmm4, %xmm4 + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + vpslld $31, %xmm7, %xmm0 + vpslld $30, %xmm7, %xmm1 + vpslld $25, %xmm7, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm7, %xmm7 + vpsrld $0x01, %xmm7, %xmm2 + vpsrld $2, %xmm7, %xmm3 + vpsrld $7, %xmm7, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm2, %xmm4, %xmm4 +L_AES_GCM_init_avx1_calc_iv_done: + # T = Encrypt counter + vpxor %xmm0, %xmm0, %xmm0 + shll $3, %edx + vpinsrd $0x00, %edx, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm4, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm7 + vmovdqa %xmm3, %xmm4 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm1, %xmm4, %xmm4 + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + vpslld $31, %xmm7, %xmm0 + vpslld $30, %xmm7, %xmm1 + vpslld $25, %xmm7, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm7, %xmm7 + vpsrld 
$0x01, %xmm7, %xmm2 + vpsrld $2, %xmm7, %xmm3 + vpsrld $7, %xmm7, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm2, %xmm4, %xmm4 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + # Encrypt counter + vmovdqa (%ebp), %xmm0 + vpxor %xmm4, %xmm0, %xmm0 + vaesenc 16(%ebp), %xmm0, %xmm0 + vaesenc 32(%ebp), %xmm0, %xmm0 + vaesenc 48(%ebp), %xmm0, %xmm0 + vaesenc 64(%ebp), %xmm0, %xmm0 + vaesenc 80(%ebp), %xmm0, %xmm0 + vaesenc 96(%ebp), %xmm0, %xmm0 + vaesenc 112(%ebp), %xmm0, %xmm0 + vaesenc 128(%ebp), %xmm0, %xmm0 + vaesenc 144(%ebp), %xmm0, %xmm0 + cmpl $11, 40(%esp) + vmovdqa 160(%ebp), %xmm1 + jl L_AES_GCM_init_avx1_calc_iv_2_aesenc_avx_last + vaesenc %xmm1, %xmm0, %xmm0 + vaesenc 176(%ebp), %xmm0, %xmm0 + cmpl $13, 40(%esp) + vmovdqa 192(%ebp), %xmm1 + jl L_AES_GCM_init_avx1_calc_iv_2_aesenc_avx_last + vaesenc %xmm1, %xmm0, %xmm0 + vaesenc 208(%ebp), %xmm0, %xmm0 + vmovdqa 224(%ebp), %xmm1 +L_AES_GCM_init_avx1_calc_iv_2_aesenc_avx_last: + vaesenclast %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, (%edi) +L_AES_GCM_init_avx1_iv_done: + movl 52(%esp), %ebp + movl 56(%esp), %edi + vpshufb L_aes_gcm_avx1_bswap_epi64, %xmm4, %xmm4 + vpaddd L_aes_gcm_avx1_one, %xmm4, %xmm4 + vmovdqa %xmm5, (%ebp) + vmovdqa %xmm4, (%edi) + addl $16, %esp + popl %ebp + popl %edi + popl %esi + popl %ebx + ret +.size AES_GCM_init_avx1,.-AES_GCM_init_avx1 +.text +.globl AES_GCM_aad_update_avx1 +.type AES_GCM_aad_update_avx1,@function +.align 16 +AES_GCM_aad_update_avx1: + pushl %esi + pushl %edi + movl 12(%esp), %esi + movl 16(%esp), %edx + movl 20(%esp), %edi + movl 24(%esp), %eax + vmovdqa (%edi), %xmm5 + vmovdqa (%eax), %xmm6 + xorl %ecx, %ecx +L_AES_GCM_aad_update_avx1_16_loop: + vmovdqu (%esi,%ecx,1), %xmm0 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm5, %xmm5 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm5, %xmm1 + vpshufd $0x4e, %xmm6, %xmm2 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm0 + vpxor %xmm5, %xmm1, %xmm1 + vpxor %xmm6, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm4 + vmovdqa %xmm3, %xmm5 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm5, %xmm5 + vpsrld $31, %xmm4, %xmm0 + vpsrld $31, %xmm5, %xmm1 + vpslld $0x01, %xmm4, %xmm4 + vpslld $0x01, %xmm5, %xmm5 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm5, %xmm5 + vpor %xmm0, %xmm4, %xmm4 + vpor %xmm1, %xmm5, %xmm5 + vpslld $31, %xmm4, %xmm0 + vpslld $30, %xmm4, %xmm1 + vpslld $25, %xmm4, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + vpsrld $0x01, %xmm4, %xmm2 + vpsrld $2, %xmm4, %xmm3 + vpsrld $7, %xmm4, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm4, %xmm2, %xmm2 + vpxor %xmm2, %xmm5, %xmm5 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_aad_update_avx1_16_loop + vmovdqa %xmm5, (%edi) + popl %edi + popl %esi + ret +.size AES_GCM_aad_update_avx1,.-AES_GCM_aad_update_avx1 +.text +.globl AES_GCM_encrypt_block_avx1 +.type AES_GCM_encrypt_block_avx1,@function +.align 16 +AES_GCM_encrypt_block_avx1: + pushl %esi + pushl %edi + movl 12(%esp), %ecx + movl 16(%esp), %eax + movl 20(%esp), %edi + movl 24(%esp), %esi + movl 28(%esp), %edx + vmovdqu (%edx), %xmm1 + vpshufb 
L_aes_gcm_avx1_bswap_epi64, %xmm1, %xmm0 + vpaddd L_aes_gcm_avx1_one, %xmm1, %xmm1 + vmovdqu %xmm1, (%edx) + vpxor (%ecx), %xmm0, %xmm0 + vaesenc 16(%ecx), %xmm0, %xmm0 + vaesenc 32(%ecx), %xmm0, %xmm0 + vaesenc 48(%ecx), %xmm0, %xmm0 + vaesenc 64(%ecx), %xmm0, %xmm0 + vaesenc 80(%ecx), %xmm0, %xmm0 + vaesenc 96(%ecx), %xmm0, %xmm0 + vaesenc 112(%ecx), %xmm0, %xmm0 + vaesenc 128(%ecx), %xmm0, %xmm0 + vaesenc 144(%ecx), %xmm0, %xmm0 + cmpl $11, %eax + vmovdqa 160(%ecx), %xmm1 + jl L_AES_GCM_encrypt_block_avx1_aesenc_block_aesenc_avx_last + vaesenc %xmm1, %xmm0, %xmm0 + vaesenc 176(%ecx), %xmm0, %xmm0 + cmpl $13, %eax + vmovdqa 192(%ecx), %xmm1 + jl L_AES_GCM_encrypt_block_avx1_aesenc_block_aesenc_avx_last + vaesenc %xmm1, %xmm0, %xmm0 + vaesenc 208(%ecx), %xmm0, %xmm0 + vmovdqa 224(%ecx), %xmm1 +L_AES_GCM_encrypt_block_avx1_aesenc_block_aesenc_avx_last: + vaesenclast %xmm1, %xmm0, %xmm0 + vmovdqu (%esi), %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, (%edi) + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + popl %edi + popl %esi + ret +.size AES_GCM_encrypt_block_avx1,.-AES_GCM_encrypt_block_avx1 +.text +.globl AES_GCM_ghash_block_avx1 +.type AES_GCM_ghash_block_avx1,@function +.align 16 +AES_GCM_ghash_block_avx1: + movl 4(%esp), %edx + movl 8(%esp), %eax + movl 12(%esp), %ecx + vmovdqa (%eax), %xmm4 + vmovdqa (%ecx), %xmm5 + vmovdqu (%edx), %xmm0 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm4, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm6 + vmovdqa %xmm3, %xmm4 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm6, %xmm6 + vpxor %xmm1, %xmm4, %xmm4 + vpsrld $31, %xmm6, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm6, %xmm6 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm6, %xmm6 + vpor %xmm1, %xmm4, %xmm4 + vpslld $31, %xmm6, %xmm0 + vpslld $30, %xmm6, %xmm1 + vpslld $25, %xmm6, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + vpsrld $0x01, %xmm6, %xmm2 + vpsrld $2, %xmm6, %xmm3 + vpsrld $7, %xmm6, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm6, %xmm2, %xmm2 + vpxor %xmm2, %xmm4, %xmm4 + vmovdqa %xmm4, (%eax) + ret +.size AES_GCM_ghash_block_avx1,.-AES_GCM_ghash_block_avx1 +.text +.globl AES_GCM_encrypt_update_avx1 +.type AES_GCM_encrypt_update_avx1,@function +.align 16 +AES_GCM_encrypt_update_avx1: + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + subl $0x60, %esp + movl 144(%esp), %esi + vmovdqa (%esi), %xmm4 + vmovdqu %xmm4, 64(%esp) + movl 136(%esp), %esi + movl 140(%esp), %ebp + vmovdqa (%esi), %xmm6 + vmovdqa (%ebp), %xmm5 + vmovdqu %xmm6, 80(%esp) + movl 116(%esp), %ebp + movl 124(%esp), %edi + movl 128(%esp), %esi + vpsrlq $63, %xmm5, %xmm1 + vpsllq $0x01, %xmm5, %xmm0 + vpslldq $8, %xmm1, %xmm1 + vpor %xmm1, %xmm0, %xmm0 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpand L_aes_gcm_avx1_mod2_128, %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + xorl %ebx, %ebx + cmpl $0x40, 132(%esp) + movl 132(%esp), %eax + jl 
L_AES_GCM_encrypt_update_avx1_done_64 + andl $0xffffffc0, %eax + vmovdqa %xmm6, %xmm2 + # H ^ 1 + vmovdqu %xmm5, (%esp) + # H ^ 2 + vpclmulqdq $0x00, %xmm5, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm5, %xmm5, %xmm4 + vpslld $31, %xmm0, %xmm1 + vpslld $30, %xmm0, %xmm2 + vpslld $25, %xmm0, %xmm3 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpsrldq $4, %xmm1, %xmm3 + vpslldq $12, %xmm1, %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vpsrld $0x01, %xmm0, %xmm1 + vpsrld $2, %xmm0, %xmm2 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpsrld $7, %xmm0, %xmm0 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + vmovdqu %xmm4, 16(%esp) + # H ^ 3 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm5, %xmm1 + vpshufd $0x4e, %xmm4, %xmm2 + vpclmulqdq $0x11, %xmm5, %xmm4, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm0 + vpxor %xmm5, %xmm1, %xmm1 + vpxor %xmm4, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm0, %xmm0 + vpxor %xmm1, %xmm3, %xmm7 + vpslld $31, %xmm0, %xmm1 + vpslld $30, %xmm0, %xmm2 + vpslld $25, %xmm0, %xmm3 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpsrldq $4, %xmm1, %xmm3 + vpslldq $12, %xmm1, %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vpsrld $0x01, %xmm0, %xmm1 + vpsrld $2, %xmm0, %xmm2 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpsrld $7, %xmm0, %xmm0 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + vmovdqu %xmm7, 32(%esp) + # H ^ 4 + vpclmulqdq $0x00, %xmm4, %xmm4, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm4, %xmm7 + vpslld $31, %xmm0, %xmm1 + vpslld $30, %xmm0, %xmm2 + vpslld $25, %xmm0, %xmm3 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpsrldq $4, %xmm1, %xmm3 + vpslldq $12, %xmm1, %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vpsrld $0x01, %xmm0, %xmm1 + vpsrld $2, %xmm0, %xmm2 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpsrld $7, %xmm0, %xmm0 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + vmovdqu %xmm7, 48(%esp) + # First 64 bytes of input + vmovdqu 64(%esp), %xmm0 + vmovdqa L_aes_gcm_avx1_bswap_epi64, %xmm7 + vpaddd L_aes_gcm_avx1_one, %xmm0, %xmm1 + vpshufb %xmm7, %xmm1, %xmm1 + vpaddd L_aes_gcm_avx1_two, %xmm0, %xmm2 + vpshufb %xmm7, %xmm2, %xmm2 + vpaddd L_aes_gcm_avx1_three, %xmm0, %xmm3 + vpshufb %xmm7, %xmm3, %xmm3 + vpshufb %xmm7, %xmm0, %xmm0 + vmovdqu 64(%esp), %xmm7 + vpaddd L_aes_gcm_avx1_four, %xmm7, %xmm7 + vmovdqu %xmm7, 64(%esp) + vmovdqa (%ebp), %xmm7 + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm7, %xmm3, %xmm3 + vmovdqa 16(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 32(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 48(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 64(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 80(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 96(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, 
%xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 112(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 128(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 144(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $11, 120(%esp) + vmovdqa 160(%ebp), %xmm7 + jl L_AES_GCM_encrypt_update_avx1_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 176(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $13, 120(%esp) + vmovdqa 192(%ebp), %xmm7 + jl L_AES_GCM_encrypt_update_avx1_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 208(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 224(%ebp), %xmm7 +L_AES_GCM_encrypt_update_avx1_aesenc_64_enc_done: + vaesenclast %xmm7, %xmm0, %xmm0 + vaesenclast %xmm7, %xmm1, %xmm1 + vmovdqu (%esi), %xmm4 + vmovdqu 16(%esi), %xmm5 + vpxor %xmm4, %xmm0, %xmm0 + vpxor %xmm5, %xmm1, %xmm1 + vmovdqu %xmm4, (%esi) + vmovdqu %xmm5, 16(%esi) + vmovdqu %xmm0, (%edi) + vmovdqu %xmm1, 16(%edi) + vaesenclast %xmm7, %xmm2, %xmm2 + vaesenclast %xmm7, %xmm3, %xmm3 + vmovdqu 32(%esi), %xmm4 + vmovdqu 48(%esi), %xmm5 + vpxor %xmm4, %xmm2, %xmm2 + vpxor %xmm5, %xmm3, %xmm3 + vmovdqu %xmm4, 32(%esi) + vmovdqu %xmm5, 48(%esi) + vmovdqu %xmm2, 32(%edi) + vmovdqu %xmm3, 48(%edi) + cmpl $0x40, %eax + movl $0x40, %ebx + movl %esi, %ecx + movl %edi, %edx + jle L_AES_GCM_encrypt_update_avx1_end_64 + # More 64 bytes of input +L_AES_GCM_encrypt_update_avx1_ghash_64: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + vmovdqu 64(%esp), %xmm0 + vmovdqa L_aes_gcm_avx1_bswap_epi64, %xmm7 + vpaddd L_aes_gcm_avx1_one, %xmm0, %xmm1 + vpshufb %xmm7, %xmm1, %xmm1 + vpaddd L_aes_gcm_avx1_two, %xmm0, %xmm2 + vpshufb %xmm7, %xmm2, %xmm2 + vpaddd L_aes_gcm_avx1_three, %xmm0, %xmm3 + vpshufb %xmm7, %xmm3, %xmm3 + vpshufb %xmm7, %xmm0, %xmm0 + vmovdqu 64(%esp), %xmm7 + vpaddd L_aes_gcm_avx1_four, %xmm7, %xmm7 + vmovdqu %xmm7, 64(%esp) + vmovdqa (%ebp), %xmm7 + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm7, %xmm3, %xmm3 + vmovdqa 16(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 32(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 48(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 64(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 80(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 96(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 112(%ebp), %xmm7 + vaesenc 
%xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 128(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 144(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $11, 120(%esp) + vmovdqa 160(%ebp), %xmm7 + jl L_AES_GCM_encrypt_update_avx1_aesenc_64_ghash_avx_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 176(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $13, 120(%esp) + vmovdqa 192(%ebp), %xmm7 + jl L_AES_GCM_encrypt_update_avx1_aesenc_64_ghash_avx_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 208(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 224(%ebp), %xmm7 +L_AES_GCM_encrypt_update_avx1_aesenc_64_ghash_avx_aesenc_64_enc_done: + vaesenclast %xmm7, %xmm0, %xmm0 + vaesenclast %xmm7, %xmm1, %xmm1 + vmovdqu (%ecx), %xmm4 + vmovdqu 16(%ecx), %xmm5 + vpxor %xmm4, %xmm0, %xmm0 + vpxor %xmm5, %xmm1, %xmm1 + vmovdqu %xmm0, (%edx) + vmovdqu %xmm1, 16(%edx) + vaesenclast %xmm7, %xmm2, %xmm2 + vaesenclast %xmm7, %xmm3, %xmm3 + vmovdqu 32(%ecx), %xmm4 + vmovdqu 48(%ecx), %xmm5 + vpxor %xmm4, %xmm2, %xmm2 + vpxor %xmm5, %xmm3, %xmm3 + vmovdqu %xmm2, 32(%edx) + vmovdqu %xmm3, 48(%edx) + # ghash encrypted counter + vmovdqu 80(%esp), %xmm2 + vmovdqu 48(%esp), %xmm7 + vmovdqu -64(%edx), %xmm0 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm3 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm2 + vpclmulqdq $0x00, %xmm5, %xmm1, %xmm1 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqu 32(%esp), %xmm7 + vmovdqu -48(%edx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu 16(%esp), %xmm7 + vmovdqu -32(%edx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu (%esp), %xmm7 + vmovdqu -16(%edx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, 
%xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vpslldq $8, %xmm1, %xmm5 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpxor %xmm1, %xmm3, %xmm3 + vpslld $31, %xmm2, %xmm7 + vpslld $30, %xmm2, %xmm4 + vpslld $25, %xmm2, %xmm5 + vpxor %xmm4, %xmm7, %xmm7 + vpxor %xmm5, %xmm7, %xmm7 + vpsrldq $4, %xmm7, %xmm4 + vpslldq $12, %xmm7, %xmm7 + vpxor %xmm7, %xmm2, %xmm2 + vpsrld $0x01, %xmm2, %xmm5 + vpsrld $2, %xmm2, %xmm1 + vpsrld $7, %xmm2, %xmm0 + vpxor %xmm1, %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm2, %xmm2 + vpxor %xmm3, %xmm2, %xmm2 + vmovdqu %xmm2, 80(%esp) + addl $0x40, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_encrypt_update_avx1_ghash_64 +L_AES_GCM_encrypt_update_avx1_end_64: + movdqu 80(%esp), %xmm6 + # Block 1 + vmovdqa L_aes_gcm_avx1_bswap_mask, %xmm0 + vmovdqu (%edx), %xmm5 + pshufb %xmm0, %xmm5 + vmovdqu 48(%esp), %xmm7 + pxor %xmm6, %xmm5 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm5, %xmm1 + vpshufd $0x4e, %xmm7, %xmm2 + vpclmulqdq $0x11, %xmm5, %xmm7, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm7, %xmm0 + vpxor %xmm5, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm4 + vmovdqa %xmm3, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + # Block 2 + vmovdqa L_aes_gcm_avx1_bswap_mask, %xmm0 + vmovdqu 16(%edx), %xmm5 + pshufb %xmm0, %xmm5 + vmovdqu 32(%esp), %xmm7 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm5, %xmm1 + vpshufd $0x4e, %xmm7, %xmm2 + vpclmulqdq $0x11, %xmm5, %xmm7, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm7, %xmm0 + vpxor %xmm5, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm3, %xmm6, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + # Block 3 + vmovdqa L_aes_gcm_avx1_bswap_mask, %xmm0 + vmovdqu 32(%edx), %xmm5 + pshufb %xmm0, %xmm5 + vmovdqu 16(%esp), %xmm7 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm5, %xmm1 + vpshufd $0x4e, %xmm7, %xmm2 + vpclmulqdq $0x11, %xmm5, %xmm7, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm7, %xmm0 + vpxor %xmm5, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm3, %xmm6, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + # Block 4 + vmovdqa L_aes_gcm_avx1_bswap_mask, %xmm0 + vmovdqu 48(%edx), %xmm5 + pshufb %xmm0, %xmm5 + vmovdqu (%esp), %xmm7 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm5, %xmm1 + vpshufd $0x4e, %xmm7, %xmm2 + vpclmulqdq $0x11, %xmm5, %xmm7, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm7, %xmm0 + vpxor %xmm5, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm3, %xmm6, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + vpslld $31, %xmm4, %xmm0 + vpslld $30, %xmm4, %xmm1 + vpslld $25, %xmm4, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + vpsrld $0x01, %xmm4, %xmm2 + vpsrld $2, 
%xmm4, %xmm3 + vpsrld $7, %xmm4, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm4, %xmm2, %xmm2 + vpxor %xmm2, %xmm6, %xmm6 + vmovdqu (%esp), %xmm5 +L_AES_GCM_encrypt_update_avx1_done_64: + movl 132(%esp), %edx + cmpl %edx, %ebx + jge L_AES_GCM_encrypt_update_avx1_done_enc + movl 132(%esp), %eax + andl $0xfffffff0, %eax + cmpl %eax, %ebx + jge L_AES_GCM_encrypt_update_avx1_last_block_done + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + vmovdqu 64(%esp), %xmm1 + vpshufb L_aes_gcm_avx1_bswap_epi64, %xmm1, %xmm0 + vpaddd L_aes_gcm_avx1_one, %xmm1, %xmm1 + vmovdqu %xmm1, 64(%esp) + vpxor (%ebp), %xmm0, %xmm0 + vaesenc 16(%ebp), %xmm0, %xmm0 + vaesenc 32(%ebp), %xmm0, %xmm0 + vaesenc 48(%ebp), %xmm0, %xmm0 + vaesenc 64(%ebp), %xmm0, %xmm0 + vaesenc 80(%ebp), %xmm0, %xmm0 + vaesenc 96(%ebp), %xmm0, %xmm0 + vaesenc 112(%ebp), %xmm0, %xmm0 + vaesenc 128(%ebp), %xmm0, %xmm0 + vaesenc 144(%ebp), %xmm0, %xmm0 + cmpl $11, 120(%esp) + vmovdqa 160(%ebp), %xmm1 + jl L_AES_GCM_encrypt_update_avx1_aesenc_block_aesenc_avx_last + vaesenc %xmm1, %xmm0, %xmm0 + vaesenc 176(%ebp), %xmm0, %xmm0 + cmpl $13, 120(%esp) + vmovdqa 192(%ebp), %xmm1 + jl L_AES_GCM_encrypt_update_avx1_aesenc_block_aesenc_avx_last + vaesenc %xmm1, %xmm0, %xmm0 + vaesenc 208(%ebp), %xmm0, %xmm0 + vmovdqa 224(%ebp), %xmm1 +L_AES_GCM_encrypt_update_avx1_aesenc_block_aesenc_avx_last: + vaesenclast %xmm1, %xmm0, %xmm0 + vmovdqu (%ecx), %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, (%edx) + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + addl $16, %ebx + cmpl %eax, %ebx + jge L_AES_GCM_encrypt_update_avx1_last_block_ghash +L_AES_GCM_encrypt_update_avx1_last_block_start: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + vmovdqu 64(%esp), %xmm1 + vmovdqu %xmm6, %xmm3 + vpshufb L_aes_gcm_avx1_bswap_epi64, %xmm1, %xmm0 + vpaddd L_aes_gcm_avx1_one, %xmm1, %xmm1 + vmovdqu %xmm1, 64(%esp) + vpxor (%ebp), %xmm0, %xmm0 + vpclmulqdq $16, %xmm5, %xmm3, %xmm4 + vaesenc 16(%ebp), %xmm0, %xmm0 + vaesenc 32(%ebp), %xmm0, %xmm0 + vpclmulqdq $0x01, %xmm5, %xmm3, %xmm7 + vaesenc 48(%ebp), %xmm0, %xmm0 + vaesenc 64(%ebp), %xmm0, %xmm0 + vaesenc 80(%ebp), %xmm0, %xmm0 + vpclmulqdq $0x11, %xmm5, %xmm3, %xmm1 + vaesenc 96(%ebp), %xmm0, %xmm0 + vpxor %xmm7, %xmm4, %xmm4 + vpslldq $8, %xmm4, %xmm2 + vpsrldq $8, %xmm4, %xmm4 + vaesenc 112(%ebp), %xmm0, %xmm0 + vpclmulqdq $0x00, %xmm5, %xmm3, %xmm7 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa L_aes_gcm_avx1_mod2_128, %xmm3 + vpclmulqdq $16, %xmm3, %xmm2, %xmm7 + vaesenc 128(%ebp), %xmm0, %xmm0 + vpshufd $0x4e, %xmm2, %xmm4 + vpxor %xmm7, %xmm4, %xmm4 + vpclmulqdq $16, %xmm3, %xmm4, %xmm7 + vaesenc 144(%ebp), %xmm0, %xmm0 + vpshufd $0x4e, %xmm4, %xmm6 + vpxor %xmm7, %xmm6, %xmm6 + vpxor %xmm1, %xmm6, %xmm6 + cmpl $11, 120(%esp) + vmovdqa 160(%ebp), %xmm1 + jl L_AES_GCM_encrypt_update_avx1_aesenc_gfmul_last + vaesenc %xmm1, %xmm0, %xmm0 + vaesenc 176(%ebp), %xmm0, %xmm0 + cmpl $13, 120(%esp) + vmovdqa 192(%ebp), %xmm1 + jl L_AES_GCM_encrypt_update_avx1_aesenc_gfmul_last + vaesenc %xmm1, %xmm0, %xmm0 + vaesenc 208(%ebp), %xmm0, %xmm0 + vmovdqa 224(%ebp), %xmm1 +L_AES_GCM_encrypt_update_avx1_aesenc_gfmul_last: + vaesenclast %xmm1, %xmm0, %xmm0 + vmovdqu (%ecx), %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, (%edx) + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + addl $16, %ebx + vpxor %xmm0, %xmm6, %xmm6 + cmpl %eax, %ebx + jl L_AES_GCM_encrypt_update_avx1_last_block_start 
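+ # Final Horner step for this update call: multiply the GHASH accumulator
+ # by H one more time and reduce it modulo the GCM polynomial
+ # (x^128 + x^7 + x^2 + x + 1) before the state is written back below.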
+L_AES_GCM_encrypt_update_avx1_last_block_ghash: + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm5, %xmm1 + vpshufd $0x4e, %xmm6, %xmm2 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm0 + vpxor %xmm5, %xmm1, %xmm1 + vpxor %xmm6, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm0, %xmm0 + vpxor %xmm1, %xmm3, %xmm6 + vpslld $31, %xmm0, %xmm1 + vpslld $30, %xmm0, %xmm2 + vpslld $25, %xmm0, %xmm3 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpsrldq $4, %xmm1, %xmm3 + vpslldq $12, %xmm1, %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vpsrld $0x01, %xmm0, %xmm1 + vpsrld $2, %xmm0, %xmm2 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpsrld $7, %xmm0, %xmm0 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm6, %xmm6 +L_AES_GCM_encrypt_update_avx1_last_block_done: +L_AES_GCM_encrypt_update_avx1_done_enc: + movl 136(%esp), %esi + movl 144(%esp), %edi + vmovdqu 64(%esp), %xmm4 + vmovdqa %xmm6, (%esi) + vmovdqu %xmm4, (%edi) + addl $0x60, %esp + popl %ebp + popl %edi + popl %esi + popl %ebx + ret +.size AES_GCM_encrypt_update_avx1,.-AES_GCM_encrypt_update_avx1 +.text +.globl AES_GCM_encrypt_final_avx1 +.type AES_GCM_encrypt_final_avx1,@function +.align 16 +AES_GCM_encrypt_final_avx1: + pushl %esi + pushl %edi + pushl %ebp + subl $16, %esp + movl 32(%esp), %ebp + movl 52(%esp), %esi + movl 56(%esp), %edi + vmovdqa (%ebp), %xmm4 + vmovdqa (%esi), %xmm5 + vmovdqa (%edi), %xmm6 + vpsrlq $63, %xmm5, %xmm1 + vpsllq $0x01, %xmm5, %xmm0 + vpslldq $8, %xmm1, %xmm1 + vpor %xmm1, %xmm0, %xmm0 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpand L_aes_gcm_avx1_mod2_128, %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + movl 44(%esp), %edx + movl 48(%esp), %ecx + shll $3, %edx + shll $3, %ecx + vpinsrd $0x00, %edx, %xmm0, %xmm0 + vpinsrd $2, %ecx, %xmm0, %xmm0 + movl 44(%esp), %edx + movl 48(%esp), %ecx + shrl $29, %edx + shrl $29, %ecx + vpinsrd $0x01, %edx, %xmm0, %xmm0 + vpinsrd $3, %ecx, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm5, %xmm1 + vpshufd $0x4e, %xmm4, %xmm2 + vpclmulqdq $0x11, %xmm5, %xmm4, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm0 + vpxor %xmm5, %xmm1, %xmm1 + vpxor %xmm4, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm0, %xmm0 + vpxor %xmm1, %xmm3, %xmm4 + vpslld $31, %xmm0, %xmm1 + vpslld $30, %xmm0, %xmm2 + vpslld $25, %xmm0, %xmm3 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpsrldq $4, %xmm1, %xmm3 + vpslldq $12, %xmm1, %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vpsrld $0x01, %xmm0, %xmm1 + vpsrld $2, %xmm0, %xmm2 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpsrld $7, %xmm0, %xmm0 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm4, %xmm4 + vpxor %xmm6, %xmm4, %xmm0 + movl 36(%esp), %edi + cmpl $16, 40(%esp) + je L_AES_GCM_encrypt_final_avx1_store_tag_16 + xorl %ecx, %ecx + vmovdqu %xmm0, (%esp) +L_AES_GCM_encrypt_final_avx1_store_tag_loop: + movzbl (%esp,%ecx,1), %eax + movb %al, (%edi,%ecx,1) + incl %ecx + cmpl 40(%esp), %ecx + jne L_AES_GCM_encrypt_final_avx1_store_tag_loop + jmp L_AES_GCM_encrypt_final_avx1_store_tag_done +L_AES_GCM_encrypt_final_avx1_store_tag_16: + vmovdqu %xmm0, (%edi) 
+L_AES_GCM_encrypt_final_avx1_store_tag_done: + addl $16, %esp + popl %ebp + popl %edi + popl %esi + ret +.size AES_GCM_encrypt_final_avx1,.-AES_GCM_encrypt_final_avx1 +.text +.globl AES_GCM_decrypt_update_avx1 +.type AES_GCM_decrypt_update_avx1,@function +.align 16 +AES_GCM_decrypt_update_avx1: + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + subl $0xa0, %esp + movl 208(%esp), %esi + vmovdqa (%esi), %xmm4 + vmovdqu %xmm4, 64(%esp) + movl 200(%esp), %esi + movl 204(%esp), %ebp + vmovdqa (%esi), %xmm6 + vmovdqa (%ebp), %xmm5 + vmovdqu %xmm6, 80(%esp) + movl 180(%esp), %ebp + movl 188(%esp), %edi + movl 192(%esp), %esi + vpsrlq $63, %xmm5, %xmm1 + vpsllq $0x01, %xmm5, %xmm0 + vpslldq $8, %xmm1, %xmm1 + vpor %xmm1, %xmm0, %xmm0 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpand L_aes_gcm_avx1_mod2_128, %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + xorl %ebx, %ebx + cmpl $0x40, 196(%esp) + movl 196(%esp), %eax + jl L_AES_GCM_decrypt_update_avx1_done_64 + andl $0xffffffc0, %eax + vmovdqa %xmm6, %xmm2 + # H ^ 1 + vmovdqu %xmm5, (%esp) + # H ^ 2 + vpclmulqdq $0x00, %xmm5, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm5, %xmm5, %xmm4 + vpslld $31, %xmm0, %xmm1 + vpslld $30, %xmm0, %xmm2 + vpslld $25, %xmm0, %xmm3 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpsrldq $4, %xmm1, %xmm3 + vpslldq $12, %xmm1, %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vpsrld $0x01, %xmm0, %xmm1 + vpsrld $2, %xmm0, %xmm2 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpsrld $7, %xmm0, %xmm0 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + vmovdqu %xmm4, 16(%esp) + # H ^ 3 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm5, %xmm1 + vpshufd $0x4e, %xmm4, %xmm2 + vpclmulqdq $0x11, %xmm5, %xmm4, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm0 + vpxor %xmm5, %xmm1, %xmm1 + vpxor %xmm4, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm0, %xmm0 + vpxor %xmm1, %xmm3, %xmm7 + vpslld $31, %xmm0, %xmm1 + vpslld $30, %xmm0, %xmm2 + vpslld $25, %xmm0, %xmm3 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpsrldq $4, %xmm1, %xmm3 + vpslldq $12, %xmm1, %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vpsrld $0x01, %xmm0, %xmm1 + vpsrld $2, %xmm0, %xmm2 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpsrld $7, %xmm0, %xmm0 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + vmovdqu %xmm7, 32(%esp) + # H ^ 4 + vpclmulqdq $0x00, %xmm4, %xmm4, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm4, %xmm7 + vpslld $31, %xmm0, %xmm1 + vpslld $30, %xmm0, %xmm2 + vpslld $25, %xmm0, %xmm3 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpsrldq $4, %xmm1, %xmm3 + vpslldq $12, %xmm1, %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vpsrld $0x01, %xmm0, %xmm1 + vpsrld $2, %xmm0, %xmm2 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpsrld $7, %xmm0, %xmm0 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + vmovdqu %xmm7, 48(%esp) + cmpl %esi, %edi + jne L_AES_GCM_decrypt_update_avx1_ghash_64 +L_AES_GCM_decrypt_update_avx1_ghash_64_inplace: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + vmovdqu 64(%esp), %xmm0 + vmovdqa L_aes_gcm_avx1_bswap_epi64, %xmm7 + vpaddd L_aes_gcm_avx1_one, %xmm0, %xmm1 + vpshufb %xmm7, %xmm1, %xmm1 + vpaddd L_aes_gcm_avx1_two, %xmm0, %xmm2 + vpshufb %xmm7, %xmm2, %xmm2 + vpaddd L_aes_gcm_avx1_three, %xmm0, %xmm3 + vpshufb %xmm7, %xmm3, %xmm3 + vpshufb %xmm7, 
%xmm0, %xmm0 + vmovdqu 64(%esp), %xmm7 + vpaddd L_aes_gcm_avx1_four, %xmm7, %xmm7 + vmovdqu %xmm7, 64(%esp) + vmovdqa (%ebp), %xmm7 + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm7, %xmm3, %xmm3 + vmovdqa 16(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 32(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 48(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 64(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 80(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 96(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 112(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 128(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 144(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $11, 184(%esp) + vmovdqa 160(%ebp), %xmm7 + jl L_AES_GCM_decrypt_update_avx1inplace_aesenc_64_ghash_avx_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 176(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $13, 184(%esp) + vmovdqa 192(%ebp), %xmm7 + jl L_AES_GCM_decrypt_update_avx1inplace_aesenc_64_ghash_avx_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 208(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 224(%ebp), %xmm7 +L_AES_GCM_decrypt_update_avx1inplace_aesenc_64_ghash_avx_aesenc_64_enc_done: + vaesenclast %xmm7, %xmm0, %xmm0 + vaesenclast %xmm7, %xmm1, %xmm1 + vmovdqu (%ecx), %xmm4 + vmovdqu 16(%ecx), %xmm5 + vpxor %xmm4, %xmm0, %xmm0 + vpxor %xmm5, %xmm1, %xmm1 + vmovdqu %xmm4, 96(%esp) + vmovdqu %xmm5, 112(%esp) + vmovdqu %xmm0, (%edx) + vmovdqu %xmm1, 16(%edx) + vaesenclast %xmm7, %xmm2, %xmm2 + vaesenclast %xmm7, %xmm3, %xmm3 + vmovdqu 32(%ecx), %xmm4 + vmovdqu 48(%ecx), %xmm5 + vpxor %xmm4, %xmm2, %xmm2 + vpxor %xmm5, %xmm3, %xmm3 + vmovdqu %xmm4, 128(%esp) + vmovdqu %xmm5, 144(%esp) + vmovdqu %xmm2, 32(%edx) + vmovdqu %xmm3, 48(%edx) + # ghash encrypted counter + vmovdqu 80(%esp), %xmm2 + vmovdqu 48(%esp), %xmm7 + vmovdqu 96(%esp), %xmm0 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm3 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm2 + vpclmulqdq $0x00, %xmm5, %xmm1, %xmm1 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqu 32(%esp), %xmm7 + vmovdqu 112(%esp), %xmm0 + 
vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu 16(%esp), %xmm7 + vmovdqu 128(%esp), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu (%esp), %xmm7 + vmovdqu 144(%esp), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vpslldq $8, %xmm1, %xmm5 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpxor %xmm1, %xmm3, %xmm3 + vpslld $31, %xmm2, %xmm7 + vpslld $30, %xmm2, %xmm4 + vpslld $25, %xmm2, %xmm5 + vpxor %xmm4, %xmm7, %xmm7 + vpxor %xmm5, %xmm7, %xmm7 + vpsrldq $4, %xmm7, %xmm4 + vpslldq $12, %xmm7, %xmm7 + vpxor %xmm7, %xmm2, %xmm2 + vpsrld $0x01, %xmm2, %xmm5 + vpsrld $2, %xmm2, %xmm1 + vpsrld $7, %xmm2, %xmm0 + vpxor %xmm1, %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm2, %xmm2 + vpxor %xmm3, %xmm2, %xmm2 + vmovdqu %xmm2, 80(%esp) + addl $0x40, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_decrypt_update_avx1_ghash_64_inplace + jmp L_AES_GCM_decrypt_update_avx1_ghash_64_done +L_AES_GCM_decrypt_update_avx1_ghash_64: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + vmovdqu 64(%esp), %xmm0 + vmovdqa L_aes_gcm_avx1_bswap_epi64, %xmm7 + vpaddd L_aes_gcm_avx1_one, %xmm0, %xmm1 + vpshufb %xmm7, %xmm1, %xmm1 + vpaddd L_aes_gcm_avx1_two, %xmm0, %xmm2 + vpshufb %xmm7, %xmm2, %xmm2 + vpaddd L_aes_gcm_avx1_three, %xmm0, %xmm3 + vpshufb %xmm7, %xmm3, %xmm3 + vpshufb %xmm7, %xmm0, %xmm0 + vmovdqu 64(%esp), %xmm7 + vpaddd L_aes_gcm_avx1_four, %xmm7, %xmm7 + vmovdqu %xmm7, 64(%esp) + vmovdqa (%ebp), %xmm7 + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm7, %xmm3, %xmm3 + vmovdqa 16(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 32(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 48(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 64(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 80(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 96(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, 
%xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 112(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 128(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 144(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $11, 184(%esp) + vmovdqa 160(%ebp), %xmm7 + jl L_AES_GCM_decrypt_update_avx1_aesenc_64_ghash_avx_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 176(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $13, 184(%esp) + vmovdqa 192(%ebp), %xmm7 + jl L_AES_GCM_decrypt_update_avx1_aesenc_64_ghash_avx_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 208(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqa 224(%ebp), %xmm7 +L_AES_GCM_decrypt_update_avx1_aesenc_64_ghash_avx_aesenc_64_enc_done: + vaesenclast %xmm7, %xmm0, %xmm0 + vaesenclast %xmm7, %xmm1, %xmm1 + vmovdqu (%ecx), %xmm4 + vmovdqu 16(%ecx), %xmm5 + vpxor %xmm4, %xmm0, %xmm0 + vpxor %xmm5, %xmm1, %xmm1 + vmovdqu %xmm4, (%ecx) + vmovdqu %xmm5, 16(%ecx) + vmovdqu %xmm0, (%edx) + vmovdqu %xmm1, 16(%edx) + vaesenclast %xmm7, %xmm2, %xmm2 + vaesenclast %xmm7, %xmm3, %xmm3 + vmovdqu 32(%ecx), %xmm4 + vmovdqu 48(%ecx), %xmm5 + vpxor %xmm4, %xmm2, %xmm2 + vpxor %xmm5, %xmm3, %xmm3 + vmovdqu %xmm4, 32(%ecx) + vmovdqu %xmm5, 48(%ecx) + vmovdqu %xmm2, 32(%edx) + vmovdqu %xmm3, 48(%edx) + # ghash encrypted counter + vmovdqu 80(%esp), %xmm2 + vmovdqu 48(%esp), %xmm7 + vmovdqu (%ecx), %xmm0 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm3 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm2 + vpclmulqdq $0x00, %xmm5, %xmm1, %xmm1 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqu 32(%esp), %xmm7 + vmovdqu 16(%ecx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu 16(%esp), %xmm7 + vmovdqu 32(%ecx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu (%esp), %xmm7 + vmovdqu 48(%ecx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm0, %xmm0 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, 
%xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vpslldq $8, %xmm1, %xmm5 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpxor %xmm1, %xmm3, %xmm3 + vpslld $31, %xmm2, %xmm7 + vpslld $30, %xmm2, %xmm4 + vpslld $25, %xmm2, %xmm5 + vpxor %xmm4, %xmm7, %xmm7 + vpxor %xmm5, %xmm7, %xmm7 + vpsrldq $4, %xmm7, %xmm4 + vpslldq $12, %xmm7, %xmm7 + vpxor %xmm7, %xmm2, %xmm2 + vpsrld $0x01, %xmm2, %xmm5 + vpsrld $2, %xmm2, %xmm1 + vpsrld $7, %xmm2, %xmm0 + vpxor %xmm1, %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm2, %xmm2 + vpxor %xmm3, %xmm2, %xmm2 + vmovdqu %xmm2, 80(%esp) + addl $0x40, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_decrypt_update_avx1_ghash_64 +L_AES_GCM_decrypt_update_avx1_ghash_64_done: + vmovdqa %xmm2, %xmm6 + vmovdqu (%esp), %xmm5 +L_AES_GCM_decrypt_update_avx1_done_64: + movl 196(%esp), %edx + cmpl %edx, %ebx + jge L_AES_GCM_decrypt_update_avx1_done_dec + movl 196(%esp), %eax + andl $0xfffffff0, %eax + cmpl %eax, %ebx + jge L_AES_GCM_decrypt_update_avx1_last_block_done +L_AES_GCM_decrypt_update_avx1_last_block_start: + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + vmovdqu (%ecx), %xmm1 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm1, %xmm1 + vpxor %xmm6, %xmm1, %xmm1 + vmovdqu %xmm1, (%esp) + vmovdqu 64(%esp), %xmm1 + vmovdqu (%esp), %xmm3 + vpshufb L_aes_gcm_avx1_bswap_epi64, %xmm1, %xmm0 + vpaddd L_aes_gcm_avx1_one, %xmm1, %xmm1 + vmovdqu %xmm1, 64(%esp) + vpxor (%ebp), %xmm0, %xmm0 + vpclmulqdq $16, %xmm5, %xmm3, %xmm4 + vaesenc 16(%ebp), %xmm0, %xmm0 + vaesenc 32(%ebp), %xmm0, %xmm0 + vpclmulqdq $0x01, %xmm5, %xmm3, %xmm7 + vaesenc 48(%ebp), %xmm0, %xmm0 + vaesenc 64(%ebp), %xmm0, %xmm0 + vaesenc 80(%ebp), %xmm0, %xmm0 + vpclmulqdq $0x11, %xmm5, %xmm3, %xmm1 + vaesenc 96(%ebp), %xmm0, %xmm0 + vpxor %xmm7, %xmm4, %xmm4 + vpslldq $8, %xmm4, %xmm2 + vpsrldq $8, %xmm4, %xmm4 + vaesenc 112(%ebp), %xmm0, %xmm0 + vpclmulqdq $0x00, %xmm5, %xmm3, %xmm7 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa L_aes_gcm_avx1_mod2_128, %xmm3 + vpclmulqdq $16, %xmm3, %xmm2, %xmm7 + vaesenc 128(%ebp), %xmm0, %xmm0 + vpshufd $0x4e, %xmm2, %xmm4 + vpxor %xmm7, %xmm4, %xmm4 + vpclmulqdq $16, %xmm3, %xmm4, %xmm7 + vaesenc 144(%ebp), %xmm0, %xmm0 + vpshufd $0x4e, %xmm4, %xmm6 + vpxor %xmm7, %xmm6, %xmm6 + vpxor %xmm1, %xmm6, %xmm6 + cmpl $11, 184(%esp) + vmovdqa 160(%ebp), %xmm1 + jl L_AES_GCM_decrypt_update_avx1_aesenc_gfmul_last + vaesenc %xmm1, %xmm0, %xmm0 + vaesenc 176(%ebp), %xmm0, %xmm0 + cmpl $13, 184(%esp) + vmovdqa 192(%ebp), %xmm1 + jl L_AES_GCM_decrypt_update_avx1_aesenc_gfmul_last + vaesenc %xmm1, %xmm0, %xmm0 + vaesenc 208(%ebp), %xmm0, %xmm0 + vmovdqa 224(%ebp), %xmm1 +L_AES_GCM_decrypt_update_avx1_aesenc_gfmul_last: + vaesenclast %xmm1, %xmm0, %xmm0 + vmovdqu (%ecx), %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, (%edx) + addl $16, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_decrypt_update_avx1_last_block_start +L_AES_GCM_decrypt_update_avx1_last_block_done: +L_AES_GCM_decrypt_update_avx1_done_dec: + movl 200(%esp), %esi + movl 208(%esp), %edi + vmovdqu 64(%esp), %xmm4 + vmovdqa %xmm6, (%esi) + vmovdqu %xmm4, (%edi) + addl $0xa0, %esp + popl %ebp + popl %edi + popl %esi + popl %ebx + ret +.size AES_GCM_decrypt_update_avx1,.-AES_GCM_decrypt_update_avx1 +.text +.globl 
AES_GCM_decrypt_final_avx1 +.type AES_GCM_decrypt_final_avx1,@function +.align 16 +AES_GCM_decrypt_final_avx1: + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + subl $16, %esp + movl 36(%esp), %ebp + movl 56(%esp), %esi + movl 60(%esp), %edi + vmovdqa (%ebp), %xmm6 + vmovdqa (%esi), %xmm5 + vmovdqa (%edi), %xmm7 + vpsrlq $63, %xmm5, %xmm1 + vpsllq $0x01, %xmm5, %xmm0 + vpslldq $8, %xmm1, %xmm1 + vpor %xmm1, %xmm0, %xmm0 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpand L_aes_gcm_avx1_mod2_128, %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + movl 48(%esp), %edx + movl 52(%esp), %ecx + shll $3, %edx + shll $3, %ecx + vpinsrd $0x00, %edx, %xmm0, %xmm0 + vpinsrd $2, %ecx, %xmm0, %xmm0 + movl 48(%esp), %edx + movl 52(%esp), %ecx + shrl $29, %edx + shrl $29, %ecx + vpinsrd $0x01, %edx, %xmm0, %xmm0 + vpinsrd $3, %ecx, %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm5, %xmm1 + vpshufd $0x4e, %xmm6, %xmm2 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm0 + vpxor %xmm5, %xmm1, %xmm1 + vpxor %xmm6, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm0, %xmm0 + vpxor %xmm1, %xmm3, %xmm6 + vpslld $31, %xmm0, %xmm1 + vpslld $30, %xmm0, %xmm2 + vpslld $25, %xmm0, %xmm3 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpsrldq $4, %xmm1, %xmm3 + vpslldq $12, %xmm1, %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vpsrld $0x01, %xmm0, %xmm1 + vpsrld $2, %xmm0, %xmm2 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpsrld $7, %xmm0, %xmm0 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm6, %xmm6 + vpshufb L_aes_gcm_avx1_bswap_mask, %xmm6, %xmm6 + vpxor %xmm7, %xmm6, %xmm0 + movl 40(%esp), %esi + movl 64(%esp), %edi + cmpl $16, 44(%esp) + je L_AES_GCM_decrypt_final_avx1_cmp_tag_16 + subl $16, %esp + xorl %ecx, %ecx + xorl %ebx, %ebx + vmovdqu %xmm0, (%esp) +L_AES_GCM_decrypt_final_avx1_cmp_tag_loop: + movzbl (%esp,%ecx,1), %eax + xorb (%esi,%ecx,1), %al + orb %al, %bl + incl %ecx + cmpl 44(%esp), %ecx + jne L_AES_GCM_decrypt_final_avx1_cmp_tag_loop + cmpb $0x00, %bl + sete %bl + addl $16, %esp + xorl %ecx, %ecx + jmp L_AES_GCM_decrypt_final_avx1_cmp_tag_done +L_AES_GCM_decrypt_final_avx1_cmp_tag_16: + vmovdqu (%esi), %xmm1 + vpcmpeqb %xmm1, %xmm0, %xmm0 + vpmovmskb %xmm0, %edx + # %%edx == 0xFFFF then return 1 else => return 0 + xorl %ebx, %ebx + cmpl $0xffff, %edx + sete %bl +L_AES_GCM_decrypt_final_avx1_cmp_tag_done: + movl %ebx, (%edi) + addl $16, %esp + popl %ebp + popl %edi + popl %esi + popl %ebx + ret +.size AES_GCM_decrypt_final_avx1,.-AES_GCM_decrypt_final_avx1 +#endif /* WOLFSSL_AESGCM_STREAM */ +#endif /* HAVE_INTEL_AVX1 */ +#ifdef HAVE_INTEL_AVX2 +.text +.globl AES_GCM_encrypt_avx2 +.type AES_GCM_encrypt_avx2,@function +.align 16 +AES_GCM_encrypt_avx2: + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + subl $0x70, %esp + movl 144(%esp), %esi + movl 168(%esp), %ebp + movl 160(%esp), %edx + vpxor %xmm4, %xmm4, %xmm4 + cmpl $12, %edx + je L_AES_GCM_encrypt_avx2_iv_12 + # Calculate values when IV is not 12 bytes + # H = Encrypt X(=0) + vmovdqu (%ebp), %xmm5 + vaesenc 16(%ebp), %xmm5, %xmm5 + vaesenc 32(%ebp), %xmm5, %xmm5 + vaesenc 48(%ebp), %xmm5, %xmm5 + vaesenc 64(%ebp), %xmm5, %xmm5 + vaesenc 80(%ebp), %xmm5, %xmm5 + vaesenc 96(%ebp), %xmm5, %xmm5 + vaesenc 112(%ebp), %xmm5, %xmm5 + vaesenc 128(%ebp), %xmm5, %xmm5 + vaesenc 144(%ebp), %xmm5, %xmm5 + cmpl 
$11, 172(%esp) + vmovdqu 160(%ebp), %xmm0 + jl L_AES_GCM_encrypt_avx2_calc_iv_1_aesenc_avx_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc 176(%ebp), %xmm5, %xmm5 + cmpl $13, 172(%esp) + vmovdqu 192(%ebp), %xmm0 + jl L_AES_GCM_encrypt_avx2_calc_iv_1_aesenc_avx_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc 208(%ebp), %xmm5, %xmm5 + vmovdqu 224(%ebp), %xmm0 +L_AES_GCM_encrypt_avx2_calc_iv_1_aesenc_avx_last: + vaesenclast %xmm0, %xmm5, %xmm5 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm5, %xmm5 + # Calc counter + # Initialization vector + cmpl $0x00, %edx + movl $0x00, %ecx + je L_AES_GCM_encrypt_avx2_calc_iv_done + cmpl $16, %edx + jl L_AES_GCM_encrypt_avx2_calc_iv_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_encrypt_avx2_calc_iv_16_loop: + vmovdqu (%esi,%ecx,1), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_avx2_calc_iv_16_loop + movl 160(%esp), %edx + cmpl %edx, %ecx + je L_AES_GCM_encrypt_avx2_calc_iv_done +L_AES_GCM_encrypt_avx2_calc_iv_lt16: + vpxor %xmm0, %xmm0, %xmm0 + xorl %ebx, %ebx + vmovdqu %xmm0, (%esp) +L_AES_GCM_encrypt_avx2_calc_iv_loop: + movzbl (%esi,%ecx,1), %eax + movb %al, (%esp,%ebx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_avx2_calc_iv_loop + vmovdqu (%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 +L_AES_GCM_encrypt_avx2_calc_iv_done: + # T = Encrypt counter + vpxor %xmm0, %xmm0, %xmm0 + shll $3, %edx + vpinsrd $0x00, %edx, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 
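+ # Distribute the summed middle products across the low (xmm7) and
+ # high (xmm4) halves of the 256-bit carry-less product.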
+ vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm4, %xmm4 + # Encrypt counter + vmovdqu (%ebp), %xmm6 + vpxor %xmm4, %xmm6, %xmm6 + vaesenc 16(%ebp), %xmm6, %xmm6 + vaesenc 32(%ebp), %xmm6, %xmm6 + vaesenc 48(%ebp), %xmm6, %xmm6 + vaesenc 64(%ebp), %xmm6, %xmm6 + vaesenc 80(%ebp), %xmm6, %xmm6 + vaesenc 96(%ebp), %xmm6, %xmm6 + vaesenc 112(%ebp), %xmm6, %xmm6 + vaesenc 128(%ebp), %xmm6, %xmm6 + vaesenc 144(%ebp), %xmm6, %xmm6 + cmpl $11, 172(%esp) + vmovdqu 160(%ebp), %xmm0 + jl L_AES_GCM_encrypt_avx2_calc_iv_2_aesenc_avx_last + vaesenc %xmm0, %xmm6, %xmm6 + vaesenc 176(%ebp), %xmm6, %xmm6 + cmpl $13, 172(%esp) + vmovdqu 192(%ebp), %xmm0 + jl L_AES_GCM_encrypt_avx2_calc_iv_2_aesenc_avx_last + vaesenc %xmm0, %xmm6, %xmm6 + vaesenc 208(%ebp), %xmm6, %xmm6 + vmovdqu 224(%ebp), %xmm0 +L_AES_GCM_encrypt_avx2_calc_iv_2_aesenc_avx_last: + vaesenclast %xmm0, %xmm6, %xmm6 + jmp L_AES_GCM_encrypt_avx2_iv_done +L_AES_GCM_encrypt_avx2_iv_12: + # # Calculate values when IV is 12 bytes + # Set counter based on IV + vmovdqu L_avx2_aes_gcm_bswap_one, %xmm4 + vmovdqu (%ebp), %xmm5 + vpblendd $7, (%esi), %xmm4, %xmm4 + # H = Encrypt X(=0) and T = Encrypt counter + vmovdqu 16(%ebp), %xmm7 + vpxor %xmm5, %xmm4, %xmm6 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm6, %xmm6 + vmovdqu 32(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 48(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 64(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 80(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 96(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 112(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 128(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 144(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + cmpl $11, 172(%esp) + vmovdqu 160(%ebp), %xmm0 + jl L_AES_GCM_encrypt_avx2_calc_iv_12_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 176(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + cmpl $13, 172(%esp) + vmovdqu 192(%ebp), %xmm0 + jl L_AES_GCM_encrypt_avx2_calc_iv_12_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 208(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 224(%ebp), %xmm0 +L_AES_GCM_encrypt_avx2_calc_iv_12_last: + vaesenclast %xmm0, %xmm5, %xmm5 + vaesenclast %xmm0, %xmm6, %xmm6 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm5, %xmm5 +L_AES_GCM_encrypt_avx2_iv_done: + vmovdqu %xmm6, 80(%esp) + vpxor %xmm6, %xmm6, %xmm6 + movl 140(%esp), %esi + # Additional authentication data + movl 156(%esp), %edx + cmpl $0x00, %edx + je L_AES_GCM_encrypt_avx2_calc_aad_done + xorl %ecx, %ecx + cmpl $16, %edx + jl 
L_AES_GCM_encrypt_avx2_calc_aad_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_encrypt_avx2_calc_aad_16_loop: + vmovdqu (%esi,%ecx,1), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm6, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm6, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm6 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm6, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm6, %xmm6 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm6, %xmm6 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm6, %xmm6 + # ghash_red + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm6, %xmm6 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_avx2_calc_aad_16_loop + movl 156(%esp), %edx + cmpl %edx, %ecx + je L_AES_GCM_encrypt_avx2_calc_aad_done +L_AES_GCM_encrypt_avx2_calc_aad_lt16: + vpxor %xmm0, %xmm0, %xmm0 + xorl %ebx, %ebx + vmovdqu %xmm0, (%esp) +L_AES_GCM_encrypt_avx2_calc_aad_loop: + movzbl (%esi,%ecx,1), %eax + movb %al, (%esp,%ebx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_encrypt_avx2_calc_aad_loop + vmovdqu (%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm6, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm6, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm6 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm6, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm6, %xmm6 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm6, %xmm6 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm6, %xmm6 + # ghash_red + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm6, %xmm6 +L_AES_GCM_encrypt_avx2_calc_aad_done: + movl 132(%esp), %esi + movl 136(%esp), %edi + # Calculate counter and H + vpsrlq $63, %xmm5, %xmm1 + vpsllq $0x01, %xmm5, %xmm0 + vpslldq $8, %xmm1, %xmm1 + vpor %xmm1, %xmm0, %xmm0 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpshufb L_aes_gcm_avx2_bswap_epi64, %xmm4, %xmm4 + vpand L_aes_gcm_avx2_mod2_128, %xmm5, %xmm5 + vpaddd L_aes_gcm_avx2_one, %xmm4, %xmm4 + vpxor %xmm0, %xmm5, %xmm5 + xorl %ebx, %ebx + cmpl $0x40, 152(%esp) + movl 152(%esp), %eax + jl L_AES_GCM_encrypt_avx2_done_64 + andl $0xffffffc0, %eax + vmovdqu %xmm4, 64(%esp) + vmovdqu %xmm6, 96(%esp) + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm3 + # H ^ 1 + vmovdqu %xmm5, (%esp) + vmovdqu %xmm5, %xmm2 + # H ^ 2 + vpclmulqdq $0x00, %xmm2, %xmm2, %xmm5 + vpclmulqdq $0x11, %xmm2, %xmm2, %xmm6 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm6, 
%xmm0 + vmovdqu %xmm0, 16(%esp) + # H ^ 3 + # ghash_gfmul_red + vpclmulqdq $16, %xmm0, %xmm2, %xmm6 + vpclmulqdq $0x01, %xmm0, %xmm2, %xmm5 + vpclmulqdq $0x00, %xmm0, %xmm2, %xmm4 + vpxor %xmm5, %xmm6, %xmm6 + vpslldq $8, %xmm6, %xmm5 + vpsrldq $8, %xmm6, %xmm6 + vpxor %xmm4, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm0, %xmm2, %xmm1 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm5, %xmm1, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu %xmm1, 32(%esp) + # H ^ 4 + vpclmulqdq $0x00, %xmm0, %xmm0, %xmm5 + vpclmulqdq $0x11, %xmm0, %xmm0, %xmm6 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm6, %xmm2 + vmovdqu %xmm2, 48(%esp) + vmovdqu 96(%esp), %xmm6 + # First 64 bytes of input + # aesenc_64 + # aesenc_ctr + vmovdqu 64(%esp), %xmm4 + vmovdqu L_aes_gcm_avx2_bswap_epi64, %xmm7 + vpaddd L_aes_gcm_avx2_one, %xmm4, %xmm1 + vpshufb %xmm7, %xmm4, %xmm0 + vpaddd L_aes_gcm_avx2_two, %xmm4, %xmm2 + vpshufb %xmm7, %xmm1, %xmm1 + vpaddd L_aes_gcm_avx2_three, %xmm4, %xmm3 + vpshufb %xmm7, %xmm2, %xmm2 + vpaddd L_aes_gcm_avx2_four, %xmm4, %xmm4 + vpshufb %xmm7, %xmm3, %xmm3 + # aesenc_xor + vmovdqu (%ebp), %xmm7 + vmovdqu %xmm4, 64(%esp) + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm7, %xmm3, %xmm3 + vmovdqu 16(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 32(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 48(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 64(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 80(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 96(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 112(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 128(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 144(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $11, 172(%esp) + vmovdqu 160(%ebp), %xmm7 + jl L_AES_GCM_encrypt_avx2_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 176(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $13, 172(%esp) + vmovdqu 192(%ebp), %xmm7 + jl L_AES_GCM_encrypt_avx2_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 208(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc 
%xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 224(%ebp), %xmm7 +L_AES_GCM_encrypt_avx2_aesenc_64_enc_done: + # aesenc_last + vaesenclast %xmm7, %xmm0, %xmm0 + vaesenclast %xmm7, %xmm1, %xmm1 + vaesenclast %xmm7, %xmm2, %xmm2 + vaesenclast %xmm7, %xmm3, %xmm3 + vmovdqu (%esi), %xmm7 + vmovdqu 16(%esi), %xmm4 + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu %xmm0, (%edi) + vmovdqu %xmm1, 16(%edi) + vmovdqu 32(%esi), %xmm7 + vmovdqu 48(%esi), %xmm4 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm4, %xmm3, %xmm3 + vmovdqu %xmm2, 32(%edi) + vmovdqu %xmm3, 48(%edi) + cmpl $0x40, %eax + movl $0x40, %ebx + movl %esi, %ecx + movl %edi, %edx + jle L_AES_GCM_encrypt_avx2_end_64 + # More 64 bytes of input +L_AES_GCM_encrypt_avx2_ghash_64: + # aesenc_64_ghash + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + # aesenc_64 + # aesenc_ctr + vmovdqu 64(%esp), %xmm4 + vmovdqu L_aes_gcm_avx2_bswap_epi64, %xmm7 + vpaddd L_aes_gcm_avx2_one, %xmm4, %xmm1 + vpshufb %xmm7, %xmm4, %xmm0 + vpaddd L_aes_gcm_avx2_two, %xmm4, %xmm2 + vpshufb %xmm7, %xmm1, %xmm1 + vpaddd L_aes_gcm_avx2_three, %xmm4, %xmm3 + vpshufb %xmm7, %xmm2, %xmm2 + vpaddd L_aes_gcm_avx2_four, %xmm4, %xmm4 + vpshufb %xmm7, %xmm3, %xmm3 + # aesenc_xor + vmovdqu (%ebp), %xmm7 + vmovdqu %xmm4, 64(%esp) + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm7, %xmm3, %xmm3 + vmovdqu 16(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 32(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 48(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 64(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 80(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 96(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 112(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 128(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 144(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $11, 172(%esp) + vmovdqu 160(%ebp), %xmm7 + jl L_AES_GCM_encrypt_avx2_aesenc_64_ghash_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 176(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $13, 172(%esp) + vmovdqu 192(%ebp), %xmm7 + jl L_AES_GCM_encrypt_avx2_aesenc_64_ghash_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 208(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 224(%ebp), %xmm7 
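+ # Final AES round for this batch of four counter blocks; xmm7 now holds
+ # the last round key selected above for the configured key size
+ # (AES-128/192/256).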
+L_AES_GCM_encrypt_avx2_aesenc_64_ghash_aesenc_64_enc_done: + # aesenc_last + vaesenclast %xmm7, %xmm0, %xmm0 + vaesenclast %xmm7, %xmm1, %xmm1 + vaesenclast %xmm7, %xmm2, %xmm2 + vaesenclast %xmm7, %xmm3, %xmm3 + vmovdqu (%ecx), %xmm7 + vmovdqu 16(%ecx), %xmm4 + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu %xmm0, (%edx) + vmovdqu %xmm1, 16(%edx) + vmovdqu 32(%ecx), %xmm7 + vmovdqu 48(%ecx), %xmm4 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm4, %xmm3, %xmm3 + vmovdqu %xmm2, 32(%edx) + vmovdqu %xmm3, 48(%edx) + # pclmul_1 + vmovdqu -64(%edx), %xmm1 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vmovdqu 48(%esp), %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm5 + vpclmulqdq $0x01, %xmm2, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm2, %xmm1, %xmm7 + # pclmul_2 + vmovdqu -48(%edx), %xmm1 + vmovdqu 32(%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + # pclmul_n + vmovdqu -32(%edx), %xmm1 + vmovdqu 16(%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + # pclmul_n + vmovdqu -16(%edx), %xmm1 + vmovdqu (%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + # aesenc_pclmul_l + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm3, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm1 + vpsrldq $8, %xmm5, %xmm5 + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm0 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm5, %xmm7, %xmm7 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm7, %xmm6, %xmm6 + # aesenc_64_ghash - end + addl $0x40, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_encrypt_avx2_ghash_64 +L_AES_GCM_encrypt_avx2_end_64: + vmovdqu %xmm6, 96(%esp) + vmovdqu 48(%edx), %xmm3 + vmovdqu (%esp), %xmm7 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm3, %xmm3 + vpclmulqdq $16, %xmm3, %xmm7, %xmm5 + vpclmulqdq $0x01, %xmm3, %xmm7, %xmm1 + vpclmulqdq $0x00, %xmm3, %xmm7, %xmm4 + vpclmulqdq $0x11, %xmm3, %xmm7, %xmm6 + vpxor %xmm1, %xmm5, %xmm5 + vmovdqu 32(%edx), %xmm3 + vmovdqu 16(%esp), %xmm7 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm3, %xmm3 + vpclmulqdq $16, %xmm3, %xmm7, %xmm2 + vpclmulqdq $0x01, %xmm3, %xmm7, %xmm1 + vpclmulqdq $0x00, %xmm3, %xmm7, %xmm0 + vpclmulqdq $0x11, %xmm3, %xmm7, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vmovdqu 16(%edx), %xmm3 + vmovdqu 32(%esp), %xmm7 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm3, %xmm3 + vpclmulqdq $16, %xmm3, %xmm7, %xmm2 + vpclmulqdq $0x01, %xmm3, %xmm7, %xmm1 + vpclmulqdq $0x00, %xmm3, %xmm7, %xmm0 + vpclmulqdq $0x11, %xmm3, %xmm7, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, 
%xmm5, %xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vmovdqu 96(%esp), %xmm0 + vmovdqu (%edx), %xmm3 + vmovdqu 48(%esp), %xmm7 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm3, %xmm3 + vpxor %xmm0, %xmm3, %xmm3 + vpclmulqdq $16, %xmm3, %xmm7, %xmm2 + vpclmulqdq $0x01, %xmm3, %xmm7, %xmm1 + vpclmulqdq $0x00, %xmm3, %xmm7, %xmm0 + vpclmulqdq $0x11, %xmm3, %xmm7, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vpslldq $8, %xmm5, %xmm7 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm7, %xmm4, %xmm4 + vpxor %xmm5, %xmm6, %xmm6 + # ghash_red + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm2 + vpclmulqdq $16, %xmm2, %xmm4, %xmm0 + vpshufd $0x4e, %xmm4, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm6, %xmm6 + vmovdqu (%esp), %xmm5 + vmovdqu 64(%esp), %xmm4 +L_AES_GCM_encrypt_avx2_done_64: + cmpl 152(%esp), %ebx + je L_AES_GCM_encrypt_avx2_done_enc + movl 152(%esp), %eax + andl $0xfffffff0, %eax + cmpl %eax, %ebx + jge L_AES_GCM_encrypt_avx2_last_block_done + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + # aesenc_block + vmovdqu %xmm4, %xmm1 + vpshufb L_aes_gcm_avx2_bswap_epi64, %xmm1, %xmm0 + vpaddd L_aes_gcm_avx2_one, %xmm1, %xmm1 + vpxor (%ebp), %xmm0, %xmm0 + vaesenc 16(%ebp), %xmm0, %xmm0 + vaesenc 32(%ebp), %xmm0, %xmm0 + vaesenc 48(%ebp), %xmm0, %xmm0 + vaesenc 64(%ebp), %xmm0, %xmm0 + vaesenc 80(%ebp), %xmm0, %xmm0 + vaesenc 96(%ebp), %xmm0, %xmm0 + vaesenc 112(%ebp), %xmm0, %xmm0 + vaesenc 128(%ebp), %xmm0, %xmm0 + vaesenc 144(%ebp), %xmm0, %xmm0 + cmpl $11, 172(%esp) + vmovdqu 160(%ebp), %xmm2 + jl L_AES_GCM_encrypt_avx2_aesenc_block_aesenc_avx_last + vaesenc %xmm2, %xmm0, %xmm0 + vaesenc 176(%ebp), %xmm0, %xmm0 + cmpl $13, 172(%esp) + vmovdqu 192(%ebp), %xmm2 + jl L_AES_GCM_encrypt_avx2_aesenc_block_aesenc_avx_last + vaesenc %xmm2, %xmm0, %xmm0 + vaesenc 208(%ebp), %xmm0, %xmm0 + vmovdqu 224(%ebp), %xmm2 +L_AES_GCM_encrypt_avx2_aesenc_block_aesenc_avx_last: + vaesenclast %xmm2, %xmm0, %xmm0 + vmovdqu %xmm1, %xmm4 + vmovdqu (%ecx), %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, (%edx) + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + addl $16, %ebx + cmpl %eax, %ebx + jge L_AES_GCM_encrypt_avx2_last_block_ghash +L_AES_GCM_encrypt_avx2_last_block_start: + vpshufb L_aes_gcm_avx2_bswap_epi64, %xmm4, %xmm7 + vpaddd L_aes_gcm_avx2_one, %xmm4, %xmm4 + vmovdqu %xmm4, 64(%esp) + # aesenc_gfmul_sb + vpclmulqdq $0x01, %xmm5, %xmm6, %xmm2 + vpclmulqdq $16, %xmm5, %xmm6, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm1 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm4 + vpxor (%ebp), %xmm7, %xmm7 + vaesenc 16(%ebp), %xmm7, %xmm7 + vpxor %xmm2, %xmm3, %xmm3 + vpslldq $8, %xmm3, %xmm2 + vpsrldq $8, %xmm3, %xmm3 + vaesenc 32(%ebp), %xmm7, %xmm7 + vpxor %xmm1, %xmm2, %xmm2 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm2, %xmm1 + vaesenc 48(%ebp), %xmm7, %xmm7 + vaesenc 64(%ebp), %xmm7, %xmm7 + vaesenc 80(%ebp), %xmm7, %xmm7 + vpshufd $0x4e, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm2, %xmm1 + vaesenc 96(%ebp), %xmm7, %xmm7 + vaesenc 112(%ebp), %xmm7, %xmm7 + vaesenc 128(%ebp), %xmm7, %xmm7 + vpshufd $0x4e, %xmm2, %xmm2 + vaesenc 144(%ebp), %xmm7, %xmm7 + vpxor %xmm3, %xmm4, %xmm4 + vpxor %xmm4, %xmm2, %xmm2 + vmovdqu 160(%ebp), %xmm0 + cmpl $11, 172(%esp) + jl L_AES_GCM_encrypt_avx2_aesenc_gfmul_sb_last + vaesenc %xmm0, %xmm7, %xmm7 + vaesenc 176(%ebp), %xmm7, %xmm7 + vmovdqu 192(%ebp), 
%xmm0 + cmpl $13, 172(%esp) + jl L_AES_GCM_encrypt_avx2_aesenc_gfmul_sb_last + vaesenc %xmm0, %xmm7, %xmm7 + vaesenc 208(%ebp), %xmm7, %xmm7 + vmovdqu 224(%ebp), %xmm0 +L_AES_GCM_encrypt_avx2_aesenc_gfmul_sb_last: + vaesenclast %xmm0, %xmm7, %xmm7 + vmovdqu (%esi,%ebx,1), %xmm3 + vpxor %xmm1, %xmm2, %xmm6 + vpxor %xmm3, %xmm7, %xmm7 + vmovdqu %xmm7, (%edi,%ebx,1) + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm7, %xmm7 + vpxor %xmm7, %xmm6, %xmm6 + vmovdqu 64(%esp), %xmm4 + addl $16, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_encrypt_avx2_last_block_start +L_AES_GCM_encrypt_avx2_last_block_ghash: + # ghash_gfmul_red + vpclmulqdq $16, %xmm5, %xmm6, %xmm2 + vpclmulqdq $0x01, %xmm5, %xmm6, %xmm1 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm0 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm6 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm2, %xmm6, %xmm6 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm0, %xmm6, %xmm6 +L_AES_GCM_encrypt_avx2_last_block_done: + movl 152(%esp), %ecx + movl 152(%esp), %edx + andl $15, %ecx + jz L_AES_GCM_encrypt_avx2_done_enc + # aesenc_last15_enc + vpshufb L_aes_gcm_avx2_bswap_epi64, %xmm4, %xmm4 + vpxor (%ebp), %xmm4, %xmm4 + vaesenc 16(%ebp), %xmm4, %xmm4 + vaesenc 32(%ebp), %xmm4, %xmm4 + vaesenc 48(%ebp), %xmm4, %xmm4 + vaesenc 64(%ebp), %xmm4, %xmm4 + vaesenc 80(%ebp), %xmm4, %xmm4 + vaesenc 96(%ebp), %xmm4, %xmm4 + vaesenc 112(%ebp), %xmm4, %xmm4 + vaesenc 128(%ebp), %xmm4, %xmm4 + vaesenc 144(%ebp), %xmm4, %xmm4 + cmpl $11, 172(%esp) + vmovdqu 160(%ebp), %xmm0 + jl L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last + vaesenc %xmm0, %xmm4, %xmm4 + vaesenc 176(%ebp), %xmm4, %xmm4 + cmpl $13, 172(%esp) + vmovdqu 192(%ebp), %xmm0 + jl L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last + vaesenc %xmm0, %xmm4, %xmm4 + vaesenc 208(%ebp), %xmm4, %xmm4 + vmovdqu 224(%ebp), %xmm0 +L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last: + vaesenclast %xmm0, %xmm4, %xmm4 + xorl %ecx, %ecx + vpxor %xmm0, %xmm0, %xmm0 + vmovdqu %xmm4, (%esp) + vmovdqu %xmm0, 16(%esp) +L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_loop: + movzbl (%esi,%ebx,1), %eax + xorb (%esp,%ecx,1), %al + movb %al, 16(%esp,%ecx,1) + movb %al, (%edi,%ebx,1) + incl %ebx + incl %ecx + cmpl %edx, %ebx + jl L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_loop +L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_finish_enc: + vmovdqu 16(%esp), %xmm4 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm4, %xmm4 + vpxor %xmm4, %xmm6, %xmm6 + # ghash_gfmul_red + vpclmulqdq $16, %xmm5, %xmm6, %xmm2 + vpclmulqdq $0x01, %xmm5, %xmm6, %xmm1 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm0 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm6 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm2, %xmm6, %xmm6 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm0, %xmm6, %xmm6 +L_AES_GCM_encrypt_avx2_done_enc: + vmovdqu 80(%esp), %xmm7 + # calc_tag + movl 152(%esp), %ecx + shll $3, %ecx + vpinsrd $0x00, %ecx, %xmm0, %xmm0 + movl 156(%esp), %ecx + shll $3, %ecx + vpinsrd $2, %ecx, %xmm0, %xmm0 + movl 152(%esp), %ecx + shrl $29, %ecx + vpinsrd $0x01, 
%ecx, %xmm0, %xmm0 + movl 156(%esp), %ecx + shrl $29, %ecx + vpinsrd $3, %ecx, %xmm0, %xmm0 + vpxor %xmm6, %xmm0, %xmm0 + # ghash_gfmul_red + vpclmulqdq $16, %xmm5, %xmm0, %xmm4 + vpclmulqdq $0x01, %xmm5, %xmm0, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm0, %xmm2 + vpxor %xmm3, %xmm4, %xmm4 + vpslldq $8, %xmm4, %xmm3 + vpsrldq $8, %xmm4, %xmm4 + vpxor %xmm2, %xmm3, %xmm3 + vpclmulqdq $0x11, %xmm5, %xmm0, %xmm0 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm3, %xmm2 + vpshufd $0x4e, %xmm3, %xmm3 + vpxor %xmm2, %xmm3, %xmm3 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm3, %xmm2 + vpshufd $0x4e, %xmm3, %xmm3 + vpxor %xmm4, %xmm0, %xmm0 + vpxor %xmm3, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpxor %xmm7, %xmm0, %xmm0 + movl 148(%esp), %edi + movl 164(%esp), %ebx + # store_tag + cmpl $16, %ebx + je L_AES_GCM_encrypt_avx2_store_tag_16 + xorl %ecx, %ecx + vmovdqu %xmm0, (%esp) +L_AES_GCM_encrypt_avx2_store_tag_loop: + movzbl (%esp,%ecx,1), %eax + movb %al, (%edi,%ecx,1) + incl %ecx + cmpl %ebx, %ecx + jne L_AES_GCM_encrypt_avx2_store_tag_loop + jmp L_AES_GCM_encrypt_avx2_store_tag_done +L_AES_GCM_encrypt_avx2_store_tag_16: + vmovdqu %xmm0, (%edi) +L_AES_GCM_encrypt_avx2_store_tag_done: + addl $0x70, %esp + popl %ebp + popl %edi + popl %esi + popl %ebx + ret +.size AES_GCM_encrypt_avx2,.-AES_GCM_encrypt_avx2 +.text +.globl AES_GCM_decrypt_avx2 +.type AES_GCM_decrypt_avx2,@function +.align 16 +AES_GCM_decrypt_avx2: + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + subl $0xb0, %esp + movl 208(%esp), %esi + movl 232(%esp), %ebp + vpxor %xmm4, %xmm4, %xmm4 + movl 224(%esp), %edx + cmpl $12, %edx + je L_AES_GCM_decrypt_avx2_iv_12 + # Calculate values when IV is not 12 bytes + # H = Encrypt X(=0) + vmovdqu (%ebp), %xmm5 + vaesenc 16(%ebp), %xmm5, %xmm5 + vaesenc 32(%ebp), %xmm5, %xmm5 + vaesenc 48(%ebp), %xmm5, %xmm5 + vaesenc 64(%ebp), %xmm5, %xmm5 + vaesenc 80(%ebp), %xmm5, %xmm5 + vaesenc 96(%ebp), %xmm5, %xmm5 + vaesenc 112(%ebp), %xmm5, %xmm5 + vaesenc 128(%ebp), %xmm5, %xmm5 + vaesenc 144(%ebp), %xmm5, %xmm5 + cmpl $11, 236(%esp) + vmovdqu 160(%ebp), %xmm0 + jl L_AES_GCM_decrypt_avx2_calc_iv_1_aesenc_avx_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc 176(%ebp), %xmm5, %xmm5 + cmpl $13, 236(%esp) + vmovdqu 192(%ebp), %xmm0 + jl L_AES_GCM_decrypt_avx2_calc_iv_1_aesenc_avx_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc 208(%ebp), %xmm5, %xmm5 + vmovdqu 224(%ebp), %xmm0 +L_AES_GCM_decrypt_avx2_calc_iv_1_aesenc_avx_last: + vaesenclast %xmm0, %xmm5, %xmm5 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm5, %xmm5 + # Calc counter + # Initialization vector + cmpl $0x00, %edx + movl $0x00, %ecx + je L_AES_GCM_decrypt_avx2_calc_iv_done + cmpl $16, %edx + jl L_AES_GCM_decrypt_avx2_calc_iv_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_decrypt_avx2_calc_iv_16_loop: + vmovdqu (%esi,%ecx,1), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + # 
ghash_red + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_avx2_calc_iv_16_loop + movl 224(%esp), %edx + cmpl %edx, %ecx + je L_AES_GCM_decrypt_avx2_calc_iv_done +L_AES_GCM_decrypt_avx2_calc_iv_lt16: + vpxor %xmm0, %xmm0, %xmm0 + xorl %ebx, %ebx + vmovdqu %xmm0, (%esp) +L_AES_GCM_decrypt_avx2_calc_iv_loop: + movzbl (%esi,%ecx,1), %eax + movb %al, (%esp,%ebx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_avx2_calc_iv_loop + vmovdqu (%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 +L_AES_GCM_decrypt_avx2_calc_iv_done: + # T = Encrypt counter + vpxor %xmm0, %xmm0, %xmm0 + shll $3, %edx + vpinsrd $0x00, %edx, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm4, %xmm4 + # Encrypt counter + vmovdqu (%ebp), %xmm6 + vpxor %xmm4, %xmm6, %xmm6 + vaesenc 16(%ebp), %xmm6, %xmm6 + vaesenc 32(%ebp), %xmm6, %xmm6 + vaesenc 48(%ebp), %xmm6, %xmm6 + vaesenc 64(%ebp), %xmm6, %xmm6 + vaesenc 80(%ebp), %xmm6, %xmm6 + vaesenc 96(%ebp), %xmm6, %xmm6 + vaesenc 112(%ebp), %xmm6, %xmm6 + vaesenc 128(%ebp), %xmm6, %xmm6 + vaesenc 144(%ebp), %xmm6, %xmm6 + cmpl $11, 236(%esp) + vmovdqu 160(%ebp), %xmm0 + jl L_AES_GCM_decrypt_avx2_calc_iv_2_aesenc_avx_last + vaesenc %xmm0, %xmm6, %xmm6 + vaesenc 176(%ebp), %xmm6, %xmm6 + cmpl $13, 236(%esp) + vmovdqu 192(%ebp), %xmm0 + jl L_AES_GCM_decrypt_avx2_calc_iv_2_aesenc_avx_last + vaesenc %xmm0, %xmm6, %xmm6 + vaesenc 208(%ebp), %xmm6, %xmm6 + vmovdqu 224(%ebp), %xmm0 
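+ # Final AES round below: %xmm6 becomes E_K(J0), the tag mask saved at 80(%esp) and XORed into the GHASH result when the tag is checked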
+L_AES_GCM_decrypt_avx2_calc_iv_2_aesenc_avx_last: + vaesenclast %xmm0, %xmm6, %xmm6 + jmp L_AES_GCM_decrypt_avx2_iv_done +L_AES_GCM_decrypt_avx2_iv_12: + # # Calculate values when IV is 12 bytes + # Set counter based on IV + vmovdqu L_avx2_aes_gcm_bswap_one, %xmm4 + vmovdqu (%ebp), %xmm5 + vpblendd $7, (%esi), %xmm4, %xmm4 + # H = Encrypt X(=0) and T = Encrypt counter + vmovdqu 16(%ebp), %xmm7 + vpxor %xmm5, %xmm4, %xmm6 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm6, %xmm6 + vmovdqu 32(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 48(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 64(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 80(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 96(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 112(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 128(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 144(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + cmpl $11, 236(%esp) + vmovdqu 160(%ebp), %xmm0 + jl L_AES_GCM_decrypt_avx2_calc_iv_12_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 176(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + cmpl $13, 236(%esp) + vmovdqu 192(%ebp), %xmm0 + jl L_AES_GCM_decrypt_avx2_calc_iv_12_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 208(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm6, %xmm6 + vmovdqu 224(%ebp), %xmm0 +L_AES_GCM_decrypt_avx2_calc_iv_12_last: + vaesenclast %xmm0, %xmm5, %xmm5 + vaesenclast %xmm0, %xmm6, %xmm6 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm5, %xmm5 +L_AES_GCM_decrypt_avx2_iv_done: + vmovdqu %xmm6, 80(%esp) + vpxor %xmm6, %xmm6, %xmm6 + movl 204(%esp), %esi + # Additional authentication data + movl 220(%esp), %edx + cmpl $0x00, %edx + je L_AES_GCM_decrypt_avx2_calc_aad_done + xorl %ecx, %ecx + cmpl $16, %edx + jl L_AES_GCM_decrypt_avx2_calc_aad_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_decrypt_avx2_calc_aad_16_loop: + vmovdqu (%esi,%ecx,1), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm6, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm6, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm6 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm6, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm6, %xmm6 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm6, %xmm6 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm6, %xmm6 + # ghash_red + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm6, %xmm6 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_avx2_calc_aad_16_loop + movl 220(%esp), %edx + cmpl %edx, %ecx + je L_AES_GCM_decrypt_avx2_calc_aad_done +L_AES_GCM_decrypt_avx2_calc_aad_lt16: + vpxor %xmm0, %xmm0, %xmm0 + xorl %ebx, %ebx + vmovdqu %xmm0, (%esp) +L_AES_GCM_decrypt_avx2_calc_aad_loop: + movzbl (%esi,%ecx,1), %eax + movb 
%al, (%esp,%ebx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_decrypt_avx2_calc_aad_loop + vmovdqu (%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm6, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm6, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm6, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm6, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm7 + vpxor %xmm2, %xmm3, %xmm6 + # ghash_mid + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm6, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm6, %xmm6 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm6, %xmm6 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm6, %xmm6 + # ghash_red + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm2 + vpclmulqdq $16, %xmm2, %xmm7, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm6, %xmm6 +L_AES_GCM_decrypt_avx2_calc_aad_done: + movl 196(%esp), %esi + movl 200(%esp), %edi + # Calculate counter and H + vpsrlq $63, %xmm5, %xmm1 + vpsllq $0x01, %xmm5, %xmm0 + vpslldq $8, %xmm1, %xmm1 + vpor %xmm1, %xmm0, %xmm0 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpshufb L_aes_gcm_avx2_bswap_epi64, %xmm4, %xmm4 + vpand L_aes_gcm_avx2_mod2_128, %xmm5, %xmm5 + vpaddd L_aes_gcm_avx2_one, %xmm4, %xmm4 + vpxor %xmm0, %xmm5, %xmm5 + xorl %ebx, %ebx + cmpl $0x40, 216(%esp) + movl 216(%esp), %eax + jl L_AES_GCM_decrypt_avx2_done_64 + andl $0xffffffc0, %eax + vmovdqu %xmm4, 64(%esp) + vmovdqu %xmm6, 96(%esp) + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm3 + # H ^ 1 + vmovdqu %xmm5, (%esp) + vmovdqu %xmm5, %xmm2 + # H ^ 2 + vpclmulqdq $0x00, %xmm2, %xmm2, %xmm5 + vpclmulqdq $0x11, %xmm2, %xmm2, %xmm6 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm6, %xmm0 + vmovdqu %xmm0, 16(%esp) + # H ^ 3 + # ghash_gfmul_red + vpclmulqdq $16, %xmm0, %xmm2, %xmm6 + vpclmulqdq $0x01, %xmm0, %xmm2, %xmm5 + vpclmulqdq $0x00, %xmm0, %xmm2, %xmm4 + vpxor %xmm5, %xmm6, %xmm6 + vpslldq $8, %xmm6, %xmm5 + vpsrldq $8, %xmm6, %xmm6 + vpxor %xmm4, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm0, %xmm2, %xmm1 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm5, %xmm1, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu %xmm1, 32(%esp) + # H ^ 4 + vpclmulqdq $0x00, %xmm0, %xmm0, %xmm5 + vpclmulqdq $0x11, %xmm0, %xmm0, %xmm6 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm6, %xmm2 + vmovdqu %xmm2, 48(%esp) + vmovdqu 96(%esp), %xmm6 + cmpl %esi, %edi + jne L_AES_GCM_decrypt_avx2_ghash_64 +L_AES_GCM_decrypt_avx2_ghash_64_inplace: + # aesenc_64_ghash + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + # aesenc_64 + # aesenc_ctr + vmovdqu 64(%esp), %xmm4 + vmovdqu L_aes_gcm_avx2_bswap_epi64, %xmm7 + vpaddd L_aes_gcm_avx2_one, %xmm4, %xmm1 + vpshufb %xmm7, %xmm4, %xmm0 + vpaddd L_aes_gcm_avx2_two, %xmm4, %xmm2 + vpshufb %xmm7, %xmm1, %xmm1 + vpaddd L_aes_gcm_avx2_three, %xmm4, %xmm3 + vpshufb %xmm7, 
%xmm2, %xmm2 + vpaddd L_aes_gcm_avx2_four, %xmm4, %xmm4 + vpshufb %xmm7, %xmm3, %xmm3 + # aesenc_xor + vmovdqu (%ebp), %xmm7 + vmovdqu %xmm4, 64(%esp) + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm7, %xmm3, %xmm3 + vmovdqu 16(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 32(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 48(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 64(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 80(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 96(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 112(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 128(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 144(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $11, 236(%esp) + vmovdqu 160(%ebp), %xmm7 + jl L_AES_GCM_decrypt_avx2_inplace_aesenc_64_ghash_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 176(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $13, 236(%esp) + vmovdqu 192(%ebp), %xmm7 + jl L_AES_GCM_decrypt_avx2_inplace_aesenc_64_ghash_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 208(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 224(%ebp), %xmm7 +L_AES_GCM_decrypt_avx2_inplace_aesenc_64_ghash_aesenc_64_enc_done: + # aesenc_last + vaesenclast %xmm7, %xmm0, %xmm0 + vaesenclast %xmm7, %xmm1, %xmm1 + vaesenclast %xmm7, %xmm2, %xmm2 + vaesenclast %xmm7, %xmm3, %xmm3 + vmovdqu (%ecx), %xmm7 + vmovdqu 16(%ecx), %xmm4 + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu %xmm7, 112(%esp) + vmovdqu %xmm4, 128(%esp) + vmovdqu %xmm0, (%edx) + vmovdqu %xmm1, 16(%edx) + vmovdqu 32(%ecx), %xmm7 + vmovdqu 48(%ecx), %xmm4 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm4, %xmm3, %xmm3 + vmovdqu %xmm7, 144(%esp) + vmovdqu %xmm4, 160(%esp) + vmovdqu %xmm2, 32(%edx) + vmovdqu %xmm3, 48(%edx) + # pclmul_1 + vmovdqu 112(%esp), %xmm1 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vmovdqu 48(%esp), %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm5 + vpclmulqdq $0x01, %xmm2, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm2, %xmm1, %xmm7 + # pclmul_2 + vmovdqu 128(%esp), %xmm1 + vmovdqu 32(%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + 
vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + # pclmul_n + vmovdqu 144(%esp), %xmm1 + vmovdqu 16(%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + # pclmul_n + vmovdqu 160(%esp), %xmm1 + vmovdqu (%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + # aesenc_pclmul_l + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm3, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm1 + vpsrldq $8, %xmm5, %xmm5 + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm0 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm5, %xmm7, %xmm7 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm7, %xmm6, %xmm6 + # aesenc_64_ghash - end + addl $0x40, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_decrypt_avx2_ghash_64_inplace + jmp L_AES_GCM_decrypt_avx2_ghash_64_done +L_AES_GCM_decrypt_avx2_ghash_64: + # aesenc_64_ghash + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + # aesenc_64 + # aesenc_ctr + vmovdqu 64(%esp), %xmm4 + vmovdqu L_aes_gcm_avx2_bswap_epi64, %xmm7 + vpaddd L_aes_gcm_avx2_one, %xmm4, %xmm1 + vpshufb %xmm7, %xmm4, %xmm0 + vpaddd L_aes_gcm_avx2_two, %xmm4, %xmm2 + vpshufb %xmm7, %xmm1, %xmm1 + vpaddd L_aes_gcm_avx2_three, %xmm4, %xmm3 + vpshufb %xmm7, %xmm2, %xmm2 + vpaddd L_aes_gcm_avx2_four, %xmm4, %xmm4 + vpshufb %xmm7, %xmm3, %xmm3 + # aesenc_xor + vmovdqu (%ebp), %xmm7 + vmovdqu %xmm4, 64(%esp) + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm7, %xmm3, %xmm3 + vmovdqu 16(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 32(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 48(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 64(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 80(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 96(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 112(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 128(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 144(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $11, 236(%esp) + vmovdqu 160(%ebp), %xmm7 + jl 
L_AES_GCM_decrypt_avx2_aesenc_64_ghash_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 176(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $13, 236(%esp) + vmovdqu 192(%ebp), %xmm7 + jl L_AES_GCM_decrypt_avx2_aesenc_64_ghash_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 208(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 224(%ebp), %xmm7 +L_AES_GCM_decrypt_avx2_aesenc_64_ghash_aesenc_64_enc_done: + # aesenc_last + vaesenclast %xmm7, %xmm0, %xmm0 + vaesenclast %xmm7, %xmm1, %xmm1 + vaesenclast %xmm7, %xmm2, %xmm2 + vaesenclast %xmm7, %xmm3, %xmm3 + vmovdqu (%ecx), %xmm7 + vmovdqu 16(%ecx), %xmm4 + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu %xmm7, (%ecx) + vmovdqu %xmm4, 16(%ecx) + vmovdqu %xmm0, (%edx) + vmovdqu %xmm1, 16(%edx) + vmovdqu 32(%ecx), %xmm7 + vmovdqu 48(%ecx), %xmm4 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm4, %xmm3, %xmm3 + vmovdqu %xmm7, 32(%ecx) + vmovdqu %xmm4, 48(%ecx) + vmovdqu %xmm2, 32(%edx) + vmovdqu %xmm3, 48(%edx) + # pclmul_1 + vmovdqu (%ecx), %xmm1 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vmovdqu 48(%esp), %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm5 + vpclmulqdq $0x01, %xmm2, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm2, %xmm1, %xmm7 + # pclmul_2 + vmovdqu 16(%ecx), %xmm1 + vmovdqu 32(%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + # pclmul_n + vmovdqu 32(%ecx), %xmm1 + vmovdqu 16(%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + # pclmul_n + vmovdqu 48(%ecx), %xmm1 + vmovdqu (%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + # aesenc_pclmul_l + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm3, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm1 + vpsrldq $8, %xmm5, %xmm5 + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm0 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm5, %xmm7, %xmm7 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm7, %xmm6, %xmm6 + # aesenc_64_ghash - end + addl $0x40, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_decrypt_avx2_ghash_64 +L_AES_GCM_decrypt_avx2_ghash_64_done: + vmovdqu (%esp), %xmm5 + vmovdqu 64(%esp), %xmm4 +L_AES_GCM_decrypt_avx2_done_64: + cmpl 216(%esp), %ebx + jge L_AES_GCM_decrypt_avx2_done_dec + movl 216(%esp), %eax + andl 
$0xfffffff0, %eax + cmpl %eax, %ebx + jge L_AES_GCM_decrypt_avx2_last_block_done +L_AES_GCM_decrypt_avx2_last_block_start: + vmovdqu (%esi,%ebx,1), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_epi64, %xmm4, %xmm7 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpaddd L_aes_gcm_avx2_one, %xmm4, %xmm4 + vmovdqu %xmm4, 64(%esp) + vpxor %xmm6, %xmm0, %xmm4 + # aesenc_gfmul_sb + vpclmulqdq $0x01, %xmm5, %xmm4, %xmm2 + vpclmulqdq $16, %xmm5, %xmm4, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm1 + vpclmulqdq $0x11, %xmm5, %xmm4, %xmm4 + vpxor (%ebp), %xmm7, %xmm7 + vaesenc 16(%ebp), %xmm7, %xmm7 + vpxor %xmm2, %xmm3, %xmm3 + vpslldq $8, %xmm3, %xmm2 + vpsrldq $8, %xmm3, %xmm3 + vaesenc 32(%ebp), %xmm7, %xmm7 + vpxor %xmm1, %xmm2, %xmm2 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm2, %xmm1 + vaesenc 48(%ebp), %xmm7, %xmm7 + vaesenc 64(%ebp), %xmm7, %xmm7 + vaesenc 80(%ebp), %xmm7, %xmm7 + vpshufd $0x4e, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm2, %xmm1 + vaesenc 96(%ebp), %xmm7, %xmm7 + vaesenc 112(%ebp), %xmm7, %xmm7 + vaesenc 128(%ebp), %xmm7, %xmm7 + vpshufd $0x4e, %xmm2, %xmm2 + vaesenc 144(%ebp), %xmm7, %xmm7 + vpxor %xmm3, %xmm4, %xmm4 + vpxor %xmm4, %xmm2, %xmm2 + vmovdqu 160(%ebp), %xmm0 + cmpl $11, 236(%esp) + jl L_AES_GCM_decrypt_avx2_aesenc_gfmul_sb_last + vaesenc %xmm0, %xmm7, %xmm7 + vaesenc 176(%ebp), %xmm7, %xmm7 + vmovdqu 192(%ebp), %xmm0 + cmpl $13, 236(%esp) + jl L_AES_GCM_decrypt_avx2_aesenc_gfmul_sb_last + vaesenc %xmm0, %xmm7, %xmm7 + vaesenc 208(%ebp), %xmm7, %xmm7 + vmovdqu 224(%ebp), %xmm0 +L_AES_GCM_decrypt_avx2_aesenc_gfmul_sb_last: + vaesenclast %xmm0, %xmm7, %xmm7 + vmovdqu (%esi,%ebx,1), %xmm3 + vpxor %xmm1, %xmm2, %xmm6 + vpxor %xmm3, %xmm7, %xmm7 + vmovdqu %xmm7, (%edi,%ebx,1) + vmovdqu 64(%esp), %xmm4 + addl $16, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_decrypt_avx2_last_block_start +L_AES_GCM_decrypt_avx2_last_block_done: + movl 216(%esp), %ecx + movl 216(%esp), %edx + andl $15, %ecx + jz L_AES_GCM_decrypt_avx2_done_dec + # aesenc_last15_dec + vpshufb L_aes_gcm_avx2_bswap_epi64, %xmm4, %xmm4 + vpxor (%ebp), %xmm4, %xmm4 + vaesenc 16(%ebp), %xmm4, %xmm4 + vaesenc 32(%ebp), %xmm4, %xmm4 + vaesenc 48(%ebp), %xmm4, %xmm4 + vaesenc 64(%ebp), %xmm4, %xmm4 + vaesenc 80(%ebp), %xmm4, %xmm4 + vaesenc 96(%ebp), %xmm4, %xmm4 + vaesenc 112(%ebp), %xmm4, %xmm4 + vaesenc 128(%ebp), %xmm4, %xmm4 + vaesenc 144(%ebp), %xmm4, %xmm4 + cmpl $11, 236(%esp) + vmovdqu 160(%ebp), %xmm1 + jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last + vaesenc %xmm1, %xmm4, %xmm4 + vaesenc 176(%ebp), %xmm4, %xmm4 + cmpl $13, 236(%esp) + vmovdqu 192(%ebp), %xmm1 + jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last + vaesenc %xmm1, %xmm4, %xmm4 + vaesenc 208(%ebp), %xmm4, %xmm4 + vmovdqu 224(%ebp), %xmm1 +L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last: + vaesenclast %xmm1, %xmm4, %xmm4 + xorl %ecx, %ecx + vpxor %xmm0, %xmm0, %xmm0 + vmovdqu %xmm4, (%esp) + vmovdqu %xmm0, 16(%esp) +L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_loop: + movzbl (%esi,%ebx,1), %eax + movb %al, 16(%esp,%ecx,1) + xorb (%esp,%ecx,1), %al + movb %al, (%edi,%ebx,1) + incl %ebx + incl %ecx + cmpl %edx, %ebx + jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_loop + vmovdqu 16(%esp), %xmm4 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm4, %xmm4 + vpxor %xmm4, %xmm6, %xmm6 + # ghash_gfmul_red + vpclmulqdq $16, %xmm5, %xmm6, %xmm2 + vpclmulqdq $0x01, %xmm5, %xmm6, %xmm1 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm0 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 
+ vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm6 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm2, %xmm6, %xmm6 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm0, %xmm6, %xmm6 +L_AES_GCM_decrypt_avx2_done_dec: + vmovdqu 80(%esp), %xmm7 + # calc_tag + movl 216(%esp), %ecx + shll $3, %ecx + vpinsrd $0x00, %ecx, %xmm0, %xmm0 + movl 220(%esp), %ecx + shll $3, %ecx + vpinsrd $2, %ecx, %xmm0, %xmm0 + movl 216(%esp), %ecx + shrl $29, %ecx + vpinsrd $0x01, %ecx, %xmm0, %xmm0 + movl 220(%esp), %ecx + shrl $29, %ecx + vpinsrd $3, %ecx, %xmm0, %xmm0 + vpxor %xmm6, %xmm0, %xmm0 + # ghash_gfmul_red + vpclmulqdq $16, %xmm5, %xmm0, %xmm4 + vpclmulqdq $0x01, %xmm5, %xmm0, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm0, %xmm2 + vpxor %xmm3, %xmm4, %xmm4 + vpslldq $8, %xmm4, %xmm3 + vpsrldq $8, %xmm4, %xmm4 + vpxor %xmm2, %xmm3, %xmm3 + vpclmulqdq $0x11, %xmm5, %xmm0, %xmm0 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm3, %xmm2 + vpshufd $0x4e, %xmm3, %xmm3 + vpxor %xmm2, %xmm3, %xmm3 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm3, %xmm2 + vpshufd $0x4e, %xmm3, %xmm3 + vpxor %xmm4, %xmm0, %xmm0 + vpxor %xmm3, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpxor %xmm7, %xmm0, %xmm0 + movl 212(%esp), %edi + movl 228(%esp), %ebx + movl 240(%esp), %ebp + # cmp_tag + cmpl $16, %ebx + je L_AES_GCM_decrypt_avx2_cmp_tag_16 + xorl %edx, %edx + xorl %ecx, %ecx + vmovdqu %xmm0, (%esp) +L_AES_GCM_decrypt_avx2_cmp_tag_loop: + movzbl (%esp,%edx,1), %eax + xorb (%edi,%edx,1), %al + orb %al, %cl + incl %edx + cmpl %ebx, %edx + jne L_AES_GCM_decrypt_avx2_cmp_tag_loop + cmpb $0x00, %cl + sete %cl + jmp L_AES_GCM_decrypt_avx2_cmp_tag_done +L_AES_GCM_decrypt_avx2_cmp_tag_16: + vmovdqu (%edi), %xmm1 + vpcmpeqb %xmm1, %xmm0, %xmm0 + vpmovmskb %xmm0, %edx + # %%edx == 0xFFFF then return 1 else => return 0 + xorl %ecx, %ecx + cmpl $0xffff, %edx + sete %cl +L_AES_GCM_decrypt_avx2_cmp_tag_done: + movl %ecx, (%ebp) + addl $0xb0, %esp + popl %ebp + popl %edi + popl %esi + popl %ebx + ret +.size AES_GCM_decrypt_avx2,.-AES_GCM_decrypt_avx2 +#ifdef WOLFSSL_AESGCM_STREAM +.text +.globl AES_GCM_init_avx2 +.type AES_GCM_init_avx2,@function +.align 16 +AES_GCM_init_avx2: + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + subl $32, %esp + movl 52(%esp), %ebp + movl 60(%esp), %esi + movl 76(%esp), %edi + vpxor %xmm4, %xmm4, %xmm4 + movl 64(%esp), %edx + cmpl $12, %edx + je L_AES_GCM_init_avx2_iv_12 + # Calculate values when IV is not 12 bytes + # H = Encrypt X(=0) + vmovdqu (%ebp), %xmm5 + vaesenc 16(%ebp), %xmm5, %xmm5 + vaesenc 32(%ebp), %xmm5, %xmm5 + vaesenc 48(%ebp), %xmm5, %xmm5 + vaesenc 64(%ebp), %xmm5, %xmm5 + vaesenc 80(%ebp), %xmm5, %xmm5 + vaesenc 96(%ebp), %xmm5, %xmm5 + vaesenc 112(%ebp), %xmm5, %xmm5 + vaesenc 128(%ebp), %xmm5, %xmm5 + vaesenc 144(%ebp), %xmm5, %xmm5 + cmpl $11, 56(%esp) + vmovdqu 160(%ebp), %xmm0 + jl L_AES_GCM_init_avx2_calc_iv_1_aesenc_avx_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc 176(%ebp), %xmm5, %xmm5 + cmpl $13, 56(%esp) + vmovdqu 192(%ebp), %xmm0 + jl L_AES_GCM_init_avx2_calc_iv_1_aesenc_avx_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc 208(%ebp), %xmm5, %xmm5 + vmovdqu 224(%ebp), %xmm0 +L_AES_GCM_init_avx2_calc_iv_1_aesenc_avx_last: + vaesenclast %xmm0, %xmm5, %xmm5 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm5, %xmm5 + # Calc counter + # Initialization vector + 
cmpl $0x00, %edx + movl $0x00, %ecx + je L_AES_GCM_init_avx2_calc_iv_done + cmpl $16, %edx + jl L_AES_GCM_init_avx2_calc_iv_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_init_avx2_calc_iv_16_loop: + vmovdqu (%esi,%ecx,1), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm6 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm6, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm6, %xmm6 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm6, %xmm6 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm2 + vpclmulqdq $16, %xmm2, %xmm6, %xmm0 + vpshufd $0x4e, %xmm6, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_init_avx2_calc_iv_16_loop + movl 64(%esp), %edx + cmpl %edx, %ecx + je L_AES_GCM_init_avx2_calc_iv_done +L_AES_GCM_init_avx2_calc_iv_lt16: + vpxor %xmm0, %xmm0, %xmm0 + xorl %ebx, %ebx + vmovdqu %xmm0, (%esp) +L_AES_GCM_init_avx2_calc_iv_loop: + movzbl (%esi,%ecx,1), %eax + movb %al, (%esp,%ebx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_init_avx2_calc_iv_loop + vmovdqu (%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm6 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm6, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm6, %xmm6 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm6, %xmm6 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm2 + vpclmulqdq $16, %xmm2, %xmm6, %xmm0 + vpshufd $0x4e, %xmm6, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 +L_AES_GCM_init_avx2_calc_iv_done: + # T = Encrypt counter + vpxor %xmm0, %xmm0, %xmm0 + shll $3, %edx + vpinsrd $0x00, %edx, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm6 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm6, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm6, %xmm6 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm6, %xmm6 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm2 + vpclmulqdq $16, %xmm2, %xmm6, %xmm0 + vpshufd $0x4e, %xmm6, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + 
vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm4, %xmm4 + # Encrypt counter + vmovdqu (%ebp), %xmm7 + vpxor %xmm4, %xmm7, %xmm7 + vaesenc 16(%ebp), %xmm7, %xmm7 + vaesenc 32(%ebp), %xmm7, %xmm7 + vaesenc 48(%ebp), %xmm7, %xmm7 + vaesenc 64(%ebp), %xmm7, %xmm7 + vaesenc 80(%ebp), %xmm7, %xmm7 + vaesenc 96(%ebp), %xmm7, %xmm7 + vaesenc 112(%ebp), %xmm7, %xmm7 + vaesenc 128(%ebp), %xmm7, %xmm7 + vaesenc 144(%ebp), %xmm7, %xmm7 + cmpl $11, 56(%esp) + vmovdqu 160(%ebp), %xmm0 + jl L_AES_GCM_init_avx2_calc_iv_2_aesenc_avx_last + vaesenc %xmm0, %xmm7, %xmm7 + vaesenc 176(%ebp), %xmm7, %xmm7 + cmpl $13, 56(%esp) + vmovdqu 192(%ebp), %xmm0 + jl L_AES_GCM_init_avx2_calc_iv_2_aesenc_avx_last + vaesenc %xmm0, %xmm7, %xmm7 + vaesenc 208(%ebp), %xmm7, %xmm7 + vmovdqu 224(%ebp), %xmm0 +L_AES_GCM_init_avx2_calc_iv_2_aesenc_avx_last: + vaesenclast %xmm0, %xmm7, %xmm7 + jmp L_AES_GCM_init_avx2_iv_done +L_AES_GCM_init_avx2_iv_12: + # # Calculate values when IV is 12 bytes + # Set counter based on IV + vmovdqu L_avx2_aes_gcm_bswap_one, %xmm4 + vmovdqu (%ebp), %xmm5 + vpblendd $7, (%esi), %xmm4, %xmm4 + # H = Encrypt X(=0) and T = Encrypt counter + vmovdqu 16(%ebp), %xmm6 + vpxor %xmm5, %xmm4, %xmm7 + vaesenc %xmm6, %xmm5, %xmm5 + vaesenc %xmm6, %xmm7, %xmm7 + vmovdqu 32(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqu 48(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqu 64(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqu 80(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqu 96(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqu 112(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqu 128(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqu 144(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + cmpl $11, 56(%esp) + vmovdqu 160(%ebp), %xmm0 + jl L_AES_GCM_init_avx2_calc_iv_12_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqu 176(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + cmpl $13, 56(%esp) + vmovdqu 192(%ebp), %xmm0 + jl L_AES_GCM_init_avx2_calc_iv_12_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqu 208(%ebp), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqu 224(%ebp), %xmm0 +L_AES_GCM_init_avx2_calc_iv_12_last: + vaesenclast %xmm0, %xmm5, %xmm5 + vaesenclast %xmm0, %xmm7, %xmm7 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm5, %xmm5 +L_AES_GCM_init_avx2_iv_done: + vmovdqu %xmm7, (%edi) + movl 68(%esp), %ebp + movl 72(%esp), %edi + vpshufb L_aes_gcm_avx2_bswap_epi64, %xmm4, %xmm4 + vpaddd L_aes_gcm_avx2_one, %xmm4, %xmm4 + vmovdqu %xmm5, (%ebp) + vmovdqu %xmm4, (%edi) + addl $32, %esp + popl %ebp + popl %edi + popl %esi + popl %ebx + ret +.size AES_GCM_init_avx2,.-AES_GCM_init_avx2 +.text +.globl AES_GCM_aad_update_avx2 +.type AES_GCM_aad_update_avx2,@function +.align 16 +AES_GCM_aad_update_avx2: + pushl %esi + pushl %edi + movl 12(%esp), %esi + movl 16(%esp), %edx + movl 20(%esp), %edi + movl 24(%esp), %eax + vmovdqu (%edi), %xmm4 + vmovdqu (%eax), %xmm5 + xorl %ecx, %ecx +L_AES_GCM_aad_update_avx2_16_loop: + vmovdqu (%esi,%ecx,1), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq 
$0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm6 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm6, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm6, %xmm6 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm6, %xmm6 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm2 + vpclmulqdq $16, %xmm2, %xmm6, %xmm0 + vpshufd $0x4e, %xmm6, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_aad_update_avx2_16_loop + vmovdqu %xmm4, (%edi) + popl %edi + popl %esi + ret +.size AES_GCM_aad_update_avx2,.-AES_GCM_aad_update_avx2 +.text +.globl AES_GCM_encrypt_block_avx2 +.type AES_GCM_encrypt_block_avx2,@function +.align 16 +AES_GCM_encrypt_block_avx2: + pushl %esi + pushl %edi + movl 12(%esp), %ecx + movl 16(%esp), %eax + movl 20(%esp), %edi + movl 24(%esp), %esi + movl 28(%esp), %edx + vmovdqu (%edx), %xmm3 + # aesenc_block + vmovdqu %xmm3, %xmm1 + vpshufb L_aes_gcm_avx2_bswap_epi64, %xmm1, %xmm0 + vpaddd L_aes_gcm_avx2_one, %xmm1, %xmm1 + vpxor (%ecx), %xmm0, %xmm0 + vaesenc 16(%ecx), %xmm0, %xmm0 + vaesenc 32(%ecx), %xmm0, %xmm0 + vaesenc 48(%ecx), %xmm0, %xmm0 + vaesenc 64(%ecx), %xmm0, %xmm0 + vaesenc 80(%ecx), %xmm0, %xmm0 + vaesenc 96(%ecx), %xmm0, %xmm0 + vaesenc 112(%ecx), %xmm0, %xmm0 + vaesenc 128(%ecx), %xmm0, %xmm0 + vaesenc 144(%ecx), %xmm0, %xmm0 + cmpl $11, %eax + vmovdqu 160(%ecx), %xmm2 + jl L_AES_GCM_encrypt_block_avx2_aesenc_block_aesenc_avx_last + vaesenc %xmm2, %xmm0, %xmm0 + vaesenc 176(%ecx), %xmm0, %xmm0 + cmpl $13, %eax + vmovdqu 192(%ecx), %xmm2 + jl L_AES_GCM_encrypt_block_avx2_aesenc_block_aesenc_avx_last + vaesenc %xmm2, %xmm0, %xmm0 + vaesenc 208(%ecx), %xmm0, %xmm0 + vmovdqu 224(%ecx), %xmm2 +L_AES_GCM_encrypt_block_avx2_aesenc_block_aesenc_avx_last: + vaesenclast %xmm2, %xmm0, %xmm0 + vmovdqu %xmm1, %xmm3 + vmovdqu (%esi), %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, (%edi) + vmovdqu %xmm3, (%edx) + popl %edi + popl %esi + ret +.size AES_GCM_encrypt_block_avx2,.-AES_GCM_encrypt_block_avx2 +.text +.globl AES_GCM_ghash_block_avx2 +.type AES_GCM_ghash_block_avx2,@function +.align 16 +AES_GCM_ghash_block_avx2: + movl 4(%esp), %edx + movl 8(%esp), %eax + movl 12(%esp), %ecx + vmovdqu (%eax), %xmm4 + vmovdqu (%ecx), %xmm5 + vmovdqu (%edx), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm6 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm6, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm6, %xmm6 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm6, %xmm6 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm2 + vpclmulqdq $16, %xmm2, %xmm6, %xmm0 + vpshufd $0x4e, %xmm6, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + 
vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + vmovdqu %xmm4, (%eax) + ret +.size AES_GCM_ghash_block_avx2,.-AES_GCM_ghash_block_avx2 +.text +.globl AES_GCM_encrypt_update_avx2 +.type AES_GCM_encrypt_update_avx2,@function +.align 16 +AES_GCM_encrypt_update_avx2: + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + subl $0x60, %esp + movl 144(%esp), %esi + vmovdqu (%esi), %xmm4 + vmovdqu %xmm4, 64(%esp) + movl 136(%esp), %esi + movl 140(%esp), %ebp + vmovdqu (%esi), %xmm6 + vmovdqu (%ebp), %xmm5 + vmovdqu %xmm6, 80(%esp) + movl 116(%esp), %ebp + movl 124(%esp), %edi + movl 128(%esp), %esi + # Calculate H + vpsrlq $63, %xmm5, %xmm1 + vpsllq $0x01, %xmm5, %xmm0 + vpslldq $8, %xmm1, %xmm1 + vpor %xmm1, %xmm0, %xmm0 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpand L_aes_gcm_avx2_mod2_128, %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + xorl %ebx, %ebx + cmpl $0x40, 132(%esp) + movl 132(%esp), %eax + jl L_AES_GCM_encrypt_update_avx2_done_64 + andl $0xffffffc0, %eax + vmovdqu %xmm4, 64(%esp) + vmovdqu %xmm6, 80(%esp) + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm3 + # H ^ 1 + vmovdqu %xmm5, (%esp) + vmovdqu %xmm5, %xmm2 + # H ^ 2 + vpclmulqdq $0x00, %xmm2, %xmm2, %xmm5 + vpclmulqdq $0x11, %xmm2, %xmm2, %xmm6 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm6, %xmm0 + vmovdqu %xmm0, 16(%esp) + # H ^ 3 + # ghash_gfmul_red + vpclmulqdq $16, %xmm0, %xmm2, %xmm6 + vpclmulqdq $0x01, %xmm0, %xmm2, %xmm5 + vpclmulqdq $0x00, %xmm0, %xmm2, %xmm4 + vpxor %xmm5, %xmm6, %xmm6 + vpslldq $8, %xmm6, %xmm5 + vpsrldq $8, %xmm6, %xmm6 + vpxor %xmm4, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm0, %xmm2, %xmm1 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm5, %xmm1, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu %xmm1, 32(%esp) + # H ^ 4 + vpclmulqdq $0x00, %xmm0, %xmm0, %xmm5 + vpclmulqdq $0x11, %xmm0, %xmm0, %xmm6 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm6, %xmm2 + vmovdqu %xmm2, 48(%esp) + vmovdqu 80(%esp), %xmm6 + # First 64 bytes of input + # aesenc_64 + # aesenc_ctr + vmovdqu 64(%esp), %xmm4 + vmovdqu L_aes_gcm_avx2_bswap_epi64, %xmm7 + vpaddd L_aes_gcm_avx2_one, %xmm4, %xmm1 + vpshufb %xmm7, %xmm4, %xmm0 + vpaddd L_aes_gcm_avx2_two, %xmm4, %xmm2 + vpshufb %xmm7, %xmm1, %xmm1 + vpaddd L_aes_gcm_avx2_three, %xmm4, %xmm3 + vpshufb %xmm7, %xmm2, %xmm2 + vpaddd L_aes_gcm_avx2_four, %xmm4, %xmm4 + vpshufb %xmm7, %xmm3, %xmm3 + # aesenc_xor + vmovdqu (%ebp), %xmm7 + vmovdqu %xmm4, 64(%esp) + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm7, %xmm3, %xmm3 + vmovdqu 16(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 32(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 48(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 64(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, 
%xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 80(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 96(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 112(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 128(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 144(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $11, 120(%esp) + vmovdqu 160(%ebp), %xmm7 + jl L_AES_GCM_encrypt_update_avx2_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 176(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $13, 120(%esp) + vmovdqu 192(%ebp), %xmm7 + jl L_AES_GCM_encrypt_update_avx2_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 208(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 224(%ebp), %xmm7 +L_AES_GCM_encrypt_update_avx2_aesenc_64_enc_done: + # aesenc_last + vaesenclast %xmm7, %xmm0, %xmm0 + vaesenclast %xmm7, %xmm1, %xmm1 + vaesenclast %xmm7, %xmm2, %xmm2 + vaesenclast %xmm7, %xmm3, %xmm3 + vmovdqu (%esi), %xmm7 + vmovdqu 16(%esi), %xmm4 + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu %xmm0, (%edi) + vmovdqu %xmm1, 16(%edi) + vmovdqu 32(%esi), %xmm7 + vmovdqu 48(%esi), %xmm4 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm4, %xmm3, %xmm3 + vmovdqu %xmm2, 32(%edi) + vmovdqu %xmm3, 48(%edi) + cmpl $0x40, %eax + movl $0x40, %ebx + movl %esi, %ecx + movl %edi, %edx + jle L_AES_GCM_encrypt_update_avx2_end_64 + # More 64 bytes of input +L_AES_GCM_encrypt_update_avx2_ghash_64: + # aesenc_64_ghash + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + # aesenc_64 + # aesenc_ctr + vmovdqu 64(%esp), %xmm4 + vmovdqu L_aes_gcm_avx2_bswap_epi64, %xmm7 + vpaddd L_aes_gcm_avx2_one, %xmm4, %xmm1 + vpshufb %xmm7, %xmm4, %xmm0 + vpaddd L_aes_gcm_avx2_two, %xmm4, %xmm2 + vpshufb %xmm7, %xmm1, %xmm1 + vpaddd L_aes_gcm_avx2_three, %xmm4, %xmm3 + vpshufb %xmm7, %xmm2, %xmm2 + vpaddd L_aes_gcm_avx2_four, %xmm4, %xmm4 + vpshufb %xmm7, %xmm3, %xmm3 + # aesenc_xor + vmovdqu (%ebp), %xmm7 + vmovdqu %xmm4, 64(%esp) + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm7, %xmm3, %xmm3 + vmovdqu 16(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 32(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 48(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 64(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 80(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, 
%xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 96(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 112(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 128(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 144(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $11, 120(%esp) + vmovdqu 160(%ebp), %xmm7 + jl L_AES_GCM_encrypt_update_avx2_aesenc_64_ghash_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 176(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $13, 120(%esp) + vmovdqu 192(%ebp), %xmm7 + jl L_AES_GCM_encrypt_update_avx2_aesenc_64_ghash_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 208(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 224(%ebp), %xmm7 +L_AES_GCM_encrypt_update_avx2_aesenc_64_ghash_aesenc_64_enc_done: + # aesenc_last + vaesenclast %xmm7, %xmm0, %xmm0 + vaesenclast %xmm7, %xmm1, %xmm1 + vaesenclast %xmm7, %xmm2, %xmm2 + vaesenclast %xmm7, %xmm3, %xmm3 + vmovdqu (%ecx), %xmm7 + vmovdqu 16(%ecx), %xmm4 + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu %xmm0, (%edx) + vmovdqu %xmm1, 16(%edx) + vmovdqu 32(%ecx), %xmm7 + vmovdqu 48(%ecx), %xmm4 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm4, %xmm3, %xmm3 + vmovdqu %xmm2, 32(%edx) + vmovdqu %xmm3, 48(%edx) + # pclmul_1 + vmovdqu -64(%edx), %xmm1 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vmovdqu 48(%esp), %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm5 + vpclmulqdq $0x01, %xmm2, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm2, %xmm1, %xmm7 + # pclmul_2 + vmovdqu -48(%edx), %xmm1 + vmovdqu 32(%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + # pclmul_n + vmovdqu -32(%edx), %xmm1 + vmovdqu 16(%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + # pclmul_n + vmovdqu -16(%edx), %xmm1 + vmovdqu (%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + # aesenc_pclmul_l + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm3, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm1 + 
vpsrldq $8, %xmm5, %xmm5 + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm0 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm5, %xmm7, %xmm7 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm7, %xmm6, %xmm6 + # aesenc_64_ghash - end + addl $0x40, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_encrypt_update_avx2_ghash_64 +L_AES_GCM_encrypt_update_avx2_end_64: + vmovdqu %xmm6, 80(%esp) + vmovdqu 48(%edx), %xmm3 + vmovdqu (%esp), %xmm7 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm3, %xmm3 + vpclmulqdq $16, %xmm3, %xmm7, %xmm5 + vpclmulqdq $0x01, %xmm3, %xmm7, %xmm1 + vpclmulqdq $0x00, %xmm3, %xmm7, %xmm4 + vpclmulqdq $0x11, %xmm3, %xmm7, %xmm6 + vpxor %xmm1, %xmm5, %xmm5 + vmovdqu 32(%edx), %xmm3 + vmovdqu 16(%esp), %xmm7 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm3, %xmm3 + vpclmulqdq $16, %xmm3, %xmm7, %xmm2 + vpclmulqdq $0x01, %xmm3, %xmm7, %xmm1 + vpclmulqdq $0x00, %xmm3, %xmm7, %xmm0 + vpclmulqdq $0x11, %xmm3, %xmm7, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vmovdqu 16(%edx), %xmm3 + vmovdqu 32(%esp), %xmm7 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm3, %xmm3 + vpclmulqdq $16, %xmm3, %xmm7, %xmm2 + vpclmulqdq $0x01, %xmm3, %xmm7, %xmm1 + vpclmulqdq $0x00, %xmm3, %xmm7, %xmm0 + vpclmulqdq $0x11, %xmm3, %xmm7, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vmovdqu 80(%esp), %xmm0 + vmovdqu (%edx), %xmm3 + vmovdqu 48(%esp), %xmm7 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm3, %xmm3 + vpxor %xmm0, %xmm3, %xmm3 + vpclmulqdq $16, %xmm3, %xmm7, %xmm2 + vpclmulqdq $0x01, %xmm3, %xmm7, %xmm1 + vpclmulqdq $0x00, %xmm3, %xmm7, %xmm0 + vpclmulqdq $0x11, %xmm3, %xmm7, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vpslldq $8, %xmm5, %xmm7 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm7, %xmm4, %xmm4 + vpxor %xmm5, %xmm6, %xmm6 + # ghash_red + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm2 + vpclmulqdq $16, %xmm2, %xmm4, %xmm0 + vpshufd $0x4e, %xmm4, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm6, %xmm6 + vmovdqu (%esp), %xmm5 + vmovdqu 64(%esp), %xmm4 +L_AES_GCM_encrypt_update_avx2_done_64: + cmpl 132(%esp), %ebx + je L_AES_GCM_encrypt_update_avx2_done_enc + movl 132(%esp), %eax + andl $0xfffffff0, %eax + cmpl %eax, %ebx + jge L_AES_GCM_encrypt_update_avx2_last_block_done + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + # aesenc_block + vmovdqu %xmm4, %xmm1 + vpshufb L_aes_gcm_avx2_bswap_epi64, %xmm1, %xmm0 + vpaddd L_aes_gcm_avx2_one, %xmm1, %xmm1 + vpxor (%ebp), %xmm0, %xmm0 + vaesenc 16(%ebp), %xmm0, %xmm0 + vaesenc 32(%ebp), %xmm0, %xmm0 + vaesenc 48(%ebp), %xmm0, %xmm0 + vaesenc 64(%ebp), %xmm0, %xmm0 + vaesenc 80(%ebp), %xmm0, %xmm0 + vaesenc 96(%ebp), %xmm0, %xmm0 + vaesenc 112(%ebp), %xmm0, %xmm0 + vaesenc 128(%ebp), %xmm0, %xmm0 + vaesenc 144(%ebp), %xmm0, %xmm0 + cmpl $11, 120(%esp) + vmovdqu 160(%ebp), %xmm2 + jl L_AES_GCM_encrypt_update_avx2_aesenc_block_aesenc_avx_last + vaesenc %xmm2, %xmm0, %xmm0 + vaesenc 176(%ebp), %xmm0, %xmm0 + cmpl $13, 120(%esp) + vmovdqu 192(%ebp), %xmm2 + jl L_AES_GCM_encrypt_update_avx2_aesenc_block_aesenc_avx_last + vaesenc %xmm2, %xmm0, %xmm0 + vaesenc 208(%ebp), %xmm0, %xmm0 + vmovdqu 224(%ebp), %xmm2 
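+ # Final AES round, then XOR the keystream with one 16-byte block and fold the ciphertext into the GHASH state in %xmm6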
+L_AES_GCM_encrypt_update_avx2_aesenc_block_aesenc_avx_last: + vaesenclast %xmm2, %xmm0, %xmm0 + vmovdqu %xmm1, %xmm4 + vmovdqu (%ecx), %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, (%edx) + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + addl $16, %ebx + cmpl %eax, %ebx + jge L_AES_GCM_encrypt_update_avx2_last_block_ghash +L_AES_GCM_encrypt_update_avx2_last_block_start: + vpshufb L_aes_gcm_avx2_bswap_epi64, %xmm4, %xmm7 + vpaddd L_aes_gcm_avx2_one, %xmm4, %xmm4 + vmovdqu %xmm4, 64(%esp) + # aesenc_gfmul_sb + vpclmulqdq $0x01, %xmm5, %xmm6, %xmm2 + vpclmulqdq $16, %xmm5, %xmm6, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm1 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm4 + vpxor (%ebp), %xmm7, %xmm7 + vaesenc 16(%ebp), %xmm7, %xmm7 + vpxor %xmm2, %xmm3, %xmm3 + vpslldq $8, %xmm3, %xmm2 + vpsrldq $8, %xmm3, %xmm3 + vaesenc 32(%ebp), %xmm7, %xmm7 + vpxor %xmm1, %xmm2, %xmm2 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm2, %xmm1 + vaesenc 48(%ebp), %xmm7, %xmm7 + vaesenc 64(%ebp), %xmm7, %xmm7 + vaesenc 80(%ebp), %xmm7, %xmm7 + vpshufd $0x4e, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm2, %xmm1 + vaesenc 96(%ebp), %xmm7, %xmm7 + vaesenc 112(%ebp), %xmm7, %xmm7 + vaesenc 128(%ebp), %xmm7, %xmm7 + vpshufd $0x4e, %xmm2, %xmm2 + vaesenc 144(%ebp), %xmm7, %xmm7 + vpxor %xmm3, %xmm4, %xmm4 + vpxor %xmm4, %xmm2, %xmm2 + vmovdqu 160(%ebp), %xmm0 + cmpl $11, 120(%esp) + jl L_AES_GCM_encrypt_update_avx2_aesenc_gfmul_sb_last + vaesenc %xmm0, %xmm7, %xmm7 + vaesenc 176(%ebp), %xmm7, %xmm7 + vmovdqu 192(%ebp), %xmm0 + cmpl $13, 120(%esp) + jl L_AES_GCM_encrypt_update_avx2_aesenc_gfmul_sb_last + vaesenc %xmm0, %xmm7, %xmm7 + vaesenc 208(%ebp), %xmm7, %xmm7 + vmovdqu 224(%ebp), %xmm0 +L_AES_GCM_encrypt_update_avx2_aesenc_gfmul_sb_last: + vaesenclast %xmm0, %xmm7, %xmm7 + vmovdqu (%esi,%ebx,1), %xmm3 + vpxor %xmm1, %xmm2, %xmm6 + vpxor %xmm3, %xmm7, %xmm7 + vmovdqu %xmm7, (%edi,%ebx,1) + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm7, %xmm7 + vpxor %xmm7, %xmm6, %xmm6 + vmovdqu 64(%esp), %xmm4 + addl $16, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_encrypt_update_avx2_last_block_start +L_AES_GCM_encrypt_update_avx2_last_block_ghash: + # ghash_gfmul_red + vpclmulqdq $16, %xmm5, %xmm6, %xmm2 + vpclmulqdq $0x01, %xmm5, %xmm6, %xmm1 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm0 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm6 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm2, %xmm6, %xmm6 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm0, %xmm6, %xmm6 +L_AES_GCM_encrypt_update_avx2_last_block_done: +L_AES_GCM_encrypt_update_avx2_done_enc: + movl 136(%esp), %esi + movl 144(%esp), %edi + vmovdqu %xmm6, (%esi) + vmovdqu %xmm4, (%edi) + addl $0x60, %esp + popl %ebp + popl %edi + popl %esi + popl %ebx + ret +.size AES_GCM_encrypt_update_avx2,.-AES_GCM_encrypt_update_avx2 +.text +.globl AES_GCM_encrypt_final_avx2 +.type AES_GCM_encrypt_final_avx2,@function +.align 16 +AES_GCM_encrypt_final_avx2: + pushl %esi + pushl %edi + pushl %ebp + subl $16, %esp + movl 32(%esp), %ebp + movl 52(%esp), %esi + movl 56(%esp), %edi + vmovdqu (%ebp), %xmm4 + vmovdqu (%esi), %xmm5 + vmovdqu (%edi), %xmm6 + vpsrlq $63, %xmm5, %xmm1 + vpsllq $0x01, %xmm5, %xmm0 + vpslldq $8, %xmm1, %xmm1 + vpor %xmm1, %xmm0, %xmm0 + vpshufd $0xff, 
%xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpand L_aes_gcm_avx2_mod2_128, %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + # calc_tag + movl 44(%esp), %ecx + shll $3, %ecx + vpinsrd $0x00, %ecx, %xmm0, %xmm0 + movl 48(%esp), %ecx + shll $3, %ecx + vpinsrd $2, %ecx, %xmm0, %xmm0 + movl 44(%esp), %ecx + shrl $29, %ecx + vpinsrd $0x01, %ecx, %xmm0, %xmm0 + movl 48(%esp), %ecx + shrl $29, %ecx + vpinsrd $3, %ecx, %xmm0, %xmm0 + vpxor %xmm4, %xmm0, %xmm0 + # ghash_gfmul_red + vpclmulqdq $16, %xmm5, %xmm0, %xmm7 + vpclmulqdq $0x01, %xmm5, %xmm0, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm0, %xmm2 + vpxor %xmm3, %xmm7, %xmm7 + vpslldq $8, %xmm7, %xmm3 + vpsrldq $8, %xmm7, %xmm7 + vpxor %xmm2, %xmm3, %xmm3 + vpclmulqdq $0x11, %xmm5, %xmm0, %xmm0 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm3, %xmm2 + vpshufd $0x4e, %xmm3, %xmm3 + vpxor %xmm2, %xmm3, %xmm3 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm3, %xmm2 + vpshufd $0x4e, %xmm3, %xmm3 + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm3, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpxor %xmm6, %xmm0, %xmm0 + movl 36(%esp), %edi + # store_tag + cmpl $16, 40(%esp) + je L_AES_GCM_encrypt_final_avx2_store_tag_16 + xorl %ecx, %ecx + vmovdqu %xmm0, (%esp) +L_AES_GCM_encrypt_final_avx2_store_tag_loop: + movzbl (%esp,%ecx,1), %eax + movb %al, (%edi,%ecx,1) + incl %ecx + cmpl 40(%esp), %ecx + jne L_AES_GCM_encrypt_final_avx2_store_tag_loop + jmp L_AES_GCM_encrypt_final_avx2_store_tag_done +L_AES_GCM_encrypt_final_avx2_store_tag_16: + vmovdqu %xmm0, (%edi) +L_AES_GCM_encrypt_final_avx2_store_tag_done: + addl $16, %esp + popl %ebp + popl %edi + popl %esi + ret +.size AES_GCM_encrypt_final_avx2,.-AES_GCM_encrypt_final_avx2 +.text +.globl AES_GCM_decrypt_update_avx2 +.type AES_GCM_decrypt_update_avx2,@function +.align 16 +AES_GCM_decrypt_update_avx2: + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + subl $0xa0, %esp + movl 208(%esp), %esi + vmovdqu (%esi), %xmm4 + movl 200(%esp), %esi + movl 204(%esp), %ebp + vmovdqu (%esi), %xmm6 + vmovdqu (%ebp), %xmm5 + movl 180(%esp), %ebp + movl 188(%esp), %edi + movl 192(%esp), %esi + # Calculate H + vpsrlq $63, %xmm5, %xmm1 + vpsllq $0x01, %xmm5, %xmm0 + vpslldq $8, %xmm1, %xmm1 + vpor %xmm1, %xmm0, %xmm0 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpand L_aes_gcm_avx2_mod2_128, %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + xorl %ebx, %ebx + cmpl $0x40, 196(%esp) + movl 196(%esp), %eax + jl L_AES_GCM_decrypt_update_avx2_done_64 + andl $0xffffffc0, %eax + vmovdqu %xmm4, 64(%esp) + vmovdqu %xmm6, 80(%esp) + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm3 + # H ^ 1 + vmovdqu %xmm5, (%esp) + vmovdqu %xmm5, %xmm2 + # H ^ 2 + vpclmulqdq $0x00, %xmm2, %xmm2, %xmm5 + vpclmulqdq $0x11, %xmm2, %xmm2, %xmm6 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm6, %xmm0 + vmovdqu %xmm0, 16(%esp) + # H ^ 3 + # ghash_gfmul_red + vpclmulqdq $16, %xmm0, %xmm2, %xmm6 + vpclmulqdq $0x01, %xmm0, %xmm2, %xmm5 + vpclmulqdq $0x00, %xmm0, %xmm2, %xmm4 + vpxor %xmm5, %xmm6, %xmm6 + vpslldq $8, %xmm6, %xmm5 + vpsrldq $8, %xmm6, %xmm6 + vpxor %xmm4, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm0, %xmm2, %xmm1 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm5, %xmm1, %xmm1 + vpxor %xmm4, %xmm1, %xmm1 + 
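The hash-key power table being set up here (H at (%esp), H^2 at 16(%esp), H^3 just computed, H^4 stored below at 48(%esp)) is built from the primitive this whole file leans on: a carry-less multiply in GF(2^128) followed by reduction with L_aes_gcm_avx2_mod2_128 (the paired vpclmulqdq $16 / vpshufd $0x4e folds). As a reference for what each such sequence computes, here is the bitwise GHASH multiplication from NIST SP 800-38D; it is illustrative only, not part of the patch:

    #include <stdint.h>
    #include <string.h>

    /* z = x * y in GF(2^128) with GCM's bit ordering; the reduction
     * polynomial is x^128 + x^7 + x^2 + x + 1 (the 0xE1 byte below). */
    void gf128_mul(uint8_t z[16], const uint8_t x[16], const uint8_t y[16])
    {
        uint8_t v[16], r[16] = {0};

        memcpy(v, y, 16);
        for (int i = 0; i < 128; i++) {
            if (x[i / 8] & (0x80 >> (i % 8))) {      /* bit i of x, MSB first */
                for (int j = 0; j < 16; j++)
                    r[j] ^= v[j];
            }
            /* shift v right one bit in GHASH's reflected order, folding in
             * the reduction polynomial when a bit falls off the end */
            int carry = v[15] & 1;
            for (int j = 15; j > 0; j--)
                v[j] = (uint8_t)((v[j] >> 1) | (v[j - 1] << 7));
            v[0] >>= 1;
            if (carry)
                v[0] ^= 0xE1;
        }
        memcpy(z, r, 16);
    }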
vmovdqu %xmm1, 32(%esp) + # H ^ 4 + vpclmulqdq $0x00, %xmm0, %xmm0, %xmm5 + vpclmulqdq $0x11, %xmm0, %xmm0, %xmm6 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpclmulqdq $16, %xmm3, %xmm5, %xmm4 + vpshufd $0x4e, %xmm5, %xmm5 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm6, %xmm2 + vmovdqu %xmm2, 48(%esp) + vmovdqu 80(%esp), %xmm6 + cmpl %esi, %edi + jne L_AES_GCM_decrypt_update_avx2_ghash_64 +L_AES_GCM_decrypt_update_avx2_ghash_64_inplace: + # aesenc_64_ghash + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + # aesenc_64 + # aesenc_ctr + vmovdqu 64(%esp), %xmm4 + vmovdqu L_aes_gcm_avx2_bswap_epi64, %xmm7 + vpaddd L_aes_gcm_avx2_one, %xmm4, %xmm1 + vpshufb %xmm7, %xmm4, %xmm0 + vpaddd L_aes_gcm_avx2_two, %xmm4, %xmm2 + vpshufb %xmm7, %xmm1, %xmm1 + vpaddd L_aes_gcm_avx2_three, %xmm4, %xmm3 + vpshufb %xmm7, %xmm2, %xmm2 + vpaddd L_aes_gcm_avx2_four, %xmm4, %xmm4 + vpshufb %xmm7, %xmm3, %xmm3 + # aesenc_xor + vmovdqu (%ebp), %xmm7 + vmovdqu %xmm4, 64(%esp) + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm7, %xmm3, %xmm3 + vmovdqu 16(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 32(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 48(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 64(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 80(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 96(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 112(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 128(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 144(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $11, 184(%esp) + vmovdqu 160(%ebp), %xmm7 + jl L_AES_GCM_decrypt_update_avx2_inplace_aesenc_64_ghash_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 176(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $13, 184(%esp) + vmovdqu 192(%ebp), %xmm7 + jl L_AES_GCM_decrypt_update_avx2_inplace_aesenc_64_ghash_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 208(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 224(%ebp), %xmm7 +L_AES_GCM_decrypt_update_avx2_inplace_aesenc_64_ghash_aesenc_64_enc_done: + # aesenc_last + vaesenclast %xmm7, %xmm0, %xmm0 + vaesenclast %xmm7, %xmm1, %xmm1 + vaesenclast %xmm7, %xmm2, %xmm2 + vaesenclast %xmm7, %xmm3, %xmm3 + vmovdqu (%ecx), %xmm7 + vmovdqu 16(%ecx), %xmm4 + 
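This is the in-place variant of the decrypt loop (taken when out == in). Because GCM authenticates the ciphertext, the blocks just loaded are stashed on the stack (96(%esp)..144(%esp) below) before the keystream XOR writes plaintext over them, and the GHASH multiplies then read the saved copies. A per-block C sketch of that ordering; the helper and keystream argument are placeholders, not wolfSSL APIs:

    #include <stdint.h>
    #include <string.h>

    /* One block of GCM decryption when output may alias input: keep a copy
     * of the ciphertext so GHASH still sees it after the XOR. */
    static void gcm_decrypt_block_inplace(uint8_t *out, const uint8_t *in,
                                          const uint8_t keystream[16],
                                          void (*ghash_block)(const uint8_t c[16]))
    {
        uint8_t saved[16];

        memcpy(saved, in, 16);              /* survives even if out == in */
        for (int i = 0; i < 16; i++)
            out[i] = in[i] ^ keystream[i];  /* may overwrite the ciphertext */
        ghash_block(saved);                 /* authenticate the ciphertext */
    }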
vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu %xmm7, 96(%esp) + vmovdqu %xmm4, 112(%esp) + vmovdqu %xmm0, (%edx) + vmovdqu %xmm1, 16(%edx) + vmovdqu 32(%ecx), %xmm7 + vmovdqu 48(%ecx), %xmm4 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm4, %xmm3, %xmm3 + vmovdqu %xmm7, 128(%esp) + vmovdqu %xmm4, 144(%esp) + vmovdqu %xmm2, 32(%edx) + vmovdqu %xmm3, 48(%edx) + # pclmul_1 + vmovdqu 96(%esp), %xmm1 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vmovdqu 48(%esp), %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm5 + vpclmulqdq $0x01, %xmm2, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm2, %xmm1, %xmm7 + # pclmul_2 + vmovdqu 112(%esp), %xmm1 + vmovdqu 32(%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + # pclmul_n + vmovdqu 128(%esp), %xmm1 + vmovdqu 16(%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + # pclmul_n + vmovdqu 144(%esp), %xmm1 + vmovdqu (%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + # aesenc_pclmul_l + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm3, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm1 + vpsrldq $8, %xmm5, %xmm5 + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm0 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm5, %xmm7, %xmm7 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm7, %xmm6, %xmm6 + # aesenc_64_ghash - end + addl $0x40, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_decrypt_update_avx2_ghash_64_inplace + jmp L_AES_GCM_decrypt_update_avx2_ghash_64_done +L_AES_GCM_decrypt_update_avx2_ghash_64: + # aesenc_64_ghash + leal (%esi,%ebx,1), %ecx + leal (%edi,%ebx,1), %edx + # aesenc_64 + # aesenc_ctr + vmovdqu 64(%esp), %xmm4 + vmovdqu L_aes_gcm_avx2_bswap_epi64, %xmm7 + vpaddd L_aes_gcm_avx2_one, %xmm4, %xmm1 + vpshufb %xmm7, %xmm4, %xmm0 + vpaddd L_aes_gcm_avx2_two, %xmm4, %xmm2 + vpshufb %xmm7, %xmm1, %xmm1 + vpaddd L_aes_gcm_avx2_three, %xmm4, %xmm3 + vpshufb %xmm7, %xmm2, %xmm2 + vpaddd L_aes_gcm_avx2_four, %xmm4, %xmm4 + vpshufb %xmm7, %xmm3, %xmm3 + # aesenc_xor + vmovdqu (%ebp), %xmm7 + vmovdqu %xmm4, 64(%esp) + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm7, %xmm3, %xmm3 + vmovdqu 16(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 32(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 48(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 64(%ebp), 
%xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 80(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 96(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 112(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 128(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 144(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $11, 184(%esp) + vmovdqu 160(%ebp), %xmm7 + jl L_AES_GCM_decrypt_update_avx2_aesenc_64_ghash_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 176(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + cmpl $13, 184(%esp) + vmovdqu 192(%ebp), %xmm7 + jl L_AES_GCM_decrypt_update_avx2_aesenc_64_ghash_aesenc_64_enc_done + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 208(%ebp), %xmm7 + vaesenc %xmm7, %xmm0, %xmm0 + vaesenc %xmm7, %xmm1, %xmm1 + vaesenc %xmm7, %xmm2, %xmm2 + vaesenc %xmm7, %xmm3, %xmm3 + vmovdqu 224(%ebp), %xmm7 +L_AES_GCM_decrypt_update_avx2_aesenc_64_ghash_aesenc_64_enc_done: + # aesenc_last + vaesenclast %xmm7, %xmm0, %xmm0 + vaesenclast %xmm7, %xmm1, %xmm1 + vaesenclast %xmm7, %xmm2, %xmm2 + vaesenclast %xmm7, %xmm3, %xmm3 + vmovdqu (%ecx), %xmm7 + vmovdqu 16(%ecx), %xmm4 + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqu %xmm7, (%ecx) + vmovdqu %xmm4, 16(%ecx) + vmovdqu %xmm0, (%edx) + vmovdqu %xmm1, 16(%edx) + vmovdqu 32(%ecx), %xmm7 + vmovdqu 48(%ecx), %xmm4 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm4, %xmm3, %xmm3 + vmovdqu %xmm7, 32(%ecx) + vmovdqu %xmm4, 48(%ecx) + vmovdqu %xmm2, 32(%edx) + vmovdqu %xmm3, 48(%edx) + # pclmul_1 + vmovdqu (%ecx), %xmm1 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vmovdqu 48(%esp), %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm5 + vpclmulqdq $0x01, %xmm2, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm2, %xmm1, %xmm7 + # pclmul_2 + vmovdqu 16(%ecx), %xmm1 + vmovdqu 32(%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + # pclmul_n + vmovdqu 32(%ecx), %xmm1 + vmovdqu 16(%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + # pclmul_n + vmovdqu 48(%ecx), %xmm1 + vmovdqu (%esp), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 
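The pclmul_1 / pclmul_2 / pclmul_n / aesenc_pclmul_l steps in this loop fold four ciphertext blocks into the GHASH state at once, pairing each block with the matching entry of the H-power table (48(%esp) = H^4 down to (%esp) = H) so only one reduction is needed per 64 bytes: X = (X ^ C1)*H^4 ^ C2*H^3 ^ C3*H^2 ^ C4*H. A reference model of that aggregation, reusing gf128_mul from the earlier sketch (illustrative only):

    #include <stdint.h>
    #include <string.h>

    void gf128_mul(uint8_t z[16], const uint8_t x[16], const uint8_t y[16]);
    /* gf128_mul is the bitwise routine from the sketch above */

    static void xor16(uint8_t z[16], const uint8_t a[16], const uint8_t b[16])
    {
        for (int i = 0; i < 16; i++)
            z[i] = a[i] ^ b[i];
    }

    /* Fold four blocks c[0..3] into the GHASH accumulator x.
     * h[0] = H^4, h[1] = H^3, h[2] = H^2, h[3] = H. */
    static void ghash_4_blocks(uint8_t x[16], const uint8_t c[4][16],
                               const uint8_t h[4][16])
    {
        uint8_t t[16], acc[16];

        xor16(t, x, c[0]);
        gf128_mul(acc, t, h[0]);        /* (X ^ C1) * H^4 */
        for (int i = 1; i < 4; i++) {
            gf128_mul(t, c[i], h[i]);   /* C(i+1) * H^(4-i) */
            xor16(acc, acc, t);
        }
        memcpy(x, acc, 16);
    }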
+ vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm7, %xmm7 + # aesenc_pclmul_l + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm3, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm1 + vpsrldq $8, %xmm5, %xmm5 + vmovdqu L_aes_gcm_avx2_mod2_128, %xmm0 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm5, %xmm7, %xmm7 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm7, %xmm6, %xmm6 + # aesenc_64_ghash - end + addl $0x40, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_decrypt_update_avx2_ghash_64 +L_AES_GCM_decrypt_update_avx2_ghash_64_done: + vmovdqu (%esp), %xmm5 + vmovdqu 64(%esp), %xmm4 +L_AES_GCM_decrypt_update_avx2_done_64: + cmpl 196(%esp), %ebx + jge L_AES_GCM_decrypt_update_avx2_done_dec + movl 196(%esp), %eax + andl $0xfffffff0, %eax + cmpl %eax, %ebx + jge L_AES_GCM_decrypt_update_avx2_last_block_done +L_AES_GCM_decrypt_update_avx2_last_block_start: + vmovdqu (%esi,%ebx,1), %xmm0 + vpshufb L_aes_gcm_avx2_bswap_epi64, %xmm4, %xmm7 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpaddd L_aes_gcm_avx2_one, %xmm4, %xmm4 + vmovdqu %xmm4, 64(%esp) + vpxor %xmm6, %xmm0, %xmm4 + # aesenc_gfmul_sb + vpclmulqdq $0x01, %xmm5, %xmm4, %xmm2 + vpclmulqdq $16, %xmm5, %xmm4, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm1 + vpclmulqdq $0x11, %xmm5, %xmm4, %xmm4 + vpxor (%ebp), %xmm7, %xmm7 + vaesenc 16(%ebp), %xmm7, %xmm7 + vpxor %xmm2, %xmm3, %xmm3 + vpslldq $8, %xmm3, %xmm2 + vpsrldq $8, %xmm3, %xmm3 + vaesenc 32(%ebp), %xmm7, %xmm7 + vpxor %xmm1, %xmm2, %xmm2 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm2, %xmm1 + vaesenc 48(%ebp), %xmm7, %xmm7 + vaesenc 64(%ebp), %xmm7, %xmm7 + vaesenc 80(%ebp), %xmm7, %xmm7 + vpshufd $0x4e, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm2, %xmm1 + vaesenc 96(%ebp), %xmm7, %xmm7 + vaesenc 112(%ebp), %xmm7, %xmm7 + vaesenc 128(%ebp), %xmm7, %xmm7 + vpshufd $0x4e, %xmm2, %xmm2 + vaesenc 144(%ebp), %xmm7, %xmm7 + vpxor %xmm3, %xmm4, %xmm4 + vpxor %xmm4, %xmm2, %xmm2 + vmovdqu 160(%ebp), %xmm0 + cmpl $11, 184(%esp) + jl L_AES_GCM_decrypt_update_avx2_aesenc_gfmul_sb_last + vaesenc %xmm0, %xmm7, %xmm7 + vaesenc 176(%ebp), %xmm7, %xmm7 + vmovdqu 192(%ebp), %xmm0 + cmpl $13, 184(%esp) + jl L_AES_GCM_decrypt_update_avx2_aesenc_gfmul_sb_last + vaesenc %xmm0, %xmm7, %xmm7 + vaesenc 208(%ebp), %xmm7, %xmm7 + vmovdqu 224(%ebp), %xmm0 +L_AES_GCM_decrypt_update_avx2_aesenc_gfmul_sb_last: + vaesenclast %xmm0, %xmm7, %xmm7 + vmovdqu (%esi,%ebx,1), %xmm3 + vpxor %xmm1, %xmm2, %xmm6 + vpxor %xmm3, %xmm7, %xmm7 + vmovdqu %xmm7, (%edi,%ebx,1) + vmovdqu 64(%esp), %xmm4 + addl $16, %ebx + cmpl %eax, %ebx + jl L_AES_GCM_decrypt_update_avx2_last_block_start +L_AES_GCM_decrypt_update_avx2_last_block_done: +L_AES_GCM_decrypt_update_avx2_done_dec: + movl 200(%esp), %esi + movl 208(%esp), %edi + vmovdqu 64(%esp), %xmm4 + vmovdqu %xmm6, (%esi) + vmovdqu %xmm4, (%edi) + addl $0xa0, %esp + popl %ebp + popl %edi + popl %esi + popl %ebx + ret +.size AES_GCM_decrypt_update_avx2,.-AES_GCM_decrypt_update_avx2 +.text +.globl AES_GCM_decrypt_final_avx2 +.type AES_GCM_decrypt_final_avx2,@function +.align 16 +AES_GCM_decrypt_final_avx2: + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + subl $16, %esp + movl 36(%esp), %ebp + movl 56(%esp), %esi + movl 60(%esp), %edi + vmovdqu (%ebp), %xmm4 + vmovdqu (%esi), %xmm5 + vmovdqu (%edi), %xmm6 + 
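AES_GCM_decrypt_final_avx2, continuing below, recomputes the tag from the saved GHASH state and the AAD/message bit lengths, then checks it against the caller's tag without an early exit: the cmp_tag loop XORs byte pairs and ORs the differences together, and the 16-byte fast path does the same with vpcmpeqb/vpmovmskb, so timing does not depend on where a mismatch occurs. A C model of that constant-time comparison, with an illustrative name only:

    #include <stddef.h>
    #include <stdint.h>

    /* Returns 1 when the tags match, 0 otherwise, in time independent of
     * the position of any mismatch. */
    static int tag_matches(const uint8_t *calc, const uint8_t *given, size_t len)
    {
        uint8_t diff = 0;

        for (size_t i = 0; i < len; i++)
            diff |= (uint8_t)(calc[i] ^ given[i]);
        return diff == 0;
    }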
vpsrlq $63, %xmm5, %xmm1 + vpsllq $0x01, %xmm5, %xmm0 + vpslldq $8, %xmm1, %xmm1 + vpor %xmm1, %xmm0, %xmm0 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpand L_aes_gcm_avx2_mod2_128, %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + # calc_tag + movl 48(%esp), %ecx + shll $3, %ecx + vpinsrd $0x00, %ecx, %xmm0, %xmm0 + movl 52(%esp), %ecx + shll $3, %ecx + vpinsrd $2, %ecx, %xmm0, %xmm0 + movl 48(%esp), %ecx + shrl $29, %ecx + vpinsrd $0x01, %ecx, %xmm0, %xmm0 + movl 52(%esp), %ecx + shrl $29, %ecx + vpinsrd $3, %ecx, %xmm0, %xmm0 + vpxor %xmm4, %xmm0, %xmm0 + # ghash_gfmul_red + vpclmulqdq $16, %xmm5, %xmm0, %xmm7 + vpclmulqdq $0x01, %xmm5, %xmm0, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm0, %xmm2 + vpxor %xmm3, %xmm7, %xmm7 + vpslldq $8, %xmm7, %xmm3 + vpsrldq $8, %xmm7, %xmm7 + vpxor %xmm2, %xmm3, %xmm3 + vpclmulqdq $0x11, %xmm5, %xmm0, %xmm0 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm3, %xmm2 + vpshufd $0x4e, %xmm3, %xmm3 + vpxor %xmm2, %xmm3, %xmm3 + vpclmulqdq $16, L_aes_gcm_avx2_mod2_128, %xmm3, %xmm2 + vpshufd $0x4e, %xmm3, %xmm3 + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm3, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vpshufb L_aes_gcm_avx2_bswap_mask, %xmm0, %xmm0 + vpxor %xmm6, %xmm0, %xmm0 + movl 40(%esp), %esi + movl 64(%esp), %edi + # cmp_tag + cmpl $16, 44(%esp) + je L_AES_GCM_decrypt_final_avx2_cmp_tag_16 + xorl %ecx, %ecx + xorl %edx, %edx + vmovdqu %xmm0, (%esp) +L_AES_GCM_decrypt_final_avx2_cmp_tag_loop: + movzbl (%esp,%ecx,1), %eax + xorb (%esi,%ecx,1), %al + orb %al, %dl + incl %ecx + cmpl 44(%esp), %ecx + jne L_AES_GCM_decrypt_final_avx2_cmp_tag_loop + cmpb $0x00, %dl + sete %dl + jmp L_AES_GCM_decrypt_final_avx2_cmp_tag_done +L_AES_GCM_decrypt_final_avx2_cmp_tag_16: + vmovdqu (%esi), %xmm1 + vpcmpeqb %xmm1, %xmm0, %xmm0 + vpmovmskb %xmm0, %ecx + # %%edx == 0xFFFF then return 1 else => return 0 + xorl %edx, %edx + cmpl $0xffff, %ecx + sete %dl +L_AES_GCM_decrypt_final_avx2_cmp_tag_done: + movl %edx, (%edi) + addl $16, %esp + popl %ebp + popl %edi + popl %esi + popl %ebx + ret +.size AES_GCM_decrypt_final_avx2,.-AES_GCM_decrypt_final_avx2 +#endif /* WOLFSSL_AESGCM_STREAM */ +#endif /* HAVE_INTEL_AVX2 */ + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/wolfcrypt/src/chacha_asm.S b/wolfcrypt/src/chacha_asm.S index f85cb25a2..49fb1ec3e 100644 --- a/wolfcrypt/src/chacha_asm.S +++ b/wolfcrypt/src/chacha_asm.S @@ -30,6 +30,7 @@ #define HAVE_INTEL_AVX2 #endif /* NO_AVX2_SUPPORT */ +#ifdef WOLFSSL_X86_64_BUILD #ifndef __APPLE__ .text .globl chacha_encrypt_x64 @@ -1430,6 +1431,7 @@ L_chacha20_avx2_end256: .size chacha_encrypt_avx2,.-chacha_encrypt_avx2 #endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ +#endif /* WOLFSSL_X86_64_BUILD */ #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/wolfcrypt/src/poly1305.c b/wolfcrypt/src/poly1305.c index 0bdfc447f..fbebaba5e 100644 --- a/wolfcrypt/src/poly1305.c +++ b/wolfcrypt/src/poly1305.c @@ -55,7 +55,7 @@ and Daniel J. Bernstein #pragma warning(disable: 4127) #endif -#ifdef USE_INTEL_SPEEDUP +#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) #include #include @@ -77,12 +77,13 @@ and Daniel J. 
Bernstein #endif #endif -#ifdef USE_INTEL_SPEEDUP +#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) static word32 intel_flags = 0; static word32 cpu_flags_set = 0; #endif -#if defined(USE_INTEL_SPEEDUP) || defined(POLY130564) +#if (defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP)) || \ + defined(POLY130564) #if defined(_MSC_VER) #define POLY1305_NOINLINE __declspec(noinline) #elif defined(__GNUC__) @@ -122,7 +123,7 @@ static word32 cpu_flags_set = 0; #endif #endif -#ifdef USE_INTEL_SPEEDUP +#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) #ifdef __cplusplus extern "C" { #endif @@ -265,7 +266,7 @@ with a given ctx pointer to a Poly1305 structure. static int poly1305_blocks(Poly1305* ctx, const unsigned char *m, size_t bytes) { -#ifdef USE_INTEL_SPEEDUP +#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) /* AVX2 is handled in wc_Poly1305Update. */ SAVE_VECTOR_REGISTERS(return _svr_ret;); poly1305_blocks_avx(ctx, m, bytes); @@ -399,7 +400,7 @@ number of bytes is less than the block size. */ static int poly1305_block(Poly1305* ctx, const unsigned char *m) { -#ifdef USE_INTEL_SPEEDUP +#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) /* No call to poly1305_block when AVX2, AVX2 does 4 blocks at a time. */ SAVE_VECTOR_REGISTERS(return _svr_ret;); poly1305_block_avx(ctx, m); @@ -414,7 +415,8 @@ static int poly1305_block(Poly1305* ctx, const unsigned char *m) #if !defined(WOLFSSL_ARMASM) || !defined(__aarch64__) int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz) { -#if defined(POLY130564) && !defined(USE_INTEL_SPEEDUP) +#if defined(POLY130564) && \ + !(defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP)) word64 t0,t1; #endif @@ -435,7 +437,7 @@ int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz) if (keySz != 32 || ctx == NULL) return BAD_FUNC_ARG; -#ifdef USE_INTEL_SPEEDUP +#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) if (!cpu_flags_set) { intel_flags = cpuid_get_flags(); cpu_flags_set = 1; @@ -502,7 +504,7 @@ int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz) int wc_Poly1305Final(Poly1305* ctx, byte* mac) { -#ifdef USE_INTEL_SPEEDUP +#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) #elif defined(POLY130564) word64 h0,h1,h2,c; @@ -521,7 +523,7 @@ int wc_Poly1305Final(Poly1305* ctx, byte* mac) if (ctx == NULL || mac == NULL) return BAD_FUNC_ARG; -#ifdef USE_INTEL_SPEEDUP +#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) SAVE_VECTOR_REGISTERS(return _svr_ret;); #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_AVX2(intel_flags)) @@ -707,7 +709,7 @@ int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes) printf("\n"); #endif -#ifdef USE_INTEL_SPEEDUP +#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_AVX2(intel_flags)) { SAVE_VECTOR_REGISTERS(return _svr_ret;); diff --git a/wolfcrypt/src/poly1305_asm.S b/wolfcrypt/src/poly1305_asm.S index db4222b8d..b0dcbe660 100644 --- a/wolfcrypt/src/poly1305_asm.S +++ b/wolfcrypt/src/poly1305_asm.S @@ -30,6 +30,7 @@ #define HAVE_INTEL_AVX2 #endif /* NO_AVX2_SUPPORT */ +#ifdef WOLFSSL_X86_64_BUILD #ifdef HAVE_INTEL_AVX1 #ifndef __APPLE__ .text @@ -1107,6 +1108,7 @@ L_poly1305_avx2_final_cmp_copy: .size poly1305_final_avx2,.-poly1305_final_avx2 #endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ +#endif /* WOLFSSL_X86_64_BUILD */ #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/wolfcrypt/src/sha256.c 
b/wolfcrypt/src/sha256.c index 3be80672b..53621f5f5 100644 --- a/wolfcrypt/src/sha256.c +++ b/wolfcrypt/src/sha256.c @@ -174,7 +174,7 @@ on the specific device platform. #endif -#if defined(USE_INTEL_SPEEDUP) +#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) #if defined(__GNUC__) && ((__GNUC__ < 4) || \ (__GNUC__ == 4 && __GNUC_MINOR__ <= 8)) #undef NO_AVX2_SUPPORT @@ -194,7 +194,7 @@ on the specific device platform. #else #undef HAVE_INTEL_AVX1 #undef HAVE_INTEL_AVX2 -#endif /* USE_INTEL_SPEEDUP */ +#endif /* WOLFSSL_X86_64_BUILD && USE_INTEL_SPEEDUP */ #if defined(HAVE_INTEL_AVX2) #define HAVE_INTEL_RORX @@ -253,8 +253,8 @@ static int InitSha256(wc_Sha256* sha256) /* Hardware Acceleration */ -#if defined(USE_INTEL_SPEEDUP) && (defined(HAVE_INTEL_AVX1) || \ - defined(HAVE_INTEL_AVX2)) +#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ + (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) /* in case intel instructions aren't available, plus we need the K[] global */ #define NEED_SOFT_SHA256 @@ -1072,7 +1072,8 @@ static int InitSha256(wc_Sha256* sha256) if (sha256->buffLen == WC_SHA256_BLOCK_SIZE) { #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) - #if defined(USE_INTEL_SPEEDUP) && \ + #if defined(WOLFSSL_X86_64_BUILD) && \ + defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) #endif @@ -1107,7 +1108,7 @@ static int InitSha256(wc_Sha256* sha256) /* process blocks */ #ifdef XTRANSFORM_LEN - #if defined(USE_INTEL_SPEEDUP) && \ + #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) if (Transform_Sha256_Len_p != NULL) #endif @@ -1123,13 +1124,14 @@ static int InitSha256(wc_Sha256* sha256) len -= blocksLen; } } - #if defined(USE_INTEL_SPEEDUP) && \ + #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) else #endif #endif /* XTRANSFORM_LEN */ - #if !defined(XTRANSFORM_LEN) || (defined(USE_INTEL_SPEEDUP) && \ - (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))) + #if !defined(XTRANSFORM_LEN) || \ + (defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ + (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))) { while (len >= WC_SHA256_BLOCK_SIZE) { word32* local32 = sha256->buffer; @@ -1137,7 +1139,8 @@ static int InitSha256(wc_Sha256* sha256) /* Intel transform function requires use of sha256->buffer */ /* Little Endian requires byte swap, so can't use data directly */ #if defined(WC_HASH_DATA_ALIGNMENT) && !defined(LITTLE_ENDIAN_ORDER) && \ - !(defined(USE_INTEL_SPEEDUP) && \ + !(defined(WOLFSSL_X86_64_BUILD) && \ + defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))) if (((wc_ptr_t)data % WC_HASH_DATA_ALIGNMENT) == 0) { local32 = (word32*)data; @@ -1152,7 +1155,8 @@ static int InitSha256(wc_Sha256* sha256) len -= WC_SHA256_BLOCK_SIZE; #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) - #if defined(USE_INTEL_SPEEDUP) && \ + #if defined(WOLFSSL_X86_64_BUILD) && \ + defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) #endif @@ -1245,7 +1249,7 @@ static int InitSha256(wc_Sha256* sha256) sha256->buffLen += WC_SHA256_BLOCK_SIZE - sha256->buffLen; #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) - #if defined(USE_INTEL_SPEEDUP) && 
\ + #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) #endif @@ -1283,7 +1287,7 @@ static int InitSha256(wc_Sha256* sha256) /* store lengths */ #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) - #if defined(USE_INTEL_SPEEDUP) && \ + #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) #endif @@ -1297,10 +1301,11 @@ static int InitSha256(wc_Sha256* sha256) XMEMCPY(&local[WC_SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen, sizeof(word32)); - #if defined(FREESCALE_MMCAU_SHA) || (defined(USE_INTEL_SPEEDUP) && \ + #if defined(FREESCALE_MMCAU_SHA) || \ + (defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))) /* Kinetis requires only these bytes reversed */ - #if defined(USE_INTEL_SPEEDUP) && \ + #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags)) #endif @@ -1532,7 +1537,7 @@ static int InitSha256(wc_Sha256* sha256) sha224->loLen = 0; sha224->hiLen = 0; - #if defined(USE_INTEL_SPEEDUP) && \ + #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \ (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)) /* choose best Transform function under this runtime environment */ Sha256_SetTransform(); diff --git a/wolfcrypt/src/sha256_asm.S b/wolfcrypt/src/sha256_asm.S index ffb4e93d5..402e6eac1 100644 --- a/wolfcrypt/src/sha256_asm.S +++ b/wolfcrypt/src/sha256_asm.S @@ -30,6 +30,7 @@ #define HAVE_INTEL_AVX2 #endif /* NO_AVX2_SUPPORT */ +#ifdef WOLFSSL_X86_64_BUILD #ifdef HAVE_INTEL_AVX1 #ifndef __APPLE__ .data @@ -22655,6 +22656,7 @@ L_sha256_len_avx2_rorx_done: .size Transform_Sha256_AVX2_RORX_Len,.-Transform_Sha256_AVX2_RORX_Len #endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ +#endif /* WOLFSSL_X86_64_BUILD */ #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index 92accfff9..02b8675b7 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -11321,6 +11321,33 @@ WOLFSSL_TEST_SUBROUTINE int aesgcm_test(void) ERROR_OUT(-6394, out); } #endif /* HAVE_AES_DECRYPT */ +#ifdef BENCH_AESGCM_LARGE + /* setup test buffer */ + result = wc_AesGcmEncryptInit(enc, k1, sizeof(k1), iv1, sizeof(iv1)); + if (result != 0) + ERROR_OUT(-6360, out); + result = wc_AesGcmEncryptUpdate(enc, large_output, large_input, + BENCH_AESGCM_LARGE, a, sizeof(a)); + if (result != 0) + ERROR_OUT(-6361, out); + result = wc_AesGcmEncryptFinal(enc, resultT, sizeof(t1)); + if (result != 0) + ERROR_OUT(-6362, out); +#ifdef HAVE_AES_DECRYPT + result = wc_AesGcmDecryptInit(enc, k1, sizeof(k1), iv1, sizeof(iv1)); + if (result != 0) + ERROR_OUT(-6363, out); + result = wc_AesGcmDecryptUpdate(enc, large_outdec, large_output, + BENCH_AESGCM_LARGE, a, sizeof(a)); + if (result != 0) + ERROR_OUT(-6364, out); + result = wc_AesGcmDecryptFinal(enc, resultT, sizeof(t1)); + if (result != 0) + ERROR_OUT(-6365, out); + if (XMEMCMP(large_input, large_outdec, BENCH_AESGCM_LARGE)) + ERROR_OUT(-6366, out); +#endif /* HAVE_AES_DECRYPT */ +#endif /* BENCH_AESGCM_LARGE */ #endif /* WOLFSSL_AESGCM_STREAM */ #endif /* WOLFSSL_AES_256 */ #endif /* !WOLFSSL_AFALG_XILINX_AES 
&& !WOLFSSL_XILINX_CRYPT */
diff --git a/wolfssl/wolfcrypt/poly1305.h b/wolfssl/wolfcrypt/poly1305.h
index 70d433413..a1c47b9c1 100644
--- a/wolfssl/wolfcrypt/poly1305.h
+++ b/wolfssl/wolfcrypt/poly1305.h
@@ -48,7 +48,7 @@
 #define WC_HAS_GCC_4_4_64BIT
 #endif
 
-#ifdef USE_INTEL_SPEEDUP
+#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP)
 #elif (defined(WC_HAS_SIZEOF_INT128_64BIT) || defined(WC_HAS_MSVC_64BIT) || \
        defined(WC_HAS_GCC_4_4_64BIT))
 #define POLY130564
@@ -67,7 +67,7 @@ enum {
 /* Poly1305 state */
 typedef struct Poly1305 {
-#ifdef USE_INTEL_SPEEDUP
+#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP)
     word64 r[3];
     word64 h[3];
     word64 pad[2];
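The poly1305.c/h, sha256.c and *_asm.S hunks above all apply the same two-level gate: WOLFSSL_X86_64_BUILD together with USE_INTEL_SPEEDUP decides whether the x86-64 assembly is compiled at all, and the cpuid flags still decide at run time whether it is called. A condensed sketch of that pattern; every name in it is a placeholder rather than a wolfSSL symbol:

    #include <stdio.h>

    typedef int (*transform_fn)(void);

    static int transform_c(void)    { return 0; }  /* portable fallback, always built */

    #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP)
    static int transform_avx2(void) { return 1; }  /* stand-in for the .S routine */
    static int cpu_has_avx2(void)   { return 1; }  /* stand-in for a cpuid feature check */
    #endif

    /* Compile-time guard selects which candidates exist in the binary;
     * the run-time check picks one of them. */
    static transform_fn pick_transform(void)
    {
    #if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP)
        if (cpu_has_avx2())
            return transform_avx2;
    #endif
        return transform_c;
    }

    int main(void)
    {
        printf("using the %s path\n", pick_transform()() ? "assembly" : "C");
        return 0;
    }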