Merge pull request #7334 from SparkiDev/macosx_clang_15_asm_fix

MacOS X Intel ASM clang 15: fix asm to compile without warning
David Garske 2024-03-14 10:10:42 -07:00 committed by GitHub
commit 5dff8aa417
6 changed files with 218 additions and 185 deletions


@ -79,23 +79,29 @@ if BUILD_SHA
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha.c
endif
if !BUILD_X86_ASM
if BUILD_INTELASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha256_asm.S
endif
endif
if BUILD_SHA512
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha512.c
if !BUILD_X86_ASM
if BUILD_INTELASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha512_asm.S
endif
endif
endif
if BUILD_SHA3
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha3.c
if !BUILD_X86_ASM
if BUILD_INTELASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha3_asm.S
endif
endif
endif
if BUILD_DH
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/dh.c
@ -202,9 +208,11 @@ src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha256-asm
endif !BUILD_ARMASM_INLINE
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha256.c
if !BUILD_X86_ASM
if BUILD_INTELASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha256_asm.S
endif BUILD_INTELASM
endif !BUILD_X86_ASM
endif !BUILD_ARMASM
endif !BUILD_ARMASM_NEON
@ -230,9 +238,11 @@ src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha512-asm
endif !BUILD_ARMASM_INLINE
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha512.c
if !BUILD_X86_ASM
if BUILD_INTELASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha512_asm.S
endif BUILD_INTELASM
endif !BUILD_X86_ASM
endif !BUILD_ARMASM
endif !BUILD_ARMASM_NEON
endif BUILD_SHA512
@ -246,10 +256,12 @@ else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha3-asm.S
endif !BUILD_ARMASM_INLINE
endif BUILD_ARMASM_NEON
if !BUILD_X86_ASM
if BUILD_INTELASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha3_asm.S
endif
endif
endif
if BUILD_DH
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/dh.c
@ -328,9 +340,11 @@ src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha256-a
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha256-asm.S
endif !BUILD_ARMASM_INLINE
else
if !BUILD_X86_ASM
if BUILD_INTELASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha256_asm.S
endif BUILD_INTELASM
endif !BUILD_X86_ASM
endif !BUILD_ARMASM
endif !BUILD_ARMASM_NEON
endif !BUILD_FIPS_CURRENT
@ -473,9 +487,11 @@ src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha512-asm
endif !BUILD_ARMASM_INLINE
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha512.c
if !BUILD_X86_ASM
if BUILD_INTELASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha512_asm.S
endif BUILD_INTELASM
endif !BUILD_X86_ASM
endif !BUILD_ARMASM
endif !BUILD_ARMASM_NEON
endif BUILD_SHA512
@ -491,10 +507,12 @@ else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha3-asm.S
endif !BUILD_ARMASM_INLINE
endif BUILD_ARMASM_NEON
if !BUILD_X86_ASM
if BUILD_INTELASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha3_asm.S
endif
endif
endif
endif !BUILD_FIPS_CURRENT
if !BUILD_FIPS_CURRENT
@ -535,9 +553,11 @@ endif !BUILD_FIPS_CURRENT
if !BUILD_FIPS_CURRENT
if BUILD_SM3
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sm3.c
if !BUILD_X86_ASM
if BUILD_INTELASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sm3_asm.S
endif
endif
endif BUILD_SM3
endif !BUILD_FIPS_CURRENT
@ -602,10 +622,12 @@ if BUILD_ARMASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-poly1305.c
endif
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/poly1305.c
if !BUILD_X86_ASM
if BUILD_INTELASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/poly1305_asm.S
endif
endif
endif
if BUILD_RC4
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/arc4.c
@ -664,10 +686,12 @@ if BUILD_ARMASM_NEON
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-chacha.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/chacha.c
if !BUILD_X86_ASM
if BUILD_INTELASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/chacha_asm.S
endif
endif
endif
if BUILD_POLY1305
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/chacha20_poly1305.c
endif
@ -702,11 +726,13 @@ if !BUILD_FIPS_CURRENT
if BUILD_WC_KYBER
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_kyber.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_kyber_poly.c
if !BUILD_X86_ASM
if BUILD_INTELASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_kyber_asm.S
endif
endif
endif
endif
if BUILD_WC_LMS
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_lms.c
@ -731,7 +757,9 @@ if BUILD_CURVE25519_SMALL
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/fe_low_mem.c
else
if BUILD_INTELASM
if !BUILD_X86_ASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/fe_x25519_asm.S
endif !BUILD_X86_ASM
else
if BUILD_ARMASM
if BUILD_ARMASM_NEON
@ -767,7 +795,9 @@ else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/ge_operations.c
if !BUILD_FEMATH
if BUILD_INTELASM
if !BUILD_X86_ASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/fe_x25519_asm.S
endif !BUILD_X86_ASM
else
if BUILD_ARMASM
if BUILD_ARMASM_NEON


@ -1095,12 +1095,13 @@ DECB_END_4:
void AES_128_Key_Expansion_AESNI(const unsigned char* userkey,
unsigned char* key_schedule);
*/
.align 16,0x90
#ifndef __APPLE__
.globl AES_128_Key_Expansion_AESNI
.align 16,0x90
AES_128_Key_Expansion_AESNI:
#else
.globl _AES_128_Key_Expansion_AESNI
.p2align 4
_AES_128_Key_Expansion_AESNI:
#endif
# parameter 1: %rdi
@ -1971,12 +1972,13 @@ DECB_END_4:
void AES_128_Key_Expansion_AESNI(const unsigned char* userkey,
unsigned char* key_schedule);
*/
.align 16,0x90
#ifndef __APPLE__
.globl AES_128_Key_Expansion_AESNI
.align 16,0x90
AES_128_Key_Expansion_AESNI:
#else
.globl _AES_128_Key_Expansion_AESNI
.p2align 4
_AES_128_Key_Expansion_AESNI:
#endif
# parameter 1: stack[4] => %eax


@ -97,10 +97,10 @@ L_aes_gcm_avx2_bswap_mask:
L_aes_gcm_avx2_mod2_128:
.long 0x1,0x0,0x0,0xc2000000
.text
.globl AES_GCM_encrypt
.type AES_GCM_encrypt,@function
.globl AES_GCM_encrypt_aesni
.type AES_GCM_encrypt_aesni,@function
.align 16
AES_GCM_encrypt:
AES_GCM_encrypt_aesni:
pushl %ebx
pushl %esi
pushl %edi
@ -112,7 +112,7 @@ AES_GCM_encrypt:
pxor %xmm0, %xmm0
pxor %xmm2, %xmm2
cmpl $12, %edx
jne L_AES_GCM_encrypt_iv_not_12
jne L_AES_GCM_encrypt_aesni_iv_not_12
# # Calculate values when IV is 12 bytes
# Set counter based on IV
movl $0x1000000, %ecx
@ -153,7 +153,7 @@ AES_GCM_encrypt:
aesenc %xmm3, %xmm5
cmpl $11, 172(%esp)
movdqa 160(%ebp), %xmm3
jl L_AES_GCM_encrypt_calc_iv_12_last
jl L_AES_GCM_encrypt_aesni_calc_iv_12_last
aesenc %xmm3, %xmm1
aesenc %xmm3, %xmm5
movdqa 176(%ebp), %xmm3
@ -161,20 +161,20 @@ AES_GCM_encrypt:
aesenc %xmm3, %xmm5
cmpl $13, 172(%esp)
movdqa 192(%ebp), %xmm3
jl L_AES_GCM_encrypt_calc_iv_12_last
jl L_AES_GCM_encrypt_aesni_calc_iv_12_last
aesenc %xmm3, %xmm1
aesenc %xmm3, %xmm5
movdqa 208(%ebp), %xmm3
aesenc %xmm3, %xmm1
aesenc %xmm3, %xmm5
movdqa 224(%ebp), %xmm3
L_AES_GCM_encrypt_calc_iv_12_last:
L_AES_GCM_encrypt_aesni_calc_iv_12_last:
aesenclast %xmm3, %xmm1
aesenclast %xmm3, %xmm5
pshufb L_aes_gcm_bswap_mask, %xmm1
movdqu %xmm5, 80(%esp)
jmp L_AES_GCM_encrypt_iv_done
L_AES_GCM_encrypt_iv_not_12:
jmp L_AES_GCM_encrypt_aesni_iv_done
L_AES_GCM_encrypt_aesni_iv_not_12:
# Calculate values when IV is not 12 bytes
# H = Encrypt X(=0)
movdqa (%ebp), %xmm1
@ -189,27 +189,27 @@ L_AES_GCM_encrypt_iv_not_12:
aesenc 144(%ebp), %xmm1
cmpl $11, 172(%esp)
movdqa 160(%ebp), %xmm5
jl L_AES_GCM_encrypt_calc_iv_1_aesenc_avx_last
jl L_AES_GCM_encrypt_aesni_calc_iv_1_aesenc_avx_last
aesenc %xmm5, %xmm1
aesenc 176(%ebp), %xmm1
cmpl $13, 172(%esp)
movdqa 192(%ebp), %xmm5
jl L_AES_GCM_encrypt_calc_iv_1_aesenc_avx_last
jl L_AES_GCM_encrypt_aesni_calc_iv_1_aesenc_avx_last
aesenc %xmm5, %xmm1
aesenc 208(%ebp), %xmm1
movdqa 224(%ebp), %xmm5
L_AES_GCM_encrypt_calc_iv_1_aesenc_avx_last:
L_AES_GCM_encrypt_aesni_calc_iv_1_aesenc_avx_last:
aesenclast %xmm5, %xmm1
pshufb L_aes_gcm_bswap_mask, %xmm1
# Calc counter
# Initialization vector
cmpl $0x00, %edx
movl $0x00, %ecx
je L_AES_GCM_encrypt_calc_iv_done
je L_AES_GCM_encrypt_aesni_calc_iv_done
cmpl $16, %edx
jl L_AES_GCM_encrypt_calc_iv_lt16
jl L_AES_GCM_encrypt_aesni_calc_iv_lt16
andl $0xfffffff0, %edx
L_AES_GCM_encrypt_calc_iv_16_loop:
L_AES_GCM_encrypt_aesni_calc_iv_16_loop:
movdqu (%esi,%ecx,1), %xmm4
pshufb L_aes_gcm_bswap_mask, %xmm4
pxor %xmm4, %xmm0
@ -269,22 +269,22 @@ L_AES_GCM_encrypt_calc_iv_16_loop:
pxor %xmm6, %xmm0
addl $16, %ecx
cmpl %edx, %ecx
jl L_AES_GCM_encrypt_calc_iv_16_loop
jl L_AES_GCM_encrypt_aesni_calc_iv_16_loop
movl 160(%esp), %edx
cmpl %edx, %ecx
je L_AES_GCM_encrypt_calc_iv_done
L_AES_GCM_encrypt_calc_iv_lt16:
je L_AES_GCM_encrypt_aesni_calc_iv_done
L_AES_GCM_encrypt_aesni_calc_iv_lt16:
subl $16, %esp
pxor %xmm4, %xmm4
xorl %ebx, %ebx
movdqu %xmm4, (%esp)
L_AES_GCM_encrypt_calc_iv_loop:
L_AES_GCM_encrypt_aesni_calc_iv_loop:
movzbl (%esi,%ecx,1), %eax
movb %al, (%esp,%ebx,1)
incl %ecx
incl %ebx
cmpl %edx, %ecx
jl L_AES_GCM_encrypt_calc_iv_loop
jl L_AES_GCM_encrypt_aesni_calc_iv_loop
movdqu (%esp), %xmm4
addl $16, %esp
pshufb L_aes_gcm_bswap_mask, %xmm4
@ -343,7 +343,7 @@ L_AES_GCM_encrypt_calc_iv_loop:
pxor %xmm5, %xmm6
pxor %xmm3, %xmm6
pxor %xmm6, %xmm0
L_AES_GCM_encrypt_calc_iv_done:
L_AES_GCM_encrypt_aesni_calc_iv_done:
# T = Encrypt counter
pxor %xmm4, %xmm4
shll $3, %edx
@ -418,29 +418,29 @@ L_AES_GCM_encrypt_calc_iv_done:
aesenc 144(%ebp), %xmm4
cmpl $11, 172(%esp)
movdqa 160(%ebp), %xmm5
jl L_AES_GCM_encrypt_calc_iv_2_aesenc_avx_last
jl L_AES_GCM_encrypt_aesni_calc_iv_2_aesenc_avx_last
aesenc %xmm5, %xmm4
aesenc 176(%ebp), %xmm4
cmpl $13, 172(%esp)
movdqa 192(%ebp), %xmm5
jl L_AES_GCM_encrypt_calc_iv_2_aesenc_avx_last
jl L_AES_GCM_encrypt_aesni_calc_iv_2_aesenc_avx_last
aesenc %xmm5, %xmm4
aesenc 208(%ebp), %xmm4
movdqa 224(%ebp), %xmm5
L_AES_GCM_encrypt_calc_iv_2_aesenc_avx_last:
L_AES_GCM_encrypt_aesni_calc_iv_2_aesenc_avx_last:
aesenclast %xmm5, %xmm4
movdqu %xmm4, 80(%esp)
L_AES_GCM_encrypt_iv_done:
L_AES_GCM_encrypt_aesni_iv_done:
movl 140(%esp), %esi
# Additional authentication data
movl 156(%esp), %edx
cmpl $0x00, %edx
je L_AES_GCM_encrypt_calc_aad_done
je L_AES_GCM_encrypt_aesni_calc_aad_done
xorl %ecx, %ecx
cmpl $16, %edx
jl L_AES_GCM_encrypt_calc_aad_lt16
jl L_AES_GCM_encrypt_aesni_calc_aad_lt16
andl $0xfffffff0, %edx
L_AES_GCM_encrypt_calc_aad_16_loop:
L_AES_GCM_encrypt_aesni_calc_aad_16_loop:
movdqu (%esi,%ecx,1), %xmm4
pshufb L_aes_gcm_bswap_mask, %xmm4
pxor %xmm4, %xmm2
@ -500,22 +500,22 @@ L_AES_GCM_encrypt_calc_aad_16_loop:
pxor %xmm6, %xmm2
addl $16, %ecx
cmpl %edx, %ecx
jl L_AES_GCM_encrypt_calc_aad_16_loop
jl L_AES_GCM_encrypt_aesni_calc_aad_16_loop
movl 156(%esp), %edx
cmpl %edx, %ecx
je L_AES_GCM_encrypt_calc_aad_done
L_AES_GCM_encrypt_calc_aad_lt16:
je L_AES_GCM_encrypt_aesni_calc_aad_done
L_AES_GCM_encrypt_aesni_calc_aad_lt16:
subl $16, %esp
pxor %xmm4, %xmm4
xorl %ebx, %ebx
movdqu %xmm4, (%esp)
L_AES_GCM_encrypt_calc_aad_loop:
L_AES_GCM_encrypt_aesni_calc_aad_loop:
movzbl (%esi,%ecx,1), %eax
movb %al, (%esp,%ebx,1)
incl %ecx
incl %ebx
cmpl %edx, %ecx
jl L_AES_GCM_encrypt_calc_aad_loop
jl L_AES_GCM_encrypt_aesni_calc_aad_loop
movdqu (%esp), %xmm4
addl $16, %esp
pshufb L_aes_gcm_bswap_mask, %xmm4
@ -574,7 +574,7 @@ L_AES_GCM_encrypt_calc_aad_loop:
pxor %xmm5, %xmm6
pxor %xmm3, %xmm6
pxor %xmm6, %xmm2
L_AES_GCM_encrypt_calc_aad_done:
L_AES_GCM_encrypt_aesni_calc_aad_done:
movdqu %xmm2, 96(%esp)
movl 132(%esp), %esi
movl 136(%esp), %edi
@ -595,7 +595,7 @@ L_AES_GCM_encrypt_calc_aad_done:
xorl %ebx, %ebx
movl 152(%esp), %eax
cmpl $0x40, %eax
jl L_AES_GCM_encrypt_done_64
jl L_AES_GCM_encrypt_aesni_done_64
andl $0xffffffc0, %eax
movdqa %xmm2, %xmm6
# H ^ 1
@ -792,7 +792,7 @@ L_AES_GCM_encrypt_calc_aad_done:
aesenc %xmm3, %xmm7
cmpl $11, 172(%esp)
movdqa 160(%ebp), %xmm3
jl L_AES_GCM_encrypt_enc_done
jl L_AES_GCM_encrypt_aesni_enc_done
aesenc %xmm3, %xmm4
aesenc %xmm3, %xmm5
aesenc %xmm3, %xmm6
@ -804,7 +804,7 @@ L_AES_GCM_encrypt_calc_aad_done:
aesenc %xmm3, %xmm7
cmpl $13, 172(%esp)
movdqa 192(%ebp), %xmm3
jl L_AES_GCM_encrypt_enc_done
jl L_AES_GCM_encrypt_aesni_enc_done
aesenc %xmm3, %xmm4
aesenc %xmm3, %xmm5
aesenc %xmm3, %xmm6
@ -815,7 +815,7 @@ L_AES_GCM_encrypt_calc_aad_done:
aesenc %xmm3, %xmm6
aesenc %xmm3, %xmm7
movdqa 224(%ebp), %xmm3
L_AES_GCM_encrypt_enc_done:
L_AES_GCM_encrypt_aesni_enc_done:
aesenclast %xmm3, %xmm4
aesenclast %xmm3, %xmm5
movdqu (%esi), %xmm0
@ -836,9 +836,9 @@ L_AES_GCM_encrypt_enc_done:
movl $0x40, %ebx
movl %esi, %ecx
movl %edi, %edx
jle L_AES_GCM_encrypt_end_64
jle L_AES_GCM_encrypt_aesni_end_64
# More 64 bytes of input
L_AES_GCM_encrypt_ghash_64:
L_AES_GCM_encrypt_aesni_ghash_64:
leal (%esi,%ebx,1), %ecx
leal (%edi,%ebx,1), %edx
# Encrypt 64 bytes of counter
@ -909,7 +909,7 @@ L_AES_GCM_encrypt_ghash_64:
aesenc %xmm3, %xmm7
cmpl $11, 172(%esp)
movdqa 160(%ebp), %xmm3
jl L_AES_GCM_encrypt_aesenc_64_ghash_avx_done
jl L_AES_GCM_encrypt_aesni_aesenc_64_ghash_avx_done
aesenc %xmm3, %xmm4
aesenc %xmm3, %xmm5
aesenc %xmm3, %xmm6
@ -921,7 +921,7 @@ L_AES_GCM_encrypt_ghash_64:
aesenc %xmm3, %xmm7
cmpl $13, 172(%esp)
movdqa 192(%ebp), %xmm3
jl L_AES_GCM_encrypt_aesenc_64_ghash_avx_done
jl L_AES_GCM_encrypt_aesni_aesenc_64_ghash_avx_done
aesenc %xmm3, %xmm4
aesenc %xmm3, %xmm5
aesenc %xmm3, %xmm6
@ -932,7 +932,7 @@ L_AES_GCM_encrypt_ghash_64:
aesenc %xmm3, %xmm6
aesenc %xmm3, %xmm7
movdqa 224(%ebp), %xmm3
L_AES_GCM_encrypt_aesenc_64_ghash_avx_done:
L_AES_GCM_encrypt_aesni_aesenc_64_ghash_avx_done:
aesenclast %xmm3, %xmm4
aesenclast %xmm3, %xmm5
movdqu (%ecx), %xmm0
@ -1045,8 +1045,8 @@ L_AES_GCM_encrypt_aesenc_64_ghash_avx_done:
movdqu %xmm6, 96(%esp)
addl $0x40, %ebx
cmpl %eax, %ebx
jl L_AES_GCM_encrypt_ghash_64
L_AES_GCM_encrypt_end_64:
jl L_AES_GCM_encrypt_aesni_ghash_64
L_AES_GCM_encrypt_aesni_end_64:
movdqu 96(%esp), %xmm2
# Block 1
movdqa L_aes_gcm_bswap_mask, %xmm4
@ -1165,14 +1165,14 @@ L_AES_GCM_encrypt_end_64:
pxor %xmm0, %xmm6
pxor %xmm6, %xmm2
movdqu (%esp), %xmm1
L_AES_GCM_encrypt_done_64:
L_AES_GCM_encrypt_aesni_done_64:
movl 152(%esp), %edx
cmpl %edx, %ebx
jge L_AES_GCM_encrypt_done_enc
jge L_AES_GCM_encrypt_aesni_done_enc
movl 152(%esp), %eax
andl $0xfffffff0, %eax
cmpl %eax, %ebx
jge L_AES_GCM_encrypt_last_block_done
jge L_AES_GCM_encrypt_aesni_last_block_done
leal (%esi,%ebx,1), %ecx
leal (%edi,%ebx,1), %edx
movdqu 64(%esp), %xmm4
@ -1192,16 +1192,16 @@ L_AES_GCM_encrypt_done_64:
aesenc 144(%ebp), %xmm4
cmpl $11, 172(%esp)
movdqa 160(%ebp), %xmm5
jl L_AES_GCM_encrypt_aesenc_block_aesenc_avx_last
jl L_AES_GCM_encrypt_aesni_aesenc_block_aesenc_avx_last
aesenc %xmm5, %xmm4
aesenc 176(%ebp), %xmm4
cmpl $13, 172(%esp)
movdqa 192(%ebp), %xmm5
jl L_AES_GCM_encrypt_aesenc_block_aesenc_avx_last
jl L_AES_GCM_encrypt_aesni_aesenc_block_aesenc_avx_last
aesenc %xmm5, %xmm4
aesenc 208(%ebp), %xmm4
movdqa 224(%ebp), %xmm5
L_AES_GCM_encrypt_aesenc_block_aesenc_avx_last:
L_AES_GCM_encrypt_aesni_aesenc_block_aesenc_avx_last:
aesenclast %xmm5, %xmm4
movdqu (%ecx), %xmm5
pxor %xmm5, %xmm4
@ -1210,8 +1210,8 @@ L_AES_GCM_encrypt_aesenc_block_aesenc_avx_last:
pxor %xmm4, %xmm2
addl $16, %ebx
cmpl %eax, %ebx
jge L_AES_GCM_encrypt_last_block_ghash
L_AES_GCM_encrypt_last_block_start:
jge L_AES_GCM_encrypt_aesni_last_block_ghash
L_AES_GCM_encrypt_aesni_last_block_start:
leal (%esi,%ebx,1), %ecx
leal (%edi,%ebx,1), %edx
movdqu 64(%esp), %xmm4
@ -1255,16 +1255,16 @@ L_AES_GCM_encrypt_last_block_start:
pxor %xmm5, %xmm2
cmpl $11, 172(%esp)
movdqa 160(%ebp), %xmm5
jl L_AES_GCM_encrypt_aesenc_gfmul_last
jl L_AES_GCM_encrypt_aesni_aesenc_gfmul_last
aesenc %xmm5, %xmm4
aesenc 176(%ebp), %xmm4
cmpl $13, 172(%esp)
movdqa 192(%ebp), %xmm5
jl L_AES_GCM_encrypt_aesenc_gfmul_last
jl L_AES_GCM_encrypt_aesni_aesenc_gfmul_last
aesenc %xmm5, %xmm4
aesenc 208(%ebp), %xmm4
movdqa 224(%ebp), %xmm5
L_AES_GCM_encrypt_aesenc_gfmul_last:
L_AES_GCM_encrypt_aesni_aesenc_gfmul_last:
aesenclast %xmm5, %xmm4
movdqu (%ecx), %xmm5
pxor %xmm5, %xmm4
@ -1273,8 +1273,8 @@ L_AES_GCM_encrypt_aesenc_gfmul_last:
pxor %xmm4, %xmm2
addl $16, %ebx
cmpl %eax, %ebx
jl L_AES_GCM_encrypt_last_block_start
L_AES_GCM_encrypt_last_block_ghash:
jl L_AES_GCM_encrypt_aesni_last_block_start
L_AES_GCM_encrypt_aesni_last_block_ghash:
pshufd $0x4e, %xmm1, %xmm5
pshufd $0x4e, %xmm2, %xmm6
movdqa %xmm2, %xmm7
@ -1314,11 +1314,11 @@ L_AES_GCM_encrypt_last_block_ghash:
pxor %xmm7, %xmm5
pxor %xmm4, %xmm5
pxor %xmm5, %xmm2
L_AES_GCM_encrypt_last_block_done:
L_AES_GCM_encrypt_aesni_last_block_done:
movl 152(%esp), %ecx
movl %ecx, %edx
andl $15, %ecx
jz L_AES_GCM_encrypt_aesenc_last15_enc_avx_done
jz L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_done
movdqu 64(%esp), %xmm0
pshufb L_aes_gcm_bswap_epi64, %xmm0
pxor (%ebp), %xmm0
@ -1333,21 +1333,21 @@ L_AES_GCM_encrypt_last_block_done:
aesenc 144(%ebp), %xmm0
cmpl $11, 172(%esp)
movdqa 160(%ebp), %xmm5
jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_aesenc_avx_last
jl L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_aesenc_avx_last
aesenc %xmm5, %xmm0
aesenc 176(%ebp), %xmm0
cmpl $13, 172(%esp)
movdqa 192(%ebp), %xmm5
jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_aesenc_avx_last
jl L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_aesenc_avx_last
aesenc %xmm5, %xmm0
aesenc 208(%ebp), %xmm0
movdqa 224(%ebp), %xmm5
L_AES_GCM_encrypt_aesenc_last15_enc_avx_aesenc_avx_last:
L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_aesenc_avx_last:
aesenclast %xmm5, %xmm0
subl $16, %esp
xorl %ecx, %ecx
movdqu %xmm0, (%esp)
L_AES_GCM_encrypt_aesenc_last15_enc_avx_loop:
L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_loop:
movzbl (%esi,%ebx,1), %eax
xorb (%esp,%ecx,1), %al
movb %al, (%edi,%ebx,1)
@ -1355,16 +1355,16 @@ L_AES_GCM_encrypt_aesenc_last15_enc_avx_loop:
incl %ebx
incl %ecx
cmpl %edx, %ebx
jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_loop
jl L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_loop
xorl %eax, %eax
cmpl $16, %ecx
je L_AES_GCM_encrypt_aesenc_last15_enc_avx_finish_enc
L_AES_GCM_encrypt_aesenc_last15_enc_avx_byte_loop:
je L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_finish_enc
L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_byte_loop:
movb %al, (%esp,%ecx,1)
incl %ecx
cmpl $16, %ecx
jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_byte_loop
L_AES_GCM_encrypt_aesenc_last15_enc_avx_finish_enc:
jl L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_byte_loop
L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_finish_enc:
movdqu (%esp), %xmm0
addl $16, %esp
pshufb L_aes_gcm_bswap_mask, %xmm0
@ -1408,8 +1408,8 @@ L_AES_GCM_encrypt_aesenc_last15_enc_avx_finish_enc:
pxor %xmm7, %xmm5
pxor %xmm4, %xmm5
pxor %xmm5, %xmm2
L_AES_GCM_encrypt_aesenc_last15_enc_avx_done:
L_AES_GCM_encrypt_done_enc:
L_AES_GCM_encrypt_aesni_aesenc_last15_enc_avx_done:
L_AES_GCM_encrypt_aesni_done_enc:
movl 148(%esp), %edi
movl 164(%esp), %ebx
movl 152(%esp), %edx
@ -1468,31 +1468,31 @@ L_AES_GCM_encrypt_done_enc:
movdqu 80(%esp), %xmm4
pxor %xmm2, %xmm4
cmpl $16, %ebx
je L_AES_GCM_encrypt_store_tag_16
je L_AES_GCM_encrypt_aesni_store_tag_16
xorl %ecx, %ecx
movdqu %xmm4, (%esp)
L_AES_GCM_encrypt_store_tag_loop:
L_AES_GCM_encrypt_aesni_store_tag_loop:
movzbl (%esp,%ecx,1), %eax
movb %al, (%edi,%ecx,1)
incl %ecx
cmpl %ebx, %ecx
jne L_AES_GCM_encrypt_store_tag_loop
jmp L_AES_GCM_encrypt_store_tag_done
L_AES_GCM_encrypt_store_tag_16:
jne L_AES_GCM_encrypt_aesni_store_tag_loop
jmp L_AES_GCM_encrypt_aesni_store_tag_done
L_AES_GCM_encrypt_aesni_store_tag_16:
movdqu %xmm4, (%edi)
L_AES_GCM_encrypt_store_tag_done:
L_AES_GCM_encrypt_aesni_store_tag_done:
addl $0x70, %esp
popl %ebp
popl %edi
popl %esi
popl %ebx
ret
.size AES_GCM_encrypt,.-AES_GCM_encrypt
.size AES_GCM_encrypt_aesni,.-AES_GCM_encrypt_aesni
.text
.globl AES_GCM_decrypt
.type AES_GCM_decrypt,@function
.globl AES_GCM_decrypt_aesni
.type AES_GCM_decrypt_aesni,@function
.align 16
AES_GCM_decrypt:
AES_GCM_decrypt_aesni:
pushl %ebx
pushl %esi
pushl %edi
@ -1504,7 +1504,7 @@ AES_GCM_decrypt:
pxor %xmm0, %xmm0
pxor %xmm2, %xmm2
cmpl $12, %edx
jne L_AES_GCM_decrypt_iv_not_12
jne L_AES_GCM_decrypt_aesni_iv_not_12
# # Calculate values when IV is 12 bytes
# Set counter based on IV
movl $0x1000000, %ecx
@ -1545,7 +1545,7 @@ AES_GCM_decrypt:
aesenc %xmm3, %xmm5
cmpl $11, 236(%esp)
movdqa 160(%ebp), %xmm3
jl L_AES_GCM_decrypt_calc_iv_12_last
jl L_AES_GCM_decrypt_aesni_calc_iv_12_last
aesenc %xmm3, %xmm1
aesenc %xmm3, %xmm5
movdqa 176(%ebp), %xmm3
@ -1553,20 +1553,20 @@ AES_GCM_decrypt:
aesenc %xmm3, %xmm5
cmpl $13, 236(%esp)
movdqa 192(%ebp), %xmm3
jl L_AES_GCM_decrypt_calc_iv_12_last
jl L_AES_GCM_decrypt_aesni_calc_iv_12_last
aesenc %xmm3, %xmm1
aesenc %xmm3, %xmm5
movdqa 208(%ebp), %xmm3
aesenc %xmm3, %xmm1
aesenc %xmm3, %xmm5
movdqa 224(%ebp), %xmm3
L_AES_GCM_decrypt_calc_iv_12_last:
L_AES_GCM_decrypt_aesni_calc_iv_12_last:
aesenclast %xmm3, %xmm1
aesenclast %xmm3, %xmm5
pshufb L_aes_gcm_bswap_mask, %xmm1
movdqu %xmm5, 80(%esp)
jmp L_AES_GCM_decrypt_iv_done
L_AES_GCM_decrypt_iv_not_12:
jmp L_AES_GCM_decrypt_aesni_iv_done
L_AES_GCM_decrypt_aesni_iv_not_12:
# Calculate values when IV is not 12 bytes
# H = Encrypt X(=0)
movdqa (%ebp), %xmm1
@ -1581,27 +1581,27 @@ L_AES_GCM_decrypt_iv_not_12:
aesenc 144(%ebp), %xmm1
cmpl $11, 236(%esp)
movdqa 160(%ebp), %xmm5
jl L_AES_GCM_decrypt_calc_iv_1_aesenc_avx_last
jl L_AES_GCM_decrypt_aesni_calc_iv_1_aesenc_avx_last
aesenc %xmm5, %xmm1
aesenc 176(%ebp), %xmm1
cmpl $13, 236(%esp)
movdqa 192(%ebp), %xmm5
jl L_AES_GCM_decrypt_calc_iv_1_aesenc_avx_last
jl L_AES_GCM_decrypt_aesni_calc_iv_1_aesenc_avx_last
aesenc %xmm5, %xmm1
aesenc 208(%ebp), %xmm1
movdqa 224(%ebp), %xmm5
L_AES_GCM_decrypt_calc_iv_1_aesenc_avx_last:
L_AES_GCM_decrypt_aesni_calc_iv_1_aesenc_avx_last:
aesenclast %xmm5, %xmm1
pshufb L_aes_gcm_bswap_mask, %xmm1
# Calc counter
# Initialization vector
cmpl $0x00, %edx
movl $0x00, %ecx
je L_AES_GCM_decrypt_calc_iv_done
je L_AES_GCM_decrypt_aesni_calc_iv_done
cmpl $16, %edx
jl L_AES_GCM_decrypt_calc_iv_lt16
jl L_AES_GCM_decrypt_aesni_calc_iv_lt16
andl $0xfffffff0, %edx
L_AES_GCM_decrypt_calc_iv_16_loop:
L_AES_GCM_decrypt_aesni_calc_iv_16_loop:
movdqu (%esi,%ecx,1), %xmm4
pshufb L_aes_gcm_bswap_mask, %xmm4
pxor %xmm4, %xmm0
@ -1661,22 +1661,22 @@ L_AES_GCM_decrypt_calc_iv_16_loop:
pxor %xmm6, %xmm0
addl $16, %ecx
cmpl %edx, %ecx
jl L_AES_GCM_decrypt_calc_iv_16_loop
jl L_AES_GCM_decrypt_aesni_calc_iv_16_loop
movl 224(%esp), %edx
cmpl %edx, %ecx
je L_AES_GCM_decrypt_calc_iv_done
L_AES_GCM_decrypt_calc_iv_lt16:
je L_AES_GCM_decrypt_aesni_calc_iv_done
L_AES_GCM_decrypt_aesni_calc_iv_lt16:
subl $16, %esp
pxor %xmm4, %xmm4
xorl %ebx, %ebx
movdqu %xmm4, (%esp)
L_AES_GCM_decrypt_calc_iv_loop:
L_AES_GCM_decrypt_aesni_calc_iv_loop:
movzbl (%esi,%ecx,1), %eax
movb %al, (%esp,%ebx,1)
incl %ecx
incl %ebx
cmpl %edx, %ecx
jl L_AES_GCM_decrypt_calc_iv_loop
jl L_AES_GCM_decrypt_aesni_calc_iv_loop
movdqu (%esp), %xmm4
addl $16, %esp
pshufb L_aes_gcm_bswap_mask, %xmm4
@ -1735,7 +1735,7 @@ L_AES_GCM_decrypt_calc_iv_loop:
pxor %xmm5, %xmm6
pxor %xmm3, %xmm6
pxor %xmm6, %xmm0
L_AES_GCM_decrypt_calc_iv_done:
L_AES_GCM_decrypt_aesni_calc_iv_done:
# T = Encrypt counter
pxor %xmm4, %xmm4
shll $3, %edx
@ -1810,29 +1810,29 @@ L_AES_GCM_decrypt_calc_iv_done:
aesenc 144(%ebp), %xmm4
cmpl $11, 236(%esp)
movdqa 160(%ebp), %xmm5
jl L_AES_GCM_decrypt_calc_iv_2_aesenc_avx_last
jl L_AES_GCM_decrypt_aesni_calc_iv_2_aesenc_avx_last
aesenc %xmm5, %xmm4
aesenc 176(%ebp), %xmm4
cmpl $13, 236(%esp)
movdqa 192(%ebp), %xmm5
jl L_AES_GCM_decrypt_calc_iv_2_aesenc_avx_last
jl L_AES_GCM_decrypt_aesni_calc_iv_2_aesenc_avx_last
aesenc %xmm5, %xmm4
aesenc 208(%ebp), %xmm4
movdqa 224(%ebp), %xmm5
L_AES_GCM_decrypt_calc_iv_2_aesenc_avx_last:
L_AES_GCM_decrypt_aesni_calc_iv_2_aesenc_avx_last:
aesenclast %xmm5, %xmm4
movdqu %xmm4, 80(%esp)
L_AES_GCM_decrypt_iv_done:
L_AES_GCM_decrypt_aesni_iv_done:
movl 204(%esp), %esi
# Additional authentication data
movl 220(%esp), %edx
cmpl $0x00, %edx
je L_AES_GCM_decrypt_calc_aad_done
je L_AES_GCM_decrypt_aesni_calc_aad_done
xorl %ecx, %ecx
cmpl $16, %edx
jl L_AES_GCM_decrypt_calc_aad_lt16
jl L_AES_GCM_decrypt_aesni_calc_aad_lt16
andl $0xfffffff0, %edx
L_AES_GCM_decrypt_calc_aad_16_loop:
L_AES_GCM_decrypt_aesni_calc_aad_16_loop:
movdqu (%esi,%ecx,1), %xmm4
pshufb L_aes_gcm_bswap_mask, %xmm4
pxor %xmm4, %xmm2
@ -1892,22 +1892,22 @@ L_AES_GCM_decrypt_calc_aad_16_loop:
pxor %xmm6, %xmm2
addl $16, %ecx
cmpl %edx, %ecx
jl L_AES_GCM_decrypt_calc_aad_16_loop
jl L_AES_GCM_decrypt_aesni_calc_aad_16_loop
movl 220(%esp), %edx
cmpl %edx, %ecx
je L_AES_GCM_decrypt_calc_aad_done
L_AES_GCM_decrypt_calc_aad_lt16:
je L_AES_GCM_decrypt_aesni_calc_aad_done
L_AES_GCM_decrypt_aesni_calc_aad_lt16:
subl $16, %esp
pxor %xmm4, %xmm4
xorl %ebx, %ebx
movdqu %xmm4, (%esp)
L_AES_GCM_decrypt_calc_aad_loop:
L_AES_GCM_decrypt_aesni_calc_aad_loop:
movzbl (%esi,%ecx,1), %eax
movb %al, (%esp,%ebx,1)
incl %ecx
incl %ebx
cmpl %edx, %ecx
jl L_AES_GCM_decrypt_calc_aad_loop
jl L_AES_GCM_decrypt_aesni_calc_aad_loop
movdqu (%esp), %xmm4
addl $16, %esp
pshufb L_aes_gcm_bswap_mask, %xmm4
@ -1966,7 +1966,7 @@ L_AES_GCM_decrypt_calc_aad_loop:
pxor %xmm5, %xmm6
pxor %xmm3, %xmm6
pxor %xmm6, %xmm2
L_AES_GCM_decrypt_calc_aad_done:
L_AES_GCM_decrypt_aesni_calc_aad_done:
movdqu %xmm2, 96(%esp)
movl 196(%esp), %esi
movl 200(%esp), %edi
@ -1987,7 +1987,7 @@ L_AES_GCM_decrypt_calc_aad_done:
xorl %ebx, %ebx
cmpl $0x40, 216(%esp)
movl 216(%esp), %eax
jl L_AES_GCM_decrypt_done_64
jl L_AES_GCM_decrypt_aesni_done_64
andl $0xffffffc0, %eax
movdqa %xmm2, %xmm6
# H ^ 1
@ -2116,8 +2116,8 @@ L_AES_GCM_decrypt_calc_aad_done:
pxor %xmm5, %xmm3
movdqu %xmm3, 48(%esp)
cmpl %esi, %edi
jne L_AES_GCM_decrypt_ghash_64
L_AES_GCM_decrypt_ghash_64_inplace:
jne L_AES_GCM_decrypt_aesni_ghash_64
L_AES_GCM_decrypt_aesni_ghash_64_inplace:
leal (%esi,%ebx,1), %ecx
leal (%edi,%ebx,1), %edx
# Encrypt 64 bytes of counter
@ -2188,7 +2188,7 @@ L_AES_GCM_decrypt_ghash_64_inplace:
aesenc %xmm3, %xmm7
cmpl $11, 236(%esp)
movdqa 160(%ebp), %xmm3
jl L_AES_GCM_decryptinplace_aesenc_64_ghash_avx_done
jl L_AES_GCM_decrypt_aesniinplace_aesenc_64_ghash_avx_done
aesenc %xmm3, %xmm4
aesenc %xmm3, %xmm5
aesenc %xmm3, %xmm6
@ -2200,7 +2200,7 @@ L_AES_GCM_decrypt_ghash_64_inplace:
aesenc %xmm3, %xmm7
cmpl $13, 236(%esp)
movdqa 192(%ebp), %xmm3
jl L_AES_GCM_decryptinplace_aesenc_64_ghash_avx_done
jl L_AES_GCM_decrypt_aesniinplace_aesenc_64_ghash_avx_done
aesenc %xmm3, %xmm4
aesenc %xmm3, %xmm5
aesenc %xmm3, %xmm6
@ -2211,7 +2211,7 @@ L_AES_GCM_decrypt_ghash_64_inplace:
aesenc %xmm3, %xmm6
aesenc %xmm3, %xmm7
movdqa 224(%ebp), %xmm3
L_AES_GCM_decryptinplace_aesenc_64_ghash_avx_done:
L_AES_GCM_decrypt_aesniinplace_aesenc_64_ghash_avx_done:
aesenclast %xmm3, %xmm4
aesenclast %xmm3, %xmm5
movdqu (%ecx), %xmm0
@ -2328,9 +2328,9 @@ L_AES_GCM_decryptinplace_aesenc_64_ghash_avx_done:
movdqu %xmm6, 96(%esp)
addl $0x40, %ebx
cmpl %eax, %ebx
jl L_AES_GCM_decrypt_ghash_64_inplace
jmp L_AES_GCM_decrypt_ghash_64_done
L_AES_GCM_decrypt_ghash_64:
jl L_AES_GCM_decrypt_aesni_ghash_64_inplace
jmp L_AES_GCM_decrypt_aesni_ghash_64_done
L_AES_GCM_decrypt_aesni_ghash_64:
leal (%esi,%ebx,1), %ecx
leal (%edi,%ebx,1), %edx
# Encrypt 64 bytes of counter
@ -2401,7 +2401,7 @@ L_AES_GCM_decrypt_ghash_64:
aesenc %xmm3, %xmm7
cmpl $11, 236(%esp)
movdqa 160(%ebp), %xmm3
jl L_AES_GCM_decrypt_aesenc_64_ghash_avx_done
jl L_AES_GCM_decrypt_aesni_aesenc_64_ghash_avx_done
aesenc %xmm3, %xmm4
aesenc %xmm3, %xmm5
aesenc %xmm3, %xmm6
@ -2413,7 +2413,7 @@ L_AES_GCM_decrypt_ghash_64:
aesenc %xmm3, %xmm7
cmpl $13, 236(%esp)
movdqa 192(%ebp), %xmm3
jl L_AES_GCM_decrypt_aesenc_64_ghash_avx_done
jl L_AES_GCM_decrypt_aesni_aesenc_64_ghash_avx_done
aesenc %xmm3, %xmm4
aesenc %xmm3, %xmm5
aesenc %xmm3, %xmm6
@ -2424,7 +2424,7 @@ L_AES_GCM_decrypt_ghash_64:
aesenc %xmm3, %xmm6
aesenc %xmm3, %xmm7
movdqa 224(%ebp), %xmm3
L_AES_GCM_decrypt_aesenc_64_ghash_avx_done:
L_AES_GCM_decrypt_aesni_aesenc_64_ghash_avx_done:
aesenclast %xmm3, %xmm4
aesenclast %xmm3, %xmm5
movdqu (%ecx), %xmm0
@ -2541,19 +2541,19 @@ L_AES_GCM_decrypt_aesenc_64_ghash_avx_done:
movdqu %xmm6, 96(%esp)
addl $0x40, %ebx
cmpl %eax, %ebx
jl L_AES_GCM_decrypt_ghash_64
L_AES_GCM_decrypt_ghash_64_done:
jl L_AES_GCM_decrypt_aesni_ghash_64
L_AES_GCM_decrypt_aesni_ghash_64_done:
movdqa %xmm6, %xmm2
movdqu (%esp), %xmm1
L_AES_GCM_decrypt_done_64:
L_AES_GCM_decrypt_aesni_done_64:
movl 216(%esp), %edx
cmpl %edx, %ebx
jge L_AES_GCM_decrypt_done_dec
jge L_AES_GCM_decrypt_aesni_done_dec
movl 216(%esp), %eax
andl $0xfffffff0, %eax
cmpl %eax, %ebx
jge L_AES_GCM_decrypt_last_block_done
L_AES_GCM_decrypt_last_block_start:
jge L_AES_GCM_decrypt_aesni_last_block_done
L_AES_GCM_decrypt_aesni_last_block_start:
leal (%esi,%ebx,1), %ecx
leal (%edi,%ebx,1), %edx
movdqu (%ecx), %xmm5
@ -2601,28 +2601,28 @@ L_AES_GCM_decrypt_last_block_start:
pxor %xmm5, %xmm2
cmpl $11, 236(%esp)
movdqa 160(%ebp), %xmm5
jl L_AES_GCM_decrypt_aesenc_gfmul_last
jl L_AES_GCM_decrypt_aesni_aesenc_gfmul_last
aesenc %xmm5, %xmm4
aesenc 176(%ebp), %xmm4
cmpl $13, 236(%esp)
movdqa 192(%ebp), %xmm5
jl L_AES_GCM_decrypt_aesenc_gfmul_last
jl L_AES_GCM_decrypt_aesni_aesenc_gfmul_last
aesenc %xmm5, %xmm4
aesenc 208(%ebp), %xmm4
movdqa 224(%ebp), %xmm5
L_AES_GCM_decrypt_aesenc_gfmul_last:
L_AES_GCM_decrypt_aesni_aesenc_gfmul_last:
aesenclast %xmm5, %xmm4
movdqu (%ecx), %xmm5
pxor %xmm5, %xmm4
movdqu %xmm4, (%edx)
addl $16, %ebx
cmpl %eax, %ebx
jl L_AES_GCM_decrypt_last_block_start
L_AES_GCM_decrypt_last_block_done:
jl L_AES_GCM_decrypt_aesni_last_block_start
L_AES_GCM_decrypt_aesni_last_block_done:
movl 216(%esp), %ecx
movl %ecx, %edx
andl $15, %ecx
jz L_AES_GCM_decrypt_aesenc_last15_dec_avx_done
jz L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_done
movdqu 64(%esp), %xmm0
pshufb L_aes_gcm_bswap_epi64, %xmm0
pxor (%ebp), %xmm0
@ -2637,23 +2637,23 @@ L_AES_GCM_decrypt_last_block_done:
aesenc 144(%ebp), %xmm0
cmpl $11, 236(%esp)
movdqa 160(%ebp), %xmm5
jl L_AES_GCM_decrypt_aesenc_last15_dec_avx_aesenc_avx_last
jl L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_aesenc_avx_last
aesenc %xmm5, %xmm0
aesenc 176(%ebp), %xmm0
cmpl $13, 236(%esp)
movdqa 192(%ebp), %xmm5
jl L_AES_GCM_decrypt_aesenc_last15_dec_avx_aesenc_avx_last
jl L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_aesenc_avx_last
aesenc %xmm5, %xmm0
aesenc 208(%ebp), %xmm0
movdqa 224(%ebp), %xmm5
L_AES_GCM_decrypt_aesenc_last15_dec_avx_aesenc_avx_last:
L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_aesenc_avx_last:
aesenclast %xmm5, %xmm0
subl $32, %esp
xorl %ecx, %ecx
movdqu %xmm0, (%esp)
pxor %xmm4, %xmm4
movdqu %xmm4, 16(%esp)
L_AES_GCM_decrypt_aesenc_last15_dec_avx_loop:
L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_loop:
movzbl (%esi,%ebx,1), %eax
movb %al, 16(%esp,%ecx,1)
xorb (%esp,%ecx,1), %al
@ -2661,7 +2661,7 @@ L_AES_GCM_decrypt_aesenc_last15_dec_avx_loop:
incl %ebx
incl %ecx
cmpl %edx, %ebx
jl L_AES_GCM_decrypt_aesenc_last15_dec_avx_loop
jl L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_loop
movdqu 16(%esp), %xmm0
addl $32, %esp
pshufb L_aes_gcm_bswap_mask, %xmm0
@ -2705,8 +2705,8 @@ L_AES_GCM_decrypt_aesenc_last15_dec_avx_loop:
pxor %xmm7, %xmm5
pxor %xmm4, %xmm5
pxor %xmm5, %xmm2
L_AES_GCM_decrypt_aesenc_last15_dec_avx_done:
L_AES_GCM_decrypt_done_dec:
L_AES_GCM_decrypt_aesni_aesenc_last15_dec_avx_done:
L_AES_GCM_decrypt_aesni_done_dec:
movl 212(%esp), %esi
movl 228(%esp), %ebp
movl 216(%esp), %edx
@ -2766,24 +2766,24 @@ L_AES_GCM_decrypt_done_dec:
pxor %xmm2, %xmm4
movl 240(%esp), %edi
cmpl $16, %ebp
je L_AES_GCM_decrypt_cmp_tag_16
je L_AES_GCM_decrypt_aesni_cmp_tag_16
subl $16, %esp
xorl %ecx, %ecx
xorl %ebx, %ebx
movdqu %xmm4, (%esp)
L_AES_GCM_decrypt_cmp_tag_loop:
L_AES_GCM_decrypt_aesni_cmp_tag_loop:
movzbl (%esp,%ecx,1), %eax
xorb (%esi,%ecx,1), %al
orb %al, %bl
incl %ecx
cmpl %ebp, %ecx
jne L_AES_GCM_decrypt_cmp_tag_loop
jne L_AES_GCM_decrypt_aesni_cmp_tag_loop
cmpb $0x00, %bl
sete %bl
addl $16, %esp
xorl %ecx, %ecx
jmp L_AES_GCM_decrypt_cmp_tag_done
L_AES_GCM_decrypt_cmp_tag_16:
jmp L_AES_GCM_decrypt_aesni_cmp_tag_done
L_AES_GCM_decrypt_aesni_cmp_tag_16:
movdqu (%esi), %xmm5
pcmpeqb %xmm5, %xmm4
pmovmskb %xmm4, %edx
@ -2791,7 +2791,7 @@ L_AES_GCM_decrypt_cmp_tag_16:
xorl %ebx, %ebx
cmpl $0xffff, %edx
sete %bl
L_AES_GCM_decrypt_cmp_tag_done:
L_AES_GCM_decrypt_aesni_cmp_tag_done:
movl %ebx, (%edi)
addl $0xb0, %esp
popl %ebp
@ -2799,7 +2799,7 @@ L_AES_GCM_decrypt_cmp_tag_done:
popl %esi
popl %ebx
ret
.size AES_GCM_decrypt,.-AES_GCM_decrypt
.size AES_GCM_decrypt_aesni,.-AES_GCM_decrypt_aesni
#ifdef WOLFSSL_AESGCM_STREAM
.text
.globl AES_GCM_init_aesni


@ -1254,7 +1254,7 @@ cpuFlagsSet:
.long 0
#else
.section __DATA,__data
.p2align 2
.p2align 3
_cpuFlagsSet:
.long 0
#endif /* __APPLE__ */
@ -1266,7 +1266,7 @@ intelFlags:
.long 0
#else
.section __DATA,__data
.p2align 2
.p2align 3
_intelFlags:
.long 0
#endif /* __APPLE__ */
@ -1278,7 +1278,7 @@ fe_mul_p:
.quad fe_mul_x64
#else
.section __DATA,__data
.p2align 2
.p2align 3
_fe_mul_p:
.quad _fe_mul_x64
#endif /* __APPLE__ */
@ -1290,7 +1290,7 @@ fe_sq_p:
.quad fe_sq_x64
#else
.section __DATA,__data
.p2align 2
.p2align 3
_fe_sq_p:
.quad _fe_sq_x64
#endif /* __APPLE__ */
@ -1302,7 +1302,7 @@ fe_mul121666_p:
.quad fe_mul121666_x64
#else
.section __DATA,__data
.p2align 2
.p2align 3
_fe_mul121666_p:
.quad _fe_mul121666_x64
#endif /* __APPLE__ */
@ -1314,7 +1314,7 @@ fe_invert_p:
.quad fe_invert_x64
#else
.section __DATA,__data
.p2align 2
.p2align 3
_fe_invert_p:
.quad _fe_invert_x64
#endif /* __APPLE__ */
@ -1326,7 +1326,7 @@ curve25519_p:
.quad curve25519_x64
#else
.section __DATA,__data
.p2align 2
.p2align 3
_curve25519_p:
.quad _curve25519_x64
#endif /* __APPLE__ */
@ -1339,7 +1339,7 @@ fe_sq2_p:
.quad fe_sq2_x64
#else
.section __DATA,__data
.p2align 2
.p2align 3
_fe_sq2_p:
.quad _fe_sq2_x64
#endif /* __APPLE__ */
@ -1351,7 +1351,7 @@ fe_pow22523_p:
.quad fe_pow22523_x64
#else
.section __DATA,__data
.p2align 2
.p2align 3
_fe_pow22523_p:
.quad _fe_pow22523_x64
#endif /* __APPLE__ */
@ -1363,7 +1363,7 @@ ge_p1p1_to_p2_p:
.quad ge_p1p1_to_p2_x64
#else
.section __DATA,__data
.p2align 2
.p2align 3
_ge_p1p1_to_p2_p:
.quad _ge_p1p1_to_p2_x64
#endif /* __APPLE__ */
@ -1375,7 +1375,7 @@ ge_p1p1_to_p3_p:
.quad ge_p1p1_to_p3_x64
#else
.section __DATA,__data
.p2align 2
.p2align 3
_ge_p1p1_to_p3_p:
.quad _ge_p1p1_to_p3_x64
#endif /* __APPLE__ */
@ -1387,7 +1387,7 @@ ge_p2_dbl_p:
.quad ge_p2_dbl_x64
#else
.section __DATA,__data
.p2align 2
.p2align 3
_ge_p2_dbl_p:
.quad _ge_p2_dbl_x64
#endif /* __APPLE__ */
@ -1399,7 +1399,7 @@ ge_madd_p:
.quad ge_madd_x64
#else
.section __DATA,__data
.p2align 2
.p2align 3
_ge_madd_p:
.quad _ge_madd_x64
#endif /* __APPLE__ */
@ -1411,7 +1411,7 @@ ge_msub_p:
.quad ge_msub_x64
#else
.section __DATA,__data
.p2align 2
.p2align 3
_ge_msub_p:
.quad _ge_msub_x64
#endif /* __APPLE__ */
@ -1423,7 +1423,7 @@ ge_add_p:
.quad ge_add_x64
#else
.section __DATA,__data
.p2align 2
.p2align 3
_ge_add_p:
.quad _ge_add_x64
#endif /* __APPLE__ */
@ -1435,7 +1435,7 @@ ge_sub_p:
.quad ge_sub_x64
#else
.section __DATA,__data
.p2align 2
.p2align 3
_ge_sub_p:
.quad _ge_sub_x64
#endif /* __APPLE__ */
@ -1447,7 +1447,7 @@ sc_reduce_p:
.quad sc_reduce_x64
#else
.section __DATA,__data
.p2align 2
.p2align 3
_sc_reduce_p:
.quad _sc_reduce_x64
#endif /* __APPLE__ */
@ -1459,7 +1459,7 @@ sc_muladd_p:
.quad sc_muladd_x64
#else
.section __DATA,__data
.p2align 2
.p2align 3
_sc_muladd_p:
.quad _sc_muladd_x64
#endif /* __APPLE__ */
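
The data hunks above raise the alignment of the function-pointer tables in the __APPLE__ branches from .p2align 2 (4 bytes) to .p2align 3 (8 bytes), matching the 8-byte .quad entries they hold; .p2align N aligns to 2**N bytes, which is also why the earlier code hunks pair the ELF branch's ".align 16,0x90" with ".p2align 4" on Apple. A rough C analogue of the data change, with illustrative names only, is:

#include <stdio.h>
#include <stdalign.h>

/* Hedged C analogue of the alignment fix above (names are illustrative,
 * not wolfSSL's): each *_p slot is an 8-byte .quad holding a function
 * pointer, so it wants 8-byte alignment, i.e. .p2align 3, not .p2align 2. */
typedef void (*fe_fn)(void);

static alignas(8) fe_fn fe_mul_p;   /* corresponds to .p2align 3 + .quad */

int main(void)
{
    /* reference the slot so the sketch compiles cleanly on its own */
    printf("fe_mul_p lives at %p\n", (void *)&fe_mul_p);
    return 0;
}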


@ -87,7 +87,7 @@
#endif
#if defined(USE_INTEL_SPEEDUP)
#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP)
#if defined(__GNUC__) && ((__GNUC__ < 4) || \
(__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
#undef NO_AVX2_SUPPORT
@ -318,7 +318,7 @@ static int InitSha512_256(wc_Sha512* sha512)
#endif /* WOLFSSL_SHA512 */
/* Hardware Acceleration */
#if defined(USE_INTEL_SPEEDUP) && \
#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
(defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
/*****
@ -516,7 +516,7 @@ static int InitSha512_Family(wc_Sha512* sha512, void* heap, int devId,
if (ret != 0)
return ret;
#if defined(USE_INTEL_SPEEDUP) && \
#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
(defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
Sha512_SetTransform();
#endif
@ -757,7 +757,7 @@ static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 le
if (sha512->buffLen == WC_SHA512_BLOCK_SIZE) {
#if defined(LITTLE_ENDIAN_ORDER)
#if defined(USE_INTEL_SPEEDUP) && \
#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
(defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
#endif
@ -792,7 +792,7 @@ static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 le
}
}
#if defined(USE_INTEL_SPEEDUP) && \
#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
(defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
if (Transform_Sha512_Len_p != NULL) {
word32 blocksLen = len & ~((word32)WC_SHA512_BLOCK_SIZE-1);
@ -807,8 +807,9 @@ static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 le
}
else
#endif
#if !defined(LITTLE_ENDIAN_ORDER) || (defined(USE_INTEL_SPEEDUP) && \
(defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)))
#if !defined(LITTLE_ENDIAN_ORDER) || (defined(WOLFSSL_X86_64_BUILD) && \
defined(USE_INTEL_SPEEDUP) && (defined(HAVE_INTEL_AVX1) || \
defined(HAVE_INTEL_AVX2)))
{
while (len >= WC_SHA512_BLOCK_SIZE) {
XMEMCPY(local, data, WC_SHA512_BLOCK_SIZE);
@ -816,7 +817,7 @@ static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 le
data += WC_SHA512_BLOCK_SIZE;
len -= WC_SHA512_BLOCK_SIZE;
#if defined(USE_INTEL_SPEEDUP) && \
#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
(defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
{
@ -946,7 +947,7 @@ static WC_INLINE int Sha512Final(wc_Sha512* sha512)
sha512->buffLen += WC_SHA512_BLOCK_SIZE - sha512->buffLen;
#if defined(LITTLE_ENDIAN_ORDER)
#if defined(USE_INTEL_SPEEDUP) && \
#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
(defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
#endif
@ -992,7 +993,7 @@ static WC_INLINE int Sha512Final(wc_Sha512* sha512)
/* store lengths */
#if defined(LITTLE_ENDIAN_ORDER)
#if defined(USE_INTEL_SPEEDUP) && \
#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
(defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
#endif
@ -1011,7 +1012,7 @@ static WC_INLINE int Sha512Final(wc_Sha512* sha512)
sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 1] = sha512->loLen;
#endif
#if defined(USE_INTEL_SPEEDUP) && \
#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
(defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags))
ByteReverseWords64(&(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
@ -1216,13 +1217,13 @@ int wc_Sha512Transform(wc_Sha512* sha, const unsigned char* data)
return MEMORY_E;
#endif
#if defined(USE_INTEL_SPEEDUP) && \
#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
(defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
Sha512_SetTransform();
#endif
#if defined(LITTLE_ENDIAN_ORDER)
#if defined(USE_INTEL_SPEEDUP) && \
#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
(defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
#endif
@ -1459,7 +1460,7 @@ int wc_InitSha384_ex(wc_Sha384* sha384, void* heap, int devId)
return ret;
}
#if defined(USE_INTEL_SPEEDUP) && \
#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
(defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
Sha512_SetTransform();
#endif
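
Each sha512.c hunk above adds defined(WOLFSSL_X86_64_BUILD) in front of the existing USE_INTEL_SPEEDUP test, so the AVX assembly paths are only compiled into 64-bit x86 builds and other targets (including 32-bit macOS builds) keep the portable C code. A condensed, self-contained sketch of the resulting guard, using the macro and function names visible in the diff but an illustrative wrapper, is:

#include <stdio.h>

/* Sketch of the guard pattern introduced above; Sha512_SetTransform() is the
 * wolfSSL routine referenced in the diff, the wrapper function is hypothetical. */
static const char *sha512_transform_choice(void)
{
#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
    (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
    /* Only 64-bit x86 builds with the Intel speedups reference the
     * assembly transform. */
    Sha512_SetTransform();
    return "AVX assembly transform";
#else
    /* Everything else never pulls in the sha512_asm.S symbols. */
    return "portable C transform";
#endif
}

int main(void)
{
    printf("%s\n", sha512_transform_choice());
    return 0;
}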


@ -862,7 +862,7 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
"bsr %[a], %[i] \n\t" \
: [i] "=r" (vi) \
: [a] "r" (va) \
: "cC" \
: "cc" \
)
#ifndef WOLFSSL_SP_DIV_WORD_HALF
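
The final hunk corrects the clobber list of the bsr inline assembly from "cC" to "cc", the lower-case name GCC and clang document for the condition-code (flags) clobber; the misspelled name is presumably what clang 15 flags here. A self-contained sketch of the corrected construct, with a hypothetical wrapper name, is:

#include <stdio.h>

/* Hedged sketch of the fixed inline asm (x86/x86-64 only); the wrapper name
 * is hypothetical.  bsr writes the index of the highest set bit of a
 * non-zero operand and modifies the flags, hence the "cc" clobber. */
static unsigned long highest_bit_index(unsigned long a)
{
    unsigned long i;
    __asm__ volatile (
        "bsr %[a], %[i]"
        : [i] "=r" (i)
        : [a] "r" (a)
        : "cc"              /* flags clobber: "cc", not "cC" */
    );
    return i;               /* undefined when a == 0, as with the instruction */
}

int main(void)
{
    printf("%lu\n", highest_bit_index(0x90UL));   /* prints 7 */
    return 0;
}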