Performance improvements

AES-GCM: don't generate the M0 table in C when the assembly implementation
is in use; generate it with new assembly code instead, and only fall back
to the C generator when assembly is unavailable.
HMAC: add option to copy hashes (--enable-hmac-copy /
-DWOLFSSL_HMAC_COPY_HASH) to improve performance when using the same key
for multiple operations.
pull/8445/head
Sean Parkinson 2025-02-10 17:50:26 +10:00
parent 8f131ff3d0
commit bfd52decb6
8 changed files with 1915 additions and 167 deletions

View File

@ -648,6 +648,7 @@ WOLFSSL_HARDEN_TLS_ALLOW_OLD_TLS
WOLFSSL_HARDEN_TLS_ALLOW_TRUNCATED_HMAC
WOLFSSL_HARDEN_TLS_NO_PKEY_CHECK
WOLFSSL_HARDEN_TLS_NO_SCR_CHECK
WOLFSSL_HMAC_COPY_HASH
WOLFSSL_HOSTNAME_VERIFY_ALT_NAME_ONLY
WOLFSSL_I2D_ECDSA_SIG_ALLOC
WOLFSSL_IAR_ARM_TIME

View File

@ -295,6 +295,25 @@ AC_ARG_ENABLE([hmac],
[ ENABLED_HMAC=yes ]
)
# enable HMAC hash copying automatically for x86_64 and aarch64 (except Linux kernel module)
# WOLFSSL_HMAC_COPY_HASH enables an HMAC implementation that copies the
# keyed hash state, improving performance when the same key is used for
# multiple operations (see the commit description).
HMAC_COPY_DEFAULT=no
# Keep the feature off by default for Linux kernel module builds.
if test "$ENABLED_LINUXKM_DEFAULTS" = "no"
then
# Default to on only for 64-bit Intel/AMD and ARM hosts.
if test "$host_cpu" = "x86_64" || test "$host_cpu" = "aarch64" || test "$host_cpu" = "amd64"
then
HMAC_COPY_DEFAULT=yes
fi
fi
AC_ARG_ENABLE([hmac-copy],
[AS_HELP_STRING([--enable-hmac-copy],[Enables digest copying implementation for HMAC (default: disabled)])],
[ ENABLED_HMAC_COPY=$enableval ],
[ ENABLED_HMAC_COPY=$HMAC_COPY_DEFAULT ]
)
if test "$ENABLED_HMAC_COPY" = "yes"
then
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_HMAC_COPY_HASH"
fi
AC_ARG_ENABLE([do178],
[AS_HELP_STRING([--enable-do178],[Enable DO-178, Will NOT work w/o DO178 license (default: disabled)])],
[ENABLED_DO178=$enableval],

View File

@ -1193,7 +1193,7 @@ static int lng_index = 0;
#ifndef NO_MAIN_DRIVER
#ifndef MAIN_NO_ARGS
static const char* bench_Usage_msg1[][25] = {
static const char* bench_Usage_msg1[][27] = {
/* 0 English */
{ "-? <num> Help, print this usage\n",
" 0: English, 1: Japanese\n",
@ -1207,6 +1207,8 @@ static const char* bench_Usage_msg1[][25] = {
" (if set via -aad_size) <aad_size> bytes.\n"
),
"-dgst_full Full digest operation performed.\n",
"-mac_final MAC update and final operation timed.\n",
"-aead_set_key Set the key as part of the timing of AEAD ciphers.\n",
"-rsa_sign Measure RSA sign/verify instead of encrypt/decrypt.\n",
"<keySz> -rsa-sz\n Measure RSA <key size> performance.\n",
"-ffhdhe2048 Measure DH using FFDHE 2048-bit parameters.\n",
@ -1240,6 +1242,8 @@ static const char* bench_Usage_msg1[][25] = {
"-aad_size <num> TBD.\n",
"-all_aad TBD.\n",
"-dgst_full フルの digest 暗号操作を実施します。\n",
"-mac_final MAC update and final operation timed.\n",
"-aead_set_key Set the key as part of the timing of AEAD ciphers.\n",
"-rsa_sign 暗号/復号化の代わりに RSA の署名/検証を測定します。\n",
"<keySz> -rsa-sz\n RSA <key size> の性能を測定します。\n",
"-ffhdhe2048 Measure DH using FFDHE 2048-bit parameters.\n",
@ -2056,6 +2060,8 @@ static int numBlocks = NUM_BLOCKS;
static word32 bench_size = BENCH_SIZE;
static int base2 = 1;
static int digest_stream = 1;
static int mac_stream = 1;
static int aead_set_key = 0;
#ifdef HAVE_CHACHA
static int encrypt_only = 0;
#endif
@ -4505,10 +4511,12 @@ static void bench_aesgcm_internal(int useDeviceID,
goto exit;
}
ret = wc_AesGcmSetKey(enc[i], key, keySz);
if (ret != 0) {
printf("AesGcmSetKey failed, ret = %d\n", ret);
goto exit;
if (!aead_set_key) {
ret = wc_AesGcmSetKey(enc[i], key, keySz);
if (ret != 0) {
printf("AesGcmSetKey failed, ret = %d\n", ret);
goto exit;
}
}
}
@ -4522,6 +4530,14 @@ static void bench_aesgcm_internal(int useDeviceID,
for (i = 0; i < BENCH_MAX_PENDING; i++) {
if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(enc[i]), 0,
&times, numBlocks, &pending)) {
if (aead_set_key) {
ret = wc_AesGcmSetKey(enc[i], key, keySz);
if (!bench_async_handle(&ret,
BENCH_ASYNC_GET_DEV(enc[i]), 0,
&times, &pending)) {
goto exit_aes_gcm;
}
}
ret = wc_AesGcmEncrypt(enc[i], bench_cipher,
bench_plain, bench_size,
iv, ivSz, bench_tag, AES_AUTH_TAG_SZ,
@ -4560,10 +4576,12 @@ exit_aes_gcm:
goto exit;
}
ret = wc_AesGcmSetKey(dec[i], key, keySz);
if (ret != 0) {
printf("AesGcmSetKey failed, ret = %d\n", ret);
goto exit;
if (!aead_set_key) {
ret = wc_AesGcmSetKey(dec[i], key, keySz);
if (ret != 0) {
printf("AesGcmSetKey failed, ret = %d\n", ret);
goto exit;
}
}
}
@ -4576,6 +4594,14 @@ exit_aes_gcm:
for (i = 0; i < BENCH_MAX_PENDING; i++) {
if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(dec[i]), 0,
&times, numBlocks, &pending)) {
if (aead_set_key) {
ret = wc_AesGcmSetKey(dec[i], key, keySz);
if (!bench_async_handle(&ret,
BENCH_ASYNC_GET_DEV(dec[i]), 0,
&times, &pending)) {
goto exit_aes_gcm_dec;
}
}
ret = wc_AesGcmDecrypt(dec[i], bench_plain,
bench_cipher, bench_size,
iv, ivSz, bench_tag, AES_AUTH_TAG_SZ,
@ -8300,50 +8326,89 @@ static void bench_hmac(int useDeviceID, int type, int digestSz,
}
}
bench_stats_start(&count, &start);
do {
for (times = 0; times < numBlocks || pending > 0; ) {
bench_async_poll(&pending);
/* while free pending slots in queue, submit ops */
for (i = 0; i < BENCH_MAX_PENDING; i++) {
if (bench_async_check(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
&times, numBlocks, &pending)) {
ret = wc_HmacUpdate(hmac[i], bench_plain, bench_size);
if (!bench_async_handle(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]),
0, &times, &pending)) {
goto exit_hmac;
}
}
} /* for i */
} /* for times */
count += times;
times = 0;
if (mac_stream) {
bench_stats_start(&count, &start);
do {
bench_async_poll(&pending);
for (times = 0; times < numBlocks || pending > 0; ) {
bench_async_poll(&pending);
for (i = 0; i < BENCH_MAX_PENDING; i++) {
if (bench_async_check(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
&times, numBlocks, &pending)) {
ret = wc_HmacFinal(hmac[i], digest[i]);
if (!bench_async_handle(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]),
0, &times, &pending)) {
goto exit_hmac;
/* while free pending slots in queue, submit ops */
for (i = 0; i < BENCH_MAX_PENDING; i++) {
if (bench_async_check(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
&times, numBlocks, &pending)) {
ret = wc_HmacUpdate(hmac[i], bench_plain, bench_size);
if (!bench_async_handle(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]),
0, &times, &pending)) {
goto exit_hmac;
}
}
}
RECORD_MULTI_VALUE_STATS();
} /* for i */
} while (pending > 0);
} while (bench_stats_check(start)
#ifdef MULTI_VALUE_STATISTICS
|| runs < minimum_runs
#endif
);
} /* for i */
} /* for times */
count += times;
times = 0;
do {
bench_async_poll(&pending);
for (i = 0; i < BENCH_MAX_PENDING; i++) {
if (bench_async_check(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
&times, numBlocks, &pending)) {
ret = wc_HmacFinal(hmac[i], digest[i]);
if (!bench_async_handle(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]),
0, &times, &pending)) {
goto exit_hmac;
}
}
RECORD_MULTI_VALUE_STATS();
} /* for i */
} while (pending > 0);
} while (bench_stats_check(start)
#ifdef MULTI_VALUE_STATISTICS
|| runs < minimum_runs
#endif
);
}
else {
bench_stats_start(&count, &start);
do {
for (times = 0; times < numBlocks || pending > 0; ) {
bench_async_poll(&pending);
/* while free pending slots in queue, submit ops */
for (i = 0; i < BENCH_MAX_PENDING; i++) {
if (bench_async_check(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
&times, numBlocks, &pending)) {
ret = wc_HmacUpdate(hmac[i], bench_plain, bench_size);
if (!bench_async_handle(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]),
0, &times, &pending)) {
goto exit_hmac;
}
}
if (bench_async_check(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
&times, numBlocks, &pending)) {
ret = wc_HmacFinal(hmac[i], digest[i]);
if (!bench_async_handle(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]),
0, &times, &pending)) {
goto exit_hmac;
}
}
} /* for i */
} /* for times */
count += times;
} while (bench_stats_check(start)
#ifdef MULTI_VALUE_STATISTICS
|| runs < minimum_runs
#endif
);
}
exit_hmac:
bench_stats_sym_finish(label, useDeviceID, count, bench_size, start, ret);
@ -14989,6 +15054,7 @@ static void Usage(void)
e += 3;
#endif
printf("%s", bench_Usage_msg1[lng_index][e++]); /* option -dgst_full */
printf("%s", bench_Usage_msg1[lng_index][e++]); /* option -mca_final */
#ifndef NO_RSA
printf("%s", bench_Usage_msg1[lng_index][e++]); /* option -ras_sign */
#ifdef WOLFSSL_KEY_GEN
@ -15186,6 +15252,10 @@ int wolfcrypt_benchmark_main(int argc, char** argv)
#endif
else if (string_matches(argv[1], "-dgst_full"))
digest_stream = 0;
else if (string_matches(argv[1], "-mac_final"))
mac_stream = 0;
else if (string_matches(argv[1], "-aead_set_key"))
aead_set_key = 1;
#ifdef HAVE_CHACHA
else if (string_matches(argv[1], "-enc_only"))
encrypt_only = 1;

View File

@ -6633,6 +6633,25 @@ void GenerateM0(Gcm* gcm)
#endif /* GCM_TABLE */
#if defined(WOLFSSL_AESNI) && defined(USE_INTEL_SPEEDUP)
#define HAVE_INTEL_AVX1
#define HAVE_INTEL_AVX2
#endif
#if defined(WOLFSSL_AESNI) && defined(GCM_TABLE_4BIT) && \
defined(WC_C_DYNAMIC_FALLBACK)
/* Assembly implementations that build the GHASH multiplication table M0
 * from the hash key.
 *   h - input hash key H (16 bytes)
 *   m - output M0 table (byte pointer to gcm->M0)
 * One variant per instruction-set level; the caller selects at runtime
 * based on detected CPU features. */
void GCM_generate_m0_aesni(const unsigned char *h, unsigned char *m)
XASM_LINK("GCM_generate_m0_aesni");
#ifdef HAVE_INTEL_AVX1
void GCM_generate_m0_avx1(const unsigned char *h, unsigned char *m)
XASM_LINK("GCM_generate_m0_avx1");
#endif
#ifdef HAVE_INTEL_AVX2
void GCM_generate_m0_avx2(const unsigned char *h, unsigned char *m)
XASM_LINK("GCM_generate_m0_avx2");
#endif
#endif /* WOLFSSL_AESNI && GCM_TABLE_4BIT && WC_C_DYNAMIC_FALLBACK */
/* Software AES - GCM SetKey */
int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
{
@ -6702,9 +6721,33 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
VECTOR_REGISTERS_POP;
}
if (ret == 0) {
#if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT)
GenerateM0(&aes->gcm);
#endif /* GCM_TABLE */
#if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT)
#if defined(WOLFSSL_AESNI) && defined(GCM_TABLE_4BIT)
if (aes->use_aesni) {
#if defined(WC_C_DYNAMIC_FALLBACK)
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_AVX2(intel_flags)) {
GCM_generate_m0_avx2(aes->gcm.H, (byte*)aes->gcm.M0);
}
else
#endif
#if defined(HAVE_INTEL_AVX1)
if (IS_INTEL_AVX1(intel_flags)) {
GCM_generate_m0_avx1(aes->gcm.H, (byte*)aes->gcm.M0);
}
else
#endif
{
GCM_generate_m0_aesni(aes->gcm.H, (byte*)aes->gcm.M0);
}
#endif
}
else
#endif
{
GenerateM0(&aes->gcm);
}
#endif /* GCM_TABLE || GCM_TABLE_4BIT */
}
#endif /* FREESCALE_LTC_AES_GCM */
@ -6727,11 +6770,6 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
#ifdef WOLFSSL_AESNI
#if defined(USE_INTEL_SPEEDUP)
#define HAVE_INTEL_AVX1
#define HAVE_INTEL_AVX2
#endif /* USE_INTEL_SPEEDUP */
void AES_GCM_encrypt_aesni(const unsigned char *in, unsigned char *out,
const unsigned char* addt, const unsigned char* ivec,
unsigned char *tag, word32 nbytes,

View File

@ -56,6 +56,272 @@
#else
.p2align 4
#endif /* __APPLE__ */
/* GCM_generate_m0_aesni
 * Build the GHASH multiplication table M0 from the hash key H.
 *   rdi = H (16 bytes), rsi = M0 table output (512 bytes written).
 * This is the SSE-only path, selected at runtime when the CPU has AES-NI
 * but neither AVX1 nor AVX2 (see wc_AesGcmSetKey dispatch), so every
 * instruction must use legacy (non-VEX) encoding.  The original emitted
 * VEX-encoded vpshufb in four places, which raises #UD on CPUs without
 * AVX; those are replaced with the equivalent two-operand pshufb
 * (identical result when destination == source). */
L_GCM_generate_m0_aesni_rev8:
.quad 0x8090a0b0c0d0e0f, 0x1020304050607
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_GCM_generate_m0_aesni_mod2_128:
.quad 0x0, 0xe100000000000000
#ifndef __APPLE__
.text
.globl GCM_generate_m0_aesni
.type GCM_generate_m0_aesni,@function
.align 16
GCM_generate_m0_aesni:
#else
.section __TEXT,__text
.globl _GCM_generate_m0_aesni
.p2align 4
_GCM_generate_m0_aesni:
#endif /* __APPLE__ */
movdqu L_GCM_generate_m0_aesni_rev8(%rip), %xmm9
movdqu L_GCM_generate_m0_aesni_mod2_128(%rip), %xmm10
pxor %xmm8, %xmm8
movdqu (%rdi), %xmm0
movdqu %xmm8, (%rsi)
movdqu %xmm0, %xmm8
pshufb %xmm9, %xmm0
/* Repeated halving of H modulo the GHASH polynomial (mod2_128). */
movdqu %xmm0, %xmm5
movdqu %xmm0, %xmm4
psllq $63, %xmm5
psrlq $0x01, %xmm4
movdqu %xmm5, %xmm1
pslldq $8, %xmm1
psrldq $8, %xmm5
pshufd $0xff, %xmm1, %xmm1
por %xmm5, %xmm4
psrad $31, %xmm1
pand %xmm10, %xmm1
pxor %xmm4, %xmm1
movdqu %xmm1, %xmm5
movdqu %xmm1, %xmm4
psllq $63, %xmm5
psrlq $0x01, %xmm4
movdqu %xmm5, %xmm2
pslldq $8, %xmm2
psrldq $8, %xmm5
pshufd $0xff, %xmm2, %xmm2
por %xmm5, %xmm4
psrad $31, %xmm2
pand %xmm10, %xmm2
pxor %xmm4, %xmm2
movdqu %xmm2, %xmm5
movdqu %xmm2, %xmm4
psllq $63, %xmm5
psrlq $0x01, %xmm4
movdqu %xmm5, %xmm3
pslldq $8, %xmm3
psrldq $8, %xmm5
pshufd $0xff, %xmm3, %xmm3
por %xmm5, %xmm4
psrad $31, %xmm3
pand %xmm10, %xmm3
pxor %xmm4, %xmm3
pshufb %xmm9, %xmm3
pshufb %xmm9, %xmm2
movdqu %xmm3, %xmm8
pshufb %xmm9, %xmm1
pshufb %xmm9, %xmm0
pxor %xmm2, %xmm8
/* Fill table entries 1..15 by XOR combinations of the base values. */
movdqu %xmm3, 16(%rsi)
movdqu %xmm2, 32(%rsi)
movdqu %xmm8, 48(%rsi)
movdqu %xmm1, 64(%rsi)
movdqu %xmm3, %xmm4
movdqu %xmm2, %xmm5
movdqu %xmm8, %xmm6
pxor %xmm1, %xmm4
pxor %xmm1, %xmm5
pxor %xmm1, %xmm6
movdqu %xmm4, 80(%rsi)
movdqu %xmm5, 96(%rsi)
movdqu %xmm6, 112(%rsi)
movdqu %xmm0, 128(%rsi)
pxor %xmm0, %xmm1
movdqu %xmm3, %xmm4
movdqu %xmm2, %xmm6
pxor %xmm0, %xmm4
pxor %xmm0, %xmm6
movdqu %xmm4, 144(%rsi)
movdqu %xmm6, 160(%rsi)
pxor %xmm3, %xmm6
movdqu %xmm6, 176(%rsi)
movdqu %xmm1, 192(%rsi)
movdqu %xmm3, %xmm4
movdqu %xmm2, %xmm5
movdqu %xmm8, %xmm6
pxor %xmm1, %xmm4
pxor %xmm1, %xmm5
pxor %xmm1, %xmm6
movdqu %xmm4, 208(%rsi)
movdqu %xmm5, 224(%rsi)
movdqu %xmm6, 240(%rsi)
/* Second half of the table: each first-half entry shifted right by 4
 * bits (byte-reversed around the shift). */
movdqu (%rsi), %xmm0
movdqu 16(%rsi), %xmm1
movdqu 32(%rsi), %xmm2
movdqu 48(%rsi), %xmm3
pshufb %xmm9, %xmm0
pshufb %xmm9, %xmm1
pshufb %xmm9, %xmm2
pshufb %xmm9, %xmm3
movdqu %xmm0, %xmm4
movdqu %xmm1, %xmm5
movdqu %xmm2, %xmm6
movdqu %xmm3, %xmm7
psllq $60, %xmm4
psllq $60, %xmm5
psllq $60, %xmm6
psllq $60, %xmm7
psrlq $4, %xmm0
psrlq $4, %xmm1
psrlq $4, %xmm2
psrlq $4, %xmm3
psrldq $8, %xmm4
psrldq $8, %xmm5
psrldq $8, %xmm6
psrldq $8, %xmm7
por %xmm4, %xmm0
por %xmm5, %xmm1
por %xmm6, %xmm2
por %xmm7, %xmm3
pshufb %xmm9, %xmm0
pshufb %xmm9, %xmm1
pshufb %xmm9, %xmm2
pshufb %xmm9, %xmm3
movdqu %xmm0, 256(%rsi)
movdqu %xmm1, 272(%rsi)
movdqu %xmm2, 288(%rsi)
movdqu %xmm3, 304(%rsi)
movdqu 64(%rsi), %xmm0
movdqu 80(%rsi), %xmm1
movdqu 96(%rsi), %xmm2
movdqu 112(%rsi), %xmm3
pshufb %xmm9, %xmm0
pshufb %xmm9, %xmm1
pshufb %xmm9, %xmm2
pshufb %xmm9, %xmm3
movdqu %xmm0, %xmm4
movdqu %xmm1, %xmm5
movdqu %xmm2, %xmm6
movdqu %xmm3, %xmm7
psllq $60, %xmm4
psllq $60, %xmm5
psllq $60, %xmm6
psllq $60, %xmm7
psrlq $4, %xmm0
psrlq $4, %xmm1
psrlq $4, %xmm2
psrlq $4, %xmm3
psrldq $8, %xmm4
psrldq $8, %xmm5
psrldq $8, %xmm6
psrldq $8, %xmm7
por %xmm4, %xmm0
por %xmm5, %xmm1
por %xmm6, %xmm2
por %xmm7, %xmm3
pshufb %xmm9, %xmm0
pshufb %xmm9, %xmm1
pshufb %xmm9, %xmm2
pshufb %xmm9, %xmm3
movdqu %xmm0, 320(%rsi)
movdqu %xmm1, 336(%rsi)
movdqu %xmm2, 352(%rsi)
movdqu %xmm3, 368(%rsi)
movdqu 128(%rsi), %xmm0
movdqu 144(%rsi), %xmm1
movdqu 160(%rsi), %xmm2
movdqu 176(%rsi), %xmm3
pshufb %xmm9, %xmm0
pshufb %xmm9, %xmm1
pshufb %xmm9, %xmm2
pshufb %xmm9, %xmm3
movdqu %xmm0, %xmm4
movdqu %xmm1, %xmm5
movdqu %xmm2, %xmm6
movdqu %xmm3, %xmm7
psllq $60, %xmm4
psllq $60, %xmm5
psllq $60, %xmm6
psllq $60, %xmm7
psrlq $4, %xmm0
psrlq $4, %xmm1
psrlq $4, %xmm2
psrlq $4, %xmm3
psrldq $8, %xmm4
psrldq $8, %xmm5
psrldq $8, %xmm6
psrldq $8, %xmm7
por %xmm4, %xmm0
por %xmm5, %xmm1
por %xmm6, %xmm2
por %xmm7, %xmm3
pshufb %xmm9, %xmm0
pshufb %xmm9, %xmm1
pshufb %xmm9, %xmm2
pshufb %xmm9, %xmm3
movdqu %xmm0, 384(%rsi)
movdqu %xmm1, 400(%rsi)
movdqu %xmm2, 416(%rsi)
movdqu %xmm3, 432(%rsi)
movdqu 192(%rsi), %xmm0
movdqu 208(%rsi), %xmm1
movdqu 224(%rsi), %xmm2
movdqu 240(%rsi), %xmm3
pshufb %xmm9, %xmm0
pshufb %xmm9, %xmm1
pshufb %xmm9, %xmm2
pshufb %xmm9, %xmm3
movdqu %xmm0, %xmm4
movdqu %xmm1, %xmm5
movdqu %xmm2, %xmm6
movdqu %xmm3, %xmm7
psllq $60, %xmm4
psllq $60, %xmm5
psllq $60, %xmm6
psllq $60, %xmm7
psrlq $4, %xmm0
psrlq $4, %xmm1
psrlq $4, %xmm2
psrlq $4, %xmm3
psrldq $8, %xmm4
psrldq $8, %xmm5
psrldq $8, %xmm6
psrldq $8, %xmm7
por %xmm4, %xmm0
por %xmm5, %xmm1
por %xmm6, %xmm2
por %xmm7, %xmm3
pshufb %xmm9, %xmm0
pshufb %xmm9, %xmm1
pshufb %xmm9, %xmm2
pshufb %xmm9, %xmm3
movdqu %xmm0, 448(%rsi)
movdqu %xmm1, 464(%rsi)
movdqu %xmm2, 480(%rsi)
movdqu %xmm3, 496(%rsi)
repz retq
#ifndef __APPLE__
.size GCM_generate_m0_aesni,.-GCM_generate_m0_aesni
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_aes_gcm_one:
.quad 0x0, 0x1
#ifndef __APPLE__
@ -6221,6 +6487,238 @@ L_AES_GCM_decrypt_final_aesni_cmp_tag_done:
#else
.p2align 4
#endif /* __APPLE__ */
/* GCM_generate_m0_avx1
 * Build the GHASH multiplication table M0 from the hash key H.
 *   rdi = H (16 bytes), rsi = M0 table output (512 bytes written).
 * AVX1 variant, selected at runtime when IS_INTEL_AVX1 is set and AVX2
 * is not (see the wc_AesGcmSetKey dispatch); VEX encoding is valid here. */
L_GCM_generate_m0_avx1_rev8:
.quad 0x8090a0b0c0d0e0f, 0x1020304050607
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_GCM_generate_m0_avx1_mod2_128:
.quad 0x0, 0xe100000000000000
#ifndef __APPLE__
.text
.globl GCM_generate_m0_avx1
.type GCM_generate_m0_avx1,@function
.align 16
GCM_generate_m0_avx1:
#else
.section __TEXT,__text
.globl _GCM_generate_m0_avx1
.p2align 4
_GCM_generate_m0_avx1:
#endif /* __APPLE__ */
vmovdqu L_GCM_generate_m0_avx1_rev8(%rip), %xmm9
vmovdqu L_GCM_generate_m0_avx1_mod2_128(%rip), %xmm10
vpxor %xmm8, %xmm8, %xmm8
vmovdqu (%rdi), %xmm0
vmovdqu %xmm8, (%rsi)
vmovdqu %xmm0, %xmm8
vpshufb %xmm9, %xmm0, %xmm0
/* Repeated halving of H modulo the GHASH polynomial (mod2_128). */
vpsllq $63, %xmm0, %xmm5
vpsrlq $0x01, %xmm0, %xmm4
vpslldq $8, %xmm5, %xmm1
vpsrldq $8, %xmm5, %xmm5
vpshufd $0xff, %xmm1, %xmm1
vpor %xmm5, %xmm4, %xmm4
vpsrad $31, %xmm1, %xmm1
vpand %xmm10, %xmm1, %xmm1
vpxor %xmm4, %xmm1, %xmm1
vpsllq $63, %xmm1, %xmm5
vpsrlq $0x01, %xmm1, %xmm4
vpslldq $8, %xmm5, %xmm2
vpsrldq $8, %xmm5, %xmm5
vpshufd $0xff, %xmm2, %xmm2
vpor %xmm5, %xmm4, %xmm4
vpsrad $31, %xmm2, %xmm2
vpand %xmm10, %xmm2, %xmm2
vpxor %xmm4, %xmm2, %xmm2
vpsllq $63, %xmm2, %xmm5
vpsrlq $0x01, %xmm2, %xmm4
vpslldq $8, %xmm5, %xmm3
vpsrldq $8, %xmm5, %xmm5
vpshufd $0xff, %xmm3, %xmm3
vpor %xmm5, %xmm4, %xmm4
vpsrad $31, %xmm3, %xmm3
vpand %xmm10, %xmm3, %xmm3
vpxor %xmm4, %xmm3, %xmm3
vpshufb %xmm9, %xmm3, %xmm3
vpshufb %xmm9, %xmm2, %xmm2
vpshufb %xmm9, %xmm1, %xmm1
vpshufb %xmm9, %xmm0, %xmm0
/* Fill table entries 1..15 by XOR combinations of the base values. */
vpxor %xmm2, %xmm3, %xmm8
vmovdqu %xmm3, 16(%rsi)
vmovdqu %xmm2, 32(%rsi)
vmovdqu %xmm8, 48(%rsi)
vmovdqu %xmm1, 64(%rsi)
vpxor %xmm1, %xmm3, %xmm4
vpxor %xmm1, %xmm2, %xmm5
vpxor %xmm1, %xmm8, %xmm6
vmovdqu %xmm4, 80(%rsi)
vmovdqu %xmm5, 96(%rsi)
vmovdqu %xmm6, 112(%rsi)
vmovdqu %xmm0, 128(%rsi)
vpxor %xmm0, %xmm1, %xmm1
vpxor %xmm0, %xmm3, %xmm4
vpxor %xmm0, %xmm2, %xmm6
vmovdqu %xmm4, 144(%rsi)
vmovdqu %xmm6, 160(%rsi)
vpxor %xmm6, %xmm3, %xmm6
vmovdqu %xmm6, 176(%rsi)
vmovdqu %xmm1, 192(%rsi)
vpxor %xmm1, %xmm3, %xmm4
vpxor %xmm1, %xmm2, %xmm5
vpxor %xmm1, %xmm8, %xmm6
vmovdqu %xmm4, 208(%rsi)
vmovdqu %xmm5, 224(%rsi)
vmovdqu %xmm6, 240(%rsi)
/* Second half of the table: each first-half entry shifted right by 4
 * bits (byte-reversed around the shift). */
vmovdqu (%rsi), %xmm0
vmovdqu 16(%rsi), %xmm1
vmovdqu 32(%rsi), %xmm2
vmovdqu 48(%rsi), %xmm3
vpshufb %xmm9, %xmm0, %xmm0
vpshufb %xmm9, %xmm1, %xmm1
vpshufb %xmm9, %xmm2, %xmm2
vpshufb %xmm9, %xmm3, %xmm3
vpsllq $60, %xmm0, %xmm4
vpsllq $60, %xmm1, %xmm5
vpsllq $60, %xmm2, %xmm6
vpsllq $60, %xmm3, %xmm7
vpsrlq $4, %xmm0, %xmm0
vpsrlq $4, %xmm1, %xmm1
vpsrlq $4, %xmm2, %xmm2
vpsrlq $4, %xmm3, %xmm3
vpsrldq $8, %xmm4, %xmm4
vpsrldq $8, %xmm5, %xmm5
vpsrldq $8, %xmm6, %xmm6
vpsrldq $8, %xmm7, %xmm7
vpor %xmm4, %xmm0, %xmm0
vpor %xmm5, %xmm1, %xmm1
vpor %xmm6, %xmm2, %xmm2
vpor %xmm7, %xmm3, %xmm3
vpshufb %xmm9, %xmm0, %xmm0
vpshufb %xmm9, %xmm1, %xmm1
vpshufb %xmm9, %xmm2, %xmm2
vpshufb %xmm9, %xmm3, %xmm3
vmovdqu %xmm0, 256(%rsi)
vmovdqu %xmm1, 272(%rsi)
vmovdqu %xmm2, 288(%rsi)
vmovdqu %xmm3, 304(%rsi)
vmovdqu 64(%rsi), %xmm0
vmovdqu 80(%rsi), %xmm1
vmovdqu 96(%rsi), %xmm2
vmovdqu 112(%rsi), %xmm3
vpshufb %xmm9, %xmm0, %xmm0
vpshufb %xmm9, %xmm1, %xmm1
vpshufb %xmm9, %xmm2, %xmm2
vpshufb %xmm9, %xmm3, %xmm3
vpsllq $60, %xmm0, %xmm4
vpsllq $60, %xmm1, %xmm5
vpsllq $60, %xmm2, %xmm6
vpsllq $60, %xmm3, %xmm7
vpsrlq $4, %xmm0, %xmm0
vpsrlq $4, %xmm1, %xmm1
vpsrlq $4, %xmm2, %xmm2
vpsrlq $4, %xmm3, %xmm3
vpsrldq $8, %xmm4, %xmm4
vpsrldq $8, %xmm5, %xmm5
vpsrldq $8, %xmm6, %xmm6
vpsrldq $8, %xmm7, %xmm7
vpor %xmm4, %xmm0, %xmm0
vpor %xmm5, %xmm1, %xmm1
vpor %xmm6, %xmm2, %xmm2
vpor %xmm7, %xmm3, %xmm3
vpshufb %xmm9, %xmm0, %xmm0
vpshufb %xmm9, %xmm1, %xmm1
vpshufb %xmm9, %xmm2, %xmm2
vpshufb %xmm9, %xmm3, %xmm3
vmovdqu %xmm0, 320(%rsi)
vmovdqu %xmm1, 336(%rsi)
vmovdqu %xmm2, 352(%rsi)
vmovdqu %xmm3, 368(%rsi)
vmovdqu 128(%rsi), %xmm0
vmovdqu 144(%rsi), %xmm1
vmovdqu 160(%rsi), %xmm2
vmovdqu 176(%rsi), %xmm3
vpshufb %xmm9, %xmm0, %xmm0
vpshufb %xmm9, %xmm1, %xmm1
vpshufb %xmm9, %xmm2, %xmm2
vpshufb %xmm9, %xmm3, %xmm3
vpsllq $60, %xmm0, %xmm4
vpsllq $60, %xmm1, %xmm5
vpsllq $60, %xmm2, %xmm6
vpsllq $60, %xmm3, %xmm7
vpsrlq $4, %xmm0, %xmm0
vpsrlq $4, %xmm1, %xmm1
vpsrlq $4, %xmm2, %xmm2
vpsrlq $4, %xmm3, %xmm3
vpsrldq $8, %xmm4, %xmm4
vpsrldq $8, %xmm5, %xmm5
vpsrldq $8, %xmm6, %xmm6
vpsrldq $8, %xmm7, %xmm7
vpor %xmm4, %xmm0, %xmm0
vpor %xmm5, %xmm1, %xmm1
vpor %xmm6, %xmm2, %xmm2
vpor %xmm7, %xmm3, %xmm3
vpshufb %xmm9, %xmm0, %xmm0
vpshufb %xmm9, %xmm1, %xmm1
vpshufb %xmm9, %xmm2, %xmm2
vpshufb %xmm9, %xmm3, %xmm3
vmovdqu %xmm0, 384(%rsi)
vmovdqu %xmm1, 400(%rsi)
vmovdqu %xmm2, 416(%rsi)
vmovdqu %xmm3, 432(%rsi)
vmovdqu 192(%rsi), %xmm0
vmovdqu 208(%rsi), %xmm1
vmovdqu 224(%rsi), %xmm2
vmovdqu 240(%rsi), %xmm3
vpshufb %xmm9, %xmm0, %xmm0
vpshufb %xmm9, %xmm1, %xmm1
vpshufb %xmm9, %xmm2, %xmm2
vpshufb %xmm9, %xmm3, %xmm3
vpsllq $60, %xmm0, %xmm4
vpsllq $60, %xmm1, %xmm5
vpsllq $60, %xmm2, %xmm6
vpsllq $60, %xmm3, %xmm7
vpsrlq $4, %xmm0, %xmm0
vpsrlq $4, %xmm1, %xmm1
vpsrlq $4, %xmm2, %xmm2
vpsrlq $4, %xmm3, %xmm3
vpsrldq $8, %xmm4, %xmm4
vpsrldq $8, %xmm5, %xmm5
vpsrldq $8, %xmm6, %xmm6
vpsrldq $8, %xmm7, %xmm7
vpor %xmm4, %xmm0, %xmm0
vpor %xmm5, %xmm1, %xmm1
vpor %xmm6, %xmm2, %xmm2
vpor %xmm7, %xmm3, %xmm3
vpshufb %xmm9, %xmm0, %xmm0
vpshufb %xmm9, %xmm1, %xmm1
vpshufb %xmm9, %xmm2, %xmm2
vpshufb %xmm9, %xmm3, %xmm3
vmovdqu %xmm0, 448(%rsi)
vmovdqu %xmm1, 464(%rsi)
vmovdqu %xmm2, 480(%rsi)
vmovdqu %xmm3, 496(%rsi)
repz retq
#ifndef __APPLE__
.size GCM_generate_m0_avx1,.-GCM_generate_m0_avx1
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_aes_gcm_one:
.quad 0x0, 0x1
#ifndef __APPLE__
@ -11454,6 +11952,238 @@ L_AES_GCM_decrypt_final_avx1_cmp_tag_done:
#else
.p2align 4
#endif /* __APPLE__ */
/* GCM_generate_m0_avx2
 * Build the GHASH multiplication table M0 from the hash key H.
 *   rdi = H (16 bytes), rsi = M0 table output (512 bytes written).
 * AVX2 variant, selected at runtime when IS_INTEL_AVX2 is set (see the
 * wc_AesGcmSetKey dispatch); VEX encoding is valid here. */
L_GCM_generate_m0_avx2_rev8:
.quad 0x8090a0b0c0d0e0f, 0x1020304050607
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_GCM_generate_m0_avx2_mod2_128:
.quad 0x0, 0xe100000000000000
#ifndef __APPLE__
.text
.globl GCM_generate_m0_avx2
.type GCM_generate_m0_avx2,@function
.align 16
GCM_generate_m0_avx2:
#else
.section __TEXT,__text
.globl _GCM_generate_m0_avx2
.p2align 4
_GCM_generate_m0_avx2:
#endif /* __APPLE__ */
vmovdqu L_GCM_generate_m0_avx2_rev8(%rip), %xmm9
vmovdqu L_GCM_generate_m0_avx2_mod2_128(%rip), %xmm10
vpxor %xmm8, %xmm8, %xmm8
vmovdqu (%rdi), %xmm0
vmovdqu %xmm8, (%rsi)
vmovdqu %xmm0, %xmm8
vpshufb %xmm9, %xmm0, %xmm0
/* Repeated halving of H modulo the GHASH polynomial (mod2_128). */
vpsllq $63, %xmm0, %xmm5
vpsrlq $0x01, %xmm0, %xmm4
vpslldq $8, %xmm5, %xmm1
vpsrldq $8, %xmm5, %xmm5
vpshufd $0xff, %xmm1, %xmm1
vpor %xmm5, %xmm4, %xmm4
vpsrad $31, %xmm1, %xmm1
vpand %xmm10, %xmm1, %xmm1
vpxor %xmm4, %xmm1, %xmm1
vpsllq $63, %xmm1, %xmm5
vpsrlq $0x01, %xmm1, %xmm4
vpslldq $8, %xmm5, %xmm2
vpsrldq $8, %xmm5, %xmm5
vpshufd $0xff, %xmm2, %xmm2
vpor %xmm5, %xmm4, %xmm4
vpsrad $31, %xmm2, %xmm2
vpand %xmm10, %xmm2, %xmm2
vpxor %xmm4, %xmm2, %xmm2
vpsllq $63, %xmm2, %xmm5
vpsrlq $0x01, %xmm2, %xmm4
vpslldq $8, %xmm5, %xmm3
vpsrldq $8, %xmm5, %xmm5
vpshufd $0xff, %xmm3, %xmm3
vpor %xmm5, %xmm4, %xmm4
vpsrad $31, %xmm3, %xmm3
vpand %xmm10, %xmm3, %xmm3
vpxor %xmm4, %xmm3, %xmm3
vpshufb %xmm9, %xmm2, %xmm2
vpshufb %xmm9, %xmm3, %xmm3
vpshufb %xmm9, %xmm0, %xmm0
vpshufb %xmm9, %xmm1, %xmm1
/* Fill table entries 1..15 by XOR combinations of the base values. */
vpxor %xmm2, %xmm3, %xmm8
vmovdqu %xmm3, 16(%rsi)
vmovdqu %xmm2, 32(%rsi)
vmovdqu %xmm8, 48(%rsi)
vmovdqu %xmm1, 64(%rsi)
vpxor %xmm1, %xmm3, %xmm4
vpxor %xmm1, %xmm2, %xmm5
vpxor %xmm1, %xmm8, %xmm6
vmovdqu %xmm4, 80(%rsi)
vmovdqu %xmm5, 96(%rsi)
vmovdqu %xmm6, 112(%rsi)
vmovdqu %xmm0, 128(%rsi)
vpxor %xmm0, %xmm1, %xmm1
vpxor %xmm0, %xmm3, %xmm4
vpxor %xmm0, %xmm2, %xmm6
vmovdqu %xmm4, 144(%rsi)
vmovdqu %xmm6, 160(%rsi)
vpxor %xmm6, %xmm3, %xmm6
vmovdqu %xmm6, 176(%rsi)
vmovdqu %xmm1, 192(%rsi)
vpxor %xmm1, %xmm3, %xmm4
vpxor %xmm1, %xmm2, %xmm5
vpxor %xmm1, %xmm8, %xmm6
vmovdqu %xmm4, 208(%rsi)
vmovdqu %xmm5, 224(%rsi)
vmovdqu %xmm6, 240(%rsi)
/* Second half of the table: each first-half entry shifted right by 4
 * bits (byte-reversed around the shift). */
vmovdqu (%rsi), %xmm0
vmovdqu 16(%rsi), %xmm1
vmovdqu 32(%rsi), %xmm2
vmovdqu 48(%rsi), %xmm3
vpshufb %xmm9, %xmm0, %xmm0
vpshufb %xmm9, %xmm1, %xmm1
vpshufb %xmm9, %xmm2, %xmm2
vpshufb %xmm9, %xmm3, %xmm3
vpsllq $60, %xmm0, %xmm4
vpsllq $60, %xmm1, %xmm5
vpsllq $60, %xmm2, %xmm6
vpsllq $60, %xmm3, %xmm7
vpsrlq $4, %xmm0, %xmm0
vpsrlq $4, %xmm1, %xmm1
vpsrlq $4, %xmm2, %xmm2
vpsrlq $4, %xmm3, %xmm3
vpsrldq $8, %xmm4, %xmm4
vpsrldq $8, %xmm5, %xmm5
vpsrldq $8, %xmm6, %xmm6
vpsrldq $8, %xmm7, %xmm7
vpor %xmm4, %xmm0, %xmm0
vpor %xmm5, %xmm1, %xmm1
vpor %xmm6, %xmm2, %xmm2
vpor %xmm7, %xmm3, %xmm3
vpshufb %xmm9, %xmm0, %xmm0
vpshufb %xmm9, %xmm1, %xmm1
vpshufb %xmm9, %xmm2, %xmm2
vpshufb %xmm9, %xmm3, %xmm3
vmovdqu %xmm0, 256(%rsi)
vmovdqu %xmm1, 272(%rsi)
vmovdqu %xmm2, 288(%rsi)
vmovdqu %xmm3, 304(%rsi)
vmovdqu 64(%rsi), %xmm0
vmovdqu 80(%rsi), %xmm1
vmovdqu 96(%rsi), %xmm2
vmovdqu 112(%rsi), %xmm3
vpshufb %xmm9, %xmm0, %xmm0
vpshufb %xmm9, %xmm1, %xmm1
vpshufb %xmm9, %xmm2, %xmm2
vpshufb %xmm9, %xmm3, %xmm3
vpsllq $60, %xmm0, %xmm4
vpsllq $60, %xmm1, %xmm5
vpsllq $60, %xmm2, %xmm6
vpsllq $60, %xmm3, %xmm7
vpsrlq $4, %xmm0, %xmm0
vpsrlq $4, %xmm1, %xmm1
vpsrlq $4, %xmm2, %xmm2
vpsrlq $4, %xmm3, %xmm3
vpsrldq $8, %xmm4, %xmm4
vpsrldq $8, %xmm5, %xmm5
vpsrldq $8, %xmm6, %xmm6
vpsrldq $8, %xmm7, %xmm7
vpor %xmm4, %xmm0, %xmm0
vpor %xmm5, %xmm1, %xmm1
vpor %xmm6, %xmm2, %xmm2
vpor %xmm7, %xmm3, %xmm3
vpshufb %xmm9, %xmm0, %xmm0
vpshufb %xmm9, %xmm1, %xmm1
vpshufb %xmm9, %xmm2, %xmm2
vpshufb %xmm9, %xmm3, %xmm3
vmovdqu %xmm0, 320(%rsi)
vmovdqu %xmm1, 336(%rsi)
vmovdqu %xmm2, 352(%rsi)
vmovdqu %xmm3, 368(%rsi)
vmovdqu 128(%rsi), %xmm0
vmovdqu 144(%rsi), %xmm1
vmovdqu 160(%rsi), %xmm2
vmovdqu 176(%rsi), %xmm3
vpshufb %xmm9, %xmm0, %xmm0
vpshufb %xmm9, %xmm1, %xmm1
vpshufb %xmm9, %xmm2, %xmm2
vpshufb %xmm9, %xmm3, %xmm3
vpsllq $60, %xmm0, %xmm4
vpsllq $60, %xmm1, %xmm5
vpsllq $60, %xmm2, %xmm6
vpsllq $60, %xmm3, %xmm7
vpsrlq $4, %xmm0, %xmm0
vpsrlq $4, %xmm1, %xmm1
vpsrlq $4, %xmm2, %xmm2
vpsrlq $4, %xmm3, %xmm3
vpsrldq $8, %xmm4, %xmm4
vpsrldq $8, %xmm5, %xmm5
vpsrldq $8, %xmm6, %xmm6
vpsrldq $8, %xmm7, %xmm7
vpor %xmm4, %xmm0, %xmm0
vpor %xmm5, %xmm1, %xmm1
vpor %xmm6, %xmm2, %xmm2
vpor %xmm7, %xmm3, %xmm3
vpshufb %xmm9, %xmm0, %xmm0
vpshufb %xmm9, %xmm1, %xmm1
vpshufb %xmm9, %xmm2, %xmm2
vpshufb %xmm9, %xmm3, %xmm3
vmovdqu %xmm0, 384(%rsi)
vmovdqu %xmm1, 400(%rsi)
vmovdqu %xmm2, 416(%rsi)
vmovdqu %xmm3, 432(%rsi)
vmovdqu 192(%rsi), %xmm0
vmovdqu 208(%rsi), %xmm1
vmovdqu 224(%rsi), %xmm2
vmovdqu 240(%rsi), %xmm3
vpshufb %xmm9, %xmm0, %xmm0
vpshufb %xmm9, %xmm1, %xmm1
vpshufb %xmm9, %xmm2, %xmm2
vpshufb %xmm9, %xmm3, %xmm3
vpsllq $60, %xmm0, %xmm4
vpsllq $60, %xmm1, %xmm5
vpsllq $60, %xmm2, %xmm6
vpsllq $60, %xmm3, %xmm7
vpsrlq $4, %xmm0, %xmm0
vpsrlq $4, %xmm1, %xmm1
vpsrlq $4, %xmm2, %xmm2
vpsrlq $4, %xmm3, %xmm3
vpsrldq $8, %xmm4, %xmm4
vpsrldq $8, %xmm5, %xmm5
vpsrldq $8, %xmm6, %xmm6
vpsrldq $8, %xmm7, %xmm7
vpor %xmm4, %xmm0, %xmm0
vpor %xmm5, %xmm1, %xmm1
vpor %xmm6, %xmm2, %xmm2
vpor %xmm7, %xmm3, %xmm3
vpshufb %xmm9, %xmm0, %xmm0
vpshufb %xmm9, %xmm1, %xmm1
vpshufb %xmm9, %xmm2, %xmm2
vpshufb %xmm9, %xmm3, %xmm3
vmovdqu %xmm0, 448(%rsi)
vmovdqu %xmm1, 464(%rsi)
vmovdqu %xmm2, 480(%rsi)
vmovdqu %xmm3, 496(%rsi)
repz retq
#ifndef __APPLE__
.size GCM_generate_m0_avx2,.-GCM_generate_m0_avx2
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx2_aes_gcm_one:
.quad 0x0, 0x1
#ifndef __APPLE__

View File

@ -40,6 +40,259 @@ IFNDEF _WIN64
_WIN64 = 1
ENDIF
_DATA SEGMENT
ALIGN 16
L_GCM_generate_m0_aesni_rev8 QWORD 579005069656919567, 283686952306183
ptr_L_GCM_generate_m0_aesni_rev8 QWORD L_GCM_generate_m0_aesni_rev8
_DATA ENDS
_DATA SEGMENT
ALIGN 16
L_GCM_generate_m0_aesni_mod2_128 QWORD 0, 16212958658533785600
ptr_L_GCM_generate_m0_aesni_mod2_128 QWORD L_GCM_generate_m0_aesni_mod2_128
_DATA ENDS
_text SEGMENT READONLY PARA
; GCM_generate_m0_aesni
; Build the GHASH multiplication table M0 from the hash key H.
;   rcx = H (16 bytes), rdx = M0 table output (512 bytes written).
; SSE-only path, selected at runtime when the CPU has AES-NI but neither
; AVX1 nor AVX2 (see wc_AesGcmSetKey dispatch), so every instruction must
; use legacy (non-VEX) encoding.  The original emitted VEX-encoded vpshufb
; in four places, which raises #UD on CPUs without AVX; those are replaced
; with the equivalent two-operand pshufb (identical result when the
; destination register is also the source).
; xmm6-xmm10 are non-volatile in the Win64 ABI and are saved/restored.
GCM_generate_m0_aesni PROC
sub rsp, 80
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
movdqu OWORD PTR [rsp+32], xmm8
movdqu OWORD PTR [rsp+48], xmm9
movdqu OWORD PTR [rsp+64], xmm10
movdqu xmm9, OWORD PTR L_GCM_generate_m0_aesni_rev8
movdqu xmm10, OWORD PTR L_GCM_generate_m0_aesni_mod2_128
pxor xmm8, xmm8
movdqu xmm0, OWORD PTR [rcx]
movdqu OWORD PTR [rdx], xmm8
movdqu xmm8, xmm0
pshufb xmm0, xmm9
; Repeated halving of H modulo the GHASH polynomial (mod2_128).
movdqu xmm5, xmm0
movdqu xmm4, xmm0
psllq xmm5, 63
psrlq xmm4, 1
movdqu xmm1, xmm5
pslldq xmm1, 8
psrldq xmm5, 8
pshufd xmm1, xmm1, 255
por xmm4, xmm5
psrad xmm1, 31
pand xmm1, xmm10
pxor xmm1, xmm4
movdqu xmm5, xmm1
movdqu xmm4, xmm1
psllq xmm5, 63
psrlq xmm4, 1
movdqu xmm2, xmm5
pslldq xmm2, 8
psrldq xmm5, 8
pshufd xmm2, xmm2, 255
por xmm4, xmm5
psrad xmm2, 31
pand xmm2, xmm10
pxor xmm2, xmm4
movdqu xmm5, xmm2
movdqu xmm4, xmm2
psllq xmm5, 63
psrlq xmm4, 1
movdqu xmm3, xmm5
pslldq xmm3, 8
psrldq xmm5, 8
pshufd xmm3, xmm3, 255
por xmm4, xmm5
psrad xmm3, 31
pand xmm3, xmm10
pxor xmm3, xmm4
pshufb xmm3, xmm9
pshufb xmm2, xmm9
movdqu xmm8, xmm3
pshufb xmm1, xmm9
pshufb xmm0, xmm9
pxor xmm8, xmm2
; Fill table entries 1..15 by XOR combinations of the base values.
movdqu OWORD PTR [rdx+16], xmm3
movdqu OWORD PTR [rdx+32], xmm2
movdqu OWORD PTR [rdx+48], xmm8
movdqu OWORD PTR [rdx+64], xmm1
movdqu xmm4, xmm3
movdqu xmm5, xmm2
movdqu xmm6, xmm8
pxor xmm4, xmm1
pxor xmm5, xmm1
pxor xmm6, xmm1
movdqu OWORD PTR [rdx+80], xmm4
movdqu OWORD PTR [rdx+96], xmm5
movdqu OWORD PTR [rdx+112], xmm6
movdqu OWORD PTR [rdx+128], xmm0
pxor xmm1, xmm0
movdqu xmm4, xmm3
movdqu xmm6, xmm2
pxor xmm4, xmm0
pxor xmm6, xmm0
movdqu OWORD PTR [rdx+144], xmm4
movdqu OWORD PTR [rdx+160], xmm6
pxor xmm6, xmm3
movdqu OWORD PTR [rdx+176], xmm6
movdqu OWORD PTR [rdx+192], xmm1
movdqu xmm4, xmm3
movdqu xmm5, xmm2
movdqu xmm6, xmm8
pxor xmm4, xmm1
pxor xmm5, xmm1
pxor xmm6, xmm1
movdqu OWORD PTR [rdx+208], xmm4
movdqu OWORD PTR [rdx+224], xmm5
movdqu OWORD PTR [rdx+240], xmm6
; Second half of the table: each first-half entry shifted right by 4
; bits (byte-reversed around the shift).
movdqu xmm0, OWORD PTR [rdx]
movdqu xmm1, OWORD PTR [rdx+16]
movdqu xmm2, OWORD PTR [rdx+32]
movdqu xmm3, OWORD PTR [rdx+48]
pshufb xmm0, xmm9
pshufb xmm1, xmm9
pshufb xmm2, xmm9
pshufb xmm3, xmm9
movdqu xmm4, xmm0
movdqu xmm5, xmm1
movdqu xmm6, xmm2
movdqu xmm7, xmm3
psllq xmm4, 60
psllq xmm5, 60
psllq xmm6, 60
psllq xmm7, 60
psrlq xmm0, 4
psrlq xmm1, 4
psrlq xmm2, 4
psrlq xmm3, 4
psrldq xmm4, 8
psrldq xmm5, 8
psrldq xmm6, 8
psrldq xmm7, 8
por xmm0, xmm4
por xmm1, xmm5
por xmm2, xmm6
por xmm3, xmm7
pshufb xmm0, xmm9
pshufb xmm1, xmm9
pshufb xmm2, xmm9
pshufb xmm3, xmm9
movdqu OWORD PTR [rdx+256], xmm0
movdqu OWORD PTR [rdx+272], xmm1
movdqu OWORD PTR [rdx+288], xmm2
movdqu OWORD PTR [rdx+304], xmm3
movdqu xmm0, OWORD PTR [rdx+64]
movdqu xmm1, OWORD PTR [rdx+80]
movdqu xmm2, OWORD PTR [rdx+96]
movdqu xmm3, OWORD PTR [rdx+112]
pshufb xmm0, xmm9
pshufb xmm1, xmm9
pshufb xmm2, xmm9
pshufb xmm3, xmm9
movdqu xmm4, xmm0
movdqu xmm5, xmm1
movdqu xmm6, xmm2
movdqu xmm7, xmm3
psllq xmm4, 60
psllq xmm5, 60
psllq xmm6, 60
psllq xmm7, 60
psrlq xmm0, 4
psrlq xmm1, 4
psrlq xmm2, 4
psrlq xmm3, 4
psrldq xmm4, 8
psrldq xmm5, 8
psrldq xmm6, 8
psrldq xmm7, 8
por xmm0, xmm4
por xmm1, xmm5
por xmm2, xmm6
por xmm3, xmm7
pshufb xmm0, xmm9
pshufb xmm1, xmm9
pshufb xmm2, xmm9
pshufb xmm3, xmm9
movdqu OWORD PTR [rdx+320], xmm0
movdqu OWORD PTR [rdx+336], xmm1
movdqu OWORD PTR [rdx+352], xmm2
movdqu OWORD PTR [rdx+368], xmm3
movdqu xmm0, OWORD PTR [rdx+128]
movdqu xmm1, OWORD PTR [rdx+144]
movdqu xmm2, OWORD PTR [rdx+160]
movdqu xmm3, OWORD PTR [rdx+176]
pshufb xmm0, xmm9
pshufb xmm1, xmm9
pshufb xmm2, xmm9
pshufb xmm3, xmm9
movdqu xmm4, xmm0
movdqu xmm5, xmm1
movdqu xmm6, xmm2
movdqu xmm7, xmm3
psllq xmm4, 60
psllq xmm5, 60
psllq xmm6, 60
psllq xmm7, 60
psrlq xmm0, 4
psrlq xmm1, 4
psrlq xmm2, 4
psrlq xmm3, 4
psrldq xmm4, 8
psrldq xmm5, 8
psrldq xmm6, 8
psrldq xmm7, 8
por xmm0, xmm4
por xmm1, xmm5
por xmm2, xmm6
por xmm3, xmm7
pshufb xmm0, xmm9
pshufb xmm1, xmm9
pshufb xmm2, xmm9
pshufb xmm3, xmm9
movdqu OWORD PTR [rdx+384], xmm0
movdqu OWORD PTR [rdx+400], xmm1
movdqu OWORD PTR [rdx+416], xmm2
movdqu OWORD PTR [rdx+432], xmm3
movdqu xmm0, OWORD PTR [rdx+192]
movdqu xmm1, OWORD PTR [rdx+208]
movdqu xmm2, OWORD PTR [rdx+224]
movdqu xmm3, OWORD PTR [rdx+240]
pshufb xmm0, xmm9
pshufb xmm1, xmm9
pshufb xmm2, xmm9
pshufb xmm3, xmm9
movdqu xmm4, xmm0
movdqu xmm5, xmm1
movdqu xmm6, xmm2
movdqu xmm7, xmm3
psllq xmm4, 60
psllq xmm5, 60
psllq xmm6, 60
psllq xmm7, 60
psrlq xmm0, 4
psrlq xmm1, 4
psrlq xmm2, 4
psrlq xmm3, 4
psrldq xmm4, 8
psrldq xmm5, 8
psrldq xmm6, 8
psrldq xmm7, 8
por xmm0, xmm4
por xmm1, xmm5
por xmm2, xmm6
por xmm3, xmm7
pshufb xmm0, xmm9
pshufb xmm1, xmm9
pshufb xmm2, xmm9
pshufb xmm3, xmm9
movdqu OWORD PTR [rdx+448], xmm0
movdqu OWORD PTR [rdx+464], xmm1
movdqu OWORD PTR [rdx+480], xmm2
movdqu OWORD PTR [rdx+496], xmm3
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm8, OWORD PTR [rsp+32]
movdqu xmm9, OWORD PTR [rsp+48]
movdqu xmm10, OWORD PTR [rsp+64]
add rsp, 80
ret
GCM_generate_m0_aesni ENDP
_text ENDS
_DATA SEGMENT
ALIGN 16
L_aes_gcm_one QWORD 0, 1
@ -6205,6 +6458,225 @@ _text ENDS
IFDEF HAVE_INTEL_AVX1
_DATA SEGMENT
ALIGN 16
; pshufb mask that reverses all 16 bytes of an XMM register: low qword
; bytes are 0f..08 and high qword bytes are 07..00, so dest byte i is
; taken from source byte 15-i.
L_GCM_generate_m0_avx1_rev8 QWORD 579005069656919567, 283686952306183
ptr_L_GCM_generate_m0_avx1_rev8 QWORD L_GCM_generate_m0_avx1_rev8
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; GHASH reduction constant: high qword is 0xE100000000000000, the bit
; pattern of the GCM polynomial x^128 + x^7 + x^2 + x + 1.
L_GCM_generate_m0_avx1_mod2_128 QWORD 0, 16212958658533785600
ptr_L_GCM_generate_m0_avx1_mod2_128 QWORD L_GCM_generate_m0_avx1_mod2_128
_DATA ENDS
_text SEGMENT READONLY PARA
; Generate the GHASH "M0" lookup tables for a hash key (AVX1 version).
; MS x64 ABI: rcx -> 16-byte hash key H, rdx -> 512-byte output table
; (32 16-byte entries at offsets 0..496, per the stores below).
; Entries 0..15 form the XOR-closed set of small multiples of H used by
; one 4-bit nibble lookup; entries 16..31 hold the same values shifted
; right by 4 bits for the other nibble (4-bit table-driven GHASH -
; NOTE(review): table layout inferred from the stores; confirm against
; the C fallback implementation).
GCM_generate_m0_avx1 PROC
; xmm6-xmm10 are callee-saved under the MS x64 ABI: spill them.
sub rsp, 80
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
vmovdqu OWORD PTR [rsp+32], xmm8
vmovdqu OWORD PTR [rsp+48], xmm9
vmovdqu OWORD PTR [rsp+64], xmm10
; xmm9 = 16-byte reversal shuffle mask, xmm10 = GCM reduction constant.
vmovdqu xmm9, OWORD PTR L_GCM_generate_m0_avx1_rev8
vmovdqu xmm10, OWORD PTR L_GCM_generate_m0_avx1_mod2_128
; Table entry 0 is always zero; load H and byte-reverse it into xmm0.
vpxor xmm8, xmm8, xmm8
vmovdqu xmm0, OWORD PTR [rcx]
vmovdqu OWORD PTR [rdx], xmm8
vmovdqu xmm8, xmm0
vpshufb xmm0, xmm0, xmm9
; Three reduced right-shifts of H: shift the 128-bit value right one
; bit (psllq 63 + pslldq/psrldq carry the bit between qwords) and XOR
; the reduction constant when the shifted-out bit was set - the
; vpshufd/vpsrad pair spreads that bit into an all-ones/all-zeros mask.
vpsllq xmm5, xmm0, 63
vpsrlq xmm4, xmm0, 1
vpslldq xmm1, xmm5, 8
vpsrldq xmm5, xmm5, 8
vpshufd xmm1, xmm1, 255
vpor xmm4, xmm4, xmm5
vpsrad xmm1, xmm1, 31
vpand xmm1, xmm1, xmm10
vpxor xmm1, xmm1, xmm4
vpsllq xmm5, xmm1, 63
vpsrlq xmm4, xmm1, 1
vpslldq xmm2, xmm5, 8
vpsrldq xmm5, xmm5, 8
vpshufd xmm2, xmm2, 255
vpor xmm4, xmm4, xmm5
vpsrad xmm2, xmm2, 31
vpand xmm2, xmm2, xmm10
vpxor xmm2, xmm2, xmm4
vpsllq xmm5, xmm2, 63
vpsrlq xmm4, xmm2, 1
vpslldq xmm3, xmm5, 8
vpsrldq xmm5, xmm5, 8
vpshufd xmm3, xmm3, 255
vpor xmm4, xmm4, xmm5
vpsrad xmm3, xmm3, 31
vpand xmm3, xmm3, xmm10
vpxor xmm3, xmm3, xmm4
; Back to byte order: xmm0..xmm3 = H and its three reduced shifts.
vpshufb xmm3, xmm3, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm0, xmm0, xmm9
; Fill entries 1..15 with every XOR combination of the four basis
; values (basis entries at offsets 16, 32, 64 and 128).
vpxor xmm8, xmm3, xmm2
vmovdqu OWORD PTR [rdx+16], xmm3
vmovdqu OWORD PTR [rdx+32], xmm2
vmovdqu OWORD PTR [rdx+48], xmm8
vmovdqu OWORD PTR [rdx+64], xmm1
vpxor xmm4, xmm3, xmm1
vpxor xmm5, xmm2, xmm1
vpxor xmm6, xmm8, xmm1
vmovdqu OWORD PTR [rdx+80], xmm4
vmovdqu OWORD PTR [rdx+96], xmm5
vmovdqu OWORD PTR [rdx+112], xmm6
vmovdqu OWORD PTR [rdx+128], xmm0
vpxor xmm1, xmm1, xmm0
vpxor xmm4, xmm3, xmm0
vpxor xmm6, xmm2, xmm0
vmovdqu OWORD PTR [rdx+144], xmm4
vmovdqu OWORD PTR [rdx+160], xmm6
vpxor xmm6, xmm3, xmm6
vmovdqu OWORD PTR [rdx+176], xmm6
vmovdqu OWORD PTR [rdx+192], xmm1
vpxor xmm4, xmm3, xmm1
vpxor xmm5, xmm2, xmm1
vpxor xmm6, xmm8, xmm1
vmovdqu OWORD PTR [rdx+208], xmm4
vmovdqu OWORD PTR [rdx+224], xmm5
vmovdqu OWORD PTR [rdx+240], xmm6
; Second table (entries 16..31): each entry above is byte-reversed,
; shifted right 4 bits as a 128-bit value (psllq 60 + psrldq 8 carries
; across the qword boundary) and byte-reversed back. Four at a time.
; Entries 0..3 -> 16..19.
vmovdqu xmm0, OWORD PTR [rdx]
vmovdqu xmm1, OWORD PTR [rdx+16]
vmovdqu xmm2, OWORD PTR [rdx+32]
vmovdqu xmm3, OWORD PTR [rdx+48]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+256], xmm0
vmovdqu OWORD PTR [rdx+272], xmm1
vmovdqu OWORD PTR [rdx+288], xmm2
vmovdqu OWORD PTR [rdx+304], xmm3
; Entries 4..7 -> 20..23.
vmovdqu xmm0, OWORD PTR [rdx+64]
vmovdqu xmm1, OWORD PTR [rdx+80]
vmovdqu xmm2, OWORD PTR [rdx+96]
vmovdqu xmm3, OWORD PTR [rdx+112]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+320], xmm0
vmovdqu OWORD PTR [rdx+336], xmm1
vmovdqu OWORD PTR [rdx+352], xmm2
vmovdqu OWORD PTR [rdx+368], xmm3
; Entries 8..11 -> 24..27.
vmovdqu xmm0, OWORD PTR [rdx+128]
vmovdqu xmm1, OWORD PTR [rdx+144]
vmovdqu xmm2, OWORD PTR [rdx+160]
vmovdqu xmm3, OWORD PTR [rdx+176]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+384], xmm0
vmovdqu OWORD PTR [rdx+400], xmm1
vmovdqu OWORD PTR [rdx+416], xmm2
vmovdqu OWORD PTR [rdx+432], xmm3
; Entries 12..15 -> 28..31.
vmovdqu xmm0, OWORD PTR [rdx+192]
vmovdqu xmm1, OWORD PTR [rdx+208]
vmovdqu xmm2, OWORD PTR [rdx+224]
vmovdqu xmm3, OWORD PTR [rdx+240]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+448], xmm0
vmovdqu OWORD PTR [rdx+464], xmm1
vmovdqu OWORD PTR [rdx+480], xmm2
vmovdqu OWORD PTR [rdx+496], xmm3
; Restore callee-saved XMM registers and return.
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm8, OWORD PTR [rsp+32]
vmovdqu xmm9, OWORD PTR [rsp+48]
vmovdqu xmm10, OWORD PTR [rsp+64]
add rsp, 80
ret
GCM_generate_m0_avx1 ENDP
_text ENDS
_DATA SEGMENT
ALIGN 16
; 128-bit constant with 1 in the upper 64-bit lane and 0 in the lower.
; NOTE(review): presumably the GCM counter-increment constant used by
; the AVX1 encrypt/decrypt code elsewhere in this file - confirm usage.
L_avx1_aes_gcm_one QWORD 0, 1
ptr_L_avx1_aes_gcm_one QWORD L_avx1_aes_gcm_one
_DATA ENDS
@ -11436,6 +11908,225 @@ ENDIF
IFDEF HAVE_INTEL_AVX2
_DATA SEGMENT
ALIGN 16
; pshufb mask that reverses all 16 bytes of an XMM register: low qword
; bytes are 0f..08 and high qword bytes are 07..00, so dest byte i is
; taken from source byte 15-i.
L_GCM_generate_m0_avx2_rev8 QWORD 579005069656919567, 283686952306183
ptr_L_GCM_generate_m0_avx2_rev8 QWORD L_GCM_generate_m0_avx2_rev8
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; GHASH reduction constant: high qword is 0xE100000000000000, the bit
; pattern of the GCM polynomial x^128 + x^7 + x^2 + x + 1.
L_GCM_generate_m0_avx2_mod2_128 QWORD 0, 16212958658533785600
ptr_L_GCM_generate_m0_avx2_mod2_128 QWORD L_GCM_generate_m0_avx2_mod2_128
_DATA ENDS
_text SEGMENT READONLY PARA
; Generate the GHASH "M0" lookup tables for a hash key (AVX2 version).
; MS x64 ABI: rcx -> 16-byte hash key H, rdx -> 512-byte output table
; (32 16-byte entries at offsets 0..496, per the stores below).
; Entries 0..15 form the XOR-closed set of small multiples of H used by
; one 4-bit nibble lookup; entries 16..31 hold the same values shifted
; right by 4 bits for the other nibble (4-bit table-driven GHASH -
; NOTE(review): table layout inferred from the stores; confirm against
; the C fallback implementation).
GCM_generate_m0_avx2 PROC
; xmm6-xmm10 are callee-saved under the MS x64 ABI: spill them.
sub rsp, 80
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
vmovdqu OWORD PTR [rsp+32], xmm8
vmovdqu OWORD PTR [rsp+48], xmm9
vmovdqu OWORD PTR [rsp+64], xmm10
; xmm9 = 16-byte reversal shuffle mask, xmm10 = GCM reduction constant.
vmovdqu xmm9, OWORD PTR L_GCM_generate_m0_avx2_rev8
vmovdqu xmm10, OWORD PTR L_GCM_generate_m0_avx2_mod2_128
; Table entry 0 is always zero; load H and byte-reverse it into xmm0.
vpxor xmm8, xmm8, xmm8
vmovdqu xmm0, OWORD PTR [rcx]
vmovdqu OWORD PTR [rdx], xmm8
vmovdqu xmm8, xmm0
vpshufb xmm0, xmm0, xmm9
; Three reduced right-shifts of H: shift the 128-bit value right one
; bit (psllq 63 + pslldq/psrldq carry the bit between qwords) and XOR
; the reduction constant when the shifted-out bit was set - the
; vpshufd/vpsrad pair spreads that bit into an all-ones/all-zeros mask.
vpsllq xmm5, xmm0, 63
vpsrlq xmm4, xmm0, 1
vpslldq xmm1, xmm5, 8
vpsrldq xmm5, xmm5, 8
vpshufd xmm1, xmm1, 255
vpor xmm4, xmm4, xmm5
vpsrad xmm1, xmm1, 31
vpand xmm1, xmm1, xmm10
vpxor xmm1, xmm1, xmm4
vpsllq xmm5, xmm1, 63
vpsrlq xmm4, xmm1, 1
vpslldq xmm2, xmm5, 8
vpsrldq xmm5, xmm5, 8
vpshufd xmm2, xmm2, 255
vpor xmm4, xmm4, xmm5
vpsrad xmm2, xmm2, 31
vpand xmm2, xmm2, xmm10
vpxor xmm2, xmm2, xmm4
vpsllq xmm5, xmm2, 63
vpsrlq xmm4, xmm2, 1
vpslldq xmm3, xmm5, 8
vpsrldq xmm5, xmm5, 8
vpshufd xmm3, xmm3, 255
vpor xmm4, xmm4, xmm5
vpsrad xmm3, xmm3, 31
vpand xmm3, xmm3, xmm10
vpxor xmm3, xmm3, xmm4
; Back to byte order: xmm0..xmm3 = H and its three reduced shifts.
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
; Fill entries 1..15 with every XOR combination of the four basis
; values (basis entries at offsets 16, 32, 64 and 128).
vpxor xmm8, xmm3, xmm2
vmovdqu OWORD PTR [rdx+16], xmm3
vmovdqu OWORD PTR [rdx+32], xmm2
vmovdqu OWORD PTR [rdx+48], xmm8
vmovdqu OWORD PTR [rdx+64], xmm1
vpxor xmm4, xmm3, xmm1
vpxor xmm5, xmm2, xmm1
vpxor xmm6, xmm8, xmm1
vmovdqu OWORD PTR [rdx+80], xmm4
vmovdqu OWORD PTR [rdx+96], xmm5
vmovdqu OWORD PTR [rdx+112], xmm6
vmovdqu OWORD PTR [rdx+128], xmm0
vpxor xmm1, xmm1, xmm0
vpxor xmm4, xmm3, xmm0
vpxor xmm6, xmm2, xmm0
vmovdqu OWORD PTR [rdx+144], xmm4
vmovdqu OWORD PTR [rdx+160], xmm6
vpxor xmm6, xmm3, xmm6
vmovdqu OWORD PTR [rdx+176], xmm6
vmovdqu OWORD PTR [rdx+192], xmm1
vpxor xmm4, xmm3, xmm1
vpxor xmm5, xmm2, xmm1
vpxor xmm6, xmm8, xmm1
vmovdqu OWORD PTR [rdx+208], xmm4
vmovdqu OWORD PTR [rdx+224], xmm5
vmovdqu OWORD PTR [rdx+240], xmm6
; Second table (entries 16..31): each entry above is byte-reversed,
; shifted right 4 bits as a 128-bit value (psllq 60 + psrldq 8 carries
; across the qword boundary) and byte-reversed back. Four at a time.
; Entries 0..3 -> 16..19.
vmovdqu xmm0, OWORD PTR [rdx]
vmovdqu xmm1, OWORD PTR [rdx+16]
vmovdqu xmm2, OWORD PTR [rdx+32]
vmovdqu xmm3, OWORD PTR [rdx+48]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+256], xmm0
vmovdqu OWORD PTR [rdx+272], xmm1
vmovdqu OWORD PTR [rdx+288], xmm2
vmovdqu OWORD PTR [rdx+304], xmm3
; Entries 4..7 -> 20..23.
vmovdqu xmm0, OWORD PTR [rdx+64]
vmovdqu xmm1, OWORD PTR [rdx+80]
vmovdqu xmm2, OWORD PTR [rdx+96]
vmovdqu xmm3, OWORD PTR [rdx+112]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+320], xmm0
vmovdqu OWORD PTR [rdx+336], xmm1
vmovdqu OWORD PTR [rdx+352], xmm2
vmovdqu OWORD PTR [rdx+368], xmm3
; Entries 8..11 -> 24..27.
vmovdqu xmm0, OWORD PTR [rdx+128]
vmovdqu xmm1, OWORD PTR [rdx+144]
vmovdqu xmm2, OWORD PTR [rdx+160]
vmovdqu xmm3, OWORD PTR [rdx+176]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+384], xmm0
vmovdqu OWORD PTR [rdx+400], xmm1
vmovdqu OWORD PTR [rdx+416], xmm2
vmovdqu OWORD PTR [rdx+432], xmm3
; Entries 12..15 -> 28..31.
vmovdqu xmm0, OWORD PTR [rdx+192]
vmovdqu xmm1, OWORD PTR [rdx+208]
vmovdqu xmm2, OWORD PTR [rdx+224]
vmovdqu xmm3, OWORD PTR [rdx+240]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+448], xmm0
vmovdqu OWORD PTR [rdx+464], xmm1
vmovdqu OWORD PTR [rdx+480], xmm2
vmovdqu OWORD PTR [rdx+496], xmm3
; Restore callee-saved XMM registers and return.
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm8, OWORD PTR [rsp+32]
vmovdqu xmm9, OWORD PTR [rsp+48]
vmovdqu xmm10, OWORD PTR [rsp+64]
add rsp, 80
ret
GCM_generate_m0_avx2 ENDP
_text ENDS
_DATA SEGMENT
ALIGN 16
; 128-bit constant with 1 in the upper 64-bit lane and 0 in the lower.
; NOTE(review): presumably the GCM counter-increment constant used by
; the AVX2 encrypt/decrypt code elsewhere in this file - confirm usage.
L_avx2_aes_gcm_one QWORD 0, 1
ptr_L_avx2_aes_gcm_one QWORD L_avx2_aes_gcm_one
_DATA ENDS

View File

@ -155,76 +155,72 @@ int wc_HmacSizeByType(int type)
return ret;
}
int _InitHmac(Hmac* hmac, int type, void* heap)
static int HmacKeyInitHash(wc_HmacHash* hash, int type, void* heap, int devId)
{
int ret = 0;
#ifdef WOLF_CRYPTO_CB
int devId = hmac->devId;
#else
int devId = INVALID_DEVID;
#endif
switch (type) {
#ifndef NO_MD5
case WC_MD5:
ret = wc_InitMd5_ex(&hmac->hash.md5, heap, devId);
ret = wc_InitMd5_ex(&hash->md5, heap, devId);
break;
#endif /* !NO_MD5 */
#ifndef NO_SHA
case WC_SHA:
ret = wc_InitSha_ex(&hmac->hash.sha, heap, devId);
ret = wc_InitSha_ex(&hash->sha, heap, devId);
break;
#endif /* !NO_SHA */
#ifdef WOLFSSL_SHA224
case WC_SHA224:
ret = wc_InitSha224_ex(&hmac->hash.sha224, heap, devId);
ret = wc_InitSha224_ex(&hash->sha224, heap, devId);
break;
#endif /* WOLFSSL_SHA224 */
#ifndef NO_SHA256
case WC_SHA256:
ret = wc_InitSha256_ex(&hmac->hash.sha256, heap, devId);
ret = wc_InitSha256_ex(&hash->sha256, heap, devId);
break;
#endif /* !NO_SHA256 */
#ifdef WOLFSSL_SHA384
case WC_SHA384:
ret = wc_InitSha384_ex(&hmac->hash.sha384, heap, devId);
ret = wc_InitSha384_ex(&hash->sha384, heap, devId);
break;
#endif /* WOLFSSL_SHA384 */
#ifdef WOLFSSL_SHA512
case WC_SHA512:
ret = wc_InitSha512_ex(&hmac->hash.sha512, heap, devId);
ret = wc_InitSha512_ex(&hash->sha512, heap, devId);
break;
#endif /* WOLFSSL_SHA512 */
#ifdef WOLFSSL_SHA3
#ifndef WOLFSSL_NOSHA3_224
case WC_SHA3_224:
ret = wc_InitSha3_224(&hmac->hash.sha3, heap, devId);
ret = wc_InitSha3_224(&hash->sha3, heap, devId);
break;
#endif
#ifndef WOLFSSL_NOSHA3_256
case WC_SHA3_256:
ret = wc_InitSha3_256(&hmac->hash.sha3, heap, devId);
ret = wc_InitSha3_256(&hash->sha3, heap, devId);
break;
#endif
#ifndef WOLFSSL_NOSHA3_384
case WC_SHA3_384:
ret = wc_InitSha3_384(&hmac->hash.sha3, heap, devId);
ret = wc_InitSha3_384(&hash->sha3, heap, devId);
break;
#endif
#ifndef WOLFSSL_NOSHA3_512
case WC_SHA3_512:
ret = wc_InitSha3_512(&hmac->hash.sha3, heap, devId);
ret = wc_InitSha3_512(&hash->sha3, heap, devId);
break;
#endif
#endif
#ifdef WOLFSSL_SM3
case WC_SM3:
ret = wc_InitSm3(&hmac->hash.sm3, heap, devId);
ret = wc_InitSm3(&hash->sm3, heap, devId);
break;
#endif
@ -233,6 +229,22 @@ int _InitHmac(Hmac* hmac, int type, void* heap)
break;
}
return ret;
}
int _InitHmac(Hmac* hmac, int type, void* heap)
{
int ret;
#ifdef WOLF_CRYPTO_CB
int devId = hmac->devId;
#else
int devId = INVALID_DEVID;
#endif
ret = HmacKeyInitHash(&hmac->hash, type, heap, devId);
if (ret != 0)
return ret;
/* default to NULL heap hint or test value */
#ifdef WOLFSSL_HEAP_TEST
hmac->heap = (void*)WOLFSSL_HEAP_TEST;
@ -243,6 +255,158 @@ int _InitHmac(Hmac* hmac, int type, void* heap)
return ret;
}
#ifdef WOLFSSL_HMAC_COPY_HASH
/* Duplicate one hash object's state into another for the given MAC type.
 *
 * With WOLFSSL_HMAC_COPY_HASH the pre-keyed inner/outer hash states are
 * restored by copying instead of re-hashing the pads on every operation.
 *
 * macType  wolfCrypt hash algorithm identifier (e.g. WC_SHA256).
 * src      hash object to copy from.
 * dst      hash object to copy into.
 * Returns 0 on success, otherwise the underlying copy function's error.
 */
static int HmacKeyCopyHash(byte macType, wc_HmacHash* src, wc_HmacHash* dst)
{
    switch (macType) {
    #ifndef NO_MD5
        case WC_MD5:
            return wc_Md5Copy(&src->md5, &dst->md5);
    #endif /* !NO_MD5 */

    #ifndef NO_SHA
        case WC_SHA:
            return wc_ShaCopy(&src->sha, &dst->sha);
    #endif /* !NO_SHA */

    #ifdef WOLFSSL_SHA224
        case WC_SHA224:
            return wc_Sha224Copy(&src->sha224, &dst->sha224);
    #endif /* WOLFSSL_SHA224 */

    #ifndef NO_SHA256
        case WC_SHA256:
            return wc_Sha256Copy(&src->sha256, &dst->sha256);
    #endif /* !NO_SHA256 */

    #ifdef WOLFSSL_SHA384
        case WC_SHA384:
            return wc_Sha384Copy(&src->sha384, &dst->sha384);
    #endif /* WOLFSSL_SHA384 */

    #ifdef WOLFSSL_SHA512
        case WC_SHA512:
            return wc_Sha512Copy(&src->sha512, &dst->sha512);
    #endif /* WOLFSSL_SHA512 */

    #ifdef WOLFSSL_SHA3
    #ifndef WOLFSSL_NOSHA3_224
        case WC_SHA3_224:
            return wc_Sha3_224_Copy(&src->sha3, &dst->sha3);
    #endif
    #ifndef WOLFSSL_NOSHA3_256
        case WC_SHA3_256:
            return wc_Sha3_256_Copy(&src->sha3, &dst->sha3);
    #endif
    #ifndef WOLFSSL_NOSHA3_384
        case WC_SHA3_384:
            return wc_Sha3_384_Copy(&src->sha3, &dst->sha3);
    #endif
    #ifndef WOLFSSL_NOSHA3_512
        case WC_SHA3_512:
            return wc_Sha3_512_Copy(&src->sha3, &dst->sha3);
    #endif
    #endif /* WOLFSSL_SHA3 */

    #ifdef WOLFSSL_SM3
        case WC_SM3:
            return wc_Sm3Copy(&src->sm3, &dst->sm3);
    #endif

        default:
            /* Unknown/disabled algorithm: nothing to copy; keep the
             * original behavior of reporting success. */
            return 0;
    }
}
#endif
/* Absorb one block-sized pad (ipad or opad) into the given hash object.
 *
 * The pad buffer must be at least the block size of the selected
 * algorithm; each case hashes exactly that algorithm's block size.
 *
 * macType  wolfCrypt hash algorithm identifier (e.g. WC_SHA256).
 * hash     hash object to update.
 * pad      block-sized pad bytes to hash.
 * Returns 0 on success, otherwise the underlying update function's error.
 */
static int HmacKeyHashUpdate(byte macType, wc_HmacHash* hash, byte* pad)
{
    switch (macType) {
    #ifndef NO_MD5
        case WC_MD5:
            return wc_Md5Update(&hash->md5, pad, WC_MD5_BLOCK_SIZE);
    #endif /* !NO_MD5 */

    #ifndef NO_SHA
        case WC_SHA:
            return wc_ShaUpdate(&hash->sha, pad, WC_SHA_BLOCK_SIZE);
    #endif /* !NO_SHA */

    #ifdef WOLFSSL_SHA224
        case WC_SHA224:
            return wc_Sha224Update(&hash->sha224, pad, WC_SHA224_BLOCK_SIZE);
    #endif /* WOLFSSL_SHA224 */

    #ifndef NO_SHA256
        case WC_SHA256:
            return wc_Sha256Update(&hash->sha256, pad, WC_SHA256_BLOCK_SIZE);
    #endif /* !NO_SHA256 */

    #ifdef WOLFSSL_SHA384
        case WC_SHA384:
            return wc_Sha384Update(&hash->sha384, pad, WC_SHA384_BLOCK_SIZE);
    #endif /* WOLFSSL_SHA384 */

    #ifdef WOLFSSL_SHA512
        case WC_SHA512:
            return wc_Sha512Update(&hash->sha512, pad, WC_SHA512_BLOCK_SIZE);
    #endif /* WOLFSSL_SHA512 */

    #ifdef WOLFSSL_SHA3
    #ifndef WOLFSSL_NOSHA3_224
        case WC_SHA3_224:
            return wc_Sha3_224_Update(&hash->sha3, pad, WC_SHA3_224_BLOCK_SIZE);
    #endif
    #ifndef WOLFSSL_NOSHA3_256
        case WC_SHA3_256:
            return wc_Sha3_256_Update(&hash->sha3, pad, WC_SHA3_256_BLOCK_SIZE);
    #endif
    #ifndef WOLFSSL_NOSHA3_384
        case WC_SHA3_384:
            return wc_Sha3_384_Update(&hash->sha3, pad, WC_SHA3_384_BLOCK_SIZE);
    #endif
    #ifndef WOLFSSL_NOSHA3_512
        case WC_SHA3_512:
            return wc_Sha3_512_Update(&hash->sha3, pad, WC_SHA3_512_BLOCK_SIZE);
    #endif
    #endif /* WOLFSSL_SHA3 */

    #ifdef WOLFSSL_SM3
        case WC_SM3:
            return wc_Sm3Update(&hash->sm3, pad, WC_SM3_BLOCK_SIZE);
    #endif

        default:
            /* Unknown/disabled algorithm: nothing to hash; keep the
             * original behavior of reporting success. */
            return 0;
    }
}
int wc_HmacSetKey_ex(Hmac* hmac, int type, const byte* key, word32 length,
int allowFlag)
@ -603,6 +767,29 @@ int wc_HmacSetKey_ex(Hmac* hmac, int type, const byte* key, word32 length,
}
}
#ifdef WOLFSSL_HMAC_COPY_HASH
    /* Pre-key dedicated inner/outer hash objects once at key-set time so
     * later update/final calls restore the keyed state with a cheap copy
     * instead of re-hashing the pads.
     * Fix: normalized "if ( ret == 0)" spacing to the file's style.
     * NOTE(review): the early returns below leave any already-initialized
     * i_hash/o_hash to be released by wc_HmacFree - confirm callers free
     * the Hmac object on error. */
    if (ret == 0) {
    #ifdef WOLF_CRYPTO_CB
        int devId = hmac->devId;
    #else
        int devId = INVALID_DEVID;
    #endif
        ret = HmacKeyInitHash(&hmac->i_hash, hmac->macType, heap, devId);
        if (ret != 0)
            return ret;
        ret = HmacKeyInitHash(&hmac->o_hash, hmac->macType, heap, devId);
        if (ret != 0)
            return ret;
        /* Absorb the inner pad into i_hash and the outer pad into o_hash. */
        ret = HmacKeyHashUpdate(hmac->macType, &hmac->i_hash, ip);
        if (ret != 0)
            return ret;
        ret = HmacKeyHashUpdate(hmac->macType, &hmac->o_hash, op);
        if (ret != 0)
            return ret;
    }
#endif
return ret;
#endif /* WOLFSSL_MAXQ108X */
}
@ -618,96 +805,6 @@ int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
return wc_HmacSetKey_ex(hmac, type, key, length, allowFlag);
}
/* Start the inner HMAC hash by absorbing the block-sized inner pad
 * (hmac->ipad) into hmac->hash for the configured algorithm.
 * On success marks the state as software-keyed so the pad is not
 * hashed again. Returns 0 on success or the update function's error. */
static int HmacKeyInnerHash(Hmac* hmac)
{
int ret = 0;
switch (hmac->macType) {
#ifndef NO_MD5
case WC_MD5:
ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->ipad,
WC_MD5_BLOCK_SIZE);
break;
#endif /* !NO_MD5 */
#ifndef NO_SHA
case WC_SHA:
ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->ipad,
WC_SHA_BLOCK_SIZE);
break;
#endif /* !NO_SHA */
#ifdef WOLFSSL_SHA224
case WC_SHA224:
ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->ipad,
WC_SHA224_BLOCK_SIZE);
break;
#endif /* WOLFSSL_SHA224 */
#ifndef NO_SHA256
case WC_SHA256:
ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->ipad,
WC_SHA256_BLOCK_SIZE);
break;
#endif /* !NO_SHA256 */
#ifdef WOLFSSL_SHA384
case WC_SHA384:
ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->ipad,
WC_SHA384_BLOCK_SIZE);
break;
#endif /* WOLFSSL_SHA384 */
#ifdef WOLFSSL_SHA512
case WC_SHA512:
ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->ipad,
WC_SHA512_BLOCK_SIZE);
break;
#endif /* WOLFSSL_SHA512 */
#ifdef WOLFSSL_SHA3
#ifndef WOLFSSL_NOSHA3_224
case WC_SHA3_224:
ret = wc_Sha3_224_Update(&hmac->hash.sha3, (byte*)hmac->ipad,
WC_SHA3_224_BLOCK_SIZE);
break;
#endif
#ifndef WOLFSSL_NOSHA3_256
case WC_SHA3_256:
ret = wc_Sha3_256_Update(&hmac->hash.sha3, (byte*)hmac->ipad,
WC_SHA3_256_BLOCK_SIZE);
break;
#endif
#ifndef WOLFSSL_NOSHA3_384
case WC_SHA3_384:
ret = wc_Sha3_384_Update(&hmac->hash.sha3, (byte*)hmac->ipad,
WC_SHA3_384_BLOCK_SIZE);
break;
#endif
#ifndef WOLFSSL_NOSHA3_512
case WC_SHA3_512:
ret = wc_Sha3_512_Update(&hmac->hash.sha3, (byte*)hmac->ipad,
WC_SHA3_512_BLOCK_SIZE);
break;
#endif
#endif /* WOLFSSL_SHA3 */
#ifdef WOLFSSL_SM3
case WC_SM3:
ret = wc_Sm3Update(&hmac->hash.sm3, (byte*)hmac->ipad,
WC_SM3_BLOCK_SIZE);
break;
#endif
default:
/* Unknown/disabled algorithm: nothing hashed, ret stays 0. */
break;
}
/* Only flag the inner hash as keyed when the update succeeded. */
if (ret == 0)
hmac->innerHashKeyed = WC_HMAC_INNER_HASH_KEYED_SW;
return ret;
}
int wc_HmacUpdate(Hmac* hmac, const byte* msg, word32 length)
{
int ret = 0;
@ -739,9 +836,14 @@ int wc_HmacUpdate(Hmac* hmac, const byte* msg, word32 length)
#endif /* WOLFSSL_ASYNC_CRYPT */
if (!hmac->innerHashKeyed) {
ret = HmacKeyInnerHash(hmac);
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = HmacKeyHashUpdate(hmac->macType, &hmac->hash, (byte*)hmac->ipad);
#else
ret = HmacKeyCopyHash(hmac->macType, &hmac->i_hash, &hmac->hash);
#endif
if (ret != 0)
return ret;
hmac->innerHashKeyed = WC_HMAC_INNER_HASH_KEYED_SW;
}
switch (hmac->macType) {
@ -851,9 +953,14 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
#endif /* WOLFSSL_ASYNC_CRYPT */
if (!hmac->innerHashKeyed) {
ret = HmacKeyInnerHash(hmac);
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = HmacKeyHashUpdate(hmac->macType, &hmac->hash, (byte*)hmac->ipad);
#else
ret = HmacKeyCopyHash(hmac->macType, &hmac->i_hash, &hmac->hash);
#endif
if (ret != 0)
return ret;
hmac->innerHashKeyed = WC_HMAC_INNER_HASH_KEYED_SW;
}
switch (hmac->macType) {
@ -862,8 +969,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Md5Final(&hmac->hash.md5, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->opad,
WC_MD5_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_MD5, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->innerHash,
@ -879,8 +990,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_ShaFinal(&hmac->hash.sha, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->opad,
WC_SHA_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->innerHash,
@ -896,8 +1011,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha224Final(&hmac->hash.sha224, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->opad,
WC_SHA224_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA224, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->innerHash,
@ -914,8 +1033,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha256Final(&hmac->hash.sha256, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->opad,
WC_SHA256_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA256, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->innerHash,
@ -931,8 +1054,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha384Final(&hmac->hash.sha384, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->opad,
WC_SHA384_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA384, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->innerHash,
@ -947,8 +1074,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha512Final(&hmac->hash.sha512, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->opad,
WC_SHA512_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA512, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->innerHash,
@ -965,8 +1096,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha3_224_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha3_224_Update(&hmac->hash.sha3, (byte*)hmac->opad,
WC_SHA3_224_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA3_224, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha3_224_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,
@ -981,8 +1116,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha3_256_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha3_256_Update(&hmac->hash.sha3, (byte*)hmac->opad,
WC_SHA3_256_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA3_256, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha3_256_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,
@ -997,8 +1136,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha3_384_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha3_384_Update(&hmac->hash.sha3, (byte*)hmac->opad,
WC_SHA3_384_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA3_384, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha3_384_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,
@ -1013,8 +1156,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha3_512_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha3_512_Update(&hmac->hash.sha3, (byte*)hmac->opad,
WC_SHA3_512_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA3_512, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha3_512_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,
@ -1031,8 +1178,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sm3Final(&hmac->hash.sm3, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sm3Update(&hmac->hash.sm3, (byte*)hmac->opad,
WC_SM3_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SM3, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sm3Update(&hmac->hash.sm3, (byte*)hmac->innerHash,
@ -1163,34 +1314,58 @@ void wc_HmacFree(Hmac* hmac)
#ifndef NO_MD5
case WC_MD5:
wc_Md5Free(&hmac->hash.md5);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Md5Free(&hmac->i_hash.md5);
wc_Md5Free(&hmac->o_hash.md5);
#endif
break;
#endif /* !NO_MD5 */
#ifndef NO_SHA
case WC_SHA:
wc_ShaFree(&hmac->hash.sha);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_ShaFree(&hmac->i_hash.sha);
wc_ShaFree(&hmac->o_hash.sha);
#endif
break;
#endif /* !NO_SHA */
#ifdef WOLFSSL_SHA224
case WC_SHA224:
wc_Sha224Free(&hmac->hash.sha224);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha224Free(&hmac->i_hash.sha224);
wc_Sha224Free(&hmac->o_hash.sha224);
#endif
break;
#endif /* WOLFSSL_SHA224 */
#ifndef NO_SHA256
case WC_SHA256:
wc_Sha256Free(&hmac->hash.sha256);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha256Free(&hmac->i_hash.sha256);
wc_Sha256Free(&hmac->o_hash.sha256);
#endif
break;
#endif /* !NO_SHA256 */
#ifdef WOLFSSL_SHA384
case WC_SHA384:
wc_Sha384Free(&hmac->hash.sha384);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha384Free(&hmac->i_hash.sha384);
wc_Sha384Free(&hmac->o_hash.sha384);
#endif
break;
#endif /* WOLFSSL_SHA384 */
#ifdef WOLFSSL_SHA512
case WC_SHA512:
wc_Sha512Free(&hmac->hash.sha512);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha512Free(&hmac->i_hash.sha512);
wc_Sha512Free(&hmac->o_hash.sha512);
#endif
break;
#endif /* WOLFSSL_SHA512 */
@ -1198,21 +1373,37 @@ void wc_HmacFree(Hmac* hmac)
#ifndef WOLFSSL_NOSHA3_224
case WC_SHA3_224:
wc_Sha3_224_Free(&hmac->hash.sha3);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha3_224_Free(&hmac->i_hash.sha3);
wc_Sha3_224_Free(&hmac->o_hash.sha3);
#endif
break;
#endif
#ifndef WOLFSSL_NOSHA3_256
case WC_SHA3_256:
wc_Sha3_256_Free(&hmac->hash.sha3);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha3_256_Free(&hmac->i_hash.sha3);
wc_Sha3_256_Free(&hmac->o_hash.sha3);
#endif
break;
#endif
#ifndef WOLFSSL_NOSHA3_384
case WC_SHA3_384:
wc_Sha3_384_Free(&hmac->hash.sha3);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha3_384_Free(&hmac->i_hash.sha3);
wc_Sha3_384_Free(&hmac->o_hash.sha3);
#endif
break;
#endif
#ifndef WOLFSSL_NOSHA3_512
case WC_SHA3_512:
wc_Sha3_512_Free(&hmac->hash.sha3);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha3_512_Free(&hmac->i_hash.sha3);
wc_Sha3_512_Free(&hmac->o_hash.sha3);
#endif
break;
#endif
#endif /* WOLFSSL_SHA3 */
@ -1220,6 +1411,10 @@ void wc_HmacFree(Hmac* hmac)
#ifdef WOLFSSL_SM3
    case WC_SM3:
        wc_Sm3Free(&hmac->hash.sm3);
#ifdef WOLFSSL_HMAC_COPY_HASH
        /* Fix: free the keyed outer hash copy too. Previously i_hash was
         * freed twice here (double free) and o_hash was never freed (leak);
         * every other algorithm's case frees i_hash then o_hash. */
        wc_Sm3Free(&hmac->i_hash.sm3);
        wc_Sm3Free(&hmac->o_hash.sm3);
#endif
        break;
#endif

View File

@ -124,6 +124,10 @@ typedef wc_Hashes wc_HmacHash;
/* Hmac digest */
struct Hmac {
wc_HmacHash hash;
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_HmacHash i_hash;
wc_HmacHash o_hash;
#endif
word32 ipad[WC_HMAC_BLOCK_SIZE / sizeof(word32)]; /* same block size all*/
word32 opad[WC_HMAC_BLOCK_SIZE / sizeof(word32)];
word32 innerHash[WC_MAX_DIGEST_SIZE / sizeof(word32)];