Performance improvements

AES-GCM: don't generate M0 when using assembly unless falling back to C
and then use new assembly code.
HMAC: add option to copy hashes (--enable-hmac-copy /
-DWOLFSSL_HMAC_COPY_HASH) to improve performance when using the same key
for multiple operations.
pull/8429/head
Sean Parkinson 2025-02-10 17:50:26 +10:00
parent 8f131ff3d0
commit ce679ef057
8 changed files with 1061 additions and 167 deletions

View File

@ -648,6 +648,7 @@ WOLFSSL_HARDEN_TLS_ALLOW_OLD_TLS
WOLFSSL_HARDEN_TLS_ALLOW_TRUNCATED_HMAC
WOLFSSL_HARDEN_TLS_NO_PKEY_CHECK
WOLFSSL_HARDEN_TLS_NO_SCR_CHECK
WOLFSSL_HMAC_COPY_HASH
WOLFSSL_HOSTNAME_VERIFY_ALT_NAME_ONLY
WOLFSSL_I2D_ECDSA_SIG_ALLOC
WOLFSSL_IAR_ARM_TIME

View File

@ -295,6 +295,25 @@ AC_ARG_ENABLE([hmac],
[ ENABLED_HMAC=yes ]
)
# Enable HMAC hash copying (WOLFSSL_HMAC_COPY_HASH) automatically for x86_64,
# amd64 and aarch64 hosts, except when building the Linux kernel module.
HMAC_COPY_DEFAULT=no
if test "$ENABLED_LINUXKM_DEFAULTS" = "no"
then
    if test "$host_cpu" = "x86_64" || test "$host_cpu" = "aarch64" || test "$host_cpu" = "amd64"
    then
        HMAC_COPY_DEFAULT=yes
    fi
fi
AC_ARG_ENABLE([hmac-copy],
    [AS_HELP_STRING([--enable-hmac-copy],[Enables digest copying implementation for HMAC (default: enabled for x86_64/amd64/aarch64, disabled otherwise)])],
    [ ENABLED_HMAC_COPY=$enableval ],
    [ ENABLED_HMAC_COPY=$HMAC_COPY_DEFAULT ]
    )
if test "$ENABLED_HMAC_COPY" = "yes"
then
    AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_HMAC_COPY_HASH"
fi
AC_ARG_ENABLE([do178],
[AS_HELP_STRING([--enable-do178],[Enable DO-178, Will NOT work w/o DO178 license (default: disabled)])],
[ENABLED_DO178=$enableval],

View File

@ -1193,7 +1193,7 @@ static int lng_index = 0;
#ifndef NO_MAIN_DRIVER
#ifndef MAIN_NO_ARGS
static const char* bench_Usage_msg1[][25] = {
static const char* bench_Usage_msg1[][27] = {
/* 0 English */
{ "-? <num> Help, print this usage\n",
" 0: English, 1: Japanese\n",
@ -1207,6 +1207,8 @@ static const char* bench_Usage_msg1[][25] = {
" (if set via -aad_size) <aad_size> bytes.\n"
),
"-dgst_full Full digest operation performed.\n",
"-mac_final MAC update and final operation timed.\n",
"-aead_set_key Set the key as part of the timing of AEAD ciphers.\n",
"-rsa_sign Measure RSA sign/verify instead of encrypt/decrypt.\n",
"<keySz> -rsa-sz\n Measure RSA <key size> performance.\n",
"-ffhdhe2048 Measure DH using FFDHE 2048-bit parameters.\n",
@ -1240,6 +1242,8 @@ static const char* bench_Usage_msg1[][25] = {
"-aad_size <num> TBD.\n",
"-all_aad TBD.\n",
"-dgst_full フルの digest 暗号操作を実施します。\n",
"-mac_final MAC update and final operation timed.\n",
"-aead_set_key Set the key as part of the timing of AEAD ciphers.\n",
"-rsa_sign 暗号/復号化の代わりに RSA の署名/検証を測定します。\n",
"<keySz> -rsa-sz\n RSA <key size> の性能を測定します。\n",
"-ffhdhe2048 Measure DH using FFDHE 2048-bit parameters.\n",
@ -2056,6 +2060,8 @@ static int numBlocks = NUM_BLOCKS;
static word32 bench_size = BENCH_SIZE;
static int base2 = 1;
static int digest_stream = 1;
static int mac_stream = 1;
static int aead_set_key = 0;
#ifdef HAVE_CHACHA
static int encrypt_only = 0;
#endif
@ -4505,10 +4511,12 @@ static void bench_aesgcm_internal(int useDeviceID,
goto exit;
}
ret = wc_AesGcmSetKey(enc[i], key, keySz);
if (ret != 0) {
printf("AesGcmSetKey failed, ret = %d\n", ret);
goto exit;
if (!aead_set_key) {
ret = wc_AesGcmSetKey(enc[i], key, keySz);
if (ret != 0) {
printf("AesGcmSetKey failed, ret = %d\n", ret);
goto exit;
}
}
}
@ -4522,6 +4530,14 @@ static void bench_aesgcm_internal(int useDeviceID,
for (i = 0; i < BENCH_MAX_PENDING; i++) {
if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(enc[i]), 0,
&times, numBlocks, &pending)) {
if (aead_set_key) {
ret = wc_AesGcmSetKey(enc[i], key, keySz);
if (!bench_async_handle(&ret,
BENCH_ASYNC_GET_DEV(enc[i]), 0,
&times, &pending)) {
goto exit_aes_gcm;
}
}
ret = wc_AesGcmEncrypt(enc[i], bench_cipher,
bench_plain, bench_size,
iv, ivSz, bench_tag, AES_AUTH_TAG_SZ,
@ -4560,10 +4576,12 @@ exit_aes_gcm:
goto exit;
}
ret = wc_AesGcmSetKey(dec[i], key, keySz);
if (ret != 0) {
printf("AesGcmSetKey failed, ret = %d\n", ret);
goto exit;
if (!aead_set_key) {
ret = wc_AesGcmSetKey(dec[i], key, keySz);
if (ret != 0) {
printf("AesGcmSetKey failed, ret = %d\n", ret);
goto exit;
}
}
}
@ -4576,6 +4594,14 @@ exit_aes_gcm:
for (i = 0; i < BENCH_MAX_PENDING; i++) {
if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(dec[i]), 0,
&times, numBlocks, &pending)) {
if (aead_set_key) {
ret = wc_AesGcmSetKey(dec[i], key, keySz);
if (!bench_async_handle(&ret,
BENCH_ASYNC_GET_DEV(dec[i]), 0,
&times, &pending)) {
goto exit_aes_gcm_dec;
}
}
ret = wc_AesGcmDecrypt(dec[i], bench_plain,
bench_cipher, bench_size,
iv, ivSz, bench_tag, AES_AUTH_TAG_SZ,
@ -8300,50 +8326,89 @@ static void bench_hmac(int useDeviceID, int type, int digestSz,
}
}
bench_stats_start(&count, &start);
do {
for (times = 0; times < numBlocks || pending > 0; ) {
bench_async_poll(&pending);
/* while free pending slots in queue, submit ops */
for (i = 0; i < BENCH_MAX_PENDING; i++) {
if (bench_async_check(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
&times, numBlocks, &pending)) {
ret = wc_HmacUpdate(hmac[i], bench_plain, bench_size);
if (!bench_async_handle(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]),
0, &times, &pending)) {
goto exit_hmac;
}
}
} /* for i */
} /* for times */
count += times;
times = 0;
if (mac_stream) {
bench_stats_start(&count, &start);
do {
bench_async_poll(&pending);
for (times = 0; times < numBlocks || pending > 0; ) {
bench_async_poll(&pending);
for (i = 0; i < BENCH_MAX_PENDING; i++) {
if (bench_async_check(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
&times, numBlocks, &pending)) {
ret = wc_HmacFinal(hmac[i], digest[i]);
if (!bench_async_handle(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]),
0, &times, &pending)) {
goto exit_hmac;
/* while free pending slots in queue, submit ops */
for (i = 0; i < BENCH_MAX_PENDING; i++) {
if (bench_async_check(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
&times, numBlocks, &pending)) {
ret = wc_HmacUpdate(hmac[i], bench_plain, bench_size);
if (!bench_async_handle(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]),
0, &times, &pending)) {
goto exit_hmac;
}
}
}
RECORD_MULTI_VALUE_STATS();
} /* for i */
} while (pending > 0);
} while (bench_stats_check(start)
#ifdef MULTI_VALUE_STATISTICS
|| runs < minimum_runs
#endif
);
} /* for i */
} /* for times */
count += times;
times = 0;
do {
bench_async_poll(&pending);
for (i = 0; i < BENCH_MAX_PENDING; i++) {
if (bench_async_check(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
&times, numBlocks, &pending)) {
ret = wc_HmacFinal(hmac[i], digest[i]);
if (!bench_async_handle(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]),
0, &times, &pending)) {
goto exit_hmac;
}
}
RECORD_MULTI_VALUE_STATS();
} /* for i */
} while (pending > 0);
} while (bench_stats_check(start)
#ifdef MULTI_VALUE_STATISTICS
|| runs < minimum_runs
#endif
);
}
else {
bench_stats_start(&count, &start);
do {
for (times = 0; times < numBlocks || pending > 0; ) {
bench_async_poll(&pending);
/* while free pending slots in queue, submit ops */
for (i = 0; i < BENCH_MAX_PENDING; i++) {
if (bench_async_check(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
&times, numBlocks, &pending)) {
ret = wc_HmacUpdate(hmac[i], bench_plain, bench_size);
if (!bench_async_handle(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]),
0, &times, &pending)) {
goto exit_hmac;
}
}
if (bench_async_check(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
&times, numBlocks, &pending)) {
ret = wc_HmacFinal(hmac[i], digest[i]);
if (!bench_async_handle(&ret,
BENCH_ASYNC_GET_DEV(hmac[i]),
0, &times, &pending)) {
goto exit_hmac;
}
}
} /* for i */
} /* for times */
count += times;
} while (bench_stats_check(start)
#ifdef MULTI_VALUE_STATISTICS
|| runs < minimum_runs
#endif
);
}
exit_hmac:
bench_stats_sym_finish(label, useDeviceID, count, bench_size, start, ret);
@ -14989,6 +15054,7 @@ static void Usage(void)
e += 3;
#endif
printf("%s", bench_Usage_msg1[lng_index][e++]); /* option -dgst_full */
printf("%s", bench_Usage_msg1[lng_index][e++]); /* option -mca_final */
#ifndef NO_RSA
printf("%s", bench_Usage_msg1[lng_index][e++]); /* option -ras_sign */
#ifdef WOLFSSL_KEY_GEN
@ -15186,6 +15252,10 @@ int wolfcrypt_benchmark_main(int argc, char** argv)
#endif
else if (string_matches(argv[1], "-dgst_full"))
digest_stream = 0;
else if (string_matches(argv[1], "-mac_final"))
mac_stream = 0;
else if (string_matches(argv[1], "-aead_set_key"))
aead_set_key = 1;
#ifdef HAVE_CHACHA
else if (string_matches(argv[1], "-enc_only"))
encrypt_only = 1;

View File

@ -6633,6 +6633,25 @@ void GenerateM0(Gcm* gcm)
#endif /* GCM_TABLE */
#if defined(WOLFSSL_AESNI) && defined(USE_INTEL_SPEEDUP)
#define HAVE_INTEL_AVX1
#define HAVE_INTEL_AVX2
#endif
#if defined(WOLFSSL_AESNI) && defined(GCM_TABLE_4BIT) && \
defined(WC_C_DYNAMIC_FALLBACK)
void GCM_generate_m0_aesni(const unsigned char *h, unsigned char *m)
XASM_LINK("GCM_generate_m0_aesni");
#ifdef HAVE_INTEL_AVX1
void GCM_generate_m0_avx1(const unsigned char *h, unsigned char *m)
XASM_LINK("GCM_generate_m0_avx1");
#endif
#ifdef HAVE_INTEL_AVX2
void GCM_generate_m0_avx2(const unsigned char *h, unsigned char *m)
XASM_LINK("GCM_generate_m0_avx2");
#endif
#endif /* WOLFSSL_AESNI && GCM_TABLE_4BIT && WC_C_DYNAMIC_FALLBACK */
/* Software AES - GCM SetKey */
int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
{
@ -6702,9 +6721,33 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
VECTOR_REGISTERS_POP;
}
if (ret == 0) {
#if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT)
GenerateM0(&aes->gcm);
#endif /* GCM_TABLE */
#if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT)
#if defined(WOLFSSL_AESNI) && defined(GCM_TABLE_4BIT)
if (aes->use_aesni) {
#if defined(WC_C_DYNAMIC_FALLBACK)
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_AVX2(intel_flags)) {
GCM_generate_m0_avx2(aes->gcm.H, (byte*)aes->gcm.M0);
}
else
#endif
#if defined(HAVE_INTEL_AVX1)
if (IS_INTEL_AVX1(intel_flags)) {
GCM_generate_m0_avx1(aes->gcm.H, (byte*)aes->gcm.M0);
}
else
#endif
{
GCM_generate_m0_aesni(aes->gcm.H, (byte*)aes->gcm.M0);
}
#endif
}
else
#endif
{
GenerateM0(&aes->gcm);
}
#endif /* GCM_TABLE || GCM_TABLE_4BIT */
}
#endif /* FREESCALE_LTC_AES_GCM */
@ -6727,11 +6770,6 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
#ifdef WOLFSSL_AESNI
#if defined(USE_INTEL_SPEEDUP)
#define HAVE_INTEL_AVX1
#define HAVE_INTEL_AVX2
#endif /* USE_INTEL_SPEEDUP */
void AES_GCM_encrypt_aesni(const unsigned char *in, unsigned char *out,
const unsigned char* addt, const unsigned char* ivec,
unsigned char *tag, word32 nbytes,

View File

@ -56,6 +56,120 @@
#else
.p2align 4
#endif /* __APPLE__ */
/* Shuffle mask that reverses the 16 bytes of an XMM register. */
L_GCM_generate_m0_aesni_rev8:
.quad 0x8090a0b0c0d0e0f, 0x1020304050607
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
/* High quadword holds 0xe1 << 56: GHASH reduction constant for the GCM
 * polynomial. Used to reduce a one-bit right shift of a 128-bit value. */
L_GCM_generate_m0_aesni_mod2_128:
.quad 0x0, 0xe100000000000000
#ifndef __APPLE__
.text
.globl GCM_generate_m0_aesni
.type GCM_generate_m0_aesni,@function
.align 16
GCM_generate_m0_aesni:
#else
.section __TEXT,__text
.globl _GCM_generate_m0_aesni
.p2align 4
_GCM_generate_m0_aesni:
#endif /* __APPLE__ */
/* void GCM_generate_m0_aesni(const unsigned char* h, unsigned char* m)
 * Build the 256-byte (16 entries x 16 bytes) 4-bit GHASH table M0 from the
 * hash key H. SSE/SSSE3 (pshufb) implementation.
 *   rdi = h: 16-byte GHASH key H.
 *   rsi = m: output table M0.
 */
/* Entry 0 of the table is zero; load H and byte-reverse it. */
pxor %xmm4, %xmm4
movdqu (%rdi), %xmm0
movdqu %xmm4, (%rsi)
movdqu %xmm0, %xmm4
pshufb L_GCM_generate_m0_aesni_rev8(%rip), %xmm0
/* xmm1 = reduce(xmm0 >> 1): 128-bit right shift by one bit (cross-quadword
 * carry via psllq 63 + psrldq 8), then xor the reduction constant under a
 * sign-extended mask of the top bit. */
movdqu %xmm0, %xmm6
movdqu %xmm0, %xmm5
psllq $63, %xmm6
psrlq $0x01, %xmm5
psrldq $8, %xmm6
por %xmm6, %xmm5
pshufd $0xff, %xmm0, %xmm1
psrad $31, %xmm1
pand L_GCM_generate_m0_aesni_mod2_128(%rip), %xmm1
pxor %xmm5, %xmm1
/* xmm2 = reduce(xmm1 >> 1). */
movdqu %xmm1, %xmm6
movdqu %xmm1, %xmm5
psllq $63, %xmm6
psrlq $0x01, %xmm5
psrldq $8, %xmm6
por %xmm6, %xmm5
pshufd $0xff, %xmm1, %xmm2
psrad $31, %xmm2
pand L_GCM_generate_m0_aesni_mod2_128(%rip), %xmm2
pxor %xmm5, %xmm2
/* xmm3 = reduce(xmm2 >> 1). */
movdqu %xmm2, %xmm6
movdqu %xmm2, %xmm5
psllq $63, %xmm6
psrlq $0x01, %xmm5
psrldq $8, %xmm6
por %xmm6, %xmm5
pshufd $0xff, %xmm2, %xmm3
psrad $31, %xmm3
pand L_GCM_generate_m0_aesni_mod2_128(%rip), %xmm3
pxor %xmm5, %xmm3
/* Byte-reverse the four shifted values back; table entry at offset 16*k is
 * the XOR of the values selected by the bits of the 4-bit index k. */
pshufb L_GCM_generate_m0_aesni_rev8(%rip), %xmm3
pshufb L_GCM_generate_m0_aesni_rev8(%rip), %xmm2
movdqu %xmm3, %xmm4
pshufb L_GCM_generate_m0_aesni_rev8(%rip), %xmm1
pshufb L_GCM_generate_m0_aesni_rev8(%rip), %xmm0
pxor %xmm2, %xmm4
movdqu %xmm3, 16(%rsi)
movdqu %xmm2, 32(%rsi)
movdqu %xmm4, 48(%rsi)
movdqu %xmm1, 64(%rsi)
movdqu %xmm3, %xmm5
movdqu %xmm2, %xmm6
movdqu %xmm4, %xmm7
pxor %xmm1, %xmm5
pxor %xmm1, %xmm6
pxor %xmm1, %xmm7
movdqu %xmm5, 80(%rsi)
movdqu %xmm6, 96(%rsi)
movdqu %xmm7, 112(%rsi)
movdqu %xmm0, 128(%rsi)
pxor %xmm0, %xmm1
movdqu %xmm3, %xmm5
movdqu %xmm2, %xmm7
pxor %xmm0, %xmm5
pxor %xmm0, %xmm7
movdqu %xmm5, 144(%rsi)
movdqu %xmm7, 160(%rsi)
pxor %xmm3, %xmm7
movdqu %xmm7, 176(%rsi)
movdqu %xmm1, 192(%rsi)
movdqu %xmm3, %xmm5
movdqu %xmm2, %xmm6
movdqu %xmm4, %xmm7
pxor %xmm1, %xmm5
pxor %xmm1, %xmm6
pxor %xmm1, %xmm7
movdqu %xmm5, 208(%rsi)
movdqu %xmm6, 224(%rsi)
movdqu %xmm7, 240(%rsi)
repz retq
#ifndef __APPLE__
.size GCM_generate_m0_aesni,.-GCM_generate_m0_aesni
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_aes_gcm_one:
.quad 0x0, 0x1
#ifndef __APPLE__
@ -6221,6 +6335,105 @@ L_AES_GCM_decrypt_final_aesni_cmp_tag_done:
#else
.p2align 4
#endif /* __APPLE__ */
/* Shuffle mask that reverses the 16 bytes of an XMM register. */
L_GCM_generate_m0_avx1_rev8:
.quad 0x8090a0b0c0d0e0f, 0x1020304050607
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
/* High quadword holds 0xe1 << 56: GHASH reduction constant for the GCM
 * polynomial. */
L_GCM_generate_m0_avx1_mod2_128:
.quad 0x0, 0xe100000000000000
#ifndef __APPLE__
.text
.globl GCM_generate_m0_avx1
.type GCM_generate_m0_avx1,@function
.align 16
GCM_generate_m0_avx1:
#else
.section __TEXT,__text
.globl _GCM_generate_m0_avx1
.p2align 4
_GCM_generate_m0_avx1:
#endif /* __APPLE__ */
/* void GCM_generate_m0_avx1(const unsigned char* h, unsigned char* m)
 * Build the 256-byte (16 entries x 16 bytes) 4-bit GHASH table M0 from the
 * hash key H. AVX (3-operand VEX) implementation.
 *   rdi = h: 16-byte GHASH key H.
 *   rsi = m: output table M0.
 */
/* Entry 0 of the table is zero; load H and byte-reverse it. */
vpxor %xmm4, %xmm4, %xmm4
vmovdqu (%rdi), %xmm0
vmovdqu %xmm4, (%rsi)
vmovdqu %xmm0, %xmm4
vpshufb L_GCM_generate_m0_avx1_rev8(%rip), %xmm0, %xmm0
/* xmm1 = reduce(xmm0 >> 1): one-bit 128-bit right shift with cross-quadword
 * carry, then conditional xor of the reduction constant (sign mask). */
vpsllq $63, %xmm0, %xmm6
vpsrlq $0x01, %xmm0, %xmm5
vpsrldq $8, %xmm6, %xmm6
vpor %xmm6, %xmm5, %xmm5
vpshufd $0xff, %xmm0, %xmm1
vpsrad $31, %xmm1, %xmm1
vpand L_GCM_generate_m0_avx1_mod2_128(%rip), %xmm1, %xmm1
vpxor %xmm5, %xmm1, %xmm1
/* xmm2 = reduce(xmm1 >> 1). */
vpsllq $63, %xmm1, %xmm6
vpsrlq $0x01, %xmm1, %xmm5
vpsrldq $8, %xmm6, %xmm6
vpor %xmm6, %xmm5, %xmm5
vpshufd $0xff, %xmm1, %xmm2
vpsrad $31, %xmm2, %xmm2
vpand L_GCM_generate_m0_avx1_mod2_128(%rip), %xmm2, %xmm2
vpxor %xmm5, %xmm2, %xmm2
/* xmm3 = reduce(xmm2 >> 1). */
vpsllq $63, %xmm2, %xmm6
vpsrlq $0x01, %xmm2, %xmm5
vpsrldq $8, %xmm6, %xmm6
vpor %xmm6, %xmm5, %xmm5
vpshufd $0xff, %xmm2, %xmm3
vpsrad $31, %xmm3, %xmm3
vpand L_GCM_generate_m0_avx1_mod2_128(%rip), %xmm3, %xmm3
vpxor %xmm5, %xmm3, %xmm3
/* Byte-reverse the four shifted values back; table entry at offset 16*k is
 * the XOR of the values selected by the bits of the 4-bit index k. */
vpshufb L_GCM_generate_m0_avx1_rev8(%rip), %xmm3, %xmm3
vpshufb L_GCM_generate_m0_avx1_rev8(%rip), %xmm2, %xmm2
vpshufb L_GCM_generate_m0_avx1_rev8(%rip), %xmm1, %xmm1
vpshufb L_GCM_generate_m0_avx1_rev8(%rip), %xmm0, %xmm0
vpxor %xmm2, %xmm3, %xmm4
vmovdqu %xmm3, 16(%rsi)
vmovdqu %xmm2, 32(%rsi)
vmovdqu %xmm4, 48(%rsi)
vmovdqu %xmm1, 64(%rsi)
vpxor %xmm1, %xmm3, %xmm5
vpxor %xmm1, %xmm2, %xmm6
vpxor %xmm1, %xmm4, %xmm7
vmovdqu %xmm5, 80(%rsi)
vmovdqu %xmm6, 96(%rsi)
vmovdqu %xmm7, 112(%rsi)
vmovdqu %xmm0, 128(%rsi)
vpxor %xmm0, %xmm1, %xmm1
vpxor %xmm0, %xmm3, %xmm5
vpxor %xmm0, %xmm2, %xmm7
vmovdqu %xmm5, 144(%rsi)
vmovdqu %xmm7, 160(%rsi)
vpxor %xmm7, %xmm3, %xmm7
vmovdqu %xmm7, 176(%rsi)
vmovdqu %xmm1, 192(%rsi)
vpxor %xmm1, %xmm3, %xmm5
vpxor %xmm1, %xmm2, %xmm6
vpxor %xmm1, %xmm4, %xmm7
vmovdqu %xmm5, 208(%rsi)
vmovdqu %xmm6, 224(%rsi)
vmovdqu %xmm7, 240(%rsi)
repz retq
#ifndef __APPLE__
.size GCM_generate_m0_avx1,.-GCM_generate_m0_avx1
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_aes_gcm_one:
.quad 0x0, 0x1
#ifndef __APPLE__
@ -11454,6 +11667,105 @@ L_AES_GCM_decrypt_final_avx1_cmp_tag_done:
#else
.p2align 4
#endif /* __APPLE__ */
/* Shuffle mask that reverses the 16 bytes of an XMM register. */
L_GCM_generate_m0_avx2_rev8:
.quad 0x8090a0b0c0d0e0f, 0x1020304050607
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
/* High quadword holds 0xe1 << 56: GHASH reduction constant for the GCM
 * polynomial. */
L_GCM_generate_m0_avx2_mod2_128:
.quad 0x0, 0xe100000000000000
#ifndef __APPLE__
.text
.globl GCM_generate_m0_avx2
.type GCM_generate_m0_avx2,@function
.align 16
GCM_generate_m0_avx2:
#else
.section __TEXT,__text
.globl _GCM_generate_m0_avx2
.p2align 4
_GCM_generate_m0_avx2:
#endif /* __APPLE__ */
/* void GCM_generate_m0_avx2(const unsigned char* h, unsigned char* m)
 * Build the 256-byte (16 entries x 16 bytes) 4-bit GHASH table M0 from the
 * hash key H. AVX2-build variant (same VEX instruction sequence as AVX1).
 *   rdi = h: 16-byte GHASH key H.
 *   rsi = m: output table M0.
 */
/* Entry 0 of the table is zero; load H and byte-reverse it. */
vpxor %xmm4, %xmm4, %xmm4
vmovdqu (%rdi), %xmm0
vmovdqu %xmm4, (%rsi)
vmovdqu %xmm0, %xmm4
vpshufb L_GCM_generate_m0_avx2_rev8(%rip), %xmm0, %xmm0
/* xmm1 = reduce(xmm0 >> 1): one-bit 128-bit right shift with cross-quadword
 * carry, then conditional xor of the reduction constant (sign mask). */
vpsllq $63, %xmm0, %xmm6
vpsrlq $0x01, %xmm0, %xmm5
vpsrldq $8, %xmm6, %xmm6
vpor %xmm6, %xmm5, %xmm5
vpshufd $0xff, %xmm0, %xmm1
vpsrad $31, %xmm1, %xmm1
vpand L_GCM_generate_m0_avx2_mod2_128(%rip), %xmm1, %xmm1
vpxor %xmm5, %xmm1, %xmm1
/* xmm2 = reduce(xmm1 >> 1). */
vpsllq $63, %xmm1, %xmm6
vpsrlq $0x01, %xmm1, %xmm5
vpsrldq $8, %xmm6, %xmm6
vpor %xmm6, %xmm5, %xmm5
vpshufd $0xff, %xmm1, %xmm2
vpsrad $31, %xmm2, %xmm2
vpand L_GCM_generate_m0_avx2_mod2_128(%rip), %xmm2, %xmm2
vpxor %xmm5, %xmm2, %xmm2
/* xmm3 = reduce(xmm2 >> 1). */
vpsllq $63, %xmm2, %xmm6
vpsrlq $0x01, %xmm2, %xmm5
vpsrldq $8, %xmm6, %xmm6
vpor %xmm6, %xmm5, %xmm5
vpshufd $0xff, %xmm2, %xmm3
vpsrad $31, %xmm3, %xmm3
vpand L_GCM_generate_m0_avx2_mod2_128(%rip), %xmm3, %xmm3
vpxor %xmm5, %xmm3, %xmm3
/* Byte-reverse the four shifted values back; table entry at offset 16*k is
 * the XOR of the values selected by the bits of the 4-bit index k. */
vpshufb L_GCM_generate_m0_avx2_rev8(%rip), %xmm3, %xmm3
vpshufb L_GCM_generate_m0_avx2_rev8(%rip), %xmm2, %xmm2
vpshufb L_GCM_generate_m0_avx2_rev8(%rip), %xmm1, %xmm1
vpshufb L_GCM_generate_m0_avx2_rev8(%rip), %xmm0, %xmm0
vpxor %xmm2, %xmm3, %xmm4
vmovdqu %xmm3, 16(%rsi)
vmovdqu %xmm2, 32(%rsi)
vmovdqu %xmm4, 48(%rsi)
vmovdqu %xmm1, 64(%rsi)
vpxor %xmm1, %xmm3, %xmm5
vpxor %xmm1, %xmm2, %xmm6
vpxor %xmm1, %xmm4, %xmm7
vmovdqu %xmm5, 80(%rsi)
vmovdqu %xmm6, 96(%rsi)
vmovdqu %xmm7, 112(%rsi)
vmovdqu %xmm0, 128(%rsi)
vpxor %xmm0, %xmm1, %xmm1
vpxor %xmm0, %xmm3, %xmm5
vpxor %xmm0, %xmm2, %xmm7
vmovdqu %xmm5, 144(%rsi)
vmovdqu %xmm7, 160(%rsi)
vpxor %xmm7, %xmm3, %xmm7
vmovdqu %xmm7, 176(%rsi)
vmovdqu %xmm1, 192(%rsi)
vpxor %xmm1, %xmm3, %xmm5
vpxor %xmm1, %xmm2, %xmm6
vpxor %xmm1, %xmm4, %xmm7
vmovdqu %xmm5, 208(%rsi)
vmovdqu %xmm6, 224(%rsi)
vmovdqu %xmm7, 240(%rsi)
repz retq
#ifndef __APPLE__
.size GCM_generate_m0_avx2,.-GCM_generate_m0_avx2
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx2_aes_gcm_one:
.quad 0x0, 0x1
#ifndef __APPLE__

View File

@ -40,6 +40,101 @@ IFNDEF _WIN64
_WIN64 = 1
ENDIF
_DATA SEGMENT
ALIGN 16
; Shuffle mask that reverses the 16 bytes of an XMM register.
L_GCM_generate_m0_aesni_rev8 QWORD 579005069656919567, 283686952306183
ptr_L_GCM_generate_m0_aesni_rev8 QWORD L_GCM_generate_m0_aesni_rev8
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; High quadword is 0xe1 << 56: GHASH reduction constant for the GCM polynomial.
L_GCM_generate_m0_aesni_mod2_128 QWORD 0, 16212958658533785600
ptr_L_GCM_generate_m0_aesni_mod2_128 QWORD L_GCM_generate_m0_aesni_mod2_128
_DATA ENDS
_text SEGMENT READONLY PARA
; void GCM_generate_m0_aesni(const unsigned char* h, unsigned char* m)
; Build the 256-byte (16 entries x 16 bytes) 4-bit GHASH table M0 from the
; hash key H. SSE/SSSE3 implementation.
;   rcx = h: 16-byte GHASH key H.
;   rdx = m: output table M0.
GCM_generate_m0_aesni PROC
; xmm6/xmm7 are non-volatile in the Microsoft x64 ABI - save them.
sub rsp, 32
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
; Entry 0 of the table is zero; load H and byte-reverse it.
pxor xmm4, xmm4
movdqu xmm0, OWORD PTR [rcx]
movdqu OWORD PTR [rdx], xmm4
movdqu xmm4, xmm0
pshufb xmm0, OWORD PTR L_GCM_generate_m0_aesni_rev8
; xmm1 = reduce(xmm0 >> 1): one-bit 128-bit right shift with cross-quadword
; carry, then conditional xor of the reduction constant (sign mask).
movdqu xmm6, xmm0
movdqu xmm5, xmm0
psllq xmm6, 63
psrlq xmm5, 1
psrldq xmm6, 8
por xmm5, xmm6
pshufd xmm1, xmm0, 255
psrad xmm1, 31
pand xmm1, OWORD PTR L_GCM_generate_m0_aesni_mod2_128
pxor xmm1, xmm5
; xmm2 = reduce(xmm1 >> 1).
movdqu xmm6, xmm1
movdqu xmm5, xmm1
psllq xmm6, 63
psrlq xmm5, 1
psrldq xmm6, 8
por xmm5, xmm6
pshufd xmm2, xmm1, 255
psrad xmm2, 31
pand xmm2, OWORD PTR L_GCM_generate_m0_aesni_mod2_128
pxor xmm2, xmm5
; xmm3 = reduce(xmm2 >> 1).
movdqu xmm6, xmm2
movdqu xmm5, xmm2
psllq xmm6, 63
psrlq xmm5, 1
psrldq xmm6, 8
por xmm5, xmm6
pshufd xmm3, xmm2, 255
psrad xmm3, 31
pand xmm3, OWORD PTR L_GCM_generate_m0_aesni_mod2_128
pxor xmm3, xmm5
; Byte-reverse the four shifted values back; table entry at offset 16*k is
; the XOR of the values selected by the bits of the 4-bit index k.
pshufb xmm3, OWORD PTR L_GCM_generate_m0_aesni_rev8
pshufb xmm2, OWORD PTR L_GCM_generate_m0_aesni_rev8
movdqu xmm4, xmm3
pshufb xmm1, OWORD PTR L_GCM_generate_m0_aesni_rev8
pshufb xmm0, OWORD PTR L_GCM_generate_m0_aesni_rev8
pxor xmm4, xmm2
movdqu OWORD PTR [rdx+16], xmm3
movdqu OWORD PTR [rdx+32], xmm2
movdqu OWORD PTR [rdx+48], xmm4
movdqu OWORD PTR [rdx+64], xmm1
movdqu xmm5, xmm3
movdqu xmm6, xmm2
movdqu xmm7, xmm4
pxor xmm5, xmm1
pxor xmm6, xmm1
pxor xmm7, xmm1
movdqu OWORD PTR [rdx+80], xmm5
movdqu OWORD PTR [rdx+96], xmm6
movdqu OWORD PTR [rdx+112], xmm7
movdqu OWORD PTR [rdx+128], xmm0
pxor xmm1, xmm0
movdqu xmm5, xmm3
movdqu xmm7, xmm2
pxor xmm5, xmm0
pxor xmm7, xmm0
movdqu OWORD PTR [rdx+144], xmm5
movdqu OWORD PTR [rdx+160], xmm7
pxor xmm7, xmm3
movdqu OWORD PTR [rdx+176], xmm7
movdqu OWORD PTR [rdx+192], xmm1
movdqu xmm5, xmm3
movdqu xmm6, xmm2
movdqu xmm7, xmm4
pxor xmm5, xmm1
pxor xmm6, xmm1
pxor xmm7, xmm1
movdqu OWORD PTR [rdx+208], xmm5
movdqu OWORD PTR [rdx+224], xmm6
movdqu OWORD PTR [rdx+240], xmm7
; Restore non-volatile registers.
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
add rsp, 32
ret
GCM_generate_m0_aesni ENDP
_text ENDS
_DATA SEGMENT
ALIGN 16
L_aes_gcm_one QWORD 0, 1
@ -6205,6 +6300,86 @@ _text ENDS
IFDEF HAVE_INTEL_AVX1
_DATA SEGMENT
ALIGN 16
; Shuffle mask that reverses the 16 bytes of an XMM register.
L_GCM_generate_m0_avx1_rev8 QWORD 579005069656919567, 283686952306183
ptr_L_GCM_generate_m0_avx1_rev8 QWORD L_GCM_generate_m0_avx1_rev8
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; High quadword is 0xe1 << 56: GHASH reduction constant for the GCM polynomial.
L_GCM_generate_m0_avx1_mod2_128 QWORD 0, 16212958658533785600
ptr_L_GCM_generate_m0_avx1_mod2_128 QWORD L_GCM_generate_m0_avx1_mod2_128
_DATA ENDS
_text SEGMENT READONLY PARA
; void GCM_generate_m0_avx1(const unsigned char* h, unsigned char* m)
; Build the 256-byte (16 entries x 16 bytes) 4-bit GHASH table M0 from the
; hash key H. AVX (3-operand VEX) implementation.
;   rcx = h: 16-byte GHASH key H.
;   rdx = m: output table M0.
GCM_generate_m0_avx1 PROC
; xmm6/xmm7 are non-volatile in the Microsoft x64 ABI - save them.
sub rsp, 32
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
; Entry 0 of the table is zero; load H and byte-reverse it.
vpxor xmm4, xmm4, xmm4
vmovdqu xmm0, OWORD PTR [rcx]
vmovdqu OWORD PTR [rdx], xmm4
vmovdqu xmm4, xmm0
vpshufb xmm0, xmm0, OWORD PTR L_GCM_generate_m0_avx1_rev8
; xmm1 = reduce(xmm0 >> 1): one-bit 128-bit right shift with cross-quadword
; carry, then conditional xor of the reduction constant (sign mask).
vpsllq xmm6, xmm0, 63
vpsrlq xmm5, xmm0, 1
vpsrldq xmm6, xmm6, 8
vpor xmm5, xmm5, xmm6
vpshufd xmm1, xmm0, 255
vpsrad xmm1, xmm1, 31
vpand xmm1, xmm1, OWORD PTR L_GCM_generate_m0_avx1_mod2_128
vpxor xmm1, xmm1, xmm5
; xmm2 = reduce(xmm1 >> 1).
vpsllq xmm6, xmm1, 63
vpsrlq xmm5, xmm1, 1
vpsrldq xmm6, xmm6, 8
vpor xmm5, xmm5, xmm6
vpshufd xmm2, xmm1, 255
vpsrad xmm2, xmm2, 31
vpand xmm2, xmm2, OWORD PTR L_GCM_generate_m0_avx1_mod2_128
vpxor xmm2, xmm2, xmm5
; xmm3 = reduce(xmm2 >> 1).
vpsllq xmm6, xmm2, 63
vpsrlq xmm5, xmm2, 1
vpsrldq xmm6, xmm6, 8
vpor xmm5, xmm5, xmm6
vpshufd xmm3, xmm2, 255
vpsrad xmm3, xmm3, 31
vpand xmm3, xmm3, OWORD PTR L_GCM_generate_m0_avx1_mod2_128
vpxor xmm3, xmm3, xmm5
; Byte-reverse the four shifted values back; table entry at offset 16*k is
; the XOR of the values selected by the bits of the 4-bit index k.
vpshufb xmm3, xmm3, OWORD PTR L_GCM_generate_m0_avx1_rev8
vpshufb xmm2, xmm2, OWORD PTR L_GCM_generate_m0_avx1_rev8
vpshufb xmm1, xmm1, OWORD PTR L_GCM_generate_m0_avx1_rev8
vpshufb xmm0, xmm0, OWORD PTR L_GCM_generate_m0_avx1_rev8
vpxor xmm4, xmm3, xmm2
vmovdqu OWORD PTR [rdx+16], xmm3
vmovdqu OWORD PTR [rdx+32], xmm2
vmovdqu OWORD PTR [rdx+48], xmm4
vmovdqu OWORD PTR [rdx+64], xmm1
vpxor xmm5, xmm3, xmm1
vpxor xmm6, xmm2, xmm1
vpxor xmm7, xmm4, xmm1
vmovdqu OWORD PTR [rdx+80], xmm5
vmovdqu OWORD PTR [rdx+96], xmm6
vmovdqu OWORD PTR [rdx+112], xmm7
vmovdqu OWORD PTR [rdx+128], xmm0
vpxor xmm1, xmm1, xmm0
vpxor xmm5, xmm3, xmm0
vpxor xmm7, xmm2, xmm0
vmovdqu OWORD PTR [rdx+144], xmm5
vmovdqu OWORD PTR [rdx+160], xmm7
vpxor xmm7, xmm3, xmm7
vmovdqu OWORD PTR [rdx+176], xmm7
vmovdqu OWORD PTR [rdx+192], xmm1
vpxor xmm5, xmm3, xmm1
vpxor xmm6, xmm2, xmm1
vpxor xmm7, xmm4, xmm1
vmovdqu OWORD PTR [rdx+208], xmm5
vmovdqu OWORD PTR [rdx+224], xmm6
vmovdqu OWORD PTR [rdx+240], xmm7
; Restore non-volatile registers.
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
add rsp, 32
ret
GCM_generate_m0_avx1 ENDP
_text ENDS
_DATA SEGMENT
ALIGN 16
L_avx1_aes_gcm_one QWORD 0, 1
ptr_L_avx1_aes_gcm_one QWORD L_avx1_aes_gcm_one
_DATA ENDS
@ -11436,6 +11611,86 @@ ENDIF
IFDEF HAVE_INTEL_AVX2
_DATA SEGMENT
ALIGN 16
; Shuffle mask that reverses the 16 bytes of an XMM register.
L_GCM_generate_m0_avx2_rev8 QWORD 579005069656919567, 283686952306183
ptr_L_GCM_generate_m0_avx2_rev8 QWORD L_GCM_generate_m0_avx2_rev8
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; High quadword is 0xe1 << 56: GHASH reduction constant for the GCM polynomial.
L_GCM_generate_m0_avx2_mod2_128 QWORD 0, 16212958658533785600
ptr_L_GCM_generate_m0_avx2_mod2_128 QWORD L_GCM_generate_m0_avx2_mod2_128
_DATA ENDS
_text SEGMENT READONLY PARA
; void GCM_generate_m0_avx2(const unsigned char* h, unsigned char* m)
; Build the 256-byte (16 entries x 16 bytes) 4-bit GHASH table M0 from the
; hash key H. AVX2-build variant (same VEX instruction sequence as AVX1).
;   rcx = h: 16-byte GHASH key H.
;   rdx = m: output table M0.
GCM_generate_m0_avx2 PROC
; xmm6/xmm7 are non-volatile in the Microsoft x64 ABI - save them.
sub rsp, 32
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
; Entry 0 of the table is zero; load H and byte-reverse it.
vpxor xmm4, xmm4, xmm4
vmovdqu xmm0, OWORD PTR [rcx]
vmovdqu OWORD PTR [rdx], xmm4
vmovdqu xmm4, xmm0
vpshufb xmm0, xmm0, OWORD PTR L_GCM_generate_m0_avx2_rev8
; xmm1 = reduce(xmm0 >> 1): one-bit 128-bit right shift with cross-quadword
; carry, then conditional xor of the reduction constant (sign mask).
vpsllq xmm6, xmm0, 63
vpsrlq xmm5, xmm0, 1
vpsrldq xmm6, xmm6, 8
vpor xmm5, xmm5, xmm6
vpshufd xmm1, xmm0, 255
vpsrad xmm1, xmm1, 31
vpand xmm1, xmm1, OWORD PTR L_GCM_generate_m0_avx2_mod2_128
vpxor xmm1, xmm1, xmm5
; xmm2 = reduce(xmm1 >> 1).
vpsllq xmm6, xmm1, 63
vpsrlq xmm5, xmm1, 1
vpsrldq xmm6, xmm6, 8
vpor xmm5, xmm5, xmm6
vpshufd xmm2, xmm1, 255
vpsrad xmm2, xmm2, 31
vpand xmm2, xmm2, OWORD PTR L_GCM_generate_m0_avx2_mod2_128
vpxor xmm2, xmm2, xmm5
; xmm3 = reduce(xmm2 >> 1).
vpsllq xmm6, xmm2, 63
vpsrlq xmm5, xmm2, 1
vpsrldq xmm6, xmm6, 8
vpor xmm5, xmm5, xmm6
vpshufd xmm3, xmm2, 255
vpsrad xmm3, xmm3, 31
vpand xmm3, xmm3, OWORD PTR L_GCM_generate_m0_avx2_mod2_128
vpxor xmm3, xmm3, xmm5
; Byte-reverse the four shifted values back; table entry at offset 16*k is
; the XOR of the values selected by the bits of the 4-bit index k.
vpshufb xmm3, xmm3, OWORD PTR L_GCM_generate_m0_avx2_rev8
vpshufb xmm2, xmm2, OWORD PTR L_GCM_generate_m0_avx2_rev8
vpshufb xmm1, xmm1, OWORD PTR L_GCM_generate_m0_avx2_rev8
vpshufb xmm0, xmm0, OWORD PTR L_GCM_generate_m0_avx2_rev8
vpxor xmm4, xmm3, xmm2
vmovdqu OWORD PTR [rdx+16], xmm3
vmovdqu OWORD PTR [rdx+32], xmm2
vmovdqu OWORD PTR [rdx+48], xmm4
vmovdqu OWORD PTR [rdx+64], xmm1
vpxor xmm5, xmm3, xmm1
vpxor xmm6, xmm2, xmm1
vpxor xmm7, xmm4, xmm1
vmovdqu OWORD PTR [rdx+80], xmm5
vmovdqu OWORD PTR [rdx+96], xmm6
vmovdqu OWORD PTR [rdx+112], xmm7
vmovdqu OWORD PTR [rdx+128], xmm0
vpxor xmm1, xmm1, xmm0
vpxor xmm5, xmm3, xmm0
vpxor xmm7, xmm2, xmm0
vmovdqu OWORD PTR [rdx+144], xmm5
vmovdqu OWORD PTR [rdx+160], xmm7
vpxor xmm7, xmm3, xmm7
vmovdqu OWORD PTR [rdx+176], xmm7
vmovdqu OWORD PTR [rdx+192], xmm1
vpxor xmm5, xmm3, xmm1
vpxor xmm6, xmm2, xmm1
vpxor xmm7, xmm4, xmm1
vmovdqu OWORD PTR [rdx+208], xmm5
vmovdqu OWORD PTR [rdx+224], xmm6
vmovdqu OWORD PTR [rdx+240], xmm7
; Restore non-volatile registers.
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
add rsp, 32
ret
GCM_generate_m0_avx2 ENDP
_text ENDS
_DATA SEGMENT
ALIGN 16
L_avx2_aes_gcm_one QWORD 0, 1
ptr_L_avx2_aes_gcm_one QWORD L_avx2_aes_gcm_one
_DATA ENDS

View File

@ -155,76 +155,72 @@ int wc_HmacSizeByType(int type)
return ret;
}
int _InitHmac(Hmac* hmac, int type, void* heap)
static int HmacKeyInitHash(wc_HmacHash* hash, int type, void* heap, int devId)
{
int ret = 0;
#ifdef WOLF_CRYPTO_CB
int devId = hmac->devId;
#else
int devId = INVALID_DEVID;
#endif
switch (type) {
#ifndef NO_MD5
case WC_MD5:
ret = wc_InitMd5_ex(&hmac->hash.md5, heap, devId);
ret = wc_InitMd5_ex(&hash->md5, heap, devId);
break;
#endif /* !NO_MD5 */
#ifndef NO_SHA
case WC_SHA:
ret = wc_InitSha_ex(&hmac->hash.sha, heap, devId);
ret = wc_InitSha_ex(&hash->sha, heap, devId);
break;
#endif /* !NO_SHA */
#ifdef WOLFSSL_SHA224
case WC_SHA224:
ret = wc_InitSha224_ex(&hmac->hash.sha224, heap, devId);
ret = wc_InitSha224_ex(&hash->sha224, heap, devId);
break;
#endif /* WOLFSSL_SHA224 */
#ifndef NO_SHA256
case WC_SHA256:
ret = wc_InitSha256_ex(&hmac->hash.sha256, heap, devId);
ret = wc_InitSha256_ex(&hash->sha256, heap, devId);
break;
#endif /* !NO_SHA256 */
#ifdef WOLFSSL_SHA384
case WC_SHA384:
ret = wc_InitSha384_ex(&hmac->hash.sha384, heap, devId);
ret = wc_InitSha384_ex(&hash->sha384, heap, devId);
break;
#endif /* WOLFSSL_SHA384 */
#ifdef WOLFSSL_SHA512
case WC_SHA512:
ret = wc_InitSha512_ex(&hmac->hash.sha512, heap, devId);
ret = wc_InitSha512_ex(&hash->sha512, heap, devId);
break;
#endif /* WOLFSSL_SHA512 */
#ifdef WOLFSSL_SHA3
#ifndef WOLFSSL_NOSHA3_224
case WC_SHA3_224:
ret = wc_InitSha3_224(&hmac->hash.sha3, heap, devId);
ret = wc_InitSha3_224(&hash->sha3, heap, devId);
break;
#endif
#ifndef WOLFSSL_NOSHA3_256
case WC_SHA3_256:
ret = wc_InitSha3_256(&hmac->hash.sha3, heap, devId);
ret = wc_InitSha3_256(&hash->sha3, heap, devId);
break;
#endif
#ifndef WOLFSSL_NOSHA3_384
case WC_SHA3_384:
ret = wc_InitSha3_384(&hmac->hash.sha3, heap, devId);
ret = wc_InitSha3_384(&hash->sha3, heap, devId);
break;
#endif
#ifndef WOLFSSL_NOSHA3_512
case WC_SHA3_512:
ret = wc_InitSha3_512(&hmac->hash.sha3, heap, devId);
ret = wc_InitSha3_512(&hash->sha3, heap, devId);
break;
#endif
#endif
#ifdef WOLFSSL_SM3
case WC_SM3:
ret = wc_InitSm3(&hmac->hash.sm3, heap, devId);
ret = wc_InitSm3(&hash->sm3, heap, devId);
break;
#endif
@ -233,6 +229,22 @@ int _InitHmac(Hmac* hmac, int type, void* heap)
break;
}
return ret;
}
int _InitHmac(Hmac* hmac, int type, void* heap)
{
int ret;
#ifdef WOLF_CRYPTO_CB
int devId = hmac->devId;
#else
int devId = INVALID_DEVID;
#endif
ret = HmacKeyInitHash(&hmac->hash, type, heap, devId);
if (ret != 0)
return ret;
/* default to NULL heap hint or test value */
#ifdef WOLFSSL_HEAP_TEST
hmac->heap = (void*)WOLFSSL_HEAP_TEST;
@ -243,6 +255,158 @@ int _InitHmac(Hmac* hmac, int type, void* heap)
return ret;
}
#ifdef WOLFSSL_HMAC_COPY_HASH
/* Copy the state of one hash object into another for the given MAC type.
 *
 * Used to snapshot the keyed ipad/opad hash states so repeated operations
 * with the same key can skip re-hashing the pads.
 *
 * @param [in]  macType  Hash algorithm identifier (e.g. WC_SHA256).
 * @param [in]  src      Hash object to copy from.
 * @param [out] dst      Hash object to copy into.
 * @return  0 on success, or the hash-specific copy error code.
 *          Unknown macType values return 0 (no-op) - callers validate type
 *          earlier. */
static int HmacKeyCopyHash(byte macType, wc_HmacHash* src, wc_HmacHash* dst)
{
int ret = 0;
switch (macType) {
#ifndef NO_MD5
case WC_MD5:
ret = wc_Md5Copy(&src->md5, &dst->md5);
break;
#endif /* !NO_MD5 */
#ifndef NO_SHA
case WC_SHA:
ret = wc_ShaCopy(&src->sha, &dst->sha);
break;
#endif /* !NO_SHA */
#ifdef WOLFSSL_SHA224
case WC_SHA224:
ret = wc_Sha224Copy(&src->sha224, &dst->sha224);
break;
#endif /* WOLFSSL_SHA224 */
#ifndef NO_SHA256
case WC_SHA256:
ret = wc_Sha256Copy(&src->sha256, &dst->sha256);
break;
#endif /* !NO_SHA256 */
#ifdef WOLFSSL_SHA384
case WC_SHA384:
ret = wc_Sha384Copy(&src->sha384, &dst->sha384);
break;
#endif /* WOLFSSL_SHA384 */
#ifdef WOLFSSL_SHA512
case WC_SHA512:
ret = wc_Sha512Copy(&src->sha512, &dst->sha512);
break;
#endif /* WOLFSSL_SHA512 */
#ifdef WOLFSSL_SHA3
#ifndef WOLFSSL_NOSHA3_224
case WC_SHA3_224:
ret = wc_Sha3_224_Copy(&src->sha3, &dst->sha3);
break;
#endif
#ifndef WOLFSSL_NOSHA3_256
case WC_SHA3_256:
ret = wc_Sha3_256_Copy(&src->sha3, &dst->sha3);
break;
#endif
#ifndef WOLFSSL_NOSHA3_384
case WC_SHA3_384:
ret = wc_Sha3_384_Copy(&src->sha3, &dst->sha3);
break;
#endif
#ifndef WOLFSSL_NOSHA3_512
case WC_SHA3_512:
ret = wc_Sha3_512_Copy(&src->sha3, &dst->sha3);
break;
#endif
#endif /* WOLFSSL_SHA3 */
#ifdef WOLFSSL_SM3
case WC_SM3:
ret = wc_Sm3Copy(&src->sm3, &dst->sm3);
break;
#endif
default:
break;
}
return ret;
}
#endif
/* Update the hash object with a block of pad bytes (ipad or opad).
 *
 * The pad buffer must hold a full block for the hash algorithm; the block
 * size constant used per case matches the algorithm.
 *
 * @param [in]      macType  Hash algorithm identifier (e.g. WC_SHA256).
 * @param [in, out] hash     Hash object to update.
 * @param [in]      pad      Pad bytes to hash in.
 * @return  0 on success, or the hash-specific update error code.
 *          Unknown macType values return 0 (no-op) - callers validate type
 *          earlier. */
static int HmacKeyHashUpdate(byte macType, wc_HmacHash* hash, byte* pad)
{
    int ret = 0;

    switch (macType) {
    #ifndef NO_MD5
        case WC_MD5:
            ret = wc_Md5Update(&hash->md5, pad, WC_MD5_BLOCK_SIZE);
            break;
    #endif /* !NO_MD5 */
    #ifndef NO_SHA
        case WC_SHA:
            ret = wc_ShaUpdate(&hash->sha, pad, WC_SHA_BLOCK_SIZE);
            break;
    #endif /* !NO_SHA */
    #ifdef WOLFSSL_SHA224
        case WC_SHA224:
            ret = wc_Sha224Update(&hash->sha224, pad, WC_SHA224_BLOCK_SIZE);
            break;
    #endif /* WOLFSSL_SHA224 */
    #ifndef NO_SHA256
        case WC_SHA256:
            ret = wc_Sha256Update(&hash->sha256, pad, WC_SHA256_BLOCK_SIZE);
            break;
    #endif /* !NO_SHA256 */
    #ifdef WOLFSSL_SHA384
        case WC_SHA384:
            ret = wc_Sha384Update(&hash->sha384, pad, WC_SHA384_BLOCK_SIZE);
            break;
    #endif /* WOLFSSL_SHA384 */
    #ifdef WOLFSSL_SHA512
        case WC_SHA512:
            ret = wc_Sha512Update(&hash->sha512, pad, WC_SHA512_BLOCK_SIZE);
            break;
    #endif /* WOLFSSL_SHA512 */
    #ifdef WOLFSSL_SHA3
    #ifndef WOLFSSL_NOSHA3_224
        case WC_SHA3_224:
            ret = wc_Sha3_224_Update(&hash->sha3, pad, WC_SHA3_224_BLOCK_SIZE);
            break;
    #endif
    #ifndef WOLFSSL_NOSHA3_256
        case WC_SHA3_256:
            ret = wc_Sha3_256_Update(&hash->sha3, pad, WC_SHA3_256_BLOCK_SIZE);
            break;
    #endif
    #ifndef WOLFSSL_NOSHA3_384
        case WC_SHA3_384:
            ret = wc_Sha3_384_Update(&hash->sha3, pad, WC_SHA3_384_BLOCK_SIZE);
            break;
    #endif
    #ifndef WOLFSSL_NOSHA3_512
        case WC_SHA3_512:
            ret = wc_Sha3_512_Update(&hash->sha3, pad, WC_SHA3_512_BLOCK_SIZE);
            break;
    #endif
    #endif /* WOLFSSL_SHA3 */
    #ifdef WOLFSSL_SM3
        case WC_SM3:
            /* Fix: take the address of the SM3 object - was passing
             * hash->sm3 by value, which does not compile (every other case
             * and the original HmacKeyInnerHash pass a pointer). */
            ret = wc_Sm3Update(&hash->sm3, pad, WC_SM3_BLOCK_SIZE);
            break;
    #endif
        default:
            break;
    }

    return ret;
}
int wc_HmacSetKey_ex(Hmac* hmac, int type, const byte* key, word32 length,
int allowFlag)
@ -603,6 +767,29 @@ int wc_HmacSetKey_ex(Hmac* hmac, int type, const byte* key, word32 length,
}
}
#ifdef WOLFSSL_HMAC_COPY_HASH
if ( ret == 0) {
#ifdef WOLF_CRYPTO_CB
int devId = hmac->devId;
#else
int devId = INVALID_DEVID;
#endif
ret = HmacKeyInitHash(&hmac->i_hash, hmac->macType, heap, devId);
if (ret != 0)
return ret;
ret = HmacKeyInitHash(&hmac->o_hash, hmac->macType, heap, devId);
if (ret != 0)
return ret;
ret = HmacKeyHashUpdate(hmac->macType, &hmac->i_hash, ip);
if (ret != 0)
return ret;
ret = HmacKeyHashUpdate(hmac->macType, &hmac->o_hash, op);
if (ret != 0)
return ret;
}
#endif
return ret;
#endif /* WOLFSSL_MAXQ108X */
}
@ -618,96 +805,6 @@ int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
return wc_HmacSetKey_ex(hmac, type, key, length, allowFlag);
}
/* Key the inner hash: absorb one block of the inner pad (ipad) into the
 * active digest, then mark the HMAC object as software inner-keyed.
 *
 * hmac  HMAC object whose hash union and ipad are already initialized.
 * returns 0 on success, or the underlying hash update error code.
 */
static int HmacKeyInnerHash(Hmac* hmac)
{
    int ret = 0;
    /* Same pad pointer for every algorithm; only the block size differs. */
    const byte* pad = (byte*)hmac->ipad;

    switch (hmac->macType) {
    #ifndef NO_MD5
        case WC_MD5:
            ret = wc_Md5Update(&hmac->hash.md5, pad, WC_MD5_BLOCK_SIZE);
            break;
    #endif /* !NO_MD5 */

    #ifndef NO_SHA
        case WC_SHA:
            ret = wc_ShaUpdate(&hmac->hash.sha, pad, WC_SHA_BLOCK_SIZE);
            break;
    #endif /* !NO_SHA */

    #ifdef WOLFSSL_SHA224
        case WC_SHA224:
            ret = wc_Sha224Update(&hmac->hash.sha224, pad,
                WC_SHA224_BLOCK_SIZE);
            break;
    #endif /* WOLFSSL_SHA224 */

    #ifndef NO_SHA256
        case WC_SHA256:
            ret = wc_Sha256Update(&hmac->hash.sha256, pad,
                WC_SHA256_BLOCK_SIZE);
            break;
    #endif /* !NO_SHA256 */

    #ifdef WOLFSSL_SHA384
        case WC_SHA384:
            ret = wc_Sha384Update(&hmac->hash.sha384, pad,
                WC_SHA384_BLOCK_SIZE);
            break;
    #endif /* WOLFSSL_SHA384 */

    #ifdef WOLFSSL_SHA512
        case WC_SHA512:
            ret = wc_Sha512Update(&hmac->hash.sha512, pad,
                WC_SHA512_BLOCK_SIZE);
            break;
    #endif /* WOLFSSL_SHA512 */

    #ifdef WOLFSSL_SHA3
    #ifndef WOLFSSL_NOSHA3_224
        case WC_SHA3_224:
            ret = wc_Sha3_224_Update(&hmac->hash.sha3, pad,
                WC_SHA3_224_BLOCK_SIZE);
            break;
    #endif
    #ifndef WOLFSSL_NOSHA3_256
        case WC_SHA3_256:
            ret = wc_Sha3_256_Update(&hmac->hash.sha3, pad,
                WC_SHA3_256_BLOCK_SIZE);
            break;
    #endif
    #ifndef WOLFSSL_NOSHA3_384
        case WC_SHA3_384:
            ret = wc_Sha3_384_Update(&hmac->hash.sha3, pad,
                WC_SHA3_384_BLOCK_SIZE);
            break;
    #endif
    #ifndef WOLFSSL_NOSHA3_512
        case WC_SHA3_512:
            ret = wc_Sha3_512_Update(&hmac->hash.sha3, pad,
                WC_SHA3_512_BLOCK_SIZE);
            break;
    #endif
    #endif /* WOLFSSL_SHA3 */

    #ifdef WOLFSSL_SM3
        case WC_SM3:
            ret = wc_Sm3Update(&hmac->hash.sm3, pad, WC_SM3_BLOCK_SIZE);
            break;
    #endif

        default:
            /* Unknown/unsupported type: leave ret at 0, as before. */
            break;
    }

    if (ret == 0)
        hmac->innerHashKeyed = WC_HMAC_INNER_HASH_KEYED_SW;

    return ret;
}
int wc_HmacUpdate(Hmac* hmac, const byte* msg, word32 length)
{
int ret = 0;
@ -739,9 +836,14 @@ int wc_HmacUpdate(Hmac* hmac, const byte* msg, word32 length)
#endif /* WOLFSSL_ASYNC_CRYPT */
if (!hmac->innerHashKeyed) {
    /* Key the inner hash exactly once.
     * BUG FIX: the previous code also called HmacKeyInnerHash() here and
     * then discarded its return value by immediately reassigning ret; since
     * HmacKeyInnerHash() already absorbs ipad, the inner pad was absorbed
     * twice, producing incorrect MACs. The keying now happens only via the
     * path selected below, and the keyed flag is set by this caller. */
#ifndef WOLFSSL_HMAC_COPY_HASH
    /* Absorb one block of ipad into the working hash. */
    ret = HmacKeyHashUpdate(hmac->macType, &hmac->hash, (byte*)hmac->ipad);
#else
    /* Copy the pre-keyed inner hash state instead of re-hashing ipad. */
    ret = HmacKeyCopyHash(hmac->macType, &hmac->i_hash, &hmac->hash);
#endif
    if (ret != 0)
        return ret;
    hmac->innerHashKeyed = WC_HMAC_INNER_HASH_KEYED_SW;
}
switch (hmac->macType) {
@ -851,9 +953,14 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
#endif /* WOLFSSL_ASYNC_CRYPT */
if (!hmac->innerHashKeyed) {
    /* Key the inner hash exactly once.
     * BUG FIX: the previous code also called HmacKeyInnerHash() here and
     * then discarded its return value by immediately reassigning ret; since
     * HmacKeyInnerHash() already absorbs ipad, the inner pad was absorbed
     * twice, producing incorrect MACs. The keying now happens only via the
     * path selected below, and the keyed flag is set by this caller. */
#ifndef WOLFSSL_HMAC_COPY_HASH
    /* Absorb one block of ipad into the working hash. */
    ret = HmacKeyHashUpdate(hmac->macType, &hmac->hash, (byte*)hmac->ipad);
#else
    /* Copy the pre-keyed inner hash state instead of re-hashing ipad. */
    ret = HmacKeyCopyHash(hmac->macType, &hmac->i_hash, &hmac->hash);
#endif
    if (ret != 0)
        return ret;
    hmac->innerHashKeyed = WC_HMAC_INNER_HASH_KEYED_SW;
}
switch (hmac->macType) {
@ -862,8 +969,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Md5Final(&hmac->hash.md5, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->opad,
WC_MD5_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_MD5, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->innerHash,
@ -879,8 +990,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_ShaFinal(&hmac->hash.sha, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->opad,
WC_SHA_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->innerHash,
@ -896,8 +1011,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha224Final(&hmac->hash.sha224, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->opad,
WC_SHA224_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA224, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->innerHash,
@ -914,8 +1033,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha256Final(&hmac->hash.sha256, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->opad,
WC_SHA256_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA256, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->innerHash,
@ -931,8 +1054,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha384Final(&hmac->hash.sha384, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->opad,
WC_SHA384_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA384, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->innerHash,
@ -947,8 +1074,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha512Final(&hmac->hash.sha512, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->opad,
WC_SHA512_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA512, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->innerHash,
@ -965,8 +1096,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha3_224_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha3_224_Update(&hmac->hash.sha3, (byte*)hmac->opad,
WC_SHA3_224_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA3_224, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha3_224_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,
@ -981,8 +1116,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha3_256_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha3_256_Update(&hmac->hash.sha3, (byte*)hmac->opad,
WC_SHA3_256_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA3_256, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha3_256_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,
@ -997,8 +1136,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha3_384_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha3_384_Update(&hmac->hash.sha3, (byte*)hmac->opad,
WC_SHA3_384_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA3_384, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha3_384_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,
@ -1013,8 +1156,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha3_512_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha3_512_Update(&hmac->hash.sha3, (byte*)hmac->opad,
WC_SHA3_512_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA3_512, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha3_512_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,
@ -1031,8 +1178,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sm3Final(&hmac->hash.sm3, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sm3Update(&hmac->hash.sm3, (byte*)hmac->opad,
WC_SM3_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SM3, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sm3Update(&hmac->hash.sm3, (byte*)hmac->innerHash,
@ -1163,34 +1314,58 @@ void wc_HmacFree(Hmac* hmac)
#ifndef NO_MD5
case WC_MD5:
wc_Md5Free(&hmac->hash.md5);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Md5Free(&hmac->i_hash.md5);
wc_Md5Free(&hmac->o_hash.md5);
#endif
break;
#endif /* !NO_MD5 */
#ifndef NO_SHA
case WC_SHA:
wc_ShaFree(&hmac->hash.sha);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_ShaFree(&hmac->i_hash.sha);
wc_ShaFree(&hmac->o_hash.sha);
#endif
break;
#endif /* !NO_SHA */
#ifdef WOLFSSL_SHA224
case WC_SHA224:
wc_Sha224Free(&hmac->hash.sha224);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha224Free(&hmac->i_hash.sha224);
wc_Sha224Free(&hmac->o_hash.sha224);
#endif
break;
#endif /* WOLFSSL_SHA224 */
#ifndef NO_SHA256
case WC_SHA256:
wc_Sha256Free(&hmac->hash.sha256);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha256Free(&hmac->i_hash.sha256);
wc_Sha256Free(&hmac->o_hash.sha256);
#endif
break;
#endif /* !NO_SHA256 */
#ifdef WOLFSSL_SHA384
case WC_SHA384:
wc_Sha384Free(&hmac->hash.sha384);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha384Free(&hmac->i_hash.sha384);
wc_Sha384Free(&hmac->o_hash.sha384);
#endif
break;
#endif /* WOLFSSL_SHA384 */
#ifdef WOLFSSL_SHA512
case WC_SHA512:
wc_Sha512Free(&hmac->hash.sha512);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha512Free(&hmac->i_hash.sha512);
wc_Sha512Free(&hmac->o_hash.sha512);
#endif
break;
#endif /* WOLFSSL_SHA512 */
@ -1198,21 +1373,37 @@ void wc_HmacFree(Hmac* hmac)
#ifndef WOLFSSL_NOSHA3_224
case WC_SHA3_224:
wc_Sha3_224_Free(&hmac->hash.sha3);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha3_224_Free(&hmac->i_hash.sha3);
wc_Sha3_224_Free(&hmac->o_hash.sha3);
#endif
break;
#endif
#ifndef WOLFSSL_NOSHA3_256
case WC_SHA3_256:
wc_Sha3_256_Free(&hmac->hash.sha3);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha3_256_Free(&hmac->i_hash.sha3);
wc_Sha3_256_Free(&hmac->o_hash.sha3);
#endif
break;
#endif
#ifndef WOLFSSL_NOSHA3_384
case WC_SHA3_384:
wc_Sha3_384_Free(&hmac->hash.sha3);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha3_384_Free(&hmac->i_hash.sha3);
wc_Sha3_384_Free(&hmac->o_hash.sha3);
#endif
break;
#endif
#ifndef WOLFSSL_NOSHA3_512
case WC_SHA3_512:
wc_Sha3_512_Free(&hmac->hash.sha3);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha3_512_Free(&hmac->i_hash.sha3);
wc_Sha3_512_Free(&hmac->o_hash.sha3);
#endif
break;
#endif
#endif /* WOLFSSL_SHA3 */
@ -1220,6 +1411,10 @@ void wc_HmacFree(Hmac* hmac)
#ifdef WOLFSSL_SM3
    case WC_SM3:
        wc_Sm3Free(&hmac->hash.sm3);
#ifdef WOLFSSL_HMAC_COPY_HASH
        /* BUG FIX: copy-paste error freed i_hash twice and never freed
         * o_hash, leaking the outer hash state. Free both, matching every
         * other algorithm case in wc_HmacFree(). */
        wc_Sm3Free(&hmac->i_hash.sm3);
        wc_Sm3Free(&hmac->o_hash.sm3);
#endif
        break;
#endif

View File

@ -124,6 +124,10 @@ typedef wc_Hashes wc_HmacHash;
/* Hmac digest */
struct Hmac {
wc_HmacHash hash;
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_HmacHash i_hash;
wc_HmacHash o_hash;
#endif
word32 ipad[WC_HMAC_BLOCK_SIZE / sizeof(word32)]; /* same block size all*/
word32 opad[WC_HMAC_BLOCK_SIZE / sizeof(word32)];
word32 innerHash[WC_MAX_DIGEST_SIZE / sizeof(word32)];