mirror of https://github.com/wolfSSL/wolfssl.git
Performance improvements
AES-GCM: don't generate M0 in C when the assembly implementation is used; only generate it when falling back to C, and then use the new assembly routines to do so. HMAC: add an option to copy hashes (--enable-hmac-copy / -DWOLFSSL_HMAC_COPY_HASH) to improve performance when the same key is used for multiple operations.
pull/8445/head
parent
8f131ff3d0
commit
bfd52decb6
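For orientation, the workload the HMAC change targets is "one key, many MACs". A minimal caller using the public wolfCrypt API is sketched below (illustrative only; mac_many and its buffers are hypothetical names). Without the new option every wc_HmacFinal forces the next message to re-hash the inner and outer key pads; the idea behind WOLFSSL_HMAC_COPY_HASH is to keep copies of the hash states derived from the key so each new MAC starts from those copies instead.

    #include <wolfssl/wolfcrypt/settings.h>
    #include <wolfssl/wolfcrypt/hmac.h>

    /* Hypothetical helper: MAC msgCnt messages under one key. */
    int mac_many(const byte* key, word32 keySz,
                 const byte** msgs, const word32* msgSzs, int msgCnt,
                 byte digests[][WC_SHA256_DIGEST_SIZE])
    {
        Hmac hmac;
        int  i, ret;

        ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
        if (ret != 0)
            return ret;
        ret = wc_HmacSetKey(&hmac, WC_SHA256, key, keySz);  /* key set once */
        for (i = 0; (ret == 0) && (i < msgCnt); i++) {
            ret = wc_HmacUpdate(&hmac, msgs[i], msgSzs[i]);
            if (ret == 0)
                ret = wc_HmacFinal(&hmac, digests[i]);      /* re-keys for next use */
        }
        wc_HmacFree(&hmac);
        return ret;
    }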
@@ -648,6 +648,7 @@ WOLFSSL_HARDEN_TLS_ALLOW_OLD_TLS
 WOLFSSL_HARDEN_TLS_ALLOW_TRUNCATED_HMAC
 WOLFSSL_HARDEN_TLS_NO_PKEY_CHECK
 WOLFSSL_HARDEN_TLS_NO_SCR_CHECK
+WOLFSSL_HMAC_COPY_HASH
 WOLFSSL_HOSTNAME_VERIFY_ALT_NAME_ONLY
 WOLFSSL_I2D_ECDSA_SIG_ALLOC
 WOLFSSL_IAR_ARM_TIME

configure.ac (+19)
@@ -295,6 +295,25 @@ AC_ARG_ENABLE([hmac],
     [ ENABLED_HMAC=yes ]
     )
 
+# enable HMAC hash copying automatically for x86_64 and aarch64 (except Linux kernel module)
+HMAC_COPY_DEFAULT=no
+if test "$ENABLED_LINUXKM_DEFAULTS" = "no"
+then
+    if test "$host_cpu" = "x86_64" || test "$host_cpu" = "aarch64" || test "$host_cpu" = "amd64"
+    then
+        HMAC_COPY_DEFAULT=yes
+    fi
+fi
+AC_ARG_ENABLE([hmac-copy],
+    [AS_HELP_STRING([--enable-hmac-copy],[Enables digest copying implementation for HMAC (default: disabled)])],
+    [ ENABLED_HMAC_COPY=$enableval ],
+    [ ENABLED_HMAC_COPY=$HMAC_COPY_DEFAULT ]
+    )
+if test "$ENABLED_HMAC_COPY" = "yes"
+then
+    AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_HMAC_COPY_HASH"
+fi
+
 AC_ARG_ENABLE([do178],
     [AS_HELP_STRING([--enable-do178],[Enable DO-178, Will NOT work w/o DO178 license (default: disabled)])],
     [ENABLED_DO178=$enableval],
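For trying this out: the option can be toggled at configure time, e.g. ./configure --enable-hmac-copy, or by defining WOLFSSL_HMAC_COPY_HASH in user_settings.h for non-autoconf builds. Note that although the help string says "default: disabled", the logic above turns it on by default for x86_64/amd64 and aarch64 hosts outside the Linux kernel module configuration.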
@@ -1193,7 +1193,7 @@ static int lng_index = 0;
 
 #ifndef NO_MAIN_DRIVER
 #ifndef MAIN_NO_ARGS
-static const char* bench_Usage_msg1[][25] = {
+static const char* bench_Usage_msg1[][27] = {
     /* 0 English */
     { "-? <num> Help, print this usage\n",
       " 0: English, 1: Japanese\n",

@@ -1207,6 +1207,8 @@ static const char* bench_Usage_msg1[][25] = {
       " (if set via -aad_size) <aad_size> bytes.\n"
      ),
      "-dgst_full Full digest operation performed.\n",
+     "-mac_final MAC update and final operation timed.\n",
+     "-aead_set_key Set the key as part of the timing of AEAD ciphers.\n",
      "-rsa_sign Measure RSA sign/verify instead of encrypt/decrypt.\n",
      "<keySz> -rsa-sz\n Measure RSA <key size> performance.\n",
      "-ffhdhe2048 Measure DH using FFDHE 2048-bit parameters.\n",

@@ -1240,6 +1242,8 @@ static const char* bench_Usage_msg1[][25] = {
      "-aad_size <num> TBD.\n",
      "-all_aad TBD.\n",
      "-dgst_full フルの digest 暗号操作を実施します。\n",
+     "-mac_final MAC update and final operation timed.\n",
+     "-aead_set_key Set the key as part of the timing of AEAD ciphers.\n",
      "-rsa_sign 暗号/復号化の代わりに RSA の署名/検証を測定します。\n",
      "<keySz> -rsa-sz\n RSA <key size> の性能を測定します。\n",
      "-ffhdhe2048 Measure DH using FFDHE 2048-bit parameters.\n",

@@ -2056,6 +2060,8 @@ static int numBlocks = NUM_BLOCKS;
 static word32 bench_size = BENCH_SIZE;
 static int base2 = 1;
 static int digest_stream = 1;
+static int mac_stream = 1;
+static int aead_set_key = 0;
 #ifdef HAVE_CHACHA
 static int encrypt_only = 0;
 #endif
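Together with the usage array growing from 25 to 27 entries, these hunks add two benchmark switches: -mac_final, which times an HMAC update plus final per iteration instead of streaming updates, and -aead_set_key, which includes AEAD key setup in the timed region. After a normal autotools build the benchmark binary typically lives at ./wolfcrypt/benchmark/benchmark, so example runs would be:

    ./wolfcrypt/benchmark/benchmark -mac_final
    ./wolfcrypt/benchmark/benchmark -aead_set_key -aes-gcm

(the -aes-gcm selector is assumed from the existing benchmark usage output; any other algorithm selector works the same way).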
@@ -4505,10 +4511,12 @@ static void bench_aesgcm_internal(int useDeviceID,
                 goto exit;
             }
 
-            ret = wc_AesGcmSetKey(enc[i], key, keySz);
-            if (ret != 0) {
-                printf("AesGcmSetKey failed, ret = %d\n", ret);
-                goto exit;
+            if (!aead_set_key) {
+                ret = wc_AesGcmSetKey(enc[i], key, keySz);
+                if (ret != 0) {
+                    printf("AesGcmSetKey failed, ret = %d\n", ret);
+                    goto exit;
+                }
             }
         }
 

@@ -4522,6 +4530,14 @@ static void bench_aesgcm_internal(int useDeviceID,
         for (i = 0; i < BENCH_MAX_PENDING; i++) {
             if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(enc[i]), 0,
                                   &times, numBlocks, &pending)) {
+                if (aead_set_key) {
+                    ret = wc_AesGcmSetKey(enc[i], key, keySz);
+                    if (!bench_async_handle(&ret,
+                                            BENCH_ASYNC_GET_DEV(enc[i]), 0,
+                                            &times, &pending)) {
+                        goto exit_aes_gcm;
+                    }
+                }
                 ret = wc_AesGcmEncrypt(enc[i], bench_cipher,
                                        bench_plain, bench_size,
                                        iv, ivSz, bench_tag, AES_AUTH_TAG_SZ,
@@ -4560,10 +4576,12 @@ exit_aes_gcm:
                 goto exit;
             }
 
-            ret = wc_AesGcmSetKey(dec[i], key, keySz);
-            if (ret != 0) {
-                printf("AesGcmSetKey failed, ret = %d\n", ret);
-                goto exit;
+            if (!aead_set_key) {
+                ret = wc_AesGcmSetKey(dec[i], key, keySz);
+                if (ret != 0) {
+                    printf("AesGcmSetKey failed, ret = %d\n", ret);
+                    goto exit;
+                }
             }
         }
 

@@ -4576,6 +4594,14 @@ exit_aes_gcm:
         for (i = 0; i < BENCH_MAX_PENDING; i++) {
             if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(dec[i]), 0,
                                   &times, numBlocks, &pending)) {
+                if (aead_set_key) {
+                    ret = wc_AesGcmSetKey(dec[i], key, keySz);
+                    if (!bench_async_handle(&ret,
+                                            BENCH_ASYNC_GET_DEV(dec[i]), 0,
+                                            &times, &pending)) {
+                        goto exit_aes_gcm_dec;
+                    }
+                }
                 ret = wc_AesGcmDecrypt(dec[i], bench_plain,
                                        bench_cipher, bench_size,
                                        iv, ivSz, bench_tag, AES_AUTH_TAG_SZ,
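The pattern is the same on the encrypt and decrypt sides: without -aead_set_key, wc_AesGcmSetKey stays in the setup phase exactly as before; with it, the call moves into the timed loop (second hunk of each pair), so the reported numbers include per-message key scheduling, which is closer to workloads that rekey per record or per connection. Inside the timed loop the result is routed through bench_async_handle rather than the printf/goto error path, matching the other in-loop operations.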
@ -8300,50 +8326,89 @@ static void bench_hmac(int useDeviceID, int type, int digestSz,
|
|||
}
|
||||
}
|
||||
|
||||
bench_stats_start(&count, &start);
|
||||
do {
|
||||
for (times = 0; times < numBlocks || pending > 0; ) {
|
||||
bench_async_poll(&pending);
|
||||
|
||||
/* while free pending slots in queue, submit ops */
|
||||
for (i = 0; i < BENCH_MAX_PENDING; i++) {
|
||||
if (bench_async_check(&ret,
|
||||
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
|
||||
&times, numBlocks, &pending)) {
|
||||
ret = wc_HmacUpdate(hmac[i], bench_plain, bench_size);
|
||||
if (!bench_async_handle(&ret,
|
||||
BENCH_ASYNC_GET_DEV(hmac[i]),
|
||||
0, &times, &pending)) {
|
||||
goto exit_hmac;
|
||||
}
|
||||
}
|
||||
} /* for i */
|
||||
} /* for times */
|
||||
count += times;
|
||||
|
||||
times = 0;
|
||||
if (mac_stream) {
|
||||
bench_stats_start(&count, &start);
|
||||
do {
|
||||
bench_async_poll(&pending);
|
||||
for (times = 0; times < numBlocks || pending > 0; ) {
|
||||
bench_async_poll(&pending);
|
||||
|
||||
for (i = 0; i < BENCH_MAX_PENDING; i++) {
|
||||
if (bench_async_check(&ret,
|
||||
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
|
||||
&times, numBlocks, &pending)) {
|
||||
ret = wc_HmacFinal(hmac[i], digest[i]);
|
||||
if (!bench_async_handle(&ret,
|
||||
BENCH_ASYNC_GET_DEV(hmac[i]),
|
||||
0, &times, &pending)) {
|
||||
goto exit_hmac;
|
||||
/* while free pending slots in queue, submit ops */
|
||||
for (i = 0; i < BENCH_MAX_PENDING; i++) {
|
||||
if (bench_async_check(&ret,
|
||||
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
|
||||
&times, numBlocks, &pending)) {
|
||||
ret = wc_HmacUpdate(hmac[i], bench_plain, bench_size);
|
||||
if (!bench_async_handle(&ret,
|
||||
BENCH_ASYNC_GET_DEV(hmac[i]),
|
||||
0, &times, &pending)) {
|
||||
goto exit_hmac;
|
||||
}
|
||||
}
|
||||
}
|
||||
RECORD_MULTI_VALUE_STATS();
|
||||
} /* for i */
|
||||
} while (pending > 0);
|
||||
} while (bench_stats_check(start)
|
||||
#ifdef MULTI_VALUE_STATISTICS
|
||||
|| runs < minimum_runs
|
||||
#endif
|
||||
);
|
||||
} /* for i */
|
||||
} /* for times */
|
||||
count += times;
|
||||
|
||||
times = 0;
|
||||
do {
|
||||
bench_async_poll(&pending);
|
||||
|
||||
for (i = 0; i < BENCH_MAX_PENDING; i++) {
|
||||
if (bench_async_check(&ret,
|
||||
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
|
||||
&times, numBlocks, &pending)) {
|
||||
ret = wc_HmacFinal(hmac[i], digest[i]);
|
||||
if (!bench_async_handle(&ret,
|
||||
BENCH_ASYNC_GET_DEV(hmac[i]),
|
||||
0, &times, &pending)) {
|
||||
goto exit_hmac;
|
||||
}
|
||||
}
|
||||
RECORD_MULTI_VALUE_STATS();
|
||||
} /* for i */
|
||||
} while (pending > 0);
|
||||
} while (bench_stats_check(start)
|
||||
#ifdef MULTI_VALUE_STATISTICS
|
||||
|| runs < minimum_runs
|
||||
#endif
|
||||
);
|
||||
}
|
||||
else {
|
||||
bench_stats_start(&count, &start);
|
||||
do {
|
||||
for (times = 0; times < numBlocks || pending > 0; ) {
|
||||
bench_async_poll(&pending);
|
||||
|
||||
/* while free pending slots in queue, submit ops */
|
||||
for (i = 0; i < BENCH_MAX_PENDING; i++) {
|
||||
if (bench_async_check(&ret,
|
||||
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
|
||||
&times, numBlocks, &pending)) {
|
||||
ret = wc_HmacUpdate(hmac[i], bench_plain, bench_size);
|
||||
if (!bench_async_handle(&ret,
|
||||
BENCH_ASYNC_GET_DEV(hmac[i]),
|
||||
0, &times, &pending)) {
|
||||
goto exit_hmac;
|
||||
}
|
||||
}
|
||||
if (bench_async_check(&ret,
|
||||
BENCH_ASYNC_GET_DEV(hmac[i]), 0,
|
||||
&times, numBlocks, &pending)) {
|
||||
ret = wc_HmacFinal(hmac[i], digest[i]);
|
||||
if (!bench_async_handle(&ret,
|
||||
BENCH_ASYNC_GET_DEV(hmac[i]),
|
||||
0, &times, &pending)) {
|
||||
goto exit_hmac;
|
||||
}
|
||||
}
|
||||
} /* for i */
|
||||
} /* for times */
|
||||
count += times;
|
||||
} while (bench_stats_check(start)
|
||||
#ifdef MULTI_VALUE_STATISTICS
|
||||
|| runs < minimum_runs
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
exit_hmac:
|
||||
bench_stats_sym_finish(label, useDeviceID, count, bench_size, start, ret);
|
||||
|
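The restructuring above splits bench_hmac into two timing modes: by default (mac_stream set) it keeps the old behaviour of timing a stream of wc_HmacUpdate calls and then the matching wc_HmacFinal calls, while -mac_final (mac_stream cleared) times a full wc_HmacUpdate plus wc_HmacFinal pair per iteration. The second mode is where WOLFSSL_HMAC_COPY_HASH should make the most visible difference, since every final forces the next update to start from the key pads again.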
@@ -14989,6 +15054,7 @@ static void Usage(void)
     e += 3;
 #endif
     printf("%s", bench_Usage_msg1[lng_index][e++]); /* option -dgst_full */
+    printf("%s", bench_Usage_msg1[lng_index][e++]); /* option -mac_final */
 #ifndef NO_RSA
     printf("%s", bench_Usage_msg1[lng_index][e++]); /* option -rsa_sign */
 #ifdef WOLFSSL_KEY_GEN

@@ -15186,6 +15252,10 @@ int wolfcrypt_benchmark_main(int argc, char** argv)
 #endif
         else if (string_matches(argv[1], "-dgst_full"))
             digest_stream = 0;
+        else if (string_matches(argv[1], "-mac_final"))
+            mac_stream = 0;
+        else if (string_matches(argv[1], "-aead_set_key"))
+            aead_set_key = 1;
 #ifdef HAVE_CHACHA
         else if (string_matches(argv[1], "-enc_only"))
             encrypt_only = 1;
@@ -6633,6 +6633,25 @@ void GenerateM0(Gcm* gcm)
 
 #endif /* GCM_TABLE */
 
+#if defined(WOLFSSL_AESNI) && defined(USE_INTEL_SPEEDUP)
+    #define HAVE_INTEL_AVX1
+    #define HAVE_INTEL_AVX2
+#endif
+
+#if defined(WOLFSSL_AESNI) && defined(GCM_TABLE_4BIT) && \
+    defined(WC_C_DYNAMIC_FALLBACK)
+void GCM_generate_m0_aesni(const unsigned char *h, unsigned char *m)
+    XASM_LINK("GCM_generate_m0_aesni");
+#ifdef HAVE_INTEL_AVX1
+void GCM_generate_m0_avx1(const unsigned char *h, unsigned char *m)
+    XASM_LINK("GCM_generate_m0_avx1");
+#endif
+#ifdef HAVE_INTEL_AVX2
+void GCM_generate_m0_avx2(const unsigned char *h, unsigned char *m)
+    XASM_LINK("GCM_generate_m0_avx2");
+#endif
+#endif /* WOLFSSL_AESNI && GCM_TABLE_4BIT && WC_C_DYNAMIC_FALLBACK */
+
 /* Software AES - GCM SetKey */
 int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
 {

@@ -6702,9 +6721,33 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
         VECTOR_REGISTERS_POP;
     }
     if (ret == 0) {
-    #if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT)
-        GenerateM0(&aes->gcm);
-    #endif /* GCM_TABLE */
+    #if defined(GCM_TABLE) || defined(GCM_TABLE_4BIT)
+    #if defined(WOLFSSL_AESNI) && defined(GCM_TABLE_4BIT)
+        if (aes->use_aesni) {
+        #if defined(WC_C_DYNAMIC_FALLBACK)
+        #ifdef HAVE_INTEL_AVX2
+            if (IS_INTEL_AVX2(intel_flags)) {
+                GCM_generate_m0_avx2(aes->gcm.H, (byte*)aes->gcm.M0);
+            }
+            else
+        #endif
+        #if defined(HAVE_INTEL_AVX1)
+            if (IS_INTEL_AVX1(intel_flags)) {
+                GCM_generate_m0_avx1(aes->gcm.H, (byte*)aes->gcm.M0);
+            }
+            else
+        #endif
+            {
+                GCM_generate_m0_aesni(aes->gcm.H, (byte*)aes->gcm.M0);
+            }
+        #endif
+        }
+        else
+    #endif
+        {
+            GenerateM0(&aes->gcm);
+        }
+    #endif /* GCM_TABLE || GCM_TABLE_4BIT */
     }
 #endif /* FREESCALE_LTC_AES_GCM */
 

@@ -6727,11 +6770,6 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
 
 #ifdef WOLFSSL_AESNI
 
-#if defined(USE_INTEL_SPEEDUP)
-    #define HAVE_INTEL_AVX1
-    #define HAVE_INTEL_AVX2
-#endif /* USE_INTEL_SPEEDUP */
-
 void AES_GCM_encrypt_aesni(const unsigned char *in, unsigned char *out,
                            const unsigned char* addt, const unsigned char* ivec,
                            unsigned char *tag, word32 nbytes,
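The dispatch above only changes wc_AesGcmSetKey; encryption and decryption are untouched. For orientation, a minimal caller is sketched below (illustrative only; gcm_seal_once and the 128-bit key / 96-bit IV sizes are just example choices). The comment marks the step this commit changes: with AES-NI the M0 table is either not generated at all (pure assembly GCM) or generated by the new GCM_generate_m0_* assembly when the C fallback is compiled in.

    #include <wolfssl/wolfcrypt/settings.h>
    #include <wolfssl/wolfcrypt/aes.h>

    int gcm_seal_once(const byte key[16], const byte iv[12],
                      const byte* plain, word32 plainSz,
                      byte* cipher, byte tag[16])
    {
        Aes aes;
        int ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
        if (ret != 0)
            return ret;
        /* GHASH key H (and, when needed, the M0 table) are derived here. */
        ret = wc_AesGcmSetKey(&aes, key, 16);
        if (ret == 0)
            ret = wc_AesGcmEncrypt(&aes, cipher, plain, plainSz,
                                   iv, 12, tag, 16, NULL, 0);
        wc_AesFree(&aes);
        return ret;
    }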
@ -56,6 +56,272 @@
|
|||
#else
|
||||
.p2align 4
|
||||
#endif /* __APPLE__ */
|
||||
L_GCM_generate_m0_aesni_rev8:
|
||||
.quad 0x8090a0b0c0d0e0f, 0x1020304050607
|
||||
#ifndef __APPLE__
|
||||
.data
|
||||
#else
|
||||
.section __DATA,__data
|
||||
#endif /* __APPLE__ */
|
||||
#ifndef __APPLE__
|
||||
.align 16
|
||||
#else
|
||||
.p2align 4
|
||||
#endif /* __APPLE__ */
|
||||
L_GCM_generate_m0_aesni_mod2_128:
|
||||
.quad 0x0, 0xe100000000000000
|
||||
#ifndef __APPLE__
|
||||
.text
|
||||
.globl GCM_generate_m0_aesni
|
||||
.type GCM_generate_m0_aesni,@function
|
||||
.align 16
|
||||
GCM_generate_m0_aesni:
|
||||
#else
|
||||
.section __TEXT,__text
|
||||
.globl _GCM_generate_m0_aesni
|
||||
.p2align 4
|
||||
_GCM_generate_m0_aesni:
|
||||
#endif /* __APPLE__ */
|
||||
movdqu L_GCM_generate_m0_aesni_rev8(%rip), %xmm9
|
||||
movdqu L_GCM_generate_m0_aesni_mod2_128(%rip), %xmm10
|
||||
pxor %xmm8, %xmm8
|
||||
movdqu (%rdi), %xmm0
|
||||
movdqu %xmm8, (%rsi)
|
||||
movdqu %xmm0, %xmm8
|
||||
pshufb %xmm9, %xmm0
|
||||
movdqu %xmm0, %xmm5
|
||||
movdqu %xmm0, %xmm4
|
||||
psllq $63, %xmm5
|
||||
psrlq $0x01, %xmm4
|
||||
movdqu %xmm5, %xmm1
|
||||
pslldq $8, %xmm1
|
||||
psrldq $8, %xmm5
|
||||
pshufd $0xff, %xmm1, %xmm1
|
||||
por %xmm5, %xmm4
|
||||
psrad $31, %xmm1
|
||||
pand %xmm10, %xmm1
|
||||
pxor %xmm4, %xmm1
|
||||
movdqu %xmm1, %xmm5
|
||||
movdqu %xmm1, %xmm4
|
||||
psllq $63, %xmm5
|
||||
psrlq $0x01, %xmm4
|
||||
movdqu %xmm5, %xmm2
|
||||
pslldq $8, %xmm2
|
||||
psrldq $8, %xmm5
|
||||
pshufd $0xff, %xmm2, %xmm2
|
||||
por %xmm5, %xmm4
|
||||
psrad $31, %xmm2
|
||||
pand %xmm10, %xmm2
|
||||
pxor %xmm4, %xmm2
|
||||
movdqu %xmm2, %xmm5
|
||||
movdqu %xmm2, %xmm4
|
||||
psllq $63, %xmm5
|
||||
psrlq $0x01, %xmm4
|
||||
movdqu %xmm5, %xmm3
|
||||
pslldq $8, %xmm3
|
||||
psrldq $8, %xmm5
|
||||
pshufd $0xff, %xmm3, %xmm3
|
||||
por %xmm5, %xmm4
|
||||
psrad $31, %xmm3
|
||||
pand %xmm10, %xmm3
|
||||
pxor %xmm4, %xmm3
|
||||
pshufb %xmm9, %xmm3
|
||||
pshufb %xmm9, %xmm2
|
||||
movdqu %xmm3, %xmm8
|
||||
pshufb %xmm9, %xmm1
|
||||
pshufb %xmm9, %xmm0
|
||||
pxor %xmm2, %xmm8
|
||||
movdqu %xmm3, 16(%rsi)
|
||||
movdqu %xmm2, 32(%rsi)
|
||||
movdqu %xmm8, 48(%rsi)
|
||||
movdqu %xmm1, 64(%rsi)
|
||||
movdqu %xmm3, %xmm4
|
||||
movdqu %xmm2, %xmm5
|
||||
movdqu %xmm8, %xmm6
|
||||
pxor %xmm1, %xmm4
|
||||
pxor %xmm1, %xmm5
|
||||
pxor %xmm1, %xmm6
|
||||
movdqu %xmm4, 80(%rsi)
|
||||
movdqu %xmm5, 96(%rsi)
|
||||
movdqu %xmm6, 112(%rsi)
|
||||
movdqu %xmm0, 128(%rsi)
|
||||
pxor %xmm0, %xmm1
|
||||
movdqu %xmm3, %xmm4
|
||||
movdqu %xmm2, %xmm6
|
||||
pxor %xmm0, %xmm4
|
||||
pxor %xmm0, %xmm6
|
||||
movdqu %xmm4, 144(%rsi)
|
||||
movdqu %xmm6, 160(%rsi)
|
||||
pxor %xmm3, %xmm6
|
||||
movdqu %xmm6, 176(%rsi)
|
||||
movdqu %xmm1, 192(%rsi)
|
||||
movdqu %xmm3, %xmm4
|
||||
movdqu %xmm2, %xmm5
|
||||
movdqu %xmm8, %xmm6
|
||||
pxor %xmm1, %xmm4
|
||||
pxor %xmm1, %xmm5
|
||||
pxor %xmm1, %xmm6
|
||||
movdqu %xmm4, 208(%rsi)
|
||||
movdqu %xmm5, 224(%rsi)
|
||||
movdqu %xmm6, 240(%rsi)
|
||||
movdqu (%rsi), %xmm0
|
||||
movdqu 16(%rsi), %xmm1
|
||||
movdqu 32(%rsi), %xmm2
|
||||
movdqu 48(%rsi), %xmm3
|
||||
pshufb %xmm9, %xmm0
|
||||
pshufb %xmm9, %xmm1
|
||||
pshufb %xmm9, %xmm2
|
||||
pshufb %xmm9, %xmm3
|
||||
movdqu %xmm0, %xmm4
|
||||
movdqu %xmm1, %xmm5
|
||||
movdqu %xmm2, %xmm6
|
||||
movdqu %xmm3, %xmm7
|
||||
psllq $60, %xmm4
|
||||
psllq $60, %xmm5
|
||||
psllq $60, %xmm6
|
||||
psllq $60, %xmm7
|
||||
psrlq $4, %xmm0
|
||||
psrlq $4, %xmm1
|
||||
psrlq $4, %xmm2
|
||||
psrlq $4, %xmm3
|
||||
psrldq $8, %xmm4
|
||||
psrldq $8, %xmm5
|
||||
psrldq $8, %xmm6
|
||||
psrldq $8, %xmm7
|
||||
por %xmm4, %xmm0
|
||||
por %xmm5, %xmm1
|
||||
por %xmm6, %xmm2
|
||||
por %xmm7, %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
movdqu %xmm0, 256(%rsi)
|
||||
movdqu %xmm1, 272(%rsi)
|
||||
movdqu %xmm2, 288(%rsi)
|
||||
movdqu %xmm3, 304(%rsi)
|
||||
movdqu 64(%rsi), %xmm0
|
||||
movdqu 80(%rsi), %xmm1
|
||||
movdqu 96(%rsi), %xmm2
|
||||
movdqu 112(%rsi), %xmm3
|
||||
pshufb %xmm9, %xmm0
|
||||
pshufb %xmm9, %xmm1
|
||||
pshufb %xmm9, %xmm2
|
||||
pshufb %xmm9, %xmm3
|
||||
movdqu %xmm0, %xmm4
|
||||
movdqu %xmm1, %xmm5
|
||||
movdqu %xmm2, %xmm6
|
||||
movdqu %xmm3, %xmm7
|
||||
psllq $60, %xmm4
|
||||
psllq $60, %xmm5
|
||||
psllq $60, %xmm6
|
||||
psllq $60, %xmm7
|
||||
psrlq $4, %xmm0
|
||||
psrlq $4, %xmm1
|
||||
psrlq $4, %xmm2
|
||||
psrlq $4, %xmm3
|
||||
psrldq $8, %xmm4
|
||||
psrldq $8, %xmm5
|
||||
psrldq $8, %xmm6
|
||||
psrldq $8, %xmm7
|
||||
por %xmm4, %xmm0
|
||||
por %xmm5, %xmm1
|
||||
por %xmm6, %xmm2
|
||||
por %xmm7, %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
movdqu %xmm0, 320(%rsi)
|
||||
movdqu %xmm1, 336(%rsi)
|
||||
movdqu %xmm2, 352(%rsi)
|
||||
movdqu %xmm3, 368(%rsi)
|
||||
movdqu 128(%rsi), %xmm0
|
||||
movdqu 144(%rsi), %xmm1
|
||||
movdqu 160(%rsi), %xmm2
|
||||
movdqu 176(%rsi), %xmm3
|
||||
pshufb %xmm9, %xmm0
|
||||
pshufb %xmm9, %xmm1
|
||||
pshufb %xmm9, %xmm2
|
||||
pshufb %xmm9, %xmm3
|
||||
movdqu %xmm0, %xmm4
|
||||
movdqu %xmm1, %xmm5
|
||||
movdqu %xmm2, %xmm6
|
||||
movdqu %xmm3, %xmm7
|
||||
psllq $60, %xmm4
|
||||
psllq $60, %xmm5
|
||||
psllq $60, %xmm6
|
||||
psllq $60, %xmm7
|
||||
psrlq $4, %xmm0
|
||||
psrlq $4, %xmm1
|
||||
psrlq $4, %xmm2
|
||||
psrlq $4, %xmm3
|
||||
psrldq $8, %xmm4
|
||||
psrldq $8, %xmm5
|
||||
psrldq $8, %xmm6
|
||||
psrldq $8, %xmm7
|
||||
por %xmm4, %xmm0
|
||||
por %xmm5, %xmm1
|
||||
por %xmm6, %xmm2
|
||||
por %xmm7, %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
movdqu %xmm0, 384(%rsi)
|
||||
movdqu %xmm1, 400(%rsi)
|
||||
movdqu %xmm2, 416(%rsi)
|
||||
movdqu %xmm3, 432(%rsi)
|
||||
movdqu 192(%rsi), %xmm0
|
||||
movdqu 208(%rsi), %xmm1
|
||||
movdqu 224(%rsi), %xmm2
|
||||
movdqu 240(%rsi), %xmm3
|
||||
pshufb %xmm9, %xmm0
|
||||
pshufb %xmm9, %xmm1
|
||||
pshufb %xmm9, %xmm2
|
||||
pshufb %xmm9, %xmm3
|
||||
movdqu %xmm0, %xmm4
|
||||
movdqu %xmm1, %xmm5
|
||||
movdqu %xmm2, %xmm6
|
||||
movdqu %xmm3, %xmm7
|
||||
psllq $60, %xmm4
|
||||
psllq $60, %xmm5
|
||||
psllq $60, %xmm6
|
||||
psllq $60, %xmm7
|
||||
psrlq $4, %xmm0
|
||||
psrlq $4, %xmm1
|
||||
psrlq $4, %xmm2
|
||||
psrlq $4, %xmm3
|
||||
psrldq $8, %xmm4
|
||||
psrldq $8, %xmm5
|
||||
psrldq $8, %xmm6
|
||||
psrldq $8, %xmm7
|
||||
por %xmm4, %xmm0
|
||||
por %xmm5, %xmm1
|
||||
por %xmm6, %xmm2
|
||||
por %xmm7, %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
movdqu %xmm0, 448(%rsi)
|
||||
movdqu %xmm1, 464(%rsi)
|
||||
movdqu %xmm2, 480(%rsi)
|
||||
movdqu %xmm3, 496(%rsi)
|
||||
repz retq
|
||||
#ifndef __APPLE__
|
||||
.size GCM_generate_m0_aesni,.-GCM_generate_m0_aesni
|
||||
#endif /* __APPLE__ */
|
||||
#ifndef __APPLE__
|
||||
.data
|
||||
#else
|
||||
.section __DATA,__data
|
||||
#endif /* __APPLE__ */
|
||||
#ifndef __APPLE__
|
||||
.align 16
|
||||
#else
|
||||
.p2align 4
|
||||
#endif /* __APPLE__ */
|
||||
L_aes_gcm_one:
|
||||
.quad 0x0, 0x1
|
||||
#ifndef __APPLE__
|
||||
|
@ -6221,6 +6487,238 @@ L_AES_GCM_decrypt_final_aesni_cmp_tag_done:
|
|||
#else
|
||||
.p2align 4
|
||||
#endif /* __APPLE__ */
|
||||
L_GCM_generate_m0_avx1_rev8:
|
||||
.quad 0x8090a0b0c0d0e0f, 0x1020304050607
|
||||
#ifndef __APPLE__
|
||||
.data
|
||||
#else
|
||||
.section __DATA,__data
|
||||
#endif /* __APPLE__ */
|
||||
#ifndef __APPLE__
|
||||
.align 16
|
||||
#else
|
||||
.p2align 4
|
||||
#endif /* __APPLE__ */
|
||||
L_GCM_generate_m0_avx1_mod2_128:
|
||||
.quad 0x0, 0xe100000000000000
|
||||
#ifndef __APPLE__
|
||||
.text
|
||||
.globl GCM_generate_m0_avx1
|
||||
.type GCM_generate_m0_avx1,@function
|
||||
.align 16
|
||||
GCM_generate_m0_avx1:
|
||||
#else
|
||||
.section __TEXT,__text
|
||||
.globl _GCM_generate_m0_avx1
|
||||
.p2align 4
|
||||
_GCM_generate_m0_avx1:
|
||||
#endif /* __APPLE__ */
|
||||
vmovdqu L_GCM_generate_m0_avx1_rev8(%rip), %xmm9
|
||||
vmovdqu L_GCM_generate_m0_avx1_mod2_128(%rip), %xmm10
|
||||
vpxor %xmm8, %xmm8, %xmm8
|
||||
vmovdqu (%rdi), %xmm0
|
||||
vmovdqu %xmm8, (%rsi)
|
||||
vmovdqu %xmm0, %xmm8
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpsllq $63, %xmm0, %xmm5
|
||||
vpsrlq $0x01, %xmm0, %xmm4
|
||||
vpslldq $8, %xmm5, %xmm1
|
||||
vpsrldq $8, %xmm5, %xmm5
|
||||
vpshufd $0xff, %xmm1, %xmm1
|
||||
vpor %xmm5, %xmm4, %xmm4
|
||||
vpsrad $31, %xmm1, %xmm1
|
||||
vpand %xmm10, %xmm1, %xmm1
|
||||
vpxor %xmm4, %xmm1, %xmm1
|
||||
vpsllq $63, %xmm1, %xmm5
|
||||
vpsrlq $0x01, %xmm1, %xmm4
|
||||
vpslldq $8, %xmm5, %xmm2
|
||||
vpsrldq $8, %xmm5, %xmm5
|
||||
vpshufd $0xff, %xmm2, %xmm2
|
||||
vpor %xmm5, %xmm4, %xmm4
|
||||
vpsrad $31, %xmm2, %xmm2
|
||||
vpand %xmm10, %xmm2, %xmm2
|
||||
vpxor %xmm4, %xmm2, %xmm2
|
||||
vpsllq $63, %xmm2, %xmm5
|
||||
vpsrlq $0x01, %xmm2, %xmm4
|
||||
vpslldq $8, %xmm5, %xmm3
|
||||
vpsrldq $8, %xmm5, %xmm5
|
||||
vpshufd $0xff, %xmm3, %xmm3
|
||||
vpor %xmm5, %xmm4, %xmm4
|
||||
vpsrad $31, %xmm3, %xmm3
|
||||
vpand %xmm10, %xmm3, %xmm3
|
||||
vpxor %xmm4, %xmm3, %xmm3
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpxor %xmm2, %xmm3, %xmm8
|
||||
vmovdqu %xmm3, 16(%rsi)
|
||||
vmovdqu %xmm2, 32(%rsi)
|
||||
vmovdqu %xmm8, 48(%rsi)
|
||||
vmovdqu %xmm1, 64(%rsi)
|
||||
vpxor %xmm1, %xmm3, %xmm4
|
||||
vpxor %xmm1, %xmm2, %xmm5
|
||||
vpxor %xmm1, %xmm8, %xmm6
|
||||
vmovdqu %xmm4, 80(%rsi)
|
||||
vmovdqu %xmm5, 96(%rsi)
|
||||
vmovdqu %xmm6, 112(%rsi)
|
||||
vmovdqu %xmm0, 128(%rsi)
|
||||
vpxor %xmm0, %xmm1, %xmm1
|
||||
vpxor %xmm0, %xmm3, %xmm4
|
||||
vpxor %xmm0, %xmm2, %xmm6
|
||||
vmovdqu %xmm4, 144(%rsi)
|
||||
vmovdqu %xmm6, 160(%rsi)
|
||||
vpxor %xmm6, %xmm3, %xmm6
|
||||
vmovdqu %xmm6, 176(%rsi)
|
||||
vmovdqu %xmm1, 192(%rsi)
|
||||
vpxor %xmm1, %xmm3, %xmm4
|
||||
vpxor %xmm1, %xmm2, %xmm5
|
||||
vpxor %xmm1, %xmm8, %xmm6
|
||||
vmovdqu %xmm4, 208(%rsi)
|
||||
vmovdqu %xmm5, 224(%rsi)
|
||||
vmovdqu %xmm6, 240(%rsi)
|
||||
vmovdqu (%rsi), %xmm0
|
||||
vmovdqu 16(%rsi), %xmm1
|
||||
vmovdqu 32(%rsi), %xmm2
|
||||
vmovdqu 48(%rsi), %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
vpsllq $60, %xmm0, %xmm4
|
||||
vpsllq $60, %xmm1, %xmm5
|
||||
vpsllq $60, %xmm2, %xmm6
|
||||
vpsllq $60, %xmm3, %xmm7
|
||||
vpsrlq $4, %xmm0, %xmm0
|
||||
vpsrlq $4, %xmm1, %xmm1
|
||||
vpsrlq $4, %xmm2, %xmm2
|
||||
vpsrlq $4, %xmm3, %xmm3
|
||||
vpsrldq $8, %xmm4, %xmm4
|
||||
vpsrldq $8, %xmm5, %xmm5
|
||||
vpsrldq $8, %xmm6, %xmm6
|
||||
vpsrldq $8, %xmm7, %xmm7
|
||||
vpor %xmm4, %xmm0, %xmm0
|
||||
vpor %xmm5, %xmm1, %xmm1
|
||||
vpor %xmm6, %xmm2, %xmm2
|
||||
vpor %xmm7, %xmm3, %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
vmovdqu %xmm0, 256(%rsi)
|
||||
vmovdqu %xmm1, 272(%rsi)
|
||||
vmovdqu %xmm2, 288(%rsi)
|
||||
vmovdqu %xmm3, 304(%rsi)
|
||||
vmovdqu 64(%rsi), %xmm0
|
||||
vmovdqu 80(%rsi), %xmm1
|
||||
vmovdqu 96(%rsi), %xmm2
|
||||
vmovdqu 112(%rsi), %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
vpsllq $60, %xmm0, %xmm4
|
||||
vpsllq $60, %xmm1, %xmm5
|
||||
vpsllq $60, %xmm2, %xmm6
|
||||
vpsllq $60, %xmm3, %xmm7
|
||||
vpsrlq $4, %xmm0, %xmm0
|
||||
vpsrlq $4, %xmm1, %xmm1
|
||||
vpsrlq $4, %xmm2, %xmm2
|
||||
vpsrlq $4, %xmm3, %xmm3
|
||||
vpsrldq $8, %xmm4, %xmm4
|
||||
vpsrldq $8, %xmm5, %xmm5
|
||||
vpsrldq $8, %xmm6, %xmm6
|
||||
vpsrldq $8, %xmm7, %xmm7
|
||||
vpor %xmm4, %xmm0, %xmm0
|
||||
vpor %xmm5, %xmm1, %xmm1
|
||||
vpor %xmm6, %xmm2, %xmm2
|
||||
vpor %xmm7, %xmm3, %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
vmovdqu %xmm0, 320(%rsi)
|
||||
vmovdqu %xmm1, 336(%rsi)
|
||||
vmovdqu %xmm2, 352(%rsi)
|
||||
vmovdqu %xmm3, 368(%rsi)
|
||||
vmovdqu 128(%rsi), %xmm0
|
||||
vmovdqu 144(%rsi), %xmm1
|
||||
vmovdqu 160(%rsi), %xmm2
|
||||
vmovdqu 176(%rsi), %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
vpsllq $60, %xmm0, %xmm4
|
||||
vpsllq $60, %xmm1, %xmm5
|
||||
vpsllq $60, %xmm2, %xmm6
|
||||
vpsllq $60, %xmm3, %xmm7
|
||||
vpsrlq $4, %xmm0, %xmm0
|
||||
vpsrlq $4, %xmm1, %xmm1
|
||||
vpsrlq $4, %xmm2, %xmm2
|
||||
vpsrlq $4, %xmm3, %xmm3
|
||||
vpsrldq $8, %xmm4, %xmm4
|
||||
vpsrldq $8, %xmm5, %xmm5
|
||||
vpsrldq $8, %xmm6, %xmm6
|
||||
vpsrldq $8, %xmm7, %xmm7
|
||||
vpor %xmm4, %xmm0, %xmm0
|
||||
vpor %xmm5, %xmm1, %xmm1
|
||||
vpor %xmm6, %xmm2, %xmm2
|
||||
vpor %xmm7, %xmm3, %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
vmovdqu %xmm0, 384(%rsi)
|
||||
vmovdqu %xmm1, 400(%rsi)
|
||||
vmovdqu %xmm2, 416(%rsi)
|
||||
vmovdqu %xmm3, 432(%rsi)
|
||||
vmovdqu 192(%rsi), %xmm0
|
||||
vmovdqu 208(%rsi), %xmm1
|
||||
vmovdqu 224(%rsi), %xmm2
|
||||
vmovdqu 240(%rsi), %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
vpsllq $60, %xmm0, %xmm4
|
||||
vpsllq $60, %xmm1, %xmm5
|
||||
vpsllq $60, %xmm2, %xmm6
|
||||
vpsllq $60, %xmm3, %xmm7
|
||||
vpsrlq $4, %xmm0, %xmm0
|
||||
vpsrlq $4, %xmm1, %xmm1
|
||||
vpsrlq $4, %xmm2, %xmm2
|
||||
vpsrlq $4, %xmm3, %xmm3
|
||||
vpsrldq $8, %xmm4, %xmm4
|
||||
vpsrldq $8, %xmm5, %xmm5
|
||||
vpsrldq $8, %xmm6, %xmm6
|
||||
vpsrldq $8, %xmm7, %xmm7
|
||||
vpor %xmm4, %xmm0, %xmm0
|
||||
vpor %xmm5, %xmm1, %xmm1
|
||||
vpor %xmm6, %xmm2, %xmm2
|
||||
vpor %xmm7, %xmm3, %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
vmovdqu %xmm0, 448(%rsi)
|
||||
vmovdqu %xmm1, 464(%rsi)
|
||||
vmovdqu %xmm2, 480(%rsi)
|
||||
vmovdqu %xmm3, 496(%rsi)
|
||||
repz retq
|
||||
#ifndef __APPLE__
|
||||
.size GCM_generate_m0_avx1,.-GCM_generate_m0_avx1
|
||||
#endif /* __APPLE__ */
|
||||
#ifndef __APPLE__
|
||||
.data
|
||||
#else
|
||||
.section __DATA,__data
|
||||
#endif /* __APPLE__ */
|
||||
#ifndef __APPLE__
|
||||
.align 16
|
||||
#else
|
||||
.p2align 4
|
||||
#endif /* __APPLE__ */
|
||||
L_avx1_aes_gcm_one:
|
||||
.quad 0x0, 0x1
|
||||
#ifndef __APPLE__
|
||||
|
@ -11454,6 +11952,238 @@ L_AES_GCM_decrypt_final_avx1_cmp_tag_done:
|
|||
#else
|
||||
.p2align 4
|
||||
#endif /* __APPLE__ */
|
||||
L_GCM_generate_m0_avx2_rev8:
|
||||
.quad 0x8090a0b0c0d0e0f, 0x1020304050607
|
||||
#ifndef __APPLE__
|
||||
.data
|
||||
#else
|
||||
.section __DATA,__data
|
||||
#endif /* __APPLE__ */
|
||||
#ifndef __APPLE__
|
||||
.align 16
|
||||
#else
|
||||
.p2align 4
|
||||
#endif /* __APPLE__ */
|
||||
L_GCM_generate_m0_avx2_mod2_128:
|
||||
.quad 0x0, 0xe100000000000000
|
||||
#ifndef __APPLE__
|
||||
.text
|
||||
.globl GCM_generate_m0_avx2
|
||||
.type GCM_generate_m0_avx2,@function
|
||||
.align 16
|
||||
GCM_generate_m0_avx2:
|
||||
#else
|
||||
.section __TEXT,__text
|
||||
.globl _GCM_generate_m0_avx2
|
||||
.p2align 4
|
||||
_GCM_generate_m0_avx2:
|
||||
#endif /* __APPLE__ */
|
||||
vmovdqu L_GCM_generate_m0_avx2_rev8(%rip), %xmm9
|
||||
vmovdqu L_GCM_generate_m0_avx2_mod2_128(%rip), %xmm10
|
||||
vpxor %xmm8, %xmm8, %xmm8
|
||||
vmovdqu (%rdi), %xmm0
|
||||
vmovdqu %xmm8, (%rsi)
|
||||
vmovdqu %xmm0, %xmm8
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpsllq $63, %xmm0, %xmm5
|
||||
vpsrlq $0x01, %xmm0, %xmm4
|
||||
vpslldq $8, %xmm5, %xmm1
|
||||
vpsrldq $8, %xmm5, %xmm5
|
||||
vpshufd $0xff, %xmm1, %xmm1
|
||||
vpor %xmm5, %xmm4, %xmm4
|
||||
vpsrad $31, %xmm1, %xmm1
|
||||
vpand %xmm10, %xmm1, %xmm1
|
||||
vpxor %xmm4, %xmm1, %xmm1
|
||||
vpsllq $63, %xmm1, %xmm5
|
||||
vpsrlq $0x01, %xmm1, %xmm4
|
||||
vpslldq $8, %xmm5, %xmm2
|
||||
vpsrldq $8, %xmm5, %xmm5
|
||||
vpshufd $0xff, %xmm2, %xmm2
|
||||
vpor %xmm5, %xmm4, %xmm4
|
||||
vpsrad $31, %xmm2, %xmm2
|
||||
vpand %xmm10, %xmm2, %xmm2
|
||||
vpxor %xmm4, %xmm2, %xmm2
|
||||
vpsllq $63, %xmm2, %xmm5
|
||||
vpsrlq $0x01, %xmm2, %xmm4
|
||||
vpslldq $8, %xmm5, %xmm3
|
||||
vpsrldq $8, %xmm5, %xmm5
|
||||
vpshufd $0xff, %xmm3, %xmm3
|
||||
vpor %xmm5, %xmm4, %xmm4
|
||||
vpsrad $31, %xmm3, %xmm3
|
||||
vpand %xmm10, %xmm3, %xmm3
|
||||
vpxor %xmm4, %xmm3, %xmm3
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpxor %xmm2, %xmm3, %xmm8
|
||||
vmovdqu %xmm3, 16(%rsi)
|
||||
vmovdqu %xmm2, 32(%rsi)
|
||||
vmovdqu %xmm8, 48(%rsi)
|
||||
vmovdqu %xmm1, 64(%rsi)
|
||||
vpxor %xmm1, %xmm3, %xmm4
|
||||
vpxor %xmm1, %xmm2, %xmm5
|
||||
vpxor %xmm1, %xmm8, %xmm6
|
||||
vmovdqu %xmm4, 80(%rsi)
|
||||
vmovdqu %xmm5, 96(%rsi)
|
||||
vmovdqu %xmm6, 112(%rsi)
|
||||
vmovdqu %xmm0, 128(%rsi)
|
||||
vpxor %xmm0, %xmm1, %xmm1
|
||||
vpxor %xmm0, %xmm3, %xmm4
|
||||
vpxor %xmm0, %xmm2, %xmm6
|
||||
vmovdqu %xmm4, 144(%rsi)
|
||||
vmovdqu %xmm6, 160(%rsi)
|
||||
vpxor %xmm6, %xmm3, %xmm6
|
||||
vmovdqu %xmm6, 176(%rsi)
|
||||
vmovdqu %xmm1, 192(%rsi)
|
||||
vpxor %xmm1, %xmm3, %xmm4
|
||||
vpxor %xmm1, %xmm2, %xmm5
|
||||
vpxor %xmm1, %xmm8, %xmm6
|
||||
vmovdqu %xmm4, 208(%rsi)
|
||||
vmovdqu %xmm5, 224(%rsi)
|
||||
vmovdqu %xmm6, 240(%rsi)
|
||||
vmovdqu (%rsi), %xmm0
|
||||
vmovdqu 16(%rsi), %xmm1
|
||||
vmovdqu 32(%rsi), %xmm2
|
||||
vmovdqu 48(%rsi), %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
vpsllq $60, %xmm0, %xmm4
|
||||
vpsllq $60, %xmm1, %xmm5
|
||||
vpsllq $60, %xmm2, %xmm6
|
||||
vpsllq $60, %xmm3, %xmm7
|
||||
vpsrlq $4, %xmm0, %xmm0
|
||||
vpsrlq $4, %xmm1, %xmm1
|
||||
vpsrlq $4, %xmm2, %xmm2
|
||||
vpsrlq $4, %xmm3, %xmm3
|
||||
vpsrldq $8, %xmm4, %xmm4
|
||||
vpsrldq $8, %xmm5, %xmm5
|
||||
vpsrldq $8, %xmm6, %xmm6
|
||||
vpsrldq $8, %xmm7, %xmm7
|
||||
vpor %xmm4, %xmm0, %xmm0
|
||||
vpor %xmm5, %xmm1, %xmm1
|
||||
vpor %xmm6, %xmm2, %xmm2
|
||||
vpor %xmm7, %xmm3, %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
vmovdqu %xmm0, 256(%rsi)
|
||||
vmovdqu %xmm1, 272(%rsi)
|
||||
vmovdqu %xmm2, 288(%rsi)
|
||||
vmovdqu %xmm3, 304(%rsi)
|
||||
vmovdqu 64(%rsi), %xmm0
|
||||
vmovdqu 80(%rsi), %xmm1
|
||||
vmovdqu 96(%rsi), %xmm2
|
||||
vmovdqu 112(%rsi), %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
vpsllq $60, %xmm0, %xmm4
|
||||
vpsllq $60, %xmm1, %xmm5
|
||||
vpsllq $60, %xmm2, %xmm6
|
||||
vpsllq $60, %xmm3, %xmm7
|
||||
vpsrlq $4, %xmm0, %xmm0
|
||||
vpsrlq $4, %xmm1, %xmm1
|
||||
vpsrlq $4, %xmm2, %xmm2
|
||||
vpsrlq $4, %xmm3, %xmm3
|
||||
vpsrldq $8, %xmm4, %xmm4
|
||||
vpsrldq $8, %xmm5, %xmm5
|
||||
vpsrldq $8, %xmm6, %xmm6
|
||||
vpsrldq $8, %xmm7, %xmm7
|
||||
vpor %xmm4, %xmm0, %xmm0
|
||||
vpor %xmm5, %xmm1, %xmm1
|
||||
vpor %xmm6, %xmm2, %xmm2
|
||||
vpor %xmm7, %xmm3, %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
vmovdqu %xmm0, 320(%rsi)
|
||||
vmovdqu %xmm1, 336(%rsi)
|
||||
vmovdqu %xmm2, 352(%rsi)
|
||||
vmovdqu %xmm3, 368(%rsi)
|
||||
vmovdqu 128(%rsi), %xmm0
|
||||
vmovdqu 144(%rsi), %xmm1
|
||||
vmovdqu 160(%rsi), %xmm2
|
||||
vmovdqu 176(%rsi), %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
vpsllq $60, %xmm0, %xmm4
|
||||
vpsllq $60, %xmm1, %xmm5
|
||||
vpsllq $60, %xmm2, %xmm6
|
||||
vpsllq $60, %xmm3, %xmm7
|
||||
vpsrlq $4, %xmm0, %xmm0
|
||||
vpsrlq $4, %xmm1, %xmm1
|
||||
vpsrlq $4, %xmm2, %xmm2
|
||||
vpsrlq $4, %xmm3, %xmm3
|
||||
vpsrldq $8, %xmm4, %xmm4
|
||||
vpsrldq $8, %xmm5, %xmm5
|
||||
vpsrldq $8, %xmm6, %xmm6
|
||||
vpsrldq $8, %xmm7, %xmm7
|
||||
vpor %xmm4, %xmm0, %xmm0
|
||||
vpor %xmm5, %xmm1, %xmm1
|
||||
vpor %xmm6, %xmm2, %xmm2
|
||||
vpor %xmm7, %xmm3, %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
vmovdqu %xmm0, 384(%rsi)
|
||||
vmovdqu %xmm1, 400(%rsi)
|
||||
vmovdqu %xmm2, 416(%rsi)
|
||||
vmovdqu %xmm3, 432(%rsi)
|
||||
vmovdqu 192(%rsi), %xmm0
|
||||
vmovdqu 208(%rsi), %xmm1
|
||||
vmovdqu 224(%rsi), %xmm2
|
||||
vmovdqu 240(%rsi), %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
vpsllq $60, %xmm0, %xmm4
|
||||
vpsllq $60, %xmm1, %xmm5
|
||||
vpsllq $60, %xmm2, %xmm6
|
||||
vpsllq $60, %xmm3, %xmm7
|
||||
vpsrlq $4, %xmm0, %xmm0
|
||||
vpsrlq $4, %xmm1, %xmm1
|
||||
vpsrlq $4, %xmm2, %xmm2
|
||||
vpsrlq $4, %xmm3, %xmm3
|
||||
vpsrldq $8, %xmm4, %xmm4
|
||||
vpsrldq $8, %xmm5, %xmm5
|
||||
vpsrldq $8, %xmm6, %xmm6
|
||||
vpsrldq $8, %xmm7, %xmm7
|
||||
vpor %xmm4, %xmm0, %xmm0
|
||||
vpor %xmm5, %xmm1, %xmm1
|
||||
vpor %xmm6, %xmm2, %xmm2
|
||||
vpor %xmm7, %xmm3, %xmm3
|
||||
vpshufb %xmm9, %xmm0, %xmm0
|
||||
vpshufb %xmm9, %xmm1, %xmm1
|
||||
vpshufb %xmm9, %xmm2, %xmm2
|
||||
vpshufb %xmm9, %xmm3, %xmm3
|
||||
vmovdqu %xmm0, 448(%rsi)
|
||||
vmovdqu %xmm1, 464(%rsi)
|
||||
vmovdqu %xmm2, 480(%rsi)
|
||||
vmovdqu %xmm3, 496(%rsi)
|
||||
repz retq
|
||||
#ifndef __APPLE__
|
||||
.size GCM_generate_m0_avx2,.-GCM_generate_m0_avx2
|
||||
#endif /* __APPLE__ */
|
||||
#ifndef __APPLE__
|
||||
.data
|
||||
#else
|
||||
.section __DATA,__data
|
||||
#endif /* __APPLE__ */
|
||||
#ifndef __APPLE__
|
||||
.align 16
|
||||
#else
|
||||
.p2align 4
|
||||
#endif /* __APPLE__ */
|
||||
L_avx2_aes_gcm_one:
|
||||
.quad 0x0, 0x1
|
||||
#ifndef __APPLE__
|
||||
|
|
|
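For readers skimming the assembly: GCM_generate_m0_aesni, _avx1 and _avx2 each take the GHASH key H as the first argument and fill the second argument with a 512-byte table (32 entries of 16 bytes, written at offsets 0 through 496) for the 4-bit table GHASH implementation, standing in for the C GenerateM0() in this configuration. The entries are derived from H by repeated "shift right one bit and reduce" steps (the 0xE1... constant loaded above) plus XOR combinations. A plain-C reference for that single step, per the standard GCM reduction, is sketched below (illustrative; not the library's code):

    /* One GCM shift-and-reduce step on a 16-byte block V: shift right one bit
     * and, if a 1 bit fell off the end, fold in R = 0xE1 || 0^120. */
    static void gcm_shift_right_reduce(unsigned char V[16])
    {
        int i;
        unsigned char carry = (unsigned char)(V[15] & 0x01);
        for (i = 15; i > 0; i--)
            V[i] = (unsigned char)((V[i] >> 1) | (unsigned char)(V[i - 1] << 7));
        V[0] >>= 1;
        if (carry)
            V[0] ^= 0xE1;
    }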
@ -40,6 +40,259 @@ IFNDEF _WIN64
|
|||
_WIN64 = 1
|
||||
ENDIF
|
||||
|
||||
_DATA SEGMENT
|
||||
ALIGN 16
|
||||
L_GCM_generate_m0_aesni_rev8 QWORD 579005069656919567, 283686952306183
|
||||
ptr_L_GCM_generate_m0_aesni_rev8 QWORD L_GCM_generate_m0_aesni_rev8
|
||||
_DATA ENDS
|
||||
_DATA SEGMENT
|
||||
ALIGN 16
|
||||
L_GCM_generate_m0_aesni_mod2_128 QWORD 0, 16212958658533785600
|
||||
ptr_L_GCM_generate_m0_aesni_mod2_128 QWORD L_GCM_generate_m0_aesni_mod2_128
|
||||
_DATA ENDS
|
||||
_text SEGMENT READONLY PARA
|
||||
GCM_generate_m0_aesni PROC
|
||||
sub rsp, 80
|
||||
movdqu OWORD PTR [rsp], xmm6
|
||||
movdqu OWORD PTR [rsp+16], xmm7
|
||||
movdqu OWORD PTR [rsp+32], xmm8
|
||||
movdqu OWORD PTR [rsp+48], xmm9
|
||||
movdqu OWORD PTR [rsp+64], xmm10
|
||||
movdqu xmm9, OWORD PTR L_GCM_generate_m0_aesni_rev8
|
||||
movdqu xmm10, OWORD PTR L_GCM_generate_m0_aesni_mod2_128
|
||||
pxor xmm8, xmm8
|
||||
movdqu xmm0, OWORD PTR [rcx]
|
||||
movdqu OWORD PTR [rdx], xmm8
|
||||
movdqu xmm8, xmm0
|
||||
pshufb xmm0, xmm9
|
||||
movdqu xmm5, xmm0
|
||||
movdqu xmm4, xmm0
|
||||
psllq xmm5, 63
|
||||
psrlq xmm4, 1
|
||||
movdqu xmm1, xmm5
|
||||
pslldq xmm1, 8
|
||||
psrldq xmm5, 8
|
||||
pshufd xmm1, xmm1, 255
|
||||
por xmm4, xmm5
|
||||
psrad xmm1, 31
|
||||
pand xmm1, xmm10
|
||||
pxor xmm1, xmm4
|
||||
movdqu xmm5, xmm1
|
||||
movdqu xmm4, xmm1
|
||||
psllq xmm5, 63
|
||||
psrlq xmm4, 1
|
||||
movdqu xmm2, xmm5
|
||||
pslldq xmm2, 8
|
||||
psrldq xmm5, 8
|
||||
pshufd xmm2, xmm2, 255
|
||||
por xmm4, xmm5
|
||||
psrad xmm2, 31
|
||||
pand xmm2, xmm10
|
||||
pxor xmm2, xmm4
|
||||
movdqu xmm5, xmm2
|
||||
movdqu xmm4, xmm2
|
||||
psllq xmm5, 63
|
||||
psrlq xmm4, 1
|
||||
movdqu xmm3, xmm5
|
||||
pslldq xmm3, 8
|
||||
psrldq xmm5, 8
|
||||
pshufd xmm3, xmm3, 255
|
||||
por xmm4, xmm5
|
||||
psrad xmm3, 31
|
||||
pand xmm3, xmm10
|
||||
pxor xmm3, xmm4
|
||||
pshufb xmm3, xmm9
|
||||
pshufb xmm2, xmm9
|
||||
movdqu xmm8, xmm3
|
||||
pshufb xmm1, xmm9
|
||||
pshufb xmm0, xmm9
|
||||
pxor xmm8, xmm2
|
||||
movdqu OWORD PTR [rdx+16], xmm3
|
||||
movdqu OWORD PTR [rdx+32], xmm2
|
||||
movdqu OWORD PTR [rdx+48], xmm8
|
||||
movdqu OWORD PTR [rdx+64], xmm1
|
||||
movdqu xmm4, xmm3
|
||||
movdqu xmm5, xmm2
|
||||
movdqu xmm6, xmm8
|
||||
pxor xmm4, xmm1
|
||||
pxor xmm5, xmm1
|
||||
pxor xmm6, xmm1
|
||||
movdqu OWORD PTR [rdx+80], xmm4
|
||||
movdqu OWORD PTR [rdx+96], xmm5
|
||||
movdqu OWORD PTR [rdx+112], xmm6
|
||||
movdqu OWORD PTR [rdx+128], xmm0
|
||||
pxor xmm1, xmm0
|
||||
movdqu xmm4, xmm3
|
||||
movdqu xmm6, xmm2
|
||||
pxor xmm4, xmm0
|
||||
pxor xmm6, xmm0
|
||||
movdqu OWORD PTR [rdx+144], xmm4
|
||||
movdqu OWORD PTR [rdx+160], xmm6
|
||||
pxor xmm6, xmm3
|
||||
movdqu OWORD PTR [rdx+176], xmm6
|
||||
movdqu OWORD PTR [rdx+192], xmm1
|
||||
movdqu xmm4, xmm3
|
||||
movdqu xmm5, xmm2
|
||||
movdqu xmm6, xmm8
|
||||
pxor xmm4, xmm1
|
||||
pxor xmm5, xmm1
|
||||
pxor xmm6, xmm1
|
||||
movdqu OWORD PTR [rdx+208], xmm4
|
||||
movdqu OWORD PTR [rdx+224], xmm5
|
||||
movdqu OWORD PTR [rdx+240], xmm6
|
||||
movdqu xmm0, OWORD PTR [rdx]
|
||||
movdqu xmm1, OWORD PTR [rdx+16]
|
||||
movdqu xmm2, OWORD PTR [rdx+32]
|
||||
movdqu xmm3, OWORD PTR [rdx+48]
|
||||
pshufb xmm0, xmm9
|
||||
pshufb xmm1, xmm9
|
||||
pshufb xmm2, xmm9
|
||||
pshufb xmm3, xmm9
|
||||
movdqu xmm4, xmm0
|
||||
movdqu xmm5, xmm1
|
||||
movdqu xmm6, xmm2
|
||||
movdqu xmm7, xmm3
|
||||
psllq xmm4, 60
|
||||
psllq xmm5, 60
|
||||
psllq xmm6, 60
|
||||
psllq xmm7, 60
|
||||
psrlq xmm0, 4
|
||||
psrlq xmm1, 4
|
||||
psrlq xmm2, 4
|
||||
psrlq xmm3, 4
|
||||
psrldq xmm4, 8
|
||||
psrldq xmm5, 8
|
||||
psrldq xmm6, 8
|
||||
psrldq xmm7, 8
|
||||
por xmm0, xmm4
|
||||
por xmm1, xmm5
|
||||
por xmm2, xmm6
|
||||
por xmm3, xmm7
|
||||
vpshufb xmm0, xmm0, xmm9
|
||||
vpshufb xmm1, xmm1, xmm9
|
||||
vpshufb xmm2, xmm2, xmm9
|
||||
vpshufb xmm3, xmm3, xmm9
|
||||
movdqu OWORD PTR [rdx+256], xmm0
|
||||
movdqu OWORD PTR [rdx+272], xmm1
|
||||
movdqu OWORD PTR [rdx+288], xmm2
|
||||
movdqu OWORD PTR [rdx+304], xmm3
|
||||
movdqu xmm0, OWORD PTR [rdx+64]
|
||||
movdqu xmm1, OWORD PTR [rdx+80]
|
||||
movdqu xmm2, OWORD PTR [rdx+96]
|
||||
movdqu xmm3, OWORD PTR [rdx+112]
|
||||
pshufb xmm0, xmm9
|
||||
pshufb xmm1, xmm9
|
||||
pshufb xmm2, xmm9
|
||||
pshufb xmm3, xmm9
|
||||
movdqu xmm4, xmm0
|
||||
movdqu xmm5, xmm1
|
||||
movdqu xmm6, xmm2
|
||||
movdqu xmm7, xmm3
|
||||
psllq xmm4, 60
|
||||
psllq xmm5, 60
|
||||
psllq xmm6, 60
|
||||
psllq xmm7, 60
|
||||
psrlq xmm0, 4
|
||||
psrlq xmm1, 4
|
||||
psrlq xmm2, 4
|
||||
psrlq xmm3, 4
|
||||
psrldq xmm4, 8
|
||||
psrldq xmm5, 8
|
||||
psrldq xmm6, 8
|
||||
psrldq xmm7, 8
|
||||
por xmm0, xmm4
|
||||
por xmm1, xmm5
|
||||
por xmm2, xmm6
|
||||
por xmm3, xmm7
|
||||
vpshufb xmm0, xmm0, xmm9
|
||||
vpshufb xmm1, xmm1, xmm9
|
||||
vpshufb xmm2, xmm2, xmm9
|
||||
vpshufb xmm3, xmm3, xmm9
|
||||
movdqu OWORD PTR [rdx+320], xmm0
|
||||
movdqu OWORD PTR [rdx+336], xmm1
|
||||
movdqu OWORD PTR [rdx+352], xmm2
|
||||
movdqu OWORD PTR [rdx+368], xmm3
|
||||
movdqu xmm0, OWORD PTR [rdx+128]
|
||||
movdqu xmm1, OWORD PTR [rdx+144]
|
||||
movdqu xmm2, OWORD PTR [rdx+160]
|
||||
movdqu xmm3, OWORD PTR [rdx+176]
|
||||
pshufb xmm0, xmm9
|
||||
pshufb xmm1, xmm9
|
||||
pshufb xmm2, xmm9
|
||||
pshufb xmm3, xmm9
|
||||
movdqu xmm4, xmm0
|
||||
movdqu xmm5, xmm1
|
||||
movdqu xmm6, xmm2
|
||||
movdqu xmm7, xmm3
|
||||
psllq xmm4, 60
|
||||
psllq xmm5, 60
|
||||
psllq xmm6, 60
|
||||
psllq xmm7, 60
|
||||
psrlq xmm0, 4
|
||||
psrlq xmm1, 4
|
||||
psrlq xmm2, 4
|
||||
psrlq xmm3, 4
|
||||
psrldq xmm4, 8
|
||||
psrldq xmm5, 8
|
||||
psrldq xmm6, 8
|
||||
psrldq xmm7, 8
|
||||
por xmm0, xmm4
|
||||
por xmm1, xmm5
|
||||
por xmm2, xmm6
|
||||
por xmm3, xmm7
|
||||
vpshufb xmm0, xmm0, xmm9
|
||||
vpshufb xmm1, xmm1, xmm9
|
||||
vpshufb xmm2, xmm2, xmm9
|
||||
vpshufb xmm3, xmm3, xmm9
|
||||
movdqu OWORD PTR [rdx+384], xmm0
|
||||
movdqu OWORD PTR [rdx+400], xmm1
|
||||
movdqu OWORD PTR [rdx+416], xmm2
|
||||
movdqu OWORD PTR [rdx+432], xmm3
|
||||
movdqu xmm0, OWORD PTR [rdx+192]
|
||||
movdqu xmm1, OWORD PTR [rdx+208]
|
||||
movdqu xmm2, OWORD PTR [rdx+224]
|
||||
movdqu xmm3, OWORD PTR [rdx+240]
|
||||
pshufb xmm0, xmm9
|
||||
pshufb xmm1, xmm9
|
||||
pshufb xmm2, xmm9
|
||||
pshufb xmm3, xmm9
|
||||
movdqu xmm4, xmm0
|
||||
movdqu xmm5, xmm1
|
||||
movdqu xmm6, xmm2
|
||||
movdqu xmm7, xmm3
|
||||
psllq xmm4, 60
|
||||
psllq xmm5, 60
|
||||
psllq xmm6, 60
|
||||
psllq xmm7, 60
|
||||
psrlq xmm0, 4
|
||||
psrlq xmm1, 4
|
||||
psrlq xmm2, 4
|
||||
psrlq xmm3, 4
|
||||
psrldq xmm4, 8
|
||||
psrldq xmm5, 8
|
||||
psrldq xmm6, 8
|
||||
psrldq xmm7, 8
|
||||
por xmm0, xmm4
|
||||
por xmm1, xmm5
|
||||
por xmm2, xmm6
|
||||
por xmm3, xmm7
|
||||
vpshufb xmm0, xmm0, xmm9
|
||||
vpshufb xmm1, xmm1, xmm9
|
||||
vpshufb xmm2, xmm2, xmm9
|
||||
vpshufb xmm3, xmm3, xmm9
|
||||
movdqu OWORD PTR [rdx+448], xmm0
|
||||
movdqu OWORD PTR [rdx+464], xmm1
|
||||
movdqu OWORD PTR [rdx+480], xmm2
|
||||
movdqu OWORD PTR [rdx+496], xmm3
|
||||
movdqu xmm6, OWORD PTR [rsp]
|
||||
movdqu xmm7, OWORD PTR [rsp+16]
|
||||
movdqu xmm8, OWORD PTR [rsp+32]
|
||||
movdqu xmm9, OWORD PTR [rsp+48]
|
||||
movdqu xmm10, OWORD PTR [rsp+64]
|
||||
add rsp, 80
|
||||
ret
|
||||
GCM_generate_m0_aesni ENDP
|
||||
_text ENDS
|
||||
_DATA SEGMENT
|
||||
ALIGN 16
|
||||
L_aes_gcm_one QWORD 0, 1
|
||||
|
@ -6205,6 +6458,225 @@ _text ENDS
|
|||
IFDEF HAVE_INTEL_AVX1
|
||||
_DATA SEGMENT
|
||||
ALIGN 16
|
||||
L_GCM_generate_m0_avx1_rev8 QWORD 579005069656919567, 283686952306183
|
||||
ptr_L_GCM_generate_m0_avx1_rev8 QWORD L_GCM_generate_m0_avx1_rev8
|
||||
_DATA ENDS
|
||||
_DATA SEGMENT
|
||||
ALIGN 16
|
||||
L_GCM_generate_m0_avx1_mod2_128 QWORD 0, 16212958658533785600
|
||||
ptr_L_GCM_generate_m0_avx1_mod2_128 QWORD L_GCM_generate_m0_avx1_mod2_128
|
||||
_DATA ENDS
|
||||
_text SEGMENT READONLY PARA
|
||||
GCM_generate_m0_avx1 PROC
|
||||
sub rsp, 80
|
||||
vmovdqu OWORD PTR [rsp], xmm6
|
||||
vmovdqu OWORD PTR [rsp+16], xmm7
|
||||
vmovdqu OWORD PTR [rsp+32], xmm8
|
||||
vmovdqu OWORD PTR [rsp+48], xmm9
|
||||
vmovdqu OWORD PTR [rsp+64], xmm10
|
||||
vmovdqu xmm9, OWORD PTR L_GCM_generate_m0_avx1_rev8
|
||||
vmovdqu xmm10, OWORD PTR L_GCM_generate_m0_avx1_mod2_128
|
||||
vpxor xmm8, xmm8, xmm8
|
||||
vmovdqu xmm0, OWORD PTR [rcx]
|
||||
vmovdqu OWORD PTR [rdx], xmm8
|
||||
vmovdqu xmm8, xmm0
|
||||
vpshufb xmm0, xmm0, xmm9
|
||||
vpsllq xmm5, xmm0, 63
|
||||
vpsrlq xmm4, xmm0, 1
|
||||
vpslldq xmm1, xmm5, 8
|
||||
vpsrldq xmm5, xmm5, 8
|
||||
vpshufd xmm1, xmm1, 255
|
||||
vpor xmm4, xmm4, xmm5
|
||||
vpsrad xmm1, xmm1, 31
|
||||
vpand xmm1, xmm1, xmm10
|
||||
vpxor xmm1, xmm1, xmm4
|
||||
vpsllq xmm5, xmm1, 63
|
||||
vpsrlq xmm4, xmm1, 1
|
||||
vpslldq xmm2, xmm5, 8
|
||||
vpsrldq xmm5, xmm5, 8
|
||||
vpshufd xmm2, xmm2, 255
|
||||
vpor xmm4, xmm4, xmm5
|
||||
vpsrad xmm2, xmm2, 31
|
||||
vpand xmm2, xmm2, xmm10
|
||||
vpxor xmm2, xmm2, xmm4
|
||||
vpsllq xmm5, xmm2, 63
|
||||
vpsrlq xmm4, xmm2, 1
|
||||
vpslldq xmm3, xmm5, 8
|
||||
vpsrldq xmm5, xmm5, 8
|
||||
vpshufd xmm3, xmm3, 255
|
||||
vpor xmm4, xmm4, xmm5
|
||||
vpsrad xmm3, xmm3, 31
|
||||
vpand xmm3, xmm3, xmm10
|
||||
vpxor xmm3, xmm3, xmm4
|
||||
vpshufb xmm3, xmm3, xmm9
|
||||
vpshufb xmm2, xmm2, xmm9
|
||||
vpshufb xmm1, xmm1, xmm9
|
||||
vpshufb xmm0, xmm0, xmm9
|
||||
vpxor xmm8, xmm3, xmm2
|
||||
vmovdqu OWORD PTR [rdx+16], xmm3
|
||||
vmovdqu OWORD PTR [rdx+32], xmm2
|
||||
vmovdqu OWORD PTR [rdx+48], xmm8
|
||||
vmovdqu OWORD PTR [rdx+64], xmm1
|
||||
vpxor xmm4, xmm3, xmm1
|
||||
vpxor xmm5, xmm2, xmm1
|
||||
vpxor xmm6, xmm8, xmm1
|
||||
vmovdqu OWORD PTR [rdx+80], xmm4
|
||||
vmovdqu OWORD PTR [rdx+96], xmm5
|
||||
vmovdqu OWORD PTR [rdx+112], xmm6
|
||||
vmovdqu OWORD PTR [rdx+128], xmm0
|
||||
vpxor xmm1, xmm1, xmm0
|
||||
vpxor xmm4, xmm3, xmm0
|
||||
vpxor xmm6, xmm2, xmm0
|
||||
vmovdqu OWORD PTR [rdx+144], xmm4
|
||||
vmovdqu OWORD PTR [rdx+160], xmm6
|
||||
vpxor xmm6, xmm3, xmm6
|
||||
vmovdqu OWORD PTR [rdx+176], xmm6
|
||||
vmovdqu OWORD PTR [rdx+192], xmm1
|
||||
vpxor xmm4, xmm3, xmm1
|
||||
vpxor xmm5, xmm2, xmm1
|
||||
vpxor xmm6, xmm8, xmm1
|
||||
vmovdqu OWORD PTR [rdx+208], xmm4
|
||||
vmovdqu OWORD PTR [rdx+224], xmm5
|
||||
vmovdqu OWORD PTR [rdx+240], xmm6
|
||||
vmovdqu xmm0, OWORD PTR [rdx]
|
||||
vmovdqu xmm1, OWORD PTR [rdx+16]
|
||||
vmovdqu xmm2, OWORD PTR [rdx+32]
|
||||
vmovdqu xmm3, OWORD PTR [rdx+48]
|
||||
vpshufb xmm0, xmm0, xmm9
|
||||
vpshufb xmm1, xmm1, xmm9
|
||||
vpshufb xmm2, xmm2, xmm9
|
||||
vpshufb xmm3, xmm3, xmm9
|
||||
vpsllq xmm4, xmm0, 60
|
||||
vpsllq xmm5, xmm1, 60
|
||||
vpsllq xmm6, xmm2, 60
|
||||
vpsllq xmm7, xmm3, 60
|
||||
vpsrlq xmm0, xmm0, 4
|
||||
vpsrlq xmm1, xmm1, 4
|
||||
vpsrlq xmm2, xmm2, 4
|
||||
vpsrlq xmm3, xmm3, 4
|
||||
vpsrldq xmm4, xmm4, 8
|
||||
vpsrldq xmm5, xmm5, 8
|
||||
vpsrldq xmm6, xmm6, 8
|
||||
vpsrldq xmm7, xmm7, 8
|
||||
vpor xmm0, xmm0, xmm4
|
||||
vpor xmm1, xmm1, xmm5
|
||||
vpor xmm2, xmm2, xmm6
|
||||
vpor xmm3, xmm3, xmm7
|
||||
vpshufb xmm0, xmm0, xmm9
|
||||
vpshufb xmm1, xmm1, xmm9
|
||||
vpshufb xmm2, xmm2, xmm9
|
||||
vpshufb xmm3, xmm3, xmm9
|
||||
vmovdqu OWORD PTR [rdx+256], xmm0
|
||||
vmovdqu OWORD PTR [rdx+272], xmm1
|
||||
vmovdqu OWORD PTR [rdx+288], xmm2
|
||||
vmovdqu OWORD PTR [rdx+304], xmm3
|
||||
vmovdqu xmm0, OWORD PTR [rdx+64]
|
||||
vmovdqu xmm1, OWORD PTR [rdx+80]
|
||||
vmovdqu xmm2, OWORD PTR [rdx+96]
|
||||
vmovdqu xmm3, OWORD PTR [rdx+112]
|
||||
vpshufb xmm0, xmm0, xmm9
|
||||
vpshufb xmm1, xmm1, xmm9
|
||||
vpshufb xmm2, xmm2, xmm9
|
||||
vpshufb xmm3, xmm3, xmm9
|
||||
vpsllq xmm4, xmm0, 60
|
||||
vpsllq xmm5, xmm1, 60
|
||||
vpsllq xmm6, xmm2, 60
|
||||
vpsllq xmm7, xmm3, 60
|
||||
vpsrlq xmm0, xmm0, 4
|
||||
vpsrlq xmm1, xmm1, 4
|
||||
vpsrlq xmm2, xmm2, 4
|
||||
vpsrlq xmm3, xmm3, 4
|
||||
vpsrldq xmm4, xmm4, 8
|
||||
vpsrldq xmm5, xmm5, 8
|
||||
vpsrldq xmm6, xmm6, 8
|
||||
vpsrldq xmm7, xmm7, 8
|
||||
vpor xmm0, xmm0, xmm4
|
||||
vpor xmm1, xmm1, xmm5
|
||||
vpor xmm2, xmm2, xmm6
|
||||
vpor xmm3, xmm3, xmm7
|
||||
vpshufb xmm0, xmm0, xmm9
|
||||
vpshufb xmm1, xmm1, xmm9
|
||||
vpshufb xmm2, xmm2, xmm9
|
||||
vpshufb xmm3, xmm3, xmm9
|
||||
vmovdqu OWORD PTR [rdx+320], xmm0
|
||||
vmovdqu OWORD PTR [rdx+336], xmm1
|
||||
vmovdqu OWORD PTR [rdx+352], xmm2
|
||||
vmovdqu OWORD PTR [rdx+368], xmm3
|
||||
vmovdqu xmm0, OWORD PTR [rdx+128]
|
||||
vmovdqu xmm1, OWORD PTR [rdx+144]
|
||||
vmovdqu xmm2, OWORD PTR [rdx+160]
|
||||
vmovdqu xmm3, OWORD PTR [rdx+176]
|
||||
vpshufb xmm0, xmm0, xmm9
|
||||
vpshufb xmm1, xmm1, xmm9
|
||||
vpshufb xmm2, xmm2, xmm9
|
||||
vpshufb xmm3, xmm3, xmm9
|
||||
vpsllq xmm4, xmm0, 60
|
||||
vpsllq xmm5, xmm1, 60
|
||||
vpsllq xmm6, xmm2, 60
|
||||
vpsllq xmm7, xmm3, 60
|
||||
vpsrlq xmm0, xmm0, 4
|
||||
vpsrlq xmm1, xmm1, 4
|
||||
vpsrlq xmm2, xmm2, 4
|
||||
vpsrlq xmm3, xmm3, 4
|
||||
vpsrldq xmm4, xmm4, 8
|
||||
vpsrldq xmm5, xmm5, 8
|
||||
vpsrldq xmm6, xmm6, 8
|
||||
vpsrldq xmm7, xmm7, 8
|
||||
vpor xmm0, xmm0, xmm4
|
||||
vpor xmm1, xmm1, xmm5
|
||||
vpor xmm2, xmm2, xmm6
|
||||
vpor xmm3, xmm3, xmm7
|
||||
vpshufb xmm0, xmm0, xmm9
|
||||
vpshufb xmm1, xmm1, xmm9
|
||||
vpshufb xmm2, xmm2, xmm9
|
||||
vpshufb xmm3, xmm3, xmm9
|
||||
vmovdqu OWORD PTR [rdx+384], xmm0
|
||||
vmovdqu OWORD PTR [rdx+400], xmm1
|
||||
vmovdqu OWORD PTR [rdx+416], xmm2
|
||||
vmovdqu OWORD PTR [rdx+432], xmm3
|
||||
vmovdqu xmm0, OWORD PTR [rdx+192]
|
||||
vmovdqu xmm1, OWORD PTR [rdx+208]
|
||||
vmovdqu xmm2, OWORD PTR [rdx+224]
|
||||
vmovdqu xmm3, OWORD PTR [rdx+240]
|
||||
vpshufb xmm0, xmm0, xmm9
|
||||
vpshufb xmm1, xmm1, xmm9
|
||||
vpshufb xmm2, xmm2, xmm9
|
||||
vpshufb xmm3, xmm3, xmm9
|
||||
vpsllq xmm4, xmm0, 60
|
||||
vpsllq xmm5, xmm1, 60
|
||||
vpsllq xmm6, xmm2, 60
|
||||
vpsllq xmm7, xmm3, 60
|
||||
vpsrlq xmm0, xmm0, 4
|
||||
vpsrlq xmm1, xmm1, 4
|
||||
vpsrlq xmm2, xmm2, 4
|
||||
vpsrlq xmm3, xmm3, 4
|
||||
vpsrldq xmm4, xmm4, 8
|
||||
vpsrldq xmm5, xmm5, 8
|
||||
vpsrldq xmm6, xmm6, 8
|
||||
vpsrldq xmm7, xmm7, 8
|
||||
vpor xmm0, xmm0, xmm4
|
||||
vpor xmm1, xmm1, xmm5
|
||||
vpor xmm2, xmm2, xmm6
|
||||
vpor xmm3, xmm3, xmm7
|
||||
vpshufb xmm0, xmm0, xmm9
|
||||
vpshufb xmm1, xmm1, xmm9
|
||||
vpshufb xmm2, xmm2, xmm9
|
||||
vpshufb xmm3, xmm3, xmm9
|
||||
vmovdqu OWORD PTR [rdx+448], xmm0
|
||||
vmovdqu OWORD PTR [rdx+464], xmm1
|
||||
vmovdqu OWORD PTR [rdx+480], xmm2
|
||||
vmovdqu OWORD PTR [rdx+496], xmm3
|
||||
vmovdqu xmm6, OWORD PTR [rsp]
|
||||
vmovdqu xmm7, OWORD PTR [rsp+16]
|
||||
vmovdqu xmm8, OWORD PTR [rsp+32]
|
||||
vmovdqu xmm9, OWORD PTR [rsp+48]
|
||||
vmovdqu xmm10, OWORD PTR [rsp+64]
|
||||
add rsp, 80
|
||||
ret
|
||||
GCM_generate_m0_avx1 ENDP
|
||||
_text ENDS
|
||||
_DATA SEGMENT
|
||||
ALIGN 16
|
||||
L_avx1_aes_gcm_one QWORD 0, 1
|
||||
ptr_L_avx1_aes_gcm_one QWORD L_avx1_aes_gcm_one
|
||||
_DATA ENDS
|
||||
|
@ -11436,6 +11908,225 @@ ENDIF
|
|||
IFDEF HAVE_INTEL_AVX2
|
||||
_DATA SEGMENT
|
||||
ALIGN 16
|
||||
L_GCM_generate_m0_avx2_rev8 QWORD 579005069656919567, 283686952306183
|
||||
ptr_L_GCM_generate_m0_avx2_rev8 QWORD L_GCM_generate_m0_avx2_rev8
|
||||
_DATA ENDS
|
||||
_DATA SEGMENT
|
||||
ALIGN 16
|
||||
L_GCM_generate_m0_avx2_mod2_128 QWORD 0, 16212958658533785600
|
||||
ptr_L_GCM_generate_m0_avx2_mod2_128 QWORD L_GCM_generate_m0_avx2_mod2_128
|
||||
_DATA ENDS
|
||||
_text SEGMENT READONLY PARA
|
||||
GCM_generate_m0_avx2 PROC
|
||||
sub rsp, 80
|
||||
vmovdqu OWORD PTR [rsp], xmm6
|
||||
vmovdqu OWORD PTR [rsp+16], xmm7
|
||||
vmovdqu OWORD PTR [rsp+32], xmm8
|
||||
vmovdqu OWORD PTR [rsp+48], xmm9
|
||||
vmovdqu OWORD PTR [rsp+64], xmm10
|
||||
vmovdqu xmm9, OWORD PTR L_GCM_generate_m0_avx2_rev8
|
||||
vmovdqu xmm10, OWORD PTR L_GCM_generate_m0_avx2_mod2_128
|
||||
vpxor xmm8, xmm8, xmm8
|
||||
vmovdqu xmm0, OWORD PTR [rcx]
|
||||
vmovdqu OWORD PTR [rdx], xmm8
|
||||
vmovdqu xmm8, xmm0
|
||||
vpshufb xmm0, xmm0, xmm9
|
||||
vpsllq xmm5, xmm0, 63
|
||||
vpsrlq xmm4, xmm0, 1
|
||||
vpslldq xmm1, xmm5, 8
|
||||
vpsrldq xmm5, xmm5, 8
|
||||
vpshufd xmm1, xmm1, 255
|
||||
vpor xmm4, xmm4, xmm5
|
||||
vpsrad xmm1, xmm1, 31
|
||||
vpand xmm1, xmm1, xmm10
|
||||
vpxor xmm1, xmm1, xmm4
|
||||
vpsllq xmm5, xmm1, 63
|
||||
vpsrlq xmm4, xmm1, 1
|
||||
vpslldq xmm2, xmm5, 8
|
||||
vpsrldq xmm5, xmm5, 8
|
||||
vpshufd xmm2, xmm2, 255
|
||||
vpor xmm4, xmm4, xmm5
|
||||
vpsrad xmm2, xmm2, 31
|
||||
vpand xmm2, xmm2, xmm10
|
||||
vpxor xmm2, xmm2, xmm4
|
||||
vpsllq xmm5, xmm2, 63
|
||||
vpsrlq xmm4, xmm2, 1
|
||||
vpslldq xmm3, xmm5, 8
|
||||
vpsrldq xmm5, xmm5, 8
|
||||
vpshufd xmm3, xmm3, 255
|
||||
vpor xmm4, xmm4, xmm5
|
||||
vpsrad xmm3, xmm3, 31
|
||||
vpand xmm3, xmm3, xmm10
|
||||
vpxor xmm3, xmm3, xmm4
|
||||
vpshufb xmm2, xmm2, xmm9
|
||||
vpshufb xmm3, xmm3, xmm9
|
||||
vpshufb xmm0, xmm0, xmm9
|
||||
vpshufb xmm1, xmm1, xmm9
|
||||
vpxor xmm8, xmm3, xmm2
|
||||
vmovdqu OWORD PTR [rdx+16], xmm3
|
||||
vmovdqu OWORD PTR [rdx+32], xmm2
|
||||
vmovdqu OWORD PTR [rdx+48], xmm8
|
||||
vmovdqu OWORD PTR [rdx+64], xmm1
|
||||
vpxor xmm4, xmm3, xmm1
|
||||
vpxor xmm5, xmm2, xmm1
|
||||
vpxor xmm6, xmm8, xmm1
|
||||
vmovdqu OWORD PTR [rdx+80], xmm4
|
||||
vmovdqu OWORD PTR [rdx+96], xmm5
|
||||
vmovdqu OWORD PTR [rdx+112], xmm6
|
||||
vmovdqu OWORD PTR [rdx+128], xmm0
|
||||
vpxor xmm1, xmm1, xmm0
|
||||
vpxor xmm4, xmm3, xmm0
|
||||
vpxor xmm6, xmm2, xmm0
|
||||
vmovdqu OWORD PTR [rdx+144], xmm4
|
||||
vmovdqu OWORD PTR [rdx+160], xmm6
|
||||
vpxor xmm6, xmm3, xmm6
|
||||
vmovdqu OWORD PTR [rdx+176], xmm6
|
||||
vmovdqu OWORD PTR [rdx+192], xmm1
|
||||
vpxor xmm4, xmm3, xmm1
|
||||
vpxor xmm5, xmm2, xmm1
|
||||
vpxor xmm6, xmm8, xmm1
|
||||
vmovdqu OWORD PTR [rdx+208], xmm4
|
||||
vmovdqu OWORD PTR [rdx+224], xmm5
|
||||
vmovdqu OWORD PTR [rdx+240], xmm6
|
||||
vmovdqu xmm0, OWORD PTR [rdx]
|
||||
vmovdqu xmm1, OWORD PTR [rdx+16]
|
||||
vmovdqu xmm2, OWORD PTR [rdx+32]
|
||||
vmovdqu xmm3, OWORD PTR [rdx+48]
|
||||
vpshufb xmm0, xmm0, xmm9
|
||||
vpshufb xmm1, xmm1, xmm9
|
||||
vpshufb xmm2, xmm2, xmm9
|
||||
vpshufb xmm3, xmm3, xmm9
|
||||
vpsllq xmm4, xmm0, 60
|
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+256], xmm0
vmovdqu OWORD PTR [rdx+272], xmm1
vmovdqu OWORD PTR [rdx+288], xmm2
vmovdqu OWORD PTR [rdx+304], xmm3
vmovdqu xmm0, OWORD PTR [rdx+64]
vmovdqu xmm1, OWORD PTR [rdx+80]
vmovdqu xmm2, OWORD PTR [rdx+96]
vmovdqu xmm3, OWORD PTR [rdx+112]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+320], xmm0
vmovdqu OWORD PTR [rdx+336], xmm1
vmovdqu OWORD PTR [rdx+352], xmm2
vmovdqu OWORD PTR [rdx+368], xmm3
vmovdqu xmm0, OWORD PTR [rdx+128]
vmovdqu xmm1, OWORD PTR [rdx+144]
vmovdqu xmm2, OWORD PTR [rdx+160]
vmovdqu xmm3, OWORD PTR [rdx+176]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+384], xmm0
vmovdqu OWORD PTR [rdx+400], xmm1
vmovdqu OWORD PTR [rdx+416], xmm2
vmovdqu OWORD PTR [rdx+432], xmm3
vmovdqu xmm0, OWORD PTR [rdx+192]
vmovdqu xmm1, OWORD PTR [rdx+208]
vmovdqu xmm2, OWORD PTR [rdx+224]
vmovdqu xmm3, OWORD PTR [rdx+240]
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vpsllq xmm4, xmm0, 60
vpsllq xmm5, xmm1, 60
vpsllq xmm6, xmm2, 60
vpsllq xmm7, xmm3, 60
vpsrlq xmm0, xmm0, 4
vpsrlq xmm1, xmm1, 4
vpsrlq xmm2, xmm2, 4
vpsrlq xmm3, xmm3, 4
vpsrldq xmm4, xmm4, 8
vpsrldq xmm5, xmm5, 8
vpsrldq xmm6, xmm6, 8
vpsrldq xmm7, xmm7, 8
vpor xmm0, xmm0, xmm4
vpor xmm1, xmm1, xmm5
vpor xmm2, xmm2, xmm6
vpor xmm3, xmm3, xmm7
vpshufb xmm0, xmm0, xmm9
vpshufb xmm1, xmm1, xmm9
vpshufb xmm2, xmm2, xmm9
vpshufb xmm3, xmm3, xmm9
vmovdqu OWORD PTR [rdx+448], xmm0
vmovdqu OWORD PTR [rdx+464], xmm1
vmovdqu OWORD PTR [rdx+480], xmm2
vmovdqu OWORD PTR [rdx+496], xmm3
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm8, OWORD PTR [rsp+32]
vmovdqu xmm9, OWORD PTR [rsp+48]
vmovdqu xmm10, OWORD PTR [rsp+64]
add rsp, 80
ret
GCM_generate_m0_avx2 ENDP
_text ENDS
_DATA SEGMENT
ALIGN 16
L_avx2_aes_gcm_one QWORD 0, 1
ptr_L_avx2_aes_gcm_one QWORD L_avx2_aes_gcm_one
_DATA ENDS

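For readers following the M0 table build above: each vpsllq-by-60 / vpsrlq-by-4 / vpsrldq-by-8 / vpor group is a logical right shift of one 128-bit table entry by 4 bits, performed per 64-bit lane and then stitched across the lane boundary (the vpshufb against xmm9 byte-reverses the entry before and after). A minimal C sketch of that lane-stitching step, with hypothetical names and not the shipped fallback code:

```c
#include <stdint.h>

/* Illustration only: shift a 128-bit value right by 4 bits when it is held
 * as two 64-bit lanes (lo = bits 0..63, hi = bits 64..127), mirroring the
 * vpsllq 60 / vpsrlq 4 / vpsrldq 8 / vpor pattern in the assembly above. */
typedef struct { uint64_t lo; uint64_t hi; } u128_lanes;

static u128_lanes shift_right_4(u128_lanes v)
{
    u128_lanes r;
    r.lo = (v.lo >> 4) | (v.hi << 60); /* bits crossing the lane boundary */
    r.hi =  v.hi >> 4;
    return r;
}
```
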
@ -155,76 +155,72 @@ int wc_HmacSizeByType(int type)
return ret;
}

int _InitHmac(Hmac* hmac, int type, void* heap)
static int HmacKeyInitHash(wc_HmacHash* hash, int type, void* heap, int devId)
{
int ret = 0;
#ifdef WOLF_CRYPTO_CB
int devId = hmac->devId;
#else
int devId = INVALID_DEVID;
#endif

switch (type) {
#ifndef NO_MD5
case WC_MD5:
ret = wc_InitMd5_ex(&hmac->hash.md5, heap, devId);
ret = wc_InitMd5_ex(&hash->md5, heap, devId);
break;
#endif /* !NO_MD5 */

#ifndef NO_SHA
case WC_SHA:
ret = wc_InitSha_ex(&hmac->hash.sha, heap, devId);
ret = wc_InitSha_ex(&hash->sha, heap, devId);
break;
#endif /* !NO_SHA */

#ifdef WOLFSSL_SHA224
case WC_SHA224:
ret = wc_InitSha224_ex(&hmac->hash.sha224, heap, devId);
ret = wc_InitSha224_ex(&hash->sha224, heap, devId);
break;
#endif /* WOLFSSL_SHA224 */

#ifndef NO_SHA256
case WC_SHA256:
ret = wc_InitSha256_ex(&hmac->hash.sha256, heap, devId);
ret = wc_InitSha256_ex(&hash->sha256, heap, devId);
break;
#endif /* !NO_SHA256 */

#ifdef WOLFSSL_SHA384
case WC_SHA384:
ret = wc_InitSha384_ex(&hmac->hash.sha384, heap, devId);
ret = wc_InitSha384_ex(&hash->sha384, heap, devId);
break;
#endif /* WOLFSSL_SHA384 */
#ifdef WOLFSSL_SHA512
case WC_SHA512:
ret = wc_InitSha512_ex(&hmac->hash.sha512, heap, devId);
ret = wc_InitSha512_ex(&hash->sha512, heap, devId);
break;
#endif /* WOLFSSL_SHA512 */

#ifdef WOLFSSL_SHA3
#ifndef WOLFSSL_NOSHA3_224
case WC_SHA3_224:
ret = wc_InitSha3_224(&hmac->hash.sha3, heap, devId);
ret = wc_InitSha3_224(&hash->sha3, heap, devId);
break;
#endif
#ifndef WOLFSSL_NOSHA3_256
case WC_SHA3_256:
ret = wc_InitSha3_256(&hmac->hash.sha3, heap, devId);
ret = wc_InitSha3_256(&hash->sha3, heap, devId);
break;
#endif
#ifndef WOLFSSL_NOSHA3_384
case WC_SHA3_384:
ret = wc_InitSha3_384(&hmac->hash.sha3, heap, devId);
ret = wc_InitSha3_384(&hash->sha3, heap, devId);
break;
#endif
#ifndef WOLFSSL_NOSHA3_512
case WC_SHA3_512:
ret = wc_InitSha3_512(&hmac->hash.sha3, heap, devId);
ret = wc_InitSha3_512(&hash->sha3, heap, devId);
break;
#endif
#endif

#ifdef WOLFSSL_SM3
case WC_SM3:
ret = wc_InitSm3(&hmac->hash.sm3, heap, devId);
ret = wc_InitSm3(&hash->sm3, heap, devId);
break;
#endif

@ -233,6 +229,22 @@ int _InitHmac(Hmac* hmac, int type, void* heap)
break;
}

return ret;
}

int _InitHmac(Hmac* hmac, int type, void* heap)
{
int ret;
#ifdef WOLF_CRYPTO_CB
int devId = hmac->devId;
#else
int devId = INVALID_DEVID;
#endif

ret = HmacKeyInitHash(&hmac->hash, type, heap, devId);
if (ret != 0)
return ret;

/* default to NULL heap hint or test value */
#ifdef WOLFSSL_HEAP_TEST
hmac->heap = (void*)WOLFSSL_HEAP_TEST;

@ -243,6 +255,158 @@ int _InitHmac(Hmac* hmac, int type, void* heap)
return ret;
}

#ifdef WOLFSSL_HMAC_COPY_HASH
static int HmacKeyCopyHash(byte macType, wc_HmacHash* src, wc_HmacHash* dst)
{
int ret = 0;

switch (macType) {
#ifndef NO_MD5
case WC_MD5:
ret = wc_Md5Copy(&src->md5, &dst->md5);
break;
#endif /* !NO_MD5 */

#ifndef NO_SHA
case WC_SHA:
ret = wc_ShaCopy(&src->sha, &dst->sha);
break;
#endif /* !NO_SHA */

#ifdef WOLFSSL_SHA224
case WC_SHA224:
ret = wc_Sha224Copy(&src->sha224, &dst->sha224);
break;
#endif /* WOLFSSL_SHA224 */
#ifndef NO_SHA256
case WC_SHA256:
ret = wc_Sha256Copy(&src->sha256, &dst->sha256);
break;
#endif /* !NO_SHA256 */

#ifdef WOLFSSL_SHA384
case WC_SHA384:
ret = wc_Sha384Copy(&src->sha384, &dst->sha384);
break;
#endif /* WOLFSSL_SHA384 */
#ifdef WOLFSSL_SHA512
case WC_SHA512:
ret = wc_Sha512Copy(&src->sha512, &dst->sha512);
break;
#endif /* WOLFSSL_SHA512 */

#ifdef WOLFSSL_SHA3
#ifndef WOLFSSL_NOSHA3_224
case WC_SHA3_224:
ret = wc_Sha3_224_Copy(&src->sha3, &dst->sha3);
break;
#endif
#ifndef WOLFSSL_NOSHA3_256
case WC_SHA3_256:
ret = wc_Sha3_256_Copy(&src->sha3, &dst->sha3);
break;
#endif
#ifndef WOLFSSL_NOSHA3_384
case WC_SHA3_384:
ret = wc_Sha3_384_Copy(&src->sha3, &dst->sha3);
break;
#endif
#ifndef WOLFSSL_NOSHA3_512
case WC_SHA3_512:
ret = wc_Sha3_512_Copy(&src->sha3, &dst->sha3);
break;
#endif
#endif /* WOLFSSL_SHA3 */

#ifdef WOLFSSL_SM3
case WC_SM3:
ret = wc_Sm3Copy(&src->sm3, &dst->sm3);
break;
#endif

default:
break;
}

return ret;
}
#endif

static int HmacKeyHashUpdate(byte macType, wc_HmacHash* hash, byte* pad)
{
int ret = 0;

switch (macType) {
#ifndef NO_MD5
case WC_MD5:
ret = wc_Md5Update(&hash->md5, pad, WC_MD5_BLOCK_SIZE);
break;
#endif /* !NO_MD5 */

#ifndef NO_SHA
case WC_SHA:
ret = wc_ShaUpdate(&hash->sha, pad, WC_SHA_BLOCK_SIZE);
break;
#endif /* !NO_SHA */

#ifdef WOLFSSL_SHA224
case WC_SHA224:
ret = wc_Sha224Update(&hash->sha224, pad, WC_SHA224_BLOCK_SIZE);
break;
#endif /* WOLFSSL_SHA224 */
#ifndef NO_SHA256
case WC_SHA256:
ret = wc_Sha256Update(&hash->sha256, pad, WC_SHA256_BLOCK_SIZE);
break;
#endif /* !NO_SHA256 */

#ifdef WOLFSSL_SHA384
case WC_SHA384:
ret = wc_Sha384Update(&hash->sha384, pad, WC_SHA384_BLOCK_SIZE);
break;
#endif /* WOLFSSL_SHA384 */
#ifdef WOLFSSL_SHA512
case WC_SHA512:
ret = wc_Sha512Update(&hash->sha512, pad, WC_SHA512_BLOCK_SIZE);
break;
#endif /* WOLFSSL_SHA512 */

#ifdef WOLFSSL_SHA3
#ifndef WOLFSSL_NOSHA3_224
case WC_SHA3_224:
ret = wc_Sha3_224_Update(&hash->sha3, pad, WC_SHA3_224_BLOCK_SIZE);
break;
#endif
#ifndef WOLFSSL_NOSHA3_256
case WC_SHA3_256:
ret = wc_Sha3_256_Update(&hash->sha3, pad, WC_SHA3_256_BLOCK_SIZE);
break;
#endif
#ifndef WOLFSSL_NOSHA3_384
case WC_SHA3_384:
ret = wc_Sha3_384_Update(&hash->sha3, pad, WC_SHA3_384_BLOCK_SIZE);
break;
#endif
#ifndef WOLFSSL_NOSHA3_512
case WC_SHA3_512:
ret = wc_Sha3_512_Update(&hash->sha3, pad, WC_SHA3_512_BLOCK_SIZE);
break;
#endif
#endif /* WOLFSSL_SHA3 */

#ifdef WOLFSSL_SM3
case WC_SM3:
ret = wc_Sm3Update(&hash->sm3, pad, WC_SM3_BLOCK_SIZE);
break;
#endif

default:
break;
}

return ret;
}


int wc_HmacSetKey_ex(Hmac* hmac, int type, const byte* key, word32 length,
int allowFlag)

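The helpers above are the core of the copy-hash path: once a key is set, the digest state after absorbing the ipad block (and the opad block) never changes, so it can be cached and duplicated instead of recomputed for every MAC. A rough sketch of the idea using plain SHA-256 calls; names and flow are illustrative, not the shipped HMAC code:

```c
#include <wolfssl/wolfcrypt/sha256.h>

/* Illustrative only: cache the pad-keyed states once, then reuse them for
 * every message.  ipad/opad are the key XORed with 0x36/0x5c, as in HMAC. */
static int hmac_sha256_cached(const byte* ipad, const byte* opad,
                              const byte* msg, word32 msgLen, byte* mac)
{
    wc_Sha256 i_hash, o_hash, work;
    byte inner[WC_SHA256_DIGEST_SIZE];
    int ret;

    /* per key: what wc_HmacSetKey_ex() does when WOLFSSL_HMAC_COPY_HASH is on */
    ret = wc_InitSha256(&i_hash);
    if (ret == 0) ret = wc_Sha256Update(&i_hash, ipad, WC_SHA256_BLOCK_SIZE);
    if (ret == 0) ret = wc_InitSha256(&o_hash);
    if (ret == 0) ret = wc_Sha256Update(&o_hash, opad, WC_SHA256_BLOCK_SIZE);

    /* per message: copy the cached states instead of re-hashing the pads */
    if (ret == 0) ret = wc_InitSha256(&work);
    if (ret == 0) ret = wc_Sha256Copy(&i_hash, &work);
    if (ret == 0) ret = wc_Sha256Update(&work, msg, msgLen);
    if (ret == 0) ret = wc_Sha256Final(&work, inner);
    if (ret == 0) ret = wc_Sha256Copy(&o_hash, &work);
    if (ret == 0) ret = wc_Sha256Update(&work, inner, WC_SHA256_DIGEST_SIZE);
    if (ret == 0) ret = wc_Sha256Final(&work, mac);
    return ret;
}
```

This trades one block compression per pad per MAC for two extra cached hash states; the next hunk wires the same idea into wc_HmacSetKey_ex().
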
@ -603,6 +767,29 @@ int wc_HmacSetKey_ex(Hmac* hmac, int type, const byte* key, word32 length,
}
}

#ifdef WOLFSSL_HMAC_COPY_HASH
if (ret == 0) {
#ifdef WOLF_CRYPTO_CB
int devId = hmac->devId;
#else
int devId = INVALID_DEVID;
#endif

ret = HmacKeyInitHash(&hmac->i_hash, hmac->macType, heap, devId);
if (ret != 0)
return ret;
ret = HmacKeyInitHash(&hmac->o_hash, hmac->macType, heap, devId);
if (ret != 0)
return ret;
ret = HmacKeyHashUpdate(hmac->macType, &hmac->i_hash, ip);
if (ret != 0)
return ret;
ret = HmacKeyHashUpdate(hmac->macType, &hmac->o_hash, op);
if (ret != 0)
return ret;
}
#endif

return ret;
#endif /* WOLFSSL_MAXQ108X */
}

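With the pad-keyed states prepared at key-set time, the benefit shows up when one Hmac object is reused for many messages. A small, hedged example of that pattern using the public API (error handling trimmed; buffer layout is the caller's choice):

```c
#include <wolfssl/wolfcrypt/hmac.h>

/* Illustrative loop: one wc_HmacSetKey(), many MACs.  With
 * WOLFSSL_HMAC_COPY_HASH each iteration starts from a copied ipad/opad
 * state rather than re-hashing the pads. */
int mac_many(const byte* key, word32 keyLen,
             const byte* msgs, word32 msgLen, int count, byte* macsOut)
{
    Hmac hmac;
    int  i;
    int  ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);

    if (ret == 0)
        ret = wc_HmacSetKey(&hmac, WC_SHA256, key, keyLen);
    for (i = 0; ret == 0 && i < count; i++) {
        ret = wc_HmacUpdate(&hmac, msgs + (word32)i * msgLen, msgLen);
        if (ret == 0)
            ret = wc_HmacFinal(&hmac, macsOut + i * WC_SHA256_DIGEST_SIZE);
    }
    wc_HmacFree(&hmac);
    return ret;
}
```
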
@ -618,96 +805,6 @@ int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
return wc_HmacSetKey_ex(hmac, type, key, length, allowFlag);
}

static int HmacKeyInnerHash(Hmac* hmac)
{
int ret = 0;

switch (hmac->macType) {
#ifndef NO_MD5
case WC_MD5:
ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->ipad,
WC_MD5_BLOCK_SIZE);
break;
#endif /* !NO_MD5 */

#ifndef NO_SHA
case WC_SHA:
ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->ipad,
WC_SHA_BLOCK_SIZE);
break;
#endif /* !NO_SHA */

#ifdef WOLFSSL_SHA224
case WC_SHA224:
ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->ipad,
WC_SHA224_BLOCK_SIZE);
break;
#endif /* WOLFSSL_SHA224 */
#ifndef NO_SHA256
case WC_SHA256:
ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->ipad,
WC_SHA256_BLOCK_SIZE);
break;
#endif /* !NO_SHA256 */

#ifdef WOLFSSL_SHA384
case WC_SHA384:
ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->ipad,
WC_SHA384_BLOCK_SIZE);
break;
#endif /* WOLFSSL_SHA384 */
#ifdef WOLFSSL_SHA512
case WC_SHA512:
ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->ipad,
WC_SHA512_BLOCK_SIZE);
break;
#endif /* WOLFSSL_SHA512 */

#ifdef WOLFSSL_SHA3
#ifndef WOLFSSL_NOSHA3_224
case WC_SHA3_224:
ret = wc_Sha3_224_Update(&hmac->hash.sha3, (byte*)hmac->ipad,
WC_SHA3_224_BLOCK_SIZE);
break;
#endif
#ifndef WOLFSSL_NOSHA3_256
case WC_SHA3_256:
ret = wc_Sha3_256_Update(&hmac->hash.sha3, (byte*)hmac->ipad,
WC_SHA3_256_BLOCK_SIZE);
break;
#endif
#ifndef WOLFSSL_NOSHA3_384
case WC_SHA3_384:
ret = wc_Sha3_384_Update(&hmac->hash.sha3, (byte*)hmac->ipad,
WC_SHA3_384_BLOCK_SIZE);
break;
#endif
#ifndef WOLFSSL_NOSHA3_512
case WC_SHA3_512:
ret = wc_Sha3_512_Update(&hmac->hash.sha3, (byte*)hmac->ipad,
WC_SHA3_512_BLOCK_SIZE);
break;
#endif
#endif /* WOLFSSL_SHA3 */

#ifdef WOLFSSL_SM3
case WC_SM3:
ret = wc_Sm3Update(&hmac->hash.sm3, (byte*)hmac->ipad,
WC_SM3_BLOCK_SIZE);
break;
#endif

default:
break;
}

if (ret == 0)
hmac->innerHashKeyed = WC_HMAC_INNER_HASH_KEYED_SW;

return ret;
}


int wc_HmacUpdate(Hmac* hmac, const byte* msg, word32 length)
{
int ret = 0;

@ -739,9 +836,14 @@ int wc_HmacUpdate(Hmac* hmac, const byte* msg, word32 length)
#endif /* WOLFSSL_ASYNC_CRYPT */

if (!hmac->innerHashKeyed) {
ret = HmacKeyInnerHash(hmac);
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = HmacKeyHashUpdate(hmac->macType, &hmac->hash, (byte*)hmac->ipad);
#else
ret = HmacKeyCopyHash(hmac->macType, &hmac->i_hash, &hmac->hash);
#endif
if (ret != 0)
return ret;
hmac->innerHashKeyed = WC_HMAC_INNER_HASH_KEYED_SW;
}

switch (hmac->macType) {

@ -851,9 +953,14 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
#endif /* WOLFSSL_ASYNC_CRYPT */

if (!hmac->innerHashKeyed) {
ret = HmacKeyInnerHash(hmac);
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = HmacKeyHashUpdate(hmac->macType, &hmac->hash, (byte*)hmac->ipad);
#else
ret = HmacKeyCopyHash(hmac->macType, &hmac->i_hash, &hmac->hash);
#endif
if (ret != 0)
return ret;
hmac->innerHashKeyed = WC_HMAC_INNER_HASH_KEYED_SW;
}

switch (hmac->macType) {

@ -862,8 +969,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Md5Final(&hmac->hash.md5, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->opad,
WC_MD5_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_MD5, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->innerHash,

@ -879,8 +990,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_ShaFinal(&hmac->hash.sha, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->opad,
WC_SHA_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->innerHash,

@ -896,8 +1011,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha224Final(&hmac->hash.sha224, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->opad,
WC_SHA224_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA224, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->innerHash,

@ -914,8 +1033,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha256Final(&hmac->hash.sha256, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->opad,
WC_SHA256_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA256, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->innerHash,

@ -931,8 +1054,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha384Final(&hmac->hash.sha384, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->opad,
WC_SHA384_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA384, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->innerHash,

@ -947,8 +1074,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha512Final(&hmac->hash.sha512, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->opad,
WC_SHA512_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA512, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->innerHash,

@ -965,8 +1096,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha3_224_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha3_224_Update(&hmac->hash.sha3, (byte*)hmac->opad,
WC_SHA3_224_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA3_224, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha3_224_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,

@ -981,8 +1116,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha3_256_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha3_256_Update(&hmac->hash.sha3, (byte*)hmac->opad,
WC_SHA3_256_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA3_256, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha3_256_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,

@ -997,8 +1136,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha3_384_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha3_384_Update(&hmac->hash.sha3, (byte*)hmac->opad,
WC_SHA3_384_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA3_384, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha3_384_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,

@ -1013,8 +1156,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sha3_512_Final(&hmac->hash.sha3, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sha3_512_Update(&hmac->hash.sha3, (byte*)hmac->opad,
WC_SHA3_512_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SHA3_512, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sha3_512_Update(&hmac->hash.sha3, (byte*)hmac->innerHash,

@ -1031,8 +1178,12 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
ret = wc_Sm3Final(&hmac->hash.sm3, (byte*)hmac->innerHash);
if (ret != 0)
break;
#ifndef WOLFSSL_HMAC_COPY_HASH
ret = wc_Sm3Update(&hmac->hash.sm3, (byte*)hmac->opad,
WC_SM3_BLOCK_SIZE);
#else
ret = HmacKeyCopyHash(WC_SM3, &hmac->o_hash, &hmac->hash);
#endif
if (ret != 0)
break;
ret = wc_Sm3Update(&hmac->hash.sm3, (byte*)hmac->innerHash,

@ -1163,34 +1314,58 @@ void wc_HmacFree(Hmac* hmac)
#ifndef NO_MD5
case WC_MD5:
wc_Md5Free(&hmac->hash.md5);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Md5Free(&hmac->i_hash.md5);
wc_Md5Free(&hmac->o_hash.md5);
#endif
break;
#endif /* !NO_MD5 */

#ifndef NO_SHA
case WC_SHA:
wc_ShaFree(&hmac->hash.sha);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_ShaFree(&hmac->i_hash.sha);
wc_ShaFree(&hmac->o_hash.sha);
#endif
break;
#endif /* !NO_SHA */

#ifdef WOLFSSL_SHA224
case WC_SHA224:
wc_Sha224Free(&hmac->hash.sha224);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha224Free(&hmac->i_hash.sha224);
wc_Sha224Free(&hmac->o_hash.sha224);
#endif
break;
#endif /* WOLFSSL_SHA224 */
#ifndef NO_SHA256
case WC_SHA256:
wc_Sha256Free(&hmac->hash.sha256);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha256Free(&hmac->i_hash.sha256);
wc_Sha256Free(&hmac->o_hash.sha256);
#endif
break;
#endif /* !NO_SHA256 */

#ifdef WOLFSSL_SHA384
case WC_SHA384:
wc_Sha384Free(&hmac->hash.sha384);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha384Free(&hmac->i_hash.sha384);
wc_Sha384Free(&hmac->o_hash.sha384);
#endif
break;
#endif /* WOLFSSL_SHA384 */
#ifdef WOLFSSL_SHA512
case WC_SHA512:
wc_Sha512Free(&hmac->hash.sha512);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha512Free(&hmac->i_hash.sha512);
wc_Sha512Free(&hmac->o_hash.sha512);
#endif
break;
#endif /* WOLFSSL_SHA512 */

@ -1198,21 +1373,37 @@ void wc_HmacFree(Hmac* hmac)
#ifndef WOLFSSL_NOSHA3_224
case WC_SHA3_224:
wc_Sha3_224_Free(&hmac->hash.sha3);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha3_224_Free(&hmac->i_hash.sha3);
wc_Sha3_224_Free(&hmac->o_hash.sha3);
#endif
break;
#endif
#ifndef WOLFSSL_NOSHA3_256
case WC_SHA3_256:
wc_Sha3_256_Free(&hmac->hash.sha3);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha3_256_Free(&hmac->i_hash.sha3);
wc_Sha3_256_Free(&hmac->o_hash.sha3);
#endif
break;
#endif
#ifndef WOLFSSL_NOSHA3_384
case WC_SHA3_384:
wc_Sha3_384_Free(&hmac->hash.sha3);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha3_384_Free(&hmac->i_hash.sha3);
wc_Sha3_384_Free(&hmac->o_hash.sha3);
#endif
break;
#endif
#ifndef WOLFSSL_NOSHA3_512
case WC_SHA3_512:
wc_Sha3_512_Free(&hmac->hash.sha3);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sha3_512_Free(&hmac->i_hash.sha3);
wc_Sha3_512_Free(&hmac->o_hash.sha3);
#endif
break;
#endif
#endif /* WOLFSSL_SHA3 */

@ -1220,6 +1411,10 @@ void wc_HmacFree(Hmac* hmac)
#ifdef WOLFSSL_SM3
case WC_SM3:
wc_Sm3Free(&hmac->hash.sm3);
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_Sm3Free(&hmac->i_hash.sm3);
wc_Sm3Free(&hmac->o_hash.sm3);
#endif
break;
#endif

@ -124,6 +124,10 @@ typedef wc_Hashes wc_HmacHash;
/* Hmac digest */
struct Hmac {
wc_HmacHash hash;
#ifdef WOLFSSL_HMAC_COPY_HASH
wc_HmacHash i_hash;
wc_HmacHash o_hash;
#endif
word32 ipad[WC_HMAC_BLOCK_SIZE / sizeof(word32)]; /* same block size all*/
word32 opad[WC_HMAC_BLOCK_SIZE / sizeof(word32)];
word32 innerHash[WC_MAX_DIGEST_SIZE / sizeof(word32)];

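One cost of the feature is visible in the struct above: two extra wc_HmacHash members per Hmac when WOLFSSL_HMAC_COPY_HASH is defined. A quick, illustrative way to gauge that footprint on a given build:

```c
#include <stdio.h>
#include <wolfssl/wolfcrypt/hmac.h>

/* Illustrative only: print the per-context size so the memory cost of the
 * cached i_hash/o_hash states can be compared between builds. */
int main(void)
{
    printf("sizeof(Hmac) = %u bytes\n", (unsigned)sizeof(Hmac));
    return 0;
}
```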