mirror of https://github.com/wolfSSL/wolfssl.git
Assembly optimization for AES-NI, AVX1 and AVX2
Unroll the loop by 8. Use new optimized maths. Fix SHA-384 to use the SHA-512 assembly code. Only perform the CPU id check in one place. (pull/1030/head)
parent 36c2ee92dc
commit bde6a35ac4
@@ -789,6 +789,11 @@ AC_ARG_ENABLE([intelasm],
     [ ENABLED_INTELASM=no ]
     )
 
+if test "$ENABLED_AESNI" = "small"
+then
+    AM_CFLAGS="$AM_CFLAGS -DAES_GCM_AESNI_NO_UNROLL"
+    ENABLED_AESNI=yes
+fi
 
 if test "$ENABLED_AESNI" = "yes" || test "$ENABLED_INTELASM" = "yes"
 then
 
@@ -799,7 +804,7 @@ then
     # opt levels greater than 2 may cause problems on systems w/o aesni
     if test "$CC" != "icc"
     then
-        AM_CFLAGS="$AM_CFLAGS -maes -msse4"
+        AM_CFLAGS="$AM_CFLAGS -maes -msse4 -mpclmul"
     fi
 fi
 AS_IF([test "x$ENABLED_AESGCM" != "xno"],[AM_CCASFLAGS="$AM_CCASFLAGS -DHAVE_AESGCM"])
@@ -61,7 +61,8 @@ endif
 
 src_libwolfssl_la_SOURCES += \
     wolfcrypt/src/hmac.c \
-    wolfcrypt/src/hash.c
+    wolfcrypt/src/hash.c \
+    wolfcrypt/src/cpuid.c
 
 if BUILD_RNG
 src_libwolfssl_la_SOURCES += wolfcrypt/src/random.c
@@ -137,8 +137,8 @@
     #define BEGIN_INTEL_CYCLES total_cycles = get_intel_cycles();
     #define END_INTEL_CYCLES   total_cycles = get_intel_cycles() - total_cycles;
     #define SHOW_INTEL_CYCLES  printf(" Cycles per byte = %6.2f", \
-                               count == 0 ? 0 : \
-                               (float)total_cycles / (count*BENCH_SIZE));
+                               count == 0 ? 0 : \
+                               (float)total_cycles / ((word64)count*BENCH_SIZE));
 #elif defined(LINUX_CYCLE_COUNT)
     #include <linux/perf_event.h>
     #include <sys/syscall.h>

@@ -579,7 +579,7 @@ static void bench_stats_sym_finish(const char* desc, int doAsync, int count, dou
         persec = (1 / total) * blocks;
     }
 
-    printf("%-8s%s %5.0f %s took %5.3f seconds, %8.3f %s/s",
+    printf("%-12s%s %5.0f %s took %5.3f seconds, %8.3f %s/s",
         desc, BENCH_ASYNC_GET_NAME(doAsync), blocks, blockType, total,
         persec, blockType);
     SHOW_INTEL_CYCLES

@@ -1275,7 +1275,31 @@ void bench_aesgcm(int doAsync)
         count += times;
     } while (bench_stats_sym_check(start));
 exit_aes_gcm:
-    bench_stats_sym_finish("AES-GCM", doAsync, count, start);
+    bench_stats_sym_finish("AES-GCM-Enc", doAsync, count, start);
+
+    /* GCM uses same routine in backend for both encrypt and decrypt */
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, numBlocks)) {
+                    ret = wc_AesGcmDecrypt(&enc[i], bench_plain,
+                        bench_cipher, BENCH_SIZE,
+                        bench_iv, 12, bench_tag, AES_AUTH_TAG_SZ,
+                        bench_additional, AES_AUTH_ADD_SZ);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times)) {
+                        goto exit_aes_gcm_dec;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_aes_gcm_dec:
+    bench_stats_sym_finish("AES-GCM-Dec", doAsync, count, start);
 
 exit:
 
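The (word64) cast added to SHOW_INTEL_CYCLES above appears to be an overflow guard: count and BENCH_SIZE are narrower integer types, so their product can wrap before the division. A small standalone illustration of the difference, using made-up values that are not taken from the benchmark:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t count = 50000, bench_size = 1024 * 1024;  /* hypothetical sizes */
        /* the 32-bit product wraps around 2^32, giving a bogus cycles-per-byte base */
        printf("32-bit product: %u\n", count * bench_size);
        /* widening one operand first, as the patch does, keeps the full product */
        printf("64-bit product: %llu\n",
               (unsigned long long)((uint64_t)count * bench_size));
        return 0;
    }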
wolfcrypt/src/aes.c: 3067 lines changed (diff suppressed because it is too large)
@@ -1502,100 +1502,4 @@ MAKE_RK256_b:
     pxor xmm3,xmm2
     ret
 
-
-; See Intel® Carry-Less Multiplication Instruction
-; and its Usage for Computing the GCM Mode White Paper
-; by Shay Gueron, Intel Mobility Group, Israel Development Center;
-; and Michael E. Kounavis, Intel Labs, Circuits and Systems Research
-
-; void gfmul(__m128i a, __m128i b, __m128i* out);
-
-; .globl gfmul
-gfmul PROC
-    ; xmm0 holds operand a (128 bits)
-    ; xmm1 holds operand b (128 bits)
-    ; r8 holds the pointer to output (128 bits)
-
-    ; convert to what we had for att&t convention
-    movdqa xmm0, [rcx]
-    movdqa xmm1, [rdx]
-
-    ; on microsoft xmm6-xmm15 are non volaitle, let's save on stack and restore at end
-    sub rsp,8+4*16  ; 8 = align stack , 4 xmm6-9 16 bytes each
-    movdqa [rsp+0],  xmm6
-    movdqa [rsp+16], xmm7
-    movdqa [rsp+32], xmm8
-    movdqa [rsp+48], xmm9
-
-    movdqa xmm3, xmm0
-    pclmulqdq xmm3, xmm1, 0    ; xmm3 holds a0*b0
-    movdqa xmm4, xmm0
-    pclmulqdq xmm4, xmm1, 16   ; xmm4 holds a0*b1
-    movdqa xmm5, xmm0
-    pclmulqdq xmm5, xmm1, 1    ; xmm5 holds a1*b0
-    movdqa xmm6, xmm0
-    pclmulqdq xmm6, xmm1, 17   ; xmm6 holds a1*b1
-    pxor xmm4, xmm5            ; xmm4 holds a0*b1 + a1*b0
-    movdqa xmm5, xmm4
-    psrldq xmm4, 8
-    pslldq xmm5, 8
-    pxor xmm3, xmm5
-    pxor xmm6, xmm4            ; <xmm6:xmm3> holds the result of
-                               ; the carry-less multiplication of
-                               ; xmm0 by xmm1
-
-    ; shift the result by one bit position to the left cope for the fact
-    ; that bits are reversed
-    movdqa xmm7, xmm3
-    movdqa xmm8, xmm6
-    pslld xmm3, 1
-    pslld xmm6, 1
-    psrld xmm7, 31
-    psrld xmm8, 31
-    movdqa xmm9, xmm7
-    pslldq xmm8, 4
-    pslldq xmm7, 4
-    psrldq xmm9, 12
-    por xmm3, xmm7
-    por xmm6, xmm8
-    por xmm6, xmm9
-
-    ; first phase of the reduction
-    movdqa xmm7, xmm3
-    movdqa xmm8, xmm3
-    movdqa xmm9, xmm3
-    pslld xmm7, 31    ; packed right shifting << 31
-    pslld xmm8, 30    ; packed right shifting shift << 30
-    pslld xmm9, 25    ; packed right shifting shift << 25
-    pxor xmm7, xmm8   ; xor the shifted versions
-    pxor xmm7, xmm9
-
-    movdqa xmm8, xmm7
-    pslldq xmm7, 12
-    psrldq xmm8, 4
-    pxor xmm3, xmm7   ; first phase of the reduction complete
-    movdqa xmm2, xmm3 ; second phase of the reduction
-    movdqa xmm4, xmm3
-    movdqa xmm5, xmm3
-    psrld xmm2, 1     ; packed left shifting >> 1
-    psrld xmm4, 2     ; packed left shifting >> 2
-    psrld xmm5, 7     ; packed left shifting >> 7
-
-    pxor xmm2, xmm4   ; xor the shifted versions
-    pxor xmm2, xmm5
-    pxor xmm2, xmm8
-    pxor xmm3, xmm2
-    pxor xmm6, xmm3   ; the result is in xmm6
-    movdqu [r8],xmm6  ; store the result
-
-    ; restore non volatile xmms from stack
-    movdqa xmm6, [rsp+0]
-    movdqa xmm7, [rsp+16]
-    movdqa xmm8, [rsp+32]
-    movdqa xmm9, [rsp+48]
-    add rsp,8+4*16  ; 8 = align stack , 4 xmm6-9 16 bytes each
-
-    ret
-gfmul ENDP
 
 END
@@ -1288,91 +1288,6 @@ pxor %xmm4, %xmm3
 pxor %xmm2, %xmm3
 ret
 
-
-#ifdef HAVE_AESGCM
-
-/* See Intel® Carry-Less Multiplication Instruction
- * and its Usage for Computing the GCM Mode White Paper
- * by Shay Gueron, Intel Mobility Group, Israel Development Center;
- * and Michael E. Kounavis, Intel Labs, Circuits and Systems Research
- *
- * This is for use with the C code.
- */
-
-/* Figure 6. Code Sample - Performing Ghash Using Algorithms 1 and 5 */
-
-/*
- * void gfmul(__m128i a, __m128i b, __m128i* out);
- */
-.globl gfmul
-gfmul:
-    #xmm0 holds operand a (128 bits)
-    #xmm1 holds operand b (128 bits)
-    #rdi holds the pointer to output (128 bits)
-    movdqa %xmm0, %xmm3
-    pclmulqdq $0, %xmm1, %xmm3    # xmm3 holds a0*b0
-    movdqa %xmm0, %xmm4
-    pclmulqdq $16, %xmm1, %xmm4   # xmm4 holds a0*b1
-    movdqa %xmm0, %xmm5
-    pclmulqdq $1, %xmm1, %xmm5    # xmm5 holds a1*b0
-    movdqa %xmm0, %xmm6
-    pclmulqdq $17, %xmm1, %xmm6   # xmm6 holds a1*b1
-    pxor %xmm5, %xmm4             # xmm4 holds a0*b1 + a1*b0
-    movdqa %xmm4, %xmm5
-    psrldq $8, %xmm4
-    pslldq $8, %xmm5
-    pxor %xmm5, %xmm3
-    pxor %xmm4, %xmm6             # <xmm6:xmm3> holds the result of
-                                  # the carry-less multiplication of
-                                  # xmm0 by xmm1
-
-    # shift the result by one bit position to the left cope for the fact
-    # that bits are reversed
-    movdqa %xmm3, %xmm7
-    movdqa %xmm6, %xmm8
-    pslld $1, %xmm3
-    pslld $1, %xmm6
-    psrld $31, %xmm7
-    psrld $31, %xmm8
-    movdqa %xmm7, %xmm9
-    pslldq $4, %xmm8
-    pslldq $4, %xmm7
-    psrldq $12, %xmm9
-    por %xmm7, %xmm3
-    por %xmm8, %xmm6
-    por %xmm9, %xmm6
-
-    # first phase of the reduction
-    movdqa %xmm3, %xmm7
-    movdqa %xmm3, %xmm8
-    movdqa %xmm3, %xmm9
-    pslld $31, %xmm7    # packed right shifting << 31
-    pslld $30, %xmm8    # packed right shifting shift << 30
-    pslld $25, %xmm9    # packed right shifting shift << 25
-    pxor %xmm8, %xmm7   # xor the shifted versions
-    pxor %xmm9, %xmm7
-
-    movdqa %xmm7, %xmm8
-    pslldq $12, %xmm7
-    psrldq $4, %xmm8
-    pxor %xmm7, %xmm3   # first phase of the reduction complete
-    movdqa %xmm3, %xmm2 # second phase of the reduction
-    movdqa %xmm3, %xmm4
-    movdqa %xmm3, %xmm5
-    psrld $1, %xmm2     # packed left shifting >> 1
-    psrld $2, %xmm4     # packed left shifting >> 2
-    psrld $7, %xmm5     # packed left shifting >> 7
-
-    pxor %xmm4, %xmm2   # xor the shifted versions
-    pxor %xmm5, %xmm2
-    pxor %xmm8, %xmm2
-    pxor %xmm2, %xmm3
-    pxor %xmm3, %xmm6   # the result is in xmm6
-    movdqu %xmm6, (%rdi)  # store the result
-    ret
-
-#endif /* HAVE_AESGCM */
 
 #if defined(__linux__) && defined(__ELF__)
 .section .note.GNU-stack,"",%progbits
 #endif
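The gfmul routine deleted from both assembly files computes a GF(2^128) carry-less multiply followed by the bit-reflection fixup and two-phase reduction described in the Gueron/Kounavis white paper cited above; its replacement lives in the suppressed wolfcrypt/src/aes.c diff. For reference while reviewing, here is a rough C-intrinsics rendering of the same steps. It is written for this description only, is not taken from the commit, and the function name is made up.

    #include <emmintrin.h>   /* SSE2: shifts, xor, or */
    #include <wmmintrin.h>   /* PCLMUL: _mm_clmulepi64_si128 */

    /* Hypothetical illustration of the deleted gfmul: a and b are the
     * bit-reflected GHASH operands; the product is reduced modulo
     * x^128 + x^7 + x^2 + x + 1. */
    static void gfmul_sketch(__m128i a, __m128i b, __m128i* out)
    {
        __m128i lo, hi, mid, t0, t1, t2;

        /* four 64x64 carry-less partial products */
        lo  = _mm_clmulepi64_si128(a, b, 0x00);                  /* a0*b0 */
        mid = _mm_xor_si128(_mm_clmulepi64_si128(a, b, 0x10),    /* a0*b1 */
                            _mm_clmulepi64_si128(a, b, 0x01));   /* a1*b0 */
        hi  = _mm_clmulepi64_si128(a, b, 0x11);                  /* a1*b1 */

        lo = _mm_xor_si128(lo, _mm_slli_si128(mid, 8));
        hi = _mm_xor_si128(hi, _mm_srli_si128(mid, 8));  /* <hi:lo> = 256-bit product */

        /* shift the 256-bit product left by one bit (inputs are bit-reversed) */
        t0 = _mm_srli_epi32(lo, 31);
        t1 = _mm_srli_epi32(hi, 31);
        lo = _mm_slli_epi32(lo, 1);
        hi = _mm_slli_epi32(hi, 1);
        t2 = _mm_srli_si128(t0, 12);
        lo = _mm_or_si128(lo, _mm_slli_si128(t0, 4));
        hi = _mm_or_si128(_mm_or_si128(hi, _mm_slli_si128(t1, 4)), t2);

        /* first phase of the reduction */
        t0 = _mm_xor_si128(_mm_xor_si128(_mm_slli_epi32(lo, 31),
                                         _mm_slli_epi32(lo, 30)),
                           _mm_slli_epi32(lo, 25));
        t1 = _mm_srli_si128(t0, 4);
        lo = _mm_xor_si128(lo, _mm_slli_si128(t0, 12));

        /* second phase of the reduction */
        t2 = _mm_xor_si128(_mm_xor_si128(_mm_srli_epi32(lo, 1),
                                         _mm_srli_epi32(lo, 2)),
                           _mm_xor_si128(_mm_srli_epi32(lo, 7), t1));
        lo = _mm_xor_si128(lo, t2);

        *out = _mm_xor_si128(hi, lo);                    /* result */
    }

The steps mirror the deleted assembly one-for-one: partial products, the 128-bit fold into a 256-bit value, the one-bit left shift, and the two reduction phases.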
@@ -0,0 +1,99 @@
+/* cpuid.c
+ *
+ * Copyright (C) 2006-2016 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif
+
+#include <wolfssl/wolfcrypt/settings.h>
+
+#include <wolfssl/wolfcrypt/cpuid.h>
+
+#ifdef WOLFSSL_X86_64_BUILD
+/* Each platform needs to query info type 1 from cpuid to see if aesni is
+ * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
+ */
+
+#ifndef _MSC_VER
+    #define cpuid(reg, leaf, sub)\
+        __asm__ __volatile__ ("cpuid":\
+            "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
+            "a" (leaf), "c"(sub));
+
+    #define XASM_LINK(f) asm(f)
+#else
+
+    #include <intrin.h>
+    #define cpuid(a,b) __cpuid((int*)a,b)
+
+    #define XASM_LINK(f)
+#endif /* _MSC_VER */
+
+#define EAX 0
+#define EBX 1
+#define ECX 2
+#define EDX 3
+
+static word32 cpuid_check = 0;
+static word32 cpuid_flags = 0;
+
+static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit)
+{
+    static int got_intel_cpu = 0;
+    static unsigned int reg[5];
+
+    reg[4] = '\0';
+    cpuid(reg, 0, 0);
+    if (XMEMCMP((char *)&(reg[EBX]), "Genu", 4) == 0 &&
+        XMEMCMP((char *)&(reg[EDX]), "ineI", 4) == 0 &&
+        XMEMCMP((char *)&(reg[ECX]), "ntel", 4) == 0) {
+        got_intel_cpu = 1;
+    }
+    if (got_intel_cpu) {
+        cpuid(reg, leaf, sub);
+        return ((reg[num] >> bit) & 0x1);
+    }
+    return 0;
+}
+
+
+void cpuid_set_flags(void)
+{
+    if (!cpuid_check) {
+        cpuid_check = 1;
+        if (cpuid_flag(1, 0, ECX, 28)) { cpuid_flags |= CPUID_AVX1  ; }
+        if (cpuid_flag(7, 0, EBX, 5))  { cpuid_flags |= CPUID_AVX2  ; }
+        if (cpuid_flag(7, 0, EBX, 8))  { cpuid_flags |= CPUID_BMI2  ; }
+        if (cpuid_flag(1, 0, ECX, 30)) { cpuid_flags |= CPUID_RDRAND; }
+        if (cpuid_flag(7, 0, EBX, 18)) { cpuid_flags |= CPUID_RDSEED; }
+        if (cpuid_flag(1, 0, ECX, 26)) { cpuid_flags |= CPUID_AESNI ; }
+    }
+}
+
+word32 cpuid_get_flags(void)
+{
+    if (!cpuid_check)
+        cpuid_set_flags();
+    return cpuid_flags;
+}
+#endif
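The new cpuid.c (together with the cpuid.h header added later in this diff) becomes the single place where CPU features are probed; callers only read the cached flag word. A minimal sketch of the intended call pattern, written for illustration only — the function below is hypothetical and not part of the commit:

    #include <wolfssl/wolfcrypt/cpuid.h>

    /* Hypothetical caller: decide once, at init time, which code path to use. */
    static void pick_aes_path(void)
    {
        word32 intel_flags = cpuid_get_flags();  /* probes cpuid on first call only */

        if (IS_INTEL_AESNI(intel_flags)) {
            /* install the AES-NI accelerated routines */
        }
        else {
            /* fall back to the portable C implementation */
        }
    }

The hunks that follow convert random.c, sha256.c, and sha512.c to this pattern, removing their private copies of the cpuid macros.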
@@ -32,6 +32,7 @@
  */
 
 #include <wolfssl/wolfcrypt/random.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
 
 
 #ifdef HAVE_FIPS

@@ -141,12 +142,6 @@ int wc_RNG_GenerateByte(WC_RNG* rng, byte* b)
 #ifdef HAVE_INTEL_RDRAND
     static int wc_GenerateRand_IntelRD(OS_Seed* os, byte* output, word32 sz);
 #endif
-static word32 cpuid_check = 0;
-static word32 cpuid_flags = 0;
-#define CPUID_RDRAND 0x4
-#define CPUID_RDSEED 0x8
-#define IS_INTEL_RDRAND (cpuid_flags & CPUID_RDRAND)
-#define IS_INTEL_RDSEED (cpuid_flags & CPUID_RDSEED)
 #endif
 
 /* Start NIST DRBG code */

@@ -540,7 +535,7 @@ int wc_InitRng_ex(WC_RNG* rng, void* heap, int devId)
 
 #ifdef HAVE_INTEL_RDRAND
     /* if CPU supports RDRAND, use it directly and by-pass DRBG init */
-    if (IS_INTEL_RDRAND)
+    if (IS_INTEL_RDRAND(cpuid_get_flags()))
         return 0;
 #endif
 

@@ -610,7 +605,7 @@ int wc_RNG_GenerateBlock(WC_RNG* rng, byte* output, word32 sz)
         return BAD_FUNC_ARG;
 
 #ifdef HAVE_INTEL_RDRAND
-    if (IS_INTEL_RDRAND)
+    if (IS_INTEL_RDRAND(cpuid_get_flags()))
         return wc_GenerateRand_IntelRD(NULL, output, sz);
 #endif
 

@@ -982,52 +977,8 @@ int wc_FreeNetRandom(void)
 
 #if defined(HAVE_INTEL_RDRAND) || defined(HAVE_INTEL_RDSEED)
 
-#ifndef _MSC_VER
-    #define cpuid(reg, leaf, sub)\
-        __asm__ __volatile__ ("cpuid":\
-            "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
-            "a" (leaf), "c"(sub));
-
-    #define XASM_LINK(f) asm(f)
-#else
-
-    #include <intrin.h>
-    #define cpuid(a,b) __cpuid((int*)a,b)
-
-    #define XASM_LINK(f)
-
-#endif /* _MSC_VER */
-
-#define EAX 0
-#define EBX 1
-#define ECX 2
-#define EDX 3
-
-static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
-    int got_intel_cpu = 0;
-    unsigned int reg[5];
-
-    reg[4] = '\0';
-    cpuid(reg, 0, 0);
-    if (XMEMCMP((char *)&(reg[EBX]), "Genu", 4) == 0 &&
-        XMEMCMP((char *)&(reg[EDX]), "ineI", 4) == 0 &&
-        XMEMCMP((char *)&(reg[ECX]), "ntel", 4) == 0)
-    {
-        got_intel_cpu = 1;
-    }
-    if (got_intel_cpu) {
-        cpuid(reg, leaf, sub);
-        return ((reg[num] >> bit) & 0x1);
-    }
-    return 0;
-}
-
 static void wc_InitRng_IntelRD(void) {
-    if (cpuid_check==0) {
-        if (cpuid_flag(1, 0, ECX, 30)) { cpuid_flags |= CPUID_RDRAND; }
-        if (cpuid_flag(7, 0, EBX, 18)) { cpuid_flags |= CPUID_RDSEED; }
-        cpuid_check = 1;
-    }
+    cpuid_set_flags();
 }
 
 #ifdef WOLFSSL_ASYNC_CRYPT

@@ -1067,7 +1018,7 @@ static int wc_GenerateSeed_IntelRD(OS_Seed* os, byte* output, word32 sz)
 
     (void)os;
 
-    if (!IS_INTEL_RDSEED)
+    if (!IS_INTEL_RDSEED(cpuid_get_flags()))
         return -1;
 
     for (; (sz / sizeof(word64)) > 0; sz -= sizeof(word64),

@@ -1122,7 +1073,7 @@ static int wc_GenerateRand_IntelRD(OS_Seed* os, byte* output, word32 sz)
 
     (void)os;
 
-    if (!IS_INTEL_RDRAND)
+    if (!IS_INTEL_RDRAND(cpuid_get_flags()))
         return -1;
 
     for (; (sz / sizeof(word64)) > 0; sz -= sizeof(word64),

@@ -1702,7 +1653,7 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
     int ret = 0;
 
 #ifdef HAVE_INTEL_RDSEED
-    if (IS_INTEL_RDSEED) {
+    if (IS_INTEL_RDSEED(cpuid_get_flags())) {
         ret = wc_GenerateSeed_IntelRD(NULL, output, sz);
         if (ret == 0) {
             /* success, we're done */
@@ -32,6 +32,7 @@
 
 #include <wolfssl/wolfcrypt/sha256.h>
 #include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
 
 /* fips wrapper calls, user can call direct */
 #ifdef HAVE_FIPS

@@ -177,77 +178,14 @@ static int InitSha256(Sha256* sha256)
   More granural Stitched Message Sched/Round
   }
 
   #endif
 
 */
 
-/* Each platform needs to query info type 1 from cpuid to see if aesni is
- * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
- */
-
-#ifndef _MSC_VER
-    #define cpuid(reg, leaf, sub)\
-        __asm__ __volatile__ ("cpuid":\
-            "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
-            "a" (leaf), "c"(sub));
-
-    #define XASM_LINK(f) asm(f)
-#else
-    #include <intrin.h>
-    #define cpuid(a,b) __cpuid((int*)a,b)
-
-    #define XASM_LINK(f)
-#endif /* _MSC_VER */
-
-#define EAX 0
-#define EBX 1
-#define ECX 2
-#define EDX 3
-
-#define CPUID_AVX1   0x1
-#define CPUID_AVX2   0x2
-#define CPUID_RDRAND 0x4
-#define CPUID_RDSEED 0x8
-#define CPUID_BMI2   0x10   /* MULX, RORX */
-
-#define IS_INTEL_AVX1   (cpuid_flags & CPUID_AVX1)
-#define IS_INTEL_AVX2   (cpuid_flags & CPUID_AVX2)
-#define IS_INTEL_BMI2   (cpuid_flags & CPUID_BMI2)
-#define IS_INTEL_RDRAND (cpuid_flags & CPUID_RDRAND)
-#define IS_INTEL_RDSEED (cpuid_flags & CPUID_RDSEED)
-
-static word32 cpuid_check = 0;
-static word32 cpuid_flags = 0;
-
-static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
-    int got_intel_cpu=0;
-    unsigned int reg[5];
-
-    reg[4] = '\0';
-    cpuid(reg, 0, 0);
-    if (XMEMCMP((char *)&(reg[EBX]), "Genu", 4) == 0 &&
-        XMEMCMP((char *)&(reg[EDX]), "ineI", 4) == 0 &&
-        XMEMCMP((char *)&(reg[ECX]), "ntel", 4) == 0) {
-        got_intel_cpu = 1;
-    }
-    if (got_intel_cpu) {
-        cpuid(reg, leaf, sub);
-        return ((reg[num] >> bit) & 0x1);
-    }
-    return 0;
-}
-
-static int set_cpuid_flags(void) {
-    if (cpuid_check==0) {
-        if (cpuid_flag(1, 0, ECX, 28)){ cpuid_flags |= CPUID_AVX1; }
-        if (cpuid_flag(7, 0, EBX, 5)) { cpuid_flags |= CPUID_AVX2; }
-        if (cpuid_flag(7, 0, EBX, 8)) { cpuid_flags |= CPUID_BMI2; }
-        if (cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND; }
-        if (cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED; }
-        cpuid_check = 1;
-        return 0;
-    }
-    return 1;
-}
-
 /* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha256 */
 static int Transform(Sha256* sha256);
 #if defined(HAVE_INTEL_AVX1)

@@ -258,22 +196,31 @@ static int InitSha256(Sha256* sha256)
 static int Transform_AVX1_RORX(Sha256 *sha256);
 #endif
 static int (*Transform_p)(Sha256* sha256) /* = _Transform */;
+static int transform_check = 0;
 #define XTRANSFORM(sha256, B) (*Transform_p)(sha256)
 
-static void set_Transform(void) {
-    if (set_cpuid_flags()) return;
+static void set_Transform(void)
+{
+    word32 intel_flags;
+
+    cpuid_set_flags();
+    if (transform_check)
+        return;
+    transform_check = 1;
+    intel_flags = cpuid_get_flags();
 
 #if defined(HAVE_INTEL_AVX2)
-    if (IS_INTEL_AVX2 && IS_INTEL_BMI2) {
-        Transform_p = Transform_AVX1_RORX; return;
-        Transform_p = Transform_AVX2;
-        /* for avoiding warning,"not used" */
-    }
+    if (IS_INTEL_AVX2(intel_flags) && IS_INTEL_BMI2(intel_flags)) {
+        Transform_p = Transform_AVX1_RORX; return;
+        Transform_p = Transform_AVX2;
+        /* for avoiding warning,"not used" */
+    }
 #endif
 #if defined(HAVE_INTEL_AVX1)
-    Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : Transform); return;
+    Transform_p = ((IS_INTEL_AVX1(intel_flags)) ? Transform_AVX1 :
+                                                  Transform); return;
 #endif
-    Transform_p = Transform; return;
+    Transform_p = Transform; return;
 }
 
 /* Dummy for saving MM_REGs on behalf of Transform */

@@ -519,6 +466,11 @@ static int InitSha256(Sha256* sha256)
 {
     int ret = 0;
     byte* local;
+#if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+    word32 intel_flags = cpuid_get_flags();
+#endif
+#endif
 
     if (sha256 == NULL || (data == NULL && len > 0)) {
         return BAD_FUNC_ARG;

@@ -552,7 +504,7 @@ static int InitSha256(Sha256* sha256)
         if (sha256->buffLen == SHA256_BLOCK_SIZE) {
     #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
         #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-            if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
+            if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
         #endif
             {
                 ByteReverseWords(sha256->buffer, sha256->buffer,

@@ -582,6 +534,11 @@ static int InitSha256(Sha256* sha256)
 
     int ret;
     byte* local = (byte*)sha256->buffer;
+#if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+    word32 intel_flags = cpuid_get_flags();
+#endif
+#endif
 
     if (sha256 == NULL) {
         return BAD_FUNC_ARG;

@@ -598,15 +555,15 @@ static int InitSha256(Sha256* sha256)
                 SHA256_BLOCK_SIZE - sha256->buffLen);
         sha256->buffLen += SHA256_BLOCK_SIZE - sha256->buffLen;
 
-    #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
-        #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-            if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
-        #endif
-            {
-                ByteReverseWords(sha256->buffer, sha256->buffer,
-                    SHA256_BLOCK_SIZE);
+    #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+        #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+            if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
+        #endif
+                ByteReverseWords(sha256->buffer, sha256->buffer,
+                    SHA256_BLOCK_SIZE);
+    #endif
             }
     #endif
 
         ret = XTRANSFORM(sha256, local);
         if (ret != 0)

@@ -624,7 +581,7 @@ static int InitSha256(Sha256* sha256)
     /* store lengths */
 #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
     #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-    if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
+    if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
     #endif
     {
         ByteReverseWords(sha256->buffer, sha256->buffer,

@@ -640,7 +597,7 @@ static int InitSha256(Sha256* sha256)
     defined(HAVE_INTEL_AVX2)
     /* Kinetis requires only these bytes reversed */
     #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-    if (IS_INTEL_AVX1 || IS_INTEL_AVX2)
+    if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags))
     #endif
     {
         ByteReverseWords(
@@ -27,10 +27,9 @@
 #include <wolfssl/wolfcrypt/settings.h>
 
 #ifdef WOLFSSL_SHA512
-#include <wolfssl/wolfcrypt/error-crypt.h>
-
 #include <wolfssl/wolfcrypt/sha512.h>
+#include <wolfssl/wolfcrypt/error-crypt.h>
+#include <wolfssl/wolfcrypt/cpuid.h>
 
 /* fips wrapper calls, user can call direct */
 #ifdef HAVE_FIPS

@@ -261,74 +260,6 @@ static int InitSha512(Sha512* sha512)
  * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
  */
 
-#ifndef _MSC_VER
-    #define cpuid(reg, leaf, sub)\
-        __asm__ __volatile__ ("cpuid":\
-            "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
-            "a" (leaf), "c"(sub));
-
-    #define XASM_LINK(f) asm(f)
-#else
-
-    #include <intrin.h>
-    #define cpuid(a,b) __cpuid((int*)a,b)
-
-    #define XASM_LINK(f)
-#endif /* _MSC_VER */
-
-#define EAX 0
-#define EBX 1
-#define ECX 2
-#define EDX 3
-
-#define CPUID_AVX1   0x1
-#define CPUID_AVX2   0x2
-#define CPUID_RDRAND 0x4
-#define CPUID_RDSEED 0x8
-#define CPUID_BMI2   0x10   /* MULX, RORX */
-
-#define IS_INTEL_AVX1   (cpuid_flags & CPUID_AVX1)
-#define IS_INTEL_AVX2   (cpuid_flags & CPUID_AVX2)
-#define IS_INTEL_BMI2   (cpuid_flags & CPUID_BMI2)
-#define IS_INTEL_RDRAND (cpuid_flags & CPUID_RDRAND)
-#define IS_INTEL_RDSEED (cpuid_flags & CPUID_RDSEED)
-
-static word32 cpuid_check = 0;
-static word32 cpuid_flags = 0;
-
-static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
-    int got_intel_cpu = 0;
-    unsigned int reg[5];
-
-    reg[4] = '\0';
-    cpuid(reg, 0, 0);
-    if (XMEMCMP((char *)&(reg[EBX]), "Genu", 4) == 0 &&
-        XMEMCMP((char *)&(reg[EDX]), "ineI", 4) == 0 &&
-        XMEMCMP((char *)&(reg[ECX]), "ntel", 4) == 0) {
-        got_intel_cpu = 1;
-    }
-    if (got_intel_cpu) {
-        cpuid(reg, leaf, sub);
-        return ((reg[num] >> bit) & 0x1);
-    }
-    return 0;
-}
-
-
-static int set_cpuid_flags() {
-    if(cpuid_check ==0) {
-        if(cpuid_flag(1, 0, ECX, 28)){ cpuid_flags |= CPUID_AVX1 ;}
-        if(cpuid_flag(7, 0, EBX, 5)){ cpuid_flags |= CPUID_AVX2 ; }
-        if(cpuid_flag(7, 0, EBX, 8)) { cpuid_flags |= CPUID_BMI2 ; }
-        if(cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND ; }
-        if(cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED ; }
-        cpuid_check = 1 ;
-        return 0 ;
-    }
-    return 1 ;
-}
-
-
 #if defined(HAVE_INTEL_AVX1)
 static int Transform_AVX1(Sha512 *sha512);
 #endif

@@ -340,6 +271,7 @@ static int InitSha512(Sha512* sha512)
 #endif
 static int _Transform(Sha512 *sha512);
 static int (*Transform_p)(Sha512* sha512) = _Transform;
+static int transform_check = 0;
 #define Transform(sha512) (*Transform_p)(sha512)
 
 /* Dummy for saving MM_REGs on behalf of Transform */

@@ -353,6 +285,28 @@ static int InitSha512(Sha512* sha512)
     "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15")
 #endif
 
+static void Sha512_SetTransform()
+{
+    word32 intel_flags;
+
+    if (transform_check)
+        return;
+    transform_check = 1;
+    intel_flags = cpuid_get_flags();
+
+#if defined(HAVE_INTEL_AVX2)
+    if (IS_INTEL_AVX2(intel_flags) && IS_INTEL_BMI2(intel_flags)) {
+        Transform_p = Transform_AVX1_RORX; return;
+        Transform_p = Transform_AVX2;
+        /* for avoiding warning,"not used" */
+    }
+#endif
+#if defined(HAVE_INTEL_AVX1)
+    Transform_p = ((IS_INTEL_AVX1(intel_flags)) ? Transform_AVX1 :
+                                                  _Transform); return;
+#endif
+    Transform_p = _Transform;
+}
+
 int wc_InitSha512_ex(Sha512* sha512, void* heap, int devId)
 {

@@ -361,20 +315,7 @@ static int InitSha512(Sha512* sha512)
     (void)heap;
     (void)devId;
 
-    if (set_cpuid_flags())
-        return ret;
-
-#if defined(HAVE_INTEL_AVX2)
-    if (IS_INTEL_AVX2 && IS_INTEL_BMI2) {
-        Transform_p = Transform_AVX1_RORX; return ret;
-        Transform_p = Transform_AVX2;
-        /* for avoiding warning,"not used" */
-    }
-#endif
-#if defined(HAVE_INTEL_AVX1)
-    Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : _Transform); return ret;
-#endif
-    Transform_p = _Transform;
+    Sha512_SetTransform();
 
     return ret;
 }

@@ -554,6 +495,11 @@ static INLINE int Sha512Update(Sha512* sha512, const byte* data, word32 len)
     int ret = 0;
     /* do block size increments */
     byte* local = (byte*)sha512->buffer;
+#if defined(LITTLE_ENDIAN_ORDER)
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+    word32 intel_flags = cpuid_get_flags();
+#endif
+#endif
 
     if (sha512 == NULL || (data == NULL && len > 0)) {
         return BAD_FUNC_ARG;

@@ -570,16 +516,18 @@ static INLINE int Sha512Update(Sha512* sha512, const byte* data, word32 len)
         XMEMCPY(&local[sha512->buffLen], data, add);
 
         sha512->buffLen += add;
-        data += add;
-        len -= add;
+        data += add;
+        len -= add;
 
         if (sha512->buffLen == SHA512_BLOCK_SIZE) {
     #if defined(LITTLE_ENDIAN_ORDER)
         #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-            if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
+            if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
        #endif
             {
                 ByteReverseWords64(sha512->buffer, sha512->buffer,
-                    SHA512_BLOCK_SIZE);
+                    SHA512_BLOCK_SIZE);
             }
     #endif
             ret = Transform(sha512);
             if (ret != 0)

@@ -615,6 +563,11 @@ static INLINE int Sha512Final(Sha512* sha512)
 {
     byte* local = (byte*)sha512->buffer;
     int ret;
+#if defined(LITTLE_ENDIAN_ORDER)
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+    word32 intel_flags = cpuid_get_flags();
+#endif
+#endif
 
     if (sha512 == NULL) {
         return BAD_FUNC_ARG;

@@ -629,13 +582,15 @@ static INLINE int Sha512Final(Sha512* sha512)
     if (sha512->buffLen > SHA512_PAD_SIZE) {
         XMEMSET(&local[sha512->buffLen], 0, SHA512_BLOCK_SIZE - sha512->buffLen);
         sha512->buffLen += SHA512_BLOCK_SIZE - sha512->buffLen;
-#if defined(LITTLE_ENDIAN_ORDER)
-    #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-        if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
-    #endif
-            ByteReverseWords64(sha512->buffer,sha512->buffer,SHA512_BLOCK_SIZE);
-
-#endif /* LITTLE_ENDIAN_ORDER */
+#if defined(LITTLE_ENDIAN_ORDER)
+    #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+        if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
+    #endif
+        {
+            ByteReverseWords64(sha512->buffer,sha512->buffer,
+                               SHA512_BLOCK_SIZE);
+        }
+#endif /* LITTLE_ENDIAN_ORDER */
         ret = Transform(sha512);
         if (ret != 0)
             return ret;

@@ -651,17 +606,19 @@ static INLINE int Sha512Final(Sha512* sha512)
 
     /* store lengths */
 #if defined(LITTLE_ENDIAN_ORDER)
-    #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-        if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
-    #endif
-        ByteReverseWords64(sha512->buffer, sha512->buffer, SHA512_PAD_SIZE);
+    #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+        if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
+    #endif
+        {
+            ByteReverseWords64(sha512->buffer, sha512->buffer, SHA512_PAD_SIZE);
+        }
 #endif
     /* ! length ordering dependent on digest endian type ! */
 
     sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 2] = sha512->hiLen;
     sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 1] = sha512->loLen;
 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-    if (IS_INTEL_AVX1 || IS_INTEL_AVX2)
+    if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags))
 #endif
         ByteReverseWords64(&(sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
                            &(sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
                            SHA512_BLOCK_SIZE - SHA512_PAD_SIZE);

@@ -1470,6 +1427,21 @@ int wc_Sha384Final(Sha384* sha384, byte* hash)
 }
 
 
+/* Hardware Acceleration */
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+
+int wc_InitSha384_ex(Sha384* sha384, void* heap, int devId)
+{
+    int ret = InitSha384(sha384);
+
+    (void)heap;
+    (void)devId;
+
+    Sha512_SetTransform();
+
+    return ret;
+}
+#else
 int wc_InitSha384_ex(Sha384* sha384, void* heap, int devId)
 {
     int ret;

@@ -1492,6 +1464,7 @@ int wc_InitSha384_ex(Sha384* sha384, void* heap, int devId)
 
     return ret;
 }
+#endif
 
 int wc_InitSha384(Sha384* sha384)
 {
@@ -195,6 +195,12 @@ static int devId = INVALID_DEVID;
     const char* wnrConfigFile = "wnr-example.conf";
 #endif
 
+#ifdef HAVE_AESGCM
+    #define LARGE_BUFFER_SIZE 1024
+    static byte large_input[LARGE_BUFFER_SIZE];
+    static byte large_output[LARGE_BUFFER_SIZE];
+    static byte large_outdec[LARGE_BUFFER_SIZE];
+#endif
 
 typedef struct testVector {
     const char* input;

@@ -375,6 +381,9 @@ int wolfcrypt_test(void* args)
 #endif
 {
     int ret;
+#ifdef HAVE_AESGCM
+    int i;
+#endif
 
     ((func_args*)args)->return_code = -1; /* error state */
 

@@ -665,6 +674,8 @@ int wolfcrypt_test(void* args)
         printf( "AES256 test passed!\n");
 
 #ifdef HAVE_AESGCM
+    for (i=0; i<LARGE_BUFFER_SIZE; i++)
+        large_input[i] = i;
     if ( (ret = aesgcm_test()) != 0)
         return err_sys("AES-GCM test failed!\n", ret);
     else

@@ -4594,6 +4605,10 @@ int aesgcm_test(void)
     byte resultP[sizeof(p)];
     byte resultC[sizeof(p)];
     int  result;
+#if !defined(HAVE_FIPS) && !defined(STM32F2_CRYPTO) && !defined(STM32F4_CRYPTO)
+    int  ivlen;
+#endif
+    int  alen, plen;
 
     XMEMSET(resultT, 0, sizeof(resultT));
     XMEMSET(resultC, 0, sizeof(resultC));

@@ -4630,6 +4645,87 @@ int aesgcm_test(void)
     if (XMEMCMP(p, resultP, sizeof(resultP)))
         return -4306;
 
+    /* Large buffer test */
+    /* AES-GCM encrypt and decrypt both use AES encrypt internally */
+    result = wc_AesGcmEncrypt(&enc, large_output, large_input,
+                              LARGE_BUFFER_SIZE, iv1, sizeof(iv1),
+                              resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (result != 0)
+        return -4307;
+
+    result = wc_AesGcmDecrypt(&enc, large_outdec, large_output,
+                              LARGE_BUFFER_SIZE, iv1, sizeof(iv1), resultT,
+                              sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (result != 0)
+        return -4308;
+    if (XMEMCMP(large_input, large_outdec, LARGE_BUFFER_SIZE))
+        return -4309;
+
+#if !defined(HAVE_FIPS) && !defined(STM32F2_CRYPTO) && !defined(STM32F4_CRYPTO)
+    /* Variable IV length test */
+    for (ivlen=0; ivlen<(int)sizeof(k1); ivlen++) {
+        /* AES-GCM encrypt and decrypt both use AES encrypt internally */
+        result = wc_AesGcmEncrypt(&enc, resultC, p, sizeof(p), k1, ivlen,
+                                  resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+        result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+        if (result != 0)
+            return -4310;
+        result = wc_AesGcmDecrypt(&enc, resultP, resultC, sizeof(resultC), k1,
+                                  ivlen, resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+        result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+        if (result != 0)
+            return -4311;
+    }
+#endif
+
+    /* Variable authenticed data length test */
+    for (alen=0; alen<(int)sizeof(p); alen++) {
+        /* AES-GCM encrypt and decrypt both use AES encrypt internally */
+        result = wc_AesGcmEncrypt(&enc, resultC, p, sizeof(p), iv1,
+                                  sizeof(iv1), resultT, sizeof(resultT), p, alen);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+        result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+        if (result != 0)
+            return -4312;
+        result = wc_AesGcmDecrypt(&enc, resultP, resultC, sizeof(resultC), iv1,
+                                  sizeof(iv1), resultT, sizeof(resultT), p, alen);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+        result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+        if (result != 0)
+            return -4313;
+    }
+
+    /* Variable plain text length test */
+    for (plen=1; plen<(int)sizeof(p); plen++) {
+        /* AES-GCM encrypt and decrypt both use AES encrypt internally */
+        result = wc_AesGcmEncrypt(&enc, resultC, p, plen, iv1, sizeof(iv1),
+                                  resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+        result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+        if (result != 0)
+            return -4314;
+        result = wc_AesGcmDecrypt(&enc, resultP, resultC, plen, iv1,
+                                  sizeof(iv1), resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+        result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+        if (result != 0)
+            return -4315;
+    }
+
     /* FIPS, QAT and STM32F2/4 HW Crypto only support 12-byte IV */
 #if !defined(HAVE_FIPS) && !defined(HAVE_INTEL_QA) && \
     !defined(STM32F2_CRYPTO) && !defined(STM32F4_CRYPTO) && \

@@ -4646,11 +4742,11 @@ int aesgcm_test(void)
     result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
 #endif
     if (result != 0)
-        return -4307;
+        return -4316;
     if (XMEMCMP(c2, resultC, sizeof(resultC)))
-        return -4308;
+        return -4317;
     if (XMEMCMP(t2, resultT, sizeof(resultT)))
-        return -4309;
+        return -4318;
 
     result = wc_AesGcmDecrypt(&enc, resultP, resultC, sizeof(resultC),
                               iv2, sizeof(iv2), resultT, sizeof(resultT), a, sizeof(a));

@@ -4658,9 +4754,9 @@ int aesgcm_test(void)
     result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
 #endif
     if (result != 0)
-        return -4310;
+        return -4319;
     if (XMEMCMP(p, resultP, sizeof(resultP)))
-        return -4311;
+        return -4320;
 #endif /* !HAVE_FIPS && !HAVE_INTEL_QA && !STM32F2_CRYPTO && !STM32F4_CRYPTO */
 
     wc_AesFree(&enc);
@@ -0,0 +1,59 @@
+/* cpuid.h
+ *
+ * Copyright (C) 2006-2016 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+
+
+#ifndef WOLF_CRYPT_CPUID_H
+#define WOLF_CRYPT_CPUID_H
+
+
+#include <wolfssl/wolfcrypt/types.h>
+
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+#ifdef WOLFSSL_X86_64_BUILD
+    #define CPUID_AVX1   0x0001
+    #define CPUID_AVX2   0x0002
+    #define CPUID_RDRAND 0x0004
+    #define CPUID_RDSEED 0x0008
+    #define CPUID_BMI2   0x0010   /* MULX, RORX */
+    #define CPUID_AESNI  0x0020
+
+    #define IS_INTEL_AVX1(f)   ((f) & CPUID_AVX1)
+    #define IS_INTEL_AVX2(f)   ((f) & CPUID_AVX2)
+    #define IS_INTEL_RDRAND(f) ((f) & CPUID_RDRAND)
+    #define IS_INTEL_RDSEED(f) ((f) & CPUID_RDSEED)
+    #define IS_INTEL_BMI2(f)   ((f) & CPUID_BMI2)
+    #define IS_INTEL_AESNI(f)  ((f) & CPUID_AESNI)
+
+    void   cpuid_set_flags(void);
+    word32 cpuid_get_flags(void);
+#endif
+
+#ifdef __cplusplus
+    }   /* extern "C" */
+#endif
+
+
+#endif /* WOLF_CRYPT_CPUID_H */
@@ -60,7 +60,8 @@ nobase_include_HEADERS+= \
     wolfssl/wolfcrypt/wolfevent.h \
     wolfssl/wolfcrypt/pkcs12.h \
    wolfssl/wolfcrypt/wolfmath.h \
-    wolfssl/wolfcrypt/sha3.h
+    wolfssl/wolfcrypt/sha3.h \
+    wolfssl/wolfcrypt/cpuid.h
 
 noinst_HEADERS+= \
     wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h \