AES XTS x64 ASM: add AVX1 and AESNI implementations

Adding AES-XTS AVX1 and AESNI implementations.
Fix name in comment at top of x64 assembly files.
pull/6815/head
Sean Parkinson 2023-09-28 09:25:37 +10:00 committed by Daniel Pouzzner
parent 0571040e36
commit 3ea0fb30dd
14 changed files with 1864 additions and 252 deletions

View File

@ -4664,6 +4664,10 @@ AC_ARG_ENABLE([xts],
AS_IF([test "x$ENABLED_XTS" = "xyes"],
[AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_AES_XTS -DWOLFSSL_AES_DIRECT"])
AS_IF([test "x$ENABLED_XTS" = "xyes" && test "x$ENABLED_INTELASM" = "xyes"],
[AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_AES_XTS"])
AS_IF([test "x$ENABLED_XTS" = "xyes" && test "x$ENABLED_AESNI" = "xyes"],
[AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_AES_XTS"])
# Web Server Build
AC_ARG_ENABLE([webserver],
@ -8922,6 +8926,7 @@ AM_CONDITIONAL([BUILD_SNIFFER], [ test "x$ENABLED_SNIFFER" = "xyes" || test "
AM_CONDITIONAL([BUILD_SNIFFTEST],[ test "x$ENABLED_SNIFFTEST" = "xyes"])
AM_CONDITIONAL([BUILD_AESGCM],[test "x$ENABLED_AESGCM" = "xyes" || test "x$ENABLED_USERSETTINGS" = "xyes"])
AM_CONDITIONAL([BUILD_AESCCM],[test "x$ENABLED_AESCCM" = "xyes" || test "x$ENABLED_USERSETTINGS" = "xyes"])
AM_CONDITIONAL([BUILD_XTS],[test "x$ENABLED_XTS" = "xyes" || test "x$ENABLED_USERSETTINGS" = "xyes"])
AM_CONDITIONAL([BUILD_ARMASM],[test "x$ENABLED_ARMASM" = "xyes"])
AM_CONDITIONAL([BUILD_ARMASM_INLINE],[test "x$ENABLED_ARMASM_INLINE" = "xyes"])
AM_CONDITIONAL([BUILD_ARMASM_CRYPTO],[test "x$ENABLED_ARMASM_CRYPTO" = "xyes"])

View File

@ -103,6 +103,8 @@ $(obj)/wolfcrypt/src/aes_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DIS
$(obj)/wolfcrypt/src/aes_asm.o: OBJECT_FILES_NON_STANDARD := y
$(obj)/wolfcrypt/src/aes_gcm_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE)
$(obj)/wolfcrypt/src/aes_gcm_asm.o: OBJECT_FILES_NON_STANDARD := y
$(obj)/wolfcrypt/src/aes_xts_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE)
$(obj)/wolfcrypt/src/aes_xts_asm.o: OBJECT_FILES_NON_STANDARD := y
$(obj)/wolfcrypt/src/sp_x86_64_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE)
$(obj)/wolfcrypt/src/sp_x86_64_asm.o: OBJECT_FILES_NON_STANDARD := y

View File

@ -80,6 +80,7 @@ if BUILD_X86_ASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_asm.S
endif
endif
@ -187,6 +188,7 @@ if BUILD_X86_ASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_asm.S
endif
endif
@ -623,6 +625,7 @@ if BUILD_X86_ASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_x86_asm.S
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_gcm_asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes_xts_asm.S
endif
endif
endif

View File

@ -11006,6 +11006,41 @@ int wc_AesXtsDecryptSector(XtsAes* aes, byte* out, const byte* in, word32 sz,
return wc_AesXtsDecrypt(aes, out, in, sz, (const byte*)i, AES_BLOCK_SIZE);
}
/* Prototypes for the x64 assembly AES-XTS routines.
 *
 * Each prototype is tied to its external symbol name with XASM_LINK
 * (presumably an asm-label binding - confirm against the macro definition).
 * The callers in this file pass:
 *   in   input buffer
 *   out  output buffer
 *   sz   size of both buffers
 *   i    tweak value
 *   key  key schedule of the data cipher (xaes->aes.key)
 *   key2 key schedule of the tweak cipher (xaes->tweak.key)
 *   nr   number of AES rounds (xaes->aes.rounds)
 *
 * The routines return nothing; arguments are expected to have been validated
 * by the C wrapper before the call.
 */
#ifdef WOLFSSL_AESNI
#if defined(USE_INTEL_SPEEDUP)
/* USE_INTEL_SPEEDUP enables the AVX feature macros for this file.
 * NOTE(review): only *_avx1 prototypes are declared below; HAVE_INTEL_AVX2
 * is defined but no AVX2 XTS routine is referenced in this section. */
#define HAVE_INTEL_AVX1
#define HAVE_INTEL_AVX2
#endif /* USE_INTEL_SPEEDUP */
/* AES-NI AES-XTS encryption. */
void AES_XTS_encrypt(const unsigned char *in, unsigned char *out, word32 sz,
const unsigned char* i, const unsigned char* key,
const unsigned char* key2, int nr)
XASM_LINK("AES_XTS_encrypt");
#ifdef HAVE_INTEL_AVX1
/* AVX1 variant of AES-XTS encryption; same arguments as AES_XTS_encrypt. */
void AES_XTS_encrypt_avx1(const unsigned char *in, unsigned char *out,
word32 sz, const unsigned char* i,
const unsigned char* key, const unsigned char* key2,
int nr)
XASM_LINK("AES_XTS_encrypt_avx1");
#endif /* HAVE_INTEL_AVX1 */
#ifdef HAVE_AES_DECRYPT
/* AES-NI AES-XTS decryption; only declared when decryption is compiled in. */
void AES_XTS_decrypt(const unsigned char *in, unsigned char *out, word32 sz,
const unsigned char* i, const unsigned char* key,
const unsigned char* key2, int nr)
XASM_LINK("AES_XTS_decrypt");
#ifdef HAVE_INTEL_AVX1
/* AVX1 variant of AES-XTS decryption; same arguments as AES_XTS_decrypt. */
void AES_XTS_decrypt_avx1(const unsigned char *in, unsigned char *out,
word32 sz, const unsigned char* i,
const unsigned char* key, const unsigned char* key2,
int nr)
XASM_LINK("AES_XTS_decrypt_avx1");
#endif /* HAVE_INTEL_AVX1 */
#endif /* HAVE_AES_DECRYPT */
#endif /* WOLFSSL_AESNI */
#ifdef HAVE_AES_ECB
/* helper function for encrypting / decrypting full buffer at once */
static WARN_UNUSED_RESULT int _AesXtsHelper(
@ -11054,31 +11089,17 @@ static WARN_UNUSED_RESULT int _AesXtsHelper(
* in input plain text buffer to encrypt
* sz size of both out and in buffers
* i value to use for tweak
* iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input
* adds a sanity check on how the user calls the function.
*
* returns 0 on success
*/
/* Software AES - XTS Encrypt */
int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
const byte* i, word32 iSz)
static int AesXtsEncrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz,
const byte* i)
{
int ret = 0;
word32 blocks = (sz / AES_BLOCK_SIZE);
Aes *aes, *tweak;
if (xaes == NULL || out == NULL || in == NULL) {
return BAD_FUNC_ARG;
}
aes = &xaes->aes;
tweak = &xaes->tweak;
if (iSz < AES_BLOCK_SIZE) {
return BAD_FUNC_ARG;
}
if (blocks > 0) {
Aes *aes = &xaes->aes;
Aes *tweak = &xaes->tweak;
byte tmp[AES_BLOCK_SIZE];
XMEMSET(tmp, 0, AES_BLOCK_SIZE); /* set to 0's in case of improper AES
@ -11093,7 +11114,7 @@ int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
return ret;
}
#ifdef HAVE_AES_ECB
#ifdef HAVE_AES_ECB
/* encrypt all of buffer at once when possible */
if (in != out) { /* can not handle inline */
XMEMCPY(out, tmp, AES_BLOCK_SIZE);
@ -11102,15 +11123,15 @@ int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
return ret;
}
}
#endif
#endif
while (blocks > 0) {
word32 j;
byte carry = 0;
#ifdef HAVE_AES_ECB
#ifdef HAVE_AES_ECB
if (in == out)
#endif
#endif
{ /* check for if inline */
byte buf[AES_BLOCK_SIZE];
@ -11169,13 +11190,59 @@ int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
xorbuf(out - AES_BLOCK_SIZE, tmp, AES_BLOCK_SIZE);
}
RESTORE_VECTOR_REGISTERS();
return ret;
}
/* AES with XTS mode. (XTS) XEX encryption with Tweak and cipher text Stealing.
 *
 * Public entry point: validates the arguments and then dispatches to the
 * assembly implementation (AVX1 or plain AES-NI) when AES-NI is in use, or to
 * the software implementation AesXtsEncrypt_sw() otherwise.
 *
 * xaes AES keys to use for block encrypt/decrypt
 * out  output buffer to hold cipher text
 * in   input plain text buffer to encrypt
 * sz   size of both out and in buffers, must be at least AES_BLOCK_SIZE
 * i    value to use for tweak
 * iSz  size of i buffer, should always be AES_BLOCK_SIZE but having this input
 *      adds a sanity check on how the user calls the function.
 *
 * returns 0 on success, BAD_FUNC_ARG when a pointer is NULL or when iSz or sz
 *         is smaller than AES_BLOCK_SIZE, otherwise the error returned by the
 *         software implementation.
 */
int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
                     const byte* i, word32 iSz)
{
    if (xaes == NULL || out == NULL || in == NULL) {
        return BAD_FUNC_ARG;
    }
    if (iSz < AES_BLOCK_SIZE) {
        return BAD_FUNC_ARG;
    }
    /* XTS needs at least one full block; the assembly routines return void
     * and cannot report this, so reject it here for every implementation. */
    if (sz < AES_BLOCK_SIZE) {
        WOLFSSL_MSG("Plain text input too small for encryption");
        return BAD_FUNC_ARG;
    }

#ifdef WOLFSSL_AESNI
    /* Only take an assembly path when AES-NI is actually in use: the AVX1
     * routine is selected on top of AES-NI, not instead of it.
     * NOTE(review): assumes the AVX1 routine also issues AES instructions -
     * confirm against aes_xts_asm.S. */
    if (haveAESNI) {
        /* Both assembly variants use vector registers, so both must run
         * between the save/restore pair. */
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
    #if defined(HAVE_INTEL_AVX1)
        if (IS_INTEL_AVX1(intel_flags)) {
            AES_XTS_encrypt_avx1(in, out, sz, i, (const byte*)xaes->aes.key,
                (const byte*)xaes->tweak.key, (int)xaes->aes.rounds);
        }
        else
    #endif
        {
            AES_XTS_encrypt(in, out, sz, i, (const byte*)xaes->aes.key,
                (const byte*)xaes->tweak.key, (int)xaes->aes.rounds);
        }
        RESTORE_VECTOR_REGISTERS();
        return 0;
    }
#endif

    return AesXtsEncrypt_sw(xaes, out, in, sz, i);
}
@ -11186,31 +11253,17 @@ int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
* in input cipher text buffer to decrypt
* sz size of both out and in buffers
* i value to use for tweak
* iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input
* adds a sanity check on how the user calls the function.
*
* returns 0 on success
*/
/* Software AES - XTS Decrypt */
int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
const byte* i, word32 iSz)
static int AesXtsDecrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz,
const byte* i)
{
int ret = 0;
word32 blocks = (sz / AES_BLOCK_SIZE);
Aes *aes, *tweak;
if (xaes == NULL || out == NULL || in == NULL) {
return BAD_FUNC_ARG;
}
aes = &xaes->aes;
tweak = &xaes->tweak;
if (iSz < AES_BLOCK_SIZE) {
return BAD_FUNC_ARG;
}
if (blocks > 0) {
Aes *aes = &xaes->aes;
Aes *tweak = &xaes->tweak;
word32 j;
byte carry = 0;
byte tmp[AES_BLOCK_SIZE];
@ -11233,7 +11286,7 @@ int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
blocks--;
}
#ifdef HAVE_AES_ECB
#ifdef HAVE_AES_ECB
/* decrypt all of buffer at once when possible */
if (in != out) { /* can not handle inline */
XMEMCPY(out, tmp, AES_BLOCK_SIZE);
@ -11242,12 +11295,12 @@ int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
return ret;
}
}
#endif
#endif
while (blocks > 0) {
#ifdef HAVE_AES_ECB
#ifdef HAVE_AES_ECB
if (in == out)
#endif
#endif
{ /* check for if inline */
byte buf[AES_BLOCK_SIZE];
@ -11331,15 +11384,60 @@ int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
XMEMCPY(out - AES_BLOCK_SIZE, tmp2, AES_BLOCK_SIZE);
}
RESTORE_VECTOR_REGISTERS();
}
else {
WOLFSSL_MSG("Plain text input too small for encryption");
return BAD_FUNC_ARG;
}
return ret;
}
/* Same process as encryption but Aes key is AES_DECRYPTION type.
 *
 * Public entry point: validates the arguments and then dispatches to the
 * assembly implementation (AVX1 or plain AES-NI) when AES-NI is in use, or to
 * the software implementation AesXtsDecrypt_sw() otherwise.
 *
 * xaes AES keys to use for block encrypt/decrypt
 * out  output buffer to hold plain text
 * in   input cipher text buffer to decrypt
 * sz   size of both out and in buffers, must be at least AES_BLOCK_SIZE
 * i    value to use for tweak
 * iSz  size of i buffer, should always be AES_BLOCK_SIZE but having this input
 *      adds a sanity check on how the user calls the function.
 *
 * returns 0 on success, BAD_FUNC_ARG when a pointer is NULL or when iSz or sz
 *         is smaller than AES_BLOCK_SIZE, otherwise the error returned by the
 *         software implementation.
 */
int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
                     const byte* i, word32 iSz)
{
    if (xaes == NULL || out == NULL || in == NULL) {
        return BAD_FUNC_ARG;
    }
    if (iSz < AES_BLOCK_SIZE) {
        return BAD_FUNC_ARG;
    }
    /* XTS needs at least one full block; the assembly routines return void
     * and cannot report this, so reject it here for every implementation. */
    if (sz < AES_BLOCK_SIZE) {
        WOLFSSL_MSG("Cipher text input too small for decryption");
        return BAD_FUNC_ARG;
    }

#ifdef WOLFSSL_AESNI
    /* Only take an assembly path when AES-NI is actually in use: the AVX1
     * routine is selected on top of AES-NI, not instead of it.
     * NOTE(review): assumes the AVX1 routine also issues AES instructions -
     * confirm against aes_xts_asm.S. */
    if (haveAESNI) {
        /* Both assembly variants use vector registers, so both must run
         * between the save/restore pair. */
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
    #if defined(HAVE_INTEL_AVX1)
        if (IS_INTEL_AVX1(intel_flags)) {
            AES_XTS_decrypt_avx1(in, out, sz, i, (const byte*)xaes->aes.key,
                (const byte*)xaes->tweak.key, (int)xaes->aes.rounds);
        }
        else
    #endif
        {
            AES_XTS_decrypt(in, out, sz, i, (const byte*)xaes->aes.key,
                (const byte*)xaes->tweak.key, (int)xaes->aes.rounds);
        }
        RESTORE_VECTOR_REGISTERS();
        return 0;
    }
#endif

    return AesXtsDecrypt_sw(xaes, out, in, sz, i);
}
#endif /* WOLFSSL_AES_XTS */
#ifdef WOLFSSL_AES_SIV

View File

@ -1,5 +1,5 @@
/* aes_gcm_asm
*
/* aes_gcm_asm.S */
/*
* Copyright (C) 2006-2023 wolfSSL Inc.
*
* This file is part of wolfSSL.

View File

@ -1,5 +1,5 @@
; /* aes_gcm_asm
; *
; /* aes_gcm_asm.asm */
; /*
; * Copyright (C) 2006-2023 wolfSSL Inc.
; *
; * This file is part of wolfSSL.

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/* chacha_asm
*
/* chacha_asm.S */
/*
* Copyright (C) 2006-2023 wolfSSL Inc.
*
* This file is part of wolfSSL.

View File

@ -1,5 +1,5 @@
/* fe_x25519_asm
*
/* fe_x25519_asm.S */
/*
* Copyright (C) 2006-2023 wolfSSL Inc.
*
* This file is part of wolfSSL.

View File

@ -1,5 +1,5 @@
/* poly1305_asm
*
/* poly1305_asm.S */
/*
* Copyright (C) 2006-2023 wolfSSL Inc.
*
* This file is part of wolfSSL.

View File

@ -1,5 +1,5 @@
/* sha256_asm
*
/* sha256_asm.S */
/*
* Copyright (C) 2006-2023 wolfSSL Inc.
*
* This file is part of wolfSSL.

View File

@ -1,5 +1,5 @@
/* sha3_asm
*
/* sha3_asm.S */
/*
* Copyright (C) 2006-2023 wolfSSL Inc.
*
* This file is part of wolfSSL.

View File

@ -1,5 +1,5 @@
/* sha512_asm
*
/* sha512_asm.S */
/*
* Copyright (C) 2006-2023 wolfSSL Inc.
*
* This file is part of wolfSSL.

View File

@ -9501,6 +9501,61 @@ static wc_test_ret_t aes_xts_128_test(void)
#endif /* !HAVE_FIPS || FIPS_VERSION_GE(5,3) */
#if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM)
{
/* In-place round-trip test over many lengths: for every size j from 16 up to
 * LARGE_XTS_SZ - 1, encrypt the first j bytes of large_input in place,
 * decrypt them in place again and check that the original byte pattern is
 * restored. Sizes that are not a multiple of the block size presumably
 * exercise the cipher text stealing path - confirm against the
 * implementation. */
#define LARGE_XTS_SZ 1024
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC)
/* Small-stack builds take the work buffer from the heap. */
byte* large_input = (byte *)XMALLOC(LARGE_XTS_SZ, HEAP_HINT,
DYNAMIC_TYPE_TMP_BUFFER);
#else
byte large_input[LARGE_XTS_SZ];
#endif
int i;
int j;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC)
if (large_input == NULL)
ERROR_OUT(WC_TEST_RET_ENC_EC(MEMORY_E), out);
#endif
/* Fill the buffer with a known pattern: byte k holds (byte)k. */
for (i = 0; i < (int)LARGE_XTS_SZ; i++)
large_input[i] = (byte)i;
/* NOTE(review): ERROR_OUT presumably jumps to the `out` label, which lies
 * outside this block; on small-stack builds every ERROR_OUT inside the loop
 * would then skip the XFREE at the end of the block and leak large_input -
 * confirm and consider freeing on the error path. */
for (j = 16; j < (int)LARGE_XTS_SZ; j++) {
/* Key the context for encryption and encrypt j bytes in place. */
ret = wc_AesXtsSetKey(aes, k1, sizeof(k1), AES_ENCRYPTION,
HEAP_HINT, devId);
if (ret != 0)
ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out);
ret = wc_AesXtsEncrypt(aes, large_input, large_input, j, i1,
sizeof(i1));
#if defined(WOLFSSL_ASYNC_CRYPT)
ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE);
#endif
if (ret != 0)
ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out);
/* Re-key the same context for decryption and decrypt in place. */
ret = wc_AesXtsSetKey(aes, k1, sizeof(k1), AES_DECRYPTION,
HEAP_HINT, devId);
if (ret != 0)
ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out);
ret = wc_AesXtsDecrypt(aes, large_input, large_input, j, i1,
sizeof(i1));
#if defined(WOLFSSL_ASYNC_CRYPT)
ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE);
#endif
if (ret != 0)
ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out);
/* The first j bytes must match the original pattern again; this also
 * leaves the buffer ready for the next, longer iteration. */
for (i = 0; i < j; i++) {
if (large_input[i] != (byte)i) {
ERROR_OUT(WC_TEST_RET_ENC_NC, out);
}
}
}
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC)
XFREE(large_input, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
#endif
}
#endif
out:
if (aes_inited)