wolfcrypt/src/sha{256,512,3}.c add SAVE_VECTOR_REGISTERS() for SHA-2 and SHA-3 vectorized implementations, and add WC_NO_INTERNAL_FUNCTION_POINTERS code paths to fix GOT relocations around implementation function pointers.

pull/7460/head
Daniel Pouzzner 2024-04-23 01:31:43 -05:00
parent c3d9fb61a8
commit 5d9154e8c6
4 changed files with 339 additions and 11 deletions
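Background: under -fPIE/-fPIC, a static function pointer such as Transform_Sha256_p occupies a writable, GOT-relocated data slot. Replacing the pointer with a small enum and a switch of direct calls removes that relocation. A minimal sketch of the pattern, using hypothetical names (the real selection logic is in Sha256_SetTransform/Sha512_SetTransform below):

    /* sketch only -- impl_t, transform_c, and transform_vector are invented */
    typedef enum { IMPL_UNSET, IMPL_C, IMPL_VECTOR } impl_t;
    static impl_t impl = IMPL_UNSET;

    static int transform_c(unsigned char *state)      { (void)state; return 0; }
    static int transform_vector(unsigned char *state) { (void)state; return 0; }

    static int transform(unsigned char *state)
    {
        switch (impl) {               /* direct calls: no GOT data slot needed */
        case IMPL_VECTOR: return transform_vector(state);
        case IMPL_C:
        case IMPL_UNSET:
        default:          return transform_c(state);
        }
    }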

wolfcrypt/src/sha256.c

@@ -371,25 +371,172 @@ static int InitSha256(wc_Sha256* sha256)
} /* extern "C" */
#endif
static word32 intel_flags;
static int Transform_Sha256_is_vectorized = 0;
#ifdef WC_NO_INTERNAL_FUNCTION_POINTERS
static enum { SHA256_UNSET, SHA256_AVX1, SHA256_AVX2, SHA256_AVX1_RORX,
              SHA256_AVX2_RORX, SHA256_SSE2, SHA256_C }
    sha_method = SHA256_UNSET;
static void Sha256_SetTransform(void)
{
    if (sha_method != SHA256_UNSET)
        return;

    intel_flags = cpuid_get_flags();

    if (IS_INTEL_SHA(intel_flags)) {
#ifdef HAVE_INTEL_AVX1
        if (IS_INTEL_AVX1(intel_flags)) {
            sha_method = SHA256_AVX1;
            Transform_Sha256_is_vectorized = 1;
        }
        else
#endif
        {
            sha_method = SHA256_SSE2;
            Transform_Sha256_is_vectorized = 1;
        }
    }
    else
#ifdef HAVE_INTEL_AVX2
    if (IS_INTEL_AVX2(intel_flags)) {
#ifdef HAVE_INTEL_RORX
        if (IS_INTEL_BMI2(intel_flags)) {
            sha_method = SHA256_AVX2_RORX;
            Transform_Sha256_is_vectorized = 1;
        }
        else
#endif
        {
            sha_method = SHA256_AVX2;
            Transform_Sha256_is_vectorized = 1;
        }
    }
    else
#endif
#ifdef HAVE_INTEL_AVX1
    if (IS_INTEL_AVX1(intel_flags)) {
#ifdef HAVE_INTEL_RORX
        if (IS_INTEL_BMI2(intel_flags)) {
            sha_method = SHA256_AVX1_RORX;
            Transform_Sha256_is_vectorized = 1;
        }
        else
#endif
        {
            sha_method = SHA256_AVX1;
            Transform_Sha256_is_vectorized = 1;
        }
    }
    else
#endif
    {
        sha_method = SHA256_C;
        Transform_Sha256_is_vectorized = 0;
    }
}
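/* Dispatch below goes through the sha_method enum and direct calls in a
 * switch, so position-independent builds need no writable GOT slot for a
 * function pointer. */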
static WC_INLINE int inline_XTRANSFORM(wc_Sha256* S, const byte* D) {
    int ret;

    if (sha_method == SHA256_C)
        return Transform_Sha256(S, D);

    SAVE_VECTOR_REGISTERS(return _svr_ret;);
    switch (sha_method) {
    case SHA256_AVX2:
        ret = Transform_Sha256_AVX2(S, D);
        break;
    case SHA256_AVX2_RORX:
        ret = Transform_Sha256_AVX2_RORX(S, D);
        break;
    case SHA256_AVX1:
        ret = Transform_Sha256_AVX1_Sha(S, D);
        break;
    case SHA256_AVX1_RORX:
        ret = Transform_Sha256_AVX1_RORX(S, D);
        break;
    case SHA256_SSE2:
        ret = Transform_Sha256_SSE2_Sha(S, D);
        break;
    case SHA256_C:
    case SHA256_UNSET:
    default:
        ret = Transform_Sha256(S, D);
        break;
    }
    RESTORE_VECTOR_REGISTERS();
    return ret;
}
#define XTRANSFORM(...) inline_XTRANSFORM(__VA_ARGS__)
static WC_INLINE int inline_XTRANSFORM_LEN(wc_Sha256* S, const byte* D, word32 L) {
    int ret;

    SAVE_VECTOR_REGISTERS(return _svr_ret;);
    switch (sha_method) {
    case SHA256_AVX2:
        ret = Transform_Sha256_AVX2_Len(S, D, L);
        break;
    case SHA256_AVX2_RORX:
        ret = Transform_Sha256_AVX2_RORX_Len(S, D, L);
        break;
    case SHA256_AVX1:
        ret = Transform_Sha256_AVX1_Sha_Len(S, D, L);
        break;
    case SHA256_AVX1_RORX:
        ret = Transform_Sha256_AVX1_RORX_Len(S, D, L);
        break;
    case SHA256_SSE2:
        ret = Transform_Sha256_SSE2_Sha_Len(S, D, L);
        break;
    case SHA256_C:
    case SHA256_UNSET:
    default:
        /* not reached; callers gate on sha_method != SHA256_C */
        ret = 0;
        break;
    }
    RESTORE_VECTOR_REGISTERS();
    return ret;
}
#define XTRANSFORM_LEN(...) inline_XTRANSFORM_LEN(__VA_ARGS__)
#else /* !WC_NO_INTERNAL_FUNCTION_POINTERS */
static int (*Transform_Sha256_p)(wc_Sha256* sha256, const byte* data);
    /* = _Transform_Sha256 */
static int (*Transform_Sha256_Len_p)(wc_Sha256* sha256, const byte* data,
                                     word32 len);
    /* = NULL */
static int transform_check = 0;
static WC_INLINE int inline_XTRANSFORM(wc_Sha256* S, const byte* D) {
    int ret;
#ifdef WOLFSSL_LINUXKM
    if (Transform_Sha256_is_vectorized)
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
#endif
    ret = (*Transform_Sha256_p)(S, D);
#ifdef WOLFSSL_LINUXKM
    if (Transform_Sha256_is_vectorized)
        RESTORE_VECTOR_REGISTERS();
#endif
    return ret;
}
#define XTRANSFORM(...) inline_XTRANSFORM(__VA_ARGS__)
static WC_INLINE int inline_XTRANSFORM_LEN(wc_Sha256* S, const byte* D, word32 L) {
    int ret;
#ifdef WOLFSSL_LINUXKM
    if (Transform_Sha256_is_vectorized)
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
#endif
    ret = (*Transform_Sha256_Len_p)(S, D, L);
#ifdef WOLFSSL_LINUXKM
    if (Transform_Sha256_is_vectorized)
        RESTORE_VECTOR_REGISTERS();
#endif
    return ret;
}
#define XTRANSFORM_LEN(...) inline_XTRANSFORM_LEN(__VA_ARGS__)
@@ -463,6 +610,8 @@ static int InitSha256(wc_Sha256* sha256)
    transform_check = 1;
}
#endif /* !WC_NO_INTERNAL_FUNCTION_POINTERS */
#if !defined(WOLFSSL_KCAPI_HASH)
int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
{
@@ -1162,7 +1311,13 @@ static int InitSha256(wc_Sha256* sha256)
#ifdef XTRANSFORM_LEN
#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
    (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
#ifdef WC_NO_INTERNAL_FUNCTION_POINTERS
    if (sha_method != SHA256_C)
#else
    if (Transform_Sha256_Len_p != NULL)
#endif
#endif
    {
        if (len >= WC_SHA256_BLOCK_SIZE) {

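Note on SAVE_VECTOR_REGISTERS(): its argument is the statement executed if
saving the kernel's vector unit state fails, which is why every call site in
this commit passes "return _svr_ret;". A condensed sketch of the bracketing
(do_simd_work is a hypothetical stand-in for a vectorized transform):

    int ret;
    SAVE_VECTOR_REGISTERS(return _svr_ret;);  /* bail out if the FPU/SIMD
                                               * state cannot be claimed */
    ret = do_simd_work(state);
    RESTORE_VECTOR_REGISTERS();               /* always paired with a
                                               * successful save */
    return ret;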
wolfcrypt/src/sha3.c

@@ -651,6 +651,10 @@ static int Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p)
    word32 i;
    word32 blocks;

#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP)
    if (sha3_block == sha3_block_avx2)
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
#endif

    if (sha3->i > 0) {
        byte *t;
        byte l = (byte)(p * 8 - sha3->i);
@@ -699,6 +703,10 @@ static int Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p)
        len -= p * 8;
        data += p * 8;
    }

#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP)
    if (sha3_block == sha3_block_avx2)
        RESTORE_VECTOR_REGISTERS();
#endif

    XMEMCPY(sha3->t, data, len);
    sha3->i += (byte)len;
@@ -732,6 +740,12 @@ static int Sha3Final(wc_Sha3* sha3, byte padChar, byte* hash, byte p, word32 l)
    for (i = 0; i < p; i++) {
        sha3->s[i] ^= Load64BitBigEndian(sha3->t + 8 * i);
    }

#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP)
    if (sha3_block == sha3_block_avx2)
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
#endif

    for (j = 0; l - j >= rate; j += rate) {
#ifdef USE_INTEL_SPEEDUP
        (*sha3_block)(sha3->s);
@@ -755,6 +769,11 @@ static int Sha3Final(wc_Sha3* sha3, byte padChar, byte* hash, byte p, word32 l)
#endif
        XMEMCPY(hash + j, sha3->s, l - j);
    }

#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP)
    if (sha3_block == sha3_block_avx2)
        RESTORE_VECTOR_REGISTERS();
#endif

    return 0;
}
@@ -1328,6 +1347,10 @@ int wc_Shake128_Absorb(wc_Shake* shake, const byte* data, word32 len)
 */
int wc_Shake128_SqueezeBlocks(wc_Shake* shake, byte* out, word32 blockCnt)
{
#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP)
    if (sha3_block == sha3_block_avx2)
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
#endif
    for (; (blockCnt > 0); blockCnt--) {
#ifdef USE_INTEL_SPEEDUP
        (*sha3_block)(shake->s);
@@ -1341,6 +1364,10 @@ int wc_Shake128_SqueezeBlocks(wc_Shake* shake, byte* out, word32 blockCnt)
#endif
        out += WC_SHA3_128_COUNT * 8;
    }
#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP)
    if (sha3_block == sha3_block_avx2)
        RESTORE_VECTOR_REGISTERS();
#endif
    return 0;
}
@@ -1458,6 +1485,10 @@ int wc_Shake256_Absorb(wc_Shake* shake, const byte* data, word32 len)
 */
int wc_Shake256_SqueezeBlocks(wc_Shake* shake, byte* out, word32 blockCnt)
{
#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP)
    if (sha3_block == sha3_block_avx2)
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
#endif
    for (; (blockCnt > 0); blockCnt--) {
#ifdef USE_INTEL_SPEEDUP
        (*sha3_block)(shake->s);
@@ -1471,6 +1502,10 @@ int wc_Shake256_SqueezeBlocks(wc_Shake* shake, byte* out, word32 blockCnt)
#endif
        out += WC_SHA3_256_COUNT * 8;
    }
#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP)
    if (sha3_block == sha3_block_avx2)
        RESTORE_VECTOR_REGISTERS();
#endif
    return 0;
}

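Note: sha3.c keeps its sha3_block function-pointer dispatch, so the kernel-mode
guards compare the pointer against the one vectorized implementation rather
than consulting an enum. Condensed from the hunks above:

#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP)
    if (sha3_block == sha3_block_avx2)   /* only the AVX2 path touches SIMD state */
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
#endif
    /* ... absorb or squeeze full blocks ... */
#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP)
    if (sha3_block == sha3_block_avx2)
        RESTORE_VECTOR_REGISTERS();
#endif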
wolfcrypt/src/sha512.c

@@ -426,20 +426,147 @@ static int InitSha512_256(wc_Sha512* sha512)
#endif
static int _Transform_Sha512(wc_Sha512 *sha512);
static word32 intel_flags;
static int Transform_Sha512_is_vectorized = 0;
#ifdef WC_NO_INTERNAL_FUNCTION_POINTERS
static enum { SHA512_UNSET, SHA512_AVX1, SHA512_AVX2, SHA512_AVX1_RORX,
              SHA512_AVX2_RORX, SHA512_C }
    sha_method = SHA512_UNSET;
static void Sha512_SetTransform(void)
{
    if (sha_method != SHA512_UNSET)
        return;

    intel_flags = cpuid_get_flags();

#if defined(HAVE_INTEL_AVX2)
    if (IS_INTEL_AVX2(intel_flags)) {
#ifdef HAVE_INTEL_RORX
        if (IS_INTEL_BMI2(intel_flags)) {
            sha_method = SHA512_AVX2_RORX;
            Transform_Sha512_is_vectorized = 1;
        }
        else
#endif
        {
            sha_method = SHA512_AVX2;
            Transform_Sha512_is_vectorized = 1;
        }
    }
    else
#endif
#if defined(HAVE_INTEL_AVX1)
    if (IS_INTEL_AVX1(intel_flags)) {
#ifdef HAVE_INTEL_RORX
        if (IS_INTEL_BMI2(intel_flags)) {
            sha_method = SHA512_AVX1_RORX;
            Transform_Sha512_is_vectorized = 1;
        }
        else
#endif
        {
            sha_method = SHA512_AVX1;
            Transform_Sha512_is_vectorized = 1;
        }
    }
    else
#endif
    {
        sha_method = SHA512_C;
        Transform_Sha512_is_vectorized = 0;
    }
}
static WC_INLINE int Transform_Sha512(wc_Sha512 *sha512) {
    int ret;

    if (sha_method == SHA512_C)
        return _Transform_Sha512(sha512);

    SAVE_VECTOR_REGISTERS(return _svr_ret;);
    switch (sha_method) {
    case SHA512_AVX2:
        ret = Transform_Sha512_AVX2(sha512);
        break;
    case SHA512_AVX2_RORX:
        ret = Transform_Sha512_AVX2_RORX(sha512);
        break;
    case SHA512_AVX1:
        ret = Transform_Sha512_AVX1(sha512);
        break;
    case SHA512_AVX1_RORX:
        ret = Transform_Sha512_AVX1_RORX(sha512);
        break;
    case SHA512_C:
    case SHA512_UNSET:
    default:
        ret = _Transform_Sha512(sha512);
        break;
    }
    RESTORE_VECTOR_REGISTERS();
    return ret;
}
static WC_INLINE int Transform_Sha512_Len(wc_Sha512 *sha512, word32 len) {
    int ret;

    SAVE_VECTOR_REGISTERS(return _svr_ret;);
    switch (sha_method) {
    case SHA512_AVX2:
        ret = Transform_Sha512_AVX2_Len(sha512, len);
        break;
    case SHA512_AVX2_RORX:
        ret = Transform_Sha512_AVX2_RORX_Len(sha512, len);
        break;
    case SHA512_AVX1:
        ret = Transform_Sha512_AVX1_Len(sha512, len);
        break;
    case SHA512_AVX1_RORX:
        ret = Transform_Sha512_AVX1_RORX_Len(sha512, len);
        break;
    case SHA512_C:
    case SHA512_UNSET:
    default:
        /* not reached; callers gate on sha_method != SHA512_C */
        ret = 0;
        break;
    }
    RESTORE_VECTOR_REGISTERS();
    return ret;
}
#else /* !WC_NO_INTERNAL_FUNCTION_POINTERS */
static int (*Transform_Sha512_p)(wc_Sha512* sha512) = _Transform_Sha512;
static int (*Transform_Sha512_Len_p)(wc_Sha512* sha512, word32 len) = NULL;
static int transform_check = 0;
static WC_INLINE int Transform_Sha512(wc_Sha512 *sha512) {
    int ret;
#ifdef WOLFSSL_LINUXKM
    if (Transform_Sha512_is_vectorized)
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
#endif
    ret = (*Transform_Sha512_p)(sha512);
#ifdef WOLFSSL_LINUXKM
    if (Transform_Sha512_is_vectorized)
        RESTORE_VECTOR_REGISTERS();
#endif
    return ret;
}
static WC_INLINE int Transform_Sha512_Len(wc_Sha512 *sha512, word32 len) {
    int ret;
#ifdef WOLFSSL_LINUXKM
    if (Transform_Sha512_is_vectorized)
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
#endif
    ret = (*Transform_Sha512_Len_p)(sha512, len);
#ifdef WOLFSSL_LINUXKM
    if (Transform_Sha512_is_vectorized)
        RESTORE_VECTOR_REGISTERS();
#endif
    return ret;
}
@@ -495,6 +622,8 @@ static int InitSha512_256(wc_Sha512* sha512)
    transform_check = 1;
}
#endif /* !WC_NO_INTERNAL_FUNCTION_POINTERS */
#else
#define Transform_Sha512(sha512) _Transform_Sha512(sha512)
@@ -804,7 +933,13 @@ static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 len)
#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
    (defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))
#ifdef WC_NO_INTERNAL_FUNCTION_POINTERS
    if (sha_method != SHA512_C)
#else
    if (Transform_Sha512_Len_p != NULL)
#endif
    {
        word32 blocksLen = len & ~((word32)WC_SHA512_BLOCK_SIZE-1);

        if (blocksLen > 0) {

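Aside: the blocksLen mask in Sha512Update rounds len down to a whole number of
WC_SHA512_BLOCK_SIZE (128) byte blocks; the trick works because 128 is a power
of two. A standalone check:

    #include <stdio.h>

    int main(void)
    {
        unsigned int len = 300;
        unsigned int blocksLen = len & ~(128u - 1u); /* 300 & ~127 */
        printf("%u\n", blocksLen);                   /* prints 256 */
        return 0;
    }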
wolfssl/wolfcrypt/settings.h

@@ -2896,6 +2896,9 @@ extern void uITRON4_free(void *p) ;
#ifndef WOLFSSL_SP_DIV_WORD_HALF
    #define WOLFSSL_SP_DIV_WORD_HALF
#endif

#ifdef __PIE__
    #define WC_NO_INTERNAL_FUNCTION_POINTERS
#endif
#endif
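Note: __PIE__ is predefined by GCC and Clang when compiling with -fpie/-fPIE,
so position-independent builds now select the GOT-free enum dispatch
automatically. A quick way to confirm on a Unix-like toolchain:

    $ gcc -fPIE -dM -E - </dev/null | grep __PIE__
    #define __PIE__ 2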