diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index 184838a6b..a6e1be830 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -1725,12 +1725,17 @@ static WARN_UNUSED_RESULT word32 inv_col_mul( #endif #endif - #ifndef WC_NO_CACHE_RESISTANT + +#if defined(__riscv) && !defined(WOLFSSL_AES_TOUCH_LINES) + #define WOLFSSL_AES_TOUCH_LINES +#endif + #ifndef WOLFSSL_AES_SMALL_TABLES /* load 4 Te Tables into cache by cache line stride */ static WARN_UNUSED_RESULT WC_INLINE word32 PreFetchTe(void) { +#ifndef WOLFSSL_AES_TOUCH_LINES word32 x = 0; int i,j; @@ -1741,11 +1746,15 @@ static WARN_UNUSED_RESULT WC_INLINE word32 PreFetchTe(void) } } return x; +#else + return 0; +#endif } #else /* load sbox into cache by cache line stride */ static WARN_UNUSED_RESULT WC_INLINE word32 PreFetchSBox(void) { +#ifndef WOLFSSL_AES_TOUCH_LINES word32 x = 0; int i; @@ -1753,10 +1762,230 @@ static WARN_UNUSED_RESULT WC_INLINE word32 PreFetchSBox(void) x &= Tsbox[i]; } return x; +#else + return 0; +#endif } #endif #endif +#ifdef WOLFSSL_AES_TOUCH_LINES /* BITS/MASK/ADD below correspond to WC_CACHE_LINE_SZ / 4 table entries per line */ +#if WC_CACHE_LINE_SZ == 128 + #define WC_CACHE_LINE_BITS 5 + #define WC_CACHE_LINE_MASK_HI 0xe0 + #define WC_CACHE_LINE_MASK_LO 0x1f + #define WC_CACHE_LINE_ADD 0x20 +#elif WC_CACHE_LINE_SZ == 64 + #define WC_CACHE_LINE_BITS 4 + #define WC_CACHE_LINE_MASK_HI 0xf0 + #define WC_CACHE_LINE_MASK_LO 0x0f + #define WC_CACHE_LINE_ADD 0x10 +#elif WC_CACHE_LINE_SZ == 32 + #define WC_CACHE_LINE_BITS 3 + #define WC_CACHE_LINE_MASK_HI 0xf8 + #define WC_CACHE_LINE_MASK_LO 0x07 + #define WC_CACHE_LINE_ADD 0x08 +#elif WC_CACHE_LINE_SZ == 16 /* comparison, not assignment: '=' is invalid in a #elif expression */ + #define WC_CACHE_LINE_BITS 2 + #define WC_CACHE_LINE_MASK_HI 0xfc + #define WC_CACHE_LINE_MASK_LO 0x03 + #define WC_CACHE_LINE_ADD 0x04 +#else + #error Cache line size not supported +#endif +/* GetTable: selects t[o] by OR-ing masked reads, one read per line-sized group of entries. */ +#ifndef WOLFSSL_AES_SMALL_TABLES +static word32 GetTable(const word32* t, byte o) +{ +#if WC_CACHE_LINE_SZ == 64 + word32 e; + byte hi = o & 0xf0; + byte lo = o & 0x0f; + + e = t[lo + 0x00] & ((word32)0 - 
(((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0x10] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0x20] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0x30] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0x40] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0x50] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0x60] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0x70] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0x80] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0x90] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0xa0] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0xb0] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0xc0] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0xd0] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0xe0] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0xf0] & ((word32)0 - (((word32)hi - 0x01) >> 31)); + + return e; +#else + word32 e = 0; + int i; + byte hi = o & WC_CACHE_LINE_MASK_HI; + byte lo = o & WC_CACHE_LINE_MASK_LO; + /* the mask is all ones only on the pass where hi has been reduced to 0; otherwise it is 0 */ + for (i = 0; i < 256; i += (1 << WC_CACHE_LINE_BITS)) { + e |= t[lo + i] & ((word32)0 - (((word32)hi - 0x01) >> 31)); + hi -= WC_CACHE_LINE_ADD; + } + + return e; +#endif +} +#endif +/* GetTable8: byte-table lookup; selects t[o] by OR-ing masked reads, one read per group. */ +#ifdef WOLFSSL_AES_SMALL_TABLES +static byte GetTable8(const byte* t, byte o) +{ +#if WC_CACHE_LINE_SZ == 64 + byte e; + byte hi = o & 0xf0; + byte lo = o & 0x0f; + + e = t[lo + 0x00] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0x10] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0x20] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0x30] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0x40] & 
((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0x50] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0x60] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0x70] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0x80] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0x90] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0xa0] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0xb0] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0xc0] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0xd0] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0xe0] & ((word32)0 - (((word32)hi - 0x01) >> 31)); hi -= 0x10; + e |= t[lo + 0xf0] & ((word32)0 - (((word32)hi - 0x01) >> 31)); + + return e; +#else + byte e = 0; + int i; + byte hi = o & WC_CACHE_LINE_MASK_HI; + byte lo = o & WC_CACHE_LINE_MASK_LO; + /* the word32 mask is all ones only on the pass where hi has been reduced to 0 */ + for (i = 0; i < 256; i += (1 << WC_CACHE_LINE_BITS)) { + e |= t[lo + i] & ((word32)0 - (((word32)hi - 0x01) >> 31)); + hi -= WC_CACHE_LINE_ADD; + } + + return e; +#endif +} +#endif +/* GetTable_Multi: four lookups in table t in one pass; the selected t[oN] is stored to *tN. */ +#ifndef WOLFSSL_AES_SMALL_TABLES +static void GetTable_Multi(const word32* t, word32* t0, byte o0, + word32* t1, byte o1, word32* t2, byte o2, word32* t3, byte o3) +{ + word32 e0 = 0; + word32 e1 = 0; + word32 e2 = 0; + word32 e3 = 0; + byte hi0 = o0 & WC_CACHE_LINE_MASK_HI; + byte lo0 = o0 & WC_CACHE_LINE_MASK_LO; + byte hi1 = o1 & WC_CACHE_LINE_MASK_HI; + byte lo1 = o1 & WC_CACHE_LINE_MASK_LO; + byte hi2 = o2 & WC_CACHE_LINE_MASK_HI; + byte lo2 = o2 & WC_CACHE_LINE_MASK_LO; + byte hi3 = o3 & WC_CACHE_LINE_MASK_HI; + byte lo3 = o3 & WC_CACHE_LINE_MASK_LO; + int i; + + for (i = 0; i < 256; i += (1 << WC_CACHE_LINE_BITS)) { + e0 |= t[lo0 + i] & ((word32)0 - (((word32)hi0 - 0x01) >> 31)); + hi0 -= WC_CACHE_LINE_ADD; + e1 |= t[lo1 + i] & ((word32)0 - (((word32)hi1 - 0x01) >> 31)); + hi1 -= 
WC_CACHE_LINE_ADD; + e2 |= t[lo2 + i] & ((word32)0 - (((word32)hi2 - 0x01) >> 31)); + hi2 -= WC_CACHE_LINE_ADD; + e3 |= t[lo3 + i] & ((word32)0 - (((word32)hi3 - 0x01) >> 31)); + hi3 -= WC_CACHE_LINE_ADD; + } + *t0 = e0; + *t1 = e1; + *t2 = e2; + *t3 = e3; +} +static void XorTable_Multi(const word32* t, word32* t0, byte o0, + word32* t1, byte o1, word32* t2, byte o2, word32* t3, byte o3) +{ /* XORs the selected t[oN] into *tN for N = 0..3, reading one entry per group */ + word32 e0 = 0; + word32 e1 = 0; + word32 e2 = 0; + word32 e3 = 0; + byte hi0 = o0 & WC_CACHE_LINE_MASK_HI; + byte lo0 = o0 & WC_CACHE_LINE_MASK_LO; + byte hi1 = o1 & WC_CACHE_LINE_MASK_HI; + byte lo1 = o1 & WC_CACHE_LINE_MASK_LO; + byte hi2 = o2 & WC_CACHE_LINE_MASK_HI; + byte lo2 = o2 & WC_CACHE_LINE_MASK_LO; + byte hi3 = o3 & WC_CACHE_LINE_MASK_HI; + byte lo3 = o3 & WC_CACHE_LINE_MASK_LO; + int i; + /* the hi/lo split must match the loop step, else lo + i can exceed 255 and the wrong entry is selected */ + for (i = 0; i < 256; i += (1 << WC_CACHE_LINE_BITS)) { + e0 |= t[lo0 + i] & ((word32)0 - (((word32)hi0 - 0x01) >> 31)); + hi0 -= WC_CACHE_LINE_ADD; + e1 |= t[lo1 + i] & ((word32)0 - (((word32)hi1 - 0x01) >> 31)); + hi1 -= WC_CACHE_LINE_ADD; + e2 |= t[lo2 + i] & ((word32)0 - (((word32)hi2 - 0x01) >> 31)); + hi2 -= WC_CACHE_LINE_ADD; + e3 |= t[lo3 + i] & ((word32)0 - (((word32)hi3 - 0x01) >> 31)); + hi3 -= WC_CACHE_LINE_ADD; + } + *t0 ^= e0; + *t1 ^= e1; + *t2 ^= e2; + *t3 ^= e3; +} +static word32 GetTable8_4(const byte* t, byte o0, byte o1, byte o2, byte o3) +{ + word32 e = 0; + int i; + byte hi0 = o0 & WC_CACHE_LINE_MASK_HI; + byte lo0 = o0 & WC_CACHE_LINE_MASK_LO; + byte hi1 = o1 & WC_CACHE_LINE_MASK_HI; + byte lo1 = o1 & WC_CACHE_LINE_MASK_LO; + byte hi2 = o2 & WC_CACHE_LINE_MASK_HI; + byte lo2 = o2 & WC_CACHE_LINE_MASK_LO; + byte hi3 = o3 & WC_CACHE_LINE_MASK_HI; + byte lo3 = o3 & WC_CACHE_LINE_MASK_LO; + + for (i = 0; i < 256; i += (1 << WC_CACHE_LINE_BITS)) { + e |= (word32)(t[lo0 + i] & ((word32)0 - (((word32)hi0 - 0x01) >> 31))) + << 24; + hi0 -= WC_CACHE_LINE_ADD; + e |= (word32)(t[lo1 + i] & ((word32)0 - (((word32)hi1 - 0x01) >> 31))) + << 16; + hi1 -= WC_CACHE_LINE_ADD; + e |= (word32)(t[lo2 + i] & ((word32)0 - (((word32)hi2 - 0x01) >> 31))) + << 8; + hi2 -= WC_CACHE_LINE_ADD; + e |= (word32)(t[lo3 + 
i] & ((word32)0 - (((word32)hi3 - 0x01) >> 31))) + << 0; + hi3 -= WC_CACHE_LINE_ADD; + } + + return e; +} +#endif +#else /* !WOLFSSL_AES_TOUCH_LINES: plain indexed reads; multi-statement forms are wrapped so they act as one statement */ +#define GetTable(t, o) ((t)[o]) +#define GetTable8(t, o) ((t)[o]) +#define GetTable_Multi(t, t0, o0, t1, o1, t2, o2, t3, o3) \ + do { *(t0) = (t)[o0]; *(t1) = (t)[o1]; *(t2) = (t)[o2]; *(t3) = (t)[o3]; } while (0) +#define XorTable_Multi(t, t0, o0, t1, o1, t2, o2, t3, o3) \ + do { *(t0) ^= (t)[o0]; *(t1) ^= (t)[o1]; *(t2) ^= (t)[o2]; *(t3) ^= (t)[o3]; } while (0) +#define GetTable8_4(t, o0, o1, o2, o3) \ + (((word32)(t)[o0] << 24) | ((word32)(t)[o1] << 16) | \ + ((word32)(t)[o2] << 8) | ((word32)(t)[o3] << 0)) +#endif + /* Software AES - ECB Encrypt */ static WARN_UNUSED_RESULT int wc_AesEncrypt( Aes* aes, const byte* inBlock, byte* outBlock) @@ -1863,27 +2092,59 @@ static WARN_UNUSED_RESULT int wc_AesEncrypt( s0 |= PreFetchTe(); #endif +#ifndef WOLFSSL_AES_TOUCH_LINES +#define ENC_ROUND_T_S(o) \ + t0 = GetTable(Te[0], GETBYTE(s0, 3)) ^ GetTable(Te[1], GETBYTE(s1, 2)) ^ \ + GetTable(Te[2], GETBYTE(s2, 1)) ^ GetTable(Te[3], GETBYTE(s3, 0)) ^ \ + rk[(o)+4]; \ + t1 = GetTable(Te[0], GETBYTE(s1, 3)) ^ GetTable(Te[1], GETBYTE(s2, 2)) ^ \ + GetTable(Te[2], GETBYTE(s3, 1)) ^ GetTable(Te[3], GETBYTE(s0, 0)) ^ \ + rk[(o)+5]; \ + t2 = GetTable(Te[0], GETBYTE(s2, 3)) ^ GetTable(Te[1], GETBYTE(s3, 2)) ^ \ + GetTable(Te[2], GETBYTE(s0, 1)) ^ GetTable(Te[3], GETBYTE(s1, 0)) ^ \ + rk[(o)+6]; \ + t3 = GetTable(Te[0], GETBYTE(s3, 3)) ^ GetTable(Te[1], GETBYTE(s0, 2)) ^ \ + GetTable(Te[2], GETBYTE(s1, 1)) ^ GetTable(Te[3], GETBYTE(s2, 0)) ^ \ + rk[(o)+7] +#define ENC_ROUND_S_T(o) \ + s0 = GetTable(Te[0], GETBYTE(t0, 3)) ^ GetTable(Te[1], GETBYTE(t1, 2)) ^ \ + GetTable(Te[2], GETBYTE(t2, 1)) ^ GetTable(Te[3], GETBYTE(t3, 0)) ^ \ + rk[(o)+0]; \ + s1 = GetTable(Te[0], GETBYTE(t1, 3)) ^ GetTable(Te[1], GETBYTE(t2, 2)) ^ \ + GetTable(Te[2], GETBYTE(t3, 1)) ^ GetTable(Te[3], GETBYTE(t0, 0)) ^ \ + rk[(o)+1]; \ + s2 = GetTable(Te[0], GETBYTE(t2, 3)) ^ GetTable(Te[1], GETBYTE(t3, 2)) ^ \ + GetTable(Te[2], GETBYTE(t0, 1)) ^ GetTable(Te[3], GETBYTE(t1, 0)) 
^ \ + rk[(o)+2]; \ + s3 = GetTable(Te[0], GETBYTE(t3, 3)) ^ GetTable(Te[1], GETBYTE(t0, 2)) ^ \ + GetTable(Te[2], GETBYTE(t1, 1)) ^ GetTable(Te[3], GETBYTE(t2, 0)) ^ \ + rk[(o)+3] +#else +#define ENC_ROUND_T_S(o) \ + GetTable_Multi(Te[0], &t0, GETBYTE(s0, 3), &t1, GETBYTE(s1, 3), \ + &t2, GETBYTE(s2, 3), &t3, GETBYTE(s3, 3)); \ + XorTable_Multi(Te[1], &t0, GETBYTE(s1, 2), &t1, GETBYTE(s2, 2), \ + &t2, GETBYTE(s3, 2), &t3, GETBYTE(s0, 2)); \ + XorTable_Multi(Te[2], &t0, GETBYTE(s2, 1), &t1, GETBYTE(s3, 1), \ + &t2, GETBYTE(s0, 1), &t3, GETBYTE(s1, 1)); \ + XorTable_Multi(Te[3], &t0, GETBYTE(s3, 0), &t1, GETBYTE(s0, 0), \ + &t2, GETBYTE(s1, 0), &t3, GETBYTE(s2, 0)); \ + t0 ^= rk[(o)+4]; t1 ^= rk[(o)+5]; t2 ^= rk[(o)+6]; t3 ^= rk[(o)+7]; + +#define ENC_ROUND_S_T(o) \ + GetTable_Multi(Te[0], &s0, GETBYTE(t0, 3), &s1, GETBYTE(t1, 3), \ + &s2, GETBYTE(t2, 3), &s3, GETBYTE(t3, 3)); \ + XorTable_Multi(Te[1], &s0, GETBYTE(t1, 2), &s1, GETBYTE(t2, 2), \ + &s2, GETBYTE(t3, 2), &s3, GETBYTE(t0, 2)); \ + XorTable_Multi(Te[2], &s0, GETBYTE(t2, 1), &s1, GETBYTE(t3, 1), \ + &s2, GETBYTE(t0, 1), &s3, GETBYTE(t1, 1)); \ + XorTable_Multi(Te[3], &s0, GETBYTE(t3, 0), &s1, GETBYTE(t0, 0), \ + &s2, GETBYTE(t1, 0), &s3, GETBYTE(t2, 0)); \ + s0 ^= rk[(o)+0]; s1 ^= rk[(o)+1]; s2 ^= rk[(o)+2]; s3 ^= rk[(o)+3]; +#endif + #ifndef WOLFSSL_AES_NO_UNROLL /* Unroll the loop. 
*/ -#define ENC_ROUND_T_S(o) \ - t0 = Te[0][GETBYTE(s0, 3)] ^ Te[1][GETBYTE(s1, 2)] ^ \ - Te[2][GETBYTE(s2, 1)] ^ Te[3][GETBYTE(s3, 0)] ^ rk[(o)+4]; \ - t1 = Te[0][GETBYTE(s1, 3)] ^ Te[1][GETBYTE(s2, 2)] ^ \ - Te[2][GETBYTE(s3, 1)] ^ Te[3][GETBYTE(s0, 0)] ^ rk[(o)+5]; \ - t2 = Te[0][GETBYTE(s2, 3)] ^ Te[1][GETBYTE(s3, 2)] ^ \ - Te[2][GETBYTE(s0, 1)] ^ Te[3][GETBYTE(s1, 0)] ^ rk[(o)+6]; \ - t3 = Te[0][GETBYTE(s3, 3)] ^ Te[1][GETBYTE(s0, 2)] ^ \ - Te[2][GETBYTE(s1, 1)] ^ Te[3][GETBYTE(s2, 0)] ^ rk[(o)+7] -#define ENC_ROUND_S_T(o) \ - s0 = Te[0][GETBYTE(t0, 3)] ^ Te[1][GETBYTE(t1, 2)] ^ \ - Te[2][GETBYTE(t2, 1)] ^ Te[3][GETBYTE(t3, 0)] ^ rk[(o)+0]; \ - s1 = Te[0][GETBYTE(t1, 3)] ^ Te[1][GETBYTE(t2, 2)] ^ \ - Te[2][GETBYTE(t3, 1)] ^ Te[3][GETBYTE(t0, 0)] ^ rk[(o)+1]; \ - s2 = Te[0][GETBYTE(t2, 3)] ^ Te[1][GETBYTE(t3, 2)] ^ \ - Te[2][GETBYTE(t0, 1)] ^ Te[3][GETBYTE(t1, 0)] ^ rk[(o)+2]; \ - s3 = Te[0][GETBYTE(t3, 3)] ^ Te[1][GETBYTE(t0, 2)] ^ \ - Te[2][GETBYTE(t1, 1)] ^ Te[3][GETBYTE(t2, 0)] ^ rk[(o)+3] - ENC_ROUND_T_S( 0); ENC_ROUND_S_T( 8); ENC_ROUND_T_S( 8); ENC_ROUND_S_T(16); ENC_ROUND_T_S(16); @@ -1902,60 +2163,14 @@ static WARN_UNUSED_RESULT int wc_AesEncrypt( */ for (;;) { - t0 = - Te[0][GETBYTE(s0, 3)] ^ - Te[1][GETBYTE(s1, 2)] ^ - Te[2][GETBYTE(s2, 1)] ^ - Te[3][GETBYTE(s3, 0)] ^ - rk[4]; - t1 = - Te[0][GETBYTE(s1, 3)] ^ - Te[1][GETBYTE(s2, 2)] ^ - Te[2][GETBYTE(s3, 1)] ^ - Te[3][GETBYTE(s0, 0)] ^ - rk[5]; - t2 = - Te[0][GETBYTE(s2, 3)] ^ - Te[1][GETBYTE(s3, 2)] ^ - Te[2][GETBYTE(s0, 1)] ^ - Te[3][GETBYTE(s1, 0)] ^ - rk[6]; - t3 = - Te[0][GETBYTE(s3, 3)] ^ - Te[1][GETBYTE(s0, 2)] ^ - Te[2][GETBYTE(s1, 1)] ^ - Te[3][GETBYTE(s2, 0)] ^ - rk[7]; + ENC_ROUND_T_S(0); rk += 8; if (--r == 0) { break; } - s0 = - Te[0][GETBYTE(t0, 3)] ^ - Te[1][GETBYTE(t1, 2)] ^ - Te[2][GETBYTE(t2, 1)] ^ - Te[3][GETBYTE(t3, 0)] ^ - rk[0]; - s1 = - Te[0][GETBYTE(t1, 3)] ^ - Te[1][GETBYTE(t2, 2)] ^ - Te[2][GETBYTE(t3, 1)] ^ - Te[3][GETBYTE(t0, 0)] ^ - rk[1]; - s2 = - Te[0][GETBYTE(t2, 3)] ^ 
- Te[1][GETBYTE(t3, 2)] ^ - Te[2][GETBYTE(t0, 1)] ^ - Te[3][GETBYTE(t1, 0)] ^ - rk[2]; - s3 = - Te[0][GETBYTE(t3, 3)] ^ - Te[1][GETBYTE(t0, 2)] ^ - Te[2][GETBYTE(t1, 1)] ^ - Te[3][GETBYTE(t2, 0)] ^ - rk[3]; + ENC_ROUND_S_T(0); } #endif @@ -1964,31 +2179,58 @@ static WARN_UNUSED_RESULT int wc_AesEncrypt( * map cipher state to byte array block: */ +#ifndef WOLFSSL_AES_TOUCH_LINES s0 = - (Te[2][GETBYTE(t0, 3)] & 0xff000000) ^ - (Te[3][GETBYTE(t1, 2)] & 0x00ff0000) ^ - (Te[0][GETBYTE(t2, 1)] & 0x0000ff00) ^ - (Te[1][GETBYTE(t3, 0)] & 0x000000ff) ^ + (GetTable(Te[2], GETBYTE(t0, 3)) & 0xff000000) ^ + (GetTable(Te[3], GETBYTE(t1, 2)) & 0x00ff0000) ^ + (GetTable(Te[0], GETBYTE(t2, 1)) & 0x0000ff00) ^ + (GetTable(Te[1], GETBYTE(t3, 0)) & 0x000000ff) ^ rk[0]; s1 = - (Te[2][GETBYTE(t1, 3)] & 0xff000000) ^ - (Te[3][GETBYTE(t2, 2)] & 0x00ff0000) ^ - (Te[0][GETBYTE(t3, 1)] & 0x0000ff00) ^ - (Te[1][GETBYTE(t0, 0)] & 0x000000ff) ^ + (GetTable(Te[2], GETBYTE(t1, 3)) & 0xff000000) ^ + (GetTable(Te[3], GETBYTE(t2, 2)) & 0x00ff0000) ^ + (GetTable(Te[0], GETBYTE(t3, 1)) & 0x0000ff00) ^ + (GetTable(Te[1], GETBYTE(t0, 0)) & 0x000000ff) ^ rk[1]; s2 = - (Te[2][GETBYTE(t2, 3)] & 0xff000000) ^ - (Te[3][GETBYTE(t3, 2)] & 0x00ff0000) ^ - (Te[0][GETBYTE(t0, 1)] & 0x0000ff00) ^ - (Te[1][GETBYTE(t1, 0)] & 0x000000ff) ^ + (GetTable(Te[2], GETBYTE(t2, 3)) & 0xff000000) ^ + (GetTable(Te[3], GETBYTE(t3, 2)) & 0x00ff0000) ^ + (GetTable(Te[0], GETBYTE(t0, 1)) & 0x0000ff00) ^ + (GetTable(Te[1], GETBYTE(t1, 0)) & 0x000000ff) ^ rk[2]; s3 = - (Te[2][GETBYTE(t3, 3)] & 0xff000000) ^ - (Te[3][GETBYTE(t0, 2)] & 0x00ff0000) ^ - (Te[0][GETBYTE(t1, 1)] & 0x0000ff00) ^ - (Te[1][GETBYTE(t2, 0)] & 0x000000ff) ^ + (GetTable(Te[2], GETBYTE(t3, 3)) & 0xff000000) ^ + (GetTable(Te[3], GETBYTE(t0, 2)) & 0x00ff0000) ^ + (GetTable(Te[0], GETBYTE(t1, 1)) & 0x0000ff00) ^ + (GetTable(Te[1], GETBYTE(t2, 0)) & 0x000000ff) ^ rk[3]; #else +{ + word32 u0; + word32 u1; + word32 u2; + word32 u3; + + s0 = rk[0]; s1 = rk[1]; s2 = 
rk[2]; s3 = rk[3]; + GetTable_Multi(Te[2], &u0, GETBYTE(t0, 3), &u1, GETBYTE(t1, 3), + &u2, GETBYTE(t2, 3), &u3, GETBYTE(t3, 3)); + s0 ^= u0 & 0xff000000; s1 ^= u1 & 0xff000000; + s2 ^= u2 & 0xff000000; s3 ^= u3 & 0xff000000; + GetTable_Multi(Te[3], &u0, GETBYTE(t1, 2), &u1, GETBYTE(t2, 2), + &u2, GETBYTE(t3, 2), &u3, GETBYTE(t0, 2)); + s0 ^= u0 & 0x00ff0000; s1 ^= u1 & 0x00ff0000; + s2 ^= u2 & 0x00ff0000; s3 ^= u3 & 0x00ff0000; + GetTable_Multi(Te[0], &u0, GETBYTE(t2, 1), &u1, GETBYTE(t3, 1), + &u2, GETBYTE(t0, 1), &u3, GETBYTE(t1, 1)); + s0 ^= u0 & 0x0000ff00; s1 ^= u1 & 0x0000ff00; + s2 ^= u2 & 0x0000ff00; s3 ^= u3 & 0x0000ff00; + GetTable_Multi(Te[1], &u0, GETBYTE(t3, 0), &u1, GETBYTE(t0, 0), + &u2, GETBYTE(t1, 0), &u3, GETBYTE(t2, 0)); + s0 ^= u0 & 0x000000ff; s1 ^= u1 & 0x000000ff; + s2 ^= u2 & 0x000000ff; s3 ^= u3 & 0x000000ff; +} +#endif +#else #ifndef WC_NO_CACHE_RESISTANT s0 |= PreFetchSBox(); #endif @@ -1997,25 +2239,25 @@ static WARN_UNUSED_RESULT int wc_AesEncrypt( /* Two rounds at a time */ for (rk += 4; r > 1; r--, rk += 4) { t0 = - ((word32)Tsbox[GETBYTE(s0, 3)] << 24) ^ - ((word32)Tsbox[GETBYTE(s1, 2)] << 16) ^ - ((word32)Tsbox[GETBYTE(s2, 1)] << 8) ^ - ((word32)Tsbox[GETBYTE(s3, 0)]); + ((word32)GetTable8(Tsbox, GETBYTE(s0, 3)) << 24) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s1, 2)) << 16) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s2, 1)) << 8) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s3, 0))); t1 = - ((word32)Tsbox[GETBYTE(s1, 3)] << 24) ^ - ((word32)Tsbox[GETBYTE(s2, 2)] << 16) ^ - ((word32)Tsbox[GETBYTE(s3, 1)] << 8) ^ - ((word32)Tsbox[GETBYTE(s0, 0)]); + ((word32)GetTable8(Tsbox, GETBYTE(s1, 3)) << 24) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s2, 2)) << 16) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s3, 1)) << 8) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s0, 0))); t2 = - ((word32)Tsbox[GETBYTE(s2, 3)] << 24) ^ - ((word32)Tsbox[GETBYTE(s3, 2)] << 16) ^ - ((word32)Tsbox[GETBYTE(s0, 1)] << 8) ^ - ((word32)Tsbox[GETBYTE(s1, 0)]); + ((word32)GetTable8(Tsbox, GETBYTE(s2, 
3)) << 24) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s3, 2)) << 16) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s0, 1)) << 8) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s1, 0))); t3 = - ((word32)Tsbox[GETBYTE(s3, 3)] << 24) ^ - ((word32)Tsbox[GETBYTE(s0, 2)] << 16) ^ - ((word32)Tsbox[GETBYTE(s1, 1)] << 8) ^ - ((word32)Tsbox[GETBYTE(s2, 0)]); + ((word32)GetTable8(Tsbox, GETBYTE(s3, 3)) << 24) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s0, 2)) << 16) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s1, 1)) << 8) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s2, 0))); s0 = (col_mul(t0, 3, 2, 0, 1) << 24) ^ @@ -2044,25 +2286,25 @@ static WARN_UNUSED_RESULT int wc_AesEncrypt( } t0 = - ((word32)Tsbox[GETBYTE(s0, 3)] << 24) ^ - ((word32)Tsbox[GETBYTE(s1, 2)] << 16) ^ - ((word32)Tsbox[GETBYTE(s2, 1)] << 8) ^ - ((word32)Tsbox[GETBYTE(s3, 0)]); + ((word32)GetTable8(Tsbox, GETBYTE(s0, 3)) << 24) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s1, 2)) << 16) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s2, 1)) << 8) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s3, 0))); t1 = - ((word32)Tsbox[GETBYTE(s1, 3)] << 24) ^ - ((word32)Tsbox[GETBYTE(s2, 2)] << 16) ^ - ((word32)Tsbox[GETBYTE(s3, 1)] << 8) ^ - ((word32)Tsbox[GETBYTE(s0, 0)]); + ((word32)GetTable8(Tsbox, GETBYTE(s1, 3)) << 24) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s2, 2)) << 16) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s3, 1)) << 8) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s0, 0))); t2 = - ((word32)Tsbox[GETBYTE(s2, 3)] << 24) ^ - ((word32)Tsbox[GETBYTE(s3, 2)] << 16) ^ - ((word32)Tsbox[GETBYTE(s0, 1)] << 8) ^ - ((word32)Tsbox[GETBYTE(s1, 0)]); + ((word32)GetTable8(Tsbox, GETBYTE(s2, 3)) << 24) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s3, 2)) << 16) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s0, 1)) << 8) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s1, 0))); t3 = - ((word32)Tsbox[GETBYTE(s3, 3)] << 24) ^ - ((word32)Tsbox[GETBYTE(s0, 2)] << 16) ^ - ((word32)Tsbox[GETBYTE(s1, 1)] << 8) ^ - ((word32)Tsbox[GETBYTE(s2, 0)]); + ((word32)GetTable8(Tsbox, GETBYTE(s3, 3)) << 24) ^ + ((word32)GetTable8(Tsbox, 
GETBYTE(s0, 2)) << 16) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s1, 1)) << 8) ^ + ((word32)GetTable8(Tsbox, GETBYTE(s2, 0))); s0 = t0 ^ rk[0]; s1 = t1 ^ rk[1]; s2 = t2 ^ rk[2]; @@ -2111,6 +2353,7 @@ static WARN_UNUSED_RESULT WC_INLINE word32 PreFetchTd(void) /* load Td Table4 into cache by cache line stride */ static WARN_UNUSED_RESULT WC_INLINE word32 PreFetchTd4(void) { +#ifndef WOLFSSL_AES_TOUCH_LINES word32 x = 0; int i; @@ -2118,6 +2361,9 @@ static WARN_UNUSED_RESULT WC_INLINE word32 PreFetchTd4(void) x &= (word32)Td4[i]; } return x; +#else + return 0; +#endif } #endif @@ -2201,27 +2447,51 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( s0 |= PreFetchTd(); #endif -#ifndef WOLFSSL_AES_NO_UNROLL +#ifndef WOLFSSL_AES_TOUCH_LINES /* Unroll the loop. */ #define DEC_ROUND_T_S(o) \ - t0 = Td[0][GETBYTE(s0, 3)] ^ Td[1][GETBYTE(s3, 2)] ^ \ - Td[2][GETBYTE(s2, 1)] ^ Td[3][GETBYTE(s1, 0)] ^ rk[(o)+4]; \ - t1 = Td[0][GETBYTE(s1, 3)] ^ Td[1][GETBYTE(s0, 2)] ^ \ - Td[2][GETBYTE(s3, 1)] ^ Td[3][GETBYTE(s2, 0)] ^ rk[(o)+5]; \ - t2 = Td[0][GETBYTE(s2, 3)] ^ Td[1][GETBYTE(s1, 2)] ^ \ - Td[2][GETBYTE(s0, 1)] ^ Td[3][GETBYTE(s3, 0)] ^ rk[(o)+6]; \ - t3 = Td[0][GETBYTE(s3, 3)] ^ Td[1][GETBYTE(s2, 2)] ^ \ - Td[2][GETBYTE(s1, 1)] ^ Td[3][GETBYTE(s0, 0)] ^ rk[(o)+7] + t0 = GetTable(Td[0], GETBYTE(s0, 3)) ^ GetTable(Td[1], GETBYTE(s3, 2)) ^ \ + GetTable(Td[2], GETBYTE(s2, 1)) ^ GetTable(Td[3], GETBYTE(s1, 0)) ^ rk[(o)+4]; \ + t1 = GetTable(Td[0], GETBYTE(s1, 3)) ^ GetTable(Td[1], GETBYTE(s0, 2)) ^ \ + GetTable(Td[2], GETBYTE(s3, 1)) ^ GetTable(Td[3], GETBYTE(s2, 0)) ^ rk[(o)+5]; \ + t2 = GetTable(Td[0], GETBYTE(s2, 3)) ^ GetTable(Td[1], GETBYTE(s1, 2)) ^ \ + GetTable(Td[2], GETBYTE(s0, 1)) ^ GetTable(Td[3], GETBYTE(s3, 0)) ^ rk[(o)+6]; \ + t3 = GetTable(Td[0], GETBYTE(s3, 3)) ^ GetTable(Td[1], GETBYTE(s2, 2)) ^ \ + GetTable(Td[2], GETBYTE(s1, 1)) ^ GetTable(Td[3], GETBYTE(s0, 0)) ^ rk[(o)+7] #define DEC_ROUND_S_T(o) \ - s0 = Td[0][GETBYTE(t0, 3)] ^ Td[1][GETBYTE(t3, 2)] ^ \ - 
Td[2][GETBYTE(t2, 1)] ^ Td[3][GETBYTE(t1, 0)] ^ rk[(o)+0]; \ - s1 = Td[0][GETBYTE(t1, 3)] ^ Td[1][GETBYTE(t0, 2)] ^ \ - Td[2][GETBYTE(t3, 1)] ^ Td[3][GETBYTE(t2, 0)] ^ rk[(o)+1]; \ - s2 = Td[0][GETBYTE(t2, 3)] ^ Td[1][GETBYTE(t1, 2)] ^ \ - Td[2][GETBYTE(t0, 1)] ^ Td[3][GETBYTE(t3, 0)] ^ rk[(o)+2]; \ - s3 = Td[0][GETBYTE(t3, 3)] ^ Td[1][GETBYTE(t2, 2)] ^ \ - Td[2][GETBYTE(t1, 1)] ^ Td[3][GETBYTE(t0, 0)] ^ rk[(o)+3] + s0 = GetTable(Td[0], GETBYTE(t0, 3)) ^ GetTable(Td[1], GETBYTE(t3, 2)) ^ \ + GetTable(Td[2], GETBYTE(t2, 1)) ^ GetTable(Td[3], GETBYTE(t1, 0)) ^ rk[(o)+0]; \ + s1 = GetTable(Td[0], GETBYTE(t1, 3)) ^ GetTable(Td[1], GETBYTE(t0, 2)) ^ \ + GetTable(Td[2], GETBYTE(t3, 1)) ^ GetTable(Td[3], GETBYTE(t2, 0)) ^ rk[(o)+1]; \ + s2 = GetTable(Td[0], GETBYTE(t2, 3)) ^ GetTable(Td[1], GETBYTE(t1, 2)) ^ \ + GetTable(Td[2], GETBYTE(t0, 1)) ^ GetTable(Td[3], GETBYTE(t3, 0)) ^ rk[(o)+2]; \ + s3 = GetTable(Td[0], GETBYTE(t3, 3)) ^ GetTable(Td[1], GETBYTE(t2, 2)) ^ \ + GetTable(Td[2], GETBYTE(t1, 1)) ^ GetTable(Td[3], GETBYTE(t0, 0)) ^ rk[(o)+3] +#else +#define DEC_ROUND_T_S(o) \ + GetTable_Multi(Td[0], &t0, GETBYTE(s0, 3), &t1, GETBYTE(s1, 3), \ + &t2, GETBYTE(s2, 3), &t3, GETBYTE(s3, 3)); \ + XorTable_Multi(Td[1], &t0, GETBYTE(s3, 2), &t1, GETBYTE(s0, 2), \ + &t2, GETBYTE(s1, 2), &t3, GETBYTE(s2, 2)); \ + XorTable_Multi(Td[2], &t0, GETBYTE(s2, 1), &t1, GETBYTE(s3, 1), \ + &t2, GETBYTE(s0, 1), &t3, GETBYTE(s1, 1)); \ + XorTable_Multi(Td[3], &t0, GETBYTE(s1, 0), &t1, GETBYTE(s2, 0), \ + &t2, GETBYTE(s3, 0), &t3, GETBYTE(s0, 0)); \ + t0 ^= rk[(o)+4]; t1 ^= rk[(o)+5]; t2 ^= rk[(o)+6]; t3 ^= rk[(o)+7]; +#define DEC_ROUND_S_T(o) \ + GetTable_Multi(Td[0], &s0, GETBYTE(t0, 3), &s1, GETBYTE(t1, 3), \ + &s2, GETBYTE(t2, 3), &s3, GETBYTE(t3, 3)); \ + XorTable_Multi(Td[1], &s0, GETBYTE(t3, 2), &s1, GETBYTE(t0, 2), \ + &s2, GETBYTE(t1, 2), &s3, GETBYTE(t2, 2)); \ + XorTable_Multi(Td[2], &s0, GETBYTE(t2, 1), &s1, GETBYTE(t3, 1), \ + &s2, GETBYTE(t0, 1), &s3, GETBYTE(t1, 1)); \ + 
XorTable_Multi(Td[3], &s0, GETBYTE(t1, 0), &s1, GETBYTE(t2, 0), \ + &s2, GETBYTE(t3, 0), &s3, GETBYTE(t0, 0)); \ + s0 ^= rk[(o)+0]; s1 ^= rk[(o)+1]; s2 ^= rk[(o)+2]; s3 ^= rk[(o)+3]; +#endif + +#ifndef WOLFSSL_AES_NO_UNROLL DEC_ROUND_T_S( 0); DEC_ROUND_S_T( 8); DEC_ROUND_T_S( 8); DEC_ROUND_S_T(16); DEC_ROUND_T_S(16); @@ -2241,60 +2511,14 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( */ for (;;) { - t0 = - Td[0][GETBYTE(s0, 3)] ^ - Td[1][GETBYTE(s3, 2)] ^ - Td[2][GETBYTE(s2, 1)] ^ - Td[3][GETBYTE(s1, 0)] ^ - rk[4]; - t1 = - Td[0][GETBYTE(s1, 3)] ^ - Td[1][GETBYTE(s0, 2)] ^ - Td[2][GETBYTE(s3, 1)] ^ - Td[3][GETBYTE(s2, 0)] ^ - rk[5]; - t2 = - Td[0][GETBYTE(s2, 3)] ^ - Td[1][GETBYTE(s1, 2)] ^ - Td[2][GETBYTE(s0, 1)] ^ - Td[3][GETBYTE(s3, 0)] ^ - rk[6]; - t3 = - Td[0][GETBYTE(s3, 3)] ^ - Td[1][GETBYTE(s2, 2)] ^ - Td[2][GETBYTE(s1, 1)] ^ - Td[3][GETBYTE(s0, 0)] ^ - rk[7]; + DEC_ROUND_T_S(0); rk += 8; if (--r == 0) { break; } - s0 = - Td[0][GETBYTE(t0, 3)] ^ - Td[1][GETBYTE(t3, 2)] ^ - Td[2][GETBYTE(t2, 1)] ^ - Td[3][GETBYTE(t1, 0)] ^ - rk[0]; - s1 = - Td[0][GETBYTE(t1, 3)] ^ - Td[1][GETBYTE(t0, 2)] ^ - Td[2][GETBYTE(t3, 1)] ^ - Td[3][GETBYTE(t2, 0)] ^ - rk[1]; - s2 = - Td[0][GETBYTE(t2, 3)] ^ - Td[1][GETBYTE(t1, 2)] ^ - Td[2][GETBYTE(t0, 1)] ^ - Td[3][GETBYTE(t3, 0)] ^ - rk[2]; - s3 = - Td[0][GETBYTE(t3, 3)] ^ - Td[1][GETBYTE(t2, 2)] ^ - Td[2][GETBYTE(t1, 1)] ^ - Td[3][GETBYTE(t0, 0)] ^ - rk[3]; + DEC_ROUND_S_T(0); } #endif /* @@ -2306,30 +2530,14 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( t0 |= PreFetchTd4(); #endif - s0 = - ((word32)Td4[GETBYTE(t0, 3)] << 24) ^ - ((word32)Td4[GETBYTE(t3, 2)] << 16) ^ - ((word32)Td4[GETBYTE(t2, 1)] << 8) ^ - ((word32)Td4[GETBYTE(t1, 0)]) ^ - rk[0]; - s1 = - ((word32)Td4[GETBYTE(t1, 3)] << 24) ^ - ((word32)Td4[GETBYTE(t0, 2)] << 16) ^ - ((word32)Td4[GETBYTE(t3, 1)] << 8) ^ - ((word32)Td4[GETBYTE(t2, 0)]) ^ - rk[1]; - s2 = - ((word32)Td4[GETBYTE(t2, 3)] << 24) ^ - ((word32)Td4[GETBYTE(t1, 2)] << 16) ^ - ((word32)Td4[GETBYTE(t0, 
1)] << 8) ^ - ((word32)Td4[GETBYTE(t3, 0)]) ^ - rk[2]; - s3 = - ((word32)Td4[GETBYTE(t3, 3)] << 24) ^ - ((word32)Td4[GETBYTE(t2, 2)] << 16) ^ - ((word32)Td4[GETBYTE(t1, 1)] << 8) ^ - ((word32)Td4[GETBYTE(t0, 0)]) ^ - rk[3]; + s0 = GetTable8_4(Td4, GETBYTE(t0, 3), GETBYTE(t3, 2), + GETBYTE(t2, 1), GETBYTE(t1, 0)) ^ rk[0]; + s1 = GetTable8_4(Td4, GETBYTE(t1, 3), GETBYTE(t0, 2), + GETBYTE(t3, 1), GETBYTE(t2, 0)) ^ rk[1]; + s2 = GetTable8_4(Td4, GETBYTE(t2, 3), GETBYTE(t1, 2), + GETBYTE(t0, 1), GETBYTE(t3, 0)) ^ rk[2]; + s3 = GetTable8_4(Td4, GETBYTE(t3, 3), GETBYTE(t2, 2), + GETBYTE(t1, 1), GETBYTE(t0, 0)) ^ rk[3]; #else #ifndef WC_NO_CACHE_RESISTANT s0 |= PreFetchTd4(); @@ -2338,28 +2546,28 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( r *= 2; for (rk += 4; r > 1; r--, rk += 4) { t0 = - ((word32)Td4[GETBYTE(s0, 3)] << 24) ^ - ((word32)Td4[GETBYTE(s3, 2)] << 16) ^ - ((word32)Td4[GETBYTE(s2, 1)] << 8) ^ - ((word32)Td4[GETBYTE(s1, 0)]) ^ + ((word32)GetTable8(Td4, GETBYTE(s0, 3)) << 24) ^ + ((word32)GetTable8(Td4, GETBYTE(s3, 2)) << 16) ^ + ((word32)GetTable8(Td4, GETBYTE(s2, 1)) << 8) ^ + ((word32)GetTable8(Td4, GETBYTE(s1, 0))) ^ rk[0]; t1 = - ((word32)Td4[GETBYTE(s1, 3)] << 24) ^ - ((word32)Td4[GETBYTE(s0, 2)] << 16) ^ - ((word32)Td4[GETBYTE(s3, 1)] << 8) ^ - ((word32)Td4[GETBYTE(s2, 0)]) ^ + ((word32)GetTable8(Td4, GETBYTE(s1, 3)) << 24) ^ + ((word32)GetTable8(Td4, GETBYTE(s0, 2)) << 16) ^ + ((word32)GetTable8(Td4, GETBYTE(s3, 1)) << 8) ^ + ((word32)GetTable8(Td4, GETBYTE(s2, 0))) ^ rk[1]; t2 = - ((word32)Td4[GETBYTE(s2, 3)] << 24) ^ - ((word32)Td4[GETBYTE(s1, 2)] << 16) ^ - ((word32)Td4[GETBYTE(s0, 1)] << 8) ^ - ((word32)Td4[GETBYTE(s3, 0)]) ^ + ((word32)GetTable8(Td4, GETBYTE(s2, 3)) << 24) ^ + ((word32)GetTable8(Td4, GETBYTE(s1, 2)) << 16) ^ + ((word32)GetTable8(Td4, GETBYTE(s0, 1)) << 8) ^ + ((word32)GetTable8(Td4, GETBYTE(s3, 0))) ^ rk[2]; t3 = - ((word32)Td4[GETBYTE(s3, 3)] << 24) ^ - ((word32)Td4[GETBYTE(s2, 2)] << 16) ^ - ((word32)Td4[GETBYTE(s1, 1)] << 8) 
^ - ((word32)Td4[GETBYTE(s0, 0)]) ^ + ((word32)GetTable8(Td4, GETBYTE(s3, 3)) << 24) ^ + ((word32)GetTable8(Td4, GETBYTE(s2, 2)) << 16) ^ + ((word32)GetTable8(Td4, GETBYTE(s1, 1)) << 8) ^ + ((word32)GetTable8(Td4, GETBYTE(s0, 0))) ^ rk[3]; s0 = @@ -2385,25 +2593,25 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( } t0 = - ((word32)Td4[GETBYTE(s0, 3)] << 24) ^ - ((word32)Td4[GETBYTE(s3, 2)] << 16) ^ - ((word32)Td4[GETBYTE(s2, 1)] << 8) ^ - ((word32)Td4[GETBYTE(s1, 0)]); + ((word32)GetTable8(Td4, GETBYTE(s0, 3)) << 24) ^ + ((word32)GetTable8(Td4, GETBYTE(s3, 2)) << 16) ^ + ((word32)GetTable8(Td4, GETBYTE(s2, 1)) << 8) ^ + ((word32)GetTable8(Td4, GETBYTE(s1, 0))); t1 = - ((word32)Td4[GETBYTE(s1, 3)] << 24) ^ - ((word32)Td4[GETBYTE(s0, 2)] << 16) ^ - ((word32)Td4[GETBYTE(s3, 1)] << 8) ^ - ((word32)Td4[GETBYTE(s2, 0)]); + ((word32)GetTable8(Td4, GETBYTE(s1, 3)) << 24) ^ + ((word32)GetTable8(Td4, GETBYTE(s0, 2)) << 16) ^ + ((word32)GetTable8(Td4, GETBYTE(s3, 1)) << 8) ^ + ((word32)GetTable8(Td4, GETBYTE(s2, 0))); t2 = - ((word32)Td4[GETBYTE(s2, 3)] << 24) ^ - ((word32)Td4[GETBYTE(s1, 2)] << 16) ^ - ((word32)Td4[GETBYTE(s0, 1)] << 8) ^ - ((word32)Td4[GETBYTE(s3, 0)]); + ((word32)GetTable8(Td4, GETBYTE(s2, 3)) << 24) ^ + ((word32)GetTable8(Td4, GETBYTE(s1, 2)) << 16) ^ + ((word32)GetTable8(Td4, GETBYTE(s0, 1)) << 8) ^ + ((word32)GetTable8(Td4, GETBYTE(s3, 0))); t3 = - ((word32)Td4[GETBYTE(s3, 3)] << 24) ^ - ((word32)Td4[GETBYTE(s2, 2)] << 16) ^ - ((word32)Td4[GETBYTE(s1, 1)] << 8) ^ - ((word32)Td4[GETBYTE(s0, 0)]); + ((word32)GetTable8(Td4, GETBYTE(s3, 3)) << 24) ^ + ((word32)GetTable8(Td4, GETBYTE(s2, 2)) << 16) ^ + ((word32)GetTable8(Td4, GETBYTE(s1, 1)) << 8) ^ + ((word32)GetTable8(Td4, GETBYTE(s0, 0))); s0 = t0 ^ rk[0]; s1 = t1 ^ rk[1]; s2 = t2 ^ rk[2]; @@ -2996,15 +3204,15 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( temp = rk[3]; rk[4] = rk[0] ^ #ifndef WOLFSSL_AES_SMALL_TABLES - (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^ - (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^ - 
(Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^ - (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^ + (GetTable(Te[2], GETBYTE(temp, 2)) & 0xff000000) ^ + (GetTable(Te[3], GETBYTE(temp, 1)) & 0x00ff0000) ^ + (GetTable(Te[0], GETBYTE(temp, 0)) & 0x0000ff00) ^ + (GetTable(Te[1], GETBYTE(temp, 3)) & 0x000000ff) ^ #else - ((word32)Tsbox[GETBYTE(temp, 2)] << 24) ^ - ((word32)Tsbox[GETBYTE(temp, 1)] << 16) ^ - ((word32)Tsbox[GETBYTE(temp, 0)] << 8) ^ - ((word32)Tsbox[GETBYTE(temp, 3)]) ^ + ((word32)GetTable8(Tsbox, GETBYTE(temp, 2)) << 24) ^ + ((word32)GetTable8(Tsbox, GETBYTE(temp, 1)) << 16) ^ + ((word32)GetTable8(Tsbox, GETBYTE(temp, 0)) << 8) ^ + ((word32)GetTable8(Tsbox, GETBYTE(temp, 3))) ^ #endif rcon[i]; rk[5] = rk[1] ^ rk[4]; @@ -3030,15 +3238,15 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( temp = rk[ 5]; rk[ 6] = rk[ 0] ^ #ifndef WOLFSSL_AES_SMALL_TABLES - (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^ - (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^ - (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^ - (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^ + (GetTable(Te[2], GETBYTE(temp, 2)) & 0xff000000) ^ + (GetTable(Te[3], GETBYTE(temp, 1)) & 0x00ff0000) ^ + (GetTable(Te[0], GETBYTE(temp, 0)) & 0x0000ff00) ^ + (GetTable(Te[1], GETBYTE(temp, 3)) & 0x000000ff) ^ #else - ((word32)Tsbox[GETBYTE(temp, 2)] << 24) ^ - ((word32)Tsbox[GETBYTE(temp, 1)] << 16) ^ - ((word32)Tsbox[GETBYTE(temp, 0)] << 8) ^ - ((word32)Tsbox[GETBYTE(temp, 3)]) ^ + ((word32)GetTable8(Tsbox, GETBYTE(temp, 2)) << 24) ^ + ((word32)GetTable8(Tsbox, GETBYTE(temp, 1)) << 16) ^ + ((word32)GetTable8(Tsbox, GETBYTE(temp, 0)) << 8) ^ + ((word32)GetTable8(Tsbox, GETBYTE(temp, 3))) ^ #endif rcon[i]; rk[ 7] = rk[ 1] ^ rk[ 6]; @@ -3065,15 +3273,15 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( temp = rk[ 7]; rk[ 8] = rk[ 0] ^ #ifndef WOLFSSL_AES_SMALL_TABLES - (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^ - (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^ - (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^ - (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^ + (GetTable(Te[2], 
GETBYTE(temp, 2)) & 0xff000000) ^ + (GetTable(Te[3], GETBYTE(temp, 1)) & 0x00ff0000) ^ + (GetTable(Te[0], GETBYTE(temp, 0)) & 0x0000ff00) ^ + (GetTable(Te[1], GETBYTE(temp, 3)) & 0x000000ff) ^ #else - ((word32)Tsbox[GETBYTE(temp, 2)] << 24) ^ - ((word32)Tsbox[GETBYTE(temp, 1)] << 16) ^ - ((word32)Tsbox[GETBYTE(temp, 0)] << 8) ^ - ((word32)Tsbox[GETBYTE(temp, 3)]) ^ + ((word32)GetTable8(Tsbox, GETBYTE(temp, 2)) << 24) ^ + ((word32)GetTable8(Tsbox, GETBYTE(temp, 1)) << 16) ^ + ((word32)GetTable8(Tsbox, GETBYTE(temp, 0)) << 8) ^ + ((word32)GetTable8(Tsbox, GETBYTE(temp, 3))) ^ #endif rcon[i]; rk[ 9] = rk[ 1] ^ rk[ 8]; @@ -3084,15 +3292,15 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( temp = rk[11]; rk[12] = rk[ 4] ^ #ifndef WOLFSSL_AES_SMALL_TABLES - (Te[2][GETBYTE(temp, 3)] & 0xff000000) ^ - (Te[3][GETBYTE(temp, 2)] & 0x00ff0000) ^ - (Te[0][GETBYTE(temp, 1)] & 0x0000ff00) ^ - (Te[1][GETBYTE(temp, 0)] & 0x000000ff); + (GetTable(Te[2], GETBYTE(temp, 3)) & 0xff000000) ^ + (GetTable(Te[3], GETBYTE(temp, 2)) & 0x00ff0000) ^ + (GetTable(Te[0], GETBYTE(temp, 1)) & 0x0000ff00) ^ + (GetTable(Te[1], GETBYTE(temp, 0)) & 0x000000ff); #else - ((word32)Tsbox[GETBYTE(temp, 3)] << 24) ^ - ((word32)Tsbox[GETBYTE(temp, 2)] << 16) ^ - ((word32)Tsbox[GETBYTE(temp, 1)] << 8) ^ - ((word32)Tsbox[GETBYTE(temp, 0)]); + ((word32)GetTable8(Tsbox, GETBYTE(temp, 3)) << 24) ^ + ((word32)GetTable8(Tsbox, GETBYTE(temp, 2)) << 16) ^ + ((word32)GetTable8(Tsbox, GETBYTE(temp, 1)) << 8) ^ + ((word32)GetTable8(Tsbox, GETBYTE(temp, 0))); #endif rk[13] = rk[ 5] ^ rk[12]; rk[14] = rk[ 6] ^ rk[13]; @@ -3127,25 +3335,25 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( for (i = 1; i < aes->rounds; i++) { rk += 4; rk[0] = - Td[0][Te[1][GETBYTE(rk[0], 3)] & 0xff] ^ - Td[1][Te[1][GETBYTE(rk[0], 2)] & 0xff] ^ - Td[2][Te[1][GETBYTE(rk[0], 1)] & 0xff] ^ - Td[3][Te[1][GETBYTE(rk[0], 0)] & 0xff]; + GetTable(Td[0], GetTable(Te[1], GETBYTE(rk[0], 3)) & 0xff) ^ + GetTable(Td[1], GetTable(Te[1], GETBYTE(rk[0], 2)) & 
0xff) ^ + GetTable(Td[2], GetTable(Te[1], GETBYTE(rk[0], 1)) & 0xff) ^ + GetTable(Td[3], GetTable(Te[1], GETBYTE(rk[0], 0)) & 0xff); rk[1] = - Td[0][Te[1][GETBYTE(rk[1], 3)] & 0xff] ^ - Td[1][Te[1][GETBYTE(rk[1], 2)] & 0xff] ^ - Td[2][Te[1][GETBYTE(rk[1], 1)] & 0xff] ^ - Td[3][Te[1][GETBYTE(rk[1], 0)] & 0xff]; + GetTable(Td[0], GetTable(Te[1], GETBYTE(rk[1], 3)) & 0xff) ^ + GetTable(Td[1], GetTable(Te[1], GETBYTE(rk[1], 2)) & 0xff) ^ + GetTable(Td[2], GetTable(Te[1], GETBYTE(rk[1], 1)) & 0xff) ^ + GetTable(Td[3], GetTable(Te[1], GETBYTE(rk[1], 0)) & 0xff); rk[2] = - Td[0][Te[1][GETBYTE(rk[2], 3)] & 0xff] ^ - Td[1][Te[1][GETBYTE(rk[2], 2)] & 0xff] ^ - Td[2][Te[1][GETBYTE(rk[2], 1)] & 0xff] ^ - Td[3][Te[1][GETBYTE(rk[2], 0)] & 0xff]; + GetTable(Td[0], GetTable(Te[1], GETBYTE(rk[2], 3)) & 0xff) ^ + GetTable(Td[1], GetTable(Te[1], GETBYTE(rk[2], 2)) & 0xff) ^ + GetTable(Td[2], GetTable(Te[1], GETBYTE(rk[2], 1)) & 0xff) ^ + GetTable(Td[3], GetTable(Te[1], GETBYTE(rk[2], 0)) & 0xff); rk[3] = - Td[0][Te[1][GETBYTE(rk[3], 3)] & 0xff] ^ - Td[1][Te[1][GETBYTE(rk[3], 2)] & 0xff] ^ - Td[2][Te[1][GETBYTE(rk[3], 1)] & 0xff] ^ - Td[3][Te[1][GETBYTE(rk[3], 0)] & 0xff]; + GetTable(Td[0], GetTable(Te[1], GETBYTE(rk[3], 3)) & 0xff) ^ + GetTable(Td[1], GetTable(Te[1], GETBYTE(rk[3], 2)) & 0xff) ^ + GetTable(Td[2], GetTable(Te[1], GETBYTE(rk[3], 1)) & 0xff) ^ + GetTable(Td[3], GetTable(Te[1], GETBYTE(rk[3], 0)) & 0xff); } #endif }