diff --git a/wolfcrypt/src/port/arm/armv8-aes.c b/wolfcrypt/src/port/arm/armv8-aes.c index 59a61f616..6b36b0c5e 100644 --- a/wolfcrypt/src/port/arm/armv8-aes.c +++ b/wolfcrypt/src/port/arm/armv8-aes.c @@ -888,536 +888,568 @@ int wc_AesSetIV(Aes* aes, const byte* iv) /* AES-CTR */ #ifdef WOLFSSL_AES_COUNTER - - /* Increment AES counter */ - static WC_INLINE void IncrementAesCounter(byte* inOutCtr) - { - int i; - - /* in network byte order so start at end and work back */ - for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) { - if (++inOutCtr[i]) /* we're done unless we overflow */ - return; - } - } - - int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) - { - byte* tmp; - word32 numBlocks; - - if (aes == NULL || out == NULL || in == NULL) { - return BAD_FUNC_ARG; - } - - tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; - - /* consume any unused bytes left in aes->tmp */ - while (aes->left && sz) { - *(out++) = *(in++) ^ *(tmp++); - aes->left--; - sz--; - } - - /* do as many block size ops as possible */ - numBlocks = sz/AES_BLOCK_SIZE; - if (numBlocks > 0) { - /* pointer needed because it is incremented when read, causing - * an issue with call to encrypt/decrypt leftovers */ - byte* keyPt = (byte*)aes->key; - sz -= numBlocks * AES_BLOCK_SIZE; - switch(aes->rounds) { +static void wc_aes_ctr_encrypt_asm(Aes* aes, byte* out, const byte* in, + byte* keyPt, word32 numBlocks) +{ + switch(aes->rounds) { #ifdef WOLFSSL_AES_128 - case 10: /* AES 128 BLOCK */ - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + case 10: /* AES 128 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" - "#Create vector with the value 1 \n" - "MOVI v15.16b, #1 \n" - "USHR v15.2d, v15.2d, #56 \n" - "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" - "EOR v14.16b, v14.16b, v14.16b \n" - "EXT v14.16b, v15.16b, v14.16b, #8\n" + "#Create vector with the value 1 \n" + "MOVI v15.16b, #1 \n" + "USHR v15.2d, v15.2d, #56 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "EOR v14.16b, v14.16b, v14.16b \n" + "EXT v14.16b, v15.16b, v14.16b, #8\n" - "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" - "LD1 {v13.2d}, %[reg] \n" + "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n" + "LD1 {v13.2d}, %[reg] \n" - /* double block */ - "1: \n" - "CMP w11, #1 \n" - "BEQ 2f \n" - "CMP w11, #0 \n" - "BEQ 3f \n" + /* double block */ + "1: \n" + "CMP w11, #1 \n" + "BEQ 2f \n" + "CMP w11, #0 \n" + "BEQ 3f \n" - "MOV v0.16b, v13.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v13.16b, v13.16b \n" /* network order */ - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v13.16b, v13.16b, v13.16b, #8 \n" - "SUB w11, w11, #2 \n" - "ADD v15.2d, v13.2d, v14.2d \n" /* add 1 to counter */ - "ADD v13.2d, v15.2d, v14.2d \n" /* add 1 to counter */ + "MOV v0.16b, v13.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v13.16b, v13.16b \n" /* network order */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "SUB w11, w11, #2 \n" + "ADD v15.2d, v13.2d, v14.2d \n" /* add 1 to counter */ + "CMEQ v12.2d, v15.2d, #0 \n" + "EXT v12.16b, v14.16b, v12.16b, #8 \n" + "SUB v15.2d, v15.2d, v12.2d \n" + "ADD v13.2d, v15.2d, v14.2d \n" /* add 1 to counter */ + "CMEQ v12.2d, v13.2d, #0 \n" + "EXT v12.16b, v14.16b, v12.16b, #8 \n" + "SUB v13.2d, v13.2d, v12.2d \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v15.16b, v15.16b, v15.16b, #8 \n" - "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "AESE v0.16b, 
v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v15.16b, v15.16b \n" /* revert from network order */ - "REV64 v13.16b, v13.16b \n" /* revert from network order */ + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v15.16b, v15.16b \n" /* revert from network order */ + "REV64 v13.16b, v13.16b \n" /* revert from network order */ - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v15.16b, v1.16b \n" - "AESMC v15.16b, v15.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v15.16b, v1.16b \n" + "AESMC v15.16b, v15.16b \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v15.16b, v2.16b \n" - "AESMC v15.16b, v15.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v15.16b, v2.16b \n" + "AESMC v15.16b, v15.16b \n" - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v15.16b, v3.16b \n" - "AESMC v15.16b, v15.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v15.16b, v3.16b \n" + "AESMC v15.16b, v15.16b \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v15.16b, v4.16b \n" - "AESMC v15.16b, v15.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v15.16b, v4.16b \n" + "AESMC v15.16b, v15.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v15.16b, v5.16b \n" - "AESMC v15.16b, v15.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v15.16b, v5.16b \n" + "AESMC v15.16b, v15.16b \n" - "AESE v0.16b, v10.16b \n" - "AESE v15.16b, v6.16b \n" - "AESMC v15.16b, v15.16b \n" + "AESE v0.16b, v10.16b \n" + "AESE v15.16b, v6.16b \n" + "AESMC v15.16b, v15.16b \n" - "EOR v0.16b, v0.16b, v11.16b \n" - "AESE v15.16b, v7.16b \n" - "AESMC v15.16b, v15.16b \n" + "EOR v0.16b, v0.16b, v11.16b \n" + "AESE v15.16b, v7.16b \n" + "AESMC v15.16b, v15.16b \n" - "LD1 {v12.2d}, [%[input]], #16 \n" - "AESE v15.16b, v8.16b \n" - "AESMC v15.16b, v15.16b \n" + "LD1 {v12.2d}, [%[input]], #16 \n" + "AESE v15.16b, v8.16b \n" + "AESMC v15.16b, v15.16b \n" - "EOR v0.16b, v0.16b, v12.16b \n" - "AESE v15.16b, v9.16b \n" - "AESMC v15.16b, v15.16b \n" + "EOR v0.16b, v0.16b, v12.16b \n" + "AESE v15.16b, v9.16b \n" + "AESMC v15.16b, v15.16b \n" - "LD1 {v12.2d}, [%[input]], #16 \n" - "AESE v15.16b, v10.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - "EOR v15.16b, v15.16b, v11.16b \n" - "EOR v15.16b, v15.16b, v12.16b \n" - "ST1 {v15.2d}, [%[out]], #16 \n" + "LD1 {v12.2d}, [%[input]], #16 \n" + "AESE v15.16b, v10.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "EOR v15.16b, v15.16b, v11.16b \n" + "EOR v15.16b, v15.16b, v12.16b \n" + "ST1 {v15.2d}, [%[out]], #16 \n" - "B 1b \n" + "B 1b \n" - /* single block */ - "2: \n" - "MOV v0.16b, v13.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v13.16b, v13.16b \n" /* network order */ - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v13.16b, v13.16b, v13.16b, #8 \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */ - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "SUB w11, w11, #1 \n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v13.16b, v13.16b, v13.16b, #8 \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v13.16b, v13.16b \n" /* revert from network order */ - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b 
\n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "EOR v0.16b, v0.16b, v11.16b \n" - "#CTR operations, increment counter and xorbuf \n" - "LD1 {v12.2d}, [%[input]], #16 \n" - "EOR v0.16b, v0.16b, v12.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" + /* single block */ + "2: \n" + "MOV v0.16b, v13.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v13.16b, v13.16b \n" /* network order */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */ + "CMEQ v15.2d, v13.2d, #0 \n" + "EXT v15.16b, v14.16b, v15.16b, #8 \n" + "SUB v13.2d, v13.2d, v15.2d \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "SUB w11, w11, #1 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v13.16b, v13.16b, v13.16b, #8 \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v13.16b, v13.16b \n" /* revert from network order */ + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "EOR v0.16b, v0.16b, v11.16b \n" + "#CTR operations, increment counter and xorbuf \n" + "LD1 {v12.2d}, [%[input]], #16 \n" + "EOR v0.16b, v0.16b, v12.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" - "3: \n" - "#store current counter value at the end \n" - "ST1 {v13.2d}, %[regOut] \n" + "3: \n" + "#store current counter value at the end \n" + "ST1 {v13.2d}, %[regOut] \n" - :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg), - "=r" (in) - :"0" (out), [Key] "1" (keyPt), [input] "3" (in), - [blocks] "r" (numBlocks), [reg] "m" (aes->reg) - : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15" - ); - break; + :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg), + "=r" (in) + :"0" (out), [Key] "1" (keyPt), [input] "3" (in), + [blocks] "r" (numBlocks), [reg] "m" (aes->reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15" + ); + break; #endif /* WOLFSSL_AES_128 */ #ifdef WOLFSSL_AES_192 - case 12: /* AES 192 BLOCK */ - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + case 12: /* AES 192 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" - "#Create vector with the value 1 \n" - "MOVI v16.16b, #1 \n" - "USHR v16.2d, v16.2d, #56 \n" - "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" - "EOR v14.16b, v14.16b, v14.16b \n" - "EXT v16.16b, v16.16b, v14.16b, #8\n" + "#Create vector with the value 1 \n" + "MOVI v16.16b, #1 \n" + "USHR v16.2d, v16.2d, #56 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "EOR v14.16b, v14.16b, v14.16b \n" + "EXT v16.16b, v16.16b, v14.16b, #8\n" - "LD1 {v9.2d-v12.2d}, [%[Key]], #64\n" - "LD1 {v15.2d}, %[reg] \n" - "LD1 {v13.16b}, [%[Key]], #16 \n" + "LD1 {v9.2d-v12.2d}, [%[Key]], #64\n" + "LD1 {v15.2d}, %[reg] \n" + "LD1 {v13.16b}, [%[Key]], #16 \n" - /* double block */ - "1: \n" - "CMP w11, #1 \n" - "BEQ 2f \n" - "CMP w11, #0 \n" - "BEQ 3f \n" + /* double block */ + "1: \n" + "CMP w11, #1 \n" + "BEQ 2f \n" + "CMP w11, #0 \n" + "BEQ 3f \n" - "MOV v0.16b, v15.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v15.16b, v15.16b \n" /* network order */ - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT 
v15.16b, v15.16b, v15.16b, #8 \n" - "SUB w11, w11, #2 \n" - "ADD v17.2d, v15.2d, v16.2d \n" /* add 1 to counter */ - "ADD v15.2d, v17.2d, v16.2d \n" /* add 1 to counter */ + "MOV v0.16b, v15.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v15.16b, v15.16b \n" /* network order */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "SUB w11, w11, #2 \n" + "ADD v17.2d, v15.2d, v16.2d \n" /* add 1 to counter */ + "CMEQ v14.2d, v17.2d, #0 \n" + "EXT v14.16b, v16.16b, v14.16b, #8 \n" + "SUB v17.2d, v17.2d, v14.2d \n" + "ADD v15.2d, v17.2d, v16.2d \n" /* add 1 to counter */ + "CMEQ v14.2d, v15.2d, #0 \n" + "EXT v14.16b, v16.16b, v14.16b, #8 \n" + "SUB v15.2d, v15.2d, v14.2d \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v17.16b, v17.16b, v17.16b, #8 \n" - "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v17.16b, v17.16b, v17.16b, #8 \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v17.16b, v17.16b \n" /* revert from network order */ - "REV64 v15.16b, v15.16b \n" /* revert from network order */ + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v17.16b, v17.16b \n" /* revert from network order */ + "REV64 v15.16b, v15.16b \n" /* revert from network order */ - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v17.16b, v1.16b \n" - "AESMC v17.16b, v17.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v1.16b \n" + "AESMC v17.16b, v17.16b \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v17.16b, v2.16b \n" - "AESMC v17.16b, v17.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v2.16b \n" + "AESMC v17.16b, v17.16b \n" - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v17.16b, v3.16b \n" - "AESMC v17.16b, v17.16b \n" + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v3.16b \n" + "AESMC v17.16b, v17.16b \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v17.16b, v4.16b \n" - "AESMC v17.16b, v17.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v4.16b \n" + "AESMC v17.16b, v17.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v17.16b, v5.16b \n" - "AESMC v17.16b, v17.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v5.16b \n" + "AESMC v17.16b, v17.16b \n" - "AESE v0.16b, v10.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v17.16b, v6.16b \n" - "AESMC v17.16b, v17.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v6.16b \n" + "AESMC v17.16b, v17.16b \n" - "AESE v0.16b, v11.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v17.16b, v7.16b \n" - "AESMC v17.16b, v17.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v17.16b, v7.16b \n" + "AESMC v17.16b, v17.16b \n" - "AESE v0.16b, v12.16b \n" - "AESE v17.16b, v8.16b \n" - "AESMC v17.16b, v17.16b \n" + "AESE v0.16b, v12.16b \n" + "AESE v17.16b, v8.16b \n" + "AESMC v17.16b, v17.16b \n" - "EOR v0.16b, v0.16b, v13.16b \n" - "AESE v17.16b, v9.16b \n" - "AESMC v17.16b, v17.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n" + "AESE v17.16b, v9.16b \n" + "AESMC v17.16b, v17.16b \n" - "LD1 {v14.2d}, [%[input]], #16 \n" - "AESE v17.16b, v10.16b \n" - "AESMC v17.16b, v17.16b \n" + "LD1 {v14.2d}, [%[input]], #16 \n" + "AESE v17.16b, v10.16b \n" + "AESMC v17.16b, v17.16b \n" - "EOR v0.16b, v0.16b, v14.16b \n" - 
"AESE v17.16b, v11.16b \n" - "AESMC v17.16b, v17.16b \n" + "EOR v0.16b, v0.16b, v14.16b \n" + "AESE v17.16b, v11.16b \n" + "AESMC v17.16b, v17.16b \n" - "LD1 {v14.2d}, [%[input]], #16 \n" - "AESE v17.16b, v12.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - "EOR v17.16b, v17.16b, v13.16b \n" - "EOR v17.16b, v17.16b, v14.16b \n" - "ST1 {v17.2d}, [%[out]], #16 \n" + "LD1 {v14.2d}, [%[input]], #16 \n" + "AESE v17.16b, v12.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "EOR v17.16b, v17.16b, v13.16b \n" + "EOR v17.16b, v17.16b, v14.16b \n" + "ST1 {v17.2d}, [%[out]], #16 \n" - "B 1b \n" + "B 1b \n" - "2: \n" - "LD1 {v14.2d}, [%[input]], #16 \n" - "MOV v0.16b, v15.16b \n" + "2: \n" + "LD1 {v14.2d}, [%[input]], #16 \n" + "MOV v0.16b, v15.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v15.16b, v15.16b \n" /* network order */ - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v15.16b, v15.16b, v15.16b, #8 \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "ADD v15.2d, v15.2d, v16.2d \n" /* add 1 to counter */ - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "SUB w11, w11, #1 \n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v15.16b, v15.16b, v15.16b, #8 \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v15.16b, v15.16b \n" /* revert from network order */ - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v11.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v12.16b \n" - "EOR v0.16b, v0.16b, v13.16b \n" - "#CTR operations, increment counter and xorbuf \n" - "EOR v0.16b, v0.16b, v14.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v15.16b, v15.16b \n" /* network order */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "ADD v15.2d, v15.2d, v16.2d \n" /* add 1 to counter */ + "CMEQ v17.2d, v15.2d, #0 \n" + "EXT v17.16b, v16.16b, v17.16b, #8 \n" + "SUB v15.2d, v15.2d, v17.2d \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "SUB w11, w11, #1 \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v15.16b, v15.16b, v15.16b, #8 \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v15.16b, v15.16b \n" /* revert from network order */ + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "EOR v0.16b, v0.16b, v13.16b \n" + "#CTR operations, increment counter and xorbuf \n" + "EOR v0.16b, v0.16b, v14.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" - "3: \n" - "#store current counter value at the end \n" - "ST1 {v15.2d}, %[regOut] \n" + "3: \n" + "#store current counter value at the end \n" + "ST1 {v15.2d}, %[regOut] \n" - :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg), - "=r" (in) - :"0" (out), [Key] "1" (keyPt), [input] "3" (in), - [blocks] "r" (numBlocks), [reg] "m" (aes->reg) - : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15", - "v16", "v17" - ); - break; + :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg), + "=r" 
(in) + :"0" (out), [Key] "1" (keyPt), [input] "3" (in), + [blocks] "r" (numBlocks), [reg] "m" (aes->reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15", + "v16", "v17" + ); + break; #endif /* WOLFSSL_AES_192 */ #ifdef WOLFSSL_AES_256 - case 14: /* AES 256 BLOCK */ - __asm__ __volatile__ ( - "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + case 14: /* AES 256 BLOCK */ + __asm__ __volatile__ ( + "MOV w11, %w[blocks] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" - "#Create vector with the value 1 \n" - "MOVI v18.16b, #1 \n" - "USHR v18.2d, v18.2d, #56 \n" - "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" - "EOR v19.16b, v19.16b, v19.16b \n" - "EXT v18.16b, v18.16b, v19.16b, #8\n" + "#Create vector with the value 1 \n" + "MOVI v18.16b, #1 \n" + "USHR v18.2d, v18.2d, #56 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "EOR v19.16b, v19.16b, v19.16b \n" + "EXT v18.16b, v18.16b, v19.16b, #8\n" - "LD1 {v9.2d-v12.2d}, [%[Key]], #64 \n" - "LD1 {v13.2d-v15.2d}, [%[Key]], #48 \n" - "LD1 {v17.2d}, %[reg] \n" + "LD1 {v9.2d-v12.2d}, [%[Key]], #64 \n" + "LD1 {v13.2d-v15.2d}, [%[Key]], #48 \n" + "LD1 {v17.2d}, %[reg] \n" - /* double block */ - "1: \n" - "CMP w11, #1 \n" - "BEQ 2f \n" - "CMP w11, #0 \n" - "BEQ 3f \n" + /* double block */ + "1: \n" + "CMP w11, #1 \n" + "BEQ 2f \n" + "CMP w11, #0 \n" + "BEQ 3f \n" - "MOV v0.16b, v17.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v17.16b, v17.16b \n" /* network order */ - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v17.16b, v17.16b, v17.16b, #8 \n" - "SUB w11, w11, #2 \n" - "ADD v19.2d, v17.2d, v18.2d \n" /* add 1 to counter */ - "ADD v17.2d, v19.2d, v18.2d \n" /* add 1 to counter */ + "MOV v0.16b, v17.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v17.16b, v17.16b \n" /* network order */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v17.16b, v17.16b, v17.16b, #8 \n" + "SUB w11, w11, #2 \n" + "ADD v19.2d, v17.2d, v18.2d \n" /* add 1 to counter */ + "CMEQ v16.2d, v19.2d, #0 \n" + "EXT v16.16b, v18.16b, v16.16b, #8 \n" + "SUB v19.2d, v19.2d, v16.2d \n" + "ADD v17.2d, v19.2d, v18.2d \n" /* add 1 to counter */ + "CMEQ v16.2d, v17.2d, #0 \n" + "EXT v16.16b, v18.16b, v16.16b, #8 \n" + "SUB v17.2d, v17.2d, v16.2d \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v19.16b, v19.16b, v19.16b, #8 \n" - "EXT v17.16b, v17.16b, v17.16b, #8 \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v19.16b, v19.16b, v19.16b, #8 \n" + "EXT v17.16b, v17.16b, v17.16b, #8 \n" - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v19.16b, v19.16b \n" /* revert from network order */ - "REV64 v17.16b, v17.16b \n" /* revert from network order */ + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v19.16b, v19.16b \n" /* revert from network order */ + "REV64 v17.16b, v17.16b \n" /* revert from network order */ - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v19.16b, v1.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v1.16b \n" + "AESMC v19.16b, v19.16b \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v19.16b, v2.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v2.16b \n" + "AESMC v19.16b, v19.16b \n" - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v19.16b, v3.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, 
v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v3.16b \n" + "AESMC v19.16b, v19.16b \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v19.16b, v4.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v4.16b \n" + "AESMC v19.16b, v19.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v19.16b, v5.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v5.16b \n" + "AESMC v19.16b, v19.16b \n" - "AESE v0.16b, v10.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v19.16b, v6.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v6.16b \n" + "AESMC v19.16b, v19.16b \n" - "AESE v0.16b, v11.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v19.16b, v7.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v7.16b \n" + "AESMC v19.16b, v19.16b \n" - "AESE v0.16b, v12.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v19.16b, v8.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v12.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v8.16b \n" + "AESMC v19.16b, v19.16b \n" - "AESE v0.16b, v13.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v19.16b, v9.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v13.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v19.16b, v9.16b \n" + "AESMC v19.16b, v19.16b \n" - "AESE v0.16b, v14.16b \n" - "AESE v19.16b, v10.16b \n" - "AESMC v19.16b, v19.16b \n" + "AESE v0.16b, v14.16b \n" + "AESE v19.16b, v10.16b \n" + "AESMC v19.16b, v19.16b \n" - "EOR v0.16b, v0.16b, v15.16b \n" - "AESE v19.16b, v11.16b \n" - "AESMC v19.16b, v19.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "AESE v19.16b, v11.16b \n" + "AESMC v19.16b, v19.16b \n" - "LD1 {v16.2d}, [%[input]], #16 \n" - "AESE v19.16b, v12.16b \n" - "AESMC v19.16b, v19.16b \n" + "LD1 {v16.2d}, [%[input]], #16 \n" + "AESE v19.16b, v12.16b \n" + "AESMC v19.16b, v19.16b \n" - "EOR v0.16b, v0.16b, v16.16b \n" - "AESE v19.16b, v13.16b \n" - "AESMC v19.16b, v19.16b \n" + "EOR v0.16b, v0.16b, v16.16b \n" + "AESE v19.16b, v13.16b \n" + "AESMC v19.16b, v19.16b \n" - "LD1 {v16.2d}, [%[input]], #16 \n" - "AESE v19.16b, v14.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" - "EOR v19.16b, v19.16b, v15.16b \n" - "EOR v19.16b, v19.16b, v16.16b \n" - "ST1 {v19.2d}, [%[out]], #16 \n" + "LD1 {v16.2d}, [%[input]], #16 \n" + "AESE v19.16b, v14.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" + "EOR v19.16b, v19.16b, v15.16b \n" + "EOR v19.16b, v19.16b, v16.16b \n" + "ST1 {v19.2d}, [%[out]], #16 \n" - "B 1b \n" + "B 1b \n" - "2: \n" - "LD1 {v16.2d}, [%[input]], #16 \n" - "MOV v0.16b, v17.16b \n" - "AESE v0.16b, v1.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v17.16b, v17.16b \n" /* network order */ - "AESE v0.16b, v2.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v17.16b, v17.16b, v17.16b, #8 \n" - "AESE v0.16b, v3.16b \n" - "AESMC v0.16b, v0.16b \n" - "ADD v17.2d, v17.2d, v18.2d \n" /* add 1 to counter */ - "AESE v0.16b, v4.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v5.16b \n" - "AESMC v0.16b, v0.16b \n" - "EXT v17.16b, v17.16b, v17.16b, #8 \n" - "AESE v0.16b, v6.16b \n" - "AESMC v0.16b, v0.16b \n" - "REV64 v17.16b, v17.16b \n" /* revert from network order */ - "AESE v0.16b, v7.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v8.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v9.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v10.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v11.16b \n" - 
"AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v12.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v13.16b \n" - "AESMC v0.16b, v0.16b \n" - "AESE v0.16b, v14.16b \n" - "EOR v0.16b, v0.16b, v15.16b \n" - "#CTR operations, increment counter and xorbuf \n" - "EOR v0.16b, v0.16b, v16.16b \n" - "ST1 {v0.2d}, [%[out]], #16 \n" + "2: \n" + "LD1 {v16.2d}, [%[input]], #16 \n" + "MOV v0.16b, v17.16b \n" + "AESE v0.16b, v1.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v17.16b, v17.16b \n" /* network order */ + "AESE v0.16b, v2.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v17.16b, v17.16b, v17.16b, #8 \n" + "AESE v0.16b, v3.16b \n" + "AESMC v0.16b, v0.16b \n" + "ADD v17.2d, v17.2d, v18.2d \n" /* add 1 to counter */ + "CMEQ v19.2d, v17.2d, #0 \n" + "EXT v19.16b, v18.16b, v19.16b, #8 \n" + "SUB v17.2d, v17.2d, v19.2d \n" + "AESE v0.16b, v4.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v5.16b \n" + "AESMC v0.16b, v0.16b \n" + "EXT v17.16b, v17.16b, v17.16b, #8 \n" + "AESE v0.16b, v6.16b \n" + "AESMC v0.16b, v0.16b \n" + "REV64 v17.16b, v17.16b \n" /* revert from network order */ + "AESE v0.16b, v7.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v8.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v9.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v10.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v11.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v12.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v13.16b \n" + "AESMC v0.16b, v0.16b \n" + "AESE v0.16b, v14.16b \n" + "EOR v0.16b, v0.16b, v15.16b \n" + "#CTR operations, increment counter and xorbuf \n" + "EOR v0.16b, v0.16b, v16.16b \n" + "ST1 {v0.2d}, [%[out]], #16 \n" - "3: \n" - "#store current counter value at the end \n" - "ST1 {v17.2d}, %[regOut] \n" + "3: \n" + "#store current counter value at the end \n" + "ST1 {v17.2d}, %[regOut] \n" - :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg), - "=r" (in) - :"0" (out), [Key] "1" (keyPt), [input] "3" (in), - [blocks] "r" (numBlocks), [reg] "m" (aes->reg) - : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", - "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15", - "v16", "v17", "v18", "v19" - ); - break; + :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg), + "=r" (in) + :"0" (out), [Key] "1" (keyPt), [input] "3" (in), + [blocks] "r" (numBlocks), [reg] "m" (aes->reg) + : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", + "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15", + "v16", "v17", "v18", "v19" + ); + break; #endif /* WOLFSSL_AES_256 */ - default: - WOLFSSL_MSG("Bad AES-CTR round value"); - return BAD_FUNC_ARG; - } + } +} - aes->left = 0; - } +int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + byte* tmp; + word32 numBlocks; - /* handle non block size remaining */ - if (sz) { - wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->tmp); - IncrementAesCounter((byte*)aes->reg); + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + switch(aes->rounds) { + #ifdef WOLFSSL_AES_128 + case 10: /* AES 128 BLOCK */ + #endif /* WOLFSSL_AES_128 */ + #ifdef WOLFSSL_AES_192 + case 12: /* AES 192 BLOCK */ + #endif /* WOLFSSL_AES_192 */ + #ifdef WOLFSSL_AES_256 + case 14: /* AES 256 BLOCK */ + #endif /* WOLFSSL_AES_256 */ + break; + default: + WOLFSSL_MSG("Bad AES-CTR round value"); + return BAD_FUNC_ARG; + } - aes->left = AES_BLOCK_SIZE; - tmp = (byte*)aes->tmp; - while (sz--) { - *(out++) = *(in++) ^ *(tmp++); - aes->left--; - } - } - return 0; + tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; 
+ + /* consume any unused bytes left in aes->tmp */ + while ((aes->left != 0) && (sz != 0)) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + sz--; + } + + /* do as many block size ops as possible */ + numBlocks = sz / AES_BLOCK_SIZE; + if (numBlocks > 0) { + wc_aes_ctr_encrypt_asm(aes, out, in, (byte*)aes->key, numBlocks); + + sz -= numBlocks * AES_BLOCK_SIZE; + out += numBlocks * AES_BLOCK_SIZE; + in += numBlocks * AES_BLOCK_SIZE; + } + + /* handle non block size remaining */ + if (sz) { + byte zeros[AES_BLOCK_SIZE] = { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; + wc_aes_ctr_encrypt_asm(aes, (byte*)aes->tmp, zeros, (byte*)aes->key, 1); + + aes->left = AES_BLOCK_SIZE; + tmp = (byte*)aes->tmp; + + while (sz--) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; } + } + return 0; +} #endif /* WOLFSSL_AES_COUNTER */ diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index fdd7e9ce5..955aa304d 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -8549,21 +8549,21 @@ static int aesecb_test(void) XMEMSET(cipher, 0, AES_BLOCK_SIZE); ret = wc_AesSetKey(enc, niKey, sizeof(niKey), cipher, AES_ENCRYPTION); if (ret != 0) - ERROR_OUT(-5943, out); + ERROR_OUT(-5923, out); if (wc_AesEcbEncrypt(enc, cipher, niPlain, AES_BLOCK_SIZE) != 0) - ERROR_OUT(-5950, out); + ERROR_OUT(-5924, out); if (XMEMCMP(cipher, niCipher, AES_BLOCK_SIZE) != 0) - ERROR_OUT(-5944, out); + ERROR_OUT(-5925, out); XMEMSET(plain, 0, AES_BLOCK_SIZE); ret = wc_AesSetKey(dec, niKey, sizeof(niKey), plain, AES_DECRYPTION); if (ret != 0) - ERROR_OUT(-5945, out); + ERROR_OUT(-5926, out); if (wc_AesEcbDecrypt(dec, plain, niCipher, AES_BLOCK_SIZE) != 0) - ERROR_OUT(-5951, out); + ERROR_OUT(-5927, out); wc_AesEcbDecrypt(dec, plain, niCipher, AES_BLOCK_SIZE); if (XMEMCMP(plain, niPlain, AES_BLOCK_SIZE) != 0) - ERROR_OUT(-5946, out); + ERROR_OUT(-5928, out); } wc_AesFree(enc); @@ -8627,11 +8627,11 @@ WOLFSSL_TEST_SUBROUTINE int aes_test(void) #ifdef WOLFSSL_SMALL_STACK #if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_DIRECT) if (enc == NULL) - ERROR_OUT(-5948, out); + ERROR_OUT(-5990, out); #endif #if defined(HAVE_AES_DECRYPT) || defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_DIRECT) if (dec == NULL) - ERROR_OUT(-5949, out); + ERROR_OUT(-5991, out); #endif #endif @@ -8737,7 +8737,7 @@ WOLFSSL_TEST_SUBROUTINE int aes_test(void) (bigPlain == NULL)) { if (bigCipher != NULL) XFREE(bigCipher, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); - ERROR_OUT(-5947, out); + ERROR_OUT(-5992, out); } #else byte bigCipher[sizeof(bigMsg)]; @@ -8909,6 +8909,12 @@ WOLFSSL_TEST_SUBROUTINE int aes_test(void) 0xad,0x2b,0x41,0x7b,0xe6,0x6c,0x37,0x10 }; + WOLFSSL_SMALL_STACK_STATIC const byte ctrIvWrap[] = + { + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff + }; + #ifdef WOLFSSL_AES_128 WOLFSSL_SMALL_STACK_STATIC const byte oddCipher[] = { @@ -8933,6 +8939,13 @@ WOLFSSL_TEST_SUBROUTINE int aes_test(void) 0x1e,0x03,0x1d,0xda,0x2f,0xbe,0x03,0xd1, 0x79,0x21,0x70,0xa0,0xf3,0x00,0x9c,0xee }; + + WOLFSSL_SMALL_STACK_STATIC const byte ctr128WrapCipher[] = + { + 0xe1,0x33,0x38,0xe3,0x6c,0xb7,0x19,0x62, + 0xe0,0x0d,0x02,0x0b,0x4c,0xed,0xbd,0x86, + 0xd3,0xda,0xe1,0x5b,0x04 + }; #endif /* WOLFSSL_AES_128 */ #ifdef WOLFSSL_AES_192 @@ -8954,6 +8967,13 @@ WOLFSSL_TEST_SUBROUTINE int aes_test(void) 0x4f,0x78,0xa7,0xf6,0xd2,0x98,0x09,0x58, 0x5a,0x97,0xda,0xec,0x58,0xc6,0xb0,0x50 }; + + WOLFSSL_SMALL_STACK_STATIC const byte ctr192WrapCipher[] = + { + 0xd4,0x45,0x1f,0xc8,0xa4,0x71,0xbf,0xd9, + 
0x61,0xe2,0xec,0xa8,0x4d,0x80,0x7b,0x81, + 0xf8,0xd4,0x6f,0xa1,0x38 + }; #endif #ifdef WOLFSSL_AES_256 WOLFSSL_SMALL_STACK_STATIC const byte ctr256Key[] = @@ -8975,78 +8995,113 @@ WOLFSSL_TEST_SUBROUTINE int aes_test(void) 0xdf,0xc9,0xc5,0x8d,0xb6,0x7a,0xad,0xa6, 0x13,0xc2,0xdd,0x08,0x45,0x79,0x41,0xa6 }; + + WOLFSSL_SMALL_STACK_STATIC const byte ctr256WrapCipher[] = + { + 0xed,0x4b,0xc8,0xa8,0x5c,0x84,0xae,0x14, + 0xc4,0x6e,0xb2,0x25,0xba,0xf7,0x4f,0x47, + 0x38,0xf1,0xe2,0xee,0x3d + }; #endif #ifdef WOLFSSL_AES_128 ret = wc_AesSetKeyDirect(enc, ctr128Key, sizeof(ctr128Key), ctrIv, AES_ENCRYPTION); if (ret != 0) { - ERROR_OUT(-5947, out); + ERROR_OUT(-5930, out); } /* Ctr only uses encrypt, even on key setup */ ret = wc_AesSetKeyDirect(dec, ctr128Key, sizeof(ctr128Key), ctrIv, AES_ENCRYPTION); if (ret != 0) { - ERROR_OUT(-5948, out); + ERROR_OUT(-5931, out); } ret = wc_AesCtrEncrypt(enc, cipher, ctrPlain, sizeof(ctrPlain)); if (ret != 0) { - ERROR_OUT(-5923, out); + ERROR_OUT(-5932, out); } ret = wc_AesCtrEncrypt(dec, plain, cipher, sizeof(ctrPlain)); if (ret != 0) { - ERROR_OUT(-5924, out); + ERROR_OUT(-5933, out); } if (XMEMCMP(plain, ctrPlain, sizeof(ctrPlain))) - ERROR_OUT(-5925, out); + ERROR_OUT(-5934, out); if (XMEMCMP(cipher, ctr128Cipher, sizeof(ctr128Cipher))) - ERROR_OUT(-5926, out); + ERROR_OUT(-5935, out); /* let's try with just 9 bytes, non block size test */ ret = wc_AesSetKeyDirect(enc, ctr128Key, AES_BLOCK_SIZE, ctrIv, AES_ENCRYPTION); if (ret != 0) { - ERROR_OUT(-5949, out); + ERROR_OUT(-5936, out); } /* Ctr only uses encrypt, even on key setup */ ret = wc_AesSetKeyDirect(dec, ctr128Key, AES_BLOCK_SIZE, ctrIv, AES_ENCRYPTION); if (ret != 0) { - ERROR_OUT(-5952, out); + ERROR_OUT(-5937, out); } ret = wc_AesCtrEncrypt(enc, cipher, ctrPlain, sizeof(oddCipher)); if (ret != 0) { - ERROR_OUT(-5927, out); + ERROR_OUT(-5938, out); } ret = wc_AesCtrEncrypt(dec, plain, cipher, sizeof(oddCipher)); if (ret != 0) { - ERROR_OUT(-5928, out); + ERROR_OUT(-5939, out); } if (XMEMCMP(plain, ctrPlain, sizeof(oddCipher))) - ERROR_OUT(-5929, out); + ERROR_OUT(-5940, out); if (XMEMCMP(cipher, ctr128Cipher, sizeof(oddCipher))) - ERROR_OUT(-5930, out); + ERROR_OUT(-5941, out); /* and an additional 9 bytes to reuse tmp left buffer */ ret = wc_AesCtrEncrypt(enc, cipher, ctrPlain, sizeof(oddCipher)); if (ret != 0) { - ERROR_OUT(-5931, out); + ERROR_OUT(-5942, out); } ret = wc_AesCtrEncrypt(dec, plain, cipher, sizeof(oddCipher)); if (ret != 0) { - ERROR_OUT(-5932, out); + ERROR_OUT(-5943, out); } if (XMEMCMP(plain, ctrPlain, sizeof(oddCipher))) - ERROR_OUT(-5933, out); + ERROR_OUT(-5944, out); if (XMEMCMP(cipher, oddCipher, sizeof(oddCipher))) - ERROR_OUT(-5934, out); + ERROR_OUT(-5945, out); + + /* When more than a block but less than two and wrapping ctr */ + ret = wc_AesSetKeyDirect(enc, ctr128Key, AES_BLOCK_SIZE, + ctrIvWrap, AES_ENCRYPTION); + if (ret != 0) { + ERROR_OUT(-5946, out); + } + /* Ctr only uses encrypt, even on key setup */ + ret = wc_AesSetKeyDirect(dec, ctr128Key, AES_BLOCK_SIZE, + ctrIvWrap, AES_ENCRYPTION); + if (ret != 0) { + ERROR_OUT(-5947, out); + } + + ret = wc_AesCtrEncrypt(enc, cipher, ctrPlain, sizeof(ctr128WrapCipher)); + if (ret != 0) { + ERROR_OUT(-5948, out); + } + ret = wc_AesCtrEncrypt(dec, plain, cipher, sizeof(ctr128WrapCipher)); + if (ret != 0) { + ERROR_OUT(-5949, out); + } + + if (XMEMCMP(plain, ctrPlain, sizeof(ctr128WrapCipher))) + ERROR_OUT(-5950, out); + + if (XMEMCMP(cipher, ctr128WrapCipher, sizeof(ctr128WrapCipher))) + ERROR_OUT(-5951, out); #endif /* 
WOLFSSL_AES_128 */ #ifdef WOLFSSL_AES_192 @@ -9054,30 +9109,58 @@ WOLFSSL_TEST_SUBROUTINE int aes_test(void) ret = wc_AesSetKeyDirect(enc, ctr192Key, sizeof(ctr192Key), ctrIv, AES_ENCRYPTION); if (ret != 0) { - ERROR_OUT(-5953, out); + ERROR_OUT(-5952, out); } /* Ctr only uses encrypt, even on key setup */ ret = wc_AesSetKeyDirect(dec, ctr192Key, sizeof(ctr192Key), ctrIv, AES_ENCRYPTION); if (ret != 0) { - ERROR_OUT(-5954, out); + ERROR_OUT(-5953, out); } XMEMSET(plain, 0, sizeof(plain)); ret = wc_AesCtrEncrypt(enc, plain, ctr192Cipher, sizeof(ctr192Cipher)); if (ret != 0) { - ERROR_OUT(-5935, out); + ERROR_OUT(-5954, out); } if (XMEMCMP(plain, ctrPlain, sizeof(ctr192Cipher))) - ERROR_OUT(-5936, out); + ERROR_OUT(-5955, out); ret = wc_AesCtrEncrypt(dec, cipher, ctrPlain, sizeof(ctrPlain)); if (ret != 0) { - ERROR_OUT(-5937, out); + ERROR_OUT(-5956, out); } if (XMEMCMP(ctr192Cipher, cipher, sizeof(ctr192Cipher))) - ERROR_OUT(-5938, out); + ERROR_OUT(-5957, out); + + /* When more than a block but less than two and wrapping ctr */ + ret = wc_AesSetKeyDirect(enc, ctr192Key, AES_BLOCK_SIZE, + ctrIvWrap, AES_ENCRYPTION); + if (ret != 0) { + ERROR_OUT(-5958, out); + } + /* Ctr only uses encrypt, even on key setup */ + ret = wc_AesSetKeyDirect(dec, ctr192Key, AES_BLOCK_SIZE, + ctrIvWrap, AES_ENCRYPTION); + if (ret != 0) { + ERROR_OUT(-5959, out); + } + + ret = wc_AesCtrEncrypt(enc, cipher, ctrPlain, sizeof(ctr192WrapCipher)); + if (ret != 0) { + ERROR_OUT(-5960, out); + } + ret = wc_AesCtrEncrypt(dec, plain, cipher, sizeof(ctr192WrapCipher)); + if (ret != 0) { + ERROR_OUT(-5961, out); + } + + if (XMEMCMP(plain, ctrPlain, sizeof(ctr192WrapCipher))) + ERROR_OUT(-5962, out); + + if (XMEMCMP(cipher, ctr192WrapCipher, sizeof(ctr192WrapCipher))) + ERROR_OUT(-5963, out); #endif /* WOLFSSL_AES_192 */ #ifdef WOLFSSL_AES_256 @@ -9085,30 +9168,58 @@ WOLFSSL_TEST_SUBROUTINE int aes_test(void) ret = wc_AesSetKeyDirect(enc, ctr256Key, sizeof(ctr256Key), ctrIv, AES_ENCRYPTION); if (ret != 0) { - ERROR_OUT(-5955, out); + ERROR_OUT(-5964, out); } /* Ctr only uses encrypt, even on key setup */ ret = wc_AesSetKeyDirect(dec, ctr256Key, sizeof(ctr256Key), ctrIv, AES_ENCRYPTION); if (ret != 0) { - ERROR_OUT(-5956, out); + ERROR_OUT(-5965, out); } XMEMSET(plain, 0, sizeof(plain)); ret = wc_AesCtrEncrypt(enc, plain, ctr256Cipher, sizeof(ctr256Cipher)); if (ret != 0) { - ERROR_OUT(-5939, out); + ERROR_OUT(-5966, out); } if (XMEMCMP(plain, ctrPlain, sizeof(ctrPlain))) - ERROR_OUT(-5940, out); + ERROR_OUT(-5967, out); ret = wc_AesCtrEncrypt(dec, cipher, ctrPlain, sizeof(ctrPlain)); if (ret != 0) { - ERROR_OUT(-5941, out); + ERROR_OUT(-5968, out); } if (XMEMCMP(ctr256Cipher, cipher, sizeof(ctr256Cipher))) - ERROR_OUT(-5942, out); + ERROR_OUT(-5969, out); + + /* When more than a block but less than two and wrapping ctr */ + ret = wc_AesSetKeyDirect(enc, ctr256Key, AES_BLOCK_SIZE, + ctrIvWrap, AES_ENCRYPTION); + if (ret != 0) { + ERROR_OUT(-5970, out); + } + /* Ctr only uses encrypt, even on key setup */ + ret = wc_AesSetKeyDirect(dec, ctr256Key, AES_BLOCK_SIZE, + ctrIvWrap, AES_ENCRYPTION); + if (ret != 0) { + ERROR_OUT(-5971, out); + } + + ret = wc_AesCtrEncrypt(enc, cipher, ctrPlain, sizeof(ctr256WrapCipher)); + if (ret != 0) { + ERROR_OUT(-5972, out); + } + ret = wc_AesCtrEncrypt(dec, plain, cipher, sizeof(ctr256WrapCipher)); + if (ret != 0) { + ERROR_OUT(-5973, out); + } + + if (XMEMCMP(plain, ctrPlain, sizeof(ctr256WrapCipher))) + ERROR_OUT(-5974, out); + + if (XMEMCMP(cipher, ctr256WrapCipher, sizeof(ctr256WrapCipher))) 
+ ERROR_OUT(-5975, out); #endif /* WOLFSSL_AES_256 */ } #endif /* WOLFSSL_AES_COUNTER */ @@ -9138,34 +9249,34 @@ WOLFSSL_TEST_SUBROUTINE int aes_test(void) XMEMSET(cipher, 0, AES_BLOCK_SIZE); ret = wc_AesSetKey(enc, niKey, sizeof(niKey), cipher, AES_ENCRYPTION); if (ret != 0) - ERROR_OUT(-5943, out); + ERROR_OUT(-5976, out); #if !defined(HAVE_SELFTEST) && \ (defined(WOLFSSL_LINUXKM) || \ !defined(HAVE_FIPS) || \ (defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(5,3))) if (wc_AesEncryptDirect(enc, cipher, niPlain) != 0) - ERROR_OUT(-5950, out); + ERROR_OUT(-5977, out); #else wc_AesEncryptDirect(enc, cipher, niPlain); #endif if (XMEMCMP(cipher, niCipher, AES_BLOCK_SIZE) != 0) - ERROR_OUT(-5944, out); + ERROR_OUT(-5978, out); XMEMSET(plain, 0, AES_BLOCK_SIZE); ret = wc_AesSetKey(dec, niKey, sizeof(niKey), plain, AES_DECRYPTION); if (ret != 0) - ERROR_OUT(-5945, out); + ERROR_OUT(-5979, out); #if !defined(HAVE_SELFTEST) && \ (defined(WOLFSSL_LINUXKM) || \ !defined(HAVE_FIPS) || \ (defined(FIPS_VERSION_GE) && FIPS_VERSION_GE(5,3))) if (wc_AesDecryptDirect(dec, plain, niCipher) != 0) - ERROR_OUT(-5951, out); + ERROR_OUT(-5980, out); #else wc_AesDecryptDirect(dec, plain, niCipher); #endif if (XMEMCMP(plain, niPlain, AES_BLOCK_SIZE) != 0) - ERROR_OUT(-5946, out); + ERROR_OUT(-5981, out); } #endif /* WOLFSSL_AES_DIRECT && WOLFSSL_AES_256 */
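

Reviewer note (commentary, not part of the patch): the substance of the armv8-aes.c change is counter carry propagation. The old NEON CTR code incremented only the low 64-bit lane of the counter ("ADD v13.2d, v13.2d, v14.2d"), so a counter whose low half was 0xFFFFFFFFFFFFFFFF wrapped to zero without carrying into the high half. The "CMEQ / EXT / SUB" triple that the patch adds after each ADD detects the wrap and adds the carry. A minimal C sketch of the same logic, assuming the 16-byte big-endian counter is held as two host-order 64-bit halves (hi = network-order bytes 0-7, lo = bytes 8-15); the function name ctr128_inc is illustrative, not from the patch:

#include <stdint.h>

/* Illustrative only -- mirrors the ADD/CMEQ/SUB sequence from the
 * patched assembly; this is not code from the patch itself. */
static void ctr128_inc(uint64_t* hi, uint64_t* lo)
{
    uint64_t carry;

    *lo += 1;                               /* ADD: bump the low lane */
    carry = (*lo == 0) ? ~(uint64_t)0 : 0;  /* CMEQ: all-ones mask iff lo wrapped */
    *hi -= carry;                           /* SUB: subtracting -1 adds the carry */
}

In the assembly, the intervening EXT step simply shifts the all-ones mask produced by CMEQ out of the low lane and into the high lane (using a zero register for the other half) so that the SUB applies the carry to the correct lane.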
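
The new ctrIvWrap / ctr*WrapCipher vectors in test.c are built to hit exactly this path: the IV is all 0xFF bytes, so the very first increment wraps the full 128-bit counter, and each test encrypts 21 bytes ("more than a block but less than two"), which exercises the double-block, single-block, and partial-block code in one call. The refactored wc_AesCtrEncrypt also changes how that partial tail is produced: instead of calling wc_AesEncrypt plus the removed byte-wise IncrementAesCounter, it runs the shared wc_aes_ctr_encrypt_asm routine over a single all-zero input block, which leaves pure keystream in aes->tmp (ciphertext XOR zeros is the keystream) to be XORed byte-by-byte against the remaining input, with aes->left tracking how much of that keystream is still unused for the next call.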