Various fixes for Aarch64/ARM32/Thumb2 ASM

cpuid.c: hwcaps not used.
thumb2-*: ldm -> LDM
sp_arm32.c: No register assignment, fix sp_*_from_bin
sp_armthumb.c: fix sp_*_from_bin
sp_cortexm.c: fix line lengths, fix sp_*_from_bin
pull/8672/head
Sean Parkinson 2025-04-15 17:00:06 +10:00
parent 83d134e0be
commit cf1f8e14ff
9 changed files with 4540 additions and 1070 deletions

View File

@ -187,6 +187,7 @@
cpuid_flags |= CPUID_SM4; cpuid_flags |= CPUID_SM4;
#endif #endif
(void)hwcaps;
cpuid_check = 1; cpuid_check = 1;
} }
} }

View File

@ -229,8 +229,8 @@ void AES_invert_key(unsigned char* ks, word32 rounds)
#else #else
"L_AES_invert_key_loop_%=:\n\t" "L_AES_invert_key_loop_%=:\n\t"
#endif #endif
"ldm %[ks], {r2, r3, r4, r5}\n\t" "LDM %[ks], {r2, r3, r4, r5}\n\t"
"ldm r10, {r6, r7, r8, r9}\n\t" "LDM r10, {r6, r7, r8, r9}\n\t"
"STM r10, {r2, r3, r4, r5}\n\t" "STM r10, {r2, r3, r4, r5}\n\t"
"STM %[ks]!, {r6, r7, r8, r9}\n\t" "STM %[ks]!, {r6, r7, r8, r9}\n\t"
"SUBS r11, r11, #0x2\n\t" "SUBS r11, r11, #0x2\n\t"
@ -251,7 +251,7 @@ void AES_invert_key(unsigned char* ks, word32 rounds)
#else #else
"L_AES_invert_key_mix_loop_%=:\n\t" "L_AES_invert_key_mix_loop_%=:\n\t"
#endif #endif
"ldm %[ks], {r2, r3, r4, r5}\n\t" "LDM %[ks], {r2, r3, r4, r5}\n\t"
"UBFX r6, r2, #0, #8\n\t" "UBFX r6, r2, #0, #8\n\t"
"UBFX r7, r2, #8, #8\n\t" "UBFX r7, r2, #8, #8\n\t"
"UBFX r8, r2, #16, #8\n\t" "UBFX r8, r2, #16, #8\n\t"
@ -424,9 +424,9 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len,
"EOR r3, r7, r4, LSL #8\n\t" "EOR r3, r7, r4, LSL #8\n\t"
"EOR r3, r3, r5, LSL #16\n\t" "EOR r3, r3, r5, LSL #16\n\t"
"EOR r3, r3, r6, LSL #24\n\t" "EOR r3, r3, r6, LSL #24\n\t"
"ldm %[ks]!, {r4, r5, r6, r7}\n\t" "LDM %[ks]!, {r4, r5, r6, r7}\n\t"
"EOR r4, r4, r3\n\t" "EOR r4, r4, r3\n\t"
"ldm lr!, {r3}\n\t" "LDM lr!, {r3}\n\t"
"EOR r4, r4, r3\n\t" "EOR r4, r4, r3\n\t"
"EOR r5, r5, r4\n\t" "EOR r5, r5, r4\n\t"
"EOR r6, r6, r5\n\t" "EOR r6, r6, r5\n\t"
@ -446,7 +446,7 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len,
"EOR r3, r3, r4, LSL #8\n\t" "EOR r3, r3, r4, LSL #8\n\t"
"EOR r3, r3, r5, LSL #16\n\t" "EOR r3, r3, r5, LSL #16\n\t"
"EOR r3, r3, r6, LSL #24\n\t" "EOR r3, r3, r6, LSL #24\n\t"
"ldm %[ks]!, {r4, r5, r6, r7}\n\t" "LDM %[ks]!, {r4, r5, r6, r7}\n\t"
"EOR r4, r4, r3\n\t" "EOR r4, r4, r3\n\t"
"EOR r5, r5, r4\n\t" "EOR r5, r5, r4\n\t"
"EOR r6, r6, r5\n\t" "EOR r6, r6, r5\n\t"
@ -473,9 +473,9 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len,
"EOR r3, r7, r4, LSL #8\n\t" "EOR r3, r7, r4, LSL #8\n\t"
"EOR r3, r3, r5, LSL #16\n\t" "EOR r3, r3, r5, LSL #16\n\t"
"EOR r3, r3, r6, LSL #24\n\t" "EOR r3, r3, r6, LSL #24\n\t"
"ldm %[ks]!, {r4, r5, r6, r7}\n\t" "LDM %[ks]!, {r4, r5, r6, r7}\n\t"
"EOR r4, r4, r3\n\t" "EOR r4, r4, r3\n\t"
"ldm lr!, {r3}\n\t" "LDM lr!, {r3}\n\t"
"EOR r4, r4, r3\n\t" "EOR r4, r4, r3\n\t"
"EOR r5, r5, r4\n\t" "EOR r5, r5, r4\n\t"
"EOR r6, r6, r5\n\t" "EOR r6, r6, r5\n\t"
@ -529,9 +529,9 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len,
"EOR r3, r9, r4, LSL #8\n\t" "EOR r3, r9, r4, LSL #8\n\t"
"EOR r3, r3, r5, LSL #16\n\t" "EOR r3, r3, r5, LSL #16\n\t"
"EOR r3, r3, r6, LSL #24\n\t" "EOR r3, r3, r6, LSL #24\n\t"
"ldm %[ks]!, {r4, r5, r6, r7, r8, r9}\n\t" "LDM %[ks]!, {r4, r5, r6, r7, r8, r9}\n\t"
"EOR r4, r4, r3\n\t" "EOR r4, r4, r3\n\t"
"ldm lr!, {r3}\n\t" "LDM lr!, {r3}\n\t"
"EOR r4, r4, r3\n\t" "EOR r4, r4, r3\n\t"
"EOR r5, r5, r4\n\t" "EOR r5, r5, r4\n\t"
"EOR r6, r6, r5\n\t" "EOR r6, r6, r5\n\t"
@ -558,9 +558,9 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len,
"EOR r3, r9, r4, LSL #8\n\t" "EOR r3, r9, r4, LSL #8\n\t"
"EOR r3, r3, r5, LSL #16\n\t" "EOR r3, r3, r5, LSL #16\n\t"
"EOR r3, r3, r6, LSL #24\n\t" "EOR r3, r3, r6, LSL #24\n\t"
"ldm %[ks]!, {r4, r5, r6, r7, r8, r9}\n\t" "LDM %[ks]!, {r4, r5, r6, r7, r8, r9}\n\t"
"EOR r4, r4, r3\n\t" "EOR r4, r4, r3\n\t"
"ldm lr!, {r3}\n\t" "LDM lr!, {r3}\n\t"
"EOR r4, r4, r3\n\t" "EOR r4, r4, r3\n\t"
"EOR r5, r5, r4\n\t" "EOR r5, r5, r4\n\t"
"EOR r6, r6, r5\n\t" "EOR r6, r6, r5\n\t"
@ -606,9 +606,9 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len,
"EOR r3, r7, r4, LSL #8\n\t" "EOR r3, r7, r4, LSL #8\n\t"
"EOR r3, r3, r5, LSL #16\n\t" "EOR r3, r3, r5, LSL #16\n\t"
"EOR r3, r3, r6, LSL #24\n\t" "EOR r3, r3, r6, LSL #24\n\t"
"ldm %[ks]!, {r4, r5, r6, r7}\n\t" "LDM %[ks]!, {r4, r5, r6, r7}\n\t"
"EOR r4, r4, r3\n\t" "EOR r4, r4, r3\n\t"
"ldm lr!, {r3}\n\t" "LDM lr!, {r3}\n\t"
"EOR r4, r4, r3\n\t" "EOR r4, r4, r3\n\t"
"EOR r5, r5, r4\n\t" "EOR r5, r5, r4\n\t"
"EOR r6, r6, r5\n\t" "EOR r6, r6, r5\n\t"
@ -700,7 +700,7 @@ void AES_encrypt_block(const word32* te, int nr, int len, const word32* ks)
"LDR r11, [%[te], r11, LSL #2]\n\t" "LDR r11, [%[te], r11, LSL #2]\n\t"
"LDR r2, [%[te], r2, LSL #2]\n\t" "LDR r2, [%[te], r2, LSL #2]\n\t"
"EOR lr, lr, r6, ROR #24\n\t" "EOR lr, lr, r6, ROR #24\n\t"
"ldm %[ks]!, {r4, r5, r6, r7}\n\t" "LDM %[ks]!, {r4, r5, r6, r7}\n\t"
"EOR r11, r11, lr, ROR #24\n\t" "EOR r11, r11, lr, ROR #24\n\t"
"EOR r11, r11, r2, ROR #8\n\t" "EOR r11, r11, r2, ROR #8\n\t"
/* XOR in Key Schedule */ /* XOR in Key Schedule */
@ -750,7 +750,7 @@ void AES_encrypt_block(const word32* te, int nr, int len, const word32* ks)
"LDR r7, [%[te], r7, LSL #2]\n\t" "LDR r7, [%[te], r7, LSL #2]\n\t"
"LDR r2, [%[te], r2, LSL #2]\n\t" "LDR r2, [%[te], r2, LSL #2]\n\t"
"EOR lr, lr, r10, ROR #24\n\t" "EOR lr, lr, r10, ROR #24\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
"EOR r7, r7, lr, ROR #24\n\t" "EOR r7, r7, lr, ROR #24\n\t"
"EOR r7, r7, r2, ROR #8\n\t" "EOR r7, r7, r2, ROR #8\n\t"
/* XOR in Key Schedule */ /* XOR in Key Schedule */
@ -808,7 +808,7 @@ void AES_encrypt_block(const word32* te, int nr, int len, const word32* ks)
"LDR r11, [%[te], r11, LSL #2]\n\t" "LDR r11, [%[te], r11, LSL #2]\n\t"
"LDR r2, [%[te], r2, LSL #2]\n\t" "LDR r2, [%[te], r2, LSL #2]\n\t"
"EOR lr, lr, r6, ROR #24\n\t" "EOR lr, lr, r6, ROR #24\n\t"
"ldm %[ks]!, {r4, r5, r6, r7}\n\t" "LDM %[ks]!, {r4, r5, r6, r7}\n\t"
"EOR r11, r11, lr, ROR #24\n\t" "EOR r11, r11, lr, ROR #24\n\t"
"EOR r11, r11, r2, ROR #8\n\t" "EOR r11, r11, r2, ROR #8\n\t"
/* XOR in Key Schedule */ /* XOR in Key Schedule */
@ -858,7 +858,7 @@ void AES_encrypt_block(const word32* te, int nr, int len, const word32* ks)
"LDRB lr, [%[te], lr, LSL #2]\n\t" "LDRB lr, [%[te], lr, LSL #2]\n\t"
"LDRB r2, [%[te], r2, LSL #2]\n\t" "LDRB r2, [%[te], r2, LSL #2]\n\t"
"EOR lr, lr, r11, LSL #16\n\t" "EOR lr, lr, r11, LSL #16\n\t"
"ldm %[ks], {r8, r9, r10, r11}\n\t" "LDM %[ks], {r8, r9, r10, r11}\n\t"
"EOR r7, r7, lr, LSL #8\n\t" "EOR r7, r7, lr, LSL #8\n\t"
"EOR r7, r7, r2, LSL #16\n\t" "EOR r7, r7, r2, LSL #16\n\t"
/* XOR in Key Schedule */ /* XOR in Key Schedule */
@ -945,7 +945,7 @@ void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
"REV r6, r6\n\t" "REV r6, r6\n\t"
"REV r7, r7\n\t" "REV r7, r7\n\t"
"PUSH {r1, %[len], lr}\n\t" "PUSH {r1, %[len], lr}\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
/* Round: 0 - XOR in key schedule */ /* Round: 0 - XOR in key schedule */
"EOR r4, r4, r8\n\t" "EOR r4, r4, r8\n\t"
"EOR r5, r5, r9\n\t" "EOR r5, r5, r9\n\t"
@ -1001,7 +1001,7 @@ void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
"REV r6, r6\n\t" "REV r6, r6\n\t"
"REV r7, r7\n\t" "REV r7, r7\n\t"
"PUSH {r1, %[len], lr}\n\t" "PUSH {r1, %[len], lr}\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
/* Round: 0 - XOR in key schedule */ /* Round: 0 - XOR in key schedule */
"EOR r4, r4, r8\n\t" "EOR r4, r4, r8\n\t"
"EOR r5, r5, r9\n\t" "EOR r5, r5, r9\n\t"
@ -1057,7 +1057,7 @@ void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
"REV r6, r6\n\t" "REV r6, r6\n\t"
"REV r7, r7\n\t" "REV r7, r7\n\t"
"PUSH {r1, %[len], lr}\n\t" "PUSH {r1, %[len], lr}\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
/* Round: 0 - XOR in key schedule */ /* Round: 0 - XOR in key schedule */
"EOR r4, r4, r8\n\t" "EOR r4, r4, r8\n\t"
"EOR r5, r5, r9\n\t" "EOR r5, r5, r9\n\t"
@ -1143,7 +1143,7 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
"MOV lr, %[in]\n\t" "MOV lr, %[in]\n\t"
"MOV r0, %[L_AES_Thumb2_te_ecb]\n\t" "MOV r0, %[L_AES_Thumb2_te_ecb]\n\t"
"ldm r9, {r4, r5, r6, r7}\n\t" "LDM r9, {r4, r5, r6, r7}\n\t"
"PUSH {%[ks], r9}\n\t" "PUSH {%[ks], r9}\n\t"
"CMP r8, #0xa\n\t" "CMP r8, #0xa\n\t"
#if defined(__GNUC__) #if defined(__GNUC__)
@ -1176,7 +1176,7 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
"EOR r6, r6, r10\n\t" "EOR r6, r6, r10\n\t"
"EOR r7, r7, r11\n\t" "EOR r7, r7, r11\n\t"
"PUSH {r1, %[len], lr}\n\t" "PUSH {r1, %[len], lr}\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
"REV r4, r4\n\t" "REV r4, r4\n\t"
"REV r5, r5\n\t" "REV r5, r5\n\t"
"REV r6, r6\n\t" "REV r6, r6\n\t"
@ -1236,7 +1236,7 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
"EOR r6, r6, r10\n\t" "EOR r6, r6, r10\n\t"
"EOR r7, r7, r11\n\t" "EOR r7, r7, r11\n\t"
"PUSH {r1, %[len], lr}\n\t" "PUSH {r1, %[len], lr}\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
"REV r4, r4\n\t" "REV r4, r4\n\t"
"REV r5, r5\n\t" "REV r5, r5\n\t"
"REV r6, r6\n\t" "REV r6, r6\n\t"
@ -1296,7 +1296,7 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
"EOR r6, r6, r10\n\t" "EOR r6, r6, r10\n\t"
"EOR r7, r7, r11\n\t" "EOR r7, r7, r11\n\t"
"PUSH {r1, %[len], lr}\n\t" "PUSH {r1, %[len], lr}\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
"REV r4, r4\n\t" "REV r4, r4\n\t"
"REV r5, r5\n\t" "REV r5, r5\n\t"
"REV r6, r6\n\t" "REV r6, r6\n\t"
@ -1387,7 +1387,7 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out,
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
"MOV lr, %[in]\n\t" "MOV lr, %[in]\n\t"
"MOV r0, %[L_AES_Thumb2_te_ecb]\n\t" "MOV r0, %[L_AES_Thumb2_te_ecb]\n\t"
"ldm r8, {r4, r5, r6, r7}\n\t" "LDM r8, {r4, r5, r6, r7}\n\t"
"REV r4, r4\n\t" "REV r4, r4\n\t"
"REV r5, r5\n\t" "REV r5, r5\n\t"
"REV r6, r6\n\t" "REV r6, r6\n\t"
@ -1423,7 +1423,7 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out,
"ADCS r9, r5, #0x0\n\t" "ADCS r9, r5, #0x0\n\t"
"ADC r8, r4, #0x0\n\t" "ADC r8, r4, #0x0\n\t"
"STM lr, {r8, r9, r10, r11}\n\t" "STM lr, {r8, r9, r10, r11}\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
/* Round: 0 - XOR in key schedule */ /* Round: 0 - XOR in key schedule */
"EOR r4, r4, r8\n\t" "EOR r4, r4, r8\n\t"
"EOR r5, r5, r9\n\t" "EOR r5, r5, r9\n\t"
@ -1450,7 +1450,7 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out,
"STR r5, [%[out], #4]\n\t" "STR r5, [%[out], #4]\n\t"
"STR r6, [%[out], #8]\n\t" "STR r6, [%[out], #8]\n\t"
"STR r7, [%[out], #12]\n\t" "STR r7, [%[out], #12]\n\t"
"ldm r8, {r4, r5, r6, r7}\n\t" "LDM r8, {r4, r5, r6, r7}\n\t"
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
@ -1487,7 +1487,7 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out,
"ADCS r9, r5, #0x0\n\t" "ADCS r9, r5, #0x0\n\t"
"ADC r8, r4, #0x0\n\t" "ADC r8, r4, #0x0\n\t"
"STM lr, {r8, r9, r10, r11}\n\t" "STM lr, {r8, r9, r10, r11}\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
/* Round: 0 - XOR in key schedule */ /* Round: 0 - XOR in key schedule */
"EOR r4, r4, r8\n\t" "EOR r4, r4, r8\n\t"
"EOR r5, r5, r9\n\t" "EOR r5, r5, r9\n\t"
@ -1514,7 +1514,7 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out,
"STR r5, [%[out], #4]\n\t" "STR r5, [%[out], #4]\n\t"
"STR r6, [%[out], #8]\n\t" "STR r6, [%[out], #8]\n\t"
"STR r7, [%[out], #12]\n\t" "STR r7, [%[out], #12]\n\t"
"ldm r8, {r4, r5, r6, r7}\n\t" "LDM r8, {r4, r5, r6, r7}\n\t"
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
@ -1551,7 +1551,7 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out,
"ADCS r9, r5, #0x0\n\t" "ADCS r9, r5, #0x0\n\t"
"ADC r8, r4, #0x0\n\t" "ADC r8, r4, #0x0\n\t"
"STM lr, {r8, r9, r10, r11}\n\t" "STM lr, {r8, r9, r10, r11}\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
/* Round: 0 - XOR in key schedule */ /* Round: 0 - XOR in key schedule */
"EOR r4, r4, r8\n\t" "EOR r4, r4, r8\n\t"
"EOR r5, r5, r9\n\t" "EOR r5, r5, r9\n\t"
@ -1578,7 +1578,7 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out,
"STR r5, [%[out], #4]\n\t" "STR r5, [%[out], #4]\n\t"
"STR r6, [%[out], #8]\n\t" "STR r6, [%[out], #8]\n\t"
"STR r7, [%[out], #12]\n\t" "STR r7, [%[out], #12]\n\t"
"ldm r8, {r4, r5, r6, r7}\n\t" "LDM r8, {r4, r5, r6, r7}\n\t"
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
@ -1675,7 +1675,7 @@ void AES_decrypt_block(const word32* td, int nr, const byte* td4)
"LDR r11, [%[td], r11, LSL #2]\n\t" "LDR r11, [%[td], r11, LSL #2]\n\t"
"LDR lr, [%[td], lr, LSL #2]\n\t" "LDR lr, [%[td], lr, LSL #2]\n\t"
"EOR r12, r12, r4, ROR #24\n\t" "EOR r12, r12, r4, ROR #24\n\t"
"ldm r3!, {r4, r5, r6, r7}\n\t" "LDM r3!, {r4, r5, r6, r7}\n\t"
"EOR r11, r11, lr, ROR #8\n\t" "EOR r11, r11, lr, ROR #8\n\t"
"EOR r11, r11, r12, ROR #24\n\t" "EOR r11, r11, r12, ROR #24\n\t"
/* XOR in Key Schedule */ /* XOR in Key Schedule */
@ -1725,7 +1725,7 @@ void AES_decrypt_block(const word32* td, int nr, const byte* td4)
"LDR r7, [%[td], r7, LSL #2]\n\t" "LDR r7, [%[td], r7, LSL #2]\n\t"
"LDR lr, [%[td], lr, LSL #2]\n\t" "LDR lr, [%[td], lr, LSL #2]\n\t"
"EOR r12, r12, r8, ROR #24\n\t" "EOR r12, r12, r8, ROR #24\n\t"
"ldm r3!, {r8, r9, r10, r11}\n\t" "LDM r3!, {r8, r9, r10, r11}\n\t"
"EOR r7, r7, lr, ROR #8\n\t" "EOR r7, r7, lr, ROR #8\n\t"
"EOR r7, r7, r12, ROR #24\n\t" "EOR r7, r7, r12, ROR #24\n\t"
/* XOR in Key Schedule */ /* XOR in Key Schedule */
@ -1783,7 +1783,7 @@ void AES_decrypt_block(const word32* td, int nr, const byte* td4)
"LDR r11, [%[td], r11, LSL #2]\n\t" "LDR r11, [%[td], r11, LSL #2]\n\t"
"LDR lr, [%[td], lr, LSL #2]\n\t" "LDR lr, [%[td], lr, LSL #2]\n\t"
"EOR r12, r12, r4, ROR #24\n\t" "EOR r12, r12, r4, ROR #24\n\t"
"ldm r3!, {r4, r5, r6, r7}\n\t" "LDM r3!, {r4, r5, r6, r7}\n\t"
"EOR r11, r11, lr, ROR #8\n\t" "EOR r11, r11, lr, ROR #8\n\t"
"EOR r11, r11, r12, ROR #24\n\t" "EOR r11, r11, r12, ROR #24\n\t"
/* XOR in Key Schedule */ /* XOR in Key Schedule */
@ -1833,7 +1833,7 @@ void AES_decrypt_block(const word32* td, int nr, const byte* td4)
"LDRB r7, [%[td4], r7]\n\t" "LDRB r7, [%[td4], r7]\n\t"
"LDRB lr, [%[td4], lr]\n\t" "LDRB lr, [%[td4], lr]\n\t"
"EOR r12, r12, r11, LSL #16\n\t" "EOR r12, r12, r11, LSL #16\n\t"
"ldm r3, {r8, r9, r10, r11}\n\t" "LDM r3, {r8, r9, r10, r11}\n\t"
"EOR r7, r7, r12, LSL #8\n\t" "EOR r7, r7, r12, LSL #8\n\t"
"EOR r7, r7, lr, LSL #16\n\t" "EOR r7, r7, lr, LSL #16\n\t"
/* XOR in Key Schedule */ /* XOR in Key Schedule */
@ -1956,7 +1956,7 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out,
"REV r6, r6\n\t" "REV r6, r6\n\t"
"REV r7, r7\n\t" "REV r7, r7\n\t"
"PUSH {r1, %[ks], r12, lr}\n\t" "PUSH {r1, %[ks], r12, lr}\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
/* Round: 0 - XOR in key schedule */ /* Round: 0 - XOR in key schedule */
"EOR r4, r4, r8\n\t" "EOR r4, r4, r8\n\t"
"EOR r5, r5, r9\n\t" "EOR r5, r5, r9\n\t"
@ -2011,7 +2011,7 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out,
"REV r6, r6\n\t" "REV r6, r6\n\t"
"REV r7, r7\n\t" "REV r7, r7\n\t"
"PUSH {r1, %[ks], r12, lr}\n\t" "PUSH {r1, %[ks], r12, lr}\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
/* Round: 0 - XOR in key schedule */ /* Round: 0 - XOR in key schedule */
"EOR r4, r4, r8\n\t" "EOR r4, r4, r8\n\t"
"EOR r5, r5, r9\n\t" "EOR r5, r5, r9\n\t"
@ -2066,7 +2066,7 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out,
"REV r6, r6\n\t" "REV r6, r6\n\t"
"REV r7, r7\n\t" "REV r7, r7\n\t"
"PUSH {r1, %[ks], r12, lr}\n\t" "PUSH {r1, %[ks], r12, lr}\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
/* Round: 0 - XOR in key schedule */ /* Round: 0 - XOR in key schedule */
"EOR r4, r4, r8\n\t" "EOR r4, r4, r8\n\t"
"EOR r5, r5, r9\n\t" "EOR r5, r5, r9\n\t"
@ -2188,7 +2188,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out,
"LDR lr, [sp, #16]\n\t" "LDR lr, [sp, #16]\n\t"
"STRD r4, r5, [lr, #16]\n\t" "STRD r4, r5, [lr, #16]\n\t"
"STRD r6, r7, [lr, #24]\n\t" "STRD r6, r7, [lr, #24]\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
"REV r4, r4\n\t" "REV r4, r4\n\t"
"REV r5, r5\n\t" "REV r5, r5\n\t"
"REV r6, r6\n\t" "REV r6, r6\n\t"
@ -2205,7 +2205,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out,
"REV r5, r5\n\t" "REV r5, r5\n\t"
"REV r6, r6\n\t" "REV r6, r6\n\t"
"REV r7, r7\n\t" "REV r7, r7\n\t"
"ldm lr, {r8, r9, r10, r11}\n\t" "LDM lr, {r8, r9, r10, r11}\n\t"
"POP {r1, r12, lr}\n\t" "POP {r1, r12, lr}\n\t"
"LDR %[ks], [sp]\n\t" "LDR %[ks], [sp]\n\t"
"EOR r4, r4, r8\n\t" "EOR r4, r4, r8\n\t"
@ -2234,7 +2234,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out,
"LDR lr, [sp, #16]\n\t" "LDR lr, [sp, #16]\n\t"
"STRD r4, r5, [lr]\n\t" "STRD r4, r5, [lr]\n\t"
"STRD r6, r7, [lr, #8]\n\t" "STRD r6, r7, [lr, #8]\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
"REV r4, r4\n\t" "REV r4, r4\n\t"
"REV r5, r5\n\t" "REV r5, r5\n\t"
"REV r6, r6\n\t" "REV r6, r6\n\t"
@ -2294,7 +2294,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out,
"LDR lr, [sp, #16]\n\t" "LDR lr, [sp, #16]\n\t"
"STRD r4, r5, [lr, #16]\n\t" "STRD r4, r5, [lr, #16]\n\t"
"STRD r6, r7, [lr, #24]\n\t" "STRD r6, r7, [lr, #24]\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
"REV r4, r4\n\t" "REV r4, r4\n\t"
"REV r5, r5\n\t" "REV r5, r5\n\t"
"REV r6, r6\n\t" "REV r6, r6\n\t"
@ -2311,7 +2311,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out,
"REV r5, r5\n\t" "REV r5, r5\n\t"
"REV r6, r6\n\t" "REV r6, r6\n\t"
"REV r7, r7\n\t" "REV r7, r7\n\t"
"ldm lr, {r8, r9, r10, r11}\n\t" "LDM lr, {r8, r9, r10, r11}\n\t"
"POP {r1, r12, lr}\n\t" "POP {r1, r12, lr}\n\t"
"LDR %[ks], [sp]\n\t" "LDR %[ks], [sp]\n\t"
"EOR r4, r4, r8\n\t" "EOR r4, r4, r8\n\t"
@ -2340,7 +2340,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out,
"LDR lr, [sp, #16]\n\t" "LDR lr, [sp, #16]\n\t"
"STRD r4, r5, [lr]\n\t" "STRD r4, r5, [lr]\n\t"
"STRD r6, r7, [lr, #8]\n\t" "STRD r6, r7, [lr, #8]\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
"REV r4, r4\n\t" "REV r4, r4\n\t"
"REV r5, r5\n\t" "REV r5, r5\n\t"
"REV r6, r6\n\t" "REV r6, r6\n\t"
@ -2400,7 +2400,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out,
"LDR lr, [sp, #16]\n\t" "LDR lr, [sp, #16]\n\t"
"STRD r4, r5, [lr, #16]\n\t" "STRD r4, r5, [lr, #16]\n\t"
"STRD r6, r7, [lr, #24]\n\t" "STRD r6, r7, [lr, #24]\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
"REV r4, r4\n\t" "REV r4, r4\n\t"
"REV r5, r5\n\t" "REV r5, r5\n\t"
"REV r6, r6\n\t" "REV r6, r6\n\t"
@ -2417,7 +2417,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out,
"REV r5, r5\n\t" "REV r5, r5\n\t"
"REV r6, r6\n\t" "REV r6, r6\n\t"
"REV r7, r7\n\t" "REV r7, r7\n\t"
"ldm lr, {r8, r9, r10, r11}\n\t" "LDM lr, {r8, r9, r10, r11}\n\t"
"POP {r1, r12, lr}\n\t" "POP {r1, r12, lr}\n\t"
"LDR %[ks], [sp]\n\t" "LDR %[ks], [sp]\n\t"
"EOR r4, r4, r8\n\t" "EOR r4, r4, r8\n\t"
@ -2446,7 +2446,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out,
"LDR lr, [sp, #16]\n\t" "LDR lr, [sp, #16]\n\t"
"STRD r4, r5, [lr]\n\t" "STRD r4, r5, [lr]\n\t"
"STRD r6, r7, [lr, #8]\n\t" "STRD r6, r7, [lr, #8]\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
"REV r4, r4\n\t" "REV r4, r4\n\t"
"REV r5, r5\n\t" "REV r5, r5\n\t"
"REV r6, r6\n\t" "REV r6, r6\n\t"
@ -2570,7 +2570,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"LSR %[len], r12, #24\n\t" "LSR %[len], r12, #24\n\t"
"AND %[len], %[len], #0xf\n\t" "AND %[len], %[len], #0xf\n\t"
"ADD %[len], %[m], %[len], LSL #4\n\t" "ADD %[len], %[m], %[len], LSL #4\n\t"
"ldm %[len], {r8, r9, r10, r11}\n\t" "LDM %[len], {r8, r9, r10, r11}\n\t"
"LSR r6, r10, #4\n\t" "LSR r6, r10, #4\n\t"
"AND %[len], r11, #0xf\n\t" "AND %[len], r11, #0xf\n\t"
"LSR r11, r11, #4\n\t" "LSR r11, r11, #4\n\t"
@ -2580,7 +2580,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2597,7 +2597,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2614,7 +2614,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2631,7 +2631,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2648,7 +2648,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2664,7 +2664,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2681,7 +2681,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2703,7 +2703,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"LSR %[len], r12, #24\n\t" "LSR %[len], r12, #24\n\t"
"AND %[len], %[len], #0xf\n\t" "AND %[len], %[len], #0xf\n\t"
"ADD %[len], %[m], %[len], LSL #4\n\t" "ADD %[len], %[m], %[len], LSL #4\n\t"
"ldm %[len], {r4, r5, r6, r7}\n\t" "LDM %[len], {r4, r5, r6, r7}\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
"EOR r9, r9, r5\n\t" "EOR r9, r9, r5\n\t"
"EOR r10, r10, r6\n\t" "EOR r10, r10, r6\n\t"
@ -2717,7 +2717,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2734,7 +2734,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2751,7 +2751,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2768,7 +2768,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2785,7 +2785,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2801,7 +2801,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2818,7 +2818,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2840,7 +2840,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"LSR %[len], r12, #24\n\t" "LSR %[len], r12, #24\n\t"
"AND %[len], %[len], #0xf\n\t" "AND %[len], %[len], #0xf\n\t"
"ADD %[len], %[m], %[len], LSL #4\n\t" "ADD %[len], %[m], %[len], LSL #4\n\t"
"ldm %[len], {r4, r5, r6, r7}\n\t" "LDM %[len], {r4, r5, r6, r7}\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
"EOR r9, r9, r5\n\t" "EOR r9, r9, r5\n\t"
"EOR r10, r10, r6\n\t" "EOR r10, r10, r6\n\t"
@ -2854,7 +2854,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2871,7 +2871,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2888,7 +2888,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2905,7 +2905,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2922,7 +2922,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2938,7 +2938,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2955,7 +2955,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -2977,7 +2977,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"LSR %[len], r12, #24\n\t" "LSR %[len], r12, #24\n\t"
"AND %[len], %[len], #0xf\n\t" "AND %[len], %[len], #0xf\n\t"
"ADD %[len], %[m], %[len], LSL #4\n\t" "ADD %[len], %[m], %[len], LSL #4\n\t"
"ldm %[len], {r4, r5, r6, r7}\n\t" "LDM %[len], {r4, r5, r6, r7}\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
"EOR r9, r9, r5\n\t" "EOR r9, r9, r5\n\t"
"EOR r10, r10, r6\n\t" "EOR r10, r10, r6\n\t"
@ -2991,7 +2991,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -3008,7 +3008,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -3025,7 +3025,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -3042,7 +3042,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -3059,7 +3059,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -3075,7 +3075,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -3092,7 +3092,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m,
"ADD r4, %[m], r4, LSL #4\n\t" "ADD r4, %[m], r4, LSL #4\n\t"
"EOR r10, r6, r9, LSL #28\n\t" "EOR r10, r6, r9, LSL #28\n\t"
"LSR r9, r9, #4\n\t" "LSR r9, r9, #4\n\t"
"ldm r4, {r4, r5, r6, r7}\n\t" "LDM r4, {r4, r5, r6, r7}\n\t"
"EOR r9, r9, r8, LSL #28\n\t" "EOR r9, r9, r8, LSL #28\n\t"
"EOR r8, %[len], r8, LSR #4\n\t" "EOR r8, %[len], r8, LSR #4\n\t"
"EOR r8, r8, r4\n\t" "EOR r8, r8, r4\n\t"
@ -3164,7 +3164,7 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out,
#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */
"MOV lr, %[in]\n\t" "MOV lr, %[in]\n\t"
"MOV r0, %[L_AES_Thumb2_te_gcm]\n\t" "MOV r0, %[L_AES_Thumb2_te_gcm]\n\t"
"ldm r8, {r4, r5, r6, r7}\n\t" "LDM r8, {r4, r5, r6, r7}\n\t"
"REV r4, r4\n\t" "REV r4, r4\n\t"
"REV r5, r5\n\t" "REV r5, r5\n\t"
"REV r6, r6\n\t" "REV r6, r6\n\t"
@ -3196,7 +3196,7 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out,
"PUSH {r1, %[len], lr}\n\t" "PUSH {r1, %[len], lr}\n\t"
"LDR lr, [sp, #16]\n\t" "LDR lr, [sp, #16]\n\t"
"ADD r7, r7, #0x1\n\t" "ADD r7, r7, #0x1\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
"STR r7, [lr, #12]\n\t" "STR r7, [lr, #12]\n\t"
/* Round: 0 - XOR in key schedule */ /* Round: 0 - XOR in key schedule */
"EOR r4, r4, r8\n\t" "EOR r4, r4, r8\n\t"
@ -3224,7 +3224,7 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out,
"STR r5, [%[out], #4]\n\t" "STR r5, [%[out], #4]\n\t"
"STR r6, [%[out], #8]\n\t" "STR r6, [%[out], #8]\n\t"
"STR r7, [%[out], #12]\n\t" "STR r7, [%[out], #12]\n\t"
"ldm r8, {r4, r5, r6, r7}\n\t" "LDM r8, {r4, r5, r6, r7}\n\t"
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
@ -3257,7 +3257,7 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out,
"PUSH {r1, %[len], lr}\n\t" "PUSH {r1, %[len], lr}\n\t"
"LDR lr, [sp, #16]\n\t" "LDR lr, [sp, #16]\n\t"
"ADD r7, r7, #0x1\n\t" "ADD r7, r7, #0x1\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
"STR r7, [lr, #12]\n\t" "STR r7, [lr, #12]\n\t"
/* Round: 0 - XOR in key schedule */ /* Round: 0 - XOR in key schedule */
"EOR r4, r4, r8\n\t" "EOR r4, r4, r8\n\t"
@ -3285,7 +3285,7 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out,
"STR r5, [%[out], #4]\n\t" "STR r5, [%[out], #4]\n\t"
"STR r6, [%[out], #8]\n\t" "STR r6, [%[out], #8]\n\t"
"STR r7, [%[out], #12]\n\t" "STR r7, [%[out], #12]\n\t"
"ldm r8, {r4, r5, r6, r7}\n\t" "LDM r8, {r4, r5, r6, r7}\n\t"
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
@ -3318,7 +3318,7 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out,
"PUSH {r1, %[len], lr}\n\t" "PUSH {r1, %[len], lr}\n\t"
"LDR lr, [sp, #16]\n\t" "LDR lr, [sp, #16]\n\t"
"ADD r7, r7, #0x1\n\t" "ADD r7, r7, #0x1\n\t"
"ldm %[ks]!, {r8, r9, r10, r11}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t"
"STR r7, [lr, #12]\n\t" "STR r7, [lr, #12]\n\t"
/* Round: 0 - XOR in key schedule */ /* Round: 0 - XOR in key schedule */
"EOR r4, r4, r8\n\t" "EOR r4, r4, r8\n\t"
@ -3346,7 +3346,7 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out,
"STR r5, [%[out], #4]\n\t" "STR r5, [%[out], #4]\n\t"
"STR r6, [%[out], #8]\n\t" "STR r6, [%[out], #8]\n\t"
"STR r7, [%[out], #12]\n\t" "STR r7, [%[out], #12]\n\t"
"ldm r8, {r4, r5, r6, r7}\n\t" "LDM r8, {r4, r5, r6, r7}\n\t"
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"

View File

@ -103,7 +103,7 @@ void wc_chacha_setkey(word32* x, const byte* key, word32 keySz)
"SUBS %[keySz], %[keySz], #0x10\n\t" "SUBS %[keySz], %[keySz], #0x10\n\t"
"ADD r7, r7, %[keySz]\n\t" "ADD r7, r7, %[keySz]\n\t"
/* Start state with constants */ /* Start state with constants */
"ldm r7, {r3, r4, r5, r6}\n\t" "LDM r7, {r3, r4, r5, r6}\n\t"
"STM %[x]!, {r3, r4, r5, r6}\n\t" "STM %[x]!, {r3, r4, r5, r6}\n\t"
/* Next is first 16 bytes of key. */ /* Next is first 16 bytes of key. */
"LDR r3, [%[key]]\n\t" "LDR r3, [%[key]]\n\t"
@ -176,7 +176,7 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len)
"STRD r4, r5, [sp, #16]\n\t" "STRD r4, r5, [sp, #16]\n\t"
"STRD r6, r7, [sp, #24]\n\t" "STRD r6, r7, [sp, #24]\n\t"
/* Load x[0]..x[12] into registers. */ /* Load x[0]..x[12] into registers. */
"ldm lr, {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12}\n\t" "LDM lr, {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12}\n\t"
/* 10x 2 full rounds to perform. */ /* 10x 2 full rounds to perform. */
"MOV lr, #0xa\n\t" "MOV lr, #0xa\n\t"
"STR lr, [sp, #48]\n\t" "STR lr, [sp, #48]\n\t"
@ -315,35 +315,35 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len)
"LDR lr, [sp, #32]\n\t" "LDR lr, [sp, #32]\n\t"
"MOV r12, sp\n\t" "MOV r12, sp\n\t"
/* Add in original state */ /* Add in original state */
"ldm lr!, {r8, r9, r10, r11}\n\t" "LDM lr!, {r8, r9, r10, r11}\n\t"
"ADD %[ctx], %[ctx], r8\n\t" "ADD %[ctx], %[ctx], r8\n\t"
"ADD %[c], %[c], r9\n\t" "ADD %[c], %[c], r9\n\t"
"ADD %[m], %[m], r10\n\t" "ADD %[m], %[m], r10\n\t"
"ADD %[len], %[len], r11\n\t" "ADD %[len], %[len], r11\n\t"
"ldm lr!, {r8, r9, r10, r11}\n\t" "LDM lr!, {r8, r9, r10, r11}\n\t"
"ADD r4, r4, r8\n\t" "ADD r4, r4, r8\n\t"
"ADD r5, r5, r9\n\t" "ADD r5, r5, r9\n\t"
"ADD r6, r6, r10\n\t" "ADD r6, r6, r10\n\t"
"ADD r7, r7, r11\n\t" "ADD r7, r7, r11\n\t"
"ldm r12, {r8, r9}\n\t" "LDM r12, {r8, r9}\n\t"
"ldm lr!, {r10, r11}\n\t" "LDM lr!, {r10, r11}\n\t"
"ADD r8, r8, r10\n\t" "ADD r8, r8, r10\n\t"
"ADD r9, r9, r11\n\t" "ADD r9, r9, r11\n\t"
"STM r12!, {r8, r9}\n\t" "STM r12!, {r8, r9}\n\t"
"ldm r12, {r8, r9}\n\t" "LDM r12, {r8, r9}\n\t"
"ldm lr!, {r10, r11}\n\t" "LDM lr!, {r10, r11}\n\t"
"ADD r8, r8, r10\n\t" "ADD r8, r8, r10\n\t"
"ADD r9, r9, r11\n\t" "ADD r9, r9, r11\n\t"
"STM r12!, {r8, r9}\n\t" "STM r12!, {r8, r9}\n\t"
"ldm r12, {r8, r9}\n\t" "LDM r12, {r8, r9}\n\t"
"ldm lr!, {r10, r11}\n\t" "LDM lr!, {r10, r11}\n\t"
"ADD r8, r8, r10\n\t" "ADD r8, r8, r10\n\t"
"ADD r9, r9, r11\n\t" "ADD r9, r9, r11\n\t"
"ADD r10, r10, #0x1\n\t" "ADD r10, r10, #0x1\n\t"
"STM r12!, {r8, r9}\n\t" "STM r12!, {r8, r9}\n\t"
"STR r10, [lr, #-8]\n\t" "STR r10, [lr, #-8]\n\t"
"ldm r12, {r8, r9}\n\t" "LDM r12, {r8, r9}\n\t"
"ldm lr, {r10, r11}\n\t" "LDM lr, {r10, r11}\n\t"
"ADD r8, r8, r10\n\t" "ADD r8, r8, r10\n\t"
"ADD r9, r9, r11\n\t" "ADD r9, r9, r11\n\t"
"STM r12, {r8, r9}\n\t" "STM r12, {r8, r9}\n\t"
@ -447,7 +447,7 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len)
"LDR lr, [sp, #32]\n\t" "LDR lr, [sp, #32]\n\t"
"ADD r12, lr, #0x44\n\t" "ADD r12, lr, #0x44\n\t"
"STM r12!, {%[ctx], %[c], %[m], %[len], r4, r5, r6, r7}\n\t" "STM r12!, {%[ctx], %[c], %[m], %[len], r4, r5, r6, r7}\n\t"
"ldm sp, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" "LDM sp, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t"
"STM r12, {%[ctx], %[c], %[m], %[len], r4, r5, r6, r7}\n\t" "STM r12, {%[ctx], %[c], %[m], %[len], r4, r5, r6, r7}\n\t"
"LDRD %[m], %[len], [sp, #40]\n\t" "LDRD %[m], %[len], [sp, #40]\n\t"
"LDR %[c], [sp, #36]\n\t" "LDR %[c], [sp, #36]\n\t"
@ -469,7 +469,7 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len)
"BLT.N L_chacha_thumb2_crypt_word_loop_%=\n\t" "BLT.N L_chacha_thumb2_crypt_word_loop_%=\n\t"
#endif #endif
/* 16 bytes of state XORed into message. */ /* 16 bytes of state XORed into message. */
"ldm lr!, {r4, r5, r6, r7}\n\t" "LDM lr!, {r4, r5, r6, r7}\n\t"
"LDR r8, [%[m]]\n\t" "LDR r8, [%[m]]\n\t"
"LDR r9, [%[m], #4]\n\t" "LDR r9, [%[m], #4]\n\t"
"LDR r10, [%[m], #8]\n\t" "LDR r10, [%[m], #8]\n\t"

File diff suppressed because it is too large Load Diff

View File

@ -3110,8 +3110,8 @@ void mlkem_thumb2_basemul_mont(sword16* r, const sword16* a, const sword16* b)
#else #else
"L_mlkem_basemul_mont_loop_%=:\n\t" "L_mlkem_basemul_mont_loop_%=:\n\t"
#endif #endif
"ldm %[a]!, {r4, r5}\n\t" "LDM %[a]!, {r4, r5}\n\t"
"ldm %[b]!, {r6, r7}\n\t" "LDM %[b]!, {r6, r7}\n\t"
"LDR lr, [r3, r8]\n\t" "LDR lr, [r3, r8]\n\t"
"ADD r8, r8, #0x2\n\t" "ADD r8, r8, #0x2\n\t"
"PUSH {r8}\n\t" "PUSH {r8}\n\t"
@ -3261,8 +3261,8 @@ void mlkem_thumb2_basemul_mont_add(sword16* r, const sword16* a,
#else #else
"L_mlkem_thumb2_basemul_mont_add_loop_%=:\n\t" "L_mlkem_thumb2_basemul_mont_add_loop_%=:\n\t"
#endif #endif
"ldm %[a]!, {r4, r5}\n\t" "LDM %[a]!, {r4, r5}\n\t"
"ldm %[b]!, {r6, r7}\n\t" "LDM %[b]!, {r6, r7}\n\t"
"LDR lr, [r3, r8]\n\t" "LDR lr, [r3, r8]\n\t"
"ADD r8, r8, #0x2\n\t" "ADD r8, r8, #0x2\n\t"
"PUSH {r8}\n\t" "PUSH {r8}\n\t"
@ -3291,7 +3291,7 @@ void mlkem_thumb2_basemul_mont_add(sword16* r, const sword16* a,
"SMULTB r7, r12, r11\n\t" "SMULTB r7, r12, r11\n\t"
"SMLABB r9, r12, r6, r9\n\t" "SMLABB r9, r12, r6, r9\n\t"
"SMLABB r11, r12, r7, r11\n\t" "SMLABB r11, r12, r7, r11\n\t"
"ldm %[r], {r4, r5}\n\t" "LDM %[r], {r4, r5}\n\t"
"PKHTB r9, r9, r8, ASR #16\n\t" "PKHTB r9, r9, r8, ASR #16\n\t"
"PKHTB r11, r11, r10, ASR #16\n\t" "PKHTB r11, r11, r10, ASR #16\n\t"
"SADD16 r4, r4, r9\n\t" "SADD16 r4, r4, r9\n\t"
@ -3358,7 +3358,7 @@ void mlkem_thumb2_basemul_mont_add(sword16* r, const sword16* a,
"SBFX r5, r7, #0, #16\n\t" "SBFX r5, r7, #0, #16\n\t"
"MLA r9, r12, r4, r9\n\t" "MLA r9, r12, r4, r9\n\t"
"MLA r11, r12, r5, r11\n\t" "MLA r11, r12, r5, r11\n\t"
"ldm %[r], {r4, r5}\n\t" "LDM %[r], {r4, r5}\n\t"
"BFC r9, #0, #16\n\t" "BFC r9, #0, #16\n\t"
"BFC r11, #0, #16\n\t" "BFC r11, #0, #16\n\t"
"ORR r9, r9, r8, LSR #16\n\t" "ORR r9, r9, r8, LSR #16\n\t"
@ -3421,7 +3421,7 @@ void mlkem_thumb2_csubq(sword16* p)
#else #else
"L_mlkem_thumb2_csubq_loop_%=:\n\t" "L_mlkem_thumb2_csubq_loop_%=:\n\t"
#endif #endif
"ldm %[p], {r2, r3, r4, r5}\n\t" "LDM %[p], {r2, r3, r4, r5}\n\t"
#ifndef WOLFSSL_ARM_ARCH_7M #ifndef WOLFSSL_ARM_ARCH_7M
"SSUB16 r2, r2, r12\n\t" "SSUB16 r2, r2, r12\n\t"
"SSUB16 r3, r3, r12\n\t" "SSUB16 r3, r3, r12\n\t"
@ -3541,7 +3541,7 @@ unsigned int mlkem_thumb2_rej_uniform(sword16* p, unsigned int len,
#else #else
"BLT.N L_mlkem_thumb2_rej_uniform_done_no_fail_%=\n\t" "BLT.N L_mlkem_thumb2_rej_uniform_done_no_fail_%=\n\t"
#endif #endif
"ldm %[r]!, {r4, r5, r6}\n\t" "LDM %[r]!, {r4, r5, r6}\n\t"
"UBFX r7, r4, #0, #12\n\t" "UBFX r7, r4, #0, #12\n\t"
"STRH r7, [%[p], r9]\n\t" "STRH r7, [%[p], r9]\n\t"
"SUB r10, r7, r8\n\t" "SUB r10, r7, r8\n\t"
@ -3627,7 +3627,7 @@ unsigned int mlkem_thumb2_rej_uniform(sword16* p, unsigned int len,
#else #else
"L_mlkem_thumb2_rej_uniform_loop_%=:\n\t" "L_mlkem_thumb2_rej_uniform_loop_%=:\n\t"
#endif #endif
"ldm %[r]!, {r4, r5, r6}\n\t" "LDM %[r]!, {r4, r5, r6}\n\t"
"UBFX r7, r4, #0, #12\n\t" "UBFX r7, r4, #0, #12\n\t"
"CMP r7, r8\n\t" "CMP r7, r8\n\t"
#if defined(__GNUC__) #if defined(__GNUC__)

View File

@ -73,7 +73,7 @@ void poly1305_blocks_thumb2_16(Poly1305* ctx, const byte* m, word32 len,
"STM lr, {%[ctx], %[m], %[len], %[notLast]}\n\t" "STM lr, {%[ctx], %[m], %[len], %[notLast]}\n\t"
/* Get h pointer */ /* Get h pointer */
"ADD lr, %[ctx], #0x10\n\t" "ADD lr, %[ctx], #0x10\n\t"
"ldm lr, {r4, r5, r6, r7, r8}\n\t" "LDM lr, {r4, r5, r6, r7, r8}\n\t"
"\n" "\n"
#if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000)
"L_poly1305_thumb2_16_loop:\n\t" "L_poly1305_thumb2_16_loop:\n\t"
@ -195,7 +195,7 @@ void poly1305_blocks_thumb2_16(Poly1305* ctx, const byte* m, word32 len,
"MOV r12, %[ctx]\n\t" "MOV r12, %[ctx]\n\t"
"MLA r11, %[notLast], %[len], r11\n\t" "MLA r11, %[notLast], %[len], r11\n\t"
#else #else
"ldm %[m], {r0, r1, r2, r3}\n\t" "LDM %[m], {r0, r1, r2, r3}\n\t"
/* r[0] * h[0] */ /* r[0] * h[0] */
"UMULL r10, r11, %[ctx], r4\n\t" "UMULL r10, r11, %[ctx], r4\n\t"
/* r[1] * h[0] */ /* r[1] * h[0] */
@ -243,7 +243,7 @@ void poly1305_blocks_thumb2_16(Poly1305* ctx, const byte* m, word32 len,
/* r[3] * h[4] */ /* r[3] * h[4] */
"UMAAL r11, r12, %[notLast], r5\n\t" "UMAAL r11, r12, %[notLast], r5\n\t"
/* DONE */ /* DONE */
"ldm sp, {r4, r5, r6}\n\t" "LDM sp, {r4, r5, r6}\n\t"
#endif /* WOLFSSL_ARM_ARCH_7M */ #endif /* WOLFSSL_ARM_ARCH_7M */
/* r12 will be zero because r is masked. */ /* r12 will be zero because r is masked. */
/* Load length */ /* Load length */
@ -321,7 +321,7 @@ void poly1305_set_key(Poly1305* ctx, const byte* key)
__asm__ __volatile__ ( __asm__ __volatile__ (
/* Load mask. */ /* Load mask. */
"MOV r10, %[L_poly1305_thumb2_clamp]\n\t" "MOV r10, %[L_poly1305_thumb2_clamp]\n\t"
"ldm r10, {r6, r7, r8, r9}\n\t" "LDM r10, {r6, r7, r8, r9}\n\t"
/* Load and cache padding. */ /* Load and cache padding. */
"LDR r2, [%[key], #16]\n\t" "LDR r2, [%[key], #16]\n\t"
"LDR r3, [%[key], #20]\n\t" "LDR r3, [%[key], #20]\n\t"
@ -370,7 +370,7 @@ void poly1305_final(Poly1305* ctx, byte* mac)
__asm__ __volatile__ ( __asm__ __volatile__ (
"ADD r11, %[ctx], #0x10\n\t" "ADD r11, %[ctx], #0x10\n\t"
"ldm r11, {r2, r3, r4, r5, r6}\n\t" "LDM r11, {r2, r3, r4, r5, r6}\n\t"
/* Add 5 and check for h larger than p. */ /* Add 5 and check for h larger than p. */
"ADDS r7, r2, #0x5\n\t" "ADDS r7, r2, #0x5\n\t"
"ADCS r7, r3, #0x0\n\t" "ADCS r7, r3, #0x0\n\t"
@ -388,7 +388,7 @@ void poly1305_final(Poly1305* ctx, byte* mac)
"ADC r5, r5, #0x0\n\t" "ADC r5, r5, #0x0\n\t"
/* Add padding */ /* Add padding */
"ADD r11, %[ctx], #0x24\n\t" "ADD r11, %[ctx], #0x24\n\t"
"ldm r11, {r7, r8, r9, r10}\n\t" "LDM r11, {r7, r8, r9, r10}\n\t"
"ADDS r2, r2, r7\n\t" "ADDS r2, r2, r7\n\t"
"ADCS r3, r3, r8\n\t" "ADCS r3, r3, r8\n\t"
"ADCS r4, r4, r9\n\t" "ADCS r4, r4, r9\n\t"

File diff suppressed because it is too large Load Diff

View File

@ -109,9 +109,9 @@ static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
} }
#else #else
switch (i) { switch (i) {
case 2: d[2] = a[2]; //fallthrough case 2: d[i-2] = a[2]; //fallthrough
case 1: d[1] = a[1]; //fallthrough case 1: d[i-1] = a[1]; //fallthrough
case 0: d[0] = a[0]; //fallthrough case 0: d[i-0] = a[0]; //fallthrough
} }
#endif #endif
j++; j++;
@ -30300,9 +30300,9 @@ static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
} }
#else #else
switch (i) { switch (i) {
case 2: d[2] = a[2]; //fallthrough case 2: d[i-2] = a[2]; //fallthrough
case 1: d[1] = a[1]; //fallthrough case 1: d[i-1] = a[1]; //fallthrough
case 0: d[0] = a[0]; //fallthrough case 0: d[i-0] = a[0]; //fallthrough
} }
#endif #endif
j++; j++;
@ -83394,9 +83394,9 @@ static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
} }
#else #else
switch (i) { switch (i) {
case 2: d[2] = a[2]; //fallthrough case 2: d[i-2] = a[2]; //fallthrough
case 1: d[1] = a[1]; //fallthrough case 1: d[i-1] = a[1]; //fallthrough
case 0: d[0] = a[0]; //fallthrough case 0: d[i-0] = a[0]; //fallthrough
} }
#endif #endif
j++; j++;
@ -103938,9 +103938,9 @@ static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
} }
#else #else
switch (i) { switch (i) {
case 2: d[2] = a[2]; //fallthrough case 2: d[i-2] = a[2]; //fallthrough
case 1: d[1] = a[1]; //fallthrough case 1: d[i-1] = a[1]; //fallthrough
case 0: d[0] = a[0]; //fallthrough case 0: d[i-0] = a[0]; //fallthrough
} }
#endif #endif
j++; j++;
@ -114847,9 +114847,9 @@ static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
} }
#else #else
switch (i) { switch (i) {
case 2: d[2] = a[2]; //fallthrough case 2: d[i-2] = a[2]; //fallthrough
case 1: d[1] = a[1]; //fallthrough case 1: d[i-1] = a[1]; //fallthrough
case 0: d[0] = a[0]; //fallthrough case 0: d[i-0] = a[0]; //fallthrough
} }
#endif #endif
j++; j++;
@ -129089,9 +129089,9 @@ static void sp_521_from_bin(sp_digit* r, int size, const byte* a, int n)
} }
#else #else
switch (i) { switch (i) {
case 2: d[2] = a[2]; //fallthrough case 2: d[i-2] = a[2]; //fallthrough
case 1: d[1] = a[1]; //fallthrough case 1: d[i-1] = a[1]; //fallthrough
case 0: d[0] = a[0]; //fallthrough case 0: d[i-0] = a[0]; //fallthrough
} }
#endif #endif
j++; j++;
@ -218532,9 +218532,9 @@ static void sp_1024_from_bin(sp_digit* r, int size, const byte* a, int n)
} }
#else #else
switch (i) { switch (i) {
case 2: d[2] = a[2]; //fallthrough case 2: d[i-2] = a[2]; //fallthrough
case 1: d[1] = a[1]; //fallthrough case 1: d[i-1] = a[1]; //fallthrough
case 0: d[0] = a[0]; //fallthrough case 0: d[i-0] = a[0]; //fallthrough
} }
#endif #endif
j++; j++;

File diff suppressed because it is too large Load Diff