diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c index 3c34f3ef6..cb921f594 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c @@ -406,9 +406,9 @@ void AES_invert_key(unsigned char* ks_p, word32 rounds_p) "str r8, [%[ks]], #4\n\t" "subs r11, r11, #1\n\t" "bne L_AES_invert_key_mix_loop_%=\n\t" - : [ks] "+r" (ks), [rounds] "+r" (rounds), - [L_AES_ARM32_te] "+r" (L_AES_ARM32_te_c), - [L_AES_ARM32_td] "+r" (L_AES_ARM32_td_c) + : [ks] "+r" (ks), [rounds] "+r" (rounds), + [L_AES_ARM32_te] "+r" (L_AES_ARM32_te_c), + [L_AES_ARM32_td] "+r" (L_AES_ARM32_td_c) : : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -933,9 +933,9 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, "bne L_AES_set_encrypt_key_loop_128_%=\n\t" "\n" "L_AES_set_encrypt_key_end_%=: \n\t" - : [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks), - [L_AES_ARM32_te] "+r" (L_AES_ARM32_te_c), - [L_AES_ARM32_rcon] "+r" (L_AES_ARM32_rcon_c) + : [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks), + [L_AES_ARM32_te] "+r" (L_AES_ARM32_te_c), + [L_AES_ARM32_rcon] "+r" (L_AES_ARM32_rcon_c) : : "memory", "cc", "r12", "lr", "r5", "r6", "r7", "r8" ); @@ -1588,7 +1588,7 @@ void AES_encrypt_block(const uint32_t* te_p, int nr_p, int len_p, "eor r5, r5, r9\n\t" "eor r6, r6, r10\n\t" "eor r7, r7, r11\n\t" - : [te] "+r" (te), [nr] "+r" (nr), [len] "+r" (len), [ks] "+r" (ks) + : [te] "+r" (te), [nr] "+r" (nr), [len] "+r" (len), [ks] "+r" (ks) : : "memory", "cc", "lr" ); @@ -1841,8 +1841,8 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, "\n" "L_AES_ECB_encrypt_end_%=: \n\t" "pop {%[ks]}\n\t" - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), - [nr] "+r" (nr), [L_AES_ARM32_te_ecb] "+r" (L_AES_ARM32_te_ecb_c) + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), + [nr] "+r" (nr), [L_AES_ARM32_te_ecb] "+r" (L_AES_ARM32_te_ecb_c) : : "memory", "cc", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11" ); @@ -2114,9 +2114,9 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, "L_AES_CBC_encrypt_end_%=: \n\t" "pop {%[ks], r9}\n\t" "stm r9, {r4, r5, r6, r7}\n\t" - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), - [nr] "+r" (nr), [iv] "+r" (iv), - [L_AES_ARM32_te_cbc] "+r" (L_AES_ARM32_te_cbc_c) + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), + [nr] "+r" (nr), [iv] "+r" (iv), + [L_AES_ARM32_te_cbc] "+r" (L_AES_ARM32_te_cbc_c) : : "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); @@ -2389,9 +2389,9 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, "rev r7, r7\n\t" #endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ "stm r8, {r4, r5, r6, r7}\n\t" - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), - [nr] "+r" (nr), [ctr] "+r" (ctr), - [L_AES_ARM32_te_ctr] "+r" (L_AES_ARM32_te_ctr_c) + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), + [nr] "+r" (nr), [ctr] "+r" (ctr), + [L_AES_ARM32_te_ctr] "+r" (L_AES_ARM32_te_ctr_c) : : "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); @@ -3045,7 +3045,7 @@ void AES_decrypt_block(const uint32_t* td_p, int nr_p, const uint8_t* td4_p) "eor r5, r5, r9\n\t" "eor r6, r6, r10\n\t" "eor r7, r7, r11\n\t" - : [td] "+r" (td), [nr] "+r" (nr), [td4] "+r" (td4) + : [td] "+r" (td), [nr] "+r" (nr), [td4] "+r" (td4) : : "memory", "cc", "lr" ); @@ -3331,9 +3331,9 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, "bne L_AES_ECB_decrypt_loop_block_128_%=\n\t" "\n" "L_AES_ECB_decrypt_end_%=: \n\t" - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), - [nr] "+r" (nr), [L_AES_ARM32_td_ecb] "+r" (L_AES_ARM32_td_ecb_c), - [L_AES_ARM32_td4] "+r" (L_AES_ARM32_td4_c) + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), + [nr] "+r" (nr), [L_AES_ARM32_td_ecb] "+r" (L_AES_ARM32_td_ecb_c), + [L_AES_ARM32_td4] "+r" (L_AES_ARM32_td4_c) : : "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); @@ -3971,10 +3971,10 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, "\n" "L_AES_CBC_decrypt_end_%=: \n\t" "pop {%[ks]-r4}\n\t" - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), - [nr] "+r" (nr), [iv] "+r" (iv), - [L_AES_ARM32_td_ecb] "+r" (L_AES_ARM32_td_ecb_c), - [L_AES_ARM32_td4] "+r" (L_AES_ARM32_td4_c) + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), + [nr] "+r" (nr), [iv] "+r" (iv), + [L_AES_ARM32_td_ecb] "+r" (L_AES_ARM32_td_ecb_c), + [L_AES_ARM32_td4] "+r" (L_AES_ARM32_td4_c) : : "memory", "cc", "r12", "lr", "r8", "r9", "r10", "r11" ); @@ -4576,8 +4576,8 @@ void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, "subs %[len], %[len], #16\n\t" "add %[data], %[data], #16\n\t" "bne L_GCM_gmult_len_start_block_%=\n\t" - : [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len), - [L_GCM_gmult_len_r] "+r" (L_GCM_gmult_len_r_c) + : [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len), + [L_GCM_gmult_len_r] "+r" (L_GCM_gmult_len_r_c) : : "memory", "cc", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -4840,9 +4840,9 @@ void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, "rev r7, r7\n\t" #endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */ "stm r8, {r4, r5, r6, r7}\n\t" - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), - [nr] "+r" (nr), [ctr] "+r" (ctr), - [L_AES_ARM32_te_gcm] "+r" (L_AES_ARM32_te_gcm_c) + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), + [nr] "+r" (nr), [ctr] "+r" (ctr), + [L_AES_ARM32_te_gcm] "+r" (L_AES_ARM32_te_gcm_c) : : "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); diff --git a/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c index aebcff155..5d5d17f4c 100644 --- a/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c @@ -71,7 +71,7 @@ void wc_chacha_setiv(word32* x_p, const byte* iv_p, word32 counter_p) "rev lr, lr\n\t" #endif /* BIG_ENDIAN_ORDER */ "stm r3, {r4, r12, lr}\n\t" - : [x] "+r" (x), [iv] "+r" (iv), [counter] "+r" (counter) + : [x] "+r" (x), [iv] "+r" (iv), [counter] "+r" (counter) : : "memory", "cc", "r3", "r12", "lr", "r4" ); @@ -119,8 +119,8 @@ void wc_chacha_setkey(word32* x_p, const byte* key_p, word32 keySz_p) "\n" "L_chacha_arm32_setkey_same_keyb_ytes_%=: \n\t" "stm %[x], {r4, r5, r12, lr}\n\t" - : [x] "+r" (x), [key] "+r" (key), [keySz] "+r" (keySz), - [L_chacha_arm32_constants] "+r" (L_chacha_arm32_constants_c) + : [x] "+r" (x), [key] "+r" (key), [keySz] "+r" (keySz), + [L_chacha_arm32_constants] "+r" (L_chacha_arm32_constants_c) : : "memory", "cc", "r12", "lr", "r4", "r5" ); @@ -484,7 +484,7 @@ void wc_chacha_crypt_bytes(ChaCha* ctx_p, byte* c_p, const byte* m_p, "\n" "L_chacha_arm32_crypt_done_%=: \n\t" "add sp, sp, #52\n\t" - : [ctx] "+r" (ctx), [c] "+r" (c), [m] "+r" (m), [len] "+r" (len) + : [ctx] "+r" (ctx), [c] "+r" (c), [m] "+r" (m), [len] "+r" (len) : : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -557,8 +557,8 @@ void wc_chacha_use_over(byte* over_p, byte* output_p, const byte* input_p, "b L_chacha_arm32_over_byte_loop_%=\n\t" "\n" "L_chacha_arm32_over_done_%=: \n\t" - : [over] "+r" (over), [output] "+r" (output), [input] "+r" (input), - [len] "+r" (len) + : [over] "+r" (over), [output] "+r" (output), [input] "+r" (input), + [len] "+r" (len) : : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9" ); diff --git a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c index 6871aaade..e3b52140c 100644 --- a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c @@ -335,7 +335,7 @@ void fe_sub(fe r_p, const fe a_p, const fe b_p) __asm__ __volatile__ ( "bl fe_sub_op\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" @@ -393,7 +393,7 @@ void fe_add(fe r_p, const fe a_p, const fe b_p) __asm__ __volatile__ ( "bl fe_add_op\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" @@ -428,7 +428,7 @@ void fe_frombytes(fe out_p, const unsigned char* in_p) "str r7, [%[out], #20]\n\t" "str r8, [%[out], #24]\n\t" "str r9, [%[out], #28]\n\t" - : [out] "+r" (out), [in] "+r" (in) + : [out] "+r" (out), [in] "+r" (in) : : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); @@ -472,7 +472,7 @@ void fe_tobytes(unsigned char* out_p, const fe n_p) "str r7, [%[out], #20]\n\t" "str r8, [%[out], #24]\n\t" "str r9, [%[out], #28]\n\t" - : [out] "+r" (out), [n] "+r" (n) + : [out] "+r" (out), [n] "+r" (n) : : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12" ); @@ -575,7 +575,7 @@ void fe_copy(fe r_p, const fe a_p) #else "strd r4, r5, [%[r], #24]\n\t" #endif - : [r] "+r" (r), [a] "+r" (a) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "cc", "r2", "r3", "r4", "r5" ); @@ -602,7 +602,7 @@ void fe_neg(fe r_p, const fe a_p) "sbcs r4, lr, r4\n\t" "sbc r5, r12, r5\n\t" "stm %[r]!, {r2, r3, r4, r5}\n\t" - : [r] "+r" (r), [a] "+r" (a) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "cc", "r2", "r3", "r4", "r5", "r12", "lr" ); @@ -2407,7 +2407,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) #else "strd r8, r9, [%[r], #88]\n\t" #endif - : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b) + : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b) : : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r3", "r10", "r11", "r12", "lr" @@ -2528,7 +2528,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) "and r7, r7, lr\n\t" "stm %[r]!, {r4, r5, r6, r7}\n\t" "sub %[base], %[base], %[b]\n\t" - : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b) + : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b) : : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" @@ -3074,7 +3074,7 @@ void fe_mul(fe r_p, const fe a_p, const fe b_p) __asm__ __volatile__ ( "bl fe_mul_op\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" @@ -3495,7 +3495,7 @@ void fe_sq(fe r_p, const fe a_p) __asm__ __volatile__ ( "bl fe_sq_op\n\t" - : [r] "+r" (r), [a] "+r" (a) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10", "r11" @@ -3572,7 +3572,7 @@ void fe_mul121666(fe r_p, fe a_p) "adcs r8, r8, #0\n\t" "adc r9, r9, #0\n\t" "stm %[r], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" - : [r] "+r" (r), [a] "+r" (a) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10" @@ -3635,7 +3635,7 @@ void fe_mul121666(fe r_p, fe a_p) "adcs r8, r8, #0\n\t" "adc r9, r9, #0\n\t" "stm %[r], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" - : [r] "+r" (r), [a] "+r" (a) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10" @@ -4026,7 +4026,7 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "bl fe_mul_op\n\t" "mov r0, #0\n\t" "add sp, sp, #0xbc\n\t" - : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a) + : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a) : : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "lr" @@ -4340,7 +4340,7 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "stm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "mov r0, #0\n\t" "add sp, sp, #0xc0\n\t" - : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a) + : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a) : : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "lr" @@ -4515,7 +4515,7 @@ void fe_invert(fe r_p, const fe a_p) "ldr %[a], [sp, #132]\n\t" "ldr %[r], [sp, #128]\n\t" "add sp, sp, #0x88\n\t" - : [r] "+r" (r), [a] "+r" (a) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "cc", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -4836,7 +4836,7 @@ void fe_sq2(fe r_p, const fe a_p) "ldr r0, [sp, #64]\n\t" "stm r0, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" "add sp, sp, #0x44\n\t" - : [r] "+r" (r), [a] "+r" (a) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "cc", "lr" ); @@ -5015,7 +5015,7 @@ void fe_sq2(fe r_p, const fe a_p) "stm r12, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" "mov r0, r12\n\t" "mov r1, lr\n\t" - : [r] "+r" (r), [a] "+r" (a) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "cc", "lr" ); @@ -5186,7 +5186,7 @@ void fe_pow22523(fe r_p, const fe a_p) "ldr %[a], [sp, #100]\n\t" "ldr %[r], [sp, #96]\n\t" "add sp, sp, #0x68\n\t" - : [r] "+r" (r), [a] "+r" (a) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "cc", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -5217,7 +5217,7 @@ void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p) "add r0, r0, #0x40\n\t" "bl fe_mul_op\n\t" "add sp, sp, #8\n\t" - : [r] "+r" (r), [p] "+r" (p) + : [r] "+r" (r), [p] "+r" (p) : : "memory", "cc", "lr", "r2", "r3", "r12", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -5253,7 +5253,7 @@ void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p) "add r0, r0, #0x60\n\t" "bl fe_mul_op\n\t" "add sp, sp, #8\n\t" - : [r] "+r" (r), [p] "+r" (p) + : [r] "+r" (r), [p] "+r" (p) : : "memory", "cc", "lr", "r2", "r3", "r12", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -5301,7 +5301,7 @@ void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p) "mov r1, r0\n\t" "bl fe_sub_op\n\t" "add sp, sp, #8\n\t" - : [r] "+r" (r), [p] "+r" (p) + : [r] "+r" (r), [p] "+r" (p) : : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" @@ -5388,7 +5388,7 @@ void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) "add r1, r0, #32\n\t" "bl fe_add_sub_op\n\t" "add sp, sp, #12\n\t" - : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) + : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" @@ -5476,7 +5476,7 @@ void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) "add r0, r0, #32\n\t" "bl fe_add_sub_op\n\t" "add sp, sp, #12\n\t" - : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) + : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" @@ -5564,7 +5564,7 @@ void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) "add r0, r0, #32\n\t" "bl fe_add_sub_op\n\t" "add sp, sp, #44\n\t" - : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) + : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" @@ -5652,7 +5652,7 @@ void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) "add r0, r0, #0x40\n\t" "bl fe_add_sub_op\n\t" "add sp, sp, #44\n\t" - : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) + : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" @@ -8528,7 +8528,7 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "str r8, [%[s], #24]\n\t" "str r9, [%[s], #28]\n\t" "add sp, sp, #0x50\n\t" - : [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c) + : [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c) : : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" @@ -9413,7 +9413,7 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) "str r8, [%[s], #24]\n\t" "str r9, [%[s], #28]\n\t" "add sp, sp, #0x50\n\t" - : [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c) + : [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c) : : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" diff --git a/wolfcrypt/src/port/arm/armv8-32-kyber-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-kyber-asm_c.c index 4650b9bc8..4e5081d47 100644 --- a/wolfcrypt/src/port/arm/armv8-32-kyber-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-kyber-asm_c.c @@ -3312,7 +3312,7 @@ void kyber_arm32_ntt(sword16* r_p) "bne L_kyber_arm32_ntt_loop_567_%=\n\t" "add sp, sp, #8\n\t" : [r] "+r" (r), - [L_kyber_arm32_ntt_zetas] "+r" (L_kyber_arm32_ntt_zetas_c) + [L_kyber_arm32_ntt_zetas] "+r" (L_kyber_arm32_ntt_zetas_c) : : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -8076,7 +8076,7 @@ void kyber_arm32_invntt(sword16* r_p) "bne L_kyber_arm32_invntt_loop_321_%=\n\t" "add sp, sp, #8\n\t" : [r] "+r" (r), - [L_kyber_arm32_invntt_zetas_inv] "+r" (L_kyber_arm32_invntt_zetas_inv_c) + [L_kyber_arm32_invntt_zetas_inv] "+r" (L_kyber_arm32_invntt_zetas_inv_c) : : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -8405,8 +8405,8 @@ void kyber_arm32_basemul_mont(sword16* r_p, const sword16* a_p, "stm %[r]!, {r4, r5}\n\t" "pop {r8}\n\t" "bne L_kyber_arm32_basemul_mont_loop_%=\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), - [L_kyber_arm32_basemul_mont_zetas] "+r" (L_kyber_arm32_basemul_mont_zetas_c) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), + [L_kyber_arm32_basemul_mont_zetas] "+r" (L_kyber_arm32_basemul_mont_zetas_c) : : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -8738,8 +8738,8 @@ void kyber_arm32_basemul_mont_add(sword16* r_p, const sword16* a_p, "stm %[r]!, {r4, r5}\n\t" "pop {r8}\n\t" "bne L_kyber_arm32_basemul_mont_add_loop_%=\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), - [L_kyber_arm32_basemul_mont_zetas] "+r" (L_kyber_arm32_basemul_mont_zetas_c) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), + [L_kyber_arm32_basemul_mont_zetas] "+r" (L_kyber_arm32_basemul_mont_zetas_c) : : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -8948,7 +8948,7 @@ void kyber_arm32_csubq(sword16* p_p) "subs r1, r1, #8\n\t" "bne L_kyber_arm32_csubq_loop_%=\n\t" : [p] "+r" (p), - [L_kyber_arm32_basemul_mont_zetas] "+r" (L_kyber_arm32_basemul_mont_zetas_c) + [L_kyber_arm32_basemul_mont_zetas] "+r" (L_kyber_arm32_basemul_mont_zetas_c) : : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -9220,8 +9220,8 @@ unsigned int kyber_arm32_rej_uniform(sword16* p_p, unsigned int len_p, "\n" "L_kyber_arm32_rej_uniform_done_%=: \n\t" "lsr r0, r12, #1\n\t" - : [p] "+r" (p), [len] "+r" (len), [r] "+r" (r), [rLen] "+r" (rLen), - [L_kyber_arm32_basemul_mont_zetas] "+r" (L_kyber_arm32_basemul_mont_zetas_c) + : [p] "+r" (p), [len] "+r" (len), [r] "+r" (r), [rLen] "+r" (rLen), + [L_kyber_arm32_basemul_mont_zetas] "+r" (L_kyber_arm32_basemul_mont_zetas_c) : : "memory", "cc", "r12", "lr", "r5", "r6", "r7", "r8" ); diff --git a/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c index aaf596d4c..97bd63aaa 100644 --- a/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c @@ -270,8 +270,8 @@ void poly1305_blocks_arm32_16(Poly1305* ctx_p, const byte* m_p, word32 len_p, "\n" "L_poly1305_arm32_16_done_%=: \n\t" "add sp, sp, #28\n\t" - : [ctx] "+r" (ctx), [m] "+r" (m), [len] "+r" (len), - [notLast] "+r" (notLast) + : [ctx] "+r" (ctx), [m] "+r" (m), [len] "+r" (len), + [notLast] "+r" (notLast) : : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -321,8 +321,8 @@ void poly1305_set_key(Poly1305* ctx_p, const byte* key_p) "stm lr, {r5, r6, r7, r8, r12}\n\t" /* Zero leftover */ "str r5, [%[ctx], #52]\n\t" - : [ctx] "+r" (ctx), [key] "+r" (key), - [L_poly1305_arm32_clamp] "+r" (L_poly1305_arm32_clamp_c) + : [ctx] "+r" (ctx), [key] "+r" (key), + [L_poly1305_arm32_clamp] "+r" (L_poly1305_arm32_clamp_c) : : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); @@ -377,7 +377,7 @@ void poly1305_final(Poly1305* ctx_p, byte* mac_p) /* Zero out padding. */ "add r9, %[ctx], #36\n\t" "stm r9, {r4, r5, r6, r7}\n\t" - : [ctx] "+r" (ctx), [mac] "+r" (mac) + : [ctx] "+r" (ctx), [mac] "+r" (mac) : : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9" diff --git a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c index 05086af07..f21b5317b 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c @@ -1732,8 +1732,8 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) "add %[data], %[data], #0x40\n\t" "bne L_SHA256_transform_len_begin_%=\n\t" "add sp, sp, #0xc0\n\t" - : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), - [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c) + : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), + [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c) : : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" @@ -2797,8 +2797,8 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) "str r10, [sp, #8]\n\t" "bne L_SHA256_transform_neon_len_begin_%=\n\t" "add sp, sp, #24\n\t" - : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), - [L_SHA256_transform_neon_len_k] "+r" (L_SHA256_transform_neon_len_k_c) + : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), + [L_SHA256_transform_neon_len_k] "+r" (L_SHA256_transform_neon_len_k_c) : : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", diff --git a/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c index e9e227ec3..170fcfd8b 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c @@ -334,7 +334,7 @@ void BlockSha3(word64* state_p) "vst1.8 {d24}, [%[state]]\n\t" "add sp, sp, #16\n\t" : [state] "+r" (state), - [L_sha3_arm2_neon_rt] "+r" (L_sha3_arm2_neon_rt_c) + [L_sha3_arm2_neon_rt] "+r" (L_sha3_arm2_neon_rt_c) : : "memory", "cc", "r2", "r3", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16", diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c index 32506f64e..f64ef3ce5 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c @@ -7601,8 +7601,8 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "bne L_SHA512_transform_len_begin_%=\n\t" "eor r0, r0, r0\n\t" "add sp, sp, #0xc0\n\t" - : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), - [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c) + : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), + [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c) : : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" @@ -9154,8 +9154,8 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "subs %[len], %[len], #0x80\n\t" "sub r3, r3, #0x280\n\t" "bne L_SHA512_transform_neon_len_begin_%=\n\t" - : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), - [L_SHA512_transform_neon_len_k] "+r" (L_SHA512_transform_neon_len_k_c) + : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), + [L_SHA512_transform_neon_len_k] "+r" (L_SHA512_transform_neon_len_k_c) : : "memory", "cc", "r12", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "q8", "q9", diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index c8ecf47ec..68449bebd 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -2231,7 +2231,8 @@ static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ "stm %[r]!, {r3, r4, r5, r6}\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", + "r12" ); } @@ -2584,7 +2585,8 @@ static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ "add sp, sp, #36\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -2610,7 +2612,7 @@ static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ "strd %[r], %[a], [sp, #36]\n\t" #endif "mov lr, %[b]\n\t" - "ldm %[a], {%[r], %[a], %[b], r3}\n\t" + "ldm %[a], {r0, r1, r2, r3}\n\t" "ldm lr!, {r4, r5, r6}\n\t" "umull r10, r11, %[r], r4\n\t" "umull r12, r7, %[a], r4\n\t" @@ -2655,7 +2657,7 @@ static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ "umaal r4, r6, %[b], r7\n\t" "sub lr, lr, #16\n\t" "umaal r5, r6, r3, r7\n\t" - "ldm %[r], {%[r], %[a], %[b], r3}\n\t" + "ldm %[r], {r0, r1, r2, r3}\n\t" "str r6, [sp, #32]\n\t" "ldm lr!, {r6}\n\t" "mov r7, #0\n\t" @@ -2715,7 +2717,8 @@ static void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ "add sp, sp, #44\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r10", "r11", "r12", "r7", "r8", "r9", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r10", "r11", "r12", "r7", + "r8", "r9", "lr" ); } @@ -2751,7 +2754,7 @@ static sp_digit sp_2048_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "adc %[r], %[r], #0\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -2798,7 +2801,7 @@ static sp_digit sp_2048_sub_in_place_16(sp_digit* a_p, const sp_digit* b_p) "sbc %[a], r9, r9\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (uint32_t)(size_t)a; } @@ -2848,7 +2851,7 @@ static sp_digit sp_2048_add_16(sp_digit* r_p, const sp_digit* a_p, const sp_digi "adc %[r], %[r], #0\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -2988,7 +2991,7 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) "sbc %[a], r9, r9\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (uint32_t)(size_t)a; } @@ -3066,7 +3069,7 @@ static sp_digit sp_2048_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digi "adc %[r], %[r], #0\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -3266,7 +3269,7 @@ static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, const sp_digit* b_p) "sbc %[a], r9, r9\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (uint32_t)(size_t)a; } @@ -3400,7 +3403,7 @@ static sp_digit sp_2048_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digi "adc %[r], %[r], #0\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -4680,7 +4683,8 @@ static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) "stm %[r]!, {r2, r3, r4, r8}\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r12" ); } @@ -4923,7 +4927,8 @@ static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) "add sp, sp, #0x44\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -4941,7 +4946,7 @@ static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) __asm__ __volatile__ ( "sub sp, sp, #32\n\t" "str %[r], [sp, #28]\n\t" - "ldm %[a], {%[r], %[a], r2, r3, r4, r5, r6, r7}\n\t" + "ldm %[a], {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" "umull r9, r10, %[r], %[r]\n\t" "umull r11, r12, %[r], %[a]\n\t" "adds r11, r11, r11\n\t" @@ -5028,18 +5033,19 @@ static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) /* R[15] = r7 */ "ldr lr, [sp, #28]\n\t" "add lr, lr, #28\n\t" - "stm lr!, {%[r], r12}\n\t" + "stm lr!, {r0, r12}\n\t" "stm lr!, {r11}\n\t" "stm lr!, {r10}\n\t" "stm lr!, {r3, r4, r8, r9}\n\t" "stm lr!, {r7}\n\t" "sub lr, lr, #0x40\n\t" - "ldm sp, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" - "stm lr, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" + "ldm sp, {r0, r1, r2, r3, r4, r5, r6}\n\t" + "stm lr, {r0, r1, r2, r3, r4, r5, r6}\n\t" "add sp, sp, #32\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -5074,7 +5080,7 @@ static sp_digit sp_2048_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "sbc %[r], r6, r6\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -5159,7 +5165,7 @@ static sp_digit sp_2048_sub_16(sp_digit* r_p, const sp_digit* a_p, const sp_digi "sbc %[r], r6, r6\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -5272,7 +5278,7 @@ static sp_digit sp_2048_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digi "sbc %[r], r6, r6\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -5347,7 +5353,8 @@ static sp_digit sp_2048_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digi "mov %[r], r3\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r3", "r12" ); return (uint32_t)(size_t)r; } @@ -5383,7 +5390,8 @@ static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, const sp_digit* b_p) "mov %[a], r12\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", + "lr" ); return (uint32_t)(size_t)a; } @@ -5585,7 +5593,8 @@ static void sp_2048_mul_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b "bgt L_sp_2048_mul_64_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r11" ); } @@ -5742,7 +5751,8 @@ static void sp_2048_sqr_64(sp_digit* r_p, const sp_digit* a_p) "bgt L_sp_2048_sqr_64_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r11" ); } @@ -5798,7 +5808,8 @@ static sp_digit sp_2048_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digi "mov %[r], r3\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r3", "r12" ); return (uint32_t)(size_t)r; } @@ -5834,7 +5845,8 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) "mov %[a], r12\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", + "lr" ); return (uint32_t)(size_t)a; } @@ -6036,7 +6048,8 @@ static void sp_2048_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b "bgt L_sp_2048_mul_32_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r11" ); } @@ -6193,7 +6206,8 @@ static void sp_2048_sqr_32(sp_digit* r_p, const sp_digit* a_p) "bgt L_sp_2048_sqr_32_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r11" ); } @@ -6314,7 +6328,7 @@ static void sp_2048_mul_d_64(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) "str r3, [%[r], #256]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -8379,7 +8393,7 @@ static void sp_2048_mul_d_64(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) "str r4, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -8408,7 +8422,8 @@ static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -8434,7 +8449,7 @@ static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp "mov %[r], r12\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)r; } @@ -8448,7 +8463,8 @@ static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -8572,7 +8588,7 @@ static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp "sbc %[r], lr, lr\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7" ); return (uint32_t)(size_t)r; } @@ -9553,7 +9569,8 @@ static SP_NOINLINE void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_ "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - mp); } @@ -9848,7 +9865,8 @@ static SP_NOINLINE void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_ "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - mp); } @@ -10053,7 +10071,8 @@ static SP_NOINLINE void sp_2048_mont_reduce_32(sp_digit* a_p, const sp_digit* m_ "mov %[mp], lr\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - mp); } @@ -10183,7 +10202,7 @@ static void sp_2048_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) "str r3, [%[r], #128]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -11224,7 +11243,7 @@ static void sp_2048_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) "str r5, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -11283,7 +11302,7 @@ static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) "add %[d1], r4, r3\n\t" : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)d1; } @@ -11421,7 +11440,7 @@ static sp_digit div_2048_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) "sub %[d1], r3, r6\n\t" : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)d1; } @@ -11820,7 +11839,7 @@ static sp_int32 sp_2048_cmp_32(const sp_digit* a_p, const sp_digit* b_p) "mov %[a], r2\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)a; } @@ -12229,7 +12248,8 @@ static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -12255,7 +12275,7 @@ static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp "mov %[r], r12\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)r; } @@ -12269,7 +12289,8 @@ static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -12505,7 +12526,7 @@ static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp "sbc %[r], lr, lr\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7" ); return (uint32_t)(size_t)r; } @@ -14414,7 +14435,8 @@ static SP_NOINLINE void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_ "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - mp); } @@ -14965,7 +14987,8 @@ static SP_NOINLINE void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_ "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - mp); } @@ -15330,7 +15353,8 @@ static SP_NOINLINE void sp_2048_mont_reduce_64(sp_digit* a_p, const sp_digit* m_ "mov %[mp], lr\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - mp); } @@ -15398,7 +15422,8 @@ static sp_digit sp_2048_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digi "mov %[r], r12\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r12", "lr" ); return (uint32_t)(size_t)r; } @@ -15532,7 +15557,7 @@ static sp_digit sp_2048_sub_64(sp_digit* r_p, const sp_digit* a_p, const sp_digi "sbc %[r], r6, r6\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -15592,7 +15617,7 @@ static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) "add %[d1], r4, r3\n\t" : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)d1; } @@ -15730,7 +15755,7 @@ static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) "sub %[d1], r3, r6\n\t" : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)d1; } @@ -16585,7 +16610,7 @@ static sp_int32 sp_2048_cmp_64(const sp_digit* a_p, const sp_digit* b_p) "mov %[a], r2\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)a; } @@ -17116,7 +17141,8 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -17142,7 +17168,7 @@ static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp "mov %[r], lr\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)r; } @@ -17156,7 +17182,8 @@ static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -17280,7 +17307,7 @@ static sp_digit sp_2048_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp "adc %[r], r8, r8\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)r; } @@ -17994,7 +18021,7 @@ static void sp_2048_lshift_64(sp_digit* r_p, const sp_digit* a_p, byte n_p) "str r6, [%[r], #4]\n\t" : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : - : "memory", "r4", "r5", "r6", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r3", "r12" ); } @@ -23899,7 +23926,8 @@ static void sp_3072_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b "stm %[r]!, {r3, r4, r5, r6}\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", + "r12" ); } @@ -23941,7 +23969,7 @@ static sp_digit sp_3072_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi "adc %[r], %[r], #0\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -24002,7 +24030,7 @@ static sp_digit sp_3072_sub_in_place_24(sp_digit* a_p, const sp_digit* b_p) "sbc %[a], r9, r9\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (uint32_t)(size_t)a; } @@ -24066,7 +24094,7 @@ static sp_digit sp_3072_add_24(sp_digit* r_p, const sp_digit* a_p, const sp_digi "adc %[r], %[r], #0\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -24238,7 +24266,7 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, const sp_digit* b_p) "sbc %[a], r9, r9\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (uint32_t)(size_t)a; } @@ -24344,7 +24372,7 @@ static sp_digit sp_3072_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digi "adc %[r], %[r], #0\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -24600,7 +24628,7 @@ static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, const sp_digit* b_p) "sbc %[a], r9, r9\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (uint32_t)(size_t)a; } @@ -24790,7 +24818,7 @@ static sp_digit sp_3072_add_96(sp_digit* r_p, const sp_digit* a_p, const sp_digi "adc %[r], %[r], #0\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -27922,7 +27950,8 @@ static void sp_3072_sqr_12(sp_digit* r_p, const sp_digit* a_p) "stm %[r]!, {r2, r3, r4, r8}\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r12" ); } @@ -27963,7 +27992,7 @@ static sp_digit sp_3072_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi "sbc %[r], r6, r6\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -28062,7 +28091,7 @@ static sp_digit sp_3072_sub_24(sp_digit* r_p, const sp_digit* a_p, const sp_digi "sbc %[r], r6, r6\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -28203,7 +28232,7 @@ static sp_digit sp_3072_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp_digi "sbc %[r], r6, r6\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -28278,7 +28307,8 @@ static sp_digit sp_3072_add_96(sp_digit* r_p, const sp_digit* a_p, const sp_digi "mov %[r], r3\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r3", "r12" ); return (uint32_t)(size_t)r; } @@ -28314,7 +28344,8 @@ static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, const sp_digit* b_p) "mov %[a], r12\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", + "lr" ); return (uint32_t)(size_t)a; } @@ -28516,7 +28547,8 @@ static void sp_3072_mul_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b "bgt L_sp_3072_mul_96_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r11" ); } @@ -28673,7 +28705,8 @@ static void sp_3072_sqr_96(sp_digit* r_p, const sp_digit* a_p) "bgt L_sp_3072_sqr_96_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r11" ); } @@ -28729,7 +28762,8 @@ static sp_digit sp_3072_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digi "mov %[r], r3\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r3", "r12" ); return (uint32_t)(size_t)r; } @@ -28765,7 +28799,8 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, const sp_digit* b_p) "mov %[a], r12\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", + "lr" ); return (uint32_t)(size_t)a; } @@ -28967,7 +29002,8 @@ static void sp_3072_mul_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b "bgt L_sp_3072_mul_48_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r11" ); } @@ -29124,7 +29160,8 @@ static void sp_3072_sqr_48(sp_digit* r_p, const sp_digit* a_p) "bgt L_sp_3072_sqr_48_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r11" ); } @@ -29245,7 +29282,7 @@ static void sp_3072_mul_d_96(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) "str r3, [%[r], #384]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -32334,7 +32371,7 @@ static void sp_3072_mul_d_96(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -32363,7 +32400,8 @@ static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -32389,7 +32427,7 @@ static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp "mov %[r], r12\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)r; } @@ -32403,7 +32441,8 @@ static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -32583,7 +32622,7 @@ static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, const sp_digit* a_p, const sp "sbc %[r], lr, lr\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7" ); return (uint32_t)(size_t)r; } @@ -34028,7 +34067,8 @@ static SP_NOINLINE void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_ "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - mp); } @@ -34451,7 +34491,8 @@ static SP_NOINLINE void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_ "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - mp); } @@ -34736,7 +34777,8 @@ static SP_NOINLINE void sp_3072_mont_reduce_48(sp_digit* a_p, const sp_digit* m_ "mov %[mp], lr\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - mp); } @@ -34866,7 +34908,7 @@ static void sp_3072_mul_d_48(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) "str r3, [%[r], #192]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -36419,7 +36461,7 @@ static void sp_3072_mul_d_48(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -36478,7 +36520,7 @@ static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) "add %[d1], r4, r3\n\t" : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)d1; } @@ -36616,7 +36658,7 @@ static sp_digit div_3072_word_48(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) "sub %[d1], r3, r6\n\t" : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)d1; } @@ -37191,7 +37233,7 @@ static sp_int32 sp_3072_cmp_48(const sp_digit* a_p, const sp_digit* b_p) "mov %[a], r2\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)a; } @@ -37600,7 +37642,8 @@ static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -37626,7 +37669,7 @@ static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp "mov %[r], r12\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)r; } @@ -37640,7 +37683,8 @@ static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -37988,7 +38032,7 @@ static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp "sbc %[r], lr, lr\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7" ); return (uint32_t)(size_t)r; } @@ -40825,7 +40869,8 @@ static SP_NOINLINE void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_ "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - mp); } @@ -41632,7 +41677,8 @@ static SP_NOINLINE void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_ "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - mp); } @@ -42157,7 +42203,8 @@ static SP_NOINLINE void sp_3072_mont_reduce_96(sp_digit* a_p, const sp_digit* m_ "mov %[mp], lr\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - mp); } @@ -42225,7 +42272,8 @@ static sp_digit sp_3072_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digi "mov %[r], r12\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r12", "lr" ); return (uint32_t)(size_t)r; } @@ -42415,7 +42463,7 @@ static sp_digit sp_3072_sub_96(sp_digit* r_p, const sp_digit* a_p, const sp_digi "sbc %[r], r6, r6\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -42475,7 +42523,7 @@ static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) "add %[d1], r4, r3\n\t" : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)d1; } @@ -42613,7 +42661,7 @@ static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) "sub %[d1], r3, r6\n\t" : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)d1; } @@ -43826,7 +43874,7 @@ static sp_int32 sp_3072_cmp_96(const sp_digit* a_p, const sp_digit* b_p) "mov %[a], r2\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)a; } @@ -44357,7 +44405,8 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -44383,7 +44432,7 @@ static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp "mov %[r], lr\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)r; } @@ -44397,7 +44446,8 @@ static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -44577,7 +44627,7 @@ static sp_digit sp_3072_cond_add_48(sp_digit* r_p, const sp_digit* a_p, const sp "adc %[r], r8, r8\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)r; } @@ -45483,7 +45533,7 @@ static void sp_3072_lshift_96(sp_digit* r_p, const sp_digit* a_p, byte n_p) "str r4, [%[r], #4]\n\t" : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : - : "memory", "r4", "r5", "r6", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r3", "r12" ); } @@ -46134,7 +46184,7 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p) "sbc %[a], r9, r9\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (uint32_t)(size_t)a; } @@ -46145,7 +46195,8 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -46380,7 +46431,7 @@ static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, const sp_dig "adc %[r], %[r], #0\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -46468,7 +46519,8 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -46494,7 +46546,8 @@ static sp_digit sp_4096_add_128(sp_digit* r_p, const sp_digit* a_p, const sp_dig "mov %[r], r3\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r3", "r12" ); return (uint32_t)(size_t)r; } @@ -46530,7 +46583,8 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, const sp_digit* b_p) "mov %[a], r12\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", + "lr" ); return (uint32_t)(size_t)a; } @@ -46732,7 +46786,8 @@ static void sp_4096_mul_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* "bgt L_sp_4096_mul_128_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r11" ); } @@ -46889,7 +46944,8 @@ static void sp_4096_sqr_128(sp_digit* r_p, const sp_digit* a_p) "bgt L_sp_4096_sqr_128_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r11" ); } @@ -47008,7 +47064,7 @@ static void sp_4096_mul_d_128(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) "str r3, [%[r], #512]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -51121,7 +51177,7 @@ static void sp_4096_mul_d_128(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) "str r5, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -51151,7 +51207,8 @@ static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -51177,7 +51234,7 @@ static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const s "mov %[r], r12\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)r; } @@ -51191,7 +51248,8 @@ static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const s * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -51651,7 +51709,7 @@ static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, const sp_digit* a_p, const s "sbc %[r], lr, lr\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7" ); return (uint32_t)(size_t)r; } @@ -55416,7 +55474,8 @@ static SP_NOINLINE void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - mp); } @@ -56479,7 +56538,8 @@ static SP_NOINLINE void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - mp); } @@ -57164,7 +57224,8 @@ static SP_NOINLINE void sp_4096_mont_reduce_128(sp_digit* a_p, const sp_digit* m "mov %[mp], lr\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - mp); } @@ -57207,7 +57268,8 @@ SP_NOINLINE static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -57232,7 +57294,8 @@ static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_dig "mov %[r], r12\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r12", "lr" ); return (uint32_t)(size_t)r; } @@ -57244,7 +57307,8 @@ static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_dig * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -57478,7 +57542,7 @@ static sp_digit sp_4096_sub_128(sp_digit* r_p, const sp_digit* a_p, const sp_dig "sbc %[r], r6, r6\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -57538,7 +57602,7 @@ static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) "add %[d1], r4, r3\n\t" : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)d1; } @@ -57676,7 +57740,7 @@ static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) "sub %[d1], r3, r6\n\t" : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)d1; } @@ -59241,7 +59305,7 @@ static sp_int32 sp_4096_cmp_128(const sp_digit* a_p, const sp_digit* b_p) "mov %[a], r2\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)a; } @@ -59772,7 +59836,8 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -59798,7 +59863,7 @@ static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp "mov %[r], lr\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)r; } @@ -59812,7 +59877,8 @@ static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -60048,7 +60114,7 @@ static sp_digit sp_4096_cond_add_64(sp_digit* r_p, const sp_digit* a_p, const sp "adc %[r], r8, r8\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)r; } @@ -61146,7 +61212,7 @@ static void sp_4096_lshift_128(sp_digit* r_p, const sp_digit* a_p, byte n_p) "str r5, [%[r], #4]\n\t" : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : - : "memory", "r4", "r5", "r6", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r3", "r12" ); } @@ -61627,7 +61693,8 @@ static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p "bgt L_sp_256_mul_8_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r11" ); } @@ -63623,7 +63690,8 @@ static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p "stm %[r]!, {r3, r4, r5, r6}\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", + "r12" ); } @@ -63976,7 +64044,8 @@ static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p "add sp, sp, #36\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -64002,7 +64071,7 @@ static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p "strd %[r], %[a], [sp, #36]\n\t" #endif "mov lr, %[b]\n\t" - "ldm %[a], {%[r], %[a], %[b], r3}\n\t" + "ldm %[a], {r0, r1, r2, r3}\n\t" "ldm lr!, {r4, r5, r6}\n\t" "umull r10, r11, %[r], r4\n\t" "umull r12, r7, %[a], r4\n\t" @@ -64047,7 +64116,7 @@ static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p "umaal r4, r6, %[b], r7\n\t" "sub lr, lr, #16\n\t" "umaal r5, r6, r3, r7\n\t" - "ldm %[r], {%[r], %[a], %[b], r3}\n\t" + "ldm %[r], {r0, r1, r2, r3}\n\t" "str r6, [sp, #32]\n\t" "ldm lr!, {r6}\n\t" "mov r7, #0\n\t" @@ -64107,7 +64176,8 @@ static void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p "add sp, sp, #44\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r10", "r11", "r12", "r7", "r8", "r9", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r10", "r11", "r12", "r7", + "r8", "r9", "lr" ); } @@ -64267,7 +64337,8 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) "bgt L_sp_256_sqr_8_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r11" ); } @@ -65478,7 +65549,8 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) "stm %[r]!, {r2, r3, r4, r8}\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r12" ); } @@ -65721,7 +65793,8 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) "add sp, sp, #0x44\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -65739,7 +65812,7 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) __asm__ __volatile__ ( "sub sp, sp, #32\n\t" "str %[r], [sp, #28]\n\t" - "ldm %[a], {%[r], %[a], r2, r3, r4, r5, r6, r7}\n\t" + "ldm %[a], {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" "umull r9, r10, %[r], %[r]\n\t" "umull r11, r12, %[r], %[a]\n\t" "adds r11, r11, r11\n\t" @@ -65826,18 +65899,19 @@ static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) /* R[15] = r7 */ "ldr lr, [sp, #28]\n\t" "add lr, lr, #28\n\t" - "stm lr!, {%[r], r12}\n\t" + "stm lr!, {r0, r12}\n\t" "stm lr!, {r11}\n\t" "stm lr!, {r10}\n\t" "stm lr!, {r3, r4, r8, r9}\n\t" "stm lr!, {r7}\n\t" "sub lr, lr, #0x40\n\t" - "ldm sp, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" - "stm lr, {%[r], %[a], r2, r3, r4, r5, r6}\n\t" + "ldm sp, {r0, r1, r2, r3, r4, r5, r6}\n\t" + "stm lr, {r0, r1, r2, r3, r4, r5, r6}\n\t" "add sp, sp, #32\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -65876,7 +65950,8 @@ static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* "mov %[r], r3\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r3", "r12" ); return (uint32_t)(size_t)r; } @@ -65913,7 +65988,7 @@ static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* "adc %[r], %[r], #0\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -65925,7 +66000,8 @@ static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* * a The number to convert. * m The modulus (prime). */ -static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -66149,7 +66225,8 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "add sp, sp, #24\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", + "lr", "r10" ); (void)m_p; return (uint32_t)(size_t)r; @@ -66360,7 +66437,8 @@ static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm) * m Modulus (prime). * mp Montgomery multiplier. */ -static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p, sp_digit mp_p) +static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, + const sp_digit* m_p, sp_digit mp_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -68464,7 +68542,8 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, co "add sp, sp, #0x44\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r12" ); (void)m_p; (void)mp_p; @@ -68480,7 +68559,8 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, co * m Modulus (prime). * mp Montgomery multiplier. */ -static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p, sp_digit mp_p) +static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, + const sp_digit* m_p, sp_digit mp_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -68941,7 +69021,8 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, co "add sp, sp, #0x44\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); (void)m_p; (void)mp_p; @@ -68957,7 +69038,8 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, co * m Modulus (prime). * mp Montgomery multiplier. */ -static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p, sp_digit mp_p) +static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, + const sp_digit* m_p, sp_digit mp_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -68972,7 +69054,7 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, co "strd %[r], %[a], [sp, #68]\n\t" #endif "mov lr, %[b]\n\t" - "ldm %[a], {%[r], %[a], %[b], r3}\n\t" + "ldm %[a], {r0, r1, r2, r3}\n\t" "ldm lr!, {r4, r5, r6}\n\t" "umull r10, r11, %[r], r4\n\t" "umull r12, r7, %[a], r4\n\t" @@ -69017,7 +69099,7 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, co "umaal r4, r6, %[b], r7\n\t" "sub lr, lr, #16\n\t" "umaal r5, r6, r3, r7\n\t" - "ldm %[r], {%[r], %[a], %[b], r3}\n\t" + "ldm %[r], {r0, r1, r2, r3}\n\t" "str r6, [sp, #64]\n\t" "ldm lr!, {r6}\n\t" "mov r7, #0\n\t" @@ -69196,7 +69278,8 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, co "add sp, sp, #0x4c\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r10", "r11", "r12", "r7", "r8", "r9", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r10", "r11", "r12", "r7", + "r8", "r9", "lr" ); (void)m_p; (void)mp_p; @@ -69211,7 +69294,8 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, co * m Modulus (prime). * mp Montgomery multiplier. */ -static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p, + sp_digit mp_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -70394,7 +70478,8 @@ static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, co "add sp, sp, #0x44\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r12", "r8", "r9", "r10", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r12", "r8", "r9", + "r10", "lr" ); (void)m_p; (void)mp_p; @@ -70408,7 +70493,8 @@ static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, co * m Modulus (prime). * mp Montgomery multiplier. */ -static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p, + sp_digit mp_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -70760,7 +70846,8 @@ static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, co "add sp, sp, #0x44\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); (void)m_p; (void)mp_p; @@ -70774,7 +70861,8 @@ static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, co * m Modulus (prime). * mp Montgomery multiplier. */ -static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p, + sp_digit mp_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -70782,7 +70870,7 @@ static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, co __asm__ __volatile__ ( "sub sp, sp, #0x44\n\t" "str %[r], [sp, #64]\n\t" - "ldm %[a], {%[r], %[a], r2, r3, r4, r5, r6, r7}\n\t" + "ldm %[a], {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" "umull r9, r10, %[r], %[r]\n\t" "umull r11, r12, %[r], %[a]\n\t" "adds r11, r11, r11\n\t" @@ -70869,7 +70957,7 @@ static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, co /* R[15] = r7 */ "mov lr, sp\n\t" "add lr, lr, #28\n\t" - "stm lr!, {%[r], r12}\n\t" + "stm lr!, {r0, r12}\n\t" "stm lr!, {r11}\n\t" "stm lr!, {r10}\n\t" "stm lr!, {r3, r4, r8, r9}\n\t" @@ -71000,7 +71088,8 @@ static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, co "add sp, sp, #0x44\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); (void)m_p; (void)mp_p; @@ -71232,7 +71321,7 @@ static sp_int32 sp_256_cmp_8(const sp_digit* a_p, const sp_digit* b_p) "mov %[a], r2\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)a; } @@ -71252,7 +71341,8 @@ static sp_int32 sp_256_cmp_8(const sp_digit* a_p, const sp_digit* b_p) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -71278,7 +71368,7 @@ static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_d "mov %[r], r12\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)r; } @@ -71292,7 +71382,8 @@ static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_d * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -71332,7 +71423,7 @@ static sp_digit sp_256_cond_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_d "sbc %[r], lr, lr\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7" ); return (uint32_t)(size_t)r; } @@ -71620,7 +71711,8 @@ static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - mp); } @@ -71723,7 +71815,8 @@ static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - mp); } @@ -71808,7 +71901,8 @@ static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, "mov %[mp], lr\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - mp); } @@ -71959,7 +72053,8 @@ static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, "add sp, sp, #0x44\n\t" : [a] "+r" (a) : - : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11", "r12", "lr" ); (void)m_p; (void)mp_p; @@ -71972,7 +72067,8 @@ static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static SP_NOINLINE void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +static SP_NOINLINE void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, + sp_digit mp_p) { register sp_digit* a asm ("r0") = (sp_digit*)a_p; register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; @@ -72244,7 +72340,8 @@ static SP_NOINLINE void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - mp); } @@ -72256,7 +72353,8 @@ static SP_NOINLINE void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static SP_NOINLINE void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +static SP_NOINLINE void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, + sp_digit mp_p) { register sp_digit* a asm ("r0") = (sp_digit*)a_p; register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; @@ -72347,7 +72445,8 @@ static SP_NOINLINE void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - mp); } @@ -72359,7 +72458,8 @@ static SP_NOINLINE void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static SP_NOINLINE void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +static SP_NOINLINE void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit* m_p, + sp_digit mp_p) { register sp_digit* a asm ("r0") = (sp_digit*)a_p; register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; @@ -72432,7 +72532,8 @@ static SP_NOINLINE void sp_256_mont_reduce_order_8(sp_digit* a_p, const sp_digit "mov %[mp], lr\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - mp); } @@ -72486,7 +72587,8 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +static void sp_256_mont_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, + const sp_digit* m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -72530,7 +72632,8 @@ static void sp_256_mont_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "stm %[r], {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); (void)m_p; } @@ -72580,7 +72683,8 @@ static void sp_256_mont_dbl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "stm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r2" ); (void)m_p; } @@ -72662,7 +72766,8 @@ static void sp_256_mont_tpl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "stm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r2", "r3", "r12" ); (void)m_p; } @@ -72674,7 +72779,8 @@ static void sp_256_mont_tpl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +static void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, + const sp_digit* m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -72716,7 +72822,8 @@ static void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "stm %[r], {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); (void)m_p; } @@ -72793,7 +72900,8 @@ static void sp_256_mont_div2_8(sp_digit* r_p, const sp_digit* a_p, const sp_digi "stm %[r], {r8, r9, r10, r11}\n\t" : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r3" ); } @@ -76318,7 +76426,7 @@ static void sp_256_add_one_8(sp_digit* a_p) "stm %[a]!, {r1, r2, r3, r4}\n\t" : [a] "+r" (a) : - : "memory", "r1", "r2", "r3", "r4", "cc" + : "memory", "cc", "r1", "r2", "r3", "r4" ); } @@ -76722,7 +76830,8 @@ static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, const sp_digit* b_p) "mov %[a], r12\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", + "lr" ); return (uint32_t)(size_t)a; } @@ -76756,7 +76865,7 @@ static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, const sp_digit* b_p) "sbc %[a], r9, r9\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (uint32_t)(size_t)a; } @@ -76856,7 +76965,7 @@ static void sp_256_mul_d_8(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) "str r3, [%[r], #32]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -77129,7 +77238,7 @@ static void sp_256_mul_d_8(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) "str r5, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -77188,7 +77297,7 @@ static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) "add %[d1], r4, r3\n\t" : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)d1; } @@ -77326,7 +77435,7 @@ static sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) "sub %[d1], r3, r6\n\t" : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)d1; } @@ -78010,7 +78119,8 @@ static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* "mov %[r], r12\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r12", "lr" ); return (uint32_t)(size_t)r; } @@ -78046,7 +78156,7 @@ static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* "sbc %[r], r6, r6\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -78126,7 +78236,8 @@ static void sp_256_rshift1_8(sp_digit* r_p, const sp_digit* a_p) #endif : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", + "lr", "r10" ); } @@ -78212,7 +78323,8 @@ static void sp_256_div2_mod_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "stm %[r], {r8, r9, r10, r11}\n\t" : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r3", "r12" ); } @@ -78255,7 +78367,8 @@ static const unsigned char L_sp_256_num_bits_8_table[] = { static int sp_256_num_bits_8(const sp_digit* a_p) { register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register unsigned char* L_sp_256_num_bits_8_table_c asm ("r1") = (unsigned char*)&L_sp_256_num_bits_8_table; + register unsigned char* L_sp_256_num_bits_8_table_c asm ("r1") = + (unsigned char*)&L_sp_256_num_bits_8_table; __asm__ __volatile__ ( "mov lr, %[L_sp_256_num_bits_8_table]\n\t" @@ -78567,9 +78680,10 @@ static int sp_256_num_bits_8(const sp_digit* a_p) "\n" "L_sp_256_num_bits_8_9_%=: \n\t" "mov %[a], r12\n\t" - : [a] "+r" (a), [L_sp_256_num_bits_8_table] "+r" (L_sp_256_num_bits_8_table_c) + : [a] "+r" (a), + [L_sp_256_num_bits_8_table] "+r" (L_sp_256_num_bits_8_table_c) : - : "memory", "r2", "r3", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r12", "lr" ); return (uint32_t)(size_t)a; } @@ -78658,7 +78772,7 @@ static int sp_256_num_bits_8(const sp_digit* a_p) "mov %[a], r12\n\t" : [a] "+r" (a) : - : "memory", "r1", "r2", "r3", "r12", "lr", "cc" + : "memory", "cc", "r1", "r2", "r3", "r12", "lr" ); return (uint32_t)(size_t)a; } @@ -79931,7 +80045,8 @@ static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ "bgt L_sp_384_mul_12_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r11" ); } @@ -85428,7 +85543,8 @@ static void sp_384_mul_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ "stm %[r]!, {r3, r4, r5, r6}\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", + "r12" ); } @@ -85587,7 +85703,8 @@ static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) "bgt L_sp_384_sqr_12_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r11" ); } @@ -88650,7 +88767,8 @@ static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) "stm %[r]!, {r2, r3, r4, r8}\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r12" ); } @@ -88688,7 +88806,8 @@ static sp_digit sp_384_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit "mov %[r], r3\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r3", "r12" ); return (uint32_t)(size_t)r; } @@ -88732,7 +88851,7 @@ static sp_digit sp_384_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit "adc %[r], %[r], #0\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -89043,7 +89162,8 @@ static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -89069,7 +89189,7 @@ static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_ "mov %[r], r12\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)r; } @@ -89083,7 +89203,8 @@ static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_ * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -89137,7 +89258,7 @@ static sp_digit sp_384_cond_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_ "sbc %[r], lr, lr\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7" ); return (uint32_t)(size_t)r; } @@ -89540,7 +89661,8 @@ static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a_p, const sp_digit* m_p "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - mp); } @@ -89675,7 +89797,8 @@ static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a_p, const sp_digit* m_p "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - mp); } @@ -89780,7 +89903,8 @@ static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a_p, const sp_digit* m_p "mov %[mp], lr\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - mp); } @@ -90101,7 +90225,7 @@ static sp_int32 sp_384_cmp_12(const sp_digit* a_p, const sp_digit* b_p) "mov %[a], r2\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)a; } @@ -90159,7 +90283,8 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -static void sp_384_mont_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +static void sp_384_mont_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, + const sp_digit* m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -90242,7 +90367,8 @@ static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit "mov %[r], r12\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r12", "lr" ); return (uint32_t)(size_t)r; } @@ -90285,7 +90411,7 @@ static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit "sbc %[r], r6, r6\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -90300,7 +90426,8 @@ static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -90326,7 +90453,7 @@ static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_ "mov %[r], lr\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)r; } @@ -90340,7 +90467,8 @@ static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_ * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -90394,7 +90522,7 @@ static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_ "adc %[r], r8, r8\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)r; } @@ -90407,7 +90535,8 @@ static sp_digit sp_384_cond_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_ * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -static void sp_384_mont_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +static void sp_384_mont_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, + const sp_digit* m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -90477,7 +90606,7 @@ static void sp_384_rshift1_12(sp_digit* r_p, const sp_digit* a_p) "str r4, [%[r], #44]\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "cc" + : "memory", "cc", "r2", "r3", "r4" ); } @@ -94081,7 +94210,7 @@ static void sp_384_add_one_12(sp_digit* a_p) "stm %[a]!, {r1, r2, r3, r4}\n\t" : [a] "+r" (a) : - : "memory", "r1", "r2", "r3", "r4", "cc" + : "memory", "cc", "r1", "r2", "r3", "r4" ); } @@ -94485,7 +94614,8 @@ static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, const sp_digit* b_p) "mov %[a], r12\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", + "lr" ); return (uint32_t)(size_t)a; } @@ -94526,7 +94656,7 @@ static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, const sp_digit* b_p) "sbc %[a], r9, r9\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (uint32_t)(size_t)a; } @@ -94626,7 +94756,7 @@ static void sp_384_mul_d_12(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) "str r3, [%[r], #48]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -95027,7 +95157,7 @@ static void sp_384_mul_d_12(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -95086,7 +95216,7 @@ static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) "add %[d1], r4, r3\n\t" : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)d1; } @@ -95224,7 +95354,7 @@ static sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) "sub %[d1], r3, r6\n\t" : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)d1; } @@ -95952,7 +96082,8 @@ static void sp_384_div2_mod_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi "str r10, [%[r], #44]\n\t" : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r3", "r12" ); } @@ -95995,7 +96126,8 @@ static const unsigned char L_sp_384_num_bits_12_table[] = { static int sp_384_num_bits_12(const sp_digit* a_p) { register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register unsigned char* L_sp_384_num_bits_12_table_c asm ("r1") = (unsigned char*)&L_sp_384_num_bits_12_table; + register unsigned char* L_sp_384_num_bits_12_table_c asm ("r1") = + (unsigned char*)&L_sp_384_num_bits_12_table; __asm__ __volatile__ ( "mov lr, %[L_sp_384_num_bits_12_table]\n\t" @@ -96559,9 +96691,10 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "\n" "L_sp_384_num_bits_12_13_%=: \n\t" "mov %[a], r12\n\t" - : [a] "+r" (a), [L_sp_384_num_bits_12_table] "+r" (L_sp_384_num_bits_12_table_c) + : [a] "+r" (a), + [L_sp_384_num_bits_12_table] "+r" (L_sp_384_num_bits_12_table_c) : - : "memory", "r2", "r3", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r12", "lr" ); return (uint32_t)(size_t)a; } @@ -96710,7 +96843,7 @@ static int sp_384_num_bits_12(const sp_digit* a_p) "mov %[a], r12\n\t" : [a] "+r" (a) : - : "memory", "r1", "r2", "r3", "r12", "lr", "cc" + : "memory", "cc", "r1", "r2", "r3", "r12", "lr" ); return (uint32_t)(size_t)a; } @@ -98032,7 +98165,8 @@ static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ "bgt L_sp_521_mul_17_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r11" ); } @@ -109048,7 +109182,8 @@ static void sp_521_mul_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_ "stm %[r]!, {r3}\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", + "r12" ); } @@ -109210,7 +109345,8 @@ static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) "bgt L_sp_521_sqr_17_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r11" ); } @@ -115082,7 +115218,8 @@ static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) "stm %[r]!, {r2}\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r12" ); } @@ -115126,7 +115263,8 @@ static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit "adc %[r], r4, #0\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r3", "r12" ); return (uint32_t)(size_t)r; } @@ -115181,7 +115319,7 @@ static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit "adc %[r], %[r], #0\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -115409,7 +115547,8 @@ static int sp_521_point_to_ecc_point_17(const sp_point_521* p, ecc_point* pm) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -115435,7 +115574,7 @@ static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_ "mov %[r], r12\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)r; } @@ -115449,7 +115588,8 @@ static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_ * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -115522,7 +115662,7 @@ static sp_digit sp_521_cond_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_ "sbc %[r], lr, lr\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7" ); return (uint32_t)(size_t)r; } @@ -115650,7 +115790,8 @@ static SP_NOINLINE void sp_521_mont_reduce_17(sp_digit* a_p, const sp_digit* m_p "stm %[a]!, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" : [a] "+r" (a) : - : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11", "r12", "lr" ); (void)m_p; (void)mp_p; @@ -115663,7 +115804,8 @@ static SP_NOINLINE void sp_521_mont_reduce_17(sp_digit* a_p, const sp_digit* m_p * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, + sp_digit mp_p) { register sp_digit* a asm ("r0") = (sp_digit*)a_p; register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; @@ -116279,7 +116421,8 @@ static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digi "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_521_cond_sub_17(a - 17, a, m, (sp_digit)0 - mp); } @@ -116291,7 +116434,8 @@ static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digi * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, + sp_digit mp_p) { register sp_digit* a asm ("r0") = (sp_digit*)a_p; register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; @@ -116537,7 +116681,8 @@ static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digi "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_521_cond_sub_17(a - 17, a, m, (sp_digit)0 - mp); } @@ -116549,7 +116694,8 @@ static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digi * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) +static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, + sp_digit mp_p) { register sp_digit* a asm ("r0") = (sp_digit*)a_p; register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; @@ -116750,7 +116896,8 @@ static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digi "mov %[mp], lr\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_521_cond_sub_17(a - 17, a, m, (sp_digit)0 - mp); } @@ -117123,7 +117270,7 @@ static sp_int32 sp_521_cmp_17(const sp_digit* a_p, const sp_digit* b_p) "mov %[a], r2\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)a; } @@ -117181,7 +117328,8 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -static void sp_521_mont_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +static void sp_521_mont_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, + const sp_digit* m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -117256,7 +117404,8 @@ static void sp_521_mont_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi "stm %[r]!, {r4}\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r3", "r12" ); (void)m_p; } @@ -117332,7 +117481,8 @@ static void sp_521_mont_dbl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi "stm %[r]!, {r4}\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r2", "r3" ); (void)m_p; } @@ -117428,7 +117578,8 @@ static void sp_521_mont_tpl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi "stm %[r]!, {r4}\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r2", "r3" ); (void)m_p; } @@ -117440,7 +117591,8 @@ static void sp_521_mont_tpl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -static void sp_521_mont_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +static void sp_521_mont_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, + const sp_digit* m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -117516,7 +117668,8 @@ static void sp_521_mont_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi "stm %[r]!, {r4}\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r3", "r12" ); (void)m_p; } @@ -117595,7 +117748,7 @@ static void sp_521_rshift1_17(sp_digit* r_p, const sp_digit* a_p) "str r3, [%[r], #64]\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "cc" + : "memory", "cc", "r2", "r3", "r4" ); } @@ -121830,7 +121983,7 @@ static void sp_521_add_one_17(sp_digit* a_p) "stm %[a]!, {r1}\n\t" : [a] "+r" (a) : - : "memory", "r1", "r2", "r3", "r4", "cc" + : "memory", "cc", "r1", "r2", "r3", "r4" ); } @@ -122304,7 +122457,7 @@ static void sp_521_rshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) #endif : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : - : "memory", "r4", "r5", "r6", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r3", "r12" ); } @@ -122424,7 +122577,7 @@ static void sp_521_lshift_17(sp_digit* r_p, const sp_digit* a_p, byte n_p) "str r5, [%[r], #4]\n\t" : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : - : "memory", "r4", "r5", "r6", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r3", "r12" ); } @@ -122642,7 +122795,7 @@ static void sp_521_lshift_34(sp_digit* r_p, const sp_digit* a_p, byte n_p) "str r6, [%[r], #4]\n\t" : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : - : "memory", "r4", "r5", "r6", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r3", "r12" ); } @@ -122681,7 +122834,8 @@ static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, const sp_digit* b_p) "sbc %[a], %[a], %[a]\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", + "lr" ); return (uint32_t)(size_t)a; } @@ -122733,7 +122887,7 @@ static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, const sp_digit* b_p) "sbc %[a], r9, r9\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (uint32_t)(size_t)a; } @@ -122833,7 +122987,7 @@ static void sp_521_mul_d_17(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) "str r3, [%[r], #68]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -123394,7 +123548,7 @@ static void sp_521_mul_d_17(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) "str r5, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -123453,7 +123607,7 @@ static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) "add %[d1], r4, r3\n\t" : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)d1; } @@ -123591,7 +123745,7 @@ static sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) "sub %[d1], r3, r6\n\t" : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)d1; } @@ -124281,7 +124435,8 @@ static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit "sbc %[r], r6, r6\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r12", "lr" ); return (uint32_t)(size_t)r; } @@ -124335,7 +124490,7 @@ static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit "sbc %[r], r6, r6\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -124481,7 +124636,8 @@ static void sp_521_div2_mod_17(sp_digit* r_p, const sp_digit* a_p, const sp_digi "str r9, [%[r], #64]\n\t" : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r3", "r12" ); } @@ -124524,7 +124680,8 @@ static const unsigned char L_sp_521_num_bits_17_table[] = { static int sp_521_num_bits_17(const sp_digit* a_p) { register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register unsigned char* L_sp_521_num_bits_17_table_c asm ("r1") = (unsigned char*)&L_sp_521_num_bits_17_table; + register unsigned char* L_sp_521_num_bits_17_table_c asm ("r1") = + (unsigned char*)&L_sp_521_num_bits_17_table; __asm__ __volatile__ ( "mov lr, %[L_sp_521_num_bits_17_table]\n\t" @@ -125403,9 +125560,10 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "\n" "L_sp_521_num_bits_17_18_%=: \n\t" "mov %[a], r12\n\t" - : [a] "+r" (a), [L_sp_521_num_bits_17_table] "+r" (L_sp_521_num_bits_17_table_c) + : [a] "+r" (a), + [L_sp_521_num_bits_17_table] "+r" (L_sp_521_num_bits_17_table_c) : - : "memory", "r2", "r3", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r12", "lr" ); return (uint32_t)(size_t)a; } @@ -125629,7 +125787,7 @@ static int sp_521_num_bits_17(const sp_digit* a_p) "mov %[a], r12\n\t" : [a] "+r" (a) : - : "memory", "r1", "r2", "r3", "r12", "lr", "cc" + : "memory", "cc", "r1", "r2", "r3", "r12", "lr" ); return (uint32_t)(size_t)a; } @@ -136390,7 +136548,8 @@ static void sp_1024_mul_16(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b "stm %[r]!, {r3, r4, r5, r6}\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", + "r12" ); } @@ -141622,7 +141781,8 @@ static void sp_1024_sqr_16(sp_digit* r_p, const sp_digit* a_p) "stm %[r]!, {r2, r3, r4, r8}\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r12" ); } @@ -141671,7 +141831,7 @@ static sp_digit sp_1024_add_16(sp_digit* r_p, const sp_digit* a_p, const sp_digi "adc %[r], %[r], #0\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -141746,7 +141906,7 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) "sbc %[a], r9, r9\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (uint32_t)(size_t)a; } @@ -141824,7 +141984,7 @@ static sp_digit sp_1024_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digi "adc %[r], %[r], #0\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -141942,7 +142102,7 @@ static sp_digit sp_1024_sub_16(sp_digit* r_p, const sp_digit* a_p, const sp_digi "sbc %[r], r6, r6\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)r; } @@ -142179,7 +142339,8 @@ static void sp_1024_mul_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b "bgt L_sp_1024_mul_32_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r11" ); } @@ -142336,7 +142497,8 @@ static void sp_1024_sqr_32(sp_digit* r_p, const sp_digit* a_p) "bgt L_sp_1024_sqr_32_store_%=\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", + "r11" ); } @@ -142456,7 +142618,8 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) "mov %[a], r12\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", + "lr" ); return (uint32_t)(size_t)a; } @@ -142471,7 +142634,8 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, const sp_digit* b_p) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -142497,7 +142661,7 @@ static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp "mov %[r], r12\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)r; } @@ -142511,7 +142675,8 @@ static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -142635,7 +142800,7 @@ static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp "sbc %[r], lr, lr\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7" ); return (uint32_t)(size_t)r; } @@ -142674,7 +142839,8 @@ static sp_digit sp_1024_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digi "mov %[r], r3\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r3", "r12" ); return (uint32_t)(size_t)r; } @@ -142774,7 +142940,7 @@ static void sp_1024_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) "str r3, [%[r], #128]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -143815,7 +143981,7 @@ static void sp_1024_mul_d_32(sp_digit* r_p, const sp_digit* a_p, sp_digit b_p) "str r5, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -143874,7 +144040,7 @@ static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) "add %[d1], r4, r3\n\t" : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)d1; } @@ -144012,7 +144178,7 @@ static sp_digit div_1024_word_32(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) "sub %[d1], r3, r6\n\t" : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)d1; } @@ -144441,7 +144607,7 @@ static sp_int32 sp_1024_cmp_32(const sp_digit* a_p, const sp_digit* b_p) "mov %[a], r2\n\t" : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)a; } @@ -145755,7 +145921,8 @@ static SP_NOINLINE void sp_1024_mont_reduce_32(sp_digit* a_p, const sp_digit* m_ "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_1024_cond_sub_32(a - 32, a, m, mp); } @@ -146055,7 +146222,8 @@ static SP_NOINLINE void sp_1024_mont_reduce_32(sp_digit* a_p, const sp_digit* m_ "mov %[mp], r3\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_1024_cond_sub_32(a - 32, a, m, mp); } @@ -146265,7 +146433,8 @@ static SP_NOINLINE void sp_1024_mont_reduce_32(sp_digit* a_p, const sp_digit* m_ "mov %[mp], lr\n\t" : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11" ); sp_1024_cond_sub_32(a - 32, a, m, mp); } @@ -146415,7 +146584,8 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -static void sp_1024_mont_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +static void sp_1024_mont_add_32(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, const sp_digit* m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -146577,7 +146747,8 @@ static void sp_1024_mont_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig "stm %[r]!, {r4, r5, r6, r7}\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r12" ); } @@ -146587,7 +146758,8 @@ static void sp_1024_mont_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig * a Number to double in Montgomery form. * m Modulus (prime). */ -static void sp_1024_mont_dbl_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +static void sp_1024_mont_dbl_32(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -146732,7 +146904,8 @@ static void sp_1024_mont_dbl_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig "stm %[r]!, {r4, r5, r6, r7}\n\t" : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : "memory", "r8", "r9", "r10", "r11", "r4", "r5", "r6", "r7", "r12", "cc" + : "memory", "cc", "r8", "r9", "r10", "r11", "r4", "r5", "r6", "r7", + "r12" ); } @@ -146742,7 +146915,8 @@ static void sp_1024_mont_dbl_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig * a Number to triple in Montgomery form. * m Modulus (prime). */ -static void sp_1024_mont_tpl_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +static void sp_1024_mont_tpl_32(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -147042,7 +147216,8 @@ static void sp_1024_mont_tpl_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig "stm %[r]!, {r4, r5, r6, r7}\n\t" : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : "memory", "r8", "r9", "r10", "r11", "r4", "r5", "r6", "r7", "r12", "cc" + : "memory", "cc", "r8", "r9", "r10", "r11", "r4", "r5", "r6", "r7", + "r12" ); } @@ -147053,7 +147228,8 @@ static void sp_1024_mont_tpl_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -static void sp_1024_mont_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) +static void sp_1024_mont_sub_32(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, const sp_digit* m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -147209,7 +147385,8 @@ static void sp_1024_mont_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig "stm %[r]!, {r4, r5, r6, r7}\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r12" ); } @@ -147222,7 +147399,8 @@ static void sp_1024_mont_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_dig * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -147248,7 +147426,7 @@ static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp "mov %[r], lr\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6" ); return (uint32_t)(size_t)r; } @@ -147262,7 +147440,8 @@ static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp * b A single precision number to add. * m Mask value to apply. */ -static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, sp_digit m_p) +static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, + const sp_digit* b_p, sp_digit m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -147386,7 +147565,7 @@ static sp_digit sp_1024_cond_add_32(sp_digit* r_p, const sp_digit* a_p, const sp "adc %[r], r8, r8\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); return (uint32_t)(size_t)r; } @@ -147526,7 +147705,7 @@ static void sp_1024_rshift1_32(sp_digit* r_p, const sp_digit* a_p) "str r3, [%[r], #124]\n\t" : [r] "+r" (r), [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "cc" + : "memory", "cc", "r2", "r3", "r4" ); }