diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index c51366553..fb507ede0 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -220,9 +220,8 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { sp_digit tmp[8]; + __asm__ __volatile__ ( - "mov r8, %[r]\n\t" - "mov %[r], #0\n\t" /* A[0] * B[0] */ "ldr r6, [%[a], #0]\n\t" "ldr r7, [%[b], #0]\n\t" @@ -231,19 +230,17 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, "str r3, [%[tmp], #0]\n\t" "mov r3, #0\n\t" /* A[0] * B[1] */ - "ldr r6, [%[a], #0]\n\t" "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adc r5, r5, r7\n\t" /* A[1] * B[0] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" "str r4, [%[tmp], #4]\n\t" "mov r4, #0\n\t" /* A[0] * B[2] */ @@ -251,22 +248,22 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[1] * B[1] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[2] * B[0] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" "str r5, [%[tmp], #8]\n\t" "mov r5, #0\n\t" /* A[0] * B[3] */ @@ -274,29 +271,29 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[1] * B[2] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[2] * B[1] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[3] * B[0] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" "str r3, [%[tmp], #12]\n\t" "mov r3, #0\n\t" /* A[0] * B[4] */ @@ -304,36 +301,36 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[1] * B[3] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[2] * B[2] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[3] * B[1] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[4] * B[0] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" "str r4, [%[tmp], #16]\n\t" "mov r4, #0\n\t" /* A[0] * B[5] */ @@ -341,43 +338,43 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[1] * B[4] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[2] * B[3] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[3] * B[2] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[4] * B[1] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[5] * B[0] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" "str r5, [%[tmp], #20]\n\t" "mov r5, #0\n\t" /* A[0] * B[6] */ @@ -385,50 +382,50 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[1] * B[5] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[2] * B[4] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[3] * B[3] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[4] * B[2] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[5] * B[1] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[6] * B[0] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" "str r3, [%[tmp], #24]\n\t" "mov r3, #0\n\t" /* A[0] * B[7] */ @@ -436,57 +433,57 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[1] * B[6] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[2] * B[5] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[3] * B[4] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[4] * B[3] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[5] * B[2] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[6] * B[1] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[7] * B[0] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" "str r4, [%[tmp], #28]\n\t" "mov r4, #0\n\t" /* A[1] * B[7] */ @@ -494,230 +491,230 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[2] * B[6] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[3] * B[5] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[4] * B[4] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[5] * B[3] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[6] * B[2] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[7] * B[1] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "str r5, [r8, #32]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[r], #32]\n\t" "mov r5, #0\n\t" /* A[2] * B[7] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[3] * B[6] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[4] * B[5] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[5] * B[4] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[6] * B[3] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[7] * B[2] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "str r3, [r8, #36]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "str r3, [%[r], #36]\n\t" "mov r3, #0\n\t" /* A[3] * B[7] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[4] * B[6] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[5] * B[5] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[6] * B[4] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[7] * B[3] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "str r4, [r8, #40]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[r], #40]\n\t" "mov r4, #0\n\t" /* A[4] * B[7] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[5] * B[6] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[6] * B[5] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[7] * B[4] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "str r5, [r8, #44]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[r], #44]\n\t" "mov r5, #0\n\t" /* A[5] * B[7] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[6] * B[6] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[7] * B[5] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "str r3, [r8, #48]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "str r3, [%[r], #48]\n\t" "mov r3, #0\n\t" /* A[6] * B[7] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[7] * B[6] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "str r4, [r8, #52]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[r], #52]\n\t" "mov r4, #0\n\t" /* A[7] * B[7] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "str r5, [r8, #56]\n\t" - "str r3, [r8, #60]\n\t" + "adc r3, r3, r7\n\t" + "str r5, [%[r], #56]\n\t" + "str r3, [%[r], #60]\n\t" + /* Transfer tmp to r */ "ldr r3, [%[tmp], #0]\n\t" "ldr r4, [%[tmp], #4]\n\t" "ldr r5, [%[tmp], #8]\n\t" "ldr r6, [%[tmp], #12]\n\t" - "str r3, [r8, #0]\n\t" - "str r4, [r8, #4]\n\t" - "str r5, [r8, #8]\n\t" - "str r6, [r8, #12]\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" "ldr r3, [%[tmp], #16]\n\t" "ldr r4, [%[tmp], #20]\n\t" "ldr r5, [%[tmp], #24]\n\t" "ldr r6, [%[tmp], #28]\n\t" - "str r3, [r8, #16]\n\t" - "str r4, [r8, #20]\n\t" - "str r5, [r8, #24]\n\t" - "str r6, [r8, #28]\n\t" - "mov %[r], r8\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8" + : "memory", "r3", "r4", "r5", "r6", "r7" ); } @@ -730,8 +727,6 @@ SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) { sp_digit tmp[8]; __asm__ __volatile__ ( - "mov r8, %[r]\n\t" - "mov %[r], #0\n\t" /* A[0] * A[0] */ "ldr r6, [%[a], #0]\n\t" "umull r3, r4, r6, r6\n\t" @@ -739,15 +734,13 @@ SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) "str r3, [%[tmp], #0]\n\t" "mov r3, #0\n\t" /* A[0] * A[1] */ - "ldr r6, [%[a], #0]\n\t" "ldr r7, [%[a], #4]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adc r5, r5, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" "str r4, [%[tmp], #4]\n\t" "mov r4, #0\n\t" /* A[0] * A[2] */ @@ -755,358 +748,323 @@ SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) "ldr r7, [%[a], #8]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adc r3, r3, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[1] * A[1] */ "ldr r6, [%[a], #4]\n\t" "umull r6, r7, r6, r6\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adc r4, r4, #0\n\t" "str r5, [%[tmp], #8]\n\t" "mov r5, #0\n\t" /* A[0] * A[3] */ "ldr r6, [%[a], #0]\n\t" "ldr r7, [%[a], #12]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "umull r8, r9, r6, r7\n\t" + "mov r10, #0\n\t" /* A[1] * A[2] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" + "adds r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r5, r5, r10\n\t" "str r3, [%[tmp], #12]\n\t" "mov r3, #0\n\t" /* A[0] * A[4] */ "ldr r6, [%[a], #0]\n\t" "ldr r7, [%[a], #16]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "umull r8, r9, r6, r7\n\t" + "mov r10, #0\n\t" /* A[1] * A[3] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[2] * A[2] */ "ldr r6, [%[a], #8]\n\t" "umull r6, r7, r6, r6\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adc r3, r3, #0\n\t" + "adds r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adc r3, r3, r10\n\t" "str r4, [%[tmp], #16]\n\t" "mov r4, #0\n\t" /* A[0] * A[5] */ "ldr r6, [%[a], #0]\n\t" "ldr r7, [%[a], #20]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "umull r8, r9, r6, r7\n\t" + "mov r10, #0\n\t" /* A[1] * A[4] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[2] * A[3] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[a], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" + "adds r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "adds r5, r5, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r10\n\t" "str r5, [%[tmp], #20]\n\t" "mov r5, #0\n\t" /* A[0] * A[6] */ "ldr r6, [%[a], #0]\n\t" "ldr r7, [%[a], #24]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "umull r8, r9, r6, r7\n\t" + "mov r10, #0\n\t" /* A[1] * A[5] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[2] * A[4] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[a], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[3] * A[3] */ "ldr r6, [%[a], #12]\n\t" "umull r6, r7, r6, r6\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adc r5, r5, #0\n\t" + "adds r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r5, r5, r10\n\t" "str r3, [%[tmp], #24]\n\t" "mov r3, #0\n\t" /* A[0] * A[7] */ "ldr r6, [%[a], #0]\n\t" "ldr r7, [%[a], #28]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "umull r8, r9, r6, r7\n\t" + "mov r10, #0\n\t" /* A[1] * A[6] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[2] * A[5] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[3] * A[4] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[a], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" + "adds r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adc r3, r3, r10\n\t" "str r4, [%[tmp], #28]\n\t" "mov r4, #0\n\t" /* A[1] * A[7] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #28]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "umull r8, r9, r6, r7\n\t" + "mov r10, #0\n\t" /* A[2] * A[6] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[3] * A[5] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[4] * A[4] */ "ldr r6, [%[a], #16]\n\t" "umull r6, r7, r6, r6\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "str r5, [r8, #32]\n\t" + "adc r4, r4, #0\n\t" + "adds r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "adds r5, r5, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #32]\n\t" "mov r5, #0\n\t" /* A[2] * A[7] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[a], #28]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "umull r8, r9, r6, r7\n\t" + "mov r10, #0\n\t" /* A[3] * A[6] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[4] * A[5] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "str r3, [r8, #36]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" + "adds r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #36]\n\t" "mov r3, #0\n\t" /* A[3] * A[7] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[a], #28]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "umull r8, r9, r6, r7\n\t" + "mov r10, #0\n\t" /* A[4] * A[6] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[5] * A[5] */ "ldr r6, [%[a], #20]\n\t" "umull r6, r7, r6, r6\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "str r4, [r8, #40]\n\t" + "adc r3, r3, #0\n\t" + "adds r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #40]\n\t" "mov r4, #0\n\t" /* A[4] * A[7] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[5] * A[6] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "str r5, [r8, #44]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[r], #44]\n\t" "mov r5, #0\n\t" /* A[5] * A[7] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[6] * A[6] */ "ldr r6, [%[a], #24]\n\t" "umull r6, r7, r6, r6\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "str r3, [r8, #48]\n\t" + "adc r5, r5, #0\n\t" + "str r3, [%[r], #48]\n\t" "mov r3, #0\n\t" /* A[6] * A[7] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "str r4, [r8, #52]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[r], #52]\n\t" "mov r4, #0\n\t" /* A[7] * A[7] */ "ldr r6, [%[a], #28]\n\t" "umull r6, r7, r6, r6\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "str r5, [r8, #56]\n\t" - "str r3, [r8, #60]\n\t" + "adc r3, r3, r7\n\t" + "str r5, [%[r], #56]\n\t" + "str r3, [%[r], #60]\n\t" + /* Transfer tmp to r */ "ldr r3, [%[tmp], #0]\n\t" "ldr r4, [%[tmp], #4]\n\t" "ldr r5, [%[tmp], #8]\n\t" "ldr r6, [%[tmp], #12]\n\t" - "str r3, [r8, #0]\n\t" - "str r4, [r8, #4]\n\t" - "str r5, [r8, #8]\n\t" - "str r6, [r8, #12]\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" "ldr r3, [%[tmp], #16]\n\t" "ldr r4, [%[tmp], #20]\n\t" "ldr r5, [%[tmp], #24]\n\t" "ldr r6, [%[tmp], #28]\n\t" - "str r3, [r8, #16]\n\t" - "str r4, [r8, #20]\n\t" - "str r5, [r8, #24]\n\t" - "str r6, [r8, #28]\n\t" - "mov %[r], r8\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" : : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); } @@ -1122,43 +1080,31 @@ SP_NOINLINE static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adds r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5" + : "memory", "r4", "r5", "r6", "r7" ); return c; @@ -1176,70 +1122,46 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_16(sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r6, [%[b], #4]\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "subs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #0]\n\t" - "str r4, [%[a], #4]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r6, [%[b], #12]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #8]\n\t" - "str r4, [%[a], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r6, [%[b], #20]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #16]\n\t" - "str r4, [%[a], #20]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r6, [%[b], #28]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #24]\n\t" - "str r4, [%[a], #28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r6, [%[b], #36]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #32]\n\t" - "str r4, [%[a], #36]\n\t" - "ldr r3, [%[a], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r6, [%[b], #44]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #40]\n\t" - "str r4, [%[a], #44]\n\t" - "ldr r3, [%[a], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r6, [%[b], #52]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #48]\n\t" - "str r4, [%[a], #52]\n\t" - "ldr r3, [%[a], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r6, [%[b], #60]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #56]\n\t" - "str r4, [%[a], #60]\n\t" + "stm %[a]!, {r3, r4}\n\t" "sbc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : @@ -1261,75 +1183,51 @@ SP_NOINLINE static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adds r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #60]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5" + : "memory", "r4", "r5", "r6", "r7" ); return c; @@ -1433,134 +1331,86 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r6, [%[b], #4]\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "subs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #0]\n\t" - "str r4, [%[a], #4]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r6, [%[b], #12]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #8]\n\t" - "str r4, [%[a], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r6, [%[b], #20]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #16]\n\t" - "str r4, [%[a], #20]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r6, [%[b], #28]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #24]\n\t" - "str r4, [%[a], #28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r6, [%[b], #36]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #32]\n\t" - "str r4, [%[a], #36]\n\t" - "ldr r3, [%[a], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r6, [%[b], #44]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #40]\n\t" - "str r4, [%[a], #44]\n\t" - "ldr r3, [%[a], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r6, [%[b], #52]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #48]\n\t" - "str r4, [%[a], #52]\n\t" - "ldr r3, [%[a], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r6, [%[b], #60]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #56]\n\t" - "str r4, [%[a], #60]\n\t" - "ldr r3, [%[a], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r6, [%[b], #68]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #64]\n\t" - "str r4, [%[a], #68]\n\t" - "ldr r3, [%[a], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r6, [%[b], #76]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #72]\n\t" - "str r4, [%[a], #76]\n\t" - "ldr r3, [%[a], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r6, [%[b], #84]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #80]\n\t" - "str r4, [%[a], #84]\n\t" - "ldr r3, [%[a], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r6, [%[b], #92]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #88]\n\t" - "str r4, [%[a], #92]\n\t" - "ldr r3, [%[a], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r6, [%[b], #100]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #96]\n\t" - "str r4, [%[a], #100]\n\t" - "ldr r3, [%[a], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r6, [%[b], #108]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #104]\n\t" - "str r4, [%[a], #108]\n\t" - "ldr r3, [%[a], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r6, [%[b], #116]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #112]\n\t" - "str r4, [%[a], #116]\n\t" - "ldr r3, [%[a], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r6, [%[b], #124]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #120]\n\t" - "str r4, [%[a], #124]\n\t" + "stm %[a]!, {r3, r4}\n\t" "sbc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : @@ -1582,139 +1432,91 @@ SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adds r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #124]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5" + : "memory", "r4", "r5", "r6", "r7" ); return c; @@ -1822,267 +1624,166 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r6, [%[b], #4]\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "subs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #0]\n\t" - "str r4, [%[a], #4]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r6, [%[b], #12]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #8]\n\t" - "str r4, [%[a], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r6, [%[b], #20]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #16]\n\t" - "str r4, [%[a], #20]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r6, [%[b], #28]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #24]\n\t" - "str r4, [%[a], #28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r6, [%[b], #36]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #32]\n\t" - "str r4, [%[a], #36]\n\t" - "ldr r3, [%[a], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r6, [%[b], #44]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #40]\n\t" - "str r4, [%[a], #44]\n\t" - "ldr r3, [%[a], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r6, [%[b], #52]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #48]\n\t" - "str r4, [%[a], #52]\n\t" - "ldr r3, [%[a], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r6, [%[b], #60]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #56]\n\t" - "str r4, [%[a], #60]\n\t" - "ldr r3, [%[a], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r6, [%[b], #68]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #64]\n\t" - "str r4, [%[a], #68]\n\t" - "ldr r3, [%[a], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r6, [%[b], #76]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #72]\n\t" - "str r4, [%[a], #76]\n\t" - "ldr r3, [%[a], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r6, [%[b], #84]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #80]\n\t" - "str r4, [%[a], #84]\n\t" - "ldr r3, [%[a], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r6, [%[b], #92]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #88]\n\t" - "str r4, [%[a], #92]\n\t" - "ldr r3, [%[a], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r6, [%[b], #100]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #96]\n\t" - "str r4, [%[a], #100]\n\t" - "ldr r3, [%[a], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r6, [%[b], #108]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #104]\n\t" - "str r4, [%[a], #108]\n\t" - "ldr r3, [%[a], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r6, [%[b], #116]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #112]\n\t" - "str r4, [%[a], #116]\n\t" - "ldr r3, [%[a], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r6, [%[b], #124]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #120]\n\t" - "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c], %[c]\n\t" - "add %[a], %[a], #0x80\n\t" - "add %[b], %[b], #0x80\n\t" - "mov r5, #0\n\t" - "sub r5, r5, %[c]\n\t" - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r6, [%[b], #4]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #0]\n\t" - "str r4, [%[a], #4]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r6, [%[b], #12]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #8]\n\t" - "str r4, [%[a], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r6, [%[b], #20]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #16]\n\t" - "str r4, [%[a], #20]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r6, [%[b], #28]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #24]\n\t" - "str r4, [%[a], #28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r6, [%[b], #36]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #32]\n\t" - "str r4, [%[a], #36]\n\t" - "ldr r3, [%[a], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r6, [%[b], #44]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #40]\n\t" - "str r4, [%[a], #44]\n\t" - "ldr r3, [%[a], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r6, [%[b], #52]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #48]\n\t" - "str r4, [%[a], #52]\n\t" - "ldr r3, [%[a], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r6, [%[b], #60]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #56]\n\t" - "str r4, [%[a], #60]\n\t" - "ldr r3, [%[a], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r6, [%[b], #68]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #64]\n\t" - "str r4, [%[a], #68]\n\t" - "ldr r3, [%[a], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r6, [%[b], #76]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #72]\n\t" - "str r4, [%[a], #76]\n\t" - "ldr r3, [%[a], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r6, [%[b], #84]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #80]\n\t" - "str r4, [%[a], #84]\n\t" - "ldr r3, [%[a], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r6, [%[b], #92]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #88]\n\t" - "str r4, [%[a], #92]\n\t" - "ldr r3, [%[a], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r6, [%[b], #100]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #96]\n\t" - "str r4, [%[a], #100]\n\t" - "ldr r3, [%[a], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r6, [%[b], #108]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #104]\n\t" - "str r4, [%[a], #108]\n\t" - "ldr r3, [%[a], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r6, [%[b], #116]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #112]\n\t" - "str r4, [%[a], #116]\n\t" - "ldr r3, [%[a], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r6, [%[b], #124]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #120]\n\t" - "str r4, [%[a], #124]\n\t" + "stm %[a]!, {r3, r4}\n\t" "sbc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : @@ -2104,275 +1805,171 @@ SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mvn r7, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adds r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c], %[c]\n\t" - "add %[a], %[a], #0x80\n\t" - "add %[b], %[b], #0x80\n\t" - "add %[r], %[r], #0x80\n\t" - "adds %[c], %[c], r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #124]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r7" + : "memory", "r4", "r5", "r6", "r7" ); return c; @@ -2484,10 +2081,8 @@ SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r6, %[a]\n\t" "mov r7, #0\n\t" - "mov r4, #1\n\t" - "lsl r4, r4, #8\n\t" + "add r6, r6, #256\n\t" "sub r7, r7, #1\n\t" - "add r6, r6, r4\n\t" "\n1:\n\t" "adds %[c], %[c], r7\n\t" "ldr r4, [%[a]]\n\t" @@ -2522,9 +2117,7 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( "mov r7, %[a]\n\t" - "mov r5, #1\n\t" - "lsl r5, r5, #8\n\t" - "add r7, r7, r5\n\t" + "add r7, r7, #256\n\t" "\n1:\n\t" "mov r5, #0\n\t" "subs r5, r5, %[c]\n\t" @@ -2591,7 +2184,7 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, "ldr r7, [%[b]]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" /* Multiply Done */ "add %[a], %[a], #4\n\t" @@ -2665,10 +2258,10 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "ldr r7, [r2]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ "bal 5f\n\t" @@ -2865,7 +2458,7 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, "ldr r7, [%[b]]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" /* Multiply Done */ "add %[a], %[a], #4\n\t" @@ -2937,10 +2530,10 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "ldr r7, [r2]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ "bal 5f\n\t" @@ -3028,36 +2621,31 @@ SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, r6, %[a]\n\t" - "mov r8, %[r]\n\t" - "mov r9, r6\n\t" - "mov r3, #0\n\t" + "add r8, %[a], #256\n\t" + /* A[0] * B */ + "ldr r6, [%[a]], #4\n\t" + "umull r5, r3, r6, %[b]\n\t" "mov r4, #0\n\t" + "str r5, [%[r]], #4\n\t" + /* A[0] * B - Done */ "\n1:\n\t" - "mov %[r], #0\n\t" "mov r5, #0\n\t" /* A[] * B */ - "ldr r6, [%[a]]\n\t" + "ldr r6, [%[a]], #4\n\t" "umull r6, r7, r6, %[b]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[] * B - Done */ - "mov %[r], r8\n\t" - "str r3, [%[r]]\n\t" + "str r3, [%[r]], #4\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add %[r], %[r], #4\n\t" - "add %[a], %[a], #4\n\t" - "mov r8, %[r]\n\t" - "cmp %[a], r9\n\t" + "cmp %[a], r8\n\t" "blt 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -3126,77 +2714,80 @@ SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, __asm__ __volatile__ ( "mov r8, %[mp]\n\t" - "mov r12, %[ca]\n\t" - "mov r14, %[m]\n\t" + "mov r12, %[m]\n\t" "mov r9, %[a]\n\t" "mov r4, #0\n\t" - /* i = 0 */ - "mov r11, r4\n\t" + "add r11, r9, #128\n\t" "\n1:\n\t" - "mov r5, #0\n\t" - "mov %[ca], #0\n\t" /* mu = a[i] * mp */ "mov %[mp], r8\n\t" - "ldr %[a], [%[a]]\n\t" + "ldr %[a], [r9]\n\t" "mul %[mp], %[mp], %[a]\n\t" - "mov %[m], r14\n\t" + "mov %[m], r12\n\t" "mov r10, r9\n\t" + "add r14, r9, #120\n\t" "\n2:\n\t" /* a[i+j] += m[j] * mu */ - "mov %[a], r10\n\t" - "ldr %[a], [%[a]]\n\t" - "mov %[ca], #0\n\t" - "mov r4, r5\n\t" + "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]]\n\t" + "ldr r7, [%[m]], #4\n\t" "umull r6, r7, %[mp], r7\n\t" "adds %[a], %[a], r6\n\t" - "adcs r5, r5, r7\n\t" + "adc r5, r5, r7\n\t" /* Multiply m[j] and mu - Done */ "adds r4, r4, %[a]\n\t" - "adc r5, r5, %[ca]\n\t" - "mov %[a], r10\n\t" - "str r4, [%[a]]\n\t" - "mov r6, #4\n\t" - "add %[m], %[m], #4\n\t" - "add r10, r10, r6\n\t" - "mov r4, #124\n\t" - "add r4, r4, r9\n\t" - "cmp r10, r4\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "ldr %[a], [r10]\n\t" + "mov r4, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r7, [%[m]], #4\n\t" + "umull r6, r7, %[mp], r7\n\t" + "adds %[a], %[a], r6\n\t" + "adc r4, r4, r7\n\t" + /* Multiply m[j] and mu - Done */ + "adds r5, r5, %[a]\n\t" + "adc r4, r4, #0\n\t" + "str r5, [r10], #4\n\t" + "cmp r10, r14\n\t" "blt 2b\n\t" + /* a[i+30] += m[30] * mu */ + "ldr %[a], [r10]\n\t" + "mov r5, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r7, [%[m]], #4\n\t" + "umull r6, r7, %[mp], r7\n\t" + "adds %[a], %[a], r6\n\t" + "adc r5, r5, r7\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" /* a[i+31] += m[31] * mu */ + "mov r4, %[ca]\n\t" "mov %[ca], #0\n\t" - "mov r4, r12\n\t" - "mov %[a], #0\n\t" /* Multiply m[31] and mu - Start */ "ldr r7, [%[m]]\n\t" "umull r6, r7, %[mp], r7\n\t" "adds r5, r5, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc %[a], %[a], %[ca]\n\t" + "adcs r4, r4, r7\n\t" + "adc %[ca], %[ca], #0\n\t" /* Multiply m[31] and mu - Done */ - "mov %[ca], %[a]\n\t" - "mov %[a], r10\n\t" - "ldr r7, [%[a], #4]\n\t" - "ldr %[a], [%[a]]\n\t" - "mov r6, #0\n\t" - "adds r5, r5, %[a]\n\t" + "ldr r6, [r10]\n\t" + "ldr r7, [r10, #4]\n\t" + "adds r6, r6, r5\n\t" "adcs r7, r7, r4\n\t" - "adc %[ca], %[ca], r6\n\t" - "mov %[a], r10\n\t" - "str r5, [%[a]]\n\t" - "str r7, [%[a], #4]\n\t" - /* i += 1 */ - "mov r6, #4\n\t" - "add r9, r9, r6\n\t" - "add r11, r11, r6\n\t" - "mov r12, %[ca]\n\t" - "mov %[a], r9\n\t" - "mov r4, #128\n\t" - "cmp r11, r4\n\t" + "adc %[ca], %[ca], #0\n\t" + "str r6, [r10]\n\t" + "str r7, [r10, #4]\n\t" + /* Next word in a */ + "add r9, r9, #4\n\t" + "cmp r9, r11\n\t" "blt 1b\n\t" - "mov %[m], r14\n\t" + "mov %[a], r9\n\t" + "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) : [m] "r" (m), [mp] "r" (mp) : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" @@ -3245,35 +2836,31 @@ SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "mov r6, #128\n\t" - "add r6, r6, %[a]\n\t" - "mov r8, %[r]\n\t" - "mov r9, r6\n\t" - "mov r3, #0\n\t" + "add r8, %[a], #128\n\t" + /* A[0] * B */ + "ldr r6, [%[a]], #4\n\t" + "umull r5, r3, r6, %[b]\n\t" "mov r4, #0\n\t" + "str r5, [%[r]], #4\n\t" + /* A[0] * B - Done */ "\n1:\n\t" - "mov %[r], #0\n\t" "mov r5, #0\n\t" /* A[] * B */ - "ldr r6, [%[a]]\n\t" + "ldr r6, [%[a]], #4\n\t" "umull r6, r7, r6, %[b]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[] * B - Done */ - "mov %[r], r8\n\t" - "str r3, [%[r]]\n\t" + "str r3, [%[r]], #4\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add %[r], %[r], #4\n\t" - "add %[a], %[a], #4\n\t" - "mov r8, %[r]\n\t" - "cmp %[a], r9\n\t" + "cmp %[a], r8\n\t" "blt 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -3774,78 +3361,80 @@ SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, __asm__ __volatile__ ( "mov r8, %[mp]\n\t" - "mov r12, %[ca]\n\t" - "mov r14, %[m]\n\t" + "mov r12, %[m]\n\t" "mov r9, %[a]\n\t" "mov r4, #0\n\t" - /* i = 0 */ - "mov r11, r4\n\t" + "add r11, r9, #256\n\t" "\n1:\n\t" - "mov r5, #0\n\t" - "mov %[ca], #0\n\t" /* mu = a[i] * mp */ "mov %[mp], r8\n\t" - "ldr %[a], [%[a]]\n\t" + "ldr %[a], [r9]\n\t" "mul %[mp], %[mp], %[a]\n\t" - "mov %[m], r14\n\t" + "mov %[m], r12\n\t" "mov r10, r9\n\t" + "add r14, r9, #248\n\t" "\n2:\n\t" /* a[i+j] += m[j] * mu */ - "mov %[a], r10\n\t" - "ldr %[a], [%[a]]\n\t" - "mov %[ca], #0\n\t" - "mov r4, r5\n\t" + "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]]\n\t" + "ldr r7, [%[m]], #4\n\t" "umull r6, r7, %[mp], r7\n\t" "adds %[a], %[a], r6\n\t" - "adcs r5, r5, r7\n\t" + "adc r5, r5, r7\n\t" /* Multiply m[j] and mu - Done */ "adds r4, r4, %[a]\n\t" - "adc r5, r5, %[ca]\n\t" - "mov %[a], r10\n\t" - "str r4, [%[a]]\n\t" - "mov r6, #4\n\t" - "add %[m], %[m], #4\n\t" - "add r10, r10, r6\n\t" - "mov r4, #252\n\t" - "add r4, r4, r9\n\t" - "cmp r10, r4\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "ldr %[a], [r10]\n\t" + "mov r4, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r7, [%[m]], #4\n\t" + "umull r6, r7, %[mp], r7\n\t" + "adds %[a], %[a], r6\n\t" + "adc r4, r4, r7\n\t" + /* Multiply m[j] and mu - Done */ + "adds r5, r5, %[a]\n\t" + "adc r4, r4, #0\n\t" + "str r5, [r10], #4\n\t" + "cmp r10, r14\n\t" "blt 2b\n\t" + /* a[i+62] += m[62] * mu */ + "ldr %[a], [r10]\n\t" + "mov r5, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r7, [%[m]], #4\n\t" + "umull r6, r7, %[mp], r7\n\t" + "adds %[a], %[a], r6\n\t" + "adc r5, r5, r7\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" /* a[i+63] += m[63] * mu */ + "mov r4, %[ca]\n\t" "mov %[ca], #0\n\t" - "mov r4, r12\n\t" - "mov %[a], #0\n\t" /* Multiply m[63] and mu - Start */ "ldr r7, [%[m]]\n\t" "umull r6, r7, %[mp], r7\n\t" "adds r5, r5, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc %[a], %[a], %[ca]\n\t" + "adcs r4, r4, r7\n\t" + "adc %[ca], %[ca], #0\n\t" /* Multiply m[63] and mu - Done */ - "mov %[ca], %[a]\n\t" - "mov %[a], r10\n\t" - "ldr r7, [%[a], #4]\n\t" - "ldr %[a], [%[a]]\n\t" - "mov r6, #0\n\t" - "adds r5, r5, %[a]\n\t" + "ldr r6, [r10]\n\t" + "ldr r7, [r10, #4]\n\t" + "adds r6, r6, r5\n\t" "adcs r7, r7, r4\n\t" - "adc %[ca], %[ca], r6\n\t" - "mov %[a], r10\n\t" - "str r5, [%[a]]\n\t" - "str r7, [%[a], #4]\n\t" - /* i += 1 */ - "mov r6, #4\n\t" - "add r9, r9, r6\n\t" - "add r11, r11, r6\n\t" - "mov r12, %[ca]\n\t" - "mov %[a], r9\n\t" - "mov r4, #1\n\t" - "lsl r4, r4, #8\n\t" - "cmp r11, r4\n\t" + "adc %[ca], %[ca], #0\n\t" + "str r6, [r10]\n\t" + "str r7, [r10, #4]\n\t" + /* Next word in a */ + "add r9, r9, #4\n\t" + "cmp r9, r11\n\t" "blt 1b\n\t" - "mov %[m], r14\n\t" + "mov %[a], r9\n\t" + "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) : [m] "r" (m), [mp] "r" (mp) : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" @@ -5593,7 +5182,7 @@ SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, "ldr r7, [%[b]]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" /* Multiply Done */ "add %[a], %[a], #4\n\t" @@ -5664,10 +5253,10 @@ SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "ldr r7, [r2]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ "bal 5f\n\t" @@ -5734,59 +5323,41 @@ SP_NOINLINE static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adds r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #44]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5" + : "memory", "r4", "r5", "r6", "r7" ); return c; @@ -5804,102 +5375,66 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_24(sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r6, [%[b], #4]\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "subs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #0]\n\t" - "str r4, [%[a], #4]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r6, [%[b], #12]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #8]\n\t" - "str r4, [%[a], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r6, [%[b], #20]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #16]\n\t" - "str r4, [%[a], #20]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r6, [%[b], #28]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #24]\n\t" - "str r4, [%[a], #28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r6, [%[b], #36]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #32]\n\t" - "str r4, [%[a], #36]\n\t" - "ldr r3, [%[a], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r6, [%[b], #44]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #40]\n\t" - "str r4, [%[a], #44]\n\t" - "ldr r3, [%[a], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r6, [%[b], #52]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #48]\n\t" - "str r4, [%[a], #52]\n\t" - "ldr r3, [%[a], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r6, [%[b], #60]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #56]\n\t" - "str r4, [%[a], #60]\n\t" - "ldr r3, [%[a], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r6, [%[b], #68]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #64]\n\t" - "str r4, [%[a], #68]\n\t" - "ldr r3, [%[a], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r6, [%[b], #76]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #72]\n\t" - "str r4, [%[a], #76]\n\t" - "ldr r3, [%[a], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r6, [%[b], #84]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #80]\n\t" - "str r4, [%[a], #84]\n\t" - "ldr r3, [%[a], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r6, [%[b], #92]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #88]\n\t" - "str r4, [%[a], #92]\n\t" + "stm %[a]!, {r3, r4}\n\t" "sbc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : @@ -5921,107 +5456,71 @@ SP_NOINLINE static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adds r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #92]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5" + : "memory", "r4", "r5", "r6", "r7" ); return c; @@ -6129,203 +5628,126 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r6, [%[b], #4]\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "subs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #0]\n\t" - "str r4, [%[a], #4]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r6, [%[b], #12]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #8]\n\t" - "str r4, [%[a], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r6, [%[b], #20]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #16]\n\t" - "str r4, [%[a], #20]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r6, [%[b], #28]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #24]\n\t" - "str r4, [%[a], #28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r6, [%[b], #36]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #32]\n\t" - "str r4, [%[a], #36]\n\t" - "ldr r3, [%[a], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r6, [%[b], #44]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #40]\n\t" - "str r4, [%[a], #44]\n\t" - "ldr r3, [%[a], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r6, [%[b], #52]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #48]\n\t" - "str r4, [%[a], #52]\n\t" - "ldr r3, [%[a], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r6, [%[b], #60]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #56]\n\t" - "str r4, [%[a], #60]\n\t" - "ldr r3, [%[a], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r6, [%[b], #68]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #64]\n\t" - "str r4, [%[a], #68]\n\t" - "ldr r3, [%[a], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r6, [%[b], #76]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #72]\n\t" - "str r4, [%[a], #76]\n\t" - "ldr r3, [%[a], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r6, [%[b], #84]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #80]\n\t" - "str r4, [%[a], #84]\n\t" - "ldr r3, [%[a], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r6, [%[b], #92]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #88]\n\t" - "str r4, [%[a], #92]\n\t" - "ldr r3, [%[a], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r6, [%[b], #100]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #96]\n\t" - "str r4, [%[a], #100]\n\t" - "ldr r3, [%[a], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r6, [%[b], #108]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #104]\n\t" - "str r4, [%[a], #108]\n\t" - "ldr r3, [%[a], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r6, [%[b], #116]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #112]\n\t" - "str r4, [%[a], #116]\n\t" - "ldr r3, [%[a], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r6, [%[b], #124]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #120]\n\t" - "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c], %[c]\n\t" - "add %[a], %[a], #0x80\n\t" - "add %[b], %[b], #0x80\n\t" - "mov r5, #0\n\t" - "sub r5, r5, %[c]\n\t" - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r6, [%[b], #4]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #0]\n\t" - "str r4, [%[a], #4]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r6, [%[b], #12]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #8]\n\t" - "str r4, [%[a], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r6, [%[b], #20]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #16]\n\t" - "str r4, [%[a], #20]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r6, [%[b], #28]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #24]\n\t" - "str r4, [%[a], #28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r6, [%[b], #36]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #32]\n\t" - "str r4, [%[a], #36]\n\t" - "ldr r3, [%[a], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r6, [%[b], #44]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #40]\n\t" - "str r4, [%[a], #44]\n\t" - "ldr r3, [%[a], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r6, [%[b], #52]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #48]\n\t" - "str r4, [%[a], #52]\n\t" - "ldr r3, [%[a], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r6, [%[b], #60]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #56]\n\t" - "str r4, [%[a], #60]\n\t" + "stm %[a]!, {r3, r4}\n\t" "sbc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : @@ -6347,211 +5769,131 @@ SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mvn r7, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adds r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c], %[c]\n\t" - "add %[a], %[a], #0x80\n\t" - "add %[b], %[b], #0x80\n\t" - "add %[r], %[r], #0x80\n\t" - "adds %[c], %[c], r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #60]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r7" + : "memory", "r4", "r5", "r6", "r7" ); return c; @@ -6659,400 +6001,246 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r6, [%[b], #4]\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "subs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #0]\n\t" - "str r4, [%[a], #4]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r6, [%[b], #12]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #8]\n\t" - "str r4, [%[a], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r6, [%[b], #20]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #16]\n\t" - "str r4, [%[a], #20]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r6, [%[b], #28]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #24]\n\t" - "str r4, [%[a], #28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r6, [%[b], #36]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #32]\n\t" - "str r4, [%[a], #36]\n\t" - "ldr r3, [%[a], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r6, [%[b], #44]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #40]\n\t" - "str r4, [%[a], #44]\n\t" - "ldr r3, [%[a], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r6, [%[b], #52]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #48]\n\t" - "str r4, [%[a], #52]\n\t" - "ldr r3, [%[a], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r6, [%[b], #60]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #56]\n\t" - "str r4, [%[a], #60]\n\t" - "ldr r3, [%[a], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r6, [%[b], #68]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #64]\n\t" - "str r4, [%[a], #68]\n\t" - "ldr r3, [%[a], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r6, [%[b], #76]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #72]\n\t" - "str r4, [%[a], #76]\n\t" - "ldr r3, [%[a], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r6, [%[b], #84]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #80]\n\t" - "str r4, [%[a], #84]\n\t" - "ldr r3, [%[a], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r6, [%[b], #92]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #88]\n\t" - "str r4, [%[a], #92]\n\t" - "ldr r3, [%[a], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r6, [%[b], #100]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #96]\n\t" - "str r4, [%[a], #100]\n\t" - "ldr r3, [%[a], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r6, [%[b], #108]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #104]\n\t" - "str r4, [%[a], #108]\n\t" - "ldr r3, [%[a], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r6, [%[b], #116]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #112]\n\t" - "str r4, [%[a], #116]\n\t" - "ldr r3, [%[a], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r6, [%[b], #124]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #120]\n\t" - "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c], %[c]\n\t" - "add %[a], %[a], #0x80\n\t" - "add %[b], %[b], #0x80\n\t" - "mov r5, #0\n\t" - "sub r5, r5, %[c]\n\t" - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r6, [%[b], #4]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #0]\n\t" - "str r4, [%[a], #4]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r6, [%[b], #12]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #8]\n\t" - "str r4, [%[a], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r6, [%[b], #20]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #16]\n\t" - "str r4, [%[a], #20]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r6, [%[b], #28]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #24]\n\t" - "str r4, [%[a], #28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r6, [%[b], #36]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #32]\n\t" - "str r4, [%[a], #36]\n\t" - "ldr r3, [%[a], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r6, [%[b], #44]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #40]\n\t" - "str r4, [%[a], #44]\n\t" - "ldr r3, [%[a], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r6, [%[b], #52]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #48]\n\t" - "str r4, [%[a], #52]\n\t" - "ldr r3, [%[a], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r6, [%[b], #60]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #56]\n\t" - "str r4, [%[a], #60]\n\t" - "ldr r3, [%[a], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r6, [%[b], #68]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #64]\n\t" - "str r4, [%[a], #68]\n\t" - "ldr r3, [%[a], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r6, [%[b], #76]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #72]\n\t" - "str r4, [%[a], #76]\n\t" - "ldr r3, [%[a], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r6, [%[b], #84]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #80]\n\t" - "str r4, [%[a], #84]\n\t" - "ldr r3, [%[a], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r6, [%[b], #92]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #88]\n\t" - "str r4, [%[a], #92]\n\t" - "ldr r3, [%[a], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r6, [%[b], #100]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #96]\n\t" - "str r4, [%[a], #100]\n\t" - "ldr r3, [%[a], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r6, [%[b], #108]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #104]\n\t" - "str r4, [%[a], #108]\n\t" - "ldr r3, [%[a], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r6, [%[b], #116]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #112]\n\t" - "str r4, [%[a], #116]\n\t" - "ldr r3, [%[a], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r6, [%[b], #124]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #120]\n\t" - "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c], %[c]\n\t" - "add %[a], %[a], #0x80\n\t" - "add %[b], %[b], #0x80\n\t" - "mov r5, #0\n\t" - "sub r5, r5, %[c]\n\t" - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r6, [%[b], #4]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #0]\n\t" - "str r4, [%[a], #4]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r6, [%[b], #12]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #8]\n\t" - "str r4, [%[a], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r6, [%[b], #20]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #16]\n\t" - "str r4, [%[a], #20]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r6, [%[b], #28]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #24]\n\t" - "str r4, [%[a], #28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r6, [%[b], #36]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #32]\n\t" - "str r4, [%[a], #36]\n\t" - "ldr r3, [%[a], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r6, [%[b], #44]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #40]\n\t" - "str r4, [%[a], #44]\n\t" - "ldr r3, [%[a], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r6, [%[b], #52]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #48]\n\t" - "str r4, [%[a], #52]\n\t" - "ldr r3, [%[a], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r6, [%[b], #60]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #56]\n\t" - "str r4, [%[a], #60]\n\t" - "ldr r3, [%[a], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r6, [%[b], #68]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #64]\n\t" - "str r4, [%[a], #68]\n\t" - "ldr r3, [%[a], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r6, [%[b], #76]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #72]\n\t" - "str r4, [%[a], #76]\n\t" - "ldr r3, [%[a], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r6, [%[b], #84]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #80]\n\t" - "str r4, [%[a], #84]\n\t" - "ldr r3, [%[a], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r6, [%[b], #92]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #88]\n\t" - "str r4, [%[a], #92]\n\t" - "ldr r3, [%[a], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r6, [%[b], #100]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #96]\n\t" - "str r4, [%[a], #100]\n\t" - "ldr r3, [%[a], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r6, [%[b], #108]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #104]\n\t" - "str r4, [%[a], #108]\n\t" - "ldr r3, [%[a], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r6, [%[b], #116]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #112]\n\t" - "str r4, [%[a], #116]\n\t" - "ldr r3, [%[a], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r6, [%[b], #124]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #120]\n\t" - "str r4, [%[a], #124]\n\t" + "stm %[a]!, {r3, r4}\n\t" "sbc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : @@ -7074,409 +6262,251 @@ SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mvn r7, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adds r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c], %[c]\n\t" - "add %[a], %[a], #0x80\n\t" - "add %[b], %[b], #0x80\n\t" - "add %[r], %[r], #0x80\n\t" - "adds %[c], %[c], r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c], %[c]\n\t" - "add %[a], %[a], #0x80\n\t" - "add %[b], %[b], #0x80\n\t" - "add %[r], %[r], #0x80\n\t" - "adds %[c], %[c], r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #124]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r7" + : "memory", "r4", "r5", "r6", "r7" ); return c; @@ -7588,11 +6618,8 @@ SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r6, %[a]\n\t" "mov r7, #0\n\t" - "mov r4, #1\n\t" - "lsl r4, r4, #8\n\t" - "add r4, r4, #128\n\t" + "add r6, r6, #384\n\t" "sub r7, r7, #1\n\t" - "add r6, r6, r4\n\t" "\n1:\n\t" "adds %[c], %[c], r7\n\t" "ldr r4, [%[a]]\n\t" @@ -7627,10 +6654,7 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( "mov r7, %[a]\n\t" - "mov r5, #1\n\t" - "lsl r5, r5, #8\n\t" - "add r5, r5, #128\n\t" - "add r7, r7, r5\n\t" + "add r7, r7, #384\n\t" "\n1:\n\t" "mov r5, #0\n\t" "subs r5, r5, %[c]\n\t" @@ -7700,7 +6724,7 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, "ldr r7, [%[b]]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" /* Multiply Done */ "add %[a], %[a], #4\n\t" @@ -7776,10 +6800,10 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "ldr r7, [r2]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ "bal 5f\n\t" @@ -7977,7 +7001,7 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, "ldr r7, [%[b]]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" /* Multiply Done */ "add %[a], %[a], #4\n\t" @@ -8052,10 +7076,10 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "ldr r7, [r2]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ "bal 5f\n\t" @@ -8148,37 +7172,31 @@ SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, r6, #128\n\t" - "add r6, r6, %[a]\n\t" - "mov r8, %[r]\n\t" - "mov r9, r6\n\t" - "mov r3, #0\n\t" + "add r8, %[a], #384\n\t" + /* A[0] * B */ + "ldr r6, [%[a]], #4\n\t" + "umull r5, r3, r6, %[b]\n\t" "mov r4, #0\n\t" + "str r5, [%[r]], #4\n\t" + /* A[0] * B - Done */ "\n1:\n\t" - "mov %[r], #0\n\t" "mov r5, #0\n\t" /* A[] * B */ - "ldr r6, [%[a]]\n\t" + "ldr r6, [%[a]], #4\n\t" "umull r6, r7, r6, %[b]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[] * B - Done */ - "mov %[r], r8\n\t" - "str r3, [%[r]]\n\t" + "str r3, [%[r]], #4\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add %[r], %[r], #4\n\t" - "add %[a], %[a], #4\n\t" - "mov r8, %[r]\n\t" - "cmp %[a], r9\n\t" + "cmp %[a], r8\n\t" "blt 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -8247,77 +7265,80 @@ SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, __asm__ __volatile__ ( "mov r8, %[mp]\n\t" - "mov r12, %[ca]\n\t" - "mov r14, %[m]\n\t" + "mov r12, %[m]\n\t" "mov r9, %[a]\n\t" "mov r4, #0\n\t" - /* i = 0 */ - "mov r11, r4\n\t" + "add r11, r9, #192\n\t" "\n1:\n\t" - "mov r5, #0\n\t" - "mov %[ca], #0\n\t" /* mu = a[i] * mp */ "mov %[mp], r8\n\t" - "ldr %[a], [%[a]]\n\t" + "ldr %[a], [r9]\n\t" "mul %[mp], %[mp], %[a]\n\t" - "mov %[m], r14\n\t" + "mov %[m], r12\n\t" "mov r10, r9\n\t" + "add r14, r9, #184\n\t" "\n2:\n\t" /* a[i+j] += m[j] * mu */ - "mov %[a], r10\n\t" - "ldr %[a], [%[a]]\n\t" - "mov %[ca], #0\n\t" - "mov r4, r5\n\t" + "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]]\n\t" + "ldr r7, [%[m]], #4\n\t" "umull r6, r7, %[mp], r7\n\t" "adds %[a], %[a], r6\n\t" - "adcs r5, r5, r7\n\t" + "adc r5, r5, r7\n\t" /* Multiply m[j] and mu - Done */ "adds r4, r4, %[a]\n\t" - "adc r5, r5, %[ca]\n\t" - "mov %[a], r10\n\t" - "str r4, [%[a]]\n\t" - "mov r6, #4\n\t" - "add %[m], %[m], #4\n\t" - "add r10, r10, r6\n\t" - "mov r4, #188\n\t" - "add r4, r4, r9\n\t" - "cmp r10, r4\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "ldr %[a], [r10]\n\t" + "mov r4, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r7, [%[m]], #4\n\t" + "umull r6, r7, %[mp], r7\n\t" + "adds %[a], %[a], r6\n\t" + "adc r4, r4, r7\n\t" + /* Multiply m[j] and mu - Done */ + "adds r5, r5, %[a]\n\t" + "adc r4, r4, #0\n\t" + "str r5, [r10], #4\n\t" + "cmp r10, r14\n\t" "blt 2b\n\t" + /* a[i+46] += m[46] * mu */ + "ldr %[a], [r10]\n\t" + "mov r5, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r7, [%[m]], #4\n\t" + "umull r6, r7, %[mp], r7\n\t" + "adds %[a], %[a], r6\n\t" + "adc r5, r5, r7\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" /* a[i+47] += m[47] * mu */ + "mov r4, %[ca]\n\t" "mov %[ca], #0\n\t" - "mov r4, r12\n\t" - "mov %[a], #0\n\t" /* Multiply m[47] and mu - Start */ "ldr r7, [%[m]]\n\t" "umull r6, r7, %[mp], r7\n\t" "adds r5, r5, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc %[a], %[a], %[ca]\n\t" + "adcs r4, r4, r7\n\t" + "adc %[ca], %[ca], #0\n\t" /* Multiply m[47] and mu - Done */ - "mov %[ca], %[a]\n\t" - "mov %[a], r10\n\t" - "ldr r7, [%[a], #4]\n\t" - "ldr %[a], [%[a]]\n\t" - "mov r6, #0\n\t" - "adds r5, r5, %[a]\n\t" + "ldr r6, [r10]\n\t" + "ldr r7, [r10, #4]\n\t" + "adds r6, r6, r5\n\t" "adcs r7, r7, r4\n\t" - "adc %[ca], %[ca], r6\n\t" - "mov %[a], r10\n\t" - "str r5, [%[a]]\n\t" - "str r7, [%[a], #4]\n\t" - /* i += 1 */ - "mov r6, #4\n\t" - "add r9, r9, r6\n\t" - "add r11, r11, r6\n\t" - "mov r12, %[ca]\n\t" - "mov %[a], r9\n\t" - "mov r4, #192\n\t" - "cmp r11, r4\n\t" + "adc %[ca], %[ca], #0\n\t" + "str r6, [r10]\n\t" + "str r7, [r10, #4]\n\t" + /* Next word in a */ + "add r9, r9, #4\n\t" + "cmp r9, r11\n\t" "blt 1b\n\t" - "mov %[m], r14\n\t" + "mov %[a], r9\n\t" + "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) : [m] "r" (m), [mp] "r" (mp) : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" @@ -8366,35 +7387,31 @@ SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "mov r6, #192\n\t" - "add r6, r6, %[a]\n\t" - "mov r8, %[r]\n\t" - "mov r9, r6\n\t" - "mov r3, #0\n\t" + "add r8, %[a], #192\n\t" + /* A[0] * B */ + "ldr r6, [%[a]], #4\n\t" + "umull r5, r3, r6, %[b]\n\t" "mov r4, #0\n\t" + "str r5, [%[r]], #4\n\t" + /* A[0] * B - Done */ "\n1:\n\t" - "mov %[r], #0\n\t" "mov r5, #0\n\t" /* A[] * B */ - "ldr r6, [%[a]]\n\t" + "ldr r6, [%[a]], #4\n\t" "umull r6, r7, r6, %[b]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[] * B - Done */ - "mov %[r], r8\n\t" - "str r3, [%[r]]\n\t" + "str r3, [%[r]], #4\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add %[r], %[r], #4\n\t" - "add %[a], %[a], #4\n\t" - "mov r8, %[r]\n\t" - "cmp %[a], r9\n\t" + "cmp %[a], r8\n\t" "blt 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -8896,81 +7913,80 @@ SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, __asm__ __volatile__ ( "mov r8, %[mp]\n\t" - "mov r12, %[ca]\n\t" - "mov r14, %[m]\n\t" + "mov r12, %[m]\n\t" "mov r9, %[a]\n\t" "mov r4, #0\n\t" - /* i = 0 */ - "mov r11, r4\n\t" + "add r11, r9, #384\n\t" "\n1:\n\t" - "mov r5, #0\n\t" - "mov %[ca], #0\n\t" /* mu = a[i] * mp */ "mov %[mp], r8\n\t" - "ldr %[a], [%[a]]\n\t" + "ldr %[a], [r9]\n\t" "mul %[mp], %[mp], %[a]\n\t" - "mov %[m], r14\n\t" + "mov %[m], r12\n\t" "mov r10, r9\n\t" + "add r14, r9, #376\n\t" "\n2:\n\t" /* a[i+j] += m[j] * mu */ - "mov %[a], r10\n\t" - "ldr %[a], [%[a]]\n\t" - "mov %[ca], #0\n\t" - "mov r4, r5\n\t" + "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]]\n\t" + "ldr r7, [%[m]], #4\n\t" "umull r6, r7, %[mp], r7\n\t" "adds %[a], %[a], r6\n\t" - "adcs r5, r5, r7\n\t" + "adc r5, r5, r7\n\t" /* Multiply m[j] and mu - Done */ "adds r4, r4, %[a]\n\t" - "adc r5, r5, %[ca]\n\t" - "mov %[a], r10\n\t" - "str r4, [%[a]]\n\t" - "mov r6, #4\n\t" - "add %[m], %[m], #4\n\t" - "add r10, r10, r6\n\t" - "mov r4, #1\n\t" - "lsl r4, r4, #8\n\t" - "add r4, r4, #124\n\t" - "add r4, r4, r9\n\t" - "cmp r10, r4\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "ldr %[a], [r10]\n\t" + "mov r4, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r7, [%[m]], #4\n\t" + "umull r6, r7, %[mp], r7\n\t" + "adds %[a], %[a], r6\n\t" + "adc r4, r4, r7\n\t" + /* Multiply m[j] and mu - Done */ + "adds r5, r5, %[a]\n\t" + "adc r4, r4, #0\n\t" + "str r5, [r10], #4\n\t" + "cmp r10, r14\n\t" "blt 2b\n\t" + /* a[i+94] += m[94] * mu */ + "ldr %[a], [r10]\n\t" + "mov r5, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r7, [%[m]], #4\n\t" + "umull r6, r7, %[mp], r7\n\t" + "adds %[a], %[a], r6\n\t" + "adc r5, r5, r7\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" /* a[i+95] += m[95] * mu */ + "mov r4, %[ca]\n\t" "mov %[ca], #0\n\t" - "mov r4, r12\n\t" - "mov %[a], #0\n\t" /* Multiply m[95] and mu - Start */ "ldr r7, [%[m]]\n\t" "umull r6, r7, %[mp], r7\n\t" "adds r5, r5, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc %[a], %[a], %[ca]\n\t" + "adcs r4, r4, r7\n\t" + "adc %[ca], %[ca], #0\n\t" /* Multiply m[95] and mu - Done */ - "mov %[ca], %[a]\n\t" - "mov %[a], r10\n\t" - "ldr r7, [%[a], #4]\n\t" - "ldr %[a], [%[a]]\n\t" - "mov r6, #0\n\t" - "adds r5, r5, %[a]\n\t" + "ldr r6, [r10]\n\t" + "ldr r7, [r10, #4]\n\t" + "adds r6, r6, r5\n\t" "adcs r7, r7, r4\n\t" - "adc %[ca], %[ca], r6\n\t" - "mov %[a], r10\n\t" - "str r5, [%[a]]\n\t" - "str r7, [%[a], #4]\n\t" - /* i += 1 */ - "mov r6, #4\n\t" - "add r9, r9, r6\n\t" - "add r11, r11, r6\n\t" - "mov r12, %[ca]\n\t" - "mov %[a], r9\n\t" - "mov r4, #1\n\t" - "lsl r4, r4, #8\n\t" - "add r4, r4, #128\n\t" - "cmp r11, r4\n\t" + "adc %[ca], %[ca], #0\n\t" + "str r6, [r10]\n\t" + "str r7, [r10, #4]\n\t" + /* Next word in a */ + "add r9, r9, #4\n\t" + "cmp r9, r11\n\t" "blt 1b\n\t" - "mov %[m], r14\n\t" + "mov %[a], r9\n\t" + "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) : [m] "r" (m), [mp] "r" (mp) : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" @@ -10889,275 +9905,171 @@ SP_NOINLINE static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mvn r7, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adds r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c], %[c]\n\t" - "add %[a], %[a], #0x80\n\t" - "add %[b], %[b], #0x80\n\t" - "add %[r], %[r], #0x80\n\t" - "adds %[c], %[c], r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #124]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r7" + : "memory", "r4", "r5", "r6", "r7" ); return c; @@ -11175,533 +10087,326 @@ SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r6, [%[b], #4]\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "subs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #0]\n\t" - "str r4, [%[a], #4]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r6, [%[b], #12]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #8]\n\t" - "str r4, [%[a], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r6, [%[b], #20]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #16]\n\t" - "str r4, [%[a], #20]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r6, [%[b], #28]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #24]\n\t" - "str r4, [%[a], #28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r6, [%[b], #36]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #32]\n\t" - "str r4, [%[a], #36]\n\t" - "ldr r3, [%[a], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r6, [%[b], #44]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #40]\n\t" - "str r4, [%[a], #44]\n\t" - "ldr r3, [%[a], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r6, [%[b], #52]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #48]\n\t" - "str r4, [%[a], #52]\n\t" - "ldr r3, [%[a], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r6, [%[b], #60]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #56]\n\t" - "str r4, [%[a], #60]\n\t" - "ldr r3, [%[a], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r6, [%[b], #68]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #64]\n\t" - "str r4, [%[a], #68]\n\t" - "ldr r3, [%[a], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r6, [%[b], #76]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #72]\n\t" - "str r4, [%[a], #76]\n\t" - "ldr r3, [%[a], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r6, [%[b], #84]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #80]\n\t" - "str r4, [%[a], #84]\n\t" - "ldr r3, [%[a], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r6, [%[b], #92]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #88]\n\t" - "str r4, [%[a], #92]\n\t" - "ldr r3, [%[a], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r6, [%[b], #100]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #96]\n\t" - "str r4, [%[a], #100]\n\t" - "ldr r3, [%[a], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r6, [%[b], #108]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #104]\n\t" - "str r4, [%[a], #108]\n\t" - "ldr r3, [%[a], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r6, [%[b], #116]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #112]\n\t" - "str r4, [%[a], #116]\n\t" - "ldr r3, [%[a], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r6, [%[b], #124]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #120]\n\t" - "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c], %[c]\n\t" - "add %[a], %[a], #0x80\n\t" - "add %[b], %[b], #0x80\n\t" - "mov r5, #0\n\t" - "sub r5, r5, %[c]\n\t" - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r6, [%[b], #4]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #0]\n\t" - "str r4, [%[a], #4]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r6, [%[b], #12]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #8]\n\t" - "str r4, [%[a], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r6, [%[b], #20]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #16]\n\t" - "str r4, [%[a], #20]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r6, [%[b], #28]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #24]\n\t" - "str r4, [%[a], #28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r6, [%[b], #36]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #32]\n\t" - "str r4, [%[a], #36]\n\t" - "ldr r3, [%[a], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r6, [%[b], #44]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #40]\n\t" - "str r4, [%[a], #44]\n\t" - "ldr r3, [%[a], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r6, [%[b], #52]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #48]\n\t" - "str r4, [%[a], #52]\n\t" - "ldr r3, [%[a], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r6, [%[b], #60]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #56]\n\t" - "str r4, [%[a], #60]\n\t" - "ldr r3, [%[a], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r6, [%[b], #68]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #64]\n\t" - "str r4, [%[a], #68]\n\t" - "ldr r3, [%[a], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r6, [%[b], #76]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #72]\n\t" - "str r4, [%[a], #76]\n\t" - "ldr r3, [%[a], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r6, [%[b], #84]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #80]\n\t" - "str r4, [%[a], #84]\n\t" - "ldr r3, [%[a], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r6, [%[b], #92]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #88]\n\t" - "str r4, [%[a], #92]\n\t" - "ldr r3, [%[a], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r6, [%[b], #100]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #96]\n\t" - "str r4, [%[a], #100]\n\t" - "ldr r3, [%[a], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r6, [%[b], #108]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #104]\n\t" - "str r4, [%[a], #108]\n\t" - "ldr r3, [%[a], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r6, [%[b], #116]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #112]\n\t" - "str r4, [%[a], #116]\n\t" - "ldr r3, [%[a], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r6, [%[b], #124]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #120]\n\t" - "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c], %[c]\n\t" - "add %[a], %[a], #0x80\n\t" - "add %[b], %[b], #0x80\n\t" - "mov r5, #0\n\t" - "sub r5, r5, %[c]\n\t" - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r6, [%[b], #4]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #0]\n\t" - "str r4, [%[a], #4]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r6, [%[b], #12]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #8]\n\t" - "str r4, [%[a], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r6, [%[b], #20]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #16]\n\t" - "str r4, [%[a], #20]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r6, [%[b], #28]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #24]\n\t" - "str r4, [%[a], #28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r6, [%[b], #36]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #32]\n\t" - "str r4, [%[a], #36]\n\t" - "ldr r3, [%[a], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r6, [%[b], #44]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #40]\n\t" - "str r4, [%[a], #44]\n\t" - "ldr r3, [%[a], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r6, [%[b], #52]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #48]\n\t" - "str r4, [%[a], #52]\n\t" - "ldr r3, [%[a], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r6, [%[b], #60]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #56]\n\t" - "str r4, [%[a], #60]\n\t" - "ldr r3, [%[a], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r6, [%[b], #68]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #64]\n\t" - "str r4, [%[a], #68]\n\t" - "ldr r3, [%[a], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r6, [%[b], #76]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #72]\n\t" - "str r4, [%[a], #76]\n\t" - "ldr r3, [%[a], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r6, [%[b], #84]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #80]\n\t" - "str r4, [%[a], #84]\n\t" - "ldr r3, [%[a], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r6, [%[b], #92]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #88]\n\t" - "str r4, [%[a], #92]\n\t" - "ldr r3, [%[a], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r6, [%[b], #100]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #96]\n\t" - "str r4, [%[a], #100]\n\t" - "ldr r3, [%[a], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r6, [%[b], #108]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #104]\n\t" - "str r4, [%[a], #108]\n\t" - "ldr r3, [%[a], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r6, [%[b], #116]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #112]\n\t" - "str r4, [%[a], #116]\n\t" - "ldr r3, [%[a], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r6, [%[b], #124]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #120]\n\t" - "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c], %[c]\n\t" - "add %[a], %[a], #0x80\n\t" - "add %[b], %[b], #0x80\n\t" - "mov r5, #0\n\t" - "sub r5, r5, %[c]\n\t" - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r6, [%[b], #4]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #0]\n\t" - "str r4, [%[a], #4]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r6, [%[b], #12]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #8]\n\t" - "str r4, [%[a], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r6, [%[b], #20]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #16]\n\t" - "str r4, [%[a], #20]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r6, [%[b], #28]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #24]\n\t" - "str r4, [%[a], #28]\n\t" - "ldr r3, [%[a], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r6, [%[b], #36]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #32]\n\t" - "str r4, [%[a], #36]\n\t" - "ldr r3, [%[a], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r6, [%[b], #44]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #40]\n\t" - "str r4, [%[a], #44]\n\t" - "ldr r3, [%[a], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #48]\n\t" - "ldr r6, [%[b], #52]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #48]\n\t" - "str r4, [%[a], #52]\n\t" - "ldr r3, [%[a], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #56]\n\t" - "ldr r6, [%[b], #60]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #56]\n\t" - "str r4, [%[a], #60]\n\t" - "ldr r3, [%[a], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #64]\n\t" - "ldr r6, [%[b], #68]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #64]\n\t" - "str r4, [%[a], #68]\n\t" - "ldr r3, [%[a], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #72]\n\t" - "ldr r6, [%[b], #76]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #72]\n\t" - "str r4, [%[a], #76]\n\t" - "ldr r3, [%[a], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #80]\n\t" - "ldr r6, [%[b], #84]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #80]\n\t" - "str r4, [%[a], #84]\n\t" - "ldr r3, [%[a], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #88]\n\t" - "ldr r6, [%[b], #92]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #88]\n\t" - "str r4, [%[a], #92]\n\t" - "ldr r3, [%[a], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #96]\n\t" - "ldr r6, [%[b], #100]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #96]\n\t" - "str r4, [%[a], #100]\n\t" - "ldr r3, [%[a], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #104]\n\t" - "ldr r6, [%[b], #108]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #104]\n\t" - "str r4, [%[a], #108]\n\t" - "ldr r3, [%[a], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #112]\n\t" - "ldr r6, [%[b], #116]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #112]\n\t" - "str r4, [%[a], #116]\n\t" - "ldr r3, [%[a], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #120]\n\t" - "ldr r6, [%[b], #124]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #120]\n\t" - "str r4, [%[a], #124]\n\t" + "stm %[a]!, {r3, r4}\n\t" "sbc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : @@ -11723,543 +10428,331 @@ SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mvn r7, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adds r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c], %[c]\n\t" - "add %[a], %[a], #0x80\n\t" - "add %[b], %[b], #0x80\n\t" - "add %[r], %[r], #0x80\n\t" - "adds %[c], %[c], r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c], %[c]\n\t" - "add %[a], %[a], #0x80\n\t" - "add %[b], %[b], #0x80\n\t" - "add %[r], %[r], #0x80\n\t" - "adds %[c], %[c], r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c], %[c]\n\t" - "add %[a], %[a], #0x80\n\t" - "add %[b], %[b], #0x80\n\t" - "add %[r], %[r], #0x80\n\t" - "adds %[c], %[c], r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #32]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #40]\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #48]\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #52]\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #56]\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #64]\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #68]\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #72]\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #76]\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #80]\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #84]\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #88]\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #92]\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #96]\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #100]\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #104]\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #108]\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #112]\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #116]\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #120]\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #124]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r7" + : "memory", "r4", "r5", "r6", "r7" ); return c; @@ -12305,7 +10798,7 @@ SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, "ldr r7, [%[b]]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" /* Multiply Done */ "add %[a], %[a], #4\n\t" @@ -12442,10 +10935,10 @@ SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a) "ldr r7, [r2]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ "bal 5f\n\t" @@ -12549,10 +11042,8 @@ SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r6, %[a]\n\t" "mov r7, #0\n\t" - "mov r4, #2\n\t" - "lsl r4, r4, #8\n\t" + "add r6, r6, #512\n\t" "sub r7, r7, #1\n\t" - "add r6, r6, r4\n\t" "\n1:\n\t" "adds %[c], %[c], r7\n\t" "ldr r4, [%[a]]\n\t" @@ -12587,9 +11078,7 @@ SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( "mov r7, %[a]\n\t" - "mov r5, #2\n\t" - "lsl r5, r5, #8\n\t" - "add r7, r7, r5\n\t" + "add r7, r7, #512\n\t" "\n1:\n\t" "mov r5, #0\n\t" "subs r5, r5, %[c]\n\t" @@ -12658,7 +11147,7 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, "ldr r7, [%[b]]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" /* Multiply Done */ "add %[a], %[a], #4\n\t" @@ -12734,10 +11223,10 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) "ldr r7, [r2]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r7\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ "bal 5f\n\t" @@ -12828,36 +11317,31 @@ SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "mov r6, #2\n\t" - "lsl r6, r6, #8\n\t" - "add r6, r6, %[a]\n\t" - "mov r8, %[r]\n\t" - "mov r9, r6\n\t" - "mov r3, #0\n\t" + "add r8, %[a], #512\n\t" + /* A[0] * B */ + "ldr r6, [%[a]], #4\n\t" + "umull r5, r3, r6, %[b]\n\t" "mov r4, #0\n\t" + "str r5, [%[r]], #4\n\t" + /* A[0] * B - Done */ "\n1:\n\t" - "mov %[r], #0\n\t" "mov r5, #0\n\t" /* A[] * B */ - "ldr r6, [%[a]]\n\t" + "ldr r6, [%[a]], #4\n\t" "umull r6, r7, r6, %[b]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[] * B - Done */ - "mov %[r], r8\n\t" - "str r3, [%[r]]\n\t" + "str r3, [%[r]], #4\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add %[r], %[r], #4\n\t" - "add %[a], %[a], #4\n\t" - "mov r8, %[r]\n\t" - "cmp %[a], r9\n\t" + "cmp %[a], r8\n\t" "blt 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -12928,80 +11412,80 @@ SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, __asm__ __volatile__ ( "mov r8, %[mp]\n\t" - "mov r12, %[ca]\n\t" - "mov r14, %[m]\n\t" + "mov r12, %[m]\n\t" "mov r9, %[a]\n\t" "mov r4, #0\n\t" - /* i = 0 */ - "mov r11, r4\n\t" + "add r11, r9, #512\n\t" "\n1:\n\t" - "mov r5, #0\n\t" - "mov %[ca], #0\n\t" /* mu = a[i] * mp */ "mov %[mp], r8\n\t" - "ldr %[a], [%[a]]\n\t" + "ldr %[a], [r9]\n\t" "mul %[mp], %[mp], %[a]\n\t" - "mov %[m], r14\n\t" + "mov %[m], r12\n\t" "mov r10, r9\n\t" + "add r14, r9, #504\n\t" "\n2:\n\t" /* a[i+j] += m[j] * mu */ - "mov %[a], r10\n\t" - "ldr %[a], [%[a]]\n\t" - "mov %[ca], #0\n\t" - "mov r4, r5\n\t" + "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]]\n\t" + "ldr r7, [%[m]], #4\n\t" "umull r6, r7, %[mp], r7\n\t" "adds %[a], %[a], r6\n\t" - "adcs r5, r5, r7\n\t" + "adc r5, r5, r7\n\t" /* Multiply m[j] and mu - Done */ "adds r4, r4, %[a]\n\t" - "adc r5, r5, %[ca]\n\t" - "mov %[a], r10\n\t" - "str r4, [%[a]]\n\t" - "mov r6, #4\n\t" - "add %[m], %[m], #4\n\t" - "add r10, r10, r6\n\t" - "mov r4, #1\n\t" - "lsl r4, r4, #8\n\t" - "add r4, r4, #252\n\t" - "add r4, r4, r9\n\t" - "cmp r10, r4\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "ldr %[a], [r10]\n\t" + "mov r4, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r7, [%[m]], #4\n\t" + "umull r6, r7, %[mp], r7\n\t" + "adds %[a], %[a], r6\n\t" + "adc r4, r4, r7\n\t" + /* Multiply m[j] and mu - Done */ + "adds r5, r5, %[a]\n\t" + "adc r4, r4, #0\n\t" + "str r5, [r10], #4\n\t" + "cmp r10, r14\n\t" "blt 2b\n\t" + /* a[i+126] += m[126] * mu */ + "ldr %[a], [r10]\n\t" + "mov r5, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r7, [%[m]], #4\n\t" + "umull r6, r7, %[mp], r7\n\t" + "adds %[a], %[a], r6\n\t" + "adc r5, r5, r7\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" /* a[i+127] += m[127] * mu */ + "mov r4, %[ca]\n\t" "mov %[ca], #0\n\t" - "mov r4, r12\n\t" - "mov %[a], #0\n\t" /* Multiply m[127] and mu - Start */ "ldr r7, [%[m]]\n\t" "umull r6, r7, %[mp], r7\n\t" "adds r5, r5, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc %[a], %[a], %[ca]\n\t" + "adcs r4, r4, r7\n\t" + "adc %[ca], %[ca], #0\n\t" /* Multiply m[127] and mu - Done */ - "mov %[ca], %[a]\n\t" - "mov %[a], r10\n\t" - "ldr r7, [%[a], #4]\n\t" - "ldr %[a], [%[a]]\n\t" - "mov r6, #0\n\t" - "adds r5, r5, %[a]\n\t" + "ldr r6, [r10]\n\t" + "ldr r7, [r10, #4]\n\t" + "adds r6, r6, r5\n\t" "adcs r7, r7, r4\n\t" - "adc %[ca], %[ca], r6\n\t" - "mov %[a], r10\n\t" - "str r5, [%[a]]\n\t" - "str r7, [%[a], #4]\n\t" - /* i += 1 */ - "mov r6, #4\n\t" - "add r9, r9, r6\n\t" - "add r11, r11, r6\n\t" - "mov r12, %[ca]\n\t" - "mov %[a], r9\n\t" - "mov r4, #2\n\t" - "lsl r4, r4, #8\n\t" - "cmp r11, r4\n\t" + "adc %[ca], %[ca], #0\n\t" + "str r6, [r10]\n\t" + "str r7, [r10, #4]\n\t" + /* Next word in a */ + "add r9, r9, #4\n\t" + "cmp r9, r11\n\t" "blt 1b\n\t" - "mov %[m], r14\n\t" + "mov %[a], r9\n\t" + "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) : [m] "r" (m), [mp] "r" (mp) : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" @@ -15492,77 +13976,80 @@ SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* __asm__ __volatile__ ( "mov r8, %[mp]\n\t" - "mov r12, %[ca]\n\t" - "mov r14, %[m]\n\t" + "mov r12, %[m]\n\t" "mov r9, %[a]\n\t" "mov r4, #0\n\t" - /* i = 0 */ - "mov r11, r4\n\t" + "add r11, r9, #32\n\t" "\n1:\n\t" - "mov r5, #0\n\t" - "mov %[ca], #0\n\t" /* mu = a[i] * mp */ "mov %[mp], r8\n\t" - "ldr %[a], [%[a]]\n\t" + "ldr %[a], [r9]\n\t" "mul %[mp], %[mp], %[a]\n\t" - "mov %[m], r14\n\t" + "mov %[m], r12\n\t" "mov r10, r9\n\t" + "add r14, r9, #24\n\t" "\n2:\n\t" /* a[i+j] += m[j] * mu */ - "mov %[a], r10\n\t" - "ldr %[a], [%[a]]\n\t" - "mov %[ca], #0\n\t" - "mov r4, r5\n\t" + "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]]\n\t" + "ldr r7, [%[m]], #4\n\t" "umull r6, r7, %[mp], r7\n\t" "adds %[a], %[a], r6\n\t" - "adcs r5, r5, r7\n\t" + "adc r5, r5, r7\n\t" /* Multiply m[j] and mu - Done */ "adds r4, r4, %[a]\n\t" - "adc r5, r5, %[ca]\n\t" - "mov %[a], r10\n\t" - "str r4, [%[a]]\n\t" - "mov r6, #4\n\t" - "add %[m], %[m], #4\n\t" - "add r10, r10, r6\n\t" - "mov r4, #28\n\t" - "add r4, r4, r9\n\t" - "cmp r10, r4\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" + /* a[i+j+1] += m[j+1] * mu */ + "ldr %[a], [r10]\n\t" + "mov r4, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r7, [%[m]], #4\n\t" + "umull r6, r7, %[mp], r7\n\t" + "adds %[a], %[a], r6\n\t" + "adc r4, r4, r7\n\t" + /* Multiply m[j] and mu - Done */ + "adds r5, r5, %[a]\n\t" + "adc r4, r4, #0\n\t" + "str r5, [r10], #4\n\t" + "cmp r10, r14\n\t" "blt 2b\n\t" + /* a[i+6] += m[6] * mu */ + "ldr %[a], [r10]\n\t" + "mov r5, #0\n\t" + /* Multiply m[j] and mu - Start */ + "ldr r7, [%[m]], #4\n\t" + "umull r6, r7, %[mp], r7\n\t" + "adds %[a], %[a], r6\n\t" + "adc r5, r5, r7\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + "adc r5, r5, #0\n\t" + "str r4, [r10], #4\n\t" /* a[i+7] += m[7] * mu */ + "mov r4, %[ca]\n\t" "mov %[ca], #0\n\t" - "mov r4, r12\n\t" - "mov %[a], #0\n\t" /* Multiply m[7] and mu - Start */ "ldr r7, [%[m]]\n\t" "umull r6, r7, %[mp], r7\n\t" "adds r5, r5, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc %[a], %[a], %[ca]\n\t" + "adcs r4, r4, r7\n\t" + "adc %[ca], %[ca], #0\n\t" /* Multiply m[7] and mu - Done */ - "mov %[ca], %[a]\n\t" - "mov %[a], r10\n\t" - "ldr r7, [%[a], #4]\n\t" - "ldr %[a], [%[a]]\n\t" - "mov r6, #0\n\t" - "adds r5, r5, %[a]\n\t" + "ldr r6, [r10]\n\t" + "ldr r7, [r10, #4]\n\t" + "adds r6, r6, r5\n\t" "adcs r7, r7, r4\n\t" - "adc %[ca], %[ca], r6\n\t" - "mov %[a], r10\n\t" - "str r5, [%[a]]\n\t" - "str r7, [%[a], #4]\n\t" - /* i += 1 */ - "mov r6, #4\n\t" - "add r9, r9, r6\n\t" - "add r11, r11, r6\n\t" - "mov r12, %[ca]\n\t" - "mov %[a], r9\n\t" - "mov r4, #32\n\t" - "cmp r11, r4\n\t" + "adc %[ca], %[ca], #0\n\t" + "str r6, [r10]\n\t" + "str r7, [r10, #4]\n\t" + /* Next word in a */ + "add r9, r9, #4\n\t" + "cmp r9, r11\n\t" "blt 1b\n\t" - "mov %[m], r14\n\t" + "mov %[a], r9\n\t" + "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) : [m] "r" (m), [mp] "r" (mp) : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" @@ -15581,9 +14068,8 @@ SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { sp_digit tmp[8]; + __asm__ __volatile__ ( - "mov r8, %[r]\n\t" - "mov %[r], #0\n\t" /* A[0] * B[0] */ "ldr r6, [%[a], #0]\n\t" "ldr r7, [%[b], #0]\n\t" @@ -15592,19 +14078,17 @@ SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, "str r3, [%[tmp], #0]\n\t" "mov r3, #0\n\t" /* A[0] * B[1] */ - "ldr r6, [%[a], #0]\n\t" "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adc r5, r5, r7\n\t" /* A[1] * B[0] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" "str r4, [%[tmp], #4]\n\t" "mov r4, #0\n\t" /* A[0] * B[2] */ @@ -15612,22 +14096,22 @@ SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[1] * B[1] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[2] * B[0] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" "str r5, [%[tmp], #8]\n\t" "mov r5, #0\n\t" /* A[0] * B[3] */ @@ -15635,29 +14119,29 @@ SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[1] * B[2] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[2] * B[1] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[3] * B[0] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" "str r3, [%[tmp], #12]\n\t" "mov r3, #0\n\t" /* A[0] * B[4] */ @@ -15665,36 +14149,36 @@ SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[1] * B[3] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[2] * B[2] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[3] * B[1] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[4] * B[0] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" "str r4, [%[tmp], #16]\n\t" "mov r4, #0\n\t" /* A[0] * B[5] */ @@ -15702,43 +14186,43 @@ SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[1] * B[4] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[2] * B[3] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[3] * B[2] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[4] * B[1] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[5] * B[0] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" "str r5, [%[tmp], #20]\n\t" "mov r5, #0\n\t" /* A[0] * B[6] */ @@ -15746,50 +14230,50 @@ SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[1] * B[5] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[2] * B[4] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[3] * B[3] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[4] * B[2] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[5] * B[1] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[6] * B[0] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" "str r3, [%[tmp], #24]\n\t" "mov r3, #0\n\t" /* A[0] * B[7] */ @@ -15797,57 +14281,57 @@ SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[1] * B[6] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[2] * B[5] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[3] * B[4] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[4] * B[3] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[5] * B[2] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[6] * B[1] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[7] * B[0] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" "str r4, [%[tmp], #28]\n\t" "mov r4, #0\n\t" /* A[1] * B[7] */ @@ -15855,230 +14339,230 @@ SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[2] * B[6] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[3] * B[5] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[4] * B[4] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[5] * B[3] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[6] * B[2] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[7] * B[1] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "str r5, [r8, #32]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[r], #32]\n\t" "mov r5, #0\n\t" /* A[2] * B[7] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[3] * B[6] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[4] * B[5] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[5] * B[4] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[6] * B[3] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[7] * B[2] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "str r3, [r8, #36]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "str r3, [%[r], #36]\n\t" "mov r3, #0\n\t" /* A[3] * B[7] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[4] * B[6] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[5] * B[5] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[6] * B[4] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[7] * B[3] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "str r4, [r8, #40]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[r], #40]\n\t" "mov r4, #0\n\t" /* A[4] * B[7] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[5] * B[6] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[6] * B[5] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[7] * B[4] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "str r5, [r8, #44]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[r], #44]\n\t" "mov r5, #0\n\t" /* A[5] * B[7] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[6] * B[6] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[7] * B[5] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "str r3, [r8, #48]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "str r3, [%[r], #48]\n\t" "mov r3, #0\n\t" /* A[6] * B[7] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" /* A[7] * B[6] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "str r4, [r8, #52]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[r], #52]\n\t" "mov r4, #0\n\t" /* A[7] * B[7] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "str r5, [r8, #56]\n\t" - "str r3, [r8, #60]\n\t" + "adc r3, r3, r7\n\t" + "str r5, [%[r], #56]\n\t" + "str r3, [%[r], #60]\n\t" + /* Transfer tmp to r */ "ldr r3, [%[tmp], #0]\n\t" "ldr r4, [%[tmp], #4]\n\t" "ldr r5, [%[tmp], #8]\n\t" "ldr r6, [%[tmp], #12]\n\t" - "str r3, [r8, #0]\n\t" - "str r4, [r8, #4]\n\t" - "str r5, [r8, #8]\n\t" - "str r6, [r8, #12]\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" "ldr r3, [%[tmp], #16]\n\t" "ldr r4, [%[tmp], #20]\n\t" "ldr r5, [%[tmp], #24]\n\t" "ldr r6, [%[tmp], #28]\n\t" - "str r3, [r8, #16]\n\t" - "str r4, [r8, #20]\n\t" - "str r5, [r8, #24]\n\t" - "str r6, [r8, #28]\n\t" - "mov %[r], r8\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8" + : "memory", "r3", "r4", "r5", "r6", "r7" ); } @@ -16107,8 +14591,6 @@ SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) { sp_digit tmp[8]; __asm__ __volatile__ ( - "mov r8, %[r]\n\t" - "mov %[r], #0\n\t" /* A[0] * A[0] */ "ldr r6, [%[a], #0]\n\t" "umull r3, r4, r6, r6\n\t" @@ -16116,15 +14598,13 @@ SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) "str r3, [%[tmp], #0]\n\t" "mov r3, #0\n\t" /* A[0] * A[1] */ - "ldr r6, [%[a], #0]\n\t" "ldr r7, [%[a], #4]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adc r5, r5, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" "str r4, [%[tmp], #4]\n\t" "mov r4, #0\n\t" /* A[0] * A[2] */ @@ -16132,358 +14612,323 @@ SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) "ldr r7, [%[a], #8]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adc r3, r3, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[1] * A[1] */ "ldr r6, [%[a], #4]\n\t" "umull r6, r7, r6, r6\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adc r4, r4, #0\n\t" "str r5, [%[tmp], #8]\n\t" "mov r5, #0\n\t" /* A[0] * A[3] */ "ldr r6, [%[a], #0]\n\t" "ldr r7, [%[a], #12]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "umull r8, r9, r6, r7\n\t" + "mov r10, #0\n\t" /* A[1] * A[2] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" + "adds r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r5, r5, r10\n\t" "str r3, [%[tmp], #12]\n\t" "mov r3, #0\n\t" /* A[0] * A[4] */ "ldr r6, [%[a], #0]\n\t" "ldr r7, [%[a], #16]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "umull r8, r9, r6, r7\n\t" + "mov r10, #0\n\t" /* A[1] * A[3] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[2] * A[2] */ "ldr r6, [%[a], #8]\n\t" "umull r6, r7, r6, r6\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adc r3, r3, #0\n\t" + "adds r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adc r3, r3, r10\n\t" "str r4, [%[tmp], #16]\n\t" "mov r4, #0\n\t" /* A[0] * A[5] */ "ldr r6, [%[a], #0]\n\t" "ldr r7, [%[a], #20]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "umull r8, r9, r6, r7\n\t" + "mov r10, #0\n\t" /* A[1] * A[4] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[2] * A[3] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[a], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" + "adds r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "adds r5, r5, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r10\n\t" "str r5, [%[tmp], #20]\n\t" "mov r5, #0\n\t" /* A[0] * A[6] */ "ldr r6, [%[a], #0]\n\t" "ldr r7, [%[a], #24]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "umull r8, r9, r6, r7\n\t" + "mov r10, #0\n\t" /* A[1] * A[5] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[2] * A[4] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[a], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[3] * A[3] */ "ldr r6, [%[a], #12]\n\t" "umull r6, r7, r6, r6\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adc r5, r5, #0\n\t" + "adds r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r5, r5, r10\n\t" "str r3, [%[tmp], #24]\n\t" "mov r3, #0\n\t" /* A[0] * A[7] */ "ldr r6, [%[a], #0]\n\t" "ldr r7, [%[a], #28]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "umull r8, r9, r6, r7\n\t" + "mov r10, #0\n\t" /* A[1] * A[6] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[2] * A[5] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[3] * A[4] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[a], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" + "adds r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adc r3, r3, r10\n\t" "str r4, [%[tmp], #28]\n\t" "mov r4, #0\n\t" /* A[1] * A[7] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #28]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "umull r8, r9, r6, r7\n\t" + "mov r10, #0\n\t" /* A[2] * A[6] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[3] * A[5] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[4] * A[4] */ "ldr r6, [%[a], #16]\n\t" "umull r6, r7, r6, r6\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "str r5, [r8, #32]\n\t" + "adc r4, r4, #0\n\t" + "adds r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "adds r5, r5, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #32]\n\t" "mov r5, #0\n\t" /* A[2] * A[7] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[a], #28]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "umull r8, r9, r6, r7\n\t" + "mov r10, #0\n\t" /* A[3] * A[6] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[4] * A[5] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "str r3, [r8, #36]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" + "adds r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #36]\n\t" "mov r3, #0\n\t" /* A[3] * A[7] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[a], #28]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "umull r8, r9, r6, r7\n\t" + "mov r10, #0\n\t" /* A[4] * A[6] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adds r8, r8, r6\n\t" + "adcs r9, r9, r7\n\t" + "adc r10, r10, #0\n\t" /* A[5] * A[5] */ "ldr r6, [%[a], #20]\n\t" "umull r6, r7, r6, r6\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "str r4, [r8, #40]\n\t" + "adc r3, r3, #0\n\t" + "adds r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adc r10, r10, r10\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #40]\n\t" "mov r4, #0\n\t" /* A[4] * A[7] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" /* A[5] * A[6] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, %[r]\n\t" - "str r5, [r8, #44]\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[r], #44]\n\t" "mov r5, #0\n\t" /* A[5] * A[7] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[6] * A[6] */ "ldr r6, [%[a], #24]\n\t" "umull r6, r7, r6, r6\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" - "str r3, [r8, #48]\n\t" + "adc r5, r5, #0\n\t" + "str r3, [%[r], #48]\n\t" "mov r3, #0\n\t" /* A[6] * A[7] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, %[r]\n\t" - "str r4, [r8, #52]\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "str r4, [%[r], #52]\n\t" "mov r4, #0\n\t" /* A[7] * A[7] */ "ldr r6, [%[a], #28]\n\t" "umull r6, r7, r6, r6\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "str r5, [r8, #56]\n\t" - "str r3, [r8, #60]\n\t" + "adc r3, r3, r7\n\t" + "str r5, [%[r], #56]\n\t" + "str r3, [%[r], #60]\n\t" + /* Transfer tmp to r */ "ldr r3, [%[tmp], #0]\n\t" "ldr r4, [%[tmp], #4]\n\t" "ldr r5, [%[tmp], #8]\n\t" "ldr r6, [%[tmp], #12]\n\t" - "str r3, [r8, #0]\n\t" - "str r4, [r8, #4]\n\t" - "str r5, [r8, #8]\n\t" - "str r6, [r8, #12]\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" "ldr r3, [%[tmp], #16]\n\t" "ldr r4, [%[tmp], #20]\n\t" "ldr r5, [%[tmp], #24]\n\t" "ldr r6, [%[tmp], #28]\n\t" - "str r3, [r8, #16]\n\t" - "str r4, [r8, #20]\n\t" - "str r5, [r8, #24]\n\t" - "str r6, [r8, #28]\n\t" - "mov %[r], r8\n\t" + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" : : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); } @@ -16695,43 +15140,31 @@ SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "adds r4, r4, r5\n\t" - "str r4, [%[r], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "adcs r4, r4, r5\n\t" - "str r4, [%[r], #28]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5" + : "memory", "r4", "r5", "r6", "r7" ); return c; @@ -20213,38 +18646,26 @@ SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r6, [%[b], #4]\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "subs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #0]\n\t" - "str r4, [%[a], #4]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r6, [%[b], #12]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #8]\n\t" - "str r4, [%[a], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r6, [%[b], #20]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #16]\n\t" - "str r4, [%[a], #20]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r6, [%[b], #28]\n\t" + "stm %[a]!, {r3, r4}\n\t" + "ldm %[a], {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" "sbcs r3, r3, r5\n\t" "sbcs r4, r4, r6\n\t" - "str r3, [%[a], #24]\n\t" - "str r4, [%[a], #28]\n\t" + "stm %[a]!, {r3, r4}\n\t" "sbc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : @@ -20265,35 +18686,31 @@ SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "mov r6, #32\n\t" - "add r6, r6, %[a]\n\t" - "mov r8, %[r]\n\t" - "mov r9, r6\n\t" - "mov r3, #0\n\t" + "add r8, %[a], #32\n\t" + /* A[0] * B */ + "ldr r6, [%[a]], #4\n\t" + "umull r5, r3, r6, %[b]\n\t" "mov r4, #0\n\t" + "str r5, [%[r]], #4\n\t" + /* A[0] * B - Done */ "\n1:\n\t" - "mov %[r], #0\n\t" "mov r5, #0\n\t" /* A[] * B */ - "ldr r6, [%[a]]\n\t" + "ldr r6, [%[a]], #4\n\t" "umull r6, r7, r6, %[b]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, %[r]\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" /* A[] * B - Done */ - "mov %[r], r8\n\t" - "str r3, [%[r]]\n\t" + "str r3, [%[r]], #4\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add %[r], %[r], #4\n\t" - "add %[a], %[a], #4\n\t" - "mov r8, %[r]\n\t" - "cmp %[a], r9\n\t" + "cmp %[a], r8\n\t" "blt 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8" ); }