small improvement for inversion
parent
4b31304c08
commit
36be90765d
|
@ -23,63 +23,55 @@ void shift_right(uint32_t *x) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
uint32_t add9_zxy(uint32_t *z, uint32_t *x, uint32_t *y) {
|
void add9_zxy(uint32_t *z, uint32_t *x, uint32_t *y) {
|
||||||
// for mod_inverse, operates on 9 words
|
// for mod_inverse, operates on 9 words
|
||||||
uint32_t carry;
|
|
||||||
__asm__ volatile (
|
__asm__ volatile (
|
||||||
"LDMIA %1!, {r3-r6}\n"
|
"LDMIA %0!, {r3-r6}\n"
|
||||||
"LDMIA %2!, {r7-r10}\n"
|
"LDMIA %1!, {r7-r10}\n"
|
||||||
"ADDS r3, r3, r7\n"
|
"ADDS r3, r3, r7\n"
|
||||||
"ADCS r4, r4, r8\n"
|
"ADCS r4, r4, r8\n"
|
||||||
"ADCS r5, r5, r9\n"
|
"ADCS r5, r5, r9\n"
|
||||||
"ADCS r6, r6, r10\n"
|
"ADCS r6, r6, r10\n"
|
||||||
"STMIA %3!, {r3-r6}\n"
|
"STMIA %2!, {r3-r6}\n"
|
||||||
"LDMIA %1!, {r3-r6}\n"
|
"LDMIA %0!, {r3-r6}\n"
|
||||||
"LDMIA %2!, {r7-r10}\n"
|
"LDMIA %1!, {r7-r10}\n"
|
||||||
"ADCS r3, r3, r7\n"
|
"ADCS r3, r3, r7\n"
|
||||||
"ADCS r4, r4, r8\n"
|
"ADCS r4, r4, r8\n"
|
||||||
"ADCS r5, r5, r9\n"
|
"ADCS r5, r5, r9\n"
|
||||||
"ADCS r6, r6, r10\n"
|
"ADCS r6, r6, r10\n"
|
||||||
"STMIA %3!, {r3-r6}\n"
|
"STMIA %2!, {r3-r6}\n"
|
||||||
"LDMIA %1, {r3}\n"
|
"LDMIA %0, {r3}\n"
|
||||||
"LDMIA %2, {r7}\n"
|
"LDMIA %1, {r7}\n"
|
||||||
"ADCS r3, r3, r7\n"
|
"ADCS r3, r3, r7\n"
|
||||||
"STMIA %3, {r3}\n"
|
"STMIA %2, {r3}\n"
|
||||||
"MOV %0, 0\n"
|
: : "r" (x), "r" (y), "r" (z) : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
|
||||||
"ADCS %0, %0, 0\n"
|
|
||||||
: "=r" (carry) : "r" (x), "r" (y), "r" (z) : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
|
|
||||||
);
|
);
|
||||||
return carry;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
uint32_t sub9_zxy(uint32_t *z, uint32_t *x, uint32_t *y) {
|
void sub9_zxy(uint32_t *z, uint32_t *x, uint32_t *y) {
|
||||||
// for mod_inverse, operates on 9 words
|
// for mod_inverse, operates on 9 words
|
||||||
uint32_t carry;
|
|
||||||
__asm__ volatile (
|
__asm__ volatile (
|
||||||
"LDMIA %1!, {r3-r6}\n"
|
"LDMIA %0!, {r3-r6}\n"
|
||||||
"LDMIA %2!, {r7-r10}\n"
|
"LDMIA %1!, {r7-r10}\n"
|
||||||
"SUBS r3, r3, r7\n"
|
"SUBS r3, r3, r7\n"
|
||||||
"SBCS r4, r4, r8\n"
|
"SBCS r4, r4, r8\n"
|
||||||
"SBCS r5, r5, r9\n"
|
"SBCS r5, r5, r9\n"
|
||||||
"SBCS r6, r6, r10\n"
|
"SBCS r6, r6, r10\n"
|
||||||
"STMIA %3!, {r3-r6}\n"
|
"STMIA %2!, {r3-r6}\n"
|
||||||
"LDMIA %1!, {r3-r6}\n"
|
"LDMIA %0!, {r3-r6}\n"
|
||||||
"LDMIA %2!, {r7-r10}\n"
|
"LDMIA %1!, {r7-r10}\n"
|
||||||
"SBCS r3, r3, r7\n"
|
"SBCS r3, r3, r7\n"
|
||||||
"SBCS r4, r4, r8\n"
|
"SBCS r4, r4, r8\n"
|
||||||
"SBCS r5, r5, r9\n"
|
"SBCS r5, r5, r9\n"
|
||||||
"SBCS r6, r6, r10\n"
|
"SBCS r6, r6, r10\n"
|
||||||
"STMIA %3!, {r3-r6}\n"
|
"STMIA %2!, {r3-r6}\n"
|
||||||
"LDMIA %1, {r3}\n"
|
"LDMIA %0, {r3}\n"
|
||||||
"LDMIA %2, {r7}\n"
|
"LDMIA %1, {r7}\n"
|
||||||
"SBCS r3, r3, r7\n"
|
"SBCS r3, r3, r7\n"
|
||||||
"STMIA %3, {r3}\n"
|
"STMIA %2, {r3}\n"
|
||||||
"MOV %0, 0\n"
|
: : "r" (x), "r" (y), "r" (z) : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
|
||||||
"ADCS %0, %0, 0\n"
|
|
||||||
: "=r" (carry) : "r" (x), "r" (y), "r" (z) : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
|
|
||||||
);
|
);
|
||||||
return carry;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -111,7 +103,8 @@ STEP_4:
|
||||||
}
|
}
|
||||||
shift_right(D);
|
shift_right(D);
|
||||||
}
|
}
|
||||||
if (sub9_zxy(u, u, v) == 1) { // u >= v
|
sub9_zxy(u, u, v);
|
||||||
|
if ((u[8] & 0x80000000) == 0) { // u >= v
|
||||||
sub9_zxy(B, B, D);
|
sub9_zxy(B, B, D);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
Loading…
Reference in New Issue