combining mul+add in one call
parent
fed427c269
commit
1390b505ce
16
README.md
16
README.md
|
@ -26,13 +26,13 @@ Python script `x25519.py`
|
|||
|
||||
Test vectors from https://tools.ietf.org/html/rfc8031#appendix-A
|
||||
Test 1: X25519: q = d*u
|
||||
Computation time: 39 ms
|
||||
Computation time: 26 ms
|
||||
q [hex/dec] = 66c7fb0d9f7090f777fa8493081ce8a4f174dbbbf9a36f16ba571206d4ddd548 46489245826987382655505058740283756869827209462947799117248009944518788765000
|
||||
Test 1 passed.
|
||||
|
||||
Test 2: X25519 + y-coordinate recovery + transform to Edwards-curve
|
||||
(x, y) = Edward(q, r), (q, r) = d*(u, v)
|
||||
Computation time: 44 ms
|
||||
Computation time: 27 ms
|
||||
x [hex/dec] = 1ce7e6e3a747a25352df2d3155f06427ba389769e37755731dead2b54c5cef03 13074494971479542188989287385397236998770807488645203601973104535274459557635
|
||||
y [hex/dec] = 4dd1c7c2001c147333ceedf77ebd48b1100e2a95f88cf1f40d1b74ec7279e657 35198739055214410372845858661063095427357109357427482712729161712065293444695
|
||||
Test 2 passed.
|
||||
|
@ -41,24 +41,24 @@ Python script `x25519.py`
|
|||
Python script `ed25519.py`
|
||||
|
||||
Test 1: Length of message: 0 bytes
|
||||
Computation time: 89 ms
|
||||
Computation time: 58 ms
|
||||
Test 1 passed.
|
||||
|
||||
Test 2: Length of message: 1 byte
|
||||
Computation time: 90 ms
|
||||
Computation time: 58 ms
|
||||
Test 2 passed.
|
||||
|
||||
Test 3: Length of message: 2 bytes
|
||||
Computation time: 90 ms
|
||||
Computation time: 58 ms
|
||||
Test 3 passed.
|
||||
|
||||
Test 4: Length of message: 1023 bytes
|
||||
Computation time: 98 ms
|
||||
Computation time: 67 ms
|
||||
Test 4 passed.
|
||||
|
||||
Test 5: Length of message: 64 bytes
|
||||
Computation time: 90 ms
|
||||
Computation time: 59 ms
|
||||
Test 5 passed.
|
||||
|
||||
## Warning
|
||||
The code is not ready for production, both for security reasons and because regression tests are missing.
|
||||
The source code is not ready for production, both for security reasons and because regression tests are missing.
|
||||
|
|
|
@ -70,93 +70,159 @@ void sub_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p) {
|
|||
}
|
||||
|
||||
|
||||
void mul_zxy(uint32_t *z, uint32_t *x, uint32_t y) {
|
||||
void mul_add_zxy(uint32_t *z, uint32_t *x, uint32_t y) {
|
||||
// z += x*y
|
||||
// Note, UMAAL is not available
|
||||
__asm__ volatile (
|
||||
// 0
|
||||
"LDMIA %0!, {r3}\n"
|
||||
"UMULL r5, r6, r3, %1\n"
|
||||
"LDMIA %2, {r3}\n"
|
||||
"ADDS r5, r5, r3\n"
|
||||
"STMIA %2!, {r5}\n"
|
||||
// 1
|
||||
"LDMIA %0!, {r3}\n"
|
||||
"MOV r5, 0\n"
|
||||
"UMLAL r6, r5, r3, %1\n"
|
||||
"LDMIA %2, {r3}\n"
|
||||
"ADCS r6, r6, r3\n"
|
||||
"STMIA %2!, {r6}\n"
|
||||
// 2
|
||||
"LDMIA %0!, {r3}\n"
|
||||
"MOV r6, 0\n"
|
||||
"UMLAL r5, r6, r3, %1\n"
|
||||
"LDMIA %2, {r3}\n"
|
||||
"ADCS r5, r5, r3\n"
|
||||
"STMIA %2!, {r5}\n"
|
||||
// 3
|
||||
"LDMIA %0!, {r3}\n"
|
||||
"MOV r5, 0\n"
|
||||
"UMLAL r6, r5, r3, %1\n"
|
||||
"LDMIA %2, {r3}\n"
|
||||
"ADCS r6, r6, r3\n"
|
||||
"STMIA %2!, {r6}\n"
|
||||
// 4
|
||||
"LDMIA %0!, {r3}\n"
|
||||
"MOV r6, 0\n"
|
||||
"UMLAL r5, r6, r3, %1\n"
|
||||
"LDMIA %2, {r3}\n"
|
||||
"ADCS r5, r5, r3\n"
|
||||
"STMIA %2!, {r5}\n"
|
||||
// 5
|
||||
"LDMIA %0!, {r3}\n"
|
||||
"MOV r5, 0\n"
|
||||
"UMLAL r6, r5, r3, %1\n"
|
||||
"LDMIA %2, {r3}\n"
|
||||
"ADCS r6, r6, r3\n"
|
||||
"STMIA %2!, {r6}\n"
|
||||
// 6
|
||||
"LDMIA %0!, {r3}\n"
|
||||
"MOV r6, 0\n"
|
||||
"UMLAL r5, r6, r3, %1\n"
|
||||
"LDMIA %2, {r3}\n"
|
||||
"ADCS r5, r5, r3\n"
|
||||
"STMIA %2!, {r5}\n"
|
||||
// 7
|
||||
"LDMIA %0!, {r3}\n"
|
||||
"MOV r5, 0\n"
|
||||
"UMLAL r6, r5, r3, %1\n"
|
||||
"LDMIA %2, {r3}\n"
|
||||
"ADCS r6, r6, r3\n"
|
||||
"STMIA %2!, {r6}\n"
|
||||
"LDMIA %2, {r3}\n"
|
||||
"ADCS r5, r5, r3\n"
|
||||
"STMIA %2, {r5}\n"
|
||||
: : "r" (x), "r" (y), "r" (z) : "r3", "r5", "r6"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
void mul_zx0y0(uint32_t *z, uint32_t x, uint32_t y) {
|
||||
|
||||
void mul_add_zx0y0(uint32_t *z, uint32_t x, uint32_t y) {
|
||||
// z += x0 * y0
|
||||
__asm__ volatile (
|
||||
// 0
|
||||
"UMULL r5, r6, %0, %1\n"
|
||||
"STMIA %2!, {r5}\n"
|
||||
"STMIA %2!, {r6}\n"
|
||||
"LDMIA %2, {r3}\n"
|
||||
"ADDS r3, r3, r5\n"
|
||||
"STMIA %2!, {r3}\n"
|
||||
"LDMIA %2, {r3}\n"
|
||||
"ADCS r3, r3, r6\n"
|
||||
"STMIA %2!, {r3}\n"
|
||||
"MOV r5, 0\n"
|
||||
"MOV r6, 0\n"
|
||||
"STMIA %2!, {r5, r6}\n"
|
||||
"STMIA %2!, {r5, r6}\n"
|
||||
"STMIA %2!, {r5, r6}\n"
|
||||
"STMIA %2, {r5}\n"
|
||||
: : "r" (x), "r" (y), "r" (z) : "r5", "r6"
|
||||
"LDMIA %2, {r3}\n"
|
||||
"ADCS r3, r3, r5\n"
|
||||
"STMIA %2!, {r3}\n"
|
||||
"LDMIA %2, {r3}\n"
|
||||
"ADCS r3, r3, r5\n"
|
||||
"STMIA %2!, {r3}\n"
|
||||
"LDMIA %2, {r3}\n"
|
||||
"ADCS r3, r3, r5\n"
|
||||
"STMIA %2!, {r3}\n"
|
||||
"LDMIA %2, {r3}\n"
|
||||
"ADCS r3, r3, r5\n"
|
||||
"STMIA %2!, {r3}\n"
|
||||
"LDMIA %2, {r3}\n"
|
||||
"ADCS r3, r3, r5\n"
|
||||
"STMIA %2!, {r3}\n"
|
||||
"LDMIA %2, {r3}\n"
|
||||
"ADCS r3, r3, r5\n"
|
||||
"STMIA %2!, {r3}\n"
|
||||
"LDMIA %2, {r3}\n"
|
||||
"ADCS r3, r3, r5\n"
|
||||
"STMIA %2, {r3}\n"
|
||||
//"SUBS %2, 32\n" //*
|
||||
: : "r" (x), "r" (y), "r" (z) : "r3", "r5", "r6"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
void pu(uint32_t *t, uint32_t u) {
|
||||
// computes (2^255 -19) * u
|
||||
void pu_add_shift(uint32_t *t, uint32_t u) {
|
||||
// computes t = (t + (2^255 -19) * u) >> 32
|
||||
__asm__ volatile (
|
||||
"MOV r3, 19\n"
|
||||
"UMULL r5, r6, r3, %1\n"
|
||||
"UMULL r5, r6, r3, %1\n"
|
||||
"MOV r2, 0\n"
|
||||
"LSRS %1, %1, 1\n"
|
||||
"RRXS r2, r2\n"
|
||||
"MOV r3, 0\n"
|
||||
"SUBS r4, r3, r5\n"
|
||||
"STMIA %0!, {r4}\n"
|
||||
"SBCS r4, r3, r6\n"
|
||||
"STMIA %0!, {r4}\n"
|
||||
"SBCS r4, r3, r3\n"
|
||||
"STMIA %0!, {r4}\n"
|
||||
"STMIA %0!, {r4}\n"
|
||||
"STMIA %0!, {r4}\n"
|
||||
"STMIA %0!, {r4}\n"
|
||||
"STMIA %0!, {r4}\n"
|
||||
"SBCS r4, r2, r3\n"
|
||||
"STMIA %0!, {r4}\n"
|
||||
"SBCS r4, %1, r3\n"
|
||||
"STMIA %0, {r4}\n"
|
||||
// t[8]:t[7] + (u << 255)
|
||||
"ADDS %0, %0, 28\n"
|
||||
"LDMIA %0, {r3}\n"
|
||||
"ADDS r3, r3, r2\n"
|
||||
"STMIA %0!, {r3}\n"
|
||||
"LDMIA %0, {r3}\n"
|
||||
"ADCS r3, r3, %1\n"
|
||||
"STMIA %0, {r3}\n"
|
||||
"SUBS %0, %0, 32\n"
|
||||
// t - 19*u
|
||||
"MOV r2, %0\n"
|
||||
"LDMIA %0!, {r4}\n"
|
||||
"SUBS r4, r4, r5\n"
|
||||
"LDMIA %0!, {r4}\n"
|
||||
"SBCS r4, r4, r6\n"
|
||||
"STMIA r2!, {r4}\n"
|
||||
"MOV r5, 0\n"
|
||||
"LDMIA %0!, {r4}\n"
|
||||
"SBCS r4, r4, r5\n"
|
||||
"STMIA r2!, {r4}\n"
|
||||
"LDMIA %0!, {r4}\n"
|
||||
"SBCS r4, r4, r5\n"
|
||||
"STMIA r2!, {r4}\n"
|
||||
"LDMIA %0!, {r4}\n"
|
||||
"SBCS r4, r4, r5\n"
|
||||
"STMIA r2!, {r4}\n"
|
||||
"LDMIA %0!, {r4}\n"
|
||||
"SBCS r4, r4, r5\n"
|
||||
"STMIA r2!, {r4}\n"
|
||||
"LDMIA %0!, {r4}\n"
|
||||
"SBCS r4, r4, r5\n"
|
||||
"STMIA r2!, {r4}\n"
|
||||
"LDMIA %0!, {r4}\n"
|
||||
"SBCS r4, r4, r5\n"
|
||||
"STMIA r2!, {r4}\n"
|
||||
"LDMIA %0, {r4}\n"
|
||||
"SBCS r4, r4, r5\n"
|
||||
"STMIA r2!, {r4}\n"
|
||||
"STMIA r2, {r5}\n" // ms-word = 0
|
||||
//"SUBS %0, %0, 32\n"
|
||||
: : "r" (t), "r" (u) : "r2", "r3", "r4", "r5", "r6"
|
||||
);
|
||||
|
@ -173,13 +239,8 @@ void mont_mul_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p) {
|
|||
for (int i=0; i<9; i++) a[i] = 0;
|
||||
for (int i=0; i<8; i++) {
|
||||
u = (a[0] + x[i] * y[0]) * 678152731;
|
||||
mul_zxy(tmp, y, x[i]);
|
||||
a[8] += tmp[8] + add_zxy(a, a, tmp);
|
||||
// mul_zxy(tmp, p, u);
|
||||
pu(tmp, u);
|
||||
a[8] += tmp[8] + add_zxy(a, a, tmp); // A <- (A + xi y + u m) / b
|
||||
for (int j=0; j<8; j++) a[j] = a[j+1];
|
||||
a[8] = 0;
|
||||
mul_add_zxy(a, y, x[i]); // A <- A + xi y
|
||||
pu_add_shift(a, u); // A <- (A + u m) / b
|
||||
}
|
||||
uint32_t c = sub_zxy(tmp, a, p); // carry not set if negative
|
||||
for (int i=0; i<8; i++) z[i] = ret[c][i];
|
||||
|
@ -196,13 +257,8 @@ void mont_mul_zxy0_mod_p(uint32_t *z, uint32_t *x, uint32_t y, uint32_t *p) {
|
|||
for (int i=0; i<9; i++) a[i] = 0;
|
||||
for (int i=0; i<8; i++) {
|
||||
u = (a[0] + x[i] * y) * 678152731;
|
||||
mul_zx0y0(tmp, x[i], y);
|
||||
a[8] += tmp[8] + add_zxy(a, a, tmp);
|
||||
// mul_zxy(tmp, p, u);
|
||||
pu(tmp, u);
|
||||
a[8] += tmp[8] + add_zxy(a, a, tmp); // A <- (A + xi y + u m) / b
|
||||
for (int j=0; j<8; j++) a[j] = a[j+1];
|
||||
a[8] = 0;
|
||||
mul_add_zx0y0(a, y, x[i]); // A <- A + xi y
|
||||
pu_add_shift(a, u); // A <- (A + u m) / b
|
||||
}
|
||||
uint32_t c = sub_zxy(tmp, a, p); // carry not set if negative
|
||||
for (int i=0; i<8; i++) z[i] = ret[c][i];
|
||||
|
|
Loading…
Reference in New Issue