timing improvements, reduced array size for mp-ints

master v0.2
pmvr 2020-06-06 20:30:13 +02:00
parent ed2344c18b
commit 76356e23af
5 changed files with 78 additions and 88 deletions

View File

@ -26,7 +26,7 @@ Python script `x25519.py`
Test vectors from https://tools.ietf.org/html/rfc8031#appendix-A Test vectors from https://tools.ietf.org/html/rfc8031#appendix-A
Test 1: X25519: q = d*u Test 1: X25519: q = d*u
Computatation time: 46 ms Computatation time: 45 ms
q [hex/dec] = 66c7fb0d9f7090f777fa8493081ce8a4f174dbbbf9a36f16ba571206d4ddd548 46489245826987382655505058740283756869827209462947799117248009944518788765000 q [hex/dec] = 66c7fb0d9f7090f777fa8493081ce8a4f174dbbbf9a36f16ba571206d4ddd548 46489245826987382655505058740283756869827209462947799117248009944518788765000
Test 1 passed. Test 1 passed.
@ -41,23 +41,23 @@ Python script `x25519.py`
Python script `ed25519.py` Python script `ed25519.py`
Test 1: Length of message: 0 bytes Test 1: Length of message: 0 bytes
Computatation time: 104 ms Computatation time: 101 ms
Test 1 passed. Test 1 passed.
Test 2: Length of message: 1 byte Test 2: Length of message: 1 byte
Computatation time: 104 ms Computatation time: 101 ms
Test 2 passed. Test 2 passed.
Test 3: Length of message: 2 bytes Test 3: Length of message: 2 bytes
Computatation time: 104 ms Computatation time: 101 ms
Test 3 passed. Test 3 passed.
Test 4: Length of message: 1023 bytes Test 4: Length of message: 1023 bytes
Computatation time: 113 ms Computatation time: 110 ms
Test 4 passed. Test 4 passed.
Test 5: Length of message: 64 bytes Test 5: Length of message: 64 bytes
Computatation time: 105 ms Computatation time: 102 ms
Test 5 passed. Test 5 passed.
## Warning ## Warning

View File

@ -11,17 +11,13 @@ uint32_t add_zxy(uint32_t *z, uint32_t *x, uint32_t *y) {
"ADCS r5, r5, r9\n" "ADCS r5, r5, r9\n"
"ADCS r6, r6, r10\n" "ADCS r6, r6, r10\n"
"STMIA %3!, {r3-r6}\n" "STMIA %3!, {r3-r6}\n"
"LDMIA %1!, {r3-r6}\n" "LDMIA %1, {r3-r6}\n"
"LDMIA %2!, {r7-r10}\n" "LDMIA %2, {r7-r10}\n"
"ADCS r3, r3, r7\n" "ADCS r3, r3, r7\n"
"ADCS r4, r4, r8\n" "ADCS r4, r4, r8\n"
"ADCS r5, r5, r9\n" "ADCS r5, r5, r9\n"
"ADCS r6, r6, r10\n" "ADCS r6, r6, r10\n"
"STMIA %3!, {r3-r6}\n" "STMIA %3, {r3-r6}\n"
"LDMIA %1, {r3}\n"
"LDMIA %2, {r7}\n"
"ADCS r3, r3, r7\n"
"STMIA %3, {r3}\n"
"MOV %0, 0\n" "MOV %0, 0\n"
"ADCS %0, %0, 0\n" "ADCS %0, %0, 0\n"
: "=r" (carry) : "r" (x), "r" (y), "r" (z) : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" : "=r" (carry) : "r" (x), "r" (y), "r" (z) : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
@ -40,17 +36,13 @@ uint32_t sub_zxy(uint32_t *z, uint32_t *x, uint32_t *y) {
"SBCS r5, r5, r9\n" "SBCS r5, r5, r9\n"
"SBCS r6, r6, r10\n" "SBCS r6, r6, r10\n"
"STMIA %3!, {r3-r6}\n" "STMIA %3!, {r3-r6}\n"
"LDMIA %1!, {r3-r6}\n" "LDMIA %1, {r3-r6}\n"
"LDMIA %2!, {r7-r10}\n" "LDMIA %2, {r7-r10}\n"
"SBCS r3, r3, r7\n" "SBCS r3, r3, r7\n"
"SBCS r4, r4, r8\n" "SBCS r4, r4, r8\n"
"SBCS r5, r5, r9\n" "SBCS r5, r5, r9\n"
"SBCS r6, r6, r10\n" "SBCS r6, r6, r10\n"
"STMIA %3!, {r3-r6}\n" "STMIA %3, {r3-r6}\n"
"LDMIA %1, {r3}\n"
"LDMIA %2, {r7}\n"
"SBCS r3, r3, r7\n"
"STMIA %3, {r3}\n"
"MOV %0, 0\n" "MOV %0, 0\n"
"ADCS %0, %0, 0\n" "ADCS %0, %0, 0\n"
: "=r" (carry) : "r" (x), "r" (y), "r" (z) : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" : "=r" (carry) : "r" (x), "r" (y), "r" (z) : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
@ -60,11 +52,21 @@ uint32_t sub_zxy(uint32_t *z, uint32_t *x, uint32_t *y) {
void add_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p) { void add_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p) {
uint32_t z1[9], z2[9]; uint32_t z1[8], z2[8];
uint32_t* ret[2] = {z1, z2}; uint32_t* ret[2] = {z1, z2};
add_zxy(z1, x, y); add_zxy(z1, x, y);
uint32_t c = sub_zxy(z2, z1, p); // carry not set if negative uint32_t c = sub_zxy(z2, z1, p); // carry not set if negative
for (int i=0; i<9; i++) z[i] = ret[c][i]; for (int i=0; i<8; i++) z[i] = ret[c][i];
}
void sub_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p) {
// x-y % p
uint32_t z1[8], z2[8];
uint32_t* ret[2] = {z2, z1};
uint32_t c = sub_zxy(z1, x, y); // carry not set if negative
add_zxy(z2, z1, p);
for (int i=0; i<8; i++) z[i] = ret[c][i];
} }
@ -135,7 +137,7 @@ void mul_zx0y0(uint32_t *z, uint32_t x, uint32_t y) {
void mont_mul_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p) { void mont_mul_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p) {
// see Alg. 14.36 HoAC // see Alg. 14.36 HoAC
// -m^(-1) mod b is 678152731 for curve25519 // -m^(-1) mod b is 678152731 for curve25519
uint32_t u, carry; uint32_t u;
uint32_t tmp[9], a[9]; uint32_t tmp[9], a[9];
uint32_t* ret[2] = {a, tmp}; uint32_t* ret[2] = {a, tmp};
@ -143,21 +145,21 @@ void mont_mul_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p) {
for (int i=0; i<8; i++) { for (int i=0; i<8; i++) {
u = (a[0] + x[i] * y[0]) * 678152731; u = (a[0] + x[i] * y[0]) * 678152731;
mul_zxy(tmp, y, x[i]); mul_zxy(tmp, y, x[i]);
carry = add_zxy(a, a, tmp); a[8] += tmp[8] + add_zxy(a, a, tmp);
mul_zxy(tmp, p, u); mul_zxy(tmp, p, u);
carry += add_zxy(a, a, tmp); // A <- (A + xi y + u m) / b a[8] += tmp[8] + add_zxy(a, a, tmp); // A <- (A + xi y + u m) / b
for (int j=0; j<8; j++) a[j] = a[j+1]; for (int j=0; j<8; j++) a[j] = a[j+1];
a[8] = carry; a[8] = 0;
} }
uint32_t c = sub_zxy(tmp, a, p); // carry not set if negative uint32_t c = sub_zxy(tmp, a, p); // carry not set if negative
for (int i=0; i<9; i++) z[i] = ret[c][i]; for (int i=0; i<8; i++) z[i] = ret[c][i];
} }
void mont_mul_zxy0_mod_p(uint32_t *z, uint32_t *x, uint32_t y, uint32_t *p) { void mont_mul_zxy0_mod_p(uint32_t *z, uint32_t *x, uint32_t y, uint32_t *p) {
// see Alg. 14.36 HoAC // see Alg. 14.36 HoAC
// -m^(-1) mod b is 678152731 for curve25519 // -m^(-1) mod b is 678152731 for curve25519
uint32_t u, carry; uint32_t u;
uint32_t tmp[9], a[9]; uint32_t tmp[9], a[9];
uint32_t* ret[2] = {a, tmp}; uint32_t* ret[2] = {a, tmp};
@ -165,13 +167,13 @@ void mont_mul_zxy0_mod_p(uint32_t *z, uint32_t *x, uint32_t y, uint32_t *p) {
for (int i=0; i<8; i++) { for (int i=0; i<8; i++) {
u = (a[0] + x[i] * y) * 678152731; u = (a[0] + x[i] * y) * 678152731;
mul_zx0y0(tmp, x[i], y); mul_zx0y0(tmp, x[i], y);
carry = add_zxy(a, a, tmp); a[8] += tmp[8] + add_zxy(a, a, tmp);
mul_zxy(tmp, p, u); mul_zxy(tmp, p, u);
carry += add_zxy(a, a, tmp); // A <- (A + xi y + u m) / b a[8] += tmp[8] + add_zxy(a, a, tmp); // A <- (A + xi y + u m) / b
for (int j=0; j<8; j++) a[j] = a[j+1]; for (int j=0; j<8; j++) a[j] = a[j+1];
a[8] = carry; a[8] = 0;
} }
uint32_t c = sub_zxy(tmp, a, p); // carry not set if negative uint32_t c = sub_zxy(tmp, a, p); // carry not set if negative
for (int i=0; i<9; i++) z[i] = ret[c][i]; for (int i=0; i<8; i++) z[i] = ret[c][i];
} }

View File

@ -2,9 +2,8 @@
#define __arithmetic__ #define __arithmetic__
void shift_right(uint32_t *x); void shift_right(uint32_t *x);
uint32_t add_zxy(uint32_t *z, uint32_t *x, uint32_t *y);
uint32_t sub_zxy(uint32_t *z, uint32_t *x, uint32_t *y);
void add_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p); void add_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p);
void sub_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p);
void mul_zxy(uint32_t *z, uint32_t *x, uint32_t y); void mul_zxy(uint32_t *z, uint32_t *x, uint32_t y);
void mul_zx0y0(uint32_t *z, uint32_t x, uint32_t y); void mul_zx0y0(uint32_t *z, uint32_t x, uint32_t y);
void mont_mul_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p); void mont_mul_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p);

View File

@ -3,14 +3,14 @@
#include "ec.h" #include "ec.h"
const uint32_t a24R = 0x468ba6; const uint32_t a24R = 0x468ba6;
const uint32_t p[9] = {0x00000000, 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffed}; const uint32_t p[8] = {0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffed};
const uint32_t pR2 = 0x5a4; const uint32_t pR2 = 0x5a4;
const uint32_t R = 38; const uint32_t R = 38;
const uint32_t sqrt_minus_486664R[9] = {0x00000000, 0x4038adb9, 0xa83f001e, 0xc1bcaf57, 0x688c332e, 0xa9fa8eee, 0xcb6e1095, 0xa7ab4e9e, 0x1baf4abd}; const uint32_t sqrt_minus_486664R[8] = {0x4038adb9, 0xa83f001e, 0xc1bcaf57, 0x688c332e, 0xa9fa8eee, 0xcb6e1095, 0xa7ab4e9e, 0x1baf4abd};
struct curve { struct curve {
uint32_t a24; uint32_t a24;
uint32_t p[9]; uint32_t p[8];
uint32_t pR2; uint32_t pR2;
}; };
@ -18,8 +18,7 @@ struct curve Curve;
void random_z(uint32_t *z) { void random_z(uint32_t *z) {
// TODO: a true random source should be provided // TODO: a true random source should be provided
for (uint32_t i=0; i<8; i++) z[i] = 0xC3A50FE1; for (uint32_t i=0; i<7; i++) z[i] = 0xC3A50FE1;
z[8] = 0;
z[7] &= 0x7fffffff; z[7] &= 0x7fffffff;
} }
@ -27,37 +26,37 @@ void random_z(uint32_t *z) {
void ini_curve() { void ini_curve() {
Curve.a24 = a24R; Curve.a24 = a24R;
Curve.pR2 = pR2; Curve.pR2 = pR2;
for (int i=0; i<9; i++) { for (int i=0; i<8; i++) {
Curve.p[i] = p[8-i]; Curve.p[i] = p[7-i];
} }
} }
void to_Montgomery(uint32_t *x) { void to_Montgomery(uint32_t *x) {
uint32_t s[9]; uint32_t s[8];
mont_mul_zxy0_mod_p(s, x, Curve.pR2, Curve.p); mont_mul_zxy0_mod_p(s, x, Curve.pR2, Curve.p);
for (uint32_t i=0;i<9;i++) x[i] = s[i]; for (uint32_t i=0;i<8;i++) x[i] = s[i];
} }
void from_Montgomery(uint32_t *x) { void from_Montgomery(uint32_t *x) {
uint32_t s[9]; uint32_t s[8];
mont_mul_zxy0_mod_p(s, x, 1, Curve.p); mont_mul_zxy0_mod_p(s, x, 1, Curve.p);
for (uint32_t i=0;i<9;i++) x[i] = s[i]; for (uint32_t i=0;i<8;i++) x[i] = s[i];
} }
void mod_inverse(uint32_t *z_inv, uint32_t *z) { void mod_inverse(uint32_t *z_inv, uint32_t *z) {
uint32_t z2[9]; uint32_t z2[8];
uint32_t z9[9]; uint32_t z9[8];
uint32_t z11[9]; uint32_t z11[8];
uint32_t z2_5_0[9]; uint32_t z2_5_0[8];
uint32_t z2_10_0[9]; uint32_t z2_10_0[8];
uint32_t z2_20_0[9]; uint32_t z2_20_0[8];
uint32_t z2_50_0[9]; uint32_t z2_50_0[8];
uint32_t z2_100_0[9]; uint32_t z2_100_0[8];
uint32_t t0[9]; uint32_t t0[8];
uint32_t t1[9]; uint32_t t1[8];
uint32_t i; uint32_t i;
mont_mul_zxy_mod_p(z2, z, z, Curve.p); // 2 mont_mul_zxy_mod_p(z2, z, z, Curve.p); // 2
@ -134,12 +133,11 @@ void mod_inverse(uint32_t *z_inv, uint32_t *z) {
void recover_y(uint32_t *x, uint32_t *y, uint32_t *z, uint32_t *xp, uint32_t *yp, uint32_t *xq, uint32_t *zq, uint32_t *xpq, uint32_t *zpq) { void recover_y(uint32_t *x, uint32_t *y, uint32_t *z, uint32_t *xp, uint32_t *yp, uint32_t *xq, uint32_t *zq, uint32_t *xpq, uint32_t *zpq) {
// https://eprint.iacr.org/2017/212.pdf // https://eprint.iacr.org/2017/212.pdf
uint32_t AR = 18493156; uint32_t AR = 18493156;
uint32_t v1[9], v2[9], v3[9], v4[9], s[9], t[9]; uint32_t v1[8], v2[8], v3[8], v4[8], s[8], t[8];
mont_mul_zxy_mod_p(v1, xp, zq, Curve.p); mont_mul_zxy_mod_p(v1, xp, zq, Curve.p);
add_zxy_mod_p(v2, xq, v1, Curve.p); add_zxy_mod_p(v2, xq, v1, Curve.p);
sub_zxy(v3, Curve.p, v1); sub_zxy_mod_p(v3, xq, v1, Curve.p);
add_zxy_mod_p(v3, xq, v3, Curve.p);
mont_mul_zxy_mod_p(t, v3, v3, Curve.p); mont_mul_zxy_mod_p(t, v3, v3, Curve.p);
mont_mul_zxy_mod_p(v3, t, xpq, Curve.p); mont_mul_zxy_mod_p(v3, t, xpq, Curve.p);
mont_mul_zxy0_mod_p(v1, zq, AR, Curve.p); mont_mul_zxy0_mod_p(v1, zq, AR, Curve.p);
@ -149,11 +147,9 @@ void recover_y(uint32_t *x, uint32_t *y, uint32_t *z, uint32_t *xp, uint32_t *yp
add_zxy_mod_p(v4, v4, zq, Curve.p); add_zxy_mod_p(v4, v4, zq, Curve.p);
mont_mul_zxy_mod_p(s, v2, v4, Curve.p); mont_mul_zxy_mod_p(s, v2, v4, Curve.p);
mont_mul_zxy_mod_p(t, v1, zq, Curve.p); mont_mul_zxy_mod_p(t, v1, zq, Curve.p);
sub_zxy(t, Curve.p, t); sub_zxy_mod_p(s, s, t, Curve.p);
add_zxy_mod_p(s, s, t, Curve.p);
mont_mul_zxy_mod_p(v2, s, zpq, Curve.p); mont_mul_zxy_mod_p(v2, s, zpq, Curve.p);
sub_zxy(v3, Curve.p, v3); sub_zxy_mod_p(y, v2, v3, Curve.p);
add_zxy_mod_p(y, v2, v3, Curve.p);
add_zxy_mod_p(v1, yp, yp, Curve.p); add_zxy_mod_p(v1, yp, yp, Curve.p);
mont_mul_zxy_mod_p(s, v1, zq, Curve.p); mont_mul_zxy_mod_p(s, v1, zq, Curve.p);
mont_mul_zxy_mod_p(v1, s, zpq, Curve.p); mont_mul_zxy_mod_p(v1, s, zpq, Curve.p);
@ -164,18 +160,17 @@ void recover_y(uint32_t *x, uint32_t *y, uint32_t *z, uint32_t *xp, uint32_t *yp
void montgomery2edward(uint32_t *x, uint32_t *y, uint32_t *u, uint32_t *v) { void montgomery2edward(uint32_t *x, uint32_t *y, uint32_t *u, uint32_t *v) {
// (x, y) = (sqrt(-486664)*u/v, (u-1)/(u+1)) // (x, y) = (sqrt(-486664)*u/v, (u-1)/(u+1))
uint32_t s[9], t[9], w[9]; uint32_t s[8], t[8], w[8];
for (uint32_t i=1; i<9; i++) t[i] = 0; for (uint32_t i=1; i<8; i++) t[i] = 0;
t[0] = R; // 1*R t[0] = R; // 1*R
add_zxy_mod_p(s, u, t, Curve.p); // s = u+1 add_zxy_mod_p(s, u, t, Curve.p); // s = u+1
mont_mul_zxy_mod_p(w, s, v, Curve.p); mont_mul_zxy_mod_p(w, s, v, Curve.p);
mod_inverse(w, w); // w = (u+1)^(-1) * v^(-1) mod_inverse(w, w); // w = (u+1)^(-1) * v^(-1)
sub_zxy(t, Curve.p, t); // t = -1*R sub_zxy_mod_p(y, u, t, Curve.p); // y = u-1
add_zxy_mod_p(y, u, t, Curve.p); // y = u-1
mont_mul_zxy_mod_p(x, y, w, Curve.p); // x = (u-1)/((u+1) * v) mont_mul_zxy_mod_p(x, y, w, Curve.p); // x = (u-1)/((u+1) * v)
mont_mul_zxy_mod_p(y, x, v, Curve.p); mont_mul_zxy_mod_p(y, x, v, Curve.p);
for (uint32_t i=0; i<9; i++) t[i] = sqrt_minus_486664R[8-i]; for (uint32_t i=0; i<8; i++) t[i] = sqrt_minus_486664R[7-i];
mont_mul_zxy_mod_p(x, t, w, Curve.p); mont_mul_zxy_mod_p(x, t, w, Curve.p);
mont_mul_zxy_mod_p(t, x, s, Curve.p); mont_mul_zxy_mod_p(t, x, s, Curve.p);
mont_mul_zxy_mod_p(x, t, u, Curve.p); mont_mul_zxy_mod_p(x, t, u, Curve.p);
@ -200,11 +195,11 @@ void cswap(uint32_t swap, uint32_t **x, uint32_t **y) {
void X25519(uint32_t *q, uint32_t *r, uint8_t *k, uint32_t *u, uint32_t *v, uint8_t toEdward) { void X25519(uint32_t *q, uint32_t *r, uint8_t *k, uint32_t *u, uint32_t *v, uint8_t toEdward) {
// computes q = k * u // computes q = k * u
uint32_t x1[9], x[2*9], z[2*9]; uint32_t x1[8], x[2*8], z[2*8];
uint32_t *x2 = x; uint32_t *x3 = x+9; uint32_t *x2 = x; uint32_t *x3 = x+8;
uint32_t *z2 = z; uint32_t *z3 = z+9; uint32_t *z2 = z; uint32_t *z3 = z+8;
uint8_t swap, kt, kw; uint8_t swap, kt, kw;
uint32_t A[9], AA[9], B[9], BB[9], C[9], D[9], E[9], DA[9], CB[9]; uint32_t A[8], AA[8], B[8], BB[8], C[8], D[8], E[8], DA[8], CB[8];
ini_curve(); ini_curve();
@ -217,9 +212,8 @@ void X25519(uint32_t *q, uint32_t *r, uint8_t *k, uint32_t *u, uint32_t *v, uint
A[i] = u[i]; A[i] = u[i];
z2[i] = 0; z2[i] = 0;
} }
z2[8] = A[8] = 0;
to_Montgomery(A); to_Montgomery(A);
for (uint32_t i=0; i<9; i++) x1[i] = A[i]; for (uint32_t i=0; i<8; i++) x1[i] = A[i];
mont_mul_zxy_mod_p(x3, A, z3, Curve.p); mont_mul_zxy_mod_p(x3, A, z3, Curve.p);
swap = 0; swap = 0;
@ -235,20 +229,16 @@ void X25519(uint32_t *q, uint32_t *r, uint8_t *k, uint32_t *u, uint32_t *v, uint
add_zxy_mod_p(A, x2, z2, Curve.p); add_zxy_mod_p(A, x2, z2, Curve.p);
mont_mul_zxy_mod_p(AA, A, A, Curve.p); mont_mul_zxy_mod_p(AA, A, A, Curve.p);
sub_zxy(B, Curve.p, z2); sub_zxy_mod_p(B, x2, z2, Curve.p);
add_zxy_mod_p(B, x2, B, Curve.p);
mont_mul_zxy_mod_p(BB, B, B, Curve.p); mont_mul_zxy_mod_p(BB, B, B, Curve.p);
sub_zxy(E, Curve.p, BB); sub_zxy_mod_p(E, AA, BB, Curve.p);
add_zxy_mod_p(E, AA, E, Curve.p);
add_zxy_mod_p(C, x3, z3, Curve.p); add_zxy_mod_p(C, x3, z3, Curve.p);
sub_zxy(D, Curve.p, z3); sub_zxy_mod_p(D, x3, z3, Curve.p);
add_zxy_mod_p(D, x3, D, Curve.p);
mont_mul_zxy_mod_p(DA, D, A, Curve.p); mont_mul_zxy_mod_p(DA, D, A, Curve.p);
mont_mul_zxy_mod_p(CB, C, B, Curve.p); mont_mul_zxy_mod_p(CB, C, B, Curve.p);
add_zxy_mod_p(A, DA, CB, Curve.p); add_zxy_mod_p(A, DA, CB, Curve.p);
mont_mul_zxy_mod_p(x3, A, A, Curve.p); mont_mul_zxy_mod_p(x3, A, A, Curve.p);
sub_zxy(A, Curve.p, CB); sub_zxy_mod_p(A, DA, CB, Curve.p);
add_zxy_mod_p(A, DA, A, Curve.p);
mont_mul_zxy_mod_p(B, A, A, Curve.p); mont_mul_zxy_mod_p(B, A, A, Curve.p);
mont_mul_zxy_mod_p(z3, x1, B, Curve.p); mont_mul_zxy_mod_p(z3, x1, B, Curve.p);
mont_mul_zxy_mod_p(x2, AA, BB, Curve.p); mont_mul_zxy_mod_p(x2, AA, BB, Curve.p);
@ -265,7 +255,6 @@ void X25519(uint32_t *q, uint32_t *r, uint8_t *k, uint32_t *u, uint32_t *v, uint
AA[i] = u[i]; AA[i] = u[i];
BB[i] = v[i]; BB[i] = v[i];
} }
AA[8] = BB[8] = 0;
to_Montgomery(AA); to_Montgomery(AA);
to_Montgomery(BB); to_Montgomery(BB);
recover_y(A, B, C, AA, BB, x2, z2, x3, z3); recover_y(A, B, C, AA, BB, x2, z2, x3, z3);
@ -273,7 +262,7 @@ void X25519(uint32_t *q, uint32_t *r, uint8_t *k, uint32_t *u, uint32_t *v, uint
mont_mul_zxy_mod_p(q, A, C, Curve.p); mont_mul_zxy_mod_p(q, A, C, Curve.p);
mont_mul_zxy_mod_p(r, B, C, Curve.p); mont_mul_zxy_mod_p(r, B, C, Curve.p);
if (toEdward) { if (toEdward) {
for (uint32_t i=0; i<9; i++) { for (uint32_t i=0; i<8; i++) {
A[i] = q[i]; A[i] = q[i];
B[i] = r[i]; B[i] = r[i];
} }

View File

@ -12,10 +12,10 @@ STATIC mp_obj_t x25519(mp_obj_t k, mp_obj_t u) {
mp_get_buffer_raise(k, &bufinfo_k, MP_BUFFER_READ); mp_get_buffer_raise(k, &bufinfo_k, MP_BUFFER_READ);
mp_get_buffer_raise(u, &bufinfo_u, MP_BUFFER_READ); mp_get_buffer_raise(u, &bufinfo_u, MP_BUFFER_READ);
uint32_t q[9]; uint32_t q[8];
X25519(q, 0, bufinfo_k.buf, (uint32_t*)bufinfo_u.buf, 0, 0); X25519(q, 0, bufinfo_k.buf, (uint32_t*)bufinfo_u.buf, 0, 0);
return mp_obj_new_bytes((uint8_t *)q, 8*4); return mp_obj_new_bytes((uint8_t *)q, sizeof(q));
} }
// Define a Python reference to the function above // Define a Python reference to the function above
STATIC MP_DEFINE_CONST_FUN_OBJ_2(x25519_obj, x25519); STATIC MP_DEFINE_CONST_FUN_OBJ_2(x25519_obj, x25519);
@ -30,11 +30,11 @@ STATIC mp_obj_t x25519_ed(mp_obj_t k, mp_obj_t u, mp_obj_t v) {
mp_get_buffer_raise(u, &bufinfo_u, MP_BUFFER_READ); mp_get_buffer_raise(u, &bufinfo_u, MP_BUFFER_READ);
mp_get_buffer_raise(v, &bufinfo_v, MP_BUFFER_READ); mp_get_buffer_raise(v, &bufinfo_v, MP_BUFFER_READ);
uint32_t q[9], r[9]; uint32_t q[8], r[8];
X25519(q, r, bufinfo_k.buf, (uint32_t*)bufinfo_u.buf, (uint32_t*)bufinfo_v.buf, 1); X25519(q, r, bufinfo_k.buf, (uint32_t*)bufinfo_u.buf, (uint32_t*)bufinfo_v.buf, 1);
items[0] = mp_obj_new_bytes((uint8_t *)q, 8*4); items[0] = mp_obj_new_bytes((uint8_t *)q, sizeof(q));
items[1] = mp_obj_new_bytes((uint8_t *)r, 8*4); items[1] = mp_obj_new_bytes((uint8_t *)r, sizeof(r));
return mp_obj_new_tuple(2, items); return mp_obj_new_tuple(2, items);
} }
// Define a Python reference to the function above // Define a Python reference to the function above