diff --git a/README.md b/README.md index 4bfff30..6202d5e 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Python script `x25519.py` Test vectors from https://tools.ietf.org/html/rfc8031#appendix-A Test 1: X25519: q = d*u - Computatation time: 46 ms + Computatation time: 45 ms q [hex/dec] = 66c7fb0d9f7090f777fa8493081ce8a4f174dbbbf9a36f16ba571206d4ddd548 46489245826987382655505058740283756869827209462947799117248009944518788765000 Test 1 passed. @@ -41,23 +41,23 @@ Python script `x25519.py` Python script `ed25519.py` Test 1: Length of message: 0 bytes - Computatation time: 104 ms + Computatation time: 101 ms Test 1 passed. Test 2: Length of message: 1 byte - Computatation time: 104 ms + Computatation time: 101 ms Test 2 passed. Test 3: Length of message: 2 bytes - Computatation time: 104 ms + Computatation time: 101 ms Test 3 passed. Test 4: Length of message: 1023 bytes - Computatation time: 113 ms + Computatation time: 110 ms Test 4 passed. Test 5: Length of message: 64 bytes - Computatation time: 105 ms + Computatation time: 102 ms Test 5 passed. 
## Warning diff --git a/mpy-modules/curve25519/arithmetic.c b/mpy-modules/curve25519/arithmetic.c index 3842a2b..39f41e9 100644 --- a/mpy-modules/curve25519/arithmetic.c +++ b/mpy-modules/curve25519/arithmetic.c @@ -11,17 +11,13 @@ uint32_t add_zxy(uint32_t *z, uint32_t *x, uint32_t *y) { "ADCS r5, r5, r9\n" "ADCS r6, r6, r10\n" "STMIA %3!, {r3-r6}\n" - "LDMIA %1!, {r3-r6}\n" - "LDMIA %2!, {r7-r10}\n" + "LDMIA %1, {r3-r6}\n" + "LDMIA %2, {r7-r10}\n" "ADCS r3, r3, r7\n" "ADCS r4, r4, r8\n" "ADCS r5, r5, r9\n" "ADCS r6, r6, r10\n" - "STMIA %3!, {r3-r6}\n" - "LDMIA %1, {r3}\n" - "LDMIA %2, {r7}\n" - "ADCS r3, r3, r7\n" - "STMIA %3, {r3}\n" + "STMIA %3, {r3-r6}\n" "MOV %0, 0\n" "ADCS %0, %0, 0\n" : "=r" (carry) : "r" (x), "r" (y), "r" (z) : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" @@ -40,17 +36,13 @@ uint32_t sub_zxy(uint32_t *z, uint32_t *x, uint32_t *y) { "SBCS r5, r5, r9\n" "SBCS r6, r6, r10\n" "STMIA %3!, {r3-r6}\n" - "LDMIA %1!, {r3-r6}\n" - "LDMIA %2!, {r7-r10}\n" + "LDMIA %1, {r3-r6}\n" + "LDMIA %2, {r7-r10}\n" "SBCS r3, r3, r7\n" "SBCS r4, r4, r8\n" "SBCS r5, r5, r9\n" "SBCS r6, r6, r10\n" - "STMIA %3!, {r3-r6}\n" - "LDMIA %1, {r3}\n" - "LDMIA %2, {r7}\n" - "SBCS r3, r3, r7\n" - "STMIA %3, {r3}\n" + "STMIA %3, {r3-r6}\n" "MOV %0, 0\n" "ADCS %0, %0, 0\n" : "=r" (carry) : "r" (x), "r" (y), "r" (z) : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" @@ -60,11 +52,21 @@ uint32_t sub_zxy(uint32_t *z, uint32_t *x, uint32_t *y) { void add_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p) { - uint32_t z1[9], z2[9]; + uint32_t z1[8], z2[8]; uint32_t* ret[2] = {z1, z2}; add_zxy(z1, x, y); uint32_t c = sub_zxy(z2, z1, p); // carry not set if negative - for (int i=0; i<9; i++) z[i] = ret[c][i]; + for (int i=0; i<8; i++) z[i] = ret[c][i]; +} + + +void sub_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p) { + // x-y % p + uint32_t z1[8], z2[8]; + uint32_t* ret[2] = {z2, z1}; + uint32_t c = sub_zxy(z1, x, y); // carry not set if negative + 
add_zxy(z2, z1, p); + for (int i=0; i<8; i++) z[i] = ret[c][i]; } @@ -135,7 +137,7 @@ void mul_zx0y0(uint32_t *z, uint32_t x, uint32_t y) { void mont_mul_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p) { // see Alg. 14.36 HoAC // -m^(-1) mod b is 678152731 for curve25519 - uint32_t u, carry; + uint32_t u; uint32_t tmp[9], a[9]; uint32_t* ret[2] = {a, tmp}; @@ -143,21 +145,21 @@ void mont_mul_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p) { for (int i=0; i<8; i++) { u = (a[0] + x[i] * y[0]) * 678152731; mul_zxy(tmp, y, x[i]); - carry = add_zxy(a, a, tmp); + a[8] += tmp[8] + add_zxy(a, a, tmp); mul_zxy(tmp, p, u); - carry += add_zxy(a, a, tmp); // A <- (A + xi y + u m) / b + a[8] += tmp[8] + add_zxy(a, a, tmp); // A <- (A + xi y + u m) / b for (int j=0; j<8; j++) a[j] = a[j+1]; - a[8] = carry; + a[8] = 0; } uint32_t c = sub_zxy(tmp, a, p); // carry not set if negative - for (int i=0; i<9; i++) z[i] = ret[c][i]; + for (int i=0; i<8; i++) z[i] = ret[c][i]; } void mont_mul_zxy0_mod_p(uint32_t *z, uint32_t *x, uint32_t y, uint32_t *p) { // see Alg. 
14.36 HoAC // -m^(-1) mod b is 678152731 for curve25519 - uint32_t u, carry; + uint32_t u; uint32_t tmp[9], a[9]; uint32_t* ret[2] = {a, tmp}; @@ -165,13 +167,13 @@ void mont_mul_zxy0_mod_p(uint32_t *z, uint32_t *x, uint32_t y, uint32_t *p) { for (int i=0; i<8; i++) { u = (a[0] + x[i] * y) * 678152731; mul_zx0y0(tmp, x[i], y); - carry = add_zxy(a, a, tmp); + a[8] += tmp[8] + add_zxy(a, a, tmp); mul_zxy(tmp, p, u); - carry += add_zxy(a, a, tmp); // A <- (A + xi y + u m) / b + a[8] += tmp[8] + add_zxy(a, a, tmp); // A <- (A + xi y + u m) / b for (int j=0; j<8; j++) a[j] = a[j+1]; - a[8] = carry; + a[8] = 0; } uint32_t c = sub_zxy(tmp, a, p); // carry not set if negative - for (int i=0; i<9; i++) z[i] = ret[c][i]; + for (int i=0; i<8; i++) z[i] = ret[c][i]; } diff --git a/mpy-modules/curve25519/arithmetic.h b/mpy-modules/curve25519/arithmetic.h index 0fd8464..022d019 100644 --- a/mpy-modules/curve25519/arithmetic.h +++ b/mpy-modules/curve25519/arithmetic.h @@ -2,9 +2,8 @@ #define __arithmetic__ void shift_right(uint32_t *x); -uint32_t add_zxy(uint32_t *z, uint32_t *x, uint32_t *y); -uint32_t sub_zxy(uint32_t *z, uint32_t *x, uint32_t *y); void add_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p); +void sub_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p); void mul_zxy(uint32_t *z, uint32_t *x, uint32_t y); void mul_zx0y0(uint32_t *z, uint32_t x, uint32_t y); void mont_mul_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p); diff --git a/mpy-modules/curve25519/ec.c b/mpy-modules/curve25519/ec.c index e1eac9a..0a3cd41 100644 --- a/mpy-modules/curve25519/ec.c +++ b/mpy-modules/curve25519/ec.c @@ -3,14 +3,14 @@ #include "ec.h" const uint32_t a24R = 0x468ba6; -const uint32_t p[9] = {0x00000000, 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffed}; +const uint32_t p[8] = {0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffed}; const uint32_t pR2 = 0x5a4; 
const uint32_t R = 38; -const uint32_t sqrt_minus_486664R[9] = {0x00000000, 0x4038adb9, 0xa83f001e, 0xc1bcaf57, 0x688c332e, 0xa9fa8eee, 0xcb6e1095, 0xa7ab4e9e, 0x1baf4abd}; +const uint32_t sqrt_minus_486664R[8] = {0x4038adb9, 0xa83f001e, 0xc1bcaf57, 0x688c332e, 0xa9fa8eee, 0xcb6e1095, 0xa7ab4e9e, 0x1baf4abd}; struct curve { uint32_t a24; - uint32_t p[9]; + uint32_t p[8]; uint32_t pR2; }; @@ -18,8 +18,7 @@ struct curve Curve; void random_z(uint32_t *z) { // TODO: a true random source should be provided - for (uint32_t i=0; i<8; i++) z[i] = 0xC3A50FE1; - z[8] = 0; + for (uint32_t i=0; i<8; i++) z[i] = 0xC3A50FE1; /* fill all 8 words so z[7] is initialized before the mask below */ z[7] &= 0x7fffffff; } @@ -27,37 +26,37 @@ void random_z(uint32_t *z) { void ini_curve() { Curve.a24 = a24R; Curve.pR2 = pR2; - for (int i=0; i<9; i++) { - Curve.p[i] = p[8-i]; + for (int i=0; i<8; i++) { + Curve.p[i] = p[7-i]; } } void to_Montgomery(uint32_t *x) { - uint32_t s[9]; + uint32_t s[8]; mont_mul_zxy0_mod_p(s, x, Curve.pR2, Curve.p); - for (uint32_t i=0;i<9;i++) x[i] = s[i]; + for (uint32_t i=0;i<8;i++) x[i] = s[i]; } void from_Montgomery(uint32_t *x) { - uint32_t s[9]; + uint32_t s[8]; mont_mul_zxy0_mod_p(s, x, 1, Curve.p); - for (uint32_t i=0;i<9;i++) x[i] = s[i]; + for (uint32_t i=0;i<8;i++) x[i] = s[i]; } void mod_inverse(uint32_t *z_inv, uint32_t *z) { - uint32_t z2[9]; - uint32_t z9[9]; - uint32_t z11[9]; - uint32_t z2_5_0[9]; - uint32_t z2_10_0[9]; - uint32_t z2_20_0[9]; - uint32_t z2_50_0[9]; - uint32_t z2_100_0[9]; - uint32_t t0[9]; - uint32_t t1[9]; + uint32_t z2[8]; + uint32_t z9[8]; + uint32_t z11[8]; + uint32_t z2_5_0[8]; + uint32_t z2_10_0[8]; + uint32_t z2_20_0[8]; + uint32_t z2_50_0[8]; + uint32_t z2_100_0[8]; + uint32_t t0[8]; + uint32_t t1[8]; uint32_t i; mont_mul_zxy_mod_p(z2, z, z, Curve.p); // 2 @@ -134,12 +133,11 @@ void mod_inverse(uint32_t *z_inv, uint32_t *z) { void recover_y(uint32_t *x, uint32_t *y, uint32_t *z, uint32_t *xp, uint32_t *yp, uint32_t *xq, uint32_t *zq, uint32_t *xpq, uint32_t *zpq) { // 
https://eprint.iacr.org/2017/212.pdf uint32_t AR = 18493156; - uint32_t v1[9], v2[9], v3[9], v4[9], s[9], t[9]; + uint32_t v1[8], v2[8], v3[8], v4[8], s[8], t[8]; mont_mul_zxy_mod_p(v1, xp, zq, Curve.p); add_zxy_mod_p(v2, xq, v1, Curve.p); - sub_zxy(v3, Curve.p, v1); - add_zxy_mod_p(v3, xq, v3, Curve.p); + sub_zxy_mod_p(v3, xq, v1, Curve.p); mont_mul_zxy_mod_p(t, v3, v3, Curve.p); mont_mul_zxy_mod_p(v3, t, xpq, Curve.p); mont_mul_zxy0_mod_p(v1, zq, AR, Curve.p); @@ -149,11 +147,9 @@ void recover_y(uint32_t *x, uint32_t *y, uint32_t *z, uint32_t *xp, uint32_t *yp add_zxy_mod_p(v4, v4, zq, Curve.p); mont_mul_zxy_mod_p(s, v2, v4, Curve.p); mont_mul_zxy_mod_p(t, v1, zq, Curve.p); - sub_zxy(t, Curve.p, t); - add_zxy_mod_p(s, s, t, Curve.p); + sub_zxy_mod_p(s, s, t, Curve.p); mont_mul_zxy_mod_p(v2, s, zpq, Curve.p); - sub_zxy(v3, Curve.p, v3); - add_zxy_mod_p(y, v2, v3, Curve.p); + sub_zxy_mod_p(y, v2, v3, Curve.p); add_zxy_mod_p(v1, yp, yp, Curve.p); mont_mul_zxy_mod_p(s, v1, zq, Curve.p); mont_mul_zxy_mod_p(v1, s, zpq, Curve.p); @@ -164,18 +160,17 @@ void recover_y(uint32_t *x, uint32_t *y, uint32_t *z, uint32_t *xp, uint32_t *yp void montgomery2edward(uint32_t *x, uint32_t *y, uint32_t *u, uint32_t *v) { // (x, y) = (sqrt(-486664)*u/v, (u-1)/(u+1)) - uint32_t s[9], t[9], w[9]; + uint32_t s[8], t[8], w[8]; - for (uint32_t i=1; i<9; i++) t[i] = 0; + for (uint32_t i=1; i<8; i++) t[i] = 0; t[0] = R; // 1*R add_zxy_mod_p(s, u, t, Curve.p); // s = u+1 mont_mul_zxy_mod_p(w, s, v, Curve.p); mod_inverse(w, w); // w = (u+1)^(-1) * v^(-1) - sub_zxy(t, Curve.p, t); // t = -1*R - add_zxy_mod_p(y, u, t, Curve.p); // y = u-1 + sub_zxy_mod_p(y, u, t, Curve.p); // y = u-1 mont_mul_zxy_mod_p(x, y, w, Curve.p); // x = (u-1)/((u+1) * v) mont_mul_zxy_mod_p(y, x, v, Curve.p); - for (uint32_t i=0; i<9; i++) t[i] = sqrt_minus_486664R[8-i]; + for (uint32_t i=0; i<8; i++) t[i] = sqrt_minus_486664R[7-i]; mont_mul_zxy_mod_p(x, t, w, Curve.p); mont_mul_zxy_mod_p(t, x, s, Curve.p); 
mont_mul_zxy_mod_p(x, t, u, Curve.p); @@ -200,11 +195,11 @@ void cswap(uint32_t swap, uint32_t **x, uint32_t **y) { void X25519(uint32_t *q, uint32_t *r, uint8_t *k, uint32_t *u, uint32_t *v, uint8_t toEdward) { // cpmputes q = k * u - uint32_t x1[9], x[2*9], z[2*9]; - uint32_t *x2 = x; uint32_t *x3 = x+9; - uint32_t *z2 = z; uint32_t *z3 = z+9; + uint32_t x1[8], x[2*8], z[2*8]; + uint32_t *x2 = x; uint32_t *x3 = x+8; + uint32_t *z2 = z; uint32_t *z3 = z+8; uint8_t swap, kt, kw; - uint32_t A[9], AA[9], B[9], BB[9], C[9], D[9], E[9], DA[9], CB[9]; + uint32_t A[8], AA[8], B[8], BB[8], C[8], D[8], E[8], DA[8], CB[8]; ini_curve(); @@ -217,9 +212,8 @@ void X25519(uint32_t *q, uint32_t *r, uint8_t *k, uint32_t *u, uint32_t *v, uint A[i] = u[i]; z2[i] = 0; } - z2[8] = A[8] = 0; to_Montgomery(A); - for (uint32_t i=0; i<9; i++) x1[i] = A[i]; + for (uint32_t i=0; i<8; i++) x1[i] = A[i]; mont_mul_zxy_mod_p(x3, A, z3, Curve.p); swap = 0; @@ -235,20 +229,16 @@ void X25519(uint32_t *q, uint32_t *r, uint8_t *k, uint32_t *u, uint32_t *v, uint add_zxy_mod_p(A, x2, z2, Curve.p); mont_mul_zxy_mod_p(AA, A, A, Curve.p); - sub_zxy(B, Curve.p, z2); - add_zxy_mod_p(B, x2, B, Curve.p); + sub_zxy_mod_p(B, x2, z2, Curve.p); mont_mul_zxy_mod_p(BB, B, B, Curve.p); - sub_zxy(E, Curve.p, BB); - add_zxy_mod_p(E, AA, E, Curve.p); + sub_zxy_mod_p(E, AA, BB, Curve.p); add_zxy_mod_p(C, x3, z3, Curve.p); - sub_zxy(D, Curve.p, z3); - add_zxy_mod_p(D, x3, D, Curve.p); + sub_zxy_mod_p(D, x3, z3, Curve.p); mont_mul_zxy_mod_p(DA, D, A, Curve.p); mont_mul_zxy_mod_p(CB, C, B, Curve.p); add_zxy_mod_p(A, DA, CB, Curve.p); mont_mul_zxy_mod_p(x3, A, A, Curve.p); - sub_zxy(A, Curve.p, CB); - add_zxy_mod_p(A, DA, A, Curve.p); + sub_zxy_mod_p(A, DA, CB, Curve.p); mont_mul_zxy_mod_p(B, A, A, Curve.p); mont_mul_zxy_mod_p(z3, x1, B, Curve.p); mont_mul_zxy_mod_p(x2, AA, BB, Curve.p); @@ -265,7 +255,6 @@ void X25519(uint32_t *q, uint32_t *r, uint8_t *k, uint32_t *u, uint32_t *v, uint AA[i] = u[i]; BB[i] = v[i]; } - 
AA[8] = BB[8] = 0; to_Montgomery(AA); to_Montgomery(BB); recover_y(A, B, C, AA, BB, x2, z2, x3, z3); @@ -273,7 +262,7 @@ void X25519(uint32_t *q, uint32_t *r, uint8_t *k, uint32_t *u, uint32_t *v, uint mont_mul_zxy_mod_p(q, A, C, Curve.p); mont_mul_zxy_mod_p(r, B, C, Curve.p); if (toEdward) { - for (uint32_t i=0; i<9; i++) { + for (uint32_t i=0; i<8; i++) { A[i] = q[i]; B[i] = r[i]; } diff --git a/mpy-modules/curve25519/main.c b/mpy-modules/curve25519/main.c index ce96bee..fb06978 100644 --- a/mpy-modules/curve25519/main.c +++ b/mpy-modules/curve25519/main.c @@ -12,10 +12,10 @@ STATIC mp_obj_t x25519(mp_obj_t k, mp_obj_t u) { mp_get_buffer_raise(k, &bufinfo_k, MP_BUFFER_READ); mp_get_buffer_raise(u, &bufinfo_u, MP_BUFFER_READ); - uint32_t q[9]; + uint32_t q[8]; X25519(q, 0, bufinfo_k.buf, (uint32_t*)bufinfo_u.buf, 0, 0); - return mp_obj_new_bytes((uint8_t *)q, 8*4); + return mp_obj_new_bytes((uint8_t *)q, sizeof(q)); } // Define a Python reference to the function above STATIC MP_DEFINE_CONST_FUN_OBJ_2(x25519_obj, x25519); @@ -30,11 +30,11 @@ STATIC mp_obj_t x25519_ed(mp_obj_t k, mp_obj_t u, mp_obj_t v) { mp_get_buffer_raise(u, &bufinfo_u, MP_BUFFER_READ); mp_get_buffer_raise(v, &bufinfo_v, MP_BUFFER_READ); - uint32_t q[9], r[9]; + uint32_t q[8], r[8]; X25519(q, r, bufinfo_k.buf, (uint32_t*)bufinfo_u.buf, (uint32_t*)bufinfo_v.buf, 1); - items[0] = mp_obj_new_bytes((uint8_t *)q, 8*4); - items[1] = mp_obj_new_bytes((uint8_t *)r, 8*4); + items[0] = mp_obj_new_bytes((uint8_t *)q, sizeof(q)); + items[1] = mp_obj_new_bytes((uint8_t *)r, sizeof(r)); return mp_obj_new_tuple(2, items); } // Define a Python reference to the function above