micropython-curve25519/mpy-modules/curve25519/arithmetic.c

398 lines
10 KiB
C

#include "stdint.h"
#include "ec.h"
void shift_right(uint32_t *x) {
// for mod_inverse, operates on 9 words
__asm__ volatile (
"LDMIA %0, {r1-r9}\n"
"AND r10, r9, 0x80000000\n" // store sign
"LSRS r9, r9, 1\n"
"RRXS r8, r8\n"
"RRXS r7, r7\n"
"RRXS r6, r6\n"
"RRXS r5, r5\n"
"RRXS r4, r4\n"
"RRXS r3, r3\n"
"RRXS r2, r2\n"
"RRXS r1, r1\n"
"ORR r9, r9, r10\n" // restore sign
"STMIA %0, {r1-r9}\n"
: : "r" (x) : "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
);
}
uint32_t add9_zxy(uint32_t *z, uint32_t *x, uint32_t *y) {
// for mod_inverse, operates on 9 words
uint32_t carry;
__asm__ volatile (
"LDMIA %1!, {r3-r6}\n"
"LDMIA %2!, {r7-r10}\n"
"ADDS r3, r3, r7\n"
"ADCS r4, r4, r8\n"
"ADCS r5, r5, r9\n"
"ADCS r6, r6, r10\n"
"STMIA %3!, {r3-r6}\n"
"LDMIA %1!, {r3-r6}\n"
"LDMIA %2!, {r7-r10}\n"
"ADCS r3, r3, r7\n"
"ADCS r4, r4, r8\n"
"ADCS r5, r5, r9\n"
"ADCS r6, r6, r10\n"
"STMIA %3!, {r3-r6}\n"
"LDMIA %1, {r3}\n"
"LDMIA %2, {r7}\n"
"ADCS r3, r3, r7\n"
"STMIA %3, {r3}\n"
"MOV %0, 0\n"
"ADCS %0, %0, 0\n"
: "=r" (carry) : "r" (x), "r" (y), "r" (z) : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
);
return carry;
}
uint32_t sub9_zxy(uint32_t *z, uint32_t *x, uint32_t *y) {
// for mod_inverse, operates on 9 words
uint32_t carry;
__asm__ volatile (
"LDMIA %1!, {r3-r6}\n"
"LDMIA %2!, {r7-r10}\n"
"SUBS r3, r3, r7\n"
"SBCS r4, r4, r8\n"
"SBCS r5, r5, r9\n"
"SBCS r6, r6, r10\n"
"STMIA %3!, {r3-r6}\n"
"LDMIA %1!, {r3-r6}\n"
"LDMIA %2!, {r7-r10}\n"
"SBCS r3, r3, r7\n"
"SBCS r4, r4, r8\n"
"SBCS r5, r5, r9\n"
"SBCS r6, r6, r10\n"
"STMIA %3!, {r3-r6}\n"
"LDMIA %1, {r3}\n"
"LDMIA %2, {r7}\n"
"SBCS r3, r3, r7\n"
"STMIA %3, {r3}\n"
"MOV %0, 0\n"
"ADCS %0, %0, 0\n"
: "=r" (carry) : "r" (x), "r" (y), "r" (z) : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
);
return carry;
}
void mod_inverse(const uint32_t *x, uint32_t *y) {
// HoAC Alg. 14.61 and Note 14.64
// input: x=modulus, y=a, output y=a^-1
// it is required that the modulus is odd
uint32_t u[9], v[9], B[9], D[9], m[9];
for (uint32_t i=0; i<8; i++) {
B[i] = D[i] = 0;
m[i] = u[i] = x[i];
v[i] = y[i];
}
D[8] = B[8] = m[8] = u[8] = v[8] = 0;
D[0] = 1;
STEP_4:
while ((u[0] & 1) == 0) {
shift_right(u);
if ((B[0] & 1) == 1) {
sub9_zxy(B, B, m);
}
shift_right(B);
}
while ((v[0] & 1) == 0) {
shift_right(v);
if ((D[0] & 1) == 1) {
sub9_zxy(D, D, m);
}
shift_right(D);
}
if (sub9_zxy(u, u, v) == 1) { // u >= v
sub9_zxy(B, B, D);
}
else {
add9_zxy(u, u, v);
sub9_zxy(v, v, u);
sub9_zxy(D, D, B);
}
uint32_t cmp = 0;
for (int i=0; i<9; i++) cmp |= u[i];
if (cmp == 0) {
if (D[8] & 0x80000000) {
add9_zxy(D, D, m); // D < 0
}
for (uint32_t i=0; i<8; i++) y[i] = D[i];
return;
}
goto STEP_4;
}
uint32_t add_zxy(uint32_t *z, const uint32_t *x, const uint32_t *y) {
uint32_t carry;
__asm__ volatile (
"LDMIA %1!, {r3-r6}\n"
"LDMIA %2!, {r7-r10}\n"
"ADDS r3, r3, r7\n"
"ADCS r4, r4, r8\n"
"ADCS r5, r5, r9\n"
"ADCS r6, r6, r10\n"
"STMIA %3!, {r3-r6}\n"
"LDMIA %1, {r3-r6}\n"
"LDMIA %2, {r7-r10}\n"
"ADCS r3, r3, r7\n"
"ADCS r4, r4, r8\n"
"ADCS r5, r5, r9\n"
"ADCS r6, r6, r10\n"
"STMIA %3, {r3-r6}\n"
"MOV %0, 0\n"
"ADCS %0, %0, 0\n"
: "=r" (carry) : "r" (x), "r" (y), "r" (z) : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
);
return carry;
}
uint32_t sub_zxy(uint32_t *z, const uint32_t *x, const uint32_t *y) {
uint32_t carry;
__asm__ volatile (
"LDMIA %1!, {r3-r6}\n"
"LDMIA %2!, {r7-r10}\n"
"SUBS r3, r3, r7\n"
"SBCS r4, r4, r8\n"
"SBCS r5, r5, r9\n"
"SBCS r6, r6, r10\n"
"STMIA %3!, {r3-r6}\n"
"LDMIA %1, {r3-r6}\n"
"LDMIA %2, {r7-r10}\n"
"SBCS r3, r3, r7\n"
"SBCS r4, r4, r8\n"
"SBCS r5, r5, r9\n"
"SBCS r6, r6, r10\n"
"STMIA %3, {r3-r6}\n"
"MOV %0, 0\n"
"ADCS %0, %0, 0\n"
: "=r" (carry) : "r" (x), "r" (y), "r" (z) : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
);
return carry;
}
void add_zxy_mod_p(uint32_t *z, const uint32_t *x, const uint32_t *y, const uint32_t *p) {
uint32_t z1[8], z2[8];
uint32_t* ret[2] = {z1, z2};
add_zxy(z1, x, y);
uint32_t c = sub_zxy(z2, z1, p); // carry not set if negative
for (int i=0; i<8; i++) z[i] = ret[c][i];
}
void sub_zxy_mod_p(uint32_t *z, const uint32_t *x, const uint32_t *y, const uint32_t *p) {
// x-y % p
uint32_t z1[8], z2[8];
uint32_t* ret[2] = {z2, z1};
uint32_t c = sub_zxy(z1, x, y); // carry not set if negative
add_zxy(z2, z1, p);
for (int i=0; i<8; i++) z[i] = ret[c][i];
}
void mul_add_zxy(uint32_t *z, const uint32_t *x, const uint32_t y) {
// z += x*y
// Note, UMAAL is not available
__asm__ volatile (
// 0
"LDMIA %0!, {r3}\n"
"UMULL r5, r6, r3, %1\n"
"LDMIA %2, {r3}\n"
"ADDS r5, r5, r3\n"
"STMIA %2!, {r5}\n"
// 1
"LDMIA %0!, {r3}\n"
"MOV r5, 0\n"
"UMLAL r6, r5, r3, %1\n"
"LDMIA %2, {r3}\n"
"ADCS r6, r6, r3\n"
"STMIA %2!, {r6}\n"
// 2
"LDMIA %0!, {r3}\n"
"MOV r6, 0\n"
"UMLAL r5, r6, r3, %1\n"
"LDMIA %2, {r3}\n"
"ADCS r5, r5, r3\n"
"STMIA %2!, {r5}\n"
// 3
"LDMIA %0!, {r3}\n"
"MOV r5, 0\n"
"UMLAL r6, r5, r3, %1\n"
"LDMIA %2, {r3}\n"
"ADCS r6, r6, r3\n"
"STMIA %2!, {r6}\n"
// 4
"LDMIA %0!, {r3}\n"
"MOV r6, 0\n"
"UMLAL r5, r6, r3, %1\n"
"LDMIA %2, {r3}\n"
"ADCS r5, r5, r3\n"
"STMIA %2!, {r5}\n"
// 5
"LDMIA %0!, {r3}\n"
"MOV r5, 0\n"
"UMLAL r6, r5, r3, %1\n"
"LDMIA %2, {r3}\n"
"ADCS r6, r6, r3\n"
"STMIA %2!, {r6}\n"
// 6
"LDMIA %0!, {r3}\n"
"MOV r6, 0\n"
"UMLAL r5, r6, r3, %1\n"
"LDMIA %2, {r3}\n"
"ADCS r5, r5, r3\n"
"STMIA %2!, {r5}\n"
// 7
"LDMIA %0!, {r3}\n"
"MOV r5, 0\n"
"UMLAL r6, r5, r3, %1\n"
"LDMIA %2, {r3}\n"
"ADCS r6, r6, r3\n"
"STMIA %2!, {r6}\n"
"LDMIA %2, {r3}\n"
"ADCS r5, r5, r3\n"
"STMIA %2, {r5}\n"
: : "r" (x), "r" (y), "r" (z) : "r3", "r5", "r6"
);
}
void mul_add_zx0y0(uint32_t *z, uint32_t x, uint32_t y) {
// z += x0 * y0
__asm__ volatile (
// 0
"UMULL r5, r6, %0, %1\n"
"LDMIA %2, {r3}\n"
"ADDS r3, r3, r5\n"
"STMIA %2!, {r3}\n"
"LDMIA %2, {r3}\n"
"ADCS r3, r3, r6\n"
"STMIA %2!, {r3}\n"
"MOV r5, 0\n"
"LDMIA %2, {r3}\n"
"ADCS r3, r3, r5\n"
"STMIA %2!, {r3}\n"
"LDMIA %2, {r3}\n"
"ADCS r3, r3, r5\n"
"STMIA %2!, {r3}\n"
"LDMIA %2, {r3}\n"
"ADCS r3, r3, r5\n"
"STMIA %2!, {r3}\n"
"LDMIA %2, {r3}\n"
"ADCS r3, r3, r5\n"
"STMIA %2!, {r3}\n"
"LDMIA %2, {r3}\n"
"ADCS r3, r3, r5\n"
"STMIA %2!, {r3}\n"
"LDMIA %2, {r3}\n"
"ADCS r3, r3, r5\n"
"STMIA %2!, {r3}\n"
"LDMIA %2, {r3}\n"
"ADCS r3, r3, r5\n"
"STMIA %2, {r3}\n"
//"SUBS %2, 32\n" //*
: : "r" (x), "r" (y), "r" (z) : "r3", "r5", "r6"
);
}
void pu_add_shift(uint32_t *t, uint32_t u) {
// computes t = (t + (2^255 -19) * u) >> 32
__asm__ volatile (
"MOV r3, %2\n"
"UMULL r5, r6, r3, %1\n"
"MOV r2, 0\n"
"LSRS %1, %1, 1\n"
"RRXS r2, r2\n"
// t[8]:t[7] + (u << 255)
"ADDS %0, %0, 28\n"
"LDMIA %0, {r3}\n"
"ADDS r3, r3, r2\n"
"STMIA %0!, {r3}\n"
"LDMIA %0, {r3}\n"
"ADCS r3, r3, %1\n"
"STMIA %0, {r3}\n"
"SUBS %0, %0, 32\n"
// t - 19*u
"MOV r2, %0\n"
"LDMIA %0!, {r4}\n"
"SUBS r4, r4, r5\n"
"LDMIA %0!, {r4}\n"
"SBCS r4, r4, r6\n"
"STMIA r2!, {r4}\n"
"MOV r5, 0\n"
"LDMIA %0!, {r4}\n"
"SBCS r4, r4, r5\n"
"STMIA r2!, {r4}\n"
"LDMIA %0!, {r4}\n"
"SBCS r4, r4, r5\n"
"STMIA r2!, {r4}\n"
"LDMIA %0!, {r4}\n"
"SBCS r4, r4, r5\n"
"STMIA r2!, {r4}\n"
"LDMIA %0!, {r4}\n"
"SBCS r4, r4, r5\n"
"STMIA r2!, {r4}\n"
"LDMIA %0!, {r4}\n"
"SBCS r4, r4, r5\n"
"STMIA r2!, {r4}\n"
"LDMIA %0!, {r4}\n"
"SBCS r4, r4, r5\n"
"STMIA r2!, {r4}\n"
"LDMIA %0, {r4}\n"
"SBCS r4, r4, r5\n"
"STMIA r2!, {r4}\n"
"STMIA r2, {r5}\n" // ms-word = 0
//"SUBS %0, %0, 32\n"
: : "r" (t), "r" (u) , "r" (DIFF_P) : "r2", "r3", "r4", "r5", "r6"
);
}
void mont_mul_zxy_mod_p(uint32_t *z, const uint32_t *x, const uint32_t *y, const uint32_t *p) {
// see Alg. 14.36 HoAC
// -m^(-1) mod b is 678152731 for curve25519
uint32_t u;
uint32_t tmp[9], a[9];
uint32_t* ret[2] = {a, tmp};
for (int i=0; i<9; i++) a[i] = 0;
for (int i=0; i<8; i++) {
u = (a[0] + x[i] * y[0]) * MINUS_IM;
mul_add_zxy(a, y, x[i]); // A <- A + xi y
pu_add_shift(a, u); // A <- (A + u m) / b
}
uint32_t c = sub_zxy(tmp, a, p); // carry not set if negative
for (int i=0; i<8; i++) z[i] = ret[c][i];
}
void mont_mul_zxy0_mod_p(uint32_t *z, const uint32_t *x, const uint32_t y, const uint32_t *p) {
// see Alg. 14.36 HoAC
// -m^(-1) mod b is 678152731 for curve25519
uint32_t u;
uint32_t tmp[9], a[9];
uint32_t* ret[2] = {a, tmp};
for (int i=0; i<9; i++) a[i] = 0;
for (int i=0; i<8; i++) {
u = (a[0] + x[i] * y) * MINUS_IM;
mul_add_zx0y0(a, y, x[i]); // A <- A + xi y
pu_add_shift(a, u); // A <- (A + u m) / b
}
uint32_t c = sub_zxy(tmp, a, p); // carry not set if negative
for (int i=0; i<8; i++) z[i] = ret[c][i];
}