/*
 * micropython-curve25519/mpy-modules/curve25519/arithmetic.c
 *
 * (267 lines, 6.7 KiB, C)
 */

#include "py/dynruntime.h"
uint32_t add_zxy(uint32_t *z, uint32_t *x, uint32_t *y) {
uint32_t carry;
__asm__ volatile (
"LDMIA %1!, {r3-r6}\n"
"LDMIA %2!, {r7-r10}\n"
"ADDS r3, r3, r7\n"
"ADCS r4, r4, r8\n"
"ADCS r5, r5, r9\n"
"ADCS r6, r6, r10\n"
"STMIA %3!, {r3-r6}\n"
"LDMIA %1, {r3-r6}\n"
"LDMIA %2, {r7-r10}\n"
"ADCS r3, r3, r7\n"
"ADCS r4, r4, r8\n"
"ADCS r5, r5, r9\n"
"ADCS r6, r6, r10\n"
"STMIA %3, {r3-r6}\n"
"MOV %0, 0\n"
"ADCS %0, %0, 0\n"
: "=r" (carry) : "r" (x), "r" (y), "r" (z) : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
);
return carry;
}
uint32_t sub_zxy(uint32_t *z, uint32_t *x, uint32_t *y) {
uint32_t carry;
__asm__ volatile (
"LDMIA %1!, {r3-r6}\n"
"LDMIA %2!, {r7-r10}\n"
"SUBS r3, r3, r7\n"
"SBCS r4, r4, r8\n"
"SBCS r5, r5, r9\n"
"SBCS r6, r6, r10\n"
"STMIA %3!, {r3-r6}\n"
"LDMIA %1, {r3-r6}\n"
"LDMIA %2, {r7-r10}\n"
"SBCS r3, r3, r7\n"
"SBCS r4, r4, r8\n"
"SBCS r5, r5, r9\n"
"SBCS r6, r6, r10\n"
"STMIA %3, {r3-r6}\n"
"MOV %0, 0\n"
"ADCS %0, %0, 0\n"
: "=r" (carry) : "r" (x), "r" (y), "r" (z) : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
);
return carry;
}
/*
 * Modular addition: z = (x + y) mod p on eight-word (256-bit) operands.
 *
 * Assumes x and y are already reduced (both < p) -- the single conditional
 * subtraction below is only enough in that case.
 *
 * Both candidates (the raw sum and the sum minus p) are always computed and
 * the result is picked through a two-entry table, so no branch depends on
 * the operand values. The result is copied out last, so z may alias x, y or p.
 */
void add_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p) {
    uint32_t sum[8], reduced[8];
    uint32_t *candidate[2] = {sum, reduced};

    /* overflow == 1 means the true sum is sum + 2^256 */
    uint32_t overflow = add_zxy(sum, x, y);
    /* no_borrow == 1 means sum >= p */
    uint32_t no_borrow = sub_zxy(reduced, sum, p);

    /*
     * Take the reduced value when sum >= p, and also when the addition
     * itself overflowed 2^256: in that case the true sum is certainly >= p
     * and (sum - p) mod 2^256 is exactly x + y - p. Both flags are 0 or 1,
     * so the OR is a valid table index.
     */
    uint32_t pick = overflow | no_borrow;
    for (int i = 0; i < 8; i++) {
        z[i] = candidate[pick][i];
    }
}
/*
 * Modular subtraction: z = (x - y) mod p on eight-word (256-bit) operands.
 *
 * The raw difference and the difference plus p are both computed; the raw
 * difference is kept when the subtraction did not borrow, otherwise p is
 * added back. The result is copied out last, so z may alias the inputs.
 */
void sub_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p) {
    uint32_t diff[8], wrapped[8];

    /* no_borrow is 1 when x >= y, 0 when the difference went negative */
    uint32_t no_borrow = sub_zxy(diff, x, y);
    add_zxy(wrapped, diff, p);

    /* index 0 -> borrowed, use diff + p; index 1 -> use diff as is */
    uint32_t *option[2];
    option[0] = wrapped;
    option[1] = diff;

    const uint32_t *chosen = option[no_borrow];
    int k = 0;
    while (k < 8) {
        z[k] = chosen[k];
        k++;
    }
}
void mul_add_zxy(uint32_t *z, uint32_t *x, uint32_t y) {
// z += x*y
// Note, UMAAL is not available
__asm__ volatile (
// 0
"LDMIA %0!, {r3}\n"
"UMULL r5, r6, r3, %1\n"
"LDMIA %2, {r3}\n"
"ADDS r5, r5, r3\n"
"STMIA %2!, {r5}\n"
// 1
"LDMIA %0!, {r3}\n"
"MOV r5, 0\n"
"UMLAL r6, r5, r3, %1\n"
"LDMIA %2, {r3}\n"
"ADCS r6, r6, r3\n"
"STMIA %2!, {r6}\n"
// 2
"LDMIA %0!, {r3}\n"
"MOV r6, 0\n"
"UMLAL r5, r6, r3, %1\n"
"LDMIA %2, {r3}\n"
"ADCS r5, r5, r3\n"
"STMIA %2!, {r5}\n"
// 3
"LDMIA %0!, {r3}\n"
"MOV r5, 0\n"
"UMLAL r6, r5, r3, %1\n"
"LDMIA %2, {r3}\n"
"ADCS r6, r6, r3\n"
"STMIA %2!, {r6}\n"
// 4
"LDMIA %0!, {r3}\n"
"MOV r6, 0\n"
"UMLAL r5, r6, r3, %1\n"
"LDMIA %2, {r3}\n"
"ADCS r5, r5, r3\n"
"STMIA %2!, {r5}\n"
// 5
"LDMIA %0!, {r3}\n"
"MOV r5, 0\n"
"UMLAL r6, r5, r3, %1\n"
"LDMIA %2, {r3}\n"
"ADCS r6, r6, r3\n"
"STMIA %2!, {r6}\n"
// 6
"LDMIA %0!, {r3}\n"
"MOV r6, 0\n"
"UMLAL r5, r6, r3, %1\n"
"LDMIA %2, {r3}\n"
"ADCS r5, r5, r3\n"
"STMIA %2!, {r5}\n"
// 7
"LDMIA %0!, {r3}\n"
"MOV r5, 0\n"
"UMLAL r6, r5, r3, %1\n"
"LDMIA %2, {r3}\n"
"ADCS r6, r6, r3\n"
"STMIA %2!, {r6}\n"
"LDMIA %2, {r3}\n"
"ADCS r5, r5, r3\n"
"STMIA %2, {r5}\n"
: : "r" (x), "r" (y), "r" (z) : "r3", "r5", "r6"
);
}
void mul_add_zx0y0(uint32_t *z, uint32_t x, uint32_t y) {
// z += x0 * y0
__asm__ volatile (
// 0
"UMULL r5, r6, %0, %1\n"
"LDMIA %2, {r3}\n"
"ADDS r3, r3, r5\n"
"STMIA %2!, {r3}\n"
"LDMIA %2, {r3}\n"
"ADCS r3, r3, r6\n"
"STMIA %2!, {r3}\n"
"MOV r5, 0\n"
"LDMIA %2, {r3}\n"
"ADCS r3, r3, r5\n"
"STMIA %2!, {r3}\n"
"LDMIA %2, {r3}\n"
"ADCS r3, r3, r5\n"
"STMIA %2!, {r3}\n"
"LDMIA %2, {r3}\n"
"ADCS r3, r3, r5\n"
"STMIA %2!, {r3}\n"
"LDMIA %2, {r3}\n"
"ADCS r3, r3, r5\n"
"STMIA %2!, {r3}\n"
"LDMIA %2, {r3}\n"
"ADCS r3, r3, r5\n"
"STMIA %2!, {r3}\n"
"LDMIA %2, {r3}\n"
"ADCS r3, r3, r5\n"
"STMIA %2!, {r3}\n"
"LDMIA %2, {r3}\n"
"ADCS r3, r3, r5\n"
"STMIA %2, {r3}\n"
//"SUBS %2, 32\n" //*
: : "r" (x), "r" (y), "r" (z) : "r3", "r5", "r6"
);
}
void pu_add_shift(uint32_t *t, uint32_t u) {
// computes t = (t + (2^255 -19) * u) >> 32
__asm__ volatile (
"MOV r3, 19\n"
"UMULL r5, r6, r3, %1\n"
"MOV r2, 0\n"
"LSRS %1, %1, 1\n"
"RRXS r2, r2\n"
// t[8]:t[7] + (u << 255)
"ADDS %0, %0, 28\n"
"LDMIA %0, {r3}\n"
"ADDS r3, r3, r2\n"
"STMIA %0!, {r3}\n"
"LDMIA %0, {r3}\n"
"ADCS r3, r3, %1\n"
"STMIA %0, {r3}\n"
"SUBS %0, %0, 32\n"
// t - 19*u
"MOV r2, %0\n"
"LDMIA %0!, {r4}\n"
"SUBS r4, r4, r5\n"
"LDMIA %0!, {r4}\n"
"SBCS r4, r4, r6\n"
"STMIA r2!, {r4}\n"
"MOV r5, 0\n"
"LDMIA %0!, {r4}\n"
"SBCS r4, r4, r5\n"
"STMIA r2!, {r4}\n"
"LDMIA %0!, {r4}\n"
"SBCS r4, r4, r5\n"
"STMIA r2!, {r4}\n"
"LDMIA %0!, {r4}\n"
"SBCS r4, r4, r5\n"
"STMIA r2!, {r4}\n"
"LDMIA %0!, {r4}\n"
"SBCS r4, r4, r5\n"
"STMIA r2!, {r4}\n"
"LDMIA %0!, {r4}\n"
"SBCS r4, r4, r5\n"
"STMIA r2!, {r4}\n"
"LDMIA %0!, {r4}\n"
"SBCS r4, r4, r5\n"
"STMIA r2!, {r4}\n"
"LDMIA %0, {r4}\n"
"SBCS r4, r4, r5\n"
"STMIA r2!, {r4}\n"
"STMIA r2, {r5}\n" // ms-word = 0
//"SUBS %0, %0, 32\n"
: : "r" (t), "r" (u) : "r2", "r3", "r4", "r5", "r6"
);
}
/*
 * Montgomery multiplication of two eight-word operands (see Alg. 14.36 in
 * the Handbook of Applied Cryptography), word size b = 2^32.
 *
 * z receives eight words; x and y supply eight words each. p is used only
 * for the final conditional subtraction -- the per-word reduction performed
 * by pu_add_shift is hard-wired to 2^255 - 19, and the constant 678152731 is
 * -m^(-1) mod b for that modulus, so p is expected to be 2^255 - 19.
 *
 * The result is copied out at the very end, so z may alias x, y or p.
 */
void mont_mul_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p) {
    uint32_t acc[9], reduced[9];
    uint32_t *outcome[2] = {acc, reduced};
    int k;

    /* explicit loop: clear the nine-word accumulator */
    for (k = 0; k < 9; k++) {
        acc[k] = 0;
    }

    for (k = 0; k < 8; k++) {
        uint32_t xk = x[k];
        /* quotient word that makes the low word of acc vanish */
        uint32_t q = (acc[0] + xk * y[0]) * 678152731;
        mul_add_zxy(acc, y, xk);   /* acc <- acc + x[k] * y     */
        pu_add_shift(acc, q);      /* acc <- (acc + q * m) / b  */
    }

    /* ge_p is 1 when acc >= p, in which case acc - p is the answer */
    uint32_t ge_p = sub_zxy(reduced, acc, p);
    const uint32_t *src = outcome[ge_p];
    for (k = 0; k < 8; k++) {
        z[k] = src[k];
    }
}
/*
 * Montgomery multiplication of an eight-word operand x by a single 32-bit
 * word y (see Alg. 14.36 in the Handbook of Applied Cryptography), word
 * size b = 2^32.
 *
 * z receives eight words. p is used only for the final conditional
 * subtraction -- the per-word reduction performed by pu_add_shift is
 * hard-wired to 2^255 - 19, and the constant 678152731 is -m^(-1) mod b for
 * that modulus, so p is expected to be 2^255 - 19.
 *
 * The result is copied out at the very end, so z may alias x or p.
 */
void mont_mul_zxy0_mod_p(uint32_t *z, uint32_t *x, uint32_t y, uint32_t *p) {
    uint32_t acc[9], reduced[9];
    uint32_t *outcome[2] = {acc, reduced};
    int k;

    /* explicit loop: clear the nine-word accumulator */
    for (k = 0; k < 9; k++) {
        acc[k] = 0;
    }

    for (k = 0; k < 8; k++) {
        uint32_t xk = x[k];
        /* quotient word that makes the low word of acc vanish */
        uint32_t q = (acc[0] + xk * y) * 678152731;
        mul_add_zx0y0(acc, y, xk); /* acc <- acc + x[k] * y     */
        pu_add_shift(acc, q);      /* acc <- (acc + q * m) / b  */
    }

    /* ge_p is 1 when acc >= p, in which case acc - p is the answer */
    uint32_t ge_p = sub_zxy(reduced, acc, p);
    const uint32_t *src = outcome[ge_p];
    for (k = 0; k < 8; k++) {
        z[k] = src[k];
    }
}