// Eight-limb (8 x 32-bit) big-integer helpers and Montgomery multiplication for curve25519.
#include "py/dynruntime.h"
|
|
|
|
|
|
// z = x + y over eight 32-bit limbs (limb 0 is the least significant).
//
// z, x, y: arrays of 8 words each. z may be the same array as x or y.
// Returns the carry out of the top limb (0 or 1).
//
// On 32-bit ARM the addition is done with an ADDS/ADCS chain in inline
// assembly; on any other target an equivalent portable C loop is used.
uint32_t add_zxy(uint32_t *z, uint32_t *x, uint32_t *y) {
    uint32_t carry;
#if defined(__arm__)
    // The LDMIA/STMIA write-back forms advance the pointer registers, so the
    // pointers are read-write ("+r") operands: GCC does not allow an asm to
    // modify an input-only operand. "cc" is listed because the carry flag is
    // used throughout, and "memory" because the asm reads x/y and writes z
    // behind the compiler's back.
    __asm__ volatile (
        // limbs 0..3
        "LDMIA %[x]!, {r3-r6}\n"
        "LDMIA %[y]!, {r7-r10}\n"
        "ADDS r3, r3, r7\n"
        "ADCS r4, r4, r8\n"
        "ADCS r5, r5, r9\n"
        "ADCS r6, r6, r10\n"
        "STMIA %[z]!, {r3-r6}\n"
        // limbs 4..7 (loads and stores leave the carry flag untouched)
        "LDMIA %[x], {r3-r6}\n"
        "LDMIA %[y], {r7-r10}\n"
        "ADCS r3, r3, r7\n"
        "ADCS r4, r4, r8\n"
        "ADCS r5, r5, r9\n"
        "ADCS r6, r6, r10\n"
        "STMIA %[z], {r3-r6}\n"
        // carry = C flag
        "MOV %[c], 0\n"
        "ADCS %[c], %[c], 0\n"
        : [c] "=&r" (carry), [x] "+r" (x), [y] "+r" (y), [z] "+r" (z)
        :
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory"
    );
#else
    // Portable fallback: the upper half of acc carries into the next limb.
    uint64_t acc = 0;
    for (int i = 0; i < 8; i++) {
        acc += (uint64_t)x[i] + y[i];
        z[i] = (uint32_t)acc;
        acc >>= 32;
    }
    carry = (uint32_t)acc;
#endif
    return carry;
}
|
|
|
|
|
|
// z = x - y over eight 32-bit limbs (limb 0 is the least significant).
//
// z, x, y: arrays of 8 words each. z may be the same array as x or y.
// Returns the ARM-style carry flag of the subtraction: 1 when no borrow
// occurred (x >= y), 0 when the result wrapped (x < y).
//
// On 32-bit ARM the subtraction is done with a SUBS/SBCS chain in inline
// assembly; on any other target an equivalent portable C loop is used.
uint32_t sub_zxy(uint32_t *z, uint32_t *x, uint32_t *y) {
    uint32_t carry;
#if defined(__arm__)
    // The LDMIA/STMIA write-back forms advance the pointer registers, so the
    // pointers are read-write ("+r") operands: GCC does not allow an asm to
    // modify an input-only operand. "cc" is listed because the carry flag is
    // used throughout, and "memory" because the asm reads x/y and writes z
    // behind the compiler's back.
    __asm__ volatile (
        // limbs 0..3
        "LDMIA %[x]!, {r3-r6}\n"
        "LDMIA %[y]!, {r7-r10}\n"
        "SUBS r3, r3, r7\n"
        "SBCS r4, r4, r8\n"
        "SBCS r5, r5, r9\n"
        "SBCS r6, r6, r10\n"
        "STMIA %[z]!, {r3-r6}\n"
        // limbs 4..7 (loads and stores leave the carry flag untouched)
        "LDMIA %[x], {r3-r6}\n"
        "LDMIA %[y], {r7-r10}\n"
        "SBCS r3, r3, r7\n"
        "SBCS r4, r4, r8\n"
        "SBCS r5, r5, r9\n"
        "SBCS r6, r6, r10\n"
        "STMIA %[z], {r3-r6}\n"
        // carry = C flag (set means "no borrow")
        "MOV %[c], 0\n"
        "ADCS %[c], %[c], 0\n"
        : [c] "=&r" (carry), [x] "+r" (x), [y] "+r" (y), [z] "+r" (z)
        :
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory"
    );
#else
    // Portable fallback: a negative 64-bit difference has bit 32 set, which
    // becomes the borrow into the next limb.
    uint32_t borrow = 0;
    for (int i = 0; i < 8; i++) {
        uint64_t diff = (uint64_t)x[i] - y[i] - borrow;
        z[i] = (uint32_t)diff;
        borrow = (uint32_t)(diff >> 32) & 1u;
    }
    carry = 1u - borrow;
#endif
    return carry;
}
|
|
|
|
|
|
// Writes x + y to z, reduced by at most one subtraction of p.
// All operands are 8-word arrays with limb 0 least significant.
// The carry out of the 8-limb addition itself is not taken into account.
void add_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p) {
    uint32_t sum[8];
    uint32_t sum_minus_p[8];
    // Indexed by the value sub_zxy returns: 0 = the subtraction borrowed
    // (sum < p, keep sum), 1 = no borrow (sum >= p, keep sum - p).
    uint32_t *candidate[2] = {sum, sum_minus_p};

    add_zxy(sum, x, y);
    uint32_t no_borrow = sub_zxy(sum_minus_p, sum, p);

    const uint32_t *chosen = candidate[no_borrow];
    for (int k = 0; k < 8; k++) {
        z[k] = chosen[k];
    }
}
|
|
|
|
|
|
// Writes x - y to z, adding p back once when the subtraction borrowed.
// All operands are 8-word arrays with limb 0 least significant.
void sub_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p) {
    uint32_t diff[8];
    uint32_t diff_plus_p[8];
    // Indexed by the value sub_zxy returns: 0 = the subtraction borrowed
    // (x < y, keep diff + p), 1 = no borrow (keep diff).
    uint32_t *candidate[2] = {diff_plus_p, diff};

    uint32_t no_borrow = sub_zxy(diff, x, y);
    add_zxy(diff_plus_p, diff, p);

    const uint32_t *chosen = candidate[no_borrow];
    for (int k = 0; k < 8; k++) {
        z[k] = chosen[k];
    }
}
|
|
|
|
|
|
// z += x * y, where x is an 8-limb number, y a single 32-bit word and z a
// 9-limb accumulator (limb 0 is the least significant).
//
// z: 9 words, updated in place; a carry out of z[8] is discarded.
// x: 8 words, read only.
//
// On 32-bit ARM this is an unrolled UMULL/UMLAL + ADCS chain (UMAAL is not
// available on the target); elsewhere an equivalent portable C loop is used.
void mul_add_zxy(uint32_t *z, uint32_t *x, uint32_t y) {
#if defined(__arm__)
    // x and z are advanced by the write-back loads/stores, so they are
    // read-write early-clobber operands ("+&r"): GCC does not allow an asm to
    // modify an input-only operand. "cc" covers the carry chain, "memory" the
    // loads from x and the read-modify-write of z.
    //
    // r5/r6 alternate as (low, high) of the running product: the high word of
    // limb i is fed into UMLAL as the addend for limb i+1, while the carry
    // flag carries the overflow of the "+ z[i]" additions. MOV, UMLAL, LDMIA
    // and STMIA leave the flags untouched.
    __asm__ volatile (
        // 0
        "LDMIA %[x]!, {r3}\n"
        "UMULL r5, r6, r3, %[y]\n"
        "LDMIA %[z], {r3}\n"
        "ADDS r5, r5, r3\n"
        "STMIA %[z]!, {r5}\n"
        // 1
        "LDMIA %[x]!, {r3}\n"
        "MOV r5, 0\n"
        "UMLAL r6, r5, r3, %[y]\n"
        "LDMIA %[z], {r3}\n"
        "ADCS r6, r6, r3\n"
        "STMIA %[z]!, {r6}\n"
        // 2
        "LDMIA %[x]!, {r3}\n"
        "MOV r6, 0\n"
        "UMLAL r5, r6, r3, %[y]\n"
        "LDMIA %[z], {r3}\n"
        "ADCS r5, r5, r3\n"
        "STMIA %[z]!, {r5}\n"
        // 3
        "LDMIA %[x]!, {r3}\n"
        "MOV r5, 0\n"
        "UMLAL r6, r5, r3, %[y]\n"
        "LDMIA %[z], {r3}\n"
        "ADCS r6, r6, r3\n"
        "STMIA %[z]!, {r6}\n"
        // 4
        "LDMIA %[x]!, {r3}\n"
        "MOV r6, 0\n"
        "UMLAL r5, r6, r3, %[y]\n"
        "LDMIA %[z], {r3}\n"
        "ADCS r5, r5, r3\n"
        "STMIA %[z]!, {r5}\n"
        // 5
        "LDMIA %[x]!, {r3}\n"
        "MOV r5, 0\n"
        "UMLAL r6, r5, r3, %[y]\n"
        "LDMIA %[z], {r3}\n"
        "ADCS r6, r6, r3\n"
        "STMIA %[z]!, {r6}\n"
        // 6
        "LDMIA %[x]!, {r3}\n"
        "MOV r6, 0\n"
        "UMLAL r5, r6, r3, %[y]\n"
        "LDMIA %[z], {r3}\n"
        "ADCS r5, r5, r3\n"
        "STMIA %[z]!, {r5}\n"
        // 7
        "LDMIA %[x]!, {r3}\n"
        "MOV r5, 0\n"
        "UMLAL r6, r5, r3, %[y]\n"
        "LDMIA %[z], {r3}\n"
        "ADCS r6, r6, r3\n"
        "STMIA %[z]!, {r6}\n"
        // 8: fold the last high word and carry into z[8]
        "LDMIA %[z], {r3}\n"
        "ADCS r5, r5, r3\n"
        "STMIA %[z], {r5}\n"
        : [x] "+&r" (x), [z] "+&r" (z)
        : [y] "r" (y)
        : "r3", "r5", "r6", "cc", "memory"
    );
#else
    // Portable fallback. x[i]*y + z[i] + carry never exceeds 2^64 - 1, so the
    // 64-bit accumulator cannot overflow.
    uint64_t acc = 0;
    for (int i = 0; i < 8; i++) {
        acc += (uint64_t)x[i] * y + z[i];
        z[i] = (uint32_t)acc;
        acc >>= 32;
    }
    z[8] = (uint32_t)(z[8] + acc);
#endif
}
|
|
|
|
|
|
// z += x * y, where x and y are single 32-bit words and z is a 9-limb
// accumulator (limb 0 is the least significant).
//
// z: 9 words, updated in place; the 64-bit product is added to z[1]:z[0] and
//    the carry is propagated through z[8]. A carry out of z[8] is discarded.
//
// On 32-bit ARM this is an unrolled UMULL + ADCS chain in inline assembly;
// elsewhere an equivalent portable C loop is used.
void mul_add_zx0y0(uint32_t *z, uint32_t x, uint32_t y) {
#if defined(__arm__)
    // z is advanced by the write-back stores, so it is a read-write
    // early-clobber operand ("+&r"): GCC does not allow an asm to modify an
    // input-only operand. "cc" covers the carry chain, "memory" the
    // read-modify-write of z. MOV, LDMIA and STMIA leave the flags untouched.
    __asm__ volatile (
        // r6:r5 = x * y
        "UMULL r5, r6, %[x], %[y]\n"
        // z[0] += low word
        "LDMIA %[z], {r3}\n"
        "ADDS r3, r3, r5\n"
        "STMIA %[z]!, {r3}\n"
        // z[1] += high word + carry
        "LDMIA %[z], {r3}\n"
        "ADCS r3, r3, r6\n"
        "STMIA %[z]!, {r3}\n"
        // z[2..8] += carry
        "MOV r5, 0\n"
        "LDMIA %[z], {r3}\n"
        "ADCS r3, r3, r5\n"
        "STMIA %[z]!, {r3}\n"
        "LDMIA %[z], {r3}\n"
        "ADCS r3, r3, r5\n"
        "STMIA %[z]!, {r3}\n"
        "LDMIA %[z], {r3}\n"
        "ADCS r3, r3, r5\n"
        "STMIA %[z]!, {r3}\n"
        "LDMIA %[z], {r3}\n"
        "ADCS r3, r3, r5\n"
        "STMIA %[z]!, {r3}\n"
        "LDMIA %[z], {r3}\n"
        "ADCS r3, r3, r5\n"
        "STMIA %[z]!, {r3}\n"
        "LDMIA %[z], {r3}\n"
        "ADCS r3, r3, r5\n"
        "STMIA %[z]!, {r3}\n"
        "LDMIA %[z], {r3}\n"
        "ADCS r3, r3, r5\n"
        "STMIA %[z], {r3}\n"
        : [z] "+&r" (z)
        : [x] "r" (x), [y] "r" (y)
        : "r3", "r5", "r6", "cc", "memory"
    );
#else
    // Portable fallback: start from the 64-bit product and ripple it through
    // all nine limbs; the upper half of acc is the carry into the next limb.
    uint64_t acc = (uint64_t)x * y;
    for (int i = 0; i < 9; i++) {
        acc += z[i];
        z[i] = (uint32_t)acc;
        acc >>= 32;
    }
#endif
}
|
|
|
|
|
|
// t = (t + (2^255 - 19) * u) >> 32, on a 9-limb accumulator (limb 0 is the
// least significant).
//
// Implemented as t += u << 255 followed by t -= 19 * u, all modulo 2^288,
// with the result written back one limb lower: limbs 1..8 of the sum land in
// t[0..7] and t[8] is set to 0. Limb 0 of the sum is computed only for its
// borrow and then dropped.
//
// On 32-bit ARM this is done in inline assembly; elsewhere an equivalent
// portable C sequence is used.
void pu_add_shift(uint32_t *t, uint32_t u) {
#if defined(__arm__)
    // Both t (ADDS/SUBS/LDMIA!) and u (LSRS) are modified by the asm, so they
    // are read-write early-clobber operands ("+&r"): GCC does not allow an asm
    // to modify an input-only operand. "cc" covers the flag usage, "memory"
    // the read-modify-write of t.
    __asm__ volatile (
        // r6:r5 = 19 * u
        "MOV r3, 19\n"
        "UMULL r5, r6, r3, %[u]\n"
        // split u << 255 across limbs 7 and 8: r2 = bit 0 of u moved to
        // bit 31, u = u >> 1
        "MOV r2, 0\n"
        "LSRS %[u], %[u], 1\n"
        "RRXS r2, r2\n"
        // t[8]:t[7] + (u << 255)
        "ADDS %[t], %[t], 28\n"
        "LDMIA %[t], {r3}\n"
        "ADDS r3, r3, r2\n"
        "STMIA %[t]!, {r3}\n"
        "LDMIA %[t], {r3}\n"
        "ADCS r3, r3, %[u]\n"
        "STMIA %[t], {r3}\n"
        "SUBS %[t], %[t], 32\n"
        // t - 19*u; r2 is the write pointer and trails the read pointer by
        // one limb, which performs the >> 32
        "MOV r2, %[t]\n"
        "LDMIA %[t]!, {r4}\n"
        "SUBS r4, r4, r5\n"
        "LDMIA %[t]!, {r4}\n"
        "SBCS r4, r4, r6\n"
        "STMIA r2!, {r4}\n"
        "MOV r5, 0\n"
        "LDMIA %[t]!, {r4}\n"
        "SBCS r4, r4, r5\n"
        "STMIA r2!, {r4}\n"
        "LDMIA %[t]!, {r4}\n"
        "SBCS r4, r4, r5\n"
        "STMIA r2!, {r4}\n"
        "LDMIA %[t]!, {r4}\n"
        "SBCS r4, r4, r5\n"
        "STMIA r2!, {r4}\n"
        "LDMIA %[t]!, {r4}\n"
        "SBCS r4, r4, r5\n"
        "STMIA r2!, {r4}\n"
        "LDMIA %[t]!, {r4}\n"
        "SBCS r4, r4, r5\n"
        "STMIA r2!, {r4}\n"
        "LDMIA %[t]!, {r4}\n"
        "SBCS r4, r4, r5\n"
        "STMIA r2!, {r4}\n"
        "LDMIA %[t], {r4}\n"
        "SBCS r4, r4, r5\n"
        "STMIA r2!, {r4}\n"
        "STMIA r2, {r5}\n" // ms-word = 0
        : [t] "+&r" (t), [u] "+&r" (u)
        :
        : "r2", "r3", "r4", "r5", "r6", "cc", "memory"
    );
#else
    // t += u << 255: bit 0 of u lands in bit 31 of limb 7, the remaining
    // bits in limb 8. A carry out of limb 8 is discarded.
    uint64_t acc = (uint64_t)t[7] + ((u & 1u) << 31);
    t[7] = (uint32_t)acc;
    t[8] = (uint32_t)(t[8] + (u >> 1) + (uint32_t)(acc >> 32));

    // t -= 19 * u, storing each limb one position lower. Bit 32 of a
    // negative 64-bit difference is the borrow into the next limb.
    uint64_t m19 = (uint64_t)u * 19u;
    uint64_t diff = (uint64_t)t[0] - (uint32_t)m19; // limb 0 is dropped
    uint32_t borrow = (uint32_t)(diff >> 32) & 1u;
    diff = (uint64_t)t[1] - (uint32_t)(m19 >> 32) - borrow;
    t[0] = (uint32_t)diff;
    borrow = (uint32_t)(diff >> 32) & 1u;
    for (int i = 2; i < 9; i++) {
        diff = (uint64_t)t[i] - borrow;
        t[i - 1] = (uint32_t)diff;
        borrow = (uint32_t)(diff >> 32) & 1u;
    }
    t[8] = 0;
#endif
}
|
|
|
|
|
|
// Montgomery multiplication of the 8-limb operands x and y, result in z
// (see Alg. 14.36 HoAC, word base b = 2^32).
// 678152731 is -m^(-1) mod b for curve25519.
// p is only used for the final conditional subtraction; the per-word
// reduction in pu_add_shift has the modulus 2^255 - 19 built in.
void mont_mul_zxy_mod_p(uint32_t *z, uint32_t *x, uint32_t *y, uint32_t *p) {
    uint32_t acc[9];
    uint32_t acc_minus_p[9];
    // Indexed by the value sub_zxy returns: 0 = the subtraction borrowed
    // (keep acc), 1 = no borrow (keep acc - p).
    uint32_t *candidate[2] = {acc, acc_minus_p};

    for (int k = 0; k < 9; k++) {
        acc[k] = 0;
    }

    for (int i = 0; i < 8; i++) {
        uint32_t xi = x[i];
        // u makes the low limb of acc + xi*y + u*m vanish, so the division
        // by b below is exact.
        uint32_t u = (acc[0] + xi * y[0]) * 678152731;
        mul_add_zxy(acc, y, xi);  // A <- A + xi y
        pu_add_shift(acc, u);     // A <- (A + u m) / b
    }

    uint32_t no_borrow = sub_zxy(acc_minus_p, acc, p);

    const uint32_t *chosen = candidate[no_borrow];
    for (int k = 0; k < 8; k++) {
        z[k] = chosen[k];
    }
}
|
|
|
|
|
|
// Montgomery multiplication of the 8-limb operand x by the single word y,
// result in z (see Alg. 14.36 HoAC, word base b = 2^32).
// 678152731 is -m^(-1) mod b for curve25519.
// p is only used for the final conditional subtraction; the per-word
// reduction in pu_add_shift has the modulus 2^255 - 19 built in.
void mont_mul_zxy0_mod_p(uint32_t *z, uint32_t *x, uint32_t y, uint32_t *p) {
    uint32_t acc[9];
    uint32_t acc_minus_p[9];
    // Indexed by the value sub_zxy returns: 0 = the subtraction borrowed
    // (keep acc), 1 = no borrow (keep acc - p).
    uint32_t *candidate[2] = {acc, acc_minus_p};

    for (int k = 0; k < 9; k++) {
        acc[k] = 0;
    }

    for (int i = 0; i < 8; i++) {
        uint32_t xi = x[i];
        // u makes the low limb of acc + xi*y + u*m vanish, so the division
        // by b below is exact.
        uint32_t u = (acc[0] + xi * y) * 678152731;
        mul_add_zx0y0(acc, y, xi);  // A <- A + xi y
        pu_add_shift(acc, u);       // A <- (A + u m) / b
    }

    uint32_t no_borrow = sub_zxy(acc_minus_p, acc, p);

    const uint32_t *chosen = candidate[no_borrow];
    for (int k = 0; k < 8; k++) {
        z[k] = chosen[k];
    }
}
|
|
|