Fix the div_word implementations that do not use a divide operation.
Fix ARM32 and Cortex-M builds so that 4096-bit operations work again.
pull/5140/head
Sean Parkinson 2022-05-13 09:37:24 +10:00
parent 6aaee73585
commit e8160f049e
4 changed files with 210 additions and 368 deletions

View File

@ -20943,7 +20943,7 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
u += sp_4096_add_128(r + 64, r + 64, z1);
XMEMSET(a1 + 1, 0, sizeof(sp_digit) * (64 - 1));
a1[0] = u;
(void)sp_4096_add_64(r + 192, r + 192, a1);
(void)sp_2048_add_64(r + 192, r + 192, a1);
}
/* Square a and put result in r. (r = a * a)

View File

@ -1937,7 +1937,7 @@ static WC_INLINE sp_digit sp_2048_div_word_36(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r);
for (i = 27; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 28);
t1 += t1 + (((sp_uint32)t0 >> 28) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2;
@ -1951,11 +1951,6 @@ static WC_INLINE sp_digit sp_2048_div_word_36(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 29);
m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign;
@ -3004,7 +2999,7 @@ static WC_INLINE sp_digit sp_2048_div_word_72(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r);
for (i = 27; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 28);
t1 += t1 + (((sp_uint32)t0 >> 28) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2;
@ -3018,11 +3013,6 @@ static WC_INLINE sp_digit sp_2048_div_word_72(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 29);
m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign;
@ -5629,7 +5619,7 @@ static WC_INLINE sp_digit sp_3072_div_word_53(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r);
for (i = 27; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 28);
t1 += t1 + (((sp_uint32)t0 >> 28) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2;
@ -5643,11 +5633,6 @@ static WC_INLINE sp_digit sp_3072_div_word_53(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 29);
m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign;
@ -6473,7 +6458,7 @@ static WC_INLINE sp_digit sp_3072_div_word_106(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r);
for (i = 27; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 28);
t1 += t1 + (((sp_uint32)t0 >> 28) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2;
@ -6487,11 +6472,6 @@ static WC_INLINE sp_digit sp_3072_div_word_106(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 29);
m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign;
@ -9662,7 +9642,7 @@ static WC_INLINE sp_digit sp_3072_div_word_56(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r);
for (i = 26; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 27);
t1 += t1 + (((sp_uint32)t0 >> 27) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2;
@ -9676,11 +9656,6 @@ static WC_INLINE sp_digit sp_3072_div_word_56(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div);
r += (m >> 56) - (sp_digit)(d >> 56);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 28);
m = d - ((sp_int64)r * div);
r += (m >> 56) - (sp_digit)(d >> 56);
m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign;
@ -10585,7 +10560,7 @@ static WC_INLINE sp_digit sp_3072_div_word_112(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r);
for (i = 26; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 27);
t1 += t1 + (((sp_uint32)t0 >> 27) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2;
@ -10599,11 +10574,6 @@ static WC_INLINE sp_digit sp_3072_div_word_112(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div);
r += (m >> 56) - (sp_digit)(d >> 56);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 28);
m = d - ((sp_int64)r * div);
r += (m >> 56) - (sp_digit)(d >> 56);
m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign;
@ -13292,7 +13262,7 @@ static WC_INLINE sp_digit sp_4096_div_word_71(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r);
for (i = 27; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 28);
t1 += t1 + (((sp_uint32)t0 >> 28) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2;
@ -13306,11 +13276,6 @@ static WC_INLINE sp_digit sp_4096_div_word_71(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 29);
m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign;
@ -14137,7 +14102,7 @@ static WC_INLINE sp_digit sp_4096_div_word_142(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r);
for (i = 27; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 28);
t1 += t1 + (((sp_uint32)t0 >> 28) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2;
@ -14151,11 +14116,6 @@ static WC_INLINE sp_digit sp_4096_div_word_142(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 29);
m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign;
@ -17205,7 +17165,7 @@ static WC_INLINE sp_digit sp_4096_div_word_81(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r);
for (i = 24; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 25);
t1 += t1 + (((sp_uint32)t0 >> 25) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2;
@ -17219,11 +17179,6 @@ static WC_INLINE sp_digit sp_4096_div_word_81(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div);
r += (m >> 52) - (sp_digit)(d >> 52);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 26);
m = d - ((sp_int64)r * div);
r += (m >> 52) - (sp_digit)(d >> 52);
m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign;
@ -18114,7 +18069,7 @@ static WC_INLINE sp_digit sp_4096_div_word_162(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r);
for (i = 24; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 25);
t1 += t1 + (((sp_uint32)t0 >> 25) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2;
@ -18128,11 +18083,6 @@ static WC_INLINE sp_digit sp_4096_div_word_162(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div);
r += (m >> 52) - (sp_digit)(d >> 52);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 26);
m = d - ((sp_int64)r * div);
r += (m >> 52) - (sp_digit)(d >> 52);
m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign;
@ -41044,96 +40994,112 @@ SP_NOINLINE static void sp_521_mul_d_21(sp_digit* r, const sp_digit* a,
#endif /* WOLFSSL_SP_SMALL */
}
static WC_INLINE sp_digit sp_521_div_word_21(sp_digit d1, sp_digit d0,
sp_digit div)
SP_NOINLINE static void sp_521_lshift_42(sp_digit* r, const sp_digit* a,
byte n)
{
#ifdef SP_USE_DIVTI3
sp_int64 d = ((sp_int64)d1 << 25) + d0;
return d / div;
#elif defined(__x86_64__) || defined(__i386__)
sp_int64 d = ((sp_int64)d1 << 25) + d0;
sp_uint32 lo = (sp_uint32)d;
sp_digit hi = (sp_digit)(d >> 32);
__asm__ __volatile__ (
"idiv %2"
: "+a" (lo)
: "d" (hi), "r" (div)
: "cc"
);
return (sp_digit)lo;
#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV)
sp_int64 d = ((sp_int64)d1 << 25) + d0;
sp_digit dv = (div >> 1) + 1;
sp_digit t1 = (sp_digit)(d >> 25);
sp_digit t0 = (sp_digit)(d & 0x1ffffff);
sp_digit t2;
sp_digit sign;
sp_digit r;
#ifdef WOLFSSL_SP_SMALL
int i;
sp_int64 m;
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r);
for (i = 23; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 24);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2;
t1 -= dv & (0 - t2);
t1 += t2;
r[42] = a[41] >> (25 - n);
for (i=41; i>0; i--) {
r[i] = ((a[i] << n) | (a[i-1] >> (25 - n))) & 0x1ffffff;
}
r += r + 1;
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 25);
m = d - ((sp_int64)r * div);
r += (m >> 50) - (sp_digit)(d >> 50);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 25);
m = d - ((sp_int64)r * div);
r += (m >> 50) - (sp_digit)(d >> 50);
m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign;
t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31);
r += sign * t2;
m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign;
t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31);
r += sign * t2;
return r;
#else
sp_int64 d = ((sp_int64)d1 << 25) + d0;
sp_digit r = 0;
sp_digit t;
sp_digit dv = (div >> 10) + 1;
sp_int_digit s;
sp_int_digit t;
t = (sp_digit)(d >> 20);
t = (t / dv) << 10;
r += t;
d -= (sp_int64)t * div;
t = (sp_digit)(d >> 5);
t = t / (dv << 5);
r += t;
d -= (sp_int64)t * div;
t = (sp_digit)d;
t = t / div;
r += t;
d -= (sp_int64)t * div;
return r;
#endif
s = (sp_int_digit)a[41];
r[42] = s >> (25U - n);
s = (sp_int_digit)(a[41]); t = (sp_int_digit)(a[40]);
r[41] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[40]); t = (sp_int_digit)(a[39]);
r[40] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[39]); t = (sp_int_digit)(a[38]);
r[39] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[38]); t = (sp_int_digit)(a[37]);
r[38] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[37]); t = (sp_int_digit)(a[36]);
r[37] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[36]); t = (sp_int_digit)(a[35]);
r[36] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]);
r[35] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]);
r[34] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]);
r[33] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]);
r[32] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]);
r[31] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]);
r[30] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]);
r[29] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]);
r[28] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]);
r[27] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]);
r[26] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]);
r[25] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]);
r[24] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]);
r[23] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]);
r[22] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]);
r[21] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]);
r[20] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]);
r[19] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]);
r[18] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]);
r[17] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]);
r[16] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]);
r[15] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]);
r[14] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]);
r[13] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]);
r[12] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]);
r[11] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]);
r[10] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]);
r[9] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]);
r[8] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]);
r[7] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]);
r[6] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]);
r[5] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]);
r[4] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]);
r[3] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]);
r[2] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]);
r[1] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
#endif /* WOLFSSL_SP_SMALL */
r[0] = (a[0] << n) & 0x1ffffff;
}
/* Divide a by d and put the remainder into r (m*d + r = a)
* m is not calculated as it is not needed at this time.
*
* Large number of bits in last word.
* Simplified based on top word of divisor being (1 << 25) - 1
*
* a Number to be divided.
* d Number to divide with.
@ -41145,60 +41111,49 @@ static int sp_521_div_21(const sp_digit* a, const sp_digit* d,
const sp_digit* m, sp_digit* r)
{
int i;
sp_digit dv;
sp_digit r1;
sp_digit mask;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit* t1 = NULL;
#else
sp_digit t1[3 * 21 + 1];
sp_digit t1[4 * 21 + 3];
#endif
sp_digit* t2 = NULL;
sp_digit* sd = NULL;
int err = MP_OKAY;
(void)m;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 21 + 1), NULL,
t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 21 + 3), NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (t1 == NULL)
err = MEMORY_E;
#endif
(void)m;
if (err == MP_OKAY) {
t2 = t1 + 2 * 21;
t2 = t1 + 42 + 1;
sd = t2 + 21 + 1;
dv = d[20];
XMEMCPY(t1, a, sizeof(*t1) * 2U * 21U);
sp_521_mul_d_21(sd, d, (sp_digit)1 << 4);
sp_521_lshift_42(t1, a, 4);
t1[21 + 21] += t1[21 + 21 - 1] >> 25;
t1[21 + 21 - 1] &= 0x1ffffff;
for (i=20; i>=0; i--) {
t1[21 + i] += t1[21 + i - 1] >> 25;
t1[21 + i - 1] &= 0x1ffffff;
r1 = sp_521_div_word_21(t1[21 + i], t1[21 + i - 1], dv);
sp_521_mul_d_21(t2, d, r1);
r1 = t1[21 + i];
sp_521_mul_d_21(t2, sd, r1);
(void)sp_521_sub_21(&t1[i], &t1[i], t2);
sp_521_norm_21(&t1[i]);
t1[21 + i] -= t2[21];
t1[21 + i] += t1[21 + i - 1] >> 25;
t1[21 + i - 1] &= 0x1ffffff;
r1 = sp_521_div_word_21(-t1[21 + i], -t1[21 + i - 1], dv);
r1++;
sp_521_mul_d_21(t2, d, r1);
(void)sp_521_add_21(&t1[i], &t1[i], t2);
t1[21 + i] += t1[21 + i - 1] >> 25;
t1[21 + i - 1] &= 0x1ffffff;
}
t1[21 - 1] += t1[21 - 2] >> 25;
t1[21 - 2] &= 0x1ffffff;
r1 = t1[21 - 1] / dv;
sp_521_norm_21(&t1[i + 1]);
sp_521_mul_d_21(t2, d, r1);
(void)sp_521_sub_21(t1, t1, t2);
XMEMCPY(r, t1, sizeof(*r) * 42U);
for (i=0; i<20; i++) {
r[i+1] += r[i] >> 25;
r[i] &= 0x1ffffff;
mask = ~((t1[21 + i] - 1) >> 31);
sp_521_cond_sub_21(t1 + i, t1 + i, sd, mask);
sp_521_norm_21(&t1[i + 1]);
}
sp_521_cond_add_21(r, r, d, r[20] >> 31);
sp_521_norm_21(t1);
sp_521_rshift_21(r, t1, 4);
}
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
@ -43741,7 +43696,7 @@ static WC_INLINE sp_digit sp_1024_div_word_42(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r);
for (i = 23; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 24);
t1 += t1 + (((sp_uint32)t0 >> 24) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2;
@ -43755,11 +43710,6 @@ static WC_INLINE sp_digit sp_1024_div_word_42(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div);
r += (m >> 50) - (sp_digit)(d >> 50);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 25);
m = d - ((sp_int64)r * div);
r += (m >> 50) - (sp_digit)(d >> 50);
m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign;

View File

@ -857,7 +857,7 @@ static WC_INLINE sp_digit sp_2048_div_word_17(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r);
for (i = 59; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 60);
t1 += t1 + (((sp_uint64)t0 >> 60) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2;
@ -871,11 +871,6 @@ static WC_INLINE sp_digit sp_2048_div_word_17(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div);
r += (m >> 122) - (sp_digit)(d >> 122);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 61);
m = d - ((sp_int128)r * div);
r += (m >> 122) - (sp_digit)(d >> 122);
m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign;
@ -1687,7 +1682,7 @@ static WC_INLINE sp_digit sp_2048_div_word_34(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r);
for (i = 59; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 60);
t1 += t1 + (((sp_uint64)t0 >> 60) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2;
@ -1701,11 +1696,6 @@ static WC_INLINE sp_digit sp_2048_div_word_34(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div);
r += (m >> 122) - (sp_digit)(d >> 122);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 61);
m = d - ((sp_int128)r * div);
r += (m >> 122) - (sp_digit)(d >> 122);
m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign;
@ -4391,7 +4381,7 @@ static WC_INLINE sp_digit sp_2048_div_word_18(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r);
for (i = 55; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 56);
t1 += t1 + (((sp_uint64)t0 >> 56) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2;
@ -4405,11 +4395,6 @@ static WC_INLINE sp_digit sp_2048_div_word_18(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 57);
m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign;
@ -5282,7 +5267,7 @@ static WC_INLINE sp_digit sp_2048_div_word_36(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r);
for (i = 55; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 56);
t1 += t1 + (((sp_uint64)t0 >> 56) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2;
@ -5296,11 +5281,6 @@ static WC_INLINE sp_digit sp_2048_div_word_36(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 57);
m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign;
@ -7740,7 +7720,7 @@ static WC_INLINE sp_digit sp_3072_div_word_26(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r);
for (i = 58; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 59);
t1 += t1 + (((sp_uint64)t0 >> 59) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2;
@ -7754,11 +7734,6 @@ static WC_INLINE sp_digit sp_3072_div_word_26(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div);
r += (m >> 120) - (sp_digit)(d >> 120);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 60);
m = d - ((sp_int128)r * div);
r += (m >> 120) - (sp_digit)(d >> 120);
m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign;
@ -8576,7 +8551,7 @@ static WC_INLINE sp_digit sp_3072_div_word_52(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r);
for (i = 58; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 59);
t1 += t1 + (((sp_uint64)t0 >> 59) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2;
@ -8590,11 +8565,6 @@ static WC_INLINE sp_digit sp_3072_div_word_52(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div);
r += (m >> 120) - (sp_digit)(d >> 120);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 60);
m = d - ((sp_int128)r * div);
r += (m >> 120) - (sp_digit)(d >> 120);
m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign;
@ -11418,7 +11388,7 @@ static WC_INLINE sp_digit sp_3072_div_word_27(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r);
for (i = 55; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 56);
t1 += t1 + (((sp_uint64)t0 >> 56) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2;
@ -11432,11 +11402,6 @@ static WC_INLINE sp_digit sp_3072_div_word_27(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 57);
m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign;
@ -12320,7 +12285,7 @@ static WC_INLINE sp_digit sp_3072_div_word_54(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r);
for (i = 55; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 56);
t1 += t1 + (((sp_uint64)t0 >> 56) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2;
@ -12334,11 +12299,6 @@ static WC_INLINE sp_digit sp_3072_div_word_54(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 57);
m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign;
@ -14820,7 +14780,7 @@ static WC_INLINE sp_digit sp_4096_div_word_35(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r);
for (i = 57; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 58);
t1 += t1 + (((sp_uint64)t0 >> 58) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2;
@ -14834,11 +14794,6 @@ static WC_INLINE sp_digit sp_4096_div_word_35(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div);
r += (m >> 118) - (sp_digit)(d >> 118);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 59);
m = d - ((sp_int128)r * div);
r += (m >> 118) - (sp_digit)(d >> 118);
m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign;
@ -15651,7 +15606,7 @@ static WC_INLINE sp_digit sp_4096_div_word_70(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r);
for (i = 57; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 58);
t1 += t1 + (((sp_uint64)t0 >> 58) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2;
@ -15665,11 +15620,6 @@ static WC_INLINE sp_digit sp_4096_div_word_70(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div);
r += (m >> 118) - (sp_digit)(d >> 118);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 59);
m = d - ((sp_int128)r * div);
r += (m >> 118) - (sp_digit)(d >> 118);
m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign;
@ -18548,7 +18498,7 @@ static WC_INLINE sp_digit sp_4096_div_word_39(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r);
for (i = 51; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 52);
t1 += t1 + (((sp_uint64)t0 >> 52) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2;
@ -18562,11 +18512,6 @@ static WC_INLINE sp_digit sp_4096_div_word_39(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div);
r += (m >> 106) - (sp_digit)(d >> 106);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 53);
m = d - ((sp_int128)r * div);
r += (m >> 106) - (sp_digit)(d >> 106);
m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign;
@ -19451,7 +19396,7 @@ static WC_INLINE sp_digit sp_4096_div_word_78(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r);
for (i = 51; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 52);
t1 += t1 + (((sp_uint64)t0 >> 52) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2;
@ -19465,11 +19410,6 @@ static WC_INLINE sp_digit sp_4096_div_word_78(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div);
r += (m >> 106) - (sp_digit)(d >> 106);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 53);
m = d - ((sp_int128)r * div);
r += (m >> 106) - (sp_digit)(d >> 106);
m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign;
@ -40705,96 +40645,64 @@ SP_NOINLINE static void sp_521_mul_d_9(sp_digit* r, const sp_digit* a,
#endif /* WOLFSSL_SP_SMALL */
}
static WC_INLINE sp_digit sp_521_div_word_9(sp_digit d1, sp_digit d0,
sp_digit div)
SP_NOINLINE static void sp_521_lshift_18(sp_digit* r, const sp_digit* a,
byte n)
{
#ifdef SP_USE_DIVTI3
sp_int128 d = ((sp_int128)d1 << 58) + d0;
return d / div;
#elif defined(__x86_64__) || defined(__i386__)
sp_int128 d = ((sp_int128)d1 << 58) + d0;
sp_uint64 lo = (sp_uint64)d;
sp_digit hi = (sp_digit)(d >> 64);
__asm__ __volatile__ (
"idiv %2"
: "+a" (lo)
: "d" (hi), "r" (div)
: "cc"
);
return (sp_digit)lo;
#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV)
sp_int128 d = ((sp_int128)d1 << 58) + d0;
sp_digit dv = (div >> 1) + 1;
sp_digit t1 = (sp_digit)(d >> 58);
sp_digit t0 = (sp_digit)(d & 0x3ffffffffffffffL);
sp_digit t2;
sp_digit sign;
sp_digit r;
#ifdef WOLFSSL_SP_SMALL
int i;
sp_int128 m;
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r);
for (i = 56; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 57);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2;
t1 -= dv & (0 - t2);
t1 += t2;
r[18] = a[17] >> (58 - n);
for (i=17; i>0; i--) {
r[i] = ((a[i] << n) | (a[i-1] >> (58 - n))) & 0x3ffffffffffffffL;
}
r += r + 1;
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 58);
m = d - ((sp_int128)r * div);
r += (m >> 116) - (sp_digit)(d >> 116);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 58);
m = d - ((sp_int128)r * div);
r += (m >> 116) - (sp_digit)(d >> 116);
m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign;
t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63);
r += sign * t2;
m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign;
t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63);
r += sign * t2;
return r;
#else
sp_int128 d = ((sp_int128)d1 << 58) + d0;
sp_digit r = 0;
sp_digit t;
sp_digit dv = (div >> 27) + 1;
sp_int_digit s;
sp_int_digit t;
t = (sp_digit)(d >> 54);
t = (t / dv) << 27;
r += t;
d -= (sp_int128)t * div;
t = (sp_digit)(d >> 23);
t = t / (dv << 4);
r += t;
d -= (sp_int128)t * div;
t = (sp_digit)d;
t = t / div;
r += t;
d -= (sp_int128)t * div;
return r;
#endif
s = (sp_int_digit)a[17];
r[18] = s >> (58U - n);
s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]);
r[17] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]);
r[16] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]);
r[15] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]);
r[14] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]);
r[13] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]);
r[12] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]);
r[11] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]);
r[10] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]);
r[9] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]);
r[8] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]);
r[7] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]);
r[6] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]);
r[5] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]);
r[4] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]);
r[3] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]);
r[2] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]);
r[1] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
#endif /* WOLFSSL_SP_SMALL */
r[0] = (a[0] << n) & 0x3ffffffffffffffL;
}
/* Divide a by d and put the remainder into r (m*d + r = a)
* m is not calculated as it is not needed at this time.
*
* Large number of bits in last word.
* Simplified based on top word of divisor being (1 << 58) - 1
*
* a Number to be divided.
* d Number to divide with.
@ -40806,60 +40714,49 @@ static int sp_521_div_9(const sp_digit* a, const sp_digit* d,
const sp_digit* m, sp_digit* r)
{
int i;
sp_digit dv;
sp_digit r1;
sp_digit mask;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit* t1 = NULL;
#else
sp_digit t1[3 * 9 + 1];
sp_digit t1[4 * 9 + 3];
#endif
sp_digit* t2 = NULL;
sp_digit* sd = NULL;
int err = MP_OKAY;
(void)m;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 9 + 1), NULL,
t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 9 + 3), NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (t1 == NULL)
err = MEMORY_E;
#endif
(void)m;
if (err == MP_OKAY) {
t2 = t1 + 2 * 9;
t2 = t1 + 18 + 1;
sd = t2 + 9 + 1;
dv = d[8];
XMEMCPY(t1, a, sizeof(*t1) * 2U * 9U);
sp_521_mul_d_9(sd, d, (sp_digit)1 << 1);
sp_521_lshift_18(t1, a, 1);
t1[9 + 9] += t1[9 + 9 - 1] >> 58;
t1[9 + 9 - 1] &= 0x3ffffffffffffffL;
for (i=8; i>=0; i--) {
t1[9 + i] += t1[9 + i - 1] >> 58;
t1[9 + i - 1] &= 0x3ffffffffffffffL;
r1 = sp_521_div_word_9(t1[9 + i], t1[9 + i - 1], dv);
sp_521_mul_d_9(t2, d, r1);
r1 = t1[9 + i];
sp_521_mul_d_9(t2, sd, r1);
(void)sp_521_sub_9(&t1[i], &t1[i], t2);
sp_521_norm_9(&t1[i]);
t1[9 + i] -= t2[9];
t1[9 + i] += t1[9 + i - 1] >> 58;
t1[9 + i - 1] &= 0x3ffffffffffffffL;
r1 = sp_521_div_word_9(-t1[9 + i], -t1[9 + i - 1], dv);
r1++;
sp_521_mul_d_9(t2, d, r1);
(void)sp_521_add_9(&t1[i], &t1[i], t2);
t1[9 + i] += t1[9 + i - 1] >> 58;
t1[9 + i - 1] &= 0x3ffffffffffffffL;
}
t1[9 - 1] += t1[9 - 2] >> 58;
t1[9 - 2] &= 0x3ffffffffffffffL;
r1 = t1[9 - 1] / dv;
sp_521_norm_9(&t1[i + 1]);
sp_521_mul_d_9(t2, d, r1);
(void)sp_521_sub_9(t1, t1, t2);
XMEMCPY(r, t1, sizeof(*r) * 18U);
for (i=0; i<8; i++) {
r[i+1] += r[i] >> 58;
r[i] &= 0x3ffffffffffffffL;
mask = ~((t1[9 + i] - 1) >> 63);
sp_521_cond_sub_9(t1 + i, t1 + i, sd, mask);
sp_521_norm_9(&t1[i + 1]);
}
sp_521_cond_add_9(r, r, d, r[8] >> 63);
sp_521_norm_9(t1);
sp_521_rshift_9(r, t1, 1);
}
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
@ -43261,7 +43158,7 @@ static WC_INLINE sp_digit sp_1024_div_word_18(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r);
for (i = 55; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 56);
t1 += t1 + (((sp_uint64)t0 >> 56) & 1);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2;
@ -43275,11 +43172,6 @@ static WC_INLINE sp_digit sp_1024_div_word_18(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 57);
m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign;

View File

@ -12527,7 +12527,7 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
u += sp_4096_add_128(r + 64, r + 64, z1);
XMEMSET(a1 + 1, 0, sizeof(sp_digit) * (64 - 1));
a1[0] = u;
(void)sp_4096_add_64(r + 192, r + 192, a1);
(void)sp_2048_add_64(r + 192, r + 192, a1);
}
/* Square a and put result in r. (r = a * a)