Merge pull request #2642 from SparkiDev/sp_exptmod

sp_int: support for more values in sp_exptmod
pull/2682/head
toddouska 2019-12-17 16:36:12 -08:00 committed by GitHub
commit 06563ed3fa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 357 additions and 248 deletions

View File

@ -1424,30 +1424,30 @@ SP_NOINLINE static void sp_2048_mul_add_45(sp_digit* r, const sp_digit* a,
int64_t t[8]; int64_t t[8];
int i; int i;
t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff; t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff);
for (i = 0; i < 40; i += 8) { for (i = 0; i < 40; i += 8) {
t[1] = tb * a[i+1]; t[1] = tb * a[i+1];
r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff); r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
t[2] = tb * a[i+2]; t[2] = tb * a[i+2];
r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff); r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
t[3] = tb * a[i+3]; t[3] = tb * a[i+3];
r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff); r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
t[4] = tb * a[i+4]; t[4] = tb * a[i+4];
r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff); r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
t[5] = tb * a[i+5]; t[5] = tb * a[i+5];
r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff); r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff));
t[6] = tb * a[i+6]; t[6] = tb * a[i+6];
r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff); r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff));
t[7] = tb * a[i+7]; t[7] = tb * a[i+7];
r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff); r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff));
t[0] = tb * a[i+8]; t[0] = tb * a[i+8];
r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff); r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff));
} }
t[1] = tb * a[41]; r[41] += (t[0] >> 23) + (t[1] & 0x7fffff); t[1] = tb * a[41]; r[41] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
t[2] = tb * a[42]; r[42] += (t[1] >> 23) + (t[2] & 0x7fffff); t[2] = tb * a[42]; r[42] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
t[3] = tb * a[43]; r[43] += (t[2] >> 23) + (t[3] & 0x7fffff); t[3] = tb * a[43]; r[43] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
t[4] = tb * a[44]; r[44] += (t[3] >> 23) + (t[4] & 0x7fffff); t[4] = tb * a[44]; r[44] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
r[45] += t[4] >> 23; r[45] += (sp_digit)(t[4] >> 23);
#endif /* WOLFSSL_SP_SMALL */ #endif /* WOLFSSL_SP_SMALL */
} }
@ -1547,6 +1547,8 @@ static void sp_2048_mont_reduce_45(sp_digit* a, const sp_digit* m, sp_digit mp)
int i; int i;
sp_digit mu; sp_digit mu;
sp_2048_norm_45(a + 45);
for (i=0; i<44; i++) { for (i=0; i<44; i++) {
mu = (a[i] * mp) & 0x7fffff; mu = (a[i] * mp) & 0x7fffff;
sp_2048_mul_add_45(a+i, m, mu); sp_2048_mul_add_45(a+i, m, mu);
@ -2328,27 +2330,27 @@ SP_NOINLINE static void sp_2048_mul_add_90(sp_digit* r, const sp_digit* a,
int64_t t[8]; int64_t t[8];
int i; int i;
t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff; t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff);
for (i = 0; i < 88; i += 8) { for (i = 0; i < 88; i += 8) {
t[1] = tb * a[i+1]; t[1] = tb * a[i+1];
r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff); r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
t[2] = tb * a[i+2]; t[2] = tb * a[i+2];
r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff); r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
t[3] = tb * a[i+3]; t[3] = tb * a[i+3];
r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff); r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
t[4] = tb * a[i+4]; t[4] = tb * a[i+4];
r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff); r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
t[5] = tb * a[i+5]; t[5] = tb * a[i+5];
r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff); r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff));
t[6] = tb * a[i+6]; t[6] = tb * a[i+6];
r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff); r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff));
t[7] = tb * a[i+7]; t[7] = tb * a[i+7];
r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff); r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff));
t[0] = tb * a[i+8]; t[0] = tb * a[i+8];
r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff); r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff));
} }
t[1] = tb * a[89]; r[89] += (t[0] >> 23) + (t[1] & 0x7fffff); t[1] = tb * a[89]; r[89] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
r[90] += t[1] >> 23; r[90] += (sp_digit)(t[1] >> 23);
#endif /* WOLFSSL_SP_SMALL */ #endif /* WOLFSSL_SP_SMALL */
} }
@ -2439,6 +2441,8 @@ static void sp_2048_mont_reduce_90(sp_digit* a, const sp_digit* m, sp_digit mp)
int i; int i;
sp_digit mu; sp_digit mu;
sp_2048_norm_90(a + 90);
#ifdef WOLFSSL_SP_DH #ifdef WOLFSSL_SP_DH
if (mp != 1) { if (mp != 1) {
for (i=0; i<89; i++) { for (i=0; i<89; i++) {
@ -5294,28 +5298,28 @@ SP_NOINLINE static void sp_3072_mul_add_67(sp_digit* r, const sp_digit* a,
int64_t t[8]; int64_t t[8];
int i; int i;
t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff; t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff);
for (i = 0; i < 64; i += 8) { for (i = 0; i < 64; i += 8) {
t[1] = tb * a[i+1]; t[1] = tb * a[i+1];
r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff); r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
t[2] = tb * a[i+2]; t[2] = tb * a[i+2];
r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff); r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
t[3] = tb * a[i+3]; t[3] = tb * a[i+3];
r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff); r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
t[4] = tb * a[i+4]; t[4] = tb * a[i+4];
r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff); r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
t[5] = tb * a[i+5]; t[5] = tb * a[i+5];
r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff); r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff));
t[6] = tb * a[i+6]; t[6] = tb * a[i+6];
r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff); r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff));
t[7] = tb * a[i+7]; t[7] = tb * a[i+7];
r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff); r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff));
t[0] = tb * a[i+8]; t[0] = tb * a[i+8];
r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff); r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff));
} }
t[1] = tb * a[65]; r[65] += (t[0] >> 23) + (t[1] & 0x7fffff); t[1] = tb * a[65]; r[65] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
t[2] = tb * a[66]; r[66] += (t[1] >> 23) + (t[2] & 0x7fffff); t[2] = tb * a[66]; r[66] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
r[67] += t[2] >> 23; r[67] += (sp_digit)(t[2] >> 23);
#endif /* WOLFSSL_SP_SMALL */ #endif /* WOLFSSL_SP_SMALL */
} }
@ -5415,6 +5419,8 @@ static void sp_3072_mont_reduce_67(sp_digit* a, const sp_digit* m, sp_digit mp)
int i; int i;
sp_digit mu; sp_digit mu;
sp_3072_norm_67(a + 67);
for (i=0; i<66; i++) { for (i=0; i<66; i++) {
mu = (a[i] * mp) & 0x7fffff; mu = (a[i] * mp) & 0x7fffff;
sp_3072_mul_add_67(a+i, m, mu); sp_3072_mul_add_67(a+i, m, mu);
@ -6166,31 +6172,31 @@ SP_NOINLINE static void sp_3072_mul_add_134(sp_digit* r, const sp_digit* a,
int64_t t[8]; int64_t t[8];
int i; int i;
t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff; t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff);
for (i = 0; i < 128; i += 8) { for (i = 0; i < 128; i += 8) {
t[1] = tb * a[i+1]; t[1] = tb * a[i+1];
r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff); r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
t[2] = tb * a[i+2]; t[2] = tb * a[i+2];
r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff); r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
t[3] = tb * a[i+3]; t[3] = tb * a[i+3];
r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff); r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
t[4] = tb * a[i+4]; t[4] = tb * a[i+4];
r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff); r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
t[5] = tb * a[i+5]; t[5] = tb * a[i+5];
r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff); r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff));
t[6] = tb * a[i+6]; t[6] = tb * a[i+6];
r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff); r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff));
t[7] = tb * a[i+7]; t[7] = tb * a[i+7];
r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff); r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff));
t[0] = tb * a[i+8]; t[0] = tb * a[i+8];
r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff); r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff));
} }
t[1] = tb * a[129]; r[129] += (t[0] >> 23) + (t[1] & 0x7fffff); t[1] = tb * a[129]; r[129] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff));
t[2] = tb * a[130]; r[130] += (t[1] >> 23) + (t[2] & 0x7fffff); t[2] = tb * a[130]; r[130] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff));
t[3] = tb * a[131]; r[131] += (t[2] >> 23) + (t[3] & 0x7fffff); t[3] = tb * a[131]; r[131] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff));
t[4] = tb * a[132]; r[132] += (t[3] >> 23) + (t[4] & 0x7fffff); t[4] = tb * a[132]; r[132] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff));
t[5] = tb * a[133]; r[133] += (t[4] >> 23) + (t[5] & 0x7fffff); t[5] = tb * a[133]; r[133] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff));
r[134] += t[5] >> 23; r[134] += (sp_digit)(t[5] >> 23);
#endif /* WOLFSSL_SP_SMALL */ #endif /* WOLFSSL_SP_SMALL */
} }
@ -6293,6 +6299,8 @@ static void sp_3072_mont_reduce_134(sp_digit* a, const sp_digit* m, sp_digit mp)
int i; int i;
sp_digit mu; sp_digit mu;
sp_3072_norm_134(a + 134);
#ifdef WOLFSSL_SP_DH #ifdef WOLFSSL_SP_DH
if (mp != 1) { if (mp != 1) {
for (i=0; i<133; i++) { for (i=0; i<133; i++) {
@ -9298,27 +9306,27 @@ SP_NOINLINE static void sp_4096_mul_add_98(sp_digit* r, const sp_digit* a,
int64_t t[8]; int64_t t[8];
int i; int i;
t[0] = tb * a[0]; r[0] += t[0] & 0x1fffff; t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffff);
for (i = 0; i < 96; i += 8) { for (i = 0; i < 96; i += 8) {
t[1] = tb * a[i+1]; t[1] = tb * a[i+1];
r[i+1] += (t[0] >> 21) + (t[1] & 0x1fffff); r[i+1] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff));
t[2] = tb * a[i+2]; t[2] = tb * a[i+2];
r[i+2] += (t[1] >> 21) + (t[2] & 0x1fffff); r[i+2] += (sp_digit)((t[1] >> 21) + (t[2] & 0x1fffff));
t[3] = tb * a[i+3]; t[3] = tb * a[i+3];
r[i+3] += (t[2] >> 21) + (t[3] & 0x1fffff); r[i+3] += (sp_digit)((t[2] >> 21) + (t[3] & 0x1fffff));
t[4] = tb * a[i+4]; t[4] = tb * a[i+4];
r[i+4] += (t[3] >> 21) + (t[4] & 0x1fffff); r[i+4] += (sp_digit)((t[3] >> 21) + (t[4] & 0x1fffff));
t[5] = tb * a[i+5]; t[5] = tb * a[i+5];
r[i+5] += (t[4] >> 21) + (t[5] & 0x1fffff); r[i+5] += (sp_digit)((t[4] >> 21) + (t[5] & 0x1fffff));
t[6] = tb * a[i+6]; t[6] = tb * a[i+6];
r[i+6] += (t[5] >> 21) + (t[6] & 0x1fffff); r[i+6] += (sp_digit)((t[5] >> 21) + (t[6] & 0x1fffff));
t[7] = tb * a[i+7]; t[7] = tb * a[i+7];
r[i+7] += (t[6] >> 21) + (t[7] & 0x1fffff); r[i+7] += (sp_digit)((t[6] >> 21) + (t[7] & 0x1fffff));
t[0] = tb * a[i+8]; t[0] = tb * a[i+8];
r[i+8] += (t[7] >> 21) + (t[0] & 0x1fffff); r[i+8] += (sp_digit)((t[7] >> 21) + (t[0] & 0x1fffff));
} }
t[1] = tb * a[97]; r[97] += (t[0] >> 21) + (t[1] & 0x1fffff); t[1] = tb * a[97]; r[97] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff));
r[98] += t[1] >> 21; r[98] += (sp_digit)(t[1] >> 21);
#endif /* WOLFSSL_SP_SMALL */ #endif /* WOLFSSL_SP_SMALL */
} }
@ -9409,6 +9417,8 @@ static void sp_4096_mont_reduce_98(sp_digit* a, const sp_digit* m, sp_digit mp)
int i; int i;
sp_digit mu; sp_digit mu;
sp_4096_norm_98(a + 98);
for (i=0; i<97; i++) { for (i=0; i<97; i++) {
mu = (a[i] * mp) & 0x1fffff; mu = (a[i] * mp) & 0x1fffff;
sp_4096_mul_add_98(a+i, m, mu); sp_4096_mul_add_98(a+i, m, mu);
@ -10204,29 +10214,29 @@ SP_NOINLINE static void sp_4096_mul_add_196(sp_digit* r, const sp_digit* a,
int64_t t[8]; int64_t t[8];
int i; int i;
t[0] = tb * a[0]; r[0] += t[0] & 0x1fffff; t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffff);
for (i = 0; i < 192; i += 8) { for (i = 0; i < 192; i += 8) {
t[1] = tb * a[i+1]; t[1] = tb * a[i+1];
r[i+1] += (t[0] >> 21) + (t[1] & 0x1fffff); r[i+1] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff));
t[2] = tb * a[i+2]; t[2] = tb * a[i+2];
r[i+2] += (t[1] >> 21) + (t[2] & 0x1fffff); r[i+2] += (sp_digit)((t[1] >> 21) + (t[2] & 0x1fffff));
t[3] = tb * a[i+3]; t[3] = tb * a[i+3];
r[i+3] += (t[2] >> 21) + (t[3] & 0x1fffff); r[i+3] += (sp_digit)((t[2] >> 21) + (t[3] & 0x1fffff));
t[4] = tb * a[i+4]; t[4] = tb * a[i+4];
r[i+4] += (t[3] >> 21) + (t[4] & 0x1fffff); r[i+4] += (sp_digit)((t[3] >> 21) + (t[4] & 0x1fffff));
t[5] = tb * a[i+5]; t[5] = tb * a[i+5];
r[i+5] += (t[4] >> 21) + (t[5] & 0x1fffff); r[i+5] += (sp_digit)((t[4] >> 21) + (t[5] & 0x1fffff));
t[6] = tb * a[i+6]; t[6] = tb * a[i+6];
r[i+6] += (t[5] >> 21) + (t[6] & 0x1fffff); r[i+6] += (sp_digit)((t[5] >> 21) + (t[6] & 0x1fffff));
t[7] = tb * a[i+7]; t[7] = tb * a[i+7];
r[i+7] += (t[6] >> 21) + (t[7] & 0x1fffff); r[i+7] += (sp_digit)((t[6] >> 21) + (t[7] & 0x1fffff));
t[0] = tb * a[i+8]; t[0] = tb * a[i+8];
r[i+8] += (t[7] >> 21) + (t[0] & 0x1fffff); r[i+8] += (sp_digit)((t[7] >> 21) + (t[0] & 0x1fffff));
} }
t[1] = tb * a[193]; r[193] += (t[0] >> 21) + (t[1] & 0x1fffff); t[1] = tb * a[193]; r[193] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff));
t[2] = tb * a[194]; r[194] += (t[1] >> 21) + (t[2] & 0x1fffff); t[2] = tb * a[194]; r[194] += (sp_digit)((t[1] >> 21) + (t[2] & 0x1fffff));
t[3] = tb * a[195]; r[195] += (t[2] >> 21) + (t[3] & 0x1fffff); t[3] = tb * a[195]; r[195] += (sp_digit)((t[2] >> 21) + (t[3] & 0x1fffff));
r[196] += t[3] >> 21; r[196] += (sp_digit)(t[3] >> 21);
#endif /* WOLFSSL_SP_SMALL */ #endif /* WOLFSSL_SP_SMALL */
} }
@ -10323,6 +10333,8 @@ static void sp_4096_mont_reduce_196(sp_digit* a, const sp_digit* m, sp_digit mp)
int i; int i;
sp_digit mu; sp_digit mu;
sp_4096_norm_196(a + 196);
#ifdef WOLFSSL_SP_DH #ifdef WOLFSSL_SP_DH
if (mp != 1) { if (mp != 1) {
for (i=0; i<195; i++) { for (i=0; i<195; i++) {
@ -12998,16 +13010,16 @@ SP_NOINLINE static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a,
t[ 7] = tb * a[ 7]; t[ 7] = tb * a[ 7];
t[ 8] = tb * a[ 8]; t[ 8] = tb * a[ 8];
t[ 9] = tb * a[ 9]; t[ 9] = tb * a[ 9];
r[ 0] += (t[ 0] & 0x3ffffff); r[ 0] += (sp_digit)(t[ 0] & 0x3ffffff);
r[ 1] += (t[ 0] >> 26) + (t[ 1] & 0x3ffffff); r[ 1] += (sp_digit)((t[ 0] >> 26) + (t[ 1] & 0x3ffffff));
r[ 2] += (t[ 1] >> 26) + (t[ 2] & 0x3ffffff); r[ 2] += (sp_digit)((t[ 1] >> 26) + (t[ 2] & 0x3ffffff));
r[ 3] += (t[ 2] >> 26) + (t[ 3] & 0x3ffffff); r[ 3] += (sp_digit)((t[ 2] >> 26) + (t[ 3] & 0x3ffffff));
r[ 4] += (t[ 3] >> 26) + (t[ 4] & 0x3ffffff); r[ 4] += (sp_digit)((t[ 3] >> 26) + (t[ 4] & 0x3ffffff));
r[ 5] += (t[ 4] >> 26) + (t[ 5] & 0x3ffffff); r[ 5] += (sp_digit)((t[ 4] >> 26) + (t[ 5] & 0x3ffffff));
r[ 6] += (t[ 5] >> 26) + (t[ 6] & 0x3ffffff); r[ 6] += (sp_digit)((t[ 5] >> 26) + (t[ 6] & 0x3ffffff));
r[ 7] += (t[ 6] >> 26) + (t[ 7] & 0x3ffffff); r[ 7] += (sp_digit)((t[ 6] >> 26) + (t[ 7] & 0x3ffffff));
r[ 8] += (t[ 7] >> 26) + (t[ 8] & 0x3ffffff); r[ 8] += (sp_digit)((t[ 7] >> 26) + (t[ 8] & 0x3ffffff));
r[ 9] += (t[ 8] >> 26) + (t[ 9] & 0x3ffffff); r[ 9] += (sp_digit)((t[ 8] >> 26) + (t[ 9] & 0x3ffffff));
r[10] += t[ 9] >> 26; r[10] += t[ 9] >> 26;
#endif /* WOLFSSL_SP_SMALL */ #endif /* WOLFSSL_SP_SMALL */
} }

View File

@ -1058,27 +1058,27 @@ SP_NOINLINE static void sp_2048_mul_add_18(sp_digit* r, const sp_digit* a,
int128_t t[8]; int128_t t[8];
int i; int i;
t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffL; t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL);
for (i = 0; i < 16; i += 8) { for (i = 0; i < 16; i += 8) {
t[1] = tb * a[i+1]; t[1] = tb * a[i+1];
r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
t[2] = tb * a[i+2]; t[2] = tb * a[i+2];
r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
t[3] = tb * a[i+3]; t[3] = tb * a[i+3];
r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL));
t[4] = tb * a[i+4]; t[4] = tb * a[i+4];
r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffL); r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL));
t[5] = tb * a[i+5]; t[5] = tb * a[i+5];
r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffL); r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL));
t[6] = tb * a[i+6]; t[6] = tb * a[i+6];
r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffL); r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL));
t[7] = tb * a[i+7]; t[7] = tb * a[i+7];
r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffL); r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL));
t[0] = tb * a[i+8]; t[0] = tb * a[i+8];
r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffL); r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL));
} }
t[1] = tb * a[17]; r[17] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); t[1] = tb * a[17]; r[17] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
r[18] += t[1] >> 57; r[18] += (sp_digit)(t[1] >> 57);
#endif /* WOLFSSL_SP_SMALL */ #endif /* WOLFSSL_SP_SMALL */
} }
@ -1164,6 +1164,8 @@ static void sp_2048_mont_reduce_18(sp_digit* a, const sp_digit* m, sp_digit mp)
int i; int i;
sp_digit mu; sp_digit mu;
sp_2048_norm_18(a + 18);
for (i=0; i<17; i++) { for (i=0; i<17; i++) {
mu = (a[i] * mp) & 0x1ffffffffffffffL; mu = (a[i] * mp) & 0x1ffffffffffffffL;
sp_2048_mul_add_18(a+i, m, mu); sp_2048_mul_add_18(a+i, m, mu);
@ -1935,29 +1937,29 @@ SP_NOINLINE static void sp_2048_mul_add_36(sp_digit* r, const sp_digit* a,
int128_t t[8]; int128_t t[8];
int i; int i;
t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffL; t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL);
for (i = 0; i < 32; i += 8) { for (i = 0; i < 32; i += 8) {
t[1] = tb * a[i+1]; t[1] = tb * a[i+1];
r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
t[2] = tb * a[i+2]; t[2] = tb * a[i+2];
r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
t[3] = tb * a[i+3]; t[3] = tb * a[i+3];
r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL));
t[4] = tb * a[i+4]; t[4] = tb * a[i+4];
r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffL); r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL));
t[5] = tb * a[i+5]; t[5] = tb * a[i+5];
r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffL); r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL));
t[6] = tb * a[i+6]; t[6] = tb * a[i+6];
r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffL); r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL));
t[7] = tb * a[i+7]; t[7] = tb * a[i+7];
r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffL); r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL));
t[0] = tb * a[i+8]; t[0] = tb * a[i+8];
r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffL); r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL));
} }
t[1] = tb * a[33]; r[33] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); t[1] = tb * a[33]; r[33] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
t[2] = tb * a[34]; r[34] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); t[2] = tb * a[34]; r[34] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
t[3] = tb * a[35]; r[35] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); t[3] = tb * a[35]; r[35] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL));
r[36] += t[3] >> 57; r[36] += (sp_digit)(t[3] >> 57);
#endif /* WOLFSSL_SP_SMALL */ #endif /* WOLFSSL_SP_SMALL */
} }
@ -2061,6 +2063,8 @@ static void sp_2048_mont_reduce_36(sp_digit* a, const sp_digit* m, sp_digit mp)
int i; int i;
sp_digit mu; sp_digit mu;
sp_2048_norm_36(a + 36);
#ifdef WOLFSSL_SP_DH #ifdef WOLFSSL_SP_DH
if (mp != 1) { if (mp != 1) {
for (i=0; i<35; i++) { for (i=0; i<35; i++) {
@ -5148,28 +5152,28 @@ SP_NOINLINE static void sp_3072_mul_add_27(sp_digit* r, const sp_digit* a,
int128_t t[8]; int128_t t[8];
int i; int i;
t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffL; t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL);
for (i = 0; i < 24; i += 8) { for (i = 0; i < 24; i += 8) {
t[1] = tb * a[i+1]; t[1] = tb * a[i+1];
r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
t[2] = tb * a[i+2]; t[2] = tb * a[i+2];
r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
t[3] = tb * a[i+3]; t[3] = tb * a[i+3];
r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL));
t[4] = tb * a[i+4]; t[4] = tb * a[i+4];
r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffL); r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL));
t[5] = tb * a[i+5]; t[5] = tb * a[i+5];
r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffL); r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL));
t[6] = tb * a[i+6]; t[6] = tb * a[i+6];
r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffL); r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL));
t[7] = tb * a[i+7]; t[7] = tb * a[i+7];
r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffL); r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL));
t[0] = tb * a[i+8]; t[0] = tb * a[i+8];
r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffL); r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL));
} }
t[1] = tb * a[25]; r[25] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); t[1] = tb * a[25]; r[25] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
t[2] = tb * a[26]; r[26] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); t[2] = tb * a[26]; r[26] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
r[27] += t[2] >> 57; r[27] += (sp_digit)(t[2] >> 57);
#endif /* WOLFSSL_SP_SMALL */ #endif /* WOLFSSL_SP_SMALL */
} }
@ -5269,6 +5273,8 @@ static void sp_3072_mont_reduce_27(sp_digit* a, const sp_digit* m, sp_digit mp)
int i; int i;
sp_digit mu; sp_digit mu;
sp_3072_norm_27(a + 27);
for (i=0; i<26; i++) { for (i=0; i<26; i++) {
mu = (a[i] * mp) & 0x1ffffffffffffffL; mu = (a[i] * mp) & 0x1ffffffffffffffL;
sp_3072_mul_add_27(a+i, m, mu); sp_3072_mul_add_27(a+i, m, mu);
@ -6010,31 +6016,31 @@ SP_NOINLINE static void sp_3072_mul_add_54(sp_digit* r, const sp_digit* a,
int128_t t[8]; int128_t t[8];
int i; int i;
t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffL; t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL);
for (i = 0; i < 48; i += 8) { for (i = 0; i < 48; i += 8) {
t[1] = tb * a[i+1]; t[1] = tb * a[i+1];
r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
t[2] = tb * a[i+2]; t[2] = tb * a[i+2];
r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
t[3] = tb * a[i+3]; t[3] = tb * a[i+3];
r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL));
t[4] = tb * a[i+4]; t[4] = tb * a[i+4];
r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffL); r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL));
t[5] = tb * a[i+5]; t[5] = tb * a[i+5];
r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffL); r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL));
t[6] = tb * a[i+6]; t[6] = tb * a[i+6];
r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffL); r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL));
t[7] = tb * a[i+7]; t[7] = tb * a[i+7];
r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffL); r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL));
t[0] = tb * a[i+8]; t[0] = tb * a[i+8];
r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffL); r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL));
} }
t[1] = tb * a[49]; r[49] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); t[1] = tb * a[49]; r[49] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL));
t[2] = tb * a[50]; r[50] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); t[2] = tb * a[50]; r[50] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL));
t[3] = tb * a[51]; r[51] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); t[3] = tb * a[51]; r[51] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL));
t[4] = tb * a[52]; r[52] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffL); t[4] = tb * a[52]; r[52] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL));
t[5] = tb * a[53]; r[53] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffL); t[5] = tb * a[53]; r[53] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL));
r[54] += t[5] >> 57; r[54] += (sp_digit)(t[5] >> 57);
#endif /* WOLFSSL_SP_SMALL */ #endif /* WOLFSSL_SP_SMALL */
} }
@ -6137,6 +6143,8 @@ static void sp_3072_mont_reduce_54(sp_digit* a, const sp_digit* m, sp_digit mp)
int i; int i;
sp_digit mu; sp_digit mu;
sp_3072_norm_54(a + 54);
#ifdef WOLFSSL_SP_DH #ifdef WOLFSSL_SP_DH
if (mp != 1) { if (mp != 1) {
for (i=0; i<53; i++) { for (i=0; i<53; i++) {
@ -9284,32 +9292,32 @@ SP_NOINLINE static void sp_4096_mul_add_39(sp_digit* r, const sp_digit* a,
int128_t t[8]; int128_t t[8];
int i; int i;
t[0] = tb * a[0]; r[0] += t[0] & 0x1fffffffffffffL; t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffffffffffffL);
for (i = 0; i < 32; i += 8) { for (i = 0; i < 32; i += 8) {
t[1] = tb * a[i+1]; t[1] = tb * a[i+1];
r[i+1] += (t[0] >> 53) + (t[1] & 0x1fffffffffffffL); r[i+1] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL));
t[2] = tb * a[i+2]; t[2] = tb * a[i+2];
r[i+2] += (t[1] >> 53) + (t[2] & 0x1fffffffffffffL); r[i+2] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL));
t[3] = tb * a[i+3]; t[3] = tb * a[i+3];
r[i+3] += (t[2] >> 53) + (t[3] & 0x1fffffffffffffL); r[i+3] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL));
t[4] = tb * a[i+4]; t[4] = tb * a[i+4];
r[i+4] += (t[3] >> 53) + (t[4] & 0x1fffffffffffffL); r[i+4] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL));
t[5] = tb * a[i+5]; t[5] = tb * a[i+5];
r[i+5] += (t[4] >> 53) + (t[5] & 0x1fffffffffffffL); r[i+5] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL));
t[6] = tb * a[i+6]; t[6] = tb * a[i+6];
r[i+6] += (t[5] >> 53) + (t[6] & 0x1fffffffffffffL); r[i+6] += (sp_digit)((t[5] >> 53) + (t[6] & 0x1fffffffffffffL));
t[7] = tb * a[i+7]; t[7] = tb * a[i+7];
r[i+7] += (t[6] >> 53) + (t[7] & 0x1fffffffffffffL); r[i+7] += (sp_digit)((t[6] >> 53) + (t[7] & 0x1fffffffffffffL));
t[0] = tb * a[i+8]; t[0] = tb * a[i+8];
r[i+8] += (t[7] >> 53) + (t[0] & 0x1fffffffffffffL); r[i+8] += (sp_digit)((t[7] >> 53) + (t[0] & 0x1fffffffffffffL));
} }
t[1] = tb * a[33]; r[33] += (t[0] >> 53) + (t[1] & 0x1fffffffffffffL); t[1] = tb * a[33]; r[33] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL));
t[2] = tb * a[34]; r[34] += (t[1] >> 53) + (t[2] & 0x1fffffffffffffL); t[2] = tb * a[34]; r[34] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL));
t[3] = tb * a[35]; r[35] += (t[2] >> 53) + (t[3] & 0x1fffffffffffffL); t[3] = tb * a[35]; r[35] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL));
t[4] = tb * a[36]; r[36] += (t[3] >> 53) + (t[4] & 0x1fffffffffffffL); t[4] = tb * a[36]; r[36] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL));
t[5] = tb * a[37]; r[37] += (t[4] >> 53) + (t[5] & 0x1fffffffffffffL); t[5] = tb * a[37]; r[37] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL));
t[6] = tb * a[38]; r[38] += (t[5] >> 53) + (t[6] & 0x1fffffffffffffL); t[6] = tb * a[38]; r[38] += (sp_digit)((t[5] >> 53) + (t[6] & 0x1fffffffffffffL));
r[39] += t[6] >> 53; r[39] += (sp_digit)(t[6] >> 53);
#endif /* WOLFSSL_SP_SMALL */ #endif /* WOLFSSL_SP_SMALL */
} }
@ -9415,6 +9423,8 @@ static void sp_4096_mont_reduce_39(sp_digit* a, const sp_digit* m, sp_digit mp)
int i; int i;
sp_digit mu; sp_digit mu;
sp_4096_norm_39(a + 39);
for (i=0; i<38; i++) { for (i=0; i<38; i++) {
mu = (a[i] * mp) & 0x1fffffffffffffL; mu = (a[i] * mp) & 0x1fffffffffffffL;
sp_4096_mul_add_39(a+i, m, mu); sp_4096_mul_add_39(a+i, m, mu);
@ -10225,31 +10235,31 @@ SP_NOINLINE static void sp_4096_mul_add_78(sp_digit* r, const sp_digit* a,
int128_t t[8]; int128_t t[8];
int i; int i;
t[0] = tb * a[0]; r[0] += t[0] & 0x1fffffffffffffL; t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffffffffffffL);
for (i = 0; i < 72; i += 8) { for (i = 0; i < 72; i += 8) {
t[1] = tb * a[i+1]; t[1] = tb * a[i+1];
r[i+1] += (t[0] >> 53) + (t[1] & 0x1fffffffffffffL); r[i+1] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL));
t[2] = tb * a[i+2]; t[2] = tb * a[i+2];
r[i+2] += (t[1] >> 53) + (t[2] & 0x1fffffffffffffL); r[i+2] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL));
t[3] = tb * a[i+3]; t[3] = tb * a[i+3];
r[i+3] += (t[2] >> 53) + (t[3] & 0x1fffffffffffffL); r[i+3] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL));
t[4] = tb * a[i+4]; t[4] = tb * a[i+4];
r[i+4] += (t[3] >> 53) + (t[4] & 0x1fffffffffffffL); r[i+4] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL));
t[5] = tb * a[i+5]; t[5] = tb * a[i+5];
r[i+5] += (t[4] >> 53) + (t[5] & 0x1fffffffffffffL); r[i+5] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL));
t[6] = tb * a[i+6]; t[6] = tb * a[i+6];
r[i+6] += (t[5] >> 53) + (t[6] & 0x1fffffffffffffL); r[i+6] += (sp_digit)((t[5] >> 53) + (t[6] & 0x1fffffffffffffL));
t[7] = tb * a[i+7]; t[7] = tb * a[i+7];
r[i+7] += (t[6] >> 53) + (t[7] & 0x1fffffffffffffL); r[i+7] += (sp_digit)((t[6] >> 53) + (t[7] & 0x1fffffffffffffL));
t[0] = tb * a[i+8]; t[0] = tb * a[i+8];
r[i+8] += (t[7] >> 53) + (t[0] & 0x1fffffffffffffL); r[i+8] += (sp_digit)((t[7] >> 53) + (t[0] & 0x1fffffffffffffL));
} }
t[1] = tb * a[73]; r[73] += (t[0] >> 53) + (t[1] & 0x1fffffffffffffL); t[1] = tb * a[73]; r[73] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL));
t[2] = tb * a[74]; r[74] += (t[1] >> 53) + (t[2] & 0x1fffffffffffffL); t[2] = tb * a[74]; r[74] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL));
t[3] = tb * a[75]; r[75] += (t[2] >> 53) + (t[3] & 0x1fffffffffffffL); t[3] = tb * a[75]; r[75] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL));
t[4] = tb * a[76]; r[76] += (t[3] >> 53) + (t[4] & 0x1fffffffffffffL); t[4] = tb * a[76]; r[76] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL));
t[5] = tb * a[77]; r[77] += (t[4] >> 53) + (t[5] & 0x1fffffffffffffL); t[5] = tb * a[77]; r[77] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL));
r[78] += t[5] >> 53; r[78] += (sp_digit)(t[5] >> 53);
#endif /* WOLFSSL_SP_SMALL */ #endif /* WOLFSSL_SP_SMALL */
} }
@ -10352,6 +10362,8 @@ static void sp_4096_mont_reduce_78(sp_digit* a, const sp_digit* m, sp_digit mp)
int i; int i;
sp_digit mu; sp_digit mu;
sp_4096_norm_78(a + 78);
#ifdef WOLFSSL_SP_DH #ifdef WOLFSSL_SP_DH
if (mp != 1) { if (mp != 1) {
for (i=0; i<77; i++) { for (i=0; i<77; i++) {
@ -12779,11 +12791,11 @@ SP_NOINLINE static void sp_256_mul_add_5(sp_digit* r, const sp_digit* a,
t[ 2] = tb * a[ 2]; t[ 2] = tb * a[ 2];
t[ 3] = tb * a[ 3]; t[ 3] = tb * a[ 3];
t[ 4] = tb * a[ 4]; t[ 4] = tb * a[ 4];
r[ 0] += (t[ 0] & 0xfffffffffffffL); r[ 0] += (sp_digit)(t[ 0] & 0xfffffffffffffL);
r[ 1] += (t[ 0] >> 52) + (t[ 1] & 0xfffffffffffffL); r[ 1] += (sp_digit)((t[ 0] >> 52) + (t[ 1] & 0xfffffffffffffL));
r[ 2] += (t[ 1] >> 52) + (t[ 2] & 0xfffffffffffffL); r[ 2] += (sp_digit)((t[ 1] >> 52) + (t[ 2] & 0xfffffffffffffL));
r[ 3] += (t[ 2] >> 52) + (t[ 3] & 0xfffffffffffffL); r[ 3] += (sp_digit)((t[ 2] >> 52) + (t[ 3] & 0xfffffffffffffL));
r[ 4] += (t[ 3] >> 52) + (t[ 4] & 0xfffffffffffffL); r[ 4] += (sp_digit)((t[ 3] >> 52) + (t[ 4] & 0xfffffffffffffL));
r[ 5] += t[ 4] >> 52; r[ 5] += t[ 4] >> 52;
#endif /* WOLFSSL_SP_SMALL */ #endif /* WOLFSSL_SP_SMALL */
} }

View File

@ -149,12 +149,19 @@ int sp_unsigned_bin_size(sp_int* a)
* a SP integer. * a SP integer.
* in Array of bytes. * in Array of bytes.
* inSz Number of data bytes in array. * inSz Number of data bytes in array.
* returns MP_OKAY always. * returns MP_VAL when the number is too big to fit in an SP and
MP_OKAY otherwise.
*/ */
int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz) int sp_read_unsigned_bin(sp_int* a, const byte* in, int inSz)
{ {
int err = MP_OKAY;
int i, j = 0, s = 0; int i, j = 0, s = 0;
if (inSz > SP_INT_DIGITS * (int)sizeof(a->dp[0])) {
err = MP_VAL;
}
if (err == MP_OKAY) {
a->dp[0] = 0; a->dp[0] = 0;
for (i = inSz-1; i >= 0; i--) { for (i = inSz-1; i >= 0; i--) {
a->dp[j] |= ((sp_int_digit)in[i]) << s; a->dp[j] |= ((sp_int_digit)in[i]) << s;
@ -174,14 +181,12 @@ int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz)
} }
a->used = j + 1; a->used = j + 1;
if (a->dp[j] == 0) sp_clamp(a);
a->used--;
for (j++; j < a->size; j++) for (j++; j < a->size; j++)
a->dp[j] = 0; a->dp[j] = 0;
sp_clamp(a); }
return MP_OKAY; return err;
} }
#ifdef HAVE_ECC #ifdef HAVE_ECC
@ -201,8 +206,13 @@ int sp_read_radix(sp_int* a, const char* in, int radix)
int i, j = 0, k = 0; int i, j = 0, k = 0;
char ch; char ch;
if ((radix != 16) || (*in == '-')) if ((radix != 16) || (*in == '-')) {
err = BAD_FUNC_ARG; err = BAD_FUNC_ARG;
}
while (*in == '0') {
in++;
}
if (err == MP_OKAY) { if (err == MP_OKAY) {
a->dp[0] = 0; a->dp[0] = 0;
@ -221,7 +231,11 @@ int sp_read_radix(sp_int* a, const char* in, int radix)
a->dp[k] |= ((sp_int_digit)ch) << j; a->dp[k] |= ((sp_int_digit)ch) << j;
j += 4; j += 4;
if (j == DIGIT_BIT && k < SP_INT_DIGITS) if (k >= SP_INT_DIGITS - 1) {
err = MP_VAL;
break;
}
if (j == DIGIT_BIT)
a->dp[++k] = 0; a->dp[++k] = 0;
j &= DIGIT_BIT - 1; j &= DIGIT_BIT - 1;
} }
@ -234,8 +248,9 @@ int sp_read_radix(sp_int* a, const char* in, int radix)
for (k++; k < a->size; k++) for (k++; k < a->size; k++)
a->dp[k] = 0; a->dp[k] = 0;
}
sp_clamp(a); sp_clamp(a);
}
return err; return err;
} }
@ -1082,12 +1097,17 @@ int sp_mul(sp_int* a, sp_int* b, sp_int* r)
sp_int tr[1]; sp_int tr[1];
#endif #endif
if (a->used + b->used > SP_INT_DIGITS)
err = MP_VAL;
#ifdef WOLFSSL_SMALL_STACK #ifdef WOLFSSL_SMALL_STACK
if (err == MP_OKAY) {
t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT); t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
if (t == NULL) if (t == NULL)
err = MP_MEM; err = MP_MEM;
else else
tr = &t[1]; tr = &t[1];
}
#endif #endif
if (err == MP_OKAY) { if (err == MP_OKAY) {
@ -1114,13 +1134,17 @@ int sp_mul(sp_int* a, sp_int* b, sp_int* r)
* a SP integer to square. * a SP integer to square.
* m SP integer modulus. * m SP integer modulus.
* r SP integer result. * r SP integer result.
* returns MP_VAL when m is 0, MP_MEM when dynamic memory allocation fails and * returns MP_VAL when m is 0, MP_MEM when dynamic memory allocation fails,
* MP_OKAY otherwise. * MP_VAL when a is too big and MP_OKAY otherwise.
*/ */
static int sp_sqrmod(sp_int* a, sp_int* m, sp_int* r) static int sp_sqrmod(sp_int* a, sp_int* m, sp_int* r)
{ {
int err; int err = MP_OKAY;
if (a->used * 2 > SP_INT_DIGITS)
err = MP_VAL;
if (err == MP_OKAY)
err = sp_mul(a, a, r); err = sp_mul(a, a, r);
if (err == MP_OKAY) if (err == MP_OKAY)
err = sp_mod(r, m, r); err = sp_mod(r, m, r);
@ -1147,11 +1171,16 @@ int sp_mulmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
sp_int t[1]; sp_int t[1];
#endif #endif
if (a->used + b->used > SP_INT_DIGITS)
err = MP_VAL;
#ifdef WOLFSSL_SMALL_STACK #ifdef WOLFSSL_SMALL_STACK
if (err == MP_OKAY) {
t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT);
if (t == NULL) { if (t == NULL) {
err = MP_MEM; err = MP_MEM;
} }
}
#endif #endif
if (err == MP_OKAY) { if (err == MP_OKAY) {
err = sp_mul(a, b, t); err = sp_mul(a, b, t);
@ -1364,7 +1393,9 @@ int sp_invmod(sp_int* a, sp_int* m, sp_int* r)
*/ */
err = sp_invmod(m, a, r); err = sp_invmod(m, a, r);
if (err == MP_OKAY) { if (err == MP_OKAY) {
sp_mul(r, m, r); err = sp_mul(r, m, r);
}
if (err == MP_OKAY) {
sp_sub_d(r, 1, r); sp_sub_d(r, 1, r);
sp_div(r, a, r, NULL); sp_div(r, a, r, NULL);
sp_sub(m, r, r); sp_sub(m, r, r);
@ -1489,24 +1520,59 @@ int sp_lcm(sp_int* a, sp_int* b, sp_int* r)
int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r) int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
{ {
int err = MP_OKAY; int err = MP_OKAY;
int bits = sp_count_bits(m); int done = 0;
int mBits = sp_count_bits(m);
int bBits = sp_count_bits(b);
int eBits = sp_count_bits(e);
if (sp_iszero(m)) {
err = MP_VAL;
}
else if (sp_isone(m)) {
sp_set(r, 0);
done = 1;
}
else if (sp_iszero(e)) {
sp_set(r, 1);
done = 1;
}
else if (sp_iszero(b)) {
sp_set(r, 0);
done = 1;
}
else if (m->used * 2 > SP_INT_DIGITS) {
err = BAD_FUNC_ARG;
}
if (!done && (err == MP_OKAY)) {
#ifndef WOLFSSL_SP_NO_2048 #ifndef WOLFSSL_SP_NO_2048
if (bits == 1024) if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) &&
sp_ModExp_1024(b, e, m, r); (eBits <= 1024)) {
else if (bits == 2048) err = sp_ModExp_1024(b, e, m, r);
sp_ModExp_2048(b, e, m, r); done = 1;
}
else if ((mBits == 2048) && sp_isodd(m) && (bBits <= 2048) &&
(eBits <= 2048)) {
err = sp_ModExp_2048(b, e, m, r);
done = 1;
}
else else
#endif #endif
#ifndef WOLFSSL_SP_NO_3072 #ifndef WOLFSSL_SP_NO_3072
if (bits == 1536) if ((mBits == 1536) && sp_isodd(m) && (bBits <= 1536) &&
sp_ModExp_1536(b, e, m, r); (eBits <= 1536)) {
else if (bits == 3072) err = sp_ModExp_1536(b, e, m, r);
sp_ModExp_3072(b, e, m, r); done = 1;
else }
else if ((mBits == 3072) && sp_isodd(m) && (bBits <= 3072) &&
(eBits <= 3072)) {
err = sp_ModExp_3072(b, e, m, r);
done = 1;
}
#endif #endif
}
#if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN) #if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
if (bits == 256) { if (!done && (err == MP_OKAY)) {
int i; int i;
#ifdef WOLFSSL_SMALL_STACK #ifdef WOLFSSL_SMALL_STACK
@ -1516,37 +1582,56 @@ int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
#endif #endif
#ifdef WOLFSSL_SMALL_STACK #ifdef WOLFSSL_SMALL_STACK
t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT); if (!done && (err == MP_OKAY)) {
t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT);
if (t == NULL) { if (t == NULL) {
err = MP_MEM; err = MP_MEM;
} }
#endif
if (err == MP_OKAY) {
sp_init(t);
sp_copy(b, t);
bits = sp_count_bits(e);
} }
for (i = bits-2; err == MP_OKAY && i >= 0; i--) { #endif
if (!done && (err == MP_OKAY)) {
sp_init(t);
if (sp_cmp(b, m) != MP_LT) {
err = sp_mod(b, m, t);
if (err == MP_OKAY && sp_iszero(t)) {
sp_set(r, 0);
done = 1;
}
}
else {
sp_copy(b, t);
}
if (!done && (err == MP_OKAY)) {
for (i = eBits-2; err == MP_OKAY && i >= 0; i--) {
err = sp_sqrmod(t, m, t); err = sp_sqrmod(t, m, t);
if (err == MP_OKAY && if (err == MP_OKAY && (e->dp[i / SP_WORD_SIZE] >>
(e->dp[i / SP_WORD_SIZE] >> (i % SP_WORD_SIZE)) & 1) { (i % SP_WORD_SIZE)) & 1) {
err = sp_mulmod(t, b, m, t); err = sp_mulmod(t, b, m, t);
} }
} }
if (err == MP_OKAY) }
}
if (!done && (err == MP_OKAY)) {
sp_copy(t, r); sp_copy(t, r);
}
#ifdef WOLFSSL_SMALL_STACK #ifdef WOLFSSL_SMALL_STACK
if (t != NULL) if (t != NULL) {
XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
}
#endif #endif
} }
else #else
#endif {
err = MP_VAL; err = MP_VAL;
}
#endif
(void)bits; (void)mBits;
(void)bBits;
(void)eBits;
return err; return err;
} }

View File

@ -161,7 +161,7 @@ MP_API int sp_init_multi(sp_int* a, sp_int* b, sp_int* c, sp_int* d,
sp_int* e, sp_int* f); sp_int* e, sp_int* f);
MP_API void sp_clear(sp_int* a); MP_API void sp_clear(sp_int* a);
MP_API int sp_unsigned_bin_size(sp_int* a); MP_API int sp_unsigned_bin_size(sp_int* a);
MP_API int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz); MP_API int sp_read_unsigned_bin(sp_int* a, const byte* in, int inSz);
MP_API int sp_read_radix(sp_int* a, const char* in, int radix); MP_API int sp_read_radix(sp_int* a, const char* in, int radix);
MP_API int sp_cmp(sp_int* a, sp_int* b); MP_API int sp_cmp(sp_int* a, sp_int* b);
MP_API int sp_count_bits(sp_int* a); MP_API int sp_count_bits(sp_int* a);