Special implementation of mod exp when base is 2 in SP

pull/2383/head
Sean Parkinson 2019-07-29 11:02:29 +10:00
parent 50fbdb961f
commit 23af4e92f3
10 changed files with 9802 additions and 3447 deletions

View File

@ -499,7 +499,7 @@ static int lng_index = 0;
#ifndef NO_MAIN_DRIVER
#ifndef MAIN_NO_ARGS
static const char* bench_Usage_msg1[][12] = {
static const char* bench_Usage_msg1[][14] = {
/* 0 English */
{ "-? <num> Help, print this usage\n 0: English, 1: Japanese\n",
"-csv Print terminal output in csv format\n",
@ -508,6 +508,8 @@ static const char* bench_Usage_msg1[][12] = {
"-dgst_full Full digest operation performed.\n",
"-rsa_sign Measure RSA sign/verify instead of encrypt/decrypt.\n",
"<keySz> -rsa-sz\n Measure RSA <key size> performance.\n",
"-ffhdhe2048 Measure DH using FFDHE 2048-bit parameters.\n",
"-ffhdhe3072 Measure DH using FFDHE 3072-bit parameters.\n",
"-<alg> Algorithm to benchmark. Available algorithms include:\n",
"-lng <num> Display benchmark result by specified language.\n 0: English, 1: Japanese\n",
"<num> Size of block in bytes\n",
@ -523,6 +525,8 @@ static const char* bench_Usage_msg1[][12] = {
"-dgst_full フルの digest 暗号操作を実施します。\n",
"-rsa_sign 暗号/復号化の代わりに RSA の署名/検証を測定します。\n",
"<keySz> -rsa-sz\n RSA <key size> の性能を測定します。\n",
"-ffhdhe2048 Measure DH using FFDHE 2048-bit parameters.\n",
"-ffhdhe3072 Measure DH using FFDHE 3072-bit parameters.\n",
"-<alg> アルゴリズムのベンチマークを実施します。\n 利用可能なアルゴリズムは下記を含みます:\n",
"-lng <num> 指定された言語でベンチマーク結果を表示します。\n 0: 英語、 1: 日本語\n",
"<num> ブロックサイズをバイト単位で指定します。\n",
@ -858,6 +862,10 @@ static int digest_stream = 1;
/* Don't measure RSA sign/verify by default */
static int rsa_sign_verify = 0;
#endif
#ifndef NO_DH
/* Use the FFDHE parameters */
static int use_ffdhe = 0;
#endif
/* Don't print out in CSV format by default */
static int csv_format = 0;
@ -4647,7 +4655,7 @@ void bench_dh(int doAsync)
int dhKeySz = BENCH_DH_KEY_SIZE * 8; /* used in printf */
const char**desc = bench_desc_words[lng_index];
#ifndef NO_ASN
size_t bytes;
size_t bytes = 0;
word32 idx;
#endif
word32 pubSz[BENCH_MAX_PENDING];
@ -4655,6 +4663,9 @@ void bench_dh(int doAsync)
word32 pubSz2;
word32 privSz2;
word32 agreeSz[BENCH_MAX_PENDING];
#ifdef HAVE_FFDHE_2048
const DhParams *params = NULL;
#endif
DECLARE_ARRAY(pub, byte, BENCH_MAX_PENDING, BENCH_DH_KEY_SIZE, HEAP_HINT);
DECLARE_VAR(pub2, byte, BENCH_DH_KEY_SIZE, HEAP_HINT);
@ -4664,24 +4675,38 @@ void bench_dh(int doAsync)
(void)tmp;
if (!use_ffdhe) {
#if defined(NO_ASN)
dhKeySz = 1024;
/* do nothing, but don't use default FILE */
dhKeySz = 1024;
/* do nothing, but don't use default FILE */
#elif defined(USE_CERT_BUFFERS_1024)
tmp = dh_key_der_1024;
bytes = (size_t)sizeof_dh_key_der_1024;
dhKeySz = 1024;
tmp = dh_key_der_1024;
bytes = (size_t)sizeof_dh_key_der_1024;
dhKeySz = 1024;
#elif defined(USE_CERT_BUFFERS_2048)
tmp = dh_key_der_2048;
bytes = (size_t)sizeof_dh_key_der_2048;
dhKeySz = 2048;
tmp = dh_key_der_2048;
bytes = (size_t)sizeof_dh_key_der_2048;
dhKeySz = 2048;
#elif defined(USE_CERT_BUFFERS_3072)
tmp = dh_key_der_3072;
bytes = (size_t)sizeof_dh_key_der_3072;
dhKeySz = 3072;
tmp = dh_key_der_3072;
bytes = (size_t)sizeof_dh_key_der_3072;
dhKeySz = 3072;
#else
#error "need to define a cert buffer size"
#endif /* USE_CERT_BUFFERS */
}
#ifdef HAVE_FFDHE_2048
else if (use_ffdhe == 2048) {
params = wc_Dh_ffdhe2048_Get();
dhKeySz = 2048;
}
#endif
#ifdef HAVE_FFDHE_3072
else if (use_ffdhe == 3072) {
params = wc_Dh_ffdhe3072_Get();
dhKeySz = 3072;
}
#endif
/* clear for done cleanup */
XMEMSET(dhKey, 0, sizeof(dhKey));
@ -4695,11 +4720,20 @@ void bench_dh(int doAsync)
goto exit;
/* setup key */
if (!use_ffdhe) {
#ifdef NO_ASN
ret = wc_DhSetKey(&dhKey[i], dh_p, sizeof(dh_p), dh_g, sizeof(dh_g));
ret = wc_DhSetKey(&dhKey[i], dh_p, sizeof(dh_p), dh_g,
sizeof(dh_g));
#else
idx = 0;
ret = wc_DhKeyDecode(tmp, &idx, &dhKey[i], (word32)bytes);
idx = 0;
ret = wc_DhKeyDecode(tmp, &idx, &dhKey[i], (word32)bytes);
#endif
}
#if defined(HAVE_FFDHE_2048) || defined(HAVE_FFDHE_3072)
else if (params != NULL) {
ret = wc_DhSetKey(&dhKey[i], params->p, params->p_len, params->g,
params->g_len);
}
#endif
if (ret != 0) {
printf("DhKeyDecode failed %d, can't benchmark\n", ret);
@ -5682,8 +5716,14 @@ static void Usage(void)
printf("%s", bench_Usage_msg1[lng_index][6]); /* option -rsa-sz */
#endif
#endif
#if !defined(NO_DH) && defined(HAVE_FFDHE_2048)
printf("%s", bench_Usage_msg1[lng_index][7]); /* option -ffdhe2048 */
#endif
#if !defined(NO_DH) && defined(HAVE_FFDHE_3072)
printf("%s", bench_Usage_msg1[lng_index][8]); /* option -ffdhe3072 */
#endif
#ifndef WOLFSSL_BENCHMARK_ALL
printf("%s", bench_Usage_msg1[lng_index][7]); /* option -<alg> */
printf("%s", bench_Usage_msg1[lng_index][9]); /* option -<alg> */
printf(" ");
line = 13;
for (i=0; bench_cipher_opt[i].str != NULL; i++)
@ -5706,8 +5746,8 @@ static void Usage(void)
print_alg(bench_other_opt[i].str + 1, &line);
printf("\n");
#endif
printf("%s", bench_Usage_msg1[lng_index][8]); /* option -lng */
printf("%s", bench_Usage_msg1[lng_index][9]); /* option <num> */
printf("%s", bench_Usage_msg1[lng_index][10]); /* option -lng */
printf("%s", bench_Usage_msg1[lng_index][11]); /* option <num> */
#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
printf("%s", bench_Usage_msg1[lng_index][10]); /* option -threads <num> */
#endif
@ -5791,6 +5831,14 @@ int main(int argc, char** argv)
else if (string_matches(argv[1], "-rsa_sign"))
rsa_sign_verify = 1;
#endif
#if !defined(NO_DH) && defined(HAVE_FFDHE_2048)
else if (string_matches(argv[1], "-ffdhe2048"))
use_ffdhe = 2048;
#endif
#if !defined(NO_DH) && defined(HAVE_FFDHE_3072)
else if (string_matches(argv[1], "-ffdhe3072"))
use_ffdhe = 3072;
#endif
#ifdef BENCH_ASYM
else if (string_matches(argv[1], "-csv")) {
csv_format = 1;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -3080,7 +3080,7 @@ static int sp_2048_mod_exp_90(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
* a A single precision integer.
* m Mask to AND against each digit.
*/
static void sp_2048_mask_45(sp_digit* r, sp_digit* a, sp_digit m)
static void sp_2048_mask_45(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
int i;
@ -3720,6 +3720,213 @@ int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
}
#ifdef WOLFSSL_HAVE_SP_DH
#ifdef HAVE_FFDHE_2048
SP_NOINLINE static void sp_2048_lshift_90(sp_digit* r, sp_digit* a, byte n)
{
#ifdef WOLFSSL_SP_SMALL
int i;
r[90] = a[89] >> (23 - n);
for (i=89; i>0; i--)
r[i] = ((a[i] << n) | (a[i-1] >> (23 - n)) & 0x7fffff;
#else
r[90] = a[89] >> (23 - n);
r[89] = ((a[89] << n) | (a[88] >> (23 - n))) & 0x7fffff;
r[88] = ((a[88] << n) | (a[87] >> (23 - n))) & 0x7fffff;
r[87] = ((a[87] << n) | (a[86] >> (23 - n))) & 0x7fffff;
r[86] = ((a[86] << n) | (a[85] >> (23 - n))) & 0x7fffff;
r[85] = ((a[85] << n) | (a[84] >> (23 - n))) & 0x7fffff;
r[84] = ((a[84] << n) | (a[83] >> (23 - n))) & 0x7fffff;
r[83] = ((a[83] << n) | (a[82] >> (23 - n))) & 0x7fffff;
r[82] = ((a[82] << n) | (a[81] >> (23 - n))) & 0x7fffff;
r[81] = ((a[81] << n) | (a[80] >> (23 - n))) & 0x7fffff;
r[80] = ((a[80] << n) | (a[79] >> (23 - n))) & 0x7fffff;
r[79] = ((a[79] << n) | (a[78] >> (23 - n))) & 0x7fffff;
r[78] = ((a[78] << n) | (a[77] >> (23 - n))) & 0x7fffff;
r[77] = ((a[77] << n) | (a[76] >> (23 - n))) & 0x7fffff;
r[76] = ((a[76] << n) | (a[75] >> (23 - n))) & 0x7fffff;
r[75] = ((a[75] << n) | (a[74] >> (23 - n))) & 0x7fffff;
r[74] = ((a[74] << n) | (a[73] >> (23 - n))) & 0x7fffff;
r[73] = ((a[73] << n) | (a[72] >> (23 - n))) & 0x7fffff;
r[72] = ((a[72] << n) | (a[71] >> (23 - n))) & 0x7fffff;
r[71] = ((a[71] << n) | (a[70] >> (23 - n))) & 0x7fffff;
r[70] = ((a[70] << n) | (a[69] >> (23 - n))) & 0x7fffff;
r[69] = ((a[69] << n) | (a[68] >> (23 - n))) & 0x7fffff;
r[68] = ((a[68] << n) | (a[67] >> (23 - n))) & 0x7fffff;
r[67] = ((a[67] << n) | (a[66] >> (23 - n))) & 0x7fffff;
r[66] = ((a[66] << n) | (a[65] >> (23 - n))) & 0x7fffff;
r[65] = ((a[65] << n) | (a[64] >> (23 - n))) & 0x7fffff;
r[64] = ((a[64] << n) | (a[63] >> (23 - n))) & 0x7fffff;
r[63] = ((a[63] << n) | (a[62] >> (23 - n))) & 0x7fffff;
r[62] = ((a[62] << n) | (a[61] >> (23 - n))) & 0x7fffff;
r[61] = ((a[61] << n) | (a[60] >> (23 - n))) & 0x7fffff;
r[60] = ((a[60] << n) | (a[59] >> (23 - n))) & 0x7fffff;
r[59] = ((a[59] << n) | (a[58] >> (23 - n))) & 0x7fffff;
r[58] = ((a[58] << n) | (a[57] >> (23 - n))) & 0x7fffff;
r[57] = ((a[57] << n) | (a[56] >> (23 - n))) & 0x7fffff;
r[56] = ((a[56] << n) | (a[55] >> (23 - n))) & 0x7fffff;
r[55] = ((a[55] << n) | (a[54] >> (23 - n))) & 0x7fffff;
r[54] = ((a[54] << n) | (a[53] >> (23 - n))) & 0x7fffff;
r[53] = ((a[53] << n) | (a[52] >> (23 - n))) & 0x7fffff;
r[52] = ((a[52] << n) | (a[51] >> (23 - n))) & 0x7fffff;
r[51] = ((a[51] << n) | (a[50] >> (23 - n))) & 0x7fffff;
r[50] = ((a[50] << n) | (a[49] >> (23 - n))) & 0x7fffff;
r[49] = ((a[49] << n) | (a[48] >> (23 - n))) & 0x7fffff;
r[48] = ((a[48] << n) | (a[47] >> (23 - n))) & 0x7fffff;
r[47] = ((a[47] << n) | (a[46] >> (23 - n))) & 0x7fffff;
r[46] = ((a[46] << n) | (a[45] >> (23 - n))) & 0x7fffff;
r[45] = ((a[45] << n) | (a[44] >> (23 - n))) & 0x7fffff;
r[44] = ((a[44] << n) | (a[43] >> (23 - n))) & 0x7fffff;
r[43] = ((a[43] << n) | (a[42] >> (23 - n))) & 0x7fffff;
r[42] = ((a[42] << n) | (a[41] >> (23 - n))) & 0x7fffff;
r[41] = ((a[41] << n) | (a[40] >> (23 - n))) & 0x7fffff;
r[40] = ((a[40] << n) | (a[39] >> (23 - n))) & 0x7fffff;
r[39] = ((a[39] << n) | (a[38] >> (23 - n))) & 0x7fffff;
r[38] = ((a[38] << n) | (a[37] >> (23 - n))) & 0x7fffff;
r[37] = ((a[37] << n) | (a[36] >> (23 - n))) & 0x7fffff;
r[36] = ((a[36] << n) | (a[35] >> (23 - n))) & 0x7fffff;
r[35] = ((a[35] << n) | (a[34] >> (23 - n))) & 0x7fffff;
r[34] = ((a[34] << n) | (a[33] >> (23 - n))) & 0x7fffff;
r[33] = ((a[33] << n) | (a[32] >> (23 - n))) & 0x7fffff;
r[32] = ((a[32] << n) | (a[31] >> (23 - n))) & 0x7fffff;
r[31] = ((a[31] << n) | (a[30] >> (23 - n))) & 0x7fffff;
r[30] = ((a[30] << n) | (a[29] >> (23 - n))) & 0x7fffff;
r[29] = ((a[29] << n) | (a[28] >> (23 - n))) & 0x7fffff;
r[28] = ((a[28] << n) | (a[27] >> (23 - n))) & 0x7fffff;
r[27] = ((a[27] << n) | (a[26] >> (23 - n))) & 0x7fffff;
r[26] = ((a[26] << n) | (a[25] >> (23 - n))) & 0x7fffff;
r[25] = ((a[25] << n) | (a[24] >> (23 - n))) & 0x7fffff;
r[24] = ((a[24] << n) | (a[23] >> (23 - n))) & 0x7fffff;
r[23] = ((a[23] << n) | (a[22] >> (23 - n))) & 0x7fffff;
r[22] = ((a[22] << n) | (a[21] >> (23 - n))) & 0x7fffff;
r[21] = ((a[21] << n) | (a[20] >> (23 - n))) & 0x7fffff;
r[20] = ((a[20] << n) | (a[19] >> (23 - n))) & 0x7fffff;
r[19] = ((a[19] << n) | (a[18] >> (23 - n))) & 0x7fffff;
r[18] = ((a[18] << n) | (a[17] >> (23 - n))) & 0x7fffff;
r[17] = ((a[17] << n) | (a[16] >> (23 - n))) & 0x7fffff;
r[16] = ((a[16] << n) | (a[15] >> (23 - n))) & 0x7fffff;
r[15] = ((a[15] << n) | (a[14] >> (23 - n))) & 0x7fffff;
r[14] = ((a[14] << n) | (a[13] >> (23 - n))) & 0x7fffff;
r[13] = ((a[13] << n) | (a[12] >> (23 - n))) & 0x7fffff;
r[12] = ((a[12] << n) | (a[11] >> (23 - n))) & 0x7fffff;
r[11] = ((a[11] << n) | (a[10] >> (23 - n))) & 0x7fffff;
r[10] = ((a[10] << n) | (a[9] >> (23 - n))) & 0x7fffff;
r[9] = ((a[9] << n) | (a[8] >> (23 - n))) & 0x7fffff;
r[8] = ((a[8] << n) | (a[7] >> (23 - n))) & 0x7fffff;
r[7] = ((a[7] << n) | (a[6] >> (23 - n))) & 0x7fffff;
r[6] = ((a[6] << n) | (a[5] >> (23 - n))) & 0x7fffff;
r[5] = ((a[5] << n) | (a[4] >> (23 - n))) & 0x7fffff;
r[4] = ((a[4] << n) | (a[3] >> (23 - n))) & 0x7fffff;
r[3] = ((a[3] << n) | (a[2] >> (23 - n))) & 0x7fffff;
r[2] = ((a[2] << n) | (a[1] >> (23 - n))) & 0x7fffff;
r[1] = ((a[1] << n) | (a[0] >> (23 - n))) & 0x7fffff;
#endif
r[0] = (a[0] << n) & 0x7fffff;
}
/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
*
* r A single precision number that is the result of the operation.
* e A single precision number that is the exponent.
* bits The number of bits in the exponent.
* m A single precision number that is the modulus.
* returns 0 on success and MEMORY_E on dynamic memory allocation failure.
*/
static int sp_2048_mod_exp_2_90(sp_digit* r, sp_digit* e, int bits, sp_digit* m)
{
#ifndef WOLFSSL_SMALL_STACK
sp_digit nd[180];
sp_digit td[91];
#else
sp_digit* td;
#endif
sp_digit* norm;
sp_digit* tmp;
sp_digit mp = 1;
sp_digit n, o;
int i;
int c, y;
int err = MP_OKAY;
#ifdef WOLFSSL_SMALL_STACK
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 271, NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (td == NULL)
err = MEMORY_E;
if (err == MP_OKAY) {
norm = td;
tmp = td + 180;
}
#else
norm = nd;
tmp = td;
#endif
if (err == MP_OKAY) {
XMEMSET(td, 0, sizeof(td));
sp_2048_mont_setup(m, &mp);
sp_2048_mont_norm_90(norm, m);
bits = ((bits + 3) / 4) * 4;
i = ((bits + 22) / 23) - 1;
c = bits % 23;
if (c == 0)
c = 23;
if (i < 90)
n = e[i--] << (32 - c);
else {
n = 0;
i--;
}
if (c < 4) {
n |= e[i--] << (9 - c);
c += 23;
}
y = (n >> 28) & 0xf;
n <<= 4;
c -= 4;
sp_2048_lshift_90(r, norm, y);
for (; i>=0 || c>=4; ) {
if (c < 4) {
n |= e[i--] << (9 - c);
c += 23;
}
y = (n >> 28) & 0xf;
n <<= 4;
c -= 4;
sp_2048_mont_sqr_90(r, r, m, mp);
sp_2048_mont_sqr_90(r, r, m, mp);
sp_2048_mont_sqr_90(r, r, m, mp);
sp_2048_mont_sqr_90(r, r, m, mp);
sp_2048_lshift_90(r, r, y);
sp_2048_mul_d_90(tmp, norm, (r[90] << 22) + (r[89] >> 1));
r[90] = 0;
r[89] &= 0x1L;
sp_2048_add_90(r, r, tmp);
sp_2048_norm_90(r);
o = sp_2048_cmp_90(r, m);
sp_2048_cond_sub_90(r, r, m, (o < 0) - 1);
}
sp_2048_mont_reduce_90(r, m, mp);
n = sp_2048_cmp_90(r, m);
sp_2048_cond_sub_90(r, r, m, (n < 0) - 1);
}
#ifdef WOLFSSL_SMALL_STACK
if (td != NULL)
XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return err;
}
#endif /* HAVE_FFDHE_2048 */
/* Perform the modular exponentiation for Diffie-Hellman.
*
* base Base.
@ -3765,7 +3972,14 @@ int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
sp_2048_from_bin(e, 90, exp, expLen);
sp_2048_from_mp(m, 90, mod);
err = sp_2048_mod_exp_90(r, b, e, expLen * 8, m, 0);
#ifdef HAVE_FFDHE_2048
if (base->used == 1 && base->dp[0] == 2 &&
((m[89] << 15) | (m[88] >> 8)) == 0xffffL) {
err = sp_2048_mod_exp_2_90(r, e, expLen * 8, m);
}
else
#endif
err = sp_2048_mod_exp_90(r, b, e, expLen * 8, m, 0);
}
if (err == MP_OKAY) {
@ -3824,7 +4038,14 @@ int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
sp_2048_from_bin(e, 90, exp, expLen);
sp_2048_from_mp(m, 90, mod);
err = sp_2048_mod_exp_90(r, b, e, expLen * 8, m, 0);
#ifdef HAVE_FFDHE_2048
if (base->used == 1 && base->dp[0] == 2 &&
((m[89] << 15) | (m[88] >> 8)) == 0xffffL) {
err = sp_2048_mod_exp_2_90(r, e, expLen * 8, m);
}
else
#endif
err = sp_2048_mod_exp_90(r, b, e, expLen * 8, m, 0);
}
if (err == MP_OKAY) {
@ -6427,7 +6648,7 @@ static int sp_3072_mod_exp_134(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
* a A single precision integer.
* m Mask to AND against each digit.
*/
static void sp_3072_mask_67(sp_digit* r, sp_digit* a, sp_digit m)
static void sp_3072_mask_67(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
int i;
@ -7065,6 +7286,257 @@ int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
}
#ifdef WOLFSSL_HAVE_SP_DH
#ifdef HAVE_FFDHE_3072
SP_NOINLINE static void sp_3072_lshift_134(sp_digit* r, sp_digit* a, byte n)
{
#ifdef WOLFSSL_SP_SMALL
int i;
r[134] = a[133] >> (23 - n);
for (i=133; i>0; i--)
r[i] = ((a[i] << n) | (a[i-1] >> (23 - n)) & 0x7fffff;
#else
r[134] = a[133] >> (23 - n);
r[133] = ((a[133] << n) | (a[132] >> (23 - n))) & 0x7fffff;
r[132] = ((a[132] << n) | (a[131] >> (23 - n))) & 0x7fffff;
r[131] = ((a[131] << n) | (a[130] >> (23 - n))) & 0x7fffff;
r[130] = ((a[130] << n) | (a[129] >> (23 - n))) & 0x7fffff;
r[129] = ((a[129] << n) | (a[128] >> (23 - n))) & 0x7fffff;
r[128] = ((a[128] << n) | (a[127] >> (23 - n))) & 0x7fffff;
r[127] = ((a[127] << n) | (a[126] >> (23 - n))) & 0x7fffff;
r[126] = ((a[126] << n) | (a[125] >> (23 - n))) & 0x7fffff;
r[125] = ((a[125] << n) | (a[124] >> (23 - n))) & 0x7fffff;
r[124] = ((a[124] << n) | (a[123] >> (23 - n))) & 0x7fffff;
r[123] = ((a[123] << n) | (a[122] >> (23 - n))) & 0x7fffff;
r[122] = ((a[122] << n) | (a[121] >> (23 - n))) & 0x7fffff;
r[121] = ((a[121] << n) | (a[120] >> (23 - n))) & 0x7fffff;
r[120] = ((a[120] << n) | (a[119] >> (23 - n))) & 0x7fffff;
r[119] = ((a[119] << n) | (a[118] >> (23 - n))) & 0x7fffff;
r[118] = ((a[118] << n) | (a[117] >> (23 - n))) & 0x7fffff;
r[117] = ((a[117] << n) | (a[116] >> (23 - n))) & 0x7fffff;
r[116] = ((a[116] << n) | (a[115] >> (23 - n))) & 0x7fffff;
r[115] = ((a[115] << n) | (a[114] >> (23 - n))) & 0x7fffff;
r[114] = ((a[114] << n) | (a[113] >> (23 - n))) & 0x7fffff;
r[113] = ((a[113] << n) | (a[112] >> (23 - n))) & 0x7fffff;
r[112] = ((a[112] << n) | (a[111] >> (23 - n))) & 0x7fffff;
r[111] = ((a[111] << n) | (a[110] >> (23 - n))) & 0x7fffff;
r[110] = ((a[110] << n) | (a[109] >> (23 - n))) & 0x7fffff;
r[109] = ((a[109] << n) | (a[108] >> (23 - n))) & 0x7fffff;
r[108] = ((a[108] << n) | (a[107] >> (23 - n))) & 0x7fffff;
r[107] = ((a[107] << n) | (a[106] >> (23 - n))) & 0x7fffff;
r[106] = ((a[106] << n) | (a[105] >> (23 - n))) & 0x7fffff;
r[105] = ((a[105] << n) | (a[104] >> (23 - n))) & 0x7fffff;
r[104] = ((a[104] << n) | (a[103] >> (23 - n))) & 0x7fffff;
r[103] = ((a[103] << n) | (a[102] >> (23 - n))) & 0x7fffff;
r[102] = ((a[102] << n) | (a[101] >> (23 - n))) & 0x7fffff;
r[101] = ((a[101] << n) | (a[100] >> (23 - n))) & 0x7fffff;
r[100] = ((a[100] << n) | (a[99] >> (23 - n))) & 0x7fffff;
r[99] = ((a[99] << n) | (a[98] >> (23 - n))) & 0x7fffff;
r[98] = ((a[98] << n) | (a[97] >> (23 - n))) & 0x7fffff;
r[97] = ((a[97] << n) | (a[96] >> (23 - n))) & 0x7fffff;
r[96] = ((a[96] << n) | (a[95] >> (23 - n))) & 0x7fffff;
r[95] = ((a[95] << n) | (a[94] >> (23 - n))) & 0x7fffff;
r[94] = ((a[94] << n) | (a[93] >> (23 - n))) & 0x7fffff;
r[93] = ((a[93] << n) | (a[92] >> (23 - n))) & 0x7fffff;
r[92] = ((a[92] << n) | (a[91] >> (23 - n))) & 0x7fffff;
r[91] = ((a[91] << n) | (a[90] >> (23 - n))) & 0x7fffff;
r[90] = ((a[90] << n) | (a[89] >> (23 - n))) & 0x7fffff;
r[89] = ((a[89] << n) | (a[88] >> (23 - n))) & 0x7fffff;
r[88] = ((a[88] << n) | (a[87] >> (23 - n))) & 0x7fffff;
r[87] = ((a[87] << n) | (a[86] >> (23 - n))) & 0x7fffff;
r[86] = ((a[86] << n) | (a[85] >> (23 - n))) & 0x7fffff;
r[85] = ((a[85] << n) | (a[84] >> (23 - n))) & 0x7fffff;
r[84] = ((a[84] << n) | (a[83] >> (23 - n))) & 0x7fffff;
r[83] = ((a[83] << n) | (a[82] >> (23 - n))) & 0x7fffff;
r[82] = ((a[82] << n) | (a[81] >> (23 - n))) & 0x7fffff;
r[81] = ((a[81] << n) | (a[80] >> (23 - n))) & 0x7fffff;
r[80] = ((a[80] << n) | (a[79] >> (23 - n))) & 0x7fffff;
r[79] = ((a[79] << n) | (a[78] >> (23 - n))) & 0x7fffff;
r[78] = ((a[78] << n) | (a[77] >> (23 - n))) & 0x7fffff;
r[77] = ((a[77] << n) | (a[76] >> (23 - n))) & 0x7fffff;
r[76] = ((a[76] << n) | (a[75] >> (23 - n))) & 0x7fffff;
r[75] = ((a[75] << n) | (a[74] >> (23 - n))) & 0x7fffff;
r[74] = ((a[74] << n) | (a[73] >> (23 - n))) & 0x7fffff;
r[73] = ((a[73] << n) | (a[72] >> (23 - n))) & 0x7fffff;
r[72] = ((a[72] << n) | (a[71] >> (23 - n))) & 0x7fffff;
r[71] = ((a[71] << n) | (a[70] >> (23 - n))) & 0x7fffff;
r[70] = ((a[70] << n) | (a[69] >> (23 - n))) & 0x7fffff;
r[69] = ((a[69] << n) | (a[68] >> (23 - n))) & 0x7fffff;
r[68] = ((a[68] << n) | (a[67] >> (23 - n))) & 0x7fffff;
r[67] = ((a[67] << n) | (a[66] >> (23 - n))) & 0x7fffff;
r[66] = ((a[66] << n) | (a[65] >> (23 - n))) & 0x7fffff;
r[65] = ((a[65] << n) | (a[64] >> (23 - n))) & 0x7fffff;
r[64] = ((a[64] << n) | (a[63] >> (23 - n))) & 0x7fffff;
r[63] = ((a[63] << n) | (a[62] >> (23 - n))) & 0x7fffff;
r[62] = ((a[62] << n) | (a[61] >> (23 - n))) & 0x7fffff;
r[61] = ((a[61] << n) | (a[60] >> (23 - n))) & 0x7fffff;
r[60] = ((a[60] << n) | (a[59] >> (23 - n))) & 0x7fffff;
r[59] = ((a[59] << n) | (a[58] >> (23 - n))) & 0x7fffff;
r[58] = ((a[58] << n) | (a[57] >> (23 - n))) & 0x7fffff;
r[57] = ((a[57] << n) | (a[56] >> (23 - n))) & 0x7fffff;
r[56] = ((a[56] << n) | (a[55] >> (23 - n))) & 0x7fffff;
r[55] = ((a[55] << n) | (a[54] >> (23 - n))) & 0x7fffff;
r[54] = ((a[54] << n) | (a[53] >> (23 - n))) & 0x7fffff;
r[53] = ((a[53] << n) | (a[52] >> (23 - n))) & 0x7fffff;
r[52] = ((a[52] << n) | (a[51] >> (23 - n))) & 0x7fffff;
r[51] = ((a[51] << n) | (a[50] >> (23 - n))) & 0x7fffff;
r[50] = ((a[50] << n) | (a[49] >> (23 - n))) & 0x7fffff;
r[49] = ((a[49] << n) | (a[48] >> (23 - n))) & 0x7fffff;
r[48] = ((a[48] << n) | (a[47] >> (23 - n))) & 0x7fffff;
r[47] = ((a[47] << n) | (a[46] >> (23 - n))) & 0x7fffff;
r[46] = ((a[46] << n) | (a[45] >> (23 - n))) & 0x7fffff;
r[45] = ((a[45] << n) | (a[44] >> (23 - n))) & 0x7fffff;
r[44] = ((a[44] << n) | (a[43] >> (23 - n))) & 0x7fffff;
r[43] = ((a[43] << n) | (a[42] >> (23 - n))) & 0x7fffff;
r[42] = ((a[42] << n) | (a[41] >> (23 - n))) & 0x7fffff;
r[41] = ((a[41] << n) | (a[40] >> (23 - n))) & 0x7fffff;
r[40] = ((a[40] << n) | (a[39] >> (23 - n))) & 0x7fffff;
r[39] = ((a[39] << n) | (a[38] >> (23 - n))) & 0x7fffff;
r[38] = ((a[38] << n) | (a[37] >> (23 - n))) & 0x7fffff;
r[37] = ((a[37] << n) | (a[36] >> (23 - n))) & 0x7fffff;
r[36] = ((a[36] << n) | (a[35] >> (23 - n))) & 0x7fffff;
r[35] = ((a[35] << n) | (a[34] >> (23 - n))) & 0x7fffff;
r[34] = ((a[34] << n) | (a[33] >> (23 - n))) & 0x7fffff;
r[33] = ((a[33] << n) | (a[32] >> (23 - n))) & 0x7fffff;
r[32] = ((a[32] << n) | (a[31] >> (23 - n))) & 0x7fffff;
r[31] = ((a[31] << n) | (a[30] >> (23 - n))) & 0x7fffff;
r[30] = ((a[30] << n) | (a[29] >> (23 - n))) & 0x7fffff;
r[29] = ((a[29] << n) | (a[28] >> (23 - n))) & 0x7fffff;
r[28] = ((a[28] << n) | (a[27] >> (23 - n))) & 0x7fffff;
r[27] = ((a[27] << n) | (a[26] >> (23 - n))) & 0x7fffff;
r[26] = ((a[26] << n) | (a[25] >> (23 - n))) & 0x7fffff;
r[25] = ((a[25] << n) | (a[24] >> (23 - n))) & 0x7fffff;
r[24] = ((a[24] << n) | (a[23] >> (23 - n))) & 0x7fffff;
r[23] = ((a[23] << n) | (a[22] >> (23 - n))) & 0x7fffff;
r[22] = ((a[22] << n) | (a[21] >> (23 - n))) & 0x7fffff;
r[21] = ((a[21] << n) | (a[20] >> (23 - n))) & 0x7fffff;
r[20] = ((a[20] << n) | (a[19] >> (23 - n))) & 0x7fffff;
r[19] = ((a[19] << n) | (a[18] >> (23 - n))) & 0x7fffff;
r[18] = ((a[18] << n) | (a[17] >> (23 - n))) & 0x7fffff;
r[17] = ((a[17] << n) | (a[16] >> (23 - n))) & 0x7fffff;
r[16] = ((a[16] << n) | (a[15] >> (23 - n))) & 0x7fffff;
r[15] = ((a[15] << n) | (a[14] >> (23 - n))) & 0x7fffff;
r[14] = ((a[14] << n) | (a[13] >> (23 - n))) & 0x7fffff;
r[13] = ((a[13] << n) | (a[12] >> (23 - n))) & 0x7fffff;
r[12] = ((a[12] << n) | (a[11] >> (23 - n))) & 0x7fffff;
r[11] = ((a[11] << n) | (a[10] >> (23 - n))) & 0x7fffff;
r[10] = ((a[10] << n) | (a[9] >> (23 - n))) & 0x7fffff;
r[9] = ((a[9] << n) | (a[8] >> (23 - n))) & 0x7fffff;
r[8] = ((a[8] << n) | (a[7] >> (23 - n))) & 0x7fffff;
r[7] = ((a[7] << n) | (a[6] >> (23 - n))) & 0x7fffff;
r[6] = ((a[6] << n) | (a[5] >> (23 - n))) & 0x7fffff;
r[5] = ((a[5] << n) | (a[4] >> (23 - n))) & 0x7fffff;
r[4] = ((a[4] << n) | (a[3] >> (23 - n))) & 0x7fffff;
r[3] = ((a[3] << n) | (a[2] >> (23 - n))) & 0x7fffff;
r[2] = ((a[2] << n) | (a[1] >> (23 - n))) & 0x7fffff;
r[1] = ((a[1] << n) | (a[0] >> (23 - n))) & 0x7fffff;
#endif
r[0] = (a[0] << n) & 0x7fffff;
}
/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
*
* r A single precision number that is the result of the operation.
* e A single precision number that is the exponent.
* bits The number of bits in the exponent.
* m A single precision number that is the modulus.
* returns 0 on success and MEMORY_E on dynamic memory allocation failure.
*/
static int sp_3072_mod_exp_2_134(sp_digit* r, sp_digit* e, int bits, sp_digit* m)
{
#ifndef WOLFSSL_SMALL_STACK
sp_digit nd[268];
sp_digit td[135];
#else
sp_digit* td;
#endif
sp_digit* norm;
sp_digit* tmp;
sp_digit mp = 1;
sp_digit n, o;
int i;
int c, y;
int err = MP_OKAY;
#ifdef WOLFSSL_SMALL_STACK
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 403, NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (td == NULL)
err = MEMORY_E;
if (err == MP_OKAY) {
norm = td;
tmp = td + 268;
}
#else
norm = nd;
tmp = td;
#endif
if (err == MP_OKAY) {
XMEMSET(td, 0, sizeof(td));
sp_3072_mont_setup(m, &mp);
sp_3072_mont_norm_134(norm, m);
bits = ((bits + 3) / 4) * 4;
i = ((bits + 22) / 23) - 1;
c = bits % 23;
if (c == 0)
c = 23;
if (i < 134)
n = e[i--] << (32 - c);
else {
n = 0;
i--;
}
if (c < 4) {
n |= e[i--] << (9 - c);
c += 23;
}
y = (n >> 28) & 0xf;
n <<= 4;
c -= 4;
sp_3072_lshift_134(r, norm, y);
for (; i>=0 || c>=4; ) {
if (c < 4) {
n |= e[i--] << (9 - c);
c += 23;
}
y = (n >> 28) & 0xf;
n <<= 4;
c -= 4;
sp_3072_mont_sqr_134(r, r, m, mp);
sp_3072_mont_sqr_134(r, r, m, mp);
sp_3072_mont_sqr_134(r, r, m, mp);
sp_3072_mont_sqr_134(r, r, m, mp);
sp_3072_lshift_134(r, r, y);
sp_3072_mul_d_134(tmp, norm, (r[134] << 10) + (r[133] >> 13));
r[134] = 0;
r[133] &= 0x1fffL;
sp_3072_add_134(r, r, tmp);
sp_3072_norm_134(r);
o = sp_3072_cmp_134(r, m);
sp_3072_cond_sub_134(r, r, m, (o < 0) - 1);
}
sp_3072_mont_reduce_134(r, m, mp);
n = sp_3072_cmp_134(r, m);
sp_3072_cond_sub_134(r, r, m, (n < 0) - 1);
}
#ifdef WOLFSSL_SMALL_STACK
if (td != NULL)
XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return err;
}
#endif /* HAVE_FFDHE_3072 */
/* Perform the modular exponentiation for Diffie-Hellman.
*
* base Base.
@ -7110,7 +7582,14 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
sp_3072_from_bin(e, 134, exp, expLen);
sp_3072_from_mp(m, 134, mod);
err = sp_3072_mod_exp_134(r, b, e, expLen * 8, m, 0);
#ifdef HAVE_FFDHE_3072
if (base->used == 1 && base->dp[0] == 2 &&
((m[133] << 3) | (m[132] >> 20)) == 0xffffL) {
err = sp_3072_mod_exp_2_134(r, e, expLen * 8, m);
}
else
#endif
err = sp_3072_mod_exp_134(r, b, e, expLen * 8, m, 0);
}
if (err == MP_OKAY) {
@ -7169,7 +7648,14 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
sp_3072_from_bin(e, 134, exp, expLen);
sp_3072_from_mp(m, 134, mod);
err = sp_3072_mod_exp_134(r, b, e, expLen * 8, m, 0);
#ifdef HAVE_FFDHE_3072
if (base->used == 1 && base->dp[0] == 2 &&
((m[133] << 3) | (m[132] >> 20)) == 0xffffL) {
err = sp_3072_mod_exp_2_134(r, e, expLen * 8, m);
}
else
#endif
err = sp_3072_mod_exp_134(r, b, e, expLen * 8, m, 0);
}
if (err == MP_OKAY) {

View File

@ -2594,7 +2594,7 @@ static int sp_2048_mod_exp_36(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
* a A single precision integer.
* m Mask to AND against each digit.
*/
static void sp_2048_mask_18(sp_digit* r, sp_digit* a, sp_digit m)
static void sp_2048_mask_18(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
int i;
@ -3231,6 +3231,160 @@ int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
}
#ifdef WOLFSSL_HAVE_SP_DH
#ifdef HAVE_FFDHE_2048
SP_NOINLINE static void sp_2048_lshift_36(sp_digit* r, sp_digit* a, byte n)
{
#ifdef WOLFSSL_SP_SMALL
int i;
r[36] = a[35] >> (57 - n);
for (i=35; i>0; i--)
r[i] = ((a[i] << n) | (a[i-1] >> (57 - n)) & 0x1ffffffffffffffl;
#else
r[36] = a[35] >> (57 - n);
r[35] = ((a[35] << n) | (a[34] >> (57 - n))) & 0x1ffffffffffffffl;
r[34] = ((a[34] << n) | (a[33] >> (57 - n))) & 0x1ffffffffffffffl;
r[33] = ((a[33] << n) | (a[32] >> (57 - n))) & 0x1ffffffffffffffl;
r[32] = ((a[32] << n) | (a[31] >> (57 - n))) & 0x1ffffffffffffffl;
r[31] = ((a[31] << n) | (a[30] >> (57 - n))) & 0x1ffffffffffffffl;
r[30] = ((a[30] << n) | (a[29] >> (57 - n))) & 0x1ffffffffffffffl;
r[29] = ((a[29] << n) | (a[28] >> (57 - n))) & 0x1ffffffffffffffl;
r[28] = ((a[28] << n) | (a[27] >> (57 - n))) & 0x1ffffffffffffffl;
r[27] = ((a[27] << n) | (a[26] >> (57 - n))) & 0x1ffffffffffffffl;
r[26] = ((a[26] << n) | (a[25] >> (57 - n))) & 0x1ffffffffffffffl;
r[25] = ((a[25] << n) | (a[24] >> (57 - n))) & 0x1ffffffffffffffl;
r[24] = ((a[24] << n) | (a[23] >> (57 - n))) & 0x1ffffffffffffffl;
r[23] = ((a[23] << n) | (a[22] >> (57 - n))) & 0x1ffffffffffffffl;
r[22] = ((a[22] << n) | (a[21] >> (57 - n))) & 0x1ffffffffffffffl;
r[21] = ((a[21] << n) | (a[20] >> (57 - n))) & 0x1ffffffffffffffl;
r[20] = ((a[20] << n) | (a[19] >> (57 - n))) & 0x1ffffffffffffffl;
r[19] = ((a[19] << n) | (a[18] >> (57 - n))) & 0x1ffffffffffffffl;
r[18] = ((a[18] << n) | (a[17] >> (57 - n))) & 0x1ffffffffffffffl;
r[17] = ((a[17] << n) | (a[16] >> (57 - n))) & 0x1ffffffffffffffl;
r[16] = ((a[16] << n) | (a[15] >> (57 - n))) & 0x1ffffffffffffffl;
r[15] = ((a[15] << n) | (a[14] >> (57 - n))) & 0x1ffffffffffffffl;
r[14] = ((a[14] << n) | (a[13] >> (57 - n))) & 0x1ffffffffffffffl;
r[13] = ((a[13] << n) | (a[12] >> (57 - n))) & 0x1ffffffffffffffl;
r[12] = ((a[12] << n) | (a[11] >> (57 - n))) & 0x1ffffffffffffffl;
r[11] = ((a[11] << n) | (a[10] >> (57 - n))) & 0x1ffffffffffffffl;
r[10] = ((a[10] << n) | (a[9] >> (57 - n))) & 0x1ffffffffffffffl;
r[9] = ((a[9] << n) | (a[8] >> (57 - n))) & 0x1ffffffffffffffl;
r[8] = ((a[8] << n) | (a[7] >> (57 - n))) & 0x1ffffffffffffffl;
r[7] = ((a[7] << n) | (a[6] >> (57 - n))) & 0x1ffffffffffffffl;
r[6] = ((a[6] << n) | (a[5] >> (57 - n))) & 0x1ffffffffffffffl;
r[5] = ((a[5] << n) | (a[4] >> (57 - n))) & 0x1ffffffffffffffl;
r[4] = ((a[4] << n) | (a[3] >> (57 - n))) & 0x1ffffffffffffffl;
r[3] = ((a[3] << n) | (a[2] >> (57 - n))) & 0x1ffffffffffffffl;
r[2] = ((a[2] << n) | (a[1] >> (57 - n))) & 0x1ffffffffffffffl;
r[1] = ((a[1] << n) | (a[0] >> (57 - n))) & 0x1ffffffffffffffl;
#endif
r[0] = (a[0] << n) & 0x1ffffffffffffffl;
}
/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
*
* r A single precision number that is the result of the operation.
* e A single precision number that is the exponent.
* bits The number of bits in the exponent.
* m A single precision number that is the modulus.
* returns 0 on success and MEMORY_E on dynamic memory allocation failure.
*/
static int sp_2048_mod_exp_2_36(sp_digit* r, sp_digit* e, int bits, sp_digit* m)
{
#ifndef WOLFSSL_SMALL_STACK
sp_digit nd[72];
sp_digit td[37];
#else
sp_digit* td;
#endif
sp_digit* norm;
sp_digit* tmp;
sp_digit mp = 1;
sp_digit n, o;
int i;
int c, y;
int err = MP_OKAY;
#ifdef WOLFSSL_SMALL_STACK
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 109, NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (td == NULL)
err = MEMORY_E;
if (err == MP_OKAY) {
norm = td;
tmp = td + 72;
}
#else
norm = nd;
tmp = td;
#endif
if (err == MP_OKAY) {
XMEMSET(td, 0, sizeof(td));
sp_2048_mont_setup(m, &mp);
sp_2048_mont_norm_36(norm, m);
bits = ((bits + 4) / 5) * 5;
i = ((bits + 56) / 57) - 1;
c = bits % 57;
if (c == 0)
c = 57;
if (i < 36)
n = e[i--] << (64 - c);
else {
n = 0;
i--;
}
if (c < 5) {
n |= e[i--] << (7 - c);
c += 57;
}
y = (n >> 59) & 0x1f;
n <<= 5;
c -= 5;
sp_2048_lshift_36(r, norm, y);
for (; i>=0 || c>=5; ) {
if (c < 5) {
n |= e[i--] << (7 - c);
c += 57;
}
y = (n >> 59) & 0x1f;
n <<= 5;
c -= 5;
sp_2048_mont_sqr_36(r, r, m, mp);
sp_2048_mont_sqr_36(r, r, m, mp);
sp_2048_mont_sqr_36(r, r, m, mp);
sp_2048_mont_sqr_36(r, r, m, mp);
sp_2048_mont_sqr_36(r, r, m, mp);
sp_2048_lshift_36(r, r, y);
sp_2048_mul_d_36(tmp, norm, (r[36] << 4) + (r[35] >> 53));
r[36] = 0;
r[35] &= 0x1fffffffffffffL;
sp_2048_add_36(r, r, tmp);
sp_2048_norm_36(r);
o = sp_2048_cmp_36(r, m);
sp_2048_cond_sub_36(r, r, m, (o < 0) - 1);
}
sp_2048_mont_reduce_36(r, m, mp);
n = sp_2048_cmp_36(r, m);
sp_2048_cond_sub_36(r, r, m, (n < 0) - 1);
}
#ifdef WOLFSSL_SMALL_STACK
if (td != NULL)
XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return err;
}
#endif /* HAVE_FFDHE_2048 */
/* Perform the modular exponentiation for Diffie-Hellman.
*
* base Base.
@ -3276,7 +3430,14 @@ int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
sp_2048_from_bin(e, 36, exp, expLen);
sp_2048_from_mp(m, 36, mod);
err = sp_2048_mod_exp_36(r, b, e, expLen * 8, m, 0);
#ifdef HAVE_FFDHE_2048
if (base->used == 1 && base->dp[0] == 2 &&
(m[35] >> 21) == 0xffffffffL) {
err = sp_2048_mod_exp_2_36(r, e, expLen * 8, m);
}
else
#endif
err = sp_2048_mod_exp_36(r, b, e, expLen * 8, m, 0);
}
if (err == MP_OKAY) {
@ -3335,7 +3496,14 @@ int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
sp_2048_from_bin(e, 36, exp, expLen);
sp_2048_from_mp(m, 36, mod);
err = sp_2048_mod_exp_36(r, b, e, expLen * 8, m, 0);
#ifdef HAVE_FFDHE_2048
if (base->used == 1 && base->dp[0] == 2 &&
(m[35] >> 21) == 0xffffffffL) {
err = sp_2048_mod_exp_2_36(r, e, expLen * 8, m);
}
else
#endif
err = sp_2048_mod_exp_36(r, b, e, expLen * 8, m, 0);
}
if (err == MP_OKAY) {
@ -6196,7 +6364,7 @@ static int sp_3072_mod_exp_54(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
* a A single precision integer.
* m Mask to AND against each digit.
*/
static void sp_3072_mask_27(sp_digit* r, sp_digit* a, sp_digit m)
static void sp_3072_mask_27(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
int i;
@ -6834,6 +7002,178 @@ int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
}
#ifdef WOLFSSL_HAVE_SP_DH
#ifdef HAVE_FFDHE_3072
SP_NOINLINE static void sp_3072_lshift_54(sp_digit* r, sp_digit* a, byte n)
{
#ifdef WOLFSSL_SP_SMALL
int i;
r[54] = a[53] >> (57 - n);
for (i=53; i>0; i--)
r[i] = ((a[i] << n) | (a[i-1] >> (57 - n)) & 0x1ffffffffffffffl;
#else
r[54] = a[53] >> (57 - n);
r[53] = ((a[53] << n) | (a[52] >> (57 - n))) & 0x1ffffffffffffffl;
r[52] = ((a[52] << n) | (a[51] >> (57 - n))) & 0x1ffffffffffffffl;
r[51] = ((a[51] << n) | (a[50] >> (57 - n))) & 0x1ffffffffffffffl;
r[50] = ((a[50] << n) | (a[49] >> (57 - n))) & 0x1ffffffffffffffl;
r[49] = ((a[49] << n) | (a[48] >> (57 - n))) & 0x1ffffffffffffffl;
r[48] = ((a[48] << n) | (a[47] >> (57 - n))) & 0x1ffffffffffffffl;
r[47] = ((a[47] << n) | (a[46] >> (57 - n))) & 0x1ffffffffffffffl;
r[46] = ((a[46] << n) | (a[45] >> (57 - n))) & 0x1ffffffffffffffl;
r[45] = ((a[45] << n) | (a[44] >> (57 - n))) & 0x1ffffffffffffffl;
r[44] = ((a[44] << n) | (a[43] >> (57 - n))) & 0x1ffffffffffffffl;
r[43] = ((a[43] << n) | (a[42] >> (57 - n))) & 0x1ffffffffffffffl;
r[42] = ((a[42] << n) | (a[41] >> (57 - n))) & 0x1ffffffffffffffl;
r[41] = ((a[41] << n) | (a[40] >> (57 - n))) & 0x1ffffffffffffffl;
r[40] = ((a[40] << n) | (a[39] >> (57 - n))) & 0x1ffffffffffffffl;
r[39] = ((a[39] << n) | (a[38] >> (57 - n))) & 0x1ffffffffffffffl;
r[38] = ((a[38] << n) | (a[37] >> (57 - n))) & 0x1ffffffffffffffl;
r[37] = ((a[37] << n) | (a[36] >> (57 - n))) & 0x1ffffffffffffffl;
r[36] = ((a[36] << n) | (a[35] >> (57 - n))) & 0x1ffffffffffffffl;
r[35] = ((a[35] << n) | (a[34] >> (57 - n))) & 0x1ffffffffffffffl;
r[34] = ((a[34] << n) | (a[33] >> (57 - n))) & 0x1ffffffffffffffl;
r[33] = ((a[33] << n) | (a[32] >> (57 - n))) & 0x1ffffffffffffffl;
r[32] = ((a[32] << n) | (a[31] >> (57 - n))) & 0x1ffffffffffffffl;
r[31] = ((a[31] << n) | (a[30] >> (57 - n))) & 0x1ffffffffffffffl;
r[30] = ((a[30] << n) | (a[29] >> (57 - n))) & 0x1ffffffffffffffl;
r[29] = ((a[29] << n) | (a[28] >> (57 - n))) & 0x1ffffffffffffffl;
r[28] = ((a[28] << n) | (a[27] >> (57 - n))) & 0x1ffffffffffffffl;
r[27] = ((a[27] << n) | (a[26] >> (57 - n))) & 0x1ffffffffffffffl;
r[26] = ((a[26] << n) | (a[25] >> (57 - n))) & 0x1ffffffffffffffl;
r[25] = ((a[25] << n) | (a[24] >> (57 - n))) & 0x1ffffffffffffffl;
r[24] = ((a[24] << n) | (a[23] >> (57 - n))) & 0x1ffffffffffffffl;
r[23] = ((a[23] << n) | (a[22] >> (57 - n))) & 0x1ffffffffffffffl;
r[22] = ((a[22] << n) | (a[21] >> (57 - n))) & 0x1ffffffffffffffl;
r[21] = ((a[21] << n) | (a[20] >> (57 - n))) & 0x1ffffffffffffffl;
r[20] = ((a[20] << n) | (a[19] >> (57 - n))) & 0x1ffffffffffffffl;
r[19] = ((a[19] << n) | (a[18] >> (57 - n))) & 0x1ffffffffffffffl;
r[18] = ((a[18] << n) | (a[17] >> (57 - n))) & 0x1ffffffffffffffl;
r[17] = ((a[17] << n) | (a[16] >> (57 - n))) & 0x1ffffffffffffffl;
r[16] = ((a[16] << n) | (a[15] >> (57 - n))) & 0x1ffffffffffffffl;
r[15] = ((a[15] << n) | (a[14] >> (57 - n))) & 0x1ffffffffffffffl;
r[14] = ((a[14] << n) | (a[13] >> (57 - n))) & 0x1ffffffffffffffl;
r[13] = ((a[13] << n) | (a[12] >> (57 - n))) & 0x1ffffffffffffffl;
r[12] = ((a[12] << n) | (a[11] >> (57 - n))) & 0x1ffffffffffffffl;
r[11] = ((a[11] << n) | (a[10] >> (57 - n))) & 0x1ffffffffffffffl;
r[10] = ((a[10] << n) | (a[9] >> (57 - n))) & 0x1ffffffffffffffl;
r[9] = ((a[9] << n) | (a[8] >> (57 - n))) & 0x1ffffffffffffffl;
r[8] = ((a[8] << n) | (a[7] >> (57 - n))) & 0x1ffffffffffffffl;
r[7] = ((a[7] << n) | (a[6] >> (57 - n))) & 0x1ffffffffffffffl;
r[6] = ((a[6] << n) | (a[5] >> (57 - n))) & 0x1ffffffffffffffl;
r[5] = ((a[5] << n) | (a[4] >> (57 - n))) & 0x1ffffffffffffffl;
r[4] = ((a[4] << n) | (a[3] >> (57 - n))) & 0x1ffffffffffffffl;
r[3] = ((a[3] << n) | (a[2] >> (57 - n))) & 0x1ffffffffffffffl;
r[2] = ((a[2] << n) | (a[1] >> (57 - n))) & 0x1ffffffffffffffl;
r[1] = ((a[1] << n) | (a[0] >> (57 - n))) & 0x1ffffffffffffffl;
#endif
r[0] = (a[0] << n) & 0x1ffffffffffffffl;
}
/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
*
* r A single precision number that is the result of the operation.
* e A single precision number that is the exponent.
* bits The number of bits in the exponent.
* m A single precision number that is the modulus.
* returns 0 on success and MEMORY_E on dynamic memory allocation failure.
*/
static int sp_3072_mod_exp_2_54(sp_digit* r, sp_digit* e, int bits, sp_digit* m)
{
#ifndef WOLFSSL_SMALL_STACK
sp_digit nd[108];
sp_digit td[55];
#else
sp_digit* td;
#endif
sp_digit* norm;
sp_digit* tmp;
sp_digit mp = 1;
sp_digit n, o;
int i;
int c, y;
int err = MP_OKAY;
#ifdef WOLFSSL_SMALL_STACK
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 163, NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (td == NULL)
err = MEMORY_E;
if (err == MP_OKAY) {
norm = td;
tmp = td + 108;
}
#else
norm = nd;
tmp = td;
#endif
if (err == MP_OKAY) {
XMEMSET(td, 0, sizeof(td));
sp_3072_mont_setup(m, &mp);
sp_3072_mont_norm_54(norm, m);
bits = ((bits + 4) / 5) * 5;
i = ((bits + 56) / 57) - 1;
c = bits % 57;
if (c == 0)
c = 57;
if (i < 54)
n = e[i--] << (64 - c);
else {
n = 0;
i--;
}
if (c < 5) {
n |= e[i--] << (7 - c);
c += 57;
}
y = (n >> 59) & 0x1f;
n <<= 5;
c -= 5;
sp_3072_lshift_54(r, norm, y);
for (; i>=0 || c>=5; ) {
if (c < 5) {
n |= e[i--] << (7 - c);
c += 57;
}
y = (n >> 59) & 0x1f;
n <<= 5;
c -= 5;
sp_3072_mont_sqr_54(r, r, m, mp);
sp_3072_mont_sqr_54(r, r, m, mp);
sp_3072_mont_sqr_54(r, r, m, mp);
sp_3072_mont_sqr_54(r, r, m, mp);
sp_3072_mont_sqr_54(r, r, m, mp);
sp_3072_lshift_54(r, r, y);
sp_3072_mul_d_54(tmp, norm, (r[54] << 6) + (r[53] >> 51));
r[54] = 0;
r[53] &= 0x7ffffffffffffL;
sp_3072_add_54(r, r, tmp);
sp_3072_norm_54(r);
o = sp_3072_cmp_54(r, m);
sp_3072_cond_sub_54(r, r, m, (o < 0) - 1);
}
sp_3072_mont_reduce_54(r, m, mp);
n = sp_3072_cmp_54(r, m);
sp_3072_cond_sub_54(r, r, m, (n < 0) - 1);
}
#ifdef WOLFSSL_SMALL_STACK
if (td != NULL)
XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return err;
}
#endif /* HAVE_FFDHE_3072 */
/* Perform the modular exponentiation for Diffie-Hellman.
*
* base Base.
@ -6879,7 +7219,14 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
sp_3072_from_bin(e, 54, exp, expLen);
sp_3072_from_mp(m, 54, mod);
err = sp_3072_mod_exp_54(r, b, e, expLen * 8, m, 0);
#ifdef HAVE_FFDHE_3072
if (base->used == 1 && base->dp[0] == 2 &&
(m[53] >> 19) == 0xffffffffL) {
err = sp_3072_mod_exp_2_54(r, e, expLen * 8, m);
}
else
#endif
err = sp_3072_mod_exp_54(r, b, e, expLen * 8, m, 0);
}
if (err == MP_OKAY) {
@ -6938,7 +7285,14 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
sp_3072_from_bin(e, 54, exp, expLen);
sp_3072_from_mp(m, 54, mod);
err = sp_3072_mod_exp_54(r, b, e, expLen * 8, m, 0);
#ifdef HAVE_FFDHE_3072
if (base->used == 1 && base->dp[0] == 2 &&
(m[53] >> 19) == 0xffffffffL) {
err = sp_3072_mod_exp_2_54(r, e, expLen * 8, m);
}
else
#endif
err = sp_3072_mod_exp_54(r, b, e, expLen * 8, m, 0);
}
if (err == MP_OKAY) {

File diff suppressed because it is too large Load Diff

View File

@ -102,14 +102,14 @@ static void sp_2048_from_mp(sp_digit* r, int max, mp_int* a)
s = 64 - s;
if (j + 1 >= max)
break;
r[++j] = a->dp[i] >> s;
r[++j] = (sp_digit)(a->dp[i] >> s);
while (s + 64 <= DIGIT_BIT) {
s += 64;
r[j] &= 0xffffffffffffffffl;
if (j + 1 >= max)
break;
if (s < DIGIT_BIT)
r[++j] = a->dp[i] >> s;
r[++j] = (sp_digit)(a->dp[i] >> s);
else
r[++j] = 0;
}
@ -190,7 +190,7 @@ extern sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b
* a A single precision integer.
* m Mask to AND against each digit.
*/
static void sp_2048_mask_16(sp_digit* r, sp_digit* a, sp_digit m)
static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
int i;
@ -580,9 +580,12 @@ static int sp_2048_mod_exp_16(sp_digit* r, sp_digit* a, sp_digit* e,
i = (bits - 1) / 64;
n = e[i--];
y = n >> 59;
n <<= 5;
c = 59;
c = bits & 63;
if (c == 0)
c = 64;
c -= bits % 5;
y = n >> c;
n <<= 64 - c;
XMEMCPY(r, t[y], sizeof(sp_digit) * 16);
for (; i>=0 || c>=5; ) {
if (c == 0) {
@ -613,10 +616,6 @@ static int sp_2048_mod_exp_16(sp_digit* r, sp_digit* a, sp_digit* e,
sp_2048_mont_mul_16(r, r, t[y], m, mp);
}
y = e[0] & ((1 << c) - 1);
for (; c > 0; c--)
sp_2048_mont_sqr_16(r, r, m, mp);
sp_2048_mont_mul_16(r, r, t[y], m, mp);
XMEMSET(&r[16], 0, sizeof(sp_digit) * 16);
sp_2048_mont_reduce_16(r, m, mp);
@ -760,9 +759,12 @@ static int sp_2048_mod_exp_avx2_16(sp_digit* r, sp_digit* a, sp_digit* e,
i = (bits - 1) / 64;
n = e[i--];
y = n >> 59;
n <<= 5;
c = 59;
c = bits & 63;
if (c == 0)
c = 64;
c -= bits % 5;
y = n >> c;
n <<= 64 - c;
XMEMCPY(r, t[y], sizeof(sp_digit) * 16);
for (; i>=0 || c>=5; ) {
if (c == 0) {
@ -793,10 +795,6 @@ static int sp_2048_mod_exp_avx2_16(sp_digit* r, sp_digit* a, sp_digit* e,
sp_2048_mont_mul_avx2_16(r, r, t[y], m, mp);
}
y = e[0] & ((1 << c) - 1);
for (; c > 0; c--)
sp_2048_mont_sqr_avx2_16(r, r, m, mp);
sp_2048_mont_mul_avx2_16(r, r, t[y], m, mp);
XMEMSET(&r[16], 0, sizeof(sp_digit) * 16);
sp_2048_mont_reduce_avx2_16(r, m, mp);
@ -891,7 +889,7 @@ static WC_INLINE sp_digit div_2048_word_32(sp_digit d1, sp_digit d0,
* a A single precision integer.
* m Mask to AND against each digit.
*/
static void sp_2048_mask_32(sp_digit* r, sp_digit* a, sp_digit m)
static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
int i;
@ -1125,9 +1123,12 @@ static int sp_2048_mod_exp_32(sp_digit* r, sp_digit* a, sp_digit* e,
i = (bits - 1) / 64;
n = e[i--];
y = n >> 59;
n <<= 5;
c = 59;
c = bits & 63;
if (c == 0)
c = 64;
c -= bits % 5;
y = n >> c;
n <<= 64 - c;
XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
for (; i>=0 || c>=5; ) {
if (c == 0) {
@ -1158,10 +1159,6 @@ static int sp_2048_mod_exp_32(sp_digit* r, sp_digit* a, sp_digit* e,
sp_2048_mont_mul_32(r, r, t[y], m, mp);
}
y = e[0] & ((1 << c) - 1);
for (; c > 0; c--)
sp_2048_mont_sqr_32(r, r, m, mp);
sp_2048_mont_mul_32(r, r, t[y], m, mp);
XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
sp_2048_mont_reduce_32(r, m, mp);
@ -1307,9 +1304,12 @@ static int sp_2048_mod_exp_avx2_32(sp_digit* r, sp_digit* a, sp_digit* e,
i = (bits - 1) / 64;
n = e[i--];
y = n >> 59;
n <<= 5;
c = 59;
c = bits & 63;
if (c == 0)
c = 64;
c -= bits % 5;
y = n >> c;
n <<= 64 - c;
XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
for (; i>=0 || c>=5; ) {
if (c == 0) {
@ -1340,10 +1340,6 @@ static int sp_2048_mod_exp_avx2_32(sp_digit* r, sp_digit* a, sp_digit* e,
sp_2048_mont_mul_avx2_32(r, r, t[y], m, mp);
}
y = e[0] & ((1 << c) - 1);
for (; c > 0; c--)
sp_2048_mont_sqr_avx2_32(r, r, m, mp);
sp_2048_mont_mul_avx2_32(r, r, t[y], m, mp);
XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
sp_2048_mont_reduce_avx2_32(r, m, mp);
@ -1711,7 +1707,7 @@ static int sp_2048_to_mp(sp_digit* a, mp_int* r)
for (i = 0; i < 32; i++) {
r->dp[j] |= ((mp_digit)a[i]) << s;
if (s + 64 >= DIGIT_BIT) {
#if DIGIT_BIT < 64
#if DIGIT_BIT != 32 && DIGIT_BIT != 64
r->dp[j] &= (1l << DIGIT_BIT) - 1;
#endif
s = DIGIT_BIT - s;
@ -1775,6 +1771,220 @@ int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
return err;
}
#ifdef HAVE_FFDHE_2048
extern void sp_2048_lshift_32(sp_digit* r, const sp_digit* a, int n);
#ifdef HAVE_INTEL_AVX2
/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
*
* r A single precision number that is the result of the operation.
* e A single precision number that is the exponent.
* bits The number of bits in the exponent.
* m A single precision number that is the modulus.
* returns 0 on success and MEMORY_E on dynamic memory allocation failure.
*/
static int sp_2048_mod_exp_2_avx2_32(sp_digit* r, sp_digit* e, int bits,
sp_digit* m)
{
#ifndef WOLFSSL_SMALL_STACK
sp_digit nd[64];
sp_digit td[33];
#else
sp_digit* td;
#endif
sp_digit* norm;
sp_digit* tmp;
sp_digit mp = 1;
sp_digit n, o;
sp_digit mask;
int i;
int c, y;
int err = MP_OKAY;
#ifdef WOLFSSL_SMALL_STACK
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 97, NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (td == NULL)
err = MEMORY_E;
if (err == MP_OKAY) {
norm = td;
tmp = td + 64;
}
#else
norm = nd;
tmp = td;
#endif
if (err == MP_OKAY) {
sp_2048_mont_setup(m, &mp);
sp_2048_mont_norm_32(norm, m);
i = (bits - 1) / 64;
n = e[i--];
c = bits & 63;
if (c == 0)
c = 64;
c -= bits % 6;
y = n >> c;
n <<= 64 - c;
sp_2048_lshift_32(r, norm, y);
for (; i>=0 || c>=6; ) {
if (c == 0) {
n = e[i--];
y = n >> 58;
n <<= 6;
c = 58;
}
else if (c < 6) {
y = n >> 58;
n = e[i--];
c = 6 - c;
y |= n >> (64 - c);
n <<= c;
c = 64 - c;
}
else {
y = (n >> 58) & 0x3f;
n <<= 6;
c -= 6;
}
sp_2048_mont_sqr_avx2_32(r, r, m, mp);
sp_2048_mont_sqr_avx2_32(r, r, m, mp);
sp_2048_mont_sqr_avx2_32(r, r, m, mp);
sp_2048_mont_sqr_avx2_32(r, r, m, mp);
sp_2048_mont_sqr_avx2_32(r, r, m, mp);
sp_2048_mont_sqr_avx2_32(r, r, m, mp);
sp_2048_lshift_32(r, r, y);
sp_2048_mul_d_avx2_32(tmp, norm, r[32]);
r[32] = 0;
o = sp_2048_add_32(r, r, tmp);
sp_2048_cond_sub_32(r, r, m, (sp_digit)0 - o);
}
XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
sp_2048_mont_reduce_avx2_32(r, m, mp);
mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
sp_2048_cond_sub_32(r, r, m, mask);
}
#ifdef WOLFSSL_SMALL_STACK
if (td != NULL)
XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return err;
}
#endif /* HAVE_INTEL_AVX2 */
/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
*
* r A single precision number that is the result of the operation.
* e A single precision number that is the exponent.
* bits The number of bits in the exponent.
* m A single precision number that is the modulus.
* returns 0 on success and MEMORY_E on dynamic memory allocation failure.
*/
static int sp_2048_mod_exp_2_32(sp_digit* r, sp_digit* e, int bits,
sp_digit* m)
{
#ifndef WOLFSSL_SMALL_STACK
sp_digit nd[64];
sp_digit td[33];
#else
sp_digit* td;
#endif
sp_digit* norm;
sp_digit* tmp;
sp_digit mp = 1;
sp_digit n, o;
sp_digit mask;
int i;
int c, y;
int err = MP_OKAY;
#ifdef WOLFSSL_SMALL_STACK
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 97, NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (td == NULL)
err = MEMORY_E;
if (err == MP_OKAY) {
norm = td;
tmp = td + 64;
}
#else
norm = nd;
tmp = td;
#endif
if (err == MP_OKAY) {
sp_2048_mont_setup(m, &mp);
sp_2048_mont_norm_32(norm, m);
i = (bits - 1) / 64;
n = e[i--];
c = bits & 63;
if (c == 0)
c = 64;
c -= bits % 6;
y = n >> c;
n <<= 64 - c;
sp_2048_lshift_32(r, norm, y);
for (; i>=0 || c>=6; ) {
if (c == 0) {
n = e[i--];
y = n >> 58;
n <<= 6;
c = 58;
}
else if (c < 6) {
y = n >> 58;
n = e[i--];
c = 6 - c;
y |= n >> (64 - c);
n <<= c;
c = 64 - c;
}
else {
y = (n >> 58) & 0x3f;
n <<= 6;
c -= 6;
}
sp_2048_mont_sqr_32(r, r, m, mp);
sp_2048_mont_sqr_32(r, r, m, mp);
sp_2048_mont_sqr_32(r, r, m, mp);
sp_2048_mont_sqr_32(r, r, m, mp);
sp_2048_mont_sqr_32(r, r, m, mp);
sp_2048_mont_sqr_32(r, r, m, mp);
sp_2048_lshift_32(r, r, y);
sp_2048_mul_d_32(tmp, norm, r[32]);
r[32] = 0;
o = sp_2048_add_32(r, r, tmp);
sp_2048_cond_sub_32(r, r, m, (sp_digit)0 - o);
}
XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
sp_2048_mont_reduce_32(r, m, mp);
mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
sp_2048_cond_sub_32(r, r, m, mask);
}
#ifdef WOLFSSL_SMALL_STACK
if (td != NULL)
XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return err;
}
#endif /* HAVE_FFDHE_2048 */
/* Perform the modular exponentiation for Diffie-Hellman.
*
* base Base.
@ -1808,12 +2018,25 @@ int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
sp_2048_from_bin(e, 32, exp, expLen);
sp_2048_from_mp(m, 32, mod);
#ifdef HAVE_FFDHE_2048
if (base->used == 1 && base->dp[0] == 2 && m[31] == (sp_digit)-1) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_2048_mod_exp_avx2_32(r, b, e, expLen * 8, m, 0);
else
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_2048_mod_exp_2_avx2_32(r, e, expLen * 8, m);
else
#endif
err = sp_2048_mod_exp_32(r, b, e, expLen * 8, m, 0);
err = sp_2048_mod_exp_2_32(r, e, expLen * 8, m);
}
else
#endif
{
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_2048_mod_exp_avx2_32(r, b, e, expLen * 8, m, 0);
else
#endif
err = sp_2048_mod_exp_32(r, b, e, expLen * 8, m, 0);
}
}
if (err == MP_OKAY) {
@ -1934,14 +2157,14 @@ static void sp_3072_from_mp(sp_digit* r, int max, mp_int* a)
s = 64 - s;
if (j + 1 >= max)
break;
r[++j] = a->dp[i] >> s;
r[++j] = (sp_digit)(a->dp[i] >> s);
while (s + 64 <= DIGIT_BIT) {
s += 64;
r[j] &= 0xffffffffffffffffl;
if (j + 1 >= max)
break;
if (s < DIGIT_BIT)
r[++j] = a->dp[i] >> s;
r[++j] = (sp_digit)(a->dp[i] >> s);
else
r[++j] = 0;
}
@ -2022,7 +2245,7 @@ extern sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b
* a A single precision integer.
* m Mask to AND against each digit.
*/
static void sp_3072_mask_24(sp_digit* r, sp_digit* a, sp_digit m)
static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
int i;
@ -2412,9 +2635,12 @@ static int sp_3072_mod_exp_24(sp_digit* r, sp_digit* a, sp_digit* e,
i = (bits - 1) / 64;
n = e[i--];
y = n >> 59;
n <<= 5;
c = 59;
c = bits & 63;
if (c == 0)
c = 64;
c -= bits % 5;
y = n >> c;
n <<= 64 - c;
XMEMCPY(r, t[y], sizeof(sp_digit) * 24);
for (; i>=0 || c>=5; ) {
if (c == 0) {
@ -2445,10 +2671,6 @@ static int sp_3072_mod_exp_24(sp_digit* r, sp_digit* a, sp_digit* e,
sp_3072_mont_mul_24(r, r, t[y], m, mp);
}
y = e[0] & ((1 << c) - 1);
for (; c > 0; c--)
sp_3072_mont_sqr_24(r, r, m, mp);
sp_3072_mont_mul_24(r, r, t[y], m, mp);
XMEMSET(&r[24], 0, sizeof(sp_digit) * 24);
sp_3072_mont_reduce_24(r, m, mp);
@ -2592,9 +2814,12 @@ static int sp_3072_mod_exp_avx2_24(sp_digit* r, sp_digit* a, sp_digit* e,
i = (bits - 1) / 64;
n = e[i--];
y = n >> 59;
n <<= 5;
c = 59;
c = bits & 63;
if (c == 0)
c = 64;
c -= bits % 5;
y = n >> c;
n <<= 64 - c;
XMEMCPY(r, t[y], sizeof(sp_digit) * 24);
for (; i>=0 || c>=5; ) {
if (c == 0) {
@ -2625,10 +2850,6 @@ static int sp_3072_mod_exp_avx2_24(sp_digit* r, sp_digit* a, sp_digit* e,
sp_3072_mont_mul_avx2_24(r, r, t[y], m, mp);
}
y = e[0] & ((1 << c) - 1);
for (; c > 0; c--)
sp_3072_mont_sqr_avx2_24(r, r, m, mp);
sp_3072_mont_mul_avx2_24(r, r, t[y], m, mp);
XMEMSET(&r[24], 0, sizeof(sp_digit) * 24);
sp_3072_mont_reduce_avx2_24(r, m, mp);
@ -2723,7 +2944,7 @@ static WC_INLINE sp_digit div_3072_word_48(sp_digit d1, sp_digit d0,
* a A single precision integer.
* m Mask to AND against each digit.
*/
static void sp_3072_mask_48(sp_digit* r, sp_digit* a, sp_digit m)
static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
int i;
@ -2957,9 +3178,12 @@ static int sp_3072_mod_exp_48(sp_digit* r, sp_digit* a, sp_digit* e,
i = (bits - 1) / 64;
n = e[i--];
y = n >> 59;
n <<= 5;
c = 59;
c = bits & 63;
if (c == 0)
c = 64;
c -= bits % 5;
y = n >> c;
n <<= 64 - c;
XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
for (; i>=0 || c>=5; ) {
if (c == 0) {
@ -2990,10 +3214,6 @@ static int sp_3072_mod_exp_48(sp_digit* r, sp_digit* a, sp_digit* e,
sp_3072_mont_mul_48(r, r, t[y], m, mp);
}
y = e[0] & ((1 << c) - 1);
for (; c > 0; c--)
sp_3072_mont_sqr_48(r, r, m, mp);
sp_3072_mont_mul_48(r, r, t[y], m, mp);
XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
sp_3072_mont_reduce_48(r, m, mp);
@ -3139,9 +3359,12 @@ static int sp_3072_mod_exp_avx2_48(sp_digit* r, sp_digit* a, sp_digit* e,
i = (bits - 1) / 64;
n = e[i--];
y = n >> 59;
n <<= 5;
c = 59;
c = bits & 63;
if (c == 0)
c = 64;
c -= bits % 5;
y = n >> c;
n <<= 64 - c;
XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
for (; i>=0 || c>=5; ) {
if (c == 0) {
@ -3172,10 +3395,6 @@ static int sp_3072_mod_exp_avx2_48(sp_digit* r, sp_digit* a, sp_digit* e,
sp_3072_mont_mul_avx2_48(r, r, t[y], m, mp);
}
y = e[0] & ((1 << c) - 1);
for (; c > 0; c--)
sp_3072_mont_sqr_avx2_48(r, r, m, mp);
sp_3072_mont_mul_avx2_48(r, r, t[y], m, mp);
XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
sp_3072_mont_reduce_avx2_48(r, m, mp);
@ -3543,7 +3762,7 @@ static int sp_3072_to_mp(sp_digit* a, mp_int* r)
for (i = 0; i < 48; i++) {
r->dp[j] |= ((mp_digit)a[i]) << s;
if (s + 64 >= DIGIT_BIT) {
#if DIGIT_BIT < 64
#if DIGIT_BIT != 32 && DIGIT_BIT != 64
r->dp[j] &= (1l << DIGIT_BIT) - 1;
#endif
s = DIGIT_BIT - s;
@ -3607,6 +3826,220 @@ int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
return err;
}
#ifdef HAVE_FFDHE_3072
extern void sp_3072_lshift_48(sp_digit* r, const sp_digit* a, int n);
#ifdef HAVE_INTEL_AVX2
/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
*
* r A single precision number that is the result of the operation.
* e A single precision number that is the exponent.
* bits The number of bits in the exponent.
* m A single precision number that is the modulus.
* returns 0 on success and MEMORY_E on dynamic memory allocation failure.
*/
static int sp_3072_mod_exp_2_avx2_48(sp_digit* r, sp_digit* e, int bits,
sp_digit* m)
{
#ifndef WOLFSSL_SMALL_STACK
sp_digit nd[96];
sp_digit td[49];
#else
sp_digit* td;
#endif
sp_digit* norm;
sp_digit* tmp;
sp_digit mp = 1;
sp_digit n, o;
sp_digit mask;
int i;
int c, y;
int err = MP_OKAY;
#ifdef WOLFSSL_SMALL_STACK
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 145, NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (td == NULL)
err = MEMORY_E;
if (err == MP_OKAY) {
norm = td;
tmp = td + 96;
}
#else
norm = nd;
tmp = td;
#endif
if (err == MP_OKAY) {
sp_3072_mont_setup(m, &mp);
sp_3072_mont_norm_48(norm, m);
i = (bits - 1) / 64;
n = e[i--];
c = bits & 63;
if (c == 0)
c = 64;
c -= bits % 6;
y = n >> c;
n <<= 64 - c;
sp_3072_lshift_48(r, norm, y);
for (; i>=0 || c>=6; ) {
if (c == 0) {
n = e[i--];
y = n >> 58;
n <<= 6;
c = 58;
}
else if (c < 6) {
y = n >> 58;
n = e[i--];
c = 6 - c;
y |= n >> (64 - c);
n <<= c;
c = 64 - c;
}
else {
y = (n >> 58) & 0x3f;
n <<= 6;
c -= 6;
}
sp_3072_mont_sqr_avx2_48(r, r, m, mp);
sp_3072_mont_sqr_avx2_48(r, r, m, mp);
sp_3072_mont_sqr_avx2_48(r, r, m, mp);
sp_3072_mont_sqr_avx2_48(r, r, m, mp);
sp_3072_mont_sqr_avx2_48(r, r, m, mp);
sp_3072_mont_sqr_avx2_48(r, r, m, mp);
sp_3072_lshift_48(r, r, y);
sp_3072_mul_d_avx2_48(tmp, norm, r[48]);
r[48] = 0;
o = sp_3072_add_48(r, r, tmp);
sp_3072_cond_sub_48(r, r, m, (sp_digit)0 - o);
}
XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
sp_3072_mont_reduce_avx2_48(r, m, mp);
mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
sp_3072_cond_sub_48(r, r, m, mask);
}
#ifdef WOLFSSL_SMALL_STACK
if (td != NULL)
XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return err;
}
#endif /* HAVE_INTEL_AVX2 */
/* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
*
* r A single precision number that is the result of the operation.
* e A single precision number that is the exponent.
* bits The number of bits in the exponent.
* m A single precision number that is the modulus.
* returns 0 on success and MEMORY_E on dynamic memory allocation failure.
*/
static int sp_3072_mod_exp_2_48(sp_digit* r, sp_digit* e, int bits,
sp_digit* m)
{
#ifndef WOLFSSL_SMALL_STACK
sp_digit nd[96];
sp_digit td[49];
#else
sp_digit* td;
#endif
sp_digit* norm;
sp_digit* tmp;
sp_digit mp = 1;
sp_digit n, o;
sp_digit mask;
int i;
int c, y;
int err = MP_OKAY;
#ifdef WOLFSSL_SMALL_STACK
td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 145, NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (td == NULL)
err = MEMORY_E;
if (err == MP_OKAY) {
norm = td;
tmp = td + 96;
}
#else
norm = nd;
tmp = td;
#endif
if (err == MP_OKAY) {
sp_3072_mont_setup(m, &mp);
sp_3072_mont_norm_48(norm, m);
i = (bits - 1) / 64;
n = e[i--];
c = bits & 63;
if (c == 0)
c = 64;
c -= bits % 6;
y = n >> c;
n <<= 64 - c;
sp_3072_lshift_48(r, norm, y);
for (; i>=0 || c>=6; ) {
if (c == 0) {
n = e[i--];
y = n >> 58;
n <<= 6;
c = 58;
}
else if (c < 6) {
y = n >> 58;
n = e[i--];
c = 6 - c;
y |= n >> (64 - c);
n <<= c;
c = 64 - c;
}
else {
y = (n >> 58) & 0x3f;
n <<= 6;
c -= 6;
}
sp_3072_mont_sqr_48(r, r, m, mp);
sp_3072_mont_sqr_48(r, r, m, mp);
sp_3072_mont_sqr_48(r, r, m, mp);
sp_3072_mont_sqr_48(r, r, m, mp);
sp_3072_mont_sqr_48(r, r, m, mp);
sp_3072_mont_sqr_48(r, r, m, mp);
sp_3072_lshift_48(r, r, y);
sp_3072_mul_d_48(tmp, norm, r[48]);
r[48] = 0;
o = sp_3072_add_48(r, r, tmp);
sp_3072_cond_sub_48(r, r, m, (sp_digit)0 - o);
}
XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
sp_3072_mont_reduce_48(r, m, mp);
mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
sp_3072_cond_sub_48(r, r, m, mask);
}
#ifdef WOLFSSL_SMALL_STACK
if (td != NULL)
XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return err;
}
#endif /* HAVE_FFDHE_3072 */
/* Perform the modular exponentiation for Diffie-Hellman.
*
* base Base.
@ -3640,12 +4073,25 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
sp_3072_from_bin(e, 48, exp, expLen);
sp_3072_from_mp(m, 48, mod);
#ifdef HAVE_FFDHE_3072
if (base->used == 1 && base->dp[0] == 2 && m[47] == (sp_digit)-1) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_3072_mod_exp_avx2_48(r, b, e, expLen * 8, m, 0);
else
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_3072_mod_exp_2_avx2_48(r, e, expLen * 8, m);
else
#endif
err = sp_3072_mod_exp_48(r, b, e, expLen * 8, m, 0);
err = sp_3072_mod_exp_2_48(r, e, expLen * 8, m);
}
else
#endif
{
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_3072_mod_exp_avx2_48(r, b, e, expLen * 8, m, 0);
else
#endif
err = sp_3072_mod_exp_48(r, b, e, expLen * 8, m, 0);
}
}
if (err == MP_OKAY) {
@ -3911,14 +4357,14 @@ static void sp_256_from_mp(sp_digit* r, int max, mp_int* a)
s = 64 - s;
if (j + 1 >= max)
break;
r[++j] = a->dp[i] >> s;
r[++j] = (sp_digit)(a->dp[i] >> s);
while (s + 64 <= DIGIT_BIT) {
s += 64;
r[j] &= 0xffffffffffffffffl;
if (j + 1 >= max)
break;
if (s < DIGIT_BIT)
r[++j] = a->dp[i] >> s;
r[++j] = (sp_digit)(a->dp[i] >> s);
else
r[++j] = 0;
}
@ -4012,7 +4458,7 @@ static int sp_256_to_mp(sp_digit* a, mp_int* r)
for (i = 0; i < 4; i++) {
r->dp[j] |= ((mp_digit)a[i]) << s;
if (s + 64 >= DIGIT_BIT) {
#if DIGIT_BIT < 64
#if DIGIT_BIT != 32 && DIGIT_BIT != 64
r->dp[j] &= (1l << DIGIT_BIT) - 1;
#endif
s = DIGIT_BIT - s;
@ -19921,7 +20367,7 @@ static WC_INLINE sp_digit div_256_word_4(sp_digit d1, sp_digit d0,
* a A single precision integer.
* m Mask to AND against each digit.
*/
static void sp_256_mask_4(sp_digit* r, sp_digit* a, sp_digit m)
static void sp_256_mask_4(sp_digit* r, const sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
int i;

View File

@ -8474,6 +8474,123 @@ L_mont_loop_avx2_32:
.size sp_2048_mont_reduce_avx2_32,.-sp_2048_mont_reduce_avx2_32
#endif /* __APPLE__ */
#endif /* HAVE_INTEL_AVX2 */
/* Shift number left by n bit. (r = a << n)
*
* r Result of left shift by n.
* a Number to shift.
* n Amoutnt o shift.
*/
#ifndef __APPLE__
.globl sp_2048_lshift_32
.type sp_2048_lshift_32,@function
.align 16
sp_2048_lshift_32:
#else
.globl _sp_2048_lshift_32
.p2align 4
_sp_2048_lshift_32:
#endif /* __APPLE__ */
movq %rdx, %rcx
movq $0, %r10
movq 216(%rsi), %r11
movq 224(%rsi), %rdx
movq 232(%rsi), %rax
movq 240(%rsi), %r8
movq 248(%rsi), %r9
shldq %cl, %r9, %r10
shldq %cl, %r8, %r9
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shldq %cl, %r11, %rdx
movq %rdx, 224(%rdi)
movq %rax, 232(%rdi)
movq %r8, 240(%rdi)
movq %r9, 248(%rdi)
movq %r10, 256(%rdi)
movq 184(%rsi), %r9
movq 192(%rsi), %rdx
movq 200(%rsi), %rax
movq 208(%rsi), %r8
shldq %cl, %r8, %r11
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shldq %cl, %r9, %rdx
movq %rdx, 192(%rdi)
movq %rax, 200(%rdi)
movq %r8, 208(%rdi)
movq %r11, 216(%rdi)
movq 152(%rsi), %r11
movq 160(%rsi), %rdx
movq 168(%rsi), %rax
movq 176(%rsi), %r8
shldq %cl, %r8, %r9
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shldq %cl, %r11, %rdx
movq %rdx, 160(%rdi)
movq %rax, 168(%rdi)
movq %r8, 176(%rdi)
movq %r9, 184(%rdi)
movq 120(%rsi), %r9
movq 128(%rsi), %rdx
movq 136(%rsi), %rax
movq 144(%rsi), %r8
shldq %cl, %r8, %r11
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shldq %cl, %r9, %rdx
movq %rdx, 128(%rdi)
movq %rax, 136(%rdi)
movq %r8, 144(%rdi)
movq %r11, 152(%rdi)
movq 88(%rsi), %r11
movq 96(%rsi), %rdx
movq 104(%rsi), %rax
movq 112(%rsi), %r8
shldq %cl, %r8, %r9
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shldq %cl, %r11, %rdx
movq %rdx, 96(%rdi)
movq %rax, 104(%rdi)
movq %r8, 112(%rdi)
movq %r9, 120(%rdi)
movq 56(%rsi), %r9
movq 64(%rsi), %rdx
movq 72(%rsi), %rax
movq 80(%rsi), %r8
shldq %cl, %r8, %r11
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shldq %cl, %r9, %rdx
movq %rdx, 64(%rdi)
movq %rax, 72(%rdi)
movq %r8, 80(%rdi)
movq %r11, 88(%rdi)
movq 24(%rsi), %r11
movq 32(%rsi), %rdx
movq 40(%rsi), %rax
movq 48(%rsi), %r8
shldq %cl, %r8, %r9
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shldq %cl, %r11, %rdx
movq %rdx, 32(%rdi)
movq %rax, 40(%rdi)
movq %r8, 48(%rdi)
movq %r9, 56(%rdi)
movq (%rsi), %rdx
movq 8(%rsi), %rax
movq 16(%rsi), %r8
shldq %cl, %r8, %r11
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shlq %cl, %rdx
movq %rdx, (%rdi)
movq %rax, 8(%rdi)
movq %r8, 16(%rdi)
movq %r11, 24(%rdi)
repz retq
/* Multiply a and b into r. (r = a * b)
*
* r A single precision integer.
@ -24330,6 +24447,171 @@ L_mont_loop_avx2_48:
.size sp_3072_mont_reduce_avx2_48,.-sp_3072_mont_reduce_avx2_48
#endif /* __APPLE__ */
#endif /* HAVE_INTEL_AVX2 */
/* Shift number left by n bit. (r = a << n)
*
* r Result of left shift by n.
* a Number to shift.
* n Amoutnt o shift.
*/
#ifndef __APPLE__
.globl sp_3072_lshift_48
.type sp_3072_lshift_48,@function
.align 16
sp_3072_lshift_48:
#else
.globl _sp_3072_lshift_48
.p2align 4
_sp_3072_lshift_48:
#endif /* __APPLE__ */
movq %rdx, %rcx
movq $0, %r10
movq 344(%rsi), %r11
movq 352(%rsi), %rdx
movq 360(%rsi), %rax
movq 368(%rsi), %r8
movq 376(%rsi), %r9
shldq %cl, %r9, %r10
shldq %cl, %r8, %r9
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shldq %cl, %r11, %rdx
movq %rdx, 352(%rdi)
movq %rax, 360(%rdi)
movq %r8, 368(%rdi)
movq %r9, 376(%rdi)
movq %r10, 384(%rdi)
movq 312(%rsi), %r9
movq 320(%rsi), %rdx
movq 328(%rsi), %rax
movq 336(%rsi), %r8
shldq %cl, %r8, %r11
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shldq %cl, %r9, %rdx
movq %rdx, 320(%rdi)
movq %rax, 328(%rdi)
movq %r8, 336(%rdi)
movq %r11, 344(%rdi)
movq 280(%rsi), %r11
movq 288(%rsi), %rdx
movq 296(%rsi), %rax
movq 304(%rsi), %r8
shldq %cl, %r8, %r9
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shldq %cl, %r11, %rdx
movq %rdx, 288(%rdi)
movq %rax, 296(%rdi)
movq %r8, 304(%rdi)
movq %r9, 312(%rdi)
movq 248(%rsi), %r9
movq 256(%rsi), %rdx
movq 264(%rsi), %rax
movq 272(%rsi), %r8
shldq %cl, %r8, %r11
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shldq %cl, %r9, %rdx
movq %rdx, 256(%rdi)
movq %rax, 264(%rdi)
movq %r8, 272(%rdi)
movq %r11, 280(%rdi)
movq 216(%rsi), %r11
movq 224(%rsi), %rdx
movq 232(%rsi), %rax
movq 240(%rsi), %r8
shldq %cl, %r8, %r9
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shldq %cl, %r11, %rdx
movq %rdx, 224(%rdi)
movq %rax, 232(%rdi)
movq %r8, 240(%rdi)
movq %r9, 248(%rdi)
movq 184(%rsi), %r9
movq 192(%rsi), %rdx
movq 200(%rsi), %rax
movq 208(%rsi), %r8
shldq %cl, %r8, %r11
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shldq %cl, %r9, %rdx
movq %rdx, 192(%rdi)
movq %rax, 200(%rdi)
movq %r8, 208(%rdi)
movq %r11, 216(%rdi)
movq 152(%rsi), %r11
movq 160(%rsi), %rdx
movq 168(%rsi), %rax
movq 176(%rsi), %r8
shldq %cl, %r8, %r9
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shldq %cl, %r11, %rdx
movq %rdx, 160(%rdi)
movq %rax, 168(%rdi)
movq %r8, 176(%rdi)
movq %r9, 184(%rdi)
movq 120(%rsi), %r9
movq 128(%rsi), %rdx
movq 136(%rsi), %rax
movq 144(%rsi), %r8
shldq %cl, %r8, %r11
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shldq %cl, %r9, %rdx
movq %rdx, 128(%rdi)
movq %rax, 136(%rdi)
movq %r8, 144(%rdi)
movq %r11, 152(%rdi)
movq 88(%rsi), %r11
movq 96(%rsi), %rdx
movq 104(%rsi), %rax
movq 112(%rsi), %r8
shldq %cl, %r8, %r9
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shldq %cl, %r11, %rdx
movq %rdx, 96(%rdi)
movq %rax, 104(%rdi)
movq %r8, 112(%rdi)
movq %r9, 120(%rdi)
movq 56(%rsi), %r9
movq 64(%rsi), %rdx
movq 72(%rsi), %rax
movq 80(%rsi), %r8
shldq %cl, %r8, %r11
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shldq %cl, %r9, %rdx
movq %rdx, 64(%rdi)
movq %rax, 72(%rdi)
movq %r8, 80(%rdi)
movq %r11, 88(%rdi)
movq 24(%rsi), %r11
movq 32(%rsi), %rdx
movq 40(%rsi), %rax
movq 48(%rsi), %r8
shldq %cl, %r8, %r9
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shldq %cl, %r11, %rdx
movq %rdx, 32(%rdi)
movq %rax, 40(%rdi)
movq %r8, 48(%rdi)
movq %r9, 56(%rdi)
movq (%rsi), %rdx
movq 8(%rsi), %rax
movq 16(%rsi), %r8
shldq %cl, %r8, %r11
shldq %cl, %rax, %r8
shldq %cl, %rdx, %rax
shlq %cl, %rdx
movq %rdx, (%rdi)
movq %rax, 8(%rdi)
movq %r8, 16(%rdi)
movq %r11, 24(%rdi)
repz retq
/* Conditionally copy a into r using the mask m.
* m is -1 to copy and 0 when not.
*

View File

@ -12910,6 +12910,79 @@ static int dh_test_check_pubvalue(void)
}
#endif
#if defined(WOLFSSL_HAVE_SP_DH) && defined(HAVE_FFDHE)
#ifdef HAVE_FFDHE_3072
#define FFDHE_KEY_SIZE (3072/8)
#else
#define FFDHE_KEY_SIZE (2048/8)
#endif
static int dh_test_ffdhe(WC_RNG *rng, const DhParams* params)
{
int ret;
word32 privSz, pubSz, privSz2, pubSz2;
byte priv[FFDHE_KEY_SIZE];
byte pub[FFDHE_KEY_SIZE];
byte priv2[FFDHE_KEY_SIZE];
byte pub2[FFDHE_KEY_SIZE];
byte agree[FFDHE_KEY_SIZE];
byte agree2[FFDHE_KEY_SIZE];
word32 agreeSz = (word32)sizeof(agree);
word32 agreeSz2 = (word32)sizeof(agree2);
DhKey key;
DhKey key2;
ret = wc_InitDhKey_ex(&key, HEAP_HINT, devId);
if (ret != 0) {
ERROR_OUT(-7180, done);
}
ret = wc_InitDhKey_ex(&key2, HEAP_HINT, devId);
if (ret != 0) {
ERROR_OUT(-7181, done);
}
ret = wc_DhSetKey(&key, params->p, params->p_len, params->g, params->g_len);
if (ret != 0) {
ERROR_OUT(-7182, done);
}
ret = wc_DhSetKey(&key2, params->p, params->p_len, params->g,
params->g_len);
if (ret != 0) {
ERROR_OUT(-7183, done);
}
ret = wc_DhGenerateKeyPair(&key, rng, priv, &privSz, pub, &pubSz);
if (ret != 0) {
ERROR_OUT(-7184, done);
}
ret = wc_DhGenerateKeyPair(&key2, rng, priv2, &privSz2, pub2, &pubSz2);
if (ret != 0) {
ERROR_OUT(-7185, done);
}
ret = wc_DhAgree(&key, agree, &agreeSz, priv, privSz, pub2, pubSz2);
if (ret != 0) {
ERROR_OUT(-7186, done);
}
ret = wc_DhAgree(&key2, agree2, &agreeSz2, priv2, privSz2, pub, pubSz);
if (ret != 0) {
ERROR_OUT(-7187, done);
}
if (agreeSz != agreeSz2 || XMEMCMP(agree, agree2, agreeSz)) {
ERROR_OUT(-7188, done);
}
done:
return ret;
}
#endif /* WOLFSSL_HAVE_SP_DH && HAVE_FFDHE */
int dh_test(void)
{
int ret;
@ -13062,6 +13135,17 @@ int dh_test(void)
ret = dh_test_check_pubvalue();
#endif
#ifdef WOLFSSL_HAVE_SP_DH
/* Specialized code for key gen when using FFDHE-2048 and FFDHE-3072. */
#ifdef HAVE_FFDHE_2048
if (ret == 0)
ret = dh_test_ffdhe(&rng, wc_Dh_ffdhe2048_Get());
#endif
#ifdef HAVE_FFDHE_3072
if (ret == 0)
ret = dh_test_ffdhe(&rng, wc_Dh_ffdhe3072_Get());
#endif
#endif /* WOLFSSL_HAVE_SP_DH */
wc_FreeDhKey(&key);
keyInit = 0;