Single Precision: ARM Thumb assembly implementation

Remove AVX2 code from platform-specific code that doesn't support it.
Fix sp_lshd to memmove the correct amount.
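sp_lshd itself lives in a file whose diff is suppressed below, so that fix is not visible here. sp_lshd shifts a multi-precision number left by whole digits; the bug class named in the message is an XMEMMOVE of the wrong number of digits. A minimal sketch of the corrected shape, with the sp_int/sp_digit layout assumed from the surrounding SP code rather than copied from the patch:

/* Shift a left by s digits (a <<= s * word size).
 * Move only the a->used digits that hold data, then zero the
 * vacated low digits. */
static void sp_lshd(sp_int* a, int s)
{
    XMEMMOVE(a->dp + s, a->dp, a->used * sizeof(sp_digit));
    a->used += s;
    XMEMSET(a->dp, 0, s * sizeof(sp_digit));
}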
pull/1905/head
Sean Parkinson 2018-09-25 09:10:45 +10:00
parent fb699acec4
commit 741301bb2c
10 changed files with 16397 additions and 1566 deletions


@@ -3722,8 +3722,13 @@ if test "$ENABLED_SP_ASM" = "yes"; then
ENABLED_SP_ARM64_ASM=yes
;;
*arm*)
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM32_ASM"
ENABLED_SP_ARM32_ASM=yes
if test "$host_alias" = "thumb"; then
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM_THUMB_ASM -mthumb -march=armv6"
ENABLED_SP_ARM_THUMB_ASM=yes
else
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM32_ASM"
ENABLED_SP_ARM32_ASM=yes
fi
;;
*x86_64*)
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_X86_64_ASM"
@@ -3771,6 +3776,7 @@ AM_CONDITIONAL([BUILD_SP], [test "x$ENABLED_SP" = "xyes"])
AM_CONDITIONAL([BUILD_SP_C], [test "x$ENABLED_SP" = "xyes" && test "x$ENABLED_SP_ASM" = "xno" ])
AM_CONDITIONAL([BUILD_SP_ARM64], [test "x$ENABLED_SP_ARM64_ASM" = "xyes" ])
AM_CONDITIONAL([BUILD_SP_ARM32], [test "x$ENABLED_SP_ARM32_ASM" = "xyes" ])
AM_CONDITIONAL([BUILD_SP_ARM_THUMB], [test "x$ENABLED_SP_ARM_THUMB_ASM" = "xyes" ])
AM_CONDITIONAL([BUILD_SP_X86_64], [test "x$ENABLED_SP_X86_64_ASM" = "xyes" ])
AM_CONDITIONAL([BUILD_SP_INT], [test "x$ENABLED_SP_MATH" = "xyes" ])
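With the configure.ac hunk above, building for a Thumb host (e.g. ./configure --host=thumb) now selects the new Thumb implementation and compiles it with -mthumb -march=armv6, while every other *arm* host keeps the existing ARM32 assembly path.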


@@ -183,6 +183,9 @@ endif
if BUILD_SP_ARM32
src_libwolfssl_la_SOURCES += wolfcrypt/src/sp_arm32.c
endif
if BUILD_SP_ARM_THUMB
src_libwolfssl_la_SOURCES += wolfcrypt/src/sp_armthumb.c
endif
if BUILD_SP_ARM64
src_libwolfssl_la_SOURCES += wolfcrypt/src/sp_arm64.c
endif
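The BUILD_SP_ARM_THUMB conditional defined in the configure.ac hunk above is what pulls wolfcrypt/src/sp_armthumb.c into the library sources here; that file is presumably the suppressed diff just below, which carries the bulk of this commit's roughly 16k added lines.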

File diff suppressed because it is too large.


@@ -2679,7 +2679,7 @@ static WC_INLINE int sp_2048_div_16(sp_digit* a, sp_digit* d, sp_digit* m,
}
r1 = sp_2048_cmp_16(t1, d) >= 0;
sp_2048_cond_sub_16(r, t1, t2, (sp_digit)0 - r1);
sp_2048_cond_sub_16(r, t1, d, (sp_digit)0 - r1);
return MP_OKAY;
}
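This and the following div fixes are all the same bug: after comparing t1 against the divisor d, the conditional subtraction must subtract d itself, not the scratch buffer t2. The same correction recurs for each operand size in the files below. The sp_*_cond_sub_* routines follow the usual constant-time pattern; a generic sketch of it (loop form and names assumed, not the generated size-specific code):

/* Subtract b from a word-wise when mask m is all ones; leave a
 * unchanged when m is zero. Returns the final borrow. */
static sp_digit sp_cond_sub(sp_digit* r, const sp_digit* a,
                            const sp_digit* b, sp_digit m, int n)
{
    sp_digit c = 0;
    int i;
    for (i = 0; i < n; i++) {
        sp_digit bm = b[i] & m;   /* 0 when m == 0 */
        sp_digit t  = a[i] - c;
        sp_digit c1 = (t > a[i]); /* borrow from the carry word */
        r[i] = t - bm;
        c = c1 | (r[i] > t);      /* borrow from the subtraction */
    }
    return c;
}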
@@ -4323,7 +4323,7 @@ static WC_INLINE int sp_2048_div_32(sp_digit* a, sp_digit* d, sp_digit* m,
}
r1 = sp_2048_cmp_32(t1, d) >= 0;
sp_2048_cond_sub_32(r, t1, t2, (sp_digit)0 - r1);
sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);
return MP_OKAY;
}
@@ -4374,7 +4374,7 @@ static WC_INLINE int sp_2048_div_32_cond(sp_digit* a, sp_digit* d, sp_digit* m,
}
r1 = sp_2048_cmp_32(t1, d) >= 0;
sp_2048_cond_sub_32(r, t1, t2, (sp_digit)0 - r1);
sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);
return MP_OKAY;
}
@@ -9158,7 +9158,7 @@ static WC_INLINE int sp_3072_div_24(sp_digit* a, sp_digit* d, sp_digit* m,
}
r1 = sp_3072_cmp_24(t1, d) >= 0;
sp_3072_cond_sub_24(r, t1, t2, (sp_digit)0 - r1);
sp_3072_cond_sub_24(r, t1, d, (sp_digit)0 - r1);
return MP_OKAY;
}
@@ -11314,7 +11314,7 @@ static WC_INLINE int sp_3072_div_48(sp_digit* a, sp_digit* d, sp_digit* m,
}
r1 = sp_3072_cmp_48(t1, d) >= 0;
sp_3072_cond_sub_48(r, t1, t2, (sp_digit)0 - r1);
sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);
return MP_OKAY;
}
@@ -11365,7 +11365,7 @@ static WC_INLINE int sp_3072_div_48_cond(sp_digit* a, sp_digit* d, sp_digit* m,
}
r1 = sp_3072_cmp_48(t1, d) >= 0;
sp_3072_cond_sub_48(r, t1, t2, (sp_digit)0 - r1);
sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);
return MP_OKAY;
}
@@ -12588,6 +12588,8 @@ static sp_digit sp_256_sub_4(sp_digit* r, const sp_digit* a,
return c;
}
#define sp_256_mont_reduce_order_4 sp_256_mont_reduce_4
/* Reduce the number back to 256 bits using Montgomery reduction.
*
* a A single precision number to reduce in place.
@@ -14392,9 +14394,6 @@ int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
sp_point* point;
sp_digit* k = NULL;
int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
err = sp_ecc_point_new(heap, p, point);
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
@@ -14410,11 +14409,6 @@ int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
sp_256_from_mp(k, 4, km);
sp_256_point_from_ecc_point_4(point, gm);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_avx2_4(point, point, k, map, heap);
else
#endif
err = sp_256_ecc_mulmod_4(point, point, k, map, heap);
}
if (err == MP_OKAY)
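The deletions in this and the following hunks are all instances of one pattern: a run-time CPUID check dispatching to an _avx2_ variant. Those variants only exist where the platform provides them (the x86_64 implementation); in this file the pattern was dead code inherited from the shared code generator, so only the plain call survives. The same removal repeats in the two generic C implementations below. The removed shape, for reference:

#ifdef HAVE_INTEL_AVX2
    word32 cpuid_flags = cpuid_get_flags();
    /* ... */
    if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
        err = sp_256_ecc_mulmod_avx2_4(point, point, k, map, heap);
    else
#endif
        err = sp_256_ecc_mulmod_4(point, point, k, map, heap);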
@@ -27651,8 +27645,10 @@ static int sp_256_ecc_mulmod_base_4(sp_point* r, sp_digit* k, int map,
}
i = 32;
XMEMCPY(t[v[i].mul].x, p256_table[i][v[i].i].x, sizeof(p256_table[i]->x));
XMEMCPY(t[v[i].mul].y, p256_table[i][v[i].i].y, sizeof(p256_table[i]->y));
XMEMCPY(t[v[i].mul].x, p256_table[i][v[i].i].x,
sizeof(p256_table[i]->x));
XMEMCPY(t[v[i].mul].y, p256_table[i][v[i].i].y,
sizeof(p256_table[i]->y));
t[v[i].mul].infinity = p256_table[i][v[i].i].infinity;
for (--i; i>=0; i--) {
XMEMCPY(p->x, p256_table[i][v[i].i].x, sizeof(p256_table[i]->x));
@@ -27660,7 +27656,8 @@ static int sp_256_ecc_mulmod_base_4(sp_point* r, sp_digit* k, int map,
p->infinity = p256_table[i][v[i].i].infinity;
sp_256_sub_4(negy, p256_mod, p->y);
sp_256_cond_copy_4(p->y, negy, (sp_digit)0 - v[i].neg);
sp_256_proj_point_add_qz1_4(&t[v[i].mul], &t[v[i].mul], p, tmp);
sp_256_proj_point_add_qz1_4(&t[v[i].mul], &t[v[i].mul], p,
tmp);
}
sp_256_proj_point_add_4(&t[2], &t[2], &t[3], tmp);
sp_256_proj_point_add_4(&t[1], &t[1], &t[3], tmp);
@@ -27710,9 +27707,6 @@ int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
sp_point* point;
sp_digit* k = NULL;
int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
err = sp_ecc_point_new(heap, p, point);
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
@@ -27727,11 +27721,6 @@ int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
if (err == MP_OKAY) {
sp_256_from_mp(k, 4, km);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_base_avx2_4(point, k, map, heap);
else
#endif
err = sp_256_ecc_mulmod_base_4(point, k, map, heap);
}
if (err == MP_OKAY)
@@ -27761,7 +27750,6 @@ static int sp_256_iszero_4(const sp_digit* a)
#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN */
/* Add 1 to a. (a = a + 1)
*
* r A single precision integer.
* a A single precision integer.
*/
static void sp_256_add_one_4(sp_digit* a)
@@ -27861,9 +27849,6 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
sp_point* infinity;
#endif
int err;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
(void)heap;
@@ -27885,23 +27870,11 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
if (err == MP_OKAY)
err = sp_256_ecc_gen_k_4(rng, k);
if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_base_avx2_4(point, k, 1, NULL);
else
#endif
err = sp_256_ecc_mulmod_base_4(point, k, 1, NULL);
}
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
err = sp_256_ecc_mulmod_avx2_4(infinity, point, p256_order, 1,
NULL);
}
else
#endif
err = sp_256_ecc_mulmod_4(infinity, point, p256_order, 1, NULL);
}
if (err == MP_OKAY) {
@@ -27980,9 +27953,6 @@ int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
sp_point* point = NULL;
sp_digit* k = NULL;
int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
if (*outLen < 32)
err = BUFFER_E;
@@ -28002,11 +27972,6 @@ int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
if (err == MP_OKAY) {
sp_256_from_mp(k, 4, priv);
sp_256_point_from_ecc_point_4(point, pub);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_avx2_4(point, point, k, 1, heap);
else
#endif
err = sp_256_ecc_mulmod_4(point, point, k, 1, heap);
}
if (err == MP_OKAY) {
@@ -28232,8 +28197,6 @@ static void sp_256_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b)
}
#endif /* WOLFSSL_SP_SMALL */
#ifdef HAVE_INTEL_AVX2
#endif /* HAVE_INTEL_AVX2 */
#endif
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* Sub b from a into a. (a -= b)
@@ -28431,7 +28394,7 @@ static WC_INLINE int sp_256_div_4(sp_digit* a, sp_digit* d, sp_digit* m,
}
r1 = sp_256_cmp_4(t1, d) >= 0;
sp_256_cond_sub_4(r, t1, t2, (sp_digit)0 - r1);
sp_256_cond_sub_4(r, t1, d, (sp_digit)0 - r1);
return MP_OKAY;
}
@@ -28642,7 +28605,7 @@ static const uint64_t p256_order_low[2] = {
static void sp_256_mont_mul_order_4(sp_digit* r, sp_digit* a, sp_digit* b)
{
sp_256_mul_4(r, a, b);
sp_256_mont_reduce_4(r, p256_order, p256_mp_order);
sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
}
/* Square number mod the order of P256 curve. (r = a * a mod order)
@@ -28653,7 +28616,7 @@ static void sp_256_mont_mul_order_4(sp_digit* r, sp_digit* a, sp_digit* b)
static void sp_256_mont_sqr_order_4(sp_digit* r, sp_digit* a)
{
sp_256_sqr_4(r, a);
sp_256_mont_reduce_4(r, p256_order, p256_mp_order);
sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
}
#ifndef WOLFSSL_SP_SMALL
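Both wrappers above operate modulo the group order n rather than the field prime; with R = 2^256 they return a*b*R^-1 mod n and a^2*R^-1 mod n respectively. The rename routes them through the order-named reduction, which the earlier hunk aliases to the same code (#define sp_256_mont_reduce_order_4 sp_256_mont_reduce_4), so behaviour is unchanged in this file; the rename matters for implementations whose order reduction is a distinct routine. The same rename recurs in the other implementations below.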
@@ -28768,143 +28731,6 @@ static void sp_256_mont_inv_order_4(sp_digit* r, sp_digit* a,
#endif /* WOLFSSL_SP_SMALL */
}
#ifdef HAVE_INTEL_AVX2
/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
*
* r Result of the multiplication.
* a First operand of the multiplication.
* b Second operand of the multiplication.
*/
static void sp_256_mont_mul_order_avx2_4(sp_digit* r, sp_digit* a, sp_digit* b)
{
sp_256_mul_avx2_4(r, a, b);
sp_256_mont_reduce_avx2_4(r, p256_order, p256_mp_order);
}
/* Square number mod the order of P256 curve. (r = a * a mod order)
*
* r Result of the squaring.
* a Number to square.
*/
static void sp_256_mont_sqr_order_avx2_4(sp_digit* r, sp_digit* a)
{
sp_256_sqr_avx2_4(r, a);
sp_256_mont_reduce_avx2_4(r, p256_order, p256_mp_order);
}
#ifndef WOLFSSL_SP_SMALL
/* Square number mod the order of P256 curve a number of times.
* (r = a ^ n mod order)
*
* r Result of the squaring.
* a Number to square.
*/
static void sp_256_mont_sqr_n_order_avx2_4(sp_digit* r, sp_digit* a, int n)
{
int i;
sp_256_mont_sqr_order_avx2_4(r, a);
for (i=1; i<n; i++)
sp_256_mont_sqr_order_avx2_4(r, r);
}
#endif /* !WOLFSSL_SP_SMALL */
/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
* (r = 1 / a mod order)
*
* r Inverse result.
* a Number to invert.
* td Temporary data.
*/
static void sp_256_mont_inv_order_avx2_4(sp_digit* r, sp_digit* a,
sp_digit* td)
{
#ifdef WOLFSSL_SP_SMALL
sp_digit* t = td;
int i;
XMEMCPY(t, a, sizeof(sp_digit) * 4);
for (i=254; i>=0; i--) {
sp_256_mont_sqr_order_avx2_4(t, t);
if (p256_order_2[i / 64] & ((sp_digit)1 << (i % 64)))
sp_256_mont_mul_order_avx2_4(t, t, a);
}
XMEMCPY(r, t, sizeof(sp_digit) * 4);
#else
sp_digit* t = td;
sp_digit* t2 = td + 2 * 4;
sp_digit* t3 = td + 4 * 4;
int i;
/* t = a^2 */
sp_256_mont_sqr_order_avx2_4(t, a);
/* t = a^3 = t * a */
sp_256_mont_mul_order_avx2_4(t, t, a);
/* t2= a^c = t ^ 2 ^ 2 */
sp_256_mont_sqr_n_order_avx2_4(t2, t, 2);
/* t3= a^f = t2 * t */
sp_256_mont_mul_order_avx2_4(t3, t2, t);
/* t2= a^f0 = t3 ^ 2 ^ 4 */
sp_256_mont_sqr_n_order_avx2_4(t2, t3, 4);
/* t = a^ff = t2 * t3 */
sp_256_mont_mul_order_avx2_4(t, t2, t3);
/* t3= a^ff00 = t ^ 2 ^ 8 */
sp_256_mont_sqr_n_order_avx2_4(t2, t, 8);
/* t = a^ffff = t2 * t */
sp_256_mont_mul_order_avx2_4(t, t2, t);
/* t2= a^ffff0000 = t ^ 2 ^ 16 */
sp_256_mont_sqr_n_order_avx2_4(t2, t, 16);
/* t = a^ffffffff = t2 * t */
sp_256_mont_mul_order_avx2_4(t, t2, t);
/* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
sp_256_mont_sqr_n_order_avx2_4(t2, t, 64);
/* t2= a^ffffffff00000000ffffffff = t2 * t */
sp_256_mont_mul_order_avx2_4(t2, t2, t);
/* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
sp_256_mont_sqr_n_order_avx2_4(t2, t2, 32);
/* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
sp_256_mont_mul_order_avx2_4(t2, t2, t);
/* t2= a^ffffffff00000000ffffffffffffffffbce6 */
for (i=127; i>=112; i--) {
sp_256_mont_sqr_order_avx2_4(t2, t2);
if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
sp_256_mont_mul_order_avx2_4(t2, t2, a);
}
/* t2= a^ffffffff00000000ffffffffffffffffbce6f */
sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
sp_256_mont_mul_order_avx2_4(t2, t2, t3);
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
for (i=107; i>=64; i--) {
sp_256_mont_sqr_order_avx2_4(t2, t2);
if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
sp_256_mont_mul_order_avx2_4(t2, t2, a);
}
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
sp_256_mont_mul_order_avx2_4(t2, t2, t3);
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
for (i=59; i>=32; i--) {
sp_256_mont_sqr_order_avx2_4(t2, t2);
if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
sp_256_mont_mul_order_avx2_4(t2, t2, a);
}
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
sp_256_mont_mul_order_avx2_4(t2, t2, t3);
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
for (i=27; i>=0; i--) {
sp_256_mont_sqr_order_avx2_4(t2, t2);
if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
sp_256_mont_mul_order_avx2_4(t2, t2, a);
}
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
/* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
sp_256_mont_mul_order_avx2_4(r, t2, t3);
#endif /* WOLFSSL_SP_SMALL */
}
#endif /* HAVE_INTEL_AVX2 */
#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
#ifdef HAVE_ECC_SIGN
#ifndef SP_ECC_MAX_SIG_GEN
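For reference, the surviving plain sp_256_mont_inv_order_4 and the AVX2 variant deleted above compute the same thing: inversion by Fermat's little theorem, a^-1 = a^(n-2) mod n, valid because the group order n is prime. The WOLFSSL_SP_SMALL branch scans all 255 bits of n-2 with square-and-multiply (p256_order_2), while the large branch uses the fixed addition chain spelled out in the hex comments, with p256_order_low holding the low 128 bits of the exponent.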
@@ -28952,9 +28778,6 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
int err = MP_OKAY;
int64_t c;
int i;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
(void)heap;
@@ -28994,11 +28817,6 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
/* New random point. */
err = sp_256_ecc_gen_k_4(rng, k);
if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_base_avx2_4(point, k, 1, heap);
else
#endif
err = sp_256_ecc_mulmod_base_4(point, k, 1, NULL);
}
@@ -29011,31 +28829,16 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
sp_256_norm_4(r);
/* Conv k to Montgomery form (mod order) */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_mul_avx2_4(k, k, p256_norm_order);
else
#endif
sp_256_mul_4(k, k, p256_norm_order);
err = sp_256_mod_4(k, k, p256_order);
}
if (err == MP_OKAY) {
sp_256_norm_4(k);
/* kInv = 1/k mod order */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_mont_inv_order_avx2_4(kInv, k, tmp);
else
#endif
sp_256_mont_inv_order_4(kInv, k, tmp);
sp_256_norm_4(kInv);
/* s = r * x + e */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_mul_avx2_4(x, x, r);
else
#endif
sp_256_mul_4(x, x, r);
err = sp_256_mod_4(x, x, p256_order);
}
@@ -29049,11 +28852,6 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
sp_256_norm_4(s);
/* s = s * k^-1 mod order */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_mont_mul_order_avx2_4(s, s, kInv);
else
#endif
sp_256_mont_mul_order_4(s, s, kInv);
sp_256_norm_4(s);
@@ -29133,9 +28931,6 @@ int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
sp_digit carry;
int64_t c;
int err;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
err = sp_ecc_point_new(heap, p1d, p1);
if (err == MP_OKAY)
@@ -29170,52 +28965,24 @@ int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
sp_256_from_mp(p2->y, 4, pY);
sp_256_from_mp(p2->z, 4, pZ);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_mul_avx2_4(s, s, p256_norm_order);
else
#endif
sp_256_mul_4(s, s, p256_norm_order);
err = sp_256_mod_4(s, s, p256_order);
}
if (err == MP_OKAY) {
sp_256_norm_4(s);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
sp_256_mont_inv_order_avx2_4(s, s, tmp);
sp_256_mont_mul_order_avx2_4(u1, u1, s);
sp_256_mont_mul_order_avx2_4(u2, u2, s);
}
else
#endif
{
sp_256_mont_inv_order_4(s, s, tmp);
sp_256_mont_mul_order_4(u1, u1, s);
sp_256_mont_mul_order_4(u2, u2, s);
}
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_base_avx2_4(p1, u1, 0, heap);
else
#endif
err = sp_256_ecc_mulmod_base_4(p1, u1, 0, heap);
}
if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_avx2_4(p2, p2, u2, 0, heap);
else
#endif
err = sp_256_ecc_mulmod_4(p2, p2, u2, 0, heap);
}
if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_proj_point_add_avx2_4(p1, p1, p2, tmp);
else
#endif
sp_256_proj_point_add_4(p1, p1, p2, tmp);
/* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
@@ -29378,9 +29145,6 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
sp_point* p = NULL;
byte one[1] = { 1 };
int err;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
err = sp_ecc_point_new(heap, pubd, pub);
if (err == MP_OKAY)
@@ -29421,11 +29185,6 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
if (err == MP_OKAY) {
/* Point * order = infinity */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_avx2_4(p, pub, p256_order, 1, heap);
else
#endif
err = sp_256_ecc_mulmod_4(p, pub, p256_order, 1, heap);
}
if (err == MP_OKAY) {
@@ -29438,11 +29197,6 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
if (err == MP_OKAY) {
/* Base * private = point */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_base_avx2_4(p, priv, 1, heap);
else
#endif
err = sp_256_ecc_mulmod_base_4(p, priv, 1, heap);
}
if (err == MP_OKAY) {
@@ -29491,9 +29245,6 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
sp_point* p;
sp_point* q = NULL;
int err;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
err = sp_ecc_point_new(NULL, pd, p);
if (err == MP_OKAY)
@@ -29516,11 +29267,6 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
sp_256_from_mp(q->y, 4, qY);
sp_256_from_mp(q->z, 4, qZ);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_proj_point_add_avx2_4(p, p, q, tmp);
else
#endif
sp_256_proj_point_add_4(p, p, q, tmp);
}
@@ -29562,9 +29308,6 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
sp_digit* tmp;
sp_point* p;
int err;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
err = sp_ecc_point_new(NULL, pd, p);
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
@@ -29582,11 +29325,6 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
sp_256_from_mp(p->y, 4, pY);
sp_256_from_mp(p->z, 4, pZ);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_proj_point_dbl_avx2_4(p, p, tmp);
else
#endif
sp_256_proj_point_dbl_4(p, p, tmp);
}
@@ -29675,9 +29413,6 @@ static int sp_256_mont_sqrt_4(sp_digit* y)
sp_digit* t1;
sp_digit* t2;
int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
d = XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC);
@@ -29693,40 +29428,6 @@ static int sp_256_mont_sqrt_4(sp_digit* y)
#endif
if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
/* t2 = y ^ 0x2 */
sp_256_mont_sqr_avx2_4(t2, y, p256_mod, p256_mp_mod);
/* t1 = y ^ 0x3 */
sp_256_mont_mul_avx2_4(t1, t2, y, p256_mod, p256_mp_mod);
/* t2 = y ^ 0xc */
sp_256_mont_sqr_n_avx2_4(t2, t1, 2, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xf */
sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
/* t2 = y ^ 0xf0 */
sp_256_mont_sqr_n_avx2_4(t2, t1, 4, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xff */
sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
/* t2 = y ^ 0xff00 */
sp_256_mont_sqr_n_avx2_4(t2, t1, 8, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xffff */
sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
/* t2 = y ^ 0xffff0000 */
sp_256_mont_sqr_n_avx2_4(t2, t1, 16, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xffffffff */
sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xffffffff00000000 */
sp_256_mont_sqr_n_avx2_4(t1, t1, 32, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xffffffff00000001 */
sp_256_mont_mul_avx2_4(t1, t1, y, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
sp_256_mont_sqr_n_avx2_4(t1, t1, 96, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
sp_256_mont_mul_avx2_4(t1, t1, y, p256_mod, p256_mp_mod);
sp_256_mont_sqr_n_avx2_4(y, t1, 94, p256_mod, p256_mp_mod);
}
else
#endif
{
/* t2 = y ^ 0x2 */
sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod);
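Both branches of sp_256_mont_sqrt_4 (the deleted AVX2 one and the plain one that remains) evaluate the same exponentiation: since the P-256 prime p is congruent to 3 mod 4, a square root is y^((p+1)/4) mod p. The chain builds y^0xffffffff00000001000000000000000000000001 and then squares 94 more times, which together give exactly the exponent (p+1)/4.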
@@ -29786,9 +29487,6 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
sp_digit* x;
sp_digit* y;
int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
d = XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC);
@@ -29811,13 +29509,6 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
if (err == MP_OKAY) {
/* y = x^3 */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
sp_256_mont_sqr_avx2_4(y, x, p256_mod, p256_mp_mod);
sp_256_mont_mul_avx2_4(y, y, x, p256_mod, p256_mp_mod);
}
else
#endif
{
sp_256_mont_sqr_4(y, x, p256_mod, p256_mp_mod);
sp_256_mont_mul_4(y, y, x, p256_mod, p256_mp_mod);

File diff suppressed because it is too large.


@@ -1743,7 +1743,8 @@ static int sp_2048_div_45(sp_digit* a, sp_digit* d, sp_digit* m,
int err = MP_OKAY;
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
td = XMALLOC(sizeof(sp_digit) * (4 * 45 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER);
td = XMALLOC(sizeof(sp_digit) * (4 * 45 + 3), NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (td != NULL) {
t1 = td;
t2 = td + 90 + 1;
@@ -2675,7 +2676,8 @@ static int sp_2048_div_90(sp_digit* a, sp_digit* d, sp_digit* m,
int err = MP_OKAY;
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
td = XMALLOC(sizeof(sp_digit) * (4 * 90 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER);
td = XMALLOC(sizeof(sp_digit) * (4 * 90 + 3), NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (td != NULL) {
t1 = td;
t2 = td + 180 + 1;
@@ -5024,7 +5026,8 @@ static int sp_3072_div_68(sp_digit* a, sp_digit* d, sp_digit* m,
int err = MP_OKAY;
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
td = XMALLOC(sizeof(sp_digit) * (3 * 68 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
td = XMALLOC(sizeof(sp_digit) * (3 * 68 + 1), NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (td != NULL) {
t1 = td;
t2 = td + 2 * 68;
@@ -6042,7 +6045,8 @@ static int sp_3072_div_136(sp_digit* a, sp_digit* d, sp_digit* m,
int err = MP_OKAY;
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
td = XMALLOC(sizeof(sp_digit) * (4 * 136 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER);
td = XMALLOC(sizeof(sp_digit) * (4 * 136 + 3), NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (td != NULL) {
t1 = td;
t2 = td + 272 + 1;
@@ -7685,6 +7689,8 @@ static void sp_256_cond_sub_10(sp_digit* r, const sp_digit* a,
#endif /* WOLFSSL_SP_SMALL */
}
#define sp_256_mont_reduce_order_10 sp_256_mont_reduce_10
/* Mul a by scalar b and add into r. (r += a * b)
*
* r A single precision integer.
@@ -8844,7 +8850,7 @@ static int sp_256_ecc_mulmod_10(sp_point* r, sp_point* g, sp_digit* k,
((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
sp_256_proj_point_dbl_10(&t[2], &t[2], tmp);
XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
}
if (map)
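The re-indented XMEMCPY above belongs to the constant-time scalar-multiply loop: the destination is chosen between &t[0] and &t[1] by masking the addresses rather than branching, so the memory access pattern does not depend on the key bit y. The idea, as a self-contained sketch (addr_mask is assumed to be the usual { 0, all-ones } pair):

/* Branch-free two-way select: returns p0 when bit == 0 and p1 when
 * bit == 1. Exactly one AND term survives, so the sum is the chosen
 * address and no data-dependent branch is executed. */
static void* sp_select_2(void* p0, void* p1, int bit)
{
    static const size_t addr_mask[2] = { 0, (size_t)~(size_t)0 };
    return (void*)(((size_t)p0 & addr_mask[bit ^ 1]) +
                   ((size_t)p1 & addr_mask[bit]));
}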
@@ -9487,9 +9493,6 @@ int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
sp_point* point;
sp_digit* k = NULL;
int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
err = sp_ecc_point_new(heap, p, point);
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
@@ -9505,11 +9508,6 @@ int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
sp_256_from_mp(k, 10, km);
sp_256_point_from_ecc_point_10(point, gm);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_avx2_10(point, point, k, map, heap);
else
#endif
err = sp_256_ecc_mulmod_10(point, point, k, map, heap);
}
if (err == MP_OKAY)
@@ -11115,9 +11113,6 @@ int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
sp_point* point;
sp_digit* k = NULL;
int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
err = sp_ecc_point_new(heap, p, point);
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
@@ -11132,11 +11127,6 @@ int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
if (err == MP_OKAY) {
sp_256_from_mp(k, 10, km);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_base_avx2_10(point, k, map, heap);
else
#endif
err = sp_256_ecc_mulmod_base_10(point, k, map, heap);
}
if (err == MP_OKAY)
@@ -11256,9 +11246,6 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
sp_point* infinity;
#endif
int err;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
(void)heap;
@@ -11280,23 +11267,11 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
if (err == MP_OKAY)
err = sp_256_ecc_gen_k_10(rng, k);
if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_base_avx2_10(point, k, 1, NULL);
else
#endif
err = sp_256_ecc_mulmod_base_10(point, k, 1, NULL);
}
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
err = sp_256_ecc_mulmod_avx2_10(infinity, point, p256_order, 1,
NULL);
}
else
#endif
err = sp_256_ecc_mulmod_10(infinity, point, p256_order, 1, NULL);
}
if (err == MP_OKAY) {
@@ -11379,9 +11354,6 @@ int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
sp_point* point = NULL;
sp_digit* k = NULL;
int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
if (*outLen < 32)
err = BUFFER_E;
@@ -11401,11 +11373,6 @@ int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
if (err == MP_OKAY) {
sp_256_from_mp(k, 10, priv);
sp_256_point_from_ecc_point_10(point, pub);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_avx2_10(point, point, k, 1, heap);
else
#endif
err = sp_256_ecc_mulmod_10(point, point, k, 1, heap);
}
if (err == MP_OKAY) {
@@ -11424,8 +11391,6 @@ int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
#endif /* HAVE_ECC_DHE */
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
#ifdef HAVE_INTEL_AVX2
#endif /* HAVE_INTEL_AVX2 */
#endif
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* Multiply a by scalar b into r. (r = a * b)
@@ -11501,7 +11466,8 @@ static int sp_256_div_10(sp_digit* a, sp_digit* d, sp_digit* m,
int err = MP_OKAY;
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
td = XMALLOC(sizeof(sp_digit) * (3 * 10 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
td = XMALLOC(sizeof(sp_digit) * (3 * 10 + 1), NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (td != NULL) {
t1 = td;
t2 = td + 2 * 10;
@@ -11597,7 +11563,7 @@ static const uint32_t p256_order_low[4] = {
static void sp_256_mont_mul_order_10(sp_digit* r, sp_digit* a, sp_digit* b)
{
sp_256_mul_10(r, a, b);
sp_256_mont_reduce_10(r, p256_order, p256_mp_order);
sp_256_mont_reduce_order_10(r, p256_order, p256_mp_order);
}
/* Square number mod the order of P256 curve. (r = a * a mod order)
@@ -11608,7 +11574,7 @@ static void sp_256_mont_mul_order_10(sp_digit* r, sp_digit* a, sp_digit* b)
static void sp_256_mont_sqr_order_10(sp_digit* r, sp_digit* a)
{
sp_256_sqr_10(r, a);
sp_256_mont_reduce_10(r, p256_order, p256_mp_order);
sp_256_mont_reduce_order_10(r, p256_order, p256_mp_order);
}
#ifndef WOLFSSL_SP_SMALL
@@ -11723,143 +11689,6 @@ static void sp_256_mont_inv_order_10(sp_digit* r, sp_digit* a,
#endif /* WOLFSSL_SP_SMALL */
}
#ifdef HAVE_INTEL_AVX2
/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
*
* r Result of the multiplication.
* a First operand of the multiplication.
* b Second operand of the multiplication.
*/
static void sp_256_mont_mul_order_avx2_10(sp_digit* r, sp_digit* a, sp_digit* b)
{
sp_256_mul_avx2_10(r, a, b);
sp_256_mont_reduce_avx2_10(r, p256_order, p256_mp_order);
}
/* Square number mod the order of P256 curve. (r = a * a mod order)
*
* r Result of the squaring.
* a Number to square.
*/
static void sp_256_mont_sqr_order_avx2_10(sp_digit* r, sp_digit* a)
{
sp_256_sqr_avx2_10(r, a);
sp_256_mont_reduce_avx2_10(r, p256_order, p256_mp_order);
}
#ifndef WOLFSSL_SP_SMALL
/* Square number mod the order of P256 curve a number of times.
* (r = a ^ n mod order)
*
* r Result of the squaring.
* a Number to square.
*/
static void sp_256_mont_sqr_n_order_avx2_10(sp_digit* r, sp_digit* a, int n)
{
int i;
sp_256_mont_sqr_order_avx2_10(r, a);
for (i=1; i<n; i++)
sp_256_mont_sqr_order_avx2_10(r, r);
}
#endif /* !WOLFSSL_SP_SMALL */
/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
* (r = 1 / a mod order)
*
* r Inverse result.
* a Number to invert.
* td Temporary data.
*/
static void sp_256_mont_inv_order_avx2_10(sp_digit* r, sp_digit* a,
sp_digit* td)
{
#ifdef WOLFSSL_SP_SMALL
sp_digit* t = td;
int i;
XMEMCPY(t, a, sizeof(sp_digit) * 10);
for (i=254; i>=0; i--) {
sp_256_mont_sqr_order_avx2_10(t, t);
if (p256_order_2[i / 32] & ((sp_digit)1 << (i % 32)))
sp_256_mont_mul_order_avx2_10(t, t, a);
}
XMEMCPY(r, t, sizeof(sp_digit) * 10);
#else
sp_digit* t = td;
sp_digit* t2 = td + 2 * 10;
sp_digit* t3 = td + 4 * 10;
int i;
/* t = a^2 */
sp_256_mont_sqr_order_avx2_10(t, a);
/* t = a^3 = t * a */
sp_256_mont_mul_order_avx2_10(t, t, a);
/* t2= a^c = t ^ 2 ^ 2 */
sp_256_mont_sqr_n_order_avx2_10(t2, t, 2);
/* t3= a^f = t2 * t */
sp_256_mont_mul_order_avx2_10(t3, t2, t);
/* t2= a^f0 = t3 ^ 2 ^ 4 */
sp_256_mont_sqr_n_order_avx2_10(t2, t3, 4);
/* t = a^ff = t2 * t3 */
sp_256_mont_mul_order_avx2_10(t, t2, t3);
/* t3= a^ff00 = t ^ 2 ^ 8 */
sp_256_mont_sqr_n_order_avx2_10(t2, t, 8);
/* t = a^ffff = t2 * t */
sp_256_mont_mul_order_avx2_10(t, t2, t);
/* t2= a^ffff0000 = t ^ 2 ^ 16 */
sp_256_mont_sqr_n_order_avx2_10(t2, t, 16);
/* t = a^ffffffff = t2 * t */
sp_256_mont_mul_order_avx2_10(t, t2, t);
/* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
sp_256_mont_sqr_n_order_avx2_10(t2, t, 64);
/* t2= a^ffffffff00000000ffffffff = t2 * t */
sp_256_mont_mul_order_avx2_10(t2, t2, t);
/* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
sp_256_mont_sqr_n_order_avx2_10(t2, t2, 32);
/* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
sp_256_mont_mul_order_avx2_10(t2, t2, t);
/* t2= a^ffffffff00000000ffffffffffffffffbce6 */
for (i=127; i>=112; i--) {
sp_256_mont_sqr_order_avx2_10(t2, t2);
if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
sp_256_mont_mul_order_avx2_10(t2, t2, a);
}
/* t2= a^ffffffff00000000ffffffffffffffffbce6f */
sp_256_mont_sqr_n_order_avx2_10(t2, t2, 4);
sp_256_mont_mul_order_avx2_10(t2, t2, t3);
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
for (i=107; i>=64; i--) {
sp_256_mont_sqr_order_avx2_10(t2, t2);
if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
sp_256_mont_mul_order_avx2_10(t2, t2, a);
}
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
sp_256_mont_sqr_n_order_avx2_10(t2, t2, 4);
sp_256_mont_mul_order_avx2_10(t2, t2, t3);
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
for (i=59; i>=32; i--) {
sp_256_mont_sqr_order_avx2_10(t2, t2);
if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
sp_256_mont_mul_order_avx2_10(t2, t2, a);
}
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
sp_256_mont_sqr_n_order_avx2_10(t2, t2, 4);
sp_256_mont_mul_order_avx2_10(t2, t2, t3);
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
for (i=27; i>=0; i--) {
sp_256_mont_sqr_order_avx2_10(t2, t2);
if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
sp_256_mont_mul_order_avx2_10(t2, t2, a);
}
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
sp_256_mont_sqr_n_order_avx2_10(t2, t2, 4);
/* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
sp_256_mont_mul_order_avx2_10(r, t2, t3);
#endif /* WOLFSSL_SP_SMALL */
}
#endif /* HAVE_INTEL_AVX2 */
#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
#ifdef HAVE_ECC_SIGN
#ifndef SP_ECC_MAX_SIG_GEN
@@ -11907,9 +11736,6 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
int err = MP_OKAY;
int32_t c;
int i;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
(void)heap;
@@ -11949,11 +11775,6 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
/* New random point. */
err = sp_256_ecc_gen_k_10(rng, k);
if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_base_avx2_10(point, k, 1, heap);
else
#endif
err = sp_256_ecc_mulmod_base_10(point, k, 1, NULL);
}
@@ -11966,31 +11787,16 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
sp_256_norm_10(r);
/* Conv k to Montgomery form (mod order) */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_mul_avx2_10(k, k, p256_norm_order);
else
#endif
sp_256_mul_10(k, k, p256_norm_order);
err = sp_256_mod_10(k, k, p256_order);
}
if (err == MP_OKAY) {
sp_256_norm_10(k);
/* kInv = 1/k mod order */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_mont_inv_order_avx2_10(kInv, k, tmp);
else
#endif
sp_256_mont_inv_order_10(kInv, k, tmp);
sp_256_norm_10(kInv);
/* s = r * x + e */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_mul_avx2_10(x, x, r);
else
#endif
sp_256_mul_10(x, x, r);
err = sp_256_mod_10(x, x, p256_order);
}
@@ -12004,11 +11810,6 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
sp_256_norm_10(s);
/* s = s * k^-1 mod order */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_mont_mul_order_avx2_10(s, s, kInv);
else
#endif
sp_256_mont_mul_order_10(s, s, kInv);
sp_256_norm_10(s);
@@ -12088,9 +11889,6 @@ int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
sp_digit carry;
int32_t c;
int err;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
err = sp_ecc_point_new(heap, p1d, p1);
if (err == MP_OKAY)
@@ -12125,52 +11923,24 @@ int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
sp_256_from_mp(p2->y, 10, pY);
sp_256_from_mp(p2->z, 10, pZ);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_mul_avx2_10(s, s, p256_norm_order);
else
#endif
sp_256_mul_10(s, s, p256_norm_order);
err = sp_256_mod_10(s, s, p256_order);
}
if (err == MP_OKAY) {
sp_256_norm_10(s);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
sp_256_mont_inv_order_avx2_10(s, s, tmp);
sp_256_mont_mul_order_avx2_10(u1, u1, s);
sp_256_mont_mul_order_avx2_10(u2, u2, s);
}
else
#endif
{
sp_256_mont_inv_order_10(s, s, tmp);
sp_256_mont_mul_order_10(u1, u1, s);
sp_256_mont_mul_order_10(u2, u2, s);
}
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_base_avx2_10(p1, u1, 0, heap);
else
#endif
err = sp_256_ecc_mulmod_base_10(p1, u1, 0, heap);
}
if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_avx2_10(p2, p2, u2, 0, heap);
else
#endif
err = sp_256_ecc_mulmod_10(p2, p2, u2, 0, heap);
}
if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_proj_point_add_avx2_10(p1, p1, p2, tmp);
else
#endif
sp_256_proj_point_add_10(p1, p1, p2, tmp);
/* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
@@ -12333,9 +12103,6 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
sp_point* p = NULL;
byte one[1] = { 1 };
int err;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
err = sp_ecc_point_new(heap, pubd, pub);
if (err == MP_OKAY)
@@ -12376,11 +12143,6 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
if (err == MP_OKAY) {
/* Point * order = infinity */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_avx2_10(p, pub, p256_order, 1, heap);
else
#endif
err = sp_256_ecc_mulmod_10(p, pub, p256_order, 1, heap);
}
if (err == MP_OKAY) {
@@ -12393,11 +12155,6 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
if (err == MP_OKAY) {
/* Base * private = point */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_base_avx2_10(p, priv, 1, heap);
else
#endif
err = sp_256_ecc_mulmod_base_10(p, priv, 1, heap);
}
if (err == MP_OKAY) {
@@ -12446,9 +12203,6 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
sp_point* p;
sp_point* q = NULL;
int err;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
err = sp_ecc_point_new(NULL, pd, p);
if (err == MP_OKAY)
@@ -12471,11 +12225,6 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
sp_256_from_mp(q->y, 10, qY);
sp_256_from_mp(q->z, 10, qZ);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_proj_point_add_avx2_10(p, p, q, tmp);
else
#endif
sp_256_proj_point_add_10(p, p, q, tmp);
}
@@ -12517,9 +12266,6 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
sp_digit* tmp;
sp_point* p;
int err;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
err = sp_ecc_point_new(NULL, pd, p);
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
@@ -12537,11 +12283,6 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
sp_256_from_mp(p->y, 10, pY);
sp_256_from_mp(p->z, 10, pZ);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_proj_point_dbl_avx2_10(p, p, tmp);
else
#endif
sp_256_proj_point_dbl_10(p, p, tmp);
}
@@ -12630,9 +12371,6 @@ static int sp_256_mont_sqrt_10(sp_digit* y)
sp_digit* t1;
sp_digit* t2;
int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
d = XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
@@ -12648,40 +12386,6 @@ static int sp_256_mont_sqrt_10(sp_digit* y)
#endif
if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
/* t2 = y ^ 0x2 */
sp_256_mont_sqr_avx2_10(t2, y, p256_mod, p256_mp_mod);
/* t1 = y ^ 0x3 */
sp_256_mont_mul_avx2_10(t1, t2, y, p256_mod, p256_mp_mod);
/* t2 = y ^ 0xc */
sp_256_mont_sqr_n_avx2_10(t2, t1, 2, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xf */
sp_256_mont_mul_avx2_10(t1, t1, t2, p256_mod, p256_mp_mod);
/* t2 = y ^ 0xf0 */
sp_256_mont_sqr_n_avx2_10(t2, t1, 4, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xff */
sp_256_mont_mul_avx2_10(t1, t1, t2, p256_mod, p256_mp_mod);
/* t2 = y ^ 0xff00 */
sp_256_mont_sqr_n_avx2_10(t2, t1, 8, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xffff */
sp_256_mont_mul_avx2_10(t1, t1, t2, p256_mod, p256_mp_mod);
/* t2 = y ^ 0xffff0000 */
sp_256_mont_sqr_n_avx2_10(t2, t1, 16, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xffffffff */
sp_256_mont_mul_avx2_10(t1, t1, t2, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xffffffff00000000 */
sp_256_mont_sqr_n_avx2_10(t1, t1, 32, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xffffffff00000001 */
sp_256_mont_mul_avx2_10(t1, t1, y, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
sp_256_mont_sqr_n_avx2_10(t1, t1, 96, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
sp_256_mont_mul_avx2_10(t1, t1, y, p256_mod, p256_mp_mod);
sp_256_mont_sqr_n_avx2_10(y, t1, 94, p256_mod, p256_mp_mod);
}
else
#endif
{
/* t2 = y ^ 0x2 */
sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod);
@@ -12741,9 +12445,6 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
sp_digit* x;
sp_digit* y;
int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
d = XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
@@ -12766,13 +12467,6 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
if (err == MP_OKAY) {
/* y = x^3 */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
sp_256_mont_sqr_avx2_10(y, x, p256_mod, p256_mp_mod);
sp_256_mont_mul_avx2_10(y, y, x, p256_mod, p256_mp_mod);
}
else
#endif
{
sp_256_mont_sqr_10(y, x, p256_mod, p256_mp_mod);
sp_256_mont_mul_10(y, y, x, p256_mod, p256_mp_mod);


@@ -1258,7 +1258,8 @@ static int sp_2048_div_18(sp_digit* a, sp_digit* d, sp_digit* m,
int err = MP_OKAY;
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
td = XMALLOC(sizeof(sp_digit) * (3 * 18 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
td = XMALLOC(sizeof(sp_digit) * (3 * 18 + 1), NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (td != NULL) {
t1 = td;
t2 = td + 2 * 18;
@@ -2106,7 +2107,8 @@ static int sp_2048_div_36(sp_digit* a, sp_digit* d, sp_digit* m,
int err = MP_OKAY;
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
td = XMALLOC(sizeof(sp_digit) * (3 * 36 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
td = XMALLOC(sizeof(sp_digit) * (3 * 36 + 1), NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (td != NULL) {
t1 = td;
t2 = td + 2 * 36;
@@ -4660,7 +4662,8 @@ static int sp_3072_div_27(sp_digit* a, sp_digit* d, sp_digit* m,
int err = MP_OKAY;
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
td = XMALLOC(sizeof(sp_digit) * (3 * 27 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
td = XMALLOC(sizeof(sp_digit) * (3 * 27 + 1), NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (td != NULL) {
t1 = td;
t2 = td + 2 * 27;
@@ -5484,7 +5487,8 @@ static int sp_3072_div_54(sp_digit* a, sp_digit* d, sp_digit* m,
int err = MP_OKAY;
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
td = XMALLOC(sizeof(sp_digit) * (3 * 54 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
td = XMALLOC(sizeof(sp_digit) * (3 * 54 + 1), NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (td != NULL) {
t1 = td;
t2 = td + 2 * 54;
@@ -7089,6 +7093,8 @@ static void sp_256_cond_sub_5(sp_digit* r, const sp_digit* a,
#endif /* WOLFSSL_SP_SMALL */
}
#define sp_256_mont_reduce_order_5 sp_256_mont_reduce_5
/* Mul a by scalar b and add into r. (r += a * b)
*
* r A single precision integer.
@@ -8066,7 +8072,7 @@ static int sp_256_ecc_mulmod_5(sp_point* r, sp_point* g, sp_digit* k,
((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
sp_256_proj_point_dbl_5(&t[2], &t[2], tmp);
XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
}
if (map)
@@ -8709,9 +8715,6 @@ int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
sp_point* point;
sp_digit* k = NULL;
int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
err = sp_ecc_point_new(heap, p, point);
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
@@ -8727,11 +8730,6 @@ int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
sp_256_from_mp(k, 5, km);
sp_256_point_from_ecc_point_5(point, gm);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_avx2_5(point, point, k, map, heap);
else
#endif
err = sp_256_ecc_mulmod_5(point, point, k, map, heap);
}
if (err == MP_OKAY)
@@ -10337,9 +10335,6 @@ int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
sp_point* point;
sp_digit* k = NULL;
int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
err = sp_ecc_point_new(heap, p, point);
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
@@ -10354,11 +10349,6 @@ int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
if (err == MP_OKAY) {
sp_256_from_mp(k, 5, km);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_base_avx2_5(point, k, map, heap);
else
#endif
err = sp_256_ecc_mulmod_base_5(point, k, map, heap);
}
if (err == MP_OKAY)
@@ -10477,9 +10467,6 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
sp_point* infinity;
#endif
int err;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
(void)heap;
@@ -10501,23 +10488,11 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
if (err == MP_OKAY)
err = sp_256_ecc_gen_k_5(rng, k);
if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_base_avx2_5(point, k, 1, NULL);
else
#endif
err = sp_256_ecc_mulmod_base_5(point, k, 1, NULL);
}
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
err = sp_256_ecc_mulmod_avx2_5(infinity, point, p256_order, 1,
NULL);
}
else
#endif
err = sp_256_ecc_mulmod_5(infinity, point, p256_order, 1, NULL);
}
if (err == MP_OKAY) {
@@ -10600,9 +10575,6 @@ int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
sp_point* point = NULL;
sp_digit* k = NULL;
int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
if (*outLen < 32)
err = BUFFER_E;
@@ -10622,11 +10594,6 @@ int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
if (err == MP_OKAY) {
sp_256_from_mp(k, 5, priv);
sp_256_point_from_ecc_point_5(point, pub);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_avx2_5(point, point, k, 1, heap);
else
#endif
err = sp_256_ecc_mulmod_5(point, point, k, 1, heap);
}
if (err == MP_OKAY) {
@@ -10645,8 +10612,6 @@ int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
#endif /* HAVE_ECC_DHE */
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
#ifdef HAVE_INTEL_AVX2
#endif /* HAVE_INTEL_AVX2 */
#endif
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* Multiply a by scalar b into r. (r = a * b)
@@ -10712,7 +10677,8 @@ static int sp_256_div_5(sp_digit* a, sp_digit* d, sp_digit* m,
int err = MP_OKAY;
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
td = XMALLOC(sizeof(sp_digit) * (3 * 5 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
td = XMALLOC(sizeof(sp_digit) * (3 * 5 + 1), NULL,
DYNAMIC_TYPE_TMP_BUFFER);
if (td != NULL) {
t1 = td;
t2 = td + 2 * 5;
@@ -10808,7 +10774,7 @@ static const uint64_t p256_order_low[2] = {
static void sp_256_mont_mul_order_5(sp_digit* r, sp_digit* a, sp_digit* b)
{
sp_256_mul_5(r, a, b);
sp_256_mont_reduce_5(r, p256_order, p256_mp_order);
sp_256_mont_reduce_order_5(r, p256_order, p256_mp_order);
}
/* Square number mod the order of P256 curve. (r = a * a mod order)
@@ -10819,7 +10785,7 @@ static void sp_256_mont_mul_order_5(sp_digit* r, sp_digit* a, sp_digit* b)
static void sp_256_mont_sqr_order_5(sp_digit* r, sp_digit* a)
{
sp_256_sqr_5(r, a);
sp_256_mont_reduce_5(r, p256_order, p256_mp_order);
sp_256_mont_reduce_order_5(r, p256_order, p256_mp_order);
}
#ifndef WOLFSSL_SP_SMALL
@@ -10934,143 +10900,6 @@ static void sp_256_mont_inv_order_5(sp_digit* r, sp_digit* a,
#endif /* WOLFSSL_SP_SMALL */
}
#ifdef HAVE_INTEL_AVX2
/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
*
* r Result of the multiplication.
* a First operand of the multiplication.
* b Second operand of the multiplication.
*/
static void sp_256_mont_mul_order_avx2_5(sp_digit* r, sp_digit* a, sp_digit* b)
{
sp_256_mul_avx2_5(r, a, b);
sp_256_mont_reduce_avx2_5(r, p256_order, p256_mp_order);
}
/* Square number mod the order of P256 curve. (r = a * a mod order)
*
* r Result of the squaring.
* a Number to square.
*/
static void sp_256_mont_sqr_order_avx2_5(sp_digit* r, sp_digit* a)
{
sp_256_sqr_avx2_5(r, a);
sp_256_mont_reduce_avx2_5(r, p256_order, p256_mp_order);
}
#ifndef WOLFSSL_SP_SMALL
/* Square number mod the order of P256 curve a number of times.
* (r = a ^ n mod order)
*
* r Result of the squaring.
* a Number to square.
*/
static void sp_256_mont_sqr_n_order_avx2_5(sp_digit* r, sp_digit* a, int n)
{
int i;
sp_256_mont_sqr_order_avx2_5(r, a);
for (i=1; i<n; i++)
sp_256_mont_sqr_order_avx2_5(r, r);
}
#endif /* !WOLFSSL_SP_SMALL */
/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
* (r = 1 / a mod order)
*
* r Inverse result.
* a Number to invert.
* td Temporary data.
*/
static void sp_256_mont_inv_order_avx2_5(sp_digit* r, sp_digit* a,
sp_digit* td)
{
#ifdef WOLFSSL_SP_SMALL
sp_digit* t = td;
int i;
XMEMCPY(t, a, sizeof(sp_digit) * 5);
for (i=254; i>=0; i--) {
sp_256_mont_sqr_order_avx2_5(t, t);
if (p256_order_2[i / 64] & ((sp_digit)1 << (i % 64)))
sp_256_mont_mul_order_avx2_5(t, t, a);
}
XMEMCPY(r, t, sizeof(sp_digit) * 5);
#else
sp_digit* t = td;
sp_digit* t2 = td + 2 * 5;
sp_digit* t3 = td + 4 * 5;
int i;
/* t = a^2 */
sp_256_mont_sqr_order_avx2_5(t, a);
/* t = a^3 = t * a */
sp_256_mont_mul_order_avx2_5(t, t, a);
/* t2= a^c = t ^ 2 ^ 2 */
sp_256_mont_sqr_n_order_avx2_5(t2, t, 2);
/* t3= a^f = t2 * t */
sp_256_mont_mul_order_avx2_5(t3, t2, t);
/* t2= a^f0 = t3 ^ 2 ^ 4 */
sp_256_mont_sqr_n_order_avx2_5(t2, t3, 4);
/* t = a^ff = t2 * t3 */
sp_256_mont_mul_order_avx2_5(t, t2, t3);
/* t3= a^ff00 = t ^ 2 ^ 8 */
sp_256_mont_sqr_n_order_avx2_5(t2, t, 8);
/* t = a^ffff = t2 * t */
sp_256_mont_mul_order_avx2_5(t, t2, t);
/* t2= a^ffff0000 = t ^ 2 ^ 16 */
sp_256_mont_sqr_n_order_avx2_5(t2, t, 16);
/* t = a^ffffffff = t2 * t */
sp_256_mont_mul_order_avx2_5(t, t2, t);
/* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
sp_256_mont_sqr_n_order_avx2_5(t2, t, 64);
/* t2= a^ffffffff00000000ffffffff = t2 * t */
sp_256_mont_mul_order_avx2_5(t2, t2, t);
/* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
sp_256_mont_sqr_n_order_avx2_5(t2, t2, 32);
/* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
sp_256_mont_mul_order_avx2_5(t2, t2, t);
/* t2= a^ffffffff00000000ffffffffffffffffbce6 */
for (i=127; i>=112; i--) {
sp_256_mont_sqr_order_avx2_5(t2, t2);
if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
sp_256_mont_mul_order_avx2_5(t2, t2, a);
}
/* t2= a^ffffffff00000000ffffffffffffffffbce6f */
sp_256_mont_sqr_n_order_avx2_5(t2, t2, 4);
sp_256_mont_mul_order_avx2_5(t2, t2, t3);
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
for (i=107; i>=64; i--) {
sp_256_mont_sqr_order_avx2_5(t2, t2);
if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
sp_256_mont_mul_order_avx2_5(t2, t2, a);
}
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
sp_256_mont_sqr_n_order_avx2_5(t2, t2, 4);
sp_256_mont_mul_order_avx2_5(t2, t2, t3);
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
for (i=59; i>=32; i--) {
sp_256_mont_sqr_order_avx2_5(t2, t2);
if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
sp_256_mont_mul_order_avx2_5(t2, t2, a);
}
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
sp_256_mont_sqr_n_order_avx2_5(t2, t2, 4);
sp_256_mont_mul_order_avx2_5(t2, t2, t3);
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
for (i=27; i>=0; i--) {
sp_256_mont_sqr_order_avx2_5(t2, t2);
if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
sp_256_mont_mul_order_avx2_5(t2, t2, a);
}
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
sp_256_mont_sqr_n_order_avx2_5(t2, t2, 4);
/* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
sp_256_mont_mul_order_avx2_5(r, t2, t3);
#endif /* WOLFSSL_SP_SMALL */
}
#endif /* HAVE_INTEL_AVX2 */
#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
#ifdef HAVE_ECC_SIGN
#ifndef SP_ECC_MAX_SIG_GEN
@@ -11118,9 +10947,6 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
int err = MP_OKAY;
int64_t c;
int i;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
(void)heap;
@@ -11160,11 +10986,6 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
/* New random point. */
err = sp_256_ecc_gen_k_5(rng, k);
if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_base_avx2_5(point, k, 1, heap);
else
#endif
err = sp_256_ecc_mulmod_base_5(point, k, 1, NULL);
}
@@ -11177,31 +10998,16 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
sp_256_norm_5(r);
/* Conv k to Montgomery form (mod order) */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_mul_avx2_5(k, k, p256_norm_order);
else
#endif
sp_256_mul_5(k, k, p256_norm_order);
err = sp_256_mod_5(k, k, p256_order);
}
if (err == MP_OKAY) {
sp_256_norm_5(k);
/* kInv = 1/k mod order */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_mont_inv_order_avx2_5(kInv, k, tmp);
else
#endif
sp_256_mont_inv_order_5(kInv, k, tmp);
sp_256_norm_5(kInv);
/* s = r * x + e */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_mul_avx2_5(x, x, r);
else
#endif
sp_256_mul_5(x, x, r);
err = sp_256_mod_5(x, x, p256_order);
}
@@ -11215,11 +11021,6 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
sp_256_norm_5(s);
/* s = s * k^-1 mod order */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_mont_mul_order_avx2_5(s, s, kInv);
else
#endif
sp_256_mont_mul_order_5(s, s, kInv);
sp_256_norm_5(s);
@@ -11299,9 +11100,6 @@ int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
sp_digit carry;
int64_t c;
int err;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
err = sp_ecc_point_new(heap, p1d, p1);
if (err == MP_OKAY)
@@ -11336,52 +11134,24 @@ int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
sp_256_from_mp(p2->y, 5, pY);
sp_256_from_mp(p2->z, 5, pZ);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_mul_avx2_5(s, s, p256_norm_order);
else
#endif
sp_256_mul_5(s, s, p256_norm_order);
err = sp_256_mod_5(s, s, p256_order);
}
if (err == MP_OKAY) {
sp_256_norm_5(s);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
sp_256_mont_inv_order_avx2_5(s, s, tmp);
sp_256_mont_mul_order_avx2_5(u1, u1, s);
sp_256_mont_mul_order_avx2_5(u2, u2, s);
}
else
#endif
{
sp_256_mont_inv_order_5(s, s, tmp);
sp_256_mont_mul_order_5(u1, u1, s);
sp_256_mont_mul_order_5(u2, u2, s);
}
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_base_avx2_5(p1, u1, 0, heap);
else
#endif
err = sp_256_ecc_mulmod_base_5(p1, u1, 0, heap);
}
if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_avx2_5(p2, p2, u2, 0, heap);
else
#endif
err = sp_256_ecc_mulmod_5(p2, p2, u2, 0, heap);
}
if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_proj_point_add_avx2_5(p1, p1, p2, tmp);
else
#endif
sp_256_proj_point_add_5(p1, p1, p2, tmp);
/* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
@@ -11544,9 +11314,6 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
sp_point* p = NULL;
byte one[1] = { 1 };
int err;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
err = sp_ecc_point_new(heap, pubd, pub);
if (err == MP_OKAY)
@@ -11587,11 +11354,6 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
if (err == MP_OKAY) {
/* Point * order = infinity */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_avx2_5(p, pub, p256_order, 1, heap);
else
#endif
err = sp_256_ecc_mulmod_5(p, pub, p256_order, 1, heap);
}
if (err == MP_OKAY) {
@@ -11604,11 +11366,6 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
if (err == MP_OKAY) {
/* Base * private = point */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
err = sp_256_ecc_mulmod_base_avx2_5(p, priv, 1, heap);
else
#endif
err = sp_256_ecc_mulmod_base_5(p, priv, 1, heap);
}
if (err == MP_OKAY) {
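Both key-check hunks reduce to the plain scalar multiplications. The checks they exercise are the standard ones for a public key Q and private key d:

    n \cdot Q = \mathcal{O} \qquad \text{and} \qquad d \cdot G = Q

i.e. the point has the right order and matches the private scalar.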
@ -11657,9 +11414,6 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
sp_point* p;
sp_point* q = NULL;
int err;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
err = sp_ecc_point_new(NULL, pd, p);
if (err == MP_OKAY)
@ -11682,11 +11436,6 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
sp_256_from_mp(q->y, 5, qY);
sp_256_from_mp(q->z, 5, qZ);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_proj_point_add_avx2_5(p, p, q, tmp);
else
#endif
sp_256_proj_point_add_5(p, p, q, tmp);
}
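The wrapper keeps the same Jacobian point addition; in affine terms the group law it realises is

    \lambda = \frac{y_2 - y_1}{x_2 - x_1}, \qquad x_3 = \lambda^2 - x_1 - x_2, \qquad y_3 = \lambda(x_1 - x_3) - y_1

with the projective form trading the field inversion for a few extra multiplications.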
@ -11728,9 +11477,6 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
sp_digit* tmp;
sp_point* p;
int err;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
err = sp_ecc_point_new(NULL, pd, p);
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
@ -11748,11 +11494,6 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
sp_256_from_mp(p->y, 5, pY);
sp_256_from_mp(p->z, 5, pZ);
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
sp_256_proj_point_dbl_avx2_5(p, p, tmp);
else
#endif
sp_256_proj_point_dbl_5(p, p, tmp);
}
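Doubling is the same story: for a short Weierstrass curve with a = -3, as for P-256, the affine law behind sp_256_proj_point_dbl_5 is

    \lambda = \frac{3x^2 - 3}{2y}, \qquad x_3 = \lambda^2 - 2x, \qquad y_3 = \lambda(x - x_3) - y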
@ -11841,9 +11582,6 @@ static int sp_256_mont_sqrt_5(sp_digit* y)
sp_digit* t1;
sp_digit* t2;
int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
d = XMALLOC(sizeof(sp_digit) * 4 * 5, NULL, DYNAMIC_TYPE_ECC);
@ -11859,40 +11597,6 @@ static int sp_256_mont_sqrt_5(sp_digit* y)
#endif
if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
/* t2 = y ^ 0x2 */
sp_256_mont_sqr_avx2_5(t2, y, p256_mod, p256_mp_mod);
/* t1 = y ^ 0x3 */
sp_256_mont_mul_avx2_5(t1, t2, y, p256_mod, p256_mp_mod);
/* t2 = y ^ 0xc */
sp_256_mont_sqr_n_avx2_5(t2, t1, 2, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xf */
sp_256_mont_mul_avx2_5(t1, t1, t2, p256_mod, p256_mp_mod);
/* t2 = y ^ 0xf0 */
sp_256_mont_sqr_n_avx2_5(t2, t1, 4, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xff */
sp_256_mont_mul_avx2_5(t1, t1, t2, p256_mod, p256_mp_mod);
/* t2 = y ^ 0xff00 */
sp_256_mont_sqr_n_avx2_5(t2, t1, 8, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xffff */
sp_256_mont_mul_avx2_5(t1, t1, t2, p256_mod, p256_mp_mod);
/* t2 = y ^ 0xffff0000 */
sp_256_mont_sqr_n_avx2_5(t2, t1, 16, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xffffffff */
sp_256_mont_mul_avx2_5(t1, t1, t2, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xffffffff00000000 */
sp_256_mont_sqr_n_avx2_5(t1, t1, 32, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xffffffff00000001 */
sp_256_mont_mul_avx2_5(t1, t1, y, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
sp_256_mont_sqr_n_avx2_5(t1, t1, 96, p256_mod, p256_mp_mod);
/* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
sp_256_mont_mul_avx2_5(t1, t1, y, p256_mod, p256_mp_mod);
sp_256_mont_sqr_n_avx2_5(y, t1, 94, p256_mod, p256_mp_mod);
}
else
#endif
{
/* t2 = y ^ 0x2 */
sp_256_mont_sqr_5(t2, y, p256_mod, p256_mp_mod);
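The surviving plain branch evaluates the same fixed square-root chain the removed AVX2 branch did. Because the P-256 prime satisfies p \equiv 3 \pmod{4}, a square root is a single exponentiation,

    \sqrt{y} \equiv y^{(p+1)/4} \pmod{p}

and the ladder of mont_sqr_n/mont_mul calls simply evaluates that constant exponent with a fixed sequence of squarings and multiplications.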
@ -11952,9 +11656,6 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
sp_digit* x;
sp_digit* y;
int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
d = XMALLOC(sizeof(sp_digit) * 4 * 5, NULL, DYNAMIC_TYPE_ECC);
@ -11977,13 +11678,6 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
if (err == MP_OKAY) {
/* y = x^3 */
#ifdef HAVE_INTEL_AVX2
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
sp_256_mont_sqr_avx2_5(y, x, p256_mod, p256_mp_mod);
sp_256_mont_mul_avx2_5(y, y, x, p256_mod, p256_mp_mod);
}
else
#endif
{
sp_256_mont_sqr_5(y, x, p256_mod, p256_mp_mod);
sp_256_mont_mul_5(y, y, x, p256_mod, p256_mp_mod);
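After forming x^3, the (unchanged) remainder of the function completes the curve equation and recovers y: for P-256,

    y^2 \equiv x^3 - 3x + b \pmod{p}

so the code can add -3x + b, take the square root via sp_256_mont_sqrt_5 above, and select the root whose parity matches the odd flag.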


@ -590,7 +590,7 @@ int sp_lshd(sp_int* a, int s)
if (a->used + s > a->size)
a->used = a->size - s;
XMEMMOVE(a->dp + s, a->dp, a->used * SP_INT_DIGITS);
XMEMMOVE(a->dp + s, a->dp, a->used * sizeof(sp_int_digit));
a->used += s;
XMEMSET(a->dp, 0, s * sizeof(sp_int_digit));
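The fix matters because SP_INT_DIGITS is the capacity of the digit array, not the size of one digit, so the old call multiplied by the array capacity instead of the digit size and moved the wrong number of bytes. A minimal self-contained sketch of the corrected shift, assuming 64-bit digits; the struct layout here is illustrative, not the verbatim wolfSSL one:

    #include <string.h>
    #include <stdint.h>

    typedef uint64_t sp_int_digit;
    #define SP_INT_DIGITS 64          /* digit count (capacity), NOT a byte size */

    typedef struct sp_int {
        int used;                     /* digits currently in use */
        sp_int_digit dp[SP_INT_DIGITS];
    } sp_int;

    /* Shift left by s digits: dp[i+s] = dp[i], low s digits zeroed.
     * The memmove byte count must be digits * sizeof(sp_int_digit). */
    static void sp_lshd_sketch(sp_int* a, int s)
    {
        if (a->used + s > SP_INT_DIGITS)
            a->used = SP_INT_DIGITS - s;
        memmove(a->dp + s, a->dp, a->used * sizeof(sp_int_digit));
        a->used += s;
        memset(a->dp, 0, s * sizeof(sp_int_digit));
    }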


@ -6943,7 +6943,7 @@ static WC_INLINE int sp_2048_div_16(sp_digit* a, sp_digit* d, sp_digit* m,
}
r1 = sp_2048_cmp_16(t1, d) >= 0;
sp_2048_cond_sub_16(r, t1, t2, (sp_digit)0 - r1);
sp_2048_cond_sub_16(r, t1, d, (sp_digit)0 - r1);
return MP_OKAY;
}
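The bug in each of these division routines was subtracting the scratch value t2 instead of the divisor d in the final correction step; the mask (sp_digit)0 - r1 is all-ones exactly when t1 >= d, so the conditionally subtracted operand must be d. A sketch of the constant-time masked subtract the sp_*_cond_sub_* helpers implement, assuming 64-bit limbs (names are illustrative):

    #include <stdint.h>

    /* r = a - (b & mask) over n limbs; mask is 0 or all-ones.
     * Returns 0 - borrow, matching the sp_*_cond_sub_* convention. */
    static uint64_t cond_sub(uint64_t* r, const uint64_t* a,
                             const uint64_t* b, uint64_t mask, int n)
    {
        uint64_t borrow = 0;
        for (int i = 0; i < n; i++) {
            uint64_t bi = b[i] & mask;      /* subtrahend, zeroed when mask == 0 */
            uint64_t t  = a[i] - bi;
            uint64_t b1 = (t > a[i]);       /* borrow out of the subtraction */
            uint64_t t2 = t - borrow;
            borrow = b1 | (t2 > t);         /* propagate borrow to next limb */
            r[i] = t2;
        }
        return (uint64_t)0 - borrow;
    }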
@ -8923,7 +8923,7 @@ static WC_INLINE int sp_2048_div_32(sp_digit* a, sp_digit* d, sp_digit* m,
}
r1 = sp_2048_cmp_32(t1, d) >= 0;
sp_2048_cond_sub_32(r, t1, t2, (sp_digit)0 - r1);
sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);
return MP_OKAY;
}
@ -8982,7 +8982,7 @@ static WC_INLINE int sp_2048_div_32_cond(sp_digit* a, sp_digit* d, sp_digit* m,
}
r1 = sp_2048_cmp_32(t1, d) >= 0;
sp_2048_cond_sub_32(r, t1, t2, (sp_digit)0 - r1);
sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);
return MP_OKAY;
}
@ -23504,7 +23504,7 @@ static WC_INLINE int sp_3072_div_24(sp_digit* a, sp_digit* d, sp_digit* m,
}
r1 = sp_3072_cmp_24(t1, d) >= 0;
sp_3072_cond_sub_24(r, t1, t2, (sp_digit)0 - r1);
sp_3072_cond_sub_24(r, t1, d, (sp_digit)0 - r1);
return MP_OKAY;
}
@ -26156,7 +26156,7 @@ static WC_INLINE int sp_3072_div_48(sp_digit* a, sp_digit* d, sp_digit* m,
}
r1 = sp_3072_cmp_48(t1, d) >= 0;
sp_3072_cond_sub_48(r, t1, t2, (sp_digit)0 - r1);
sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);
return MP_OKAY;
}
@ -26215,7 +26215,7 @@ static WC_INLINE int sp_3072_div_48_cond(sp_digit* a, sp_digit* d, sp_digit* m,
}
r1 = sp_3072_cmp_48(t1, d) >= 0;
sp_3072_cond_sub_48(r, t1, t2, (sp_digit)0 - r1);
sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);
return MP_OKAY;
}
@ -27880,6 +27880,8 @@ SP_NOINLINE static sp_digit sp_256_sub_4(sp_digit* r, const sp_digit* a,
return c;
}
#define sp_256_mont_reduce_order_4 sp_256_mont_reduce_4
/* Reduce the number back to 256 bits using Montgomery reduction.
*
* a A single precision number to reduce in place.
@ -44202,8 +44204,10 @@ static int sp_256_ecc_mulmod_base_4(sp_point* r, sp_digit* k, int map,
}
i = 32;
XMEMCPY(t[v[i].mul].x, p256_table[i][v[i].i].x, sizeof(p256_table[i]->x));
XMEMCPY(t[v[i].mul].y, p256_table[i][v[i].i].y, sizeof(p256_table[i]->y));
XMEMCPY(t[v[i].mul].x, p256_table[i][v[i].i].x,
sizeof(p256_table[i]->x));
XMEMCPY(t[v[i].mul].y, p256_table[i][v[i].i].y,
sizeof(p256_table[i]->y));
t[v[i].mul].infinity = p256_table[i][v[i].i].infinity;
for (--i; i>=0; i--) {
XMEMCPY(p->x, p256_table[i][v[i].i].x, sizeof(p256_table[i]->x));
@ -44211,7 +44215,8 @@ static int sp_256_ecc_mulmod_base_4(sp_point* r, sp_digit* k, int map,
p->infinity = p256_table[i][v[i].i].infinity;
sp_256_sub_4(negy, p256_mod, p->y);
sp_256_cond_copy_4(p->y, negy, (sp_digit)0 - v[i].neg);
sp_256_proj_point_add_qz1_4(&t[v[i].mul], &t[v[i].mul], p, tmp);
sp_256_proj_point_add_qz1_4(&t[v[i].mul], &t[v[i].mul], p,
tmp);
}
sp_256_proj_point_add_4(&t[2], &t[2], &t[3], tmp);
sp_256_proj_point_add_4(&t[1], &t[1], &t[3], tmp);
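The signed-window walk above negates precomputed points on the fly: on a short Weierstrass curve -P = (x, p - y), so negy holds p - y and the copy is taken only when the recoded digit is negative. A sketch of that branch-free select, assuming 4 x 64-bit field elements:

    #include <stdint.h>

    /* r = mask ? a : r, without branching; mask is 0 or all-ones,
     * here (uint64_t)0 - neg for a 0/1 sign bit. */
    static void cond_copy_4(uint64_t r[4], const uint64_t a[4], uint64_t mask)
    {
        for (int i = 0; i < 4; i++)
            r[i] ^= (r[i] ^ a[i]) & mask;
    }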
@ -44296,8 +44301,10 @@ static int sp_256_ecc_mulmod_base_avx2_4(sp_point* r, sp_digit* k, int map,
}
i = 32;
XMEMCPY(t[v[i].mul].x, p256_table[i][v[i].i].x, sizeof(p256_table[i]->x));
XMEMCPY(t[v[i].mul].y, p256_table[i][v[i].i].y, sizeof(p256_table[i]->y));
XMEMCPY(t[v[i].mul].x, p256_table[i][v[i].i].x,
sizeof(p256_table[i]->x));
XMEMCPY(t[v[i].mul].y, p256_table[i][v[i].i].y,
sizeof(p256_table[i]->y));
t[v[i].mul].infinity = p256_table[i][v[i].i].infinity;
for (--i; i>=0; i--) {
XMEMCPY(p->x, p256_table[i][v[i].i].x, sizeof(p256_table[i]->x));
@ -44305,7 +44312,8 @@ static int sp_256_ecc_mulmod_base_avx2_4(sp_point* r, sp_digit* k, int map,
p->infinity = p256_table[i][v[i].i].infinity;
sp_256_sub_4(negy, p256_mod, p->y);
sp_256_cond_copy_4(p->y, negy, (sp_digit)0 - v[i].neg);
sp_256_proj_point_add_qz1_avx2_4(&t[v[i].mul], &t[v[i].mul], p, tmp);
sp_256_proj_point_add_qz1_avx2_4(&t[v[i].mul], &t[v[i].mul], p,
tmp);
}
sp_256_proj_point_add_avx2_4(&t[2], &t[2], &t[3], tmp);
sp_256_proj_point_add_avx2_4(&t[1], &t[1], &t[3], tmp);
@ -44407,7 +44415,6 @@ static int sp_256_iszero_4(const sp_digit* a)
#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN */
/* Add 1 to a. (a = a + 1)
*
* r A single precision integer.
* a A single precision integer.
*/
static void sp_256_add_one_4(sp_digit* a)
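The comment fix just drops a parameter that does not exist; the routine increments a in place. A hedged plain-C equivalent, assuming 4 x 64-bit limbs:

    #include <stdint.h>

    /* a = a + 1 with carry propagation across the limbs. */
    static void add_one_4(uint64_t a[4])
    {
        for (int i = 0; i < 4; i++) {
            if (++a[i] != 0)    /* no carry out: done */
                break;
        }
    }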
@ -45146,7 +45153,7 @@ static WC_INLINE int sp_256_div_4(sp_digit* a, sp_digit* d, sp_digit* m,
}
r1 = sp_256_cmp_4(t1, d) >= 0;
sp_256_cond_sub_4(r, t1, t2, (sp_digit)0 - r1);
sp_256_cond_sub_4(r, t1, d, (sp_digit)0 - r1);
return MP_OKAY;
}
@ -45294,7 +45301,7 @@ static const uint64_t p256_order_low[2] = {
static void sp_256_mont_mul_order_4(sp_digit* r, sp_digit* a, sp_digit* b)
{
sp_256_mul_4(r, a, b);
sp_256_mont_reduce_4(r, p256_order, p256_mp_order);
sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
}
/* Square number mod the order of P256 curve. (r = a * a mod order)
@ -45305,7 +45312,7 @@ static void sp_256_mont_mul_order_4(sp_digit* r, sp_digit* a, sp_digit* b)
static void sp_256_mont_sqr_order_4(sp_digit* r, sp_digit* a)
{
sp_256_sqr_4(r, a);
sp_256_mont_reduce_4(r, p256_order, p256_mp_order);
sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
}
#ifndef WOLFSSL_SP_SMALL
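Switching these helpers to sp_256_mont_reduce_order_4 is purely a naming change on this 4-limb code (the new #defines alias it to the generic reduce), but it keeps the order/field pairing explicit: reduction modulo n uses p256_mp_order, which plays the -n^-1 mod 2^w role in Montgomery's REDC. A one-word sketch, assuming n < 2^31 so the intermediate sum cannot overflow:

    #include <stdint.h>

    /* One-word Montgomery reduction with R = 2^32: returns t * R^-1 mod n.
     * mp = -n^-1 mod 2^32; input t < n * R. */
    static uint32_t redc32(uint64_t t, uint32_t n, uint32_t mp)
    {
        uint32_t m = (uint32_t)t * mp;              /* m = (t mod R) * mp mod R */
        uint64_t u = (t + (uint64_t)m * n) >> 32;   /* low word cancels exactly */
        return (u >= n) ? (uint32_t)(u - n) : (uint32_t)u;
    }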
@ -45497,6 +45504,8 @@ SP_NOINLINE static void sp_256_sqr_avx2_4(sp_digit* r, const sp_digit* a)
);
}
#define sp_256_mont_reduce_order_avx2_4 sp_256_mont_reduce_avx2_4
/* Reduce the number back to 256 bits using Montgomery reduction.
*
* a A single precision number to reduce in place.
@ -45646,7 +45655,7 @@ SP_NOINLINE static void sp_256_mont_reduce_avx2_4(sp_digit* a, sp_digit* m,
static void sp_256_mont_mul_order_avx2_4(sp_digit* r, sp_digit* a, sp_digit* b)
{
sp_256_mul_avx2_4(r, a, b);
sp_256_mont_reduce_avx2_4(r, p256_order, p256_mp_order);
sp_256_mont_reduce_order_avx2_4(r, p256_order, p256_mp_order);
}
/* Square number mod the order of P256 curve. (r = a * a mod order)
@ -45657,7 +45666,7 @@ static void sp_256_mont_mul_order_avx2_4(sp_digit* r, sp_digit* a, sp_digit* b)
static void sp_256_mont_sqr_order_avx2_4(sp_digit* r, sp_digit* a)
{
sp_256_sqr_avx2_4(r, a);
sp_256_mont_reduce_avx2_4(r, p256_order, p256_mp_order);
sp_256_mont_reduce_order_avx2_4(r, p256_order, p256_mp_order);
}
#ifndef WOLFSSL_SP_SMALL


@ -35,6 +35,8 @@
#define SP_WORD_SIZE 64
#elif defined(WOLFSSL_SP_ARM32_ASM)
#define SP_WORD_SIZE 32
#elif defined(WOLFSSL_SP_ARM_THUMB_ASM)
#define SP_WORD_SIZE 32
#endif
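Thumb therefore shares the 32-bit digit size with the ARM32 assembly. Elsewhere the word size typically drives the digit typedef along these lines (a hypothetical sketch, not the verbatim header):

    #if SP_WORD_SIZE == 32
        typedef uint32_t sp_int_digit;
    #elif SP_WORD_SIZE == 64
        typedef uint64_t sp_int_digit;
    #else
        #error "Unsupported SP_WORD_SIZE"
    #endif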
#ifndef SP_WORD_SIZE