Mirror of https://github.com/wolfSSL/wolfssl.git

Single Precision: ARM Thumb assembly implementation

Remove AVX2 code from platform-specific code that doesn't support it. Fix sp_lshd to memmove the correct amount. (pull/1905/head)

parent: fb699acec4
commit: 741301bb2c

configure.ac: 10 lines changed
configure.ac
@@ -3722,8 +3722,13 @@ if test "$ENABLED_SP_ASM" = "yes"; then
             ENABLED_SP_ARM64_ASM=yes
             ;;
           *arm*)
-            AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM32_ASM"
-            ENABLED_SP_ARM32_ASM=yes
+            if test $host_alias = "thumb"; then
+                AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM_THUMB_ASM -mthumb -march=armv6"
+                ENABLED_SP_ARM_THUMB_ASM=yes
+            else
+                AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM32_ASM"
+                ENABLED_SP_ARM32_ASM=yes
+            fi
             ;;
           *x86_64*)
             AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_X86_64_ASM"
@@ -3771,6 +3776,7 @@ AM_CONDITIONAL([BUILD_SP], [test "x$ENABLED_SP" = "xyes"])
 AM_CONDITIONAL([BUILD_SP_C], [test "x$ENABLED_SP" = "xyes" && test "x$ENABLED_SP_ASM" = "xno" ])
 AM_CONDITIONAL([BUILD_SP_ARM64], [test "x$ENABLED_SP_ARM64_ASM" = "xyes" ])
 AM_CONDITIONAL([BUILD_SP_ARM32], [test "x$ENABLED_SP_ARM32_ASM" = "xyes" ])
+AM_CONDITIONAL([BUILD_SP_ARM_THUMB], [test "x$ENABLED_SP_ARM_THUMB_ASM" = "xyes" ])
 AM_CONDITIONAL([BUILD_SP_X86_64], [test "x$ENABLED_SP_X86_64_ASM" = "xyes" ])
 AM_CONDITIONAL([BUILD_SP_INT], [test "x$ENABLED_SP_MATH" = "xyes" ])
@@ -183,6 +183,9 @@ endif
 if BUILD_SP_ARM32
 src_libwolfssl_la_SOURCES += wolfcrypt/src/sp_arm32.c
 endif
+if BUILD_SP_ARM_THUMB
+src_libwolfssl_la_SOURCES += wolfcrypt/src/sp_armthumb.c
+endif
 if BUILD_SP_ARM64
 src_libwolfssl_la_SOURCES += wolfcrypt/src/sp_arm64.c
 endif
[File diff suppressed because it is too large]
@@ -2679,7 +2679,7 @@ static WC_INLINE int sp_2048_div_16(sp_digit* a, sp_digit* d, sp_digit* m,
     }

     r1 = sp_2048_cmp_16(t1, d) >= 0;
-    sp_2048_cond_sub_16(r, t1, t2, (sp_digit)0 - r1);
+    sp_2048_cond_sub_16(r, t1, d, (sp_digit)0 - r1);

     return MP_OKAY;
 }
@@ -4323,7 +4323,7 @@ static WC_INLINE int sp_2048_div_32(sp_digit* a, sp_digit* d, sp_digit* m,
     }

     r1 = sp_2048_cmp_32(t1, d) >= 0;
-    sp_2048_cond_sub_32(r, t1, t2, (sp_digit)0 - r1);
+    sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);

     return MP_OKAY;
 }
@@ -4374,7 +4374,7 @@ static WC_INLINE int sp_2048_div_32_cond(sp_digit* a, sp_digit* d, sp_digit* m,
     }

     r1 = sp_2048_cmp_32(t1, d) >= 0;
-    sp_2048_cond_sub_32(r, t1, t2, (sp_digit)0 - r1);
+    sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);

     return MP_OKAY;
 }
@@ -9158,7 +9158,7 @@ static WC_INLINE int sp_3072_div_24(sp_digit* a, sp_digit* d, sp_digit* m,
     }

     r1 = sp_3072_cmp_24(t1, d) >= 0;
-    sp_3072_cond_sub_24(r, t1, t2, (sp_digit)0 - r1);
+    sp_3072_cond_sub_24(r, t1, d, (sp_digit)0 - r1);

     return MP_OKAY;
 }
@@ -11314,7 +11314,7 @@ static WC_INLINE int sp_3072_div_48(sp_digit* a, sp_digit* d, sp_digit* m,
     }

     r1 = sp_3072_cmp_48(t1, d) >= 0;
-    sp_3072_cond_sub_48(r, t1, t2, (sp_digit)0 - r1);
+    sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);

     return MP_OKAY;
 }
@@ -11365,7 +11365,7 @@ static WC_INLINE int sp_3072_div_48_cond(sp_digit* a, sp_digit* d, sp_digit* m,
     }

     r1 = sp_3072_cmp_48(t1, d) >= 0;
-    sp_3072_cond_sub_48(r, t1, t2, (sp_digit)0 - r1);
+    sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);

     return MP_OKAY;
 }
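The six hunks above all make the same one-line correction: the final constant-time correction step of each division routine must conditionally subtract the divisor d, not the scratch buffer t2. A minimal, self-contained sketch of that pattern follows; cond_sub and cmp_ge are hypothetical stand-ins for the generated sp_*_cond_sub_* and sp_*_cmp_* helpers, not the actual wolfSSL code.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t sp_digit;

/* Branch-free conditional subtract in the spirit of sp_2048_cond_sub_16():
 * r = a - (b & mask), where mask is either all ones (subtract) or zero
 * (leave a unchanged). Returns the final borrow. */
sp_digit cond_sub(sp_digit* r, const sp_digit* a, const sp_digit* b,
                  sp_digit mask, int words)
{
    sp_digit borrow = 0;
    int i;
    for (i = 0; i < words; i++) {
        sp_digit bi = b[i] & mask;
        sp_digit t  = a[i] - borrow;
        sp_digit c1 = (sp_digit)(t > a[i]);   /* borrow consumed by this word */
        sp_digit c2 = (sp_digit)(bi > t);     /* borrow produced by this word */
        r[i] = t - bi;
        borrow = c1 | c2;
    }
    return borrow;
}

/* Simplified comparison: 1 when a >= b. The generated SP code does this
 * without data-dependent branches; this version is only for illustration. */
sp_digit cmp_ge(const sp_digit* a, const sp_digit* b, int words)
{
    int i;
    for (i = words - 1; i >= 0; i--) {
        if (a[i] != b[i])
            return (sp_digit)(a[i] > b[i]);
    }
    return 1;
}

int main(void)
{
    sp_digit t1[2] = { 5, 1 };   /* working value, may still be >= d */
    sp_digit d[2]  = { 3, 1 };   /* divisor */
    sp_digit r[2];
    sp_digit r1;

    /* Final step of the division: if t1 >= d, subtract the divisor d once
     * more. Passing the scratch buffer t2 here, as the old code did,
     * subtracts the wrong value. */
    r1 = cmp_ge(t1, d, 2);
    cond_sub(r, t1, d, (sp_digit)0 - r1, 2);

    printf("%llu %llu\n", (unsigned long long)r[1], (unsigned long long)r[0]);
    return 0;
}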
@@ -12588,6 +12588,8 @@ static sp_digit sp_256_sub_4(sp_digit* r, const sp_digit* a,
     return c;
 }

+#define sp_256_mont_reduce_order_4 sp_256_mont_reduce_4
+
 /* Reduce the number back to 256 bits using Montgomery reduction.
  *
  * a  A single precision number to reduce in place.
@@ -14392,9 +14394,6 @@ int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
     sp_point* point;
     sp_digit* k = NULL;
     int err = MP_OKAY;
-#ifdef HAVE_INTEL_AVX2
-    word32 cpuid_flags = cpuid_get_flags();
-#endif

     err = sp_ecc_point_new(heap, p, point);
 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
@@ -14410,11 +14409,6 @@ int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
         sp_256_from_mp(k, 4, km);
         sp_256_point_from_ecc_point_4(point, gm);

-#ifdef HAVE_INTEL_AVX2
-        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
-            err = sp_256_ecc_mulmod_avx2_4(point, point, k, map, heap);
-        else
-#endif
             err = sp_256_ecc_mulmod_4(point, point, k, map, heap);
     }
     if (err == MP_OKAY)
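The two hunks above show the pattern that accounts for most of this commit: a run-time BMI2/ADX check guarding an AVX2 code path. That dispatch only makes sense in the x86_64 SP implementation, so it is removed from the platform-specific files that never define the *_avx2_* functions. Below is a compilable sketch of the shape being deleted; the helper names are stand-ins for illustration, not wolfSSL's API.

#include <stdio.h>

/* Stand-ins for cpuid_get_flags()/IS_INTEL_BMI2/IS_INTEL_ADX and the two
 * point-multiplication back ends; only the dispatch structure matches the
 * diff above. */
int have_bmi2_adx(void)   { return 0; }
int mulmod_generic(void)  { puts("generic SP path"); return 0; }
int mulmod_avx2(void)     { puts("AVX2 SP path");    return 0; }

int main(void)
{
    int err;
#ifdef HAVE_INTEL_AVX2
    /* After this commit, only the x86_64 SP file keeps this branch. */
    if (have_bmi2_adx())
        err = mulmod_avx2();
    else
#endif
        err = mulmod_generic();
    return err;
}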
@@ -27651,8 +27645,10 @@ static int sp_256_ecc_mulmod_base_4(sp_point* r, sp_digit* k, int map,
     }

     i = 32;
-    XMEMCPY(t[v[i].mul].x, p256_table[i][v[i].i].x, sizeof(p256_table[i]->x));
-    XMEMCPY(t[v[i].mul].y, p256_table[i][v[i].i].y, sizeof(p256_table[i]->y));
+    XMEMCPY(t[v[i].mul].x, p256_table[i][v[i].i].x,
+            sizeof(p256_table[i]->x));
+    XMEMCPY(t[v[i].mul].y, p256_table[i][v[i].i].y,
+            sizeof(p256_table[i]->y));
     t[v[i].mul].infinity = p256_table[i][v[i].i].infinity;
     for (--i; i>=0; i--) {
         XMEMCPY(p->x, p256_table[i][v[i].i].x, sizeof(p256_table[i]->x));
@@ -27660,7 +27656,8 @@ static int sp_256_ecc_mulmod_base_4(sp_point* r, sp_digit* k, int map,
         p->infinity = p256_table[i][v[i].i].infinity;
         sp_256_sub_4(negy, p256_mod, p->y);
         sp_256_cond_copy_4(p->y, negy, (sp_digit)0 - v[i].neg);
-        sp_256_proj_point_add_qz1_4(&t[v[i].mul], &t[v[i].mul], p, tmp);
+        sp_256_proj_point_add_qz1_4(&t[v[i].mul], &t[v[i].mul], p,
+                                    tmp);
     }
     sp_256_proj_point_add_4(&t[2], &t[2], &t[3], tmp);
     sp_256_proj_point_add_4(&t[1], &t[1], &t[3], tmp);
@ -27710,9 +27707,6 @@ int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
|
|||
sp_point* point;
|
||||
sp_digit* k = NULL;
|
||||
int err = MP_OKAY;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
err = sp_ecc_point_new(heap, p, point);
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
|
@ -27727,11 +27721,6 @@ int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
|
|||
if (err == MP_OKAY) {
|
||||
sp_256_from_mp(k, 4, km);
|
||||
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_base_avx2_4(point, k, map, heap);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_base_4(point, k, map, heap);
|
||||
}
|
||||
if (err == MP_OKAY)
|
||||
|
@ -27761,7 +27750,6 @@ static int sp_256_iszero_4(const sp_digit* a)
|
|||
#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN */
|
||||
/* Add 1 to a. (a = a + 1)
|
||||
*
|
||||
* r A single precision integer.
|
||||
* a A single precision integer.
|
||||
*/
|
||||
static void sp_256_add_one_4(sp_digit* a)
|
||||
|
@ -27861,9 +27849,6 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
|
|||
sp_point* infinity;
|
||||
#endif
|
||||
int err;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
(void)heap;
|
||||
|
||||
|
@ -27885,23 +27870,11 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
|
|||
if (err == MP_OKAY)
|
||||
err = sp_256_ecc_gen_k_4(rng, k);
|
||||
if (err == MP_OKAY) {
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_base_avx2_4(point, k, 1, NULL);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_base_4(point, k, 1, NULL);
|
||||
}
|
||||
|
||||
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
|
||||
if (err == MP_OKAY) {
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
|
||||
err = sp_256_ecc_mulmod_avx2_4(infinity, point, p256_order, 1,
|
||||
NULL);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_4(infinity, point, p256_order, 1, NULL);
|
||||
}
|
||||
if (err == MP_OKAY) {
|
||||
|
@ -27980,9 +27953,6 @@ int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
|
|||
sp_point* point = NULL;
|
||||
sp_digit* k = NULL;
|
||||
int err = MP_OKAY;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
if (*outLen < 32)
|
||||
err = BUFFER_E;
|
||||
|
@ -28002,11 +27972,6 @@ int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
|
|||
if (err == MP_OKAY) {
|
||||
sp_256_from_mp(k, 4, priv);
|
||||
sp_256_point_from_ecc_point_4(point, pub);
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_avx2_4(point, point, k, 1, heap);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_4(point, point, k, 1, heap);
|
||||
}
|
||||
if (err == MP_OKAY) {
|
||||
|
@@ -28232,8 +28197,6 @@ static void sp_256_mul_4(sp_digit* r, const sp_digit* a, const sp_digit* b)
 }

 #endif /* WOLFSSL_SP_SMALL */
-#ifdef HAVE_INTEL_AVX2
-#endif /* HAVE_INTEL_AVX2 */
 #endif
 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
 /* Sub b from a into a. (a -= b)
@@ -28431,7 +28394,7 @@ static WC_INLINE int sp_256_div_4(sp_digit* a, sp_digit* d, sp_digit* m,
     }

     r1 = sp_256_cmp_4(t1, d) >= 0;
-    sp_256_cond_sub_4(r, t1, t2, (sp_digit)0 - r1);
+    sp_256_cond_sub_4(r, t1, d, (sp_digit)0 - r1);

     return MP_OKAY;
 }
@@ -28642,7 +28605,7 @@ static const uint64_t p256_order_low[2] = {
 static void sp_256_mont_mul_order_4(sp_digit* r, sp_digit* a, sp_digit* b)
 {
     sp_256_mul_4(r, a, b);
-    sp_256_mont_reduce_4(r, p256_order, p256_mp_order);
+    sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
 }

 /* Square number mod the order of P256 curve. (r = a * a mod order)
@@ -28653,7 +28616,7 @@ static void sp_256_mont_mul_order_4(sp_digit* r, sp_digit* a, sp_digit* b)
 static void sp_256_mont_sqr_order_4(sp_digit* r, sp_digit* a)
 {
     sp_256_sqr_4(r, a);
-    sp_256_mont_reduce_4(r, p256_order, p256_mp_order);
+    sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
 }

 #ifndef WOLFSSL_SP_SMALL
@ -28768,143 +28731,6 @@ static void sp_256_mont_inv_order_4(sp_digit* r, sp_digit* a,
|
|||
#endif /* WOLFSSL_SP_SMALL */
|
||||
}
|
||||
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
|
||||
*
|
||||
* r Result of the multiplication.
|
||||
* a First operand of the multiplication.
|
||||
* b Second operand of the multiplication.
|
||||
*/
|
||||
static void sp_256_mont_mul_order_avx2_4(sp_digit* r, sp_digit* a, sp_digit* b)
|
||||
{
|
||||
sp_256_mul_avx2_4(r, a, b);
|
||||
sp_256_mont_reduce_avx2_4(r, p256_order, p256_mp_order);
|
||||
}
|
||||
|
||||
/* Square number mod the order of P256 curve. (r = a * a mod order)
|
||||
*
|
||||
* r Result of the squaring.
|
||||
* a Number to square.
|
||||
*/
|
||||
static void sp_256_mont_sqr_order_avx2_4(sp_digit* r, sp_digit* a)
|
||||
{
|
||||
sp_256_sqr_avx2_4(r, a);
|
||||
sp_256_mont_reduce_avx2_4(r, p256_order, p256_mp_order);
|
||||
}
|
||||
|
||||
#ifndef WOLFSSL_SP_SMALL
|
||||
/* Square number mod the order of P256 curve a number of times.
|
||||
* (r = a ^ n mod order)
|
||||
*
|
||||
* r Result of the squaring.
|
||||
* a Number to square.
|
||||
*/
|
||||
static void sp_256_mont_sqr_n_order_avx2_4(sp_digit* r, sp_digit* a, int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
sp_256_mont_sqr_order_avx2_4(r, a);
|
||||
for (i=1; i<n; i++)
|
||||
sp_256_mont_sqr_order_avx2_4(r, r);
|
||||
}
|
||||
#endif /* !WOLFSSL_SP_SMALL */
|
||||
|
||||
/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
|
||||
* (r = 1 / a mod order)
|
||||
*
|
||||
* r Inverse result.
|
||||
* a Number to invert.
|
||||
* td Temporary data.
|
||||
*/
|
||||
static void sp_256_mont_inv_order_avx2_4(sp_digit* r, sp_digit* a,
|
||||
sp_digit* td)
|
||||
{
|
||||
#ifdef WOLFSSL_SP_SMALL
|
||||
sp_digit* t = td;
|
||||
int i;
|
||||
|
||||
XMEMCPY(t, a, sizeof(sp_digit) * 4);
|
||||
for (i=254; i>=0; i--) {
|
||||
sp_256_mont_sqr_order_avx2_4(t, t);
|
||||
if (p256_order_2[i / 64] & ((sp_digit)1 << (i % 64)))
|
||||
sp_256_mont_mul_order_avx2_4(t, t, a);
|
||||
}
|
||||
XMEMCPY(r, t, sizeof(sp_digit) * 4);
|
||||
#else
|
||||
sp_digit* t = td;
|
||||
sp_digit* t2 = td + 2 * 4;
|
||||
sp_digit* t3 = td + 4 * 4;
|
||||
int i;
|
||||
|
||||
/* t = a^2 */
|
||||
sp_256_mont_sqr_order_avx2_4(t, a);
|
||||
/* t = a^3 = t * a */
|
||||
sp_256_mont_mul_order_avx2_4(t, t, a);
|
||||
/* t2= a^c = t ^ 2 ^ 2 */
|
||||
sp_256_mont_sqr_n_order_avx2_4(t2, t, 2);
|
||||
/* t3= a^f = t2 * t */
|
||||
sp_256_mont_mul_order_avx2_4(t3, t2, t);
|
||||
/* t2= a^f0 = t3 ^ 2 ^ 4 */
|
||||
sp_256_mont_sqr_n_order_avx2_4(t2, t3, 4);
|
||||
/* t = a^ff = t2 * t3 */
|
||||
sp_256_mont_mul_order_avx2_4(t, t2, t3);
|
||||
/* t3= a^ff00 = t ^ 2 ^ 8 */
|
||||
sp_256_mont_sqr_n_order_avx2_4(t2, t, 8);
|
||||
/* t = a^ffff = t2 * t */
|
||||
sp_256_mont_mul_order_avx2_4(t, t2, t);
|
||||
/* t2= a^ffff0000 = t ^ 2 ^ 16 */
|
||||
sp_256_mont_sqr_n_order_avx2_4(t2, t, 16);
|
||||
/* t = a^ffffffff = t2 * t */
|
||||
sp_256_mont_mul_order_avx2_4(t, t2, t);
|
||||
/* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
|
||||
sp_256_mont_sqr_n_order_avx2_4(t2, t, 64);
|
||||
/* t2= a^ffffffff00000000ffffffff = t2 * t */
|
||||
sp_256_mont_mul_order_avx2_4(t2, t2, t);
|
||||
/* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
|
||||
sp_256_mont_sqr_n_order_avx2_4(t2, t2, 32);
|
||||
/* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
|
||||
sp_256_mont_mul_order_avx2_4(t2, t2, t);
|
||||
/* t2= a^ffffffff00000000ffffffffffffffffbce6 */
|
||||
for (i=127; i>=112; i--) {
|
||||
sp_256_mont_sqr_order_avx2_4(t2, t2);
|
||||
if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
|
||||
sp_256_mont_mul_order_avx2_4(t2, t2, a);
|
||||
}
|
||||
/* t2= a^ffffffff00000000ffffffffffffffffbce6f */
|
||||
sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
|
||||
sp_256_mont_mul_order_avx2_4(t2, t2, t3);
|
||||
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
|
||||
for (i=107; i>=64; i--) {
|
||||
sp_256_mont_sqr_order_avx2_4(t2, t2);
|
||||
if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
|
||||
sp_256_mont_mul_order_avx2_4(t2, t2, a);
|
||||
}
|
||||
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
|
||||
sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
|
||||
sp_256_mont_mul_order_avx2_4(t2, t2, t3);
|
||||
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
|
||||
for (i=59; i>=32; i--) {
|
||||
sp_256_mont_sqr_order_avx2_4(t2, t2);
|
||||
if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
|
||||
sp_256_mont_mul_order_avx2_4(t2, t2, a);
|
||||
}
|
||||
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
|
||||
sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
|
||||
sp_256_mont_mul_order_avx2_4(t2, t2, t3);
|
||||
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
|
||||
for (i=27; i>=0; i--) {
|
||||
sp_256_mont_sqr_order_avx2_4(t2, t2);
|
||||
if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
|
||||
sp_256_mont_mul_order_avx2_4(t2, t2, a);
|
||||
}
|
||||
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
|
||||
sp_256_mont_sqr_n_order_avx2_4(t2, t2, 4);
|
||||
/* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
|
||||
sp_256_mont_mul_order_avx2_4(r, t2, t3);
|
||||
#endif /* WOLFSSL_SP_SMALL */
|
||||
}
|
||||
|
||||
#endif /* HAVE_INTEL_AVX2 */
|
||||
#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
|
||||
#ifdef HAVE_ECC_SIGN
|
||||
#ifndef SP_ECC_MAX_SIG_GEN
|
||||
|
@ -28952,9 +28778,6 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
|
|||
int err = MP_OKAY;
|
||||
int64_t c;
|
||||
int i;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
(void)heap;
|
||||
|
||||
|
@ -28994,11 +28817,6 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
|
|||
/* New random point. */
|
||||
err = sp_256_ecc_gen_k_4(rng, k);
|
||||
if (err == MP_OKAY) {
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_base_avx2_4(point, k, 1, heap);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_base_4(point, k, 1, NULL);
|
||||
}
|
||||
|
||||
|
@ -29011,31 +28829,16 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
|
|||
sp_256_norm_4(r);
|
||||
|
||||
/* Conv k to Montgomery form (mod order) */
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
sp_256_mul_avx2_4(k, k, p256_norm_order);
|
||||
else
|
||||
#endif
|
||||
sp_256_mul_4(k, k, p256_norm_order);
|
||||
err = sp_256_mod_4(k, k, p256_order);
|
||||
}
|
||||
if (err == MP_OKAY) {
|
||||
sp_256_norm_4(k);
|
||||
/* kInv = 1/k mod order */
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
sp_256_mont_inv_order_avx2_4(kInv, k, tmp);
|
||||
else
|
||||
#endif
|
||||
sp_256_mont_inv_order_4(kInv, k, tmp);
|
||||
sp_256_norm_4(kInv);
|
||||
|
||||
/* s = r * x + e */
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
sp_256_mul_avx2_4(x, x, r);
|
||||
else
|
||||
#endif
|
||||
sp_256_mul_4(x, x, r);
|
||||
err = sp_256_mod_4(x, x, p256_order);
|
||||
}
|
||||
|
@ -29049,11 +28852,6 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
|
|||
sp_256_norm_4(s);
|
||||
|
||||
/* s = s * k^-1 mod order */
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
sp_256_mont_mul_order_avx2_4(s, s, kInv);
|
||||
else
|
||||
#endif
|
||||
sp_256_mont_mul_order_4(s, s, kInv);
|
||||
sp_256_norm_4(s);
|
||||
|
||||
|
@ -29133,9 +28931,6 @@ int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
|
|||
sp_digit carry;
|
||||
int64_t c;
|
||||
int err;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
err = sp_ecc_point_new(heap, p1d, p1);
|
||||
if (err == MP_OKAY)
|
||||
|
@ -29170,52 +28965,24 @@ int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
|
|||
sp_256_from_mp(p2->y, 4, pY);
|
||||
sp_256_from_mp(p2->z, 4, pZ);
|
||||
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
sp_256_mul_avx2_4(s, s, p256_norm_order);
|
||||
else
|
||||
#endif
|
||||
sp_256_mul_4(s, s, p256_norm_order);
|
||||
err = sp_256_mod_4(s, s, p256_order);
|
||||
}
|
||||
if (err == MP_OKAY) {
|
||||
sp_256_norm_4(s);
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
|
||||
sp_256_mont_inv_order_avx2_4(s, s, tmp);
|
||||
sp_256_mont_mul_order_avx2_4(u1, u1, s);
|
||||
sp_256_mont_mul_order_avx2_4(u2, u2, s);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
sp_256_mont_inv_order_4(s, s, tmp);
|
||||
sp_256_mont_mul_order_4(u1, u1, s);
|
||||
sp_256_mont_mul_order_4(u2, u2, s);
|
||||
}
|
||||
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_base_avx2_4(p1, u1, 0, heap);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_base_4(p1, u1, 0, heap);
|
||||
}
|
||||
if (err == MP_OKAY) {
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_avx2_4(p2, p2, u2, 0, heap);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_4(p2, p2, u2, 0, heap);
|
||||
}
|
||||
|
||||
if (err == MP_OKAY) {
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
sp_256_proj_point_add_avx2_4(p1, p1, p2, tmp);
|
||||
else
|
||||
#endif
|
||||
sp_256_proj_point_add_4(p1, p1, p2, tmp);
|
||||
|
||||
/* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
|
||||
|
@ -29378,9 +29145,6 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
|
|||
sp_point* p = NULL;
|
||||
byte one[1] = { 1 };
|
||||
int err;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
err = sp_ecc_point_new(heap, pubd, pub);
|
||||
if (err == MP_OKAY)
|
||||
|
@ -29421,11 +29185,6 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
|
|||
|
||||
if (err == MP_OKAY) {
|
||||
/* Point * order = infinity */
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_avx2_4(p, pub, p256_order, 1, heap);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_4(p, pub, p256_order, 1, heap);
|
||||
}
|
||||
if (err == MP_OKAY) {
|
||||
|
@ -29438,11 +29197,6 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
|
|||
|
||||
if (err == MP_OKAY) {
|
||||
/* Base * private = point */
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_base_avx2_4(p, priv, 1, heap);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_base_4(p, priv, 1, heap);
|
||||
}
|
||||
if (err == MP_OKAY) {
|
||||
|
@ -29491,9 +29245,6 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
|
|||
sp_point* p;
|
||||
sp_point* q = NULL;
|
||||
int err;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
err = sp_ecc_point_new(NULL, pd, p);
|
||||
if (err == MP_OKAY)
|
||||
|
@ -29516,11 +29267,6 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
|
|||
sp_256_from_mp(q->y, 4, qY);
|
||||
sp_256_from_mp(q->z, 4, qZ);
|
||||
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
sp_256_proj_point_add_avx2_4(p, p, q, tmp);
|
||||
else
|
||||
#endif
|
||||
sp_256_proj_point_add_4(p, p, q, tmp);
|
||||
}
|
||||
|
||||
|
@ -29562,9 +29308,6 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
|
|||
sp_digit* tmp;
|
||||
sp_point* p;
|
||||
int err;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
err = sp_ecc_point_new(NULL, pd, p);
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
|
@ -29582,11 +29325,6 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
|
|||
sp_256_from_mp(p->y, 4, pY);
|
||||
sp_256_from_mp(p->z, 4, pZ);
|
||||
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
sp_256_proj_point_dbl_avx2_4(p, p, tmp);
|
||||
else
|
||||
#endif
|
||||
sp_256_proj_point_dbl_4(p, p, tmp);
|
||||
}
|
||||
|
||||
|
@ -29675,9 +29413,6 @@ static int sp_256_mont_sqrt_4(sp_digit* y)
|
|||
sp_digit* t1;
|
||||
sp_digit* t2;
|
||||
int err = MP_OKAY;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
d = XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC);
|
||||
|
@ -29693,40 +29428,6 @@ static int sp_256_mont_sqrt_4(sp_digit* y)
|
|||
#endif
|
||||
|
||||
if (err == MP_OKAY) {
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
|
||||
/* t2 = y ^ 0x2 */
|
||||
sp_256_mont_sqr_avx2_4(t2, y, p256_mod, p256_mp_mod);
|
||||
/* t1 = y ^ 0x3 */
|
||||
sp_256_mont_mul_avx2_4(t1, t2, y, p256_mod, p256_mp_mod);
|
||||
/* t2 = y ^ 0xc */
|
||||
sp_256_mont_sqr_n_avx2_4(t2, t1, 2, p256_mod, p256_mp_mod);
|
||||
/* t1 = y ^ 0xf */
|
||||
sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
|
||||
/* t2 = y ^ 0xf0 */
|
||||
sp_256_mont_sqr_n_avx2_4(t2, t1, 4, p256_mod, p256_mp_mod);
|
||||
/* t1 = y ^ 0xff */
|
||||
sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
|
||||
/* t2 = y ^ 0xff00 */
|
||||
sp_256_mont_sqr_n_avx2_4(t2, t1, 8, p256_mod, p256_mp_mod);
|
||||
/* t1 = y ^ 0xffff */
|
||||
sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
|
||||
/* t2 = y ^ 0xffff0000 */
|
||||
sp_256_mont_sqr_n_avx2_4(t2, t1, 16, p256_mod, p256_mp_mod);
|
||||
/* t1 = y ^ 0xffffffff */
|
||||
sp_256_mont_mul_avx2_4(t1, t1, t2, p256_mod, p256_mp_mod);
|
||||
/* t1 = y ^ 0xffffffff00000000 */
|
||||
sp_256_mont_sqr_n_avx2_4(t1, t1, 32, p256_mod, p256_mp_mod);
|
||||
/* t1 = y ^ 0xffffffff00000001 */
|
||||
sp_256_mont_mul_avx2_4(t1, t1, y, p256_mod, p256_mp_mod);
|
||||
/* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
|
||||
sp_256_mont_sqr_n_avx2_4(t1, t1, 96, p256_mod, p256_mp_mod);
|
||||
/* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
|
||||
sp_256_mont_mul_avx2_4(t1, t1, y, p256_mod, p256_mp_mod);
|
||||
sp_256_mont_sqr_n_avx2_4(y, t1, 94, p256_mod, p256_mp_mod);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
/* t2 = y ^ 0x2 */
|
||||
sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod);
|
||||
|
@ -29786,9 +29487,6 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
|
|||
sp_digit* x;
|
||||
sp_digit* y;
|
||||
int err = MP_OKAY;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
d = XMALLOC(sizeof(sp_digit) * 4 * 4, NULL, DYNAMIC_TYPE_ECC);
|
||||
|
@ -29811,13 +29509,6 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
|
|||
|
||||
if (err == MP_OKAY) {
|
||||
/* y = x^3 */
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
|
||||
sp_256_mont_sqr_avx2_4(y, x, p256_mod, p256_mp_mod);
|
||||
sp_256_mont_mul_avx2_4(y, y, x, p256_mod, p256_mp_mod);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
sp_256_mont_sqr_4(y, x, p256_mod, p256_mp_mod);
|
||||
sp_256_mont_mul_4(y, y, x, p256_mod, p256_mp_mod);
|
||||
|
|
[File diff suppressed because it is too large]
|
@ -1743,7 +1743,8 @@ static int sp_2048_div_45(sp_digit* a, sp_digit* d, sp_digit* m,
|
|||
int err = MP_OKAY;
|
||||
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
td = XMALLOC(sizeof(sp_digit) * (4 * 45 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER);
|
||||
td = XMALLOC(sizeof(sp_digit) * (4 * 45 + 3), NULL,
|
||||
DYNAMIC_TYPE_TMP_BUFFER);
|
||||
if (td != NULL) {
|
||||
t1 = td;
|
||||
t2 = td + 90 + 1;
|
||||
|
@ -2675,7 +2676,8 @@ static int sp_2048_div_90(sp_digit* a, sp_digit* d, sp_digit* m,
|
|||
int err = MP_OKAY;
|
||||
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
td = XMALLOC(sizeof(sp_digit) * (4 * 90 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER);
|
||||
td = XMALLOC(sizeof(sp_digit) * (4 * 90 + 3), NULL,
|
||||
DYNAMIC_TYPE_TMP_BUFFER);
|
||||
if (td != NULL) {
|
||||
t1 = td;
|
||||
t2 = td + 180 + 1;
|
||||
|
@ -5024,7 +5026,8 @@ static int sp_3072_div_68(sp_digit* a, sp_digit* d, sp_digit* m,
|
|||
int err = MP_OKAY;
|
||||
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
td = XMALLOC(sizeof(sp_digit) * (3 * 68 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
|
||||
td = XMALLOC(sizeof(sp_digit) * (3 * 68 + 1), NULL,
|
||||
DYNAMIC_TYPE_TMP_BUFFER);
|
||||
if (td != NULL) {
|
||||
t1 = td;
|
||||
t2 = td + 2 * 68;
|
||||
|
@ -6042,7 +6045,8 @@ static int sp_3072_div_136(sp_digit* a, sp_digit* d, sp_digit* m,
|
|||
int err = MP_OKAY;
|
||||
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
td = XMALLOC(sizeof(sp_digit) * (4 * 136 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER);
|
||||
td = XMALLOC(sizeof(sp_digit) * (4 * 136 + 3), NULL,
|
||||
DYNAMIC_TYPE_TMP_BUFFER);
|
||||
if (td != NULL) {
|
||||
t1 = td;
|
||||
t2 = td + 272 + 1;
|
||||
|
@ -7685,6 +7689,8 @@ static void sp_256_cond_sub_10(sp_digit* r, const sp_digit* a,
|
|||
#endif /* WOLFSSL_SP_SMALL */
|
||||
}
|
||||
|
||||
#define sp_256_mont_reduce_order_10 sp_256_mont_reduce_10
|
||||
|
||||
/* Mul a by scalar b and add into r. (r += a * b)
|
||||
*
|
||||
* r A single precision integer.
|
||||
|
@ -8844,7 +8850,7 @@ static int sp_256_ecc_mulmod_10(sp_point* r, sp_point* g, sp_digit* k,
|
|||
((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
|
||||
sp_256_proj_point_dbl_10(&t[2], &t[2], tmp);
|
||||
XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
|
||||
((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
|
||||
((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
|
||||
}
|
||||
|
||||
if (map)
|
||||
|
@ -9487,9 +9493,6 @@ int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
|
|||
sp_point* point;
|
||||
sp_digit* k = NULL;
|
||||
int err = MP_OKAY;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
err = sp_ecc_point_new(heap, p, point);
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
|
@ -9505,11 +9508,6 @@ int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
|
|||
sp_256_from_mp(k, 10, km);
|
||||
sp_256_point_from_ecc_point_10(point, gm);
|
||||
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_avx2_10(point, point, k, map, heap);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_10(point, point, k, map, heap);
|
||||
}
|
||||
if (err == MP_OKAY)
|
||||
|
@ -11115,9 +11113,6 @@ int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
|
|||
sp_point* point;
|
||||
sp_digit* k = NULL;
|
||||
int err = MP_OKAY;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
err = sp_ecc_point_new(heap, p, point);
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
|
@ -11132,11 +11127,6 @@ int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
|
|||
if (err == MP_OKAY) {
|
||||
sp_256_from_mp(k, 10, km);
|
||||
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_base_avx2_10(point, k, map, heap);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_base_10(point, k, map, heap);
|
||||
}
|
||||
if (err == MP_OKAY)
|
||||
|
@ -11256,9 +11246,6 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
|
|||
sp_point* infinity;
|
||||
#endif
|
||||
int err;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
(void)heap;
|
||||
|
||||
|
@ -11280,23 +11267,11 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
|
|||
if (err == MP_OKAY)
|
||||
err = sp_256_ecc_gen_k_10(rng, k);
|
||||
if (err == MP_OKAY) {
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_base_avx2_10(point, k, 1, NULL);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_base_10(point, k, 1, NULL);
|
||||
}
|
||||
|
||||
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
|
||||
if (err == MP_OKAY) {
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
|
||||
err = sp_256_ecc_mulmod_avx2_10(infinity, point, p256_order, 1,
|
||||
NULL);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_10(infinity, point, p256_order, 1, NULL);
|
||||
}
|
||||
if (err == MP_OKAY) {
|
||||
|
@ -11379,9 +11354,6 @@ int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
|
|||
sp_point* point = NULL;
|
||||
sp_digit* k = NULL;
|
||||
int err = MP_OKAY;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
if (*outLen < 32)
|
||||
err = BUFFER_E;
|
||||
|
@ -11401,11 +11373,6 @@ int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
|
|||
if (err == MP_OKAY) {
|
||||
sp_256_from_mp(k, 10, priv);
|
||||
sp_256_point_from_ecc_point_10(point, pub);
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_avx2_10(point, point, k, 1, heap);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_10(point, point, k, 1, heap);
|
||||
}
|
||||
if (err == MP_OKAY) {
|
||||
|
@ -11424,8 +11391,6 @@ int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
|
|||
#endif /* HAVE_ECC_DHE */
|
||||
|
||||
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
#endif /* HAVE_INTEL_AVX2 */
|
||||
#endif
|
||||
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
|
||||
/* Multiply a by scalar b into r. (r = a * b)
|
||||
|
@ -11501,7 +11466,8 @@ static int sp_256_div_10(sp_digit* a, sp_digit* d, sp_digit* m,
|
|||
int err = MP_OKAY;
|
||||
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
td = XMALLOC(sizeof(sp_digit) * (3 * 10 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
|
||||
td = XMALLOC(sizeof(sp_digit) * (3 * 10 + 1), NULL,
|
||||
DYNAMIC_TYPE_TMP_BUFFER);
|
||||
if (td != NULL) {
|
||||
t1 = td;
|
||||
t2 = td + 2 * 10;
|
||||
|
@ -11597,7 +11563,7 @@ static const uint32_t p256_order_low[4] = {
|
|||
static void sp_256_mont_mul_order_10(sp_digit* r, sp_digit* a, sp_digit* b)
|
||||
{
|
||||
sp_256_mul_10(r, a, b);
|
||||
sp_256_mont_reduce_10(r, p256_order, p256_mp_order);
|
||||
sp_256_mont_reduce_order_10(r, p256_order, p256_mp_order);
|
||||
}
|
||||
|
||||
/* Square number mod the order of P256 curve. (r = a * a mod order)
|
||||
|
@ -11608,7 +11574,7 @@ static void sp_256_mont_mul_order_10(sp_digit* r, sp_digit* a, sp_digit* b)
|
|||
static void sp_256_mont_sqr_order_10(sp_digit* r, sp_digit* a)
|
||||
{
|
||||
sp_256_sqr_10(r, a);
|
||||
sp_256_mont_reduce_10(r, p256_order, p256_mp_order);
|
||||
sp_256_mont_reduce_order_10(r, p256_order, p256_mp_order);
|
||||
}
|
||||
|
||||
#ifndef WOLFSSL_SP_SMALL
|
||||
|
@ -11723,143 +11689,6 @@ static void sp_256_mont_inv_order_10(sp_digit* r, sp_digit* a,
|
|||
#endif /* WOLFSSL_SP_SMALL */
|
||||
}
|
||||
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
|
||||
*
|
||||
* r Result of the multiplication.
|
||||
* a First operand of the multiplication.
|
||||
* b Second operand of the multiplication.
|
||||
*/
|
||||
static void sp_256_mont_mul_order_avx2_10(sp_digit* r, sp_digit* a, sp_digit* b)
|
||||
{
|
||||
sp_256_mul_avx2_10(r, a, b);
|
||||
sp_256_mont_reduce_avx2_10(r, p256_order, p256_mp_order);
|
||||
}
|
||||
|
||||
/* Square number mod the order of P256 curve. (r = a * a mod order)
|
||||
*
|
||||
* r Result of the squaring.
|
||||
* a Number to square.
|
||||
*/
|
||||
static void sp_256_mont_sqr_order_avx2_10(sp_digit* r, sp_digit* a)
|
||||
{
|
||||
sp_256_sqr_avx2_10(r, a);
|
||||
sp_256_mont_reduce_avx2_10(r, p256_order, p256_mp_order);
|
||||
}
|
||||
|
||||
#ifndef WOLFSSL_SP_SMALL
|
||||
/* Square number mod the order of P256 curve a number of times.
|
||||
* (r = a ^ n mod order)
|
||||
*
|
||||
* r Result of the squaring.
|
||||
* a Number to square.
|
||||
*/
|
||||
static void sp_256_mont_sqr_n_order_avx2_10(sp_digit* r, sp_digit* a, int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
sp_256_mont_sqr_order_avx2_10(r, a);
|
||||
for (i=1; i<n; i++)
|
||||
sp_256_mont_sqr_order_avx2_10(r, r);
|
||||
}
|
||||
#endif /* !WOLFSSL_SP_SMALL */
|
||||
|
||||
/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
|
||||
* (r = 1 / a mod order)
|
||||
*
|
||||
* r Inverse result.
|
||||
* a Number to invert.
|
||||
* td Temporary data.
|
||||
*/
|
||||
static void sp_256_mont_inv_order_avx2_10(sp_digit* r, sp_digit* a,
|
||||
sp_digit* td)
|
||||
{
|
||||
#ifdef WOLFSSL_SP_SMALL
|
||||
sp_digit* t = td;
|
||||
int i;
|
||||
|
||||
XMEMCPY(t, a, sizeof(sp_digit) * 10);
|
||||
for (i=254; i>=0; i--) {
|
||||
sp_256_mont_sqr_order_avx2_10(t, t);
|
||||
if (p256_order_2[i / 32] & ((sp_digit)1 << (i % 32)))
|
||||
sp_256_mont_mul_order_avx2_10(t, t, a);
|
||||
}
|
||||
XMEMCPY(r, t, sizeof(sp_digit) * 10);
|
||||
#else
|
||||
sp_digit* t = td;
|
||||
sp_digit* t2 = td + 2 * 10;
|
||||
sp_digit* t3 = td + 4 * 10;
|
||||
int i;
|
||||
|
||||
/* t = a^2 */
|
||||
sp_256_mont_sqr_order_avx2_10(t, a);
|
||||
/* t = a^3 = t * a */
|
||||
sp_256_mont_mul_order_avx2_10(t, t, a);
|
||||
/* t2= a^c = t ^ 2 ^ 2 */
|
||||
sp_256_mont_sqr_n_order_avx2_10(t2, t, 2);
|
||||
/* t3= a^f = t2 * t */
|
||||
sp_256_mont_mul_order_avx2_10(t3, t2, t);
|
||||
/* t2= a^f0 = t3 ^ 2 ^ 4 */
|
||||
sp_256_mont_sqr_n_order_avx2_10(t2, t3, 4);
|
||||
/* t = a^ff = t2 * t3 */
|
||||
sp_256_mont_mul_order_avx2_10(t, t2, t3);
|
||||
/* t3= a^ff00 = t ^ 2 ^ 8 */
|
||||
sp_256_mont_sqr_n_order_avx2_10(t2, t, 8);
|
||||
/* t = a^ffff = t2 * t */
|
||||
sp_256_mont_mul_order_avx2_10(t, t2, t);
|
||||
/* t2= a^ffff0000 = t ^ 2 ^ 16 */
|
||||
sp_256_mont_sqr_n_order_avx2_10(t2, t, 16);
|
||||
/* t = a^ffffffff = t2 * t */
|
||||
sp_256_mont_mul_order_avx2_10(t, t2, t);
|
||||
/* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
|
||||
sp_256_mont_sqr_n_order_avx2_10(t2, t, 64);
|
||||
/* t2= a^ffffffff00000000ffffffff = t2 * t */
|
||||
sp_256_mont_mul_order_avx2_10(t2, t2, t);
|
||||
/* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
|
||||
sp_256_mont_sqr_n_order_avx2_10(t2, t2, 32);
|
||||
/* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
|
||||
sp_256_mont_mul_order_avx2_10(t2, t2, t);
|
||||
/* t2= a^ffffffff00000000ffffffffffffffffbce6 */
|
||||
for (i=127; i>=112; i--) {
|
||||
sp_256_mont_sqr_order_avx2_10(t2, t2);
|
||||
if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
|
||||
sp_256_mont_mul_order_avx2_10(t2, t2, a);
|
||||
}
|
||||
/* t2= a^ffffffff00000000ffffffffffffffffbce6f */
|
||||
sp_256_mont_sqr_n_order_avx2_10(t2, t2, 4);
|
||||
sp_256_mont_mul_order_avx2_10(t2, t2, t3);
|
||||
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
|
||||
for (i=107; i>=64; i--) {
|
||||
sp_256_mont_sqr_order_avx2_10(t2, t2);
|
||||
if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
|
||||
sp_256_mont_mul_order_avx2_10(t2, t2, a);
|
||||
}
|
||||
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
|
||||
sp_256_mont_sqr_n_order_avx2_10(t2, t2, 4);
|
||||
sp_256_mont_mul_order_avx2_10(t2, t2, t3);
|
||||
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
|
||||
for (i=59; i>=32; i--) {
|
||||
sp_256_mont_sqr_order_avx2_10(t2, t2);
|
||||
if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
|
||||
sp_256_mont_mul_order_avx2_10(t2, t2, a);
|
||||
}
|
||||
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
|
||||
sp_256_mont_sqr_n_order_avx2_10(t2, t2, 4);
|
||||
sp_256_mont_mul_order_avx2_10(t2, t2, t3);
|
||||
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
|
||||
for (i=27; i>=0; i--) {
|
||||
sp_256_mont_sqr_order_avx2_10(t2, t2);
|
||||
if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
|
||||
sp_256_mont_mul_order_avx2_10(t2, t2, a);
|
||||
}
|
||||
/* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
|
||||
sp_256_mont_sqr_n_order_avx2_10(t2, t2, 4);
|
||||
/* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
|
||||
sp_256_mont_mul_order_avx2_10(r, t2, t3);
|
||||
#endif /* WOLFSSL_SP_SMALL */
|
||||
}
|
||||
|
||||
#endif /* HAVE_INTEL_AVX2 */
|
||||
#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
|
||||
#ifdef HAVE_ECC_SIGN
|
||||
#ifndef SP_ECC_MAX_SIG_GEN
|
||||
|
@ -11907,9 +11736,6 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
|
|||
int err = MP_OKAY;
|
||||
int32_t c;
|
||||
int i;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
(void)heap;
|
||||
|
||||
|
@ -11949,11 +11775,6 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
|
|||
/* New random point. */
|
||||
err = sp_256_ecc_gen_k_10(rng, k);
|
||||
if (err == MP_OKAY) {
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_base_avx2_10(point, k, 1, heap);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_base_10(point, k, 1, NULL);
|
||||
}
|
||||
|
||||
|
@ -11966,31 +11787,16 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
|
|||
sp_256_norm_10(r);
|
||||
|
||||
/* Conv k to Montgomery form (mod order) */
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
sp_256_mul_avx2_10(k, k, p256_norm_order);
|
||||
else
|
||||
#endif
|
||||
sp_256_mul_10(k, k, p256_norm_order);
|
||||
err = sp_256_mod_10(k, k, p256_order);
|
||||
}
|
||||
if (err == MP_OKAY) {
|
||||
sp_256_norm_10(k);
|
||||
/* kInv = 1/k mod order */
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
sp_256_mont_inv_order_avx2_10(kInv, k, tmp);
|
||||
else
|
||||
#endif
|
||||
sp_256_mont_inv_order_10(kInv, k, tmp);
|
||||
sp_256_norm_10(kInv);
|
||||
|
||||
/* s = r * x + e */
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
sp_256_mul_avx2_10(x, x, r);
|
||||
else
|
||||
#endif
|
||||
sp_256_mul_10(x, x, r);
|
||||
err = sp_256_mod_10(x, x, p256_order);
|
||||
}
|
||||
|
@ -12004,11 +11810,6 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
|
|||
sp_256_norm_10(s);
|
||||
|
||||
/* s = s * k^-1 mod order */
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
sp_256_mont_mul_order_avx2_10(s, s, kInv);
|
||||
else
|
||||
#endif
|
||||
sp_256_mont_mul_order_10(s, s, kInv);
|
||||
sp_256_norm_10(s);
|
||||
|
||||
|
@ -12088,9 +11889,6 @@ int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
|
|||
sp_digit carry;
|
||||
int32_t c;
|
||||
int err;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
err = sp_ecc_point_new(heap, p1d, p1);
|
||||
if (err == MP_OKAY)
|
||||
|
@ -12125,52 +11923,24 @@ int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
|
|||
sp_256_from_mp(p2->y, 10, pY);
|
||||
sp_256_from_mp(p2->z, 10, pZ);
|
||||
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
sp_256_mul_avx2_10(s, s, p256_norm_order);
|
||||
else
|
||||
#endif
|
||||
sp_256_mul_10(s, s, p256_norm_order);
|
||||
err = sp_256_mod_10(s, s, p256_order);
|
||||
}
|
||||
if (err == MP_OKAY) {
|
||||
sp_256_norm_10(s);
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
|
||||
sp_256_mont_inv_order_avx2_10(s, s, tmp);
|
||||
sp_256_mont_mul_order_avx2_10(u1, u1, s);
|
||||
sp_256_mont_mul_order_avx2_10(u2, u2, s);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
sp_256_mont_inv_order_10(s, s, tmp);
|
||||
sp_256_mont_mul_order_10(u1, u1, s);
|
||||
sp_256_mont_mul_order_10(u2, u2, s);
|
||||
}
|
||||
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_base_avx2_10(p1, u1, 0, heap);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_base_10(p1, u1, 0, heap);
|
||||
}
|
||||
if (err == MP_OKAY) {
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_avx2_10(p2, p2, u2, 0, heap);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_10(p2, p2, u2, 0, heap);
|
||||
}
|
||||
|
||||
if (err == MP_OKAY) {
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
sp_256_proj_point_add_avx2_10(p1, p1, p2, tmp);
|
||||
else
|
||||
#endif
|
||||
sp_256_proj_point_add_10(p1, p1, p2, tmp);
|
||||
|
||||
/* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
|
||||
|
@ -12333,9 +12103,6 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
|
|||
sp_point* p = NULL;
|
||||
byte one[1] = { 1 };
|
||||
int err;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
err = sp_ecc_point_new(heap, pubd, pub);
|
||||
if (err == MP_OKAY)
|
||||
|
@ -12376,11 +12143,6 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
|
|||
|
||||
if (err == MP_OKAY) {
|
||||
/* Point * order = infinity */
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_avx2_10(p, pub, p256_order, 1, heap);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_10(p, pub, p256_order, 1, heap);
|
||||
}
|
||||
if (err == MP_OKAY) {
|
||||
|
@ -12393,11 +12155,6 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
|
|||
|
||||
if (err == MP_OKAY) {
|
||||
/* Base * private = point */
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_base_avx2_10(p, priv, 1, heap);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_base_10(p, priv, 1, heap);
|
||||
}
|
||||
if (err == MP_OKAY) {
|
||||
|
@ -12446,9 +12203,6 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
|
|||
sp_point* p;
|
||||
sp_point* q = NULL;
|
||||
int err;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
err = sp_ecc_point_new(NULL, pd, p);
|
||||
if (err == MP_OKAY)
|
||||
|
@ -12471,11 +12225,6 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
|
|||
sp_256_from_mp(q->y, 10, qY);
|
||||
sp_256_from_mp(q->z, 10, qZ);
|
||||
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
sp_256_proj_point_add_avx2_10(p, p, q, tmp);
|
||||
else
|
||||
#endif
|
||||
sp_256_proj_point_add_10(p, p, q, tmp);
|
||||
}
|
||||
|
||||
|
@ -12517,9 +12266,6 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
|
|||
sp_digit* tmp;
|
||||
sp_point* p;
|
||||
int err;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
err = sp_ecc_point_new(NULL, pd, p);
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
|
@ -12537,11 +12283,6 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
|
|||
sp_256_from_mp(p->y, 10, pY);
|
||||
sp_256_from_mp(p->z, 10, pZ);
|
||||
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
sp_256_proj_point_dbl_avx2_10(p, p, tmp);
|
||||
else
|
||||
#endif
|
||||
sp_256_proj_point_dbl_10(p, p, tmp);
|
||||
}
|
||||
|
||||
|
@ -12630,9 +12371,6 @@ static int sp_256_mont_sqrt_10(sp_digit* y)
|
|||
sp_digit* t1;
|
||||
sp_digit* t2;
|
||||
int err = MP_OKAY;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
d = XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
|
||||
|
@ -12648,40 +12386,6 @@ static int sp_256_mont_sqrt_10(sp_digit* y)
|
|||
#endif
|
||||
|
||||
if (err == MP_OKAY) {
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
|
||||
/* t2 = y ^ 0x2 */
|
||||
sp_256_mont_sqr_avx2_10(t2, y, p256_mod, p256_mp_mod);
|
||||
/* t1 = y ^ 0x3 */
|
||||
sp_256_mont_mul_avx2_10(t1, t2, y, p256_mod, p256_mp_mod);
|
||||
/* t2 = y ^ 0xc */
|
||||
sp_256_mont_sqr_n_avx2_10(t2, t1, 2, p256_mod, p256_mp_mod);
|
||||
/* t1 = y ^ 0xf */
|
||||
sp_256_mont_mul_avx2_10(t1, t1, t2, p256_mod, p256_mp_mod);
|
||||
/* t2 = y ^ 0xf0 */
|
||||
sp_256_mont_sqr_n_avx2_10(t2, t1, 4, p256_mod, p256_mp_mod);
|
||||
/* t1 = y ^ 0xff */
|
||||
sp_256_mont_mul_avx2_10(t1, t1, t2, p256_mod, p256_mp_mod);
|
||||
/* t2 = y ^ 0xff00 */
|
||||
sp_256_mont_sqr_n_avx2_10(t2, t1, 8, p256_mod, p256_mp_mod);
|
||||
/* t1 = y ^ 0xffff */
|
||||
sp_256_mont_mul_avx2_10(t1, t1, t2, p256_mod, p256_mp_mod);
|
||||
/* t2 = y ^ 0xffff0000 */
|
||||
sp_256_mont_sqr_n_avx2_10(t2, t1, 16, p256_mod, p256_mp_mod);
|
||||
/* t1 = y ^ 0xffffffff */
|
||||
sp_256_mont_mul_avx2_10(t1, t1, t2, p256_mod, p256_mp_mod);
|
||||
/* t1 = y ^ 0xffffffff00000000 */
|
||||
sp_256_mont_sqr_n_avx2_10(t1, t1, 32, p256_mod, p256_mp_mod);
|
||||
/* t1 = y ^ 0xffffffff00000001 */
|
||||
sp_256_mont_mul_avx2_10(t1, t1, y, p256_mod, p256_mp_mod);
|
||||
/* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
|
||||
sp_256_mont_sqr_n_avx2_10(t1, t1, 96, p256_mod, p256_mp_mod);
|
||||
/* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
|
||||
sp_256_mont_mul_avx2_10(t1, t1, y, p256_mod, p256_mp_mod);
|
||||
sp_256_mont_sqr_n_avx2_10(y, t1, 94, p256_mod, p256_mp_mod);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
/* t2 = y ^ 0x2 */
|
||||
sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod);
|
||||
|
@ -12741,9 +12445,6 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
|
|||
sp_digit* x;
|
||||
sp_digit* y;
|
||||
int err = MP_OKAY;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
d = XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
|
||||
|
@ -12766,13 +12467,6 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
|
|||
|
||||
if (err == MP_OKAY) {
|
||||
/* y = x^3 */
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
|
||||
sp_256_mont_sqr_avx2_10(y, x, p256_mod, p256_mp_mod);
|
||||
sp_256_mont_mul_avx2_10(y, y, x, p256_mod, p256_mp_mod);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
sp_256_mont_sqr_10(y, x, p256_mod, p256_mp_mod);
|
||||
sp_256_mont_mul_10(y, y, x, p256_mod, p256_mp_mod);
|
||||
|
|
|
@ -1258,7 +1258,8 @@ static int sp_2048_div_18(sp_digit* a, sp_digit* d, sp_digit* m,
|
|||
int err = MP_OKAY;
|
||||
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
td = XMALLOC(sizeof(sp_digit) * (3 * 18 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
|
||||
td = XMALLOC(sizeof(sp_digit) * (3 * 18 + 1), NULL,
|
||||
DYNAMIC_TYPE_TMP_BUFFER);
|
||||
if (td != NULL) {
|
||||
t1 = td;
|
||||
t2 = td + 2 * 18;
|
||||
|
@ -2106,7 +2107,8 @@ static int sp_2048_div_36(sp_digit* a, sp_digit* d, sp_digit* m,
|
|||
int err = MP_OKAY;
|
||||
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
td = XMALLOC(sizeof(sp_digit) * (3 * 36 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
|
||||
td = XMALLOC(sizeof(sp_digit) * (3 * 36 + 1), NULL,
|
||||
DYNAMIC_TYPE_TMP_BUFFER);
|
||||
if (td != NULL) {
|
||||
t1 = td;
|
||||
t2 = td + 2 * 36;
|
||||
|
@ -4660,7 +4662,8 @@ static int sp_3072_div_27(sp_digit* a, sp_digit* d, sp_digit* m,
|
|||
int err = MP_OKAY;
|
||||
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
td = XMALLOC(sizeof(sp_digit) * (3 * 27 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
|
||||
td = XMALLOC(sizeof(sp_digit) * (3 * 27 + 1), NULL,
|
||||
DYNAMIC_TYPE_TMP_BUFFER);
|
||||
if (td != NULL) {
|
||||
t1 = td;
|
||||
t2 = td + 2 * 27;
|
||||
|
@ -5484,7 +5487,8 @@ static int sp_3072_div_54(sp_digit* a, sp_digit* d, sp_digit* m,
|
|||
int err = MP_OKAY;
|
||||
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
td = XMALLOC(sizeof(sp_digit) * (3 * 54 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
|
||||
td = XMALLOC(sizeof(sp_digit) * (3 * 54 + 1), NULL,
|
||||
DYNAMIC_TYPE_TMP_BUFFER);
|
||||
if (td != NULL) {
|
||||
t1 = td;
|
||||
t2 = td + 2 * 54;
|
||||
|
@ -7089,6 +7093,8 @@ static void sp_256_cond_sub_5(sp_digit* r, const sp_digit* a,
|
|||
#endif /* WOLFSSL_SP_SMALL */
|
||||
}
|
||||
|
||||
#define sp_256_mont_reduce_order_5 sp_256_mont_reduce_5
|
||||
|
||||
/* Mul a by scalar b and add into r. (r += a * b)
|
||||
*
|
||||
* r A single precision integer.
|
||||
|
@ -8066,7 +8072,7 @@ static int sp_256_ecc_mulmod_5(sp_point* r, sp_point* g, sp_digit* k,
|
|||
((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
|
||||
sp_256_proj_point_dbl_5(&t[2], &t[2], tmp);
|
||||
XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
|
||||
((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
|
||||
((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
|
||||
}
|
||||
|
||||
if (map)
|
||||
|
@ -8709,9 +8715,6 @@ int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
|
|||
sp_point* point;
|
||||
sp_digit* k = NULL;
|
||||
int err = MP_OKAY;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
err = sp_ecc_point_new(heap, p, point);
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
|
@ -8727,11 +8730,6 @@ int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
|
|||
sp_256_from_mp(k, 5, km);
|
||||
sp_256_point_from_ecc_point_5(point, gm);
|
||||
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_avx2_5(point, point, k, map, heap);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_5(point, point, k, map, heap);
|
||||
}
|
||||
if (err == MP_OKAY)
|
||||
|
@ -10337,9 +10335,6 @@ int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
|
|||
sp_point* point;
|
||||
sp_digit* k = NULL;
|
||||
int err = MP_OKAY;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
err = sp_ecc_point_new(heap, p, point);
|
||||
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
|
||||
|
@ -10354,11 +10349,6 @@ int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
|
|||
if (err == MP_OKAY) {
|
||||
sp_256_from_mp(k, 5, km);
|
||||
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_base_avx2_5(point, k, map, heap);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_base_5(point, k, map, heap);
|
||||
}
|
||||
if (err == MP_OKAY)
|
||||
|
@ -10477,9 +10467,6 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
|
|||
sp_point* infinity;
|
||||
#endif
|
||||
int err;
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
word32 cpuid_flags = cpuid_get_flags();
|
||||
#endif
|
||||
|
||||
(void)heap;
|
||||
|
||||
|
@ -10501,23 +10488,11 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
|
|||
if (err == MP_OKAY)
|
||||
err = sp_256_ecc_gen_k_5(rng, k);
|
||||
if (err == MP_OKAY) {
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
|
||||
err = sp_256_ecc_mulmod_base_avx2_5(point, k, 1, NULL);
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_base_5(point, k, 1, NULL);
|
||||
}
|
||||
|
||||
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
|
||||
if (err == MP_OKAY) {
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
|
||||
err = sp_256_ecc_mulmod_avx2_5(infinity, point, p256_order, 1,
|
||||
NULL);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
err = sp_256_ecc_mulmod_5(infinity, point, p256_order, 1, NULL);
|
||||
}
|
||||
if (err == MP_OKAY) {
|
||||
|
@@ -10600,9 +10575,6 @@ int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
    sp_point* point = NULL;
    sp_digit* k = NULL;
    int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
    word32 cpuid_flags = cpuid_get_flags();
#endif

    if (*outLen < 32)
        err = BUFFER_E;
@@ -10622,11 +10594,6 @@ int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
    if (err == MP_OKAY) {
        sp_256_from_mp(k, 5, priv);
        sp_256_point_from_ecc_point_5(point, pub);
#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
            err = sp_256_ecc_mulmod_avx2_5(point, point, k, 1, heap);
        else
#endif
            err = sp_256_ecc_mulmod_5(point, point, k, 1, heap);
    }
    if (err == MP_OKAY) {
@@ -10645,8 +10612,6 @@ int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
#endif /* HAVE_ECC_DHE */

#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
#ifdef HAVE_INTEL_AVX2
#endif /* HAVE_INTEL_AVX2 */
#endif
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* Multiply a by scalar b into r. (r = a * b)
@@ -10712,7 +10677,8 @@ static int sp_256_div_5(sp_digit* a, sp_digit* d, sp_digit* m,
    int err = MP_OKAY;

#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
    td = XMALLOC(sizeof(sp_digit) * (3 * 5 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
    td = XMALLOC(sizeof(sp_digit) * (3 * 5 + 1), NULL,
                 DYNAMIC_TYPE_TMP_BUFFER);
    if (td != NULL) {
        t1 = td;
        t2 = td + 2 * 5;
@@ -10808,7 +10774,7 @@ static const uint64_t p256_order_low[2] = {
static void sp_256_mont_mul_order_5(sp_digit* r, sp_digit* a, sp_digit* b)
{
    sp_256_mul_5(r, a, b);
    sp_256_mont_reduce_5(r, p256_order, p256_mp_order);
    sp_256_mont_reduce_order_5(r, p256_order, p256_mp_order);
}

/* Square number mod the order of P256 curve. (r = a * a mod order)
@@ -10819,7 +10785,7 @@ static void sp_256_mont_mul_order_5(sp_digit* r, sp_digit* a, sp_digit* b)
static void sp_256_mont_sqr_order_5(sp_digit* r, sp_digit* a)
{
    sp_256_sqr_5(r, a);
    sp_256_mont_reduce_5(r, p256_order, p256_mp_order);
    sp_256_mont_reduce_order_5(r, p256_order, p256_mp_order);
}

#ifndef WOLFSSL_SP_SMALL
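Note on the two hunks above: multiplication and squaring modulo the P-256 group order are done as a plain product followed by a Montgomery reduction, and that reduction must use the constants derived from the order (p256_order, p256_mp_order) rather than the field prime, which is what the rename to sp_256_mont_reduce_order_5 makes explicit. Below is a minimal single-word sketch of the multiply-then-REDC pattern; it is illustrative only, not wolfSSL code, and assumes a GCC/Clang __int128 type and an odd modulus below 2^63 so the 128-bit sum cannot overflow.

#include <stdint.h>
#include <stdio.h>

typedef unsigned __int128 u128;

/* mp = -m^-1 mod 2^64 for odd m, found by Newton iteration. */
static uint64_t mont_np(uint64_t m)
{
    uint64_t inv = m;                 /* correct to 3 bits for odd m */
    int i;
    for (i = 0; i < 5; i++)
        inv *= 2 - m * inv;           /* each step doubles the correct bits */
    return (uint64_t)0 - inv;
}

/* REDC: given t < m * 2^64, return t * 2^-64 mod m (assumes m < 2^63). */
static uint64_t mont_reduce(u128 t, uint64_t m, uint64_t mp)
{
    uint64_t u = (uint64_t)t * mp;    /* u = t * (-m^-1) mod 2^64 */
    u128 s = t + (u128)u * m;         /* low 64 bits of s are now zero */
    uint64_t r = (uint64_t)(s >> 64);
    return (r >= m) ? r - m : r;
}

/* Multiply then reduce: the same shape as sp_256_mont_mul_order_5. */
static uint64_t mont_mul(uint64_t a, uint64_t b, uint64_t m, uint64_t mp)
{
    return mont_reduce((u128)a * b, m, mp);
}

int main(void)
{
    uint64_t m   = 0x7fffffffffffffe7ULL;            /* any odd modulus < 2^63 */
    uint64_t mp  = mont_np(m);
    uint64_t a   = 123456789, b = 987654321;
    uint64_t aR  = (uint64_t)(((u128)a << 64) % m);  /* to Montgomery form */
    uint64_t bR  = (uint64_t)(((u128)b << 64) % m);
    uint64_t abR = mont_mul(aR, bR, m, mp);          /* = a*b*R mod m */
    uint64_t ab  = mont_reduce(abR, m, mp);          /* back to normal form */
    printf("match: %d\n", ab == (uint64_t)(((u128)a * b) % m));
    return 0;
}
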
@@ -10934,143 +10900,6 @@ static void sp_256_mont_inv_order_5(sp_digit* r, sp_digit* a,
#endif /* WOLFSSL_SP_SMALL */
}

#ifdef HAVE_INTEL_AVX2
/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
 *
 * r  Result of the multiplication.
 * a  First operand of the multiplication.
 * b  Second operand of the multiplication.
 */
static void sp_256_mont_mul_order_avx2_5(sp_digit* r, sp_digit* a, sp_digit* b)
{
    sp_256_mul_avx2_5(r, a, b);
    sp_256_mont_reduce_avx2_5(r, p256_order, p256_mp_order);
}

/* Square number mod the order of P256 curve. (r = a * a mod order)
 *
 * r  Result of the squaring.
 * a  Number to square.
 */
static void sp_256_mont_sqr_order_avx2_5(sp_digit* r, sp_digit* a)
{
    sp_256_sqr_avx2_5(r, a);
    sp_256_mont_reduce_avx2_5(r, p256_order, p256_mp_order);
}

#ifndef WOLFSSL_SP_SMALL
/* Square number mod the order of P256 curve a number of times.
 * (r = a ^ n mod order)
 *
 * r  Result of the squaring.
 * a  Number to square.
 */
static void sp_256_mont_sqr_n_order_avx2_5(sp_digit* r, sp_digit* a, int n)
{
    int i;

    sp_256_mont_sqr_order_avx2_5(r, a);
    for (i=1; i<n; i++)
        sp_256_mont_sqr_order_avx2_5(r, r);
}
#endif /* !WOLFSSL_SP_SMALL */

/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
 * (r = 1 / a mod order)
 *
 * r   Inverse result.
 * a   Number to invert.
 * td  Temporary data.
 */
static void sp_256_mont_inv_order_avx2_5(sp_digit* r, sp_digit* a,
        sp_digit* td)
{
#ifdef WOLFSSL_SP_SMALL
    sp_digit* t = td;
    int i;

    XMEMCPY(t, a, sizeof(sp_digit) * 5);
    for (i=254; i>=0; i--) {
        sp_256_mont_sqr_order_avx2_5(t, t);
        if (p256_order_2[i / 64] & ((sp_digit)1 << (i % 64)))
            sp_256_mont_mul_order_avx2_5(t, t, a);
    }
    XMEMCPY(r, t, sizeof(sp_digit) * 5);
#else
    sp_digit* t = td;
    sp_digit* t2 = td + 2 * 5;
    sp_digit* t3 = td + 4 * 5;
    int i;

    /* t = a^2 */
    sp_256_mont_sqr_order_avx2_5(t, a);
    /* t = a^3 = t * a */
    sp_256_mont_mul_order_avx2_5(t, t, a);
    /* t2= a^c = t ^ 2 ^ 2 */
    sp_256_mont_sqr_n_order_avx2_5(t2, t, 2);
    /* t3= a^f = t2 * t */
    sp_256_mont_mul_order_avx2_5(t3, t2, t);
    /* t2= a^f0 = t3 ^ 2 ^ 4 */
    sp_256_mont_sqr_n_order_avx2_5(t2, t3, 4);
    /* t = a^ff = t2 * t3 */
    sp_256_mont_mul_order_avx2_5(t, t2, t3);
    /* t3= a^ff00 = t ^ 2 ^ 8 */
    sp_256_mont_sqr_n_order_avx2_5(t2, t, 8);
    /* t = a^ffff = t2 * t */
    sp_256_mont_mul_order_avx2_5(t, t2, t);
    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
    sp_256_mont_sqr_n_order_avx2_5(t2, t, 16);
    /* t = a^ffffffff = t2 * t */
    sp_256_mont_mul_order_avx2_5(t, t2, t);
    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64 */
    sp_256_mont_sqr_n_order_avx2_5(t2, t, 64);
    /* t2= a^ffffffff00000000ffffffff = t2 * t */
    sp_256_mont_mul_order_avx2_5(t2, t2, t);
    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32 */
    sp_256_mont_sqr_n_order_avx2_5(t2, t2, 32);
    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
    sp_256_mont_mul_order_avx2_5(t2, t2, t);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
    for (i=127; i>=112; i--) {
        sp_256_mont_sqr_order_avx2_5(t2, t2);
        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
            sp_256_mont_mul_order_avx2_5(t2, t2, a);
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
    sp_256_mont_sqr_n_order_avx2_5(t2, t2, 4);
    sp_256_mont_mul_order_avx2_5(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
    for (i=107; i>=64; i--) {
        sp_256_mont_sqr_order_avx2_5(t2, t2);
        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
            sp_256_mont_mul_order_avx2_5(t2, t2, a);
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
    sp_256_mont_sqr_n_order_avx2_5(t2, t2, 4);
    sp_256_mont_mul_order_avx2_5(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
    for (i=59; i>=32; i--) {
        sp_256_mont_sqr_order_avx2_5(t2, t2);
        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
            sp_256_mont_mul_order_avx2_5(t2, t2, a);
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
    sp_256_mont_sqr_n_order_avx2_5(t2, t2, 4);
    sp_256_mont_mul_order_avx2_5(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
    for (i=27; i>=0; i--) {
        sp_256_mont_sqr_order_avx2_5(t2, t2);
        if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
            sp_256_mont_mul_order_avx2_5(t2, t2, a);
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
    sp_256_mont_sqr_n_order_avx2_5(t2, t2, 4);
    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
    sp_256_mont_mul_order_avx2_5(r, t2, t3);
#endif /* WOLFSSL_SP_SMALL */
}

#endif /* HAVE_INTEL_AVX2 */
#endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
#ifdef HAVE_ECC_SIGN
#ifndef SP_ECC_MAX_SIG_GEN
@@ -11118,9 +10947,6 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
    int err = MP_OKAY;
    int64_t c;
    int i;
#ifdef HAVE_INTEL_AVX2
    word32 cpuid_flags = cpuid_get_flags();
#endif

    (void)heap;

@@ -11160,11 +10986,6 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
        /* New random point. */
        err = sp_256_ecc_gen_k_5(rng, k);
        if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
                err = sp_256_ecc_mulmod_base_avx2_5(point, k, 1, heap);
            else
#endif
                err = sp_256_ecc_mulmod_base_5(point, k, 1, NULL);
        }

@@ -11177,31 +10998,16 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
            sp_256_norm_5(r);

            /* Conv k to Montgomery form (mod order) */
#ifdef HAVE_INTEL_AVX2
            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
                sp_256_mul_avx2_5(k, k, p256_norm_order);
            else
#endif
                sp_256_mul_5(k, k, p256_norm_order);
            err = sp_256_mod_5(k, k, p256_order);
        }
        if (err == MP_OKAY) {
            sp_256_norm_5(k);
            /* kInv = 1/k mod order */
#ifdef HAVE_INTEL_AVX2
            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
                sp_256_mont_inv_order_avx2_5(kInv, k, tmp);
            else
#endif
                sp_256_mont_inv_order_5(kInv, k, tmp);
            sp_256_norm_5(kInv);

            /* s = r * x + e */
#ifdef HAVE_INTEL_AVX2
            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
                sp_256_mul_avx2_5(x, x, r);
            else
#endif
                sp_256_mul_5(x, x, r);
            err = sp_256_mod_5(x, x, p256_order);
        }
@@ -11215,11 +11021,6 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
            sp_256_norm_5(s);

            /* s = s * k^-1 mod order */
#ifdef HAVE_INTEL_AVX2
            if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
                sp_256_mont_mul_order_avx2_5(s, s, kInv);
            else
#endif
                sp_256_mont_mul_order_5(s, s, kInv);
            sp_256_norm_5(s);

@@ -11299,9 +11100,6 @@ int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
    sp_digit carry;
    int64_t c;
    int err;
#ifdef HAVE_INTEL_AVX2
    word32 cpuid_flags = cpuid_get_flags();
#endif

    err = sp_ecc_point_new(heap, p1d, p1);
    if (err == MP_OKAY)
@@ -11336,52 +11134,24 @@ int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
        sp_256_from_mp(p2->y, 5, pY);
        sp_256_from_mp(p2->z, 5, pZ);

#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
            sp_256_mul_avx2_5(s, s, p256_norm_order);
        else
#endif
            sp_256_mul_5(s, s, p256_norm_order);
        err = sp_256_mod_5(s, s, p256_order);
    }
    if (err == MP_OKAY) {
        sp_256_norm_5(s);
#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
            sp_256_mont_inv_order_avx2_5(s, s, tmp);
            sp_256_mont_mul_order_avx2_5(u1, u1, s);
            sp_256_mont_mul_order_avx2_5(u2, u2, s);
        }
        else
#endif
        {
            sp_256_mont_inv_order_5(s, s, tmp);
            sp_256_mont_mul_order_5(u1, u1, s);
            sp_256_mont_mul_order_5(u2, u2, s);
        }

#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
            err = sp_256_ecc_mulmod_base_avx2_5(p1, u1, 0, heap);
        else
#endif
            err = sp_256_ecc_mulmod_base_5(p1, u1, 0, heap);
    }
    if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
            err = sp_256_ecc_mulmod_avx2_5(p2, p2, u2, 0, heap);
        else
#endif
            err = sp_256_ecc_mulmod_5(p2, p2, u2, 0, heap);
    }

    if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
            sp_256_proj_point_add_avx2_5(p1, p1, p2, tmp);
        else
#endif
            sp_256_proj_point_add_5(p1, p1, p2, tmp);

        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
@@ -11544,9 +11314,6 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
    sp_point* p = NULL;
    byte one[1] = { 1 };
    int err;
#ifdef HAVE_INTEL_AVX2
    word32 cpuid_flags = cpuid_get_flags();
#endif

    err = sp_ecc_point_new(heap, pubd, pub);
    if (err == MP_OKAY)
@@ -11587,11 +11354,6 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)

    if (err == MP_OKAY) {
        /* Point * order = infinity */
#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
            err = sp_256_ecc_mulmod_avx2_5(p, pub, p256_order, 1, heap);
        else
#endif
            err = sp_256_ecc_mulmod_5(p, pub, p256_order, 1, heap);
    }
    if (err == MP_OKAY) {
@@ -11604,11 +11366,6 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)

    if (err == MP_OKAY) {
        /* Base * private = point */
#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
            err = sp_256_ecc_mulmod_base_avx2_5(p, priv, 1, heap);
        else
#endif
            err = sp_256_ecc_mulmod_base_5(p, priv, 1, heap);
    }
    if (err == MP_OKAY) {
@@ -11657,9 +11414,6 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
    sp_point* p;
    sp_point* q = NULL;
    int err;
#ifdef HAVE_INTEL_AVX2
    word32 cpuid_flags = cpuid_get_flags();
#endif

    err = sp_ecc_point_new(NULL, pd, p);
    if (err == MP_OKAY)
@@ -11682,11 +11436,6 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
        sp_256_from_mp(q->y, 5, qY);
        sp_256_from_mp(q->z, 5, qZ);

#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
            sp_256_proj_point_add_avx2_5(p, p, q, tmp);
        else
#endif
            sp_256_proj_point_add_5(p, p, q, tmp);
    }

@@ -11728,9 +11477,6 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
    sp_digit* tmp;
    sp_point* p;
    int err;
#ifdef HAVE_INTEL_AVX2
    word32 cpuid_flags = cpuid_get_flags();
#endif

    err = sp_ecc_point_new(NULL, pd, p);
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
@@ -11748,11 +11494,6 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
        sp_256_from_mp(p->y, 5, pY);
        sp_256_from_mp(p->z, 5, pZ);

#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
            sp_256_proj_point_dbl_avx2_5(p, p, tmp);
        else
#endif
            sp_256_proj_point_dbl_5(p, p, tmp);
    }

@@ -11841,9 +11582,6 @@ static int sp_256_mont_sqrt_5(sp_digit* y)
    sp_digit* t1;
    sp_digit* t2;
    int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
    word32 cpuid_flags = cpuid_get_flags();
#endif

#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
    d = XMALLOC(sizeof(sp_digit) * 4 * 5, NULL, DYNAMIC_TYPE_ECC);
@@ -11859,40 +11597,6 @@ static int sp_256_mont_sqrt_5(sp_digit* y)
#endif

    if (err == MP_OKAY) {
#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
            /* t2 = y ^ 0x2 */
            sp_256_mont_sqr_avx2_5(t2, y, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0x3 */
            sp_256_mont_mul_avx2_5(t1, t2, y, p256_mod, p256_mp_mod);
            /* t2 = y ^ 0xc */
            sp_256_mont_sqr_n_avx2_5(t2, t1, 2, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xf */
            sp_256_mont_mul_avx2_5(t1, t1, t2, p256_mod, p256_mp_mod);
            /* t2 = y ^ 0xf0 */
            sp_256_mont_sqr_n_avx2_5(t2, t1, 4, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xff */
            sp_256_mont_mul_avx2_5(t1, t1, t2, p256_mod, p256_mp_mod);
            /* t2 = y ^ 0xff00 */
            sp_256_mont_sqr_n_avx2_5(t2, t1, 8, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xffff */
            sp_256_mont_mul_avx2_5(t1, t1, t2, p256_mod, p256_mp_mod);
            /* t2 = y ^ 0xffff0000 */
            sp_256_mont_sqr_n_avx2_5(t2, t1, 16, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xffffffff */
            sp_256_mont_mul_avx2_5(t1, t1, t2, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xffffffff00000000 */
            sp_256_mont_sqr_n_avx2_5(t1, t1, 32, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xffffffff00000001 */
            sp_256_mont_mul_avx2_5(t1, t1, y, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
            sp_256_mont_sqr_n_avx2_5(t1, t1, 96, p256_mod, p256_mp_mod);
            /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
            sp_256_mont_mul_avx2_5(t1, t1, y, p256_mod, p256_mp_mod);
            sp_256_mont_sqr_n_avx2_5(y, t1, 94, p256_mod, p256_mp_mod);
        }
        else
#endif
        {
            /* t2 = y ^ 0x2 */
            sp_256_mont_sqr_5(t2, y, p256_mod, p256_mp_mod);
@@ -11952,9 +11656,6 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
    sp_digit* x;
    sp_digit* y;
    int err = MP_OKAY;
#ifdef HAVE_INTEL_AVX2
    word32 cpuid_flags = cpuid_get_flags();
#endif

#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
    d = XMALLOC(sizeof(sp_digit) * 4 * 5, NULL, DYNAMIC_TYPE_ECC);
@@ -11977,13 +11678,6 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)

    if (err == MP_OKAY) {
        /* y = x^3 */
#ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
            sp_256_mont_sqr_avx2_5(y, x, p256_mod, p256_mp_mod);
            sp_256_mont_mul_avx2_5(y, y, x, p256_mod, p256_mp_mod);
        }
        else
#endif
        {
            sp_256_mont_sqr_5(y, x, p256_mod, p256_mp_mod);
            sp_256_mont_mul_5(y, y, x, p256_mod, p256_mp_mod);

@@ -590,7 +590,7 @@ int sp_lshd(sp_int* a, int s)
    if (a->used + s > a->size)
        a->used = a->size - s;

    XMEMMOVE(a->dp + s, a->dp, a->used * SP_INT_DIGITS);
    XMEMMOVE(a->dp + s, a->dp, a->used * sizeof(sp_int_digit));
    a->used += s;
    XMEMSET(a->dp, 0, s * sizeof(sp_int_digit));

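This is the "fix sp_lshd to memmove correct amount" change from the commit message: the byte count passed to XMEMMOVE must be the number of digits in use times sizeof(sp_int_digit); using the SP_INT_DIGITS capacity constant moved far too much memory. A stripped-down sketch of the same digit shift on a bare array is shown below; it is illustrative only, digit_t and lshd_digits are stand-in names, and the caller is assumed to have room for used + s digits.

#include <stdint.h>
#include <string.h>

typedef uint64_t digit_t;              /* stand-in for sp_int_digit */

/* Shift the 'used' least-significant digits up by s places and zero the gap.
 * e.g. lshd_digits(dp, used, 3) multiplies the value by 2^(3 * digit bits),
 * provided the buffer holds at least used + s digits. */
static void lshd_digits(digit_t* dp, int used, int s)
{
    /* length is in bytes: digits in use times the digit size */
    memmove(dp + s, dp, (size_t)used * sizeof(digit_t));
    memset(dp, 0, (size_t)s * sizeof(digit_t));
}
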
@@ -6943,7 +6943,7 @@ static WC_INLINE int sp_2048_div_16(sp_digit* a, sp_digit* d, sp_digit* m,
    }

    r1 = sp_2048_cmp_16(t1, d) >= 0;
    sp_2048_cond_sub_16(r, t1, t2, (sp_digit)0 - r1);
    sp_2048_cond_sub_16(r, t1, d, (sp_digit)0 - r1);

    return MP_OKAY;
}
@@ -8923,7 +8923,7 @@ static WC_INLINE int sp_2048_div_32(sp_digit* a, sp_digit* d, sp_digit* m,
    }

    r1 = sp_2048_cmp_32(t1, d) >= 0;
    sp_2048_cond_sub_32(r, t1, t2, (sp_digit)0 - r1);
    sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);

    return MP_OKAY;
}
@@ -8982,7 +8982,7 @@ static WC_INLINE int sp_2048_div_32_cond(sp_digit* a, sp_digit* d, sp_digit* m,
    }

    r1 = sp_2048_cmp_32(t1, d) >= 0;
    sp_2048_cond_sub_32(r, t1, t2, (sp_digit)0 - r1);
    sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);

    return MP_OKAY;
}
@@ -23504,7 +23504,7 @@ static WC_INLINE int sp_3072_div_24(sp_digit* a, sp_digit* d, sp_digit* m,
    }

    r1 = sp_3072_cmp_24(t1, d) >= 0;
    sp_3072_cond_sub_24(r, t1, t2, (sp_digit)0 - r1);
    sp_3072_cond_sub_24(r, t1, d, (sp_digit)0 - r1);

    return MP_OKAY;
}
@@ -26156,7 +26156,7 @@ static WC_INLINE int sp_3072_div_48(sp_digit* a, sp_digit* d, sp_digit* m,
    }

    r1 = sp_3072_cmp_48(t1, d) >= 0;
    sp_3072_cond_sub_48(r, t1, t2, (sp_digit)0 - r1);
    sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);

    return MP_OKAY;
}
@@ -26215,7 +26215,7 @@ static WC_INLINE int sp_3072_div_48_cond(sp_digit* a, sp_digit* d, sp_digit* m,
    }

    r1 = sp_3072_cmp_48(t1, d) >= 0;
    sp_3072_cond_sub_48(r, t1, t2, (sp_digit)0 - r1);
    sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);

    return MP_OKAY;
}
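Each of the div hunks above is the same one-line correction: the final constant-time conditional subtraction has to be handed the divisor d, not the scratch value t2, so that a remainder candidate that is still greater than or equal to d gets the divisor subtracted from it. The mask argument (sp_digit)0 - r1 is all ones when the comparison says the subtraction is needed and all zeros otherwise. The following sketch of the conditional-subtract idiom is illustrative only; cond_sub and digit_t are stand-in names rather than the generated wolfSSL functions.

#include <stdint.h>

typedef uint64_t digit_t;

/* r = a - (b & mask) over n digits; mask is 0 or all ones, so the same work
 * is done either way and there is no data-dependent branch.  Returns the
 * final borrow. */
static digit_t cond_sub(digit_t* r, const digit_t* a, const digit_t* b,
                        digit_t mask, int n)
{
    digit_t borrow = 0;
    int i;
    for (i = 0; i < n; i++) {
        digit_t bi = b[i] & mask;         /* either b[i] or 0 */
        digit_t x  = a[i] - bi;
        digit_t c1 = (digit_t)(x > a[i]); /* borrow from this limb */
        digit_t y  = x - borrow;
        digit_t c2 = (digit_t)(y > x);    /* borrow from the carried-in 1 */
        r[i] = y;
        borrow = c1 | c2;
    }
    return borrow;
}

/* Usage mirrors the fixed call sites: a compare result r1 (0 or 1) becomes a
 * full-width mask with (digit_t)0 - r1, e.g.
 *     cond_sub(r, t1, d, (digit_t)0 - r1, 16);
 */
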
@@ -27880,6 +27880,8 @@ SP_NOINLINE static sp_digit sp_256_sub_4(sp_digit* r, const sp_digit* a,
    return c;
}

#define sp_256_mont_reduce_order_4 sp_256_mont_reduce_4

/* Reduce the number back to 256 bits using Montgomery reduction.
 *
 * a  A single precision number to reduce in place.
@@ -44202,8 +44204,10 @@ static int sp_256_ecc_mulmod_base_4(sp_point* r, sp_digit* k, int map,
    }

    i = 32;
    XMEMCPY(t[v[i].mul].x, p256_table[i][v[i].i].x, sizeof(p256_table[i]->x));
    XMEMCPY(t[v[i].mul].y, p256_table[i][v[i].i].y, sizeof(p256_table[i]->y));
    XMEMCPY(t[v[i].mul].x, p256_table[i][v[i].i].x,
            sizeof(p256_table[i]->x));
    XMEMCPY(t[v[i].mul].y, p256_table[i][v[i].i].y,
            sizeof(p256_table[i]->y));
    t[v[i].mul].infinity = p256_table[i][v[i].i].infinity;
    for (--i; i>=0; i--) {
        XMEMCPY(p->x, p256_table[i][v[i].i].x, sizeof(p256_table[i]->x));
@@ -44211,7 +44215,8 @@ static int sp_256_ecc_mulmod_base_4(sp_point* r, sp_digit* k, int map,
        p->infinity = p256_table[i][v[i].i].infinity;
        sp_256_sub_4(negy, p256_mod, p->y);
        sp_256_cond_copy_4(p->y, negy, (sp_digit)0 - v[i].neg);
        sp_256_proj_point_add_qz1_4(&t[v[i].mul], &t[v[i].mul], p, tmp);
        sp_256_proj_point_add_qz1_4(&t[v[i].mul], &t[v[i].mul], p,
                                    tmp);
    }
    sp_256_proj_point_add_4(&t[2], &t[2], &t[3], tmp);
    sp_256_proj_point_add_4(&t[1], &t[1], &t[3], tmp);
@@ -44296,8 +44301,10 @@ static int sp_256_ecc_mulmod_base_avx2_4(sp_point* r, sp_digit* k, int map,
    }

    i = 32;
    XMEMCPY(t[v[i].mul].x, p256_table[i][v[i].i].x, sizeof(p256_table[i]->x));
    XMEMCPY(t[v[i].mul].y, p256_table[i][v[i].i].y, sizeof(p256_table[i]->y));
    XMEMCPY(t[v[i].mul].x, p256_table[i][v[i].i].x,
            sizeof(p256_table[i]->x));
    XMEMCPY(t[v[i].mul].y, p256_table[i][v[i].i].y,
            sizeof(p256_table[i]->y));
    t[v[i].mul].infinity = p256_table[i][v[i].i].infinity;
    for (--i; i>=0; i--) {
        XMEMCPY(p->x, p256_table[i][v[i].i].x, sizeof(p256_table[i]->x));
@@ -44305,7 +44312,8 @@ static int sp_256_ecc_mulmod_base_avx2_4(sp_point* r, sp_digit* k, int map,
        p->infinity = p256_table[i][v[i].i].infinity;
        sp_256_sub_4(negy, p256_mod, p->y);
        sp_256_cond_copy_4(p->y, negy, (sp_digit)0 - v[i].neg);
        sp_256_proj_point_add_qz1_avx2_4(&t[v[i].mul], &t[v[i].mul], p, tmp);
        sp_256_proj_point_add_qz1_avx2_4(&t[v[i].mul], &t[v[i].mul], p,
                                         tmp);
    }
    sp_256_proj_point_add_avx2_4(&t[2], &t[2], &t[3], tmp);
    sp_256_proj_point_add_avx2_4(&t[1], &t[1], &t[3], tmp);
@@ -44407,7 +44415,6 @@ static int sp_256_iszero_4(const sp_digit* a)
#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN */
/* Add 1 to a. (a = a + 1)
 *
 * r  A single precision integer.
 * a  A single precision integer.
 */
static void sp_256_add_one_4(sp_digit* a)
@@ -45146,7 +45153,7 @@ static WC_INLINE int sp_256_div_4(sp_digit* a, sp_digit* d, sp_digit* m,
    }

    r1 = sp_256_cmp_4(t1, d) >= 0;
    sp_256_cond_sub_4(r, t1, t2, (sp_digit)0 - r1);
    sp_256_cond_sub_4(r, t1, d, (sp_digit)0 - r1);

    return MP_OKAY;
}
@@ -45294,7 +45301,7 @@ static const uint64_t p256_order_low[2] = {
static void sp_256_mont_mul_order_4(sp_digit* r, sp_digit* a, sp_digit* b)
{
    sp_256_mul_4(r, a, b);
    sp_256_mont_reduce_4(r, p256_order, p256_mp_order);
    sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
}

/* Square number mod the order of P256 curve. (r = a * a mod order)
@@ -45305,7 +45312,7 @@ static void sp_256_mont_mul_order_4(sp_digit* r, sp_digit* a, sp_digit* b)
static void sp_256_mont_sqr_order_4(sp_digit* r, sp_digit* a)
{
    sp_256_sqr_4(r, a);
    sp_256_mont_reduce_4(r, p256_order, p256_mp_order);
    sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order);
}

#ifndef WOLFSSL_SP_SMALL
@@ -45497,6 +45504,8 @@ SP_NOINLINE static void sp_256_sqr_avx2_4(sp_digit* r, const sp_digit* a)
    );
}

#define sp_256_mont_reduce_order_avx2_4 sp_256_mont_reduce_avx2_4

/* Reduce the number back to 256 bits using Montgomery reduction.
 *
 * a  A single precision number to reduce in place.
@@ -45646,7 +45655,7 @@ SP_NOINLINE static void sp_256_mont_reduce_avx2_4(sp_digit* a, sp_digit* m,
static void sp_256_mont_mul_order_avx2_4(sp_digit* r, sp_digit* a, sp_digit* b)
{
    sp_256_mul_avx2_4(r, a, b);
    sp_256_mont_reduce_avx2_4(r, p256_order, p256_mp_order);
    sp_256_mont_reduce_order_avx2_4(r, p256_order, p256_mp_order);
}

/* Square number mod the order of P256 curve. (r = a * a mod order)
@@ -45657,7 +45666,7 @@ static void sp_256_mont_mul_order_avx2_4(sp_digit* r, sp_digit* a, sp_digit* b)
static void sp_256_mont_sqr_order_avx2_4(sp_digit* r, sp_digit* a)
{
    sp_256_sqr_avx2_4(r, a);
    sp_256_mont_reduce_avx2_4(r, p256_order, p256_mp_order);
    sp_256_mont_reduce_order_avx2_4(r, p256_order, p256_mp_order);
}

#ifndef WOLFSSL_SP_SMALL

@@ -35,6 +35,8 @@
    #define SP_WORD_SIZE 64
#elif defined(WOLFSSL_SP_ARM32_ASM)
    #define SP_WORD_SIZE 32
#elif defined(WOLFSSL_SP_ARM_THUMB_ASM)
    #define SP_WORD_SIZE 32
#endif

#ifndef SP_WORD_SIZE
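The hunk above gives the new WOLFSSL_SP_ARM_THUMB_ASM build the same 32-bit digit width as the existing ARM32 assembly path. A hypothetical guard, not part of this patch, that would catch a mismatched configuration at compile time might look like:

#if defined(WOLFSSL_SP_ARM_THUMB_ASM) && defined(SP_WORD_SIZE) && \
    (SP_WORD_SIZE != 32)
    #error "ARM Thumb SP assembly expects a 32-bit sp_digit"
#endif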