From 0cc21a42f38f9e03e98ce3bef84bbaea3d4c50bd Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Tue, 26 Sep 2023 09:34:25 +1000 Subject: [PATCH] SP updates for SM2 Allow wolfSSL to build with SP implementations of SM2. Updates to SP implementation of other code. --- configure.ac | 18 +- examples/server/server.c | 12 + src/include.am | 29 + tests/api.c | 4 +- wolfcrypt/src/ecc.c | 206 ++- wolfcrypt/src/eccsi.c | 17 +- wolfcrypt/src/sm2.c | 20 + wolfcrypt/src/sm3.c | 20 + wolfcrypt/src/sm3_asm.S | 20 + wolfcrypt/src/sm4.c | 20 + wolfcrypt/src/sp_arm32.c | 1086 ++++++++-------- wolfcrypt/src/sp_arm64.c | 298 ++--- wolfcrypt/src/sp_armthumb.c | 1980 ++++++++++++----------------- wolfcrypt/src/sp_c32.c | 127 +- wolfcrypt/src/sp_c64.c | 127 +- wolfcrypt/src/sp_cortexm.c | 925 ++++++-------- wolfcrypt/src/sp_int.c | 4 +- wolfcrypt/src/sp_sm2_arm32.c | 33 + wolfcrypt/src/sp_sm2_arm64.c | 33 + wolfcrypt/src/sp_sm2_armthumb.c | 33 + wolfcrypt/src/sp_sm2_c32.c | 33 + wolfcrypt/src/sp_sm2_c64.c | 33 + wolfcrypt/src/sp_sm2_cortexm.c | 33 + wolfcrypt/src/sp_sm2_x86_64.c | 33 + wolfcrypt/src/sp_sm2_x86_64_asm.S | 33 + wolfcrypt/src/sp_x86_64.c | 189 +-- wolfcrypt/src/sp_x86_64_asm.S | 185 +-- wolfcrypt/src/sp_x86_64_asm.asm | 102 +- wolfcrypt/test/test.c | 2 +- wolfssl/wolfcrypt/ecc.h | 2 +- wolfssl/wolfcrypt/settings.h | 3 + wolfssl/wolfcrypt/sm2.h | 20 + wolfssl/wolfcrypt/sm3.h | 20 + wolfssl/wolfcrypt/sm4.h | 20 + wolfssl/wolfcrypt/sp.h | 30 + 35 files changed, 2768 insertions(+), 2982 deletions(-) create mode 100644 wolfcrypt/src/sp_sm2_arm32.c create mode 100644 wolfcrypt/src/sp_sm2_arm64.c create mode 100644 wolfcrypt/src/sp_sm2_armthumb.c create mode 100644 wolfcrypt/src/sp_sm2_c32.c create mode 100644 wolfcrypt/src/sp_sm2_c64.c create mode 100644 wolfcrypt/src/sp_sm2_cortexm.c create mode 100644 wolfcrypt/src/sp_sm2_x86_64.c create mode 100644 wolfcrypt/src/sp_sm2_x86_64_asm.S diff --git a/configure.ac b/configure.ac index 4aba828b3..ddd63d3e8 100644 --- a/configure.ac 
+++ b/configure.ac @@ -3515,10 +3515,6 @@ AC_ARG_ENABLE([sm2], [ ENABLED_SM2=no ] ) -if test "$ENABLED_SP_MATH" = "yes" -then - ENABLED_SM2="no" -fi if test "$ENABLED_SM2" = "yes" then if test "$ENABLED_ECC" = "no" @@ -7115,6 +7111,7 @@ ENABLED_SP_ECC=no ENABLED_SP_EC_256=no ENABLED_SP_EC_384=no ENABLED_SP_EC_521=no +ENABLED_SP_SM2=$ENABLED_SM2 ENABLED_SP_SAKKE_1024=$ENABLED_SAKKE ENABLED_SP_NO_MALLOC=no ENABLED_SP_NONBLOCK=no @@ -7206,6 +7203,15 @@ do ENABLED_SP_ECC=yes ENABLED_SP_SAKKE_1024=yes ;; + smallsm2) + ENABLED_SP_SMALL=yes + ENABLED_SP_ECC=yes + ENABLED_SP_SM2=yes + ;; + sm2) + ENABLED_SP_ECC=yes + ENABLED_SP_SM2=yes + ;; small2048) ENABLED_SP_SMALL=yes @@ -7353,6 +7359,10 @@ if test "$ENABLED_ECC" != "no" && test "$ENABLED_SP_ECC" = "yes"; then AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_1024" AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_1024" fi + if test "$ENABLED_SP_SM2" = "yes"; then + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_SM2" + AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_SM2" + fi fi if test "$ENABLED_SP_SMALL" = "yes"; then AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_SMALL" diff --git a/examples/server/server.c b/examples/server/server.c index e01cf3151..a06088dd8 100644 --- a/examples/server/server.c +++ b/examples/server/server.c @@ -752,6 +752,18 @@ static void SetKeyShare(WOLFSSL* ssl, int onlyKeyShare, int useX25519, else err_sys("unable to use curve secp256r1"); } while (ret == WC_PENDING_E); + #elif defined(WOLFSSL_SM2) + do { + ret = wolfSSL_UseKeyShare(ssl, WOLFSSL_ECC_SM2P256V1); + if (ret == WOLFSSL_SUCCESS) + groups[count++] = WOLFSSL_ECC_SM2P256V1; + #ifdef WOLFSSL_ASYNC_CRYPT + else if (ret == WC_PENDING_E) + wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + #endif + else + err_sys("unable to use curve sm2p256r1"); + } while (ret == WC_PENDING_E); #endif #endif } diff --git a/src/include.am b/src/include.am index c222d5c67..56e92a850 100644 --- a/src/include.am +++ b/src/include.am @@ -517,6 +517,35 @@ endif !BUILD_FIPS_CURRENT if !BUILD_FIPS_CURRENT if 
BUILD_SM2 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sm2.c +if BUILD_SP +if BUILD_SP_C32 +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sp_sm2_c32.c +endif +if BUILD_SP_C64 +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sp_sm2_c64.c +endif + +if BUILD_SP_X86_64 +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sp_sm2_x86_64.c +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sp_sm2_x86_64_asm.S +endif +if !BUILD_FIPS_V2 +if BUILD_SP_ARM32 +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sp_sm2_arm32.c +endif +endif +if BUILD_SP_ARM_THUMB +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sp_sm2_armthumb.c +endif +if !BUILD_FIPS_V2 +if BUILD_SP_ARM64 +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sp_sm2_arm64.c +endif +endif +if BUILD_SP_ARM_CORTEX +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sp_sm2_cortexm.c +endif +endif BUILD_SP endif BUILD_SM2 endif !BUILD_FIPS_CURRENT diff --git a/tests/api.c b/tests/api.c index c6d520313..704d4bc31 100644 --- a/tests/api.c +++ b/tests/api.c @@ -23770,8 +23770,8 @@ static int test_wc_ecc_sm2_sign_hash_ex(void) mp_int smallR[1]; sp_init_size(smallR, 1); /* Force failure in _ecc_sm2_calc_r_s by r being too small. 
*/ - ExpectIntEQ(wc_ecc_sm2_sign_hash_ex(hash, sizeof(hash), rng, key, - smallR, s), MP_VAL); + ExpectIntLT(wc_ecc_sm2_sign_hash_ex(hash, sizeof(hash), rng, key, + smallR, s), 0); } #endif diff --git a/wolfcrypt/src/ecc.c b/wolfcrypt/src/ecc.c index eaff8ae8c..42685dd50 100644 --- a/wolfcrypt/src/ecc.c +++ b/wolfcrypt/src/ecc.c @@ -666,7 +666,8 @@ enum { #endif #define ecc_oid_brainpoolp256r1_sz CODED_BRAINPOOLP256R1_SZ #endif /* HAVE_ECC_BRAINPOOL */ - #if defined(WOLFSSL_SM2) && !defined(WOLFSSL_SP_MATH) +#endif /* ECC256 */ + #if defined(WOLFSSL_SM2) #ifdef HAVE_OID_ENCODING #define CODED_SM2P256V1 {1,2,156,10197,1,301} #define CODED_SM2P256V1_SZ 6 @@ -680,8 +681,7 @@ enum { #define ecc_oid_sm2p256v1 CODED_SM2P256V1 #endif #define ecc_oid_sm2p256v1_sz CODED_SM2P256V1_SZ - #endif /* WOLFSSL_SM2 && !WOLFSSL_SP_MATH */ -#endif /* ECC256 */ + #endif /* WOLFSSL_SM2 */ #ifdef ECC320 #ifdef HAVE_ECC_BRAINPOOL #ifdef HAVE_OID_ENCODING @@ -1161,7 +1161,8 @@ const ecc_set_type ecc_sets[] = { 1, /* cofactor */ }, #endif /* HAVE_ECC_BRAINPOOL */ - #if defined(WOLFSSL_SM2) && !defined(WOLFSSL_SP_MATH) +#endif /* ECC256 */ + #if defined(WOLFSSL_SM2) { 32, /* size/bytes */ ECC_SM2P256V1, /* ID */ @@ -1179,8 +1180,7 @@ const ecc_set_type ecc_sets[] = { ECC_SM2P256V1_OID, /* oid sum */ 1, /* cofactor */ }, - #endif /* WOLFSSL_SM2 && !WOLFSSL_SP_MATH */ -#endif /* ECC256 */ + #endif /* WOLFSSL_SM2 */ #ifdef ECC320 #ifdef HAVE_ECC_BRAINPOOL { @@ -2157,16 +2157,14 @@ done: (void)a; (void)mp; +#if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) + if ((modBits == 256) && (!mp_is_bit_set(modulus, 224))) { + return sp_ecc_proj_add_point_sm2_256(P->x, P->y, P->z, Q->x, Q->y, Q->z, + R->x, R->y, R->z); + } +#endif #ifndef WOLFSSL_SP_NO_256 if (modBits == 256) { -#ifdef SM2_SP_IMPL_AVAILABLE - #ifdef WOLFSSL_SM2 - if (!mp_is_bit_set(modulus, 224)) { - return sp_ecc_proj_add_point_sm2_256(P->x, P->y, P->z, Q->x, Q->y, - Q->z, R->x, R->y, R->z); - } - #endif -#endif return 
sp_ecc_proj_add_point_256(P->x, P->y, P->z, Q->x, Q->y, Q->z, R->x, R->y, R->z); } @@ -2529,16 +2527,13 @@ static int _ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* a, (void)a; (void)mp; +#if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) + if ((modBits == 256) && (!mp_is_bit_set(modulus, 224))) { + return sp_ecc_proj_dbl_point_sm2_256(P->x, P->y, P->z, R->x, R->y, R->z); + } +#endif #ifndef WOLFSSL_SP_NO_256 if (modBits == 256) { -#ifdef SM2_SP_IMPL_AVAILABLE - #ifdef WOLFSSL_SM2 - if (!mp_is_bit_set(modulus, 224)) { - return sp_ecc_proj_dbl_point_sm2_256(P->x, P->y, P->z, R->x, R->y, - R->z); - } - #endif -#endif return sp_ecc_proj_dbl_point_256(P->x, P->y, P->z, R->x, R->y, R->z); } #endif @@ -2789,15 +2784,13 @@ done: (void)mp; (void)ct; +#if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) + if ((mp_count_bits(modulus) == 256) && (!mp_is_bit_set(modulus, 224))) { + return sp_ecc_map_sm2_256(P->x, P->y, P->z); + } +#endif #ifndef WOLFSSL_SP_NO_256 if (mp_count_bits(modulus) == 256) { -#ifdef SM2_SP_IMPL_AVAILABLE - #ifdef WOLFSSL_SM2 - if (!mp_is_bit_set(modulus, 224)) { - return sp_ecc_map_sm2_256(P->x, P->y, P->z); - } - #endif -#endif return sp_ecc_map_256(P->x, P->y, P->z); } #endif @@ -3698,15 +3691,13 @@ exit: } #ifdef WOLFSSL_HAVE_SP_ECC +#if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) + if ((mp_count_bits(modulus) == 256) && (!mp_is_bit_set(modulus, 224))) { + return sp_ecc_mulmod_sm2_256(k, G, R, map, heap); + } +#endif #ifndef WOLFSSL_SP_NO_256 if (mp_count_bits(modulus) == 256) { -#ifdef SM2_SP_IMPL_AVAILABLE - #ifdef WOLFSSL_SM2 - if (!mp_is_bit_set(modulus, 224)) { - return sp_ecc_mulmod_sm2_256(k, G, R, map, heap); - } - #endif -#endif return sp_ecc_mulmod_256(k, G, R, map, heap); } #endif @@ -3905,6 +3896,11 @@ exit: (void)rng; #ifdef WOLFSSL_HAVE_SP_ECC +#if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) + if ((mp_count_bits(modulus) == 256) && (!mp_is_bit_set(modulus, 224))) { + return sp_ecc_mulmod_sm2_256(k, G, R, map, 
heap); + } +#endif #ifndef WOLFSSL_SP_NO_256 if (mp_count_bits(modulus) == 256) { return sp_ecc_mulmod_256(k, G, R, map, heap); @@ -4695,8 +4691,8 @@ int wc_ecc_shared_secret_gen_sync(ecc_key* private_key, ecc_point* point, #endif /* !WC_ECC_NONBLOCK */ } else -#ifdef SM2_SP_IMPL_AVAILABLE -#ifdef WOLFSSL_SM2 +#endif /* ! WOLFSSL_SP_NO_256 */ +#if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) if (private_key->idx != ECC_CUSTOM_IDX && ecc_sets[private_key->idx].id == ECC_SM2P256V1) { err = sp_ecc_secret_gen_sm2_256(k, point, out, outlen, @@ -4704,8 +4700,6 @@ int wc_ecc_shared_secret_gen_sync(ecc_key* private_key, ecc_point* point, } else #endif -#endif -#endif /* ! WOLFSSL_SP_NO_256 */ #ifdef WOLFSSL_SP_384 if (private_key->idx != ECC_CUSTOM_IDX && ecc_sets[private_key->idx].id == ECC_SECP384R1) { @@ -5289,15 +5283,13 @@ static int ecc_make_pub_ex(ecc_key* key, ecc_curve_spec* curve, err = sp_ecc_mulmod_base_256(key->k, pub, 1, key->heap); } else -#ifdef SM2_SP_IMPL_AVAILABLE -#ifdef WOLFSSL_SM2 +#endif /* WOLFSSL_SP_NO_256 */ +#if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SM2P256V1) { - err = sp_ecc_mulmod_base_sm2_256(&key->k, pub, 1, key->heap); + err = sp_ecc_mulmod_base_sm2_256(key->k, pub, 1, key->heap); } else #endif -#endif -#endif /* WOLFSSL_SP_NO_256 */ #ifdef WOLFSSL_SP_384 if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) { err = sp_ecc_mulmod_base_384(key->k, pub, 1, key->heap); @@ -5673,18 +5665,16 @@ static int _ecc_make_key_ex(WC_RNG* rng, int keysize, ecc_key* key, } } else -#ifdef SM2_SP_IMPL_AVAILABLE -#ifdef WOLFSSL_SM2 +#endif /* !WOLFSSL_SP_NO_256 */ +#if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SM2P256V1) { - err = sp_ecc_make_key_sm2_256(rng, &key->k, &key->pubkey, key->heap); + err = sp_ecc_make_key_sm2_256(rng, key->k, &key->pubkey, key->heap); if (err == MP_OKAY) { key->type = 
ECC_PRIVATEKEY; } } else #endif -#endif -#endif /* !WOLFSSL_SP_NO_256 */ #ifdef WOLFSSL_SP_384 if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) { #ifndef WC_ECC_NONBLOCK @@ -6892,14 +6882,16 @@ static int ecc_sign_hash_sp(const byte* in, word32 inlen, WC_RNG* rng, } #endif } -#ifdef SM2_SP_IMPL_AVAILABLE - #ifdef WOLFSSL_SM2 + #endif + #if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) if (ecc_sets[key->idx].id == ECC_SM2P256V1) { - return sp_ecc_sign_sm2_256(in, inlen, rng, &key->k, r, s, sign_k, + int ret; + SAVE_VECTOR_REGISTERS(return _svr_ret;); + ret = sp_ecc_sign_sm2_256(in, inlen, rng, key->k, r, s, sign_k, key->heap); + RESTORE_VECTOR_REGISTERS(); + return ret; } - #endif -#endif #endif #ifdef WOLFSSL_SP_384 if (ecc_sets[key->idx].id == ECC_SECP384R1) { @@ -7018,6 +7010,9 @@ int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng, #ifndef WOLFSSL_SP_NO_256 && ecc_sets[key->idx].id != ECC_SECP256R1 #endif + #ifdef WOLFSSL_SP_SM2 + && ecc_sets[key->idx].id != ECC_SM2P256V1 + #endif #ifdef WOLFSSL_SP_384 && ecc_sets[key->idx].id != ECC_SECP384R1 #endif @@ -8422,6 +8417,9 @@ static int ecc_verify_hash_sp(mp_int *r, mp_int *s, const byte* hash, #ifndef WOLFSSL_SP_NO_256 && ecc_sets[key->idx].id != ECC_SECP256R1 #endif + #ifdef WOLFSSL_SP_SM2 + && ecc_sets[key->idx].id != ECC_SM2P256V1 + #endif #ifdef WOLFSSL_SP_384 && ecc_sets[key->idx].id != ECC_SECP384R1 #endif @@ -8470,8 +8468,8 @@ static int ecc_verify_hash_sp(mp_int *r, mp_int *s, const byte* hash, } #endif } -#ifdef SM2_SP_IMPL_AVAILABLE - #ifdef WOLFSSL_SM2 + #endif + #if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) if (ecc_sets[key->idx].id == ECC_SM2P256V1) { #if defined(FP_ECC_CONTROL) && !defined(WOLFSSL_DSP_BUILD) return sp_ecc_cache_verify_sm2_256(hash, hashlen, key->pubkey.x, @@ -8485,9 +8483,7 @@ static int ecc_verify_hash_sp(mp_int *r, mp_int *s, const byte* hash, key->pubkey.y, key->pubkey.z, r, s, res, key->heap); #endif } - #endif #endif -#endif #ifdef 
WOLFSSL_SP_384 if (ecc_sets[key->idx].id == ECC_SECP384R1) { #ifdef WC_ECC_NONBLOCK @@ -9108,16 +9104,14 @@ int wc_ecc_import_point_der_ex(const byte* in, word32 inLen, err = sp_ecc_uncompress_256(point->x, pointType, point->y); } else -#ifdef SM2_SP_IMPL_AVAILABLE - #ifdef WOLFSSL_SM2 + #endif + #if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) if (curve_idx != ECC_CUSTOM_IDX && - ecc_sets[curve_idx->idx].id == ECC_SM2P256V1) { + ecc_sets[curve_idx].id == ECC_SM2P256V1) { sp_ecc_uncompress_sm2_256(point->x, pointType, point->y); } else #endif -#endif - #endif #ifdef WOLFSSL_SP_384 if (curve_idx != ECC_CUSTOM_IDX && ecc_sets[curve_idx].id == ECC_SECP384R1) { @@ -9663,15 +9657,13 @@ static int _ecc_is_point(ecc_point* ecp, mp_int* a, mp_int* b, mp_int* prime) (void)b; #ifdef WOLFSSL_HAVE_SP_ECC +#if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) + if ((mp_count_bits(prime) == 256) && (!mp_is_bit_set(prime, 224))) { + return sp_ecc_is_point_sm2_256(ecp->x, ecp->y); + } +#endif #ifndef WOLFSSL_SP_NO_256 if (mp_count_bits(prime) == 256) { -#ifdef SM2_SP_IMPL_AVAILABLE - #ifdef WOLFSSL_SM2 - if (!mp_is_bit_set(prime, 224)) { - return sp_ecc_is_point_sm2_256(ecp->x, ecp->y); - } - #endif -#endif return sp_ecc_is_point_256(ecp->x, ecp->y); } #endif @@ -9764,16 +9756,14 @@ static int ecc_check_privkey_gen(ecc_key* key, mp_int* a, mp_int* prime) } } else -#ifdef SM2_SP_IMPL_AVAILABLE - #ifdef WOLFSSL_SM2 +#endif +#if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SM2P256V1) { if (err == MP_OKAY) { - err = sp_ecc_mulmod_base_sm2_256(&key->k, res, 1, key->heap); + err = sp_ecc_mulmod_base_sm2_256(key->k, res, 1, key->heap); } } else - #endif -#endif #endif #ifdef WOLFSSL_SP_384 if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) { @@ -10007,15 +9997,13 @@ static int ecc_check_pubkey_order(ecc_key* key, ecc_point* pubkey, mp_int* a, err = sp_ecc_mulmod_256(order, pubkey, inf, 1, 
key->heap); } else -#ifdef SM2_SP_IMPL_AVAILABLE - #ifdef WOLFSSL_SM2 +#endif +#if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SM2P256V1) { err = sp_ecc_mulmod_sm2_256(order, pubkey, inf, 1, key->heap); } else - #endif -#endif #endif #ifdef WOLFSSL_SP_384 if (key->idx != ECC_CUSTOM_IDX && @@ -10121,15 +10109,13 @@ static int _ecc_validate_public_key(ecc_key* key, int partial, int priv) return sp_ecc_check_key_256(key->pubkey.x, key->pubkey.y, key->type == ECC_PRIVATEKEY ? key->k : NULL, key->heap); } -#ifdef SM2_SP_IMPL_AVAILABLE -#ifdef WOLFSSL_SM2 +#endif +#if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SM2P256V1) { - return sp_ecc_check_key_sm2_256(key->pubkey.x, key->pubkey.y - key->type == ECC_PRIVATEKEY ? &key->k : NULL, key->heap); + return sp_ecc_check_key_sm2_256(key->pubkey.x, key->pubkey.y, + key->type == ECC_PRIVATEKEY ? key->k : NULL, key->heap); } #endif -#endif -#endif #ifdef WOLFSSL_SP_384 if (key->idx != ECC_CUSTOM_IDX && ecc_sets[key->idx].id == ECC_SECP384R1) { return sp_ecc_check_key_384(key->pubkey.x, key->pubkey.y, @@ -10506,14 +10492,12 @@ int wc_ecc_import_x963_ex(const byte* in, word32 inLen, ecc_key* key, key->pubkey.y); } else -#ifdef SM2_SP_IMPL_AVAILABLE - #ifdef WOLFSSL_SM2 + #endif + #if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) if (key->dp->id == ECC_SM2P256V1) { sp_ecc_uncompress_sm2_256(key->pubkey.x, pointType, key->pubkey.y); } else - #endif -#endif #endif #ifdef WOLFSSL_SP_384 if (key->dp->id == ECC_SECP384R1) { @@ -13059,21 +13043,20 @@ int wc_ecc_mulmod_ex(const mp_int* k, ecc_point *G, ecc_point *R, mp_int* a, return IS_POINT_E; } +#if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) + if ((mp_count_bits(modulus) == 256) && (!mp_is_bit_set(modulus, 224))) { + int ret; + SAVE_VECTOR_REGISTERS(return _svr_ret); + ret = sp_ecc_mulmod_sm2_256(k, G, R, map, heap); + RESTORE_VECTOR_REGISTERS(); 
+ return ret; + } +#endif #ifndef WOLFSSL_SP_NO_256 if (mp_count_bits(modulus) == 256) { int ret; SAVE_VECTOR_REGISTERS(return _svr_ret); -#ifdef SM2_SP_IMPL_AVAILABLE - #ifdef WOLFSSL_SM2 - if (!mp_is_bit_set(modulus, 224)) { - ret = sp_ecc_mulmod_sm2_256(k, G, R, map, heap); - } - else - #endif -#endif - { - ret = sp_ecc_mulmod_256(k, G, R, map, heap); - } + ret = sp_ecc_mulmod_256(k, G, R, map, heap); RESTORE_VECTOR_REGISTERS(); return ret; } @@ -13238,21 +13221,20 @@ int wc_ecc_mulmod_ex2(const mp_int* k, ecc_point *G, ecc_point *R, mp_int* a, return IS_POINT_E; } +#if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) + if ((mp_count_bits(modulus) == 256) && (!mp_is_bit_set(modulus, 224))) { + int ret; + SAVE_VECTOR_REGISTERS(return _svr_ret); + ret = sp_ecc_mulmod_sm2_256(k, G, R, map, heap); + RESTORE_VECTOR_REGISTERS(); + return ret; + } +#endif #ifndef WOLFSSL_SP_NO_256 if (mp_count_bits(modulus) == 256) { int ret; SAVE_VECTOR_REGISTERS(return _svr_ret); -#ifdef SM2_SP_IMPL_AVAILABLE - #ifdef WOLFSSL_SM2 - if (!mp_is_bit_set(modulus, 224)) { - ret = sp_ecc_mulmod_sm2_256(k, G, R, map, heap); - } - else - #endif -#endif - { - ret = sp_ecc_mulmod_256(k, G, R, map, heap); - } + ret = sp_ecc_mulmod_256(k, G, R, map, heap); RESTORE_VECTOR_REGISTERS(); return ret; } diff --git a/wolfcrypt/src/eccsi.c b/wolfcrypt/src/eccsi.c index ed2e2b8bd..1725aba4b 100644 --- a/wolfcrypt/src/eccsi.c +++ b/wolfcrypt/src/eccsi.c @@ -1350,15 +1350,13 @@ static int eccsi_mulmod_base_add(EccsiKey* key, const mp_int* n, { int err = 0; -#ifdef WOLFSSL_HAVE_SP_ECC -#ifndef WOLFSSL_SP_NO_256 +#if defined(WOLFSSL_HAVE_SP_ECC) && !defined(WOLFSSL_SP_NO_256) if ((key->ecc.idx != ECC_CUSTOM_IDX) && (ecc_sets[key->ecc.idx].id == ECC_SECP256R1)) { err = sp_ecc_mulmod_base_add_256(n, a, 1, res, map, key->heap); } else #endif -#endif #ifndef WOLFSSL_SP_MATH { EccsiKeyParams* params = &key->params; @@ -1377,7 +1375,12 @@ static int eccsi_mulmod_base_add(EccsiKey* key, const mp_int* n, { err = 
NOT_COMPILED_IN; } + (void)key; + (void)h; + (void)a; + (void)res; (void)mp; + (void)map; #endif return err; @@ -1401,14 +1404,12 @@ static int eccsi_mulmod_point(EccsiKey* key, const mp_int* n, ecc_point* point, { int err; -#ifdef WOLFSSL_HAVE_SP_ECC -#ifndef WOLFSSL_SP_NO_256 +#if defined(WOLFSSL_HAVE_SP_ECC) && !defined(WOLFSSL_SP_NO_256) if ((key->ecc.idx != ECC_CUSTOM_IDX) && (ecc_sets[key->ecc.idx].id == ECC_SECP256R1)) { err = sp_ecc_mulmod_256(n, point, res, map, key->heap); } else -#endif #endif { EccsiKeyParams* params = &key->params; @@ -1437,8 +1438,7 @@ static int eccsi_mulmod_point(EccsiKey* key, const mp_int* n, ecc_point* point, static int eccsi_mulmod_point_add(EccsiKey* key, const mp_int* n, ecc_point* point, ecc_point* a, ecc_point* res, mp_digit mp, int map) { -#ifdef WOLFSSL_HAVE_SP_ECC -#ifndef WOLFSSL_SP_NO_256 +#if defined(WOLFSSL_HAVE_SP_ECC) && !defined(WOLFSSL_SP_NO_256) int err = NOT_COMPILED_IN; if ((key->ecc.idx != ECC_CUSTOM_IDX) && @@ -1449,7 +1449,6 @@ static int eccsi_mulmod_point_add(EccsiKey* key, const mp_int* n, (void)mp; return err; -#endif #else int err; EccsiKeyParams* params = &key->params; diff --git a/wolfcrypt/src/sm2.c b/wolfcrypt/src/sm2.c index ffdb8de7e..829d5e5b2 100644 --- a/wolfcrypt/src/sm2.c +++ b/wolfcrypt/src/sm2.c @@ -1,3 +1,23 @@ +/* sm2.c + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ #ifdef HAVE_CONFIG_H #include diff --git a/wolfcrypt/src/sm3.c b/wolfcrypt/src/sm3.c index ab6324225..1339037b7 100644 --- a/wolfcrypt/src/sm3.c +++ b/wolfcrypt/src/sm3.c @@ -1,3 +1,23 @@ +/* sm3.c + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ #ifdef HAVE_CONFIG_H #include diff --git a/wolfcrypt/src/sm3_asm.S b/wolfcrypt/src/sm3_asm.S index ab6324225..2c368f1ff 100644 --- a/wolfcrypt/src/sm3_asm.S +++ b/wolfcrypt/src/sm3_asm.S @@ -1,3 +1,23 @@ +/* sm3_asm.S + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ #ifdef HAVE_CONFIG_H #include diff --git a/wolfcrypt/src/sm4.c b/wolfcrypt/src/sm4.c index d3da2b9df..1e4f31760 100644 --- a/wolfcrypt/src/sm4.c +++ b/wolfcrypt/src/sm4.c @@ -1,3 +1,23 @@ +/* sm4.c + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ #ifdef HAVE_CONFIG_H #include diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index 876fa887a..a1ae275de 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -11581,8 +11581,8 @@ static sp_int32 sp_2048_cmp_32(const sp_digit* a_p, const sp_digit* b_p) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[64], t2[33]; sp_digit div, r1; @@ -11622,7 +11622,8 @@ static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_dig * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_2048_div_32(a, m, NULL, r); } @@ -15493,8 +15494,8 @@ static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[128], t2[65]; sp_digit div, r1; @@ -15550,7 +15551,8 @@ static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, s * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_2048_div_64_cond(a, m, NULL, r); } @@ -16346,8 +16348,8 @@ static sp_int32 sp_2048_cmp_64(const sp_digit* a_p, const sp_digit* b_p) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[128], t2[65]; sp_digit div, r1; @@ -16387,7 +16389,8 @@ static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_dig * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_2048_div_64(a, m, NULL, r); } @@ -36703,8 +36706,8 @@ static sp_int32 sp_3072_cmp_48(const sp_digit* a_p, const sp_digit* b_p) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[96], t2[49]; sp_digit div, r1; @@ -36744,7 +36747,8 @@ static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_dig * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_3072_div_48(a, m, NULL, r); } @@ -42127,8 +42131,8 @@ static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[192], t2[97]; sp_digit div, r1; @@ -42184,7 +42188,8 @@ static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, s * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_3072_div_96_cond(a, m, NULL, r); } @@ -43338,8 +43343,8 @@ static sp_int32 sp_3072_cmp_96(const sp_digit* a_p, const sp_digit* b_p) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[192], t2[97]; sp_digit div, r1; @@ -43379,7 +43384,8 @@ static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_dig * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_3072_div_96(a, m, NULL, r); } @@ -57066,8 +57072,8 @@ static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[256], t2[129]; sp_digit div, r1; @@ -57123,7 +57129,8 @@ static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_4096_div_128_cond(a, m, NULL, r); } @@ -58629,8 +58636,8 @@ static sp_int32 sp_4096_cmp_128(const sp_digit* a_p, const sp_digit* b_p) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[256], t2[129]; sp_digit div, r1; @@ -58670,7 +58677,8 @@ static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_di * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_4096_div_128(a, m, NULL, r); } @@ -65169,80 +65177,6 @@ static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* return (uint32_t)(size_t)r; } -#endif /* WOLFSSL_SP_SMALL */ -#ifdef WOLFSSL_SP_SMALL -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) -{ - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - - __asm__ __volatile__ ( - "mov r12, #0\n\t" - "add lr, %[a], #32\n\t" - "\n" - "L_sp_256_sub_8_word_%=: \n\t" - "rsbs r12, r12, #0\n\t" - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "sbc r12, r3, r3\n\t" - "cmp %[a], lr\n\t" - "bne L_sp_256_sub_8_word_%=\n\t" - "mov %[r], r12\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr" - ); - return (uint32_t)(size_t)r; -} - -#else -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) -{ - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - - __asm__ __volatile__ ( - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "subs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "sbc %[r], r6, r6\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); - return (uint32_t)(size_t)r; -} - #endif /* WOLFSSL_SP_SMALL */ /* Multiply a number by Montgomery normalizer mod modulus (prime). * @@ -67665,7 +67599,6 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, co "str %[r], [sp, #64]\n\t" /* Start Reduction */ "ldm sp, {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" - "str %[r], [sp]\n\t" "mov r3, r11\n\t" "mov r4, r12\n\t" /* mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192 */ @@ -67707,7 +67640,7 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, co "adcs r4, r4, r6\n\t" "adc lr, lr, #0\n\t" "str r4, [sp, #28]\n\t" - /* a[8] += t[0] + t[2] + t[5] */ + /* a[8] += t[0] + t[2] + t[5] + carry */ /* a[9] += t[1] + t[3] + t[6] */ /* a[10] += t[2] + t[4] + t[7] */ "add r0, sp, #32\n\t" @@ -67783,7 +67716,7 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, co "sbcs r4, r4, #0\n\t" "sbcs r5, r5, #0\n\t" "sbcs r6, r6, #0\n\t" - "sbcs r7, r7, lr, LSR #31\n\t" + "sbcs r7, r7, lr, lsr #31\n\t" "sbc r8, r8, lr\n\t" "ldr %[r], [sp, #64]\n\t" "stm %[r], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" @@ -68143,7 
+68076,6 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, co "stm lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" /* Start Reduction */ "ldm sp, {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" - "str %[r], [sp]\n\t" "mov r3, r11\n\t" "mov r4, r12\n\t" /* mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192 */ @@ -68185,7 +68117,7 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, co "adcs r4, r4, r6\n\t" "adc lr, lr, #0\n\t" "str r4, [sp, #28]\n\t" - /* a[8] += t[0] + t[2] + t[5] */ + /* a[8] += t[0] + t[2] + t[5] + carry */ /* a[9] += t[1] + t[3] + t[6] */ /* a[10] += t[2] + t[4] + t[7] */ "add r0, sp, #32\n\t" @@ -68261,7 +68193,7 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, co "sbcs r4, r4, #0\n\t" "sbcs r5, r5, #0\n\t" "sbcs r6, r6, #0\n\t" - "sbcs r7, r7, lr, LSR #31\n\t" + "sbcs r7, r7, lr, lsr #31\n\t" "sbc r8, r8, lr\n\t" "ldr %[r], [sp, #64]\n\t" "stm %[r], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" @@ -68399,7 +68331,6 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, co "stm lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" /* Start Reduction */ "ldm sp, {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" - "str %[r], [sp]\n\t" "mov r3, r11\n\t" "mov r4, r12\n\t" /* mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192 */ @@ -68441,7 +68372,7 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, co "adcs r4, r4, r6\n\t" "adc lr, lr, #0\n\t" "str r4, [sp, #28]\n\t" - /* a[8] += t[0] + t[2] + t[5] */ + /* a[8] += t[0] + t[2] + t[5] + carry */ /* a[9] += t[1] + t[3] + t[6] */ /* a[10] += t[2] + t[4] + t[7] */ "add r0, sp, #32\n\t" @@ -68517,7 +68448,7 @@ static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, co "sbcs r4, r4, #0\n\t" "sbcs r5, r5, #0\n\t" "sbcs r6, r6, #0\n\t" - "sbcs r7, r7, lr, LSR #31\n\t" + "sbcs r7, r7, lr, lsr #31\n\t" "sbc r8, r8, lr\n\t" "ldr %[r], [sp, #68]\n\t" "stm %[r], {r1, r2, 
r3, r4, r5, r6, r7, r8}\n\t" @@ -69598,7 +69529,6 @@ static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, co "str %[r], [sp, #64]\n\t" /* Start Reduction */ "ldm sp, {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" - "str %[r], [sp]\n\t" "mov r3, r11\n\t" "mov r4, r12\n\t" /* mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192 */ @@ -69640,7 +69570,7 @@ static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, co "adcs r4, r4, r6\n\t" "adc lr, lr, #0\n\t" "str r4, [sp, #28]\n\t" - /* a[8] += t[0] + t[2] + t[5] */ + /* a[8] += t[0] + t[2] + t[5] + carry */ /* a[9] += t[1] + t[3] + t[6] */ /* a[10] += t[2] + t[4] + t[7] */ "add r0, sp, #32\n\t" @@ -69716,7 +69646,7 @@ static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, co "sbcs r4, r4, #0\n\t" "sbcs r5, r5, #0\n\t" "sbcs r6, r6, #0\n\t" - "sbcs r7, r7, lr, LSR #31\n\t" + "sbcs r7, r7, lr, lsr #31\n\t" "sbc r8, r8, lr\n\t" "ldr %[r], [sp, #64]\n\t" "stm %[r], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" @@ -69965,7 +69895,6 @@ static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, co "stm lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" /* Start Reduction */ "ldm sp, {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" - "str %[r], [sp]\n\t" "mov r3, r11\n\t" "mov r4, r12\n\t" /* mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192 */ @@ -70007,7 +69936,7 @@ static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, co "adcs r4, r4, r6\n\t" "adc lr, lr, #0\n\t" "str r4, [sp, #28]\n\t" - /* a[8] += t[0] + t[2] + t[5] */ + /* a[8] += t[0] + t[2] + t[5] + carry */ /* a[9] += t[1] + t[3] + t[6] */ /* a[10] += t[2] + t[4] + t[7] */ "add r0, sp, #32\n\t" @@ -70083,7 +70012,7 @@ static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, co "sbcs r4, r4, #0\n\t" "sbcs r5, r5, #0\n\t" "sbcs r6, r6, #0\n\t" - "sbcs r7, r7, lr, LSR #31\n\t" + "sbcs r7, r7, lr, lsr #31\n\t" "sbc r8, r8, lr\n\t" "ldr %[r], [sp, #64]\n\t" 
"stm %[r], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" @@ -70206,7 +70135,6 @@ static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, co "stm lr!, {r7}\n\t" /* Start Reduction */ "ldm sp, {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" - "str %[r], [sp]\n\t" "mov r3, r11\n\t" "mov r4, r12\n\t" /* mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192 */ @@ -70248,7 +70176,7 @@ static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, co "adcs r4, r4, r6\n\t" "adc lr, lr, #0\n\t" "str r4, [sp, #28]\n\t" - /* a[8] += t[0] + t[2] + t[5] */ + /* a[8] += t[0] + t[2] + t[5] + carry */ /* a[9] += t[1] + t[3] + t[6] */ /* a[10] += t[2] + t[4] + t[7] */ "add r0, sp, #32\n\t" @@ -70324,7 +70252,7 @@ static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, co "sbcs r4, r4, #0\n\t" "sbcs r5, r5, #0\n\t" "sbcs r6, r6, #0\n\t" - "sbcs r7, r7, lr, LSR #31\n\t" + "sbcs r7, r7, lr, lsr #31\n\t" "sbc r8, r8, lr\n\t" "ldr %[r], [sp, #64]\n\t" "stm %[r], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" @@ -71157,95 +71085,140 @@ static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a_p, const sp_digit* m_p, register sp_digit* a asm ("r0") = (sp_digit*)a_p; __asm__ __volatile__ ( - "mov r1, #0\n\t" - /* i = 0 */ - "mov r8, #0\n\t" - "\n" - "L_sp_256_mont_reduce_8_word_%=: \n\t" - "mov r4, #0\n\t" - /* mu = a[i] * 1 (mp) = a[i] */ - "ldr r2, [%[a]]\n\t" - /* a[i+0] += -1 * mu */ - "mov r5, r2\n\t" - "str r4, [%[a]]\n\t" - /* a[i+1] += -1 * mu */ - "ldr r6, [%[a], #4]\n\t" - "mov r4, r2\n\t" - "subs r5, r5, r2\n\t" - "sbc r4, r4, #0\n\t" - "adds r5, r5, r6\n\t" - "adc r4, r4, #0\n\t" - "str r5, [%[a], #4]\n\t" - /* a[i+2] += -1 * mu */ - "ldr r6, [%[a], #8]\n\t" - "mov r5, r2\n\t" - "subs r4, r4, r2\n\t" - "sbc r5, r5, #0\n\t" - "adds r4, r4, r6\n\t" - "adc r5, r5, #0\n\t" - "str r4, [%[a], #8]\n\t" - /* a[i+3] += 0 * mu */ - "ldr r6, [%[a], #12]\n\t" - "mov r4, #0\n\t" - "adds r5, r5, r6\n\t" - "adc r4, r4, #0\n\t" - "str r5, [%[a], #12]\n\t" 
- /* a[i+4] += 0 * mu */ - "ldr r6, [%[a], #16]\n\t" - "mov r5, #0\n\t" - "adds r4, r4, r6\n\t" - "adc r5, r5, #0\n\t" - "str r4, [%[a], #16]\n\t" - /* a[i+5] += 0 * mu */ - "ldr r6, [%[a], #20]\n\t" - "mov r4, #0\n\t" - "adds r5, r5, r6\n\t" - "adc r4, r4, #0\n\t" - "str r5, [%[a], #20]\n\t" - /* a[i+6] += 1 * mu */ - "ldr r6, [%[a], #24]\n\t" - "mov r5, #0\n\t" - "adds r4, r4, r2\n\t" - "adc r5, r5, #0\n\t" - "adds r4, r4, r6\n\t" - "adc r5, r5, #0\n\t" - "str r4, [%[a], #24]\n\t" - /* a[i+7] += -1 * mu */ - "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[a], #32]\n\t" - "adds r4, r1, r2\n\t" - "mov r1, #0\n\t" - "adc r1, r1, r1\n\t" - "subs r5, r5, r2\n\t" - "sbcs r4, r4, #0\n\t" - "sbc r1, r1, #0\n\t" - "adds r5, r5, r6\n\t" + "sub sp, sp, #0x44\n\t" + "str %[a], [sp, #64]\n\t" + "mov lr, sp\n\t" + "ldm %[a]!, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" + "stm lr!, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" + "ldm %[a], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" + "stm lr, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" + /* Start Reduction */ + "ldm sp, {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" + "mov r3, r11\n\t" + "mov r4, r12\n\t" + /* mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192 */ + /* - a[0] << 224 */ + /* + (a[0]-a[1] * 2) << (6 * 32) */ + "adds r11, r11, r5\n\t" + "adc r12, r12, r6\n\t" + "adds r11, r11, r5\n\t" + "adc r12, r12, r6\n\t" + /* - a[0] << (7 * 32) */ + "sub r12, r12, r5\n\t" + /* + a[0]-a[4] << (3 * 32) */ + "mov r0, r8\n\t" + "mov r1, r9\n\t" + "mov r2, r10\n\t" + "adds r8, r8, r5\n\t" + "adcs r9, r9, r6\n\t" + "adcs r10, r10, r7\n\t" + "adcs r11, r11, r0\n\t" + "adc r12, r12, r1\n\t" + /* a += mu * m */ + /* += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1) */ + /* a[0] = = t[0] */ + /* a[1] = = t[1] */ + /* a[2] = = t[2] */ + /* a[3] += t[0] = t[3] */ + /* a[4] += t[1] = t[4] */ + /* a[5] += t[2] = t[5] */ + /* a[6] += t[0] + t[3] = t[6] */ + /* a[7] += t[1] + t[4] = t[7] + t[0] */ + "adds r0, r0, r5\n\t" + "adcs r1, r1, r6\n\t" + "adcs r2, r2, 
r7\n\t" + "adcs r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r3, r3, r5\n\t" + "adcs r4, r4, r6\n\t" + "adc lr, lr, #0\n\t" + "str r4, [sp, #28]\n\t" + /* a[8] += t[0] + t[2] + t[5] + carry */ + /* a[9] += t[1] + t[3] + t[6] */ + /* a[10] += t[2] + t[4] + t[7] */ + "add r0, sp, #32\n\t" + "ldm r0, {r2, r3, r4}\n\t" + "adds r2, r2, lr\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r1, r1, #0\n\t" - "str r5, [%[a], #28]\n\t" - "str r4, [%[a], #32]\n\t" - /* i += 1 */ - "add r8, r8, #4\n\t" - "add %[a], %[a], #4\n\t" - "cmp r8, #32\n\t" - "blt L_sp_256_mont_reduce_8_word_%=\n\t" - "mov r2, r1\n\t" - "sub r1, r1, #1\n\t" - "mvn r1, r1\n\t" - "ldm %[a], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" - "sub %[a], %[a], #32\n\t" - "subs r4, r4, r1\n\t" - "sbcs r5, r5, r1\n\t" - "sbcs r6, r6, r1\n\t" - "sbcs r7, r7, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r7\n\t" + "adcs r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc lr, lr, #0\n\t" + "adds r2, r2, r10\n\t" + "adcs r3, r3, r11\n\t" + "adcs r4, r4, r12\n\t" + "adc lr, lr, #0\n\t" + "stm r0!, {r2, r3, r4}\n\t" + /* a[11] += t[3] + t[5] + carry */ + /* a[12] += t[4] + t[6] */ + /* a[13] += t[5] + t[7] */ + /* a[14] += t[6] */ + /* a[15] += t[7] */ + "ldm r0, {r0, r1, r2, r3, r4}\n\t" + "adds r0, r0, lr\n\t" + "adcs r1, r1, #0\n\t" + "adcs r2, r2, #0\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "mov lr, #0\n\t" + "adc lr, lr, #0\n\t" + "adds r0, r0, r8\n\t" + "adcs r1, r1, r9\n\t" + "adcs r2, r2, r10\n\t" + "adcs r3, r3, r11\n\t" + "adcs r4, r4, r12\n\t" + "adc lr, lr, #0\n\t" + "adds r0, r0, r10\n\t" + "adcs r1, r1, r11\n\t" + "adcs r2, r2, r12\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "adc lr, lr, #0\n\t" + "str r0, [sp, #44]\n\t" + "str r1, [sp, #48]\n\t" + "str r2, [sp, #52]\n\t" + "str r3, [sp, #56]\n\t" + /* a[7..15] - t[0..7] */ + 
"add r0, sp, #28\n\t" + "ldm r0, {r0, r1, r2, r3}\n\t" + "subs r0, r0, r5\n\t" + "sbcs r1, r1, r6\n\t" + "sbcs r2, r2, r7\n\t" + "sbcs r3, r3, r8\n\t" + "add r0, sp, #44\n\t" + "mov r8, r4\n\t" + "ldm r0, {r4, r5, r6, r7}\n\t" + "sbcs r4, r4, r9\n\t" + "sbcs r5, r5, r10\n\t" + "sbcs r6, r6, r11\n\t" + "sbcs r7, r7, r12\n\t" "sbcs r8, r8, #0\n\t" - "sbcs r9, r9, #0\n\t" - "sbcs r10, r10, r2\n\t" - "sbc r11, r11, r1\n\t" - "stm %[a], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "sbc lr, lr, #0\n\t" + /* mask m and sub from result if overflow */ + "rsb lr, lr, #0\n\t" + "subs r1, r1, lr\n\t" + "sbcs r2, r2, lr\n\t" + "sbcs r3, r3, lr\n\t" + "sbcs r4, r4, #0\n\t" + "sbcs r5, r5, #0\n\t" + "sbcs r6, r6, #0\n\t" + "sbcs r7, r7, lr, lsr #31\n\t" + "sbc r8, r8, lr\n\t" + "ldr %[a], [sp, #64]\n\t" + "stm %[a], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" + "add sp, sp, #0x44\n\t" : [a] "+r" (a) : - : "memory", "r1", "r2", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); (void)m_p; (void)mp_p; @@ -71801,7 +71774,7 @@ static void sp_256_mont_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "sbcs r8, r8, #0\n\t" "sbcs r9, r9, #0\n\t" "sbcs r10, r10, #0\n\t" - "sbcs r11, r11, lr, LSR #31\n\t" + "sbcs r11, r11, lr, lsr #31\n\t" "sbcs r12, r12, lr\n\t" "sbc %[b], %[b], %[b]\n\t" "sub lr, lr, %[b]\n\t" @@ -71811,7 +71784,7 @@ static void sp_256_mont_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "sbcs r8, r8, #0\n\t" "sbcs r9, r9, #0\n\t" "sbcs r10, r10, #0\n\t" - "sbcs r11, r11, lr, LSR #31\n\t" + "sbcs r11, r11, lr, lsr #31\n\t" "sbc r12, r12, lr\n\t" "stm %[r], {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -71851,7 +71824,7 @@ static void sp_256_mont_dbl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "sbcs r7, r7, #0\n\t" "sbcs r8, r8, #0\n\t" "sbcs r9, r9, #0\n\t" - "sbcs r10, r10, r2, LSR #31\n\t" + "sbcs r10, r10, r2, lsr 
#31\n\t" "sbcs r11, r11, r2\n\t" "sbc %[a], %[a], %[a]\n\t" "sub r2, r2, %[a]\n\t" @@ -71861,7 +71834,7 @@ static void sp_256_mont_dbl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "sbcs r7, r7, #0\n\t" "sbcs r8, r8, #0\n\t" "sbcs r9, r9, #0\n\t" - "sbcs r10, r10, r2, LSR #31\n\t" + "sbcs r10, r10, r2, lsr #31\n\t" "sbc r11, r11, r2\n\t" "stm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" : [r] "+r" (r), [a] "+r" (a) @@ -71901,7 +71874,7 @@ static void sp_256_mont_tpl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "sbcs r7, r7, #0\n\t" "sbcs r8, r8, #0\n\t" "sbcs r9, r9, #0\n\t" - "sbcs r10, r10, r12, LSR #31\n\t" + "sbcs r10, r10, r12, lsr #31\n\t" "sbcs r11, r11, r12\n\t" "sbc r2, r2, r2\n\t" "sub r12, r12, r2\n\t" @@ -71911,7 +71884,7 @@ static void sp_256_mont_tpl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "sbcs r7, r7, #0\n\t" "sbcs r8, r8, #0\n\t" "sbcs r9, r9, #0\n\t" - "sbcs r10, r10, r12, LSR #31\n\t" + "sbcs r10, r10, r12, lsr #31\n\t" "sbc r11, r11, r12\n\t" "ldm %[a]!, {r2, r3}\n\t" "adds r4, r4, r2\n\t" @@ -71933,7 +71906,7 @@ static void sp_256_mont_tpl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "sbcs r7, r7, #0\n\t" "sbcs r8, r8, #0\n\t" "sbcs r9, r9, #0\n\t" - "sbcs r10, r10, r12, LSR #31\n\t" + "sbcs r10, r10, r12, lsr #31\n\t" "sbcs r11, r11, r12\n\t" "sbc r2, r2, r2\n\t" "sub r12, r12, r2\n\t" @@ -71943,7 +71916,7 @@ static void sp_256_mont_tpl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "sbcs r7, r7, #0\n\t" "sbcs r8, r8, #0\n\t" "sbcs r9, r9, #0\n\t" - "sbcs r10, r10, r12, LSR #31\n\t" + "sbcs r10, r10, r12, lsr #31\n\t" "sbc r11, r11, r12\n\t" "stm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" : [r] "+r" (r), [a] "+r" (a) @@ -71988,7 +71961,7 @@ static void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "adcs r8, r8, #0\n\t" "adcs r9, r9, #0\n\t" "adcs r10, r10, #0\n\t" - "adcs r11, r11, lr, LSR #31\n\t" + "adcs r11, r11, lr, lsr #31\n\t" "adcs r12, r12, lr\n\t" "adc lr, lr, 
#0\n\t" "adds r5, r5, lr\n\t" @@ -71997,7 +71970,7 @@ static void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit "adcs r8, r8, #0\n\t" "adcs r9, r9, #0\n\t" "adcs r10, r10, #0\n\t" - "adcs r11, r11, lr, LSR #31\n\t" + "adcs r11, r11, lr, lsr #31\n\t" "adc r12, r12, lr\n\t" "stm %[r], {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -72013,7 +71986,7 @@ static void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a Number to divide. * m Modulus (prime). */ -static void sp_256_div2_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +static void sp_256_mont_div2_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -72023,7 +71996,6 @@ static void sp_256_div2_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_ "ldm %[a], {r4, r5, r6, r7}\n\t" "and r3, r4, #1\n\t" "rsb r8, r3, #0\n\t" - "and r9, r8, #1\n\t" "adds r4, r4, r8\n\t" "adcs r5, r5, r8\n\t" "adcs r6, r6, r8\n\t" @@ -72043,7 +72015,7 @@ static void sp_256_div2_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_ #endif "adcs r4, r4, #0\n\t" "adcs r5, r5, #0\n\t" - "adcs r6, r6, r9\n\t" + "adcs r6, r6, r8, lsr #31\n\t" "adcs r7, r7, r8\n\t" "mov r3, #0\n\t" "adc r3, r3, #0\n\t" @@ -72128,7 +72100,7 @@ static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, /* T2 = Y * Y */ sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod); /* T2 = T2/2 */ - sp_256_div2_8(t2, t2, p256_mod); + sp_256_mont_div2_8(t2, t2, p256_mod); /* Y = Y * X */ sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod); /* X = T1 * T1 */ @@ -72161,7 +72133,8 @@ typedef struct sp_256_proj_point_dbl_8_ctx { * p Point to double. * t Temporary ordinate data. 
*/ -static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) +static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, + const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_256_proj_point_dbl_8_ctx* ctx = (sp_256_proj_point_dbl_8_ctx*)sp_ctx->data; @@ -72235,7 +72208,7 @@ static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con break; case 11: /* T2 = T2/2 */ - sp_256_div2_8(ctx->t2, ctx->t2, p256_mod); + sp_256_mont_div2_8(ctx->t2, ctx->t2, p256_mod); ctx->state = 12; break; case 12: @@ -72945,7 +72918,7 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int i, sp_256_mont_sub_8(y, y, t1, p256_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_256_div2_8(y, y, p256_mod); + sp_256_mont_div2_8(y, y, p256_mod); } /* Convert the projective point to affine. @@ -73423,8 +73396,8 @@ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_256_ecc_mulmod_fast_8(r, g, k, map, ct, heap); @@ -73843,8 +73816,8 @@ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_256_ecc_mulmod_fast_8(r, g, k, map, ct, heap); @@ -76658,8 +76631,8 @@ static void sp_256_mask_8(sp_digit* r, const sp_digit* a, sp_digit m) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[16], t2[9]; sp_digit div, r1; @@ -76699,7 +76672,8 @@ static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_256_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_256_mod_8(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_256_div_8(a, m, NULL, r); } @@ -77268,6 +77242,80 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W #endif /* HAVE_ECC_SIGN */ #ifndef WOLFSSL_SP_SMALL +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "add lr, %[a], #32\n\t" + "\n" + "L_sp_256_sub_8_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc r12, r3, r3\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_256_sub_8_word_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr" + ); + return (uint32_t)(size_t)r; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ static void sp_256_rshift1_8(sp_digit* r_p, const sp_digit* a_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; @@ -77923,7 +77971,7 @@ static int sp_256_mod_inv_8(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && sp_256_cmp_8(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && (sp_256_cmp_8(u, v) >= 0))) { sp_256_sub_8(u, u, v); o = sp_256_sub_8(b, b, d); if (o != 0) @@ -78352,19 +78400,21 @@ static int sp_256_ecc_is_point_8(const sp_point_256* point, if (err == MP_OKAY) { t2 = t1 + 2 * 8; + /* y^2 - x^3 - a.x = b */ sp_256_sqr_8(t1, point->y); (void)sp_256_mod_8(t1, t1, p256_mod); sp_256_sqr_8(t2, point->x); (void)sp_256_mod_8(t2, t2, p256_mod); sp_256_mul_8(t2, t2, point->x); (void)sp_256_mod_8(t2, t2, p256_mod); - (void)sp_256_sub_8(t2, p256_mod, t2); - sp_256_mont_add_8(t1, t1, t2, p256_mod); + sp_256_mont_sub_8(t1, t1, t2, p256_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_256_mont_add_8(t1, t1, point->x, p256_mod); sp_256_mont_add_8(t1, t1, point->x, p256_mod); sp_256_mont_add_8(t1, t1, 
point->x, p256_mod); + if (sp_256_cmp_8(t1, p256_b) != 0) { err = MP_VAL; } @@ -87839,87 +87889,6 @@ static sp_digit sp_384_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit return (uint32_t)(size_t)r; } -#endif /* WOLFSSL_SP_SMALL */ -#ifdef WOLFSSL_SP_SMALL -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. - */ -static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) -{ - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - - __asm__ __volatile__ ( - "mov r12, #0\n\t" - "add lr, %[a], #48\n\t" - "\n" - "L_sp_384_sub_12_word_%=: \n\t" - "rsbs r12, r12, #0\n\t" - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "sbc r12, r3, r3\n\t" - "cmp %[a], lr\n\t" - "bne L_sp_384_sub_12_word_%=\n\t" - "mov %[r], r12\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr" - ); - return (uint32_t)(size_t)r; -} - -#else -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) -{ - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - - __asm__ __volatile__ ( - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "subs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "sbc %[r], r6, r6\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); - return (uint32_t)(size_t)r; -} - #endif /* WOLFSSL_SP_SMALL */ /* Multiply a number by Montgomery normalizer mod modulus (prime). * @@ -89394,6 +89363,87 @@ static void sp_384_mont_tpl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi sp_384_cond_sub_12(r, r, m, 0 - o); } +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "add lr, %[a], #48\n\t" + "\n" + "L_sp_384_sub_12_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc r12, r3, r3\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_384_sub_12_word_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr" + ); + return (uint32_t)(size_t)r; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. @@ -89590,7 +89640,7 @@ static void sp_384_rshift1_12(sp_digit* r_p, const sp_digit* a_p) * a Number to divide. * m Modulus (prime). */ -static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_384_mont_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m) { sp_digit o; @@ -89643,7 +89693,7 @@ static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, /* T2 = Y * Y */ sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod); /* T2 = T2/2 */ - sp_384_div2_12(t2, t2, p384_mod); + sp_384_mont_div2_12(t2, t2, p384_mod); /* Y = Y * X */ sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod); /* X = T1 * T1 */ @@ -89676,7 +89726,8 @@ typedef struct sp_384_proj_point_dbl_12_ctx { * p Point to double. 
* t Temporary ordinate data. */ -static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) +static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, + const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_384_proj_point_dbl_12_ctx* ctx = (sp_384_proj_point_dbl_12_ctx*)sp_ctx->data; @@ -89750,7 +89801,7 @@ static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, co break; case 11: /* T2 = T2/2 */ - sp_384_div2_12(ctx->t2, ctx->t2, p384_mod); + sp_384_mont_div2_12(ctx->t2, ctx->t2, p384_mod); ctx->state = 12; break; case 12: @@ -90486,7 +90537,7 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int i, sp_384_mont_sub_12(y, y, t1, p384_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_384_div2_12(y, y, p384_mod); + sp_384_mont_div2_12(y, y, p384_mod); } /* Convert the projective point to affine. @@ -90980,8 +91031,8 @@ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_384_ecc_mulmod_fast_12(r, g, k, map, ct, heap); @@ -91416,8 +91467,8 @@ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_384_ecc_mulmod_fast_12(r, g, k, map, ct, heap); @@ -94376,8 +94427,8 @@ static void sp_384_mask_12(sp_digit* r, const sp_digit* a, sp_digit m) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[24], t2[13]; sp_digit div, r1; @@ -94417,7 +94468,8 @@ static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digi * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_384_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_384_mod_12(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_384_div_12(a, m, NULL, r); } @@ -95865,7 +95917,7 @@ static int sp_384_mod_inv_12(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && sp_384_cmp_12(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && (sp_384_cmp_12(u, v) >= 0))) { sp_384_sub_12(u, u, v); o = sp_384_sub_12(b, b, d); if (o != 0) @@ -96298,19 +96350,21 @@ static int sp_384_ecc_is_point_12(const sp_point_384* point, if (err == MP_OKAY) { t2 = t1 + 2 * 12; + /* y^2 - x^3 - a.x = b */ sp_384_sqr_12(t1, point->y); (void)sp_384_mod_12(t1, t1, p384_mod); sp_384_sqr_12(t2, point->x); (void)sp_384_mod_12(t2, t2, p384_mod); sp_384_mul_12(t2, t2, point->x); (void)sp_384_mod_12(t2, t2, p384_mod); - (void)sp_384_sub_12(t2, p384_mod, t2); - sp_384_mont_add_12(t1, t1, t2, p384_mod); + sp_384_mont_sub_12(t1, t1, t2, p384_mod); + /* y^2 - x^3 
+ 3.x = b, when a = -3 */ sp_384_mont_add_12(t1, t1, point->x, p384_mod); sp_384_mont_add_12(t1, t1, point->x, p384_mod); sp_384_mont_add_12(t1, t1, point->x, p384_mod); + if (sp_384_cmp_12(t1, p384_b) != 0) { err = MP_VAL; } @@ -114178,103 +114232,6 @@ static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit return (uint32_t)(size_t)r; } -#endif /* WOLFSSL_SP_SMALL */ -#ifdef WOLFSSL_SP_SMALL -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. - */ -static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) -{ - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - - __asm__ __volatile__ ( - "mov r12, #0\n\t" - "add lr, %[a], #0x40\n\t" - "\n" - "L_sp_521_sub_17_word_%=: \n\t" - "rsbs r12, r12, #0\n\t" - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "sbc r12, r3, r3\n\t" - "cmp %[a], lr\n\t" - "bne L_sp_521_sub_17_word_%=\n\t" - "rsbs r12, r12, #0\n\t" - "ldm %[a]!, {r3}\n\t" - "ldm %[b]!, {r7}\n\t" - "sbcs r3, r3, r7\n\t" - "stm %[r]!, {r3}\n\t" - "sbc %[r], r6, r6\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr" - ); - return (uint32_t)(size_t)r; -} - -#else -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) -{ - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - - __asm__ __volatile__ ( - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "subs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "ldm %[a]!, {r3}\n\t" - "ldm %[b]!, {r7}\n\t" - "sbcs r3, r3, r7\n\t" - "stm %[r]!, {r3}\n\t" - "sbc %[r], r6, r6\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); - return (uint32_t)(size_t)r; -} - #endif /* WOLFSSL_SP_SMALL */ /* Multiply a number by Montgomery normalizer mod modulus (prime). * @@ -116694,7 +116651,7 @@ static void sp_521_rshift1_17(sp_digit* r_p, const sp_digit* a_p) * a Number to divide. * m Modulus (prime). 
*/ -static void sp_521_div2_17(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_521_mont_div2_17(sp_digit* r, const sp_digit* a, const sp_digit* m) { sp_digit o = a[0] & 1; @@ -116748,7 +116705,7 @@ static void sp_521_proj_point_dbl_17(sp_point_521* r, const sp_point_521* p, /* T2 = Y * Y */ sp_521_mont_sqr_17(t2, y, p521_mod, p521_mp_mod); /* T2 = T2/2 */ - sp_521_div2_17(t2, t2, p521_mod); + sp_521_mont_div2_17(t2, t2, p521_mod); /* Y = Y * X */ sp_521_mont_mul_17(y, y, p->x, p521_mod, p521_mp_mod); /* X = T1 * T1 */ @@ -116781,7 +116738,8 @@ typedef struct sp_521_proj_point_dbl_17_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_521_proj_point_dbl_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, sp_digit* t) +static int sp_521_proj_point_dbl_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, + const sp_point_521* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_521_proj_point_dbl_17_ctx* ctx = (sp_521_proj_point_dbl_17_ctx*)sp_ctx->data; @@ -116855,7 +116813,7 @@ static int sp_521_proj_point_dbl_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, co break; case 11: /* T2 = T2/2 */ - sp_521_div2_17(ctx->t2, ctx->t2, p521_mod); + sp_521_mont_div2_17(ctx->t2, ctx->t2, p521_mod); ctx->state = 12; break; case 12: @@ -117628,7 +117586,7 @@ static void sp_521_proj_point_dbl_n_17(sp_point_521* p, int i, sp_521_mont_sub_17(y, y, t1, p521_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_521_div2_17(y, y, p521_mod); + sp_521_mont_div2_17(y, y, p521_mod); } /* Convert the projective point to affine. @@ -118142,8 +118100,8 @@ static void sp_ecc_get_cache_521(const sp_point_521* g, sp_cache_521_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_521_ecc_mulmod_17(sp_point_521* r, const sp_point_521* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_521_ecc_mulmod_17(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_521_ecc_mulmod_fast_17(r, g, k, map, ct, heap); @@ -118598,8 +118556,8 @@ static void sp_ecc_get_cache_521(const sp_point_521* g, sp_cache_521_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_521_ecc_mulmod_17(sp_point_521* r, const sp_point_521* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_521_ecc_mulmod_17(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_521_ecc_mulmod_fast_17(r, g, k, map, ct, heap); @@ -122730,8 +122688,8 @@ static void sp_521_mask_17(sp_digit* r, const sp_digit* a, sp_digit m) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_521_div_17(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_521_div_17(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[35]; sp_digit t2[18]; @@ -122777,7 +122735,8 @@ static WC_INLINE int sp_521_div_17(const sp_digit* a, const sp_digit* d, sp_digi * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_521_mod_17(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_521_mod_17(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_521_div_17(a, m, NULL, r); } @@ -123336,6 +123295,103 @@ int sp_ecc_sign_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W #endif /* HAVE_ECC_SIGN */ #ifndef WOLFSSL_SP_SMALL +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. 
+ * b A single precision integer. + */ +static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "mov r12, #0\n\t" + "add lr, %[a], #0x40\n\t" + "\n" + "L_sp_521_sub_17_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc r12, r3, r3\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_521_sub_17_word_%=\n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a]!, {r3}\n\t" + "ldm %[b]!, {r7}\n\t" + "sbcs r3, r3, r7\n\t" + "stm %[r]!, {r3}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr" + ); + return (uint32_t)(size_t)r; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3}\n\t" + "ldm %[b]!, {r7}\n\t" + "sbcs r3, r3, r7\n\t" + "stm %[r]!, {r3}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ /* Divide the number by 2 mod the modulus. (r = a / 2 % m) * * r Result of division by 2. 
@@ -124673,7 +124729,7 @@ static int sp_521_mod_inv_17(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && sp_521_cmp_17(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && (sp_521_cmp_17(u, v) >= 0))) { sp_521_sub_17(u, u, v); o = sp_521_sub_17(b, b, d); if (o != 0) @@ -125118,19 +125174,21 @@ static int sp_521_ecc_is_point_17(const sp_point_521* point, if (err == MP_OKAY) { t2 = t1 + 2 * 17; + /* y^2 - x^3 - a.x = b */ sp_521_sqr_17(t1, point->y); (void)sp_521_mod_17(t1, t1, p521_mod); sp_521_sqr_17(t2, point->x); (void)sp_521_mod_17(t2, t2, p521_mod); sp_521_mul_17(t2, t2, point->x); (void)sp_521_mod_17(t2, t2, p521_mod); - (void)sp_521_sub_17(t2, p521_mod, t2); - sp_521_mont_add_17(t1, t1, t2, p521_mod); + sp_521_mont_sub_17(t1, t1, t2, p521_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_521_mont_add_17(t1, t1, point->x, p521_mod); sp_521_mont_add_17(t1, t1, point->x, p521_mod); sp_521_mont_add_17(t1, t1, point->x, p521_mod); + if (sp_521_cmp_17(t1, p521_b) != 0) { err = MP_VAL; } @@ -143336,8 +143394,8 @@ static sp_int32 sp_1024_cmp_32(const sp_digit* a_p, const sp_digit* b_p) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_1024_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_1024_div_32(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[64], t2[33]; sp_digit div, r1; @@ -143377,7 +143435,8 @@ static WC_INLINE int sp_1024_div_32(const sp_digit* a, const sp_digit* d, sp_dig * m A single precision number that is the modulus to reduce with. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_1024_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +static WC_INLINE int sp_1024_mod_32(sp_digit* r, const sp_digit* a, + const sp_digit* m) { return sp_1024_div_32(a, m, NULL, r); } @@ -146416,7 +146475,7 @@ static void sp_1024_rshift1_32(sp_digit* r_p, const sp_digit* a_p) * a Number to divide. * m Modulus (prime). */ -static void sp_1024_div2_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_1024_mont_div2_32(sp_digit* r, const sp_digit* a, const sp_digit* m) { sp_digit o; @@ -146469,7 +146528,7 @@ static void sp_1024_proj_point_dbl_32(sp_point_1024* r, const sp_point_1024* p, /* T2 = Y * Y */ sp_1024_mont_sqr_32(t2, y, p1024_mod, p1024_mp_mod); /* T2 = T2/2 */ - sp_1024_div2_32(t2, t2, p1024_mod); + sp_1024_mont_div2_32(t2, t2, p1024_mod); /* Y = Y * X */ sp_1024_mont_mul_32(y, y, p->x, p1024_mod, p1024_mp_mod); /* X = T1 * T1 */ @@ -146502,7 +146561,8 @@ typedef struct sp_1024_proj_point_dbl_32_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) +static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, + const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_1024_proj_point_dbl_32_ctx* ctx = (sp_1024_proj_point_dbl_32_ctx*)sp_ctx->data; @@ -146576,7 +146636,7 @@ static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, break; case 11: /* T2 = T2/2 */ - sp_1024_div2_32(ctx->t2, ctx->t2, p1024_mod); + sp_1024_mont_div2_32(ctx->t2, ctx->t2, p1024_mod); ctx->state = 12; break; case 12: @@ -146626,122 +146686,6 @@ static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -#ifdef WOLFSSL_SP_SMALL -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static sp_digit sp_1024_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) -{ - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - - __asm__ __volatile__ ( - "mov r12, #0\n\t" - "add lr, %[a], #0x80\n\t" - "\n" - "L_sp_1024_sub_32_word_%=: \n\t" - "rsbs r12, r12, #0\n\t" - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "sbc r12, r3, r3\n\t" - "cmp %[a], lr\n\t" - "bne L_sp_1024_sub_32_word_%=\n\t" - "mov %[r], r12\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr" - ); - return (uint32_t)(size_t)r; -} - -#else -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static sp_digit sp_1024_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) -{ - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - - __asm__ __volatile__ ( - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "subs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "ldm %[a]!, {r3, r4, r5, r6}\n\t" - "ldm %[b]!, {r7, r8, r9, r10}\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "stm %[r]!, {r3, r4, r5, r6}\n\t" - "sbc %[r], r6, r6\n\t" - : [r] "+r" (r), [a] 
"+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); - return (uint32_t)(size_t)r; -} - -#endif /* WOLFSSL_SP_SMALL */ /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -147301,7 +147245,7 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int i, sp_1024_mont_sub_32(y, y, t1, p1024_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_1024_div2_32(y, y, p1024_mod); + sp_1024_mont_div2_32(y, y, p1024_mod); } /* Convert the projective point to affine. @@ -147714,8 +147658,8 @@ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cach * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_1024_ecc_mulmod_fast_32(r, g, k, map, ct, heap); @@ -148069,8 +148013,8 @@ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cach * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_1024_ecc_mulmod_fast_32(r, g, k, map, ct, heap); @@ -154224,7 +154168,7 @@ static void sp_1024_accumulate_line_dbl_32(sp_digit* vx, sp_digit* vy, /* ty = 4 * p.y ^ 2 */ sp_1024_mont_sqr_32(ty, ry, p1024_mod, p1024_mp_mod); /* t1 = 2 * p.y ^ 2 */ - sp_1024_div2_32(t1, ty, p1024_mod); + sp_1024_mont_div2_32(t1, ty, p1024_mod); /* r.x -= 2 * (p.y ^ 2) */ sp_1024_mont_sub_32(rx, rx, t1, p1024_mod); /* p'.z = p.y * 2 * p.z */ @@ -154244,7 +154188,7 @@ static void sp_1024_accumulate_line_dbl_32(sp_digit* vx, sp_digit* vy, /* t1 = (4 * p.y^2) ^ 2 = 16 * p.y^4 */ sp_1024_mont_sqr_32(t1, ty, p1024_mod, p1024_mp_mod); /* t1 = 16 * p.y^4 / 2 = 8 * p.y^4 */ - sp_1024_div2_32(t1, t1, p1024_mod); + sp_1024_mont_div2_32(t1, t1, p1024_mod); /* p'.y = 4 * p.y^2 * p.x */ sp_1024_mont_mul_32(p->y, ty, p->x, p1024_mod, p1024_mp_mod); /* p'.x = l^2 */ @@ -154662,7 +154606,7 @@ static void sp_1024_accumulate_line_dbl_n_32(sp_digit* vx, sp_digit* vy, /* ty = py ^ 2 */ sp_1024_mont_sqr_32(ty, p->y, p1024_mod, p1024_mp_mod); /* t1 = py ^ 2 / 2 */ - sp_1024_div2_32(t1, ty, p1024_mod); + sp_1024_mont_div2_32(t1, ty, p1024_mod); /* r.x -= py ^ 2 / 2 */ sp_1024_mont_sub_32(rx, rx, t1, p1024_mod); /* p'.z = py * pz */ @@ -154700,7 +154644,7 @@ static void sp_1024_accumulate_line_dbl_n_32(sp_digit* vx, sp_digit* vy, } /* p'.y = py' / 2 */ - sp_1024_div2_32(p->y, p->y, p1024_mod); + sp_1024_mont_div2_32(p->y, p->y, p1024_mod); } /* Operations to perform based on order - 1. 
@@ -155540,19 +155484,21 @@ static int sp_1024_ecc_is_point_32(const sp_point_1024* point, if (err == MP_OKAY) { t2 = t1 + 2 * 32; + /* y^2 - x^3 - a.x = b */ sp_1024_sqr_32(t1, point->y); (void)sp_1024_mod_32(t1, t1, p1024_mod); sp_1024_sqr_32(t2, point->x); (void)sp_1024_mod_32(t2, t2, p1024_mod); sp_1024_mul_32(t2, t2, point->x); (void)sp_1024_mod_32(t2, t2, p1024_mod); - (void)sp_1024_sub_32(t2, p1024_mod, t2); - sp_1024_mont_add_32(t1, t1, t2, p1024_mod); + sp_1024_mont_sub_32(t1, t1, t2, p1024_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_1024_mont_add_32(t1, t1, point->x, p1024_mod); sp_1024_mont_add_32(t1, t1, point->x, p1024_mod); sp_1024_mont_add_32(t1, t1, point->x, p1024_mod); + n = sp_1024_cmp_32(t1, p1024_mod); sp_1024_cond_sub_32(t1, t1, p1024_mod, ~(n >> 31)); sp_1024_norm_32(t1); diff --git a/wolfcrypt/src/sp_arm64.c b/wolfcrypt/src/sp_arm64.c index ed7935443..cbacbfe88 100644 --- a/wolfcrypt/src/sp_arm64.c +++ b/wolfcrypt/src/sp_arm64.c @@ -3976,8 +3976,8 @@ static sp_int64 sp_2048_cmp_16(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_2048_div_16(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_2048_div_16(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[32], t2[17]; sp_digit div, r1; @@ -5016,8 +5016,8 @@ static sp_digit div_2048_word_32_cond(sp_digit d1, sp_digit d0, sp_digit div) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_2048_div_32_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_2048_div_32_cond(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[64], t2[33]; sp_digit div, r1; @@ -5583,8 +5583,8 @@ static sp_int64 sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[64], t2[33]; sp_digit div, r1; @@ -13167,8 +13167,8 @@ static sp_int64 sp_3072_cmp_24(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_div_24(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_3072_div_24(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[48], t2[25]; sp_digit div, r1; @@ -14447,8 +14447,8 @@ static sp_digit div_3072_word_48_cond(sp_digit d1, sp_digit d0, sp_digit div) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_div_48_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_3072_div_48_cond(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[96], t2[49]; sp_digit div, r1; @@ -15166,8 +15166,8 @@ static sp_int64 sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[96], t2[49]; sp_digit div, r1; @@ -19406,8 +19406,8 @@ static sp_digit div_4096_word_64_cond(sp_digit d1, sp_digit d0, sp_digit div) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[128], t2[65]; sp_digit div, r1; @@ -20277,8 +20277,8 @@ static sp_int64 sp_4096_cmp_64(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[128], t2[65]; sp_digit div, r1; @@ -23157,9 +23157,9 @@ static void sp_256_mont_tpl_4(sp_digit* r, const sp_digit* a, const sp_digit* m) "sbcs x4, x4, x7\n\t" "sub x8, xzr, x7\n\t" "sbcs x5, x5, xzr\n\t" - "stp x3, x4, [%[r], 0]\n\t" + "stp x3, x4, [%[r],0]\n\t" "sbc x6, x6, x8\n\t" - "stp x5, x6, [%[r], 16]\n\t" + "stp x5, x6, [%[r],16]\n\t" : : [r] "r" (r), [a] "r" (a) : "memory", "x9", "x10", "x11", "x12", "x3", "x4", "x5", "x6", "x7", "x8", "x13", "cc" @@ -23217,25 +23217,25 @@ static void sp_256_mont_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b, * a Number to divide. * m Modulus (prime). 
*/ -static void sp_256_div2_4(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_256_mont_div2_4(sp_digit* r, const sp_digit* a, const sp_digit* m) { __asm__ __volatile__ ( - "ldp x3, x4, [%[a], 0]\n\t" - "ldp x5, x6, [%[a], 16]\n\t" - "sbfx x8, x3, 0, 1\n\t" - "adds x3, x3, x8\n\t" - "lsr x7, x8, 32\n\t" - "adcs x4, x4, x7\n\t" - "sub x8, xzr, x7\n\t" - "adcs x5, x5, xzr\n\t" - "extr x3, x4, x3, 1\n\t" - "adcs x6, x6, x8\n\t" - "extr x4, x5, x4, 1\n\t" - "adc x9, xzr, xzr\n\t" - "extr x5, x6, x5, 1\n\t" - "extr x6, x9, x6, 1\n\t" - "stp x3, x4, [%[r], 0]\n\t" - "stp x5, x6, [%[r], 16]\n\t" + "ldp x3, x4, [%[a], 0]\n\t" + "ldp x5, x6, [%[a], 16]\n\t" + "sbfx x8, x3, 0, 1\n\t" + "adds x3, x3, x8\n\t" + "lsr x7, x8, 32\n\t" + "adcs x4, x4, x7\n\t" + "sub x8, xzr, x7\n\t" + "adcs x5, x5, xzr\n\t" + "extr x3, x4, x3, 1\n\t" + "adcs x6, x6, x8\n\t" + "extr x4, x5, x4, 1\n\t" + "adc x9, xzr, xzr\n\t" + "extr x5, x6, x5, 1\n\t" + "extr x6, x9, x6, 1\n\t" + "stp x3, x4, [%[r], 0]\n\t" + "stp x5, x6, [%[r], 16]\n\t" : : [r] "r" (r), [a] "r" (a), [m] "r" (m) : "memory", "x3", "x4", "x5", "x6", "x7", "x9", "x8", "cc" @@ -23425,7 +23425,7 @@ static void sp_256_proj_point_dbl_4(sp_point_256* r, const sp_point_256* p, /* T2 = Y * Y */ sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod); /* T2 = T2/2 */ - sp_256_div2_4(t2, t2, p256_mod); + sp_256_mont_div2_4(t2, t2, p256_mod); /* Y = Y * X */ sp_256_mont_mul_4(y, y, p->x, p256_mod, p256_mp_mod); /* X = T1 * T1 */ @@ -23455,7 +23455,8 @@ typedef struct sp_256_proj_point_dbl_4_ctx { * p Point to double. * t Temporary ordinate data. 
*/ -static int sp_256_proj_point_dbl_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) +static int sp_256_proj_point_dbl_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, + const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_256_proj_point_dbl_4_ctx* ctx = (sp_256_proj_point_dbl_4_ctx*)sp_ctx->data; @@ -23527,7 +23528,7 @@ static int sp_256_proj_point_dbl_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con break; case 11: /* T2 = T2/2 */ - sp_256_div2_4(ctx->t2, ctx->t2, p256_mod); + sp_256_mont_div2_4(ctx->t2, ctx->t2, p256_mod); ctx->state = 12; break; case 12: @@ -23657,7 +23658,7 @@ static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int i, sp_256_mont_sub_4(y, y, t1, p256_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_256_div2_4(y, y, p256_mod); + sp_256_mont_div2_4(y, y, p256_mod); } /* Compare two numbers to determine if they are equal. @@ -24120,7 +24121,7 @@ static void sp_256_proj_point_dbl_n_store_4(sp_point_256* r, sp_256_mont_mul_4(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_4(y, y, t1, p256_mod); /* Y = Y/2 */ - sp_256_div2_4(r[j].y, y, p256_mod); + sp_256_mont_div2_4(r[j].y, y, p256_mod); r[j].infinity = 0; } } @@ -25007,8 +25008,8 @@ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_256_ecc_mulmod_win_add_sub_4(r, g, k, map, ct, heap); @@ -25436,8 +25437,8 @@ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_256_ecc_mulmod_win_add_sub_4(r, g, k, map, ct, heap); @@ -40265,8 +40266,8 @@ static void sp_256_mask_4(sp_digit* r, const sp_digit* a, sp_digit m) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_256_div_4(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_256_div_4(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[8], t2[5]; sp_digit div, r1; @@ -42137,19 +42138,21 @@ static int sp_256_ecc_is_point_4(const sp_point_256* point, if (err == MP_OKAY) { t2 = t1 + 2 * 4; + /* y^2 - x^3 - a.x = b */ sp_256_sqr_4(t1, point->y); (void)sp_256_mod_4(t1, t1, p256_mod); sp_256_sqr_4(t2, point->x); (void)sp_256_mod_4(t2, t2, p256_mod); sp_256_mul_4(t2, t2, point->x); (void)sp_256_mod_4(t2, t2, p256_mod); - (void)sp_256_sub_4(t2, p256_mod, t2); - sp_256_mont_add_4(t1, t1, t2, p256_mod); + sp_256_mont_sub_4(t1, t1, t2, p256_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_256_mont_add_4(t1, t1, point->x, p256_mod); sp_256_mont_add_4(t1, t1, point->x, p256_mod); sp_256_mont_add_4(t1, t1, point->x, p256_mod); + if (sp_256_cmp_4(t1, p256_b) != 0) { err = MP_VAL; } @@ -44403,7 +44406,7 @@ static void sp_384_rshift1_6(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). 
*/ -static void sp_384_div2_6(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_384_mont_div2_6(sp_digit* r, const sp_digit* a, const sp_digit* m) { sp_digit o; @@ -44456,7 +44459,7 @@ static void sp_384_proj_point_dbl_6(sp_point_384* r, const sp_point_384* p, /* T2 = Y * Y */ sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod); /* T2 = T2/2 */ - sp_384_div2_6(t2, t2, p384_mod); + sp_384_mont_div2_6(t2, t2, p384_mod); /* Y = Y * X */ sp_384_mont_mul_6(y, y, p->x, p384_mod, p384_mp_mod); /* X = T1 * T1 */ @@ -44489,7 +44492,8 @@ typedef struct sp_384_proj_point_dbl_6_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_384_proj_point_dbl_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) +static int sp_384_proj_point_dbl_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, + const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_384_proj_point_dbl_6_ctx* ctx = (sp_384_proj_point_dbl_6_ctx*)sp_ctx->data; @@ -44563,7 +44567,7 @@ static int sp_384_proj_point_dbl_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, con break; case 11: /* T2 = T2/2 */ - sp_384_div2_6(ctx->t2, ctx->t2, p384_mod); + sp_384_mont_div2_6(ctx->t2, ctx->t2, p384_mod); ctx->state = 12; break; case 12: @@ -44701,7 +44705,7 @@ static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int i, sp_384_mont_sub_6(y, y, t1, p384_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_384_div2_6(y, y, p384_mod); + sp_384_mont_div2_6(y, y, p384_mod); } /* Compare two numbers to determine if they are equal. @@ -45087,7 +45091,7 @@ static void sp_384_proj_point_dbl_n_store_6(sp_point_384* r, sp_384_mont_mul_6(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_6(y, y, t1, p384_mod); /* Y = Y/2 */ - sp_384_div2_6(r[j].y, y, p384_mod); + sp_384_mont_div2_6(r[j].y, y, p384_mod); r[j].infinity = 0; } } @@ -45941,8 +45945,8 @@ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) * heap Heap to use for allocation. 
* returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_384_ecc_mulmod_win_add_sub_6(r, g, k, map, ct, heap); @@ -46370,8 +46374,8 @@ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_384_ecc_mulmod_win_add_sub_6(r, g, k, map, ct, heap); @@ -67082,8 +67086,8 @@ static void sp_384_mask_6(sp_digit* r, const sp_digit* a, sp_digit m) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_384_div_6(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_384_div_6(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[12], t2[7]; sp_digit div, r1; @@ -67784,7 +67788,7 @@ static int sp_384_mod_inv_6(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && sp_384_cmp_6(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && (sp_384_cmp_6(u, v) >= 0))) { sp_384_sub_6(u, u, v); o = sp_384_sub_6(b, b, d); if (o != 0) @@ -68211,19 +68215,21 @@ static int sp_384_ecc_is_point_6(const sp_point_384* point, if (err == MP_OKAY) { t2 = t1 + 2 * 6; + /* y^2 - x^3 - a.x = b */ sp_384_sqr_6(t1, point->y); (void)sp_384_mod_6(t1, t1, p384_mod); sp_384_sqr_6(t2, point->x); (void)sp_384_mod_6(t2, t2, p384_mod); sp_384_mul_6(t2, t2, point->x); (void)sp_384_mod_6(t2, t2, p384_mod); - (void)sp_384_sub_6(t2, p384_mod, t2); - sp_384_mont_add_6(t1, t1, t2, p384_mod); + sp_384_mont_sub_6(t1, t1, t2, p384_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_384_mont_add_6(t1, t1, point->x, p384_mod); sp_384_mont_add_6(t1, t1, point->x, p384_mod); sp_384_mont_add_6(t1, t1, point->x, p384_mod); + if (sp_384_cmp_6(t1, p384_b) != 0) { err = MP_VAL; } @@ -70787,8 +70793,8 @@ static sp_int64 sp_521_cmp_9(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_521_div_9(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_521_div_9(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[19]; sp_digit t2[10]; @@ -72774,7 +72780,7 @@ static void sp_521_rshift1_9(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). 
*/ -static void sp_521_div2_9(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_521_mont_div2_9(sp_digit* r, const sp_digit* a, const sp_digit* m) { sp_digit o; @@ -72827,7 +72833,7 @@ static void sp_521_proj_point_dbl_9(sp_point_521* r, const sp_point_521* p, /* T2 = Y * Y */ sp_521_mont_sqr_9(t2, y, p521_mod, p521_mp_mod); /* T2 = T2/2 */ - sp_521_div2_9(t2, t2, p521_mod); + sp_521_mont_div2_9(t2, t2, p521_mod); /* Y = Y * X */ sp_521_mont_mul_9(y, y, p->x, p521_mod, p521_mp_mod); /* X = T1 * T1 */ @@ -72860,7 +72866,8 @@ typedef struct sp_521_proj_point_dbl_9_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_521_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, sp_digit* t) +static int sp_521_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, + const sp_point_521* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_521_proj_point_dbl_9_ctx* ctx = (sp_521_proj_point_dbl_9_ctx*)sp_ctx->data; @@ -72934,7 +72941,7 @@ static int sp_521_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, con break; case 11: /* T2 = T2/2 */ - sp_521_div2_9(ctx->t2, ctx->t2, p521_mod); + sp_521_mont_div2_9(ctx->t2, ctx->t2, p521_mod); ctx->state = 12; break; case 12: @@ -73072,7 +73079,7 @@ static void sp_521_proj_point_dbl_n_9(sp_point_521* p, int i, sp_521_mont_sub_9(y, y, t1, p521_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_521_div2_9(y, y, p521_mod); + sp_521_mont_div2_9(y, y, p521_mod); } /* Compare two numbers to determine if they are equal. @@ -73460,7 +73467,7 @@ static void sp_521_proj_point_dbl_n_store_9(sp_point_521* r, sp_521_mont_mul_9(y, b, a, p521_mod, p521_mp_mod); sp_521_mont_sub_9(y, y, t1, p521_mod); /* Y = Y/2 */ - sp_521_div2_9(r[j].y, y, p521_mod); + sp_521_mont_div2_9(r[j].y, y, p521_mod); r[j].infinity = 0; } } @@ -74359,8 +74366,8 @@ static void sp_ecc_get_cache_521(const sp_point_521* g, sp_cache_521_t** cache) * heap Heap to use for allocation. 
* returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_521_ecc_mulmod_9(sp_point_521* r, const sp_point_521* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_521_ecc_mulmod_9(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_521_ecc_mulmod_win_add_sub_9(r, g, k, map, ct, heap); @@ -74806,8 +74813,8 @@ static void sp_ecc_get_cache_521(const sp_point_521* g, sp_cache_521_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_521_ecc_mulmod_9(sp_point_521* r, const sp_point_521* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_521_ecc_mulmod_9(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_521_ecc_mulmod_win_add_sub_9(r, g, k, map, ct, heap); @@ -112712,7 +112719,7 @@ static int sp_521_mod_inv_9(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && sp_521_cmp_9(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && (sp_521_cmp_9(u, v) >= 0))) { sp_521_sub_9(u, u, v); o = sp_521_sub_9(b, b, d); if (o != 0) @@ -113149,19 +113156,21 @@ static int sp_521_ecc_is_point_9(const sp_point_521* point, if (err == MP_OKAY) { t2 = t1 + 2 * 9; + /* y^2 - x^3 - a.x = b */ sp_521_sqr_9(t1, point->y); (void)sp_521_mod_9(t1, t1, p521_mod); sp_521_sqr_9(t2, point->x); (void)sp_521_mod_9(t2, t2, p521_mod); sp_521_mul_9(t2, t2, point->x); (void)sp_521_mod_9(t2, t2, p521_mod); - (void)sp_521_sub_9(t2, p521_mod, t2); - sp_521_mont_add_9(t1, t1, t2, p521_mod); + sp_521_mont_sub_9(t1, t1, t2, p521_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_521_mont_add_9(t1, t1, point->x, p521_mod); sp_521_mont_add_9(t1, t1, point->x, p521_mod); sp_521_mont_add_9(t1, t1, point->x, p521_mod); + if (sp_521_cmp_9(t1, p521_b) != 0) { err = MP_VAL; } @@ -115479,8 +115488,8 
@@ static sp_int64 sp_1024_cmp_16(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_1024_div_16(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_1024_div_16(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[32], t2[17]; sp_digit div, r1; @@ -116866,7 +116875,7 @@ static void sp_1024_rshift1_16(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). */ -static void sp_1024_div2_16(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_1024_mont_div2_16(sp_digit* r, const sp_digit* a, const sp_digit* m) { sp_digit o; @@ -116919,7 +116928,7 @@ static void sp_1024_proj_point_dbl_16(sp_point_1024* r, const sp_point_1024* p, /* T2 = Y * Y */ sp_1024_mont_sqr_16(t2, y, p1024_mod, p1024_mp_mod); /* T2 = T2/2 */ - sp_1024_div2_16(t2, t2, p1024_mod); + sp_1024_mont_div2_16(t2, t2, p1024_mod); /* Y = Y * X */ sp_1024_mont_mul_16(y, y, p->x, p1024_mod, p1024_mp_mod); /* X = T1 * T1 */ @@ -116952,7 +116961,8 @@ typedef struct sp_1024_proj_point_dbl_16_ctx { * p Point to double. * t Temporary ordinate data. 
*/ -static int sp_1024_proj_point_dbl_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) +static int sp_1024_proj_point_dbl_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, + const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_1024_proj_point_dbl_16_ctx* ctx = (sp_1024_proj_point_dbl_16_ctx*)sp_ctx->data; @@ -117026,7 +117036,7 @@ static int sp_1024_proj_point_dbl_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, break; case 11: /* T2 = T2/2 */ - sp_1024_div2_16(ctx->t2, ctx->t2, p1024_mod); + sp_1024_mont_div2_16(ctx->t2, ctx->t2, p1024_mod); ctx->state = 12; break; case 12: @@ -117164,107 +117174,9 @@ static void sp_1024_proj_point_dbl_n_16(sp_point_1024* p, int i, sp_1024_mont_sub_16(y, y, t1, p1024_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_1024_div2_16(y, y, p1024_mod); + sp_1024_mont_div2_16(y, y, p1024_mod); } -#ifdef WOLFSSL_SP_SMALL -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. - */ -static sp_digit sp_1024_sub_16(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - sp_digit c = 0; - - __asm__ __volatile__ ( - "add x11, %[a], 128\n\t" - "\n1:\n\t" - "subs %[c], xzr, %[c]\n\t" - "ldp x3, x4, [%[a]], #16\n\t" - "ldp x5, x6, [%[a]], #16\n\t" - "ldp x7, x8, [%[b]], #16\n\t" - "sbcs x3, x3, x7\n\t" - "ldp x9, x10, [%[b]], #16\n\t" - "sbcs x4, x4, x8\n\t" - "sbcs x5, x5, x9\n\t" - "stp x3, x4, [%[r]], #16\n\t" - "sbcs x6, x6, x10\n\t" - "stp x5, x6, [%[r]], #16\n\t" - "csetm %[c], cc\n\t" - "cmp %[a], x11\n\t" - "b.ne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "cc" - ); - - return c; -} - -#else -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static sp_digit sp_1024_sub_16(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - __asm__ __volatile__ ( - "ldp x3, x4, [%[a], 0]\n\t" - "ldp x7, x8, [%[b], 0]\n\t" - "subs x3, x3, x7\n\t" - "ldp x5, x6, [%[a], 16]\n\t" - "sbcs x4, x4, x8\n\t" - "ldp x9, x10, [%[b], 16]\n\t" - "sbcs x5, x5, x9\n\t" - "stp x3, x4, [%[r], 0]\n\t" - "sbcs x6, x6, x10\n\t" - "stp x5, x6, [%[r], 16]\n\t" - "ldp x3, x4, [%[a], 32]\n\t" - "ldp x7, x8, [%[b], 32]\n\t" - "sbcs x3, x3, x7\n\t" - "ldp x5, x6, [%[a], 48]\n\t" - "sbcs x4, x4, x8\n\t" - "ldp x9, x10, [%[b], 48]\n\t" - "sbcs x5, x5, x9\n\t" - "stp x3, x4, [%[r], 32]\n\t" - "sbcs x6, x6, x10\n\t" - "stp x5, x6, [%[r], 48]\n\t" - "ldp x3, x4, [%[a], 64]\n\t" - "ldp x7, x8, [%[b], 64]\n\t" - "sbcs x3, x3, x7\n\t" - "ldp x5, x6, [%[a], 80]\n\t" - "sbcs x4, x4, x8\n\t" - "ldp x9, x10, [%[b], 80]\n\t" - "sbcs x5, x5, x9\n\t" - "stp x3, x4, [%[r], 64]\n\t" - "sbcs x6, x6, x10\n\t" - "stp x5, x6, [%[r], 80]\n\t" - "ldp x3, x4, [%[a], 96]\n\t" - "ldp x7, x8, [%[b], 96]\n\t" - "sbcs x3, x3, x7\n\t" - "ldp x5, x6, [%[a], 112]\n\t" - "sbcs x4, x4, x8\n\t" - "ldp x9, x10, [%[b], 112]\n\t" - "sbcs x5, x5, x9\n\t" - "stp x3, x4, [%[r], 96]\n\t" - "sbcs x6, x6, x10\n\t" - "stp x5, x6, [%[r], 112]\n\t" - "csetm %[r], cc\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" - ); - - return (sp_digit)r; -} - -#endif /* WOLFSSL_SP_SMALL */ /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -117653,7 +117565,7 @@ static void sp_1024_proj_point_dbl_n_store_16(sp_point_1024* r, sp_1024_mont_mul_16(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_16(y, y, t1, p1024_mod); /* Y = Y/2 */ - sp_1024_div2_16(r[j].y, y, p1024_mod); + sp_1024_mont_div2_16(r[j].y, y, p1024_mod); r[j].infinity = 0; } } @@ -118373,8 +118285,8 @@ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cach * heap Heap to use for allocation. 
* returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_1024_ecc_mulmod_16(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_1024_ecc_mulmod_16(sp_point_1024* r, const sp_point_1024* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_1024_ecc_mulmod_win_add_sub_16(r, g, k, map, ct, heap); @@ -124034,7 +123946,7 @@ static void sp_1024_accumulate_line_dbl_16(sp_digit* vx, sp_digit* vy, /* ty = 4 * p.y ^ 2 */ sp_1024_mont_sqr_16(ty, ry, p1024_mod, p1024_mp_mod); /* t1 = 2 * p.y ^ 2 */ - sp_1024_div2_16(t1, ty, p1024_mod); + sp_1024_mont_div2_16(t1, ty, p1024_mod); /* r.x -= 2 * (p.y ^ 2) */ sp_1024_mont_sub_16(rx, rx, t1, p1024_mod); /* p'.z = p.y * 2 * p.z */ @@ -124054,7 +123966,7 @@ static void sp_1024_accumulate_line_dbl_16(sp_digit* vx, sp_digit* vy, /* t1 = (4 * p.y^2) ^ 2 = 16 * p.y^4 */ sp_1024_mont_sqr_16(t1, ty, p1024_mod, p1024_mp_mod); /* t1 = 16 * p.y^4 / 2 = 8 * p.y^4 */ - sp_1024_div2_16(t1, t1, p1024_mod); + sp_1024_mont_div2_16(t1, t1, p1024_mod); /* p'.y = 4 * p.y^2 * p.x */ sp_1024_mont_mul_16(p->y, ty, p->x, p1024_mod, p1024_mp_mod); /* p'.x = l^2 */ @@ -124472,7 +124384,7 @@ static void sp_1024_accumulate_line_dbl_n_16(sp_digit* vx, sp_digit* vy, /* ty = py ^ 2 */ sp_1024_mont_sqr_16(ty, p->y, p1024_mod, p1024_mp_mod); /* t1 = py ^ 2 / 2 */ - sp_1024_div2_16(t1, ty, p1024_mod); + sp_1024_mont_div2_16(t1, ty, p1024_mod); /* r.x -= py ^ 2 / 2 */ sp_1024_mont_sub_16(rx, rx, t1, p1024_mod); /* p'.z = py * pz */ @@ -124510,7 +124422,7 @@ static void sp_1024_accumulate_line_dbl_n_16(sp_digit* vx, sp_digit* vy, } /* p'.y = py' / 2 */ - sp_1024_div2_16(p->y, p->y, p1024_mod); + sp_1024_mont_div2_16(p->y, p->y, p1024_mod); } /* Operations to perform based on order - 1. 
@@ -125425,19 +125337,21 @@ static int sp_1024_ecc_is_point_16(const sp_point_1024* point, if (err == MP_OKAY) { t2 = t1 + 2 * 16; + /* y^2 - x^3 - a.x = b */ sp_1024_sqr_16(t1, point->y); (void)sp_1024_mod_16(t1, t1, p1024_mod); sp_1024_sqr_16(t2, point->x); (void)sp_1024_mod_16(t2, t2, p1024_mod); sp_1024_mul_16(t2, t2, point->x); (void)sp_1024_mod_16(t2, t2, p1024_mod); - (void)sp_1024_sub_16(t2, p1024_mod, t2); - sp_1024_mont_add_16(t1, t1, t2, p1024_mod); + sp_1024_mont_sub_16(t1, t1, t2, p1024_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_1024_mont_add_16(t1, t1, point->x, p1024_mod); sp_1024_mont_add_16(t1, t1, point->x, p1024_mod); sp_1024_mont_add_16(t1, t1, point->x, p1024_mod); + n = sp_1024_cmp_16(t1, p1024_mod); sp_1024_cond_sub_16(t1, t1, p1024_mod, ~(n >> 63)); sp_1024_norm_16(t1); diff --git a/wolfcrypt/src/sp_armthumb.c b/wolfcrypt/src/sp_armthumb.c index bf6b671c5..1873ef373 100644 --- a/wolfcrypt/src/sp_armthumb.c +++ b/wolfcrypt/src/sp_armthumb.c @@ -21927,6 +21927,7 @@ SP_NOINLINE static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, return (uint32_t)(size_t)r; } +#define sp_2048_mont_reduce_order_64 sp_2048_mont_reduce_64 /* Reduce the number back to 2048 bits using Montgomery reduction. * * a A single precision number to reduce in place. @@ -23943,8 +23944,8 @@ SP_NOINLINE static sp_int32 sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[64], t2[33]; sp_digit div, r1; @@ -24400,6 +24401,7 @@ SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, return (uint32_t)(size_t)r; } +#define sp_2048_mont_reduce_order_64 sp_2048_mont_reduce_64 /* Reduce the number back to 2048 bits using Montgomery reduction. 
* * a A single precision number to reduce in place. @@ -27317,8 +27319,8 @@ SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[128], t2[65]; sp_digit div, r1; @@ -27530,8 +27532,8 @@ SP_NOINLINE static sp_int32 sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[128], t2[65]; sp_digit div, r1; @@ -73115,6 +73117,7 @@ SP_NOINLINE static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, return (uint32_t)(size_t)r; } +#define sp_3072_mont_reduce_order_96 sp_3072_mont_reduce_96 /* Reduce the number back to 3072 bits using Montgomery reduction. * * a A single precision number to reduce in place. @@ -75403,8 +75406,8 @@ SP_NOINLINE static sp_int32 sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[96], t2[49]; sp_digit div, r1; @@ -75860,6 +75863,7 @@ SP_NOINLINE static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, return (uint32_t)(size_t)r; } +#define sp_3072_mont_reduce_order_96 sp_3072_mont_reduce_96 /* Reduce the number back to 3072 bits using Montgomery reduction. * * a A single precision number to reduce in place. 
@@ -79603,8 +79607,8 @@ SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[192], t2[97]; sp_digit div, r1; @@ -79821,8 +79825,8 @@ SP_NOINLINE static sp_int32 sp_3072_cmp_96(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[192], t2[97]; sp_digit div, r1; @@ -87591,6 +87595,7 @@ SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, return (uint32_t)(size_t)r; } +#define sp_4096_mont_reduce_order_128 sp_4096_mont_reduce_128 /* Reduce the number back to 4096 bits using Montgomery reduction. * * a A single precision number to reduce in place. @@ -92150,8 +92155,8 @@ SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[256], t2[129]; sp_digit div, r1; @@ -92369,8 +92374,8 @@ SP_NOINLINE static sp_int32 sp_4096_cmp_128(const sp_digit* a, * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[256], t2[129]; sp_digit div, r1; @@ -97560,166 +97565,6 @@ SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, return (uint32_t)(size_t)r; } -#endif /* WOLFSSL_SP_SMALL */ -#ifdef WOLFSSL_SP_SMALL -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. - */ -SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - __asm__ __volatile__ ( - "movs r6, %[a]\n\t" - "movs r3, #0\n\t" -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds r6, r6, #32\n\t" -#else - "add r6, r6, #32\n\t" -#endif - "\n" - "L_sp_256_sub_8_word_%=:\n\t" - "movs r5, #0\n\t" -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "subs r5, r5, r3\n\t" -#else - "sub r5, r5, r3\n\t" -#endif - "ldr r4, [%[a]]\n\t" - "ldr r5, [%[b]]\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r5\n\t" -#elif defined(__clang__) - "sbcs r4, r5\n\t" -#else - "sbc r4, r5\n\t" -#endif - "str r4, [%[r]]\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r3\n\t" -#elif defined(__clang__) - "sbcs r3, r3\n\t" -#else - "sbc r3, r3\n\t" -#endif -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds %[a], %[a], #4\n\t" -#else - "add %[a], %[a], #4\n\t" -#endif -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds %[b], %[b], #4\n\t" -#else - "add %[b], %[b], #4\n\t" -#endif -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds %[r], %[r], #4\n\t" -#else - "add %[r], %[r], #4\n\t" -#endif - "cmp %[a], r6\n\t" - "bne L_sp_256_sub_8_word_%=\n\t" - "movs %[r], r3\n\t" - : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) - : - : "memory", "r3", "r4", "r5", "r6" - ); - return (uint32_t)(size_t)r; -} - -#else -/* Sub b from a into r. 
(r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. - */ -SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - __asm__ __volatile__ ( - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "subs r3, r3, r5\n\t" -#else - "sub r3, r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs %[r], %[r], %[r]\n\t" -#elif defined(__clang__) - "sbcs %[r], %[r]\n\t" -#else - "sbc %[r], %[r]\n\t" -#endif - : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) - : - : "memory", "r3", "r4", "r5", "r6" - ); - return (uint32_t)(size_t)r; -} - #endif /* WOLFSSL_SP_SMALL */ /* Multiply a number by Montgomery normalizer mod modulus (prime). 
* @@ -97999,69 +97844,6 @@ static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm) return err; } -/* Conditionally subtract b from a using the mask m. - * m is -1 to subtract and 0 when not copying. - * - * r A single precision number representing condition subtract result. - * a A single precision number to subtract from. - * b A single precision number to subtract. - * m Mask value to apply. - */ -SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, - const sp_digit* b, sp_digit m) -{ - __asm__ __volatile__ ( - "movs r4, #0\n\t" - "movs r5, #32\n\t" - "mov r8, r5\n\t" - "movs r7, #0\n\t" - "\n" - "L_sp_256_cond_sub_8_words_%=:\n\t" - "ldr r6, [%[b], r7]\n\t" -#ifdef WOLFSSL_KEIL - "ands r6, r6, %[m]\n\t" -#elif defined(__clang__) - "ands r6, %[m]\n\t" -#else - "and r6, %[m]\n\t" -#endif - "movs r5, #0\n\t" -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "subs r5, r5, r4\n\t" -#else - "sub r5, r5, r4\n\t" -#endif - "ldr r5, [%[a], r7]\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r5, r5, r6\n\t" -#elif defined(__clang__) - "sbcs r5, r6\n\t" -#else - "sbc r5, r6\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r4\n\t" -#elif defined(__clang__) - "sbcs r4, r4\n\t" -#else - "sbc r4, r4\n\t" -#endif - "str r5, [%[r], r7]\n\t" -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds r7, r7, #4\n\t" -#else - "add r7, r7, #4\n\t" -#endif - "cmp r7, r8\n\t" - "blt L_sp_256_cond_sub_8_words_%=\n\t" - "movs %[r], r4\n\t" - : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) - : - : "memory", "r4", "r5", "r6", "r7", "r8" - ); - return (uint32_t)(size_t)r; -} - /* Reduce the number back to 256 bits using Montgomery reduction. * * a A single precision number to reduce in place. @@ -99281,6 +99063,69 @@ SP_NOINLINE static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b) */ #define sp_256_norm_8(a) +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. 
+ * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) +{ + __asm__ __volatile__ ( + "movs r4, #0\n\t" + "movs r5, #32\n\t" + "mov r8, r5\n\t" + "movs r7, #0\n\t" + "\n" + "L_sp_256_cond_sub_8_words_%=:\n\t" + "ldr r6, [%[b], r7]\n\t" +#ifdef WOLFSSL_KEIL + "ands r6, r6, %[m]\n\t" +#elif defined(__clang__) + "ands r6, %[m]\n\t" +#else + "and r6, %[m]\n\t" +#endif + "movs r5, #0\n\t" +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "subs r5, r5, r4\n\t" +#else + "sub r5, r5, r4\n\t" +#endif + "ldr r5, [%[a], r7]\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r5, r5, r6\n\t" +#elif defined(__clang__) + "sbcs r5, r6\n\t" +#else + "sbc r5, r6\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r4\n\t" +#elif defined(__clang__) + "sbcs r4, r4\n\t" +#else + "sbc r4, r4\n\t" +#endif + "str r5, [%[r], r7]\n\t" +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif + "cmp r7, r8\n\t" + "blt L_sp_256_cond_sub_8_words_%=\n\t" + "movs %[r], r4\n\t" + : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) + : + : "memory", "r4", "r5", "r6", "r7", "r8" + ); + return (uint32_t)(size_t)r; +} + /* Map the Montgomery form projective coordinate point to an affine point. * * r Resulting affine coordinate point. @@ -100202,7 +100047,7 @@ SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, * a Number to divide. * m Modulus (prime). 
*/ -SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_256_mont_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) { (void)m; @@ -100513,7 +100358,7 @@ static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, /* T2 = Y * Y */ sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod); /* T2 = T2/2 */ - sp_256_div2_8(t2, t2, p256_mod); + sp_256_mont_div2_8(t2, t2, p256_mod); /* Y = Y * X */ sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod); /* X = T1 * T1 */ @@ -100546,7 +100391,8 @@ typedef struct sp_256_proj_point_dbl_8_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) +static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, + const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_256_proj_point_dbl_8_ctx* ctx = (sp_256_proj_point_dbl_8_ctx*)sp_ctx->data; @@ -100620,7 +100466,7 @@ static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con break; case 11: /* T2 = T2/2 */ - sp_256_div2_8(ctx->t2, ctx->t2, p256_mod); + sp_256_mont_div2_8(ctx->t2, ctx->t2, p256_mod); ctx->state = 12; break; case 12: @@ -101330,7 +101176,7 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int i, sp_256_mont_sub_8(y, y, t1, p256_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_256_div2_8(y, y, p256_mod); + sp_256_mont_div2_8(y, y, p256_mod); } /* Convert the projective point to affine. @@ -101808,8 +101654,8 @@ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_256_ecc_mulmod_fast_8(r, g, k, map, ct, heap); @@ -102228,8 +102074,8 @@ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_256_ecc_mulmod_fast_8(r, g, k, map, ct, heap); @@ -105430,8 +105276,8 @@ static void sp_256_mask_8(sp_digit* r, const sp_digit* a, sp_digit m) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[16], t2[9]; sp_digit div, r1; @@ -106042,6 +105888,166 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W #endif /* HAVE_ECC_SIGN */ #ifndef WOLFSSL_SP_SMALL +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "movs r6, %[a]\n\t" + "movs r3, #0\n\t" +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "adds r6, r6, #32\n\t" +#else + "add r6, r6, #32\n\t" +#endif + "\n" + "L_sp_256_sub_8_word_%=:\n\t" + "movs r5, #0\n\t" +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "subs r5, r5, r3\n\t" +#else + "sub r5, r5, r3\n\t" +#endif + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r5\n\t" +#elif defined(__clang__) + "sbcs r4, r5\n\t" +#else + "sbc r4, r5\n\t" +#endif + "str r4, [%[r]]\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r3\n\t" +#elif defined(__clang__) + "sbcs r3, r3\n\t" +#else + "sbc r3, r3\n\t" +#endif +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "adds %[b], %[b], #4\n\t" +#else + "add %[b], %[b], #4\n\t" +#endif +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif + "cmp %[a], r6\n\t" + "bne L_sp_256_sub_8_word_%=\n\t" + "movs %[r], r3\n\t" + : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + return (uint32_t)(size_t)r; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldm %[b]!, {r5, r6}\n\t" + "ldm %[a]!, {r3, r4}\n\t" +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "subs r3, r3, r5\n\t" +#else + "sub r3, r3, r5\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r6\n\t" +#elif defined(__clang__) + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif + "stm %[r]!, {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "ldm %[a]!, {r3, r4}\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r5\n\t" +#elif defined(__clang__) + "sbcs r3, r5\n\t" +#else + "sbc r3, r5\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r6\n\t" +#elif defined(__clang__) + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif + "stm %[r]!, {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "ldm %[a]!, {r3, r4}\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r5\n\t" +#elif defined(__clang__) + "sbcs r3, r5\n\t" +#else + "sbc r3, r5\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r6\n\t" +#elif defined(__clang__) + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif + "stm %[r]!, {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "ldm %[a]!, {r3, r4}\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r5\n\t" +#elif defined(__clang__) + "sbcs r3, r5\n\t" +#else + "sbc r3, r5\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r6\n\t" +#elif defined(__clang__) + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif + "stm %[r]!, {r3, r4}\n\t" +#ifdef WOLFSSL_KEIL + "sbcs %[r], %[r], %[r]\n\t" +#elif defined(__clang__) + "sbcs %[r], %[r]\n\t" +#else + "sbc %[r], %[r]\n\t" +#endif + : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ /* Right shift a by 1 bit into r. (r = a >> 1) * * r A single precision integer. 
@@ -107266,7 +107272,7 @@ static int sp_256_mod_inv_8(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && sp_256_cmp_8(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && (sp_256_cmp_8(u, v) >= 0))) { sp_256_sub_8(u, u, v); o = sp_256_sub_8(b, b, d); if (o != 0) @@ -107695,19 +107701,21 @@ static int sp_256_ecc_is_point_8(const sp_point_256* point, if (err == MP_OKAY) { t2 = t1 + 2 * 8; + /* y^2 - x^3 - a.x = b */ sp_256_sqr_8(t1, point->y); (void)sp_256_mod_8(t1, t1, p256_mod); sp_256_sqr_8(t2, point->x); (void)sp_256_mod_8(t2, t2, p256_mod); sp_256_mul_8(t2, t2, point->x); (void)sp_256_mod_8(t2, t2, p256_mod); - (void)sp_256_sub_8(t2, p256_mod, t2); - sp_256_mont_add_8(t1, t1, t2, p256_mod); + sp_256_mont_sub_8(t1, t1, t2, p256_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_256_mont_add_8(t1, t1, point->x, p256_mod); sp_256_mont_add_8(t1, t1, point->x, p256_mod); sp_256_mont_add_8(t1, t1, point->x, p256_mod); + if (sp_256_cmp_8(t1, p256_b) != 0) { err = MP_VAL; } @@ -109173,200 +109181,6 @@ SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, return (uint32_t)(size_t)r; } -#endif /* WOLFSSL_SP_SMALL */ -#ifdef WOLFSSL_SP_SMALL -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - __asm__ __volatile__ ( - "movs r6, %[a]\n\t" - "movs r3, #0\n\t" -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds r6, r6, #48\n\t" -#else - "add r6, r6, #48\n\t" -#endif - "\n" - "L_sp_384_sub_12_word_%=:\n\t" - "movs r5, #0\n\t" -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "subs r5, r5, r3\n\t" -#else - "sub r5, r5, r3\n\t" -#endif - "ldr r4, [%[a]]\n\t" - "ldr r5, [%[b]]\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r5\n\t" -#elif defined(__clang__) - "sbcs r4, r5\n\t" -#else - "sbc r4, r5\n\t" -#endif - "str r4, [%[r]]\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r3\n\t" -#elif defined(__clang__) - "sbcs r3, r3\n\t" -#else - "sbc r3, r3\n\t" -#endif -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds %[a], %[a], #4\n\t" -#else - "add %[a], %[a], #4\n\t" -#endif -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds %[b], %[b], #4\n\t" -#else - "add %[b], %[b], #4\n\t" -#endif -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds %[r], %[r], #4\n\t" -#else - "add %[r], %[r], #4\n\t" -#endif - "cmp %[a], r6\n\t" - "bne L_sp_384_sub_12_word_%=\n\t" - "movs %[r], r3\n\t" - : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) - : - : "memory", "r3", "r4", "r5", "r6" - ); - return (uint32_t)(size_t)r; -} - -#else -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - __asm__ __volatile__ ( - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "subs r3, r3, r5\n\t" -#else - "sub r3, r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, 
r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs %[r], %[r], %[r]\n\t" -#elif defined(__clang__) - "sbcs %[r], %[r]\n\t" -#else - "sbc %[r], %[r]\n\t" -#endif - : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) - : - : "memory", "r3", "r4", "r5", "r6" - ); - return (uint32_t)(size_t)r; -} - #endif /* WOLFSSL_SP_SMALL */ /* Multiply a number by Montgomery normalizer mod modulus (prime). * @@ -110801,6 +110615,200 @@ SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, sp_384_cond_sub_12(r, r, m, 0 - o); } +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "movs r6, %[a]\n\t" + "movs r3, #0\n\t" +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "adds r6, r6, #48\n\t" +#else + "add r6, r6, #48\n\t" +#endif + "\n" + "L_sp_384_sub_12_word_%=:\n\t" + "movs r5, #0\n\t" +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "subs r5, r5, r3\n\t" +#else + "sub r5, r5, r3\n\t" +#endif + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r5\n\t" +#elif defined(__clang__) + "sbcs r4, r5\n\t" +#else + "sbc r4, r5\n\t" +#endif + "str r4, [%[r]]\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r3\n\t" +#elif defined(__clang__) + "sbcs r3, r3\n\t" +#else + "sbc r3, r3\n\t" +#endif +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "adds %[b], %[b], #4\n\t" +#else + "add %[b], %[b], #4\n\t" +#endif +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif + "cmp %[a], r6\n\t" + "bne L_sp_384_sub_12_word_%=\n\t" + "movs %[r], 
r3\n\t" + : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + return (uint32_t)(size_t)r; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. + */ +SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldm %[b]!, {r5, r6}\n\t" + "ldm %[a]!, {r3, r4}\n\t" +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "subs r3, r3, r5\n\t" +#else + "sub r3, r3, r5\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r6\n\t" +#elif defined(__clang__) + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif + "stm %[r]!, {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "ldm %[a]!, {r3, r4}\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r5\n\t" +#elif defined(__clang__) + "sbcs r3, r5\n\t" +#else + "sbc r3, r5\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r6\n\t" +#elif defined(__clang__) + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif + "stm %[r]!, {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "ldm %[a]!, {r3, r4}\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r5\n\t" +#elif defined(__clang__) + "sbcs r3, r5\n\t" +#else + "sbc r3, r5\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r6\n\t" +#elif defined(__clang__) + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif + "stm %[r]!, {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "ldm %[a]!, {r3, r4}\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r5\n\t" +#elif defined(__clang__) + "sbcs r3, r5\n\t" +#else + "sbc r3, r5\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r6\n\t" +#elif defined(__clang__) + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif + "stm %[r]!, {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "ldm %[a]!, {r3, r4}\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r5\n\t" +#elif defined(__clang__) + "sbcs r3, r5\n\t" +#else + "sbc r3, r5\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r6\n\t" +#elif defined(__clang__) + "sbcs r4, 
r6\n\t" +#else + "sbc r4, r6\n\t" +#endif + "stm %[r]!, {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "ldm %[a]!, {r3, r4}\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r5\n\t" +#elif defined(__clang__) + "sbcs r3, r5\n\t" +#else + "sbc r3, r5\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r6\n\t" +#elif defined(__clang__) + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif + "stm %[r]!, {r3, r4}\n\t" +#ifdef WOLFSSL_KEIL + "sbcs %[r], %[r], %[r]\n\t" +#elif defined(__clang__) + "sbcs %[r], %[r]\n\t" +#else + "sbc %[r], %[r]\n\t" +#endif + : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -111122,7 +111130,7 @@ static void sp_384_rshift1_12(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). */ -SP_NOINLINE static void sp_384_div2_12(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_384_mont_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m) { sp_digit o; @@ -111176,7 +111184,7 @@ static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, /* T2 = Y * Y */ sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod); /* T2 = T2/2 */ - sp_384_div2_12(t2, t2, p384_mod); + sp_384_mont_div2_12(t2, t2, p384_mod); /* Y = Y * X */ sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod); /* X = T1 * T1 */ @@ -111209,7 +111217,8 @@ typedef struct sp_384_proj_point_dbl_12_ctx { * p Point to double. * t Temporary ordinate data. 
*/ -static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) +static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, + const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_384_proj_point_dbl_12_ctx* ctx = (sp_384_proj_point_dbl_12_ctx*)sp_ctx->data; @@ -111283,7 +111292,7 @@ static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, co break; case 11: /* T2 = T2/2 */ - sp_384_div2_12(ctx->t2, ctx->t2, p384_mod); + sp_384_mont_div2_12(ctx->t2, ctx->t2, p384_mod); ctx->state = 12; break; case 12: @@ -112019,7 +112028,7 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int i, sp_384_mont_sub_12(y, y, t1, p384_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_384_div2_12(y, y, p384_mod); + sp_384_mont_div2_12(y, y, p384_mod); } /* Convert the projective point to affine. @@ -112513,8 +112522,8 @@ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_384_ecc_mulmod_fast_12(r, g, k, map, ct, heap); @@ -112949,8 +112958,8 @@ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_384_ecc_mulmod_fast_12(r, g, k, map, ct, heap); @@ -116227,8 +116236,8 @@ static void sp_384_mask_12(sp_digit* r, const sp_digit* a, sp_digit m) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[24], t2[13]; sp_digit div, r1; @@ -118455,7 +118464,7 @@ static int sp_384_mod_inv_12(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && sp_384_cmp_12(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && (sp_384_cmp_12(u, v) >= 0))) { sp_384_sub_12(u, u, v); o = sp_384_sub_12(b, b, d); if (o != 0) @@ -118888,19 +118897,21 @@ static int sp_384_ecc_is_point_12(const sp_point_384* point, if (err == MP_OKAY) { t2 = t1 + 2 * 12; + /* y^2 - x^3 - a.x = b */ sp_384_sqr_12(t1, point->y); (void)sp_384_mod_12(t1, t1, p384_mod); sp_384_sqr_12(t2, point->x); (void)sp_384_mod_12(t2, t2, p384_mod); sp_384_mul_12(t2, t2, point->x); (void)sp_384_mod_12(t2, t2, p384_mod); - (void)sp_384_sub_12(t2, p384_mod, t2); - sp_384_mont_add_12(t1, t1, t2, p384_mod); + sp_384_mont_sub_12(t1, t1, t2, p384_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_384_mont_add_12(t1, t1, point->x, p384_mod); sp_384_mont_add_12(t1, t1, point->x, p384_mod); sp_384_mont_add_12(t1, t1, point->x, p384_mod); + if (sp_384_cmp_12(t1, p384_b) != 0) { err = MP_VAL; } @@ -120452,244 +120463,6 @@ SP_NOINLINE static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, return (uint32_t)(size_t)r; } -#endif /* WOLFSSL_SP_SMALL */ -#ifdef WOLFSSL_SP_SMALL -/* Sub b 
from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. - */ -SP_NOINLINE static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - __asm__ __volatile__ ( - "movs r6, %[a]\n\t" - "movs r3, #0\n\t" -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds r6, r6, #0x44\n\t" -#else - "add r6, r6, #0x44\n\t" -#endif - "\n" - "L_sp_521_sub_17_word_%=:\n\t" - "movs r5, #0\n\t" -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "subs r5, r5, r3\n\t" -#else - "sub r5, r5, r3\n\t" -#endif - "ldr r4, [%[a]]\n\t" - "ldr r5, [%[b]]\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r5\n\t" -#elif defined(__clang__) - "sbcs r4, r5\n\t" -#else - "sbc r4, r5\n\t" -#endif - "str r4, [%[r]]\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r3\n\t" -#elif defined(__clang__) - "sbcs r3, r3\n\t" -#else - "sbc r3, r3\n\t" -#endif -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds %[a], %[a], #4\n\t" -#else - "add %[a], %[a], #4\n\t" -#endif -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds %[b], %[b], #4\n\t" -#else - "add %[b], %[b], #4\n\t" -#endif -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds %[r], %[r], #4\n\t" -#else - "add %[r], %[r], #4\n\t" -#endif - "cmp %[a], r6\n\t" - "bne L_sp_521_sub_17_word_%=\n\t" - "movs %[r], r3\n\t" - : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) - : - : "memory", "r3", "r4", "r5", "r6" - ); - return (uint32_t)(size_t)r; -} - -#else -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -SP_NOINLINE static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - __asm__ __volatile__ ( - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "subs r3, r3, r5\n\t" -#else - "sub r3, r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, 
r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldr r5, [%[b]]\n\t" - "ldr r3, [%[a]]\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif - "str r3, [%[r]]\n\t" -#ifdef WOLFSSL_KEIL - "sbcs %[r], %[r], %[r]\n\t" -#elif defined(__clang__) - "sbcs %[r], %[r]\n\t" -#else - "sbc %[r], %[r]\n\t" -#endif - : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) - : - : "memory", "r3", "r4", "r5", "r6" - ); - return (uint32_t)(size_t)r; -} - #endif /* WOLFSSL_SP_SMALL */ /* Multiply a number by Montgomery normalizer mod modulus (prime). * @@ -124203,75 +123976,6 @@ SP_NOINLINE static void sp_521_mont_tpl_17(sp_digit* r, const sp_digit* a, ); } -/* Conditionally add a and b using the mask m. - * m is -1 to add and 0 when not. - * - * r A single precision number representing conditional add result. - * a A single precision number to add with. - * b A single precision number to add. - * m Mask value to apply. 
- */ -SP_NOINLINE static sp_digit sp_521_cond_add_17(sp_digit* r, const sp_digit* a, - const sp_digit* b, sp_digit m) -{ - __asm__ __volatile__ ( - "movs r4, #0\n\t" - "movs r5, #0x44\n\t" - "mov r8, r5\n\t" - "movs r7, #0\n\t" - "\n" - "L_sp_521_cond_add_17_words_%=:\n\t" - "ldr r6, [%[b], r7]\n\t" -#ifdef WOLFSSL_KEIL - "ands r6, r6, %[m]\n\t" -#elif defined(__clang__) - "ands r6, %[m]\n\t" -#else - "and r6, %[m]\n\t" -#endif - "movs r5, #0\n\t" -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "subs r5, r5, #1\n\t" -#else - "sub r5, r5, #1\n\t" -#endif -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds r5, r5, r4\n\t" -#else - "add r5, r5, r4\n\t" -#endif - "ldr r5, [%[a], r7]\n\t" -#ifdef WOLFSSL_KEIL - "adcs r5, r5, r6\n\t" -#elif defined(__clang__) - "adcs r5, r6\n\t" -#else - "adc r5, r6\n\t" -#endif - "movs r4, #0\n\t" -#ifdef WOLFSSL_KEIL - "adcs r4, r4, r4\n\t" -#elif defined(__clang__) - "adcs r4, r4\n\t" -#else - "adc r4, r4\n\t" -#endif - "str r5, [%[r], r7]\n\t" -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds r7, r7, #4\n\t" -#else - "add r7, r7, #4\n\t" -#endif - "cmp r7, r8\n\t" - "blt L_sp_521_cond_add_17_words_%=\n\t" - "movs %[r], r4\n\t" - : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) - : - : "memory", "r4", "r5", "r6", "r7", "r8" - ); - return (uint32_t)(size_t)r; -} - /* Subtract two Montgomery form numbers (r = a - b % m). * * r Result of subtration. @@ -124612,6 +124316,75 @@ SP_NOINLINE static void sp_521_mont_sub_17(sp_digit* r, const sp_digit* a, ); } +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */ +SP_NOINLINE static sp_digit sp_521_cond_add_17(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) +{ + __asm__ __volatile__ ( + "movs r4, #0\n\t" + "movs r5, #0x44\n\t" + "mov r8, r5\n\t" + "movs r7, #0\n\t" + "\n" + "L_sp_521_cond_add_17_words_%=:\n\t" + "ldr r6, [%[b], r7]\n\t" +#ifdef WOLFSSL_KEIL + "ands r6, r6, %[m]\n\t" +#elif defined(__clang__) + "ands r6, %[m]\n\t" +#else + "and r6, %[m]\n\t" +#endif + "movs r5, #0\n\t" +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "subs r5, r5, #1\n\t" +#else + "sub r5, r5, #1\n\t" +#endif +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "adds r5, r5, r4\n\t" +#else + "add r5, r5, r4\n\t" +#endif + "ldr r5, [%[a], r7]\n\t" +#ifdef WOLFSSL_KEIL + "adcs r5, r5, r6\n\t" +#elif defined(__clang__) + "adcs r5, r6\n\t" +#else + "adc r5, r6\n\t" +#endif + "movs r4, #0\n\t" +#ifdef WOLFSSL_KEIL + "adcs r4, r4, r4\n\t" +#elif defined(__clang__) + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif + "str r5, [%[r], r7]\n\t" +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif + "cmp r7, r8\n\t" + "blt L_sp_521_cond_add_17_words_%=\n\t" + "movs %[r], r4\n\t" + : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) + : + : "memory", "r4", "r5", "r6", "r7", "r8" + ); + return (uint32_t)(size_t)r; +} + /* Right shift a by 1 bit into r. (r = a >> 1) * * r A single precision integer. @@ -124943,7 +124716,7 @@ static void sp_521_rshift1_17(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). 
*/ -SP_NOINLINE static void sp_521_div2_17(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_521_mont_div2_17(sp_digit* r, const sp_digit* a, const sp_digit* m) { sp_digit o; @@ -124997,7 +124770,7 @@ static void sp_521_proj_point_dbl_17(sp_point_521* r, const sp_point_521* p, /* T2 = Y * Y */ sp_521_mont_sqr_17(t2, y, p521_mod, p521_mp_mod); /* T2 = T2/2 */ - sp_521_div2_17(t2, t2, p521_mod); + sp_521_mont_div2_17(t2, t2, p521_mod); /* Y = Y * X */ sp_521_mont_mul_17(y, y, p->x, p521_mod, p521_mp_mod); /* X = T1 * T1 */ @@ -125030,7 +124803,8 @@ typedef struct sp_521_proj_point_dbl_17_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_521_proj_point_dbl_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, sp_digit* t) +static int sp_521_proj_point_dbl_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, + const sp_point_521* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_521_proj_point_dbl_17_ctx* ctx = (sp_521_proj_point_dbl_17_ctx*)sp_ctx->data; @@ -125104,7 +124878,7 @@ static int sp_521_proj_point_dbl_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, co break; case 11: /* T2 = T2/2 */ - sp_521_div2_17(ctx->t2, ctx->t2, p521_mod); + sp_521_mont_div2_17(ctx->t2, ctx->t2, p521_mod); ctx->state = 12; break; case 12: @@ -125877,7 +125651,7 @@ static void sp_521_proj_point_dbl_n_17(sp_point_521* p, int i, sp_521_mont_sub_17(y, y, t1, p521_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_521_div2_17(y, y, p521_mod); + sp_521_mont_div2_17(y, y, p521_mod); } /* Convert the projective point to affine. @@ -126391,8 +126165,8 @@ static void sp_ecc_get_cache_521(const sp_point_521* g, sp_cache_521_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_521_ecc_mulmod_17(sp_point_521* r, const sp_point_521* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_521_ecc_mulmod_17(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_521_ecc_mulmod_fast_17(r, g, k, map, ct, heap); @@ -126847,8 +126621,8 @@ static void sp_ecc_get_cache_521(const sp_point_521* g, sp_cache_521_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_521_ecc_mulmod_17(sp_point_521* r, const sp_point_521* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_521_ecc_mulmod_17(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_521_ecc_mulmod_fast_17(r, g, k, map, ct, heap); @@ -132400,8 +132174,8 @@ static void sp_521_mask_17(sp_digit* r, const sp_digit* a, sp_digit m) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_521_div_17(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_521_div_17(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[35]; sp_digit t2[18]; @@ -133008,6 +132782,244 @@ int sp_ecc_sign_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W #endif /* HAVE_ECC_SIGN */ #ifndef WOLFSSL_SP_SMALL +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "movs r6, %[a]\n\t" + "movs r3, #0\n\t" +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "adds r6, r6, #0x44\n\t" +#else + "add r6, r6, #0x44\n\t" +#endif + "\n" + "L_sp_521_sub_17_word_%=:\n\t" + "movs r5, #0\n\t" +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "subs r5, r5, r3\n\t" +#else + "sub r5, r5, r3\n\t" +#endif + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r5\n\t" +#elif defined(__clang__) + "sbcs r4, r5\n\t" +#else + "sbc r4, r5\n\t" +#endif + "str r4, [%[r]]\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r3\n\t" +#elif defined(__clang__) + "sbcs r3, r3\n\t" +#else + "sbc r3, r3\n\t" +#endif +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "adds %[b], %[b], #4\n\t" +#else + "add %[b], %[b], #4\n\t" +#endif +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif + "cmp %[a], r6\n\t" + "bne L_sp_521_sub_17_word_%=\n\t" + "movs %[r], r3\n\t" + : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + return (uint32_t)(size_t)r; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +SP_NOINLINE static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, + const sp_digit* b) +{ + __asm__ __volatile__ ( + "ldm %[b]!, {r5, r6}\n\t" + "ldm %[a]!, {r3, r4}\n\t" +#if defined(__clang__) || defined(WOLFSSL_KEIL) + "subs r3, r3, r5\n\t" +#else + "sub r3, r3, r5\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r6\n\t" +#elif defined(__clang__) + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif + "stm %[r]!, {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "ldm %[a]!, {r3, r4}\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r5\n\t" +#elif defined(__clang__) + "sbcs r3, r5\n\t" +#else + "sbc r3, r5\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r6\n\t" +#elif defined(__clang__) + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif + "stm %[r]!, {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "ldm %[a]!, {r3, r4}\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r5\n\t" +#elif defined(__clang__) + "sbcs r3, r5\n\t" +#else + "sbc r3, r5\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r6\n\t" +#elif defined(__clang__) + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif + "stm %[r]!, {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "ldm %[a]!, {r3, r4}\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r5\n\t" +#elif defined(__clang__) + "sbcs r3, r5\n\t" +#else + "sbc r3, r5\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r6\n\t" +#elif defined(__clang__) + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif + "stm %[r]!, {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "ldm %[a]!, {r3, r4}\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r5\n\t" +#elif defined(__clang__) + "sbcs r3, r5\n\t" +#else + "sbc r3, r5\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r6\n\t" +#elif defined(__clang__) + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif + "stm %[r]!, {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "ldm %[a]!, {r3, r4}\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r5\n\t" +#elif defined(__clang__) + "sbcs r3, r5\n\t" +#else + "sbc r3, r5\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, 
r4, r6\n\t" +#elif defined(__clang__) + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif + "stm %[r]!, {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "ldm %[a]!, {r3, r4}\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r5\n\t" +#elif defined(__clang__) + "sbcs r3, r5\n\t" +#else + "sbc r3, r5\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r6\n\t" +#elif defined(__clang__) + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif + "stm %[r]!, {r3, r4}\n\t" + "ldm %[b]!, {r5, r6}\n\t" + "ldm %[a]!, {r3, r4}\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r5\n\t" +#elif defined(__clang__) + "sbcs r3, r5\n\t" +#else + "sbc r3, r5\n\t" +#endif +#ifdef WOLFSSL_KEIL + "sbcs r4, r4, r6\n\t" +#elif defined(__clang__) + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif + "stm %[r]!, {r3, r4}\n\t" + "ldr r5, [%[b]]\n\t" + "ldr r3, [%[a]]\n\t" +#ifdef WOLFSSL_KEIL + "sbcs r3, r3, r5\n\t" +#elif defined(__clang__) + "sbcs r3, r5\n\t" +#else + "sbc r3, r5\n\t" +#endif + "str r3, [%[r]]\n\t" +#ifdef WOLFSSL_KEIL + "sbcs %[r], %[r], %[r]\n\t" +#elif defined(__clang__) + "sbcs %[r], %[r]\n\t" +#else + "sbc %[r], %[r]\n\t" +#endif + : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) + : + : "memory", "r3", "r4", "r5", "r6" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ /* Divide the number by 2 mod the modulus. (r = a / 2 % m) * * r Result of division by 2. 
@@ -135382,7 +135394,7 @@ static int sp_521_mod_inv_17(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && sp_521_cmp_17(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && (sp_521_cmp_17(u, v) >= 0))) { sp_521_sub_17(u, u, v); o = sp_521_sub_17(b, b, d); if (o != 0) @@ -135827,19 +135839,21 @@ static int sp_521_ecc_is_point_17(const sp_point_521* point, if (err == MP_OKAY) { t2 = t1 + 2 * 17; + /* y^2 - x^3 - a.x = b */ sp_521_sqr_17(t1, point->y); (void)sp_521_mod_17(t1, t1, p521_mod); sp_521_sqr_17(t2, point->x); (void)sp_521_mod_17(t2, t2, p521_mod); sp_521_mul_17(t2, t2, point->x); (void)sp_521_mod_17(t2, t2, p521_mod); - (void)sp_521_sub_17(t2, p521_mod, t2); - sp_521_mont_add_17(t1, t1, t2, p521_mod); + sp_521_mont_sub_17(t1, t1, t2, p521_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_521_mont_add_17(t1, t1, point->x, p521_mod); sp_521_mont_add_17(t1, t1, point->x, p521_mod); sp_521_mont_add_17(t1, t1, point->x, p521_mod); + if (sp_521_cmp_17(t1, p521_b) != 0) { err = MP_VAL; } @@ -202364,8 +202378,8 @@ SP_NOINLINE static sp_int32 sp_1024_cmp_32(const sp_digit* a, const sp_digit* b) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_1024_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_1024_div_32(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[64], t2[33]; sp_digit div, r1; @@ -202685,6 +202699,7 @@ static int sp_1024_point_to_ecc_point_32(const sp_point_1024* p, ecc_point* pm) return err; } +#define sp_1024_mont_reduce_order_32 sp_1024_mont_reduce_32 /* Reduce the number back to 1024 bits using Montgomery reduction. * * a A single precision number to reduce in place. @@ -209584,7 +209599,7 @@ static void sp_1024_rshift1_32(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). 
*/ -SP_NOINLINE static void sp_1024_div2_32(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_1024_mont_div2_32(sp_digit* r, const sp_digit* a, const sp_digit* m) { sp_digit o; @@ -209638,7 +209653,7 @@ static void sp_1024_proj_point_dbl_32(sp_point_1024* r, const sp_point_1024* p, /* T2 = Y * Y */ sp_1024_mont_sqr_32(t2, y, p1024_mod, p1024_mp_mod); /* T2 = T2/2 */ - sp_1024_div2_32(t2, t2, p1024_mod); + sp_1024_mont_div2_32(t2, t2, p1024_mod); /* Y = Y * X */ sp_1024_mont_mul_32(y, y, p->x, p1024_mod, p1024_mp_mod); /* X = T1 * T1 */ @@ -209671,7 +209686,8 @@ typedef struct sp_1024_proj_point_dbl_32_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) +static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, + const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_1024_proj_point_dbl_32_ctx* ctx = (sp_1024_proj_point_dbl_32_ctx*)sp_ctx->data; @@ -209745,7 +209761,7 @@ static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, break; case 11: /* T2 = T2/2 */ - sp_1024_div2_32(ctx->t2, ctx->t2, p1024_mod); + sp_1024_mont_div2_32(ctx->t2, ctx->t2, p1024_mod); ctx->state = 12; break; case 12: @@ -209795,370 +209811,6 @@ static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -#ifdef WOLFSSL_SP_SMALL -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -SP_NOINLINE static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - __asm__ __volatile__ ( - "movs r6, %[a]\n\t" - "movs r3, #0\n\t" -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds r6, r6, #0x80\n\t" -#else - "add r6, r6, #0x80\n\t" -#endif - "\n" - "L_sp_1024_sub_32_word_%=:\n\t" - "movs r5, #0\n\t" -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "subs r5, r5, r3\n\t" -#else - "sub r5, r5, r3\n\t" -#endif - "ldr r4, [%[a]]\n\t" - "ldr r5, [%[b]]\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r5\n\t" -#elif defined(__clang__) - "sbcs r4, r5\n\t" -#else - "sbc r4, r5\n\t" -#endif - "str r4, [%[r]]\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r3\n\t" -#elif defined(__clang__) - "sbcs r3, r3\n\t" -#else - "sbc r3, r3\n\t" -#endif -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds %[a], %[a], #4\n\t" -#else - "add %[a], %[a], #4\n\t" -#endif -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds %[b], %[b], #4\n\t" -#else - "add %[b], %[b], #4\n\t" -#endif -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "adds %[r], %[r], #4\n\t" -#else - "add %[r], %[r], #4\n\t" -#endif - "cmp %[a], r6\n\t" - "bne L_sp_1024_sub_32_word_%=\n\t" - "movs %[r], r3\n\t" - : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) - : - : "memory", "r3", "r4", "r5", "r6" - ); - return (uint32_t)(size_t)r; -} - -#else -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -SP_NOINLINE static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - __asm__ __volatile__ ( - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#if defined(__clang__) || defined(WOLFSSL_KEIL) - "subs r3, r3, r5\n\t" -#else - "sub r3, r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, 
r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs 
r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" - "ldm %[b]!, {r5, r6}\n\t" - "ldm %[a]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs r3, r3, r5\n\t" -#elif defined(__clang__) - "sbcs r3, r5\n\t" -#else - "sbc r3, r5\n\t" -#endif -#ifdef WOLFSSL_KEIL - "sbcs r4, r4, r6\n\t" -#elif defined(__clang__) - "sbcs r4, r6\n\t" -#else - "sbc r4, r6\n\t" -#endif - "stm %[r]!, {r3, r4}\n\t" -#ifdef WOLFSSL_KEIL - "sbcs %[r], %[r], %[r]\n\t" -#elif defined(__clang__) - "sbcs %[r], %[r]\n\t" -#else - "sbc %[r], %[r]\n\t" -#endif - : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) - : - : "memory", "r3", "r4", "r5", "r6" - ); - return (uint32_t)(size_t)r; -} - -#endif /* WOLFSSL_SP_SMALL */ /* Compare two numbers to determine if they are equal. * Constant time implementation. 
* @@ -210718,7 +210370,7 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int i, sp_1024_mont_sub_32(y, y, t1, p1024_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_1024_div2_32(y, y, p1024_mod); + sp_1024_mont_div2_32(y, y, p1024_mod); } /* Convert the projective point to affine. @@ -211131,8 +210783,8 @@ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cach * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_1024_ecc_mulmod_fast_32(r, g, k, map, ct, heap); @@ -211486,8 +211138,8 @@ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cach * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_1024_ecc_mulmod_fast_32(r, g, k, map, ct, heap); @@ -217641,7 +217293,7 @@ static void sp_1024_accumulate_line_dbl_32(sp_digit* vx, sp_digit* vy, /* ty = 4 * p.y ^ 2 */ sp_1024_mont_sqr_32(ty, ry, p1024_mod, p1024_mp_mod); /* t1 = 2 * p.y ^ 2 */ - sp_1024_div2_32(t1, ty, p1024_mod); + sp_1024_mont_div2_32(t1, ty, p1024_mod); /* r.x -= 2 * (p.y ^ 2) */ sp_1024_mont_sub_32(rx, rx, t1, p1024_mod); /* p'.z = p.y * 2 * p.z */ @@ -217661,7 +217313,7 @@ static void sp_1024_accumulate_line_dbl_32(sp_digit* vx, sp_digit* vy, /* t1 = (4 * p.y^2) ^ 2 = 16 * p.y^4 */ sp_1024_mont_sqr_32(t1, ty, p1024_mod, p1024_mp_mod); /* t1 = 16 * p.y^4 / 2 = 8 * p.y^4 */ - sp_1024_div2_32(t1, t1, p1024_mod); + sp_1024_mont_div2_32(t1, t1, p1024_mod); /* p'.y = 4 * p.y^2 * p.x */ sp_1024_mont_mul_32(p->y, ty, p->x, p1024_mod, p1024_mp_mod); /* p'.x = l^2 */ @@ -218079,7 +217731,7 @@ static void sp_1024_accumulate_line_dbl_n_32(sp_digit* vx, sp_digit* vy, /* ty = py ^ 2 */ sp_1024_mont_sqr_32(ty, p->y, p1024_mod, p1024_mp_mod); /* t1 = py ^ 2 / 2 */ - sp_1024_div2_32(t1, ty, p1024_mod); + sp_1024_mont_div2_32(t1, ty, p1024_mod); /* r.x -= py ^ 2 / 2 */ sp_1024_mont_sub_32(rx, rx, t1, p1024_mod); /* p'.z = py * pz */ @@ -218117,7 +217769,7 @@ static void sp_1024_accumulate_line_dbl_n_32(sp_digit* vx, sp_digit* vy, } /* p'.y = py' / 2 */ - sp_1024_div2_32(p->y, p->y, p1024_mod); + sp_1024_mont_div2_32(p->y, p->y, p1024_mod); } /* Operations to perform based on order - 1. 
@@ -218957,19 +218609,21 @@ static int sp_1024_ecc_is_point_32(const sp_point_1024* point, if (err == MP_OKAY) { t2 = t1 + 2 * 32; + /* y^2 - x^3 - a.x = b */ sp_1024_sqr_32(t1, point->y); (void)sp_1024_mod_32(t1, t1, p1024_mod); sp_1024_sqr_32(t2, point->x); (void)sp_1024_mod_32(t2, t2, p1024_mod); sp_1024_mul_32(t2, t2, point->x); (void)sp_1024_mod_32(t2, t2, p1024_mod); - (void)sp_1024_sub_32(t2, p1024_mod, t2); - sp_1024_mont_add_32(t1, t1, t2, p1024_mod); + sp_1024_mont_sub_32(t1, t1, t2, p1024_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_1024_mont_add_32(t1, t1, point->x, p1024_mod); sp_1024_mont_add_32(t1, t1, point->x, p1024_mod); sp_1024_mont_add_32(t1, t1, point->x, p1024_mod); + n = sp_1024_cmp_32(t1, p1024_mod); sp_1024_cond_sub_32(t1, t1, p1024_mod, ~(n >> 31)); sp_1024_norm_32(t1); diff --git a/wolfcrypt/src/sp_c32.c b/wolfcrypt/src/sp_c32.c index dc5c3385d..2f011818f 100644 --- a/wolfcrypt/src/sp_c32.c +++ b/wolfcrypt/src/sp_c32.c @@ -87,11 +87,14 @@ #define SP_PRINT_INT(var, name) \ fprintf(stderr, name "=%d\n", var) -#if (((!defined(WC_NO_CACHE_RESISTANT) && \ - (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH))) || \ - (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP))) && \ +#if ((defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && \ + ((!defined(WC_NO_CACHE_RESISTANT) && \ + (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH))) || \ + (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP))) && \ !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || (defined(WOLFSSL_SP_SMALL) && \ - defined(WOLFSSL_HAVE_SP_ECC)) + defined(WOLFSSL_HAVE_SP_ECC) && (!defined(WOLFSSL_SP_NO_256) || \ + defined(WOLFSSL_SP_384) || defined(WOLFSSL_SP_521) || \ + defined(WOLFSSL_SP_1024))) /* Mask for address to obfuscate which of the two address will be used. 
*/ static const size_t addr_mask[2] = { 0, (size_t)-1 }; #endif @@ -21259,7 +21262,8 @@ SP_NOINLINE static void sp_256_rshift1_9(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). */ -static void sp_256_div2_9(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_256_mont_div2_9(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_256_cond_add_9(r, a, m, 0 - (a[0] & 1)); sp_256_norm_9(r); @@ -21310,7 +21314,7 @@ static void sp_256_proj_point_dbl_9(sp_point_256* r, const sp_point_256* p, /* T2 = Y * Y */ sp_256_mont_sqr_9(t2, y, p256_mod, p256_mp_mod); /* T2 = T2/2 */ - sp_256_div2_9(t2, t2, p256_mod); + sp_256_mont_div2_9(t2, t2, p256_mod); /* Y = Y * X */ sp_256_mont_mul_9(y, y, p->x, p256_mod, p256_mp_mod); /* X = T1 * T1 */ @@ -21343,7 +21347,8 @@ typedef struct sp_256_proj_point_dbl_9_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_256_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) +static int sp_256_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, + const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_256_proj_point_dbl_9_ctx* ctx = (sp_256_proj_point_dbl_9_ctx*)sp_ctx->data; @@ -21417,7 +21422,7 @@ static int sp_256_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con break; case 11: /* T2 = T2/2 */ - sp_256_div2_9(ctx->t2, ctx->t2, p256_mod); + sp_256_mont_div2_9(ctx->t2, ctx->t2, p256_mod); ctx->state = 12; break; case 12: @@ -22277,7 +22282,7 @@ static void sp_256_proj_point_dbl_n_9(sp_point_256* p, int i, sp_256_mont_sub_9(y, y, t1, p256_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_256_div2_9(y, y, p256_mod); + sp_256_mont_div2_9(y, y, p256_mod); } /* Double the Montgomery form projective point p a number of times. 
@@ -22348,7 +22353,7 @@ static void sp_256_proj_point_dbl_n_store_9(sp_point_256* r, sp_256_mont_mul_9(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_9(y, y, t1, p256_mod); /* Y = Y/2 */ - sp_256_div2_9(r[j].y, y, p256_mod); + sp_256_mont_div2_9(r[j].y, y, p256_mod); r[j].infinity = 0; } } @@ -23191,8 +23196,8 @@ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_256_ecc_mulmod_9(sp_point_256* r, const sp_point_256* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_256_ecc_mulmod_9(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_256_ecc_mulmod_win_add_sub_9(r, g, k, map, ct, heap); @@ -26116,8 +26121,8 @@ static int sp_256_mod_inv_9(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && - sp_256_cmp_9(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && + (sp_256_cmp_9(u, v) >= 0))) { sp_256_sub_9(u, u, v); sp_256_norm_9(u); @@ -26563,19 +26568,21 @@ static int sp_256_ecc_is_point_9(const sp_point_256* point, if (err == MP_OKAY) { t2 = t1 + 2 * 9; + /* y^2 - x^3 - a.x = b */ sp_256_sqr_9(t1, point->y); (void)sp_256_mod_9(t1, t1, p256_mod); sp_256_sqr_9(t2, point->x); (void)sp_256_mod_9(t2, t2, p256_mod); sp_256_mul_9(t2, t2, point->x); (void)sp_256_mod_9(t2, t2, p256_mod); - (void)sp_256_sub_9(t2, p256_mod, t2); - sp_256_mont_add_9(t1, t1, t2, p256_mod); + sp_256_mont_sub_9(t1, t1, t2, p256_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_256_mont_add_9(t1, t1, point->x, p256_mod); sp_256_mont_add_9(t1, t1, point->x, p256_mod); sp_256_mont_add_9(t1, t1, point->x, p256_mod); + if (sp_256_cmp_9(t1, p256_b) != 0) { err = MP_VAL; } @@ -28691,7 +28698,8 @@ SP_NOINLINE static void sp_384_rshift1_15(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). 
*/ -static void sp_384_div2_15(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_384_mont_div2_15(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_384_cond_add_15(r, a, m, 0 - (a[0] & 1)); sp_384_norm_15(r); @@ -28742,7 +28750,7 @@ static void sp_384_proj_point_dbl_15(sp_point_384* r, const sp_point_384* p, /* T2 = Y * Y */ sp_384_mont_sqr_15(t2, y, p384_mod, p384_mp_mod); /* T2 = T2/2 */ - sp_384_div2_15(t2, t2, p384_mod); + sp_384_mont_div2_15(t2, t2, p384_mod); /* Y = Y * X */ sp_384_mont_mul_15(y, y, p->x, p384_mod, p384_mp_mod); /* X = T1 * T1 */ @@ -28775,7 +28783,8 @@ typedef struct sp_384_proj_point_dbl_15_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_384_proj_point_dbl_15_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) +static int sp_384_proj_point_dbl_15_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, + const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_384_proj_point_dbl_15_ctx* ctx = (sp_384_proj_point_dbl_15_ctx*)sp_ctx->data; @@ -28849,7 +28858,7 @@ static int sp_384_proj_point_dbl_15_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, co break; case 11: /* T2 = T2/2 */ - sp_384_div2_15(ctx->t2, ctx->t2, p384_mod); + sp_384_mont_div2_15(ctx->t2, ctx->t2, p384_mod); ctx->state = 12; break; case 12: @@ -29767,7 +29776,7 @@ static void sp_384_proj_point_dbl_n_15(sp_point_384* p, int i, sp_384_mont_sub_15(y, y, t1, p384_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_384_div2_15(y, y, p384_mod); + sp_384_mont_div2_15(y, y, p384_mod); } /* Double the Montgomery form projective point p a number of times. 
@@ -29838,7 +29847,7 @@ static void sp_384_proj_point_dbl_n_store_15(sp_point_384* r, sp_384_mont_mul_15(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_15(y, y, t1, p384_mod); /* Y = Y/2 */ - sp_384_div2_15(r[j].y, y, p384_mod); + sp_384_mont_div2_15(r[j].y, y, p384_mod); r[j].infinity = 0; } } @@ -30741,8 +30750,8 @@ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_384_ecc_mulmod_win_add_sub_15(r, g, k, map, ct, heap); @@ -34187,8 +34196,8 @@ static int sp_384_mod_inv_15(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && - sp_384_cmp_15(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && + (sp_384_cmp_15(u, v) >= 0))) { sp_384_sub_15(u, u, v); sp_384_norm_15(u); @@ -34640,19 +34649,21 @@ static int sp_384_ecc_is_point_15(const sp_point_384* point, if (err == MP_OKAY) { t2 = t1 + 2 * 15; + /* y^2 - x^3 - a.x = b */ sp_384_sqr_15(t1, point->y); (void)sp_384_mod_15(t1, t1, p384_mod); sp_384_sqr_15(t2, point->x); (void)sp_384_mod_15(t2, t2, p384_mod); sp_384_mul_15(t2, t2, point->x); (void)sp_384_mod_15(t2, t2, p384_mod); - (void)sp_384_sub_15(t2, p384_mod, t2); - sp_384_mont_add_15(t1, t1, t2, p384_mod); + sp_384_mont_sub_15(t1, t1, t2, p384_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_384_mont_add_15(t1, t1, point->x, p384_mod); sp_384_mont_add_15(t1, t1, point->x, p384_mod); sp_384_mont_add_15(t1, t1, point->x, p384_mod); + if (sp_384_cmp_15(t1, p384_b) != 0) { err = MP_VAL; } @@ -36365,7 +36376,8 @@ SP_NOINLINE static void sp_521_rshift1_21(sp_digit* r, const sp_digit* a) * a Number to divide. 
* m Modulus (prime). */ -static void sp_521_div2_21(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_521_mont_div2_21(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_521_cond_add_21(r, a, m, 0 - (a[0] & 1)); sp_521_norm_21(r); @@ -36416,7 +36428,7 @@ static void sp_521_proj_point_dbl_21(sp_point_521* r, const sp_point_521* p, /* T2 = Y * Y */ sp_521_mont_sqr_21(t2, y, p521_mod, p521_mp_mod); /* T2 = T2/2 */ - sp_521_div2_21(t2, t2, p521_mod); + sp_521_mont_div2_21(t2, t2, p521_mod); /* Y = Y * X */ sp_521_mont_mul_21(y, y, p->x, p521_mod, p521_mp_mod); /* X = T1 * T1 */ @@ -36449,7 +36461,8 @@ typedef struct sp_521_proj_point_dbl_21_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_521_proj_point_dbl_21_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, sp_digit* t) +static int sp_521_proj_point_dbl_21_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, + const sp_point_521* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_521_proj_point_dbl_21_ctx* ctx = (sp_521_proj_point_dbl_21_ctx*)sp_ctx->data; @@ -36523,7 +36536,7 @@ static int sp_521_proj_point_dbl_21_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, co break; case 11: /* T2 = T2/2 */ - sp_521_div2_21(ctx->t2, ctx->t2, p521_mod); + sp_521_mont_div2_21(ctx->t2, ctx->t2, p521_mod); ctx->state = 12; break; case 12: @@ -37306,7 +37319,7 @@ static void sp_521_proj_point_dbl_n_21(sp_point_521* p, int i, sp_521_mont_sub_21(y, y, t1, p521_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_521_div2_21(y, y, p521_mod); + sp_521_mont_div2_21(y, y, p521_mod); } /* Double the Montgomery form projective point p a number of times. 
@@ -37377,7 +37390,7 @@ static void sp_521_proj_point_dbl_n_store_21(sp_point_521* r, sp_521_mont_mul_21(y, b, a, p521_mod, p521_mp_mod); sp_521_mont_sub_21(y, y, t1, p521_mod); /* Y = Y/2 */ - sp_521_div2_21(r[j].y, y, p521_mod); + sp_521_mont_div2_21(r[j].y, y, p521_mod); r[j].infinity = 0; } } @@ -38340,8 +38353,8 @@ static void sp_ecc_get_cache_521(const sp_point_521* g, sp_cache_521_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_521_ecc_mulmod_21(sp_point_521* r, const sp_point_521* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_521_ecc_mulmod_21(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_521_ecc_mulmod_win_add_sub_21(r, g, k, map, ct, heap); @@ -42342,8 +42355,8 @@ static int sp_521_mod_inv_21(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && - sp_521_cmp_21(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && + (sp_521_cmp_21(u, v) >= 0))) { sp_521_sub_21(u, u, v); sp_521_norm_21(u); @@ -42810,19 +42823,21 @@ static int sp_521_ecc_is_point_21(const sp_point_521* point, if (err == MP_OKAY) { t2 = t1 + 2 * 21; + /* y^2 - x^3 - a.x = b */ sp_521_sqr_21(t1, point->y); (void)sp_521_mod_21(t1, t1, p521_mod); sp_521_sqr_21(t2, point->x); (void)sp_521_mod_21(t2, t2, p521_mod); sp_521_mul_21(t2, t2, point->x); (void)sp_521_mod_21(t2, t2, p521_mod); - (void)sp_521_sub_21(t2, p521_mod, t2); - sp_521_mont_add_21(t1, t1, t2, p521_mod); + sp_521_mont_sub_21(t1, t1, t2, p521_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_521_mont_add_21(t1, t1, point->x, p521_mod); sp_521_mont_add_21(t1, t1, point->x, p521_mod); sp_521_mont_add_21(t1, t1, point->x, p521_mod); + if (sp_521_cmp_21(t1, p521_b) != 0) { err = MP_VAL; } @@ -45161,7 +45176,8 @@ SP_NOINLINE static void sp_1024_rshift1_42(sp_digit* r, const sp_digit* a) * a Number to divide. 
* m Modulus (prime). */ -static void sp_1024_div2_42(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_1024_mont_div2_42(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_1024_cond_add_42(r, a, m, 0 - (a[0] & 1)); sp_1024_norm_42(r); @@ -45212,7 +45228,7 @@ static void sp_1024_proj_point_dbl_42(sp_point_1024* r, const sp_point_1024* p, /* T2 = Y * Y */ sp_1024_mont_sqr_42(t2, y, p1024_mod, p1024_mp_mod); /* T2 = T2/2 */ - sp_1024_div2_42(t2, t2, p1024_mod); + sp_1024_mont_div2_42(t2, t2, p1024_mod); /* Y = Y * X */ sp_1024_mont_mul_42(y, y, p->x, p1024_mod, p1024_mp_mod); /* X = T1 * T1 */ @@ -45245,7 +45261,8 @@ typedef struct sp_1024_proj_point_dbl_42_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_1024_proj_point_dbl_42_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) +static int sp_1024_proj_point_dbl_42_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, + const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_1024_proj_point_dbl_42_ctx* ctx = (sp_1024_proj_point_dbl_42_ctx*)sp_ctx->data; @@ -45319,7 +45336,7 @@ static int sp_1024_proj_point_dbl_42_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, break; case 11: /* T2 = T2/2 */ - sp_1024_div2_42(ctx->t2, ctx->t2, p1024_mod); + sp_1024_mont_div2_42(ctx->t2, ctx->t2, p1024_mod); ctx->state = 12; break; case 12: @@ -46136,7 +46153,7 @@ static void sp_1024_proj_point_dbl_n_42(sp_point_1024* p, int i, sp_1024_mont_sub_42(y, y, t1, p1024_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_1024_div2_42(y, y, p1024_mod); + sp_1024_mont_div2_42(y, y, p1024_mod); } /* Double the Montgomery form projective point p a number of times. 
@@ -46207,7 +46224,7 @@ static void sp_1024_proj_point_dbl_n_store_42(sp_point_1024* r, sp_1024_mont_mul_42(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_42(y, y, t1, p1024_mod); /* Y = Y/2 */ - sp_1024_div2_42(r[j].y, y, p1024_mod); + sp_1024_mont_div2_42(r[j].y, y, p1024_mod); r[j].infinity = 0; } } @@ -46921,8 +46938,8 @@ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cach * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_1024_ecc_mulmod_42(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_1024_ecc_mulmod_42(sp_point_1024* r, const sp_point_1024* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_1024_ecc_mulmod_win_add_sub_42(r, g, k, map, ct, heap); @@ -53383,7 +53400,7 @@ static void sp_1024_accumulate_line_dbl_42(sp_digit* vx, sp_digit* vy, /* ty = 4 * p.y ^ 2 */ sp_1024_mont_sqr_42(ty, ry, p1024_mod, p1024_mp_mod); /* t1 = 2 * p.y ^ 2 */ - sp_1024_div2_42(t1, ty, p1024_mod); + sp_1024_mont_div2_42(t1, ty, p1024_mod); /* r.x -= 2 * (p.y ^ 2) */ sp_1024_mont_sub_42(rx, rx, t1, p1024_mod); /* p'.z = p.y * 2 * p.z */ @@ -53403,7 +53420,7 @@ static void sp_1024_accumulate_line_dbl_42(sp_digit* vx, sp_digit* vy, /* t1 = (4 * p.y^2) ^ 2 = 16 * p.y^4 */ sp_1024_mont_sqr_42(t1, ty, p1024_mod, p1024_mp_mod); /* t1 = 16 * p.y^4 / 2 = 8 * p.y^4 */ - sp_1024_div2_42(t1, t1, p1024_mod); + sp_1024_mont_div2_42(t1, t1, p1024_mod); /* p'.y = 4 * p.y^2 * p.x */ sp_1024_mont_mul_42(p->y, ty, p->x, p1024_mod, p1024_mp_mod); /* p'.x = l^2 */ @@ -53821,7 +53838,7 @@ static void sp_1024_accumulate_line_dbl_n_42(sp_digit* vx, sp_digit* vy, /* ty = py ^ 2 */ sp_1024_mont_sqr_42(ty, p->y, p1024_mod, p1024_mp_mod); /* t1 = py ^ 2 / 2 */ - sp_1024_div2_42(t1, ty, p1024_mod); + sp_1024_mont_div2_42(t1, ty, p1024_mod); /* r.x -= py ^ 2 / 2 */ sp_1024_mont_sub_42(rx, rx, t1, p1024_mod); /* p'.z 
= py * pz */ @@ -53859,7 +53876,7 @@ static void sp_1024_accumulate_line_dbl_n_42(sp_digit* vx, sp_digit* vy, } /* p'.y = py' / 2 */ - sp_1024_div2_42(p->y, p->y, p1024_mod); + sp_1024_mont_div2_42(p->y, p->y, p1024_mod); } /* Operations to perform based on order - 1. @@ -54696,19 +54713,21 @@ static int sp_1024_ecc_is_point_42(const sp_point_1024* point, if (err == MP_OKAY) { t2 = t1 + 2 * 42; + /* y^2 - x^3 - a.x = b */ sp_1024_sqr_42(t1, point->y); (void)sp_1024_mod_42(t1, t1, p1024_mod); sp_1024_sqr_42(t2, point->x); (void)sp_1024_mod_42(t2, t2, p1024_mod); sp_1024_mul_42(t2, t2, point->x); (void)sp_1024_mod_42(t2, t2, p1024_mod); - (void)sp_1024_sub_42(t2, p1024_mod, t2); - sp_1024_mont_add_42(t1, t1, t2, p1024_mod); + sp_1024_mont_sub_42(t1, t1, t2, p1024_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_1024_mont_add_42(t1, t1, point->x, p1024_mod); sp_1024_mont_add_42(t1, t1, point->x, p1024_mod); sp_1024_mont_add_42(t1, t1, point->x, p1024_mod); + n = sp_1024_cmp_42(t1, p1024_mod); sp_1024_cond_sub_42(t1, t1, p1024_mod, ~(n >> 24)); sp_1024_norm_42(t1); diff --git a/wolfcrypt/src/sp_c64.c b/wolfcrypt/src/sp_c64.c index d9a55dbe6..38f47e645 100644 --- a/wolfcrypt/src/sp_c64.c +++ b/wolfcrypt/src/sp_c64.c @@ -87,11 +87,14 @@ #define SP_PRINT_INT(var, name) \ fprintf(stderr, name "=%d\n", var) -#if (((!defined(WC_NO_CACHE_RESISTANT) && \ - (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH))) || \ - (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP))) && \ +#if ((defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && \ + ((!defined(WC_NO_CACHE_RESISTANT) && \ + (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH))) || \ + (defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_FAST_MODEXP))) && \ !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || (defined(WOLFSSL_SP_SMALL) && \ - defined(WOLFSSL_HAVE_SP_ECC)) + defined(WOLFSSL_HAVE_SP_ECC) && (!defined(WOLFSSL_SP_NO_256) || \ + defined(WOLFSSL_SP_384) || defined(WOLFSSL_SP_521) || \ + 
defined(WOLFSSL_SP_1024))) /* Mask for address to obfuscate which of the two address will be used. */ static const size_t addr_mask[2] = { 0, (size_t)-1 }; #endif @@ -22231,7 +22234,8 @@ SP_NOINLINE static void sp_256_rshift1_5(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). */ -static void sp_256_div2_5(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_256_mont_div2_5(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_256_cond_add_5(r, a, m, 0 - (a[0] & 1)); sp_256_norm_5(r); @@ -22282,7 +22286,7 @@ static void sp_256_proj_point_dbl_5(sp_point_256* r, const sp_point_256* p, /* T2 = Y * Y */ sp_256_mont_sqr_5(t2, y, p256_mod, p256_mp_mod); /* T2 = T2/2 */ - sp_256_div2_5(t2, t2, p256_mod); + sp_256_mont_div2_5(t2, t2, p256_mod); /* Y = Y * X */ sp_256_mont_mul_5(y, y, p->x, p256_mod, p256_mp_mod); /* X = T1 * T1 */ @@ -22315,7 +22319,8 @@ typedef struct sp_256_proj_point_dbl_5_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_256_proj_point_dbl_5_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) +static int sp_256_proj_point_dbl_5_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, + const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_256_proj_point_dbl_5_ctx* ctx = (sp_256_proj_point_dbl_5_ctx*)sp_ctx->data; @@ -22389,7 +22394,7 @@ static int sp_256_proj_point_dbl_5_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con break; case 11: /* T2 = T2/2 */ - sp_256_div2_5(ctx->t2, ctx->t2, p256_mod); + sp_256_mont_div2_5(ctx->t2, ctx->t2, p256_mod); ctx->state = 12; break; case 12: @@ -23224,7 +23229,7 @@ static void sp_256_proj_point_dbl_n_5(sp_point_256* p, int i, sp_256_mont_sub_5(y, y, t1, p256_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_256_div2_5(y, y, p256_mod); + sp_256_mont_div2_5(y, y, p256_mod); } /* Double the Montgomery form projective point p a number of times. 
@@ -23295,7 +23300,7 @@ static void sp_256_proj_point_dbl_n_store_5(sp_point_256* r, sp_256_mont_mul_5(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_5(y, y, t1, p256_mod); /* Y = Y/2 */ - sp_256_div2_5(r[j].y, y, p256_mod); + sp_256_mont_div2_5(r[j].y, y, p256_mod); r[j].infinity = 0; } } @@ -24098,8 +24103,8 @@ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_256_ecc_mulmod_win_add_sub_5(r, g, k, map, ct, heap); @@ -27014,8 +27019,8 @@ static int sp_256_mod_inv_5(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && - sp_256_cmp_5(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && + (sp_256_cmp_5(u, v) >= 0))) { sp_256_sub_5(u, u, v); sp_256_norm_5(u); @@ -27457,19 +27462,21 @@ static int sp_256_ecc_is_point_5(const sp_point_256* point, if (err == MP_OKAY) { t2 = t1 + 2 * 5; + /* y^2 - x^3 - a.x = b */ sp_256_sqr_5(t1, point->y); (void)sp_256_mod_5(t1, t1, p256_mod); sp_256_sqr_5(t2, point->x); (void)sp_256_mod_5(t2, t2, p256_mod); sp_256_mul_5(t2, t2, point->x); (void)sp_256_mod_5(t2, t2, p256_mod); - (void)sp_256_sub_5(t2, p256_mod, t2); - sp_256_mont_add_5(t1, t1, t2, p256_mod); + sp_256_mont_sub_5(t1, t1, t2, p256_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_256_mont_add_5(t1, t1, point->x, p256_mod); sp_256_mont_add_5(t1, t1, point->x, p256_mod); sp_256_mont_add_5(t1, t1, point->x, p256_mod); + if (sp_256_cmp_5(t1, p256_b) != 0) { err = MP_VAL; } @@ -29151,7 +29158,8 @@ SP_NOINLINE static void sp_384_rshift1_7(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). 
*/ -static void sp_384_div2_7(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_384_mont_div2_7(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_384_cond_add_7(r, a, m, 0 - (a[0] & 1)); sp_384_norm_7(r); @@ -29202,7 +29210,7 @@ static void sp_384_proj_point_dbl_7(sp_point_384* r, const sp_point_384* p, /* T2 = Y * Y */ sp_384_mont_sqr_7(t2, y, p384_mod, p384_mp_mod); /* T2 = T2/2 */ - sp_384_div2_7(t2, t2, p384_mod); + sp_384_mont_div2_7(t2, t2, p384_mod); /* Y = Y * X */ sp_384_mont_mul_7(y, y, p->x, p384_mod, p384_mp_mod); /* X = T1 * T1 */ @@ -29235,7 +29243,8 @@ typedef struct sp_384_proj_point_dbl_7_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_384_proj_point_dbl_7_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) +static int sp_384_proj_point_dbl_7_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, + const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_384_proj_point_dbl_7_ctx* ctx = (sp_384_proj_point_dbl_7_ctx*)sp_ctx->data; @@ -29309,7 +29318,7 @@ static int sp_384_proj_point_dbl_7_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, con break; case 11: /* T2 = T2/2 */ - sp_384_div2_7(ctx->t2, ctx->t2, p384_mod); + sp_384_mont_div2_7(ctx->t2, ctx->t2, p384_mod); ctx->state = 12; break; case 12: @@ -30181,7 +30190,7 @@ static void sp_384_proj_point_dbl_n_7(sp_point_384* p, int i, sp_384_mont_sub_7(y, y, t1, p384_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_384_div2_7(y, y, p384_mod); + sp_384_mont_div2_7(y, y, p384_mod); } /* Double the Montgomery form projective point p a number of times. 
@@ -30252,7 +30261,7 @@ static void sp_384_proj_point_dbl_n_store_7(sp_point_384* r, sp_384_mont_mul_7(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_7(y, y, t1, p384_mod); /* Y = Y/2 */ - sp_384_div2_7(r[j].y, y, p384_mod); + sp_384_mont_div2_7(r[j].y, y, p384_mod); r[j].infinity = 0; } } @@ -31075,8 +31084,8 @@ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_384_ecc_mulmod_win_add_sub_7(r, g, k, map, ct, heap); @@ -34476,8 +34485,8 @@ static int sp_384_mod_inv_7(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && - sp_384_cmp_7(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && + (sp_384_cmp_7(u, v) >= 0))) { sp_384_sub_7(u, u, v); sp_384_norm_7(u); @@ -34921,19 +34930,21 @@ static int sp_384_ecc_is_point_7(const sp_point_384* point, if (err == MP_OKAY) { t2 = t1 + 2 * 7; + /* y^2 - x^3 - a.x = b */ sp_384_sqr_7(t1, point->y); (void)sp_384_mod_7(t1, t1, p384_mod); sp_384_sqr_7(t2, point->x); (void)sp_384_mod_7(t2, t2, p384_mod); sp_384_mul_7(t2, t2, point->x); (void)sp_384_mod_7(t2, t2, p384_mod); - (void)sp_384_sub_7(t2, p384_mod, t2); - sp_384_mont_add_7(t1, t1, t2, p384_mod); + sp_384_mont_sub_7(t1, t1, t2, p384_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_384_mont_add_7(t1, t1, point->x, p384_mod); sp_384_mont_add_7(t1, t1, point->x, p384_mod); sp_384_mont_add_7(t1, t1, point->x, p384_mod); + if (sp_384_cmp_7(t1, p384_b) != 0) { err = MP_VAL; } @@ -36686,7 +36697,8 @@ SP_NOINLINE static void sp_521_rshift1_9(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). 
*/ -static void sp_521_div2_9(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_521_mont_div2_9(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_521_cond_add_9(r, a, m, 0 - (a[0] & 1)); sp_521_norm_9(r); @@ -36737,7 +36749,7 @@ static void sp_521_proj_point_dbl_9(sp_point_521* r, const sp_point_521* p, /* T2 = Y * Y */ sp_521_mont_sqr_9(t2, y, p521_mod, p521_mp_mod); /* T2 = T2/2 */ - sp_521_div2_9(t2, t2, p521_mod); + sp_521_mont_div2_9(t2, t2, p521_mod); /* Y = Y * X */ sp_521_mont_mul_9(y, y, p->x, p521_mod, p521_mp_mod); /* X = T1 * T1 */ @@ -36770,7 +36782,8 @@ typedef struct sp_521_proj_point_dbl_9_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_521_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, sp_digit* t) +static int sp_521_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, + const sp_point_521* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_521_proj_point_dbl_9_ctx* ctx = (sp_521_proj_point_dbl_9_ctx*)sp_ctx->data; @@ -36844,7 +36857,7 @@ static int sp_521_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, con break; case 11: /* T2 = T2/2 */ - sp_521_div2_9(ctx->t2, ctx->t2, p521_mod); + sp_521_mont_div2_9(ctx->t2, ctx->t2, p521_mod); ctx->state = 12; break; case 12: @@ -37598,7 +37611,7 @@ static void sp_521_proj_point_dbl_n_9(sp_point_521* p, int i, sp_521_mont_sub_9(y, y, t1, p521_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_521_div2_9(y, y, p521_mod); + sp_521_mont_div2_9(y, y, p521_mod); } /* Double the Montgomery form projective point p a number of times. 
@@ -37669,7 +37682,7 @@ static void sp_521_proj_point_dbl_n_store_9(sp_point_521* r, sp_521_mont_mul_9(y, b, a, p521_mod, p521_mp_mod); sp_521_mont_sub_9(y, y, t1, p521_mod); /* Y = Y/2 */ - sp_521_div2_9(r[j].y, y, p521_mod); + sp_521_mont_div2_9(r[j].y, y, p521_mod); r[j].infinity = 0; } } @@ -38512,8 +38525,8 @@ static void sp_ecc_get_cache_521(const sp_point_521* g, sp_cache_521_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_521_ecc_mulmod_9(sp_point_521* r, const sp_point_521* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_521_ecc_mulmod_9(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_521_ecc_mulmod_win_add_sub_9(r, g, k, map, ct, heap); @@ -41945,8 +41958,8 @@ static int sp_521_mod_inv_9(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && - sp_521_cmp_9(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && + (sp_521_cmp_9(u, v) >= 0))) { sp_521_sub_9(u, u, v); sp_521_norm_9(u); @@ -42401,19 +42414,21 @@ static int sp_521_ecc_is_point_9(const sp_point_521* point, if (err == MP_OKAY) { t2 = t1 + 2 * 9; + /* y^2 - x^3 - a.x = b */ sp_521_sqr_9(t1, point->y); (void)sp_521_mod_9(t1, t1, p521_mod); sp_521_sqr_9(t2, point->x); (void)sp_521_mod_9(t2, t2, p521_mod); sp_521_mul_9(t2, t2, point->x); (void)sp_521_mod_9(t2, t2, p521_mod); - (void)sp_521_sub_9(t2, p521_mod, t2); - sp_521_mont_add_9(t1, t1, t2, p521_mod); + sp_521_mont_sub_9(t1, t1, t2, p521_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_521_mont_add_9(t1, t1, point->x, p521_mod); sp_521_mont_add_9(t1, t1, point->x, p521_mod); sp_521_mont_add_9(t1, t1, point->x, p521_mod); + if (sp_521_cmp_9(t1, p521_b) != 0) { err = MP_VAL; } @@ -44574,7 +44589,8 @@ SP_NOINLINE static void sp_1024_rshift1_18(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). 
*/ -static void sp_1024_div2_18(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_1024_mont_div2_18(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_1024_cond_add_18(r, a, m, 0 - (a[0] & 1)); sp_1024_norm_18(r); @@ -44625,7 +44641,7 @@ static void sp_1024_proj_point_dbl_18(sp_point_1024* r, const sp_point_1024* p, /* T2 = Y * Y */ sp_1024_mont_sqr_18(t2, y, p1024_mod, p1024_mp_mod); /* T2 = T2/2 */ - sp_1024_div2_18(t2, t2, p1024_mod); + sp_1024_mont_div2_18(t2, t2, p1024_mod); /* Y = Y * X */ sp_1024_mont_mul_18(y, y, p->x, p1024_mod, p1024_mp_mod); /* X = T1 * T1 */ @@ -44658,7 +44674,8 @@ typedef struct sp_1024_proj_point_dbl_18_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_1024_proj_point_dbl_18_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) +static int sp_1024_proj_point_dbl_18_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, + const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_1024_proj_point_dbl_18_ctx* ctx = (sp_1024_proj_point_dbl_18_ctx*)sp_ctx->data; @@ -44732,7 +44749,7 @@ static int sp_1024_proj_point_dbl_18_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, break; case 11: /* T2 = T2/2 */ - sp_1024_div2_18(ctx->t2, ctx->t2, p1024_mod); + sp_1024_mont_div2_18(ctx->t2, ctx->t2, p1024_mod); ctx->state = 12; break; case 12: @@ -45490,7 +45507,7 @@ static void sp_1024_proj_point_dbl_n_18(sp_point_1024* p, int i, sp_1024_mont_sub_18(y, y, t1, p1024_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_1024_div2_18(y, y, p1024_mod); + sp_1024_mont_div2_18(y, y, p1024_mod); } /* Double the Montgomery form projective point p a number of times. 
@@ -45561,7 +45578,7 @@ static void sp_1024_proj_point_dbl_n_store_18(sp_point_1024* r, sp_1024_mont_mul_18(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_18(y, y, t1, p1024_mod); /* Y = Y/2 */ - sp_1024_div2_18(r[j].y, y, p1024_mod); + sp_1024_mont_div2_18(r[j].y, y, p1024_mod); r[j].infinity = 0; } } @@ -46275,8 +46292,8 @@ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cach * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_1024_ecc_mulmod_18(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_1024_ecc_mulmod_18(sp_point_1024* r, const sp_point_1024* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_1024_ecc_mulmod_win_add_sub_18(r, g, k, map, ct, heap); @@ -51967,7 +51984,7 @@ static void sp_1024_accumulate_line_dbl_18(sp_digit* vx, sp_digit* vy, /* ty = 4 * p.y ^ 2 */ sp_1024_mont_sqr_18(ty, ry, p1024_mod, p1024_mp_mod); /* t1 = 2 * p.y ^ 2 */ - sp_1024_div2_18(t1, ty, p1024_mod); + sp_1024_mont_div2_18(t1, ty, p1024_mod); /* r.x -= 2 * (p.y ^ 2) */ sp_1024_mont_sub_18(rx, rx, t1, p1024_mod); /* p'.z = p.y * 2 * p.z */ @@ -51987,7 +52004,7 @@ static void sp_1024_accumulate_line_dbl_18(sp_digit* vx, sp_digit* vy, /* t1 = (4 * p.y^2) ^ 2 = 16 * p.y^4 */ sp_1024_mont_sqr_18(t1, ty, p1024_mod, p1024_mp_mod); /* t1 = 16 * p.y^4 / 2 = 8 * p.y^4 */ - sp_1024_div2_18(t1, t1, p1024_mod); + sp_1024_mont_div2_18(t1, t1, p1024_mod); /* p'.y = 4 * p.y^2 * p.x */ sp_1024_mont_mul_18(p->y, ty, p->x, p1024_mod, p1024_mp_mod); /* p'.x = l^2 */ @@ -52405,7 +52422,7 @@ static void sp_1024_accumulate_line_dbl_n_18(sp_digit* vx, sp_digit* vy, /* ty = py ^ 2 */ sp_1024_mont_sqr_18(ty, p->y, p1024_mod, p1024_mp_mod); /* t1 = py ^ 2 / 2 */ - sp_1024_div2_18(t1, ty, p1024_mod); + sp_1024_mont_div2_18(t1, ty, p1024_mod); /* r.x -= py ^ 2 / 2 */ sp_1024_mont_sub_18(rx, rx, t1, p1024_mod); /* p'.z 
= py * pz */ @@ -52443,7 +52460,7 @@ static void sp_1024_accumulate_line_dbl_n_18(sp_digit* vx, sp_digit* vy, } /* p'.y = py' / 2 */ - sp_1024_div2_18(p->y, p->y, p1024_mod); + sp_1024_mont_div2_18(p->y, p->y, p1024_mod); } /* Operations to perform based on order - 1. @@ -53280,19 +53297,21 @@ static int sp_1024_ecc_is_point_18(const sp_point_1024* point, if (err == MP_OKAY) { t2 = t1 + 2 * 18; + /* y^2 - x^3 - a.x = b */ sp_1024_sqr_18(t1, point->y); (void)sp_1024_mod_18(t1, t1, p1024_mod); sp_1024_sqr_18(t2, point->x); (void)sp_1024_mod_18(t2, t2, p1024_mod); sp_1024_mul_18(t2, t2, point->x); (void)sp_1024_mod_18(t2, t2, p1024_mod); - (void)sp_1024_sub_18(t2, p1024_mod, t2); - sp_1024_mont_add_18(t1, t1, t2, p1024_mod); + sp_1024_mont_sub_18(t1, t1, t2, p1024_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_1024_mont_add_18(t1, t1, point->x, p1024_mod); sp_1024_mont_add_18(t1, t1, point->x, p1024_mod); sp_1024_mont_add_18(t1, t1, point->x, p1024_mod); + n = sp_1024_cmp_18(t1, p1024_mod); sp_1024_cond_sub_18(t1, t1, p1024_mod, ~(n >> 56)); sp_1024_norm_18(t1); diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index f2103078f..95c7820d7 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -4544,8 +4544,8 @@ static sp_int32 sp_2048_cmp_32(const sp_digit* a_p, const sp_digit* b_p) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[64], t2[33]; sp_digit div, r1; @@ -6492,8 +6492,8 @@ static sp_digit div_2048_word_64(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[128], t2[65]; sp_digit div, r1; @@ -7345,8 +7345,8 @@ static sp_int32 sp_2048_cmp_64(const sp_digit* a_p, const sp_digit* b_p) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[128], t2[65]; sp_digit div, r1; @@ -15281,8 +15281,8 @@ static sp_int32 sp_3072_cmp_48(const sp_digit* a_p, const sp_digit* b_p) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[96], t2[49]; sp_digit div, r1; @@ -17813,8 +17813,8 @@ static sp_digit div_3072_word_96(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[192], t2[97]; sp_digit div, r1; @@ -19018,8 +19018,8 @@ static sp_int32 sp_3072_cmp_96(const sp_digit* a_p, const sp_digit* b_p) * r Remainder from the division. * returns MP_OKAY indicating success. 
*/ -static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[192], t2[97]; sp_digit div, r1; @@ -25379,8 +25379,8 @@ static sp_digit div_4096_word_128(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[256], t2[129]; sp_digit div, r1; @@ -26936,8 +26936,8 @@ static sp_int32 sp_4096_cmp_128(const sp_digit* a_p, const sp_digit* b_p) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[256], t2[129]; sp_digit div, r1; @@ -30214,84 +30214,6 @@ static sp_digit sp_256_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* return (uint32_t)(size_t)r; } -#endif /* WOLFSSL_SP_SMALL */ -#ifdef WOLFSSL_SP_SMALL -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) -{ - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - - __asm__ __volatile__ ( - "MOV r11, #0x0\n\t" - "ADD r12, %[a], #0x20\n\t" - "\n" - "L_sp_256_sub_8_word_%=:\n\t" - "RSBS r11, r11, #0x0\n\t" - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SBCS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "SBC r11, r3, r3\n\t" - "CMP %[a], r12\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_256_sub_8_word_%=\n\t" -#else - "BNE.N L_sp_256_sub_8_word_%=\n\t" -#endif - "MOV %[r], r11\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" - ); - return (uint32_t)(size_t)r; -} - -#else -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) -{ - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - - __asm__ __volatile__ ( - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SUBS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SBCS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "SBC %[r], r6, r6\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); - return (uint32_t)(size_t)r; -} - #endif /* WOLFSSL_SP_SMALL */ /* Multiply a number by Montgomery normalizer mod modulus (prime). * @@ -33226,7 +33148,7 @@ static void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit * a Number to divide. * m Modulus (prime). 
*/ -static void sp_256_div2_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) +static void sp_256_mont_div2_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; @@ -33236,7 +33158,6 @@ static void sp_256_div2_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_ "LDM %[a], {r4, r5, r6, r7}\n\t" "AND r3, r4, #0x1\n\t" "RSB r8, r3, #0x0\n\t" - "AND r9, r8, #0x1\n\t" "ADDS r4, r4, r8\n\t" "ADCS r5, r5, r8\n\t" "ADCS r6, r6, r8\n\t" @@ -33246,7 +33167,7 @@ static void sp_256_div2_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_ "LDRD r6, r7, [%[a], #24]\n\t" "ADCS r4, r4, #0x0\n\t" "ADCS r5, r5, #0x0\n\t" - "ADCS r6, r6, r9\n\t" + "ADCS r6, r6, r8, LSR #31\n\t" "ADCS r7, r7, r8\n\t" "MOV r3, #0x0\n\t" "ADC r3, r3, #0x0\n\t" @@ -33254,10 +33175,10 @@ static void sp_256_div2_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_ "LSR r9, r5, #1\n\t" "LSR r10, r6, #1\n\t" "LSR r11, r7, #1\n\t" - "ORR r8, r8, r5, lsl #31\n\t" - "ORR r9, r9, r6, lsl #31\n\t" - "ORR r10, r10, r7, lsl #31\n\t" - "ORR r11, r11, r3, lsl #31\n\t" + "ORR r8, r8, r5, LSL #31\n\t" + "ORR r9, r9, r6, LSL #31\n\t" + "ORR r10, r10, r7, LSL #31\n\t" + "ORR r11, r11, r3, LSL #31\n\t" "MOV r3, r4\n\t" "STRD r8, r9, [%[r], #16]\n\t" "STRD r10, r11, [%[r], #24]\n\t" @@ -33266,10 +33187,10 @@ static void sp_256_div2_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_ "LSR r9, r5, #1\n\t" "LSR r10, r6, #1\n\t" "LSR r11, r7, #1\n\t" - "ORR r8, r8, r5, lsl #31\n\t" - "ORR r9, r9, r6, lsl #31\n\t" - "ORR r10, r10, r7, lsl #31\n\t" - "ORR r11, r11, r3, lsl #31\n\t" + "ORR r8, r8, r5, LSL #31\n\t" + "ORR r9, r9, r6, LSL #31\n\t" + "ORR r10, r10, r7, LSL #31\n\t" + "ORR r11, r11, r3, LSL #31\n\t" "STM %[r], {r8, r9, r10, r11}\n\t" : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : @@ -33321,7 +33242,7 @@ static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* 
p, /* T2 = Y * Y */ sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod); /* T2 = T2/2 */ - sp_256_div2_8(t2, t2, p256_mod); + sp_256_mont_div2_8(t2, t2, p256_mod); /* Y = Y * X */ sp_256_mont_mul_8(y, y, p->x, p256_mod, p256_mp_mod); /* X = T1 * T1 */ @@ -33354,7 +33275,8 @@ typedef struct sp_256_proj_point_dbl_8_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) +static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, + const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_256_proj_point_dbl_8_ctx* ctx = (sp_256_proj_point_dbl_8_ctx*)sp_ctx->data; @@ -33428,7 +33350,7 @@ static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con break; case 11: /* T2 = T2/2 */ - sp_256_div2_8(ctx->t2, ctx->t2, p256_mod); + sp_256_mont_div2_8(ctx->t2, ctx->t2, p256_mod); ctx->state = 12; break; case 12: @@ -34138,7 +34060,7 @@ static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int i, sp_256_mont_sub_8(y, y, t1, p256_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_256_div2_8(y, y, p256_mod); + sp_256_mont_div2_8(y, y, p256_mod); } /* Convert the projective point to affine. @@ -34616,8 +34538,8 @@ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_256_ecc_mulmod_fast_8(r, g, k, map, ct, heap); @@ -35036,8 +34958,8 @@ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_256_ecc_mulmod_fast_8(r, g, k, map, ct, heap); @@ -37519,8 +37441,8 @@ static void sp_256_mask_8(sp_digit* r, const sp_digit* a, sp_digit m) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[16], t2[9]; sp_digit div, r1; @@ -38129,6 +38051,84 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W #endif /* HAVE_ECC_SIGN */ #ifndef WOLFSSL_SP_SMALL +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "MOV r11, #0x0\n\t" + "ADD r12, %[a], #0x20\n\t" + "\n" + "L_sp_256_sub_8_word_%=:\n\t" + "RSBS r11, r11, #0x0\n\t" + "LDM %[a]!, {r3, r4, r5, r6}\n\t" + "LDM %[b]!, {r7, r8, r9, r10}\n\t" + "SBCS r3, r3, r7\n\t" + "SBCS r4, r4, r8\n\t" + "SBCS r5, r5, r9\n\t" + "SBCS r6, r6, r10\n\t" + "STM %[r]!, {r3, r4, r5, r6}\n\t" + "SBC r11, r3, r3\n\t" + "CMP %[a], r12\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BNE L_sp_256_sub_8_word_%=\n\t" +#else + "BNE.N L_sp_256_sub_8_word_%=\n\t" +#endif + "MOV %[r], r11\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + return (uint32_t)(size_t)r; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_256_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "LDM %[a]!, {r3, r4, r5, r6}\n\t" + "LDM %[b]!, {r7, r8, r9, r10}\n\t" + "SUBS r3, r3, r7\n\t" + "SBCS r4, r4, r8\n\t" + "SBCS r5, r5, r9\n\t" + "SBCS r6, r6, r10\n\t" + "STM %[r]!, {r3, r4, r5, r6}\n\t" + "LDM %[a]!, {r3, r4, r5, r6}\n\t" + "LDM %[b]!, {r7, r8, r9, r10}\n\t" + "SBCS r3, r3, r7\n\t" + "SBCS r4, r4, r8\n\t" + "SBCS r5, r5, r9\n\t" + "SBCS r6, r6, r10\n\t" + "STM %[r]!, {r3, r4, r5, r6}\n\t" + "SBC %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ static void sp_256_rshift1_8(sp_digit* r_p, const sp_digit* a_p) { register sp_digit* r asm ("r0") = (sp_digit*)r_p; @@ -38390,7 +38390,7 @@ static int sp_256_mod_inv_8(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && sp_256_cmp_8(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && (sp_256_cmp_8(u, v) >= 0))) { sp_256_sub_8(u, u, v); o = sp_256_sub_8(b, b, d); if (o != 0) @@ -38819,19 +38819,21 @@ static int sp_256_ecc_is_point_8(const sp_point_256* point, if (err == MP_OKAY) { t2 = t1 + 2 * 8; + /* y^2 - x^3 - a.x = b */ sp_256_sqr_8(t1, point->y); (void)sp_256_mod_8(t1, t1, p256_mod); sp_256_sqr_8(t2, point->x); (void)sp_256_mod_8(t2, t2, p256_mod); sp_256_mul_8(t2, t2, point->x); (void)sp_256_mod_8(t2, t2, p256_mod); - (void)sp_256_sub_8(t2, p256_mod, t2); - sp_256_mont_add_8(t1, t1, t2, p256_mod); + sp_256_mont_sub_8(t1, t1, t2, p256_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_256_mont_add_8(t1, t1, point->x, p256_mod); sp_256_mont_add_8(t1, t1, point->x, p256_mod); sp_256_mont_add_8(t1, t1, 
point->x, p256_mod); + if (sp_256_cmp_8(t1, p256_b) != 0) { err = MP_VAL; } @@ -41412,91 +41414,6 @@ static sp_digit sp_384_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit return (uint32_t)(size_t)r; } -#endif /* WOLFSSL_SP_SMALL */ -#ifdef WOLFSSL_SP_SMALL -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. - */ -static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) -{ - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - - __asm__ __volatile__ ( - "MOV r11, #0x0\n\t" - "ADD r12, %[a], #0x30\n\t" - "\n" - "L_sp_384_sub_12_word_%=:\n\t" - "RSBS r11, r11, #0x0\n\t" - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SBCS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "SBC r11, r3, r3\n\t" - "CMP %[a], r12\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_384_sub_12_word_%=\n\t" -#else - "BNE.N L_sp_384_sub_12_word_%=\n\t" -#endif - "MOV %[r], r11\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" - ); - return (uint32_t)(size_t)r; -} - -#else -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) -{ - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - - __asm__ __volatile__ ( - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SUBS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SBCS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SBCS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "SBC %[r], r6, r6\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); - return (uint32_t)(size_t)r; -} - #endif /* WOLFSSL_SP_SMALL */ /* Multiply a number by Montgomery normalizer mod modulus (prime). * @@ -42583,6 +42500,91 @@ static void sp_384_mont_tpl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digi sp_384_cond_sub_12(r, r, m, 0 - o); } +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "MOV r11, #0x0\n\t" + "ADD r12, %[a], #0x30\n\t" + "\n" + "L_sp_384_sub_12_word_%=:\n\t" + "RSBS r11, r11, #0x0\n\t" + "LDM %[a]!, {r3, r4, r5, r6}\n\t" + "LDM %[b]!, {r7, r8, r9, r10}\n\t" + "SBCS r3, r3, r7\n\t" + "SBCS r4, r4, r8\n\t" + "SBCS r5, r5, r9\n\t" + "SBCS r6, r6, r10\n\t" + "STM %[r]!, {r3, r4, r5, r6}\n\t" + "SBC r11, r3, r3\n\t" + "CMP %[a], r12\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BNE L_sp_384_sub_12_word_%=\n\t" +#else + "BNE.N L_sp_384_sub_12_word_%=\n\t" +#endif + "MOV %[r], r11\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + return (uint32_t)(size_t)r; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_384_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "LDM %[a]!, {r3, r4, r5, r6}\n\t" + "LDM %[b]!, {r7, r8, r9, r10}\n\t" + "SUBS r3, r3, r7\n\t" + "SBCS r4, r4, r8\n\t" + "SBCS r5, r5, r9\n\t" + "SBCS r6, r6, r10\n\t" + "STM %[r]!, {r3, r4, r5, r6}\n\t" + "LDM %[a]!, {r3, r4, r5, r6}\n\t" + "LDM %[b]!, {r7, r8, r9, r10}\n\t" + "SBCS r3, r3, r7\n\t" + "SBCS r4, r4, r8\n\t" + "SBCS r5, r5, r9\n\t" + "SBCS r6, r6, r10\n\t" + "STM %[r]!, {r3, r4, r5, r6}\n\t" + "LDM %[a]!, {r3, r4, r5, r6}\n\t" + "LDM %[b]!, {r7, r8, r9, r10}\n\t" + "SBCS r3, r3, r7\n\t" + "SBCS r4, r4, r8\n\t" + "SBCS r5, r5, r9\n\t" + "SBCS r6, r6, r10\n\t" + "STM %[r]!, {r3, r4, r5, r6}\n\t" + "SBC %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_SMALL /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. @@ -42783,7 +42785,7 @@ static void sp_384_rshift1_12(sp_digit* r_p, const sp_digit* a_p) * a Number to divide. * m Modulus (prime). */ -static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_384_mont_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m) { sp_digit o; @@ -42836,7 +42838,7 @@ static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, /* T2 = Y * Y */ sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod); /* T2 = T2/2 */ - sp_384_div2_12(t2, t2, p384_mod); + sp_384_mont_div2_12(t2, t2, p384_mod); /* Y = Y * X */ sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod); /* X = T1 * T1 */ @@ -42869,7 +42871,8 @@ typedef struct sp_384_proj_point_dbl_12_ctx { * p Point to double. 
* t Temporary ordinate data. */ -static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) +static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, + const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_384_proj_point_dbl_12_ctx* ctx = (sp_384_proj_point_dbl_12_ctx*)sp_ctx->data; @@ -42943,7 +42946,7 @@ static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, co break; case 11: /* T2 = T2/2 */ - sp_384_div2_12(ctx->t2, ctx->t2, p384_mod); + sp_384_mont_div2_12(ctx->t2, ctx->t2, p384_mod); ctx->state = 12; break; case 12: @@ -43679,7 +43682,7 @@ static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int i, sp_384_mont_sub_12(y, y, t1, p384_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_384_div2_12(y, y, p384_mod); + sp_384_mont_div2_12(y, y, p384_mod); } /* Convert the projective point to affine. @@ -44173,8 +44176,8 @@ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_384_ecc_mulmod_fast_12(r, g, k, map, ct, heap); @@ -44609,8 +44612,8 @@ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_384_ecc_mulmod_fast_12(r, g, k, map, ct, heap); @@ -47129,8 +47132,8 @@ static void sp_384_mask_12(sp_digit* r, const sp_digit* a, sp_digit m) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[24], t2[13]; sp_digit div, r1; @@ -48019,7 +48022,7 @@ static int sp_384_mod_inv_12(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && sp_384_cmp_12(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && (sp_384_cmp_12(u, v) >= 0))) { sp_384_sub_12(u, u, v); o = sp_384_sub_12(b, b, d); if (o != 0) @@ -48452,19 +48455,21 @@ static int sp_384_ecc_is_point_12(const sp_point_384* point, if (err == MP_OKAY) { t2 = t1 + 2 * 12; + /* y^2 - x^3 - a.x = b */ sp_384_sqr_12(t1, point->y); (void)sp_384_mod_12(t1, t1, p384_mod); sp_384_sqr_12(t2, point->x); (void)sp_384_mod_12(t2, t2, p384_mod); sp_384_mul_12(t2, t2, point->x); (void)sp_384_mod_12(t2, t2, p384_mod); - (void)sp_384_sub_12(t2, p384_mod, t2); - sp_384_mont_add_12(t1, t1, t2, p384_mod); + sp_384_mont_sub_12(t1, t1, t2, p384_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_384_mont_add_12(t1, t1, point->x, p384_mod); sp_384_mont_add_12(t1, t1, point->x, p384_mod); sp_384_mont_add_12(t1, t1, point->x, p384_mod); + if (sp_384_cmp_12(t1, p384_b) != 0) { err = MP_VAL; } @@ -52708,107 +52713,6 @@ static sp_digit sp_521_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit return (uint32_t)(size_t)r; } -#endif /* WOLFSSL_SP_SMALL */ -#ifdef WOLFSSL_SP_SMALL -/* Sub b 
from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. - */ -static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) -{ - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - - __asm__ __volatile__ ( - "MOV r11, #0x0\n\t" - "ADD r12, %[a], #0x40\n\t" - "\n" - "L_sp_521_sub_17_word_%=:\n\t" - "RSBS r11, r11, #0x0\n\t" - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SBCS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "SBC r11, r3, r3\n\t" - "CMP %[a], r12\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_521_sub_17_word_%=\n\t" -#else - "BNE.N L_sp_521_sub_17_word_%=\n\t" -#endif - "RSBS r11, r11, #0x0\n\t" - "LDM %[a]!, {r3}\n\t" - "LDM %[b]!, {r7}\n\t" - "SBCS r3, r3, r7\n\t" - "STM %[r]!, {r3}\n\t" - "SBC %[r], r6, r6\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" - ); - return (uint32_t)(size_t)r; -} - -#else -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) -{ - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - - __asm__ __volatile__ ( - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SUBS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SBCS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SBCS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SBCS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "LDM %[a]!, {r3}\n\t" - "LDM %[b]!, {r7}\n\t" - "SBCS r3, r3, r7\n\t" - "STM %[r]!, {r3}\n\t" - "SBC %[r], r6, r6\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); - return (uint32_t)(size_t)r; -} - #endif /* WOLFSSL_SP_SMALL */ /* Multiply a number by Montgomery normalizer mod modulus (prime). 
* @@ -53173,41 +53077,41 @@ static void sp_521_mont_reduce_17(sp_digit* a_p, const sp_digit* m_p, sp_digit m /* 0-7 */ "LDM lr!, {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "LSR r1, r1, #9\n\t" - "ORR r1, r1, r2, lsl #23\n\t" + "ORR r1, r1, r2, LSL #23\n\t" "LSR r2, r2, #9\n\t" - "ORR r2, r2, r3, lsl #23\n\t" + "ORR r2, r2, r3, LSL #23\n\t" "LSR r3, r3, #9\n\t" - "ORR r3, r3, r4, lsl #23\n\t" + "ORR r3, r3, r4, LSL #23\n\t" "LSR r4, r4, #9\n\t" - "ORR r4, r4, r5, lsl #23\n\t" + "ORR r4, r4, r5, LSL #23\n\t" "LSR r5, r5, #9\n\t" - "ORR r5, r5, r6, lsl #23\n\t" + "ORR r5, r5, r6, LSL #23\n\t" "LSR r6, r6, #9\n\t" - "ORR r6, r6, r7, lsl #23\n\t" + "ORR r6, r6, r7, LSL #23\n\t" "LSR r7, r7, #9\n\t" - "ORR r7, r7, r8, lsl #23\n\t" + "ORR r7, r7, r8, LSL #23\n\t" "LSR r8, r8, #9\n\t" - "ORR r8, r8, r9, lsl #23\n\t" + "ORR r8, r8, r9, LSL #23\n\t" "STM r12!, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" "MOV r1, r9\n\t" /* 8-16 */ "LDM lr!, {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "LSR r1, r1, #9\n\t" - "ORR r1, r1, r2, lsl #23\n\t" + "ORR r1, r1, r2, LSL #23\n\t" "LSR r2, r2, #9\n\t" - "ORR r2, r2, r3, lsl #23\n\t" + "ORR r2, r2, r3, LSL #23\n\t" "LSR r3, r3, #9\n\t" - "ORR r3, r3, r4, lsl #23\n\t" + "ORR r3, r3, r4, LSL #23\n\t" "LSR r4, r4, #9\n\t" - "ORR r4, r4, r5, lsl #23\n\t" + "ORR r4, r4, r5, LSL #23\n\t" "LSR r5, r5, #9\n\t" - "ORR r5, r5, r6, lsl #23\n\t" + "ORR r5, r5, r6, LSL #23\n\t" "LSR r6, r6, #9\n\t" - "ORR r6, r6, r7, lsl #23\n\t" + "ORR r6, r6, r7, LSL #23\n\t" "LSR r7, r7, #9\n\t" - "ORR r7, r7, r8, lsl #23\n\t" + "ORR r7, r7, r8, LSL #23\n\t" "LSR r8, r8, #9\n\t" - "ORR r8, r8, r9, lsl #23\n\t" + "ORR r8, r8, r9, LSL #23\n\t" "LSR r9, r9, #9\n\t" "STM r12!, {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t" /* Add top to bottom */ @@ -53463,67 +53367,67 @@ static void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, sp_d "LDR r6, [%[a]]\n\t" "LDR r7, [%[a], #4]\n\t" "LSR r6, r6, #9\n\t" - "ORR r6, r6, r7, lsl #23\n\t" + "ORR r6, r6, r7, LSL #23\n\t" "STR 
r6, [%[a], #4]\n\t" "LDR r6, [%[a], #8]\n\t" "LSR r7, r7, #9\n\t" - "ORR r7, r7, r6, lsl #23\n\t" + "ORR r7, r7, r6, LSL #23\n\t" "STR r7, [%[a], #8]\n\t" "LDR r7, [%[a], #12]\n\t" "LSR r6, r6, #9\n\t" - "ORR r6, r6, r7, lsl #23\n\t" + "ORR r6, r6, r7, LSL #23\n\t" "STR r6, [%[a], #12]\n\t" "LDR r6, [%[a], #16]\n\t" "LSR r7, r7, #9\n\t" - "ORR r7, r7, r6, lsl #23\n\t" + "ORR r7, r7, r6, LSL #23\n\t" "STR r7, [%[a], #16]\n\t" "LDR r7, [%[a], #20]\n\t" "LSR r6, r6, #9\n\t" - "ORR r6, r6, r7, lsl #23\n\t" + "ORR r6, r6, r7, LSL #23\n\t" "STR r6, [%[a], #20]\n\t" "LDR r6, [%[a], #24]\n\t" "LSR r7, r7, #9\n\t" - "ORR r7, r7, r6, lsl #23\n\t" + "ORR r7, r7, r6, LSL #23\n\t" "STR r7, [%[a], #24]\n\t" "LDR r7, [%[a], #28]\n\t" "LSR r6, r6, #9\n\t" - "ORR r6, r6, r7, lsl #23\n\t" + "ORR r6, r6, r7, LSL #23\n\t" "STR r6, [%[a], #28]\n\t" "LDR r6, [%[a], #32]\n\t" "LSR r7, r7, #9\n\t" - "ORR r7, r7, r6, lsl #23\n\t" + "ORR r7, r7, r6, LSL #23\n\t" "STR r7, [%[a], #32]\n\t" "LDR r7, [%[a], #36]\n\t" "LSR r6, r6, #9\n\t" - "ORR r6, r6, r7, lsl #23\n\t" + "ORR r6, r6, r7, LSL #23\n\t" "STR r6, [%[a], #36]\n\t" "LDR r6, [%[a], #40]\n\t" "LSR r7, r7, #9\n\t" - "ORR r7, r7, r6, lsl #23\n\t" + "ORR r7, r7, r6, LSL #23\n\t" "STR r7, [%[a], #40]\n\t" "LDR r7, [%[a], #44]\n\t" "LSR r6, r6, #9\n\t" - "ORR r6, r6, r7, lsl #23\n\t" + "ORR r6, r6, r7, LSL #23\n\t" "STR r6, [%[a], #44]\n\t" "LDR r6, [%[a], #48]\n\t" "LSR r7, r7, #9\n\t" - "ORR r7, r7, r6, lsl #23\n\t" + "ORR r7, r7, r6, LSL #23\n\t" "STR r7, [%[a], #48]\n\t" "LDR r7, [%[a], #52]\n\t" "LSR r6, r6, #9\n\t" - "ORR r6, r6, r7, lsl #23\n\t" + "ORR r6, r6, r7, LSL #23\n\t" "STR r6, [%[a], #52]\n\t" "LDR r6, [%[a], #56]\n\t" "LSR r7, r7, #9\n\t" - "ORR r7, r7, r6, lsl #23\n\t" + "ORR r7, r7, r6, LSL #23\n\t" "STR r7, [%[a], #56]\n\t" "LDR r7, [%[a], #60]\n\t" "LSR r6, r6, #9\n\t" - "ORR r6, r6, r7, lsl #23\n\t" + "ORR r6, r6, r7, LSL #23\n\t" "STR r6, [%[a], #60]\n\t" "LDR r6, [%[a], #64]\n\t" "LSR r7, r7, #9\n\t" - "ORR r7, r7, 
r6, lsl #23\n\t" + "ORR r7, r7, r6, LSL #23\n\t" "STR r7, [%[a], #64]\n\t" "LSR r6, r6, #9\n\t" "STR r6, [%[a], #68]\n\t" @@ -53678,67 +53582,67 @@ static void sp_521_mont_reduce_order_17(sp_digit* a_p, const sp_digit* m_p, sp_d "LDR r12, [%[a]]\n\t" "LDR r3, [%[a], #4]\n\t" "LSR r12, r12, #9\n\t" - "ORR r12, r12, r3, lsl #23\n\t" + "ORR r12, r12, r3, LSL #23\n\t" "STR r12, [%[a], #4]\n\t" "LDR r12, [%[a], #8]\n\t" "LSR r3, r3, #9\n\t" - "ORR r3, r3, r12, lsl #23\n\t" + "ORR r3, r3, r12, LSL #23\n\t" "STR r3, [%[a], #8]\n\t" "LDR r3, [%[a], #12]\n\t" "LSR r12, r12, #9\n\t" - "ORR r12, r12, r3, lsl #23\n\t" + "ORR r12, r12, r3, LSL #23\n\t" "STR r12, [%[a], #12]\n\t" "LDR r12, [%[a], #16]\n\t" "LSR r3, r3, #9\n\t" - "ORR r3, r3, r12, lsl #23\n\t" + "ORR r3, r3, r12, LSL #23\n\t" "STR r3, [%[a], #16]\n\t" "LDR r3, [%[a], #20]\n\t" "LSR r12, r12, #9\n\t" - "ORR r12, r12, r3, lsl #23\n\t" + "ORR r12, r12, r3, LSL #23\n\t" "STR r12, [%[a], #20]\n\t" "LDR r12, [%[a], #24]\n\t" "LSR r3, r3, #9\n\t" - "ORR r3, r3, r12, lsl #23\n\t" + "ORR r3, r3, r12, LSL #23\n\t" "STR r3, [%[a], #24]\n\t" "LDR r3, [%[a], #28]\n\t" "LSR r12, r12, #9\n\t" - "ORR r12, r12, r3, lsl #23\n\t" + "ORR r12, r12, r3, LSL #23\n\t" "STR r12, [%[a], #28]\n\t" "LDR r12, [%[a], #32]\n\t" "LSR r3, r3, #9\n\t" - "ORR r3, r3, r12, lsl #23\n\t" + "ORR r3, r3, r12, LSL #23\n\t" "STR r3, [%[a], #32]\n\t" "LDR r3, [%[a], #36]\n\t" "LSR r12, r12, #9\n\t" - "ORR r12, r12, r3, lsl #23\n\t" + "ORR r12, r12, r3, LSL #23\n\t" "STR r12, [%[a], #36]\n\t" "LDR r12, [%[a], #40]\n\t" "LSR r3, r3, #9\n\t" - "ORR r3, r3, r12, lsl #23\n\t" + "ORR r3, r3, r12, LSL #23\n\t" "STR r3, [%[a], #40]\n\t" "LDR r3, [%[a], #44]\n\t" "LSR r12, r12, #9\n\t" - "ORR r12, r12, r3, lsl #23\n\t" + "ORR r12, r12, r3, LSL #23\n\t" "STR r12, [%[a], #44]\n\t" "LDR r12, [%[a], #48]\n\t" "LSR r3, r3, #9\n\t" - "ORR r3, r3, r12, lsl #23\n\t" + "ORR r3, r3, r12, LSL #23\n\t" "STR r3, [%[a], #48]\n\t" "LDR r3, [%[a], #52]\n\t" "LSR r12, r12, #9\n\t" 
- "ORR r12, r12, r3, lsl #23\n\t" + "ORR r12, r12, r3, LSL #23\n\t" "STR r12, [%[a], #52]\n\t" "LDR r12, [%[a], #56]\n\t" "LSR r3, r3, #9\n\t" - "ORR r3, r3, r12, lsl #23\n\t" + "ORR r3, r3, r12, LSL #23\n\t" "STR r3, [%[a], #56]\n\t" "LDR r3, [%[a], #60]\n\t" "LSR r12, r12, #9\n\t" - "ORR r12, r12, r3, lsl #23\n\t" + "ORR r12, r12, r3, LSL #23\n\t" "STR r12, [%[a], #60]\n\t" "LDR r12, [%[a], #64]\n\t" "LSR r3, r3, #9\n\t" - "ORR r3, r3, r12, lsl #23\n\t" + "ORR r3, r3, r12, LSL #23\n\t" "STR r3, [%[a], #64]\n\t" "LSR r12, r12, #9\n\t" "STR r12, [%[a], #68]\n\t" @@ -54578,7 +54482,7 @@ static void sp_521_rshift1_17(sp_digit* r_p, const sp_digit* a_p) * a Number to divide. * m Modulus (prime). */ -static void sp_521_div2_17(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_521_mont_div2_17(sp_digit* r, const sp_digit* a, const sp_digit* m) { sp_digit o = a[0] & 1; @@ -54632,7 +54536,7 @@ static void sp_521_proj_point_dbl_17(sp_point_521* r, const sp_point_521* p, /* T2 = Y * Y */ sp_521_mont_sqr_17(t2, y, p521_mod, p521_mp_mod); /* T2 = T2/2 */ - sp_521_div2_17(t2, t2, p521_mod); + sp_521_mont_div2_17(t2, t2, p521_mod); /* Y = Y * X */ sp_521_mont_mul_17(y, y, p->x, p521_mod, p521_mp_mod); /* X = T1 * T1 */ @@ -54665,7 +54569,8 @@ typedef struct sp_521_proj_point_dbl_17_ctx { * p Point to double. * t Temporary ordinate data. 
*/ -static int sp_521_proj_point_dbl_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, sp_digit* t) +static int sp_521_proj_point_dbl_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, + const sp_point_521* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_521_proj_point_dbl_17_ctx* ctx = (sp_521_proj_point_dbl_17_ctx*)sp_ctx->data; @@ -54739,7 +54644,7 @@ static int sp_521_proj_point_dbl_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, co break; case 11: /* T2 = T2/2 */ - sp_521_div2_17(ctx->t2, ctx->t2, p521_mod); + sp_521_mont_div2_17(ctx->t2, ctx->t2, p521_mod); ctx->state = 12; break; case 12: @@ -55512,7 +55417,7 @@ static void sp_521_proj_point_dbl_n_17(sp_point_521* p, int i, sp_521_mont_sub_17(y, y, t1, p521_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_521_div2_17(y, y, p521_mod); + sp_521_mont_div2_17(y, y, p521_mod); } /* Convert the projective point to affine. @@ -56026,8 +55931,8 @@ static void sp_ecc_get_cache_521(const sp_point_521* g, sp_cache_521_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_521_ecc_mulmod_17(sp_point_521* r, const sp_point_521* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_521_ecc_mulmod_17(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_521_ecc_mulmod_fast_17(r, g, k, map, ct, heap); @@ -56482,8 +56387,8 @@ static void sp_ecc_get_cache_521(const sp_point_521* g, sp_cache_521_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_521_ecc_mulmod_17(sp_point_521* r, const sp_point_521* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_521_ecc_mulmod_17(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_521_ecc_mulmod_fast_17(r, g, k, map, ct, heap); @@ -60029,8 +59934,8 @@ static void sp_521_mask_17(sp_digit* r, const sp_digit* a, sp_digit m) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_521_div_17(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_521_div_17(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[35]; sp_digit t2[18]; @@ -60635,6 +60540,107 @@ int sp_ecc_sign_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W #endif /* HAVE_ECC_SIGN */ #ifndef WOLFSSL_SP_SMALL +#ifdef WOLFSSL_SP_SMALL +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "MOV r11, #0x0\n\t" + "ADD r12, %[a], #0x40\n\t" + "\n" + "L_sp_521_sub_17_word_%=:\n\t" + "RSBS r11, r11, #0x0\n\t" + "LDM %[a]!, {r3, r4, r5, r6}\n\t" + "LDM %[b]!, {r7, r8, r9, r10}\n\t" + "SBCS r3, r3, r7\n\t" + "SBCS r4, r4, r8\n\t" + "SBCS r5, r5, r9\n\t" + "SBCS r6, r6, r10\n\t" + "STM %[r]!, {r3, r4, r5, r6}\n\t" + "SBC r11, r3, r3\n\t" + "CMP %[a], r12\n\t" +#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) + "BNE L_sp_521_sub_17_word_%=\n\t" +#else + "BNE.N L_sp_521_sub_17_word_%=\n\t" +#endif + "RSBS r11, r11, #0x0\n\t" + "LDM %[a]!, {r3}\n\t" + "LDM %[b]!, {r7}\n\t" + "SBCS r3, r3, r7\n\t" + "STM %[r]!, {r3}\n\t" + "SBC %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + ); + return (uint32_t)(size_t)r; +} + +#else +/* Sub b from a into r. (r = a - b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision integer. 
+ */ +static sp_digit sp_521_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) +{ + register sp_digit* r asm ("r0") = (sp_digit*)r_p; + register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + + __asm__ __volatile__ ( + "LDM %[a]!, {r3, r4, r5, r6}\n\t" + "LDM %[b]!, {r7, r8, r9, r10}\n\t" + "SUBS r3, r3, r7\n\t" + "SBCS r4, r4, r8\n\t" + "SBCS r5, r5, r9\n\t" + "SBCS r6, r6, r10\n\t" + "STM %[r]!, {r3, r4, r5, r6}\n\t" + "LDM %[a]!, {r3, r4, r5, r6}\n\t" + "LDM %[b]!, {r7, r8, r9, r10}\n\t" + "SBCS r3, r3, r7\n\t" + "SBCS r4, r4, r8\n\t" + "SBCS r5, r5, r9\n\t" + "SBCS r6, r6, r10\n\t" + "STM %[r]!, {r3, r4, r5, r6}\n\t" + "LDM %[a]!, {r3, r4, r5, r6}\n\t" + "LDM %[b]!, {r7, r8, r9, r10}\n\t" + "SBCS r3, r3, r7\n\t" + "SBCS r4, r4, r8\n\t" + "SBCS r5, r5, r9\n\t" + "SBCS r6, r6, r10\n\t" + "STM %[r]!, {r3, r4, r5, r6}\n\t" + "LDM %[a]!, {r3, r4, r5, r6}\n\t" + "LDM %[b]!, {r7, r8, r9, r10}\n\t" + "SBCS r3, r3, r7\n\t" + "SBCS r4, r4, r8\n\t" + "SBCS r5, r5, r9\n\t" + "SBCS r6, r6, r10\n\t" + "STM %[r]!, {r3, r4, r5, r6}\n\t" + "LDM %[a]!, {r3}\n\t" + "LDM %[b]!, {r7}\n\t" + "SBCS r3, r3, r7\n\t" + "STM %[r]!, {r3}\n\t" + "SBC %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); + return (uint32_t)(size_t)r; +} + +#endif /* WOLFSSL_SP_SMALL */ /* Divide the number by 2 mod the modulus. (r = a / 2 % m) * * r Result of division by 2. 
@@ -61048,7 +61054,7 @@ static int sp_521_mod_inv_17(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && sp_521_cmp_17(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && (sp_521_cmp_17(u, v) >= 0))) { sp_521_sub_17(u, u, v); o = sp_521_sub_17(b, b, d); if (o != 0) @@ -61493,19 +61499,21 @@ static int sp_521_ecc_is_point_17(const sp_point_521* point, if (err == MP_OKAY) { t2 = t1 + 2 * 17; + /* y^2 - x^3 - a.x = b */ sp_521_sqr_17(t1, point->y); (void)sp_521_mod_17(t1, t1, p521_mod); sp_521_sqr_17(t2, point->x); (void)sp_521_mod_17(t2, t2, p521_mod); sp_521_mul_17(t2, t2, point->x); (void)sp_521_mod_17(t2, t2, p521_mod); - (void)sp_521_sub_17(t2, p521_mod, t2); - sp_521_mont_add_17(t1, t1, t2, p521_mod); + sp_521_mont_sub_17(t1, t1, t2, p521_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_521_mont_add_17(t1, t1, point->x, p521_mod); sp_521_mont_add_17(t1, t1, point->x, p521_mod); sp_521_mont_add_17(t1, t1, point->x, p521_mod); + if (sp_521_cmp_17(t1, p521_b) != 0) { err = MP_VAL; } @@ -66643,8 +66651,8 @@ static sp_int32 sp_1024_cmp_32(const sp_digit* a_p, const sp_digit* b_p) * r Remainder from the division. * returns MP_OKAY indicating success. */ -static WC_INLINE int sp_1024_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m, - sp_digit* r) +static WC_INLINE int sp_1024_div_32(const sp_digit* a, const sp_digit* d, + sp_digit* m, sp_digit* r) { sp_digit t1[64], t2[33]; sp_digit div, r1; @@ -68750,7 +68758,7 @@ static void sp_1024_rshift1_32(sp_digit* r_p, const sp_digit* a_p) * a Number to divide. * m Modulus (prime). 
*/ -static void sp_1024_div2_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +static void sp_1024_mont_div2_32(sp_digit* r, const sp_digit* a, const sp_digit* m) { sp_digit o; @@ -68803,7 +68811,7 @@ static void sp_1024_proj_point_dbl_32(sp_point_1024* r, const sp_point_1024* p, /* T2 = Y * Y */ sp_1024_mont_sqr_32(t2, y, p1024_mod, p1024_mp_mod); /* T2 = T2/2 */ - sp_1024_div2_32(t2, t2, p1024_mod); + sp_1024_mont_div2_32(t2, t2, p1024_mod); /* Y = Y * X */ sp_1024_mont_mul_32(y, y, p->x, p1024_mod, p1024_mp_mod); /* X = T1 * T1 */ @@ -68836,7 +68844,8 @@ typedef struct sp_1024_proj_point_dbl_32_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) +static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, + const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_1024_proj_point_dbl_32_ctx* ctx = (sp_1024_proj_point_dbl_32_ctx*)sp_ctx->data; @@ -68910,7 +68919,7 @@ static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, break; case 11: /* T2 = T2/2 */ - sp_1024_div2_32(ctx->t2, ctx->t2, p1024_mod); + sp_1024_mont_div2_32(ctx->t2, ctx->t2, p1024_mod); ctx->state = 12; break; case 12: @@ -68960,126 +68969,6 @@ static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, return err; } #endif /* WOLFSSL_SP_NONBLOCK */ -#ifdef WOLFSSL_SP_SMALL -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static sp_digit sp_1024_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) -{ - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - - __asm__ __volatile__ ( - "MOV r11, #0x0\n\t" - "ADD r12, %[a], #0x80\n\t" - "\n" - "L_sp_1024_sub_32_word_%=:\n\t" - "RSBS r11, r11, #0x0\n\t" - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SBCS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "SBC r11, r3, r3\n\t" - "CMP %[a], r12\n\t" -#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - "BNE L_sp_1024_sub_32_word_%=\n\t" -#else - "BNE.N L_sp_1024_sub_32_word_%=\n\t" -#endif - "MOV %[r], r11\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" - ); - return (uint32_t)(size_t)r; -} - -#else -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -static sp_digit sp_1024_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) -{ - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - - __asm__ __volatile__ ( - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SUBS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SBCS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SBCS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SBCS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SBCS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SBCS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SBCS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "LDM %[a]!, {r3, r4, r5, r6}\n\t" - "LDM %[b]!, {r7, r8, r9, r10}\n\t" - "SBCS r3, r3, r7\n\t" - "SBCS r4, r4, r8\n\t" - "SBCS r5, r5, r9\n\t" - "SBCS r6, r6, r10\n\t" - "STM %[r]!, {r3, r4, r5, r6}\n\t" - "SBC %[r], r6, r6\n\t" - : [r] "+r" (r), [a] 
"+r" (a), [b] "+r" (b) - : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); - return (uint32_t)(size_t)r; -} - -#endif /* WOLFSSL_SP_SMALL */ /* Compare two numbers to determine if they are equal. * Constant time implementation. * @@ -69639,7 +69528,7 @@ static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int i, sp_1024_mont_sub_32(y, y, t1, p1024_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_1024_div2_32(y, y, p1024_mod); + sp_1024_mont_div2_32(y, y, p1024_mod); } /* Convert the projective point to affine. @@ -70052,8 +69941,8 @@ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cach * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_1024_ecc_mulmod_fast_32(r, g, k, map, ct, heap); @@ -70407,8 +70296,8 @@ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cach * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_1024_ecc_mulmod_fast_32(r, g, k, map, ct, heap); @@ -76562,7 +76451,7 @@ static void sp_1024_accumulate_line_dbl_32(sp_digit* vx, sp_digit* vy, /* ty = 4 * p.y ^ 2 */ sp_1024_mont_sqr_32(ty, ry, p1024_mod, p1024_mp_mod); /* t1 = 2 * p.y ^ 2 */ - sp_1024_div2_32(t1, ty, p1024_mod); + sp_1024_mont_div2_32(t1, ty, p1024_mod); /* r.x -= 2 * (p.y ^ 2) */ sp_1024_mont_sub_32(rx, rx, t1, p1024_mod); /* p'.z = p.y * 2 * p.z */ @@ -76582,7 +76471,7 @@ static void sp_1024_accumulate_line_dbl_32(sp_digit* vx, sp_digit* vy, /* t1 = (4 * p.y^2) ^ 2 = 16 * p.y^4 */ sp_1024_mont_sqr_32(t1, ty, p1024_mod, p1024_mp_mod); /* t1 = 16 * p.y^4 / 2 = 8 * p.y^4 */ - sp_1024_div2_32(t1, t1, p1024_mod); + sp_1024_mont_div2_32(t1, t1, p1024_mod); /* p'.y = 4 * p.y^2 * p.x */ sp_1024_mont_mul_32(p->y, ty, p->x, p1024_mod, p1024_mp_mod); /* p'.x = l^2 */ @@ -77000,7 +76889,7 @@ static void sp_1024_accumulate_line_dbl_n_32(sp_digit* vx, sp_digit* vy, /* ty = py ^ 2 */ sp_1024_mont_sqr_32(ty, p->y, p1024_mod, p1024_mp_mod); /* t1 = py ^ 2 / 2 */ - sp_1024_div2_32(t1, ty, p1024_mod); + sp_1024_mont_div2_32(t1, ty, p1024_mod); /* r.x -= py ^ 2 / 2 */ sp_1024_mont_sub_32(rx, rx, t1, p1024_mod); /* p'.z = py * pz */ @@ -77038,7 +76927,7 @@ static void sp_1024_accumulate_line_dbl_n_32(sp_digit* vx, sp_digit* vy, } /* p'.y = py' / 2 */ - sp_1024_div2_32(p->y, p->y, p1024_mod); + sp_1024_mont_div2_32(p->y, p->y, p1024_mod); } /* Operations to perform based on order - 1. 
@@ -77878,19 +77767,21 @@ static int sp_1024_ecc_is_point_32(const sp_point_1024* point, if (err == MP_OKAY) { t2 = t1 + 2 * 32; + /* y^2 - x^3 - a.x = b */ sp_1024_sqr_32(t1, point->y); (void)sp_1024_mod_32(t1, t1, p1024_mod); sp_1024_sqr_32(t2, point->x); (void)sp_1024_mod_32(t2, t2, p1024_mod); sp_1024_mul_32(t2, t2, point->x); (void)sp_1024_mod_32(t2, t2, p1024_mod); - (void)sp_1024_sub_32(t2, p1024_mod, t2); - sp_1024_mont_add_32(t1, t1, t2, p1024_mod); + sp_1024_mont_sub_32(t1, t1, t2, p1024_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_1024_mont_add_32(t1, t1, point->x, p1024_mod); sp_1024_mont_add_32(t1, t1, point->x, p1024_mod); sp_1024_mont_add_32(t1, t1, point->x, p1024_mod); + n = sp_1024_cmp_32(t1, p1024_mod); sp_1024_cond_sub_32(t1, t1, p1024_mod, ~(n >> 31)); sp_1024_norm_32(t1); diff --git a/wolfcrypt/src/sp_int.c b/wolfcrypt/src/sp_int.c index 022df827c..e9ffed422 100644 --- a/wolfcrypt/src/sp_int.c +++ b/wolfcrypt/src/sp_int.c @@ -5495,8 +5495,8 @@ int sp_cmp(const sp_int* a, const sp_int* b) *************************/ #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \ - (defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)) || \ - defined(OPENSSL_EXTRA) + ((defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_SP_SM2)) && \ + defined(HAVE_ECC)) || defined(OPENSSL_EXTRA) /* Check if a bit is set * * When a is NULL, result is 0. diff --git a/wolfcrypt/src/sp_sm2_arm32.c b/wolfcrypt/src/sp_sm2_arm32.c new file mode 100644 index 000000000..211b14392 --- /dev/null +++ b/wolfcrypt/src/sp_sm2_arm32.c @@ -0,0 +1,33 @@ +/* sp_sm2_arm32.c + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef WOLFSSL_SM2 + +#error "See https://github.com/wolfSSL/wolfsm for implementation of this file" + +#endif + diff --git a/wolfcrypt/src/sp_sm2_arm64.c b/wolfcrypt/src/sp_sm2_arm64.c new file mode 100644 index 000000000..5c84948a0 --- /dev/null +++ b/wolfcrypt/src/sp_sm2_arm64.c @@ -0,0 +1,33 @@ +/* sp_sm2_arm64.c + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef WOLFSSL_SM2 + +#error "See https://github.com/wolfSSL/wolfsm for implementation of this file" + +#endif + diff --git a/wolfcrypt/src/sp_sm2_armthumb.c b/wolfcrypt/src/sp_sm2_armthumb.c new file mode 100644 index 000000000..5d26e27be --- /dev/null +++ b/wolfcrypt/src/sp_sm2_armthumb.c @@ -0,0 +1,33 @@ +/* sp_sm2_armthumb.c + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef WOLFSSL_SM2 + +#error "See https://github.com/wolfSSL/wolfsm for implementation of this file" + +#endif + diff --git a/wolfcrypt/src/sp_sm2_c32.c b/wolfcrypt/src/sp_sm2_c32.c new file mode 100644 index 000000000..41c40d1ef --- /dev/null +++ b/wolfcrypt/src/sp_sm2_c32.c @@ -0,0 +1,33 @@ +/* sp_sm2_c32.c + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. 
+ * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef WOLFSSL_SM2 + +#error "See https://github.com/wolfSSL/wolfsm for implementation of this file" + +#endif + diff --git a/wolfcrypt/src/sp_sm2_c64.c b/wolfcrypt/src/sp_sm2_c64.c new file mode 100644 index 000000000..ee3801654 --- /dev/null +++ b/wolfcrypt/src/sp_sm2_c64.c @@ -0,0 +1,33 @@ +/* sp_sm2_c64.c + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef WOLFSSL_SM2 + +#error "See https://github.com/wolfSSL/wolfsm for implementation of this file" + +#endif + diff --git a/wolfcrypt/src/sp_sm2_cortexm.c b/wolfcrypt/src/sp_sm2_cortexm.c new file mode 100644 index 000000000..3bda85f02 --- /dev/null +++ b/wolfcrypt/src/sp_sm2_cortexm.c @@ -0,0 +1,33 @@ +/* sp_sm2_cortexm.c + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef WOLFSSL_SM2 + +#error "See https://github.com/wolfSSL/wolfsm for implementation of this file" + +#endif + diff --git a/wolfcrypt/src/sp_sm2_x86_64.c b/wolfcrypt/src/sp_sm2_x86_64.c new file mode 100644 index 000000000..f73e40834 --- /dev/null +++ b/wolfcrypt/src/sp_sm2_x86_64.c @@ -0,0 +1,33 @@ +/* sp_sm2_x86_64.c + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. 
+ * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef WOLFSSL_SM2 + +#error "See https://github.com/wolfSSL/wolfsm for implementation of this file" + +#endif + diff --git a/wolfcrypt/src/sp_sm2_x86_64_asm.S b/wolfcrypt/src/sp_sm2_x86_64_asm.S new file mode 100644 index 000000000..6ddc3c77e --- /dev/null +++ b/wolfcrypt/src/sp_sm2_x86_64_asm.S @@ -0,0 +1,33 @@ +/* sp_sm2_x86_64_asm.S + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif + +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef WOLFSSL_SM2 + +#error "See https://github.com/wolfSSL/wolfsm for implementation of this file" + +#endif + diff --git a/wolfcrypt/src/sp_x86_64.c b/wolfcrypt/src/sp_x86_64.c index bf106d28f..916a32fbf 100644 --- a/wolfcrypt/src/sp_x86_64.c +++ b/wolfcrypt/src/sp_x86_64.c @@ -8600,7 +8600,7 @@ extern void sp_256_mont_sub_4(sp_digit* r, const sp_digit* a, const sp_digit* b, #ifdef __cplusplus extern "C" { #endif -extern void sp_256_div2_4(sp_digit* r, const sp_digit* a, const sp_digit* m); +extern void sp_256_mont_div2_4(sp_digit* r, const sp_digit* a, const sp_digit* m); #ifdef __cplusplus } #endif @@ -8655,7 +8655,7 @@ static void sp_256_proj_point_dbl_4(sp_point_256* r, const sp_point_256* p, /* T2 = Y * Y */ sp_256_mont_sqr_4(t2, y, p256_mod, p256_mp_mod); /* T2 = T2/2 */ - sp_256_div2_4(t2, t2, p256_mod); + sp_256_mont_div2_4(t2, t2, p256_mod); /* Y = Y * X */ sp_256_mont_mul_4(y, y, p->x, p256_mod, p256_mp_mod); /* X = T1 * T1 */ @@ -8685,7 +8685,8 @@ typedef struct sp_256_proj_point_dbl_4_ctx { * p Point to double. * t Temporary ordinate data. 
*/ -static int sp_256_proj_point_dbl_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) +static int sp_256_proj_point_dbl_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, + const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_256_proj_point_dbl_4_ctx* ctx = (sp_256_proj_point_dbl_4_ctx*)sp_ctx->data; @@ -8759,7 +8760,7 @@ static int sp_256_proj_point_dbl_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, con break; case 11: /* T2 = T2/2 */ - sp_256_div2_4(ctx->t2, ctx->t2, p256_mod); + sp_256_mont_div2_4(ctx->t2, ctx->t2, p256_mod); ctx->state = 12; break; case 12: @@ -8889,7 +8890,7 @@ static void sp_256_proj_point_dbl_n_4(sp_point_256* p, int i, sp_256_mont_sub_4(y, y, t1, p256_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_256_div2_4(y, y, p256_mod); + sp_256_mont_div2_4(y, y, p256_mod); } /* Compare two numbers to determine if they are equal. @@ -9268,7 +9269,7 @@ static void sp_256_proj_point_dbl_n_store_4(sp_point_256* r, sp_256_mont_mul_4(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_4(y, y, t1, p256_mod); /* Y = Y/2 */ - sp_256_div2_4(r[j].y, y, p256_mod); + sp_256_mont_div2_4(r[j].y, y, p256_mod); r[j].infinity = 0; } } @@ -9698,7 +9699,7 @@ extern sp_digit sp_256_cond_sub_avx2_4(sp_digit* r, const sp_digit* a, const sp_ #ifdef __cplusplus extern "C" { #endif -extern void sp_256_mont_reduce_avx2_order_4(sp_digit* a, const sp_digit* m, sp_digit mp); +extern void sp_256_mont_reduce_order_avx2_4(sp_digit* a, const sp_digit* m, sp_digit mp); #ifdef __cplusplus } #endif @@ -9749,7 +9750,7 @@ static void sp_256_map_avx2_4(sp_point_256* r, const sp_point_256* p, #ifdef __cplusplus extern "C" { #endif -extern void sp_256_div2_avx2_4(sp_digit* r, const sp_digit* a, const sp_digit* m); +extern void sp_256_mont_div2_avx2_4(sp_digit* r, const sp_digit* a, const sp_digit* m); #ifdef __cplusplus } #endif @@ -9798,7 +9799,7 @@ static void sp_256_proj_point_dbl_avx2_4(sp_point_256* r, const sp_point_256* p, /* T2 = Y * Y */ 
sp_256_mont_sqr_avx2_4(t2, y, p256_mod, p256_mp_mod); /* T2 = T2/2 */ - sp_256_div2_avx2_4(t2, t2, p256_mod); + sp_256_mont_div2_avx2_4(t2, t2, p256_mod); /* Y = Y * X */ sp_256_mont_mul_avx2_4(y, y, p->x, p256_mod, p256_mp_mod); /* X = T1 * T1 */ @@ -9828,7 +9829,8 @@ typedef struct sp_256_proj_point_dbl_avx2_4_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_256_proj_point_dbl_avx2_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t) +static int sp_256_proj_point_dbl_avx2_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, + const sp_point_256* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_256_proj_point_dbl_avx2_4_ctx* ctx = (sp_256_proj_point_dbl_avx2_4_ctx*)sp_ctx->data; @@ -9902,7 +9904,7 @@ static int sp_256_proj_point_dbl_avx2_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r break; case 11: /* T2 = T2/2 */ - sp_256_div2_avx2_4(ctx->t2, ctx->t2, p256_mod); + sp_256_mont_div2_avx2_4(ctx->t2, ctx->t2, p256_mod); ctx->state = 12; break; case 12: @@ -10032,7 +10034,7 @@ static void sp_256_proj_point_dbl_n_avx2_4(sp_point_256* p, int i, sp_256_mont_sub_avx2_4(y, y, t1, p256_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_256_div2_avx2_4(y, y, p256_mod); + sp_256_mont_div2_avx2_4(y, y, p256_mod); } @@ -10387,7 +10389,7 @@ static void sp_256_proj_point_dbl_n_store_avx2_4(sp_point_256* r, sp_256_mont_mul_avx2_4(y, b, a, p256_mod, p256_mp_mod); sp_256_mont_sub_avx2_4(y, y, t1, p256_mod); /* Y = Y/2 */ - sp_256_div2_avx2_4(r[j].y, y, p256_mod); + sp_256_mont_div2_avx2_4(r[j].y, y, p256_mod); r[j].infinity = 0; } } @@ -11053,8 +11055,8 @@ static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_256_ecc_mulmod_4(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_256_ecc_mulmod_win_add_sub_4(r, g, k, map, ct, heap); @@ -11434,8 +11436,8 @@ static int sp_256_ecc_mulmod_stripe_avx2_4(sp_point_256* r, const sp_point_256* * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_256_ecc_mulmod_avx2_4(sp_point_256* r, const sp_point_256* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_256_ecc_mulmod_avx2_4(sp_point_256* r, const sp_point_256* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_256_ecc_mulmod_win_add_sub_avx2_4(r, g, k, map, ct, heap); @@ -26417,19 +26419,21 @@ static int sp_256_ecc_is_point_4(const sp_point_256* point, if (err == MP_OKAY) { t2 = t1 + 2 * 4; + /* y^2 - x^3 - a.x = b */ sp_256_sqr_4(t1, point->y); (void)sp_256_mod_4(t1, t1, p256_mod); sp_256_sqr_4(t2, point->x); (void)sp_256_mod_4(t2, t2, p256_mod); sp_256_mul_4(t2, t2, point->x); (void)sp_256_mod_4(t2, t2, p256_mod); - (void)sp_256_sub_4(t2, p256_mod, t2); - sp_256_mont_add_4(t1, t1, t2, p256_mod); + sp_256_mont_sub_4(t1, t1, t2, p256_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_256_mont_add_4(t1, t1, point->x, p256_mod); sp_256_mont_add_4(t1, t1, point->x, p256_mod); sp_256_mont_add_4(t1, t1, point->x, p256_mod); + if (sp_256_cmp_4(t1, p256_b) != 0) { err = MP_VAL; } @@ -27717,7 +27721,7 @@ extern void sp_384_mont_sub_6(sp_digit* r, const sp_digit* a, const sp_digit* b, #ifdef __cplusplus extern "C" { #endif -extern void sp_384_div2_6(sp_digit* r, const sp_digit* a, const sp_digit* m); +extern void sp_384_mont_div2_6(sp_digit* r, const sp_digit* a, const sp_digit* m); #ifdef __cplusplus } #endif @@ -27765,7 +27769,7 @@ static void 
sp_384_proj_point_dbl_6(sp_point_384* r, const sp_point_384* p, /* T2 = Y * Y */ sp_384_mont_sqr_6(t2, y, p384_mod, p384_mp_mod); /* T2 = T2/2 */ - sp_384_div2_6(t2, t2, p384_mod); + sp_384_mont_div2_6(t2, t2, p384_mod); /* Y = Y * X */ sp_384_mont_mul_6(y, y, p->x, p384_mod, p384_mp_mod); /* X = T1 * T1 */ @@ -27798,7 +27802,8 @@ typedef struct sp_384_proj_point_dbl_6_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_384_proj_point_dbl_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) +static int sp_384_proj_point_dbl_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, + const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_384_proj_point_dbl_6_ctx* ctx = (sp_384_proj_point_dbl_6_ctx*)sp_ctx->data; @@ -27872,7 +27877,7 @@ static int sp_384_proj_point_dbl_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, con break; case 11: /* T2 = T2/2 */ - sp_384_div2_6(ctx->t2, ctx->t2, p384_mod); + sp_384_mont_div2_6(ctx->t2, ctx->t2, p384_mod); ctx->state = 12; break; case 12: @@ -28010,7 +28015,7 @@ static void sp_384_proj_point_dbl_n_6(sp_point_384* p, int i, sp_384_mont_sub_6(y, y, t1, p384_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_384_div2_6(y, y, p384_mod); + sp_384_mont_div2_6(y, y, p384_mod); } /* Compare two numbers to determine if they are equal. 
@@ -28396,7 +28401,7 @@ static void sp_384_proj_point_dbl_n_store_6(sp_point_384* r, sp_384_mont_mul_6(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_6(y, y, t1, p384_mod); /* Y = Y/2 */ - sp_384_div2_6(r[j].y, y, p384_mod); + sp_384_mont_div2_6(r[j].y, y, p384_mod); r[j].infinity = 0; } } @@ -28913,7 +28918,7 @@ static void sp_384_map_avx2_6(sp_point_384* r, const sp_point_384* p, #ifdef __cplusplus extern "C" { #endif -extern void sp_384_div2_avx2_6(sp_digit* r, const sp_digit* a, const sp_digit* m); +extern void sp_384_mont_div2_avx2_6(sp_digit* r, const sp_digit* a, const sp_digit* m); #ifdef __cplusplus } #endif @@ -28961,7 +28966,7 @@ static void sp_384_proj_point_dbl_avx2_6(sp_point_384* r, const sp_point_384* p, /* T2 = Y * Y */ sp_384_mont_sqr_avx2_6(t2, y, p384_mod, p384_mp_mod); /* T2 = T2/2 */ - sp_384_div2_avx2_6(t2, t2, p384_mod); + sp_384_mont_div2_avx2_6(t2, t2, p384_mod); /* Y = Y * X */ sp_384_mont_mul_avx2_6(y, y, p->x, p384_mod, p384_mp_mod); /* X = T1 * T1 */ @@ -28994,7 +28999,8 @@ typedef struct sp_384_proj_point_dbl_avx2_6_ctx { * p Point to double. * t Temporary ordinate data. 
*/ -static int sp_384_proj_point_dbl_avx2_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t) +static int sp_384_proj_point_dbl_avx2_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, + const sp_point_384* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_384_proj_point_dbl_avx2_6_ctx* ctx = (sp_384_proj_point_dbl_avx2_6_ctx*)sp_ctx->data; @@ -29068,7 +29074,7 @@ static int sp_384_proj_point_dbl_avx2_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r break; case 11: /* T2 = T2/2 */ - sp_384_div2_avx2_6(ctx->t2, ctx->t2, p384_mod); + sp_384_mont_div2_avx2_6(ctx->t2, ctx->t2, p384_mod); ctx->state = 12; break; case 12: @@ -29206,7 +29212,7 @@ static void sp_384_proj_point_dbl_n_avx2_6(sp_point_384* p, int i, sp_384_mont_sub_avx2_6(y, y, t1, p384_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_384_div2_avx2_6(y, y, p384_mod); + sp_384_mont_div2_avx2_6(y, y, p384_mod); } @@ -29568,7 +29574,7 @@ static void sp_384_proj_point_dbl_n_store_avx2_6(sp_point_384* r, sp_384_mont_mul_avx2_6(y, b, a, p384_mod, p384_mp_mod); sp_384_mont_sub_avx2_6(y, y, t1, p384_mod); /* Y = Y/2 */ - sp_384_div2_avx2_6(r[j].y, y, p384_mod); + sp_384_mont_div2_avx2_6(r[j].y, y, p384_mod); r[j].infinity = 0; } } @@ -30237,8 +30243,8 @@ static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_384_ecc_mulmod_6(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_384_ecc_mulmod_win_add_sub_6(r, g, k, map, ct, heap); @@ -30621,8 +30627,8 @@ static int sp_384_ecc_mulmod_stripe_avx2_6(sp_point_384* r, const sp_point_384* * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_384_ecc_mulmod_avx2_6(sp_point_384* r, const sp_point_384* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_384_ecc_mulmod_avx2_6(sp_point_384* r, const sp_point_384* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_384_ecc_mulmod_win_add_sub_avx2_6(r, g, k, map, ct, heap); @@ -50861,7 +50867,7 @@ static int sp_384_mod_inv_6(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && sp_384_cmp_6(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && (sp_384_cmp_6(u, v) >= 0))) { sp_384_sub_6(u, u, v); o = sp_384_sub_6(b, b, d); if (o != 0) @@ -51360,19 +51366,21 @@ static int sp_384_ecc_is_point_6(const sp_point_384* point, if (err == MP_OKAY) { t2 = t1 + 2 * 6; + /* y^2 - x^3 - a.x = b */ sp_384_sqr_6(t1, point->y); (void)sp_384_mod_6(t1, t1, p384_mod); sp_384_sqr_6(t2, point->x); (void)sp_384_mod_6(t2, t2, p384_mod); sp_384_mul_6(t2, t2, point->x); (void)sp_384_mod_6(t2, t2, p384_mod); - (void)sp_384_sub_6(t2, p384_mod, t2); - sp_384_mont_add_6(t1, t1, t2, p384_mod); + sp_384_mont_sub_6(t1, t1, t2, p384_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_384_mont_add_6(t1, t1, point->x, p384_mod); sp_384_mont_add_6(t1, t1, point->x, p384_mod); sp_384_mont_add_6(t1, t1, point->x, p384_mod); + if (sp_384_cmp_6(t1, p384_b) != 0) { err = MP_VAL; } @@ -52612,7 +52620,7 @@ extern void sp_521_mont_sub_9(sp_digit* r, const sp_digit* a, const sp_digit* b, #ifdef __cplusplus extern "C" { #endif -extern void sp_521_div2_9(sp_digit* r, const sp_digit* a, const sp_digit* m); +extern void sp_521_mont_div2_9(sp_digit* r, const sp_digit* a, const sp_digit* m); #ifdef __cplusplus } #endif @@ -52660,7 +52668,7 @@ static void sp_521_proj_point_dbl_9(sp_point_521* r, const sp_point_521* p, /* T2 = Y * Y */ sp_521_mont_sqr_9(t2, y, p521_mod, p521_mp_mod); /* T2 = T2/2 */ - sp_521_div2_9(t2, t2, p521_mod); + sp_521_mont_div2_9(t2, t2, p521_mod); /* Y = Y * X */ 
sp_521_mont_mul_9(y, y, p->x, p521_mod, p521_mp_mod); /* X = T1 * T1 */ @@ -52693,7 +52701,8 @@ typedef struct sp_521_proj_point_dbl_9_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_521_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, sp_digit* t) +static int sp_521_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, + const sp_point_521* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_521_proj_point_dbl_9_ctx* ctx = (sp_521_proj_point_dbl_9_ctx*)sp_ctx->data; @@ -52767,7 +52776,7 @@ static int sp_521_proj_point_dbl_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, con break; case 11: /* T2 = T2/2 */ - sp_521_div2_9(ctx->t2, ctx->t2, p521_mod); + sp_521_mont_div2_9(ctx->t2, ctx->t2, p521_mod); ctx->state = 12; break; case 12: @@ -52905,7 +52914,7 @@ static void sp_521_proj_point_dbl_n_9(sp_point_521* p, int i, sp_521_mont_sub_9(y, y, t1, p521_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_521_div2_9(y, y, p521_mod); + sp_521_mont_div2_9(y, y, p521_mod); } /* Compare two numbers to determine if they are equal. 
@@ -53293,7 +53302,7 @@ static void sp_521_proj_point_dbl_n_store_9(sp_point_521* r, sp_521_mont_mul_9(y, b, a, p521_mod, p521_mp_mod); sp_521_mont_sub_9(y, y, t1, p521_mod); /* Y = Y/2 */ - sp_521_div2_9(r[j].y, y, p521_mod); + sp_521_mont_div2_9(r[j].y, y, p521_mod); r[j].infinity = 0; } } @@ -53787,7 +53796,7 @@ static void sp_521_map_avx2_9(sp_point_521* r, const sp_point_521* p, #ifdef __cplusplus extern "C" { #endif -extern void sp_521_div2_avx2_9(sp_digit* r, const sp_digit* a, const sp_digit* m); +extern void sp_521_mont_div2_avx2_9(sp_digit* r, const sp_digit* a, const sp_digit* m); #ifdef __cplusplus } #endif @@ -53835,7 +53844,7 @@ static void sp_521_proj_point_dbl_avx2_9(sp_point_521* r, const sp_point_521* p, /* T2 = Y * Y */ sp_521_mont_sqr_avx2_9(t2, y, p521_mod, p521_mp_mod); /* T2 = T2/2 */ - sp_521_div2_avx2_9(t2, t2, p521_mod); + sp_521_mont_div2_avx2_9(t2, t2, p521_mod); /* Y = Y * X */ sp_521_mont_mul_avx2_9(y, y, p->x, p521_mod, p521_mp_mod); /* X = T1 * T1 */ @@ -53868,7 +53877,8 @@ typedef struct sp_521_proj_point_dbl_avx2_9_ctx { * p Point to double. * t Temporary ordinate data. 
*/ -static int sp_521_proj_point_dbl_avx2_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, const sp_point_521* p, sp_digit* t) +static int sp_521_proj_point_dbl_avx2_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, + const sp_point_521* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_521_proj_point_dbl_avx2_9_ctx* ctx = (sp_521_proj_point_dbl_avx2_9_ctx*)sp_ctx->data; @@ -53942,7 +53952,7 @@ static int sp_521_proj_point_dbl_avx2_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r break; case 11: /* T2 = T2/2 */ - sp_521_div2_avx2_9(ctx->t2, ctx->t2, p521_mod); + sp_521_mont_div2_avx2_9(ctx->t2, ctx->t2, p521_mod); ctx->state = 12; break; case 12: @@ -54080,7 +54090,7 @@ static void sp_521_proj_point_dbl_n_avx2_9(sp_point_521* p, int i, sp_521_mont_sub_avx2_9(y, y, t1, p521_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_521_div2_avx2_9(y, y, p521_mod); + sp_521_mont_div2_avx2_9(y, y, p521_mod); } @@ -54442,7 +54452,7 @@ static void sp_521_proj_point_dbl_n_store_avx2_9(sp_point_521* r, sp_521_mont_mul_avx2_9(y, b, a, p521_mod, p521_mp_mod); sp_521_mont_sub_avx2_9(y, y, t1, p521_mod); /* Y = Y/2 */ - sp_521_div2_avx2_9(r[j].y, y, p521_mod); + sp_521_mont_div2_avx2_9(r[j].y, y, p521_mod); r[j].infinity = 0; } } @@ -55111,8 +55121,8 @@ static void sp_ecc_get_cache_521(const sp_point_521* g, sp_cache_521_t** cache) * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_521_ecc_mulmod_9(sp_point_521* r, const sp_point_521* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_521_ecc_mulmod_9(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_521_ecc_mulmod_win_add_sub_9(r, g, k, map, ct, heap); @@ -55495,8 +55505,8 @@ static int sp_521_ecc_mulmod_stripe_avx2_9(sp_point_521* r, const sp_point_521* * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. 
*/ -static int sp_521_ecc_mulmod_avx2_9(sp_point_521* r, const sp_point_521* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_521_ecc_mulmod_avx2_9(sp_point_521* r, const sp_point_521* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_521_ecc_mulmod_win_add_sub_avx2_9(r, g, k, map, ct, heap); @@ -91985,7 +91995,7 @@ static int sp_521_mod_inv_9(sp_digit* r, const sp_digit* a, const sp_digit* m) } while (ut > 1 && vt > 1) { - if (ut > vt || (ut == vt && sp_521_cmp_9(u, v) >= 0)) { + if ((ut > vt) || ((ut == vt) && (sp_521_cmp_9(u, v) >= 0))) { sp_521_sub_9(u, u, v); o = sp_521_sub_9(b, b, d); if (o != 0) @@ -92494,19 +92504,21 @@ static int sp_521_ecc_is_point_9(const sp_point_521* point, if (err == MP_OKAY) { t2 = t1 + 2 * 9; + /* y^2 - x^3 - a.x = b */ sp_521_sqr_9(t1, point->y); (void)sp_521_mod_9(t1, t1, p521_mod); sp_521_sqr_9(t2, point->x); (void)sp_521_mod_9(t2, t2, p521_mod); sp_521_mul_9(t2, t2, point->x); (void)sp_521_mod_9(t2, t2, p521_mod); - (void)sp_521_sub_9(t2, p521_mod, t2); - sp_521_mont_add_9(t1, t1, t2, p521_mod); + sp_521_mont_sub_9(t1, t1, t2, p521_mod); + /* y^2 - x^3 + 3.x = b, when a = -3 */ sp_521_mont_add_9(t1, t1, point->x, p521_mod); sp_521_mont_add_9(t1, t1, point->x, p521_mod); sp_521_mont_add_9(t1, t1, point->x, p521_mod); + if (sp_521_cmp_9(t1, p521_b) != 0) { err = MP_VAL; } @@ -93862,7 +93874,7 @@ extern void sp_1024_mont_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* #ifdef __cplusplus extern "C" { #endif -extern void sp_1024_div2_16(sp_digit* r, const sp_digit* a, const sp_digit* m); +extern void sp_1024_mont_div2_16(sp_digit* r, const sp_digit* a, const sp_digit* m); #ifdef __cplusplus } #endif @@ -93910,7 +93922,7 @@ static void sp_1024_proj_point_dbl_16(sp_point_1024* r, const sp_point_1024* p, /* T2 = Y * Y */ sp_1024_mont_sqr_16(t2, y, p1024_mod, p1024_mp_mod); /* T2 = T2/2 */ - sp_1024_div2_16(t2, t2, p1024_mod); + sp_1024_mont_div2_16(t2, t2, p1024_mod); /* Y = Y * X 
*/ sp_1024_mont_mul_16(y, y, p->x, p1024_mod, p1024_mp_mod); /* X = T1 * T1 */ @@ -93943,7 +93955,8 @@ typedef struct sp_1024_proj_point_dbl_16_ctx { * p Point to double. * t Temporary ordinate data. */ -static int sp_1024_proj_point_dbl_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) +static int sp_1024_proj_point_dbl_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, + const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_1024_proj_point_dbl_16_ctx* ctx = (sp_1024_proj_point_dbl_16_ctx*)sp_ctx->data; @@ -94017,7 +94030,7 @@ static int sp_1024_proj_point_dbl_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, break; case 11: /* T2 = T2/2 */ - sp_1024_div2_16(ctx->t2, ctx->t2, p1024_mod); + sp_1024_mont_div2_16(ctx->t2, ctx->t2, p1024_mod); ctx->state = 12; break; case 12: @@ -94155,16 +94168,9 @@ static void sp_1024_proj_point_dbl_n_16(sp_point_1024* p, int i, sp_1024_mont_sub_16(y, y, t1, p1024_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_1024_div2_16(y, y, p1024_mod); + sp_1024_mont_div2_16(y, y, p1024_mod); } -#ifdef __cplusplus -extern "C" { -#endif -extern sp_digit sp_1024_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b); -#ifdef __cplusplus -} -#endif /* Compare two numbers to determine if they are equal. * Constant time implementation. 
* @@ -94553,7 +94559,7 @@ static void sp_1024_proj_point_dbl_n_store_16(sp_point_1024* r, sp_1024_mont_mul_16(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_16(y, y, t1, p1024_mod); /* Y = Y/2 */ - sp_1024_div2_16(r[j].y, y, p1024_mod); + sp_1024_mont_div2_16(r[j].y, y, p1024_mod); r[j].infinity = 0; } } @@ -95017,7 +95023,7 @@ extern void sp_1024_mont_sub_avx2_16(sp_digit* r, const sp_digit* a, const sp_di #ifdef __cplusplus extern "C" { #endif -extern void sp_1024_div2_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* m); +extern void sp_1024_mont_div2_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* m); #ifdef __cplusplus } #endif @@ -95065,7 +95071,7 @@ static void sp_1024_proj_point_dbl_avx2_16(sp_point_1024* r, const sp_point_1024 /* T2 = Y * Y */ sp_1024_mont_sqr_avx2_16(t2, y, p1024_mod, p1024_mp_mod); /* T2 = T2/2 */ - sp_1024_div2_avx2_16(t2, t2, p1024_mod); + sp_1024_mont_div2_avx2_16(t2, t2, p1024_mod); /* Y = Y * X */ sp_1024_mont_mul_avx2_16(y, y, p->x, p1024_mod, p1024_mp_mod); /* X = T1 * T1 */ @@ -95098,7 +95104,8 @@ typedef struct sp_1024_proj_point_dbl_avx2_16_ctx { * p Point to double. * t Temporary ordinate data. 
*/ -static int sp_1024_proj_point_dbl_avx2_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t) +static int sp_1024_proj_point_dbl_avx2_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, + const sp_point_1024* p, sp_digit* t) { int err = FP_WOULDBLOCK; sp_1024_proj_point_dbl_avx2_16_ctx* ctx = (sp_1024_proj_point_dbl_avx2_16_ctx*)sp_ctx->data; @@ -95172,7 +95179,7 @@ static int sp_1024_proj_point_dbl_avx2_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024 break; case 11: /* T2 = T2/2 */ - sp_1024_div2_avx2_16(ctx->t2, ctx->t2, p1024_mod); + sp_1024_mont_div2_avx2_16(ctx->t2, ctx->t2, p1024_mod); ctx->state = 12; break; case 12: @@ -95310,7 +95317,7 @@ static void sp_1024_proj_point_dbl_n_avx2_16(sp_point_1024* p, int i, sp_1024_mont_sub_avx2_16(y, y, t1, p1024_mod); #endif /* WOLFSSL_SP_SMALL */ /* Y = Y/2 */ - sp_1024_div2_avx2_16(y, y, p1024_mod); + sp_1024_mont_div2_avx2_16(y, y, p1024_mod); } @@ -95672,7 +95679,7 @@ static void sp_1024_proj_point_dbl_n_store_avx2_16(sp_point_1024* r, sp_1024_mont_mul_avx2_16(y, b, a, p1024_mod, p1024_mp_mod); sp_1024_mont_sub_avx2_16(y, y, t1, p1024_mod); /* Y = Y/2 */ - sp_1024_div2_avx2_16(r[j].y, y, p1024_mod); + sp_1024_mont_div2_avx2_16(r[j].y, y, p1024_mod); r[j].infinity = 0; } } @@ -96314,8 +96321,8 @@ static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cach * heap Heap to use for allocation. * returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_1024_ecc_mulmod_16(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_1024_ecc_mulmod_16(sp_point_1024* r, const sp_point_1024* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_1024_ecc_mulmod_win_add_sub_16(r, g, k, map, ct, heap); @@ -96681,8 +96688,8 @@ static int sp_1024_ecc_mulmod_stripe_avx2_16(sp_point_1024* r, const sp_point_10 * heap Heap to use for allocation. 
* returns MEMORY_E when memory allocation fails and MP_OKAY on success. */ -static int sp_1024_ecc_mulmod_avx2_16(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k, - int map, int ct, void* heap) +static int sp_1024_ecc_mulmod_avx2_16(sp_point_1024* r, const sp_point_1024* g, + const sp_digit* k, int map, int ct, void* heap) { #ifndef FP_ECC return sp_1024_ecc_mulmod_win_add_sub_avx2_16(r, g, k, map, ct, heap); @@ -102699,7 +102706,7 @@ static void sp_1024_accumulate_line_dbl_16(sp_digit* vx, sp_digit* vy, /* ty = 4 * p.y ^ 2 */ sp_1024_mont_sqr_16(ty, ry, p1024_mod, p1024_mp_mod); /* t1 = 2 * p.y ^ 2 */ - sp_1024_div2_16(t1, ty, p1024_mod); + sp_1024_mont_div2_16(t1, ty, p1024_mod); /* r.x -= 2 * (p.y ^ 2) */ sp_1024_mont_sub_16(rx, rx, t1, p1024_mod); /* p'.z = p.y * 2 * p.z */ @@ -102719,7 +102726,7 @@ static void sp_1024_accumulate_line_dbl_16(sp_digit* vx, sp_digit* vy, /* t1 = (4 * p.y^2) ^ 2 = 16 * p.y^4 */ sp_1024_mont_sqr_16(t1, ty, p1024_mod, p1024_mp_mod); /* t1 = 16 * p.y^4 / 2 = 8 * p.y^4 */ - sp_1024_div2_16(t1, t1, p1024_mod); + sp_1024_mont_div2_16(t1, t1, p1024_mod); /* p'.y = 4 * p.y^2 * p.x */ sp_1024_mont_mul_16(p->y, ty, p->x, p1024_mod, p1024_mp_mod); /* p'.x = l^2 */ @@ -103137,7 +103144,7 @@ static void sp_1024_accumulate_line_dbl_n_16(sp_digit* vx, sp_digit* vy, /* ty = py ^ 2 */ sp_1024_mont_sqr_16(ty, p->y, p1024_mod, p1024_mp_mod); /* t1 = py ^ 2 / 2 */ - sp_1024_div2_16(t1, ty, p1024_mod); + sp_1024_mont_div2_16(t1, ty, p1024_mod); /* r.x -= py ^ 2 / 2 */ sp_1024_mont_sub_16(rx, rx, t1, p1024_mod); /* p'.z = py * pz */ @@ -103175,7 +103182,7 @@ static void sp_1024_accumulate_line_dbl_n_16(sp_digit* vx, sp_digit* vy, } /* p'.y = py' / 2 */ - sp_1024_div2_16(p->y, p->y, p1024_mod); + sp_1024_mont_div2_16(p->y, p->y, p1024_mod); } /* Operations to perform based on order - 1. 
@@ -103500,7 +103507,7 @@ static void sp_1024_accumulate_line_dbl_avx2_16(sp_digit* vx, sp_digit* vy, /* ty = 4 * p.y ^ 2 */ sp_1024_mont_sqr_avx2_16(ty, ry, p1024_mod, p1024_mp_mod); /* t1 = 2 * p.y ^ 2 */ - sp_1024_div2_avx2_16(t1, ty, p1024_mod); + sp_1024_mont_div2_avx2_16(t1, ty, p1024_mod); /* r.x -= 2 * (p.y ^ 2) */ sp_1024_mont_sub_avx2_16(rx, rx, t1, p1024_mod); /* p'.z = p.y * 2 * p.z */ @@ -103520,7 +103527,7 @@ static void sp_1024_accumulate_line_dbl_avx2_16(sp_digit* vx, sp_digit* vy, /* t1 = (4 * p.y^2) ^ 2 = 16 * p.y^4 */ sp_1024_mont_sqr_avx2_16(t1, ty, p1024_mod, p1024_mp_mod); /* t1 = 16 * p.y^4 / 2 = 8 * p.y^4 */ - sp_1024_div2_avx2_16(t1, t1, p1024_mod); + sp_1024_mont_div2_avx2_16(t1, t1, p1024_mod); /* p'.y = 4 * p.y^2 * p.x */ sp_1024_mont_mul_avx2_16(p->y, ty, p->x, p1024_mod, p1024_mp_mod); /* p'.x = l^2 */ @@ -103938,7 +103945,7 @@ static void sp_1024_accumulate_line_dbl_n_avx2_16(sp_digit* vx, sp_digit* vy, /* ty = py ^ 2 */ sp_1024_mont_sqr_avx2_16(ty, p->y, p1024_mod, p1024_mp_mod); /* t1 = py ^ 2 / 2 */ - sp_1024_div2_avx2_16(t1, ty, p1024_mod); + sp_1024_mont_div2_avx2_16(t1, ty, p1024_mod); /* r.x -= py ^ 2 / 2 */ sp_1024_mont_sub_avx2_16(rx, rx, t1, p1024_mod); /* p'.z = py * pz */ @@ -103976,7 +103983,7 @@ static void sp_1024_accumulate_line_dbl_n_avx2_16(sp_digit* vx, sp_digit* vy, } /* p'.y = py' / 2 */ - sp_1024_div2_avx2_16(p->y, p->y, p1024_mod); + sp_1024_mont_div2_avx2_16(p->y, p->y, p1024_mod); } /* @@ -105429,19 +105436,21 @@ static int sp_1024_ecc_is_point_16(const sp_point_1024* point, if (err == MP_OKAY) { t2 = t1 + 2 * 16; + /* y^2 - x^3 - a.x = b */ sp_1024_sqr_16(t1, point->y); (void)sp_1024_mod_16(t1, t1, p1024_mod); sp_1024_sqr_16(t2, point->x); (void)sp_1024_mod_16(t2, t2, p1024_mod); sp_1024_mul_16(t2, t2, point->x); (void)sp_1024_mod_16(t2, t2, p1024_mod); - (void)sp_1024_sub_16(t2, p1024_mod, t2); - sp_1024_mont_add_16(t1, t1, t2, p1024_mod); + sp_1024_mont_sub_16(t1, t1, t2, p1024_mod); + /* y^2 - x^3 + 3.x = 
b, when a = -3 */ sp_1024_mont_add_16(t1, t1, point->x, p1024_mod); sp_1024_mont_add_16(t1, t1, point->x, p1024_mod); sp_1024_mont_add_16(t1, t1, point->x, p1024_mod); + n = sp_1024_cmp_16(t1, p1024_mod); sp_1024_cond_sub_16(t1, t1, p1024_mod, ~(n >> 63)); sp_1024_norm_16(t1); diff --git a/wolfcrypt/src/sp_x86_64_asm.S b/wolfcrypt/src/sp_x86_64_asm.S index 310f4611d..4c423bc82 100644 --- a/wolfcrypt/src/sp_x86_64_asm.S +++ b/wolfcrypt/src/sp_x86_64_asm.S @@ -56743,26 +56743,25 @@ _sp_256_mont_sub_4: */ #ifndef __APPLE__ .text -.globl sp_256_div2_4 -.type sp_256_div2_4,@function +.globl sp_256_mont_div2_4 +.type sp_256_mont_div2_4,@function .align 16 -sp_256_div2_4: +sp_256_mont_div2_4: #else .section __TEXT,__text -.globl _sp_256_div2_4 +.globl _sp_256_mont_div2_4 .p2align 4 -_sp_256_div2_4: +_sp_256_mont_div2_4: #endif /* __APPLE__ */ movq (%rsi), %rdx movq 8(%rsi), %rax movq 16(%rsi), %rcx movq 24(%rsi), %r8 - movq $0xffffffff, %r9 movq $0xffffffff00000001, %r10 movq %rdx, %r11 andq $0x01, %r11 negq %r11 - andq %r11, %r9 + movl %r11d, %r9d andq %r11, %r10 addq %r11, %rdx adcq %r9, %rax @@ -56780,7 +56779,7 @@ _sp_256_div2_4: movq %r8, 24(%rdi) repz retq #ifndef __APPLE__ -.size sp_256_div2_4,.-sp_256_div2_4 +.size sp_256_mont_div2_4,.-sp_256_mont_div2_4 #endif /* __APPLE__ */ /* Two Montgomery numbers, subtract double second from first (r = a - 2.b % m). 
* @@ -57424,15 +57423,15 @@ _sp_256_cond_sub_avx2_4: */ #ifndef __APPLE__ .text -.globl sp_256_mont_reduce_avx2_order_4 -.type sp_256_mont_reduce_avx2_order_4,@function +.globl sp_256_mont_reduce_order_avx2_4 +.type sp_256_mont_reduce_order_avx2_4,@function .align 16 -sp_256_mont_reduce_avx2_order_4: +sp_256_mont_reduce_order_avx2_4: #else .section __TEXT,__text -.globl _sp_256_mont_reduce_avx2_order_4 +.globl _sp_256_mont_reduce_order_avx2_4 .p2align 4 -_sp_256_mont_reduce_avx2_order_4: +_sp_256_mont_reduce_order_avx2_4: #endif /* __APPLE__ */ pushq %r12 pushq %r13 @@ -57576,7 +57575,7 @@ _sp_256_mont_reduce_avx2_order_4: popq %r12 repz retq #ifndef __APPLE__ -.size sp_256_mont_reduce_avx2_order_4,.-sp_256_mont_reduce_avx2_order_4 +.size sp_256_mont_reduce_order_avx2_4,.-sp_256_mont_reduce_order_avx2_4 #endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 @@ -57588,26 +57587,25 @@ _sp_256_mont_reduce_avx2_order_4: */ #ifndef __APPLE__ .text -.globl sp_256_div2_avx2_4 -.type sp_256_div2_avx2_4,@function +.globl sp_256_mont_div2_avx2_4 +.type sp_256_mont_div2_avx2_4,@function .align 16 -sp_256_div2_avx2_4: +sp_256_mont_div2_avx2_4: #else .section __TEXT,__text -.globl _sp_256_div2_avx2_4 +.globl _sp_256_mont_div2_avx2_4 .p2align 4 -_sp_256_div2_avx2_4: +_sp_256_mont_div2_avx2_4: #endif /* __APPLE__ */ movq (%rsi), %rdx movq 8(%rsi), %rax movq 16(%rsi), %rcx movq 24(%rsi), %r8 - movq $0xffffffff, %r9 movq $0xffffffff00000001, %r10 movq %rdx, %r11 andq $0x01, %r11 negq %r11 - andq %r11, %r9 + movl %r11d, %r9d andq %r11, %r10 addq %r11, %rdx adcq %r9, %rax @@ -57625,7 +57623,7 @@ _sp_256_div2_avx2_4: movq %r8, 24(%rdi) repz retq #ifndef __APPLE__ -.size sp_256_div2_avx2_4,.-sp_256_div2_avx2_4 +.size sp_256_mont_div2_avx2_4,.-sp_256_mont_div2_avx2_4 #endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #ifndef WC_NO_CACHE_RESISTANT @@ -61092,15 +61090,15 @@ _sp_384_mont_sub_6: */ #ifndef __APPLE__ .text -.globl sp_384_div2_6 -.type 
sp_384_div2_6,@function +.globl sp_384_mont_div2_6 +.type sp_384_mont_div2_6,@function .align 16 -sp_384_div2_6: +sp_384_mont_div2_6: #else .section __TEXT,__text -.globl _sp_384_div2_6 +.globl _sp_384_mont_div2_6 .p2align 4 -_sp_384_div2_6: +_sp_384_mont_div2_6: #endif /* __APPLE__ */ subq $48, %rsp movq (%rsi), %r11 @@ -61159,7 +61157,7 @@ _sp_384_div2_6: addq $48, %rsp repz retq #ifndef __APPLE__ -.size sp_384_div2_6,.-sp_384_div2_6 +.size sp_384_mont_div2_6,.-sp_384_mont_div2_6 #endif /* __APPLE__ */ #ifndef WC_NO_CACHE_RESISTANT /* Touch each possible point that could be being copied. @@ -61727,15 +61725,15 @@ _sp_384_cond_sub_avx2_6: */ #ifndef __APPLE__ .text -.globl sp_384_div2_avx2_6 -.type sp_384_div2_avx2_6,@function +.globl sp_384_mont_div2_avx2_6 +.type sp_384_mont_div2_avx2_6,@function .align 16 -sp_384_div2_avx2_6: +sp_384_mont_div2_avx2_6: #else .section __TEXT,__text -.globl _sp_384_div2_avx2_6 +.globl _sp_384_mont_div2_avx2_6 .p2align 4 -_sp_384_div2_avx2_6: +_sp_384_mont_div2_avx2_6: #endif /* __APPLE__ */ movq (%rsi), %r11 xorq %r10, %r10 @@ -61793,7 +61791,7 @@ _sp_384_div2_avx2_6: movq %r9, 40(%rdi) repz retq #ifndef __APPLE__ -.size sp_384_div2_avx2_6,.-sp_384_div2_avx2_6 +.size sp_384_mont_div2_avx2_6,.-sp_384_mont_div2_avx2_6 #endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #ifndef WC_NO_CACHE_RESISTANT @@ -66645,15 +66643,15 @@ _sp_521_mont_sub_9: */ #ifndef __APPLE__ .text -.globl sp_521_div2_9 -.type sp_521_div2_9,@function +.globl sp_521_mont_div2_9 +.type sp_521_mont_div2_9,@function .align 16 -sp_521_div2_9: +sp_521_mont_div2_9: #else .section __TEXT,__text -.globl _sp_521_div2_9 +.globl _sp_521_mont_div2_9 .p2align 4 -_sp_521_div2_9: +_sp_521_mont_div2_9: #endif /* __APPLE__ */ pushq %r12 pushq %r13 @@ -66703,7 +66701,7 @@ _sp_521_div2_9: popq %r12 repz retq #ifndef __APPLE__ -.size sp_521_div2_9,.-sp_521_div2_9 +.size sp_521_mont_div2_9,.-sp_521_mont_div2_9 #endif /* __APPLE__ */ #ifndef WC_NO_CACHE_RESISTANT /* Touch each 
possible point that could be being copied. @@ -68411,15 +68409,15 @@ L_521_mont_reduce_order_avx2_9_loop: */ #ifndef __APPLE__ .text -.globl sp_521_div2_avx2_9 -.type sp_521_div2_avx2_9,@function +.globl sp_521_mont_div2_avx2_9 +.type sp_521_mont_div2_avx2_9,@function .align 16 -sp_521_div2_avx2_9: +sp_521_mont_div2_avx2_9: #else .section __TEXT,__text -.globl _sp_521_div2_avx2_9 +.globl _sp_521_mont_div2_avx2_9 .p2align 4 -_sp_521_div2_avx2_9: +_sp_521_mont_div2_avx2_9: #endif /* __APPLE__ */ pushq %r12 pushq %r13 @@ -68469,7 +68467,7 @@ _sp_521_div2_avx2_9: popq %r12 repz retq #ifndef __APPLE__ -.size sp_521_div2_avx2_9,.-sp_521_div2_avx2_9 +.size sp_521_mont_div2_avx2_9,.-sp_521_mont_div2_avx2_9 #endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #ifndef WC_NO_CACHE_RESISTANT @@ -77322,15 +77320,15 @@ _sp_1024_mont_sub_16: */ #ifndef __APPLE__ .text -.globl sp_1024_div2_16 -.type sp_1024_div2_16,@function +.globl sp_1024_mont_div2_16 +.type sp_1024_mont_div2_16,@function .align 16 -sp_1024_div2_16: +sp_1024_mont_div2_16: #else .section __TEXT,__text -.globl _sp_1024_div2_16 +.globl _sp_1024_mont_div2_16 .p2align 4 -_sp_1024_div2_16: +_sp_1024_mont_div2_16: #endif /* __APPLE__ */ subq $0x80, %rsp movq (%rsi), %r11 @@ -77469,78 +77467,7 @@ _sp_1024_div2_16: addq $0x80, %rsp repz retq #ifndef __APPLE__ -.size sp_1024_div2_16,.-sp_1024_div2_16 -#endif /* __APPLE__ */ -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -#ifndef __APPLE__ -.text -.globl sp_1024_sub_16 -.type sp_1024_sub_16,@function -.align 16 -sp_1024_sub_16: -#else -.section __TEXT,__text -.globl _sp_1024_sub_16 -.p2align 4 -_sp_1024_sub_16: -#endif /* __APPLE__ */ - movq (%rsi), %rcx - subq (%rdx), %rcx - movq 8(%rsi), %r8 - movq %rcx, (%rdi) - sbbq 8(%rdx), %r8 - movq 16(%rsi), %rcx - movq %r8, 8(%rdi) - sbbq 16(%rdx), %rcx - movq 24(%rsi), %r8 - movq %rcx, 16(%rdi) - sbbq 24(%rdx), %r8 - movq 32(%rsi), %rcx - movq %r8, 24(%rdi) - sbbq 32(%rdx), %rcx - movq 40(%rsi), %r8 - movq %rcx, 32(%rdi) - sbbq 40(%rdx), %r8 - movq 48(%rsi), %rcx - movq %r8, 40(%rdi) - sbbq 48(%rdx), %rcx - movq 56(%rsi), %r8 - movq %rcx, 48(%rdi) - sbbq 56(%rdx), %r8 - movq 64(%rsi), %rcx - movq %r8, 56(%rdi) - sbbq 64(%rdx), %rcx - movq 72(%rsi), %r8 - movq %rcx, 64(%rdi) - sbbq 72(%rdx), %r8 - movq 80(%rsi), %rcx - movq %r8, 72(%rdi) - sbbq 80(%rdx), %rcx - movq 88(%rsi), %r8 - movq %rcx, 80(%rdi) - sbbq 88(%rdx), %r8 - movq 96(%rsi), %rcx - movq %r8, 88(%rdi) - sbbq 96(%rdx), %rcx - movq 104(%rsi), %r8 - movq %rcx, 96(%rdi) - sbbq 104(%rdx), %r8 - movq 112(%rsi), %rcx - movq %r8, 104(%rdi) - sbbq 112(%rdx), %rcx - movq 120(%rsi), %r8 - movq %rcx, 112(%rdi) - sbbq 120(%rdx), %r8 - movq %r8, 120(%rdi) - sbbq %rax, %rax - repz retq -#ifndef __APPLE__ -.size sp_1024_sub_16,.-sp_1024_sub_16 +.size sp_1024_mont_div2_16,.-sp_1024_mont_div2_16 #endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Reduce the number back to 1024 bits using Montgomery reduction. 
@@ -78657,15 +78584,15 @@ _sp_1024_mont_sub_avx2_16: */ #ifndef __APPLE__ .text -.globl sp_1024_div2_avx2_16 -.type sp_1024_div2_avx2_16,@function +.globl sp_1024_mont_div2_avx2_16 +.type sp_1024_mont_div2_avx2_16,@function .align 16 -sp_1024_div2_avx2_16: +sp_1024_mont_div2_avx2_16: #else .section __TEXT,__text -.globl _sp_1024_div2_avx2_16 +.globl _sp_1024_mont_div2_avx2_16 .p2align 4 -_sp_1024_div2_avx2_16: +_sp_1024_mont_div2_avx2_16: #endif /* __APPLE__ */ movq (%rsi), %r11 xorq %r10, %r10 @@ -78803,7 +78730,7 @@ _sp_1024_div2_avx2_16: movq %r9, 120(%rdi) repz retq #ifndef __APPLE__ -.size sp_1024_div2_avx2_16,.-sp_1024_div2_avx2_16 +.size sp_1024_mont_div2_avx2_16,.-sp_1024_mont_div2_avx2_16 #endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ /* Read big endian unsigned byte array into r. diff --git a/wolfcrypt/src/sp_x86_64_asm.asm b/wolfcrypt/src/sp_x86_64_asm.asm index 5bb8faa21..9e7523d7b 100644 --- a/wolfcrypt/src/sp_x86_64_asm.asm +++ b/wolfcrypt/src/sp_x86_64_asm.asm @@ -55583,19 +55583,18 @@ _text ENDS ; * m Modulus (prime). ; */ _text SEGMENT READONLY PARA -sp_256_div2_4 PROC +sp_256_mont_div2_4 PROC push r12 push r13 mov rax, QWORD PTR [rdx] mov r8, QWORD PTR [rdx+8] mov r9, QWORD PTR [rdx+16] mov r10, QWORD PTR [rdx+24] - mov r11, 4294967295 mov r12, 18446744069414584321 mov r13, rax and r13, 1 neg r13 - and r11, r13 + mov r11d, r13d and r12, r13 add rax, r13 adc r8, r11 @@ -55614,7 +55613,7 @@ sp_256_div2_4 PROC pop r13 pop r12 ret -sp_256_div2_4 ENDP +sp_256_mont_div2_4 ENDP _text ENDS ; /* Two Montgomery numbers, subtract double second from first (r = a - 2.b % m). ; * @@ -56241,7 +56240,7 @@ IFDEF HAVE_INTEL_AVX2 ; * mp The digit representing the negative inverse of m mod 2^n. 
; */ _text SEGMENT READONLY PARA -sp_256_mont_reduce_avx2_order_4 PROC +sp_256_mont_reduce_order_avx2_4 PROC push r12 push r13 push r14 @@ -56389,7 +56388,7 @@ sp_256_mont_reduce_avx2_order_4 PROC pop r13 pop r12 ret -sp_256_mont_reduce_avx2_order_4 ENDP +sp_256_mont_reduce_order_avx2_4 ENDP _text ENDS ENDIF IFDEF HAVE_INTEL_AVX2 @@ -56400,19 +56399,18 @@ IFDEF HAVE_INTEL_AVX2 ; * m Modulus (prime). ; */ _text SEGMENT READONLY PARA -sp_256_div2_avx2_4 PROC +sp_256_mont_div2_avx2_4 PROC push r12 push r13 mov rax, QWORD PTR [rdx] mov r8, QWORD PTR [rdx+8] mov r9, QWORD PTR [rdx+16] mov r10, QWORD PTR [rdx+24] - mov r11, 4294967295 mov r12, 18446744069414584321 mov r13, rax and r13, 1 neg r13 - and r11, r13 + mov r11d, r13d and r12, r13 add rax, r13 adc r8, r11 @@ -56431,7 +56429,7 @@ sp_256_div2_avx2_4 PROC pop r13 pop r12 ret -sp_256_div2_avx2_4 ENDP +sp_256_mont_div2_avx2_4 ENDP _text ENDS ENDIF IFNDEF WC_NO_CACHE_RESISTANT @@ -59663,7 +59661,7 @@ _text ENDS ; * m Modulus (prime). ; */ _text SEGMENT READONLY PARA -sp_384_div2_6 PROC +sp_384_mont_div2_6 PROC push r12 push r13 sub rsp, 48 @@ -59724,7 +59722,7 @@ sp_384_div2_6 PROC pop r13 pop r12 ret -sp_384_div2_6 ENDP +sp_384_mont_div2_6 ENDP _text ENDS IFNDEF WC_NO_CACHE_RESISTANT ; /* Touch each possible point that could be being copied. @@ -60297,7 +60295,7 @@ IFDEF HAVE_INTEL_AVX2 ; * m Modulus (prime). ; */ _text SEGMENT READONLY PARA -sp_384_div2_avx2_6 PROC +sp_384_mont_div2_avx2_6 PROC push r12 push r13 mov r13, QWORD PTR [rdx] @@ -60357,7 +60355,7 @@ sp_384_div2_avx2_6 PROC pop r13 pop r12 ret -sp_384_div2_avx2_6 ENDP +sp_384_mont_div2_avx2_6 ENDP _text ENDS ENDIF IFNDEF WC_NO_CACHE_RESISTANT @@ -64989,7 +64987,7 @@ _text ENDS ; * m Modulus (prime). 
; */ _text SEGMENT READONLY PARA -sp_521_div2_9 PROC +sp_521_mont_div2_9 PROC push r12 push r13 push r14 @@ -65041,7 +65039,7 @@ sp_521_div2_9 PROC pop r13 pop r12 ret -sp_521_div2_9 ENDP +sp_521_mont_div2_9 ENDP _text ENDS IFNDEF WC_NO_CACHE_RESISTANT ; /* Touch each possible point that could be being copied. @@ -66753,7 +66751,7 @@ IFDEF HAVE_INTEL_AVX2 ; * m Modulus (prime). ; */ _text SEGMENT READONLY PARA -sp_521_div2_avx2_9 PROC +sp_521_mont_div2_avx2_9 PROC push r12 push r13 push r14 @@ -66805,7 +66803,7 @@ sp_521_div2_avx2_9 PROC pop r13 pop r12 ret -sp_521_div2_avx2_9 ENDP +sp_521_mont_div2_avx2_9 ENDP _text ENDS ENDIF IFNDEF WC_NO_CACHE_RESISTANT @@ -75404,7 +75402,7 @@ _text ENDS ; * m Modulus (prime). ; */ _text SEGMENT READONLY PARA -sp_1024_div2_16 PROC +sp_1024_mont_div2_16 PROC push r12 push r13 sub rsp, 128 @@ -75545,67 +75543,7 @@ sp_1024_div2_16 PROC pop r13 pop r12 ret -sp_1024_div2_16 ENDP -_text ENDS -; /* Sub b from a into r. (r = a - b) -; * -; * r A single precision integer. -; * a A single precision integer. -; * b A single precision integer. 
-; */ -_text SEGMENT READONLY PARA -sp_1024_sub_16 PROC - mov r9, QWORD PTR [rdx] - sub r9, QWORD PTR [r8] - mov r10, QWORD PTR [rdx+8] - mov QWORD PTR [rcx], r9 - sbb r10, QWORD PTR [r8+8] - mov r9, QWORD PTR [rdx+16] - mov QWORD PTR [rcx+8], r10 - sbb r9, QWORD PTR [r8+16] - mov r10, QWORD PTR [rdx+24] - mov QWORD PTR [rcx+16], r9 - sbb r10, QWORD PTR [r8+24] - mov r9, QWORD PTR [rdx+32] - mov QWORD PTR [rcx+24], r10 - sbb r9, QWORD PTR [r8+32] - mov r10, QWORD PTR [rdx+40] - mov QWORD PTR [rcx+32], r9 - sbb r10, QWORD PTR [r8+40] - mov r9, QWORD PTR [rdx+48] - mov QWORD PTR [rcx+40], r10 - sbb r9, QWORD PTR [r8+48] - mov r10, QWORD PTR [rdx+56] - mov QWORD PTR [rcx+48], r9 - sbb r10, QWORD PTR [r8+56] - mov r9, QWORD PTR [rdx+64] - mov QWORD PTR [rcx+56], r10 - sbb r9, QWORD PTR [r8+64] - mov r10, QWORD PTR [rdx+72] - mov QWORD PTR [rcx+64], r9 - sbb r10, QWORD PTR [r8+72] - mov r9, QWORD PTR [rdx+80] - mov QWORD PTR [rcx+72], r10 - sbb r9, QWORD PTR [r8+80] - mov r10, QWORD PTR [rdx+88] - mov QWORD PTR [rcx+80], r9 - sbb r10, QWORD PTR [r8+88] - mov r9, QWORD PTR [rdx+96] - mov QWORD PTR [rcx+88], r10 - sbb r9, QWORD PTR [r8+96] - mov r10, QWORD PTR [rdx+104] - mov QWORD PTR [rcx+96], r9 - sbb r10, QWORD PTR [r8+104] - mov r9, QWORD PTR [rdx+112] - mov QWORD PTR [rcx+104], r10 - sbb r9, QWORD PTR [r8+112] - mov r10, QWORD PTR [rdx+120] - mov QWORD PTR [rcx+112], r9 - sbb r10, QWORD PTR [r8+120] - mov QWORD PTR [rcx+120], r10 - sbb rax, rax - ret -sp_1024_sub_16 ENDP +sp_1024_mont_div2_16 ENDP _text ENDS IFDEF HAVE_INTEL_AVX2 ; /* Reduce the number back to 1024 bits using Montgomery reduction. @@ -76683,7 +76621,7 @@ IFDEF HAVE_INTEL_AVX2 ; * m Modulus (prime). 
; */ _text SEGMENT READONLY PARA -sp_1024_div2_avx2_16 PROC +sp_1024_mont_div2_avx2_16 PROC push r12 push r13 mov r13, QWORD PTR [rdx] @@ -76823,7 +76761,7 @@ sp_1024_div2_avx2_16 PROC pop r13 pop r12 ret -sp_1024_div2_avx2_16 ENDP +sp_1024_mont_div2_avx2_16 ENDP _text ENDS ENDIF ; /* Read big endian unsigned byte array into r. diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index 143681c78..75625b724 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -24307,7 +24307,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t hpke_test(void) /* size to use for ECC key gen tests */ #ifndef ECC_KEYGEN_SIZE - #ifndef NO_ECC256 + #if !defined(NO_ECC256) || defined(WOLFSSL_SM2) #define ECC_KEYGEN_SIZE 32 #elif defined(HAVE_ECC384) #define ECC_KEYGEN_SIZE 48 diff --git a/wolfssl/wolfcrypt/ecc.h b/wolfssl/wolfcrypt/ecc.h index ff0467df9..c58e9442b 100644 --- a/wolfssl/wolfcrypt/ecc.h +++ b/wolfssl/wolfcrypt/ecc.h @@ -107,7 +107,7 @@ #define MAX_ECC_BITS_NEEDED 384 #elif defined(HAVE_ECC320) #define MAX_ECC_BITS_NEEDED 320 -#elif !defined(NO_ECC256) +#elif !defined(NO_ECC256) || defined(WOLFSSL_SM2) #define MAX_ECC_BITS_NEEDED 256 #elif defined(HAVE_ECC239) #define MAX_ECC_BITS_NEEDED 239 diff --git a/wolfssl/wolfcrypt/settings.h b/wolfssl/wolfcrypt/settings.h index 7e44d312c..0bf796f1d 100644 --- a/wolfssl/wolfcrypt/settings.h +++ b/wolfssl/wolfcrypt/settings.h @@ -2123,6 +2123,9 @@ extern void uITRON4_free(void *p) ; #ifdef WOLFSSL_SP_MATH /* for single precision math only make sure the enabled key sizes are * included in the ECC curve table */ + #if defined(WOLFSSL_SP_NO_256) && !defined(NO_ECC256) + #define NO_ECC256 + #endif #if defined(WOLFSSL_SP_384) && !defined(HAVE_ECC384) #define HAVE_ECC384 #endif diff --git a/wolfssl/wolfcrypt/sm2.h b/wolfssl/wolfcrypt/sm2.h index 9176c8651..87167f42e 100644 --- a/wolfssl/wolfcrypt/sm2.h +++ b/wolfssl/wolfcrypt/sm2.h @@ -1,3 +1,23 @@ +/* sm2.h + * + * Copyright (C) 2006-2023 wolfSSL Inc. 
+ * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ #ifdef WOLFSSL_SM2 diff --git a/wolfssl/wolfcrypt/sm3.h b/wolfssl/wolfcrypt/sm3.h index 43895a030..2b3fc5034 100644 --- a/wolfssl/wolfcrypt/sm3.h +++ b/wolfssl/wolfcrypt/sm3.h @@ -1,3 +1,23 @@ +/* sm3.h + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ #ifdef WOLFSSL_SM3 diff --git a/wolfssl/wolfcrypt/sm4.h b/wolfssl/wolfcrypt/sm4.h index 73220752e..f3e66cb89 100644 --- a/wolfssl/wolfcrypt/sm4.h +++ b/wolfssl/wolfcrypt/sm4.h @@ -1,3 +1,23 @@ +/* sm4.h + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ #ifdef WOLFSSL_SM4 diff --git a/wolfssl/wolfcrypt/sp.h b/wolfssl/wolfcrypt/sp.h index e9541e9d5..88e9a069b 100644 --- a/wolfssl/wolfcrypt/sp.h +++ b/wolfssl/wolfcrypt/sp.h @@ -331,6 +331,36 @@ WOLFSSL_LOCAL int sp_ecc_is_point_1024(const mp_int* pX, const mp_int* pY); WOLFSSL_LOCAL int sp_ecc_check_key_1024(const mp_int* pX, const mp_int* pY, const mp_int* privm, void* heap); +WOLFSSL_LOCAL int sp_ecc_mulmod_sm2_256(const mp_int* km, const ecc_point* gm, + ecc_point* rm, int map, void* heap); +WOLFSSL_LOCAL int sp_ecc_mulmod_add_sm2_256(const mp_int* km, const ecc_point* gm, + const ecc_point* am, int inMont, ecc_point* rm, int map, void* heap); +WOLFSSL_LOCAL int sp_ecc_mulmod_base_sm2_256(const mp_int* km, ecc_point* rm, + int map, void* heap); +WOLFSSL_LOCAL int sp_ecc_mulmod_base_add_sm2_256(const mp_int* km, + const ecc_point* am, int inMont, ecc_point* rm, int map, void* heap); + +WOLFSSL_LOCAL int sp_ecc_make_key_sm2_256(WC_RNG* rng, mp_int* priv, + ecc_point* pub, void* heap); +WOLFSSL_LOCAL int sp_ecc_secret_gen_sm2_256(const mp_int* priv, + const ecc_point* pub, byte* out, word32* outlen, void* heap); +WOLFSSL_LOCAL int sp_ecc_sign_sm2_256(const byte* hash, word32 hashLen, + WC_RNG* rng, const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, + void* heap); +WOLFSSL_LOCAL int sp_ecc_verify_sm2_256(const byte* hash, word32 hashLen, + const mp_int* pX, const mp_int* pY, const mp_int* pZ, const mp_int* r, + const mp_int* sm, int* res, void* heap); +WOLFSSL_LOCAL int sp_ecc_is_point_sm2_256(const mp_int* pX, const mp_int* pY); +WOLFSSL_LOCAL int sp_ecc_check_key_sm2_256(const mp_int* pX, const mp_int* pY, + const mp_int* privm, void* heap); +WOLFSSL_LOCAL int sp_ecc_proj_add_point_sm2_256(mp_int* pX, mp_int* pY, + mp_int* pZ, mp_int* qX, 
mp_int* qY, mp_int* qZ, mp_int* rX, mp_int* rY, + mp_int* rZ); +WOLFSSL_LOCAL int sp_ecc_proj_dbl_point_sm2_256(mp_int* pX, mp_int* pY, + mp_int* pZ, mp_int* rX, mp_int* rY, mp_int* rZ); +WOLFSSL_LOCAL int sp_ecc_map_sm2_256(mp_int* pX, mp_int* pY, mp_int* pZ); +WOLFSSL_LOCAL int sp_ecc_uncompress_sm2_256(mp_int* xm, int odd, mp_int* ym); + #endif /* HAVE_FIPS_VERSION && HAVE_FIPS_VERSION == 2 && !WOLFSSL_SP_ARM[32|64]_ASM */ #ifdef WOLFSSL_SP_NONBLOCK