From c62f31cd271797bdebc9e86ca5230c8591438c6f Mon Sep 17 00:00:00 2001
From: Sean Parkinson <sean@wolfssl.com>
Date: Wed, 19 Feb 2020 09:33:17 +1000
Subject: [PATCH] Fix cache resist compile to work with SP C code

---
 wolfcrypt/src/sp_arm32.c    |  30 ++++++----
 wolfcrypt/src/sp_arm64.c    |  30 ++++++----
 wolfcrypt/src/sp_armthumb.c |  30 ++++++----
 wolfcrypt/src/sp_c32.c      | 116 +++++++++++++++++++++++-------------
 wolfcrypt/src/sp_c64.c      | 116 +++++++++++++++++++++++-------------
 wolfcrypt/src/sp_cortexm.c  |  30 ++++++----
 6 files changed, 228 insertions(+), 124 deletions(-)

diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c
index 8fd412d3b..ef47c8f7b 100644
--- a/wolfcrypt/src/sp_arm32.c
+++ b/wolfcrypt/src/sp_arm32.c
@@ -180,12 +180,14 @@ static void sp_2048_to_bin(sp_digit* r, byte* a)
     for (i=0; i<64 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 32) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -8487,12 +8489,14 @@ static void sp_3072_to_bin(sp_digit* r, byte* a)
     for (i=0; i<96 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 32) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -19960,12 +19964,14 @@ static void sp_4096_to_bin(sp_digit* r, byte* a)
     for (i=0; i<128 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 32) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -77807,12 +77813,14 @@ static void sp_256_to_bin(sp_digit* r, byte* a)
     for (i=0; i<8 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 32) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -86472,12 +86480,14 @@ static void sp_384_to_bin(sp_digit* r, byte* a)
     for (i=0; i<12 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 32) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
diff --git a/wolfcrypt/src/sp_arm64.c b/wolfcrypt/src/sp_arm64.c
index 5f9704332..30e7d0ff5 100644
--- a/wolfcrypt/src/sp_arm64.c
+++ b/wolfcrypt/src/sp_arm64.c
@@ -180,12 +180,14 @@ static void sp_2048_to_bin(sp_digit* r, byte* a)
     for (i=0; i<32 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 64) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -5650,12 +5652,14 @@ static void sp_3072_to_bin(sp_digit* r, byte* a)
     for (i=0; i<48 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 64) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -13190,12 +13194,14 @@ static void sp_4096_to_bin(sp_digit* r, byte* a)
     for (i=0; i<64 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 64) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -46598,12 +46604,14 @@ static void sp_256_to_bin(sp_digit* r, byte* a)
     for (i=0; i<4 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 64) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -52648,12 +52656,14 @@ static void sp_384_to_bin(sp_digit* r, byte* a)
     for (i=0; i<6 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 64) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
diff --git a/wolfcrypt/src/sp_armthumb.c b/wolfcrypt/src/sp_armthumb.c
index a1d239f8a..5c6b01ab0 100644
--- a/wolfcrypt/src/sp_armthumb.c
+++ b/wolfcrypt/src/sp_armthumb.c
@@ -180,12 +180,14 @@ static void sp_2048_to_bin(sp_digit* r, byte* a)
     for (i=0; i<64 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 32) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -5366,12 +5368,14 @@ static void sp_3072_to_bin(sp_digit* r, byte* a)
     for (i=0; i<96 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 32) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -11265,12 +11269,14 @@ static void sp_4096_to_bin(sp_digit* r, byte* a)
     for (i=0; i<128 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 32) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -20220,12 +20226,14 @@ static void sp_256_to_bin(sp_digit* r, byte* a)
     for (i=0; i<8 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 32) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -26028,12 +26036,14 @@ static void sp_384_to_bin(sp_digit* r, byte* a)
     for (i=0; i<12 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 32) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
diff --git a/wolfcrypt/src/sp_c32.c b/wolfcrypt/src/sp_c32.c
index d86d5a0b1..a308dcf0c 100644
--- a/wolfcrypt/src/sp_c32.c
+++ b/wolfcrypt/src/sp_c32.c
@@ -194,12 +194,14 @@ static void sp_2048_to_bin(sp_digit* r, byte* a)
     for (i=0; i<90 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 23) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -4629,12 +4631,14 @@ static void sp_3072_to_bin(sp_digit* r, byte* a)
     for (i=0; i<134 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 23) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -8588,12 +8592,14 @@ static void sp_4096_to_bin(sp_digit* r, byte* a)
     for (i=0; i<196 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 21) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -13179,7 +13185,7 @@ SP_NOINLINE static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a,
     t[ 7] = tb * a[ 7];
     t[ 8] = tb * a[ 8];
     t[ 9] = tb * a[ 9];
-    r[ 0] +=                 (sp_digit)(t[ 0] & 0x3ffffff);
+    r[ 0] += (sp_digit)                 (t[ 0] & 0x3ffffff);
     r[ 1] += (sp_digit)((t[ 0] >> 26) + (t[ 1] & 0x3ffffff));
     r[ 2] += (sp_digit)((t[ 1] >> 26) + (t[ 2] & 0x3ffffff));
     r[ 3] += (sp_digit)((t[ 2] >> 26) + (t[ 3] & 0x3ffffff));
@@ -13189,7 +13195,7 @@ SP_NOINLINE static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a,
     r[ 7] += (sp_digit)((t[ 6] >> 26) + (t[ 7] & 0x3ffffff));
     r[ 8] += (sp_digit)((t[ 7] >> 26) + (t[ 8] & 0x3ffffff));
     r[ 9] += (sp_digit)((t[ 8] >> 26) + (t[ 9] & 0x3ffffff));
-    r[10] +=  t[ 9] >> 26;
+    r[10] += (sp_digit) (t[ 9] >> 26);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
@@ -14121,11 +14127,12 @@ static int sp_256_ecc_mulmod_10(sp_point_256* r, const sp_point_256* g, const sp
         int map, void* heap)
 {
 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point_256 td[3];
-    sp_digit tmpd[2 * 10 * 5];
-#endif
+    sp_point_256 t[3];
+    sp_digit tmp[2 * 10 * 5];
+#else
     sp_point_256* t;
     sp_digit* tmp;
+#endif
     sp_digit n;
     int i;
     int c, y;
@@ -14134,28 +14141,21 @@ static int sp_256_ecc_mulmod_10(sp_point_256* r, const sp_point_256* g, const sp
     (void)heap;
 
 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    sp_point_256 td[3];
-    t = (sp_point_256*)XMALLOC(sizeof(*td) * 3, heap, DYNAMIC_TYPE_ECC);
+    t = (sp_point*)XMALLOC(sizeof(*t) * 3, heap, DYNAMIC_TYPE_ECC);
     if (t == NULL)
         err = MEMORY_E;
     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
                              DYNAMIC_TYPE_ECC);
     if (tmp == NULL)
         err = MEMORY_E;
-#else
-    t = td;
-    tmp = tmpd;
 #endif
 
     if (err == MP_OKAY) {
-        t[0] = &td[0];
-        t[1] = &td[1];
-        t[2] = &td[2];
-
         /* t[0] = {0, 0, 1} * norm */
         XMEMSET(&t[0], 0, sizeof(t[0]));
         t[0].infinity = 1;
         /* t[1] = {g->x, g->y, g->z} * norm */
+        t[1].infinity = 0;
         err = sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod);
     }
     if (err == MP_OKAY)
@@ -14206,8 +14206,8 @@ static int sp_256_ecc_mulmod_10(sp_point_256* r, const sp_point_256* g, const sp
         XFREE(t, heap, DYNAMIC_TYPE_ECC);
     }
 #else
-    ForceZero(tmpd, sizeof(tmpd));
-    ForceZero(td, sizeof(td));
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
 #endif
 
     return err;
@@ -14899,6 +14899,23 @@ static int sp_256_ecc_mulmod_base_10(sp_point_256* r, const sp_digit* k,
     return sp_256_ecc_mulmod_10(r, &p256_base, k, map, heap);
 }
 
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_10(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* No pre-computed values. */
+    return sp_256_ecc_mulmod_10(r, &p256_base, k, map, heap);
+}
+
 #else
 static const sp_table_entry_256 p256_table[256] = {
     /* 0 */
@@ -16439,12 +16456,14 @@ static void sp_256_to_bin(sp_digit* r, byte* a)
     for (i=0; i<10 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 26) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -18689,7 +18708,7 @@ SP_NOINLINE static void sp_384_mul_add_15(sp_digit* r, const sp_digit* a,
     t[12] = tb * a[12];
     t[13] = tb * a[13];
     t[14] = tb * a[14];
-    r[ 0] +=                 (sp_digit)(t[ 0] & 0x3ffffff);
+    r[ 0] += (sp_digit)                 (t[ 0] & 0x3ffffff);
     r[ 1] += (sp_digit)((t[ 0] >> 26) + (t[ 1] & 0x3ffffff));
     r[ 2] += (sp_digit)((t[ 1] >> 26) + (t[ 2] & 0x3ffffff));
     r[ 3] += (sp_digit)((t[ 2] >> 26) + (t[ 3] & 0x3ffffff));
@@ -18704,7 +18723,7 @@ SP_NOINLINE static void sp_384_mul_add_15(sp_digit* r, const sp_digit* a,
     r[12] += (sp_digit)((t[11] >> 26) + (t[12] & 0x3ffffff));
     r[13] += (sp_digit)((t[12] >> 26) + (t[13] & 0x3ffffff));
     r[14] += (sp_digit)((t[13] >> 26) + (t[14] & 0x3ffffff));
-    r[15] +=  t[14] >> 26;
+    r[15] += (sp_digit) (t[14] >> 26);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
@@ -19734,11 +19753,12 @@ static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, const sp
         int map, void* heap)
 {
 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point_384 td[3];
-    sp_digit tmpd[2 * 15 * 6];
-#endif
+    sp_point_384 t[3];
+    sp_digit tmp[2 * 15 * 6];
+#else
     sp_point_384* t;
     sp_digit* tmp;
+#endif
     sp_digit n;
     int i;
     int c, y;
@@ -19747,28 +19767,21 @@ static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, const sp
     (void)heap;
 
 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    sp_point_384 td[3];
-    t = (sp_point_384*)XMALLOC(sizeof(*td) * 3, heap, DYNAMIC_TYPE_ECC);
+    t = (sp_point*)XMALLOC(sizeof(*t) * 3, heap, DYNAMIC_TYPE_ECC);
     if (t == NULL)
         err = MEMORY_E;
     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 15 * 6, heap,
                              DYNAMIC_TYPE_ECC);
     if (tmp == NULL)
         err = MEMORY_E;
-#else
-    t = td;
-    tmp = tmpd;
 #endif
 
     if (err == MP_OKAY) {
-        t[0] = &td[0];
-        t[1] = &td[1];
-        t[2] = &td[2];
-
         /* t[0] = {0, 0, 1} * norm */
         XMEMSET(&t[0], 0, sizeof(t[0]));
         t[0].infinity = 1;
         /* t[1] = {g->x, g->y, g->z} * norm */
+        t[1].infinity = 0;
         err = sp_384_mod_mul_norm_15(t[1].x, g->x, p384_mod);
     }
     if (err == MP_OKAY)
@@ -19819,8 +19832,8 @@ static int sp_384_ecc_mulmod_15(sp_point_384* r, const sp_point_384* g, const sp
         XFREE(t, heap, DYNAMIC_TYPE_ECC);
     }
 #else
-    ForceZero(tmpd, sizeof(tmpd));
-    ForceZero(td, sizeof(td));
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
 #endif
 
     return err;
@@ -20512,6 +20525,23 @@ static int sp_384_ecc_mulmod_base_15(sp_point_384* r, const sp_digit* k,
     return sp_384_ecc_mulmod_15(r, &p384_base, k, map, heap);
 }
 
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_15(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* No pre-computed values. */
+    return sp_384_ecc_mulmod_15(r, &p384_base, k, map, heap);
+}
+
 #else
 static const sp_table_entry_384 p384_table[256] = {
     /* 0 */
@@ -22562,12 +22592,14 @@ static void sp_384_to_bin(sp_digit* r, byte* a)
     for (i=0; i<15 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 26) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
diff --git a/wolfcrypt/src/sp_c64.c b/wolfcrypt/src/sp_c64.c
index 38ca21d62..814d06d46 100644
--- a/wolfcrypt/src/sp_c64.c
+++ b/wolfcrypt/src/sp_c64.c
@@ -194,12 +194,14 @@ static void sp_2048_to_bin(sp_digit* r, byte* a)
     for (i=0; i<36 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 57) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -4159,12 +4161,14 @@ static void sp_3072_to_bin(sp_digit* r, byte* a)
     for (i=0; i<54 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 57) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -8340,12 +8344,14 @@ static void sp_4096_to_bin(sp_digit* r, byte* a)
     for (i=0; i<78 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 53) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -13098,12 +13104,12 @@ SP_NOINLINE static void sp_256_mul_add_5(sp_digit* r, const sp_digit* a,
     t[ 2] = tb * a[ 2];
     t[ 3] = tb * a[ 3];
     t[ 4] = tb * a[ 4];
-    r[ 0] +=                 (sp_digit)(t[ 0] & 0xfffffffffffffL);
+    r[ 0] += (sp_digit)                 (t[ 0] & 0xfffffffffffffL);
     r[ 1] += (sp_digit)((t[ 0] >> 52) + (t[ 1] & 0xfffffffffffffL));
     r[ 2] += (sp_digit)((t[ 1] >> 52) + (t[ 2] & 0xfffffffffffffL));
     r[ 3] += (sp_digit)((t[ 2] >> 52) + (t[ 3] & 0xfffffffffffffL));
     r[ 4] += (sp_digit)((t[ 3] >> 52) + (t[ 4] & 0xfffffffffffffL));
-    r[ 5] +=  t[ 4] >> 52;
+    r[ 5] += (sp_digit) (t[ 4] >> 52);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
@@ -13943,11 +13949,12 @@ static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, const sp_
         int map, void* heap)
 {
 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point_256 td[3];
-    sp_digit tmpd[2 * 5 * 5];
-#endif
+    sp_point_256 t[3];
+    sp_digit tmp[2 * 5 * 5];
+#else
     sp_point_256* t;
     sp_digit* tmp;
+#endif
     sp_digit n;
     int i;
     int c, y;
@@ -13956,28 +13963,21 @@ static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, const sp_
     (void)heap;
 
 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    sp_point_256 td[3];
-    t = (sp_point_256*)XMALLOC(sizeof(*td) * 3, heap, DYNAMIC_TYPE_ECC);
+    t = (sp_point*)XMALLOC(sizeof(*t) * 3, heap, DYNAMIC_TYPE_ECC);
     if (t == NULL)
         err = MEMORY_E;
     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap,
                              DYNAMIC_TYPE_ECC);
     if (tmp == NULL)
         err = MEMORY_E;
-#else
-    t = td;
-    tmp = tmpd;
 #endif
 
     if (err == MP_OKAY) {
-        t[0] = &td[0];
-        t[1] = &td[1];
-        t[2] = &td[2];
-
         /* t[0] = {0, 0, 1} * norm */
         XMEMSET(&t[0], 0, sizeof(t[0]));
         t[0].infinity = 1;
         /* t[1] = {g->x, g->y, g->z} * norm */
+        t[1].infinity = 0;
         err = sp_256_mod_mul_norm_5(t[1].x, g->x, p256_mod);
     }
     if (err == MP_OKAY)
@@ -14028,8 +14028,8 @@ static int sp_256_ecc_mulmod_5(sp_point_256* r, const sp_point_256* g, const sp_
         XFREE(t, heap, DYNAMIC_TYPE_ECC);
     }
 #else
-    ForceZero(tmpd, sizeof(tmpd));
-    ForceZero(td, sizeof(td));
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
 #endif
 
     return err;
@@ -14721,6 +14721,23 @@ static int sp_256_ecc_mulmod_base_5(sp_point_256* r, const sp_digit* k,
     return sp_256_ecc_mulmod_5(r, &p256_base, k, map, heap);
 }
 
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the base point of P256 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_256_ecc_mulmod_base_5(sp_point_256* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* No pre-computed values. */
+    return sp_256_ecc_mulmod_5(r, &p256_base, k, map, heap);
+}
+
 #else
 static const sp_table_entry_256 p256_table[256] = {
     /* 0 */
@@ -16260,12 +16277,14 @@ static void sp_256_to_bin(sp_digit* r, byte* a)
     for (i=0; i<5 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 52) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -18235,14 +18254,14 @@ SP_NOINLINE static void sp_384_mul_add_7(sp_digit* r, const sp_digit* a,
     t[ 4] = tb * a[ 4];
     t[ 5] = tb * a[ 5];
     t[ 6] = tb * a[ 6];
-    r[ 0] +=                 (sp_digit)(t[ 0] & 0x7fffffffffffffL);
+    r[ 0] += (sp_digit)                 (t[ 0] & 0x7fffffffffffffL);
     r[ 1] += (sp_digit)((t[ 0] >> 55) + (t[ 1] & 0x7fffffffffffffL));
     r[ 2] += (sp_digit)((t[ 1] >> 55) + (t[ 2] & 0x7fffffffffffffL));
     r[ 3] += (sp_digit)((t[ 2] >> 55) + (t[ 3] & 0x7fffffffffffffL));
     r[ 4] += (sp_digit)((t[ 3] >> 55) + (t[ 4] & 0x7fffffffffffffL));
     r[ 5] += (sp_digit)((t[ 4] >> 55) + (t[ 5] & 0x7fffffffffffffL));
     r[ 6] += (sp_digit)((t[ 5] >> 55) + (t[ 6] & 0x7fffffffffffffL));
-    r[ 7] +=  t[ 6] >> 55;
+    r[ 7] += (sp_digit) (t[ 6] >> 55);
 #endif /* WOLFSSL_SP_SMALL */
 }
 
@@ -19116,11 +19135,12 @@ static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, const sp_
         int map, void* heap)
 {
 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
-    sp_point_384 td[3];
-    sp_digit tmpd[2 * 7 * 6];
-#endif
+    sp_point_384 t[3];
+    sp_digit tmp[2 * 7 * 6];
+#else
     sp_point_384* t;
     sp_digit* tmp;
+#endif
     sp_digit n;
     int i;
     int c, y;
@@ -19129,28 +19149,21 @@ static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, const sp_
     (void)heap;
 
 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
-    sp_point_384 td[3];
-    t = (sp_point_384*)XMALLOC(sizeof(*td) * 3, heap, DYNAMIC_TYPE_ECC);
+    t = (sp_point*)XMALLOC(sizeof(*t) * 3, heap, DYNAMIC_TYPE_ECC);
     if (t == NULL)
         err = MEMORY_E;
     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 7 * 6, heap,
                              DYNAMIC_TYPE_ECC);
     if (tmp == NULL)
         err = MEMORY_E;
-#else
-    t = td;
-    tmp = tmpd;
 #endif
 
     if (err == MP_OKAY) {
-        t[0] = &td[0];
-        t[1] = &td[1];
-        t[2] = &td[2];
-
         /* t[0] = {0, 0, 1} * norm */
         XMEMSET(&t[0], 0, sizeof(t[0]));
         t[0].infinity = 1;
         /* t[1] = {g->x, g->y, g->z} * norm */
+        t[1].infinity = 0;
         err = sp_384_mod_mul_norm_7(t[1].x, g->x, p384_mod);
     }
     if (err == MP_OKAY)
@@ -19201,8 +19214,8 @@ static int sp_384_ecc_mulmod_7(sp_point_384* r, const sp_point_384* g, const sp_
         XFREE(t, heap, DYNAMIC_TYPE_ECC);
     }
 #else
-    ForceZero(tmpd, sizeof(tmpd));
-    ForceZero(td, sizeof(td));
+    ForceZero(tmp, sizeof(tmp));
+    ForceZero(t, sizeof(t));
 #endif
 
     return err;
@@ -19894,6 +19907,23 @@ static int sp_384_ecc_mulmod_base_7(sp_point_384* r, const sp_digit* k,
     return sp_384_ecc_mulmod_7(r, &p384_base, k, map, heap);
 }
 
+#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
+/* Multiply the base point of P384 by the scalar and return the result.
+ * If map is true then convert result to affine coordinates.
+ *
+ * r     Resulting point.
+ * k     Scalar to multiply by.
+ * map   Indicates whether to convert result to affine.
+ * heap  Heap to use for allocation.
+ * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
+ */
+static int sp_384_ecc_mulmod_base_7(sp_point_384* r, const sp_digit* k,
+        int map, void* heap)
+{
+    /* No pre-computed values. */
+    return sp_384_ecc_mulmod_7(r, &p384_base, k, map, heap);
+}
+
 #else
 static const sp_table_entry_384 p384_table[256] = {
     /* 0 */
@@ -21943,12 +21973,14 @@ static void sp_384_to_bin(sp_digit* r, byte* a)
     for (i=0; i<7 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 55) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c
index 94decf725..39a27d630 100644
--- a/wolfcrypt/src/sp_cortexm.c
+++ b/wolfcrypt/src/sp_cortexm.c
@@ -189,12 +189,14 @@ static void sp_2048_to_bin(sp_digit* r, byte* a)
     for (i=0; i<64 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 32) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -5120,12 +5122,14 @@ static void sp_3072_to_bin(sp_digit* r, byte* a)
     for (i=0; i<96 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 32) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -9868,12 +9872,14 @@ static void sp_4096_to_bin(sp_digit* r, byte* a)
     for (i=0; i<128 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 32) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -18469,12 +18475,14 @@ static void sp_256_to_bin(sp_digit* r, byte* a)
     for (i=0; i<8 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 32) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }
@@ -23995,12 +24003,14 @@ static void sp_384_to_bin(sp_digit* r, byte* a)
     for (i=0; i<12 && j>=0; i++) {
         b = 0;
         /* lint allow cast of mismatch sp_digit and int */
-        a[j--] |= (byte)(r[i] << s); b += 8 - s; /*lint !e9033*/
+        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
+        b += 8 - s;
         if (j < 0) {
             break;
         }
         while (b < 32) {
-            a[j--] = r[i] >> b; b += 8;
+            a[j--] = (byte)(r[i] >> b);
+            b += 8;
             if (j < 0) {
                 break;
             }