diff --git a/src/internal.c b/src/internal.c index 0a48149a7..38c6734ff 100644 --- a/src/internal.c +++ b/src/internal.c @@ -26488,7 +26488,7 @@ const char* wolfSSL_ERR_reason_error_string(unsigned long e) return "peer ip address mismatch"; case WANT_READ : - case -WOLFSSL_ERROR_WANT_READ : + case WOLFSSL_ERROR_WANT_READ_E : return "non-blocking socket wants data to be read"; case NOT_READY_ERROR : @@ -26498,17 +26498,17 @@ const char* wolfSSL_ERR_reason_error_string(unsigned long e) return "record layer version error"; case WANT_WRITE : - case -WOLFSSL_ERROR_WANT_WRITE : + case WOLFSSL_ERROR_WANT_WRITE_E : return "non-blocking socket write buffer full"; - case -WOLFSSL_ERROR_WANT_CONNECT: - case -WOLFSSL_ERROR_WANT_ACCEPT: + case WOLFSSL_ERROR_WANT_CONNECT_E : + case WOLFSSL_ERROR_WANT_ACCEPT_E : return "The underlying BIO was not yet connected"; - case -WOLFSSL_ERROR_SYSCALL: + case WOLFSSL_ERROR_SYSCALL_E : return "fatal I/O error in TLS layer"; - case -WOLFSSL_ERROR_WANT_X509_LOOKUP: + case WOLFSSL_ERROR_WANT_X509_LOOKUP_E : return "application client cert callback asked to be called again"; case BUFFER_ERROR : @@ -26548,7 +26548,7 @@ const char* wolfSSL_ERR_reason_error_string(unsigned long e) return "can't decode peer key"; case ZERO_RETURN: - case -WOLFSSL_ERROR_ZERO_RETURN: + case WOLFSSL_ERROR_ZERO_RETURN_E : return "peer sent close notify alert"; case ECC_CURVETYPE_ERROR: diff --git a/src/ssl.c b/src/ssl.c index a188b87f1..6d5aca6d2 100644 --- a/src/ssl.c +++ b/src/ssl.c @@ -234,8 +234,10 @@ static struct SystemCryptoPolicy crypto_policy; static WC_RNG globalRNG; static volatile int initGlobalRNG = 0; +#if defined(OPENSSL_EXTRA) || !defined(WOLFSSL_MUTEX_INITIALIZER) static WC_MAYBE_UNUSED wolfSSL_Mutex globalRNGMutex WOLFSSL_MUTEX_INITIALIZER_CLAUSE(globalRNGMutex); +#endif #ifndef WOLFSSL_MUTEX_INITIALIZER static int globalRNGMutex_valid = 0; #endif diff --git a/src/x509.c b/src/x509.c index b85a04981..278e743b7 100644 --- a/src/x509.c +++ b/src/x509.c @@ -5507,7 +5507,7 @@ int wolfSSL_X509_NAME_get_text_by_NID(WOLFSSL_X509_NAME* name, WOLFSSL_EVP_PKEY* wolfSSL_X509_get_pubkey(WOLFSSL_X509* x509) { WOLFSSL_EVP_PKEY* key = NULL; - int ret; + int ret = 0; (void)ret; diff --git a/wolfcrypt/src/port/arm/armv8-aes.c b/wolfcrypt/src/port/arm/armv8-aes.c index 88b9d3794..0d2d55c77 100644 --- a/wolfcrypt/src/port/arm/armv8-aes.c +++ b/wolfcrypt/src/port/arm/armv8-aes.c @@ -1549,7 +1549,7 @@ static void GHASH_AARCH64_EOR(Gcm* gcm, const byte* a, word32 aSz, "MOV w12, %w[aSz] \n" "CMP x12, #64 \n" - "BLT 15f \n" + "B.LT 15f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v11.1q, v3.2d, v3.2d \n" @@ -1639,11 +1639,11 @@ static void GHASH_AARCH64_EOR(Gcm* gcm, const byte* a, word32 aSz, "MOV v0.D[1], v9.D[0] \n" "EOR v0.16b, v0.16b, v8.16b \n" "CMP x12, #64 \n" - "BGE 14b \n" + "B.GE 14b \n" "CBZ x12, 20f \n" "15: \n" "CMP x12, #16 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "LD1 {v14.2d}, [%[a]], #16 \n" "SUB x12, x12, #16 \n" @@ -1664,7 +1664,7 @@ static void GHASH_AARCH64_EOR(Gcm* gcm, const byte* a, word32 aSz, "MOV v10.D[1], v13.D[0] \n" "EOR v0.16b, v10.16b, v12.16b \n" "CMP x12, #16 \n" - "BGE 11b \n" + "B.GE 11b \n" "CBZ x12, 120f \n" "12: \n" "# Partial AAD \n" @@ -1702,7 +1702,7 @@ static void GHASH_AARCH64_EOR(Gcm* gcm, const byte* a, word32 aSz, "MOV w12, %w[cSz] \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v11.1q, v3.2d, v3.2d \n" @@ -1792,11 +1792,11 @@ static void 
GHASH_AARCH64_EOR(Gcm* gcm, const byte* a, word32 aSz, "MOV v0.D[1], v9.D[0] \n" "EOR v0.16b, v0.16b, v8.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v14.2d}, [%[c]], #16 \n" "SUB x12, x12, #16 \n" @@ -1817,7 +1817,7 @@ static void GHASH_AARCH64_EOR(Gcm* gcm, const byte* a, word32 aSz, "MOV v10.D[1], v13.D[0] \n" "EOR v0.16b, v10.16b, v12.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial cipher text \n" @@ -1884,7 +1884,7 @@ static void GHASH_AARCH64_EOR3(Gcm* gcm, const byte* a, word32 aSz, "MOV w12, %w[aSz] \n" "CMP x12, #64 \n" - "BLT 15f \n" + "B.LT 15f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v11.1q, v3.2d, v3.2d \n" @@ -1970,11 +1970,11 @@ static void GHASH_AARCH64_EOR3(Gcm* gcm, const byte* a, word32 aSz, "MOV v0.D[1], v9.D[0] \n" "EOR v0.16b, v0.16b, v8.16b \n" "CMP x12, #64 \n" - "BGE 14b \n" + "B.GE 14b \n" "CBZ x12, 20f \n" "15: \n" "CMP x12, #16 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "LD1 {v14.2d}, [%[a]], #16 \n" "SUB x12, x12, #16 \n" @@ -1995,7 +1995,7 @@ static void GHASH_AARCH64_EOR3(Gcm* gcm, const byte* a, word32 aSz, "MOV v10.D[1], v13.D[0] \n" "EOR v0.16b, v10.16b, v12.16b \n" "CMP x12, #16 \n" - "BGE 11b \n" + "B.GE 11b \n" "CBZ x12, 120f \n" "12: \n" "# Partial AAD \n" @@ -2033,7 +2033,7 @@ static void GHASH_AARCH64_EOR3(Gcm* gcm, const byte* a, word32 aSz, "MOV w12, %w[cSz] \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v11.1q, v3.2d, v3.2d \n" @@ -2119,11 +2119,11 @@ static void GHASH_AARCH64_EOR3(Gcm* gcm, const byte* a, word32 aSz, "MOV v0.D[1], v9.D[0] \n" "EOR v0.16b, v0.16b, v8.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v14.2d}, [%[c]], #16 \n" "SUB x12, x12, #16 \n" @@ -2144,7 +2144,7 @@ static void GHASH_AARCH64_EOR3(Gcm* gcm, const byte* a, word32 aSz, "MOV v10.D[1], v13.D[0] \n" "EOR v0.16b, v10.16b, v12.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial cipher text \n" @@ -2531,7 +2531,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -2621,11 +2621,11 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -2646,7 +2646,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -2683,9 +2683,9 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -2724,7 +2724,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "82: \n" "# Should we do 8 blocks at a 
time? \n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -3318,7 +3318,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -3423,7 +3423,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "LD1 {v9.2d-v11.2d}, [%[Key]], #48 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First encrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -3537,7 +3537,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -3702,7 +3702,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -3760,7 +3760,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Encrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -3793,7 +3793,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "# When only one full block to encrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" @@ -3843,7 +3843,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "EOR v15.16b, v0.16b, v31.16b \n \n" "ST1 {v15.2d}, [%[out]], #16 \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" "B 2b \n" @@ -3988,7 +3988,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "ST1 {v0.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -4053,7 +4053,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -4139,11 +4139,11 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -4164,7 +4164,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -4201,9 +4201,9 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -4242,7 +4242,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "82: \n" "# Should we do 8 blocks at a time? 
\n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -4828,7 +4828,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -4925,7 +4925,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "LD1 {v9.2d-v11.2d}, [%[Key]], #48 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First encrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -5039,7 +5039,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -5200,7 +5200,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -5254,7 +5254,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Encrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -5287,7 +5287,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# When only one full block to encrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" @@ -5337,7 +5337,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "EOR v15.16b, v0.16b, v31.16b \n \n" "ST1 {v15.2d}, [%[out]], #16 \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" "B 2b \n" @@ -5482,7 +5482,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "ST1 {v0.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -5547,7 +5547,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -5637,11 +5637,11 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -5662,7 +5662,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -5699,9 +5699,9 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -5740,7 +5740,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "82: \n" "# Should we do 8 blocks at a time? 
\n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -6402,7 +6402,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -6508,7 +6508,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "LD1 {v12.2d-v13.2d}, [%[Key]], #32 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First encrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -6638,7 +6638,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -6819,7 +6819,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -6877,7 +6877,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Encrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -6914,7 +6914,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "# When only one full block to encrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" @@ -6968,7 +6968,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "EOR v15.16b, v0.16b, v31.16b \n \n" "ST1 {v15.2d}, [%[out]], #16 \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" "B 2b \n" @@ -7121,7 +7121,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "ST1 {v0.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -7186,7 +7186,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -7272,11 +7272,11 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -7297,7 +7297,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -7334,9 +7334,9 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -7375,7 +7375,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "82: \n" "# Should we do 8 blocks at a time? 
\n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -8029,7 +8029,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -8127,7 +8127,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "LD1 {v12.2d-v13.2d}, [%[Key]], #32 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First encrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -8257,7 +8257,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -8434,7 +8434,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -8488,7 +8488,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Encrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -8525,7 +8525,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# When only one full block to encrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" @@ -8579,7 +8579,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "EOR v15.16b, v0.16b, v31.16b \n \n" "ST1 {v15.2d}, [%[out]], #16 \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" "B 2b \n" @@ -8732,7 +8732,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "ST1 {v0.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -8797,7 +8797,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -8887,11 +8887,11 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -8912,7 +8912,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -8949,9 +8949,9 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -8990,7 +8990,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "82: \n" "# Should we do 8 blocks at a time? 
\n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -9720,7 +9720,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -9826,7 +9826,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "LD1 {v12.2d-v13.2d}, [%[Key]], #32 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First encrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -9973,7 +9973,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -10171,7 +10171,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -10230,7 +10230,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "SUB %[Key], %[Key], #32 \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Encrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -10274,7 +10274,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "# When only one full block to encrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" @@ -10335,7 +10335,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "EOR v15.16b, v0.16b, v31.16b \n \n" "ST1 {v15.2d}, [%[out]], #16 \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" "B 2b \n" @@ -10502,7 +10502,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "ST1 {v0.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -10567,7 +10567,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -10653,11 +10653,11 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -10678,7 +10678,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -10715,9 +10715,9 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -10756,7 +10756,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "82: \n" "# Should we do 8 blocks at a time? 
\n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -11478,7 +11478,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -11576,7 +11576,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "LD1 {v12.2d-v13.2d}, [%[Key]], #32 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First encrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -11723,7 +11723,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -11917,7 +11917,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -11972,7 +11972,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "SUB %[Key], %[Key], #32 \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Encrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -12016,7 +12016,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# When only one full block to encrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" @@ -12077,7 +12077,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "EOR v15.16b, v0.16b, v31.16b \n \n" "ST1 {v15.2d}, [%[out]], #16 \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" "B 2b \n" @@ -12244,7 +12244,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "ST1 {v0.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -12381,7 +12381,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -12471,11 +12471,11 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -12496,7 +12496,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -12533,9 +12533,9 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -12574,7 +12574,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "82: \n" "# Should we do 8 blocks at a time? 
\n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -13168,7 +13168,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -13273,7 +13273,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "LD1 {v9.2d-v11.2d}, [%[Key]], #48 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First decrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -13387,7 +13387,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -13552,7 +13552,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -13610,7 +13610,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Decrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -13643,7 +13643,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# When only one full block to decrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "# Interweave GHASH and decrypt if more then 1 block \n" "2: \n" @@ -13692,7 +13692,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "EOR v0.16b, v0.16b, v28.16b \n \n" "ST1 {v0.2d}, [%[out]], #16 \n" "CMP w11, #16 \n" - "BGE 2b \n" + "B.GE 2b \n" "# GHASH on last block \n" "1: \n" @@ -13827,7 +13827,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "LD1 {v1.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -13911,7 +13911,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -13997,11 +13997,11 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -14022,7 +14022,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -14059,9 +14059,9 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -14100,7 +14100,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 
sz, "82: \n" "# Should we do 8 blocks at a time? \n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -14686,7 +14686,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -14783,7 +14783,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "LD1 {v9.2d-v11.2d}, [%[Key]], #48 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First decrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -14897,7 +14897,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -15058,7 +15058,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -15112,7 +15112,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Decrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -15145,7 +15145,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# When only one full block to decrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "# Interweave GHASH and decrypt if more then 1 block \n" "2: \n" @@ -15194,7 +15194,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "EOR v0.16b, v0.16b, v28.16b \n \n" "ST1 {v0.2d}, [%[out]], #16 \n" "CMP w11, #16 \n" - "BGE 2b \n" + "B.GE 2b \n" "# GHASH on last block \n" "1: \n" @@ -15329,7 +15329,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "LD1 {v1.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -15413,7 +15413,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -15503,11 +15503,11 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -15528,7 +15528,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -15565,9 +15565,9 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -15606,7 +15606,7 @@ static 
int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "82: \n" "# Should we do 8 blocks at a time? \n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -16268,7 +16268,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -16374,7 +16374,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "LD1 {v12.2d-v13.2d}, [%[Key]], #32 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First decrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -16504,7 +16504,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -16685,7 +16685,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -16743,7 +16743,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Decrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -16780,7 +16780,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# When only one full block to decrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "# Interweave GHASH and decrypt if more then 1 block \n" "2: \n" @@ -16833,7 +16833,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "EOR v0.16b, v0.16b, v28.16b \n \n" "ST1 {v0.2d}, [%[out]], #16 \n" "CMP w11, #16 \n" - "BGE 2b \n" + "B.GE 2b \n" "# GHASH on last block \n" "1: \n" @@ -16976,7 +16976,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "LD1 {v1.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -17060,7 +17060,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -17146,11 +17146,11 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -17171,7 +17171,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -17208,9 +17208,9 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# 
Square H => H^2 \n" @@ -17249,7 +17249,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "82: \n" "# Should we do 8 blocks at a time? \n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -17903,7 +17903,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -18001,7 +18001,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "LD1 {v12.2d-v13.2d}, [%[Key]], #32 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First decrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -18131,7 +18131,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -18308,7 +18308,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -18362,7 +18362,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Decrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -18399,7 +18399,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# When only one full block to decrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "# Interweave GHASH and decrypt if more then 1 block \n" "2: \n" @@ -18452,7 +18452,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "EOR v0.16b, v0.16b, v28.16b \n \n" "ST1 {v0.2d}, [%[out]], #16 \n" "CMP w11, #16 \n" - "BGE 2b \n" + "B.GE 2b \n" "# GHASH on last block \n" "1: \n" @@ -18595,7 +18595,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "LD1 {v1.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -18679,7 +18679,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -18769,11 +18769,11 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -18794,7 +18794,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -18831,9 +18831,9 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - 
"BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -18872,7 +18872,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "82: \n" "# Should we do 8 blocks at a time? \n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -19602,7 +19602,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -19709,7 +19709,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "LD1 {v14.2d-v15.2d}, [%[Key]] \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First decrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -19856,7 +19856,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -20054,7 +20054,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -20113,7 +20113,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "LD1 {v14.2d, v15.2d}, [%[Key]] \n" "# Decrypt first block for GHASH \n" "ADD w12, w12, #1 \n" @@ -20155,7 +20155,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# When only one full block to decrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "# Interweave GHASH and decrypt if more then 1 block \n" "2: \n" @@ -20212,7 +20212,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "EOR v0.16b, v0.16b, v28.16b \n \n" "ST1 {v0.2d}, [%[out]], #16 \n" "CMP w11, #16 \n" - "BGE 2b \n" + "B.GE 2b \n" "# GHASH on last block \n" "1: \n" @@ -20363,7 +20363,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "LD1 {v1.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -20447,7 +20447,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -20533,11 +20533,11 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -20558,7 +20558,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -20595,9 +20595,9 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - 
"BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -20636,7 +20636,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "82: \n" "# Should we do 8 blocks at a time? \n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -21358,7 +21358,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -21457,7 +21457,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "LD1 {v14.2d-v15.2d}, [%[Key]] \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First decrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -21604,7 +21604,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -21798,7 +21798,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -21853,7 +21853,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "LD1 {v14.2d, v15.2d}, [%[Key]] \n" "# Decrypt first block for GHASH \n" "ADD w12, w12, #1 \n" @@ -21895,7 +21895,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# When only one full block to decrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "# Interweave GHASH and decrypt if more then 1 block \n" "2: \n" @@ -21952,7 +21952,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "EOR v0.16b, v0.16b, v28.16b \n \n" "ST1 {v0.2d}, [%[out]], #16 \n" "CMP w11, #16 \n" - "BGE 2b \n" + "B.GE 2b \n" "# GHASH on last block \n" "1: \n" @@ -22103,7 +22103,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "LD1 {v1.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -24846,14 +24846,14 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) "AESMC v0.16b, v0.16b \n" \ \ "SUBS WZR, %w[rounds], #10 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESE v0.16b, v10.16b \n" \ "AESMC v0.16b, v0.16b \n" \ "AESE v0.16b, v11.16b \n" \ "AESMC v0.16b, v0.16b \n" \ \ "SUBS WZR, %w[rounds], #12 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESE v0.16b, v12.16b \n" \ "AESMC v0.16b, v0.16b \n" \ "AESE v0.16b, v13.16b \n" \ @@ -24886,14 +24886,14 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) "AESMC v0.16b, v0.16b \n" \ \ "SUBS WZR, %w[rounds], #10 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESE v0.16b, v10.16b \n" \ "AESMC v0.16b, v0.16b \n" \ "AESE v0.16b, v11.16b \n" \ "AESMC v0.16b, v0.16b \n" \ \ "SUBS WZR, %w[rounds], #12 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESE v0.16b, v12.16b \n" \ "AESMC v0.16b, v0.16b \n" \ "AESE v0.16b, v13.16b \n" \ @@ -24929,14 
+24929,14 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) "AESIMC v0.16b, v0.16b \n" \ \ "SUBS WZR, %w[rounds], #10 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESD v0.16b, v10.16b \n" \ "AESIMC v0.16b, v0.16b \n" \ "AESD v0.16b, v11.16b \n" \ "AESIMC v0.16b, v0.16b \n" \ \ "SUBS WZR, %w[rounds], #12 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESD v0.16b, v12.16b \n" \ "AESIMC v0.16b, v0.16b \n" \ "AESD v0.16b, v13.16b \n" \ @@ -24969,14 +24969,14 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) "AESIMC v0.16b, v0.16b \n" \ \ "SUBS WZR, %w[rounds], #10 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESD v0.16b, v10.16b \n" \ "AESIMC v0.16b, v0.16b \n" \ "AESD v0.16b, v11.16b \n" \ "AESIMC v0.16b, v0.16b \n" \ \ "SUBS WZR, %w[rounds], #12 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESD v0.16b, v12.16b \n" \ "AESIMC v0.16b, v0.16b \n" \ "AESD v0.16b, v13.16b \n" \ @@ -25257,7 +25257,7 @@ void AES_XTS_encrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "SUBS %w[blocks], %w[blocks], #1 \n" "SUB %w[sz], %w[sz], #16 \n" - "BGT 1b \n" + "B.GT 1b \n" "CBZ %w[sz], 3f \n" @@ -25274,7 +25274,7 @@ void AES_XTS_encrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "STRB w13, [%[out]], #1 \n" "STRB w14, [%[tmp]], #1 \n" "SUBS w12, w12, #1 \n" - "BGT 4b \n" + "B.GT 4b \n" "SUB %[out], %[out], %x[sz] \n" "SUB %[tmp], %[tmp], %x[sz] \n" @@ -25576,7 +25576,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "SUBS %w[blocks], %w[blocks], #1 \n" "SUB %w[sz], %w[sz], #16 \n" - "BGT 1b \n" + "B.GT 1b \n" "CBZ %w[sz], 4f \n" @@ -25607,7 +25607,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "STRB w13, [%[out]], #1 \n" "STRB w14, [%[tmp]], #1 \n" "SUBS w12, w12, #1 \n" - "BGT 6b \n" + "B.GT 6b \n" "SUB %[out], %[out], %x[sz] \n" "SUB %[tmp], %[tmp], %x[sz] \n" "SUB %[out], %[out], #16 \n" @@ -25671,7 +25671,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ \ "CMP %[rounds], #10 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESE.8 q0, q10 \n" \ "AESMC.8 q0, q0 \n" \ "AESE.8 q0, q11 \n" \ @@ -25679,7 +25679,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ \ "CMP %[rounds], #12 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESE.8 q0, q10 \n" \ "AESMC.8 q0, q0 \n" \ "AESE.8 q0, q11 \n" \ @@ -25714,7 +25714,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ \ "CMP %[rounds], #10 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESE.8 q0, q10 \n" \ "AESMC.8 q0, q0 \n" \ "AESE.8 q0, q11 \n" \ @@ -25722,7 +25722,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ \ "CMP %[rounds], #12 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESE.8 q0, q10 \n" \ "AESMC.8 q0, q0 \n" \ "AESE.8 q0, q11 \n" \ @@ -25765,7 +25765,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! 
\n" \ \ "CMP %[rounds], #10 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESD.8 q0, q10 \n" \ "AESIMC.8 q0, q0 \n" \ "AESD.8 q0, q11 \n" \ @@ -25773,7 +25773,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ \ "CMP %[rounds], #12 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESD.8 q0, q10 \n" \ "AESIMC.8 q0, q0 \n" \ "AESD.8 q0, q11 \n" \ @@ -25808,7 +25808,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ \ "CMP %[rounds], #10 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESD.8 q0, q10 \n" \ "AESIMC.8 q0, q0 \n" \ "AESD.8 q0, q11 \n" \ @@ -25816,7 +25816,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ \ "CMP %[rounds], #12 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESD.8 q0, q10 \n" \ "AESIMC.8 q0, q0 \n" \ "AESD.8 q0, q11 \n" \ @@ -25911,7 +25911,7 @@ int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, "SUBS %[blocks], %[blocks], #1 \n" "SUB %[sz], %[sz], #16 \n" - "BGT 1b \n" + "B.GT 1b \n" "CMP %[sz], #0 \n" "B.EQ 3f \n" @@ -25929,7 +25929,7 @@ int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, "STRB r10, [%[out]], #1 \n" "STRB r11, [%[tmp]], #1 \n" "SUBS r9, r9, #1 \n" - "BGT 4b \n" + "B.GT 4b \n" "SUB %[out], %[out], %[sz] \n" "SUB %[tmp], %[tmp], %[sz] \n" @@ -26047,7 +26047,7 @@ int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, "SUBS %[blocks], %[blocks], #1 \n" "SUB %[sz], %[sz], #16 \n" - "BGT 1b \n" + "B.GT 1b \n" "CMP %[sz], #0 \n" "B.EQ 4f \n" @@ -26087,7 +26087,7 @@ int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, "STRB r10, [%[out]], #1 \n" "STRB r11, [%[tmp]], #1 \n" "SUBS r9, r9, #1 \n" - "BGT 6b \n" + "B.GT 6b \n" "SUB %[out], %[out], %[sz] \n" "SUB %[tmp], %[tmp], %[sz] \n" "SUB %[out], %[out], #16 \n" diff --git a/wolfcrypt/src/port/arm/armv8-chacha.c b/wolfcrypt/src/port/arm/armv8-chacha.c index 019b1ee5e..283aba717 100644 --- a/wolfcrypt/src/port/arm/armv8-chacha.c +++ b/wolfcrypt/src/port/arm/armv8-chacha.c @@ -556,7 +556,7 @@ static WC_INLINE void wc_Chacha_encrypt_320(const word32* input, const byte* m, "SRI v6.4s, v17.4s, #25 \n\t" "SRI v7.4s, v18.4s, #25 \n\t" "SRI v4.4s, v19.4s, #25 \n\t" - "BNE L_chacha20_arm64_inner_%= \n\t" + "B.NE L_chacha20_arm64_inner_%= \n\t" /* Add counter now rather than after transposed */ "ADD v12.4s, v12.4s, v28.4s \n\t" "ADD w16, w16, w21 \n\t" @@ -666,7 +666,7 @@ static WC_INLINE void wc_Chacha_encrypt_320(const word32* input, const byte* m, "ST1 {v16.4s-v19.4s}, [%[c]], #64 \n\t" "SUBS %[bytes], %[bytes], #320 \n\t" "ADD v28.4s, v28.4s, v29.4s \n\t" - "BNE L_chacha20_arm64_outer_%= \n\t" + "B.NE L_chacha20_arm64_outer_%= \n\t" : [input] "+r" (input), [m] "+r" (m), [c] "+r" (c), [bytes] "+r" (bytes64) : [L_chacha20_neon_add_all_cntrs] "r" (L_chacha20_neon_add_all_cntrs), @@ -959,7 +959,7 @@ static WC_INLINE int wc_Chacha_encrypt_256( "EXT v9.16B, v9.16B, v9.16B, #12 \n\t" "EXT v10.16B, v10.16B, v10.16B, #8 \n\t" "EXT v11.16B, v11.16B, v11.16B, #4 \n\t" - "BNE L_chacha20_arm64_256_loop_%= \n\t" + "B.NE L_chacha20_arm64_256_loop_%= \n\t" /* Load message */ "LD1 {v16.4S-v19.4S}, [%[m]], #64 \n\t" /* Add one (2 added during calculating vector results) */ @@ -1364,7 +1364,7 @@ static WC_INLINE int wc_Chacha_encrypt_256( "ROR r4, r4, #25 \n\t" // 
4 4 "VEXT.8 q11, q11, q11, #4 \n\t" // permute elements left by one - "BNE L_chacha20_arm32_256_loop_%= \n\t" + "B.NE L_chacha20_arm32_256_loop_%= \n\t" // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 // 0 1 2 3 4 5 6 7 8 9 12 13 14 @@ -1583,7 +1583,7 @@ static WC_INLINE int wc_Chacha_encrypt_128( "EXT v5.16B, v5.16B, v5.16B, #12 \n\t" "EXT v6.16B, v6.16B, v6.16B, #8 \n\t" "EXT v7.16B, v7.16B, v7.16B, #4 \n\t" - "BNE L_chacha20_arm64_128_loop_%= \n\t" + "B.NE L_chacha20_arm64_128_loop_%= \n\t" /* Add back state, XOR in message and store (load next block) */ "ADD v0.4S, v0.4S, v18.4S \n\t" "ADD v1.4S, v1.4S, v19.4S \n\t" @@ -1736,7 +1736,7 @@ static WC_INLINE int wc_Chacha_encrypt_128( "VEXT.8 q6, q6, q6, #8 \n\t" // permute elements left by two "VEXT.8 q7, q7, q7, #4 \n\t" // permute elements left by one - "BNE L_chacha20_arm32_128_loop_%= \n\t" + "B.NE L_chacha20_arm32_128_loop_%= \n\t" "VMOV.I32 q8, #0 \n\t" "VADD.I32 q0, q0, q10 \n\t" @@ -2251,7 +2251,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "ADD v2.4S, v2.4S, v10.4S \n\t" "ADD v3.4S, v3.4S, v11.4S \n\t" "CMP %[bytes], #64 \n\t" - "BLT L_chacha20_arm64_64_lt_64_%= \n\t" + "B.LT L_chacha20_arm64_64_lt_64_%= \n\t" "LD1 {v4.4S-v7.4S}, [%[m]], #64 \n\t" "EOR v4.16B, v4.16B, v0.16B \n\t" "EOR v5.16B, v5.16B, v1.16B \n\t" @@ -2260,13 +2260,13 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "ST1 {v4.4S-v7.4S}, [%[c]], #64 \n\t" "SUBS %[bytes], %[bytes], #64 \n\t" "ADD v11.4S, v11.4S, v14.4S \n\t" - "BNE L_chacha20_arm64_64_loop_%= \n\t" + "B.NE L_chacha20_arm64_64_loop_%= \n\t" "B L_chacha20_arm64_64_done_%= \n\t" "\n" "L_chacha20_arm64_64_lt_64_%=: \n\t" "ST1 {v0.4s-v3.4s}, [%[over]]\n\t" "CMP %[bytes], #32 \n\t" - "BLT L_chacha20_arm64_64_lt_32_%= \n\t" + "B.LT L_chacha20_arm64_64_lt_32_%= \n\t" "LD1 {v4.4S, v5.4S}, [%[m]], #32 \n\t" "EOR v4.16B, v4.16B, v0.16B \n\t" "EOR v5.16B, v5.16B, v1.16B \n\t" @@ -2274,27 +2274,27 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "SUBS %[bytes], %[bytes], #32 \n\t" "MOV v0.16B, v2.16B \n\t" "MOV v1.16B, v3.16B \n\t" - "BEQ L_chacha20_arm64_64_done_%= \n\t" + "B.EQ L_chacha20_arm64_64_done_%= \n\t" "\n" "L_chacha20_arm64_64_lt_32_%=: \n\t" "CMP %[bytes], #16 \n\t" - "BLT L_chacha20_arm64_64_lt_16_%= \n\t" + "B.LT L_chacha20_arm64_64_lt_16_%= \n\t" "LD1 {v4.4S}, [%[m]], #16 \n\t" "EOR v4.16B, v4.16B, v0.16B \n\t" "ST1 {v4.4S}, [%[c]], #16 \n\t" "SUBS %[bytes], %[bytes], #16 \n\t" "MOV v0.16B, v1.16B \n\t" - "BEQ L_chacha20_arm64_64_done_%= \n\t" + "B.EQ L_chacha20_arm64_64_done_%= \n\t" "\n" "L_chacha20_arm64_64_lt_16_%=: \n\t" "CMP %[bytes], #8 \n\t" - "BLT L_chacha20_arm64_64_lt_8_%= \n\t" + "B.LT L_chacha20_arm64_64_lt_8_%= \n\t" "LD1 {v4.2S}, [%[m]], #8 \n\t" "EOR v4.8B, v4.8B, v0.8B \n\t" "ST1 {v4.2S}, [%[c]], #8 \n\t" "SUBS %[bytes], %[bytes], #8 \n\t" "MOV v0.D[0], v0.D[1] \n\t" - "BEQ L_chacha20_arm64_64_done_%= \n\t" + "B.EQ L_chacha20_arm64_64_done_%= \n\t" "\n" "L_chacha20_arm64_64_lt_8_%=: \n\t" "MOV x4, v0.D[0] \n\t" @@ -2305,7 +2305,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "STRB w6, [%[c]], #1 \n\t" "SUBS %[bytes], %[bytes], #1 \n\t" "LSR x4, x4, #8 \n\t" - "BGT L_chacha20_arm64_64_loop_lt_8_%= \n\t" + "B.GT L_chacha20_arm64_64_loop_lt_8_%= \n\t" "\n" "L_chacha20_arm64_64_done_%=: \n\t" : [input] "+r" (input), [m] "+r" (m), [c] "+r" (c), @@ -2816,7 +2816,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, 
"VADD.I32 q2, q2, q10 \n\t" "VADD.I32 q3, q3, q11 \n\t" "CMP %[bytes], #64 \n\t" - "BLT L_chacha20_arm32_64_lt_64_%= \n\t" + "B.LT L_chacha20_arm32_64_lt_64_%= \n\t" /* XOR full 64 byte block */ "VLD1.8 { q4, q5 }, [%[m]]! \n\t" "VLD1.8 { q6, q7 }, [%[m]]! \n\t" @@ -2828,14 +2828,14 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "VST1.8 { q2, q3 }, [%[c]]! \n\t" "SUBS %[bytes], %[bytes], #64 \n\t" "VADD.I32 q11, q11, q14 \n\t" - "BNE L_chacha20_arm32_64_outer_loop_%= \n\t" + "B.NE L_chacha20_arm32_64_outer_loop_%= \n\t" "B L_chacha20_arm32_64_done_%= \n\t" "\n" "L_chacha20_arm32_64_lt_64_%=: \n\t" "VSTM %[over], {q0-q3} \n\t" /* XOR 32 bytes */ "CMP %[bytes], #32 \n\t" - "BLT L_chacha20_arm32_64_lt_32_%= \n\t" + "B.LT L_chacha20_arm32_64_lt_32_%= \n\t" "VLD1.8 { q4, q5 }, [%[m]]! \n\t" "VEOR q4, q4, q0 \n\t" "VEOR q5, q5, q1 \n\t" @@ -2843,41 +2843,41 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "SUBS %[bytes], %[bytes], #32 \n\t" "VMOV q0, q2 \n\t" "VMOV q1, q3 \n\t" - "BEQ L_chacha20_arm32_64_done_%= \n\t" + "B.EQ L_chacha20_arm32_64_done_%= \n\t" "\n" "L_chacha20_arm32_64_lt_32_%=: \n\t" /* XOR 16 bytes */ "CMP %[bytes], #16 \n\t" - "BLT L_chacha20_arm32_64_lt_16_%= \n\t" + "B.LT L_chacha20_arm32_64_lt_16_%= \n\t" "VLD1.8 { q4 }, [%[m]]! \n\t" "VEOR q4, q4, q0 \n\t" "VST1.8 { q4 }, [%[c]]! \n\t" "SUBS %[bytes], %[bytes], #16 \n\t" "VMOV q0, q1 \n\t" - "BEQ L_chacha20_arm32_64_done_%= \n\t" + "B.EQ L_chacha20_arm32_64_done_%= \n\t" "\n" "L_chacha20_arm32_64_lt_16_%=: \n\t" /* XOR 8 bytes */ "CMP %[bytes], #8 \n\t" - "BLT L_chacha20_arm32_64_lt_8_%= \n\t" + "B.LT L_chacha20_arm32_64_lt_8_%= \n\t" "VLD1.8 { d8 }, [%[m]]! \n\t" "VEOR d8, d8, d0 \n\t" "VST1.8 { d8 }, [%[c]]! \n\t" "SUBS %[bytes], %[bytes], #8 \n\t" "VMOV d0, d1 \n\t" - "BEQ L_chacha20_arm32_64_done_%= \n\t" + "B.EQ L_chacha20_arm32_64_done_%= \n\t" "\n" "L_chacha20_arm32_64_lt_8_%=: \n\t" /* XOR 4 bytes */ "CMP %[bytes], #4 \n\t" - "BLT L_chacha20_arm32_64_lt_4_%= \n\t" + "B.LT L_chacha20_arm32_64_lt_4_%= \n\t" "LDR r12, [%[m]], #4 \n\t" "VMOV r14, d0[0] \n\t" "EOR r12, r12, r14 \n\t" "STR r12, [%[c]], #4 \n\t" "SUBS %[bytes], %[bytes], #4 \n\t" "VSHR.U64 d0, d0, #32 \n\t" - "BEQ L_chacha20_arm32_64_done_%= \n\t" + "B.EQ L_chacha20_arm32_64_done_%= \n\t" "\n" "L_chacha20_arm32_64_lt_4_%=: \n\t" /* XOR remaining bytes */ @@ -2889,7 +2889,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "STRB r12, [%[c]], #1 \n\t" "SUBS %[bytes], %[bytes], #1 \n\t" "LSR r14, r14, #8 \n\t" - "BGT L_chacha20_arm32_64_lt_4_loop_%= \n\t" + "B.GT L_chacha20_arm32_64_lt_4_loop_%= \n\t" "\n" "L_chacha20_arm32_64_done_%=: \n\t" : [input] "+r" (input), [m] "+r" (m), [c] "+r" (c), [bytes] "+r" (bytes) diff --git a/wolfcrypt/src/port/arm/armv8-mlkem-asm.S b/wolfcrypt/src/port/arm/armv8-mlkem-asm.S index cd37d6eda..9a3e1f1ba 100644 --- a/wolfcrypt/src/port/arm/armv8-mlkem-asm.S +++ b/wolfcrypt/src/port/arm/armv8-mlkem-asm.S @@ -29,21 +29,6 @@ #ifdef WOLFSSL_ARMASM #ifdef __aarch64__ #ifndef WOLFSSL_ARMASM_INLINE -#ifndef __APPLE__ - .text - .type L_mlkem_aarch64_q, %object - .section .rodata - .size L_mlkem_aarch64_q, 16 -#else - .section __DATA,__data -#endif /* __APPLE__ */ -#ifndef __APPLE__ - .align 2 -#else - .p2align 2 -#endif /* __APPLE__ */ -L_mlkem_aarch64_q: - .short 0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01 #ifndef __APPLE__ .text .type L_mlkem_aarch64_consts, %object @@ -59,44 +44,6 @@ L_mlkem_aarch64_q: #endif /* __APPLE__ 
*/ L_mlkem_aarch64_consts: .short 0x0d01,0xf301,0x4ebf,0x0549,0x5049,0x0000,0x0000,0x0000 -#ifndef __APPLE__ - .text - .type L_sha3_aarch64_r, %object - .section .rodata - .size L_sha3_aarch64_r, 192 -#else - .section __DATA,__data -#endif /* __APPLE__ */ -#ifndef __APPLE__ - .align 3 -#else - .p2align 3 -#endif /* __APPLE__ */ -L_sha3_aarch64_r: - .xword 0x0000000000000001 - .xword 0x0000000000008082 - .xword 0x800000000000808a - .xword 0x8000000080008000 - .xword 0x000000000000808b - .xword 0x0000000080000001 - .xword 0x8000000080008081 - .xword 0x8000000000008009 - .xword 0x000000000000008a - .xword 0x0000000000000088 - .xword 0x0000000080008009 - .xword 0x000000008000000a - .xword 0x000000008000808b - .xword 0x800000000000008b - .xword 0x8000000000008089 - .xword 0x8000000000008003 - .xword 0x8000000000008002 - .xword 0x8000000000000080 - .xword 0x000000000000800a - .xword 0x800000008000000a - .xword 0x8000000080008081 - .xword 0x8000000000008080 - .xword 0x0000000080000001 - .xword 0x8000000080008008 #ifdef WOLFSSL_WC_MLKEM #ifndef __APPLE__ .text @@ -7006,6 +6953,21 @@ _mlkem_basemul_mont_add: #ifndef __APPLE__ .size mlkem_basemul_mont_add,.-mlkem_basemul_mont_add #endif /* __APPLE__ */ +#ifndef __APPLE__ + .text + .type L_mlkem_aarch64_q, %object + .section .rodata + .size L_mlkem_aarch64_q, 16 +#else + .section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ + .align 2 +#else + .p2align 2 +#endif /* __APPLE__ */ +L_mlkem_aarch64_q: + .short 0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01 #ifndef __APPLE__ .text .globl mlkem_csubq_neon @@ -9724,6 +9686,44 @@ L_mlkem_rej_uniform_done: #ifndef __APPLE__ .size mlkem_rej_uniform_neon,.-mlkem_rej_uniform_neon #endif /* __APPLE__ */ +#ifndef __APPLE__ + .text + .type L_sha3_aarch64_r, %object + .section .rodata + .size L_sha3_aarch64_r, 192 +#else + .section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ +L_sha3_aarch64_r: + .xword 0x0000000000000001 + .xword 0x0000000000008082 + .xword 0x800000000000808a + .xword 0x8000000080008000 + .xword 0x000000000000808b + .xword 0x0000000080000001 + .xword 0x8000000080008081 + .xword 0x8000000000008009 + .xword 0x000000000000008a + .xword 0x0000000000000088 + .xword 0x0000000080008009 + .xword 0x000000008000000a + .xword 0x000000008000808b + .xword 0x800000000000008b + .xword 0x8000000000008089 + .xword 0x8000000000008003 + .xword 0x8000000000008002 + .xword 0x8000000000000080 + .xword 0x000000000000800a + .xword 0x800000008000000a + .xword 0x8000000080008081 + .xword 0x8000000000008080 + .xword 0x0000000080000001 + .xword 0x8000000080008008 #ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 #ifndef __APPLE__ .text diff --git a/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c b/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c index 33a707c01..e0061e442 100644 --- a/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c @@ -30,29 +30,10 @@ #ifdef WOLFSSL_ARMASM #ifdef __aarch64__ #ifdef WOLFSSL_ARMASM_INLINE -static const word16 L_mlkem_aarch64_q[] = { - 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, -}; - static const word16 L_mlkem_aarch64_consts[] = { 0x0d01, 0xf301, 0x4ebf, 0x0549, 0x5049, 0x0000, 0x0000, 0x0000, }; -static const word64 L_sha3_aarch64_r[] = { - 0x0000000000000001, 0x0000000000008082, - 0x800000000000808a, 0x8000000080008000, - 0x000000000000808b, 0x0000000080000001, - 0x8000000080008081, 0x8000000000008009, - 0x000000000000008a, 0x0000000000000088, - 0x0000000080008009, 
0x000000008000000a, - 0x000000008000808b, - 0x800000000000008b, - 0x8000000000008089, - 0x8000000000008003, - 0x8000000000008002, - 0x8000000000000080, - 0x000000000000800a, - 0x800000008000000a, - 0x8000000080008081, - 0x8000000000008080, - 0x0000000080000001, - 0x8000000080008008, -}; - #include #ifdef WOLFSSL_WC_MLKEM @@ -1405,11 +1386,9 @@ void mlkem_ntt(sword16* r) "stp q17, q18, [x1, #192]\n\t" "stp q19, q20, [x1, #224]\n\t" : [r] "+r" (r) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv) + : [L_mlkem_aarch64_consts] "i" (L_mlkem_aarch64_consts), + [L_mlkem_aarch64_zetas] "i" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "i" (L_mlkem_aarch64_zetas_qinv) : "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", @@ -2922,13 +2901,9 @@ void mlkem_invntt(sword16* r) "str q23, [x1, #208]\n\t" "str q24, [x1, #240]\n\t" : [r] "+r" (r) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv) + : [L_mlkem_aarch64_consts] "i" (L_mlkem_aarch64_consts), + [L_mlkem_aarch64_zetas_inv] "i" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "i" (L_mlkem_aarch64_zetas_inv_qinv) : "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", @@ -4096,13 +4071,7 @@ void mlkem_ntt_sqrdmlsh(sword16* r) "stp q17, q18, [x1, #192]\n\t" "stp q19, q20, [x1, #224]\n\t" : [r] "+r" (r) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv) + : [L_mlkem_aarch64_consts] "i" (L_mlkem_aarch64_consts) : "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", @@ -5393,13 +5362,7 @@ void mlkem_invntt_sqrdmlsh(sword16* r) "str q23, [x1, #208]\n\t" "str q24, [x1, #240]\n\t" : [r] "+r" (r) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv) + : [L_mlkem_aarch64_consts] "i" (L_mlkem_aarch64_consts) : "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13",
"v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", @@ -6102,14 +6065,8 @@ void mlkem_basemul_mont(sword16* r, const sword16* a, const sword16* b) "zip2 v25.8h, v22.8h, v23.8h\n\t" "stp q24, q25, [%x[r], #480]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]), + [L_mlkem_aarch64_zetas_mul] "i" (L_mlkem_aarch64_zetas_mul) : "memory", "cc", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", @@ -6840,14 +6797,8 @@ void mlkem_basemul_mont_add(sword16* r, const sword16* a, const sword16* b) "add v29.8h, v29.8h, v25.8h\n\t" "stp q28, q29, [%x[r], #480]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]), + [L_mlkem_aarch64_zetas_mul] "i" (L_mlkem_aarch64_zetas_mul) : "memory", "cc", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", @@ -6855,6 +6806,10 @@ void mlkem_basemul_mont_add(sword16* r, const sword16* a, const sword16* b) ); } +static const word16 L_mlkem_aarch64_q[] = { + 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, +}; + void mlkem_csubq_neon(sword16* p) { __asm__ __volatile__ ( @@ -7013,14 +6968,7 @@ void mlkem_csubq_neon(sword16* p) "st4 {v8.8h, v9.8h, v10.8h, v11.8h}, [%x[p]], #0x40\n\t" "st4 {v12.8h, v13.8h, v14.8h, v15.8h}, [%x[p]], #0x40\n\t" : [p] "+r" (p) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : [L_mlkem_aarch64_q] "i" (L_mlkem_aarch64_q) : "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20" @@ -7195,14 +7143,7 @@ void mlkem_add_reduce(sword16* r, const sword16* a) "st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t" "st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t" : [r] "+r" (r), [a] "+r" (a) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - 
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]) : "memory", "cc", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18" @@ -7417,14 +7358,7 @@ void mlkem_add3_reduce(sword16* r, const sword16* a, const sword16* b) "st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t" "st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]) : "memory", "cc", "x3", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26" @@ -7599,14 +7533,7 @@ void mlkem_rsub_reduce(sword16* r, const sword16* a) "st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t" "st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t" : [r] "+r" (r), [a] "+r" (a) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]) : "memory", "cc", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18" @@ -7803,14 +7730,7 @@ void mlkem_to_mont(sword16* p) "st4 {v9.8h, v10.8h, v11.8h, v12.8h}, [%x[p]], #0x40\n\t" "st4 {v13.8h, v14.8h, v15.8h, v16.8h}, [%x[p]], #0x40\n\t" : [p] "+r" (p) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]) : "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18" @@ -7976,14 +7896,7 @@ void mlkem_to_mont_sqrdmlsh(sword16* p) "st4 {v9.8h, v10.8h, v11.8h, v12.8h}, [%x[p]], #0x40\n\t" "st4 {v13.8h, v14.8h, v15.8h, v16.8h}, [%x[p]], #0x40\n\t" : [p] "+r" (p) - : [L_mlkem_aarch64_q] "S" 
(L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : [L_mlkem_aarch64_consts] "i" (L_mlkem_aarch64_consts) : "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18" @@ -8231,17 +8144,9 @@ void mlkem_to_msg_neon(byte* msg, sword16* p) "ins v18.b[7], v25.b[0]\n\t" "st1 {v18.8b}, [%x[msg]], #8\n\t" : [msg] "+r" (msg), [p] "+r" (p) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits) + : [L_mlkem_to_msg_low] "i" (L_mlkem_to_msg_low), + [L_mlkem_to_msg_high] "i" (L_mlkem_to_msg_high), + [L_mlkem_to_msg_bits] "i" (L_mlkem_to_msg_bits) : "memory", "cc", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", @@ -8415,19 +8320,8 @@ void mlkem_from_msg_neon(sword16* p, const byte* msg) "and v7.16b, v7.16b, v1.16b\n\t" "st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [%x[p]], #0x40\n\t" : [p] "+r" (p), [msg] "+r" (msg) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), - [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), - [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits) + : [L_mlkem_from_msg_q1half] "i" (L_mlkem_from_msg_q1half), + [L_mlkem_from_msg_bits] "i" (L_mlkem_from_msg_bits) : "memory", "cc", "x2", "x3", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11" ); @@ -8693,19 +8587,7 @@ int mlkem_cmp_neon(const byte* a, const byte* b, int sz) "subs x0, x0, xzr\n\t" "csetm w0, ne\n\t" : [a] "+r" (a), [b] "+r" (b), [sz] "+r" (sz) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S"
(L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), - [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), - [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits) + : : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11" ); @@ -9410,22 +9292,10 @@ unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r, "L_mlkem_rej_uniform_done_%=: \n\t" "mov x0, x12\n\t" : [p] "+r" (p), [len] "+r" (len), [r] "+r" (r), [rLen] "+r" (rLen) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), - [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), - [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), - [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), - [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), - [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : [L_mlkem_aarch64_q] "i" (L_mlkem_aarch64_q), + [L_mlkem_rej_uniform_mask] "i" (L_mlkem_rej_uniform_mask), + [L_mlkem_rej_uniform_bits] "i" (L_mlkem_rej_uniform_bits), + [L_mlkem_rej_uniform_indices] "i" (L_mlkem_rej_uniform_indices) : "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13" @@ -9433,6 +9303,21 @@ unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r, return (word32)(size_t)p; } +static const word64 L_sha3_aarch64_r[] = { + 0x0000000000000001, 0x0000000000008082, + 0x800000000000808a, 0x8000000080008000, + 0x000000000000808b, 0x0000000080000001, + 0x8000000080008081, 0x8000000000008009, + 0x000000000000008a, 0x0000000000000088, + 0x0000000080008009, 0x000000008000000a, + 0x000000008000808b, 0x800000000000008b, + 0x8000000000008089, 0x8000000000008003, + 0x8000000000008002, 0x8000000000000080, + 0x000000000000800a, 0x800000008000000a, + 0x8000000080008081, 0x8000000000008080, + 0x0000000080000001, 0x8000000080008008, +}; + #ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 void mlkem_sha3_blocksx3_neon(word64* state) { @@ -9728,22 +9613,7 @@ void mlkem_sha3_blocksx3_neon(word64* state) "str x26, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), - [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
- [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), - [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), - [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), - [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r) : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", @@ -10070,22 +9940,7 @@ void mlkem_shake128_blocksx3_seed_neon(word64* state, byte* seed) "str x27, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state), [seed] "+r" (seed) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), - [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), - [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), - [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), - [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), - [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r) : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", @@ -10412,22 +10267,7 @@ void mlkem_shake256_blocksx3_seed_neon(word64* state, byte* seed) "str x27, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state), [seed] "+r" (seed) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), - [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), - [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), - [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), - [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), - [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r) : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", @@ -10818,22 +10658,7 @@ void mlkem_sha3_blocksx3_neon(word64* state) "str x26, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - 
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), - [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), - [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), - [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), - [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), - [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r) : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", @@ -11245,22 +11070,7 @@ void mlkem_shake128_blocksx3_seed_neon(word64* state, byte* seed) "str x27, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state), [seed] "+r" (seed) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), - [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), - [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), - [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), - [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), - [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r) : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", @@ -11672,22 +11482,7 @@ void mlkem_shake256_blocksx3_seed_neon(word64* state, byte* seed) "str x27, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state), [seed] "+r" (seed) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), - [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), - [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), - [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), - [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), - [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : [L_sha3_aarch64_r] "i" 
(L_sha3_aarch64_r) : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", diff --git a/wolfcrypt/src/port/arm/armv8-poly1305.c b/wolfcrypt/src/port/arm/armv8-poly1305.c index f3c447dcd..1977f4d58 100644 --- a/wolfcrypt/src/port/arm/armv8-poly1305.c +++ b/wolfcrypt/src/port/arm/armv8-poly1305.c @@ -49,7 +49,7 @@ static WC_INLINE void poly1305_blocks_aarch64_16(Poly1305* ctx, __asm__ __volatile__ ( /* Check for zero bytes to do. */ "CMP %[bytes], #16 \n\t" - "BLO L_poly1305_aarch64_16_done_%= \n\t" + "B.LO L_poly1305_aarch64_16_done_%= \n\t" "MOV x12, #1 \n\t" /* Load h */ @@ -129,7 +129,7 @@ static WC_INLINE void poly1305_blocks_aarch64_16(Poly1305* ctx, "SUBS %[bytes], %[bytes], #16\n\t" "ADD %[m], %[m], #16\n\t" - "BGT L_poly1305_aarch64_16_loop_%=\n\t" + "B.GT L_poly1305_aarch64_16_loop_%=\n\t" /* Base 64 -> Base 26 */ "MOV x10, #0x3ffffff\n\t" @@ -146,8 +146,7 @@ static WC_INLINE void poly1305_blocks_aarch64_16(Poly1305* ctx, ".align 2 \n\t" "L_poly1305_aarch64_16_done_%=: \n\t" : [bytes] "+r" (bytes), [m] "+r" (m) - : [POLY1305_BLOCK_SIZE] "I" (POLY1305_BLOCK_SIZE), - [ctx_r64] "m" (ctx->r64[0]), [ctx_h] "r" (ctx->h), + : [ctx_r64] "m" (ctx->r64[0]), [ctx_h] "r" (ctx->h), [finished] "r" ((word64)ctx->finished) : "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", @@ -161,7 +160,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m, __asm__ __volatile__ ( /* If less than 4 blocks to process then use regular method */ "CMP %[bytes], #64 \n\t" - "BLO L_poly1305_aarch64_64_done_%= \n\t" + "B.LO L_poly1305_aarch64_64_done_%= \n\t" "MOV x9, #0x3ffffff \n\t" /* Load h */ "LDP x20, x22, [%[h]] \n\t" @@ -189,7 +188,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m, "MOV v26.D[1], x9 \n\t" "DUP v30.4S, v26.S[0] \n\t" "CMP %[bytes], #96 \n\t" - "BLO L_poly1305_aarch64_64_start_block_size_64_%= \n\t" + "B.LO L_poly1305_aarch64_64_start_block_size_64_%= \n\t" /* Load r^2 to NEON v0, v1, v2, v3, v4 */ "LD4 { v0.S-v3.S }[2], [%[r_2]], #16 \n\t" "LD1 { v4.S }[2], [%[r_2]] \n\t" @@ -363,7 +362,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m, "UMLAL2 v25.2D, v14.4S, v0.4S \n\t" /* If less than six message blocks left then leave loop */ "CMP %[bytes], #96 \n\t" - "BLS L_poly1305_aarch64_64_loop_128_final_%= \n\t" + "B.LS L_poly1305_aarch64_64_loop_128_final_%= \n\t" /* Load m */ /* Load four message blocks to NEON v10, v11, v12, v13, v14 */ "LD4 { v10.4S-v13.4S }, [%[m]], #64 \n\t" @@ -493,7 +492,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m, "MOV v19.S[1], v19.S[2] \n\t" /* If less than 2 blocks left go straight to final multiplication. 
*/ "CMP %[bytes], #32 \n\t" - "BLO L_poly1305_aarch64_64_last_mult_%= \n\t" + "B.LO L_poly1305_aarch64_64_last_mult_%= \n\t" /* Else go to one loop of L_poly1305_aarch64_64_loop_64 */ "B L_poly1305_aarch64_64_loop_64_%= \n\t" "\n" @@ -677,7 +676,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m, "MOV v19.S[1], v19.S[2] \n\t" /* If at least two message blocks left then loop_64 */ "CMP %[bytes], #32 \n\t" - "BHS L_poly1305_aarch64_64_loop_64_%= \n\t" + "B.HS L_poly1305_aarch64_64_loop_64_%= \n\t" "\n" ".align 2 \n\t" "L_poly1305_aarch64_64_last_mult_%=: \n\t" @@ -821,8 +820,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m, : [bytes] "+r" (bytes), [m] "+r" (m), [ctx] "+m" (ctx) - : [POLY1305_BLOCK_SIZE] "I" (POLY1305_BLOCK_SIZE), - [h] "r" (ctx->h), + : [h] "r" (ctx->h), [r] "r" (ctx->r), [r_2] "r" (ctx->r_2), [r_4] "r" (ctx->r_4), diff --git a/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c b/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c index 8603b6e57..ab0fd89c7 100644 --- a/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c @@ -162,7 +162,7 @@ void BlockSha3_crypto(word64* state) "st4 {v20.d, v21.d, v22.d, v23.d}[0], [%x[state]], #32\n\t" "st1 {v24.1d}, [%x[state]]\n\t" : [state] "+r" (state) - : [L_SHA3_transform_crypto_r] "S" (L_SHA3_transform_crypto_r) + : [L_SHA3_transform_crypto_r] "i" (L_SHA3_transform_crypto_r) : "memory", "cc", "x1", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", @@ -369,7 +369,7 @@ void BlockSha3_base(word64* state) "str x26, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state) - : [L_SHA3_transform_base_r] "S" (L_SHA3_transform_base_r) + : [L_SHA3_transform_base_r] "i" (L_SHA3_transform_base_r) : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" diff --git a/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c b/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c index 6a5cebb3c..1f5559dd2 100644 --- a/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c @@ -1004,8 +1004,8 @@ void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, word32 len) "stp x8, x9, [%x[sha512], #32]\n\t" "stp x10, x11, [%x[sha512], #48]\n\t" : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len) - : [L_SHA512_transform_neon_len_k] "S" (L_SHA512_transform_neon_len_k), - [L_SHA512_transform_neon_len_r8] "S" (L_SHA512_transform_neon_len_r8) + : [L_SHA512_transform_neon_len_k] "i" (L_SHA512_transform_neon_len_k), + [L_SHA512_transform_neon_len_r8] "i" (L_SHA512_transform_neon_len_r8) : "memory", "cc", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "v0", "v1", "v2", @@ -1580,7 +1580,7 @@ void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data, /* Store digest back */ "st1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[sha512]]\n\t" : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len) - : [L_SHA512_trans_crypto_len_k] "S" (L_SHA512_trans_crypto_len_k) + : [L_SHA512_trans_crypto_len_k] "i" (L_SHA512_trans_crypto_len_k) : "memory", "cc", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", 
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", diff --git a/wolfcrypt/src/port/arm/armv8-sha512.c b/wolfcrypt/src/port/arm/armv8-sha512.c index dfc679df0..6c1f18515 100644 --- a/wolfcrypt/src/port/arm/armv8-sha512.c +++ b/wolfcrypt/src/port/arm/armv8-sha512.c @@ -647,9 +647,7 @@ static int Sha512_Family_Final(wc_Sha512* sha512, byte* hash, { int ret; int digestSz; - int (*initfp)(wc_Sha512*); - - (void)initfp; + int (*initfp)(wc_Sha512*) = NULL; if (sha512 == NULL || hash == NULL) { return BAD_FUNC_ARG;