Green Hills compiler fixes

internal.c: Move non-enumeration value out of switch.
ssl.c: Only declare globalRNGMutex when required.
x509.c: initialize ret

armv8-aes.c, armv8-chacha.c: fix branch instructions
armv8-mlkem*: ensure only required constants are input operands and move
constants closer to first use.
armv8-poly1305.c: remove POLY1305_BLOCK_SIZE from input operands.
armv8-sha3-asm_c.c, armv8-sha512-asm_c.c: use constraint 'i' instead of
'S'.
armv8-sha512.c: initialize initfp. It is always used.
pull/8774/head
Sean Parkinson 2025-05-20 10:55:26 +10:00
parent f8bb889712
commit fc1d281268
11 changed files with 377 additions and 584 deletions

View File

@ -26488,7 +26488,7 @@ const char* wolfSSL_ERR_reason_error_string(unsigned long e)
return "peer ip address mismatch";
case WANT_READ :
case -WOLFSSL_ERROR_WANT_READ :
case WOLFSSL_ERROR_WANT_READ_E :
return "non-blocking socket wants data to be read";
case NOT_READY_ERROR :
@ -26498,17 +26498,17 @@ const char* wolfSSL_ERR_reason_error_string(unsigned long e)
return "record layer version error";
case WANT_WRITE :
case -WOLFSSL_ERROR_WANT_WRITE :
case WOLFSSL_ERROR_WANT_WRITE_E :
return "non-blocking socket write buffer full";
case -WOLFSSL_ERROR_WANT_CONNECT:
case -WOLFSSL_ERROR_WANT_ACCEPT:
case WOLFSSL_ERROR_WANT_CONNECT_E :
case WOLFSSL_ERROR_WANT_ACCEPT_E :
return "The underlying BIO was not yet connected";
case -WOLFSSL_ERROR_SYSCALL:
case WOLFSSL_ERROR_SYSCALL_E :
return "fatal I/O error in TLS layer";
case -WOLFSSL_ERROR_WANT_X509_LOOKUP:
case WOLFSSL_ERROR_WANT_X509_LOOKUP_E :
return "application client cert callback asked to be called again";
case BUFFER_ERROR :
@ -26548,7 +26548,7 @@ const char* wolfSSL_ERR_reason_error_string(unsigned long e)
return "can't decode peer key";
case ZERO_RETURN:
case -WOLFSSL_ERROR_ZERO_RETURN:
case WOLFSSL_ERROR_ZERO_RETURN_E :
return "peer sent close notify alert";
case ECC_CURVETYPE_ERROR:

View File

@ -234,8 +234,10 @@ static struct SystemCryptoPolicy crypto_policy;
static WC_RNG globalRNG;
static volatile int initGlobalRNG = 0;
#if defined(OPENSSL_EXTRA) || !defined(WOLFSSL_MUTEX_INITIALIZER)
static WC_MAYBE_UNUSED wolfSSL_Mutex globalRNGMutex
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(globalRNGMutex);
#endif
#ifndef WOLFSSL_MUTEX_INITIALIZER
static int globalRNGMutex_valid = 0;
#endif

View File

@ -5507,7 +5507,7 @@ int wolfSSL_X509_NAME_get_text_by_NID(WOLFSSL_X509_NAME* name,
WOLFSSL_EVP_PKEY* wolfSSL_X509_get_pubkey(WOLFSSL_X509* x509)
{
WOLFSSL_EVP_PKEY* key = NULL;
int ret;
int ret = 0;
(void)ret;

File diff suppressed because it is too large Load Diff

View File

@ -556,7 +556,7 @@ static WC_INLINE void wc_Chacha_encrypt_320(const word32* input, const byte* m,
"SRI v6.4s, v17.4s, #25 \n\t"
"SRI v7.4s, v18.4s, #25 \n\t"
"SRI v4.4s, v19.4s, #25 \n\t"
"BNE L_chacha20_arm64_inner_%= \n\t"
"B.NE L_chacha20_arm64_inner_%= \n\t"
/* Add counter now rather than after transposed */
"ADD v12.4s, v12.4s, v28.4s \n\t"
"ADD w16, w16, w21 \n\t"
@ -666,7 +666,7 @@ static WC_INLINE void wc_Chacha_encrypt_320(const word32* input, const byte* m,
"ST1 {v16.4s-v19.4s}, [%[c]], #64 \n\t"
"SUBS %[bytes], %[bytes], #320 \n\t"
"ADD v28.4s, v28.4s, v29.4s \n\t"
"BNE L_chacha20_arm64_outer_%= \n\t"
"B.NE L_chacha20_arm64_outer_%= \n\t"
: [input] "+r" (input), [m] "+r" (m), [c] "+r" (c),
[bytes] "+r" (bytes64)
: [L_chacha20_neon_add_all_cntrs] "r" (L_chacha20_neon_add_all_cntrs),
@ -959,7 +959,7 @@ static WC_INLINE int wc_Chacha_encrypt_256(
"EXT v9.16B, v9.16B, v9.16B, #12 \n\t"
"EXT v10.16B, v10.16B, v10.16B, #8 \n\t"
"EXT v11.16B, v11.16B, v11.16B, #4 \n\t"
"BNE L_chacha20_arm64_256_loop_%= \n\t"
"B.NE L_chacha20_arm64_256_loop_%= \n\t"
/* Load message */
"LD1 {v16.4S-v19.4S}, [%[m]], #64 \n\t"
/* Add one (2 added during calculating vector results) */
@ -1364,7 +1364,7 @@ static WC_INLINE int wc_Chacha_encrypt_256(
"ROR r4, r4, #25 \n\t" // 4 4
"VEXT.8 q11, q11, q11, #4 \n\t" // permute elements left by one
"BNE L_chacha20_arm32_256_loop_%= \n\t"
"B.NE L_chacha20_arm32_256_loop_%= \n\t"
// r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12
// 0 1 2 3 4 5 6 7 8 9 12 13 14
@ -1583,7 +1583,7 @@ static WC_INLINE int wc_Chacha_encrypt_128(
"EXT v5.16B, v5.16B, v5.16B, #12 \n\t"
"EXT v6.16B, v6.16B, v6.16B, #8 \n\t"
"EXT v7.16B, v7.16B, v7.16B, #4 \n\t"
"BNE L_chacha20_arm64_128_loop_%= \n\t"
"B.NE L_chacha20_arm64_128_loop_%= \n\t"
/* Add back state, XOR in message and store (load next block) */
"ADD v0.4S, v0.4S, v18.4S \n\t"
"ADD v1.4S, v1.4S, v19.4S \n\t"
@ -1736,7 +1736,7 @@ static WC_INLINE int wc_Chacha_encrypt_128(
"VEXT.8 q6, q6, q6, #8 \n\t" // permute elements left by two
"VEXT.8 q7, q7, q7, #4 \n\t" // permute elements left by one
"BNE L_chacha20_arm32_128_loop_%= \n\t"
"B.NE L_chacha20_arm32_128_loop_%= \n\t"
"VMOV.I32 q8, #0 \n\t"
"VADD.I32 q0, q0, q10 \n\t"
@ -2251,7 +2251,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
"ADD v2.4S, v2.4S, v10.4S \n\t"
"ADD v3.4S, v3.4S, v11.4S \n\t"
"CMP %[bytes], #64 \n\t"
"BLT L_chacha20_arm64_64_lt_64_%= \n\t"
"B.LT L_chacha20_arm64_64_lt_64_%= \n\t"
"LD1 {v4.4S-v7.4S}, [%[m]], #64 \n\t"
"EOR v4.16B, v4.16B, v0.16B \n\t"
"EOR v5.16B, v5.16B, v1.16B \n\t"
@ -2260,13 +2260,13 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
"ST1 {v4.4S-v7.4S}, [%[c]], #64 \n\t"
"SUBS %[bytes], %[bytes], #64 \n\t"
"ADD v11.4S, v11.4S, v14.4S \n\t"
"BNE L_chacha20_arm64_64_loop_%= \n\t"
"B.NE L_chacha20_arm64_64_loop_%= \n\t"
"B L_chacha20_arm64_64_done_%= \n\t"
"\n"
"L_chacha20_arm64_64_lt_64_%=: \n\t"
"ST1 {v0.4s-v3.4s}, [%[over]]\n\t"
"CMP %[bytes], #32 \n\t"
"BLT L_chacha20_arm64_64_lt_32_%= \n\t"
"B.LT L_chacha20_arm64_64_lt_32_%= \n\t"
"LD1 {v4.4S, v5.4S}, [%[m]], #32 \n\t"
"EOR v4.16B, v4.16B, v0.16B \n\t"
"EOR v5.16B, v5.16B, v1.16B \n\t"
@ -2274,27 +2274,27 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
"SUBS %[bytes], %[bytes], #32 \n\t"
"MOV v0.16B, v2.16B \n\t"
"MOV v1.16B, v3.16B \n\t"
"BEQ L_chacha20_arm64_64_done_%= \n\t"
"B.EQ L_chacha20_arm64_64_done_%= \n\t"
"\n"
"L_chacha20_arm64_64_lt_32_%=: \n\t"
"CMP %[bytes], #16 \n\t"
"BLT L_chacha20_arm64_64_lt_16_%= \n\t"
"B.LT L_chacha20_arm64_64_lt_16_%= \n\t"
"LD1 {v4.4S}, [%[m]], #16 \n\t"
"EOR v4.16B, v4.16B, v0.16B \n\t"
"ST1 {v4.4S}, [%[c]], #16 \n\t"
"SUBS %[bytes], %[bytes], #16 \n\t"
"MOV v0.16B, v1.16B \n\t"
"BEQ L_chacha20_arm64_64_done_%= \n\t"
"B.EQ L_chacha20_arm64_64_done_%= \n\t"
"\n"
"L_chacha20_arm64_64_lt_16_%=: \n\t"
"CMP %[bytes], #8 \n\t"
"BLT L_chacha20_arm64_64_lt_8_%= \n\t"
"B.LT L_chacha20_arm64_64_lt_8_%= \n\t"
"LD1 {v4.2S}, [%[m]], #8 \n\t"
"EOR v4.8B, v4.8B, v0.8B \n\t"
"ST1 {v4.2S}, [%[c]], #8 \n\t"
"SUBS %[bytes], %[bytes], #8 \n\t"
"MOV v0.D[0], v0.D[1] \n\t"
"BEQ L_chacha20_arm64_64_done_%= \n\t"
"B.EQ L_chacha20_arm64_64_done_%= \n\t"
"\n"
"L_chacha20_arm64_64_lt_8_%=: \n\t"
"MOV x4, v0.D[0] \n\t"
@ -2305,7 +2305,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
"STRB w6, [%[c]], #1 \n\t"
"SUBS %[bytes], %[bytes], #1 \n\t"
"LSR x4, x4, #8 \n\t"
"BGT L_chacha20_arm64_64_loop_lt_8_%= \n\t"
"B.GT L_chacha20_arm64_64_loop_lt_8_%= \n\t"
"\n"
"L_chacha20_arm64_64_done_%=: \n\t"
: [input] "+r" (input), [m] "+r" (m), [c] "+r" (c),
@ -2816,7 +2816,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
"VADD.I32 q2, q2, q10 \n\t"
"VADD.I32 q3, q3, q11 \n\t"
"CMP %[bytes], #64 \n\t"
"BLT L_chacha20_arm32_64_lt_64_%= \n\t"
"B.LT L_chacha20_arm32_64_lt_64_%= \n\t"
/* XOR full 64 byte block */
"VLD1.8 { q4, q5 }, [%[m]]! \n\t"
"VLD1.8 { q6, q7 }, [%[m]]! \n\t"
@ -2828,14 +2828,14 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
"VST1.8 { q2, q3 }, [%[c]]! \n\t"
"SUBS %[bytes], %[bytes], #64 \n\t"
"VADD.I32 q11, q11, q14 \n\t"
"BNE L_chacha20_arm32_64_outer_loop_%= \n\t"
"B.NE L_chacha20_arm32_64_outer_loop_%= \n\t"
"B L_chacha20_arm32_64_done_%= \n\t"
"\n"
"L_chacha20_arm32_64_lt_64_%=: \n\t"
"VSTM %[over], {q0-q3} \n\t"
/* XOR 32 bytes */
"CMP %[bytes], #32 \n\t"
"BLT L_chacha20_arm32_64_lt_32_%= \n\t"
"B.LT L_chacha20_arm32_64_lt_32_%= \n\t"
"VLD1.8 { q4, q5 }, [%[m]]! \n\t"
"VEOR q4, q4, q0 \n\t"
"VEOR q5, q5, q1 \n\t"
@ -2843,41 +2843,41 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
"SUBS %[bytes], %[bytes], #32 \n\t"
"VMOV q0, q2 \n\t"
"VMOV q1, q3 \n\t"
"BEQ L_chacha20_arm32_64_done_%= \n\t"
"B.EQ L_chacha20_arm32_64_done_%= \n\t"
"\n"
"L_chacha20_arm32_64_lt_32_%=: \n\t"
/* XOR 16 bytes */
"CMP %[bytes], #16 \n\t"
"BLT L_chacha20_arm32_64_lt_16_%= \n\t"
"B.LT L_chacha20_arm32_64_lt_16_%= \n\t"
"VLD1.8 { q4 }, [%[m]]! \n\t"
"VEOR q4, q4, q0 \n\t"
"VST1.8 { q4 }, [%[c]]! \n\t"
"SUBS %[bytes], %[bytes], #16 \n\t"
"VMOV q0, q1 \n\t"
"BEQ L_chacha20_arm32_64_done_%= \n\t"
"B.EQ L_chacha20_arm32_64_done_%= \n\t"
"\n"
"L_chacha20_arm32_64_lt_16_%=: \n\t"
/* XOR 8 bytes */
"CMP %[bytes], #8 \n\t"
"BLT L_chacha20_arm32_64_lt_8_%= \n\t"
"B.LT L_chacha20_arm32_64_lt_8_%= \n\t"
"VLD1.8 { d8 }, [%[m]]! \n\t"
"VEOR d8, d8, d0 \n\t"
"VST1.8 { d8 }, [%[c]]! \n\t"
"SUBS %[bytes], %[bytes], #8 \n\t"
"VMOV d0, d1 \n\t"
"BEQ L_chacha20_arm32_64_done_%= \n\t"
"B.EQ L_chacha20_arm32_64_done_%= \n\t"
"\n"
"L_chacha20_arm32_64_lt_8_%=: \n\t"
/* XOR 4 bytes */
"CMP %[bytes], #4 \n\t"
"BLT L_chacha20_arm32_64_lt_4_%= \n\t"
"B.LT L_chacha20_arm32_64_lt_4_%= \n\t"
"LDR r12, [%[m]], #4 \n\t"
"VMOV r14, d0[0] \n\t"
"EOR r12, r12, r14 \n\t"
"STR r12, [%[c]], #4 \n\t"
"SUBS %[bytes], %[bytes], #4 \n\t"
"VSHR.U64 d0, d0, #32 \n\t"
"BEQ L_chacha20_arm32_64_done_%= \n\t"
"B.EQ L_chacha20_arm32_64_done_%= \n\t"
"\n"
"L_chacha20_arm32_64_lt_4_%=: \n\t"
/* XOR remaining bytes */
@ -2889,7 +2889,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
"STRB r12, [%[c]], #1 \n\t"
"SUBS %[bytes], %[bytes], #1 \n\t"
"LSR r14, r14, #8 \n\t"
"BGT L_chacha20_arm32_64_lt_4_loop_%= \n\t"
"B.GT L_chacha20_arm32_64_lt_4_loop_%= \n\t"
"\n"
"L_chacha20_arm32_64_done_%=: \n\t"
: [input] "+r" (input), [m] "+r" (m), [c] "+r" (c), [bytes] "+r" (bytes)

View File

@ -29,21 +29,6 @@
#ifdef WOLFSSL_ARMASM
#ifdef __aarch64__
#ifndef WOLFSSL_ARMASM_INLINE
#ifndef __APPLE__
.text
.type L_mlkem_aarch64_q, %object
.section .rodata
.size L_mlkem_aarch64_q, 16
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 2
#else
.p2align 2
#endif /* __APPLE__ */
L_mlkem_aarch64_q:
.short 0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01
#ifndef __APPLE__
.text
.type L_mlkem_aarch64_consts, %object
@ -59,44 +44,6 @@ L_mlkem_aarch64_q:
#endif /* __APPLE__ */
L_mlkem_aarch64_consts:
.short 0x0d01,0xf301,0x4ebf,0x0549,0x5049,0x0000,0x0000,0x0000
#ifndef __APPLE__
.text
.type L_sha3_aarch64_r, %object
.section .rodata
.size L_sha3_aarch64_r, 192
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 3
#else
.p2align 3
#endif /* __APPLE__ */
L_sha3_aarch64_r:
.xword 0x0000000000000001
.xword 0x0000000000008082
.xword 0x800000000000808a
.xword 0x8000000080008000
.xword 0x000000000000808b
.xword 0x0000000080000001
.xword 0x8000000080008081
.xword 0x8000000000008009
.xword 0x000000000000008a
.xword 0x0000000000000088
.xword 0x0000000080008009
.xword 0x000000008000000a
.xword 0x000000008000808b
.xword 0x800000000000008b
.xword 0x8000000000008089
.xword 0x8000000000008003
.xword 0x8000000000008002
.xword 0x8000000000000080
.xword 0x000000000000800a
.xword 0x800000008000000a
.xword 0x8000000080008081
.xword 0x8000000000008080
.xword 0x0000000080000001
.xword 0x8000000080008008
#ifdef WOLFSSL_WC_MLKEM
#ifndef __APPLE__
.text
@ -7006,6 +6953,21 @@ _mlkem_basemul_mont_add:
#ifndef __APPLE__
.size mlkem_basemul_mont_add,.-mlkem_basemul_mont_add
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.type L_mlkem_aarch64_q, %object
.section .rodata
.size L_mlkem_aarch64_q, 16
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 2
#else
.p2align 2
#endif /* __APPLE__ */
L_mlkem_aarch64_q:
.short 0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01
#ifndef __APPLE__
.text
.globl mlkem_csubq_neon
@ -9724,6 +9686,44 @@ L_mlkem_rej_uniform_done:
#ifndef __APPLE__
.size mlkem_rej_uniform_neon,.-mlkem_rej_uniform_neon
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.type L_sha3_aarch64_r, %object
.section .rodata
.size L_sha3_aarch64_r, 192
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 3
#else
.p2align 3
#endif /* __APPLE__ */
L_sha3_aarch64_r:
.xword 0x0000000000000001
.xword 0x0000000000008082
.xword 0x800000000000808a
.xword 0x8000000080008000
.xword 0x000000000000808b
.xword 0x0000000080000001
.xword 0x8000000080008081
.xword 0x8000000000008009
.xword 0x000000000000008a
.xword 0x0000000000000088
.xword 0x0000000080008009
.xword 0x000000008000000a
.xword 0x000000008000808b
.xword 0x800000000000008b
.xword 0x8000000000008089
.xword 0x8000000000008003
.xword 0x8000000000008002
.xword 0x8000000000000080
.xword 0x000000000000800a
.xword 0x800000008000000a
.xword 0x8000000080008081
.xword 0x8000000000008080
.xword 0x0000000080000001
.xword 0x8000000080008008
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
#ifndef __APPLE__
.text

View File

@ -30,29 +30,10 @@
#ifdef WOLFSSL_ARMASM
#ifdef __aarch64__
#ifdef WOLFSSL_ARMASM_INLINE
static const word16 L_mlkem_aarch64_q[] = {
0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01,
};
static const word16 L_mlkem_aarch64_consts[] = {
0x0d01, 0xf301, 0x4ebf, 0x0549, 0x5049, 0x0000, 0x0000, 0x0000,
};
static const word64 L_sha3_aarch64_r[] = {
0x0000000000000001, 0x0000000000008082,
0x800000000000808a, 0x8000000080008000,
0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009,
0x000000000000008a, 0x0000000000000088,
0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b,
0x8000000000008089, 0x8000000000008003,
0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a,
0x8000000080008081, 0x8000000000008080,
0x0000000080000001, 0x8000000080008008,
};
#include <wolfssl/wolfcrypt/wc_mlkem.h>
#ifdef WOLFSSL_WC_MLKEM
@ -1405,11 +1386,9 @@ void mlkem_ntt(sword16* r)
"stp q17, q18, [x1, #192]\n\t"
"stp q19, q20, [x1, #224]\n\t"
: [r] "+r" (r)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv)
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]),
[L_mlkem_aarch64_zetas] "i" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "i" (L_mlkem_aarch64_zetas_qinv)
: "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4",
"v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14",
"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
@ -2922,13 +2901,9 @@ void mlkem_invntt(sword16* r)
"str q23, [x1, #208]\n\t"
"str q24, [x1, #240]\n\t"
: [r] "+r" (r)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv)
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]),
[L_mlkem_aarch64_zetas_inv] "i" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "i" (L_mlkem_aarch64_zetas_inv_qinv)
: "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4",
"v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14",
"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
@ -4096,13 +4071,7 @@ void mlkem_ntt_sqrdmlsh(sword16* r)
"stp q17, q18, [x1, #192]\n\t"
"stp q19, q20, [x1, #224]\n\t"
: [r] "+r" (r)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv)
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts])
: "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4",
"v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14",
"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
@ -5393,13 +5362,7 @@ void mlkem_invntt_sqrdmlsh(sword16* r)
"str q23, [x1, #208]\n\t"
"str q24, [x1, #240]\n\t"
: [r] "+r" (r)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv)
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts])
: "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4",
"v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14",
"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
@ -6102,14 +6065,8 @@ void mlkem_basemul_mont(sword16* r, const sword16* a, const sword16* b)
"zip2 v25.8h, v22.8h, v23.8h\n\t"
"stp q24, q25, [%x[r], #480]\n\t"
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul)
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]),
[L_mlkem_aarch64_zetas_mul] "i" (L_mlkem_aarch64_zetas_mul)
: "memory", "cc", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
"v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16",
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
@ -6840,14 +6797,8 @@ void mlkem_basemul_mont_add(sword16* r, const sword16* a, const sword16* b)
"add v29.8h, v29.8h, v25.8h\n\t"
"stp q28, q29, [%x[r], #480]\n\t"
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul)
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]),
[L_mlkem_aarch64_zetas_mul] "i" (L_mlkem_aarch64_zetas_mul)
: "memory", "cc", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
"v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16",
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
@ -6855,6 +6806,10 @@ void mlkem_basemul_mont_add(sword16* r, const sword16* a, const sword16* b)
);
}
static const word16 L_mlkem_aarch64_q[] = {
0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01,
};
void mlkem_csubq_neon(sword16* p)
{
__asm__ __volatile__ (
@ -7013,14 +6968,7 @@ void mlkem_csubq_neon(sword16* p)
"st4 {v8.8h, v9.8h, v10.8h, v11.8h}, [%x[p]], #0x40\n\t"
"st4 {v12.8h, v13.8h, v14.8h, v15.8h}, [%x[p]], #0x40\n\t"
: [p] "+r" (p)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul)
: [L_mlkem_aarch64_q] "i" (L_mlkem_aarch64_q)
: "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
"v18", "v19", "v20"
@ -7195,14 +7143,7 @@ void mlkem_add_reduce(sword16* r, const sword16* a)
"st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
"st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
: [r] "+r" (r), [a] "+r" (a)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul)
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts])
: "memory", "cc", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
"v18"
@ -7417,14 +7358,7 @@ void mlkem_add3_reduce(sword16* r, const sword16* a, const sword16* b)
"st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
"st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul)
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts])
: "memory", "cc", "x3", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26"
@ -7599,14 +7533,7 @@ void mlkem_rsub_reduce(sword16* r, const sword16* a)
"st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
"st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
: [r] "+r" (r), [a] "+r" (a)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul)
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts])
: "memory", "cc", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
"v18"
@ -7803,14 +7730,7 @@ void mlkem_to_mont(sword16* p)
"st4 {v9.8h, v10.8h, v11.8h, v12.8h}, [%x[p]], #0x40\n\t"
"st4 {v13.8h, v14.8h, v15.8h, v16.8h}, [%x[p]], #0x40\n\t"
: [p] "+r" (p)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul)
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts])
: "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
"v18"
@ -7976,14 +7896,7 @@ void mlkem_to_mont_sqrdmlsh(sword16* p)
"st4 {v9.8h, v10.8h, v11.8h, v12.8h}, [%x[p]], #0x40\n\t"
"st4 {v13.8h, v14.8h, v15.8h, v16.8h}, [%x[p]], #0x40\n\t"
: [p] "+r" (p)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul)
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts])
: "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
"v18"
@ -8231,17 +8144,9 @@ void mlkem_to_msg_neon(byte* msg, sword16* p)
"ins v18.b[7], v25.b[0]\n\t"
"st1 {v18.8b}, [%x[msg]], #8\n\t"
: [msg] "+r" (msg), [p] "+r" (p)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits)
: [L_mlkem_to_msg_low] "i" (L_mlkem_to_msg_low),
[L_mlkem_to_msg_high] "i" (L_mlkem_to_msg_high),
[L_mlkem_to_msg_bits] "i" (L_mlkem_to_msg_bits)
: "memory", "cc", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5",
"v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24",
@ -8415,19 +8320,8 @@ void mlkem_from_msg_neon(sword16* p, const byte* msg)
"and v7.16b, v7.16b, v1.16b\n\t"
"st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [%x[p]], #0x40\n\t"
: [p] "+r" (p), [msg] "+r" (msg)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits),
[L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
[L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits)
: [L_mlkem_from_msg_q1half] "i" (L_mlkem_from_msg_q1half),
[L_mlkem_from_msg_bits] "i" (L_mlkem_from_msg_bits)
: "memory", "cc", "x2", "x3", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
"v7", "v8", "v9", "v10", "v11"
);
@ -8693,19 +8587,7 @@ int mlkem_cmp_neon(const byte* a, const byte* b, int sz)
"subs x0, x0, xzr\n\t"
"csetm w0, ne\n\t"
: [a] "+r" (a), [b] "+r" (b), [sz] "+r" (sz)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits),
[L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
[L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits)
:
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
"v9", "v10", "v11"
);
@ -9410,22 +9292,10 @@ unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r,
"L_mlkem_rej_uniform_done_%=: \n\t"
"mov x0, x12\n\t"
: [p] "+r" (p), [len] "+r" (len), [r] "+r" (r), [rLen] "+r" (rLen)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits),
[L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
[L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits),
[L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask),
[L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits),
[L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices)
: [%[L_mlkem_aarch64_q]] "i" (%[L_mlkem_aarch64_q]),
[L_mlkem_rej_uniform_mask] "i" (L_mlkem_rej_uniform_mask),
[L_mlkem_rej_uniform_bits] "i" (L_mlkem_rej_uniform_bits),
[L_mlkem_rej_uniform_indices] "i" (L_mlkem_rej_uniform_indices)
: "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11",
"x12", "x13", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
"v9", "v10", "v11", "v12", "v13"
@ -9433,6 +9303,21 @@ unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r,
return (word32)(size_t)p;
}
static const word64 L_sha3_aarch64_r[] = {
0x0000000000000001, 0x0000000000008082,
0x800000000000808a, 0x8000000080008000,
0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009,
0x000000000000008a, 0x0000000000000088,
0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b,
0x8000000000008089, 0x8000000000008003,
0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a,
0x8000000080008081, 0x8000000000008080,
0x0000000080000001, 0x8000000080008008,
};
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
void mlkem_sha3_blocksx3_neon(word64* state)
{
@ -9728,22 +9613,7 @@ void mlkem_sha3_blocksx3_neon(word64* state)
"str x26, [%x[state], #192]\n\t"
"ldp x29, x30, [sp], #0x40\n\t"
: [state] "+r" (state)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits),
[L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
[L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits),
[L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask),
[L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits),
[L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices)
: [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r)
: "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9",
"x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19",
"x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0",
@ -10070,22 +9940,7 @@ void mlkem_shake128_blocksx3_seed_neon(word64* state, byte* seed)
"str x27, [%x[state], #192]\n\t"
"ldp x29, x30, [sp], #0x40\n\t"
: [state] "+r" (state), [seed] "+r" (seed)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits),
[L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
[L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits),
[L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask),
[L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits),
[L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices)
: [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r)
: "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10",
"x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20",
"x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1",
@ -10412,22 +10267,7 @@ void mlkem_shake256_blocksx3_seed_neon(word64* state, byte* seed)
"str x27, [%x[state], #192]\n\t"
"ldp x29, x30, [sp], #0x40\n\t"
: [state] "+r" (state), [seed] "+r" (seed)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits),
[L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
[L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits),
[L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask),
[L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits),
[L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices)
: [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r)
: "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10",
"x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20",
"x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1",
@ -10818,22 +10658,7 @@ void mlkem_sha3_blocksx3_neon(word64* state)
"str x26, [%x[state], #192]\n\t"
"ldp x29, x30, [sp], #0x40\n\t"
: [state] "+r" (state)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits),
[L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
[L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits),
[L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask),
[L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits),
[L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices)
: [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r)
: "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9",
"x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19",
"x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0",
@ -11245,22 +11070,7 @@ void mlkem_shake128_blocksx3_seed_neon(word64* state, byte* seed)
"str x27, [%x[state], #192]\n\t"
"ldp x29, x30, [sp], #0x40\n\t"
: [state] "+r" (state), [seed] "+r" (seed)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits),
[L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
[L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits),
[L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask),
[L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits),
[L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices)
: [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r)
: "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10",
"x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20",
"x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1",
@ -11672,22 +11482,7 @@ void mlkem_shake256_blocksx3_seed_neon(word64* state, byte* seed)
"str x27, [%x[state], #192]\n\t"
"ldp x29, x30, [sp], #0x40\n\t"
: [state] "+r" (state), [seed] "+r" (seed)
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits),
[L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
[L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits),
[L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask),
[L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits),
[L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices)
: [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r)
: "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10",
"x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20",
"x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1",

View File

@ -49,7 +49,7 @@ static WC_INLINE void poly1305_blocks_aarch64_16(Poly1305* ctx,
__asm__ __volatile__ (
/* Check for zero bytes to do. */
"CMP %[bytes], #16 \n\t"
"BLO L_poly1305_aarch64_16_done_%= \n\t"
"B.LO L_poly1305_aarch64_16_done_%= \n\t"
"MOV x12, #1 \n\t"
/* Load h */
@ -129,7 +129,7 @@ static WC_INLINE void poly1305_blocks_aarch64_16(Poly1305* ctx,
"SUBS %[bytes], %[bytes], #16\n\t"
"ADD %[m], %[m], #16\n\t"
"BGT L_poly1305_aarch64_16_loop_%=\n\t"
"B.GT L_poly1305_aarch64_16_loop_%=\n\t"
/* Base 64 -> Base 26 */
"MOV x10, #0x3ffffff\n\t"
@ -146,8 +146,7 @@ static WC_INLINE void poly1305_blocks_aarch64_16(Poly1305* ctx,
".align 2 \n\t"
"L_poly1305_aarch64_16_done_%=: \n\t"
: [bytes] "+r" (bytes), [m] "+r" (m)
: [POLY1305_BLOCK_SIZE] "I" (POLY1305_BLOCK_SIZE),
[ctx_r64] "m" (ctx->r64[0]), [ctx_h] "r" (ctx->h),
: [ctx_r64] "m" (ctx->r64[0]), [ctx_h] "r" (ctx->h),
[finished] "r" ((word64)ctx->finished)
: "memory", "cc",
"x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14",
@ -161,7 +160,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m,
__asm__ __volatile__ (
/* If less than 4 blocks to process then use regular method */
"CMP %[bytes], #64 \n\t"
"BLO L_poly1305_aarch64_64_done_%= \n\t"
"B.LO L_poly1305_aarch64_64_done_%= \n\t"
"MOV x9, #0x3ffffff \n\t"
/* Load h */
"LDP x20, x22, [%[h]] \n\t"
@ -189,7 +188,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m,
"MOV v26.D[1], x9 \n\t"
"DUP v30.4S, v26.S[0] \n\t"
"CMP %[bytes], #96 \n\t"
"BLO L_poly1305_aarch64_64_start_block_size_64_%= \n\t"
"B.LO L_poly1305_aarch64_64_start_block_size_64_%= \n\t"
/* Load r^2 to NEON v0, v1, v2, v3, v4 */
"LD4 { v0.S-v3.S }[2], [%[r_2]], #16 \n\t"
"LD1 { v4.S }[2], [%[r_2]] \n\t"
@ -363,7 +362,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m,
"UMLAL2 v25.2D, v14.4S, v0.4S \n\t"
/* If less than six message blocks left then leave loop */
"CMP %[bytes], #96 \n\t"
"BLS L_poly1305_aarch64_64_loop_128_final_%= \n\t"
"B.LS L_poly1305_aarch64_64_loop_128_final_%= \n\t"
/* Load m */
/* Load four message blocks to NEON v10, v11, v12, v13, v14 */
"LD4 { v10.4S-v13.4S }, [%[m]], #64 \n\t"
@ -493,7 +492,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m,
"MOV v19.S[1], v19.S[2] \n\t"
/* If less than 2 blocks left go straight to final multiplication. */
"CMP %[bytes], #32 \n\t"
"BLO L_poly1305_aarch64_64_last_mult_%= \n\t"
"B.LO L_poly1305_aarch64_64_last_mult_%= \n\t"
/* Else go to one loop of L_poly1305_aarch64_64_loop_64 */
"B L_poly1305_aarch64_64_loop_64_%= \n\t"
"\n"
@ -677,7 +676,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m,
"MOV v19.S[1], v19.S[2] \n\t"
/* If at least two message blocks left then loop_64 */
"CMP %[bytes], #32 \n\t"
"BHS L_poly1305_aarch64_64_loop_64_%= \n\t"
"B.HS L_poly1305_aarch64_64_loop_64_%= \n\t"
"\n"
".align 2 \n\t"
"L_poly1305_aarch64_64_last_mult_%=: \n\t"
@ -821,8 +820,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m,
: [bytes] "+r" (bytes),
[m] "+r" (m),
[ctx] "+m" (ctx)
: [POLY1305_BLOCK_SIZE] "I" (POLY1305_BLOCK_SIZE),
[h] "r" (ctx->h),
: [h] "r" (ctx->h),
[r] "r" (ctx->r),
[r_2] "r" (ctx->r_2),
[r_4] "r" (ctx->r_4),

View File

@ -162,7 +162,7 @@ void BlockSha3_crypto(word64* state)
"st4 {v20.d, v21.d, v22.d, v23.d}[0], [%x[state]], #32\n\t"
"st1 {v24.1d}, [%x[state]]\n\t"
: [state] "+r" (state)
: [L_SHA3_transform_crypto_r] "S" (L_SHA3_transform_crypto_r)
: [L_SHA3_transform_crypto_r] "i" (L_SHA3_transform_crypto_r)
: "memory", "cc", "x1", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
"v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16",
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
@ -369,7 +369,7 @@ void BlockSha3_base(word64* state)
"str x26, [%x[state], #192]\n\t"
"ldp x29, x30, [sp], #0x40\n\t"
: [state] "+r" (state)
: [L_SHA3_transform_base_r] "S" (L_SHA3_transform_base_r)
: [L_SHA3_transform_base_r] "i" (L_SHA3_transform_base_r)
: "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9",
"x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19",
"x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28"

View File

@ -1004,8 +1004,8 @@ void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, word32 len)
"stp x8, x9, [%x[sha512], #32]\n\t"
"stp x10, x11, [%x[sha512], #48]\n\t"
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len)
: [L_SHA512_transform_neon_len_k] "S" (L_SHA512_transform_neon_len_k),
[L_SHA512_transform_neon_len_r8] "S" (L_SHA512_transform_neon_len_r8)
: [L_SHA512_transform_neon_len_k] "i" (L_SHA512_transform_neon_len_k),
[L_SHA512_transform_neon_len_r8] "i" (L_SHA512_transform_neon_len_r8)
: "memory", "cc", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10",
"x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20",
"x21", "x22", "x23", "x24", "x25", "x26", "x27", "v0", "v1", "v2",
@ -1580,7 +1580,7 @@ void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data,
/* Store digest back */
"st1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[sha512]]\n\t"
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len)
: [L_SHA512_trans_crypto_len_k] "S" (L_SHA512_trans_crypto_len_k)
: [L_SHA512_trans_crypto_len_k] "i" (L_SHA512_trans_crypto_len_k)
: "memory", "cc", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
"v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16",
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",

View File

@ -647,9 +647,7 @@ static int Sha512_Family_Final(wc_Sha512* sha512, byte* hash,
{
int ret;
int digestSz;
int (*initfp)(wc_Sha512*);
(void)initfp;
int (*initfp)(wc_Sha512*) = NULL;
if (sha512 == NULL || hash == NULL) {
return BAD_FUNC_ARG;