diff --git a/src/internal.c b/src/internal.c index 0a48149a7..38c6734ff 100644 --- a/src/internal.c +++ b/src/internal.c @@ -26488,7 +26488,7 @@ const char* wolfSSL_ERR_reason_error_string(unsigned long e) return "peer ip address mismatch"; case WANT_READ : - case -WOLFSSL_ERROR_WANT_READ : + case WOLFSSL_ERROR_WANT_READ_E : return "non-blocking socket wants data to be read"; case NOT_READY_ERROR : @@ -26498,17 +26498,17 @@ const char* wolfSSL_ERR_reason_error_string(unsigned long e) return "record layer version error"; case WANT_WRITE : - case -WOLFSSL_ERROR_WANT_WRITE : + case WOLFSSL_ERROR_WANT_WRITE_E : return "non-blocking socket write buffer full"; - case -WOLFSSL_ERROR_WANT_CONNECT: - case -WOLFSSL_ERROR_WANT_ACCEPT: + case WOLFSSL_ERROR_WANT_CONNECT_E : + case WOLFSSL_ERROR_WANT_ACCEPT_E : return "The underlying BIO was not yet connected"; - case -WOLFSSL_ERROR_SYSCALL: + case WOLFSSL_ERROR_SYSCALL_E : return "fatal I/O error in TLS layer"; - case -WOLFSSL_ERROR_WANT_X509_LOOKUP: + case WOLFSSL_ERROR_WANT_X509_LOOKUP_E : return "application client cert callback asked to be called again"; case BUFFER_ERROR : @@ -26548,7 +26548,7 @@ const char* wolfSSL_ERR_reason_error_string(unsigned long e) return "can't decode peer key"; case ZERO_RETURN: - case -WOLFSSL_ERROR_ZERO_RETURN: + case WOLFSSL_ERROR_ZERO_RETURN_E : return "peer sent close notify alert"; case ECC_CURVETYPE_ERROR: diff --git a/src/ssl.c b/src/ssl.c index a188b87f1..6d5aca6d2 100644 --- a/src/ssl.c +++ b/src/ssl.c @@ -234,8 +234,10 @@ static struct SystemCryptoPolicy crypto_policy; static WC_RNG globalRNG; static volatile int initGlobalRNG = 0; +#if defined(OPENSSL_EXTRA) || !defined(WOLFSSL_MUTEX_INITIALIZER) static WC_MAYBE_UNUSED wolfSSL_Mutex globalRNGMutex WOLFSSL_MUTEX_INITIALIZER_CLAUSE(globalRNGMutex); +#endif #ifndef WOLFSSL_MUTEX_INITIALIZER static int globalRNGMutex_valid = 0; #endif diff --git a/src/x509.c b/src/x509.c index b85a04981..278e743b7 100644 --- a/src/x509.c +++ b/src/x509.c @@ -5507,7 +5507,7 @@ int wolfSSL_X509_NAME_get_text_by_NID(WOLFSSL_X509_NAME* name, WOLFSSL_EVP_PKEY* wolfSSL_X509_get_pubkey(WOLFSSL_X509* x509) { WOLFSSL_EVP_PKEY* key = NULL; - int ret; + int ret = 0; (void)ret; diff --git a/wolfcrypt/src/port/arm/armv8-aes.c b/wolfcrypt/src/port/arm/armv8-aes.c index 88b9d3794..0d2d55c77 100644 --- a/wolfcrypt/src/port/arm/armv8-aes.c +++ b/wolfcrypt/src/port/arm/armv8-aes.c @@ -1549,7 +1549,7 @@ static void GHASH_AARCH64_EOR(Gcm* gcm, const byte* a, word32 aSz, "MOV w12, %w[aSz] \n" "CMP x12, #64 \n" - "BLT 15f \n" + "B.LT 15f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v11.1q, v3.2d, v3.2d \n" @@ -1639,11 +1639,11 @@ static void GHASH_AARCH64_EOR(Gcm* gcm, const byte* a, word32 aSz, "MOV v0.D[1], v9.D[0] \n" "EOR v0.16b, v0.16b, v8.16b \n" "CMP x12, #64 \n" - "BGE 14b \n" + "B.GE 14b \n" "CBZ x12, 20f \n" "15: \n" "CMP x12, #16 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "LD1 {v14.2d}, [%[a]], #16 \n" "SUB x12, x12, #16 \n" @@ -1664,7 +1664,7 @@ static void GHASH_AARCH64_EOR(Gcm* gcm, const byte* a, word32 aSz, "MOV v10.D[1], v13.D[0] \n" "EOR v0.16b, v10.16b, v12.16b \n" "CMP x12, #16 \n" - "BGE 11b \n" + "B.GE 11b \n" "CBZ x12, 120f \n" "12: \n" "# Partial AAD \n" @@ -1702,7 +1702,7 @@ static void GHASH_AARCH64_EOR(Gcm* gcm, const byte* a, word32 aSz, "MOV w12, %w[cSz] \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v11.1q, v3.2d, v3.2d \n" @@ -1792,11 +1792,11 @@ static void 
GHASH_AARCH64_EOR(Gcm* gcm, const byte* a, word32 aSz, "MOV v0.D[1], v9.D[0] \n" "EOR v0.16b, v0.16b, v8.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v14.2d}, [%[c]], #16 \n" "SUB x12, x12, #16 \n" @@ -1817,7 +1817,7 @@ static void GHASH_AARCH64_EOR(Gcm* gcm, const byte* a, word32 aSz, "MOV v10.D[1], v13.D[0] \n" "EOR v0.16b, v10.16b, v12.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial cipher text \n" @@ -1884,7 +1884,7 @@ static void GHASH_AARCH64_EOR3(Gcm* gcm, const byte* a, word32 aSz, "MOV w12, %w[aSz] \n" "CMP x12, #64 \n" - "BLT 15f \n" + "B.LT 15f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v11.1q, v3.2d, v3.2d \n" @@ -1970,11 +1970,11 @@ static void GHASH_AARCH64_EOR3(Gcm* gcm, const byte* a, word32 aSz, "MOV v0.D[1], v9.D[0] \n" "EOR v0.16b, v0.16b, v8.16b \n" "CMP x12, #64 \n" - "BGE 14b \n" + "B.GE 14b \n" "CBZ x12, 20f \n" "15: \n" "CMP x12, #16 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "LD1 {v14.2d}, [%[a]], #16 \n" "SUB x12, x12, #16 \n" @@ -1995,7 +1995,7 @@ static void GHASH_AARCH64_EOR3(Gcm* gcm, const byte* a, word32 aSz, "MOV v10.D[1], v13.D[0] \n" "EOR v0.16b, v10.16b, v12.16b \n" "CMP x12, #16 \n" - "BGE 11b \n" + "B.GE 11b \n" "CBZ x12, 120f \n" "12: \n" "# Partial AAD \n" @@ -2033,7 +2033,7 @@ static void GHASH_AARCH64_EOR3(Gcm* gcm, const byte* a, word32 aSz, "MOV w12, %w[cSz] \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v11.1q, v3.2d, v3.2d \n" @@ -2119,11 +2119,11 @@ static void GHASH_AARCH64_EOR3(Gcm* gcm, const byte* a, word32 aSz, "MOV v0.D[1], v9.D[0] \n" "EOR v0.16b, v0.16b, v8.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v14.2d}, [%[c]], #16 \n" "SUB x12, x12, #16 \n" @@ -2144,7 +2144,7 @@ static void GHASH_AARCH64_EOR3(Gcm* gcm, const byte* a, word32 aSz, "MOV v10.D[1], v13.D[0] \n" "EOR v0.16b, v10.16b, v12.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial cipher text \n" @@ -2531,7 +2531,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -2621,11 +2621,11 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -2646,7 +2646,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -2683,9 +2683,9 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -2724,7 +2724,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "82: \n" "# Should we do 8 blocks at a 
time? \n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -3318,7 +3318,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -3423,7 +3423,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "LD1 {v9.2d-v11.2d}, [%[Key]], #48 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First encrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -3537,7 +3537,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -3702,7 +3702,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -3760,7 +3760,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Encrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -3793,7 +3793,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "# When only one full block to encrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" @@ -3843,7 +3843,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "EOR v15.16b, v0.16b, v31.16b \n \n" "ST1 {v15.2d}, [%[out]], #16 \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" "B 2b \n" @@ -3988,7 +3988,7 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "ST1 {v0.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -4053,7 +4053,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -4139,11 +4139,11 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -4164,7 +4164,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -4201,9 +4201,9 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -4242,7 +4242,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "82: \n" "# Should we do 8 blocks at a time? 
\n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -4828,7 +4828,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -4925,7 +4925,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "LD1 {v9.2d-v11.2d}, [%[Key]], #48 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First encrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -5039,7 +5039,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -5200,7 +5200,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -5254,7 +5254,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Encrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -5287,7 +5287,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# When only one full block to encrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" @@ -5337,7 +5337,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "EOR v15.16b, v0.16b, v31.16b \n \n" "ST1 {v15.2d}, [%[out]], #16 \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" "B 2b \n" @@ -5482,7 +5482,7 @@ static void Aes128GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "ST1 {v0.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -5547,7 +5547,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -5637,11 +5637,11 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -5662,7 +5662,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -5699,9 +5699,9 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -5740,7 +5740,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "82: \n" "# Should we do 8 blocks at a time? 
\n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -6402,7 +6402,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -6508,7 +6508,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "LD1 {v12.2d-v13.2d}, [%[Key]], #32 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First encrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -6638,7 +6638,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -6819,7 +6819,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -6877,7 +6877,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Encrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -6914,7 +6914,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "# When only one full block to encrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" @@ -6968,7 +6968,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "EOR v15.16b, v0.16b, v31.16b \n \n" "ST1 {v15.2d}, [%[out]], #16 \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" "B 2b \n" @@ -7121,7 +7121,7 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "ST1 {v0.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -7186,7 +7186,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -7272,11 +7272,11 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -7297,7 +7297,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -7334,9 +7334,9 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -7375,7 +7375,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "82: \n" "# Should we do 8 blocks at a time? 
\n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -8029,7 +8029,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -8127,7 +8127,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "LD1 {v12.2d-v13.2d}, [%[Key]], #32 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First encrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -8257,7 +8257,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -8434,7 +8434,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -8488,7 +8488,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Encrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -8525,7 +8525,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# When only one full block to encrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" @@ -8579,7 +8579,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "EOR v15.16b, v0.16b, v31.16b \n \n" "ST1 {v15.2d}, [%[out]], #16 \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" "B 2b \n" @@ -8732,7 +8732,7 @@ static void Aes192GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "ST1 {v0.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -8797,7 +8797,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -8887,11 +8887,11 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -8912,7 +8912,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -8949,9 +8949,9 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -8990,7 +8990,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "82: \n" "# Should we do 8 blocks at a time? 
\n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -9720,7 +9720,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -9826,7 +9826,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "LD1 {v12.2d-v13.2d}, [%[Key]], #32 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First encrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -9973,7 +9973,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -10171,7 +10171,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -10230,7 +10230,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "SUB %[Key], %[Key], #32 \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Encrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -10274,7 +10274,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "# When only one full block to encrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" @@ -10335,7 +10335,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "EOR v15.16b, v0.16b, v31.16b \n \n" "ST1 {v15.2d}, [%[out]], #16 \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" "B 2b \n" @@ -10502,7 +10502,7 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "ST1 {v0.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -10567,7 +10567,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -10653,11 +10653,11 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -10678,7 +10678,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -10715,9 +10715,9 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -10756,7 +10756,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "82: \n" "# Should we do 8 blocks at a time? 
\n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -11478,7 +11478,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -11576,7 +11576,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "LD1 {v12.2d-v13.2d}, [%[Key]], #32 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First encrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -11723,7 +11723,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -11917,7 +11917,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# Store cipher text \n" "ST1 {v18.2d-v21.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -11972,7 +11972,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "SUB %[Key], %[Key], #32 \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Encrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -12016,7 +12016,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "# When only one full block to encrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" @@ -12077,7 +12077,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "EOR v15.16b, v0.16b, v31.16b \n \n" "ST1 {v15.2d}, [%[out]], #16 \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "LD1 {v31.2d}, [%[input]], #16 \n" "B 2b \n" @@ -12244,7 +12244,7 @@ static void Aes256GcmEncrypt_EOR3(Aes* aes, byte* out, const byte* in, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "ST1 {v0.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -12381,7 +12381,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -12471,11 +12471,11 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -12496,7 +12496,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -12533,9 +12533,9 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -12574,7 +12574,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "82: \n" "# Should we do 8 blocks at a time? 
\n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -13168,7 +13168,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -13273,7 +13273,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "LD1 {v9.2d-v11.2d}, [%[Key]], #48 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First decrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -13387,7 +13387,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -13552,7 +13552,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -13610,7 +13610,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Decrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -13643,7 +13643,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# When only one full block to decrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "# Interweave GHASH and decrypt if more then 1 block \n" "2: \n" @@ -13692,7 +13692,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "EOR v0.16b, v0.16b, v28.16b \n \n" "ST1 {v0.2d}, [%[out]], #16 \n" "CMP w11, #16 \n" - "BGE 2b \n" + "B.GE 2b \n" "# GHASH on last block \n" "1: \n" @@ -13827,7 +13827,7 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "LD1 {v1.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -13911,7 +13911,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -13997,11 +13997,11 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -14022,7 +14022,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -14059,9 +14059,9 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -14100,7 +14100,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 
sz, "82: \n" "# Should we do 8 blocks at a time? \n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -14686,7 +14686,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -14783,7 +14783,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "LD1 {v9.2d-v11.2d}, [%[Key]], #48 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First decrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -14897,7 +14897,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -15058,7 +15058,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -15112,7 +15112,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Decrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -15145,7 +15145,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# When only one full block to decrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "# Interweave GHASH and decrypt if more then 1 block \n" "2: \n" @@ -15194,7 +15194,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "EOR v0.16b, v0.16b, v28.16b \n \n" "ST1 {v0.2d}, [%[out]], #16 \n" "CMP w11, #16 \n" - "BGE 2b \n" + "B.GE 2b \n" "# GHASH on last block \n" "1: \n" @@ -15329,7 +15329,7 @@ static int Aes128GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "LD1 {v1.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -15413,7 +15413,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -15503,11 +15503,11 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -15528,7 +15528,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -15565,9 +15565,9 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -15606,7 +15606,7 @@ static 
int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "82: \n" "# Should we do 8 blocks at a time? \n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -16268,7 +16268,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -16374,7 +16374,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "LD1 {v12.2d-v13.2d}, [%[Key]], #32 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First decrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -16504,7 +16504,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -16685,7 +16685,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -16743,7 +16743,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Decrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -16780,7 +16780,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# When only one full block to decrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "# Interweave GHASH and decrypt if more then 1 block \n" "2: \n" @@ -16833,7 +16833,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "EOR v0.16b, v0.16b, v28.16b \n \n" "ST1 {v0.2d}, [%[out]], #16 \n" "CMP w11, #16 \n" - "BGE 2b \n" + "B.GE 2b \n" "# GHASH on last block \n" "1: \n" @@ -16976,7 +16976,7 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "LD1 {v1.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -17060,7 +17060,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -17146,11 +17146,11 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -17171,7 +17171,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -17208,9 +17208,9 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# 
Square H => H^2 \n" @@ -17249,7 +17249,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "82: \n" "# Should we do 8 blocks at a time? \n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -17903,7 +17903,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -18001,7 +18001,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "LD1 {v12.2d-v13.2d}, [%[Key]], #32 \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First decrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -18131,7 +18131,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -18308,7 +18308,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -18362,7 +18362,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "# Decrypt first block for GHASH \n" "ADD w12, w12, #1 \n" "MOV v0.16b, v22.16b \n" @@ -18399,7 +18399,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# When only one full block to decrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "# Interweave GHASH and decrypt if more then 1 block \n" "2: \n" @@ -18452,7 +18452,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "EOR v0.16b, v0.16b, v28.16b \n \n" "ST1 {v0.2d}, [%[out]], #16 \n" "CMP w11, #16 \n" - "BGE 2b \n" + "B.GE 2b \n" "# GHASH on last block \n" "1: \n" @@ -18595,7 +18595,7 @@ static int Aes192GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "LD1 {v1.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -18679,7 +18679,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -18769,11 +18769,11 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -18794,7 +18794,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -18831,9 +18831,9 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - "BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - 
"BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -18872,7 +18872,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "82: \n" "# Should we do 8 blocks at a time? \n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -19602,7 +19602,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -19709,7 +19709,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "LD1 {v14.2d-v15.2d}, [%[Key]] \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First decrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -19856,7 +19856,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -20054,7 +20054,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -20113,7 +20113,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "LD1 {v14.2d, v15.2d}, [%[Key]] \n" "# Decrypt first block for GHASH \n" "ADD w12, w12, #1 \n" @@ -20155,7 +20155,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "# When only one full block to decrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "# Interweave GHASH and decrypt if more then 1 block \n" "2: \n" @@ -20212,7 +20212,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "EOR v0.16b, v0.16b, v28.16b \n \n" "ST1 {v0.2d}, [%[out]], #16 \n" "CMP w11, #16 \n" - "BGE 2b \n" + "B.GE 2b \n" "# GHASH on last block \n" "1: \n" @@ -20363,7 +20363,7 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "LD1 {v1.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -20447,7 +20447,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# GHASH AAD \n" "CMP x12, #64 \n" - "BLT 115f \n" + "B.LT 115f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" "PMULL2 v19.1q, v16.2d, v16.2d \n" @@ -20533,11 +20533,11 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v17.D[1], v15.D[0] \n" "EOR v17.16b, v17.16b, v14.16b \n" "CMP x12, #64 \n" - "BGE 114b \n" + "B.GE 114b \n" "CBZ x12, 120f \n" "115: \n" "CMP x12, #16 \n" - "BLT 112f \n" + "B.LT 112f \n" "111: \n" "LD1 {v15.2d}, [%[aad]], #16 \n" "SUB x12, x12, #16 \n" @@ -20558,7 +20558,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "MOV v18.D[1], v21.D[0] \n" "EOR v17.16b, v18.16b, v20.16b \n" "CMP x12, #16 \n" - "BGE 111b \n" + "B.GE 111b \n" "CBZ x12, 120f \n" "112: \n" "# Partial AAD \n" @@ -20595,9 +20595,9 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "MOV w11, %w[sz] \n" "REV w12, w12 \n" "CMP w11, #64 \n" - 
"BLT 80f \n" + "B.LT 80f \n" "CMP %w[aSz], #64 \n" - "BGE 82f \n" + "B.GE 82f \n" "# Calculate H^[1-4] - GMULT partials \n" "# Square H => H^2 \n" @@ -20636,7 +20636,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "82: \n" "# Should we do 8 blocks at a time? \n" "CMP w11, #512 \n" - "BLT 80f \n" + "B.LT 80f \n" "# Calculate H^[5-8] - GMULT partials \n" "# Multiply H and H^4 => H^5 \n" @@ -21358,7 +21358,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #128 \n" - "BGE 81b \n" + "B.GE 81b \n" "# GHASH - 8 blocks \n" "RBIT v12.16b, v12.16b \n" @@ -21457,7 +21457,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "LD1 {v14.2d-v15.2d}, [%[Key]] \n" "# Can we do 4 blocks at a time? \n" "CMP w11, #64 \n" - "BLT 10f \n" + "B.LT 10f \n" "# First decrypt - no GHASH \n" "# Calculate next 4 counters (+1-4) \n" @@ -21604,7 +21604,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BLT 12f \n" + "B.LT 12f \n" "11: \n" "# Calculate next 4 counters (+1-4) \n" @@ -21798,7 +21798,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# Store cipher text \n" "ST1 {v27.2d-v30.2d}, [%[out]], #64 \n \n" "CMP w11, #64 \n" - "BGE 11b \n" + "B.GE 11b \n" "12: \n" "# GHASH - 4 blocks \n" @@ -21853,7 +21853,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "10: \n" "CBZ w11, 30f \n" "CMP w11, #16 \n" - "BLT 20f \n" + "B.LT 20f \n" "LD1 {v14.2d, v15.2d}, [%[Key]] \n" "# Decrypt first block for GHASH \n" "ADD w12, w12, #1 \n" @@ -21895,7 +21895,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "# When only one full block to decrypt go straight to GHASH \n" "CMP w11, 16 \n" - "BLT 1f \n" + "B.LT 1f \n" "# Interweave GHASH and decrypt if more then 1 block \n" "2: \n" @@ -21952,7 +21952,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "EOR v0.16b, v0.16b, v28.16b \n \n" "ST1 {v0.2d}, [%[out]], #16 \n" "CMP w11, #16 \n" - "BGE 2b \n" + "B.GE 2b \n" "# GHASH on last block \n" "1: \n" @@ -22103,7 +22103,7 @@ static int Aes256GcmDecrypt_EOR3(Aes* aes, byte* out, const byte* in, word32 sz, "RBIT v17.16b, v17.16b \n" "EOR v0.16b, v0.16b, v17.16b \n \n" "CMP %w[tagSz], #16 \n" - "BNE 40f \n" + "B.NE 40f \n" "LD1 {v1.2d}, [%[tag]] \n" "B 41f \n" "40: \n" @@ -24846,14 +24846,14 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) "AESMC v0.16b, v0.16b \n" \ \ "SUBS WZR, %w[rounds], #10 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESE v0.16b, v10.16b \n" \ "AESMC v0.16b, v0.16b \n" \ "AESE v0.16b, v11.16b \n" \ "AESMC v0.16b, v0.16b \n" \ \ "SUBS WZR, %w[rounds], #12 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESE v0.16b, v12.16b \n" \ "AESMC v0.16b, v0.16b \n" \ "AESE v0.16b, v13.16b \n" \ @@ -24886,14 +24886,14 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) "AESMC v0.16b, v0.16b \n" \ \ "SUBS WZR, %w[rounds], #10 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESE v0.16b, v10.16b \n" \ "AESMC v0.16b, v0.16b \n" \ "AESE v0.16b, v11.16b \n" \ "AESMC v0.16b, v0.16b \n" \ \ "SUBS WZR, %w[rounds], #12 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESE v0.16b, v12.16b \n" \ "AESMC v0.16b, v0.16b \n" \ "AESE v0.16b, v13.16b \n" \ @@ -24929,14 
+24929,14 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) "AESIMC v0.16b, v0.16b \n" \ \ "SUBS WZR, %w[rounds], #10 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESD v0.16b, v10.16b \n" \ "AESIMC v0.16b, v0.16b \n" \ "AESD v0.16b, v11.16b \n" \ "AESIMC v0.16b, v0.16b \n" \ \ "SUBS WZR, %w[rounds], #12 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESD v0.16b, v12.16b \n" \ "AESIMC v0.16b, v0.16b \n" \ "AESD v0.16b, v13.16b \n" \ @@ -24969,14 +24969,14 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) "AESIMC v0.16b, v0.16b \n" \ \ "SUBS WZR, %w[rounds], #10 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESD v0.16b, v10.16b \n" \ "AESIMC v0.16b, v0.16b \n" \ "AESD v0.16b, v11.16b \n" \ "AESIMC v0.16b, v0.16b \n" \ \ "SUBS WZR, %w[rounds], #12 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESD v0.16b, v12.16b \n" \ "AESIMC v0.16b, v0.16b \n" \ "AESD v0.16b, v13.16b \n" \ @@ -25257,7 +25257,7 @@ void AES_XTS_encrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "SUBS %w[blocks], %w[blocks], #1 \n" "SUB %w[sz], %w[sz], #16 \n" - "BGT 1b \n" + "B.GT 1b \n" "CBZ %w[sz], 3f \n" @@ -25274,7 +25274,7 @@ void AES_XTS_encrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "STRB w13, [%[out]], #1 \n" "STRB w14, [%[tmp]], #1 \n" "SUBS w12, w12, #1 \n" - "BGT 4b \n" + "B.GT 4b \n" "SUB %[out], %[out], %x[sz] \n" "SUB %[tmp], %[tmp], %x[sz] \n" @@ -25576,7 +25576,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "SUBS %w[blocks], %w[blocks], #1 \n" "SUB %w[sz], %w[sz], #16 \n" - "BGT 1b \n" + "B.GT 1b \n" "CBZ %w[sz], 4f \n" @@ -25607,7 +25607,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "STRB w13, [%[out]], #1 \n" "STRB w14, [%[tmp]], #1 \n" "SUBS w12, w12, #1 \n" - "BGT 6b \n" + "B.GT 6b \n" "SUB %[out], %[out], %x[sz] \n" "SUB %[tmp], %[tmp], %x[sz] \n" "SUB %[out], %[out], #16 \n" @@ -25671,7 +25671,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ \ "CMP %[rounds], #10 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESE.8 q0, q10 \n" \ "AESMC.8 q0, q0 \n" \ "AESE.8 q0, q11 \n" \ @@ -25679,7 +25679,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ \ "CMP %[rounds], #12 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESE.8 q0, q10 \n" \ "AESMC.8 q0, q0 \n" \ "AESE.8 q0, q11 \n" \ @@ -25714,7 +25714,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ \ "CMP %[rounds], #10 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESE.8 q0, q10 \n" \ "AESMC.8 q0, q0 \n" \ "AESE.8 q0, q11 \n" \ @@ -25722,7 +25722,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ \ "CMP %[rounds], #12 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESE.8 q0, q10 \n" \ "AESMC.8 q0, q0 \n" \ "AESE.8 q0, q11 \n" \ @@ -25765,7 +25765,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! 
\n" \ \ "CMP %[rounds], #10 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESD.8 q0, q10 \n" \ "AESIMC.8 q0, q0 \n" \ "AESD.8 q0, q11 \n" \ @@ -25773,7 +25773,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ \ "CMP %[rounds], #12 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESD.8 q0, q10 \n" \ "AESIMC.8 q0, q0 \n" \ "AESD.8 q0, q11 \n" \ @@ -25808,7 +25808,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ \ "CMP %[rounds], #10 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESD.8 q0, q10 \n" \ "AESIMC.8 q0, q0 \n" \ "AESD.8 q0, q11 \n" \ @@ -25816,7 +25816,7 @@ void AES_XTS_decrypt_AARCH64(XtsAes* xaes, byte* out, const byte* in, word32 sz, "VLD1.32 {d20, d21, d22, d23}, [%[key2]]! \n" \ \ "CMP %[rounds], #12 \n" \ - "BLE " #label "f \n" \ + "B.LE " #label "f \n" \ "AESD.8 q0, q10 \n" \ "AESIMC.8 q0, q0 \n" \ "AESD.8 q0, q11 \n" \ @@ -25911,7 +25911,7 @@ int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, "SUBS %[blocks], %[blocks], #1 \n" "SUB %[sz], %[sz], #16 \n" - "BGT 1b \n" + "B.GT 1b \n" "CMP %[sz], #0 \n" "B.EQ 3f \n" @@ -25929,7 +25929,7 @@ int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, "STRB r10, [%[out]], #1 \n" "STRB r11, [%[tmp]], #1 \n" "SUBS r9, r9, #1 \n" - "BGT 4b \n" + "B.GT 4b \n" "SUB %[out], %[out], %[sz] \n" "SUB %[tmp], %[tmp], %[sz] \n" @@ -26047,7 +26047,7 @@ int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, "SUBS %[blocks], %[blocks], #1 \n" "SUB %[sz], %[sz], #16 \n" - "BGT 1b \n" + "B.GT 1b \n" "CMP %[sz], #0 \n" "B.EQ 4f \n" @@ -26087,7 +26087,7 @@ int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, "STRB r10, [%[out]], #1 \n" "STRB r11, [%[tmp]], #1 \n" "SUBS r9, r9, #1 \n" - "BGT 6b \n" + "B.GT 6b \n" "SUB %[out], %[out], %[sz] \n" "SUB %[tmp], %[tmp], %[sz] \n" "SUB %[out], %[out], #16 \n" diff --git a/wolfcrypt/src/port/arm/armv8-chacha.c b/wolfcrypt/src/port/arm/armv8-chacha.c index 019b1ee5e..283aba717 100644 --- a/wolfcrypt/src/port/arm/armv8-chacha.c +++ b/wolfcrypt/src/port/arm/armv8-chacha.c @@ -556,7 +556,7 @@ static WC_INLINE void wc_Chacha_encrypt_320(const word32* input, const byte* m, "SRI v6.4s, v17.4s, #25 \n\t" "SRI v7.4s, v18.4s, #25 \n\t" "SRI v4.4s, v19.4s, #25 \n\t" - "BNE L_chacha20_arm64_inner_%= \n\t" + "B.NE L_chacha20_arm64_inner_%= \n\t" /* Add counter now rather than after transposed */ "ADD v12.4s, v12.4s, v28.4s \n\t" "ADD w16, w16, w21 \n\t" @@ -666,7 +666,7 @@ static WC_INLINE void wc_Chacha_encrypt_320(const word32* input, const byte* m, "ST1 {v16.4s-v19.4s}, [%[c]], #64 \n\t" "SUBS %[bytes], %[bytes], #320 \n\t" "ADD v28.4s, v28.4s, v29.4s \n\t" - "BNE L_chacha20_arm64_outer_%= \n\t" + "B.NE L_chacha20_arm64_outer_%= \n\t" : [input] "+r" (input), [m] "+r" (m), [c] "+r" (c), [bytes] "+r" (bytes64) : [L_chacha20_neon_add_all_cntrs] "r" (L_chacha20_neon_add_all_cntrs), @@ -959,7 +959,7 @@ static WC_INLINE int wc_Chacha_encrypt_256( "EXT v9.16B, v9.16B, v9.16B, #12 \n\t" "EXT v10.16B, v10.16B, v10.16B, #8 \n\t" "EXT v11.16B, v11.16B, v11.16B, #4 \n\t" - "BNE L_chacha20_arm64_256_loop_%= \n\t" + "B.NE L_chacha20_arm64_256_loop_%= \n\t" /* Load message */ "LD1 {v16.4S-v19.4S}, [%[m]], #64 \n\t" /* Add one (2 added during calculating vector results) */ @@ -1364,7 +1364,7 @@ static WC_INLINE int wc_Chacha_encrypt_256( "ROR r4, r4, #25 \n\t" // 
4 4 "VEXT.8 q11, q11, q11, #4 \n\t" // permute elements left by one - "BNE L_chacha20_arm32_256_loop_%= \n\t" + "B.NE L_chacha20_arm32_256_loop_%= \n\t" // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 // 0 1 2 3 4 5 6 7 8 9 12 13 14 @@ -1583,7 +1583,7 @@ static WC_INLINE int wc_Chacha_encrypt_128( "EXT v5.16B, v5.16B, v5.16B, #12 \n\t" "EXT v6.16B, v6.16B, v6.16B, #8 \n\t" "EXT v7.16B, v7.16B, v7.16B, #4 \n\t" - "BNE L_chacha20_arm64_128_loop_%= \n\t" + "B.NE L_chacha20_arm64_128_loop_%= \n\t" /* Add back state, XOR in message and store (load next block) */ "ADD v0.4S, v0.4S, v18.4S \n\t" "ADD v1.4S, v1.4S, v19.4S \n\t" @@ -1736,7 +1736,7 @@ static WC_INLINE int wc_Chacha_encrypt_128( "VEXT.8 q6, q6, q6, #8 \n\t" // permute elements left by two "VEXT.8 q7, q7, q7, #4 \n\t" // permute elements left by one - "BNE L_chacha20_arm32_128_loop_%= \n\t" + "B.NE L_chacha20_arm32_128_loop_%= \n\t" "VMOV.I32 q8, #0 \n\t" "VADD.I32 q0, q0, q10 \n\t" @@ -2251,7 +2251,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "ADD v2.4S, v2.4S, v10.4S \n\t" "ADD v3.4S, v3.4S, v11.4S \n\t" "CMP %[bytes], #64 \n\t" - "BLT L_chacha20_arm64_64_lt_64_%= \n\t" + "B.LT L_chacha20_arm64_64_lt_64_%= \n\t" "LD1 {v4.4S-v7.4S}, [%[m]], #64 \n\t" "EOR v4.16B, v4.16B, v0.16B \n\t" "EOR v5.16B, v5.16B, v1.16B \n\t" @@ -2260,13 +2260,13 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "ST1 {v4.4S-v7.4S}, [%[c]], #64 \n\t" "SUBS %[bytes], %[bytes], #64 \n\t" "ADD v11.4S, v11.4S, v14.4S \n\t" - "BNE L_chacha20_arm64_64_loop_%= \n\t" + "B.NE L_chacha20_arm64_64_loop_%= \n\t" "B L_chacha20_arm64_64_done_%= \n\t" "\n" "L_chacha20_arm64_64_lt_64_%=: \n\t" "ST1 {v0.4s-v3.4s}, [%[over]]\n\t" "CMP %[bytes], #32 \n\t" - "BLT L_chacha20_arm64_64_lt_32_%= \n\t" + "B.LT L_chacha20_arm64_64_lt_32_%= \n\t" "LD1 {v4.4S, v5.4S}, [%[m]], #32 \n\t" "EOR v4.16B, v4.16B, v0.16B \n\t" "EOR v5.16B, v5.16B, v1.16B \n\t" @@ -2274,27 +2274,27 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "SUBS %[bytes], %[bytes], #32 \n\t" "MOV v0.16B, v2.16B \n\t" "MOV v1.16B, v3.16B \n\t" - "BEQ L_chacha20_arm64_64_done_%= \n\t" + "B.EQ L_chacha20_arm64_64_done_%= \n\t" "\n" "L_chacha20_arm64_64_lt_32_%=: \n\t" "CMP %[bytes], #16 \n\t" - "BLT L_chacha20_arm64_64_lt_16_%= \n\t" + "B.LT L_chacha20_arm64_64_lt_16_%= \n\t" "LD1 {v4.4S}, [%[m]], #16 \n\t" "EOR v4.16B, v4.16B, v0.16B \n\t" "ST1 {v4.4S}, [%[c]], #16 \n\t" "SUBS %[bytes], %[bytes], #16 \n\t" "MOV v0.16B, v1.16B \n\t" - "BEQ L_chacha20_arm64_64_done_%= \n\t" + "B.EQ L_chacha20_arm64_64_done_%= \n\t" "\n" "L_chacha20_arm64_64_lt_16_%=: \n\t" "CMP %[bytes], #8 \n\t" - "BLT L_chacha20_arm64_64_lt_8_%= \n\t" + "B.LT L_chacha20_arm64_64_lt_8_%= \n\t" "LD1 {v4.2S}, [%[m]], #8 \n\t" "EOR v4.8B, v4.8B, v0.8B \n\t" "ST1 {v4.2S}, [%[c]], #8 \n\t" "SUBS %[bytes], %[bytes], #8 \n\t" "MOV v0.D[0], v0.D[1] \n\t" - "BEQ L_chacha20_arm64_64_done_%= \n\t" + "B.EQ L_chacha20_arm64_64_done_%= \n\t" "\n" "L_chacha20_arm64_64_lt_8_%=: \n\t" "MOV x4, v0.D[0] \n\t" @@ -2305,7 +2305,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "STRB w6, [%[c]], #1 \n\t" "SUBS %[bytes], %[bytes], #1 \n\t" "LSR x4, x4, #8 \n\t" - "BGT L_chacha20_arm64_64_loop_lt_8_%= \n\t" + "B.GT L_chacha20_arm64_64_loop_lt_8_%= \n\t" "\n" "L_chacha20_arm64_64_done_%=: \n\t" : [input] "+r" (input), [m] "+r" (m), [c] "+r" (c), @@ -2816,7 +2816,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, 
"VADD.I32 q2, q2, q10 \n\t" "VADD.I32 q3, q3, q11 \n\t" "CMP %[bytes], #64 \n\t" - "BLT L_chacha20_arm32_64_lt_64_%= \n\t" + "B.LT L_chacha20_arm32_64_lt_64_%= \n\t" /* XOR full 64 byte block */ "VLD1.8 { q4, q5 }, [%[m]]! \n\t" "VLD1.8 { q6, q7 }, [%[m]]! \n\t" @@ -2828,14 +2828,14 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "VST1.8 { q2, q3 }, [%[c]]! \n\t" "SUBS %[bytes], %[bytes], #64 \n\t" "VADD.I32 q11, q11, q14 \n\t" - "BNE L_chacha20_arm32_64_outer_loop_%= \n\t" + "B.NE L_chacha20_arm32_64_outer_loop_%= \n\t" "B L_chacha20_arm32_64_done_%= \n\t" "\n" "L_chacha20_arm32_64_lt_64_%=: \n\t" "VSTM %[over], {q0-q3} \n\t" /* XOR 32 bytes */ "CMP %[bytes], #32 \n\t" - "BLT L_chacha20_arm32_64_lt_32_%= \n\t" + "B.LT L_chacha20_arm32_64_lt_32_%= \n\t" "VLD1.8 { q4, q5 }, [%[m]]! \n\t" "VEOR q4, q4, q0 \n\t" "VEOR q5, q5, q1 \n\t" @@ -2843,41 +2843,41 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "SUBS %[bytes], %[bytes], #32 \n\t" "VMOV q0, q2 \n\t" "VMOV q1, q3 \n\t" - "BEQ L_chacha20_arm32_64_done_%= \n\t" + "B.EQ L_chacha20_arm32_64_done_%= \n\t" "\n" "L_chacha20_arm32_64_lt_32_%=: \n\t" /* XOR 16 bytes */ "CMP %[bytes], #16 \n\t" - "BLT L_chacha20_arm32_64_lt_16_%= \n\t" + "B.LT L_chacha20_arm32_64_lt_16_%= \n\t" "VLD1.8 { q4 }, [%[m]]! \n\t" "VEOR q4, q4, q0 \n\t" "VST1.8 { q4 }, [%[c]]! \n\t" "SUBS %[bytes], %[bytes], #16 \n\t" "VMOV q0, q1 \n\t" - "BEQ L_chacha20_arm32_64_done_%= \n\t" + "B.EQ L_chacha20_arm32_64_done_%= \n\t" "\n" "L_chacha20_arm32_64_lt_16_%=: \n\t" /* XOR 8 bytes */ "CMP %[bytes], #8 \n\t" - "BLT L_chacha20_arm32_64_lt_8_%= \n\t" + "B.LT L_chacha20_arm32_64_lt_8_%= \n\t" "VLD1.8 { d8 }, [%[m]]! \n\t" "VEOR d8, d8, d0 \n\t" "VST1.8 { d8 }, [%[c]]! \n\t" "SUBS %[bytes], %[bytes], #8 \n\t" "VMOV d0, d1 \n\t" - "BEQ L_chacha20_arm32_64_done_%= \n\t" + "B.EQ L_chacha20_arm32_64_done_%= \n\t" "\n" "L_chacha20_arm32_64_lt_8_%=: \n\t" /* XOR 4 bytes */ "CMP %[bytes], #4 \n\t" - "BLT L_chacha20_arm32_64_lt_4_%= \n\t" + "B.LT L_chacha20_arm32_64_lt_4_%= \n\t" "LDR r12, [%[m]], #4 \n\t" "VMOV r14, d0[0] \n\t" "EOR r12, r12, r14 \n\t" "STR r12, [%[c]], #4 \n\t" "SUBS %[bytes], %[bytes], #4 \n\t" "VSHR.U64 d0, d0, #32 \n\t" - "BEQ L_chacha20_arm32_64_done_%= \n\t" + "B.EQ L_chacha20_arm32_64_done_%= \n\t" "\n" "L_chacha20_arm32_64_lt_4_%=: \n\t" /* XOR remaining bytes */ @@ -2889,7 +2889,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "STRB r12, [%[c]], #1 \n\t" "SUBS %[bytes], %[bytes], #1 \n\t" "LSR r14, r14, #8 \n\t" - "BGT L_chacha20_arm32_64_lt_4_loop_%= \n\t" + "B.GT L_chacha20_arm32_64_lt_4_loop_%= \n\t" "\n" "L_chacha20_arm32_64_done_%=: \n\t" : [input] "+r" (input), [m] "+r" (m), [c] "+r" (c), [bytes] "+r" (bytes) diff --git a/wolfcrypt/src/port/arm/armv8-mlkem-asm.S b/wolfcrypt/src/port/arm/armv8-mlkem-asm.S index cd37d6eda..9a3e1f1ba 100644 --- a/wolfcrypt/src/port/arm/armv8-mlkem-asm.S +++ b/wolfcrypt/src/port/arm/armv8-mlkem-asm.S @@ -29,21 +29,6 @@ #ifdef WOLFSSL_ARMASM #ifdef __aarch64__ #ifndef WOLFSSL_ARMASM_INLINE -#ifndef __APPLE__ - .text - .type L_mlkem_aarch64_q, %object - .section .rodata - .size L_mlkem_aarch64_q, 16 -#else - .section __DATA,__data -#endif /* __APPLE__ */ -#ifndef __APPLE__ - .align 2 -#else - .p2align 2 -#endif /* __APPLE__ */ -L_mlkem_aarch64_q: - .short 0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01 #ifndef __APPLE__ .text .type L_mlkem_aarch64_consts, %object @@ -59,44 +44,6 @@ L_mlkem_aarch64_q: #endif /* __APPLE__ 
*/ L_mlkem_aarch64_consts: .short 0x0d01,0xf301,0x4ebf,0x0549,0x5049,0x0000,0x0000,0x0000 -#ifndef __APPLE__ - .text - .type L_sha3_aarch64_r, %object - .section .rodata - .size L_sha3_aarch64_r, 192 -#else - .section __DATA,__data -#endif /* __APPLE__ */ -#ifndef __APPLE__ - .align 3 -#else - .p2align 3 -#endif /* __APPLE__ */ -L_sha3_aarch64_r: - .xword 0x0000000000000001 - .xword 0x0000000000008082 - .xword 0x800000000000808a - .xword 0x8000000080008000 - .xword 0x000000000000808b - .xword 0x0000000080000001 - .xword 0x8000000080008081 - .xword 0x8000000000008009 - .xword 0x000000000000008a - .xword 0x0000000000000088 - .xword 0x0000000080008009 - .xword 0x000000008000000a - .xword 0x000000008000808b - .xword 0x800000000000008b - .xword 0x8000000000008089 - .xword 0x8000000000008003 - .xword 0x8000000000008002 - .xword 0x8000000000000080 - .xword 0x000000000000800a - .xword 0x800000008000000a - .xword 0x8000000080008081 - .xword 0x8000000000008080 - .xword 0x0000000080000001 - .xword 0x8000000080008008 #ifdef WOLFSSL_WC_MLKEM #ifndef __APPLE__ .text @@ -7006,6 +6953,21 @@ _mlkem_basemul_mont_add: #ifndef __APPLE__ .size mlkem_basemul_mont_add,.-mlkem_basemul_mont_add #endif /* __APPLE__ */ +#ifndef __APPLE__ + .text + .type L_mlkem_aarch64_q, %object + .section .rodata + .size L_mlkem_aarch64_q, 16 +#else + .section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ + .align 2 +#else + .p2align 2 +#endif /* __APPLE__ */ +L_mlkem_aarch64_q: + .short 0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01 #ifndef __APPLE__ .text .globl mlkem_csubq_neon @@ -9724,6 +9686,44 @@ L_mlkem_rej_uniform_done: #ifndef __APPLE__ .size mlkem_rej_uniform_neon,.-mlkem_rej_uniform_neon #endif /* __APPLE__ */ +#ifndef __APPLE__ + .text + .type L_sha3_aarch64_r, %object + .section .rodata + .size L_sha3_aarch64_r, 192 +#else + .section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ +L_sha3_aarch64_r: + .xword 0x0000000000000001 + .xword 0x0000000000008082 + .xword 0x800000000000808a + .xword 0x8000000080008000 + .xword 0x000000000000808b + .xword 0x0000000080000001 + .xword 0x8000000080008081 + .xword 0x8000000000008009 + .xword 0x000000000000008a + .xword 0x0000000000000088 + .xword 0x0000000080008009 + .xword 0x000000008000000a + .xword 0x000000008000808b + .xword 0x800000000000008b + .xword 0x8000000000008089 + .xword 0x8000000000008003 + .xword 0x8000000000008002 + .xword 0x8000000000000080 + .xword 0x000000000000800a + .xword 0x800000008000000a + .xword 0x8000000080008081 + .xword 0x8000000000008080 + .xword 0x0000000080000001 + .xword 0x8000000080008008 #ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 #ifndef __APPLE__ .text diff --git a/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c b/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c index 33a707c01..e0061e442 100644 --- a/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c @@ -30,29 +30,10 @@ #ifdef WOLFSSL_ARMASM #ifdef __aarch64__ #ifdef WOLFSSL_ARMASM_INLINE -static const word16 L_mlkem_aarch64_q[] = { - 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, -}; - static const word16 L_mlkem_aarch64_consts[] = { 0x0d01, 0xf301, 0x4ebf, 0x0549, 0x5049, 0x0000, 0x0000, 0x0000, }; -static const word64 L_sha3_aarch64_r[] = { - 0x0000000000000001, 0x0000000000008082, - 0x800000000000808a, 0x8000000080008000, - 0x000000000000808b, 0x0000000080000001, - 0x8000000080008081, 0x8000000000008009, - 0x000000000000008a, 0x0000000000000088, - 0x0000000080008009, 
0x000000008000000a, - 0x000000008000808b, - 0x800000000000008b, - 0x8000000000008089, - 0x8000000000008003, - 0x8000000000008002, - 0x8000000000000080, - 0x000000000000800a, - 0x800000008000000a, - 0x8000000080008081, - 0x8000000000008080, - 0x0000000080000001, - 0x8000000080008008, -}; - #include #ifdef WOLFSSL_WC_MLKEM @@ -1405,11 +1386,9 @@ void mlkem_ntt(sword16* r) "stp q17, q18, [x1, #192]\n\t" "stp q19, q20, [x1, #224]\n\t" : [r] "+r" (r) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv) + : [L_mlkem_aarch64_consts] "i" (L_mlkem_aarch64_consts), + [L_mlkem_aarch64_zetas] "i" (L_mlkem_aarch64_zetas), + [L_mlkem_aarch64_zetas_qinv] "i" (L_mlkem_aarch64_zetas_qinv) : "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", @@ -2922,13 +2901,9 @@ void mlkem_invntt(sword16* r) "str q23, [x1, #208]\n\t" "str q24, [x1, #240]\n\t" : [r] "+r" (r) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv) + : [L_mlkem_aarch64_consts] "i" (L_mlkem_aarch64_consts), + [L_mlkem_aarch64_zetas_inv] "i" (L_mlkem_aarch64_zetas_inv), + [L_mlkem_aarch64_zetas_inv_qinv] "i" (L_mlkem_aarch64_zetas_inv_qinv) : "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", @@ -4096,13 +4071,7 @@ void mlkem_ntt_sqrdmlsh(sword16* r) "stp q17, q18, [x1, #192]\n\t" "stp q19, q20, [x1, #224]\n\t" : [r] "+r" (r) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv) + : [L_mlkem_aarch64_consts] "i" (L_mlkem_aarch64_consts) : "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", @@ -5393,13 +5362,7 @@ void mlkem_invntt_sqrdmlsh(sword16* r) "str q23, [x1, #208]\n\t" "str q24, [x1, #240]\n\t" : [r] "+r" (r) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv) + : [L_mlkem_aarch64_consts] "i" (L_mlkem_aarch64_consts) : "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13",
"v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", @@ -6102,14 +6065,8 @@ void mlkem_basemul_mont(sword16* r, const sword16* a, const sword16* b) "zip2 v25.8h, v22.8h, v23.8h\n\t" "stp q24, q25, [%x[r], #480]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]), + [L_mlkem_aarch64_zetas_mul] "i" (L_mlkem_aarch64_zetas_mul) : "memory", "cc", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", @@ -6840,14 +6797,8 @@ void mlkem_basemul_mont_add(sword16* r, const sword16* a, const sword16* b) "add v29.8h, v29.8h, v25.8h\n\t" "stp q28, q29, [%x[r], #480]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]), + [L_mlkem_aarch64_zetas_mul] "i" (L_mlkem_aarch64_zetas_mul) : "memory", "cc", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", @@ -6855,6 +6806,10 @@ void mlkem_basemul_mont_add(sword16* r, const sword16* a, const sword16* b) ); } +static const word16 L_mlkem_aarch64_q[] = { + 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, +}; + void mlkem_csubq_neon(sword16* p) { __asm__ __volatile__ ( @@ -7013,14 +6968,7 @@ void mlkem_csubq_neon(sword16* p) "st4 {v8.8h, v9.8h, v10.8h, v11.8h}, [%x[p]], #0x40\n\t" "st4 {v12.8h, v13.8h, v14.8h, v15.8h}, [%x[p]], #0x40\n\t" : [p] "+r" (p) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : [L_mlkem_aarch64_q] "i" (L_mlkem_aarch64_q) : "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20" @@ -7195,14 +7143,7 @@ void mlkem_add_reduce(sword16* r, const sword16* a) "st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t" "st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t" : [r] "+r" (r), [a] "+r" (a) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - 
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]) : "memory", "cc", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18" @@ -7417,14 +7358,7 @@ void mlkem_add3_reduce(sword16* r, const sword16* a, const sword16* b) "st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t" "st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]) : "memory", "cc", "x3", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26" @@ -7599,14 +7533,7 @@ void mlkem_rsub_reduce(sword16* r, const sword16* a) "st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t" "st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t" : [r] "+r" (r), [a] "+r" (a) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]) : "memory", "cc", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18" @@ -7803,14 +7730,7 @@ void mlkem_to_mont(sword16* p) "st4 {v9.8h, v10.8h, v11.8h, v12.8h}, [%x[p]], #0x40\n\t" "st4 {v13.8h, v14.8h, v15.8h, v16.8h}, [%x[p]], #0x40\n\t" : [p] "+r" (p) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]) : "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18" @@ -7976,14 +7896,7 @@ void mlkem_to_mont_sqrdmlsh(sword16* p) "st4 {v9.8h, v10.8h, v11.8h, v12.8h}, [%x[p]], #0x40\n\t" "st4 {v13.8h, v14.8h, v15.8h, v16.8h}, [%x[p]], #0x40\n\t" : [p] "+r" (p) - : [L_mlkem_aarch64_q] "S" 
(L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul) + : [L_mlkem_aarch64_consts] "i" (L_mlkem_aarch64_consts) : "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18" @@ -8231,17 +8144,9 @@ void mlkem_to_msg_neon(byte* msg, sword16* p) "ins v18.b[7], v25.b[0]\n\t" "st1 {v18.8b}, [%x[msg]], #8\n\t" : [msg] "+r" (msg), [p] "+r" (p) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits) + : [L_mlkem_to_msg_low] "i" (L_mlkem_to_msg_low), + [L_mlkem_to_msg_high] "i" (L_mlkem_to_msg_high), + [L_mlkem_to_msg_bits] "i" (L_mlkem_to_msg_bits) : "memory", "cc", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", @@ -8415,19 +8320,8 @@ void mlkem_from_msg_neon(sword16* p, const byte* msg) "and v7.16b, v7.16b, v1.16b\n\t" "st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [%x[p]], #0x40\n\t" : [p] "+r" (p), [msg] "+r" (msg) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), - [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), - [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits) + : [L_mlkem_from_msg_q1half] "i" (L_mlkem_from_msg_q1half), + [L_mlkem_from_msg_bits] "i" (L_mlkem_from_msg_bits) : "memory", "cc", "x2", "x3", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11" ); @@ -8693,19 +8587,7 @@ int mlkem_cmp_neon(const byte* a, const byte* b, int sz) "subs x0, x0, xzr\n\t" "csetm w0, ne\n\t" : [a] "+r" (a), [b] "+r" (b), [sz] "+r" (sz) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S"
(L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), - [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), - [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits) + : : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11" ); @@ -9410,22 +9292,10 @@ unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r, "L_mlkem_rej_uniform_done_%=: \n\t" "mov x0, x12\n\t" : [p] "+r" (p), [len] "+r" (len), [r] "+r" (r), [rLen] "+r" (rLen) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), - [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), - [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), - [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), - [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), - [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : [L_mlkem_aarch64_q] "i" (L_mlkem_aarch64_q), + [L_mlkem_rej_uniform_mask] "i" (L_mlkem_rej_uniform_mask), + [L_mlkem_rej_uniform_bits] "i" (L_mlkem_rej_uniform_bits), + [L_mlkem_rej_uniform_indices] "i" (L_mlkem_rej_uniform_indices) : "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13" @@ -9433,6 +9303,21 @@ unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r, return (word32)(size_t)p; } +static const word64 L_sha3_aarch64_r[] = { + 0x0000000000000001, 0x0000000000008082, + 0x800000000000808a, 0x8000000080008000, + 0x000000000000808b, 0x0000000080000001, + 0x8000000080008081, 0x8000000000008009, + 0x000000000000008a, 0x0000000000000088, + 0x0000000080008009, 0x000000008000000a, + 0x000000008000808b, 0x800000000000008b, + 0x8000000000008089, 0x8000000000008003, + 0x8000000000008002, 0x8000000000000080, + 0x000000000000800a, 0x800000008000000a, + 0x8000000080008081, 0x8000000000008080, + 0x0000000080000001, 0x8000000080008008, +}; + #ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 void mlkem_sha3_blocksx3_neon(word64* state) { @@ -9728,22 +9613,7 @@ void mlkem_sha3_blocksx3_neon(word64* state) "str x26, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), - [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
- [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), - [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), - [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), - [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r) : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", @@ -10070,22 +9940,7 @@ void mlkem_shake128_blocksx3_seed_neon(word64* state, byte* seed) "str x27, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state), [seed] "+r" (seed) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), - [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), - [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), - [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), - [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), - [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r) : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", @@ -10412,22 +10267,7 @@ void mlkem_shake256_blocksx3_seed_neon(word64* state, byte* seed) "str x27, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state), [seed] "+r" (seed) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), - [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), - [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), - [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), - [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), - [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r) : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", @@ -10818,22 +10658,7 @@ void mlkem_sha3_blocksx3_neon(word64* state) "str x26, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - 
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), - [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), - [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), - [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), - [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), - [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r) : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", @@ -11245,22 +11070,7 @@ void mlkem_shake128_blocksx3_seed_neon(word64* state, byte* seed) "str x27, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state), [seed] "+r" (seed) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), - [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), - [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), - [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), - [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), - [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r) : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", @@ -11672,22 +11482,7 @@ void mlkem_shake256_blocksx3_seed_neon(word64* state, byte* seed) "str x27, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state), [seed] "+r" (seed) - : [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q), - [L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts), - [L_sha3_aarch64_r] "S" (L_sha3_aarch64_r), - [L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas), - [L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv), - [L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv), - [L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv), - [L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul), - [L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low), - [L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high), - [L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits), - [L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half), - [L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits), - [L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask), - [L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits), - [L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices) + : [L_sha3_aarch64_r] "i" 
(L_sha3_aarch64_r) : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", diff --git a/wolfcrypt/src/port/arm/armv8-poly1305.c b/wolfcrypt/src/port/arm/armv8-poly1305.c index f3c447dcd..1977f4d58 100644 --- a/wolfcrypt/src/port/arm/armv8-poly1305.c +++ b/wolfcrypt/src/port/arm/armv8-poly1305.c @@ -49,7 +49,7 @@ static WC_INLINE void poly1305_blocks_aarch64_16(Poly1305* ctx, __asm__ __volatile__ ( /* Check for zero bytes to do. */ "CMP %[bytes], #16 \n\t" - "BLO L_poly1305_aarch64_16_done_%= \n\t" + "B.LO L_poly1305_aarch64_16_done_%= \n\t" "MOV x12, #1 \n\t" /* Load h */ @@ -129,7 +129,7 @@ static WC_INLINE void poly1305_blocks_aarch64_16(Poly1305* ctx, "SUBS %[bytes], %[bytes], #16\n\t" "ADD %[m], %[m], #16\n\t" - "BGT L_poly1305_aarch64_16_loop_%=\n\t" + "B.GT L_poly1305_aarch64_16_loop_%=\n\t" /* Base 64 -> Base 26 */ "MOV x10, #0x3ffffff\n\t" @@ -146,8 +146,7 @@ static WC_INLINE void poly1305_blocks_aarch64_16(Poly1305* ctx, ".align 2 \n\t" "L_poly1305_aarch64_16_done_%=: \n\t" : [bytes] "+r" (bytes), [m] "+r" (m) - : [POLY1305_BLOCK_SIZE] "I" (POLY1305_BLOCK_SIZE), - [ctx_r64] "m" (ctx->r64[0]), [ctx_h] "r" (ctx->h), + : [ctx_r64] "m" (ctx->r64[0]), [ctx_h] "r" (ctx->h), [finished] "r" ((word64)ctx->finished) : "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", @@ -161,7 +160,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m, __asm__ __volatile__ ( /* If less than 4 blocks to process then use regular method */ "CMP %[bytes], #64 \n\t" - "BLO L_poly1305_aarch64_64_done_%= \n\t" + "B.LO L_poly1305_aarch64_64_done_%= \n\t" "MOV x9, #0x3ffffff \n\t" /* Load h */ "LDP x20, x22, [%[h]] \n\t" @@ -189,7 +188,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m, "MOV v26.D[1], x9 \n\t" "DUP v30.4S, v26.S[0] \n\t" "CMP %[bytes], #96 \n\t" - "BLO L_poly1305_aarch64_64_start_block_size_64_%= \n\t" + "B.LO L_poly1305_aarch64_64_start_block_size_64_%= \n\t" /* Load r^2 to NEON v0, v1, v2, v3, v4 */ "LD4 { v0.S-v3.S }[2], [%[r_2]], #16 \n\t" "LD1 { v4.S }[2], [%[r_2]] \n\t" @@ -363,7 +362,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m, "UMLAL2 v25.2D, v14.4S, v0.4S \n\t" /* If less than six message blocks left then leave loop */ "CMP %[bytes], #96 \n\t" - "BLS L_poly1305_aarch64_64_loop_128_final_%= \n\t" + "B.LS L_poly1305_aarch64_64_loop_128_final_%= \n\t" /* Load m */ /* Load four message blocks to NEON v10, v11, v12, v13, v14 */ "LD4 { v10.4S-v13.4S }, [%[m]], #64 \n\t" @@ -493,7 +492,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m, "MOV v19.S[1], v19.S[2] \n\t" /* If less than 2 blocks left go straight to final multiplication. 
*/ "CMP %[bytes], #32 \n\t" - "BLO L_poly1305_aarch64_64_last_mult_%= \n\t" + "B.LO L_poly1305_aarch64_64_last_mult_%= \n\t" /* Else go to one loop of L_poly1305_aarch64_64_loop_64 */ "B L_poly1305_aarch64_64_loop_64_%= \n\t" "\n" @@ -677,7 +676,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m, "MOV v19.S[1], v19.S[2] \n\t" /* If at least two message blocks left then loop_64 */ "CMP %[bytes], #32 \n\t" - "BHS L_poly1305_aarch64_64_loop_64_%= \n\t" + "B.HS L_poly1305_aarch64_64_loop_64_%= \n\t" "\n" ".align 2 \n\t" "L_poly1305_aarch64_64_last_mult_%=: \n\t" @@ -821,8 +820,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m, : [bytes] "+r" (bytes), [m] "+r" (m), [ctx] "+m" (ctx) - : [POLY1305_BLOCK_SIZE] "I" (POLY1305_BLOCK_SIZE), - [h] "r" (ctx->h), + : [h] "r" (ctx->h), [r] "r" (ctx->r), [r_2] "r" (ctx->r_2), [r_4] "r" (ctx->r_4), diff --git a/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c b/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c index 8603b6e57..ab0fd89c7 100644 --- a/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c @@ -162,7 +162,7 @@ void BlockSha3_crypto(word64* state) "st4 {v20.d, v21.d, v22.d, v23.d}[0], [%x[state]], #32\n\t" "st1 {v24.1d}, [%x[state]]\n\t" : [state] "+r" (state) - : [L_SHA3_transform_crypto_r] "S" (L_SHA3_transform_crypto_r) + : [L_SHA3_transform_crypto_r] "i" (L_SHA3_transform_crypto_r) : "memory", "cc", "x1", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", @@ -369,7 +369,7 @@ void BlockSha3_base(word64* state) "str x26, [%x[state], #192]\n\t" "ldp x29, x30, [sp], #0x40\n\t" : [state] "+r" (state) - : [L_SHA3_transform_base_r] "S" (L_SHA3_transform_base_r) + : [L_SHA3_transform_base_r] "i" (L_SHA3_transform_base_r) : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" diff --git a/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c b/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c index 6a5cebb3c..1f5559dd2 100644 --- a/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c @@ -1004,8 +1004,8 @@ void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, word32 len) "stp x8, x9, [%x[sha512], #32]\n\t" "stp x10, x11, [%x[sha512], #48]\n\t" : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len) - : [L_SHA512_transform_neon_len_k] "S" (L_SHA512_transform_neon_len_k), - [L_SHA512_transform_neon_len_r8] "S" (L_SHA512_transform_neon_len_r8) + : [L_SHA512_transform_neon_len_k] "i" (L_SHA512_transform_neon_len_k), + [L_SHA512_transform_neon_len_r8] "i" (L_SHA512_transform_neon_len_r8) : "memory", "cc", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "v0", "v1", "v2", @@ -1580,7 +1580,7 @@ void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data, /* Store digest back */ "st1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[sha512]]\n\t" : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len) - : [L_SHA512_trans_crypto_len_k] "S" (L_SHA512_trans_crypto_len_k) + : [L_SHA512_trans_crypto_len_k] "i" (L_SHA512_trans_crypto_len_k) : "memory", "cc", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", 
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", diff --git a/wolfcrypt/src/port/arm/armv8-sha512.c b/wolfcrypt/src/port/arm/armv8-sha512.c index dfc679df0..6c1f18515 100644 --- a/wolfcrypt/src/port/arm/armv8-sha512.c +++ b/wolfcrypt/src/port/arm/armv8-sha512.c @@ -647,9 +647,7 @@ static int Sha512_Family_Final(wc_Sha512* sha512, byte* hash, { int ret; int digestSz; - int (*initfp)(wc_Sha512*); - - (void)initfp; + int (*initfp)(wc_Sha512*) = NULL; if (sha512 == NULL || hash == NULL) { return BAD_FUNC_ARG;