mirror of https://github.com/wolfSSL/wolfssl.git
Green Hills compiler fixes
internal.c: Move non-enumeration value out of switch.
ssl.c: Only declare globalRNGMutex when required.
x509.c: initialize ret.
armv8-aes.c, armv8-chacha.c: fix branch instructions.
armv8-mlkem*: ensure only required constants are input operands and move constants closer to first use.
armv8-poly1305.c: remove POLY1305_BLOCK_SIZE from input operands.
armv8-sha3-asm_c.c, armv8-sha512-asm_c.c: use constraint ':' instead of 'S'.
armv8-sha512.c: initialize initfp; it is always used.
pull/8774/head
parent
f8bb889712
commit
fc1d281268
|
@ -26488,7 +26488,7 @@ const char* wolfSSL_ERR_reason_error_string(unsigned long e)
|
|||
return "peer ip address mismatch";
|
||||
|
||||
case WANT_READ :
|
||||
case -WOLFSSL_ERROR_WANT_READ :
|
||||
case WOLFSSL_ERROR_WANT_READ_E :
|
||||
return "non-blocking socket wants data to be read";
|
||||
|
||||
case NOT_READY_ERROR :
|
||||
|
@ -26498,17 +26498,17 @@ const char* wolfSSL_ERR_reason_error_string(unsigned long e)
|
|||
return "record layer version error";
|
||||
|
||||
case WANT_WRITE :
|
||||
case -WOLFSSL_ERROR_WANT_WRITE :
|
||||
case WOLFSSL_ERROR_WANT_WRITE_E :
|
||||
return "non-blocking socket write buffer full";
|
||||
|
||||
case -WOLFSSL_ERROR_WANT_CONNECT:
|
||||
case -WOLFSSL_ERROR_WANT_ACCEPT:
|
||||
case WOLFSSL_ERROR_WANT_CONNECT_E :
|
||||
case WOLFSSL_ERROR_WANT_ACCEPT_E :
|
||||
return "The underlying BIO was not yet connected";
|
||||
|
||||
case -WOLFSSL_ERROR_SYSCALL:
|
||||
case WOLFSSL_ERROR_SYSCALL_E :
|
||||
return "fatal I/O error in TLS layer";
|
||||
|
||||
case -WOLFSSL_ERROR_WANT_X509_LOOKUP:
|
||||
case WOLFSSL_ERROR_WANT_X509_LOOKUP_E :
|
||||
return "application client cert callback asked to be called again";
|
||||
|
||||
case BUFFER_ERROR :
|
||||
|
@ -26548,7 +26548,7 @@ const char* wolfSSL_ERR_reason_error_string(unsigned long e)
|
|||
return "can't decode peer key";
|
||||
|
||||
case ZERO_RETURN:
|
||||
case -WOLFSSL_ERROR_ZERO_RETURN:
|
||||
case WOLFSSL_ERROR_ZERO_RETURN_E :
|
||||
return "peer sent close notify alert";
|
||||
|
||||
case ECC_CURVETYPE_ERROR:
|
||||
|
|
|
@ -234,8 +234,10 @@ static struct SystemCryptoPolicy crypto_policy;
|
|||
static WC_RNG globalRNG;
|
||||
static volatile int initGlobalRNG = 0;
|
||||
|
||||
#if defined(OPENSSL_EXTRA) || !defined(WOLFSSL_MUTEX_INITIALIZER)
|
||||
static WC_MAYBE_UNUSED wolfSSL_Mutex globalRNGMutex
|
||||
WOLFSSL_MUTEX_INITIALIZER_CLAUSE(globalRNGMutex);
|
||||
#endif
|
||||
#ifndef WOLFSSL_MUTEX_INITIALIZER
|
||||
static int globalRNGMutex_valid = 0;
|
||||
#endif
|
||||
|
|
|
@ -5507,7 +5507,7 @@ int wolfSSL_X509_NAME_get_text_by_NID(WOLFSSL_X509_NAME* name,
|
|||
WOLFSSL_EVP_PKEY* wolfSSL_X509_get_pubkey(WOLFSSL_X509* x509)
|
||||
{
|
||||
WOLFSSL_EVP_PKEY* key = NULL;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
(void)ret;
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -556,7 +556,7 @@ static WC_INLINE void wc_Chacha_encrypt_320(const word32* input, const byte* m,
|
|||
"SRI v6.4s, v17.4s, #25 \n\t"
|
||||
"SRI v7.4s, v18.4s, #25 \n\t"
|
||||
"SRI v4.4s, v19.4s, #25 \n\t"
|
||||
"BNE L_chacha20_arm64_inner_%= \n\t"
|
||||
"B.NE L_chacha20_arm64_inner_%= \n\t"
|
||||
/* Add counter now rather than after transposed */
|
||||
"ADD v12.4s, v12.4s, v28.4s \n\t"
|
||||
"ADD w16, w16, w21 \n\t"
|
||||
|
@ -666,7 +666,7 @@ static WC_INLINE void wc_Chacha_encrypt_320(const word32* input, const byte* m,
|
|||
"ST1 {v16.4s-v19.4s}, [%[c]], #64 \n\t"
|
||||
"SUBS %[bytes], %[bytes], #320 \n\t"
|
||||
"ADD v28.4s, v28.4s, v29.4s \n\t"
|
||||
"BNE L_chacha20_arm64_outer_%= \n\t"
|
||||
"B.NE L_chacha20_arm64_outer_%= \n\t"
|
||||
: [input] "+r" (input), [m] "+r" (m), [c] "+r" (c),
|
||||
[bytes] "+r" (bytes64)
|
||||
: [L_chacha20_neon_add_all_cntrs] "r" (L_chacha20_neon_add_all_cntrs),
|
||||
|
@ -959,7 +959,7 @@ static WC_INLINE int wc_Chacha_encrypt_256(
|
|||
"EXT v9.16B, v9.16B, v9.16B, #12 \n\t"
|
||||
"EXT v10.16B, v10.16B, v10.16B, #8 \n\t"
|
||||
"EXT v11.16B, v11.16B, v11.16B, #4 \n\t"
|
||||
"BNE L_chacha20_arm64_256_loop_%= \n\t"
|
||||
"B.NE L_chacha20_arm64_256_loop_%= \n\t"
|
||||
/* Load message */
|
||||
"LD1 {v16.4S-v19.4S}, [%[m]], #64 \n\t"
|
||||
/* Add one (2 added during calculating vector results) */
|
||||
|
@ -1364,7 +1364,7 @@ static WC_INLINE int wc_Chacha_encrypt_256(
|
|||
"ROR r4, r4, #25 \n\t" // 4 4
|
||||
"VEXT.8 q11, q11, q11, #4 \n\t" // permute elements left by one
|
||||
|
||||
"BNE L_chacha20_arm32_256_loop_%= \n\t"
|
||||
"B.NE L_chacha20_arm32_256_loop_%= \n\t"
|
||||
|
||||
// r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12
|
||||
// 0 1 2 3 4 5 6 7 8 9 12 13 14
|
||||
|
@ -1583,7 +1583,7 @@ static WC_INLINE int wc_Chacha_encrypt_128(
|
|||
"EXT v5.16B, v5.16B, v5.16B, #12 \n\t"
|
||||
"EXT v6.16B, v6.16B, v6.16B, #8 \n\t"
|
||||
"EXT v7.16B, v7.16B, v7.16B, #4 \n\t"
|
||||
"BNE L_chacha20_arm64_128_loop_%= \n\t"
|
||||
"B.NE L_chacha20_arm64_128_loop_%= \n\t"
|
||||
/* Add back state, XOR in message and store (load next block) */
|
||||
"ADD v0.4S, v0.4S, v18.4S \n\t"
|
||||
"ADD v1.4S, v1.4S, v19.4S \n\t"
|
||||
|
@ -1736,7 +1736,7 @@ static WC_INLINE int wc_Chacha_encrypt_128(
|
|||
"VEXT.8 q6, q6, q6, #8 \n\t" // permute elements left by two
|
||||
"VEXT.8 q7, q7, q7, #4 \n\t" // permute elements left by one
|
||||
|
||||
"BNE L_chacha20_arm32_128_loop_%= \n\t"
|
||||
"B.NE L_chacha20_arm32_128_loop_%= \n\t"
|
||||
|
||||
"VMOV.I32 q8, #0 \n\t"
|
||||
"VADD.I32 q0, q0, q10 \n\t"
|
||||
|
@ -2251,7 +2251,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
|
|||
"ADD v2.4S, v2.4S, v10.4S \n\t"
|
||||
"ADD v3.4S, v3.4S, v11.4S \n\t"
|
||||
"CMP %[bytes], #64 \n\t"
|
||||
"BLT L_chacha20_arm64_64_lt_64_%= \n\t"
|
||||
"B.LT L_chacha20_arm64_64_lt_64_%= \n\t"
|
||||
"LD1 {v4.4S-v7.4S}, [%[m]], #64 \n\t"
|
||||
"EOR v4.16B, v4.16B, v0.16B \n\t"
|
||||
"EOR v5.16B, v5.16B, v1.16B \n\t"
|
||||
|
@ -2260,13 +2260,13 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
|
|||
"ST1 {v4.4S-v7.4S}, [%[c]], #64 \n\t"
|
||||
"SUBS %[bytes], %[bytes], #64 \n\t"
|
||||
"ADD v11.4S, v11.4S, v14.4S \n\t"
|
||||
"BNE L_chacha20_arm64_64_loop_%= \n\t"
|
||||
"B.NE L_chacha20_arm64_64_loop_%= \n\t"
|
||||
"B L_chacha20_arm64_64_done_%= \n\t"
|
||||
"\n"
|
||||
"L_chacha20_arm64_64_lt_64_%=: \n\t"
|
||||
"ST1 {v0.4s-v3.4s}, [%[over]]\n\t"
|
||||
"CMP %[bytes], #32 \n\t"
|
||||
"BLT L_chacha20_arm64_64_lt_32_%= \n\t"
|
||||
"B.LT L_chacha20_arm64_64_lt_32_%= \n\t"
|
||||
"LD1 {v4.4S, v5.4S}, [%[m]], #32 \n\t"
|
||||
"EOR v4.16B, v4.16B, v0.16B \n\t"
|
||||
"EOR v5.16B, v5.16B, v1.16B \n\t"
|
||||
|
@ -2274,27 +2274,27 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
|
|||
"SUBS %[bytes], %[bytes], #32 \n\t"
|
||||
"MOV v0.16B, v2.16B \n\t"
|
||||
"MOV v1.16B, v3.16B \n\t"
|
||||
"BEQ L_chacha20_arm64_64_done_%= \n\t"
|
||||
"B.EQ L_chacha20_arm64_64_done_%= \n\t"
|
||||
"\n"
|
||||
"L_chacha20_arm64_64_lt_32_%=: \n\t"
|
||||
"CMP %[bytes], #16 \n\t"
|
||||
"BLT L_chacha20_arm64_64_lt_16_%= \n\t"
|
||||
"B.LT L_chacha20_arm64_64_lt_16_%= \n\t"
|
||||
"LD1 {v4.4S}, [%[m]], #16 \n\t"
|
||||
"EOR v4.16B, v4.16B, v0.16B \n\t"
|
||||
"ST1 {v4.4S}, [%[c]], #16 \n\t"
|
||||
"SUBS %[bytes], %[bytes], #16 \n\t"
|
||||
"MOV v0.16B, v1.16B \n\t"
|
||||
"BEQ L_chacha20_arm64_64_done_%= \n\t"
|
||||
"B.EQ L_chacha20_arm64_64_done_%= \n\t"
|
||||
"\n"
|
||||
"L_chacha20_arm64_64_lt_16_%=: \n\t"
|
||||
"CMP %[bytes], #8 \n\t"
|
||||
"BLT L_chacha20_arm64_64_lt_8_%= \n\t"
|
||||
"B.LT L_chacha20_arm64_64_lt_8_%= \n\t"
|
||||
"LD1 {v4.2S}, [%[m]], #8 \n\t"
|
||||
"EOR v4.8B, v4.8B, v0.8B \n\t"
|
||||
"ST1 {v4.2S}, [%[c]], #8 \n\t"
|
||||
"SUBS %[bytes], %[bytes], #8 \n\t"
|
||||
"MOV v0.D[0], v0.D[1] \n\t"
|
||||
"BEQ L_chacha20_arm64_64_done_%= \n\t"
|
||||
"B.EQ L_chacha20_arm64_64_done_%= \n\t"
|
||||
"\n"
|
||||
"L_chacha20_arm64_64_lt_8_%=: \n\t"
|
||||
"MOV x4, v0.D[0] \n\t"
|
||||
|
@ -2305,7 +2305,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
|
|||
"STRB w6, [%[c]], #1 \n\t"
|
||||
"SUBS %[bytes], %[bytes], #1 \n\t"
|
||||
"LSR x4, x4, #8 \n\t"
|
||||
"BGT L_chacha20_arm64_64_loop_lt_8_%= \n\t"
|
||||
"B.GT L_chacha20_arm64_64_loop_lt_8_%= \n\t"
|
||||
"\n"
|
||||
"L_chacha20_arm64_64_done_%=: \n\t"
|
||||
: [input] "+r" (input), [m] "+r" (m), [c] "+r" (c),
|
||||
|
@ -2816,7 +2816,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
|
|||
"VADD.I32 q2, q2, q10 \n\t"
|
||||
"VADD.I32 q3, q3, q11 \n\t"
|
||||
"CMP %[bytes], #64 \n\t"
|
||||
"BLT L_chacha20_arm32_64_lt_64_%= \n\t"
|
||||
"B.LT L_chacha20_arm32_64_lt_64_%= \n\t"
|
||||
/* XOR full 64 byte block */
|
||||
"VLD1.8 { q4, q5 }, [%[m]]! \n\t"
|
||||
"VLD1.8 { q6, q7 }, [%[m]]! \n\t"
|
||||
|
@ -2828,14 +2828,14 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
|
|||
"VST1.8 { q2, q3 }, [%[c]]! \n\t"
|
||||
"SUBS %[bytes], %[bytes], #64 \n\t"
|
||||
"VADD.I32 q11, q11, q14 \n\t"
|
||||
"BNE L_chacha20_arm32_64_outer_loop_%= \n\t"
|
||||
"B.NE L_chacha20_arm32_64_outer_loop_%= \n\t"
|
||||
"B L_chacha20_arm32_64_done_%= \n\t"
|
||||
"\n"
|
||||
"L_chacha20_arm32_64_lt_64_%=: \n\t"
|
||||
"VSTM %[over], {q0-q3} \n\t"
|
||||
/* XOR 32 bytes */
|
||||
"CMP %[bytes], #32 \n\t"
|
||||
"BLT L_chacha20_arm32_64_lt_32_%= \n\t"
|
||||
"B.LT L_chacha20_arm32_64_lt_32_%= \n\t"
|
||||
"VLD1.8 { q4, q5 }, [%[m]]! \n\t"
|
||||
"VEOR q4, q4, q0 \n\t"
|
||||
"VEOR q5, q5, q1 \n\t"
|
||||
|
@ -2843,41 +2843,41 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
|
|||
"SUBS %[bytes], %[bytes], #32 \n\t"
|
||||
"VMOV q0, q2 \n\t"
|
||||
"VMOV q1, q3 \n\t"
|
||||
"BEQ L_chacha20_arm32_64_done_%= \n\t"
|
||||
"B.EQ L_chacha20_arm32_64_done_%= \n\t"
|
||||
"\n"
|
||||
"L_chacha20_arm32_64_lt_32_%=: \n\t"
|
||||
/* XOR 16 bytes */
|
||||
"CMP %[bytes], #16 \n\t"
|
||||
"BLT L_chacha20_arm32_64_lt_16_%= \n\t"
|
||||
"B.LT L_chacha20_arm32_64_lt_16_%= \n\t"
|
||||
"VLD1.8 { q4 }, [%[m]]! \n\t"
|
||||
"VEOR q4, q4, q0 \n\t"
|
||||
"VST1.8 { q4 }, [%[c]]! \n\t"
|
||||
"SUBS %[bytes], %[bytes], #16 \n\t"
|
||||
"VMOV q0, q1 \n\t"
|
||||
"BEQ L_chacha20_arm32_64_done_%= \n\t"
|
||||
"B.EQ L_chacha20_arm32_64_done_%= \n\t"
|
||||
"\n"
|
||||
"L_chacha20_arm32_64_lt_16_%=: \n\t"
|
||||
/* XOR 8 bytes */
|
||||
"CMP %[bytes], #8 \n\t"
|
||||
"BLT L_chacha20_arm32_64_lt_8_%= \n\t"
|
||||
"B.LT L_chacha20_arm32_64_lt_8_%= \n\t"
|
||||
"VLD1.8 { d8 }, [%[m]]! \n\t"
|
||||
"VEOR d8, d8, d0 \n\t"
|
||||
"VST1.8 { d8 }, [%[c]]! \n\t"
|
||||
"SUBS %[bytes], %[bytes], #8 \n\t"
|
||||
"VMOV d0, d1 \n\t"
|
||||
"BEQ L_chacha20_arm32_64_done_%= \n\t"
|
||||
"B.EQ L_chacha20_arm32_64_done_%= \n\t"
|
||||
"\n"
|
||||
"L_chacha20_arm32_64_lt_8_%=: \n\t"
|
||||
/* XOR 4 bytes */
|
||||
"CMP %[bytes], #4 \n\t"
|
||||
"BLT L_chacha20_arm32_64_lt_4_%= \n\t"
|
||||
"B.LT L_chacha20_arm32_64_lt_4_%= \n\t"
|
||||
"LDR r12, [%[m]], #4 \n\t"
|
||||
"VMOV r14, d0[0] \n\t"
|
||||
"EOR r12, r12, r14 \n\t"
|
||||
"STR r12, [%[c]], #4 \n\t"
|
||||
"SUBS %[bytes], %[bytes], #4 \n\t"
|
||||
"VSHR.U64 d0, d0, #32 \n\t"
|
||||
"BEQ L_chacha20_arm32_64_done_%= \n\t"
|
||||
"B.EQ L_chacha20_arm32_64_done_%= \n\t"
|
||||
"\n"
|
||||
"L_chacha20_arm32_64_lt_4_%=: \n\t"
|
||||
/* XOR remaining bytes */
|
||||
|
@ -2889,7 +2889,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
|
|||
"STRB r12, [%[c]], #1 \n\t"
|
||||
"SUBS %[bytes], %[bytes], #1 \n\t"
|
||||
"LSR r14, r14, #8 \n\t"
|
||||
"BGT L_chacha20_arm32_64_lt_4_loop_%= \n\t"
|
||||
"B.GT L_chacha20_arm32_64_lt_4_loop_%= \n\t"
|
||||
"\n"
|
||||
"L_chacha20_arm32_64_done_%=: \n\t"
|
||||
: [input] "+r" (input), [m] "+r" (m), [c] "+r" (c), [bytes] "+r" (bytes)
|
||||
|
|
|
@ -29,21 +29,6 @@
|
|||
#ifdef WOLFSSL_ARMASM
|
||||
#ifdef __aarch64__
|
||||
#ifndef WOLFSSL_ARMASM_INLINE
|
||||
#ifndef __APPLE__
|
||||
.text
|
||||
.type L_mlkem_aarch64_q, %object
|
||||
.section .rodata
|
||||
.size L_mlkem_aarch64_q, 16
|
||||
#else
|
||||
.section __DATA,__data
|
||||
#endif /* __APPLE__ */
|
||||
#ifndef __APPLE__
|
||||
.align 2
|
||||
#else
|
||||
.p2align 2
|
||||
#endif /* __APPLE__ */
|
||||
L_mlkem_aarch64_q:
|
||||
.short 0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01
|
||||
#ifndef __APPLE__
|
||||
.text
|
||||
.type L_mlkem_aarch64_consts, %object
|
||||
|
@ -59,44 +44,6 @@ L_mlkem_aarch64_q:
|
|||
#endif /* __APPLE__ */
|
||||
L_mlkem_aarch64_consts:
|
||||
.short 0x0d01,0xf301,0x4ebf,0x0549,0x5049,0x0000,0x0000,0x0000
|
||||
#ifndef __APPLE__
|
||||
.text
|
||||
.type L_sha3_aarch64_r, %object
|
||||
.section .rodata
|
||||
.size L_sha3_aarch64_r, 192
|
||||
#else
|
||||
.section __DATA,__data
|
||||
#endif /* __APPLE__ */
|
||||
#ifndef __APPLE__
|
||||
.align 3
|
||||
#else
|
||||
.p2align 3
|
||||
#endif /* __APPLE__ */
|
||||
L_sha3_aarch64_r:
|
||||
.xword 0x0000000000000001
|
||||
.xword 0x0000000000008082
|
||||
.xword 0x800000000000808a
|
||||
.xword 0x8000000080008000
|
||||
.xword 0x000000000000808b
|
||||
.xword 0x0000000080000001
|
||||
.xword 0x8000000080008081
|
||||
.xword 0x8000000000008009
|
||||
.xword 0x000000000000008a
|
||||
.xword 0x0000000000000088
|
||||
.xword 0x0000000080008009
|
||||
.xword 0x000000008000000a
|
||||
.xword 0x000000008000808b
|
||||
.xword 0x800000000000008b
|
||||
.xword 0x8000000000008089
|
||||
.xword 0x8000000000008003
|
||||
.xword 0x8000000000008002
|
||||
.xword 0x8000000000000080
|
||||
.xword 0x000000000000800a
|
||||
.xword 0x800000008000000a
|
||||
.xword 0x8000000080008081
|
||||
.xword 0x8000000000008080
|
||||
.xword 0x0000000080000001
|
||||
.xword 0x8000000080008008
|
||||
#ifdef WOLFSSL_WC_MLKEM
|
||||
#ifndef __APPLE__
|
||||
.text
|
||||
|
@ -7006,6 +6953,21 @@ _mlkem_basemul_mont_add:
|
|||
#ifndef __APPLE__
|
||||
.size mlkem_basemul_mont_add,.-mlkem_basemul_mont_add
|
||||
#endif /* __APPLE__ */
|
||||
#ifndef __APPLE__
|
||||
.text
|
||||
.type L_mlkem_aarch64_q, %object
|
||||
.section .rodata
|
||||
.size L_mlkem_aarch64_q, 16
|
||||
#else
|
||||
.section __DATA,__data
|
||||
#endif /* __APPLE__ */
|
||||
#ifndef __APPLE__
|
||||
.align 2
|
||||
#else
|
||||
.p2align 2
|
||||
#endif /* __APPLE__ */
|
||||
L_mlkem_aarch64_q:
|
||||
.short 0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01
|
||||
#ifndef __APPLE__
|
||||
.text
|
||||
.globl mlkem_csubq_neon
|
||||
|
@ -9724,6 +9686,44 @@ L_mlkem_rej_uniform_done:
|
|||
#ifndef __APPLE__
|
||||
.size mlkem_rej_uniform_neon,.-mlkem_rej_uniform_neon
|
||||
#endif /* __APPLE__ */
|
||||
#ifndef __APPLE__
|
||||
.text
|
||||
.type L_sha3_aarch64_r, %object
|
||||
.section .rodata
|
||||
.size L_sha3_aarch64_r, 192
|
||||
#else
|
||||
.section __DATA,__data
|
||||
#endif /* __APPLE__ */
|
||||
#ifndef __APPLE__
|
||||
.align 3
|
||||
#else
|
||||
.p2align 3
|
||||
#endif /* __APPLE__ */
|
||||
L_sha3_aarch64_r:
|
||||
.xword 0x0000000000000001
|
||||
.xword 0x0000000000008082
|
||||
.xword 0x800000000000808a
|
||||
.xword 0x8000000080008000
|
||||
.xword 0x000000000000808b
|
||||
.xword 0x0000000080000001
|
||||
.xword 0x8000000080008081
|
||||
.xword 0x8000000000008009
|
||||
.xword 0x000000000000008a
|
||||
.xword 0x0000000000000088
|
||||
.xword 0x0000000080008009
|
||||
.xword 0x000000008000000a
|
||||
.xword 0x000000008000808b
|
||||
.xword 0x800000000000008b
|
||||
.xword 0x8000000000008089
|
||||
.xword 0x8000000000008003
|
||||
.xword 0x8000000000008002
|
||||
.xword 0x8000000000000080
|
||||
.xword 0x000000000000800a
|
||||
.xword 0x800000008000000a
|
||||
.xword 0x8000000080008081
|
||||
.xword 0x8000000000008080
|
||||
.xword 0x0000000080000001
|
||||
.xword 0x8000000080008008
|
||||
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
|
||||
#ifndef __APPLE__
|
||||
.text
|
||||
|
|
|
@ -30,29 +30,10 @@
|
|||
#ifdef WOLFSSL_ARMASM
|
||||
#ifdef __aarch64__
|
||||
#ifdef WOLFSSL_ARMASM_INLINE
|
||||
static const word16 L_mlkem_aarch64_q[] = {
|
||||
0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01,
|
||||
};
|
||||
|
||||
static const word16 L_mlkem_aarch64_consts[] = {
|
||||
0x0d01, 0xf301, 0x4ebf, 0x0549, 0x5049, 0x0000, 0x0000, 0x0000,
|
||||
};
|
||||
|
||||
static const word64 L_sha3_aarch64_r[] = {
|
||||
0x0000000000000001, 0x0000000000008082,
|
||||
0x800000000000808a, 0x8000000080008000,
|
||||
0x000000000000808b, 0x0000000080000001,
|
||||
0x8000000080008081, 0x8000000000008009,
|
||||
0x000000000000008a, 0x0000000000000088,
|
||||
0x0000000080008009, 0x000000008000000a,
|
||||
0x000000008000808b, 0x800000000000008b,
|
||||
0x8000000000008089, 0x8000000000008003,
|
||||
0x8000000000008002, 0x8000000000000080,
|
||||
0x000000000000800a, 0x800000008000000a,
|
||||
0x8000000080008081, 0x8000000000008080,
|
||||
0x0000000080000001, 0x8000000080008008,
|
||||
};
|
||||
|
||||
#include <wolfssl/wolfcrypt/wc_mlkem.h>
|
||||
|
||||
#ifdef WOLFSSL_WC_MLKEM
|
||||
|
@ -1405,11 +1386,9 @@ void mlkem_ntt(sword16* r)
|
|||
"stp q17, q18, [x1, #192]\n\t"
|
||||
"stp q19, q20, [x1, #224]\n\t"
|
||||
: [r] "+r" (r)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv)
|
||||
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]),
|
||||
[L_mlkem_aarch64_zetas] "i" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "i" (L_mlkem_aarch64_zetas_qinv)
|
||||
: "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4",
|
||||
"v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14",
|
||||
"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
|
||||
|
@ -2922,13 +2901,9 @@ void mlkem_invntt(sword16* r)
|
|||
"str q23, [x1, #208]\n\t"
|
||||
"str q24, [x1, #240]\n\t"
|
||||
: [r] "+r" (r)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv)
|
||||
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]),
|
||||
[L_mlkem_aarch64_zetas_inv] "i" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "i" (L_mlkem_aarch64_zetas_inv_qinv)
|
||||
: "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4",
|
||||
"v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14",
|
||||
"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
|
||||
|
@ -4096,13 +4071,7 @@ void mlkem_ntt_sqrdmlsh(sword16* r)
|
|||
"stp q17, q18, [x1, #192]\n\t"
|
||||
"stp q19, q20, [x1, #224]\n\t"
|
||||
: [r] "+r" (r)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv)
|
||||
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts])
|
||||
: "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4",
|
||||
"v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14",
|
||||
"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
|
||||
|
@ -5393,13 +5362,7 @@ void mlkem_invntt_sqrdmlsh(sword16* r)
|
|||
"str q23, [x1, #208]\n\t"
|
||||
"str q24, [x1, #240]\n\t"
|
||||
: [r] "+r" (r)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv)
|
||||
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts])
|
||||
: "memory", "cc", "x1", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4",
|
||||
"v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14",
|
||||
"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
|
||||
|
@ -6102,14 +6065,8 @@ void mlkem_basemul_mont(sword16* r, const sword16* a, const sword16* b)
|
|||
"zip2 v25.8h, v22.8h, v23.8h\n\t"
|
||||
"stp q24, q25, [%x[r], #480]\n\t"
|
||||
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
|
||||
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul)
|
||||
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]),
|
||||
[L_mlkem_aarch64_zetas_mul] "i" (L_mlkem_aarch64_zetas_mul)
|
||||
: "memory", "cc", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
|
||||
"v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16",
|
||||
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
|
||||
|
@ -6840,14 +6797,8 @@ void mlkem_basemul_mont_add(sword16* r, const sword16* a, const sword16* b)
|
|||
"add v29.8h, v29.8h, v25.8h\n\t"
|
||||
"stp q28, q29, [%x[r], #480]\n\t"
|
||||
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
|
||||
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul)
|
||||
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts]),
|
||||
[L_mlkem_aarch64_zetas_mul] "i" (L_mlkem_aarch64_zetas_mul)
|
||||
: "memory", "cc", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
|
||||
"v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16",
|
||||
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
|
||||
|
@ -6855,6 +6806,10 @@ void mlkem_basemul_mont_add(sword16* r, const sword16* a, const sword16* b)
|
|||
);
|
||||
}
|
||||
|
||||
static const word16 L_mlkem_aarch64_q[] = {
|
||||
0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01,
|
||||
};
|
||||
|
||||
void mlkem_csubq_neon(sword16* p)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
|
@ -7013,14 +6968,7 @@ void mlkem_csubq_neon(sword16* p)
|
|||
"st4 {v8.8h, v9.8h, v10.8h, v11.8h}, [%x[p]], #0x40\n\t"
|
||||
"st4 {v12.8h, v13.8h, v14.8h, v15.8h}, [%x[p]], #0x40\n\t"
|
||||
: [p] "+r" (p)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
|
||||
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul)
|
||||
: [L_mlkem_aarch64_q] "i" (L_mlkem_aarch64_q)
|
||||
: "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
||||
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
|
||||
"v18", "v19", "v20"
|
||||
|
@ -7195,14 +7143,7 @@ void mlkem_add_reduce(sword16* r, const sword16* a)
|
|||
"st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
|
||||
"st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
|
||||
: [r] "+r" (r), [a] "+r" (a)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
|
||||
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul)
|
||||
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts])
|
||||
: "memory", "cc", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
||||
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
|
||||
"v18"
|
||||
|
@ -7417,14 +7358,7 @@ void mlkem_add3_reduce(sword16* r, const sword16* a, const sword16* b)
|
|||
"st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
|
||||
"st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
|
||||
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
|
||||
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul)
|
||||
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts])
|
||||
: "memory", "cc", "x3", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
||||
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
|
||||
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26"
|
||||
|
@ -7599,14 +7533,7 @@ void mlkem_rsub_reduce(sword16* r, const sword16* a)
|
|||
"st4 {v1.8h, v2.8h, v3.8h, v4.8h}, [%x[r]], #0x40\n\t"
|
||||
"st4 {v5.8h, v6.8h, v7.8h, v8.8h}, [%x[r]], #0x40\n\t"
|
||||
: [r] "+r" (r), [a] "+r" (a)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
|
||||
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul)
|
||||
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts])
|
||||
: "memory", "cc", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
||||
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
|
||||
"v18"
|
||||
|
@ -7803,14 +7730,7 @@ void mlkem_to_mont(sword16* p)
|
|||
"st4 {v9.8h, v10.8h, v11.8h, v12.8h}, [%x[p]], #0x40\n\t"
|
||||
"st4 {v13.8h, v14.8h, v15.8h, v16.8h}, [%x[p]], #0x40\n\t"
|
||||
: [p] "+r" (p)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
|
||||
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul)
|
||||
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts])
|
||||
: "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
||||
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
|
||||
"v18"
|
||||
|
@ -7976,14 +7896,7 @@ void mlkem_to_mont_sqrdmlsh(sword16* p)
|
|||
"st4 {v9.8h, v10.8h, v11.8h, v12.8h}, [%x[p]], #0x40\n\t"
|
||||
"st4 {v13.8h, v14.8h, v15.8h, v16.8h}, [%x[p]], #0x40\n\t"
|
||||
: [p] "+r" (p)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
|
||||
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul)
|
||||
: [%[L_mlkem_aarch64_consts]] "i" (%[L_mlkem_aarch64_consts])
|
||||
: "memory", "cc", "x1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
||||
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
|
||||
"v18"
|
||||
|
@ -8231,17 +8144,9 @@ void mlkem_to_msg_neon(byte* msg, sword16* p)
|
|||
"ins v18.b[7], v25.b[0]\n\t"
|
||||
"st1 {v18.8b}, [%x[msg]], #8\n\t"
|
||||
: [msg] "+r" (msg), [p] "+r" (p)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
|
||||
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
|
||||
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
|
||||
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
|
||||
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits)
|
||||
: [L_mlkem_to_msg_low] "i" (L_mlkem_to_msg_low),
|
||||
[L_mlkem_to_msg_high] "i" (L_mlkem_to_msg_high),
|
||||
[L_mlkem_to_msg_bits] "i" (L_mlkem_to_msg_bits)
|
||||
: "memory", "cc", "x2", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5",
|
||||
"v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
|
||||
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24",
|
||||
|
@ -8415,19 +8320,8 @@ void mlkem_from_msg_neon(sword16* p, const byte* msg)
|
|||
"and v7.16b, v7.16b, v1.16b\n\t"
|
||||
"st1 {v4.8h, v5.8h, v6.8h, v7.8h}, [%x[p]], #0x40\n\t"
|
||||
: [p] "+r" (p), [msg] "+r" (msg)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
|
||||
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
|
||||
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
|
||||
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
|
||||
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits),
|
||||
[L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
|
||||
[L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits)
|
||||
: [L_mlkem_from_msg_q1half] "i" (L_mlkem_from_msg_q1half),
|
||||
[L_mlkem_from_msg_bits] "i" (L_mlkem_from_msg_bits)
|
||||
: "memory", "cc", "x2", "x3", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
|
||||
"v7", "v8", "v9", "v10", "v11"
|
||||
);
|
||||
|
@ -8693,19 +8587,7 @@ int mlkem_cmp_neon(const byte* a, const byte* b, int sz)
|
|||
"subs x0, x0, xzr\n\t"
|
||||
"csetm w0, ne\n\t"
|
||||
: [a] "+r" (a), [b] "+r" (b), [sz] "+r" (sz)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
|
||||
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
|
||||
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
|
||||
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
|
||||
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits),
|
||||
[L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
|
||||
[L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits)
|
||||
:
|
||||
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
|
||||
"v9", "v10", "v11"
|
||||
);
|
||||
|
@ -9410,22 +9292,10 @@ unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r,
|
|||
"L_mlkem_rej_uniform_done_%=: \n\t"
|
||||
"mov x0, x12\n\t"
|
||||
: [p] "+r" (p), [len] "+r" (len), [r] "+r" (r), [rLen] "+r" (rLen)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
|
||||
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
|
||||
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
|
||||
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
|
||||
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits),
|
||||
[L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
|
||||
[L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits),
|
||||
[L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask),
|
||||
[L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits),
|
||||
[L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices)
|
||||
: [L_mlkem_aarch64_q] "i" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_rej_uniform_mask] "i" (L_mlkem_rej_uniform_mask),
|
||||
[L_mlkem_rej_uniform_bits] "i" (L_mlkem_rej_uniform_bits),
|
||||
[L_mlkem_rej_uniform_indices] "i" (L_mlkem_rej_uniform_indices)
|
||||
: "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11",
|
||||
"x12", "x13", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
|
||||
"v9", "v10", "v11", "v12", "v13"
|
||||
|
@ -9433,6 +9303,21 @@ unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r,
|
|||
return (word32)(size_t)p;
|
||||
}
|
||||
|
||||
/* Table of 24 64-bit constants used by the SHA-3/SHAKE x3 assembly routines.
 *
 * The values are identical, in order, to the 24 Keccak-f[1600] round
 * constants listed in FIPS 202 (one per permutation round).
 * The table is handed to the inline assembly of the blocksx3 functions
 * through the [L_sha3_aarch64_r] "i" input operand.
 * NOTE(review): presumably the assembly indexes this once per round -
 * confirm against the asm body, which is not visible here.
 */
static const word64 L_sha3_aarch64_r[] = {
|
||||
0x0000000000000001, 0x0000000000008082,
|
||||
0x800000000000808a, 0x8000000080008000,
|
||||
0x000000000000808b, 0x0000000080000001,
|
||||
0x8000000080008081, 0x8000000000008009,
|
||||
0x000000000000008a, 0x0000000000000088,
|
||||
0x0000000080008009, 0x000000008000000a,
|
||||
0x000000008000808b, 0x800000000000008b,
|
||||
0x8000000000008089, 0x8000000000008003,
|
||||
0x8000000000008002, 0x8000000000000080,
|
||||
0x000000000000800a, 0x800000008000000a,
|
||||
0x8000000080008081, 0x8000000000008080,
|
||||
0x0000000080000001, 0x8000000080008008,
|
||||
};
|
||||
|
||||
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
|
||||
void mlkem_sha3_blocksx3_neon(word64* state)
|
||||
{
|
||||
|
@ -9728,22 +9613,7 @@ void mlkem_sha3_blocksx3_neon(word64* state)
|
|||
"str x26, [%x[state], #192]\n\t"
|
||||
"ldp x29, x30, [sp], #0x40\n\t"
|
||||
: [state] "+r" (state)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
|
||||
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
|
||||
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
|
||||
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
|
||||
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits),
|
||||
[L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
|
||||
[L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits),
|
||||
[L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask),
|
||||
[L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits),
|
||||
[L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices)
|
||||
: [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r)
|
||||
: "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9",
|
||||
"x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19",
|
||||
"x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0",
|
||||
|
@ -10070,22 +9940,7 @@ void mlkem_shake128_blocksx3_seed_neon(word64* state, byte* seed)
|
|||
"str x27, [%x[state], #192]\n\t"
|
||||
"ldp x29, x30, [sp], #0x40\n\t"
|
||||
: [state] "+r" (state), [seed] "+r" (seed)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
|
||||
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
|
||||
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
|
||||
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
|
||||
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits),
|
||||
[L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
|
||||
[L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits),
|
||||
[L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask),
|
||||
[L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits),
|
||||
[L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices)
|
||||
: [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r)
|
||||
: "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10",
|
||||
"x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20",
|
||||
"x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1",
|
||||
|
@ -10412,22 +10267,7 @@ void mlkem_shake256_blocksx3_seed_neon(word64* state, byte* seed)
|
|||
"str x27, [%x[state], #192]\n\t"
|
||||
"ldp x29, x30, [sp], #0x40\n\t"
|
||||
: [state] "+r" (state), [seed] "+r" (seed)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
|
||||
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
|
||||
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
|
||||
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
|
||||
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits),
|
||||
[L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
|
||||
[L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits),
|
||||
[L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask),
|
||||
[L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits),
|
||||
[L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices)
|
||||
: [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r)
|
||||
: "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10",
|
||||
"x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20",
|
||||
"x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1",
|
||||
|
@ -10818,22 +10658,7 @@ void mlkem_sha3_blocksx3_neon(word64* state)
|
|||
"str x26, [%x[state], #192]\n\t"
|
||||
"ldp x29, x30, [sp], #0x40\n\t"
|
||||
: [state] "+r" (state)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
|
||||
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
|
||||
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
|
||||
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
|
||||
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits),
|
||||
[L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
|
||||
[L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits),
|
||||
[L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask),
|
||||
[L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits),
|
||||
[L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices)
|
||||
: [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r)
|
||||
: "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9",
|
||||
"x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19",
|
||||
"x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0",
|
||||
|
@ -11245,22 +11070,7 @@ void mlkem_shake128_blocksx3_seed_neon(word64* state, byte* seed)
|
|||
"str x27, [%x[state], #192]\n\t"
|
||||
"ldp x29, x30, [sp], #0x40\n\t"
|
||||
: [state] "+r" (state), [seed] "+r" (seed)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
|
||||
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
|
||||
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
|
||||
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
|
||||
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits),
|
||||
[L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
|
||||
[L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits),
|
||||
[L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask),
|
||||
[L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits),
|
||||
[L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices)
|
||||
: [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r)
|
||||
: "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10",
|
||||
"x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20",
|
||||
"x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1",
|
||||
|
@ -11672,22 +11482,7 @@ void mlkem_shake256_blocksx3_seed_neon(word64* state, byte* seed)
|
|||
"str x27, [%x[state], #192]\n\t"
|
||||
"ldp x29, x30, [sp], #0x40\n\t"
|
||||
: [state] "+r" (state), [seed] "+r" (seed)
|
||||
: [L_mlkem_aarch64_q] "S" (L_mlkem_aarch64_q),
|
||||
[L_mlkem_aarch64_consts] "S" (L_mlkem_aarch64_consts),
|
||||
[L_sha3_aarch64_r] "S" (L_sha3_aarch64_r),
|
||||
[L_mlkem_aarch64_zetas] "S" (L_mlkem_aarch64_zetas),
|
||||
[L_mlkem_aarch64_zetas_qinv] "S" (L_mlkem_aarch64_zetas_qinv),
|
||||
[L_mlkem_aarch64_zetas_inv] "S" (L_mlkem_aarch64_zetas_inv),
|
||||
[L_mlkem_aarch64_zetas_inv_qinv] "S" (L_mlkem_aarch64_zetas_inv_qinv),
|
||||
[L_mlkem_aarch64_zetas_mul] "S" (L_mlkem_aarch64_zetas_mul),
|
||||
[L_mlkem_to_msg_low] "S" (L_mlkem_to_msg_low),
|
||||
[L_mlkem_to_msg_high] "S" (L_mlkem_to_msg_high),
|
||||
[L_mlkem_to_msg_bits] "S" (L_mlkem_to_msg_bits),
|
||||
[L_mlkem_from_msg_q1half] "S" (L_mlkem_from_msg_q1half),
|
||||
[L_mlkem_from_msg_bits] "S" (L_mlkem_from_msg_bits),
|
||||
[L_mlkem_rej_uniform_mask] "S" (L_mlkem_rej_uniform_mask),
|
||||
[L_mlkem_rej_uniform_bits] "S" (L_mlkem_rej_uniform_bits),
|
||||
[L_mlkem_rej_uniform_indices] "S" (L_mlkem_rej_uniform_indices)
|
||||
: [L_sha3_aarch64_r] "i" (L_sha3_aarch64_r)
|
||||
: "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10",
|
||||
"x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20",
|
||||
"x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1",
|
||||
|
|
|
@ -49,7 +49,7 @@ static WC_INLINE void poly1305_blocks_aarch64_16(Poly1305* ctx,
|
|||
__asm__ __volatile__ (
|
||||
/* Check for zero bytes to do. */
|
||||
"CMP %[bytes], #16 \n\t"
|
||||
"BLO L_poly1305_aarch64_16_done_%= \n\t"
|
||||
"B.LO L_poly1305_aarch64_16_done_%= \n\t"
|
||||
|
||||
"MOV x12, #1 \n\t"
|
||||
/* Load h */
|
||||
|
@ -129,7 +129,7 @@ static WC_INLINE void poly1305_blocks_aarch64_16(Poly1305* ctx,
|
|||
|
||||
"SUBS %[bytes], %[bytes], #16\n\t"
|
||||
"ADD %[m], %[m], #16\n\t"
|
||||
"BGT L_poly1305_aarch64_16_loop_%=\n\t"
|
||||
"B.GT L_poly1305_aarch64_16_loop_%=\n\t"
|
||||
|
||||
/* Base 64 -> Base 26 */
|
||||
"MOV x10, #0x3ffffff\n\t"
|
||||
|
@ -146,8 +146,7 @@ static WC_INLINE void poly1305_blocks_aarch64_16(Poly1305* ctx,
|
|||
".align 2 \n\t"
|
||||
"L_poly1305_aarch64_16_done_%=: \n\t"
|
||||
: [bytes] "+r" (bytes), [m] "+r" (m)
|
||||
: [POLY1305_BLOCK_SIZE] "I" (POLY1305_BLOCK_SIZE),
|
||||
[ctx_r64] "m" (ctx->r64[0]), [ctx_h] "r" (ctx->h),
|
||||
: [ctx_r64] "m" (ctx->r64[0]), [ctx_h] "r" (ctx->h),
|
||||
[finished] "r" ((word64)ctx->finished)
|
||||
: "memory", "cc",
|
||||
"x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14",
|
||||
|
@ -161,7 +160,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m,
|
|||
__asm__ __volatile__ (
|
||||
/* If less than 4 blocks to process then use regular method */
|
||||
"CMP %[bytes], #64 \n\t"
|
||||
"BLO L_poly1305_aarch64_64_done_%= \n\t"
|
||||
"B.LO L_poly1305_aarch64_64_done_%= \n\t"
|
||||
"MOV x9, #0x3ffffff \n\t"
|
||||
/* Load h */
|
||||
"LDP x20, x22, [%[h]] \n\t"
|
||||
|
@ -189,7 +188,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m,
|
|||
"MOV v26.D[1], x9 \n\t"
|
||||
"DUP v30.4S, v26.S[0] \n\t"
|
||||
"CMP %[bytes], #96 \n\t"
|
||||
"BLO L_poly1305_aarch64_64_start_block_size_64_%= \n\t"
|
||||
"B.LO L_poly1305_aarch64_64_start_block_size_64_%= \n\t"
|
||||
/* Load r^2 to NEON v0, v1, v2, v3, v4 */
|
||||
"LD4 { v0.S-v3.S }[2], [%[r_2]], #16 \n\t"
|
||||
"LD1 { v4.S }[2], [%[r_2]] \n\t"
|
||||
|
@ -363,7 +362,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m,
|
|||
"UMLAL2 v25.2D, v14.4S, v0.4S \n\t"
|
||||
/* If less than six message blocks left then leave loop */
|
||||
"CMP %[bytes], #96 \n\t"
|
||||
"BLS L_poly1305_aarch64_64_loop_128_final_%= \n\t"
|
||||
"B.LS L_poly1305_aarch64_64_loop_128_final_%= \n\t"
|
||||
/* Load m */
|
||||
/* Load four message blocks to NEON v10, v11, v12, v13, v14 */
|
||||
"LD4 { v10.4S-v13.4S }, [%[m]], #64 \n\t"
|
||||
|
@ -493,7 +492,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m,
|
|||
"MOV v19.S[1], v19.S[2] \n\t"
|
||||
/* If less than 2 blocks left go straight to final multiplication. */
|
||||
"CMP %[bytes], #32 \n\t"
|
||||
"BLO L_poly1305_aarch64_64_last_mult_%= \n\t"
|
||||
"B.LO L_poly1305_aarch64_64_last_mult_%= \n\t"
|
||||
/* Else go to one loop of L_poly1305_aarch64_64_loop_64 */
|
||||
"B L_poly1305_aarch64_64_loop_64_%= \n\t"
|
||||
"\n"
|
||||
|
@ -677,7 +676,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m,
|
|||
"MOV v19.S[1], v19.S[2] \n\t"
|
||||
/* If at least two message blocks left then loop_64 */
|
||||
"CMP %[bytes], #32 \n\t"
|
||||
"BHS L_poly1305_aarch64_64_loop_64_%= \n\t"
|
||||
"B.HS L_poly1305_aarch64_64_loop_64_%= \n\t"
|
||||
"\n"
|
||||
".align 2 \n\t"
|
||||
"L_poly1305_aarch64_64_last_mult_%=: \n\t"
|
||||
|
@ -821,8 +820,7 @@ void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m,
|
|||
: [bytes] "+r" (bytes),
|
||||
[m] "+r" (m),
|
||||
[ctx] "+m" (ctx)
|
||||
: [POLY1305_BLOCK_SIZE] "I" (POLY1305_BLOCK_SIZE),
|
||||
[h] "r" (ctx->h),
|
||||
: [h] "r" (ctx->h),
|
||||
[r] "r" (ctx->r),
|
||||
[r_2] "r" (ctx->r_2),
|
||||
[r_4] "r" (ctx->r_4),
|
||||
|
|
|
@ -162,7 +162,7 @@ void BlockSha3_crypto(word64* state)
|
|||
"st4 {v20.d, v21.d, v22.d, v23.d}[0], [%x[state]], #32\n\t"
|
||||
"st1 {v24.1d}, [%x[state]]\n\t"
|
||||
: [state] "+r" (state)
|
||||
: [L_SHA3_transform_crypto_r] "S" (L_SHA3_transform_crypto_r)
|
||||
: [L_SHA3_transform_crypto_r] "i" (L_SHA3_transform_crypto_r)
|
||||
: "memory", "cc", "x1", "x2", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
|
||||
"v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16",
|
||||
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
|
||||
|
@ -369,7 +369,7 @@ void BlockSha3_base(word64* state)
|
|||
"str x26, [%x[state], #192]\n\t"
|
||||
"ldp x29, x30, [sp], #0x40\n\t"
|
||||
: [state] "+r" (state)
|
||||
: [L_SHA3_transform_base_r] "S" (L_SHA3_transform_base_r)
|
||||
: [L_SHA3_transform_base_r] "i" (L_SHA3_transform_base_r)
|
||||
: "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9",
|
||||
"x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19",
|
||||
"x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28"
|
||||
|
|
|
@ -1004,8 +1004,8 @@ void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, word32 len)
|
|||
"stp x8, x9, [%x[sha512], #32]\n\t"
|
||||
"stp x10, x11, [%x[sha512], #48]\n\t"
|
||||
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len)
|
||||
: [L_SHA512_transform_neon_len_k] "S" (L_SHA512_transform_neon_len_k),
|
||||
[L_SHA512_transform_neon_len_r8] "S" (L_SHA512_transform_neon_len_r8)
|
||||
: [L_SHA512_transform_neon_len_k] "i" (L_SHA512_transform_neon_len_k),
|
||||
[L_SHA512_transform_neon_len_r8] "i" (L_SHA512_transform_neon_len_r8)
|
||||
: "memory", "cc", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10",
|
||||
"x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20",
|
||||
"x21", "x22", "x23", "x24", "x25", "x26", "x27", "v0", "v1", "v2",
|
||||
|
@ -1580,7 +1580,7 @@ void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data,
|
|||
/* Store digest back */
|
||||
"st1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[sha512]]\n\t"
|
||||
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len)
|
||||
: [L_SHA512_trans_crypto_len_k] "S" (L_SHA512_trans_crypto_len_k)
|
||||
: [L_SHA512_trans_crypto_len_k] "i" (L_SHA512_trans_crypto_len_k)
|
||||
: "memory", "cc", "x3", "x4", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
|
||||
"v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16",
|
||||
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
|
||||
|
|
|
@ -647,9 +647,7 @@ static int Sha512_Family_Final(wc_Sha512* sha512, byte* hash,
|
|||
{
|
||||
int ret;
|
||||
int digestSz;
|
||||
int (*initfp)(wc_Sha512*);
|
||||
|
||||
(void)initfp;
|
||||
int (*initfp)(wc_Sha512*) = NULL;
|
||||
|
||||
if (sha512 == NULL || hash == NULL) {
|
||||
return BAD_FUNC_ARG;
|
||||
|
|
Loading…
Reference in New Issue