mirror of https://github.com/wolfSSL/wolfssl.git
Kyber ARM32 ASM: add assembly using base instructions
Support ARMv4 up to ARMv8. Base instructions only - a faster implementation will use NEON.
pull/8040/head
parent
d0d802a2df
commit
f7afc47d98
|
@ -1199,7 +1199,13 @@ src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-kyber-asm_
|
|||
else
|
||||
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-kyber-asm.S
|
||||
endif !BUILD_ARMASM_INLINE
|
||||
endif BUILD_ARM_THUMB
|
||||
else
|
||||
if BUILD_ARMASM_INLINE
|
||||
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-kyber-asm_c.c
|
||||
else
|
||||
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-kyber-asm.S
|
||||
endif !BUILD_ARMASM_INLINE
|
||||
endif !BUILD_ARM_THUMB
|
||||
endif BUILD_ARMASM
|
||||
if !BUILD_X86_ASM
|
||||
if BUILD_INTELASM
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -218,7 +218,7 @@ L_poly1305_arm32_16_loop:
|
|||
# Load length
|
||||
ldr r2, [sp, #20]
|
||||
# Reduce mod 2^130 - 5
|
||||
bic r3, r8, #3
|
||||
bic r3, r8, #0x3
|
||||
and r8, r8, #3
|
||||
adds r4, r4, r3
|
||||
lsr r3, r3, #2
|
||||
|
|
|
@ -243,7 +243,7 @@ void poly1305_blocks_arm32_16(Poly1305* ctx_p, const byte* m_p, word32 len_p,
|
|||
/* Load length */
|
||||
"ldr %[len], [sp, #20]\n\t"
|
||||
/* Reduce mod 2^130 - 5 */
|
||||
"bic %[notLast], r8, #3\n\t"
|
||||
"bic %[notLast], r8, #0x3\n\t"
|
||||
"and r8, r8, #3\n\t"
|
||||
"adds r4, r4, %[notLast]\n\t"
|
||||
"lsr %[notLast], %[notLast], #2\n\t"
|
||||
|
|
|
@ -182,7 +182,7 @@ const sword16 zetas_inv[KYBER_N / 2] = {
|
|||
"SSUB16 " #a ", " #a ", r10\n\t"
|
||||
|
||||
|
||||
#if !(defined(__thumb__) || (defined(__aarch64__)) && defined(WOLFSSL_ARMASM))
|
||||
#if !defined(WOLFSSL_ARMASM)
|
||||
/* Number-Theoretic Transform.
|
||||
*
|
||||
* @param [in, out] r Polynomial to transform.
|
||||
|
@ -2154,7 +2154,7 @@ int kyber_kdf(byte* seed, int seedLen, byte* out, int outLen)
|
|||
}
|
||||
#endif
|
||||
|
||||
#if !(defined(WOLFSSL_ARMASM) && (defined(__aarch64__) || defined(__thumb__)))
|
||||
#if !defined(WOLFSSL_ARMASM)
|
||||
/* Rejection sampling on uniform random bytes to generate uniform random
|
||||
* integers mod q.
|
||||
*
|
||||
|
@ -3350,7 +3350,7 @@ int kyber_cmp(const byte* a, const byte* b, int sz)
|
|||
|
||||
/******************************************************************************/
|
||||
|
||||
#if !(defined(__thumb__) || (defined(__aarch64__)) && defined(WOLFSSL_ARMASM))
|
||||
#if !defined(WOLFSSL_ARMASM)
|
||||
|
||||
/* Conditional subtraction of q to each coefficient of a polynomial.
|
||||
*
|
||||
|
@ -3371,10 +3371,14 @@ static KYBER_NOINLINE void kyber_csubq_c(sword16* p)
|
|||
|
||||
#define kyber_csubq_c kyber_csubq_neon
|
||||
|
||||
#else
|
||||
#elif defined(__thumb__)
|
||||
|
||||
#define kyber_csubq_c kyber_thumb2_csubq
|
||||
|
||||
#else
|
||||
|
||||
#define kyber_csubq_c kyber_arm32_csubq
|
||||
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
|
|
|
@ -320,12 +320,28 @@ WOLFSSL_LOCAL void kyber_to_msg_neon(byte* msg, sword16* p);
|
|||
WOLFSSL_LOCAL void kyber_thumb2_ntt(sword16* r);
|
||||
WOLFSSL_LOCAL void kyber_thumb2_invntt(sword16* r);
|
||||
WOLFSSL_LOCAL void kyber_thumb2_basemul_mont(sword16* r, const sword16* a,
|
||||
const sword16* b);
|
||||
const sword16* b);
|
||||
WOLFSSL_LOCAL void kyber_thumb2_basemul_mont_add(sword16* r, const sword16* a,
|
||||
const sword16* b);
|
||||
const sword16* b);
|
||||
WOLFSSL_LOCAL void kyber_thumb2_csubq(sword16* p);
|
||||
WOLFSSL_LOCAL unsigned int kyber_thumb2_rej_uniform(sword16* p,
|
||||
unsigned int len, const byte* r, unsigned int rLen);
|
||||
#elif defined(WOLFSSL_ARMASM)
|
||||
#define kyber_ntt kyber_arm32_ntt
|
||||
#define kyber_invntt kyber_arm32_invntt
|
||||
#define kyber_basemul_mont kyber_arm32_basemul_mont
|
||||
#define kyber_basemul_mont_add kyber_arm32_basemul_mont_add
|
||||
#define kyber_rej_uniform_c kyber_arm32_rej_uniform
|
||||
|
||||
WOLFSSL_LOCAL void kyber_arm32_ntt(sword16* r);
|
||||
WOLFSSL_LOCAL void kyber_arm32_invntt(sword16* r);
|
||||
WOLFSSL_LOCAL void kyber_arm32_basemul_mont(sword16* r, const sword16* a,
|
||||
const sword16* b);
|
||||
WOLFSSL_LOCAL void kyber_arm32_basemul_mont_add(sword16* r, const sword16* a,
|
||||
const sword16* b);
|
||||
WOLFSSL_LOCAL void kyber_arm32_csubq(sword16* p);
|
||||
WOLFSSL_LOCAL unsigned int kyber_arm32_rej_uniform(sword16* p, unsigned int len,
|
||||
const byte* r, unsigned int rLen);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
Loading…
Reference in New Issue