AES ARM32 and Thumb2 ASM: fixup ARM32 and add Thumb2

Fix which functions and data are compiled in depending on defines.
Better handling of constants.
Also fix Aarch64 ed25519 inline assembly.
pull/6731/head
Sean Parkinson 2023-08-25 13:40:15 +10:00
parent 70c362f680
commit 0638ec234b
22 changed files with 6511 additions and 652 deletions

View File

@ -2268,7 +2268,7 @@ AC_ARG_ENABLE([aescbc],
if test "$ENABLED_AESCBC" = "no"
then
AM_CFLAGS="$AM_CFLAGS -DNO_AES_CBC"
AM_CCASFLAGS="$AM_CCASFLAGS -DHAVE_AES_CBC"
AM_CCASFLAGS="$AM_CCASFLAGS -DNO_AES_CBC"
fi
# AES-CBC length checks (checks that input lengths are multiples of block size)

View File

@ -157,8 +157,8 @@ endif
if BUILD_AES
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes.c
if BUILD_ARMASM_NEON
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-aes.c
if BUILD_ARMASM_NEON
if !BUILD_ARMASM_CRYPTO
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c
@ -166,7 +166,15 @@ else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm.S
endif !BUILD_ARMASM_INLINE
endif !BUILD_ARMASM_CRYPTO
endif BUILD_ARMASM_NEON
else
if BUILD_ARMASM
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm.S
endif !BUILD_ARMASM_INLINE
endif BUILD_ARMASM
endif !BUILD_ARMASM_NEON
endif BUILD_AES
if BUILD_AESNI
@ -401,16 +409,26 @@ endif
if !BUILD_FIPS_CURRENT
if BUILD_AES
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes.c
if BUILD_ARMASM_NEON
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-aes.c
if BUILD_ARMASM_NEON
if !BUILD_ARMASM_CRYPTO
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm.S
endif !BUILD_ARMASM_INLINE
endif !BUILD_ARMASM_CRYPTO
endif BUILD_ARMASM_NEON
else
if BUILD_ARMASM
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm.S
endif !BUILD_ARMASM_INLINE
endif BUILD_ARMASM
endif !BUILD_ARMASM_NEON
if BUILD_AFALG
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/af_alg/afalg_aes.c
endif BUILD_AFALG

View File

@ -97,7 +97,7 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
#include <wolfcrypt/src/misc.c>
#endif
#if !defined(WOLFSSL_ARMASM) || defined(WOLFSSL_ARMASM_NO_NEON)
#ifndef WOLFSSL_ARMASM
#ifdef WOLFSSL_IMX6_CAAM_BLOB
/* case of possibly not using hardware acceleration for AES but using key
@ -4573,7 +4573,7 @@ int wc_AesSetIV(Aes* aes, const byte* iv)
#endif /* NEED_AES_CTR_SOFT */
#endif /* WOLFSSL_AES_COUNTER */
#endif /* !WOLFSSL_ARMASM || WOLFSSL_ARMASM_NO_NEON */
#endif /* !WOLFSSL_ARMASM */
/*
@ -4620,7 +4620,7 @@ static WC_INLINE void IncCtr(byte* ctr, word32 ctrSz)
#endif
#if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_NO_NEON)
#ifdef WOLFSSL_ARMASM
/* implementation is located in wolfcrypt/src/port/arm/armv8-aes.c */
#elif defined(WOLFSSL_AFALG)
@ -8851,7 +8851,7 @@ int wc_AesCcmCheckTagSize(int sz)
return 0;
}
#if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_NO_NEON)
#ifdef WOLFSSL_ARMASM
/* implementation located in wolfcrypt/src/port/arm/armv8-aes.c */
#elif defined(HAVE_COLDFIRE_SEC)

View File

@ -921,10 +921,14 @@ int ge_compress_key(byte* out, const byte* xIn, const byte* yIn, word32 keySz)
{
ge_p2 g;
byte bArray[ED25519_KEY_SIZE];
byte x[ED25519_KEY_SIZE];
byte y[ED25519_KEY_SIZE];
word32 i;
fe_frombytes(g.X, xIn);
fe_frombytes(g.Y, yIn);
XMEMCPY(x, xIn, ED25519_KEY_SIZE);
XMEMCPY(y, yIn, ED25519_KEY_SIZE);
fe_frombytes(g.X, x);
fe_frombytes(g.Y, y);
fe_1(g.Z);
ge_tobytes(bArray, &g);

View File

@ -33,6 +33,7 @@
#if !defined(__aarch64__) && defined(__arm__)
#ifndef WOLFSSL_ARMASM_INLINE
#ifndef NO_AES
#ifdef HAVE_AES_DECRYPT
.text
.type L_AES_ARM32_td_data, %object
.size L_AES_ARM32_td_data, 1024
@ -294,6 +295,8 @@ L_AES_ARM32_td_data:
.word 0x70d532b6
.word 0x74486c5c
.word 0x42d0b857
#endif /* HAVE_AES_DECRYPT */
#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
.text
.type L_AES_ARM32_te_data, %object
.size L_AES_ARM32_te_data, 1024
@ -555,18 +558,23 @@ L_AES_ARM32_te_data:
.word 0xfca85454
.word 0xd66dbbbb
.word 0x3a2c1616
#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#ifdef HAVE_AES_DECRYPT
.text
.type L_AES_ARM32_td, %object
.size L_AES_ARM32_td, 12
.align 4
L_AES_ARM32_td:
.word L_AES_ARM32_td_data
#endif /* HAVE_AES_DECRYPT */
#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
.text
.type L_AES_ARM32_te, %object
.size L_AES_ARM32_te, 12
.align 4
L_AES_ARM32_te:
.word L_AES_ARM32_te_data
#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#ifdef HAVE_AES_DECRYPT
.text
.align 4
@ -574,8 +582,10 @@ L_AES_ARM32_te:
.type AES_invert_key, %function
AES_invert_key:
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
ldr r12, L_AES_ARM32_te
ldr lr, L_AES_ARM32_td
adr r12, L_AES_ARM32_te
ldr r12, [r12]
adr lr, L_AES_ARM32_td
ldr lr, [lr]
add r10, r0, r1, lsl #4
mov r11, r1
L_AES_invert_key_loop:
@ -681,7 +691,8 @@ L_AES_ARM32_rcon:
.type AES_set_encrypt_key, %function
AES_set_encrypt_key:
push {r4, r5, r6, r7, r8, lr}
ldr r8, L_AES_ARM32_te
adr r8, L_AES_ARM32_te
ldr r8, [r8]
adr lr, L_AES_ARM32_rcon
cmp r1, #0x80
beq L_AES_set_encrypt_key_start_128
@ -911,7 +922,6 @@ L_AES_set_encrypt_key_loop_128:
L_AES_set_encrypt_key_end:
pop {r4, r5, r6, r7, r8, pc}
.size AES_set_encrypt_key,.-AES_set_encrypt_key
#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
.text
.align 4
.globl AES_encrypt_block
@ -1123,12 +1133,14 @@ L_AES_encrypt_block_nr:
eor r7, r7, r11
pop {pc}
.size AES_encrypt_block,.-AES_encrypt_block
#if defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
.text
.type L_AES_ARM32_te_ecb, %object
.size L_AES_ARM32_te_ecb, 12
.align 4
L_AES_ARM32_te_ecb:
.word L_AES_ARM32_te_data
#endif /* HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
.text
.align 4
@ -1137,7 +1149,8 @@ L_AES_ARM32_te_ecb:
AES_ECB_encrypt:
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
mov lr, r0
ldr r0, L_AES_ARM32_te_ecb
adr r0, L_AES_ARM32_te_ecb
ldr r0, [r0]
ldr r12, [sp, #36]
push {r3}
cmp r12, #10
@ -1259,7 +1272,8 @@ AES_CBC_encrypt:
ldr r8, [sp, #36]
ldr r9, [sp, #40]
mov lr, r0
ldr r0, L_AES_ARM32_te_ecb
adr r0, L_AES_ARM32_te_ecb
ldr r0, [r0]
ldm r9, {r4, r5, r6, r7}
push {r3, r9}
cmp r8, #10
@ -1394,7 +1408,8 @@ AES_CTR_encrypt:
ldr r12, [sp, #36]
ldr r8, [sp, #40]
mov lr, r0
ldr r0, L_AES_ARM32_te_ecb
adr r0, L_AES_ARM32_te_ecb
ldr r0, [r0]
ldm r8, {r4, r5, r6, r7}
rev r4, r4
rev r5, r5
@ -1540,7 +1555,6 @@ L_AES_CTR_encrypt_end:
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.size AES_CTR_encrypt,.-AES_CTR_encrypt
#endif /* WOLFSSL_AES_COUNTER */
#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#ifdef HAVE_AES_DECRYPT
#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC)
.text
@ -2030,7 +2044,8 @@ AES_ECB_decrypt:
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
ldr r8, [sp, #36]
mov lr, r0
ldr r0, L_AES_ARM32_td_ecb
adr r0, L_AES_ARM32_td_ecb
ldr r0, [r0]
adr r12, L_AES_ARM32_td4
cmp r8, #10
beq L_AES_ECB_decrypt_start_block_128
@ -2147,7 +2162,8 @@ AES_CBC_decrypt:
ldr r8, [sp, #36]
ldr r4, [sp, #40]
mov lr, r0
ldr r0, L_AES_ARM32_td_ecb
adr r0, L_AES_ARM32_td_ecb
ldr r0, [r0]
adr r12, L_AES_ARM32_td4
push {r3, r4}
cmp r8, #10
@ -3118,7 +3134,8 @@ AES_GCM_encrypt:
ldr r12, [sp, #36]
ldr r8, [sp, #40]
mov lr, r0
ldr r0, L_AES_ARM32_te_gcm
adr r0, L_AES_ARM32_te_gcm
ldr r0, [r0]
ldm r8, {r4, r5, r6, r7}
rev r4, r4
rev r5, r5

View File

@ -28,6 +28,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__)
@ -36,10 +37,12 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM_INLINE
#ifndef NO_AES
#include <wolfssl/wolfcrypt/aes.h>
#ifdef HAVE_AES_DECRYPT
static const uint32_t L_AES_ARM32_td_data[] = {
0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e,
0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
@ -107,6 +110,8 @@ static const uint32_t L_AES_ARM32_td_data[] = {
0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857,
};
#endif /* HAVE_AES_DECRYPT */
#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
static const uint32_t L_AES_ARM32_te_data[] = {
0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
@ -174,18 +179,25 @@ static const uint32_t L_AES_ARM32_te_data[] = {
0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
};
#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#ifdef HAVE_AES_DECRYPT
static const uint32_t* L_AES_ARM32_td = L_AES_ARM32_td_data;
#endif /* HAVE_AES_DECRYPT */
#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
static const uint32_t* L_AES_ARM32_te = L_AES_ARM32_te_data;
#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#ifdef HAVE_AES_DECRYPT
void AES_invert_key(unsigned char* ks, word32 rounds);
void AES_invert_key(unsigned char* ks_p, word32 rounds_p)
{
register unsigned char* ks asm ("r0") = ks_p;
register word32 rounds asm ("r1") = rounds_p;
register unsigned char* ks asm ("r0") = (unsigned char*)ks_p;
register word32 rounds asm ("r1") = (word32)rounds_p;
register uint32_t* L_AES_ARM32_te_c asm ("r2") = (uint32_t*)L_AES_ARM32_te;
register uint32_t* L_AES_ARM32_td_c asm ("r3") = (uint32_t*)L_AES_ARM32_td;
__asm__ __volatile__ (
"ldr r12, %[L_AES_ARM32_te]\n\t"
"ldr lr, %[L_AES_ARM32_td]\n\t"
"mov r12, %[L_AES_ARM32_te]\n\t"
"mov lr, %[L_AES_ARM32_td]\n\t"
"add r10, %[ks], %[rounds], lsl #4\n\t"
"mov r11, %[rounds]\n\t"
"\n"
@ -269,9 +281,9 @@ void AES_invert_key(unsigned char* ks_p, word32 rounds_p)
"str r8, [%[ks]], #4\n\t"
"subs r11, r11, #1\n\t"
"bne L_AES_invert_key_mix_loop_%=\n\t"
: [ks] "+r" (ks), [rounds] "+r" (rounds)
: [L_AES_ARM32_te] "g" (L_AES_ARM32_te), [L_AES_ARM32_td] "g" (L_AES_ARM32_td)
: "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
: [ks] "+r" (ks), [rounds] "+r" (rounds), [L_AES_ARM32_te] "+r" (L_AES_ARM32_te_c), [L_AES_ARM32_td] "+r" (L_AES_ARM32_td_c)
:
: "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
);
}
@ -285,9 +297,11 @@ static const uint32_t L_AES_ARM32_rcon[] = {
void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks);
void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char* ks_p)
{
register const unsigned char* key asm ("r0") = key_p;
register word32 len asm ("r1") = len_p;
register unsigned char* ks asm ("r2") = ks_p;
register const unsigned char* key asm ("r0") = (const unsigned char*)key_p;
register word32 len asm ("r1") = (word32)len_p;
register unsigned char* ks asm ("r2") = (unsigned char*)ks_p;
register uint32_t* L_AES_ARM32_te_c asm ("r3") = (uint32_t*)L_AES_ARM32_te;
register uint32_t* L_AES_ARM32_rcon_c asm ("r4") = (uint32_t*)&L_AES_ARM32_rcon;
__asm__ __volatile__ (
"mov r8, %[L_AES_ARM32_te]\n\t"
@ -524,20 +538,19 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char
"bne L_AES_set_encrypt_key_loop_128_%=\n\t"
"\n"
"L_AES_set_encrypt_key_end_%=: \n\t"
: [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks)
: [L_AES_ARM32_te] "g" (L_AES_ARM32_te), [L_AES_ARM32_rcon] "g" (L_AES_ARM32_rcon)
: "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8"
: [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks), [L_AES_ARM32_te] "+r" (L_AES_ARM32_te_c), [L_AES_ARM32_rcon] "+r" (L_AES_ARM32_rcon_c)
:
: "memory", "r12", "lr", "r5", "r6", "r7", "r8"
);
}
#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
void AES_encrypt_block(const uint32_t* te, int nr, int len, const uint32_t* ks);
void AES_encrypt_block(const uint32_t* te_p, int nr_p, int len_p, const uint32_t* ks_p)
{
register const uint32_t* te asm ("r0") = te_p;
register int nr asm ("r1") = nr_p;
register int len asm ("r2") = len_p;
register const uint32_t* ks asm ("r3") = ks_p;
register const uint32_t* te asm ("r0") = (const uint32_t*)te_p;
register int nr asm ("r1") = (int)nr_p;
register int len asm ("r2") = (int)len_p;
register const uint32_t* ks asm ("r3") = (const uint32_t*)ks_p;
__asm__ __volatile__ (
"\n"
@ -750,20 +763,23 @@ void AES_encrypt_block(const uint32_t* te_p, int nr_p, int len_p, const uint32_t
);
}
#if defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
static const uint32_t* L_AES_ARM32_te_ecb = L_AES_ARM32_te_data;
#endif /* HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr);
void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p)
{
register const unsigned char* in asm ("r0") = in_p;
register unsigned char* out asm ("r1") = out_p;
register unsigned long len asm ("r2") = len_p;
register const unsigned char* ks asm ("r3") = ks_p;
register int nr asm ("r4") = nr_p;
register const unsigned char* in asm ("r0") = (const unsigned char*)in_p;
register unsigned char* out asm ("r1") = (unsigned char*)out_p;
register unsigned long len asm ("r2") = (unsigned long)len_p;
register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p;
register int nr asm ("r4") = (int)nr_p;
register uint32_t* L_AES_ARM32_te_ecb_c asm ("r5") = (uint32_t*)L_AES_ARM32_te_ecb;
__asm__ __volatile__ (
"mov lr, %[in]\n\t"
"ldr r0, %[L_AES_ARM32_te_ecb]\n\t"
"mov r0, %[L_AES_ARM32_te_ecb]\n\t"
"mov r12, r4\n\t"
"push {%[ks]}\n\t"
"cmp r12, #10\n\t"
@ -878,9 +894,9 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"\n"
"L_AES_ECB_encrypt_end_%=: \n\t"
"pop {%[ks]}\n\t"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr)
: [L_AES_ARM32_te_ecb] "g" (L_AES_ARM32_te_ecb)
: "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_ARM32_te_ecb] "+r" (L_AES_ARM32_te_ecb_c)
:
: "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
);
(void)nr;
}
@ -890,18 +906,19 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv);
void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p)
{
register const unsigned char* in asm ("r0") = in_p;
register unsigned char* out asm ("r1") = out_p;
register unsigned long len asm ("r2") = len_p;
register const unsigned char* ks asm ("r3") = ks_p;
register int nr asm ("r4") = nr_p;
register unsigned char* iv asm ("r5") = iv_p;
register const unsigned char* in asm ("r0") = (const unsigned char*)in_p;
register unsigned char* out asm ("r1") = (unsigned char*)out_p;
register unsigned long len asm ("r2") = (unsigned long)len_p;
register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p;
register int nr asm ("r4") = (int)nr_p;
register unsigned char* iv asm ("r5") = (unsigned char*)iv_p;
register uint32_t* L_AES_ARM32_te_ecb_c asm ("r6") = (uint32_t*)L_AES_ARM32_te_ecb;
__asm__ __volatile__ (
"mov r8, r4\n\t"
"mov r9, r5\n\t"
"mov lr, %[in]\n\t"
"ldr r0, %[L_AES_ARM32_te_ecb]\n\t"
"mov r0, %[L_AES_ARM32_te_ecb]\n\t"
"ldm r9, {r4, r5, r6, r7}\n\t"
"push {%[ks], r9}\n\t"
"cmp r8, #10\n\t"
@ -1029,9 +1046,9 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"L_AES_CBC_encrypt_end_%=: \n\t"
"pop {%[ks], r9}\n\t"
"stm r9, {r4, r5, r6, r7}\n\t"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv)
: [L_AES_ARM32_te_ecb] "g" (L_AES_ARM32_te_ecb)
: "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), [L_AES_ARM32_te_ecb] "+r" (L_AES_ARM32_te_ecb_c)
:
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
);
(void)nr;
(void)iv;
@ -1042,18 +1059,19 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr);
void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p)
{
register const unsigned char* in asm ("r0") = in_p;
register unsigned char* out asm ("r1") = out_p;
register unsigned long len asm ("r2") = len_p;
register const unsigned char* ks asm ("r3") = ks_p;
register int nr asm ("r4") = nr_p;
register unsigned char* ctr asm ("r5") = ctr_p;
register const unsigned char* in asm ("r0") = (const unsigned char*)in_p;
register unsigned char* out asm ("r1") = (unsigned char*)out_p;
register unsigned long len asm ("r2") = (unsigned long)len_p;
register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p;
register int nr asm ("r4") = (int)nr_p;
register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p;
register uint32_t* L_AES_ARM32_te_ecb_c asm ("r6") = (uint32_t*)L_AES_ARM32_te_ecb;
__asm__ __volatile__ (
"mov r12, r4\n\t"
"mov r8, r5\n\t"
"mov lr, %[in]\n\t"
"ldr r0, %[L_AES_ARM32_te_ecb]\n\t"
"mov r0, %[L_AES_ARM32_te_ecb]\n\t"
"ldm r8, {r4, r5, r6, r7}\n\t"
"rev r4, r4\n\t"
"rev r5, r5\n\t"
@ -1202,23 +1220,22 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"rev r6, r6\n\t"
"rev r7, r7\n\t"
"stm r8, {r4, r5, r6, r7}\n\t"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr)
: [L_AES_ARM32_te_ecb] "g" (L_AES_ARM32_te_ecb)
: "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), [L_AES_ARM32_te_ecb] "+r" (L_AES_ARM32_te_ecb_c)
:
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
);
(void)nr;
(void)ctr;
}
#endif /* WOLFSSL_AES_COUNTER */
#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#ifdef HAVE_AES_DECRYPT
#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC)
void AES_decrypt_block(const uint32_t* td, int nr);
void AES_decrypt_block(const uint32_t* td_p, int nr_p)
{
register const uint32_t* td asm ("r0") = td_p;
register int nr asm ("r1") = nr_p;
register const uint32_t* td asm ("r0") = (const uint32_t*)td_p;
register int nr asm ("r1") = (int)nr_p;
__asm__ __volatile__ (
"\n"
@ -1471,17 +1488,19 @@ static const unsigned char L_AES_ARM32_td4[] = {
void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr);
void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p)
{
register const unsigned char* in asm ("r0") = in_p;
register unsigned char* out asm ("r1") = out_p;
register unsigned long len asm ("r2") = len_p;
register const unsigned char* ks asm ("r3") = ks_p;
register int nr asm ("r4") = nr_p;
register const unsigned char* in asm ("r0") = (const unsigned char*)in_p;
register unsigned char* out asm ("r1") = (unsigned char*)out_p;
register unsigned long len asm ("r2") = (unsigned long)len_p;
register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p;
register int nr asm ("r4") = (int)nr_p;
register uint32_t* L_AES_ARM32_td_ecb_c asm ("r5") = (uint32_t*)L_AES_ARM32_td_ecb;
register unsigned char* L_AES_ARM32_td4_c asm ("r6") = (unsigned char*)&L_AES_ARM32_td4;
__asm__ __volatile__ (
"mov r8, r4\n\t"
"mov lr, %[in]\n\t"
"ldr r0, %[L_AES_ARM32_td_ecb]\n\t"
"ldr r12, %[L_AES_ARM32_td4]\n\t"
"mov r0, %[L_AES_ARM32_td_ecb]\n\t"
"mov r12, %[L_AES_ARM32_td4]\n\t"
"cmp r8, #10\n\t"
"beq L_AES_ECB_decrypt_start_block_128_%=\n\t"
"cmp r8, #12\n\t"
@ -1590,9 +1609,9 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"bne L_AES_ECB_decrypt_loop_block_128_%=\n\t"
"\n"
"L_AES_ECB_decrypt_end_%=: \n\t"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr)
: [L_AES_ARM32_td_ecb] "g" (L_AES_ARM32_td_ecb), [L_AES_ARM32_td4] "g" (L_AES_ARM32_td4)
: "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_ARM32_td_ecb] "+r" (L_AES_ARM32_td_ecb_c), [L_AES_ARM32_td4] "+r" (L_AES_ARM32_td4_c)
:
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
);
(void)nr;
}
@ -1602,19 +1621,21 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv);
void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p)
{
register const unsigned char* in asm ("r0") = in_p;
register unsigned char* out asm ("r1") = out_p;
register unsigned long len asm ("r2") = len_p;
register const unsigned char* ks asm ("r3") = ks_p;
register int nr asm ("r4") = nr_p;
register unsigned char* iv asm ("r5") = iv_p;
register const unsigned char* in asm ("r0") = (const unsigned char*)in_p;
register unsigned char* out asm ("r1") = (unsigned char*)out_p;
register unsigned long len asm ("r2") = (unsigned long)len_p;
register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p;
register int nr asm ("r4") = (int)nr_p;
register unsigned char* iv asm ("r5") = (unsigned char*)iv_p;
register uint32_t* L_AES_ARM32_td_ecb_c asm ("r6") = (uint32_t*)L_AES_ARM32_td_ecb;
register unsigned char* L_AES_ARM32_td4_c asm ("r7") = (unsigned char*)&L_AES_ARM32_td4;
__asm__ __volatile__ (
"mov r8, r4\n\t"
"mov r4, r5\n\t"
"mov lr, %[in]\n\t"
"ldr r0, %[L_AES_ARM32_td_ecb]\n\t"
"ldr r12, %[L_AES_ARM32_td4]\n\t"
"mov r0, %[L_AES_ARM32_td_ecb]\n\t"
"mov r12, %[L_AES_ARM32_td4]\n\t"
"push {%[ks]-r4}\n\t"
"cmp r8, #10\n\t"
"beq L_AES_CBC_decrypt_loop_block_128_%=\n\t"
@ -1992,9 +2013,9 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"\n"
"L_AES_CBC_decrypt_end_%=: \n\t"
"pop {%[ks]-r4}\n\t"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv)
: [L_AES_ARM32_td_ecb] "g" (L_AES_ARM32_td_ecb), [L_AES_ARM32_td4] "g" (L_AES_ARM32_td4)
: "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), [L_AES_ARM32_td_ecb] "+r" (L_AES_ARM32_td_ecb_c), [L_AES_ARM32_td4] "+r" (L_AES_ARM32_td4_c)
:
: "memory", "r12", "lr", "r8", "r9", "r10", "r11"
);
(void)nr;
(void)iv;
@ -2014,13 +2035,14 @@ static const uint32_t L_GCM_gmult_len_r[] = {
void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned char* data, unsigned long len);
void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned char* data_p, unsigned long len_p)
{
register unsigned char* x asm ("r0") = x_p;
register const unsigned char** m asm ("r1") = m_p;
register const unsigned char* data asm ("r2") = data_p;
register unsigned long len asm ("r3") = len_p;
register unsigned char* x asm ("r0") = (unsigned char*)x_p;
register const unsigned char** m asm ("r1") = (const unsigned char**)m_p;
register const unsigned char* data asm ("r2") = (const unsigned char*)data_p;
register unsigned long len asm ("r3") = (unsigned long)len_p;
register uint32_t* L_GCM_gmult_len_r_c asm ("r4") = (uint32_t*)&L_GCM_gmult_len_r;
__asm__ __volatile__ (
"ldr lr, %[L_GCM_gmult_len_r]\n\t"
"mov lr, %[L_GCM_gmult_len_r]\n\t"
"\n"
"L_GCM_gmult_len_start_block_%=: \n\t"
"push {r3}\n\t"
@ -2568,9 +2590,9 @@ void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned
"subs %[len], %[len], #16\n\t"
"add %[data], %[data], #16\n\t"
"bne L_GCM_gmult_len_start_block_%=\n\t"
: [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len)
: [L_AES_ARM32_td_ecb] "g" (L_AES_ARM32_td_ecb), [L_AES_ARM32_td4] "g" (L_AES_ARM32_td4), [L_GCM_gmult_len_r] "g" (L_GCM_gmult_len_r)
: "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
: [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len), [L_GCM_gmult_len_r] "+r" (L_GCM_gmult_len_r_c)
:
: "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
);
}
@ -2578,18 +2600,19 @@ static const uint32_t* L_AES_ARM32_te_gcm = L_AES_ARM32_te_data;
void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr);
void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p)
{
register const unsigned char* in asm ("r0") = in_p;
register unsigned char* out asm ("r1") = out_p;
register unsigned long len asm ("r2") = len_p;
register const unsigned char* ks asm ("r3") = ks_p;
register int nr asm ("r4") = nr_p;
register unsigned char* ctr asm ("r5") = ctr_p;
register const unsigned char* in asm ("r0") = (const unsigned char*)in_p;
register unsigned char* out asm ("r1") = (unsigned char*)out_p;
register unsigned long len asm ("r2") = (unsigned long)len_p;
register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p;
register int nr asm ("r4") = (int)nr_p;
register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p;
register uint32_t* L_AES_ARM32_te_gcm_c asm ("r6") = (uint32_t*)L_AES_ARM32_te_gcm;
__asm__ __volatile__ (
"mov r12, r4\n\t"
"mov r8, r5\n\t"
"mov lr, %[in]\n\t"
"ldr r0, %[L_AES_ARM32_te_gcm]\n\t"
"mov r0, %[L_AES_ARM32_te_gcm]\n\t"
"ldm r8, {r4, r5, r6, r7}\n\t"
"rev r4, r4\n\t"
"rev r5, r5\n\t"
@ -2729,9 +2752,9 @@ void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"rev r6, r6\n\t"
"rev r7, r7\n\t"
"stm r8, {r4, r5, r6, r7}\n\t"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr)
: [L_AES_ARM32_te_gcm] "g" (L_AES_ARM32_te_gcm)
: "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), [L_AES_ARM32_te_gcm] "+r" (L_AES_ARM32_te_gcm_c)
:
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
);
(void)nr;
(void)ctr;

View File

@ -28,6 +28,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__)
@ -36,6 +37,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM_INLINE
/* Based on work by: Emil Lenngren
* https://github.com/pornin/X25519-Cortex-M4
@ -50,7 +52,6 @@
void fe_init()
{
__asm__ __volatile__ (
"\n\t"
:
@ -62,7 +63,6 @@ void fe_init()
void fe_add_sub_op(void);
void fe_add_sub_op()
{
__asm__ __volatile__ (
/* Add-Sub */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
@ -269,7 +269,6 @@ void fe_add_sub_op()
void fe_sub_op(void);
void fe_sub_op()
{
__asm__ __volatile__ (
/* Sub */
"ldm r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t"
@ -307,9 +306,9 @@ void fe_sub_op()
void fe_sub(fe r_p, const fe a_p, const fe b_p)
{
register fe r asm ("r0") = r_p;
register const fe a asm ("r1") = a_p;
register const fe b asm ("r2") = b_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
register const sword32* b asm ("r2") = (const sword32*)b_p;
__asm__ __volatile__ (
"bl fe_sub_op\n\t"
@ -322,7 +321,6 @@ void fe_sub(fe r_p, const fe a_p, const fe b_p)
void fe_add_op(void);
void fe_add_op()
{
__asm__ __volatile__ (
/* Add */
"ldm r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t"
@ -361,9 +359,9 @@ void fe_add_op()
void fe_add(fe r_p, const fe a_p, const fe b_p)
{
register fe r asm ("r0") = r_p;
register const fe a asm ("r1") = a_p;
register const fe b asm ("r2") = b_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
register const sword32* b asm ("r2") = (const sword32*)b_p;
__asm__ __volatile__ (
"bl fe_add_op\n\t"
@ -376,8 +374,8 @@ void fe_add(fe r_p, const fe a_p, const fe b_p)
#ifdef HAVE_ED25519
void fe_frombytes(fe out_p, const unsigned char* in_p)
{
register fe out asm ("r0") = out_p;
register const unsigned char* in asm ("r1") = in_p;
register sword32* out asm ("r0") = (sword32*)out_p;
register const unsigned char* in asm ("r1") = (const unsigned char*)in_p;
__asm__ __volatile__ (
"ldm %[in], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
@ -391,8 +389,8 @@ void fe_frombytes(fe out_p, const unsigned char* in_p)
void fe_tobytes(unsigned char* out_p, const fe n_p)
{
register unsigned char* out asm ("r0") = out_p;
register const fe n asm ("r1") = n_p;
register unsigned char* out asm ("r0") = (unsigned char*)out_p;
register const sword32* n asm ("r1") = (const sword32*)n_p;
__asm__ __volatile__ (
"ldm %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
@ -424,7 +422,7 @@ void fe_tobytes(unsigned char* out_p, const fe n_p)
void fe_1(fe n_p)
{
register fe n asm ("r0") = n_p;
register sword32* n asm ("r0") = (sword32*)n_p;
__asm__ __volatile__ (
/* Set one */
@ -463,7 +461,7 @@ void fe_1(fe n_p)
void fe_0(fe n_p)
{
register fe n asm ("r0") = n_p;
register sword32* n asm ("r0") = (sword32*)n_p;
__asm__ __volatile__ (
/* Set zero */
@ -501,8 +499,8 @@ void fe_0(fe n_p)
void fe_copy(fe r_p, const fe a_p)
{
register fe r asm ("r0") = r_p;
register const fe a asm ("r1") = a_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
/* Copy */
@ -562,8 +560,8 @@ void fe_copy(fe r_p, const fe a_p)
void fe_neg(fe r_p, const fe a_p)
{
register fe r asm ("r0") = r_p;
register const fe a asm ("r1") = a_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"mvn lr, #0\n\t"
@ -589,7 +587,7 @@ void fe_neg(fe r_p, const fe a_p)
int fe_isnonzero(const fe a_p)
{
register const fe a asm ("r0") = a_p;
register const sword32* a asm ("r0") = (const sword32*)a_p;
__asm__ __volatile__ (
"ldm %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
@ -628,7 +626,7 @@ int fe_isnonzero(const fe a_p)
int fe_isnegative(const fe a_p)
{
register const fe a asm ("r0") = a_p;
register const sword32* a asm ("r0") = (const sword32*)a_p;
__asm__ __volatile__ (
"ldm %[a]!, {r2, r3, r4, r5}\n\t"
@ -655,9 +653,9 @@ int fe_isnegative(const fe a_p)
#ifndef WC_NO_CACHE_RESISTANT
void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
{
register fe* r asm ("r0") = r_p;
register fe* base asm ("r1") = base_p;
register signed char b asm ("r2") = b_p;
register fe* r asm ("r0") = (fe*)r_p;
register fe* base asm ("r1") = (fe*)base_p;
register signed char b asm ("r2") = (signed char)b_p;
__asm__ __volatile__ (
"sxtb %[b], %[b]\n\t"
@ -2364,9 +2362,9 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
#else
void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
{
register fe* r asm ("r0") = r_p;
register fe* base asm ("r1") = base_p;
register signed char b asm ("r2") = b_p;
register fe* r asm ("r0") = (fe*)r_p;
register fe* base asm ("r1") = (fe*)base_p;
register signed char b asm ("r2") = (signed char)b_p;
__asm__ __volatile__ (
"sxtb %[b], %[b]\n\t"
@ -2472,7 +2470,6 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
void fe_mul_op(void);
void fe_mul_op()
{
__asm__ __volatile__ (
"sub sp, sp, #44\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
@ -2610,9 +2607,9 @@ void fe_mul_op()
void fe_mul(fe r_p, const fe a_p, const fe b_p)
{
register fe r asm ("r0") = r_p;
register const fe a asm ("r1") = a_p;
register const fe b asm ("r2") = b_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
register const sword32* b asm ("r2") = (const sword32*)b_p;
__asm__ __volatile__ (
"bl fe_mul_op\n\t"
@ -2625,7 +2622,6 @@ void fe_mul(fe r_p, const fe a_p, const fe b_p)
void fe_sq_op(void);
void fe_sq_op()
{
__asm__ __volatile__ (
"sub sp, sp, #32\n\t"
"str r0, [sp, #28]\n\t"
@ -2749,8 +2745,8 @@ void fe_sq_op()
void fe_sq(fe r_p, const fe a_p)
{
register fe r asm ("r0") = r_p;
register const fe a asm ("r1") = a_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"bl fe_sq_op\n\t"
@ -2762,8 +2758,8 @@ void fe_sq(fe r_p, const fe a_p)
void fe_mul121666(fe r_p, fe a_p)
{
register fe r asm ("r0") = r_p;
register fe a asm ("r1") = a_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register sword32* a asm ("r1") = (sword32*)a_p;
__asm__ __volatile__ (
/* Multiply by 121666 */
@ -2808,9 +2804,9 @@ void fe_mul121666(fe r_p, fe a_p)
#ifndef WC_NO_CACHE_RESISTANT
int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
{
register byte* r asm ("r0") = r_p;
register const byte* n asm ("r1") = n_p;
register const byte* a asm ("r2") = a_p;
register byte* r asm ("r0") = (byte*)r_p;
register const byte* n asm ("r1") = (const byte*)n_p;
register const byte* a asm ("r2") = (const byte*)a_p;
__asm__ __volatile__ (
"sub sp, sp, #0xbc\n\t"
@ -3423,9 +3419,9 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
#else
int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
{
register byte* r asm ("r0") = r_p;
register const byte* n asm ("r1") = n_p;
register const byte* a asm ("r2") = a_p;
register byte* r asm ("r0") = (byte*)r_p;
register const byte* n asm ("r1") = (const byte*)n_p;
register const byte* a asm ("r2") = (const byte*)a_p;
__asm__ __volatile__ (
"sub sp, sp, #0xc0\n\t"
@ -3802,8 +3798,8 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
#ifdef HAVE_ED25519
void fe_invert(fe r_p, const fe a_p)
{
register fe r asm ("r0") = r_p;
register const fe a asm ("r1") = a_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"sub sp, sp, #0x88\n\t"
@ -3972,8 +3968,8 @@ void fe_invert(fe r_p, const fe a_p)
void fe_sq2(fe r_p, const fe a_p)
{
register fe r asm ("r0") = r_p;
register const fe a asm ("r1") = a_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"sub sp, sp, #36\n\t"
@ -4138,8 +4134,8 @@ void fe_sq2(fe r_p, const fe a_p)
void fe_pow22523(fe r_p, const fe a_p)
{
register fe r asm ("r0") = r_p;
register const fe a asm ("r1") = a_p;
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"sub sp, sp, #0x68\n\t"
@ -4308,8 +4304,8 @@ void fe_pow22523(fe r_p, const fe a_p)
void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p)
{
register ge_p2 * r asm ("r0") = r_p;
register const ge_p1p1 * p asm ("r1") = p_p;
register ge_p2 * r asm ("r0") = (ge_p2 *)r_p;
register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p;
__asm__ __volatile__ (
"sub sp, sp, #8\n\t"
@ -4338,8 +4334,8 @@ void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p)
void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p)
{
register ge_p3 * r asm ("r0") = r_p;
register const ge_p1p1 * p asm ("r1") = p_p;
register ge_p3 * r asm ("r0") = (ge_p3 *)r_p;
register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p;
__asm__ __volatile__ (
"sub sp, sp, #8\n\t"
@ -4373,8 +4369,8 @@ void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p)
void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p)
{
register ge_p1p1 * r asm ("r0") = r_p;
register const ge_p2 * p asm ("r1") = p_p;
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p2 * p asm ("r1") = (const ge_p2 *)p_p;
__asm__ __volatile__ (
"sub sp, sp, #8\n\t"
@ -4420,9 +4416,9 @@ void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p)
void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
{
register ge_p1p1 * r asm ("r0") = r_p;
register const ge_p3 * p asm ("r1") = p_p;
register const ge_precomp * q asm ("r2") = q_p;
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p;
__asm__ __volatile__ (
"sub sp, sp, #12\n\t"
@ -4502,9 +4498,9 @@ void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
{
register ge_p1p1 * r asm ("r0") = r_p;
register const ge_p3 * p asm ("r1") = p_p;
register const ge_precomp * q asm ("r2") = q_p;
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p;
__asm__ __volatile__ (
"sub sp, sp, #12\n\t"
@ -4585,9 +4581,9 @@ void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
{
register ge_p1p1 * r asm ("r0") = r_p;
register const ge_p3 * p asm ("r1") = p_p;
register const ge_cached* q asm ("r2") = q_p;
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
register const ge_cached* q asm ("r2") = (const ge_cached*)q_p;
__asm__ __volatile__ (
"sub sp, sp, #44\n\t"
@ -4668,9 +4664,9 @@ void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
{
register ge_p1p1 * r asm ("r0") = r_p;
register const ge_p3 * p asm ("r1") = p_p;
register const ge_cached* q asm ("r2") = q_p;
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
register const ge_cached* q asm ("r2") = (const ge_cached*)q_p;
__asm__ __volatile__ (
"sub sp, sp, #44\n\t"
@ -4751,7 +4747,7 @@ void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
void sc_reduce(byte* s_p)
{
register byte* s asm ("r0") = s_p;
register byte* s asm ("r0") = (byte*)s_p;
__asm__ __volatile__ (
"sub sp, sp, #52\n\t"
@ -5163,10 +5159,10 @@ void sc_reduce(byte* s_p)
void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
{
register byte* s asm ("r0") = s_p;
register const byte* a asm ("r1") = a_p;
register const byte* b asm ("r2") = b_p;
register const byte* c asm ("r3") = c_p;
register byte* s asm ("r0") = (byte*)s_p;
register const byte* a asm ("r1") = (const byte*)a_p;
register const byte* b asm ("r2") = (const byte*)b_p;
register const byte* c asm ("r3") = (const byte*)c_p;
__asm__ __volatile__ (
"sub sp, sp, #0x50\n\t"

View File

@ -28,6 +28,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__)
@ -36,6 +37,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM_INLINE
#ifndef NO_SHA256
#include <wolfssl/wolfcrypt/sha256.h>
@ -63,13 +65,13 @@ static const uint32_t L_SHA256_transform_len_k[] = {
void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len);
void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
{
register wc_Sha256* sha256 asm ("r0") = sha256_p;
register const byte* data asm ("r1") = data_p;
register word32 len asm ("r2") = len_p;
register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p;
register const byte* data asm ("r1") = (const byte*)data_p;
register word32 len asm ("r2") = (word32)len_p;
register uint32_t* L_SHA256_transform_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_len_k;
__asm__ __volatile__ (
"sub sp, sp, #0xc0\n\t"
"mov r3, %[L_SHA256_transform_len_k]\n\t"
/* Copy digest to add in at end */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r4, [%[sha256]]\n\t"
@ -1587,9 +1589,9 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"add %[data], %[data], #0x40\n\t"
"bne L_SHA256_transform_len_begin_%=\n\t"
"add sp, sp, #0xc0\n\t"
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len)
: [L_SHA256_transform_len_k] "g" (L_SHA256_transform_len_k)
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c)
:
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
);
}
@ -1619,9 +1621,10 @@ static const uint32_t L_SHA256_transform_neon_len_k[] = {
void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len);
void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
{
register wc_Sha256* sha256 asm ("r0") = sha256_p;
register const byte* data asm ("r1") = data_p;
register word32 len asm ("r2") = len_p;
register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p;
register const byte* data asm ("r1") = (const byte*)data_p;
register word32 len asm ("r2") = (word32)len_p;
register uint32_t* L_SHA256_transform_neon_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_neon_len_k;
__asm__ __volatile__ (
"sub sp, sp, #24\n\t"
@ -2648,9 +2651,9 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"str r10, [sp, #8]\n\t"
"bne L_SHA256_transform_neon_len_begin_%=\n\t"
"add sp, sp, #24\n\t"
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len)
: [L_SHA256_transform_neon_len_k] "g" (L_SHA256_transform_neon_len_k)
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11"
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_neon_len_k] "+r" (L_SHA256_transform_neon_len_k_c)
:
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11"
);
}

View File

@ -7679,6 +7679,7 @@ L_SHA512_transform_neon_len_k:
.type Transform_Sha512_Len, %function
Transform_Sha512_Len:
vpush {d8-d15}
adr r3, L_SHA512_transform_neon_len_k
# Load digest into working vars
vldm.64 r0, {d0-d7}
# Start of loop processing a block
@ -7715,7 +7716,6 @@ L_SHA512_transform_neon_len_begin:
vrev64.8 d30, d30
vrev64.8 d31, d31
#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */
adr r3, L_SHA512_transform_neon_len_k
mov r12, #4
# Start of 16 rounds
L_SHA512_transform_neon_len_start:
@ -9164,6 +9164,7 @@ L_SHA512_transform_neon_len_start:
#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */
vstm.64 r0, {d0-d7}
subs r2, r2, #0x80
sub r3, r3, #0x280
bne L_SHA512_transform_neon_len_begin
vpop {d8-d15}
bx lr

View File

@ -28,6 +28,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__)
@ -36,64 +37,65 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM_INLINE
#ifdef WOLFSSL_SHA512
#include <wolfssl/wolfcrypt/sha512.h>
#ifdef WOLFSSL_ARMASM_NO_NEON
static const uint64_t L_SHA512_transform_len_k[] = {
0x428a2f98d728ae22, 0x7137449123ef65cd,
0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
0x3956c25bf348b538, 0x59f111f1b605d019,
0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
0xd807aa98a3030242, 0x12835b0145706fbe,
0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
0x72be5d74f27b896f, 0x80deb1fe3b1696b1,
0x9bdc06a725c71235, 0xc19bf174cf692694,
0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
0x2de92c6f592b0275, 0x4a7484aa6ea6e483,
0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
0x983e5152ee66dfab, 0xa831c66d2db43210,
0xb00327c898fb213f, 0xbf597fc7beef0ee4,
0xc6e00bf33da88fc2, 0xd5a79147930aa725,
0x06ca6351e003826f, 0x142929670a0e6e70,
0x27b70a8546d22ffc, 0x2e1b21385c26c926,
0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
0x650a73548baf63de, 0x766a0abb3c77b2a8,
0x81c2c92e47edaee6, 0x92722c851482353b,
0xa2bfe8a14cf10364, 0xa81a664bbc423001,
0xc24b8b70d0f89791, 0xc76c51a30654be30,
0xd192e819d6ef5218, 0xd69906245565a910,
0xf40e35855771202a, 0x106aa07032bbd1b8,
0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
0x748f82ee5defb2fc, 0x78a5636f43172f60,
0x84c87814a1f0ab72, 0x8cc702081a6439ec,
0x90befffa23631e28, 0xa4506cebde82bde9,
0xbef9a3f7b2c67915, 0xc67178f2e372532b,
0xca273eceea26619c, 0xd186b8c721c0c207,
0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
0x06f067aa72176fba, 0x0a637dc5a2c898a6,
0x113f9804bef90dae, 0x1b710b35131c471b,
0x28db77f523047d84, 0x32caab7b40c72493,
0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
0x428a2f98d728ae22UL, 0x7137449123ef65cdUL,
0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
0x3956c25bf348b538UL, 0x59f111f1b605d019UL,
0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL,
0xd807aa98a3030242UL, 0x12835b0145706fbeUL,
0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL,
0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL,
0x9bdc06a725c71235UL, 0xc19bf174cf692694UL,
0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL,
0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL,
0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL,
0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL,
0x983e5152ee66dfabUL, 0xa831c66d2db43210UL,
0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL,
0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL,
0x06ca6351e003826fUL, 0x142929670a0e6e70UL,
0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL,
0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL,
0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL,
0x81c2c92e47edaee6UL, 0x92722c851482353bUL,
0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL,
0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL,
0xd192e819d6ef5218UL, 0xd69906245565a910UL,
0xf40e35855771202aUL, 0x106aa07032bbd1b8UL,
0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL,
0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL,
0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL,
0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL,
0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL,
0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL,
0x90befffa23631e28UL, 0xa4506cebde82bde9UL,
0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL,
0xca273eceea26619cUL, 0xd186b8c721c0c207UL,
0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL,
0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL,
0x113f9804bef90daeUL, 0x1b710b35131c471bUL,
0x28db77f523047d84UL, 0x32caab7b40c72493UL,
0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL,
0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL,
0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL,
};
void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len);
void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
{
register wc_Sha512* sha512 asm ("r0") = sha512_p;
register const byte* data asm ("r1") = data_p;
register word32 len asm ("r2") = len_p;
register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p;
register const byte* data asm ("r1") = (const byte*)data_p;
register word32 len asm ("r2") = (word32)len_p;
register uint64_t* L_SHA512_transform_len_k_c asm ("r3") = (uint64_t*)&L_SHA512_transform_len_k;
__asm__ __volatile__ (
"sub sp, sp, #0xc0\n\t"
"mov r3, %[L_SHA512_transform_len_k]\n\t"
/* Copy digest to add in at end */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r4, [%[sha512]]\n\t"
@ -7392,9 +7394,9 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
"bne L_SHA512_transform_len_begin_%=\n\t"
"eor r0, r0, r0\n\t"
"add sp, sp, #0xc0\n\t"
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len)
: [L_SHA512_transform_len_k] "g" (L_SHA512_transform_len_k)
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c)
:
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
);
}
@ -7403,54 +7405,55 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
#ifndef WOLFSSL_ARMASM_NO_NEON
static const uint64_t L_SHA512_transform_neon_len_k[] = {
0x428a2f98d728ae22, 0x7137449123ef65cd,
0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
0x3956c25bf348b538, 0x59f111f1b605d019,
0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
0xd807aa98a3030242, 0x12835b0145706fbe,
0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
0x72be5d74f27b896f, 0x80deb1fe3b1696b1,
0x9bdc06a725c71235, 0xc19bf174cf692694,
0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
0x2de92c6f592b0275, 0x4a7484aa6ea6e483,
0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
0x983e5152ee66dfab, 0xa831c66d2db43210,
0xb00327c898fb213f, 0xbf597fc7beef0ee4,
0xc6e00bf33da88fc2, 0xd5a79147930aa725,
0x06ca6351e003826f, 0x142929670a0e6e70,
0x27b70a8546d22ffc, 0x2e1b21385c26c926,
0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
0x650a73548baf63de, 0x766a0abb3c77b2a8,
0x81c2c92e47edaee6, 0x92722c851482353b,
0xa2bfe8a14cf10364, 0xa81a664bbc423001,
0xc24b8b70d0f89791, 0xc76c51a30654be30,
0xd192e819d6ef5218, 0xd69906245565a910,
0xf40e35855771202a, 0x106aa07032bbd1b8,
0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
0x748f82ee5defb2fc, 0x78a5636f43172f60,
0x84c87814a1f0ab72, 0x8cc702081a6439ec,
0x90befffa23631e28, 0xa4506cebde82bde9,
0xbef9a3f7b2c67915, 0xc67178f2e372532b,
0xca273eceea26619c, 0xd186b8c721c0c207,
0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
0x06f067aa72176fba, 0x0a637dc5a2c898a6,
0x113f9804bef90dae, 0x1b710b35131c471b,
0x28db77f523047d84, 0x32caab7b40c72493,
0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
0x428a2f98d728ae22UL, 0x7137449123ef65cdUL,
0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
0x3956c25bf348b538UL, 0x59f111f1b605d019UL,
0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL,
0xd807aa98a3030242UL, 0x12835b0145706fbeUL,
0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL,
0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL,
0x9bdc06a725c71235UL, 0xc19bf174cf692694UL,
0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL,
0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL,
0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL,
0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL,
0x983e5152ee66dfabUL, 0xa831c66d2db43210UL,
0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL,
0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL,
0x06ca6351e003826fUL, 0x142929670a0e6e70UL,
0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL,
0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL,
0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL,
0x81c2c92e47edaee6UL, 0x92722c851482353bUL,
0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL,
0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL,
0xd192e819d6ef5218UL, 0xd69906245565a910UL,
0xf40e35855771202aUL, 0x106aa07032bbd1b8UL,
0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL,
0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL,
0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL,
0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL,
0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL,
0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL,
0x90befffa23631e28UL, 0xa4506cebde82bde9UL,
0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL,
0xca273eceea26619cUL, 0xd186b8c721c0c207UL,
0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL,
0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL,
0x113f9804bef90daeUL, 0x1b710b35131c471bUL,
0x28db77f523047d84UL, 0x32caab7b40c72493UL,
0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL,
0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL,
0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL,
};
void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len);
void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
{
register wc_Sha512* sha512 asm ("r0") = sha512_p;
register const byte* data asm ("r1") = data_p;
register word32 len asm ("r2") = len_p;
register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p;
register const byte* data asm ("r1") = (const byte*)data_p;
register word32 len asm ("r2") = (word32)len_p;
register uint64_t* L_SHA512_transform_neon_len_k_c asm ("r3") = (uint64_t*)&L_SHA512_transform_neon_len_k;
__asm__ __volatile__ (
/* Load digest into working vars */
@ -7490,7 +7493,6 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
"vrev64.8 d30, d30\n\t"
"vrev64.8 d31, d31\n\t"
#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */
"mov r3, %[L_SHA512_transform_neon_len_k]\n\t"
"mov r12, #4\n\t"
/* Start of 16 rounds */
"\n"
@ -8940,10 +8942,11 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */
"vstm.64 %[sha512], {d0-d7}\n\t"
"subs %[len], %[len], #0x80\n\t"
"sub r3, r3, #0x280\n\t"
"bne L_SHA512_transform_neon_len_begin_%=\n\t"
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len)
: [L_SHA512_transform_neon_len_k] "g" (L_SHA512_transform_neon_len_k)
: "memory", "r3", "r12", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_neon_len_k] "+r" (L_SHA512_transform_neon_len_k_c)
:
: "memory", "r12", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}

View File

@ -31,6 +31,7 @@
#endif
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#if !defined(NO_AES) && defined(WOLFSSL_ARMASM)
@ -41,7 +42,6 @@
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
#include <wolfssl/wolfcrypt/aes.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#include <wolfssl/wolfcrypt/logging.h>
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>
@ -5467,7 +5467,6 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
#else /* !WOLFSSL_ARMASM_NO_HW_CRYPTO */
#include <wolfssl/wolfcrypt/logging.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#include <wolfssl/wolfcrypt/aes.h>
#ifdef NO_INLINE
#include <wolfssl/wolfcrypt/misc.h>

View File

@ -23,6 +23,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
/* Generated using (from wolfssl):
* cd ../scripts
@ -6312,9 +6313,9 @@ void ge_msub(ge_p1p1* r, const ge_p3* p, const ge_precomp* q)
__asm__ __volatile__ (
"stp x29, x30, [sp, #-48]!\n\t"
"add x29, sp, #0\n\t"
"str %w[r], [x29, #16]\n\t"
"str %w[p], [x29, #24]\n\t"
"str %w[q], [x29, #32]\n\t"
"str %x[r], [x29, #16]\n\t"
"str %x[p], [x29, #24]\n\t"
"str %x[q], [x29, #32]\n\t"
"mov x3, x1\n\t"
"add x2, x1, #32\n\t"
"add x1, x0, #32\n\t"
@ -6808,9 +6809,9 @@ void ge_add(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
__asm__ __volatile__ (
"stp x29, x30, [sp, #-48]!\n\t"
"add x29, sp, #0\n\t"
"str %w[r], [x29, #16]\n\t"
"str %w[p], [x29, #24]\n\t"
"str %w[q], [x29, #32]\n\t"
"str %x[r], [x29, #16]\n\t"
"str %x[p], [x29, #24]\n\t"
"str %x[q], [x29, #32]\n\t"
"mov x3, x1\n\t"
"add x2, x1, #32\n\t"
"add x1, x0, #32\n\t"
@ -7430,9 +7431,9 @@ void ge_sub(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
__asm__ __volatile__ (
"stp x29, x30, [sp, #-48]!\n\t"
"add x29, sp, #0\n\t"
"str %w[r], [x29, #16]\n\t"
"str %w[p], [x29, #24]\n\t"
"str %w[q], [x29, #32]\n\t"
"str %x[r], [x29, #16]\n\t"
"str %x[p], [x29, #24]\n\t"
"str %x[q], [x29, #32]\n\t"
"mov x3, x1\n\t"
"add x2, x1, #32\n\t"
"add x1, x0, #32\n\t"

View File

@ -23,6 +23,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
/* Generated using (from wolfssl):
* cd ../scripts

View File

@ -23,6 +23,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
/* Generated using (from wolfssl):
* cd ../scripts

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -280,13 +280,14 @@ fe_1:
# Set one
MOV r2, #0x1
MOV r3, #0x0
STRD r2, r3, [r0]
STM r0!, {r2, r3}
MOV r2, #0x0
STRD r2, r3, [r0, #8]
STRD r2, r3, [r0, #16]
STRD r2, r3, [r0, #24]
STM r0!, {r2, r3}
STM r0!, {r2, r3}
STM r0!, {r2, r3}
SUB r0, r0, #0x20
BX lr
# Cycle Count = 19
# Cycle Count = 20
.size fe_1,.-fe_1
.text
.align 4
@ -296,12 +297,13 @@ fe_0:
# Set zero
MOV r2, #0x0
MOV r3, #0x0
STRD r2, r3, [r0]
STRD r2, r3, [r0, #8]
STRD r2, r3, [r0, #16]
STRD r2, r3, [r0, #24]
STM r0!, {r2, r3}
STM r0!, {r2, r3}
STM r0!, {r2, r3}
STM r0!, {r2, r3}
SUB r0, r0, #0x20
BX lr
# Cycle Count = 18
# Cycle Count = 19
.size fe_0,.-fe_0
.text
.align 4
@ -1751,6 +1753,7 @@ fe_sq:
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
# Cycle Count = 24
.size fe_sq,.-fe_sq
#ifdef HAVE_CURVE25519
.text
.align 4
.globl fe_mul121666
@ -1803,26 +1806,31 @@ curve25519:
# Set one
MOV r10, #0x1
MOV r11, #0x0
STRD r10, r11, [r0]
STM r0!, {r10, r11}
MOV r10, #0x0
STRD r10, r11, [r0, #8]
STRD r10, r11, [r0, #16]
STRD r10, r11, [r0, #24]
STM r0!, {r10, r11}
STM r0!, {r10, r11}
STM r0!, {r10, r11}
SUB r0, r0, #0x20
MOV r3, sp
# Set zero
MOV r10, #0x0
MOV r11, #0x0
STRD r10, r11, [sp]
STRD r10, r11, [sp, #8]
STRD r10, r11, [sp, #16]
STRD r10, r11, [sp, #24]
STM r3!, {r10, r11}
STM r3!, {r10, r11}
STM r3!, {r10, r11}
STM r3!, {r10, r11}
SUB r3, r3, #0x20
ADD r3, sp, #0x20
# Set one
MOV r10, #0x1
MOV r11, #0x0
STRD r10, r11, [sp, #32]
STM r3!, {r10, r11}
MOV r10, #0x0
STRD r10, r11, [sp, #40]
STRD r10, r11, [sp, #48]
STRD r10, r11, [sp, #56]
STM r3!, {r10, r11}
STM r3!, {r10, r11}
STM r3!, {r10, r11}
SUB r3, r3, #0x20
ADD r3, sp, #0x40
# Copy
LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}
@ -1845,8 +1853,10 @@ L_curve25519_bits:
LDR r0, [sp, #160]
# Conditional Swap
RSB r1, r1, #0x0
LDRD r4, r5, [r0]
LDRD r6, r7, [sp, #64]
MOV r3, r0
ADD r12, sp, #0x40
LDM r3, {r4, r5}
LDM r12, {r6, r7}
EOR r8, r4, r6
EOR r9, r5, r7
AND r8, r8, r1
@ -1855,10 +1865,10 @@ L_curve25519_bits:
EOR r5, r5, r9
EOR r6, r6, r8
EOR r7, r7, r9
STRD r4, r5, [r0]
STRD r6, r7, [sp, #64]
LDRD r4, r5, [r0, #8]
LDRD r6, r7, [sp, #72]
STM r3!, {r4, r5}
STM r12!, {r6, r7}
LDM r3, {r4, r5}
LDM r12, {r6, r7}
EOR r8, r4, r6
EOR r9, r5, r7
AND r8, r8, r1
@ -1867,10 +1877,10 @@ L_curve25519_bits:
EOR r5, r5, r9
EOR r6, r6, r8
EOR r7, r7, r9
STRD r4, r5, [r0, #8]
STRD r6, r7, [sp, #72]
LDRD r4, r5, [r0, #16]
LDRD r6, r7, [sp, #80]
STM r3!, {r4, r5}
STM r12!, {r6, r7}
LDM r3, {r4, r5}
LDM r12, {r6, r7}
EOR r8, r4, r6
EOR r9, r5, r7
AND r8, r8, r1
@ -1879,10 +1889,10 @@ L_curve25519_bits:
EOR r5, r5, r9
EOR r6, r6, r8
EOR r7, r7, r9
STRD r4, r5, [r0, #16]
STRD r6, r7, [sp, #80]
LDRD r4, r5, [r0, #24]
LDRD r6, r7, [sp, #88]
STM r3!, {r4, r5}
STM r12!, {r6, r7}
LDM r3, {r4, r5}
LDM r12, {r6, r7}
EOR r8, r4, r6
EOR r9, r5, r7
AND r8, r8, r1
@ -1891,13 +1901,15 @@ L_curve25519_bits:
EOR r5, r5, r9
EOR r6, r6, r8
EOR r7, r7, r9
STRD r4, r5, [r0, #24]
STRD r6, r7, [sp, #88]
STM r3!, {r4, r5}
STM r12!, {r6, r7}
LDR r1, [sp, #172]
# Conditional Swap
RSB r1, r1, #0x0
LDRD r4, r5, [sp]
LDRD r6, r7, [sp, #32]
MOV r3, sp
ADD r12, sp, #0x20
LDM r3, {r4, r5}
LDM r12, {r6, r7}
EOR r8, r4, r6
EOR r9, r5, r7
AND r8, r8, r1
@ -1906,10 +1918,10 @@ L_curve25519_bits:
EOR r5, r5, r9
EOR r6, r6, r8
EOR r7, r7, r9
STRD r4, r5, [sp]
STRD r6, r7, [sp, #32]
LDRD r4, r5, [sp, #8]
LDRD r6, r7, [sp, #40]
STM r3!, {r4, r5}
STM r12!, {r6, r7}
LDM r3, {r4, r5}
LDM r12, {r6, r7}
EOR r8, r4, r6
EOR r9, r5, r7
AND r8, r8, r1
@ -1918,10 +1930,10 @@ L_curve25519_bits:
EOR r5, r5, r9
EOR r6, r6, r8
EOR r7, r7, r9
STRD r4, r5, [sp, #8]
STRD r6, r7, [sp, #40]
LDRD r4, r5, [sp, #16]
LDRD r6, r7, [sp, #48]
STM r3!, {r4, r5}
STM r12!, {r6, r7}
LDM r3, {r4, r5}
LDM r12, {r6, r7}
EOR r8, r4, r6
EOR r9, r5, r7
AND r8, r8, r1
@ -1930,10 +1942,10 @@ L_curve25519_bits:
EOR r5, r5, r9
EOR r6, r6, r8
EOR r7, r7, r9
STRD r4, r5, [sp, #16]
STRD r6, r7, [sp, #48]
LDRD r4, r5, [sp, #24]
LDRD r6, r7, [sp, #56]
STM r3!, {r4, r5}
STM r12!, {r6, r7}
LDM r3, {r4, r5}
LDM r12, {r6, r7}
EOR r8, r4, r6
EOR r9, r5, r7
AND r8, r8, r1
@ -1942,8 +1954,8 @@ L_curve25519_bits:
EOR r5, r5, r9
EOR r6, r6, r8
EOR r7, r7, r9
STRD r4, r5, [sp, #24]
STRD r6, r7, [sp, #56]
STM r3!, {r4, r5}
STM r12!, {r6, r7}
LDR r1, [sp, #184]
STR r1, [sp, #172]
MOV r3, sp
@ -2165,7 +2177,7 @@ L_curve25519_inv_8:
MOV r0, #0x0
ADD sp, sp, #0xbc
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
# Cycle Count = 684
# Cycle Count = 693
.size curve25519,.-curve25519
#else
.text
@ -2188,26 +2200,31 @@ curve25519:
# Set one
MOV r10, #0x1
MOV r11, #0x0
STRD r10, r11, [r0]
STM r0!, {r10, r11}
MOV r10, #0x0
STRD r10, r11, [r0, #8]
STRD r10, r11, [r0, #16]
STRD r10, r11, [r0, #24]
STM r0!, {r10, r11}
STM r0!, {r10, r11}
STM r0!, {r10, r11}
SUB r0, r0, #0x20
MOV r3, sp
# Set zero
MOV r10, #0x0
MOV r11, #0x0
STRD r10, r11, [sp]
STRD r10, r11, [sp, #8]
STRD r10, r11, [sp, #16]
STRD r10, r11, [sp, #24]
STM r3!, {r10, r11}
STM r3!, {r10, r11}
STM r3!, {r10, r11}
STM r3!, {r10, r11}
SUB r3, r3, #0x20
ADD r3, sp, #0x20
# Set one
MOV r10, #0x1
MOV r11, #0x0
STRD r10, r11, [sp, #32]
STM r3!, {r10, r11}
MOV r10, #0x0
STRD r10, r11, [sp, #40]
STRD r10, r11, [sp, #48]
STRD r10, r11, [sp, #56]
STM r3!, {r10, r11}
STM r3!, {r10, r11}
STM r3!, {r10, r11}
SUB r3, r3, #0x20
ADD r3, sp, #0x40
# Copy
LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}
@ -2470,9 +2487,10 @@ L_curve25519_inv_8:
MOV r0, #0x0
ADD sp, sp, #0xc0
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
# Cycle Count = 595
# Cycle Count = 600
.size curve25519,.-curve25519
#endif /* WC_NO_CACHE_RESISTANT */
#endif /* HAVE_CURVE25519 */
#ifdef HAVE_ED25519
.text
.align 4

View File

@ -48,7 +48,7 @@
#if defined(HAVE_CURVE25519) || defined(HAVE_ED25519)
#if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL)
void fe_init(void)
void fe_init()
{
__asm__ __volatile__ (
"\n\t"
@ -59,7 +59,7 @@ void fe_init(void)
}
void fe_add_sub_op(void);
void fe_add_sub_op(void)
void fe_add_sub_op()
{
__asm__ __volatile__ (
/* Add-Sub */
@ -156,7 +156,7 @@ void fe_add_sub_op(void)
}
void fe_sub_op(void);
void fe_sub_op(void)
void fe_sub_op()
{
__asm__ __volatile__ (
/* Sub */
@ -190,18 +190,22 @@ void fe_sub_op(void)
);
}
void fe_sub(fe r, const fe a, const fe b)
void fe_sub(fe r_p, const fe a_p, const fe b_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
register const sword32* b asm ("r2") = (const sword32*)b_p;
__asm__ __volatile__ (
"BL fe_sub_op\n\t"
: [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
:
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
void fe_add_op(void);
void fe_add_op(void)
void fe_add_op()
{
__asm__ __volatile__ (
/* Add */
@ -235,31 +239,41 @@ void fe_add_op(void)
);
}
void fe_add(fe r, const fe a, const fe b)
void fe_add(fe r_p, const fe a_p, const fe b_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
register const sword32* b asm ("r2") = (const sword32*)b_p;
__asm__ __volatile__ (
"BL fe_add_op\n\t"
: [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
:
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
#ifdef HAVE_ED25519
void fe_frombytes(fe out, const unsigned char* in)
void fe_frombytes(fe out_p, const unsigned char* in_p)
{
register sword32* out asm ("r0") = (sword32*)out_p;
register const unsigned char* in asm ("r1") = (const unsigned char*)in_p;
__asm__ __volatile__ (
"LDM %[in], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
"BFC r9, #31, #1\n\t"
"STM %[out], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
: [out] "+l" (out), [in] "+l" (in)
: [out] "+r" (out), [in] "+r" (in)
:
: "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
);
}
void fe_tobytes(unsigned char* out, const fe n)
void fe_tobytes(unsigned char* out_p, const fe n_p)
{
register unsigned char* out asm ("r0") = (unsigned char*)out_p;
register const sword32* n asm ("r1") = (const sword32*)n_p;
__asm__ __volatile__ (
"LDM %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
"ADDS r10, r2, #0x13\n\t"
@ -282,47 +296,56 @@ void fe_tobytes(unsigned char* out, const fe n)
"ADC r9, r9, #0x0\n\t"
"BFC r9, #31, #1\n\t"
"STM %[out], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
: [out] "+l" (out), [n] "+l" (n)
: [out] "+r" (out), [n] "+r" (n)
:
: "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
);
}
void fe_1(fe n)
void fe_1(fe n_p)
{
register sword32* n asm ("r0") = (sword32*)n_p;
__asm__ __volatile__ (
/* Set one */
"MOV r2, #0x1\n\t"
"MOV r3, #0x0\n\t"
"STRD r2, r3, [%[n]]\n\t"
"STM %[n]!, {r2, r3}\n\t"
"MOV r2, #0x0\n\t"
"STRD r2, r3, [%[n], #8]\n\t"
"STRD r2, r3, [%[n], #16]\n\t"
"STRD r2, r3, [%[n], #24]\n\t"
: [n] "+l" (n)
"STM %[n]!, {r2, r3}\n\t"
"STM %[n]!, {r2, r3}\n\t"
"STM %[n]!, {r2, r3}\n\t"
"SUB %[n], %[n], #0x20\n\t"
: [n] "+r" (n)
:
: "memory", "r2", "r3"
);
}
void fe_0(fe n)
void fe_0(fe n_p)
{
register sword32* n asm ("r0") = (sword32*)n_p;
__asm__ __volatile__ (
/* Set zero */
"MOV r2, #0x0\n\t"
"MOV r3, #0x0\n\t"
"STRD r2, r3, [%[n]]\n\t"
"STRD r2, r3, [%[n], #8]\n\t"
"STRD r2, r3, [%[n], #16]\n\t"
"STRD r2, r3, [%[n], #24]\n\t"
: [n] "+l" (n)
"STM %[n]!, {r2, r3}\n\t"
"STM %[n]!, {r2, r3}\n\t"
"STM %[n]!, {r2, r3}\n\t"
"STM %[n]!, {r2, r3}\n\t"
"SUB %[n], %[n], #0x20\n\t"
: [n] "+r" (n)
:
: "memory", "r2", "r3"
);
}
void fe_copy(fe r, const fe a)
void fe_copy(fe r_p, const fe a_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
/* Copy */
"LDRD r2, r3, [%[a]]\n\t"
@ -333,14 +356,17 @@ void fe_copy(fe r, const fe a)
"LDRD r4, r5, [%[a], #24]\n\t"
"STRD r2, r3, [%[r], #16]\n\t"
"STRD r4, r5, [%[r], #24]\n\t"
: [r] "+l" (r), [a] "+l" (a)
: [r] "+r" (r), [a] "+r" (a)
:
: "memory", "r2", "r3", "r4", "r5"
);
}
void fe_neg(fe r, const fe a)
void fe_neg(fe r_p, const fe a_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"MVN r7, #0x0\n\t"
"MVN r6, #0x12\n\t"
@ -357,14 +383,16 @@ void fe_neg(fe r, const fe a)
"SBCS r4, r7, r4\n\t"
"SBC r5, r6, r5\n\t"
"STM %[r]!, {r2, r3, r4, r5}\n\t"
: [r] "+l" (r), [a] "+l" (a)
: [r] "+r" (r), [a] "+r" (a)
:
: "memory", "r2", "r3", "r4", "r5", "r6", "r7"
);
}
int fe_isnonzero(const fe a)
int fe_isnonzero(const fe a_p)
{
register const sword32* a asm ("r0") = (const sword32*)a_p;
__asm__ __volatile__ (
"LDM %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
"ADDS r1, r2, #0x13\n\t"
@ -393,15 +421,17 @@ int fe_isnonzero(const fe a)
"ORR r4, r4, r6\n\t"
"ORR r2, r2, r8\n\t"
"ORR %[a], r2, r4\n\t"
: [a] "+l" (a)
: [a] "+r" (a)
:
: "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
);
return (uint32_t)(size_t)a;
}
int fe_isnegative(const fe a)
int fe_isnegative(const fe a_p)
{
register const sword32* a asm ("r0") = (const sword32*)a_p;
__asm__ __volatile__ (
"LDM %[a]!, {r2, r3, r4, r5}\n\t"
"ADDS r1, r2, #0x13\n\t"
@ -417,7 +447,7 @@ int fe_isnegative(const fe a)
"AND %[a], r2, #0x1\n\t"
"LSR r1, r1, #31\n\t"
"EOR %[a], %[a], r1\n\t"
: [a] "+l" (a)
: [a] "+r" (a)
:
: "memory", "r1", "r2", "r3", "r4", "r5"
);
@ -425,8 +455,12 @@ int fe_isnegative(const fe a)
}
#ifndef WC_NO_CACHE_RESISTANT
void fe_cmov_table(fe* r, fe* base, signed char b)
void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
{
register fe* r asm ("r0") = (fe*)r_p;
register fe* base asm ("r1") = (fe*)base_p;
register signed char b asm ("r2") = (signed char)b_p;
__asm__ __volatile__ (
"SXTB %[b], %[b]\n\t"
"SBFX r3, %[b], #7, #1\n\t"
@ -1391,15 +1425,19 @@ void fe_cmov_table(fe* r, fe* base, signed char b)
"STRD r4, r5, [%[r], #24]\n\t"
"STRD r6, r7, [%[r], #56]\n\t"
"STRD r8, r9, [%[r], #88]\n\t"
: [r] "+l" (r), [base] "+l" (base), [b] "+l" (b)
: [r] "+r" (r), [base] "+r" (base), [b] "+r" (b)
:
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r3", "r10", "r11", "r12", "lr"
);
}
#else
void fe_cmov_table(fe* r, fe* base, signed char b)
void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
{
register fe* r asm ("r0") = (fe*)r_p;
register fe* base asm ("r1") = (fe*)base_p;
register signed char b asm ("r2") = (signed char)b_p;
__asm__ __volatile__ (
"SXTB %[b], %[b]\n\t"
"SBFX r3, %[b], #7, #1\n\t"
@ -1493,7 +1531,7 @@ void fe_cmov_table(fe* r, fe* base, signed char b)
"AND r7, r7, lr\n\t"
"STM %[r]!, {r4, r5, r6, r7}\n\t"
"SUB %[base], %[base], %[b]\n\t"
: [r] "+l" (r), [base] "+l" (base), [b] "+l" (b)
: [r] "+r" (r), [base] "+r" (base), [b] "+r" (b)
:
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
@ -1502,7 +1540,7 @@ void fe_cmov_table(fe* r, fe* base, signed char b)
#endif /* WC_NO_CACHE_RESISTANT */
#endif /* HAVE_ED25519 */
void fe_mul_op(void);
void fe_mul_op(void)
void fe_mul_op()
{
__asm__ __volatile__ (
"SUB sp, sp, #0x2c\n\t"
@ -1634,18 +1672,22 @@ void fe_mul_op(void)
);
}
void fe_mul(fe r, const fe a, const fe b)
void fe_mul(fe r_p, const fe a_p, const fe b_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
register const sword32* b asm ("r2") = (const sword32*)b_p;
__asm__ __volatile__ (
"BL fe_mul_op\n\t"
: [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
:
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
void fe_sq_op(void);
void fe_sq_op(void)
void fe_sq_op()
{
__asm__ __volatile__ (
"SUB sp, sp, #0x20\n\t"
@ -1763,18 +1805,25 @@ void fe_sq_op(void)
);
}
void fe_sq(fe r, const fe a)
void fe_sq(fe r_p, const fe a_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"BL fe_sq_op\n\t"
: [r] "+l" (r), [a] "+l" (a)
: [r] "+r" (r), [a] "+r" (a)
:
: "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
void fe_mul121666(fe r, fe a)
#ifdef HAVE_CURVE25519
void fe_mul121666(fe r_p, fe a_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register sword32* a asm ("r1") = (sword32*)a_p;
__asm__ __volatile__ (
/* Multiply by 121666 */
"LDM %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
@ -1803,15 +1852,19 @@ void fe_mul121666(fe r, fe a)
"ADCS r8, r8, #0x0\n\t"
"ADC r9, r9, #0x0\n\t"
"STM %[r], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
: [r] "+l" (r), [a] "+l" (a)
: [r] "+r" (r), [a] "+r" (a)
:
: "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
);
}
#ifndef WC_NO_CACHE_RESISTANT
int curve25519(byte* r, const byte* n, const byte* a)
int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
{
register byte* r asm ("r0") = (byte*)r_p;
register const byte* n asm ("r1") = (const byte*)n_p;
register const byte* a asm ("r2") = (const byte*)a_p;
__asm__ __volatile__ (
"SUB sp, sp, #0xbc\n\t"
"STR %[r], [sp, #160]\n\t"
@ -1822,26 +1875,31 @@ int curve25519(byte* r, const byte* n, const byte* a)
/* Set one */
"MOV r10, #0x1\n\t"
"MOV r11, #0x0\n\t"
"STRD r10, r11, [%[r]]\n\t"
"STM %[r]!, {r10, r11}\n\t"
"MOV r10, #0x0\n\t"
"STRD r10, r11, [%[r], #8]\n\t"
"STRD r10, r11, [%[r], #16]\n\t"
"STRD r10, r11, [%[r], #24]\n\t"
"STM %[r]!, {r10, r11}\n\t"
"STM %[r]!, {r10, r11}\n\t"
"STM %[r]!, {r10, r11}\n\t"
"SUB %[r], %[r], #0x20\n\t"
"MOV r3, sp\n\t"
/* Set zero */
"MOV r10, #0x0\n\t"
"MOV r11, #0x0\n\t"
"STRD r10, r11, [sp]\n\t"
"STRD r10, r11, [sp, #8]\n\t"
"STRD r10, r11, [sp, #16]\n\t"
"STRD r10, r11, [sp, #24]\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"SUB r3, r3, #0x20\n\t"
"ADD r3, sp, #0x20\n\t"
/* Set one */
"MOV r10, #0x1\n\t"
"MOV r11, #0x0\n\t"
"STRD r10, r11, [sp, #32]\n\t"
"STM r3!, {r10, r11}\n\t"
"MOV r10, #0x0\n\t"
"STRD r10, r11, [sp, #40]\n\t"
"STRD r10, r11, [sp, #48]\n\t"
"STRD r10, r11, [sp, #56]\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"SUB r3, r3, #0x20\n\t"
"ADD r3, sp, #0x40\n\t"
/* Copy */
"LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
@ -1866,8 +1924,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
"LDR %[r], [sp, #160]\n\t"
/* Conditional Swap */
"RSB %[n], %[n], #0x0\n\t"
"LDRD r4, r5, [%[r]]\n\t"
"LDRD r6, r7, [sp, #64]\n\t"
"MOV r3, r0\n\t"
"ADD r12, sp, #0x40\n\t"
"LDM r3, {r4, r5}\n\t"
"LDM r12, {r6, r7}\n\t"
"EOR r8, r4, r6\n\t"
"EOR r9, r5, r7\n\t"
"AND r8, r8, %[n]\n\t"
@ -1876,10 +1936,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r8\n\t"
"EOR r7, r7, r9\n\t"
"STRD r4, r5, [%[r]]\n\t"
"STRD r6, r7, [sp, #64]\n\t"
"LDRD r4, r5, [%[r], #8]\n\t"
"LDRD r6, r7, [sp, #72]\n\t"
"STM r3!, {r4, r5}\n\t"
"STM r12!, {r6, r7}\n\t"
"LDM r3, {r4, r5}\n\t"
"LDM r12, {r6, r7}\n\t"
"EOR r8, r4, r6\n\t"
"EOR r9, r5, r7\n\t"
"AND r8, r8, %[n]\n\t"
@ -1888,10 +1948,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r8\n\t"
"EOR r7, r7, r9\n\t"
"STRD r4, r5, [%[r], #8]\n\t"
"STRD r6, r7, [sp, #72]\n\t"
"LDRD r4, r5, [%[r], #16]\n\t"
"LDRD r6, r7, [sp, #80]\n\t"
"STM r3!, {r4, r5}\n\t"
"STM r12!, {r6, r7}\n\t"
"LDM r3, {r4, r5}\n\t"
"LDM r12, {r6, r7}\n\t"
"EOR r8, r4, r6\n\t"
"EOR r9, r5, r7\n\t"
"AND r8, r8, %[n]\n\t"
@ -1900,10 +1960,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r8\n\t"
"EOR r7, r7, r9\n\t"
"STRD r4, r5, [%[r], #16]\n\t"
"STRD r6, r7, [sp, #80]\n\t"
"LDRD r4, r5, [%[r], #24]\n\t"
"LDRD r6, r7, [sp, #88]\n\t"
"STM r3!, {r4, r5}\n\t"
"STM r12!, {r6, r7}\n\t"
"LDM r3, {r4, r5}\n\t"
"LDM r12, {r6, r7}\n\t"
"EOR r8, r4, r6\n\t"
"EOR r9, r5, r7\n\t"
"AND r8, r8, %[n]\n\t"
@ -1912,13 +1972,15 @@ int curve25519(byte* r, const byte* n, const byte* a)
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r8\n\t"
"EOR r7, r7, r9\n\t"
"STRD r4, r5, [%[r], #24]\n\t"
"STRD r6, r7, [sp, #88]\n\t"
"STM r3!, {r4, r5}\n\t"
"STM r12!, {r6, r7}\n\t"
"LDR %[n], [sp, #172]\n\t"
/* Conditional Swap */
"RSB %[n], %[n], #0x0\n\t"
"LDRD r4, r5, [sp]\n\t"
"LDRD r6, r7, [sp, #32]\n\t"
"MOV r3, sp\n\t"
"ADD r12, sp, #0x20\n\t"
"LDM r3, {r4, r5}\n\t"
"LDM r12, {r6, r7}\n\t"
"EOR r8, r4, r6\n\t"
"EOR r9, r5, r7\n\t"
"AND r8, r8, %[n]\n\t"
@ -1927,10 +1989,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r8\n\t"
"EOR r7, r7, r9\n\t"
"STRD r4, r5, [sp]\n\t"
"STRD r6, r7, [sp, #32]\n\t"
"LDRD r4, r5, [sp, #8]\n\t"
"LDRD r6, r7, [sp, #40]\n\t"
"STM r3!, {r4, r5}\n\t"
"STM r12!, {r6, r7}\n\t"
"LDM r3, {r4, r5}\n\t"
"LDM r12, {r6, r7}\n\t"
"EOR r8, r4, r6\n\t"
"EOR r9, r5, r7\n\t"
"AND r8, r8, %[n]\n\t"
@ -1939,10 +2001,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r8\n\t"
"EOR r7, r7, r9\n\t"
"STRD r4, r5, [sp, #8]\n\t"
"STRD r6, r7, [sp, #40]\n\t"
"LDRD r4, r5, [sp, #16]\n\t"
"LDRD r6, r7, [sp, #48]\n\t"
"STM r3!, {r4, r5}\n\t"
"STM r12!, {r6, r7}\n\t"
"LDM r3, {r4, r5}\n\t"
"LDM r12, {r6, r7}\n\t"
"EOR r8, r4, r6\n\t"
"EOR r9, r5, r7\n\t"
"AND r8, r8, %[n]\n\t"
@ -1951,10 +2013,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r8\n\t"
"EOR r7, r7, r9\n\t"
"STRD r4, r5, [sp, #16]\n\t"
"STRD r6, r7, [sp, #48]\n\t"
"LDRD r4, r5, [sp, #24]\n\t"
"LDRD r6, r7, [sp, #56]\n\t"
"STM r3!, {r4, r5}\n\t"
"STM r12!, {r6, r7}\n\t"
"LDM r3, {r4, r5}\n\t"
"LDM r12, {r6, r7}\n\t"
"EOR r8, r4, r6\n\t"
"EOR r9, r5, r7\n\t"
"AND r8, r8, %[n]\n\t"
@ -1963,8 +2025,8 @@ int curve25519(byte* r, const byte* n, const byte* a)
"EOR r5, r5, r9\n\t"
"EOR r6, r6, r8\n\t"
"EOR r7, r7, r9\n\t"
"STRD r4, r5, [sp, #24]\n\t"
"STRD r6, r7, [sp, #56]\n\t"
"STM r3!, {r4, r5}\n\t"
"STM r12!, {r6, r7}\n\t"
"LDR %[n], [sp, #184]\n\t"
"STR %[n], [sp, #172]\n\t"
"MOV r3, sp\n\t"
@ -2193,7 +2255,7 @@ int curve25519(byte* r, const byte* n, const byte* a)
"BL fe_mul_op\n\t"
"MOV r0, #0x0\n\t"
"ADD sp, sp, #0xbc\n\t"
: [r] "+l" (r), [n] "+l" (n), [a] "+l" (a)
: [r] "+r" (r), [n] "+r" (n), [a] "+r" (a)
:
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "lr"
);
@ -2201,8 +2263,12 @@ int curve25519(byte* r, const byte* n, const byte* a)
}
#else
int curve25519(byte* r, const byte* n, const byte* a)
int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
{
register byte* r asm ("r0") = (byte*)r_p;
register const byte* n asm ("r1") = (const byte*)n_p;
register const byte* a asm ("r2") = (const byte*)a_p;
__asm__ __volatile__ (
"SUB sp, sp, #0xc0\n\t"
"STR %[r], [sp, #176]\n\t"
@ -2218,26 +2284,31 @@ int curve25519(byte* r, const byte* n, const byte* a)
/* Set one */
"MOV r10, #0x1\n\t"
"MOV r11, #0x0\n\t"
"STRD r10, r11, [%[r]]\n\t"
"STM %[r]!, {r10, r11}\n\t"
"MOV r10, #0x0\n\t"
"STRD r10, r11, [%[r], #8]\n\t"
"STRD r10, r11, [%[r], #16]\n\t"
"STRD r10, r11, [%[r], #24]\n\t"
"STM %[r]!, {r10, r11}\n\t"
"STM %[r]!, {r10, r11}\n\t"
"STM %[r]!, {r10, r11}\n\t"
"SUB %[r], %[r], #0x20\n\t"
"MOV r3, sp\n\t"
/* Set zero */
"MOV r10, #0x0\n\t"
"MOV r11, #0x0\n\t"
"STRD r10, r11, [sp]\n\t"
"STRD r10, r11, [sp, #8]\n\t"
"STRD r10, r11, [sp, #16]\n\t"
"STRD r10, r11, [sp, #24]\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"SUB r3, r3, #0x20\n\t"
"ADD r3, sp, #0x20\n\t"
/* Set one */
"MOV r10, #0x1\n\t"
"MOV r11, #0x0\n\t"
"STRD r10, r11, [sp, #32]\n\t"
"STM r3!, {r10, r11}\n\t"
"MOV r10, #0x0\n\t"
"STRD r10, r11, [sp, #40]\n\t"
"STRD r10, r11, [sp, #48]\n\t"
"STRD r10, r11, [sp, #56]\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"STM r3!, {r10, r11}\n\t"
"SUB r3, r3, #0x20\n\t"
"ADD r3, sp, #0x40\n\t"
/* Copy */
"LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
@ -2508,7 +2579,7 @@ int curve25519(byte* r, const byte* n, const byte* a)
"STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
"MOV r0, #0x0\n\t"
"ADD sp, sp, #0xc0\n\t"
: [r] "+l" (r), [n] "+l" (n), [a] "+l" (a)
: [r] "+r" (r), [n] "+r" (n), [a] "+r" (a)
:
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "lr"
);
@ -2516,9 +2587,13 @@ int curve25519(byte* r, const byte* n, const byte* a)
}
#endif /* WC_NO_CACHE_RESISTANT */
#endif /* HAVE_CURVE25519 */
#ifdef HAVE_ED25519
void fe_invert(fe r, const fe a)
void fe_invert(fe r_p, const fe a_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x88\n\t"
/* Invert */
@ -2678,14 +2753,17 @@ void fe_invert(fe r, const fe a)
"LDR %[a], [sp, #132]\n\t"
"LDR %[r], [sp, #128]\n\t"
"ADD sp, sp, #0x88\n\t"
: [r] "+l" (r), [a] "+l" (a)
: [r] "+r" (r), [a] "+r" (a)
:
: "memory", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
);
}
void fe_sq2(fe r, const fe a)
void fe_sq2(fe r_p, const fe a_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x24\n\t"
"STRD r0, r1, [sp, #28]\n\t"
@ -2831,14 +2909,17 @@ void fe_sq2(fe r, const fe a)
"STM r12, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t"
"MOV r0, r12\n\t"
"MOV r1, lr\n\t"
: [r] "+l" (r), [a] "+l" (a)
: [r] "+r" (r), [a] "+r" (a)
:
: "memory", "lr"
);
}
void fe_pow22523(fe r, const fe a)
void fe_pow22523(fe r_p, const fe a_p)
{
register sword32* r asm ("r0") = (sword32*)r_p;
register const sword32* a asm ("r1") = (const sword32*)a_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x68\n\t"
/* pow22523 */
@ -2998,14 +3079,17 @@ void fe_pow22523(fe r, const fe a)
"LDR %[a], [sp, #100]\n\t"
"LDR %[r], [sp, #96]\n\t"
"ADD sp, sp, #0x68\n\t"
: [r] "+l" (r), [a] "+l" (a)
: [r] "+r" (r), [a] "+r" (a)
:
: "memory", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
);
}
void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p)
void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p)
{
register ge_p2 * r asm ("r0") = (ge_p2 *)r_p;
register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x8\n\t"
"STR %[r], [sp]\n\t"
@ -3025,14 +3109,17 @@ void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p)
"ADD r0, r0, #0x40\n\t"
"BL fe_mul_op\n\t"
"ADD sp, sp, #0x8\n\t"
: [r] "+l" (r), [p] "+l" (p)
: [r] "+r" (r), [p] "+r" (p)
:
: "memory", "lr", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
);
}
void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p)
void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p)
{
register ge_p3 * r asm ("r0") = (ge_p3 *)r_p;
register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x8\n\t"
"STR %[r], [sp]\n\t"
@ -3057,14 +3144,17 @@ void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p)
"ADD r0, r0, #0x60\n\t"
"BL fe_mul_op\n\t"
"ADD sp, sp, #0x8\n\t"
: [r] "+l" (r), [p] "+l" (p)
: [r] "+r" (r), [p] "+r" (p)
:
: "memory", "lr", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
);
}
void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p)
void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p)
{
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p2 * p asm ("r1") = (const ge_p2 *)p_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x8\n\t"
"STR %[r], [sp]\n\t"
@ -3101,14 +3191,18 @@ void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p)
"MOV r1, r0\n\t"
"BL fe_sub_op\n\t"
"ADD sp, sp, #0x8\n\t"
: [r] "+l" (r), [p] "+l" (p)
: [r] "+r" (r), [p] "+r" (p)
:
: "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
void ge_madd(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q)
void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
{
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p;
__asm__ __volatile__ (
"SUB sp, sp, #0xc\n\t"
"STR %[r], [sp]\n\t"
@ -3179,14 +3273,18 @@ void ge_madd(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q)
"ADD r1, r0, #0x20\n\t"
"BL fe_add_sub_op\n\t"
"ADD sp, sp, #0xc\n\t"
: [r] "+l" (r), [p] "+l" (p), [q] "+l" (q)
: [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
:
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
void ge_msub(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q)
void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
{
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p;
__asm__ __volatile__ (
"SUB sp, sp, #0xc\n\t"
"STR %[r], [sp]\n\t"
@ -3258,14 +3356,18 @@ void ge_msub(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q)
"ADD r0, r0, #0x20\n\t"
"BL fe_add_sub_op\n\t"
"ADD sp, sp, #0xc\n\t"
: [r] "+l" (r), [p] "+l" (p), [q] "+l" (q)
: [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
:
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
void ge_add(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
{
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
register const ge_cached* q asm ("r2") = (const ge_cached*)q_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x2c\n\t"
"STR %[r], [sp]\n\t"
@ -3337,14 +3439,18 @@ void ge_add(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
"ADD r0, r0, #0x20\n\t"
"BL fe_add_sub_op\n\t"
"ADD sp, sp, #0x2c\n\t"
: [r] "+l" (r), [p] "+l" (p), [q] "+l" (q)
: [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
:
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
{
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
register const ge_cached* q asm ("r2") = (const ge_cached*)q_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x2c\n\t"
"STR %[r], [sp]\n\t"
@ -3416,14 +3522,16 @@ void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
"ADD r0, r0, #0x40\n\t"
"BL fe_add_sub_op\n\t"
"ADD sp, sp, #0x2c\n\t"
: [r] "+l" (r), [p] "+l" (p), [q] "+l" (q)
: [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
:
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
void sc_reduce(byte* s)
void sc_reduce(byte* s_p)
{
register byte* s asm ("r0") = (byte*)s_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x34\n\t"
/* Load bits 252-511 */
@ -3694,14 +3802,19 @@ void sc_reduce(byte* s)
/* Store result */
"STM %[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
"ADD sp, sp, #0x34\n\t"
: [s] "+l" (s)
: [s] "+r" (s)
:
: "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);
}
void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
{
register byte* s asm ("r0") = (byte*)s_p;
register const byte* a asm ("r1") = (const byte*)a_p;
register const byte* b asm ("r2") = (const byte*)b_p;
register const byte* c asm ("r3") = (const byte*)c_p;
__asm__ __volatile__ (
"SUB sp, sp, #0x50\n\t"
"ADD lr, sp, #0x44\n\t"
@ -4096,7 +4209,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
/* Store result */
"STM %[s], {%[b], %[c], r4, r5, r6, r7, r8, r9}\n\t"
"ADD sp, sp, #0x50\n\t"
: [s] "+l" (s), [a] "+l" (a), [b] "+l" (b), [c] "+l" (c)
: [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c)
:
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
);

View File

@ -42,75 +42,32 @@
#ifdef WOLFSSL_ARMASM_NO_NEON
static const uint32_t L_SHA256_transform_len_k[] = {
0x428a2f98,
0x71374491,
0xb5c0fbcf,
0xe9b5dba5,
0x3956c25b,
0x59f111f1,
0x923f82a4,
0xab1c5ed5,
0xd807aa98,
0x12835b01,
0x243185be,
0x550c7dc3,
0x72be5d74,
0x80deb1fe,
0x9bdc06a7,
0xc19bf174,
0xe49b69c1,
0xefbe4786,
0xfc19dc6,
0x240ca1cc,
0x2de92c6f,
0x4a7484aa,
0x5cb0a9dc,
0x76f988da,
0x983e5152,
0xa831c66d,
0xb00327c8,
0xbf597fc7,
0xc6e00bf3,
0xd5a79147,
0x6ca6351,
0x14292967,
0x27b70a85,
0x2e1b2138,
0x4d2c6dfc,
0x53380d13,
0x650a7354,
0x766a0abb,
0x81c2c92e,
0x92722c85,
0xa2bfe8a1,
0xa81a664b,
0xc24b8b70,
0xc76c51a3,
0xd192e819,
0xd6990624,
0xf40e3585,
0x106aa070,
0x19a4c116,
0x1e376c08,
0x2748774c,
0x34b0bcb5,
0x391c0cb3,
0x4ed8aa4a,
0x5b9cca4f,
0x682e6ff3,
0x748f82ee,
0x78a5636f,
0x84c87814,
0x8cc70208,
0x90befffa,
0xa4506ceb,
0xbef9a3f7,
0xc67178f2,
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
};
void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len);
void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len)
void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
{
register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p;
register const byte* data asm ("r1") = (const byte*)data_p;
register word32 len asm ("r2") = (word32)len_p;
register uint32_t* L_SHA256_transform_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_len_k;
__asm__ __volatile__ (
"SUB sp, sp, #0xc0\n\t"
"MOV r3, %[L_SHA256_transform_len_k]\n\t"
@ -1463,9 +1420,9 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len)
"ADD %[data], %[data], #0x40\n\t"
"BNE L_SHA256_transform_len_begin_%=\n\t"
"ADD sp, sp, #0xc0\n\t"
: [sha256] "+l" (sha256), [data] "+l" (data), [len] "+l" (len)
: [L_SHA256_transform_len_k] "r" (L_SHA256_transform_len_k)
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c)
:
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
);
}

View File

@ -42,91 +42,56 @@
#ifdef WOLFSSL_ARMASM_NO_NEON
static const uint64_t L_SHA512_transform_len_k[] = {
0x428a2f98d728ae22UL,
0x7137449123ef65cdUL,
0xb5c0fbcfec4d3b2fUL,
0xe9b5dba58189dbbcUL,
0x3956c25bf348b538UL,
0x59f111f1b605d019UL,
0x923f82a4af194f9bUL,
0xab1c5ed5da6d8118UL,
0xd807aa98a3030242UL,
0x12835b0145706fbeUL,
0x243185be4ee4b28cUL,
0x550c7dc3d5ffb4e2UL,
0x72be5d74f27b896fUL,
0x80deb1fe3b1696b1UL,
0x9bdc06a725c71235UL,
0xc19bf174cf692694UL,
0xe49b69c19ef14ad2UL,
0xefbe4786384f25e3UL,
0xfc19dc68b8cd5b5UL,
0x240ca1cc77ac9c65UL,
0x2de92c6f592b0275UL,
0x4a7484aa6ea6e483UL,
0x5cb0a9dcbd41fbd4UL,
0x76f988da831153b5UL,
0x983e5152ee66dfabUL,
0xa831c66d2db43210UL,
0xb00327c898fb213fUL,
0xbf597fc7beef0ee4UL,
0xc6e00bf33da88fc2UL,
0xd5a79147930aa725UL,
0x6ca6351e003826fUL,
0x142929670a0e6e70UL,
0x27b70a8546d22ffcUL,
0x2e1b21385c26c926UL,
0x4d2c6dfc5ac42aedUL,
0x53380d139d95b3dfUL,
0x650a73548baf63deUL,
0x766a0abb3c77b2a8UL,
0x81c2c92e47edaee6UL,
0x92722c851482353bUL,
0xa2bfe8a14cf10364UL,
0xa81a664bbc423001UL,
0xc24b8b70d0f89791UL,
0xc76c51a30654be30UL,
0xd192e819d6ef5218UL,
0xd69906245565a910UL,
0xf40e35855771202aUL,
0x106aa07032bbd1b8UL,
0x19a4c116b8d2d0c8UL,
0x1e376c085141ab53UL,
0x2748774cdf8eeb99UL,
0x34b0bcb5e19b48a8UL,
0x391c0cb3c5c95a63UL,
0x4ed8aa4ae3418acbUL,
0x5b9cca4f7763e373UL,
0x682e6ff3d6b2b8a3UL,
0x748f82ee5defb2fcUL,
0x78a5636f43172f60UL,
0x84c87814a1f0ab72UL,
0x8cc702081a6439ecUL,
0x90befffa23631e28UL,
0xa4506cebde82bde9UL,
0xbef9a3f7b2c67915UL,
0xc67178f2e372532bUL,
0xca273eceea26619cUL,
0xd186b8c721c0c207UL,
0xeada7dd6cde0eb1eUL,
0xf57d4f7fee6ed178UL,
0x6f067aa72176fbaUL,
0xa637dc5a2c898a6UL,
0x113f9804bef90daeUL,
0x1b710b35131c471bUL,
0x28db77f523047d84UL,
0x32caab7b40c72493UL,
0x3c9ebe0a15c9bebcUL,
0x431d67c49c100d4cUL,
0x4cc5d4becb3e42b6UL,
0x597f299cfc657e2aUL,
0x5fcb6fab3ad6faecUL,
0x6c44198c4a475817UL,
0x428a2f98d728ae22UL, 0x7137449123ef65cdUL,
0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
0x3956c25bf348b538UL, 0x59f111f1b605d019UL,
0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL,
0xd807aa98a3030242UL, 0x12835b0145706fbeUL,
0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL,
0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL,
0x9bdc06a725c71235UL, 0xc19bf174cf692694UL,
0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL,
0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL,
0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL,
0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL,
0x983e5152ee66dfabUL, 0xa831c66d2db43210UL,
0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL,
0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL,
0x06ca6351e003826fUL, 0x142929670a0e6e70UL,
0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL,
0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL,
0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL,
0x81c2c92e47edaee6UL, 0x92722c851482353bUL,
0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL,
0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL,
0xd192e819d6ef5218UL, 0xd69906245565a910UL,
0xf40e35855771202aUL, 0x106aa07032bbd1b8UL,
0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL,
0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL,
0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL,
0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL,
0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL,
0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL,
0x90befffa23631e28UL, 0xa4506cebde82bde9UL,
0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL,
0xca273eceea26619cUL, 0xd186b8c721c0c207UL,
0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL,
0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL,
0x113f9804bef90daeUL, 0x1b710b35131c471bUL,
0x28db77f523047d84UL, 0x32caab7b40c72493UL,
0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL,
0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL,
0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL,
};
void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len);
void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
{
register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p;
register const byte* data asm ("r1") = (const byte*)data_p;
register word32 len asm ("r2") = (word32)len_p;
register uint64_t* L_SHA512_transform_len_k_c asm ("r3") = (uint64_t*)&L_SHA512_transform_len_k;
__asm__ __volatile__ (
"SUB sp, sp, #0xc0\n\t"
"MOV r3, %[L_SHA512_transform_len_k]\n\t"
@ -3578,9 +3543,9 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"BNE L_SHA512_transform_len_begin_%=\n\t"
"EOR r0, r0, r0\n\t"
"ADD sp, sp, #0xc0\n\t"
: [sha512] "+l" (sha512), [data] "+l" (data), [len] "+l" (len)
: [L_SHA512_transform_len_k] "r" (L_SHA512_transform_len_k)
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c)
:
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
);
}

View File

@ -56,7 +56,7 @@ typedef struct Gcm {
} Gcm;
WOLFSSL_LOCAL void GenerateM0(Gcm* gcm);
#if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_NO_NEON)
#ifdef WOLFSSL_ARMASM
WOLFSSL_LOCAL void GMULT(byte* X, byte* Y);
#endif
WOLFSSL_LOCAL void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c,

View File

@ -115,7 +115,7 @@ typedef struct {
void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p);
void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p);
void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p);
#define ge_p3_dbl(r, p) ge_p2_dbl((ge_p1p1 *)r, (ge_p2 *)p)
#define ge_p3_dbl(r, p) ge_p2_dbl((ge_p1p1 *)(r), (ge_p2 *)(p))
void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);