mirror of https://github.com/wolfSSL/wolfssl.git

AES ARM32 and Thumb2 ASM: fixup ARM32 and add Thumb2

Fix which functions and data are compiled in depending on defines. Better handling of constants. Also fix AArch64 ed25519 inline assembly.

pull/6731/head
parent 70c362f680
commit 0638ec234b
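The recurring change in the inline-assembly (*_c.c) files in this commit is how constant tables are handed to the asm blocks: the table address is bound to a register up front with a local register variable and passed as a "+r" operand, instead of being referenced through a "g" memory constraint and loaded with ldr inside the block. Below is a minimal sketch of that pattern for ARM32 GCC; it is not wolfSSL code, and the names (table, load_first_word) and the table contents are illustrative only.

#include <stdint.h>

static const uint32_t table[4] = { 1, 2, 3, 4 };

uint32_t load_first_word(void)
{
    uint32_t w;
    /* Bind the table address to a specific register so the asm body can use
     * it directly; declared as an in/out ("+r") operand to mirror the
     * pattern in this commit, where the asm may advance the pointer. */
    register uint32_t* table_c __asm__ ("r3") = (uint32_t*)table;

    __asm__ __volatile__ (
        "ldr %[w], [%[tbl]]\n\t"   /* read table[0] through the bound register */
        : [w] "=r" (w), [tbl] "+r" (table_c)
        :
        : "memory"
    );
    return w;
}

The standalone .S files get the matching treatment: each function gains a nearby pointer word (for example L_AES_ARM32_te: .word L_AES_ARM32_te_data) that is fetched with adr and then dereferenced, replacing the direct pc-relative ldr of the data label.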
@@ -2268,7 +2268,7 @@ AC_ARG_ENABLE([aescbc],
if test "$ENABLED_AESCBC" = "no"
then
AM_CFLAGS="$AM_CFLAGS -DNO_AES_CBC"
AM_CCASFLAGS="$AM_CCASFLAGS -DHAVE_AES_CBC"
AM_CCASFLAGS="$AM_CCASFLAGS -DNO_AES_CBC"
fi

# AES-CBC length checks (checks that input lengths are multiples of block size)
@@ -157,8 +157,8 @@ endif

if BUILD_AES
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes.c
if BUILD_ARMASM_NEON
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-aes.c
if BUILD_ARMASM_NEON
if !BUILD_ARMASM_CRYPTO
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c
@@ -166,7 +166,15 @@ else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm.S
endif !BUILD_ARMASM_INLINE
endif !BUILD_ARMASM_CRYPTO
endif BUILD_ARMASM_NEON
else
if BUILD_ARMASM
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm.S
endif !BUILD_ARMASM_INLINE
endif BUILD_ARMASM
endif !BUILD_ARMASM_NEON
endif BUILD_AES

if BUILD_AESNI
@@ -401,16 +409,26 @@ endif
if !BUILD_FIPS_CURRENT
if BUILD_AES
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes.c
if BUILD_ARMASM_NEON
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-aes.c
if BUILD_ARMASM_NEON
if !BUILD_ARMASM_CRYPTO
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm.S
endif !BUILD_ARMASM_INLINE
endif !BUILD_ARMASM_CRYPTO
endif BUILD_ARMASM_NEON
else
if BUILD_ARMASM
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm.S
endif !BUILD_ARMASM_INLINE
endif BUILD_ARMASM
endif !BUILD_ARMASM_NEON
if BUILD_AFALG
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/af_alg/afalg_aes.c
endif BUILD_AFALG
@@ -97,7 +97,7 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
#include <wolfcrypt/src/misc.c>
#endif

#if !defined(WOLFSSL_ARMASM) || defined(WOLFSSL_ARMASM_NO_NEON)
#ifndef WOLFSSL_ARMASM

#ifdef WOLFSSL_IMX6_CAAM_BLOB
/* case of possibly not using hardware acceleration for AES but using key
@@ -4573,7 +4573,7 @@ int wc_AesSetIV(Aes* aes, const byte* iv)
#endif /* NEED_AES_CTR_SOFT */

#endif /* WOLFSSL_AES_COUNTER */
#endif /* !WOLFSSL_ARMASM || WOLFSSL_ARMASM_NO_NEON */
#endif /* !WOLFSSL_ARMASM */


/*
@@ -4620,7 +4620,7 @@ static WC_INLINE void IncCtr(byte* ctr, word32 ctrSz)

#endif

#if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_NO_NEON)
#ifdef WOLFSSL_ARMASM
/* implementation is located in wolfcrypt/src/port/arm/armv8-aes.c */

#elif defined(WOLFSSL_AFALG)
@@ -8851,7 +8851,7 @@ int wc_AesCcmCheckTagSize(int sz)
return 0;
}

#if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_NO_NEON)
#ifdef WOLFSSL_ARMASM
/* implementation located in wolfcrypt/src/port/arm/armv8-aes.c */

#elif defined(HAVE_COLDFIRE_SEC)

@@ -921,10 +921,14 @@ int ge_compress_key(byte* out, const byte* xIn, const byte* yIn, word32 keySz)
{
ge_p2 g;
byte bArray[ED25519_KEY_SIZE];
byte x[ED25519_KEY_SIZE];
byte y[ED25519_KEY_SIZE];
word32 i;

fe_frombytes(g.X, xIn);
fe_frombytes(g.Y, yIn);
XMEMCPY(x, xIn, ED25519_KEY_SIZE);
XMEMCPY(y, yIn, ED25519_KEY_SIZE);
fe_frombytes(g.X, x);
fe_frombytes(g.Y, y);
fe_1(g.Z);

ge_tobytes(bArray, &g);

@@ -33,6 +33,7 @@
#if !defined(__aarch64__) && defined(__arm__)
#ifndef WOLFSSL_ARMASM_INLINE
#ifndef NO_AES
#ifdef HAVE_AES_DECRYPT
.text
.type L_AES_ARM32_td_data, %object
.size L_AES_ARM32_td_data, 1024
@@ -294,6 +295,8 @@ L_AES_ARM32_td_data:
.word 0x70d532b6
.word 0x74486c5c
.word 0x42d0b857
#endif /* HAVE_AES_DECRYPT */
#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
.text
.type L_AES_ARM32_te_data, %object
.size L_AES_ARM32_te_data, 1024
@@ -555,18 +558,23 @@ L_AES_ARM32_te_data:
.word 0xfca85454
.word 0xd66dbbbb
.word 0x3a2c1616
#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#ifdef HAVE_AES_DECRYPT
.text
.type L_AES_ARM32_td, %object
.size L_AES_ARM32_td, 12
.align 4
L_AES_ARM32_td:
.word L_AES_ARM32_td_data
#endif /* HAVE_AES_DECRYPT */
#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
.text
.type L_AES_ARM32_te, %object
.size L_AES_ARM32_te, 12
.align 4
L_AES_ARM32_te:
.word L_AES_ARM32_te_data
#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#ifdef HAVE_AES_DECRYPT
.text
.align 4
@ -574,8 +582,10 @@ L_AES_ARM32_te:
|
|||
.type AES_invert_key, %function
|
||||
AES_invert_key:
|
||||
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
ldr r12, L_AES_ARM32_te
|
||||
ldr lr, L_AES_ARM32_td
|
||||
adr r12, L_AES_ARM32_te
|
||||
ldr r12, [r12]
|
||||
adr lr, L_AES_ARM32_td
|
||||
ldr lr, [lr]
|
||||
add r10, r0, r1, lsl #4
|
||||
mov r11, r1
|
||||
L_AES_invert_key_loop:
|
||||
|
@ -681,7 +691,8 @@ L_AES_ARM32_rcon:
|
|||
.type AES_set_encrypt_key, %function
|
||||
AES_set_encrypt_key:
|
||||
push {r4, r5, r6, r7, r8, lr}
|
||||
ldr r8, L_AES_ARM32_te
|
||||
adr r8, L_AES_ARM32_te
|
||||
ldr r8, [r8]
|
||||
adr lr, L_AES_ARM32_rcon
|
||||
cmp r1, #0x80
|
||||
beq L_AES_set_encrypt_key_start_128
|
||||
|
@ -911,7 +922,6 @@ L_AES_set_encrypt_key_loop_128:
|
|||
L_AES_set_encrypt_key_end:
|
||||
pop {r4, r5, r6, r7, r8, pc}
|
||||
.size AES_set_encrypt_key,.-AES_set_encrypt_key
|
||||
#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
|
||||
.text
|
||||
.align 4
|
||||
.globl AES_encrypt_block
|
||||
|
@ -1123,12 +1133,14 @@ L_AES_encrypt_block_nr:
|
|||
eor r7, r7, r11
|
||||
pop {pc}
|
||||
.size AES_encrypt_block,.-AES_encrypt_block
|
||||
#if defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
|
||||
.text
|
||||
.type L_AES_ARM32_te_ecb, %object
|
||||
.size L_AES_ARM32_te_ecb, 12
|
||||
.align 4
|
||||
L_AES_ARM32_te_ecb:
|
||||
.word L_AES_ARM32_te_data
|
||||
#endif /* HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
|
||||
#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
|
||||
.text
|
||||
.align 4
|
||||
|
@ -1137,7 +1149,8 @@ L_AES_ARM32_te_ecb:
|
|||
AES_ECB_encrypt:
|
||||
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
mov lr, r0
|
||||
ldr r0, L_AES_ARM32_te_ecb
|
||||
adr r0, L_AES_ARM32_te_ecb
|
||||
ldr r0, [r0]
|
||||
ldr r12, [sp, #36]
|
||||
push {r3}
|
||||
cmp r12, #10
|
||||
|
@ -1259,7 +1272,8 @@ AES_CBC_encrypt:
|
|||
ldr r8, [sp, #36]
|
||||
ldr r9, [sp, #40]
|
||||
mov lr, r0
|
||||
ldr r0, L_AES_ARM32_te_ecb
|
||||
adr r0, L_AES_ARM32_te_ecb
|
||||
ldr r0, [r0]
|
||||
ldm r9, {r4, r5, r6, r7}
|
||||
push {r3, r9}
|
||||
cmp r8, #10
|
||||
|
@ -1394,7 +1408,8 @@ AES_CTR_encrypt:
|
|||
ldr r12, [sp, #36]
|
||||
ldr r8, [sp, #40]
|
||||
mov lr, r0
|
||||
ldr r0, L_AES_ARM32_te_ecb
|
||||
adr r0, L_AES_ARM32_te_ecb
|
||||
ldr r0, [r0]
|
||||
ldm r8, {r4, r5, r6, r7}
|
||||
rev r4, r4
|
||||
rev r5, r5
|
||||
|
@ -1540,7 +1555,6 @@ L_AES_CTR_encrypt_end:
|
|||
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
.size AES_CTR_encrypt,.-AES_CTR_encrypt
|
||||
#endif /* WOLFSSL_AES_COUNTER */
|
||||
#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
|
||||
#ifdef HAVE_AES_DECRYPT
|
||||
#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC)
|
||||
.text
|
||||
|
@ -2030,7 +2044,8 @@ AES_ECB_decrypt:
|
|||
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
ldr r8, [sp, #36]
|
||||
mov lr, r0
|
||||
ldr r0, L_AES_ARM32_td_ecb
|
||||
adr r0, L_AES_ARM32_td_ecb
|
||||
ldr r0, [r0]
|
||||
adr r12, L_AES_ARM32_td4
|
||||
cmp r8, #10
|
||||
beq L_AES_ECB_decrypt_start_block_128
|
||||
|
@ -2147,7 +2162,8 @@ AES_CBC_decrypt:
|
|||
ldr r8, [sp, #36]
|
||||
ldr r4, [sp, #40]
|
||||
mov lr, r0
|
||||
ldr r0, L_AES_ARM32_td_ecb
|
||||
adr r0, L_AES_ARM32_td_ecb
|
||||
ldr r0, [r0]
|
||||
adr r12, L_AES_ARM32_td4
|
||||
push {r3, r4}
|
||||
cmp r8, #10
|
||||
|
@ -3118,7 +3134,8 @@ AES_GCM_encrypt:
|
|||
ldr r12, [sp, #36]
|
||||
ldr r8, [sp, #40]
|
||||
mov lr, r0
|
||||
ldr r0, L_AES_ARM32_te_gcm
|
||||
adr r0, L_AES_ARM32_te_gcm
|
||||
ldr r0, [r0]
|
||||
ldm r8, {r4, r5, r6, r7}
|
||||
rev r4, r4
|
||||
rev r5, r5
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__arm__)
|
||||
|
@ -36,10 +37,12 @@
|
|||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
#ifdef WOLFSSL_ARMASM_INLINE
|
||||
#ifndef NO_AES
|
||||
#include <wolfssl/wolfcrypt/aes.h>
|
||||
|
||||
#ifdef HAVE_AES_DECRYPT
|
||||
static const uint32_t L_AES_ARM32_td_data[] = {
|
||||
0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e,
|
||||
0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
|
||||
|
@ -107,6 +110,8 @@ static const uint32_t L_AES_ARM32_td_data[] = {
|
|||
0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857,
|
||||
};
|
||||
|
||||
#endif /* HAVE_AES_DECRYPT */
|
||||
#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
|
||||
static const uint32_t L_AES_ARM32_te_data[] = {
|
||||
0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b,
|
||||
0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
|
||||
|
@ -174,18 +179,25 @@ static const uint32_t L_AES_ARM32_te_data[] = {
|
|||
0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
|
||||
};
|
||||
|
||||
#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
|
||||
#ifdef HAVE_AES_DECRYPT
|
||||
static const uint32_t* L_AES_ARM32_td = L_AES_ARM32_td_data;
|
||||
#endif /* HAVE_AES_DECRYPT */
|
||||
#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
|
||||
static const uint32_t* L_AES_ARM32_te = L_AES_ARM32_te_data;
|
||||
#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
|
||||
#ifdef HAVE_AES_DECRYPT
|
||||
void AES_invert_key(unsigned char* ks, word32 rounds);
|
||||
void AES_invert_key(unsigned char* ks_p, word32 rounds_p)
|
||||
{
|
||||
register unsigned char* ks asm ("r0") = ks_p;
|
||||
register word32 rounds asm ("r1") = rounds_p;
|
||||
register unsigned char* ks asm ("r0") = (unsigned char*)ks_p;
|
||||
register word32 rounds asm ("r1") = (word32)rounds_p;
|
||||
register uint32_t* L_AES_ARM32_te_c asm ("r2") = (uint32_t*)L_AES_ARM32_te;
|
||||
register uint32_t* L_AES_ARM32_td_c asm ("r3") = (uint32_t*)L_AES_ARM32_td;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"ldr r12, %[L_AES_ARM32_te]\n\t"
|
||||
"ldr lr, %[L_AES_ARM32_td]\n\t"
|
||||
"mov r12, %[L_AES_ARM32_te]\n\t"
|
||||
"mov lr, %[L_AES_ARM32_td]\n\t"
|
||||
"add r10, %[ks], %[rounds], lsl #4\n\t"
|
||||
"mov r11, %[rounds]\n\t"
|
||||
"\n"
|
||||
|
@ -269,9 +281,9 @@ void AES_invert_key(unsigned char* ks_p, word32 rounds_p)
|
|||
"str r8, [%[ks]], #4\n\t"
|
||||
"subs r11, r11, #1\n\t"
|
||||
"bne L_AES_invert_key_mix_loop_%=\n\t"
|
||||
: [ks] "+r" (ks), [rounds] "+r" (rounds)
|
||||
: [L_AES_ARM32_te] "g" (L_AES_ARM32_te), [L_AES_ARM32_td] "g" (L_AES_ARM32_td)
|
||||
: "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
|
||||
: [ks] "+r" (ks), [rounds] "+r" (rounds), [L_AES_ARM32_te] "+r" (L_AES_ARM32_te_c), [L_AES_ARM32_td] "+r" (L_AES_ARM32_td_c)
|
||||
:
|
||||
: "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -285,9 +297,11 @@ static const uint32_t L_AES_ARM32_rcon[] = {
|
|||
void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks);
|
||||
void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char* ks_p)
|
||||
{
|
||||
register const unsigned char* key asm ("r0") = key_p;
|
||||
register word32 len asm ("r1") = len_p;
|
||||
register unsigned char* ks asm ("r2") = ks_p;
|
||||
register const unsigned char* key asm ("r0") = (const unsigned char*)key_p;
|
||||
register word32 len asm ("r1") = (word32)len_p;
|
||||
register unsigned char* ks asm ("r2") = (unsigned char*)ks_p;
|
||||
register uint32_t* L_AES_ARM32_te_c asm ("r3") = (uint32_t*)L_AES_ARM32_te;
|
||||
register uint32_t* L_AES_ARM32_rcon_c asm ("r4") = (uint32_t*)&L_AES_ARM32_rcon;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"mov r8, %[L_AES_ARM32_te]\n\t"
|
||||
|
@ -524,20 +538,19 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char
|
|||
"bne L_AES_set_encrypt_key_loop_128_%=\n\t"
|
||||
"\n"
|
||||
"L_AES_set_encrypt_key_end_%=: \n\t"
|
||||
: [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks)
|
||||
: [L_AES_ARM32_te] "g" (L_AES_ARM32_te), [L_AES_ARM32_rcon] "g" (L_AES_ARM32_rcon)
|
||||
: "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8"
|
||||
: [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks), [L_AES_ARM32_te] "+r" (L_AES_ARM32_te_c), [L_AES_ARM32_rcon] "+r" (L_AES_ARM32_rcon_c)
|
||||
:
|
||||
: "memory", "r12", "lr", "r5", "r6", "r7", "r8"
|
||||
);
|
||||
}
|
||||
|
||||
#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
|
||||
void AES_encrypt_block(const uint32_t* te, int nr, int len, const uint32_t* ks);
|
||||
void AES_encrypt_block(const uint32_t* te_p, int nr_p, int len_p, const uint32_t* ks_p)
|
||||
{
|
||||
register const uint32_t* te asm ("r0") = te_p;
|
||||
register int nr asm ("r1") = nr_p;
|
||||
register int len asm ("r2") = len_p;
|
||||
register const uint32_t* ks asm ("r3") = ks_p;
|
||||
register const uint32_t* te asm ("r0") = (const uint32_t*)te_p;
|
||||
register int nr asm ("r1") = (int)nr_p;
|
||||
register int len asm ("r2") = (int)len_p;
|
||||
register const uint32_t* ks asm ("r3") = (const uint32_t*)ks_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"\n"
|
||||
|
@ -750,20 +763,23 @@ void AES_encrypt_block(const uint32_t* te_p, int nr_p, int len_p, const uint32_t
|
|||
);
|
||||
}
|
||||
|
||||
#if defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
|
||||
static const uint32_t* L_AES_ARM32_te_ecb = L_AES_ARM32_te_data;
|
||||
#endif /* HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
|
||||
#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
|
||||
void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr);
|
||||
void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p)
|
||||
{
|
||||
register const unsigned char* in asm ("r0") = in_p;
|
||||
register unsigned char* out asm ("r1") = out_p;
|
||||
register unsigned long len asm ("r2") = len_p;
|
||||
register const unsigned char* ks asm ("r3") = ks_p;
|
||||
register int nr asm ("r4") = nr_p;
|
||||
register const unsigned char* in asm ("r0") = (const unsigned char*)in_p;
|
||||
register unsigned char* out asm ("r1") = (unsigned char*)out_p;
|
||||
register unsigned long len asm ("r2") = (unsigned long)len_p;
|
||||
register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p;
|
||||
register int nr asm ("r4") = (int)nr_p;
|
||||
register uint32_t* L_AES_ARM32_te_ecb_c asm ("r5") = (uint32_t*)L_AES_ARM32_te_ecb;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"mov lr, %[in]\n\t"
|
||||
"ldr r0, %[L_AES_ARM32_te_ecb]\n\t"
|
||||
"mov r0, %[L_AES_ARM32_te_ecb]\n\t"
|
||||
"mov r12, r4\n\t"
|
||||
"push {%[ks]}\n\t"
|
||||
"cmp r12, #10\n\t"
|
||||
|
@ -878,9 +894,9 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
|
|||
"\n"
|
||||
"L_AES_ECB_encrypt_end_%=: \n\t"
|
||||
"pop {%[ks]}\n\t"
|
||||
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr)
|
||||
: [L_AES_ARM32_te_ecb] "g" (L_AES_ARM32_te_ecb)
|
||||
: "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
|
||||
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_ARM32_te_ecb] "+r" (L_AES_ARM32_te_ecb_c)
|
||||
:
|
||||
: "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
|
||||
);
|
||||
(void)nr;
|
||||
}
|
||||
|
@ -890,18 +906,19 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
|
|||
void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv);
|
||||
void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p)
|
||||
{
|
||||
register const unsigned char* in asm ("r0") = in_p;
|
||||
register unsigned char* out asm ("r1") = out_p;
|
||||
register unsigned long len asm ("r2") = len_p;
|
||||
register const unsigned char* ks asm ("r3") = ks_p;
|
||||
register int nr asm ("r4") = nr_p;
|
||||
register unsigned char* iv asm ("r5") = iv_p;
|
||||
register const unsigned char* in asm ("r0") = (const unsigned char*)in_p;
|
||||
register unsigned char* out asm ("r1") = (unsigned char*)out_p;
|
||||
register unsigned long len asm ("r2") = (unsigned long)len_p;
|
||||
register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p;
|
||||
register int nr asm ("r4") = (int)nr_p;
|
||||
register unsigned char* iv asm ("r5") = (unsigned char*)iv_p;
|
||||
register uint32_t* L_AES_ARM32_te_ecb_c asm ("r6") = (uint32_t*)L_AES_ARM32_te_ecb;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"mov r8, r4\n\t"
|
||||
"mov r9, r5\n\t"
|
||||
"mov lr, %[in]\n\t"
|
||||
"ldr r0, %[L_AES_ARM32_te_ecb]\n\t"
|
||||
"mov r0, %[L_AES_ARM32_te_ecb]\n\t"
|
||||
"ldm r9, {r4, r5, r6, r7}\n\t"
|
||||
"push {%[ks], r9}\n\t"
|
||||
"cmp r8, #10\n\t"
|
||||
|
@ -1029,9 +1046,9 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
|
|||
"L_AES_CBC_encrypt_end_%=: \n\t"
|
||||
"pop {%[ks], r9}\n\t"
|
||||
"stm r9, {r4, r5, r6, r7}\n\t"
|
||||
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv)
|
||||
: [L_AES_ARM32_te_ecb] "g" (L_AES_ARM32_te_ecb)
|
||||
: "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
|
||||
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), [L_AES_ARM32_te_ecb] "+r" (L_AES_ARM32_te_ecb_c)
|
||||
:
|
||||
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
|
||||
);
|
||||
(void)nr;
|
||||
(void)iv;
|
||||
|
@ -1042,18 +1059,19 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
|
|||
void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr);
|
||||
void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p)
|
||||
{
|
||||
register const unsigned char* in asm ("r0") = in_p;
|
||||
register unsigned char* out asm ("r1") = out_p;
|
||||
register unsigned long len asm ("r2") = len_p;
|
||||
register const unsigned char* ks asm ("r3") = ks_p;
|
||||
register int nr asm ("r4") = nr_p;
|
||||
register unsigned char* ctr asm ("r5") = ctr_p;
|
||||
register const unsigned char* in asm ("r0") = (const unsigned char*)in_p;
|
||||
register unsigned char* out asm ("r1") = (unsigned char*)out_p;
|
||||
register unsigned long len asm ("r2") = (unsigned long)len_p;
|
||||
register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p;
|
||||
register int nr asm ("r4") = (int)nr_p;
|
||||
register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p;
|
||||
register uint32_t* L_AES_ARM32_te_ecb_c asm ("r6") = (uint32_t*)L_AES_ARM32_te_ecb;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"mov r12, r4\n\t"
|
||||
"mov r8, r5\n\t"
|
||||
"mov lr, %[in]\n\t"
|
||||
"ldr r0, %[L_AES_ARM32_te_ecb]\n\t"
|
||||
"mov r0, %[L_AES_ARM32_te_ecb]\n\t"
|
||||
"ldm r8, {r4, r5, r6, r7}\n\t"
|
||||
"rev r4, r4\n\t"
|
||||
"rev r5, r5\n\t"
|
||||
|
@ -1202,23 +1220,22 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
|
|||
"rev r6, r6\n\t"
|
||||
"rev r7, r7\n\t"
|
||||
"stm r8, {r4, r5, r6, r7}\n\t"
|
||||
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr)
|
||||
: [L_AES_ARM32_te_ecb] "g" (L_AES_ARM32_te_ecb)
|
||||
: "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
|
||||
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), [L_AES_ARM32_te_ecb] "+r" (L_AES_ARM32_te_ecb_c)
|
||||
:
|
||||
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
|
||||
);
|
||||
(void)nr;
|
||||
(void)ctr;
|
||||
}
|
||||
|
||||
#endif /* WOLFSSL_AES_COUNTER */
|
||||
#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
|
||||
#ifdef HAVE_AES_DECRYPT
|
||||
#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC)
|
||||
void AES_decrypt_block(const uint32_t* td, int nr);
|
||||
void AES_decrypt_block(const uint32_t* td_p, int nr_p)
|
||||
{
|
||||
register const uint32_t* td asm ("r0") = td_p;
|
||||
register int nr asm ("r1") = nr_p;
|
||||
register const uint32_t* td asm ("r0") = (const uint32_t*)td_p;
|
||||
register int nr asm ("r1") = (int)nr_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"\n"
|
||||
|
@ -1471,17 +1488,19 @@ static const unsigned char L_AES_ARM32_td4[] = {
|
|||
void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr);
|
||||
void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p)
|
||||
{
|
||||
register const unsigned char* in asm ("r0") = in_p;
|
||||
register unsigned char* out asm ("r1") = out_p;
|
||||
register unsigned long len asm ("r2") = len_p;
|
||||
register const unsigned char* ks asm ("r3") = ks_p;
|
||||
register int nr asm ("r4") = nr_p;
|
||||
register const unsigned char* in asm ("r0") = (const unsigned char*)in_p;
|
||||
register unsigned char* out asm ("r1") = (unsigned char*)out_p;
|
||||
register unsigned long len asm ("r2") = (unsigned long)len_p;
|
||||
register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p;
|
||||
register int nr asm ("r4") = (int)nr_p;
|
||||
register uint32_t* L_AES_ARM32_td_ecb_c asm ("r5") = (uint32_t*)L_AES_ARM32_td_ecb;
|
||||
register unsigned char* L_AES_ARM32_td4_c asm ("r6") = (unsigned char*)&L_AES_ARM32_td4;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"mov r8, r4\n\t"
|
||||
"mov lr, %[in]\n\t"
|
||||
"ldr r0, %[L_AES_ARM32_td_ecb]\n\t"
|
||||
"ldr r12, %[L_AES_ARM32_td4]\n\t"
|
||||
"mov r0, %[L_AES_ARM32_td_ecb]\n\t"
|
||||
"mov r12, %[L_AES_ARM32_td4]\n\t"
|
||||
"cmp r8, #10\n\t"
|
||||
"beq L_AES_ECB_decrypt_start_block_128_%=\n\t"
|
||||
"cmp r8, #12\n\t"
|
||||
|
@ -1590,9 +1609,9 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
|
|||
"bne L_AES_ECB_decrypt_loop_block_128_%=\n\t"
|
||||
"\n"
|
||||
"L_AES_ECB_decrypt_end_%=: \n\t"
|
||||
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr)
|
||||
: [L_AES_ARM32_td_ecb] "g" (L_AES_ARM32_td_ecb), [L_AES_ARM32_td4] "g" (L_AES_ARM32_td4)
|
||||
: "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
|
||||
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_ARM32_td_ecb] "+r" (L_AES_ARM32_td_ecb_c), [L_AES_ARM32_td4] "+r" (L_AES_ARM32_td4_c)
|
||||
:
|
||||
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
|
||||
);
|
||||
(void)nr;
|
||||
}
|
||||
|
@ -1602,19 +1621,21 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
|
|||
void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv);
|
||||
void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p)
|
||||
{
|
||||
register const unsigned char* in asm ("r0") = in_p;
|
||||
register unsigned char* out asm ("r1") = out_p;
|
||||
register unsigned long len asm ("r2") = len_p;
|
||||
register const unsigned char* ks asm ("r3") = ks_p;
|
||||
register int nr asm ("r4") = nr_p;
|
||||
register unsigned char* iv asm ("r5") = iv_p;
|
||||
register const unsigned char* in asm ("r0") = (const unsigned char*)in_p;
|
||||
register unsigned char* out asm ("r1") = (unsigned char*)out_p;
|
||||
register unsigned long len asm ("r2") = (unsigned long)len_p;
|
||||
register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p;
|
||||
register int nr asm ("r4") = (int)nr_p;
|
||||
register unsigned char* iv asm ("r5") = (unsigned char*)iv_p;
|
||||
register uint32_t* L_AES_ARM32_td_ecb_c asm ("r6") = (uint32_t*)L_AES_ARM32_td_ecb;
|
||||
register unsigned char* L_AES_ARM32_td4_c asm ("r7") = (unsigned char*)&L_AES_ARM32_td4;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"mov r8, r4\n\t"
|
||||
"mov r4, r5\n\t"
|
||||
"mov lr, %[in]\n\t"
|
||||
"ldr r0, %[L_AES_ARM32_td_ecb]\n\t"
|
||||
"ldr r12, %[L_AES_ARM32_td4]\n\t"
|
||||
"mov r0, %[L_AES_ARM32_td_ecb]\n\t"
|
||||
"mov r12, %[L_AES_ARM32_td4]\n\t"
|
||||
"push {%[ks]-r4}\n\t"
|
||||
"cmp r8, #10\n\t"
|
||||
"beq L_AES_CBC_decrypt_loop_block_128_%=\n\t"
|
||||
|
@ -1992,9 +2013,9 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
|
|||
"\n"
|
||||
"L_AES_CBC_decrypt_end_%=: \n\t"
|
||||
"pop {%[ks]-r4}\n\t"
|
||||
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv)
|
||||
: [L_AES_ARM32_td_ecb] "g" (L_AES_ARM32_td_ecb), [L_AES_ARM32_td4] "g" (L_AES_ARM32_td4)
|
||||
: "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
|
||||
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), [L_AES_ARM32_td_ecb] "+r" (L_AES_ARM32_td_ecb_c), [L_AES_ARM32_td4] "+r" (L_AES_ARM32_td4_c)
|
||||
:
|
||||
: "memory", "r12", "lr", "r8", "r9", "r10", "r11"
|
||||
);
|
||||
(void)nr;
|
||||
(void)iv;
|
||||
|
@ -2014,13 +2035,14 @@ static const uint32_t L_GCM_gmult_len_r[] = {
|
|||
void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned char* data, unsigned long len);
|
||||
void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned char* data_p, unsigned long len_p)
|
||||
{
|
||||
register unsigned char* x asm ("r0") = x_p;
|
||||
register const unsigned char** m asm ("r1") = m_p;
|
||||
register const unsigned char* data asm ("r2") = data_p;
|
||||
register unsigned long len asm ("r3") = len_p;
|
||||
register unsigned char* x asm ("r0") = (unsigned char*)x_p;
|
||||
register const unsigned char** m asm ("r1") = (const unsigned char**)m_p;
|
||||
register const unsigned char* data asm ("r2") = (const unsigned char*)data_p;
|
||||
register unsigned long len asm ("r3") = (unsigned long)len_p;
|
||||
register uint32_t* L_GCM_gmult_len_r_c asm ("r4") = (uint32_t*)&L_GCM_gmult_len_r;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"ldr lr, %[L_GCM_gmult_len_r]\n\t"
|
||||
"mov lr, %[L_GCM_gmult_len_r]\n\t"
|
||||
"\n"
|
||||
"L_GCM_gmult_len_start_block_%=: \n\t"
|
||||
"push {r3}\n\t"
|
||||
|
@ -2568,9 +2590,9 @@ void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned
|
|||
"subs %[len], %[len], #16\n\t"
|
||||
"add %[data], %[data], #16\n\t"
|
||||
"bne L_GCM_gmult_len_start_block_%=\n\t"
|
||||
: [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len)
|
||||
: [L_AES_ARM32_td_ecb] "g" (L_AES_ARM32_td_ecb), [L_AES_ARM32_td4] "g" (L_AES_ARM32_td4), [L_GCM_gmult_len_r] "g" (L_GCM_gmult_len_r)
|
||||
: "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
|
||||
: [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len), [L_GCM_gmult_len_r] "+r" (L_GCM_gmult_len_r_c)
|
||||
:
|
||||
: "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -2578,18 +2600,19 @@ static const uint32_t* L_AES_ARM32_te_gcm = L_AES_ARM32_te_data;
|
|||
void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr);
|
||||
void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p)
|
||||
{
|
||||
register const unsigned char* in asm ("r0") = in_p;
|
||||
register unsigned char* out asm ("r1") = out_p;
|
||||
register unsigned long len asm ("r2") = len_p;
|
||||
register const unsigned char* ks asm ("r3") = ks_p;
|
||||
register int nr asm ("r4") = nr_p;
|
||||
register unsigned char* ctr asm ("r5") = ctr_p;
|
||||
register const unsigned char* in asm ("r0") = (const unsigned char*)in_p;
|
||||
register unsigned char* out asm ("r1") = (unsigned char*)out_p;
|
||||
register unsigned long len asm ("r2") = (unsigned long)len_p;
|
||||
register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p;
|
||||
register int nr asm ("r4") = (int)nr_p;
|
||||
register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p;
|
||||
register uint32_t* L_AES_ARM32_te_gcm_c asm ("r6") = (uint32_t*)L_AES_ARM32_te_gcm;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"mov r12, r4\n\t"
|
||||
"mov r8, r5\n\t"
|
||||
"mov lr, %[in]\n\t"
|
||||
"ldr r0, %[L_AES_ARM32_te_gcm]\n\t"
|
||||
"mov r0, %[L_AES_ARM32_te_gcm]\n\t"
|
||||
"ldm r8, {r4, r5, r6, r7}\n\t"
|
||||
"rev r4, r4\n\t"
|
||||
"rev r5, r5\n\t"
|
||||
|
@ -2729,9 +2752,9 @@ void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
|
|||
"rev r6, r6\n\t"
|
||||
"rev r7, r7\n\t"
|
||||
"stm r8, {r4, r5, r6, r7}\n\t"
|
||||
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr)
|
||||
: [L_AES_ARM32_te_gcm] "g" (L_AES_ARM32_te_gcm)
|
||||
: "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
|
||||
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), [L_AES_ARM32_te_gcm] "+r" (L_AES_ARM32_te_gcm_c)
|
||||
:
|
||||
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
|
||||
);
|
||||
(void)nr;
|
||||
(void)ctr;
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__arm__)
|
||||
|
@ -36,6 +37,7 @@
|
|||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
#ifdef WOLFSSL_ARMASM_INLINE
|
||||
/* Based on work by: Emil Lenngren
|
||||
* https://github.com/pornin/X25519-Cortex-M4
|
||||
|
@ -50,7 +52,6 @@
|
|||
|
||||
void fe_init()
|
||||
{
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"\n\t"
|
||||
:
|
||||
|
@ -62,7 +63,6 @@ void fe_init()
|
|||
void fe_add_sub_op(void);
|
||||
void fe_add_sub_op()
|
||||
{
|
||||
|
||||
__asm__ __volatile__ (
|
||||
/* Add-Sub */
|
||||
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
|
||||
|
@ -269,7 +269,6 @@ void fe_add_sub_op()
|
|||
void fe_sub_op(void);
|
||||
void fe_sub_op()
|
||||
{
|
||||
|
||||
__asm__ __volatile__ (
|
||||
/* Sub */
|
||||
"ldm r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t"
|
||||
|
@ -307,9 +306,9 @@ void fe_sub_op()
|
|||
|
||||
void fe_sub(fe r_p, const fe a_p, const fe b_p)
|
||||
{
|
||||
register fe r asm ("r0") = r_p;
|
||||
register const fe a asm ("r1") = a_p;
|
||||
register const fe b asm ("r2") = b_p;
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register const sword32* a asm ("r1") = (const sword32*)a_p;
|
||||
register const sword32* b asm ("r2") = (const sword32*)b_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"bl fe_sub_op\n\t"
|
||||
|
@ -322,7 +321,6 @@ void fe_sub(fe r_p, const fe a_p, const fe b_p)
|
|||
void fe_add_op(void);
|
||||
void fe_add_op()
|
||||
{
|
||||
|
||||
__asm__ __volatile__ (
|
||||
/* Add */
|
||||
"ldm r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t"
|
||||
|
@ -361,9 +359,9 @@ void fe_add_op()
|
|||
|
||||
void fe_add(fe r_p, const fe a_p, const fe b_p)
|
||||
{
|
||||
register fe r asm ("r0") = r_p;
|
||||
register const fe a asm ("r1") = a_p;
|
||||
register const fe b asm ("r2") = b_p;
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register const sword32* a asm ("r1") = (const sword32*)a_p;
|
||||
register const sword32* b asm ("r2") = (const sword32*)b_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"bl fe_add_op\n\t"
|
||||
|
@ -376,8 +374,8 @@ void fe_add(fe r_p, const fe a_p, const fe b_p)
|
|||
#ifdef HAVE_ED25519
|
||||
void fe_frombytes(fe out_p, const unsigned char* in_p)
|
||||
{
|
||||
register fe out asm ("r0") = out_p;
|
||||
register const unsigned char* in asm ("r1") = in_p;
|
||||
register sword32* out asm ("r0") = (sword32*)out_p;
|
||||
register const unsigned char* in asm ("r1") = (const unsigned char*)in_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"ldm %[in], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
|
||||
|
@ -391,8 +389,8 @@ void fe_frombytes(fe out_p, const unsigned char* in_p)
|
|||
|
||||
void fe_tobytes(unsigned char* out_p, const fe n_p)
|
||||
{
|
||||
register unsigned char* out asm ("r0") = out_p;
|
||||
register const fe n asm ("r1") = n_p;
|
||||
register unsigned char* out asm ("r0") = (unsigned char*)out_p;
|
||||
register const sword32* n asm ("r1") = (const sword32*)n_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"ldm %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
|
||||
|
@ -424,7 +422,7 @@ void fe_tobytes(unsigned char* out_p, const fe n_p)
|
|||
|
||||
void fe_1(fe n_p)
|
||||
{
|
||||
register fe n asm ("r0") = n_p;
|
||||
register sword32* n asm ("r0") = (sword32*)n_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
/* Set one */
|
||||
|
@ -463,7 +461,7 @@ void fe_1(fe n_p)
|
|||
|
||||
void fe_0(fe n_p)
|
||||
{
|
||||
register fe n asm ("r0") = n_p;
|
||||
register sword32* n asm ("r0") = (sword32*)n_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
/* Set zero */
|
||||
|
@ -501,8 +499,8 @@ void fe_0(fe n_p)
|
|||
|
||||
void fe_copy(fe r_p, const fe a_p)
|
||||
{
|
||||
register fe r asm ("r0") = r_p;
|
||||
register const fe a asm ("r1") = a_p;
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register const sword32* a asm ("r1") = (const sword32*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
/* Copy */
|
||||
|
@ -562,8 +560,8 @@ void fe_copy(fe r_p, const fe a_p)
|
|||
|
||||
void fe_neg(fe r_p, const fe a_p)
|
||||
{
|
||||
register fe r asm ("r0") = r_p;
|
||||
register const fe a asm ("r1") = a_p;
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register const sword32* a asm ("r1") = (const sword32*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"mvn lr, #0\n\t"
|
||||
|
@ -589,7 +587,7 @@ void fe_neg(fe r_p, const fe a_p)
|
|||
|
||||
int fe_isnonzero(const fe a_p)
|
||||
{
|
||||
register const fe a asm ("r0") = a_p;
|
||||
register const sword32* a asm ("r0") = (const sword32*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"ldm %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
|
||||
|
@ -628,7 +626,7 @@ int fe_isnonzero(const fe a_p)
|
|||
|
||||
int fe_isnegative(const fe a_p)
|
||||
{
|
||||
register const fe a asm ("r0") = a_p;
|
||||
register const sword32* a asm ("r0") = (const sword32*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"ldm %[a]!, {r2, r3, r4, r5}\n\t"
|
||||
|
@ -655,9 +653,9 @@ int fe_isnegative(const fe a_p)
|
|||
#ifndef WC_NO_CACHE_RESISTANT
|
||||
void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
|
||||
{
|
||||
register fe* r asm ("r0") = r_p;
|
||||
register fe* base asm ("r1") = base_p;
|
||||
register signed char b asm ("r2") = b_p;
|
||||
register fe* r asm ("r0") = (fe*)r_p;
|
||||
register fe* base asm ("r1") = (fe*)base_p;
|
||||
register signed char b asm ("r2") = (signed char)b_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sxtb %[b], %[b]\n\t"
|
||||
|
@ -2364,9 +2362,9 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
|
|||
#else
|
||||
void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
|
||||
{
|
||||
register fe* r asm ("r0") = r_p;
|
||||
register fe* base asm ("r1") = base_p;
|
||||
register signed char b asm ("r2") = b_p;
|
||||
register fe* r asm ("r0") = (fe*)r_p;
|
||||
register fe* base asm ("r1") = (fe*)base_p;
|
||||
register signed char b asm ("r2") = (signed char)b_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sxtb %[b], %[b]\n\t"
|
||||
|
@ -2472,7 +2470,6 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
|
|||
void fe_mul_op(void);
|
||||
void fe_mul_op()
|
||||
{
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #44\n\t"
|
||||
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
|
||||
|
@ -2610,9 +2607,9 @@ void fe_mul_op()
|
|||
|
||||
void fe_mul(fe r_p, const fe a_p, const fe b_p)
|
||||
{
|
||||
register fe r asm ("r0") = r_p;
|
||||
register const fe a asm ("r1") = a_p;
|
||||
register const fe b asm ("r2") = b_p;
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register const sword32* a asm ("r1") = (const sword32*)a_p;
|
||||
register const sword32* b asm ("r2") = (const sword32*)b_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"bl fe_mul_op\n\t"
|
||||
|
@ -2625,7 +2622,6 @@ void fe_mul(fe r_p, const fe a_p, const fe b_p)
|
|||
void fe_sq_op(void);
|
||||
void fe_sq_op()
|
||||
{
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #32\n\t"
|
||||
"str r0, [sp, #28]\n\t"
|
||||
|
@ -2749,8 +2745,8 @@ void fe_sq_op()
|
|||
|
||||
void fe_sq(fe r_p, const fe a_p)
|
||||
{
|
||||
register fe r asm ("r0") = r_p;
|
||||
register const fe a asm ("r1") = a_p;
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register const sword32* a asm ("r1") = (const sword32*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"bl fe_sq_op\n\t"
|
||||
|
@ -2762,8 +2758,8 @@ void fe_sq(fe r_p, const fe a_p)
|
|||
|
||||
void fe_mul121666(fe r_p, fe a_p)
|
||||
{
|
||||
register fe r asm ("r0") = r_p;
|
||||
register fe a asm ("r1") = a_p;
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register sword32* a asm ("r1") = (sword32*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
/* Multiply by 121666 */
|
||||
|
@ -2808,9 +2804,9 @@ void fe_mul121666(fe r_p, fe a_p)
|
|||
#ifndef WC_NO_CACHE_RESISTANT
|
||||
int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
|
||||
{
|
||||
register byte* r asm ("r0") = r_p;
|
||||
register const byte* n asm ("r1") = n_p;
|
||||
register const byte* a asm ("r2") = a_p;
|
||||
register byte* r asm ("r0") = (byte*)r_p;
|
||||
register const byte* n asm ("r1") = (const byte*)n_p;
|
||||
register const byte* a asm ("r2") = (const byte*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #0xbc\n\t"
|
||||
|
@ -3423,9 +3419,9 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
|
|||
#else
|
||||
int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
|
||||
{
|
||||
register byte* r asm ("r0") = r_p;
|
||||
register const byte* n asm ("r1") = n_p;
|
||||
register const byte* a asm ("r2") = a_p;
|
||||
register byte* r asm ("r0") = (byte*)r_p;
|
||||
register const byte* n asm ("r1") = (const byte*)n_p;
|
||||
register const byte* a asm ("r2") = (const byte*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #0xc0\n\t"
|
||||
|
@ -3802,8 +3798,8 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
|
|||
#ifdef HAVE_ED25519
|
||||
void fe_invert(fe r_p, const fe a_p)
|
||||
{
|
||||
register fe r asm ("r0") = r_p;
|
||||
register const fe a asm ("r1") = a_p;
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register const sword32* a asm ("r1") = (const sword32*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #0x88\n\t"
|
||||
|
@ -3972,8 +3968,8 @@ void fe_invert(fe r_p, const fe a_p)
|
|||
|
||||
void fe_sq2(fe r_p, const fe a_p)
|
||||
{
|
||||
register fe r asm ("r0") = r_p;
|
||||
register const fe a asm ("r1") = a_p;
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register const sword32* a asm ("r1") = (const sword32*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #36\n\t"
|
||||
|
@ -4138,8 +4134,8 @@ void fe_sq2(fe r_p, const fe a_p)
|
|||
|
||||
void fe_pow22523(fe r_p, const fe a_p)
|
||||
{
|
||||
register fe r asm ("r0") = r_p;
|
||||
register const fe a asm ("r1") = a_p;
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register const sword32* a asm ("r1") = (const sword32*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #0x68\n\t"
|
||||
|
@ -4308,8 +4304,8 @@ void fe_pow22523(fe r_p, const fe a_p)
|
|||
|
||||
void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p)
|
||||
{
|
||||
register ge_p2 * r asm ("r0") = r_p;
|
||||
register const ge_p1p1 * p asm ("r1") = p_p;
|
||||
register ge_p2 * r asm ("r0") = (ge_p2 *)r_p;
|
||||
register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #8\n\t"
|
||||
|
@ -4338,8 +4334,8 @@ void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p)
|
|||
|
||||
void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p)
|
||||
{
|
||||
register ge_p3 * r asm ("r0") = r_p;
|
||||
register const ge_p1p1 * p asm ("r1") = p_p;
|
||||
register ge_p3 * r asm ("r0") = (ge_p3 *)r_p;
|
||||
register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #8\n\t"
|
||||
|
@ -4373,8 +4369,8 @@ void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p)
|
|||
|
||||
void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p)
|
||||
{
|
||||
register ge_p1p1 * r asm ("r0") = r_p;
|
||||
register const ge_p2 * p asm ("r1") = p_p;
|
||||
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
|
||||
register const ge_p2 * p asm ("r1") = (const ge_p2 *)p_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #8\n\t"
|
||||
|
@ -4420,9 +4416,9 @@ void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p)
|
|||
|
||||
void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
|
||||
{
|
||||
register ge_p1p1 * r asm ("r0") = r_p;
|
||||
register const ge_p3 * p asm ("r1") = p_p;
|
||||
register const ge_precomp * q asm ("r2") = q_p;
|
||||
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
|
||||
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
|
||||
register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #12\n\t"
|
||||
|
@ -4502,9 +4498,9 @@ void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
|
|||
|
||||
void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
|
||||
{
|
||||
register ge_p1p1 * r asm ("r0") = r_p;
|
||||
register const ge_p3 * p asm ("r1") = p_p;
|
||||
register const ge_precomp * q asm ("r2") = q_p;
|
||||
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
|
||||
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
|
||||
register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #12\n\t"
|
||||
|
@ -4585,9 +4581,9 @@ void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
|
|||
|
||||
void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
|
||||
{
|
||||
register ge_p1p1 * r asm ("r0") = r_p;
|
||||
register const ge_p3 * p asm ("r1") = p_p;
|
||||
register const ge_cached* q asm ("r2") = q_p;
|
||||
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
|
||||
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
|
||||
register const ge_cached* q asm ("r2") = (const ge_cached*)q_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #44\n\t"
|
||||
|
@ -4668,9 +4664,9 @@ void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
|
|||
|
||||
void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
|
||||
{
|
||||
register ge_p1p1 * r asm ("r0") = r_p;
|
||||
register const ge_p3 * p asm ("r1") = p_p;
|
||||
register const ge_cached* q asm ("r2") = q_p;
|
||||
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
|
||||
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
|
||||
register const ge_cached* q asm ("r2") = (const ge_cached*)q_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #44\n\t"
|
||||
|
@ -4751,7 +4747,7 @@ void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
|
|||
|
||||
void sc_reduce(byte* s_p)
|
||||
{
|
||||
register byte* s asm ("r0") = s_p;
|
||||
register byte* s asm ("r0") = (byte*)s_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #52\n\t"
|
||||
|
@ -5163,10 +5159,10 @@ void sc_reduce(byte* s_p)
|
|||
|
||||
void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
|
||||
{
|
||||
register byte* s asm ("r0") = s_p;
|
||||
register const byte* a asm ("r1") = a_p;
|
||||
register const byte* b asm ("r2") = b_p;
|
||||
register const byte* c asm ("r3") = c_p;
|
||||
register byte* s asm ("r0") = (byte*)s_p;
|
||||
register const byte* a asm ("r1") = (const byte*)a_p;
|
||||
register const byte* b asm ("r2") = (const byte*)b_p;
|
||||
register const byte* c asm ("r3") = (const byte*)c_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #0x50\n\t"
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__arm__)
|
||||
|
@ -36,6 +37,7 @@
|
|||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
#ifdef WOLFSSL_ARMASM_INLINE
|
||||
#ifndef NO_SHA256
|
||||
#include <wolfssl/wolfcrypt/sha256.h>
|
||||
|
@ -63,13 +65,13 @@ static const uint32_t L_SHA256_transform_len_k[] = {
|
|||
void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len);
|
||||
void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
|
||||
{
|
||||
register wc_Sha256* sha256 asm ("r0") = sha256_p;
|
||||
register const byte* data asm ("r1") = data_p;
|
||||
register word32 len asm ("r2") = len_p;
|
||||
register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p;
|
||||
register const byte* data asm ("r1") = (const byte*)data_p;
|
||||
register word32 len asm ("r2") = (word32)len_p;
|
||||
register uint32_t* L_SHA256_transform_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_len_k;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #0xc0\n\t"
|
||||
"mov r3, %[L_SHA256_transform_len_k]\n\t"
|
||||
/* Copy digest to add in at end */
|
||||
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
|
||||
"ldr r4, [%[sha256]]\n\t"
|
||||
|
@ -1587,9 +1589,9 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
|
|||
"add %[data], %[data], #0x40\n\t"
|
||||
"bne L_SHA256_transform_len_begin_%=\n\t"
|
||||
"add sp, sp, #0xc0\n\t"
|
||||
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len)
|
||||
: [L_SHA256_transform_len_k] "g" (L_SHA256_transform_len_k)
|
||||
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
|
||||
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c)
|
||||
:
|
||||
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -1619,9 +1621,10 @@ static const uint32_t L_SHA256_transform_neon_len_k[] = {
|
|||
void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len);
|
||||
void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
|
||||
{
|
||||
register wc_Sha256* sha256 asm ("r0") = sha256_p;
|
||||
register const byte* data asm ("r1") = data_p;
|
||||
register word32 len asm ("r2") = len_p;
|
||||
register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p;
|
||||
register const byte* data asm ("r1") = (const byte*)data_p;
|
||||
register word32 len asm ("r2") = (word32)len_p;
|
||||
register uint32_t* L_SHA256_transform_neon_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_neon_len_k;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #24\n\t"
|
||||
|
@ -2648,9 +2651,9 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
|
|||
"str r10, [sp, #8]\n\t"
|
||||
"bne L_SHA256_transform_neon_len_begin_%=\n\t"
|
||||
"add sp, sp, #24\n\t"
|
||||
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len)
|
||||
: [L_SHA256_transform_neon_len_k] "g" (L_SHA256_transform_neon_len_k)
|
||||
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11"
|
||||
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_neon_len_k] "+r" (L_SHA256_transform_neon_len_k_c)
|
||||
:
|
||||
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11"
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -7679,6 +7679,7 @@ L_SHA512_transform_neon_len_k:
|
|||
.type Transform_Sha512_Len, %function
|
||||
Transform_Sha512_Len:
|
||||
vpush {d8-d15}
|
||||
adr r3, L_SHA512_transform_neon_len_k
|
||||
# Load digest into working vars
|
||||
vldm.64 r0, {d0-d7}
|
||||
# Start of loop processing a block
|
||||
|
@ -7715,7 +7716,6 @@ L_SHA512_transform_neon_len_begin:
|
|||
vrev64.8 d30, d30
|
||||
vrev64.8 d31, d31
|
||||
#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */
|
||||
adr r3, L_SHA512_transform_neon_len_k
|
||||
mov r12, #4
|
||||
# Start of 16 rounds
|
||||
L_SHA512_transform_neon_len_start:
|
||||
|
@ -9164,6 +9164,7 @@ L_SHA512_transform_neon_len_start:
|
|||
#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */
|
||||
vstm.64 r0, {d0-d7}
|
||||
subs r2, r2, #0x80
|
||||
sub r3, r3, #0x280
|
||||
bne L_SHA512_transform_neon_len_begin
|
||||
vpop {d8-d15}
|
||||
bx lr
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__arm__)
|
||||
|
@ -36,64 +37,65 @@
|
|||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
#ifdef WOLFSSL_ARMASM_INLINE
|
||||
#ifdef WOLFSSL_SHA512
|
||||
#include <wolfssl/wolfcrypt/sha512.h>
|
||||
|
||||
#ifdef WOLFSSL_ARMASM_NO_NEON
|
||||
static const uint64_t L_SHA512_transform_len_k[] = {
|
||||
0x428a2f98d728ae22, 0x7137449123ef65cd,
|
||||
0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
|
||||
0x3956c25bf348b538, 0x59f111f1b605d019,
|
||||
0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
|
||||
0xd807aa98a3030242, 0x12835b0145706fbe,
|
||||
0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
|
||||
0x72be5d74f27b896f, 0x80deb1fe3b1696b1,
|
||||
0x9bdc06a725c71235, 0xc19bf174cf692694,
|
||||
0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
|
||||
0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
|
||||
0x2de92c6f592b0275, 0x4a7484aa6ea6e483,
|
||||
0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
|
||||
0x983e5152ee66dfab, 0xa831c66d2db43210,
|
||||
0xb00327c898fb213f, 0xbf597fc7beef0ee4,
|
||||
0xc6e00bf33da88fc2, 0xd5a79147930aa725,
|
||||
0x06ca6351e003826f, 0x142929670a0e6e70,
|
||||
0x27b70a8546d22ffc, 0x2e1b21385c26c926,
|
||||
0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
|
||||
0x650a73548baf63de, 0x766a0abb3c77b2a8,
|
||||
0x81c2c92e47edaee6, 0x92722c851482353b,
|
||||
0xa2bfe8a14cf10364, 0xa81a664bbc423001,
|
||||
0xc24b8b70d0f89791, 0xc76c51a30654be30,
|
||||
0xd192e819d6ef5218, 0xd69906245565a910,
|
||||
0xf40e35855771202a, 0x106aa07032bbd1b8,
|
||||
0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
|
||||
0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
|
||||
0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
|
||||
0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
|
||||
0x748f82ee5defb2fc, 0x78a5636f43172f60,
|
||||
0x84c87814a1f0ab72, 0x8cc702081a6439ec,
|
||||
0x90befffa23631e28, 0xa4506cebde82bde9,
|
||||
0xbef9a3f7b2c67915, 0xc67178f2e372532b,
|
||||
0xca273eceea26619c, 0xd186b8c721c0c207,
|
||||
0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
|
||||
0x06f067aa72176fba, 0x0a637dc5a2c898a6,
|
||||
0x113f9804bef90dae, 0x1b710b35131c471b,
|
||||
0x28db77f523047d84, 0x32caab7b40c72493,
|
||||
0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
|
||||
0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
|
||||
0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
|
||||
0x428a2f98d728ae22UL, 0x7137449123ef65cdUL,
|
||||
0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
|
||||
0x3956c25bf348b538UL, 0x59f111f1b605d019UL,
|
||||
0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL,
|
||||
0xd807aa98a3030242UL, 0x12835b0145706fbeUL,
|
||||
0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL,
|
||||
0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL,
|
||||
0x9bdc06a725c71235UL, 0xc19bf174cf692694UL,
|
||||
0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL,
|
||||
0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL,
|
||||
0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL,
|
||||
0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL,
|
||||
0x983e5152ee66dfabUL, 0xa831c66d2db43210UL,
|
||||
0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL,
|
||||
0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL,
|
||||
0x06ca6351e003826fUL, 0x142929670a0e6e70UL,
|
||||
0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL,
|
||||
0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL,
|
||||
0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL,
|
||||
0x81c2c92e47edaee6UL, 0x92722c851482353bUL,
|
||||
0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL,
|
||||
0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL,
|
||||
0xd192e819d6ef5218UL, 0xd69906245565a910UL,
|
||||
0xf40e35855771202aUL, 0x106aa07032bbd1b8UL,
|
||||
0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL,
|
||||
0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL,
|
||||
0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL,
|
||||
0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL,
|
||||
0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL,
|
||||
0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL,
|
||||
0x90befffa23631e28UL, 0xa4506cebde82bde9UL,
|
||||
0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL,
|
||||
0xca273eceea26619cUL, 0xd186b8c721c0c207UL,
|
||||
0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL,
|
||||
0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL,
|
||||
0x113f9804bef90daeUL, 0x1b710b35131c471bUL,
|
||||
0x28db77f523047d84UL, 0x32caab7b40c72493UL,
|
||||
0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL,
|
||||
0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL,
|
||||
0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL,
|
||||
};
|
||||
|
||||
void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len);
|
||||
void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
|
||||
{
|
||||
register wc_Sha512* sha512 asm ("r0") = sha512_p;
|
||||
register const byte* data asm ("r1") = data_p;
|
||||
register word32 len asm ("r2") = len_p;
|
||||
register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p;
|
||||
register const byte* data asm ("r1") = (const byte*)data_p;
|
||||
register word32 len asm ("r2") = (word32)len_p;
|
||||
register uint64_t* L_SHA512_transform_len_k_c asm ("r3") = (uint64_t*)&L_SHA512_transform_len_k;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #0xc0\n\t"
|
||||
"mov r3, %[L_SHA512_transform_len_k]\n\t"
|
||||
/* Copy digest to add in at end */
|
||||
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
|
||||
"ldr r4, [%[sha512]]\n\t"
|
||||
|
@ -7392,9 +7394,9 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
|
|||
"bne L_SHA512_transform_len_begin_%=\n\t"
|
||||
"eor r0, r0, r0\n\t"
|
||||
"add sp, sp, #0xc0\n\t"
|
||||
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len)
|
||||
: [L_SHA512_transform_len_k] "g" (L_SHA512_transform_len_k)
|
||||
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
|
||||
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c)
|
||||
:
|
||||
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
|
||||
);
|
||||
}
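Note on the constraint rewrite visible above: the old form passed the constant table as a "g" input operand and listed r3 as a clobber, while the new form binds a register variable to r3 and passes it as a read-write "+r" operand. A minimal sketch of that general GCC extended-asm pattern, for an ARM32 target and with illustrative names only (this is not wolfSSL code), is:

#include <stdint.h>

static const uint32_t example_table[4] = { 1, 2, 3, 4 };

uint32_t example_read_first(void)
{
    uint32_t out;
    /* Bind the table pointer to r3 before the asm block. Passing it as a
     * read-write ("+r") operand tells the compiler the register is both
     * consumed and possibly modified, so no separate r3 clobber is needed. */
    register const uint32_t* tbl asm ("r3") = example_table;
    __asm__ __volatile__ (
        "ldr %[out], [%[tbl]]\n\t"      /* load the first table entry */
        : [out] "=r" (out), [tbl] "+r" (tbl)
        :
        : "memory"
    );
    return out;
}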
|
||||
|
||||
|
@ -7403,54 +7405,55 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
|
|||
|
||||
#ifndef WOLFSSL_ARMASM_NO_NEON
|
||||
static const uint64_t L_SHA512_transform_neon_len_k[] = {
|
||||
0x428a2f98d728ae22, 0x7137449123ef65cd,
|
||||
0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
|
||||
0x3956c25bf348b538, 0x59f111f1b605d019,
|
||||
0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
|
||||
0xd807aa98a3030242, 0x12835b0145706fbe,
|
||||
0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
|
||||
0x72be5d74f27b896f, 0x80deb1fe3b1696b1,
|
||||
0x9bdc06a725c71235, 0xc19bf174cf692694,
|
||||
0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
|
||||
0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
|
||||
0x2de92c6f592b0275, 0x4a7484aa6ea6e483,
|
||||
0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
|
||||
0x983e5152ee66dfab, 0xa831c66d2db43210,
|
||||
0xb00327c898fb213f, 0xbf597fc7beef0ee4,
|
||||
0xc6e00bf33da88fc2, 0xd5a79147930aa725,
|
||||
0x06ca6351e003826f, 0x142929670a0e6e70,
|
||||
0x27b70a8546d22ffc, 0x2e1b21385c26c926,
|
||||
0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
|
||||
0x650a73548baf63de, 0x766a0abb3c77b2a8,
|
||||
0x81c2c92e47edaee6, 0x92722c851482353b,
|
||||
0xa2bfe8a14cf10364, 0xa81a664bbc423001,
|
||||
0xc24b8b70d0f89791, 0xc76c51a30654be30,
|
||||
0xd192e819d6ef5218, 0xd69906245565a910,
|
||||
0xf40e35855771202a, 0x106aa07032bbd1b8,
|
||||
0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
|
||||
0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
|
||||
0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
|
||||
0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
|
||||
0x748f82ee5defb2fc, 0x78a5636f43172f60,
|
||||
0x84c87814a1f0ab72, 0x8cc702081a6439ec,
|
||||
0x90befffa23631e28, 0xa4506cebde82bde9,
|
||||
0xbef9a3f7b2c67915, 0xc67178f2e372532b,
|
||||
0xca273eceea26619c, 0xd186b8c721c0c207,
|
||||
0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
|
||||
0x06f067aa72176fba, 0x0a637dc5a2c898a6,
|
||||
0x113f9804bef90dae, 0x1b710b35131c471b,
|
||||
0x28db77f523047d84, 0x32caab7b40c72493,
|
||||
0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
|
||||
0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
|
||||
0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
|
||||
0x428a2f98d728ae22UL, 0x7137449123ef65cdUL,
|
||||
0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
|
||||
0x3956c25bf348b538UL, 0x59f111f1b605d019UL,
|
||||
0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL,
|
||||
0xd807aa98a3030242UL, 0x12835b0145706fbeUL,
|
||||
0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL,
|
||||
0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL,
|
||||
0x9bdc06a725c71235UL, 0xc19bf174cf692694UL,
|
||||
0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL,
|
||||
0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL,
|
||||
0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL,
|
||||
0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL,
|
||||
0x983e5152ee66dfabUL, 0xa831c66d2db43210UL,
|
||||
0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL,
|
||||
0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL,
|
||||
0x06ca6351e003826fUL, 0x142929670a0e6e70UL,
|
||||
0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL,
|
||||
0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL,
|
||||
0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL,
|
||||
0x81c2c92e47edaee6UL, 0x92722c851482353bUL,
|
||||
0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL,
|
||||
0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL,
|
||||
0xd192e819d6ef5218UL, 0xd69906245565a910UL,
|
||||
0xf40e35855771202aUL, 0x106aa07032bbd1b8UL,
|
||||
0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL,
|
||||
0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL,
|
||||
0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL,
|
||||
0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL,
|
||||
0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL,
|
||||
0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL,
|
||||
0x90befffa23631e28UL, 0xa4506cebde82bde9UL,
|
||||
0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL,
|
||||
0xca273eceea26619cUL, 0xd186b8c721c0c207UL,
|
||||
0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL,
|
||||
0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL,
|
||||
0x113f9804bef90daeUL, 0x1b710b35131c471bUL,
|
||||
0x28db77f523047d84UL, 0x32caab7b40c72493UL,
|
||||
0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL,
|
||||
0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL,
|
||||
0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL,
|
||||
};
|
||||
|
||||
void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len);
|
||||
void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
|
||||
{
|
||||
register wc_Sha512* sha512 asm ("r0") = sha512_p;
|
||||
register const byte* data asm ("r1") = data_p;
|
||||
register word32 len asm ("r2") = len_p;
|
||||
register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p;
|
||||
register const byte* data asm ("r1") = (const byte*)data_p;
|
||||
register word32 len asm ("r2") = (word32)len_p;
|
||||
register uint64_t* L_SHA512_transform_neon_len_k_c asm ("r3") = (uint64_t*)&L_SHA512_transform_neon_len_k;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
/* Load digest into working vars */
|
||||
|
@ -7490,7 +7493,6 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
|
|||
"vrev64.8 d30, d30\n\t"
|
||||
"vrev64.8 d31, d31\n\t"
|
||||
#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */
|
||||
"mov r3, %[L_SHA512_transform_neon_len_k]\n\t"
|
||||
"mov r12, #4\n\t"
|
||||
/* Start of 16 rounds */
|
||||
"\n"
|
||||
|
@ -8940,10 +8942,11 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
|
|||
#endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */
|
||||
"vstm.64 %[sha512], {d0-d7}\n\t"
|
||||
"subs %[len], %[len], #0x80\n\t"
|
||||
"sub r3, r3, #0x280\n\t"
|
||||
"bne L_SHA512_transform_neon_len_begin_%=\n\t"
|
||||
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len)
|
||||
: [L_SHA512_transform_neon_len_k] "g" (L_SHA512_transform_neon_len_k)
|
||||
: "memory", "r3", "r12", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_neon_len_k] "+r" (L_SHA512_transform_neon_len_k_c)
|
||||
:
|
||||
: "memory", "r12", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
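As a side note on the constant handling in the NEON path above: the 0x280 subtracted from r3 at the end of each block matches the byte size of the round-constant table (SHA-512 uses 80 64-bit constants, and 80 * 8 = 640 = 0x280), stepping the pointer back to the start of the table for the next block instead of reloading it. A one-line compile-time check of that arithmetic, assuming a C11 compiler and purely for illustration:

#include <stdint.h>
/* 80 SHA-512 round constants, 8 bytes each: 0x280 bytes in total. */
_Static_assert(80 * sizeof(uint64_t) == 0x280, "SHA-512 K table is 0x280 bytes");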
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#endif
|
||||
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
|
||||
#if !defined(NO_AES) && defined(WOLFSSL_ARMASM)
|
||||
|
||||
|
@ -41,7 +42,6 @@
|
|||
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
|
||||
|
||||
#include <wolfssl/wolfcrypt/aes.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
#include <wolfssl/wolfcrypt/logging.h>
|
||||
#ifdef NO_INLINE
|
||||
#include <wolfssl/wolfcrypt/misc.h>
|
||||
|
@ -5467,7 +5467,6 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
|
|||
#else /* !WOLFSSL_ARMASM_NO_HW_CRYPTO */
|
||||
|
||||
#include <wolfssl/wolfcrypt/logging.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
#include <wolfssl/wolfcrypt/aes.h>
|
||||
#ifdef NO_INLINE
|
||||
#include <wolfssl/wolfcrypt/misc.h>
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
|
||||
/* Generated using (from wolfssl):
|
||||
* cd ../scripts
|
||||
|
@ -6312,9 +6313,9 @@ void ge_msub(ge_p1p1* r, const ge_p3* p, const ge_precomp* q)
|
|||
__asm__ __volatile__ (
|
||||
"stp x29, x30, [sp, #-48]!\n\t"
|
||||
"add x29, sp, #0\n\t"
|
||||
"str %w[r], [x29, #16]\n\t"
|
||||
"str %w[p], [x29, #24]\n\t"
|
||||
"str %w[q], [x29, #32]\n\t"
|
||||
"str %x[r], [x29, #16]\n\t"
|
||||
"str %x[p], [x29, #24]\n\t"
|
||||
"str %x[q], [x29, #32]\n\t"
|
||||
"mov x3, x1\n\t"
|
||||
"add x2, x1, #32\n\t"
|
||||
"add x1, x0, #32\n\t"
|
||||
|
@ -6808,9 +6809,9 @@ void ge_add(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
|
|||
__asm__ __volatile__ (
|
||||
"stp x29, x30, [sp, #-48]!\n\t"
|
||||
"add x29, sp, #0\n\t"
|
||||
"str %w[r], [x29, #16]\n\t"
|
||||
"str %w[p], [x29, #24]\n\t"
|
||||
"str %w[q], [x29, #32]\n\t"
|
||||
"str %x[r], [x29, #16]\n\t"
|
||||
"str %x[p], [x29, #24]\n\t"
|
||||
"str %x[q], [x29, #32]\n\t"
|
||||
"mov x3, x1\n\t"
|
||||
"add x2, x1, #32\n\t"
|
||||
"add x1, x0, #32\n\t"
|
||||
|
@ -7430,9 +7431,9 @@ void ge_sub(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
|
|||
__asm__ __volatile__ (
|
||||
"stp x29, x30, [sp, #-48]!\n\t"
|
||||
"add x29, sp, #0\n\t"
|
||||
"str %w[r], [x29, #16]\n\t"
|
||||
"str %w[p], [x29, #24]\n\t"
|
||||
"str %w[q], [x29, #32]\n\t"
|
||||
"str %x[r], [x29, #16]\n\t"
|
||||
"str %x[p], [x29, #24]\n\t"
|
||||
"str %x[q], [x29, #32]\n\t"
|
||||
"mov x3, x1\n\t"
|
||||
"add x2, x1, #32\n\t"
|
||||
"add x1, x0, #32\n\t"
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
|
||||
/* Generated using (from wolfssl):
|
||||
* cd ../scripts
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
|
||||
/* Generated using (from wolfssl):
|
||||
* cd ../scripts
|
||||
|
|
File diff suppressed because it is too large
File diff suppressed because it is too large
|
@ -280,13 +280,14 @@ fe_1:
|
|||
# Set one
|
||||
MOV r2, #0x1
|
||||
MOV r3, #0x0
|
||||
STRD r2, r3, [r0]
|
||||
STM r0!, {r2, r3}
|
||||
MOV r2, #0x0
|
||||
STRD r2, r3, [r0, #8]
|
||||
STRD r2, r3, [r0, #16]
|
||||
STRD r2, r3, [r0, #24]
|
||||
STM r0!, {r2, r3}
|
||||
STM r0!, {r2, r3}
|
||||
STM r0!, {r2, r3}
|
||||
SUB r0, r0, #0x20
|
||||
BX lr
|
||||
# Cycle Count = 19
|
||||
# Cycle Count = 20
|
||||
.size fe_1,.-fe_1
|
||||
.text
|
||||
.align 4
|
||||
|
@ -296,12 +297,13 @@ fe_0:
|
|||
# Set zero
|
||||
MOV r2, #0x0
|
||||
MOV r3, #0x0
|
||||
STRD r2, r3, [r0]
|
||||
STRD r2, r3, [r0, #8]
|
||||
STRD r2, r3, [r0, #16]
|
||||
STRD r2, r3, [r0, #24]
|
||||
STM r0!, {r2, r3}
|
||||
STM r0!, {r2, r3}
|
||||
STM r0!, {r2, r3}
|
||||
STM r0!, {r2, r3}
|
||||
SUB r0, r0, #0x20
|
||||
BX lr
|
||||
# Cycle Count = 18
|
||||
# Cycle Count = 19
|
||||
.size fe_0,.-fe_0
|
||||
.text
|
||||
.align 4
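The fe_1 and fe_0 rewrites above trade the offset STRD pairs for post-incrementing STM stores and rewind the base register afterwards, which is why the cycle counts go up by one. For reference, a plain C model of what these two routines compute, assuming the field element is held as eight 32-bit words (not wolfSSL code, names illustrative):

#include <stdint.h>

typedef int32_t fe_words[8];   /* assumed layout: eight 32-bit words, 32 bytes */

/* fe_0: set the field element to zero. */
static void fe_0_model(fe_words n)
{
    for (int i = 0; i < 8; i++)
        n[i] = 0;
}

/* fe_1: set the field element to one (low word 1, remaining words zero). */
static void fe_1_model(fe_words n)
{
    n[0] = 1;
    for (int i = 1; i < 8; i++)
        n[i] = 0;
}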
|
||||
|
@ -1751,6 +1753,7 @@ fe_sq:
|
|||
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
# Cycle Count = 24
|
||||
.size fe_sq,.-fe_sq
|
||||
#ifdef HAVE_CURVE25519
|
||||
.text
|
||||
.align 4
|
||||
.globl fe_mul121666
|
||||
|
@ -1803,26 +1806,31 @@ curve25519:
|
|||
# Set one
|
||||
MOV r10, #0x1
|
||||
MOV r11, #0x0
|
||||
STRD r10, r11, [r0]
|
||||
STM r0!, {r10, r11}
|
||||
MOV r10, #0x0
|
||||
STRD r10, r11, [r0, #8]
|
||||
STRD r10, r11, [r0, #16]
|
||||
STRD r10, r11, [r0, #24]
|
||||
STM r0!, {r10, r11}
|
||||
STM r0!, {r10, r11}
|
||||
STM r0!, {r10, r11}
|
||||
SUB r0, r0, #0x20
|
||||
MOV r3, sp
|
||||
# Set zero
|
||||
MOV r10, #0x0
|
||||
MOV r11, #0x0
|
||||
STRD r10, r11, [sp]
|
||||
STRD r10, r11, [sp, #8]
|
||||
STRD r10, r11, [sp, #16]
|
||||
STRD r10, r11, [sp, #24]
|
||||
STM r3!, {r10, r11}
|
||||
STM r3!, {r10, r11}
|
||||
STM r3!, {r10, r11}
|
||||
STM r3!, {r10, r11}
|
||||
SUB r3, r3, #0x20
|
||||
ADD r3, sp, #0x20
|
||||
# Set one
|
||||
MOV r10, #0x1
|
||||
MOV r11, #0x0
|
||||
STRD r10, r11, [sp, #32]
|
||||
STM r3!, {r10, r11}
|
||||
MOV r10, #0x0
|
||||
STRD r10, r11, [sp, #40]
|
||||
STRD r10, r11, [sp, #48]
|
||||
STRD r10, r11, [sp, #56]
|
||||
STM r3!, {r10, r11}
|
||||
STM r3!, {r10, r11}
|
||||
STM r3!, {r10, r11}
|
||||
SUB r3, r3, #0x20
|
||||
ADD r3, sp, #0x40
|
||||
# Copy
|
||||
LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}
|
||||
|
@ -1845,8 +1853,10 @@ L_curve25519_bits:
|
|||
LDR r0, [sp, #160]
|
||||
# Conditional Swap
|
||||
RSB r1, r1, #0x0
|
||||
LDRD r4, r5, [r0]
|
||||
LDRD r6, r7, [sp, #64]
|
||||
MOV r3, r0
|
||||
ADD r12, sp, #0x40
|
||||
LDM r3, {r4, r5}
|
||||
LDM r12, {r6, r7}
|
||||
EOR r8, r4, r6
|
||||
EOR r9, r5, r7
|
||||
AND r8, r8, r1
|
||||
|
@ -1855,10 +1865,10 @@ L_curve25519_bits:
|
|||
EOR r5, r5, r9
|
||||
EOR r6, r6, r8
|
||||
EOR r7, r7, r9
|
||||
STRD r4, r5, [r0]
|
||||
STRD r6, r7, [sp, #64]
|
||||
LDRD r4, r5, [r0, #8]
|
||||
LDRD r6, r7, [sp, #72]
|
||||
STM r3!, {r4, r5}
|
||||
STM r12!, {r6, r7}
|
||||
LDM r3, {r4, r5}
|
||||
LDM r12, {r6, r7}
|
||||
EOR r8, r4, r6
|
||||
EOR r9, r5, r7
|
||||
AND r8, r8, r1
|
||||
|
@ -1867,10 +1877,10 @@ L_curve25519_bits:
|
|||
EOR r5, r5, r9
|
||||
EOR r6, r6, r8
|
||||
EOR r7, r7, r9
|
||||
STRD r4, r5, [r0, #8]
|
||||
STRD r6, r7, [sp, #72]
|
||||
LDRD r4, r5, [r0, #16]
|
||||
LDRD r6, r7, [sp, #80]
|
||||
STM r3!, {r4, r5}
|
||||
STM r12!, {r6, r7}
|
||||
LDM r3, {r4, r5}
|
||||
LDM r12, {r6, r7}
|
||||
EOR r8, r4, r6
|
||||
EOR r9, r5, r7
|
||||
AND r8, r8, r1
|
||||
|
@ -1879,10 +1889,10 @@ L_curve25519_bits:
|
|||
EOR r5, r5, r9
|
||||
EOR r6, r6, r8
|
||||
EOR r7, r7, r9
|
||||
STRD r4, r5, [r0, #16]
|
||||
STRD r6, r7, [sp, #80]
|
||||
LDRD r4, r5, [r0, #24]
|
||||
LDRD r6, r7, [sp, #88]
|
||||
STM r3!, {r4, r5}
|
||||
STM r12!, {r6, r7}
|
||||
LDM r3, {r4, r5}
|
||||
LDM r12, {r6, r7}
|
||||
EOR r8, r4, r6
|
||||
EOR r9, r5, r7
|
||||
AND r8, r8, r1
|
||||
|
@ -1891,13 +1901,15 @@ L_curve25519_bits:
|
|||
EOR r5, r5, r9
|
||||
EOR r6, r6, r8
|
||||
EOR r7, r7, r9
|
||||
STRD r4, r5, [r0, #24]
|
||||
STRD r6, r7, [sp, #88]
|
||||
STM r3!, {r4, r5}
|
||||
STM r12!, {r6, r7}
|
||||
LDR r1, [sp, #172]
|
||||
# Conditional Swap
|
||||
RSB r1, r1, #0x0
|
||||
LDRD r4, r5, [sp]
|
||||
LDRD r6, r7, [sp, #32]
|
||||
MOV r3, sp
|
||||
ADD r12, sp, #0x20
|
||||
LDM r3, {r4, r5}
|
||||
LDM r12, {r6, r7}
|
||||
EOR r8, r4, r6
|
||||
EOR r9, r5, r7
|
||||
AND r8, r8, r1
|
||||
|
@ -1906,10 +1918,10 @@ L_curve25519_bits:
|
|||
EOR r5, r5, r9
|
||||
EOR r6, r6, r8
|
||||
EOR r7, r7, r9
|
||||
STRD r4, r5, [sp]
|
||||
STRD r6, r7, [sp, #32]
|
||||
LDRD r4, r5, [sp, #8]
|
||||
LDRD r6, r7, [sp, #40]
|
||||
STM r3!, {r4, r5}
|
||||
STM r12!, {r6, r7}
|
||||
LDM r3, {r4, r5}
|
||||
LDM r12, {r6, r7}
|
||||
EOR r8, r4, r6
|
||||
EOR r9, r5, r7
|
||||
AND r8, r8, r1
|
||||
|
@ -1918,10 +1930,10 @@ L_curve25519_bits:
|
|||
EOR r5, r5, r9
|
||||
EOR r6, r6, r8
|
||||
EOR r7, r7, r9
|
||||
STRD r4, r5, [sp, #8]
|
||||
STRD r6, r7, [sp, #40]
|
||||
LDRD r4, r5, [sp, #16]
|
||||
LDRD r6, r7, [sp, #48]
|
||||
STM r3!, {r4, r5}
|
||||
STM r12!, {r6, r7}
|
||||
LDM r3, {r4, r5}
|
||||
LDM r12, {r6, r7}
|
||||
EOR r8, r4, r6
|
||||
EOR r9, r5, r7
|
||||
AND r8, r8, r1
|
||||
|
@ -1930,10 +1942,10 @@ L_curve25519_bits:
|
|||
EOR r5, r5, r9
|
||||
EOR r6, r6, r8
|
||||
EOR r7, r7, r9
|
||||
STRD r4, r5, [sp, #16]
|
||||
STRD r6, r7, [sp, #48]
|
||||
LDRD r4, r5, [sp, #24]
|
||||
LDRD r6, r7, [sp, #56]
|
||||
STM r3!, {r4, r5}
|
||||
STM r12!, {r6, r7}
|
||||
LDM r3, {r4, r5}
|
||||
LDM r12, {r6, r7}
|
||||
EOR r8, r4, r6
|
||||
EOR r9, r5, r7
|
||||
AND r8, r8, r1
|
||||
|
@ -1942,8 +1954,8 @@ L_curve25519_bits:
|
|||
EOR r5, r5, r9
|
||||
EOR r6, r6, r8
|
||||
EOR r7, r7, r9
|
||||
STRD r4, r5, [sp, #24]
|
||||
STRD r6, r7, [sp, #56]
|
||||
STM r3!, {r4, r5}
|
||||
STM r12!, {r6, r7}
|
||||
LDR r1, [sp, #184]
|
||||
STR r1, [sp, #172]
|
||||
MOV r3, sp
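The Conditional Swap blocks above turn the swap bit into an all-ones or all-zero mask (RSB reg, reg, #0 negates the bit) and then exchange word pairs through masked XORs, so the same loads and stores happen whether or not the swap takes effect. The same masked-swap idea in plain C, with hypothetical names, looks like this:

#include <stdint.h>

/* Constant-time conditional swap of two words; bit must be 0 or 1.
 * mask is 0x00000000 or 0xFFFFFFFF, and the XOR distance (a ^ b) & mask
 * is applied to both operands, so the values swap exactly when bit == 1. */
static void cswap32(uint32_t* a, uint32_t* b, uint32_t bit)
{
    uint32_t mask = (uint32_t)0U - bit;
    uint32_t t = (*a ^ *b) & mask;
    *a ^= t;
    *b ^= t;
}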
|
||||
|
@ -2165,7 +2177,7 @@ L_curve25519_inv_8:
|
|||
MOV r0, #0x0
|
||||
ADD sp, sp, #0xbc
|
||||
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
# Cycle Count = 684
|
||||
# Cycle Count = 693
|
||||
.size curve25519,.-curve25519
|
||||
#else
|
||||
.text
|
||||
|
@ -2188,26 +2200,31 @@ curve25519:
|
|||
# Set one
|
||||
MOV r10, #0x1
|
||||
MOV r11, #0x0
|
||||
STRD r10, r11, [r0]
|
||||
STM r0!, {r10, r11}
|
||||
MOV r10, #0x0
|
||||
STRD r10, r11, [r0, #8]
|
||||
STRD r10, r11, [r0, #16]
|
||||
STRD r10, r11, [r0, #24]
|
||||
STM r0!, {r10, r11}
|
||||
STM r0!, {r10, r11}
|
||||
STM r0!, {r10, r11}
|
||||
SUB r0, r0, #0x20
|
||||
MOV r3, sp
|
||||
# Set zero
|
||||
MOV r10, #0x0
|
||||
MOV r11, #0x0
|
||||
STRD r10, r11, [sp]
|
||||
STRD r10, r11, [sp, #8]
|
||||
STRD r10, r11, [sp, #16]
|
||||
STRD r10, r11, [sp, #24]
|
||||
STM r3!, {r10, r11}
|
||||
STM r3!, {r10, r11}
|
||||
STM r3!, {r10, r11}
|
||||
STM r3!, {r10, r11}
|
||||
SUB r3, r3, #0x20
|
||||
ADD r3, sp, #0x20
|
||||
# Set one
|
||||
MOV r10, #0x1
|
||||
MOV r11, #0x0
|
||||
STRD r10, r11, [sp, #32]
|
||||
STM r3!, {r10, r11}
|
||||
MOV r10, #0x0
|
||||
STRD r10, r11, [sp, #40]
|
||||
STRD r10, r11, [sp, #48]
|
||||
STRD r10, r11, [sp, #56]
|
||||
STM r3!, {r10, r11}
|
||||
STM r3!, {r10, r11}
|
||||
STM r3!, {r10, r11}
|
||||
SUB r3, r3, #0x20
|
||||
ADD r3, sp, #0x40
|
||||
# Copy
|
||||
LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}
|
||||
|
@ -2470,9 +2487,10 @@ L_curve25519_inv_8:
|
|||
MOV r0, #0x0
|
||||
ADD sp, sp, #0xc0
|
||||
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
# Cycle Count = 595
|
||||
# Cycle Count = 600
|
||||
.size curve25519,.-curve25519
|
||||
#endif /* WC_NO_CACHE_RESISTANT */
|
||||
#endif /* HAVE_CURVE25519 */
|
||||
#ifdef HAVE_ED25519
|
||||
.text
|
||||
.align 4
|
||||
|
|
|
@ -48,7 +48,7 @@
|
|||
#if defined(HAVE_CURVE25519) || defined(HAVE_ED25519)
|
||||
#if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL)
|
||||
|
||||
void fe_init(void)
|
||||
void fe_init()
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"\n\t"
|
||||
|
@ -59,7 +59,7 @@ void fe_init(void)
|
|||
}
|
||||
|
||||
void fe_add_sub_op(void);
|
||||
void fe_add_sub_op(void)
|
||||
void fe_add_sub_op()
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
/* Add-Sub */
|
||||
|
@ -156,7 +156,7 @@ void fe_add_sub_op(void)
|
|||
}
|
||||
|
||||
void fe_sub_op(void);
|
||||
void fe_sub_op(void)
|
||||
void fe_sub_op()
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
/* Sub */
|
||||
|
@ -190,18 +190,22 @@ void fe_sub_op(void)
|
|||
);
|
||||
}
|
||||
|
||||
void fe_sub(fe r, const fe a, const fe b)
|
||||
void fe_sub(fe r_p, const fe a_p, const fe b_p)
|
||||
{
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register const sword32* a asm ("r1") = (const sword32*)a_p;
|
||||
register const sword32* b asm ("r2") = (const sword32*)b_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"BL fe_sub_op\n\t"
|
||||
: [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
|
||||
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
|
||||
:
|
||||
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
|
||||
);
|
||||
}
|
||||
|
||||
void fe_add_op(void);
|
||||
void fe_add_op(void)
|
||||
void fe_add_op()
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
/* Add */
|
||||
|
@ -235,31 +239,41 @@ void fe_add_op(void)
|
|||
);
|
||||
}
|
||||
|
||||
void fe_add(fe r, const fe a, const fe b)
|
||||
void fe_add(fe r_p, const fe a_p, const fe b_p)
|
||||
{
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register const sword32* a asm ("r1") = (const sword32*)a_p;
|
||||
register const sword32* b asm ("r2") = (const sword32*)b_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"BL fe_add_op\n\t"
|
||||
: [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
|
||||
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
|
||||
:
|
||||
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
|
||||
);
|
||||
}
|
||||
|
||||
#ifdef HAVE_ED25519
|
||||
void fe_frombytes(fe out, const unsigned char* in)
|
||||
void fe_frombytes(fe out_p, const unsigned char* in_p)
|
||||
{
|
||||
register sword32* out asm ("r0") = (sword32*)out_p;
|
||||
register const unsigned char* in asm ("r1") = (const unsigned char*)in_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"LDM %[in], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
|
||||
"BFC r9, #31, #1\n\t"
|
||||
"STM %[out], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
|
||||
: [out] "+l" (out), [in] "+l" (in)
|
||||
: [out] "+r" (out), [in] "+r" (in)
|
||||
:
|
||||
: "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
|
||||
);
|
||||
}
|
||||
|
||||
void fe_tobytes(unsigned char* out, const fe n)
|
||||
void fe_tobytes(unsigned char* out_p, const fe n_p)
|
||||
{
|
||||
register unsigned char* out asm ("r0") = (unsigned char*)out_p;
|
||||
register const sword32* n asm ("r1") = (const sword32*)n_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"LDM %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
|
||||
"ADDS r10, r2, #0x13\n\t"
|
||||
|
@ -282,47 +296,56 @@ void fe_tobytes(unsigned char* out, const fe n)
|
|||
"ADC r9, r9, #0x0\n\t"
|
||||
"BFC r9, #31, #1\n\t"
|
||||
"STM %[out], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
|
||||
: [out] "+l" (out), [n] "+l" (n)
|
||||
: [out] "+r" (out), [n] "+r" (n)
|
||||
:
|
||||
: "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
|
||||
);
|
||||
}
|
||||
|
||||
void fe_1(fe n)
|
||||
void fe_1(fe n_p)
|
||||
{
|
||||
register sword32* n asm ("r0") = (sword32*)n_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
/* Set one */
|
||||
"MOV r2, #0x1\n\t"
|
||||
"MOV r3, #0x0\n\t"
|
||||
"STRD r2, r3, [%[n]]\n\t"
|
||||
"STM %[n]!, {r2, r3}\n\t"
|
||||
"MOV r2, #0x0\n\t"
|
||||
"STRD r2, r3, [%[n], #8]\n\t"
|
||||
"STRD r2, r3, [%[n], #16]\n\t"
|
||||
"STRD r2, r3, [%[n], #24]\n\t"
|
||||
: [n] "+l" (n)
|
||||
"STM %[n]!, {r2, r3}\n\t"
|
||||
"STM %[n]!, {r2, r3}\n\t"
|
||||
"STM %[n]!, {r2, r3}\n\t"
|
||||
"SUB %[n], %[n], #0x20\n\t"
|
||||
: [n] "+r" (n)
|
||||
:
|
||||
: "memory", "r2", "r3"
|
||||
);
|
||||
}
|
||||
|
||||
void fe_0(fe n)
|
||||
void fe_0(fe n_p)
|
||||
{
|
||||
register sword32* n asm ("r0") = (sword32*)n_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
/* Set zero */
|
||||
"MOV r2, #0x0\n\t"
|
||||
"MOV r3, #0x0\n\t"
|
||||
"STRD r2, r3, [%[n]]\n\t"
|
||||
"STRD r2, r3, [%[n], #8]\n\t"
|
||||
"STRD r2, r3, [%[n], #16]\n\t"
|
||||
"STRD r2, r3, [%[n], #24]\n\t"
|
||||
: [n] "+l" (n)
|
||||
"STM %[n]!, {r2, r3}\n\t"
|
||||
"STM %[n]!, {r2, r3}\n\t"
|
||||
"STM %[n]!, {r2, r3}\n\t"
|
||||
"STM %[n]!, {r2, r3}\n\t"
|
||||
"SUB %[n], %[n], #0x20\n\t"
|
||||
: [n] "+r" (n)
|
||||
:
|
||||
: "memory", "r2", "r3"
|
||||
);
|
||||
}
|
||||
|
||||
void fe_copy(fe r, const fe a)
|
||||
void fe_copy(fe r_p, const fe a_p)
|
||||
{
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register const sword32* a asm ("r1") = (const sword32*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
/* Copy */
|
||||
"LDRD r2, r3, [%[a]]\n\t"
|
||||
|
@ -333,14 +356,17 @@ void fe_copy(fe r, const fe a)
|
|||
"LDRD r4, r5, [%[a], #24]\n\t"
|
||||
"STRD r2, r3, [%[r], #16]\n\t"
|
||||
"STRD r4, r5, [%[r], #24]\n\t"
|
||||
: [r] "+l" (r), [a] "+l" (a)
|
||||
: [r] "+r" (r), [a] "+r" (a)
|
||||
:
|
||||
: "memory", "r2", "r3", "r4", "r5"
|
||||
);
|
||||
}
|
||||
|
||||
void fe_neg(fe r, const fe a)
|
||||
void fe_neg(fe r_p, const fe a_p)
|
||||
{
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register const sword32* a asm ("r1") = (const sword32*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"MVN r7, #0x0\n\t"
|
||||
"MVN r6, #0x12\n\t"
|
||||
|
@ -357,14 +383,16 @@ void fe_neg(fe r, const fe a)
|
|||
"SBCS r4, r7, r4\n\t"
|
||||
"SBC r5, r6, r5\n\t"
|
||||
"STM %[r]!, {r2, r3, r4, r5}\n\t"
|
||||
: [r] "+l" (r), [a] "+l" (a)
|
||||
: [r] "+r" (r), [a] "+r" (a)
|
||||
:
|
||||
: "memory", "r2", "r3", "r4", "r5", "r6", "r7"
|
||||
);
|
||||
}
|
||||
|
||||
int fe_isnonzero(const fe a)
|
||||
int fe_isnonzero(const fe a_p)
|
||||
{
|
||||
register const sword32* a asm ("r0") = (const sword32*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"LDM %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
|
||||
"ADDS r1, r2, #0x13\n\t"
|
||||
|
@ -393,15 +421,17 @@ int fe_isnonzero(const fe a)
|
|||
"ORR r4, r4, r6\n\t"
|
||||
"ORR r2, r2, r8\n\t"
|
||||
"ORR %[a], r2, r4\n\t"
|
||||
: [a] "+l" (a)
|
||||
: [a] "+r" (a)
|
||||
:
|
||||
: "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
|
||||
);
|
||||
return (uint32_t)(size_t)a;
|
||||
}
|
||||
|
||||
int fe_isnegative(const fe a)
|
||||
int fe_isnegative(const fe a_p)
|
||||
{
|
||||
register const sword32* a asm ("r0") = (const sword32*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"LDM %[a]!, {r2, r3, r4, r5}\n\t"
|
||||
"ADDS r1, r2, #0x13\n\t"
|
||||
|
@ -417,7 +447,7 @@ int fe_isnegative(const fe a)
|
|||
"AND %[a], r2, #0x1\n\t"
|
||||
"LSR r1, r1, #31\n\t"
|
||||
"EOR %[a], %[a], r1\n\t"
|
||||
: [a] "+l" (a)
|
||||
: [a] "+r" (a)
|
||||
:
|
||||
: "memory", "r1", "r2", "r3", "r4", "r5"
|
||||
);
|
||||
|
@ -425,8 +455,12 @@ int fe_isnegative(const fe a)
|
|||
}
|
||||
|
||||
#ifndef WC_NO_CACHE_RESISTANT
|
||||
void fe_cmov_table(fe* r, fe* base, signed char b)
|
||||
void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
|
||||
{
|
||||
register fe* r asm ("r0") = (fe*)r_p;
|
||||
register fe* base asm ("r1") = (fe*)base_p;
|
||||
register signed char b asm ("r2") = (signed char)b_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"SXTB %[b], %[b]\n\t"
|
||||
"SBFX r3, %[b], #7, #1\n\t"
|
||||
|
@ -1391,15 +1425,19 @@ void fe_cmov_table(fe* r, fe* base, signed char b)
|
|||
"STRD r4, r5, [%[r], #24]\n\t"
|
||||
"STRD r6, r7, [%[r], #56]\n\t"
|
||||
"STRD r8, r9, [%[r], #88]\n\t"
|
||||
: [r] "+l" (r), [base] "+l" (base), [b] "+l" (b)
|
||||
: [r] "+r" (r), [base] "+r" (base), [b] "+r" (b)
|
||||
:
|
||||
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r3", "r10", "r11", "r12", "lr"
|
||||
);
|
||||
}
|
||||
|
||||
#else
|
||||
void fe_cmov_table(fe* r, fe* base, signed char b)
|
||||
void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p)
|
||||
{
|
||||
register fe* r asm ("r0") = (fe*)r_p;
|
||||
register fe* base asm ("r1") = (fe*)base_p;
|
||||
register signed char b asm ("r2") = (signed char)b_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"SXTB %[b], %[b]\n\t"
|
||||
"SBFX r3, %[b], #7, #1\n\t"
|
||||
|
@ -1493,7 +1531,7 @@ void fe_cmov_table(fe* r, fe* base, signed char b)
|
|||
"AND r7, r7, lr\n\t"
|
||||
"STM %[r]!, {r4, r5, r6, r7}\n\t"
|
||||
"SUB %[base], %[base], %[b]\n\t"
|
||||
: [r] "+l" (r), [base] "+l" (base), [b] "+l" (b)
|
||||
: [r] "+r" (r), [base] "+r" (base), [b] "+r" (b)
|
||||
:
|
||||
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
|
||||
);
|
||||
|
@ -1502,7 +1540,7 @@ void fe_cmov_table(fe* r, fe* base, signed char b)
|
|||
#endif /* WC_NO_CACHE_RESISTANT */
|
||||
#endif /* HAVE_ED25519 */
|
||||
void fe_mul_op(void);
|
||||
void fe_mul_op(void)
|
||||
void fe_mul_op()
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"SUB sp, sp, #0x2c\n\t"
|
||||
|
@ -1634,18 +1672,22 @@ void fe_mul_op(void)
|
|||
);
|
||||
}
|
||||
|
||||
void fe_mul(fe r, const fe a, const fe b)
|
||||
void fe_mul(fe r_p, const fe a_p, const fe b_p)
|
||||
{
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register const sword32* a asm ("r1") = (const sword32*)a_p;
|
||||
register const sword32* b asm ("r2") = (const sword32*)b_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"BL fe_mul_op\n\t"
|
||||
: [r] "+l" (r), [a] "+l" (a), [b] "+l" (b)
|
||||
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
|
||||
:
|
||||
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
|
||||
);
|
||||
}
|
||||
|
||||
void fe_sq_op(void);
|
||||
void fe_sq_op(void)
|
||||
void fe_sq_op()
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"SUB sp, sp, #0x20\n\t"
|
||||
|
@ -1763,18 +1805,25 @@ void fe_sq_op(void)
|
|||
);
|
||||
}
|
||||
|
||||
void fe_sq(fe r, const fe a)
|
||||
void fe_sq(fe r_p, const fe a_p)
|
||||
{
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register const sword32* a asm ("r1") = (const sword32*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"BL fe_sq_op\n\t"
|
||||
: [r] "+l" (r), [a] "+l" (a)
|
||||
: [r] "+r" (r), [a] "+r" (a)
|
||||
:
|
||||
: "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
|
||||
);
|
||||
}
|
||||
|
||||
void fe_mul121666(fe r, fe a)
|
||||
#ifdef HAVE_CURVE25519
|
||||
void fe_mul121666(fe r_p, fe a_p)
|
||||
{
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register sword32* a asm ("r1") = (sword32*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
/* Multiply by 121666 */
|
||||
"LDM %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
|
||||
|
@ -1803,15 +1852,19 @@ void fe_mul121666(fe r, fe a)
|
|||
"ADCS r8, r8, #0x0\n\t"
|
||||
"ADC r9, r9, #0x0\n\t"
|
||||
"STM %[r], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
|
||||
: [r] "+l" (r), [a] "+l" (a)
|
||||
: [r] "+r" (r), [a] "+r" (a)
|
||||
:
|
||||
: "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
|
||||
);
|
||||
}
|
||||
|
||||
#ifndef WC_NO_CACHE_RESISTANT
|
||||
int curve25519(byte* r, const byte* n, const byte* a)
|
||||
int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
|
||||
{
|
||||
register byte* r asm ("r0") = (byte*)r_p;
|
||||
register const byte* n asm ("r1") = (const byte*)n_p;
|
||||
register const byte* a asm ("r2") = (const byte*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"SUB sp, sp, #0xbc\n\t"
|
||||
"STR %[r], [sp, #160]\n\t"
|
||||
|
@ -1822,26 +1875,31 @@ int curve25519(byte* r, const byte* n, const byte* a)
|
|||
/* Set one */
|
||||
"MOV r10, #0x1\n\t"
|
||||
"MOV r11, #0x0\n\t"
|
||||
"STRD r10, r11, [%[r]]\n\t"
|
||||
"STM %[r]!, {r10, r11}\n\t"
|
||||
"MOV r10, #0x0\n\t"
|
||||
"STRD r10, r11, [%[r], #8]\n\t"
|
||||
"STRD r10, r11, [%[r], #16]\n\t"
|
||||
"STRD r10, r11, [%[r], #24]\n\t"
|
||||
"STM %[r]!, {r10, r11}\n\t"
|
||||
"STM %[r]!, {r10, r11}\n\t"
|
||||
"STM %[r]!, {r10, r11}\n\t"
|
||||
"SUB %[r], %[r], #0x20\n\t"
|
||||
"MOV r3, sp\n\t"
|
||||
/* Set zero */
|
||||
"MOV r10, #0x0\n\t"
|
||||
"MOV r11, #0x0\n\t"
|
||||
"STRD r10, r11, [sp]\n\t"
|
||||
"STRD r10, r11, [sp, #8]\n\t"
|
||||
"STRD r10, r11, [sp, #16]\n\t"
|
||||
"STRD r10, r11, [sp, #24]\n\t"
|
||||
"STM r3!, {r10, r11}\n\t"
|
||||
"STM r3!, {r10, r11}\n\t"
|
||||
"STM r3!, {r10, r11}\n\t"
|
||||
"STM r3!, {r10, r11}\n\t"
|
||||
"SUB r3, r3, #0x20\n\t"
|
||||
"ADD r3, sp, #0x20\n\t"
|
||||
/* Set one */
|
||||
"MOV r10, #0x1\n\t"
|
||||
"MOV r11, #0x0\n\t"
|
||||
"STRD r10, r11, [sp, #32]\n\t"
|
||||
"STM r3!, {r10, r11}\n\t"
|
||||
"MOV r10, #0x0\n\t"
|
||||
"STRD r10, r11, [sp, #40]\n\t"
|
||||
"STRD r10, r11, [sp, #48]\n\t"
|
||||
"STRD r10, r11, [sp, #56]\n\t"
|
||||
"STM r3!, {r10, r11}\n\t"
|
||||
"STM r3!, {r10, r11}\n\t"
|
||||
"STM r3!, {r10, r11}\n\t"
|
||||
"SUB r3, r3, #0x20\n\t"
|
||||
"ADD r3, sp, #0x40\n\t"
|
||||
/* Copy */
|
||||
"LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
|
||||
|
@ -1866,8 +1924,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
|
|||
"LDR %[r], [sp, #160]\n\t"
|
||||
/* Conditional Swap */
|
||||
"RSB %[n], %[n], #0x0\n\t"
|
||||
"LDRD r4, r5, [%[r]]\n\t"
|
||||
"LDRD r6, r7, [sp, #64]\n\t"
|
||||
"MOV r3, r0\n\t"
|
||||
"ADD r12, sp, #0x40\n\t"
|
||||
"LDM r3, {r4, r5}\n\t"
|
||||
"LDM r12, {r6, r7}\n\t"
|
||||
"EOR r8, r4, r6\n\t"
|
||||
"EOR r9, r5, r7\n\t"
|
||||
"AND r8, r8, %[n]\n\t"
|
||||
|
@ -1876,10 +1936,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
|
|||
"EOR r5, r5, r9\n\t"
|
||||
"EOR r6, r6, r8\n\t"
|
||||
"EOR r7, r7, r9\n\t"
|
||||
"STRD r4, r5, [%[r]]\n\t"
|
||||
"STRD r6, r7, [sp, #64]\n\t"
|
||||
"LDRD r4, r5, [%[r], #8]\n\t"
|
||||
"LDRD r6, r7, [sp, #72]\n\t"
|
||||
"STM r3!, {r4, r5}\n\t"
|
||||
"STM r12!, {r6, r7}\n\t"
|
||||
"LDM r3, {r4, r5}\n\t"
|
||||
"LDM r12, {r6, r7}\n\t"
|
||||
"EOR r8, r4, r6\n\t"
|
||||
"EOR r9, r5, r7\n\t"
|
||||
"AND r8, r8, %[n]\n\t"
|
||||
|
@ -1888,10 +1948,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
|
|||
"EOR r5, r5, r9\n\t"
|
||||
"EOR r6, r6, r8\n\t"
|
||||
"EOR r7, r7, r9\n\t"
|
||||
"STRD r4, r5, [%[r], #8]\n\t"
|
||||
"STRD r6, r7, [sp, #72]\n\t"
|
||||
"LDRD r4, r5, [%[r], #16]\n\t"
|
||||
"LDRD r6, r7, [sp, #80]\n\t"
|
||||
"STM r3!, {r4, r5}\n\t"
|
||||
"STM r12!, {r6, r7}\n\t"
|
||||
"LDM r3, {r4, r5}\n\t"
|
||||
"LDM r12, {r6, r7}\n\t"
|
||||
"EOR r8, r4, r6\n\t"
|
||||
"EOR r9, r5, r7\n\t"
|
||||
"AND r8, r8, %[n]\n\t"
|
||||
|
@ -1900,10 +1960,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
|
|||
"EOR r5, r5, r9\n\t"
|
||||
"EOR r6, r6, r8\n\t"
|
||||
"EOR r7, r7, r9\n\t"
|
||||
"STRD r4, r5, [%[r], #16]\n\t"
|
||||
"STRD r6, r7, [sp, #80]\n\t"
|
||||
"LDRD r4, r5, [%[r], #24]\n\t"
|
||||
"LDRD r6, r7, [sp, #88]\n\t"
|
||||
"STM r3!, {r4, r5}\n\t"
|
||||
"STM r12!, {r6, r7}\n\t"
|
||||
"LDM r3, {r4, r5}\n\t"
|
||||
"LDM r12, {r6, r7}\n\t"
|
||||
"EOR r8, r4, r6\n\t"
|
||||
"EOR r9, r5, r7\n\t"
|
||||
"AND r8, r8, %[n]\n\t"
|
||||
|
@ -1912,13 +1972,15 @@ int curve25519(byte* r, const byte* n, const byte* a)
|
|||
"EOR r5, r5, r9\n\t"
|
||||
"EOR r6, r6, r8\n\t"
|
||||
"EOR r7, r7, r9\n\t"
|
||||
"STRD r4, r5, [%[r], #24]\n\t"
|
||||
"STRD r6, r7, [sp, #88]\n\t"
|
||||
"STM r3!, {r4, r5}\n\t"
|
||||
"STM r12!, {r6, r7}\n\t"
|
||||
"LDR %[n], [sp, #172]\n\t"
|
||||
/* Conditional Swap */
|
||||
"RSB %[n], %[n], #0x0\n\t"
|
||||
"LDRD r4, r5, [sp]\n\t"
|
||||
"LDRD r6, r7, [sp, #32]\n\t"
|
||||
"MOV r3, sp\n\t"
|
||||
"ADD r12, sp, #0x20\n\t"
|
||||
"LDM r3, {r4, r5}\n\t"
|
||||
"LDM r12, {r6, r7}\n\t"
|
||||
"EOR r8, r4, r6\n\t"
|
||||
"EOR r9, r5, r7\n\t"
|
||||
"AND r8, r8, %[n]\n\t"
|
||||
|
@ -1927,10 +1989,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
|
|||
"EOR r5, r5, r9\n\t"
|
||||
"EOR r6, r6, r8\n\t"
|
||||
"EOR r7, r7, r9\n\t"
|
||||
"STRD r4, r5, [sp]\n\t"
|
||||
"STRD r6, r7, [sp, #32]\n\t"
|
||||
"LDRD r4, r5, [sp, #8]\n\t"
|
||||
"LDRD r6, r7, [sp, #40]\n\t"
|
||||
"STM r3!, {r4, r5}\n\t"
|
||||
"STM r12!, {r6, r7}\n\t"
|
||||
"LDM r3, {r4, r5}\n\t"
|
||||
"LDM r12, {r6, r7}\n\t"
|
||||
"EOR r8, r4, r6\n\t"
|
||||
"EOR r9, r5, r7\n\t"
|
||||
"AND r8, r8, %[n]\n\t"
|
||||
|
@ -1939,10 +2001,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
|
|||
"EOR r5, r5, r9\n\t"
|
||||
"EOR r6, r6, r8\n\t"
|
||||
"EOR r7, r7, r9\n\t"
|
||||
"STRD r4, r5, [sp, #8]\n\t"
|
||||
"STRD r6, r7, [sp, #40]\n\t"
|
||||
"LDRD r4, r5, [sp, #16]\n\t"
|
||||
"LDRD r6, r7, [sp, #48]\n\t"
|
||||
"STM r3!, {r4, r5}\n\t"
|
||||
"STM r12!, {r6, r7}\n\t"
|
||||
"LDM r3, {r4, r5}\n\t"
|
||||
"LDM r12, {r6, r7}\n\t"
|
||||
"EOR r8, r4, r6\n\t"
|
||||
"EOR r9, r5, r7\n\t"
|
||||
"AND r8, r8, %[n]\n\t"
|
||||
|
@ -1951,10 +2013,10 @@ int curve25519(byte* r, const byte* n, const byte* a)
|
|||
"EOR r5, r5, r9\n\t"
|
||||
"EOR r6, r6, r8\n\t"
|
||||
"EOR r7, r7, r9\n\t"
|
||||
"STRD r4, r5, [sp, #16]\n\t"
|
||||
"STRD r6, r7, [sp, #48]\n\t"
|
||||
"LDRD r4, r5, [sp, #24]\n\t"
|
||||
"LDRD r6, r7, [sp, #56]\n\t"
|
||||
"STM r3!, {r4, r5}\n\t"
|
||||
"STM r12!, {r6, r7}\n\t"
|
||||
"LDM r3, {r4, r5}\n\t"
|
||||
"LDM r12, {r6, r7}\n\t"
|
||||
"EOR r8, r4, r6\n\t"
|
||||
"EOR r9, r5, r7\n\t"
|
||||
"AND r8, r8, %[n]\n\t"
|
||||
|
@ -1963,8 +2025,8 @@ int curve25519(byte* r, const byte* n, const byte* a)
|
|||
"EOR r5, r5, r9\n\t"
|
||||
"EOR r6, r6, r8\n\t"
|
||||
"EOR r7, r7, r9\n\t"
|
||||
"STRD r4, r5, [sp, #24]\n\t"
|
||||
"STRD r6, r7, [sp, #56]\n\t"
|
||||
"STM r3!, {r4, r5}\n\t"
|
||||
"STM r12!, {r6, r7}\n\t"
|
||||
"LDR %[n], [sp, #184]\n\t"
|
||||
"STR %[n], [sp, #172]\n\t"
|
||||
"MOV r3, sp\n\t"
|
||||
|
@ -2193,7 +2255,7 @@ int curve25519(byte* r, const byte* n, const byte* a)
|
|||
"BL fe_mul_op\n\t"
|
||||
"MOV r0, #0x0\n\t"
|
||||
"ADD sp, sp, #0xbc\n\t"
|
||||
: [r] "+l" (r), [n] "+l" (n), [a] "+l" (a)
|
||||
: [r] "+r" (r), [n] "+r" (n), [a] "+r" (a)
|
||||
:
|
||||
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "lr"
|
||||
);
|
||||
|
@ -2201,8 +2263,12 @@ int curve25519(byte* r, const byte* n, const byte* a)
|
|||
}
|
||||
|
||||
#else
|
||||
int curve25519(byte* r, const byte* n, const byte* a)
|
||||
int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
|
||||
{
|
||||
register byte* r asm ("r0") = (byte*)r_p;
|
||||
register const byte* n asm ("r1") = (const byte*)n_p;
|
||||
register const byte* a asm ("r2") = (const byte*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"SUB sp, sp, #0xc0\n\t"
|
||||
"STR %[r], [sp, #176]\n\t"
|
||||
|
@ -2218,26 +2284,31 @@ int curve25519(byte* r, const byte* n, const byte* a)
|
|||
/* Set one */
|
||||
"MOV r10, #0x1\n\t"
|
||||
"MOV r11, #0x0\n\t"
|
||||
"STRD r10, r11, [%[r]]\n\t"
|
||||
"STM %[r]!, {r10, r11}\n\t"
|
||||
"MOV r10, #0x0\n\t"
|
||||
"STRD r10, r11, [%[r], #8]\n\t"
|
||||
"STRD r10, r11, [%[r], #16]\n\t"
|
||||
"STRD r10, r11, [%[r], #24]\n\t"
|
||||
"STM %[r]!, {r10, r11}\n\t"
|
||||
"STM %[r]!, {r10, r11}\n\t"
|
||||
"STM %[r]!, {r10, r11}\n\t"
|
||||
"SUB %[r], %[r], #0x20\n\t"
|
||||
"MOV r3, sp\n\t"
|
||||
/* Set zero */
|
||||
"MOV r10, #0x0\n\t"
|
||||
"MOV r11, #0x0\n\t"
|
||||
"STRD r10, r11, [sp]\n\t"
|
||||
"STRD r10, r11, [sp, #8]\n\t"
|
||||
"STRD r10, r11, [sp, #16]\n\t"
|
||||
"STRD r10, r11, [sp, #24]\n\t"
|
||||
"STM r3!, {r10, r11}\n\t"
|
||||
"STM r3!, {r10, r11}\n\t"
|
||||
"STM r3!, {r10, r11}\n\t"
|
||||
"STM r3!, {r10, r11}\n\t"
|
||||
"SUB r3, r3, #0x20\n\t"
|
||||
"ADD r3, sp, #0x20\n\t"
|
||||
/* Set one */
|
||||
"MOV r10, #0x1\n\t"
|
||||
"MOV r11, #0x0\n\t"
|
||||
"STRD r10, r11, [sp, #32]\n\t"
|
||||
"STM r3!, {r10, r11}\n\t"
|
||||
"MOV r10, #0x0\n\t"
|
||||
"STRD r10, r11, [sp, #40]\n\t"
|
||||
"STRD r10, r11, [sp, #48]\n\t"
|
||||
"STRD r10, r11, [sp, #56]\n\t"
|
||||
"STM r3!, {r10, r11}\n\t"
|
||||
"STM r3!, {r10, r11}\n\t"
|
||||
"STM r3!, {r10, r11}\n\t"
|
||||
"SUB r3, r3, #0x20\n\t"
|
||||
"ADD r3, sp, #0x40\n\t"
|
||||
/* Copy */
|
||||
"LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
|
||||
|
@ -2508,7 +2579,7 @@ int curve25519(byte* r, const byte* n, const byte* a)
|
|||
"STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t"
|
||||
"MOV r0, #0x0\n\t"
|
||||
"ADD sp, sp, #0xc0\n\t"
|
||||
: [r] "+l" (r), [n] "+l" (n), [a] "+l" (a)
|
||||
: [r] "+r" (r), [n] "+r" (n), [a] "+r" (a)
|
||||
:
|
||||
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "lr"
|
||||
);
|
||||
|
@ -2516,9 +2587,13 @@ int curve25519(byte* r, const byte* n, const byte* a)
|
|||
}
|
||||
|
||||
#endif /* WC_NO_CACHE_RESISTANT */
|
||||
#endif /* HAVE_CURVE25519 */
|
||||
#ifdef HAVE_ED25519
|
||||
void fe_invert(fe r, const fe a)
|
||||
void fe_invert(fe r_p, const fe a_p)
|
||||
{
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register const sword32* a asm ("r1") = (const sword32*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"SUB sp, sp, #0x88\n\t"
|
||||
/* Invert */
|
||||
|
@ -2678,14 +2753,17 @@ void fe_invert(fe r, const fe a)
|
|||
"LDR %[a], [sp, #132]\n\t"
|
||||
"LDR %[r], [sp, #128]\n\t"
|
||||
"ADD sp, sp, #0x88\n\t"
|
||||
: [r] "+l" (r), [a] "+l" (a)
|
||||
: [r] "+r" (r), [a] "+r" (a)
|
||||
:
|
||||
: "memory", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
|
||||
);
|
||||
}
|
||||
|
||||
void fe_sq2(fe r, const fe a)
|
||||
void fe_sq2(fe r_p, const fe a_p)
|
||||
{
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register const sword32* a asm ("r1") = (const sword32*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"SUB sp, sp, #0x24\n\t"
|
||||
"STRD r0, r1, [sp, #28]\n\t"
|
||||
|
@ -2831,14 +2909,17 @@ void fe_sq2(fe r, const fe a)
|
|||
"STM r12, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t"
|
||||
"MOV r0, r12\n\t"
|
||||
"MOV r1, lr\n\t"
|
||||
: [r] "+l" (r), [a] "+l" (a)
|
||||
: [r] "+r" (r), [a] "+r" (a)
|
||||
:
|
||||
: "memory", "lr"
|
||||
);
|
||||
}
|
||||
|
||||
void fe_pow22523(fe r, const fe a)
|
||||
void fe_pow22523(fe r_p, const fe a_p)
|
||||
{
|
||||
register sword32* r asm ("r0") = (sword32*)r_p;
|
||||
register const sword32* a asm ("r1") = (const sword32*)a_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"SUB sp, sp, #0x68\n\t"
|
||||
/* pow22523 */
|
||||
|
@ -2998,14 +3079,17 @@ void fe_pow22523(fe r, const fe a)
|
|||
"LDR %[a], [sp, #100]\n\t"
|
||||
"LDR %[r], [sp, #96]\n\t"
|
||||
"ADD sp, sp, #0x68\n\t"
|
||||
: [r] "+l" (r), [a] "+l" (a)
|
||||
: [r] "+r" (r), [a] "+r" (a)
|
||||
:
|
||||
: "memory", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
|
||||
);
|
||||
}
|
||||
|
||||
void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p)
|
||||
void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p)
|
||||
{
|
||||
register ge_p2 * r asm ("r0") = (ge_p2 *)r_p;
|
||||
register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"SUB sp, sp, #0x8\n\t"
|
||||
"STR %[r], [sp]\n\t"
|
||||
|
@ -3025,14 +3109,17 @@ void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p)
|
|||
"ADD r0, r0, #0x40\n\t"
|
||||
"BL fe_mul_op\n\t"
|
||||
"ADD sp, sp, #0x8\n\t"
|
||||
: [r] "+l" (r), [p] "+l" (p)
|
||||
: [r] "+r" (r), [p] "+r" (p)
|
||||
:
|
||||
: "memory", "lr", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
|
||||
);
|
||||
}
|
||||
|
||||
void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p)
|
||||
void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p)
|
||||
{
|
||||
register ge_p3 * r asm ("r0") = (ge_p3 *)r_p;
|
||||
register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"SUB sp, sp, #0x8\n\t"
|
||||
"STR %[r], [sp]\n\t"
|
||||
|
@ -3057,14 +3144,17 @@ void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p)
|
|||
"ADD r0, r0, #0x60\n\t"
|
||||
"BL fe_mul_op\n\t"
|
||||
"ADD sp, sp, #0x8\n\t"
|
||||
: [r] "+l" (r), [p] "+l" (p)
|
||||
: [r] "+r" (r), [p] "+r" (p)
|
||||
:
|
||||
: "memory", "lr", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
|
||||
);
|
||||
}
|
||||
|
||||
void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p)
|
||||
void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p)
|
||||
{
|
||||
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
|
||||
register const ge_p2 * p asm ("r1") = (const ge_p2 *)p_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"SUB sp, sp, #0x8\n\t"
|
||||
"STR %[r], [sp]\n\t"
|
||||
|
@ -3101,14 +3191,18 @@ void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p)
|
|||
"MOV r1, r0\n\t"
|
||||
"BL fe_sub_op\n\t"
|
||||
"ADD sp, sp, #0x8\n\t"
|
||||
: [r] "+l" (r), [p] "+l" (p)
|
||||
: [r] "+r" (r), [p] "+r" (p)
|
||||
:
|
||||
: "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
|
||||
);
|
||||
}
|
||||
|
||||
void ge_madd(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q)
|
||||
void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
|
||||
{
|
||||
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
|
||||
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
|
||||
register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"SUB sp, sp, #0xc\n\t"
|
||||
"STR %[r], [sp]\n\t"
|
||||
|
@ -3179,14 +3273,18 @@ void ge_madd(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q)
|
|||
"ADD r1, r0, #0x20\n\t"
|
||||
"BL fe_add_sub_op\n\t"
|
||||
"ADD sp, sp, #0xc\n\t"
|
||||
: [r] "+l" (r), [p] "+l" (p), [q] "+l" (q)
|
||||
: [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
|
||||
:
|
||||
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
|
||||
);
|
||||
}
|
||||
|
||||
void ge_msub(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q)
|
||||
void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p)
|
||||
{
|
||||
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
|
||||
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
|
||||
register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"SUB sp, sp, #0xc\n\t"
|
||||
"STR %[r], [sp]\n\t"
|
||||
|
@ -3258,14 +3356,18 @@ void ge_msub(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q)
|
|||
"ADD r0, r0, #0x20\n\t"
|
||||
"BL fe_add_sub_op\n\t"
|
||||
"ADD sp, sp, #0xc\n\t"
|
||||
: [r] "+l" (r), [p] "+l" (p), [q] "+l" (q)
|
||||
: [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
|
||||
:
|
||||
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
|
||||
);
|
||||
}
|
||||
|
||||
void ge_add(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
|
||||
void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
|
||||
{
|
||||
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
|
||||
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
|
||||
register const ge_cached* q asm ("r2") = (const ge_cached*)q_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"SUB sp, sp, #0x2c\n\t"
|
||||
"STR %[r], [sp]\n\t"
|
||||
|
@ -3337,14 +3439,18 @@ void ge_add(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
|
|||
"ADD r0, r0, #0x20\n\t"
|
||||
"BL fe_add_sub_op\n\t"
|
||||
"ADD sp, sp, #0x2c\n\t"
|
||||
: [r] "+l" (r), [p] "+l" (p), [q] "+l" (q)
|
||||
: [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
|
||||
:
|
||||
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
|
||||
);
|
||||
}
|
||||
|
||||
void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
|
||||
void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
|
||||
{
|
||||
register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
|
||||
register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
|
||||
register const ge_cached* q asm ("r2") = (const ge_cached*)q_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"SUB sp, sp, #0x2c\n\t"
|
||||
"STR %[r], [sp]\n\t"
|
||||
|
@ -3416,14 +3522,16 @@ void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
|
|||
"ADD r0, r0, #0x40\n\t"
|
||||
"BL fe_add_sub_op\n\t"
|
||||
"ADD sp, sp, #0x2c\n\t"
|
||||
: [r] "+l" (r), [p] "+l" (p), [q] "+l" (q)
|
||||
: [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
|
||||
:
|
||||
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
|
||||
);
|
||||
}
|
||||
|
||||
void sc_reduce(byte* s)
|
||||
void sc_reduce(byte* s_p)
|
||||
{
|
||||
register byte* s asm ("r0") = (byte*)s_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"SUB sp, sp, #0x34\n\t"
|
||||
/* Load bits 252-511 */
|
||||
|
@ -3694,14 +3802,19 @@ void sc_reduce(byte* s)
|
|||
/* Store result */
|
||||
"STM %[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
|
||||
"ADD sp, sp, #0x34\n\t"
|
||||
: [s] "+l" (s)
|
||||
: [s] "+r" (s)
|
||||
:
|
||||
: "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
|
||||
);
|
||||
}
|
||||
|
||||
void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
|
||||
void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
|
||||
{
|
||||
register byte* s asm ("r0") = (byte*)s_p;
|
||||
register const byte* a asm ("r1") = (const byte*)a_p;
|
||||
register const byte* b asm ("r2") = (const byte*)b_p;
|
||||
register const byte* c asm ("r3") = (const byte*)c_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"SUB sp, sp, #0x50\n\t"
|
||||
"ADD lr, sp, #0x44\n\t"
|
||||
|
@ -4096,7 +4209,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
|
|||
/* Store result */
|
||||
"STM %[s], {%[b], %[c], r4, r5, r6, r7, r8, r9}\n\t"
|
||||
"ADD sp, sp, #0x50\n\t"
|
||||
: [s] "+l" (s), [a] "+l" (a), [b] "+l" (b), [c] "+l" (c)
|
||||
: [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c)
|
||||
:
|
||||
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
|
||||
);
|
||||
|
|
|
@ -42,75 +42,32 @@
|
|||
|
||||
#ifdef WOLFSSL_ARMASM_NO_NEON
|
||||
static const uint32_t L_SHA256_transform_len_k[] = {
|
||||
0x428a2f98,
|
||||
0x71374491,
|
||||
0xb5c0fbcf,
|
||||
0xe9b5dba5,
|
||||
0x3956c25b,
|
||||
0x59f111f1,
|
||||
0x923f82a4,
|
||||
0xab1c5ed5,
|
||||
0xd807aa98,
|
||||
0x12835b01,
|
||||
0x243185be,
|
||||
0x550c7dc3,
|
||||
0x72be5d74,
|
||||
0x80deb1fe,
|
||||
0x9bdc06a7,
|
||||
0xc19bf174,
|
||||
0xe49b69c1,
|
||||
0xefbe4786,
|
||||
0xfc19dc6,
|
||||
0x240ca1cc,
|
||||
0x2de92c6f,
|
||||
0x4a7484aa,
|
||||
0x5cb0a9dc,
|
||||
0x76f988da,
|
||||
0x983e5152,
|
||||
0xa831c66d,
|
||||
0xb00327c8,
|
||||
0xbf597fc7,
|
||||
0xc6e00bf3,
|
||||
0xd5a79147,
|
||||
0x6ca6351,
|
||||
0x14292967,
|
||||
0x27b70a85,
|
||||
0x2e1b2138,
|
||||
0x4d2c6dfc,
|
||||
0x53380d13,
|
||||
0x650a7354,
|
||||
0x766a0abb,
|
||||
0x81c2c92e,
|
||||
0x92722c85,
|
||||
0xa2bfe8a1,
|
||||
0xa81a664b,
|
||||
0xc24b8b70,
|
||||
0xc76c51a3,
|
||||
0xd192e819,
|
||||
0xd6990624,
|
||||
0xf40e3585,
|
||||
0x106aa070,
|
||||
0x19a4c116,
|
||||
0x1e376c08,
|
||||
0x2748774c,
|
||||
0x34b0bcb5,
|
||||
0x391c0cb3,
|
||||
0x4ed8aa4a,
|
||||
0x5b9cca4f,
|
||||
0x682e6ff3,
|
||||
0x748f82ee,
|
||||
0x78a5636f,
|
||||
0x84c87814,
|
||||
0x8cc70208,
|
||||
0x90befffa,
|
||||
0xa4506ceb,
|
||||
0xbef9a3f7,
|
||||
0xc67178f2,
|
||||
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
|
||||
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
|
||||
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
|
||||
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
|
||||
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
|
||||
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
|
||||
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
|
||||
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
|
||||
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
|
||||
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
|
||||
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
|
||||
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
|
||||
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
|
||||
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
|
||||
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
|
||||
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
|
||||
};
|
||||
|
||||
void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len);
|
||||
void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len)
|
||||
void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
|
||||
{
|
||||
register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p;
|
||||
register const byte* data asm ("r1") = (const byte*)data_p;
|
||||
register word32 len asm ("r2") = (word32)len_p;
|
||||
register uint32_t* L_SHA256_transform_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_len_k;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"SUB sp, sp, #0xc0\n\t"
|
||||
"MOV r3, %[L_SHA256_transform_len_k]\n\t"
|
||||
|
@ -1463,9 +1420,9 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len)
|
|||
"ADD %[data], %[data], #0x40\n\t"
|
||||
"BNE L_SHA256_transform_len_begin_%=\n\t"
|
||||
"ADD sp, sp, #0xc0\n\t"
|
||||
: [sha256] "+l" (sha256), [data] "+l" (data), [len] "+l" (len)
|
||||
: [L_SHA256_transform_len_k] "r" (L_SHA256_transform_len_k)
|
||||
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
|
||||
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c)
|
||||
:
|
||||
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -42,91 +42,56 @@
|
|||
|
||||
#ifdef WOLFSSL_ARMASM_NO_NEON
|
||||
static const uint64_t L_SHA512_transform_len_k[] = {
|
||||
0x428a2f98d728ae22UL,
|
||||
0x7137449123ef65cdUL,
|
||||
0xb5c0fbcfec4d3b2fUL,
|
||||
0xe9b5dba58189dbbcUL,
|
||||
0x3956c25bf348b538UL,
|
||||
0x59f111f1b605d019UL,
|
||||
0x923f82a4af194f9bUL,
|
||||
0xab1c5ed5da6d8118UL,
|
||||
0xd807aa98a3030242UL,
|
||||
0x12835b0145706fbeUL,
|
||||
0x243185be4ee4b28cUL,
|
||||
0x550c7dc3d5ffb4e2UL,
|
||||
0x72be5d74f27b896fUL,
|
||||
0x80deb1fe3b1696b1UL,
|
||||
0x9bdc06a725c71235UL,
|
||||
0xc19bf174cf692694UL,
|
||||
0xe49b69c19ef14ad2UL,
|
||||
0xefbe4786384f25e3UL,
|
||||
0xfc19dc68b8cd5b5UL,
|
||||
0x240ca1cc77ac9c65UL,
|
||||
0x2de92c6f592b0275UL,
|
||||
0x4a7484aa6ea6e483UL,
|
||||
0x5cb0a9dcbd41fbd4UL,
|
||||
0x76f988da831153b5UL,
|
||||
0x983e5152ee66dfabUL,
|
||||
0xa831c66d2db43210UL,
|
||||
0xb00327c898fb213fUL,
|
||||
0xbf597fc7beef0ee4UL,
|
||||
0xc6e00bf33da88fc2UL,
|
||||
0xd5a79147930aa725UL,
|
||||
0x6ca6351e003826fUL,
|
||||
0x142929670a0e6e70UL,
|
||||
0x27b70a8546d22ffcUL,
|
||||
0x2e1b21385c26c926UL,
|
||||
0x4d2c6dfc5ac42aedUL,
|
||||
0x53380d139d95b3dfUL,
|
||||
0x650a73548baf63deUL,
|
||||
0x766a0abb3c77b2a8UL,
|
||||
0x81c2c92e47edaee6UL,
|
||||
0x92722c851482353bUL,
|
||||
0xa2bfe8a14cf10364UL,
|
||||
0xa81a664bbc423001UL,
|
||||
0xc24b8b70d0f89791UL,
|
||||
0xc76c51a30654be30UL,
|
||||
0xd192e819d6ef5218UL,
|
||||
0xd69906245565a910UL,
|
||||
0xf40e35855771202aUL,
|
||||
0x106aa07032bbd1b8UL,
|
||||
0x19a4c116b8d2d0c8UL,
|
||||
0x1e376c085141ab53UL,
|
||||
0x2748774cdf8eeb99UL,
|
||||
0x34b0bcb5e19b48a8UL,
|
||||
0x391c0cb3c5c95a63UL,
|
||||
0x4ed8aa4ae3418acbUL,
|
||||
0x5b9cca4f7763e373UL,
|
||||
0x682e6ff3d6b2b8a3UL,
|
||||
0x748f82ee5defb2fcUL,
|
||||
0x78a5636f43172f60UL,
|
||||
0x84c87814a1f0ab72UL,
|
||||
0x8cc702081a6439ecUL,
|
||||
0x90befffa23631e28UL,
|
||||
0xa4506cebde82bde9UL,
|
||||
0xbef9a3f7b2c67915UL,
|
||||
0xc67178f2e372532bUL,
|
||||
0xca273eceea26619cUL,
|
||||
0xd186b8c721c0c207UL,
|
||||
0xeada7dd6cde0eb1eUL,
|
||||
0xf57d4f7fee6ed178UL,
|
||||
0x6f067aa72176fbaUL,
|
||||
0xa637dc5a2c898a6UL,
|
||||
0x113f9804bef90daeUL,
|
||||
0x1b710b35131c471bUL,
|
||||
0x28db77f523047d84UL,
|
||||
0x32caab7b40c72493UL,
|
||||
0x3c9ebe0a15c9bebcUL,
|
||||
0x431d67c49c100d4cUL,
|
||||
0x4cc5d4becb3e42b6UL,
|
||||
0x597f299cfc657e2aUL,
|
||||
0x5fcb6fab3ad6faecUL,
|
||||
0x6c44198c4a475817UL,
|
||||
0x428a2f98d728ae22UL, 0x7137449123ef65cdUL,
|
||||
0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
|
||||
0x3956c25bf348b538UL, 0x59f111f1b605d019UL,
|
||||
0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL,
|
||||
0xd807aa98a3030242UL, 0x12835b0145706fbeUL,
|
||||
0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL,
|
||||
0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL,
|
||||
0x9bdc06a725c71235UL, 0xc19bf174cf692694UL,
|
||||
0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL,
|
||||
0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL,
|
||||
0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL,
|
||||
0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL,
|
||||
0x983e5152ee66dfabUL, 0xa831c66d2db43210UL,
|
||||
0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL,
|
||||
0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL,
|
||||
0x06ca6351e003826fUL, 0x142929670a0e6e70UL,
|
||||
0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL,
|
||||
0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL,
|
||||
0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL,
|
||||
0x81c2c92e47edaee6UL, 0x92722c851482353bUL,
|
||||
0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL,
|
||||
0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL,
|
||||
0xd192e819d6ef5218UL, 0xd69906245565a910UL,
|
||||
0xf40e35855771202aUL, 0x106aa07032bbd1b8UL,
|
||||
0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL,
|
||||
0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL,
|
||||
0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL,
|
||||
0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL,
|
||||
0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL,
|
||||
0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL,
|
||||
0x90befffa23631e28UL, 0xa4506cebde82bde9UL,
|
||||
0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL,
|
||||
0xca273eceea26619cUL, 0xd186b8c721c0c207UL,
|
||||
0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL,
|
||||
0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL,
|
||||
0x113f9804bef90daeUL, 0x1b710b35131c471bUL,
|
||||
0x28db77f523047d84UL, 0x32caab7b40c72493UL,
|
||||
0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL,
|
||||
0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL,
|
||||
0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL,
|
||||
};
|
||||
|
||||
void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len);
|
||||
void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
|
||||
void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
|
||||
{
|
||||
register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p;
|
||||
register const byte* data asm ("r1") = (const byte*)data_p;
|
||||
register word32 len asm ("r2") = (word32)len_p;
|
||||
register uint64_t* L_SHA512_transform_len_k_c asm ("r3") = (uint64_t*)&L_SHA512_transform_len_k;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"SUB sp, sp, #0xc0\n\t"
|
||||
"MOV r3, %[L_SHA512_transform_len_k]\n\t"
|
||||
|
@ -3578,9 +3543,9 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
|
|||
"BNE L_SHA512_transform_len_begin_%=\n\t"
|
||||
"EOR r0, r0, r0\n\t"
|
||||
"ADD sp, sp, #0xc0\n\t"
|
||||
: [sha512] "+l" (sha512), [data] "+l" (data), [len] "+l" (len)
|
||||
: [L_SHA512_transform_len_k] "r" (L_SHA512_transform_len_k)
|
||||
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
|
||||
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c)
|
||||
:
|
||||
: "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -56,7 +56,7 @@ typedef struct Gcm {
|
|||
} Gcm;
|
||||
|
||||
WOLFSSL_LOCAL void GenerateM0(Gcm* gcm);
|
||||
#if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_NO_NEON)
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
WOLFSSL_LOCAL void GMULT(byte* X, byte* Y);
|
||||
#endif
|
||||
WOLFSSL_LOCAL void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c,
|
||||
|
|
|
@ -115,7 +115,7 @@ typedef struct {
|
|||
void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p);
|
||||
void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p);
|
||||
void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p);
|
||||
#define ge_p3_dbl(r, p) ge_p2_dbl((ge_p1p1 *)r, (ge_p2 *)p)
|
||||
#define ge_p3_dbl(r, p) ge_p2_dbl((ge_p1p1 *)(r), (ge_p2 *)(p))
|
||||
void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
|
||||
void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
|
||||
void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);
|
||||
|
|
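The ge_p3_dbl change above adds parentheses around the macro parameters inside the casts. Parenthesizing each use of a macro argument keeps a cast (or any prefix operator) bound to the whole argument expression rather than just its first token; the same principle in a small, self-contained illustration with hypothetical names:

#include <stdio.h>
#include <stddef.h>

#define WORDS_TO_BYTES_BAD(n)  ((size_t)n * 4)    /* cast binds to n's first token */
#define WORDS_TO_BYTES_GOOD(n) ((size_t)(n) * 4)  /* cast covers the whole argument */

int main(void)
{
    int w = 3;
    /* BAD(w + 1)  expands to (size_t)w + 1 * 4   -> 3 + 4  = 7  */
    /* GOOD(w + 1) expands to (size_t)(w + 1) * 4 -> 4 * 4  = 16 */
    printf("%zu %zu\n", WORDS_TO_BYTES_BAD(w + 1), WORDS_TO_BYTES_GOOD(w + 1));
    return 0;
}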