mirror of https://github.com/wolfSSL/wolfssl.git

ARM32 ChaCha20, Poly1305: assembly code

Add assembly code for ChaCha20 and Poly1305 on ARM32 when NEON is not available.
(pull/8020/head)

parent e26ac5e122
commit 2323a5cf59
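
For orientation, these are the C-callable entry points the new ARM32 assembly provides. The names and argument types below are copied from the inline-assembly variants further down in this diff, with the generated _p parameter suffixes dropped; treat this as a sketch rather than wolfSSL's official header.

#include <wolfssl/wolfcrypt/chacha.h>
#include <wolfssl/wolfcrypt/poly1305.h>

/* ChaCha20 routines (armv8-32-chacha-asm.S / armv8-32-chacha-asm_c.c). */
void wc_chacha_setiv(word32* x, const byte* iv, word32 counter);
void wc_chacha_setkey(word32* x, const byte* key, word32 keySz);
/* The two routines below are only built when WOLFSSL_ARMASM_NO_NEON is set. */
void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len);
void wc_chacha_use_over(byte* over, byte* output, const byte* input, word32 len);

/* Poly1305 routines (armv8-32-poly1305-asm.S / armv8-32-poly1305-asm_c.c). */
void poly1305_blocks_arm32_16(Poly1305* ctx, const byte* m, word32 len, int notLast);
void poly1305_set_key(Poly1305* ctx, const byte* key);
void poly1305_final(Poly1305* ctx, byte* mac);
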
@@ -924,8 +924,10 @@ if BUILD_ARMASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-poly1305.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-poly1305.c
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-poly1305-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-poly1305-asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-poly1305-asm.S
endif !BUILD_ARMASM_INLINE
endif

@@ -999,17 +1001,17 @@ endif

if BUILD_CHACHA
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/chacha.c
if BUILD_ARMASM_NEON
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-chacha.c
else
if BUILD_ARMASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-chacha.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-chacha.c
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-chacha-asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-chacha-asm.S
endif !BUILD_ARMASM_INLINE
endif BUILD_ARMASM
else
if BUILD_RISCV_ASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/riscv/riscv-64-chacha.c
endif BUILD_RISCV_ASM

@@ -1018,7 +1020,7 @@ if BUILD_INTELASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/chacha_asm.S
endif BUILD_INTELASM
endif !BUILD_X86_ASM
endif !BUILD_ARMASM_NEON
endif !BUILD_ARMASM
if BUILD_POLY1305
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/chacha20_poly1305.c
endif BUILD_POLY1305

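
Note on the BUILD_ARMASM_INLINE split above: each generated implementation ships twice, once as a standalone .S file and once as a C file wrapping the same instructions in inline assembly, and the automake conditional selects exactly one of them. Inside the sources the choice is mirrored by preprocessor guards; the following is a minimal sketch of that pattern, assembled from the guards visible in the new files below, not a literal excerpt:

#include <wolfssl/wolfcrypt/settings.h>

#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__)

#ifndef WOLFSSL_ARMASM_INLINE
/* armv8-32-*-asm.S: plain GNU assembler source, built when the
 * standalone-assembly branch (the "else" above) is selected. */
#else
/* armv8-32-*-asm_c.c: the same instructions emitted through
 * __asm__ __volatile__ blocks, built for BUILD_ARMASM_INLINE. */
#endif

#endif /* !__aarch64__ && __arm__ && !__thumb__ */
#endif /* WOLFSSL_ARMASM */
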
@@ -72,8 +72,7 @@ Public domain.
#endif /* HAVE_CHACHA */


#if defined(WOLFSSL_ARMASM) && (!defined(WOLFSSL_ARMASM_NO_NEON) || \
    defined(__thumb__))
#if defined(WOLFSSL_ARMASM)
/* implementation is located in wolfcrypt/src/port/arm/armv8-chacha.c */

#elif defined(WOLFSSL_RISCV_ASM)

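
Read as a before/after, the chacha.c hunk above widens the guard around the ARM port: previously armv8-chacha.c was used only when NEON was available or the build targeted Thumb-2, while after this change every WOLFSSL_ARMASM build takes the ARM port, with the no-NEON ARM32 case handled by the new assembly. A reconstructed sketch of the two guards (not a literal excerpt of the surrounding #elif chain):

/* Before: ARM ChaCha port only with NEON or Thumb-2. */
#if defined(WOLFSSL_ARMASM) && (!defined(WOLFSSL_ARMASM_NO_NEON) || \
    defined(__thumb__))
/* implementation is located in wolfcrypt/src/port/arm/armv8-chacha.c */
#endif

/* After: ARM ChaCha port for every WOLFSSL_ARMASM build. */
#if defined(WOLFSSL_ARMASM)
/* implementation is located in wolfcrypt/src/port/arm/armv8-chacha.c */
#endif
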
@@ -232,7 +232,7 @@ extern void poly1305_final_avx2(Poly1305* ctx, byte* mac);
}
#endif/* !WOLFSSL_ARMASM && !WOLFSSL_RISCV_ASM */
/* if not 64 bit then use 32 bit */
#elif !defined(WOLFSSL_ARMASM) || !defined(__thumb__)
#elif !defined(WOLFSSL_ARMASM)

static word32 U8TO32(const byte *p)
{

@@ -269,8 +269,7 @@ static WC_INLINE void u32tole64(const word32 inLe32, byte outLe64[8])
}


#if (!defined(WOLFSSL_ARMASM) || (!defined(__aarch64__) && \
    !defined(__thumb__))) && !defined(WOLFSSL_RISCV_ASM)
#if !defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_RISCV_ASM)
/*
This local function operates on a message with a given number of bytes
with a given ctx pointer to a Poly1305 structure.

@@ -789,8 +788,7 @@ int wc_Poly1305Final(Poly1305* ctx, byte* mac)

    return 0;
}
#endif /* (!WOLFSSL_ARMASM || (!__aarch64__ && !__thumb__)) &&
        * !WOLFSSL_RISCV_ASM */
#endif /* !WOLFSSL_ARMASM && !WOLFSSL_RISCV_ASM */


int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes)

@@ -885,8 +883,7 @@ int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes)
    /* process full blocks */
    if (bytes >= POLY1305_BLOCK_SIZE) {
        size_t want = ((size_t)bytes & ~((size_t)POLY1305_BLOCK_SIZE - 1));
#if (!defined(WOLFSSL_ARMASM) || (!defined(__aarch64__) && \
    !defined(__thumb__))) && !defined(WOLFSSL_RISCV_ASM)
#if !defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_RISCV_ASM)
        int ret;
        ret = poly1305_blocks(ctx, m, want);
        if (ret != 0)

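
The four poly1305.c hunks above all make the same kind of change: the generic C block code (poly1305_blocks(), wc_Poly1305Final(), and the full-block path in wc_Poly1305Update()) is now compiled out for every WOLFSSL_ARMASM build rather than only for AArch64/Thumb-2, since the new ARM32 files below supply poly1305_blocks_arm32_16(), poly1305_set_key() and poly1305_final(). A sketch of the guard before and after:

/* Before: keep the C fallback on ARM32 builds without NEON/Thumb-2. */
#if (!defined(WOLFSSL_ARMASM) || (!defined(__aarch64__) && \
    !defined(__thumb__))) && !defined(WOLFSSL_RISCV_ASM)
/* generic C Poly1305 block processing */
#endif

/* After: any assembly-enabled build (ARM or RISC-V) skips the C fallback. */
#if !defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_RISCV_ASM)
/* generic C Poly1305 block processing */
#endif
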
@@ -411,7 +411,7 @@ void AES_invert_key(unsigned char* ks_p, word32 rounds_p)
static const uint32_t L_AES_ARM32_rcon[] = {
    0x01000000, 0x02000000, 0x04000000, 0x08000000,
    0x10000000, 0x20000000, 0x40000000, 0x80000000,
    0x1b000000, 0x36000000,
    0x1b000000, 0x36000000,
};

void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks);

@ -0,0 +1,522 @@
|
|||
/* armv8-32-chacha-asm
|
||||
*
|
||||
* Copyright (C) 2006-2024 wolfSSL Inc.
|
||||
*
|
||||
* This file is part of wolfSSL.
|
||||
*
|
||||
* wolfSSL is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* wolfSSL is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
|
||||
*/
|
||||
|
||||
/* Generated using (from wolfssl):
|
||||
* cd ../scripts
|
||||
* ruby ./chacha/chacha.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-chacha-asm.S
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__)
|
||||
#ifndef WOLFSSL_ARMASM_INLINE
|
||||
#ifdef HAVE_CHACHA
|
||||
.text
|
||||
.align 4
|
||||
.globl wc_chacha_setiv
|
||||
.type wc_chacha_setiv, %function
|
||||
wc_chacha_setiv:
|
||||
push {r4, lr}
|
||||
add r3, r0, #52
|
||||
ldr r4, [r1]
|
||||
ldr r12, [r1, #4]
|
||||
ldr lr, [r1, #8]
|
||||
str r2, [r0, #48]
|
||||
#ifdef BIG_ENDIAN_ORDER
|
||||
rev r4, r4
|
||||
rev r12, r12
|
||||
rev lr, lr
|
||||
#endif /* BIG_ENDIAN_ORDER */
|
||||
stm r3, {r4, r12, lr}
|
||||
pop {r4, pc}
|
||||
.size wc_chacha_setiv,.-wc_chacha_setiv
|
||||
.text
|
||||
.type L_chacha_arm32_constants, %object
|
||||
.size L_chacha_arm32_constants, 32
|
||||
.align 4
|
||||
L_chacha_arm32_constants:
|
||||
.word 0x61707865
|
||||
.word 0x3120646e
|
||||
.word 0x79622d36
|
||||
.word 0x6b206574
|
||||
.word 0x61707865
|
||||
.word 0x3320646e
|
||||
.word 0x79622d32
|
||||
.word 0x6b206574
|
||||
.text
|
||||
.align 4
|
||||
.globl wc_chacha_setkey
|
||||
.type wc_chacha_setkey, %function
|
||||
wc_chacha_setkey:
|
||||
push {r4, r5, lr}
|
||||
adr r3, L_chacha_arm32_constants
|
||||
subs r2, r2, #16
|
||||
add r3, r3, r2
|
||||
# Start state with constants
|
||||
ldm r3, {r4, r5, r12, lr}
|
||||
stm r0!, {r4, r5, r12, lr}
|
||||
# Next is first 16 bytes of key.
|
||||
ldr r4, [r1]
|
||||
ldr r5, [r1, #4]
|
||||
ldr r12, [r1, #8]
|
||||
ldr lr, [r1, #12]
|
||||
#ifdef BIG_ENDIAN_ORDER
|
||||
rev r4, r4
|
||||
rev r5, r5
|
||||
rev r12, r12
|
||||
rev lr, lr
|
||||
#endif /* BIG_ENDIAN_ORDER */
|
||||
stm r0!, {r4, r5, r12, lr}
|
||||
# Next 16 bytes of key.
|
||||
beq L_chacha_arm32_setkey_same_keyb_ytes
|
||||
# Update key pointer for next 16 bytes.
|
||||
add r1, r1, r2
|
||||
ldr r4, [r1]
|
||||
ldr r5, [r1, #4]
|
||||
ldr r12, [r1, #8]
|
||||
ldr lr, [r1, #12]
|
||||
L_chacha_arm32_setkey_same_keyb_ytes:
|
||||
stm r0, {r4, r5, r12, lr}
|
||||
pop {r4, r5, pc}
|
||||
.size wc_chacha_setkey,.-wc_chacha_setkey
|
||||
#ifdef WOLFSSL_ARMASM_NO_NEON
|
||||
.text
|
||||
.align 4
|
||||
.globl wc_chacha_crypt_bytes
|
||||
.type wc_chacha_crypt_bytes, %function
|
||||
wc_chacha_crypt_bytes:
|
||||
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
sub sp, sp, #52
|
||||
mov lr, r0
|
||||
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
|
||||
str r0, [sp, #32]
|
||||
str r1, [sp, #36]
|
||||
#else
|
||||
strd r0, r1, [sp, #32]
|
||||
#endif
|
||||
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
|
||||
str r2, [sp, #40]
|
||||
str r3, [sp, #44]
|
||||
#else
|
||||
strd r2, r3, [sp, #40]
|
||||
#endif
|
||||
L_chacha_arm32_crypt_block:
|
||||
# Put x[12]..x[15] onto stack.
|
||||
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
|
||||
ldr r4, [lr, #48]
|
||||
ldr r5, [lr, #52]
|
||||
#else
|
||||
ldrd r4, r5, [lr, #48]
|
||||
#endif
|
||||
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
|
||||
ldr r6, [lr, #56]
|
||||
ldr r7, [lr, #60]
|
||||
#else
|
||||
ldrd r6, r7, [lr, #56]
|
||||
#endif
|
||||
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
|
||||
str r4, [sp, #16]
|
||||
str r5, [sp, #20]
|
||||
#else
|
||||
strd r4, r5, [sp, #16]
|
||||
#endif
|
||||
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
|
||||
str r6, [sp, #24]
|
||||
str r7, [sp, #28]
|
||||
#else
|
||||
strd r6, r7, [sp, #24]
|
||||
#endif
|
||||
# Load x[0]..x[12] into registers.
|
||||
ldm lr, {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12}
|
||||
# 10x 2 full rounds to perform.
|
||||
mov lr, #10
|
||||
str lr, [sp, #48]
|
||||
L_chacha_arm32_crypt_loop:
|
||||
# 0, 4, 8, 12
|
||||
# 1, 5, 9, 13
|
||||
ldr lr, [sp, #20]
|
||||
add r0, r0, r4
|
||||
add r1, r1, r5
|
||||
eor r12, r12, r0
|
||||
eor lr, lr, r1
|
||||
ror r12, r12, #16
|
||||
ror lr, lr, #16
|
||||
add r8, r8, r12
|
||||
add r9, r9, lr
|
||||
eor r4, r4, r8
|
||||
eor r5, r5, r9
|
||||
ror r4, r4, #20
|
||||
ror r5, r5, #20
|
||||
add r0, r0, r4
|
||||
add r1, r1, r5
|
||||
eor r12, r12, r0
|
||||
eor lr, lr, r1
|
||||
ror r12, r12, #24
|
||||
ror lr, lr, #24
|
||||
add r8, r8, r12
|
||||
add r9, r9, lr
|
||||
eor r4, r4, r8
|
||||
eor r5, r5, r9
|
||||
ror r4, r4, #25
|
||||
ror r5, r5, #25
|
||||
str r12, [sp, #16]
|
||||
str lr, [sp, #20]
|
||||
# 2, 6, 10, 14
|
||||
# 3, 7, 11, 15
|
||||
ldr r12, [sp, #24]
|
||||
ldr lr, [sp, #28]
|
||||
add r2, r2, r6
|
||||
add r3, r3, r7
|
||||
eor r12, r12, r2
|
||||
eor lr, lr, r3
|
||||
ror r12, r12, #16
|
||||
ror lr, lr, #16
|
||||
add r10, r10, r12
|
||||
add r11, r11, lr
|
||||
eor r6, r6, r10
|
||||
eor r7, r7, r11
|
||||
ror r6, r6, #20
|
||||
ror r7, r7, #20
|
||||
add r2, r2, r6
|
||||
add r3, r3, r7
|
||||
eor r12, r12, r2
|
||||
eor lr, lr, r3
|
||||
ror r12, r12, #24
|
||||
ror lr, lr, #24
|
||||
add r10, r10, r12
|
||||
add r11, r11, lr
|
||||
eor r6, r6, r10
|
||||
eor r7, r7, r11
|
||||
ror r6, r6, #25
|
||||
ror r7, r7, #25
|
||||
# 3, 4, 9, 14
|
||||
# 0, 5, 10, 15
|
||||
add r3, r3, r4
|
||||
add r0, r0, r5
|
||||
eor r12, r12, r3
|
||||
eor lr, lr, r0
|
||||
ror r12, r12, #16
|
||||
ror lr, lr, #16
|
||||
add r9, r9, r12
|
||||
add r10, r10, lr
|
||||
eor r4, r4, r9
|
||||
eor r5, r5, r10
|
||||
ror r4, r4, #20
|
||||
ror r5, r5, #20
|
||||
add r3, r3, r4
|
||||
add r0, r0, r5
|
||||
eor r12, r12, r3
|
||||
eor lr, lr, r0
|
||||
ror r12, r12, #24
|
||||
ror lr, lr, #24
|
||||
add r9, r9, r12
|
||||
add r10, r10, lr
|
||||
eor r4, r4, r9
|
||||
eor r5, r5, r10
|
||||
ror r4, r4, #25
|
||||
ror r5, r5, #25
|
||||
str r12, [sp, #24]
|
||||
str lr, [sp, #28]
|
||||
ldr r12, [sp, #16]
|
||||
ldr lr, [sp, #20]
|
||||
# 1, 6, 11, 12
|
||||
# 2, 7, 8, 13
|
||||
add r1, r1, r6
|
||||
add r2, r2, r7
|
||||
eor r12, r12, r1
|
||||
eor lr, lr, r2
|
||||
ror r12, r12, #16
|
||||
ror lr, lr, #16
|
||||
add r11, r11, r12
|
||||
add r8, r8, lr
|
||||
eor r6, r6, r11
|
||||
eor r7, r7, r8
|
||||
ror r6, r6, #20
|
||||
ror r7, r7, #20
|
||||
add r1, r1, r6
|
||||
add r2, r2, r7
|
||||
eor r12, r12, r1
|
||||
eor lr, lr, r2
|
||||
ror r12, r12, #24
|
||||
ror lr, lr, #24
|
||||
add r11, r11, r12
|
||||
add r8, r8, lr
|
||||
eor r6, r6, r11
|
||||
eor r7, r7, r8
|
||||
ror r6, r6, #25
|
||||
ror r7, r7, #25
|
||||
str lr, [sp, #20]
|
||||
# Check if we have done enough rounds.
|
||||
ldr lr, [sp, #48]
|
||||
subs lr, lr, #1
|
||||
str lr, [sp, #48]
|
||||
bgt L_chacha_arm32_crypt_loop
|
||||
stm sp, {r8, r9, r10, r11, r12}
|
||||
ldr lr, [sp, #32]
|
||||
mov r12, sp
|
||||
# Add in original state
|
||||
ldm lr!, {r8, r9, r10, r11}
|
||||
add r0, r0, r8
|
||||
add r1, r1, r9
|
||||
add r2, r2, r10
|
||||
add r3, r3, r11
|
||||
ldm lr!, {r8, r9, r10, r11}
|
||||
add r4, r4, r8
|
||||
add r5, r5, r9
|
||||
add r6, r6, r10
|
||||
add r7, r7, r11
|
||||
ldm r12, {r8, r9}
|
||||
ldm lr!, {r10, r11}
|
||||
add r8, r8, r10
|
||||
add r9, r9, r11
|
||||
stm r12!, {r8, r9}
|
||||
ldm r12, {r8, r9}
|
||||
ldm lr!, {r10, r11}
|
||||
add r8, r8, r10
|
||||
add r9, r9, r11
|
||||
stm r12!, {r8, r9}
|
||||
ldm r12, {r8, r9}
|
||||
ldm lr!, {r10, r11}
|
||||
add r8, r8, r10
|
||||
add r9, r9, r11
|
||||
add r10, r10, #1
|
||||
stm r12!, {r8, r9}
|
||||
str r10, [lr, #-8]
|
||||
ldm r12, {r8, r9}
|
||||
ldm lr, {r10, r11}
|
||||
add r8, r8, r10
|
||||
add r9, r9, r11
|
||||
stm r12, {r8, r9}
|
||||
ldr r12, [sp, #44]
|
||||
cmp r12, #0x40
|
||||
blt L_chacha_arm32_crypt_lt_block
|
||||
ldr r12, [sp, #40]
|
||||
ldr lr, [sp, #36]
|
||||
# XOR state into 64 bytes.
|
||||
ldr r8, [r12]
|
||||
ldr r9, [r12, #4]
|
||||
ldr r10, [r12, #8]
|
||||
ldr r11, [r12, #12]
|
||||
eor r0, r0, r8
|
||||
eor r1, r1, r9
|
||||
eor r2, r2, r10
|
||||
eor r3, r3, r11
|
||||
str r0, [lr]
|
||||
str r1, [lr, #4]
|
||||
str r2, [lr, #8]
|
||||
str r3, [lr, #12]
|
||||
ldr r8, [r12, #16]
|
||||
ldr r9, [r12, #20]
|
||||
ldr r10, [r12, #24]
|
||||
ldr r11, [r12, #28]
|
||||
eor r4, r4, r8
|
||||
eor r5, r5, r9
|
||||
eor r6, r6, r10
|
||||
eor r7, r7, r11
|
||||
str r4, [lr, #16]
|
||||
str r5, [lr, #20]
|
||||
str r6, [lr, #24]
|
||||
str r7, [lr, #28]
|
||||
ldr r4, [sp]
|
||||
ldr r5, [sp, #4]
|
||||
ldr r6, [sp, #8]
|
||||
ldr r7, [sp, #12]
|
||||
ldr r8, [r12, #32]
|
||||
ldr r9, [r12, #36]
|
||||
ldr r10, [r12, #40]
|
||||
ldr r11, [r12, #44]
|
||||
eor r4, r4, r8
|
||||
eor r5, r5, r9
|
||||
eor r6, r6, r10
|
||||
eor r7, r7, r11
|
||||
str r4, [lr, #32]
|
||||
str r5, [lr, #36]
|
||||
str r6, [lr, #40]
|
||||
str r7, [lr, #44]
|
||||
ldr r4, [sp, #16]
|
||||
ldr r5, [sp, #20]
|
||||
ldr r6, [sp, #24]
|
||||
ldr r7, [sp, #28]
|
||||
ldr r8, [r12, #48]
|
||||
ldr r9, [r12, #52]
|
||||
ldr r10, [r12, #56]
|
||||
ldr r11, [r12, #60]
|
||||
eor r4, r4, r8
|
||||
eor r5, r5, r9
|
||||
eor r6, r6, r10
|
||||
eor r7, r7, r11
|
||||
str r4, [lr, #48]
|
||||
str r5, [lr, #52]
|
||||
str r6, [lr, #56]
|
||||
str r7, [lr, #60]
|
||||
ldr r3, [sp, #44]
|
||||
add r12, r12, #0x40
|
||||
add lr, lr, #0x40
|
||||
str r12, [sp, #40]
|
||||
str lr, [sp, #36]
|
||||
subs r3, r3, #0x40
|
||||
ldr lr, [sp, #32]
|
||||
str r3, [sp, #44]
|
||||
bne L_chacha_arm32_crypt_block
|
||||
b L_chacha_arm32_crypt_done
|
||||
L_chacha_arm32_crypt_lt_block:
|
||||
# Store in over field of ChaCha.
|
||||
ldr lr, [sp, #32]
|
||||
add r12, lr, #0x44
|
||||
stm r12!, {r0, r1, r2, r3, r4, r5, r6, r7}
|
||||
ldm sp, {r0, r1, r2, r3, r4, r5, r6, r7}
|
||||
stm r12, {r0, r1, r2, r3, r4, r5, r6, r7}
|
||||
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
|
||||
ldr r2, [sp, #40]
|
||||
ldr r3, [sp, #44]
|
||||
#else
|
||||
ldrd r2, r3, [sp, #40]
|
||||
#endif
|
||||
ldr r1, [sp, #36]
|
||||
rsb r12, r3, #0x40
|
||||
str r12, [lr, #64]
|
||||
add lr, lr, #0x44
|
||||
L_chacha_arm32_crypt_16byte_loop:
|
||||
cmp r3, #16
|
||||
blt L_chacha_arm32_crypt_word_loop
|
||||
# 16 bytes of state XORed into message.
|
||||
ldm lr!, {r4, r5, r6, r7}
|
||||
ldr r8, [r2]
|
||||
ldr r9, [r2, #4]
|
||||
ldr r10, [r2, #8]
|
||||
ldr r11, [r2, #12]
|
||||
eor r8, r8, r4
|
||||
eor r9, r9, r5
|
||||
eor r10, r10, r6
|
||||
eor r11, r11, r7
|
||||
subs r3, r3, #16
|
||||
str r8, [r1]
|
||||
str r9, [r1, #4]
|
||||
str r10, [r1, #8]
|
||||
str r11, [r1, #12]
|
||||
beq L_chacha_arm32_crypt_done
|
||||
add r2, r2, #16
|
||||
add r1, r1, #16
|
||||
b L_chacha_arm32_crypt_16byte_loop
|
||||
L_chacha_arm32_crypt_word_loop:
|
||||
cmp r3, #4
|
||||
blt L_chacha_arm32_crypt_byte_start
|
||||
# 4 bytes of state XORed into message.
|
||||
ldr r4, [lr]
|
||||
ldr r8, [r2]
|
||||
eor r8, r8, r4
|
||||
subs r3, r3, #4
|
||||
str r8, [r1]
|
||||
beq L_chacha_arm32_crypt_done
|
||||
add lr, lr, #4
|
||||
add r2, r2, #4
|
||||
add r1, r1, #4
|
||||
b L_chacha_arm32_crypt_word_loop
|
||||
L_chacha_arm32_crypt_byte_start:
|
||||
ldr r4, [lr]
|
||||
L_chacha_arm32_crypt_byte_loop:
|
||||
ldrb r8, [r2]
|
||||
eor r8, r8, r4
|
||||
subs r3, r3, #1
|
||||
strb r8, [r1]
|
||||
beq L_chacha_arm32_crypt_done
|
||||
lsr r4, r4, #8
|
||||
add r2, r2, #1
|
||||
add r1, r1, #1
|
||||
b L_chacha_arm32_crypt_byte_loop
|
||||
L_chacha_arm32_crypt_done:
|
||||
add sp, sp, #52
|
||||
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
.size wc_chacha_crypt_bytes,.-wc_chacha_crypt_bytes
|
||||
.text
|
||||
.align 4
|
||||
.globl wc_chacha_use_over
|
||||
.type wc_chacha_use_over, %function
|
||||
wc_chacha_use_over:
|
||||
push {r4, r5, r6, r7, r8, r9, lr}
|
||||
L_chacha_arm32_over_16byte_loop:
|
||||
cmp r3, #16
|
||||
blt L_chacha_arm32_over_word_loop
|
||||
# 16 bytes of state XORed into message.
|
||||
ldr r12, [r0]
|
||||
ldr lr, [r0, #4]
|
||||
ldr r4, [r0, #8]
|
||||
ldr r5, [r0, #12]
|
||||
ldr r6, [r2]
|
||||
ldr r7, [r2, #4]
|
||||
ldr r8, [r2, #8]
|
||||
ldr r9, [r2, #12]
|
||||
eor r12, r12, r6
|
||||
eor lr, lr, r7
|
||||
eor r4, r4, r8
|
||||
eor r5, r5, r9
|
||||
subs r3, r3, #16
|
||||
str r12, [r1]
|
||||
str lr, [r1, #4]
|
||||
str r4, [r1, #8]
|
||||
str r5, [r1, #12]
|
||||
beq L_chacha_arm32_over_done
|
||||
add r0, r0, #16
|
||||
add r2, r2, #16
|
||||
add r1, r1, #16
|
||||
b L_chacha_arm32_over_16byte_loop
|
||||
L_chacha_arm32_over_word_loop:
|
||||
cmp r3, #4
|
||||
blt L_chacha_arm32_over_byte_loop
|
||||
# 4 bytes of state XORed into message.
|
||||
ldr r12, [r0]
|
||||
ldr r6, [r2]
|
||||
eor r12, r12, r6
|
||||
subs r3, r3, #4
|
||||
str r12, [r1]
|
||||
beq L_chacha_arm32_over_done
|
||||
add r0, r0, #4
|
||||
add r2, r2, #4
|
||||
add r1, r1, #4
|
||||
b L_chacha_arm32_over_word_loop
|
||||
L_chacha_arm32_over_byte_loop:
|
||||
# 4 bytes of state XORed into message.
|
||||
ldrb r12, [r0]
|
||||
ldrb r6, [r2]
|
||||
eor r12, r12, r6
|
||||
subs r3, r3, #1
|
||||
strb r12, [r1]
|
||||
beq L_chacha_arm32_over_done
|
||||
add r0, r0, #1
|
||||
add r2, r2, #1
|
||||
add r1, r1, #1
|
||||
b L_chacha_arm32_over_byte_loop
|
||||
L_chacha_arm32_over_done:
|
||||
pop {r4, r5, r6, r7, r8, r9, pc}
|
||||
.size wc_chacha_use_over,.-wc_chacha_use_over
|
||||
#endif /* WOLFSSL_ARMASM_NO_NEON */
|
||||
#endif /* HAVE_CHACHA */
|
||||
#endif /* !__aarch64__ && __arm__ && !__thumb__ */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
||||
#endif /* !WOLFSSL_ARMASM_INLINE */
|
|
@ -0,0 +1,569 @@
|
|||
/* armv8-32-chacha-asm
|
||||
*
|
||||
* Copyright (C) 2006-2024 wolfSSL Inc.
|
||||
*
|
||||
* This file is part of wolfSSL.
|
||||
*
|
||||
* wolfSSL is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* wolfSSL is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
|
||||
*/
|
||||
|
||||
/* Generated using (from wolfssl):
|
||||
* cd ../scripts
|
||||
* ruby ./chacha/chacha.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-chacha-asm.c
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__)
|
||||
#include <stdint.h>
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
#ifdef WOLFSSL_ARMASM_INLINE
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__)
|
||||
|
||||
#ifdef __IAR_SYSTEMS_ICC__
|
||||
#define __asm__ asm
|
||||
#define __volatile__ volatile
|
||||
#endif /* __IAR_SYSTEMS_ICC__ */
|
||||
#ifdef __KEIL__
|
||||
#define __asm__ __asm
|
||||
#define __volatile__ volatile
|
||||
#endif /* __KEIL__ */
|
||||
#ifdef HAVE_CHACHA
|
||||
#include <wolfssl/wolfcrypt/chacha.h>
|
||||
|
||||
void wc_chacha_setiv(word32* x_p, const byte* iv_p, word32 counter_p)
|
||||
{
|
||||
register word32* x asm ("r0") = (word32*)x_p;
|
||||
register const byte* iv asm ("r1") = (const byte*)iv_p;
|
||||
register word32 counter asm ("r2") = (word32)counter_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"add r3, %[x], #52\n\t"
|
||||
"ldr r4, [%[iv]]\n\t"
|
||||
"ldr r12, [%[iv], #4]\n\t"
|
||||
"ldr lr, [%[iv], #8]\n\t"
|
||||
"str %[counter], [%[x], #48]\n\t"
|
||||
#ifdef BIG_ENDIAN_ORDER
|
||||
"rev r4, r4\n\t"
|
||||
"rev r12, r12\n\t"
|
||||
"rev lr, lr\n\t"
|
||||
#endif /* BIG_ENDIAN_ORDER */
|
||||
"stm r3, {r4, r12, lr}\n\t"
|
||||
: [x] "+r" (x), [iv] "+r" (iv), [counter] "+r" (counter)
|
||||
:
|
||||
: "memory", "r3", "r12", "lr", "r4", "cc"
|
||||
);
|
||||
}
|
||||
|
||||
static const uint32_t L_chacha_arm32_constants[] = {
|
||||
0x61707865, 0x3120646e, 0x79622d36, 0x6b206574,
|
||||
0x61707865, 0x3320646e, 0x79622d32, 0x6b206574,
|
||||
};
|
||||
|
||||
void wc_chacha_setkey(word32* x_p, const byte* key_p, word32 keySz_p)
|
||||
{
|
||||
register word32* x asm ("r0") = (word32*)x_p;
|
||||
register const byte* key asm ("r1") = (const byte*)key_p;
|
||||
register word32 keySz asm ("r2") = (word32)keySz_p;
|
||||
register uint32_t* L_chacha_arm32_constants_c asm ("r3") = (uint32_t*)&L_chacha_arm32_constants;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"subs %[keySz], %[keySz], #16\n\t"
|
||||
"add r3, r3, %[keySz]\n\t"
|
||||
/* Start state with constants */
|
||||
"ldm r3, {r4, r5, r12, lr}\n\t"
|
||||
"stm %[x]!, {r4, r5, r12, lr}\n\t"
|
||||
/* Next is first 16 bytes of key. */
|
||||
"ldr r4, [%[key]]\n\t"
|
||||
"ldr r5, [%[key], #4]\n\t"
|
||||
"ldr r12, [%[key], #8]\n\t"
|
||||
"ldr lr, [%[key], #12]\n\t"
|
||||
#ifdef BIG_ENDIAN_ORDER
|
||||
"rev r4, r4\n\t"
|
||||
"rev r5, r5\n\t"
|
||||
"rev r12, r12\n\t"
|
||||
"rev lr, lr\n\t"
|
||||
#endif /* BIG_ENDIAN_ORDER */
|
||||
"stm %[x]!, {r4, r5, r12, lr}\n\t"
|
||||
/* Next 16 bytes of key. */
|
||||
"beq L_chacha_arm32_setkey_same_keyb_ytes_%=\n\t"
|
||||
/* Update key pointer for next 16 bytes. */
|
||||
"add %[key], %[key], %[keySz]\n\t"
|
||||
"ldr r4, [%[key]]\n\t"
|
||||
"ldr r5, [%[key], #4]\n\t"
|
||||
"ldr r12, [%[key], #8]\n\t"
|
||||
"ldr lr, [%[key], #12]\n\t"
|
||||
"\n"
|
||||
"L_chacha_arm32_setkey_same_keyb_ytes_%=: \n\t"
|
||||
"stm %[x], {r4, r5, r12, lr}\n\t"
|
||||
: [x] "+r" (x), [key] "+r" (key), [keySz] "+r" (keySz), [L_chacha_arm32_constants] "+r" (L_chacha_arm32_constants_c)
|
||||
:
|
||||
: "memory", "r12", "lr", "r4", "r5", "cc"
|
||||
);
|
||||
}
|
||||
|
||||
#ifdef WOLFSSL_ARMASM_NO_NEON
|
||||
void wc_chacha_crypt_bytes(ChaCha* ctx_p, byte* c_p, const byte* m_p, word32 len_p)
|
||||
{
|
||||
register ChaCha* ctx asm ("r0") = (ChaCha*)ctx_p;
|
||||
register byte* c asm ("r1") = (byte*)c_p;
|
||||
register const byte* m asm ("r2") = (const byte*)m_p;
|
||||
register word32 len asm ("r3") = (word32)len_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #52\n\t"
|
||||
"mov lr, %[ctx]\n\t"
|
||||
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
|
||||
"str %[ctx], [sp, #32]\n\t"
|
||||
"str %[c], [sp, #36]\n\t"
|
||||
#else
|
||||
"strd %[ctx], %[c], [sp, #32]\n\t"
|
||||
#endif
|
||||
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
|
||||
"str %[m], [sp, #40]\n\t"
|
||||
"str %[len], [sp, #44]\n\t"
|
||||
#else
|
||||
"strd %[m], %[len], [sp, #40]\n\t"
|
||||
#endif
|
||||
"\n"
|
||||
"L_chacha_arm32_crypt_block_%=: \n\t"
|
||||
/* Put x[12]..x[15] onto stack. */
|
||||
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
|
||||
"ldr r4, [lr, #48]\n\t"
|
||||
"ldr r5, [lr, #52]\n\t"
|
||||
#else
|
||||
"ldrd r4, r5, [lr, #48]\n\t"
|
||||
#endif
|
||||
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
|
||||
"ldr r6, [lr, #56]\n\t"
|
||||
"ldr r7, [lr, #60]\n\t"
|
||||
#else
|
||||
"ldrd r6, r7, [lr, #56]\n\t"
|
||||
#endif
|
||||
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
|
||||
"str r4, [sp, #16]\n\t"
|
||||
"str r5, [sp, #20]\n\t"
|
||||
#else
|
||||
"strd r4, r5, [sp, #16]\n\t"
|
||||
#endif
|
||||
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
|
||||
"str r6, [sp, #24]\n\t"
|
||||
"str r7, [sp, #28]\n\t"
|
||||
#else
|
||||
"strd r6, r7, [sp, #24]\n\t"
|
||||
#endif
|
||||
/* Load x[0]..x[12] into registers. */
|
||||
"ldm lr, {%[ctx], %[c], %[m], %[len], r4, r5, r6, r7, r8, r9, r10, r11, r12}\n\t"
|
||||
/* 10x 2 full rounds to perform. */
|
||||
"mov lr, #10\n\t"
|
||||
"str lr, [sp, #48]\n\t"
|
||||
"\n"
|
||||
"L_chacha_arm32_crypt_loop_%=: \n\t"
|
||||
/* 0, 4, 8, 12 */
|
||||
/* 1, 5, 9, 13 */
|
||||
"ldr lr, [sp, #20]\n\t"
|
||||
"add %[ctx], %[ctx], r4\n\t"
|
||||
"add %[c], %[c], r5\n\t"
|
||||
"eor r12, r12, %[ctx]\n\t"
|
||||
"eor lr, lr, %[c]\n\t"
|
||||
"ror r12, r12, #16\n\t"
|
||||
"ror lr, lr, #16\n\t"
|
||||
"add r8, r8, r12\n\t"
|
||||
"add r9, r9, lr\n\t"
|
||||
"eor r4, r4, r8\n\t"
|
||||
"eor r5, r5, r9\n\t"
|
||||
"ror r4, r4, #20\n\t"
|
||||
"ror r5, r5, #20\n\t"
|
||||
"add %[ctx], %[ctx], r4\n\t"
|
||||
"add %[c], %[c], r5\n\t"
|
||||
"eor r12, r12, %[ctx]\n\t"
|
||||
"eor lr, lr, %[c]\n\t"
|
||||
"ror r12, r12, #24\n\t"
|
||||
"ror lr, lr, #24\n\t"
|
||||
"add r8, r8, r12\n\t"
|
||||
"add r9, r9, lr\n\t"
|
||||
"eor r4, r4, r8\n\t"
|
||||
"eor r5, r5, r9\n\t"
|
||||
"ror r4, r4, #25\n\t"
|
||||
"ror r5, r5, #25\n\t"
|
||||
"str r12, [sp, #16]\n\t"
|
||||
"str lr, [sp, #20]\n\t"
|
||||
/* 2, 6, 10, 14 */
|
||||
/* 3, 7, 11, 15 */
|
||||
"ldr r12, [sp, #24]\n\t"
|
||||
"ldr lr, [sp, #28]\n\t"
|
||||
"add %[m], %[m], r6\n\t"
|
||||
"add %[len], %[len], r7\n\t"
|
||||
"eor r12, r12, %[m]\n\t"
|
||||
"eor lr, lr, %[len]\n\t"
|
||||
"ror r12, r12, #16\n\t"
|
||||
"ror lr, lr, #16\n\t"
|
||||
"add r10, r10, r12\n\t"
|
||||
"add r11, r11, lr\n\t"
|
||||
"eor r6, r6, r10\n\t"
|
||||
"eor r7, r7, r11\n\t"
|
||||
"ror r6, r6, #20\n\t"
|
||||
"ror r7, r7, #20\n\t"
|
||||
"add %[m], %[m], r6\n\t"
|
||||
"add %[len], %[len], r7\n\t"
|
||||
"eor r12, r12, %[m]\n\t"
|
||||
"eor lr, lr, %[len]\n\t"
|
||||
"ror r12, r12, #24\n\t"
|
||||
"ror lr, lr, #24\n\t"
|
||||
"add r10, r10, r12\n\t"
|
||||
"add r11, r11, lr\n\t"
|
||||
"eor r6, r6, r10\n\t"
|
||||
"eor r7, r7, r11\n\t"
|
||||
"ror r6, r6, #25\n\t"
|
||||
"ror r7, r7, #25\n\t"
|
||||
/* 3, 4, 9, 14 */
|
||||
/* 0, 5, 10, 15 */
|
||||
"add %[len], %[len], r4\n\t"
|
||||
"add %[ctx], %[ctx], r5\n\t"
|
||||
"eor r12, r12, %[len]\n\t"
|
||||
"eor lr, lr, %[ctx]\n\t"
|
||||
"ror r12, r12, #16\n\t"
|
||||
"ror lr, lr, #16\n\t"
|
||||
"add r9, r9, r12\n\t"
|
||||
"add r10, r10, lr\n\t"
|
||||
"eor r4, r4, r9\n\t"
|
||||
"eor r5, r5, r10\n\t"
|
||||
"ror r4, r4, #20\n\t"
|
||||
"ror r5, r5, #20\n\t"
|
||||
"add %[len], %[len], r4\n\t"
|
||||
"add %[ctx], %[ctx], r5\n\t"
|
||||
"eor r12, r12, %[len]\n\t"
|
||||
"eor lr, lr, %[ctx]\n\t"
|
||||
"ror r12, r12, #24\n\t"
|
||||
"ror lr, lr, #24\n\t"
|
||||
"add r9, r9, r12\n\t"
|
||||
"add r10, r10, lr\n\t"
|
||||
"eor r4, r4, r9\n\t"
|
||||
"eor r5, r5, r10\n\t"
|
||||
"ror r4, r4, #25\n\t"
|
||||
"ror r5, r5, #25\n\t"
|
||||
"str r12, [sp, #24]\n\t"
|
||||
"str lr, [sp, #28]\n\t"
|
||||
"ldr r12, [sp, #16]\n\t"
|
||||
"ldr lr, [sp, #20]\n\t"
|
||||
/* 1, 6, 11, 12 */
|
||||
/* 2, 7, 8, 13 */
|
||||
"add %[c], %[c], r6\n\t"
|
||||
"add %[m], %[m], r7\n\t"
|
||||
"eor r12, r12, %[c]\n\t"
|
||||
"eor lr, lr, %[m]\n\t"
|
||||
"ror r12, r12, #16\n\t"
|
||||
"ror lr, lr, #16\n\t"
|
||||
"add r11, r11, r12\n\t"
|
||||
"add r8, r8, lr\n\t"
|
||||
"eor r6, r6, r11\n\t"
|
||||
"eor r7, r7, r8\n\t"
|
||||
"ror r6, r6, #20\n\t"
|
||||
"ror r7, r7, #20\n\t"
|
||||
"add %[c], %[c], r6\n\t"
|
||||
"add %[m], %[m], r7\n\t"
|
||||
"eor r12, r12, %[c]\n\t"
|
||||
"eor lr, lr, %[m]\n\t"
|
||||
"ror r12, r12, #24\n\t"
|
||||
"ror lr, lr, #24\n\t"
|
||||
"add r11, r11, r12\n\t"
|
||||
"add r8, r8, lr\n\t"
|
||||
"eor r6, r6, r11\n\t"
|
||||
"eor r7, r7, r8\n\t"
|
||||
"ror r6, r6, #25\n\t"
|
||||
"ror r7, r7, #25\n\t"
|
||||
"str lr, [sp, #20]\n\t"
|
||||
/* Check if we have done enough rounds. */
|
||||
"ldr lr, [sp, #48]\n\t"
|
||||
"subs lr, lr, #1\n\t"
|
||||
"str lr, [sp, #48]\n\t"
|
||||
"bgt L_chacha_arm32_crypt_loop_%=\n\t"
|
||||
"stm sp, {r8, r9, r10, r11, r12}\n\t"
|
||||
"ldr lr, [sp, #32]\n\t"
|
||||
"mov r12, sp\n\t"
|
||||
/* Add in original state */
|
||||
"ldm lr!, {r8, r9, r10, r11}\n\t"
|
||||
"add %[ctx], %[ctx], r8\n\t"
|
||||
"add %[c], %[c], r9\n\t"
|
||||
"add %[m], %[m], r10\n\t"
|
||||
"add %[len], %[len], r11\n\t"
|
||||
"ldm lr!, {r8, r9, r10, r11}\n\t"
|
||||
"add r4, r4, r8\n\t"
|
||||
"add r5, r5, r9\n\t"
|
||||
"add r6, r6, r10\n\t"
|
||||
"add r7, r7, r11\n\t"
|
||||
"ldm r12, {r8, r9}\n\t"
|
||||
"ldm lr!, {r10, r11}\n\t"
|
||||
"add r8, r8, r10\n\t"
|
||||
"add r9, r9, r11\n\t"
|
||||
"stm r12!, {r8, r9}\n\t"
|
||||
"ldm r12, {r8, r9}\n\t"
|
||||
"ldm lr!, {r10, r11}\n\t"
|
||||
"add r8, r8, r10\n\t"
|
||||
"add r9, r9, r11\n\t"
|
||||
"stm r12!, {r8, r9}\n\t"
|
||||
"ldm r12, {r8, r9}\n\t"
|
||||
"ldm lr!, {r10, r11}\n\t"
|
||||
"add r8, r8, r10\n\t"
|
||||
"add r9, r9, r11\n\t"
|
||||
"add r10, r10, #1\n\t"
|
||||
"stm r12!, {r8, r9}\n\t"
|
||||
"str r10, [lr, #-8]\n\t"
|
||||
"ldm r12, {r8, r9}\n\t"
|
||||
"ldm lr, {r10, r11}\n\t"
|
||||
"add r8, r8, r10\n\t"
|
||||
"add r9, r9, r11\n\t"
|
||||
"stm r12, {r8, r9}\n\t"
|
||||
"ldr r12, [sp, #44]\n\t"
|
||||
"cmp r12, #0x40\n\t"
|
||||
"blt L_chacha_arm32_crypt_lt_block_%=\n\t"
|
||||
"ldr r12, [sp, #40]\n\t"
|
||||
"ldr lr, [sp, #36]\n\t"
|
||||
/* XOR state into 64 bytes. */
|
||||
"ldr r8, [r12]\n\t"
|
||||
"ldr r9, [r12, #4]\n\t"
|
||||
"ldr r10, [r12, #8]\n\t"
|
||||
"ldr r11, [r12, #12]\n\t"
|
||||
"eor %[ctx], %[ctx], r8\n\t"
|
||||
"eor %[c], %[c], r9\n\t"
|
||||
"eor %[m], %[m], r10\n\t"
|
||||
"eor %[len], %[len], r11\n\t"
|
||||
"str %[ctx], [lr]\n\t"
|
||||
"str %[c], [lr, #4]\n\t"
|
||||
"str %[m], [lr, #8]\n\t"
|
||||
"str %[len], [lr, #12]\n\t"
|
||||
"ldr r8, [r12, #16]\n\t"
|
||||
"ldr r9, [r12, #20]\n\t"
|
||||
"ldr r10, [r12, #24]\n\t"
|
||||
"ldr r11, [r12, #28]\n\t"
|
||||
"eor r4, r4, r8\n\t"
|
||||
"eor r5, r5, r9\n\t"
|
||||
"eor r6, r6, r10\n\t"
|
||||
"eor r7, r7, r11\n\t"
|
||||
"str r4, [lr, #16]\n\t"
|
||||
"str r5, [lr, #20]\n\t"
|
||||
"str r6, [lr, #24]\n\t"
|
||||
"str r7, [lr, #28]\n\t"
|
||||
"ldr r4, [sp]\n\t"
|
||||
"ldr r5, [sp, #4]\n\t"
|
||||
"ldr r6, [sp, #8]\n\t"
|
||||
"ldr r7, [sp, #12]\n\t"
|
||||
"ldr r8, [r12, #32]\n\t"
|
||||
"ldr r9, [r12, #36]\n\t"
|
||||
"ldr r10, [r12, #40]\n\t"
|
||||
"ldr r11, [r12, #44]\n\t"
|
||||
"eor r4, r4, r8\n\t"
|
||||
"eor r5, r5, r9\n\t"
|
||||
"eor r6, r6, r10\n\t"
|
||||
"eor r7, r7, r11\n\t"
|
||||
"str r4, [lr, #32]\n\t"
|
||||
"str r5, [lr, #36]\n\t"
|
||||
"str r6, [lr, #40]\n\t"
|
||||
"str r7, [lr, #44]\n\t"
|
||||
"ldr r4, [sp, #16]\n\t"
|
||||
"ldr r5, [sp, #20]\n\t"
|
||||
"ldr r6, [sp, #24]\n\t"
|
||||
"ldr r7, [sp, #28]\n\t"
|
||||
"ldr r8, [r12, #48]\n\t"
|
||||
"ldr r9, [r12, #52]\n\t"
|
||||
"ldr r10, [r12, #56]\n\t"
|
||||
"ldr r11, [r12, #60]\n\t"
|
||||
"eor r4, r4, r8\n\t"
|
||||
"eor r5, r5, r9\n\t"
|
||||
"eor r6, r6, r10\n\t"
|
||||
"eor r7, r7, r11\n\t"
|
||||
"str r4, [lr, #48]\n\t"
|
||||
"str r5, [lr, #52]\n\t"
|
||||
"str r6, [lr, #56]\n\t"
|
||||
"str r7, [lr, #60]\n\t"
|
||||
"ldr %[len], [sp, #44]\n\t"
|
||||
"add r12, r12, #0x40\n\t"
|
||||
"add lr, lr, #0x40\n\t"
|
||||
"str r12, [sp, #40]\n\t"
|
||||
"str lr, [sp, #36]\n\t"
|
||||
"subs %[len], %[len], #0x40\n\t"
|
||||
"ldr lr, [sp, #32]\n\t"
|
||||
"str %[len], [sp, #44]\n\t"
|
||||
"bne L_chacha_arm32_crypt_block_%=\n\t"
|
||||
"b L_chacha_arm32_crypt_done_%=\n\t"
|
||||
"\n"
|
||||
"L_chacha_arm32_crypt_lt_block_%=: \n\t"
|
||||
/* Store in over field of ChaCha. */
|
||||
"ldr lr, [sp, #32]\n\t"
|
||||
"add r12, lr, #0x44\n\t"
|
||||
"stm r12!, {%[ctx], %[c], %[m], %[len], r4, r5, r6, r7}\n\t"
|
||||
"ldm sp, {%[ctx], %[c], %[m], %[len], r4, r5, r6, r7}\n\t"
|
||||
"stm r12, {%[ctx], %[c], %[m], %[len], r4, r5, r6, r7}\n\t"
|
||||
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
|
||||
"ldr %[m], [sp, #40]\n\t"
|
||||
"ldr %[len], [sp, #44]\n\t"
|
||||
#else
|
||||
"ldrd %[m], %[len], [sp, #40]\n\t"
|
||||
#endif
|
||||
"ldr %[c], [sp, #36]\n\t"
|
||||
"rsb r12, %[len], #0x40\n\t"
|
||||
"str r12, [lr, #64]\n\t"
|
||||
"add lr, lr, #0x44\n\t"
|
||||
"\n"
|
||||
"L_chacha_arm32_crypt_16byte_loop_%=: \n\t"
|
||||
"cmp %[len], #16\n\t"
|
||||
"blt L_chacha_arm32_crypt_word_loop_%=\n\t"
|
||||
/* 16 bytes of state XORed into message. */
|
||||
"ldm lr!, {r4, r5, r6, r7}\n\t"
|
||||
"ldr r8, [%[m]]\n\t"
|
||||
"ldr r9, [%[m], #4]\n\t"
|
||||
"ldr r10, [%[m], #8]\n\t"
|
||||
"ldr r11, [%[m], #12]\n\t"
|
||||
"eor r8, r8, r4\n\t"
|
||||
"eor r9, r9, r5\n\t"
|
||||
"eor r10, r10, r6\n\t"
|
||||
"eor r11, r11, r7\n\t"
|
||||
"subs %[len], %[len], #16\n\t"
|
||||
"str r8, [%[c]]\n\t"
|
||||
"str r9, [%[c], #4]\n\t"
|
||||
"str r10, [%[c], #8]\n\t"
|
||||
"str r11, [%[c], #12]\n\t"
|
||||
"beq L_chacha_arm32_crypt_done_%=\n\t"
|
||||
"add %[m], %[m], #16\n\t"
|
||||
"add %[c], %[c], #16\n\t"
|
||||
"b L_chacha_arm32_crypt_16byte_loop_%=\n\t"
|
||||
"\n"
|
||||
"L_chacha_arm32_crypt_word_loop_%=: \n\t"
|
||||
"cmp %[len], #4\n\t"
|
||||
"blt L_chacha_arm32_crypt_byte_start_%=\n\t"
|
||||
/* 4 bytes of state XORed into message. */
|
||||
"ldr r4, [lr]\n\t"
|
||||
"ldr r8, [%[m]]\n\t"
|
||||
"eor r8, r8, r4\n\t"
|
||||
"subs %[len], %[len], #4\n\t"
|
||||
"str r8, [%[c]]\n\t"
|
||||
"beq L_chacha_arm32_crypt_done_%=\n\t"
|
||||
"add lr, lr, #4\n\t"
|
||||
"add %[m], %[m], #4\n\t"
|
||||
"add %[c], %[c], #4\n\t"
|
||||
"b L_chacha_arm32_crypt_word_loop_%=\n\t"
|
||||
"\n"
|
||||
"L_chacha_arm32_crypt_byte_start_%=: \n\t"
|
||||
"ldr r4, [lr]\n\t"
|
||||
"\n"
|
||||
"L_chacha_arm32_crypt_byte_loop_%=: \n\t"
|
||||
"ldrb r8, [%[m]]\n\t"
|
||||
"eor r8, r8, r4\n\t"
|
||||
"subs %[len], %[len], #1\n\t"
|
||||
"strb r8, [%[c]]\n\t"
|
||||
"beq L_chacha_arm32_crypt_done_%=\n\t"
|
||||
"lsr r4, r4, #8\n\t"
|
||||
"add %[m], %[m], #1\n\t"
|
||||
"add %[c], %[c], #1\n\t"
|
||||
"b L_chacha_arm32_crypt_byte_loop_%=\n\t"
|
||||
"\n"
|
||||
"L_chacha_arm32_crypt_done_%=: \n\t"
|
||||
"add sp, sp, #52\n\t"
|
||||
: [ctx] "+r" (ctx), [c] "+r" (c), [m] "+r" (m), [len] "+r" (len)
|
||||
:
|
||||
: "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc"
|
||||
);
|
||||
}
|
||||
|
||||
void wc_chacha_use_over(byte* over_p, byte* output_p, const byte* input_p, word32 len_p)
|
||||
{
|
||||
register byte* over asm ("r0") = (byte*)over_p;
|
||||
register byte* output asm ("r1") = (byte*)output_p;
|
||||
register const byte* input asm ("r2") = (const byte*)input_p;
|
||||
register word32 len asm ("r3") = (word32)len_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"\n"
|
||||
"L_chacha_arm32_over_16byte_loop_%=: \n\t"
|
||||
"cmp %[len], #16\n\t"
|
||||
"blt L_chacha_arm32_over_word_loop_%=\n\t"
|
||||
/* 16 bytes of state XORed into message. */
|
||||
"ldr r12, [%[over]]\n\t"
|
||||
"ldr lr, [%[over], #4]\n\t"
|
||||
"ldr r4, [%[over], #8]\n\t"
|
||||
"ldr r5, [%[over], #12]\n\t"
|
||||
"ldr r6, [%[input]]\n\t"
|
||||
"ldr r7, [%[input], #4]\n\t"
|
||||
"ldr r8, [%[input], #8]\n\t"
|
||||
"ldr r9, [%[input], #12]\n\t"
|
||||
"eor r12, r12, r6\n\t"
|
||||
"eor lr, lr, r7\n\t"
|
||||
"eor r4, r4, r8\n\t"
|
||||
"eor r5, r5, r9\n\t"
|
||||
"subs %[len], %[len], #16\n\t"
|
||||
"str r12, [%[output]]\n\t"
|
||||
"str lr, [%[output], #4]\n\t"
|
||||
"str r4, [%[output], #8]\n\t"
|
||||
"str r5, [%[output], #12]\n\t"
|
||||
"beq L_chacha_arm32_over_done_%=\n\t"
|
||||
"add %[over], %[over], #16\n\t"
|
||||
"add %[input], %[input], #16\n\t"
|
||||
"add %[output], %[output], #16\n\t"
|
||||
"b L_chacha_arm32_over_16byte_loop_%=\n\t"
|
||||
"\n"
|
||||
"L_chacha_arm32_over_word_loop_%=: \n\t"
|
||||
"cmp %[len], #4\n\t"
|
||||
"blt L_chacha_arm32_over_byte_loop_%=\n\t"
|
||||
/* 4 bytes of state XORed into message. */
|
||||
"ldr r12, [%[over]]\n\t"
|
||||
"ldr r6, [%[input]]\n\t"
|
||||
"eor r12, r12, r6\n\t"
|
||||
"subs %[len], %[len], #4\n\t"
|
||||
"str r12, [%[output]]\n\t"
|
||||
"beq L_chacha_arm32_over_done_%=\n\t"
|
||||
"add %[over], %[over], #4\n\t"
|
||||
"add %[input], %[input], #4\n\t"
|
||||
"add %[output], %[output], #4\n\t"
|
||||
"b L_chacha_arm32_over_word_loop_%=\n\t"
|
||||
"\n"
|
||||
"L_chacha_arm32_over_byte_loop_%=: \n\t"
|
||||
/* 4 bytes of state XORed into message. */
|
||||
"ldrb r12, [%[over]]\n\t"
|
||||
"ldrb r6, [%[input]]\n\t"
|
||||
"eor r12, r12, r6\n\t"
|
||||
"subs %[len], %[len], #1\n\t"
|
||||
"strb r12, [%[output]]\n\t"
|
||||
"beq L_chacha_arm32_over_done_%=\n\t"
|
||||
"add %[over], %[over], #1\n\t"
|
||||
"add %[input], %[input], #1\n\t"
|
||||
"add %[output], %[output], #1\n\t"
|
||||
"b L_chacha_arm32_over_byte_loop_%=\n\t"
|
||||
"\n"
|
||||
"L_chacha_arm32_over_done_%=: \n\t"
|
||||
: [over] "+r" (over), [output] "+r" (output), [input] "+r" (input), [len] "+r" (len)
|
||||
:
|
||||
: "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
|
||||
);
|
||||
}
|
||||
|
||||
#endif /* WOLFSSL_ARMASM_NO_NEON */
|
||||
#endif /* HAVE_CHACHA */
|
||||
#endif /* !__aarch64__ && __arm__ && !__thumb__ */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
#endif /* !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
|
||||
#endif /* WOLFSSL_ARMASM_INLINE */
|
|
@ -0,0 +1,356 @@
|
|||
/* armv8-32-poly1305-asm
|
||||
*
|
||||
* Copyright (C) 2006-2024 wolfSSL Inc.
|
||||
*
|
||||
* This file is part of wolfSSL.
|
||||
*
|
||||
* wolfSSL is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* wolfSSL is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
|
||||
*/
|
||||
|
||||
/* Generated using (from wolfssl):
|
||||
* cd ../scripts
|
||||
* ruby ./poly1305/poly1305.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-poly1305-asm.S
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__)
|
||||
#ifndef WOLFSSL_ARMASM_INLINE
|
||||
#ifdef HAVE_POLY1305
|
||||
.text
|
||||
.align 4
|
||||
.globl poly1305_blocks_arm32_16
|
||||
.type poly1305_blocks_arm32_16, %function
|
||||
poly1305_blocks_arm32_16:
|
||||
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
sub sp, sp, #28
|
||||
cmp r2, #0
|
||||
beq L_poly1305_arm32_16_done
|
||||
add lr, sp, #12
|
||||
stm lr, {r0, r1, r2, r3}
|
||||
# Get h pointer
|
||||
add lr, r0, #16
|
||||
ldm lr, {r4, r5, r6, r7, r8}
|
||||
L_poly1305_arm32_16_loop:
|
||||
# Add m to h
|
||||
ldr r1, [sp, #16]
|
||||
ldr r2, [r1]
|
||||
ldr r3, [r1, #4]
|
||||
ldr r9, [r1, #8]
|
||||
ldr r10, [r1, #12]
|
||||
ldr r11, [sp, #24]
|
||||
adds r4, r4, r2
|
||||
adcs r5, r5, r3
|
||||
adcs r6, r6, r9
|
||||
adcs r7, r7, r10
|
||||
add r1, r1, #16
|
||||
adc r8, r8, r11
|
||||
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
|
||||
stm lr, {r4, r5, r6, r7, r8}
|
||||
#else
|
||||
# h[0]-h[2] in r4-r6 for multiplication.
|
||||
str r7, [lr, #12]
|
||||
str r8, [lr, #16]
|
||||
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */
|
||||
str r1, [sp, #16]
|
||||
ldr r1, [sp, #12]
|
||||
# Multiply h by r
|
||||
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
|
||||
# r0 = #0, r1 = r, lr = h, r2 = h[j], r3 = r[i]
|
||||
ldr r3, [r1]
|
||||
eor r0, r0, r0
|
||||
# r[0] * h[0]
|
||||
# h[0] in r4
|
||||
umull r4, r5, r3, r4
|
||||
# r[0] * h[2]
|
||||
# h[2] in r6
|
||||
umull r6, r7, r3, r6
|
||||
# r[0] * h[4]
|
||||
# h[4] in r8
|
||||
mul r8, r3, r8
|
||||
# r[0] * h[1]
|
||||
ldr r2, [lr, #4]
|
||||
mov r12, r0
|
||||
umlal r5, r12, r3, r2
|
||||
# r[0] * h[3]
|
||||
ldr r2, [lr, #12]
|
||||
adds r6, r6, r12
|
||||
adc r7, r7, r0
|
||||
umlal r7, r8, r3, r2
|
||||
# r[1] * h[0]
|
||||
ldr r3, [r1, #4]
|
||||
ldr r2, [lr]
|
||||
mov r12, r0
|
||||
umlal r5, r12, r3, r2
|
||||
# r[1] * h[1]
|
||||
ldr r2, [lr, #4]
|
||||
adds r6, r6, r12
|
||||
adc r12, r0, r0
|
||||
umlal r6, r12, r3, r2
|
||||
# r[1] * h[2]
|
||||
ldr r2, [lr, #8]
|
||||
adds r7, r7, r12
|
||||
adc r12, r0, r0
|
||||
umlal r7, r12, r3, r2
|
||||
# r[1] * h[3]
|
||||
ldr r2, [lr, #12]
|
||||
adds r8, r8, r12
|
||||
adc r9, r0, r0
|
||||
umlal r8, r9, r3, r2
|
||||
# r[1] * h[4]
|
||||
ldr r2, [lr, #16]
|
||||
mla r9, r3, r2, r9
|
||||
# r[2] * h[0]
|
||||
ldr r3, [r1, #8]
|
||||
ldr r2, [lr]
|
||||
mov r12, r0
|
||||
umlal r6, r12, r3, r2
|
||||
# r[2] * h[1]
|
||||
ldr r2, [lr, #4]
|
||||
adds r7, r7, r12
|
||||
adc r12, r0, r0
|
||||
umlal r7, r12, r3, r2
|
||||
# r[2] * h[2]
|
||||
ldr r2, [lr, #8]
|
||||
adds r8, r8, r12
|
||||
adc r12, r0, r0
|
||||
umlal r8, r12, r3, r2
|
||||
# r[2] * h[3]
|
||||
ldr r2, [lr, #12]
|
||||
adds r9, r9, r12
|
||||
adc r10, r0, r0
|
||||
umlal r9, r10, r3, r2
|
||||
# r[2] * h[4]
|
||||
ldr r2, [lr, #16]
|
||||
mla r10, r3, r2, r10
|
||||
# r[3] * h[0]
|
||||
ldr r3, [r1, #12]
|
||||
ldr r2, [lr]
|
||||
mov r12, r0
|
||||
umlal r7, r12, r3, r2
|
||||
# r[3] * h[1]
|
||||
ldr r2, [lr, #4]
|
||||
adds r8, r8, r12
|
||||
adc r12, r0, r0
|
||||
umlal r8, r12, r3, r2
|
||||
# r[3] * h[2]
|
||||
ldr r2, [lr, #8]
|
||||
adds r9, r9, r12
|
||||
adc r10, r10, r0
|
||||
umlal r9, r10, r3, r2
|
||||
# r[3] * h[3]
|
||||
ldr r2, [lr, #12]
|
||||
mov r11, r0
|
||||
umlal r10, r11, r3, r2
|
||||
# r[3] * h[4]
|
||||
ldr r2, [lr, #16]
|
||||
mov r12, r0
|
||||
mla r11, r3, r2, r11
|
||||
#else
|
||||
ldm r1, {r0, r1, r2, r3}
|
||||
# r[0] * h[0]
|
||||
umull r10, r11, r0, r4
|
||||
# r[1] * h[0]
|
||||
umull r12, r7, r1, r4
|
||||
# r[0] * h[1]
|
||||
umaal r11, r12, r0, r5
|
||||
# r[2] * h[0]
|
||||
umull r8, r9, r2, r4
|
||||
# r[1] * h[1]
|
||||
umaal r12, r8, r1, r5
|
||||
# r[0] * h[2]
|
||||
umaal r12, r7, r0, r6
|
||||
# r[3] * h[0]
|
||||
umaal r8, r9, r3, r4
|
||||
stm sp, {r10, r11, r12}
|
||||
# r[2] * h[1]
|
||||
umaal r7, r8, r2, r5
|
||||
# Replace h[0] with h[3]
|
||||
ldr r4, [lr, #12]
|
||||
# r[1] * h[2]
|
||||
umull r10, r11, r1, r6
|
||||
# r[2] * h[2]
|
||||
umaal r8, r9, r2, r6
|
||||
# r[0] * h[3]
|
||||
umaal r7, r10, r0, r4
|
||||
# r[3] * h[1]
|
||||
umaal r8, r11, r3, r5
|
||||
# r[1] * h[3]
|
||||
umaal r8, r10, r1, r4
|
||||
# r[3] * h[2]
|
||||
umaal r9, r11, r3, r6
|
||||
# r[2] * h[3]
|
||||
umaal r9, r10, r2, r4
|
||||
# Replace h[1] with h[4]
|
||||
ldr r5, [lr, #16]
|
||||
# r[3] * h[3]
|
||||
umaal r10, r11, r3, r4
|
||||
mov r12, #0
|
||||
# r[0] * h[4]
|
||||
umaal r8, r12, r0, r5
|
||||
# r[1] * h[4]
|
||||
umaal r9, r12, r1, r5
|
||||
# r[2] * h[4]
|
||||
umaal r10, r12, r2, r5
|
||||
# r[3] * h[4]
|
||||
umaal r11, r12, r3, r5
|
||||
# DONE
|
||||
ldm sp, {r4, r5, r6}
|
||||
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */
|
||||
# r12 will be zero because r is masked.
|
||||
# Load length
|
||||
ldr r2, [sp, #20]
|
||||
# Reduce mod 2^130 - 5
|
||||
bic r3, r8, #3
|
||||
and r8, r8, #3
|
||||
adds r4, r4, r3
|
||||
lsr r3, r3, #2
|
||||
adcs r5, r5, r9
|
||||
orr r3, r3, r9, LSL #30
|
||||
adcs r6, r6, r10
|
||||
lsr r9, r9, #2
|
||||
adcs r7, r7, r11
|
||||
orr r9, r9, r10, LSL #30
|
||||
adc r8, r8, r12
|
||||
lsr r10, r10, #2
|
||||
adds r4, r4, r3
|
||||
orr r10, r10, r11, LSL #30
|
||||
adcs r5, r5, r9
|
||||
lsr r11, r11, #2
|
||||
adcs r6, r6, r10
|
||||
adcs r7, r7, r11
|
||||
adc r8, r8, r12
|
||||
# Sub 16 from length.
|
||||
subs r2, r2, #16
|
||||
# Store length.
|
||||
str r2, [sp, #20]
|
||||
# Loop again if more message to do.
|
||||
bgt L_poly1305_arm32_16_loop
|
||||
stm lr, {r4, r5, r6, r7, r8}
|
||||
L_poly1305_arm32_16_done:
|
||||
add sp, sp, #28
|
||||
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
.size poly1305_blocks_arm32_16,.-poly1305_blocks_arm32_16
|
||||
.text
|
||||
.type L_poly1305_arm32_clamp, %object
|
||||
.size L_poly1305_arm32_clamp, 16
|
||||
.align 4
|
||||
L_poly1305_arm32_clamp:
|
||||
.word 0xfffffff
|
||||
.word 0xffffffc
|
||||
.word 0xffffffc
|
||||
.word 0xffffffc
|
||||
.text
|
||||
.align 4
|
||||
.globl poly1305_set_key
|
||||
.type poly1305_set_key, %function
|
||||
poly1305_set_key:
|
||||
push {r4, r5, r6, r7, r8, lr}
|
||||
# Load mask.
|
||||
adr lr, L_poly1305_arm32_clamp
|
||||
ldm lr, {r6, r7, r8, r12}
|
||||
# Load and cache padding.
|
||||
ldr r2, [r1, #16]
|
||||
ldr r3, [r1, #20]
|
||||
ldr r4, [r1, #24]
|
||||
ldr r5, [r1, #28]
|
||||
add lr, r0, #36
|
||||
stm lr, {r2, r3, r4, r5}
|
||||
# Load, mask and store r.
|
||||
ldr r2, [r1]
|
||||
ldr r3, [r1, #4]
|
||||
ldr r4, [r1, #8]
|
||||
ldr r5, [r1, #12]
|
||||
and r2, r2, r6
|
||||
and r3, r3, r7
|
||||
and r4, r4, r8
|
||||
and r5, r5, r12
|
||||
add lr, r0, #0
|
||||
stm lr, {r2, r3, r4, r5}
|
||||
# h (accumulator) = 0
|
||||
eor r6, r6, r6
|
||||
eor r7, r7, r7
|
||||
eor r8, r8, r8
|
||||
eor r12, r12, r12
|
||||
add lr, r0, #16
|
||||
eor r5, r5, r5
|
||||
stm lr, {r5, r6, r7, r8, r12}
|
||||
# Zero leftover
|
||||
str r5, [r0, #52]
|
||||
pop {r4, r5, r6, r7, r8, pc}
|
||||
.size poly1305_set_key,.-poly1305_set_key
|
||||
.text
|
||||
.align 4
|
||||
.globl poly1305_final
|
||||
.type poly1305_final, %function
|
||||
poly1305_final:
|
||||
push {r4, r5, r6, r7, r8, r9, lr}
|
||||
add r9, r0, #16
|
||||
ldm r9, {r4, r5, r6, r7, r8}
|
||||
# Add 5 and check for h larger than p.
|
||||
adds r2, r4, #5
|
||||
adcs r2, r5, #0
|
||||
adcs r2, r6, #0
|
||||
adcs r2, r7, #0
|
||||
adc r2, r8, #0
|
||||
sub r2, r2, #4
|
||||
lsr r2, r2, #31
|
||||
sub r2, r2, #1
|
||||
and r2, r2, #5
|
||||
# Add 0/5 to h.
|
||||
adds r4, r4, r2
|
||||
adcs r5, r5, #0
|
||||
adcs r6, r6, #0
|
||||
adc r7, r7, #0
|
||||
# Add padding
|
||||
add r9, r0, #36
|
||||
ldm r9, {r2, r3, r12, lr}
|
||||
adds r4, r4, r2
|
||||
adcs r5, r5, r3
|
||||
adcs r6, r6, r12
|
||||
adc r7, r7, lr
|
||||
# Store MAC
|
||||
str r4, [r1]
|
||||
str r5, [r1, #4]
|
||||
str r6, [r1, #8]
|
||||
str r7, [r1, #12]
|
||||
# Zero out h.
|
||||
eor r4, r4, r4
|
||||
eor r5, r5, r5
|
||||
eor r6, r6, r6
|
||||
eor r7, r7, r7
|
||||
eor r8, r8, r8
|
||||
add r9, r0, #16
|
||||
stm r9, {r4, r5, r6, r7, r8}
|
||||
# Zero out r.
|
||||
add r9, r0, #0
|
||||
stm r9, {r4, r5, r6, r7}
|
||||
# Zero out padding.
|
||||
add r9, r0, #36
|
||||
stm r9, {r4, r5, r6, r7}
|
||||
pop {r4, r5, r6, r7, r8, r9, pc}
|
||||
.size poly1305_final,.-poly1305_final
|
||||
#endif /* HAVE_POLY1305 */
|
||||
#endif /* !__aarch64__ && __arm__ && !__thumb__ */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
||||
#endif /* !WOLFSSL_ARMASM_INLINE */
|
|
@ -0,0 +1,388 @@
|
|||
/* armv8-32-poly1305-asm
|
||||
*
|
||||
* Copyright (C) 2006-2024 wolfSSL Inc.
|
||||
*
|
||||
* This file is part of wolfSSL.
|
||||
*
|
||||
* wolfSSL is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* wolfSSL is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
|
||||
*/
|
||||
|
||||
/* Generated using (from wolfssl):
|
||||
* cd ../scripts
|
||||
* ruby ./poly1305/poly1305.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-poly1305-asm.c
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__)
|
||||
#include <stdint.h>
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
#ifdef WOLFSSL_ARMASM_INLINE
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__)
|
||||
|
||||
#ifdef __IAR_SYSTEMS_ICC__
|
||||
#define __asm__ asm
|
||||
#define __volatile__ volatile
|
||||
#endif /* __IAR_SYSTEMS_ICC__ */
|
||||
#ifdef __KEIL__
|
||||
#define __asm__ __asm
|
||||
#define __volatile__ volatile
|
||||
#endif /* __KEIL__ */
|
||||
#ifdef HAVE_POLY1305
|
||||
#include <wolfssl/wolfcrypt/poly1305.h>
|
||||
|
||||
void poly1305_blocks_arm32_16(Poly1305* ctx_p, const byte* m_p, word32 len_p, int notLast_p)
|
||||
{
|
||||
register Poly1305* ctx asm ("r0") = (Poly1305*)ctx_p;
|
||||
register const byte* m asm ("r1") = (const byte*)m_p;
|
||||
register word32 len asm ("r2") = (word32)len_p;
|
||||
register int notLast asm ("r3") = (int)notLast_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #28\n\t"
|
||||
"cmp %[len], #0\n\t"
|
||||
"beq L_poly1305_arm32_16_done_%=\n\t"
|
||||
"add lr, sp, #12\n\t"
|
||||
"stm lr, {%[ctx], %[m], %[len], %[notLast]}\n\t"
|
||||
/* Get h pointer */
|
||||
"add lr, %[ctx], #16\n\t"
|
||||
"ldm lr, {r4, r5, r6, r7, r8}\n\t"
|
||||
"\n"
|
||||
"L_poly1305_arm32_16_loop_%=: \n\t"
|
||||
/* Add m to h */
|
||||
"ldr %[m], [sp, #16]\n\t"
|
||||
"ldr %[len], [%[m]]\n\t"
|
||||
"ldr %[notLast], [%[m], #4]\n\t"
|
||||
"ldr r9, [%[m], #8]\n\t"
|
||||
"ldr r10, [%[m], #12]\n\t"
|
||||
"ldr r11, [sp, #24]\n\t"
|
||||
"adds r4, r4, %[len]\n\t"
|
||||
"adcs r5, r5, %[notLast]\n\t"
|
||||
"adcs r6, r6, r9\n\t"
|
||||
"adcs r7, r7, r10\n\t"
|
||||
"add %[m], %[m], #16\n\t"
|
||||
"adc r8, r8, r11\n\t"
|
||||
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
|
||||
"stm lr, {r4, r5, r6, r7, r8}\n\t"
|
||||
#else
|
||||
/* h[0]-h[2] in r4-r6 for multiplication. */
|
||||
"str r7, [lr, #12]\n\t"
|
||||
"str r8, [lr, #16]\n\t"
|
||||
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */
|
||||
"str %[m], [sp, #16]\n\t"
|
||||
"ldr %[m], [sp, #12]\n\t"
|
||||
/* Multiply h by r */
|
||||
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
|
||||
/* r0 = #0, r1 = r, lr = h, r2 = h[j], r3 = r[i] */
|
||||
"ldr %[notLast], [%[m]]\n\t"
|
||||
"eor %[ctx], %[ctx], %[ctx]\n\t"
|
||||
/* r[0] * h[0] */
|
||||
/* h[0] in r4 */
|
||||
"umull r4, r5, %[notLast], r4\n\t"
|
||||
/* r[0] * h[2] */
|
||||
/* h[2] in r6 */
|
||||
"umull r6, r7, %[notLast], r6\n\t"
|
||||
/* r[0] * h[4] */
|
||||
/* h[4] in r8 */
|
||||
"mul r8, %[notLast], r8\n\t"
|
||||
/* r[0] * h[1] */
|
||||
"ldr %[len], [lr, #4]\n\t"
|
||||
"mov r12, %[ctx]\n\t"
|
||||
"umlal r5, r12, %[notLast], %[len]\n\t"
|
||||
/* r[0] * h[3] */
|
||||
"ldr %[len], [lr, #12]\n\t"
|
||||
"adds r6, r6, r12\n\t"
|
||||
"adc r7, r7, %[ctx]\n\t"
|
||||
"umlal r7, r8, %[notLast], %[len]\n\t"
|
||||
/* r[1] * h[0] */
|
||||
"ldr %[notLast], [%[m], #4]\n\t"
|
||||
"ldr %[len], [lr]\n\t"
|
||||
"mov r12, %[ctx]\n\t"
|
||||
"umlal r5, r12, %[notLast], %[len]\n\t"
|
||||
/* r[1] * h[1] */
|
||||
"ldr %[len], [lr, #4]\n\t"
|
||||
"adds r6, r6, r12\n\t"
|
||||
"adc r12, %[ctx], %[ctx]\n\t"
|
||||
"umlal r6, r12, %[notLast], %[len]\n\t"
|
||||
/* r[1] * h[2] */
|
||||
"ldr %[len], [lr, #8]\n\t"
|
||||
"adds r7, r7, r12\n\t"
|
||||
"adc r12, %[ctx], %[ctx]\n\t"
|
||||
"umlal r7, r12, %[notLast], %[len]\n\t"
|
||||
/* r[1] * h[3] */
|
||||
"ldr %[len], [lr, #12]\n\t"
|
||||
"adds r8, r8, r12\n\t"
|
||||
"adc r9, %[ctx], %[ctx]\n\t"
|
||||
"umlal r8, r9, %[notLast], %[len]\n\t"
|
||||
/* r[1] * h[4] */
|
||||
"ldr %[len], [lr, #16]\n\t"
|
||||
"mla r9, %[notLast], %[len], r9\n\t"
|
||||
/* r[2] * h[0] */
|
||||
"ldr %[notLast], [%[m], #8]\n\t"
|
||||
"ldr %[len], [lr]\n\t"
|
||||
"mov r12, %[ctx]\n\t"
|
||||
"umlal r6, r12, %[notLast], %[len]\n\t"
|
||||
/* r[2] * h[1] */
|
||||
"ldr %[len], [lr, #4]\n\t"
|
||||
"adds r7, r7, r12\n\t"
|
||||
"adc r12, %[ctx], %[ctx]\n\t"
|
||||
"umlal r7, r12, %[notLast], %[len]\n\t"
|
||||
/* r[2] * h[2] */
|
||||
"ldr %[len], [lr, #8]\n\t"
|
||||
"adds r8, r8, r12\n\t"
|
||||
"adc r12, %[ctx], %[ctx]\n\t"
|
||||
"umlal r8, r12, %[notLast], %[len]\n\t"
|
||||
/* r[2] * h[3] */
|
||||
"ldr %[len], [lr, #12]\n\t"
|
||||
"adds r9, r9, r12\n\t"
|
||||
"adc r10, %[ctx], %[ctx]\n\t"
|
||||
"umlal r9, r10, %[notLast], %[len]\n\t"
|
||||
/* r[2] * h[4] */
|
||||
"ldr %[len], [lr, #16]\n\t"
|
||||
"mla r10, %[notLast], %[len], r10\n\t"
|
||||
/* r[3] * h[0] */
|
||||
"ldr %[notLast], [%[m], #12]\n\t"
|
||||
"ldr %[len], [lr]\n\t"
|
||||
"mov r12, %[ctx]\n\t"
|
||||
"umlal r7, r12, %[notLast], %[len]\n\t"
|
||||
/* r[3] * h[1] */
|
||||
"ldr %[len], [lr, #4]\n\t"
|
||||
"adds r8, r8, r12\n\t"
|
||||
"adc r12, %[ctx], %[ctx]\n\t"
|
||||
"umlal r8, r12, %[notLast], %[len]\n\t"
|
||||
/* r[3] * h[2] */
|
||||
"ldr %[len], [lr, #8]\n\t"
|
||||
"adds r9, r9, r12\n\t"
|
||||
"adc r10, r10, %[ctx]\n\t"
|
||||
"umlal r9, r10, %[notLast], %[len]\n\t"
|
||||
/* r[3] * h[3] */
|
||||
"ldr %[len], [lr, #12]\n\t"
|
||||
"mov r11, %[ctx]\n\t"
|
||||
"umlal r10, r11, %[notLast], %[len]\n\t"
|
||||
/* r[3] * h[4] */
|
||||
"ldr %[len], [lr, #16]\n\t"
|
||||
"mov r12, %[ctx]\n\t"
|
||||
"mla r11, %[notLast], %[len], r11\n\t"
|
||||
#else
|
||||
"ldm %[m], {%[ctx], %[m], %[len], %[notLast]}\n\t"
|
||||
/* r[0] * h[0] */
|
||||
"umull r10, r11, %[ctx], r4\n\t"
|
||||
/* r[1] * h[0] */
|
||||
"umull r12, r7, %[m], r4\n\t"
|
||||
/* r[0] * h[1] */
|
||||
"umaal r11, r12, %[ctx], r5\n\t"
|
||||
/* r[2] * h[0] */
|
||||
"umull r8, r9, %[len], r4\n\t"
|
||||
/* r[1] * h[1] */
|
||||
"umaal r12, r8, %[m], r5\n\t"
|
||||
/* r[0] * h[2] */
|
||||
"umaal r12, r7, %[ctx], r6\n\t"
|
||||
/* r[3] * h[0] */
|
||||
"umaal r8, r9, %[notLast], r4\n\t"
|
||||
"stm sp, {r10, r11, r12}\n\t"
|
||||
/* r[2] * h[1] */
|
||||
"umaal r7, r8, %[len], r5\n\t"
|
||||
/* Replace h[0] with h[3] */
|
||||
"ldr r4, [lr, #12]\n\t"
|
||||
/* r[1] * h[2] */
|
||||
"umull r10, r11, %[m], r6\n\t"
|
||||
/* r[2] * h[2] */
|
||||
"umaal r8, r9, %[len], r6\n\t"
|
||||
/* r[0] * h[3] */
|
||||
"umaal r7, r10, %[ctx], r4\n\t"
|
||||
/* r[3] * h[1] */
|
||||
"umaal r8, r11, %[notLast], r5\n\t"
|
||||
/* r[1] * h[3] */
|
||||
"umaal r8, r10, %[m], r4\n\t"
|
||||
/* r[3] * h[2] */
|
||||
"umaal r9, r11, %[notLast], r6\n\t"
|
||||
/* r[2] * h[3] */
|
||||
"umaal r9, r10, %[len], r4\n\t"
|
||||
/* Replace h[1] with h[4] */
|
||||
"ldr r5, [lr, #16]\n\t"
|
||||
/* r[3] * h[3] */
|
||||
"umaal r10, r11, %[notLast], r4\n\t"
|
||||
"mov r12, #0\n\t"
|
||||
/* r[0] * h[4] */
|
||||
"umaal r8, r12, %[ctx], r5\n\t"
|
||||
/* r[1] * h[4] */
|
||||
"umaal r9, r12, %[m], r5\n\t"
|
||||
/* r[2] * h[4] */
|
||||
"umaal r10, r12, %[len], r5\n\t"
|
||||
/* r[3] * h[4] */
|
||||
"umaal r11, r12, %[notLast], r5\n\t"
|
||||
/* DONE */
|
||||
"ldm sp, {r4, r5, r6}\n\t"
|
||||
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */
|
||||
/* r12 will be zero because r is masked. */
|
||||
/* Load length */
|
||||
"ldr %[len], [sp, #20]\n\t"
|
||||
/* Reduce mod 2^130 - 5 */
|
||||
"bic %[notLast], r8, #3\n\t"
|
||||
"and r8, r8, #3\n\t"
|
||||
"adds r4, r4, %[notLast]\n\t"
|
||||
"lsr %[notLast], %[notLast], #2\n\t"
|
||||
"adcs r5, r5, r9\n\t"
|
||||
"orr %[notLast], %[notLast], r9, LSL #30\n\t"
|
||||
"adcs r6, r6, r10\n\t"
|
||||
"lsr r9, r9, #2\n\t"
|
||||
"adcs r7, r7, r11\n\t"
|
||||
"orr r9, r9, r10, LSL #30\n\t"
|
||||
"adc r8, r8, r12\n\t"
|
||||
"lsr r10, r10, #2\n\t"
|
||||
"adds r4, r4, %[notLast]\n\t"
|
||||
"orr r10, r10, r11, LSL #30\n\t"
|
||||
"adcs r5, r5, r9\n\t"
|
||||
"lsr r11, r11, #2\n\t"
|
||||
"adcs r6, r6, r10\n\t"
|
||||
"adcs r7, r7, r11\n\t"
|
||||
"adc r8, r8, r12\n\t"
|
||||
/* Sub 16 from length. */
|
||||
"subs %[len], %[len], #16\n\t"
|
||||
/* Store length. */
|
||||
"str %[len], [sp, #20]\n\t"
|
||||
/* Loop again if more message to do. */
|
||||
"bgt L_poly1305_arm32_16_loop_%=\n\t"
|
||||
"stm lr, {r4, r5, r6, r7, r8}\n\t"
|
||||
"\n"
|
||||
"L_poly1305_arm32_16_done_%=: \n\t"
|
||||
"add sp, sp, #28\n\t"
|
||||
: [ctx] "+r" (ctx), [m] "+r" (m), [len] "+r" (len), [notLast] "+r" (notLast)
|
||||
:
|
||||
: "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc"
|
||||
);
|
||||
}
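/* Illustrative sketch, not part of the generated code above: the
 * "Reduce mod 2^130 - 5" sequence relies on the identity 2^130 == 5 (mod p),
 * folding the bits at and above bit 130 back into the low bits as 4*hi + hi.
 * The helper below is a scaled-down, standalone model of that folding
 * (k = 30, so one 64-bit word stands in for the multi-limb value kept in
 * r4-r11); the name fold_mod_2k_minus_5 is hypothetical. */
#include <stdint.h>
#include <stdio.h>

static uint64_t fold_mod_2k_minus_5(uint64_t x, unsigned k)
{
    const uint64_t p = ((uint64_t)1 << k) - 5;

    while ((x >> k) != 0) {                  /* bits above 2^k remain */
        uint64_t hi = x >> k;
        uint64_t lo = x & (((uint64_t)1 << k) - 1);
        x = lo + 4 * hi + hi;                /* lo + 5*hi, added as 4*hi + hi */
    }
    if (x >= p)                              /* at most one final subtraction */
        x -= p;
    return x;
}

int main(void)
{
    uint64_t x = 0x123456789abcdefULL;
    /* Both lines print the same residue. */
    printf("%llu\n", (unsigned long long)fold_mod_2k_minus_5(x, 30));
    printf("%llu\n", (unsigned long long)(x % ((((uint64_t)1) << 30) - 5)));
    return 0;
}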
|
||||
|
||||
static const uint32_t L_poly1305_arm32_clamp[] = {
|
||||
0x0fffffff, 0x0ffffffc, 0x0ffffffc, 0x0ffffffc,
|
||||
};
|
||||
|
||||
void poly1305_set_key(Poly1305* ctx_p, const byte* key_p)
|
||||
{
|
||||
register Poly1305* ctx asm ("r0") = (Poly1305*)ctx_p;
|
||||
register const byte* key asm ("r1") = (const byte*)key_p;
|
||||
register uint32_t* L_poly1305_arm32_clamp_c asm ("r2") = (uint32_t*)&L_poly1305_arm32_clamp;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
/* Load mask. */
|
||||
"mov lr, %[L_poly1305_arm32_clamp]\n\t"
|
||||
"ldm lr, {r6, r7, r8, r12}\n\t"
|
||||
/* Load and cache padding. */
|
||||
"ldr r2, [%[key], #16]\n\t"
|
||||
"ldr r3, [%[key], #20]\n\t"
|
||||
"ldr r4, [%[key], #24]\n\t"
|
||||
"ldr r5, [%[key], #28]\n\t"
|
||||
"add lr, %[ctx], #36\n\t"
|
||||
"stm lr, {r2, r3, r4, r5}\n\t"
|
||||
/* Load, mask and store r. */
|
||||
"ldr r2, [%[key]]\n\t"
|
||||
"ldr r3, [%[key], #4]\n\t"
|
||||
"ldr r4, [%[key], #8]\n\t"
|
||||
"ldr r5, [%[key], #12]\n\t"
|
||||
"and r2, r2, r6\n\t"
|
||||
"and r3, r3, r7\n\t"
|
||||
"and r4, r4, r8\n\t"
|
||||
"and r5, r5, r12\n\t"
|
||||
"add lr, %[ctx], #0\n\t"
|
||||
"stm lr, {r2, r3, r4, r5}\n\t"
|
||||
/* h (accumulator) = 0 */
|
||||
"eor r6, r6, r6\n\t"
|
||||
"eor r7, r7, r7\n\t"
|
||||
"eor r8, r8, r8\n\t"
|
||||
"eor r12, r12, r12\n\t"
|
||||
"add lr, %[ctx], #16\n\t"
|
||||
"eor r5, r5, r5\n\t"
|
||||
"stm lr, {r5, r6, r7, r8, r12}\n\t"
|
||||
/* Zero leftover */
|
||||
"str r5, [%[ctx], #52]\n\t"
|
||||
: [ctx] "+r" (ctx), [key] "+r" (key), [L_poly1305_arm32_clamp] "+r" (L_poly1305_arm32_clamp_c)
|
||||
:
|
||||
: "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "cc"
|
||||
);
|
||||
}
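/* Sketch of the key setup above in plain C (an illustrative helper, not the
 * inline-assembly routine): the first 16 key bytes are masked with the
 * L_poly1305_arm32_clamp values to form r, the last 16 bytes are kept
 * unmodified as the pad, and the accumulator h is cleared.  Assumes a
 * little-endian host, as the ldr-based loads above do. */
#include <stdint.h>
#include <string.h>

static void poly1305_set_key_sketch(uint32_t r[4], uint32_t pad[4],
                                    uint32_t h[5], const unsigned char key[32])
{
    static const uint32_t clamp[4] = {
        0x0fffffff, 0x0ffffffc, 0x0ffffffc, 0x0ffffffc
    };
    int i;

    memcpy(r, key, 16);           /* r = first half of the key */
    memcpy(pad, key + 16, 16);    /* pad = second half, stored as-is */
    for (i = 0; i < 4; i++)
        r[i] &= clamp[i];         /* clear the bits Poly1305 requires to be 0 */
    memset(h, 0, 5 * sizeof(uint32_t));   /* h (accumulator) = 0 */
}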
|
||||
|
||||
void poly1305_final(Poly1305* ctx_p, byte* mac_p)
|
||||
{
|
||||
register Poly1305* ctx asm ("r0") = (Poly1305*)ctx_p;
|
||||
register byte* mac asm ("r1") = (byte*)mac_p;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"add r9, %[ctx], #16\n\t"
|
||||
"ldm r9, {r4, r5, r6, r7, r8}\n\t"
|
||||
/* Add 5 and check for h larger than p. */
|
||||
"adds r2, r4, #5\n\t"
|
||||
"adcs r2, r5, #0\n\t"
|
||||
"adcs r2, r6, #0\n\t"
|
||||
"adcs r2, r7, #0\n\t"
|
||||
"adc r2, r8, #0\n\t"
|
||||
"sub r2, r2, #4\n\t"
|
||||
"lsr r2, r2, #31\n\t"
|
||||
"sub r2, r2, #1\n\t"
|
||||
"and r2, r2, #5\n\t"
|
||||
/* Add 0/5 to h. */
|
||||
"adds r4, r4, r2\n\t"
|
||||
"adcs r5, r5, #0\n\t"
|
||||
"adcs r6, r6, #0\n\t"
|
||||
"adc r7, r7, #0\n\t"
|
||||
/* Add padding */
|
||||
"add r9, %[ctx], #36\n\t"
|
||||
"ldm r9, {r2, r3, r12, lr}\n\t"
|
||||
"adds r4, r4, r2\n\t"
|
||||
"adcs r5, r5, r3\n\t"
|
||||
"adcs r6, r6, r12\n\t"
|
||||
"adc r7, r7, lr\n\t"
|
||||
/* Store MAC */
|
||||
"str r4, [%[mac]]\n\t"
|
||||
"str r5, [%[mac], #4]\n\t"
|
||||
"str r6, [%[mac], #8]\n\t"
|
||||
"str r7, [%[mac], #12]\n\t"
|
||||
/* Zero out h. */
|
||||
"eor r4, r4, r4\n\t"
|
||||
"eor r5, r5, r5\n\t"
|
||||
"eor r6, r6, r6\n\t"
|
||||
"eor r7, r7, r7\n\t"
|
||||
"eor r8, r8, r8\n\t"
|
||||
"add r9, %[ctx], #16\n\t"
|
||||
"stm r9, {r4, r5, r6, r7, r8}\n\t"
|
||||
/* Zero out r. */
|
||||
"add r9, %[ctx], #0\n\t"
|
||||
"stm r9, {r4, r5, r6, r7}\n\t"
|
||||
/* Zero out padding. */
|
||||
"add r9, %[ctx], #36\n\t"
|
||||
"stm r9, {r4, r5, r6, r7}\n\t"
|
||||
: [ctx] "+r" (ctx), [mac] "+r" (mac)
|
||||
:
|
||||
: "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
|
||||
);
|
||||
}
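/* Sketch of the finalisation above (an illustrative helper, not the assembly):
 * decide in constant time whether h >= 2^130 - 5 by checking bit 130 of h + 5,
 * select h or h + 5 accordingly, then add the cached pad modulo 2^128 and emit
 * the low 16 bytes.  Assumes h was left below roughly 2^130 by the block
 * routine and that the host is little-endian. */
#include <stdint.h>
#include <string.h>

static void poly1305_final_sketch(unsigned char mac[16], uint32_t h[5],
                                  const uint32_t pad[4])
{
    uint32_t g[5];
    uint32_t mask;
    uint64_t c;
    int i;

    c = 5;                                   /* g = h + 5 */
    for (i = 0; i < 5; i++) {
        c += h[i];
        g[i] = (uint32_t)c;
        c >>= 32;
    }
    mask = 0U - ((g[4] >> 2) & 1U);          /* all ones if bit 130 of g is set */

    for (i = 0; i < 4; i++)                  /* constant-time select of h or g */
        h[i] = (h[i] & ~mask) | (g[i] & mask);

    c = 0;                                   /* add pad modulo 2^128 */
    for (i = 0; i < 4; i++) {
        c += (uint64_t)h[i] + pad[i];
        h[i] = (uint32_t)c;
        c >>= 32;
    }

    memcpy(mac, h, 16);                      /* 16-byte MAC, little-endian */
}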
|
||||
|
||||
#endif /* HAVE_POLY1305 */
|
||||
#endif /* !__aarch64__ && __arm__ && !__thumb__ */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
#endif /* !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
|
||||
#endif /* WOLFSSL_ARMASM_INLINE */
|
|
@ -32,6 +32,8 @@
|
|||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__)
|
||||
#ifndef WOLFSSL_ARMASM_INLINE
|
||||
#ifdef WOLFSSL_SHA3
|
||||
#ifndef WOLFSSL_ARMASM_NO_NEON
|
||||
.text
|
||||
.type L_sha3_arm2_neon_rt, %object
|
||||
.size L_sha3_arm2_neon_rt, 192
|
||||
|
@ -85,60 +87,6 @@ L_sha3_arm2_neon_rt:
|
|||
.word 0x0
|
||||
.word 0x80008008
|
||||
.word 0x80000000
|
||||
.text
|
||||
.type L_sha3_arm2_rt, %object
|
||||
.size L_sha3_arm2_rt, 192
|
||||
.align 4
|
||||
L_sha3_arm2_rt:
|
||||
.word 0x1
|
||||
.word 0x0
|
||||
.word 0x8082
|
||||
.word 0x0
|
||||
.word 0x808a
|
||||
.word 0x80000000
|
||||
.word 0x80008000
|
||||
.word 0x80000000
|
||||
.word 0x808b
|
||||
.word 0x0
|
||||
.word 0x80000001
|
||||
.word 0x0
|
||||
.word 0x80008081
|
||||
.word 0x80000000
|
||||
.word 0x8009
|
||||
.word 0x80000000
|
||||
.word 0x8a
|
||||
.word 0x0
|
||||
.word 0x88
|
||||
.word 0x0
|
||||
.word 0x80008009
|
||||
.word 0x0
|
||||
.word 0x8000000a
|
||||
.word 0x0
|
||||
.word 0x8000808b
|
||||
.word 0x0
|
||||
.word 0x8b
|
||||
.word 0x80000000
|
||||
.word 0x8089
|
||||
.word 0x80000000
|
||||
.word 0x8003
|
||||
.word 0x80000000
|
||||
.word 0x8002
|
||||
.word 0x80000000
|
||||
.word 0x80
|
||||
.word 0x80000000
|
||||
.word 0x800a
|
||||
.word 0x0
|
||||
.word 0x8000000a
|
||||
.word 0x80000000
|
||||
.word 0x80008081
|
||||
.word 0x80000000
|
||||
.word 0x8080
|
||||
.word 0x80000000
|
||||
.word 0x80000001
|
||||
.word 0x0
|
||||
.word 0x80008008
|
||||
.word 0x80000000
|
||||
#ifndef WOLFSSL_ARMASM_NO_NEON
|
||||
.text
|
||||
.align 4
|
||||
.globl BlockSha3
|
||||
|
@ -407,6 +355,59 @@ L_sha3_arm32_neon_begin:
|
|||
.size BlockSha3,.-BlockSha3
|
||||
#endif /* WOLFSSL_ARMASM_NO_NEON */
|
||||
#ifdef WOLFSSL_ARMASM_NO_NEON
|
||||
.text
|
||||
.type L_sha3_arm2_rt, %object
|
||||
.size L_sha3_arm2_rt, 192
|
||||
.align 4
|
||||
L_sha3_arm2_rt:
|
||||
.word 0x1
|
||||
.word 0x0
|
||||
.word 0x8082
|
||||
.word 0x0
|
||||
.word 0x808a
|
||||
.word 0x80000000
|
||||
.word 0x80008000
|
||||
.word 0x80000000
|
||||
.word 0x808b
|
||||
.word 0x0
|
||||
.word 0x80000001
|
||||
.word 0x0
|
||||
.word 0x80008081
|
||||
.word 0x80000000
|
||||
.word 0x8009
|
||||
.word 0x80000000
|
||||
.word 0x8a
|
||||
.word 0x0
|
||||
.word 0x88
|
||||
.word 0x0
|
||||
.word 0x80008009
|
||||
.word 0x0
|
||||
.word 0x8000000a
|
||||
.word 0x0
|
||||
.word 0x8000808b
|
||||
.word 0x0
|
||||
.word 0x8b
|
||||
.word 0x80000000
|
||||
.word 0x8089
|
||||
.word 0x80000000
|
||||
.word 0x8003
|
||||
.word 0x80000000
|
||||
.word 0x8002
|
||||
.word 0x80000000
|
||||
.word 0x80
|
||||
.word 0x80000000
|
||||
.word 0x800a
|
||||
.word 0x0
|
||||
.word 0x8000000a
|
||||
.word 0x80000000
|
||||
.word 0x80008081
|
||||
.word 0x80000000
|
||||
.word 0x8080
|
||||
.word 0x80000000
|
||||
.word 0x80000001
|
||||
.word 0x0
|
||||
.word 0x80008008
|
||||
.word 0x80000000
|
||||
.text
|
||||
.align 4
|
||||
.globl BlockSha3
|
||||
|
@ -2391,6 +2392,7 @@ L_sha3_arm32_begin:
|
|||
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
.size BlockSha3,.-BlockSha3
|
||||
#endif /* WOLFSSL_ARMASM_NO_NEON */
|
||||
#endif /* WOLFSSL_SHA3 */
|
||||
#endif /* !__aarch64__ && __arm__ && !__thumb__ */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
|
||||
|
|
|
@ -51,6 +51,8 @@
|
|||
#define __asm__ __asm
|
||||
#define __volatile__ volatile
|
||||
#endif /* __KEIL__ */
|
||||
#ifdef WOLFSSL_SHA3
|
||||
#ifndef WOLFSSL_ARMASM_NO_NEON
|
||||
static const uint64_t L_sha3_arm2_neon_rt[] = {
|
||||
0x0000000000000001UL, 0x0000000000008082UL,
|
||||
0x800000000000808aUL, 0x8000000080008000UL,
|
||||
|
@ -66,29 +68,12 @@ static const uint64_t L_sha3_arm2_neon_rt[] = {
|
|||
0x0000000080000001UL, 0x8000000080008008UL,
|
||||
};
|
||||
|
||||
static const uint64_t L_sha3_arm2_rt[] = {
|
||||
0x0000000000000001UL, 0x0000000000008082UL,
|
||||
0x800000000000808aUL, 0x8000000080008000UL,
|
||||
0x000000000000808bUL, 0x0000000080000001UL,
|
||||
0x8000000080008081UL, 0x8000000000008009UL,
|
||||
0x000000000000008aUL, 0x0000000000000088UL,
|
||||
0x0000000080008009UL, 0x000000008000000aUL,
|
||||
0x000000008000808bUL, 0x800000000000008bUL,
|
||||
0x8000000000008089UL, 0x8000000000008003UL,
|
||||
0x8000000000008002UL, 0x8000000000000080UL,
|
||||
0x000000000000800aUL, 0x800000008000000aUL,
|
||||
0x8000000080008081UL, 0x8000000000008080UL,
|
||||
0x0000000080000001UL, 0x8000000080008008UL,
|
||||
};
|
||||
|
||||
#include <wolfssl/wolfcrypt/sha3.h>
|
||||
|
||||
#ifndef WOLFSSL_ARMASM_NO_NEON
|
||||
void BlockSha3(word64* state_p)
|
||||
{
|
||||
register word64* state asm ("r0") = (word64*)state_p;
|
||||
register uint64_t* L_sha3_arm2_neon_rt_c asm ("r1") = (uint64_t*)&L_sha3_arm2_neon_rt;
|
||||
register uint64_t* L_sha3_arm2_rt_c asm ("r2") = (uint64_t*)&L_sha3_arm2_rt;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #16\n\t"
|
||||
|
@ -348,16 +333,31 @@ void BlockSha3(word64* state_p)
|
|||
"vst1.8 {d20-d23}, [%[state]]!\n\t"
|
||||
"vst1.8 {d24}, [%[state]]\n\t"
|
||||
"add sp, sp, #16\n\t"
|
||||
: [state] "+r" (state), [L_sha3_arm2_neon_rt] "+r" (L_sha3_arm2_neon_rt_c), [L_sha3_arm2_rt] "+r" (L_sha3_arm2_rt_c)
|
||||
: [state] "+r" (state), [L_sha3_arm2_neon_rt] "+r" (L_sha3_arm2_neon_rt_c)
|
||||
:
|
||||
: "memory", "r3", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", "cc"
|
||||
: "memory", "r2", "r3", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", "cc"
|
||||
);
|
||||
}
|
||||
|
||||
#endif /* WOLFSSL_ARMASM_NO_NEON */
|
||||
#ifdef WOLFSSL_ARMASM_NO_NEON
|
||||
static const uint64_t L_sha3_arm2_rt[] = {
|
||||
0x0000000000000001UL, 0x0000000000008082UL,
|
||||
0x800000000000808aUL, 0x8000000080008000UL,
|
||||
0x000000000000808bUL, 0x0000000080000001UL,
|
||||
0x8000000080008081UL, 0x8000000000008009UL,
|
||||
0x000000000000008aUL, 0x0000000000000088UL,
|
||||
0x0000000080008009UL, 0x000000008000000aUL,
|
||||
0x000000008000808bUL, 0x800000000000008bUL,
|
||||
0x8000000000008089UL, 0x8000000000008003UL,
|
||||
0x8000000000008002UL, 0x8000000000000080UL,
|
||||
0x000000000000800aUL, 0x800000008000000aUL,
|
||||
0x8000000080008081UL, 0x8000000000008080UL,
|
||||
0x0000000080000001UL, 0x8000000080008008UL,
|
||||
};
|
||||
|
||||
#include <wolfssl/wolfcrypt/sha3.h>
|
||||
|
||||
#ifdef WOLFSSL_ARMASM_NO_NEON
|
||||
void BlockSha3(word64* state_p)
|
||||
{
|
||||
register word64* state asm ("r0") = (word64*)state_p;
|
||||
|
@ -2348,6 +2348,7 @@ void BlockSha3(word64* state_p)
|
|||
}
|
||||
|
||||
#endif /* WOLFSSL_ARMASM_NO_NEON */
|
||||
#endif /* WOLFSSL_SHA3 */
|
||||
#endif /* !__aarch64__ && __arm__ && !__thumb__ */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
#endif /* !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) */
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
|
||||
#if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_NO_NEON)
|
||||
#if defined(WOLFSSL_ARMASM)
|
||||
#ifdef HAVE_CHACHA
|
||||
|
||||
#include <wolfssl/wolfcrypt/chacha.h>
|
||||
|
@ -73,15 +73,43 @@
|
|||
* Set up iv(nonce). Earlier versions used 64 bits instead of 96, this version
|
||||
 * uses the typical AEAD 96-bit nonce and can handle record sizes of up to 256 GB.
|
||||
*/
|
||||
int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
|
||||
int wc_Chacha_SetIV(ChaCha* ctx, const byte* iv, word32 counter)
|
||||
{
|
||||
#ifndef __aarch64__
|
||||
int ret = 0;
|
||||
#ifdef CHACHA_AEAD_TEST
|
||||
word32 i;
|
||||
|
||||
printf("NONCE : ");
|
||||
if (iv != NULL) {
|
||||
for (i = 0; i < CHACHA_IV_BYTES; i++) {
|
||||
printf("%02x", iv[i]);
|
||||
}
|
||||
}
|
||||
printf("\n\n");
|
||||
#endif
|
||||
|
||||
/* Validate parameters. */
|
||||
if ((ctx == NULL) || (iv == NULL)) {
|
||||
ret = BAD_FUNC_ARG;
|
||||
}
|
||||
if (ret == 0) {
|
||||
/* No unused bytes to XOR into input. */
|
||||
ctx->left = 0;
|
||||
|
||||
/* Set counter and IV into state. */
|
||||
wc_chacha_setiv(ctx->X, iv, counter);
|
||||
}
|
||||
|
||||
return ret;
|
||||
#else
|
||||
    word32 temp[CHACHA_IV_WORDS]; /* used for alignment of memory */
|
||||
|
||||
#ifdef CHACHA_AEAD_TEST
|
||||
word32 i;
|
||||
printf("NONCE : ");
|
||||
for (i = 0; i < CHACHA_IV_BYTES; i++) {
|
||||
printf("%02x", inIv[i]);
|
||||
printf("%02x", iv[i]);
|
||||
}
|
||||
printf("\n\n");
|
||||
#endif
|
||||
|
@ -89,7 +117,7 @@ int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
|
|||
if (ctx == NULL)
|
||||
return BAD_FUNC_ARG;
|
||||
|
||||
XMEMCPY(temp, inIv, CHACHA_IV_BYTES);
|
||||
XMEMCPY(temp, iv, CHACHA_IV_BYTES);
|
||||
|
||||
ctx->left = 0;
|
||||
ctx->X[CHACHA_IV_BYTES+0] = counter; /* block counter */
|
||||
|
@ -98,18 +126,54 @@ int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
|
|||
ctx->X[CHACHA_IV_BYTES+3] = LITTLE32(temp[2]); /* counter from nonce */
|
||||
|
||||
return 0;
|
||||
#endif
|
||||
}
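/* Rough model of what wc_chacha_setiv() stores (an illustrative helper, not
 * the assembly implementation): in the RFC 7539 state layout, words 12..15 of
 * the 16-word ChaCha20 state hold the 32-bit block counter followed by the
 * 96-bit nonce as little-endian words. */
static void chacha_setiv_sketch(word32 X[16], const byte iv[12], word32 counter)
{
    X[12] = counter;
    X[13] = (word32)iv[0] | ((word32)iv[1] << 8) |
            ((word32)iv[2] << 16) | ((word32)iv[3] << 24);
    X[14] = (word32)iv[4] | ((word32)iv[5] << 8) |
            ((word32)iv[6] << 16) | ((word32)iv[7] << 24);
    X[15] = (word32)iv[8] | ((word32)iv[9] << 8) |
            ((word32)iv[10] << 16) | ((word32)iv[11] << 24);
}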
|
||||
|
||||
#ifdef __aarch64__
|
||||
/* "expand 32-byte k" as unsigned 32 byte */
|
||||
static const word32 sigma[4] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574};
|
||||
/* "expand 16-byte k" as unsigned 16 byte */
|
||||
static const word32 tau[4] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574};
|
||||
#endif
|
||||
|
||||
/**
|
||||
 * Key setup. Accepts a 16 or 32 byte key.
|
||||
*/
|
||||
int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
|
||||
{
|
||||
#ifndef __aarch64__
|
||||
int ret = 0;
|
||||
|
||||
#ifdef CHACHA_AEAD_TEST
|
||||
printf("ChaCha key used :\n");
|
||||
if (key != NULL) {
|
||||
word32 i;
|
||||
for (i = 0; i < keySz; i++) {
|
||||
printf("%02x", key[i]);
|
||||
if ((i % 8) == 7)
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
printf("\n\n");
|
||||
#endif
|
||||
|
||||
/* Validate parameters. */
|
||||
if ((ctx == NULL) || (key == NULL)) {
|
||||
ret = BAD_FUNC_ARG;
|
||||
}
|
||||
else if ((keySz != (CHACHA_MAX_KEY_SZ / 2)) &&
|
||||
(keySz != CHACHA_MAX_KEY_SZ )) {
|
||||
ret = BAD_FUNC_ARG;
|
||||
}
|
||||
|
||||
if (ret == 0) {
|
||||
ctx->left = 0;
|
||||
|
||||
wc_chacha_setkey(ctx->X, key, keySz);
|
||||
}
|
||||
|
||||
return ret;
|
||||
#else
|
||||
const word32* constants;
|
||||
const byte* k;
|
||||
|
||||
|
@ -169,8 +233,10 @@ int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
|
|||
ctx->left = 0;
|
||||
|
||||
return 0;
|
||||
#endif
|
||||
}
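/* Rough model of the key schedule done above (an illustrative helper, not the
 * wc_chacha_setkey() assembly): state words 0..3 take the "expand 32-byte k"
 * constants (sigma) for a 32-byte key or "expand 16-byte k" (tau) for a
 * 16-byte key, words 4..11 take the key (a 16-byte key is used twice) and
 * words 12..15 stay clear until wc_Chacha_SetIV() fills them.  Assumes a
 * little-endian host, like the ARM targets here. */
static void chacha_setkey_sketch(word32 X[16], const byte* key, word32 keySz)
{
    static const word32 sigma_c[4] =
        { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 };
    static const word32 tau_c[4] =
        { 0x61707865, 0x3120646e, 0x79622d36, 0x6b206574 };

    XMEMCPY(X, (keySz == 32) ? sigma_c : tau_c, 16);
    XMEMCPY(&X[4], key, 16);
    XMEMCPY(&X[8], (keySz == 32) ? (key + 16) : key, 16);
    XMEMSET(&X[12], 0, 16);
}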
|
||||
|
||||
#ifndef WOLFSSL_ARMASM_NO_NEON
|
||||
static const word32 L_chacha20_neon_inc_first_word[] = {
|
||||
0x1,
|
||||
0x0,
|
||||
|
@ -2815,7 +2881,6 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
|
|||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Encrypt a stream of bytes
|
||||
*/
|
||||
|
@ -2862,40 +2927,68 @@ static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c,
|
|||
ctx->X[CHACHA_IV_BYTES] = PLUSONE(ctx->X[CHACHA_IV_BYTES]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* API to encrypt/decrypt a message of any size.
|
||||
*/
|
||||
int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
|
||||
word32 msglen)
|
||||
word32 len)
|
||||
{
|
||||
#ifdef WOLFSSL_ARMASM_NO_NEON
|
||||
int ret = 0;
|
||||
|
||||
if ((ctx == NULL) || (output == NULL) || (input == NULL)) {
|
||||
ret = BAD_FUNC_ARG;
|
||||
}
|
||||
|
||||
/* Handle left over bytes from last block. */
|
||||
if ((ret == 0) && (len > 0) && (ctx->left > 0)) {
|
||||
byte* over = ((byte*)ctx->over) + CHACHA_CHUNK_BYTES - ctx->left;
|
||||
word32 l = min(len, ctx->left);
|
||||
|
||||
wc_chacha_use_over(over, output, input, l);
|
||||
|
||||
ctx->left -= l;
|
||||
input += l;
|
||||
output += l;
|
||||
len -= l;
|
||||
}
|
||||
|
||||
if ((ret == 0) && (len != 0)) {
|
||||
wc_chacha_crypt_bytes(ctx, output, input, len);
|
||||
}
|
||||
|
||||
return ret;
|
||||
#else
|
||||
if (ctx == NULL || output == NULL || input == NULL)
|
||||
return BAD_FUNC_ARG;
|
||||
|
||||
/* handle left overs */
|
||||
if (msglen > 0 && ctx->left > 0) {
|
||||
if (len > 0 && ctx->left > 0) {
|
||||
byte* out;
|
||||
word32 i;
|
||||
|
||||
out = (byte*)ctx->over + CHACHA_CHUNK_BYTES - ctx->left;
|
||||
for (i = 0; i < msglen && i < ctx->left; i++) {
|
||||
for (i = 0; i < len && i < ctx->left; i++) {
|
||||
output[i] = (byte)(input[i] ^ out[i]);
|
||||
}
|
||||
ctx->left -= i;
|
||||
|
||||
msglen -= i;
|
||||
len -= i;
|
||||
output += i;
|
||||
input += i;
|
||||
}
|
||||
|
||||
if (msglen == 0) {
|
||||
if (len == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
wc_Chacha_encrypt_bytes(ctx, input, output, msglen);
|
||||
wc_Chacha_encrypt_bytes(ctx, input, output, len);
|
||||
|
||||
return 0;
|
||||
#endif
|
||||
}
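/* Typical use of the API above, common to the NEON, plain ARM32 and Thumb-2
 * paths (sketch only; error handling trimmed, and chacha_encrypt_example is a
 * hypothetical caller, not a wolfSSL function). */
static int chacha_encrypt_example(byte* out, const byte* in, word32 sz,
                                  const byte key[32], const byte iv[12])
{
    ChaCha chacha;
    int ret;

    ret = wc_Chacha_SetKey(&chacha, key, 32);
    if (ret == 0)
        ret = wc_Chacha_SetIV(&chacha, iv, 0);    /* start at block counter 0 */
    if (ret == 0)
        ret = wc_Chacha_Process(&chacha, out, in, sz);
    return ret;
}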
|
||||
|
||||
#endif /* HAVE_CHACHA */
|
||||
#endif /* WOLFSSL_ARMASM && !WOLFSSL_ARMASM_NO_NEON */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
|
|
|
@ -32,7 +32,6 @@
|
|||
#include <wolfssl/wolfcrypt/types.h>
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#ifdef __aarch64__
|
||||
|
||||
#ifdef HAVE_POLY1305
|
||||
#include <wolfssl/wolfcrypt/poly1305.h>
|
||||
|
@ -49,6 +48,8 @@
|
|||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#ifdef __aarch64__
|
||||
|
||||
static WC_INLINE void poly1305_blocks_aarch64_16(Poly1305* ctx,
|
||||
const unsigned char *m, size_t bytes)
|
||||
{
|
||||
|
@ -1118,6 +1119,127 @@ int wc_Poly1305Final(Poly1305* ctx, byte* mac)
|
|||
return 0;
|
||||
}
|
||||
|
||||
#endif /* HAVE_POLY1305 */
|
||||
#else
|
||||
#ifdef __thumb__
|
||||
/* Process 16 bytes of message at a time.
|
||||
*
|
||||
* @param [in] ctx Poly1305 context.
|
||||
* @param [in] m Message to process.
|
||||
* @param [in] bytes Length of message in bytes.
|
||||
*/
|
||||
void poly1305_blocks_thumb2(Poly1305* ctx, const unsigned char* m,
|
||||
size_t bytes)
|
||||
{
|
||||
poly1305_blocks_thumb2_16(ctx, m, bytes, 1);
|
||||
}
|
||||
|
||||
/* Process 16 bytes of message.
|
||||
*
|
||||
* @param [in] ctx Poly1305 context.
|
||||
* @param [in] m Message to process.
|
||||
*/
|
||||
void poly1305_block_thumb2(Poly1305* ctx, const unsigned char* m)
|
||||
{
|
||||
poly1305_blocks_thumb2_16(ctx, m, POLY1305_BLOCK_SIZE, 1);
|
||||
}
|
||||
#else
|
||||
/* Process 16 bytes of message at a time.
|
||||
*
|
||||
* @param [in] ctx Poly1305 context.
|
||||
* @param [in] m Message to process.
|
||||
* @param [in] bytes Length of message in bytes.
|
||||
*/
|
||||
void poly1305_blocks_arm32(Poly1305* ctx, const unsigned char* m, size_t bytes)
|
||||
{
|
||||
poly1305_blocks_arm32_16(ctx, m, bytes, 1);
|
||||
}
|
||||
|
||||
/* Process 16 bytes of message.
|
||||
*
|
||||
* @param [in] ctx Poly1305 context.
|
||||
* @param [in] m Message to process.
|
||||
*/
|
||||
void poly1305_block_arm32(Poly1305* ctx, const unsigned char* m)
|
||||
{
|
||||
poly1305_blocks_arm32_16(ctx, m, POLY1305_BLOCK_SIZE, 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Set the key for the Poly1305 operation.
|
||||
*
|
||||
* @param [in] ctx Poly1305 context.
|
||||
* @param [in] key Key data to use.
|
||||
* @param [in] keySz Size of key in bytes. Must be 32.
|
||||
* @return 0 on success.
|
||||
* @return BAD_FUNC_ARG when ctx or key is NULL or keySz is not 32.
|
||||
*/
|
||||
int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
#ifdef CHACHA_AEAD_TEST
|
||||
word32 k;
|
||||
printf("Poly key used:\n");
|
||||
if (key != NULL) {
|
||||
for (k = 0; k < keySz; k++) {
|
||||
printf("%02x", key[k]);
|
||||
if ((k+1) % 8 == 0)
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
/* Validate parameters. */
|
||||
if ((ctx == NULL) || (key == NULL) || (keySz != 32)) {
|
||||
ret = BAD_FUNC_ARG;
|
||||
}
|
||||
|
||||
if (ret == 0) {
|
||||
poly1305_set_key(ctx, key);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Finalize the Poly1305 operation calculating the MAC.
|
||||
*
|
||||
* @param [in] ctx Poly1305 context.
|
||||
 * @param [in] mac  Buffer to hold the MAC. Must be at least 16 bytes long.
|
||||
* @return 0 on success.
|
||||
* @return BAD_FUNC_ARG when ctx or mac is NULL.
|
||||
*/
|
||||
int wc_Poly1305Final(Poly1305* ctx, byte* mac)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
/* Validate parameters. */
|
||||
if ((ctx == NULL) || (mac == NULL)) {
|
||||
ret = BAD_FUNC_ARG;
|
||||
}
|
||||
|
||||
/* Process the remaining partial block - last block. */
|
||||
if (ret == 0) {
|
||||
if (ctx->leftover) {
|
||||
size_t i = ctx->leftover;
|
||||
ctx->buffer[i++] = 1;
|
||||
for (; i < POLY1305_BLOCK_SIZE; i++) {
|
||||
ctx->buffer[i] = 0;
|
||||
}
|
||||
#ifdef __thumb__
|
||||
poly1305_blocks_thumb2_16(ctx, ctx->buffer, POLY1305_BLOCK_SIZE,
|
||||
0);
|
||||
#else
|
||||
poly1305_blocks_arm32_16(ctx, ctx->buffer, POLY1305_BLOCK_SIZE, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
poly1305_final(ctx, mac);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
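/* One-shot MAC using the functions above (sketch only; poly1305_mac_example is
 * a hypothetical caller and wc_Poly1305Update() is the usual update routine
 * declared in poly1305.h). */
static int poly1305_mac_example(byte mac[16], const byte* msg, word32 msgSz,
                                const byte key[32])
{
    Poly1305 ctx;
    int ret;

    ret = wc_Poly1305SetKey(&ctx, key, 32);
    if (ret == 0)
        ret = wc_Poly1305Update(&ctx, msg, msgSz);
    if (ret == 0)
        ret = wc_Poly1305Final(&ctx, mac);
    return ret;
}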
|
||||
|
||||
#endif /* __aarch64__ */
|
||||
#endif /* HAVE_POLY1305 */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
|
|
|
@ -107,12 +107,18 @@ WOLFSSL_API int wc_XChacha_SetKey(ChaCha *ctx, const byte *key, word32 keySz,
|
|||
word32 counter);
|
||||
#endif
|
||||
|
||||
#if defined(WOLFSSL_ARMASM) && defined(__thumb__)
|
||||
#if defined(WOLFSSL_ARMASM)
|
||||
|
||||
#ifndef __aarch64__
|
||||
void wc_chacha_setiv(word32* x, const byte* iv, word32 counter);
|
||||
void wc_chacha_setkey(word32* x, const byte* key, word32 keySz);
|
||||
#endif
|
||||
|
||||
#if defined(WOLFSSL_ARMASM_NO_NEON) || defined(__thumb__)
|
||||
void wc_chacha_use_over(byte* over, byte* output, const byte* input,
|
||||
word32 len);
|
||||
void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -98,7 +98,7 @@ typedef struct Poly1305 {
|
|||
word64 leftover;
|
||||
unsigned char buffer[POLY1305_BLOCK_SIZE];
|
||||
unsigned char finished;
|
||||
#elif defined(WOLFSSL_ARMASM) && defined(__thumb__)
|
||||
#elif defined(WOLFSSL_ARMASM)
|
||||
word32 r[4];
|
||||
word32 h[5];
|
||||
word32 pad[4];
|
||||
|
@ -147,16 +147,16 @@ WOLFSSL_API int wc_Poly1305_EncodeSizes64(Poly1305* ctx, word64 aadSz,
|
|||
WOLFSSL_API int wc_Poly1305_MAC(Poly1305* ctx, const byte* additional,
|
||||
word32 addSz, const byte* input, word32 sz, byte* tag, word32 tagSz);
|
||||
|
||||
#if defined(__aarch64__ ) && defined(WOLFSSL_ARMASM)
|
||||
#if defined(WOLFSSL_ARMASM)
|
||||
#if defined(__aarch64__ )
|
||||
#define poly1305_blocks poly1305_blocks_aarch64
|
||||
#define poly1305_block poly1305_block_aarch64
|
||||
|
||||
void poly1305_blocks_aarch64(Poly1305* ctx, const unsigned char *m,
|
||||
size_t bytes);
|
||||
void poly1305_block_aarch64(Poly1305* ctx, const unsigned char *m);
|
||||
#endif
|
||||
|
||||
#if defined(__thumb__ ) && defined(WOLFSSL_ARMASM)
|
||||
#else
|
||||
#if defined(__thumb__)
|
||||
#define poly1305_blocks poly1305_blocks_thumb2
|
||||
#define poly1305_block poly1305_block_thumb2
|
||||
|
||||
|
@ -166,9 +166,20 @@ void poly1305_block_thumb2(Poly1305* ctx, const unsigned char *m);
|
|||
|
||||
void poly1305_blocks_thumb2_16(Poly1305* ctx, const unsigned char* m,
|
||||
word32 len, int notLast);
|
||||
#else
|
||||
#define poly1305_blocks poly1305_blocks_arm32
|
||||
#define poly1305_block poly1305_block_arm32
|
||||
|
||||
void poly1305_blocks_arm32(Poly1305* ctx, const unsigned char *m, size_t bytes);
|
||||
void poly1305_block_arm32(Poly1305* ctx, const unsigned char *m);
|
||||
|
||||
void poly1305_blocks_arm32_16(Poly1305* ctx, const unsigned char* m, word32 len,
|
||||
int notLast);
|
||||
#endif
|
||||
void poly1305_set_key(Poly1305* ctx, const byte* key);
|
||||
void poly1305_final(Poly1305* ctx, byte* mac);
|
||||
#endif
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
|
||||
#if defined(WOLFSSL_RISCV_ASM)
|
||||
#define poly1305_blocks poly1305_blocks_riscv64