Added Benchmark script. Added ARMASM support.

pull/513/head
Daniele Lacamera 2024-10-23 13:15:40 +02:00
parent 518909ee18
commit 72a0aa3853
10 changed files with 207 additions and 33 deletions

View File

@ -70,28 +70,28 @@ jobs:
with:
arch: arm
config-file: ./config/examples/imx-rt1040.config
make-args: PKA=1
make-args: PKA=1 NO_ARM_ASM=1
imx_rt1050_test_pka:
uses: ./.github/workflows/test-build-mcux-sdk.yml
with:
arch: arm
config-file: ./config/examples/imx-rt1050.config
make-args: PKA=1
make-args: PKA=1 NO_ARM_ASM=1
imx_rt1060_test_pka:
uses: ./.github/workflows/test-build-mcux-sdk.yml
with:
arch: arm
config-file: ./config/examples/imx-rt1060.config
make-args: PKA=1
make-args: PKA=1 NO_ARM_ASM=1
imx_rt1064_test_pka:
uses: ./.github/workflows/test-build-mcux-sdk.yml
with:
arch: arm
config-file: ./config/examples/imx-rt1064.config
make-args: PKA=1
make-args: PKA=1 NO_ARM_ASM=1
kinetis_k64f_test:
uses: ./.github/workflows/test-build-mcux-sdk.yml

View File

@ -318,6 +318,7 @@ clean:
$(Q)rm -f $(MACHINE_OBJ) $(MAIN_TARGET) $(LSCRIPT)
$(Q)rm -f $(OBJS)
$(Q)rm -f tools/keytools/otp/otp-keystore-gen
$(Q)rm -f .stack_usage
$(Q)$(MAKE) -C test-app -s clean
$(Q)$(MAKE) -C tools/check_config -s clean
$(Q)$(MAKE) -C stage1 -s clean
@ -385,6 +386,13 @@ line-count-nrf52:
line-count-x86:
cloc --force-lang-def cloc_lang_def.txt src/boot_x86_fsp.c src/boot_x86_fsp_payload.c src/boot_x86_fsp_start.S src/image.c src/keystore.c src/libwolfboot.c src/loader.c src/string.c src/update_disk.c src/x86/ahci.c src/x86/ata.c src/x86/common.c src/x86/gpt.c src/x86/hob.c src/pci.c src/x86/tgl_fsp.c hal/x86_fsp_tgl.c hal/x86_uart.c
# Helper targets that record build-time figures in dot-files in the current
# directory, one value per file:
#   stack-usage        writes $(STACK_USAGE)       to .stack_usage
#   image-header-size  writes $(IMAGE_HEADER_SIZE) to .image_header_size
# Both depend on wolfboot.bin, so the bootloader is built first.
# They never create a file named after themselves, so they are declared phony:
# a stray file called "stack-usage" must not make them look up to date.
.PHONY: stack-usage image-header-size

stack-usage: wolfboot.bin
	$(Q)echo $(STACK_USAGE) > .stack_usage

image-header-size: wolfboot.bin
	$(Q)echo $(IMAGE_HEADER_SIZE) > .image_header_size
cppcheck:
cppcheck -f --enable=warning --enable=portability \
--suppress="ctunullpointer" --suppress="nullPointer" \

55
arch.mk
View File

@ -188,7 +188,8 @@ ifeq ($(ARCH),ARM)
ifeq ($(CORTEX_A5),1)
FPU=-mfpu=vfp4-d16
CFLAGS+=-mcpu=cortex-a5 -mtune=cortex-a5 -static -z noexecstack
LDLAGS+=-mcpu=cortex-a5 -mtune=cortex-a5 -mtune=cortex-a5 -static -z noexecstack -Ttext 0x300000
# NOTE(review): "LDLAGS" looks like a typo for LDFLAGS, the variable the
# Cortex-M branches of this file append to; if so, these flags never reach the
# link step. Confirm before renaming: -Ttext 0x300000 would then start to apply.
# -mtune=cortex-a5 is also listed twice.
LDLAGS+=-mcpu=cortex-a5 -mtune=cortex-a5 -mtune=cortex-a5 -static \
-z noexecstack -Ttext 0x300000
# Cortex-A uses boot_arm32.o
OBJS+=src/boot_arm32.o src/boot_arm32_start.o
ifeq ($(NO_ASM),1)
@ -198,11 +199,37 @@ ifeq ($(CORTEX_A5),1)
OBJS+=./lib/wolfssl/wolfcrypt/src/port/arm/armv8-sha256.o
OBJS+=./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha256-asm.o
OBJS+=./lib/wolfssl/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.o
CFLAGS+=-DWOLFSSL_SP_ARM32_ASM -DWOLFSSL_ARMASM -DWOLFSSL_ARMASM_NO_HW_CRYPTO -DWOLFSSL_ARM_ARCH=7 -DWOLFSSL_ARMASM_INLINE -DWOLFSSL_ARMASM_NO_NEON
CFLAGS+=-DWOLFSSL_SP_ARM32_ASM -DWOLFSSL_ARMASM -DWOLFSSL_ARMASM_NO_HW_CRYPTO \
-DWOLFSSL_ARM_ARCH=7 -DWOLFSSL_ARMASM_INLINE -DWOLFSSL_ARMASM_NO_NEON
endif
else
# All others use boot_arm.o
OBJS+=src/boot_arm.o
# Optional wolfCrypt ARM assembly objects and defines for the non-Cortex-A
# ARM targets. Nothing is defined here when NO_ARM_ASM=1, so the references
# to these variables in the per-core sections of this file expand to nothing.
ifneq ($(NO_ARM_ASM),1)
  # AES, ChaCha, SHA-256, SHA-512 and SHA-3 objects from the wolfCrypt ARM port
  CORTEXM_ARM_EXTRA_OBJS=$(addprefix ./lib/wolfssl/wolfcrypt/src/port/arm/, \
      armv8-aes.o armv8-chacha.o armv8-sha256.o armv8-sha512.o \
      armv8-32-aes-asm.o armv8-32-aes-asm_c.o \
      armv8-32-sha256-asm.o armv8-32-sha256-asm_c.o \
      armv8-32-sha512-asm.o armv8-32-sha512-asm_c.o \
      armv8-32-sha3-asm.o armv8-32-sha3-asm_c.o \
      armv8-32-chacha-asm.o armv8-32-chacha-asm_c.o)

  # Thumb-2 SHA-256 objects; only referenced from a commented-out TODO in the
  # Cortex-M0 section of this file.
  CORTEXM_ARM_THUMB_EXTRA_OBJS=$(addprefix ./lib/wolfssl/wolfcrypt/src/port/arm/, \
      thumb2-sha256-asm.o thumb2-sha256-asm_c.o)

  # Preprocessor defines that go with the objects above
  CORTEXM_ARM_EXTRA_CFLAGS+=-DWOLFSSL_ARMASM -DWOLFSSL_ARMASM_NO_HW_CRYPTO
  CORTEXM_ARM_EXTRA_CFLAGS+=-DWOLFSSL_ARMASM_INLINE -DWOLFSSL_ARMASM_NO_NEON
endif
ifeq ($(CORTEX_M33),1)
CFLAGS+=-mcpu=cortex-m33 -DCORTEX_M33
LDFLAGS+=-mcpu=cortex-m33
@ -212,28 +239,25 @@ else
endif
CFLAGS+=-mcmse
ifeq ($(WOLFCRYPT_TZ),1)
CORTEXM_ARM_EXTRA_OBJS=
CORTEXM_ARM_EXTRA_CFLAGS=
SECURE_OBJS+=./src/wc_callable.o
SECURE_OBJS+=./lib/wolfssl/wolfcrypt/src/random.o
CFLAGS+=-DWOLFCRYPT_SECURE_MODE
SECURE_LDFLAGS+=-Wl,--cmse-implib -Wl,--out-implib=./src/wc_secure_calls.o
endif
endif # TZEN=1
ifeq ($(NO_ASM),1)
ifeq ($(SPMATH),1)
ifeq ($(NO_ASM),1)
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_c32.o
else
CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=8
OBJS+=$(CORTEXM_ARM_EXTRA_OBJS)
endif
endif
else
ifeq ($(SPMATH),1)
CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
endif
endif
else
ifeq ($(CORTEX_M7),1)
CFLAGS+=-mcpu=cortex-m7
LDFLAGS+=-mcpu=cortex-m7
@ -243,10 +267,12 @@ else
else
CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=7
OBJS+=$(CORTEXM_ARM_EXTRA_OBJS)
endif
endif
endif
else
ifeq ($(CORTEX_M0),1)
ifeq ($(CORTEX_M0),1)
CFLAGS+=-mcpu=cortex-m0
LDFLAGS+=-mcpu=cortex-m0
ifeq ($(SPMATH),1)
@ -255,6 +281,9 @@ else
else
CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_THUMB_ASM
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_armthumb.o
# TODO: integrate thumb2-asm
#CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=6
#OBJS+=$(CORTEXM_ARM_THUMB_EXTRA_OBJS)
endif
endif
else
@ -269,6 +298,8 @@ else
ifeq ($(SPMATH),1)
CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM -DWOLFSSL_SP_NO_UMAAL
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=7
OBJS+=$(CORTEXM_ARM_EXTRA_OBJS)
endif
endif
else
@ -284,6 +315,8 @@ else
ifeq ($(SPMATH),1)
CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM
MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o
CFLAGS+=$(CORTEXM_ARM_EXTRA_CFLAGS) -DWOLFSSL_ARM_ARCH=7
OBJS+=$(CORTEXM_ARM_EXTRA_OBJS)
endif
endif
endif

View File

@ -42,6 +42,28 @@ By default, wolfBoot is compiled for ARM Cortex-M3/4/7. To compile for Cortex-M0
`CORTEX_M0=1`
### Speed vs. size
On a number of targets, the cryptographic algorithms are automatically built with
assembly optimizations. To disable assembly optimizations, use `NO_ASM=1`. This option
produces smaller code, at the cost of a longer boot time.
ARM-specific assembly optimizations for hash functions and symmetric-key ciphers can be
disabled with the option `NO_ARM_ASM=1`. This is useful, for example, when you want
to keep the SP math assembly optimizations for signature verification, but exclude the
SHA2/AES optimizations to save some space.
#### Example: ECC256 + SHA256 on STM32H7
Benchmark: footprint vs. boot time (SHA256 hash of a 100 KB image plus signature verification)
| Description | Selected options | wolfBoot size (B) | Boot time (s) |
|-------------|------------------|-------------------|---------------|
| Full ECC256 assembly optimizations. Fastest. | `SIGN=ECC256` | 21836 | .583 |
| Optimize ECC only (SP math assembly only) | `SIGN=ECC256 NO_ARM_ASM=1` | 18624 | .760 |
| No assembly optimizations (smallest) | `SIGN=ECC256 NO_ASM=1` | 14416 | 3.356 |
### Flash partitions
The file [include/target.h](../include/target.h) is generated according to the configured flash geometry,

View File

@ -75,8 +75,8 @@ extern int tolower(int c);
# define ED25519_SMALL
# define NO_ED25519_SIGN
# define NO_ED25519_EXPORT
# define WOLFSSL_SHA512
# define USE_SLOW_SHA512
# define WOLFSSL_SHA512
#endif
/* ED448 and SHA3/SHAKE256 */
@ -267,6 +267,9 @@ extern int tolower(int c);
!defined(WOLFCRYPT_SECURE_MODE)
# define NO_SHA256
# endif
#ifndef WOLFSSL_SHA512
#define WOLFSSL_SHA512
#endif
#endif
/* If SP math is enabled determine word size */
@ -499,4 +502,8 @@ extern int tolower(int c);
#endif /* WOLFBOOT_PKCS11_APP */
#ifndef XTOLOWER
#define XTOLOWER(x) (x)
#endif
#endif /* !_WOLFBOOT_USER_SETTINGS_H_ */

View File

@ -448,7 +448,7 @@ ifeq ($(SIGN),XMSS)
ifeq ($(WOLFBOOT_SMALL_STACK),1)
$(error WOLFBOOT_SMALL_STACK with XMSS not supported)
else
STACK_USAGE=2720
STACK_USAGE=9352
endif
endif

View File

@ -362,6 +362,9 @@ void uart_print(const char *s)
}
}
#define FILLER_SIZE (100 * 1024)
static volatile uint8_t filler_data[FILLER_SIZE] = { 0x01, 0x02, 0x03 };
void main(void)
{
uint8_t firmware_version = 0;
@ -373,6 +376,7 @@ void main(void)
if (FIRMWARE_A)
ld3_write(LED_INIT);
filler_data[FILLER_SIZE - 1] = 0xAA;
/* LED Indicator of successful UART initialization. SUCCESS = ON, FAIL = OFF */
if (uart_setup(115200) < 0)
ld2_write(LED_OFF);

View File

@ -23,6 +23,7 @@ ifeq ($(ARCH),)
CORTEX_M7?=0
CORTEX_M3?=0
NO_ASM?=0
NO_ARM_ASM?=0
EXT_FLASH?=0
SPI_FLASH?=0
QSPI_FLASH?=0
@ -104,5 +105,6 @@ CONFIG_VARS:= ARCH TARGET SIGN HASH MCUXSDK MCUXPRESSO MCUXPRESSO_CPU MCUXPRESSO
NXP_CUSTOM_DCD NXP_CUSTOM_DCD_OBJS \
FLASH_OTP_KEYSTORE \
KEYVAULT_OBJ_SIZE \
KEYVAULT_MAX_ITEMS
KEYVAULT_MAX_ITEMS \
NO_ARM_ASM

View File

@ -0,0 +1,98 @@
#!/bin/bash
#
function run_on_board() {
    # Flash factory.bin to the target, reset it and print the measured boot
    # time as the last cell of the current Markdown table row.
    #
    # Host wiring (sysfs GPIO):
    #   GPIO2: RST  (output) - drives the target reset line
    #   GPIO4: BOOT (input)  - polled until it no longer reads 0
    #
    # Output always starts with the " | " column separator, followed by one of:
    #   - the elapsed time in seconds (three decimals, as printed by bc)
    #   - "No data."  when `st-flash reset` fails
    #   - "Timeout."  when BOOT still reads 0 after BOOT_TIMEOUT seconds
    # BOOT_TIMEOUT may be set in the environment; it defaults to 300.
    local rst=/sys/class/gpio/gpio2
    local boot=/sys/class/gpio/gpio4/value
    local timeout=${BOOT_TIMEOUT:-300}
    local start end deadline pass failure

    # Emit the separator in every case so the table row stays well formed
    # even when no measurement can be taken.
    echo -n " | "
    if ! st-flash reset &>/dev/null; then
        echo -n "No data."
        return
    fi

    sleep 1
    st-flash --connect-under-reset write factory.bin 0x8000000 &>/dev/null
    sleep .2
    echo "2" > /sys/class/gpio/export 2>/dev/null
    echo "out" > "$rst/direction"
    echo "1" > "$rst/value" # Release reset
    echo "0" > "$rst/value" # Keep reset low
    sleep 1
    echo "1" > "$rst/value" # Release reset
    start=$(date +%s.%N)
    deadline=$((SECONDS + timeout))
    failure=""
    # The BOOT line is polled in two consecutive passes.
    # NOTE(review): the second pass presumably filters a glitch on BOOT - confirm.
    for pass in 1 2; do
        while [ "$(cat "$boot")" = "0" ]; do
            if [ "$SECONDS" -ge "$deadline" ]; then
                # Never hang the whole benchmark run on a target that does not boot
                failure="Timeout."
                break 2
            fi
            sleep .01
        done
    done
    end=$(date +%s.%N)
    if [ -n "$failure" ]; then
        echo "$failure"
    else
        # "/1" under scale=3 truncates both timestamps to milliseconds
        echo "scale=3; $end/1 - $start/1 " | bc
    fi
    # Hand the reset line back: input direction, then unexport
    echo "in" > "$rst/direction"
    echo "2" > /sys/class/gpio/unexport 2>/dev/null
}
function set_benchmark {
    # Build wolfBoot with the given make options and print one Markdown table
    # row: name, configuration, bootloader size, stack size, image header size
    # and, through run_on_board, the boot time.
    #
    #   $1     row label
    #   $2...  make variable assignments (e.g. SIGN=ECC256 NO_ASM=1)
    local name=$1
    shift
    local config="$*"

    # Name
    printf '| %s | ' "$name"
    # Configuration
    printf '%s | ' "$config"

    make clean &>/dev/null
    make keysclean &>/dev/null
    # Drop the figures of the previous configuration, so that a failed build
    # can never report stale values.
    rm -f .stack_usage .image_header_size
    # Build quietly; on failure build once more with output enabled so the
    # error is visible.
    make "$@" factory.bin &>/dev/null || make "$@" factory.bin
    make "$@" stack-usage &>/dev/null
    make "$@" image-header-size &>/dev/null

    # Bootloader size in bytes (byte count instead of parsing `ls -l` columns)
    wc -c < wolfboot.bin | tr -d ' \n'
    echo -n " | "
    # Stack size
    tr -d '\n' < .stack_usage
    echo -n " | "
    # Image header size
    tr -d '\n' < .image_header_size
    # Boot time (run_on_board is responsible for the separator preceding this cell)
    run_on_board 2>&1 | tr -d '\n'
    echo " |"
}
# Host GPIO setup: export GPIO4 (BOOT, configured as input below) and make
# sure GPIO2 (RST) is not left exported by a previous run.
echo "4" > /sys/class/gpio/export 2>/dev/null
echo "2" > /sys/class/gpio/unexport 2>/dev/null
make keytools &>/dev/null
cp config/examples/stm32h7.config .config
echo "in" > /sys/class/gpio/gpio4/direction
# Output benchmark results in a Markdown table
echo "| Name | Configuration | Bootloader size | Stack size | Image header size | Boot time |"
echo "|------|---------------|-----------------|------------|-------------------|-----------|"
set_benchmark "SHA2 only" SIGN=NONE
set_benchmark "SHA384 only" SIGN=NONE HASH=SHA384
set_benchmark "SHA3 only" SIGN=NONE HASH=SHA3
set_benchmark "SHA2 only,small" SIGN=NONE NO_ASM=1
set_benchmark "rsa2048" SIGN=RSA2048
set_benchmark "rsa3072" SIGN=RSA3072
set_benchmark "rsa4096" SIGN=RSA4096
set_benchmark "rsa4096 with sha384" SIGN=RSA4096 HASH=SHA384
set_benchmark "ecdsa256" SIGN=ECC256
set_benchmark "ecdsa384" SIGN=ECC384
set_benchmark "ecdsa521" SIGN=ECC521
# The next two rows build ECC384; their labels name the curve actually measured.
set_benchmark "ecdsa384 with small stack" SIGN=ECC384 WOLFBOOT_SMALL_STACK=1
# SPMATH (no underscore) is the variable tested by arch.mk; SPMATH=0 turns SP math off.
set_benchmark "ecdsa384 with fast math" SIGN=ECC384 SPMATH=0
set_benchmark "ecdsa256, no asm" SIGN=ECC256 NO_ASM=1
set_benchmark "ecdsa384, no asm" SIGN=ECC384 NO_ASM=1
set_benchmark "ecdsa521, no asm" SIGN=ECC521 NO_ASM=1
set_benchmark "ecdsa384 with sha384" SIGN=ECC384 HASH=SHA384
set_benchmark "ed25519 with sha384, small" SIGN=ED25519 HASH=SHA384 NO_ASM=1
set_benchmark "ed25519 fast" SIGN=ED25519 NO_ASM=0
set_benchmark "ed448" SIGN=ED448
set_benchmark "ML_DSA-44" SIGN=ML_DSA ML_DSA_LEVEL=2 IMAGE_SIGNATURE_SIZE=2420 IMAGE_HEADER_SIZE=8192
set_benchmark "ML_DSA-65" SIGN=ML_DSA ML_DSA_LEVEL=3 IMAGE_SIGNATURE_SIZE=3309 IMAGE_HEADER_SIZE=8192
set_benchmark "ML_DSA-87" SIGN=ML_DSA ML_DSA_LEVEL=5 IMAGE_SIGNATURE_SIZE=4627 IMAGE_HEADER_SIZE=12288
set_benchmark "LMS 1-10-8" SIGN=LMS LMS_LEVELS=1 LMS_HEIGHT=10 LMS_WINTERNITZ=8 IMAGE_HEADER_SIZE=4096 IMAGE_SIGNATURE_SIZE=1456
set_benchmark "XMSS-SHA2_10_256" XMSS_PARAMS='XMSS-SHA2_10_256' SIGN=XMSS IMAGE_SIGNATURE_SIZE=2500 IMAGE_HEADER_SIZE=8192

View File

@ -986,37 +986,37 @@ test-all: clean
test-size-all:
make test-size SIGN=NONE LIMIT=4816
make test-size SIGN=NONE LIMIT=4816 NO_ARM_ASM=1
make keysclean
make test-size SIGN=ED25519 LIMIT=11396
make test-size SIGN=ED25519 LIMIT=11396 NO_ARM_ASM=1
make keysclean
make test-size SIGN=ECC256 LIMIT=17936
make test-size SIGN=ECC256 LIMIT=17936 NO_ARM_ASM=1
make clean
make test-size SIGN=ECC256 NO_ASM=1 LIMIT=13480
make test-size SIGN=ECC256 NO_ASM=1 LIMIT=13480 NO_ARM_ASM=1
make keysclean
make test-size SIGN=RSA2048 LIMIT=11212
make test-size SIGN=RSA2048 LIMIT=11212 NO_ARM_ASM=1
make clean
make test-size SIGN=RSA2048 NO_ASM=1 LIMIT=11788
make test-size SIGN=RSA2048 NO_ASM=1 LIMIT=11788 NO_ARM_ASM=1
make keysclean
make test-size SIGN=RSA4096 LIMIT=11500
make test-size SIGN=RSA4096 LIMIT=11500 NO_ARM_ASM=1
make clean
make test-size SIGN=RSA4096 NO_ASM=1 LIMIT=12076
make test-size SIGN=RSA4096 NO_ASM=1 LIMIT=12076 NO_ARM_ASM=1
make keysclean
make test-size SIGN=ECC384 LIMIT=17504
make test-size SIGN=ECC384 LIMIT=17504 NO_ARM_ASM=1
make clean
make test-size SIGN=ECC384 NO_ASM=1 LIMIT=14872
make test-size SIGN=ECC384 NO_ASM=1 LIMIT=14872 NO_ARM_ASM=1
make keysclean
make test-size SIGN=ED448 LIMIT=13408
make test-size SIGN=ED448 LIMIT=13408 NO_ARM_ASM=1
make keysclean
make test-size SIGN=RSA3072 LIMIT=11352
make test-size SIGN=RSA3072 LIMIT=11352 NO_ARM_ASM=1
make clean
make test-size SIGN=RSA3072 NO_ASM=1 LIMIT=11892
make test-size SIGN=RSA3072 NO_ASM=1 LIMIT=11892 NO_ARM_ASM=1
make keysclean
make test-size SIGN=LMS LMS_LEVELS=2 LMS_HEIGHT=5 LMS_WINTERNITZ=8 \
WOLFBOOT_SMALL_STACK=0 IMAGE_SIGNATURE_SIZE=2644 \
IMAGE_HEADER_SIZE?=5288 LIMIT=7504
IMAGE_HEADER_SIZE?=5288 LIMIT=7504 NO_ARM_ASM=1
make keysclean
make test-size SIGN=XMSS XMSS_PARAMS='XMSS-SHA2_10_256' \
IMAGE_SIGNATURE_SIZE=2500 IMAGE_HEADER_SIZE?=4096 \
LIMIT=8220
LIMIT=8220 NO_ARM_ASM=1
make keysclean