diff --git a/Makefile.am b/Makefile.am index d8225d167..3ce3118ef 100644 --- a/Makefile.am +++ b/Makefile.am @@ -198,7 +198,7 @@ endif include scripts/include.am if BUILD_LINUXKM -export KERNEL_ROOT KERNEL_ARCH KERNEL_OPT AM_CFLAGS CFLAGS AM_CCASFLAGS CCASFLAGS src_libwolfssl_la_OBJECTS ENABLED_CRYPT_TESTS +export KERNEL_ROOT KERNEL_ARCH KERNEL_OPT AM_CFLAGS CFLAGS AM_CCASFLAGS CCASFLAGS src_libwolfssl_la_OBJECTS ENABLED_CRYPT_TESTS ENABLED_ASM SUBDIRS = linuxkm endif diff --git a/configure.ac b/configure.ac index a62b04e12..3464283b7 100644 --- a/configure.ac +++ b/configure.ac @@ -279,6 +279,7 @@ if test "$ENABLED_ASM" = "no" then AM_CFLAGS="$AM_CFLAGS -DTFM_NO_ASM -DWOLFSSL_NO_ASM" fi +AC_SUBST([ENABLED_ASM]) # SINGLE THREADED diff --git a/linuxkm/Kbuild b/linuxkm/Kbuild index ab7211f04..2f63c6fee 100644 --- a/linuxkm/Kbuild +++ b/linuxkm/Kbuild @@ -10,6 +10,8 @@ ifeq "$(WOLFSSL_CFLAGS)" "" $(error $$WOLFSSL_CFLAGS is unset.) endif +WOLFSSL_CFLAGS += -Wframe-larger-than=$(MAX_STACK_FRAME_SIZE) -mpreferred-stack-boundary=4 + obj-m := libwolfssl.o WOLFSSL_OBJ_TARGETS=$(patsubst %, $(obj)/%, $(WOLFSSL_OBJ_FILES)) @@ -30,17 +32,42 @@ MAX_STACK_FRAME_SIZE=$(shell echo $$(( $(KERNEL_THREAD_STACK_SIZE) / 4))) libwolfssl-y := $(WOLFSSL_OBJ_FILES) linuxkm/module_hooks.o linuxkm/module_exports.o -ccflags-y = $(WOLFSSL_CFLAGS) -Wframe-larger-than=$(MAX_STACK_FRAME_SIZE) -mpreferred-stack-boundary=4 +ifeq "$(KERNEL_ARCH)" "x86" + WOLFSSL_CFLAGS_NO_VECTOR_INSNS ::= -mno-sse + ifeq "$(ENABLED_ASM)" "yes" + # x86 kernel disables fp and vector insns and register usage with + # "-mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-80387 -mno-fp-ret-in-387". + # reenable minimum subset of vector ops needed for compilation, + # while explicitly disabling auto-vectorization, and leave fp disabled. + # note that including -mavx here is known to introduce unaccommodated + # simd register ops, e.g. in integer.c:mp_exch() . 
+ WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::= -msse -mmmx -fno-builtin -fno-tree-vectorize -fno-tree-loop-vectorize -fno-tree-slp-vectorize + else + WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::= + endif +else ifeq "$(KERNEL_ARCH)" "arm64" + WOLFSSL_CFLAGS_NO_VECTOR_INSNS ::= + WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::= +# WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::= $(AM_CFLAGS) -mno-general-regs-only -mno-fpu -fno-tree-vectorize -fno-tree-loop-vectorize -fno-tree-slp-vectorize +else ifeq "$(KERNEL_ARCH)" "arm" + WOLFSSL_CFLAGS_NO_VECTOR_INSNS ::= + WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::= +# WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::= $(AM_CFLAGS) -mno-general-regs-only -mno-fpu -fno-tree-vectorize -fno-tree-loop-vectorize -fno-tree-slp-vectorize +endif -%/libwolfssl.mod.o: ccflags-y := -%/test.o: ccflags-y += -DNO_MAIN_DRIVER +ccflags-y = $(WOLFSSL_CFLAGS) $(WOLFSSL_CFLAGS_NO_VECTOR_INSNS) + +$(obj)/libwolfssl.mod.o: ccflags-y := +$(obj)/wolfcrypt/test/test.o: ccflags-y += -DNO_MAIN_DRIVER + +$(obj)/wolfcrypt/src/aes.o: ccflags-y = $(WOLFSSL_CFLAGS) $(WOLFSSL_CFLAGS_YES_VECTOR_INSNS) asflags-y := $(WOLFSSL_ASFLAGS) # these two _asms are kernel-compatible (they don't reference the pic-related _GLOBAL_OFFSET_TABLE_) # but they still irritate objtool: "unannotated intra-function call" and "BP used as a scratch register" -%/aes_asm.o: OBJECT_FILES_NON_STANDARD := y -%/aes_gcm_asm.o: OBJECT_FILES_NON_STANDARD := y +$(obj)/wolfcrypt/src/aes_asm.o: OBJECT_FILES_NON_STANDARD := y +$(obj)/wolfcrypt/src/aes_gcm_asm.o: OBJECT_FILES_NON_STANDARD := y # auto-generate the exported symbol list, leveraging the WOLFSSL_API visibility tags. # exclude symbols that don't match wc_* or wolf*. 
diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index c9c6f50da..023fd53a5 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -777,6 +777,8 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits nr = temp_key.rounds; aes->rounds = nr; + SAVE_VECTOR_REGISTERS(); + Key_Schedule[nr] = Temp_Key_Schedule[0]; Key_Schedule[nr-1] = _mm_aesimc_si128(Temp_Key_Schedule[1]); Key_Schedule[nr-2] = _mm_aesimc_si128(Temp_Key_Schedule[2]); @@ -800,6 +802,8 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits Key_Schedule[0] = Temp_Key_Schedule[nr]; + RESTORE_VECTOR_REGISTERS(); + return 0; } #endif /* HAVE_AES_DECRYPT */ @@ -1696,12 +1700,12 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN)); - XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE); SAVE_VECTOR_REGISTERS(); + XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE); AES_ECB_encrypt(tmp_align, tmp_align, AES_BLOCK_SIZE, (byte*)aes->key, aes->rounds); - RESTORE_VECTOR_REGISTERS(); XMEMCPY(outBlock, tmp_align, AES_BLOCK_SIZE); + RESTORE_VECTOR_REGISTERS(); XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); return; #else @@ -1995,9 +1999,9 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) #endif /* if input and output same will overwrite input iv */ + SAVE_VECTOR_REGISTERS(); if ((const byte*)aes->tmp != inBlock) XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE); - SAVE_VECTOR_REGISTERS(); AES_ECB_decrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key, aes->rounds); RESTORE_VECTOR_REGISTERS(); @@ -7268,15 +7272,17 @@ int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz, #ifdef WOLFSSL_AESNI if (haveAESNI && aes->use_aesni) { while (inSz >= AES_BLOCK_SIZE * 4) { + SAVE_VECTOR_REGISTERS(); + AesCcmCtrIncSet4(B, lenSz); - SAVE_VECTOR_REGISTERS(); AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key, aes->rounds); - RESTORE_VECTOR_REGISTERS(); 
xorbuf(A, in, AES_BLOCK_SIZE * 4); XMEMCPY(out, A, AES_BLOCK_SIZE * 4); + RESTORE_VECTOR_REGISTERS(); + inSz -= AES_BLOCK_SIZE * 4; in += AES_BLOCK_SIZE * 4; out += AES_BLOCK_SIZE * 4; @@ -7352,15 +7358,18 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, #ifdef WOLFSSL_AESNI if (haveAESNI && aes->use_aesni) { while (oSz >= AES_BLOCK_SIZE * 4) { + SAVE_VECTOR_REGISTERS(); + AesCcmCtrIncSet4(B, lenSz); - SAVE_VECTOR_REGISTERS(); AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key, aes->rounds); - RESTORE_VECTOR_REGISTERS(); + xorbuf(A, in, AES_BLOCK_SIZE * 4); XMEMCPY(o, A, AES_BLOCK_SIZE * 4); + RESTORE_VECTOR_REGISTERS(); + oSz -= AES_BLOCK_SIZE * 4; in += AES_BLOCK_SIZE * 4; o += AES_BLOCK_SIZE * 4; diff --git a/wolfssl/wolfcrypt/wc_port.h b/wolfssl/wolfcrypt/wc_port.h index ae68674f1..8ebc0808a 100644 --- a/wolfssl/wolfcrypt/wc_port.h +++ b/wolfssl/wolfcrypt/wc_port.h @@ -88,7 +88,13 @@ #endif #include #include - #include + #if defined(WOLFSSL_AESNI) || defined(USE_INTEL_SPEEDUP) || defined(WOLFSSL_ARMASM) + #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) + #include + #else + #include + #endif + #endif _Pragma("GCC diagnostic pop"); /* remove this multifariously conflicting macro, picked up from