linuxkm: override-disable SIMD instructions for all .c.o's, with exceptions enumerated in Kbuild (currently only aes.c), and couple -msse with -fno-builtin; export ENABLED_ASM for use as a pivot in Kbuild; use asm/i387.h, not asm/simd.h, for kernel_fpu_{begin,end}() protos.

pull/3244/head
Daniel Pouzzner 2020-08-31 18:37:04 -05:00
parent 3626332334
commit cd14cfb092
5 changed files with 57 additions and 14 deletions

View File

@ -198,7 +198,7 @@ endif
include scripts/include.am
if BUILD_LINUXKM
export KERNEL_ROOT KERNEL_ARCH KERNEL_OPT AM_CFLAGS CFLAGS AM_CCASFLAGS CCASFLAGS src_libwolfssl_la_OBJECTS ENABLED_CRYPT_TESTS
export KERNEL_ROOT KERNEL_ARCH KERNEL_OPT AM_CFLAGS CFLAGS AM_CCASFLAGS CCASFLAGS src_libwolfssl_la_OBJECTS ENABLED_CRYPT_TESTS ENABLED_ASM
SUBDIRS = linuxkm
endif

View File

@ -279,6 +279,7 @@ if test "$ENABLED_ASM" = "no"
then
AM_CFLAGS="$AM_CFLAGS -DTFM_NO_ASM -DWOLFSSL_NO_ASM"
fi
AC_SUBST([ENABLED_ASM])
# SINGLE THREADED

View File

@ -10,6 +10,8 @@ ifeq "$(WOLFSSL_CFLAGS)" ""
$(error $$WOLFSSL_CFLAGS is unset.)
endif
WOLFSSL_CFLAGS += -Wframe-larger-than=$(MAX_STACK_FRAME_SIZE) -mpreferred-stack-boundary=4
obj-m := libwolfssl.o
WOLFSSL_OBJ_TARGETS=$(patsubst %, $(obj)/%, $(WOLFSSL_OBJ_FILES))
@ -30,17 +32,42 @@ MAX_STACK_FRAME_SIZE=$(shell echo $$(( $(KERNEL_THREAD_STACK_SIZE) / 4)))
libwolfssl-y := $(WOLFSSL_OBJ_FILES) linuxkm/module_hooks.o linuxkm/module_exports.o
ccflags-y = $(WOLFSSL_CFLAGS) -Wframe-larger-than=$(MAX_STACK_FRAME_SIZE) -mpreferred-stack-boundary=4
# Select the per-architecture compiler flags that control vector (SIMD)
# instruction generation, keyed on $(KERNEL_ARCH):
#   WOLFSSL_CFLAGS_NO_VECTOR_INSNS  - flags that keep vector insns disabled;
#                                     folded into the default ccflags-y in this file.
#   WOLFSSL_CFLAGS_YES_VECTOR_INSNS - flags for objects that opt in to vector
#                                     insns (in this file, only wolfcrypt/src/aes.o).
# Both are assigned with "::=", so the right-hand side is expanded once, here.
# For any KERNEL_ARCH not listed below, neither variable is set by this block.
ifeq "$(KERNEL_ARCH)" "x86"
WOLFSSL_CFLAGS_NO_VECTOR_INSNS ::= -mno-sse
ifeq "$(ENABLED_ASM)" "yes"
# The x86 kernel disables fp and vector insns and register usage with
# "-mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-80387 -mno-fp-ret-in-387".
# Re-enable the minimum subset of vector ops needed for compilation,
# while explicitly disabling auto-vectorization, and leave fp disabled.
# Note that including -mavx here is known to introduce unaccommodated
# simd register ops, e.g. in integer.c:mp_exch() .
WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::= -msse -mmmx -fno-builtin -fno-tree-vectorize -fno-tree-loop-vectorize -fno-tree-slp-vectorize
else
# asm support is not enabled: no object gets vector insns re-enabled.
WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::=
endif
else ifeq "$(KERNEL_ARCH)" "arm64"
# arm64: no flag overrides in either direction for now; the commented-out
# assignment below is a candidate flag set that is not currently in use.
WOLFSSL_CFLAGS_NO_VECTOR_INSNS ::=
WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::=
# WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::= $(AM_CFLAGS) -mno-general-regs-only -mno-fpu -fno-tree-vectorize -fno-tree-loop-vectorize -fno-tree-slp-vectorize
else ifeq "$(KERNEL_ARCH)" "arm"
# arm: same as arm64 -- both variables are left empty for now, and the
# commented-out assignment below is not currently in use.
WOLFSSL_CFLAGS_NO_VECTOR_INSNS ::=
WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::=
# WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::= $(AM_CFLAGS) -mno-general-regs-only -mno-fpu -fno-tree-vectorize -fno-tree-loop-vectorize -fno-tree-slp-vectorize
endif
%/libwolfssl.mod.o: ccflags-y :=
%/test.o: ccflags-y += -DNO_MAIN_DRIVER
ccflags-y = $(WOLFSSL_CFLAGS) $(WOLFSSL_CFLAGS_NO_VECTOR_INSNS)
$(obj)/libwolfssl.mod.o: ccflags-y :=
$(obj)/wolfcrypt/test/test.o: ccflags-y += -DNO_MAIN_DRIVER
$(obj)/wolfcrypt/src/aes.o: ccflags-y = $(WOLFSSL_CFLAGS) $(WOLFSSL_CFLAGS_YES_VECTOR_INSNS)
asflags-y := $(WOLFSSL_ASFLAGS)
# these two _asms are kernel-compatible (they don't reference the pic-related _GLOBAL_OFFSET_TABLE_)
# but they still irritate objtool: "unannotated intra-function call" and "BP used as a scratch register"
%/aes_asm.o: OBJECT_FILES_NON_STANDARD := y
%/aes_gcm_asm.o: OBJECT_FILES_NON_STANDARD := y
$(obj)/wolfcrypt/src/aes_asm.o: OBJECT_FILES_NON_STANDARD := y
$(obj)/wolfcrypt/src/aes_gcm_asm.o: OBJECT_FILES_NON_STANDARD := y
# auto-generate the exported symbol list, leveraging the WOLFSSL_API visibility tags.
# exclude symbols that don't match wc_* or wolf*.

View File

@ -777,6 +777,8 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
nr = temp_key.rounds;
aes->rounds = nr;
SAVE_VECTOR_REGISTERS();
Key_Schedule[nr] = Temp_Key_Schedule[0];
Key_Schedule[nr-1] = _mm_aesimc_si128(Temp_Key_Schedule[1]);
Key_Schedule[nr-2] = _mm_aesimc_si128(Temp_Key_Schedule[2]);
@ -800,6 +802,8 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
Key_Schedule[0] = Temp_Key_Schedule[nr];
RESTORE_VECTOR_REGISTERS();
return 0;
}
#endif /* HAVE_AES_DECRYPT */
@ -1696,12 +1700,12 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN));
XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE);
SAVE_VECTOR_REGISTERS();
XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE);
AES_ECB_encrypt(tmp_align, tmp_align, AES_BLOCK_SIZE,
(byte*)aes->key, aes->rounds);
RESTORE_VECTOR_REGISTERS();
XMEMCPY(outBlock, tmp_align, AES_BLOCK_SIZE);
RESTORE_VECTOR_REGISTERS();
XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
return;
#else
@ -1995,9 +1999,9 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
#endif
/* if input and output same will overwrite input iv */
SAVE_VECTOR_REGISTERS();
if ((const byte*)aes->tmp != inBlock)
XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE);
SAVE_VECTOR_REGISTERS();
AES_ECB_decrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key,
aes->rounds);
RESTORE_VECTOR_REGISTERS();
@ -7268,15 +7272,17 @@ int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
#ifdef WOLFSSL_AESNI
if (haveAESNI && aes->use_aesni) {
while (inSz >= AES_BLOCK_SIZE * 4) {
SAVE_VECTOR_REGISTERS();
AesCcmCtrIncSet4(B, lenSz);
SAVE_VECTOR_REGISTERS();
AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key,
aes->rounds);
RESTORE_VECTOR_REGISTERS();
xorbuf(A, in, AES_BLOCK_SIZE * 4);
XMEMCPY(out, A, AES_BLOCK_SIZE * 4);
RESTORE_VECTOR_REGISTERS();
inSz -= AES_BLOCK_SIZE * 4;
in += AES_BLOCK_SIZE * 4;
out += AES_BLOCK_SIZE * 4;
@ -7352,15 +7358,18 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
#ifdef WOLFSSL_AESNI
if (haveAESNI && aes->use_aesni) {
while (oSz >= AES_BLOCK_SIZE * 4) {
SAVE_VECTOR_REGISTERS();
AesCcmCtrIncSet4(B, lenSz);
SAVE_VECTOR_REGISTERS();
AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key,
aes->rounds);
RESTORE_VECTOR_REGISTERS();
xorbuf(A, in, AES_BLOCK_SIZE * 4);
XMEMCPY(o, A, AES_BLOCK_SIZE * 4);
RESTORE_VECTOR_REGISTERS();
oSz -= AES_BLOCK_SIZE * 4;
in += AES_BLOCK_SIZE * 4;
o += AES_BLOCK_SIZE * 4;

View File

@ -88,7 +88,13 @@
#endif
#include <linux/net.h>
#include <linux/slab.h>
/* When any vector-instruction code path is compiled in (AES-NI, Intel
 * speedups, or ARM asm), include the kernel header that accompanies kernel-mode
 * SIMD use: <asm/i387.h> on kernels older than 4.0.0, <asm/simd.h> otherwise.
 * NOTE(review): the intent is to obtain the kernel_fpu_begin()/kernel_fpu_end()
 * prototypes; confirm that <asm/simd.h> exposes them on every supported
 * kernel >= 4.0.0 and on each architecture this is built for.
 */
#if defined(WOLFSSL_AESNI) || defined(USE_INTEL_SPEEDUP) || defined(WOLFSSL_ARMASM)
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
#include <asm/i387.h>
#else
#include <asm/simd.h>
#endif
#endif
_Pragma("GCC diagnostic pop");
/* remove this multifariously conflicting macro, picked up from