mirror of https://github.com/wolfSSL/wolfssl.git
linuxkm: override-disable SIMD instructions for all .c.o's, with exceptions enumerated in Kbuild (currently only aes.c), and couple -msse with -fno-builtin; export ENABLED_ASM for use as a pivot in Kbuild; on kernels older than 4.0.0, use asm/i387.h, not asm/simd.h, for kernel_fpu_{begin,end}() protos.
parent
3626332334
commit
cd14cfb092
|
@ -198,7 +198,7 @@ endif
|
|||
include scripts/include.am
|
||||
|
||||
if BUILD_LINUXKM
|
||||
export KERNEL_ROOT KERNEL_ARCH KERNEL_OPT AM_CFLAGS CFLAGS AM_CCASFLAGS CCASFLAGS src_libwolfssl_la_OBJECTS ENABLED_CRYPT_TESTS
|
||||
export KERNEL_ROOT KERNEL_ARCH KERNEL_OPT AM_CFLAGS CFLAGS AM_CCASFLAGS CCASFLAGS src_libwolfssl_la_OBJECTS ENABLED_CRYPT_TESTS ENABLED_ASM
|
||||
SUBDIRS = linuxkm
|
||||
endif
|
||||
|
||||
|
|
|
@ -279,6 +279,7 @@ if test "$ENABLED_ASM" = "no"
|
|||
then
|
||||
AM_CFLAGS="$AM_CFLAGS -DTFM_NO_ASM -DWOLFSSL_NO_ASM"
|
||||
fi
|
||||
AC_SUBST([ENABLED_ASM])
|
||||
|
||||
|
||||
# SINGLE THREADED
|
||||
|
|
|
@ -10,6 +10,8 @@ ifeq "$(WOLFSSL_CFLAGS)" ""
|
|||
$(error $$WOLFSSL_CFLAGS is unset.)
|
||||
endif
|
||||
|
||||
WOLFSSL_CFLAGS += -Wframe-larger-than=$(MAX_STACK_FRAME_SIZE) -mpreferred-stack-boundary=4
|
||||
|
||||
obj-m := libwolfssl.o
|
||||
|
||||
WOLFSSL_OBJ_TARGETS=$(patsubst %, $(obj)/%, $(WOLFSSL_OBJ_FILES))
|
||||
|
@ -30,17 +32,42 @@ MAX_STACK_FRAME_SIZE=$(shell echo $$(( $(KERNEL_THREAD_STACK_SIZE) / 4)))
|
|||
|
||||
libwolfssl-y := $(WOLFSSL_OBJ_FILES) linuxkm/module_hooks.o linuxkm/module_exports.o
|
||||
|
||||
ccflags-y = $(WOLFSSL_CFLAGS) -Wframe-larger-than=$(MAX_STACK_FRAME_SIZE) -mpreferred-stack-boundary=4
|
||||
ifeq "$(KERNEL_ARCH)" "x86"
|
||||
WOLFSSL_CFLAGS_NO_VECTOR_INSNS ::= -mno-sse
|
||||
ifeq "$(ENABLED_ASM)" "yes"
|
||||
# x86 kernel disables fp and vector insns and register usage with
|
||||
# "-mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-80387 -mno-fp-ret-in-387".
|
||||
# reenable minimum subset of vector ops needed for compilation,
|
||||
# while explicitly disabling auto-vectorization, and leave fp disabled.
|
||||
# note that including -mavx here is known to introduce unaccommodated
|
||||
# simd register ops, e.g. in integer.c:mp_exch() .
|
||||
WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::= -msse -mmmx -fno-builtin -fno-tree-vectorize -fno-tree-loop-vectorize -fno-tree-slp-vectorize
|
||||
else
|
||||
WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::=
|
||||
endif
|
||||
else ifeq "$(KERNEL_ARCH)" "arm64"
|
||||
WOLFSSL_CFLAGS_NO_VECTOR_INSNS ::=
|
||||
WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::=
|
||||
# WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::= $AM_CFLAGS -mno-general-regs-only -mno-fpu -fno-tree-vectorize -fno-tree-loop-vectorize -fno-tree-slp-vectorize
|
||||
else ifeq "$(KERNEL_ARCH)" "arm"
|
||||
WOLFSSL_CFLAGS_NO_VECTOR_INSNS ::=
|
||||
WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::=
|
||||
# WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::= $AM_CFLAGS -mno-general-regs-only -mno-fpu -fno-tree-vectorize -fno-tree-loop-vectorize -fno-tree-slp-vectorize
|
||||
endif
|
||||
|
||||
%/libwolfssl.mod.o: ccflags-y :=
|
||||
%/test.o: ccflags-y += -DNO_MAIN_DRIVER
|
||||
ccflags-y = $(WOLFSSL_CFLAGS) $(WOLFSSL_CFLAGS_NO_VECTOR_INSNS)
|
||||
|
||||
$(obj)/libwolfssl.mod.o: ccflags-y :=
|
||||
$(obj)/wolfcrypt/test/test.o: ccflags-y += -DNO_MAIN_DRIVER
|
||||
|
||||
$(obj)/wolfcrypt/src/aes.o: ccflags-y = $(WOLFSSL_CFLAGS) $(WOLFSSL_CFLAGS_YES_VECTOR_INSNS)
|
||||
|
||||
asflags-y := $(WOLFSSL_ASFLAGS)
|
||||
|
||||
# these two _asms are kernel-compatible (they don't reference the pic-related _GLOBAL_OFFSET_TABLE_)
|
||||
# but they still irritate objtool: "unannotated intra-function call" and "BP used as a scratch register"
|
||||
%/aes_asm.o: OBJECT_FILES_NON_STANDARD := y
|
||||
%/aes_gcm_asm.o: OBJECT_FILES_NON_STANDARD := y
|
||||
$(obj)/wolfcrypt/src/aes_asm.o: OBJECT_FILES_NON_STANDARD := y
|
||||
$(obj)/wolfcrypt/src/aes_gcm_asm.o: OBJECT_FILES_NON_STANDARD := y
|
||||
|
||||
# auto-generate the exported symbol list, leveraging the WOLFSSL_API visibility tags.
|
||||
# exclude symbols that don't match wc_* or wolf*.
|
||||
|
|
|
@ -777,6 +777,8 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
|
|||
nr = temp_key.rounds;
|
||||
aes->rounds = nr;
|
||||
|
||||
SAVE_VECTOR_REGISTERS();
|
||||
|
||||
Key_Schedule[nr] = Temp_Key_Schedule[0];
|
||||
Key_Schedule[nr-1] = _mm_aesimc_si128(Temp_Key_Schedule[1]);
|
||||
Key_Schedule[nr-2] = _mm_aesimc_si128(Temp_Key_Schedule[2]);
|
||||
|
@ -800,6 +802,8 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
|
|||
|
||||
Key_Schedule[0] = Temp_Key_Schedule[nr];
|
||||
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif /* HAVE_AES_DECRYPT */
|
||||
|
@ -1696,12 +1700,12 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
|
|||
|
||||
tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN));
|
||||
|
||||
XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE);
|
||||
SAVE_VECTOR_REGISTERS();
|
||||
XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE);
|
||||
AES_ECB_encrypt(tmp_align, tmp_align, AES_BLOCK_SIZE,
|
||||
(byte*)aes->key, aes->rounds);
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
XMEMCPY(outBlock, tmp_align, AES_BLOCK_SIZE);
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
|
||||
return;
|
||||
#else
|
||||
|
@ -1995,9 +1999,9 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
|
|||
#endif
|
||||
|
||||
/* if input and output same will overwrite input iv */
|
||||
SAVE_VECTOR_REGISTERS();
|
||||
if ((const byte*)aes->tmp != inBlock)
|
||||
XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE);
|
||||
SAVE_VECTOR_REGISTERS();
|
||||
AES_ECB_decrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key,
|
||||
aes->rounds);
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
|
@ -7268,15 +7272,17 @@ int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
|
|||
#ifdef WOLFSSL_AESNI
|
||||
if (haveAESNI && aes->use_aesni) {
|
||||
while (inSz >= AES_BLOCK_SIZE * 4) {
|
||||
SAVE_VECTOR_REGISTERS();
|
||||
|
||||
AesCcmCtrIncSet4(B, lenSz);
|
||||
|
||||
SAVE_VECTOR_REGISTERS();
|
||||
AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key,
|
||||
aes->rounds);
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
xorbuf(A, in, AES_BLOCK_SIZE * 4);
|
||||
XMEMCPY(out, A, AES_BLOCK_SIZE * 4);
|
||||
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
|
||||
inSz -= AES_BLOCK_SIZE * 4;
|
||||
in += AES_BLOCK_SIZE * 4;
|
||||
out += AES_BLOCK_SIZE * 4;
|
||||
|
@ -7352,15 +7358,18 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
|
|||
#ifdef WOLFSSL_AESNI
|
||||
if (haveAESNI && aes->use_aesni) {
|
||||
while (oSz >= AES_BLOCK_SIZE * 4) {
|
||||
SAVE_VECTOR_REGISTERS();
|
||||
|
||||
AesCcmCtrIncSet4(B, lenSz);
|
||||
|
||||
SAVE_VECTOR_REGISTERS();
|
||||
AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key,
|
||||
aes->rounds);
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
|
||||
xorbuf(A, in, AES_BLOCK_SIZE * 4);
|
||||
XMEMCPY(o, A, AES_BLOCK_SIZE * 4);
|
||||
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
|
||||
oSz -= AES_BLOCK_SIZE * 4;
|
||||
in += AES_BLOCK_SIZE * 4;
|
||||
o += AES_BLOCK_SIZE * 4;
|
||||
|
|
|
@ -88,7 +88,13 @@
|
|||
#endif
|
||||
#include <linux/net.h>
|
||||
#include <linux/slab.h>
|
||||
#include <asm/simd.h>
|
||||
#if defined(WOLFSSL_AESNI) || defined(USE_INTEL_SPEEDUP) || defined(WOLFSSL_ARMASM)
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
|
||||
#include <asm/i387.h>
|
||||
#else
|
||||
#include <asm/simd.h>
|
||||
#endif
|
||||
#endif
|
||||
_Pragma("GCC diagnostic pop");
|
||||
|
||||
/* remove this multifariously conflicting macro, picked up from
|
||||
|
|
Loading…
Reference in New Issue