mirror of https://github.com/wolfSSL/wolfssl.git
linuxkm: override-disable SIMD instructions for all .c.o's, with exceptions enumerated in Kbuild (currently only aes.c), and couple -msse with -fno-builtin; export ENABLED_ASM for use as a pivot in Kbuild; on kernels older than 4.0, use asm/i387.h, not asm/simd.h, for kernel_fpu_{begin,end}() protos, and include either header only when WOLFSSL_AESNI, USE_INTEL_SPEEDUP, or WOLFSSL_ARMASM is defined.
parent
3626332334
commit
cd14cfb092
|
@ -198,7 +198,7 @@ endif
|
||||||
include scripts/include.am
|
include scripts/include.am
|
||||||
|
|
||||||
if BUILD_LINUXKM
|
if BUILD_LINUXKM
|
||||||
export KERNEL_ROOT KERNEL_ARCH KERNEL_OPT AM_CFLAGS CFLAGS AM_CCASFLAGS CCASFLAGS src_libwolfssl_la_OBJECTS ENABLED_CRYPT_TESTS
|
export KERNEL_ROOT KERNEL_ARCH KERNEL_OPT AM_CFLAGS CFLAGS AM_CCASFLAGS CCASFLAGS src_libwolfssl_la_OBJECTS ENABLED_CRYPT_TESTS ENABLED_ASM
|
||||||
SUBDIRS = linuxkm
|
SUBDIRS = linuxkm
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
|
@ -279,6 +279,7 @@ if test "$ENABLED_ASM" = "no"
|
||||||
then
|
then
|
||||||
AM_CFLAGS="$AM_CFLAGS -DTFM_NO_ASM -DWOLFSSL_NO_ASM"
|
AM_CFLAGS="$AM_CFLAGS -DTFM_NO_ASM -DWOLFSSL_NO_ASM"
|
||||||
fi
|
fi
|
||||||
|
AC_SUBST([ENABLED_ASM])
|
||||||
|
|
||||||
|
|
||||||
# SINGLE THREADED
|
# SINGLE THREADED
|
||||||
|
|
|
@ -10,6 +10,8 @@ ifeq "$(WOLFSSL_CFLAGS)" ""
|
||||||
$(error $$WOLFSSL_CFLAGS is unset.)
|
$(error $$WOLFSSL_CFLAGS is unset.)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
WOLFSSL_CFLAGS += -Wframe-larger-than=$(MAX_STACK_FRAME_SIZE) -mpreferred-stack-boundary=4
|
||||||
|
|
||||||
obj-m := libwolfssl.o
|
obj-m := libwolfssl.o
|
||||||
|
|
||||||
WOLFSSL_OBJ_TARGETS=$(patsubst %, $(obj)/%, $(WOLFSSL_OBJ_FILES))
|
WOLFSSL_OBJ_TARGETS=$(patsubst %, $(obj)/%, $(WOLFSSL_OBJ_FILES))
|
||||||
|
@ -30,17 +32,42 @@ MAX_STACK_FRAME_SIZE=$(shell echo $$(( $(KERNEL_THREAD_STACK_SIZE) / 4)))
|
||||||
|
|
||||||
libwolfssl-y := $(WOLFSSL_OBJ_FILES) linuxkm/module_hooks.o linuxkm/module_exports.o
|
libwolfssl-y := $(WOLFSSL_OBJ_FILES) linuxkm/module_hooks.o linuxkm/module_exports.o
|
||||||
|
|
||||||
ccflags-y = $(WOLFSSL_CFLAGS) -Wframe-larger-than=$(MAX_STACK_FRAME_SIZE) -mpreferred-stack-boundary=4
|
ifeq "$(KERNEL_ARCH)" "x86"
|
||||||
|
WOLFSSL_CFLAGS_NO_VECTOR_INSNS ::= -mno-sse
|
||||||
|
ifeq "$(ENABLED_ASM)" "yes"
|
||||||
|
# x86 kernel disables fp and vector insns and register usage with
|
||||||
|
# "-mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-80387 -mno-fp-ret-in-387".
|
||||||
|
# reenable minimum subset of vector ops needed for compilation,
|
||||||
|
# while explicitly disabling auto-vectorization, and leave fp disabled.
|
||||||
|
# note that including -mavx here is known to introduce unaccommodated
|
||||||
|
# simd register ops, e.g. in integer.c:mp_exch() .
|
||||||
|
WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::= -msse -mmmx -fno-builtin -fno-tree-vectorize -fno-tree-loop-vectorize -fno-tree-slp-vectorize
|
||||||
|
else
|
||||||
|
WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::=
|
||||||
|
endif
|
||||||
|
else ifeq "$(KERNEL_ARCH)" "arm64"
|
||||||
|
WOLFSSL_CFLAGS_NO_VECTOR_INSNS ::=
|
||||||
|
WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::=
|
||||||
|
# WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::= $AM_CFLAGS -mno-general-regs-only -mno-fpu -fno-tree-vectorize -fno-tree-loop-vectorize -fno-tree-slp-vectorize
|
||||||
|
else ifeq "$(KERNEL_ARCH)" "arm"
|
||||||
|
WOLFSSL_CFLAGS_NO_VECTOR_INSNS ::=
|
||||||
|
WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::=
|
||||||
|
# WOLFSSL_CFLAGS_YES_VECTOR_INSNS ::= $AM_CFLAGS -mno-general-regs-only -mno-fpu -fno-tree-vectorize -fno-tree-loop-vectorize -fno-tree-slp-vectorize
|
||||||
|
endif
|
||||||
|
|
||||||
%/libwolfssl.mod.o: ccflags-y :=
|
ccflags-y = $(WOLFSSL_CFLAGS) $(WOLFSSL_CFLAGS_NO_VECTOR_INSNS)
|
||||||
%/test.o: ccflags-y += -DNO_MAIN_DRIVER
|
|
||||||
|
$(obj)/libwolfssl.mod.o: ccflags-y :=
|
||||||
|
$(obj)/wolfcrypt/test/test.o: ccflags-y += -DNO_MAIN_DRIVER
|
||||||
|
|
||||||
|
$(obj)/wolfcrypt/src/aes.o: ccflags-y = $(WOLFSSL_CFLAGS) $(WOLFSSL_CFLAGS_YES_VECTOR_INSNS)
|
||||||
|
|
||||||
asflags-y := $(WOLFSSL_ASFLAGS)
|
asflags-y := $(WOLFSSL_ASFLAGS)
|
||||||
|
|
||||||
# these two _asms are kernel-compatible (they don't reference the pic-related _GLOBAL_OFFSET_TABLE_)
|
# these two _asms are kernel-compatible (they don't reference the pic-related _GLOBAL_OFFSET_TABLE_)
|
||||||
# but they still irritate objtool: "unannotated intra-function call" and "BP used as a scratch register"
|
# but they still irritate objtool: "unannotated intra-function call" and "BP used as a scratch register"
|
||||||
%/aes_asm.o: OBJECT_FILES_NON_STANDARD := y
|
$(obj)/wolfcrypt/src/aes_asm.o: OBJECT_FILES_NON_STANDARD := y
|
||||||
%/aes_gcm_asm.o: OBJECT_FILES_NON_STANDARD := y
|
$(obj)/wolfcrypt/src/aes_gcm_asm.o: OBJECT_FILES_NON_STANDARD := y
|
||||||
|
|
||||||
# auto-generate the exported symbol list, leveraging the WOLFSSL_API visibility tags.
|
# auto-generate the exported symbol list, leveraging the WOLFSSL_API visibility tags.
|
||||||
# exclude symbols that don't match wc_* or wolf*.
|
# exclude symbols that don't match wc_* or wolf*.
|
||||||
|
|
|
@ -777,6 +777,8 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
|
||||||
nr = temp_key.rounds;
|
nr = temp_key.rounds;
|
||||||
aes->rounds = nr;
|
aes->rounds = nr;
|
||||||
|
|
||||||
|
SAVE_VECTOR_REGISTERS();
|
||||||
|
|
||||||
Key_Schedule[nr] = Temp_Key_Schedule[0];
|
Key_Schedule[nr] = Temp_Key_Schedule[0];
|
||||||
Key_Schedule[nr-1] = _mm_aesimc_si128(Temp_Key_Schedule[1]);
|
Key_Schedule[nr-1] = _mm_aesimc_si128(Temp_Key_Schedule[1]);
|
||||||
Key_Schedule[nr-2] = _mm_aesimc_si128(Temp_Key_Schedule[2]);
|
Key_Schedule[nr-2] = _mm_aesimc_si128(Temp_Key_Schedule[2]);
|
||||||
|
@ -800,6 +802,8 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits
|
||||||
|
|
||||||
Key_Schedule[0] = Temp_Key_Schedule[nr];
|
Key_Schedule[0] = Temp_Key_Schedule[nr];
|
||||||
|
|
||||||
|
RESTORE_VECTOR_REGISTERS();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#endif /* HAVE_AES_DECRYPT */
|
#endif /* HAVE_AES_DECRYPT */
|
||||||
|
@ -1696,12 +1700,12 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
|
||||||
|
|
||||||
tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN));
|
tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN));
|
||||||
|
|
||||||
XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE);
|
|
||||||
SAVE_VECTOR_REGISTERS();
|
SAVE_VECTOR_REGISTERS();
|
||||||
|
XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE);
|
||||||
AES_ECB_encrypt(tmp_align, tmp_align, AES_BLOCK_SIZE,
|
AES_ECB_encrypt(tmp_align, tmp_align, AES_BLOCK_SIZE,
|
||||||
(byte*)aes->key, aes->rounds);
|
(byte*)aes->key, aes->rounds);
|
||||||
RESTORE_VECTOR_REGISTERS();
|
|
||||||
XMEMCPY(outBlock, tmp_align, AES_BLOCK_SIZE);
|
XMEMCPY(outBlock, tmp_align, AES_BLOCK_SIZE);
|
||||||
|
RESTORE_VECTOR_REGISTERS();
|
||||||
XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
|
XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
|
||||||
return;
|
return;
|
||||||
#else
|
#else
|
||||||
|
@ -1995,9 +1999,9 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* if input and output same will overwrite input iv */
|
/* if input and output same will overwrite input iv */
|
||||||
|
SAVE_VECTOR_REGISTERS();
|
||||||
if ((const byte*)aes->tmp != inBlock)
|
if ((const byte*)aes->tmp != inBlock)
|
||||||
XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE);
|
XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE);
|
||||||
SAVE_VECTOR_REGISTERS();
|
|
||||||
AES_ECB_decrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key,
|
AES_ECB_decrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key,
|
||||||
aes->rounds);
|
aes->rounds);
|
||||||
RESTORE_VECTOR_REGISTERS();
|
RESTORE_VECTOR_REGISTERS();
|
||||||
|
@ -7268,15 +7272,17 @@ int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
|
||||||
#ifdef WOLFSSL_AESNI
|
#ifdef WOLFSSL_AESNI
|
||||||
if (haveAESNI && aes->use_aesni) {
|
if (haveAESNI && aes->use_aesni) {
|
||||||
while (inSz >= AES_BLOCK_SIZE * 4) {
|
while (inSz >= AES_BLOCK_SIZE * 4) {
|
||||||
|
SAVE_VECTOR_REGISTERS();
|
||||||
|
|
||||||
AesCcmCtrIncSet4(B, lenSz);
|
AesCcmCtrIncSet4(B, lenSz);
|
||||||
|
|
||||||
SAVE_VECTOR_REGISTERS();
|
|
||||||
AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key,
|
AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key,
|
||||||
aes->rounds);
|
aes->rounds);
|
||||||
RESTORE_VECTOR_REGISTERS();
|
|
||||||
xorbuf(A, in, AES_BLOCK_SIZE * 4);
|
xorbuf(A, in, AES_BLOCK_SIZE * 4);
|
||||||
XMEMCPY(out, A, AES_BLOCK_SIZE * 4);
|
XMEMCPY(out, A, AES_BLOCK_SIZE * 4);
|
||||||
|
|
||||||
|
RESTORE_VECTOR_REGISTERS();
|
||||||
|
|
||||||
inSz -= AES_BLOCK_SIZE * 4;
|
inSz -= AES_BLOCK_SIZE * 4;
|
||||||
in += AES_BLOCK_SIZE * 4;
|
in += AES_BLOCK_SIZE * 4;
|
||||||
out += AES_BLOCK_SIZE * 4;
|
out += AES_BLOCK_SIZE * 4;
|
||||||
|
@ -7352,15 +7358,18 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
|
||||||
#ifdef WOLFSSL_AESNI
|
#ifdef WOLFSSL_AESNI
|
||||||
if (haveAESNI && aes->use_aesni) {
|
if (haveAESNI && aes->use_aesni) {
|
||||||
while (oSz >= AES_BLOCK_SIZE * 4) {
|
while (oSz >= AES_BLOCK_SIZE * 4) {
|
||||||
|
SAVE_VECTOR_REGISTERS();
|
||||||
|
|
||||||
AesCcmCtrIncSet4(B, lenSz);
|
AesCcmCtrIncSet4(B, lenSz);
|
||||||
|
|
||||||
SAVE_VECTOR_REGISTERS();
|
|
||||||
AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key,
|
AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key,
|
||||||
aes->rounds);
|
aes->rounds);
|
||||||
RESTORE_VECTOR_REGISTERS();
|
|
||||||
xorbuf(A, in, AES_BLOCK_SIZE * 4);
|
xorbuf(A, in, AES_BLOCK_SIZE * 4);
|
||||||
XMEMCPY(o, A, AES_BLOCK_SIZE * 4);
|
XMEMCPY(o, A, AES_BLOCK_SIZE * 4);
|
||||||
|
|
||||||
|
RESTORE_VECTOR_REGISTERS();
|
||||||
|
|
||||||
oSz -= AES_BLOCK_SIZE * 4;
|
oSz -= AES_BLOCK_SIZE * 4;
|
||||||
in += AES_BLOCK_SIZE * 4;
|
in += AES_BLOCK_SIZE * 4;
|
||||||
o += AES_BLOCK_SIZE * 4;
|
o += AES_BLOCK_SIZE * 4;
|
||||||
|
|
|
@ -88,7 +88,13 @@
|
||||||
#endif
|
#endif
|
||||||
#include <linux/net.h>
|
#include <linux/net.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <asm/simd.h>
|
#if defined(WOLFSSL_AESNI) || defined(USE_INTEL_SPEEDUP) || defined(WOLFSSL_ARMASM)
|
||||||
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
|
||||||
|
#include <asm/i387.h>
|
||||||
|
#else
|
||||||
|
#include <asm/simd.h>
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
_Pragma("GCC diagnostic pop");
|
_Pragma("GCC diagnostic pop");
|
||||||
|
|
||||||
/* remove this multifariously conflicting macro, picked up from
|
/* remove this multifariously conflicting macro, picked up from
|
||||||
|
|
Loading…
Reference in New Issue