Disable AVX2 by default even on supported machines.

pull/48/head
Mooneer Salem 2022-11-15 18:46:27 -08:00
parent ba463962a5
commit 7ee18d49b0
1 changed files with 39 additions and 22 deletions

View File

@ -9,9 +9,9 @@ project(LPCNet C)
option(DISABLE_CPU_OPTIMIZATION "Disable CPU optimization discovery." OFF)
option(AVX2 "Enable AVX2 CPU optimizations." OFF)
option(AVX "Enable AVX CPU optimizations." OFF)
option(SSE "Enable SSE CPU optimizations." OFF)
option(NEON "Enable NEON CPU optimizations for RPi." OFF)
option(AVX "Enable AVX CPU optimizations." ON)
option(SSE "Enable SSE CPU optimizations." ON)
option(NEON "Enable NEON CPU optimizations for RPi." ON)
include(GNUInstallDirs)
mark_as_advanced(CLEAR
@ -89,54 +89,71 @@ if(NOT DISABLE_CPU_OPTIMIZATION)
if(UNIX AND NOT APPLE)
message(STATUS "Looking for available CPU optimizations on Linux/BSD system...")
execute_process(COMMAND grep -c "avx2" /proc/cpuinfo
OUTPUT_VARIABLE AVX2)
OUTPUT_VARIABLE AVX2_PRESENT)
execute_process(COMMAND grep -c "avx " /proc/cpuinfo
OUTPUT_VARIABLE AVX)
OUTPUT_VARIABLE AVX_PRESENT)
execute_process(COMMAND grep -c "sse4_1 " /proc/cpuinfo
OUTPUT_VARIABLE SSE)
OUTPUT_VARIABLE SSE_PRESENT)
execute_process(COMMAND grep -c "neon" /proc/cpuinfo
OUTPUT_VARIABLE NEON)
OUTPUT_VARIABLE NEON_PRESENT)
elseif(APPLE)
if(BUILD_OSX_UNIVERSAL)
# Presume AVX/AVX2 are enabled on the x86 side. The ARM side will auto-enable
# NEON optimizations by virtue of being aarch64.
set(AVX TRUE)
set(AVX2 TRUE)
set(SSE TRUE)
# Presume AVX and SSE are available on the x86 side. (AVX2 is not guaranteed, as it depends
# on the model.) The ARM side will auto-enable NEON optimizations by virtue of being aarch64.
set(AVX_PRESENT TRUE)
set(SSE_PRESENT TRUE)
set(NEON_PRESENT TRUE)
else()
# Under OSX we need to look through a few sysctl entries to determine what our CPU supports.
message(STATUS "Looking for available CPU optimizations on an OSX system...")
execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.leaf7_features COMMAND grep -c AVX2
OUTPUT_VARIABLE AVX2)
OUTPUT_VARIABLE AVX2_PRESENT)
execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c AVX
OUTPUT_VARIABLE AVX)
OUTPUT_VARIABLE AVX_PRESENT)
execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c SSE4.1
OUTPUT_VARIABLE SSE_PRESENT)
# Unlike the features above, NEON *is* guaranteed on ARM, as there were never any ARM32 Macs
# available. We don't need any specific compiler flags for it, though.
set(NEON_PRESENT TRUE)
endif(BUILD_OSX_UNIVERSAL)
elseif(WIN32)
message(STATUS "No detection capability on Windows, assuming AVX is available.")
set(AVX TRUE)
set(AVX_PRESENT TRUE)
else()
message(STATUS "System is not *nix, processor specific optimizations cannot be determined.")
message(" You can try setting them manually, e.g.: -DAVX2=1 or -DAVX=1 or -DNEON=1")
# Presume all optimizations are available, since the user will need to enable them manually.
set(AVX2_PRESENT TRUE)
set(AVX_PRESENT TRUE)
set(SSE_PRESENT TRUE)
set(NEON_PRESENT TRUE)
endif()
else()
# Presume all optimizations are available, since the user is likely enabling them manually
# (e.g. when cross-compiling).
set(AVX2_PRESENT TRUE)
set(AVX_PRESENT TRUE)
set(SSE_PRESENT TRUE)
set(NEON_PRESENT TRUE)
endif()
set(LPCNET_C_PROC_FLAGS "")
if(${AVX2} OR ${AVX2} GREATER 0)
if(${AVX2} AND (${AVX2_PRESENT} OR ${AVX2_PRESENT} GREATER 0))
message(STATUS "avx2 processor flags found or enabled.")
set(LPCNET_C_PROC_FLAGS -mavx2 -mfma)
elseif(${AVX} OR ${AVX} GREATER 0)
elseif(${AVX} AND (${AVX_PRESENT} OR ${AVX_PRESENT} GREATER 0))
# AVX2 machines will also match on AVX
message(STATUS "avx processor flags found or enabled.")
set(LPCNET_C_PROC_FLAGS -mavx)
elseif(${SSE} OR ${SSE} GREATER 0)
elseif(${SSE} AND (${SSE_PRESENT} OR ${SSE_PRESENT} GREATER 0))
# AVX and AVX2 machines will also match on SSE
message(STATUS "sse processor flags found or enabled.")
set(LPCNET_C_PROC_FLAGS -msse4.1)
endif()
# RPi / ARM 32bit
if(${NEON} OR ${NEON} GREATER 0)
elseif(${NEON} AND (${NEON_PRESENT} OR ${NEON_PRESENT} GREATER 0))
# RPi / ARM 32bit
message(STATUS "neon processor flags found or enabled.")
set(LPCNET_C_PROC_FLAGS -mfpu=neon -march=armv8-a -mtune=cortex-a53)
endif()