From 6c9780720b3f46127e0a36fe34b8e281a7358f70 Mon Sep 17 00:00:00 2001
From: Ozkan Sezer
Date: Tue, 21 Mar 2023 14:55:32 +0300
Subject: [PATCH] cmake: added configuration options for AVX2, AVX512F, SSE4.1,
 and SSE4.2

adjusted SDL_intrin.h and testautomation_intrinsics.c accordingly.
---
 CMakeLists.txt                                | 158 ++++++++++++++----
 include/SDL3/SDL_intrin.h                     |   4 +
 include/build_config/SDL_build_config.h.cmake |   5 +-
 test/testautomation_intrinsics.c              |   4 +-
 4 files changed, 137 insertions(+), 34 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1edfefb91..f74a91ba9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -368,9 +368,13 @@ option_string(SDL_ASSERTIONS "Enable internal sanity checks (auto/disabled/relea
 #set_option(SDL_DEPENDENCY_TRACKING "Use gcc -MMD -MT dependency tracking" ON)
 set_option(SDL_ASSEMBLY "Enable assembly routines" ${SDL_ASSEMBLY_DEFAULT})
 dep_option(SDL_AVX "Use AVX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
+dep_option(SDL_AVX2 "Use AVX2 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
+dep_option(SDL_AVX512F "Use AVX512F assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
 dep_option(SDL_SSE "Use SSE assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
 dep_option(SDL_SSE2 "Use SSE2 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
 dep_option(SDL_SSE3 "Use SSE3 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
+dep_option(SDL_SSE41 "Use SSE4.1 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
+dep_option(SDL_SSE42 "Use SSE4.2 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
 dep_option(SDL_MMX "Use MMX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
 dep_option(SDL_ALTIVEC "Use Altivec assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_POWERPC32 OR SDL_CPU_POWERPC64" OFF)
 dep_option(SDL_ARMSIMD "Use SIMD assembly blitters on ARM" OFF "SDL_ASSEMBLY;SDL_CPU_ARM32" OFF)
@@ -773,6 +777,71 @@ if(SDL_ASSEMBLY)
       set(HAVE_SSE2 TRUE)
     endif()
   endif()
+  if(SDL_SSE3)
+    cmake_push_check_state()
+    if(USE_GCC OR USE_CLANG)
+      set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -msse3")
+    endif()
+    check_c_source_compiles("
+      #include <pmmintrin.h>
+      void ints_add(int *dest, int *a, int *b, unsigned size) {
+        for (; size >= 4; size -= 4, dest += 4, a += 4, b += 4) {
+          _mm_storeu_si128((__m128i*)dest, _mm_add_epi32(_mm_lddqu_si128((__m128i*)a), _mm_lddqu_si128((__m128i*)b)));
+        }
+      }
+      int main(int argc, char **argv) {
+        ints_add((int*)0, (int*)0, (int*)0, 0);
+        return 0;
+      }" COMPILER_SUPPORTS_SSE3)
+    cmake_pop_check_state()
+    if(COMPILER_SUPPORTS_SSE3)
+      set(HAVE_SSE3 TRUE)
+    endif()
+  endif()
+  if(SDL_SSE41)
+    cmake_push_check_state()
+    if(USE_GCC OR USE_CLANG)
+      set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -msse4.1")
+    endif()
+    check_c_source_compiles("
+      #include <smmintrin.h>
+      void ints_mul(int *dest, int *a, int *b, unsigned size) {
+        for (; size >= 4; size -= 4, dest += 4, a += 4, b += 4) {
+          _mm_storeu_si128((__m128i*)dest, _mm_mullo_epi32(_mm_lddqu_si128((__m128i*)a), _mm_lddqu_si128((__m128i*)b)));
+        }
+      }
+      int main(int argc, char **argv) {
+        ints_mul((int*)0, (int*)0, (int*)0, 0);
+        return 0;
+      }" COMPILER_SUPPORTS_SSE41)
+    cmake_pop_check_state()
+    if(COMPILER_SUPPORTS_SSE41)
+      set(HAVE_SSE41 TRUE)
+    endif()
+  endif()
+  if(SDL_SSE42)
+    cmake_push_check_state()
+    if(USE_GCC OR USE_CLANG)
+      set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -msse4.2")
+    endif()
+    check_c_source_compiles("
+      #include <nmmintrin.h>
+      unsigned calc_crc32c(const char *text, unsigned len) {
+        unsigned crc32c = ~0;
+        for (; len >= 4; len -= 4, text += 4) {
+          crc32c = (unsigned)_mm_crc32_u32(crc32c, *(unsigned*)text);
+        }
+        return crc32c;
+      }
+      int main(int argc, char **argv) {
+        calc_crc32c(\"SDL_SSE4\",8);
+        return 0;
+      }" COMPILER_SUPPORTS_SSE42)
+    cmake_pop_check_state()
+    if(COMPILER_SUPPORTS_SSE42)
+      set(HAVE_SSE42 TRUE)
+    endif()
+  endif()
   if(SDL_AVX)
     cmake_push_check_state()
     if(USE_GCC OR USE_CLANG)
@@ -794,35 +863,53 @@ if(SDL_ASSEMBLY)
       set(HAVE_AVX TRUE)
     endif()
   endif()
+  if(SDL_AVX2)
+    cmake_push_check_state()
+    if(USE_GCC OR USE_CLANG)
+      set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mavx2")
+    endif()
+    check_c_source_compiles("
+      #include <immintrin.h>
+      void ints_add(int *dest, int *a, int *b, unsigned size) {
+        for (; size >= 8; size -= 8, dest += 8, a += 8, b += 8) {
+          _mm256_storeu_si256((__m256i*)dest, _mm256_add_epi32(_mm256_loadu_si256((__m256i*)a), _mm256_loadu_si256((__m256i*)b)));
+        }
+      }
+      int main(int argc, char **argv) {
+        ints_add((int*)0, (int*)0, (int*)0, 0);
+        return 0;
+      }" COMPILER_SUPPORTS_AVX2)
+    cmake_pop_check_state()
+    if(COMPILER_SUPPORTS_AVX2)
+      set(HAVE_AVX2 TRUE)
+    endif()
+  endif()
+  if(SDL_AVX512F)
+    cmake_push_check_state()
+    if(USE_GCC OR USE_CLANG)
+      set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mavx512f")
+    endif()
+    check_c_source_compiles("
+      #include <immintrin.h>
+      void floats_add(float *dest, float *a, float *b, unsigned size) {
+        for (; size >= 16; size -= 16, dest += 16, a += 16, b += 16) {
+          _mm512_storeu_ps(dest, _mm512_add_ps(_mm512_loadu_ps(a), _mm512_loadu_ps(b)));
+        }
+      }
+      int main(int argc, char **argv) {
+        floats_add((float*)0, (float*)0, (float*)0, 0);
+        return 0;
+      }" COMPILER_SUPPORTS_AVX512F)
+    cmake_pop_check_state()
+    if(COMPILER_SUPPORTS_AVX512F)
+      set(HAVE_AVX512F TRUE)
+    endif()
+  endif()
 
   if(USE_GCC OR USE_CLANG)
     # TODO: Those all seem to be quite GCC specific - needs to be
     # reworked for better compiler support
 
-    if(SDL_SSE3)
-      cmake_push_check_state()
-      set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -msse3")
-      check_c_source_compiles("
-          #ifdef __MINGW32__
-          #include <_mingw.h>
-          #ifdef __MINGW64_VERSION_MAJOR
-          #include <intrin.h>
-          #else
-          #include <pmmintrin.h>
-          #endif
-          #else
-          #include <pmmintrin.h>
-          #endif
-          #ifndef __SSE3__
-          #error Assembler CPP flag not enabled
-          #endif
-          int main(int argc, char **argv) { return 0; }" COMPILER_SUPPORTS_SSE3)
-      cmake_pop_check_state()
-      if(COMPILER_SUPPORTS_SSE3)
-        set(HAVE_SSE3 TRUE)
-      endif()
-    endif()
-
     if(SDL_ALTIVEC)
       cmake_push_check_state()
       set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -maltivec")
@@ -938,13 +1025,6 @@ if(SDL_ASSEMBLY)
         set(WARN_ABOUT_ARM_NEON_ASM_MIT TRUE)
       endif()
     endif()
-
-  elseif(MSVC_VERSION GREATER 1500)
-    # for MSVC - right now it is always activated
-    if(SDL_SSE3)
-      # TODO: SDL_cpuinfo.h needs to support the user's configuration wish
-      set(HAVE_SSE3 TRUE)
-    endif()
   endif()
 endif()
 
@@ -964,10 +1044,26 @@ if(NOT HAVE_SSE3)
   set(SDL_DISABLE_SSE3 1)
 endif()
 
+if(NOT HAVE_SSE41)
+  set(SDL_DISABLE_SSE41 1)
+endif()
+
+if(NOT HAVE_SSE42)
+  set(SDL_DISABLE_SSE42 1)
+endif()
+
 if(NOT HAVE_AVX)
   set(SDL_DISABLE_AVX 1)
 endif()
 
+if(NOT HAVE_AVX2)
+  set(SDL_DISABLE_AVX2 1)
+endif()
+
+if(NOT HAVE_AVX512F)
+  set(SDL_DISABLE_AVX512F 1)
+endif()
+
 if(NOT HAVE_LSX)
   set(SDL_DISABLE_LSX 1)
 endif()
diff --git a/include/SDL3/SDL_intrin.h b/include/SDL3/SDL_intrin.h
index a30c71a45..c44df98ef 100644
--- a/include/SDL3/SDL_intrin.h
+++ b/include/SDL3/SDL_intrin.h
@@ -144,6 +144,10 @@ _m_prefetch(void *__P)
 #  define SDL_AVX_INTRINSICS 1
 #  include <immintrin.h>
 # endif
+# if (defined(_MSC_VER) || defined(__AVX2__) || defined(SDL_HAS_TARGET_ATTRIBS)) && !defined(SDL_DISABLE_AVX2)
+#  define SDL_AVX2_INTRINSICS 1
+#  include <immintrin.h>
+# endif
 # if (defined(_MSC_VER) || defined(__AVX512F__) || defined(SDL_HAS_TARGET_ATTRIBS)) && !defined(SDL_DISABLE_AVX512F)
 #  define SDL_AVX512F_INTRINSICS 1
 #  include <immintrin.h>
diff --git a/include/build_config/SDL_build_config.h.cmake b/include/build_config/SDL_build_config.h.cmake
index 742172c24..728fd0b83 100644
--- a/include/build_config/SDL_build_config.h.cmake
+++ b/include/build_config/SDL_build_config.h.cmake
@@ -585,11 +585,14 @@ typedef unsigned int uintptr_t;
 #endif /* !_STDINT_H_ && !HAVE_STDINT_H */
 
 /* Configure use of intrinsics */
-
 #cmakedefine SDL_DISABLE_SSE 1
 #cmakedefine SDL_DISABLE_SSE2 1
 #cmakedefine SDL_DISABLE_SSE3 1
+#cmakedefine SDL_DISABLE_SSE41 1
+#cmakedefine SDL_DISABLE_SSE42 1
 #cmakedefine SDL_DISABLE_AVX 1
+#cmakedefine SDL_DISABLE_AVX2 1
+#cmakedefine SDL_DISABLE_AVX512F 1
 #cmakedefine SDL_DISABLE_MMX 1
 #cmakedefine SDL_DISABLE_LSX 1
 #cmakedefine SDL_DISABLE_LASX 1
diff --git a/test/testautomation_intrinsics.c b/test/testautomation_intrinsics.c
index 16b19d547..b8e8a1b46 100644
--- a/test/testautomation_intrinsics.c
+++ b/test/testautomation_intrinsics.c
@@ -287,7 +287,7 @@ SDL_TARGETING("avx") static void kernel_floats_add_avx(float *dest, const float
 }
 #endif
 
-#if SDL_AVX_INTRINSICS
+#if SDL_AVX2_INTRINSICS
 SDL_TARGETING("avx2") static void kernel_ints_add_avx2(Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size) {
     for (; size >= 8; size -= 8, dest += 8, a += 8, b += 8) {
         _mm256_storeu_si256((__m256i*)dest, _mm256_add_epi32(_mm256_loadu_si256((__m256i*)a), _mm256_loadu_si256((__m256i*)b)));
@@ -591,7 +591,7 @@ static int intrinsics_testAVX2(void *arg)
 {
     if (SDL_HasAVX2()) {
         SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has AVX2 support.");
-#if SDL_AVX_INTRINSICS
+#if SDL_AVX2_INTRINSICS
         {
             size_t size;
             Sint32 *dest, *a, *b;