cmake: added configuration options for AVX2, AVX512F, SSE4.1, and SSE4.2
adjusted SDL_intrin.h and testautomation_intrinsics.c accordingly.
main
parent
feec1305c6
commit
6c9780720b
158
CMakeLists.txt
158
CMakeLists.txt
|
@ -368,9 +368,13 @@ option_string(SDL_ASSERTIONS "Enable internal sanity checks (auto/disabled/relea
|
||||||
#set_option(SDL_DEPENDENCY_TRACKING "Use gcc -MMD -MT dependency tracking" ON)
|
#set_option(SDL_DEPENDENCY_TRACKING "Use gcc -MMD -MT dependency tracking" ON)
|
||||||
set_option(SDL_ASSEMBLY "Enable assembly routines" ${SDL_ASSEMBLY_DEFAULT})
|
set_option(SDL_ASSEMBLY "Enable assembly routines" ${SDL_ASSEMBLY_DEFAULT})
|
||||||
dep_option(SDL_AVX "Use AVX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
dep_option(SDL_AVX "Use AVX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||||
|
dep_option(SDL_AVX2 "Use AVX2 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||||
|
dep_option(SDL_AVX512F "Use AVX512F assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||||
dep_option(SDL_SSE "Use SSE assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
dep_option(SDL_SSE "Use SSE assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||||
dep_option(SDL_SSE2 "Use SSE2 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
dep_option(SDL_SSE2 "Use SSE2 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||||
dep_option(SDL_SSE3 "Use SSE3 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
dep_option(SDL_SSE3 "Use SSE3 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||||
|
dep_option(SDL_SSE41 "Use SSE4.1 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||||
|
dep_option(SDL_SSE42 "Use SSE4.2 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||||
dep_option(SDL_MMX "Use MMX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
dep_option(SDL_MMX "Use MMX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||||
dep_option(SDL_ALTIVEC "Use Altivec assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_POWERPC32 OR SDL_CPU_POWERPC64" OFF)
|
dep_option(SDL_ALTIVEC "Use Altivec assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_POWERPC32 OR SDL_CPU_POWERPC64" OFF)
|
||||||
dep_option(SDL_ARMSIMD "Use SIMD assembly blitters on ARM" OFF "SDL_ASSEMBLY;SDL_CPU_ARM32" OFF)
|
dep_option(SDL_ARMSIMD "Use SIMD assembly blitters on ARM" OFF "SDL_ASSEMBLY;SDL_CPU_ARM32" OFF)
|
||||||
|
@ -773,6 +777,71 @@ if(SDL_ASSEMBLY)
|
||||||
set(HAVE_SSE2 TRUE)
|
set(HAVE_SSE2 TRUE)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
if(SDL_SSE3)
|
||||||
|
cmake_push_check_state()
|
||||||
|
if(USE_GCC OR USE_CLANG)
|
||||||
|
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -msse3")
|
||||||
|
endif()
|
||||||
|
check_c_source_compiles("
|
||||||
|
#include <pmmintrin.h>
|
||||||
|
void ints_add(int *dest, int *a, int *b, unsigned size) {
|
||||||
|
for (; size >= 4; size -= 4, dest += 4, a += 4, b += 4) {
|
||||||
|
_mm_storeu_si128((__m128i*)dest, _mm_add_epi32(_mm_lddqu_si128((__m128i*)a), _mm_lddqu_si128((__m128i*)b)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
ints_add((int*)0, (int*)0, (int*)0, 0);
|
||||||
|
return 0;
|
||||||
|
}" COMPILER_SUPPORTS_SSE3)
|
||||||
|
cmake_pop_check_state()
|
||||||
|
if(COMPILER_SUPPORTS_SSE3)
|
||||||
|
set(HAVE_SSE3 TRUE)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
if(SDL_SSE41)
|
||||||
|
cmake_push_check_state()
|
||||||
|
if(USE_GCC OR USE_CLANG)
|
||||||
|
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -msse4.1")
|
||||||
|
endif()
|
||||||
|
check_c_source_compiles("
|
||||||
|
#include <smmintrin.h>
|
||||||
|
void ints_mul(int *dest, int *a, int *b, unsigned size) {
|
||||||
|
for (; size >= 4; size -= 4, dest += 4, a += 4, b += 4) {
|
||||||
|
_mm_storeu_si128((__m128i*)dest, _mm_mullo_epi32(_mm_lddqu_si128((__m128i*)a), _mm_lddqu_si128((__m128i*)b)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
ints_mul((int*)0, (int*)0, (int*)0, 0);
|
||||||
|
return 0;
|
||||||
|
}" COMPILER_SUPPORTS_SSE41)
|
||||||
|
cmake_pop_check_state()
|
||||||
|
if(COMPILER_SUPPORTS_SSE41)
|
||||||
|
set(HAVE_SSE41 TRUE)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
if(SDL_SSE42)
|
||||||
|
cmake_push_check_state()
|
||||||
|
if(USE_GCC OR USE_CLANG)
|
||||||
|
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -msse4.2")
|
||||||
|
endif()
|
||||||
|
check_c_source_compiles("
|
||||||
|
#include <nmmintrin.h>
|
||||||
|
unsigned calc_crc32c(const char *text, unsigned len) {
|
||||||
|
unsigned crc32c = ~0;
|
||||||
|
for (; len >= 4; len -= 4, text += 4) {
|
||||||
|
crc32c = (unsigned)_mm_crc32_u32(crc32c, *(unsigned*)text);
|
||||||
|
}
|
||||||
|
return crc32c;
|
||||||
|
}
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
calc_crc32c(\"SDL_SSE4\",8);
|
||||||
|
return 0;
|
||||||
|
}" COMPILER_SUPPORTS_SSE42)
|
||||||
|
cmake_pop_check_state()
|
||||||
|
if(COMPILER_SUPPORTS_SSE42)
|
||||||
|
set(HAVE_SSE42 TRUE)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
if(SDL_AVX)
|
if(SDL_AVX)
|
||||||
cmake_push_check_state()
|
cmake_push_check_state()
|
||||||
if(USE_GCC OR USE_CLANG)
|
if(USE_GCC OR USE_CLANG)
|
||||||
|
@ -794,35 +863,53 @@ if(SDL_ASSEMBLY)
|
||||||
set(HAVE_AVX TRUE)
|
set(HAVE_AVX TRUE)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
if(SDL_AVX2)
|
||||||
|
cmake_push_check_state()
|
||||||
|
if(USE_GCC OR USE_CLANG)
|
||||||
|
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mavx2")
|
||||||
|
endif()
|
||||||
|
check_c_source_compiles("
|
||||||
|
#include <immintrin.h>
|
||||||
|
void ints_add(int *dest, int *a, int *b, unsigned size) {
|
||||||
|
for (; size >= 8; size -= 8, dest += 8, a += 8, b += 8) {
|
||||||
|
_mm256_storeu_si256((__m256i*)dest, _mm256_add_epi32(_mm256_loadu_si256((__m256i*)a), _mm256_loadu_si256((__m256i*)b)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
ints_add((int*)0, (int*)0, (int*)0, 0);
|
||||||
|
return 0;
|
||||||
|
}" COMPILER_SUPPORTS_AVX2)
|
||||||
|
cmake_pop_check_state()
|
||||||
|
if(COMPILER_SUPPORTS_AVX2)
|
||||||
|
set(HAVE_AVX2 TRUE)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
if(SDL_AVX512F)
|
||||||
|
cmake_push_check_state()
|
||||||
|
if(USE_GCC OR USE_CLANG)
|
||||||
|
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mavx512f")
|
||||||
|
endif()
|
||||||
|
check_c_source_compiles("
|
||||||
|
#include <immintrin.h>
|
||||||
|
void floats_add(float *dest, float *a, float *b, unsigned size) {
|
||||||
|
for (; size >= 16; size -= 16, dest += 16, a += 16, b += 16) {
|
||||||
|
_mm512_storeu_ps(dest, _mm512_add_ps(_mm512_loadu_ps(a), _mm512_loadu_ps(b)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
floats_add((float*)0, (float*)0, (float*)0, 0);
|
||||||
|
return 0;
|
||||||
|
}" COMPILER_SUPPORTS_AVX512F)
|
||||||
|
cmake_pop_check_state()
|
||||||
|
if(COMPILER_SUPPORTS_AVX512F)
|
||||||
|
set(HAVE_AVX512F TRUE)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
if(USE_GCC OR USE_CLANG)
|
if(USE_GCC OR USE_CLANG)
|
||||||
# TODO: Those all seem to be quite GCC specific - needs to be
|
# TODO: Those all seem to be quite GCC specific - needs to be
|
||||||
# reworked for better compiler support
|
# reworked for better compiler support
|
||||||
|
|
||||||
if(SDL_SSE3)
|
|
||||||
cmake_push_check_state()
|
|
||||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -msse3")
|
|
||||||
check_c_source_compiles("
|
|
||||||
#ifdef __MINGW32__
|
|
||||||
#include <_mingw.h>
|
|
||||||
#ifdef __MINGW64_VERSION_MAJOR
|
|
||||||
#include <intrin.h>
|
|
||||||
#else
|
|
||||||
#include <pmmintrin.h>
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#include <pmmintrin.h>
|
|
||||||
#endif
|
|
||||||
#ifndef __SSE3__
|
|
||||||
#error Assembler CPP flag not enabled
|
|
||||||
#endif
|
|
||||||
int main(int argc, char **argv) { return 0; }" COMPILER_SUPPORTS_SSE3)
|
|
||||||
cmake_pop_check_state()
|
|
||||||
if(COMPILER_SUPPORTS_SSE3)
|
|
||||||
set(HAVE_SSE3 TRUE)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(SDL_ALTIVEC)
|
if(SDL_ALTIVEC)
|
||||||
cmake_push_check_state()
|
cmake_push_check_state()
|
||||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -maltivec")
|
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -maltivec")
|
||||||
|
@ -938,13 +1025,6 @@ if(SDL_ASSEMBLY)
|
||||||
set(WARN_ABOUT_ARM_NEON_ASM_MIT TRUE)
|
set(WARN_ABOUT_ARM_NEON_ASM_MIT TRUE)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
elseif(MSVC_VERSION GREATER 1500)
|
|
||||||
# for MSVC - right now it is always activated
|
|
||||||
if(SDL_SSE3)
|
|
||||||
# TODO: SDL_cpuinfo.h needs to support the user's configuration wish
|
|
||||||
set(HAVE_SSE3 TRUE)
|
|
||||||
endif()
|
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@ -964,10 +1044,26 @@ if(NOT HAVE_SSE3)
|
||||||
set(SDL_DISABLE_SSE3 1)
|
set(SDL_DISABLE_SSE3 1)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(NOT HAVE_SSE41)
|
||||||
|
set(SDL_DISABLE_SSE41 1)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Disable SSE4.2 intrinsics when the SSE4.2 compile check did not set HAVE_SSE42.
|
||||||
|
if(NOT HAVE_SSE42)
|
||||||
|
set(SDL_DISABLE_SSE42 1)
|
||||||
|
endif()
|
||||||
|
|
||||||
if(NOT HAVE_AVX)
|
if(NOT HAVE_AVX)
|
||||||
set(SDL_DISABLE_AVX 1)
|
set(SDL_DISABLE_AVX 1)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(NOT HAVE_AVX2)
|
||||||
|
set(SDL_DISABLE_AVX2 1)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(NOT HAVE_AVX512F)
|
||||||
|
set(SDL_DISABLE_AVX512F 1)
|
||||||
|
endif()
|
||||||
|
|
||||||
if(NOT HAVE_LSX)
|
if(NOT HAVE_LSX)
|
||||||
set(SDL_DISABLE_LSX 1)
|
set(SDL_DISABLE_LSX 1)
|
||||||
endif()
|
endif()
|
||||||
|
|
|
@ -144,6 +144,10 @@ _m_prefetch(void *__P)
|
||||||
# define SDL_AVX_INTRINSICS 1
|
# define SDL_AVX_INTRINSICS 1
|
||||||
# include <immintrin.h>
|
# include <immintrin.h>
|
||||||
# endif
|
# endif
|
||||||
|
# if (defined(_MSC_VER) || defined(__AVX__) || defined(SDL_HAS_TARGET_ATTRIBS)) && !defined(SDL_DISABLE_AVX2)
|
||||||
|
# define SDL_AVX2_INTRINSICS 1
|
||||||
|
# include <immintrin.h>
|
||||||
|
# endif
|
||||||
# if (defined(_MSC_VER) || defined(__AVX512F__) || defined(SDL_HAS_TARGET_ATTRIBS)) && !defined(SDL_DISABLE_AVX512F)
|
# if (defined(_MSC_VER) || defined(__AVX512F__) || defined(SDL_HAS_TARGET_ATTRIBS)) && !defined(SDL_DISABLE_AVX512F)
|
||||||
# define SDL_AVX512F_INTRINSICS 1
|
# define SDL_AVX512F_INTRINSICS 1
|
||||||
# include <immintrin.h>
|
# include <immintrin.h>
|
||||||
|
|
|
@ -585,11 +585,14 @@ typedef unsigned int uintptr_t;
|
||||||
#endif /* !_STDINT_H_ && !HAVE_STDINT_H */
|
#endif /* !_STDINT_H_ && !HAVE_STDINT_H */
|
||||||
|
|
||||||
/* Configure use of intrinsics */
|
/* Configure use of intrinsics */
|
||||||
|
|
||||||
#cmakedefine SDL_DISABLE_SSE 1
|
#cmakedefine SDL_DISABLE_SSE 1
|
||||||
#cmakedefine SDL_DISABLE_SSE2 1
|
#cmakedefine SDL_DISABLE_SSE2 1
|
||||||
#cmakedefine SDL_DISABLE_SSE3 1
|
#cmakedefine SDL_DISABLE_SSE3 1
|
||||||
|
#cmakedefine SDL_DISABLE_SSE41 1
|
||||||
|
#cmakedefine SDL_DISABLE_SSE42 1
|
||||||
#cmakedefine SDL_DISABLE_AVX 1
|
#cmakedefine SDL_DISABLE_AVX 1
|
||||||
|
#cmakedefine SDL_DISABLE_AVX2 1
|
||||||
|
#cmakedefine SDL_DISABLE_AVX512F 1
|
||||||
#cmakedefine SDL_DISABLE_MMX 1
|
#cmakedefine SDL_DISABLE_MMX 1
|
||||||
#cmakedefine SDL_DISABLE_LSX 1
|
#cmakedefine SDL_DISABLE_LSX 1
|
||||||
#cmakedefine SDL_DISABLE_LASX 1
|
#cmakedefine SDL_DISABLE_LASX 1
|
||||||
|
|
|
@ -287,7 +287,7 @@ SDL_TARGETING("avx") static void kernel_floats_add_avx(float *dest, const float
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if SDL_AVX_INTRINSICS
|
#if SDL_AVX2_INTRINSICS
|
||||||
SDL_TARGETING("avx2") static void kernel_ints_add_avx2(Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size) {
|
SDL_TARGETING("avx2") static void kernel_ints_add_avx2(Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size) {
|
||||||
for (; size >= 8; size -= 8, dest += 8, a += 8, b += 8) {
|
for (; size >= 8; size -= 8, dest += 8, a += 8, b += 8) {
|
||||||
_mm256_storeu_si256((__m256i*)dest, _mm256_add_epi32(_mm256_loadu_si256((__m256i*)a), _mm256_loadu_si256((__m256i*)b)));
|
_mm256_storeu_si256((__m256i*)dest, _mm256_add_epi32(_mm256_loadu_si256((__m256i*)a), _mm256_loadu_si256((__m256i*)b)));
|
||||||
|
@ -591,7 +591,7 @@ static int intrinsics_testAVX2(void *arg)
|
||||||
{
|
{
|
||||||
if (SDL_HasAVX2()) {
|
if (SDL_HasAVX2()) {
|
||||||
SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has AVX2 support.");
|
SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has AVX2 support.");
|
||||||
#if SDL_AVX_INTRINSICS
|
#if SDL_AVX2_INTRINSICS
|
||||||
{
|
{
|
||||||
size_t size;
|
size_t size;
|
||||||
Sint32 *dest, *a, *b;
|
Sint32 *dest, *a, *b;
|
||||||
|
|
Loading…
Reference in New Issue