From 4681240241b86372a62b98d2f19d33c2971bada0 Mon Sep 17 00:00:00 2001 From: Anonymous Maarten Date: Sat, 25 Feb 2023 00:21:15 +0100 Subject: [PATCH] cmake: Detect AVX + allow build system to disable Intel intrinsics --- CMakeLists.txt | 72 +++++++++++++++++-- include/SDL3/SDL_intrin.h | 13 ++-- include/build_config/SDL_build_config.h.cmake | 19 ++--- include/build_config/SDL_build_config_macos.h | 4 +- .../build_config/SDL_build_config_windows.h | 7 +- .../build_config/SDL_build_config_wingdk.h | 7 +- include/build_config/SDL_build_config_xbox.h | 7 +- src/SDL_internal.h | 40 +++++++++++ src/audio/SDL_audiocvt.c | 29 -------- src/audio/SDL_audiotypecvt.c | 4 -- src/video/SDL_blit_A.c | 20 +++--- src/video/SDL_blit_copy.c | 12 ++-- src/video/SDL_fillrect.c | 8 +-- src/video/SDL_stretch.c | 4 -- src/video/SDL_yuv.c | 40 +++++------ src/video/yuv2rgb/yuv_rgb.c | 5 +- 16 files changed, 175 insertions(+), 116 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 056b293b5..dd6616648 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -367,6 +367,7 @@ option_string(SDL_ASSERTIONS "Enable internal sanity checks (auto/disabled/relea #set_option(SDL_DEPENDENCY_TRACKING "Use gcc -MMD -MT dependency tracking" ON) set_option(SDL_ASSEMBLY "Enable assembly routines" ${SDL_ASSEMBLY_DEFAULT}) dep_option(SDL_SSEMATH "Allow GCC to use SSE floating point math" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF) +dep_option(SDL_AVX "Use AVX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF) dep_option(SDL_SSE "Use SSE assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF) dep_option(SDL_SSE2 "Use SSE2 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF) dep_option(SDL_SSE3 "Use SSE3 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF) @@ -711,6 +712,32 @@ if(SDL_ASSEMBLY) # TODO: Those all seem to be quite GCC specific - needs to be # reworked for better compiler support set(HAVE_ASSEMBLY TRUE) + + if(SDL_AVX) + 
cmake_push_check_state() + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mavx") + check_c_source_compiles(" + #ifdef __MINGW32__ + #include <_mingw.h> + #ifdef __MINGW64_VERSION_MAJOR + #include <intrin.h> + #else + #include <immintrin.h> + #endif + #else + #include <immintrin.h> + #endif + #ifndef __AVX__ + #error Assembler CPP flag not enabled + #endif + int main(int argc, char **argv) { return 0; }" CPU_SUPPORTS_AVX) + cmake_pop_check_state() + if(CPU_SUPPORTS_AVX) + set(HAVE_AVX TRUE) + target_compile_options(sdl-build-options INTERFACE "-mavx") + endif() + endif() + if(SDL_MMX) cmake_push_check_state() set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mmmx") @@ -728,9 +755,10 @@ if(SDL_ASSEMBLY) #ifndef __MMX__ #error Assembler CPP flag not enabled #endif - int main(int argc, char **argv) { return 0; }" HAVE_MMX) + int main(int argc, char **argv) { return 0; }" CPU_SUPPORTS_MMX) cmake_pop_check_state() - if(HAVE_MMX) + if(CPU_SUPPORTS_MMX) + set(HAVE_MMX TRUE) target_compile_options(sdl-build-options INTERFACE "-mmmx") endif() endif() @@ -823,8 +851,6 @@ if(SDL_ASSEMBLY) set(HAVE_SSEMATH TRUE) endif() - check_include_file("immintrin.h" HAVE_IMMINTRIN_H) - if(SDL_ALTIVEC) cmake_push_check_state() set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -maltivec") @@ -954,10 +980,46 @@ if(SDL_ASSEMBLY) if(SDL_SSE3) set(HAVE_SSE3 TRUE) endif() - check_include_file("immintrin.h" HAVE_IMMINTRIN_H) + if(SDL_AVX) + cmake_push_check_state() + # /arch:AVX is a cl.exe compiler switch: it must reach the test compile so __AVX__ gets defined + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} /arch:AVX") + check_c_source_compiles(" + #include <immintrin.h> + #ifndef __AVX__ + #error Assembler CPP flag not enabled + #endif + int main(int argc, char **argv) { return 0; }" CPU_SUPPORTS_AVX) + cmake_pop_check_state() + if(CPU_SUPPORTS_AVX) + # Propagate as a compile option (same as -mavx in the GCC branch), not as a link item + target_compile_options(sdl-build-options INTERFACE "/arch:AVX") + set(HAVE_AVX TRUE) + endif() + endif() endif() endif() +if(NOT HAVE_AVX) + set(SDL_DISABLE_AVX 1) +endif() +
+if(NOT HAVE_MMX) + set(SDL_DISABLE_MMX 1) +endif() + +if(NOT HAVE_SSE) + set(SDL_DISABLE_SSE 1) +endif() + +if(NOT HAVE_SSE2) + set(SDL_DISABLE_SSE2 1) +endif() + +if(NOT HAVE_SSE3) + set(SDL_DISABLE_SSE3 1) +endif() + # TODO: Can't deactivate on FreeBSD? w/o LIBC, SDL_stdinc.h can't define # anything. if(SDL_LIBC) diff --git a/include/SDL3/SDL_intrin.h b/include/SDL3/SDL_intrin.h index 8add09bae..be9b29b0a 100644 --- a/include/SDL3/SDL_intrin.h +++ b/include/SDL3/SDL_intrin.h @@ -101,21 +101,20 @@ _m_prefetch(void *__P) #include <lasxintrin.h> #define __LASX__ #endif -#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H) +#if defined(__AVX__) && !defined(SDL_DISABLE_AVX) #include <immintrin.h> -#else -#if defined(__MMX__) && !defined(SDL_DISABLE_MMINTRIN_H) +#endif +#if defined(__MMX__) && !defined(SDL_DISABLE_MMX) #include <mmintrin.h> #endif -#if defined(__SSE__) && !defined(SDL_DISABLE_XMMINTRIN_H) +#if defined(__SSE__) && !defined(SDL_DISABLE_SSE) #include <xmmintrin.h> #endif -#if defined(__SSE2__) && !defined(SDL_DISABLE_EMMINTRIN_H) +#if defined(__SSE2__) && !defined(SDL_DISABLE_SSE2) #include <emmintrin.h> #endif -#if defined(__SSE3__) && !defined(SDL_DISABLE_PMMINTRIN_H) +#if defined(__SSE3__) && !defined(SDL_DISABLE_SSE3) #include <pmmintrin.h> #endif -#endif /* HAVE_IMMINTRIN_H */ #endif /* SDL_intrin_h_ */ diff --git a/include/build_config/SDL_build_config.h.cmake b/include/build_config/SDL_build_config.h.cmake index 8f26167b1..568e6c6a5 100644 --- a/include/build_config/SDL_build_config.h.cmake +++ b/include/build_config/SDL_build_config.h.cmake @@ -226,18 +226,6 @@ #cmakedefine HAVE_LIBUSB 1 #cmakedefine HAVE_O_CLOEXEC 1 -/* Apple platforms might be building universal binaries, where Intel builds - can use immintrin.h but other architectures can't. */ -#ifdef __APPLE__ -# if defined(__has_include) && (defined(__i386__) || defined(__x86_64)) -# if __has_include(<immintrin.h>) -# define HAVE_IMMINTRIN_H 1 -# endif -# endif -#else /* non-Apple platforms can use the normal CMake check for this.
*/ -#cmakedefine HAVE_IMMINTRIN_H 1 -#endif - #cmakedefine HAVE_LIBUDEV_H 1 #cmakedefine HAVE_LIBSAMPLERATE_H 1 #cmakedefine HAVE_LIBDECOR_H 1 @@ -597,4 +585,12 @@ typedef unsigned int uintptr_t; #endif /* Visual Studio 2008 */ #endif /* !_STDINT_H_ && !HAVE_STDINT_H */ +/* Configure use of intrinsics */ + +#cmakedefine SDL_DISABLE_MMX 1 +#cmakedefine SDL_DISABLE_SSE 1 +#cmakedefine SDL_DISABLE_SSE2 1 +#cmakedefine SDL_DISABLE_SSE3 1 +#cmakedefine SDL_DISABLE_AVX 1 + #endif /* SDL_build_config_h_ */ diff --git a/include/build_config/SDL_build_config_macos.h b/include/build_config/SDL_build_config_macos.h index df6d679e1..139cd7228 100644 --- a/include/build_config/SDL_build_config_macos.h +++ b/include/build_config/SDL_build_config_macos.h @@ -138,8 +138,8 @@ #define HAVE_SYSCTLBYNAME 1 #if defined(__has_include) && (defined(__i386__) || defined(__x86_64)) -# if __has_include(<immintrin.h>) -# define HAVE_IMMINTRIN_H 1 +# if !__has_include(<immintrin.h>) +# define SDL_DISABLE_AVX 1 # endif #endif diff --git a/include/build_config/SDL_build_config_windows.h b/include/build_config/SDL_build_config_windows.h index e1d76dc6a..18cf96e91 100644 --- a/include/build_config/SDL_build_config_windows.h +++ b/include/build_config/SDL_build_config_windows.h @@ -103,11 +103,12 @@ typedef unsigned int uintptr_t; #define HAVE_TPCSHRD_H 1 #define HAVE_SENSORSAPI_H 1 #if (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)) && (defined(_MSC_VER) && _MSC_VER >= 1600) -#define HAVE_IMMINTRIN_H 1 #elif defined(__has_include) && (defined(__i386__) || defined(__x86_64)) -# if __has_include(<immintrin.h>) -# define HAVE_IMMINTRIN_H 1 +# if !__has_include(<immintrin.h>) +# define SDL_DISABLE_AVX 1 # endif +#else +# define SDL_DISABLE_AVX 1 #endif /* This is disabled by default to avoid C runtime dependencies and manifest requirements */ diff --git a/include/build_config/SDL_build_config_wingdk.h b/include/build_config/SDL_build_config_wingdk.h index 49fecf39f..512213b7e 100644 --- a/include/build_config/SDL_build_config_wingdk.h +++
b/include/build_config/SDL_build_config_wingdk.h @@ -48,11 +48,12 @@ #define HAVE_TPCSHRD_H 1 #define HAVE_SENSORSAPI_H 1 #if (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)) && (defined(_MSC_VER) && _MSC_VER >= 1600) -#define HAVE_IMMINTRIN_H 1 #elif defined(__has_include) && (defined(__i386__) || defined(__x86_64)) -# if __has_include(<immintrin.h>) -# define HAVE_IMMINTRIN_H 1 +# if !__has_include(<immintrin.h>) +# define SDL_DISABLE_AVX 1 # endif +#else +# define SDL_DISABLE_AVX 1 #endif /* This is disabled by default to avoid C runtime dependencies and manifest requirements */ diff --git a/include/build_config/SDL_build_config_xbox.h b/include/build_config/SDL_build_config_xbox.h index 0f978a0fa..0059ca7d8 100644 --- a/include/build_config/SDL_build_config_xbox.h +++ b/include/build_config/SDL_build_config_xbox.h @@ -48,11 +48,12 @@ /*#define HAVE_TPCSHRD_H 1*/ /*#define HAVE_SENSORSAPI_H 1*/ #if (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)) && (defined(_MSC_VER) && _MSC_VER >= 1600) -#define HAVE_IMMINTRIN_H 1 #elif defined(__has_include) && (defined(__i386__) || defined(__x86_64)) -# if __has_include(<immintrin.h>) -# define HAVE_IMMINTRIN_H 1 +# if !__has_include(<immintrin.h>) +# define SDL_DISABLE_AVX 1 # endif +#else +# define SDL_DISABLE_AVX 1 #endif /* This is disabled by default to avoid C runtime dependencies and manifest requirements */ diff --git a/src/SDL_internal.h b/src/SDL_internal.h index 511d5b752..8e1e14191 100644 --- a/src/SDL_internal.h +++ b/src/SDL_internal.h @@ -185,6 +185,46 @@ #include <SDL3/SDL.h> #include <SDL3/SDL_intrin.h> + + +#ifdef __ARM_NEON +#define HAVE_NEON_INTRINSICS 1 +#endif + +#if defined(__MMX__) && !defined(SDL_DISABLE_MMX) +#define HAVE_MMX_INTRINSICS 1 +#endif + +#if defined(__SSE__) && !defined(SDL_DISABLE_SSE) +#define HAVE_SSE_INTRINSICS 1 +#endif + +#if defined(__SSE2__) && !defined(SDL_DISABLE_SSE2) +#define HAVE_SSE2_INTRINSICS 1 +#endif + +#if defined(__SSE3__) && !defined(SDL_DISABLE_SSE3) +#define HAVE_SSE3_INTRINSICS 1 +#endif + +#if defined(__AVX__) &&
!defined(SDL_DISABLE_AVX) +#define HAVE_AVX_INTRINSICS 1 +#endif + +#if defined __clang__ +#if (!__has_attribute(target)) +#undef HAVE_AVX_INTRINSICS +#endif +#if (defined(_MSC_VER) || defined(__SCE__)) && !defined(__AVX__) +#undef HAVE_AVX_INTRINSICS +#endif +#elif defined __GNUC__ +#if (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 9) +#undef HAVE_AVX_INTRINSICS +#endif +#endif + + #define SDL_MAIN_NOIMPL /* don't drag in header-only implementation of SDL_main */ #include <SDL3/SDL_main.h> diff --git a/src/audio/SDL_audiocvt.c b/src/audio/SDL_audiocvt.c index 904721bb0..b597e4cf3 100644 --- a/src/audio/SDL_audiocvt.c +++ b/src/audio/SDL_audiocvt.c @@ -29,35 +29,6 @@ #define DEBUG_AUDIOSTREAM 0 -#ifdef __ARM_NEON -#define HAVE_NEON_INTRINSICS 1 -#endif - -#ifdef __SSE__ -#define HAVE_SSE_INTRINSICS 1 -#endif - -#ifdef __SSE3__ -#define HAVE_SSE3_INTRINSICS 1 -#endif - -#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H) -#define HAVE_AVX_INTRINSICS 1 -#endif -#if defined __clang__ -#if (!__has_attribute(target)) -#undef HAVE_AVX_INTRINSICS -#endif -#if (defined(_MSC_VER) || defined(__SCE__)) && !defined(__AVX__) -#undef HAVE_AVX_INTRINSICS -#endif -#elif defined __GNUC__ -#if (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 9) -#undef HAVE_AVX_INTRINSICS -#endif -#endif - - /** * Initialize an SDL_AudioCVT structure for conversion. * diff --git a/src/audio/SDL_audiotypecvt.c b/src/audio/SDL_audiotypecvt.c index 6a99038fc..22595ec41 100644 --- a/src/audio/SDL_audiotypecvt.c +++ b/src/audio/SDL_audiotypecvt.c @@ -27,10 +27,6 @@ #define HAVE_NEON_INTRINSICS 1 #endif -#ifdef __SSE2__ -#define HAVE_SSE2_INTRINSICS 1 -#endif - #if defined(__x86_64__) && HAVE_SSE2_INTRINSICS #define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* x86_64 guarantees SSE2.
*/ #elif __MACOS__ && HAVE_SSE2_INTRINSICS diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c index 4d3c8bef4..b0e20743a 100644 --- a/src/video/SDL_blit_A.c +++ b/src/video/SDL_blit_A.c @@ -166,7 +166,7 @@ static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info) } } -#ifdef __MMX__ +#if HAVE_MMX_INTRINSICS /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info) @@ -409,7 +409,7 @@ static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info) _mm_empty(); } -#endif /* __MMX__ */ +#endif /* HAVE_MMX_INTRINSICS */ #if SDL_ARM_SIMD_BLITTERS void BlitARGBto565PixelAlphaARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride); @@ -750,7 +750,7 @@ static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask) } } -#ifdef __MMX__ +#if HAVE_MMX_INTRINSICS /* fast RGB565->RGB565 blending with surface alpha */ static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info) @@ -1025,7 +1025,7 @@ static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info) } } -#endif /* __MMX__ */ +#endif /* HAVE_MMX_INTRINSICS */ /* fast RGB565->RGB565 blending with surface alpha */ static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info) @@ -1357,15 +1357,13 @@ SDL_CalculateBlitA(SDL_Surface *surface) case 4: if (sf->Rmask == df->Rmask && sf->Gmask == df->Gmask && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) { -#if defined(__MMX__) +#if HAVE_MMX_INTRINSICS if (sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0 && sf->Ashift % 8 == 0 && sf->Aloss == 0) { -#ifdef __MMX__ if (SDL_HasMMX()) { return BlitRGBtoRGBPixelAlphaMMX; } -#endif } -#endif /* __MMX__ */ +#endif /* HAVE_MMX_INTRINSICS */ if (sf->Amask == 0xff000000) { #if SDL_ARM_NEON_BLITTERS if (SDL_HasNEON()) { @@ -1407,7 +1405,7 @@ SDL_CalculateBlitA(SDL_Surface *surface) case 2: if (surface->map->identity) { if (df->Gmask == 0x7e0) { -#ifdef __MMX__ +#if HAVE_MMX_INTRINSICS if 
(SDL_HasMMX()) { return Blit565to565SurfaceAlphaMMX; } else @@ -1416,7 +1414,7 @@ SDL_CalculateBlitA(SDL_Surface *surface) return Blit565to565SurfaceAlpha; } } else if (df->Gmask == 0x3e0) { -#ifdef __MMX__ +#if HAVE_MMX_INTRINSICS if (SDL_HasMMX()) { return Blit555to555SurfaceAlphaMMX; } else @@ -1430,7 +1428,7 @@ SDL_CalculateBlitA(SDL_Surface *surface) case 4: if (sf->Rmask == df->Rmask && sf->Gmask == df->Gmask && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) { -#ifdef __MMX__ +#if HAVE_MMX_INTRINSICS if (sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0 && SDL_HasMMX()) { return BlitRGBtoRGBSurfaceAlphaMMX; } diff --git a/src/video/SDL_blit_copy.c b/src/video/SDL_blit_copy.c index 79e65b153..45536b4b2 100644 --- a/src/video/SDL_blit_copy.c +++ b/src/video/SDL_blit_copy.c @@ -23,7 +23,7 @@ #include "SDL_blit.h" #include "SDL_blit_copy.h" -#ifdef __SSE__ +#if HAVE_SSE_INTRINSICS /* This assumes 16-byte aligned src and dst */ static SDL_INLINE void SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len) { @@ -48,9 +48,9 @@ static SDL_INLINE void SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len) SDL_memcpy(dst, src, len & 63); } } -#endif /* __SSE__ */ +#endif /* HAVE_SSE_INTRINSICS */ -#ifdef __MMX__ +#if HAVE_MMX_INTRINSICS #ifdef _MSC_VER #pragma warning(disable : 4799) #endif @@ -81,7 +81,7 @@ static SDL_INLINE void SDL_memcpyMMX(Uint8 *dst, const Uint8 *src, int len) SDL_memcpy(dst + skip, src + skip, remain); } } -#endif /* __MMX__ */ +#endif /* HAVE_MMX_INTRINSICS */ void SDL_BlitCopy(SDL_BlitInfo *info) { @@ -122,7 +122,7 @@ void SDL_BlitCopy(SDL_BlitInfo *info) return; } -#ifdef __SSE__ +#if HAVE_SSE_INTRINSICS if (SDL_HasSSE() && !((uintptr_t)src & 15) && !(srcskip & 15) && !((uintptr_t)dst & 15) && !(dstskip & 15)) { @@ -135,7 +135,7 @@ void SDL_BlitCopy(SDL_BlitInfo *info) } #endif -#ifdef __MMX__ +#if HAVE_MMX_INTRINSICS if (SDL_HasMMX() && !(srcskip & 7) && !(dstskip & 7)) { while (h--) { SDL_memcpyMMX(dst, src, w); diff --git 
a/src/video/SDL_fillrect.c b/src/video/SDL_fillrect.c index 20a0fccdb..595cf7c32 100644 --- a/src/video/SDL_fillrect.c +++ b/src/video/SDL_fillrect.c @@ -22,7 +22,7 @@ #include "SDL_blit.h" -#ifdef __SSE__ +#if HAVE_SSE_INTRINSICS /* *INDENT-OFF* */ /* clang-format off */ #if defined(_MSC_VER) && !defined(__clang__) @@ -376,7 +376,7 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count, { color |= (color << 8); color |= (color << 16); -#ifdef __SSE__ +#if HAVE_SSE_INTRINSICS if (SDL_HasSSE()) { fill_function = SDL_FillSurfaceRect1SSE; break; @@ -389,7 +389,7 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count, case 2: { color |= (color << 16); -#ifdef __SSE__ +#if HAVE_SSE_INTRINSICS if (SDL_HasSSE()) { fill_function = SDL_FillSurfaceRect2SSE; break; @@ -408,7 +408,7 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count, case 4: { -#ifdef __SSE__ +#if HAVE_SSE_INTRINSICS if (SDL_HasSSE()) { fill_function = SDL_FillSurfaceRect4SSE; break; diff --git a/src/video/SDL_stretch.c b/src/video/SDL_stretch.c index 05cf10262..ce3eae9a1 100644 --- a/src/video/SDL_stretch.c +++ b/src/video/SDL_stretch.c @@ -332,10 +332,6 @@ static int scale_mat(const Uint32 *src, int src_w, int src_h, int src_pitch, return 0; } -#if defined(__SSE2__) -#define HAVE_SSE2_INTRINSICS 1 -#endif - #if defined(__ARM_NEON) #define HAVE_NEON_INTRINSICS 1 #define CAST_uint8x8_t (uint8x8_t) diff --git a/src/video/SDL_yuv.c b/src/video/SDL_yuv.c index 2df6f64e4..acf2bae05 100644 --- a/src/video/SDL_yuv.c +++ b/src/video/SDL_yuv.c @@ -310,7 +310,7 @@ static SDL_bool yuv_rgb_sse( Uint8 *rgb, Uint32 rgb_stride, YCbCrType yuv_type) { -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS if (!SDL_HasSSE2()) { return SDL_FALSE; } @@ -1114,7 +1114,7 @@ static int SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const voi const Uint8 *src1, *src2; Uint8 *dstUV; Uint8 *tmp = NULL; -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS const SDL_bool 
use_SSE2 = SDL_HasSSE2(); #endif @@ -1144,7 +1144,7 @@ static int SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const voi y = UVheight; while (y--) { x = UVwidth; -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS if (use_SSE2) { while (x >= 16) { __m128i u = _mm_loadu_si128((__m128i *)src1); @@ -1187,7 +1187,7 @@ static int SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const vo const Uint8 *srcUV; Uint8 *dst1, *dst2; Uint8 *tmp = NULL; -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS const SDL_bool use_SSE2 = SDL_HasSSE2(); #endif @@ -1217,7 +1217,7 @@ static int SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const vo y = UVheight; while (y--) { x = UVwidth; -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS if (use_SSE2) { __m128i mask = _mm_set1_epi16(0x00FF); while (x >= 16) { @@ -1264,7 +1264,7 @@ static int SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int const int dstUVPitchLeft = (dstUVPitch - UVwidth * 2) / sizeof(Uint16); const Uint16 *srcUV; Uint16 *dstUV; -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS const SDL_bool use_SSE2 = SDL_HasSSE2(); #endif @@ -1277,7 +1277,7 @@ static int SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int y = UVheight; while (y--) { x = UVwidth; -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS if (use_SSE2) { while (x >= 8) { __m128i uv = _mm_loadu_si128((__m128i *)srcUV); @@ -1372,7 +1372,7 @@ static int SDL_ConvertPixels_Planar2x2_to_Planar2x2(int width, int height, SDL_GetPixelFormatName(dst_format)); } -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS #define PACKED4_TO_PACKED4_ROW_SSE2(shuffle) \ while (x >= 4) { \ __m128i yuv = _mm_loadu_si128((__m128i *)srcYUV); \ @@ -1399,14 +1399,14 @@ static int SDL_ConvertPixels_YUY2_to_UYVY(int width, int height, const void *src const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4); const Uint8 *srcYUV = (const Uint8 *)src; Uint8 *dstYUV = (Uint8 *)dst; -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS const SDL_bool use_SSE2 = SDL_HasSSE2(); 
#endif y = height; while (y--) { x = YUVwidth; -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS if (use_SSE2) { PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1)); } @@ -1440,14 +1440,14 @@ static int SDL_ConvertPixels_YUY2_to_YVYU(int width, int height, const void *src const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4); const Uint8 *srcYUV = (const Uint8 *)src; Uint8 *dstYUV = (Uint8 *)dst; -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS const SDL_bool use_SSE2 = SDL_HasSSE2(); #endif y = height; while (y--) { x = YUVwidth; -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS if (use_SSE2) { PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0)); } @@ -1481,14 +1481,14 @@ static int SDL_ConvertPixels_UYVY_to_YUY2(int width, int height, const void *src const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4); const Uint8 *srcYUV = (const Uint8 *)src; Uint8 *dstYUV = (Uint8 *)dst; -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS const SDL_bool use_SSE2 = SDL_HasSSE2(); #endif y = height; while (y--) { x = YUVwidth; -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS if (use_SSE2) { PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1)); } @@ -1522,14 +1522,14 @@ static int SDL_ConvertPixels_UYVY_to_YVYU(int width, int height, const void *src const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4); const Uint8 *srcYUV = (const Uint8 *)src; Uint8 *dstYUV = (Uint8 *)dst; -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS const SDL_bool use_SSE2 = SDL_HasSSE2(); #endif y = height; while (y--) { x = YUVwidth; -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS if (use_SSE2) { PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(0, 3, 2, 1)); } @@ -1563,14 +1563,14 @@ static int SDL_ConvertPixels_YVYU_to_YUY2(int width, int height, const void *src const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4); const Uint8 *srcYUV = (const Uint8 *)src; Uint8 *dstYUV = (Uint8 *)dst; -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS const SDL_bool use_SSE2 = SDL_HasSSE2(); #endif y = height; while (y--) { x = YUVwidth; -#ifdef __SSE2__ +#if 
HAVE_SSE2_INTRINSICS if (use_SSE2) { PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0)); } @@ -1604,14 +1604,14 @@ static int SDL_ConvertPixels_YVYU_to_UYVY(int width, int height, const void *src const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4); const Uint8 *srcYUV = (const Uint8 *)src; Uint8 *dstYUV = (Uint8 *)dst; -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS const SDL_bool use_SSE2 = SDL_HasSSE2(); #endif y = height; while (y--) { x = YUVwidth; -#ifdef __SSE2__ +#if HAVE_SSE2_INTRINSICS if (use_SSE2) { PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 1, 0, 3)); } diff --git a/src/video/yuv2rgb/yuv_rgb.c b/src/video/yuv2rgb/yuv_rgb.c index 0ec04f59c..68139bdbc 100644 --- a/src/video/yuv2rgb/yuv_rgb.c +++ b/src/video/yuv2rgb/yuv_rgb.c @@ -6,7 +6,6 @@ #include "yuv_rgb.h" - #define PRECISION 6 #define PRECISION_FACTOR (1<