cmake: Detect AVX + allow build system to disable Intel intrinsics
parent
683411e96f
commit
4681240241
|
@ -367,6 +367,7 @@ option_string(SDL_ASSERTIONS "Enable internal sanity checks (auto/disabled/relea
|
|||
#set_option(SDL_DEPENDENCY_TRACKING "Use gcc -MMD -MT dependency tracking" ON)
|
||||
set_option(SDL_ASSEMBLY "Enable assembly routines" ${SDL_ASSEMBLY_DEFAULT})
|
||||
dep_option(SDL_SSEMATH "Allow GCC to use SSE floating point math" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||
dep_option(SDL_AVX "Use AVX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||
dep_option(SDL_SSE "Use SSE assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||
dep_option(SDL_SSE2 "Use SSE2 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||
dep_option(SDL_SSE3 "Use SSE3 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||
|
@ -711,6 +712,32 @@ if(SDL_ASSEMBLY)
|
|||
# TODO: Those all seem to be quite GCC specific - needs to be
|
||||
# reworked for better compiler support
|
||||
set(HAVE_ASSEMBLY TRUE)
|
||||
|
||||
if(SDL_AVX)
  # Probe the C compiler: with -mavx added to the check flags, the test
  # program must find the intrinsics header and see __AVX__ defined.
  # push/pop keeps the extra flag from leaking into later checks.
  cmake_push_check_state()
  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mavx")
  check_c_source_compiles("
#ifdef __MINGW32__
#include <_mingw.h>
#ifdef __MINGW64_VERSION_MAJOR
#include <intrin.h>
#else
#include <immintrin.h>
#endif
#else
#include <immintrin.h>
#endif
#ifndef __AVX__
#error Assembler CPP flag not enabled
#endif
int main(int argc, char **argv) { return 0; }" CPU_SUPPORTS_AVX)
  cmake_pop_check_state()

  # Only on success: add -mavx to the shared build options and record that
  # AVX is available.
  if(CPU_SUPPORTS_AVX)
    target_compile_options(sdl-build-options INTERFACE "-mavx")
    set(HAVE_AVX TRUE)
  endif()
endif()
|
||||
|
||||
if(SDL_MMX)
|
||||
cmake_push_check_state()
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mmmx")
|
||||
|
@ -728,9 +755,10 @@ if(SDL_ASSEMBLY)
|
|||
#ifndef __MMX__
|
||||
#error Assembler CPP flag not enabled
|
||||
#endif
|
||||
int main(int argc, char **argv) { return 0; }" HAVE_MMX)
|
||||
int main(int argc, char **argv) { return 0; }" CPU_SUPPORTS_MMX)
|
||||
cmake_pop_check_state()
|
||||
if(HAVE_MMX)
|
||||
if(CPU_SUPPORTS_MMX)
|
||||
set(HAVE_MMX TRUE)
|
||||
target_compile_options(sdl-build-options INTERFACE "-mmmx")
|
||||
endif()
|
||||
endif()
|
||||
|
@ -823,8 +851,6 @@ if(SDL_ASSEMBLY)
|
|||
set(HAVE_SSEMATH TRUE)
|
||||
endif()
|
||||
|
||||
check_include_file("immintrin.h" HAVE_IMMINTRIN_H)
|
||||
|
||||
if(SDL_ALTIVEC)
|
||||
cmake_push_check_state()
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -maltivec")
|
||||
|
@ -954,8 +980,44 @@ if(SDL_ASSEMBLY)
|
|||
if(SDL_SSE3)
|
||||
set(HAVE_SSE3 TRUE)
|
||||
endif()
|
||||
check_include_file("immintrin.h" HAVE_IMMINTRIN_H)
|
||||
if(SDL_AVX)
  # NOTE(review): presumably this sits in the MSVC branch of the assembly
  # checks - confirm against the enclosing if().
  #
  # /arch:AVX is a compiler (cl) switch, not a linker input, so it has to go
  # through CMAKE_REQUIRED_FLAGS for the probe and target_compile_options for
  # the real build. Passing it as a "library" never reaches the compile step,
  # so __AVX__ stays undefined in the probe.
  cmake_push_check_state()
  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} /arch:AVX")
  check_c_source_compiles("
#include <immintrin.h>
#ifndef __AVX__
#error Assembler CPP flag not enabled
#endif
int main(int argc, char **argv) { return 0; }" CPU_SUPPORTS_AVX)
  cmake_pop_check_state()

  # Only on success: add the switch to the shared build options and record
  # that AVX is available.
  if(CPU_SUPPORTS_AVX)
    target_compile_options(sdl-build-options INTERFACE "/arch:AVX")
    set(HAVE_AVX TRUE)
  endif()
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# For every x86 instruction-set extension that was not detected (or was
# switched off), set the matching SDL_DISABLE_<ISA> variable to 1.
foreach(sdl_isa AVX MMX SSE SSE2 SSE3)
  if(NOT HAVE_${sdl_isa})
    set(SDL_DISABLE_${sdl_isa} 1)
  endif()
endforeach()
|
||||
|
||||
# TODO: Can't deactivate on FreeBSD? w/o LIBC, SDL_stdinc.h can't define
|
||||
|
|
|
@ -101,21 +101,20 @@ _m_prefetch(void *__P)
|
|||
#include <lasxintrin.h>
|
||||
#define __LASX__
|
||||
#endif
|
||||
#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H)
|
||||
#if defined(__AVX__) && !defined(SDL_DISABLE_AVX)
|
||||
#include <immintrin.h>
|
||||
#else
|
||||
#if defined(__MMX__) && !defined(SDL_DISABLE_MMINTRIN_H)
|
||||
#endif
|
||||
#if defined(__MMX__) && !defined(SDL_DISABLE_MMX)
|
||||
#include <mmintrin.h>
|
||||
#endif
|
||||
#if defined(__SSE__) && !defined(SDL_DISABLE_XMMINTRIN_H)
|
||||
#if defined(__SSE__) && !defined(SDL_DISABLE_SSE)
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
#if defined(__SSE2__) && !defined(SDL_DISABLE_EMMINTRIN_H)
|
||||
#if defined(__SSE2__) && !defined(SDL_DISABLE_SSE2)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#if defined(__SSE3__) && !defined(SDL_DISABLE_PMMINTRIN_H)
|
||||
#if defined(__SSE3__) && !defined(SDL_DISABLE_SSE3)
|
||||
#include <pmmintrin.h>
|
||||
#endif
|
||||
#endif /* HAVE_IMMINTRIN_H */
|
||||
|
||||
#endif /* SDL_intrin_h_ */
|
||||
|
|
|
@ -226,18 +226,6 @@
|
|||
#cmakedefine HAVE_LIBUSB 1
|
||||
#cmakedefine HAVE_O_CLOEXEC 1
|
||||
|
||||
/* Apple platforms might be building universal binaries, where Intel builds
|
||||
can use immintrin.h but other architectures can't. */
|
||||
#ifdef __APPLE__
|
||||
# if defined(__has_include) && (defined(__i386__) || defined(__x86_64))
|
||||
# if __has_include(<immintrin.h>)
|
||||
# define HAVE_IMMINTRIN_H 1
|
||||
# endif
|
||||
# endif
|
||||
#else /* non-Apple platforms can use the normal CMake check for this. */
|
||||
#cmakedefine HAVE_IMMINTRIN_H 1
|
||||
#endif
|
||||
|
||||
#cmakedefine HAVE_LIBUDEV_H 1
|
||||
#cmakedefine HAVE_LIBSAMPLERATE_H 1
|
||||
#cmakedefine HAVE_LIBDECOR_H 1
|
||||
|
@ -597,4 +585,11 @@ typedef unsigned int uintptr_t;
|
|||
#endif /* Visual Studio 2008 */
|
||||
#endif /* !_STDINT_H_ && !HAVE_STDINT_H */
|
||||
|
||||
/* Configure use of intrinsics.
   Each SDL_DISABLE_<ISA> macro is emitted when the build system sets the
   variable of the same name; the headers then skip that instruction set
   even if the compiler advertises it (__MMX__, __SSE__, ...). */

#cmakedefine SDL_DISABLE_MMX 1
#cmakedefine SDL_DISABLE_SSE 1
#cmakedefine SDL_DISABLE_SSE2 1
#cmakedefine SDL_DISABLE_SSE3 1
#cmakedefine SDL_DISABLE_AVX 1
|
||||
|
||||
#endif /* SDL_build_config_h_ */
|
||||
|
|
|
@ -138,8 +138,8 @@
|
|||
#define HAVE_SYSCTLBYNAME 1
|
||||
|
||||
#if defined(__has_include) && (defined(__i386__) || defined(__x86_64))
|
||||
# if __has_include(<immintrin.h>)
|
||||
# define HAVE_IMMINTRIN_H 1
|
||||
# if !__has_include(<immintrin.h>)
|
||||
# define SDL_DISABLE_AVX 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -103,11 +103,12 @@ typedef unsigned int uintptr_t;
|
|||
#define HAVE_TPCSHRD_H 1
|
||||
#define HAVE_SENSORSAPI_H 1
|
||||
#if (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)) && (defined(_MSC_VER) && _MSC_VER >= 1600)
|
||||
#define HAVE_IMMINTRIN_H 1
|
||||
#elif defined(__has_include) && (defined(__i386__) || defined(__x86_64))
|
||||
# if __has_include(<immintrin.h>)
|
||||
# define HAVE_IMMINTRIN_H 1
|
||||
# if !__has_include(<immintrin.h>)
|
||||
# define SDL_DISABLE_AVX 1
|
||||
# endif
|
||||
#else
|
||||
# define SDL_DISABLE_AVX 1
|
||||
#endif
|
||||
|
||||
/* This is disabled by default to avoid C runtime dependencies and manifest requirements */
|
||||
|
|
|
@ -48,11 +48,12 @@
|
|||
#define HAVE_TPCSHRD_H 1
|
||||
#define HAVE_SENSORSAPI_H 1
|
||||
#if (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)) && (defined(_MSC_VER) && _MSC_VER >= 1600)
|
||||
#define HAVE_IMMINTRIN_H 1
|
||||
#elif defined(__has_include) && (defined(__i386__) || defined(__x86_64))
|
||||
# if __has_include(<immintrin.h>)
|
||||
# define HAVE_IMMINTRIN_H 1
|
||||
# if !__has_include(<immintrin.h>)
|
||||
# define SDL_DISABLE_AVX 1
|
||||
# endif
|
||||
#else
|
||||
# define SDL_DISABLE_AVX 1
|
||||
#endif
|
||||
|
||||
/* This is disabled by default to avoid C runtime dependencies and manifest requirements */
|
||||
|
|
|
@ -48,11 +48,12 @@
|
|||
/*#define HAVE_TPCSHRD_H 1*/
|
||||
/*#define HAVE_SENSORSAPI_H 1*/
|
||||
#if (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)) && (defined(_MSC_VER) && _MSC_VER >= 1600)
|
||||
#define HAVE_IMMINTRIN_H 1
|
||||
#elif defined(__has_include) && (defined(__i386__) || defined(__x86_64))
|
||||
# if __has_include(<immintrin.h>)
|
||||
# define HAVE_IMMINTRIN_H 1
|
||||
# if !__has_include(<immintrin.h>)
|
||||
# define SDL_DISABLE_AVX 1
|
||||
# endif
|
||||
#else
|
||||
# define SDL_DISABLE_AVX 1
|
||||
#endif
|
||||
|
||||
/* This is disabled by default to avoid C runtime dependencies and manifest requirements */
|
||||
|
|
|
@ -185,6 +185,46 @@
|
|||
|
||||
#include <SDL3/SDL.h>
|
||||
#include <SDL3/SDL_intrin.h>
|
||||
|
||||
|
||||
/* Central HAVE_<ISA>_INTRINSICS switches.
   A switch is defined to 1 when the compiler advertises the instruction set
   and the matching SDL_DISABLE_<ISA> override is not set (NEON has no
   override here). */
#if defined(__ARM_NEON)
#  define HAVE_NEON_INTRINSICS 1
#endif

#if !defined(SDL_DISABLE_MMX) && defined(__MMX__)
#  define HAVE_MMX_INTRINSICS 1
#endif

#if !defined(SDL_DISABLE_SSE) && defined(__SSE__)
#  define HAVE_SSE_INTRINSICS 1
#endif

#if !defined(SDL_DISABLE_SSE2) && defined(__SSE2__)
#  define HAVE_SSE2_INTRINSICS 1
#endif

#if !defined(SDL_DISABLE_SSE3) && defined(__SSE3__)
#  define HAVE_SSE3_INTRINSICS 1
#endif

#if !defined(SDL_DISABLE_AVX) && defined(__AVX__)
#  define HAVE_AVX_INTRINSICS 1
#endif

/* AVX is switched back off for:
   - clang without __has_attribute(target),
   - clang in MSVC or SCE mode when __AVX__ is not defined,
   - GCC older than 4.9. */
#if defined(__clang__)
#  if !__has_attribute(target) || ((defined(_MSC_VER) || defined(__SCE__)) && !defined(__AVX__))
#    undef HAVE_AVX_INTRINSICS
#  endif
#elif defined(__GNUC__)
#  if (__GNUC__ == 4 && __GNUC_MINOR__ < 9) || (__GNUC__ < 4)
#    undef HAVE_AVX_INTRINSICS
#  endif
#endif
|
||||
|
||||
|
||||
#define SDL_MAIN_NOIMPL /* don't drag in header-only implementation of SDL_main */
|
||||
#include <SDL3/SDL_main.h>
|
||||
|
||||
|
|
|
@ -29,35 +29,6 @@
|
|||
|
||||
#define DEBUG_AUDIOSTREAM 0
|
||||
|
||||
#ifdef __ARM_NEON
|
||||
#define HAVE_NEON_INTRINSICS 1
|
||||
#endif
|
||||
|
||||
#ifdef __SSE__
|
||||
#define HAVE_SSE_INTRINSICS 1
|
||||
#endif
|
||||
|
||||
#ifdef __SSE3__
|
||||
#define HAVE_SSE3_INTRINSICS 1
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H)
|
||||
#define HAVE_AVX_INTRINSICS 1
|
||||
#endif
|
||||
#if defined __clang__
|
||||
#if (!__has_attribute(target))
|
||||
#undef HAVE_AVX_INTRINSICS
|
||||
#endif
|
||||
#if (defined(_MSC_VER) || defined(__SCE__)) && !defined(__AVX__)
|
||||
#undef HAVE_AVX_INTRINSICS
|
||||
#endif
|
||||
#elif defined __GNUC__
|
||||
#if (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 9)
|
||||
#undef HAVE_AVX_INTRINSICS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Initialize an SDL_AudioCVT structure for conversion.
|
||||
*
|
||||
|
|
|
@ -27,10 +27,6 @@
|
|||
#define HAVE_NEON_INTRINSICS 1
|
||||
#endif
|
||||
|
||||
#ifdef __SSE2__
|
||||
#define HAVE_SSE2_INTRINSICS 1
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) && HAVE_SSE2_INTRINSICS
|
||||
#define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* x86_64 guarantees SSE2. */
|
||||
#elif __MACOS__ && HAVE_SSE2_INTRINSICS
|
||||
|
|
|
@ -166,7 +166,7 @@ static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info)
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef __MMX__
|
||||
#if HAVE_MMX_INTRINSICS
|
||||
|
||||
/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
|
||||
static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info)
|
||||
|
@ -409,7 +409,7 @@ static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info)
|
|||
_mm_empty();
|
||||
}
|
||||
|
||||
#endif /* __MMX__ */
|
||||
#endif /* HAVE_MMX_INTRINSICS */
|
||||
|
||||
#if SDL_ARM_SIMD_BLITTERS
|
||||
void BlitARGBto565PixelAlphaARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
|
||||
|
@ -750,7 +750,7 @@ static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask)
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef __MMX__
|
||||
#if HAVE_MMX_INTRINSICS
|
||||
|
||||
/* fast RGB565->RGB565 blending with surface alpha */
|
||||
static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info)
|
||||
|
@ -1025,7 +1025,7 @@ static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info)
|
|||
}
|
||||
}
|
||||
|
||||
#endif /* __MMX__ */
|
||||
#endif /* HAVE_MMX_INTRINSICS */
|
||||
|
||||
/* fast RGB565->RGB565 blending with surface alpha */
|
||||
static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info)
|
||||
|
@ -1357,15 +1357,13 @@ SDL_CalculateBlitA(SDL_Surface *surface)
|
|||
|
||||
case 4:
|
||||
if (sf->Rmask == df->Rmask && sf->Gmask == df->Gmask && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
|
||||
#if defined(__MMX__)
|
||||
#if HAVE_MMX_INTRINSICS
|
||||
if (sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0 && sf->Ashift % 8 == 0 && sf->Aloss == 0) {
|
||||
#ifdef __MMX__
|
||||
if (SDL_HasMMX()) {
|
||||
return BlitRGBtoRGBPixelAlphaMMX;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif /* __MMX__ */
|
||||
#endif /* HAVE_MMX_INTRINSICS */
|
||||
if (sf->Amask == 0xff000000) {
|
||||
#if SDL_ARM_NEON_BLITTERS
|
||||
if (SDL_HasNEON()) {
|
||||
|
@ -1407,7 +1405,7 @@ SDL_CalculateBlitA(SDL_Surface *surface)
|
|||
case 2:
|
||||
if (surface->map->identity) {
|
||||
if (df->Gmask == 0x7e0) {
|
||||
#ifdef __MMX__
|
||||
#if HAVE_MMX_INTRINSICS
|
||||
if (SDL_HasMMX()) {
|
||||
return Blit565to565SurfaceAlphaMMX;
|
||||
} else
|
||||
|
@ -1416,7 +1414,7 @@ SDL_CalculateBlitA(SDL_Surface *surface)
|
|||
return Blit565to565SurfaceAlpha;
|
||||
}
|
||||
} else if (df->Gmask == 0x3e0) {
|
||||
#ifdef __MMX__
|
||||
#if HAVE_MMX_INTRINSICS
|
||||
if (SDL_HasMMX()) {
|
||||
return Blit555to555SurfaceAlphaMMX;
|
||||
} else
|
||||
|
@ -1430,7 +1428,7 @@ SDL_CalculateBlitA(SDL_Surface *surface)
|
|||
|
||||
case 4:
|
||||
if (sf->Rmask == df->Rmask && sf->Gmask == df->Gmask && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
|
||||
#ifdef __MMX__
|
||||
#if HAVE_MMX_INTRINSICS
|
||||
if (sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0 && SDL_HasMMX()) {
|
||||
return BlitRGBtoRGBSurfaceAlphaMMX;
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
#include "SDL_blit.h"
|
||||
#include "SDL_blit_copy.h"
|
||||
|
||||
#ifdef __SSE__
|
||||
#if HAVE_SSE_INTRINSICS
|
||||
/* This assumes 16-byte aligned src and dst */
|
||||
static SDL_INLINE void SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len)
|
||||
{
|
||||
|
@ -48,9 +48,9 @@ static SDL_INLINE void SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len)
|
|||
SDL_memcpy(dst, src, len & 63);
|
||||
}
|
||||
}
|
||||
#endif /* __SSE__ */
|
||||
#endif /* HAVE_SSE_INTRINSICS */
|
||||
|
||||
#ifdef __MMX__
|
||||
#if HAVE_MMX_INTRINSICS
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4799)
|
||||
#endif
|
||||
|
@ -81,7 +81,7 @@ static SDL_INLINE void SDL_memcpyMMX(Uint8 *dst, const Uint8 *src, int len)
|
|||
SDL_memcpy(dst + skip, src + skip, remain);
|
||||
}
|
||||
}
|
||||
#endif /* __MMX__ */
|
||||
#endif /* HAVE_MMX_INTRINSICS */
|
||||
|
||||
void SDL_BlitCopy(SDL_BlitInfo *info)
|
||||
{
|
||||
|
@ -122,7 +122,7 @@ void SDL_BlitCopy(SDL_BlitInfo *info)
|
|||
return;
|
||||
}
|
||||
|
||||
#ifdef __SSE__
|
||||
#if HAVE_SSE_INTRINSICS
|
||||
if (SDL_HasSSE() &&
|
||||
!((uintptr_t)src & 15) && !(srcskip & 15) &&
|
||||
!((uintptr_t)dst & 15) && !(dstskip & 15)) {
|
||||
|
@ -135,7 +135,7 @@ void SDL_BlitCopy(SDL_BlitInfo *info)
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef __MMX__
|
||||
#if HAVE_MMX_INTRINSICS
|
||||
if (SDL_HasMMX() && !(srcskip & 7) && !(dstskip & 7)) {
|
||||
while (h--) {
|
||||
SDL_memcpyMMX(dst, src, w);
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
|
||||
#include "SDL_blit.h"
|
||||
|
||||
#ifdef __SSE__
|
||||
#if HAVE_SSE_INTRINSICS
|
||||
/* *INDENT-OFF* */ /* clang-format off */
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
|
@ -376,7 +376,7 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
|
|||
{
|
||||
color |= (color << 8);
|
||||
color |= (color << 16);
|
||||
#ifdef __SSE__
|
||||
#if HAVE_SSE_INTRINSICS
|
||||
if (SDL_HasSSE()) {
|
||||
fill_function = SDL_FillSurfaceRect1SSE;
|
||||
break;
|
||||
|
@ -389,7 +389,7 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
|
|||
case 2:
|
||||
{
|
||||
color |= (color << 16);
|
||||
#ifdef __SSE__
|
||||
#if HAVE_SSE_INTRINSICS
|
||||
if (SDL_HasSSE()) {
|
||||
fill_function = SDL_FillSurfaceRect2SSE;
|
||||
break;
|
||||
|
@ -408,7 +408,7 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
|
|||
|
||||
case 4:
|
||||
{
|
||||
#ifdef __SSE__
|
||||
#if HAVE_SSE_INTRINSICS
|
||||
if (SDL_HasSSE()) {
|
||||
fill_function = SDL_FillSurfaceRect4SSE;
|
||||
break;
|
||||
|
|
|
@ -332,10 +332,6 @@ static int scale_mat(const Uint32 *src, int src_w, int src_h, int src_pitch,
|
|||
return 0;
|
||||
}
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#define HAVE_SSE2_INTRINSICS 1
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_NEON)
|
||||
#define HAVE_NEON_INTRINSICS 1
|
||||
#define CAST_uint8x8_t (uint8x8_t)
|
||||
|
|
|
@ -310,7 +310,7 @@ static SDL_bool yuv_rgb_sse(
|
|||
Uint8 *rgb, Uint32 rgb_stride,
|
||||
YCbCrType yuv_type)
|
||||
{
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (!SDL_HasSSE2()) {
|
||||
return SDL_FALSE;
|
||||
}
|
||||
|
@ -1114,7 +1114,7 @@ static int SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const voi
|
|||
const Uint8 *src1, *src2;
|
||||
Uint8 *dstUV;
|
||||
Uint8 *tmp = NULL;
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||
#endif
|
||||
|
||||
|
@ -1144,7 +1144,7 @@ static int SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const voi
|
|||
y = UVheight;
|
||||
while (y--) {
|
||||
x = UVwidth;
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (use_SSE2) {
|
||||
while (x >= 16) {
|
||||
__m128i u = _mm_loadu_si128((__m128i *)src1);
|
||||
|
@ -1187,7 +1187,7 @@ static int SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const vo
|
|||
const Uint8 *srcUV;
|
||||
Uint8 *dst1, *dst2;
|
||||
Uint8 *tmp = NULL;
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||
#endif
|
||||
|
||||
|
@ -1217,7 +1217,7 @@ static int SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const vo
|
|||
y = UVheight;
|
||||
while (y--) {
|
||||
x = UVwidth;
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (use_SSE2) {
|
||||
__m128i mask = _mm_set1_epi16(0x00FF);
|
||||
while (x >= 16) {
|
||||
|
@ -1264,7 +1264,7 @@ static int SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int
|
|||
const int dstUVPitchLeft = (dstUVPitch - UVwidth * 2) / sizeof(Uint16);
|
||||
const Uint16 *srcUV;
|
||||
Uint16 *dstUV;
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||
#endif
|
||||
|
||||
|
@ -1277,7 +1277,7 @@ static int SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int
|
|||
y = UVheight;
|
||||
while (y--) {
|
||||
x = UVwidth;
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (use_SSE2) {
|
||||
while (x >= 8) {
|
||||
__m128i uv = _mm_loadu_si128((__m128i *)srcUV);
|
||||
|
@ -1372,7 +1372,7 @@ static int SDL_ConvertPixels_Planar2x2_to_Planar2x2(int width, int height,
|
|||
SDL_GetPixelFormatName(dst_format));
|
||||
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
#define PACKED4_TO_PACKED4_ROW_SSE2(shuffle) \
|
||||
while (x >= 4) { \
|
||||
__m128i yuv = _mm_loadu_si128((__m128i *)srcYUV); \
|
||||
|
@ -1399,14 +1399,14 @@ static int SDL_ConvertPixels_YUY2_to_UYVY(int width, int height, const void *src
|
|||
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
||||
const Uint8 *srcYUV = (const Uint8 *)src;
|
||||
Uint8 *dstYUV = (Uint8 *)dst;
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||
#endif
|
||||
|
||||
y = height;
|
||||
while (y--) {
|
||||
x = YUVwidth;
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (use_SSE2) {
|
||||
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
|
||||
}
|
||||
|
@ -1440,14 +1440,14 @@ static int SDL_ConvertPixels_YUY2_to_YVYU(int width, int height, const void *src
|
|||
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
||||
const Uint8 *srcYUV = (const Uint8 *)src;
|
||||
Uint8 *dstYUV = (Uint8 *)dst;
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||
#endif
|
||||
|
||||
y = height;
|
||||
while (y--) {
|
||||
x = YUVwidth;
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (use_SSE2) {
|
||||
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
|
||||
}
|
||||
|
@ -1481,14 +1481,14 @@ static int SDL_ConvertPixels_UYVY_to_YUY2(int width, int height, const void *src
|
|||
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
||||
const Uint8 *srcYUV = (const Uint8 *)src;
|
||||
Uint8 *dstYUV = (Uint8 *)dst;
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||
#endif
|
||||
|
||||
y = height;
|
||||
while (y--) {
|
||||
x = YUVwidth;
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (use_SSE2) {
|
||||
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
|
||||
}
|
||||
|
@ -1522,14 +1522,14 @@ static int SDL_ConvertPixels_UYVY_to_YVYU(int width, int height, const void *src
|
|||
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
||||
const Uint8 *srcYUV = (const Uint8 *)src;
|
||||
Uint8 *dstYUV = (Uint8 *)dst;
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||
#endif
|
||||
|
||||
y = height;
|
||||
while (y--) {
|
||||
x = YUVwidth;
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (use_SSE2) {
|
||||
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(0, 3, 2, 1));
|
||||
}
|
||||
|
@ -1563,14 +1563,14 @@ static int SDL_ConvertPixels_YVYU_to_YUY2(int width, int height, const void *src
|
|||
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
||||
const Uint8 *srcYUV = (const Uint8 *)src;
|
||||
Uint8 *dstYUV = (Uint8 *)dst;
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||
#endif
|
||||
|
||||
y = height;
|
||||
while (y--) {
|
||||
x = YUVwidth;
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (use_SSE2) {
|
||||
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
|
||||
}
|
||||
|
@ -1604,14 +1604,14 @@ static int SDL_ConvertPixels_YVYU_to_UYVY(int width, int height, const void *src
|
|||
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
||||
const Uint8 *srcYUV = (const Uint8 *)src;
|
||||
Uint8 *dstYUV = (Uint8 *)dst;
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||
#endif
|
||||
|
||||
y = height;
|
||||
while (y--) {
|
||||
x = YUVwidth;
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (use_SSE2) {
|
||||
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 1, 0, 3));
|
||||
}
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
|
||||
#include "yuv_rgb.h"
|
||||
|
||||
|
||||
#define PRECISION 6
|
||||
#define PRECISION_FACTOR (1<<PRECISION)
|
||||
|
||||
|
@ -240,7 +239,7 @@ void rgb24_yuv420_std(
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
|
||||
#define SSE_FUNCTION_NAME yuv420_rgb565_sse
|
||||
#define STD_FUNCTION_NAME yuv420_rgb565_std
|
||||
|
@ -683,7 +682,7 @@ void rgb24_yuv420_sseu(uint32_t width, uint32_t height,
|
|||
}
|
||||
|
||||
|
||||
#endif //__SSE2__
|
||||
#endif //HAVE_SSE2_INTRINSICS
|
||||
|
||||
#ifdef __loongarch_sx
|
||||
|
||||
|
|
Loading…
Reference in New Issue