cmake: Detect AVX + allow build system to disable Intel intrinsics
parent
683411e96f
commit
4681240241
|
@ -367,6 +367,7 @@ option_string(SDL_ASSERTIONS "Enable internal sanity checks (auto/disabled/relea
|
||||||
#set_option(SDL_DEPENDENCY_TRACKING "Use gcc -MMD -MT dependency tracking" ON)
|
#set_option(SDL_DEPENDENCY_TRACKING "Use gcc -MMD -MT dependency tracking" ON)
|
||||||
set_option(SDL_ASSEMBLY "Enable assembly routines" ${SDL_ASSEMBLY_DEFAULT})
|
set_option(SDL_ASSEMBLY "Enable assembly routines" ${SDL_ASSEMBLY_DEFAULT})
|
||||||
dep_option(SDL_SSEMATH "Allow GCC to use SSE floating point math" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
dep_option(SDL_SSEMATH "Allow GCC to use SSE floating point math" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||||
|
dep_option(SDL_AVX "Use AVX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||||
dep_option(SDL_SSE "Use SSE assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
dep_option(SDL_SSE "Use SSE assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||||
dep_option(SDL_SSE2 "Use SSE2 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
dep_option(SDL_SSE2 "Use SSE2 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||||
dep_option(SDL_SSE3 "Use SSE3 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
dep_option(SDL_SSE3 "Use SSE3 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
|
||||||
|
@ -711,6 +712,32 @@ if(SDL_ASSEMBLY)
|
||||||
# TODO: Those all seem to be quite GCC specific - needs to be
|
# TODO: Those all seem to be quite GCC specific - needs to be
|
||||||
# reworked for better compiler support
|
# reworked for better compiler support
|
||||||
set(HAVE_ASSEMBLY TRUE)
|
set(HAVE_ASSEMBLY TRUE)
|
||||||
|
|
||||||
|
if(SDL_AVX)
|
||||||
|
cmake_push_check_state()
|
||||||
|
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mavx")
|
||||||
|
check_c_source_compiles("
|
||||||
|
#ifdef __MINGW32__
|
||||||
|
#include <_mingw.h>
|
||||||
|
#ifdef __MINGW64_VERSION_MAJOR
|
||||||
|
#include <intrin.h>
|
||||||
|
#else
|
||||||
|
#include <immintrin.h>
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#include <immintrin.h>
|
||||||
|
#endif
|
||||||
|
#ifndef __AVX__
|
||||||
|
#error Assembler CPP flag not enabled
|
||||||
|
#endif
|
||||||
|
int main(int argc, char **argv) { return 0; }" CPU_SUPPORTS_AVX)
|
||||||
|
cmake_pop_check_state()
|
||||||
|
if(CPU_SUPPORTS_AVX)
|
||||||
|
set(HAVE_AVX TRUE)
|
||||||
|
target_compile_options(sdl-build-options INTERFACE "-mavx")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
if(SDL_MMX)
|
if(SDL_MMX)
|
||||||
cmake_push_check_state()
|
cmake_push_check_state()
|
||||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mmmx")
|
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mmmx")
|
||||||
|
@ -728,9 +755,10 @@ if(SDL_ASSEMBLY)
|
||||||
#ifndef __MMX__
|
#ifndef __MMX__
|
||||||
#error Assembler CPP flag not enabled
|
#error Assembler CPP flag not enabled
|
||||||
#endif
|
#endif
|
||||||
int main(int argc, char **argv) { return 0; }" HAVE_MMX)
|
int main(int argc, char **argv) { return 0; }" CPU_SUPPORTS_MMX)
|
||||||
cmake_pop_check_state()
|
cmake_pop_check_state()
|
||||||
if(HAVE_MMX)
|
if(CPU_SUPPORTS_MMX)
|
||||||
|
set(HAVE_MMX TRUE)
|
||||||
target_compile_options(sdl-build-options INTERFACE "-mmmx")
|
target_compile_options(sdl-build-options INTERFACE "-mmmx")
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
@ -823,8 +851,6 @@ if(SDL_ASSEMBLY)
|
||||||
set(HAVE_SSEMATH TRUE)
|
set(HAVE_SSEMATH TRUE)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
check_include_file("immintrin.h" HAVE_IMMINTRIN_H)
|
|
||||||
|
|
||||||
if(SDL_ALTIVEC)
|
if(SDL_ALTIVEC)
|
||||||
cmake_push_check_state()
|
cmake_push_check_state()
|
||||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -maltivec")
|
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -maltivec")
|
||||||
|
@ -954,8 +980,44 @@ if(SDL_ASSEMBLY)
|
||||||
if(SDL_SSE3)
|
if(SDL_SSE3)
|
||||||
set(HAVE_SSE3 TRUE)
|
set(HAVE_SSE3 TRUE)
|
||||||
endif()
|
endif()
|
||||||
check_include_file("immintrin.h" HAVE_IMMINTRIN_H)
|
if(SDL_AVX)
|
||||||
|
cmake_push_check_state()
|
||||||
|
# FIXME: should be CMAKE_REQUIRED_LINK_OPTIONS for CMake 3.14+
|
||||||
|
list(APPEND CMAKE_REQUIRED_LIBRARIES "/ARCH:AVX")
|
||||||
|
check_c_source_compiles("
|
||||||
|
#include <immintrin.h>
|
||||||
|
#ifndef __AVX__
|
||||||
|
#error Assembler CPP flag not enabled
|
||||||
|
#endif
|
||||||
|
int main(int argc, char **argv) { return 0; }" CPU_SUPPORTS_AVX)
|
||||||
|
cmake_pop_check_state()
|
||||||
|
if(CPU_SUPPORTS_AVX)
|
||||||
|
# FIXME: should be target_link_options for CMake 3.13+
|
||||||
|
target_link_libraries(sdl-build-options INTERFACE "/ARCH:AVX")
|
||||||
|
set(HAVE_AVX TRUE)
|
||||||
endif()
|
endif()
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(NOT HAVE_AVX)
|
||||||
|
set(SDL_DISABLE_AVX 1)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(NOT HAVE_MMX)
|
||||||
|
set(SDL_DISABLE_MMX 1)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(NOT HAVE_SSE)
|
||||||
|
set(SDL_DISABLE_SSE 1)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(NOT HAVE_SSE2)
|
||||||
|
set(SDL_DISABLE_SSE2 1)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(NOT HAVE_SSE3)
|
||||||
|
set(SDL_DISABLE_SSE3 1)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# TODO: Can't deactivate on FreeBSD? w/o LIBC, SDL_stdinc.h can't define
|
# TODO: Can't deactivate on FreeBSD? w/o LIBC, SDL_stdinc.h can't define
|
||||||
|
|
|
@ -101,21 +101,20 @@ _m_prefetch(void *__P)
|
||||||
#include <lasxintrin.h>
|
#include <lasxintrin.h>
|
||||||
#define __LASX__
|
#define __LASX__
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H)
|
#if defined(__AVX__) && !defined(SDL_DISABLE_AVX)
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#else
|
#endif
|
||||||
#if defined(__MMX__) && !defined(SDL_DISABLE_MMINTRIN_H)
|
#if defined(__MMX__) && !defined(SDL_DISABLE_MMX)
|
||||||
#include <mmintrin.h>
|
#include <mmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
#if defined(__SSE__) && !defined(SDL_DISABLE_XMMINTRIN_H)
|
#if defined(__SSE__) && !defined(SDL_DISABLE_SSE)
|
||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
#if defined(__SSE2__) && !defined(SDL_DISABLE_EMMINTRIN_H)
|
#if defined(__SSE2__) && !defined(SDL_DISABLE_SSE2)
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
#if defined(__SSE3__) && !defined(SDL_DISABLE_PMMINTRIN_H)
|
#if defined(__SSE3__) && !defined(SDL_DISABLE_SSE3)
|
||||||
#include <pmmintrin.h>
|
#include <pmmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
#endif /* HAVE_IMMINTRIN_H */
|
|
||||||
|
|
||||||
#endif /* SDL_intrin_h_ */
|
#endif /* SDL_intrin_h_ */
|
||||||
|
|
|
@ -226,18 +226,6 @@
|
||||||
#cmakedefine HAVE_LIBUSB 1
|
#cmakedefine HAVE_LIBUSB 1
|
||||||
#cmakedefine HAVE_O_CLOEXEC 1
|
#cmakedefine HAVE_O_CLOEXEC 1
|
||||||
|
|
||||||
/* Apple platforms might be building universal binaries, where Intel builds
|
|
||||||
can use immintrin.h but other architectures can't. */
|
|
||||||
#ifdef __APPLE__
|
|
||||||
# if defined(__has_include) && (defined(__i386__) || defined(__x86_64))
|
|
||||||
# if __has_include(<immintrin.h>)
|
|
||||||
# define HAVE_IMMINTRIN_H 1
|
|
||||||
# endif
|
|
||||||
# endif
|
|
||||||
#else /* non-Apple platforms can use the normal CMake check for this. */
|
|
||||||
#cmakedefine HAVE_IMMINTRIN_H 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#cmakedefine HAVE_LIBUDEV_H 1
|
#cmakedefine HAVE_LIBUDEV_H 1
|
||||||
#cmakedefine HAVE_LIBSAMPLERATE_H 1
|
#cmakedefine HAVE_LIBSAMPLERATE_H 1
|
||||||
#cmakedefine HAVE_LIBDECOR_H 1
|
#cmakedefine HAVE_LIBDECOR_H 1
|
||||||
|
@ -597,4 +585,11 @@ typedef unsigned int uintptr_t;
|
||||||
#endif /* Visual Studio 2008 */
|
#endif /* Visual Studio 2008 */
|
||||||
#endif /* !_STDINT_H_ && !HAVE_STDINT_H */
|
#endif /* !_STDINT_H_ && !HAVE_STDINT_H */
|
||||||
|
|
||||||
|
/* Configure use of intrinsics */
|
||||||
|
|
||||||
|
#cmakedefine SDL_DISABLE_SSE 1
|
||||||
|
#cmakedefine SDL_DISABLE_SSE2 1
|
||||||
|
#cmakedefine SDL_DISABLE_SSE3 1
|
||||||
|
#cmakedefine SDL_DISABLE_AVX 1
|
||||||
|
|
||||||
#endif /* SDL_build_config_h_ */
|
#endif /* SDL_build_config_h_ */
|
||||||
|
|
|
@ -138,8 +138,8 @@
|
||||||
#define HAVE_SYSCTLBYNAME 1
|
#define HAVE_SYSCTLBYNAME 1
|
||||||
|
|
||||||
#if defined(__has_include) && (defined(__i386__) || defined(__x86_64))
|
#if defined(__has_include) && (defined(__i386__) || defined(__x86_64))
|
||||||
# if __has_include(<immintrin.h>)
|
# if !__has_include(<immintrin.h>)
|
||||||
# define HAVE_IMMINTRIN_H 1
|
# define SDL_DISABLE_AVX 1
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -103,11 +103,12 @@ typedef unsigned int uintptr_t;
|
||||||
#define HAVE_TPCSHRD_H 1
|
#define HAVE_TPCSHRD_H 1
|
||||||
#define HAVE_SENSORSAPI_H 1
|
#define HAVE_SENSORSAPI_H 1
|
||||||
#if (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)) && (defined(_MSC_VER) && _MSC_VER >= 1600)
|
#if (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)) && (defined(_MSC_VER) && _MSC_VER >= 1600)
|
||||||
#define HAVE_IMMINTRIN_H 1
|
|
||||||
#elif defined(__has_include) && (defined(__i386__) || defined(__x86_64))
|
#elif defined(__has_include) && (defined(__i386__) || defined(__x86_64))
|
||||||
# if __has_include(<immintrin.h>)
|
# if !__has_include(<immintrin.h>)
|
||||||
# define HAVE_IMMINTRIN_H 1
|
# define SDL_DISABLE_AVX 1
|
||||||
# endif
|
# endif
|
||||||
|
#else
|
||||||
|
# define SDL_DISABLE_AVX 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* This is disabled by default to avoid C runtime dependencies and manifest requirements */
|
/* This is disabled by default to avoid C runtime dependencies and manifest requirements */
|
||||||
|
|
|
@ -48,11 +48,12 @@
|
||||||
#define HAVE_TPCSHRD_H 1
|
#define HAVE_TPCSHRD_H 1
|
||||||
#define HAVE_SENSORSAPI_H 1
|
#define HAVE_SENSORSAPI_H 1
|
||||||
#if (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)) && (defined(_MSC_VER) && _MSC_VER >= 1600)
|
#if (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)) && (defined(_MSC_VER) && _MSC_VER >= 1600)
|
||||||
#define HAVE_IMMINTRIN_H 1
|
|
||||||
#elif defined(__has_include) && (defined(__i386__) || defined(__x86_64))
|
#elif defined(__has_include) && (defined(__i386__) || defined(__x86_64))
|
||||||
# if __has_include(<immintrin.h>)
|
# if !__has_include(<immintrin.h>)
|
||||||
# define HAVE_IMMINTRIN_H 1
|
# define SDL_DISABLE_AVX 1
|
||||||
# endif
|
# endif
|
||||||
|
#else
|
||||||
|
# define SDL_DISABLE_AVX 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* This is disabled by default to avoid C runtime dependencies and manifest requirements */
|
/* This is disabled by default to avoid C runtime dependencies and manifest requirements */
|
||||||
|
|
|
@ -48,11 +48,12 @@
|
||||||
/*#define HAVE_TPCSHRD_H 1*/
|
/*#define HAVE_TPCSHRD_H 1*/
|
||||||
/*#define HAVE_SENSORSAPI_H 1*/
|
/*#define HAVE_SENSORSAPI_H 1*/
|
||||||
#if (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)) && (defined(_MSC_VER) && _MSC_VER >= 1600)
|
#if (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)) && (defined(_MSC_VER) && _MSC_VER >= 1600)
|
||||||
#define HAVE_IMMINTRIN_H 1
|
|
||||||
#elif defined(__has_include) && (defined(__i386__) || defined(__x86_64))
|
#elif defined(__has_include) && (defined(__i386__) || defined(__x86_64))
|
||||||
# if __has_include(<immintrin.h>)
|
# if !__has_include(<immintrin.h>)
|
||||||
# define HAVE_IMMINTRIN_H 1
|
# define SDL_DISABLE_AVX 1
|
||||||
# endif
|
# endif
|
||||||
|
#else
|
||||||
|
# define SDL_DISABLE_AVX 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* This is disabled by default to avoid C runtime dependencies and manifest requirements */
|
/* This is disabled by default to avoid C runtime dependencies and manifest requirements */
|
||||||
|
|
|
@ -185,6 +185,46 @@
|
||||||
|
|
||||||
#include <SDL3/SDL.h>
|
#include <SDL3/SDL.h>
|
||||||
#include <SDL3/SDL_intrin.h>
|
#include <SDL3/SDL_intrin.h>
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __ARM_NEON
|
||||||
|
#define HAVE_NEON_INTRINSICS 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__MMX__) && !defined(SDL_DISABLE_MMX)
|
||||||
|
#define HAVE_MMX_INTRINSICS 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__SSE__) && !defined(SDL_DISABLE_SSE)
|
||||||
|
#define HAVE_SSE_INTRINSICS 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__SSE2__) && !defined(SDL_DISABLE_SSE2)
|
||||||
|
#define HAVE_SSE2_INTRINSICS 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__SSE3__) && !defined(SDL_DISABLE_SSE3)
|
||||||
|
#define HAVE_SSE3_INTRINSICS 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__AVX__) && !defined(SDL_DISABLE_AVX)
|
||||||
|
#define HAVE_AVX_INTRINSICS 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined __clang__
|
||||||
|
#if (!__has_attribute(target))
|
||||||
|
#undef HAVE_AVX_INTRINSICS
|
||||||
|
#endif
|
||||||
|
#if (defined(_MSC_VER) || defined(__SCE__)) && !defined(__AVX__)
|
||||||
|
#undef HAVE_AVX_INTRINSICS
|
||||||
|
#endif
|
||||||
|
#elif defined __GNUC__
|
||||||
|
#if (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 9)
|
||||||
|
#undef HAVE_AVX_INTRINSICS
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#define SDL_MAIN_NOIMPL /* don't drag in header-only implementation of SDL_main */
|
#define SDL_MAIN_NOIMPL /* don't drag in header-only implementation of SDL_main */
|
||||||
#include <SDL3/SDL_main.h>
|
#include <SDL3/SDL_main.h>
|
||||||
|
|
||||||
|
|
|
@ -29,35 +29,6 @@
|
||||||
|
|
||||||
#define DEBUG_AUDIOSTREAM 0
|
#define DEBUG_AUDIOSTREAM 0
|
||||||
|
|
||||||
#ifdef __ARM_NEON
|
|
||||||
#define HAVE_NEON_INTRINSICS 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __SSE__
|
|
||||||
#define HAVE_SSE_INTRINSICS 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __SSE3__
|
|
||||||
#define HAVE_SSE3_INTRINSICS 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H)
|
|
||||||
#define HAVE_AVX_INTRINSICS 1
|
|
||||||
#endif
|
|
||||||
#if defined __clang__
|
|
||||||
#if (!__has_attribute(target))
|
|
||||||
#undef HAVE_AVX_INTRINSICS
|
|
||||||
#endif
|
|
||||||
#if (defined(_MSC_VER) || defined(__SCE__)) && !defined(__AVX__)
|
|
||||||
#undef HAVE_AVX_INTRINSICS
|
|
||||||
#endif
|
|
||||||
#elif defined __GNUC__
|
|
||||||
#if (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 9)
|
|
||||||
#undef HAVE_AVX_INTRINSICS
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialize an SDL_AudioCVT structure for conversion.
|
* Initialize an SDL_AudioCVT structure for conversion.
|
||||||
*
|
*
|
||||||
|
|
|
@ -27,10 +27,6 @@
|
||||||
#define HAVE_NEON_INTRINSICS 1
|
#define HAVE_NEON_INTRINSICS 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __SSE2__
|
|
||||||
#define HAVE_SSE2_INTRINSICS 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__x86_64__) && HAVE_SSE2_INTRINSICS
|
#if defined(__x86_64__) && HAVE_SSE2_INTRINSICS
|
||||||
#define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* x86_64 guarantees SSE2. */
|
#define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* x86_64 guarantees SSE2. */
|
||||||
#elif __MACOS__ && HAVE_SSE2_INTRINSICS
|
#elif __MACOS__ && HAVE_SSE2_INTRINSICS
|
||||||
|
|
|
@ -166,7 +166,7 @@ static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __MMX__
|
#if HAVE_MMX_INTRINSICS
|
||||||
|
|
||||||
/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
|
/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
|
||||||
static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info)
|
static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info)
|
||||||
|
@ -409,7 +409,7 @@ static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info)
|
||||||
_mm_empty();
|
_mm_empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* __MMX__ */
|
#endif /* HAVE_MMX_INTRINSICS */
|
||||||
|
|
||||||
#if SDL_ARM_SIMD_BLITTERS
|
#if SDL_ARM_SIMD_BLITTERS
|
||||||
void BlitARGBto565PixelAlphaARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
|
void BlitARGBto565PixelAlphaARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
|
||||||
|
@ -750,7 +750,7 @@ static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __MMX__
|
#if HAVE_MMX_INTRINSICS
|
||||||
|
|
||||||
/* fast RGB565->RGB565 blending with surface alpha */
|
/* fast RGB565->RGB565 blending with surface alpha */
|
||||||
static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info)
|
static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info)
|
||||||
|
@ -1025,7 +1025,7 @@ static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* __MMX__ */
|
#endif /* HAVE_MMX_INTRINSICS */
|
||||||
|
|
||||||
/* fast RGB565->RGB565 blending with surface alpha */
|
/* fast RGB565->RGB565 blending with surface alpha */
|
||||||
static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info)
|
static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info)
|
||||||
|
@ -1357,15 +1357,13 @@ SDL_CalculateBlitA(SDL_Surface *surface)
|
||||||
|
|
||||||
case 4:
|
case 4:
|
||||||
if (sf->Rmask == df->Rmask && sf->Gmask == df->Gmask && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
|
if (sf->Rmask == df->Rmask && sf->Gmask == df->Gmask && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
|
||||||
#if defined(__MMX__)
|
#if HAVE_MMX_INTRINSICS
|
||||||
if (sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0 && sf->Ashift % 8 == 0 && sf->Aloss == 0) {
|
if (sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0 && sf->Ashift % 8 == 0 && sf->Aloss == 0) {
|
||||||
#ifdef __MMX__
|
|
||||||
if (SDL_HasMMX()) {
|
if (SDL_HasMMX()) {
|
||||||
return BlitRGBtoRGBPixelAlphaMMX;
|
return BlitRGBtoRGBPixelAlphaMMX;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
#endif /* __MMX__ */
|
#endif /* HAVE_MMX_INTRINSICS */
|
||||||
if (sf->Amask == 0xff000000) {
|
if (sf->Amask == 0xff000000) {
|
||||||
#if SDL_ARM_NEON_BLITTERS
|
#if SDL_ARM_NEON_BLITTERS
|
||||||
if (SDL_HasNEON()) {
|
if (SDL_HasNEON()) {
|
||||||
|
@ -1407,7 +1405,7 @@ SDL_CalculateBlitA(SDL_Surface *surface)
|
||||||
case 2:
|
case 2:
|
||||||
if (surface->map->identity) {
|
if (surface->map->identity) {
|
||||||
if (df->Gmask == 0x7e0) {
|
if (df->Gmask == 0x7e0) {
|
||||||
#ifdef __MMX__
|
#if HAVE_MMX_INTRINSICS
|
||||||
if (SDL_HasMMX()) {
|
if (SDL_HasMMX()) {
|
||||||
return Blit565to565SurfaceAlphaMMX;
|
return Blit565to565SurfaceAlphaMMX;
|
||||||
} else
|
} else
|
||||||
|
@ -1416,7 +1414,7 @@ SDL_CalculateBlitA(SDL_Surface *surface)
|
||||||
return Blit565to565SurfaceAlpha;
|
return Blit565to565SurfaceAlpha;
|
||||||
}
|
}
|
||||||
} else if (df->Gmask == 0x3e0) {
|
} else if (df->Gmask == 0x3e0) {
|
||||||
#ifdef __MMX__
|
#if HAVE_MMX_INTRINSICS
|
||||||
if (SDL_HasMMX()) {
|
if (SDL_HasMMX()) {
|
||||||
return Blit555to555SurfaceAlphaMMX;
|
return Blit555to555SurfaceAlphaMMX;
|
||||||
} else
|
} else
|
||||||
|
@ -1430,7 +1428,7 @@ SDL_CalculateBlitA(SDL_Surface *surface)
|
||||||
|
|
||||||
case 4:
|
case 4:
|
||||||
if (sf->Rmask == df->Rmask && sf->Gmask == df->Gmask && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
|
if (sf->Rmask == df->Rmask && sf->Gmask == df->Gmask && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
|
||||||
#ifdef __MMX__
|
#if HAVE_MMX_INTRINSICS
|
||||||
if (sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0 && SDL_HasMMX()) {
|
if (sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0 && SDL_HasMMX()) {
|
||||||
return BlitRGBtoRGBSurfaceAlphaMMX;
|
return BlitRGBtoRGBSurfaceAlphaMMX;
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,7 @@
|
||||||
#include "SDL_blit.h"
|
#include "SDL_blit.h"
|
||||||
#include "SDL_blit_copy.h"
|
#include "SDL_blit_copy.h"
|
||||||
|
|
||||||
#ifdef __SSE__
|
#if HAVE_SSE_INTRINSICS
|
||||||
/* This assumes 16-byte aligned src and dst */
|
/* This assumes 16-byte aligned src and dst */
|
||||||
static SDL_INLINE void SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len)
|
static SDL_INLINE void SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len)
|
||||||
{
|
{
|
||||||
|
@ -48,9 +48,9 @@ static SDL_INLINE void SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len)
|
||||||
SDL_memcpy(dst, src, len & 63);
|
SDL_memcpy(dst, src, len & 63);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif /* __SSE__ */
|
#endif /* HAVE_SSE_INTRINSICS */
|
||||||
|
|
||||||
#ifdef __MMX__
|
#if HAVE_MMX_INTRINSICS
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#pragma warning(disable : 4799)
|
#pragma warning(disable : 4799)
|
||||||
#endif
|
#endif
|
||||||
|
@ -81,7 +81,7 @@ static SDL_INLINE void SDL_memcpyMMX(Uint8 *dst, const Uint8 *src, int len)
|
||||||
SDL_memcpy(dst + skip, src + skip, remain);
|
SDL_memcpy(dst + skip, src + skip, remain);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif /* __MMX__ */
|
#endif /* HAVE_MMX_INTRINSICS */
|
||||||
|
|
||||||
void SDL_BlitCopy(SDL_BlitInfo *info)
|
void SDL_BlitCopy(SDL_BlitInfo *info)
|
||||||
{
|
{
|
||||||
|
@ -122,7 +122,7 @@ void SDL_BlitCopy(SDL_BlitInfo *info)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __SSE__
|
#if HAVE_SSE_INTRINSICS
|
||||||
if (SDL_HasSSE() &&
|
if (SDL_HasSSE() &&
|
||||||
!((uintptr_t)src & 15) && !(srcskip & 15) &&
|
!((uintptr_t)src & 15) && !(srcskip & 15) &&
|
||||||
!((uintptr_t)dst & 15) && !(dstskip & 15)) {
|
!((uintptr_t)dst & 15) && !(dstskip & 15)) {
|
||||||
|
@ -135,7 +135,7 @@ void SDL_BlitCopy(SDL_BlitInfo *info)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __MMX__
|
#if HAVE_MMX_INTRINSICS
|
||||||
if (SDL_HasMMX() && !(srcskip & 7) && !(dstskip & 7)) {
|
if (SDL_HasMMX() && !(srcskip & 7) && !(dstskip & 7)) {
|
||||||
while (h--) {
|
while (h--) {
|
||||||
SDL_memcpyMMX(dst, src, w);
|
SDL_memcpyMMX(dst, src, w);
|
||||||
|
|
|
@ -22,7 +22,7 @@
|
||||||
|
|
||||||
#include "SDL_blit.h"
|
#include "SDL_blit.h"
|
||||||
|
|
||||||
#ifdef __SSE__
|
#if HAVE_SSE_INTRINSICS
|
||||||
/* *INDENT-OFF* */ /* clang-format off */
|
/* *INDENT-OFF* */ /* clang-format off */
|
||||||
|
|
||||||
#if defined(_MSC_VER) && !defined(__clang__)
|
#if defined(_MSC_VER) && !defined(__clang__)
|
||||||
|
@ -376,7 +376,7 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
|
||||||
{
|
{
|
||||||
color |= (color << 8);
|
color |= (color << 8);
|
||||||
color |= (color << 16);
|
color |= (color << 16);
|
||||||
#ifdef __SSE__
|
#if HAVE_SSE_INTRINSICS
|
||||||
if (SDL_HasSSE()) {
|
if (SDL_HasSSE()) {
|
||||||
fill_function = SDL_FillSurfaceRect1SSE;
|
fill_function = SDL_FillSurfaceRect1SSE;
|
||||||
break;
|
break;
|
||||||
|
@ -389,7 +389,7 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
|
||||||
case 2:
|
case 2:
|
||||||
{
|
{
|
||||||
color |= (color << 16);
|
color |= (color << 16);
|
||||||
#ifdef __SSE__
|
#if HAVE_SSE_INTRINSICS
|
||||||
if (SDL_HasSSE()) {
|
if (SDL_HasSSE()) {
|
||||||
fill_function = SDL_FillSurfaceRect2SSE;
|
fill_function = SDL_FillSurfaceRect2SSE;
|
||||||
break;
|
break;
|
||||||
|
@ -408,7 +408,7 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
|
||||||
|
|
||||||
case 4:
|
case 4:
|
||||||
{
|
{
|
||||||
#ifdef __SSE__
|
#if HAVE_SSE_INTRINSICS
|
||||||
if (SDL_HasSSE()) {
|
if (SDL_HasSSE()) {
|
||||||
fill_function = SDL_FillSurfaceRect4SSE;
|
fill_function = SDL_FillSurfaceRect4SSE;
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -332,10 +332,6 @@ static int scale_mat(const Uint32 *src, int src_w, int src_h, int src_pitch,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__SSE2__)
|
|
||||||
#define HAVE_SSE2_INTRINSICS 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__ARM_NEON)
|
#if defined(__ARM_NEON)
|
||||||
#define HAVE_NEON_INTRINSICS 1
|
#define HAVE_NEON_INTRINSICS 1
|
||||||
#define CAST_uint8x8_t (uint8x8_t)
|
#define CAST_uint8x8_t (uint8x8_t)
|
||||||
|
|
|
@ -310,7 +310,7 @@ static SDL_bool yuv_rgb_sse(
|
||||||
Uint8 *rgb, Uint32 rgb_stride,
|
Uint8 *rgb, Uint32 rgb_stride,
|
||||||
YCbCrType yuv_type)
|
YCbCrType yuv_type)
|
||||||
{
|
{
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
if (!SDL_HasSSE2()) {
|
if (!SDL_HasSSE2()) {
|
||||||
return SDL_FALSE;
|
return SDL_FALSE;
|
||||||
}
|
}
|
||||||
|
@ -1114,7 +1114,7 @@ static int SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const voi
|
||||||
const Uint8 *src1, *src2;
|
const Uint8 *src1, *src2;
|
||||||
Uint8 *dstUV;
|
Uint8 *dstUV;
|
||||||
Uint8 *tmp = NULL;
|
Uint8 *tmp = NULL;
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1144,7 +1144,7 @@ static int SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const voi
|
||||||
y = UVheight;
|
y = UVheight;
|
||||||
while (y--) {
|
while (y--) {
|
||||||
x = UVwidth;
|
x = UVwidth;
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
if (use_SSE2) {
|
if (use_SSE2) {
|
||||||
while (x >= 16) {
|
while (x >= 16) {
|
||||||
__m128i u = _mm_loadu_si128((__m128i *)src1);
|
__m128i u = _mm_loadu_si128((__m128i *)src1);
|
||||||
|
@ -1187,7 +1187,7 @@ static int SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const vo
|
||||||
const Uint8 *srcUV;
|
const Uint8 *srcUV;
|
||||||
Uint8 *dst1, *dst2;
|
Uint8 *dst1, *dst2;
|
||||||
Uint8 *tmp = NULL;
|
Uint8 *tmp = NULL;
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1217,7 +1217,7 @@ static int SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const vo
|
||||||
y = UVheight;
|
y = UVheight;
|
||||||
while (y--) {
|
while (y--) {
|
||||||
x = UVwidth;
|
x = UVwidth;
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
if (use_SSE2) {
|
if (use_SSE2) {
|
||||||
__m128i mask = _mm_set1_epi16(0x00FF);
|
__m128i mask = _mm_set1_epi16(0x00FF);
|
||||||
while (x >= 16) {
|
while (x >= 16) {
|
||||||
|
@ -1264,7 +1264,7 @@ static int SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int
|
||||||
const int dstUVPitchLeft = (dstUVPitch - UVwidth * 2) / sizeof(Uint16);
|
const int dstUVPitchLeft = (dstUVPitch - UVwidth * 2) / sizeof(Uint16);
|
||||||
const Uint16 *srcUV;
|
const Uint16 *srcUV;
|
||||||
Uint16 *dstUV;
|
Uint16 *dstUV;
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1277,7 +1277,7 @@ static int SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int
|
||||||
y = UVheight;
|
y = UVheight;
|
||||||
while (y--) {
|
while (y--) {
|
||||||
x = UVwidth;
|
x = UVwidth;
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
if (use_SSE2) {
|
if (use_SSE2) {
|
||||||
while (x >= 8) {
|
while (x >= 8) {
|
||||||
__m128i uv = _mm_loadu_si128((__m128i *)srcUV);
|
__m128i uv = _mm_loadu_si128((__m128i *)srcUV);
|
||||||
|
@ -1372,7 +1372,7 @@ static int SDL_ConvertPixels_Planar2x2_to_Planar2x2(int width, int height,
|
||||||
SDL_GetPixelFormatName(dst_format));
|
SDL_GetPixelFormatName(dst_format));
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
#define PACKED4_TO_PACKED4_ROW_SSE2(shuffle) \
|
#define PACKED4_TO_PACKED4_ROW_SSE2(shuffle) \
|
||||||
while (x >= 4) { \
|
while (x >= 4) { \
|
||||||
__m128i yuv = _mm_loadu_si128((__m128i *)srcYUV); \
|
__m128i yuv = _mm_loadu_si128((__m128i *)srcYUV); \
|
||||||
|
@ -1399,14 +1399,14 @@ static int SDL_ConvertPixels_YUY2_to_UYVY(int width, int height, const void *src
|
||||||
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
||||||
const Uint8 *srcYUV = (const Uint8 *)src;
|
const Uint8 *srcYUV = (const Uint8 *)src;
|
||||||
Uint8 *dstYUV = (Uint8 *)dst;
|
Uint8 *dstYUV = (Uint8 *)dst;
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
y = height;
|
y = height;
|
||||||
while (y--) {
|
while (y--) {
|
||||||
x = YUVwidth;
|
x = YUVwidth;
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
if (use_SSE2) {
|
if (use_SSE2) {
|
||||||
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
|
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
|
||||||
}
|
}
|
||||||
|
@ -1440,14 +1440,14 @@ static int SDL_ConvertPixels_YUY2_to_YVYU(int width, int height, const void *src
|
||||||
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
||||||
const Uint8 *srcYUV = (const Uint8 *)src;
|
const Uint8 *srcYUV = (const Uint8 *)src;
|
||||||
Uint8 *dstYUV = (Uint8 *)dst;
|
Uint8 *dstYUV = (Uint8 *)dst;
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
y = height;
|
y = height;
|
||||||
while (y--) {
|
while (y--) {
|
||||||
x = YUVwidth;
|
x = YUVwidth;
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
if (use_SSE2) {
|
if (use_SSE2) {
|
||||||
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
|
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
|
||||||
}
|
}
|
||||||
|
@ -1481,14 +1481,14 @@ static int SDL_ConvertPixels_UYVY_to_YUY2(int width, int height, const void *src
|
||||||
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
||||||
const Uint8 *srcYUV = (const Uint8 *)src;
|
const Uint8 *srcYUV = (const Uint8 *)src;
|
||||||
Uint8 *dstYUV = (Uint8 *)dst;
|
Uint8 *dstYUV = (Uint8 *)dst;
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
y = height;
|
y = height;
|
||||||
while (y--) {
|
while (y--) {
|
||||||
x = YUVwidth;
|
x = YUVwidth;
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
if (use_SSE2) {
|
if (use_SSE2) {
|
||||||
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
|
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
|
||||||
}
|
}
|
||||||
|
@ -1522,14 +1522,14 @@ static int SDL_ConvertPixels_UYVY_to_YVYU(int width, int height, const void *src
|
||||||
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
||||||
const Uint8 *srcYUV = (const Uint8 *)src;
|
const Uint8 *srcYUV = (const Uint8 *)src;
|
||||||
Uint8 *dstYUV = (Uint8 *)dst;
|
Uint8 *dstYUV = (Uint8 *)dst;
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
y = height;
|
y = height;
|
||||||
while (y--) {
|
while (y--) {
|
||||||
x = YUVwidth;
|
x = YUVwidth;
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
if (use_SSE2) {
|
if (use_SSE2) {
|
||||||
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(0, 3, 2, 1));
|
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(0, 3, 2, 1));
|
||||||
}
|
}
|
||||||
|
@ -1563,14 +1563,14 @@ static int SDL_ConvertPixels_YVYU_to_YUY2(int width, int height, const void *src
|
||||||
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
||||||
const Uint8 *srcYUV = (const Uint8 *)src;
|
const Uint8 *srcYUV = (const Uint8 *)src;
|
||||||
Uint8 *dstYUV = (Uint8 *)dst;
|
Uint8 *dstYUV = (Uint8 *)dst;
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
y = height;
|
y = height;
|
||||||
while (y--) {
|
while (y--) {
|
||||||
x = YUVwidth;
|
x = YUVwidth;
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
if (use_SSE2) {
|
if (use_SSE2) {
|
||||||
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
|
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
|
||||||
}
|
}
|
||||||
|
@ -1604,14 +1604,14 @@ static int SDL_ConvertPixels_YVYU_to_UYVY(int width, int height, const void *src
|
||||||
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
||||||
const Uint8 *srcYUV = (const Uint8 *)src;
|
const Uint8 *srcYUV = (const Uint8 *)src;
|
||||||
Uint8 *dstYUV = (Uint8 *)dst;
|
Uint8 *dstYUV = (Uint8 *)dst;
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
y = height;
|
y = height;
|
||||||
while (y--) {
|
while (y--) {
|
||||||
x = YUVwidth;
|
x = YUVwidth;
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
if (use_SSE2) {
|
if (use_SSE2) {
|
||||||
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 1, 0, 3));
|
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 1, 0, 3));
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,7 +6,6 @@
|
||||||
|
|
||||||
#include "yuv_rgb.h"
|
#include "yuv_rgb.h"
|
||||||
|
|
||||||
|
|
||||||
#define PRECISION 6
|
#define PRECISION 6
|
||||||
#define PRECISION_FACTOR (1<<PRECISION)
|
#define PRECISION_FACTOR (1<<PRECISION)
|
||||||
|
|
||||||
|
@ -240,7 +239,7 @@ void rgb24_yuv420_std(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#if HAVE_SSE2_INTRINSICS
|
||||||
|
|
||||||
#define SSE_FUNCTION_NAME yuv420_rgb565_sse
|
#define SSE_FUNCTION_NAME yuv420_rgb565_sse
|
||||||
#define STD_FUNCTION_NAME yuv420_rgb565_std
|
#define STD_FUNCTION_NAME yuv420_rgb565_std
|
||||||
|
@ -683,7 +682,7 @@ void rgb24_yuv420_sseu(uint32_t width, uint32_t height,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif //__SSE2__
|
#endif //HAVE_SSE2_INTRINSICS
|
||||||
|
|
||||||
#ifdef __loongarch_sx
|
#ifdef __loongarch_sx
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue