cmake: Detect AVX + allow build system to disable Intel intrinsics

main
Anonymous Maarten 2023-02-25 00:21:15 +01:00 committed by Anonymous Maarten
parent 683411e96f
commit 4681240241
16 changed files with 175 additions and 116 deletions

View File

@ -367,6 +367,7 @@ option_string(SDL_ASSERTIONS "Enable internal sanity checks (auto/disabled/relea
#set_option(SDL_DEPENDENCY_TRACKING "Use gcc -MMD -MT dependency tracking" ON)
set_option(SDL_ASSEMBLY "Enable assembly routines" ${SDL_ASSEMBLY_DEFAULT})
dep_option(SDL_SSEMATH "Allow GCC to use SSE floating point math" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
dep_option(SDL_AVX "Use AVX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
dep_option(SDL_SSE "Use SSE assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
dep_option(SDL_SSE2 "Use SSE2 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
dep_option(SDL_SSE3 "Use SSE3 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
@ -711,6 +712,32 @@ if(SDL_ASSEMBLY)
# TODO: Those all seem to be quite GCC specific - needs to be
# reworked for better compiler support
set(HAVE_ASSEMBLY TRUE)
if(SDL_AVX)
  # Probe whether the C compiler accepts -mavx and, with that flag active,
  # defines __AVX__. The probe source pulls in the intrinsics header
  # (<intrin.h> when __MINGW64_VERSION_MAJOR is defined under __MINGW32__,
  # <immintrin.h> otherwise) and hits #error when __AVX__ is missing.
  # push/pop keeps the extra flag from leaking into later checks.
  cmake_push_check_state()
  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mavx")
  check_c_source_compiles("
#ifdef __MINGW32__
#include <_mingw.h>
#ifdef __MINGW64_VERSION_MAJOR
#include <intrin.h>
#else
#include <immintrin.h>
#endif
#else
#include <immintrin.h>
#endif
#ifndef __AVX__
#error Assembler CPP flag not enabled
#endif
int main(int argc, char **argv) { return 0; }" CPU_SUPPORTS_AVX)
  cmake_pop_check_state()

  if(CPU_SUPPORTS_AVX)
    # Probe succeeded: propagate -mavx through the sdl-build-options
    # interface target and record the result in HAVE_AVX.
    target_compile_options(sdl-build-options INTERFACE "-mavx")
    set(HAVE_AVX TRUE)
  endif()
endif()
if(SDL_MMX)
cmake_push_check_state()
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mmmx")
@ -728,9 +755,10 @@ if(SDL_ASSEMBLY)
#ifndef __MMX__
#error Assembler CPP flag not enabled
#endif
int main(int argc, char **argv) { return 0; }" HAVE_MMX)
int main(int argc, char **argv) { return 0; }" CPU_SUPPORTS_MMX)
cmake_pop_check_state()
if(HAVE_MMX)
if(CPU_SUPPORTS_MMX)
set(HAVE_MMX TRUE)
target_compile_options(sdl-build-options INTERFACE "-mmmx")
endif()
endif()
@ -823,8 +851,6 @@ if(SDL_ASSEMBLY)
set(HAVE_SSEMATH TRUE)
endif()
check_include_file("immintrin.h" HAVE_IMMINTRIN_H)
if(SDL_ALTIVEC)
cmake_push_check_state()
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -maltivec")
@ -954,10 +980,46 @@ if(SDL_ASSEMBLY)
if(SDL_SSE3)
set(HAVE_SSE3 TRUE)
endif()
check_include_file("immintrin.h" HAVE_IMMINTRIN_H)
if(SDL_AVX)
  # Probe whether the compiler defines __AVX__ once /arch:AVX is requested.
  #
  # /arch:AVX is a *compiler* code-generation switch, so it has to be part of
  # CMAKE_REQUIRED_FLAGS (used to compile the probe) rather than
  # CMAKE_REQUIRED_LIBRARIES (only forwarded to the link step). When it is
  # passed only to the linker the probe source is compiled without the switch,
  # __AVX__ stays undefined, the #error below fires and AVX is never detected.
  cmake_push_check_state()
  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} /arch:AVX")
  check_c_source_compiles("
#include <immintrin.h>
#ifndef __AVX__
#error Assembler CPP flag not enabled
#endif
int main(int argc, char **argv) { return 0; }" CPU_SUPPORTS_AVX)
  cmake_pop_check_state()

  if(CPU_SUPPORTS_AVX)
    # Same reasoning as above: this is a compile option, not a link item.
    # NOTE(review): like the -mavx handling elsewhere in this file, this
    # enables AVX code generation for every consumer of sdl-build-options;
    # confirm that is acceptable for the minimum supported CPU.
    target_compile_options(sdl-build-options INTERFACE "/arch:AVX")
    set(HAVE_AVX TRUE)
  endif()
endif()
endif()
endif()
# For every x86 instruction-set extension whose HAVE_<ISA> result is not
# truthy, set the matching SDL_DISABLE_<ISA> variable to 1.
foreach(_isa AVX MMX SSE SSE2 SSE3)
  if(NOT HAVE_${_isa})
    set(SDL_DISABLE_${_isa} 1)
  endif()
endforeach()
# TODO: Can't deactivate on FreeBSD? w/o LIBC, SDL_stdinc.h can't define
# anything.
if(SDL_LIBC)

View File

@ -101,21 +101,20 @@ _m_prefetch(void *__P)
#include <lasxintrin.h>
#define __LASX__
#endif
#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H)
#if defined(__AVX__) && !defined(SDL_DISABLE_AVX)
#include <immintrin.h>
#else
#if defined(__MMX__) && !defined(SDL_DISABLE_MMINTRIN_H)
#endif
#if defined(__MMX__) && !defined(SDL_DISABLE_MMX)
#include <mmintrin.h>
#endif
#if defined(__SSE__) && !defined(SDL_DISABLE_XMMINTRIN_H)
#if defined(__SSE__) && !defined(SDL_DISABLE_SSE)
#include <xmmintrin.h>
#endif
#if defined(__SSE2__) && !defined(SDL_DISABLE_EMMINTRIN_H)
#if defined(__SSE2__) && !defined(SDL_DISABLE_SSE2)
#include <emmintrin.h>
#endif
#if defined(__SSE3__) && !defined(SDL_DISABLE_PMMINTRIN_H)
#if defined(__SSE3__) && !defined(SDL_DISABLE_SSE3)
#include <pmmintrin.h>
#endif
#endif /* HAVE_IMMINTRIN_H */
#endif /* SDL_intrin_h_ */

View File

@ -226,18 +226,6 @@
#cmakedefine HAVE_LIBUSB 1
#cmakedefine HAVE_O_CLOEXEC 1
/* Apple platforms might be building universal binaries, where Intel builds
can use immintrin.h but other architectures can't. */
#ifdef __APPLE__
# if defined(__has_include) && (defined(__i386__) || defined(__x86_64))
# if __has_include(<immintrin.h>)
# define HAVE_IMMINTRIN_H 1
# endif
# endif
#else /* non-Apple platforms can use the normal CMake check for this. */
#cmakedefine HAVE_IMMINTRIN_H 1
#endif
#cmakedefine HAVE_LIBUDEV_H 1
#cmakedefine HAVE_LIBSAMPLERATE_H 1
#cmakedefine HAVE_LIBDECOR_H 1
@ -597,4 +585,11 @@ typedef unsigned int uintptr_t;
#endif /* Visual Studio 2008 */
#endif /* !_STDINT_H_ && !HAVE_STDINT_H */
/* Configure use of intrinsics.
 * Each SDL_DISABLE_<ISA> macro is defined to 1 when the build system sets the
 * CMake variable of the same name; the headers test these macros together
 * with the compiler's own __<ISA>__ macro before using the intrinsics.
 * SDL_DISABLE_MMX must be listed here too: the build system sets it and the
 * headers check it, so omitting it silently ignores the MMX opt-out. */
#cmakedefine SDL_DISABLE_MMX 1
#cmakedefine SDL_DISABLE_SSE 1
#cmakedefine SDL_DISABLE_SSE2 1
#cmakedefine SDL_DISABLE_SSE3 1
#cmakedefine SDL_DISABLE_AVX 1
#endif /* SDL_build_config_h_ */

View File

@ -138,8 +138,8 @@
#define HAVE_SYSCTLBYNAME 1
#if defined(__has_include) && (defined(__i386__) || defined(__x86_64))
# if __has_include(<immintrin.h>)
# define HAVE_IMMINTRIN_H 1
# if !__has_include(<immintrin.h>)
# define SDL_DISABLE_AVX 1
# endif
#endif

View File

@ -103,11 +103,12 @@ typedef unsigned int uintptr_t;
#define HAVE_TPCSHRD_H 1
#define HAVE_SENSORSAPI_H 1
#if (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)) && (defined(_MSC_VER) && _MSC_VER >= 1600)
#define HAVE_IMMINTRIN_H 1
#elif defined(__has_include) && (defined(__i386__) || defined(__x86_64))
# if __has_include(<immintrin.h>)
# define HAVE_IMMINTRIN_H 1
# if !__has_include(<immintrin.h>)
# define SDL_DISABLE_AVX 1
# endif
#else
# define SDL_DISABLE_AVX 1
#endif
/* This is disabled by default to avoid C runtime dependencies and manifest requirements */

View File

@ -48,11 +48,12 @@
#define HAVE_TPCSHRD_H 1
#define HAVE_SENSORSAPI_H 1
#if (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)) && (defined(_MSC_VER) && _MSC_VER >= 1600)
#define HAVE_IMMINTRIN_H 1
#elif defined(__has_include) && (defined(__i386__) || defined(__x86_64))
# if __has_include(<immintrin.h>)
# define HAVE_IMMINTRIN_H 1
# if !__has_include(<immintrin.h>)
# define SDL_DISABLE_AVX 1
# endif
#else
# define SDL_DISABLE_AVX 1
#endif
/* This is disabled by default to avoid C runtime dependencies and manifest requirements */

View File

@ -48,11 +48,12 @@
/*#define HAVE_TPCSHRD_H 1*/
/*#define HAVE_SENSORSAPI_H 1*/
#if (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)) && (defined(_MSC_VER) && _MSC_VER >= 1600)
#define HAVE_IMMINTRIN_H 1
#elif defined(__has_include) && (defined(__i386__) || defined(__x86_64))
# if __has_include(<immintrin.h>)
# define HAVE_IMMINTRIN_H 1
# if !__has_include(<immintrin.h>)
# define SDL_DISABLE_AVX 1
# endif
#else
# define SDL_DISABLE_AVX 1
#endif
/* This is disabled by default to avoid C runtime dependencies and manifest requirements */

View File

@ -185,6 +185,46 @@
#include <SDL3/SDL.h>
#include <SDL3/SDL_intrin.h>
/* Derive the HAVE_<ISA>_INTRINSICS feature macros used by the sources. */

/* NEON only depends on the compiler-provided macro. */
#if defined(__ARM_NEON)
#define HAVE_NEON_INTRINSICS 1
#endif

/* Each x86 extension is enabled when the compiler advertises it AND the
 * matching SDL_DISABLE_<ISA> opt-out is not defined. */
#if !defined(SDL_DISABLE_MMX) && defined(__MMX__)
#define HAVE_MMX_INTRINSICS 1
#endif

#if !defined(SDL_DISABLE_SSE) && defined(__SSE__)
#define HAVE_SSE_INTRINSICS 1
#endif

#if !defined(SDL_DISABLE_SSE2) && defined(__SSE2__)
#define HAVE_SSE2_INTRINSICS 1
#endif

#if !defined(SDL_DISABLE_SSE3) && defined(__SSE3__)
#define HAVE_SSE3_INTRINSICS 1
#endif

#if !defined(SDL_DISABLE_AVX) && defined(__AVX__)
#define HAVE_AVX_INTRINSICS 1
#endif

/* Compiler-specific vetoes for AVX:
 *  - clang: withdrawn when the 'target' attribute is unavailable, and when
 *    _MSC_VER or __SCE__ is defined while __AVX__ is not;
 *  - GCC: withdrawn for releases older than 4.9. */
#if defined(__clang__)
#  if !__has_attribute(target)
#    undef HAVE_AVX_INTRINSICS
#  endif
#  if !defined(__AVX__) && (defined(_MSC_VER) || defined(__SCE__))
#    undef HAVE_AVX_INTRINSICS
#  endif
#elif defined(__GNUC__)
#  if (__GNUC__ < 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ < 9))
#    undef HAVE_AVX_INTRINSICS
#  endif
#endif
#define SDL_MAIN_NOIMPL /* don't drag in header-only implementation of SDL_main */
#include <SDL3/SDL_main.h>

View File

@ -29,35 +29,6 @@
#define DEBUG_AUDIOSTREAM 0
#ifdef __ARM_NEON
#define HAVE_NEON_INTRINSICS 1
#endif
#ifdef __SSE__
#define HAVE_SSE_INTRINSICS 1
#endif
#ifdef __SSE3__
#define HAVE_SSE3_INTRINSICS 1
#endif
#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H)
#define HAVE_AVX_INTRINSICS 1
#endif
#if defined __clang__
#if (!__has_attribute(target))
#undef HAVE_AVX_INTRINSICS
#endif
#if (defined(_MSC_VER) || defined(__SCE__)) && !defined(__AVX__)
#undef HAVE_AVX_INTRINSICS
#endif
#elif defined __GNUC__
#if (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 9)
#undef HAVE_AVX_INTRINSICS
#endif
#endif
/**
* Initialize an SDL_AudioCVT structure for conversion.
*

View File

@ -27,10 +27,6 @@
#define HAVE_NEON_INTRINSICS 1
#endif
#ifdef __SSE2__
#define HAVE_SSE2_INTRINSICS 1
#endif
#if defined(__x86_64__) && HAVE_SSE2_INTRINSICS
#define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* x86_64 guarantees SSE2. */
#elif __MACOS__ && HAVE_SSE2_INTRINSICS

View File

@ -166,7 +166,7 @@ static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info)
}
}
#ifdef __MMX__
#if HAVE_MMX_INTRINSICS
/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info)
@ -409,7 +409,7 @@ static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info)
_mm_empty();
}
#endif /* __MMX__ */
#endif /* HAVE_MMX_INTRINSICS */
#if SDL_ARM_SIMD_BLITTERS
void BlitARGBto565PixelAlphaARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
@ -750,7 +750,7 @@ static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask)
}
}
#ifdef __MMX__
#if HAVE_MMX_INTRINSICS
/* fast RGB565->RGB565 blending with surface alpha */
static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info)
@ -1025,7 +1025,7 @@ static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info)
}
}
#endif /* __MMX__ */
#endif /* HAVE_MMX_INTRINSICS */
/* fast RGB565->RGB565 blending with surface alpha */
static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info)
@ -1357,15 +1357,13 @@ SDL_CalculateBlitA(SDL_Surface *surface)
case 4:
if (sf->Rmask == df->Rmask && sf->Gmask == df->Gmask && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
#if defined(__MMX__)
#if HAVE_MMX_INTRINSICS
if (sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0 && sf->Ashift % 8 == 0 && sf->Aloss == 0) {
#ifdef __MMX__
if (SDL_HasMMX()) {
return BlitRGBtoRGBPixelAlphaMMX;
}
#endif
}
#endif /* __MMX__ */
#endif /* HAVE_MMX_INTRINSICS */
if (sf->Amask == 0xff000000) {
#if SDL_ARM_NEON_BLITTERS
if (SDL_HasNEON()) {
@ -1407,7 +1405,7 @@ SDL_CalculateBlitA(SDL_Surface *surface)
case 2:
if (surface->map->identity) {
if (df->Gmask == 0x7e0) {
#ifdef __MMX__
#if HAVE_MMX_INTRINSICS
if (SDL_HasMMX()) {
return Blit565to565SurfaceAlphaMMX;
} else
@ -1416,7 +1414,7 @@ SDL_CalculateBlitA(SDL_Surface *surface)
return Blit565to565SurfaceAlpha;
}
} else if (df->Gmask == 0x3e0) {
#ifdef __MMX__
#if HAVE_MMX_INTRINSICS
if (SDL_HasMMX()) {
return Blit555to555SurfaceAlphaMMX;
} else
@ -1430,7 +1428,7 @@ SDL_CalculateBlitA(SDL_Surface *surface)
case 4:
if (sf->Rmask == df->Rmask && sf->Gmask == df->Gmask && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
#ifdef __MMX__
#if HAVE_MMX_INTRINSICS
if (sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0 && SDL_HasMMX()) {
return BlitRGBtoRGBSurfaceAlphaMMX;
}

View File

@ -23,7 +23,7 @@
#include "SDL_blit.h"
#include "SDL_blit_copy.h"
#ifdef __SSE__
#if HAVE_SSE_INTRINSICS
/* This assumes 16-byte aligned src and dst */
static SDL_INLINE void SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len)
{
@ -48,9 +48,9 @@ static SDL_INLINE void SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len)
SDL_memcpy(dst, src, len & 63);
}
}
#endif /* __SSE__ */
#endif /* HAVE_SSE_INTRINSICS */
#ifdef __MMX__
#if HAVE_MMX_INTRINSICS
#ifdef _MSC_VER
#pragma warning(disable : 4799)
#endif
@ -81,7 +81,7 @@ static SDL_INLINE void SDL_memcpyMMX(Uint8 *dst, const Uint8 *src, int len)
SDL_memcpy(dst + skip, src + skip, remain);
}
}
#endif /* __MMX__ */
#endif /* HAVE_MMX_INTRINSICS */
void SDL_BlitCopy(SDL_BlitInfo *info)
{
@ -122,7 +122,7 @@ void SDL_BlitCopy(SDL_BlitInfo *info)
return;
}
#ifdef __SSE__
#if HAVE_SSE_INTRINSICS
if (SDL_HasSSE() &&
!((uintptr_t)src & 15) && !(srcskip & 15) &&
!((uintptr_t)dst & 15) && !(dstskip & 15)) {
@ -135,7 +135,7 @@ void SDL_BlitCopy(SDL_BlitInfo *info)
}
#endif
#ifdef __MMX__
#if HAVE_MMX_INTRINSICS
if (SDL_HasMMX() && !(srcskip & 7) && !(dstskip & 7)) {
while (h--) {
SDL_memcpyMMX(dst, src, w);

View File

@ -22,7 +22,7 @@
#include "SDL_blit.h"
#ifdef __SSE__
#if HAVE_SSE_INTRINSICS
/* *INDENT-OFF* */ /* clang-format off */
#if defined(_MSC_VER) && !defined(__clang__)
@ -376,7 +376,7 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
{
color |= (color << 8);
color |= (color << 16);
#ifdef __SSE__
#if HAVE_SSE_INTRINSICS
if (SDL_HasSSE()) {
fill_function = SDL_FillSurfaceRect1SSE;
break;
@ -389,7 +389,7 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
case 2:
{
color |= (color << 16);
#ifdef __SSE__
#if HAVE_SSE_INTRINSICS
if (SDL_HasSSE()) {
fill_function = SDL_FillSurfaceRect2SSE;
break;
@ -408,7 +408,7 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
case 4:
{
#ifdef __SSE__
#if HAVE_SSE_INTRINSICS
if (SDL_HasSSE()) {
fill_function = SDL_FillSurfaceRect4SSE;
break;

View File

@ -332,10 +332,6 @@ static int scale_mat(const Uint32 *src, int src_w, int src_h, int src_pitch,
return 0;
}
#if defined(__SSE2__)
#define HAVE_SSE2_INTRINSICS 1
#endif
#if defined(__ARM_NEON)
#define HAVE_NEON_INTRINSICS 1
#define CAST_uint8x8_t (uint8x8_t)

View File

@ -310,7 +310,7 @@ static SDL_bool yuv_rgb_sse(
Uint8 *rgb, Uint32 rgb_stride,
YCbCrType yuv_type)
{
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (!SDL_HasSSE2()) {
return SDL_FALSE;
}
@ -1114,7 +1114,7 @@ static int SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const voi
const Uint8 *src1, *src2;
Uint8 *dstUV;
Uint8 *tmp = NULL;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
const SDL_bool use_SSE2 = SDL_HasSSE2();
#endif
@ -1144,7 +1144,7 @@ static int SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const voi
y = UVheight;
while (y--) {
x = UVwidth;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (use_SSE2) {
while (x >= 16) {
__m128i u = _mm_loadu_si128((__m128i *)src1);
@ -1187,7 +1187,7 @@ static int SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const vo
const Uint8 *srcUV;
Uint8 *dst1, *dst2;
Uint8 *tmp = NULL;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
const SDL_bool use_SSE2 = SDL_HasSSE2();
#endif
@ -1217,7 +1217,7 @@ static int SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const vo
y = UVheight;
while (y--) {
x = UVwidth;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (use_SSE2) {
__m128i mask = _mm_set1_epi16(0x00FF);
while (x >= 16) {
@ -1264,7 +1264,7 @@ static int SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int
const int dstUVPitchLeft = (dstUVPitch - UVwidth * 2) / sizeof(Uint16);
const Uint16 *srcUV;
Uint16 *dstUV;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
const SDL_bool use_SSE2 = SDL_HasSSE2();
#endif
@ -1277,7 +1277,7 @@ static int SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int
y = UVheight;
while (y--) {
x = UVwidth;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (use_SSE2) {
while (x >= 8) {
__m128i uv = _mm_loadu_si128((__m128i *)srcUV);
@ -1372,7 +1372,7 @@ static int SDL_ConvertPixels_Planar2x2_to_Planar2x2(int width, int height,
SDL_GetPixelFormatName(dst_format));
}
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
#define PACKED4_TO_PACKED4_ROW_SSE2(shuffle) \
while (x >= 4) { \
__m128i yuv = _mm_loadu_si128((__m128i *)srcYUV); \
@ -1399,14 +1399,14 @@ static int SDL_ConvertPixels_YUY2_to_UYVY(int width, int height, const void *src
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
const Uint8 *srcYUV = (const Uint8 *)src;
Uint8 *dstYUV = (Uint8 *)dst;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
const SDL_bool use_SSE2 = SDL_HasSSE2();
#endif
y = height;
while (y--) {
x = YUVwidth;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (use_SSE2) {
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
}
@ -1440,14 +1440,14 @@ static int SDL_ConvertPixels_YUY2_to_YVYU(int width, int height, const void *src
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
const Uint8 *srcYUV = (const Uint8 *)src;
Uint8 *dstYUV = (Uint8 *)dst;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
const SDL_bool use_SSE2 = SDL_HasSSE2();
#endif
y = height;
while (y--) {
x = YUVwidth;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (use_SSE2) {
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
}
@ -1481,14 +1481,14 @@ static int SDL_ConvertPixels_UYVY_to_YUY2(int width, int height, const void *src
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
const Uint8 *srcYUV = (const Uint8 *)src;
Uint8 *dstYUV = (Uint8 *)dst;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
const SDL_bool use_SSE2 = SDL_HasSSE2();
#endif
y = height;
while (y--) {
x = YUVwidth;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (use_SSE2) {
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
}
@ -1522,14 +1522,14 @@ static int SDL_ConvertPixels_UYVY_to_YVYU(int width, int height, const void *src
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
const Uint8 *srcYUV = (const Uint8 *)src;
Uint8 *dstYUV = (Uint8 *)dst;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
const SDL_bool use_SSE2 = SDL_HasSSE2();
#endif
y = height;
while (y--) {
x = YUVwidth;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (use_SSE2) {
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(0, 3, 2, 1));
}
@ -1563,14 +1563,14 @@ static int SDL_ConvertPixels_YVYU_to_YUY2(int width, int height, const void *src
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
const Uint8 *srcYUV = (const Uint8 *)src;
Uint8 *dstYUV = (Uint8 *)dst;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
const SDL_bool use_SSE2 = SDL_HasSSE2();
#endif
y = height;
while (y--) {
x = YUVwidth;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (use_SSE2) {
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
}
@ -1604,14 +1604,14 @@ static int SDL_ConvertPixels_YVYU_to_UYVY(int width, int height, const void *src
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
const Uint8 *srcYUV = (const Uint8 *)src;
Uint8 *dstYUV = (Uint8 *)dst;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
const SDL_bool use_SSE2 = SDL_HasSSE2();
#endif
y = height;
while (y--) {
x = YUVwidth;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (use_SSE2) {
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 1, 0, 3));
}

View File

@ -6,7 +6,6 @@
#include "yuv_rgb.h"
#define PRECISION 6
#define PRECISION_FACTOR (1<<PRECISION)
@ -240,7 +239,7 @@ void rgb24_yuv420_std(
}
}
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
#define SSE_FUNCTION_NAME yuv420_rgb565_sse
#define STD_FUNCTION_NAME yuv420_rgb565_std
@ -683,7 +682,7 @@ void rgb24_yuv420_sseu(uint32_t width, uint32_t height,
}
#endif //__SSE2__
#endif //HAVE_SSE2_INTRINSICS
#ifdef __loongarch_sx