use target attributes of gcc / clang for SIMD code.
parent
cd7a3f8af5
commit
69de6964e5
|
@ -735,7 +735,6 @@ if(SDL_ASSEMBLY)
|
|||
cmake_pop_check_state()
|
||||
if(CPU_SUPPORTS_AVX)
|
||||
set(HAVE_AVX TRUE)
|
||||
target_compile_options(sdl-build-options INTERFACE "-mavx")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -760,7 +759,6 @@ if(SDL_ASSEMBLY)
|
|||
cmake_pop_check_state()
|
||||
if(CPU_SUPPORTS_MMX)
|
||||
set(HAVE_MMX TRUE)
|
||||
target_compile_options(sdl-build-options INTERFACE "-mmmx")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -785,7 +783,6 @@ if(SDL_ASSEMBLY)
|
|||
cmake_pop_check_state()
|
||||
if(CPU_SUPPORTS_SSE)
|
||||
set(HAVE_SSE ON)
|
||||
target_compile_options(sdl-build-options INTERFACE "-msse")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -810,7 +807,6 @@ if(SDL_ASSEMBLY)
|
|||
cmake_pop_check_state()
|
||||
if(CPU_SUPPORTS_SSE2)
|
||||
set(HAVE_SSE2 TRUE)
|
||||
target_compile_options(sdl-build-options INTERFACE "-msse2")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -835,7 +831,6 @@ if(SDL_ASSEMBLY)
|
|||
cmake_pop_check_state()
|
||||
if(CPU_SUPPORTS_SSE3)
|
||||
set(HAVE_SSE3 TRUE)
|
||||
target_compile_options(sdl-build-options INTERFACE "-msse3")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
|
|
@ -93,25 +93,39 @@ _m_prefetch(void *__P)
|
|||
#endif
|
||||
#endif /* compiler version */
|
||||
|
||||
/*
 * SDL_HAS_TARGET_ATTRIBS: the compiler accepts __attribute__((target("..."))),
 * so an individual function can be built for an instruction set that is not
 * enabled for the whole translation unit.
 *
 * The macro is only defined on x86 / x86-64: the checks that consume it use it
 * to pull in the x86 intrinsic headers and to enable the MMX/SSE/AVX code
 * paths, none of which exist on other architectures.
 */
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
# if defined(__clang__) && defined(__has_attribute)
#  if __has_attribute(target)
#   define SDL_HAS_TARGET_ATTRIBS
#  endif
# elif defined(__GNUC__) && (__GNUC__ + (__GNUC_MINOR__ >= 9) > 4) /* gcc >= 4.9 */
#  define SDL_HAS_TARGET_ATTRIBS
# endif
#endif

/*
 * SDL_TARGETING(x): decorate a function so it is compiled for instruction set
 * `x` (for example "sse2"). Expands to nothing when target attributes are not
 * available, in which case the instruction set has to be enabled globally.
 */
#ifdef SDL_HAS_TARGET_ATTRIBS
# define SDL_TARGETING(x) __attribute__((target(x)))
#else
# define SDL_TARGETING(x)
#endif
|
||||
|
||||
#if defined(__loongarch_sx) && !defined(SDL_DISABLE_LSX)
|
||||
#include <lsxintrin.h>
|
||||
#endif
|
||||
#if defined(__loongarch_asx) && !defined(SDL_DISABLE_LASX)
|
||||
#include <lasxintrin.h>
|
||||
#endif
|
||||
#if defined(__AVX__) && !defined(SDL_DISABLE_AVX)
|
||||
#if (defined(__AVX__) || defined(SDL_HAS_TARGET_ATTRIBS)) && !defined(SDL_DISABLE_AVX)
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#if defined(__MMX__) && !defined(SDL_DISABLE_MMX)
|
||||
#if (defined(__MMX__) || defined(SDL_HAS_TARGET_ATTRIBS)) && !defined(SDL_DISABLE_MMX)
|
||||
#include <mmintrin.h>
|
||||
#endif
|
||||
#if defined(__SSE__) && !defined(SDL_DISABLE_SSE)
|
||||
#if (defined(__SSE__) || defined(SDL_HAS_TARGET_ATTRIBS)) && !defined(SDL_DISABLE_SSE)
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
#if defined(__SSE2__) && !defined(SDL_DISABLE_SSE2)
|
||||
#if (defined(__SSE2__) || defined(SDL_HAS_TARGET_ATTRIBS)) && !defined(SDL_DISABLE_SSE2)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#if defined(__SSE3__) && !defined(SDL_DISABLE_SSE3)
|
||||
#if (defined(__SSE3__) || defined(SDL_HAS_TARGET_ATTRIBS)) && !defined(SDL_DISABLE_SSE3)
|
||||
#include <pmmintrin.h>
|
||||
#endif
|
||||
|
||||
|
|
|
@ -194,23 +194,23 @@
|
|||
#define HAVE_NEON_INTRINSICS 1
|
||||
#endif
|
||||
|
||||
#if defined(__MMX__) && !defined(SDL_DISABLE_MMX)
|
||||
#if (defined(__MMX__) || defined(SDL_HAS_TARGET_ATTRIBS)) && !defined(SDL_DISABLE_MMX)
|
||||
#define HAVE_MMX_INTRINSICS 1
|
||||
#endif
|
||||
|
||||
#if defined(__SSE__) && !defined(SDL_DISABLE_SSE)
|
||||
#if (defined(__SSE__) || defined(SDL_HAS_TARGET_ATTRIBS)) && !defined(SDL_DISABLE_SSE)
|
||||
#define HAVE_SSE_INTRINSICS 1
|
||||
#endif
|
||||
|
||||
#if defined(__SSE2__) && !defined(SDL_DISABLE_SSE2)
|
||||
#if (defined(__SSE2__) || defined(SDL_HAS_TARGET_ATTRIBS)) && !defined(SDL_DISABLE_SSE2)
|
||||
#define HAVE_SSE2_INTRINSICS 1
|
||||
#endif
|
||||
|
||||
#if defined(__SSE3__) && !defined(SDL_DISABLE_SSE3)
|
||||
#if (defined(__SSE3__) || defined(SDL_HAS_TARGET_ATTRIBS)) && !defined(SDL_DISABLE_SSE3)
|
||||
#define HAVE_SSE3_INTRINSICS 1
|
||||
#endif
|
||||
|
||||
#if defined(__AVX__) && !defined(SDL_DISABLE_AVX)
|
||||
#if (defined(__AVX__) || defined(SDL_HAS_TARGET_ATTRIBS)) && !defined(SDL_DISABLE_AVX)
|
||||
#define HAVE_AVX_INTRINSICS 1
|
||||
#endif
|
||||
|
||||
|
@ -222,19 +222,6 @@
|
|||
#define HAVE_LASX_INTRINSICS 1
|
||||
#endif
|
||||
|
||||
#if defined __clang__
|
||||
#if (!__has_attribute(target))
|
||||
#undef HAVE_AVX_INTRINSICS
|
||||
#endif
|
||||
#if (defined(_MSC_VER) || defined(__SCE__)) && !defined(__AVX__)
|
||||
#undef HAVE_AVX_INTRINSICS
|
||||
#endif
|
||||
#elif defined __GNUC__
|
||||
#if (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 9)
|
||||
#undef HAVE_AVX_INTRINSICS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#define SDL_MAIN_NOIMPL /* don't drag in header-only implementation of SDL_main */
|
||||
#include <SDL3/SDL_main.h>
|
||||
|
|
|
@ -146,7 +146,7 @@ static int SDL_ConvertAudio(SDL_AudioCVT * cvt);
|
|||
|
||||
#if HAVE_SSE3_INTRINSICS
|
||||
/* Convert from stereo to mono. Average left and right. */
|
||||
static void SDLCALL SDL_ConvertStereoToMono_SSE3(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
static void SDLCALL SDL_TARGETING("sse3") SDL_ConvertStereoToMono_SSE3(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
{
|
||||
const __m128 divby2 = _mm_set1_ps(0.5f);
|
||||
float *dst = (float *)cvt->buf;
|
||||
|
@ -183,7 +183,7 @@ static void SDLCALL SDL_ConvertStereoToMono_SSE3(SDL_AudioCVT *cvt, SDL_AudioFor
|
|||
|
||||
#if HAVE_SSE_INTRINSICS
|
||||
/* Convert from mono to stereo. Duplicate to stereo left and right. */
|
||||
static void SDLCALL SDL_ConvertMonoToStereo_SSE(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
static void SDLCALL SDL_TARGETING("sse") SDL_ConvertMonoToStereo_SSE(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
{
|
||||
float *dst = ((float *)(cvt->buf + (cvt->len_cvt * 2))) - 8;
|
||||
const float *src = ((const float *)(cvt->buf + cvt->len_cvt)) - 4;
|
||||
|
|
|
@ -225,7 +225,7 @@ static void SDLCALL SDL_Convert_F32_to_S32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFo
|
|||
#endif
|
||||
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
static void SDLCALL SDL_Convert_S8_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
static void SDLCALL SDL_TARGETING("sse2") SDL_Convert_S8_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
{
|
||||
const Sint8 *src = ((const Sint8 *)(cvt->buf + cvt->len_cvt)) - 1;
|
||||
float *dst = ((float *)(cvt->buf + cvt->len_cvt * 4)) - 1;
|
||||
|
@ -289,7 +289,7 @@ static void SDLCALL SDL_Convert_S8_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioForma
|
|||
}
|
||||
}
|
||||
|
||||
static void SDLCALL SDL_Convert_U8_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
static void SDLCALL SDL_TARGETING("sse2") SDL_Convert_U8_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
{
|
||||
const Uint8 *src = ((const Uint8 *)(cvt->buf + cvt->len_cvt)) - 1;
|
||||
float *dst = ((float *)(cvt->buf + cvt->len_cvt * 4)) - 1;
|
||||
|
@ -355,7 +355,7 @@ static void SDLCALL SDL_Convert_U8_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioForma
|
|||
}
|
||||
}
|
||||
|
||||
static void SDLCALL SDL_Convert_S16_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
static void SDLCALL SDL_TARGETING("sse2") SDL_Convert_S16_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
{
|
||||
const Sint16 *src = ((const Sint16 *)(cvt->buf + cvt->len_cvt)) - 1;
|
||||
float *dst = ((float *)(cvt->buf + cvt->len_cvt * 2)) - 1;
|
||||
|
@ -408,7 +408,7 @@ static void SDLCALL SDL_Convert_S16_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioForm
|
|||
}
|
||||
}
|
||||
|
||||
static void SDLCALL SDL_Convert_S32_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
static void SDLCALL SDL_TARGETING("sse2") SDL_Convert_S32_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
{
|
||||
const Sint32 *src = (const Sint32 *)cvt->buf;
|
||||
float *dst = (float *)cvt->buf;
|
||||
|
@ -451,7 +451,7 @@ static void SDLCALL SDL_Convert_S32_to_F32_SSE2(SDL_AudioCVT *cvt, SDL_AudioForm
|
|||
}
|
||||
}
|
||||
|
||||
static void SDLCALL SDL_Convert_F32_to_S8_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
static void SDLCALL SDL_TARGETING("sse2") SDL_Convert_F32_to_S8_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
{
|
||||
const float *src = (const float *)cvt->buf;
|
||||
Sint8 *dst = (Sint8 *)cvt->buf;
|
||||
|
@ -514,7 +514,7 @@ static void SDLCALL SDL_Convert_F32_to_S8_SSE2(SDL_AudioCVT *cvt, SDL_AudioForma
|
|||
}
|
||||
}
|
||||
|
||||
static void SDLCALL SDL_Convert_F32_to_U8_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
static void SDLCALL SDL_TARGETING("sse2") SDL_Convert_F32_to_U8_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
{
|
||||
const float *src = (const float *)cvt->buf;
|
||||
Uint8 *dst = cvt->buf;
|
||||
|
@ -577,7 +577,7 @@ static void SDLCALL SDL_Convert_F32_to_U8_SSE2(SDL_AudioCVT *cvt, SDL_AudioForma
|
|||
}
|
||||
}
|
||||
|
||||
static void SDLCALL SDL_Convert_F32_to_S16_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
static void SDLCALL SDL_TARGETING("sse2") SDL_Convert_F32_to_S16_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
{
|
||||
const float *src = (const float *)cvt->buf;
|
||||
Sint16 *dst = (Sint16 *)cvt->buf;
|
||||
|
@ -638,7 +638,7 @@ static void SDLCALL SDL_Convert_F32_to_S16_SSE2(SDL_AudioCVT *cvt, SDL_AudioForm
|
|||
}
|
||||
}
|
||||
|
||||
static void SDLCALL SDL_Convert_F32_to_S32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
static void SDLCALL SDL_TARGETING("sse2") SDL_Convert_F32_to_S32_SSE2(SDL_AudioCVT *cvt, SDL_AudioFormat format)
|
||||
{
|
||||
const float *src = (const float *)cvt->buf;
|
||||
Sint32 *dst = (Sint32 *)cvt->buf;
|
||||
|
|
|
@ -169,7 +169,7 @@ static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info)
|
|||
#if HAVE_MMX_INTRINSICS
|
||||
|
||||
/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
|
||||
static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info)
|
||||
static void SDL_TARGETING("mmx") BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info)
|
||||
{
|
||||
int width = info->dst_w;
|
||||
int height = info->dst_h;
|
||||
|
@ -223,7 +223,7 @@ static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info)
|
|||
}
|
||||
|
||||
/* fast RGB888->(A)RGB888 blending with surface alpha */
|
||||
static void BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info)
|
||||
static void SDL_TARGETING("mmx") BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info)
|
||||
{
|
||||
SDL_PixelFormat *df = info->dst_fmt;
|
||||
Uint32 chanmask;
|
||||
|
@ -318,7 +318,7 @@ static void BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info)
|
|||
}
|
||||
|
||||
/* fast ARGB888->(A)RGB888 blending with pixel alpha */
|
||||
static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info)
|
||||
static void SDL_TARGETING("mmx") BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info)
|
||||
{
|
||||
int width = info->dst_w;
|
||||
int height = info->dst_h;
|
||||
|
@ -753,7 +753,7 @@ static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask)
|
|||
#if HAVE_MMX_INTRINSICS
|
||||
|
||||
/* fast RGB565->RGB565 blending with surface alpha */
|
||||
static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info)
|
||||
static void SDL_TARGETING("mmx") Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info)
|
||||
{
|
||||
unsigned alpha = info->a;
|
||||
if (alpha == 128) {
|
||||
|
@ -889,7 +889,7 @@ static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info)
|
|||
}
|
||||
|
||||
/* fast RGB555->RGB555 blending with surface alpha */
|
||||
static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info)
|
||||
static void SDL_TARGETING("mmx") Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info)
|
||||
{
|
||||
unsigned alpha = info->a;
|
||||
if (alpha == 128) {
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
#if HAVE_SSE_INTRINSICS
|
||||
/* This assumes 16-byte aligned src and dst */
|
||||
static SDL_INLINE void SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len)
|
||||
static SDL_INLINE void SDL_TARGETING("sse") SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@ -54,7 +54,7 @@ static SDL_INLINE void SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len)
|
|||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4799)
|
||||
#endif
|
||||
static SDL_INLINE void SDL_memcpyMMX(Uint8 *dst, const Uint8 *src, int len)
|
||||
static SDL_INLINE void SDL_TARGETING("mmx") SDL_memcpyMMX(Uint8 *dst, const Uint8 *src, int len)
|
||||
{
|
||||
const int remain = (len & 63);
|
||||
int i;
|
||||
|
@ -81,6 +81,16 @@ static SDL_INLINE void SDL_memcpyMMX(Uint8 *dst, const Uint8 *src, int len)
|
|||
SDL_memcpy(dst + skip, src + skip, remain);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Copy `h` rows of `w` bytes each from `src` to `dst` with the MMX row copier.
 *
 * dst, src:  first byte of the destination / source rectangle.
 * w, h:      bytes per row and number of rows to copy.
 * dstskip:   byte distance from the start of one destination row to the next.
 * srcskip:   byte distance from the start of one source row to the next.
 *
 * The parameters are ordered (dst, src, w, h, dstskip, srcskip) to match the
 * argument order used by the call in SDL_BlitCopy(); with the previous order
 * the width/height and the two skips were silently exchanged.
 */
static SDL_INLINE void SDL_TARGETING("mmx") SDL_BlitCopyMMX(Uint8 *dst, const Uint8 *src, const int w, int h, const int dstskip, const int srcskip)
{
    /* A non-positive row count copies nothing instead of looping. */
    for (; h > 0; --h) {
        SDL_memcpyMMX(dst, src, w);
        src += srcskip;
        dst += dstskip;
    }
    /* Leave MMX state so later x87 floating point code works. */
    _mm_empty();
}
|
||||
#endif /* HAVE_MMX_INTRINSICS */
|
||||
|
||||
void SDL_BlitCopy(SDL_BlitInfo *info)
|
||||
|
@ -137,12 +147,7 @@ void SDL_BlitCopy(SDL_BlitInfo *info)
|
|||
|
||||
#if HAVE_MMX_INTRINSICS
|
||||
if (SDL_HasMMX() && !(srcskip & 7) && !(dstskip & 7)) {
|
||||
while (h--) {
|
||||
SDL_memcpyMMX(dst, src, w);
|
||||
src += srcskip;
|
||||
dst += dstskip;
|
||||
}
|
||||
_mm_empty();
|
||||
SDL_BlitCopyMMX(dst, src, w, h, dstskip, srcskip);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -55,7 +55,7 @@
|
|||
#define SSE_END
|
||||
|
||||
#define DEFINE_SSE_FILLRECT(bpp, type) \
|
||||
static void SDL_FillSurfaceRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
|
||||
static void SDL_TARGETING("sse") SDL_FillSurfaceRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
|
||||
{ \
|
||||
int i, n; \
|
||||
Uint8 *p = NULL; \
|
||||
|
@ -92,7 +92,7 @@ static void SDL_FillSurfaceRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color
|
|||
SSE_END; \
|
||||
}
|
||||
|
||||
static void SDL_FillSurfaceRect1SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
|
||||
static void SDL_TARGETING("sse") SDL_FillSurfaceRect1SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
|
||||
{
|
||||
int i, n;
|
||||
|
||||
|
|
|
@ -349,7 +349,7 @@ static int scale_mat(const Uint32 *src, int src_w, int src_h, int src_pitch,
|
|||
#if defined(HAVE_SSE2_INTRINSICS)
|
||||
|
||||
#if 0
|
||||
static void printf_128(const char *str, __m128i var)
|
||||
static void SDL_TARGETING("sse2") printf_128(const char *str, __m128i var)
|
||||
{
|
||||
uint16_t *val = (uint16_t*) &var;
|
||||
printf(" * %s: %04x %04x %04x %04x _ %04x %04x %04x %04x\n",
|
||||
|
@ -367,7 +367,7 @@ static SDL_INLINE int hasSSE2(void)
|
|||
return val;
|
||||
}
|
||||
|
||||
static SDL_INLINE void INTERPOL_BILINEAR_SSE(const Uint32 *s0, const Uint32 *s1, int frac_w, __m128i v_frac_h0, __m128i v_frac_h1, Uint32 *dst, __m128i zero)
|
||||
static SDL_INLINE void SDL_TARGETING("sse2") INTERPOL_BILINEAR_SSE(const Uint32 *s0, const Uint32 *s1, int frac_w, __m128i v_frac_h0, __m128i v_frac_h1, Uint32 *dst, __m128i zero)
|
||||
{
|
||||
__m128i x_00_01, x_10_11; /* Pixels in 4*uint8 in row */
|
||||
__m128i v_frac_w0, k0, l0, d0, e0;
|
||||
|
@ -404,7 +404,7 @@ static SDL_INLINE void INTERPOL_BILINEAR_SSE(const Uint32 *s0, const Uint32 *s1,
|
|||
*dst = _mm_cvtsi128_si32(e0);
|
||||
}
|
||||
|
||||
static int scale_mat_SSE(const Uint32 *src, int src_w, int src_h, int src_pitch, Uint32 *dst, int dst_w, int dst_h, int dst_pitch)
|
||||
static int SDL_TARGETING("sse2") scale_mat_SSE(const Uint32 *src, int src_w, int src_h, int src_pitch, Uint32 *dst, int dst_w, int dst_h, int dst_pitch)
|
||||
{
|
||||
BILINEAR___START
|
||||
|
||||
|
|
|
@ -303,14 +303,14 @@ static int GetYUVPlanes(int width, int height, Uint32 format, const void *yuv, i
|
|||
return 0;
|
||||
}
|
||||
|
||||
static SDL_bool yuv_rgb_sse(
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
static SDL_bool SDL_TARGETING("sse2") yuv_rgb_sse(
|
||||
Uint32 src_format, Uint32 dst_format,
|
||||
Uint32 width, Uint32 height,
|
||||
const Uint8 *y, const Uint8 *u, const Uint8 *v, Uint32 y_stride, Uint32 uv_stride,
|
||||
Uint8 *rgb, Uint32 rgb_stride,
|
||||
YCbCrType yuv_type)
|
||||
{
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (!SDL_HasSSE2()) {
|
||||
return SDL_FALSE;
|
||||
}
|
||||
|
@ -408,10 +408,21 @@ static SDL_bool yuv_rgb_sse(
|
|||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return SDL_FALSE;
|
||||
}
|
||||
#else
|
||||
static SDL_bool yuv_rgb_sse(
|
||||
Uint32 src_format, Uint32 dst_format,
|
||||
Uint32 width, Uint32 height,
|
||||
const Uint8 *y, const Uint8 *u, const Uint8 *v, Uint32 y_stride, Uint32 uv_stride,
|
||||
Uint8 *rgb, Uint32 rgb_stride,
|
||||
YCbCrType yuv_type)
|
||||
{
|
||||
return SDL_FALSE;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_LSX_INTRINSICS
|
||||
static SDL_bool yuv_rgb_lsx(
|
||||
Uint32 src_format, Uint32 dst_format,
|
||||
Uint32 width, Uint32 height,
|
||||
|
@ -419,7 +430,6 @@ static SDL_bool yuv_rgb_lsx(
|
|||
Uint8 *rgb, Uint32 rgb_stride,
|
||||
YCbCrType yuv_type)
|
||||
{
|
||||
#if HAVE_LSX_INTRINSICS
|
||||
if (!SDL_HasLSX()) {
|
||||
return SDL_FALSE;
|
||||
}
|
||||
|
@ -450,9 +460,19 @@ static SDL_bool yuv_rgb_lsx(
|
|||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return SDL_FALSE;
|
||||
}
|
||||
#else
|
||||
static SDL_bool yuv_rgb_lsx(
|
||||
Uint32 src_format, Uint32 dst_format,
|
||||
Uint32 width, Uint32 height,
|
||||
const Uint8 *y, const Uint8 *u, const Uint8 *v, Uint32 y_stride, Uint32 uv_stride,
|
||||
Uint8 *rgb, Uint32 rgb_stride,
|
||||
YCbCrType yuv_type)
|
||||
{
|
||||
return SDL_FALSE;
|
||||
}
|
||||
#endif
|
||||
|
||||
static SDL_bool yuv_rgb_std(
|
||||
Uint32 src_format, Uint32 dst_format,
|
||||
|
@ -1102,7 +1122,8 @@ static int SDL_ConvertPixels_SwapUVPlanes(int width, int height, const void *src
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch, SDL_bool reverseUV)
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
static int SDL_TARGETING("sse2") SDL_ConvertPixels_PackUVPlanes_to_NV_SSE2(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch, SDL_bool reverseUV)
|
||||
{
|
||||
int x, y;
|
||||
const int UVwidth = (width + 1) / 2;
|
||||
|
@ -1114,9 +1135,6 @@ static int SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const voi
|
|||
const Uint8 *src1, *src2;
|
||||
Uint8 *dstUV;
|
||||
Uint8 *tmp = NULL;
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||
#endif
|
||||
|
||||
/* Skip the Y plane */
|
||||
src = (const Uint8 *)src + height * src_pitch;
|
||||
|
@ -1144,8 +1162,6 @@ static int SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const voi
|
|||
y = UVheight;
|
||||
while (y--) {
|
||||
x = UVwidth;
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (use_SSE2) {
|
||||
while (x >= 16) {
|
||||
__m128i u = _mm_loadu_si128((__m128i *)src1);
|
||||
__m128i v = _mm_loadu_si128((__m128i *)src2);
|
||||
|
@ -1158,8 +1174,6 @@ static int SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const voi
|
|||
dstUV += 32;
|
||||
x -= 16;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
while (x--) {
|
||||
*dstUV++ = *src1++;
|
||||
*dstUV++ = *src2++;
|
||||
|
@ -1174,8 +1188,68 @@ static int SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const voi
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch, SDL_bool reverseUV)
|
||||
/*
 * Interleave the two separate chroma planes that follow the Y plane of `src`
 * into the single two-bytes-per-sample chroma plane that follows the Y plane
 * of `dst`.
 *
 * width, height:   image size in pixels; each source chroma plane holds
 *                  (width + 1) / 2 by (height + 1) / 2 samples.
 * src, src_pitch:  source image and the pitch, in bytes, of its Y plane.
 * dst, dst_pitch:  destination image and the pitch, in bytes, of its Y plane.
 * reverseUV:       when set, the first source chroma plane supplies the second
 *                  byte of every output pair instead of the first.
 *
 * Returns 0 on success, or the result of SDL_OutOfMemory() when the scratch
 * copy needed for an in-place conversion cannot be allocated. When SSE2 is
 * compiled in and available at run time, the work and the return value are
 * delegated to the SSE2 variant.
 */
static int SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch, SDL_bool reverseUV)
{
    int x, y;
    const int UVwidth = (width + 1) / 2;
    const int UVheight = (height + 1) / 2;
    const int srcUVPitch = ((src_pitch + 1) / 2);
    const int srcUVPitchLeft = srcUVPitch - UVwidth;
    const int dstUVPitch = ((dst_pitch + 1) / 2) * 2;
    const int dstUVPitchLeft = dstUVPitch - UVwidth * 2;
    const Uint8 *src1, *src2;
    Uint8 *dstUV;
    Uint8 *tmp = NULL;

#if HAVE_SSE2_INTRINSICS
    /* The SSE2 variant is only defined when SSE2 intrinsics are compiled in,
       so the dispatch has to sit behind the same guard. */
    if (SDL_HasSSE2()) {
        return SDL_ConvertPixels_PackUVPlanes_to_NV_SSE2(width, height, src, src_pitch, dst, dst_pitch, reverseUV);
    }
#endif

    /* Skip the Y plane */
    src = (const Uint8 *)src + height * src_pitch;
    dst = (Uint8 *)dst + height * dst_pitch;

    if (src == dst) {
        /* Need to make a copy of the buffer so we don't clobber it while converting */
        tmp = (Uint8 *)SDL_malloc((size_t)2 * UVheight * srcUVPitch);
        if (tmp == NULL) {
            return SDL_OutOfMemory();
        }
        SDL_memcpy(tmp, src, (size_t)2 * UVheight * srcUVPitch);
        src = tmp;
    }

    /* src1 feeds the first byte of each output pair, src2 the second. */
    if (reverseUV) {
        src2 = (const Uint8 *)src;
        src1 = src2 + UVheight * srcUVPitch;
    } else {
        src1 = (const Uint8 *)src;
        src2 = src1 + UVheight * srcUVPitch;
    }
    dstUV = (Uint8 *)dst;

    y = UVheight;
    while (y--) {
        x = UVwidth;
        while (x--) {
            *dstUV++ = *src1++;
            *dstUV++ = *src2++;
        }
        /* Step over the row padding of every plane. */
        src1 += srcUVPitchLeft;
        src2 += srcUVPitchLeft;
        dstUV += dstUVPitchLeft;
    }

    if (tmp) {
        SDL_free(tmp);
    }
    return 0;
}
|
||||
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
static int SDL_TARGETING("sse2") SDL_ConvertPixels_SplitNV_to_UVPlanes_SSE2(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch, SDL_bool reverseUV)
|
||||
{
|
||||
int x, y;
|
||||
const int UVwidth = (width + 1) / 2;
|
||||
|
@ -1187,10 +1261,82 @@ static int SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const vo
|
|||
const Uint8 *srcUV;
|
||||
Uint8 *dst1, *dst2;
|
||||
Uint8 *tmp = NULL;
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||
|
||||
/* Skip the Y plane */
|
||||
src = (const Uint8 *)src + height * src_pitch;
|
||||
dst = (Uint8 *)dst + height * dst_pitch;
|
||||
|
||||
if (src == dst) {
|
||||
/* Need to make a copy of the buffer so we don't clobber it while converting */
|
||||
tmp = (Uint8 *)SDL_malloc((size_t)UVheight * srcUVPitch);
|
||||
if (tmp == NULL) {
|
||||
return SDL_OutOfMemory();
|
||||
}
|
||||
SDL_memcpy(tmp, src, (size_t)UVheight * srcUVPitch);
|
||||
src = tmp;
|
||||
}
|
||||
|
||||
if (reverseUV) {
|
||||
dst2 = (Uint8 *)dst;
|
||||
dst1 = dst2 + UVheight * dstUVPitch;
|
||||
} else {
|
||||
dst1 = (Uint8 *)dst;
|
||||
dst2 = dst1 + UVheight * dstUVPitch;
|
||||
}
|
||||
srcUV = (const Uint8 *)src;
|
||||
|
||||
y = UVheight;
|
||||
while (y--) {
|
||||
__m128i mask = _mm_set1_epi16(0x00FF);
|
||||
x = UVwidth;
|
||||
while (x >= 16) {
|
||||
__m128i uv1 = _mm_loadu_si128((__m128i *)srcUV);
|
||||
__m128i uv2 = _mm_loadu_si128((__m128i *)(srcUV + 16));
|
||||
__m128i u1 = _mm_and_si128(uv1, mask);
|
||||
__m128i u2 = _mm_and_si128(uv2, mask);
|
||||
__m128i u = _mm_packus_epi16(u1, u2);
|
||||
__m128i v1 = _mm_srli_epi16(uv1, 8);
|
||||
__m128i v2 = _mm_srli_epi16(uv2, 8);
|
||||
__m128i v = _mm_packus_epi16(v1, v2);
|
||||
_mm_storeu_si128((__m128i *)dst1, u);
|
||||
_mm_storeu_si128((__m128i *)dst2, v);
|
||||
srcUV += 32;
|
||||
dst1 += 16;
|
||||
dst2 += 16;
|
||||
x -= 16;
|
||||
}
|
||||
while (x--) {
|
||||
*dst1++ = *srcUV++;
|
||||
*dst2++ = *srcUV++;
|
||||
}
|
||||
srcUV += srcUVPitchLeft;
|
||||
dst1 += dstUVPitchLeft;
|
||||
dst2 += dstUVPitchLeft;
|
||||
}
|
||||
|
||||
if (tmp) {
|
||||
SDL_free(tmp);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch, SDL_bool reverseUV)
|
||||
{
|
||||
if (SDL_HasSSE2()) {
|
||||
return SDL_ConvertPixels_SplitNV_to_UVPlanes_SSE2(width, height, src, src_pitch, dst, dst_pitch, reverseUV);
|
||||
} else {
|
||||
int x, y;
|
||||
const int UVwidth = (width + 1) / 2;
|
||||
const int UVheight = (height + 1) / 2;
|
||||
const int srcUVPitch = ((src_pitch + 1) / 2) * 2;
|
||||
const int srcUVPitchLeft = srcUVPitch - UVwidth * 2;
|
||||
const int dstUVPitch = ((dst_pitch + 1) / 2);
|
||||
const int dstUVPitchLeft = dstUVPitch - UVwidth;
|
||||
const Uint8 *srcUV;
|
||||
Uint8 *dst1, *dst2;
|
||||
Uint8 *tmp = NULL;
|
||||
|
||||
/* Skip the Y plane */
|
||||
src = (const Uint8 *)src + height * src_pitch;
|
||||
dst = (Uint8 *)dst + height * dst_pitch;
|
||||
|
@ -1217,27 +1363,6 @@ static int SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const vo
|
|||
y = UVheight;
|
||||
while (y--) {
|
||||
x = UVwidth;
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (use_SSE2) {
|
||||
__m128i mask = _mm_set1_epi16(0x00FF);
|
||||
while (x >= 16) {
|
||||
__m128i uv1 = _mm_loadu_si128((__m128i *)srcUV);
|
||||
__m128i uv2 = _mm_loadu_si128((__m128i *)(srcUV + 16));
|
||||
__m128i u1 = _mm_and_si128(uv1, mask);
|
||||
__m128i u2 = _mm_and_si128(uv2, mask);
|
||||
__m128i u = _mm_packus_epi16(u1, u2);
|
||||
__m128i v1 = _mm_srli_epi16(uv1, 8);
|
||||
__m128i v2 = _mm_srli_epi16(uv2, 8);
|
||||
__m128i v = _mm_packus_epi16(v1, v2);
|
||||
_mm_storeu_si128((__m128i *)dst1, u);
|
||||
_mm_storeu_si128((__m128i *)dst2, v);
|
||||
srcUV += 32;
|
||||
dst1 += 16;
|
||||
dst2 += 16;
|
||||
x -= 16;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
while (x--) {
|
||||
*dst1++ = *srcUV++;
|
||||
*dst2++ = *srcUV++;
|
||||
|
@ -1251,9 +1376,11 @@ static int SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const vo
|
|||
SDL_free(tmp);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
static int SDL_TARGETING("sse2") SDL_ConvertPixels_SwapNV_SSE2(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
|
||||
{
|
||||
int x, y;
|
||||
const int UVwidth = (width + 1) / 2;
|
||||
|
@ -1264,9 +1391,6 @@ static int SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int
|
|||
const int dstUVPitchLeft = (dstUVPitch - UVwidth * 2) / sizeof(Uint16);
|
||||
const Uint16 *srcUV;
|
||||
Uint16 *dstUV;
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||
#endif
|
||||
|
||||
/* Skip the Y plane */
|
||||
src = (const Uint8 *)src + height * src_pitch;
|
||||
|
@ -1277,8 +1401,6 @@ static int SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int
|
|||
y = UVheight;
|
||||
while (y--) {
|
||||
x = UVwidth;
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (use_SSE2) {
|
||||
while (x >= 8) {
|
||||
__m128i uv = _mm_loadu_si128((__m128i *)srcUV);
|
||||
__m128i v = _mm_slli_epi16(uv, 8);
|
||||
|
@ -1289,8 +1411,6 @@ static int SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int
|
|||
dstUV += 8;
|
||||
x -= 8;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
while (x--) {
|
||||
*dstUV++ = SDL_Swap16(*srcUV++);
|
||||
}
|
||||
|
@ -1299,6 +1419,41 @@ static int SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
|
||||
{
|
||||
if (SDL_HasSSE2()) {
|
||||
return SDL_ConvertPixels_SwapNV_SSE2(width, height, src, src_pitch, dst, dst_pitch);
|
||||
} else {
|
||||
int x, y;
|
||||
const int UVwidth = (width + 1) / 2;
|
||||
const int UVheight = (height + 1) / 2;
|
||||
const int srcUVPitch = ((src_pitch + 1) / 2) * 2;
|
||||
const int srcUVPitchLeft = (srcUVPitch - UVwidth * 2) / sizeof(Uint16);
|
||||
const int dstUVPitch = ((dst_pitch + 1) / 2) * 2;
|
||||
const int dstUVPitchLeft = (dstUVPitch - UVwidth * 2) / sizeof(Uint16);
|
||||
const Uint16 *srcUV;
|
||||
Uint16 *dstUV;
|
||||
|
||||
/* Skip the Y plane */
|
||||
src = (const Uint8 *)src + height * src_pitch;
|
||||
dst = (Uint8 *)dst + height * dst_pitch;
|
||||
|
||||
srcUV = (const Uint16 *)src;
|
||||
dstUV = (Uint16 *)dst;
|
||||
y = UVheight;
|
||||
while (y--) {
|
||||
x = UVwidth;
|
||||
while (x--) {
|
||||
*dstUV++ = SDL_Swap16(*srcUV++);
|
||||
}
|
||||
srcUV += srcUVPitchLeft;
|
||||
dstUV += dstUVPitchLeft;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int SDL_ConvertPixels_Planar2x2_to_Planar2x2(int width, int height,
|
||||
Uint32 src_format, const void *src, int src_pitch,
|
||||
|
@ -1389,28 +1544,232 @@ static int SDL_ConvertPixels_Planar2x2_to_Planar2x2(int width, int height,
|
|||
x -= 4; \
|
||||
}
|
||||
|
||||
/*
 * Repack each 4-byte group Y1 U Y2 V (YUY2) from `src` as U Y1 V Y2 (UYVY)
 * in `dst`. A row holds (width + 1) / 2 such groups; src_pitch and dst_pitch
 * are the row pitches in bytes. Always returns 0.
 */
static int SDL_TARGETING("sse2") SDL_ConvertPixels_YUY2_to_UYVY_SSE2(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
{
    const int YUVwidth = (width + 1) / 2;
    const int srcYUVPitchLeft = src_pitch - YUVwidth * 4;
    const int dstYUVPitchLeft = dst_pitch - YUVwidth * 4;
    const Uint8 *srcYUV = (const Uint8 *)src;
    Uint8 *dstYUV = (Uint8 *)dst;
    int x, y;

    for (y = height; y--; ) {
        x = YUVwidth;

        /* NOTE(review): the macro is defined outside this block; it is
           expected to use the locals x, srcYUV and dstYUV by name - confirm. */
        PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));

        /* Scalar pass over the groups still counted in x. */
        for (; x--; srcYUV += 4, dstYUV += 4) {
            /* Read the whole group before writing anything. */
            const Uint8 Y1 = srcYUV[0];
            const Uint8 U = srcYUV[1];
            const Uint8 Y2 = srcYUV[2];
            const Uint8 V = srcYUV[3];

            dstYUV[0] = U;
            dstYUV[1] = Y1;
            dstYUV[2] = V;
            dstYUV[3] = Y2;
        }

        /* Step over the row padding. */
        srcYUV += srcYUVPitchLeft;
        dstYUV += dstYUVPitchLeft;
    }
    return 0;
}
|
||||
|
||||
/*
 * Repack each 4-byte group Y1 U Y2 V (YUY2) from `src` as Y1 V Y2 U (YVYU)
 * in `dst`. A row holds (width + 1) / 2 such groups; src_pitch and dst_pitch
 * are the row pitches in bytes. Always returns 0.
 */
static int SDL_TARGETING("sse2") SDL_ConvertPixels_YUY2_to_YVYU_SSE2(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
{
    const int YUVwidth = (width + 1) / 2;
    const int srcYUVPitchLeft = src_pitch - YUVwidth * 4;
    const int dstYUVPitchLeft = dst_pitch - YUVwidth * 4;
    const Uint8 *srcYUV = (const Uint8 *)src;
    Uint8 *dstYUV = (Uint8 *)dst;
    int x, y;

    for (y = height; y--; ) {
        x = YUVwidth;

        /* NOTE(review): the macro is defined outside this block; it is
           expected to use the locals x, srcYUV and dstYUV by name - confirm. */
        PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));

        /* Scalar pass over the groups still counted in x. */
        for (; x--; srcYUV += 4, dstYUV += 4) {
            /* Read the whole group before writing anything. */
            const Uint8 Y1 = srcYUV[0];
            const Uint8 U = srcYUV[1];
            const Uint8 Y2 = srcYUV[2];
            const Uint8 V = srcYUV[3];

            dstYUV[0] = Y1;
            dstYUV[1] = V;
            dstYUV[2] = Y2;
            dstYUV[3] = U;
        }

        /* Step over the row padding. */
        srcYUV += srcYUVPitchLeft;
        dstYUV += dstYUVPitchLeft;
    }
    return 0;
}
|
||||
|
||||
/*
 * Repack each 4-byte group U Y1 V Y2 (UYVY) from `src` as Y1 U Y2 V (YUY2)
 * in `dst`. A row holds (width + 1) / 2 such groups; src_pitch and dst_pitch
 * are the row pitches in bytes. Always returns 0.
 */
static int SDL_TARGETING("sse2") SDL_ConvertPixels_UYVY_to_YUY2_SSE2(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
{
    const int YUVwidth = (width + 1) / 2;
    const int srcYUVPitchLeft = src_pitch - YUVwidth * 4;
    const int dstYUVPitchLeft = dst_pitch - YUVwidth * 4;
    const Uint8 *srcYUV = (const Uint8 *)src;
    Uint8 *dstYUV = (Uint8 *)dst;
    int x, y;

    for (y = height; y--; ) {
        x = YUVwidth;

        /* NOTE(review): the macro is defined outside this block; it is
           expected to use the locals x, srcYUV and dstYUV by name - confirm. */
        PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));

        /* Scalar pass over the groups still counted in x. */
        for (; x--; srcYUV += 4, dstYUV += 4) {
            /* Read the whole group before writing anything. */
            const Uint8 U = srcYUV[0];
            const Uint8 Y1 = srcYUV[1];
            const Uint8 V = srcYUV[2];
            const Uint8 Y2 = srcYUV[3];

            dstYUV[0] = Y1;
            dstYUV[1] = U;
            dstYUV[2] = Y2;
            dstYUV[3] = V;
        }

        /* Step over the row padding. */
        srcYUV += srcYUVPitchLeft;
        dstYUV += dstYUVPitchLeft;
    }
    return 0;
}
|
||||
|
||||
/*
 * Repack each 4-byte group U Y1 V Y2 (UYVY) from `src` as Y1 V Y2 U (YVYU)
 * in `dst`. A row holds (width + 1) / 2 such groups; src_pitch and dst_pitch
 * are the row pitches in bytes. Always returns 0.
 */
static int SDL_TARGETING("sse2") SDL_ConvertPixels_UYVY_to_YVYU_SSE2(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
{
    const int YUVwidth = (width + 1) / 2;
    const int srcYUVPitchLeft = src_pitch - YUVwidth * 4;
    const int dstYUVPitchLeft = dst_pitch - YUVwidth * 4;
    const Uint8 *srcYUV = (const Uint8 *)src;
    Uint8 *dstYUV = (Uint8 *)dst;
    int x, y;

    for (y = height; y--; ) {
        x = YUVwidth;

        /* NOTE(review): the macro is defined outside this block; it is
           expected to use the locals x, srcYUV and dstYUV by name - confirm. */
        PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(0, 3, 2, 1));

        /* Scalar pass over the groups still counted in x. */
        for (; x--; srcYUV += 4, dstYUV += 4) {
            /* Read the whole group before writing anything. */
            const Uint8 U = srcYUV[0];
            const Uint8 Y1 = srcYUV[1];
            const Uint8 V = srcYUV[2];
            const Uint8 Y2 = srcYUV[3];

            dstYUV[0] = Y1;
            dstYUV[1] = V;
            dstYUV[2] = Y2;
            dstYUV[3] = U;
        }

        /* Step over the row padding. */
        srcYUV += srcYUVPitchLeft;
        dstYUV += dstYUVPitchLeft;
    }
    return 0;
}
|
||||
|
||||
/*
 * SSE2-targeted repack of 4-byte groups from YVYU order to YUY2 order:
 * each group is read as Y1, V, Y2, U and stored as Y1, U, Y2, V.
 * Always returns 0.
 */
static int SDL_TARGETING("sse2") SDL_ConvertPixels_YVYU_to_YUY2_SSE2(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
{
    /* Number of 4-byte groups per row; an odd width rounds up. */
    const int YUVwidth = (width + 1) / 2;
    /* Pitch minus the bytes covered by YUVwidth groups, added after each row. */
    const int srcYUVPitchLeft = src_pitch - (YUVwidth * 4);
    const int dstYUVPitchLeft = dst_pitch - (YUVwidth * 4);
    const Uint8 *srcYUV = (const Uint8 *)src;
    Uint8 *dstYUV = (Uint8 *)dst;
    int x, y;

    for (y = height; y-- != 0;) {
        x = YUVwidth;

        /* NOTE(review): the macro is defined outside this chunk; it presumably
           consumes x, srcYUV and dstYUV by name - confirm against its definition. */
        PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));

        /* Scalar pass over however many groups are still counted in x. */
        for (; x != 0; --x) {
            /* Load the whole group before storing anything. */
            const Uint8 luma0 = srcYUV[0];
            const Uint8 cr = srcYUV[1];
            const Uint8 luma1 = srcYUV[2];
            const Uint8 cb = srcYUV[3];

            dstYUV[0] = luma0;
            dstYUV[1] = cb;
            dstYUV[2] = luma1;
            dstYUV[3] = cr;

            srcYUV += 4;
            dstYUV += 4;
        }

        srcYUV += srcYUVPitchLeft;
        dstYUV += dstYUVPitchLeft;
    }

    return 0;
}
|
||||
|
||||
/*
 * SSE2-targeted repack of 4-byte groups from YVYU order to UYVY order:
 * each group is read as Y1, V, Y2, U and stored as U, Y1, V, Y2.
 * Always returns 0.
 */
static int SDL_TARGETING("sse2") SDL_ConvertPixels_YVYU_to_UYVY_SSE2(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
{
    /* Number of 4-byte groups per row; an odd width rounds up. */
    const int YUVwidth = (width + 1) / 2;
    /* Pitch minus the bytes covered by YUVwidth groups, added after each row. */
    const int srcYUVPitchLeft = src_pitch - (YUVwidth * 4);
    const int dstYUVPitchLeft = dst_pitch - (YUVwidth * 4);
    const Uint8 *srcYUV = (const Uint8 *)src;
    Uint8 *dstYUV = (Uint8 *)dst;
    int x, y;

    for (y = height; y-- != 0;) {
        x = YUVwidth;

        /* NOTE(review): the macro is defined outside this chunk; it presumably
           consumes x, srcYUV and dstYUV by name - confirm against its definition. */
        PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 1, 0, 3));

        /* Scalar pass over however many groups are still counted in x. */
        for (; x != 0; --x) {
            /* Load the whole group before storing anything. */
            const Uint8 luma0 = srcYUV[0];
            const Uint8 cr = srcYUV[1];
            const Uint8 luma1 = srcYUV[2];
            const Uint8 cb = srcYUV[3];

            dstYUV[0] = cb;
            dstYUV[1] = luma0;
            dstYUV[2] = cr;
            dstYUV[3] = luma1;

            srcYUV += 4;
            dstYUV += 4;
        }

        srcYUV += srcYUVPitchLeft;
        dstYUV += dstYUVPitchLeft;
    }

    return 0;
}
|
||||
#endif
|
||||
|
||||
static int SDL_ConvertPixels_YUY2_to_UYVY(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
|
||||
{
|
||||
if (SDL_HasSSE2()) {
|
||||
return SDL_ConvertPixels_YUY2_to_UYVY_SSE2(width, height, src, src_pitch, dst, dst_pitch);
|
||||
} else {
|
||||
int x, y;
|
||||
const int YUVwidth = (width + 1) / 2;
|
||||
const int srcYUVPitchLeft = (src_pitch - YUVwidth * 4);
|
||||
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
||||
const Uint8 *srcYUV = (const Uint8 *)src;
|
||||
Uint8 *dstYUV = (Uint8 *)dst;
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||
#endif
|
||||
|
||||
y = height;
|
||||
while (y--) {
|
||||
x = YUVwidth;
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (use_SSE2) {
|
||||
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
|
||||
}
|
||||
#endif
|
||||
while (x--) {
|
||||
Uint8 Y1, U, Y2, V;
|
||||
|
||||
|
@ -1430,28 +1789,24 @@ static int SDL_ConvertPixels_YUY2_to_UYVY(int width, int height, const void *src
|
|||
dstYUV += dstYUVPitchLeft;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int SDL_ConvertPixels_YUY2_to_YVYU(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
|
||||
{
|
||||
if (SDL_HasSSE2()) {
|
||||
return SDL_ConvertPixels_YUY2_to_YVYU_SSE2(width, height, src, src_pitch, dst, dst_pitch);
|
||||
} else {
|
||||
int x, y;
|
||||
const int YUVwidth = (width + 1) / 2;
|
||||
const int srcYUVPitchLeft = (src_pitch - YUVwidth * 4);
|
||||
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
||||
const Uint8 *srcYUV = (const Uint8 *)src;
|
||||
Uint8 *dstYUV = (Uint8 *)dst;
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||
#endif
|
||||
|
||||
y = height;
|
||||
while (y--) {
|
||||
x = YUVwidth;
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (use_SSE2) {
|
||||
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
|
||||
}
|
||||
#endif
|
||||
while (x--) {
|
||||
Uint8 Y1, U, Y2, V;
|
||||
|
||||
|
@ -1471,28 +1826,24 @@ static int SDL_ConvertPixels_YUY2_to_YVYU(int width, int height, const void *src
|
|||
dstYUV += dstYUVPitchLeft;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int SDL_ConvertPixels_UYVY_to_YUY2(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
|
||||
{
|
||||
if (SDL_HasSSE2()) {
|
||||
return SDL_ConvertPixels_UYVY_to_YUY2_SSE2(width, height, src, src_pitch, dst, dst_pitch);
|
||||
} else {
|
||||
int x, y;
|
||||
const int YUVwidth = (width + 1) / 2;
|
||||
const int srcYUVPitchLeft = (src_pitch - YUVwidth * 4);
|
||||
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
||||
const Uint8 *srcYUV = (const Uint8 *)src;
|
||||
Uint8 *dstYUV = (Uint8 *)dst;
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||
#endif
|
||||
|
||||
y = height;
|
||||
while (y--) {
|
||||
x = YUVwidth;
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (use_SSE2) {
|
||||
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
|
||||
}
|
||||
#endif
|
||||
while (x--) {
|
||||
Uint8 Y1, U, Y2, V;
|
||||
|
||||
|
@ -1512,28 +1863,24 @@ static int SDL_ConvertPixels_UYVY_to_YUY2(int width, int height, const void *src
|
|||
dstYUV += dstYUVPitchLeft;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int SDL_ConvertPixels_UYVY_to_YVYU(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
|
||||
{
|
||||
if (SDL_HasSSE2()) {
|
||||
return SDL_ConvertPixels_UYVY_to_YVYU_SSE2(width, height, src, src_pitch, dst, dst_pitch);
|
||||
} else {
|
||||
int x, y;
|
||||
const int YUVwidth = (width + 1) / 2;
|
||||
const int srcYUVPitchLeft = (src_pitch - YUVwidth * 4);
|
||||
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
||||
const Uint8 *srcYUV = (const Uint8 *)src;
|
||||
Uint8 *dstYUV = (Uint8 *)dst;
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||
#endif
|
||||
|
||||
y = height;
|
||||
while (y--) {
|
||||
x = YUVwidth;
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (use_SSE2) {
|
||||
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(0, 3, 2, 1));
|
||||
}
|
||||
#endif
|
||||
while (x--) {
|
||||
Uint8 Y1, U, Y2, V;
|
||||
|
||||
|
@ -1553,28 +1900,24 @@ static int SDL_ConvertPixels_UYVY_to_YVYU(int width, int height, const void *src
|
|||
dstYUV += dstYUVPitchLeft;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int SDL_ConvertPixels_YVYU_to_YUY2(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
|
||||
{
|
||||
if (SDL_HasSSE2()) {
|
||||
return SDL_ConvertPixels_YVYU_to_YUY2_SSE2(width, height, src, src_pitch, dst, dst_pitch);
|
||||
} else {
|
||||
int x, y;
|
||||
const int YUVwidth = (width + 1) / 2;
|
||||
const int srcYUVPitchLeft = (src_pitch - YUVwidth * 4);
|
||||
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
||||
const Uint8 *srcYUV = (const Uint8 *)src;
|
||||
Uint8 *dstYUV = (Uint8 *)dst;
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||
#endif
|
||||
|
||||
y = height;
|
||||
while (y--) {
|
||||
x = YUVwidth;
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (use_SSE2) {
|
||||
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
|
||||
}
|
||||
#endif
|
||||
while (x--) {
|
||||
Uint8 Y1, U, Y2, V;
|
||||
|
||||
|
@ -1594,28 +1937,24 @@ static int SDL_ConvertPixels_YVYU_to_YUY2(int width, int height, const void *src
|
|||
dstYUV += dstYUVPitchLeft;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int SDL_ConvertPixels_YVYU_to_UYVY(int width, int height, const void *src, int src_pitch, void *dst, int dst_pitch)
|
||||
{
|
||||
if (SDL_HasSSE2()) {
|
||||
return SDL_ConvertPixels_YVYU_to_UYVY_SSE2(width, height, src, src_pitch, dst, dst_pitch);
|
||||
} else {
|
||||
int x, y;
|
||||
const int YUVwidth = (width + 1) / 2;
|
||||
const int srcYUVPitchLeft = (src_pitch - YUVwidth * 4);
|
||||
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
|
||||
const Uint8 *srcYUV = (const Uint8 *)src;
|
||||
Uint8 *dstYUV = (Uint8 *)dst;
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
const SDL_bool use_SSE2 = SDL_HasSSE2();
|
||||
#endif
|
||||
|
||||
y = height;
|
||||
while (y--) {
|
||||
x = YUVwidth;
|
||||
#if HAVE_SSE2_INTRINSICS
|
||||
if (use_SSE2) {
|
||||
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 1, 0, 3));
|
||||
}
|
||||
#endif
|
||||
while (x--) {
|
||||
Uint8 Y1, U, Y2, V;
|
||||
|
||||
|
@ -1635,6 +1974,7 @@ static int SDL_ConvertPixels_YVYU_to_UYVY(int width, int height, const void *src
|
|||
dstYUV += dstYUVPitchLeft;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int SDL_ConvertPixels_Packed4_to_Packed4(int width, int height,
|
||||
|
|
|
@ -609,7 +609,7 @@ V = _mm_srai_epi16(V, PRECISION);
|
|||
SAVE_SI128((__m128i*)(u_ptr), u1); \
|
||||
SAVE_SI128((__m128i*)(v_ptr), v1);
|
||||
|
||||
void rgb24_yuv420_sse(uint32_t width, uint32_t height,
|
||||
void SDL_TARGETING("sse2") rgb24_yuv420_sse(uint32_t width, uint32_t height,
|
||||
const uint8_t *RGB, uint32_t RGB_stride,
|
||||
uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
|
||||
YCbCrType yuv_type)
|
||||
|
@ -645,7 +645,7 @@ void rgb24_yuv420_sse(uint32_t width, uint32_t height,
|
|||
#undef SAVE_SI128
|
||||
}
|
||||
|
||||
void rgb24_yuv420_sseu(uint32_t width, uint32_t height,
|
||||
void SDL_TARGETING("sse2") rgb24_yuv420_sseu(uint32_t width, uint32_t height,
|
||||
const uint8_t *RGB, uint32_t RGB_stride,
|
||||
uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
|
||||
YCbCrType yuv_type)
|
||||
|
|
|
@ -382,7 +382,7 @@ PACK_RGB24_32_STEP1(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6)
|
|||
\
|
||||
|
||||
|
||||
void SSE_FUNCTION_NAME(uint32_t width, uint32_t height,
|
||||
void SDL_TARGETING("sse2") SSE_FUNCTION_NAME(uint32_t width, uint32_t height,
|
||||
const uint8_t *Y, const uint8_t *U, const uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
|
||||
uint8_t *RGB, uint32_t RGB_stride,
|
||||
YCbCrType yuv_type)
|
||||
|
|
Loading…
Reference in New Issue