diff --git a/CMakeLists.txt b/CMakeLists.txt index dd6616648..739ccce23 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1020,6 +1020,14 @@ if(NOT HAVE_SSE3) set(SDL_DISABLE_SSE3 1) endif() +if(NOT HAVE_LSX) + set(SDL_DISABLE_LSX 1) +endif() + +if(NOT HAVE_LASX) + set(SDL_DISABLE_LASX 1) +endif() + # TODO: Can't deactivate on FreeBSD? w/o LIBC, SDL_stdinc.h can't define # anything. if(SDL_LIBC) diff --git a/include/SDL3/SDL_intrin.h b/include/SDL3/SDL_intrin.h index be9b29b0a..9ca942c3d 100644 --- a/include/SDL3/SDL_intrin.h +++ b/include/SDL3/SDL_intrin.h @@ -93,13 +93,11 @@ _m_prefetch(void *__P) #endif #endif /* compiler version */ -#if defined(__loongarch_sx) && !defined(SDL_DISABLE_LSX_H) +#if defined(__loongarch_sx) && !defined(SDL_DISABLE_LSX) #include <lsxintrin.h> -#define __LSX__ #endif -#if defined(__loongarch_asx) && !defined(SDL_DISABLE_LASX_H) +#if defined(__loongarch_asx) && !defined(SDL_DISABLE_LASX) #include <lasxintrin.h> -#define __LASX__ #endif #if defined(__AVX__) && !defined(SDL_DISABLE_AVX) #include <immintrin.h> diff --git a/include/build_config/SDL_build_config.h.cmake b/include/build_config/SDL_build_config.h.cmake index 9197c94c3..a35650f9e 100644 --- a/include/build_config/SDL_build_config.h.cmake +++ b/include/build_config/SDL_build_config.h.cmake @@ -592,5 +592,7 @@ typedef unsigned int uintptr_t; #cmakedefine SDL_DISABLE_SSE3 1 #cmakedefine SDL_DISABLE_AVX 1 #cmakedefine SDL_DISABLE_MMX 1 +#cmakedefine SDL_DISABLE_LSX 1 +#cmakedefine SDL_DISABLE_LASX 1 #endif /* SDL_build_config_h_ */ diff --git a/src/SDL_internal.h b/src/SDL_internal.h index 8e1e14191..208b92943 100644 --- a/src/SDL_internal.h +++ b/src/SDL_internal.h @@ -211,6 +211,14 @@ #define HAVE_AVX_INTRINSICS 1 #endif +#if defined(__loongarch_sx) && !defined(SDL_DISABLE_LSX) +#define HAVE_LSX_INTRINSICS 1 +#endif + +#if defined(__loongarch_asx) && !defined(SDL_DISABLE_LASX) +#define HAVE_LASX_INTRINSICS 1 +#endif + #if defined __clang__ #if (!__has_attribute(target)) #undef HAVE_AVX_INTRINSICS diff --git 
a/src/video/SDL_yuv.c b/src/video/SDL_yuv.c index acf2bae05..06a7fa708 100644 --- a/src/video/SDL_yuv.c +++ b/src/video/SDL_yuv.c @@ -419,7 +419,7 @@ static SDL_bool yuv_rgb_lsx( Uint8 *rgb, Uint32 rgb_stride, YCbCrType yuv_type) { -#ifdef __loongarch_sx +#if HAVE_LSX_INTRINSICS if (!SDL_HasLSX()) { return SDL_FALSE; } diff --git a/src/video/yuv2rgb/yuv_rgb.c b/src/video/yuv2rgb/yuv_rgb.c index 68139bdbc..5b96a4b7e 100644 --- a/src/video/yuv2rgb/yuv_rgb.c +++ b/src/video/yuv2rgb/yuv_rgb.c @@ -73,7 +73,7 @@ static const RGB2YUVParam RGB2YUV[3] = { // input must be in the [-128*PRECISION_FACTOR:384*PRECISION_FACTOR] range static uint8_t clampU8(int32_t v) { - static const uint8_t lut[512] = + static const uint8_t lut[512] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46, @@ -183,52 +183,52 @@ static uint8_t clampU8(int32_t v) #include "yuv_rgb_std_func.h" void rgb24_yuv420_std( - uint32_t width, uint32_t height, - const uint8_t *RGB, uint32_t RGB_stride, - uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, + uint32_t width, uint32_t height, + const uint8_t *RGB, uint32_t RGB_stride, + uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, YCbCrType yuv_type) { const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]); - + uint32_t x, y; for(y=0; y<(height-1); y+=2) { const uint8_t *rgb_ptr1=RGB+y*RGB_stride, *rgb_ptr2=RGB+(y+1)*RGB_stride; - + uint8_t *y_ptr1=Y+y*Y_stride, *y_ptr2=Y+(y+1)*Y_stride, *u_ptr=U+(y/2)*UV_stride, *v_ptr=V+(y/2)*UV_stride; - + for(x=0; x<(width-1); x+=2) { // compute yuv for the four pixels, u and v values are summed int32_t y_tmp, u_tmp, v_tmp; - + y_tmp = 
param->matrix[0][0]*rgb_ptr1[0] + param->matrix[0][1]*rgb_ptr1[1] + param->matrix[0][2]*rgb_ptr1[2]; u_tmp = param->matrix[1][0]*rgb_ptr1[0] + param->matrix[1][1]*rgb_ptr1[1] + param->matrix[1][2]*rgb_ptr1[2]; v_tmp = param->matrix[2][0]*rgb_ptr1[0] + param->matrix[2][1]*rgb_ptr1[1] + param->matrix[2][2]*rgb_ptr1[2]; y_ptr1[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION)); y_tmp = param->matrix[0][0]*rgb_ptr1[3] + param->matrix[0][1]*rgb_ptr1[4] + param->matrix[0][2]*rgb_ptr1[5]; u_tmp += param->matrix[1][0]*rgb_ptr1[3] + param->matrix[1][1]*rgb_ptr1[4] + param->matrix[1][2]*rgb_ptr1[5]; v_tmp += param->matrix[2][0]*rgb_ptr1[3] + param->matrix[2][1]*rgb_ptr1[4] + param->matrix[2][2]*rgb_ptr1[5]; y_ptr1[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION)); y_tmp = param->matrix[0][0]*rgb_ptr2[0] + param->matrix[0][1]*rgb_ptr2[1] + param->matrix[0][2]*rgb_ptr2[2]; u_tmp += param->matrix[1][0]*rgb_ptr2[0] + param->matrix[1][1]*rgb_ptr2[1] + param->matrix[1][2]*rgb_ptr2[2]; v_tmp += param->matrix[2][0]*rgb_ptr2[0] + param->matrix[2][1]*rgb_ptr2[1] + param->matrix[2][2]*rgb_ptr2[2]; y_ptr2[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION)); y_tmp = param->matrix[0][0]*rgb_ptr2[3] + param->matrix[0][1]*rgb_ptr2[4] + param->matrix[0][2]*rgb_ptr2[5]; u_tmp += param->matrix[1][0]*rgb_ptr2[3] + param->matrix[1][1]*rgb_ptr2[4] + param->matrix[1][2]*rgb_ptr2[5]; v_tmp += param->matrix[2][0]*rgb_ptr2[3] + param->matrix[2][1]*rgb_ptr2[4] + param->matrix[2][2]*rgb_ptr2[5]; y_ptr2[1]=clampU8(y_tmp+((param->y_shift)<