ARM: SIMD assembly optimization for BGR-to-RGB 32bpp normal blits
parent
8425d9d5d0
commit
7ac733f025
|
@ -41,7 +41,8 @@
|
||||||
enum blit_features {
|
enum blit_features {
|
||||||
BLIT_FEATURE_HAS_MMX = 1,
|
BLIT_FEATURE_HAS_MMX = 1,
|
||||||
BLIT_FEATURE_HAS_ALTIVEC = 2,
|
BLIT_FEATURE_HAS_ALTIVEC = 2,
|
||||||
BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH = 4
|
BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH = 4,
|
||||||
|
BLIT_FEATURE_HAS_ARM_SIMD = 8
|
||||||
};
|
};
|
||||||
|
|
||||||
#if SDL_ALTIVEC_BLITTERS
|
#if SDL_ALTIVEC_BLITTERS
|
||||||
|
@ -931,7 +932,24 @@ GetBlitFeatures(void)
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
/* Feature 1 is has-MMX */
|
/* Feature 1 is has-MMX */
|
||||||
#define GetBlitFeatures() (SDL_HasMMX() ? BLIT_FEATURE_HAS_MMX : 0)
|
#define GetBlitFeatures() ((SDL_HasMMX() ? BLIT_FEATURE_HAS_MMX : 0) | (SDL_HasARMSIMD() ? BLIT_FEATURE_HAS_ARM_SIMD : 0))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if SDL_ARM_SIMD_BLITTERS
|
||||||
|
void Blit_BGR888_RGB888ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
|
||||||
|
|
||||||
|
/*
 * C wrapper that unpacks an SDL_BlitInfo into the flat argument list taken
 * by the assembly routine Blit_BGR888_RGB888ARMSIMDAsm and then calls it.
 *
 * - width / height are read from info->dst_w / info->dst_h.
 * - info->dst and info->src are reinterpreted as uint32_t pointers.
 * - Each stride is computed as width + (skip >> 2).
 *   NOTE(review): this presumably converts a per-row skip given in bytes
 *   into a count of 32-bit pixels, giving a row pitch in pixels -- confirm
 *   against the SDL_BlitInfo definition and the asm routine's expectations.
 *
 * Returns nothing; any output is produced by the assembly routine.
 */
static void
|
||||||
|
Blit_BGR888_RGB888ARMSIMD(SDL_BlitInfo * info)
|
||||||
|
{
|
||||||
|
int32_t width = info->dst_w;
|
||||||
|
int32_t height = info->dst_h;
|
||||||
|
uint32_t *dstp = (uint32_t *)info->dst;
|
||||||
|
int32_t dststride = width + (info->dst_skip >> 2);
|
||||||
|
uint32_t *srcp = (uint32_t *)info->src;
|
||||||
|
int32_t srcstride = width + (info->src_skip >> 2);
|
||||||
|
|
||||||
|
Blit_BGR888_RGB888ARMSIMDAsm(width, height, dstp, dststride, srcp, srcstride);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* This is now endian dependent */
|
/* This is now endian dependent */
|
||||||
|
@ -3269,6 +3287,10 @@ static const struct blit_table normal_blit_4[] = {
|
||||||
/* has-altivec */
|
/* has-altivec */
|
||||||
{0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, 0x0000001F,
|
{0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, 0x0000001F,
|
||||||
BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB888_RGB565Altivec, NO_ALPHA},
|
BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB888_RGB565Altivec, NO_ALPHA},
|
||||||
|
#endif
|
||||||
|
#if SDL_ARM_SIMD_BLITTERS
|
||||||
|
{0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
|
||||||
|
BLIT_FEATURE_HAS_ARM_SIMD, Blit_BGR888_RGB888ARMSIMD, NO_ALPHA | COPY_ALPHA },
|
||||||
#endif
|
#endif
|
||||||
/* 4->3 with same rgb triplet */
|
/* 4->3 with same rgb triplet */
|
||||||
{0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
|
{0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
|
||||||
|
|
|
@ -363,3 +363,45 @@ generate_composite_function \
|
||||||
nop_macro, /* cleanup */ \
|
nop_macro, /* cleanup */ \
|
||||||
ARGBto565PixelAlpha_process_head, \
|
ARGBto565PixelAlpha_process_head, \
|
||||||
ARGBto565PixelAlpha_process_tail
|
ARGBto565PixelAlpha_process_tail
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
|
||||||
|
.macro BGR888toRGB888_1pixel cond, reg, tmp
|
||||||
|
uxtb16&cond tmp, WK&reg, ror #8
|
||||||
|
uxtb16&cond WK&reg, WK&reg, ror #16
|
||||||
|
orr&cond WK&reg, WK&reg, tmp, lsl #8
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro BGR888toRGB888_2pixels cond, reg1, reg2, tmp1, tmp2
|
||||||
|
uxtb16&cond tmp1, WK&reg1, ror #8
|
||||||
|
uxtb16&cond WK&reg1, WK&reg1, ror #16
|
||||||
|
uxtb16&cond tmp2, WK&reg2, ror #8
|
||||||
|
uxtb16&cond WK&reg2, WK&reg2, ror #16
|
||||||
|
orr&cond WK&reg1, WK&reg1, tmp1, lsl #8
|
||||||
|
orr&cond WK&reg2, WK&reg2, tmp2, lsl #8
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro BGR888toRGB888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
|
||||||
|
pixld cond, numbytes, firstreg, SRC, unaligned_src
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro BGR888toRGB888_process_tail cond, numbytes, firstreg
|
||||||
|
.if numbytes >= 8
|
||||||
|
BGR888toRGB888_2pixels cond, %(firstreg+0), %(firstreg+1), MASK, STRIDE_M
|
||||||
|
.if numbytes == 16
|
||||||
|
BGR888toRGB888_2pixels cond, %(firstreg+2), %(firstreg+3), MASK, STRIDE_M
|
||||||
|
.endif
|
||||||
|
.else @ numbytes == 4
|
||||||
|
BGR888toRGB888_1pixel cond, %(firstreg+0), MASK
|
||||||
|
.endif
|
||||||
|
.endm
|
||||||
|
|
||||||
|
generate_composite_function \
|
||||||
|
Blit_BGR888_RGB888ARMSIMDAsm, 32, 0, 32, \
|
||||||
|
FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_SCRATCH, \
|
||||||
|
2, /* prefetch distance */ \
|
||||||
|
nop_macro, /* init */ \
|
||||||
|
nop_macro, /* newline */ \
|
||||||
|
nop_macro, /* cleanup */ \
|
||||||
|
BGR888toRGB888_process_head, \
|
||||||
|
BGR888toRGB888_process_tail
|
||||||
|
|
Loading…
Reference in New Issue