From 337fed3df61b8539900615d9a458ec4fe4751336 Mon Sep 17 00:00:00 2001
From: Brick <6098371+0x1F9F1@users.noreply.github.com>
Date: Mon, 28 Aug 2023 22:21:08 +0100
Subject: [PATCH] Tweaked ResampleFrame_SSE

Use _mm_unpack(lo|hi)_ps instead of _mm_shuffle_ps
---
 src/audio/SDL_audiocvt.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/audio/SDL_audiocvt.c b/src/audio/SDL_audiocvt.c
index ce48b94d7..6ebfc81c9 100644
--- a/src/audio/SDL_audiocvt.c
+++ b/src/audio/SDL_audiocvt.c
@@ -160,11 +160,11 @@ static void SDL_TARGETING("sse") ResampleFrame_SSE(const float* src, float* dst,
 
     if (chans == 2) {
         // Duplicate each of the filter elements
-        g0 = _mm_shuffle_ps(f0, f0, _MM_SHUFFLE(3, 3, 2, 2));
-        f0 = _mm_shuffle_ps(f0, f0, _MM_SHUFFLE(1, 1, 0, 0));
-        g1 = _mm_shuffle_ps(f1, f1, _MM_SHUFFLE(3, 3, 2, 2));
-        f1 = _mm_shuffle_ps(f1, f1, _MM_SHUFFLE(1, 1, 0, 0));
-        f2 = _mm_shuffle_ps(f2, f2, _MM_SHUFFLE(1, 1, 0, 0));
+        g0 = _mm_unpackhi_ps(f0, f0);
+        f0 = _mm_unpacklo_ps(f0, f0);
+        g1 = _mm_unpackhi_ps(f1, f1);
+        f1 = _mm_unpacklo_ps(f1, f1);
+        f2 = _mm_unpacklo_ps(f2, f2);
 
         // Multiply the filter by the input
         f0 = _mm_mul_ps(f0, _mm_loadu_ps(src + 0));