diff --git a/src/audio/SDL_audiocvt.c b/src/audio/SDL_audiocvt.c
index 7c3e5bf37..36c631a2f 100644
--- a/src/audio/SDL_audiocvt.c
+++ b/src/audio/SDL_audiocvt.c
@@ -43,6 +43,10 @@
 #define HAVE_SSE3_INTRINSICS 1
 #endif
 
+#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H)
+#define HAVE_AVX_INTRINSICS 1 /* gates SDL_Convert51ToStereo_AVX and its selection in SDL_BuildAudioCVT */
+#endif
+
 #if HAVE_SSE3_INTRINSICS
 /* Convert from stereo to mono. Average left and right. */
 static void SDLCALL
@@ -100,6 +104,88 @@ SDL_ConvertStereoToMono(SDL_AudioCVT * cvt, SDL_AudioFormat format)
     }
 }
 
+#if HAVE_AVX_INTRINSICS
+/* Enable AVX code generation for the function below only. MSVC will always accept AVX intrinsics when compiling for x64, so only clang and GCC get a target pragma here; the matching pop follows the function. */
+#if defined(__clang__)
+#pragma clang attribute push (__attribute__((target("avx"))), apply_to=function)
+#elif defined(__GNUC__)
+#pragma GCC push_options
+#pragma GCC target("avx")
+#endif
+/* Convert float 5.1 audio in cvt->buf to stereo, in place. Per sample frame: left = (FL + BL + FC/2) / 2.5 and right = (FR + BR + FC/2) / 2.5; LFE is discarded. Afterwards cvt->len_cvt is divided by 3 and the next entry in cvt->filters, if non-NULL, is called with the same arguments. format is asserted to be AUDIO_F32SYS. */
+static void SDLCALL
+SDL_Convert51ToStereo_AVX(SDL_AudioCVT * cvt, SDL_AudioFormat format)
+{
+    float *dst = (float *) cvt->buf;
+    const float *src = dst; /* input and output share one buffer */
+    int i = cvt->len_cvt / (sizeof (float) * 6); /* number of 6-float sample frames */
+    const float two_fifths_f = 1.0f / 2.5f;
+    const __m256 two_fifths_v = _mm256_set1_ps(two_fifths_f);
+    const __m256 half = _mm256_set1_ps(0.5f);
+
+    LOG_DEBUG_CONVERT("5.1", "stereo (using AVX)");
+    SDL_assert(format == AUDIO_F32SYS);
+
+    /* SDL's 5.1 layout: FL+FR+FC+LFE+BL+BR. In the lane comments below the leading digit is the frame index within the current group of four. */
+    /* This implementation is based on SDL_Convert51ToStereo_SSE: the shuffles are done on 128 bit halves, which are then joined into 256 bit vectors. */
+    /* Because AVX operates with two 128 bit lanes, the shuffling */
+    /* here is not very efficient. AVX512F lifts this limitation. */
+    while (i >= 4) { /* four frames per pass: 24 floats in, 8 floats out */
+        __m128 in0 = _mm_loadu_ps(src);      /* 0FL 0FR 0FC 0LF */
+        __m128 in1 = _mm_loadu_ps(src + 4);  /* 0BL 0BR 1FL 1FR */
+        __m128 in2 = _mm_loadu_ps(src + 8);  /* 1FC 1LF 1BL 1BR */
+        __m128 in3 = _mm_loadu_ps(src + 12); /* 2FL 2FR 2FC 2LF */
+        __m128 in4 = _mm_loadu_ps(src + 16); /* 2BL 2BR 3FL 3FR */
+        __m128 in5 = _mm_loadu_ps(src + 20); /* 3FC 3LF 3BL 3BR */
+
+        /* 0FC 0FC 1FC 1FC (each center sample duplicated for its left and right output) */
+        __m128 fc_distributed_lo = _mm_shuffle_ps(in0, in2, _MM_SHUFFLE(0, 0, 2, 2));
+        /* 2FC 2FC 3FC 3FC */
+        __m128 fc_distributed_hi = _mm_shuffle_ps(in3, in5, _MM_SHUFFLE(0, 0, 2, 2));
+        /* (0FC 0FC 1FC 1FC 2FC 2FC 3FC 3FC) * 0.5 */
+        __m256 fc_distributed = _mm256_mul_ps(half, _mm256_insertf128_ps(_mm256_castps128_ps256(fc_distributed_lo), fc_distributed_hi, 1));
+
+        /* 0FL 0FR 1BL 1BR (low two lanes of the first operand, high two lanes of the second) */
+        __m128 permuted0_lo = _mm_shuffle_ps(in0, in2, _MM_SHUFFLE(3, 2, 1, 0));
+        /* 2FL 2FR 3BL 3BR */
+        __m128 permuted0_hi = _mm_shuffle_ps(in3, in5, _MM_SHUFFLE(3, 2, 1, 0));
+        /* 0FL 0FR 1BL 1BR 2FL 2FR 3BL 3BR */
+        __m256 permuted0 = _mm256_insertf128_ps(_mm256_castps128_ps256(permuted0_lo), permuted0_hi, 1);
+        /* 0BL 0BR 1FL 1FR 2BL 2BR 3FL 3FR (in1 and in4 already hold the complementary front/back pairs) */
+        __m256 permuted1 = _mm256_insertf128_ps(_mm256_castps128_ps256(in1), in4, 1);
+
+        /*   0FL 0FR 1BL 1BR 2FL 2FR 3BL 3BR */
+        /* + 0BL 0BR 1FL 1FR 2BL 2BR 3FL 3FR */
+        /* = 0L 0R 1L 1R 2L 2R 3L 3R (front + back of each side, before center and scaling) */
+        __m256 out = _mm256_add_ps(permuted0, permuted1);
+        out = _mm256_add_ps(out, fc_distributed); /* add half of the center to both sides */
+        out = _mm256_mul_ps(out, two_fifths_v); /* scale the mix by 1/2.5 */
+
+        _mm256_storeu_ps(dst, out); /* in-place store: all 24 inputs of this pass are already loaded and dst never gets ahead of src */
+
+        i -= 4; src += 24; dst += 8;
+    }
+
+
+    /* Finish off any leftovers (fewer than four frames) with scalar operations, using the same formula. */
+    while (i) {
+        const float front_center_distributed = src[2] * 0.5f;
+        dst[0] = (src[0] + front_center_distributed + src[4]) * two_fifths_f;  /* left */
+        dst[1] = (src[1] + front_center_distributed + src[5]) * two_fifths_f;  /* right */
+        i--; src += 6; dst+=2;
+    }
+
+    cvt->len_cvt /= 3; /* six channels in, two channels out */
+    if (cvt->filters[++cvt->filter_index]) { /* advance to the next filter and run it if there is one */
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+#if defined(__clang__)
+#pragma clang attribute pop
+#elif defined(__GNUC__)
+#pragma GCC pop_options
+#endif
+#endif
 
 #if HAVE_SSE_INTRINSICS
 /* Convert from 5.1 to stereo. Average left and right, distribute center, discard LFE. */
@@ -1086,8 +1172,14 @@ SDL_BuildAudioCVT(SDL_AudioCVT * cvt,
     if ((src_channels == 6) && (dst_channels <= 2)) {
         SDL_AudioFilter filter = NULL;
 
+        #if HAVE_AVX_INTRINSICS
+        if (SDL_HasAVX()) {
+            filter = SDL_Convert51ToStereo_AVX;
+        }
+        #endif
+
         #if HAVE_SSE_INTRINSICS
-        if (SDL_HasSSE()) {
+        if (!filter && SDL_HasSSE()) { /* only consider SSE when the AVX branch above did not set a filter */
             filter = SDL_Convert51ToStereo_SSE;
         }
         #endif