Refactored ConvertAudio, added SIMD endian-swapping

main
Brick 2024-04-05 17:16:17 +01:00 committed by Sam Lantinga
parent e9e9424b9a
commit 33f28d6143
3 changed files with 236 additions and 110 deletions

View File

@ -132,59 +132,6 @@ static void SDL_TARGETING("sse") SDL_ConvertMonoToStereo_SSE(float *dst, const f
// Include the autogenerated channel converters... // Include the autogenerated channel converters...
#include "SDL_audio_channel_converters.h" #include "SDL_audio_channel_converters.h"
// Reverse the byte order of every sample while copying from src to dst.
//  dst/src:     sample buffers, accessed as Uint16 or Uint32 arrays depending on bitsize.
//  num_samples: number of samples to process.
//  bitsize:     width of one sample in bits; only 16 and 32 are handled, anything else asserts.
static void AudioConvertByteswap(void *dst, const void *src, int num_samples, int bitsize)
{
#if DEBUG_AUDIO_CONVERT
    SDL_Log("SDL_AUDIO_CONVERT: Converting %d-bit byte order", bitsize);
#endif

    if (bitsize == 16) {
        const Uint16 *in = (const Uint16 *)src;
        Uint16 *out = (Uint16 *)dst;
        for (int n = 0; n < num_samples; n++) {
            out[n] = SDL_Swap16(in[n]);
        }
    } else if (bitsize == 32) {
        const Uint32 *in = (const Uint32 *)src;
        Uint32 *out = (Uint32 *)dst;
        for (int n = 0; n < num_samples; n++) {
            out[n] = SDL_Swap32(in[n]);
        }
    } else {
        SDL_assert(!"unhandled byteswap datatype!");
    }
}
// Expand num_samples integer samples from src into floats in dst.
// Byte order is not dealt with here: the big-endian flag is masked off before
// dispatching, so the samples are converted exactly as they sit in memory.
static void AudioConvertToFloat(float *dst, const void *src, int num_samples, SDL_AudioFormat src_fmt)
{
    switch (src_fmt & ~SDL_AUDIO_MASK_BIG_ENDIAN) {
    case SDL_AUDIO_S8:
        SDL_Convert_S8_to_F32(dst, (const Sint8 *) src, num_samples);
        return;

    case SDL_AUDIO_U8:
        SDL_Convert_U8_to_F32(dst, (const Uint8 *) src, num_samples);
        return;

    case SDL_AUDIO_S16LE:
        SDL_Convert_S16_to_F32(dst, (const Sint16 *) src, num_samples);
        return;

    case SDL_AUDIO_S32LE:
        SDL_Convert_S32_to_F32(dst, (const Sint32 *) src, num_samples);
        return;

    default:
        // Anything else (including float input) is not expected here.
        SDL_assert(!"Unexpected audio format!");
        return;
    }
}
// Narrow num_samples float samples from src into the integer format dst_fmt.
// Byte order is not dealt with here: the big-endian flag is masked off before
// dispatching, so the output is written without any byte swapping.
static void AudioConvertFromFloat(void *dst, const float *src, int num_samples, SDL_AudioFormat dst_fmt)
{
    switch (dst_fmt & ~SDL_AUDIO_MASK_BIG_ENDIAN) {
    case SDL_AUDIO_S8:
        SDL_Convert_F32_to_S8((Sint8 *) dst, src, num_samples);
        return;

    case SDL_AUDIO_U8:
        SDL_Convert_F32_to_U8((Uint8 *) dst, src, num_samples);
        return;

    case SDL_AUDIO_S16LE:
        SDL_Convert_F32_to_S16((Sint16 *) dst, src, num_samples);
        return;

    case SDL_AUDIO_S32LE:
        SDL_Convert_F32_to_S32((Sint32 *) dst, src, num_samples);
        return;

    default:
        // Anything else (including float output) is not expected here.
        SDL_assert(!"Unexpected audio format!");
        return;
    }
}
static SDL_bool SDL_IsSupportedAudioFormat(const SDL_AudioFormat fmt) static SDL_bool SDL_IsSupportedAudioFormat(const SDL_AudioFormat fmt)
{ {
switch (fmt) { switch (fmt) {
@ -210,7 +157,6 @@ static SDL_bool SDL_IsSupportedChannelCount(const int channels)
return ((channels >= 1) && (channels <= 8)); return ((channels >= 1) && (channels <= 8));
} }
// This does type and channel conversions _but not resampling_ (resampling happens in SDL_AudioStream). // This does type and channel conversions _but not resampling_ (resampling happens in SDL_AudioStream).
// This does not check parameter validity, (beyond asserts), it expects you did that already! // This does not check parameter validity, (beyond asserts), it expects you did that already!
// All of this has to function as if src==dst==scratch (conversion in-place), but as a convenience // All of this has to function as if src==dst==scratch (conversion in-place), but as a convenience
@ -266,14 +212,8 @@ void ConvertAudio(int num_frames, const void *src, SDL_AudioFormat src_format, i
} }
// just a byteswap needed? // just a byteswap needed?
if ((src_format & ~SDL_AUDIO_MASK_BIG_ENDIAN) == (dst_format & ~SDL_AUDIO_MASK_BIG_ENDIAN)) { if ((src_format ^ dst_format) == SDL_AUDIO_MASK_BIG_ENDIAN) {
if (src_bitsize == 8) { ConvertAudioSwapEndian(dst, src, num_frames * src_channels, src_bitsize);
if (src != dst) {
SDL_memcpy(dst, src, num_frames * dst_sample_frame_size);
}
return; // nothing to do, it's a 1-byte format.
}
AudioConvertByteswap(dst, src, num_frames * src_channels, src_bitsize);
return; // all done. return; // all done.
} }
} }
@ -282,23 +222,14 @@ void ConvertAudio(int num_frames, const void *src, SDL_AudioFormat src_format, i
scratch = dst; scratch = dst;
} }
const SDL_bool srcbyteswap = (SDL_AUDIO_ISBIGENDIAN(src_format) != 0) == (SDL_BYTEORDER == SDL_LIL_ENDIAN) && (src_bitsize > 8); const SDL_bool srcconvert = src_format != SDL_AUDIO_F32;
const SDL_bool srcconvert = !SDL_AUDIO_ISFLOAT(src_format);
const SDL_bool channelconvert = src_channels != dst_channels; const SDL_bool channelconvert = src_channels != dst_channels;
const SDL_bool dstconvert = !SDL_AUDIO_ISFLOAT(dst_format); const SDL_bool dstconvert = dst_format != SDL_AUDIO_F32;
const SDL_bool dstbyteswap = (SDL_AUDIO_ISBIGENDIAN(dst_format) != 0) == (SDL_BYTEORDER == SDL_LIL_ENDIAN) && (dst_bitsize > 8);
// make sure we're in native byte order.
if (srcbyteswap) {
// No point writing straight to dst. If we only need a byteswap, we wouldn't be here.
AudioConvertByteswap(scratch, src, num_frames * src_channels, src_bitsize);
src = scratch;
}
// get us to float format. // get us to float format.
if (srcconvert) { if (srcconvert) {
void* buf = (channelconvert || dstconvert || dstbyteswap) ? scratch : dst; void* buf = (channelconvert || dstconvert) ? scratch : dst;
AudioConvertToFloat((float *) buf, src, num_frames * src_channels, src_format); ConvertAudioToFloat((float *) buf, src, num_frames * src_channels, src_format);
src = buf; src = buf;
} }
@ -330,7 +261,7 @@ void ConvertAudio(int num_frames, const void *src, SDL_AudioFormat src_format, i
channel_converter = override; channel_converter = override;
} }
void* buf = (dstconvert || dstbyteswap) ? scratch : dst; void* buf = dstconvert ? scratch : dst;
channel_converter((float *) buf, (const float *) src, num_frames); channel_converter((float *) buf, (const float *) src, num_frames);
src = buf; src = buf;
} }
@ -339,16 +270,10 @@ void ConvertAudio(int num_frames, const void *src, SDL_AudioFormat src_format, i
// Move to final data type. // Move to final data type.
if (dstconvert) { if (dstconvert) {
AudioConvertFromFloat(dst, (const float *) src, num_frames * dst_channels, dst_format); ConvertAudioFromFloat(dst, (const float *) src, num_frames * dst_channels, dst_format);
src = dst; src = dst;
} }
// make sure we're in final byte order.
if (dstbyteswap) {
AudioConvertByteswap(dst, src, num_frames * dst_channels, dst_bitsize);
src = dst; // we've written to dst, future work will convert in-place.
}
SDL_assert(src == dst); // if we got here, we _had_ to have done _something_. Otherwise, we should have memcpy'd! SDL_assert(src == dst); // if we got here, we _had_ to have done _something_. Otherwise, we should have memcpy'd!
} }

View File

@ -181,6 +181,24 @@ static void SDL_Convert_F32_to_S32_Scalar(Sint32 *dst, const float *src, int num
#undef SIGNMASK #undef SIGNMASK
// Portable fallback: byte-swap num_samples 16-bit values from src into dst,
// one element at a time in ascending index order.
static void SDL_Convert_Swap16_Scalar(Uint16* dst, const Uint16* src, int num_samples)
{
    for (int idx = 0; idx < num_samples; idx++) {
        const Uint16 sample = src[idx];
        dst[idx] = SDL_Swap16(sample);
    }
}
// Portable fallback: byte-swap num_samples 32-bit values from src into dst,
// one element at a time in ascending index order.
static void SDL_Convert_Swap32_Scalar(Uint32* dst, const Uint32* src, int num_samples)
{
    for (int idx = 0; idx < num_samples; idx++) {
        const Uint32 sample = src[idx];
        dst[idx] = SDL_Swap32(sample);
    }
}
// end fallback scalar converters // end fallback scalar converters
// Convert forwards, when sizeof(*src) >= sizeof(*dst) // Convert forwards, when sizeof(*src) >= sizeof(*dst)
@ -463,6 +481,51 @@ static void SDL_TARGETING("sse2") SDL_Convert_F32_to_S32_SSE2(Sint32 *dst, const
} }
#endif #endif
// FIXME: SDL doesn't have SSSE3 detection, so use the next one up
#ifdef SDL_SSE4_1_INTRINSICS
// Byte-swap num_samples 16-bit values from src into dst using the SSSE3 byte shuffle.
// The first CONVERT_16_FWD body handles a single sample at index i; the second
// handles 16 samples (two 128-bit vectors) starting at index i.
// NOTE(review): the vector stores are the aligned variant while the loads are
// unaligned; this assumes CONVERT_16_FWD only runs the vector body once &dst[i]
// is 16-byte aligned (macro is defined outside this view) - confirm.
static void SDL_TARGETING("ssse3") SDL_Convert_Swap16_SSSE3(Uint16* dst, const Uint16* src, int num_samples)
{
    // Shuffle control: exchange the two bytes inside each 16-bit lane.
    const __m128i swap_mask = _mm_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);

    CONVERT_16_FWD({
        dst[i] = SDL_Swap16(src[i]);
    }, {
        const __m128i lo = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)&src[i]), swap_mask);
        const __m128i hi = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)&src[i + 8]), swap_mask);
        _mm_store_si128((__m128i*)&dst[i], lo);
        _mm_store_si128((__m128i*)&dst[i + 8], hi);
    })
}
// Byte-swap num_samples 32-bit values from src into dst using the SSSE3 byte shuffle.
// The first CONVERT_16_FWD body handles a single sample at index i; the second
// handles 16 samples (four 128-bit vectors) starting at index i.
// NOTE(review): the vector stores are the aligned variant while the loads are
// unaligned; this assumes CONVERT_16_FWD only runs the vector body once &dst[i]
// is 16-byte aligned (macro is defined outside this view) - confirm.
static void SDL_TARGETING("ssse3") SDL_Convert_Swap32_SSSE3(Uint32* dst, const Uint32* src, int num_samples)
{
    // Shuffle control: reverse the four bytes inside each 32-bit lane.
    const __m128i swap_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);

    CONVERT_16_FWD({
        dst[i] = SDL_Swap32(src[i]);
    }, {
        const __m128i v0 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)&src[i]), swap_mask);
        const __m128i v1 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)&src[i + 4]), swap_mask);
        const __m128i v2 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)&src[i + 8]), swap_mask);
        const __m128i v3 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)&src[i + 12]), swap_mask);
        _mm_store_si128((__m128i*)&dst[i], v0);
        _mm_store_si128((__m128i*)&dst[i + 4], v1);
        _mm_store_si128((__m128i*)&dst[i + 8], v2);
        _mm_store_si128((__m128i*)&dst[i + 12], v3);
    })
}
#endif
#ifdef SDL_NEON_INTRINSICS #ifdef SDL_NEON_INTRINSICS
static void SDL_Convert_S8_to_F32_NEON(float *dst, const Sint8 *src, int num_samples) static void SDL_Convert_S8_to_F32_NEON(float *dst, const Sint8 *src, int num_samples)
{ {
@ -666,17 +729,144 @@ static void SDL_Convert_F32_to_S32_NEON(Sint32 *dst, const float *src, int num_s
vst1q_s32(&dst[i + 12], ints3); vst1q_s32(&dst[i + 12], ints3);
}) })
} }
// Byte-swap num_samples 16-bit values from src into dst with NEON.
// The first CONVERT_16_FWD body handles a single sample at index i; the second
// handles 16 samples (two 128-bit vectors) starting at index i, reversing the
// bytes within each 16-bit element.
static void SDL_Convert_Swap16_NEON(Uint16* dst, const Uint16* src, int num_samples)
{
    CONVERT_16_FWD({
        dst[i] = SDL_Swap16(src[i]);
    }, {
        const uint8x16_t lo = vrev16q_u8(vld1q_u8((const Uint8*)&src[i]));
        const uint8x16_t hi = vrev16q_u8(vld1q_u8((const Uint8*)&src[i + 8]));
        vst1q_u8((Uint8*)&dst[i], lo);
        vst1q_u8((Uint8*)&dst[i + 8], hi);
    })
}
// Byte-swap num_samples 32-bit values from src into dst with NEON.
// The first CONVERT_16_FWD body handles a single sample at index i; the second
// handles 16 samples (four 128-bit vectors) starting at index i, reversing the
// bytes within each 32-bit element.
static void SDL_Convert_Swap32_NEON(Uint32* dst, const Uint32* src, int num_samples)
{
    CONVERT_16_FWD({
        dst[i] = SDL_Swap32(src[i]);
    }, {
        const uint8x16_t v0 = vrev32q_u8(vld1q_u8((const Uint8*)&src[i]));
        const uint8x16_t v1 = vrev32q_u8(vld1q_u8((const Uint8*)&src[i + 4]));
        const uint8x16_t v2 = vrev32q_u8(vld1q_u8((const Uint8*)&src[i + 8]));
        const uint8x16_t v3 = vrev32q_u8(vld1q_u8((const Uint8*)&src[i + 12]));
        vst1q_u8((Uint8*)&dst[i], v0);
        vst1q_u8((Uint8*)&dst[i + 4], v1);
        vst1q_u8((Uint8*)&dst[i + 8], v2);
        vst1q_u8((Uint8*)&dst[i + 12], v3);
    })
}
#endif #endif
#undef CONVERT_16_FWD
#undef CONVERT_16_REV
// Function pointers set to a CPU-specific implementation. // Function pointers set to a CPU-specific implementation.
void (*SDL_Convert_S8_to_F32)(float *dst, const Sint8 *src, int num_samples) = NULL; static void (*SDL_Convert_S8_to_F32)(float *dst, const Sint8 *src, int num_samples) = NULL;
void (*SDL_Convert_U8_to_F32)(float *dst, const Uint8 *src, int num_samples) = NULL; static void (*SDL_Convert_U8_to_F32)(float *dst, const Uint8 *src, int num_samples) = NULL;
void (*SDL_Convert_S16_to_F32)(float *dst, const Sint16 *src, int num_samples) = NULL; static void (*SDL_Convert_S16_to_F32)(float *dst, const Sint16 *src, int num_samples) = NULL;
void (*SDL_Convert_S32_to_F32)(float *dst, const Sint32 *src, int num_samples) = NULL; static void (*SDL_Convert_S32_to_F32)(float *dst, const Sint32 *src, int num_samples) = NULL;
void (*SDL_Convert_F32_to_S8)(Sint8 *dst, const float *src, int num_samples) = NULL; static void (*SDL_Convert_F32_to_S8)(Sint8 *dst, const float *src, int num_samples) = NULL;
void (*SDL_Convert_F32_to_U8)(Uint8 *dst, const float *src, int num_samples) = NULL; static void (*SDL_Convert_F32_to_U8)(Uint8 *dst, const float *src, int num_samples) = NULL;
void (*SDL_Convert_F32_to_S16)(Sint16 *dst, const float *src, int num_samples) = NULL; static void (*SDL_Convert_F32_to_S16)(Sint16 *dst, const float *src, int num_samples) = NULL;
void (*SDL_Convert_F32_to_S32)(Sint32 *dst, const float *src, int num_samples) = NULL; static void (*SDL_Convert_F32_to_S32)(Sint32 *dst, const float *src, int num_samples) = NULL;
// Byte-order swap routines for 16-bit and 32-bit samples. These start out NULL and
// are pointed at an SSSE3, NEON or Scalar implementation by SDL_ChooseAudioConverters().
static void (*SDL_Convert_Swap16)(Uint16* dst, const Uint16* src, int num_samples) = NULL;
static void (*SDL_Convert_Swap32)(Uint32* dst, const Uint32* src, int num_samples) = NULL;
// Convert num_samples samples of format src_fmt from src into float samples in dst.
// Formats written as `X ^ SDL_AUDIO_MASK_BIG_ENDIAN` are X with the endian flag
// toggled (presumably the non-native byte order, if SDL_AUDIO_S16/S32/F32 are the
// native-order aliases - confirm). Those are byte-swapped into dst first and then,
// for the integer formats, expanded to float in place inside dst.
// Any format not listed below (including plain SDL_AUDIO_F32) trips the assert.
void ConvertAudioToFloat(float *dst, const void *src, int num_samples, SDL_AudioFormat src_fmt)
{
    switch (src_fmt) {
    // 8-bit input: no byte order to worry about.
    case SDL_AUDIO_S8:
        SDL_Convert_S8_to_F32(dst, (const Sint8 *) src, num_samples);
        return;

    case SDL_AUDIO_U8:
        SDL_Convert_U8_to_F32(dst, (const Uint8 *) src, num_samples);
        return;

    // 16-bit input.
    case SDL_AUDIO_S16:
        SDL_Convert_S16_to_F32(dst, (const Sint16 *) src, num_samples);
        return;

    case SDL_AUDIO_S16 ^ SDL_AUDIO_MASK_BIG_ENDIAN:
        // Swap into the front of dst, then widen those samples in place.
        SDL_Convert_Swap16((Uint16*) dst, (const Uint16*) src, num_samples);
        SDL_Convert_S16_to_F32(dst, (const Sint16 *) dst, num_samples);
        return;

    // 32-bit integer input.
    case SDL_AUDIO_S32:
        SDL_Convert_S32_to_F32(dst, (const Sint32 *) src, num_samples);
        return;

    case SDL_AUDIO_S32 ^ SDL_AUDIO_MASK_BIG_ENDIAN:
        // Swap into dst, then convert those samples in place.
        SDL_Convert_Swap32((Uint32*) dst, (const Uint32*) src, num_samples);
        SDL_Convert_S32_to_F32(dst, (const Sint32 *) dst, num_samples);
        return;

    // Float input with the endian flag toggled: a byte swap is the whole conversion.
    case SDL_AUDIO_F32 ^ SDL_AUDIO_MASK_BIG_ENDIAN:
        SDL_Convert_Swap32((Uint32*) dst, (const Uint32*) src, num_samples);
        return;

    default:
        SDL_assert(!"Unexpected audio format!");
        return;
    }
}
// Convert num_samples float samples from src into format dst_fmt in dst.
// Formats written as `X ^ SDL_AUDIO_MASK_BIG_ENDIAN` are X with the endian flag
// toggled (presumably the non-native byte order, if SDL_AUDIO_S16/S32/F32 are the
// native-order aliases - confirm). For the integer formats the samples are first
// narrowed into dst and then byte-swapped in place inside dst.
// Any format not listed below (including plain SDL_AUDIO_F32) trips the assert.
void ConvertAudioFromFloat(void *dst, const float *src, int num_samples, SDL_AudioFormat dst_fmt)
{
    switch (dst_fmt) {
    // 8-bit output: no byte order to worry about.
    case SDL_AUDIO_S8:
        SDL_Convert_F32_to_S8((Sint8 *) dst, src, num_samples);
        return;

    case SDL_AUDIO_U8:
        SDL_Convert_F32_to_U8((Uint8 *) dst, src, num_samples);
        return;

    // 16-bit output.
    case SDL_AUDIO_S16:
        SDL_Convert_F32_to_S16((Sint16 *) dst, src, num_samples);
        return;

    case SDL_AUDIO_S16 ^ SDL_AUDIO_MASK_BIG_ENDIAN:
        // Produce the samples in dst, then flip their byte order in place.
        SDL_Convert_F32_to_S16((Sint16 *) dst, src, num_samples);
        SDL_Convert_Swap16((Uint16*) dst, (const Uint16*) dst, num_samples);
        return;

    // 32-bit integer output.
    case SDL_AUDIO_S32:
        SDL_Convert_F32_to_S32((Sint32 *) dst, src, num_samples);
        return;

    case SDL_AUDIO_S32 ^ SDL_AUDIO_MASK_BIG_ENDIAN:
        // Produce the samples in dst, then flip their byte order in place.
        SDL_Convert_F32_to_S32((Sint32 *) dst, src, num_samples);
        SDL_Convert_Swap32((Uint32*) dst, (const Uint32*) dst, num_samples);
        return;

    // Float output with the endian flag toggled: a byte swap is the whole conversion.
    case SDL_AUDIO_F32 ^ SDL_AUDIO_MASK_BIG_ENDIAN:
        SDL_Convert_Swap32((Uint32*) dst, (const Uint32*) src, num_samples);
        return;

    default:
        SDL_assert(!"Unexpected audio format!");
        return;
    }
}
// Byte-swap num_samples samples from src into dst.
// bitsize is the sample width in bits; 16 and 32 are dispatched to the chosen
// swap routine, any other width trips the assert and nothing is written.
void ConvertAudioSwapEndian(void* dst, const void* src, int num_samples, int bitsize)
{
    if (bitsize == 16) {
        SDL_Convert_Swap16((Uint16*) dst, (const Uint16*) src, num_samples);
    } else if (bitsize == 32) {
        SDL_Convert_Swap32((Uint32*) dst, (const Uint32*) src, num_samples);
    } else {
        SDL_assert(!"Unexpected audio format!");
    }
}
void SDL_ChooseAudioConverters(void) void SDL_ChooseAudioConverters(void)
{ {
@ -685,6 +875,26 @@ void SDL_ChooseAudioConverters(void)
return; return;
} }
#define SET_CONVERTER_FUNCS(fntype) \
SDL_Convert_Swap16 = SDL_Convert_Swap16_##fntype; \
SDL_Convert_Swap32 = SDL_Convert_Swap32_##fntype;
#ifdef SDL_SSE4_1_INTRINSICS
if (SDL_HasSSE41()) {
SET_CONVERTER_FUNCS(SSSE3);
} else
#endif
#ifdef SDL_NEON_INTRINSICS
if (SDL_HasNEON()) {
SET_CONVERTER_FUNCS(NEON);
} else
#endif
{
SET_CONVERTER_FUNCS(Scalar);
}
#undef SET_CONVERTER_FUNCS
#define SET_CONVERTER_FUNCS(fntype) \ #define SET_CONVERTER_FUNCS(fntype) \
SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \ SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \
SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \ SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \
@ -694,25 +904,22 @@ void SDL_ChooseAudioConverters(void)
SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \ SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \
SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \ SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \
SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \ SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \
converters_chosen = SDL_TRUE
#ifdef SDL_SSE2_INTRINSICS #ifdef SDL_SSE2_INTRINSICS
if (SDL_HasSSE2()) { if (SDL_HasSSE2()) {
SET_CONVERTER_FUNCS(SSE2); SET_CONVERTER_FUNCS(SSE2);
return; } else
}
#endif #endif
#ifdef SDL_NEON_INTRINSICS #ifdef SDL_NEON_INTRINSICS
if (SDL_HasNEON()) { if (SDL_HasNEON()) {
SET_CONVERTER_FUNCS(NEON); SET_CONVERTER_FUNCS(NEON);
return; } else
}
#endif #endif
{
SET_CONVERTER_FUNCS(Scalar); SET_CONVERTER_FUNCS(Scalar);
}
#undef SET_CONVERTER_FUNCS #undef SET_CONVERTER_FUNCS
SDL_assert(converters_chosen == SDL_TRUE); converters_chosen = SDL_TRUE;
} }

View File

@ -35,16 +35,6 @@
#define LOG_DEBUG_AUDIO_CONVERT(from, to) #define LOG_DEBUG_AUDIO_CONVERT(from, to)
#endif #endif
// These pointers get set during SDL_ChooseAudioConverters() to various SIMD implementations.
extern void (*SDL_Convert_S8_to_F32)(float *dst, const Sint8 *src, int num_samples);
extern void (*SDL_Convert_U8_to_F32)(float *dst, const Uint8 *src, int num_samples);
extern void (*SDL_Convert_S16_to_F32)(float *dst, const Sint16 *src, int num_samples);
extern void (*SDL_Convert_S32_to_F32)(float *dst, const Sint32 *src, int num_samples);
extern void (*SDL_Convert_F32_to_S8)(Sint8 *dst, const float *src, int num_samples);
extern void (*SDL_Convert_F32_to_U8)(Uint8 *dst, const float *src, int num_samples);
extern void (*SDL_Convert_F32_to_S16)(Sint16 *dst, const float *src, int num_samples);
extern void (*SDL_Convert_F32_to_S32)(Sint32 *dst, const float *src, int num_samples);
// !!! FIXME: These are wordy and unlocalized... // !!! FIXME: These are wordy and unlocalized...
#define DEFAULT_OUTPUT_DEVNAME "System audio output device" #define DEFAULT_OUTPUT_DEVNAME "System audio output device"
#define DEFAULT_INPUT_DEVNAME "System audio capture device" #define DEFAULT_INPUT_DEVNAME "System audio capture device"
@ -119,6 +109,10 @@ extern SDL_bool SDL_CaptureAudioThreadIterate(SDL_AudioDevice *device);
extern void SDL_CaptureAudioThreadShutdown(SDL_AudioDevice *device); extern void SDL_CaptureAudioThreadShutdown(SDL_AudioDevice *device);
extern void SDL_AudioThreadFinalize(SDL_AudioDevice *device); extern void SDL_AudioThreadFinalize(SDL_AudioDevice *device);
// Sample-format helpers used by ConvertAudio(). num_samples counts individual samples
// (ConvertAudio passes frames * channels).
// Convert src_fmt samples to float. Handles S8, U8, S16, S32 and their endian-flag-toggled
// variants, plus endian-flag-toggled F32; any other format (including plain F32) asserts.
extern void ConvertAudioToFloat(float *dst, const void *src, int num_samples, SDL_AudioFormat src_fmt);
// Convert float samples to dst_fmt. Accepts the same set of formats as ConvertAudioToFloat.
extern void ConvertAudioFromFloat(void *dst, const float *src, int num_samples, SDL_AudioFormat dst_fmt);
// Byte-swap samples from src into dst. bitsize must be 16 or 32; other widths assert.
extern void ConvertAudioSwapEndian(void* dst, const void* src, int num_samples, int bitsize);
// this gets used from the audio device threads. It has rules, don't use this if you don't know how to use it! // this gets used from the audio device threads. It has rules, don't use this if you don't know how to use it!
extern void ConvertAudio(int num_frames, const void *src, SDL_AudioFormat src_format, int src_channels, extern void ConvertAudio(int num_frames, const void *src, SDL_AudioFormat src_format, int src_channels,
void *dst, SDL_AudioFormat dst_format, int dst_channels, void* scratch); void *dst, SDL_AudioFormat dst_format, int dst_channels, void* scratch);