Refactored ConvertAudio, added SIMD endian-swapping

main
Brick 2024-04-05 17:16:17 +01:00 committed by Sam Lantinga
parent e9e9424b9a
commit 33f28d6143
3 changed files with 236 additions and 110 deletions

View File

@ -132,59 +132,6 @@ static void SDL_TARGETING("sse") SDL_ConvertMonoToStereo_SSE(float *dst, const f
// Include the autogenerated channel converters...
#include "SDL_audio_channel_converters.h"
// Reverse the byte order of `num_samples` samples, reading from `src` and writing to `dst`.
// `bitsize` is the sample width in bits; only 16 and 32 are handled, and any other
// value trips an assertion without touching `dst`.
static void AudioConvertByteswap(void *dst, const void *src, int num_samples, int bitsize)
{
#if DEBUG_AUDIO_CONVERT
    SDL_Log("SDL_AUDIO_CONVERT: Converting %d-bit byte order", bitsize);
#endif

    if (bitsize == 16) {
        const Uint16 *in = (const Uint16 *)src;
        Uint16 *out = (Uint16 *)dst;
        for (int n = 0; n < num_samples; n++) {
            out[n] = SDL_Swap16(in[n]);
        }
    } else if (bitsize == 32) {
        const Uint32 *in = (const Uint32 *)src;
        Uint32 *out = (Uint32 *)dst;
        for (int n = 0; n < num_samples; n++) {
            out[n] = SDL_Swap32(in[n]);
        }
    } else {
        SDL_assert(!"unhandled byteswap datatype!");
    }
}
// Convert `num_samples` integer samples from `src` into floats in `dst`.
// The big-endian flag of `src_fmt` is masked off before dispatching, so this only
// selects the sample type; byte order is handled separately by the caller.
// Formats without a case below trip an assertion.
static void AudioConvertToFloat(float *dst, const void *src, int num_samples, SDL_AudioFormat src_fmt)
{
    switch (src_fmt & ~SDL_AUDIO_MASK_BIG_ENDIAN) {
    case SDL_AUDIO_S8: {
        const Sint8 *in = (const Sint8 *)src;
        SDL_Convert_S8_to_F32(dst, in, num_samples);
        break;
    }
    case SDL_AUDIO_U8: {
        const Uint8 *in = (const Uint8 *)src;
        SDL_Convert_U8_to_F32(dst, in, num_samples);
        break;
    }
    case SDL_AUDIO_S16LE: {
        const Sint16 *in = (const Sint16 *)src;
        SDL_Convert_S16_to_F32(dst, in, num_samples);
        break;
    }
    case SDL_AUDIO_S32LE: {
        const Sint32 *in = (const Sint32 *)src;
        SDL_Convert_S32_to_F32(dst, in, num_samples);
        break;
    }
    default:
        SDL_assert(!"Unexpected audio format!");
        break;
    }
}
// Convert `num_samples` float samples from `src` into the integer type named by `dst_fmt`,
// writing the result to `dst`. The big-endian flag of `dst_fmt` is masked off before
// dispatching; byte order is handled separately by the caller.
// Formats without a case below trip an assertion.
static void AudioConvertFromFloat(void *dst, const float *src, int num_samples, SDL_AudioFormat dst_fmt)
{
    switch (dst_fmt & ~SDL_AUDIO_MASK_BIG_ENDIAN) {
    case SDL_AUDIO_S8: {
        Sint8 *out = (Sint8 *)dst;
        SDL_Convert_F32_to_S8(out, src, num_samples);
        break;
    }
    case SDL_AUDIO_U8: {
        Uint8 *out = (Uint8 *)dst;
        SDL_Convert_F32_to_U8(out, src, num_samples);
        break;
    }
    case SDL_AUDIO_S16LE: {
        Sint16 *out = (Sint16 *)dst;
        SDL_Convert_F32_to_S16(out, src, num_samples);
        break;
    }
    case SDL_AUDIO_S32LE: {
        Sint32 *out = (Sint32 *)dst;
        SDL_Convert_F32_to_S32(out, src, num_samples);
        break;
    }
    default:
        SDL_assert(!"Unexpected audio format!");
        break;
    }
}
static SDL_bool SDL_IsSupportedAudioFormat(const SDL_AudioFormat fmt)
{
switch (fmt) {
@ -210,7 +157,6 @@ static SDL_bool SDL_IsSupportedChannelCount(const int channels)
return ((channels >= 1) && (channels <= 8));
}
// This does type and channel conversions _but not resampling_ (resampling happens in SDL_AudioStream).
// This does not check parameter validity (beyond asserts); it expects you did that already!
// All of this has to function as if src==dst==scratch (conversion in-place), but as a convenience
@ -266,14 +212,8 @@ void ConvertAudio(int num_frames, const void *src, SDL_AudioFormat src_format, i
}
// just a byteswap needed?
if ((src_format & ~SDL_AUDIO_MASK_BIG_ENDIAN) == (dst_format & ~SDL_AUDIO_MASK_BIG_ENDIAN)) {
if (src_bitsize == 8) {
if (src != dst) {
SDL_memcpy(dst, src, num_frames * dst_sample_frame_size);
}
return; // nothing to do, it's a 1-byte format.
}
AudioConvertByteswap(dst, src, num_frames * src_channels, src_bitsize);
if ((src_format ^ dst_format) == SDL_AUDIO_MASK_BIG_ENDIAN) {
ConvertAudioSwapEndian(dst, src, num_frames * src_channels, src_bitsize);
return; // all done.
}
}
@ -282,23 +222,14 @@ void ConvertAudio(int num_frames, const void *src, SDL_AudioFormat src_format, i
scratch = dst;
}
const SDL_bool srcbyteswap = (SDL_AUDIO_ISBIGENDIAN(src_format) != 0) == (SDL_BYTEORDER == SDL_LIL_ENDIAN) && (src_bitsize > 8);
const SDL_bool srcconvert = !SDL_AUDIO_ISFLOAT(src_format);
const SDL_bool srcconvert = src_format != SDL_AUDIO_F32;
const SDL_bool channelconvert = src_channels != dst_channels;
const SDL_bool dstconvert = !SDL_AUDIO_ISFLOAT(dst_format);
const SDL_bool dstbyteswap = (SDL_AUDIO_ISBIGENDIAN(dst_format) != 0) == (SDL_BYTEORDER == SDL_LIL_ENDIAN) && (dst_bitsize > 8);
// make sure we're in native byte order.
if (srcbyteswap) {
// No point writing straight to dst. If we only need a byteswap, we wouldn't be here.
AudioConvertByteswap(scratch, src, num_frames * src_channels, src_bitsize);
src = scratch;
}
const SDL_bool dstconvert = dst_format != SDL_AUDIO_F32;
// get us to float format.
if (srcconvert) {
void* buf = (channelconvert || dstconvert || dstbyteswap) ? scratch : dst;
AudioConvertToFloat((float *) buf, src, num_frames * src_channels, src_format);
void* buf = (channelconvert || dstconvert) ? scratch : dst;
ConvertAudioToFloat((float *) buf, src, num_frames * src_channels, src_format);
src = buf;
}
@ -330,7 +261,7 @@ void ConvertAudio(int num_frames, const void *src, SDL_AudioFormat src_format, i
channel_converter = override;
}
void* buf = (dstconvert || dstbyteswap) ? scratch : dst;
void* buf = dstconvert ? scratch : dst;
channel_converter((float *) buf, (const float *) src, num_frames);
src = buf;
}
@ -339,16 +270,10 @@ void ConvertAudio(int num_frames, const void *src, SDL_AudioFormat src_format, i
// Move to final data type.
if (dstconvert) {
AudioConvertFromFloat(dst, (const float *) src, num_frames * dst_channels, dst_format);
ConvertAudioFromFloat(dst, (const float *) src, num_frames * dst_channels, dst_format);
src = dst;
}
// make sure we're in final byte order.
if (dstbyteswap) {
AudioConvertByteswap(dst, src, num_frames * dst_channels, dst_bitsize);
src = dst; // we've written to dst, future work will convert in-place.
}
SDL_assert(src == dst); // if we got here, we _had_ to have done _something_. Otherwise, we should have memcpy'd!
}

View File

@ -181,6 +181,24 @@ static void SDL_Convert_F32_to_S32_Scalar(Sint32 *dst, const float *src, int num
#undef SIGNMASK
// Scalar fallback: write the byte-swapped form of each of the `num_samples`
// 16-bit values in `src` to the same index of `dst`.
static void SDL_Convert_Swap16_Scalar(Uint16* dst, const Uint16* src, int num_samples)
{
    for (int n = 0; n < num_samples; n++) {
        const Uint16 value = src[n];
        dst[n] = SDL_Swap16(value);
    }
}
// Scalar fallback: write the byte-swapped form of each of the `num_samples`
// 32-bit values in `src` to the same index of `dst`.
static void SDL_Convert_Swap32_Scalar(Uint32* dst, const Uint32* src, int num_samples)
{
    for (int n = 0; n < num_samples; n++) {
        const Uint32 value = src[n];
        dst[n] = SDL_Swap32(value);
    }
}
// end fallback scalar converters
// Convert forwards, when sizeof(*src) >= sizeof(*dst)
@ -463,6 +481,51 @@ static void SDL_TARGETING("sse2") SDL_Convert_F32_to_S32_SSE2(Sint32 *dst, const
}
#endif
// FIXME: SDL doesn't have SSSE3 detection, so use the next one up
#ifdef SDL_SSE4_1_INTRINSICS
// Byte-swap 16-bit samples from `src` into `dst` using the SSSE3 byte shuffle.
// The vector step handles two 128-bit registers (16 samples) at a time; the
// single-sample step uses SDL_Swap16.
static void SDL_TARGETING("ssse3") SDL_Convert_Swap16_SSSE3(Uint16* dst, const Uint16* src, int num_samples)
{
    // Shuffle control that exchanges the two bytes inside every 16-bit lane.
    const __m128i swap_mask = _mm_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);

    CONVERT_16_FWD({
        dst[i] = SDL_Swap16(src[i]);
    }, {
        // Unaligned loads; each register is shuffled as soon as it is loaded.
        const __m128i lo = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)&src[i]), swap_mask);
        const __m128i hi = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)&src[i + 8]), swap_mask);

        // NOTE(review): these are aligned stores. Presumably CONVERT_16_FWD only runs
        // this step once &dst[i] is 16-byte aligned -- confirm against the macro.
        _mm_store_si128((__m128i*)&dst[i], lo);
        _mm_store_si128((__m128i*)&dst[i + 8], hi);
    })
}
// Byte-swap 32-bit samples from `src` into `dst` using the SSSE3 byte shuffle.
// The vector step handles four 128-bit registers (16 samples) at a time; the
// single-sample step uses SDL_Swap32.
static void SDL_TARGETING("ssse3") SDL_Convert_Swap32_SSSE3(Uint32* dst, const Uint32* src, int num_samples)
{
    // Shuffle control that reverses the four bytes inside every 32-bit lane.
    const __m128i swap_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);

    CONVERT_16_FWD({
        dst[i] = SDL_Swap32(src[i]);
    }, {
        // Unaligned loads; each register is shuffled as soon as it is loaded.
        const __m128i q0 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)&src[i]), swap_mask);
        const __m128i q1 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)&src[i + 4]), swap_mask);
        const __m128i q2 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)&src[i + 8]), swap_mask);
        const __m128i q3 = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)&src[i + 12]), swap_mask);

        // NOTE(review): these are aligned stores. Presumably CONVERT_16_FWD only runs
        // this step once &dst[i] is 16-byte aligned -- confirm against the macro.
        _mm_store_si128((__m128i*)&dst[i], q0);
        _mm_store_si128((__m128i*)&dst[i + 4], q1);
        _mm_store_si128((__m128i*)&dst[i + 8], q2);
        _mm_store_si128((__m128i*)&dst[i + 12], q3);
    })
}
#endif
#ifdef SDL_NEON_INTRINSICS
static void SDL_Convert_S8_to_F32_NEON(float *dst, const Sint8 *src, int num_samples)
{
@ -666,17 +729,144 @@ static void SDL_Convert_F32_to_S32_NEON(Sint32 *dst, const float *src, int num_s
vst1q_s32(&dst[i + 12], ints3);
})
}
// Byte-swap 16-bit samples from `src` into `dst` using NEON.
// The vector step handles two 128-bit registers (16 samples) at a time by reversing
// the bytes inside each 16-bit element; the single-sample step uses SDL_Swap16.
static void SDL_Convert_Swap16_NEON(Uint16* dst, const Uint16* src, int num_samples)
{
    CONVERT_16_FWD({
        dst[i] = SDL_Swap16(src[i]);
    }, {
        // Load both registers and reverse them before anything is written back.
        const uint8x16_t lo = vrev16q_u8(vld1q_u8((const Uint8*)&src[i]));
        const uint8x16_t hi = vrev16q_u8(vld1q_u8((const Uint8*)&src[i + 8]));

        vst1q_u8((Uint8*)&dst[i], lo);
        vst1q_u8((Uint8*)&dst[i + 8], hi);
    })
}
// Byte-swap 32-bit samples from `src` into `dst` using NEON.
// The vector step handles four 128-bit registers (16 samples) at a time by reversing
// the bytes inside each 32-bit element; the single-sample step uses SDL_Swap32.
static void SDL_Convert_Swap32_NEON(Uint32* dst, const Uint32* src, int num_samples)
{
    CONVERT_16_FWD({
        dst[i] = SDL_Swap32(src[i]);
    }, {
        // Load all four registers and reverse them before anything is written back.
        const uint8x16_t q0 = vrev32q_u8(vld1q_u8((const Uint8*)&src[i]));
        const uint8x16_t q1 = vrev32q_u8(vld1q_u8((const Uint8*)&src[i + 4]));
        const uint8x16_t q2 = vrev32q_u8(vld1q_u8((const Uint8*)&src[i + 8]));
        const uint8x16_t q3 = vrev32q_u8(vld1q_u8((const Uint8*)&src[i + 12]));

        vst1q_u8((Uint8*)&dst[i], q0);
        vst1q_u8((Uint8*)&dst[i + 4], q1);
        vst1q_u8((Uint8*)&dst[i + 8], q2);
        vst1q_u8((Uint8*)&dst[i + 12], q3);
    })
}
#endif
#undef CONVERT_16_FWD
#undef CONVERT_16_REV
// Function pointers set to a CPU-specific implementation.
// Each pointer starts out NULL and takes (dst, src, num_samples).
// NOTE(review): the eight S8/U8/S16/S32 <-> F32 pointers are declared twice below, first
// with external linkage and then as `static`. This looks like both sides of a diff shown
// together -- confirm which set is meant to remain; only one can be kept.
void (*SDL_Convert_S8_to_F32)(float *dst, const Sint8 *src, int num_samples) = NULL;
void (*SDL_Convert_U8_to_F32)(float *dst, const Uint8 *src, int num_samples) = NULL;
void (*SDL_Convert_S16_to_F32)(float *dst, const Sint16 *src, int num_samples) = NULL;
void (*SDL_Convert_S32_to_F32)(float *dst, const Sint32 *src, int num_samples) = NULL;
void (*SDL_Convert_F32_to_S8)(Sint8 *dst, const float *src, int num_samples) = NULL;
void (*SDL_Convert_F32_to_U8)(Uint8 *dst, const float *src, int num_samples) = NULL;
void (*SDL_Convert_F32_to_S16)(Sint16 *dst, const float *src, int num_samples) = NULL;
void (*SDL_Convert_F32_to_S32)(Sint32 *dst, const float *src, int num_samples) = NULL;
static void (*SDL_Convert_S8_to_F32)(float *dst, const Sint8 *src, int num_samples) = NULL;
static void (*SDL_Convert_U8_to_F32)(float *dst, const Uint8 *src, int num_samples) = NULL;
static void (*SDL_Convert_S16_to_F32)(float *dst, const Sint16 *src, int num_samples) = NULL;
static void (*SDL_Convert_S32_to_F32)(float *dst, const Sint32 *src, int num_samples) = NULL;
static void (*SDL_Convert_F32_to_S8)(Sint8 *dst, const float *src, int num_samples) = NULL;
static void (*SDL_Convert_F32_to_U8)(Uint8 *dst, const float *src, int num_samples) = NULL;
static void (*SDL_Convert_F32_to_S16)(Sint16 *dst, const float *src, int num_samples) = NULL;
static void (*SDL_Convert_F32_to_S32)(Sint32 *dst, const float *src, int num_samples) = NULL;
// Byte-order swappers for 16-bit and 32-bit samples.
static void (*SDL_Convert_Swap16)(Uint16* dst, const Uint16* src, int num_samples) = NULL;
static void (*SDL_Convert_Swap32)(Uint32* dst, const Uint32* src, int num_samples) = NULL;
// Convert `num_samples` samples of format `src_fmt` from `src` into `dst`.
// Formats whose endian flag is flipped relative to SDL_AUDIO_S16 / SDL_AUDIO_S32 are
// first byte-swapped into `dst` and then converted to float in place there.
// The endian-flipped F32 format is only byte-swapped. SDL_AUDIO_F32 itself has no
// case here, so it (like any other unlisted format) trips the assertion.
void ConvertAudioToFloat(float *dst, const void *src, int num_samples, SDL_AudioFormat src_fmt)
{
    switch (src_fmt) {
    case SDL_AUDIO_S8: {
        const Sint8 *in = (const Sint8 *)src;
        SDL_Convert_S8_to_F32(dst, in, num_samples);
        break;
    }

    case SDL_AUDIO_U8: {
        const Uint8 *in = (const Uint8 *)src;
        SDL_Convert_U8_to_F32(dst, in, num_samples);
        break;
    }

    case SDL_AUDIO_S16: {
        const Sint16 *in = (const Sint16 *)src;
        SDL_Convert_S16_to_F32(dst, in, num_samples);
        break;
    }

    case SDL_AUDIO_S16 ^ SDL_AUDIO_MASK_BIG_ENDIAN: {
        // Swap into the output buffer, then widen to float in place.
        Uint16 *swapped = (Uint16 *)dst;
        SDL_Convert_Swap16(swapped, (const Uint16 *)src, num_samples);
        SDL_Convert_S16_to_F32(dst, (const Sint16 *)swapped, num_samples);
        break;
    }

    case SDL_AUDIO_S32: {
        const Sint32 *in = (const Sint32 *)src;
        SDL_Convert_S32_to_F32(dst, in, num_samples);
        break;
    }

    case SDL_AUDIO_S32 ^ SDL_AUDIO_MASK_BIG_ENDIAN: {
        // Swap into the output buffer, then convert to float in place.
        Uint32 *swapped = (Uint32 *)dst;
        SDL_Convert_Swap32(swapped, (const Uint32 *)src, num_samples);
        SDL_Convert_S32_to_F32(dst, (const Sint32 *)swapped, num_samples);
        break;
    }

    case SDL_AUDIO_F32 ^ SDL_AUDIO_MASK_BIG_ENDIAN: {
        // Already float data; only the byte order changes.
        Uint32 *out = (Uint32 *)dst;
        SDL_Convert_Swap32(out, (const Uint32 *)src, num_samples);
        break;
    }

    default:
        SDL_assert(!"Unexpected audio format!");
        break;
    }
}
// Convert `num_samples` float samples from `src` into format `dst_fmt`, writing to `dst`.
// For formats whose endian flag is flipped relative to SDL_AUDIO_S16 / SDL_AUDIO_S32 the
// samples are converted into `dst` first and then byte-swapped in place there.
// The endian-flipped F32 format is only byte-swapped. SDL_AUDIO_F32 itself has no
// case here, so it (like any other unlisted format) trips the assertion.
void ConvertAudioFromFloat(void *dst, const float *src, int num_samples, SDL_AudioFormat dst_fmt)
{
    switch (dst_fmt) {
    case SDL_AUDIO_S8: {
        Sint8 *out = (Sint8 *)dst;
        SDL_Convert_F32_to_S8(out, src, num_samples);
        break;
    }

    case SDL_AUDIO_U8: {
        Uint8 *out = (Uint8 *)dst;
        SDL_Convert_F32_to_U8(out, src, num_samples);
        break;
    }

    case SDL_AUDIO_S16: {
        Sint16 *out = (Sint16 *)dst;
        SDL_Convert_F32_to_S16(out, src, num_samples);
        break;
    }

    case SDL_AUDIO_S16 ^ SDL_AUDIO_MASK_BIG_ENDIAN: {
        // Narrow to 16-bit first, then flip the byte order in place.
        Uint16 *out = (Uint16 *)dst;
        SDL_Convert_F32_to_S16((Sint16 *)dst, src, num_samples);
        SDL_Convert_Swap16(out, (const Uint16 *)out, num_samples);
        break;
    }

    case SDL_AUDIO_S32: {
        Sint32 *out = (Sint32 *)dst;
        SDL_Convert_F32_to_S32(out, src, num_samples);
        break;
    }

    case SDL_AUDIO_S32 ^ SDL_AUDIO_MASK_BIG_ENDIAN: {
        // Convert to 32-bit integers first, then flip the byte order in place.
        Uint32 *out = (Uint32 *)dst;
        SDL_Convert_F32_to_S32((Sint32 *)dst, src, num_samples);
        SDL_Convert_Swap32(out, (const Uint32 *)out, num_samples);
        break;
    }

    case SDL_AUDIO_F32 ^ SDL_AUDIO_MASK_BIG_ENDIAN: {
        // Stays float data; only the byte order changes.
        Uint32 *out = (Uint32 *)dst;
        SDL_Convert_Swap32(out, (const Uint32 *)src, num_samples);
        break;
    }

    default:
        SDL_assert(!"Unexpected audio format!");
        break;
    }
}
// Byte-swap `num_samples` samples of `bitsize` bits each from `src` into `dst`.
// Only 16-bit and 32-bit samples are handled; any other width trips an assertion
// and leaves `dst` untouched.
void ConvertAudioSwapEndian(void* dst, const void* src, int num_samples, int bitsize)
{
    if (bitsize == 16) {
        SDL_Convert_Swap16((Uint16*) dst, (const Uint16*) src, num_samples);
    } else if (bitsize == 32) {
        SDL_Convert_Swap32((Uint32*) dst, (const Uint32*) src, num_samples);
    } else {
        SDL_assert(!"Unexpected audio format!");
    }
}
void SDL_ChooseAudioConverters(void)
{
@ -685,6 +875,26 @@ void SDL_ChooseAudioConverters(void)
return;
}
#define SET_CONVERTER_FUNCS(fntype) \
SDL_Convert_Swap16 = SDL_Convert_Swap16_##fntype; \
SDL_Convert_Swap32 = SDL_Convert_Swap32_##fntype;
#ifdef SDL_SSE4_1_INTRINSICS
if (SDL_HasSSE41()) {
SET_CONVERTER_FUNCS(SSSE3);
} else
#endif
#ifdef SDL_NEON_INTRINSICS
if (SDL_HasNEON()) {
SET_CONVERTER_FUNCS(NEON);
} else
#endif
{
SET_CONVERTER_FUNCS(Scalar);
}
#undef SET_CONVERTER_FUNCS
#define SET_CONVERTER_FUNCS(fntype) \
SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \
SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \
@ -694,25 +904,22 @@ void SDL_ChooseAudioConverters(void)
SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \
SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \
SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \
converters_chosen = SDL_TRUE
#ifdef SDL_SSE2_INTRINSICS
if (SDL_HasSSE2()) {
SET_CONVERTER_FUNCS(SSE2);
return;
}
} else
#endif
#ifdef SDL_NEON_INTRINSICS
if (SDL_HasNEON()) {
SET_CONVERTER_FUNCS(NEON);
return;
}
} else
#endif
{
SET_CONVERTER_FUNCS(Scalar);
}
#undef SET_CONVERTER_FUNCS
SDL_assert(converters_chosen == SDL_TRUE);
converters_chosen = SDL_TRUE;
}

View File

@ -35,16 +35,6 @@
#define LOG_DEBUG_AUDIO_CONVERT(from, to)
#endif
// These pointers get set during SDL_ChooseAudioConverters() to various SIMD implementations.
extern void (*SDL_Convert_S8_to_F32)(float *dst, const Sint8 *src, int num_samples);
extern void (*SDL_Convert_U8_to_F32)(float *dst, const Uint8 *src, int num_samples);
extern void (*SDL_Convert_S16_to_F32)(float *dst, const Sint16 *src, int num_samples);
extern void (*SDL_Convert_S32_to_F32)(float *dst, const Sint32 *src, int num_samples);
extern void (*SDL_Convert_F32_to_S8)(Sint8 *dst, const float *src, int num_samples);
extern void (*SDL_Convert_F32_to_U8)(Uint8 *dst, const float *src, int num_samples);
extern void (*SDL_Convert_F32_to_S16)(Sint16 *dst, const float *src, int num_samples);
extern void (*SDL_Convert_F32_to_S32)(Sint32 *dst, const float *src, int num_samples);
// !!! FIXME: These are wordy and unlocalized...
#define DEFAULT_OUTPUT_DEVNAME "System audio output device"
#define DEFAULT_INPUT_DEVNAME "System audio capture device"
@ -119,6 +109,10 @@ extern SDL_bool SDL_CaptureAudioThreadIterate(SDL_AudioDevice *device);
extern void SDL_CaptureAudioThreadShutdown(SDL_AudioDevice *device);
extern void SDL_AudioThreadFinalize(SDL_AudioDevice *device);
// Sample-format helpers; each processes `num_samples` individual samples from `src` into `dst`.
// Convert samples of format `src_fmt` to floats.
extern void ConvertAudioToFloat(float *dst, const void *src, int num_samples, SDL_AudioFormat src_fmt);
// Convert float samples to format `dst_fmt`.
extern void ConvertAudioFromFloat(void *dst, const float *src, int num_samples, SDL_AudioFormat dst_fmt);
// Reverse the byte order of samples that are `bitsize` bits wide (16 or 32).
extern void ConvertAudioSwapEndian(void* dst, const void* src, int num_samples, int bitsize);
// this gets used from the audio device threads. It has rules, don't use this if you don't know how to use it!
extern void ConvertAudio(int num_frames, const void *src, SDL_AudioFormat src_format, int src_channels,
void *dst, SDL_AudioFormat dst_format, int dst_channels, void* scratch);