audio: Resampler optimizations.

- Calculate `j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING` once per loop
  iteration since we use it multiple times.
- Do the left-wing loop in two sections: while `srcframe < 0` and then
  the remaining calculations when `srcframe >= 0`. This bubbles a conditional
  out of every iteration of a tight loop, giving us a boost. We could
  _probably_ do this to the right-wing loop too, but it's less straightforward
  there.
- The real win: Use floats instead of doubles. This almost doubles the speed
  of the entire function on Intel CPUs, and for embedded things without
  hardware-level support for doubles, the speedup is enormous. This in
  theory might reduce audio quality, though, and I had to put a check in
  place to avoid a division-by-zero that we avoided at higher precision, but
  this is likely to be worth keeping for at least the Sony PSP and other
  smaller platforms, if not everyone.
main
Ryan C. Gordon 2022-04-10 13:23:51 -04:00
parent de019568dc
commit 111c3add73
1 changed files with 32 additions and 14 deletions

View File

@ -725,46 +725,64 @@ SDL_ResampleAudio(const int chans, const int inrate, const int outrate,
const float *inbuf, const int inbuflen,
float *outbuf, const int outbuflen)
{
const double finrate = (double) inrate;
const double outtimeincr = 1.0 / ((float) outrate);
const double ratio = ((float) outrate) / ((float) inrate);
/* Note that this used to be double, but it looks like we can get by with float in most cases at
almost twice the speed on Intel processors, and orders of magnitude more
on CPUs that need a software fallback for double calculations. */
typedef float ResampleFloatType;
const ResampleFloatType finrate = (ResampleFloatType) inrate;
const ResampleFloatType outtimeincr = ((ResampleFloatType) 1.0f) / ((ResampleFloatType) outrate);
const ResampleFloatType ratio = ((float) outrate) / ((float) inrate);
const int paddinglen = ResamplerPadding(inrate, outrate);
const int framelen = chans * (int)sizeof (float);
const int inframes = inbuflen / framelen;
const int wantedoutframes = (int) ((inbuflen / framelen) * ratio); /* outbuflen isn't total to write, it's total available. */
const int maxoutframes = outbuflen / framelen;
const int outframes = SDL_min(wantedoutframes, maxoutframes);
ResampleFloatType outtime = 0.0f;
float *dst = outbuf;
double outtime = 0.0;
int i, j, chan;
for (i = 0; i < outframes; i++) {
const int srcindex = (int) (outtime * inrate);
const double intime = ((double) srcindex) / finrate;
const double innexttime = ((double) (srcindex + 1)) / finrate;
const double interpolation1 = 1.0 - ((innexttime - outtime) / (innexttime - intime));
const ResampleFloatType intime = ((ResampleFloatType) srcindex) / finrate;
const ResampleFloatType innexttime = ((ResampleFloatType) (srcindex + 1)) / finrate;
const ResampleFloatType indeltatime = innexttime - intime;
const ResampleFloatType interpolation1 = (indeltatime == 0.0f) ? 1.0f : (1.0f - ((innexttime - outtime) / indeltatime));
const int filterindex1 = (int) (interpolation1 * RESAMPLER_SAMPLES_PER_ZERO_CROSSING);
const double interpolation2 = 1.0 - interpolation1;
const ResampleFloatType interpolation2 = 1.0f - interpolation1;
const int filterindex2 = (int) (interpolation2 * RESAMPLER_SAMPLES_PER_ZERO_CROSSING);
for (chan = 0; chan < chans; chan++) {
float outsample = 0.0f;
/* do this twice to calculate the sample, once for the "left wing" and then same for the right. */
/* !!! FIXME: do both wings in one loop */
for (j = 0; (filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) < RESAMPLER_FILTER_SIZE; j++) {
/* Left wing! split the "srcframe < 0" condition out into a preloop. */
for (j = 0; srcindex < j; j++) {
const int jsamples = j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING;
const int srcframe = srcindex - j;
/* !!! FIXME: we can bubble this conditional out of here by doing a pre loop. */
const float insample = (srcframe < 0) ? lpadding[((paddinglen + srcframe) * chans) + chan] : inbuf[(srcframe * chans) + chan];
outsample += (float)(insample * (ResamplerFilter[filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)] + (interpolation1 * ResamplerFilterDifference[filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)])));
const float insample = lpadding[((paddinglen + srcframe) * chans) + chan];
outsample += (float)(insample * (ResamplerFilter[filterindex1 + jsamples] + (interpolation1 * ResamplerFilterDifference[filterindex1 + jsamples])));
}
/* Finish the left wing now that srcframe >= 0 */
for (; (filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) < RESAMPLER_FILTER_SIZE; j++) {
const int jsamples = j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING;
const int srcframe = srcindex - j;
const float insample = inbuf[(srcframe * chans) + chan];
outsample += (float)(insample * (ResamplerFilter[filterindex1 + jsamples] + (interpolation1 * ResamplerFilterDifference[filterindex1 + jsamples])));
}
/* Do the right wing! */
for (j = 0; (filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) < RESAMPLER_FILTER_SIZE; j++) {
const int jsamples = j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING;
const int srcframe = srcindex + 1 + j;
/* !!! FIXME: we can bubble this conditional out of here by doing a post loop. */
const float insample = (srcframe >= inframes) ? rpadding[((srcframe - inframes) * chans) + chan] : inbuf[(srcframe * chans) + chan];
outsample += (float)(insample * (ResamplerFilter[filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)] + (interpolation2 * ResamplerFilterDifference[filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)])));
outsample += (float)(insample * (ResamplerFilter[filterindex2 + jsamples] + (interpolation2 * ResamplerFilterDifference[filterindex2 + jsamples])));
}
*(dst++) = outsample;
}