Add an SSE-enhanced HRTF mixer using intrinsics
[openal-soft/openal-hmr.git] / Alc / mixer.c
blob f5d9bb096d1ceaf8ce156127c6bc0d8be3bc610c
/**
 * OpenAL cross platform audio library
 * Copyright (C) 1999-2007 by authors.
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 * Or go to http://www.gnu.org/copyleft/lgpl.html
 */
21 #include "config.h"
23 #include <math.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <ctype.h>
27 #include <assert.h>
28 #ifdef HAVE_XMMINTRIN_H
29 #include <xmmintrin.h>
30 #endif
31 #ifdef HAVE_ARM_NEON_H
32 #include <arm_neon.h>
33 #endif
35 #include "alMain.h"
36 #include "AL/al.h"
37 #include "AL/alc.h"
38 #include "alSource.h"
39 #include "alBuffer.h"
40 #include "alListener.h"
41 #include "alAuxEffectSlot.h"
42 #include "alu.h"
43 #include "bs2b.h"
46 static __inline ALfloat point32(const ALfloat *vals, ALint step, ALint frac)
47 { return vals[0]; (void)step; (void)frac; }
48 static __inline ALfloat lerp32(const ALfloat *vals, ALint step, ALint frac)
49 { return lerp(vals[0], vals[step], frac * (1.0f/FRACTIONONE)); }
50 static __inline ALfloat cubic32(const ALfloat *vals, ALint step, ALint frac)
51 { return cubic(vals[-step], vals[0], vals[step], vals[step+step],
52 frac * (1.0f/FRACTIONONE)); }
/* Branch-prediction hints. With compilers that define __GNUC__ these expand
 * to __builtin_expect() on the normalised (0/1) condition; with any other
 * compiler they simply evaluate to the condition itself. */
#ifdef __GNUC__
#define LIKELY(x) __builtin_expect(!!(x), 1)
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define LIKELY(x) (x)
#define UNLIKELY(x) (x)
#endif
62 static __inline void ApplyCoeffsC(ALuint Offset, ALfloat (*RESTRICT Values)[2],
63 ALfloat (*RESTRICT Coeffs)[2],
64 ALfloat left, ALfloat right)
66 ALuint c;
67 for(c = 0;c < HRIR_LENGTH;c++)
69 const ALuint off = (Offset+c)&HRIR_MASK;
70 Values[off][0] += Coeffs[c][0] * left;
71 Values[off][1] += Coeffs[c][1] * right;
75 #define DECL_TEMPLATE(sampler,acc) \
76 static void MixDirect_Hrtf_##sampler##_##acc( \
77 ALsource *Source, ALCdevice *Device, DirectParams *params, \
78 const ALfloat *RESTRICT data, ALuint srcfrac, \
79 ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize) \
80 { \
81 const ALuint NumChannels = Source->NumChannels; \
82 const ALint *RESTRICT DelayStep = params->Hrtf.DelayStep; \
83 ALfloat (*RESTRICT DryBuffer)[MaxChannels]; \
84 ALfloat *RESTRICT ClickRemoval, *RESTRICT PendingClicks; \
85 ALfloat (*RESTRICT CoeffStep)[2] = params->Hrtf.CoeffStep; \
86 ALuint pos, frac; \
87 FILTER *DryFilter; \
88 ALuint BufferIdx; \
89 ALuint increment; \
90 ALfloat value; \
91 ALuint i, c; \
93 increment = Source->Params.Step; \
95 DryBuffer = Device->DryBuffer; \
96 ClickRemoval = Device->ClickRemoval; \
97 PendingClicks = Device->PendingClicks; \
98 DryFilter = &params->iirFilter; \
100 for(i = 0;i < NumChannels;i++) \
102 ALfloat (*RESTRICT TargetCoeffs)[2] = params->Hrtf.Coeffs[i]; \
103 ALuint *RESTRICT TargetDelay = params->Hrtf.Delay[i]; \
104 ALfloat *RESTRICT History = Source->Hrtf.History[i]; \
105 ALfloat (*RESTRICT Values)[2] = Source->Hrtf.Values[i]; \
106 ALint Counter = maxu(Source->Hrtf.Counter, OutPos) - OutPos; \
107 ALuint Offset = Source->Hrtf.Offset + OutPos; \
108 ALfloat Coeffs[HRIR_LENGTH][2]; \
109 ALuint Delay[2]; \
110 ALfloat left, right; \
112 pos = 0; \
113 frac = srcfrac; \
115 for(c = 0;c < HRIR_LENGTH;c++) \
117 Coeffs[c][0] = TargetCoeffs[c][0] - (CoeffStep[c][0]*Counter); \
118 Coeffs[c][1] = TargetCoeffs[c][1] - (CoeffStep[c][1]*Counter); \
121 Delay[0] = TargetDelay[0] - (DelayStep[0]*Counter); \
122 Delay[1] = TargetDelay[1] - (DelayStep[1]*Counter); \
124 if(LIKELY(OutPos == 0)) \
126 value = sampler(data + pos*NumChannels + i, NumChannels, frac); \
127 value = lpFilter2PC(DryFilter, i, value); \
129 History[Offset&SRC_HISTORY_MASK] = value; \
130 left = lerp(History[(Offset-(Delay[0]>>HRTFDELAY_BITS))&SRC_HISTORY_MASK], \
131 History[(Offset-(Delay[0]>>HRTFDELAY_BITS)-1)&SRC_HISTORY_MASK], \
132 (Delay[0]&HRTFDELAY_MASK)/(ALfloat)HRTFDELAY_FRACONE); \
133 right = lerp(History[(Offset-(Delay[1]>>HRTFDELAY_BITS))&SRC_HISTORY_MASK], \
134 History[(Offset-(Delay[1]>>HRTFDELAY_BITS)-1)&SRC_HISTORY_MASK], \
135 (Delay[1]&HRTFDELAY_MASK)/(ALfloat)HRTFDELAY_FRACONE); \
137 ClickRemoval[FrontLeft] -= Values[(Offset+1)&HRIR_MASK][0] + \
138 Coeffs[0][0] * left; \
139 ClickRemoval[FrontRight] -= Values[(Offset+1)&HRIR_MASK][1] + \
140 Coeffs[0][1] * right; \
142 for(BufferIdx = 0;BufferIdx < BufferSize && Counter > 0;BufferIdx++) \
144 value = sampler(data + pos*NumChannels + i, NumChannels, frac); \
145 value = lpFilter2P(DryFilter, i, value); \
147 History[Offset&SRC_HISTORY_MASK] = value; \
148 left = lerp(History[(Offset-(Delay[0]>>HRTFDELAY_BITS))&SRC_HISTORY_MASK], \
149 History[(Offset-(Delay[0]>>HRTFDELAY_BITS)-1)&SRC_HISTORY_MASK], \
150 (Delay[0]&HRTFDELAY_MASK)/(ALfloat)HRTFDELAY_FRACONE); \
151 right = lerp(History[(Offset-(Delay[1]>>HRTFDELAY_BITS))&SRC_HISTORY_MASK], \
152 History[(Offset-(Delay[1]>>HRTFDELAY_BITS)-1)&SRC_HISTORY_MASK], \
153 (Delay[1]&HRTFDELAY_MASK)/(ALfloat)HRTFDELAY_FRACONE); \
155 Delay[0] += DelayStep[0]; \
156 Delay[1] += DelayStep[1]; \
158 Values[Offset&HRIR_MASK][0] = 0.0f; \
159 Values[Offset&HRIR_MASK][1] = 0.0f; \
160 Offset++; \
162 for(c = 0;c < HRIR_LENGTH;c++) \
164 const ALuint off = (Offset+c)&HRIR_MASK; \
165 Values[off][0] += Coeffs[c][0] * left; \
166 Values[off][1] += Coeffs[c][1] * right; \
167 Coeffs[c][0] += CoeffStep[c][0]; \
168 Coeffs[c][1] += CoeffStep[c][1]; \
171 DryBuffer[OutPos][FrontLeft] += Values[Offset&HRIR_MASK][0]; \
172 DryBuffer[OutPos][FrontRight] += Values[Offset&HRIR_MASK][1]; \
174 frac += increment; \
175 pos += frac>>FRACTIONBITS; \
176 frac &= FRACTIONMASK; \
177 OutPos++; \
178 Counter--; \
181 Delay[0] >>= HRTFDELAY_BITS; \
182 Delay[1] >>= HRTFDELAY_BITS; \
183 for(;BufferIdx < BufferSize;BufferIdx++) \
185 value = sampler(data + pos*NumChannels + i, NumChannels, frac); \
186 value = lpFilter2P(DryFilter, i, value); \
188 History[Offset&SRC_HISTORY_MASK] = value; \
189 left = History[(Offset-Delay[0])&SRC_HISTORY_MASK]; \
190 right = History[(Offset-Delay[1])&SRC_HISTORY_MASK]; \
192 Values[Offset&HRIR_MASK][0] = 0.0f; \
193 Values[Offset&HRIR_MASK][1] = 0.0f; \
194 Offset++; \
196 ApplyCoeffs##acc(Offset, Values, Coeffs, left, right); \
197 DryBuffer[OutPos][FrontLeft] += Values[Offset&HRIR_MASK][0]; \
198 DryBuffer[OutPos][FrontRight] += Values[Offset&HRIR_MASK][1]; \
200 frac += increment; \
201 pos += frac>>FRACTIONBITS; \
202 frac &= FRACTIONMASK; \
203 OutPos++; \
205 if(LIKELY(OutPos == SamplesToDo)) \
207 value = sampler(data + pos*NumChannels + i, NumChannels, frac); \
208 value = lpFilter2PC(DryFilter, i, value); \
210 History[Offset&SRC_HISTORY_MASK] = value; \
211 left = History[(Offset-Delay[0])&SRC_HISTORY_MASK]; \
212 right = History[(Offset-Delay[1])&SRC_HISTORY_MASK]; \
214 PendingClicks[FrontLeft] += Values[(Offset+1)&HRIR_MASK][0] + \
215 Coeffs[0][0] * left; \
216 PendingClicks[FrontRight] += Values[(Offset+1)&HRIR_MASK][1] + \
217 Coeffs[0][1] * right; \
219 OutPos -= BufferSize; \
223 DECL_TEMPLATE(point32, C)
224 DECL_TEMPLATE(lerp32, C)
225 DECL_TEMPLATE(cubic32, C)
#ifdef HAVE_XMMINTRIN_H

/* SSE accumulation of one input sample pair into the HRTF value buffer.
 *
 * Processes two taps per iteration: the left/right pairs at
 * Values[Offset & HRIR_MASK] and Values[(Offset+1) & HRIR_MASK] are loaded
 * into one vector, Coeffs[c] and Coeffs[c+1] scaled by {left,right,left,right}
 * are added, and the two halves are stored back. Because c advances by 2,
 * HRIR_LENGTH is assumed to be even.
 *
 * The vectors are built with _mm_setr_ps()/_mm_setzero_ps() rather than
 * brace initialisers, which are a compiler extension for __m128.
 */
static __inline void ApplyCoeffsSSE(ALuint Offset, ALfloat (*RESTRICT Values)[2],
                                    ALfloat (*RESTRICT Coeffs)[2],
                                    ALfloat left, ALfloat right)
{
    /* Lane order (low to high): left, right, left, right. */
    const __m128 lrlr = _mm_setr_ps(left, right, left, right);
    ALuint c;
    for(c = 0;c < HRIR_LENGTH;c += 2)
    {
        const ALuint o0 = (Offset++)&HRIR_MASK;
        const ALuint o1 = (Offset++)&HRIR_MASK;
        __m128 vals = _mm_setzero_ps();
        __m128 coeffs = _mm_setzero_ps();

        /* The two value slots may not be adjacent in memory when the index
         * wraps, so each 64-bit half is loaded separately. */
        vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]);
        vals = _mm_loadh_pi(vals, (__m64*)&Values[o1][0]);
        coeffs = _mm_loadl_pi(coeffs, (__m64*)&Coeffs[c  ][0]);
        coeffs = _mm_loadh_pi(coeffs, (__m64*)&Coeffs[c+1][0]);

        vals = _mm_add_ps(vals, _mm_mul_ps(coeffs, lrlr));

        _mm_storel_pi((__m64*)&Values[o0][0], vals);
        _mm_storeh_pi((__m64*)&Values[o1][0], vals);
    }
}

DECL_TEMPLATE(point32, SSE)
DECL_TEMPLATE(lerp32, SSE)
DECL_TEMPLATE(cubic32, SSE)

#endif
#ifdef HAVE_ARM_NEON_H

/* NEON accumulation of one input sample pair into the HRTF value buffer.
 *
 * Processes two taps per iteration: the left/right pairs at
 * Values[(Offset+c) & HRIR_MASK] and the following (wrapped) slot are
 * combined into one vector, Coeffs[c] and Coeffs[c+1] (loaded as four
 * consecutive floats) scaled by {left,right,left,right} are multiply-added,
 * and the two halves are stored back. Because c advances by 2, HRIR_LENGTH is
 * assumed to be even.
 */
static __inline void ApplyCoeffsNeon(ALuint Offset, ALfloat (*RESTRICT Values)[2],
                                     ALfloat (*RESTRICT Coeffs)[2],
                                     ALfloat left, ALfloat right)
{
    ALuint c;
    float32x4_t leftright4;
    {
        /* Build {left, right} and duplicate it into both vector halves. */
        float32x2_t leftright2 = vdup_n_f32(0.0f);
        leftright2 = vset_lane_f32(left, leftright2, 0);
        leftright2 = vset_lane_f32(right, leftright2, 1);
        leftright4 = vcombine_f32(leftright2, leftright2);
    }
    for(c = 0;c < HRIR_LENGTH;c += 2)
    {
        const ALuint o0 = (Offset+c)&HRIR_MASK;
        const ALuint o1 = (o0+1)&HRIR_MASK;
        float32x4_t vals = vcombine_f32(vld1_f32((float32_t*)&Values[o0][0]),
                                        vld1_f32((float32_t*)&Values[o1][0]));
        float32x4_t coefs = vld1q_f32((float32_t*)&Coeffs[c][0]);

        vals = vmlaq_f32(vals, coefs, leftright4);

        vst1_f32((float32_t*)&Values[o0][0], vget_low_f32(vals));
        vst1_f32((float32_t*)&Values[o1][0], vget_high_f32(vals));
    }
}

DECL_TEMPLATE(point32, Neon)
DECL_TEMPLATE(lerp32, Neon)
DECL_TEMPLATE(cubic32, Neon)

#endif

#undef DECL_TEMPLATE
298 #define DECL_TEMPLATE(sampler) \
299 static void MixDirect_##sampler(ALsource *Source, ALCdevice *Device, \
300 DirectParams *params, const ALfloat *RESTRICT data, ALuint srcfrac, \
301 ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize) \
303 const ALuint NumChannels = Source->NumChannels; \
304 ALfloat (*RESTRICT DryBuffer)[MaxChannels]; \
305 ALfloat *RESTRICT ClickRemoval, *RESTRICT PendingClicks; \
306 ALfloat DrySend[MaxChannels]; \
307 FILTER *DryFilter; \
308 ALuint pos, frac; \
309 ALuint BufferIdx; \
310 ALuint increment; \
311 ALfloat value; \
312 ALuint i, c; \
314 increment = Source->Params.Step; \
316 DryBuffer = Device->DryBuffer; \
317 ClickRemoval = Device->ClickRemoval; \
318 PendingClicks = Device->PendingClicks; \
319 DryFilter = &params->iirFilter; \
321 for(i = 0;i < NumChannels;i++) \
323 for(c = 0;c < MaxChannels;c++) \
324 DrySend[c] = params->Gains[i][c]; \
326 pos = 0; \
327 frac = srcfrac; \
329 if(OutPos == 0) \
331 value = sampler(data + pos*NumChannels + i, NumChannels, frac); \
333 value = lpFilter2PC(DryFilter, i, value); \
334 for(c = 0;c < MaxChannels;c++) \
335 ClickRemoval[c] -= value*DrySend[c]; \
337 for(BufferIdx = 0;BufferIdx < BufferSize;BufferIdx++) \
339 value = sampler(data + pos*NumChannels + i, NumChannels, frac); \
341 value = lpFilter2P(DryFilter, i, value); \
342 for(c = 0;c < MaxChannels;c++) \
343 DryBuffer[OutPos][c] += value*DrySend[c]; \
345 frac += increment; \
346 pos += frac>>FRACTIONBITS; \
347 frac &= FRACTIONMASK; \
348 OutPos++; \
350 if(OutPos == SamplesToDo) \
352 value = sampler(data + pos*NumChannels + i, NumChannels, frac); \
354 value = lpFilter2PC(DryFilter, i, value); \
355 for(c = 0;c < MaxChannels;c++) \
356 PendingClicks[c] += value*DrySend[c]; \
358 OutPos -= BufferSize; \
362 DECL_TEMPLATE(point32)
363 DECL_TEMPLATE(lerp32)
364 DECL_TEMPLATE(cubic32)
366 #undef DECL_TEMPLATE
368 #define DECL_TEMPLATE(sampler) \
369 static void MixSend_##sampler(ALsource *Source, ALuint sendidx, \
370 SendParams *params, const ALfloat *RESTRICT data, ALuint srcfrac, \
371 ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize) \
373 const ALuint NumChannels = Source->NumChannels; \
374 ALeffectslot *Slot; \
375 ALfloat WetSend; \
376 ALfloat *WetBuffer; \
377 ALfloat *WetClickRemoval; \
378 ALfloat *WetPendingClicks; \
379 FILTER *WetFilter; \
380 ALuint pos, frac; \
381 ALuint BufferIdx; \
382 ALuint increment; \
383 ALfloat value; \
384 ALuint i; \
386 increment = Source->Params.Step; \
388 Slot = Source->Params.Slot[sendidx]; \
389 WetBuffer = Slot->WetBuffer; \
390 WetClickRemoval = Slot->ClickRemoval; \
391 WetPendingClicks = Slot->PendingClicks; \
392 WetFilter = &params->iirFilter; \
393 WetSend = params->Gain; \
395 for(i = 0;i < NumChannels;i++) \
397 pos = 0; \
398 frac = srcfrac; \
400 if(OutPos == 0) \
402 value = sampler(data + pos*NumChannels + i, NumChannels, frac); \
404 value = lpFilter2PC(WetFilter, i, value); \
405 WetClickRemoval[0] -= value * WetSend; \
407 for(BufferIdx = 0;BufferIdx < BufferSize;BufferIdx++) \
409 value = sampler(data + pos*NumChannels + i, NumChannels, frac); \
411 value = lpFilter2P(WetFilter, i, value); \
412 WetBuffer[OutPos] += value * WetSend; \
414 frac += increment; \
415 pos += frac>>FRACTIONBITS; \
416 frac &= FRACTIONMASK; \
417 OutPos++; \
419 if(OutPos == SamplesToDo) \
421 value = sampler(data + pos*NumChannels + i, NumChannels, frac); \
423 value = lpFilter2PC(WetFilter, i, value); \
424 WetPendingClicks[0] += value * WetSend; \
426 OutPos -= BufferSize; \
430 DECL_TEMPLATE(point32)
431 DECL_TEMPLATE(lerp32)
432 DECL_TEMPLATE(cubic32)
434 #undef DECL_TEMPLATE
437 DryMixerFunc SelectDirectMixer(enum Resampler Resampler)
439 switch(Resampler)
441 case PointResampler:
442 return MixDirect_point32;
443 case LinearResampler:
444 return MixDirect_lerp32;
445 case CubicResampler:
446 return MixDirect_cubic32;
447 case ResamplerMax:
448 break;
450 return NULL;
453 DryMixerFunc SelectHrtfMixer(enum Resampler Resampler)
455 switch(Resampler)
457 case PointResampler:
458 #ifdef HAVE_XMMINTRIN_H
459 if((CPUCapFlags&CPU_CAP_SSE))
460 return MixDirect_Hrtf_point32_SSE;
461 #endif
462 #ifdef HAVE_ARM_NEON_H
463 if((CPUCapFlags&CPU_CAP_NEON))
464 return MixDirect_Hrtf_point32_Neon;
465 #endif
466 return MixDirect_Hrtf_point32_C;
467 case LinearResampler:
468 #ifdef HAVE_XMMINTRIN_H
469 if((CPUCapFlags&CPU_CAP_SSE))
470 return MixDirect_Hrtf_lerp32_SSE;
471 #endif
472 #ifdef HAVE_ARM_NEON_H
473 if((CPUCapFlags&CPU_CAP_NEON))
474 return MixDirect_Hrtf_lerp32_Neon;
475 #endif
476 return MixDirect_Hrtf_lerp32_C;
477 case CubicResampler:
478 #ifdef HAVE_XMMINTRIN_H
479 if((CPUCapFlags&CPU_CAP_SSE))
480 return MixDirect_Hrtf_cubic32_SSE;
481 #endif
482 #ifdef HAVE_ARM_NEON_H
483 if((CPUCapFlags&CPU_CAP_NEON))
484 return MixDirect_Hrtf_cubic32_Neon;
485 #endif
486 return MixDirect_Hrtf_cubic32_C;
487 case ResamplerMax:
488 break;
490 return NULL;
493 WetMixerFunc SelectSendMixer(enum Resampler Resampler)
495 switch(Resampler)
497 case PointResampler:
498 return MixSend_point32;
499 case LinearResampler:
500 return MixSend_lerp32;
501 case CubicResampler:
502 return MixSend_cubic32;
503 case ResamplerMax:
504 break;
506 return NULL;
510 static __inline ALfloat Sample_ALbyte(ALbyte val)
511 { return val * (1.0f/127.0f); }
513 static __inline ALfloat Sample_ALshort(ALshort val)
514 { return val * (1.0f/32767.0f); }
516 static __inline ALfloat Sample_ALfloat(ALfloat val)
517 { return val; }
519 #define DECL_TEMPLATE(T) \
520 static void Load_##T(ALfloat *dst, const T *src, ALuint samples) \
522 ALuint i; \
523 for(i = 0;i < samples;i++) \
524 dst[i] = Sample_##T(src[i]); \
527 DECL_TEMPLATE(ALbyte)
528 DECL_TEMPLATE(ALshort)
529 DECL_TEMPLATE(ALfloat)
531 #undef DECL_TEMPLATE
533 static void LoadStack(ALfloat *dst, const ALvoid *src, enum FmtType srctype, ALuint samples)
535 switch(srctype)
537 case FmtByte:
538 Load_ALbyte(dst, src, samples);
539 break;
540 case FmtShort:
541 Load_ALshort(dst, src, samples);
542 break;
543 case FmtFloat:
544 Load_ALfloat(dst, src, samples);
545 break;
549 static void SilenceStack(ALfloat *dst, ALuint samples)
551 ALuint i;
552 for(i = 0;i < samples;i++)
553 dst[i] = 0.0f;
557 ALvoid MixSource(ALsource *Source, ALCdevice *Device, ALuint SamplesToDo)
559 ALbufferlistitem *BufferListItem;
560 ALuint DataPosInt, DataPosFrac;
561 ALuint BuffersPlayed;
562 ALboolean Looping;
563 ALuint increment;
564 enum Resampler Resampler;
565 ALenum State;
566 ALuint OutPos;
567 ALuint NumChannels;
568 ALuint FrameSize;
569 ALint64 DataSize64;
570 ALuint i;
572 /* Get source info */
573 State = Source->state;
574 BuffersPlayed = Source->BuffersPlayed;
575 DataPosInt = Source->position;
576 DataPosFrac = Source->position_fraction;
577 Looping = Source->Looping;
578 increment = Source->Params.Step;
579 Resampler = Source->Resampler;
580 NumChannels = Source->NumChannels;
581 FrameSize = NumChannels * Source->SampleSize;
583 /* Get current buffer queue item */
584 BufferListItem = Source->queue;
585 for(i = 0;i < BuffersPlayed;i++)
586 BufferListItem = BufferListItem->next;
588 OutPos = 0;
589 do {
590 const ALuint BufferPrePadding = ResamplerPrePadding[Resampler];
591 const ALuint BufferPadding = ResamplerPadding[Resampler];
592 ALfloat StackData[STACK_DATA_SIZE/sizeof(ALfloat)];
593 ALfloat *SrcData = StackData;
594 ALuint SrcDataSize = 0;
595 ALuint BufferSize;
597 /* Figure out how many buffer bytes will be needed */
598 DataSize64 = SamplesToDo-OutPos+1;
599 DataSize64 *= increment;
600 DataSize64 += DataPosFrac+FRACTIONMASK;
601 DataSize64 >>= FRACTIONBITS;
602 DataSize64 += BufferPadding+BufferPrePadding;
603 DataSize64 *= NumChannels;
605 BufferSize = (ALuint)mini64(DataSize64, STACK_DATA_SIZE/sizeof(ALfloat));
606 BufferSize /= NumChannels;
608 if(Source->SourceType == AL_STATIC)
610 const ALbuffer *ALBuffer = Source->queue->buffer;
611 const ALubyte *Data = ALBuffer->data;
612 ALuint DataSize;
613 ALuint pos;
615 /* If current pos is beyond the loop range, do not loop */
616 if(Looping == AL_FALSE || DataPosInt >= (ALuint)ALBuffer->LoopEnd)
618 Looping = AL_FALSE;
620 if(DataPosInt >= BufferPrePadding)
621 pos = DataPosInt - BufferPrePadding;
622 else
624 DataSize = BufferPrePadding - DataPosInt;
625 DataSize = minu(BufferSize, DataSize);
627 SilenceStack(&SrcData[SrcDataSize*NumChannels],
628 DataSize*NumChannels);
629 SrcDataSize += DataSize;
630 BufferSize -= DataSize;
632 pos = 0;
635 /* Copy what's left to play in the source buffer, and clear the
636 * rest of the temp buffer */
637 DataSize = ALBuffer->SampleLen - pos;
638 DataSize = minu(BufferSize, DataSize);
640 LoadStack(&SrcData[SrcDataSize*NumChannels], &Data[pos*FrameSize],
641 ALBuffer->FmtType, DataSize*NumChannels);
642 SrcDataSize += DataSize;
643 BufferSize -= DataSize;
645 SilenceStack(&SrcData[SrcDataSize*NumChannels],
646 BufferSize*NumChannels);
647 SrcDataSize += BufferSize;
648 BufferSize -= BufferSize;
650 else
652 ALuint LoopStart = ALBuffer->LoopStart;
653 ALuint LoopEnd = ALBuffer->LoopEnd;
655 if(DataPosInt >= LoopStart)
657 pos = DataPosInt-LoopStart;
658 while(pos < BufferPrePadding)
659 pos += LoopEnd-LoopStart;
660 pos -= BufferPrePadding;
661 pos += LoopStart;
663 else if(DataPosInt >= BufferPrePadding)
664 pos = DataPosInt - BufferPrePadding;
665 else
667 DataSize = BufferPrePadding - DataPosInt;
668 DataSize = minu(BufferSize, DataSize);
670 SilenceStack(&SrcData[SrcDataSize*NumChannels], DataSize*NumChannels);
671 SrcDataSize += DataSize;
672 BufferSize -= DataSize;
674 pos = 0;
677 /* Copy what's left of this loop iteration, then copy repeats
678 * of the loop section */
679 DataSize = LoopEnd - pos;
680 DataSize = minu(BufferSize, DataSize);
682 LoadStack(&SrcData[SrcDataSize*NumChannels], &Data[pos*FrameSize],
683 ALBuffer->FmtType, DataSize*NumChannels);
684 SrcDataSize += DataSize;
685 BufferSize -= DataSize;
687 DataSize = LoopEnd-LoopStart;
688 while(BufferSize > 0)
690 DataSize = minu(BufferSize, DataSize);
692 LoadStack(&SrcData[SrcDataSize*NumChannels], &Data[LoopStart*FrameSize],
693 ALBuffer->FmtType, DataSize*NumChannels);
694 SrcDataSize += DataSize;
695 BufferSize -= DataSize;
699 else
701 /* Crawl the buffer queue to fill in the temp buffer */
702 ALbufferlistitem *tmpiter = BufferListItem;
703 ALuint pos;
705 if(DataPosInt >= BufferPrePadding)
706 pos = DataPosInt - BufferPrePadding;
707 else
709 pos = BufferPrePadding - DataPosInt;
710 while(pos > 0)
712 if(!tmpiter->prev && !Looping)
714 ALuint DataSize = minu(BufferSize, pos);
716 SilenceStack(&SrcData[SrcDataSize*NumChannels], DataSize*NumChannels);
717 SrcDataSize += DataSize;
718 BufferSize -= DataSize;
720 pos = 0;
721 break;
724 if(tmpiter->prev)
725 tmpiter = tmpiter->prev;
726 else
728 while(tmpiter->next)
729 tmpiter = tmpiter->next;
732 if(tmpiter->buffer)
734 if((ALuint)tmpiter->buffer->SampleLen > pos)
736 pos = tmpiter->buffer->SampleLen - pos;
737 break;
739 pos -= tmpiter->buffer->SampleLen;
744 while(tmpiter && BufferSize > 0)
746 const ALbuffer *ALBuffer;
747 if((ALBuffer=tmpiter->buffer) != NULL)
749 const ALubyte *Data = ALBuffer->data;
750 ALuint DataSize = ALBuffer->SampleLen;
752 /* Skip the data already played */
753 if(DataSize <= pos)
754 pos -= DataSize;
755 else
757 Data += pos*FrameSize;
758 DataSize -= pos;
759 pos -= pos;
761 DataSize = minu(BufferSize, DataSize);
762 LoadStack(&SrcData[SrcDataSize*NumChannels], Data,
763 ALBuffer->FmtType, DataSize*NumChannels);
764 SrcDataSize += DataSize;
765 BufferSize -= DataSize;
768 tmpiter = tmpiter->next;
769 if(!tmpiter && Looping)
770 tmpiter = Source->queue;
771 else if(!tmpiter)
773 SilenceStack(&SrcData[SrcDataSize*NumChannels], BufferSize*NumChannels);
774 SrcDataSize += BufferSize;
775 BufferSize -= BufferSize;
780 /* Figure out how many samples we can mix. */
781 DataSize64 = SrcDataSize;
782 DataSize64 -= BufferPadding+BufferPrePadding;
783 DataSize64 <<= FRACTIONBITS;
784 DataSize64 -= increment;
785 DataSize64 -= DataPosFrac;
787 BufferSize = (ALuint)((DataSize64+(increment-1)) / increment);
788 BufferSize = minu(BufferSize, (SamplesToDo-OutPos));
790 SrcData += BufferPrePadding*NumChannels;
791 Source->Params.DryMix(Source, Device, &Source->Params.Direct,
792 SrcData, DataPosFrac,
793 OutPos, SamplesToDo, BufferSize);
794 for(i = 0;i < Device->NumAuxSends;i++)
796 if(!Source->Params.Slot[i])
797 continue;
798 Source->Params.WetMix(Source, i, &Source->Params.Send[i],
799 SrcData, DataPosFrac,
800 OutPos, SamplesToDo, BufferSize);
802 for(i = 0;i < BufferSize;i++)
804 DataPosFrac += increment;
805 DataPosInt += DataPosFrac>>FRACTIONBITS;
806 DataPosFrac &= FRACTIONMASK;
807 OutPos++;
810 /* Handle looping sources */
811 while(1)
813 const ALbuffer *ALBuffer;
814 ALuint DataSize = 0;
815 ALuint LoopStart = 0;
816 ALuint LoopEnd = 0;
818 if((ALBuffer=BufferListItem->buffer) != NULL)
820 DataSize = ALBuffer->SampleLen;
821 LoopStart = ALBuffer->LoopStart;
822 LoopEnd = ALBuffer->LoopEnd;
823 if(LoopEnd > DataPosInt)
824 break;
827 if(Looping && Source->SourceType == AL_STATIC)
829 DataPosInt = ((DataPosInt-LoopStart)%(LoopEnd-LoopStart)) + LoopStart;
830 break;
833 if(DataSize > DataPosInt)
834 break;
836 if(BufferListItem->next)
838 BufferListItem = BufferListItem->next;
839 BuffersPlayed++;
841 else if(Looping)
843 BufferListItem = Source->queue;
844 BuffersPlayed = 0;
846 else
848 State = AL_STOPPED;
849 BufferListItem = Source->queue;
850 BuffersPlayed = Source->BuffersInQueue;
851 DataPosInt = 0;
852 DataPosFrac = 0;
853 break;
856 DataPosInt -= DataSize;
858 } while(State == AL_PLAYING && OutPos < SamplesToDo);
860 /* Update source info */
861 Source->state = State;
862 Source->BuffersPlayed = BuffersPlayed;
863 Source->position = DataPosInt;
864 Source->position_fraction = DataPosFrac;
865 Source->Hrtf.Offset += OutPos;
866 if(State == AL_PLAYING)
868 Source->Hrtf.Counter = maxu(Source->Hrtf.Counter, OutPos) - OutPos;
869 Source->Hrtf.Moving = AL_TRUE;
871 else
873 Source->Hrtf.Counter = 0;
874 Source->Hrtf.Moving = AL_FALSE;