F1 and F3 HAL / LL libraries
[betaflight.git] / lib / main / STM32F3 / Drivers / CMSIS / DSP_Lib / Source / FilteringFunctions / arm_fir_decimate_q15.c
blobf3a6a4a2a1131b269c60dd579764edbc02e697de
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_decimate_q15.c
9 *
10 * Description: Q15 FIR Decimator.
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
41 #include "arm_math.h"
43 /**
44 * @ingroup groupFilters
47 /**
48 * @addtogroup FIR_decimate
49 * @{
52 /**
53 * @brief Processing function for the Q15 FIR decimator.
54 * @param[in] *S points to an instance of the Q15 FIR decimator structure.
55 * @param[in] *pSrc points to the block of input data.
56 * @param[out] *pDst points to the location where the output result is written.
57 * @param[in] blockSize number of input samples to process per call.
58 * @return none.
60 * <b>Scaling and Overflow Behavior:</b>
61 * \par
62 * The function is implemented using a 64-bit internal accumulator.
63 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
64 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
65 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
66 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
67 * Lastly, the accumulator is saturated to yield a result in 1.15 format.
69 * \par
70 * Refer to the function <code>arm_fir_decimate_fast_q15()</code> for a faster but less precise implementation of this function for Cortex-M3 and Cortex-M4.
73 #ifndef ARM_MATH_CM0_FAMILY
75 #ifndef UNALIGNED_SUPPORT_DISABLE
77 void arm_fir_decimate_q15(
78 const arm_fir_decimate_instance_q15 * S,
79 q15_t * pSrc,
80 q15_t * pDst,
81 uint32_t blockSize)
83 q15_t *pState = S->pState; /* State pointer */
84 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
85 q15_t *pStateCurnt; /* Points to the current sample of the state */
86 q15_t *px; /* Temporary pointer for state buffer */
87 q15_t *pb; /* Temporary pointer coefficient buffer */
88 q31_t x0, x1, c0, c1; /* Temporary variables to hold state and coefficient values */
89 q63_t sum0; /* Accumulators */
90 q63_t acc0, acc1;
91 q15_t *px0, *px1;
92 uint32_t blkCntN3;
93 uint32_t numTaps = S->numTaps; /* Number of taps */
94 uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M; /* Loop counters */
97 /* S->pState buffer contains previous frame (numTaps - 1) samples */
98 /* pStateCurnt points to the location where the new input data should be written */
99 pStateCurnt = S->pState + (numTaps - 1u);
102 /* Total number of output samples to be computed */
103 blkCnt = outBlockSize / 2;
104 blkCntN3 = outBlockSize - (2 * blkCnt);
107 while(blkCnt > 0u)
109 /* Copy decimation factor number of new input samples into the state buffer */
110 i = 2 * S->M;
114 *pStateCurnt++ = *pSrc++;
116 } while(--i);
118 /* Set accumulator to zero */
119 acc0 = 0;
120 acc1 = 0;
122 /* Initialize state pointer */
123 px0 = pState;
125 px1 = pState + S->M;
128 /* Initialize coeff pointer */
129 pb = pCoeffs;
131 /* Loop unrolling. Process 4 taps at a time. */
132 tapCnt = numTaps >> 2;
134 /* Loop over the number of taps. Unroll by a factor of 4.
135 ** Repeat until we've computed numTaps-4 coefficients. */
136 while(tapCnt > 0u)
138 /* Read the Read b[numTaps-1] and b[numTaps-2] coefficients */
139 c0 = *__SIMD32(pb)++;
141 /* Read x[n-numTaps-1] and x[n-numTaps-2]sample */
142 x0 = *__SIMD32(px0)++;
144 x1 = *__SIMD32(px1)++;
146 /* Perform the multiply-accumulate */
147 acc0 = __SMLALD(x0, c0, acc0);
149 acc1 = __SMLALD(x1, c0, acc1);
151 /* Read the b[numTaps-3] and b[numTaps-4] coefficient */
152 c0 = *__SIMD32(pb)++;
154 /* Read x[n-numTaps-2] and x[n-numTaps-3] sample */
155 x0 = *__SIMD32(px0)++;
157 x1 = *__SIMD32(px1)++;
159 /* Perform the multiply-accumulate */
160 acc0 = __SMLALD(x0, c0, acc0);
162 acc1 = __SMLALD(x1, c0, acc1);
164 /* Decrement the loop counter */
165 tapCnt--;
168 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
169 tapCnt = numTaps % 0x4u;
171 while(tapCnt > 0u)
173 /* Read coefficients */
174 c0 = *pb++;
176 /* Fetch 1 state variable */
177 x0 = *px0++;
179 x1 = *px1++;
181 /* Perform the multiply-accumulate */
182 acc0 = __SMLALD(x0, c0, acc0);
183 acc1 = __SMLALD(x1, c0, acc1);
185 /* Decrement the loop counter */
186 tapCnt--;
189 /* Advance the state pointer by the decimation factor
190 * to process the next group of decimation factor number samples */
191 pState = pState + S->M * 2;
193 /* Store filter output, smlad returns the values in 2.14 format */
194 /* so downsacle by 15 to get output in 1.15 */
195 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
196 *pDst++ = (q15_t) (__SSAT((acc1 >> 15), 16));
198 /* Decrement the loop counter */
199 blkCnt--;
204 while(blkCntN3 > 0u)
206 /* Copy decimation factor number of new input samples into the state buffer */
207 i = S->M;
211 *pStateCurnt++ = *pSrc++;
213 } while(--i);
215 /*Set sum to zero */
216 sum0 = 0;
218 /* Initialize state pointer */
219 px = pState;
221 /* Initialize coeff pointer */
222 pb = pCoeffs;
224 /* Loop unrolling. Process 4 taps at a time. */
225 tapCnt = numTaps >> 2;
227 /* Loop over the number of taps. Unroll by a factor of 4.
228 ** Repeat until we've computed numTaps-4 coefficients. */
229 while(tapCnt > 0u)
231 /* Read the Read b[numTaps-1] and b[numTaps-2] coefficients */
232 c0 = *__SIMD32(pb)++;
234 /* Read x[n-numTaps-1] and x[n-numTaps-2]sample */
235 x0 = *__SIMD32(px)++;
237 /* Read the b[numTaps-3] and b[numTaps-4] coefficient */
238 c1 = *__SIMD32(pb)++;
240 /* Perform the multiply-accumulate */
241 sum0 = __SMLALD(x0, c0, sum0);
243 /* Read x[n-numTaps-2] and x[n-numTaps-3] sample */
244 x0 = *__SIMD32(px)++;
246 /* Perform the multiply-accumulate */
247 sum0 = __SMLALD(x0, c1, sum0);
249 /* Decrement the loop counter */
250 tapCnt--;
253 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
254 tapCnt = numTaps % 0x4u;
256 while(tapCnt > 0u)
258 /* Read coefficients */
259 c0 = *pb++;
261 /* Fetch 1 state variable */
262 x0 = *px++;
264 /* Perform the multiply-accumulate */
265 sum0 = __SMLALD(x0, c0, sum0);
267 /* Decrement the loop counter */
268 tapCnt--;
271 /* Advance the state pointer by the decimation factor
272 * to process the next group of decimation factor number samples */
273 pState = pState + S->M;
275 /* Store filter output, smlad returns the values in 2.14 format */
276 /* so downsacle by 15 to get output in 1.15 */
277 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
279 /* Decrement the loop counter */
280 blkCntN3--;
283 /* Processing is complete.
284 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
285 ** This prepares the state buffer for the next function call. */
287 /* Points to the start of the state buffer */
288 pStateCurnt = S->pState;
290 i = (numTaps - 1u) >> 2u;
292 /* copy data */
293 while(i > 0u)
295 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
296 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
298 /* Decrement the loop counter */
299 i--;
302 i = (numTaps - 1u) % 0x04u;
304 /* copy data */
305 while(i > 0u)
307 *pStateCurnt++ = *pState++;
309 /* Decrement the loop counter */
310 i--;
314 #else
317 void arm_fir_decimate_q15(
318 const arm_fir_decimate_instance_q15 * S,
319 q15_t * pSrc,
320 q15_t * pDst,
321 uint32_t blockSize)
323 q15_t *pState = S->pState; /* State pointer */
324 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
325 q15_t *pStateCurnt; /* Points to the current sample of the state */
326 q15_t *px; /* Temporary pointer for state buffer */
327 q15_t *pb; /* Temporary pointer coefficient buffer */
328 q15_t x0, x1, c0; /* Temporary variables to hold state and coefficient values */
329 q63_t sum0; /* Accumulators */
330 q63_t acc0, acc1;
331 q15_t *px0, *px1;
332 uint32_t blkCntN3;
333 uint32_t numTaps = S->numTaps; /* Number of taps */
334 uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M; /* Loop counters */
337 /* S->pState buffer contains previous frame (numTaps - 1) samples */
338 /* pStateCurnt points to the location where the new input data should be written */
339 pStateCurnt = S->pState + (numTaps - 1u);
342 /* Total number of output samples to be computed */
343 blkCnt = outBlockSize / 2;
344 blkCntN3 = outBlockSize - (2 * blkCnt);
346 while(blkCnt > 0u)
348 /* Copy decimation factor number of new input samples into the state buffer */
349 i = 2 * S->M;
353 *pStateCurnt++ = *pSrc++;
355 } while(--i);
357 /* Set accumulator to zero */
358 acc0 = 0;
359 acc1 = 0;
361 /* Initialize state pointer */
362 px0 = pState;
364 px1 = pState + S->M;
367 /* Initialize coeff pointer */
368 pb = pCoeffs;
370 /* Loop unrolling. Process 4 taps at a time. */
371 tapCnt = numTaps >> 2;
373 /* Loop over the number of taps. Unroll by a factor of 4.
374 ** Repeat until we've computed numTaps-4 coefficients. */
375 while(tapCnt > 0u)
377 /* Read the Read b[numTaps-1] coefficients */
378 c0 = *pb++;
380 /* Read x[n-numTaps-1] for sample 0 and for sample 1 */
381 x0 = *px0++;
382 x1 = *px1++;
384 /* Perform the multiply-accumulate */
385 acc0 += x0 * c0;
386 acc1 += x1 * c0;
388 /* Read the b[numTaps-2] coefficient */
389 c0 = *pb++;
391 /* Read x[n-numTaps-2] for sample 0 and sample 1 */
392 x0 = *px0++;
393 x1 = *px1++;
395 /* Perform the multiply-accumulate */
396 acc0 += x0 * c0;
397 acc1 += x1 * c0;
399 /* Read the b[numTaps-3] coefficients */
400 c0 = *pb++;
402 /* Read x[n-numTaps-3] for sample 0 and sample 1 */
403 x0 = *px0++;
404 x1 = *px1++;
406 /* Perform the multiply-accumulate */
407 acc0 += x0 * c0;
408 acc1 += x1 * c0;
410 /* Read the b[numTaps-4] coefficient */
411 c0 = *pb++;
413 /* Read x[n-numTaps-4] for sample 0 and sample 1 */
414 x0 = *px0++;
415 x1 = *px1++;
417 /* Perform the multiply-accumulate */
418 acc0 += x0 * c0;
419 acc1 += x1 * c0;
421 /* Decrement the loop counter */
422 tapCnt--;
425 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
426 tapCnt = numTaps % 0x4u;
428 while(tapCnt > 0u)
430 /* Read coefficients */
431 c0 = *pb++;
433 /* Fetch 1 state variable */
434 x0 = *px0++;
435 x1 = *px1++;
437 /* Perform the multiply-accumulate */
438 acc0 += x0 * c0;
439 acc1 += x1 * c0;
441 /* Decrement the loop counter */
442 tapCnt--;
445 /* Advance the state pointer by the decimation factor
446 * to process the next group of decimation factor number samples */
447 pState = pState + S->M * 2;
449 /* Store filter output, smlad returns the values in 2.14 format */
450 /* so downsacle by 15 to get output in 1.15 */
452 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
453 *pDst++ = (q15_t) (__SSAT((acc1 >> 15), 16));
455 /* Decrement the loop counter */
456 blkCnt--;
459 while(blkCntN3 > 0u)
461 /* Copy decimation factor number of new input samples into the state buffer */
462 i = S->M;
466 *pStateCurnt++ = *pSrc++;
468 } while(--i);
470 /*Set sum to zero */
471 sum0 = 0;
473 /* Initialize state pointer */
474 px = pState;
476 /* Initialize coeff pointer */
477 pb = pCoeffs;
479 /* Loop unrolling. Process 4 taps at a time. */
480 tapCnt = numTaps >> 2;
482 /* Loop over the number of taps. Unroll by a factor of 4.
483 ** Repeat until we've computed numTaps-4 coefficients. */
484 while(tapCnt > 0u)
486 /* Read the Read b[numTaps-1] coefficients */
487 c0 = *pb++;
489 /* Read x[n-numTaps-1] and sample */
490 x0 = *px++;
492 /* Perform the multiply-accumulate */
493 sum0 += x0 * c0;
495 /* Read the b[numTaps-2] coefficient */
496 c0 = *pb++;
498 /* Read x[n-numTaps-2] and sample */
499 x0 = *px++;
501 /* Perform the multiply-accumulate */
502 sum0 += x0 * c0;
504 /* Read the b[numTaps-3] coefficients */
505 c0 = *pb++;
507 /* Read x[n-numTaps-3] sample */
508 x0 = *px++;
510 /* Perform the multiply-accumulate */
511 sum0 += x0 * c0;
513 /* Read the b[numTaps-4] coefficient */
514 c0 = *pb++;
516 /* Read x[n-numTaps-4] sample */
517 x0 = *px++;
519 /* Perform the multiply-accumulate */
520 sum0 += x0 * c0;
522 /* Decrement the loop counter */
523 tapCnt--;
526 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
527 tapCnt = numTaps % 0x4u;
529 while(tapCnt > 0u)
531 /* Read coefficients */
532 c0 = *pb++;
534 /* Fetch 1 state variable */
535 x0 = *px++;
537 /* Perform the multiply-accumulate */
538 sum0 += x0 * c0;
540 /* Decrement the loop counter */
541 tapCnt--;
544 /* Advance the state pointer by the decimation factor
545 * to process the next group of decimation factor number samples */
546 pState = pState + S->M;
548 /* Store filter output, smlad returns the values in 2.14 format */
549 /* so downsacle by 15 to get output in 1.15 */
550 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
552 /* Decrement the loop counter */
553 blkCntN3--;
556 /* Processing is complete.
557 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
558 ** This prepares the state buffer for the next function call. */
560 /* Points to the start of the state buffer */
561 pStateCurnt = S->pState;
563 i = (numTaps - 1u) >> 2u;
565 /* copy data */
566 while(i > 0u)
568 *pStateCurnt++ = *pState++;
569 *pStateCurnt++ = *pState++;
570 *pStateCurnt++ = *pState++;
571 *pStateCurnt++ = *pState++;
573 /* Decrement the loop counter */
574 i--;
577 i = (numTaps - 1u) % 0x04u;
579 /* copy data */
580 while(i > 0u)
582 *pStateCurnt++ = *pState++;
584 /* Decrement the loop counter */
585 i--;
590 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
592 #else
595 void arm_fir_decimate_q15(
596 const arm_fir_decimate_instance_q15 * S,
597 q15_t * pSrc,
598 q15_t * pDst,
599 uint32_t blockSize)
601 q15_t *pState = S->pState; /* State pointer */
602 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
603 q15_t *pStateCurnt; /* Points to the current sample of the state */
604 q15_t *px; /* Temporary pointer for state buffer */
605 q15_t *pb; /* Temporary pointer coefficient buffer */
606 q31_t x0, c0; /* Temporary variables to hold state and coefficient values */
607 q63_t sum0; /* Accumulators */
608 uint32_t numTaps = S->numTaps; /* Number of taps */
609 uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M; /* Loop counters */
613 /* Run the below code for Cortex-M0 */
615 /* S->pState buffer contains previous frame (numTaps - 1) samples */
616 /* pStateCurnt points to the location where the new input data should be written */
617 pStateCurnt = S->pState + (numTaps - 1u);
619 /* Total number of output samples to be computed */
620 blkCnt = outBlockSize;
622 while(blkCnt > 0u)
624 /* Copy decimation factor number of new input samples into the state buffer */
625 i = S->M;
629 *pStateCurnt++ = *pSrc++;
631 } while(--i);
633 /*Set sum to zero */
634 sum0 = 0;
636 /* Initialize state pointer */
637 px = pState;
639 /* Initialize coeff pointer */
640 pb = pCoeffs;
642 tapCnt = numTaps;
644 while(tapCnt > 0u)
646 /* Read coefficients */
647 c0 = *pb++;
649 /* Fetch 1 state variable */
650 x0 = *px++;
652 /* Perform the multiply-accumulate */
653 sum0 += (q31_t) x0 *c0;
655 /* Decrement the loop counter */
656 tapCnt--;
659 /* Advance the state pointer by the decimation factor
660 * to process the next group of decimation factor number samples */
661 pState = pState + S->M;
663 /*Store filter output , smlad will return the values in 2.14 format */
664 /* so downsacle by 15 to get output in 1.15 */
665 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
667 /* Decrement the loop counter */
668 blkCnt--;
671 /* Processing is complete.
672 ** Now copy the last numTaps - 1 samples to the start of the state buffer.
673 ** This prepares the state buffer for the next function call. */
675 /* Points to the start of the state buffer */
676 pStateCurnt = S->pState;
678 i = numTaps - 1u;
680 /* copy data */
681 while(i > 0u)
683 *pStateCurnt++ = *pState++;
685 /* Decrement the loop counter */
686 i--;
691 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
694 /**
695 * @} end of FIR_decimate group