]> git.gir.st - tmk_keyboard.git/blob - tmk_core/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/FilteringFunctions/arm_fir_decimate_q15.c
Merge commit '1fe4406f374291ab2e86e95a97341fd9c475fcb8'
[tmk_keyboard.git] / tmk_core / tool / mbed / mbed-sdk / libraries / dsp / cmsis_dsp / FilteringFunctions / arm_fir_decimate_q15.c
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
3 *
4 * $Date: 17. January 2013
5 * $Revision: V1.4.1
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_decimate_q15.c
9 *
10 * Description: Q15 FIR Decimator.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40
41 #include "arm_math.h"
42
43 /**
44 * @ingroup groupFilters
45 */
46
47 /**
48 * @addtogroup FIR_decimate
49 * @{
50 */
51
52 /**
53 * @brief Processing function for the Q15 FIR decimator.
54 * @param[in] *S points to an instance of the Q15 FIR decimator structure.
55 * @param[in] *pSrc points to the block of input data.
56 * @param[out] *pDst points to the location where the output result is written.
57 * @param[in] blockSize number of input samples to process per call.
58 * @return none.
59 *
60 * <b>Scaling and Overflow Behavior:</b>
61 * \par
62 * The function is implemented using a 64-bit internal accumulator.
63 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
64 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
65 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
66 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
67 * Lastly, the accumulator is saturated to yield a result in 1.15 format.
68 *
69 * \par
70 * Refer to the function <code>arm_fir_decimate_fast_q15()</code> for a faster but less precise implementation of this function for Cortex-M3 and Cortex-M4.
71 */
72
73 #ifndef ARM_MATH_CM0_FAMILY
74
75 #ifndef UNALIGNED_SUPPORT_DISABLE
76
77 void arm_fir_decimate_q15(
78 const arm_fir_decimate_instance_q15 * S,
79 q15_t * pSrc,
80 q15_t * pDst,
81 uint32_t blockSize)
82 {
83 q15_t *pState = S->pState; /* State pointer */
84 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
85 q15_t *pStateCurnt; /* Points to the current sample of the state */
86 q15_t *px; /* Temporary pointer for state buffer */
87 q15_t *pb; /* Temporary pointer coefficient buffer */
88 q31_t x0, x1, c0, c1; /* Temporary variables to hold state and coefficient values */
89 q63_t sum0; /* Accumulators */
90 q63_t acc0, acc1;
91 q15_t *px0, *px1;
92 uint32_t blkCntN3;
93 uint32_t numTaps = S->numTaps; /* Number of taps */
94 uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M; /* Loop counters */
95
96
97 /* S->pState buffer contains previous frame (numTaps - 1) samples */
98 /* pStateCurnt points to the location where the new input data should be written */
99 pStateCurnt = S->pState + (numTaps - 1u);
100
101
102 /* Total number of output samples to be computed */
103 blkCnt = outBlockSize / 2;
104 blkCntN3 = outBlockSize - (2 * blkCnt);
105
106
107 while(blkCnt > 0u)
108 {
109 /* Copy decimation factor number of new input samples into the state buffer */
110 i = 2 * S->M;
111
112 do
113 {
114 *pStateCurnt++ = *pSrc++;
115
116 } while(--i);
117
118 /* Set accumulator to zero */
119 acc0 = 0;
120 acc1 = 0;
121
122 /* Initialize state pointer */
123 px0 = pState;
124
125 px1 = pState + S->M;
126
127
128 /* Initialize coeff pointer */
129 pb = pCoeffs;
130
131 /* Loop unrolling. Process 4 taps at a time. */
132 tapCnt = numTaps >> 2;
133
134 /* Loop over the number of taps. Unroll by a factor of 4.
135 ** Repeat until we've computed numTaps-4 coefficients. */
136 while(tapCnt > 0u)
137 {
138 /* Read the Read b[numTaps-1] and b[numTaps-2] coefficients */
139 c0 = *__SIMD32(pb)++;
140
141 /* Read x[n-numTaps-1] and x[n-numTaps-2]sample */
142 x0 = *__SIMD32(px0)++;
143
144 x1 = *__SIMD32(px1)++;
145
146 /* Perform the multiply-accumulate */
147 acc0 = __SMLALD(x0, c0, acc0);
148
149 acc1 = __SMLALD(x1, c0, acc1);
150
151 /* Read the b[numTaps-3] and b[numTaps-4] coefficient */
152 c0 = *__SIMD32(pb)++;
153
154 /* Read x[n-numTaps-2] and x[n-numTaps-3] sample */
155 x0 = *__SIMD32(px0)++;
156
157 x1 = *__SIMD32(px1)++;
158
159 /* Perform the multiply-accumulate */
160 acc0 = __SMLALD(x0, c0, acc0);
161
162 acc1 = __SMLALD(x1, c0, acc1);
163
164 /* Decrement the loop counter */
165 tapCnt--;
166 }
167
168 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
169 tapCnt = numTaps % 0x4u;
170
171 while(tapCnt > 0u)
172 {
173 /* Read coefficients */
174 c0 = *pb++;
175
176 /* Fetch 1 state variable */
177 x0 = *px0++;
178
179 x1 = *px1++;
180
181 /* Perform the multiply-accumulate */
182 acc0 = __SMLALD(x0, c0, acc0);
183 acc1 = __SMLALD(x1, c0, acc1);
184
185 /* Decrement the loop counter */
186 tapCnt--;
187 }
188
189 /* Advance the state pointer by the decimation factor
190 * to process the next group of decimation factor number samples */
191 pState = pState + S->M * 2;
192
193 /* Store filter output, smlad returns the values in 2.14 format */
194 /* so downsacle by 15 to get output in 1.15 */
195 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
196 *pDst++ = (q15_t) (__SSAT((acc1 >> 15), 16));
197
198 /* Decrement the loop counter */
199 blkCnt--;
200 }
201
202
203
204 while(blkCntN3 > 0u)
205 {
206 /* Copy decimation factor number of new input samples into the state buffer */
207 i = S->M;
208
209 do
210 {
211 *pStateCurnt++ = *pSrc++;
212
213 } while(--i);
214
215 /*Set sum to zero */
216 sum0 = 0;
217
218 /* Initialize state pointer */
219 px = pState;
220
221 /* Initialize coeff pointer */
222 pb = pCoeffs;
223
224 /* Loop unrolling. Process 4 taps at a time. */
225 tapCnt = numTaps >> 2;
226
227 /* Loop over the number of taps. Unroll by a factor of 4.
228 ** Repeat until we've computed numTaps-4 coefficients. */
229 while(tapCnt > 0u)
230 {
231 /* Read the Read b[numTaps-1] and b[numTaps-2] coefficients */
232 c0 = *__SIMD32(pb)++;
233
234 /* Read x[n-numTaps-1] and x[n-numTaps-2]sample */
235 x0 = *__SIMD32(px)++;
236
237 /* Read the b[numTaps-3] and b[numTaps-4] coefficient */
238 c1 = *__SIMD32(pb)++;
239
240 /* Perform the multiply-accumulate */
241 sum0 = __SMLALD(x0, c0, sum0);
242
243 /* Read x[n-numTaps-2] and x[n-numTaps-3] sample */
244 x0 = *__SIMD32(px)++;
245
246 /* Perform the multiply-accumulate */
247 sum0 = __SMLALD(x0, c1, sum0);
248
249 /* Decrement the loop counter */
250 tapCnt--;
251 }
252
253 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
254 tapCnt = numTaps % 0x4u;
255
256 while(tapCnt > 0u)
257 {
258 /* Read coefficients */
259 c0 = *pb++;
260
261 /* Fetch 1 state variable */
262 x0 = *px++;
263
264 /* Perform the multiply-accumulate */
265 sum0 = __SMLALD(x0, c0, sum0);
266
267 /* Decrement the loop counter */
268 tapCnt--;
269 }
270
271 /* Advance the state pointer by the decimation factor
272 * to process the next group of decimation factor number samples */
273 pState = pState + S->M;
274
275 /* Store filter output, smlad returns the values in 2.14 format */
276 /* so downsacle by 15 to get output in 1.15 */
277 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
278
279 /* Decrement the loop counter */
280 blkCntN3--;
281 }
282
283 /* Processing is complete.
284 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
285 ** This prepares the state buffer for the next function call. */
286
287 /* Points to the start of the state buffer */
288 pStateCurnt = S->pState;
289
290 i = (numTaps - 1u) >> 2u;
291
292 /* copy data */
293 while(i > 0u)
294 {
295 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
296 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
297
298 /* Decrement the loop counter */
299 i--;
300 }
301
302 i = (numTaps - 1u) % 0x04u;
303
304 /* copy data */
305 while(i > 0u)
306 {
307 *pStateCurnt++ = *pState++;
308
309 /* Decrement the loop counter */
310 i--;
311 }
312 }
313
314 #else
315
316
317 void arm_fir_decimate_q15(
318 const arm_fir_decimate_instance_q15 * S,
319 q15_t * pSrc,
320 q15_t * pDst,
321 uint32_t blockSize)
322 {
323 q15_t *pState = S->pState; /* State pointer */
324 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
325 q15_t *pStateCurnt; /* Points to the current sample of the state */
326 q15_t *px; /* Temporary pointer for state buffer */
327 q15_t *pb; /* Temporary pointer coefficient buffer */
328 q15_t x0, x1, c0; /* Temporary variables to hold state and coefficient values */
329 q63_t sum0; /* Accumulators */
330 q63_t acc0, acc1;
331 q15_t *px0, *px1;
332 uint32_t blkCntN3;
333 uint32_t numTaps = S->numTaps; /* Number of taps */
334 uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M; /* Loop counters */
335
336
337 /* S->pState buffer contains previous frame (numTaps - 1) samples */
338 /* pStateCurnt points to the location where the new input data should be written */
339 pStateCurnt = S->pState + (numTaps - 1u);
340
341
342 /* Total number of output samples to be computed */
343 blkCnt = outBlockSize / 2;
344 blkCntN3 = outBlockSize - (2 * blkCnt);
345
346 while(blkCnt > 0u)
347 {
348 /* Copy decimation factor number of new input samples into the state buffer */
349 i = 2 * S->M;
350
351 do
352 {
353 *pStateCurnt++ = *pSrc++;
354
355 } while(--i);
356
357 /* Set accumulator to zero */
358 acc0 = 0;
359 acc1 = 0;
360
361 /* Initialize state pointer */
362 px0 = pState;
363
364 px1 = pState + S->M;
365
366
367 /* Initialize coeff pointer */
368 pb = pCoeffs;
369
370 /* Loop unrolling. Process 4 taps at a time. */
371 tapCnt = numTaps >> 2;
372
373 /* Loop over the number of taps. Unroll by a factor of 4.
374 ** Repeat until we've computed numTaps-4 coefficients. */
375 while(tapCnt > 0u)
376 {
377 /* Read the Read b[numTaps-1] coefficients */
378 c0 = *pb++;
379
380 /* Read x[n-numTaps-1] for sample 0 and for sample 1 */
381 x0 = *px0++;
382 x1 = *px1++;
383
384 /* Perform the multiply-accumulate */
385 acc0 += x0 * c0;
386 acc1 += x1 * c0;
387
388 /* Read the b[numTaps-2] coefficient */
389 c0 = *pb++;
390
391 /* Read x[n-numTaps-2] for sample 0 and sample 1 */
392 x0 = *px0++;
393 x1 = *px1++;
394
395 /* Perform the multiply-accumulate */
396 acc0 += x0 * c0;
397 acc1 += x1 * c0;
398
399 /* Read the b[numTaps-3] coefficients */
400 c0 = *pb++;
401
402 /* Read x[n-numTaps-3] for sample 0 and sample 1 */
403 x0 = *px0++;
404 x1 = *px1++;
405
406 /* Perform the multiply-accumulate */
407 acc0 += x0 * c0;
408 acc1 += x1 * c0;
409
410 /* Read the b[numTaps-4] coefficient */
411 c0 = *pb++;
412
413 /* Read x[n-numTaps-4] for sample 0 and sample 1 */
414 x0 = *px0++;
415 x1 = *px1++;
416
417 /* Perform the multiply-accumulate */
418 acc0 += x0 * c0;
419 acc1 += x1 * c0;
420
421 /* Decrement the loop counter */
422 tapCnt--;
423 }
424
425 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
426 tapCnt = numTaps % 0x4u;
427
428 while(tapCnt > 0u)
429 {
430 /* Read coefficients */
431 c0 = *pb++;
432
433 /* Fetch 1 state variable */
434 x0 = *px0++;
435 x1 = *px1++;
436
437 /* Perform the multiply-accumulate */
438 acc0 += x0 * c0;
439 acc1 += x1 * c0;
440
441 /* Decrement the loop counter */
442 tapCnt--;
443 }
444
445 /* Advance the state pointer by the decimation factor
446 * to process the next group of decimation factor number samples */
447 pState = pState + S->M * 2;
448
449 /* Store filter output, smlad returns the values in 2.14 format */
450 /* so downsacle by 15 to get output in 1.15 */
451
452 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
453 *pDst++ = (q15_t) (__SSAT((acc1 >> 15), 16));
454
455 /* Decrement the loop counter */
456 blkCnt--;
457 }
458
459 while(blkCntN3 > 0u)
460 {
461 /* Copy decimation factor number of new input samples into the state buffer */
462 i = S->M;
463
464 do
465 {
466 *pStateCurnt++ = *pSrc++;
467
468 } while(--i);
469
470 /*Set sum to zero */
471 sum0 = 0;
472
473 /* Initialize state pointer */
474 px = pState;
475
476 /* Initialize coeff pointer */
477 pb = pCoeffs;
478
479 /* Loop unrolling. Process 4 taps at a time. */
480 tapCnt = numTaps >> 2;
481
482 /* Loop over the number of taps. Unroll by a factor of 4.
483 ** Repeat until we've computed numTaps-4 coefficients. */
484 while(tapCnt > 0u)
485 {
486 /* Read the Read b[numTaps-1] coefficients */
487 c0 = *pb++;
488
489 /* Read x[n-numTaps-1] and sample */
490 x0 = *px++;
491
492 /* Perform the multiply-accumulate */
493 sum0 += x0 * c0;
494
495 /* Read the b[numTaps-2] coefficient */
496 c0 = *pb++;
497
498 /* Read x[n-numTaps-2] and sample */
499 x0 = *px++;
500
501 /* Perform the multiply-accumulate */
502 sum0 += x0 * c0;
503
504 /* Read the b[numTaps-3] coefficients */
505 c0 = *pb++;
506
507 /* Read x[n-numTaps-3] sample */
508 x0 = *px++;
509
510 /* Perform the multiply-accumulate */
511 sum0 += x0 * c0;
512
513 /* Read the b[numTaps-4] coefficient */
514 c0 = *pb++;
515
516 /* Read x[n-numTaps-4] sample */
517 x0 = *px++;
518
519 /* Perform the multiply-accumulate */
520 sum0 += x0 * c0;
521
522 /* Decrement the loop counter */
523 tapCnt--;
524 }
525
526 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
527 tapCnt = numTaps % 0x4u;
528
529 while(tapCnt > 0u)
530 {
531 /* Read coefficients */
532 c0 = *pb++;
533
534 /* Fetch 1 state variable */
535 x0 = *px++;
536
537 /* Perform the multiply-accumulate */
538 sum0 += x0 * c0;
539
540 /* Decrement the loop counter */
541 tapCnt--;
542 }
543
544 /* Advance the state pointer by the decimation factor
545 * to process the next group of decimation factor number samples */
546 pState = pState + S->M;
547
548 /* Store filter output, smlad returns the values in 2.14 format */
549 /* so downsacle by 15 to get output in 1.15 */
550 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
551
552 /* Decrement the loop counter */
553 blkCntN3--;
554 }
555
556 /* Processing is complete.
557 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
558 ** This prepares the state buffer for the next function call. */
559
560 /* Points to the start of the state buffer */
561 pStateCurnt = S->pState;
562
563 i = (numTaps - 1u) >> 2u;
564
565 /* copy data */
566 while(i > 0u)
567 {
568 *pStateCurnt++ = *pState++;
569 *pStateCurnt++ = *pState++;
570 *pStateCurnt++ = *pState++;
571 *pStateCurnt++ = *pState++;
572
573 /* Decrement the loop counter */
574 i--;
575 }
576
577 i = (numTaps - 1u) % 0x04u;
578
579 /* copy data */
580 while(i > 0u)
581 {
582 *pStateCurnt++ = *pState++;
583
584 /* Decrement the loop counter */
585 i--;
586 }
587 }
588
589
590 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
591
592 #else
593
594
595 void arm_fir_decimate_q15(
596 const arm_fir_decimate_instance_q15 * S,
597 q15_t * pSrc,
598 q15_t * pDst,
599 uint32_t blockSize)
600 {
601 q15_t *pState = S->pState; /* State pointer */
602 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
603 q15_t *pStateCurnt; /* Points to the current sample of the state */
604 q15_t *px; /* Temporary pointer for state buffer */
605 q15_t *pb; /* Temporary pointer coefficient buffer */
606 q31_t x0, c0; /* Temporary variables to hold state and coefficient values */
607 q63_t sum0; /* Accumulators */
608 uint32_t numTaps = S->numTaps; /* Number of taps */
609 uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M; /* Loop counters */
610
611
612
613 /* Run the below code for Cortex-M0 */
614
615 /* S->pState buffer contains previous frame (numTaps - 1) samples */
616 /* pStateCurnt points to the location where the new input data should be written */
617 pStateCurnt = S->pState + (numTaps - 1u);
618
619 /* Total number of output samples to be computed */
620 blkCnt = outBlockSize;
621
622 while(blkCnt > 0u)
623 {
624 /* Copy decimation factor number of new input samples into the state buffer */
625 i = S->M;
626
627 do
628 {
629 *pStateCurnt++ = *pSrc++;
630
631 } while(--i);
632
633 /*Set sum to zero */
634 sum0 = 0;
635
636 /* Initialize state pointer */
637 px = pState;
638
639 /* Initialize coeff pointer */
640 pb = pCoeffs;
641
642 tapCnt = numTaps;
643
644 while(tapCnt > 0u)
645 {
646 /* Read coefficients */
647 c0 = *pb++;
648
649 /* Fetch 1 state variable */
650 x0 = *px++;
651
652 /* Perform the multiply-accumulate */
653 sum0 += (q31_t) x0 *c0;
654
655 /* Decrement the loop counter */
656 tapCnt--;
657 }
658
659 /* Advance the state pointer by the decimation factor
660 * to process the next group of decimation factor number samples */
661 pState = pState + S->M;
662
663 /*Store filter output , smlad will return the values in 2.14 format */
664 /* so downsacle by 15 to get output in 1.15 */
665 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
666
667 /* Decrement the loop counter */
668 blkCnt--;
669 }
670
671 /* Processing is complete.
672 ** Now copy the last numTaps - 1 samples to the start of the state buffer.
673 ** This prepares the state buffer for the next function call. */
674
675 /* Points to the start of the state buffer */
676 pStateCurnt = S->pState;
677
678 i = numTaps - 1u;
679
680 /* copy data */
681 while(i > 0u)
682 {
683 *pStateCurnt++ = *pState++;
684
685 /* Decrement the loop counter */
686 i--;
687 }
688
689
690 }
691 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
692
693
694 /**
695 * @} end of FIR_decimate group
696 */
Imprint / Impressum