1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
4 * $Date: 17. January 2013
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_decimate_q15.c
10 * Description: Q15 FIR Decimator.
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
44 * @ingroup groupFilters
48 * @addtogroup FIR_decimate
53 * @brief Processing function for the Q15 FIR decimator.
54 * @param[in] *S points to an instance of the Q15 FIR decimator structure.
55 * @param[in] *pSrc points to the block of input data.
56 * @param[out] *pDst points to the location where the output result is written.
57 * @param[in] blockSize number of input samples to process per call.
60 * <b>Scaling and Overflow Behavior:</b>
62 * The function is implemented using a 64-bit internal accumulator.
63 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
64 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
65 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
66 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding the low 15 bits.
67 * Lastly, the accumulator is saturated to yield a result in 1.15 format.
70 * Refer to the function <code>arm_fir_decimate_fast_q15()</code> for a faster but less precise implementation of this function for Cortex-M3 and Cortex-M4.
73 #ifndef ARM_MATH_CM0_FAMILY
75 #ifndef UNALIGNED_SUPPORT_DISABLE
/* Q15 FIR decimator, variant compiled when neither ARM_MATH_CM0_FAMILY nor
 * UNALIGNED_SUPPORT_DISABLE is defined.
 * State and coefficient values are fetched through __SIMD32 and combined with
 * __SMLALD into 64-bit accumulators. Outputs are first produced two at a time
 * (acc0/acc1), then one at a time (sum0). Each result is shifted right by 15
 * and saturated to 16 bits before being written to pDst. */
77 void arm_fir_decimate_q15(
78 const arm_fir_decimate_instance_q15
* S
,
83 q15_t
*pState
= S
->pState
; /* State pointer */
84 q15_t
*pCoeffs
= S
->pCoeffs
; /* Coefficient pointer */
85 q15_t
*pStateCurnt
; /* Points to the current sample of the state */
86 q15_t
*px
; /* Temporary pointer for state buffer */
87 q15_t
*pb
; /* Temporary pointer for coefficient buffer */
88 q31_t x0
, x1
, c0
, c1
; /* Temporary variables to hold state and coefficient values */
89 q63_t sum0
; /* Accumulator used when one output is computed at a time */
93 uint32_t numTaps
= S
->numTaps
; /* Number of taps */
94 uint32_t i
, blkCnt
, tapCnt
, outBlockSize
= blockSize
/ S
->M
; /* Loop counters; outBlockSize = blockSize / M (integer division) is the number of outputs */
97 /* S->pState buffer contains previous frame (numTaps - 1) samples */
98 /* pStateCurnt points to the location where the new input data should be written */
99 pStateCurnt
= S
->pState
+ (numTaps
- 1u);
102 /* Split the outputs: blkCnt counts pairs of output samples and blkCntN3 is
 * the leftover (0 or 1) output when outBlockSize is odd.
 * NOTE(review): blkCntN3 is not among the declarations visible above - confirm. */
103 blkCnt
= outBlockSize
/ 2;
104 blkCntN3
= outBlockSize
- (2 * blkCnt
);
109 /* Copy a decimation-factor number of new input samples into the state buffer */
114 *pStateCurnt
++ = *pSrc
++;
118 /* Set accumulator to zero */
122 /* Initialize state pointer */
128 /* Initialize coeff pointer */
131 /* Loop unrolling. Process 4 taps at a time. */
132 tapCnt
= numTaps
>> 2;
134 /* Loop over the number of taps. Unroll by a factor of 4.
135 ** Repeat until we've computed numTaps-4 coefficients. */
138 /* Read the b[numTaps-1] and b[numTaps-2] coefficients */
139 c0
= *__SIMD32(pb
)++;
141 /* Read the x[n-numTaps-1] and x[n-numTaps-2] samples for output 0 (px0) and output 1 (px1) */
142 x0
= *__SIMD32(px0
)++;
144 x1
= *__SIMD32(px1
)++;
146 /* Perform the multiply-accumulate: the same coefficients feed both accumulators */
147 acc0
= __SMLALD(x0
, c0
, acc0
);
149 acc1
= __SMLALD(x1
, c0
, acc1
);
151 /* Read the b[numTaps-3] and b[numTaps-4] coefficients */
152 c0
= *__SIMD32(pb
)++;
154 /* Read the x[n-numTaps-2] and x[n-numTaps-3] samples for output 0 (px0) and output 1 (px1) */
155 x0
= *__SIMD32(px0
)++;
157 x1
= *__SIMD32(px1
)++;
159 /* Perform the multiply-accumulate */
160 acc0
= __SMLALD(x0
, c0
, acc0
);
162 acc1
= __SMLALD(x1
, c0
, acc1
);
164 /* Decrement the loop counter */
168 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
169 tapCnt
= numTaps
% 0x4u
;
173 /* Read coefficients */
176 /* Fetch 1 state variable */
181 /* Perform the multiply-accumulate */
182 acc0
= __SMLALD(x0
, c0
, acc0
);
183 acc1
= __SMLALD(x1
, c0
, acc1
);
185 /* Decrement the loop counter */
189 /* Advance the state pointer by twice the decimation factor (two outputs
190 * are stored below) to process the next group of input samples */
191 pState
= pState
+ S
->M
* 2;
193 /* Store filter outputs: shift each 64-bit accumulator right by 15 bits */
194 /* and saturate to 16 bits to get the output in 1.15 format */
195 *pDst
++ = (q15_t
) (__SSAT((acc0
>> 15), 16));
196 *pDst
++ = (q15_t
) (__SSAT((acc1
>> 15), 16));
198 /* Decrement the loop counter */
206 /* Copy a decimation-factor number of new input samples into the state buffer */
211 *pStateCurnt
++ = *pSrc
++;
218 /* Initialize state pointer */
221 /* Initialize coeff pointer */
224 /* Loop unrolling. Process 4 taps at a time. */
225 tapCnt
= numTaps
>> 2;
227 /* Loop over the number of taps. Unroll by a factor of 4.
228 ** Repeat until we've computed numTaps-4 coefficients. */
231 /* Read the b[numTaps-1] and b[numTaps-2] coefficients */
232 c0
= *__SIMD32(pb
)++;
234 /* Read the x[n-numTaps-1] and x[n-numTaps-2] samples */
235 x0
= *__SIMD32(px
)++;
237 /* Read the b[numTaps-3] and b[numTaps-4] coefficients */
238 c1
= *__SIMD32(pb
)++;
240 /* Perform the multiply-accumulate */
241 sum0
= __SMLALD(x0
, c0
, sum0
);
243 /* Read the x[n-numTaps-2] and x[n-numTaps-3] samples */
244 x0
= *__SIMD32(px
)++;
246 /* Perform the multiply-accumulate */
247 sum0
= __SMLALD(x0
, c1
, sum0
);
249 /* Decrement the loop counter */
253 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
254 tapCnt
= numTaps
% 0x4u
;
258 /* Read coefficients */
261 /* Fetch 1 state variable */
264 /* Perform the multiply-accumulate */
265 sum0
= __SMLALD(x0
, c0
, sum0
);
267 /* Decrement the loop counter */
271 /* Advance the state pointer by the decimation factor
272 * to process the next group of decimation factor number samples */
273 pState
= pState
+ S
->M
;
275 /* Store filter output: shift the 64-bit accumulator right by 15 bits */
276 /* and saturate to 16 bits to get the output in 1.15 format */
277 *pDst
++ = (q15_t
) (__SSAT((sum0
>> 15), 16));
279 /* Decrement the loop counter */
283 /* Processing is complete.
284 ** Now copy the last numTaps - 1 samples to the start of the state buffer.
285 ** This prepares the state buffer for the next function call. */
287 /* Points to the start of the state buffer */
288 pStateCurnt
= S
->pState
;
290 i
= (numTaps
- 1u) >> 2u;
/* Two __SIMD32 transfers per pass of the (numTaps - 1) / 4 count computed above */
295 *__SIMD32(pStateCurnt
)++ = *__SIMD32(pState
)++;
296 *__SIMD32(pStateCurnt
)++ = *__SIMD32(pState
)++;
298 /* Decrement the loop counter */
/* Remaining (numTaps - 1) % 4 samples are copied one at a time */
302 i
= (numTaps
- 1u) % 0x04u
;
307 *pStateCurnt
++ = *pState
++;
309 /* Decrement the loop counter */
/* Q15 FIR decimator, second definition inside the UNALIGNED_SUPPORT_DISABLE
 * conditional (presumably its #else branch - the #else line is not visible
 * here, confirm). Temporaries are plain q15_t values and the state buffer is
 * copied one sample at a time rather than through __SIMD32. Results are
 * shifted right by 15 and saturated to 16 bits before being written to pDst. */
317 void arm_fir_decimate_q15(
318 const arm_fir_decimate_instance_q15
* S
,
323 q15_t
*pState
= S
->pState
; /* State pointer */
324 q15_t
*pCoeffs
= S
->pCoeffs
; /* Coefficient pointer */
325 q15_t
*pStateCurnt
; /* Points to the current sample of the state */
326 q15_t
*px
; /* Temporary pointer for state buffer */
327 q15_t
*pb
; /* Temporary pointer for coefficient buffer */
328 q15_t x0
, x1
, c0
; /* Temporary variables to hold state and coefficient values */
329 q63_t sum0
; /* Accumulator used when one output is computed at a time */
333 uint32_t numTaps
= S
->numTaps
; /* Number of taps */
334 uint32_t i
, blkCnt
, tapCnt
, outBlockSize
= blockSize
/ S
->M
; /* Loop counters; outBlockSize = blockSize / M (integer division) is the number of outputs */
337 /* S->pState buffer contains previous frame (numTaps - 1) samples */
338 /* pStateCurnt points to the location where the new input data should be written */
339 pStateCurnt
= S
->pState
+ (numTaps
- 1u);
342 /* Split the outputs: blkCnt counts pairs of output samples and blkCntN3 is
 * the leftover (0 or 1) output when outBlockSize is odd.
 * NOTE(review): blkCntN3 is not among the declarations visible above - confirm. */
343 blkCnt
= outBlockSize
/ 2;
344 blkCntN3
= outBlockSize
- (2 * blkCnt
);
348 /* Copy a decimation-factor number of new input samples into the state buffer */
353 *pStateCurnt
++ = *pSrc
++;
357 /* Set accumulator to zero */
361 /* Initialize state pointer */
367 /* Initialize coeff pointer */
370 /* Loop unrolling. Process 4 taps at a time. */
371 tapCnt
= numTaps
>> 2;
373 /* Loop over the number of taps. Unroll by a factor of 4.
374 ** Repeat until we've computed numTaps-4 coefficients. */
377 /* Read the b[numTaps-1] coefficient */
380 /* Read x[n-numTaps-1] for sample 0 and for sample 1 */
384 /* Perform the multiply-accumulate */
388 /* Read the b[numTaps-2] coefficient */
391 /* Read x[n-numTaps-2] for sample 0 and sample 1 */
395 /* Perform the multiply-accumulate */
399 /* Read the b[numTaps-3] coefficient */
402 /* Read x[n-numTaps-3] for sample 0 and sample 1 */
406 /* Perform the multiply-accumulate */
410 /* Read the b[numTaps-4] coefficient */
413 /* Read x[n-numTaps-4] for sample 0 and sample 1 */
417 /* Perform the multiply-accumulate */
421 /* Decrement the loop counter */
425 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
426 tapCnt
= numTaps
% 0x4u
;
430 /* Read coefficients */
433 /* Fetch 1 state variable */
437 /* Perform the multiply-accumulate */
441 /* Decrement the loop counter */
445 /* Advance the state pointer by twice the decimation factor (two outputs
446 * are stored below) to process the next group of input samples */
447 pState
= pState
+ S
->M
* 2;
449 /* Store filter outputs: shift each accumulator right by 15 bits */
450 /* and saturate to 16 bits to get the output in 1.15 format */
452 *pDst
++ = (q15_t
) (__SSAT((acc0
>> 15), 16));
453 *pDst
++ = (q15_t
) (__SSAT((acc1
>> 15), 16));
455 /* Decrement the loop counter */
461 /* Copy a decimation-factor number of new input samples into the state buffer */
466 *pStateCurnt
++ = *pSrc
++;
473 /* Initialize state pointer */
476 /* Initialize coeff pointer */
479 /* Loop unrolling. Process 4 taps at a time. */
480 tapCnt
= numTaps
>> 2;
482 /* Loop over the number of taps. Unroll by a factor of 4.
483 ** Repeat until we've computed numTaps-4 coefficients. */
486 /* Read the b[numTaps-1] coefficient */
489 /* Read the x[n-numTaps-1] sample */
492 /* Perform the multiply-accumulate */
495 /* Read the b[numTaps-2] coefficient */
498 /* Read the x[n-numTaps-2] sample */
501 /* Perform the multiply-accumulate */
504 /* Read the b[numTaps-3] coefficient */
507 /* Read the x[n-numTaps-3] sample */
510 /* Perform the multiply-accumulate */
513 /* Read the b[numTaps-4] coefficient */
516 /* Read the x[n-numTaps-4] sample */
519 /* Perform the multiply-accumulate */
522 /* Decrement the loop counter */
526 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
527 tapCnt
= numTaps
% 0x4u
;
531 /* Read coefficients */
534 /* Fetch 1 state variable */
537 /* Perform the multiply-accumulate */
540 /* Decrement the loop counter */
544 /* Advance the state pointer by the decimation factor
545 * to process the next group of decimation factor number samples */
546 pState
= pState
+ S
->M
;
548 /* Store filter output: shift the 64-bit accumulator right by 15 bits */
549 /* and saturate to 16 bits to get the output in 1.15 format */
550 *pDst
++ = (q15_t
) (__SSAT((sum0
>> 15), 16));
552 /* Decrement the loop counter */
556 /* Processing is complete.
557 ** Now copy the last numTaps - 1 samples to the start of the state buffer.
558 ** This prepares the state buffer for the next function call. */
560 /* Points to the start of the state buffer */
561 pStateCurnt
= S
->pState
;
563 i
= (numTaps
- 1u) >> 2u;
/* Four single-sample copies per pass of the (numTaps - 1) / 4 count computed above */
568 *pStateCurnt
++ = *pState
++;
569 *pStateCurnt
++ = *pState
++;
570 *pStateCurnt
++ = *pState
++;
571 *pStateCurnt
++ = *pState
++;
573 /* Decrement the loop counter */
/* Remaining (numTaps - 1) % 4 samples are copied one at a time */
577 i
= (numTaps
- 1u) % 0x04u
;
582 *pStateCurnt
++ = *pState
++;
584 /* Decrement the loop counter */
590 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
/* Q15 FIR decimator, Cortex-M0 variant (see the "Run the below code for
 * Cortex-M0" note in the body). One output sample is produced per outer
 * iteration using a plain multiply-accumulate into the 64-bit sum0. The sum
 * is shifted right by 15 and saturated to 16 bits before being written to pDst. */
595 void arm_fir_decimate_q15(
596 const arm_fir_decimate_instance_q15
* S
,
601 q15_t
*pState
= S
->pState
; /* State pointer */
602 q15_t
*pCoeffs
= S
->pCoeffs
; /* Coefficient pointer */
603 q15_t
*pStateCurnt
; /* Points to the current sample of the state */
604 q15_t
*px
; /* Temporary pointer for state buffer */
605 q15_t
*pb
; /* Temporary pointer for coefficient buffer */
606 q31_t x0
, c0
; /* Temporary variables to hold state and coefficient values */
607 q63_t sum0
; /* Accumulator */
608 uint32_t numTaps
= S
->numTaps
; /* Number of taps */
609 uint32_t i
, blkCnt
, tapCnt
, outBlockSize
= blockSize
/ S
->M
; /* Loop counters; outBlockSize = blockSize / M (integer division) is the number of outputs */
613 /* Run the below code for Cortex-M0 */
615 /* S->pState buffer contains previous frame (numTaps - 1) samples */
616 /* pStateCurnt points to the location where the new input data should be written */
617 pStateCurnt
= S
->pState
+ (numTaps
- 1u);
619 /* Total number of output samples to be computed */
620 blkCnt
= outBlockSize
;
624 /* Copy a decimation-factor number of new input samples into the state buffer */
629 *pStateCurnt
++ = *pSrc
++;
636 /* Initialize state pointer */
639 /* Initialize coeff pointer */
646 /* Read coefficients */
649 /* Fetch 1 state variable */
652 /* Perform the multiply-accumulate: x0 is cast to q31_t, then multiplied by c0 and added to sum0 */
653 sum0
+= (q31_t
) x0
*c0
;
655 /* Decrement the loop counter */
659 /* Advance the state pointer by the decimation factor
660 * to process the next group of decimation factor number samples */
661 pState
= pState
+ S
->M
;
663 /* Store filter output: shift the 64-bit accumulator right by 15 bits */
664 /* and saturate to 16 bits to get the output in 1.15 format */
665 *pDst
++ = (q15_t
) (__SSAT((sum0
>> 15), 16));
667 /* Decrement the loop counter */
671 /* Processing is complete.
672 ** Now copy the last numTaps - 1 samples to the start of the state buffer.
673 ** This prepares the state buffer for the next function call. */
675 /* Points to the start of the state buffer */
676 pStateCurnt
= S
->pState
;
683 *pStateCurnt
++ = *pState
++;
685 /* Decrement the loop counter */
691 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
695 * @} end of FIR_decimate group