1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
4 * $Date: 17. January 2013
7 * Project: CMSIS DSP Library
8 * Title: arm_iir_lattice_q15.c
10 * Description: Q15 IIR lattice filter processing function.
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
44 * @ingroup groupFilters
48 * @addtogroup IIR_Lattice
53 * @brief Processing function for the Q15 IIR lattice filter.
54 * @param[in] *S points to an instance of the Q15 IIR lattice structure.
55 * @param[in] *pSrc points to the block of input data.
56 * @param[out] *pDst points to the block of output data.
57 * @param[in] blockSize number of samples to process.
61 * <b>Scaling and Overflow Behavior:</b>
63 * The function is implemented using a 64-bit internal accumulator.
64 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
65 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
66 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
67 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
68 * Lastly, the accumulator is saturated to yield a result in 1.15 format.
/*
 * Processing function for the Q15 IIR lattice filter.
 *
 * Two implementations are selected by ARM_MATH_CM0_FAMILY:
 *   - when it is not defined, the taps after the first one are processed in
 *     groups of four, and pairs of g values and ladder coefficients are
 *     packed with __PKHBT and accumulated with __SMLALD;
 *   - otherwise a simpler per-tap version is used.
 * In both versions every stage result is saturated with __SSAT(..., 16), the
 * ladder products are summed in `acc`, and the output is
 * __SSAT(acc >> 15, 16). At the end, state values starting at
 * S->pState[blockSize] are copied to the start of S->pState.
 *
 * NOTE(review): this excerpt is missing lines (remaining parameters, loop
 * headers, braces, some state reads); the comments below describe only the
 * statements that are visible.
 */
71 void arm_iir_lattice_q15(
72 const arm_iir_lattice_instance_q15
* S
,
79 #ifndef ARM_MATH_CM0_FAMILY
81 /* Run the below code for Cortex-M4 and Cortex-M3 */
83 q31_t fcurr
, fnext
, gcurr
= 0, gnext
; /* f and g values of the lattice stage being processed */
84 q15_t gnext1
, gnext2
; /* Saturated g outputs of two consecutive stages, packed together for __SMLALD */
85 uint32_t stgCnt
; /* Counter for the state copy at the end of the block */
86 q63_t acc
; /* Accumulator for the output sample */
87 uint32_t blkCnt
, tapCnt
; /* Temporary loop counters */
88 q15_t
*px1
, *px2
, *pk
, *pv
; /* Temporary pointers for state and coefficients */
89 uint32_t numStages
= S
->numStages
; /* Number of stages */
90 q15_t
*pState
; /* State pointer */
91 q15_t
*pStateCurnt
; /* Destination pointer for the final state copy */
92 q15_t out
; /* Temporary variable for output */
93 q31_t v
; /* Holds the pair of ladder coefficients passed to __SMLALD */
/* NOTE(review): the declarations guarded by this #ifdef are not visible here;
   v1 and v2 used further down are presumably declared in it -- confirm. */
94 #ifdef UNALIGNED_SUPPORT_DISABLE
101 pState
= &S
->pState
[0];
103 /* Sample processing */
106 /* Read Sample from input buffer */
110 /* Initialize state read pointer */
112 /* Initialize state write pointer */
114 /* Set accumulator to zero */
116 /* Initialize Ladder coeff pointer */
117 pv
= &S
->pvCoeffs
[0];
118 /* Initialize Reflection coeff pointer */
119 pk
= &S
->pkCoeffs
[0];
122 /* Process sample for first tap */
124 /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
125 fnext
= fcurr
- (((q31_t
) gcurr
* (*pk
)) >> 15);
126 fnext
= __SSAT(fnext
, 16);
127 /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
128 gnext
= (((q31_t
) fnext
* (*pk
++)) >> 15) + gcurr
;
129 gnext
= __SSAT(gnext
, 16);
130 /* write gN(n) into state for next sample processing */
131 *px2
++ = (q15_t
) gnext
;
132 /* y(n) += gN(n) * vN */
133 acc
+= (q31_t
) ((gnext
* (*pv
++)));
136 /* Update f values for next coefficient processing */
139 /* Loop unrolling. Process 4 taps at a time. */
/* Number of complete groups of four taps remaining after the first tap */
140 tapCnt
= (numStages
- 1u) >> 2;
145 /* Process sample for 2nd, 6th ...taps */
146 /* Read gN-2(n-1) from state buffer */
148 /* Process sample for 2nd, 6th .. taps */
149 /* fN-2(n) = fN-1(n) - kN-1 * gN-2(n-1) */
150 fnext
= fcurr
- (((q31_t
) gcurr
* (*pk
)) >> 15);
151 fnext
= __SSAT(fnext
, 16);
152 /* gN-1(n) = kN-1 * fN-2(n) + gN-2(n-1) */
153 gnext
= (((q31_t
) fnext
* (*pk
++)) >> 15) + gcurr
;
154 gnext1
= (q15_t
) __SSAT(gnext
, 16);
155 /* write gN-1(n) into state */
156 *px2
++ = (q15_t
) gnext1
;
159 /* Process sample for 3rd, 7th ...taps */
160 /* Read gN-3(n-1) from state */
162 /* Process sample for 3rd, 7th .. taps */
163 /* fN-3(n) = fN-2(n) - kN-2 * gN-3(n-1) */
164 fcurr
= fnext
- (((q31_t
) gcurr
* (*pk
)) >> 15);
165 fcurr
= __SSAT(fcurr
, 16);
166 /* gN-2(n) = kN-2 * fN-3(n) + gN-3(n-1) */
167 gnext
= (((q31_t
) fcurr
* (*pk
++)) >> 15) + gcurr
;
168 gnext2
= (q15_t
) __SSAT(gnext
, 16);
169 /* write gN-2(n) into state */
170 *px2
++ = (q15_t
) gnext2
;
172 /* Read vN-1 and vN-2 at a time */
173 #ifndef UNALIGNED_SUPPORT_DISABLE
/* The argument order of the pack depends on endianness: v1, v2 here ... */
182 #ifndef ARM_MATH_BIG_ENDIAN
184 v
= __PKHBT(v1
, v2
, 16);
/* ... and v2, v1 here */
188 v
= __PKHBT(v2
, v1
, 16);
190 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
192 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
195 /* Pack gN-1(n) and gN-2(n) */
197 #ifndef ARM_MATH_BIG_ENDIAN
199 gnext
= __PKHBT(gnext1
, gnext2
, 16);
203 gnext
= __PKHBT(gnext2
, gnext1
, 16);
205 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
207 /* y(n) += gN-1(n) * vN-1 */
208 /* process for gN-5(n) * vN-5, gN-9(n) * vN-9 ... */
209 /* y(n) += gN-2(n) * vN-2 */
210 /* process for gN-6(n) * vN-6, gN-10(n) * vN-10 ... */
211 acc
= __SMLALD(gnext
, v
, acc
);
214 /* Process sample for 4th, 8th ...taps */
215 /* Read gN-4(n-1) from state */
217 /* Process sample for 4th, 8th .. taps */
218 /* fN-4(n) = fN-3(n) - kN-3 * gN-4(n-1) */
219 fnext
= fcurr
- (((q31_t
) gcurr
* (*pk
)) >> 15);
220 fnext
= __SSAT(fnext
, 16);
221 /* gN-3(n) = kN-3 * fN-4(n) + gN-4(n-1) */
222 gnext
= (((q31_t
) fnext
* (*pk
++)) >> 15) + gcurr
;
223 gnext1
= (q15_t
) __SSAT(gnext
, 16);
224 /* write gN-3(n) for the next sample process */
225 *px2
++ = (q15_t
) gnext1
;
228 /* Process sample for 5th, 9th ...taps */
229 /* Read gN-5(n-1) from state */
231 /* Process sample for 5th, 9th .. taps */
232 /* fN-5(n) = fN-4(n) - kN-4 * gN-5(n-1) */
233 fcurr
= fnext
- (((q31_t
) gcurr
* (*pk
)) >> 15);
234 fcurr
= __SSAT(fcurr
, 16);
235 /* gN-4(n) = kN-4 * fN-5(n) + gN-5(n-1) */
236 gnext
= (((q31_t
) fcurr
* (*pk
++)) >> 15) + gcurr
;
237 gnext2
= (q15_t
) __SSAT(gnext
, 16);
238 /* write gN-4(n) for the next sample process */
239 *px2
++ = (q15_t
) gnext2
;
241 /* Read vN-3 and vN-4 at a time */
242 #ifndef UNALIGNED_SUPPORT_DISABLE
251 #ifndef ARM_MATH_BIG_ENDIAN
253 v
= __PKHBT(v1
, v2
, 16);
257 v
= __PKHBT(v2
, v1
, 16);
259 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
261 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
264 /* Pack gN-3(n) and gN-4(n) */
265 #ifndef ARM_MATH_BIG_ENDIAN
267 gnext
= __PKHBT(gnext1
, gnext2
, 16);
271 gnext
= __PKHBT(gnext2
, gnext1
, 16);
273 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
275 /* y(n) += gN-4(n) * vN-4 */
276 /* process for gN-8(n) * vN-8, gN-12(n) * vN-12 ... */
277 /* y(n) += gN-3(n) * vN-3 */
278 /* process for gN-7(n) * vN-7, gN-11(n) * vN-11 ... */
279 acc
= __SMLALD(gnext
, v
, acc
);
287 /* If (numStages - 1) is not a multiple of 4, compute the remaining filter taps */
288 tapCnt
= (numStages
- 1u) % 0x4u
;
293 /* Process sample for last taps */
294 fnext
= fcurr
- (((q31_t
) gcurr
* (*pk
)) >> 15);
295 fnext
= __SSAT(fnext
, 16);
296 gnext
= (((q31_t
) fnext
* (*pk
++)) >> 15) + gcurr
;
297 gnext
= __SSAT(gnext
, 16);
298 /* Output samples for last taps */
299 acc
+= (q31_t
) (((q31_t
) gnext
* (*pv
++)));
300 *px2
++ = (q15_t
) gnext
;
306 /* y(n) += g0(n) * v0 */
307 acc
+= (q31_t
) (((q31_t
) fnext
* (*pv
++)));
/* Discard the low 15 bits of the accumulator and saturate the result to 16 bits */
309 out
= (q15_t
) __SSAT(acc
>> 15, 16);
/* Store the final f value through the state write pointer */
310 *px2
++ = (q15_t
) fnext
;
312 /* write out into pDst */
315 /* Advance the state pointer by 1 to process the next sample */
316 pState
= pState
+ 1u;
321 /* Processing is complete. Now copy the last S->numStages samples to the start of the buffer
322 in preparation for processing the next frame */
323 /* Points to the start of the state buffer */
324 pStateCurnt
= &S
->pState
[0];
325 pState
= &S
->pState
[blockSize
];
/* Number of groups of four state values to copy */
327 stgCnt
= (numStages
>> 2u);
/* Copy four q15_t values: as two 32-bit accesses here ... */
332 #ifndef UNALIGNED_SUPPORT_DISABLE
334 *__SIMD32(pStateCurnt
)++ = *__SIMD32(pState
)++;
335 *__SIMD32(pStateCurnt
)++ = *__SIMD32(pState
)++;
/* ... or as four individual q15_t copies */
339 *pStateCurnt
++ = *pState
++;
340 *pStateCurnt
++ = *pState
++;
341 *pStateCurnt
++ = *pState
++;
342 *pStateCurnt
++ = *pState
++;
344 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
346 /* Decrement the loop counter */
351 /* Calculation of count for remaining q15_t data */
352 stgCnt
= (numStages
) % 0x4u
;
357 *pStateCurnt
++ = *pState
++;
359 /* Decrement the loop counter */
365 /* Run the below code for Cortex-M0 */
367 q31_t fcurr
, fnext
= 0, gcurr
= 0, gnext
; /* f and g values of the lattice stage being processed */
368 uint32_t stgCnt
; /* Counter for the state copy at the end of the block */
369 q63_t acc
; /* Accumulator for the output sample */
370 uint32_t blkCnt
, tapCnt
; /* Temporary loop counters */
371 q15_t
*px1
, *px2
, *pk
, *pv
; /* Temporary pointers for state and coefficients */
372 uint32_t numStages
= S
->numStages
; /* Number of stages */
373 q15_t
*pState
; /* State pointer */
374 q15_t
*pStateCurnt
; /* Destination pointer for the final state copy */
375 q15_t out
; /* Temporary variable for output */
380 pState
= &S
->pState
[0];
382 /* Sample processing */
385 /* Read Sample from input buffer */
389 /* Initialize state read pointer */
391 /* Initialize state write pointer */
393 /* Set accumulator to zero */
395 /* Initialize Ladder coeff pointer */
396 pv
= &S
->pvCoeffs
[0];
397 /* Initialize Reflection coeff pointer */
398 pk
= &S
->pkCoeffs
[0];
406 /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
407 fnext
= fcurr
- ((gcurr
* (*pk
)) >> 15);
408 fnext
= __SSAT(fnext
, 16);
409 /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
410 gnext
= ((fnext
* (*pk
++)) >> 15) + gcurr
;
411 gnext
= __SSAT(gnext
, 16);
413 /* y(n) += gN(n) * vN */
414 acc
+= (q31_t
) ((gnext
* (*pv
++)));
415 /* write gN(n) into state for next sample processing */
416 *px2
++ = (q15_t
) gnext
;
417 /* Update f values for next coefficient processing */
423 /* y(n) += g0(n) * v0 */
424 acc
+= (q31_t
) ((fnext
* (*pv
++)));
/* Discard the low 15 bits of the accumulator and saturate the result to 16 bits */
426 out
= (q15_t
) __SSAT(acc
>> 15, 16);
/* Store the final f value through the state write pointer */
427 *px2
++ = (q15_t
) fnext
;
429 /* write out into pDst */
432 /* Advance the state pointer by 1 to process the next sample */
433 pState
= pState
+ 1u;
438 /* Processing is complete. Now copy the last S->numStages samples to the start of the buffer
439 in preparation for processing the next frame */
440 /* Points to the start of the state buffer */
441 pStateCurnt
= &S
->pState
[0];
442 pState
= &S
->pState
[blockSize
];
449 *pStateCurnt
++ = *pState
++;
451 /* Decrement the loop counter */
455 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
463 * @} end of IIR_Lattice group