1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
4 * $Date: 17. January 2013
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_lattice_q15.c
10 * Description: Q15 FIR lattice filter processing function.
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
44 * @ingroup groupFilters
48 * @addtogroup FIR_Lattice
54 * @brief Processing function for the Q15 FIR lattice filter.
55 * @param[in] *S points to an instance of the Q15 FIR lattice structure.
56 * @param[in] *pSrc points to the block of input data.
57 * @param[out] *pDst points to the block of output data.
58 * @param[in] blockSize number of samples to process.
62 void arm_fir_lattice_q15(
63 const arm_fir_lattice_instance_q15
* S
,
68 q15_t
*pState
; /* State pointer */
69 q15_t
*pCoeffs
= S
->pCoeffs
; /* Coefficient pointer */
70 q15_t
*px
; /* temporary state pointer */
71 q15_t
*pk
; /* temporary coefficient pointer */
74 #ifndef ARM_MATH_CM0_FAMILY
76 /* Run the below code for Cortex-M4 and Cortex-M3 */
78 q31_t fcurnt1
, fnext1
, gcurnt1
= 0, gnext1
; /* temporary variables for first sample in loop unrolling */
79 q31_t fcurnt2
, fnext2
, gnext2
; /* temporary variables for second sample in loop unrolling */
80 q31_t fcurnt3
, fnext3
, gnext3
; /* temporary variables for third sample in loop unrolling */
81 q31_t fcurnt4
, fnext4
, gnext4
; /* temporary variables for fourth sample in loop unrolling */
82 uint32_t numStages
= S
->numStages
; /* Number of stages in the filter */
83 uint32_t blkCnt
, stageCnt
; /* temporary variables for counts */
85 pState
= &S
->pState
[0];
87 blkCnt
= blockSize
>> 2u;
89 /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
90 ** a second loop below computes the remaining 1 to 3 samples. */
94 /* Read two samples from input buffer */
99 /* Initialize coeff pointer */
102 /* Initialize state pointer */
105 /* Read g0(n-1) from state */
108 /* Process first sample for first tap */
109 /* f1(n) = f0(n) + K1 * g0(n-1) */
110 fnext1
= (q31_t
) ((gcurnt1
* (*pk
)) >> 15u) + fcurnt1
;
111 fnext1
= __SSAT(fnext1
, 16);
113 /* g1(n) = f0(n) * K1 + g0(n-1) */
114 gnext1
= (q31_t
) ((fcurnt1
* (*pk
)) >> 15u) + gcurnt1
;
115 gnext1
= __SSAT(gnext1
, 16);
117 /* Process second sample for first tap */
118 /* for sample 2 processing */
119 fnext2
= (q31_t
) ((fcurnt1
* (*pk
)) >> 15u) + fcurnt2
;
120 fnext2
= __SSAT(fnext2
, 16);
122 gnext2
= (q31_t
) ((fcurnt2
* (*pk
)) >> 15u) + fcurnt1
;
123 gnext2
= __SSAT(gnext2
, 16);
126 /* Read next two samples from input buffer */
127 /* f0(n+2) = x(n+2) */
131 /* Save only the last input sample of this group in the state buffer;
132 it is read back as g0(n-1) when the next four samples are processed */
133 *px
++ = (q15_t
) fcurnt4
;
135 /* Process third sample for first tap */
136 fnext3
= (q31_t
) ((fcurnt2
* (*pk
)) >> 15u) + fcurnt3
;
137 fnext3
= __SSAT(fnext3
, 16);
138 gnext3
= (q31_t
) ((fcurnt3
* (*pk
)) >> 15u) + fcurnt2
;
139 gnext3
= __SSAT(gnext3
, 16);
141 /* Process fourth sample for first tap */
142 fnext4
= (q31_t
) ((fcurnt3
* (*pk
)) >> 15u) + fcurnt4
;
143 fnext4
= __SSAT(fnext4
, 16);
144 gnext4
= (q31_t
) ((fcurnt4
* (*pk
++)) >> 15u) + fcurnt3
;
145 gnext4
= __SSAT(gnext4
, 16);
147 /* Update of f values for next coefficient set processing */
154 /* Loop unrolling. Process 4 taps at a time. */
155 stageCnt
= (numStages
- 1u) >> 2;
158 /* Loop over the number of taps. Unroll by a factor of 4.
159 ** Repeat until we've computed numStages-3 coefficients. */
161 /* Process 2nd, 3rd, 4th and 5th taps ... here */
164 /* Read g1(n-1), g5(n-1) .... from state */
167 /* save g1(n) in state buffer */
168 *px
++ = (q15_t
) gnext4
;
170 /* Process first sample for 2nd, 6th .. tap */
171 /* Sample processing for K2, K6.... */
172 /* f2(n) = f1(n) + K2 * g1(n-1) */
173 fnext1
= (q31_t
) ((gcurnt1
* (*pk
)) >> 15u) + fcurnt1
;
174 fnext1
= __SSAT(fnext1
, 16);
177 /* Process second sample for 2nd, 6th .. tap */
178 /* for sample 2 processing */
179 fnext2
= (q31_t
) ((gnext1
* (*pk
)) >> 15u) + fcurnt2
;
180 fnext2
= __SSAT(fnext2
, 16);
181 /* Process third sample for 2nd, 6th .. tap */
182 fnext3
= (q31_t
) ((gnext2
* (*pk
)) >> 15u) + fcurnt3
;
183 fnext3
= __SSAT(fnext3
, 16);
184 /* Process fourth sample for 2nd, 6th .. tap */
185 /* fnext4 = fcurnt4 + (*pk) * gnext3; */
186 fnext4
= (q31_t
) ((gnext3
* (*pk
)) >> 15u) + fcurnt4
;
187 fnext4
= __SSAT(fnext4
, 16);
189 /* g2(n) = f1(n) * K2 + g1(n-1) */
190 /* Calculation of state values for next stage */
191 gnext4
= (q31_t
) ((fcurnt4
* (*pk
)) >> 15u) + gnext3
;
192 gnext4
= __SSAT(gnext4
, 16);
193 gnext3
= (q31_t
) ((fcurnt3
* (*pk
)) >> 15u) + gnext2
;
194 gnext3
= __SSAT(gnext3
, 16);
196 gnext2
= (q31_t
) ((fcurnt2
* (*pk
)) >> 15u) + gnext1
;
197 gnext2
= __SSAT(gnext2
, 16);
199 gnext1
= (q31_t
) ((fcurnt1
* (*pk
++)) >> 15u) + gcurnt1
;
200 gnext1
= __SSAT(gnext1
, 16);
203 /* Read g2(n-1), g6(n-1) .... from state */
206 /* save g2(n) in state buffer */
207 *px
++ = (q15_t
) gnext4
;
209 /* Sample processing for K3, K7.... */
210 /* Process first sample for 3rd, 7th .. tap */
211 /* f3(n) = f2(n) + K3 * g2(n-1) */
212 fcurnt1
= (q31_t
) ((gcurnt1
* (*pk
)) >> 15u) + fnext1
;
213 fcurnt1
= __SSAT(fcurnt1
, 16);
215 /* Process second sample for 3rd, 7th .. tap */
216 fcurnt2
= (q31_t
) ((gnext1
* (*pk
)) >> 15u) + fnext2
;
217 fcurnt2
= __SSAT(fcurnt2
, 16);
219 /* Process third sample for 3rd, 7th .. tap */
220 fcurnt3
= (q31_t
) ((gnext2
* (*pk
)) >> 15u) + fnext3
;
221 fcurnt3
= __SSAT(fcurnt3
, 16);
223 /* Process fourth sample for 3rd, 7th .. tap */
224 fcurnt4
= (q31_t
) ((gnext3
* (*pk
)) >> 15u) + fnext4
;
225 fcurnt4
= __SSAT(fcurnt4
, 16);
227 /* Calculation of state values for next stage */
228 /* g3(n) = f2(n) * K3 + g2(n-1) */
229 gnext4
= (q31_t
) ((fnext4
* (*pk
)) >> 15u) + gnext3
;
230 gnext4
= __SSAT(gnext4
, 16);
232 gnext3
= (q31_t
) ((fnext3
* (*pk
)) >> 15u) + gnext2
;
233 gnext3
= __SSAT(gnext3
, 16);
235 gnext2
= (q31_t
) ((fnext2
* (*pk
)) >> 15u) + gnext1
;
236 gnext2
= __SSAT(gnext2
, 16);
238 gnext1
= (q31_t
) ((fnext1
* (*pk
++)) >> 15u) + gcurnt1
;
239 gnext1
= __SSAT(gnext1
, 16);
241 /* Read g3(n-1), g7(n-1) .... from state */
244 /* save g3(n) in state buffer */
245 *px
++ = (q15_t
) gnext4
;
247 /* Sample processing for K4, K8.... */
248 /* Process first sample for 4th, 8th .. tap */
249 /* f4(n) = f3(n) + K4 * g3(n-1) */
250 fnext1
= (q31_t
) ((gcurnt1
* (*pk
)) >> 15u) + fcurnt1
;
251 fnext1
= __SSAT(fnext1
, 16);
253 /* Process second sample for 4th, 8th .. tap */
254 /* for sample 2 processing */
255 fnext2
= (q31_t
) ((gnext1
* (*pk
)) >> 15u) + fcurnt2
;
256 fnext2
= __SSAT(fnext2
, 16);
258 /* Process third sample for 4th, 8th .. tap */
259 fnext3
= (q31_t
) ((gnext2
* (*pk
)) >> 15u) + fcurnt3
;
260 fnext3
= __SSAT(fnext3
, 16);
262 /* Process fourth sample for 4th, 8th .. tap */
263 fnext4
= (q31_t
) ((gnext3
* (*pk
)) >> 15u) + fcurnt4
;
264 fnext4
= __SSAT(fnext4
, 16);
266 /* g4(n) = f3(n) * K4 + g3(n-1) */
267 /* Calculation of state values for next stage */
268 gnext4
= (q31_t
) ((fcurnt4
* (*pk
)) >> 15u) + gnext3
;
269 gnext4
= __SSAT(gnext4
, 16);
271 gnext3
= (q31_t
) ((fcurnt3
* (*pk
)) >> 15u) + gnext2
;
272 gnext3
= __SSAT(gnext3
, 16);
274 gnext2
= (q31_t
) ((fcurnt2
* (*pk
)) >> 15u) + gnext1
;
275 gnext2
= __SSAT(gnext2
, 16);
276 gnext1
= (q31_t
) ((fcurnt1
* (*pk
++)) >> 15u) + gcurnt1
;
277 gnext1
= __SSAT(gnext1
, 16);
280 /* Read g4(n-1), g8(n-1) .... from state */
283 /* save g4(n) in state buffer */
284 *px
++ = (q15_t
) gnext4
;
286 /* Sample processing for K5, K9.... */
287 /* Process first sample for 5th, 9th .. tap */
288 /* f5(n) = f4(n) + K5 * g4(n-1) */
289 fcurnt1
= (q31_t
) ((gcurnt1
* (*pk
)) >> 15u) + fnext1
;
290 fcurnt1
= __SSAT(fcurnt1
, 16);
292 /* Process second sample for 5th, 9th .. tap */
293 fcurnt2
= (q31_t
) ((gnext1
* (*pk
)) >> 15u) + fnext2
;
294 fcurnt2
= __SSAT(fcurnt2
, 16);
296 /* Process third sample for 5th, 9th .. tap */
297 fcurnt3
= (q31_t
) ((gnext2
* (*pk
)) >> 15u) + fnext3
;
298 fcurnt3
= __SSAT(fcurnt3
, 16);
300 /* Process fourth sample for 5th, 9th .. tap */
301 fcurnt4
= (q31_t
) ((gnext3
* (*pk
)) >> 15u) + fnext4
;
302 fcurnt4
= __SSAT(fcurnt4
, 16);
304 /* Calculation of state values for next stage */
305 /* g5(n) = f4(n) * K5 + g4(n-1) */
306 gnext4
= (q31_t
) ((fnext4
* (*pk
)) >> 15u) + gnext3
;
307 gnext4
= __SSAT(gnext4
, 16);
308 gnext3
= (q31_t
) ((fnext3
* (*pk
)) >> 15u) + gnext2
;
309 gnext3
= __SSAT(gnext3
, 16);
310 gnext2
= (q31_t
) ((fnext2
* (*pk
)) >> 15u) + gnext1
;
311 gnext2
= __SSAT(gnext2
, 16);
312 gnext1
= (q31_t
) ((fnext1
* (*pk
++)) >> 15u) + gcurnt1
;
313 gnext1
= __SSAT(gnext1
, 16);
318 /* If the (filter length -1) is not a multiple of 4, compute the remaining filter taps */
319 stageCnt
= (numStages
- 1u) % 0x4u
;
325 /* save g value in state buffer */
326 *px
++ = (q15_t
) gnext4
;
328 /* Process four samples for last three taps here */
329 fnext1
= (q31_t
) ((gcurnt1
* (*pk
)) >> 15u) + fcurnt1
;
330 fnext1
= __SSAT(fnext1
, 16);
331 fnext2
= (q31_t
) ((gnext1
* (*pk
)) >> 15u) + fcurnt2
;
332 fnext2
= __SSAT(fnext2
, 16);
334 fnext3
= (q31_t
) ((gnext2
* (*pk
)) >> 15u) + fcurnt3
;
335 fnext3
= __SSAT(fnext3
, 16);
337 fnext4
= (q31_t
) ((gnext3
* (*pk
)) >> 15u) + fcurnt4
;
338 fnext4
= __SSAT(fnext4
, 16);
340 /* g(k)(n) = f(k-1)(n) * K(k) + g(k-1)(n-1) for the current stage k */
341 gnext4
= (q31_t
) ((fcurnt4
* (*pk
)) >> 15u) + gnext3
;
342 gnext4
= __SSAT(gnext4
, 16);
343 gnext3
= (q31_t
) ((fcurnt3
* (*pk
)) >> 15u) + gnext2
;
344 gnext3
= __SSAT(gnext3
, 16);
345 gnext2
= (q31_t
) ((fcurnt2
* (*pk
)) >> 15u) + gnext1
;
346 gnext2
= __SSAT(gnext2
, 16);
347 gnext1
= (q31_t
) ((fcurnt1
* (*pk
++)) >> 15u) + gcurnt1
;
348 gnext1
= __SSAT(gnext1
, 16);
350 /* Update of f values for next coefficient set processing */
360 /* The results in the 4 accumulators, store in the destination buffer. */
363 #ifndef ARM_MATH_BIG_ENDIAN
365 *__SIMD32(pDst
)++ = __PKHBT(fcurnt1
, fcurnt2
, 16);
366 *__SIMD32(pDst
)++ = __PKHBT(fcurnt3
, fcurnt4
, 16);
370 *__SIMD32(pDst
)++ = __PKHBT(fcurnt2
, fcurnt1
, 16);
371 *__SIMD32(pDst
)++ = __PKHBT(fcurnt4
, fcurnt3
, 16);
373 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
378 /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
379 ** No loop unrolling is used. */
380 blkCnt
= blockSize
% 0x4u
;
387 /* Initialize coeff pointer */
390 /* Initialize state pointer */
393 /* read g0(n-1) from state buffer */
396 /* for sample 1 processing */
397 /* f1(n) = f0(n) + K1 * g0(n-1) */
398 fnext1
= (((q31_t
) gcurnt1
* (*pk
)) >> 15u) + fcurnt1
;
399 fnext1
= __SSAT(fnext1
, 16);
402 /* g1(n) = f0(n) * K1 + g0(n-1) */
403 gnext1
= (((q31_t
) fcurnt1
* (*pk
++)) >> 15u) + gcurnt1
;
404 gnext1
= __SSAT(gnext1
, 16);
406 /* save f0(n) in state buffer */
407 *px
++ = (q15_t
) fcurnt1
;
409 /* f1(n) is saved in fcurnt1
410 for next stage processing */
413 stageCnt
= (numStages
- 1u);
418 /* read g1(n-1) from state buffer */
421 /* save g1(n) in state buffer */
422 *px
++ = (q15_t
) gnext1
;
424 /* Sample processing for K2, K3.... */
425 /* f2(n) = f1(n) + K2 * g1(n-1) */
426 fnext1
= (((q31_t
) gcurnt1
* (*pk
)) >> 15u) + fcurnt1
;
427 fnext1
= __SSAT(fnext1
, 16);
429 /* g2(n) = f1(n) * K2 + g1(n-1) */
430 gnext1
= (((q31_t
) fcurnt1
* (*pk
++)) >> 15u) + gcurnt1
;
431 gnext1
= __SSAT(gnext1
, 16);
434 /* f1(n) is saved in fcurnt1
435 for next stage processing */
443 *pDst
++ = __SSAT(fcurnt1
, 16);
452 /* Run the below code for Cortex-M0 */
454 q31_t fcurnt
, fnext
, gcurnt
, gnext
; /* temporary variables */
455 uint32_t numStages
= S
->numStages
; /* Length of the filter */
456 uint32_t blkCnt
, stageCnt
; /* temporary variables for counts */
458 pState
= &S
->pState
[0];
467 /* Initialize coeff pointer */
470 /* Initialize state pointer */
473 /* read g0(n-1) from state buffer */
476 /* for sample 1 processing */
477 /* f1(n) = f0(n) + K1 * g0(n-1) */
478 fnext
= ((gcurnt
* (*pk
)) >> 15u) + fcurnt
;
479 fnext
= __SSAT(fnext
, 16);
482 /* g1(n) = f0(n) * K1 + g0(n-1) */
483 gnext
= ((fcurnt
* (*pk
++)) >> 15u) + gcurnt
;
484 gnext
= __SSAT(gnext
, 16);
486 /* save f0(n) in state buffer */
487 *px
++ = (q15_t
) fcurnt
;
489 /* f1(n) is saved in fcurnt
490 for next stage processing */
493 stageCnt
= (numStages
- 1u);
498 /* read g1(n-1) from state buffer */
501 /* save g1(n) in state buffer */
502 *px
++ = (q15_t
) gnext
;
504 /* Sample processing for K2, K3.... */
505 /* f2(n) = f1(n) + K2 * g1(n-1) */
506 fnext
= ((gcurnt
* (*pk
)) >> 15u) + fcurnt
;
507 fnext
= __SSAT(fnext
, 16);
509 /* g2(n) = f1(n) * K2 + g1(n-1) */
510 gnext
= ((fcurnt
* (*pk
++)) >> 15u) + gcurnt
;
511 gnext
= __SSAT(gnext
, 16);
514 /* f1(n) is saved in fcurnt
515 for next stage processing */
523 *pDst
++ = __SSAT(fcurnt
, 16);
530 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
535 * @} end of FIR_Lattice group