]> git.gir.st - tmk_keyboard.git/blob - tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/FilteringFunctions/arm_iir_lattice_q15.c
Squashed 'tmk_core/' changes from 7967731..b9e0ea0
[tmk_keyboard.git] / tool / mbed / mbed-sdk / libraries / dsp / cmsis_dsp / FilteringFunctions / arm_iir_lattice_q15.c
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
3 *
4 * $Date: 17. January 2013
5 * $Revision: V1.4.1
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_iir_lattice_q15.c
9 *
10 * Description: Q15 IIR lattice filter processing function.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40
41 #include "arm_math.h"
42
43 /**
44 * @ingroup groupFilters
45 */
46
47 /**
48 * @addtogroup IIR_Lattice
49 * @{
50 */
51
52 /**
53 * @brief Processing function for the Q15 IIR lattice filter.
54 * @param[in] *S points to an instance of the Q15 IIR lattice structure.
55 * @param[in] *pSrc points to the block of input data.
56 * @param[out] *pDst points to the block of output data.
57 * @param[in] blockSize number of samples to process.
58 * @return none.
59 *
60 * @details
61 * <b>Scaling and Overflow Behavior:</b>
62 * \par
63 * The function is implemented using a 64-bit internal accumulator.
64 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
65 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
66 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
67 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
68 * Lastly, the accumulator is saturated to yield a result in 1.15 format.
69 */
70
71 void arm_iir_lattice_q15(
72 const arm_iir_lattice_instance_q15 * S,
73 q15_t * pSrc,
74 q15_t * pDst,
75 uint32_t blockSize)
76 {
77 /* Filters blockSize samples from pSrc into pDst one sample at a time, updating S->pState in place. */
78 /* Two implementations follow; ARM_MATH_CM0_FAMILY selects between them at compile time. */
79 #ifndef ARM_MATH_CM0_FAMILY
80
81 /* Run the below code for Cortex-M4 and Cortex-M3 */
82
83 q31_t fcurr, fnext, gcurr = 0, gnext; /* Temporary variables for lattice stages */
84 q15_t gnext1, gnext2; /* Saturated g outputs of two consecutive stages, packed together for the dual MAC */
85 uint32_t stgCnt; /* Counter for the final state copy */
86 q63_t acc; /* Accumulator */
87 uint32_t blkCnt, tapCnt; /* Sample counter and stage (tap) counter */
88 q15_t *px1, *px2, *pk, *pv; /* State read/write pointers, reflection (k) and ladder (v) coefficient pointers */
89 uint32_t numStages = S->numStages; /* number of stages */
90 q15_t *pState; /* State pointer */
91 q15_t *pStateCurnt; /* State current pointer */
92 q15_t out; /* Temporary variable for output */
93 q31_t v; /* Two ladder coefficients packed into one 32-bit word */
94 #ifdef UNALIGNED_SUPPORT_DISABLE
95 q15_t v1, v2; /* Ladder coefficients read individually, then packed into v */
96 #endif
97
98
99 blkCnt = blockSize;
100
101 pState = &S->pState[0];
102
103 /* Sample processing */
104 while(blkCnt > 0u)
105 {
106 /* Read Sample from input buffer */
107 /* fN(n) = x(n) */
108 fcurr = *pSrc++;
109
110 /* Initialize state read pointer */
111 px1 = pState;
112 /* Initialize state write pointer */
113 px2 = pState;
114 /* Set accumulator to zero */
115 acc = 0;
116 /* Initialize Ladder coeff pointer */
117 pv = &S->pvCoeffs[0];
118 /* Initialize Reflection coeff pointer */
119 pk = &S->pkCoeffs[0];
120
121
122 /* Process sample for first tap (done unconditionally, before the unrolled loop) */
123 gcurr = *px1++;
124 /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
125 fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
126 fnext = __SSAT(fnext, 16);
127 /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
128 gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
129 gnext = __SSAT(gnext, 16);
130 /* write gN(n) into state for next sample processing */
131 *px2++ = (q15_t) gnext;
132 /* y(n) += gN(n) * vN */
133 acc += (q31_t) ((gnext * (*pv++)));
134
135
136 /* Update f values for next coefficient processing */
137 fcurr = fnext;
138
139 /* Loop unrolling. Process 4 taps at a time. NOTE(review): numStages is unsigned, so numStages == 0 would wrap (numStages - 1u) to a huge count; this path appears to assume numStages >= 1 - confirm against callers. */
140 tapCnt = (numStages - 1u) >> 2;
141
142 while(tapCnt > 0u)
143 {
144
145 /* Process sample for 2nd, 6th ...taps */
146 /* Read gN-2(n-1) from state buffer */
147 gcurr = *px1++;
148 /* Process sample for 2nd, 6th .. taps */
149 /* fN-2(n) = fN-1(n) - kN-1 * gN-2(n-1) */
150 fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
151 fnext = __SSAT(fnext, 16);
152 /* gN-1(n) = kN-1 * fN-2(n) + gN-2(n-1) */
153 gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
154 gnext1 = (q15_t) __SSAT(gnext, 16);
155 /* write gN-1(n) into state */
156 *px2++ = (q15_t) gnext1;
157
158
159 /* Process sample for 3rd, 7th ...taps */
160 /* Read gN-3(n-1) from state */
161 gcurr = *px1++;
162 /* Process sample for 3rd, 7th .. taps */
163 /* fN-3(n) = fN-2(n) - kN-2 * gN-3(n-1) */
164 fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15);
165 fcurr = __SSAT(fcurr, 16);
166 /* gN-2(n) = kN-2 * fN-3(n) + gN-3(n-1) */
167 gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr;
168 gnext2 = (q15_t) __SSAT(gnext, 16);
169 /* write gN-2(n) into state */
170 *px2++ = (q15_t) gnext2;
171
172 /* Read vN-1 and vN-2 at a time */
173 #ifndef UNALIGNED_SUPPORT_DISABLE
174
175 v = *__SIMD32(pv)++;
176
177 #else
178
179 v1 = *pv++;
180 v2 = *pv++;
181
182 #ifndef ARM_MATH_BIG_ENDIAN
183
184 v = __PKHBT(v1, v2, 16);
185
186 #else
187
188 v = __PKHBT(v2, v1, 16);
189
190 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
191
192 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
193
194
195 /* Pack gN-1(n) and gN-2(n) */
196
197 #ifndef ARM_MATH_BIG_ENDIAN
198
199 gnext = __PKHBT(gnext1, gnext2, 16);
200
201 #else
202
203 gnext = __PKHBT(gnext2, gnext1, 16);
204
205 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
206
207 /* y(n) += gN-1(n) * vN-1 */
208 /* process for gN-5(n) * vN-5, gN-9(n) * vN-9 ... */
209 /* y(n) += gN-2(n) * vN-2 */
210 /* process for gN-6(n) * vN-6, gN-10(n) * vN-10 ... */
211 acc = __SMLALD(gnext, v, acc);
212
213
214 /* Process sample for 4th, 8th ...taps */
215 /* Read gN-4(n-1) from state */
216 gcurr = *px1++;
217 /* Process sample for 4th, 8th .. taps */
218 /* fN-4(n) = fN-3(n) - kN-3 * gN-4(n-1) */
219 fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
220 fnext = __SSAT(fnext, 16);
221 /* gN-3(n) = kN-3 * fN-4(n) + gN-4(n-1) */
222 gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
223 gnext1 = (q15_t) __SSAT(gnext, 16);
224 /* write gN-3(n) for the next sample process */
225 *px2++ = (q15_t) gnext1;
226
227
228 /* Process sample for 5th, 9th ...taps */
229 /* Read gN-5(n-1) from state */
230 gcurr = *px1++;
231 /* Process sample for 5th, 9th .. taps */
232 /* fN-5(n) = fN-4(n) - kN-4 * gN-5(n-1) */
233 fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15);
234 fcurr = __SSAT(fcurr, 16);
235 /* gN-4(n) = kN-4 * fN-5(n) + gN-5(n-1) */
236 gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr;
237 gnext2 = (q15_t) __SSAT(gnext, 16);
238 /* write gN-4(n) for the next sample process */
239 *px2++ = (q15_t) gnext2;
240
241 /* Read vN-3 and vN-4 at a time */
242 #ifndef UNALIGNED_SUPPORT_DISABLE
243
244 v = *__SIMD32(pv)++;
245
246 #else
247
248 v1 = *pv++;
249 v2 = *pv++;
250
251 #ifndef ARM_MATH_BIG_ENDIAN
252
253 v = __PKHBT(v1, v2, 16);
254
255 #else
256
257 v = __PKHBT(v2, v1, 16);
258
259 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
260
261 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
262
263
264 /* Pack gN-3(n) and gN-4(n) */
265 #ifndef ARM_MATH_BIG_ENDIAN
266
267 gnext = __PKHBT(gnext1, gnext2, 16);
268
269 #else
270
271 gnext = __PKHBT(gnext2, gnext1, 16);
272
273 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
274
275 /* y(n) += gN-4(n) * vN-4 */
276 /* process for gN-8(n) * vN-8, gN-12(n) * vN-12 ... */
277 /* y(n) += gN-3(n) * vN-3 */
278 /* process for gN-7(n) * vN-7, gN-11(n) * vN-11 ... */
279 acc = __SMLALD(gnext, v, acc);
280
281 tapCnt--;
282
283 }
284
285 fnext = fcurr; /* Keep fnext equal to the latest forward value in case the remainder loop below does not run */
286
287 /* Process the (numStages - 1) % 4 stages left over after the unrolled loop */
288 tapCnt = (numStages - 1u) % 0x4u;
289
290 while(tapCnt > 0u)
291 {
292 gcurr = *px1++;
293 /* Process sample for last taps */
294 fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
295 fnext = __SSAT(fnext, 16);
296 gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
297 gnext = __SSAT(gnext, 16);
298 /* Output samples for last taps */
299 acc += (q31_t) (((q31_t) gnext * (*pv++)));
300 *px2++ = (q15_t) gnext;
301 fcurr = fnext;
302
303 tapCnt--;
304 }
305
306 /* y(n) += g0(n) * v0 (the last forward value fnext is used as g0(n)) */
307 acc += (q31_t) (((q31_t) fnext * (*pv++)));
308
309 out = (q15_t) __SSAT(acc >> 15, 16); /* Drop the low 15 bits, then saturate to 16 bits */
310 *px2++ = (q15_t) fnext; /* Store the last forward value as the final state entry written for this sample */
311
312 /* write out into pDst */
313 *pDst++ = out;
314
315 /* Advance the state pointer by 1 so the next sample reads the values just written */
316 pState = pState + 1u;
317 blkCnt--;
318
319 }
320
321 /* Processing is complete. Now copy last S->numStages samples to start of the buffer
322 for the preparation of next frame process */
323 /* Points to the start of the state buffer */
324 pStateCurnt = &S->pState[0];
325 pState = &S->pState[blockSize];
326
327 stgCnt = (numStages >> 2u);
328
329 /* copy data, four q15_t values per iteration */
330 while(stgCnt > 0u)
331 {
332 #ifndef UNALIGNED_SUPPORT_DISABLE
333
334 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
335 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
336
337 #else
338
339 *pStateCurnt++ = *pState++;
340 *pStateCurnt++ = *pState++;
341 *pStateCurnt++ = *pState++;
342 *pStateCurnt++ = *pState++;
343
344 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
345
346 /* Decrement the loop counter */
347 stgCnt--;
348
349 }
350
351 /* Calculation of count for remaining q15_t data */
352 stgCnt = (numStages) % 0x4u;
353
354 /* copy data */
355 while(stgCnt > 0u)
356 {
357 *pStateCurnt++ = *pState++;
358
359 /* Decrement the loop counter */
360 stgCnt--;
361 }
362
363 #else
364
365 /* Run the below code for Cortex-M0 */
366 /* NOTE(review): fnext starts at 0 and is only assigned inside the stage loop, so numStages == 0 would use 0 for every output - confirm numStages >= 1 is required. */
367 q31_t fcurr, fnext = 0, gcurr = 0, gnext; /* Temporary variables for lattice stages */
368 uint32_t stgCnt; /* Counter for the final state copy */
369 q63_t acc; /* Accumulator */
370 uint32_t blkCnt, tapCnt; /* Sample counter and stage (tap) counter */
371 q15_t *px1, *px2, *pk, *pv; /* State read/write pointers, reflection (k) and ladder (v) coefficient pointers */
372 uint32_t numStages = S->numStages; /* number of stages */
373 q15_t *pState; /* State pointer */
374 q15_t *pStateCurnt; /* State current pointer */
375 q15_t out; /* Temporary variable for output */
376
377
378 blkCnt = blockSize;
379
380 pState = &S->pState[0];
381
382 /* Sample processing */
383 while(blkCnt > 0u)
384 {
385 /* Read Sample from input buffer */
386 /* fN(n) = x(n) */
387 fcurr = *pSrc++;
388
389 /* Initialize state read pointer */
390 px1 = pState;
391 /* Initialize state write pointer */
392 px2 = pState;
393 /* Set accumulator to zero */
394 acc = 0;
395 /* Initialize Ladder coeff pointer */
396 pv = &S->pvCoeffs[0];
397 /* Initialize Reflection coeff pointer */
398 pk = &S->pkCoeffs[0];
399
400 tapCnt = numStages;
401
402 while(tapCnt > 0u)
403 {
404 gcurr = *px1++;
405 /* Process sample */
406 /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
407 fnext = fcurr - ((gcurr * (*pk)) >> 15);
408 fnext = __SSAT(fnext, 16);
409 /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
410 gnext = ((fnext * (*pk++)) >> 15) + gcurr;
411 gnext = __SSAT(gnext, 16);
412 /* Output samples */
413 /* y(n) += gN(n) * vN */
414 acc += (q31_t) ((gnext * (*pv++)));
415 /* write gN(n) into state for next sample processing */
416 *px2++ = (q15_t) gnext;
417 /* Update f values for next coefficient processing */
418 fcurr = fnext;
419
420 tapCnt--;
421 }
422
423 /* y(n) += g0(n) * v0 (the last forward value fnext is used as g0(n)) */
424 acc += (q31_t) ((fnext * (*pv++)));
425
426 out = (q15_t) __SSAT(acc >> 15, 16); /* Drop the low 15 bits, then saturate to 16 bits */
427 *px2++ = (q15_t) fnext; /* Store the last forward value as the final state entry written for this sample */
428
429 /* write out into pDst */
430 *pDst++ = out;
431
432 /* Advance the state pointer by 1 so the next sample reads the values just written */
433 pState = pState + 1u;
434 blkCnt--;
435
436 }
437
438 /* Processing is complete. Now copy last S->numStages samples to start of the buffer
439 for the preparation of next frame process */
440 /* Points to the start of the state buffer */
441 pStateCurnt = &S->pState[0];
442 pState = &S->pState[blockSize];
443
444 stgCnt = numStages;
445
446 /* copy data */
447 while(stgCnt > 0u)
448 {
449 *pStateCurnt++ = *pState++;
450
451 /* Decrement the loop counter */
452 stgCnt--;
453 }
454
455 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
456
457 }
458
459
460
461
462 /**
463 * @} end of IIR_Lattice group
464 */
Imprint / Impressum