]> git.gir.st - tmk_keyboard.git/blob - tmk_core/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/FilteringFunctions/arm_lms_q31.c
remove experimental return, cleanup slash_question key
[tmk_keyboard.git] / tmk_core / tool / mbed / mbed-sdk / libraries / dsp / cmsis_dsp / FilteringFunctions / arm_lms_q31.c
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
3 *
4 * $Date: 17. January 2013
5 * $Revision: V1.4.1
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_lms_q31.c
9 *
10 * Description: Processing function for the Q31 LMS filter.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40
41 #include "arm_math.h"
42 /**
43 * @ingroup groupFilters
44 */
45
46 /**
47 * @addtogroup LMS
48 * @{
49 */
50
51 /**
52 * @brief Processing function for Q31 LMS filter.
53 * @param[in] *S points to an instance of the Q15 LMS filter structure.
54 * @param[in] *pSrc points to the block of input data.
55 * @param[in] *pRef points to the block of reference data.
56 * @param[out] *pOut points to the block of output data.
57 * @param[out] *pErr points to the block of error data.
58 * @param[in] blockSize number of samples to process.
59 * @return none.
60 *
61 * \par Scaling and Overflow Behavior:
62 * The function is implemented using an internal 64-bit accumulator.
63 * The accumulator has a 2.62 format and maintains full precision of the intermediate
64 * multiplication results but provides only a single guard bit.
65 * Thus, if the accumulator result overflows it wraps around rather than clips.
66 * In order to avoid overflows completely the input signal must be scaled down by
67 * log2(numTaps) bits.
68 * The reference signal should not be scaled down.
69 * After all multiply-accumulates are performed, the 2.62 accumulator is shifted
70 * and saturated to 1.31 format to yield the final result.
71 * The output signal and error signal are in 1.31 format.
72 *
73 * \par
74 * In this filter, filter coefficients are updated for each sample and the updation of filter cofficients are saturted.
75 */
76
77 void arm_lms_q31(
78 const arm_lms_instance_q31 * S,
79 q31_t * pSrc,
80 q31_t * pRef,
81 q31_t * pOut,
82 q31_t * pErr,
83 uint32_t blockSize)
84 {
85 q31_t *pState = S->pState; /* State pointer */
86 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
87 q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
88 q31_t *pStateCurnt; /* Points to the current sample of the state */
89 q31_t mu = S->mu; /* Adaptive factor */
90 q31_t *px; /* Temporary pointer for state */
91 q31_t *pb; /* Temporary pointer for coefficient buffer */
92 uint32_t tapCnt, blkCnt; /* Loop counters */
93 q63_t acc; /* Accumulator */
94 q31_t e = 0; /* error of data sample */
95 q31_t alpha; /* Intermediate constant for taps update */
96 q31_t coef; /* Temporary variable for coef */
97 q31_t acc_l, acc_h; /* temporary input */
98 uint32_t uShift = ((uint32_t) S->postShift + 1u);
99 uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */
100
101 /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
102 /* pStateCurnt points to the location where the new input data should be written */
103 pStateCurnt = &(S->pState[(numTaps - 1u)]);
104
105 /* Initializing blkCnt with blockSize */
106 blkCnt = blockSize;
107
108
109 #ifndef ARM_MATH_CM0_FAMILY
110
111 /* Run the below code for Cortex-M4 and Cortex-M3 */
112
113 while(blkCnt > 0u)
114 {
115 /* Copy the new input sample into the state buffer */
116 *pStateCurnt++ = *pSrc++;
117
118 /* Initialize state pointer */
119 px = pState;
120
121 /* Initialize coefficient pointer */
122 pb = pCoeffs;
123
124 /* Set the accumulator to zero */
125 acc = 0;
126
127 /* Loop unrolling. Process 4 taps at a time. */
128 tapCnt = numTaps >> 2;
129
130 while(tapCnt > 0u)
131 {
132 /* Perform the multiply-accumulate */
133 /* acc += b[N] * x[n-N] */
134 acc += ((q63_t) (*px++)) * (*pb++);
135
136 /* acc += b[N-1] * x[n-N-1] */
137 acc += ((q63_t) (*px++)) * (*pb++);
138
139 /* acc += b[N-2] * x[n-N-2] */
140 acc += ((q63_t) (*px++)) * (*pb++);
141
142 /* acc += b[N-3] * x[n-N-3] */
143 acc += ((q63_t) (*px++)) * (*pb++);
144
145 /* Decrement the loop counter */
146 tapCnt--;
147 }
148
149 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
150 tapCnt = numTaps % 0x4u;
151
152 while(tapCnt > 0u)
153 {
154 /* Perform the multiply-accumulate */
155 acc += ((q63_t) (*px++)) * (*pb++);
156
157 /* Decrement the loop counter */
158 tapCnt--;
159 }
160
161 /* Converting the result to 1.31 format */
162 /* Calc lower part of acc */
163 acc_l = acc & 0xffffffff;
164
165 /* Calc upper part of acc */
166 acc_h = (acc >> 32) & 0xffffffff;
167
168 acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
169
170 /* Store the result from accumulator into the destination buffer. */
171 *pOut++ = (q31_t) acc;
172
173 /* Compute and store error */
174 e = *pRef++ - (q31_t) acc;
175
176 *pErr++ = (q31_t) e;
177
178 /* Compute alpha i.e. intermediate constant for taps update */
179 alpha = (q31_t) (((q63_t) e * mu) >> 31);
180
181 /* Initialize state pointer */
182 /* Advance state pointer by 1 for the next sample */
183 px = pState++;
184
185 /* Initialize coefficient pointer */
186 pb = pCoeffs;
187
188 /* Loop unrolling. Process 4 taps at a time. */
189 tapCnt = numTaps >> 2;
190
191 /* Update filter coefficients */
192 while(tapCnt > 0u)
193 {
194 /* coef is in 2.30 format */
195 coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
196 /* get coef in 1.31 format by left shifting */
197 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
198 /* update coefficient buffer to next coefficient */
199 pb++;
200
201 coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
202 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
203 pb++;
204
205 coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
206 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
207 pb++;
208
209 coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
210 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
211 pb++;
212
213 /* Decrement the loop counter */
214 tapCnt--;
215 }
216
217 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
218 tapCnt = numTaps % 0x4u;
219
220 while(tapCnt > 0u)
221 {
222 /* Perform the multiply-accumulate */
223 coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
224 *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
225 pb++;
226
227 /* Decrement the loop counter */
228 tapCnt--;
229 }
230
231 /* Decrement the loop counter */
232 blkCnt--;
233 }
234
235 /* Processing is complete. Now copy the last numTaps - 1 samples to the
236 satrt of the state buffer. This prepares the state buffer for the
237 next function call. */
238
239 /* Points to the start of the pState buffer */
240 pStateCurnt = S->pState;
241
242 /* Loop unrolling for (numTaps - 1u) samples copy */
243 tapCnt = (numTaps - 1u) >> 2u;
244
245 /* copy data */
246 while(tapCnt > 0u)
247 {
248 *pStateCurnt++ = *pState++;
249 *pStateCurnt++ = *pState++;
250 *pStateCurnt++ = *pState++;
251 *pStateCurnt++ = *pState++;
252
253 /* Decrement the loop counter */
254 tapCnt--;
255 }
256
257 /* Calculate remaining number of copies */
258 tapCnt = (numTaps - 1u) % 0x4u;
259
260 /* Copy the remaining q31_t data */
261 while(tapCnt > 0u)
262 {
263 *pStateCurnt++ = *pState++;
264
265 /* Decrement the loop counter */
266 tapCnt--;
267 }
268
269 #else
270
271 /* Run the below code for Cortex-M0 */
272
273 while(blkCnt > 0u)
274 {
275 /* Copy the new input sample into the state buffer */
276 *pStateCurnt++ = *pSrc++;
277
278 /* Initialize pState pointer */
279 px = pState;
280
281 /* Initialize pCoeffs pointer */
282 pb = pCoeffs;
283
284 /* Set the accumulator to zero */
285 acc = 0;
286
287 /* Loop over numTaps number of values */
288 tapCnt = numTaps;
289
290 while(tapCnt > 0u)
291 {
292 /* Perform the multiply-accumulate */
293 acc += ((q63_t) (*px++)) * (*pb++);
294
295 /* Decrement the loop counter */
296 tapCnt--;
297 }
298
299 /* Converting the result to 1.31 format */
300 /* Store the result from accumulator into the destination buffer. */
301 /* Calc lower part of acc */
302 acc_l = acc & 0xffffffff;
303
304 /* Calc upper part of acc */
305 acc_h = (acc >> 32) & 0xffffffff;
306
307 acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
308
309 *pOut++ = (q31_t) acc;
310
311 /* Compute and store error */
312 e = *pRef++ - (q31_t) acc;
313
314 *pErr++ = (q31_t) e;
315
316 /* Weighting factor for the LMS version */
317 alpha = (q31_t) (((q63_t) e * mu) >> 31);
318
319 /* Initialize pState pointer */
320 /* Advance state pointer by 1 for the next sample */
321 px = pState++;
322
323 /* Initialize pCoeffs pointer */
324 pb = pCoeffs;
325
326 /* Loop over numTaps number of values */
327 tapCnt = numTaps;
328
329 while(tapCnt > 0u)
330 {
331 /* Perform the multiply-accumulate */
332 coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
333 *pb += (coef << 1u);
334 pb++;
335
336 /* Decrement the loop counter */
337 tapCnt--;
338 }
339
340 /* Decrement the loop counter */
341 blkCnt--;
342 }
343
344 /* Processing is complete. Now copy the last numTaps - 1 samples to the
345 start of the state buffer. This prepares the state buffer for the
346 next function call. */
347
348 /* Points to the start of the pState buffer */
349 pStateCurnt = S->pState;
350
351 /* Copy (numTaps - 1u) samples */
352 tapCnt = (numTaps - 1u);
353
354 /* Copy the data */
355 while(tapCnt > 0u)
356 {
357 *pStateCurnt++ = *pState++;
358
359 /* Decrement the loop counter */
360 tapCnt--;
361 }
362
363 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
364
365 }
366
367 /**
368 * @} end of LMS group
369 */
Imprint / Impressum