]> git.gir.st - tmk_keyboard.git/blob - tmk_core/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/FilteringFunctions/arm_lms_q15.c
Merge commit '1fe4406f374291ab2e86e95a97341fd9c475fcb8'
[tmk_keyboard.git] / tmk_core / tool / mbed / mbed-sdk / libraries / dsp / cmsis_dsp / FilteringFunctions / arm_lms_q15.c
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
3 *
4 * $Date: 17. January 2013
5 * $Revision: V1.4.1
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_lms_q15.c
9 *
10 * Description: Processing function for the Q15 LMS filter.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40
41 #include "arm_math.h"
42 /**
43 * @ingroup groupFilters
44 */
45
46 /**
47 * @addtogroup LMS
48 * @{
49 */
50
51 /**
52 * @brief Processing function for Q15 LMS filter.
53 * @param[in] *S points to an instance of the Q15 LMS filter structure.
54 * @param[in] *pSrc points to the block of input data.
55 * @param[in] *pRef points to the block of reference data.
56 * @param[out] *pOut points to the block of output data.
57 * @param[out] *pErr points to the block of error data.
58 * @param[in] blockSize number of samples to process.
59 * @return none.
60 *
61 * \par Scaling and Overflow Behavior:
62 * The function is implemented using a 64-bit internal accumulator.
63 * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
64 * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
65 * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
66 * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
67 * Lastly, the accumulator is saturated to yield a result in 1.15 format.
68 *
69 * \par
70 * In this filter, the filter coefficients are updated for each sample and the coefficient updates are saturated.
71 *
72 */
73
74 void arm_lms_q15(
75 const arm_lms_instance_q15 * S,
76 q15_t * pSrc,
77 q15_t * pRef,
78 q15_t * pOut,
79 q15_t * pErr,
80 uint32_t blockSize)
81 {
82 q15_t *pState = S->pState; /* State pointer */
83 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
84 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
85 q15_t *pStateCurnt; /* Points to the current sample of the state */
86 q15_t mu = S->mu; /* Adaptive factor */
87 q15_t *px; /* Temporary pointer for state */
88 q15_t *pb; /* Temporary pointer for coefficient buffer */
89 uint32_t tapCnt, blkCnt; /* Loop counters */
90 q63_t acc; /* Accumulator */
91 q15_t e = 0; /* error of data sample */
92 q15_t alpha; /* Intermediate constant for taps update */
93 q31_t acc_l, acc_h;
94 int32_t lShift = (15 - (int32_t) S->postShift); /* Post shift */
95 int32_t uShift = (32 - lShift);
96
97
98 #ifndef ARM_MATH_CM0_FAMILY
99
100 /* Run the below code for Cortex-M4 and Cortex-M3 */
101
102 q31_t coef; /* Teporary variable for coefficient */
103
104 /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
105 /* pStateCurnt points to the location where the new input data should be written */
106 pStateCurnt = &(S->pState[(numTaps - 1u)]);
107
108 /* Initializing blkCnt with blockSize */
109 blkCnt = blockSize;
110
111 while(blkCnt > 0u)
112 {
113 /* Copy the new input sample into the state buffer */
114 *pStateCurnt++ = *pSrc++;
115
116 /* Initialize state pointer */
117 px = pState;
118
119 /* Initialize coefficient pointer */
120 pb = pCoeffs;
121
122 /* Set the accumulator to zero */
123 acc = 0;
124
125 /* Loop unrolling. Process 4 taps at a time. */
126 tapCnt = numTaps >> 2u;
127
128 while(tapCnt > 0u)
129 {
130 /* acc += b[N] * x[n-N] + b[N-1] * x[n-N-1] */
131 /* Perform the multiply-accumulate */
132 #ifndef UNALIGNED_SUPPORT_DISABLE
133
134 acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc);
135 acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc);
136
137 #else
138
139 acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
140 acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
141 acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
142 acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
143
144
145 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
146
147 /* Decrement the loop counter */
148 tapCnt--;
149 }
150
151 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
152 tapCnt = numTaps % 0x4u;
153
154 while(tapCnt > 0u)
155 {
156 /* Perform the multiply-accumulate */
157 acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
158
159 /* Decrement the loop counter */
160 tapCnt--;
161 }
162
163 /* Calc lower part of acc */
164 acc_l = acc & 0xffffffff;
165
166 /* Calc upper part of acc */
167 acc_h = (acc >> 32) & 0xffffffff;
168
169 /* Apply shift for lower part of acc and upper part of acc */
170 acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
171
172 /* Converting the result to 1.15 format and saturate the output */
173 acc = __SSAT(acc, 16);
174
175 /* Store the result from accumulator into the destination buffer. */
176 *pOut++ = (q15_t) acc;
177
178 /* Compute and store error */
179 e = *pRef++ - (q15_t) acc;
180
181 *pErr++ = (q15_t) e;
182
183 /* Compute alpha i.e. intermediate constant for taps update */
184 alpha = (q15_t) (((q31_t) e * (mu)) >> 15);
185
186 /* Initialize state pointer */
187 /* Advance state pointer by 1 for the next sample */
188 px = pState++;
189
190 /* Initialize coefficient pointer */
191 pb = pCoeffs;
192
193 /* Loop unrolling. Process 4 taps at a time. */
194 tapCnt = numTaps >> 2u;
195
196 /* Update filter coefficients */
197 while(tapCnt > 0u)
198 {
199 coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
200 *pb++ = (q15_t) __SSAT((coef), 16);
201 coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
202 *pb++ = (q15_t) __SSAT((coef), 16);
203 coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
204 *pb++ = (q15_t) __SSAT((coef), 16);
205 coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
206 *pb++ = (q15_t) __SSAT((coef), 16);
207
208 /* Decrement the loop counter */
209 tapCnt--;
210 }
211
212 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
213 tapCnt = numTaps % 0x4u;
214
215 while(tapCnt > 0u)
216 {
217 /* Perform the multiply-accumulate */
218 coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
219 *pb++ = (q15_t) __SSAT((coef), 16);
220
221 /* Decrement the loop counter */
222 tapCnt--;
223 }
224
225 /* Decrement the loop counter */
226 blkCnt--;
227
228 }
229
230 /* Processing is complete. Now copy the last numTaps - 1 samples to the
231 satrt of the state buffer. This prepares the state buffer for the
232 next function call. */
233
234 /* Points to the start of the pState buffer */
235 pStateCurnt = S->pState;
236
237 /* Calculation of count for copying integer writes */
238 tapCnt = (numTaps - 1u) >> 2;
239
240 while(tapCnt > 0u)
241 {
242
243 #ifndef UNALIGNED_SUPPORT_DISABLE
244
245 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
246 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
247 #else
248 *pStateCurnt++ = *pState++;
249 *pStateCurnt++ = *pState++;
250 *pStateCurnt++ = *pState++;
251 *pStateCurnt++ = *pState++;
252 #endif
253
254 tapCnt--;
255
256 }
257
258 /* Calculation of count for remaining q15_t data */
259 tapCnt = (numTaps - 1u) % 0x4u;
260
261 /* copy data */
262 while(tapCnt > 0u)
263 {
264 *pStateCurnt++ = *pState++;
265
266 /* Decrement the loop counter */
267 tapCnt--;
268 }
269
270 #else
271
272 /* Run the below code for Cortex-M0 */
273
274 /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
275 /* pStateCurnt points to the location where the new input data should be written */
276 pStateCurnt = &(S->pState[(numTaps - 1u)]);
277
278 /* Loop over blockSize number of values */
279 blkCnt = blockSize;
280
281 while(blkCnt > 0u)
282 {
283 /* Copy the new input sample into the state buffer */
284 *pStateCurnt++ = *pSrc++;
285
286 /* Initialize pState pointer */
287 px = pState;
288
289 /* Initialize pCoeffs pointer */
290 pb = pCoeffs;
291
292 /* Set the accumulator to zero */
293 acc = 0;
294
295 /* Loop over numTaps number of values */
296 tapCnt = numTaps;
297
298 while(tapCnt > 0u)
299 {
300 /* Perform the multiply-accumulate */
301 acc += (q63_t) ((q31_t) (*px++) * (*pb++));
302
303 /* Decrement the loop counter */
304 tapCnt--;
305 }
306
307 /* Calc lower part of acc */
308 acc_l = acc & 0xffffffff;
309
310 /* Calc upper part of acc */
311 acc_h = (acc >> 32) & 0xffffffff;
312
313 /* Apply shift for lower part of acc and upper part of acc */
314 acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
315
316 /* Converting the result to 1.15 format and saturate the output */
317 acc = __SSAT(acc, 16);
318
319 /* Store the result from accumulator into the destination buffer. */
320 *pOut++ = (q15_t) acc;
321
322 /* Compute and store error */
323 e = *pRef++ - (q15_t) acc;
324
325 *pErr++ = (q15_t) e;
326
327 /* Compute alpha i.e. intermediate constant for taps update */
328 alpha = (q15_t) (((q31_t) e * (mu)) >> 15);
329
330 /* Initialize pState pointer */
331 /* Advance state pointer by 1 for the next sample */
332 px = pState++;
333
334 /* Initialize pCoeffs pointer */
335 pb = pCoeffs;
336
337 /* Loop over numTaps number of values */
338 tapCnt = numTaps;
339
340 while(tapCnt > 0u)
341 {
342 /* Perform the multiply-accumulate */
343 *pb++ += (q15_t) (((q31_t) alpha * (*px++)) >> 15);
344
345 /* Decrement the loop counter */
346 tapCnt--;
347 }
348
349 /* Decrement the loop counter */
350 blkCnt--;
351
352 }
353
354 /* Processing is complete. Now copy the last numTaps - 1 samples to the
355 start of the state buffer. This prepares the state buffer for the
356 next function call. */
357
358 /* Points to the start of the pState buffer */
359 pStateCurnt = S->pState;
360
361 /* Copy (numTaps - 1u) samples */
362 tapCnt = (numTaps - 1u);
363
364 /* Copy the data */
365 while(tapCnt > 0u)
366 {
367 *pStateCurnt++ = *pState++;
368
369 /* Decrement the loop counter */
370 tapCnt--;
371 }
372
373 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
374
375 }
376
377 /**
378 * @} end of LMS group
379 */
Imprint / Impressum