1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
4 * $Date: 17. January 2013
7 * Project: CMSIS DSP Library
8 * Title: arm_lms_norm_q15.c
10 * Description: Q15 NLMS filter.
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
44 * @ingroup groupFilters
48 * @addtogroup LMS_NORM
53 * @brief Processing function for Q15 normalized LMS filter.
54 * @param[in] *S points to an instance of the Q15 normalized LMS filter structure.
55 * @param[in] *pSrc points to the block of input data.
56 * @param[in] *pRef points to the block of reference data.
57 * @param[out] *pOut points to the block of output data.
58 * @param[out] *pErr points to the block of error data.
59 * @param[in] blockSize number of samples to process.
62 * <b>Scaling and Overflow Behavior:</b>
64 * The function is implemented using a 64-bit internal accumulator.
65 * Both coefficients and state variables are represented in 1.15 format and
66 * multiplications yield a 2.30 result. The 2.30 intermediate results are
67 * accumulated in a 64-bit accumulator in 34.30 format.
68 * There is no risk of internal overflow with this approach and the full
69 * precision of intermediate multiplications is preserved. After all additions
70 * have been performed, the accumulator is truncated to 34.15 format by
71 * discarding low 15 bits. Lastly, the accumulator is saturated to yield a
72 * result in 1.15 format.
75 * In this filter, the filter coefficients are updated for each sample, and the coefficient updates are saturated.
79 void arm_lms_norm_q15(
80 arm_lms_norm_instance_q15
* S
,
87 q15_t
*pState
= S
->pState
; /* State pointer */
88 q15_t
*pCoeffs
= S
->pCoeffs
; /* Coefficient pointer */
89 q15_t
*pStateCurnt
; /* Points to the current sample of the state */
90 q15_t
*px
, *pb
; /* Temporary pointers for state and coefficient buffers */
91 q15_t mu
= S
->mu
; /* Adaptive factor */
92 uint32_t numTaps
= S
->numTaps
; /* Number of filter coefficients in the filter */
93 uint32_t tapCnt
, blkCnt
; /* Loop counters */
94 q31_t energy
; /* Energy of the input */
95 q63_t acc
; /* Accumulator */
96 q15_t e
= 0, d
= 0; /* error, reference data sample */
97 q15_t w
= 0, in
; /* weight factor and state */
98 q15_t x0
; /* temporary variable to hold input sample */
99 //uint32_t shift = (uint32_t) S->postShift + 1u; /* Shift to be applied to the output */
100 q15_t errorXmu
, oneByEnergy
; /* Temporary variables to store error and mu product and reciprocal of energy */
101 q15_t postShift
; /* Post shift to be applied to weight after reciprocal calculation */
102 q31_t coef
; /* Temporary variable for coefficient */
104 int32_t lShift
= (15 - (int32_t) S
->postShift
); /* Post shift */
105 int32_t uShift
= (32 - lShift
);
110 /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
111 /* pStateCurnt points to the location where the new input data should be written */
112 pStateCurnt
= &(S
->pState
[(numTaps
- 1u)]);
114 /* Loop over blockSize number of values */
118 #ifndef ARM_MATH_CM0_FAMILY
120 /* Run the below code for Cortex-M4 and Cortex-M3 */
124 /* Copy the new input sample into the state buffer */
125 *pStateCurnt
++ = *pSrc
;
127 /* Initialize pState pointer */
130 /* Initialize coeff pointer */
133 /* Read the sample from input buffer */
136 /* Update the energy calculation */
137 energy
-= (((q31_t
) x0
* (x0
)) >> 15);
138 energy
+= (((q31_t
) in
* (in
)) >> 15);
140 /* Set the accumulator to zero */
143 /* Loop unrolling. Process 4 taps at a time. */
144 tapCnt
= numTaps
>> 2;
149 /* Perform the multiply-accumulate */
150 #ifndef UNALIGNED_SUPPORT_DISABLE
152 acc
= __SMLALD(*__SIMD32(px
)++, (*__SIMD32(pb
)++), acc
);
153 acc
= __SMLALD(*__SIMD32(px
)++, (*__SIMD32(pb
)++), acc
);
157 acc
+= (((q31_t
) * px
++ * (*pb
++)));
158 acc
+= (((q31_t
) * px
++ * (*pb
++)));
159 acc
+= (((q31_t
) * px
++ * (*pb
++)));
160 acc
+= (((q31_t
) * px
++ * (*pb
++)));
162 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
164 /* Decrement the loop counter */
168 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
169 tapCnt
= numTaps
% 0x4u
;
173 /* Perform the multiply-accumulate */
174 acc
+= (((q31_t
) * px
++ * (*pb
++)));
176 /* Decrement the loop counter */
180 /* Calc lower part of acc */
181 acc_l
= acc
& 0xffffffff;
183 /* Calc upper part of acc */
184 acc_h
= (acc
>> 32) & 0xffffffff;
186 /* Apply shift for lower part of acc and upper part of acc */
187 acc
= (uint32_t) acc_l
>> lShift
| acc_h
<< uShift
;
189 /* Converting the result to 1.15 format and saturate the output */
190 acc
= __SSAT(acc
, 16u);
192 /* Store the result from accumulator into the destination buffer. */
193 *pOut
++ = (q15_t
) acc
;
195 /* Compute and store error */
200 /* Calculation of 1/energy */
201 postShift
= arm_recip_q15((q15_t
) energy
+ DELTA_Q15
,
202 &oneByEnergy
, S
->recipTable
);
204 /* Calculation of e * mu value */
205 errorXmu
= (q15_t
) (((q31_t
) e
* mu
) >> 15);
207 /* Calculation of (e * mu) * (1/energy) value */
208 acc
= (((q31_t
) errorXmu
* oneByEnergy
) >> (15 - postShift
));
210 /* Weighting factor for the normalized version */
211 w
= (q15_t
) __SSAT((q31_t
) acc
, 16);
213 /* Initialize pState pointer */
216 /* Initialize coeff pointer */
219 /* Loop unrolling. Process 4 taps at a time. */
220 tapCnt
= numTaps
>> 2;
222 /* Update filter coefficients */
225 coef
= *pb
+ (((q31_t
) w
* (*px
++)) >> 15);
226 *pb
++ = (q15_t
) __SSAT((coef
), 16);
227 coef
= *pb
+ (((q31_t
) w
* (*px
++)) >> 15);
228 *pb
++ = (q15_t
) __SSAT((coef
), 16);
229 coef
= *pb
+ (((q31_t
) w
* (*px
++)) >> 15);
230 *pb
++ = (q15_t
) __SSAT((coef
), 16);
231 coef
= *pb
+ (((q31_t
) w
* (*px
++)) >> 15);
232 *pb
++ = (q15_t
) __SSAT((coef
), 16);
234 /* Decrement the loop counter */
238 /* If the filter length is not a multiple of 4, compute the remaining filter taps */
239 tapCnt
= numTaps
% 0x4u
;
243 /* Update the filter coefficient and saturate the result to 16 bits */
244 coef
= *pb
+ (((q31_t
) w
* (*px
++)) >> 15);
245 *pb
++ = (q15_t
) __SSAT((coef
), 16);
247 /* Decrement the loop counter */
251 /* Read the sample from state buffer */
254 /* Advance state pointer by 1 for the next sample */
255 pState
= pState
+ 1u;
257 /* Decrement the loop counter */
261 /* Save energy and x0 values for the next frame */
262 S
->energy
= (q15_t
) energy
;
265 /* Processing is complete. Now copy the last numTaps - 1 samples to the
266 start of the state buffer. This prepares the state buffer for the
267 next function call. */
269 /* Points to the start of the pState buffer */
270 pStateCurnt
= S
->pState
;
272 /* Number of groups of four samples to copy */
273 tapCnt
= (numTaps
- 1u) >> 2;
278 #ifndef UNALIGNED_SUPPORT_DISABLE
280 *__SIMD32(pStateCurnt
)++ = *__SIMD32(pState
)++;
281 *__SIMD32(pStateCurnt
)++ = *__SIMD32(pState
)++;
285 *pStateCurnt
++ = *pState
++;
286 *pStateCurnt
++ = *pState
++;
287 *pStateCurnt
++ = *pState
++;
288 *pStateCurnt
++ = *pState
++;
296 /* Calculation of count for remaining q15_t data */
297 tapCnt
= (numTaps
- 1u) % 0x4u
;
302 *pStateCurnt
++ = *pState
++;
304 /* Decrement the loop counter */
310 /* Run the below code for Cortex-M0 */
314 /* Copy the new input sample into the state buffer */
315 *pStateCurnt
++ = *pSrc
;
317 /* Initialize pState pointer */
320 /* Initialize pCoeffs pointer */
323 /* Read the sample from input buffer */
326 /* Update the energy calculation */
327 energy
-= (((q31_t
) x0
* (x0
)) >> 15);
328 energy
+= (((q31_t
) in
* (in
)) >> 15);
330 /* Set the accumulator to zero */
333 /* Loop over numTaps number of values */
338 /* Perform the multiply-accumulate */
339 acc
+= (((q31_t
) * px
++ * (*pb
++)));
341 /* Decrement the loop counter */
345 /* Calc lower part of acc */
346 acc_l
= acc
& 0xffffffff;
348 /* Calc upper part of acc */
349 acc_h
= (acc
>> 32) & 0xffffffff;
351 /* Apply shift for lower part of acc and upper part of acc */
352 acc
= (uint32_t) acc_l
>> lShift
| acc_h
<< uShift
;
354 /* Converting the result to 1.15 format and saturate the output */
355 acc
= __SSAT(acc
, 16u);
357 /* Converting the result to 1.15 format */
358 //acc = __SSAT((acc >> (16u - shift)), 16u);
360 /* Store the result from accumulator into the destination buffer. */
361 *pOut
++ = (q15_t
) acc
;
363 /* Compute and store error */
368 /* Calculation of 1/energy */
369 postShift
= arm_recip_q15((q15_t
) energy
+ DELTA_Q15
,
370 &oneByEnergy
, S
->recipTable
);
372 /* Calculation of e * mu value */
373 errorXmu
= (q15_t
) (((q31_t
) e
* mu
) >> 15);
375 /* Calculation of (e * mu) * (1/energy) value */
376 acc
= (((q31_t
) errorXmu
* oneByEnergy
) >> (15 - postShift
));
378 /* Weighting factor for the normalized version */
379 w
= (q15_t
) __SSAT((q31_t
) acc
, 16);
381 /* Initialize pState pointer */
384 /* Initialize coeff pointer */
387 /* Loop over numTaps number of values */
392 /* Update the filter coefficient and saturate the result to 16 bits */
393 coef
= *pb
+ (((q31_t
) w
* (*px
++)) >> 15);
394 *pb
++ = (q15_t
) __SSAT((coef
), 16);
396 /* Decrement the loop counter */
400 /* Read the sample from state buffer */
403 /* Advance state pointer by 1 for the next sample */
404 pState
= pState
+ 1u;
406 /* Decrement the loop counter */
410 /* Save energy and x0 values for the next frame */
411 S
->energy
= (q15_t
) energy
;
414 /* Processing is complete. Now copy the last numTaps - 1 samples to the
415 start of the state buffer. This prepares the state buffer for the
416 next function call. */
418 /* Points to the start of the pState buffer */
419 pStateCurnt
= S
->pState
;
421 /* copy (numTaps - 1u) data */
422 tapCnt
= (numTaps
- 1u);
427 *pStateCurnt
++ = *pState
++;
429 /* Decrement the loop counter */
433 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
439 * @} end of LMS_NORM group