tmk_core/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/FilteringFunctions/arm_lms_norm_q15.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        17. January 2013
   5 * $Revision:    V1.4.1
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_lms_norm_q15.c
   9 *
  10 * Description:  Q15 NLMS filter.
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Redistribution and use in source and binary forms, with or without
  15 * modification, are permitted provided that the following conditions
  16 * are met:
  17 *   - Redistributions of source code must retain the above copyright
  18 *     notice, this list of conditions and the following disclaimer.
  19 *   - Redistributions in binary form must reproduce the above copyright
  20 *     notice, this list of conditions and the following disclaimer in
  21 *     the documentation and/or other materials provided with the
  22 *     distribution.
  23 *   - Neither the name of ARM LIMITED nor the names of its contributors
  24 *     may be used to endorse or promote products derived from this
  25 *     software without specific prior written permission.
  26 *
  27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38 * POSSIBILITY OF SUCH DAMAGE.
  39 * -------------------------------------------------------------------- */
  40
  41 #include "arm_math.h"
  42
  43 /**
  44  * @ingroup groupFilters
  45  */
  46
  47 /**
  48  * @addtogroup LMS_NORM
  49  * @{
  50  */
  51
  52 /**
  53 * @brief Processing function for Q15 normalized LMS filter.
  54 * @param[in] *S points to an instance of the Q15 normalized LMS filter structure.
  55 * @param[in] *pSrc points to the block of input data.
  56 * @param[in] *pRef points to the block of reference data.
  57 * @param[out] *pOut points to the block of output data.
  58 * @param[out] *pErr points to the block of error data.
  59 * @param[in] blockSize number of samples to process.
  60 * @return none.
  61 *
  62 * <b>Scaling and Overflow Behavior:</b>
  63 * \par
  64 * The function is implemented using a 64-bit internal accumulator.
  65 * Both coefficients and state variables are represented in 1.15 format and
  66 * multiplications yield a 2.30 result. The 2.30 intermediate results are
  67 * accumulated in a 64-bit accumulator in 34.30 format.
  68 * There is no risk of internal overflow with this approach and the full
  69 * precision of intermediate multiplications is preserved. After all additions
  70 * have been performed, the accumulator is truncated to 34.15 format by
  71 * discarding low 15 bits. Lastly, the accumulator is saturated to yield a
  72 * result in 1.15 format.
  73 *
  74 * \par
  75 *       In this filter, filter coefficients are updated for each sample and the updation of filter cofficients are saturted.
  76 *
  77  */
  78
  79 void arm_lms_norm_q15(
  80   arm_lms_norm_instance_q15 * S,
  81   q15_t * pSrc,
  82   q15_t * pRef,
  83   q15_t * pOut,
  84   q15_t * pErr,
  85   uint32_t blockSize)
  86 {
  87   q15_t *pState = S->pState;                     /* State pointer */
  88   q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
  89   q15_t *pStateCurnt;                            /* Points to the current sample of the state */
  90   q15_t *px, *pb;                                /* Temporary pointers for state and coefficient buffers */
  91   q15_t mu = S->mu;                              /* Adaptive factor */
  92   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
  93   uint32_t tapCnt, blkCnt;                       /* Loop counters */
  94   q31_t energy;                                  /* Energy of the input */
  95   q63_t acc;                                     /* Accumulator */
  96   q15_t e = 0, d = 0;                            /* error, reference data sample */
  97   q15_t w = 0, in;                               /* weight factor and state */
  98   q15_t x0;                                      /* temporary variable to hold input sample */
  99   //uint32_t shift = (uint32_t) S->postShift + 1u; /* Shift to be applied to the output */
 100   q15_t errorXmu, oneByEnergy;                   /* Temporary variables to store error and mu product and reciprocal of energy */
 101   q15_t postShift;                               /* Post shift to be applied to weight after reciprocal calculation */
 102   q31_t coef;                                    /* Teporary variable for coefficient */
 103   q31_t acc_l, acc_h;
 104   int32_t lShift = (15 - (int32_t) S->postShift);       /*  Post shift  */
 105   int32_t uShift = (32 - lShift);
 106
 107   energy = S->energy;
 108   x0 = S->x0;
 109
 110   /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
 111   /* pStateCurnt points to the location where the new input data should be written */
 112   pStateCurnt = &(S->pState[(numTaps - 1u)]);
 113
 114   /* Loop over blockSize number of values */
 115   blkCnt = blockSize;
 116
 117
 118 #ifndef ARM_MATH_CM0_FAMILY
 119
 120   /* Run the below code for Cortex-M4 and Cortex-M3 */
 121
 122   while(blkCnt > 0u)
 123   {
 124     /* Copy the new input sample into the state buffer */
 125     *pStateCurnt++ = *pSrc;
 126
 127     /* Initialize pState pointer */
 128     px = pState;
 129
 130     /* Initialize coeff pointer */
 131     pb = (pCoeffs);
 132
 133     /* Read the sample from input buffer */
 134     in = *pSrc++;
 135
 136     /* Update the energy calculation */
 137     energy -= (((q31_t) x0 * (x0)) >> 15);
 138     energy += (((q31_t) in * (in)) >> 15);
 139
 140     /* Set the accumulator to zero */
 141     acc = 0;
 142
 143     /* Loop unrolling.  Process 4 taps at a time. */
 144     tapCnt = numTaps >> 2;
 145
 146     while(tapCnt > 0u)
 147     {
 148
 149       /* Perform the multiply-accumulate */
 150 #ifndef UNALIGNED_SUPPORT_DISABLE
 151
 152       acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc);
 153       acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc);
 154
 155 #else
 156
 157       acc += (((q31_t) * px++ * (*pb++)));
 158       acc += (((q31_t) * px++ * (*pb++)));
 159       acc += (((q31_t) * px++ * (*pb++)));
 160       acc += (((q31_t) * px++ * (*pb++)));
 161
 162 #endif  /*      #ifndef UNALIGNED_SUPPORT_DISABLE       */
 163
 164       /* Decrement the loop counter */
 165       tapCnt--;
 166     }
 167
 168     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 169     tapCnt = numTaps % 0x4u;
 170
 171     while(tapCnt > 0u)
 172     {
 173       /* Perform the multiply-accumulate */
 174       acc += (((q31_t) * px++ * (*pb++)));
 175
 176       /* Decrement the loop counter */
 177       tapCnt--;
 178     }
 179
 180     /* Calc lower part of acc */
 181     acc_l = acc & 0xffffffff;
 182
 183     /* Calc upper part of acc */
 184     acc_h = (acc >> 32) & 0xffffffff;
 185
 186     /* Apply shift for lower part of acc and upper part of acc */
 187     acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
 188
 189     /* Converting the result to 1.15 format and saturate the output */
 190     acc = __SSAT(acc, 16u);
 191
 192     /* Store the result from accumulator into the destination buffer. */
 193     *pOut++ = (q15_t) acc;
 194
 195     /* Compute and store error */
 196     d = *pRef++;
 197     e = d - (q15_t) acc;
 198     *pErr++ = e;
 199
 200     /* Calculation of 1/energy */
 201     postShift = arm_recip_q15((q15_t) energy + DELTA_Q15,
 202                               &oneByEnergy, S->recipTable);
 203
 204     /* Calculation of e * mu value */
 205     errorXmu = (q15_t) (((q31_t) e * mu) >> 15);
 206
 207     /* Calculation of (e * mu) * (1/energy) value */
 208     acc = (((q31_t) errorXmu * oneByEnergy) >> (15 - postShift));
 209
 210     /* Weighting factor for the normalized version */
 211     w = (q15_t) __SSAT((q31_t) acc, 16);
 212
 213     /* Initialize pState pointer */
 214     px = pState;
 215
 216     /* Initialize coeff pointer */
 217     pb = (pCoeffs);
 218
 219     /* Loop unrolling.  Process 4 taps at a time. */
 220     tapCnt = numTaps >> 2;
 221
 222     /* Update filter coefficients */
 223     while(tapCnt > 0u)
 224     {
 225       coef = *pb + (((q31_t) w * (*px++)) >> 15);
 226       *pb++ = (q15_t) __SSAT((coef), 16);
 227       coef = *pb + (((q31_t) w * (*px++)) >> 15);
 228       *pb++ = (q15_t) __SSAT((coef), 16);
 229       coef = *pb + (((q31_t) w * (*px++)) >> 15);
 230       *pb++ = (q15_t) __SSAT((coef), 16);
 231       coef = *pb + (((q31_t) w * (*px++)) >> 15);
 232       *pb++ = (q15_t) __SSAT((coef), 16);
 233
 234       /* Decrement the loop counter */
 235       tapCnt--;
 236     }
 237
 238     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 239     tapCnt = numTaps % 0x4u;
 240
 241     while(tapCnt > 0u)
 242     {
 243       /* Perform the multiply-accumulate */
 244       coef = *pb + (((q31_t) w * (*px++)) >> 15);
 245       *pb++ = (q15_t) __SSAT((coef), 16);
 246
 247       /* Decrement the loop counter */
 248       tapCnt--;
 249     }
 250
 251     /* Read the sample from state buffer */
 252     x0 = *pState;
 253
 254     /* Advance state pointer by 1 for the next sample */
 255     pState = pState + 1u;
 256
 257     /* Decrement the loop counter */
 258     blkCnt--;
 259   }
 260
 261   /* Save energy and x0 values for the next frame */
 262   S->energy = (q15_t) energy;
 263   S->x0 = x0;
 264
 265   /* Processing is complete. Now copy the last numTaps - 1 samples to the
 266      satrt of the state buffer. This prepares the state buffer for the
 267      next function call. */
 268
 269   /* Points to the start of the pState buffer */
 270   pStateCurnt = S->pState;
 271
 272   /* Calculation of count for copying integer writes */
 273   tapCnt = (numTaps - 1u) >> 2;
 274
 275   while(tapCnt > 0u)
 276   {
 277
 278 #ifndef UNALIGNED_SUPPORT_DISABLE
 279
 280     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
 281     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
 282
 283 #else
 284
 285     *pStateCurnt++ = *pState++;
 286     *pStateCurnt++ = *pState++;
 287     *pStateCurnt++ = *pState++;
 288     *pStateCurnt++ = *pState++;
 289
 290 #endif
 291
 292     tapCnt--;
 293
 294   }
 295
 296   /* Calculation of count for remaining q15_t data */
 297   tapCnt = (numTaps - 1u) % 0x4u;
 298
 299   /* copy data */
 300   while(tapCnt > 0u)
 301   {
 302     *pStateCurnt++ = *pState++;
 303
 304     /* Decrement the loop counter */
 305     tapCnt--;
 306   }
 307
 308 #else
 309
 310   /* Run the below code for Cortex-M0 */
 311
 312   while(blkCnt > 0u)
 313   {
 314     /* Copy the new input sample into the state buffer */
 315     *pStateCurnt++ = *pSrc;
 316
 317     /* Initialize pState pointer */
 318     px = pState;
 319
 320     /* Initialize pCoeffs pointer */
 321     pb = pCoeffs;
 322
 323     /* Read the sample from input buffer */
 324     in = *pSrc++;
 325
 326     /* Update the energy calculation */
 327     energy -= (((q31_t) x0 * (x0)) >> 15);
 328     energy += (((q31_t) in * (in)) >> 15);
 329
 330     /* Set the accumulator to zero */
 331     acc = 0;
 332
 333     /* Loop over numTaps number of values */
 334     tapCnt = numTaps;
 335
 336     while(tapCnt > 0u)
 337     {
 338       /* Perform the multiply-accumulate */
 339       acc += (((q31_t) * px++ * (*pb++)));
 340
 341       /* Decrement the loop counter */
 342       tapCnt--;
 343     }
 344
 345     /* Calc lower part of acc */
 346     acc_l = acc & 0xffffffff;
 347
 348     /* Calc upper part of acc */
 349     acc_h = (acc >> 32) & 0xffffffff;
 350
 351     /* Apply shift for lower part of acc and upper part of acc */
 352     acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
 353
 354     /* Converting the result to 1.15 format and saturate the output */
 355     acc = __SSAT(acc, 16u);
 356
 357     /* Converting the result to 1.15 format */
 358     //acc = __SSAT((acc >> (16u - shift)), 16u);
 359
 360     /* Store the result from accumulator into the destination buffer. */
 361     *pOut++ = (q15_t) acc;
 362
 363     /* Compute and store error */
 364     d = *pRef++;
 365     e = d - (q15_t) acc;
 366     *pErr++ = e;
 367
 368     /* Calculation of 1/energy */
 369     postShift = arm_recip_q15((q15_t) energy + DELTA_Q15,
 370                               &oneByEnergy, S->recipTable);
 371
 372     /* Calculation of e * mu value */
 373     errorXmu = (q15_t) (((q31_t) e * mu) >> 15);
 374
 375     /* Calculation of (e * mu) * (1/energy) value */
 376     acc = (((q31_t) errorXmu * oneByEnergy) >> (15 - postShift));
 377
 378     /* Weighting factor for the normalized version */
 379     w = (q15_t) __SSAT((q31_t) acc, 16);
 380
 381     /* Initialize pState pointer */
 382     px = pState;
 383
 384     /* Initialize coeff pointer */
 385     pb = (pCoeffs);
 386
 387     /* Loop over numTaps number of values */
 388     tapCnt = numTaps;
 389
 390     while(tapCnt > 0u)
 391     {
 392       /* Perform the multiply-accumulate */
 393       coef = *pb + (((q31_t) w * (*px++)) >> 15);
 394       *pb++ = (q15_t) __SSAT((coef), 16);
 395
 396       /* Decrement the loop counter */
 397       tapCnt--;
 398     }
 399
 400     /* Read the sample from state buffer */
 401     x0 = *pState;
 402
 403     /* Advance state pointer by 1 for the next sample */
 404     pState = pState + 1u;
 405
 406     /* Decrement the loop counter */
 407     blkCnt--;
 408   }
 409
 410   /* Save energy and x0 values for the next frame */
 411   S->energy = (q15_t) energy;
 412   S->x0 = x0;
 413
 414   /* Processing is complete. Now copy the last numTaps - 1 samples to the
 415      satrt of the state buffer. This prepares the state buffer for the
 416      next function call. */
 417
 418   /* Points to the start of the pState buffer */
 419   pStateCurnt = S->pState;
 420
 421   /* copy (numTaps - 1u) data */
 422   tapCnt = (numTaps - 1u);
 423
 424   /* copy data */
 425   while(tapCnt > 0u)
 426   {
 427     *pStateCurnt++ = *pState++;
 428
 429     /* Decrement the loop counter */
 430     tapCnt--;
 431   }
 432
 433 #endif /*   #ifndef ARM_MATH_CM0_FAMILY */
 434
 435 }
 436
 437
 438 /**
 439    * @} end of LMS_NORM group
 440    */