tmk_core/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/FilteringFunctions/arm_lms_q31.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        17. January 2013
   5 * $Revision:    V1.4.1
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_lms_q31.c
   9 *
  10 * Description:  Processing function for the Q31 LMS filter.
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Redistribution and use in source and binary forms, with or without
  15 * modification, are permitted provided that the following conditions
  16 * are met:
  17 *   - Redistributions of source code must retain the above copyright
  18 *     notice, this list of conditions and the following disclaimer.
  19 *   - Redistributions in binary form must reproduce the above copyright
  20 *     notice, this list of conditions and the following disclaimer in
  21 *     the documentation and/or other materials provided with the
  22 *     distribution.
  23 *   - Neither the name of ARM LIMITED nor the names of its contributors
  24 *     may be used to endorse or promote products derived from this
  25 *     software without specific prior written permission.
  26 *
  27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38 * POSSIBILITY OF SUCH DAMAGE.
  39 * -------------------------------------------------------------------- */
  40
  41 #include "arm_math.h"
  42 /**
  43  * @ingroup groupFilters
  44  */
  45
  46 /**
  47  * @addtogroup LMS
  48  * @{
  49  */
  50
  51  /**
  52  * @brief Processing function for Q31 LMS filter.
  53  * @param[in]  *S points to an instance of the Q15 LMS filter structure.
  54  * @param[in]  *pSrc points to the block of input data.
  55  * @param[in]  *pRef points to the block of reference data.
  56  * @param[out] *pOut points to the block of output data.
  57  * @param[out] *pErr points to the block of error data.
  58  * @param[in]  blockSize number of samples to process.
  59  * @return     none.
  60  *
  61  * \par Scaling and Overflow Behavior:
  62  * The function is implemented using an internal 64-bit accumulator.
  63  * The accumulator has a 2.62 format and maintains full precision of the intermediate
  64  * multiplication results but provides only a single guard bit.
  65  * Thus, if the accumulator result overflows it wraps around rather than clips.
  66  * In order to avoid overflows completely the input signal must be scaled down by
  67  * log2(numTaps) bits.
  68  * The reference signal should not be scaled down.
  69  * After all multiply-accumulates are performed, the 2.62 accumulator is shifted
  70  * and saturated to 1.31 format to yield the final result.
  71  * The output signal and error signal are in 1.31 format.
  72  *
  73  * \par
  74  *      In this filter, filter coefficients are updated for each sample and the updation of filter cofficients are saturted.
  75  */
  76
  77 void arm_lms_q31(
  78   const arm_lms_instance_q31 * S,
  79   q31_t * pSrc,
  80   q31_t * pRef,
  81   q31_t * pOut,
  82   q31_t * pErr,
  83   uint32_t blockSize)
  84 {
  85   q31_t *pState = S->pState;                     /* State pointer */
  86   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
  87   q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
  88   q31_t *pStateCurnt;                            /* Points to the current sample of the state */
  89   q31_t mu = S->mu;                              /* Adaptive factor */
  90   q31_t *px;                                     /* Temporary pointer for state */
  91   q31_t *pb;                                     /* Temporary pointer for coefficient buffer */
  92   uint32_t tapCnt, blkCnt;                       /* Loop counters */
  93   q63_t acc;                                     /* Accumulator */
  94   q31_t e = 0;                                   /* error of data sample */
  95   q31_t alpha;                                   /* Intermediate constant for taps update */
  96   q31_t coef;                                    /* Temporary variable for coef */
  97   q31_t acc_l, acc_h;                            /*  temporary input */
  98   uint32_t uShift = ((uint32_t) S->postShift + 1u);
  99   uint32_t lShift = 32u - uShift;                /*  Shift to be applied to the output */
 100
 101   /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
 102   /* pStateCurnt points to the location where the new input data should be written */
 103   pStateCurnt = &(S->pState[(numTaps - 1u)]);
 104
 105   /* Initializing blkCnt with blockSize */
 106   blkCnt = blockSize;
 107
 108
 109 #ifndef ARM_MATH_CM0_FAMILY
 110
 111   /* Run the below code for Cortex-M4 and Cortex-M3 */
 112
 113   while(blkCnt > 0u)
 114   {
 115     /* Copy the new input sample into the state buffer */
 116     *pStateCurnt++ = *pSrc++;
 117
 118     /* Initialize state pointer */
 119     px = pState;
 120
 121     /* Initialize coefficient pointer */
 122     pb = pCoeffs;
 123
 124     /* Set the accumulator to zero */
 125     acc = 0;
 126
 127     /* Loop unrolling.  Process 4 taps at a time. */
 128     tapCnt = numTaps >> 2;
 129
 130     while(tapCnt > 0u)
 131     {
 132       /* Perform the multiply-accumulate */
 133       /* acc +=  b[N] * x[n-N] */
 134       acc += ((q63_t) (*px++)) * (*pb++);
 135
 136       /* acc +=  b[N-1] * x[n-N-1] */
 137       acc += ((q63_t) (*px++)) * (*pb++);
 138
 139       /* acc +=  b[N-2] * x[n-N-2] */
 140       acc += ((q63_t) (*px++)) * (*pb++);
 141
 142       /* acc +=  b[N-3] * x[n-N-3] */
 143       acc += ((q63_t) (*px++)) * (*pb++);
 144
 145       /* Decrement the loop counter */
 146       tapCnt--;
 147     }
 148
 149     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 150     tapCnt = numTaps % 0x4u;
 151
 152     while(tapCnt > 0u)
 153     {
 154       /* Perform the multiply-accumulate */
 155       acc += ((q63_t) (*px++)) * (*pb++);
 156
 157       /* Decrement the loop counter */
 158       tapCnt--;
 159     }
 160
 161     /* Converting the result to 1.31 format */
 162     /* Calc lower part of acc */
 163     acc_l = acc & 0xffffffff;
 164
 165     /* Calc upper part of acc */
 166     acc_h = (acc >> 32) & 0xffffffff;
 167
 168     acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
 169
 170     /* Store the result from accumulator into the destination buffer. */
 171     *pOut++ = (q31_t) acc;
 172
 173     /* Compute and store error */
 174     e = *pRef++ - (q31_t) acc;
 175
 176     *pErr++ = (q31_t) e;
 177
 178     /* Compute alpha i.e. intermediate constant for taps update */
 179     alpha = (q31_t) (((q63_t) e * mu) >> 31);
 180
 181     /* Initialize state pointer */
 182     /* Advance state pointer by 1 for the next sample */
 183     px = pState++;
 184
 185     /* Initialize coefficient pointer */
 186     pb = pCoeffs;
 187
 188     /* Loop unrolling.  Process 4 taps at a time. */
 189     tapCnt = numTaps >> 2;
 190
 191     /* Update filter coefficients */
 192     while(tapCnt > 0u)
 193     {
 194       /* coef is in 2.30 format */
 195       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
 196       /* get coef in 1.31 format by left shifting */
 197       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 198       /* update coefficient buffer to next coefficient */
 199       pb++;
 200
 201       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
 202       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 203       pb++;
 204
 205       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
 206       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 207       pb++;
 208
 209       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
 210       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 211       pb++;
 212
 213       /* Decrement the loop counter */
 214       tapCnt--;
 215     }
 216
 217     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 218     tapCnt = numTaps % 0x4u;
 219
 220     while(tapCnt > 0u)
 221     {
 222       /* Perform the multiply-accumulate */
 223       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
 224       *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
 225       pb++;
 226
 227       /* Decrement the loop counter */
 228       tapCnt--;
 229     }
 230
 231     /* Decrement the loop counter */
 232     blkCnt--;
 233   }
 234
 235   /* Processing is complete. Now copy the last numTaps - 1 samples to the
 236      satrt of the state buffer. This prepares the state buffer for the
 237      next function call. */
 238
 239   /* Points to the start of the pState buffer */
 240   pStateCurnt = S->pState;
 241
 242   /* Loop unrolling for (numTaps - 1u) samples copy */
 243   tapCnt = (numTaps - 1u) >> 2u;
 244
 245   /* copy data */
 246   while(tapCnt > 0u)
 247   {
 248     *pStateCurnt++ = *pState++;
 249     *pStateCurnt++ = *pState++;
 250     *pStateCurnt++ = *pState++;
 251     *pStateCurnt++ = *pState++;
 252
 253     /* Decrement the loop counter */
 254     tapCnt--;
 255   }
 256
 257   /* Calculate remaining number of copies */
 258   tapCnt = (numTaps - 1u) % 0x4u;
 259
 260   /* Copy the remaining q31_t data */
 261   while(tapCnt > 0u)
 262   {
 263     *pStateCurnt++ = *pState++;
 264
 265     /* Decrement the loop counter */
 266     tapCnt--;
 267   }
 268
 269 #else
 270
 271   /* Run the below code for Cortex-M0 */
 272
 273   while(blkCnt > 0u)
 274   {
 275     /* Copy the new input sample into the state buffer */
 276     *pStateCurnt++ = *pSrc++;
 277
 278     /* Initialize pState pointer */
 279     px = pState;
 280
 281     /* Initialize pCoeffs pointer */
 282     pb = pCoeffs;
 283
 284     /* Set the accumulator to zero */
 285     acc = 0;
 286
 287     /* Loop over numTaps number of values */
 288     tapCnt = numTaps;
 289
 290     while(tapCnt > 0u)
 291     {
 292       /* Perform the multiply-accumulate */
 293       acc += ((q63_t) (*px++)) * (*pb++);
 294
 295       /* Decrement the loop counter */
 296       tapCnt--;
 297     }
 298
 299     /* Converting the result to 1.31 format */
 300     /* Store the result from accumulator into the destination buffer. */
 301     /* Calc lower part of acc */
 302     acc_l = acc & 0xffffffff;
 303
 304     /* Calc upper part of acc */
 305     acc_h = (acc >> 32) & 0xffffffff;
 306
 307     acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
 308
 309     *pOut++ = (q31_t) acc;
 310
 311     /* Compute and store error */
 312     e = *pRef++ - (q31_t) acc;
 313
 314     *pErr++ = (q31_t) e;
 315
 316     /* Weighting factor for the LMS version */
 317     alpha = (q31_t) (((q63_t) e * mu) >> 31);
 318
 319     /* Initialize pState pointer */
 320     /* Advance state pointer by 1 for the next sample */
 321     px = pState++;
 322
 323     /* Initialize pCoeffs pointer */
 324     pb = pCoeffs;
 325
 326     /* Loop over numTaps number of values */
 327     tapCnt = numTaps;
 328
 329     while(tapCnt > 0u)
 330     {
 331       /* Perform the multiply-accumulate */
 332       coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32));
 333       *pb += (coef << 1u);
 334       pb++;
 335
 336       /* Decrement the loop counter */
 337       tapCnt--;
 338     }
 339
 340     /* Decrement the loop counter */
 341     blkCnt--;
 342   }
 343
 344   /* Processing is complete. Now copy the last numTaps - 1 samples to the
 345      start of the state buffer. This prepares the state buffer for the
 346      next function call. */
 347
 348   /* Points to the start of the pState buffer */
 349   pStateCurnt = S->pState;
 350
 351   /*  Copy (numTaps - 1u) samples  */
 352   tapCnt = (numTaps - 1u);
 353
 354   /* Copy the data */
 355   while(tapCnt > 0u)
 356   {
 357     *pStateCurnt++ = *pState++;
 358
 359     /* Decrement the loop counter */
 360     tapCnt--;
 361   }
 362
 363 #endif /*   #ifndef ARM_MATH_CM0_FAMILY */
 364
 365 }
 366
 367 /**
 368    * @} end of LMS group
 369    */