tmk_core/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/FilteringFunctions/arm_lms_q15.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        17. January 2013
   5 * $Revision:    V1.4.1
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_lms_q15.c
   9 *
  10 * Description:  Processing function for the Q15 LMS filter.
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Redistribution and use in source and binary forms, with or without
  15 * modification, are permitted provided that the following conditions
  16 * are met:
  17 *   - Redistributions of source code must retain the above copyright
  18 *     notice, this list of conditions and the following disclaimer.
  19 *   - Redistributions in binary form must reproduce the above copyright
  20 *     notice, this list of conditions and the following disclaimer in
  21 *     the documentation and/or other materials provided with the
  22 *     distribution.
  23 *   - Neither the name of ARM LIMITED nor the names of its contributors
  24 *     may be used to endorse or promote products derived from this
  25 *     software without specific prior written permission.
  26 *
  27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38 * POSSIBILITY OF SUCH DAMAGE.
  39 * -------------------------------------------------------------------- */
  40
  41 #include "arm_math.h"
  42 /**
  43  * @ingroup groupFilters
  44  */
  45
  46 /**
  47  * @addtogroup LMS
  48  * @{
  49  */
  50
  51  /**
  52  * @brief Processing function for Q15 LMS filter.
  53  * @param[in] *S points to an instance of the Q15 LMS filter structure.
  54  * @param[in] *pSrc points to the block of input data.
  55  * @param[in] *pRef points to the block of reference data.
  56  * @param[out] *pOut points to the block of output data.
  57  * @param[out] *pErr points to the block of error data.
  58  * @param[in] blockSize number of samples to process.
  59  * @return none.
  60  *
  61  * \par Scaling and Overflow Behavior:
  62  * The function is implemented using a 64-bit internal accumulator.
  63  * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
  64  * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
  65  * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
  66  * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
  67  * Lastly, the accumulator is saturated to yield a result in 1.15 format.
  68  *
  69  * \par
  70  *      In this filter, filter coefficients are updated for each sample and the updation of filter cofficients are saturted.
  71  *
  72  */
  73
  74 void arm_lms_q15(
  75   const arm_lms_instance_q15 * S,
  76   q15_t * pSrc,
  77   q15_t * pRef,
  78   q15_t * pOut,
  79   q15_t * pErr,
  80   uint32_t blockSize)
  81 {
  82   q15_t *pState = S->pState;                     /* State pointer */
  83   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
  84   q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
  85   q15_t *pStateCurnt;                            /* Points to the current sample of the state */
  86   q15_t mu = S->mu;                              /* Adaptive factor */
  87   q15_t *px;                                     /* Temporary pointer for state */
  88   q15_t *pb;                                     /* Temporary pointer for coefficient buffer */
  89   uint32_t tapCnt, blkCnt;                       /* Loop counters */
  90   q63_t acc;                                     /* Accumulator */
  91   q15_t e = 0;                                   /* error of data sample */
  92   q15_t alpha;                                   /* Intermediate constant for taps update */
  93   q31_t acc_l, acc_h;
  94   int32_t lShift = (15 - (int32_t) S->postShift);       /*  Post shift  */
  95   int32_t uShift = (32 - lShift);
  96
  97
  98 #ifndef ARM_MATH_CM0_FAMILY
  99
 100   /* Run the below code for Cortex-M4 and Cortex-M3 */
 101
 102   q31_t coef;                                    /* Teporary variable for coefficient */
 103
 104   /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
 105   /* pStateCurnt points to the location where the new input data should be written */
 106   pStateCurnt = &(S->pState[(numTaps - 1u)]);
 107
 108   /* Initializing blkCnt with blockSize */
 109   blkCnt = blockSize;
 110
 111   while(blkCnt > 0u)
 112   {
 113     /* Copy the new input sample into the state buffer */
 114     *pStateCurnt++ = *pSrc++;
 115
 116     /* Initialize state pointer */
 117     px = pState;
 118
 119     /* Initialize coefficient pointer */
 120     pb = pCoeffs;
 121
 122     /* Set the accumulator to zero */
 123     acc = 0;
 124
 125     /* Loop unrolling.  Process 4 taps at a time. */
 126     tapCnt = numTaps >> 2u;
 127
 128     while(tapCnt > 0u)
 129     {
 130       /* acc +=  b[N] * x[n-N] + b[N-1] * x[n-N-1] */
 131       /* Perform the multiply-accumulate */
 132 #ifndef UNALIGNED_SUPPORT_DISABLE
 133
 134       acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc);
 135       acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc);
 136
 137 #else
 138
 139       acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
 140       acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
 141       acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
 142       acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
 143
 144
 145 #endif  /*      #ifndef UNALIGNED_SUPPORT_DISABLE       */
 146
 147       /* Decrement the loop counter */
 148       tapCnt--;
 149     }
 150
 151     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 152     tapCnt = numTaps % 0x4u;
 153
 154     while(tapCnt > 0u)
 155     {
 156       /* Perform the multiply-accumulate */
 157       acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
 158
 159       /* Decrement the loop counter */
 160       tapCnt--;
 161     }
 162
 163     /* Calc lower part of acc */
 164     acc_l = acc & 0xffffffff;
 165
 166     /* Calc upper part of acc */
 167     acc_h = (acc >> 32) & 0xffffffff;
 168
 169     /* Apply shift for lower part of acc and upper part of acc */
 170     acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
 171
 172     /* Converting the result to 1.15 format and saturate the output */
 173     acc = __SSAT(acc, 16);
 174
 175     /* Store the result from accumulator into the destination buffer. */
 176     *pOut++ = (q15_t) acc;
 177
 178     /* Compute and store error */
 179     e = *pRef++ - (q15_t) acc;
 180
 181     *pErr++ = (q15_t) e;
 182
 183     /* Compute alpha i.e. intermediate constant for taps update */
 184     alpha = (q15_t) (((q31_t) e * (mu)) >> 15);
 185
 186     /* Initialize state pointer */
 187     /* Advance state pointer by 1 for the next sample */
 188     px = pState++;
 189
 190     /* Initialize coefficient pointer */
 191     pb = pCoeffs;
 192
 193     /* Loop unrolling.  Process 4 taps at a time. */
 194     tapCnt = numTaps >> 2u;
 195
 196     /* Update filter coefficients */
 197     while(tapCnt > 0u)
 198     {
 199       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
 200       *pb++ = (q15_t) __SSAT((coef), 16);
 201       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
 202       *pb++ = (q15_t) __SSAT((coef), 16);
 203       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
 204       *pb++ = (q15_t) __SSAT((coef), 16);
 205       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
 206       *pb++ = (q15_t) __SSAT((coef), 16);
 207
 208       /* Decrement the loop counter */
 209       tapCnt--;
 210     }
 211
 212     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 213     tapCnt = numTaps % 0x4u;
 214
 215     while(tapCnt > 0u)
 216     {
 217       /* Perform the multiply-accumulate */
 218       coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
 219       *pb++ = (q15_t) __SSAT((coef), 16);
 220
 221       /* Decrement the loop counter */
 222       tapCnt--;
 223     }
 224
 225     /* Decrement the loop counter */
 226     blkCnt--;
 227
 228   }
 229
 230   /* Processing is complete. Now copy the last numTaps - 1 samples to the
 231      satrt of the state buffer. This prepares the state buffer for the
 232      next function call. */
 233
 234   /* Points to the start of the pState buffer */
 235   pStateCurnt = S->pState;
 236
 237   /* Calculation of count for copying integer writes */
 238   tapCnt = (numTaps - 1u) >> 2;
 239
 240   while(tapCnt > 0u)
 241   {
 242
 243 #ifndef UNALIGNED_SUPPORT_DISABLE
 244
 245     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
 246     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
 247 #else
 248     *pStateCurnt++ = *pState++;
 249     *pStateCurnt++ = *pState++;
 250     *pStateCurnt++ = *pState++;
 251     *pStateCurnt++ = *pState++;
 252 #endif
 253
 254     tapCnt--;
 255
 256   }
 257
 258   /* Calculation of count for remaining q15_t data */
 259   tapCnt = (numTaps - 1u) % 0x4u;
 260
 261   /* copy data */
 262   while(tapCnt > 0u)
 263   {
 264     *pStateCurnt++ = *pState++;
 265
 266     /* Decrement the loop counter */
 267     tapCnt--;
 268   }
 269
 270 #else
 271
 272   /* Run the below code for Cortex-M0 */
 273
 274   /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
 275   /* pStateCurnt points to the location where the new input data should be written */
 276   pStateCurnt = &(S->pState[(numTaps - 1u)]);
 277
 278   /* Loop over blockSize number of values */
 279   blkCnt = blockSize;
 280
 281   while(blkCnt > 0u)
 282   {
 283     /* Copy the new input sample into the state buffer */
 284     *pStateCurnt++ = *pSrc++;
 285
 286     /* Initialize pState pointer */
 287     px = pState;
 288
 289     /* Initialize pCoeffs pointer */
 290     pb = pCoeffs;
 291
 292     /* Set the accumulator to zero */
 293     acc = 0;
 294
 295     /* Loop over numTaps number of values */
 296     tapCnt = numTaps;
 297
 298     while(tapCnt > 0u)
 299     {
 300       /* Perform the multiply-accumulate */
 301       acc += (q63_t) ((q31_t) (*px++) * (*pb++));
 302
 303       /* Decrement the loop counter */
 304       tapCnt--;
 305     }
 306
 307     /* Calc lower part of acc */
 308     acc_l = acc & 0xffffffff;
 309
 310     /* Calc upper part of acc */
 311     acc_h = (acc >> 32) & 0xffffffff;
 312
 313     /* Apply shift for lower part of acc and upper part of acc */
 314     acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
 315
 316     /* Converting the result to 1.15 format and saturate the output */
 317     acc = __SSAT(acc, 16);
 318
 319     /* Store the result from accumulator into the destination buffer. */
 320     *pOut++ = (q15_t) acc;
 321
 322     /* Compute and store error */
 323     e = *pRef++ - (q15_t) acc;
 324
 325     *pErr++ = (q15_t) e;
 326
 327     /* Compute alpha i.e. intermediate constant for taps update */
 328     alpha = (q15_t) (((q31_t) e * (mu)) >> 15);
 329
 330     /* Initialize pState pointer */
 331     /* Advance state pointer by 1 for the next sample */
 332     px = pState++;
 333
 334     /* Initialize pCoeffs pointer */
 335     pb = pCoeffs;
 336
 337     /* Loop over numTaps number of values */
 338     tapCnt = numTaps;
 339
 340     while(tapCnt > 0u)
 341     {
 342       /* Perform the multiply-accumulate */
 343       *pb++ += (q15_t) (((q31_t) alpha * (*px++)) >> 15);
 344
 345       /* Decrement the loop counter */
 346       tapCnt--;
 347     }
 348
 349     /* Decrement the loop counter */
 350     blkCnt--;
 351
 352   }
 353
 354   /* Processing is complete. Now copy the last numTaps - 1 samples to the
 355      start of the state buffer. This prepares the state buffer for the
 356      next function call. */
 357
 358   /* Points to the start of the pState buffer */
 359   pStateCurnt = S->pState;
 360
 361   /*  Copy (numTaps - 1u) samples  */
 362   tapCnt = (numTaps - 1u);
 363
 364   /* Copy the data */
 365   while(tapCnt > 0u)
 366   {
 367     *pStateCurnt++ = *pState++;
 368
 369     /* Decrement the loop counter */
 370     tapCnt--;
 371   }
 372
 373 #endif /*   #ifndef ARM_MATH_CM0_FAMILY */
 374
 375 }
 376
 377 /**
 378    * @} end of LMS group
 379    */