tmk_core/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/FilteringFunctions/arm_fir_interpolate_q31.c

   1 /*-----------------------------------------------------------------------------
   2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        17. January 2013
   5 * $Revision:    V1.4.1
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:                arm_fir_interpolate_q31.c
   9 *
  10 * Description:  Q31 FIR interpolation.
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Redistribution and use in source and binary forms, with or without
  15 * modification, are permitted provided that the following conditions
  16 * are met:
  17 *   - Redistributions of source code must retain the above copyright
  18 *     notice, this list of conditions and the following disclaimer.
  19 *   - Redistributions in binary form must reproduce the above copyright
  20 *     notice, this list of conditions and the following disclaimer in
  21 *     the documentation and/or other materials provided with the
  22 *     distribution.
  23 *   - Neither the name of ARM LIMITED nor the names of its contributors
  24 *     may be used to endorse or promote products derived from this
  25 *     software without specific prior written permission.
  26 *
  27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38 * POSSIBILITY OF SUCH DAMAGE.
  39 * ---------------------------------------------------------------------------*/
  40
  41 #include "arm_math.h"
  42
  43 /**
  44  * @ingroup groupFilters
  45  */
  46
  47 /**
  48  * @addtogroup FIR_Interpolate
  49  * @{
  50  */
  51
  52 /**
  53  * @brief Processing function for the Q31 FIR interpolator.
  54  * @param[in] *S        points to an instance of the Q31 FIR interpolator structure.
  55  * @param[in] *pSrc     points to the block of input data.
  56  * @param[out] *pDst    points to the block of output data.
  57  * @param[in] blockSize number of input samples to process per call.
  58  * @return none.
  59  *
  60  * <b>Scaling and Overflow Behavior:</b>
  61  * \par
  62  * The function is implemented using an internal 64-bit accumulator.
  63  * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
  64  * Thus, if the accumulator result overflows it wraps around rather than clip.
  65  * In order to avoid overflows completely the input signal must be scaled down by <code>1/(numTaps/L)</code>.
  66  * since <code>numTaps/L</code> additions occur per output sample.
  67  * After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.
  68  */
  69
  70 #ifndef ARM_MATH_CM0_FAMILY
  71
  72   /* Run the below code for Cortex-M4 and Cortex-M3 */
  73
  74 void arm_fir_interpolate_q31(
  75   const arm_fir_interpolate_instance_q31 * S,
  76   q31_t * pSrc,
  77   q31_t * pDst,
  78   uint32_t blockSize)
  79 {
  80   q31_t *pState = S->pState;                     /* State pointer */
  81   q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
  82   q31_t *pStateCurnt;                            /* Points to the current sample of the state */
  83   q31_t *ptr1, *ptr2;                            /* Temporary pointers for state and coefficient buffers */
  84   q63_t sum0;                                    /* Accumulators */
  85   q31_t x0, c0;                                  /* Temporary variables to hold state and coefficient values */
  86   uint32_t i, blkCnt, j;                         /* Loop counters */
  87   uint16_t phaseLen = S->phaseLength, tapCnt;    /* Length of each polyphase filter component */
  88
  89   uint32_t blkCntN2;
  90   q63_t acc0, acc1;
  91   q31_t x1;
  92
  93   /* S->pState buffer contains previous frame (phaseLen - 1) samples */
  94   /* pStateCurnt points to the location where the new input data should be written */
  95   pStateCurnt = S->pState + ((q31_t) phaseLen - 1);
  96
  97   /* Initialise  blkCnt */
  98   blkCnt = blockSize / 2;
  99   blkCntN2 = blockSize - (2 * blkCnt);
 100
 101   /* Samples loop unrolled by 2 */
 102   while(blkCnt > 0u)
 103   {
 104     /* Copy new input sample into the state buffer */
 105     *pStateCurnt++ = *pSrc++;
 106     *pStateCurnt++ = *pSrc++;
 107
 108     /* Address modifier index of coefficient buffer */
 109     j = 1u;
 110
 111     /* Loop over the Interpolation factor. */
 112     i = (S->L);
 113
 114     while(i > 0u)
 115     {
 116       /* Set accumulator to zero */
 117       acc0 = 0;
 118       acc1 = 0;
 119
 120       /* Initialize state pointer */
 121       ptr1 = pState;
 122
 123       /* Initialize coefficient pointer */
 124       ptr2 = pCoeffs + (S->L - j);
 125
 126       /* Loop over the polyPhase length. Unroll by a factor of 4.
 127        ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
 128       tapCnt = phaseLen >> 2u;
 129
 130       x0 = *(ptr1++);
 131
 132       while(tapCnt > 0u)
 133       {
 134
 135         /* Read the input sample */
 136         x1 = *(ptr1++);
 137
 138         /* Read the coefficient */
 139         c0 = *(ptr2);
 140
 141         /* Perform the multiply-accumulate */
 142         acc0 += (q63_t) x0 *c0;
 143         acc1 += (q63_t) x1 *c0;
 144
 145
 146         /* Read the coefficient */
 147         c0 = *(ptr2 + S->L);
 148
 149         /* Read the input sample */
 150         x0 = *(ptr1++);
 151
 152         /* Perform the multiply-accumulate */
 153         acc0 += (q63_t) x1 *c0;
 154         acc1 += (q63_t) x0 *c0;
 155
 156
 157         /* Read the coefficient */
 158         c0 = *(ptr2 + S->L * 2);
 159
 160         /* Read the input sample */
 161         x1 = *(ptr1++);
 162
 163         /* Perform the multiply-accumulate */
 164         acc0 += (q63_t) x0 *c0;
 165         acc1 += (q63_t) x1 *c0;
 166
 167         /* Read the coefficient */
 168         c0 = *(ptr2 + S->L * 3);
 169
 170         /* Read the input sample */
 171         x0 = *(ptr1++);
 172
 173         /* Perform the multiply-accumulate */
 174         acc0 += (q63_t) x1 *c0;
 175         acc1 += (q63_t) x0 *c0;
 176
 177
 178         /* Upsampling is done by stuffing L-1 zeros between each sample.
 179          * So instead of multiplying zeros with coefficients,
 180          * Increment the coefficient pointer by interpolation factor times. */
 181         ptr2 += 4 * S->L;
 182
 183         /* Decrement the loop counter */
 184         tapCnt--;
 185       }
 186
 187       /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
 188       tapCnt = phaseLen % 0x4u;
 189
 190       while(tapCnt > 0u)
 191       {
 192
 193         /* Read the input sample */
 194         x1 = *(ptr1++);
 195
 196         /* Read the coefficient */
 197         c0 = *(ptr2);
 198
 199         /* Perform the multiply-accumulate */
 200         acc0 += (q63_t) x0 *c0;
 201         acc1 += (q63_t) x1 *c0;
 202
 203         /* Increment the coefficient pointer by interpolation factor times. */
 204         ptr2 += S->L;
 205
 206         /* update states for next sample processing */
 207         x0 = x1;
 208
 209         /* Decrement the loop counter */
 210         tapCnt--;
 211       }
 212
 213       /* The result is in the accumulator, store in the destination buffer. */
 214       *pDst = (q31_t) (acc0 >> 31);
 215       *(pDst + S->L) = (q31_t) (acc1 >> 31);
 216
 217
 218       pDst++;
 219
 220       /* Increment the address modifier index of coefficient buffer */
 221       j++;
 222
 223       /* Decrement the loop counter */
 224       i--;
 225     }
 226
 227     /* Advance the state pointer by 1
 228      * to process the next group of interpolation factor number samples */
 229     pState = pState + 2;
 230
 231     pDst += S->L;
 232
 233     /* Decrement the loop counter */
 234     blkCnt--;
 235   }
 236
 237   /* If the blockSize is not a multiple of 2, compute any remaining output samples here.
 238    ** No loop unrolling is used. */
 239   blkCnt = blkCntN2;
 240
 241   /* Loop over the blockSize. */
 242   while(blkCnt > 0u)
 243   {
 244     /* Copy new input sample into the state buffer */
 245     *pStateCurnt++ = *pSrc++;
 246
 247     /* Address modifier index of coefficient buffer */
 248     j = 1u;
 249
 250     /* Loop over the Interpolation factor. */
 251     i = S->L;
 252     while(i > 0u)
 253     {
 254       /* Set accumulator to zero */
 255       sum0 = 0;
 256
 257       /* Initialize state pointer */
 258       ptr1 = pState;
 259
 260       /* Initialize coefficient pointer */
 261       ptr2 = pCoeffs + (S->L - j);
 262
 263       /* Loop over the polyPhase length. Unroll by a factor of 4.
 264        ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
 265       tapCnt = phaseLen >> 2;
 266       while(tapCnt > 0u)
 267       {
 268
 269         /* Read the coefficient */
 270         c0 = *(ptr2);
 271
 272         /* Upsampling is done by stuffing L-1 zeros between each sample.
 273          * So instead of multiplying zeros with coefficients,
 274          * Increment the coefficient pointer by interpolation factor times. */
 275         ptr2 += S->L;
 276
 277         /* Read the input sample */
 278         x0 = *(ptr1++);
 279
 280         /* Perform the multiply-accumulate */
 281         sum0 += (q63_t) x0 *c0;
 282
 283         /* Read the coefficient */
 284         c0 = *(ptr2);
 285
 286         /* Increment the coefficient pointer by interpolation factor times. */
 287         ptr2 += S->L;
 288
 289         /* Read the input sample */
 290         x0 = *(ptr1++);
 291
 292         /* Perform the multiply-accumulate */
 293         sum0 += (q63_t) x0 *c0;
 294
 295         /* Read the coefficient */
 296         c0 = *(ptr2);
 297
 298         /* Increment the coefficient pointer by interpolation factor times. */
 299         ptr2 += S->L;
 300
 301         /* Read the input sample */
 302         x0 = *(ptr1++);
 303
 304         /* Perform the multiply-accumulate */
 305         sum0 += (q63_t) x0 *c0;
 306
 307         /* Read the coefficient */
 308         c0 = *(ptr2);
 309
 310         /* Increment the coefficient pointer by interpolation factor times. */
 311         ptr2 += S->L;
 312
 313         /* Read the input sample */
 314         x0 = *(ptr1++);
 315
 316         /* Perform the multiply-accumulate */
 317         sum0 += (q63_t) x0 *c0;
 318
 319         /* Decrement the loop counter */
 320         tapCnt--;
 321       }
 322
 323       /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
 324       tapCnt = phaseLen & 0x3u;
 325
 326       while(tapCnt > 0u)
 327       {
 328         /* Read the coefficient */
 329         c0 = *(ptr2);
 330
 331         /* Increment the coefficient pointer by interpolation factor times. */
 332         ptr2 += S->L;
 333
 334         /* Read the input sample */
 335         x0 = *(ptr1++);
 336
 337         /* Perform the multiply-accumulate */
 338         sum0 += (q63_t) x0 *c0;
 339
 340         /* Decrement the loop counter */
 341         tapCnt--;
 342       }
 343
 344       /* The result is in the accumulator, store in the destination buffer. */
 345       *pDst++ = (q31_t) (sum0 >> 31);
 346
 347       /* Increment the address modifier index of coefficient buffer */
 348       j++;
 349
 350       /* Decrement the loop counter */
 351       i--;
 352     }
 353
 354     /* Advance the state pointer by 1
 355      * to process the next group of interpolation factor number samples */
 356     pState = pState + 1;
 357
 358     /* Decrement the loop counter */
 359     blkCnt--;
 360   }
 361
 362   /* Processing is complete.
 363    ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer.
 364    ** This prepares the state buffer for the next function call. */
 365
 366   /* Points to the start of the state buffer */
 367   pStateCurnt = S->pState;
 368
 369   tapCnt = (phaseLen - 1u) >> 2u;
 370
 371   /* copy data */
 372   while(tapCnt > 0u)
 373   {
 374     *pStateCurnt++ = *pState++;
 375     *pStateCurnt++ = *pState++;
 376     *pStateCurnt++ = *pState++;
 377     *pStateCurnt++ = *pState++;
 378
 379     /* Decrement the loop counter */
 380     tapCnt--;
 381   }
 382
 383   tapCnt = (phaseLen - 1u) % 0x04u;
 384
 385   /* copy data */
 386   while(tapCnt > 0u)
 387   {
 388     *pStateCurnt++ = *pState++;
 389
 390     /* Decrement the loop counter */
 391     tapCnt--;
 392   }
 393
 394 }
 395
 396
 397 #else
 398
 399 void arm_fir_interpolate_q31(
 400   const arm_fir_interpolate_instance_q31 * S,
 401   q31_t * pSrc,
 402   q31_t * pDst,
 403   uint32_t blockSize)
 404 {
 405   q31_t *pState = S->pState;                     /* State pointer */
 406   q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
 407   q31_t *pStateCurnt;                            /* Points to the current sample of the state */
 408   q31_t *ptr1, *ptr2;                            /* Temporary pointers for state and coefficient buffers */
 409
 410   /* Run the below code for Cortex-M0 */
 411
 412   q63_t sum;                                     /* Accumulator */
 413   q31_t x0, c0;                                  /* Temporary variables to hold state and coefficient values */
 414   uint32_t i, blkCnt;                            /* Loop counters */
 415   uint16_t phaseLen = S->phaseLength, tapCnt;    /* Length of each polyphase filter component */
 416
 417
 418   /* S->pState buffer contains previous frame (phaseLen - 1) samples */
 419   /* pStateCurnt points to the location where the new input data should be written */
 420   pStateCurnt = S->pState + ((q31_t) phaseLen - 1);
 421
 422   /* Total number of intput samples */
 423   blkCnt = blockSize;
 424
 425   /* Loop over the blockSize. */
 426   while(blkCnt > 0u)
 427   {
 428     /* Copy new input sample into the state buffer */
 429     *pStateCurnt++ = *pSrc++;
 430
 431     /* Loop over the Interpolation factor. */
 432     i = S->L;
 433
 434     while(i > 0u)
 435     {
 436       /* Set accumulator to zero */
 437       sum = 0;
 438
 439       /* Initialize state pointer */
 440       ptr1 = pState;
 441
 442       /* Initialize coefficient pointer */
 443       ptr2 = pCoeffs + (i - 1u);
 444
 445       tapCnt = phaseLen;
 446
 447       while(tapCnt > 0u)
 448       {
 449         /* Read the coefficient */
 450         c0 = *(ptr2);
 451
 452         /* Increment the coefficient pointer by interpolation factor times. */
 453         ptr2 += S->L;
 454
 455         /* Read the input sample */
 456         x0 = *ptr1++;
 457
 458         /* Perform the multiply-accumulate */
 459         sum += (q63_t) x0 *c0;
 460
 461         /* Decrement the loop counter */
 462         tapCnt--;
 463       }
 464
 465       /* The result is in the accumulator, store in the destination buffer. */
 466       *pDst++ = (q31_t) (sum >> 31);
 467
 468       /* Decrement the loop counter */
 469       i--;
 470     }
 471
 472     /* Advance the state pointer by 1
 473      * to process the next group of interpolation factor number samples */
 474     pState = pState + 1;
 475
 476     /* Decrement the loop counter */
 477     blkCnt--;
 478   }
 479
 480   /* Processing is complete.
 481    ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer.
 482    ** This prepares the state buffer for the next function call. */
 483
 484   /* Points to the start of the state buffer */
 485   pStateCurnt = S->pState;
 486
 487   tapCnt = phaseLen - 1u;
 488
 489   /* copy data */
 490   while(tapCnt > 0u)
 491   {
 492     *pStateCurnt++ = *pState++;
 493
 494     /* Decrement the loop counter */
 495     tapCnt--;
 496   }
 497
 498 }
 499
 500 #endif /*   #ifndef ARM_MATH_CM0_FAMILY */
 501
 502  /**
 503   * @} end of FIR_Interpolate group
 504   */