tmk_core/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/FilteringFunctions/arm_fir_decimate_f32.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        17. January 2013
   5 * $Revision:    V1.4.1
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_fir_decimate_f32.c
   9 *
  10 * Description:  FIR decimation for floating-point sequences.
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Redistribution and use in source and binary forms, with or without
  15 * modification, are permitted provided that the following conditions
  16 * are met:
  17 *   - Redistributions of source code must retain the above copyright
  18 *     notice, this list of conditions and the following disclaimer.
  19 *   - Redistributions in binary form must reproduce the above copyright
  20 *     notice, this list of conditions and the following disclaimer in
  21 *     the documentation and/or other materials provided with the
  22 *     distribution.
  23 *   - Neither the name of ARM LIMITED nor the names of its contributors
  24 *     may be used to endorse or promote products derived from this
  25 *     software without specific prior written permission.
  26 *
  27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38 * POSSIBILITY OF SUCH DAMAGE.
  39 * -------------------------------------------------------------------- */
  40
  41 #include "arm_math.h"
  42
  43 /**
  44  * @ingroup groupFilters
  45  */
  46
  47 /**
  48  * @defgroup FIR_decimate Finite Impulse Response (FIR) Decimator
  49  *
  50  * These functions combine an FIR filter together with a decimator.
  51  * They are used in multirate systems for reducing the sample rate of a signal without introducing aliasing distortion.
  52  * Conceptually, the functions are equivalent to the block diagram below:
  53  * \image html FIRDecimator.gif "Components included in the FIR Decimator functions"
  54  * When decimating by a factor of <code>M</code>, the signal should be prefiltered by a lowpass filter with a normalized
  55  * cutoff frequency of <code>1/M</code> in order to prevent aliasing distortion.
  56  * The user of the function is responsible for providing the filter coefficients.
  57  *
  58  * The FIR decimator functions provided in the CMSIS DSP Library combine the FIR filter and the decimator in an efficient manner.
  59  * Instead of calculating all of the FIR filter outputs and discarding <code>M-1</code> out of every <code>M</code>, only the
  60  * samples output by the decimator are computed.
  61  * The functions operate on blocks of input and output data.
  62  * <code>pSrc</code> points to an array of <code>blockSize</code> input values and
  63  * <code>pDst</code> points to an array of <code>blockSize/M</code> output values.
  64  * In order to have an integer number of output samples <code>blockSize</code>
  65  * must always be a multiple of the decimation factor <code>M</code>.
  66  *
  67  * The library provides separate functions for Q15, Q31 and floating-point data types.
  68  *
  69  * \par Algorithm:
  70  * The FIR portion of the algorithm uses the standard form filter:
  71  * <pre>
  72  *    y[n] = b[0] * x[n] + b[1] * x[n-1] + b[2] * x[n-2] + ...+ b[numTaps-1] * x[n-numTaps+1]
  73  * </pre>
  74  * where, <code>b[n]</code> are the filter coefficients.
  75  * \par
  76  * The <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.
  77  * Coefficients are stored in time reversed order.
  78  * \par
  79  * <pre>
  80  *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
  81  * </pre>
  82  * \par
  83  * <code>pState</code> points to a state array of size <code>numTaps + blockSize - 1</code>.
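  84  * Samples in the state buffer are stored oldest first: the last <code>numTaps-1</code> samples of the previous block (negative indices below), followed by the <code>blockSize</code> samples of the current block:
  85  * \par
  86  * <pre>
  87  *    {x[-numTaps+1], x[-numTaps+2], ..., x[-2], x[-1], x[0], x[1], ..., x[blockSize-1]}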
  88  * </pre>
  89  * The state variables are updated after each block of data is processed, the coefficients are untouched.
  90  *
  91  * \par Instance Structure
  92  * The coefficients and state variables for a filter are stored together in an instance data structure.
  93  * A separate instance structure must be defined for each filter.
  94  * Coefficient arrays may be shared among several instances while state variable array should be allocated separately.
  95  * There are separate instance structure declarations for each of the 3 supported data types.
  96  *
  97  * \par Initialization Functions
  98  * There is also an associated initialization function for each data type.
  99  * The initialization function performs the following operations:
 100  * - Sets the values of the internal structure fields.
 101  * - Zeros out the values in the state buffer.
 102  * - Checks to make sure that the size of the input is a multiple of the decimation factor.
 103  * To do this manually without calling the init function, assign the following fields of the instance structure:
 104  * numTaps, pCoeffs, M (decimation factor), pState. Also set all of the values in pState to zero.
 105  *
 106  * \par
 107  * Use of the initialization function is optional.
 108  * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
 109  * To place an instance structure into a const data section, the instance structure must be manually initialized.
 110  * The code below statically initializes each of the 3 different data type filter instance structures
 111  * <pre>
 112  *arm_fir_decimate_instance_f32 S = {M, numTaps, pCoeffs, pState};
 113  *arm_fir_decimate_instance_q31 S = {M, numTaps, pCoeffs, pState};
 114  *arm_fir_decimate_instance_q15 S = {M, numTaps, pCoeffs, pState};
 115  * </pre>
 116  * where <code>M</code> is the decimation factor; <code>numTaps</code> is the number of filter coefficients in the filter;
 117  * <code>pCoeffs</code> is the address of the coefficient buffer;
 118  * <code>pState</code> is the address of the state buffer.
 119  * Be sure to set the values in the state buffer to zeros when doing static initialization.
 120  *
 121  * \par Fixed-Point Behavior
 122  * Care must be taken when using the fixed-point versions of the FIR decimate filter functions.
 123  * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
 124  * Refer to the function specific documentation below for usage guidelines.
 125  */
 126
 127 /**
 128  * @addtogroup FIR_decimate
 129  * @{
 130  */
 131
 132   /**
 133    * @brief Processing function for the floating-point FIR decimator.
 134    * @param[in] *S        points to an instance of the floating-point FIR decimator structure.
 135    * @param[in] *pSrc     points to the block of input data.
 136    * @param[out] *pDst    points to the block of output data.
 137    * @param[in] blockSize number of input samples to process per call.
 138    * @return none.
 139    */
 140
 141 void arm_fir_decimate_f32(
 142   const arm_fir_decimate_instance_f32 * S,
 143   float32_t * pSrc,
 144   float32_t * pDst,
 145   uint32_t blockSize)
 146 {
 147   float32_t *pState = S->pState;                 /* State pointer */
 148   float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
 149   float32_t *pStateCurnt;                        /* Points to the current sample of the state */
 150   float32_t *px, *pb;                            /* Temporary pointers for state and coefficient buffers */
 151   float32_t sum0;                                /* Accumulator */
 152   float32_t x0, c0;                              /* Temporary variables to hold state and coefficient values */
 153   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
 154   uint32_t i, tapCnt, blkCnt, outBlockSize = blockSize / S->M;  /* Loop counters */
 155
 156 #ifndef ARM_MATH_CM0_FAMILY
 157
 158   uint32_t blkCntN4;
 159   float32_t *px0, *px1, *px2, *px3;
 160   float32_t acc0, acc1, acc2, acc3;
 161   float32_t x1, x2, x3;
 162
 163   /* Run the below code for Cortex-M4 and Cortex-M3 */
 164
 165   /* S->pState buffer contains previous frame (numTaps - 1) samples */
 166   /* pStateCurnt points to the location where the new input data should be written */
 167   pStateCurnt = S->pState + (numTaps - 1u);
 168
 169   /* Total number of output samples to be computed */
 170   blkCnt = outBlockSize / 4;
 171   blkCntN4 = outBlockSize - (4 * blkCnt);
 172
 173   while(blkCnt > 0u)
 174   {
 175     /* Copy 4 * decimation factor number of new input samples into the state buffer */
 176     i = 4 * S->M;
 177
 178     do
 179     {
 180       *pStateCurnt++ = *pSrc++;
 181
 182     } while(--i);
 183
 184     /* Set accumulators to zero */
 185     acc0 = 0.0f;
 186     acc1 = 0.0f;
 187     acc2 = 0.0f;
 188     acc3 = 0.0f;
 189
 190     /* Initialize state pointer for all the samples */
 191     px0 = pState;
 192     px1 = pState + S->M;
 193     px2 = pState + 2 * S->M;
 194     px3 = pState + 3 * S->M;
 195
 196     /* Initialize coeff pointer */
 197     pb = pCoeffs;
 198
 199     /* Loop unrolling.  Process 4 taps at a time. */
 200     tapCnt = numTaps >> 2;
 201
 202     /* Loop over the number of taps.  Unroll by a factor of 4.
 203      ** Repeat until we've computed numTaps-4 coefficients. */
 204
 205     while(tapCnt > 0u)
 206     {
 207       /* Read the b[numTaps-1] coefficient */
 208       c0 = *(pb++);
 209
 210       /* Read x[n-numTaps-1] sample for acc0 */
 211       x0 = *(px0++);
 212       /* Read x[n-numTaps-1] sample for acc1 */
 213       x1 = *(px1++);
 214       /* Read x[n-numTaps-1] sample for acc2 */
 215       x2 = *(px2++);
 216       /* Read x[n-numTaps-1] sample for acc3 */
 217       x3 = *(px3++);
 218
 219       /* Perform the multiply-accumulate */
 220       acc0 += x0 * c0;
 221       acc1 += x1 * c0;
 222       acc2 += x2 * c0;
 223       acc3 += x3 * c0;
 224
 225       /* Read the b[numTaps-2] coefficient */
 226       c0 = *(pb++);
 227
 228       /* Read x[n-numTaps-2] sample for acc0, acc1, acc2, acc3 */
 229       x0 = *(px0++);
 230       x1 = *(px1++);
 231       x2 = *(px2++);
 232       x3 = *(px3++);
 233
 234       /* Perform the multiply-accumulate */
 235       acc0 += x0 * c0;
 236       acc1 += x1 * c0;
 237       acc2 += x2 * c0;
 238       acc3 += x3 * c0;
 239
 240       /* Read the b[numTaps-3] coefficient */
 241       c0 = *(pb++);
 242
 243       /* Read x[n-numTaps-3] sample acc0, acc1, acc2, acc3 */
 244       x0 = *(px0++);
 245       x1 = *(px1++);
 246       x2 = *(px2++);
 247       x3 = *(px3++);
 248
 249       /* Perform the multiply-accumulate */
 250       acc0 += x0 * c0;
 251       acc1 += x1 * c0;
 252       acc2 += x2 * c0;
 253       acc3 += x3 * c0;
 254
 255       /* Read the b[numTaps-4] coefficient */
 256       c0 = *(pb++);
 257
 258       /* Read x[n-numTaps-4] sample acc0, acc1, acc2, acc3 */
 259       x0 = *(px0++);
 260       x1 = *(px1++);
 261       x2 = *(px2++);
 262       x3 = *(px3++);
 263
 264       /* Perform the multiply-accumulate */
 265       acc0 += x0 * c0;
 266       acc1 += x1 * c0;
 267       acc2 += x2 * c0;
 268       acc3 += x3 * c0;
 269
 270       /* Decrement the loop counter */
 271       tapCnt--;
 272     }
 273
 274     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 275     tapCnt = numTaps % 0x4u;
 276
 277     while(tapCnt > 0u)
 278     {
 279       /* Read coefficients */
 280       c0 = *(pb++);
 281
 282       /* Fetch  state variables for acc0, acc1, acc2, acc3 */
 283       x0 = *(px0++);
 284       x1 = *(px1++);
 285       x2 = *(px2++);
 286       x3 = *(px3++);
 287
 288       /* Perform the multiply-accumulate */
 289       acc0 += x0 * c0;
 290       acc1 += x1 * c0;
 291       acc2 += x2 * c0;
 292       acc3 += x3 * c0;
 293
 294       /* Decrement the loop counter */
 295       tapCnt--;
 296     }
 297
 298     /* Advance the state pointer by the decimation factor
 299      * to process the next group of decimation factor number samples */
 300     pState = pState + 4 * S->M;
 301
 302     /* The result is in the accumulator, store in the destination buffer. */
 303     *pDst++ = acc0;
 304     *pDst++ = acc1;
 305     *pDst++ = acc2;
 306     *pDst++ = acc3;
 307
 308     /* Decrement the loop counter */
 309     blkCnt--;
 310   }
 311
 312   while(blkCntN4 > 0u)
 313   {
 314     /* Copy decimation factor number of new input samples into the state buffer */
 315     i = S->M;
 316
 317     do
 318     {
 319       *pStateCurnt++ = *pSrc++;
 320
 321     } while(--i);
 322
 323     /* Set accumulator to zero */
 324     sum0 = 0.0f;
 325
 326     /* Initialize state pointer */
 327     px = pState;
 328
 329     /* Initialize coeff pointer */
 330     pb = pCoeffs;
 331
 332     /* Loop unrolling.  Process 4 taps at a time. */
 333     tapCnt = numTaps >> 2;
 334
 335     /* Loop over the number of taps.  Unroll by a factor of 4.
 336      ** Repeat until we've computed numTaps-4 coefficients. */
 337     while(tapCnt > 0u)
 338     {
 339       /* Read the b[numTaps-1] coefficient */
 340       c0 = *(pb++);
 341
 342       /* Read x[n-numTaps-1] sample */
 343       x0 = *(px++);
 344
 345       /* Perform the multiply-accumulate */
 346       sum0 += x0 * c0;
 347
 348       /* Read the b[numTaps-2] coefficient */
 349       c0 = *(pb++);
 350
 351       /* Read x[n-numTaps-2] sample */
 352       x0 = *(px++);
 353
 354       /* Perform the multiply-accumulate */
 355       sum0 += x0 * c0;
 356
 357       /* Read the b[numTaps-3] coefficient */
 358       c0 = *(pb++);
 359
 360       /* Read x[n-numTaps-3] sample */
 361       x0 = *(px++);
 362
 363       /* Perform the multiply-accumulate */
 364       sum0 += x0 * c0;
 365
 366       /* Read the b[numTaps-4] coefficient */
 367       c0 = *(pb++);
 368
 369       /* Read x[n-numTaps-4] sample */
 370       x0 = *(px++);
 371
 372       /* Perform the multiply-accumulate */
 373       sum0 += x0 * c0;
 374
 375       /* Decrement the loop counter */
 376       tapCnt--;
 377     }
 378
 379     /* If the filter length is not a multiple of 4, compute the remaining filter taps */
 380     tapCnt = numTaps % 0x4u;
 381
 382     while(tapCnt > 0u)
 383     {
 384       /* Read coefficients */
 385       c0 = *(pb++);
 386
 387       /* Fetch 1 state variable */
 388       x0 = *(px++);
 389
 390       /* Perform the multiply-accumulate */
 391       sum0 += x0 * c0;
 392
 393       /* Decrement the loop counter */
 394       tapCnt--;
 395     }
 396
 397     /* Advance the state pointer by the decimation factor
 398      * to process the next group of decimation factor number samples */
 399     pState = pState + S->M;
 400
 401     /* The result is in the accumulator, store in the destination buffer. */
 402     *pDst++ = sum0;
 403
 404     /* Decrement the loop counter */
 405     blkCntN4--;
 406   }
 407
 408   /* Processing is complete.
 409    ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
 410    ** This prepares the state buffer for the next function call. */
 411
 412   /* Points to the start of the state buffer */
 413   pStateCurnt = S->pState;
 414
 415   i = (numTaps - 1u) >> 2;
 416
 417   /* copy data */
 418   while(i > 0u)
 419   {
 420     *pStateCurnt++ = *pState++;
 421     *pStateCurnt++ = *pState++;
 422     *pStateCurnt++ = *pState++;
 423     *pStateCurnt++ = *pState++;
 424
 425     /* Decrement the loop counter */
 426     i--;
 427   }
 428
 429   i = (numTaps - 1u) % 0x04u;
 430
 431   /* copy data */
 432   while(i > 0u)
 433   {
 434     *pStateCurnt++ = *pState++;
 435
 436     /* Decrement the loop counter */
 437     i--;
 438   }
 439
 440 #else
 441
 442 /* Run the below code for Cortex-M0 */
 443
 444   /* S->pState buffer contains previous frame (numTaps - 1) samples */
 445   /* pStateCurnt points to the location where the new input data should be written */
 446   pStateCurnt = S->pState + (numTaps - 1u);
 447
 448   /* Total number of output samples to be computed */
 449   blkCnt = outBlockSize;
 450
 451   while(blkCnt > 0u)
 452   {
 453     /* Copy decimation factor number of new input samples into the state buffer */
 454     i = S->M;
 455
 456     do
 457     {
 458       *pStateCurnt++ = *pSrc++;
 459
 460     } while(--i);
 461
 462     /* Set accumulator to zero */
 463     sum0 = 0.0f;
 464
 465     /* Initialize state pointer */
 466     px = pState;
 467
 468     /* Initialize coeff pointer */
 469     pb = pCoeffs;
 470
 471     tapCnt = numTaps;
 472
 473     while(tapCnt > 0u)
 474     {
 475       /* Read coefficients */
 476       c0 = *pb++;
 477
 478       /* Fetch 1 state variable */
 479       x0 = *px++;
 480
 481       /* Perform the multiply-accumulate */
 482       sum0 += x0 * c0;
 483
 484       /* Decrement the loop counter */
 485       tapCnt--;
 486     }
 487
 488     /* Advance the state pointer by the decimation factor
 489      * to process the next group of decimation factor number samples */
 490     pState = pState + S->M;
 491
 492     /* The result is in the accumulator, store in the destination buffer. */
 493     *pDst++ = sum0;
 494
 495     /* Decrement the loop counter */
 496     blkCnt--;
 497   }
 498
 499   /* Processing is complete.
 500    ** Now copy the last numTaps - 1 samples to the start of the state buffer.
 501    ** This prepares the state buffer for the next function call. */
 502
 503   /* Points to the start of the state buffer */
 504   pStateCurnt = S->pState;
 505
 506   /* Copy numTaps number of values */
 507   i = (numTaps - 1u);
 508
 509   /* copy data */
 510   while(i > 0u)
 511   {
 512     *pStateCurnt++ = *pState++;
 513
 514     /* Decrement the loop counter */
 515     i--;
 516   }
 517
 518 #endif /*   #ifndef ARM_MATH_CM0_FAMILY        */
 519
 520 }
 521
 522 /**
 523  * @} end of FIR_decimate group
 524  */