tmk_core/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/FilteringFunctions/arm_fir_sparse_f32.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        17. January 2013
   5 * $Revision:    V1.4.1
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_fir_sparse_f32.c
   9 *
  10 * Description:  Floating-point sparse FIR filter processing function.
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Redistribution and use in source and binary forms, with or without
  15 * modification, are permitted provided that the following conditions
  16 * are met:
  17 *   - Redistributions of source code must retain the above copyright
  18 *     notice, this list of conditions and the following disclaimer.
  19 *   - Redistributions in binary form must reproduce the above copyright
  20 *     notice, this list of conditions and the following disclaimer in
  21 *     the documentation and/or other materials provided with the
  22 *     distribution.
  23 *   - Neither the name of ARM LIMITED nor the names of its contributors
  24 *     may be used to endorse or promote products derived from this
  25 *     software without specific prior written permission.
  26 *
  27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38 * POSSIBILITY OF SUCH DAMAGE.
  39 * ------------------------------------------------------------------- */
  40 #include "arm_math.h"
  41
  42 /**
  43  * @ingroup groupFilters
  44  */
  45
  46 /**
  47  * @defgroup FIR_Sparse Finite Impulse Response (FIR) Sparse Filters
  48  *
  49  * This group of functions implements sparse FIR filters.
  50  * Sparse FIR filters are equivalent to standard FIR filters except that most of the coefficients are equal to zero.
  51  * Sparse filters are used for simulating reflections in communications and audio applications.
  52  *
  53  * There are separate functions for Q7, Q15, Q31, and floating-point data types.
  54  * The functions operate on blocks  of input and output data and each call to the function processes
  55  * <code>blockSize</code> samples through the filter.  <code>pSrc</code> and
  56  * <code>pDst</code> point to input and output arrays, respectively, each containing <code>blockSize</code> values.
  57  *
  58  * \par Algorithm:
  59  * The sparse filter instance structure contains an array of tap indices <code>pTapDelay</code> which specifies the locations of the non-zero coefficients.
  60  * This is in addition to the coefficient array <code>b</code>.
  61  * The implementation essentially skips the multiplications by zero and leads to an efficient realization.
  62  * <pre>
  63  *     y[n] = b[0] * x[n-pTapDelay[0]] + b[1] * x[n-pTapDelay[1]] + b[2] * x[n-pTapDelay[2]] + ...+ b[numTaps-1] * x[n-pTapDelay[numTaps-1]]
  64  * </pre>
  65  * \par
  66  * \image html FIRSparse.gif "Sparse FIR filter.  b[n] represents the filter coefficients"
  67  * \par
  68  * <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>;
  69  * <code>pTapDelay</code> points to an array of nonzero indices and is also of size <code>numTaps</code>;
  70  * <code>pState</code> points to a state array of size <code>maxDelay + blockSize</code>, where
  71  * <code>maxDelay</code> is the largest offset value that is ever used in the <code>pTapDelay</code> array.
  72  * Some of the processing functions also require temporary working buffers.
  73  *
  74  * \par Instance Structure
  75  * The coefficients and state variables for a filter are stored together in an instance data structure.
  76  * A separate instance structure must be defined for each filter.
  77  * Coefficient and offset arrays may be shared among several instances while state variable arrays cannot be shared.
  78  * There are separate instance structure declarations for each of the 4 supported data types.
  79  *
  80  * \par Initialization Functions
  81  * There is also an associated initialization function for each data type.
  82  * The initialization function performs the following operations:
  83  * - Sets the values of the internal structure fields.
  84  * - Zeros out the values in the state buffer.
  85  * To do this manually without calling the init function, assign the following subfields of the instance structure:
  86  * numTaps, pCoeffs, pTapDelay, maxDelay, stateIndex, pState. Also set all of the values in pState to zero.
  87  *
  88  * \par
  89  * Use of the initialization function is optional.
  90  * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
  91  * To place an instance structure into a const data section, the instance structure must be manually initialized.
  92  * Set the values in the state buffer to zeros before static initialization.
  93  * The code below statically initializes each of the 4 different data type filter instance structures:
  94  * <pre>
  95  *arm_fir_sparse_instance_f32 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};
  96  *arm_fir_sparse_instance_q31 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};
  97  *arm_fir_sparse_instance_q15 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};
  98  *arm_fir_sparse_instance_q7 S =  {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};
  99  * </pre>
 100  * \par
 101  *
 102  * \par Fixed-Point Behavior
 103  * Care must be taken when using the fixed-point versions of the sparse FIR filter functions.
 104  * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
 105  * Refer to the function specific documentation below for usage guidelines.
 106  */
 107
 108 /**
 109  * @addtogroup FIR_Sparse
 110  * @{
 111  */
 112
 113 /**
 114  * @brief Processing function for the floating-point sparse FIR filter.
 115  * @param[in]  *S          points to an instance of the floating-point sparse FIR structure.
 116  * @param[in]  *pSrc       points to the block of input data.
 117  * @param[out] *pDst       points to the block of output data
 118  * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
 119  * @param[in]  blockSize   number of input samples to process per call.
 120  * @return none.
 121  */
 122
 123 void arm_fir_sparse_f32(
 124   arm_fir_sparse_instance_f32 * S,
 125   float32_t * pSrc,
 126   float32_t * pDst,
 127   float32_t * pScratchIn,
 128   uint32_t blockSize)
 129 {
 130
 131   float32_t *pState = S->pState;                 /* State pointer */
 132   float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
 133   float32_t *px;                                 /* Scratch buffer pointer */
 134   float32_t *py = pState;                        /* Temporary pointers for state buffer */
 135   float32_t *pb = pScratchIn;                    /* Temporary pointers for scratch buffer */
 136   float32_t *pOut;                               /* Destination pointer */
 137   int32_t *pTapDelay = S->pTapDelay;             /* Pointer to the array containing offset of the non-zero tap values. */
 138   uint32_t delaySize = S->maxDelay + blockSize;  /* state length */
 139   uint16_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter  */
 140   int32_t readIndex;                             /* Read index of the state buffer */
 141   uint32_t tapCnt, blkCnt;                       /* loop counters */
 142   float32_t coeff = *pCoeffs++;                  /* Read the first coefficient value */
 143
 144
 145
 146   /* BlockSize of Input samples are copied into the state buffer */
 147   /* StateIndex points to the starting position to write in the state buffer */
 148   arm_circularWrite_f32((int32_t *) py, delaySize, &S->stateIndex, 1,
 149                         (int32_t *) pSrc, 1, blockSize);
 150
 151
 152   /* Read Index, from where the state buffer should be read, is calculated. */
 153   readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
 154
 155   /* Wraparound of readIndex */
 156   if(readIndex < 0)
 157   {
 158     readIndex += (int32_t) delaySize;
 159   }
 160
 161   /* Working pointer for state buffer is updated */
 162   py = pState;
 163
 164   /* blockSize samples are read from the state buffer */
 165   arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1,
 166                        (int32_t *) pb, (int32_t *) pb, blockSize, 1,
 167                        blockSize);
 168
 169   /* Working pointer for the scratch buffer */
 170   px = pb;
 171
 172   /* Working pointer for destination buffer */
 173   pOut = pDst;
 174
 175
 176 #ifndef ARM_MATH_CM0_FAMILY
 177
 178   /* Run the below code for Cortex-M4 and Cortex-M3 */
 179
 180   /* Loop over the blockSize. Unroll by a factor of 4.
 181    * Compute 4 Multiplications at a time. */
 182   blkCnt = blockSize >> 2u;
 183
 184   while(blkCnt > 0u)
 185   {
 186     /* Perform Multiplications and store in destination buffer */
 187     *pOut++ = *px++ * coeff;
 188     *pOut++ = *px++ * coeff;
 189     *pOut++ = *px++ * coeff;
 190     *pOut++ = *px++ * coeff;
 191
 192     /* Decrement the loop counter */
 193     blkCnt--;
 194   }
 195
 196   /* If the blockSize is not a multiple of 4,
 197    * compute the remaining samples */
 198   blkCnt = blockSize % 0x4u;
 199
 200   while(blkCnt > 0u)
 201   {
 202     /* Perform Multiplications and store in destination buffer */
 203     *pOut++ = *px++ * coeff;
 204
 205     /* Decrement the loop counter */
 206     blkCnt--;
 207   }
 208
 209   /* Load the coefficient value and
 210    * increment the coefficient buffer for the next set of state values */
 211   coeff = *pCoeffs++;
 212
 213   /* Read Index, from where the state buffer should be read, is calculated. */
 214   readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
 215
 216   /* Wraparound of readIndex */
 217   if(readIndex < 0)
 218   {
 219     readIndex += (int32_t) delaySize;
 220   }
 221
 222   /* Loop over the number of taps. */
 223   tapCnt = (uint32_t) numTaps - 1u;
 224
 225   while(tapCnt > 0u)
 226   {
 227
 228     /* Working pointer for state buffer is updated */
 229     py = pState;
 230
 231     /* blockSize samples are read from the state buffer */
 232     arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1,
 233                          (int32_t *) pb, (int32_t *) pb, blockSize, 1,
 234                          blockSize);
 235
 236     /* Working pointer for the scratch buffer */
 237     px = pb;
 238
 239     /* Working pointer for destination buffer */
 240     pOut = pDst;
 241
 242     /* Loop over the blockSize. Unroll by a factor of 4.
 243      * Compute 4 MACS at a time. */
 244     blkCnt = blockSize >> 2u;
 245
 246     while(blkCnt > 0u)
 247     {
 248       /* Perform Multiply-Accumulate */
 249       *pOut++ += *px++ * coeff;
 250       *pOut++ += *px++ * coeff;
 251       *pOut++ += *px++ * coeff;
 252       *pOut++ += *px++ * coeff;
 253
 254       /* Decrement the loop counter */
 255       blkCnt--;
 256     }
 257
 258     /* If the blockSize is not a multiple of 4,
 259      * compute the remaining samples */
 260     blkCnt = blockSize % 0x4u;
 261
 262     while(blkCnt > 0u)
 263     {
 264       /* Perform Multiply-Accumulate */
 265       *pOut++ += *px++ * coeff;
 266
 267       /* Decrement the loop counter */
 268       blkCnt--;
 269     }
 270
 271     /* Load the coefficient value and
 272      * increment the coefficient buffer for the next set of state values */
 273     coeff = *pCoeffs++;
 274
 275     /* Read Index, from where the state buffer should be read, is calculated. */
 276     readIndex = ((int32_t) S->stateIndex -
 277                  (int32_t) blockSize) - *pTapDelay++;
 278
 279     /* Wraparound of readIndex */
 280     if(readIndex < 0)
 281     {
 282       readIndex += (int32_t) delaySize;
 283     }
 284
 285     /* Decrement the tap loop counter */
 286     tapCnt--;
 287   }
 288
 289 #else
 290
 291 /* Run the below code for Cortex-M0 */
 292
 293   blkCnt = blockSize;
 294
 295   while(blkCnt > 0u)
 296   {
 297     /* Perform Multiplications and store in destination buffer */
 298     *pOut++ = *px++ * coeff;
 299
 300     /* Decrement the loop counter */
 301     blkCnt--;
 302   }
 303
 304   /* Load the coefficient value and
 305    * increment the coefficient buffer for the next set of state values */
 306   coeff = *pCoeffs++;
 307
 308   /* Read Index, from where the state buffer should be read, is calculated. */
 309   readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
 310
 311   /* Wraparound of readIndex */
 312   if(readIndex < 0)
 313   {
 314     readIndex += (int32_t) delaySize;
 315   }
 316
 317   /* Loop over the number of taps. */
 318   tapCnt = (uint32_t) numTaps - 1u;
 319
 320   while(tapCnt > 0u)
 321   {
 322
 323     /* Working pointer for state buffer is updated */
 324     py = pState;
 325
 326     /* blockSize samples are read from the state buffer */
 327     arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1,
 328                          (int32_t *) pb, (int32_t *) pb, blockSize, 1,
 329                          blockSize);
 330
 331     /* Working pointer for the scratch buffer */
 332     px = pb;
 333
 334     /* Working pointer for destination buffer */
 335     pOut = pDst;
 336
 337     blkCnt = blockSize;
 338
 339     while(blkCnt > 0u)
 340     {
 341       /* Perform Multiply-Accumulate */
 342       *pOut++ += *px++ * coeff;
 343
 344       /* Decrement the loop counter */
 345       blkCnt--;
 346     }
 347
 348     /* Load the coefficient value and
 349      * increment the coefficient buffer for the next set of state values */
 350     coeff = *pCoeffs++;
 351
 352     /* Read Index, from where the state buffer should be read, is calculated. */
 353     readIndex =
 354       ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
 355
 356     /* Wraparound of readIndex */
 357     if(readIndex < 0)
 358     {
 359       readIndex += (int32_t) delaySize;
 360     }
 361
 362     /* Decrement the tap loop counter */
 363     tapCnt--;
 364   }
 365
 366 #endif /*   #ifndef ARM_MATH_CM0_FAMILY        */
 367
 368 }
 369
 370 /**
 371  * @} end of FIR_Sparse group
 372  */