1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
4 * $Date: 17. January 2013
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_interpolate_f32.c
10 * Description: FIR interpolation for floating-point sequences.
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
44 * @defgroup FIR_Interpolate Finite Impulse Response (FIR) Interpolator
46 * These functions combine an upsampler (zero stuffer) and an FIR filter.
47 * They are used in multirate systems for increasing the sample rate of a signal without introducing high frequency images.
48 * Conceptually, the functions are equivalent to the block diagram below:
49 * \image html FIRInterpolator.gif "Components included in the FIR Interpolator functions"
50 * After upsampling by a factor of <code>L</code>, the signal should be filtered by a lowpass filter with a normalized
51 * cutoff frequency of <code>1/L</code> in order to eliminate high frequency copies of the spectrum.
52 * The user of the function is responsible for providing the filter coefficients.
54 * The FIR interpolator functions provided in the CMSIS DSP Library combine the upsampler and FIR filter in an efficient manner.
55 * The upsampler inserts <code>L-1</code> zeros between each sample.
56 * Instead of multiplying by these zero values, the FIR filter is designed to skip them.
57 * This leads to an efficient implementation without any wasted effort.
58 * The functions operate on blocks of input and output data.
59 * <code>pSrc</code> points to an array of <code>blockSize</code> input values and
60 * <code>pDst</code> points to an array of <code>blockSize*L</code> output values.
62 * The library provides separate functions for Q15, Q31, and floating-point data types.
65 * The functions use a polyphase filter structure:
67 * y[n] = b[0] * x[n] + b[L] * x[n-1] + ... + b[L*(phaseLength-1)] * x[n-phaseLength+1]
68 * y[n+1] = b[1] * x[n] + b[L+1] * x[n-1] + ... + b[L*(phaseLength-1)+1] * x[n-phaseLength+1]
70 * y[n+(L-1)] = b[L-1] * x[n] + b[2*L-1] * x[n-1] + ....+ b[L*(phaseLength-1)+(L-1)] * x[n-phaseLength+1]
72 * This approach is more efficient than straightforward upsample-then-filter algorithms.
73 * With this method the computation is reduced by a factor of <code>1/L</code> when compared to using a standard FIR filter.
75 * <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.
76 * <code>numTaps</code> must be a multiple of the interpolation factor <code>L</code> and this is checked by the
77 * initialization functions.
78 * Internally, the function divides the FIR filter's impulse response into shorter filters of length
79 * <code>phaseLength=numTaps/L</code>.
80 * Coefficients are stored in time reversed order.
83 * {b[numTaps-1], b[numTaps-2], b[N-2], ..., b[1], b[0]}
86 * <code>pState</code> points to a state array of size <code>blockSize + phaseLength - 1</code>.
87 * Samples in the state buffer are stored in the order:
90 * {x[n-phaseLength+1], x[n-phaseLength], x[n-phaseLength-1], x[n-phaseLength-2]....x[0], x[1], ..., x[blockSize-1]}
92 * The state variables are updated after each block of data is processed, the coefficients are untouched.
94 * \par Instance Structure
95 * The coefficients and state variables for a filter are stored together in an instance data structure.
96 * A separate instance structure must be defined for each filter.
97 * Coefficient arrays may be shared among several instances while state variable array should be allocated separately.
98 * There are separate instance structure declarations for each of the 3 supported data types.
100 * \par Initialization Functions
101 * There is also an associated initialization function for each data type.
102 * The initialization function performs the following operations:
103 * - Sets the values of the internal structure fields.
104 * - Zeros out the values in the state buffer.
105 * - Checks to make sure that the length of the filter is a multiple of the interpolation factor.
106 * To do this manually without calling the init function, assign the follow subfields of the instance structure:
107 * L (interpolation factor), pCoeffs, phaseLength (numTaps / L), pState. Also set all of the values in pState to zero.
110 * Use of the initialization function is optional.
111 * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
112 * To place an instance structure into a const data section, the instance structure must be manually initialized.
113 * The code below statically initializes each of the 3 different data type filter instance structures
115 * arm_fir_interpolate_instance_f32 S = {L, phaseLength, pCoeffs, pState};
116 * arm_fir_interpolate_instance_q31 S = {L, phaseLength, pCoeffs, pState};
117 * arm_fir_interpolate_instance_q15 S = {L, phaseLength, pCoeffs, pState};
119 * where <code>L</code> is the interpolation factor; <code>phaseLength=numTaps/L</code> is the
120 * length of each of the shorter FIR filters used internally,
121 * <code>pCoeffs</code> is the address of the coefficient buffer;
122 * <code>pState</code> is the address of the state buffer.
123 * Be sure to set the values in the state buffer to zeros when doing static initialization.
125 * \par Fixed-Point Behavior
126 * Care must be taken when using the fixed-point versions of the FIR interpolate filter functions.
127 * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
128 * Refer to the function specific documentation below for usage guidelines.
132 * @addtogroup FIR_Interpolate
137 * @brief Processing function for the floating-point FIR interpolator.
138 * @param[in] *S points to an instance of the floating-point FIR interpolator structure.
139 * @param[in] *pSrc points to the block of input data.
140 * @param[out] *pDst points to the block of output data.
141 * @param[in] blockSize number of input samples to process per call.
144 #ifndef ARM_MATH_CM0_FAMILY
146 /* Run the below code for Cortex-M4 and Cortex-M3 */
148 void arm_fir_interpolate_f32(
149 const arm_fir_interpolate_instance_f32
* S
,
154 float32_t
*pState
= S
->pState
; /* State pointer */
155 float32_t
*pCoeffs
= S
->pCoeffs
; /* Coefficient pointer */
156 float32_t
*pStateCurnt
; /* Points to the current sample of the state */
157 float32_t
*ptr1
, *ptr2
; /* Temporary pointers for state and coefficient buffers */
158 float32_t sum0
; /* Accumulators */
159 float32_t x0
, c0
; /* Temporary variables to hold state and coefficient values */
160 uint32_t i
, blkCnt
, j
; /* Loop counters */
161 uint16_t phaseLen
= S
->phaseLength
, tapCnt
; /* Length of each polyphase filter component */
162 float32_t acc0
, acc1
, acc2
, acc3
;
163 float32_t x1
, x2
, x3
;
165 float32_t c1
, c2
, c3
;
167 /* S->pState buffer contains previous frame (phaseLen - 1) samples */
168 /* pStateCurnt points to the location where the new input data should be written */
169 pStateCurnt
= S
->pState
+ (phaseLen
- 1u);
171 /* Initialise blkCnt */
172 blkCnt
= blockSize
/ 4;
173 blkCntN4
= blockSize
- (4 * blkCnt
);
175 /* Samples loop unrolled by 4 */
178 /* Copy new input sample into the state buffer */
179 *pStateCurnt
++ = *pSrc
++;
180 *pStateCurnt
++ = *pSrc
++;
181 *pStateCurnt
++ = *pSrc
++;
182 *pStateCurnt
++ = *pSrc
++;
184 /* Address modifier index of coefficient buffer */
187 /* Loop over the Interpolation factor. */
192 /* Set accumulator to zero */
198 /* Initialize state pointer */
201 /* Initialize coefficient pointer */
202 ptr2
= pCoeffs
+ (S
->L
- j
);
204 /* Loop over the polyPhase length. Unroll by a factor of 4.
205 ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
206 tapCnt
= phaseLen
>> 2u;
215 /* Read the input sample */
218 /* Read the coefficient */
221 /* Perform the multiply-accumulate */
227 /* Read the coefficient */
230 /* Read the input sample */
233 /* Perform the multiply-accumulate */
239 /* Read the coefficient */
240 c2
= *(ptr2
+ S
->L
* 2);
242 /* Read the input sample */
245 /* Perform the multiply-accumulate */
251 /* Read the coefficient */
252 c3
= *(ptr2
+ S
->L
* 3);
254 /* Read the input sample */
257 /* Perform the multiply-accumulate */
264 /* Upsampling is done by stuffing L-1 zeros between each sample.
265 * So instead of multiplying zeros with coefficients,
266 * Increment the coefficient pointer by interpolation factor times. */
269 /* Decrement the loop counter */
273 /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
274 tapCnt
= phaseLen
% 0x4u
;
279 /* Read the input sample */
282 /* Read the coefficient */
285 /* Perform the multiply-accumulate */
291 /* Increment the coefficient pointer by interpolation factor times. */
294 /* update states for next sample processing */
299 /* Decrement the loop counter */
303 /* The result is in the accumulator, store in the destination buffer. */
305 *(pDst
+ S
->L
) = acc1
;
306 *(pDst
+ 2 * S
->L
) = acc2
;
307 *(pDst
+ 3 * S
->L
) = acc3
;
311 /* Increment the address modifier index of coefficient buffer */
314 /* Decrement the loop counter */
318 /* Advance the state pointer by 1
319 * to process the next group of interpolation factor number samples */
324 /* Decrement the loop counter */
328 /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
329 ** No loop unrolling is used. */
333 /* Copy new input sample into the state buffer */
334 *pStateCurnt
++ = *pSrc
++;
336 /* Address modifier index of coefficient buffer */
339 /* Loop over the Interpolation factor. */
343 /* Set accumulator to zero */
346 /* Initialize state pointer */
349 /* Initialize coefficient pointer */
350 ptr2
= pCoeffs
+ (S
->L
- j
);
352 /* Loop over the polyPhase length. Unroll by a factor of 4.
353 ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
354 tapCnt
= phaseLen
>> 2u;
358 /* Read the coefficient */
361 /* Upsampling is done by stuffing L-1 zeros between each sample.
362 * So instead of multiplying zeros with coefficients,
363 * Increment the coefficient pointer by interpolation factor times. */
366 /* Read the input sample */
369 /* Perform the multiply-accumulate */
372 /* Read the coefficient */
375 /* Increment the coefficient pointer by interpolation factor times. */
378 /* Read the input sample */
381 /* Perform the multiply-accumulate */
384 /* Read the coefficient */
387 /* Increment the coefficient pointer by interpolation factor times. */
390 /* Read the input sample */
393 /* Perform the multiply-accumulate */
396 /* Read the coefficient */
399 /* Increment the coefficient pointer by interpolation factor times. */
402 /* Read the input sample */
405 /* Perform the multiply-accumulate */
408 /* Decrement the loop counter */
412 /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
413 tapCnt
= phaseLen
% 0x4u
;
417 /* Perform the multiply-accumulate */
418 sum0
+= *(ptr1
++) * (*ptr2
);
420 /* Increment the coefficient pointer by interpolation factor times. */
423 /* Decrement the loop counter */
427 /* The result is in the accumulator, store in the destination buffer. */
430 /* Increment the address modifier index of coefficient buffer */
433 /* Decrement the loop counter */
437 /* Advance the state pointer by 1
438 * to process the next group of interpolation factor number samples */
441 /* Decrement the loop counter */
445 /* Processing is complete.
446 ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer.
447 ** This prepares the state buffer for the next function call. */
449 /* Points to the start of the state buffer */
450 pStateCurnt
= S
->pState
;
452 tapCnt
= (phaseLen
- 1u) >> 2u;
457 *pStateCurnt
++ = *pState
++;
458 *pStateCurnt
++ = *pState
++;
459 *pStateCurnt
++ = *pState
++;
460 *pStateCurnt
++ = *pState
++;
462 /* Decrement the loop counter */
466 tapCnt
= (phaseLen
- 1u) % 0x04u
;
471 *pStateCurnt
++ = *pState
++;
473 /* Decrement the loop counter */
480 /* Run the below code for Cortex-M0 */
482 void arm_fir_interpolate_f32(
483 const arm_fir_interpolate_instance_f32
* S
,
488 float32_t
*pState
= S
->pState
; /* State pointer */
489 float32_t
*pCoeffs
= S
->pCoeffs
; /* Coefficient pointer */
490 float32_t
*pStateCurnt
; /* Points to the current sample of the state */
491 float32_t
*ptr1
, *ptr2
; /* Temporary pointers for state and coefficient buffers */
494 float32_t sum
; /* Accumulator */
495 uint32_t i
, blkCnt
; /* Loop counters */
496 uint16_t phaseLen
= S
->phaseLength
, tapCnt
; /* Length of each polyphase filter component */
499 /* S->pState buffer contains previous frame (phaseLen - 1) samples */
500 /* pStateCurnt points to the location where the new input data should be written */
501 pStateCurnt
= S
->pState
+ (phaseLen
- 1u);
503 /* Total number of intput samples */
506 /* Loop over the blockSize. */
509 /* Copy new input sample into the state buffer */
510 *pStateCurnt
++ = *pSrc
++;
512 /* Loop over the Interpolation factor. */
517 /* Set accumulator to zero */
520 /* Initialize state pointer */
523 /* Initialize coefficient pointer */
524 ptr2
= pCoeffs
+ (i
- 1u);
526 /* Loop over the polyPhase length */
531 /* Perform the multiply-accumulate */
532 sum
+= *ptr1
++ * *ptr2
;
534 /* Increment the coefficient pointer by interpolation factor times. */
537 /* Decrement the loop counter */
541 /* The result is in the accumulator, store in the destination buffer. */
544 /* Decrement the loop counter */
548 /* Advance the state pointer by 1
549 * to process the next group of interpolation factor number samples */
552 /* Decrement the loop counter */
556 /* Processing is complete.
557 ** Now copy the last phaseLen - 1 samples to the start of the state buffer.
558 ** This prepares the state buffer for the next function call. */
560 /* Points to the start of the state buffer */
561 pStateCurnt
= S
->pState
;
563 tapCnt
= phaseLen
- 1u;
567 *pStateCurnt
++ = *pState
++;
569 /* Decrement the loop counter */
575 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
580 * @} end of FIR_Interpolate group