]> git.gir.st - tmk_keyboard.git/blob - tmk_core/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/FilteringFunctions/arm_fir_sparse_f32.c
Merge commit '1fe4406f374291ab2e86e95a97341fd9c475fcb8'
[tmk_keyboard.git] / tmk_core / tool / mbed / mbed-sdk / libraries / dsp / cmsis_dsp / FilteringFunctions / arm_fir_sparse_f32.c
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
3 *
4 * $Date: 17. January 2013
5 * $Revision: V1.4.1
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_sparse_f32.c
9 *
10 * Description: Floating-point sparse FIR filter processing function.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * ------------------------------------------------------------------- */
40 #include "arm_math.h"
41
42 /**
43 * @ingroup groupFilters
44 */
45
46 /**
47 * @defgroup FIR_Sparse Finite Impulse Response (FIR) Sparse Filters
48 *
49 * This group of functions implements sparse FIR filters.
50 * Sparse FIR filters are equivalent to standard FIR filters except that most of the coefficients are equal to zero.
51 * Sparse filters are used for simulating reflections in communications and audio applications.
52 *
53 * There are separate functions for Q7, Q15, Q31, and floating-point data types.
54 * The functions operate on blocks of input and output data and each call to the function processes
55 * <code>blockSize</code> samples through the filter. <code>pSrc</code> and
56 * <code>pDst</code> point to input and output arrays, respectively, each containing <code>blockSize</code> values.
57 *
58 * \par Algorithm:
59 * The sparse filter instance structure contains an array of tap indices <code>pTapDelay</code> which specifies the locations of the non-zero coefficients.
60 * This is in addition to the coefficient array <code>b</code>.
61 * The implementation essentially skips the multiplications by zero and leads to an efficient realization.
62 * <pre>
63 * y[n] = b[0] * x[n-pTapDelay[0]] + b[1] * x[n-pTapDelay[1]] + b[2] * x[n-pTapDelay[2]] + ...+ b[numTaps-1] * x[n-pTapDelay[numTaps-1]]
64 * </pre>
65 * \par
66 * \image html FIRSparse.gif "Sparse FIR filter. b[n] represents the filter coefficients"
67 * \par
68 * <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>;
69 * <code>pTapDelay</code> points to an array of nonzero indices and is also of size <code>numTaps</code>;
70 * <code>pState</code> points to a state array of size <code>maxDelay + blockSize</code>, where
71 * <code>maxDelay</code> is the largest offset value that is ever used in the <code>pTapDelay</code> array.
72 * Some of the processing functions also require temporary working buffers.
73 *
74 * \par Instance Structure
75 * The coefficients and state variables for a filter are stored together in an instance data structure.
76 * A separate instance structure must be defined for each filter.
77 * Coefficient and offset arrays may be shared among several instances while state variable arrays cannot be shared.
78 * There are separate instance structure declarations for each of the 4 supported data types.
79 *
80 * \par Initialization Functions
81 * There is also an associated initialization function for each data type.
82 * The initialization function performs the following operations:
83 * - Sets the values of the internal structure fields.
84 * - Zeros out the values in the state buffer.
85 * To do this manually without calling the init function, assign the following subfields of the instance structure:
86 * numTaps, pCoeffs, pTapDelay, maxDelay, stateIndex, pState. Also set all of the values in pState to zero.
87 *
88 * \par
89 * Use of the initialization function is optional.
90 * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
91 * To place an instance structure into a const data section, the instance structure must be manually initialized.
92 * Set the values in the state buffer to zeros before static initialization.
93 * The code below statically initializes each of the 4 different data type filter instance structures
94 * <pre>
95 *arm_fir_sparse_instance_f32 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};
96 *arm_fir_sparse_instance_q31 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};
97 *arm_fir_sparse_instance_q15 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};
98 *arm_fir_sparse_instance_q7 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};
99 * </pre>
100 * \par
101 *
102 * \par Fixed-Point Behavior
103 * Care must be taken when using the fixed-point versions of the sparse FIR filter functions.
104 * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
105 * Refer to the function specific documentation below for usage guidelines.
106 */
107
108 /**
109 * @addtogroup FIR_Sparse
110 * @{
111 */
112
113 /**
114 * @brief Processing function for the floating-point sparse FIR filter.
115 * @param[in] *S points to an instance of the floating-point sparse FIR structure.
116 * @param[in] *pSrc points to the block of input data.
117 * @param[out] *pDst points to the block of output data
118 * @param[in] *pScratchIn points to a temporary buffer of size blockSize.
119 * @param[in] blockSize number of input samples to process per call.
120 * @return none.
121 */
122
123 void arm_fir_sparse_f32(
124 arm_fir_sparse_instance_f32 * S,
125 float32_t * pSrc,
126 float32_t * pDst,
127 float32_t * pScratchIn,
128 uint32_t blockSize)
129 {
130
131 float32_t *pState = S->pState; /* State pointer */
132 float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
133 float32_t *px; /* Scratch buffer pointer */
134 float32_t *py = pState; /* Temporary pointers for state buffer */
135 float32_t *pb = pScratchIn; /* Temporary pointers for scratch buffer */
136 float32_t *pOut; /* Destination pointer */
137 int32_t *pTapDelay = S->pTapDelay; /* Pointer to the array containing offset of the non-zero tap values. */
138 uint32_t delaySize = S->maxDelay + blockSize; /* state length */
139 uint16_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
140 int32_t readIndex; /* Read index of the state buffer */
141 uint32_t tapCnt, blkCnt; /* loop counters */
142 float32_t coeff = *pCoeffs++; /* Read the first coefficient value */
143
144
145
146 /* BlockSize of Input samples are copied into the state buffer */
147 /* StateIndex points to the starting position to write in the state buffer */
148 arm_circularWrite_f32((int32_t *) py, delaySize, &S->stateIndex, 1,
149 (int32_t *) pSrc, 1, blockSize);
150
151
152 /* Read Index, from where the state buffer should be read, is calculated. */
153 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
154
155 /* Wraparound of readIndex */
156 if(readIndex < 0)
157 {
158 readIndex += (int32_t) delaySize;
159 }
160
161 /* Working pointer for state buffer is updated */
162 py = pState;
163
164 /* blockSize samples are read from the state buffer */
165 arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1,
166 (int32_t *) pb, (int32_t *) pb, blockSize, 1,
167 blockSize);
168
169 /* Working pointer for the scratch buffer */
170 px = pb;
171
172 /* Working pointer for destination buffer */
173 pOut = pDst;
174
175
176 #ifndef ARM_MATH_CM0_FAMILY
177
178 /* Run the below code for Cortex-M4 and Cortex-M3 */
179
180 /* Loop over the blockSize. Unroll by a factor of 4.
181 * Compute 4 Multiplications at a time. */
182 blkCnt = blockSize >> 2u;
183
184 while(blkCnt > 0u)
185 {
186 /* Perform Multiplications and store in destination buffer */
187 *pOut++ = *px++ * coeff;
188 *pOut++ = *px++ * coeff;
189 *pOut++ = *px++ * coeff;
190 *pOut++ = *px++ * coeff;
191
192 /* Decrement the loop counter */
193 blkCnt--;
194 }
195
196 /* If the blockSize is not a multiple of 4,
197 * compute the remaining samples */
198 blkCnt = blockSize % 0x4u;
199
200 while(blkCnt > 0u)
201 {
202 /* Perform Multiplications and store in destination buffer */
203 *pOut++ = *px++ * coeff;
204
205 /* Decrement the loop counter */
206 blkCnt--;
207 }
208
209 /* Load the coefficient value and
210 * increment the coefficient buffer for the next set of state values */
211 coeff = *pCoeffs++;
212
213 /* Read Index, from where the state buffer should be read, is calculated. */
214 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
215
216 /* Wraparound of readIndex */
217 if(readIndex < 0)
218 {
219 readIndex += (int32_t) delaySize;
220 }
221
222 /* Loop over the number of taps. */
223 tapCnt = (uint32_t) numTaps - 1u;
224
225 while(tapCnt > 0u)
226 {
227
228 /* Working pointer for state buffer is updated */
229 py = pState;
230
231 /* blockSize samples are read from the state buffer */
232 arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1,
233 (int32_t *) pb, (int32_t *) pb, blockSize, 1,
234 blockSize);
235
236 /* Working pointer for the scratch buffer */
237 px = pb;
238
239 /* Working pointer for destination buffer */
240 pOut = pDst;
241
242 /* Loop over the blockSize. Unroll by a factor of 4.
243 * Compute 4 MACS at a time. */
244 blkCnt = blockSize >> 2u;
245
246 while(blkCnt > 0u)
247 {
248 /* Perform Multiply-Accumulate */
249 *pOut++ += *px++ * coeff;
250 *pOut++ += *px++ * coeff;
251 *pOut++ += *px++ * coeff;
252 *pOut++ += *px++ * coeff;
253
254 /* Decrement the loop counter */
255 blkCnt--;
256 }
257
258 /* If the blockSize is not a multiple of 4,
259 * compute the remaining samples */
260 blkCnt = blockSize % 0x4u;
261
262 while(blkCnt > 0u)
263 {
264 /* Perform Multiply-Accumulate */
265 *pOut++ += *px++ * coeff;
266
267 /* Decrement the loop counter */
268 blkCnt--;
269 }
270
271 /* Load the coefficient value and
272 * increment the coefficient buffer for the next set of state values */
273 coeff = *pCoeffs++;
274
275 /* Read Index, from where the state buffer should be read, is calculated. */
276 readIndex = ((int32_t) S->stateIndex -
277 (int32_t) blockSize) - *pTapDelay++;
278
279 /* Wraparound of readIndex */
280 if(readIndex < 0)
281 {
282 readIndex += (int32_t) delaySize;
283 }
284
285 /* Decrement the tap loop counter */
286 tapCnt--;
287 }
288
289 #else
290
291 /* Run the below code for Cortex-M0 */
292
293 blkCnt = blockSize;
294
295 while(blkCnt > 0u)
296 {
297 /* Perform Multiplications and store in destination buffer */
298 *pOut++ = *px++ * coeff;
299
300 /* Decrement the loop counter */
301 blkCnt--;
302 }
303
304 /* Load the coefficient value and
305 * increment the coefficient buffer for the next set of state values */
306 coeff = *pCoeffs++;
307
308 /* Read Index, from where the state buffer should be read, is calculated. */
309 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
310
311 /* Wraparound of readIndex */
312 if(readIndex < 0)
313 {
314 readIndex += (int32_t) delaySize;
315 }
316
317 /* Loop over the number of taps. */
318 tapCnt = (uint32_t) numTaps - 1u;
319
320 while(tapCnt > 0u)
321 {
322
323 /* Working pointer for state buffer is updated */
324 py = pState;
325
326 /* blockSize samples are read from the state buffer */
327 arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1,
328 (int32_t *) pb, (int32_t *) pb, blockSize, 1,
329 blockSize);
330
331 /* Working pointer for the scratch buffer */
332 px = pb;
333
334 /* Working pointer for destination buffer */
335 pOut = pDst;
336
337 blkCnt = blockSize;
338
339 while(blkCnt > 0u)
340 {
341 /* Perform Multiply-Accumulate */
342 *pOut++ += *px++ * coeff;
343
344 /* Decrement the loop counter */
345 blkCnt--;
346 }
347
348 /* Load the coefficient value and
349 * increment the coefficient buffer for the next set of state values */
350 coeff = *pCoeffs++;
351
352 /* Read Index, from where the state buffer should be read, is calculated. */
353 readIndex =
354 ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
355
356 /* Wraparound of readIndex */
357 if(readIndex < 0)
358 {
359 readIndex += (int32_t) delaySize;
360 }
361
362 /* Decrement the tap loop counter */
363 tapCnt--;
364 }
365
366 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
367
368 }
369
370 /**
371 * @} end of FIR_Sparse group
372 */
Imprint / Impressum