]> git.gir.st - tmk_keyboard.git/blob - tmk_core/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix4_q31.c
Merge commit '1fe4406f374291ab2e86e95a97341fd9c475fcb8'
[tmk_keyboard.git] / tmk_core / tool / mbed / mbed-sdk / libraries / dsp / cmsis_dsp / TransformFunctions / arm_cfft_radix4_q31.c
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
3 *
4 * $Date: 17. January 2013
5 * $Revision: V1.4.1
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_cfft_radix4_q31.c
9 *
10 * Description: This file has function definition of Radix-4 FFT & IFFT function and
11 * In-place bit reversal using bit reversal table
12 *
13 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * - Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * - Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in
22 * the documentation and/or other materials provided with the
23 * distribution.
24 * - Neither the name of ARM LIMITED nor the names of its contributors
25 * may be used to endorse or promote products derived from this
26 * software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
31 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
32 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
33 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
34 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
35 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
36 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
38 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 * -------------------------------------------------------------------- */
41
42 #include "arm_math.h"
43
44 void arm_radix4_butterfly_inverse_q31(
45 q31_t * pSrc,
46 uint32_t fftLen,
47 q31_t * pCoef,
48 uint32_t twidCoefModifier);
49
50 void arm_radix4_butterfly_q31(
51 q31_t * pSrc,
52 uint32_t fftLen,
53 q31_t * pCoef,
54 uint32_t twidCoefModifier);
55
56 void arm_bitreversal_q31(
57 q31_t * pSrc,
58 uint32_t fftLen,
59 uint16_t bitRevFactor,
60 uint16_t * pBitRevTab);
61
62 /**
63 * @ingroup groupTransforms
64 */
65
66 /**
67 * @addtogroup ComplexFFT
68 * @{
69 */
70
71 /**
72 * @details
73 * @brief Processing function for the Q31 CFFT/CIFFT.
74 * @param[in] *S points to an instance of the Q31 CFFT/CIFFT structure.
75 * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
76 * @return none.
77 *
78 * \par Input and output formats:
79 * \par
80 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
81 * Hence the output format is different for different FFT sizes.
82 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
83 * \par
84 * \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT"
85 * \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT"
86 *
87 */
88
89 void arm_cfft_radix4_q31(
90 const arm_cfft_radix4_instance_q31 * S,
91 q31_t * pSrc)
92 {
93 if(S->ifftFlag == 1u)
94 {
95 /* Complex IFFT radix-4 */
96 arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle,
97 S->twidCoefModifier);
98 }
99 else
100 {
101 /* Complex FFT radix-4 */
102 arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle,
103 S->twidCoefModifier);
104 }
105
106
107 if(S->bitReverseFlag == 1u)
108 {
109 /* Bit Reversal */
110 arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
111 }
112
113 }
114
115 /**
116 * @} end of ComplexFFT group
117 */
118
119 /*
120 * Radix-4 FFT algorithm used is :
121 *
122 * Input real and imaginary data:
123 * x(n) = xa + j * ya
124 * x(n+N/4 ) = xb + j * yb
125 * x(n+N/2 ) = xc + j * yc
126 * x(n+3N 4) = xd + j * yd
127 *
128 *
129 * Output real and imaginary data:
130 * x(4r) = xa'+ j * ya'
131 * x(4r+1) = xb'+ j * yb'
132 * x(4r+2) = xc'+ j * yc'
133 * x(4r+3) = xd'+ j * yd'
134 *
135 *
136 * Twiddle factors for radix-4 FFT:
137 * Wn = co1 + j * (- si1)
138 * W2n = co2 + j * (- si2)
139 * W3n = co3 + j * (- si3)
140 *
141 * Butterfly implementation:
142 * xa' = xa + xb + xc + xd
143 * ya' = ya + yb + yc + yd
144 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
145 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
146 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
147 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
148 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
149 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
150 *
151 */
152
153 /**
154 * @brief Core function for the Q31 CFFT butterfly process.
155 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
156 * @param[in] fftLen length of the FFT.
157 * @param[in] *pCoef points to twiddle coefficient buffer.
158 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
159 * @return none.
160 */
161
162 void arm_radix4_butterfly_q31(
163 q31_t * pSrc,
164 uint32_t fftLen,
165 q31_t * pCoef,
166 uint32_t twidCoefModifier)
167 {
168 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
169 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
170
171 q31_t xa, xb, xc, xd;
172 q31_t ya, yb, yc, yd;
173 q31_t xa_out, xb_out, xc_out, xd_out;
174 q31_t ya_out, yb_out, yc_out, yd_out;
175
176 q31_t *ptr1;
177 q63_t xaya, xbyb, xcyc, xdyd;
178 /* Total process is divided into three stages */
179
180 /* process first stage, middle stages, & last stage */
181
182
183 /* start of first stage process */
184
185 /* Initializations for the first stage */
186 n2 = fftLen;
187 n1 = n2;
188 /* n2 = fftLen/4 */
189 n2 >>= 2u;
190 i0 = 0u;
191 ia1 = 0u;
192
193 j = n2;
194
195 /* Calculation of first stage */
196 do
197 {
198 /* index calculation for the input as, */
199 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
200 i1 = i0 + n2;
201 i2 = i1 + n2;
202 i3 = i2 + n2;
203
204 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
205
206 /* Butterfly implementation */
207 /* xa + xc */
208 r1 = (pSrc[(2u * i0)] >> 4u) + (pSrc[(2u * i2)] >> 4u);
209 /* xa - xc */
210 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u);
211
212 /* xb + xd */
213 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u);
214
215 /* ya + yc */
216 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u);
217 /* ya - yc */
218 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u);
219
220 /* xa' = xa + xb + xc + xd */
221 pSrc[2u * i0] = (r1 + t1);
222 /* (xa + xc) - (xb + xd) */
223 r1 = r1 - t1;
224 /* yb + yd */
225 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u);
226
227 /* ya' = ya + yb + yc + yd */
228 pSrc[(2u * i0) + 1u] = (s1 + t2);
229
230 /* (ya + yc) - (yb + yd) */
231 s1 = s1 - t2;
232
233 /* yb - yd */
234 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u);
235 /* xb - xd */
236 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u);
237
238 /* index calculation for the coefficients */
239 ia2 = 2u * ia1;
240 co2 = pCoef[ia2 * 2u];
241 si2 = pCoef[(ia2 * 2u) + 1u];
242
243 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
244 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
245 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
246
247 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
248 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
249 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
250
251 /* (xa - xc) + (yb - yd) */
252 r1 = r2 + t1;
253 /* (xa - xc) - (yb - yd) */
254 r2 = r2 - t1;
255
256 /* (ya - yc) - (xb - xd) */
257 s1 = s2 - t2;
258 /* (ya - yc) + (xb - xd) */
259 s2 = s2 + t2;
260
261 co1 = pCoef[ia1 * 2u];
262 si1 = pCoef[(ia1 * 2u) + 1u];
263
264 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
265 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
266 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
267
268 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
269 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
270 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
271
272 /* index calculation for the coefficients */
273 ia3 = 3u * ia1;
274 co3 = pCoef[ia3 * 2u];
275 si3 = pCoef[(ia3 * 2u) + 1u];
276
277 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
278 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
279 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
280
281 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
282 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
283 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
284
285 /* Twiddle coefficients index modifier */
286 ia1 = ia1 + twidCoefModifier;
287
288 /* Updating input index */
289 i0 = i0 + 1u;
290
291 } while(--j);
292
293 /* end of first stage process */
294
295 /* data is in 5.27(q27) format */
296
297
298 /* start of Middle stages process */
299
300
301 /* each stage in middle stages provides two down scaling of the input */
302
303 twidCoefModifier <<= 2u;
304
305
306 for (k = fftLen / 4u; k > 4u; k >>= 2u)
307 {
308 /* Initializations for the first stage */
309 n1 = n2;
310 n2 >>= 2u;
311 ia1 = 0u;
312
313 /* Calculation of first stage */
314 for (j = 0u; j <= (n2 - 1u); j++)
315 {
316 /* index calculation for the coefficients */
317 ia2 = ia1 + ia1;
318 ia3 = ia2 + ia1;
319 co1 = pCoef[ia1 * 2u];
320 si1 = pCoef[(ia1 * 2u) + 1u];
321 co2 = pCoef[ia2 * 2u];
322 si2 = pCoef[(ia2 * 2u) + 1u];
323 co3 = pCoef[ia3 * 2u];
324 si3 = pCoef[(ia3 * 2u) + 1u];
325 /* Twiddle coefficients index modifier */
326 ia1 = ia1 + twidCoefModifier;
327
328 for (i0 = j; i0 < fftLen; i0 += n1)
329 {
330 /* index calculation for the input as, */
331 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
332 i1 = i0 + n2;
333 i2 = i1 + n2;
334 i3 = i2 + n2;
335
336 /* Butterfly implementation */
337 /* xa + xc */
338 r1 = pSrc[2u * i0] + pSrc[2u * i2];
339 /* xa - xc */
340 r2 = pSrc[2u * i0] - pSrc[2u * i2];
341
342 /* ya + yc */
343 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
344 /* ya - yc */
345 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
346
347 /* xb + xd */
348 t1 = pSrc[2u * i1] + pSrc[2u * i3];
349
350 /* xa' = xa + xb + xc + xd */
351 pSrc[2u * i0] = (r1 + t1) >> 2u;
352 /* xa + xc -(xb + xd) */
353 r1 = r1 - t1;
354
355 /* yb + yd */
356 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
357 /* ya' = ya + yb + yc + yd */
358 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u;
359
360 /* (ya + yc) - (yb + yd) */
361 s1 = s1 - t2;
362
363 /* (yb - yd) */
364 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
365 /* (xb - xd) */
366 t2 = pSrc[2u * i1] - pSrc[2u * i3];
367
368 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
369 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
370 ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u;
371
372 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
373 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
374 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u;
375
376 /* (xa - xc) + (yb - yd) */
377 r1 = r2 + t1;
378 /* (xa - xc) - (yb - yd) */
379 r2 = r2 - t1;
380
381 /* (ya - yc) - (xb - xd) */
382 s1 = s2 - t2;
383 /* (ya - yc) + (xb - xd) */
384 s2 = s2 + t2;
385
386 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
387 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
388 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
389
390 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
391 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
392 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
393
394 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
395 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
396 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
397
398 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
399 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
400 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
401 }
402 }
403 twidCoefModifier <<= 2u;
404 }
405
406 /* End of Middle stages process */
407
408 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
409 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
410 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
411 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
412
413
414 /* start of Last stage process */
415 /* Initializations for the last stage */
416 j = fftLen >> 2;
417 ptr1 = &pSrc[0];
418
419 /* Calculations of last stage */
420 do
421 {
422
423 #ifndef ARM_MATH_BIG_ENDIAN
424
425 /* Read xa (real), ya(imag) input */
426 xaya = *__SIMD64(ptr1)++;
427 xa = (q31_t) xaya;
428 ya = (q31_t) (xaya >> 32);
429
430 /* Read xb (real), yb(imag) input */
431 xbyb = *__SIMD64(ptr1)++;
432 xb = (q31_t) xbyb;
433 yb = (q31_t) (xbyb >> 32);
434
435 /* Read xc (real), yc(imag) input */
436 xcyc = *__SIMD64(ptr1)++;
437 xc = (q31_t) xcyc;
438 yc = (q31_t) (xcyc >> 32);
439
440 /* Read xc (real), yc(imag) input */
441 xdyd = *__SIMD64(ptr1)++;
442 xd = (q31_t) xdyd;
443 yd = (q31_t) (xdyd >> 32);
444
445 #else
446
447 /* Read xa (real), ya(imag) input */
448 xaya = *__SIMD64(ptr1)++;
449 ya = (q31_t) xaya;
450 xa = (q31_t) (xaya >> 32);
451
452 /* Read xb (real), yb(imag) input */
453 xbyb = *__SIMD64(ptr1)++;
454 yb = (q31_t) xbyb;
455 xb = (q31_t) (xbyb >> 32);
456
457 /* Read xc (real), yc(imag) input */
458 xcyc = *__SIMD64(ptr1)++;
459 yc = (q31_t) xcyc;
460 xc = (q31_t) (xcyc >> 32);
461
462 /* Read xc (real), yc(imag) input */
463 xdyd = *__SIMD64(ptr1)++;
464 yd = (q31_t) xdyd;
465 xd = (q31_t) (xdyd >> 32);
466
467
468 #endif
469
470 /* xa' = xa + xb + xc + xd */
471 xa_out = xa + xb + xc + xd;
472
473 /* ya' = ya + yb + yc + yd */
474 ya_out = ya + yb + yc + yd;
475
476 /* pointer updation for writing */
477 ptr1 = ptr1 - 8u;
478
479 /* writing xa' and ya' */
480 *ptr1++ = xa_out;
481 *ptr1++ = ya_out;
482
483 xc_out = (xa - xb + xc - xd);
484 yc_out = (ya - yb + yc - yd);
485
486 /* writing xc' and yc' */
487 *ptr1++ = xc_out;
488 *ptr1++ = yc_out;
489
490 xb_out = (xa + yb - xc - yd);
491 yb_out = (ya - xb - yc + xd);
492
493 /* writing xb' and yb' */
494 *ptr1++ = xb_out;
495 *ptr1++ = yb_out;
496
497 xd_out = (xa - yb - xc + yd);
498 yd_out = (ya + xb - yc - xd);
499
500 /* writing xd' and yd' */
501 *ptr1++ = xd_out;
502 *ptr1++ = yd_out;
503
504
505 } while(--j);
506
507 /* output is in 11.21(q21) format for the 1024 point */
508 /* output is in 9.23(q23) format for the 256 point */
509 /* output is in 7.25(q25) format for the 64 point */
510 /* output is in 5.27(q27) format for the 16 point */
511
512 /* End of last stage process */
513
514 }
515
516
517 /**
518 * @brief Core function for the Q31 CIFFT butterfly process.
519 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
520 * @param[in] fftLen length of the FFT.
521 * @param[in] *pCoef points to twiddle coefficient buffer.
522 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
523 * @return none.
524 */
525
526
527 /*
528 * Radix-4 IFFT algorithm used is :
529 *
530 * CIFFT uses same twiddle coefficients as CFFT Function
531 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4]
532 *
533 *
534 * IFFT is implemented with following changes in equations from FFT
535 *
536 * Input real and imaginary data:
537 * x(n) = xa + j * ya
538 * x(n+N/4 ) = xb + j * yb
539 * x(n+N/2 ) = xc + j * yc
540 * x(n+3N 4) = xd + j * yd
541 *
542 *
543 * Output real and imaginary data:
544 * x(4r) = xa'+ j * ya'
545 * x(4r+1) = xb'+ j * yb'
546 * x(4r+2) = xc'+ j * yc'
547 * x(4r+3) = xd'+ j * yd'
548 *
549 *
550 * Twiddle factors for radix-4 IFFT:
551 * Wn = co1 + j * (si1)
552 * W2n = co2 + j * (si2)
553 * W3n = co3 + j * (si3)
554
555 * The real and imaginary output values for the radix-4 butterfly are
556 * xa' = xa + xb + xc + xd
557 * ya' = ya + yb + yc + yd
558 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
559 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
560 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
561 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
562 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
563 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
564 *
565 */
566
567 void arm_radix4_butterfly_inverse_q31(
568 q31_t * pSrc,
569 uint32_t fftLen,
570 q31_t * pCoef,
571 uint32_t twidCoefModifier)
572 {
573 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
574 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
575 q31_t xa, xb, xc, xd;
576 q31_t ya, yb, yc, yd;
577 q31_t xa_out, xb_out, xc_out, xd_out;
578 q31_t ya_out, yb_out, yc_out, yd_out;
579
580 q31_t *ptr1;
581 q63_t xaya, xbyb, xcyc, xdyd;
582
583 /* input is be 1.31(q31) format for all FFT sizes */
584 /* Total process is divided into three stages */
585 /* process first stage, middle stages, & last stage */
586
587 /* Start of first stage process */
588
589 /* Initializations for the first stage */
590 n2 = fftLen;
591 n1 = n2;
592 /* n2 = fftLen/4 */
593 n2 >>= 2u;
594 i0 = 0u;
595 ia1 = 0u;
596
597 j = n2;
598
599 do
600 {
601
602 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
603
604 /* index calculation for the input as, */
605 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
606 i1 = i0 + n2;
607 i2 = i1 + n2;
608 i3 = i2 + n2;
609
610 /* Butterfly implementation */
611 /* xa + xc */
612 r1 = (pSrc[2u * i0] >> 4u) + (pSrc[2u * i2] >> 4u);
613 /* xa - xc */
614 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u);
615
616 /* xb + xd */
617 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u);
618
619 /* ya + yc */
620 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u);
621 /* ya - yc */
622 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u);
623
624 /* xa' = xa + xb + xc + xd */
625 pSrc[2u * i0] = (r1 + t1);
626 /* (xa + xc) - (xb + xd) */
627 r1 = r1 - t1;
628 /* yb + yd */
629 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u);
630 /* ya' = ya + yb + yc + yd */
631 pSrc[(2u * i0) + 1u] = (s1 + t2);
632
633 /* (ya + yc) - (yb + yd) */
634 s1 = s1 - t2;
635
636 /* yb - yd */
637 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u);
638 /* xb - xd */
639 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u);
640
641 /* index calculation for the coefficients */
642 ia2 = 2u * ia1;
643 co2 = pCoef[ia2 * 2u];
644 si2 = pCoef[(ia2 * 2u) + 1u];
645
646 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
647 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
648 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
649
650 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
651 pSrc[2u * i1 + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
652 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
653
654 /* (xa - xc) - (yb - yd) */
655 r1 = r2 - t1;
656 /* (xa - xc) + (yb - yd) */
657 r2 = r2 + t1;
658
659 /* (ya - yc) + (xb - xd) */
660 s1 = s2 + t2;
661 /* (ya - yc) - (xb - xd) */
662 s2 = s2 - t2;
663
664 co1 = pCoef[ia1 * 2u];
665 si1 = pCoef[(ia1 * 2u) + 1u];
666
667 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
668 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
669 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
670
671 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
672 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
673 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
674
675 /* index calculation for the coefficients */
676 ia3 = 3u * ia1;
677 co3 = pCoef[ia3 * 2u];
678 si3 = pCoef[(ia3 * 2u) + 1u];
679
680 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
681 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
682 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
683
684 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
685 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
686 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
687
688 /* Twiddle coefficients index modifier */
689 ia1 = ia1 + twidCoefModifier;
690
691 /* Updating input index */
692 i0 = i0 + 1u;
693
694 } while(--j);
695
696 /* data is in 5.27(q27) format */
697 /* each stage provides two down scaling of the input */
698
699
700 /* Start of Middle stages process */
701
702 twidCoefModifier <<= 2u;
703
704 /* Calculation of second stage to excluding last stage */
705 for (k = fftLen / 4u; k > 4u; k >>= 2u)
706 {
707 /* Initializations for the first stage */
708 n1 = n2;
709 n2 >>= 2u;
710 ia1 = 0u;
711
712 for (j = 0; j <= (n2 - 1u); j++)
713 {
714 /* index calculation for the coefficients */
715 ia2 = ia1 + ia1;
716 ia3 = ia2 + ia1;
717 co1 = pCoef[ia1 * 2u];
718 si1 = pCoef[(ia1 * 2u) + 1u];
719 co2 = pCoef[ia2 * 2u];
720 si2 = pCoef[(ia2 * 2u) + 1u];
721 co3 = pCoef[ia3 * 2u];
722 si3 = pCoef[(ia3 * 2u) + 1u];
723 /* Twiddle coefficients index modifier */
724 ia1 = ia1 + twidCoefModifier;
725
726 for (i0 = j; i0 < fftLen; i0 += n1)
727 {
728 /* index calculation for the input as, */
729 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
730 i1 = i0 + n2;
731 i2 = i1 + n2;
732 i3 = i2 + n2;
733
734 /* Butterfly implementation */
735 /* xa + xc */
736 r1 = pSrc[2u * i0] + pSrc[2u * i2];
737 /* xa - xc */
738 r2 = pSrc[2u * i0] - pSrc[2u * i2];
739
740 /* ya + yc */
741 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
742 /* ya - yc */
743 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
744
745 /* xb + xd */
746 t1 = pSrc[2u * i1] + pSrc[2u * i3];
747
748 /* xa' = xa + xb + xc + xd */
749 pSrc[2u * i0] = (r1 + t1) >> 2u;
750 /* xa + xc -(xb + xd) */
751 r1 = r1 - t1;
752 /* yb + yd */
753 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
754 /* ya' = ya + yb + yc + yd */
755 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u;
756
757 /* (ya + yc) - (yb + yd) */
758 s1 = s1 - t2;
759
760 /* (yb - yd) */
761 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
762 /* (xb - xd) */
763 t2 = pSrc[2u * i1] - pSrc[2u * i3];
764
765 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
766 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) -
767 ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u;
768
769 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
770 pSrc[(2u * i1) + 1u] =
771 (((int32_t) (((q63_t) s1 * co2) >> 32u)) +
772 ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u;
773
774 /* (xa - xc) - (yb - yd) */
775 r1 = r2 - t1;
776 /* (xa - xc) + (yb - yd) */
777 r2 = r2 + t1;
778
779 /* (ya - yc) + (xb - xd) */
780 s1 = s2 + t2;
781 /* (ya - yc) - (xb - xd) */
782 s2 = s2 - t2;
783
784 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
785 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
786 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
787
788 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
789 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
790 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
791
792 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
793 pSrc[(2u * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
794 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
795
796 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
797 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
798 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
799 }
800 }
801 twidCoefModifier <<= 2u;
802 }
803
804 /* End of Middle stages process */
805
806 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
807 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
808 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
809 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
810
811
812 /* Start of last stage process */
813
814
815 /* Initializations for the last stage */
816 j = fftLen >> 2;
817 ptr1 = &pSrc[0];
818
819 /* Calculations of last stage */
820 do
821 {
822 #ifndef ARM_MATH_BIG_ENDIAN
823 /* Read xa (real), ya(imag) input */
824 xaya = *__SIMD64(ptr1)++;
825 xa = (q31_t) xaya;
826 ya = (q31_t) (xaya >> 32);
827
828 /* Read xb (real), yb(imag) input */
829 xbyb = *__SIMD64(ptr1)++;
830 xb = (q31_t) xbyb;
831 yb = (q31_t) (xbyb >> 32);
832
833 /* Read xc (real), yc(imag) input */
834 xcyc = *__SIMD64(ptr1)++;
835 xc = (q31_t) xcyc;
836 yc = (q31_t) (xcyc >> 32);
837
838 /* Read xc (real), yc(imag) input */
839 xdyd = *__SIMD64(ptr1)++;
840 xd = (q31_t) xdyd;
841 yd = (q31_t) (xdyd >> 32);
842
843 #else
844
845 /* Read xa (real), ya(imag) input */
846 xaya = *__SIMD64(ptr1)++;
847 ya = (q31_t) xaya;
848 xa = (q31_t) (xaya >> 32);
849
850 /* Read xb (real), yb(imag) input */
851 xbyb = *__SIMD64(ptr1)++;
852 yb = (q31_t) xbyb;
853 xb = (q31_t) (xbyb >> 32);
854
855 /* Read xc (real), yc(imag) input */
856 xcyc = *__SIMD64(ptr1)++;
857 yc = (q31_t) xcyc;
858 xc = (q31_t) (xcyc >> 32);
859
860 /* Read xc (real), yc(imag) input */
861 xdyd = *__SIMD64(ptr1)++;
862 yd = (q31_t) xdyd;
863 xd = (q31_t) (xdyd >> 32);
864
865
866 #endif
867
868 /* xa' = xa + xb + xc + xd */
869 xa_out = xa + xb + xc + xd;
870
871 /* ya' = ya + yb + yc + yd */
872 ya_out = ya + yb + yc + yd;
873
874 /* pointer updation for writing */
875 ptr1 = ptr1 - 8u;
876
877 /* writing xa' and ya' */
878 *ptr1++ = xa_out;
879 *ptr1++ = ya_out;
880
881 xc_out = (xa - xb + xc - xd);
882 yc_out = (ya - yb + yc - yd);
883
884 /* writing xc' and yc' */
885 *ptr1++ = xc_out;
886 *ptr1++ = yc_out;
887
888 xb_out = (xa - yb - xc + yd);
889 yb_out = (ya + xb - yc - xd);
890
891 /* writing xb' and yb' */
892 *ptr1++ = xb_out;
893 *ptr1++ = yb_out;
894
895 xd_out = (xa + yb - xc - yd);
896 yd_out = (ya - xb - yc + xd);
897
898 /* writing xd' and yd' */
899 *ptr1++ = xd_out;
900 *ptr1++ = yd_out;
901
902
903 } while(--j);
904
905 /* output is in 11.21(q21) format for the 1024 point */
906 /* output is in 9.23(q23) format for the 256 point */
907 /* output is in 7.25(q25) format for the 64 point */
908 /* output is in 5.27(q27) format for the 16 point */
909
910 /* End of last stage process */
911 }
Imprint / Impressum