1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
4 * $Date: 17. January 2013
7 * Project: CMSIS DSP Library
8 * Title: arm_cfft_radix4_q31.c
10 * Description: This file has function definition of Radix-4 FFT & IFFT function and
11 * In-place bit reversal using bit reversal table
13 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
18 * - Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * - Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in
22 * the documentation and/or other materials provided with the
24 * - Neither the name of ARM LIMITED nor the names of its contributors
25 * may be used to endorse or promote products derived from this
26 * software without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
31 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
32 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
33 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
34 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
35 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
36 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
38 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 * -------------------------------------------------------------------- */
44 void arm_radix4_butterfly_inverse_q31(
48 uint32_t twidCoefModifier
);
50 void arm_radix4_butterfly_q31(
54 uint32_t twidCoefModifier
);
56 void arm_bitreversal_q31(
59 uint16_t bitRevFactor
,
60 uint16_t * pBitRevTab
);
63 * @ingroup groupTransforms
67 * @addtogroup ComplexFFT
73 * @brief Processing function for the Q31 CFFT/CIFFT.
74 * @param[in] *S points to an instance of the Q31 CFFT/CIFFT structure.
75 * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
78 * \par Input and output formats:
80 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
81 * Hence the output format is different for different FFT sizes.
82 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
84 * \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT"
85 * \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT"
89 void arm_cfft_radix4_q31(
90 const arm_cfft_radix4_instance_q31
* S
,
95 /* Complex IFFT radix-4 */
96 arm_radix4_butterfly_inverse_q31(pSrc
, S
->fftLen
, S
->pTwiddle
,
101 /* Complex FFT radix-4 */
102 arm_radix4_butterfly_q31(pSrc
, S
->fftLen
, S
->pTwiddle
,
103 S
->twidCoefModifier
);
107 if(S
->bitReverseFlag
== 1u)
110 arm_bitreversal_q31(pSrc
, S
->fftLen
, S
->bitRevFactor
, S
->pBitRevTable
);
116 * @} end of ComplexFFT group
120 * Radix-4 FFT algorithm used is :
122 * Input real and imaginary data:
124 * x(n+N/4 ) = xb + j * yb
125 * x(n+N/2 ) = xc + j * yc
126 * x(n+3N 4) = xd + j * yd
129 * Output real and imaginary data:
130 * x(4r) = xa'+ j * ya'
131 * x(4r+1) = xb'+ j * yb'
132 * x(4r+2) = xc'+ j * yc'
133 * x(4r+3) = xd'+ j * yd'
136 * Twiddle factors for radix-4 FFT:
137 * Wn = co1 + j * (- si1)
138 * W2n = co2 + j * (- si2)
139 * W3n = co3 + j * (- si3)
141 * Butterfly implementation:
142 * xa' = xa + xb + xc + xd
143 * ya' = ya + yb + yc + yd
144 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
145 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
146 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
147 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
148 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
149 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
154 * @brief Core function for the Q31 CFFT butterfly process.
155 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
156 * @param[in] fftLen length of the FFT.
157 * @param[in] *pCoef points to twiddle coefficient buffer.
158 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
162 void arm_radix4_butterfly_q31(
166 uint32_t twidCoefModifier
)
168 uint32_t n1
, n2
, ia1
, ia2
, ia3
, i0
, i1
, i2
, i3
, j
, k
;
169 q31_t t1
, t2
, r1
, r2
, s1
, s2
, co1
, co2
, co3
, si1
, si2
, si3
;
171 q31_t xa
, xb
, xc
, xd
;
172 q31_t ya
, yb
, yc
, yd
;
173 q31_t xa_out
, xb_out
, xc_out
, xd_out
;
174 q31_t ya_out
, yb_out
, yc_out
, yd_out
;
177 q63_t xaya
, xbyb
, xcyc
, xdyd
;
178 /* Total process is divided into three stages */
180 /* process first stage, middle stages, & last stage */
183 /* start of first stage process */
185 /* Initializations for the first stage */
195 /* Calculation of first stage */
198 /* index calculation for the input as, */
199 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
204 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
206 /* Butterfly implementation */
208 r1
= (pSrc
[(2u * i0
)] >> 4u) + (pSrc
[(2u * i2
)] >> 4u);
210 r2
= (pSrc
[2u * i0
] >> 4u) - (pSrc
[2u * i2
] >> 4u);
213 t1
= (pSrc
[2u * i1
] >> 4u) + (pSrc
[2u * i3
] >> 4u);
216 s1
= (pSrc
[(2u * i0
) + 1u] >> 4u) + (pSrc
[(2u * i2
) + 1u] >> 4u);
218 s2
= (pSrc
[(2u * i0
) + 1u] >> 4u) - (pSrc
[(2u * i2
) + 1u] >> 4u);
220 /* xa' = xa + xb + xc + xd */
221 pSrc
[2u * i0
] = (r1
+ t1
);
222 /* (xa + xc) - (xb + xd) */
225 t2
= (pSrc
[(2u * i1
) + 1u] >> 4u) + (pSrc
[(2u * i3
) + 1u] >> 4u);
227 /* ya' = ya + yb + yc + yd */
228 pSrc
[(2u * i0
) + 1u] = (s1
+ t2
);
230 /* (ya + yc) - (yb + yd) */
234 t1
= (pSrc
[(2u * i1
) + 1u] >> 4u) - (pSrc
[(2u * i3
) + 1u] >> 4u);
236 t2
= (pSrc
[2u * i1
] >> 4u) - (pSrc
[2u * i3
] >> 4u);
238 /* index calculation for the coefficients */
240 co2
= pCoef
[ia2
* 2u];
241 si2
= pCoef
[(ia2
* 2u) + 1u];
243 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
244 pSrc
[2u * i1
] = (((int32_t) (((q63_t
) r1
* co2
) >> 32)) +
245 ((int32_t) (((q63_t
) s1
* si2
) >> 32))) << 1u;
247 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
248 pSrc
[(2u * i1
) + 1u] = (((int32_t) (((q63_t
) s1
* co2
) >> 32)) -
249 ((int32_t) (((q63_t
) r1
* si2
) >> 32))) << 1u;
251 /* (xa - xc) + (yb - yd) */
253 /* (xa - xc) - (yb - yd) */
256 /* (ya - yc) - (xb - xd) */
258 /* (ya - yc) + (xb - xd) */
261 co1
= pCoef
[ia1
* 2u];
262 si1
= pCoef
[(ia1
* 2u) + 1u];
264 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
265 pSrc
[2u * i2
] = (((int32_t) (((q63_t
) r1
* co1
) >> 32)) +
266 ((int32_t) (((q63_t
) s1
* si1
) >> 32))) << 1u;
268 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
269 pSrc
[(2u * i2
) + 1u] = (((int32_t) (((q63_t
) s1
* co1
) >> 32)) -
270 ((int32_t) (((q63_t
) r1
* si1
) >> 32))) << 1u;
272 /* index calculation for the coefficients */
274 co3
= pCoef
[ia3
* 2u];
275 si3
= pCoef
[(ia3
* 2u) + 1u];
277 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
278 pSrc
[2u * i3
] = (((int32_t) (((q63_t
) r2
* co3
) >> 32)) +
279 ((int32_t) (((q63_t
) s2
* si3
) >> 32))) << 1u;
281 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
282 pSrc
[(2u * i3
) + 1u] = (((int32_t) (((q63_t
) s2
* co3
) >> 32)) -
283 ((int32_t) (((q63_t
) r2
* si3
) >> 32))) << 1u;
285 /* Twiddle coefficients index modifier */
286 ia1
= ia1
+ twidCoefModifier
;
288 /* Updating input index */
293 /* end of first stage process */
295 /* data is in 5.27(q27) format */
298 /* start of Middle stages process */
301 /* each stage in middle stages provides two down scaling of the input */
303 twidCoefModifier
<<= 2u;
306 for (k
= fftLen
/ 4u; k
> 4u; k
>>= 2u)
308 /* Initializations for the first stage */
313 /* Calculation of first stage */
314 for (j
= 0u; j
<= (n2
- 1u); j
++)
316 /* index calculation for the coefficients */
319 co1
= pCoef
[ia1
* 2u];
320 si1
= pCoef
[(ia1
* 2u) + 1u];
321 co2
= pCoef
[ia2
* 2u];
322 si2
= pCoef
[(ia2
* 2u) + 1u];
323 co3
= pCoef
[ia3
* 2u];
324 si3
= pCoef
[(ia3
* 2u) + 1u];
325 /* Twiddle coefficients index modifier */
326 ia1
= ia1
+ twidCoefModifier
;
328 for (i0
= j
; i0
< fftLen
; i0
+= n1
)
330 /* index calculation for the input as, */
331 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
336 /* Butterfly implementation */
338 r1
= pSrc
[2u * i0
] + pSrc
[2u * i2
];
340 r2
= pSrc
[2u * i0
] - pSrc
[2u * i2
];
343 s1
= pSrc
[(2u * i0
) + 1u] + pSrc
[(2u * i2
) + 1u];
345 s2
= pSrc
[(2u * i0
) + 1u] - pSrc
[(2u * i2
) + 1u];
348 t1
= pSrc
[2u * i1
] + pSrc
[2u * i3
];
350 /* xa' = xa + xb + xc + xd */
351 pSrc
[2u * i0
] = (r1
+ t1
) >> 2u;
352 /* xa + xc -(xb + xd) */
356 t2
= pSrc
[(2u * i1
) + 1u] + pSrc
[(2u * i3
) + 1u];
357 /* ya' = ya + yb + yc + yd */
358 pSrc
[(2u * i0
) + 1u] = (s1
+ t2
) >> 2u;
360 /* (ya + yc) - (yb + yd) */
364 t1
= pSrc
[(2u * i1
) + 1u] - pSrc
[(2u * i3
) + 1u];
366 t2
= pSrc
[2u * i1
] - pSrc
[2u * i3
];
368 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
369 pSrc
[2u * i1
] = (((int32_t) (((q63_t
) r1
* co2
) >> 32)) +
370 ((int32_t) (((q63_t
) s1
* si2
) >> 32))) >> 1u;
372 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
373 pSrc
[(2u * i1
) + 1u] = (((int32_t) (((q63_t
) s1
* co2
) >> 32)) -
374 ((int32_t) (((q63_t
) r1
* si2
) >> 32))) >> 1u;
376 /* (xa - xc) + (yb - yd) */
378 /* (xa - xc) - (yb - yd) */
381 /* (ya - yc) - (xb - xd) */
383 /* (ya - yc) + (xb - xd) */
386 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
387 pSrc
[2u * i2
] = (((int32_t) (((q63_t
) r1
* co1
) >> 32)) +
388 ((int32_t) (((q63_t
) s1
* si1
) >> 32))) >> 1u;
390 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
391 pSrc
[(2u * i2
) + 1u] = (((int32_t) (((q63_t
) s1
* co1
) >> 32)) -
392 ((int32_t) (((q63_t
) r1
* si1
) >> 32))) >> 1u;
394 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
395 pSrc
[2u * i3
] = (((int32_t) (((q63_t
) r2
* co3
) >> 32)) +
396 ((int32_t) (((q63_t
) s2
* si3
) >> 32))) >> 1u;
398 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
399 pSrc
[(2u * i3
) + 1u] = (((int32_t) (((q63_t
) s2
* co3
) >> 32)) -
400 ((int32_t) (((q63_t
) r2
* si3
) >> 32))) >> 1u;
403 twidCoefModifier
<<= 2u;
406 /* End of Middle stages process */
408 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
409 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
410 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
411 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
414 /* start of Last stage process */
415 /* Initializations for the last stage */
419 /* Calculations of last stage */
423 #ifndef ARM_MATH_BIG_ENDIAN
425 /* Read xa (real), ya(imag) input */
426 xaya
= *__SIMD64(ptr1
)++;
428 ya
= (q31_t
) (xaya
>> 32);
430 /* Read xb (real), yb(imag) input */
431 xbyb
= *__SIMD64(ptr1
)++;
433 yb
= (q31_t
) (xbyb
>> 32);
435 /* Read xc (real), yc(imag) input */
436 xcyc
= *__SIMD64(ptr1
)++;
438 yc
= (q31_t
) (xcyc
>> 32);
440 /* Read xc (real), yc(imag) input */
441 xdyd
= *__SIMD64(ptr1
)++;
443 yd
= (q31_t
) (xdyd
>> 32);
447 /* Read xa (real), ya(imag) input */
448 xaya
= *__SIMD64(ptr1
)++;
450 xa
= (q31_t
) (xaya
>> 32);
452 /* Read xb (real), yb(imag) input */
453 xbyb
= *__SIMD64(ptr1
)++;
455 xb
= (q31_t
) (xbyb
>> 32);
457 /* Read xc (real), yc(imag) input */
458 xcyc
= *__SIMD64(ptr1
)++;
460 xc
= (q31_t
) (xcyc
>> 32);
462 /* Read xc (real), yc(imag) input */
463 xdyd
= *__SIMD64(ptr1
)++;
465 xd
= (q31_t
) (xdyd
>> 32);
470 /* xa' = xa + xb + xc + xd */
471 xa_out
= xa
+ xb
+ xc
+ xd
;
473 /* ya' = ya + yb + yc + yd */
474 ya_out
= ya
+ yb
+ yc
+ yd
;
476 /* pointer updation for writing */
479 /* writing xa' and ya' */
483 xc_out
= (xa
- xb
+ xc
- xd
);
484 yc_out
= (ya
- yb
+ yc
- yd
);
486 /* writing xc' and yc' */
490 xb_out
= (xa
+ yb
- xc
- yd
);
491 yb_out
= (ya
- xb
- yc
+ xd
);
493 /* writing xb' and yb' */
497 xd_out
= (xa
- yb
- xc
+ yd
);
498 yd_out
= (ya
+ xb
- yc
- xd
);
500 /* writing xd' and yd' */
507 /* output is in 11.21(q21) format for the 1024 point */
508 /* output is in 9.23(q23) format for the 256 point */
509 /* output is in 7.25(q25) format for the 64 point */
510 /* output is in 5.27(q27) format for the 16 point */
512 /* End of last stage process */
518 * @brief Core function for the Q31 CIFFT butterfly process.
519 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
520 * @param[in] fftLen length of the FFT.
521 * @param[in] *pCoef points to twiddle coefficient buffer.
522 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
528 * Radix-4 IFFT algorithm used is :
530 * CIFFT uses same twiddle coefficients as CFFT Function
531 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4]
534 * IFFT is implemented with following changes in equations from FFT
536 * Input real and imaginary data:
538 * x(n+N/4 ) = xb + j * yb
539 * x(n+N/2 ) = xc + j * yc
540 * x(n+3N 4) = xd + j * yd
543 * Output real and imaginary data:
544 * x(4r) = xa'+ j * ya'
545 * x(4r+1) = xb'+ j * yb'
546 * x(4r+2) = xc'+ j * yc'
547 * x(4r+3) = xd'+ j * yd'
550 * Twiddle factors for radix-4 IFFT:
551 * Wn = co1 + j * (si1)
552 * W2n = co2 + j * (si2)
553 * W3n = co3 + j * (si3)
555 * The real and imaginary output values for the radix-4 butterfly are
556 * xa' = xa + xb + xc + xd
557 * ya' = ya + yb + yc + yd
558 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
559 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
560 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
561 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
562 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
563 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
567 void arm_radix4_butterfly_inverse_q31(
571 uint32_t twidCoefModifier
)
573 uint32_t n1
, n2
, ia1
, ia2
, ia3
, i0
, i1
, i2
, i3
, j
, k
;
574 q31_t t1
, t2
, r1
, r2
, s1
, s2
, co1
, co2
, co3
, si1
, si2
, si3
;
575 q31_t xa
, xb
, xc
, xd
;
576 q31_t ya
, yb
, yc
, yd
;
577 q31_t xa_out
, xb_out
, xc_out
, xd_out
;
578 q31_t ya_out
, yb_out
, yc_out
, yd_out
;
581 q63_t xaya
, xbyb
, xcyc
, xdyd
;
583 /* input is be 1.31(q31) format for all FFT sizes */
584 /* Total process is divided into three stages */
585 /* process first stage, middle stages, & last stage */
587 /* Start of first stage process */
589 /* Initializations for the first stage */
602 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
604 /* index calculation for the input as, */
605 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
610 /* Butterfly implementation */
612 r1
= (pSrc
[2u * i0
] >> 4u) + (pSrc
[2u * i2
] >> 4u);
614 r2
= (pSrc
[2u * i0
] >> 4u) - (pSrc
[2u * i2
] >> 4u);
617 t1
= (pSrc
[2u * i1
] >> 4u) + (pSrc
[2u * i3
] >> 4u);
620 s1
= (pSrc
[(2u * i0
) + 1u] >> 4u) + (pSrc
[(2u * i2
) + 1u] >> 4u);
622 s2
= (pSrc
[(2u * i0
) + 1u] >> 4u) - (pSrc
[(2u * i2
) + 1u] >> 4u);
624 /* xa' = xa + xb + xc + xd */
625 pSrc
[2u * i0
] = (r1
+ t1
);
626 /* (xa + xc) - (xb + xd) */
629 t2
= (pSrc
[(2u * i1
) + 1u] >> 4u) + (pSrc
[(2u * i3
) + 1u] >> 4u);
630 /* ya' = ya + yb + yc + yd */
631 pSrc
[(2u * i0
) + 1u] = (s1
+ t2
);
633 /* (ya + yc) - (yb + yd) */
637 t1
= (pSrc
[(2u * i1
) + 1u] >> 4u) - (pSrc
[(2u * i3
) + 1u] >> 4u);
639 t2
= (pSrc
[2u * i1
] >> 4u) - (pSrc
[2u * i3
] >> 4u);
641 /* index calculation for the coefficients */
643 co2
= pCoef
[ia2
* 2u];
644 si2
= pCoef
[(ia2
* 2u) + 1u];
646 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
647 pSrc
[2u * i1
] = (((int32_t) (((q63_t
) r1
* co2
) >> 32)) -
648 ((int32_t) (((q63_t
) s1
* si2
) >> 32))) << 1u;
650 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
651 pSrc
[2u * i1
+ 1u] = (((int32_t) (((q63_t
) s1
* co2
) >> 32)) +
652 ((int32_t) (((q63_t
) r1
* si2
) >> 32))) << 1u;
654 /* (xa - xc) - (yb - yd) */
656 /* (xa - xc) + (yb - yd) */
659 /* (ya - yc) + (xb - xd) */
661 /* (ya - yc) - (xb - xd) */
664 co1
= pCoef
[ia1
* 2u];
665 si1
= pCoef
[(ia1
* 2u) + 1u];
667 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
668 pSrc
[2u * i2
] = (((int32_t) (((q63_t
) r1
* co1
) >> 32)) -
669 ((int32_t) (((q63_t
) s1
* si1
) >> 32))) << 1u;
671 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
672 pSrc
[(2u * i2
) + 1u] = (((int32_t) (((q63_t
) s1
* co1
) >> 32)) +
673 ((int32_t) (((q63_t
) r1
* si1
) >> 32))) << 1u;
675 /* index calculation for the coefficients */
677 co3
= pCoef
[ia3
* 2u];
678 si3
= pCoef
[(ia3
* 2u) + 1u];
680 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
681 pSrc
[2u * i3
] = (((int32_t) (((q63_t
) r2
* co3
) >> 32)) -
682 ((int32_t) (((q63_t
) s2
* si3
) >> 32))) << 1u;
684 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
685 pSrc
[(2u * i3
) + 1u] = (((int32_t) (((q63_t
) s2
* co3
) >> 32)) +
686 ((int32_t) (((q63_t
) r2
* si3
) >> 32))) << 1u;
688 /* Twiddle coefficients index modifier */
689 ia1
= ia1
+ twidCoefModifier
;
691 /* Updating input index */
696 /* data is in 5.27(q27) format */
697 /* each stage provides two down scaling of the input */
700 /* Start of Middle stages process */
702 twidCoefModifier
<<= 2u;
704 /* Calculation of second stage to excluding last stage */
705 for (k
= fftLen
/ 4u; k
> 4u; k
>>= 2u)
707 /* Initializations for the first stage */
712 for (j
= 0; j
<= (n2
- 1u); j
++)
714 /* index calculation for the coefficients */
717 co1
= pCoef
[ia1
* 2u];
718 si1
= pCoef
[(ia1
* 2u) + 1u];
719 co2
= pCoef
[ia2
* 2u];
720 si2
= pCoef
[(ia2
* 2u) + 1u];
721 co3
= pCoef
[ia3
* 2u];
722 si3
= pCoef
[(ia3
* 2u) + 1u];
723 /* Twiddle coefficients index modifier */
724 ia1
= ia1
+ twidCoefModifier
;
726 for (i0
= j
; i0
< fftLen
; i0
+= n1
)
728 /* index calculation for the input as, */
729 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
734 /* Butterfly implementation */
736 r1
= pSrc
[2u * i0
] + pSrc
[2u * i2
];
738 r2
= pSrc
[2u * i0
] - pSrc
[2u * i2
];
741 s1
= pSrc
[(2u * i0
) + 1u] + pSrc
[(2u * i2
) + 1u];
743 s2
= pSrc
[(2u * i0
) + 1u] - pSrc
[(2u * i2
) + 1u];
746 t1
= pSrc
[2u * i1
] + pSrc
[2u * i3
];
748 /* xa' = xa + xb + xc + xd */
749 pSrc
[2u * i0
] = (r1
+ t1
) >> 2u;
750 /* xa + xc -(xb + xd) */
753 t2
= pSrc
[(2u * i1
) + 1u] + pSrc
[(2u * i3
) + 1u];
754 /* ya' = ya + yb + yc + yd */
755 pSrc
[(2u * i0
) + 1u] = (s1
+ t2
) >> 2u;
757 /* (ya + yc) - (yb + yd) */
761 t1
= pSrc
[(2u * i1
) + 1u] - pSrc
[(2u * i3
) + 1u];
763 t2
= pSrc
[2u * i1
] - pSrc
[2u * i3
];
765 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
766 pSrc
[2u * i1
] = (((int32_t) (((q63_t
) r1
* co2
) >> 32u)) -
767 ((int32_t) (((q63_t
) s1
* si2
) >> 32u))) >> 1u;
769 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
770 pSrc
[(2u * i1
) + 1u] =
771 (((int32_t) (((q63_t
) s1
* co2
) >> 32u)) +
772 ((int32_t) (((q63_t
) r1
* si2
) >> 32u))) >> 1u;
774 /* (xa - xc) - (yb - yd) */
776 /* (xa - xc) + (yb - yd) */
779 /* (ya - yc) + (xb - xd) */
781 /* (ya - yc) - (xb - xd) */
784 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
785 pSrc
[2u * i2
] = (((int32_t) (((q63_t
) r1
* co1
) >> 32)) -
786 ((int32_t) (((q63_t
) s1
* si1
) >> 32))) >> 1u;
788 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
789 pSrc
[(2u * i2
) + 1u] = (((int32_t) (((q63_t
) s1
* co1
) >> 32)) +
790 ((int32_t) (((q63_t
) r1
* si1
) >> 32))) >> 1u;
792 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
793 pSrc
[(2u * i3
)] = (((int32_t) (((q63_t
) r2
* co3
) >> 32)) -
794 ((int32_t) (((q63_t
) s2
* si3
) >> 32))) >> 1u;
796 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
797 pSrc
[(2u * i3
) + 1u] = (((int32_t) (((q63_t
) s2
* co3
) >> 32)) +
798 ((int32_t) (((q63_t
) r2
* si3
) >> 32))) >> 1u;
801 twidCoefModifier
<<= 2u;
804 /* End of Middle stages process */
806 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
807 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
808 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
809 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
812 /* Start of last stage process */
815 /* Initializations for the last stage */
819 /* Calculations of last stage */
822 #ifndef ARM_MATH_BIG_ENDIAN
823 /* Read xa (real), ya(imag) input */
824 xaya
= *__SIMD64(ptr1
)++;
826 ya
= (q31_t
) (xaya
>> 32);
828 /* Read xb (real), yb(imag) input */
829 xbyb
= *__SIMD64(ptr1
)++;
831 yb
= (q31_t
) (xbyb
>> 32);
833 /* Read xc (real), yc(imag) input */
834 xcyc
= *__SIMD64(ptr1
)++;
836 yc
= (q31_t
) (xcyc
>> 32);
838 /* Read xc (real), yc(imag) input */
839 xdyd
= *__SIMD64(ptr1
)++;
841 yd
= (q31_t
) (xdyd
>> 32);
845 /* Read xa (real), ya(imag) input */
846 xaya
= *__SIMD64(ptr1
)++;
848 xa
= (q31_t
) (xaya
>> 32);
850 /* Read xb (real), yb(imag) input */
851 xbyb
= *__SIMD64(ptr1
)++;
853 xb
= (q31_t
) (xbyb
>> 32);
855 /* Read xc (real), yc(imag) input */
856 xcyc
= *__SIMD64(ptr1
)++;
858 xc
= (q31_t
) (xcyc
>> 32);
860 /* Read xc (real), yc(imag) input */
861 xdyd
= *__SIMD64(ptr1
)++;
863 xd
= (q31_t
) (xdyd
>> 32);
868 /* xa' = xa + xb + xc + xd */
869 xa_out
= xa
+ xb
+ xc
+ xd
;
871 /* ya' = ya + yb + yc + yd */
872 ya_out
= ya
+ yb
+ yc
+ yd
;
874 /* pointer updation for writing */
877 /* writing xa' and ya' */
881 xc_out
= (xa
- xb
+ xc
- xd
);
882 yc_out
= (ya
- yb
+ yc
- yd
);
884 /* writing xc' and yc' */
888 xb_out
= (xa
- yb
- xc
+ yd
);
889 yb_out
= (ya
+ xb
- yc
- xd
);
891 /* writing xb' and yb' */
895 xd_out
= (xa
+ yb
- xc
- yd
);
896 yd_out
= (ya
- xb
- yc
+ xd
);
898 /* writing xd' and yd' */
905 /* output is in 11.21(q21) format for the 1024 point */
906 /* output is in 9.23(q23) format for the 256 point */
907 /* output is in 7.25(q25) format for the 64 point */
908 /* output is in 5.27(q27) format for the 16 point */
910 /* End of last stage process */