tmk_core/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_f32.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        17. January 2013
   5 * $Revision:    V1.4.1
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_cfft_f32.c
   9 *
  10 * Description:  Combined Radix Decimation in Frequency CFFT Floating point processing function
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Redistribution and use in source and binary forms, with or without
  15 * modification, are permitted provided that the following conditions
  16 * are met:
  17 *   - Redistributions of source code must retain the above copyright
  18 *     notice, this list of conditions and the following disclaimer.
  19 *   - Redistributions in binary form must reproduce the above copyright
  20 *     notice, this list of conditions and the following disclaimer in
  21 *     the documentation and/or other materials provided with the
  22 *     distribution.
  23 *   - Neither the name of ARM LIMITED nor the names of its contributors
  24 *     may be used to endorse or promote products derived from this
  25 *     software without specific prior written permission.
  26 *
  27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38 * POSSIBILITY OF SUCH DAMAGE.
  39 * -------------------------------------------------------------------- */
  40
  41
  42 #include "arm_math.h"
  43 #include "arm_common_tables.h"
  44
/*
 * Radix-8 butterfly kernel, defined outside this file.
 * In this file it is called with the (sub-)buffer to transform, the length of
 * that (sub-)transform, the twiddle table of the instance and a twiddle
 * modifier of 1, 2 or 4.
 */
extern void arm_radix8_butterfly_f32(
  float32_t * pSrc,
  uint16_t fftLen,
  const float32_t * pCoef,
  uint16_t twidCoefModifier);

/*
 * Bit-reversal reordering routine, defined outside this file.
 * In this file it is called on the float data buffer (cast to uint32_t *)
 * with the bitRevLength and pBitRevTable fields of the instance.
 */
extern void arm_bitreversal_32(
                uint32_t * pSrc,
                const uint16_t bitRevLen,
                const uint16_t * pBitRevTable);
  55
  56 /**
  57 * @ingroup groupTransforms
  58 */
  59
  60 /**
  61 * @defgroup ComplexFFT Complex FFT Functions
  62 *
  63 * \par
  64 * The Fast Fourier Transform (FFT) is an efficient algorithm for computing the
  65 * Discrete Fourier Transform (DFT).  The FFT can be orders of magnitude faster
  66 * than the DFT, especially for long lengths.
  67 * The algorithms described in this section
  68 * operate on complex data.  A separate set of functions is devoted to handling
  69 * of real sequences.
  70 * \par
  71 * There are separate algorithms for handling floating-point, Q15, and Q31 data
  72 * types.  The algorithms available for each data type are described next.
  73 * \par
  74 * The FFT functions operate in-place.  That is, the array holding the input data
  75 * will also be used to hold the corresponding result.  The input data is complex
  76 * and contains <code>2*fftLen</code> interleaved values as shown below.
  77 * <pre> {real[0], imag[0], real[1], imag[1],..} </pre>
  78 * The FFT result will be contained in the same array and the frequency domain
  79 * values will have the same interleaving.
  80 *
  81 * \par Floating-point
  82 * The floating-point complex FFT uses a mixed-radix algorithm.  Multiple radix-8
  83 * stages are performed along with a single radix-2 or radix-4 stage, as needed.
  84 * The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses
  85 * a different twiddle factor table.
  86 * \par
  87 * The function uses the standard FFT definition and output values may grow by a
  88 * factor of <code>fftLen</code> when computing the forward transform.  The
  89 * inverse transform includes a scale of <code>1/fftLen</code> as part of the
  90 * calculation and this matches the textbook definition of the inverse FFT.
  91 * \par
  92 * Preinitialized data structures containing twiddle factors and bit reversal
  93 * tables are provided and defined in <code>arm_const_structs.h</code>.  Include
  94 * this header in your function and then pass one of the constant structures as
  95 * an argument to arm_cfft_f32.  For example:
  96 * \par
* <code>arm_cfft_f32(&arm_cfft_sR_f32_len64, pSrc, 1, 1)</code>
  98 * \par
  99 * computes a 64-point inverse complex FFT including bit reversal.
 100 * The data structures are treated as constant data and not modified during the
 101 * calculation.  The same data structure can be reused for multiple transforms
 102 * including mixing forward and inverse transforms.
 103 * \par
 104 * Earlier releases of the library provided separate radix-2 and radix-4
 105 * algorithms that operated on floating-point data.  These functions are still
 106 * provided but are deprecated.  The older functions are slower and less general
 107 * than the new functions.
 108 * \par
 109 * An example of initialization of the constants for the arm_cfft_f32 function follows:
 110 * \par
 111 * const static arm_cfft_instance_f32 *S;
 112 * ...
 113 *               switch (length) {
 114 *               case 16:
 115 *                       S = & arm_cfft_sR_f32_len16;
 116 *               break;
 117 *               case 32:
 118 *                       S = & arm_cfft_sR_f32_len32;
 119 *               break;
 120 *                       case 64:
 121 *                       S = & arm_cfft_sR_f32_len64;
 122 *               break;
 123 *               case 128:
 124 *                       S = & arm_cfft_sR_f32_len128;
 125 *               break;
 126 *               case 256:
 127 *                       S = & arm_cfft_sR_f32_len256;
 128 *               break;
 129 *               case 512:
 130 *                       S = & arm_cfft_sR_f32_len512;
 131 *               break;
 132 *               case 1024:
 133 *                       S = & arm_cfft_sR_f32_len1024;
 134 *               break;
 135 *               case 2048:
 136 *                       S = & arm_cfft_sR_f32_len2048;
 137 *               break;
 138 *               case 4096:
 139 *                       S = & arm_cfft_sR_f32_len4096;
 140 *               break;
 141 *                       }
 142 * \par Q15 and Q31
 143 * The library provides radix-2 and radix-4 FFT algorithms for fixed-point data.  The
 144 * radix-2 algorithm supports lengths of [16, 32, 64, ..., 4096].  The radix-4
 145 * algorithm supports lengths of [16, 64, 256, ..., 4096].  When possible, you
 146 * should use the radix-4 algorithm since it is faster than the radix-2 of the
 147 * same length.
 148 * \par
 149 * The forward FFTs include scaling in order to prevent results from overflowing.
 150 * Intermediate results are scaled down during each butterfly stage.  In the
 151 * radix-2 algorithm, a scale of 0.5 is applied during each butterfly.  In the
 152 * radix-4 algorithm, a scale of 0.25 is applied.  The scaling applies to both
 153 * the forward and the inverse FFTs.  Thus the forward FFT contains an additional
 154 * scale factor of <code>1/fftLen</code> as compared to the standard textbook
 155 * definition of the FFT.  The inverse FFT also scales down during each butterfly
 156 * stage and this corresponds to the standard textbook definition.
 157 * \par
 158 * A separate instance structure must be defined for each transform used but
 159 * twiddle factor and bit reversal tables can be reused.
 160 * \par
 161 * There is also an associated initialization function for each data type.
 162 * The initialization function performs the following operations:
 163 * - Sets the values of the internal structure fields.
 164 * - Initializes twiddle factor table and bit reversal table pointers.
 165 * \par
 166 * Use of the initialization function is optional.
 167 * However, if the initialization function is used, then the instance structure
 168 * cannot be placed into a const data section. To place an instance structure
 169 * into a const data section, the instance structure should be manually
 170 * initialized as follows:
 171 * <pre>
 172 *arm_cfft_radix2_instance_q31 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};
 173 *arm_cfft_radix2_instance_q15 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};
 174 *arm_cfft_radix4_instance_q31 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};
 175 *arm_cfft_radix4_instance_q15 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};
 176 *arm_cfft_instance_f32 S = {fftLen, pTwiddle, pBitRevTable, bitRevLength};
 177 * </pre>
 178 * \par
 179 * where <code>fftLen</code> length of CFFT/CIFFT; <code>ifftFlag</code> Flag for
 180 * selection of forward or inverse transform.  When ifftFlag is set the inverse
 181 * transform is calculated.
 182 * <code>bitReverseFlag</code> Flag for selection of output order (Set bitReverseFlag to output in normal order otherwise output in bit reversed order);
* <code>pTwiddle</code> points to array of twiddle coefficients; <code>pBitRevTable</code> points to the bit reversal table.
* <code>twidCoefModifier</code> modifier for twiddle factor table which supports all FFT lengths with same table;
* <code>bitRevFactor</code> modifier for bit reversal table which supports all FFT lengths with same table.
 186 * <code>onebyfftLen</code> value of 1/fftLen to calculate CIFFT;
 187 * \par
 188 * The Q15 and Q31 FFT functions use a large bit reversal and twiddle factor
 189 * table.  The tables are defined for the maximum length transform and a subset
 190 * of the coefficients are used in shorter transforms.
 191 *
 192 */
 193
/*
 * First (radix-2) stage of the mixed-radix CFFT, followed by two radix-8
 * sub-transforms of half the length.  arm_cfft_f32 dispatches here for
 * fftLen = 16, 128 and 1024.
 *
 * S   instance structure; fftLen and pTwiddle are read from it.
 * p1  interleaved complex buffer {re, im, re, im, ...}; processed in place.
 *
 * The buffer is split into two halves (p1 / p2).  Each half is walked with
 * two pointers: one from its start (p1, p2) and one from its middle
 * (pMid1, pMid2), so four quarter-buffers are processed per iteration.
 * The first half receives the sums, the second half receives the
 * differences multiplied by twiddle factors.
 */
void arm_cfft_radix8by2_f32( arm_cfft_instance_f32 * S, float32_t * p1)
{
   uint32_t    L  = S->fftLen;
   float32_t * pCol1, * pCol2, * pMid1, * pMid2;
   // p2 is fftLen floats (= fftLen/2 complex samples) past p1: start of the second half
   float32_t * p2 = p1 + L;
   const float32_t * tw = (float32_t *) S->pTwiddle;
   float32_t t1[4], t2[4], t3[4], t4[4], twR, twI;
   float32_t m0, m1, m2, m3;
   uint32_t l;

   // remember the start of each half for the radix-8 calls at the end
   pCol1 = p1;
   pCol2 = p2;

   //    Define new length: L is now fftLen/2, the length of each sub-transform
   L >>= 1;
   //    Initialize mid pointers (L floats = middle of each half)
   pMid1 = p1 + L;
   pMid2 = p2 + L;

   // two-point butterflies; every iteration handles two complex samples
   // (four floats) from each of the four quarter-buffers
   for ( l = L >> 2; l > 0; l-- )
   {
      // load everything first: the stores below overwrite these locations
      t1[0] = p1[0];
      t1[1] = p1[1];
      t1[2] = p1[2];
      t1[3] = p1[3];

      t2[0] = p2[0];
      t2[1] = p2[1];
      t2[2] = p2[2];
      t2[3] = p2[3];

      t3[0] = pMid1[0];
      t3[1] = pMid1[1];
      t3[2] = pMid1[2];
      t3[3] = pMid1[3];

      t4[0] = pMid2[0];
      t4[1] = pMid2[1];
      t4[2] = pMid2[2];
      t4[3] = pMid2[3];

      *p1++ = t1[0] + t2[0];
      *p1++ = t1[1] + t2[1];
      *p1++ = t1[2] + t2[2];
      *p1++ = t1[3] + t2[3];    // col 1

      // difference: first half minus second half
      t2[0] = t1[0] - t2[0];
      t2[1] = t1[1] - t2[1];
      t2[2] = t1[2] - t2[2];
      t2[3] = t1[3] - t2[3];    // for col 2

      *pMid1++ = t3[0] + t4[0];
      *pMid1++ = t3[1] + t4[1];
      *pMid1++ = t3[2] + t4[2];
      *pMid1++ = t3[3] + t4[3]; // col 1

      // difference taken the other way round (second half minus first half);
      // the sign is absorbed by the symmetric twiddle product below
      t4[0] = t4[0] - t3[0];
      t4[1] = t4[1] - t3[1];
      t4[2] = t4[2] - t3[2];
      t4[3] = t4[3] - t3[3];    // for col 2

      // twiddle for the first of the two samples
      twR = *tw++;
      twI = *tw++;

      // multiply by twiddle factors
      m0 = t2[0] * twR;
      m1 = t2[1] * twI;
      m2 = t2[1] * twR;
      m3 = t2[0] * twI;

      // as coded this is a multiply by (twR - j*twI):
      // real  =  R * Tr + I * Ti
      *p2++ = m0 + m1;
      // imag  =  I * Tr - R * Ti
      *p2++ = m2 - m3;

      // use vertical symmetry: the same table entry serves the mid sample,
      // with real and imaginary parts of the twiddle swapped
          //  0.9988 - 0.0491i <==> -0.0491 - 0.9988i
      m0 = t4[0] * twI;
      m1 = t4[1] * twR;
      m2 = t4[1] * twI;
      m3 = t4[0] * twR;

      // real = R * Ti - I * Tr,  imag = I * Ti + R * Tr
      *pMid2++ = m0 - m1;
      *pMid2++ = m2 + m3;

      // twiddle for the second of the two samples; same products as above
      twR = *tw++;
      twI = *tw++;

      m0 = t2[2] * twR;
      m1 = t2[3] * twI;
      m2 = t2[3] * twR;
      m3 = t2[2] * twI;

      *p2++ = m0 + m1;
      *p2++ = m2 - m3;

      m0 = t4[2] * twI;
      m1 = t4[3] * twR;
      m2 = t4[3] * twI;
      m3 = t4[2] * twR;

      *pMid2++ = m0 - m1;
      *pMid2++ = m2 + m3;
   }

   // each half is now transformed independently with length L = fftLen/2,
   // stepping through the shared twiddle table with a modifier of 2
   // first col
   arm_radix8_butterfly_f32( pCol1, L, (float32_t *) S->pTwiddle, 2u);
   // second col
   arm_radix8_butterfly_f32( pCol2, L, (float32_t *) S->pTwiddle, 2u);

}
 306
/*
 * First (radix-4) stage of the mixed-radix CFFT, followed by four radix-8
 * sub-transforms of a quarter of the length.  arm_cfft_f32 dispatches here
 * for fftLen = 32, 256 and 2048.
 *
 * S   instance structure; fftLen and pTwiddle are read from it.
 * p1  interleaved complex buffer {re, im, re, im, ...}; processed in place.
 *
 * The buffer is split into four quarters (p1..p4).  Each quarter is walked
 * from both ends at once: p1..p4 move forward from the start, pEnd1..pEnd4
 * move backward from the last float.  The first sample ("TOP") and the
 * sample where the two walks meet ("MIDDLE") are handled outside the loop.
 * With a, b, c, d the samples taken from quarters 1..4, the butterfly
 * outputs are
 *    col 1 = a + b + c + d
 *    col 2 = (a - c) - j(b - d)
 *    col 3 = (a + c) - (b + d)
 *    col 4 = (a - c) + j(b - d)
 * and columns 2..4 are then multiplied by twiddle factors.
 */
void arm_cfft_radix8by4_f32( arm_cfft_instance_f32 * S, float32_t * p1)
{
   // L = fftLen/2 floats = one quarter of the 2*fftLen-float buffer
   uint32_t    L  = S->fftLen >> 1;
   float32_t * pCol1, *pCol2, *pCol3, *pCol4, *pEnd1, *pEnd2, *pEnd3, *pEnd4;
        const float32_t *tw2, *tw3, *tw4;
   float32_t * p2 = p1 + L;
   float32_t * p3 = p2 + L;
   float32_t * p4 = p3 + L;
   float32_t t2[4], t3[4], t4[4], twR, twI;
   float32_t p1ap3_0, p1sp3_0, p1ap3_1, p1sp3_1;
   float32_t m0, m1, m2, m3;
   uint32_t l, twMod2, twMod3, twMod4;

   // quarter start addresses, kept for the radix-8 calls at the end
   pCol1 = p1;         // points to real values by default
   pCol2 = p2;
   pCol3 = p3;
   pCol4 = p4;
   // last float of each quarter; pEndX[0] is an imaginary part, pEndX[-1] the matching real part
   pEnd1 = p2 - 1;     // points to imaginary values by default
   pEnd2 = p3 - 1;
   pEnd3 = p4 - 1;
   pEnd4 = pEnd3 + L;

   tw2 = tw3 = tw4 = (float32_t *) S->pTwiddle;

   // L is now fftLen/4: the length of each radix-8 sub-transform
   L >>= 1;

   // four-point butterflies

   // twiddle pointer step, in floats, per sample for columns 2, 3 and 4
   twMod2 = 2;
   twMod3 = 4;
   twMod4 = 6;

   // TOP: first sample of every quarter
   // p1ap3 = a + c, p1sp3 = a - c; suffix _0 is the real part, _1 the imaginary part
   p1ap3_0 = p1[0] + p3[0];
   p1sp3_0 = p1[0] - p3[0];
   p1ap3_1 = p1[1] + p3[1];
   p1sp3_1 = p1[1] - p3[1];

   // col 2
   t2[0] = p1sp3_0 + p2[1] - p4[1];
   t2[1] = p1sp3_1 - p2[0] + p4[0];
   // col 3
   t3[0] = p1ap3_0 - p2[0] - p4[0];
   t3[1] = p1ap3_1 - p2[1] - p4[1];
   // col 4
   t4[0] = p1sp3_0 - p2[1] + p4[1];
   t4[1] = p1sp3_1 + p2[0] - p4[0];
   // col 1 (written before p2/p4 are overwritten below)
   *p1++ = p1ap3_0 + p2[0] + p4[0];
   *p1++ = p1ap3_1 + p2[1] + p4[1];

   // Twiddle factors are ones
   *p2++ = t2[0];
   *p2++ = t2[1];
   *p3++ = t3[0];
   *p3++ = t3[1];
   *p4++ = t4[0];
   *p4++ = t4[1];

   // skip the table entries that belong to the sample just handled
   tw2 += twMod2;
   tw3 += twMod3;
   tw4 += twMod4;

   // each iteration handles one sample from the front and one from the back
   // of every quarter; the two remaining samples are TOP (above) and MIDDLE (below)
   for (l = (L - 2) >> 1; l > 0; l-- )
   {

      // TOP
      p1ap3_0 = p1[0] + p3[0];
      p1sp3_0 = p1[0] - p3[0];
      p1ap3_1 = p1[1] + p3[1];
      p1sp3_1 = p1[1] - p3[1];
      // col 2
      t2[0] = p1sp3_0 + p2[1] - p4[1];
      t2[1] = p1sp3_1 - p2[0] + p4[0];
      // col 3
      t3[0] = p1ap3_0 - p2[0] - p4[0];
      t3[1] = p1ap3_1 - p2[1] - p4[1];
      // col 4
      t4[0] = p1sp3_0 - p2[1] + p4[1];
      t4[1] = p1sp3_1 + p2[0] - p4[0];
      // col 1 - top
      *p1++ = p1ap3_0 + p2[0] + p4[0];
      *p1++ = p1ap3_1 + p2[1] + p4[1];

      // BOTTOM: same butterfly on the samples read through the pEnd pointers.
      // Here the _1 temporaries hold the real parts (pEndX[-1]) and the
      // _0 temporaries hold the imaginary parts (pEndX[0]).
      p1ap3_1 = pEnd1[-1] + pEnd3[-1];
      p1sp3_1 = pEnd1[-1] - pEnd3[-1];
      p1ap3_0 = pEnd1[0] + pEnd3[0];
      p1sp3_0 = pEnd1[0] - pEnd3[0];
      // col 2: t2[2] = real, t2[3] = imaginary part of (a - c) - j(b - d)
      t2[2] = pEnd2[0]  - pEnd4[0] + p1sp3_1;
      t2[3] = pEnd1[0] - pEnd3[0] - pEnd2[-1] + pEnd4[-1];
      // col 3: t3[2] = real, t3[3] = imaginary part of (a + c) - (b + d)
      t3[2] = p1ap3_1 - pEnd2[-1] - pEnd4[-1];
      t3[3] = p1ap3_0 - pEnd2[0]  - pEnd4[0];
      // col 4: t4[2], t4[3] hold the NEGATED real and imaginary parts of
      // (a - c) + j(b - d); the sign is absorbed by the twiddle product below
      t4[2] = pEnd2[0]  - pEnd4[0]  - p1sp3_1;
      t4[3] = pEnd4[-1] - pEnd2[-1] - p1sp3_0;
      // col 1 - Bottom (imaginary part is stored first: pEnd1 walks backwards)
      *pEnd1-- = p1ap3_0 + pEnd2[0] + pEnd4[0];
      *pEnd1-- = p1ap3_1 + pEnd2[-1] + pEnd4[-1];

      // COL 2
      // read twiddle factors
      twR = *tw2++;
      twI = *tw2++;
      // multiply by twiddle factors
      //  let    Z1 = a + i(b),   Z2 = c + i(d)
      //   =>  Z1 * Z2  =  (a*c - b*d) + i(b*c + a*d)
      //  as coded below, the top product uses Z2 = twR - i(twI):
      //       real = a*twR + b*twI,  imag = b*twR - a*twI
      // Top
      m0 = t2[0] * twR;
      m1 = t2[1] * twI;
      m2 = t2[1] * twR;
      m3 = t2[0] * twI;

      *p2++ = m0 + m1;
      *p2++ = m2 - m3;
      // use vertical symmetry col 2
      // 0.9997 - 0.0245i  <==>  0.0245 - 0.9997i
      // Bottom (the same twR/twI pair is reused; imaginary part is stored first)
      m0 = t2[3] * twI;
      m1 = t2[2] * twR;
      m2 = t2[2] * twI;
      m3 = t2[3] * twR;

      *pEnd2-- = m0 - m1;
      *pEnd2-- = m2 + m3;

      // COL 3 (table is stepped by twMod3 floats per sample)
      twR = tw3[0];
      twI = tw3[1];
      tw3 += twMod3;
      // Top
      m0 = t3[0] * twR;
      m1 = t3[1] * twI;
      m2 = t3[1] * twR;
      m3 = t3[0] * twI;

      *p3++ = m0 + m1;
      *p3++ = m2 - m3;
      // use vertical symmetry col 3
      // 0.9988 - 0.0491i  <==>  -0.9988 - 0.0491i
      // Bottom
      m0 = -t3[3] * twR;
      m1 = t3[2] * twI;
      m2 = t3[2] * twR;
      m3 = t3[3] * twI;

      *pEnd3-- = m0 - m1;
      *pEnd3-- = m3 - m2;

      // COL 4 (table is stepped by twMod4 floats per sample)
      twR = tw4[0];
      twI = tw4[1];
      tw4 += twMod4;
      // Top
      m0 = t4[0] * twR;
      m1 = t4[1] * twI;
      m2 = t4[1] * twR;
      m3 = t4[0] * twI;

      *p4++ = m0 + m1;
      *p4++ = m2 - m3;
      // use vertical symmetry col 4
      // 0.9973 - 0.0736i  <==>  -0.0736 + 0.9973i
      // Bottom
      m0 = t4[3] * twI;
      m1 = t4[2] * twR;
      m2 = t4[2] * twI;
      m3 = t4[3] * twR;

      *pEnd4-- = m0 - m1;
      *pEnd4-- = m2 + m3;
   }

   //MIDDLE: the one sample per quarter left between the forward and backward walks
   // Twiddle factors are
   //  1.0000  0.7071-0.7071i  -1.0000i  -0.7071-0.7071i
   p1ap3_0 = p1[0] + p3[0];
   p1sp3_0 = p1[0] - p3[0];
   p1ap3_1 = p1[1] + p3[1];
   p1sp3_1 = p1[1] - p3[1];

   // col 2
   t2[0] = p1sp3_0 + p2[1] - p4[1];
   t2[1] = p1sp3_1 - p2[0] + p4[0];
   // col 3
   t3[0] = p1ap3_0 - p2[0] - p4[0];
   t3[1] = p1ap3_1 - p2[1] - p4[1];
   // col 4
   t4[0] = p1sp3_0 - p2[1] + p4[1];
   t4[1] = p1sp3_1 + p2[0] - p4[0];
   // col 1 - Top
   *p1++ = p1ap3_0 + p2[0] + p4[0];
   *p1++ = p1ap3_1 + p2[1] + p4[1];

   // COL 2 (twiddle pointers are read where the loop left them)
   twR = tw2[0];
   twI = tw2[1];

   m0 = t2[0] * twR;
   m1 = t2[1] * twI;
   m2 = t2[1] * twR;
   m3 = t2[0] * twI;

   *p2++ = m0 + m1;
   *p2++ = m2 - m3;
      // COL 3
   twR = tw3[0];
   twI = tw3[1];

   m0 = t3[0] * twR;
   m1 = t3[1] * twI;
   m2 = t3[1] * twR;
   m3 = t3[0] * twI;

   *p3++ = m0 + m1;
   *p3++ = m2 - m3;
   // COL 4
   twR = tw4[0];
   twI = tw4[1];

   m0 = t4[0] * twR;
   m1 = t4[1] * twI;
   m2 = t4[1] * twR;
   m3 = t4[0] * twI;

   *p4++ = m0 + m1;
   *p4++ = m2 - m3;

   // each quarter is now transformed independently with length L = fftLen/4,
   // stepping through the shared twiddle table with a modifier of 4
   // first col
   arm_radix8_butterfly_f32( pCol1, L, (float32_t *) S->pTwiddle, 4u);
   // second col
   arm_radix8_butterfly_f32( pCol2, L, (float32_t *) S->pTwiddle, 4u);
   // third col
   arm_radix8_butterfly_f32( pCol3, L, (float32_t *) S->pTwiddle, 4u);
   // fourth col
   arm_radix8_butterfly_f32( pCol4, L, (float32_t *) S->pTwiddle, 4u);

}
 547
 548 /**
 549 * @addtogroup ComplexFFT
 550 * @{
 551 */
 552
 553 /**
 554 * @details
 555 * @brief       Processing function for the floating-point complex FFT.
 556 * @param[in]      *S    points to an instance of the floating-point CFFT structure.
 557 * @param[in, out] *p1   points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
 558 * @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
 559 * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
 560 * @return none.
 561 */
 562
 563 void arm_cfft_f32(
 564    const arm_cfft_instance_f32 * S,
 565    float32_t * p1,
 566    uint8_t ifftFlag,
 567    uint8_t bitReverseFlag)
 568 {
 569
 570    uint32_t  L = S->fftLen, l;
 571    float32_t invL, * pSrc;
 572
 573   if(ifftFlag == 1u)
 574   {
 575           /*  Conjugate input data  */
 576           pSrc = p1 + 1;
 577           for(l=0; l<L; l++) {
 578                   *pSrc = -*pSrc;
 579                    pSrc += 2;
 580           }
 581   }
 582
 583                 switch (L) {
 584                 case 16:
 585                 case 128:
 586                 case 1024:
 587                          arm_cfft_radix8by2_f32  ( (arm_cfft_instance_f32 *) S, p1);
 588                          break;
 589                 case 32:
 590                 case 256:
 591                 case 2048:
 592                          arm_cfft_radix8by4_f32  ( (arm_cfft_instance_f32 *) S, p1);
 593                          break;
 594                 case 64:
 595                 case 512:
 596                 case 4096:
 597           arm_radix8_butterfly_f32( p1, L, (float32_t *) S->pTwiddle, 1);
 598                          break;
 599                 }
 600
 601         if( bitReverseFlag )
 602                 arm_bitreversal_32((uint32_t*)p1,S->bitRevLength,S->pBitRevTable);
 603
 604   if(ifftFlag == 1u)
 605   {
 606           invL = 1.0f/(float32_t)L;
 607           /*  Conjugate and scale output data */
 608           pSrc = p1;
 609           for(l=0; l<L; l++) {
 610                  *pSrc++ *=   invL ;
 611                  *pSrc  = -(*pSrc) * invL;
 612                  pSrc++;
 613           }
 614   }
 615 }
 616