tmk_core/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix8_f32.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        17. January 2013
   5 * $Revision:    V1.4.1
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_cfft_radix8_f32.c
   9 *
  10 * Description:  Radix-8 Decimation in Frequency CFFT & CIFFT Floating point processing function
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Redistribution and use in source and binary forms, with or without
  15 * modification, are permitted provided that the following conditions
  16 * are met:
  17 *   - Redistributions of source code must retain the above copyright
  18 *     notice, this list of conditions and the following disclaimer.
  19 *   - Redistributions in binary form must reproduce the above copyright
  20 *     notice, this list of conditions and the following disclaimer in
  21 *     the documentation and/or other materials provided with the
  22 *     distribution.
  23 *   - Neither the name of ARM LIMITED nor the names of its contributors
  24 *     may be used to endorse or promote products derived from this
  25 *     software without specific prior written permission.
  26 *
  27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38 * POSSIBILITY OF SUCH DAMAGE.
  39 * -------------------------------------------------------------------- */
  40
  41 #include "arm_math.h"
  42
  43 /**
  44 * @ingroup groupTransforms
  45 */
  46
  47 /**
  48 * @defgroup Radix8_CFFT_CIFFT Radix-8 Complex FFT Functions
  49 *
  50 * \par
  51 * Complex Fast Fourier Transform(CFFT) and Complex Inverse Fast Fourier Transform(CIFFT) is an efficient algorithm to compute Discrete Fourier Transform(DFT) and Inverse Discrete Fourier Transform(IDFT).
  52 * Computational complexity of CFFT reduces drastically when compared to DFT.
  53 * \par
  54 * This set of functions implements CFFT/CIFFT
  55 * for floating-point data types.  The functions operates on in-place buffer which uses same buffer for input and output.
  56 * Complex input is stored in input buffer in an interleaved fashion.
  57 *
  58 * \par
  59 * The functions operate on blocks of input and output data and each call to the function processes
  60 * <code>2*fftLen</code> samples through the transform.  <code>pSrc</code>  points to In-place arrays containing <code>2*fftLen</code> values.
  61 * \par
  62 * The <code>pSrc</code> points to the array of in-place buffer of size <code>2*fftLen</code> and inputs and outputs are stored in an interleaved fashion as shown below.
  63 * <pre> {real[0], imag[0], real[1], imag[1],..} </pre>
  64 *
  65 * \par Lengths supported by the transform:
  66 * \par
  67 * Internally, the function utilize a Radix-8 decimation in frequency(DIF) algorithm
  68 * and the size of the FFT supported are of the lengths [ 64, 512, 4096].
  69 *
  70 *
  71 * \par Algorithm:
  72 *
  73 * <b>Complex Fast Fourier Transform:</b>
  74 * \par
  75 * Input real and imaginary data:
  76 * <pre>
  77 * x(n) = xa + j * ya
  78 * x(n+N/4 ) = xb + j * yb
  79 * x(n+N/2 ) = xc + j * yc
  80 * x(n+3N 4) = xd + j * yd
  81 * </pre>
  82 * where N is length of FFT
  83 * \par
  84 * Output real and imaginary data:
  85 * <pre>
  86 * X(4r) = xa'+ j * ya'
  87 * X(4r+1) = xb'+ j * yb'
  88 * X(4r+2) = xc'+ j * yc'
  89 * X(4r+3) = xd'+ j * yd'
  90 * </pre>
  91 * \par
  92 * Twiddle factors for Radix-8 FFT:
  93 * <pre>
  94 * Wn = co1 + j * (- si1)
  95 * W2n = co2 + j * (- si2)
  96 * W3n = co3 + j * (- si3)
  97 * </pre>
  98 *
  99 * \par
 100 * \image html CFFT.gif "Radix-8 Decimation-in Frequency Complex Fast Fourier Transform"
 101 *
 102 * \par
 103 * Output from Radix-8 CFFT Results in Digit reversal order. Interchange middle two branches of every butterfly results in Bit reversed output.
 104 * \par
 105 * <b> Butterfly CFFT equations:</b>
 106 * <pre>
 107 * xa' = xa + xb + xc + xd
 108 * ya' = ya + yb + yc + yd
 109 * xc' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
 110 * yc' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
 111 * xb' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
 112 * yb' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
 113 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
 114 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
 115 * </pre>
 116 *
 117 * \par
 118 * where <code>fftLen</code> length of CFFT/CIFFT; <code>ifftFlag</code> Flag for selection of CFFT or CIFFT(Set ifftFlag to calculate CIFFT otherwise calculates CFFT);
 119 * <code>bitReverseFlag</code> Flag for selection of output order(Set bitReverseFlag to output in normal order otherwise output in bit reversed order);
 120 * <code>pTwiddle</code>points to array of twiddle coefficients; <code>pBitRevTable</code> points to the array of bit reversal table.
 121 * <code>twidCoefModifier</code> modifier for twiddle factor table which supports all FFT lengths with same table;
 122 * <code>pBitRevTable</code> modifier for bit reversal table which supports all FFT lengths with same table.
 123 * <code>onebyfftLen</code> value of 1/fftLen to calculate CIFFT;
 124 *
 125 * \par Fixed-Point Behavior
 126 * Care must be taken when using the fixed-point versions of the CFFT/CIFFT function.
 127 * Refer to the function specific documentation below for usage guidelines.
 128 */
 129
 130
 131 /*
 132 * @brief  Core function for the floating-point CFFT butterfly process.
 133 * @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.
 134 * @param[in]      fftLen           length of the FFT.
 135 * @param[in]      *pCoef           points to the twiddle coefficient buffer.
 136 * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
 137 * @return none.
 138 */
 139
 140 void arm_radix8_butterfly_f32(
 141 float32_t * pSrc,
 142 uint16_t fftLen,
 143 const float32_t * pCoef,
 144 uint16_t twidCoefModifier)
 145 {
 146    uint32_t ia1, ia2, ia3, ia4, ia5, ia6, ia7;
 147    uint32_t i1, i2, i3, i4, i5, i6, i7, i8;
 148    uint32_t id;
 149    uint32_t n1, n2, j;
 150
 151    float32_t r1, r2, r3, r4, r5, r6, r7, r8;
 152    float32_t t1, t2;
 153    float32_t s1, s2, s3, s4, s5, s6, s7, s8;
 154    float32_t p1, p2, p3, p4;
 155    float32_t co2, co3, co4, co5, co6, co7, co8;
 156    float32_t si2, si3, si4, si5, si6, si7, si8;
 157    const float32_t C81 = 0.70710678118f;
 158
 159    n2 = fftLen;
 160
 161    do
 162    {
 163       n1 = n2;
 164       n2 = n2 >> 3;
 165       i1 = 0;
 166
 167       do
 168       {
 169          i2 = i1 + n2;
 170          i3 = i2 + n2;
 171          i4 = i3 + n2;
 172          i5 = i4 + n2;
 173          i6 = i5 + n2;
 174          i7 = i6 + n2;
 175          i8 = i7 + n2;
 176          r1 = pSrc[2 * i1] + pSrc[2 * i5];
 177          r5 = pSrc[2 * i1] - pSrc[2 * i5];
 178          r2 = pSrc[2 * i2] + pSrc[2 * i6];
 179          r6 = pSrc[2 * i2] - pSrc[2 * i6];
 180          r3 = pSrc[2 * i3] + pSrc[2 * i7];
 181          r7 = pSrc[2 * i3] - pSrc[2 * i7];
 182          r4 = pSrc[2 * i4] + pSrc[2 * i8];
 183          r8 = pSrc[2 * i4] - pSrc[2 * i8];
 184          t1 = r1 - r3;
 185          r1 = r1 + r3;
 186          r3 = r2 - r4;
 187          r2 = r2 + r4;
 188          pSrc[2 * i1] = r1 + r2;
 189          pSrc[2 * i5] = r1 - r2;
 190          r1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
 191          s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
 192          r2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
 193          s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
 194          s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
 195          s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
 196          r4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
 197          s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
 198          t2 = r1 - s3;
 199          r1 = r1 + s3;
 200          s3 = r2 - r4;
 201          r2 = r2 + r4;
 202          pSrc[2 * i1 + 1] = r1 + r2;
 203          pSrc[2 * i5 + 1] = r1 - r2;
 204          pSrc[2 * i3]     = t1 + s3;
 205          pSrc[2 * i7]     = t1 - s3;
 206          pSrc[2 * i3 + 1] = t2 - r3;
 207          pSrc[2 * i7 + 1] = t2 + r3;
 208          r1 = (r6 - r8) * C81;
 209          r6 = (r6 + r8) * C81;
 210          r2 = (s6 - s8) * C81;
 211          s6 = (s6 + s8) * C81;
 212          t1 = r5 - r1;
 213          r5 = r5 + r1;
 214          r8 = r7 - r6;
 215          r7 = r7 + r6;
 216          t2 = s5 - r2;
 217          s5 = s5 + r2;
 218          s8 = s7 - s6;
 219          s7 = s7 + s6;
 220          pSrc[2 * i2]     = r5 + s7;
 221          pSrc[2 * i8]     = r5 - s7;
 222          pSrc[2 * i6]     = t1 + s8;
 223          pSrc[2 * i4]     = t1 - s8;
 224          pSrc[2 * i2 + 1] = s5 - r7;
 225          pSrc[2 * i8 + 1] = s5 + r7;
 226          pSrc[2 * i6 + 1] = t2 - r8;
 227          pSrc[2 * i4 + 1] = t2 + r8;
 228
 229          i1 += n1;
 230       } while(i1 < fftLen);
 231
 232       if(n2 < 8)
 233          break;
 234
 235       ia1 = 0;
 236       j = 1;
 237
 238       do
 239       {
 240          /*  index calculation for the coefficients */
 241          id  = ia1 + twidCoefModifier;
 242          ia1 = id;
 243          ia2 = ia1 + id;
 244          ia3 = ia2 + id;
 245          ia4 = ia3 + id;
 246          ia5 = ia4 + id;
 247          ia6 = ia5 + id;
 248          ia7 = ia6 + id;
 249
 250          co2 = pCoef[2 * ia1];
 251          co3 = pCoef[2 * ia2];
 252          co4 = pCoef[2 * ia3];
 253          co5 = pCoef[2 * ia4];
 254          co6 = pCoef[2 * ia5];
 255          co7 = pCoef[2 * ia6];
 256          co8 = pCoef[2 * ia7];
 257          si2 = pCoef[2 * ia1 + 1];
 258          si3 = pCoef[2 * ia2 + 1];
 259          si4 = pCoef[2 * ia3 + 1];
 260          si5 = pCoef[2 * ia4 + 1];
 261          si6 = pCoef[2 * ia5 + 1];
 262          si7 = pCoef[2 * ia6 + 1];
 263          si8 = pCoef[2 * ia7 + 1];
 264
 265          i1 = j;
 266
 267          do
 268          {
 269             /*  index calculation for the input */
 270             i2 = i1 + n2;
 271             i3 = i2 + n2;
 272             i4 = i3 + n2;
 273             i5 = i4 + n2;
 274             i6 = i5 + n2;
 275             i7 = i6 + n2;
 276             i8 = i7 + n2;
 277             r1 = pSrc[2 * i1] + pSrc[2 * i5];
 278             r5 = pSrc[2 * i1] - pSrc[2 * i5];
 279             r2 = pSrc[2 * i2] + pSrc[2 * i6];
 280             r6 = pSrc[2 * i2] - pSrc[2 * i6];
 281             r3 = pSrc[2 * i3] + pSrc[2 * i7];
 282             r7 = pSrc[2 * i3] - pSrc[2 * i7];
 283             r4 = pSrc[2 * i4] + pSrc[2 * i8];
 284             r8 = pSrc[2 * i4] - pSrc[2 * i8];
 285             t1 = r1 - r3;
 286             r1 = r1 + r3;
 287             r3 = r2 - r4;
 288             r2 = r2 + r4;
 289             pSrc[2 * i1] = r1 + r2;
 290             r2 = r1 - r2;
 291             s1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
 292             s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
 293             s2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
 294             s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
 295             s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
 296             s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
 297             s4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
 298             s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
 299             t2 = s1 - s3;
 300             s1 = s1 + s3;
 301             s3 = s2 - s4;
 302             s2 = s2 + s4;
 303             r1 = t1 + s3;
 304             t1 = t1 - s3;
 305             pSrc[2 * i1 + 1] = s1 + s2;
 306             s2 = s1 - s2;
 307             s1 = t2 - r3;
 308             t2 = t2 + r3;
 309             p1 = co5 * r2;
 310             p2 = si5 * s2;
 311             p3 = co5 * s2;
 312             p4 = si5 * r2;
 313             pSrc[2 * i5]     = p1 + p2;
 314             pSrc[2 * i5 + 1] = p3 - p4;
 315             p1 = co3 * r1;
 316             p2 = si3 * s1;
 317             p3 = co3 * s1;
 318             p4 = si3 * r1;
 319             pSrc[2 * i3]     = p1 + p2;
 320             pSrc[2 * i3 + 1] = p3 - p4;
 321             p1 = co7 * t1;
 322             p2 = si7 * t2;
 323             p3 = co7 * t2;
 324             p4 = si7 * t1;
 325             pSrc[2 * i7]     = p1 + p2;
 326             pSrc[2 * i7 + 1] = p3 - p4;
 327             r1 = (r6 - r8) * C81;
 328             r6 = (r6 + r8) * C81;
 329             s1 = (s6 - s8) * C81;
 330             s6 = (s6 + s8) * C81;
 331             t1 = r5 - r1;
 332             r5 = r5 + r1;
 333             r8 = r7 - r6;
 334             r7 = r7 + r6;
 335             t2 = s5 - s1;
 336             s5 = s5 + s1;
 337             s8 = s7 - s6;
 338             s7 = s7 + s6;
 339             r1 = r5 + s7;
 340             r5 = r5 - s7;
 341             r6 = t1 + s8;
 342             t1 = t1 - s8;
 343             s1 = s5 - r7;
 344             s5 = s5 + r7;
 345             s6 = t2 - r8;
 346             t2 = t2 + r8;
 347             p1 = co2 * r1;
 348             p2 = si2 * s1;
 349             p3 = co2 * s1;
 350             p4 = si2 * r1;
 351             pSrc[2 * i2]     = p1 + p2;
 352             pSrc[2 * i2 + 1] = p3 - p4;
 353             p1 = co8 * r5;
 354             p2 = si8 * s5;
 355             p3 = co8 * s5;
 356             p4 = si8 * r5;
 357             pSrc[2 * i8]     = p1 + p2;
 358             pSrc[2 * i8 + 1] = p3 - p4;
 359             p1 = co6 * r6;
 360             p2 = si6 * s6;
 361             p3 = co6 * s6;
 362             p4 = si6 * r6;
 363             pSrc[2 * i6]     = p1 + p2;
 364             pSrc[2 * i6 + 1] = p3 - p4;
 365             p1 = co4 * t1;
 366             p2 = si4 * t2;
 367             p3 = co4 * t2;
 368             p4 = si4 * t1;
 369             pSrc[2 * i4]     = p1 + p2;
 370             pSrc[2 * i4 + 1] = p3 - p4;
 371
 372             i1 += n1;
 373          } while(i1 < fftLen);
 374
 375          j++;
 376       } while(j < n2);
 377
 378       twidCoefModifier <<= 3;
 379    } while(n2 > 7);
 380 }
 381
 382 /**
 383 * @} end of Radix8_CFFT_CIFFT group
 384 */