tmk_core/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_rfft_fast_f32.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        17. January 2013
   5 * $Revision:    V1.4.1
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_rfft_f32.c
   9 *
  10 * Description:  RFFT & RIFFT Floating point process function
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Redistribution and use in source and binary forms, with or without
  15 * modification, are permitted provided that the following conditions
  16 * are met:
  17 *   - Redistributions of source code must retain the above copyright
  18 *     notice, this list of conditions and the following disclaimer.
  19 *   - Redistributions in binary form must reproduce the above copyright
  20 *     notice, this list of conditions and the following disclaimer in
  21 *     the documentation and/or other materials provided with the
  22 *     distribution.
  23 *   - Neither the name of ARM LIMITED nor the names of its contributors
  24 *     may be used to endorse or promote products derived from this
  25 *     software without specific prior written permission.
  26 *
  27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38 * POSSIBILITY OF SUCH DAMAGE.
  39 * -------------------------------------------------------------------- */
  40
  41 #include "arm_math.h"
  42
  43 void stage_rfft_f32(
  44   arm_rfft_fast_instance_f32 * S,
  45   float32_t * p, float32_t * pOut)
  46 {
  47    uint32_t  k;                                                            /* Loop Counter                     */
  48    float32_t twR, twI;                                             /* RFFT Twiddle coefficients        */
  49    float32_t * pCoeff = S->pTwiddleRFFT;  /* Points to RFFT Twiddle factors   */
  50    float32_t *pA = p;                                              /* increasing pointer               */
  51    float32_t *pB = p;                                              /* decreasing pointer               */
  52    float32_t xAR, xAI, xBR, xBI;                                /* temporary variables              */
  53    float32_t t1a, t1b;                                   /* temporary variables              */
  54    float32_t p0, p1, p2, p3;                               /* temporary variables              */
  55
  56
  57    k = (S->Sint).fftLen - 1;
  58
  59    /* Pack first and last sample of the frequency domain together */
  60
  61    xBR = pB[0];
  62    xBI = pB[1];
  63    xAR = pA[0];
  64    xAI = pA[1];
  65
  66    twR = *pCoeff++ ;
  67    twI = *pCoeff++ ;
  68
  69    // U1 = XA(1) + XB(1); % It is real
  70    t1a = xBR + xAR  ;
  71
  72    // U2 = XB(1) - XA(1); % It is imaginary
  73    t1b = xBI + xAI  ;
  74
  75    // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
  76    // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
  77    *pOut++ = 0.5f * ( t1a + t1b );
  78    *pOut++ = 0.5f * ( t1a - t1b );
  79
  80    // XA(1) = 1/2*( U1 - imag(U2) +  i*( U1 +imag(U2) ));
  81    pB  = p + 2*k;
  82    pA += 2;
  83
  84    do
  85    {
  86       /*
  87          function X = my_split_rfft(X, ifftFlag)
  88          % X is a series of real numbers
  89          L  = length(X);
  90          XC = X(1:2:end) +i*X(2:2:end);
  91          XA = fft(XC);
  92          XB = conj(XA([1 end:-1:2]));
  93          TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
  94          for l = 2:L/2
  95             XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
  96          end
  97          XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
  98          X = XA;
  99       */
 100
 101       xBI = pB[1];
 102       xBR = pB[0];
 103       xAR = pA[0];
 104       xAI = pA[1];
 105
 106       twR = *pCoeff++;
 107       twI = *pCoeff++;
 108
 109       t1a = xBR - xAR ;
 110       t1b = xBI + xAI ;
 111
 112       // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
 113       // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
 114       p0 = twR * t1a;
 115       p1 = twI * t1a;
 116       p2 = twR * t1b;
 117       p3 = twI * t1b;
 118
 119       *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
 120       *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
 121
 122       pA += 2;
 123       pB -= 2;
 124       k--;
 125    } while(k > 0u);
 126 }
 127
 128 /* Prepares data for inverse cfft */
 129 void merge_rfft_f32(
 130 arm_rfft_fast_instance_f32 * S,
 131 float32_t * p, float32_t * pOut)
 132 {
 133    uint32_t  k;                                                         /* Loop Counter                     */
 134    float32_t twR, twI;                                          /* RFFT Twiddle coefficients        */
 135    float32_t *pCoeff = S->pTwiddleRFFT;         /* Points to RFFT Twiddle factors   */
 136    float32_t *pA = p;                                           /* increasing pointer               */
 137    float32_t *pB = p;                                           /* decreasing pointer               */
 138    float32_t xAR, xAI, xBR, xBI;                        /* temporary variables              */
 139    float32_t t1a, t1b, r, s, t, u;                      /* temporary variables              */
 140
 141    k = (S->Sint).fftLen - 1;
 142
 143    xAR = pA[0];
 144    xAI = pA[1];
 145
 146    pCoeff += 2 ;
 147
 148    *pOut++ = 0.5f * ( xAR + xAI );
 149    *pOut++ = 0.5f * ( xAR - xAI );
 150
 151    pB  =  p + 2*k ;
 152    pA +=  2        ;
 153
 154    while(k > 0u)
 155    {
 156       /* G is half of the frequency complex spectrum */
 157       //for k = 2:N
 158       //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
 159       xBI =   pB[1]    ;
 160       xBR =   pB[0]    ;
 161       xAR =  pA[0];
 162       xAI =  pA[1];
 163
 164       twR = *pCoeff++;
 165       twI = *pCoeff++;
 166
 167       t1a = xAR - xBR ;
 168       t1b = xAI + xBI ;
 169
 170       r = twR * t1a;
 171       s = twI * t1b;
 172       t = twI * t1a;
 173       u = twR * t1b;
 174
 175       // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
 176       // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
 177       *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
 178       *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
 179
 180       pA += 2;
 181       pB -= 2;
 182       k--;
 183    }
 184
 185 }
 186
 187 /**
 188 * @ingroup groupTransforms
 189 */
 190
 191 /**
 192  * @defgroup Fast Real FFT Functions
 193  *
 194  * \par
 195  * The CMSIS DSP library includes specialized algorithms for computing the
 196  * FFT of real data sequences.  The FFT is defined over complex data but
 197  * in many applications the input is real.  Real FFT algorithms take advantage
 198  * of the symmetry properties of the FFT and have a speed advantage over complex
 199  * algorithms of the same length.
 200  * \par
 201  * The Fast RFFT algorith relays on the mixed radix CFFT that save processor usage.
 202  * \par
 203  * The real length N forward FFT of a sequence is computed using the steps shown below.
 204  * \par
 205  * \image html RFFT.gif "Real Fast Fourier Transform"
 206  * \par
 207  * The real sequence is initially treated as if it were complex to perform a CFFT.
 208  * Later, a processing stage reshapes the data to obtain half of the frequency spectrum
 209  * in complex format. Except the first complex number that contains the two real numbers
 210  * X[0] and X[N/2] all the data is complex. In other words, the first complex sample
 211  * contains two real values packed.
 212  * \par
 213  * The input for the inverse RFFT should keep the same format as the output of the
 214  * forward RFFT. A first processing stage pre-process the data to later perform an
 215  * inverse CFFT.
 216  * \par
 217  * \image html RIFFT.gif "Real Inverse Fast Fourier Transform"
 218  * \par
 219  * The algorithms for floating-point, Q15, and Q31 data are slightly different
 220  * and we describe each algorithm in turn.
 221  * \par Floating-point
 222  * The main functions are <code>arm_rfft_fast_f32()</code>
 223  * and <code>arm_rfft_fast_init_f32()</code>.  The older functions
 224  * <code>arm_rfft_f32()</code> and <code>arm_rfft_init_f32()</code> have been
 225  * deprecated but are still documented.
 226  * \par
 227  * The FFT of a real N-point sequence has even symmetry in the frequency
 228  * domain.  The second half of the data equals the conjugate of the first half
 229  * flipped in frequency:
 230  * <pre>
 231  *X[0] - real data
 232  *X[1] - complex data
 233  *X[2] - complex data
 234  *...
 235  *X[fftLen/2-1] - complex data
 236  *X[fftLen/2] - real data
 237  *X[fftLen/2+1] - conjugate of X[fftLen/2-1]
 238  *X[fftLen/2+2] - conjugate of X[fftLen/2-2]
 239  *...
 240  *X[fftLen-1] - conjugate of X[1]
 241  * </pre>
 242  * Looking at the data, we see that we can uniquely represent the FFT using only
 243  * <pre>
 244  *N/2+1 samples:
 245  *X[0] - real data
 246  *X[1] - complex data
 247  *X[2] - complex data
 248  *...
 249  *X[fftLen/2-1] - complex data
 250  *X[fftLen/2] - real data
 251  * </pre>
 252  * Looking more closely we see that the first and last samples are real valued.
 253  * They can be packed together and we can thus represent the FFT of an N-point
 254  * real sequence by N/2 complex values:
 255  * <pre>
 256  *X[0],X[N/2] - packed real data: X[0] + jX[N/2]
 257  *X[1] - complex data
 258  *X[2] - complex data
 259  *...
 260  *X[fftLen/2-1] - complex data
 261  * </pre>
 262  * The real FFT functions pack the frequency domain data in this fashion.  The
 263  * forward transform outputs the data in this form and the inverse transform
 264  * expects input data in this form.  The function always performs the needed
 265  * bitreversal so that the input and output data is always in normal order.  The
 266  * functions support lengths of [32, 64, 128, ..., 4096] samples.
 267  * \par
 268  * The forward and inverse real FFT functions apply the standard FFT scaling; no
 269  * scaling on the forward transform and 1/fftLen scaling on the inverse
 270  * transform.
 271  * \par Q15 and Q31
 272  * The real algorithms are defined in a similar manner and utilize N/2 complex
 273  * transforms behind the scenes.  In the case of fixed-point data, a radix-4
 274  * complex transform is performed and this limits the allows sequence lengths to
 275  * 128, 512, and 2048 samples.
 276  * \par
 277  * TBD.  We need to document input and output order of data.
 278  * \par
 279  * The complex transforms used internally include scaling to prevent fixed-point
 280  * overflows.  The overall scaling equals 1/(fftLen/2).
 281  * \par
 282  * A separate instance structure must be defined for each transform used but
 283  * twiddle factor and bit reversal tables can be reused.
 284  * \par
 285  * There is also an associated initialization function for each data type.
 286  * The initialization function performs the following operations:
 287  * - Sets the values of the internal structure fields.
 288  * - Initializes twiddle factor table and bit reversal table pointers.
 289  * - Initializes the internal complex FFT data structure.
 290  * \par
 291  * Use of the initialization function is optional.
 292  * However, if the initialization function is used, then the instance structure
 293  * cannot be placed into a const data section. To place an instance structure
 294  * into a const data section, the instance structure should be manually
 295  * initialized as follows:
 296  * <pre>
 297  *arm_rfft_instance_q31 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
 298  *arm_rfft_instance_q15 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
 299  * </pre>
 300  * where <code>fftLenReal</code> is the length of the real transform;
 301  * <code>fftLenBy2</code> length of  the internal complex transform.
 302  * <code>ifftFlagR</code> Selects forward (=0) or inverse (=1) transform.
 303  * <code>bitReverseFlagR</code> Selects bit reversed output (=0) or normal order
 304  * output (=1).
 305  * <code>twidCoefRModifier</code> stride modifier for the twiddle factor table.
 306  * The value is based on the FFT length;
 307  * <code>pTwiddleAReal</code>points to the A array of twiddle coefficients;
 308  * <code>pTwiddleBReal</code>points to the B array of twiddle coefficients;
 309  * <code>pCfft</code> points to the CFFT Instance structure. The CFFT structure
 310  * must also be initialized.  Refer to arm_cfft_radix4_f32() for details regarding
 311  * static initialization of the complex FFT instance structure.
 312  */
 313
 314 /**
 315 * @addtogroup RealFFT
 316 * @{
 317 */
 318
 319 /**
 320 * @brief Processing function for the floating-point real FFT.
 321 * @param[in]  *S              points to an arm_rfft_fast_instance_f32 structure.
 322 * @param[in]  *p              points to the input buffer.
 323 * @param[in]  *pOut           points to an arm_rfft_fast_instance_f32 structure.
 324 * @param[in]  ifftFlag        RFFT if flag is 0, RIFFT if flag is 1
 325 * @return none.
 326 */
 327
 328 void arm_rfft_fast_f32(
 329 arm_rfft_fast_instance_f32 * S,
 330 float32_t * p, float32_t * pOut,
 331 uint8_t ifftFlag)
 332 {
 333    arm_cfft_instance_f32 * Sint = &(S->Sint);
 334    Sint->fftLen = S->fftLenRFFT / 2;
 335
 336    /* Calculation of Real FFT */
 337    if(ifftFlag)
 338    {
 339       /*  Real FFT comression */
 340       merge_rfft_f32(S, p, pOut);
 341
 342       /* Complex radix-4 IFFT process */
 343       arm_cfft_f32( Sint, pOut, ifftFlag, 1);
 344    }
 345    else
 346    {
 347       /* Calculation of RFFT of input */
 348       arm_cfft_f32( Sint, p, ifftFlag, 1);
 349
 350       /*  Real FFT extraction */
 351       stage_rfft_f32(S, p, pOut);
 352    }
 353 }
 354