tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/FilteringFunctions/arm_iir_lattice_q15.c

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        17. January 2013
   5 * $Revision:    V1.4.1
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_iir_lattice_q15.c
   9 *
  10 * Description:  Q15 IIR lattice filter processing function.
  11 *
  12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13 *
  14 * Redistribution and use in source and binary forms, with or without
  15 * modification, are permitted provided that the following conditions
  16 * are met:
  17 *   - Redistributions of source code must retain the above copyright
  18 *     notice, this list of conditions and the following disclaimer.
  19 *   - Redistributions in binary form must reproduce the above copyright
  20 *     notice, this list of conditions and the following disclaimer in
  21 *     the documentation and/or other materials provided with the
  22 *     distribution.
  23 *   - Neither the name of ARM LIMITED nor the names of its contributors
  24 *     may be used to endorse or promote products derived from this
  25 *     software without specific prior written permission.
  26 *
  27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38 * POSSIBILITY OF SUCH DAMAGE.
  39 * -------------------------------------------------------------------- */
  40
  41 #include "arm_math.h"
  42
  43 /**
  44  * @ingroup groupFilters
  45  */
  46
  47 /**
  48  * @addtogroup IIR_Lattice
  49  * @{
  50  */
  51
  52 /**
  53  * @brief Processing function for the Q15 IIR lattice filter.
  54  * @param[in] *S points to an instance of the Q15 IIR lattice structure.
  55  * @param[in] *pSrc points to the block of input data.
  56  * @param[out] *pDst points to the block of output data.
  57  * @param[in] blockSize number of samples to process.
  58  * @return none.
  59  *
  60  * @details
  61  * <b>Scaling and Overflow Behavior:</b>
  62  * \par
  63  * The function is implemented using a 64-bit internal accumulator.
  64  * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
  65  * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
  66  * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
  67  * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
  68  * Lastly, the accumulator is saturated to yield a result in 1.15 format.
  69  */
  70
  71 void arm_iir_lattice_q15(
  72   const arm_iir_lattice_instance_q15 * S,
  73   q15_t * pSrc,
  74   q15_t * pDst,
  75   uint32_t blockSize)
  76 {
  77   /* Filters blockSize samples from pSrc into pDst one sample at a time, reading and rewriting S->pState. */
  78   /* Two builds follow: an unrolled one using __SIMD32/__PKHBT/__SMLALD, and a plain loop when ARM_MATH_CM0_FAMILY is defined. */
  79 #ifndef ARM_MATH_CM0_FAMILY
  80
  81   /* Unrolled variant, built when ARM_MATH_CM0_FAMILY is not defined (Cortex-M4 and Cortex-M3) */
  82
  83   q31_t fcurr, fnext, gcurr = 0, gnext;          /* f and g values entering/leaving the current lattice stage */
  84   q15_t gnext1, gnext2;                          /* Saturated g outputs of a stage pair, packed below for the dual MAC */
  85   uint32_t stgCnt;                               /* Counter for the end-of-block state copy */
  86   q63_t acc;                                     /* Accumulator */
  87   uint32_t blkCnt, tapCnt;                       /* Sample counter and lattice stage counter */
  88   q15_t *px1, *px2, *pk, *pv;                    /* State read/write pointers, reflection (k) and ladder (v) coefficient pointers */
  89   uint32_t numStages = S->numStages;             /* number of stages */
  90   q15_t *pState;                                 /* State pointer */
  91   q15_t *pStateCurnt;                            /* Destination pointer for the end-of-block state copy */
  92   q15_t out;                                     /* Temporary variable for output */
  93   q31_t v;                                       /* Two ladder coefficients packed into one 32-bit word */
  94 #ifdef UNALIGNED_SUPPORT_DISABLE
  95         q15_t v1, v2;                            /* Ladder coefficients read one at a time before packing */
  96 #endif
  97
  98   /* One loop iteration per input sample */
  99   blkCnt = blockSize;
 100
 101   pState = &S->pState[0];
 102
 103   /* Sample processing */
 104   while(blkCnt > 0u)
 105   {
 106     /* Read Sample from input buffer */
 107     /* fN(n) = x(n) */
 108     fcurr = *pSrc++;
 109
 110     /* Initialize state read pointer */
 111     px1 = pState;
 112     /* Initialize state write pointer (same start as the read pointer: each slot is read before it is overwritten) */
 113     px2 = pState;
 114     /* Set accumulator to zero */
 115     acc = 0;
 116     /* Initialize Ladder coeff pointer */
 117     pv = &S->pvCoeffs[0];
 118     /* Initialize Reflection coeff pointer */
 119     pk = &S->pkCoeffs[0];
 120
 121     /* The first stage is handled here, outside the loops; the remaining numStages - 1 stages follow */
 122     /* Process sample for first tap */
 123     gcurr = *px1++;
 124     /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
 125     fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
 126     fnext = __SSAT(fnext, 16);
 127     /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
 128     gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
 129     gnext = __SSAT(gnext, 16);
 130     /* write gN(n) into state for next sample processing */
 131     *px2++ = (q15_t) gnext;
 132     /* y(n) += gN(n) * vN  */
 133     acc += (q31_t) ((gnext * (*pv++)));
 134
 135
 136     /* Update f values for next coefficient processing */
 137     fcurr = fnext;
 138     /* NOTE(review): numStages is unsigned, so (numStages - 1u) wraps when numStages is 0 - presumably callers guarantee numStages >= 1; confirm */
 139     /* Loop unrolling.  Process 4 taps at a time. */
 140     tapCnt = (numStages - 1u) >> 2;
 141
 142     while(tapCnt > 0u)
 143     {
 144
 145       /* Process sample for 2nd, 6th ...taps */
 146       /* Read gN-2(n-1) from state buffer */
 147       gcurr = *px1++;
 148       /* Process sample for 2nd, 6th .. taps */
 149       /* fN-2(n) = fN-1(n) - kN-1 * gN-2(n-1) */
 150       fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
 151       fnext = __SSAT(fnext, 16);
 152       /* gN-1(n) = kN-1 * fN-2(n) + gN-2(n-1) */
 153       gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
 154       gnext1 = (q15_t) __SSAT(gnext, 16);
 155       /* write gN-1(n) into state */
 156       *px2++ = (q15_t) gnext1;
 157
 158
 159       /* Process sample for 3rd, 7th ...taps */
 160       /* Read gN-3(n-1) from state */
 161       gcurr = *px1++;
 162       /* Process sample for 3rd, 7th .. taps */
 163       /* fN-3(n) = fN-2(n) - kN-2 * gN-3(n-1) */
 164       fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15);
 165       fcurr = __SSAT(fcurr, 16);
 166       /* gN-2(n) = kN-2 * fN-3(n) + gN-3(n-1) */
 167       gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr;
 168       gnext2 = (q15_t) __SSAT(gnext, 16);
 169       /* write gN-2(n) into state */
 170       *px2++ = (q15_t) gnext2;
 171
 172       /* Read vN-1 and vN-2 at a time */
 173 #ifndef UNALIGNED_SUPPORT_DISABLE
 174
 175       v = *__SIMD32(pv)++;
 176
 177 #else
 178
 179           v1 = *pv++;
 180           v2 = *pv++;
 181
 182 #ifndef ARM_MATH_BIG_ENDIAN
 183
 184           v = __PKHBT(v1, v2, 16);
 185
 186 #else
 187
 188           v = __PKHBT(v2, v1, 16);
 189
 190 #endif  /*      #ifndef ARM_MATH_BIG_ENDIAN             */
 191
 192 #endif  /*      #ifndef UNALIGNED_SUPPORT_DISABLE */
 193
 194
 195       /* Pack gN-1(n) and gN-2(n); the argument order is swapped for big endian, as for v above */
 196
 197 #ifndef  ARM_MATH_BIG_ENDIAN
 198
 199       gnext = __PKHBT(gnext1, gnext2, 16);
 200
 201 #else
 202
 203       gnext = __PKHBT(gnext2, gnext1, 16);
 204
 205 #endif /*   #ifndef  ARM_MATH_BIG_ENDIAN    */
 206
 207       /* y(n) += gN-1(n) * vN-1  */
 208       /* process for gN-5(n) * vN-5, gN-9(n) * vN-9 ... */
 209       /* y(n) += gN-2(n) * vN-2  */
 210       /* process for gN-6(n) * vN-6, gN-10(n) * vN-10 ... */
 211       acc = __SMLALD(gnext, v, acc);
 212
 213
 214       /* Process sample for 4th, 8th ...taps */
 215       /* Read gN-4(n-1) from state */
 216       gcurr = *px1++;
 217       /* Process sample for 4th, 8th .. taps */
 218       /* fN-4(n) = fN-3(n) - kN-3 * gN-4(n-1) */
 219       fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
 220       fnext = __SSAT(fnext, 16);
 221       /* gN-3(n) = kN-3 * fN-4(n) + gN-4(n-1) */
 222       gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
 223       gnext1 = (q15_t) __SSAT(gnext, 16);
 224       /* write  gN-3(n) for the next sample process */
 225       *px2++ = (q15_t) gnext1;
 226
 227
 228       /* Process sample for 5th, 9th ...taps */
 229       /* Read gN-5(n-1) from state */
 230       gcurr = *px1++;
 231       /* Process sample for 5th, 9th .. taps */
 232       /* fN-5(n) = fN-4(n) - kN-4 * gN-5(n-1) */
 233       fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15);
 234       fcurr = __SSAT(fcurr, 16);
 235       /* gN-4(n) = kN-4 * fN-5(n) + gN-5(n-1) */
 236       gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr;
 237       gnext2 = (q15_t) __SSAT(gnext, 16);
 238       /* write      gN-4(n) for the next sample process */
 239       *px2++ = (q15_t) gnext2;
 240
 241       /* Read vN-3 and vN-4 at a time */
 242 #ifndef UNALIGNED_SUPPORT_DISABLE
 243
 244       v = *__SIMD32(pv)++;
 245
 246 #else
 247
 248           v1 = *pv++;
 249           v2 = *pv++;
 250
 251 #ifndef ARM_MATH_BIG_ENDIAN
 252
 253           v = __PKHBT(v1, v2, 16);
 254
 255 #else
 256
 257           v = __PKHBT(v2, v1, 16);
 258
 259 #endif  /* #ifndef ARM_MATH_BIG_ENDIAN   */
 260
 261 #endif  /*      #ifndef UNALIGNED_SUPPORT_DISABLE */
 262
 263
 264       /* Pack gN-3(n) and gN-4(n) */
 265 #ifndef  ARM_MATH_BIG_ENDIAN
 266
 267       gnext = __PKHBT(gnext1, gnext2, 16);
 268
 269 #else
 270
 271       gnext = __PKHBT(gnext2, gnext1, 16);
 272
 273 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */
 274
 275       /* y(n) += gN-4(n) * vN-4  */
 276       /* process for gN-8(n) * vN-8, gN-12(n) * vN-12 ... */
 277       /* y(n) += gN-3(n) * vN-3  */
 278       /* process for gN-7(n) * vN-7, gN-11(n) * vN-11 ... */
 279       acc = __SMLALD(gnext, v, acc);
 280
 281       tapCnt--;
 282
 283     }
 284     /* Seed fnext from fcurr so the final ladder term below uses the right f value even when no remainder stages run */
 285     fnext = fcurr;
 286
 287     /* Process the (numStages - 1) % 4 stages left over after the unrolled loop */
 288     tapCnt = (numStages - 1u) % 0x4u;
 289
 290     while(tapCnt > 0u)
 291     {
 292       gcurr = *px1++;
 293       /* Process sample for last taps */
 294       fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
 295       fnext = __SSAT(fnext, 16);
 296       gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
 297       gnext = __SSAT(gnext, 16);
 298       /* Output samples for last taps */
 299       acc += (q31_t) (((q31_t) gnext * (*pv++)));
 300       *px2++ = (q15_t) gnext;
 301       fcurr = fnext;
 302
 303       tapCnt--;
 304     }
 305
 306     /* y(n) += g0(n) * v0 */
 307     acc += (q31_t) (((q31_t) fnext * (*pv++)));
 308     /* Drop the low 15 bits of the accumulator, then saturate to 16 bits */
 309     out = (q15_t) __SSAT(acc >> 15, 16);
 310     *px2++ = (q15_t) fnext;                      /* the final f value becomes the last state entry written for this sample */
 311
 312     /* write out into pDst */
 313     *pDst++ = out;
 314
 315     /* Advance the state pointer by 1: the next sample reads the values just written, starting one element further on */
 316     pState = pState + 1u;
 317     blkCnt--;
 318
 319   }
 320
 321   /* Processing is complete. Now copy last S->numStages samples to start of the buffer
 322      for the preparation of next frame process */
 323   /* Points to the start of the state buffer */
 324   pStateCurnt = &S->pState[0];
 325   pState = &S->pState[blockSize];
 326   /* Copy in groups of four q15 values first */
 327   stgCnt = (numStages >> 2u);
 328
 329   /* copy data */
 330   while(stgCnt > 0u)
 331   {
 332 #ifndef UNALIGNED_SUPPORT_DISABLE
 333
 334     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
 335     *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
 336
 337 #else
 338
 339     *pStateCurnt++ = *pState++;
 340     *pStateCurnt++ = *pState++;
 341     *pStateCurnt++ = *pState++;
 342     *pStateCurnt++ = *pState++;
 343
 344 #endif /*       #ifndef UNALIGNED_SUPPORT_DISABLE */
 345
 346     /* Decrement the loop counter */
 347     stgCnt--;
 348
 349   }
 350
 351   /* Calculation of count for remaining q15_t data */
 352   stgCnt = (numStages) % 0x4u;
 353
 354   /* copy data */
 355   while(stgCnt > 0u)
 356   {
 357     *pStateCurnt++ = *pState++;
 358
 359     /* Decrement the loop counter */
 360     stgCnt--;
 361   }
 362
 363 #else
 364
 365   /* Plain (not unrolled) variant, built when ARM_MATH_CM0_FAMILY is defined (Cortex-M0) */
 366
 367   q31_t fcurr, fnext = 0, gcurr = 0, gnext;      /* f and g values entering/leaving the current lattice stage */
 368   uint32_t stgCnt;                               /* Counter for the end-of-block state copy */
 369   q63_t acc;                                     /* Accumulator */
 370   uint32_t blkCnt, tapCnt;                       /* Sample counter and lattice stage counter */
 371   q15_t *px1, *px2, *pk, *pv;                    /* State read/write pointers, reflection (k) and ladder (v) coefficient pointers */
 372   uint32_t numStages = S->numStages;             /* number of stages */
 373   q15_t *pState;                                 /* State pointer */
 374   q15_t *pStateCurnt;                            /* Destination pointer for the end-of-block state copy */
 375   q15_t out;                                     /* Temporary variable for output */
 376
 377   /* One loop iteration per input sample */
 378   blkCnt = blockSize;
 379
 380   pState = &S->pState[0];
 381
 382   /* Sample processing */
 383   while(blkCnt > 0u)
 384   {
 385     /* Read Sample from input buffer */
 386     /* fN(n) = x(n) */
 387     fcurr = *pSrc++;
 388
 389     /* Initialize state read pointer */
 390     px1 = pState;
 391     /* Initialize state write pointer (same start as the read pointer: each slot is read before it is overwritten) */
 392     px2 = pState;
 393     /* Set accumulator to zero */
 394     acc = 0;
 395     /* Initialize Ladder coeff pointer */
 396     pv = &S->pvCoeffs[0];
 397     /* Initialize Reflection coeff pointer */
 398     pk = &S->pkCoeffs[0];
 399     /* All numStages stages are processed in a single loop here */
 400     tapCnt = numStages;
 401
 402     while(tapCnt > 0u)
 403     {
 404       gcurr = *px1++;
 405       /* Process sample */
 406       /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
 407       fnext = fcurr - ((gcurr * (*pk)) >> 15);
 408       fnext = __SSAT(fnext, 16);
 409       /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
 410       gnext = ((fnext * (*pk++)) >> 15) + gcurr;
 411       gnext = __SSAT(gnext, 16);
 412       /* Output samples */
 413       /* y(n) += gN(n) * vN */
 414       acc += (q31_t) ((gnext * (*pv++)));
 415       /* write gN(n) into state for next sample processing */
 416       *px2++ = (q15_t) gnext;
 417       /* Update f values for next coefficient processing */
 418       fcurr = fnext;
 419
 420       tapCnt--;
 421     }
 422
 423     /* y(n) += g0(n) * v0 */
 424     acc += (q31_t) ((fnext * (*pv++)));
 425     /* Drop the low 15 bits of the accumulator, then saturate to 16 bits */
 426     out = (q15_t) __SSAT(acc >> 15, 16);
 427     *px2++ = (q15_t) fnext;                      /* the final f value becomes the last state entry written for this sample */
 428
 429     /* write out into pDst */
 430     *pDst++ = out;
 431
 432     /* Advance the state pointer by 1: the next sample reads the values just written, starting one element further on */
 433     pState = pState + 1u;
 434     blkCnt--;
 435
 436   }
 437
 438   /* Processing is complete. Now copy last S->numStages samples to start of the buffer
 439      for the preparation of next frame process */
 440   /* Points to the start of the state buffer */
 441   pStateCurnt = &S->pState[0];
 442   pState = &S->pState[blockSize];
 443
 444   stgCnt = numStages;
 445
 446   /* copy data */
 447   while(stgCnt > 0u)
 448   {
 449     *pStateCurnt++ = *pState++;
 450
 451     /* Decrement the loop counter */
 452     stgCnt--;
 453   }
 454
 455 #endif /*   #ifndef ARM_MATH_CM0_FAMILY */
 456
 457 }
 458
 459
 460
 461
 462 /**
 463  * @} end of IIR_Lattice group
 464  */