]>
git.gir.st - tmk_keyboard.git/blob - tmk_core/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_dct4_q15.c
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
4 * $Date: 17. January 2013
7 * Project: CMSIS DSP Library
8 * Title: arm_dct4_q15.c
10 * Description: Processing function of DCT4 & IDCT4 Q15.
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
44 * @addtogroup DCT4_IDCT4
49 * @brief Processing function for the Q15 DCT4/IDCT4.
50 * @param[in] *S points to an instance of the Q15 DCT4 structure.
51 * @param[in] *pState points to state buffer.
52 * @param[in,out] *pInlineBuffer points to the in-place input and output buffer.
55 * \par Input an output formats:
56 * Internally inputs are downscaled in the RFFT process function to avoid overflows.
57 * Number of bits downscaled, depends on the size of the transform.
58 * The input and output formats for different DCT sizes and number of bits to upscale are mentioned in the table below:
60 * \image html dct4FormatsQ15Table.gif
64 const arm_dct4_instance_q15
* S
,
66 q15_t
* pInlineBuffer
)
68 uint32_t i
; /* Loop counter */
69 q15_t
*weights
= S
->pTwiddle
; /* Pointer to the Weights table */
70 q15_t
*cosFact
= S
->pCosFactor
; /* Pointer to the cos factors table */
71 q15_t
*pS1
, *pS2
, *pbuff
; /* Temporary pointers for input buffer and pState buffer */
72 q15_t in
; /* Temporary variable */
75 /* DCT4 computation involves DCT2 (which is calculated using RFFT)
76 * along with some pre-processing and post-processing.
77 * Computational procedure is explained as follows:
78 * (a) Pre-processing involves multiplying input with cos factor,
79 * r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))
81 * r(n) -- output of preprocessing
82 * u(n) -- input to preprocessing(actual Source buffer)
83 * (b) Calculation of DCT2 using FFT is divided into three steps:
84 * Step1: Re-ordering of even and odd elements of input.
85 * Step2: Calculating FFT of the re-ordered input.
86 * Step3: Taking the real part of the product of FFT output and weights.
87 * (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:
88 * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
90 * Y4 -- DCT4 output, Y2 -- DCT2 output
91 * (d) Multiplying the output with the normalizing factor sqrt(2/N).
94 /*-------- Pre-processing ------------*/
95 /* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
96 arm_mult_q15(pInlineBuffer
, cosFact
, pInlineBuffer
, S
->N
);
97 arm_shift_q15(pInlineBuffer
, 1, pInlineBuffer
, S
->N
);
99 /* ----------------------------------------------------------------
100 * Step1: Re-ordering of even and odd elements as
101 * pState[i] = pInlineBuffer[2*i] and
102 * pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2
103 ---------------------------------------------------------------------*/
105 /* pS1 initialized to pState */
108 /* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
109 pS2
= pState
+ (S
->N
- 1u);
111 /* pbuff initialized to input buffer */
112 pbuff
= pInlineBuffer
;
115 #ifndef ARM_MATH_CM0_FAMILY
117 /* Run the below code for Cortex-M4 and Cortex-M3 */
119 /* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
120 i
= (uint32_t) S
->Nby2
>> 2u;
122 /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
123 ** a second loop below computes the remaining 1 to 3 samples. */
126 /* Re-ordering of even and odd elements */
127 /* pState[i] = pInlineBuffer[2*i] */
129 /* pState[N-i-1] = pInlineBuffer[2*i+1] */
141 /* Decrement the loop counter */
145 /* pbuff initialized to input buffer */
146 pbuff
= pInlineBuffer
;
148 /* pS1 initialized to pState */
151 /* Initializing the loop counter to N/4 instead of N for loop unrolling */
152 i
= (uint32_t) S
->N
>> 2u;
154 /* Processing with loop unrolling 4 times as N is always multiple of 4.
155 * Compute 4 outputs at a time */
158 /* Writing the re-ordered output back to inplace input buffer */
164 /* Decrement the loop counter */
169 /* ---------------------------------------------------------
170 * Step2: Calculate RFFT for N-point input
171 * ---------------------------------------------------------- */
172 /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
173 arm_rfft_q15(S
->pRfft
, pInlineBuffer
, pState
);
175 /*----------------------------------------------------------------------
176 * Step3: Multiply the FFT output with the weights.
177 *----------------------------------------------------------------------*/
178 arm_cmplx_mult_cmplx_q15(pState
, weights
, pState
, S
->N
);
180 /* The output of complex multiplication is in 3.13 format.
181 * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
182 arm_shift_q15(pState
, 2, pState
, S
->N
* 2);
184 /* ----------- Post-processing ---------- */
185 /* DCT-IV can be obtained from DCT-II by the equation,
186 * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
187 * Hence, Y4(0) = Y2(0)/2 */
188 /* Getting only real part from the output and Converting to DCT-IV */
190 /* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
191 i
= ((uint32_t) S
->N
- 1u) >> 2u;
193 /* pbuff initialized to input buffer. */
194 pbuff
= pInlineBuffer
;
196 /* pS1 initialized to pState */
199 /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
201 /* input buffer acts as inplace, so output values are stored in the input itself. */
204 /* pState pointer is incremented twice as the real values are located alternatively in the array */
207 /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
208 ** a second loop below computes the remaining 1 to 3 samples. */
211 /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
212 /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
215 /* points to the next real value */
230 /* Decrement the loop counter */
234 /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
235 ** No loop unrolling is used. */
236 i
= ((uint32_t) S
->N
- 1u) % 0x4u
;
240 /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
241 /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
244 /* points to the next real value */
247 /* Decrement the loop counter */
252 /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
254 /* Initializing the loop counter to N/4 instead of N for loop unrolling */
255 i
= (uint32_t) S
->N
>> 2u;
257 /* pbuff initialized to the pInlineBuffer(now contains the output values) */
258 pbuff
= pInlineBuffer
;
260 /* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */
263 /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
265 *pbuff
++ = ((q15_t
) (((q31_t
) in
* S
->normalize
) >> 15));
268 *pbuff
++ = ((q15_t
) (((q31_t
) in
* S
->normalize
) >> 15));
271 *pbuff
++ = ((q15_t
) (((q31_t
) in
* S
->normalize
) >> 15));
274 *pbuff
++ = ((q15_t
) (((q31_t
) in
* S
->normalize
) >> 15));
276 /* Decrement the loop counter */
283 /* Run the below code for Cortex-M0 */
285 /* Initializing the loop counter to N/2 */
286 i
= (uint32_t) S
->Nby2
;
290 /* Re-ordering of even and odd elements */
291 /* pState[i] = pInlineBuffer[2*i] */
293 /* pState[N-i-1] = pInlineBuffer[2*i+1] */
296 /* Decrement the loop counter */
300 /* pbuff initialized to input buffer */
301 pbuff
= pInlineBuffer
;
303 /* pS1 initialized to pState */
306 /* Initializing the loop counter */
311 /* Writing the re-ordered output back to inplace input buffer */
314 /* Decrement the loop counter */
319 /* ---------------------------------------------------------
320 * Step2: Calculate RFFT for N-point input
321 * ---------------------------------------------------------- */
322 /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
323 arm_rfft_q15(S
->pRfft
, pInlineBuffer
, pState
);
325 /*----------------------------------------------------------------------
326 * Step3: Multiply the FFT output with the weights.
327 *----------------------------------------------------------------------*/
328 arm_cmplx_mult_cmplx_q15(pState
, weights
, pState
, S
->N
);
330 /* The output of complex multiplication is in 3.13 format.
331 * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
332 arm_shift_q15(pState
, 2, pState
, S
->N
* 2);
334 /* ----------- Post-processing ---------- */
335 /* DCT-IV can be obtained from DCT-II by the equation,
336 * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
337 * Hence, Y4(0) = Y2(0)/2 */
338 /* Getting only real part from the output and Converting to DCT-IV */
340 /* Initializing the loop counter */
341 i
= ((uint32_t) S
->N
- 1u);
343 /* pbuff initialized to input buffer. */
344 pbuff
= pInlineBuffer
;
346 /* pS1 initialized to pState */
349 /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
351 /* input buffer acts as inplace, so output values are stored in the input itself. */
354 /* pState pointer is incremented twice as the real values are located alternatively in the array */
359 /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
360 /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
363 /* points to the next real value */
366 /* Decrement the loop counter */
370 /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
372 /* Initializing the loop counter */
375 /* pbuff initialized to the pInlineBuffer(now contains the output values) */
376 pbuff
= pInlineBuffer
;
380 /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
382 *pbuff
++ = ((q15_t
) (((q31_t
) in
* S
->normalize
) >> 15));
384 /* Decrement the loop counter */
388 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
393 * @} end of DCT4_IDCT4 group