/**********************************************************************************************/ /* This program is part of the Barcelona OpenMP Tasks Suite */ /* Copyright (C) 2009 Barcelona Supercomputing Center - Centro Nacional de Supercomputacion */ /* Copyright (C) 2009 Universitat Politecnica de Catalunya */ /* */ /* This program is free software; you can redistribute it and/or modify */ /* it under the terms of the GNU General Public License as published by */ /* the Free Software Foundation; either version 2 of the License, or */ /* (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /**********************************************************************************************/ /* * Original code from the Cilk project * * Copyright (c) 2000 Massachusetts Institute of Technology * Copyright (c) 2000 Matteo Frigo */ #include #include #include #include #include #include "fft.h" //#include "app-desc.h" /* Definitions and operations for complex numbers */ /* * compute the W coefficients (that is, powers of the root of 1) * and store them into an array. */ void compute_w_coefficients(int n, int a, int b, COMPLEX * W) { double twoPiOverN; int k; REAL s, c; if (b - a < 128) { twoPiOverN = 2.0 * 3.1415926535897932384626434 / n; for (k = a; k <= b; ++k) { c = cos(twoPiOverN * k); c_re(W[k]) = c_re(W[n - k]) = c; s = sin(twoPiOverN * k); c_im(W[k]) = -s; c_im(W[n - k]) = s; } } else { int ab = (a + b) / 2; #pragma omp task untied compute_w_coefficients(n, a, ab, W); #pragma omp task untied compute_w_coefficients(n, ab + 1, b, W); #pragma omp taskwait } } void compute_w_coefficients_seq(int n, int a, int b, COMPLEX * W) { double twoPiOverN; int k; REAL s, c; if (b - a < 128) { twoPiOverN = 2.0 * 3.1415926535897932384626434 / n; for (k = a; k <= b; ++k) { c = cos(twoPiOverN * k); c_re(W[k]) = c_re(W[n - k]) = c; s = sin(twoPiOverN * k); c_im(W[k]) = -s; c_im(W[n - k]) = s; } } else { int ab = (a + b) / 2; compute_w_coefficients_seq(n, a, ab, W); compute_w_coefficients_seq(n, ab + 1, b, W); } } /* * Determine (in a stupid way) if n is divisible by eight, then by four, else * find the smallest prime factor of n. */ int factor(int n) { int r; if (n < 2) return 1; if (n == 64 || n == 128 || n == 256 || n == 1024 || n == 2048 || n == 4096) return 8; if ((n & 15) == 0) return 16; if ((n & 7) == 0) return 8; if ((n & 3) == 0) return 4; if ((n & 1) == 0) return 2; /* try odd numbers up to n (computing the sqrt may be slower) */ for (r = 3; r < n; r += 2) if (n % r == 0) return r; /* n is prime */ return n; } void unshuffle(int a, int b, COMPLEX * in, COMPLEX * out, int r, int m) { int i, j; int r4 = r & (~0x3); const COMPLEX *ip; COMPLEX *jp; if (b - a < 16) { ip = in + a * r; for (i = a; i < b; ++i) { jp = out + i; for (j = 0; j < r4; j += 4) { jp[0] = ip[0]; jp[m] = ip[1]; jp[2 * m] = ip[2]; jp[3 * m] = ip[3]; jp += 4 * m; ip += 4; } for (; j < r; ++j) { *jp = *ip; ip++; jp += m; } } } else { int ab = (a + b) / 2; #pragma omp task untied unshuffle(a, ab, in, out, r, m); #pragma omp task untied unshuffle(ab, b, in, out, r, m); #pragma omp taskwait } } void unshuffle_seq(int a, int b, COMPLEX * in, COMPLEX * out, int r, int m) { int i, j; int r4 = r & (~0x3); const COMPLEX *ip; COMPLEX *jp; if (b - a < 16) { ip = in + a * r; for (i = a; i < b; ++i) { jp = out + i; for (j = 0; j < r4; j += 4) { jp[0] = ip[0]; jp[m] = ip[1]; jp[2 * m] = ip[2]; jp[3 * m] = ip[3]; jp += 4 * m; ip += 4; } for (; j < r; ++j) { *jp = *ip; ip++; jp += m; } } } else { int ab = (a + b) / 2; unshuffle_seq(a, ab, in, out, r, m); unshuffle_seq(ab, b, in, out, r, m); } } void fft_twiddle_gen1(COMPLEX * in, COMPLEX * out, COMPLEX * W, int r, int m, int nW, int nWdnti, int nWdntm) { int j, k; COMPLEX *jp, *kp; for (k = 0, kp = out; k < r; ++k, kp += m) { REAL r0, i0, rt, it, rw, iw; int l1 = nWdnti + nWdntm * k; int l0; r0 = i0 = 0.0; for (j = 0, jp = in, l0 = 0; j < r; ++j, jp += m) { rw = c_re(W[l0]); iw = c_im(W[l0]); rt = c_re(*jp); it = c_im(*jp); r0 += rt * rw - it * iw; i0 += rt * iw + it * rw; l0 += l1; if (l0 > nW) l0 -= nW; } c_re(*kp) = r0; c_im(*kp) = i0; } } void fft_twiddle_gen(int i, int i1, COMPLEX * in, COMPLEX * out, COMPLEX * W, int nW, int nWdn, int r, int m) { if (i == i1 - 1) { #pragma omp task untied fft_twiddle_gen1(in + i, out + i, W, r, m, nW, nWdn * i, nWdn * m); } else { int i2 = (i + i1) / 2; #pragma omp task untied fft_twiddle_gen(i, i2, in, out, W, nW, nWdn, r, m); #pragma omp task untied fft_twiddle_gen(i2, i1, in, out, W, nW, nWdn, r, m); } #pragma omp taskwait } void fft_twiddle_gen_seq(int i, int i1, COMPLEX * in, COMPLEX * out, COMPLEX * W, int nW, int nWdn, int r, int m) { if (i == i1 - 1) { fft_twiddle_gen1(in + i, out + i, W, r, m, nW, nWdn * i, nWdn * m); } else { int i2 = (i + i1) / 2; fft_twiddle_gen_seq(i, i2, in, out, W, nW, nWdn, r, m); fft_twiddle_gen_seq(i2, i1, in, out, W, nW, nWdn, r, m); } } /* machine-generated code begins here */ void fft_base_2(COMPLEX * in, COMPLEX * out) { REAL r1_0, i1_0; REAL r1_1, i1_1; r1_0 = c_re(in[0]); i1_0 = c_im(in[0]); r1_1 = c_re(in[1]); i1_1 = c_im(in[1]); c_re(out[0]) = (r1_0 + r1_1); c_im(out[0]) = (i1_0 + i1_1); c_re(out[1]) = (r1_0 - r1_1); c_im(out[1]) = (i1_0 - i1_1); } void fft_twiddle_2(int a, int b, COMPLEX * in, COMPLEX * out, COMPLEX * W, int nW, int nWdn, int m) { int l1, i; COMPLEX *jp, *kp; REAL tmpr, tmpi, wr, wi; if ((b - a) < 128) { for (i = a, l1 = nWdn * i, kp = out + i; i < b; i++, l1 += nWdn, kp++) { jp = in + i; { REAL r1_0, i1_0; REAL r1_1, i1_1; r1_0 = c_re(jp[0 * m]); i1_0 = c_im(jp[0 * m]); wr = c_re(W[1 * l1]); wi = c_im(W[1 * l1]); tmpr = c_re(jp[1 * m]); tmpi = c_im(jp[1 * m]); r1_1 = ((wr * tmpr) - (wi * tmpi)); i1_1 = ((wi * tmpr) + (wr * tmpi)); c_re(kp[0 * m]) = (r1_0 + r1_1); c_im(kp[0 * m]) = (i1_0 + i1_1); c_re(kp[1 * m]) = (r1_0 - r1_1); c_im(kp[1 * m]) = (i1_0 - i1_1); } } } else { int ab = (a + b) / 2; #pragma omp task untied fft_twiddle_2(a, ab, in, out, W, nW, nWdn, m); #pragma omp task untied fft_twiddle_2(ab, b, in, out, W, nW, nWdn, m); #pragma omp taskwait } } void fft_twiddle_2_seq(int a, int b, COMPLEX * in, COMPLEX * out, COMPLEX * W, int nW, int nWdn, int m) { int l1, i; COMPLEX *jp, *kp; REAL tmpr, tmpi, wr, wi; if ((b - a) < 128) { for (i = a, l1 = nWdn * i, kp = out + i; i < b; i++, l1 += nWdn, kp++) { jp = in + i; { REAL r1_0, i1_0; REAL r1_1, i1_1; r1_0 = c_re(jp[0 * m]); i1_0 = c_im(jp[0 * m]); wr = c_re(W[1 * l1]); wi = c_im(W[1 * l1]); tmpr = c_re(jp[1 * m]); tmpi = c_im(jp[1 * m]); r1_1 = ((wr * tmpr) - (wi * tmpi)); i1_1 = ((wi * tmpr) + (wr * tmpi)); c_re(kp[0 * m]) = (r1_0 + r1_1); c_im(kp[0 * m]) = (i1_0 + i1_1); c_re(kp[1 * m]) = (r1_0 - r1_1); c_im(kp[1 * m]) = (i1_0 - i1_1); } } } else { int ab = (a + b) / 2; fft_twiddle_2_seq(a, ab, in, out, W, nW, nWdn, m); fft_twiddle_2_seq(ab, b, in, out, W, nW, nWdn, m); } } void fft_unshuffle_2(int a, int b, COMPLEX * in, COMPLEX * out, int m) { int i; const COMPLEX *ip; COMPLEX *jp; if ((b - a) < 128) { ip = in + a * 2; for (i = a; i < b; ++i) { jp = out + i; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; } } else { int ab = (a + b) / 2; #pragma omp task untied fft_unshuffle_2(a, ab, in, out, m); #pragma omp task untied fft_unshuffle_2(ab, b, in, out, m); #pragma omp taskwait } } void fft_unshuffle_2_seq(int a, int b, COMPLEX * in, COMPLEX * out, int m) { int i; const COMPLEX *ip; COMPLEX *jp; if ((b - a) < 128) { ip = in + a * 2; for (i = a; i < b; ++i) { jp = out + i; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; } } else { int ab = (a + b) / 2; fft_unshuffle_2_seq(a, ab, in, out, m); fft_unshuffle_2_seq(ab, b, in, out, m); } } void fft_base_4(COMPLEX * in, COMPLEX * out) { REAL r1_0, i1_0; REAL r1_1, i1_1; REAL r1_2, i1_2; REAL r1_3, i1_3; { REAL r2_0, i2_0; REAL r2_2, i2_2; r2_0 = c_re(in[0]); i2_0 = c_im(in[0]); r2_2 = c_re(in[2]); i2_2 = c_im(in[2]); r1_0 = (r2_0 + r2_2); i1_0 = (i2_0 + i2_2); r1_2 = (r2_0 - r2_2); i1_2 = (i2_0 - i2_2); } { REAL r2_1, i2_1; REAL r2_3, i2_3; r2_1 = c_re(in[1]); i2_1 = c_im(in[1]); r2_3 = c_re(in[3]); i2_3 = c_im(in[3]); r1_1 = (r2_1 + r2_3); i1_1 = (i2_1 + i2_3); r1_3 = (r2_1 - r2_3); i1_3 = (i2_1 - i2_3); } c_re(out[0]) = (r1_0 + r1_1); c_im(out[0]) = (i1_0 + i1_1); c_re(out[2]) = (r1_0 - r1_1); c_im(out[2]) = (i1_0 - i1_1); c_re(out[1]) = (r1_2 + i1_3); c_im(out[1]) = (i1_2 - r1_3); c_re(out[3]) = (r1_2 - i1_3); c_im(out[3]) = (i1_2 + r1_3); } void fft_twiddle_4(int a, int b, COMPLEX * in, COMPLEX * out, COMPLEX * W, int nW, int nWdn, int m) { int l1, i; COMPLEX *jp, *kp; REAL tmpr, tmpi, wr, wi; if ((b - a) < 128) { for (i = a, l1 = nWdn * i, kp = out + i; i < b; i++, l1 += nWdn, kp++) { jp = in + i; { REAL r1_0, i1_0; REAL r1_1, i1_1; REAL r1_2, i1_2; REAL r1_3, i1_3; { REAL r2_0, i2_0; REAL r2_2, i2_2; r2_0 = c_re(jp[0 * m]); i2_0 = c_im(jp[0 * m]); wr = c_re(W[2 * l1]); wi = c_im(W[2 * l1]); tmpr = c_re(jp[2 * m]); tmpi = c_im(jp[2 * m]); r2_2 = ((wr * tmpr) - (wi * tmpi)); i2_2 = ((wi * tmpr) + (wr * tmpi)); r1_0 = (r2_0 + r2_2); i1_0 = (i2_0 + i2_2); r1_2 = (r2_0 - r2_2); i1_2 = (i2_0 - i2_2); } { REAL r2_1, i2_1; REAL r2_3, i2_3; wr = c_re(W[1 * l1]); wi = c_im(W[1 * l1]); tmpr = c_re(jp[1 * m]); tmpi = c_im(jp[1 * m]); r2_1 = ((wr * tmpr) - (wi * tmpi)); i2_1 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[3 * l1]); wi = c_im(W[3 * l1]); tmpr = c_re(jp[3 * m]); tmpi = c_im(jp[3 * m]); r2_3 = ((wr * tmpr) - (wi * tmpi)); i2_3 = ((wi * tmpr) + (wr * tmpi)); r1_1 = (r2_1 + r2_3); i1_1 = (i2_1 + i2_3); r1_3 = (r2_1 - r2_3); i1_3 = (i2_1 - i2_3); } c_re(kp[0 * m]) = (r1_0 + r1_1); c_im(kp[0 * m]) = (i1_0 + i1_1); c_re(kp[2 * m]) = (r1_0 - r1_1); c_im(kp[2 * m]) = (i1_0 - i1_1); c_re(kp[1 * m]) = (r1_2 + i1_3); c_im(kp[1 * m]) = (i1_2 - r1_3); c_re(kp[3 * m]) = (r1_2 - i1_3); c_im(kp[3 * m]) = (i1_2 + r1_3); } } } else { int ab = (a + b) / 2; #pragma omp task untied fft_twiddle_4(a, ab, in, out, W, nW, nWdn, m); #pragma omp task untied fft_twiddle_4(ab, b, in, out, W, nW, nWdn, m); #pragma omp taskwait } } void fft_twiddle_4_seq(int a, int b, COMPLEX * in, COMPLEX * out, COMPLEX * W, int nW, int nWdn, int m) { int l1, i; COMPLEX *jp, *kp; REAL tmpr, tmpi, wr, wi; if ((b - a) < 128) { for (i = a, l1 = nWdn * i, kp = out + i; i < b; i++, l1 += nWdn, kp++) { jp = in + i; { REAL r1_0, i1_0; REAL r1_1, i1_1; REAL r1_2, i1_2; REAL r1_3, i1_3; { REAL r2_0, i2_0; REAL r2_2, i2_2; r2_0 = c_re(jp[0 * m]); i2_0 = c_im(jp[0 * m]); wr = c_re(W[2 * l1]); wi = c_im(W[2 * l1]); tmpr = c_re(jp[2 * m]); tmpi = c_im(jp[2 * m]); r2_2 = ((wr * tmpr) - (wi * tmpi)); i2_2 = ((wi * tmpr) + (wr * tmpi)); r1_0 = (r2_0 + r2_2); i1_0 = (i2_0 + i2_2); r1_2 = (r2_0 - r2_2); i1_2 = (i2_0 - i2_2); } { REAL r2_1, i2_1; REAL r2_3, i2_3; wr = c_re(W[1 * l1]); wi = c_im(W[1 * l1]); tmpr = c_re(jp[1 * m]); tmpi = c_im(jp[1 * m]); r2_1 = ((wr * tmpr) - (wi * tmpi)); i2_1 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[3 * l1]); wi = c_im(W[3 * l1]); tmpr = c_re(jp[3 * m]); tmpi = c_im(jp[3 * m]); r2_3 = ((wr * tmpr) - (wi * tmpi)); i2_3 = ((wi * tmpr) + (wr * tmpi)); r1_1 = (r2_1 + r2_3); i1_1 = (i2_1 + i2_3); r1_3 = (r2_1 - r2_3); i1_3 = (i2_1 - i2_3); } c_re(kp[0 * m]) = (r1_0 + r1_1); c_im(kp[0 * m]) = (i1_0 + i1_1); c_re(kp[2 * m]) = (r1_0 - r1_1); c_im(kp[2 * m]) = (i1_0 - i1_1); c_re(kp[1 * m]) = (r1_2 + i1_3); c_im(kp[1 * m]) = (i1_2 - r1_3); c_re(kp[3 * m]) = (r1_2 - i1_3); c_im(kp[3 * m]) = (i1_2 + r1_3); } } } else { int ab = (a + b) / 2; fft_twiddle_4_seq(a, ab, in, out, W, nW, nWdn, m); fft_twiddle_4_seq(ab, b, in, out, W, nW, nWdn, m); } } void fft_unshuffle_4(int a, int b, COMPLEX * in, COMPLEX * out, int m) { int i; const COMPLEX *ip; COMPLEX *jp; if ((b - a) < 128) { ip = in + a * 4; for (i = a; i < b; ++i) { jp = out + i; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; } } else { int ab = (a + b) / 2; #pragma omp task untied fft_unshuffle_4(a, ab, in, out, m); #pragma omp task untied fft_unshuffle_4(ab, b, in, out, m); #pragma omp taskwait } } void fft_unshuffle_4_seq(int a, int b, COMPLEX * in, COMPLEX * out, int m) { int i; const COMPLEX *ip; COMPLEX *jp; if ((b - a) < 128) { ip = in + a * 4; for (i = a; i < b; ++i) { jp = out + i; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; } } else { int ab = (a + b) / 2; fft_unshuffle_4_seq(a, ab, in, out, m); fft_unshuffle_4_seq(ab, b, in, out, m); } } void fft_base_8(COMPLEX * in, COMPLEX * out) { REAL tmpr, tmpi; { REAL r1_0, i1_0; REAL r1_1, i1_1; REAL r1_2, i1_2; REAL r1_3, i1_3; REAL r1_4, i1_4; REAL r1_5, i1_5; REAL r1_6, i1_6; REAL r1_7, i1_7; { REAL r2_0, i2_0; REAL r2_2, i2_2; REAL r2_4, i2_4; REAL r2_6, i2_6; { REAL r3_0, i3_0; REAL r3_4, i3_4; r3_0 = c_re(in[0]); i3_0 = c_im(in[0]); r3_4 = c_re(in[4]); i3_4 = c_im(in[4]); r2_0 = (r3_0 + r3_4); i2_0 = (i3_0 + i3_4); r2_4 = (r3_0 - r3_4); i2_4 = (i3_0 - i3_4); } { REAL r3_2, i3_2; REAL r3_6, i3_6; r3_2 = c_re(in[2]); i3_2 = c_im(in[2]); r3_6 = c_re(in[6]); i3_6 = c_im(in[6]); r2_2 = (r3_2 + r3_6); i2_2 = (i3_2 + i3_6); r2_6 = (r3_2 - r3_6); i2_6 = (i3_2 - i3_6); } r1_0 = (r2_0 + r2_2); i1_0 = (i2_0 + i2_2); r1_4 = (r2_0 - r2_2); i1_4 = (i2_0 - i2_2); r1_2 = (r2_4 + i2_6); i1_2 = (i2_4 - r2_6); r1_6 = (r2_4 - i2_6); i1_6 = (i2_4 + r2_6); } { REAL r2_1, i2_1; REAL r2_3, i2_3; REAL r2_5, i2_5; REAL r2_7, i2_7; { REAL r3_1, i3_1; REAL r3_5, i3_5; r3_1 = c_re(in[1]); i3_1 = c_im(in[1]); r3_5 = c_re(in[5]); i3_5 = c_im(in[5]); r2_1 = (r3_1 + r3_5); i2_1 = (i3_1 + i3_5); r2_5 = (r3_1 - r3_5); i2_5 = (i3_1 - i3_5); } { REAL r3_3, i3_3; REAL r3_7, i3_7; r3_3 = c_re(in[3]); i3_3 = c_im(in[3]); r3_7 = c_re(in[7]); i3_7 = c_im(in[7]); r2_3 = (r3_3 + r3_7); i2_3 = (i3_3 + i3_7); r2_7 = (r3_3 - r3_7); i2_7 = (i3_3 - i3_7); } r1_1 = (r2_1 + r2_3); i1_1 = (i2_1 + i2_3); r1_5 = (r2_1 - r2_3); i1_5 = (i2_1 - i2_3); r1_3 = (r2_5 + i2_7); i1_3 = (i2_5 - r2_7); r1_7 = (r2_5 - i2_7); i1_7 = (i2_5 + r2_7); } c_re(out[0]) = (r1_0 + r1_1); c_im(out[0]) = (i1_0 + i1_1); c_re(out[4]) = (r1_0 - r1_1); c_im(out[4]) = (i1_0 - i1_1); tmpr = (0.707106781187 * (r1_3 + i1_3)); tmpi = (0.707106781187 * (i1_3 - r1_3)); c_re(out[1]) = (r1_2 + tmpr); c_im(out[1]) = (i1_2 + tmpi); c_re(out[5]) = (r1_2 - tmpr); c_im(out[5]) = (i1_2 - tmpi); c_re(out[2]) = (r1_4 + i1_5); c_im(out[2]) = (i1_4 - r1_5); c_re(out[6]) = (r1_4 - i1_5); c_im(out[6]) = (i1_4 + r1_5); tmpr = (0.707106781187 * (i1_7 - r1_7)); tmpi = (0.707106781187 * (r1_7 + i1_7)); c_re(out[3]) = (r1_6 + tmpr); c_im(out[3]) = (i1_6 - tmpi); c_re(out[7]) = (r1_6 - tmpr); c_im(out[7]) = (i1_6 + tmpi); } } void fft_twiddle_8(int a, int b, COMPLEX * in, COMPLEX * out, COMPLEX * W, int nW, int nWdn, int m) { int l1, i; COMPLEX *jp, *kp; REAL tmpr, tmpi, wr, wi; if ((b - a) < 128) { for (i = a, l1 = nWdn * i, kp = out + i; i < b; i++, l1 += nWdn, kp++) { jp = in + i; { REAL r1_0, i1_0; REAL r1_1, i1_1; REAL r1_2, i1_2; REAL r1_3, i1_3; REAL r1_4, i1_4; REAL r1_5, i1_5; REAL r1_6, i1_6; REAL r1_7, i1_7; { REAL r2_0, i2_0; REAL r2_2, i2_2; REAL r2_4, i2_4; REAL r2_6, i2_6; { REAL r3_0, i3_0; REAL r3_4, i3_4; r3_0 = c_re(jp[0 * m]); i3_0 = c_im(jp[0 * m]); wr = c_re(W[4 * l1]); wi = c_im(W[4 * l1]); tmpr = c_re(jp[4 * m]); tmpi = c_im(jp[4 * m]); r3_4 = ((wr * tmpr) - (wi * tmpi)); i3_4 = ((wi * tmpr) + (wr * tmpi)); r2_0 = (r3_0 + r3_4); i2_0 = (i3_0 + i3_4); r2_4 = (r3_0 - r3_4); i2_4 = (i3_0 - i3_4); } { REAL r3_2, i3_2; REAL r3_6, i3_6; wr = c_re(W[2 * l1]); wi = c_im(W[2 * l1]); tmpr = c_re(jp[2 * m]); tmpi = c_im(jp[2 * m]); r3_2 = ((wr * tmpr) - (wi * tmpi)); i3_2 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[6 * l1]); wi = c_im(W[6 * l1]); tmpr = c_re(jp[6 * m]); tmpi = c_im(jp[6 * m]); r3_6 = ((wr * tmpr) - (wi * tmpi)); i3_6 = ((wi * tmpr) + (wr * tmpi)); r2_2 = (r3_2 + r3_6); i2_2 = (i3_2 + i3_6); r2_6 = (r3_2 - r3_6); i2_6 = (i3_2 - i3_6); } r1_0 = (r2_0 + r2_2); i1_0 = (i2_0 + i2_2); r1_4 = (r2_0 - r2_2); i1_4 = (i2_0 - i2_2); r1_2 = (r2_4 + i2_6); i1_2 = (i2_4 - r2_6); r1_6 = (r2_4 - i2_6); i1_6 = (i2_4 + r2_6); } { REAL r2_1, i2_1; REAL r2_3, i2_3; REAL r2_5, i2_5; REAL r2_7, i2_7; { REAL r3_1, i3_1; REAL r3_5, i3_5; wr = c_re(W[1 * l1]); wi = c_im(W[1 * l1]); tmpr = c_re(jp[1 * m]); tmpi = c_im(jp[1 * m]); r3_1 = ((wr * tmpr) - (wi * tmpi)); i3_1 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[5 * l1]); wi = c_im(W[5 * l1]); tmpr = c_re(jp[5 * m]); tmpi = c_im(jp[5 * m]); r3_5 = ((wr * tmpr) - (wi * tmpi)); i3_5 = ((wi * tmpr) + (wr * tmpi)); r2_1 = (r3_1 + r3_5); i2_1 = (i3_1 + i3_5); r2_5 = (r3_1 - r3_5); i2_5 = (i3_1 - i3_5); } { REAL r3_3, i3_3; REAL r3_7, i3_7; wr = c_re(W[3 * l1]); wi = c_im(W[3 * l1]); tmpr = c_re(jp[3 * m]); tmpi = c_im(jp[3 * m]); r3_3 = ((wr * tmpr) - (wi * tmpi)); i3_3 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[7 * l1]); wi = c_im(W[7 * l1]); tmpr = c_re(jp[7 * m]); tmpi = c_im(jp[7 * m]); r3_7 = ((wr * tmpr) - (wi * tmpi)); i3_7 = ((wi * tmpr) + (wr * tmpi)); r2_3 = (r3_3 + r3_7); i2_3 = (i3_3 + i3_7); r2_7 = (r3_3 - r3_7); i2_7 = (i3_3 - i3_7); } r1_1 = (r2_1 + r2_3); i1_1 = (i2_1 + i2_3); r1_5 = (r2_1 - r2_3); i1_5 = (i2_1 - i2_3); r1_3 = (r2_5 + i2_7); i1_3 = (i2_5 - r2_7); r1_7 = (r2_5 - i2_7); i1_7 = (i2_5 + r2_7); } c_re(kp[0 * m]) = (r1_0 + r1_1); c_im(kp[0 * m]) = (i1_0 + i1_1); c_re(kp[4 * m]) = (r1_0 - r1_1); c_im(kp[4 * m]) = (i1_0 - i1_1); tmpr = (0.707106781187 * (r1_3 + i1_3)); tmpi = (0.707106781187 * (i1_3 - r1_3)); c_re(kp[1 * m]) = (r1_2 + tmpr); c_im(kp[1 * m]) = (i1_2 + tmpi); c_re(kp[5 * m]) = (r1_2 - tmpr); c_im(kp[5 * m]) = (i1_2 - tmpi); c_re(kp[2 * m]) = (r1_4 + i1_5); c_im(kp[2 * m]) = (i1_4 - r1_5); c_re(kp[6 * m]) = (r1_4 - i1_5); c_im(kp[6 * m]) = (i1_4 + r1_5); tmpr = (0.707106781187 * (i1_7 - r1_7)); tmpi = (0.707106781187 * (r1_7 + i1_7)); c_re(kp[3 * m]) = (r1_6 + tmpr); c_im(kp[3 * m]) = (i1_6 - tmpi); c_re(kp[7 * m]) = (r1_6 - tmpr); c_im(kp[7 * m]) = (i1_6 + tmpi); } } } else { int ab = (a + b) / 2; #pragma omp task untied fft_twiddle_8(a, ab, in, out, W, nW, nWdn, m); #pragma omp task untied fft_twiddle_8(ab, b, in, out, W, nW, nWdn, m); #pragma omp taskwait } } void fft_twiddle_8_seq(int a, int b, COMPLEX * in, COMPLEX * out, COMPLEX * W, int nW, int nWdn, int m) { int l1, i; COMPLEX *jp, *kp; REAL tmpr, tmpi, wr, wi; if ((b - a) < 128) { for (i = a, l1 = nWdn * i, kp = out + i; i < b; i++, l1 += nWdn, kp++) { jp = in + i; { REAL r1_0, i1_0; REAL r1_1, i1_1; REAL r1_2, i1_2; REAL r1_3, i1_3; REAL r1_4, i1_4; REAL r1_5, i1_5; REAL r1_6, i1_6; REAL r1_7, i1_7; { REAL r2_0, i2_0; REAL r2_2, i2_2; REAL r2_4, i2_4; REAL r2_6, i2_6; { REAL r3_0, i3_0; REAL r3_4, i3_4; r3_0 = c_re(jp[0 * m]); i3_0 = c_im(jp[0 * m]); wr = c_re(W[4 * l1]); wi = c_im(W[4 * l1]); tmpr = c_re(jp[4 * m]); tmpi = c_im(jp[4 * m]); r3_4 = ((wr * tmpr) - (wi * tmpi)); i3_4 = ((wi * tmpr) + (wr * tmpi)); r2_0 = (r3_0 + r3_4); i2_0 = (i3_0 + i3_4); r2_4 = (r3_0 - r3_4); i2_4 = (i3_0 - i3_4); } { REAL r3_2, i3_2; REAL r3_6, i3_6; wr = c_re(W[2 * l1]); wi = c_im(W[2 * l1]); tmpr = c_re(jp[2 * m]); tmpi = c_im(jp[2 * m]); r3_2 = ((wr * tmpr) - (wi * tmpi)); i3_2 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[6 * l1]); wi = c_im(W[6 * l1]); tmpr = c_re(jp[6 * m]); tmpi = c_im(jp[6 * m]); r3_6 = ((wr * tmpr) - (wi * tmpi)); i3_6 = ((wi * tmpr) + (wr * tmpi)); r2_2 = (r3_2 + r3_6); i2_2 = (i3_2 + i3_6); r2_6 = (r3_2 - r3_6); i2_6 = (i3_2 - i3_6); } r1_0 = (r2_0 + r2_2); i1_0 = (i2_0 + i2_2); r1_4 = (r2_0 - r2_2); i1_4 = (i2_0 - i2_2); r1_2 = (r2_4 + i2_6); i1_2 = (i2_4 - r2_6); r1_6 = (r2_4 - i2_6); i1_6 = (i2_4 + r2_6); } { REAL r2_1, i2_1; REAL r2_3, i2_3; REAL r2_5, i2_5; REAL r2_7, i2_7; { REAL r3_1, i3_1; REAL r3_5, i3_5; wr = c_re(W[1 * l1]); wi = c_im(W[1 * l1]); tmpr = c_re(jp[1 * m]); tmpi = c_im(jp[1 * m]); r3_1 = ((wr * tmpr) - (wi * tmpi)); i3_1 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[5 * l1]); wi = c_im(W[5 * l1]); tmpr = c_re(jp[5 * m]); tmpi = c_im(jp[5 * m]); r3_5 = ((wr * tmpr) - (wi * tmpi)); i3_5 = ((wi * tmpr) + (wr * tmpi)); r2_1 = (r3_1 + r3_5); i2_1 = (i3_1 + i3_5); r2_5 = (r3_1 - r3_5); i2_5 = (i3_1 - i3_5); } { REAL r3_3, i3_3; REAL r3_7, i3_7; wr = c_re(W[3 * l1]); wi = c_im(W[3 * l1]); tmpr = c_re(jp[3 * m]); tmpi = c_im(jp[3 * m]); r3_3 = ((wr * tmpr) - (wi * tmpi)); i3_3 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[7 * l1]); wi = c_im(W[7 * l1]); tmpr = c_re(jp[7 * m]); tmpi = c_im(jp[7 * m]); r3_7 = ((wr * tmpr) - (wi * tmpi)); i3_7 = ((wi * tmpr) + (wr * tmpi)); r2_3 = (r3_3 + r3_7); i2_3 = (i3_3 + i3_7); r2_7 = (r3_3 - r3_7); i2_7 = (i3_3 - i3_7); } r1_1 = (r2_1 + r2_3); i1_1 = (i2_1 + i2_3); r1_5 = (r2_1 - r2_3); i1_5 = (i2_1 - i2_3); r1_3 = (r2_5 + i2_7); i1_3 = (i2_5 - r2_7); r1_7 = (r2_5 - i2_7); i1_7 = (i2_5 + r2_7); } c_re(kp[0 * m]) = (r1_0 + r1_1); c_im(kp[0 * m]) = (i1_0 + i1_1); c_re(kp[4 * m]) = (r1_0 - r1_1); c_im(kp[4 * m]) = (i1_0 - i1_1); tmpr = (0.707106781187 * (r1_3 + i1_3)); tmpi = (0.707106781187 * (i1_3 - r1_3)); c_re(kp[1 * m]) = (r1_2 + tmpr); c_im(kp[1 * m]) = (i1_2 + tmpi); c_re(kp[5 * m]) = (r1_2 - tmpr); c_im(kp[5 * m]) = (i1_2 - tmpi); c_re(kp[2 * m]) = (r1_4 + i1_5); c_im(kp[2 * m]) = (i1_4 - r1_5); c_re(kp[6 * m]) = (r1_4 - i1_5); c_im(kp[6 * m]) = (i1_4 + r1_5); tmpr = (0.707106781187 * (i1_7 - r1_7)); tmpi = (0.707106781187 * (r1_7 + i1_7)); c_re(kp[3 * m]) = (r1_6 + tmpr); c_im(kp[3 * m]) = (i1_6 - tmpi); c_re(kp[7 * m]) = (r1_6 - tmpr); c_im(kp[7 * m]) = (i1_6 + tmpi); } } } else { int ab = (a + b) / 2; fft_twiddle_8_seq(a, ab, in, out, W, nW, nWdn, m); fft_twiddle_8_seq(ab, b, in, out, W, nW, nWdn, m); } } void fft_unshuffle_8(int a, int b, COMPLEX * in, COMPLEX * out, int m) { int i; const COMPLEX *ip; COMPLEX *jp; if ((b - a) < 128) { ip = in + a * 8; for (i = a; i < b; ++i) { jp = out + i; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; } } else { int ab = (a + b) / 2; #pragma omp task untied fft_unshuffle_8(a, ab, in, out, m); #pragma omp task untied fft_unshuffle_8(ab, b, in, out, m); #pragma omp taskwait } } void fft_unshuffle_8_seq(int a, int b, COMPLEX * in, COMPLEX * out, int m) { int i; const COMPLEX *ip; COMPLEX *jp; if ((b - a) < 128) { ip = in + a * 8; for (i = a; i < b; ++i) { jp = out + i; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; } } else { int ab = (a + b) / 2; fft_unshuffle_8_seq(a, ab, in, out, m); fft_unshuffle_8_seq(ab, b, in, out, m); } } void fft_base_16(COMPLEX * in, COMPLEX * out) { REAL tmpr, tmpi; { REAL r1_0, i1_0; REAL r1_1, i1_1; REAL r1_2, i1_2; REAL r1_3, i1_3; REAL r1_4, i1_4; REAL r1_5, i1_5; REAL r1_6, i1_6; REAL r1_7, i1_7; REAL r1_8, i1_8; REAL r1_9, i1_9; REAL r1_10, i1_10; REAL r1_11, i1_11; REAL r1_12, i1_12; REAL r1_13, i1_13; REAL r1_14, i1_14; REAL r1_15, i1_15; { REAL r2_0, i2_0; REAL r2_2, i2_2; REAL r2_4, i2_4; REAL r2_6, i2_6; REAL r2_8, i2_8; REAL r2_10, i2_10; REAL r2_12, i2_12; REAL r2_14, i2_14; { REAL r3_0, i3_0; REAL r3_4, i3_4; REAL r3_8, i3_8; REAL r3_12, i3_12; { REAL r4_0, i4_0; REAL r4_8, i4_8; r4_0 = c_re(in[0]); i4_0 = c_im(in[0]); r4_8 = c_re(in[8]); i4_8 = c_im(in[8]); r3_0 = (r4_0 + r4_8); i3_0 = (i4_0 + i4_8); r3_8 = (r4_0 - r4_8); i3_8 = (i4_0 - i4_8); } { REAL r4_4, i4_4; REAL r4_12, i4_12; r4_4 = c_re(in[4]); i4_4 = c_im(in[4]); r4_12 = c_re(in[12]); i4_12 = c_im(in[12]); r3_4 = (r4_4 + r4_12); i3_4 = (i4_4 + i4_12); r3_12 = (r4_4 - r4_12); i3_12 = (i4_4 - i4_12); } r2_0 = (r3_0 + r3_4); i2_0 = (i3_0 + i3_4); r2_8 = (r3_0 - r3_4); i2_8 = (i3_0 - i3_4); r2_4 = (r3_8 + i3_12); i2_4 = (i3_8 - r3_12); r2_12 = (r3_8 - i3_12); i2_12 = (i3_8 + r3_12); } { REAL r3_2, i3_2; REAL r3_6, i3_6; REAL r3_10, i3_10; REAL r3_14, i3_14; { REAL r4_2, i4_2; REAL r4_10, i4_10; r4_2 = c_re(in[2]); i4_2 = c_im(in[2]); r4_10 = c_re(in[10]); i4_10 = c_im(in[10]); r3_2 = (r4_2 + r4_10); i3_2 = (i4_2 + i4_10); r3_10 = (r4_2 - r4_10); i3_10 = (i4_2 - i4_10); } { REAL r4_6, i4_6; REAL r4_14, i4_14; r4_6 = c_re(in[6]); i4_6 = c_im(in[6]); r4_14 = c_re(in[14]); i4_14 = c_im(in[14]); r3_6 = (r4_6 + r4_14); i3_6 = (i4_6 + i4_14); r3_14 = (r4_6 - r4_14); i3_14 = (i4_6 - i4_14); } r2_2 = (r3_2 + r3_6); i2_2 = (i3_2 + i3_6); r2_10 = (r3_2 - r3_6); i2_10 = (i3_2 - i3_6); r2_6 = (r3_10 + i3_14); i2_6 = (i3_10 - r3_14); r2_14 = (r3_10 - i3_14); i2_14 = (i3_10 + r3_14); } r1_0 = (r2_0 + r2_2); i1_0 = (i2_0 + i2_2); r1_8 = (r2_0 - r2_2); i1_8 = (i2_0 - i2_2); tmpr = (0.707106781187 * (r2_6 + i2_6)); tmpi = (0.707106781187 * (i2_6 - r2_6)); r1_2 = (r2_4 + tmpr); i1_2 = (i2_4 + tmpi); r1_10 = (r2_4 - tmpr); i1_10 = (i2_4 - tmpi); r1_4 = (r2_8 + i2_10); i1_4 = (i2_8 - r2_10); r1_12 = (r2_8 - i2_10); i1_12 = (i2_8 + r2_10); tmpr = (0.707106781187 * (i2_14 - r2_14)); tmpi = (0.707106781187 * (r2_14 + i2_14)); r1_6 = (r2_12 + tmpr); i1_6 = (i2_12 - tmpi); r1_14 = (r2_12 - tmpr); i1_14 = (i2_12 + tmpi); } { REAL r2_1, i2_1; REAL r2_3, i2_3; REAL r2_5, i2_5; REAL r2_7, i2_7; REAL r2_9, i2_9; REAL r2_11, i2_11; REAL r2_13, i2_13; REAL r2_15, i2_15; { REAL r3_1, i3_1; REAL r3_5, i3_5; REAL r3_9, i3_9; REAL r3_13, i3_13; { REAL r4_1, i4_1; REAL r4_9, i4_9; r4_1 = c_re(in[1]); i4_1 = c_im(in[1]); r4_9 = c_re(in[9]); i4_9 = c_im(in[9]); r3_1 = (r4_1 + r4_9); i3_1 = (i4_1 + i4_9); r3_9 = (r4_1 - r4_9); i3_9 = (i4_1 - i4_9); } { REAL r4_5, i4_5; REAL r4_13, i4_13; r4_5 = c_re(in[5]); i4_5 = c_im(in[5]); r4_13 = c_re(in[13]); i4_13 = c_im(in[13]); r3_5 = (r4_5 + r4_13); i3_5 = (i4_5 + i4_13); r3_13 = (r4_5 - r4_13); i3_13 = (i4_5 - i4_13); } r2_1 = (r3_1 + r3_5); i2_1 = (i3_1 + i3_5); r2_9 = (r3_1 - r3_5); i2_9 = (i3_1 - i3_5); r2_5 = (r3_9 + i3_13); i2_5 = (i3_9 - r3_13); r2_13 = (r3_9 - i3_13); i2_13 = (i3_9 + r3_13); } { REAL r3_3, i3_3; REAL r3_7, i3_7; REAL r3_11, i3_11; REAL r3_15, i3_15; { REAL r4_3, i4_3; REAL r4_11, i4_11; r4_3 = c_re(in[3]); i4_3 = c_im(in[3]); r4_11 = c_re(in[11]); i4_11 = c_im(in[11]); r3_3 = (r4_3 + r4_11); i3_3 = (i4_3 + i4_11); r3_11 = (r4_3 - r4_11); i3_11 = (i4_3 - i4_11); } { REAL r4_7, i4_7; REAL r4_15, i4_15; r4_7 = c_re(in[7]); i4_7 = c_im(in[7]); r4_15 = c_re(in[15]); i4_15 = c_im(in[15]); r3_7 = (r4_7 + r4_15); i3_7 = (i4_7 + i4_15); r3_15 = (r4_7 - r4_15); i3_15 = (i4_7 - i4_15); } r2_3 = (r3_3 + r3_7); i2_3 = (i3_3 + i3_7); r2_11 = (r3_3 - r3_7); i2_11 = (i3_3 - i3_7); r2_7 = (r3_11 + i3_15); i2_7 = (i3_11 - r3_15); r2_15 = (r3_11 - i3_15); i2_15 = (i3_11 + r3_15); } r1_1 = (r2_1 + r2_3); i1_1 = (i2_1 + i2_3); r1_9 = (r2_1 - r2_3); i1_9 = (i2_1 - i2_3); tmpr = (0.707106781187 * (r2_7 + i2_7)); tmpi = (0.707106781187 * (i2_7 - r2_7)); r1_3 = (r2_5 + tmpr); i1_3 = (i2_5 + tmpi); r1_11 = (r2_5 - tmpr); i1_11 = (i2_5 - tmpi); r1_5 = (r2_9 + i2_11); i1_5 = (i2_9 - r2_11); r1_13 = (r2_9 - i2_11); i1_13 = (i2_9 + r2_11); tmpr = (0.707106781187 * (i2_15 - r2_15)); tmpi = (0.707106781187 * (r2_15 + i2_15)); r1_7 = (r2_13 + tmpr); i1_7 = (i2_13 - tmpi); r1_15 = (r2_13 - tmpr); i1_15 = (i2_13 + tmpi); } c_re(out[0]) = (r1_0 + r1_1); c_im(out[0]) = (i1_0 + i1_1); c_re(out[8]) = (r1_0 - r1_1); c_im(out[8]) = (i1_0 - i1_1); tmpr = ((0.923879532511 * r1_3) + (0.382683432365 * i1_3)); tmpi = ((0.923879532511 * i1_3) - (0.382683432365 * r1_3)); c_re(out[1]) = (r1_2 + tmpr); c_im(out[1]) = (i1_2 + tmpi); c_re(out[9]) = (r1_2 - tmpr); c_im(out[9]) = (i1_2 - tmpi); tmpr = (0.707106781187 * (r1_5 + i1_5)); tmpi = (0.707106781187 * (i1_5 - r1_5)); c_re(out[2]) = (r1_4 + tmpr); c_im(out[2]) = (i1_4 + tmpi); c_re(out[10]) = (r1_4 - tmpr); c_im(out[10]) = (i1_4 - tmpi); tmpr = ((0.382683432365 * r1_7) + (0.923879532511 * i1_7)); tmpi = ((0.382683432365 * i1_7) - (0.923879532511 * r1_7)); c_re(out[3]) = (r1_6 + tmpr); c_im(out[3]) = (i1_6 + tmpi); c_re(out[11]) = (r1_6 - tmpr); c_im(out[11]) = (i1_6 - tmpi); c_re(out[4]) = (r1_8 + i1_9); c_im(out[4]) = (i1_8 - r1_9); c_re(out[12]) = (r1_8 - i1_9); c_im(out[12]) = (i1_8 + r1_9); tmpr = ((0.923879532511 * i1_11) - (0.382683432365 * r1_11)); tmpi = ((0.923879532511 * r1_11) + (0.382683432365 * i1_11)); c_re(out[5]) = (r1_10 + tmpr); c_im(out[5]) = (i1_10 - tmpi); c_re(out[13]) = (r1_10 - tmpr); c_im(out[13]) = (i1_10 + tmpi); tmpr = (0.707106781187 * (i1_13 - r1_13)); tmpi = (0.707106781187 * (r1_13 + i1_13)); c_re(out[6]) = (r1_12 + tmpr); c_im(out[6]) = (i1_12 - tmpi); c_re(out[14]) = (r1_12 - tmpr); c_im(out[14]) = (i1_12 + tmpi); tmpr = ((0.382683432365 * i1_15) - (0.923879532511 * r1_15)); tmpi = ((0.382683432365 * r1_15) + (0.923879532511 * i1_15)); c_re(out[7]) = (r1_14 + tmpr); c_im(out[7]) = (i1_14 - tmpi); c_re(out[15]) = (r1_14 - tmpr); c_im(out[15]) = (i1_14 + tmpi); } } void fft_twiddle_16(int a, int b, COMPLEX * in, COMPLEX * out, COMPLEX * W, int nW, int nWdn, int m) { int l1, i; COMPLEX *jp, *kp; REAL tmpr, tmpi, wr, wi; if ((b - a) < 128) { for (i = a, l1 = nWdn * i, kp = out + i; i < b; i++, l1 += nWdn, kp++) { jp = in + i; { REAL r1_0, i1_0; REAL r1_1, i1_1; REAL r1_2, i1_2; REAL r1_3, i1_3; REAL r1_4, i1_4; REAL r1_5, i1_5; REAL r1_6, i1_6; REAL r1_7, i1_7; REAL r1_8, i1_8; REAL r1_9, i1_9; REAL r1_10, i1_10; REAL r1_11, i1_11; REAL r1_12, i1_12; REAL r1_13, i1_13; REAL r1_14, i1_14; REAL r1_15, i1_15; { REAL r2_0, i2_0; REAL r2_2, i2_2; REAL r2_4, i2_4; REAL r2_6, i2_6; REAL r2_8, i2_8; REAL r2_10, i2_10; REAL r2_12, i2_12; REAL r2_14, i2_14; { REAL r3_0, i3_0; REAL r3_4, i3_4; REAL r3_8, i3_8; REAL r3_12, i3_12; { REAL r4_0, i4_0; REAL r4_8, i4_8; r4_0 = c_re(jp[0 * m]); i4_0 = c_im(jp[0 * m]); wr = c_re(W[8 * l1]); wi = c_im(W[8 * l1]); tmpr = c_re(jp[8 * m]); tmpi = c_im(jp[8 * m]); r4_8 = ((wr * tmpr) - (wi * tmpi)); i4_8 = ((wi * tmpr) + (wr * tmpi)); r3_0 = (r4_0 + r4_8); i3_0 = (i4_0 + i4_8); r3_8 = (r4_0 - r4_8); i3_8 = (i4_0 - i4_8); } { REAL r4_4, i4_4; REAL r4_12, i4_12; wr = c_re(W[4 * l1]); wi = c_im(W[4 * l1]); tmpr = c_re(jp[4 * m]); tmpi = c_im(jp[4 * m]); r4_4 = ((wr * tmpr) - (wi * tmpi)); i4_4 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[12 * l1]); wi = c_im(W[12 * l1]); tmpr = c_re(jp[12 * m]); tmpi = c_im(jp[12 * m]); r4_12 = ((wr * tmpr) - (wi * tmpi)); i4_12 = ((wi * tmpr) + (wr * tmpi)); r3_4 = (r4_4 + r4_12); i3_4 = (i4_4 + i4_12); r3_12 = (r4_4 - r4_12); i3_12 = (i4_4 - i4_12); } r2_0 = (r3_0 + r3_4); i2_0 = (i3_0 + i3_4); r2_8 = (r3_0 - r3_4); i2_8 = (i3_0 - i3_4); r2_4 = (r3_8 + i3_12); i2_4 = (i3_8 - r3_12); r2_12 = (r3_8 - i3_12); i2_12 = (i3_8 + r3_12); } { REAL r3_2, i3_2; REAL r3_6, i3_6; REAL r3_10, i3_10; REAL r3_14, i3_14; { REAL r4_2, i4_2; REAL r4_10, i4_10; wr = c_re(W[2 * l1]); wi = c_im(W[2 * l1]); tmpr = c_re(jp[2 * m]); tmpi = c_im(jp[2 * m]); r4_2 = ((wr * tmpr) - (wi * tmpi)); i4_2 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[10 * l1]); wi = c_im(W[10 * l1]); tmpr = c_re(jp[10 * m]); tmpi = c_im(jp[10 * m]); r4_10 = ((wr * tmpr) - (wi * tmpi)); i4_10 = ((wi * tmpr) + (wr * tmpi)); r3_2 = (r4_2 + r4_10); i3_2 = (i4_2 + i4_10); r3_10 = (r4_2 - r4_10); i3_10 = (i4_2 - i4_10); } { REAL r4_6, i4_6; REAL r4_14, i4_14; wr = c_re(W[6 * l1]); wi = c_im(W[6 * l1]); tmpr = c_re(jp[6 * m]); tmpi = c_im(jp[6 * m]); r4_6 = ((wr * tmpr) - (wi * tmpi)); i4_6 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[14 * l1]); wi = c_im(W[14 * l1]); tmpr = c_re(jp[14 * m]); tmpi = c_im(jp[14 * m]); r4_14 = ((wr * tmpr) - (wi * tmpi)); i4_14 = ((wi * tmpr) + (wr * tmpi)); r3_6 = (r4_6 + r4_14); i3_6 = (i4_6 + i4_14); r3_14 = (r4_6 - r4_14); i3_14 = (i4_6 - i4_14); } r2_2 = (r3_2 + r3_6); i2_2 = (i3_2 + i3_6); r2_10 = (r3_2 - r3_6); i2_10 = (i3_2 - i3_6); r2_6 = (r3_10 + i3_14); i2_6 = (i3_10 - r3_14); r2_14 = (r3_10 - i3_14); i2_14 = (i3_10 + r3_14); } r1_0 = (r2_0 + r2_2); i1_0 = (i2_0 + i2_2); r1_8 = (r2_0 - r2_2); i1_8 = (i2_0 - i2_2); tmpr = (0.707106781187 * (r2_6 + i2_6)); tmpi = (0.707106781187 * (i2_6 - r2_6)); r1_2 = (r2_4 + tmpr); i1_2 = (i2_4 + tmpi); r1_10 = (r2_4 - tmpr); i1_10 = (i2_4 - tmpi); r1_4 = (r2_8 + i2_10); i1_4 = (i2_8 - r2_10); r1_12 = (r2_8 - i2_10); i1_12 = (i2_8 + r2_10); tmpr = (0.707106781187 * (i2_14 - r2_14)); tmpi = (0.707106781187 * (r2_14 + i2_14)); r1_6 = (r2_12 + tmpr); i1_6 = (i2_12 - tmpi); r1_14 = (r2_12 - tmpr); i1_14 = (i2_12 + tmpi); } { REAL r2_1, i2_1; REAL r2_3, i2_3; REAL r2_5, i2_5; REAL r2_7, i2_7; REAL r2_9, i2_9; REAL r2_11, i2_11; REAL r2_13, i2_13; REAL r2_15, i2_15; { REAL r3_1, i3_1; REAL r3_5, i3_5; REAL r3_9, i3_9; REAL r3_13, i3_13; { REAL r4_1, i4_1; REAL r4_9, i4_9; wr = c_re(W[1 * l1]); wi = c_im(W[1 * l1]); tmpr = c_re(jp[1 * m]); tmpi = c_im(jp[1 * m]); r4_1 = ((wr * tmpr) - (wi * tmpi)); i4_1 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[9 * l1]); wi = c_im(W[9 * l1]); tmpr = c_re(jp[9 * m]); tmpi = c_im(jp[9 * m]); r4_9 = ((wr * tmpr) - (wi * tmpi)); i4_9 = ((wi * tmpr) + (wr * tmpi)); r3_1 = (r4_1 + r4_9); i3_1 = (i4_1 + i4_9); r3_9 = (r4_1 - r4_9); i3_9 = (i4_1 - i4_9); } { REAL r4_5, i4_5; REAL r4_13, i4_13; wr = c_re(W[5 * l1]); wi = c_im(W[5 * l1]); tmpr = c_re(jp[5 * m]); tmpi = c_im(jp[5 * m]); r4_5 = ((wr * tmpr) - (wi * tmpi)); i4_5 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[13 * l1]); wi = c_im(W[13 * l1]); tmpr = c_re(jp[13 * m]); tmpi = c_im(jp[13 * m]); r4_13 = ((wr * tmpr) - (wi * tmpi)); i4_13 = ((wi * tmpr) + (wr * tmpi)); r3_5 = (r4_5 + r4_13); i3_5 = (i4_5 + i4_13); r3_13 = (r4_5 - r4_13); i3_13 = (i4_5 - i4_13); } r2_1 = (r3_1 + r3_5); i2_1 = (i3_1 + i3_5); r2_9 = (r3_1 - r3_5); i2_9 = (i3_1 - i3_5); r2_5 = (r3_9 + i3_13); i2_5 = (i3_9 - r3_13); r2_13 = (r3_9 - i3_13); i2_13 = (i3_9 + r3_13); } { REAL r3_3, i3_3; REAL r3_7, i3_7; REAL r3_11, i3_11; REAL r3_15, i3_15; { REAL r4_3, i4_3; REAL r4_11, i4_11; wr = c_re(W[3 * l1]); wi = c_im(W[3 * l1]); tmpr = c_re(jp[3 * m]); tmpi = c_im(jp[3 * m]); r4_3 = ((wr * tmpr) - (wi * tmpi)); i4_3 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[11 * l1]); wi = c_im(W[11 * l1]); tmpr = c_re(jp[11 * m]); tmpi = c_im(jp[11 * m]); r4_11 = ((wr * tmpr) - (wi * tmpi)); i4_11 = ((wi * tmpr) + (wr * tmpi)); r3_3 = (r4_3 + r4_11); i3_3 = (i4_3 + i4_11); r3_11 = (r4_3 - r4_11); i3_11 = (i4_3 - i4_11); } { REAL r4_7, i4_7; REAL r4_15, i4_15; wr = c_re(W[7 * l1]); wi = c_im(W[7 * l1]); tmpr = c_re(jp[7 * m]); tmpi = c_im(jp[7 * m]); r4_7 = ((wr * tmpr) - (wi * tmpi)); i4_7 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[15 * l1]); wi = c_im(W[15 * l1]); tmpr = c_re(jp[15 * m]); tmpi = c_im(jp[15 * m]); r4_15 = ((wr * tmpr) - (wi * tmpi)); i4_15 = ((wi * tmpr) + (wr * tmpi)); r3_7 = (r4_7 + r4_15); i3_7 = (i4_7 + i4_15); r3_15 = (r4_7 - r4_15); i3_15 = (i4_7 - i4_15); } r2_3 = (r3_3 + r3_7); i2_3 = (i3_3 + i3_7); r2_11 = (r3_3 - r3_7); i2_11 = (i3_3 - i3_7); r2_7 = (r3_11 + i3_15); i2_7 = (i3_11 - r3_15); r2_15 = (r3_11 - i3_15); i2_15 = (i3_11 + r3_15); } r1_1 = (r2_1 + r2_3); i1_1 = (i2_1 + i2_3); r1_9 = (r2_1 - r2_3); i1_9 = (i2_1 - i2_3); tmpr = (0.707106781187 * (r2_7 + i2_7)); tmpi = (0.707106781187 * (i2_7 - r2_7)); r1_3 = (r2_5 + tmpr); i1_3 = (i2_5 + tmpi); r1_11 = (r2_5 - tmpr); i1_11 = (i2_5 - tmpi); r1_5 = (r2_9 + i2_11); i1_5 = (i2_9 - r2_11); r1_13 = (r2_9 - i2_11); i1_13 = (i2_9 + r2_11); tmpr = (0.707106781187 * (i2_15 - r2_15)); tmpi = (0.707106781187 * (r2_15 + i2_15)); r1_7 = (r2_13 + tmpr); i1_7 = (i2_13 - tmpi); r1_15 = (r2_13 - tmpr); i1_15 = (i2_13 + tmpi); } c_re(kp[0 * m]) = (r1_0 + r1_1); c_im(kp[0 * m]) = (i1_0 + i1_1); c_re(kp[8 * m]) = (r1_0 - r1_1); c_im(kp[8 * m]) = (i1_0 - i1_1); tmpr = ((0.923879532511 * r1_3) + (0.382683432365 * i1_3)); tmpi = ((0.923879532511 * i1_3) - (0.382683432365 * r1_3)); c_re(kp[1 * m]) = (r1_2 + tmpr); c_im(kp[1 * m]) = (i1_2 + tmpi); c_re(kp[9 * m]) = (r1_2 - tmpr); c_im(kp[9 * m]) = (i1_2 - tmpi); tmpr = (0.707106781187 * (r1_5 + i1_5)); tmpi = (0.707106781187 * (i1_5 - r1_5)); c_re(kp[2 * m]) = (r1_4 + tmpr); c_im(kp[2 * m]) = (i1_4 + tmpi); c_re(kp[10 * m]) = (r1_4 - tmpr); c_im(kp[10 * m]) = (i1_4 - tmpi); tmpr = ((0.382683432365 * r1_7) + (0.923879532511 * i1_7)); tmpi = ((0.382683432365 * i1_7) - (0.923879532511 * r1_7)); c_re(kp[3 * m]) = (r1_6 + tmpr); c_im(kp[3 * m]) = (i1_6 + tmpi); c_re(kp[11 * m]) = (r1_6 - tmpr); c_im(kp[11 * m]) = (i1_6 - tmpi); c_re(kp[4 * m]) = (r1_8 + i1_9); c_im(kp[4 * m]) = (i1_8 - r1_9); c_re(kp[12 * m]) = (r1_8 - i1_9); c_im(kp[12 * m]) = (i1_8 + r1_9); tmpr = ((0.923879532511 * i1_11) - (0.382683432365 * r1_11)); tmpi = ((0.923879532511 * r1_11) + (0.382683432365 * i1_11)); c_re(kp[5 * m]) = (r1_10 + tmpr); c_im(kp[5 * m]) = (i1_10 - tmpi); c_re(kp[13 * m]) = (r1_10 - tmpr); c_im(kp[13 * m]) = (i1_10 + tmpi); tmpr = (0.707106781187 * (i1_13 - r1_13)); tmpi = (0.707106781187 * (r1_13 + i1_13)); c_re(kp[6 * m]) = (r1_12 + tmpr); c_im(kp[6 * m]) = (i1_12 - tmpi); c_re(kp[14 * m]) = (r1_12 - tmpr); c_im(kp[14 * m]) = (i1_12 + tmpi); tmpr = ((0.382683432365 * i1_15) - (0.923879532511 * r1_15)); tmpi = ((0.382683432365 * r1_15) + (0.923879532511 * i1_15)); c_re(kp[7 * m]) = (r1_14 + tmpr); c_im(kp[7 * m]) = (i1_14 - tmpi); c_re(kp[15 * m]) = (r1_14 - tmpr); c_im(kp[15 * m]) = (i1_14 + tmpi); } } } else { int ab = (a + b) / 2; #pragma omp task untied fft_twiddle_16(a, ab, in, out, W, nW, nWdn, m); #pragma omp task untied fft_twiddle_16(ab, b, in, out, W, nW, nWdn, m); #pragma omp taskwait } } void fft_twiddle_16_seq(int a, int b, COMPLEX * in, COMPLEX * out, COMPLEX * W, int nW, int nWdn, int m) { int l1, i; COMPLEX *jp, *kp; REAL tmpr, tmpi, wr, wi; if ((b - a) < 128) { for (i = a, l1 = nWdn * i, kp = out + i; i < b; i++, l1 += nWdn, kp++) { jp = in + i; { REAL r1_0, i1_0; REAL r1_1, i1_1; REAL r1_2, i1_2; REAL r1_3, i1_3; REAL r1_4, i1_4; REAL r1_5, i1_5; REAL r1_6, i1_6; REAL r1_7, i1_7; REAL r1_8, i1_8; REAL r1_9, i1_9; REAL r1_10, i1_10; REAL r1_11, i1_11; REAL r1_12, i1_12; REAL r1_13, i1_13; REAL r1_14, i1_14; REAL r1_15, i1_15; { REAL r2_0, i2_0; REAL r2_2, i2_2; REAL r2_4, i2_4; REAL r2_6, i2_6; REAL r2_8, i2_8; REAL r2_10, i2_10; REAL r2_12, i2_12; REAL r2_14, i2_14; { REAL r3_0, i3_0; REAL r3_4, i3_4; REAL r3_8, i3_8; REAL r3_12, i3_12; { REAL r4_0, i4_0; REAL r4_8, i4_8; r4_0 = c_re(jp[0 * m]); i4_0 = c_im(jp[0 * m]); wr = c_re(W[8 * l1]); wi = c_im(W[8 * l1]); tmpr = c_re(jp[8 * m]); tmpi = c_im(jp[8 * m]); r4_8 = ((wr * tmpr) - (wi * tmpi)); i4_8 = ((wi * tmpr) + (wr * tmpi)); r3_0 = (r4_0 + r4_8); i3_0 = (i4_0 + i4_8); r3_8 = (r4_0 - r4_8); i3_8 = (i4_0 - i4_8); } { REAL r4_4, i4_4; REAL r4_12, i4_12; wr = c_re(W[4 * l1]); wi = c_im(W[4 * l1]); tmpr = c_re(jp[4 * m]); tmpi = c_im(jp[4 * m]); r4_4 = ((wr * tmpr) - (wi * tmpi)); i4_4 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[12 * l1]); wi = c_im(W[12 * l1]); tmpr = c_re(jp[12 * m]); tmpi = c_im(jp[12 * m]); r4_12 = ((wr * tmpr) - (wi * tmpi)); i4_12 = ((wi * tmpr) + (wr * tmpi)); r3_4 = (r4_4 + r4_12); i3_4 = (i4_4 + i4_12); r3_12 = (r4_4 - r4_12); i3_12 = (i4_4 - i4_12); } r2_0 = (r3_0 + r3_4); i2_0 = (i3_0 + i3_4); r2_8 = (r3_0 - r3_4); i2_8 = (i3_0 - i3_4); r2_4 = (r3_8 + i3_12); i2_4 = (i3_8 - r3_12); r2_12 = (r3_8 - i3_12); i2_12 = (i3_8 + r3_12); } { REAL r3_2, i3_2; REAL r3_6, i3_6; REAL r3_10, i3_10; REAL r3_14, i3_14; { REAL r4_2, i4_2; REAL r4_10, i4_10; wr = c_re(W[2 * l1]); wi = c_im(W[2 * l1]); tmpr = c_re(jp[2 * m]); tmpi = c_im(jp[2 * m]); r4_2 = ((wr * tmpr) - (wi * tmpi)); i4_2 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[10 * l1]); wi = c_im(W[10 * l1]); tmpr = c_re(jp[10 * m]); tmpi = c_im(jp[10 * m]); r4_10 = ((wr * tmpr) - (wi * tmpi)); i4_10 = ((wi * tmpr) + (wr * tmpi)); r3_2 = (r4_2 + r4_10); i3_2 = (i4_2 + i4_10); r3_10 = (r4_2 - r4_10); i3_10 = (i4_2 - i4_10); } { REAL r4_6, i4_6; REAL r4_14, i4_14; wr = c_re(W[6 * l1]); wi = c_im(W[6 * l1]); tmpr = c_re(jp[6 * m]); tmpi = c_im(jp[6 * m]); r4_6 = ((wr * tmpr) - (wi * tmpi)); i4_6 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[14 * l1]); wi = c_im(W[14 * l1]); tmpr = c_re(jp[14 * m]); tmpi = c_im(jp[14 * m]); r4_14 = ((wr * tmpr) - (wi * tmpi)); i4_14 = ((wi * tmpr) + (wr * tmpi)); r3_6 = (r4_6 + r4_14); i3_6 = (i4_6 + i4_14); r3_14 = (r4_6 - r4_14); i3_14 = (i4_6 - i4_14); } r2_2 = (r3_2 + r3_6); i2_2 = (i3_2 + i3_6); r2_10 = (r3_2 - r3_6); i2_10 = (i3_2 - i3_6); r2_6 = (r3_10 + i3_14); i2_6 = (i3_10 - r3_14); r2_14 = (r3_10 - i3_14); i2_14 = (i3_10 + r3_14); } r1_0 = (r2_0 + r2_2); i1_0 = (i2_0 + i2_2); r1_8 = (r2_0 - r2_2); i1_8 = (i2_0 - i2_2); tmpr = (0.707106781187 * (r2_6 + i2_6)); tmpi = (0.707106781187 * (i2_6 - r2_6)); r1_2 = (r2_4 + tmpr); i1_2 = (i2_4 + tmpi); r1_10 = (r2_4 - tmpr); i1_10 = (i2_4 - tmpi); r1_4 = (r2_8 + i2_10); i1_4 = (i2_8 - r2_10); r1_12 = (r2_8 - i2_10); i1_12 = (i2_8 + r2_10); tmpr = (0.707106781187 * (i2_14 - r2_14)); tmpi = (0.707106781187 * (r2_14 + i2_14)); r1_6 = (r2_12 + tmpr); i1_6 = (i2_12 - tmpi); r1_14 = (r2_12 - tmpr); i1_14 = (i2_12 + tmpi); } { REAL r2_1, i2_1; REAL r2_3, i2_3; REAL r2_5, i2_5; REAL r2_7, i2_7; REAL r2_9, i2_9; REAL r2_11, i2_11; REAL r2_13, i2_13; REAL r2_15, i2_15; { REAL r3_1, i3_1; REAL r3_5, i3_5; REAL r3_9, i3_9; REAL r3_13, i3_13; { REAL r4_1, i4_1; REAL r4_9, i4_9; wr = c_re(W[1 * l1]); wi = c_im(W[1 * l1]); tmpr = c_re(jp[1 * m]); tmpi = c_im(jp[1 * m]); r4_1 = ((wr * tmpr) - (wi * tmpi)); i4_1 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[9 * l1]); wi = c_im(W[9 * l1]); tmpr = c_re(jp[9 * m]); tmpi = c_im(jp[9 * m]); r4_9 = ((wr * tmpr) - (wi * tmpi)); i4_9 = ((wi * tmpr) + (wr * tmpi)); r3_1 = (r4_1 + r4_9); i3_1 = (i4_1 + i4_9); r3_9 = (r4_1 - r4_9); i3_9 = (i4_1 - i4_9); } { REAL r4_5, i4_5; REAL r4_13, i4_13; wr = c_re(W[5 * l1]); wi = c_im(W[5 * l1]); tmpr = c_re(jp[5 * m]); tmpi = c_im(jp[5 * m]); r4_5 = ((wr * tmpr) - (wi * tmpi)); i4_5 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[13 * l1]); wi = c_im(W[13 * l1]); tmpr = c_re(jp[13 * m]); tmpi = c_im(jp[13 * m]); r4_13 = ((wr * tmpr) - (wi * tmpi)); i4_13 = ((wi * tmpr) + (wr * tmpi)); r3_5 = (r4_5 + r4_13); i3_5 = (i4_5 + i4_13); r3_13 = (r4_5 - r4_13); i3_13 = (i4_5 - i4_13); } r2_1 = (r3_1 + r3_5); i2_1 = (i3_1 + i3_5); r2_9 = (r3_1 - r3_5); i2_9 = (i3_1 - i3_5); r2_5 = (r3_9 + i3_13); i2_5 = (i3_9 - r3_13); r2_13 = (r3_9 - i3_13); i2_13 = (i3_9 + r3_13); } { REAL r3_3, i3_3; REAL r3_7, i3_7; REAL r3_11, i3_11; REAL r3_15, i3_15; { REAL r4_3, i4_3; REAL r4_11, i4_11; wr = c_re(W[3 * l1]); wi = c_im(W[3 * l1]); tmpr = c_re(jp[3 * m]); tmpi = c_im(jp[3 * m]); r4_3 = ((wr * tmpr) - (wi * tmpi)); i4_3 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[11 * l1]); wi = c_im(W[11 * l1]); tmpr = c_re(jp[11 * m]); tmpi = c_im(jp[11 * m]); r4_11 = ((wr * tmpr) - (wi * tmpi)); i4_11 = ((wi * tmpr) + (wr * tmpi)); r3_3 = (r4_3 + r4_11); i3_3 = (i4_3 + i4_11); r3_11 = (r4_3 - r4_11); i3_11 = (i4_3 - i4_11); } { REAL r4_7, i4_7; REAL r4_15, i4_15; wr = c_re(W[7 * l1]); wi = c_im(W[7 * l1]); tmpr = c_re(jp[7 * m]); tmpi = c_im(jp[7 * m]); r4_7 = ((wr * tmpr) - (wi * tmpi)); i4_7 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[15 * l1]); wi = c_im(W[15 * l1]); tmpr = c_re(jp[15 * m]); tmpi = c_im(jp[15 * m]); r4_15 = ((wr * tmpr) - (wi * tmpi)); i4_15 = ((wi * tmpr) + (wr * tmpi)); r3_7 = (r4_7 + r4_15); i3_7 = (i4_7 + i4_15); r3_15 = (r4_7 - r4_15); i3_15 = (i4_7 - i4_15); } r2_3 = (r3_3 + r3_7); i2_3 = (i3_3 + i3_7); r2_11 = (r3_3 - r3_7); i2_11 = (i3_3 - i3_7); r2_7 = (r3_11 + i3_15); i2_7 = (i3_11 - r3_15); r2_15 = (r3_11 - i3_15); i2_15 = (i3_11 + r3_15); } r1_1 = (r2_1 + r2_3); i1_1 = (i2_1 + i2_3); r1_9 = (r2_1 - r2_3); i1_9 = (i2_1 - i2_3); tmpr = (0.707106781187 * (r2_7 + i2_7)); tmpi = (0.707106781187 * (i2_7 - r2_7)); r1_3 = (r2_5 + tmpr); i1_3 = (i2_5 + tmpi); r1_11 = (r2_5 - tmpr); i1_11 = (i2_5 - tmpi); r1_5 = (r2_9 + i2_11); i1_5 = (i2_9 - r2_11); r1_13 = (r2_9 - i2_11); i1_13 = (i2_9 + r2_11); tmpr = (0.707106781187 * (i2_15 - r2_15)); tmpi = (0.707106781187 * (r2_15 + i2_15)); r1_7 = (r2_13 + tmpr); i1_7 = (i2_13 - tmpi); r1_15 = (r2_13 - tmpr); i1_15 = (i2_13 + tmpi); } c_re(kp[0 * m]) = (r1_0 + r1_1); c_im(kp[0 * m]) = (i1_0 + i1_1); c_re(kp[8 * m]) = (r1_0 - r1_1); c_im(kp[8 * m]) = (i1_0 - i1_1); tmpr = ((0.923879532511 * r1_3) + (0.382683432365 * i1_3)); tmpi = ((0.923879532511 * i1_3) - (0.382683432365 * r1_3)); c_re(kp[1 * m]) = (r1_2 + tmpr); c_im(kp[1 * m]) = (i1_2 + tmpi); c_re(kp[9 * m]) = (r1_2 - tmpr); c_im(kp[9 * m]) = (i1_2 - tmpi); tmpr = (0.707106781187 * (r1_5 + i1_5)); tmpi = (0.707106781187 * (i1_5 - r1_5)); c_re(kp[2 * m]) = (r1_4 + tmpr); c_im(kp[2 * m]) = (i1_4 + tmpi); c_re(kp[10 * m]) = (r1_4 - tmpr); c_im(kp[10 * m]) = (i1_4 - tmpi); tmpr = ((0.382683432365 * r1_7) + (0.923879532511 * i1_7)); tmpi = ((0.382683432365 * i1_7) - (0.923879532511 * r1_7)); c_re(kp[3 * m]) = (r1_6 + tmpr); c_im(kp[3 * m]) = (i1_6 + tmpi); c_re(kp[11 * m]) = (r1_6 - tmpr); c_im(kp[11 * m]) = (i1_6 - tmpi); c_re(kp[4 * m]) = (r1_8 + i1_9); c_im(kp[4 * m]) = (i1_8 - r1_9); c_re(kp[12 * m]) = (r1_8 - i1_9); c_im(kp[12 * m]) = (i1_8 + r1_9); tmpr = ((0.923879532511 * i1_11) - (0.382683432365 * r1_11)); tmpi = ((0.923879532511 * r1_11) + (0.382683432365 * i1_11)); c_re(kp[5 * m]) = (r1_10 + tmpr); c_im(kp[5 * m]) = (i1_10 - tmpi); c_re(kp[13 * m]) = (r1_10 - tmpr); c_im(kp[13 * m]) = (i1_10 + tmpi); tmpr = (0.707106781187 * (i1_13 - r1_13)); tmpi = (0.707106781187 * (r1_13 + i1_13)); c_re(kp[6 * m]) = (r1_12 + tmpr); c_im(kp[6 * m]) = (i1_12 - tmpi); c_re(kp[14 * m]) = (r1_12 - tmpr); c_im(kp[14 * m]) = (i1_12 + tmpi); tmpr = ((0.382683432365 * i1_15) - (0.923879532511 * r1_15)); tmpi = ((0.382683432365 * r1_15) + (0.923879532511 * i1_15)); c_re(kp[7 * m]) = (r1_14 + tmpr); c_im(kp[7 * m]) = (i1_14 - tmpi); c_re(kp[15 * m]) = (r1_14 - tmpr); c_im(kp[15 * m]) = (i1_14 + tmpi); } } } else { int ab = (a + b) / 2; fft_twiddle_16_seq(a, ab, in, out, W, nW, nWdn, m); fft_twiddle_16_seq(ab, b, in, out, W, nW, nWdn, m); } } void fft_unshuffle_16(int a, int b, COMPLEX * in, COMPLEX * out, int m) { int i; const COMPLEX *ip; COMPLEX *jp; if ((b - a) < 128) { ip = in + a * 16; for (i = a; i < b; ++i) { jp = out + i; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; } } else { int ab = (a + b) / 2; #pragma omp task untied fft_unshuffle_16(a, ab, in, out, m); #pragma omp task untied fft_unshuffle_16(ab, b, in, out, m); #pragma omp taskwait } } void fft_unshuffle_16_seq(int a, int b, COMPLEX * in, COMPLEX * out, int m) { int i; const COMPLEX *ip; COMPLEX *jp; if ((b - a) < 128) { ip = in + a * 16; for (i = a; i < b; ++i) { jp = out + i; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; } } else { int ab = (a + b) / 2; fft_unshuffle_16_seq(a, ab, in, out, m); fft_unshuffle_16_seq(ab, b, in, out, m); } } void fft_base_32(COMPLEX * in, COMPLEX * out) { REAL tmpr, tmpi; { REAL r1_0, i1_0; REAL r1_1, i1_1; REAL r1_2, i1_2; REAL r1_3, i1_3; REAL r1_4, i1_4; REAL r1_5, i1_5; REAL r1_6, i1_6; REAL r1_7, i1_7; REAL r1_8, i1_8; REAL r1_9, i1_9; REAL r1_10, i1_10; REAL r1_11, i1_11; REAL r1_12, i1_12; REAL r1_13, i1_13; REAL r1_14, i1_14; REAL r1_15, i1_15; REAL r1_16, i1_16; REAL r1_17, i1_17; REAL r1_18, i1_18; REAL r1_19, i1_19; REAL r1_20, i1_20; REAL r1_21, i1_21; REAL r1_22, i1_22; REAL r1_23, i1_23; REAL r1_24, i1_24; REAL r1_25, i1_25; REAL r1_26, i1_26; REAL r1_27, i1_27; REAL r1_28, i1_28; REAL r1_29, i1_29; REAL r1_30, i1_30; REAL r1_31, i1_31; { REAL r2_0, i2_0; REAL r2_2, i2_2; REAL r2_4, i2_4; REAL r2_6, i2_6; REAL r2_8, i2_8; REAL r2_10, i2_10; REAL r2_12, i2_12; REAL r2_14, i2_14; REAL r2_16, i2_16; REAL r2_18, i2_18; REAL r2_20, i2_20; REAL r2_22, i2_22; REAL r2_24, i2_24; REAL r2_26, i2_26; REAL r2_28, i2_28; REAL r2_30, i2_30; { REAL r3_0, i3_0; REAL r3_4, i3_4; REAL r3_8, i3_8; REAL r3_12, i3_12; REAL r3_16, i3_16; REAL r3_20, i3_20; REAL r3_24, i3_24; REAL r3_28, i3_28; { REAL r4_0, i4_0; REAL r4_8, i4_8; REAL r4_16, i4_16; REAL r4_24, i4_24; { REAL r5_0, i5_0; REAL r5_16, i5_16; r5_0 = c_re(in[0]); i5_0 = c_im(in[0]); r5_16 = c_re(in[16]); i5_16 = c_im(in[16]); r4_0 = (r5_0 + r5_16); i4_0 = (i5_0 + i5_16); r4_16 = (r5_0 - r5_16); i4_16 = (i5_0 - i5_16); } { REAL r5_8, i5_8; REAL r5_24, i5_24; r5_8 = c_re(in[8]); i5_8 = c_im(in[8]); r5_24 = c_re(in[24]); i5_24 = c_im(in[24]); r4_8 = (r5_8 + r5_24); i4_8 = (i5_8 + i5_24); r4_24 = (r5_8 - r5_24); i4_24 = (i5_8 - i5_24); } r3_0 = (r4_0 + r4_8); i3_0 = (i4_0 + i4_8); r3_16 = (r4_0 - r4_8); i3_16 = (i4_0 - i4_8); r3_8 = (r4_16 + i4_24); i3_8 = (i4_16 - r4_24); r3_24 = (r4_16 - i4_24); i3_24 = (i4_16 + r4_24); } { REAL r4_4, i4_4; REAL r4_12, i4_12; REAL r4_20, i4_20; REAL r4_28, i4_28; { REAL r5_4, i5_4; REAL r5_20, i5_20; r5_4 = c_re(in[4]); i5_4 = c_im(in[4]); r5_20 = c_re(in[20]); i5_20 = c_im(in[20]); r4_4 = (r5_4 + r5_20); i4_4 = (i5_4 + i5_20); r4_20 = (r5_4 - r5_20); i4_20 = (i5_4 - i5_20); } { REAL r5_12, i5_12; REAL r5_28, i5_28; r5_12 = c_re(in[12]); i5_12 = c_im(in[12]); r5_28 = c_re(in[28]); i5_28 = c_im(in[28]); r4_12 = (r5_12 + r5_28); i4_12 = (i5_12 + i5_28); r4_28 = (r5_12 - r5_28); i4_28 = (i5_12 - i5_28); } r3_4 = (r4_4 + r4_12); i3_4 = (i4_4 + i4_12); r3_20 = (r4_4 - r4_12); i3_20 = (i4_4 - i4_12); r3_12 = (r4_20 + i4_28); i3_12 = (i4_20 - r4_28); r3_28 = (r4_20 - i4_28); i3_28 = (i4_20 + r4_28); } r2_0 = (r3_0 + r3_4); i2_0 = (i3_0 + i3_4); r2_16 = (r3_0 - r3_4); i2_16 = (i3_0 - i3_4); tmpr = (0.707106781187 * (r3_12 + i3_12)); tmpi = (0.707106781187 * (i3_12 - r3_12)); r2_4 = (r3_8 + tmpr); i2_4 = (i3_8 + tmpi); r2_20 = (r3_8 - tmpr); i2_20 = (i3_8 - tmpi); r2_8 = (r3_16 + i3_20); i2_8 = (i3_16 - r3_20); r2_24 = (r3_16 - i3_20); i2_24 = (i3_16 + r3_20); tmpr = (0.707106781187 * (i3_28 - r3_28)); tmpi = (0.707106781187 * (r3_28 + i3_28)); r2_12 = (r3_24 + tmpr); i2_12 = (i3_24 - tmpi); r2_28 = (r3_24 - tmpr); i2_28 = (i3_24 + tmpi); } { REAL r3_2, i3_2; REAL r3_6, i3_6; REAL r3_10, i3_10; REAL r3_14, i3_14; REAL r3_18, i3_18; REAL r3_22, i3_22; REAL r3_26, i3_26; REAL r3_30, i3_30; { REAL r4_2, i4_2; REAL r4_10, i4_10; REAL r4_18, i4_18; REAL r4_26, i4_26; { REAL r5_2, i5_2; REAL r5_18, i5_18; r5_2 = c_re(in[2]); i5_2 = c_im(in[2]); r5_18 = c_re(in[18]); i5_18 = c_im(in[18]); r4_2 = (r5_2 + r5_18); i4_2 = (i5_2 + i5_18); r4_18 = (r5_2 - r5_18); i4_18 = (i5_2 - i5_18); } { REAL r5_10, i5_10; REAL r5_26, i5_26; r5_10 = c_re(in[10]); i5_10 = c_im(in[10]); r5_26 = c_re(in[26]); i5_26 = c_im(in[26]); r4_10 = (r5_10 + r5_26); i4_10 = (i5_10 + i5_26); r4_26 = (r5_10 - r5_26); i4_26 = (i5_10 - i5_26); } r3_2 = (r4_2 + r4_10); i3_2 = (i4_2 + i4_10); r3_18 = (r4_2 - r4_10); i3_18 = (i4_2 - i4_10); r3_10 = (r4_18 + i4_26); i3_10 = (i4_18 - r4_26); r3_26 = (r4_18 - i4_26); i3_26 = (i4_18 + r4_26); } { REAL r4_6, i4_6; REAL r4_14, i4_14; REAL r4_22, i4_22; REAL r4_30, i4_30; { REAL r5_6, i5_6; REAL r5_22, i5_22; r5_6 = c_re(in[6]); i5_6 = c_im(in[6]); r5_22 = c_re(in[22]); i5_22 = c_im(in[22]); r4_6 = (r5_6 + r5_22); i4_6 = (i5_6 + i5_22); r4_22 = (r5_6 - r5_22); i4_22 = (i5_6 - i5_22); } { REAL r5_14, i5_14; REAL r5_30, i5_30; r5_14 = c_re(in[14]); i5_14 = c_im(in[14]); r5_30 = c_re(in[30]); i5_30 = c_im(in[30]); r4_14 = (r5_14 + r5_30); i4_14 = (i5_14 + i5_30); r4_30 = (r5_14 - r5_30); i4_30 = (i5_14 - i5_30); } r3_6 = (r4_6 + r4_14); i3_6 = (i4_6 + i4_14); r3_22 = (r4_6 - r4_14); i3_22 = (i4_6 - i4_14); r3_14 = (r4_22 + i4_30); i3_14 = (i4_22 - r4_30); r3_30 = (r4_22 - i4_30); i3_30 = (i4_22 + r4_30); } r2_2 = (r3_2 + r3_6); i2_2 = (i3_2 + i3_6); r2_18 = (r3_2 - r3_6); i2_18 = (i3_2 - i3_6); tmpr = (0.707106781187 * (r3_14 + i3_14)); tmpi = (0.707106781187 * (i3_14 - r3_14)); r2_6 = (r3_10 + tmpr); i2_6 = (i3_10 + tmpi); r2_22 = (r3_10 - tmpr); i2_22 = (i3_10 - tmpi); r2_10 = (r3_18 + i3_22); i2_10 = (i3_18 - r3_22); r2_26 = (r3_18 - i3_22); i2_26 = (i3_18 + r3_22); tmpr = (0.707106781187 * (i3_30 - r3_30)); tmpi = (0.707106781187 * (r3_30 + i3_30)); r2_14 = (r3_26 + tmpr); i2_14 = (i3_26 - tmpi); r2_30 = (r3_26 - tmpr); i2_30 = (i3_26 + tmpi); } r1_0 = (r2_0 + r2_2); i1_0 = (i2_0 + i2_2); r1_16 = (r2_0 - r2_2); i1_16 = (i2_0 - i2_2); tmpr = ((0.923879532511 * r2_6) + (0.382683432365 * i2_6)); tmpi = ((0.923879532511 * i2_6) - (0.382683432365 * r2_6)); r1_2 = (r2_4 + tmpr); i1_2 = (i2_4 + tmpi); r1_18 = (r2_4 - tmpr); i1_18 = (i2_4 - tmpi); tmpr = (0.707106781187 * (r2_10 + i2_10)); tmpi = (0.707106781187 * (i2_10 - r2_10)); r1_4 = (r2_8 + tmpr); i1_4 = (i2_8 + tmpi); r1_20 = (r2_8 - tmpr); i1_20 = (i2_8 - tmpi); tmpr = ((0.382683432365 * r2_14) + (0.923879532511 * i2_14)); tmpi = ((0.382683432365 * i2_14) - (0.923879532511 * r2_14)); r1_6 = (r2_12 + tmpr); i1_6 = (i2_12 + tmpi); r1_22 = (r2_12 - tmpr); i1_22 = (i2_12 - tmpi); r1_8 = (r2_16 + i2_18); i1_8 = (i2_16 - r2_18); r1_24 = (r2_16 - i2_18); i1_24 = (i2_16 + r2_18); tmpr = ((0.923879532511 * i2_22) - (0.382683432365 * r2_22)); tmpi = ((0.923879532511 * r2_22) + (0.382683432365 * i2_22)); r1_10 = (r2_20 + tmpr); i1_10 = (i2_20 - tmpi); r1_26 = (r2_20 - tmpr); i1_26 = (i2_20 + tmpi); tmpr = (0.707106781187 * (i2_26 - r2_26)); tmpi = (0.707106781187 * (r2_26 + i2_26)); r1_12 = (r2_24 + tmpr); i1_12 = (i2_24 - tmpi); r1_28 = (r2_24 - tmpr); i1_28 = (i2_24 + tmpi); tmpr = ((0.382683432365 * i2_30) - (0.923879532511 * r2_30)); tmpi = ((0.382683432365 * r2_30) + (0.923879532511 * i2_30)); r1_14 = (r2_28 + tmpr); i1_14 = (i2_28 - tmpi); r1_30 = (r2_28 - tmpr); i1_30 = (i2_28 + tmpi); } { REAL r2_1, i2_1; REAL r2_3, i2_3; REAL r2_5, i2_5; REAL r2_7, i2_7; REAL r2_9, i2_9; REAL r2_11, i2_11; REAL r2_13, i2_13; REAL r2_15, i2_15; REAL r2_17, i2_17; REAL r2_19, i2_19; REAL r2_21, i2_21; REAL r2_23, i2_23; REAL r2_25, i2_25; REAL r2_27, i2_27; REAL r2_29, i2_29; REAL r2_31, i2_31; { REAL r3_1, i3_1; REAL r3_5, i3_5; REAL r3_9, i3_9; REAL r3_13, i3_13; REAL r3_17, i3_17; REAL r3_21, i3_21; REAL r3_25, i3_25; REAL r3_29, i3_29; { REAL r4_1, i4_1; REAL r4_9, i4_9; REAL r4_17, i4_17; REAL r4_25, i4_25; { REAL r5_1, i5_1; REAL r5_17, i5_17; r5_1 = c_re(in[1]); i5_1 = c_im(in[1]); r5_17 = c_re(in[17]); i5_17 = c_im(in[17]); r4_1 = (r5_1 + r5_17); i4_1 = (i5_1 + i5_17); r4_17 = (r5_1 - r5_17); i4_17 = (i5_1 - i5_17); } { REAL r5_9, i5_9; REAL r5_25, i5_25; r5_9 = c_re(in[9]); i5_9 = c_im(in[9]); r5_25 = c_re(in[25]); i5_25 = c_im(in[25]); r4_9 = (r5_9 + r5_25); i4_9 = (i5_9 + i5_25); r4_25 = (r5_9 - r5_25); i4_25 = (i5_9 - i5_25); } r3_1 = (r4_1 + r4_9); i3_1 = (i4_1 + i4_9); r3_17 = (r4_1 - r4_9); i3_17 = (i4_1 - i4_9); r3_9 = (r4_17 + i4_25); i3_9 = (i4_17 - r4_25); r3_25 = (r4_17 - i4_25); i3_25 = (i4_17 + r4_25); } { REAL r4_5, i4_5; REAL r4_13, i4_13; REAL r4_21, i4_21; REAL r4_29, i4_29; { REAL r5_5, i5_5; REAL r5_21, i5_21; r5_5 = c_re(in[5]); i5_5 = c_im(in[5]); r5_21 = c_re(in[21]); i5_21 = c_im(in[21]); r4_5 = (r5_5 + r5_21); i4_5 = (i5_5 + i5_21); r4_21 = (r5_5 - r5_21); i4_21 = (i5_5 - i5_21); } { REAL r5_13, i5_13; REAL r5_29, i5_29; r5_13 = c_re(in[13]); i5_13 = c_im(in[13]); r5_29 = c_re(in[29]); i5_29 = c_im(in[29]); r4_13 = (r5_13 + r5_29); i4_13 = (i5_13 + i5_29); r4_29 = (r5_13 - r5_29); i4_29 = (i5_13 - i5_29); } r3_5 = (r4_5 + r4_13); i3_5 = (i4_5 + i4_13); r3_21 = (r4_5 - r4_13); i3_21 = (i4_5 - i4_13); r3_13 = (r4_21 + i4_29); i3_13 = (i4_21 - r4_29); r3_29 = (r4_21 - i4_29); i3_29 = (i4_21 + r4_29); } r2_1 = (r3_1 + r3_5); i2_1 = (i3_1 + i3_5); r2_17 = (r3_1 - r3_5); i2_17 = (i3_1 - i3_5); tmpr = (0.707106781187 * (r3_13 + i3_13)); tmpi = (0.707106781187 * (i3_13 - r3_13)); r2_5 = (r3_9 + tmpr); i2_5 = (i3_9 + tmpi); r2_21 = (r3_9 - tmpr); i2_21 = (i3_9 - tmpi); r2_9 = (r3_17 + i3_21); i2_9 = (i3_17 - r3_21); r2_25 = (r3_17 - i3_21); i2_25 = (i3_17 + r3_21); tmpr = (0.707106781187 * (i3_29 - r3_29)); tmpi = (0.707106781187 * (r3_29 + i3_29)); r2_13 = (r3_25 + tmpr); i2_13 = (i3_25 - tmpi); r2_29 = (r3_25 - tmpr); i2_29 = (i3_25 + tmpi); } { REAL r3_3, i3_3; REAL r3_7, i3_7; REAL r3_11, i3_11; REAL r3_15, i3_15; REAL r3_19, i3_19; REAL r3_23, i3_23; REAL r3_27, i3_27; REAL r3_31, i3_31; { REAL r4_3, i4_3; REAL r4_11, i4_11; REAL r4_19, i4_19; REAL r4_27, i4_27; { REAL r5_3, i5_3; REAL r5_19, i5_19; r5_3 = c_re(in[3]); i5_3 = c_im(in[3]); r5_19 = c_re(in[19]); i5_19 = c_im(in[19]); r4_3 = (r5_3 + r5_19); i4_3 = (i5_3 + i5_19); r4_19 = (r5_3 - r5_19); i4_19 = (i5_3 - i5_19); } { REAL r5_11, i5_11; REAL r5_27, i5_27; r5_11 = c_re(in[11]); i5_11 = c_im(in[11]); r5_27 = c_re(in[27]); i5_27 = c_im(in[27]); r4_11 = (r5_11 + r5_27); i4_11 = (i5_11 + i5_27); r4_27 = (r5_11 - r5_27); i4_27 = (i5_11 - i5_27); } r3_3 = (r4_3 + r4_11); i3_3 = (i4_3 + i4_11); r3_19 = (r4_3 - r4_11); i3_19 = (i4_3 - i4_11); r3_11 = (r4_19 + i4_27); i3_11 = (i4_19 - r4_27); r3_27 = (r4_19 - i4_27); i3_27 = (i4_19 + r4_27); } { REAL r4_7, i4_7; REAL r4_15, i4_15; REAL r4_23, i4_23; REAL r4_31, i4_31; { REAL r5_7, i5_7; REAL r5_23, i5_23; r5_7 = c_re(in[7]); i5_7 = c_im(in[7]); r5_23 = c_re(in[23]); i5_23 = c_im(in[23]); r4_7 = (r5_7 + r5_23); i4_7 = (i5_7 + i5_23); r4_23 = (r5_7 - r5_23); i4_23 = (i5_7 - i5_23); } { REAL r5_15, i5_15; REAL r5_31, i5_31; r5_15 = c_re(in[15]); i5_15 = c_im(in[15]); r5_31 = c_re(in[31]); i5_31 = c_im(in[31]); r4_15 = (r5_15 + r5_31); i4_15 = (i5_15 + i5_31); r4_31 = (r5_15 - r5_31); i4_31 = (i5_15 - i5_31); } r3_7 = (r4_7 + r4_15); i3_7 = (i4_7 + i4_15); r3_23 = (r4_7 - r4_15); i3_23 = (i4_7 - i4_15); r3_15 = (r4_23 + i4_31); i3_15 = (i4_23 - r4_31); r3_31 = (r4_23 - i4_31); i3_31 = (i4_23 + r4_31); } r2_3 = (r3_3 + r3_7); i2_3 = (i3_3 + i3_7); r2_19 = (r3_3 - r3_7); i2_19 = (i3_3 - i3_7); tmpr = (0.707106781187 * (r3_15 + i3_15)); tmpi = (0.707106781187 * (i3_15 - r3_15)); r2_7 = (r3_11 + tmpr); i2_7 = (i3_11 + tmpi); r2_23 = (r3_11 - tmpr); i2_23 = (i3_11 - tmpi); r2_11 = (r3_19 + i3_23); i2_11 = (i3_19 - r3_23); r2_27 = (r3_19 - i3_23); i2_27 = (i3_19 + r3_23); tmpr = (0.707106781187 * (i3_31 - r3_31)); tmpi = (0.707106781187 * (r3_31 + i3_31)); r2_15 = (r3_27 + tmpr); i2_15 = (i3_27 - tmpi); r2_31 = (r3_27 - tmpr); i2_31 = (i3_27 + tmpi); } r1_1 = (r2_1 + r2_3); i1_1 = (i2_1 + i2_3); r1_17 = (r2_1 - r2_3); i1_17 = (i2_1 - i2_3); tmpr = ((0.923879532511 * r2_7) + (0.382683432365 * i2_7)); tmpi = ((0.923879532511 * i2_7) - (0.382683432365 * r2_7)); r1_3 = (r2_5 + tmpr); i1_3 = (i2_5 + tmpi); r1_19 = (r2_5 - tmpr); i1_19 = (i2_5 - tmpi); tmpr = (0.707106781187 * (r2_11 + i2_11)); tmpi = (0.707106781187 * (i2_11 - r2_11)); r1_5 = (r2_9 + tmpr); i1_5 = (i2_9 + tmpi); r1_21 = (r2_9 - tmpr); i1_21 = (i2_9 - tmpi); tmpr = ((0.382683432365 * r2_15) + (0.923879532511 * i2_15)); tmpi = ((0.382683432365 * i2_15) - (0.923879532511 * r2_15)); r1_7 = (r2_13 + tmpr); i1_7 = (i2_13 + tmpi); r1_23 = (r2_13 - tmpr); i1_23 = (i2_13 - tmpi); r1_9 = (r2_17 + i2_19); i1_9 = (i2_17 - r2_19); r1_25 = (r2_17 - i2_19); i1_25 = (i2_17 + r2_19); tmpr = ((0.923879532511 * i2_23) - (0.382683432365 * r2_23)); tmpi = ((0.923879532511 * r2_23) + (0.382683432365 * i2_23)); r1_11 = (r2_21 + tmpr); i1_11 = (i2_21 - tmpi); r1_27 = (r2_21 - tmpr); i1_27 = (i2_21 + tmpi); tmpr = (0.707106781187 * (i2_27 - r2_27)); tmpi = (0.707106781187 * (r2_27 + i2_27)); r1_13 = (r2_25 + tmpr); i1_13 = (i2_25 - tmpi); r1_29 = (r2_25 - tmpr); i1_29 = (i2_25 + tmpi); tmpr = ((0.382683432365 * i2_31) - (0.923879532511 * r2_31)); tmpi = ((0.382683432365 * r2_31) + (0.923879532511 * i2_31)); r1_15 = (r2_29 + tmpr); i1_15 = (i2_29 - tmpi); r1_31 = (r2_29 - tmpr); i1_31 = (i2_29 + tmpi); } c_re(out[0]) = (r1_0 + r1_1); c_im(out[0]) = (i1_0 + i1_1); c_re(out[16]) = (r1_0 - r1_1); c_im(out[16]) = (i1_0 - i1_1); tmpr = ((0.980785280403 * r1_3) + (0.195090322016 * i1_3)); tmpi = ((0.980785280403 * i1_3) - (0.195090322016 * r1_3)); c_re(out[1]) = (r1_2 + tmpr); c_im(out[1]) = (i1_2 + tmpi); c_re(out[17]) = (r1_2 - tmpr); c_im(out[17]) = (i1_2 - tmpi); tmpr = ((0.923879532511 * r1_5) + (0.382683432365 * i1_5)); tmpi = ((0.923879532511 * i1_5) - (0.382683432365 * r1_5)); c_re(out[2]) = (r1_4 + tmpr); c_im(out[2]) = (i1_4 + tmpi); c_re(out[18]) = (r1_4 - tmpr); c_im(out[18]) = (i1_4 - tmpi); tmpr = ((0.831469612303 * r1_7) + (0.55557023302 * i1_7)); tmpi = ((0.831469612303 * i1_7) - (0.55557023302 * r1_7)); c_re(out[3]) = (r1_6 + tmpr); c_im(out[3]) = (i1_6 + tmpi); c_re(out[19]) = (r1_6 - tmpr); c_im(out[19]) = (i1_6 - tmpi); tmpr = (0.707106781187 * (r1_9 + i1_9)); tmpi = (0.707106781187 * (i1_9 - r1_9)); c_re(out[4]) = (r1_8 + tmpr); c_im(out[4]) = (i1_8 + tmpi); c_re(out[20]) = (r1_8 - tmpr); c_im(out[20]) = (i1_8 - tmpi); tmpr = ((0.55557023302 * r1_11) + (0.831469612303 * i1_11)); tmpi = ((0.55557023302 * i1_11) - (0.831469612303 * r1_11)); c_re(out[5]) = (r1_10 + tmpr); c_im(out[5]) = (i1_10 + tmpi); c_re(out[21]) = (r1_10 - tmpr); c_im(out[21]) = (i1_10 - tmpi); tmpr = ((0.382683432365 * r1_13) + (0.923879532511 * i1_13)); tmpi = ((0.382683432365 * i1_13) - (0.923879532511 * r1_13)); c_re(out[6]) = (r1_12 + tmpr); c_im(out[6]) = (i1_12 + tmpi); c_re(out[22]) = (r1_12 - tmpr); c_im(out[22]) = (i1_12 - tmpi); tmpr = ((0.195090322016 * r1_15) + (0.980785280403 * i1_15)); tmpi = ((0.195090322016 * i1_15) - (0.980785280403 * r1_15)); c_re(out[7]) = (r1_14 + tmpr); c_im(out[7]) = (i1_14 + tmpi); c_re(out[23]) = (r1_14 - tmpr); c_im(out[23]) = (i1_14 - tmpi); c_re(out[8]) = (r1_16 + i1_17); c_im(out[8]) = (i1_16 - r1_17); c_re(out[24]) = (r1_16 - i1_17); c_im(out[24]) = (i1_16 + r1_17); tmpr = ((0.980785280403 * i1_19) - (0.195090322016 * r1_19)); tmpi = ((0.980785280403 * r1_19) + (0.195090322016 * i1_19)); c_re(out[9]) = (r1_18 + tmpr); c_im(out[9]) = (i1_18 - tmpi); c_re(out[25]) = (r1_18 - tmpr); c_im(out[25]) = (i1_18 + tmpi); tmpr = ((0.923879532511 * i1_21) - (0.382683432365 * r1_21)); tmpi = ((0.923879532511 * r1_21) + (0.382683432365 * i1_21)); c_re(out[10]) = (r1_20 + tmpr); c_im(out[10]) = (i1_20 - tmpi); c_re(out[26]) = (r1_20 - tmpr); c_im(out[26]) = (i1_20 + tmpi); tmpr = ((0.831469612303 * i1_23) - (0.55557023302 * r1_23)); tmpi = ((0.831469612303 * r1_23) + (0.55557023302 * i1_23)); c_re(out[11]) = (r1_22 + tmpr); c_im(out[11]) = (i1_22 - tmpi); c_re(out[27]) = (r1_22 - tmpr); c_im(out[27]) = (i1_22 + tmpi); tmpr = (0.707106781187 * (i1_25 - r1_25)); tmpi = (0.707106781187 * (r1_25 + i1_25)); c_re(out[12]) = (r1_24 + tmpr); c_im(out[12]) = (i1_24 - tmpi); c_re(out[28]) = (r1_24 - tmpr); c_im(out[28]) = (i1_24 + tmpi); tmpr = ((0.55557023302 * i1_27) - (0.831469612303 * r1_27)); tmpi = ((0.55557023302 * r1_27) + (0.831469612303 * i1_27)); c_re(out[13]) = (r1_26 + tmpr); c_im(out[13]) = (i1_26 - tmpi); c_re(out[29]) = (r1_26 - tmpr); c_im(out[29]) = (i1_26 + tmpi); tmpr = ((0.382683432365 * i1_29) - (0.923879532511 * r1_29)); tmpi = ((0.382683432365 * r1_29) + (0.923879532511 * i1_29)); c_re(out[14]) = (r1_28 + tmpr); c_im(out[14]) = (i1_28 - tmpi); c_re(out[30]) = (r1_28 - tmpr); c_im(out[30]) = (i1_28 + tmpi); tmpr = ((0.195090322016 * i1_31) - (0.980785280403 * r1_31)); tmpi = ((0.195090322016 * r1_31) + (0.980785280403 * i1_31)); c_re(out[15]) = (r1_30 + tmpr); c_im(out[15]) = (i1_30 - tmpi); c_re(out[31]) = (r1_30 - tmpr); c_im(out[31]) = (i1_30 + tmpi); } } void fft_twiddle_32(int a, int b, COMPLEX * in, COMPLEX * out, COMPLEX * W, int nW, int nWdn, int m) { int l1, i; COMPLEX *jp, *kp; REAL tmpr, tmpi, wr, wi; if ((b - a) < 128) { for (i = a, l1 = nWdn * i, kp = out + i; i < b; i++, l1 += nWdn, kp++) { jp = in + i; { REAL r1_0, i1_0; REAL r1_1, i1_1; REAL r1_2, i1_2; REAL r1_3, i1_3; REAL r1_4, i1_4; REAL r1_5, i1_5; REAL r1_6, i1_6; REAL r1_7, i1_7; REAL r1_8, i1_8; REAL r1_9, i1_9; REAL r1_10, i1_10; REAL r1_11, i1_11; REAL r1_12, i1_12; REAL r1_13, i1_13; REAL r1_14, i1_14; REAL r1_15, i1_15; REAL r1_16, i1_16; REAL r1_17, i1_17; REAL r1_18, i1_18; REAL r1_19, i1_19; REAL r1_20, i1_20; REAL r1_21, i1_21; REAL r1_22, i1_22; REAL r1_23, i1_23; REAL r1_24, i1_24; REAL r1_25, i1_25; REAL r1_26, i1_26; REAL r1_27, i1_27; REAL r1_28, i1_28; REAL r1_29, i1_29; REAL r1_30, i1_30; REAL r1_31, i1_31; { REAL r2_0, i2_0; REAL r2_2, i2_2; REAL r2_4, i2_4; REAL r2_6, i2_6; REAL r2_8, i2_8; REAL r2_10, i2_10; REAL r2_12, i2_12; REAL r2_14, i2_14; REAL r2_16, i2_16; REAL r2_18, i2_18; REAL r2_20, i2_20; REAL r2_22, i2_22; REAL r2_24, i2_24; REAL r2_26, i2_26; REAL r2_28, i2_28; REAL r2_30, i2_30; { REAL r3_0, i3_0; REAL r3_4, i3_4; REAL r3_8, i3_8; REAL r3_12, i3_12; REAL r3_16, i3_16; REAL r3_20, i3_20; REAL r3_24, i3_24; REAL r3_28, i3_28; { REAL r4_0, i4_0; REAL r4_8, i4_8; REAL r4_16, i4_16; REAL r4_24, i4_24; { REAL r5_0, i5_0; REAL r5_16, i5_16; r5_0 = c_re(jp[0 * m]); i5_0 = c_im(jp[0 * m]); wr = c_re(W[16 * l1]); wi = c_im(W[16 * l1]); tmpr = c_re(jp[16 * m]); tmpi = c_im(jp[16 * m]); r5_16 = ((wr * tmpr) - (wi * tmpi)); i5_16 = ((wi * tmpr) + (wr * tmpi)); r4_0 = (r5_0 + r5_16); i4_0 = (i5_0 + i5_16); r4_16 = (r5_0 - r5_16); i4_16 = (i5_0 - i5_16); } { REAL r5_8, i5_8; REAL r5_24, i5_24; wr = c_re(W[8 * l1]); wi = c_im(W[8 * l1]); tmpr = c_re(jp[8 * m]); tmpi = c_im(jp[8 * m]); r5_8 = ((wr * tmpr) - (wi * tmpi)); i5_8 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[24 * l1]); wi = c_im(W[24 * l1]); tmpr = c_re(jp[24 * m]); tmpi = c_im(jp[24 * m]); r5_24 = ((wr * tmpr) - (wi * tmpi)); i5_24 = ((wi * tmpr) + (wr * tmpi)); r4_8 = (r5_8 + r5_24); i4_8 = (i5_8 + i5_24); r4_24 = (r5_8 - r5_24); i4_24 = (i5_8 - i5_24); } r3_0 = (r4_0 + r4_8); i3_0 = (i4_0 + i4_8); r3_16 = (r4_0 - r4_8); i3_16 = (i4_0 - i4_8); r3_8 = (r4_16 + i4_24); i3_8 = (i4_16 - r4_24); r3_24 = (r4_16 - i4_24); i3_24 = (i4_16 + r4_24); } { REAL r4_4, i4_4; REAL r4_12, i4_12; REAL r4_20, i4_20; REAL r4_28, i4_28; { REAL r5_4, i5_4; REAL r5_20, i5_20; wr = c_re(W[4 * l1]); wi = c_im(W[4 * l1]); tmpr = c_re(jp[4 * m]); tmpi = c_im(jp[4 * m]); r5_4 = ((wr * tmpr) - (wi * tmpi)); i5_4 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[20 * l1]); wi = c_im(W[20 * l1]); tmpr = c_re(jp[20 * m]); tmpi = c_im(jp[20 * m]); r5_20 = ((wr * tmpr) - (wi * tmpi)); i5_20 = ((wi * tmpr) + (wr * tmpi)); r4_4 = (r5_4 + r5_20); i4_4 = (i5_4 + i5_20); r4_20 = (r5_4 - r5_20); i4_20 = (i5_4 - i5_20); } { REAL r5_12, i5_12; REAL r5_28, i5_28; wr = c_re(W[12 * l1]); wi = c_im(W[12 * l1]); tmpr = c_re(jp[12 * m]); tmpi = c_im(jp[12 * m]); r5_12 = ((wr * tmpr) - (wi * tmpi)); i5_12 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[28 * l1]); wi = c_im(W[28 * l1]); tmpr = c_re(jp[28 * m]); tmpi = c_im(jp[28 * m]); r5_28 = ((wr * tmpr) - (wi * tmpi)); i5_28 = ((wi * tmpr) + (wr * tmpi)); r4_12 = (r5_12 + r5_28); i4_12 = (i5_12 + i5_28); r4_28 = (r5_12 - r5_28); i4_28 = (i5_12 - i5_28); } r3_4 = (r4_4 + r4_12); i3_4 = (i4_4 + i4_12); r3_20 = (r4_4 - r4_12); i3_20 = (i4_4 - i4_12); r3_12 = (r4_20 + i4_28); i3_12 = (i4_20 - r4_28); r3_28 = (r4_20 - i4_28); i3_28 = (i4_20 + r4_28); } r2_0 = (r3_0 + r3_4); i2_0 = (i3_0 + i3_4); r2_16 = (r3_0 - r3_4); i2_16 = (i3_0 - i3_4); tmpr = (0.707106781187 * (r3_12 + i3_12)); tmpi = (0.707106781187 * (i3_12 - r3_12)); r2_4 = (r3_8 + tmpr); i2_4 = (i3_8 + tmpi); r2_20 = (r3_8 - tmpr); i2_20 = (i3_8 - tmpi); r2_8 = (r3_16 + i3_20); i2_8 = (i3_16 - r3_20); r2_24 = (r3_16 - i3_20); i2_24 = (i3_16 + r3_20); tmpr = (0.707106781187 * (i3_28 - r3_28)); tmpi = (0.707106781187 * (r3_28 + i3_28)); r2_12 = (r3_24 + tmpr); i2_12 = (i3_24 - tmpi); r2_28 = (r3_24 - tmpr); i2_28 = (i3_24 + tmpi); } { REAL r3_2, i3_2; REAL r3_6, i3_6; REAL r3_10, i3_10; REAL r3_14, i3_14; REAL r3_18, i3_18; REAL r3_22, i3_22; REAL r3_26, i3_26; REAL r3_30, i3_30; { REAL r4_2, i4_2; REAL r4_10, i4_10; REAL r4_18, i4_18; REAL r4_26, i4_26; { REAL r5_2, i5_2; REAL r5_18, i5_18; wr = c_re(W[2 * l1]); wi = c_im(W[2 * l1]); tmpr = c_re(jp[2 * m]); tmpi = c_im(jp[2 * m]); r5_2 = ((wr * tmpr) - (wi * tmpi)); i5_2 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[18 * l1]); wi = c_im(W[18 * l1]); tmpr = c_re(jp[18 * m]); tmpi = c_im(jp[18 * m]); r5_18 = ((wr * tmpr) - (wi * tmpi)); i5_18 = ((wi * tmpr) + (wr * tmpi)); r4_2 = (r5_2 + r5_18); i4_2 = (i5_2 + i5_18); r4_18 = (r5_2 - r5_18); i4_18 = (i5_2 - i5_18); } { REAL r5_10, i5_10; REAL r5_26, i5_26; wr = c_re(W[10 * l1]); wi = c_im(W[10 * l1]); tmpr = c_re(jp[10 * m]); tmpi = c_im(jp[10 * m]); r5_10 = ((wr * tmpr) - (wi * tmpi)); i5_10 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[26 * l1]); wi = c_im(W[26 * l1]); tmpr = c_re(jp[26 * m]); tmpi = c_im(jp[26 * m]); r5_26 = ((wr * tmpr) - (wi * tmpi)); i5_26 = ((wi * tmpr) + (wr * tmpi)); r4_10 = (r5_10 + r5_26); i4_10 = (i5_10 + i5_26); r4_26 = (r5_10 - r5_26); i4_26 = (i5_10 - i5_26); } r3_2 = (r4_2 + r4_10); i3_2 = (i4_2 + i4_10); r3_18 = (r4_2 - r4_10); i3_18 = (i4_2 - i4_10); r3_10 = (r4_18 + i4_26); i3_10 = (i4_18 - r4_26); r3_26 = (r4_18 - i4_26); i3_26 = (i4_18 + r4_26); } { REAL r4_6, i4_6; REAL r4_14, i4_14; REAL r4_22, i4_22; REAL r4_30, i4_30; { REAL r5_6, i5_6; REAL r5_22, i5_22; wr = c_re(W[6 * l1]); wi = c_im(W[6 * l1]); tmpr = c_re(jp[6 * m]); tmpi = c_im(jp[6 * m]); r5_6 = ((wr * tmpr) - (wi * tmpi)); i5_6 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[22 * l1]); wi = c_im(W[22 * l1]); tmpr = c_re(jp[22 * m]); tmpi = c_im(jp[22 * m]); r5_22 = ((wr * tmpr) - (wi * tmpi)); i5_22 = ((wi * tmpr) + (wr * tmpi)); r4_6 = (r5_6 + r5_22); i4_6 = (i5_6 + i5_22); r4_22 = (r5_6 - r5_22); i4_22 = (i5_6 - i5_22); } { REAL r5_14, i5_14; REAL r5_30, i5_30; wr = c_re(W[14 * l1]); wi = c_im(W[14 * l1]); tmpr = c_re(jp[14 * m]); tmpi = c_im(jp[14 * m]); r5_14 = ((wr * tmpr) - (wi * tmpi)); i5_14 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[30 * l1]); wi = c_im(W[30 * l1]); tmpr = c_re(jp[30 * m]); tmpi = c_im(jp[30 * m]); r5_30 = ((wr * tmpr) - (wi * tmpi)); i5_30 = ((wi * tmpr) + (wr * tmpi)); r4_14 = (r5_14 + r5_30); i4_14 = (i5_14 + i5_30); r4_30 = (r5_14 - r5_30); i4_30 = (i5_14 - i5_30); } r3_6 = (r4_6 + r4_14); i3_6 = (i4_6 + i4_14); r3_22 = (r4_6 - r4_14); i3_22 = (i4_6 - i4_14); r3_14 = (r4_22 + i4_30); i3_14 = (i4_22 - r4_30); r3_30 = (r4_22 - i4_30); i3_30 = (i4_22 + r4_30); } r2_2 = (r3_2 + r3_6); i2_2 = (i3_2 + i3_6); r2_18 = (r3_2 - r3_6); i2_18 = (i3_2 - i3_6); tmpr = (0.707106781187 * (r3_14 + i3_14)); tmpi = (0.707106781187 * (i3_14 - r3_14)); r2_6 = (r3_10 + tmpr); i2_6 = (i3_10 + tmpi); r2_22 = (r3_10 - tmpr); i2_22 = (i3_10 - tmpi); r2_10 = (r3_18 + i3_22); i2_10 = (i3_18 - r3_22); r2_26 = (r3_18 - i3_22); i2_26 = (i3_18 + r3_22); tmpr = (0.707106781187 * (i3_30 - r3_30)); tmpi = (0.707106781187 * (r3_30 + i3_30)); r2_14 = (r3_26 + tmpr); i2_14 = (i3_26 - tmpi); r2_30 = (r3_26 - tmpr); i2_30 = (i3_26 + tmpi); } r1_0 = (r2_0 + r2_2); i1_0 = (i2_0 + i2_2); r1_16 = (r2_0 - r2_2); i1_16 = (i2_0 - i2_2); tmpr = ((0.923879532511 * r2_6) + (0.382683432365 * i2_6)); tmpi = ((0.923879532511 * i2_6) - (0.382683432365 * r2_6)); r1_2 = (r2_4 + tmpr); i1_2 = (i2_4 + tmpi); r1_18 = (r2_4 - tmpr); i1_18 = (i2_4 - tmpi); tmpr = (0.707106781187 * (r2_10 + i2_10)); tmpi = (0.707106781187 * (i2_10 - r2_10)); r1_4 = (r2_8 + tmpr); i1_4 = (i2_8 + tmpi); r1_20 = (r2_8 - tmpr); i1_20 = (i2_8 - tmpi); tmpr = ((0.382683432365 * r2_14) + (0.923879532511 * i2_14)); tmpi = ((0.382683432365 * i2_14) - (0.923879532511 * r2_14)); r1_6 = (r2_12 + tmpr); i1_6 = (i2_12 + tmpi); r1_22 = (r2_12 - tmpr); i1_22 = (i2_12 - tmpi); r1_8 = (r2_16 + i2_18); i1_8 = (i2_16 - r2_18); r1_24 = (r2_16 - i2_18); i1_24 = (i2_16 + r2_18); tmpr = ((0.923879532511 * i2_22) - (0.382683432365 * r2_22)); tmpi = ((0.923879532511 * r2_22) + (0.382683432365 * i2_22)); r1_10 = (r2_20 + tmpr); i1_10 = (i2_20 - tmpi); r1_26 = (r2_20 - tmpr); i1_26 = (i2_20 + tmpi); tmpr = (0.707106781187 * (i2_26 - r2_26)); tmpi = (0.707106781187 * (r2_26 + i2_26)); r1_12 = (r2_24 + tmpr); i1_12 = (i2_24 - tmpi); r1_28 = (r2_24 - tmpr); i1_28 = (i2_24 + tmpi); tmpr = ((0.382683432365 * i2_30) - (0.923879532511 * r2_30)); tmpi = ((0.382683432365 * r2_30) + (0.923879532511 * i2_30)); r1_14 = (r2_28 + tmpr); i1_14 = (i2_28 - tmpi); r1_30 = (r2_28 - tmpr); i1_30 = (i2_28 + tmpi); } { REAL r2_1, i2_1; REAL r2_3, i2_3; REAL r2_5, i2_5; REAL r2_7, i2_7; REAL r2_9, i2_9; REAL r2_11, i2_11; REAL r2_13, i2_13; REAL r2_15, i2_15; REAL r2_17, i2_17; REAL r2_19, i2_19; REAL r2_21, i2_21; REAL r2_23, i2_23; REAL r2_25, i2_25; REAL r2_27, i2_27; REAL r2_29, i2_29; REAL r2_31, i2_31; { REAL r3_1, i3_1; REAL r3_5, i3_5; REAL r3_9, i3_9; REAL r3_13, i3_13; REAL r3_17, i3_17; REAL r3_21, i3_21; REAL r3_25, i3_25; REAL r3_29, i3_29; { REAL r4_1, i4_1; REAL r4_9, i4_9; REAL r4_17, i4_17; REAL r4_25, i4_25; { REAL r5_1, i5_1; REAL r5_17, i5_17; wr = c_re(W[1 * l1]); wi = c_im(W[1 * l1]); tmpr = c_re(jp[1 * m]); tmpi = c_im(jp[1 * m]); r5_1 = ((wr * tmpr) - (wi * tmpi)); i5_1 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[17 * l1]); wi = c_im(W[17 * l1]); tmpr = c_re(jp[17 * m]); tmpi = c_im(jp[17 * m]); r5_17 = ((wr * tmpr) - (wi * tmpi)); i5_17 = ((wi * tmpr) + (wr * tmpi)); r4_1 = (r5_1 + r5_17); i4_1 = (i5_1 + i5_17); r4_17 = (r5_1 - r5_17); i4_17 = (i5_1 - i5_17); } { REAL r5_9, i5_9; REAL r5_25, i5_25; wr = c_re(W[9 * l1]); wi = c_im(W[9 * l1]); tmpr = c_re(jp[9 * m]); tmpi = c_im(jp[9 * m]); r5_9 = ((wr * tmpr) - (wi * tmpi)); i5_9 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[25 * l1]); wi = c_im(W[25 * l1]); tmpr = c_re(jp[25 * m]); tmpi = c_im(jp[25 * m]); r5_25 = ((wr * tmpr) - (wi * tmpi)); i5_25 = ((wi * tmpr) + (wr * tmpi)); r4_9 = (r5_9 + r5_25); i4_9 = (i5_9 + i5_25); r4_25 = (r5_9 - r5_25); i4_25 = (i5_9 - i5_25); } r3_1 = (r4_1 + r4_9); i3_1 = (i4_1 + i4_9); r3_17 = (r4_1 - r4_9); i3_17 = (i4_1 - i4_9); r3_9 = (r4_17 + i4_25); i3_9 = (i4_17 - r4_25); r3_25 = (r4_17 - i4_25); i3_25 = (i4_17 + r4_25); } { REAL r4_5, i4_5; REAL r4_13, i4_13; REAL r4_21, i4_21; REAL r4_29, i4_29; { REAL r5_5, i5_5; REAL r5_21, i5_21; wr = c_re(W[5 * l1]); wi = c_im(W[5 * l1]); tmpr = c_re(jp[5 * m]); tmpi = c_im(jp[5 * m]); r5_5 = ((wr * tmpr) - (wi * tmpi)); i5_5 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[21 * l1]); wi = c_im(W[21 * l1]); tmpr = c_re(jp[21 * m]); tmpi = c_im(jp[21 * m]); r5_21 = ((wr * tmpr) - (wi * tmpi)); i5_21 = ((wi * tmpr) + (wr * tmpi)); r4_5 = (r5_5 + r5_21); i4_5 = (i5_5 + i5_21); r4_21 = (r5_5 - r5_21); i4_21 = (i5_5 - i5_21); } { REAL r5_13, i5_13; REAL r5_29, i5_29; wr = c_re(W[13 * l1]); wi = c_im(W[13 * l1]); tmpr = c_re(jp[13 * m]); tmpi = c_im(jp[13 * m]); r5_13 = ((wr * tmpr) - (wi * tmpi)); i5_13 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[29 * l1]); wi = c_im(W[29 * l1]); tmpr = c_re(jp[29 * m]); tmpi = c_im(jp[29 * m]); r5_29 = ((wr * tmpr) - (wi * tmpi)); i5_29 = ((wi * tmpr) + (wr * tmpi)); r4_13 = (r5_13 + r5_29); i4_13 = (i5_13 + i5_29); r4_29 = (r5_13 - r5_29); i4_29 = (i5_13 - i5_29); } r3_5 = (r4_5 + r4_13); i3_5 = (i4_5 + i4_13); r3_21 = (r4_5 - r4_13); i3_21 = (i4_5 - i4_13); r3_13 = (r4_21 + i4_29); i3_13 = (i4_21 - r4_29); r3_29 = (r4_21 - i4_29); i3_29 = (i4_21 + r4_29); } r2_1 = (r3_1 + r3_5); i2_1 = (i3_1 + i3_5); r2_17 = (r3_1 - r3_5); i2_17 = (i3_1 - i3_5); tmpr = (0.707106781187 * (r3_13 + i3_13)); tmpi = (0.707106781187 * (i3_13 - r3_13)); r2_5 = (r3_9 + tmpr); i2_5 = (i3_9 + tmpi); r2_21 = (r3_9 - tmpr); i2_21 = (i3_9 - tmpi); r2_9 = (r3_17 + i3_21); i2_9 = (i3_17 - r3_21); r2_25 = (r3_17 - i3_21); i2_25 = (i3_17 + r3_21); tmpr = (0.707106781187 * (i3_29 - r3_29)); tmpi = (0.707106781187 * (r3_29 + i3_29)); r2_13 = (r3_25 + tmpr); i2_13 = (i3_25 - tmpi); r2_29 = (r3_25 - tmpr); i2_29 = (i3_25 + tmpi); } { REAL r3_3, i3_3; REAL r3_7, i3_7; REAL r3_11, i3_11; REAL r3_15, i3_15; REAL r3_19, i3_19; REAL r3_23, i3_23; REAL r3_27, i3_27; REAL r3_31, i3_31; { REAL r4_3, i4_3; REAL r4_11, i4_11; REAL r4_19, i4_19; REAL r4_27, i4_27; { REAL r5_3, i5_3; REAL r5_19, i5_19; wr = c_re(W[3 * l1]); wi = c_im(W[3 * l1]); tmpr = c_re(jp[3 * m]); tmpi = c_im(jp[3 * m]); r5_3 = ((wr * tmpr) - (wi * tmpi)); i5_3 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[19 * l1]); wi = c_im(W[19 * l1]); tmpr = c_re(jp[19 * m]); tmpi = c_im(jp[19 * m]); r5_19 = ((wr * tmpr) - (wi * tmpi)); i5_19 = ((wi * tmpr) + (wr * tmpi)); r4_3 = (r5_3 + r5_19); i4_3 = (i5_3 + i5_19); r4_19 = (r5_3 - r5_19); i4_19 = (i5_3 - i5_19); } { REAL r5_11, i5_11; REAL r5_27, i5_27; wr = c_re(W[11 * l1]); wi = c_im(W[11 * l1]); tmpr = c_re(jp[11 * m]); tmpi = c_im(jp[11 * m]); r5_11 = ((wr * tmpr) - (wi * tmpi)); i5_11 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[27 * l1]); wi = c_im(W[27 * l1]); tmpr = c_re(jp[27 * m]); tmpi = c_im(jp[27 * m]); r5_27 = ((wr * tmpr) - (wi * tmpi)); i5_27 = ((wi * tmpr) + (wr * tmpi)); r4_11 = (r5_11 + r5_27); i4_11 = (i5_11 + i5_27); r4_27 = (r5_11 - r5_27); i4_27 = (i5_11 - i5_27); } r3_3 = (r4_3 + r4_11); i3_3 = (i4_3 + i4_11); r3_19 = (r4_3 - r4_11); i3_19 = (i4_3 - i4_11); r3_11 = (r4_19 + i4_27); i3_11 = (i4_19 - r4_27); r3_27 = (r4_19 - i4_27); i3_27 = (i4_19 + r4_27); } { REAL r4_7, i4_7; REAL r4_15, i4_15; REAL r4_23, i4_23; REAL r4_31, i4_31; { REAL r5_7, i5_7; REAL r5_23, i5_23; wr = c_re(W[7 * l1]); wi = c_im(W[7 * l1]); tmpr = c_re(jp[7 * m]); tmpi = c_im(jp[7 * m]); r5_7 = ((wr * tmpr) - (wi * tmpi)); i5_7 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[23 * l1]); wi = c_im(W[23 * l1]); tmpr = c_re(jp[23 * m]); tmpi = c_im(jp[23 * m]); r5_23 = ((wr * tmpr) - (wi * tmpi)); i5_23 = ((wi * tmpr) + (wr * tmpi)); r4_7 = (r5_7 + r5_23); i4_7 = (i5_7 + i5_23); r4_23 = (r5_7 - r5_23); i4_23 = (i5_7 - i5_23); } { REAL r5_15, i5_15; REAL r5_31, i5_31; wr = c_re(W[15 * l1]); wi = c_im(W[15 * l1]); tmpr = c_re(jp[15 * m]); tmpi = c_im(jp[15 * m]); r5_15 = ((wr * tmpr) - (wi * tmpi)); i5_15 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[31 * l1]); wi = c_im(W[31 * l1]); tmpr = c_re(jp[31 * m]); tmpi = c_im(jp[31 * m]); r5_31 = ((wr * tmpr) - (wi * tmpi)); i5_31 = ((wi * tmpr) + (wr * tmpi)); r4_15 = (r5_15 + r5_31); i4_15 = (i5_15 + i5_31); r4_31 = (r5_15 - r5_31); i4_31 = (i5_15 - i5_31); } r3_7 = (r4_7 + r4_15); i3_7 = (i4_7 + i4_15); r3_23 = (r4_7 - r4_15); i3_23 = (i4_7 - i4_15); r3_15 = (r4_23 + i4_31); i3_15 = (i4_23 - r4_31); r3_31 = (r4_23 - i4_31); i3_31 = (i4_23 + r4_31); } r2_3 = (r3_3 + r3_7); i2_3 = (i3_3 + i3_7); r2_19 = (r3_3 - r3_7); i2_19 = (i3_3 - i3_7); tmpr = (0.707106781187 * (r3_15 + i3_15)); tmpi = (0.707106781187 * (i3_15 - r3_15)); r2_7 = (r3_11 + tmpr); i2_7 = (i3_11 + tmpi); r2_23 = (r3_11 - tmpr); i2_23 = (i3_11 - tmpi); r2_11 = (r3_19 + i3_23); i2_11 = (i3_19 - r3_23); r2_27 = (r3_19 - i3_23); i2_27 = (i3_19 + r3_23); tmpr = (0.707106781187 * (i3_31 - r3_31)); tmpi = (0.707106781187 * (r3_31 + i3_31)); r2_15 = (r3_27 + tmpr); i2_15 = (i3_27 - tmpi); r2_31 = (r3_27 - tmpr); i2_31 = (i3_27 + tmpi); } r1_1 = (r2_1 + r2_3); i1_1 = (i2_1 + i2_3); r1_17 = (r2_1 - r2_3); i1_17 = (i2_1 - i2_3); tmpr = ((0.923879532511 * r2_7) + (0.382683432365 * i2_7)); tmpi = ((0.923879532511 * i2_7) - (0.382683432365 * r2_7)); r1_3 = (r2_5 + tmpr); i1_3 = (i2_5 + tmpi); r1_19 = (r2_5 - tmpr); i1_19 = (i2_5 - tmpi); tmpr = (0.707106781187 * (r2_11 + i2_11)); tmpi = (0.707106781187 * (i2_11 - r2_11)); r1_5 = (r2_9 + tmpr); i1_5 = (i2_9 + tmpi); r1_21 = (r2_9 - tmpr); i1_21 = (i2_9 - tmpi); tmpr = ((0.382683432365 * r2_15) + (0.923879532511 * i2_15)); tmpi = ((0.382683432365 * i2_15) - (0.923879532511 * r2_15)); r1_7 = (r2_13 + tmpr); i1_7 = (i2_13 + tmpi); r1_23 = (r2_13 - tmpr); i1_23 = (i2_13 - tmpi); r1_9 = (r2_17 + i2_19); i1_9 = (i2_17 - r2_19); r1_25 = (r2_17 - i2_19); i1_25 = (i2_17 + r2_19); tmpr = ((0.923879532511 * i2_23) - (0.382683432365 * r2_23)); tmpi = ((0.923879532511 * r2_23) + (0.382683432365 * i2_23)); r1_11 = (r2_21 + tmpr); i1_11 = (i2_21 - tmpi); r1_27 = (r2_21 - tmpr); i1_27 = (i2_21 + tmpi); tmpr = (0.707106781187 * (i2_27 - r2_27)); tmpi = (0.707106781187 * (r2_27 + i2_27)); r1_13 = (r2_25 + tmpr); i1_13 = (i2_25 - tmpi); r1_29 = (r2_25 - tmpr); i1_29 = (i2_25 + tmpi); tmpr = ((0.382683432365 * i2_31) - (0.923879532511 * r2_31)); tmpi = ((0.382683432365 * r2_31) + (0.923879532511 * i2_31)); r1_15 = (r2_29 + tmpr); i1_15 = (i2_29 - tmpi); r1_31 = (r2_29 - tmpr); i1_31 = (i2_29 + tmpi); } c_re(kp[0 * m]) = (r1_0 + r1_1); c_im(kp[0 * m]) = (i1_0 + i1_1); c_re(kp[16 * m]) = (r1_0 - r1_1); c_im(kp[16 * m]) = (i1_0 - i1_1); tmpr = ((0.980785280403 * r1_3) + (0.195090322016 * i1_3)); tmpi = ((0.980785280403 * i1_3) - (0.195090322016 * r1_3)); c_re(kp[1 * m]) = (r1_2 + tmpr); c_im(kp[1 * m]) = (i1_2 + tmpi); c_re(kp[17 * m]) = (r1_2 - tmpr); c_im(kp[17 * m]) = (i1_2 - tmpi); tmpr = ((0.923879532511 * r1_5) + (0.382683432365 * i1_5)); tmpi = ((0.923879532511 * i1_5) - (0.382683432365 * r1_5)); c_re(kp[2 * m]) = (r1_4 + tmpr); c_im(kp[2 * m]) = (i1_4 + tmpi); c_re(kp[18 * m]) = (r1_4 - tmpr); c_im(kp[18 * m]) = (i1_4 - tmpi); tmpr = ((0.831469612303 * r1_7) + (0.55557023302 * i1_7)); tmpi = ((0.831469612303 * i1_7) - (0.55557023302 * r1_7)); c_re(kp[3 * m]) = (r1_6 + tmpr); c_im(kp[3 * m]) = (i1_6 + tmpi); c_re(kp[19 * m]) = (r1_6 - tmpr); c_im(kp[19 * m]) = (i1_6 - tmpi); tmpr = (0.707106781187 * (r1_9 + i1_9)); tmpi = (0.707106781187 * (i1_9 - r1_9)); c_re(kp[4 * m]) = (r1_8 + tmpr); c_im(kp[4 * m]) = (i1_8 + tmpi); c_re(kp[20 * m]) = (r1_8 - tmpr); c_im(kp[20 * m]) = (i1_8 - tmpi); tmpr = ((0.55557023302 * r1_11) + (0.831469612303 * i1_11)); tmpi = ((0.55557023302 * i1_11) - (0.831469612303 * r1_11)); c_re(kp[5 * m]) = (r1_10 + tmpr); c_im(kp[5 * m]) = (i1_10 + tmpi); c_re(kp[21 * m]) = (r1_10 - tmpr); c_im(kp[21 * m]) = (i1_10 - tmpi); tmpr = ((0.382683432365 * r1_13) + (0.923879532511 * i1_13)); tmpi = ((0.382683432365 * i1_13) - (0.923879532511 * r1_13)); c_re(kp[6 * m]) = (r1_12 + tmpr); c_im(kp[6 * m]) = (i1_12 + tmpi); c_re(kp[22 * m]) = (r1_12 - tmpr); c_im(kp[22 * m]) = (i1_12 - tmpi); tmpr = ((0.195090322016 * r1_15) + (0.980785280403 * i1_15)); tmpi = ((0.195090322016 * i1_15) - (0.980785280403 * r1_15)); c_re(kp[7 * m]) = (r1_14 + tmpr); c_im(kp[7 * m]) = (i1_14 + tmpi); c_re(kp[23 * m]) = (r1_14 - tmpr); c_im(kp[23 * m]) = (i1_14 - tmpi); c_re(kp[8 * m]) = (r1_16 + i1_17); c_im(kp[8 * m]) = (i1_16 - r1_17); c_re(kp[24 * m]) = (r1_16 - i1_17); c_im(kp[24 * m]) = (i1_16 + r1_17); tmpr = ((0.980785280403 * i1_19) - (0.195090322016 * r1_19)); tmpi = ((0.980785280403 * r1_19) + (0.195090322016 * i1_19)); c_re(kp[9 * m]) = (r1_18 + tmpr); c_im(kp[9 * m]) = (i1_18 - tmpi); c_re(kp[25 * m]) = (r1_18 - tmpr); c_im(kp[25 * m]) = (i1_18 + tmpi); tmpr = ((0.923879532511 * i1_21) - (0.382683432365 * r1_21)); tmpi = ((0.923879532511 * r1_21) + (0.382683432365 * i1_21)); c_re(kp[10 * m]) = (r1_20 + tmpr); c_im(kp[10 * m]) = (i1_20 - tmpi); c_re(kp[26 * m]) = (r1_20 - tmpr); c_im(kp[26 * m]) = (i1_20 + tmpi); tmpr = ((0.831469612303 * i1_23) - (0.55557023302 * r1_23)); tmpi = ((0.831469612303 * r1_23) + (0.55557023302 * i1_23)); c_re(kp[11 * m]) = (r1_22 + tmpr); c_im(kp[11 * m]) = (i1_22 - tmpi); c_re(kp[27 * m]) = (r1_22 - tmpr); c_im(kp[27 * m]) = (i1_22 + tmpi); tmpr = (0.707106781187 * (i1_25 - r1_25)); tmpi = (0.707106781187 * (r1_25 + i1_25)); c_re(kp[12 * m]) = (r1_24 + tmpr); c_im(kp[12 * m]) = (i1_24 - tmpi); c_re(kp[28 * m]) = (r1_24 - tmpr); c_im(kp[28 * m]) = (i1_24 + tmpi); tmpr = ((0.55557023302 * i1_27) - (0.831469612303 * r1_27)); tmpi = ((0.55557023302 * r1_27) + (0.831469612303 * i1_27)); c_re(kp[13 * m]) = (r1_26 + tmpr); c_im(kp[13 * m]) = (i1_26 - tmpi); c_re(kp[29 * m]) = (r1_26 - tmpr); c_im(kp[29 * m]) = (i1_26 + tmpi); tmpr = ((0.382683432365 * i1_29) - (0.923879532511 * r1_29)); tmpi = ((0.382683432365 * r1_29) + (0.923879532511 * i1_29)); c_re(kp[14 * m]) = (r1_28 + tmpr); c_im(kp[14 * m]) = (i1_28 - tmpi); c_re(kp[30 * m]) = (r1_28 - tmpr); c_im(kp[30 * m]) = (i1_28 + tmpi); tmpr = ((0.195090322016 * i1_31) - (0.980785280403 * r1_31)); tmpi = ((0.195090322016 * r1_31) + (0.980785280403 * i1_31)); c_re(kp[15 * m]) = (r1_30 + tmpr); c_im(kp[15 * m]) = (i1_30 - tmpi); c_re(kp[31 * m]) = (r1_30 - tmpr); c_im(kp[31 * m]) = (i1_30 + tmpi); } } } else { int ab = (a + b) / 2; #pragma omp task untied fft_twiddle_32(a, ab, in, out, W, nW, nWdn, m); #pragma omp task untied fft_twiddle_32(ab, b, in, out, W, nW, nWdn, m); #pragma omp taskwait } } void fft_twiddle_32_seq(int a, int b, COMPLEX * in, COMPLEX * out, COMPLEX * W, int nW, int nWdn, int m) { int l1, i; COMPLEX *jp, *kp; REAL tmpr, tmpi, wr, wi; if ((b - a) < 128) { for (i = a, l1 = nWdn * i, kp = out + i; i < b; i++, l1 += nWdn, kp++) { jp = in + i; { REAL r1_0, i1_0; REAL r1_1, i1_1; REAL r1_2, i1_2; REAL r1_3, i1_3; REAL r1_4, i1_4; REAL r1_5, i1_5; REAL r1_6, i1_6; REAL r1_7, i1_7; REAL r1_8, i1_8; REAL r1_9, i1_9; REAL r1_10, i1_10; REAL r1_11, i1_11; REAL r1_12, i1_12; REAL r1_13, i1_13; REAL r1_14, i1_14; REAL r1_15, i1_15; REAL r1_16, i1_16; REAL r1_17, i1_17; REAL r1_18, i1_18; REAL r1_19, i1_19; REAL r1_20, i1_20; REAL r1_21, i1_21; REAL r1_22, i1_22; REAL r1_23, i1_23; REAL r1_24, i1_24; REAL r1_25, i1_25; REAL r1_26, i1_26; REAL r1_27, i1_27; REAL r1_28, i1_28; REAL r1_29, i1_29; REAL r1_30, i1_30; REAL r1_31, i1_31; { REAL r2_0, i2_0; REAL r2_2, i2_2; REAL r2_4, i2_4; REAL r2_6, i2_6; REAL r2_8, i2_8; REAL r2_10, i2_10; REAL r2_12, i2_12; REAL r2_14, i2_14; REAL r2_16, i2_16; REAL r2_18, i2_18; REAL r2_20, i2_20; REAL r2_22, i2_22; REAL r2_24, i2_24; REAL r2_26, i2_26; REAL r2_28, i2_28; REAL r2_30, i2_30; { REAL r3_0, i3_0; REAL r3_4, i3_4; REAL r3_8, i3_8; REAL r3_12, i3_12; REAL r3_16, i3_16; REAL r3_20, i3_20; REAL r3_24, i3_24; REAL r3_28, i3_28; { REAL r4_0, i4_0; REAL r4_8, i4_8; REAL r4_16, i4_16; REAL r4_24, i4_24; { REAL r5_0, i5_0; REAL r5_16, i5_16; r5_0 = c_re(jp[0 * m]); i5_0 = c_im(jp[0 * m]); wr = c_re(W[16 * l1]); wi = c_im(W[16 * l1]); tmpr = c_re(jp[16 * m]); tmpi = c_im(jp[16 * m]); r5_16 = ((wr * tmpr) - (wi * tmpi)); i5_16 = ((wi * tmpr) + (wr * tmpi)); r4_0 = (r5_0 + r5_16); i4_0 = (i5_0 + i5_16); r4_16 = (r5_0 - r5_16); i4_16 = (i5_0 - i5_16); } { REAL r5_8, i5_8; REAL r5_24, i5_24; wr = c_re(W[8 * l1]); wi = c_im(W[8 * l1]); tmpr = c_re(jp[8 * m]); tmpi = c_im(jp[8 * m]); r5_8 = ((wr * tmpr) - (wi * tmpi)); i5_8 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[24 * l1]); wi = c_im(W[24 * l1]); tmpr = c_re(jp[24 * m]); tmpi = c_im(jp[24 * m]); r5_24 = ((wr * tmpr) - (wi * tmpi)); i5_24 = ((wi * tmpr) + (wr * tmpi)); r4_8 = (r5_8 + r5_24); i4_8 = (i5_8 + i5_24); r4_24 = (r5_8 - r5_24); i4_24 = (i5_8 - i5_24); } r3_0 = (r4_0 + r4_8); i3_0 = (i4_0 + i4_8); r3_16 = (r4_0 - r4_8); i3_16 = (i4_0 - i4_8); r3_8 = (r4_16 + i4_24); i3_8 = (i4_16 - r4_24); r3_24 = (r4_16 - i4_24); i3_24 = (i4_16 + r4_24); } { REAL r4_4, i4_4; REAL r4_12, i4_12; REAL r4_20, i4_20; REAL r4_28, i4_28; { REAL r5_4, i5_4; REAL r5_20, i5_20; wr = c_re(W[4 * l1]); wi = c_im(W[4 * l1]); tmpr = c_re(jp[4 * m]); tmpi = c_im(jp[4 * m]); r5_4 = ((wr * tmpr) - (wi * tmpi)); i5_4 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[20 * l1]); wi = c_im(W[20 * l1]); tmpr = c_re(jp[20 * m]); tmpi = c_im(jp[20 * m]); r5_20 = ((wr * tmpr) - (wi * tmpi)); i5_20 = ((wi * tmpr) + (wr * tmpi)); r4_4 = (r5_4 + r5_20); i4_4 = (i5_4 + i5_20); r4_20 = (r5_4 - r5_20); i4_20 = (i5_4 - i5_20); } { REAL r5_12, i5_12; REAL r5_28, i5_28; wr = c_re(W[12 * l1]); wi = c_im(W[12 * l1]); tmpr = c_re(jp[12 * m]); tmpi = c_im(jp[12 * m]); r5_12 = ((wr * tmpr) - (wi * tmpi)); i5_12 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[28 * l1]); wi = c_im(W[28 * l1]); tmpr = c_re(jp[28 * m]); tmpi = c_im(jp[28 * m]); r5_28 = ((wr * tmpr) - (wi * tmpi)); i5_28 = ((wi * tmpr) + (wr * tmpi)); r4_12 = (r5_12 + r5_28); i4_12 = (i5_12 + i5_28); r4_28 = (r5_12 - r5_28); i4_28 = (i5_12 - i5_28); } r3_4 = (r4_4 + r4_12); i3_4 = (i4_4 + i4_12); r3_20 = (r4_4 - r4_12); i3_20 = (i4_4 - i4_12); r3_12 = (r4_20 + i4_28); i3_12 = (i4_20 - r4_28); r3_28 = (r4_20 - i4_28); i3_28 = (i4_20 + r4_28); } r2_0 = (r3_0 + r3_4); i2_0 = (i3_0 + i3_4); r2_16 = (r3_0 - r3_4); i2_16 = (i3_0 - i3_4); tmpr = (0.707106781187 * (r3_12 + i3_12)); tmpi = (0.707106781187 * (i3_12 - r3_12)); r2_4 = (r3_8 + tmpr); i2_4 = (i3_8 + tmpi); r2_20 = (r3_8 - tmpr); i2_20 = (i3_8 - tmpi); r2_8 = (r3_16 + i3_20); i2_8 = (i3_16 - r3_20); r2_24 = (r3_16 - i3_20); i2_24 = (i3_16 + r3_20); tmpr = (0.707106781187 * (i3_28 - r3_28)); tmpi = (0.707106781187 * (r3_28 + i3_28)); r2_12 = (r3_24 + tmpr); i2_12 = (i3_24 - tmpi); r2_28 = (r3_24 - tmpr); i2_28 = (i3_24 + tmpi); } { REAL r3_2, i3_2; REAL r3_6, i3_6; REAL r3_10, i3_10; REAL r3_14, i3_14; REAL r3_18, i3_18; REAL r3_22, i3_22; REAL r3_26, i3_26; REAL r3_30, i3_30; { REAL r4_2, i4_2; REAL r4_10, i4_10; REAL r4_18, i4_18; REAL r4_26, i4_26; { REAL r5_2, i5_2; REAL r5_18, i5_18; wr = c_re(W[2 * l1]); wi = c_im(W[2 * l1]); tmpr = c_re(jp[2 * m]); tmpi = c_im(jp[2 * m]); r5_2 = ((wr * tmpr) - (wi * tmpi)); i5_2 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[18 * l1]); wi = c_im(W[18 * l1]); tmpr = c_re(jp[18 * m]); tmpi = c_im(jp[18 * m]); r5_18 = ((wr * tmpr) - (wi * tmpi)); i5_18 = ((wi * tmpr) + (wr * tmpi)); r4_2 = (r5_2 + r5_18); i4_2 = (i5_2 + i5_18); r4_18 = (r5_2 - r5_18); i4_18 = (i5_2 - i5_18); } { REAL r5_10, i5_10; REAL r5_26, i5_26; wr = c_re(W[10 * l1]); wi = c_im(W[10 * l1]); tmpr = c_re(jp[10 * m]); tmpi = c_im(jp[10 * m]); r5_10 = ((wr * tmpr) - (wi * tmpi)); i5_10 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[26 * l1]); wi = c_im(W[26 * l1]); tmpr = c_re(jp[26 * m]); tmpi = c_im(jp[26 * m]); r5_26 = ((wr * tmpr) - (wi * tmpi)); i5_26 = ((wi * tmpr) + (wr * tmpi)); r4_10 = (r5_10 + r5_26); i4_10 = (i5_10 + i5_26); r4_26 = (r5_10 - r5_26); i4_26 = (i5_10 - i5_26); } r3_2 = (r4_2 + r4_10); i3_2 = (i4_2 + i4_10); r3_18 = (r4_2 - r4_10); i3_18 = (i4_2 - i4_10); r3_10 = (r4_18 + i4_26); i3_10 = (i4_18 - r4_26); r3_26 = (r4_18 - i4_26); i3_26 = (i4_18 + r4_26); } { REAL r4_6, i4_6; REAL r4_14, i4_14; REAL r4_22, i4_22; REAL r4_30, i4_30; { REAL r5_6, i5_6; REAL r5_22, i5_22; wr = c_re(W[6 * l1]); wi = c_im(W[6 * l1]); tmpr = c_re(jp[6 * m]); tmpi = c_im(jp[6 * m]); r5_6 = ((wr * tmpr) - (wi * tmpi)); i5_6 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[22 * l1]); wi = c_im(W[22 * l1]); tmpr = c_re(jp[22 * m]); tmpi = c_im(jp[22 * m]); r5_22 = ((wr * tmpr) - (wi * tmpi)); i5_22 = ((wi * tmpr) + (wr * tmpi)); r4_6 = (r5_6 + r5_22); i4_6 = (i5_6 + i5_22); r4_22 = (r5_6 - r5_22); i4_22 = (i5_6 - i5_22); } { REAL r5_14, i5_14; REAL r5_30, i5_30; wr = c_re(W[14 * l1]); wi = c_im(W[14 * l1]); tmpr = c_re(jp[14 * m]); tmpi = c_im(jp[14 * m]); r5_14 = ((wr * tmpr) - (wi * tmpi)); i5_14 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[30 * l1]); wi = c_im(W[30 * l1]); tmpr = c_re(jp[30 * m]); tmpi = c_im(jp[30 * m]); r5_30 = ((wr * tmpr) - (wi * tmpi)); i5_30 = ((wi * tmpr) + (wr * tmpi)); r4_14 = (r5_14 + r5_30); i4_14 = (i5_14 + i5_30); r4_30 = (r5_14 - r5_30); i4_30 = (i5_14 - i5_30); } r3_6 = (r4_6 + r4_14); i3_6 = (i4_6 + i4_14); r3_22 = (r4_6 - r4_14); i3_22 = (i4_6 - i4_14); r3_14 = (r4_22 + i4_30); i3_14 = (i4_22 - r4_30); r3_30 = (r4_22 - i4_30); i3_30 = (i4_22 + r4_30); } r2_2 = (r3_2 + r3_6); i2_2 = (i3_2 + i3_6); r2_18 = (r3_2 - r3_6); i2_18 = (i3_2 - i3_6); tmpr = (0.707106781187 * (r3_14 + i3_14)); tmpi = (0.707106781187 * (i3_14 - r3_14)); r2_6 = (r3_10 + tmpr); i2_6 = (i3_10 + tmpi); r2_22 = (r3_10 - tmpr); i2_22 = (i3_10 - tmpi); r2_10 = (r3_18 + i3_22); i2_10 = (i3_18 - r3_22); r2_26 = (r3_18 - i3_22); i2_26 = (i3_18 + r3_22); tmpr = (0.707106781187 * (i3_30 - r3_30)); tmpi = (0.707106781187 * (r3_30 + i3_30)); r2_14 = (r3_26 + tmpr); i2_14 = (i3_26 - tmpi); r2_30 = (r3_26 - tmpr); i2_30 = (i3_26 + tmpi); } r1_0 = (r2_0 + r2_2); i1_0 = (i2_0 + i2_2); r1_16 = (r2_0 - r2_2); i1_16 = (i2_0 - i2_2); tmpr = ((0.923879532511 * r2_6) + (0.382683432365 * i2_6)); tmpi = ((0.923879532511 * i2_6) - (0.382683432365 * r2_6)); r1_2 = (r2_4 + tmpr); i1_2 = (i2_4 + tmpi); r1_18 = (r2_4 - tmpr); i1_18 = (i2_4 - tmpi); tmpr = (0.707106781187 * (r2_10 + i2_10)); tmpi = (0.707106781187 * (i2_10 - r2_10)); r1_4 = (r2_8 + tmpr); i1_4 = (i2_8 + tmpi); r1_20 = (r2_8 - tmpr); i1_20 = (i2_8 - tmpi); tmpr = ((0.382683432365 * r2_14) + (0.923879532511 * i2_14)); tmpi = ((0.382683432365 * i2_14) - (0.923879532511 * r2_14)); r1_6 = (r2_12 + tmpr); i1_6 = (i2_12 + tmpi); r1_22 = (r2_12 - tmpr); i1_22 = (i2_12 - tmpi); r1_8 = (r2_16 + i2_18); i1_8 = (i2_16 - r2_18); r1_24 = (r2_16 - i2_18); i1_24 = (i2_16 + r2_18); tmpr = ((0.923879532511 * i2_22) - (0.382683432365 * r2_22)); tmpi = ((0.923879532511 * r2_22) + (0.382683432365 * i2_22)); r1_10 = (r2_20 + tmpr); i1_10 = (i2_20 - tmpi); r1_26 = (r2_20 - tmpr); i1_26 = (i2_20 + tmpi); tmpr = (0.707106781187 * (i2_26 - r2_26)); tmpi = (0.707106781187 * (r2_26 + i2_26)); r1_12 = (r2_24 + tmpr); i1_12 = (i2_24 - tmpi); r1_28 = (r2_24 - tmpr); i1_28 = (i2_24 + tmpi); tmpr = ((0.382683432365 * i2_30) - (0.923879532511 * r2_30)); tmpi = ((0.382683432365 * r2_30) + (0.923879532511 * i2_30)); r1_14 = (r2_28 + tmpr); i1_14 = (i2_28 - tmpi); r1_30 = (r2_28 - tmpr); i1_30 = (i2_28 + tmpi); } { REAL r2_1, i2_1; REAL r2_3, i2_3; REAL r2_5, i2_5; REAL r2_7, i2_7; REAL r2_9, i2_9; REAL r2_11, i2_11; REAL r2_13, i2_13; REAL r2_15, i2_15; REAL r2_17, i2_17; REAL r2_19, i2_19; REAL r2_21, i2_21; REAL r2_23, i2_23; REAL r2_25, i2_25; REAL r2_27, i2_27; REAL r2_29, i2_29; REAL r2_31, i2_31; { REAL r3_1, i3_1; REAL r3_5, i3_5; REAL r3_9, i3_9; REAL r3_13, i3_13; REAL r3_17, i3_17; REAL r3_21, i3_21; REAL r3_25, i3_25; REAL r3_29, i3_29; { REAL r4_1, i4_1; REAL r4_9, i4_9; REAL r4_17, i4_17; REAL r4_25, i4_25; { REAL r5_1, i5_1; REAL r5_17, i5_17; wr = c_re(W[1 * l1]); wi = c_im(W[1 * l1]); tmpr = c_re(jp[1 * m]); tmpi = c_im(jp[1 * m]); r5_1 = ((wr * tmpr) - (wi * tmpi)); i5_1 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[17 * l1]); wi = c_im(W[17 * l1]); tmpr = c_re(jp[17 * m]); tmpi = c_im(jp[17 * m]); r5_17 = ((wr * tmpr) - (wi * tmpi)); i5_17 = ((wi * tmpr) + (wr * tmpi)); r4_1 = (r5_1 + r5_17); i4_1 = (i5_1 + i5_17); r4_17 = (r5_1 - r5_17); i4_17 = (i5_1 - i5_17); } { REAL r5_9, i5_9; REAL r5_25, i5_25; wr = c_re(W[9 * l1]); wi = c_im(W[9 * l1]); tmpr = c_re(jp[9 * m]); tmpi = c_im(jp[9 * m]); r5_9 = ((wr * tmpr) - (wi * tmpi)); i5_9 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[25 * l1]); wi = c_im(W[25 * l1]); tmpr = c_re(jp[25 * m]); tmpi = c_im(jp[25 * m]); r5_25 = ((wr * tmpr) - (wi * tmpi)); i5_25 = ((wi * tmpr) + (wr * tmpi)); r4_9 = (r5_9 + r5_25); i4_9 = (i5_9 + i5_25); r4_25 = (r5_9 - r5_25); i4_25 = (i5_9 - i5_25); } r3_1 = (r4_1 + r4_9); i3_1 = (i4_1 + i4_9); r3_17 = (r4_1 - r4_9); i3_17 = (i4_1 - i4_9); r3_9 = (r4_17 + i4_25); i3_9 = (i4_17 - r4_25); r3_25 = (r4_17 - i4_25); i3_25 = (i4_17 + r4_25); } { REAL r4_5, i4_5; REAL r4_13, i4_13; REAL r4_21, i4_21; REAL r4_29, i4_29; { REAL r5_5, i5_5; REAL r5_21, i5_21; wr = c_re(W[5 * l1]); wi = c_im(W[5 * l1]); tmpr = c_re(jp[5 * m]); tmpi = c_im(jp[5 * m]); r5_5 = ((wr * tmpr) - (wi * tmpi)); i5_5 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[21 * l1]); wi = c_im(W[21 * l1]); tmpr = c_re(jp[21 * m]); tmpi = c_im(jp[21 * m]); r5_21 = ((wr * tmpr) - (wi * tmpi)); i5_21 = ((wi * tmpr) + (wr * tmpi)); r4_5 = (r5_5 + r5_21); i4_5 = (i5_5 + i5_21); r4_21 = (r5_5 - r5_21); i4_21 = (i5_5 - i5_21); } { REAL r5_13, i5_13; REAL r5_29, i5_29; wr = c_re(W[13 * l1]); wi = c_im(W[13 * l1]); tmpr = c_re(jp[13 * m]); tmpi = c_im(jp[13 * m]); r5_13 = ((wr * tmpr) - (wi * tmpi)); i5_13 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[29 * l1]); wi = c_im(W[29 * l1]); tmpr = c_re(jp[29 * m]); tmpi = c_im(jp[29 * m]); r5_29 = ((wr * tmpr) - (wi * tmpi)); i5_29 = ((wi * tmpr) + (wr * tmpi)); r4_13 = (r5_13 + r5_29); i4_13 = (i5_13 + i5_29); r4_29 = (r5_13 - r5_29); i4_29 = (i5_13 - i5_29); } r3_5 = (r4_5 + r4_13); i3_5 = (i4_5 + i4_13); r3_21 = (r4_5 - r4_13); i3_21 = (i4_5 - i4_13); r3_13 = (r4_21 + i4_29); i3_13 = (i4_21 - r4_29); r3_29 = (r4_21 - i4_29); i3_29 = (i4_21 + r4_29); } r2_1 = (r3_1 + r3_5); i2_1 = (i3_1 + i3_5); r2_17 = (r3_1 - r3_5); i2_17 = (i3_1 - i3_5); tmpr = (0.707106781187 * (r3_13 + i3_13)); tmpi = (0.707106781187 * (i3_13 - r3_13)); r2_5 = (r3_9 + tmpr); i2_5 = (i3_9 + tmpi); r2_21 = (r3_9 - tmpr); i2_21 = (i3_9 - tmpi); r2_9 = (r3_17 + i3_21); i2_9 = (i3_17 - r3_21); r2_25 = (r3_17 - i3_21); i2_25 = (i3_17 + r3_21); tmpr = (0.707106781187 * (i3_29 - r3_29)); tmpi = (0.707106781187 * (r3_29 + i3_29)); r2_13 = (r3_25 + tmpr); i2_13 = (i3_25 - tmpi); r2_29 = (r3_25 - tmpr); i2_29 = (i3_25 + tmpi); } { REAL r3_3, i3_3; REAL r3_7, i3_7; REAL r3_11, i3_11; REAL r3_15, i3_15; REAL r3_19, i3_19; REAL r3_23, i3_23; REAL r3_27, i3_27; REAL r3_31, i3_31; { REAL r4_3, i4_3; REAL r4_11, i4_11; REAL r4_19, i4_19; REAL r4_27, i4_27; { REAL r5_3, i5_3; REAL r5_19, i5_19; wr = c_re(W[3 * l1]); wi = c_im(W[3 * l1]); tmpr = c_re(jp[3 * m]); tmpi = c_im(jp[3 * m]); r5_3 = ((wr * tmpr) - (wi * tmpi)); i5_3 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[19 * l1]); wi = c_im(W[19 * l1]); tmpr = c_re(jp[19 * m]); tmpi = c_im(jp[19 * m]); r5_19 = ((wr * tmpr) - (wi * tmpi)); i5_19 = ((wi * tmpr) + (wr * tmpi)); r4_3 = (r5_3 + r5_19); i4_3 = (i5_3 + i5_19); r4_19 = (r5_3 - r5_19); i4_19 = (i5_3 - i5_19); } { REAL r5_11, i5_11; REAL r5_27, i5_27; wr = c_re(W[11 * l1]); wi = c_im(W[11 * l1]); tmpr = c_re(jp[11 * m]); tmpi = c_im(jp[11 * m]); r5_11 = ((wr * tmpr) - (wi * tmpi)); i5_11 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[27 * l1]); wi = c_im(W[27 * l1]); tmpr = c_re(jp[27 * m]); tmpi = c_im(jp[27 * m]); r5_27 = ((wr * tmpr) - (wi * tmpi)); i5_27 = ((wi * tmpr) + (wr * tmpi)); r4_11 = (r5_11 + r5_27); i4_11 = (i5_11 + i5_27); r4_27 = (r5_11 - r5_27); i4_27 = (i5_11 - i5_27); } r3_3 = (r4_3 + r4_11); i3_3 = (i4_3 + i4_11); r3_19 = (r4_3 - r4_11); i3_19 = (i4_3 - i4_11); r3_11 = (r4_19 + i4_27); i3_11 = (i4_19 - r4_27); r3_27 = (r4_19 - i4_27); i3_27 = (i4_19 + r4_27); } { REAL r4_7, i4_7; REAL r4_15, i4_15; REAL r4_23, i4_23; REAL r4_31, i4_31; { REAL r5_7, i5_7; REAL r5_23, i5_23; wr = c_re(W[7 * l1]); wi = c_im(W[7 * l1]); tmpr = c_re(jp[7 * m]); tmpi = c_im(jp[7 * m]); r5_7 = ((wr * tmpr) - (wi * tmpi)); i5_7 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[23 * l1]); wi = c_im(W[23 * l1]); tmpr = c_re(jp[23 * m]); tmpi = c_im(jp[23 * m]); r5_23 = ((wr * tmpr) - (wi * tmpi)); i5_23 = ((wi * tmpr) + (wr * tmpi)); r4_7 = (r5_7 + r5_23); i4_7 = (i5_7 + i5_23); r4_23 = (r5_7 - r5_23); i4_23 = (i5_7 - i5_23); } { REAL r5_15, i5_15; REAL r5_31, i5_31; wr = c_re(W[15 * l1]); wi = c_im(W[15 * l1]); tmpr = c_re(jp[15 * m]); tmpi = c_im(jp[15 * m]); r5_15 = ((wr * tmpr) - (wi * tmpi)); i5_15 = ((wi * tmpr) + (wr * tmpi)); wr = c_re(W[31 * l1]); wi = c_im(W[31 * l1]); tmpr = c_re(jp[31 * m]); tmpi = c_im(jp[31 * m]); r5_31 = ((wr * tmpr) - (wi * tmpi)); i5_31 = ((wi * tmpr) + (wr * tmpi)); r4_15 = (r5_15 + r5_31); i4_15 = (i5_15 + i5_31); r4_31 = (r5_15 - r5_31); i4_31 = (i5_15 - i5_31); } r3_7 = (r4_7 + r4_15); i3_7 = (i4_7 + i4_15); r3_23 = (r4_7 - r4_15); i3_23 = (i4_7 - i4_15); r3_15 = (r4_23 + i4_31); i3_15 = (i4_23 - r4_31); r3_31 = (r4_23 - i4_31); i3_31 = (i4_23 + r4_31); } r2_3 = (r3_3 + r3_7); i2_3 = (i3_3 + i3_7); r2_19 = (r3_3 - r3_7); i2_19 = (i3_3 - i3_7); tmpr = (0.707106781187 * (r3_15 + i3_15)); tmpi = (0.707106781187 * (i3_15 - r3_15)); r2_7 = (r3_11 + tmpr); i2_7 = (i3_11 + tmpi); r2_23 = (r3_11 - tmpr); i2_23 = (i3_11 - tmpi); r2_11 = (r3_19 + i3_23); i2_11 = (i3_19 - r3_23); r2_27 = (r3_19 - i3_23); i2_27 = (i3_19 + r3_23); tmpr = (0.707106781187 * (i3_31 - r3_31)); tmpi = (0.707106781187 * (r3_31 + i3_31)); r2_15 = (r3_27 + tmpr); i2_15 = (i3_27 - tmpi); r2_31 = (r3_27 - tmpr); i2_31 = (i3_27 + tmpi); } r1_1 = (r2_1 + r2_3); i1_1 = (i2_1 + i2_3); r1_17 = (r2_1 - r2_3); i1_17 = (i2_1 - i2_3); tmpr = ((0.923879532511 * r2_7) + (0.382683432365 * i2_7)); tmpi = ((0.923879532511 * i2_7) - (0.382683432365 * r2_7)); r1_3 = (r2_5 + tmpr); i1_3 = (i2_5 + tmpi); r1_19 = (r2_5 - tmpr); i1_19 = (i2_5 - tmpi); tmpr = (0.707106781187 * (r2_11 + i2_11)); tmpi = (0.707106781187 * (i2_11 - r2_11)); r1_5 = (r2_9 + tmpr); i1_5 = (i2_9 + tmpi); r1_21 = (r2_9 - tmpr); i1_21 = (i2_9 - tmpi); tmpr = ((0.382683432365 * r2_15) + (0.923879532511 * i2_15)); tmpi = ((0.382683432365 * i2_15) - (0.923879532511 * r2_15)); r1_7 = (r2_13 + tmpr); i1_7 = (i2_13 + tmpi); r1_23 = (r2_13 - tmpr); i1_23 = (i2_13 - tmpi); r1_9 = (r2_17 + i2_19); i1_9 = (i2_17 - r2_19); r1_25 = (r2_17 - i2_19); i1_25 = (i2_17 + r2_19); tmpr = ((0.923879532511 * i2_23) - (0.382683432365 * r2_23)); tmpi = ((0.923879532511 * r2_23) + (0.382683432365 * i2_23)); r1_11 = (r2_21 + tmpr); i1_11 = (i2_21 - tmpi); r1_27 = (r2_21 - tmpr); i1_27 = (i2_21 + tmpi); tmpr = (0.707106781187 * (i2_27 - r2_27)); tmpi = (0.707106781187 * (r2_27 + i2_27)); r1_13 = (r2_25 + tmpr); i1_13 = (i2_25 - tmpi); r1_29 = (r2_25 - tmpr); i1_29 = (i2_25 + tmpi); tmpr = ((0.382683432365 * i2_31) - (0.923879532511 * r2_31)); tmpi = ((0.382683432365 * r2_31) + (0.923879532511 * i2_31)); r1_15 = (r2_29 + tmpr); i1_15 = (i2_29 - tmpi); r1_31 = (r2_29 - tmpr); i1_31 = (i2_29 + tmpi); } c_re(kp[0 * m]) = (r1_0 + r1_1); c_im(kp[0 * m]) = (i1_0 + i1_1); c_re(kp[16 * m]) = (r1_0 - r1_1); c_im(kp[16 * m]) = (i1_0 - i1_1); tmpr = ((0.980785280403 * r1_3) + (0.195090322016 * i1_3)); tmpi = ((0.980785280403 * i1_3) - (0.195090322016 * r1_3)); c_re(kp[1 * m]) = (r1_2 + tmpr); c_im(kp[1 * m]) = (i1_2 + tmpi); c_re(kp[17 * m]) = (r1_2 - tmpr); c_im(kp[17 * m]) = (i1_2 - tmpi); tmpr = ((0.923879532511 * r1_5) + (0.382683432365 * i1_5)); tmpi = ((0.923879532511 * i1_5) - (0.382683432365 * r1_5)); c_re(kp[2 * m]) = (r1_4 + tmpr); c_im(kp[2 * m]) = (i1_4 + tmpi); c_re(kp[18 * m]) = (r1_4 - tmpr); c_im(kp[18 * m]) = (i1_4 - tmpi); tmpr = ((0.831469612303 * r1_7) + (0.55557023302 * i1_7)); tmpi = ((0.831469612303 * i1_7) - (0.55557023302 * r1_7)); c_re(kp[3 * m]) = (r1_6 + tmpr); c_im(kp[3 * m]) = (i1_6 + tmpi); c_re(kp[19 * m]) = (r1_6 - tmpr); c_im(kp[19 * m]) = (i1_6 - tmpi); tmpr = (0.707106781187 * (r1_9 + i1_9)); tmpi = (0.707106781187 * (i1_9 - r1_9)); c_re(kp[4 * m]) = (r1_8 + tmpr); c_im(kp[4 * m]) = (i1_8 + tmpi); c_re(kp[20 * m]) = (r1_8 - tmpr); c_im(kp[20 * m]) = (i1_8 - tmpi); tmpr = ((0.55557023302 * r1_11) + (0.831469612303 * i1_11)); tmpi = ((0.55557023302 * i1_11) - (0.831469612303 * r1_11)); c_re(kp[5 * m]) = (r1_10 + tmpr); c_im(kp[5 * m]) = (i1_10 + tmpi); c_re(kp[21 * m]) = (r1_10 - tmpr); c_im(kp[21 * m]) = (i1_10 - tmpi); tmpr = ((0.382683432365 * r1_13) + (0.923879532511 * i1_13)); tmpi = ((0.382683432365 * i1_13) - (0.923879532511 * r1_13)); c_re(kp[6 * m]) = (r1_12 + tmpr); c_im(kp[6 * m]) = (i1_12 + tmpi); c_re(kp[22 * m]) = (r1_12 - tmpr); c_im(kp[22 * m]) = (i1_12 - tmpi); tmpr = ((0.195090322016 * r1_15) + (0.980785280403 * i1_15)); tmpi = ((0.195090322016 * i1_15) - (0.980785280403 * r1_15)); c_re(kp[7 * m]) = (r1_14 + tmpr); c_im(kp[7 * m]) = (i1_14 + tmpi); c_re(kp[23 * m]) = (r1_14 - tmpr); c_im(kp[23 * m]) = (i1_14 - tmpi); c_re(kp[8 * m]) = (r1_16 + i1_17); c_im(kp[8 * m]) = (i1_16 - r1_17); c_re(kp[24 * m]) = (r1_16 - i1_17); c_im(kp[24 * m]) = (i1_16 + r1_17); tmpr = ((0.980785280403 * i1_19) - (0.195090322016 * r1_19)); tmpi = ((0.980785280403 * r1_19) + (0.195090322016 * i1_19)); c_re(kp[9 * m]) = (r1_18 + tmpr); c_im(kp[9 * m]) = (i1_18 - tmpi); c_re(kp[25 * m]) = (r1_18 - tmpr); c_im(kp[25 * m]) = (i1_18 + tmpi); tmpr = ((0.923879532511 * i1_21) - (0.382683432365 * r1_21)); tmpi = ((0.923879532511 * r1_21) + (0.382683432365 * i1_21)); c_re(kp[10 * m]) = (r1_20 + tmpr); c_im(kp[10 * m]) = (i1_20 - tmpi); c_re(kp[26 * m]) = (r1_20 - tmpr); c_im(kp[26 * m]) = (i1_20 + tmpi); tmpr = ((0.831469612303 * i1_23) - (0.55557023302 * r1_23)); tmpi = ((0.831469612303 * r1_23) + (0.55557023302 * i1_23)); c_re(kp[11 * m]) = (r1_22 + tmpr); c_im(kp[11 * m]) = (i1_22 - tmpi); c_re(kp[27 * m]) = (r1_22 - tmpr); c_im(kp[27 * m]) = (i1_22 + tmpi); tmpr = (0.707106781187 * (i1_25 - r1_25)); tmpi = (0.707106781187 * (r1_25 + i1_25)); c_re(kp[12 * m]) = (r1_24 + tmpr); c_im(kp[12 * m]) = (i1_24 - tmpi); c_re(kp[28 * m]) = (r1_24 - tmpr); c_im(kp[28 * m]) = (i1_24 + tmpi); tmpr = ((0.55557023302 * i1_27) - (0.831469612303 * r1_27)); tmpi = ((0.55557023302 * r1_27) + (0.831469612303 * i1_27)); c_re(kp[13 * m]) = (r1_26 + tmpr); c_im(kp[13 * m]) = (i1_26 - tmpi); c_re(kp[29 * m]) = (r1_26 - tmpr); c_im(kp[29 * m]) = (i1_26 + tmpi); tmpr = ((0.382683432365 * i1_29) - (0.923879532511 * r1_29)); tmpi = ((0.382683432365 * r1_29) + (0.923879532511 * i1_29)); c_re(kp[14 * m]) = (r1_28 + tmpr); c_im(kp[14 * m]) = (i1_28 - tmpi); c_re(kp[30 * m]) = (r1_28 - tmpr); c_im(kp[30 * m]) = (i1_28 + tmpi); tmpr = ((0.195090322016 * i1_31) - (0.980785280403 * r1_31)); tmpi = ((0.195090322016 * r1_31) + (0.980785280403 * i1_31)); c_re(kp[15 * m]) = (r1_30 + tmpr); c_im(kp[15 * m]) = (i1_30 - tmpi); c_re(kp[31 * m]) = (r1_30 - tmpr); c_im(kp[31 * m]) = (i1_30 + tmpi); } } } else { int ab = (a + b) / 2; fft_twiddle_32_seq(a, ab, in, out, W, nW, nWdn, m); fft_twiddle_32_seq(ab, b, in, out, W, nW, nWdn, m); } } void fft_unshuffle_32(int a, int b, COMPLEX * in, COMPLEX * out, int m) { int i; const COMPLEX *ip; COMPLEX *jp; if ((b - a) < 128) { ip = in + a * 32; for (i = a; i < b; ++i) { jp = out + i; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; } } else { int ab = (a + b) / 2; #pragma omp task untied fft_unshuffle_32(a, ab, in, out, m); #pragma omp task untied fft_unshuffle_32(ab, b, in, out, m); #pragma omp taskwait } } void fft_unshuffle_32_seq(int a, int b, COMPLEX * in, COMPLEX * out, int m) { int i; const COMPLEX *ip; COMPLEX *jp; if ((b - a) < 128) { ip = in + a * 32; for (i = a; i < b; ++i) { jp = out + i; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; jp += 2 * m; jp[0] = ip[0]; jp[m] = ip[1]; ip += 2; } } else { int ab = (a + b) / 2; fft_unshuffle_32_seq(a, ab, in, out, m); fft_unshuffle_32_seq(ab, b, in, out, m); } } /* end of machine-generated code */ /* * Recursive complex FFT on the n complex components of the array in: * basic Cooley-Tukey algorithm, with some improvements for * n power of two. The result is placed in the array out. n is arbitrary. * The algorithm runs in time O(n*(r1 + ... + rk)) where r1, ..., rk * are prime numbers, and r1 * r2 * ... * rk = n. * * n: size of the input * in: pointer to input * out: pointer to output * factors: list of factors of n, precomputed * W: twiddle factors * nW: size of W, that is, size of the original transform * */ void fft_aux(int n, COMPLEX * in, COMPLEX * out, int *factors, COMPLEX * W, int nW) { int r, m; int k; /* special cases */ if (n == 32) { fft_base_32(in, out); return; } if (n == 16) { fft_base_16(in, out); return; } if (n == 8) { fft_base_8(in, out); return; } if (n == 4) { fft_base_4(in, out); return; } if (n == 2) { fft_base_2(in, out); return; } /* * the cases n == 3, n == 5, and maybe 7 should be implemented as well */ r = *factors; m = n / r; if (r < n) { /* * split the DFT of length n into r DFTs of length n/r, and * recurse */ if (r == 32) { #pragma omp task untied fft_unshuffle_32(0, m, in, out, m); } else if (r == 16) { #pragma omp task untied fft_unshuffle_16(0, m, in, out, m); } else if (r == 8) { #pragma omp task untied fft_unshuffle_8(0, m, in, out, m); } else if (r == 4) { #pragma omp task untied fft_unshuffle_4(0, m, in, out, m); } else if (r == 2) { #pragma omp task untied fft_unshuffle_2(0, m, in, out, m); } else unshuffle(0, m, in, out, r, m); #pragma omp taskwait for (k = 0; k < n; k += m) { #pragma omp task untied fft_aux(m, out + k, in + k, factors + 1, W, nW); } #pragma omp taskwait } /* * now multiply by the twiddle factors, and perform m FFTs * of length r */ if (r == 2) { #pragma omp task untied fft_twiddle_2(0, m, in, out, W, nW, nW / n, m); } else if (r == 4) { #pragma omp task untied fft_twiddle_4(0, m, in, out, W, nW, nW / n, m); } else if (r == 8) { #pragma omp task untied fft_twiddle_8(0, m, in, out, W, nW, nW / n, m); } else if (r == 16) { #pragma omp task untied fft_twiddle_16(0, m, in, out, W, nW, nW / n, m); } else if (r == 32) { #pragma omp task untied fft_twiddle_32(0, m, in, out, W, nW, nW / n, m); } else { #pragma omp task untied fft_twiddle_gen(0, m, in, out, W, nW, nW / n, r, m); } #pragma omp taskwait return; } void fft_aux_seq(int n, COMPLEX * in, COMPLEX * out, int *factors, COMPLEX * W, int nW) { int r, m; int k; /* special cases */ if (n == 32) { fft_base_32(in, out); return; } if (n == 16) { fft_base_16(in, out); return; } if (n == 8) { fft_base_8(in, out); return; } if (n == 4) { fft_base_4(in, out); return; } if (n == 2) { fft_base_2(in, out); return; } /* * the cases n == 3, n == 5, and maybe 7 should be implemented as well */ r = *factors; m = n / r; if (r < n) { /* * split the DFT of length n into r DFTs of length n/r, and * recurse */ if (r == 32) fft_unshuffle_32_seq(0, m, in, out, m); else if (r == 16) fft_unshuffle_16_seq(0, m, in, out, m); else if (r == 8) fft_unshuffle_8_seq(0, m, in, out, m); else if (r == 4) fft_unshuffle_4_seq(0, m, in, out, m); else if (r == 2) fft_unshuffle_2_seq(0, m, in, out, m); else unshuffle_seq(0, m, in, out, r, m); for (k = 0; k < n; k += m) { fft_aux_seq(m, out + k, in + k, factors + 1, W, nW); } } /* * now multiply by the twiddle factors, and perform m FFTs * of length r */ if (r == 2) fft_twiddle_2_seq(0, m, in, out, W, nW, nW / n, m); else if (r == 4) fft_twiddle_4_seq(0, m, in, out, W, nW, nW / n, m); else if (r == 8) fft_twiddle_8_seq(0, m, in, out, W, nW, nW / n, m); else if (r == 16) fft_twiddle_16_seq(0, m, in, out, W, nW, nW / n, m); else if (r == 32) fft_twiddle_32_seq(0, m, in, out, W, nW, nW / n, m); else fft_twiddle_gen_seq(0, m, in, out, W, nW, nW / n, r, m); return; } /* * user interface for fft_aux */ void fft(int n, COMPLEX * in, COMPLEX * out) { int factors[40]; /* allows FFTs up to at least 3^40 */ int *p = factors; int l = n; int r; COMPLEX *W; printf("Computing coefficients "); W = (COMPLEX *) malloc((n + 1) * sizeof(COMPLEX)); #pragma omp parallel #pragma omp single #pragma omp task untied compute_w_coefficients(n, 0, n / 2, W); printf(" completed!\n"); /* * find factors of n, first 8, then 4 and then primes in ascending * order */ do { r = factor(l); *p++ = r; l /= r; } while (l > 1); printf("Computing FFT "); #pragma omp parallel #pragma omp single #pragma omp task untied fft_aux(n, in, out, factors, W, n); printf(" completed!\n"); free(W); return; } void fft_seq(int n, COMPLEX * in, COMPLEX * out) { int factors[40]; /* allows FFTs up to at least 3^40 */ int *p = factors; int l = n; int r; COMPLEX *W; W = (COMPLEX *) malloc((n + 1) * sizeof(COMPLEX)); compute_w_coefficients_seq(n, 0, n / 2, W); /* * find factors of n, first 8, then 4 and then primes in ascending * order */ do { r = factor(l); *p++ = r; l /= r; } while (l > 1); fft_aux_seq(n, in, out, factors, W, n); free(W); return; } int test_correctness(int n, COMPLEX *out1, COMPLEX *out2) { int i; double a,d,error = 0.0; for (i = 0; i < n; ++i) { a = sqrt((c_re(out1[i]) - c_re(out2[i])) * (c_re(out1[i]) - c_re(out2[i])) + (c_im(out1[i]) - c_im(out2[i])) * (c_im(out1[i]) - c_im(out2[i]))); d = sqrt(c_re(out2[i]) * c_re(out2[i]) + c_im(out2[i]) * c_im(out2[i])); if (d < -1.0e-10 || d > 1.0e-10) a /= d; if (a > error) error = a; } printf("relative error=%e\n", error); if (error > 1e-3) return -1; // Fail else return 0; // Success } void compute_w_coefficients_tf(int n, int a, int b, COMPLEX * W, tf::Taskflow& flow, tf::Task& sync) { if (b - a < 128) { flow.emplace([&, a, b, n, W](){ double twoPiOverN; int k; REAL s, c; twoPiOverN = 2.0 * 3.1415926535897932384626434 / n; for (k = a; k <= b; ++k) { c = cos(twoPiOverN * k); c_re(W[k]) = c_re(W[n - k]) = c; s = sin(twoPiOverN * k); c_im(W[k]) = -s; c_im(W[n - k]) = s; } }).precede(sync); } else { int ab = (a + b) / 2; compute_w_coefficients_tf(n, a, ab, W, flow, sync); compute_w_coefficients_tf(n, ab + 1, b, W, flow, sync); } } // ---------------------------------------------------------------------------------------- std::optional fft_aux_tf(int n, COMPLEX * in, COMPLEX * out, int *factors, COMPLEX * W, int nW, tf::Taskflow &flow, std::optional task) { int r, m; int k; /* special cases */ if (n == 32) { if(task) return flow.emplace([in, out](){ fft_base_32(in, out); }).gather(*task); else return flow.emplace([in, out](){ fft_base_32(in, out); }); } if (n == 16) { if(task) return flow.emplace([in, out](){ fft_base_16(in, out); }).gather(*task); else return flow.emplace([in, out](){ fft_base_16(in, out); }); } if (n == 8) { if(task) return flow.emplace([in, out](){ fft_base_8(in, out); }).gather(*task); else return flow.emplace([in, out](){ fft_base_8(in, out); }); } if (n == 4) { if(task) return flow.emplace([in, out](){ fft_base_4(in, out); }).gather(*task); else return flow.emplace([in, out](){ fft_base_4(in, out); }); } if (n == 2) { if(task) return flow.emplace([in, out](){ fft_base_2(in, out); }).gather(*task); else return flow.emplace([in, out](){ fft_base_2(in, out); }); } /* * the cases n == 3, n == 5, and maybe 7 should be implemented as well */ r = *factors; m = n / r; assert(r != 0); std::vector> tasks; std::optional t1 = std::nullopt; if (r < n) { /* * split the DFT of length n into r DFTs of length n/r, and * recurse */ if (r == 32) { t1 = flow.emplace([&, m, in, out](){ fft_unshuffle_32(0, m, in, out, m); }); if(task) (*t1).gather(*task); } else if (r == 16) { t1 = flow.emplace([&, m, in, out](){ fft_unshuffle_16(0, m, in, out, m); }); if(task) (*t1).gather(*task); } else if (r == 8) { t1 = flow.emplace([&, m, in, out](){ fft_unshuffle_8(0, m, in, out, m); }); if(task) (*t1).gather(*task); } else if (r == 4) { t1 = flow.emplace([&, m, in, out](){ fft_unshuffle_4(0, m, in, out, m); }); if(task) (*t1).gather(*task); } else if (r == 2) { t1 = flow.emplace([&, m, in, out](){ fft_unshuffle_2(0, m, in, out, m); }); if(task) (*t1).gather(*task); } else { t1 = flow.emplace([&, m, in, out, r](){ unshuffle(0, m, in, out, r, m); }); if(task) (*t1).gather(*task); } for (k = 0; k < n; k += m) { if(t1) tasks.emplace_back(fft_aux_tf(m, out + k, in + k, factors + 1, W, nW, flow, t1)); else tasks.emplace_back(fft_aux_tf(m, out + k, in + k, factors + 1, W, nW, flow, task)); } } auto sync = flow.emplace([](){}); for(auto& t: tasks) { if(t) { (*t).precede(sync); } } if(t1) (*t1).precede(sync); if(task) (*task).precede(sync); /* * now multiply by the twiddle factors, and perform m FFTs * of length r */ std::optional last_task = std::nullopt; if (r == 2) { last_task = flow.emplace([&, m, in, out, W, nW, n]() { fft_twiddle_2(0, m, in, out, W, nW, nW / n, m); }); } else if (r == 4) { last_task = flow.emplace([&, m, in, out, W, nW, n]() { fft_twiddle_4(0, m, in, out, W, nW, nW / n, m); }); } else if (r == 8) { last_task = flow.emplace([&, m, in, out, W, nW, n]() { fft_twiddle_8(0, m, in, out, W, nW, nW / n, m); }); } else if (r == 16) { last_task = flow.emplace([&, m, in, out, W, nW, n]() { fft_twiddle_16(0, m, in, out, W, nW, nW / n, m); }); } else if (r == 32) { last_task = flow.emplace([&, m, in, out, W, nW, n]() { fft_twiddle_32(0, m, in, out, W, nW, nW / n, m); }); } else { last_task = flow.emplace([&, m, in, out, W, nW, n, r]() { fft_twiddle_gen(0, m, in, out, W, nW, nW / n, r, m); }); } if(last_task) { sync.precede(*last_task); return last_task; } return sync; } void fft_tf(int n, COMPLEX * in, COMPLEX * out) { int factors[40]; /* allows FFTs up to at least 3^40 */ int *p = factors; int l = n; int r; COMPLEX *W; //printf("Computing coefficients "); W = (COMPLEX *) malloc((n + 1) * sizeof(COMPLEX)); tf::Taskflow flow; auto sync = flow.emplace([&, W, p]() mutable { int r; int l = n; /* * find factors of n, first 8, then 4 and then primes in ascending * order */ do { r = factor(l); *p++ = r; l /= r; } while (l > 1); }); compute_w_coefficients_tf(n, 0, n / 2, W, flow, sync); tf::Executor executor; //tf::Executor executor; executor.run(flow).wait(); //printf(" completed!\n"); //printf("Computing FFT "); tf::Taskflow flow2; fft_aux_tf(n, in, out, factors, W, n, flow2, std::nullopt); //flow2.dump(std::cout); //exit(0); executor.run(flow2).wait(); printf(" completed!\n"); free(W); return; }