Line data Source code
1 : /*
2 : * Copyright (c) 2008-2012 Stefan Krah. All rights reserved.
3 : *
4 : * Redistribution and use in source and binary forms, with or without
5 : * modification, are permitted provided that the following conditions
6 : * are met:
7 : *
8 : * 1. Redistributions of source code must retain the above copyright
9 : * notice, this list of conditions and the following disclaimer.
10 : *
11 : * 2. Redistributions in binary form must reproduce the above copyright
12 : * notice, this list of conditions and the following disclaimer in the
13 : * documentation and/or other materials provided with the distribution.
14 : *
15 : * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16 : * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 : * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 : * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 : * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 : * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 : * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 : * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 : * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 : * SUCH DAMAGE.
26 : */
27 :
28 :
29 : #include "mpdecimal.h"
30 : #include <stdio.h>
31 : #include <stdlib.h>
32 : #include <assert.h>
33 : #include "bits.h"
34 : #include "difradix2.h"
35 : #include "numbertheory.h"
36 : #include "transpose.h"
37 : #include "umodarith.h"
38 : #include "sixstep.h"
39 :
40 :
41 : /* Bignum: Cache efficient Matrix Fourier Transform for arrays of the
42 : form 2**n (See literature/six-step.txt). */
43 :
44 :
45 : /* forward transform with sign = -1 */
46 : int
47 0 : six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
48 : {
49 : struct fnt_params *tparams;
50 : mpd_size_t log2n, C, R;
51 : mpd_uint_t kernel;
52 : mpd_uint_t umod;
53 : #ifdef PPRO
54 : double dmod;
55 : uint32_t dinvmod[3];
56 : #endif
57 : mpd_uint_t *x, w0, w1, wstep;
58 : mpd_size_t i, k;
59 :
60 :
61 : assert(ispower2(n));
62 : assert(n >= 16);
63 : assert(n <= MPD_MAXTRANSFORM_2N);
64 :
65 0 : log2n = mpd_bsr(n);
66 0 : C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */
67 0 : R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */
68 :
69 :
70 : /* Transpose the matrix. */
71 0 : if (!transpose_pow2(a, R, C)) {
72 0 : return 0;
73 : }
74 :
75 : /* Length R transform on the rows. */
76 0 : if ((tparams = _mpd_init_fnt_params(R, -1, modnum)) == NULL) {
77 0 : return 0;
78 : }
79 0 : for (x = a; x < a+n; x += R) {
80 0 : fnt_dif2(x, R, tparams);
81 : }
82 :
83 : /* Transpose the matrix. */
84 0 : if (!transpose_pow2(a, C, R)) {
85 0 : mpd_free(tparams);
86 0 : return 0;
87 : }
88 :
89 : /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
90 0 : SETMODULUS(modnum);
91 0 : kernel = _mpd_getkernel(n, -1, modnum);
92 0 : for (i = 1; i < R; i++) {
93 0 : w0 = 1; /* r**(i*0): initial value for k=0 */
94 0 : w1 = POWMOD(kernel, i); /* r**(i*1): initial value for k=1 */
95 0 : wstep = MULMOD(w1, w1); /* r**(2*i) */
96 0 : for (k = 0; k < C; k += 2) {
97 0 : mpd_uint_t x0 = a[i*C+k];
98 0 : mpd_uint_t x1 = a[i*C+k+1];
99 0 : MULMOD2(&x0, w0, &x1, w1);
100 0 : MULMOD2C(&w0, &w1, wstep); /* r**(i*(k+2)) = r**(i*k) * r**(2*i) */
101 0 : a[i*C+k] = x0;
102 0 : a[i*C+k+1] = x1;
103 : }
104 : }
105 :
106 : /* Length C transform on the rows. */
107 0 : if (C != R) {
108 0 : mpd_free(tparams);
109 0 : if ((tparams = _mpd_init_fnt_params(C, -1, modnum)) == NULL) {
110 0 : return 0;
111 : }
112 : }
113 0 : for (x = a; x < a+n; x += C) {
114 0 : fnt_dif2(x, C, tparams);
115 : }
116 0 : mpd_free(tparams);
117 :
118 : #if 0
119 : /* An unordered transform is sufficient for convolution. */
120 : /* Transpose the matrix. */
121 : if (!transpose_pow2(a, R, C)) {
122 : return 0;
123 : }
124 : #endif
125 :
126 0 : return 1;
127 : }
128 :
129 :
130 : /* reverse transform, sign = 1 */
131 : int
132 0 : inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
133 : {
134 : struct fnt_params *tparams;
135 : mpd_size_t log2n, C, R;
136 : mpd_uint_t kernel;
137 : mpd_uint_t umod;
138 : #ifdef PPRO
139 : double dmod;
140 : uint32_t dinvmod[3];
141 : #endif
142 : mpd_uint_t *x, w0, w1, wstep;
143 : mpd_size_t i, k;
144 :
145 :
146 : assert(ispower2(n));
147 : assert(n >= 16);
148 : assert(n <= MPD_MAXTRANSFORM_2N);
149 :
150 0 : log2n = mpd_bsr(n);
151 0 : C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */
152 0 : R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */
153 :
154 :
155 : #if 0
156 : /* An unordered transform is sufficient for convolution. */
157 : /* Transpose the matrix, producing an R*C matrix. */
158 : if (!transpose_pow2(a, C, R)) {
159 : return 0;
160 : }
161 : #endif
162 :
163 : /* Length C transform on the rows. */
164 0 : if ((tparams = _mpd_init_fnt_params(C, 1, modnum)) == NULL) {
165 0 : return 0;
166 : }
167 0 : for (x = a; x < a+n; x += C) {
168 0 : fnt_dif2(x, C, tparams);
169 : }
170 :
171 : /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
172 0 : SETMODULUS(modnum);
173 0 : kernel = _mpd_getkernel(n, 1, modnum);
174 0 : for (i = 1; i < R; i++) {
175 0 : w0 = 1;
176 0 : w1 = POWMOD(kernel, i);
177 0 : wstep = MULMOD(w1, w1);
178 0 : for (k = 0; k < C; k += 2) {
179 0 : mpd_uint_t x0 = a[i*C+k];
180 0 : mpd_uint_t x1 = a[i*C+k+1];
181 0 : MULMOD2(&x0, w0, &x1, w1);
182 0 : MULMOD2C(&w0, &w1, wstep);
183 0 : a[i*C+k] = x0;
184 0 : a[i*C+k+1] = x1;
185 : }
186 : }
187 :
188 : /* Transpose the matrix. */
189 0 : if (!transpose_pow2(a, R, C)) {
190 0 : mpd_free(tparams);
191 0 : return 0;
192 : }
193 :
194 : /* Length R transform on the rows. */
195 0 : if (R != C) {
196 0 : mpd_free(tparams);
197 0 : if ((tparams = _mpd_init_fnt_params(R, 1, modnum)) == NULL) {
198 0 : return 0;
199 : }
200 : }
201 0 : for (x = a; x < a+n; x += R) {
202 0 : fnt_dif2(x, R, tparams);
203 : }
204 0 : mpd_free(tparams);
205 :
206 : /* Transpose the matrix. */
207 0 : if (!transpose_pow2(a, C, R)) {
208 0 : return 0;
209 : }
210 :
211 0 : return 1;
212 : }
213 :
214 :
|