Line data Source code
1 : /*
2 : * Copyright (c) 2008-2012 Stefan Krah. All rights reserved.
3 : *
4 : * Redistribution and use in source and binary forms, with or without
5 : * modification, are permitted provided that the following conditions
6 : * are met:
7 : *
8 : * 1. Redistributions of source code must retain the above copyright
9 : * notice, this list of conditions and the following disclaimer.
10 : *
11 : * 2. Redistributions in binary form must reproduce the above copyright
12 : * notice, this list of conditions and the following disclaimer in the
13 : * documentation and/or other materials provided with the distribution.
14 : *
15 : * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16 : * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 : * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 : * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 : * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 : * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 : * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 : * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 : * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 : * SUCH DAMAGE.
26 : */
27 :
28 :
29 : #include "mpdecimal.h"
30 : #include <stdio.h>
31 : #include <stdlib.h>
32 : #include <string.h>
33 : #include <limits.h>
34 : #include <assert.h>
35 : #include "bits.h"
36 : #include "constants.h"
37 : #include "typearith.h"
38 : #include "transpose.h"
39 :
40 :
41 : #define BUFSIZE 4096
42 : #define SIDE 128
43 :
44 :
45 : /* Bignum: The transpose functions are used for very large transforms
46 : in sixstep.c and fourstep.c. */
47 :
48 :
49 : /* Definition of the matrix transpose */
50 : void
51 0 : std_trans(mpd_uint_t dest[], mpd_uint_t src[], mpd_size_t rows, mpd_size_t cols)
52 : {
53 : mpd_size_t idest, isrc;
54 : mpd_size_t r, c;
55 :
56 0 : for (r = 0; r < rows; r++) {
57 0 : isrc = r * cols;
58 0 : idest = r;
59 0 : for (c = 0; c < cols; c++) {
60 0 : dest[idest] = src[isrc];
61 0 : isrc += 1;
62 0 : idest += rows;
63 : }
64 : }
65 0 : }
66 :
67 : /*
68 : * Swap half-rows of 2^n * (2*2^n) matrix.
69 : * FORWARD_CYCLE: even/odd permutation of the halfrows.
70 : * BACKWARD_CYCLE: reverse the even/odd permutation.
71 : */
72 : static int
73 0 : swap_halfrows_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols, int dir)
74 : {
75 : mpd_uint_t buf1[BUFSIZE];
76 : mpd_uint_t buf2[BUFSIZE];
77 : mpd_uint_t *readbuf, *writebuf, *hp;
78 : mpd_size_t *done, dbits;
79 0 : mpd_size_t b = BUFSIZE, stride;
80 : mpd_size_t hn, hmax; /* halfrow number */
81 0 : mpd_size_t m, r=0;
82 : mpd_size_t offset;
83 : mpd_size_t next;
84 :
85 :
86 : assert(cols == mul_size_t(2, rows));
87 :
88 0 : if (dir == FORWARD_CYCLE) {
89 0 : r = rows;
90 : }
91 0 : else if (dir == BACKWARD_CYCLE) {
92 0 : r = 2;
93 : }
94 : else {
95 0 : abort(); /* GCOV_NOT_REACHED */
96 : }
97 :
98 0 : m = cols - 1;
99 0 : hmax = rows; /* cycles start at odd halfrows */
100 0 : dbits = 8 * sizeof *done;
101 0 : if ((done = mpd_calloc(hmax/(sizeof *done) + 1, sizeof *done)) == NULL) {
102 0 : return 0;
103 : }
104 :
105 0 : for (hn = 1; hn <= hmax; hn += 2) {
106 :
107 0 : if (done[hn/dbits] & mpd_bits[hn%dbits]) {
108 0 : continue;
109 : }
110 :
111 0 : readbuf = buf1; writebuf = buf2;
112 :
113 0 : for (offset = 0; offset < cols/2; offset += b) {
114 :
115 0 : stride = (offset + b < cols/2) ? b : cols/2-offset;
116 :
117 0 : hp = matrix + hn*cols/2;
118 0 : memcpy(readbuf, hp+offset, stride*(sizeof *readbuf));
119 0 : pointerswap(&readbuf, &writebuf);
120 :
121 0 : next = mulmod_size_t(hn, r, m);
122 0 : hp = matrix + next*cols/2;
123 :
124 0 : while (next != hn) {
125 :
126 0 : memcpy(readbuf, hp+offset, stride*(sizeof *readbuf));
127 0 : memcpy(hp+offset, writebuf, stride*(sizeof *writebuf));
128 0 : pointerswap(&readbuf, &writebuf);
129 :
130 0 : done[next/dbits] |= mpd_bits[next%dbits];
131 :
132 0 : next = mulmod_size_t(next, r, m);
133 0 : hp = matrix + next*cols/2;
134 :
135 : }
136 :
137 0 : memcpy(hp+offset, writebuf, stride*(sizeof *writebuf));
138 :
139 0 : done[hn/dbits] |= mpd_bits[hn%dbits];
140 : }
141 : }
142 :
143 0 : mpd_free(done);
144 0 : return 1;
145 : }
146 :
147 : /* In-place transpose of a square matrix */
148 : static inline void
149 0 : squaretrans(mpd_uint_t *buf, mpd_size_t cols)
150 : {
151 : mpd_uint_t tmp;
152 : mpd_size_t idest, isrc;
153 : mpd_size_t r, c;
154 :
155 0 : for (r = 0; r < cols; r++) {
156 0 : c = r+1;
157 0 : isrc = r*cols + c;
158 0 : idest = c*cols + r;
159 0 : for (c = r+1; c < cols; c++) {
160 0 : tmp = buf[isrc];
161 0 : buf[isrc] = buf[idest];
162 0 : buf[idest] = tmp;
163 0 : isrc += 1;
164 0 : idest += cols;
165 : }
166 : }
167 0 : }
168 :
169 : /*
170 : * Transpose 2^n * 2^n matrix. For cache efficiency, the matrix is split into
171 : * square blocks with side length 'SIDE'. First, the blocks are transposed,
172 : * then a square tranposition is done on each individual block.
173 : */
174 : static void
175 0 : squaretrans_pow2(mpd_uint_t *matrix, mpd_size_t size)
176 : {
177 : mpd_uint_t buf1[SIDE*SIDE];
178 : mpd_uint_t buf2[SIDE*SIDE];
179 : mpd_uint_t *to, *from;
180 0 : mpd_size_t b = size;
181 : mpd_size_t r, c;
182 : mpd_size_t i;
183 :
184 0 : while (b > SIDE) b >>= 1;
185 :
186 0 : for (r = 0; r < size; r += b) {
187 :
188 0 : for (c = r; c < size; c += b) {
189 :
190 0 : from = matrix + r*size + c;
191 0 : to = buf1;
192 0 : for (i = 0; i < b; i++) {
193 0 : memcpy(to, from, b*(sizeof *to));
194 0 : from += size;
195 0 : to += b;
196 : }
197 0 : squaretrans(buf1, b);
198 :
199 0 : if (r == c) {
200 0 : to = matrix + r*size + c;
201 0 : from = buf1;
202 0 : for (i = 0; i < b; i++) {
203 0 : memcpy(to, from, b*(sizeof *to));
204 0 : from += b;
205 0 : to += size;
206 : }
207 0 : continue;
208 : }
209 : else {
210 0 : from = matrix + c*size + r;
211 0 : to = buf2;
212 0 : for (i = 0; i < b; i++) {
213 0 : memcpy(to, from, b*(sizeof *to));
214 0 : from += size;
215 0 : to += b;
216 : }
217 0 : squaretrans(buf2, b);
218 :
219 0 : to = matrix + c*size + r;
220 0 : from = buf1;
221 0 : for (i = 0; i < b; i++) {
222 0 : memcpy(to, from, b*(sizeof *to));
223 0 : from += b;
224 0 : to += size;
225 : }
226 :
227 0 : to = matrix + r*size + c;
228 0 : from = buf2;
229 0 : for (i = 0; i < b; i++) {
230 0 : memcpy(to, from, b*(sizeof *to));
231 0 : from += b;
232 0 : to += size;
233 : }
234 : }
235 : }
236 : }
237 :
238 0 : }
239 :
240 : /*
241 : * In-place transposition of a 2^n x 2^n or a 2^n x (2*2^n)
242 : * or a (2*2^n) x 2^n matrix.
243 : */
244 : int
245 0 : transpose_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols)
246 : {
247 0 : mpd_size_t size = mul_size_t(rows, cols);
248 :
249 : assert(ispower2(rows));
250 : assert(ispower2(cols));
251 :
252 0 : if (cols == rows) {
253 0 : squaretrans_pow2(matrix, rows);
254 : }
255 0 : else if (cols == mul_size_t(2, rows)) {
256 0 : if (!swap_halfrows_pow2(matrix, rows, cols, FORWARD_CYCLE)) {
257 0 : return 0;
258 : }
259 0 : squaretrans_pow2(matrix, rows);
260 0 : squaretrans_pow2(matrix+(size/2), rows);
261 : }
262 0 : else if (rows == mul_size_t(2, cols)) {
263 0 : squaretrans_pow2(matrix, cols);
264 0 : squaretrans_pow2(matrix+(size/2), cols);
265 0 : if (!swap_halfrows_pow2(matrix, cols, rows, BACKWARD_CYCLE)) {
266 0 : return 0;
267 : }
268 : }
269 : else {
270 0 : abort(); /* GCOV_NOT_REACHED */
271 : }
272 :
273 0 : return 1;
274 : }
275 :
276 :
|