Line data Source code
1 : /*
2 : * Copyright (c) 2008-2012 Stefan Krah. All rights reserved.
3 : *
4 : * Redistribution and use in source and binary forms, with or without
5 : * modification, are permitted provided that the following conditions
6 : * are met:
7 : *
8 : * 1. Redistributions of source code must retain the above copyright
9 : * notice, this list of conditions and the following disclaimer.
10 : *
11 : * 2. Redistributions in binary form must reproduce the above copyright
12 : * notice, this list of conditions and the following disclaimer in the
13 : * documentation and/or other materials provided with the distribution.
14 : *
15 : * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16 : * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 : * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 : * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 : * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 : * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 : * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 : * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 : * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 : * SUCH DAMAGE.
26 : */
27 :
28 :
29 : #include "mpdecimal.h"
30 : #include <stdio.h>
31 : #include <assert.h>
32 : #include "numbertheory.h"
33 : #include "umodarith.h"
34 : #include "crt.h"
35 :
36 :
37 : /* Bignum: Chinese Remainder Theorem, extends the maximum transform length. */
38 :
39 :
40 : /* Multiply P1P2 by v, store result in w. */
41 : static inline void
42 0 : _crt_mulP1P2_3(mpd_uint_t w[3], mpd_uint_t v)
43 : {
44 : mpd_uint_t hi1, hi2, lo;
45 :
46 0 : _mpd_mul_words(&hi1, &lo, LH_P1P2, v);
47 0 : w[0] = lo;
48 :
49 0 : _mpd_mul_words(&hi2, &lo, UH_P1P2, v);
50 0 : lo = hi1 + lo;
51 0 : if (lo < hi1) hi2++;
52 :
53 0 : w[1] = lo;
54 0 : w[2] = hi2;
55 0 : }
56 :
57 : /* Add 3 words from v to w. The result is known to fit in w. */
58 : static inline void
59 0 : _crt_add3(mpd_uint_t w[3], mpd_uint_t v[3])
60 : {
61 : mpd_uint_t carry;
62 : mpd_uint_t s;
63 :
64 0 : s = w[0] + v[0];
65 0 : carry = (s < w[0]);
66 0 : w[0] = s;
67 :
68 0 : s = w[1] + (v[1] + carry);
69 0 : carry = (s < w[1]);
70 0 : w[1] = s;
71 :
72 0 : w[2] = w[2] + (v[2] + carry);
73 0 : }
74 :
75 : /* Divide 3 words in u by v, store result in w, return remainder. */
76 : static inline mpd_uint_t
77 0 : _crt_div3(mpd_uint_t *w, const mpd_uint_t *u, mpd_uint_t v)
78 : {
79 0 : mpd_uint_t r1 = u[2];
80 : mpd_uint_t r2;
81 :
82 0 : if (r1 < v) {
83 0 : w[2] = 0;
84 : }
85 : else {
86 0 : _mpd_div_word(&w[2], &r1, u[2], v); /* GCOV_NOT_REACHED */
87 : }
88 :
89 0 : _mpd_div_words(&w[1], &r2, r1, u[1], v);
90 0 : _mpd_div_words(&w[0], &r1, r2, u[0], v);
91 :
92 0 : return r1;
93 : }
94 :
95 :
96 : /*
97 : * Chinese Remainder Theorem:
98 : * Algorithm from Joerg Arndt, "Matters Computational",
99 : * Chapter 37.4.1 [http://www.jjj.de/fxt/]
100 : *
101 : * See also Knuth, TAOCP, Volume 2, 4.3.2, exercise 7.
102 : */
103 :
104 : /*
105 : * CRT with carry: x1, x2, x3 contain numbers modulo p1, p2, p3. For each
106 : * triple of members of the arrays, find the unique z modulo p1*p2*p3, with
107 : * zmax = p1*p2*p3 - 1.
108 : *
109 : * In each iteration of the loop, split z into result[i] = z % MPD_RADIX
110 : * and carry = z / MPD_RADIX. Let N be the size of carry[] and cmax the
111 : * maximum carry.
112 : *
113 : * Limits for the 32-bit build:
114 : *
115 : * N = 2**96
116 : * cmax = 7711435591312380274
117 : *
118 : * Limits for the 64 bit build:
119 : *
120 : * N = 2**192
121 : * cmax = 627710135393475385904124401220046371710
122 : *
123 : * The following statements hold for both versions:
124 : *
125 : * 1) cmax + zmax < N, so the addition does not overflow.
126 : *
127 : * 2) (cmax + zmax) / MPD_RADIX == cmax.
128 : *
129 : * 3) If c <= cmax, then c_next = (c + zmax) / MPD_RADIX <= cmax.
130 : */
131 : void
132 0 : crt3(mpd_uint_t *x1, mpd_uint_t *x2, mpd_uint_t *x3, mpd_size_t rsize)
133 : {
134 0 : mpd_uint_t p1 = mpd_moduli[P1];
135 : mpd_uint_t umod;
136 : #ifdef PPRO
137 : double dmod;
138 : uint32_t dinvmod[3];
139 : #endif
140 : mpd_uint_t a1, a2, a3;
141 : mpd_uint_t s;
142 : mpd_uint_t z[3], t[3];
143 0 : mpd_uint_t carry[3] = {0,0,0};
144 : mpd_uint_t hi, lo;
145 : mpd_size_t i;
146 :
147 0 : for (i = 0; i < rsize; i++) {
148 :
149 0 : a1 = x1[i];
150 0 : a2 = x2[i];
151 0 : a3 = x3[i];
152 :
153 0 : SETMODULUS(P2);
154 0 : s = ext_submod(a2, a1, umod);
155 0 : s = MULMOD(s, INV_P1_MOD_P2);
156 :
157 0 : _mpd_mul_words(&hi, &lo, s, p1);
158 0 : lo = lo + a1;
159 0 : if (lo < a1) hi++;
160 :
161 0 : SETMODULUS(P3);
162 0 : s = dw_submod(a3, hi, lo, umod);
163 0 : s = MULMOD(s, INV_P1P2_MOD_P3);
164 :
165 0 : z[0] = lo;
166 0 : z[1] = hi;
167 0 : z[2] = 0;
168 :
169 0 : _crt_mulP1P2_3(t, s);
170 0 : _crt_add3(z, t);
171 0 : _crt_add3(carry, z);
172 :
173 0 : x1[i] = _crt_div3(carry, carry, MPD_RADIX);
174 : }
175 :
176 : assert(carry[0] == 0 && carry[1] == 0 && carry[2] == 0);
177 0 : }
178 :
179 :
|