LCOV - code coverage report
Current view: top level - libreoffice/workdir/unxlngi6.pro/UnpackedTarball/python3/Modules/_decimal/libmpdec - fourstep.c (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 0 70 0.0 %
Date: 2012-12-17 Functions: 0 3 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2008-2012 Stefan Krah. All rights reserved.
       3             :  *
       4             :  * Redistribution and use in source and binary forms, with or without
       5             :  * modification, are permitted provided that the following conditions
       6             :  * are met:
       7             :  *
       8             :  * 1. Redistributions of source code must retain the above copyright
       9             :  *    notice, this list of conditions and the following disclaimer.
      10             :  *
      11             :  * 2. Redistributions in binary form must reproduce the above copyright
      12             :  *    notice, this list of conditions and the following disclaimer in the
      13             :  *    documentation and/or other materials provided with the distribution.
      14             :  *
      15             :  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
      16             :  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
      17             :  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
      18             :  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
      19             :  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
      20             :  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
      21             :  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
      22             :  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
      23             :  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
      24             :  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
      25             :  * SUCH DAMAGE.
      26             :  */
      27             : 
      28             : 
      29             : #include "mpdecimal.h"
      30             : #include <assert.h>
      31             : #include "numbertheory.h"
      32             : #include "sixstep.h"
      33             : #include "transpose.h"
      34             : #include "umodarith.h"
      35             : #include "fourstep.h"
      36             : 
      37             : 
      38             : /* Bignum: Cache efficient Matrix Fourier Transform for arrays of the
      39             :    form 3 * 2**n (See literature/matrix-transform.txt). */
      40             : 
      41             : 
      42             : #ifndef PPRO
      43             : static inline void
      44             : std_size3_ntt(mpd_uint_t *x1, mpd_uint_t *x2, mpd_uint_t *x3,
      45             :               mpd_uint_t w3table[3], mpd_uint_t umod)
      46             : {
      47             :     mpd_uint_t r1, r2;
      48             :     mpd_uint_t w;
      49             :     mpd_uint_t s, tmp;
      50             : 
      51             : 
      52             :     /* k = 0 -> w = 1 */
      53             :     s = *x1;
      54             :     s = addmod(s, *x2, umod);
      55             :     s = addmod(s, *x3, umod);
      56             : 
      57             :     r1 = s;
      58             : 
      59             :     /* k = 1 */
      60             :     s = *x1;
      61             : 
      62             :     w = w3table[1];
      63             :     tmp = MULMOD(*x2, w);
      64             :     s = addmod(s, tmp, umod);
      65             : 
      66             :     w = w3table[2];
      67             :     tmp = MULMOD(*x3, w);
      68             :     s = addmod(s, tmp, umod);
      69             : 
      70             :     r2 = s;
      71             : 
      72             :     /* k = 2 */
      73             :     s = *x1;
      74             : 
      75             :     w = w3table[2];
      76             :     tmp = MULMOD(*x2, w);
      77             :     s = addmod(s, tmp, umod);
      78             : 
      79             :     w = w3table[1];
      80             :     tmp = MULMOD(*x3, w);
      81             :     s = addmod(s, tmp, umod);
      82             : 
      83             :     *x3 = s;
      84             :     *x2 = r2;
      85             :     *x1 = r1;
      86             : }
      87             : #else /* PPRO */
      88             : static inline void
      89           0 : ppro_size3_ntt(mpd_uint_t *x1, mpd_uint_t *x2, mpd_uint_t *x3, mpd_uint_t w3table[3],
      90             :                mpd_uint_t umod, double *dmod, uint32_t dinvmod[3])
      91             : {
      92             :     mpd_uint_t r1, r2;
      93             :     mpd_uint_t w;
      94             :     mpd_uint_t s, tmp;
      95             : 
      96             : 
      97             :     /* k = 0 -> w = 1 */
      98           0 :     s = *x1;
      99           0 :     s = addmod(s, *x2, umod);
     100           0 :     s = addmod(s, *x3, umod);
     101             : 
     102           0 :     r1 = s;
     103             : 
     104             :     /* k = 1 */
     105           0 :     s = *x1;
     106             : 
     107           0 :     w = w3table[1];
     108           0 :     tmp = ppro_mulmod(*x2, w, dmod, dinvmod);
     109           0 :     s = addmod(s, tmp, umod);
     110             : 
     111           0 :     w = w3table[2];
     112           0 :     tmp = ppro_mulmod(*x3, w, dmod, dinvmod);
     113           0 :     s = addmod(s, tmp, umod);
     114             : 
     115           0 :     r2 = s;
     116             : 
     117             :     /* k = 2 */
     118           0 :     s = *x1;
     119             : 
     120           0 :     w = w3table[2];
     121           0 :     tmp = ppro_mulmod(*x2, w, dmod, dinvmod);
     122           0 :     s = addmod(s, tmp, umod);
     123             : 
     124           0 :     w = w3table[1];
     125           0 :     tmp = ppro_mulmod(*x3, w, dmod, dinvmod);
     126           0 :     s = addmod(s, tmp, umod);
     127             : 
     128           0 :     *x3 = s;
     129           0 :     *x2 = r2;
     130           0 :     *x1 = r1;
     131           0 : }
     132             : #endif
     133             : 
     134             : 
     135             : /* forward transform, sign = -1; transform length = 3 * 2**n */
     136             : int
     137           0 : four_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
     138             : {
     139           0 :     mpd_size_t R = 3; /* number of rows */
     140           0 :     mpd_size_t C = n / 3; /* number of columns */
     141             :     mpd_uint_t w3table[3];
     142             :     mpd_uint_t kernel, w0, w1, wstep;
     143             :     mpd_uint_t *s, *p0, *p1, *p2;
     144             :     mpd_uint_t umod;
     145             : #ifdef PPRO
     146             :     double dmod;
     147             :     uint32_t dinvmod[3];
     148             : #endif
     149             :     mpd_size_t i, k;
     150             : 
     151             : 
     152             :     assert(n >= 48);
     153             :     assert(n <= 3*MPD_MAXTRANSFORM_2N);
     154             : 
     155             : 
     156             :     /* Length R transform on the columns. */
     157           0 :     SETMODULUS(modnum);
     158           0 :     _mpd_init_w3table(w3table, -1, modnum);
     159           0 :     for (p0=a, p1=p0+C, p2=p0+2*C; p0<a+C; p0++,p1++,p2++) {
     160             : 
     161           0 :         SIZE3_NTT(p0, p1, p2, w3table);
     162             :     }
     163             : 
     164             :     /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
     165           0 :     kernel = _mpd_getkernel(n, -1, modnum);
     166           0 :     for (i = 1; i < R; i++) {
     167           0 :         w0 = 1;                  /* r**(i*0): initial value for k=0 */
     168           0 :         w1 = POWMOD(kernel, i);  /* r**(i*1): initial value for k=1 */
     169           0 :         wstep = MULMOD(w1, w1);  /* r**(2*i) */
     170           0 :         for (k = 0; k < C-1; k += 2) {
     171           0 :             mpd_uint_t x0 = a[i*C+k];
     172           0 :             mpd_uint_t x1 = a[i*C+k+1];
     173           0 :             MULMOD2(&x0, w0, &x1, w1);
     174           0 :             MULMOD2C(&w0, &w1, wstep);  /* r**(i*(k+2)) = r**(i*k) * r**(2*i) */
     175           0 :             a[i*C+k] = x0;
     176           0 :             a[i*C+k+1] = x1;
     177             :         }
     178             :     }
     179             : 
     180             :     /* Length C transform on the rows. */
     181           0 :     for (s = a; s < a+n; s += C) {
     182           0 :         if (!six_step_fnt(s, C, modnum)) {
     183           0 :             return 0;
     184             :         }
     185             :     }
     186             : 
     187             : #if 0
     188             :     /* An unordered transform is sufficient for convolution. */
     189             :     /* Transpose the matrix. */
     190             :     transpose_3xpow2(a, R, C);
     191             : #endif
     192             : 
     193           0 :     return 1;
     194             : }
     195             : 
     196             : /* backward transform, sign = 1; transform length = 3 * 2**n */
     197             : int
     198           0 : inv_four_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
     199             : {
     200           0 :     mpd_size_t R = 3; /* number of rows */
     201           0 :     mpd_size_t C = n / 3; /* number of columns */
     202             :     mpd_uint_t w3table[3];
     203             :     mpd_uint_t kernel, w0, w1, wstep;
     204             :     mpd_uint_t *s, *p0, *p1, *p2;
     205             :     mpd_uint_t umod;
     206             : #ifdef PPRO
     207             :     double dmod;
     208             :     uint32_t dinvmod[3];
     209             : #endif
     210             :     mpd_size_t i, k;
     211             : 
     212             : 
     213             :     assert(n >= 48);
     214             :     assert(n <= 3*MPD_MAXTRANSFORM_2N);
     215             : 
     216             : 
     217             : #if 0
     218             :     /* An unordered transform is sufficient for convolution. */
     219             :     /* Transpose the matrix, producing an R*C matrix. */
     220             :     transpose_3xpow2(a, C, R);
     221             : #endif
     222             : 
     223             :     /* Length C transform on the rows. */
     224           0 :     for (s = a; s < a+n; s += C) {
     225           0 :         if (!inv_six_step_fnt(s, C, modnum)) {
     226           0 :             return 0;
     227             :         }
     228             :     }
     229             : 
     230             :     /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
     231           0 :     SETMODULUS(modnum);
     232           0 :     kernel = _mpd_getkernel(n, 1, modnum);
     233           0 :     for (i = 1; i < R; i++) {
     234           0 :         w0 = 1;
     235           0 :         w1 = POWMOD(kernel, i);
     236           0 :         wstep = MULMOD(w1, w1);
     237           0 :         for (k = 0; k < C; k += 2) {
     238           0 :             mpd_uint_t x0 = a[i*C+k];
     239           0 :             mpd_uint_t x1 = a[i*C+k+1];
     240           0 :             MULMOD2(&x0, w0, &x1, w1);
     241           0 :             MULMOD2C(&w0, &w1, wstep);
     242           0 :             a[i*C+k] = x0;
     243           0 :             a[i*C+k+1] = x1;
     244             :         }
     245             :     }
     246             : 
     247             :     /* Length R transform on the columns. */
     248           0 :     _mpd_init_w3table(w3table, 1, modnum);
     249           0 :     for (p0=a, p1=p0+C, p2=p0+2*C; p0<a+C; p0++,p1++,p2++) {
     250             : 
     251           0 :         SIZE3_NTT(p0, p1, p2, w3table);
     252             :     }
     253             : 
     254           0 :     return 1;
     255             : }
     256             : 
     257             : 

Generated by: LCOV version 1.10