#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include "fundefs.h"

// -- Global flop counter. In this file it is reset to 0 before each
//    timed case and increased by ntot per repetition (the final scaling
//    of x by fac). It has external linkage; presumably the solver
//    routines add their own operation counts to it -- TODO confirm.
long flops; // -- Global flop counter.

/*----------------------------------------------------------------------
 **********************************************************************
  *** This program is part of the Euroben Benchmark                  ***
  ***                                                                ***
  *** Copyright: EuroBen Group p/o                                   ***
  ***            NCF/HPC Research                                    ***
  ***            L.J. Costerstraat 5                                 ***
  ***            6827 AR, Arnhem                                     ***
  ***            The Netherlands                                     ***
  ***                                                                ***
  *** Author of this program: Aad van der Steen                      ***
  *** Date                    Autumn 2010                            ***
  **********************************************************************
  -- Purpose of program mod2cr
  ----------------------------
  -- Solver for sparse linear systems with iterative methods and
     two types of preconditioners. The systems are not actually
     solved but rather a predefined number of iterations is performed
     to assess the speed of the solver-preconditioner combination.
     Two types of systems are considered:
     1. - Systems stemming from 3-D finite difference problems
          resulting in 7-banded matrices. We consider only the
          symmetric type in this program 'mod2cr'.
     2. - Finite element type, irregularly filled matrices.
          They are stored in CRS format. These systems are addressed
          in ANOTHER program: 'mod2ci'.

  -- In this Program 'mod2cr' we address the regular, banded type
     of problem.

     Solvers used:
     -  For the symmetric banded systems in 'mod2cr': CG with ILU(0)
        and polynomial preconditioning.
     -  For irregular systems in 'mod2ci': RGMRES and TFQMR with
        polynomial preconditioning.
  ---------------------------------------------------------------------*/

// -- Fixed problem parameters shared by main and its helpers. They are
//    enum constants (not 'int const' objects) so that the arrays sized
//    by them are ordinary arrays rather than variable-length arrays.
enum {
   CR_M     = 1,   // -- Passed as 'm' to pcoefs/cg1/cg2; gamma has CR_M+1 entries.
   CR_MAXIT = 50,  // -- Passed as 'maxit' to cg1/cg2; length of conv.
   CR_WID   = 4    // -- Second dimension given to makmat.
};

// ------------------------------------------------------------------------
// -- Restore the working system (areg, b) from the untouched copies
//    (aorg, borg) and set x to zero. This is the per-repetition setup
//    work; run_case() repeats it in a second loop to measure and
//    subtract its cost from the solver timing.
static void reset_system( int ntot, double **areg, double **aorg,
                          double *b, double *borg, double *x )
{
   int row, col;

   setval( ntot, x, 0.0 );
   for( row = 0; row < ntot; ++row ) {
      for( col = 0; col < CR_WID; ++col ) {
         areg[row][col] = aorg[row][col];
      }
      b[row] = borg[row];
   }
}

// ------------------------------------------------------------------------
// -- Time one problem size and print its result line via prtspeed.
//    n1, n2, n3 : grid dimensions; the system order is n1*n2*n3.
//    nrep       : number of timed repetitions (must be >= 1).
//    use_poly   : 0 -> call cg1 (ILU(0) phase), non-zero -> call cg2
//                 (polynomial phase).
//    gamma      : CR_M+1 coefficients handed on to cg1/cg2.
//    Returns 0 on success, -1 on invalid input or allocation failure.
static int run_case( int n1, int n2, int n3, int nrep, int use_poly,
                     double *gamma )
{
   double       **areg = NULL, **aorg = NULL;
   double       *b = NULL, *borg = NULL, *fac = NULL, *x = NULL, *q = NULL;
   double       conv[CR_MAXIT] = { 0.0 };
   double const tol = 1.0e-6;
   double       corr, elapsed, mflops, res;
   int          i, l, ntot;
   int          its = 0;
   int          rc  = -1;

   // -- Reject sizes that would give empty systems, no repetitions
   //    (division by nrep below) or an int overflow in n1*n2*n3.
   if( n1 < 1 || n2 < 1 || n3 < 1 || nrep < 1 ) {
      fprintf( stderr, "mod2cr: sizes and repeat count must be positive "
               "(got %d %d %d %d)\n", n1, n2, n3, nrep );
      return -1;
   }
   if( n1 > INT_MAX/n2 || n1*n2 > INT_MAX/n3 ) {
      fprintf( stderr, "mod2cr: problem size %d x %d x %d is too large\n",
               n1, n2, n3 );
      return -1;
   }
   ntot = n1*n2*n3;

   // -- NOTE(review): assumes makmat reports failure by returning NULL.
   areg = makmat( ntot, CR_WID );
   aorg = makmat( ntot, CR_WID );
   b    = calloc( ntot, sizeof *b );
   borg = calloc( ntot, sizeof *borg );
   fac  = calloc( ntot, sizeof *fac );
   x    = calloc( ntot, sizeof *x );
   q    = calloc( ntot, sizeof *q );
   if( areg == NULL || aorg == NULL || b == NULL || borg == NULL ||
       fac == NULL || x == NULL || q == NULL ) {
      fprintf( stderr, "mod2cr: out of memory for system of order %d\n",
               ntot );
      goto cleanup;
   }

   sym7gen( ntot, aorg, borg );
   flops = 0;
   setval( ntot, q, 1.0 );

   // -- Timed loop: setup + preconditioning + solver + rescaling of x.
   elapsed = cclock();
   for( i = 0; i < nrep; i++ ) {
      reset_system( ntot, areg, aorg, b, borg, x );
      sym7pre( n1, n2, n3, areg, b, fac );
      if( use_poly ) {
         cg2( n1, n2, n3, CR_M, areg, b, x, q, gamma, CR_MAXIT, conv,
              &its, tol );
      } else {
         cg1( n1, n2, n3, CR_M, areg, b, x, q, gamma, CR_MAXIT, conv,
              &its, tol );
      }
      for( l = 0; l < ntot; ++l ) {
         x[l] = x[l]*fac[l];
      }
      flops += ntot;
   }
   elapsed = cclock() - elapsed;

   // -- Correction loop: time the setup work alone and subtract it.
   corr = cclock();
   for( i = 0; i < nrep; i++ ) {
      reset_system( ntot, areg, aorg, b, borg, x );
   }
   elapsed = elapsed - cclock() + corr;

   // -- Report the last recorded conv entry; fall back to conv[0]
   //    (zero-initialised) if the solver returned an iteration count
   //    outside 1..CR_MAXIT, instead of indexing out of bounds.
   res = ( its >= 1 && its <= CR_MAXIT ) ? conv[its-1] : conv[0];
   // -- Clock resolution can make the corrected time non-positive.
   mflops  = ( elapsed > 0.0 ) ? 1.0e-6*(double)flops/elapsed : 0.0;
   elapsed = elapsed/nrep;
   prtspeed( n1, n2, n3, elapsed, mflops, res );
   rc = 0;

cleanup:
   // -- Both matrices were created with ntot rows, so release them with
   //    that same count. borg is released too.
   if( areg != NULL ) delmat( ntot, areg );
   if( aorg != NULL ) delmat( ntot, aorg );
   free( b ); free( borg ); free( fac ); free( q ); free( x );
   return rc;
}

// ------------------------------------------------------------------------
// -- Read "n1 n2 n3 nrep" records from inl until end of file and run
//    one timed case per record.
//    Returns 0 when the whole file was processed, -1 on a malformed
//    record, a read error, or a failing case.
static int run_phase( FILE *inl, int use_poly, double *gamma )
{
   int n1, n2, n3, nrep, nread;

   // -- Require all four fields: a record that matches fewer never
   //    returns EOF and would otherwise be retried forever.
   while( ( nread = fscanf( inl, "%d%d%d%d\n", &n1, &n2, &n3, &nrep ) )
          == 4 ) {
      if( run_case( n1, n2, n3, nrep, use_poly, gamma ) != 0 ) {
         return -1;
      }
   }
   if( nread != EOF || ferror( inl ) ) {
      fprintf( stderr, "mod2cr: malformed or unreadable record in "
               "mod2cr.in\n" );
      return -1;
   }
   return 0;
}

// ------------------------------------------------------------------------
// -- Program entry: runs every problem size listed in "mod2cr.in" first
//    with cg1 (ILU(0) heading) and then, after rewinding the file, with
//    cg2 (polynomial heading, coefficients from pcoefs).
//    Returns EXIT_SUCCESS when both phases complete, else EXIT_FAILURE.
int main( void )
{
   double gamma[CR_M+1] = { 0.0 };
   FILE   *inl;
   int    status = EXIT_FAILURE;

// -- Do 7-band case with ILU(0) preconditioning (with cg1).

   state( "mod2cr" );
   prthead( "ILU(0)" );
   inl = fopen( "mod2cr.in", "r" );
   if( inl == NULL ) {
      perror( "mod2cr.in" );
      return EXIT_FAILURE;
   }
   if( run_phase( inl, 0, gamma ) != 0 ) {
      goto done;
   }
   printf( "--------------------------------------------------------------\n" );
   printf( "Ran OK\n\n" );

// -- Here starts the solver with polynomial preconditioning.

   prthead( "polynomial" );
   pcoefs( CR_M, gamma );
   rewind( inl );
   if( run_phase( inl, 1, gamma ) != 0 ) {
      goto done;
   }
   printf( "--------------------------------------------------------------\n" );
   printf( "Ran OK\n" );
   status = EXIT_SUCCESS;

done:
   fclose( inl );
   return status;
}
