#include <stdlib.h>
#include <math.h>
#include "fundefs.h"

void cg2( int n1, int n2, int n3, int m, double **a, double *b, double *x,
         double *q, double *gamma, int maxit, double *conv, int *its,
         double tol )
/*---------------------------------------------------------------------
  --- cg2 does an iterative solve of Ax = b with polynomial preconditioning
      (preconditioned conjugate gradient iteration).

      int    n1, n2, n3      : Grid dimensions; the system has
                                 n1*n2*n3 unknowns.
      int    m               : Passed through to sym7pol together with
                                 gamma (presumably the degree of the
                                 preconditioning polynomial, given that
                                 gamma has m+1 entries).
      double a(n1*n2*n3,0:3) : 7-diagonal symmetric matrix of which
                                 only the 4 upper diagonals are stored.
      double b(n1*n2*n3)     : The right hand side.
      double x(n1*n2*n3)     : On input the initial estimate of the
                                 solution, on output the solution
                                 (hopefully).
      double q(n1*n2*n3)     : Contains (left) preconditioning vector.
      double gamma(m+1)      : Polynomial coefficients used in the
                                 preconditioning.
      int    maxit           : On input the maximum number of
                                 iterations allowed.
      double conv(maxit)     : On output conv[i] holds the square root
                                 of the norm value returned by sym7pol
                                 after iteration i+1.
      int    *its            : On output the number of iterations that
                                 were actually performed (0..maxit).
      double tol             : Tolerance used as a stop criterion: the
                                 iteration stops once the norm value
                                 drops below tol*tol times its initial
                                 value.

      The global counter 'flops' is increased by an estimate of the
      floating-point work done.

      If the work vectors cannot be allocated (or n1*n2*n3 <= 0) the
      routine returns with *its = 0 and x untouched.
 ----------------------------------------------------------------------*/
{
   extern long flops;
   int    i, k, ntot, niter;
   double *ap, *p, *r;
   double nr0, nnrm;
   double alpha, beta, nrm, pap, tol2;
// ---------------------------------------------------------------------
   ntot  = n1*n2*n3;
   tol2  = tol*tol;
   niter = 0;
   *its  = 0;
   if ( ntot <= 0 ) {
      return;
   }
   ap = calloc( ntot, sizeof *ap );
   p  = calloc( ntot, sizeof *p );
   r  = calloc( ntot, sizeof *r );
   if ( ap == NULL || p == NULL || r == NULL ) {
      // -- No error channel in the interface: release what we got and
      // -- report zero iterations.
      free( ap ); free( p ); free( r );
      return;
   }
// ---------------------------------------------------------------------
// -- Initial residual r = b - A*x (sym7mxv presumably stores the
// -- matrix-vector product of its 5th argument in its 6th argument).

   sym7mxv( n1, n2, n3, a, x, r );
   for( k = 0; k < ntot; ++k ) {
      r[k] = b[k] - r[k];
   }
// ---------------------------------------------------------------------
// -- Precondition of initial residual. The result lands in p, which
// -- thereby becomes the first search direction; nr0 receives the
// -- associated norm value used as reference for the stop criterion.

   sym7pol( n1, n2, n3, m, a, &nr0, q, r, p, gamma );
   nrm = nr0;
// ---------------------------------------------------------------------
// -- Iterate at most maxit times.

   for( i = 0; i < maxit; ++i ) {
      sym7mxv( n1, n2, n3, a, p, ap );
      pap = dotpr( ntot, p, ap );
      if ( pap == 0.0 ) {
         // -- Zero search direction (e.g. x was already exact): no
         // -- further progress is possible and nrm/pap would be NaN.
         break;
      }
      alpha = nrm/pap;
      for( k = 0; k < ntot; ++k ) {
         x[k] += alpha*p[k];
         // -- r = b - A*x, so stepping x by +alpha*p lowers r by
         // -- alpha*A*p.
         r[k] -= alpha*ap[k];
      }
      // -- ap is reused here to receive the preconditioned residual.
      sym7pol( n1, n2, n3, m, a, &nnrm, q, r, ap, gamma );
      conv[i] = sqrt( nnrm );
      niter   = i + 1;
      if ( nnrm < nr0*tol2 ) {
         break;
      }
      beta = nnrm/nrm;
      nrm  = nnrm;
      // -- New search direction: preconditioned residual plus beta
      // -- times the previous direction.
      for( k = 0; k < ntot; ++k ) {
         p[k] = ap[k] + beta*p[k];
      }
   }
   // -- Do the arithmetic in long to avoid int overflow on big grids.
   flops += (long)niter*( 8L*ntot + 4L ) + ntot + 1L;
   *its = niter;
   free( ap ); free( p ); free( r );
} // -- End of cg2
