      Program mod2cr
! ----------------------------------------------------------------------
! **********************************************************************
! *** This program is part of the Euroben Benchmark                  ***
! ***                                                                ***
! *** Copyright: EuroBen Group p/o                                   ***
! ***            Utrecht University, Computational Physics Dept.     ***
! ***            P.O. Box 80.000                                     ***
! ***            3508 TA Utrecht                                     ***
! ***            The Netherlands                                     ***
! ***                                                                ***
! *** Author of this program: Aad van der Steen                      ***
! *** Date                    Summer 2005                            ***
! **********************************************************************
! ----------------------------------------------------------------------
! --- Version 1.0 (Distributed-memory parallel, MPI).

! --- Purpose of program mod2cr
! -----------------------------
! --- Solver for sparse linear systems with iterative methods and
!     two types of preconditioners. The systems are not actually
!     solved but rather a predefined number of iterations is performed
!     to assess the speed of the solver-preconditioner combination.
!     Two types of systems are considered:
!     1. - Systems stemming from 3-D finite difference problems
!          resulting in 7-banded matrices. We consider only the
!          symmetric type in this program 'mod2cr'.
!     2. - Finite element type, irregularly filled matrices.
!          They are stored in CRS format. These systems are addressed
!          in ANOTHER program: 'mod2ci'.
!
! --- In this Program 'mod2cr' we address the regular, banded type
!     of problem. For the irregular CRS matrix-based type we have
!     program 'mod2ci'.
!
!     Solvers used:
!     -  For the symmetric banded systems in 'mod2cr': CG with ILU(0)
!        and polynomial preconditioning.
!     -  For irregular systems in 'mod2ci': RGMRES and TFQMR with
!        polynomial preconditioning.
!
! --- Structure
!     The input file 'mod2cr.in' (unit 1) is read twice; every record
!     holds n1, n2, n3, nrep. The first pass passes 'sym7icg0' to the
!     CG driver, the second pass passes 'sym7pol'. Both passes share
!     the internal routine 'runcases' below.
! ----------------------------------------------------------------------
      Use                       numerics
      Use                       mpi_module
      Use                       floptime
      Implicit                  None

! --- maxit is kept a variable (not a Parameter) because it is passed
!     to 'cg', whose argument intents are not visible here.
      Integer                :: maxit = 10
      Integer, Parameter     :: mp = 1
      Real(l_)               :: gamma(mp+1)
      Real(l_), Parameter    :: tol = 1.0e-12_l_
      Real(l_), Parameter    :: micro = 1.0e-6_l_
      Integer                :: ioerr
      External               :: sym7icg0, sym7pol
! ----------------------------------------------------------------------
      Call mpistart
      If ( me == 0 ) Call state( 'mod2cr  ' )

! --- The input file must already exist: without Status = 'Old' some
!     compilers silently create an empty file and the benchmark would
!     then report nothing. Abort the whole MPI job on failure so that
!     no process is left waiting in a collective call.
      Open( 1, File = 'mod2cr.in', Status = 'Old', Action = 'Read',
     &      IOStat = ioerr )
      If ( ioerr /= 0 ) Then
         Print 1040, me, ioerr
         Call MPI_Abort( comm, 1, ierr )
         Stop 1
      End If
      If ( me == 0 ) Print 1000, nodes
      If ( me == 0 ) Print 1005
! ----------------------------------------------------------------------
! --- Do 7-band case with ILU(0) preconditioning.

      Call pcoefs( mp, gamma )
      Call runcases( sym7icg0 )
      If ( me == 0 ) Print 1020
      If ( me == 0 ) Print 1025
! ----------------------------------------------------------------------
! --- Do 7-band case with von Neumann polynomial preconditioning.

      Rewind 1
      If ( me == 0 ) Print 1030, nodes
      If ( me == 0 ) Print 1005
      Call pcoefs( mp, gamma )
      Call runcases( sym7pol )
      If ( me == 0 ) Print 1020

      Close( 1 )
      Call mpibye
! ----------------------------------------------------------------------
 1000 Format( 'Program mod2cr: Sparse iterative solver test'/
     &        'Symmetric, 7-band: CG with ILU(0) preconditioner.'/
     &        'No. of proc.s = ', i5 )
 1005 Format( '-------------------------------------------------------',
     &        '-----'/
     &        '  n1 |  n2 |  n3 |   Time(s)   |   Mflop/s   |   Residu',
     &        'e   |'/
     &        '-------------------------------------------------------',
     &        '----|' )
 1020 Format( '-------------------------------------------------------',
     &        '-----' )
 1025 Format ( / )
 1030 Format( '-------------------------------------------------------',
     &        '-----'/
     &        'Program mod2cr: Sparse iterative solver test'/
     &        'Symmetric, 7-band: CG with polynomial preconditioner.'/
     &        'No. of proc.s = ', i5 )
 1040 Format( 'mod2cr: process ', i5, ' cannot open input file ',
     &        '''mod2cr.in'' (IOStat = ', i8, ').' )
! ----------------------------------------------------------------------
      Contains
! ----------------------------------------------------------------------
      Subroutine runcases( precon )
! ----------------------------------------------------------------------
! --- Reads records (n1, n2, n3, nrep) from unit 1 until end-of-file
!     and, for each record, times nrep repetitions of the sequence
!     sym7pre / sym7com / cg, handing 'precon' to 'cg' as its last
!     argument. Process 0 prints one result line per record.
!
!     precon : external procedure passed through unchanged to 'cg'.
!
!     Uses by host association: gamma, maxit, mp, tol, micro and the
!     entities made available by the Use'd modules (me, sizes, flops,
!     comm, ierr, ityp, rtyp, ...).
! ----------------------------------------------------------------------
      External               :: precon
      Real(l_), Allocatable  :: areg(:,:), aorg(:,:), al(:,:), b(:),
     &                          borg(:), conv(:), fac(:), x(:)
      Real(l_)               :: corr, gtime, mflops, res, time
      Integer(8)             :: gflops
      Integer                :: i, ios, m, n1, n2, n3, nrep, ntot

      Do
         Read( 1, *, IOStat = ios ) n1, n2, n3, nrep
! ---    Negative status: end of file, this pass is finished.
         If ( ios < 0 ) Exit
! ---    Positive status: unreadable record. Stop the whole job
!        rather than letting one process die on its own.
         If ( ios > 0 ) Then
            Print 1050, me, ios
            Call MPI_Abort( comm, 1, ierr )
            Stop 1
         End If
! ---    Non-positive sizes or repetition counts would lead to a
!        division by zero below and to conv(maxit) never being set.
         If ( Min( n1, n2, n3, nrep ) < 1 ) Then
            If ( me == 0 ) Print 1060, n1, n2, n3, nrep
            Cycle
         End If

         ntot = n1*n2*n3
         Call evdist( ntot )
         Call bsaddr
         Call taylor( n1, n2, n3 )
         m = sizes(me)
         Allocate( areg(m,0:3), aorg(m,0:3), al(m,1:3),
     &             b(ntot), borg(ntot), conv(maxit), fac(ntot),
     &             x(ntot) )
         Call sym7gen( ntot, m, aorg, borg )

! ---    Timed section: nrep full repetitions, each starting from
!        the unmodified matrix and right-hand side.
         flops = 0
         time = MPI_Wtime()
         Do i = 1, nrep
            x = 0.0_l_
            areg = aorg
            b    = borg
            Call sym7pre( m, n1, n2, n3, areg, b, fac )
            Call sym7com( m, n1, n2, ntot, areg, al )
            Call cg( n1, n2, n3, m, mp, areg, al, b, x, gamma, maxit,
     &               conv, tol, precon )
            x = x*fac
            flops = flops + ntot
         End Do
         time = MPI_Wtime() - time

! ---    Time the array resets on their own and subtract that from
!        the measurement, so only the solver work is reported.
         corr = MPI_Wtime()
         Do i = 1, nrep
            x = 0.0_l_
            areg = aorg
            b    = borg
         End Do
         time = time - MPI_Wtime() + corr

! ---    Sum the operation counts, take the slowest process' time.
         Call MPI_Allreduce( flops, gflops, 1, ityp, MPI_Sum, comm,
     &                       ierr )
         Call MPI_Allreduce( time, gtime, 1, rtyp, MPI_Max, comm,
     &                       ierr )
! ---    After the correction gtime may be zero or negative for very
!        small problems; report 0 Mflop/s instead of Inf or NaN.
         If ( gtime > 0.0_l_ ) Then
            mflops = micro*Real( gflops, l_ )/gtime
         Else
            mflops = 0.0_l_
         End If
         gtime = gtime/Real( nrep, l_ )
         res = conv(maxit)
! ----------------------------------------------------------------------
! --- Routine 'checkreg' is only used for debugging but has no function
!     in the finished program. Re-enabling it requires declaring
!     Real(l_) :: mindf, maxdf in this routine.
!        Call checkreg( n1, n2, n3, areg, b, x, mindf, maxdf )

         Deallocate ( areg, aorg, al, b, borg, conv, fac, x )
         If ( me == 0 ) Print 1010, n1, n2, n3, gtime, mflops, res
      End Do
! ----------------------------------------------------------------------
 1010 Format( i5, '|', i5, '|', i5, '|', g13.5, '|', g13.5, '|', g13.5,
     &        '|' )
 1050 Format( 'mod2cr: process ', i5, ' failed reading ',
     &        '''mod2cr.in'' (IOStat = ', i8, ').' )
 1060 Format( 'mod2cr: skipping invalid input line (n1, n2, n3, ',
     &        'nrep) = ', 4i8 )
      End Subroutine runcases
! ----------------------------------------------------------------------
      End Program mod2cr
