      Subroutine tfqmr( n, nl, nel, m, indx, rowp, matvals, q, x, b,
     &                  gamma, maxit, tol, exnrm, prec )
! ----------------------------------------------------------------------
! --- tfqmr does an iterative solve of Ax = b, where A is in CRS format:
!     Integer  indx(nel),
!     Integer  rowp(nl+1), and
!     Real(l_) matvals(nel).
!     Integer  n          : Declared length of q, x, b and of all the
!                           local work vectors.
!     Integer  nl         : rowp has nl+1 entries; nl is also the
!                           length handed to spmxv, prec, nrm2, dotpr
!                           and MPI_Allgatherv for the vector section
!                           that starts at index lb.
!     Integer  nel        : Length of indx and matvals.
!     Integer  m          : gamma has m+1 entries; m is passed on to
!                           prec.
!     Real(l_) b(n)       : The right-hand side.
!     Real(l_) x(n)       : On input the initial guess of the solution
!                           On convergence 'x' contains the solution.
!     Real(l_) q(n)       : Contains a (left) preconditioning vector.
!                           NOTE(review): q is not referenced anywhere
!                           in this routine.
!     Real(l_) gamma(m+1) : Polynomial coefficients used in the
!                           preconditioning.
!     Integer  maxit      : The maximum number of iterations allowed.
! --- Real(l_) tol        : Tolerance used as a stop criterion.
!     Real(l_) exnrm      : On return: Sqrt( nrm2 ) of the vector r
!                           last produced by prec, i.e. the norm of
!                           the preconditioned residual as last
!                           evaluated by this routine.
!     External prec       : Name of subroutine performing the
!                           preconditioning.
!
!     The names l_, lb, gub, rtyp, sizes, offset, comm, ierr and flops
!     are not declared here; with Implicit None in effect they have to
!     come from the modules used below.  All vector updates act on the
!     section lb:gub only (presumably the part of the vector owned by
!     this process -- confirm in the used modules).
!
!     nrm2 is used as a SQUARED 2-norm: Sqrt is applied whenever a norm
!     is needed and the bare value is used where a dot product of a
!     vector with itself is wanted (see rho below).
! ----------------------------------------------------------------------
      Use         numerics
      Use         floptime
      Use         mpi_module
      Implicit    None

      Integer  :: n, nl, nel, m, maxit
      Integer  :: indx(nel), rowp(nl+1)
      Real(l_) :: matvals(nel)
      Real(l_) :: q(n), x(n), b(n)
      Real(l_) :: gamma(m+1), tol, exnrm

      External    prec

      Integer  :: i, im, im0, iter
      Real(l_) :: alpha, beta, c, eta, eta0, kappa, rho, rho0, rhstp,
     &            sigma, tau, tau0, theta, theta0
      Real(l_) :: d(n), g(n), h(n), p(n), r(n), rt(n), v(n), w(n),
     &            y(n), y0(n), z(n)
      Real(l_) :: dotpr, nrm2
      External    dotpr, nrm2
! ----------------------------------------------------------------------
! --- Initial preconditioned residual: r = prec( b - A*x ).
!
      Call spmxv( nl, nel, indx, rowp, matvals, x, w(lb) )
      z(lb:gub) = b(lb:gub) - w(lb:gub)
      Call prec( nl, nel, m, indx, rowp, matvals, z, r, gamma )
      w(lb:gub) = r(lb:gub)
      y(lb:gub) = r(lb:gub)
!
! --- g = v = prec( A*y ) for the first search direction y = r.
!
      Call spmxv( nl, nel, indx, rowp, matvals, y, z(lb) )
      Call prec( nl, nel, m, indx, rowp, matvals, z, g, gamma )
      v(lb:gub) = g(lb:gub)
      d = 0.0_l_
      tau = Sqrt( nrm2( nl, r(lb) ) )
      theta = 0.0_l_
      eta   = 0.0_l_
      rt(lb:gub) = r(lb:gub)
!
! --- We can use 'nrm2' instead of dotpr because rt = r.
!
      rho = nrm2( nl, r(lb) )
      rhstp = tol*Sqrt( nrm2( nl, b(lb) ) )
      im0 = 1
!
! --- Give 'iter' and 'exnrm' defined values in case the loop below is
!     not executed at all (maxit < 1): 'iter' is read unconditionally
!     at label 10 and 'exnrm' is returned to the caller.  'tau' is the
!     norm of the initial r, which is exactly what 'exnrm' is set to
!     on the first pass through the loop, so nothing changes when
!     maxit >= 1.
!
      iter  = 0
      exnrm = tau
      Do i = 1, maxit
         iter = i
!
! ---    NOTE(review): in both gathers the send buffer (section that
!        starts at lb) lies inside the receive buffer.  The MPI
!        standard asks for MPI_IN_PLACE in that situation -- confirm
!        that this is acceptable for the MPI library in use.
!
         Call MPI_Allgatherv( rt(lb), nl, rtyp, rt, sizes, offset,
     &                        rtyp, comm, ierr )
         Call MPI_Allgatherv( v(lb), nl, rtyp, v, sizes, offset,
     &                        rtyp, comm, ierr )
         sigma = dotpr( nl, rt(lb), v(lb) )
         If ( sigma == 0.0_l_ ) Then
            Print *, 'Stop: sigma = 0'; Stop
         End If
         alpha = rho/sigma
!
! ---    y0 keeps the current direction, y becomes the next one and
!        h = prec( A*y ) belongs to that next direction.
!
         y0(lb:gub) = y(lb:gub)
         y(lb:gub)  = y(lb:gub) - alpha*v(lb:gub)
         Call spmxv( nl, nel, indx, rowp, matvals, y, z(lb) )
         Call prec( nl, nel, m, indx, rowp, matvals, z, h, gamma )
!
! ---    Two half-steps per outer iteration.  The first one works with
!        (y0, g) as set above; at its end y0 and g are replaced by
!        y and h, which the second half-step then uses.
!
         Do im = im0, im0 + 1
            w(lb:gub) = w(lb:gub) - alpha*g(lb:gub)
            theta0 = theta
            tau0   = tau
            If ( tau0 == 0.0_l_ ) Then
               Print *, 'Stop: tau0 = 0'; Stop
            End If
            theta = Sqrt( nrm2( nl, w(lb) ) )/tau
            c = 1.0_l_/Sqrt( 1.0_l_ + theta*theta )
            tau  = tau0*theta*c
            eta0 = eta
            eta  = c*c*alpha
            If ( alpha == 0.0_l_ ) Then
               Print *, 'Stop: alpha = 0'; Stop
            End If
            d(lb:gub) = y0(lb:gub)+(theta0*theta0*eta0/alpha)*d(lb:gub)
            x(lb:gub) = x(lb:gub) + eta*d(lb:gub)
!
! ---       NOTE(review): r is only changed inside the 'kappa < tol'
!           branch below, so this normally recomputes an unchanged
!           value.  It is left in place so that the work done per
!           iteration stays as it was.
!
            exnrm = Sqrt( nrm2( nl, r(lb) ) )
!
! ---       kappa is the cheap estimate that decides whether the
!           residual is worth evaluating explicitly.  It is compared
!           with 'tol' itself, whereas the explicit residual norm is
!           compared with rhstp = tol*Sqrt( nrm2( b ) ).
!
            kappa = Sqrt( Real( im + 1, l_ ) )*tau
            If ( kappa < tol ) Then
!
! ---          Explicit check: r = prec( b - A*x ).
!
               Call spmxv( nl, nel, indx, rowp, matvals, x, p(lb) )
               z(lb:gub) = b(lb:gub) - p(lb:gub)
               Call prec( nl, nel, m, indx, rowp, matvals, z, r, gamma )
               exnrm = Sqrt( nrm2( nl, r(lb) ) )
               If ( exnrm < rhstp ) Go To 10  ! <--- Convergence.
!
! ---          Only reached when the explicit check failed.
!
               flops = flops + n + 9
            End If
            y0(lb:gub) = y(lb:gub)
            g(lb:gub)  = h(lb:gub)
         End Do                               ! <--- im-loop
         rho0 = rho
         Call MPI_Allgatherv( w(lb), nl, rtyp, w, sizes, offset,
     &                        rtyp, comm, ierr )
         rho  = dotpr( nl, rt(lb), w(lb) )
         If ( rho0 == 0.0_l_ ) Then
            Print *, 'Stop: rho0 = 0'; Stop
         End If
         beta = rho/rho0
!
! ---    New direction y, g = prec( A*y ), and the update of v for the
!        next outer iteration.  Here y0 equals the y of the second
!        half-step (copied at the end of the im-loop).
!
         y(lb:gub) = w(lb:gub) + beta*y0(lb:gub)
         Call spmxv( nl, nel, indx, rowp, matvals, y, z(lb) )
         Call prec( nl, nel, m, indx, rowp, matvals, z, g, gamma )
         v(lb:gub) = g(lb:gub) + beta*( h(lb:gub) + beta*v(lb:gub) )
         im0 = im0 + 2
      End Do                                 ! <--- i-loop
! ----------------------------------------------------------------------
! --- Normally we would end up here when there is no convergence and
!     issue a warning. Because we only want to see the residual value
!     and the speed for benchmarking purposes, we comment out the
!     following lines:
!
!     If ( iter >= maxit ) Then
!          iter = maxit
!          Print *, 'No convergence in ', maxit, ' iterations;'
!          Print *, 'Norm of residual =', exnrm
!          Stop
!     End If
!
! --- Label 10 is reached both on convergence (Go To above) and after
!     the i-loop has run out of iterations.
!
   10 flops = flops + nl + 10 + iter*( 22*nl + 79 )   ! <--- # of flops.
! ----------------------------------------------------------------------
      End Subroutine tfqmr
