      Program mod3a
! **********************************************************************
! *** This program is part of the EuroBen Benchmark                  ***
! *** Copyright: EuroBen Group p/o                                   ***
! ***            Utrecht University, Computational Physics Dept.     ***
! ***            P.O. Box 80.000                                     ***
! ***            3508 TA Utrecht                                     ***
! ***            The Netherlands                                     ***
! ***                                                                ***
! *** Author of this program: Aad van der Steen                      ***
! *** Date       January 1995, bug fix  May 1997, Fortran 90 version ***
! ***            Spring 1999                                         ***
! **********************************************************************
!  Version 4.0
!
! --------------------------------------------------------------------
!
! --- MOD3a tests a version of a condensed matrix-vector multiplication.
!     The main program drives the subroutine 'abc01' which does the
!     actual work.
! --- Routine 'drsmxv' does a vector update c(n) = A(n,m)*b(m) + c(n).
!     A is an (n,m) matrix in condensed form: For each of the 'n' rows
!     the number of elements /= 0.0 is held in array na(n) which
!     resides on disk.
!     The column numbers for entries /= 0.0 of A are held in array
!     'ja' and the entries proper in array 'ra'. Both are on disk.
!     'b' is an (m)-vector which is held in core.
!     'c' is an (n)-vector which resides on disk.
!
! --------------------------------------------------------------------
      Use           numerics
      Implicit      None

      Integer, Parameter  :: lamax = 1024, mmax= 200 000
      Integer, Parameter  :: lua = 1, luc = 9, luna = 10
      Real(l_)            :: b(mmax), c(lamax), ra(lamax)
      Real(l_)            :: dmacdp, dran1, wclock
      Real(l_)            :: diff, eps, ioread, iowrit, mflops, time,
     &                       var
      Real(l_), Parameter :: zero = 0.0_l_, one   = 1.0_l_,
     &                       two  = 2.0_l_, twenp = 0.2_l_,
     &                       half = 0.5_l_, micro = 1.0e-6_l_,
     &                       nano = 1.0e-9_l_
      Integer             :: ja(lamax), na(lamax)
      Integer             :: i, idum, j, lral, m, n, nfill, nflor,
     &                       nflops, nrest
      Logical             :: allok

! --------------------------------------------------------------------
!
! --- Call identification routine for this program.
!
      Call state( 'mod3a   ' )
!
! --------------------------------------------------------------------
!
! --- Open files for input and to hold 'c', 'na', 'ja', and 'ra'.
!
      Open( 2, File = 'mod3a.in' )
      Open( lua, File = 'jara', Form = 'unformatted',
     &      Status = 'scratch' )
      Open( luc, File = 'cvals', Form = 'unformatted',
     &      Status = 'scratch' )
      Open( luna, File = 'nafl', Form = 'unformatted',
     &      Status = 'scratch' )
      Print 1000
!
! --------------------------------------------------------------------
!
! --- Generate date for 'b', 'c', 'na', 'ja', and 'ra' after reading
!     row and column dimensions of the matrix. The column dimension 
!     should not exceed 'mmax'.
!
! <<<<<<<<<<<<<<<<<<<<<
   10 Continue
! <<<<<<<<<<<<<<<<<<<<<
! >>>>>>>>>>>>>>>>>>>>>
      Read( 2, *, End = 20 ) m, n
! >>>>>>>>>>>>>>>>>>>>>
      If ( m > mmax ) Then
         Print *, 'm = ', m, ' > mmax (', mmax, '): Increase mmax'
         Stop
      End If
      Do i = 1, mmax
         b(i) = one
      End Do

! --------------------------------------------------------------------
! --- Now generate for each row the number indicating the columns that
!     are /= 0.0. The array 'na' holding these numbers is never
!     entirely in core. It is generated in chunks of length 'lamax' and
!     written to unit 'luna'.
! --- The filling of the matrix with elements /= 0.0 is about 0.1%
!     and we choose a variation in the number of row entries of
!     about 20%.
! --- We count the total number of row entries as 2*Sum(na(i)) is the
!     number of flops performed in the program.

      nflor  = n/lamax
      nfill  = m/1000
      nflops = 0
      var    = twenp*Real( nfill, l_ )
      nrest  = Mod( n, lamax )
      idum   = -1993
      Do i = 1, nflor
         Do j = 1, lamax
            na(j) = nfill + Int( var*( dran1( idum ) - half ) )
            nflops = nflops + na(j)
         End Do
         Write( luna ) na
      End Do
      Do j = 1, nrest
         na(j) = nfill + Int( var*( dran1( idum ) - half ) )
         nflops = nflops + na(j)
      End Do
      nflops = 2*nflops
      Do j = nrest+1, lamax
         na(j) = 0
      End Do 
      Write( luna ) na

! --------------------------------------------------------------------
! --- Generate 'ja' and 'ra'. As in the case of 'na' the arrays are
!     never entirely in core and are written in chunks of length
!     'lamax' to unit 'lua'.

      Rewind luna
      lral = 1
      Do i = 1, nflor
         Call genraja( na, ja, ra, m, lamax, lamax, lua, luna, lral )
      End Do
      If ( nrest /= 0 ) Then
         Call genraja( na, ja, ra, m, nrest, lamax, lua, luna, lral )
      End If    

! --------------------------------------------------------------------
! --- End of data generation. We now time the matrix multiplication
!     routine 'drsmxv'.

      time = wclock()
      Call drsmxv( ra, ja, na, m, n, lamax, lua, luc, luna, b, c, 
     &             ioread, iowrit )
      time = wclock () - time
      mflops = micro * Max( Real( nflops, l_ )/time, nano )
      Print 1010, n, m, time, mflops, ioread, iowrit

! --------------------------------------------------------------------
! --- Correctness check for matrix multiplication.
!     'eps' is the tolerance allowed in the elements of 'c'. 

      Rewind luna
      Rewind luc
      eps = two*Real( nfill, l_ )*Epsilon( one )
      allok = .TRUE.
      Do i = 1, nflor
         Read( luna ) na
         Read( luc ) c
         Do j = 1, lamax
            diff = Real( na(j), l_ ) - c(j)
            If ( Abs ( diff ) > eps ) Then
               Print 1030, (i-1)*lamax + j, diff
               allok = .FALSE.
            End If
         End Do
      End Do
      Do j = 1, nrest
         diff = Real( na(j), l_ ) - c(j)
         If ( Abs( diff ) > eps ) Then
            Print 1030, nflor*lamax + j, diff
            allok = .FALSE.
         End If
      End Do
! >>>>>>>>>>>>>>>>>>>>>
      Go To 10
! >>>>>>>>>>>>>>>>>>>>>
! <<<<<<<<<<<<<<<<<<<<<
   20 Continue
! <<<<<<<<<<<<<<<<<<<<<
      Print 1020
      If ( allok ) Print 1040
! --------------------------------------------------------------------
! --- Formats.

 1000 Format ( ///, ' ----------------------------------------------',
     1         '---------------------------',
     2         /,' Mod3a: Out-of-core Matrix-vector ',
     3         'multiplication',/
     4         74('-'),/
     5         '  Row   | Column | Exec. time  |  Mflop rate |',
     5         '  Read rate  |  Write rate |',/
     7         '  (n)   |   (m)  |   (sec)     |   (Mflop/s) |',
     8         '    (MB/s)   |    (MB/s)   |',/
     9         74('-') )
 1010 Format ( I7, ' |', I7, ' |', G13.5, '|', G13.5, '|', G13.5,
     &         '|', G13.5, '|' )
 1020 Format ( 74('-') )    
 1030 Format ( 'Deviation in row ', I7, ' = ', G13.5 ) 
 1040 Format ( //,' >>> All results were within error bounds <<<' )
! --------------------------------------------------------------------
      End Program mod3a
