      Program mod2am
! ----------------------------------------------------------------------
! **********************************************************************
! *** This program is part of the EuroBen Benchmark                  ***
! ***                                                                ***
! *** Copyright: EuroBen (European Benchmark Group)                  ***
! *** Distribution by:                                               ***
! ***            National Computer Facilities Foundation             ***
! ***            L.J. Costerstraat 5                                 ***
! ***            6827 AR Arnhem                                      ***
! ***            The Netherlands                                     ***
! ***                                                                ***
! *** Author of this program: Aad van der Steen                      ***
! *** Contributed             Spring 2009                            ***
! **********************************************************************
! Version 1.0
!
! --- Program 'mod2am' does a dense matrix-matrix multiplication via a
!     vector-update.
!     The size of the matrices is read and they are generated
!     internally by routine 'gendat'.
!
!     Input : file 'mod2am.in' (unit 1); each record holds the four
!             integers  m  l  n  nrep  for C(m,n) = A(m,l)*B(l,n),
!             where nrep is the number of timed repetitions.
!     Output: one table row per record with the problem size, the
!             average time per multiplication, the Mflop/s rate and
!             the result of the correctness check.
! ----------------------------------------------------------------------
      Use                      numerics
      Implicit                 None

      Real(l_), Allocatable :: a(:,:), b(:,:), c(:,:)
      Integer               :: i, ios, lda, m, l, n, nrep
      Integer               :: nprocs, omp_get_num_threads
      Real(l_)              :: nops, speed, time, wclock
      Logical               :: ok
! ----------------------------------------------------------------------
      Call state( 'mod2am  ' )

! --- The input file must already exist; do not silently create an
!     empty one when it is missing.
      Open( 1, File = 'mod2am.in', Status = 'old', Action = 'read',
     &      Iostat = ios )
      If ( ios /= 0 ) Then
         Stop 'mod2am: cannot open input file mod2am.in'
      End If

! --- Query the team size. Only one thread stores the result, so
!     there is no concurrent write to the shared variable. The call
!     is compiled only when OpenMP is enabled ('!$' sentinel);
!     otherwise nprocs keeps its default of 1.
      nprocs = 1
!$omp parallel shared(nprocs)
!$omp single
!$    nprocs = omp_get_num_threads()
!$omp end single
!$omp end parallel
      Print 1000, nprocs

! --- Process input records until end of file or a read error.
      Do
         Read( 1, *, Iostat = ios ) m, l, n, nrep
         If ( ios /= 0 ) Exit

! ---    Non-positive sizes or repetition counts cannot be timed
!        (empty product, division by nrep): report and skip them.
         If ( Min( m, l, n, nrep ) < 1 ) Then
            Print 1030, m, l, n, nrep
            Cycle
         End If

! ---    The leading dimension of a is one larger than the number of
!        rows actually used.
!        NOTE(review): presumably padding to avoid unfavourable
!        strides -- confirm against 'gendat'/'mxm'.
         lda = m + 1
         Allocate( a(lda,l), b(l,n), c(m,n) )

! ---    One multiply and one add per term of each inner product.
         nops = 2*Real( m, l_ )*Real( l, l_ )*Real( n, l_ )
         Call gendat( a, b, lda, m, l, n )   ! --- Generate data.

         time = wclock()                     ! --- Time MxM mult.
         Do i = 1, nrep
            Call mxm( a, b, c, lda, m, l, n )
         End Do
         time = wclock() - time

! ---    'ok' is preset to .TRUE. and handed to 'check'.
!        NOTE(review): presumably 'check' clears it on a mismatch.
         ok = .TRUE.
         Call check( c, m, n, ok )           ! --- Correctness check.

! ---    Mflop/s over all repetitions; the Max() guards against a
!        zero elapsed time from a coarse clock.
         speed = 1.0e-6_l_*( Real( nrep, l_ )*nops )/
     &           Max( time, 1.0e-9_l_ )

! ---    Columns are printed in the order announced by the header:
!        m, l, n.
         Print 1010, m, l, n, time/nrep, speed, ok
         Deallocate( a, b, c )
      End Do

      Close( 1 )
      Print 1020
      If ( ios > 0 ) Print *, 'mod2am: read error in mod2am.in'
! ---------------------------------------------------------------------
 1000 Format( 'Matrix-matrix multiplication test C(m,n)=A(m,l)*B(l,n)'/
     &        'No. of OpenMP threads = ', i3/
     &        '-----------------------------------------------------'/,
     &        '    Problem size    |            |            |     |'/,
     &        '   m  |   l  |   n  |  Time (s)  |  (Mflop/s) | OK? |'/,
     &        '-----------------------------------------------------' )
 1010 Format( i5, ' |', i5, ' |', i5,' |', g11.4, ' |', g11.4, ' |',
     &        l3, '  |' )
 1020 Format( '-----------------------------------------------------' )
 1030 Format( 'mod2am: skipping invalid input (m,l,n,nrep) =', 4i8 )
! ----------------------------------------------------------------------
      End Program mod2am
