      Program pingpong
! ----------------------------------------------------------------------
! **********************************************************************
! *** This program is part of the EuroBen Efficiency Benchmark       ***
! ***                                                                ***
! *** Copyright: European Benchmark Group p/o                        ***
! ***            Utrecht University, High Perf. Computing Group      ***
! ***            P.O. Box 80195                                      ***
! ***            3508 TD Utrecht                                     ***
! ***            The Netherlands                                     ***
! ***                                                                ***
! *** Author of this program: Aad J. van der Steen                   ***
! *** Email: steen@phys.uu.nl                                        ***
! *** Date : Spring 1998                                             ***
! **********************************************************************
! ----------------------------------------------------------------------
      Use                   numerics
      Implicit              None
      Include               'mpif.h'

      Integer            :: status, istat(MPI_Status_Size) 
      Integer            :: i, icase, ireps, length, nreps
      Logical            :: ok

      Integer, Parameter :: nelem = 1 000 000, ncases = 51,
     &                      totsiz = 10 000 000
      Integer            :: message(nelem), cases(ncases)
      Real(l_)           :: incpt, perc, slope, time
      Real(l_)           :: bw, bwmax, latency, lperc
 
      Integer            :: me, nodes
      Common /dist/         me, nodes

! --- Number of elements transfered:
!
      Data cases/    1,    2,    3,    4,    5,    6,    7,    8,    9,
     &              10,   20,   30,   40,   50,   60,   70,   80,   90,
     &             100,  200,  300,  400,  500,  600,  700,  800,  900,
     &            1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000,
     &           10000,20000,30000,40000,50000,60000,70000,80000,90000,
     &          100000,     200000,     400000,     600000,     800000,
     &         1000000 /
! ---------------------------------------------------------------------
! --- Call identification routine.
! --- Initialise MPI and spawn task that has to answer back.
      Call csetup

      If ( me == 0 ) Call state('pingpong')

! ---------------------------------------------------------------------
! --- If I am the first node, initialize data , pack and send data
!     and wait for data from the second node.
!     Also initialise the least-squares routine and max. bandwidth.
! ---------------------------------------------------------------------
      If ( me == 0 ) Then
         Do i = 1, nelem
            message(i) = i
         End Do
         Call lsq( 0, 0.0_l_, 0.0_l_, slope, incpt, perc )
         bwmax = 0.0_l_
! ---------------------------------------------------------------------
! --- Functional part: Perform transfer functions and time them.
! ---------------------------------------------------------------------
! --- Time moment of transfer of the message "length" elements of
!     the integer array "message". (Loop over "ncases" cases)
! --- The send/receives are repeated "nreps" times to get a reliable
!     timing. At present always about "totsiz*4" bytes
!     are transfered in two directions. This is done in packages of
!     "length" bytes.
!
         Print 1000
         Do icase = 1, ncases
            length = cases(icase)
            nreps = totsiz/length
! --------------------------------------------------------------------- 
! --- Pack data in buffer to be sent.
!
            time = MPI_Wtime()
            Do ireps = 1, nreps
! ---------------------------------------------------------------------
! --- Send data to other processsor.
!
               Call MPI_Send( message, length, MPI_Integer, 1,
     &                        1, MPI_Comm_World, status )

! --- Now, get message back from the destination processor.
!
               Call MPI_Recv( message, length, MPI_Integer, 1,
     &                        2, MPI_Comm_World, istat, status )
!              Call MPI_Barrier( MPI_Comm_World, status )
            End Do
! ---------------------------------------------------------------------
! --- Time again and divide by 2 to get communication time.
!
            time = ( MPI_Wtime() - time )/( 2.0_l_*nreps )
            If ( Abs( time ) <= 1.0e-14_l_ ) Then
                Print *, 'Time interval too short to measure', time
            Else
                Call lsq( 1, Real( length, l_ ), time, slope, incpt,
     &                    perc )
                If ( length == 30 ) Then
                   latency = incpt*1.0e6_l_ 
                   lperc   = perc
                End If
                bw = 1.0e-6_l_*Real( 4*length, l_ )/time
                bwmax = Max( bwmax, bw )
                Print 1010, 4*length, time, bw
            End If
         End Do
         Print 1020
         Print 1030, bwmax, latency, lperc
! ---------------------------------------------------------------------
! --- Test whether the values have been transfered correctly.
!
         ok = .TRUE.
         Do i = 1, nelem
            ok = ok .AND. ( message(i) .EQ. i )
         End Do
         If( ok ) Then
            Print *, ' '
            Print *, ' No errors during processor-processor '
            Print *, ' communication.'
         Else
            Print *, ' '
            Print *, ' Not all values were transfered correctly '
            Print *, ' between  processors.'
         End If

      Else
! ---------------------------------------------------------------------
! --- This is the receiving processor that sends back the messages 
!     from processor 0 as soon as they are received.
! ---------------------------------------------------------------------

!     Loop over "ncases" cases.
!
! --- Every case of length "length" bytes is repeated "nreps" times
!     to obtain a reliable timing. At present a total of "totsiz*4"
!     bytes are transfered in chuncks of "length" bytes.
!
         Do icase = 1, ncases
            length = cases(icase)
            nreps = totsiz/length
            Do ireps = 1, nreps
! ---------------------------------------------------------------------   
! --- Receive message from the sending processor and send it back
!     immediately.
 
               Call MPI_Recv( message, length, MPI_Integer, 0,
     &                        1, MPI_Comm_World, istat, status )
               Call MPI_Send( message, length, MPI_Integer, 0,
     &                        2, MPI_Comm_World, status )
            End Do
         End Do
      End If
! ----------------------------------------------------------------------
         Call MPI_Finalize( istat )
         Stop
 1000 Format( 'Program pingpong: measure distributed memory communicat',
     &        'ion'/
     &        '-------------------------------------------------------'/
     &        '| Mess. length | Transfer time |   Bandwidth   |'/
     &        '|    (Bytes)   |    (seconds)  |   (Mbyte/s)   |'/
     &        '------------------------------------------------' )
 1010 Format( '|', 3x, i8, 3x, '|', 1x, g13.5, 1x, '|', 1x, g13.5, 1x,
     &        '|' )
 1020 Format( '------------------------------------------------' )
 1030 Format( 'Maximum bandwidth = ' g12.5, ' MB/s'/
     &        'Latency   = ', g11.4, ' microsec., Error = ', f6.2, '%'/
     &        '-------------------------------------------------------')
! ----------------------------------------------------------------------
      End Program pingpong
