!    This program is free software: you can redistribute it and/or modify
!    it under the terms of the GNU General Public License as published by
!    the Free Software Foundation, either version 3 of the License, or
!    (at your option) any later version.
!
!    This program is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU General Public License for more details.
!
!    You should have received a copy of the GNU General Public License
!    along with this program.  If not, see <http://www.gnu.org/licenses/>.

      program matrix_multiplication

! Code to illustrate example given in talk
! (c) Manchester Computing  Spring 1999
! Michael Bane (michael.bane@man.ac.uk)
!
! Accumulates a = a + b*c for n-by-n default-real matrices using one of
! six loop orderings, selected at preprocessing time through the macros
! _A (ijk), _B (ikj), _C (jik), _D (jki), _E (kij) and _F (kji).
! Only the _A and _D variants start and stop the PAPI counters around
! the loop nest; for the other variants the reported counter values
! are the zeros they were initialised with.

      implicit none
        include 'f90papi.h'
        !include 'test_utils.h'

!     Matrix order (alternative sizes kept for experimentation)
      !integer,parameter :: n=1024
      integer,parameter :: n=512
      !integer,parameter :: n=256
      !integer,parameter :: n=350

      real a(n,n), b(n,n), c(n,n)

      integer i,j,k

      integer num_pes
!     Only compiled when OpenMP is enabled: gives the OpenMP query
!     function a type, which implicit none requires.
!$    integer omp_get_num_threads
!     timef is an external function defined later in this file
      double precision start, finish, time_taken, timef
!     PAPI standardized event to be monitored
      INTEGER event(2)
!     PAPI values of the counters
      INTEGER*8 values(2)
      integer num_events,retval
      num_events=0
      retval=-1
!     The counters are only read in the _A and _D variants; make sure
!     the report at the end never prints undefined data.
      values=0

! initialize a, b and c
      call initial(b,c,n)
      call zero(a,n)

      start=timef()
!     See how many hardware events at one time are supported
!     This also initializes the PAPI library
      call PAPIf_num_counters( num_events )
      if ( num_events .LT. 2 ) then
        print *,'This example program requries the architecture to ', &
            'support 2 simultaneous hardware events...shutting down.'
!       Actually shut down, as the message announces.
        stop 1
      else
        print *,num_events,'simultaneous hardware events supported'
      end if

!     Prefer floating point instructions; fall back to total
!     instructions when PAPI_FP_INS is not available.
      call PAPIf_query_event(PAPI_FP_INS, retval)
      if (retval .NE. PAPI_OK) then
        event(1) = PAPI_TOT_INS
      else
!     Total floating point operations
        event(1) = PAPI_FP_INS
      end if

!     Time used
      event(2) = PAPI_TOT_CYC

#ifdef _A
!     Set up the counters
      num_events = 2
      call PAPIf_start_counters( event, num_events, retval)
      if ( retval .NE. PAPI_OK ) then
        print *,'echec PAPIf_start_counters retval=',retval
      end if

!     Clear the counter values
      call PAPIf_read_counters(values, num_events,retval)
      if ( retval .NE. PAPI_OK ) then
        print *,'echec PAPIf_read_counters retval=',retval
      end if
      print *,"ijk"
      do i=1,n
         do j=1,n
            do k=1,n
               a(i,j) = a(i,j) + b(i,k)*c(k,j)
            end do
         end do
      end do
!     Stop the counters and put the results in the array values
      call PAPIf_stop_counters(values,num_events,retval)
      if ( retval .NE. PAPI_OK ) then
        print *,'echec PAPIf_stop_counters retval=',retval
      end if
#endif

#ifdef _B
      print *,"ikj" ! WORST
      do i=1,n
         do k=1,n
            do j=1,n
               a(i,j) = a(i,j) + b(i,k)*c(k,j)
            end do
         end do
      end do
#endif

#ifdef _C
      print *,"jik"
      do j=1,n
         do i=1,n
            do k=1,n
               a(i,j) = a(i,j) + b(i,k)*c(k,j)
            end do
         end do
      end do
#endif

#ifdef _D
!     Set up the counters
      num_events = 2
      call PAPIf_start_counters( event, num_events, retval)
      if ( retval .NE. PAPI_OK ) then
        print *,'echec PAPIf_start_counters retval=',retval
      end if

!     Clear the counter values
      call PAPIf_read_counters(values, num_events,retval)
      if ( retval .NE. PAPI_OK ) then
        print *,'echec PAPIf_read_counters retval=',retval
      end if
      print *,"jki"
      do j=1,n
         do k=1,n
            do i=1,n
               a(i,j) = a(i,j) + b(i,k)*c(k,j)
            end do
         end do
      end do
!     Stop the counters and put the results in the array values
      call PAPIf_stop_counters(values,num_events,retval)
      if ( retval .NE. PAPI_OK ) then
        print *,'echec PAPIf_stop_counters retval=',retval
      end if
#endif

#ifdef _E
      print *,"kij" ! WORST
      do k=1,n
         do i=1,n
            do j=1,n
               a(i,j) = a(i,j) + b(i,k)*c(k,j)
            end do
         end do
      end do
#endif

#ifdef _F
      print *,"kji"
      do k=1,n
         do j=1,n
            do i=1,n
               a(i,j) = a(i,j) + b(i,k)*c(k,j)
            end do
         end do
      end do
#endif

!     Report the counters; the label follows whichever event was
!     selected for event(1) above.
!$      num_pes=omp_get_num_threads()
        if (event(1) .EQ. PAPI_TOT_INS) then
          print *, 'TOT Instructions:  ',values(1)
        else
          print *, 'FP Instructions:  ',values(1)
        end if

        print *, 'Cycles: ',values(2)


      finish=timef()

!     Elapsed time between the two timef() calls; this span includes
!     the PAPI setup and the prints above, not only the loop nest.
      time_taken = finish-start

!     timef() is not scaled to milliseconds in this file, so the
!     value is reported in seconds.
!$    print*, time_taken, "sec on ",num_pes," threads"
      !print*, time_taken, "sec"
      write(*,'(a2,f8.2,a4)') 't=',time_taken,' sec'

      end program matrix_multiplication

!

      subroutine initial(a,b,size)
!     Fill two size-by-size matrices with deterministic test data:
!       a(i,j) = i + j
!       b(i,j) = a(i,j) * i
!     Arguments:
!       a, b  - matrices to fill (every element is overwritten)
!       size  - order of both matrices
      implicit none
      integer, intent(in) :: size
      real, intent(out) :: a(size,size), b(size,size)
      integer :: row, col

!     Column index outermost so the inner loop walks contiguous memory
      do col = 1, size
         do row = 1, size
            a(row,col) = real(row + col)
            b(row,col) = a(row,col) * real(row)
         end do
      end do

      end subroutine initial

!
      subroutine zero(a,size)
!     Set every element of the size-by-size matrix a to 0.0.
!     Arguments:
!       a     - matrix to clear (every element is overwritten)
!       size  - order of the matrix
      implicit none
      integer, intent(in) :: size
      real, intent(out) :: a(size,size)

!     Whole-array assignment covers all size*size elements
      a = 0.0

      end subroutine zero


      function timef()
!     Return the processor time reported by the cpu_time intrinsic,
!     in seconds.
!     The result is double precision so that it matches the
!     declaration of timef in the calling program; the function is
!     referenced through an implicit interface, so a differing result
!     kind would silently produce garbage values.
        implicit none
        double precision :: timef
        call cpu_time(timef)
        !timef = timef * 1000
      end function timef
        

