!> Distributed matrix-product self-check.
!!
!! Rank 0 creates two random n-by-n matrices A and B and computes the
!! reference product C = A*B serially.  A is then broadcast to every rank and
!! the columns of B are split into contiguous column blocks, one per rank.
!! Each rank multiplies A by its block, the partial products are gathered on
!! rank 0 into Cmpi, and rank 0 prints a single logical telling whether Cmpi
!! matches C.
!!
!! The column blocks are distributed with MPI_Scatterv / MPI_Gatherv so that
!! any number of processes works, including counts that do not divide n and
!! counts larger than n (the surplus ranks then own zero columns).
program matMult
  use mpi
  implicit none

  integer, parameter :: n = 10               ! order of the square matrices
  integer, parameter :: dp = kind(1.0d0)     ! kind described by MPI_DOUBLE_PRECISION
  integer, parameter :: root = 0             ! rank that owns B, C and Cmpi

  ! "nprocs" rather than "size" so the intrinsic size() is not shadowed.
  integer :: rank, nprocs, nloc, ierr
  integer :: p, base, extra
  integer, allocatable :: counts(:), displs(:)
  real(dp), allocatable, dimension(:,:) :: A, B, C, Cmpi
  real(dp), allocatable, dimension(:,:) :: Bloc, Cloc
  real(dp) :: tol

  ! MPI's default error handler aborts on failure, so ierr is not inspected.
  call MPI_Init(ierr)
  call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
  call MPI_Comm_size(MPI_COMM_WORLD, nprocs, ierr)

  ! Column distribution: every rank gets "base" columns and the first
  ! "extra" ranks get one more.  counts/displs are expressed in elements
  ! (n per column); columns are contiguous because of column-major storage.
  base  = n / nprocs
  extra = mod(n, nprocs)
  allocate(counts(nprocs), displs(nprocs))
  do p = 1, nprocs
     counts(p) = n * (base + merge(1, 0, p <= extra))
  end do
  displs(1) = 0
  do p = 2, nprocs
     displs(p) = displs(p - 1) + counts(p - 1)
  end do
  nloc = counts(rank + 1) / n      ! number of columns owned by this rank

  allocate(A(n, n))

  if (rank == root) then
     allocate(B(n, n), C(n, n), Cmpi(n, n))
     call random_number(A)
     call random_number(B)

     ! Serial reference result.
     C = matmul(A, B)
  else
     ! The root-only buffers are ignored by MPI on the other ranks, but an
     ! unallocated array must not be passed as an actual argument, so give
     ! them a valid zero-size allocation.
     allocate(B(0, 0), Cmpi(0, 0))
  end if

  allocate(Bloc(n, nloc), Cloc(n, nloc))

  call MPI_Bcast(A, n*n, MPI_DOUBLE_PRECISION, root, MPI_COMM_WORLD, ierr)
  call MPI_Scatterv(B, counts, displs, MPI_DOUBLE_PRECISION, &
                    Bloc, n*nloc, MPI_DOUBLE_PRECISION, &
                    root, MPI_COMM_WORLD, ierr)

  ! Each rank computes its own block of columns of the product.
  Cloc = matmul(A, Bloc)

  call MPI_Gatherv(Cloc, n*nloc, MPI_DOUBLE_PRECISION, &
                   Cmpi, counts, displs, MPI_DOUBLE_PRECISION, &
                   root, MPI_COMM_WORLD, ierr)

  if (rank == root) then
     ! Entries of A and B lie in [0, 1), so each entry of C is a sum of n
     ! terms below 1; n**2 * epsilon bounds the accumulated rounding
     ! difference between the serial and the blocked evaluation order.
     tol = real(n, kind=dp)**2 * epsilon(1.0_dp)
     print*, all(abs(C - Cmpi) <= tol)
     deallocate(C)
  end if

  deallocate(A, B, Cmpi, Bloc, Cloc, counts, displs)
  call MPI_Finalize(ierr)
end program matMult
