/*
 A parallel Jacobi solver for the Laplacian equation in 2D
 Written by Jean M. Favre, Swiss National Supercomputing Center
 Last tested on Mon May 23 08:44:07 CEST 2016 with VisIt v 2.10.3

 Code inspired from an older example by Kadin Tseng, Boston University, November 1999
 The compile flag -D_VISIT_ enables compilation with VisIt. Otherwise, the program runs
 in stand-alone mode
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef PARALLEL
#include <mpi.h>
MPI_Datatype rowtype, coltype; 
#endif

#include "solvers.h"

#define BASENAME "/tmp"
//#define BASENAME "/scratch/daint/jfavre"

#ifdef _VISIT_
#include <VisItControlInterface_V2.h>
#include <VisItDataInterface_V2.h>
#include "SimulationExample.h"


/* Data Access Function prototypes */
/* These functions are only declared here; their definitions are not in this
 * part of the source. The exact role of each Sim* function is presumably to
 * hand metadata, mesh, variable and domain-list handles to VisIt - confirm
 * against the file that defines them. */
visit_handle SimGetMetaData(void *);
visit_handle SimGetMesh(int, const char *, void *);
visit_handle SimGetVariable(int, const char *, void *);
visit_handle SimGetDomainList(const char *, void *);
void ControlCommandCallback(const char *cmd, const char *args, void *cbdata);
void SlaveProcessCallback(void *cbdata);
/* mainloop_interactive() disconnects from VisIt when this returns 0. */
int ProcessVisItCommand(simulation_data *sim);
/* Called after a VisIt connection completes and at the start of batch mode. */
void SetupCallbacks(simulation_data *sim);

#ifdef PARALLEL
int visit_broadcast_int_callback(int *value, int sender, void *cbdata);
int visit_broadcast_string_callback(char *str, int len, int sender, void *cbdata);
#endif
#endif

/* Kept at file scope: the macro stays visible to the rest of the translation
 * unit exactly as it did when it was defined inside the function body. */
#ifndef max
#define max( a, b ) ( ((a) > (b)) ? (a) : (b) )
#endif

/*
 * Report the peak resident set size ("VmHWM") of the calling process.
 *
 * The value is read from /proc/self/status and stored in *bytes exactly as
 * the kernel prints it.
 * NOTE(review): proc(5) documents this field in kB, so despite the parameter
 * name the result is most likely kilobytes - confirm before relying on the
 * unit.
 *
 * bytes   out: largest VmHWM value found, or 0.0 if none could be read.
 * returns 0 on success, -1 if bytes is NULL or the status file cannot be
 *         opened (e.g. on systems without a Linux-style /proc).
 */
int get_procmem(double *bytes)
{
  FILE *fh;
  char line[80];

  if (bytes == NULL)
    return -1;

  *bytes = 0.0;
  fh = fopen("/proc/self/status", "r");
  if (fh == NULL)
    return -1;

  /* Let fgets() drive the loop so a failed read never re-parses stale data. */
  while (fgets(line, sizeof line, fh) != NULL) {
    unsigned long long value;
    const char *cp = strstr(line, "VmHWM:");

    if (cp != NULL && sscanf(cp, "VmHWM: %llu", &value) == 1)
      *bytes = max(*bytes, (double)value);
  }
  fclose(fh);
  return 0;
}

/*
 * Interactive driver loop.
 *
 * Repeats until sim->gdel is no longer above TOL, sim->iter exceeds MAXSTEPS
 * (non-convergent run), or - when built with _VISIT_ - VisItDetectInput()
 * reports a state this loop cannot handle.
 *
 * Without _VISIT_ every pass simply calls simulate_one_timestep().
 * With _VISIT_ rank 0 polls VisIt and, in PARALLEL builds, broadcasts the
 * result so that every rank takes the same branch:
 *   0  no input            -> advance the simulation by one step
 *   1  connection request  -> try to complete it and install the callbacks
 *   2  engine command      -> process it; disconnect if processing fails
 *   other                  -> report the error and leave the loop
 */
void mainloop_interactive(simulation_data *sim)
{
#ifdef _VISIT_
  int blocking, visitstate = 0, err = 0;
#endif

  while (sim->gdel > TOL)
    {  // iterate until error below threshold
    if(sim->iter > MAXSTEPS)
      break;       /* nonconvergent solution */

#ifdef _VISIT_
    /* Block on VisIt input unless the simulation is free-running. */
    blocking = (sim->runMode == SIM_RUNNING) ? 0 : 1;

    /* Get input from VisIt or timeout so the simulation can run. */
    if(sim->par_rank == 0)
      visitstate = VisItDetectInput(blocking, -1);
#ifdef PARALLEL
    MPI_Bcast(&visitstate, 1, MPI_INT, 0, sim->topocomm);
#endif
    /* Do different things depending on the output from VisItDetectInput. */
    switch(visitstate)
      {
      case 0:
        /* There was no input from VisIt, return control to sim. */
        simulate_one_timestep(sim);
      break;
      case 1:
        /* VisIt is trying to connect to sim. */
        if(VisItAttemptToCompleteConnection() == VISIT_OKAY)
          {
          /* Printed by every rank, not only rank 0. */
          fprintf(stderr, "VisIt connected\n");
          SetupCallbacks(sim);
          }
        else
          {
          /* Print the error message; the string is released with free(). */
          char *msg = VisItGetLastError();
          fprintf(stderr, "VisIt did not connect: %s\n", msg);
          free(msg);
          }
      break;
      case 2:
        /* VisIt wants to tell the engine something. */
        if(!ProcessVisItCommand(sim))
          {
          /* Disconnect on an error or closed connection. */
          VisItDisconnect();
          if(!sim->par_rank)
            {
            fprintf(stderr, "VisIt disconnected\n");
            }
          }
      break;
      default:
        /* The message says the error is unrecoverable, so stop iterating
           instead of polling the same failure again. */
        fprintf(stderr, "Can't recover from error %d!\n", visitstate);
        err = 1;
      break;
      }
    if(err)
      break;
#else
    simulate_one_timestep(sim);
#endif
    }
}

#ifdef _VISIT_
/*
 * Batch driver loop (no interactive VisIt connection).
 *
 * Loads the VisIt runtime, installs the callbacks, advances one time step,
 * then either restores the plots from sim->sessionfile or creates a
 * Pseudocolor plot of "temperature" with an Isosurface operator. Image
 * saving is then switched on and the simulation is stepped until sim->gdel
 * is no longer above TOL or sim->iter exceeds MAXSTEPS.
 *
 * Returns early, without iterating, if the session file cannot be restored.
 */
void mainloop_batch(simulation_data *sim)
{
    /* Explicitly load VisIt runtime functions and install callbacks. */
    VisItInitializeRuntime();
    SetupCallbacks(sim);

    /* Run one step before any plot is created or restored. */
    simulate_one_timestep(sim);

    if(sim->sessionfile != NULL)
    {
        /* Compare against VISIT_OKAY, as mainloop_interactive() does for
           VisItAttemptToCompleteConnection(); a test against 0 would treat
           success as failure because VISIT_ERROR is 0 in libsim. */
        if(VisItRestoreSession(sim->sessionfile) != VISIT_OKAY)
        {
            if(sim->par_rank == 0)
            {
                fprintf(stderr, "Could not restore session file %s\n",
                        sim->sessionfile);
            }
            return;
        }
    }
    else
    {
        /* Set up some plots using libsim functions. */
        VisItAddPlot("Pseudocolor", "temperature");
        VisItAddOperator("Isosurface", 0);
        VisItDrawPlots();
    }

    /* Turn on image saving. */
    sim->savingFiles = 1;

    /* Iterate over time, with the same non-convergence cut-off as the
       interactive loop so a stalled run cannot spin forever. */
    while (sim->gdel > TOL && sim->iter <= MAXSTEPS)
        simulate_one_timestep(sim);
}
#endif

int main(int argc, char *argv[])
{
  int i;
  //int blocking, visitstate, err = 0;
  char *env = NULL;
  double before, during, after, *mem;
  simulation_data sim;
  sim.savingFiles = 0;
  sim.saveCounter = 0;
  sim.batch = 0;
  sim.export = 0;
  sim.sessionfile = NULL;
  sim.par_rank = 0;
  sim.par_size = 1;
#ifdef PARALLEL
  sim.cart_dims[0] = sim.cart_dims[1] = 0;
  int periods[2]={0,0};
  int PartitioningDimension = 2; // want a 2D MPI partitioning. otherwise set to 1.
  int coords[2];
  MPI_Init(&argc, &argv);                       /* starts MPI */
  MPI_Comm_rank(MPI_COMM_WORLD, &sim.par_rank); /* get current process id */
  MPI_Comm_size(MPI_COMM_WORLD, &sim.par_size); /* get # procs from env or */
  if(PartitioningDimension == 1)
    sim.cart_dims[1] = 1;

  MPI_Dims_create(sim.par_size, 2, sim.cart_dims);
  fprintf(stdout,"%d: cart_dims[]= %d, %d\n", sim.par_rank, sim.cart_dims[0], sim.cart_dims[1]);

  if(MPI_Cart_create(MPI_COMM_WORLD, 2, sim.cart_dims, periods, 0, &sim.topocomm) != MPI_SUCCESS)
    sim.topocomm = MPI_COMM_WORLD;

  MPI_Comm_rank(sim.topocomm, &sim.par_rank);
  MPI_Comm_size(sim.topocomm, &sim.par_size);
  MPI_Cart_coords(sim.topocomm, sim.par_rank, 2, coords);

  neighbors(&sim);

  //fprintf(stdout,"%d: Rank_xy[]= %d, %d\n", sim.par_rank, coords[0], coords[1]);
  //fprintf(stdout,"      %2d\n%2d<->[%2d]<->%2d\n      %2d\n", sim.north, sim.west, sim.par_rank, sim.east, sim.south);
  
  sim.rankx = coords[0];
  sim.ranky = coords[1];
#endif

    /* Check for command line arguments. */
  for(i = 1; i < argc; ++i)
    {
        if(strcmp(argv[i], "-batch") == 0)
            sim.batch = 1;
        else if(strcmp(argv[i], "-export") == 0)
            sim.export = 1;
        else if(strcmp(argv[i], "-sessionfile") == 0 && (i+1) < argc)
        {
            sim.sessionfile = strdup(argv[i+1]);
            ++i;
        }
    }

#ifdef _VISIT_
  SimulationArguments(argc, argv);

#ifdef PARALLEL
  char name[128];

  sprintf(name, BASENAME"/jacobi.%03d.txt", sim.par_rank);
  fprintf(stderr, "name = %s\n", name);
  VisItOpenTraceFile(name);
  VisItSetBroadcastIntFunction2(visit_broadcast_int_callback, (void*)&sim);
  VisItSetBroadcastStringFunction2(visit_broadcast_string_callback, (void*)&sim);
  VisItSetParallel(sim.par_size > 1);
  VisItSetParallelRank(sim.par_rank);
  VisItSetMPICommunicator((void *)&sim.topocomm);
#endif

  if(sim.par_rank == 0)
     env = VisItGetEnvironment();

    /* Pass the environment to all other processors collectively. */
  VisItSetupEnvironment2(env);
  if(env != NULL)
    free(env);

  if(!sim.par_rank)
    {
    VisItInitializeSocketAndDumpSimFile(
#ifdef PARALLEL
            "pjacobi",
#else
            "jacobi",
#endif
            "Jacobi solver for Laplace Equation",
            "/path/to/where/sim/was/started",
            NULL, NULL, "/users/jfavre/.visit/simulations/temp.sim2");
    }

#endif

  sim.runMode = SIM_STOPPED;
  sim.m = 56; // mesh size = (m+2)x(m+2) including the bc grid lines

// We make no attempt to check that the number of grid points divides evenly
// with the number of MPI tasks.
// rank 0 will display the bottom (southern) boundary wall
// rank (size-1) will display the top (northern) boundary wall
// if run with m=20 and 4 MPI tasks, we will have 5 grid lines per rank
// and VisIt will display a 22x22 grid
#ifdef PARALLEL
  MPI_Bcast(&sim.m, 1, MPI_INT, 0, sim.topocomm);
  sim.bx = sim.m / sim.cart_dims[0]; // block size in x
  sim.by = sim.m / sim.cart_dims[1]; // block size in y

  MPI_Type_contiguous(sim.bx+1, MPI_DOUBLE, &rowtype); 
  MPI_Type_commit(&rowtype);

  MPI_Type_vector(sim.by, 1, sim.bx+2, MPI_DOUBLE, &coltype); // count, blocklength, stride,
  MPI_Type_commit(&coltype);
#else
  sim.bx = sim.by = sim.m;
#endif
// We use (bx + 2) grid points in the X direction, i.e. interior points plus 2 b.c. points
// We use (by + 2) grid points in the Y direction, i.e. interior points plus 2 b.c. points
  // decompose the domain

  sim.oldTemp = (double *)calloc((sim.bx + 2) * (sim.by + 2), sizeof(double));
  sim.Temp    = (double *)calloc((sim.bx + 2) * (sim.by + 2), sizeof(double));
  sim.cx = (float *)malloc(sizeof(float) * (sim.bx + 2));
  sim.cy = (float *)malloc(sizeof(float) * (sim.by + 2));

  float hsize = 1.0/(sim.m+1.0);

#ifdef PARALLEL
fprintf(stdout,"%d: extents_xy[]= %d, %d, %d, %d\n", sim.par_rank,
sim.rankx * sim.bx, (1+sim.rankx) * sim.bx + 1,
sim.ranky * sim.by, (1+sim.ranky) * sim.by + 1
);
  for(i = 0; i < (sim.bx + 2); i++)
    sim.cx[i] = (i + sim.rankx* sim.bx) * hsize;
  for(i = 0; i < (sim.by + 2); i++)
    sim.cy[i] = ((i + sim.ranky* sim.by) * hsize);
#else
  for(i = 0; i < (sim.bx + 2); i++)
    sim.cx[i] = i  * hsize;
  for(i = 0; i < (sim.by + 2); i++)
    sim.cy[i] = i  * hsize;
#endif

  sim.gdel = 1.0;
  sim.iter = 0;
  set_initial_bc(&sim);
#ifdef PARALLEL
  //if(!sim.par_rank)
    //get_procmem(&before);
#endif

#ifdef _VISIT_
  if(sim.batch)
    mainloop_batch(&sim);
  else
#endif
    mainloop_interactive(&sim);

#ifdef PARALLEL
  if (!sim.par_rank)
#endif
    fprintf(stdout,"Stopped at iteration %d\nThe maximum error = %f\n", sim.iter, sim.gdel);


  // first write a header file in BOV format, to enable reading by VisIt
  const char *fname = BASENAME"/Jacobi";
  FILE * fpbov = fopen(BASENAME"/Jacobi.bov", "w");
  fprintf(fpbov,"TIME: %f\n", 0.0); // dummy value 0.0
  fprintf(fpbov,"DATA_FILE: %s.bin\n", fname);
  fprintf(fpbov,"DATA_SIZE: %d %d %d\n", sim.m+2, sim.m+2, 1); // size of grid in IJK
  fprintf(fpbov,"DATA_FORMAT: DOUBLE\n");
  fprintf(fpbov,"VARIABLE: temperature\n");
  fprintf(fpbov,"DATA_ENDIAN: LITTLE\n");
  fprintf(fpbov,"CENTERING: nodal\n");
  fprintf(fpbov,"BYTE_OFFSET: %d\n", 0); // was 2*(int)sizeof(int));
  fclose(fpbov);

  // first write a header file in XDMF format, to enable reading by ParaView
  FILE * fpxmf = fopen(BASENAME"/Jacobi.xmf", "w");
  fprintf(fpxmf,"<?xml version=\"1.0\" ?>\n");
  fprintf(fpxmf,"<!DOCTYPE Xdmf SYSTEM \"Xdmf.dtd\" []>\n");
  fprintf(fpxmf,"<Xdmf xmlns:xi=\"http://www.w3.org/2003/XInclude\" Version=\"2.2\">\n");
  fprintf(fpxmf,"  <Domain>\n");
  fprintf(fpxmf,"    <Grid Name=\"Jacobi Mesh\" GridType=\"Uniform\">\n");
  fprintf(fpxmf,"      <Topology TopologyType=\"3DCORECTMESH\" Dimensions=\"1 %d %d\"/>\n", sim.m+2, sim.m+2);

  fprintf(fpxmf,"      <Geometry GeometryType=\"ORIGIN_DXDYDZ\">\n");
  fprintf(fpxmf,"         <DataItem Name=\"Origin\" NumberType=\"Float\" Dimensions=\"3\" Format=\"XML\">0. 0. 0.</DataItem>\n");
  fprintf(fpxmf,"         <DataItem Name=\"Spacing\" NumberType=\"Float\" Dimensions=\"3\" Format=\"XML\">1. 1. 1.</DataItem>\n");
  fprintf(fpxmf,"      </Geometry>\n");
  fprintf(fpxmf,"      <Attribute Name=\"Temperature\" Active=\"1\" AttributeType=\"Scalar\" Center=\"Node\">\n");
  fprintf(fpxmf,"          <DataItem Dimensions=\"1 %d %d\" NumberType=\"Float\" Precision=\"8\" Format=\"Binary\">Jacobi.bin</DataItem>\n", sim.m+2, sim.m+2);
  fprintf(fpxmf,"      </Attribute>\n");
  fprintf(fpxmf,"    </Grid>\n");
  fprintf(fpxmf,"  </Domain>\n");
  fprintf(fpxmf,"</Xdmf>\n");
  fclose(fpxmf);

  // second write the result file in binary
#ifdef PARALLEL
  MPIIOWriteData(fname, &sim);
#ifdef ADIOS
  ADIOSWriteData(fname, &sim);
#endif
  MPI_Type_free(&rowtype);
  MPI_Type_free(&coltype);
/*
  if(!sim.par_rank){
    get_procmem(&after);
    fprintf(stderr, "Testing get_procmem... %f %f %f\n",
          before, after, after-before);
    }
*/
  MPI_Barrier(sim.topocomm);
  MPI_Finalize();
#else
  char fname2[256];
  strcpy(fname2, fname);
  strcpy(&fname2[strlen(fname)], ".bin");
  FILE * fp = fopen(fname2, "w");
  int dimuids[2]={sim.m+2, sim.m+2};
  fwrite(dimuids, sizeof(int), 2, fp);
  fwrite(sim.Temp, sizeof(double), (sim.m+2)*(sim.m+2), fp);
  fclose(fp);
#endif

  free(sim.oldTemp);
  free(sim.Temp);
  free(sim.cx);
  free(sim.cy);
  return (0);
}





