/* 
 * Copyright 2008-2009 CAPS entreprise. All rights reserved.
 */

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>

#define TITLE "sgemm2_advancedload-delegatedstore"
#define PLOT "replot"

#include "util.h"
#include "sgemm.h"

// External declaration of the sgemm routine; its definition is not in this
// file. From the parameter types, vin1, vin2 and vout are n-by-n arrays of
// float, with vin1 and vin2 read-only. alpha and beta are scalar factors.
// NOTE(review): presumably computes vout = alpha * vin1 * vin2 + beta * vout
// (the usual SGEMM contract) -- confirm against the implementation.
//
// TODO: write codelet directive with label sgemm2:
// - for CUDA target, 
// - with atfirstcall transfer policy for alpha and beta
// - with manual transfer policy for vin1, vin2 and vout
// - with atcall transfer policy for n
// - arguments vin1, vin2 and vout should be mirrored

extern void sgemm( int n, float alpha, const float vin1[n][n], const float vin2[n][n], float beta, float vout[n][n] );

/*
 * Parses `text` as a base-10 integer that fits in an int.
 *
 * Returns 1 and stores the result in *value on success; returns 0 (leaving
 * *value untouched) when the text is empty, has trailing characters, or is
 * out of range for int.
 */
static int parse_int( const char *text, int *value ) {
  char *end;
  long parsed;

  errno = 0;
  parsed = strtol( text, &end, 10 );
  if( end == text || *end != '\0' || errno == ERANGE
      || parsed < INT_MIN || parsed > INT_MAX )
    return 0;

  *value = (int)parsed;
  return 1;
}

/*
 * Benchmark driver: times sgemm() for each size returned by getSizes(),
 * writes "<size> <flop rate>" lines to TITLE".dat", prints a few elements of
 * the result for each size, and finally emits the gnuplot script TITLE".gp".
 *
 * Usage: <program> <seed> <from> <to>
 *
 * Returns 0 on success and 1 on any failure (bad arguments, I/O error,
 * initialization failure).
 */
int main(int argc, char **argv) {
  int status = 1;
  FILE *data_file = NULL;
  int *sizes = NULL;
  float alpha, beta;
  float *vin1 = NULL, *vin2 = NULL, *vout = NULL;
  int seed, range_from, range_to;

  if( argc != 4 ) {
    /* argv[0] may be NULL when argc is 0; never hand NULL to %s. */
    printf( "usage: %s <seed> <from> <to>\n", argc > 0 ? argv[0] : TITLE );
    return 1;
  }

  /* Validate the arguments before creating (and truncating) the data file. */
  if( ! parse_int( argv[1], &seed )
      || ! parse_int( argv[2], &range_from )
      || ! parse_int( argv[3], &range_to ) ) {
    fprintf( stderr, "%s: <seed>, <from> and <to> must be integers\n", argv[0] );
    return 1;
  }

  data_file = fopen( TITLE".dat", "w" );
  if( ! data_file ) {
    perror( TITLE".dat" );
    return 1;
  }

  if( ! init( range_to, seed, &alpha, &beta, &vin1, &vin2, &vout ) ) {
    printf( "Initialization failed.\n" );
    /* The state of the buffers after a failed init() is unknown here, so
       only the data file is released on this path. */
    goto cleanup_file;
  }

  sizes = getSizes( range_from, range_to );
  if( ! sizes ) {
    fprintf( stderr, "%s: could not build the list of sizes\n", argv[0] );
    goto cleanup_buffers;
  }

  int i = 0;
  while( sizes[i] != 0 ) {
    int size = sizes[i++];
    double best = 0;

		// TODO: write allocate directive for vin1, vin2 and vout

		// TODO: write advancedload directive for vin1,vin2 and vout


    int j;
    for( j = 0 ; j < 2 ; j++ ) {
      double current, t0, t1;

      t0 = wallclock();

			// TODO: write callsite directive

      /* The buffers are flat float arrays; view them as size-by-size
         matrices so the argument types match the sgemm() prototype. */
      sgemm( size, alpha, (const float (*)[size])vin1, (const float (*)[size])vin2, beta, (float (*)[size])vout );

      t1 = wallclock();
      current = t1 - t0;

      /* Keep the fastest of the runs (0 means "no measurement yet"). */
      if( best == 0 || best > current )
        best = current;
    }

	  // TODO: write delegatedstore directive

		// TODO: write free directive for vin1, vin2 and vout


    fprintf( data_file, "%8d %10lf\n", size,  (double)NB_FLOP((long long)size) / (double)best );

    /* Indices of the first two and last two elements of the result, computed
       in size_t and clamped so a 1x1 matrix never reads outside itself. */
    size_t count = (size_t)size * (size_t)size;
    size_t last = count - 1;
    size_t second = count > 1 ? 1 : 0;
    size_t before_last = count > 1 ? last - 1 : 0;
    printf("[%4d x %4d] %12f %12f (...) %12f %12f \n", size, size, vout[0], vout[second], vout[before_last], vout[last]);
  }

  /* Close before generating the plot script so buffered data is flushed and
     write errors are detected. */
  if( fclose( data_file ) != 0 ) {
    perror( TITLE".dat" );
    data_file = NULL;
    goto cleanup_buffers;
  }
  data_file = NULL;

  if( ! printGnuplotFile( TITLE".gp", TITLE, PLOT ) )
    goto cleanup_buffers;

  status = 0;

cleanup_buffers:
  free( sizes );
  free( vin1 );
  free( vin2 );
  free( vout );

cleanup_file:
  if( data_file && fclose( data_file ) != 0 ) {
    perror( TITLE".dat" );
    status = 1;
  }

  return status;
}
