#
# Copyright 2008-2009 CAPS entreprise. All rights reserved.
#

#************************************************************
# To change the range of execution, edit the following figures.
# IMPORTANT: TO must not exceed 10000000, the max for the current CUDA generator

# Seed handed to each benchmark run; change it to get a different matrix
# initialisation.
SEED := 2
# Size range handed to each benchmark run: from FROM to TO, where each step
# is the previous one multiplied by 2.
# IMPORTANT: TO must not exceed 10000000, the max for the current CUDA generator
FROM := 16
TO   := 2000

#************************************************************
# DO NOT EDIT BELOW THIS POINT
#************************************************************

# Shell pipeline that prints the first "cpu MHz" value found in /proc/cpuinfo.
# This is a plain `=` assignment, so the backquoted text is stored verbatim
# and is only executed by the shell if the variable is expanded inside a
# recipe ($$2 reaches awk as $2). No rule visible in this file references
# CPU_FREQ.
CPU_FREQ=`cat /proc/cpuinfo | grep "^cpu MHz" | head -1 | awk -F': ' '{print $$2}'`

# Host C compiler passed to the hmpp driver, and the optimisation flags used
# on every compile line.
GCC    = gcc
CFLAGS = -O3
# Full compile command: the hmpp driver followed by the host compiler.
# Kept recursive (=) so $(GCC) is resolved at the point of use.
HMPPCC = hmpp $(GCC) --force

# Settings exported into the environment of every recipe. Presumably read by
# the HMPP toolchain (codelet compiler, nvcc, code generator) -- confirm
# against the HMPP documentation.
HMPP_CODELET_COMPILER_CFLAGS = -msse2
NVCCFLAGS                    = -O3 -use_fast_math
HMPPCG_FLAGS                 = --cuda-block-size 64x1
export HMPP_CODELET_COMPILER_CFLAGS NVCCFLAGS HMPPCG_FLAGS


# all (the default goal) builds the four benchmark executables; run executes
# each of them through the %.run pattern rule.
# Both are commands rather than files, so they are declared phony: a stray
# file named "all" or "run" must not make them look up to date.
.PHONY: all run
all: sgemm1.exe sgemm2.exe sgemm3.exe sgemm4.exe
run: sgemm1.run sgemm2.run sgemm3.run sgemm4.run


# HMPP versions 1 to 3: sgemmN.exe is built from sgemmN.c and the matching
# sgemmN-codelet.c. A static pattern rule is used so the three targets share
# a single recipe while still being listed explicitly.
sgemm1.exe sgemm2.exe sgemm3.exe: %.exe: %.c %-codelet.c
	$(HMPPCC) $(CFLAGS) -o $@ $^


# Variant compiled with --codelet-off (presumably the HMPP codelet is
# disabled for this executable -- confirm against the hmpp documentation).
# NOTE(review): this rule uses sgemm3-codelet.c, not a sgemm4-codelet.c;
# confirm the reuse is intentional and not a copy-paste slip.
sgemm4.exe: sgemm4.c sgemm3-codelet.c
	$(HMPPCC) --codelet-off $(CFLAGS) -o $@ $^


# Optional command prefix placed in front of each benchmark invocation.
# Empty by default; it may be set on the make command line or in the
# environment.
RUNCMD ?=

# sgemmN.run: build sgemmN.exe if needed, then execute it with the seed and
# the FROM/TO range as its three arguments.
%.run: %.exe
	$(RUNCMD) ./$< $(SEED) $(FROM) $(TO)


# Feed every *.gp script in the current directory to gnuplot (-persist).
# Declared phony because it is a command, not a file to be built.
.PHONY: display
display:
	gnuplot -persist *.gp

# Remove generated files matching the patterns below (executables, objects,
# plot scripts and data, editor backups, and what appear to be HMPP, CUDA and
# OpenCL intermediates).
# Declared phony so that a file named "clean" cannot disable the target.
.PHONY: clean
clean:
	rm -rf *.gp *.dat *~ *.so *.o *.exe *_cuda.cu* *.bic *_sse.c *opencl* *.ptx *.fatbin *.dpil *.hmc* *.gnuplot

