/*
 * (aio-lio-)ent: computes the binary entropy of a file using the Markov Model
 * (<http://en.wikipedia.org/wiki/Information_entropy#Data_as_a_Markov_process>).
 *
 * Loic Tortay, 2012.
 *
 */

#include <sys/stat.h>

#include <aio.h>
#include <errno.h>
#include <fcntl.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "errwarn.h"

/*
 * Program name.  It has external linkage; presumably the helpers declared in
 * errwarn.h refer to it when printing diagnostics -- TODO confirm.
 */
const char	progname[] = "aio-lio-ent";
/* Largest value accepted for -N; also sizes the aiocb pointer array in main(). */
const int	max_inflight = 1024U;
/* Number of concurrent reads used when -N is not given. */
const int	default_inflight = 4;
/* Read block size, in bytes, used when -B is not given (1 MiB). */
const size_t	default_bsize = 1048576U;

void usage(FILE *);

/*
 * Write the command-line help text to fp, then terminate the process with
 * exit status 1.  This function never returns to its caller.
 */
void usage(FILE *fp)
{
	/* One-line description followed by the synopsis. */
	fputs("\nComputes the binary entropy of a file using the Markov model.\n"
	    "\nUsage:\n", fp);
	fprintf(fp, "%s [-B bsize] [-N ios_in_flight] filename\n", progname);

	/* Per-option details, including the compiled-in defaults and limits. */
	fputs("\nWhere:\n"
	    " -B bsize: set the read block size to bsize bytes, default is minimum between:\n",
	    fp);
	fprintf(fp, "    %lu bytes & file size.\n",
	    (unsigned long) default_bsize);
	fputs(" -N ios_in_flight: number of I/Os \"in flight\" i.e. running concurrently\n",
	    fp);
	fprintf(fp, "    (min: 1, default: %d, max: %d)\n", default_inflight,
	    max_inflight);

	exit(1);
}


/*
 * Entry point.
 *
 * Parses the -B (read block size) and -N (number of I/Os in flight) options,
 * reads the target regular file in rounds of up to ninflight concurrent reads
 * submitted with lio_listio(LIO_WAIT), counts how often each byte value
 * occurs and prints the resulting entropy:
 *   H(S) = -sum{i = 0 -> 255} (P[i] * log2(P[i]))
 *
 * Returns 0 on success.  Failures are reported through error(); like the rest
 * of this file, the code assumes error() terminates the program when its
 * first argument is non-zero (it is declared in errwarn.h, not visible here
 * -- TODO confirm).
 */
int main(int argc, char *argv[])
{
	size_t		 stats[256];
	struct stat	 st;
	struct aiocb	*inflights[max_inflight];
	struct aiocb	*cb = NULL;
	const unsigned char *data = NULL;
	void		*buf = NULL;
	char		*filename = NULL;
	size_t		 toread = 0, bytesread = 0, roundread = 0;
	size_t		 currbyte, bsize, left;
	ssize_t		 nr = 0;
	int		 fd = -1, i = -1, ninflight = 0, gap = 0;
	int		 ch = -1, opt_bsize = 0, opt_ninflight = 0, ret = -1;
	/* Variables for the actual processing. */
	double		 H = 0.0, Pi = 0.0, dsize = 0.0;
	int		 n = 0;

	/* Parse arguments */
	while ((ch = getopt(argc, argv, ":B:N:")) != -1) {
		switch (ch) {
		case 'B':
			opt_bsize = atoi(optarg);
			break;
		case 'N':
			opt_ninflight = atoi(optarg);
			break;
		default:
			usage(stderr);
			break;
		}
	}
	if (optind != (argc - 1)) {
		warning(-1, "File name required");
		usage(stderr);
	}
	filename = argv[optind];

	/* Open target file */
	fd = open(filename, O_RDONLY, 0);
	if (fd == -1)
		error(1, errno, "Unable to open '%s'", filename);

	/* Get target file size */
	memset(stats, 0, sizeof(stats));
	if (fstat(fd, &st) == -1)
		error(1, errno, "Unable to stat '%s'", filename);

	/* Make sure target is a file */
	if (!S_ISREG(st.st_mode))
		error(1, -1, "'%s' is not a regular file", filename);

	toread = (size_t) st.st_size;

	/*
	 * Set I/O block size.  A negative value would otherwise be converted
	 * to a huge size_t; 0 keeps meaning "use the default".
	 */
	if (opt_bsize < 0)
		error(1, -1, "Invalid block size: %d", opt_bsize);
	if (opt_bsize != 0)
		bsize = (size_t) opt_bsize;
	else
		bsize = default_bsize;
	/* Never allocate buffers larger than the file itself. */
	if (toread < bsize)
		bsize = toread;

	/* Set the number of concurrent I/Os */
	if (opt_ninflight) {
		if (opt_ninflight > 0 && opt_ninflight <= max_inflight)
			ninflight = opt_ninflight;
		else
			error(1, -1, "Invalid number of I/Os in flight: %d "
			    "(min: 1, max: %d)\n", opt_ninflight,
			    max_inflight);
	} else
		ninflight = default_inflight;

	/*
	 * Clear the slot table once, before the loop: control blocks and
	 * buffers are allocated lazily and reused by every round, and the
	 * cleanup code below must be able to tell used slots from unused ones
	 * even when the file is empty and no round ever runs.
	 */
	memset(inflights, 0, sizeof(inflights));

	/*
	 * Count characters occurrences (P[i]).
	 */
	while (toread > 0) {
		/*
		 * Issue only as many requests as are needed to cover the
		 * remaining bytes (bsize is > 0 whenever toread is > 0).
		 * ninflight can only shrink here.
		 */
		if ((toread / bsize) < (size_t) ninflight)
			ninflight = (int) (toread / bsize)
			    + ((toread % bsize) != 0 ? 1 : 0);

		/* Prepare AIO control blocks for all I/Os of this round */
		for (i = 0; i < ninflight; i++) {
			cb = inflights[i];
			if (cb == NULL) {
				cb = malloc(sizeof(*cb));
				if (cb == NULL)
					error(1, errno, "Unable to allocate "
					    "memory for inflight[%d]", i);

				buf = malloc(bsize);
				if (buf == NULL)
					error(1, errno, "Unable to allocate "
					    "memory for inflight[%d] aio_buf", i);

				inflights[i] = cb;
			} else
				buf = (void *) cb->aio_buf;

			/* Start from a pristine control block, keep the buffer. */
			memset(cb, 0, sizeof(*cb));
			/* Bytes still unread from this slot's offset onwards. */
			left = toread - ((size_t) i) * bsize;
			cb->aio_buf = buf;
			cb->aio_fildes = fd;
			cb->aio_lio_opcode = LIO_READ;
			/* Never request data beyond the size seen by fstat(). */
			cb->aio_nbytes = (left < bsize) ? left : bsize;
			cb->aio_offset = (off_t) bytesread
			    + ((off_t) i) * (off_t) bsize;
		}
		ret = lio_listio(LIO_WAIT, inflights, ninflight, NULL);
		if (ret == -1)
			error(1, errno, "Error submitting round %d of I/Os on "
			    "'%s' at offset %lu", ninflight, filename,
			    (unsigned long) bytesread);

		/*
		 * Collect the results in file order.  Only the contiguous
		 * prefix of the round is counted: after a failed or short
		 * read the later slots are still reaped with aio_return() but
		 * their data is ignored, so that the next round restarts
		 * exactly where the counted data ends and no byte is counted
		 * twice or skipped.
		 */
		roundread = 0;
		gap = 0;
		for (i = 0; i < ninflight; i++) {
			cb = inflights[i];
			/* The error status must be fetched before aio_return(). */
			ret = aio_error(cb);
			nr = aio_return(cb);
			if (nr < 0) {
				error(1, (ret > 0) ? ret : errno,
				    "AIO read failed for '%s' at "
				    "offset %lu", filename,
				    (unsigned long) cb->aio_offset);
				gap = 1;
			}
			if (gap)
				continue;

			data = (const unsigned char *) cb->aio_buf;
			for (currbyte = 0; currbyte < (size_t) nr; currbyte++)
				stats[data[currbyte]]++;

			roundread += (size_t) nr;
			if ((size_t) nr < cb->aio_nbytes)
				gap = 1;
		}

		/* No progress at all: the file shrank while being read. */
		if (roundread == 0) {
			warning(-1, "Unexpected end of file on '%s' at "
			    "offset %lu", filename,
			    (unsigned long) bytesread);
			break;
		}
		bytesread += roundread;
		toread -= roundread;
	}

	if (close(fd) == -1)
		warning(errno, "Problem closing '%s'", filename);

	/* Free previously allocated memory (every slot that was ever used) */
	for (i = 0; i < max_inflight; i++) {
		if (inflights[i] == NULL)
			continue;
		free((void *) inflights[i]->aio_buf);
		free(inflights[i]);
		inflights[i] = NULL;
	}

	/*
	 * Now compute the result:
	 *  H(S) = -sum{i = 0 -> N} (P[i] * log2(P[i]))
	 * Probabilities are relative to the bytes actually counted.  The sum
	 * is accumulated with its final sign so that an exact zero is printed
	 * as "0.00" rather than "-0.00".
	 */
	dsize = (double) bytesread;
	for (i = 0; i < 256; i++) {
		if (stats[i] > 0) {
			Pi = ((double) stats[i]) / dsize;
			H -= Pi * log2(Pi);
			n++;
		}
	}
	printf("H('%s') = %.2lf bits (n = %d, avg(H): %.2f bits)\n", filename,
	    dsize * H, n, H);

	return (0);
}

