/*
 * A Red-Black SOR
 *
 *	using separate red and block matrices
 *	to minimize false sharing
 *
 * Solves a M+2 by 2N+2 array
 */
#include <stdio.h>

#include <sys/time.h>

struct	timeval	start, finish;

#include <Tmk.h>

/*
 * To begin statistics collection after the first iteration
 * has completed use the following #define
 *
#define	RESET_AFTER_ONE_ITERATION
 */
int	iterations = 10;

int	M = 2000;
int	N = 500;	/* N.B. There are 2N columns. */

float **red_;
float **black_;

/*
 * begin is odd
 */
void	sor_odd(begin, end)
	int	begin;
	int	end;
{
	int	i, j, k;

	for (i = 0; i < iterations; i++) {

		for (j = begin; j <= end; j++) {

			for (k = 0; k < N; k++) {

				black_[j][k] = (red_[j-1][k] + red_[j+1][k] + red_[j][k] + red_[j][k+1])/(float) 4.0;
			}
			if ((j += 1) > end)
				break;

			for (k = 1; k <= N; k++) {

				black_[j][k] = (red_[j-1][k] + red_[j+1][k] + red_[j][k-1] + red_[j][k])/(float) 4.0;
			}
		}
		Tmk_barrier(0);

		for (j = begin; j <= end; j++) {

			for (k = 1; k <= N; k++) {

				red_[j][k] = (black_[j-1][k] + black_[j+1][k] + black_[j][k-1] + black_[j][k])/(float) 4.0;
			}
			if ((j += 1) > end)
				break;

			for (k = 0; k < N; k++) {

				red_[j][k] = (black_[j-1][k] + black_[j+1][k] + black_[j][k] + black_[j][k+1])/(float) 4.0;
			}
		}				
		Tmk_barrier(0);
#ifdef	RESET_AFTER_ONE_ITERATION
		if (i == 0) {

			bzero(&Tmk_stat, sizeof(Tmk_stat));

			gettimeofday(&start, NULL);
		}
#endif
	}
}

/*
 * begin is even
 */
void	sor_even(begin, end)
	int	begin;
	int	end;
{
	int	i, j, k;

	for (i = 0; i < iterations; i++) {

		for (j = begin; j <= end; j++) {

			for (k = 1; k <= N; k++) {

				black_[j][k] = (red_[j-1][k] + red_[j+1][k] + red_[j][k-1] + red_[j][k])/(float) 4.0;
			}
			if ((j += 1) > end)
				break;

			for (k = 0; k < N; k++) {

				black_[j][k] = (red_[j-1][k] + red_[j+1][k] + red_[j][k] + red_[j][k+1])/(float) 4.0;
			}
		}
		Tmk_barrier(0);

		for (j = begin; j <= end; j++) {

			for (k = 0; k < N; k++) {

				red_[j][k] = (black_[j-1][k] + black_[j+1][k] + black_[j][k] + black_[j][k+1])/(float) 4.0;
			}
			if ((j += 1) > end)
				break;

			for (k = 1; k <= N; k++) {

				red_[j][k] = (black_[j-1][k] + black_[j+1][k] + black_[j][k-1] + black_[j][k])/(float) 4.0;
			}
		}				
		Tmk_barrier(0);
#ifdef	RESET_AFTER_ONE_ITERATION
		if (i == 0) {

			bzero(&Tmk_stat, sizeof(Tmk_stat));

			gettimeofday(&start, NULL);
		}
#endif
	}
}

extern	char	       *optarg;

main(argc, argv)
	int		argc;
	char	       *argv[];
{
	int		c, i, j;
	int		begin, end;

	while ((c = getopt(argc, argv, "i:m:n:")) != -1)
		switch (c) {
		case 'i':
			iterations = atoi(optarg);
			break;
		case 'm':
			M = atoi(optarg);
			break;
		case 'n':
			N = atoi(optarg);
			break;
		}
	Tmk_startup(argc, argv);

	if (Tmk_proc_id == 0) {

		if ((red_ = (float **) Tmk_malloc((M + 2)*sizeof(float *))) == 0)
			Tmk_errexit("out of shared memory");

		if ((black_ = (float **) Tmk_malloc((M + 2)*sizeof(float *))) == 0)
			Tmk_errexit("out of shared memory");

		for (i = 0; i <= M + 1; i++) {

			if ((red_[i] = (float *) Tmk_malloc((N + 1)*sizeof(float))) == 0)
				Tmk_errexit("out of shared memory");

			if ((black_[i] = (float *) Tmk_malloc((N + 1)*sizeof(float))) == 0)
				Tmk_errexit("out of shared memory");

			if ((i == 0) || (i == M + 1))
				for (j = 0; j <= N; j++)
					red_[i][j] = black_[i][j] = 1.0;
			else if (i & 1) {
				red_[i][0] = 1.0;
				black_[i][N] = 1.0;
			}
			else {
				black_[i][0] = 1.0;
				red_[i][N] = 1.0;
			}
		}
		Tmk_distribute((char *)&red_, sizeof(red_));

		Tmk_distribute((char *)&black_, sizeof(black_));
	}
	begin = (M*Tmk_proc_id)/Tmk_nprocs + 1;
	end   = (M*(Tmk_proc_id + 1))/Tmk_nprocs;

	if (Tmk_proc_id == 0)
		printf("Running %d iterations over a %d by %d array\n", iterations, M, 2*N);

	Tmk_barrier(0);

	gettimeofday(&start, NULL);

	if (begin & 1)
		sor_odd(begin, end);
	else
		sor_even(begin, end);

	gettimeofday(&finish, NULL);

	printf("Elapsed time: %.2f seconds\n",
	       (((finish.tv_sec * 1000000.0) + finish.tv_usec) -
		((start.tv_sec * 1000000.0) + start.tv_usec)) / 1000000.0);

	Tmk_exit(0);
}