#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <Windows.h>

typedef uint32_t COLOR32;

/*
convertRGBToYUV

Description:
	Convert an RGB color to YUV

Parameters:
	r		Red channel
	g		Green channel
	b		Blue channel
	y		Output Y channel
	u		Output U channel
	v		Output V channel
*/
void convertRGBToYUV(double r, double g, double b, double *y, double *u, double *v) {
	*y =  0.2990 * r + 0.5870 * g + 0.1140 * b;
	*u = -0.1684 * r - 0.3316 * g + 0.5000 * b;
	*v =  0.5000 * r - 0.4187 * g - 0.0813 * b;
}

/*
writeBitmap

Description:
	Write pixel data to a 32-bit bitmap file at a given path.

Parameters:
	px		Input pixel array
	width	Image width
	height	Image height
	path	Output image path

*/
void writeBitmap(COLOR32 *px, int width, int height, const char *path) {

	BITMAPFILEHEADER fh = { 0 };
	fh.bfType = 'B' | ('M' << 8);
	fh.bfSize = width * height * 4 + sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER);
	fh.bfOffBits = sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER);

	BITMAPINFOHEADER hdr = { 0 };
	hdr.biSize = sizeof(hdr);
	hdr.biWidth = width;
	hdr.biHeight = height;
	hdr.biPlanes = 1;
	hdr.biBitCount = 32;
	hdr.biCompression = BI_RGB;

	FILE *fp = fopen(path, "wb");
	fwrite(&fh, sizeof(fh), 1, fp);
	fwrite(&hdr, sizeof(hdr), 1, fp);
	for (int y = height - 1; y >= 0; y--) {
		for (int x = 0; x < width; x++) {
			COLOR32 c = px[x + y * width];
			c = (c & 0xFF00FF00) | ((c >> 16) & 0xFF) | ((c & 0xFF) << 16);
			fwrite(&c, sizeof(c), 1, fp);
		}
	}
	fclose(fp);
}

/*
drawPixel

Description:
	Draw a pixel on a given image

Parameters:
	px		Image pixel array
	width	Image width
	height	Image height
	x		Pixel X
	y		Pixel Y
	c		Color to draw the pixel
*/
void drawPixel(COLOR32 *px, int width, int height, int x, int y, COLOR32 c) {
	if (x < 0 || y < 0) return;
	if (x >= width || y >= height) return;
	px[x + y * width] = c;
}

/*
drawBezier

Description:
	px		Image pixel array
	width	Image width
	height	Image height
	x1		Endpoint 1 X
	y1		Endpoint 1 Y
	x2		Endpoint 2 X
	y2		Endpoint 2 Y
	cx		Control point X
	cy		Control point Y
	c		Color of the bezier curve
*/
void drawCircle(COLOR32 *px, int width, int height, int cx, int cy, int a, int r, COLOR32 c) {
	int minY = cy - r, minX = cx - r;
	int maxX = cx + r, maxY = cy + r;
	if (minX < 0) minX = 0;
	if (minY < 0) minY = 0;
	if (maxX >= width) maxX = width - 1;
	if (maxY >= height) maxY = height - 1;

	//draw
	int r2 = r * r;
	for (int x = minX; x <= maxX; x++) {
		for (int y = minY; y <= maxY; y++) {
			int doMultiSample = 0;
			COLOR32 src = px[x + y * width];
			COLOR32 destCol = src;

			//first, rule out the necessity of multisample
			{
				//if 2 greater than radius, don't do anything. If 2 less than radius, set directly.
				int rMin = r - 2, rMax = r + 2;
				int dx = x - cx;
				int dy = y - cy;
				int d2 = dx * dx + dy * dy;
				int rMin2 = rMin * rMin, rMax2 = rMax * rMax;
				if (d2 > rMax2) {
					//do nothing
				} else if (rMin >= 0 && d2 < rMin2) {
					//set directly
					destCol = c;
				} else {
					//need to multisample :(
					doMultiSample = 1;
				}
			}

			//multisample
			if (doMultiSample) {
				int tr = 0, tg = 0, tb = 0, ns = 4;
				int scaleR = r * ns;
				int ns2 = ns * ns;
				int scaleR2 = scaleR * scaleR;
				for (int sy = 0; sy < ns; sy++) {
					for (int sx = 0; sx < ns; sx++) {
						int scaleX = x * ns + sx;
						int scaleY = y * ns + sy;
						int dx = scaleX - (cx * ns);
						int dy = scaleY - (cy * ns);
						int r2 = dx * dx + dy * dy;
						if (r2 <= scaleR2) {
							tr += c & 0xFF;
							tg += (c >> 8) & 0xFF;
							tb += (c >> 16) & 0xFF;
						} else {
							tr += src & 0xFF;
							tg += (src >> 8) & 0xFF;
							tb += (src >> 16) & 0xFF;
						}
					}
				}
				tr = (2 * tr + ns2) / (2 * ns2);
				tg = (2 * tg + ns2) / (2 * ns2);
				tb = (2 * tb + ns2) / (2 * ns2);
				destCol = tr | (tg << 8) | (tb << 16);
			}

			//alpha blend
			if (a == 0) {
				//nothing
			} else if (a == 255) {
				px[x + y * width] = destCol;
			} else {
				int srcR = src & 0xFF;
				int srcG = (src >> 8) & 0xFF;
				int srcB = (src >> 16) & 0xFF;
				int destR = destCol & 0xFF;
				int destG = (destCol >> 8) & 0xFF;
				int destB = (destCol >> 16) & 0xFF;

				destR = (2 * (destR * a + srcR * (255 - a)) + 255) / 510;
				destG = (2 * (destG * a + srcG * (255 - a)) + 255) / 510;
				destB = (2 * (destB * a + srcB * (255 - a)) + 255) / 510;
				px[x + y * width] = destR | (destG << 8) | (destB << 16);
			}
		}
	}

}

/*
drawRandomBezier

Description:
	Draw a bezier curve onto the destination image with random endpoints and
	a random color.

Parameters:
	px		Image pixel array
	width	Image width
	height	Image height
*/
void drawRandomCircle(COLOR32 *px, int width, int height, int *px1, int *py1, int *pa, int *pr, COLOR32 *pcol) {
	COLOR32 c = (rand() & 0xFF) | ((rand() & 0xFF) << 8) | ((rand() & 0xFF) << 16);
	int x = rand() % width, y = rand() % height;
	int r = rand() % ((width + height) / 2);
	int a = rand() % 256;

	drawCircle(px, width, height, x, y, a, r, c);
	*px1 = x; *py1 = y;
	*pr = r;
	*pa = a;
	*pcol = c;
}

/*
colorDifference

Description:
	Compute the difference between two RGB colors

Parameters:
	r1		Color 1 red
	g1		Color 1 green
	b1		Colro 1 blue
	r2		Color 2 red
	g2		Color 2 green
	b2		Color 2 blue
*/
double __inline colorDifference(int r1, int g1, int b1, int r2, int g2, int b2) {
	double dr = r2 - r1;
	double dg = g2 - g1;
	double db = b2 - b1;
	double dy, du, dv;
	convertRGBToYUV(dr, dg, db, &dy, &du, &dv);
	return 4 * dy * dy + du * du + dv * dv;
}

/*
imageDifference

Description:
	Compute the difference between two images.

Parameters:
	px1		Image 1 pixel array
	px2		Image 2 pixel array
	width	Image width
	height	Image height
	maxDiff	Maximum difference to count up to
*/
double imageDifference(COLOR32 *px1, COLOR32 *px2, int width, int height, double maxDiff) {
	int nPx = width * height;
	double diff = 0;
	for (int i = 0; i < nPx; i++) {
		int x = i % width;
		if (x == 0 || x == width - 1) continue;
		int y = i / width;
		if (y == 0 || y == height - 1) continue;

		COLOR32 cc1 = px1[i];
		COLOR32 cc2 = px2[i];
		
		//unpack
		int r1 = cc1 & 0xFF, r2 = cc2 & 0xFF;
		int g1 = (cc1 >> 8) & 0xFF, g2 = (cc2 >> 8) & 0xFF;
		int b1 = (cc1 >> 16) & 0xFF, b2 = (cc2 >> 16) & 0xFF;

		//compute diff
		diff += colorDifference(r1, g1, b1, r2, g2, b2);
		if (diff >= maxDiff) {
			return maxDiff;
		}
	}
	return diff;
}

/*
readBitmap24

Description:
	Read a 24-bit bitmap image to a pixel array

Parameters:
	path	File path of the input bitmap
	pWidth	Output image width
	pHeight	Output image height
*/
COLOR32 *readBitmap24(const char *path, int *pWidth, int *pHeight) {
	FILE *fp = fopen(path, "rb");
	if (fp == NULL) {
		*pWidth = *pHeight = 0;
		return NULL;
	}
	BITMAPFILEHEADER fh;
	BITMAPINFOHEADER bmi;
	fread(&fh, sizeof(fh), 1, fp);
	fread(&bmi, sizeof(bmi), 1, fp);
	if (bmi.biBitCount != 24) {
		*pWidth = *pHeight = 0;
		return NULL;
	}
	*pWidth = bmi.biWidth;
	*pHeight = bmi.biHeight;

	int width = bmi.biWidth, height = bmi.biHeight;
	int stride = (width * 3 + 3) & ~3;
	COLOR32 *px = (COLOR32 *) calloc(width * height, sizeof(COLOR32));
	unsigned char *row = (unsigned char *) calloc(stride, 1);
	for (int y = height - 1; y >= 0; y--) {
		fread(row, stride, 1, fp);
		COLOR32 *rDest = px + y * width;
		for (int i = 0; i < width; i++) {
			rDest[i] = (row[i * 3 + 0] << 16) | (row[i * 3 + 1] << 8) | (row[i * 3 + 2] << 0);
		}
	}
	free(row);
	fclose(fp);
	return px;
}

/*
allocImage

Description:
	Allocate a pixel array for an image

Parameters:
	width	Width of the image
	height	Height of the image
*/
COLOR32 *allocImage(int width, int height) {
	return (COLOR32 *) calloc(width * height, sizeof(COLOR32));
}

/*
imageAverage

Description:
	Compute the average color of an image

Parameters:
	px		Image pixel array
	width	Image width
	height	Image height
*/
COLOR32 imageAverage(COLOR32 *px, int width, int height) {
	int tr = 0, tg = 0, tb = 0;
	int nPx = width * height;
	for (int i = 0; i < nPx; i++) {
		COLOR32 c = px[i];
		tr += c & 0xFF;
		tg += (c >> 8) & 0xFF;
		tb += (c >> 16) & 0xFF;
	}
	tr /= nPx;
	tg /= nPx;
	tb /= nPx;
	return tr | (tg << 8) | (tb << 16);
}

int g_kill = 0;

/*
killerCallback

Description:
	Called in another thread that responds to keyboard input to stop the main
	thread computing

Parameters:
	lpParam	Unused
*/
DWORD CALLBACK killerCallback(LPVOID lpParam) {
	while (1) {
		getchar();
		g_kill = 1;
	}
	return 0;
}

typedef struct CIRCLE_INFO {
	int x;
	int y;
	int r;
	int a;
	COLOR32 color;
} CIRCLE_INFO;

typedef struct WORKSPACE_ {
	COLOR32 **images;
	CIRCLE_INFO *circles;
	COLOR32 *target;
	int width;
	int height;
	int nImages;
	int nImagesPerThread;
	HANDLE *hThreadCompletions;
} WORKSPACE;

typedef struct WORKER_STRUCT_ {
	WORKSPACE *workspace;
	HANDLE hEvent;
	int threadIndex;
	volatile int bestIndex;
	volatile double bestDistance;
} WORKER_STRUCT;

/*
workerthreadProc

Description:
	Function called to do work on the computations in another thread to run
	concurrently.

Parameters:
	lpParam	A pointer to the worker struct of parameters from the main thread
*/
DWORD CALLBACK workerThreadProc(LPVOID lpParam) {
	WORKER_STRUCT *worker = (WORKER_STRUCT *) lpParam;
	WORKSPACE *workspace = worker->workspace;
	int tid = worker->threadIndex;

	int startIndex = tid == 0 ? 1 : (tid * workspace->nImagesPerThread);
	int baseIndex = tid * workspace->nImagesPerThread;
	int nCanvas = workspace->nImagesPerThread;
	int width = workspace->width;
	int height = workspace->height;
	COLOR32 **images = workspace->images;
	COLOR32 *target = workspace->target;

	srand(tid + (int) time(0));
	while (1) {
		//wait for work
		WaitForSingleObject(worker->hEvent, INFINITE);

		//run calculations

		//draw tests
		for (int i = startIndex; i < baseIndex + nCanvas; i++) {
			COLOR32 *px = images[i];
			CIRCLE_INFO *cInfo = workspace->circles + i;
			volatile COLOR32 f = *px;
			drawRandomCircle(px, width, height, &cInfo->x, &cInfo->y, &cInfo->a, &cInfo->r, &cInfo->color);
		}

		//Find best match
		double bestDistance = 1e32;
		int bestImage = baseIndex;
		//if (tid == 0) __debugbreak();
		for (int i = baseIndex; i < baseIndex + nCanvas; i++) {
			double diff = imageDifference(target, images[i], width, height, bestDistance);
			if (diff < bestDistance) {
				bestDistance = diff;
				bestImage = i;
			}
		}

		worker->bestDistance = bestDistance;
		worker->bestIndex = bestImage;

		//signal completion
		SetEvent(workspace->hThreadCompletions[tid]);
	}
}

#define SVG_SWIZZLE(c) (((c)&0xFF00)|(((c)&0xFF)<<16)|(((c)>>16)&0xFF))

void writeBeziers(CIRCLE_INFO *list, int nCircles, COLOR32 bg, int width, int height, const char *path) {
	FILE *fp = fopen(path, "w");
	fprintf(fp, "<svg width=\"%d\" height=\"%d\" xmlns=\"http://www.w3.org/2000/svg\">", width, height);
	fprintf(fp, "<rect width=\"%d\" height=\"%d\" fill=\"#%06X\"/>", width, height, SVG_SWIZZLE(bg));
	fprintf(fp, "<g fill=\"none\" stroke-width=\"1\">");
	char cmdBufferAbsolute[64], cmdBufferRelative[64];
	/*for (int i = 0; i < nBeziers; i++) {
		COLOR32 col = list[i].color;
		col = (col & 0xFF00) | ((col & 0xFF) << 16) | ((col >> 16) & 0xFF);
		int x1 = list[i].x1, y1 = list[i].y1;
		int x2 = list[i].x2, y2 = list[i].y2;
		int cx = list[i].cx, cy = list[i].cy;

		int lenAbsolute = sprintf(cmdBufferAbsolute, "M%d %d Q%d %d %d %d", x1, y1, cx, cy, x2, y2);
		int lenRelative = sprintf(cmdBufferRelative, "M%d %d q%d %d %d %d", x1, y1, cx - x1, cy - y1, x2 - x1, y2 - y1);
		if (lenAbsolute <= lenRelative) {
			fprintf(fp, "<path d=\"%s\" stroke=\"#%06X\"/>",
				cmdBufferAbsolute, col);
		} else {
			fprintf(fp, "<path d=\"%s\" stroke=\"#%06X\"/>",
				cmdBufferRelative, col);
		}
	}*/
	fprintf(fp, "</g></svg>");
	fclose(fp);
}

#define MAXIMUM_ITERATIONS 0x4000

/*
main

Description:
	Main function
*/
int main(int argc, char **argv) {
	if (argc < 2) {
		puts("Usage: CircleTest <input bitmap> [optional iteration count]");
		return 1;
	}

	int width, height;
	COLOR32 *target = readBitmap24(argv[1], &width, &height);
	if (target == NULL) {
		puts("File read error. Make sure the file exists and is a 24-bit bitmap.");
		return 1;
	}
	COLOR32 avg = imageAverage(target, width, height);

	//read iterations
	int nIterations = MAXIMUM_ITERATIONS;
	if (argc >= 3) {
		nIterations = atoi(argv[2]);
	}

	srand((int) time(0));
	int nCanvas = 256;
	COLOR32 **images = (COLOR32 **) calloc(nCanvas, sizeof(COLOR32 *));
	for (int i = 0; i < nCanvas; i++) {
		images[i] = allocImage(width, height);
		for (int j = 0; j < width * height; j++) {
			images[i][j] = avg;
		}
	}

	COLOR32 *best = images[0];
	int nMaxBezier = 4;
	int nIter = 0x1;
	DWORD tid;
	CreateThread(NULL, 0, killerCallback, NULL, 0, &tid);

	//create worker threads
	WORKSPACE workspace = { 0 };
	workspace.images = images;
	workspace.nImages = nCanvas;
	workspace.nImagesPerThread = 32;
	workspace.width = width;
	workspace.height = height;
	workspace.target = target;
	workspace.circles = (CIRCLE_INFO *) calloc(nCanvas, sizeof(CIRCLE_INFO));

	//create buffer to hold result of beziers
	CIRCLE_INFO *circleList = (CIRCLE_INFO *) calloc(nIterations, sizeof(CIRCLE_INFO));
	int nBeziers = 0;

	int nThreads = nCanvas / workspace.nImagesPerThread;
	WORKER_STRUCT *workers = (WORKER_STRUCT *) calloc(nThreads, sizeof(WORKER_STRUCT));
	workspace.hThreadCompletions = (HANDLE *) calloc(nThreads, sizeof(HANDLE));

	for (int i = 0; i < nThreads; i++) {
		workers[i].hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
		workers[i].threadIndex = i;
		workers[i].workspace = &workspace;
		workers[i].bestDistance = 1e32;
		workers[i].bestIndex = 0;
		CreateThread(NULL, 0, workerThreadProc, (LPVOID) &workers[i], 0, &tid);
		workspace.hThreadCompletions[i] = CreateEvent(NULL, TRUE, FALSE, NULL);
	}

	int k = 0;
	while (k < nIterations) {
		k++;

		//wake all threads
		for (int i = 0; i < nThreads; i++) {
			SetEvent(workers[i].hEvent);
		}

		WaitForMultipleObjects(nThreads, workspace.hThreadCompletions, TRUE, INFINITE);
		for (int i = 0; i < nThreads; i++) {
			ResetEvent(workspace.hThreadCompletions[i]);
		}

		//get best distance
		//__debugbreak();
		double bestDistance = 1e32;
		int bestImage = 0;
		for (int i = 0; i < nThreads; i++) {
			if (workers[i].bestDistance < bestDistance) {
				bestDistance = workers[i].bestDistance;
				bestImage = workers[i].bestIndex;
			}
		}
		if (k & 1)
			printf("%04X Distance: %f (%d)\n", k, bestDistance, bestImage);

		//copy best to all
		best = images[bestImage];
		for (int i = 0; i < nCanvas; i++) {
			if (i != bestImage) {
				memcpy(images[i], best, width * height * sizeof(COLOR32));
			}
		}

		//if best is nonzero, then that means a bezier was added
		if (bestImage) {
			CIRCLE_INFO *toAdd = workspace.circles + bestImage;
			CIRCLE_INFO *dest = circleList + nBeziers;
			memcpy(dest, toAdd, sizeof(CIRCLE_INFO));
			nBeziers++;
		}

		if (g_kill) {
			writeBitmap(best, width, height, "out.bmp");
			//writeBeziers(bezierList, nBeziers, avg, width, height, "out.svg");
			g_kill = 0;
		}
	}

	writeBitmap(best, width, height, "out.bmp");
	//writeBeziers(bezierList, nBeziers, avg, width, height, "out.svg");
	return 0;
}
