#include <windows.h>
#include <time.h>
//#include "Thread.h"
#include <windows.h>
#include <stdio.h>

#include <windows.h>
#include <tchar.h>
#include <strsafe.h>

//for timer
#include <iostream>
#include <cstdio>
#include <ctime>
#include <list>

#define MAX_THREADS 16
// 4 threads
//Total time elapsed:   11.27700
// 8 threads
//Total time elapsed:   10.67000
//16 threads
//Total time elapsed:   10.79100


#define BUF_SIZE 255




///////////////////////////////////////////
inline void parsePermutation (int n, int m, int * rindx,int *results) {
	int data[20] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};

	for (int i=0; i<m;++i) {
		int p = rindx[i];
		results[i] = data[p];
		for(int j=p; j<n-i; ++j)
			data[j] = data[j+1];
	}
}



inline void getXs(int n,int *inputs, int * rs) {
	rs[10]=inputs[0], rs[11]=inputs[1], rs[14]=inputs[2], rs[15]=inputs[3], 
		rs[16]=inputs[4], rs[18]=inputs[5], rs[19]=inputs[6];
	rs[17] = 38 -rs[18] -rs[19];
	rs[13] = 38 - rs[14] - rs[15] - rs[16];
	rs[12] = 38 - rs[16] - rs[19];
	rs[8] = 38 -rs[13] - rs[17];
	rs[9] = 38 - rs[8] - rs[10] - rs[11] - rs[12];
	rs[7] = 38 -rs[11] -rs[15] -rs[18];
	rs[3] = 38 - rs[7] -rs[12];
	rs[6] = 38 - rs[3] - rs[10]- rs[14]- rs[17];
	rs[5] = rs[6] -rs[9] +rs[11] -rs[13] +rs[16];
	rs[4] = 38 -rs[9] -rs[14] -rs[18];
	rs[1] = 38 -rs[5] -rs[10] -rs[15] -rs[19];
	rs[2] = 38-rs[1]-rs[3];
}




bool testXs(int n, int * x, int * rs ) {

	int tmp[20] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
		-1, -1, -1, -1, -1, -1 };

	int i = 0;
	bool tmpflag = true;

	for (i = 1; i<= n ;i++) {
		if ( x[i] <= n && x[i] >= 1) {
			if (tmp[ x[i] ] == -1) {
				tmp[ x[i] ] = x[i];
			} else {
				tmpflag = false;
				break;
			}
		} else {
			tmpflag = false;
			break;
		}
	}

	if (tmpflag == false) 
		return false;

	for (i = 1; i<= n ;i++) {
		if (tmp[ i ] == -1 || tmp[ i ] != i) {
			tmpflag = false;
			break;
		}
		rs[i-1]= x[i];
	}

	return tmpflag;
}

inline void createIndices(int n, int m, int k, int * rindx) {
	int i = 0,dsi =1,j=0;
	for (i=0; i<m; i++) {
		dsi = (k) %(n-i);
		if (dsi==0)
			dsi = n-i;
		rindx[i] = dsi;
		if (k<= n-i) {
			for (j = i+1;j<m;j++)
				rindx[j] = 1;
			break;
		}
		//k = (k-1) /(n-i+1);
		k = (k-1)/(n-i)+1;
	}
}

inline void increaseIndices(int n, int m, int k, int * rindx) {
	int i = 0,dsi =1,j=0;
	for (i=0; i<m; ++i) {
		++rindx[i];

		if (rindx[i] <= n-i) return;

		rindx[i] -= (n-i); 

		// carry forward
		for(j=i+1; j<m;++j) {
			++rindx[j]; 
			if (rindx[j] <= (n-j)) return;
			rindx[j] -= (n-j);
		}
	}
}
/////////////////////////////////////////

DWORD WINAPI MyThreadFunction( LPVOID lpParam );
void ErrorHandler(LPTSTR lpszFunction);

// Sample custom data structure for threads to use.
// This is passed by void pointer so it can be any data type
// that can be passed using a single void pointer (LPVOID).
typedef struct MyData {
	//int val1;
	//int val2;
	int m;
	int n;
	int startIndex;
	int endIndex;
} MYDATA, *PMYDATA;


int _tmain()
{
	PMYDATA pDataArray[MAX_THREADS];
	DWORD dwThreadIdArray[MAX_THREADS];
	HANDLE hThreadArray[MAX_THREADS];

	std::clock_t start;
	double diff;
	int i;



	start = std::clock();
	diff = ( std::clock() - start ) / (double)CLOCKS_PER_SEC;
	printf ("\nTotal time elapsed: %10.5f",diff);

	// Create MAX_THREADS worker threads.
	int total = 1,m=7,n=19;
	int j=0;
	total = 1;
	for (i = 1; i<= m; i++) {
		total *= (n-i+1);
	}
	int segs = total/(MAX_THREADS);
	for( i=0; i<MAX_THREADS; i++ )
	{
		// Allocate memory for thread data.

		pDataArray[i] = (PMYDATA) HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
			sizeof(MYDATA));

		if( pDataArray[i] == NULL )
		{
			// If the array allocation fails, the system is out of memory
			// so there is no point in trying to print an error message.
			// Just terminate execution.
			ExitProcess(2);
		}

		// Generate unique data for each thread to work with.

		//pDataArray[i]->val1 = i*10000000+1;
		//pDataArray[i]->val2 = (i+1)*10000000-1;

		pDataArray[i]->m = m;
		pDataArray[i]->n = n;
		if (i< MAX_THREADS -1) {
			pDataArray[i]->startIndex= i*segs +1;
			pDataArray[i]->endIndex = (i+1)*segs;
		} else {
			pDataArray[i]->startIndex= i*segs +1;
			pDataArray[i]->endIndex = total;
		}

		// Create the thread to begin execution on its own.

		hThreadArray[i] = CreateThread(
			NULL, // default security attributes
			0, // use default stack size
			MyThreadFunction, // thread function name
			pDataArray[i], // argument to thread function
			0, // use default creation flags
			&dwThreadIdArray[i]); // returns the thread identifier


		// Check the return value for success.
		// If CreateThread fails, terminate execution.
		// This will automatically clean up threads and memory.

		if (hThreadArray[i] == NULL)
		{
			ErrorHandler(TEXT("CreateThread"));
			ExitProcess(3);
		}
	} // End of main thread creation loop.

	// Wait until all threads have terminated.

	WaitForMultipleObjects(MAX_THREADS, hThreadArray, TRUE, INFINITE);

	// Close all thread handles and free memory allocations.

	for(int i=0; i<MAX_THREADS; i++)
	{
		CloseHandle(hThreadArray[i]);
		if(pDataArray[i] != NULL)
		{
			HeapFree(GetProcessHeap(), 0, pDataArray[i]);
			pDataArray[i] = NULL; // Ensure address is not reused.
		}
	}
	diff = ( std::clock() - start ) / (double)CLOCKS_PER_SEC;
	printf ("\nTotal time elapsed: %10.5f",diff);

	return 0;
}


DWORD WINAPI MyThreadFunction( LPVOID lpParam )
{
	HANDLE hStdout;
	PMYDATA pDataArray;

	TCHAR msgBuf[BUF_SIZE];
	size_t cchStringSize;
	DWORD dwChars;

	// Make sure there is a console to receive output results.

	hStdout = GetStdHandle(STD_OUTPUT_HANDLE);
	if( hStdout == INVALID_HANDLE_VALUE )
		return 1;

	// Cast the parameter to the correct data type.
	// The pointer is known to be valid because
	// it was checked for NULL before the thread was created.

	pDataArray = (PMYDATA)lpParam;

	// Print the parameter values using thread-safe functions.

	int m = pDataArray->m;
	int n = pDataArray->n;
	int startIndex = pDataArray->startIndex;
	int endIndex = pDataArray->endIndex;

	int indxs [ 8] ;
	int results[8];
	int results2[20];
	int x[20];
	bool b = false;
	int i = 0;
	int segs = (endIndex- startIndex)/10;
	int segk=0;
	StringCchPrintf(msgBuf, BUF_SIZE, TEXT("Thread %d progress 0/10\n"),
		GetCurrentThreadId());
	StringCchLength(msgBuf, BUF_SIZE, &cchStringSize);
	WriteConsole(hStdout, msgBuf, (DWORD)cchStringSize, &dwChars, NULL);

	int pid = GetCurrentThreadId();

#define _NEW_STUFF
#ifdef _NEW_STUFF
	createIndices(n,m,startIndex,indxs);
	indxs[0] --;
	for (i =startIndex; i<=endIndex;++i) {
		increaseIndices(n,m,i, indxs) ; 
#else
	for (i =startIndex; i<=endIndex;i++) {
		createIndices(n,m,i, indxs) ; 
#endif
		parsePermutation (n,m,indxs,results);
		getXs(n,results, results2);
		b = testXs(n, results2, x ) ;

		if (b ) {
			//printf("\nTrue %d",i);
			//printf("\nResults");
			//printxs(n-1,results3);
			StringCchPrintf(msgBuf, BUF_SIZE, TEXT("Thread %d ,Solution at index %d:\n%d,%d,%d\n%d,%d,%d,%d\n%d,%d,%d,%d,%d\n%d,%d,%d,%d\n%d,%d,%d\n"),
				GetCurrentThreadId(),i,x[0],x[1],x[2],x[3],x[4],x[5],x[6],x[7],x[8],x[9],x[10],x[11],x[12],x[13],x[14],x[15],x[16],x[17],x[18]);
					StringCchLength(msgBuf, BUF_SIZE, &cchStringSize);
					WriteConsole(hStdout, msgBuf, (DWORD)cchStringSize, &
						dwChars,
						NULL);

		}

		if (++segk == segs ) {
			segk=0;
			//printf ("\nProgress %d:",i);
			StringCchPrintf(msgBuf, BUF_SIZE, TEXT("Thread %d progress %d/10 \n"),
				GetCurrentThreadId(),i/segs);
			StringCchLength(msgBuf, BUF_SIZE, &cchStringSize);
			WriteConsole(hStdout, msgBuf, (DWORD)cchStringSize, &dwChars,
				NULL);
		}
	}
	StringCchPrintf(msgBuf, BUF_SIZE, TEXT("Thread %d progress 10/10\n"),
		GetCurrentThreadId());
	StringCchLength(msgBuf, BUF_SIZE, &cchStringSize);
	WriteConsole(hStdout, msgBuf, (DWORD)cchStringSize, &dwChars, NULL);

	return 0;
}

void ErrorHandler(LPTSTR lpszFunction)
{
	// Retrieve the system error message for the last-error code.
	LPVOID lpMsgBuf;
	LPVOID lpDisplayBuf;
	DWORD dw = GetLastError();

	FormatMessage(
		FORMAT_MESSAGE_ALLOCATE_BUFFER |
		FORMAT_MESSAGE_FROM_SYSTEM |
		FORMAT_MESSAGE_IGNORE_INSERTS,
		NULL,
		dw,
		MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
		(LPTSTR) &lpMsgBuf,
		0, NULL );

	// Display the error message.

	lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT,
		(lstrlen((LPCTSTR) lpMsgBuf) + lstrlen((LPCTSTR) lpszFunction) + 40)
		* sizeof(TCHAR));
	StringCchPrintf((LPTSTR)lpDisplayBuf,
		LocalSize(lpDisplayBuf) / sizeof(TCHAR),
		TEXT("%s failed with error %d: %s"),
		lpszFunction, dw, lpMsgBuf);
	MessageBox(NULL, (LPCTSTR) lpDisplayBuf, TEXT("Error"), MB_OK);



	// Free error-handling buffer allocations.

	LocalFree(lpMsgBuf);
	LocalFree(lpDisplayBuf);
}
