/*
Science and technology promotion license applied.
(c) Zhikai Wang/ www.heteroclinic.net 2013
*/
/* results:
$  g++ -o RecursiveRandomBitFlipPermuationGeneration.exe RecursiveRandomBitFlipPermuationGeneration.cpp
$ ./RecursiveRandomBitFlipPermuationGeneration.exe
It took  25.849 seconds for size=30.
So theoretically for size = 40 (forty bits) will take 25.84 * 1024 seconds, namely ABT 5,000 minutes.
My 6G/Q9000/Vista Cygwin laptop can do it.

You print the result for small value of variable 'size', e.g., 5, 4 etc.
*/

#include <iostream>
#include <cstdlib>
#include <climits>
#include <string.h>
#include <bitset>
#include <time.h>
#include <ctime>

// ULLONG_MAX should be 2^64-1 = 1.8446744073709551615e+19
// 20! is 2.43290200817664e+18
// 21! is 5.109094217170944e+19 //overflow
// So it is not possible to use index permutation, we still need shuffling.
// c is the length
void printULLinBits (unsigned long long start_pattern) ;
void shuffleAnArray (int c, int * myarray) {
	//srand(3);
	int array_i=0; 
	for (array_i=0; array_i < c-1 ; array_i++) {
		int v = rand() % (c -array_i);
		//std::cout<<"v:"<<v<<" "<<"c-array_i:"<<c -array_i<<" ";
		//std::cout<<"v:"<<v<<" ";
		if (v!=0)  {
			int cursor = v+array_i;
			if (cursor >= c)
				continue;
			int tmp = myarray[cursor ];
			myarray[cursor ] = myarray[array_i];
			myarray[array_i] = tmp;
		}
	}
	//std::cout<<std::endl;
}
// assert : position >=1 && position <= 64
// I am not sure about the endianness of bit OPs
inline void flipOneBitofAULL (unsigned long long * p_start_pattern, int position) {
	(*p_start_pattern) ^= 1 << (position-1);
}
// inline void flipOneBitofAULL (unsigned long long * p_start_pattern, int position) {
	// int spi = position;
	// //unsigned long long start_pattern = ULLONG_MAX;
	// unsigned long upper_pattern = 0LL;
	// unsigned long lower_pattern = 0LL;
	// //memcpy(&lower_pattern, &start_pattern, sizeof lower_pattern);
	// //memcpy(&upper_pattern, (char *) &start_pattern + sizeof lower_pattern, sizeof upper_pattern);
	// memcpy(&lower_pattern, p_start_pattern, sizeof lower_pattern);
	// memcpy(&upper_pattern, (char *) p_start_pattern + sizeof lower_pattern, sizeof upper_pattern);
	// if ( spi < 32 ) {
		// lower_pattern &= ~(1 << spi);
		// memcpy(p_start_pattern, &lower_pattern,  sizeof lower_pattern);

	// } else {
		// upper_pattern &= ~(1 << (spi-32));
		// memcpy( (char *) p_start_pattern + sizeof lower_pattern,&upper_pattern, sizeof upper_pattern);
	// }
	// std::cout<<lower_pattern <<std::endl;
	// std::cout<<upper_pattern <<std::endl;
	// std::cout<<*p_start_pattern <<std::endl;
// }
// assert : totaldepth >= currentdepth > 1s
// If you pet something with claw(s), other things' pattern(s) may be changed.
void recursiveRandomBitsFlipPermute(unsigned long long * p_start_pattern, int totaldepth, int currentdepth, int * claw, bool printdeepest  ) {
	
	if ( currentdepth < totaldepth) {
		//std::cout<<"currentdepth "<<currentdepth<<std::endl;
		
		//flip enter recursion
		
		//flipOneBitofAULL (p_start_pattern,claw[currentdepth-1]);
		if (currentdepth > 0 ) {
			//std::cout<<"\t\t"<< * p_start_pattern<<"\t claw"<<claw[currentdepth-1]<<std::endl;
			(*p_start_pattern) ^= 1 << (claw[currentdepth-1]-1);
			//std::cout<<"\t\t"<< * p_start_pattern<<"\t claw"<<claw[currentdepth-1]<<std::endl;
		}		
		
		recursiveRandomBitsFlipPermute(p_start_pattern, totaldepth, currentdepth + 1, claw ,printdeepest);

		//flip back enter recursion
		//flipOneBitofAULL (p_start_pattern,claw[currentdepth-1]);
		if (currentdepth > 0 ) {
			
			//std::cout<<"\t\t"<< * p_start_pattern<<"\t claw"<<claw[currentdepth-1]<<std::endl;
			(*p_start_pattern) ^= 1 << (claw[currentdepth-1]-1);
			//std::cout<<"\t\t"<< * p_start_pattern<<"\t claw"<<claw[currentdepth-1]<<std::endl;
		}		

		recursiveRandomBitsFlipPermute(p_start_pattern, totaldepth, currentdepth + 1, claw ,printdeepest);
		//++currentdepth;
	}
	if( currentdepth == totaldepth) {
		// flip print
		// flip back print
		// no more recursive call

		// std::cout<<"currentdepth "<<currentdepth<<std::endl;
	
		(*p_start_pattern) ^= 1 << (claw[currentdepth-1]-1);
		//printULLinBits (*p_start_pattern);
		if (printdeepest)
			std::cout<<"\t\t\t"<< * p_start_pattern<<std::endl;
		(*p_start_pattern) ^= 1 << (claw[currentdepth-1]-1);
		if (printdeepest)
			std::cout<<"\t\t\t"<< * p_start_pattern<<std::endl;
		//printULLinBits (*p_start_pattern);
	}
}

void printULLinBits (unsigned long long start_pattern) {
	//unsigned long long start_pattern = ULLONG_MAX;
	unsigned long upper_pattern = 0LL;
	unsigned long lower_pattern = 0LL;

	memcpy(&lower_pattern, &start_pattern, sizeof lower_pattern);
	memcpy(&upper_pattern, (char *) &start_pattern + sizeof lower_pattern, sizeof upper_pattern);
	
	std::bitset<32> bsup(upper_pattern);
	std::bitset<32> bslo(lower_pattern);
	std::cout<<bsup <<bslo <<std::endl; 
}

int main () {
	srand((unsigned)time(NULL));
	int size = 20;
	int * array = new int[size];
	
	int i = 0; 
	for (i = 0; i< size; i++ ) {
		array[i] = i+1;
	}
	shuffleAnArray (size, array);
	
	
	
	
	//for (i = 0; i< size; i++ ) {
	//	std::cout<<array[i]<<std::endl;
	//}
	// int * tmp_arr =  new int[size];
	// std::cout<<"(sizeof (int)) * size :"<< ((sizeof (int)) * size)<<std::endl;
	// for (i = 0; i< size; i++ ) {
		// tmp_arr[i]=array[i];
	// }

	// for (i = 0; i< 1024; i++) {
		// memcpy(tmp_arr, array, ((sizeof (int)) * size));
		// // std::cout<<rand() % 3<<std::endl;
		// shuffleAnArray (size, tmp_arr);
		// int tmp_i = i;
		// for (i = 0; i< size; i++ ) {
			// std::cout<<tmp_arr[i]<<" ";
		// }
		// std::cout<<std::endl;
		// i = tmp_i;
	// }


	// unsigned long upper_pattern = 1024-1L;
	// //upper_pattern ^= ~(1 << 5);
	// unsigned int x = 5;
	// upper_pattern ^= 1 << x;
	// std::cout<<upper_pattern<<std::endl;
	// //upper_pattern ^= ~(1 << 5);
	// upper_pattern ^= 1 << x;
	// std::cout<<upper_pattern<<std::endl;


	// //!!!! DO NOT DELELET The following is the correct way to toggle ULL bit
	// //But the least bit is 0 not 1.
	// //???? hope c++ will deal with the endianness
	// unsigned long long start_pattern = ULLONG_MAX;
	// printULLinBits (start_pattern );
	// start_pattern ^= 1ULL << 10;
	// printULLinBits (start_pattern );
	// start_pattern ^= 1ULL << 40;
	// printULLinBits (start_pattern );
	// start_pattern ^= 1ULL << 40;
	// printULLinBits (start_pattern );
	// start_pattern ^= 1ULL << 63;
	// printULLinBits (start_pattern );
	
	//unsigned long long start_pattern = ULLONG_MAX;
	unsigned long long start_pattern = 0ULL;
	//recursiveRandomBitsFlipPermute(unsigned long long * p_start_pattern, int totaldepth, int currentdepth, int * claw ) 
	
	
	clock_t start;
	double diff;
    clock_t endd = std::clock();

    start = std::clock();
	recursiveRandomBitsFlipPermute(&start_pattern, size, 1, array,false ) ;
	
   endd = std::clock();
    diff = ( endd - start ) / (double)CLOCKS_PER_SEC;

    std::cout<<"It took  "<<diff <<" seconds for size="<<size<<"." <<std::endl;	
	return 0;
}