// quine.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include "..\..\daves.h" // for and, or, etc.

/*
int main(int argc, char* argv[])
{
	printf("Hello World!\n");
	return 0;
}
*/

/*
 * putstring: write the NUL-terminated string c to stdout,
 * one character at a time, using only putchar().
 *
 * Returns a pointer to the terminating '\0' of c (not to its start),
 * so a caller can step over that terminator and keep reading
 * whatever follows it in the same buffer.
 */
const char *
putstring( const char * c ){
	for( ; *c != '\0'; ++c ){
		putchar(*c);
	}
	return c;
}


/*
DAV:
quine that doesn't use printf(), only uses putchar().


2 different philosophies:
the information that is accessible to the program at runtime
needs to be traversed twice:
-- once to replicate the dna string
-- once to regenerate the rest of the program.

Since I'm pretty sure it's impossible to make these identical in C,
our choices become
(a) make the dna string trivial to replicate with
 printf("%s", dna); aka putstring(dna)
 i.e., make sure the dna string does *not*
 contain any of the special characters
 that the compiler does not copy directly to the executable.
 This means the "regenerate rest of program"
 needs to interpret the string somehow
 to restore any special programs needed for regeneration.
 We needed to interpret the
 "self-referential particle"
 anyway.
 Traditionally the '%' character is used
 in the run-time data (for printf() etc.)
 to (escape) indicate something special to be interpreted;
 the "%%" indicates a literal '%' to be emitted.
 (the backslash '\\' is used in the source file text
 to escape something special)
(b) make the rest of the program trivial to replicate with
 something like
 printf("%s", dna); aka putstring(dna)
 (this won't work exactly because
 we need to interpret the self-referential particle).
 This seems to be the most general-purpose idea;
 it allows *any* file containing *any* byte
 (except for the 0x00 byte ?)
 to be embedded in the executable file,
 and it seems elegant to have
 that file exactly copied into the executable
 without worrying about escapes.
(c) some sort of hybrid,
 where we do (slightly different) interpretation both ways.


case (a):
  
DAV:
If we limit ourselves to dna strings
that look identical in the source code (between quotes)
 dna = "";
as they do when printed with 
 printf("%s", dna); aka putstring(dna) // introduces the 0x00 limitation
then some things
cannot be represented literally inside the dna string:

	dna itself (think about it ... this is true for *all* quines in all finite languages)
	double quotes.
	newlines.
	backslashes.
	characters that are (!printable()), including 0x00 (I don't have to worry about this).
	And, depending on our encoding scheme,
	whatever other character we use as an escape character
	whenever we want to represent
	one of the above items.

*/

/*
DAV:
However, we can break that restriction.
If so, then the dna string becomes much more difficult to reconstruct ...

		// There exists one string that 
		// cannot be represented literally inside dna:
		// dna itself (think about it ...).

	// In the executable, the dna string can hold *any* character
	// except \x00.
	// (It can even hold \x00 if we use some other end-of-string indicator --
	// either literal length, or some other end-of-string like "...\x00" (double zeros).
	// However, these characters are represented differently in the source code:
		// quotes.
		// backslashes.
		// newlines.
		// tabs.
		// unprintable characters (which I don't have to worry about).
		// And, depending on our encoding scheme,
		// whatever other character we use as an escape character.
		// (I think I *will* use an unprintable character
		// as the escape character to print the dna string ...).


*/
		// isspace() necessary to catch newlines and tabs.
				//case '\\': b(); b(); break;
				//case '\t': b(); p('t'); break;
/*
//#include <ctype.h> // for isprint().
                                                           6         7         8
12345678901234567890123456789012345678901234567890123456789012345678901234567890
*/
#include <stdio.h>
#include <ctype.h>

/* p: emit the single character c on stdout. */
void p(const char c)
{
	putchar(c);
}

/* b: emit one backslash character on stdout. */
void b(void)
{
	p('\\');
}

/* q: the double-quote character, available under a name. */
const char q = '"';
// dna: the program text that main() walks and prints.
// Every character is printed as-is, except the single '\a' (alert)
// character sitting inside the embedded  "\a"  literal: that one is the
// self-referential particle, the spot where main() prints an escaped
// copy of this whole string instead.
// NOTE(review): the text embedded here is not identical to the code in
// this file -- e.g. the embedded main() declares `char * b` while also
// calling b(), tests isprint()/isspace() instead of comparing with '\a',
// and the embedded p()/q declarations lack `const`.  Confirm before
// expecting the output to reproduce this file.
char * dna = // embed the entire file literally into the executable, except for the self-referential particle.
"#include <stdio.h>\n"
"#include <ctype.h>\n"
"\n"
"void p(char c){putchar(c);};void b(void){p('\\');}char q = '\"';char * dna =\n"
"\"\a\";\n"
"\n"
"int main(void){\n"
"	char * s = dna; char * b;\n"
"	do{\n"
"		if( isprint(*s) || isspace(*s) ){ p(*s);\n"
"		}else{\n"
"			char * b = dna; int i = 0;\n"
"			do{\n"
"				switch(*b){\n"
"				case '\n': b(); p('n'); p(q); p(*b); p(q); break;\n"
"				case '\\': p(*b); p(*b);\n"
"				case q: b(); p(*b); break;\n"
"				default: putchar(*b);\n"
"				};\n"
"			}while(*++b);\n"
"		};\n"
"	}while(*++s);\n"
"	return(0);\n"
"}\n";

// I picked \a as the self-referent particle, because
// it was not a common character ...
// I could have picked any character that didn't occur in the source,
// say \x7f ... or perhaps even a 2 character sequence.

/*
 * main: print the program text held in dna.
 *
 * Each character of dna is copied to stdout unchanged, except the
 * self-referent particle '\a'.  At the particle, the whole dna string is
 * printed a second time, re-encoded as C string-literal source text:
 *   newline      -> \n, closing quote, real newline, opening quote
 *   tab          -> \t
 *   '\a'         -> \a
 *   backslash    -> two backslashes
 *   double quote -> backslash, double quote
 *   any other non-printable, non-space byte -> \xHH followed by ""
 *
 * Returns 0.
 */
int main(void){
	static const char hex_digit[] = "0123456789abcdef";
	const char * s;
	for( s = dna; *s; s++ ){
		const char * t;
		if( *s != '\a' ){
			p(*s);
			continue;
		}
		// special case: this is the self-referent particle.
		// Reconstruct the DNA string, adding the appropriate C escape codes.
		for( t = dna; *t; t++ ){
			// the <ctype.h> tests and the hex split below need the byte
			// as a non-negative value, whatever the signedness of char.
			const unsigned char u = (unsigned char)*t;
			switch(*t){
			case '\n': // end this literal, start the next one on a new source line.
				b(); p('n'); p(q); p(*t); p(q);
				break;
			case '\t': b(); p('t'); // not really necessary -- could let default handle.
				break;
			case '\a': b(); p('a'); // self-referent particle -- could let default handle ?
				break;
			case '\\': b(); b();
				break;
			case q: b(); p(q); // const object as a case label: relies on C++ rules.
				break;
			default: // p(*t) alone is all the quine itself needs.
				if( isprint(u) || isspace(u) ){
					p(*t);
				}else{
					// this isn't really needed for quine ...
					// emit the two hex *digit characters*, high nibble first.
					b(); p('x'); p(hex_digit[u >> 4]); p(hex_digit[u & 0x0f]);
					// add 2 quotes so that a following letter that looks like
					// another hex digit is not absorbed into this escape.
					p(q); p(q);
				};
				break;
			};
		};
	};
	return(0);
}
/*
"#include <stdio.h>\n"
"#include <ctype.h> // for isprint().\n"
"\n"
"void p(char c){putchar(c);};\n"
"void b(void){p('\\\\');}\n"
"\n"
"char * dna =\n"
" \"\x01\";\n"
"\n"
"void quote_dna_string(void){\n"
"  char * s = dna;\n"
"  do{\n"
"  switch(*s){\n"
"  case  '\n':\n"
"  b();  p('n');\n"
"  p('\"');\n"
"  p(*s);\n"
"  p('\"');\n"
"  break;\n"
"  case  '\\':\n"
"  b();  b();\n"
"  break;\n"
"  case  '\"':\n"
"  b();  p(*s);\n"
"  break;\n"
"  case  '\t':\n"
"  b();  p('t');\n"
"  break;\n"
"  default:\n"
"  putchar(*s);\n"
"  };\n"
"  }while(*++s);\n"
"}\n"
"\n"
"int main(void){\n"
"\n"
"  char * s = dna;\n"
"  do{\n"
"    if(isprint(*s)){\n"
"      p(*s);\n"
"    }else{\n"
"      quote_dna_string();\n"
"    };\n"
"\n"
"	}while(*++s);\n"
"	return(0);\n"
"}\n";
*/


/*
make dna string easy to copy.
use "%" followed by a number to indicate one of the special characters
that cannot be embedded in the dna string.
use "%%" to indicate a literal "%".

  %0
	dna itself (think about it ... this is true for *all* quines in all finite languages)
  %1
	double quotes.
  %2
	newlines. (?)
  %3
	backslashes.
  %x00
	characters that are (!printable()), including 0x00 (I don't have to worry about this).


*/
// dna4: sample text for the '%'-escape scheme described in the comment above.
// It uses %2 (newline), %1 (double quote) and %0 (the dna string itself);
// the runs of dashes are presumably placeholders for real program text.
const char * dna4 =
"-----------------------"
"---------%2------------"
"%1%0%1"// self-referential particle
"--------------------- return(0);%2}%2";

/* hex_nibble: value 0..15 of the hexadecimal digit h, or -1 if h is not one. */
static int hex_nibble(const char h){
	if( h >= '0' && h <= '9' ){ return h - '0'; }
	if( h >= 'a' && h <= 'f' ){ return h - 'a' + 10; }
	if( h >= 'A' && h <= 'F' ){ return h - 'A' + 10; }
	return -1;
}

/*
 * main4: print a '%'-encoded dna string, expanding its escapes:
 *   %1   -> double quote
 *   %2   -> newline
 *   %3   -> backslash
 *   %%   -> '%'
 *   %xHH -> the byte whose value is the two hex digits HH
 *   %0   -> the dna string itself, copied verbatim with putstring()
 *   %c   -> c, for any other character c
 * A '%' that is the very last character is printed as itself.
 *
 * Returns 0.
 */
int main4(void){
	// NOTE(review): this walks (and, for %0, re-emits) `dna`, although the
	// '%'-encoded string defined next to this function is `dna4` --
	// presumably dna4 was intended; confirm before changing.
	const char * s = dna;
	while( *s ){
		if( *s != '%' ){
			p(*s);
			s++;
			continue;
		}
		// *s == '%'; check out the next character
		// to figure out what to do.
		s++;
		if( '\0' == *s ){
			// nothing follows the '%': emit it and stop here,
			// rather than stepping past the end of the string.
			putchar('%');
			break;
		}
		// special cases
		switch(*s){
		case '1': // double quotes.
			putchar('"');
			break;
		case '2': // newlines. (?)
			putchar('\n');
			break;
		case '3': // backslashes.
			putchar('\\');
			break;
		case '%': // the '%' itself // not really needed -- the default case catches it.
			putchar('%');
			break;
		case 'x': { // characters that are (!printable()), including 0x00.
			// the two digits come *after* the 'x' that s points at;
			// s[2] is only looked at when s[1] was a valid digit.
			const int hi = hex_nibble(s[1]);
			const int lo = (hi < 0) ? -1 : hex_nibble(s[2]);
			if( hi < 0 || lo < 0 ){
				putchar(*s); // malformed escape: treat like the default case.
			}else{
				putchar( (hi << 4) | lo );
				s += 2; // consume both digits.
			}
			};
			break;
		case '0': //self-referent particle: replicate dna string.
			putstring(dna); // assumes no "difficult symbols" in the dna string.
			break;
		default:
			putchar(*s);
			break;
		};
		s++;
	}
	return(0);
}




// dna_with_zeros: dna text stored as an array of separate strings,
// with the one-byte string "\x01" as its last element.
// When reconstructing this dna string,
// the zeros need to be expanded to quotes and newlines ...
// NOTE(review): presumably "the zeros" are the terminators that end each
// array element, "\x01" is the self-referential particle, and the dashes
// are placeholders for real text -- confirm; nothing visible here reads
// this array.
const char * dna_with_zeros[] = {
"-------------",
"----------------",
"-----------",
"\x01",
};



/*
simpler version: assume dna string contains only characters allowed in
valid .c source files, plus:
\x02 in dna string represents self-referential particle in this source.
plus:
the standard \x00 at end-of-string to represent end-of-file.
Other than that, dna string at runtime contains literal text of file.
(in particular, the \n in this source string represents the newline
in the .c source).

Most characters ('a', 'b', tab, space, etc.)
are represented literally in the dna string.
*/

// d: sample dna text for the "\x02 particle" scheme described above.
// At run time it holds real newlines, and a single \x02 byte between two
// double-quote characters; the dashes are presumably placeholders for
// real program text.
const char * d =
"----------------\n"
"\"\x02\";\n" // self-referential particle -- I think it's simpler to embed the beginning and ending quotes here.
"-------------\n"
"----------------\n";

/*
 * replicate_dna: print the contents of d as C string-literal source text,
 * expanded so that a compiler reading the output compresses it back down
 * to exactly what is in memory while this runs.
 *
 *   \x02 byte    -> the four characters  \x02
 *   newline      -> \n, closing quote, real newline, opening quote
 *   " and \      -> preceded by a backslash
 *   anything else is copied unchanged
 *
 * The caller emits the very first opening quote before the call and the
 * very last closing quote after it.  Newlines are assumed to occur often
 * enough in the text that the emitted literals do not get too long.
 *
 * An abandoned alternative (formerly kept here under #if 0) looked each
 * character up in a "telomere" table stored at the start of d: one entry
 * every 7 bytes, holding a special character, then up to 5 letters to
 * print in its place, then a \x00.
 */
void replicate_dna(void){
	const char *cursor;

	for( cursor = d; *cursor != '\0'; cursor++ ){
		switch( *cursor ){
		case '\x02': // self-referential particle
			putchar('\\');
			putchar('x');
			putchar('0');
			putchar('2');
			break;
		case '\n':
			putchar('\\');
			putchar('n');
			putchar('"');  // ending quote
			putchar('\n');
			putchar('"');  // beginning quote
			break;
		case '"':
		case '\\':
			putchar('\\');
			putchar(*cursor);
			break;
		default:
			putchar(*cursor);
			break;
		}
	}
}


/*
 * main2: print d starting at its 4th character (the first 3 are skipped
 * as the telomere).  Characters are copied to stdout unchanged, except
 * that each \x02 byte is replaced by the output of replicate_dna().
 *
 * Returns 0.
 */
int main2(void){
	const char *pos;

	for( pos = d + 3; *pos != '\0'; ++pos ){
		if( *pos != '\x02' ){
			putchar(*pos);
			continue;
		}
		replicate_dna(); // self-referential particle
	}
	return 0; // required by ANSI
}


//-----------------------


/*
second-order quine:
When run, the DNA string is decoded to an "intermediate RNA string".
Then when the *output* is compiled and run,
the "intermediate RNA string" is decoded to generate the source code.

In particular:
"special" characters are double-escaped:
quotes in the source code end up with 3 backslashes (\\\") in the DNA.

\x00 in dna string represents self-referential particle in this source.
double \x00 \x00 at end-of-string to represent end-of-file.
Other than that, RNA string at runtime contains literal text of file,
and DNA string at runtime contains literal text of RNA.
(in particular, the \\\n in this source string represents the newline
in the .c source).

Most characters ('a', 'b', tab, space, etc.)
are represented literally in the dna string.

When replicating the DNA string, the only special char is the backslash (\) --
backslashes at runtime need to be doubled.

So we have:
source text:
	typical_line_of("source\\text\x27");
RNA:
"	typical_line_of(\"source\\\\text\\x27\");\n"
DNA:
"	typical_line_of(\\\"source\\\\\\\\text\\\\x27\\\");\\n"


*/

// d: sample dna text for the second-order (double-escaped) scheme
// described above.  Every escape is written with a doubled backslash, so
// at run time the string holds the *text* of the escapes (backslash + n,
// backslash + x00, backslash + quote) rather than real newlines, zero
// bytes or bare quotes.  The dashes are presumably placeholders.
const char * d =
"----------------\\n"
"\\\"\\x00\\\";\\n" // self-referential particle -- I think it's simpler to embed the beginning and ending quotes here.
"-------------\\n"
"----------------\\n\\x00";

/*
 * replicate_dna: print the contents of d as C string-literal source text,
 * expanded so that a compiler reading the output compresses it back down
 * to exactly what is in memory while this runs.
 *
 * In this scheme the only character needing an escape is the backslash,
 * which is doubled.  After each  backslash + 'n'  pair the current literal
 * is closed and a new one opened on the next source line.
 *
 * The caller emits the very first opening quote before the call and the
 * very last closing quote after it.  Nothing is printed for an empty d.
 */
void replicate_dna(void){
	const char *c;

	// assume \n comes often enough in source text
	// so the encoded dna strings don't get too long.
	for( c = d; *c; c++ ){ // for each letter
		if( '\\' == *c ){
			putchar('\\');
		}
		putchar(*c);

		// the first character has no predecessor: only look at c[-1]
		// once c has moved past the start of d.
		if( c > d && '\\' == c[-1] && 'n' == c[0] ){
			putchar('"');  // ending quote
			putchar('\n');
			putchar('"');  // beginning quote
		}
	}
}


/*
 * main2: second-order variant.  In order, it prints
 *   1. rna from its 4th character up to the first \x00,
 *   2. the escaped dna (replicate_dna()),
 *   3. the next \x00-terminated piece of rna,
 *   4. dna verbatim (copy_dna_to_rna),
 *   5. the next \x00-terminated piece of rna.
 *
 * NOTE(review): `rna` is not declared in the part of the file visible
 * here -- confirm where it is meant to come from.
 *
 * Returns 0.
 */
int main2(void){
	const char *piece = putstring( rna + 3 ); // skip telomere/intron
	replicate_dna();
	piece = putstring( piece + 1 );  // + 1: skip \x00 character
	(void)putstring( dna );          // copy_dna_to_rna
	(void)putstring( piece + 1 );    // + 1: skip \x00 character
	return 0; // required by ANSI
}



//-----------------------


// d4: sample dna text that uses '\x00' twice:
// the first one is the self-referential particle;
// the 2nd is the end-of-string marker.
// Together with the terminator the compiler appends, the trailing \x00
// makes the string end in two zero bytes.  The dashes are presumably
// placeholders for real program text.

const char * d4 =
"----------------\n"
"\"\x00\";\n" // self-referential particle -- I think it's simpler to embed the beginning and ending quotes here.
"-------------\n"
"----------------\n\x00"; // double-zeros mark end-of-file

/*
 * replicate_dna3: print the contents of d as C string-literal source text,
 * expanded so that a compiler reading the output compresses it back down
 * to what is in memory while this runs.  Scanning continues across single
 * zero bytes and stops at the first pair of consecutive zero bytes.
 *
 *   \x00 byte    -> the four characters  \x00
 *   newline      -> \n, closing quote, real newline, opening quote
 *   " and \      -> preceded by a backslash
 *   anything else is copied unchanged
 *
 * The caller emits the very first opening quote before the call and the
 * very last closing quote after it.  Newlines are assumed to occur often
 * enough that the emitted literals do not get too long.
 *
 * NOTE(review): the string defined next to this function that ends in a
 * double zero is `d4`, yet this reads `d` -- presumably d4 was meant;
 * confirm.
 */
void replicate_dna3(void){
	const char * cur = d;

	// for each letter, until we hit the double-zero at end-of-string
	while( *cur != '\0' || cur[1] != '\0' ){
		switch( *cur ){
		case '\x00': // self-referential particle
			putchar('\\');
			putchar('x');
			putchar('0');
			putchar('0');
			break;
		case '\n':
			putchar('\\');
			putchar('n');
			putchar('"');   // ending quote
			putchar(*cur);  // i.e., putchar('\n');
			putchar('"');   // beginning quote
			break;
		case '"':
		case '\\':
			putchar('\\');
			putchar(*cur);
			break;
		default:
			putchar(*cur);
			break;
		}
		cur++;
	}
}

/*
 * replicate_dna6: same job as replicate_dna3, written with putstring()
 * for the multi-character sequences.  Prints the contents of d as C
 * string-literal source text, up to the first pair of consecutive zero
 * bytes.
 *
 *   \x00 byte    -> the four characters  \x00
 *   newline      -> backslash, n, ending quote, literal newline,
 *                   beginning quote
 *   " and \      -> preceded by a backslash
 *   anything else is copied unchanged
 *
 * The first byte is always processed before the double-zero test is made.
 * The caller emits the very first opening quote before the call and the
 * very last closing quote after it.
 *
 * NOTE(review): the string defined in this section that ends in a double
 * zero is `d4`, yet this reads `d` -- presumably d4 was meant; confirm.
 */
void replicate_dna6(void){
	const char * cur = d;

	do{
		switch( *cur ){
		case '\x00': // self-referential particle
			putstring("\\x00");
			break;
		case '\n':
			putstring("\\n\"\n\""); // special end-of-line sequence
			break;
		case '"':
		case '\\':
			putchar('\\');
			putchar(*cur);
			break;
		default:
			putchar(*cur);
			break;
		}
		cur++;
	}while( *cur != '\0' || cur[1] != '\0' ); // stop at the double-zero
}
// special case: does not catch double-zeros as the first 2 bytes in the string.
// That means that if the string starts with double-zeros,
// it will be emitted entirely in an escaped string ... dunno if this is useful.


/*
 * main3: print d from its 4th character (the first 3 are skipped as the
 * telomere) up to the first zero byte, then the escaped dna
 * (replicate_dna6()), then the text that follows that zero byte up to
 * the next zero byte.
 *
 * NOTE(review): this expects a zero byte inside the string; the string in
 * this section that has one is `d4`, not `d` -- confirm which is meant.
 *
 * Returns 0.
 */
int main3(void){
	const char *pos = d + 3;

	for( ; *pos != '\0'; pos++ ){ // text before the particle
		putchar(*pos);
	}
	replicate_dna6();
	for( pos++; *pos != '\0'; pos++ ){ // pos++ first: skip \x00 character
		putchar(*pos);
	}
	return 0; // required by ANSI
}

/*
 * main3a: the same output sequence as main3, using putstring() for the
 * copying: d from its 4th character up to the first zero byte, then
 * replicate_dna6(), then the text after that zero byte up to the next one.
 *
 * Returns 0.
 */
int main3a(void){
	const char *rest = putstring( d + 3 ); // + 3: skip telomere/intron
	replicate_dna6();
	(void)putstring( rest + 1 );           // + 1: skip \x00 character
	return 0; // required by ANSI
}
	// if there is any more text after the second \x00 character,
	// it is not emitted literally,
	// but it *is* carried along in the new DNA string as an intron / junk DNA.











// d5: embed the entire file literally into the executable, except for the
// self-referential particle.
// Two dna strings encode the file; the self-referential particle is
// implied between them: d5[0] is the text before it, d5[1] the text after.
// The array has room for 3 pointers but only 2 initializers, so d5[2] is
// a null pointer.
const char * d5[3] = {
// first part of literal file
"----------------\n"
"----------------\n"
"--------\"" // the trailing double quote is escaped *inside* the literal
, // self-referential particle
// last part of literal file
"\"-------------\n"
"----------------\n"
,
};

/*
 * replicate_dna5: print the string c as one array element of C source
 * text, expanded so that a compiler reading the output compresses it back
 * down to what is in memory while this runs.
 *
 * Unlike the other replicate_dna variants, this one emits the opening
 * quote itself, and finishes with the closing quote, a comma and a
 * newline.
 *
 *   newline   -> \n, closing quote, real newline, opening quote
 *   " and \   -> preceded by a backslash
 *   anything else is copied unchanged
 *
 * Newlines are assumed to occur often enough in the text that the emitted
 * literals do not get too long.
 */
void replicate_dna5(const char * c){
	putchar('"'); // opening quote

	for( ; *c != '\0'; c++ ){
		switch( *c ){
		case '\n':
			// (could instead come from the telomere: putstring( d5[2] );)
			putstring("\\n\"\n\""); // special end-of-line sequence.
			break;
		case '"':
		case '\\':
			putchar('\\');
			putchar(*c);
			break;
		default:
			putchar(*c);
			break;
		}
	}

	// last ending quote and the end-of-string comma:
	putchar('"');
	putchar(',');
	putchar('\n');
}



/*
 * main3a: two-string variant.  In order, it prints d5[0] verbatim, the
 * escaped form of d5[0], the escaped form of d5[1], and d5[1] verbatim.
 *
 * NOTE(review): replicate_dna() as defined in this file takes no
 * arguments; the function that accepts a string is replicate_dna5() --
 * presumably that is the one meant here; confirm.
 *
 * Returns 0.
 */
int main3a(void){
	const char * const head = d5[0];
	const char * const tail = d5[1];

	putstring( head );
	replicate_dna(head);
	replicate_dna(tail);
	putstring( tail );
	return 0; // required by ANSI
}
/*
It's not possible to embed any arbitrary text using this scheme -- in particular,
one cannot encode a standard non-self-referential file.
On the other hand, it's impossible to self-reference twice,
which seems to be useless ...
*/

