#include <stdio.h>

// Capacity of the strings[] / scount[] tables
#define	MAX_STRINGS	2500
// Capacity of the files[] table
#define	MAX_FILES	50

unsigned
	scount[MAX_STRINGS],	// Repeat count per string (0 = seen once, -1 = inhibited by parm-file)
	line,					// Current line number (also reused as a scratch counter in main)
	ftop,					// Top of file list (number of entries in files[])
	sptr,					// Pointer into current string (next free index in string[])
	stop,					// Top of string list (number of entries in strings[])
	sptop;					// Top of string pool (next free index in spool[])
FILE
	*fp,					// Current input file (also reused for the pool output files)
	*outfile;				// Converted copy of the source file (second pass only)
int uc = -1;				// Pushed-back character for gc(), -1 when there is none

unsigned char
	pass,					// 1st or second pass (0 = collect strings, nonzero = rewrite files)
	*strings[MAX_STRINGS],	// Pointers to strings (into spool[])
	*file,					// Current file name, used in error reports
	*files[MAX_FILES],		// Pointers to filenames; files[0] is the destination directory
	string[2000],			// Current string (also used as a path/line scratch buffer)
	nlflag,					// Newline flag: nonzero while only blanks seen since the last newline
	xext = 'h',				// File extension of the "xstrings" include ('c' after /C)
	xfile = -1,				// Process files (cleared by /L for listing only)
	xsort,					// Sort alphabetically (set by /S)
	spool[45000];			// String storage pool
unsigned
	xall;					// List ALL strings (set by /A), even those occurring only once


// Report error and abort
//
// Formats the message into a local buffer, prints it as
//   <file>[<line+1>]: <message>
// using the global current file name and line counter, then terminates
// the program with exit status -1. Every caller in this file passes a
// single literal message string.
//
// NOTE(review): this relies on non-standard compiler features - the
// `register` function qualifier, nargs() and _format_(). Presumably
// nargs() yields the number of arguments passed and each argument takes
// 2 bytes, so `nargs() * 2 + &args` addresses the first (format) argument
// for _format_() to expand printf-style - confirm against the compiler
// library documentation.
// NOTE(review): nothing visible here bounds the formatted text to the
// 81-byte buffer.
register error(unsigned args)
{
	char buffer[81];

	_format_(nargs() * 2 + &args, buffer);
	printf("%s[%u]: %s\n", file, line+1, buffer);
	exit(-1);
}

// Fetch the next input character, honouring a pending unget
//
// Returns the character held in 'uc' if one was pushed back (and clears
// the slot), otherwise reads from 'fp'. The line counter is advanced only
// when a newline is actually read from the file, so a pushed-back newline
// is never counted twice. Returns whatever getc() returns at end of file.
gc()
{
	int ch;

	if(uc == -1) {		// Nothing pushed back - read from the file
		ch = getc(fp);
		if(ch == '\n')
			++line;
		return ch; }

	// Hand back the pushed-back character and empty the slot
	ch = uc;
	uc = -1;
	return ch;
}

// Peek at the next input character and consume it only on a match
//
// Returns 1 (character consumed) when the next character equals 'c'.
// Otherwise the character is stored in 'uc' so the next gc() returns it,
// and 0 is returned.
tn(char c)
{
	int next;

	next = gc();
	if(next != c) {
		uc = next;		// Not the one we wanted - push it back
		return 0; }
	return 1;
}

// Emit one character to the converted output file
//
// Does nothing on the first pass (pass == 0), when no output file is open;
// on the second pass the character is written to 'outfile'.
xwrite(char c)
{
	if(pass) {
		putc(c, outfile); }
}

// Process file - scan one source file, collecting or converting its literals
//
// filename - path of the source file to read.
//
// First pass (pass == 0): every completed literal is handed to
// process_string(), which records the double-quoted ones.
// Second pass (pass != 0): the file is additionally copied, character by
// character, to a file of the same base name inside the destination
// directory files[0]. The copy starts with an #include of "xstrings.<xext>"
// and process_string() writes each literal (or its replacement) to it.
//
// Lines whose first non-blank character is '#', and // or /* */ comments,
// are copied through without looking for literals.
//
// NOTE(review): the "rvq"/"wvq" open modes are non-standard; presumably the
// extra flags make the library report a failed open and quit, which would
// explain why the fopen() results are not checked - confirm against the
// compiler's library documentation.
process_file(char *filename)
{
	int c;				// int, not char: a 0xFF data byte must not compare equal to EOF (-1)
	char cmt, quot, *p;

	if(pass) {
		// Strip any drive/directory prefix to get the bare file name
		p = file = filename;
		while(*p) switch(*p++) {
			case ':' :
			case '\\' :
				file = p; }
		sprintf(string, "%s\\%s", files[0], file);
		printf("Processing '%s' to '%s'\n", filename, string);
		outfile = fopen(string, "wvq");
		fprintf(outfile, "#include \"xstrings.%c\"\n", xext);
		xext = 'h'; }	// Only the first converted file gets the alternate extension
	else
		printf("Scanning '%s'\n", filename);
	fp = fopen(file = filename, "rvq");
	line = cmt = quot = 0;
	// A file begins at the start of a line. Without this the first line of
	// the first file was scanned for strings on pass 1 even when it was a
	// '#' directive, while pass 2 (with nlflag left set) skipped it.
	nlflag = -1;
	while((c = gc()) != -1) {
		if((c != ' ') && nlflag) {
			if(c == '#') {		// Preprocessor line: treat like a line comment
				sptr = 0;
				cmt = 1; }
			nlflag = 0; }
		if(c == '\n')
			nlflag = -1;
		if(cmt == 1) {	// In line comment
			xwrite(c);
			if(c == '\n')
				cmt = 0;
			continue; }
		if(cmt == 2) {	// In block comment
			xwrite(c);
			if((c == '*') && tn('/')) {
				cmt = 0;
				xwrite('/'); }
			continue; }
		if(quot) {		// In character (1) or string (2) literal
			if(sptr > (sizeof(string)-3))
				error("String too long");
			if(c == '\\') {		// Keep the escape pair intact so \" and \' do not end the literal
				string[sptr++] = c;
				string[sptr++] = gc();
				continue; }
			if(c == ((quot == 1) ? '\'' : '"')) {	// Matching closing quote
				string[sptr++] = 0;
				process_string(quot);
				quot = 0;
				continue; }
			string[sptr++] = c;
			continue; }
		if(c == '"') {	// Beginning quote
			sptr = 0;
			quot = 2;
			continue; }
		if(c == '\'') {	// Beginning character
			sptr = 0;
			quot = 1;
			continue; }
		if(c == '/') {	// Possible start of a comment
			xwrite(c);
			if(tn('*')) {
				xwrite('*');
				cmt = 2;
				continue; }
			if(tn('/')) {
				xwrite('/');
				cmt = 1;
				continue; }
			continue; }
		xwrite(c); 	}
	fclose(fp);
	if(pass)
		fclose(outfile);
}

// Handle one completed literal held in string[]
//
// quot - 1 for a character literal ('...'), 2 for a string literal ("...").
//
// Character literals are never pooled: the first pass ignores them and the
// second pass writes them back to the output unchanged.
//
// String literals, first pass: a string already in the table has its
// repeat count bumped (unless it is marked -1); a new string is appended
// to the table and copied into the pool.
// String literals, second pass: the string must already be in the table.
// If it has a nonzero count, or xall is set, "StrN" is written followed by
// the original literal wrapped in a comment; otherwise the literal is
// written unchanged.
process_string(int quot)
{
	unsigned idx;
	char *src;

	if(quot != 2) {		// Character literal
		if(pass) {
			putc('\'', outfile);
			for(src=string; *src; ++src)
				putc(*src, outfile);
			putc('\'', outfile); }
		return; }

	// Look the string up in the table; idx == stop means "not present"
	for(idx=0; idx < stop; ++idx) {
		if(!strcmp(strings[idx], string))
			break; }

	if(!pass) {			// First pass - collect strings
		if(idx < stop) {
			if(scount[idx] != -1)
				++scount[idx];
			return; }
		// New string
		if(stop >= MAX_STRINGS)
			error("Too many strings");
		strings[stop++] = spool+sptop;	// Set pointer to string
		src = string;
		do {	// Copy up to and including the terminating zero
			if(sptop >= sizeof(spool))
				error("String space exhausted");
			spool[sptop++] = *src; }
		while(*src++);
		return; }

	// Second pass - output strings
	if(idx >= stop)
		error("String not found on second pass");
	if(scount[idx] | xall)
		fprintf(outfile, "Str%u/*", idx+1);
	putc('"', outfile);
	for(src=string; *src; ++src)
		putc(*src, outfile);
	putc('"', outfile);
	if(scount[idx] | xall)
		fputs("*/", outfile);
}

// Sort the string pool
sort_pool()
{
	unsigned i, j, k;
	char *p;
	// Remove any inhibited strings by making them appear as 1 only
	for(i=0; i < stop; ++i) {
		if(scount[i] == -1)
			scount[i] = 0; }

	if(xsort) {	// Sort by alphabetic ordering
		for(i=0; i < stop; ++i) {
			for(j=i+1; j < stop; ++j) {
				if(strcmp(strings[j], strings[i]) < 0) {
					k = scount[i];
					scount[i] = scount[j];
					scount[j] = k;
					p = strings[i];
					strings[i] = strings[j];
					strings[j] = p; } } }
		return; }
			
	// Sort into frequency ordering
	for(i=0; i < stop; ++i) {
		for(j=i+1; j < stop; ++j) {
			if(scount[i] < scount[j]) {
				k = scount[i];
				scount[i] = scount[j];
				scount[j] = k;
				p = strings[i];
				strings[i] = strings[j];
				strings[j] = p; } } }
}

// Write the string pool files
//
// Only entries with a nonzero repeat count are written, unless xall is set
// in which case every entry is written.
//
// Listing mode (xfile == 0): writes STRINGS.TXT to the destination
// directory files[0], one quoted string per line, and nothing else.
//
// Otherwise writes:
//   XSTRINGS.C - one "char StrN[] = {"...."};" definition per entry
//   XSTRINGS.H - a single "extern char" declaration listing every StrN[],
//                each annotated with its occurrence count and text
// N is the entry's table position plus one, matching the names that
// process_string() writes into the converted sources.
dump_pool()
{
	unsigned i, last;
	char *p;

	if(!xfile) {	// Write the string summary
		sprintf(string, "%s\\STRINGS.TXT", files[0]);
		printf("Writing '%s'\n", string);
		fp = fopen(string, "wvq");
		for(i=0; i < stop; ++i) {
			if(scount[i] | xall) {
				putc('"', fp);
				fputs(strings[i], fp);
				fputs("\"\n", fp); } }
		fclose(fp);
		return; }

	// Write the 'C' definitions files
	sprintf(string, "%s\\XSTRINGS.C", files[0]);
	printf("Writing '%s'\n", string);
	fp = fopen(string, "wvq");
	for(i=0; i < stop; ++i) {
		if(scount[i] | xall) {
			fprintf(fp, "char Str%u[] = {", i+1);
			if(strlen(p = strings[i]) > 50)		// Long strings start on their own line
				fputs("\n ", fp);
			putc('"', fp);
			fputs(p, fp);
			fputs("\"};\n", fp); } }
	fclose(fp);

	// Find the last entry that will be written. The declaration list must
	// be closed with ';' on exactly that entry; looking only at the count
	// of the immediately following entry ends the list early (or never)
	// when written entries are not contiguous (/S) or have a zero count (/A).
	last = stop;	// stop == "nothing will be written"
	for(i=0; i < stop; ++i) {
		if(scount[i] | xall)
			last = i; }

	// Write the 'H' reference file
	sprintf(string, "%s\\XSTRINGS.H", files[0]);
	printf("Writing '%s'\n", string);
	fp = fopen(string, "wvq");
	if(last < stop)		// A bare "extern char" with no declarators is not valid C
		fputs("extern char\n", fp);
	for(i=0; i < stop; ++i) {
		if(scount[i] | xall) {
			fprintf(fp, "\tStr%u[]", i+1);
			putc((i < last) ? ',' : ';', fp);
			if(i < 9)	// Extra tab keeps the comments aligned for single-digit names
				putc('\t', fp);
			fprintf(fp, "\t/* %u \"", scount[i]+1);
			fputs(strings[i], fp);
			fputs("\" */\n", fp); } }
	fclose(fp);
}

// Copy string with '\' protection
copy_string(char *p)
{
	char c;
	while(c = *p++) {
		if(c == '\\') {
			if(!*p)
				error("Malformed string");
			spool[sptop++] = c;
			spool[sptop++] = *p++;
			continue; }
		if(c == '"') {
			while(c = *p++) {
				if(!isspace(c))
					error("Malformed string"); }
			spool[sptop++] = 0;
			return; }
		spool[sptop++] = c; }
	error("Malformed string");
}

main(int argc, char *argv[])
{
	int i;
	char *p;

	for(i=1; i < argc; ++i) {
		file = "Command argument";
		p = argv[line = i];
		switch((toupper(*p++)<<8) | toupper(*p++)) {
		case '-A' :		// Convert ALL Strings
		case '/A' :	xall = -1;	continue;
		case '-C' :		// Set output extension
		case '/C' :	xext = 'c';	continue;
		case '-L' :		// Listing only	
		case '/L' :	xfile = 0;	continue;
		case '-S' :		// Sort alphabetically
		case '/S' : xsort = -1;	continue; }
		if(*(p = argv[i]) == '@') {
			fp = fopen(file = p+1, "rvq");
			line = 0;
			while(fgets(p = string, sizeof(string)-1, fp)) {
				++line;
				while(isspace(*p))
					++p;
				if((*p == ';') || !*p)
					continue;
				if(ftop >= MAX_FILES)
					error("Too many files");
				if(*p == '"') {
					scount[stop] = -1;
					strings[stop++] = spool+sptop;
					copy_string(p+1);
					continue; }
				files[ftop++] = spool + sptop;
				while(*p && !isspace(*p))
					spool[sptop++] = *p++;
				spool[sptop++] = 0; }
			fclose(fp);
			continue; }
		if(ftop >= MAX_FILES)
			error("Too many files");
		strcpy(files[ftop++] = p); }

	if(ftop < 2) {
		p = 0;
		while(i = get_text(p++))
			putc(i, stdout);
		exit(-1); }

	for(i=1; i < ftop; ++i)
		process_file(files[i]);
	sort_pool();
	pass = -1;
	if(xfile) {
		for(i=1; i < ftop; ++i)
			process_file(files[i]); }
	dump_pool();
	for(i=line=0; i < stop; ++i) {
		if(scount[i] | xall)
			line += (strlen(strings[i])+1) * scount[i]; }
	printf("Estimated storage reduction: %u bytes\n", line);
}

/*
 * Get a byte from the code block text
 *
 * offset - index of the wanted byte, counted from the HELP label.
 *
 * Loads the byte at CS:HELP[offset] into AL and clears AH. main() uses the
 * result as the character to print and stops at the first zero byte.
 *
 * NOTE(review): presumably 4[BP] is where this compiler places the single
 * argument and AX is its function return register - confirm against the
 * compiler's calling convention.
 */
get_text(offset) asm
{
	MOV	BX,4[BP]					; Get offset
	MOV	AL,CS:byte ptr HELP[BX]		; Get byte
	XOR	AH,AH						; Zero high
}

/*
 * Help text - stored in code segment to avoid using precious data space
 *
 * Read one byte at a time through get_text() and printed by main() when
 * fewer than two file entries were supplied. Each line ends with 10
 * (newline); the zero byte after the last line ends the text.
 */
asm {
HELP:
DB'Duplicate String Eliminator - v1.0',10,10
DB'DSE reads one or more .C source files, and replaces all duplicated literal',10
DB'strings with string array references. It also generates the files XSTRINGS.C',10
DB'and XSTRINGS.H which contain definitions for the string arrays.',10,10
DB'This effectively performs duplicate string reduction across all modules.',10,10
DB'Use: DSE destination source-file(s) ... [@parm-file] [options]',10,10
DB'destination	= destination directory to receive converted files',10
DB'source-file(s)	= names of files to process/convert',10
DB'parm-file	= ASCII text, 1 entry/line, containing:',10
DB"  ; any line beginning with ';' is ignored (comment)",10
DB'  "entries in double-quotes are strings to NOT convert"',10
DB'  other entries are read as: destination, source-files(s)',10,10
DB'opts:	/A	= convert ALL strings (even if occurs only once)',10
DB'	/C	= use .C extension for string include in first file',10
DB'	/L	= generate Listing only (do not copy/build strings headers)',10
DB'	/S	= Sort string table alphabetically',10,10
DB'?COPY.TXT 2002-2005 Dave Dunfield -  -- see COPY.TXT --.',10,0
DB	0,0
}
