/* MICRO 8086 ASSEMBLER:

	This is an 8086 assembler which is kinda novel in that It supports
a "motorola" style syntax (instead of "intel"). I plan to eventually write
a companion linker, and thereby remove the requirement for MASM in the PC
implementation of MICRO-C. However.... time is short and rather than keep
it on the shelf.... Here it is as a "demo" program.

	I have not yet decided on an object file format, so this incarnation of
the assembler simply outputs the binary code generated to a ".COM" file. If
you perform an "ORG $100" at the beginning of your program, this COM file will
be directly executable on the IBM PC (Entry point is the beginning.. $100).

	You can also implement your own output routines for whatever format you
desire. That section of code is clearly marked in the source.

-	Syntax of command: ASM86 <filename> [+C +F +Q +S +T]
		Reads:	<filename>.ASM		Assembler source input
		Writes:	<filename>.COM		Binary output file
				<filename>.LST		Listing file
		Switch:	+C		Case sensitive assembly *
				+F		Full listing, otherwise, only errors are shown
				+Q		Quiet... Inhibit progress messages
				+S		Include sorted symbol table in listing
				+T		Send listing to TTY (console), instead of .LST file
		* Note: When using Case sensitive assembly, the register names
				must be entered in UPPER case.

-	Any lines beginning with ';' or '*' (in column 1) are considered to
	be comments, and are ignored by the assembler.

-	Any symbol found in column one is determined to be a LABEL, no special
	characters (such as ':') are used. Only labels may begin in column one,
	and there must be at least one space or tab between the label and the
	instruction.

-	Operands to instructions may not contain any spaces except for
	those found in strings and character constants:
		MOV		AX,BX		This is OK
		MOV		AL,' '		This too is OK
		MOV		AX, BX		ERROR: "Additional operands required"

-	No special character is needed before comments following instructions,
	EXCEPT for the "RET" instruction, use ';' to delimit the comment if you
	do not specify a stack adjust value.

-	Assembler makes no distinction between symbols and numbers, the '#'
	character is used to denote IMMEDIATE values:
		MOV		AX,DATA		Loads contents of variable 'DATA'
		MOV		AX,#DATA	Loads address of variable 'DATA'
		MOV		AX,123		Loads contents of address '123'
		MOV		AX,#123		Loads value '123'

-	Symbols are used only to reference addresses, no information is recorded
	as to the size of the symbol. At least one of the operands to an
	instruction must contain an explicit size (8 or 16 bits). The assembler
	knows the size of the registers, and thus any instructions involving
	registers will automatically use the correct size. For instructions which
	do not reference registers, you can force one of the arguments to be
	recognized as an 8 or 16 bit value by using the '<'	or '>' character as a
	prefix:
		MOV		AL,DATA		Moves 8 bits
		MOV		AX,DATA		Moves 16 bits
		MOV		DATA,#12	ERROR: "Size not known"
		MOV		<DATA,#12	Moves 8 bits
		MOV		DATA,#<12	Equivalent to above
		MOV		>DATA,#12	Moves 16 bits
		MOV		DATA,#>12	Equivalent to above
		MOV		<DATA,#>12	ERROR: "Incompatible sizes"

-	The '>' and '<' prefixes have special meaning in JMP and CALL
	instructions:
		JMP		label		NEAR 16 bit RELATIVE jump
		JMP		<label		NEAR 8 bit RELATIVE jump
		JMP		>LABEL		NEAR INDIRECT jump
		JMP		BX			NEAR Jump to address in BX
		JMP		[BX]		NEAR Jump indirect through BX
		JMP		SEG:LABEL	FAR DIRECT jump
		JMP		SEG:>LABEL	FAR INDIRECT jump *
		JMP		SEG:[BX]	FAR INDIRECT jump *
	* In the last two examples, the SEG value is ignored, and
	  the SEGMENT:OFFSET is taken from the operand address.

-	Expression value items supported:
		n		- Decimal value (0 - 65535)
		%b		- Binary value (%0 - %1111111111111111)
		@o		- Octal value (@0 - @177777)
		$h		- Hexadecimal value ($0 - $ffff)
		'cc'	- Quoted value (one or two chars)
		name	- Symbol name ('A'-'Z', '0'-'9', '?', '_')
		*		- Current program counter

-	Expression operators supported:
		Unary:	-	Negation
				=	Swap high and low bytes
				<	Force 8 bit size
				>	Force 16 bit size
		Binary:	+	Addition
				-	Subtraction
				*	Multiplication
				/	Division
				%	Modulus
				&	Logical AND
				|	Logical OR
				^	Exclusive OR
				<	Shift left
				>	Shift right

-	Offsets to indirect accesses are specified by placing a constant
	value immediately before the opening '['. When both a BASE and an
	INDEX register are involved, use '+' to indicate them:
		MOV		AX,[BX]		Indirect through BX, no offset
		MOV		AX,[SI]		Indirect through SI, no offset
		MOV		AX,10[DI]	Indirect to DI with 10 byte offset
		MOV		AX,[BP+SI]	Indirect through BP+SI, no offset
		MOV		AX,5[BP+SI]	Indirect through BP+SI, 5 byte offset
		MOV		AX,[BX+10]	This MASM syntax NOT supported!!!

-	Directives supported:
			ORG		<value>			Set program counter
	<label>	EQU		<value>			Set a symbol value
			SEG		<value>			Set output segment (0-3) *
			DB		<value>[,...]	Define byte(s)
			DW		<value>[,...]	Define word(s)
			DRW		<value>[,...]	Define reversed word(s)
			DS		<value>			Define storage (un-initialized)
			STR		<d>string<d>	Define string <d>=any delimiter char.
			STRZ	<d>string<d>	As above... Appends zero (NULL)
			STRH	<d>string<d>	As above... Sets high bit on last char.
			PUBLIC	<symbol>[,...]	Declare a symbol as public *
			EXTERN	<symbol>[,...]	Declare a symbol as external *
			SPACE					Insert blank line in listing
			PAGE					Eject page
			TITLE	<string>		Set listing title
			NOLIST					Disable listing output **
			LIST					Enable listing output **
	*	These directives are not very useful yet...
	**	LIST/NOLIST can be nested. Lines with errors are always listed.

?COPY.TXT 1990-2003 Dave Dunfield
 -- see COPY.TXT --.

Permission granted for personal (non-commercial) use only.

Compile command: cc asm86 -fop
*/
#include <stdio.h>

/*
 * Fixed assembler parameters
 * These size the static tables and buffers declared further down.
 */
#define	NUMREG	20		/* Number of registers defined */
#define	NUMSEG	4		/* Maximum number of segments */
#define	PAGSIZE	60		/* Number of lines per page */
#define	SYMSIZE	15		/* Maximum length of a symbol name */
#define	SYMBOLS	2000	/* Maximum number of symbols */
#define	LINSIZE	100		/* Maximum size of input line */

/*
 * Operand types
 * Classification of a parsed operand, as returned by oper2(). oper4()
 * packs two of them into one int as (first << 8) + second.
 */
#define	REGIS	1		/* Simple register */
#define	VALUE	2		/* Direct memory reference */
#define	INDEX	3		/* Indexed memory reference */
#define	IMMED	4		/* Immediate value */

/*
 * Symbol flags
 * Bit flags held per symbol in sflags[].
 */
#define	SDUP	0x80	/* Duplicate symbol */
#define	SPUB	0x40	/* Public symbol */
#define	SEXT	0x20	/* External reference */

/*
 * Instruction opcode table
 *
 * Each entry is a mnemonic, spelled one character at a time with 128
 * added to its LAST character, followed by IDATSIZ bytes of data: a type
 * code and four more bytes whose meaning depends on that type. main()
 * dispatches on the type code through *inst_ptr and reads the remaining
 * bytes as inst_ptr[1] .. inst_ptr[4]. Type codes 1-15 are machine
 * instructions, codes 100 and up are assembler directives. A single 0
 * byte ends the table.
 *
 * NOTE(review): inst_ptr is presumably positioned on the type byte of the
 * matching entry by lookup_inst(), which is defined elsewhere - confirm.
 */
#define	IDATSIZ	5		/* Number of bytes of instruction data */
	unsigned char inst_table[] = {
		/* Type 1 - all bytes up to 0x00 are written */
		'A','A','A'+128,		1, 0x37, 0x00, 0x00, 0x00,
		'A','A','D'+128,		1, 0xD5, 0x0A, 0x00, 0x00,
		'A','A','M'+128,		1, 0xD4, 0x0A, 0x00, 0x00,
		'A','A','S'+128,		1, 0x3F, 0x00, 0x00, 0x00,
		'C','B','W'+128,		1, 0x98, 0x00, 0x00, 0x00,
		'C','L','C'+128,		1, 0xF8, 0x00, 0x00, 0x00,
		'C','L','D'+128,		1, 0xFC, 0x00, 0x00, 0x00,
		'C','L','I'+128,		1, 0xFA, 0x00, 0x00, 0x00,
		'C','M','C'+128,		1, 0xF5, 0x00, 0x00, 0x00,
		'C','M','P','S','B'+128,1, 0xA6, 0x00, 0x00, 0x00,
		'C','M','P','S','W'+128,1, 0xA7, 0x00, 0x00, 0x00,
		'C','W','D'+128,		1, 0x99, 0x00, 0x00, 0x00,
		'D','A','A'+128,		1, 0x27, 0x00, 0x00, 0x00,
		'D','A','S'+128,		1, 0x2F, 0x00, 0x00, 0x00,
		'H','L','T'+128,		1, 0xF4, 0x00, 0x00, 0x00,
		'I','N','T','O'+128,	1, 0xCE, 0x00, 0x00, 0x00,
		'I','R','E','T'+128,	1, 0xCF, 0x00, 0x00, 0x00,
		'L','A','H','F'+128,	1, 0x9F, 0x00, 0x00, 0x00,
		'L','O','D','S','B'+128,1, 0xAC, 0x00, 0x00, 0x00,
		'L','O','D','S','W'+128,1, 0xAD, 0x00, 0x00, 0x00,
		'M','O','V','S','B'+128,1, 0xA4, 0x00, 0x00, 0x00,
		'M','O','V','S','W'+128,1, 0xA5, 0x00, 0x00, 0x00,
		'N','O','P'+128,		1, 0x90, 0x00, 0x00, 0x00,
		'P','O','P','F'+128,	1, 0x9D, 0x00, 0x00, 0x00,
		'P','U','S','H','F'+128,1, 0x9C, 0x00, 0x00, 0x00,
		'S','A','H','F'+128,	1, 0x9E, 0x00, 0x00, 0x00,
		'S','C','A','S','B'+128,1, 0xAE, 0x00, 0x00, 0x00,
		'S','C','A','S','W'+128,1, 0xAF, 0x00, 0x00, 0x00,
		'S','T','C'+128,		1, 0xF9, 0x00, 0x00, 0x00,
		'S','T','D'+128,		1, 0xFD, 0x00, 0x00, 0x00,
		'S','T','I'+128,		1, 0xFB, 0x00, 0x00, 0x00,
		'S','T','O','S','B'+128,1, 0xAA, 0x00, 0x00, 0x00,
		'S','T','O','S','W'+128,1, 0xAB, 0x00, 0x00, 0x00,
		'W','A','I','T'+128,	1, 0x9B, 0x00, 0x00, 0x00,
		'X','L','A','T'+128,	1, 0xD7, 0x00, 0x00, 0x00,
		'R','E','P'+128,		1, 0xF3, 0x00, 0x00, 0x00,
		'R','E','P','Z'+128,	1, 0xF3, 0x00, 0x00, 0x00,
		'R','E','P','E'+128,	1, 0xF3, 0x00, 0x00, 0x00,
		'R','E','P','N','Z'+128,1, 0xF2, 0x00, 0x00, 0x00,
		'R','E','P','N','E'+128,1, 0xF2, 0x00, 0x00, 0x00,
		'L','O','C','K'+128,	1, 0xF0, 0x00, 0x00, 0x00,
		/* Type 2 - MEM, REGIS, 16BIT */
		/* [1]=opcode, [2]=EA sub-code, [3]=short 16 bit register form (0=none) */
		'D','E','C'+128,		2, 0xFE, 0x01, 0x48, 0x00,
		'D','I','V'+128,		2, 0xF6, 0x06, 0x00, 0x00,
		'I','D','I','V'+128,	2, 0xF6, 0x07, 0x00, 0x00,
		'I','M','U','L'+128,	2, 0xF6, 0x05, 0x00, 0x00,
		'I','N','C'+128,		2, 0xFE, 0x00, 0x40, 0x00,
		'M','U','L'+128,		2, 0xF6, 0x04, 0x00, 0x00,
		'N','E','G'+128,		2, 0xF6, 0x03, 0x00, 0x00,
		'N','O','T'+128,		2, 0xF6, 0x02, 0x00, 0x00,
		/* Type 3 - opcode (16 bit register, memory operand) */
		'L','D','S'+128,		3, 0xC5, 0x00, 0x00, 0x00,
		'L','E','A'+128,		3, 0x8D, 0x00, 0x00, 0x00,
		'L','E','S'+128,		3, 0xC4, 0x00, 0x00, 0x00,
		/* Type 4 - ACC/IMMED, REGIS,IMMED, INDEX,REGIS, XXX (Fn ='S') */
		/* [4]: low 3 bits = EA sub-code; a non-zero high nibble lets a */
		/* 16 bit immediate in the range -128..127 be written as one byte */
		'A','D','C'+128,		4, 0x14, 0x80, 0x10, 0xF2,
		'A','D','D'+128,		4, 0x04, 0x80, 0x00, 0xF0,
		'S','U','B'+128,		4, 0x2C, 0x80, 0x28, 0xF5,
		'S','B','B'+128,		4, 0x1C, 0x80, 0x18, 0xF3,
		'C','M','P'+128,		4, 0x3C, 0x80, 0x38, 0xF7,
		'A','N','D'+128,		4, 0x24, 0x80, 0x20, 0x04,
		'O','R'+128,			4, 0x0C, 0x80, 0x08, 0x01,
		'X','O','R'+128,		4, 0x34, 0x80, 0x30, 0x06,
		'T','E','S','T'+128,	4, 0xA8, 0xF6, 0x84, 0x00,
		/* Type 5 - MOV */
		'M','O','V'+128,		5, 0x00, 0x00, 0x00, 0x00,	
		/* Type 6 - single byte relative jumps */
		'J','A'+128,			6, 0x77, 0x00, 0x00, 0x00,
		'J','N','B','E'+128,	6, 0x77, 0x00, 0x00, 0x00,
		'J','A','E'+128,		6, 0x73, 0x00, 0x00, 0x00,
		'J','N','B'+128,		6, 0x73, 0x00, 0x00, 0x00,
		'J','C'+128,			6, 0x72, 0x00, 0x00, 0x00,
		'J','N','C'+128,		6, 0x73, 0x00, 0x00, 0x00,
		'J','B'+128,			6, 0x72, 0x00, 0x00, 0x00,
		'J','N','A','E'+128,	6, 0x72, 0x00, 0x00, 0x00,
		'J','B','E'+128,		6, 0x76, 0x00, 0x00, 0x00,
		'J','N','A'+128,		6, 0x76, 0x00, 0x00, 0x00,
		'J','C','X','Z'+128,	6, 0xE3, 0x00, 0x00, 0x00,
		'J','E'+128,			6, 0x74, 0x00, 0x00, 0x00,
		'J','Z'+128,			6, 0x74, 0x00, 0x00, 0x00,
		'J','G'+128,			6, 0x7F, 0x00, 0x00, 0x00,
		'J','N','L','E'+128,	6, 0x7F, 0x00, 0x00, 0x00,
		'J','G','E'+128,		6, 0x7D, 0x00, 0x00, 0x00,
		'J','N','L'+128,		6, 0x7D, 0x00, 0x00, 0x00,
		'J','L'+128,			6, 0x7C, 0x00, 0x00, 0x00,
		'J','N','G','E'+128,	6, 0x7C, 0x00, 0x00, 0x00,
		'J','L','E'+128,		6, 0x7E, 0x00, 0x00, 0x00,
		'J','N','G'+128,		6, 0x7e, 0x00, 0x00, 0x00,
		'J','N','E'+128,		6, 0x75, 0x00, 0x00, 0x00,
		'J','N','Z'+128,		6, 0x75, 0x00, 0x00, 0x00,
		'J','O'+128,			6, 0x70, 0x00, 0x00, 0x00,
		'J','N','O'+128,		6, 0x71, 0x00, 0x00, 0x00,
		'J','S'+128,			6, 0x78, 0x00, 0x00, 0x00,
		'J','N','S'+128,		6, 0x79, 0x00, 0x00, 0x00,
		'J','N','P'+128,		6, 0x7B, 0x00, 0x00, 0x00,
		'J','P','O'+128,		6, 0x7B, 0x00, 0x00, 0x00,
		'J','P'+128,			6, 0x7A, 0x00, 0x00, 0x00,
		'J','P','E'+128,		6, 0x7A, 0x00, 0x00, 0x00,
		'L','O','O','P'+128,	6, 0xE2, 0x00, 0x00, 0x00,
		'L','O','O','P','E'+128,6, 0xE1, 0x00, 0x00, 0x00,
		'L','O','O','P','Z'+128,6, 0xE1, 0x00, 0x00, 0x00,
		'L','O','O','P','N','E'+128,6,0xE0,0x00,0x00,0x00,
		'L','O','O','P','N','Z'+128,6,0xE0,0x00,0x00,0x00,
		/* Type 7 - JMP/CALL */
		/* [1]=near relative, [2]=far direct, [3]/[4]=near/far indirect sub-code */
		'J','M','P'+128,		7, 0xE9, 0xEA, 0x04, 0x05,
		'C','A','L','L'+128,	7, 0xE8, 0x9A, 0x02, 0x03,
		/* Type 8 - RET/RETF */
		/* [1]=plain return, [2]=return with stack adjust value */
		'R','E','T'+128,		8, 0xC3, 0xC2, 0x00, 0x00,
		'R','E','T','F'+128,	8, 0xCB, 0xCA, 0x00, 0x00,
		/* Type 9 - SEGMENT, REGIS, MEMOP */
		'P','O','P'+128,		9, 0x07, 0x58, 0x8F, 0x00,
		'P','U','S','H'+128,	9, 0x06, 0x50, 0xFF, 0x06,
		/* Type 10 - STRING instructions */
		'C','M','P','S'+128,	10,0xA6, 0x00, 0x00, 0x00,
		'L','O','D','S'+128,	10,0xAC, 0x00, 0x00, 0x00,
		'M','O','V','S'+128,	10,0xA4, 0x00, 0x00, 0x00,
		'S','C','A','S'+128,	10,0xAE, 0x00, 0x00, 0x00,
		'S','T','O','S'+128,	10,0xAA, 0x00, 0x00, 0x00,
		/* Type 11 - Shifts & Rotates */
		/* [1]=base opcode, [2]=EA sub-code */
		'R','C','L'+128,		11,0xD0, 0x02, 0x00, 0x00,
		'R','C','R'+128,		11,0xD0, 0x03, 0x00, 0x00,
		'R','O','L'+128,		11,0xD0, 0x00, 0x00, 0x00,
		'R','O','R'+128,		11,0xD0, 0x01, 0x00, 0x00,
		'S','A','L'+128,		11,0xD0, 0x04, 0x00, 0x00,
		'S','H','L'+128,		11,0xD0, 0x04, 0x00, 0x00,
		'S','A','R'+128,		11,0xD0, 0x07, 0x00, 0x00,
		'S','H','R'+128,		11,0xD0, 0x05, 0x00, 0x00,
		/* Type 12 - INT */
		'I','N','T'+128,		12, 0x00, 0x00, 0x00, 0x00,
		/* Type 13 - ESC */
		'E','S','C'+128,		13,0x00, 0x00, 0x00, 0x00,
		/* Type 14 - IN/OUT */
		/* [1]=fixed port form, [2]=DX port form, [3] non-zero marks OUT */
		'I','N'+128,			14,0xE4, 0xEC, 0x00, 0x00,
		'O','U','T'+128,		14,0xE6, 0xEE, 0xFF, 0x00,
		/* Type 15 - XCHG */
		'X','C','H','G'+128,	15,0x86, 0x90, 0x00, 0x00,
		/* Directives */
		'E','Q','U'+128,		100, 0, 0, 0, 0,
		'O','R','G'+128,		101, 0, 0, 0, 0,
		'S','E','G'+128,		102, 0, 0, 0, 0,
		'D','B'+128,			103, 0, 0, 0, 0,
		'D','W'+128,			104, 0, 0, 0, 0,
		'D','S'+128,			105, 0, 0, 0, 0,
		/* STR family: [1] selects 0=plain, 1=append zero, 2=high bit on last char */
		'S','T','R'+128,		106, 0, 0, 0, 0,
		'S','T','R','Z'+128,	106, 1, 0, 0, 0,
		'S','T','R','H'+128,	106, 2, 0, 0, 0,
		'P','U','B','L','I','C'+128,107,0,0,0,0,
		'E','X','T','E','R','N'+128,108,0,0,0,0,
		/* Types 120 and up only affect the listing */
		'P','A','G','E'+128,	120, 0, 0, 0, 0,
		'T','I','T','L','E'+128,121, 0, 0, 0, 0,
		'S','P','A','C','E'+128,122, 0, 0, 0, 0,
		'L','I','S','T'+128,	123, 0, 0, 0, 0,
		'N','O','L','I','S','T'+128,124,0,0,0,0,
		0 }, *inst_ptr;

/*
 * 8086 register name table
 * A register's number is its position in this table:
 *	 0 -  7		8 bit general registers
 *	 8 - 15		16 bit general registers
 *	16 - 19		segment registers
 */
	char *registers[] = {
		/* 8 bit, low halves */
		"AL", "CL", "DL", "BL",
		/* 8 bit, high halves */
		"AH", "CH", "DH", "BH",
		/* 16 bit general purpose */
		"AX", "CX", "DX", "BX",
		/* 16 bit pointer and index */
		"SP", "BP", "SI", "DI",
		/* Segment */
		"ES", "CS", "SS", "DS"
	};

/*
 * Error messages
 * Indexed by the error number handed to asmerr() / show_error(); the
 * number of each message is noted beside it.
 */
	char *error_text[] = {
		"Phase error",						/*  0 */
		"Syntax error",						/*  1 */
		"Invalid instruction",				/*  2 */
		"Additional operands required",		/*  3 */
		"Constant value required",			/*  4 */
		"16 bit operand required",			/*  5 */
		"Illegal addressing mode",			/*  6 */
		"Illegal segment override",			/*  7 */
		"Illegal use of segment register",	/*  8 */
		"Invalid register operation",		/*  9 */
		"Invalid immediate expression",		/* 10 */
		"Invalid index register",			/* 11 */
		"Cannot preset register size",		/* 12 */
		"Unterminated character constant",	/* 13 */
		"Incompatible operand sizes",		/* 14 */
		"Size not known",					/* 15 */
		"Out of range",						/* 16 */
		"Undefined symbol",					/* 17 */
		"Duplicate symbol",					/* 18 */
		"Symbol table overflow",			/* 19 */
		"Shift count must be CL or 1",		/* 20 */
		"Port must be DX or value",			/* 21 */
		"Accumulator required" };			/* 22 */
				
/*
 * Input line buffer & holding areas
 * main() reads each source line into 'inline' and splits it into the
 * 'label', 'instruction' and 'operand' fields. Once the mnemonic has been
 * looked up, 'instruction' is reused as the output buffer for the bytes
 * generated by the line. 'input_ptr' is the operand parser's scan position.
 * NOTE(review): 'inline' is a reserved word from C99 on; this identifier
 * only compiles with a pre-C99 / Micro-C style compiler.
 */
	char inline[LINSIZE+1], *input_ptr;
	unsigned char label[SYMSIZE+1], instruction[80], operand[80];

/*
 * Assembler symbol table & related routines
 * Parallel arrays: name, value and flag bits (SDUP/SPUB/SEXT) of each
 * symbol. 'scount' is the number of entries in use.
 * NOTE(review): 'sindex' appears to be the entry found by the most recent
 * lookup or definition - confirm against define()/looksym().
 */
	char symbols[SYMBOLS][SYMSIZE+1];
	int	svalue[SYMBOLS], scount = 0, sindex;
	char sflags[SYMBOLS];

/*
 * Assembler Segment holding areas
 * Currently selected segment, plus the saved program counter and the
 * highest program counter seen for every segment.
 */
	unsigned active_seg, seg_pc[NUMSEG], seg_size[NUMSEG] = { 0 };

/*
 * Global locations for operand results
 * type/length/value/xvalue describe the first (or only) operand,
 * type1/value1/xvalue1 the second one. 'length' is the operand size in
 * bytes, 0 while it is not known. 'xvalue' carries the index register
 * information of an INDEX operand.
 */
	int type, length, value, xvalue;
	int type1, value1, xvalue1;

/* Command line options flags (+C, +F, +S, +Q, +T), non-zero when given */
	char casf = 0, fulf = 0, symf = 0, quif = 0, ttyf = 0;

/*
 * Global counters and misc variables
 *	dir		direction bit for the opcode, set to 0x02 by direction()
 *	title	listing title, initially the file name
 *	list	NOLIST nesting depth, the listing is enabled while it is 0
 *	pc		program counter		line	source line number
 *	ocount	bytes generated by the current line
 *	daddr	address shown in the listing for the current line
 *	pass	0 on the first pass, 1 on the second
 *	ecount	lines on the current listing page; starts above PAGSIZE so
 *			that the first listed line prints a title
 *	pcount	number of the next listing page
 */
	char dir, title[50], list = 0;
	unsigned pc, line, ocount, daddr;
	unsigned pass = 0, error_flag = 0, error_count = 0;
	unsigned ecount = 999, pcount = 1;

/* Source input, binary (.COM) output and listing output files */
	FILE *asm_fp, *hex_fp, *lst_fp;

/*
 * Open a filename with the appropriate extension.
 *
 *	filename	base name, without extension
 *	extension	extension to append, without the '.'
 *	options		option string handed unchanged to fopen()
 *
 * Returns whatever fopen() returns for "<filename>.<extension>".
 * If the combined name does not fit the local name buffer, a message is
 * written to stderr and the program exits, instead of writing past the
 * end of the buffer.
 * NOTE(review): reporting of open failures is left to fopen() itself;
 * main() passes option letters such as "rvq", which presumably ask the
 * Micro-C library to report the error and quit - confirm.
 */
FILE *open_file(filename, extension, options)
	char *filename, *extension, *options;
{
	char buffer[50], *ptr;
	unsigned size;

	/* Space needed: both strings, the '.' and the terminating zero */
	size = 2;
	for(ptr = filename; *ptr; ++ptr)
		++size;
	for(ptr = extension; *ptr; ++ptr)
		++size;
	if(size > sizeof(buffer)) {
		fprintf(stderr,"Filename too long: '%s.%s'\n", filename, extension);
		exit(-1); }

	/* Build "<filename>.<extension>" */
	for(ptr = buffer; *filename; ++ptr)
		*ptr = *filename++;
	*ptr++ = '.';
	do		/* Copies the extension including its terminating zero */
		*ptr++ = *extension;
	while(*extension++);

	return fopen(buffer, options);
}

/*
 * Main program
 */
main(argc, argv)
	int argc;
	char *argv[];
{
	int i;
	char *ptr;

	if(argc < 2)
		abort("\nUse: asm86 <filename> [+c +f +q +s +t]\n\n?COPY.TXT 1990-2003 Dave Dunfield\n -- see COPY.TXT --.\n");

	/* Parse for command line options */
	for(i=2; i <argc; ++i) {
		switch((argv[i][0]<<8)|toupper(argv[i][1])) {
			case ('+'<<8)|'C' : casf = -1;	break;	/* Case sensitive */
			case ('+'<<8)|'F' : fulf = -1;	break;	/* Full listing */
			case ('+'<<8)|'S' : symf = -1;	break;	/* Symbol table */
			case ('+'<<8)|'Q' : quif = -1;	break;	/* Quiet mode */
			case ('+'<<8)|'T' : ttyf = -1;	break;	/* TTY output */
			default:
				fprintf(stderr,"Unknown option: '%s'\n", argv[i]);
				exit(-1); } }

	strcpy(title, argv[1]);	/* Initial title is filename */

	/* Open input & output files */
	asm_fp = open_file(title, "ASM", "rvq");
	lst_fp = (ttyf) ? stdout : open_file(title, "LST", "wvq");
	hex_fp = open_file(title, "COM", "wbvq");

	do {
		if(!quif)
			fprintf(stderr,"Pass %u... ", pass+1);
		for(i = pc = line = active_seg = 0; i < NUMSEG; ++i)
			seg_pc[i] = 0;
		while(fgets(ptr = inline, LINSIZE, asm_fp)) {
			error_flag = ocount = 0;
			daddr = pc;
			++line;
			if((*inline != '*')  && (*inline != ';')) {	/* Not a comment */
				i = 0;						/* Parse label */
				while(!isterm(*ptr))
					label[i++] = chupper(*ptr++);
				label[i]=0;
				while(isspace(*ptr))
					++ptr;
				i = 0;						/* Parse instruction */
				while(!isterm(*ptr))
					instruction[i++] = toupper(*ptr++);
				instruction[i]=0;
				while(isspace(*ptr))
					++ptr;
				i = 0;						/* Parse operands */
				while(*ptr && (i < 79))
					operand[i++] = *ptr++;
				operand[i] = 0;
				if(*label && !pass)			/* Label this line */
					define(label, pc, active_seg);
				if(!lookup_inst())
					asmerr(2);
				input_ptr = operand;
				switch(*inst_ptr) {
case 1 :	/* No operand */
	i = 1;
	do
		instruction[ocount++] = inst_ptr[i];
	while(inst_ptr[++i]);
	break;
case 2 :	/* DEC ... */
	switch(oper3()) {
		case REGIS :
			test_register(value, 0x02);
			if((value > 7) && inst_ptr[3]) {
				instruction[ocount++] = inst_ptr[3] | (value & 7);
				break; }
		case INDEX :
		case VALUE :
			instruction[ocount++] = inst_ptr[1] | (length > 1);
			write_ea(inst_ptr[2]);
			break;
		default:
			asmerr(6); }
	break;
case 3 :	/* LDS ... */
	switch(oper4()) {
		case (REGIS<<8)+INDEX :
		case (REGIS<<8)+VALUE :
			test_register(value, 0x03);
			direction();
			instruction[ocount++] = inst_ptr[1];
			write_ea(value1);
			break;
		default:
			asmerr(6); }
	break;
case 4 :	/* ADC ... */
	dir = 0;
	switch(oper4()) {
		case (REGIS<<8)+IMMED :		/* AX,#nn */
			test_register(value, 0x02);
			if(!(value & 0x07)) {
				instruction[ocount++] = inst_ptr[1] | (length > 1);
				write_value1();
				break; }
		case (INDEX<<8)+IMMED :		/* [BX],#nn */
		case (VALUE<<8)+IMMED :		/* SYM,#nn */
			i = inst_ptr[2];
			if(length > 1) {
				i |= 0x01;
				if((inst_ptr[4] & 0xF0) && (value1 <= 127) && (value1 >= -128)) {
					i |= 0x02;
					--length; } }
			instruction[ocount++] = i;
			write_ea(inst_ptr[4]);
			write_value1();
			break;
		case (REGIS<<8)+REGIS :		/* BX,AX */
			test_register(value, 0x02);
			goto adc1;
		case (REGIS<<8)+INDEX :		/* AX,[BX] */
		case (REGIS<<8)+VALUE :		/* AX,nn */
			direction();
		case (INDEX<<8)+REGIS :		/* [BX],AX */
		case (VALUE<<8)+REGIS :		/* 123,AX */
			if(!inst_ptr[4])		/* Special case for "test" */
				dir = 0;
		adc1:
			test_register(value1, 0x02);
			instruction[ocount++] = (inst_ptr[3] | (length > 1)) ^ dir;
			write_ea(value1);
			break;
		default:
			asmerr(6); }
	break;
case 5 :	/* MOV .. */
	dir = 0;
	switch(oper4()) {
		case (REGIS<<8)+IMMED :
			instruction[ocount++] = 0xB0 + (value & 0x07) + ((length > 1) * 0x08);
			write_value1();
			break;
		case (VALUE<<8)+IMMED :
		case (INDEX<<8)+IMMED :
			instruction[ocount++] = 0xC6 + (length > 1);
			write_ea(0);
			write_value1();
			break;
		case (REGIS<<8)+INDEX :		/* AX,[BX] */
		case (REGIS<<8)+VALUE :		/* AX,nn */
			direction();
		case (INDEX<<8)+REGIS :		/* [BX],AX */
		case (VALUE<<8)+REGIS :		/* 123,AX */
			if(value1 > 15) {		/* Segment register */
				instruction[ocount++] = 0x8C | dir;
				write_ea(value1); }
			else if((type == VALUE) && !(value1 & 0x07)) {
				instruction[ocount++] = (0xA0 | length > 1) ^ dir ^ 0x02;
				instruction[ocount++] = value;
				instruction[ocount++] = value >> 8; }
			else {
				instruction[ocount++] = (0x88 | (length > 1)) ^ dir;
				write_ea(value1); }
			break;
		case (REGIS<<8)+REGIS :		/* BX,AX */
			if(value > 15) {
				test_register(value1, 0x03);
				direction();
				i = 0x8E; }
			else if(value1 > 15) {
				test_register(value, 0x03);
				i = 0x8C; }
			else
				i = 0x88 | (length > 1);
			instruction[ocount++] = i;
			write_ea(value1);
			break;
		default:
			asmerr(6); }
	break;
case 6 :	/* short jumps */
	if(oper2() == VALUE) {
		value -= pc + 2;
		if((value > 127) || (value < -128))
			asmerr(16);
		instruction[ocount++] = inst_ptr[1];
		instruction[ocount++] = value; }
	else
		asmerr(6);
	break;
case 7 :	/* JMP and CALL */
	i = 0;
	type = oper2();
	if(test_next(':')) {
		if(type != VALUE)
			asmerr(6);
		i = -1;
		value1 = value;
		type = oper2(); }
	switch(type) {
		case VALUE :
			if(length != 2) {
				if(i) {
					instruction[ocount++] = inst_ptr[2];
					instruction[ocount++] = value;
					instruction[ocount++] = value >> 8;
					length = 2;
					write_value1();
					break; }
				if(length == 1) {
					if(inst_ptr[1] != 0xE9)
						asmerr(6);
					value -= pc + 2;
					if((value > 127) || (value < -128))
						asmerr(16);
					instruction[ocount++] = 0xEB;
					instruction[ocount++] = value;
					break; }
				value -= pc + 3;
				instruction[ocount++] = inst_ptr[1];
				instruction[ocount++] = value;
				instruction[ocount++] = value >> 8;
				break; }
		case REGIS :
		case INDEX :
			instruction[ocount++] = 0xFF;
			write_ea(inst_ptr[i ? 4 : 3]);
			break;
		default:
			asmerr(6); }
	break;
case 8 :	/* RET and RETF */
	if((*input_ptr ==';') || !*input_ptr)
		instruction[ocount++] = inst_ptr[1];
	else {
		get_constant(-1);
		instruction[ocount++] = inst_ptr[2];
		instruction[ocount++] = value;
		instruction[ocount++] = value >> 8; }
	break;
case 9 :	/* PUSH and POP */
	switch(oper3()) {
		case REGIS :
			if(length == 1)
				asmerr(5);
			if(value > 15)
				instruction[ocount++] = inst_ptr[1] | ((value & 7) << 3);
			else
				instruction[ocount++] = inst_ptr[2] | (value & 7);
			break;
		case VALUE :
		case INDEX :
			if(length == 1)
				asmerr(5);
			instruction[ocount++] = inst_ptr[3];
			write_ea(inst_ptr[4]);
			break;
		default:
			asmerr(6); }
	break;
case 10 :	/* String instructions */
	if(oper3() == VALUE)
		instruction[ocount++] = inst_ptr[1] | (length > 1);
	else
		asmerr(6);
	break;
case 11 :	/* Shifts & Rotates */
	if((type = oper2()) == IMMED)
		asmerr(6);
	if(!length)
		asmerr(15);
	i = length > 1;
	if(!test_next(','))
		asmerr(3);
	direction();
	type = oper2();
	direction();
	if(value1 != 1)
		asmerr(20);
	if(type1 == REGIS)
		instruction[ocount++] = inst_ptr[1] | 2 | i;
	else if(type1 == VALUE)
		instruction[ocount++] = inst_ptr[1] | i;
	else {
		asmerr(20);
		break; }
	write_ea(inst_ptr[2]);
	break;
case 12 :	/* INT Statement */
	get_constant(255);
	if((value == 3) && (length < 2))
		instruction[ocount++] = 0xCC;
	else {
		instruction[ocount++] = 0xCD;
		instruction[ocount++] = value; }
	break;
case 13 :	/* ESC opcode */
	switch(oper4()) {
		case (VALUE<<8)+REGIS :
			test_register(0x02);
		case (VALUE<<8)+VALUE :
		case (VALUE<<8)+INDEX :
			direction();
			instruction[ocount++] = 0xD8 | ((value1 >> 3) & 7);
			write_ea(value1);
			break;
		default:
			asmerr(6); }
		break;
case 14 :	/* IN and OUT */
	type = oper2();
	i = length;
	if(!test_next(','))
		asmerr(3);
	direction();
	type = oper2();
	if(!inst_ptr[3]) {
		direction();
		length = i; }
	if((type != REGIS) || (value & 0x17))
		asmerr(22);
	switch(type1) {
		case VALUE :
			if((value1 < 0 ) || (value1 > 255))
				asmerr(16);
			instruction[ocount++] = inst_ptr[1] | (length > 1);
			instruction[ocount++] = value1;
			break;
		case REGIS :
			if(value1 != 10)
				asmerr(21);
			instruction[ocount++] = inst_ptr[2] | (length > 1);
			break;
		default:
			asmerr(21); }
	break;
case 15 :	/* XCHG */
	switch(oper4()) {
		case (REGIS<<8)+REGIS :
			test_register(value, 2);
			test_register(value1, 2);
			if((value > 7) && !(value & 7))
				instruction[ocount++] = 0x90 | (value1 & 7);
			else if((value1 > 7) && !(value1 & 7))
				instruction[ocount++] = 0x90 | (value & 7);
			else
				goto doxchg;
			break;
		case (REGIS<<8)+INDEX :
		case (REGIS<<8)+VALUE :
			direction();
		case (INDEX<<8)+REGIS :
		case (VALUE<<8)+REGIS :
			test_register(value1, 2);
		doxchg:
			instruction[ocount++] = inst_ptr[1] | (length > 1);
			write_ea(value1);
			break;
		default:
			asmerr(6); }
	break;
case 100 :	/* EQU statement */
	if(pass)
		daddr = get_constant(-1);
	else {
		svalue[sindex] = get_constant(-1);
		if(error_flag)
			show_error(error_flag); }
	break;
case 101 :	/* ORG statement */
	daddr = pc = get_constant(-1);
	break;
case 102 :	/* SEG directive */
	seg_pc[active_seg] = pc;
	if(seg_size[active_seg] < pc)
		seg_size[active_seg] = pc;
	daddr = pc = seg_pc[active_seg = get_constant(NUMSEG-1) & (NUMSEG-1)];
	break;
case 103 :	/* DB statement */
	do
		instruction[ocount++] = get_constant(255);
	while(test_next(','));
	break;
case 104 :	/* DW statement */
	do {
		get_constant(-1);
		instruction[ocount++] = value;
		instruction[ocount++] = value >> 8; }
	while(test_next(','));
	break;
case 105 :	/* DS statement */
	pc += get_constant(-1);
	break;
case 106 :	/* STR statement */
	i = *input_ptr++;
	while(*input_ptr != i) {
		if(!*input_ptr) {
			asmerr(13);
			break; }
		instruction[ocount++] = *input_ptr++; }
	switch(inst_ptr[1]) {
		case 1 :
			instruction[ocount++] = 0;
			break;
		case 2 :
			instruction[ocount-1] |= 0x80; }
	break;
case 107 :	/* PUBLIC statement */
	if(pass) do {
		ptr = input_ptr;
		while(!isterm(i = *input_ptr))
			*input_ptr++ = chupper(i);
		*input_ptr++ = 0;
		if(looksym(ptr, &value))
			sflags[sindex] |= SPUB;
		else
			asmerr(17); }
	while(i == ',');
	break;
case 108 :	/* EXTERN statement */
	if(!pass) do {
		ptr = input_ptr;
		while(!isterm(i = *input_ptr))
			*input_ptr++ = chupper(i);
		*input_ptr++ = 0;
		define(ptr, 0, SEXT | active_seg); }
	while(i == ',');
	break;
case 120 :	/* PAGE directive */
	ecount = 999;
	break;
case 121 :	/* TITLE directive */
	strcpy(title, operand);
	break;
case 122 :	/* SPACE directive */
	if(pass && fulf && !list) {
		putc('\n', lst_fp);
		++ecount; }
	break;
case 123 :	/* LIST directive */
	if(list)
		--list;
	break;
case 124 :	/* NOLIST directive */
	++list;
}
			/* Check for out of PHASE errors & duplicated symbols */
			if(*label && pass) {
				looksym(label, &i);
				if(sflags[sindex] & SDUP)
					error_flag = 18;
				else if(i != daddr) {
					show_error(0);
					exit(-1); } } }

		if(pass) {
	/* Generate formatted output listing */
			if((fulf && (*inst_ptr < 120) && !list) || error_flag) {
				if(++ecount > PAGSIZE)
					write_title();
				fprintf(lst_fp, "%04x ", daddr);
				for(i=0; i < 6; ++i)
					if(i < ocount)
						fprintf(lst_fp," %02x", instruction[i]);
					else
						fputs("   ", lst_fp);
				fprintf(lst_fp, " %c%5u  %s\n", (ocount <= 6) ? ' ' : '+', line, inline);
				if(error_flag)
					show_error(error_flag); }
	/* Write the code file (just output binary for now) */
			if(ocount)
				fput(instruction, ocount, hex_fp); }
		pc += ocount; /* For now till write code */
		}
	if(seg_size[active_seg] < pc)
		seg_size[active_seg] = pc;
	rewind(asm_fp); }
	while(++pass < 2);

	if(error_count)
		fprintf(lst_fp,"\n %u error(s) occurred in this assembly.\n",error_count);

	if(!quif)
		fprintf(stderr,"%u error(s).\n",error_count);

/* display the symbol table */
	if(symf) {
		for(i=0; i < scount; ++i) {
			daddr=i;
			for(pc=i+1; pc < scount; ++pc)
				if(strcmp(symbols[daddr], symbols[pc]) > 0)
					daddr = pc;
			pc = svalue[i];
			strcpy(inline, symbols[i]);
			svalue[i] = svalue[daddr];
			strcpy(symbols[i], symbols[daddr]);
			svalue[daddr] = pc;
			strcpy(symbols[daddr], inline); }
		write_title();
		fprintf(lst_fp,"\nSYMBOL TABLE:\n\n");
		for(i=0; i < scount;) {
			fprintf(lst_fp,"%-9s-%04x", &symbols[i][0], svalue[i]);
			++i;
			fputs((i%8) ? "  " : "\n", lst_fp); }
		if(i%8) fprintf(lst_fp,"\n"); }

	fclose(asm_fp);
	fclose(hex_fp);
	fclose(lst_fp);
}

/*
 * Start a new listing page: emit a form feed (except before the very
 * first page), print the title line with the page number, and reset
 * the count of lines used on the page.
 */
write_title()
{
	unsigned page;

	page = pcount++;			/* This page's number; advance for the next */
	if(page > 1)
		putc('\f',lst_fp);
	fprintf(lst_fp,"MICRO 8086 ASSEMBLER: %-80s   PAGE: %u\n\n",title,page);
	ecount = 0;
}

/*
 * Exchange the first and second operand descriptions
 * (type/value/xvalue with type1/value1/xvalue1) and set the
 * direction flag to 0x02.
 */
direction()
{
	int t, v, x;

	/* Remember the first operand */
	t = type;
	v = value;
	x = xvalue;

	/* Second operand moves into first place */
	type = type1;
	value = value1;
	xvalue = xvalue1;

	/* Saved first operand moves into second place */
	type1 = t;
	value1 = v;
	xvalue1 = x;

	dir = 0x02;
}

/*
 * Check a register number against the restrictions in 'flags'.
 * Nothing is checked during the first pass.
 *	flags & 0x01	8 bit registers (numbers below 8) give error 5
 *	flags & 0x02	segment registers (numbers above 15) give error 8
 */
test_register(reg, flags)
	int reg;
	char flags;
{
	int checking;

	checking = (pass != 0);
	if(checking && (reg < 8) && (flags & 0x01))		/* No 8 bit registers */
		asmerr(5);
	if(checking && (reg > 15) && (flags & 0x02))	/* No segment registers */
		asmerr(8);
}

/*
 * Writes the value of value1
 */
write_value1()
{
	instruction[ocount++] = value1;
	if(length > 1)
		instruction[ocount++] = value1 >> 8;
}

/*
 * Build the effective address byte for the current first operand
 * (type/value/xvalue) and append it, with any displacement, to the
 * generated code.
 *	xxx		its low three bits go into bits 3-5 of the EA byte
 * Returns the number of bytes written (1, 2 or 3).
 * Any other operand type raises error 6; the byte is written anyway.
 */
write_ea(xxx)
	int xxx;
{
	int disp;					/* Displacement bytes to write: 0, 1 or 2 */

	disp = 0;
	xxx = (xxx & 7) << 3;

	if(type == REGIS)			/* Register reference */
		xxx |= 0xC0 + (value & 7);
	else if(type == VALUE) {	/* Direct memory reference */
		xxx |= 6;
		disp = 2; }
	else if(type == INDEX) {	/* Indexed addressing mode */
		if((value < -128) || (value > 127)) {
			xxx |= 0x80;		/* 16 bit displacement */
			disp = 2; }
		else if((xvalue == 6) || value) {
			/* Index code 6 always gets a displacement, even if zero */
			xxx |= 0x40;		/* 8 bit displacement */
			disp = 1; }
		xxx |= xvalue; }
	else
		asmerr(6);

	instruction[ocount++] = xxx;
	if(disp >= 1)
		instruction[ocount++] = value;
	if(disp >= 2)
		instruction[ocount++] = value >> 8;

	return disp + 1;
}

/*
 * Parse one operand that must be a plain value (error 4 otherwise).
 *	limit	largest value accepted; the comparison is unsigned
 * A value above the limit is replaced by 0 and raises error 16.
 * Returns the value, which is also left in the global 'value'.
 */
get_constant(limit)
	unsigned limit;
{
	int kind;

	kind = oper2();
	if(kind != VALUE)
		asmerr(4);
	if(limit < value) {			/* 'value' is compared as unsigned */
		value = 0;
		asmerr(16); }
	return value;
}

/*
 * Get two complete operands with error checking
 */
oper4()
{
	int l, v, x;

	type = oper2();
	l = length;
	v = value;
	x = xvalue;
	if(!test_next(','))
		asmerr(3);
	type1 = oper2();
	if(!isterm(*input_ptr))
		asmerr(1);

	/* Make sure that the lengths are compatible */
	if(!(length || l))
		asmerr(15);
	else if(length && l && (length != l))
		asmerr(14);
	else if(l)
		length = l;

	value1 = value; value = v;
	xvalue1 = xvalue; xvalue = x;

	return (type << 8) + type1;
}

/*
 * Parse a single operand that must end the operand field (error 1
 * otherwise) and must have a known size (error 15 otherwise).
 * Returns the operand type, which is also stored in 'type'.
 */
oper3()
{
	type = oper2();

	if(isterm(*input_ptr) == 0) {
		asmerr(1); }
	if(length == 0) {
		asmerr(15); }

	return type;
}

/*
 * Get and evaluate a single operand expression
 *
 * Parses from input_ptr: an optional '#' (immediate), then either a
 * register, optionally used as a segment override ("ES:..."), or a
 * chain of terms joined by binary operators and evaluated strictly left
 * to right, optionally followed by an index specification in '[' ']'.
 * Results are left in the globals 'value' and 'length', plus 'xvalue'
 * for an indexed operand.
 * A segment override writes its prefix byte to the generated code.
 * Dividing (or taking the modulus) by zero raises error 16 and leaves
 * the value unchanged, rather than performing the division.
 *
 * Returns REGIS, INDEX, IMMED or VALUE.
 */
oper2()
{
	int i, j;
	char o, immed;

	immed = test_next('#');

	if(isterm(*input_ptr))
		asmerr(3);

	if(oper1(&value, &length) == REGIS) {	/* Simple register */
		if((value > 15) && (*input_ptr == ':')) {
			/* Segment register used as an override prefix */
			++input_ptr;
			instruction[ocount++] = 0x26 | ((value & 3) << 3);
			i = oper1(&value, &length);
			if((i != VALUE) && (i != INDEX))
				asmerr(7); }
		else {
			if(!isterm(*input_ptr))
				asmerr(9);
			return REGIS; } }

	/* A value of a memory reference */
	while(*input_ptr && !isterm(*input_ptr)) {
		if(isoper(o = *input_ptr)) {
			++input_ptr;
			if(oper1(&i, &j) == REGIS)
				asmerr(9);
			/* An explicit size on any term must agree with the others */
			if(length && j && (length != j))
				asmerr(14);
			if(j)
				length = j;
			switch(o) {
				case '+' : value += i;	break;
				case '-' : value -= i;	break;
				case '*' : value *= i;	break;
				case '/' :
					if(i)
						value /= i;
					else				/* Division by zero */
						asmerr(16);
					break;
				case '%' :
					if(i)
						value %= i;
					else				/* Modulus by zero */
						asmerr(16);
					break;
				case '&' : value &= i;	break;
				case '|' : value |= i;	break;
				case '^' : value ^= i;	break;
				case '<' : value <<=i;	break;
				case '>' : value >>=i; } }
		else if(o == '[') {				/* Indexing operation */
			++input_ptr;
			xvalue = test_index(255);
			if(test_next('+'))			/* Two registers */
				xvalue = test_index(xvalue);
			if(!test_next(']'))
				asmerr(1);
			if(immed)
				asmerr(10);
			return INDEX; }
		else {
			asmerr(1);
			break; } }

	return immed ? IMMED : VALUE;
}

/*
 * Scan for a single operand
 *
 * Parses one primary element at input_ptr and advances past it:
 *   ( expr )      nested expression, evaluated by oper2()
 *   < or >        prefix forcing an 8 or 16 bit size
 *   -             unary minus applied to the following element
 *   =             swap high and low bytes of the following element
 *   $ % @ digits  hexadecimal, binary, octal, decimal number
 *   '...'         quoted characters, packed 8 bits per character
 *   *             current program counter
 *   name          register name or symbol
 *
 * On return *val holds the value (or register number) and *len the
 * size: a forced size, 0 if none was given, or for a register 1 when
 * its number is below 8 and 2 otherwise.
 *
 * Returns REGIS for a register, VALUE otherwise. For the '-' and '='
 * prefixes the result of the nested scan is returned unchanged.
 */
oper1(val, len)
	int *val, *len;
{
	unsigned base, v, l, v1, l1;
	char symbol[SYMSIZE+1], c;

	v = l = 0;
	for(;;) switch(*input_ptr++) {
		case '(' :			/* Nested expression */
			/* oper2() works in the globals, so save and restore them */
			v1 = value;
			l1 = length;
			if(oper2() != VALUE)
				asmerr(4);
			v = value;
			l = length;
			value = v1;
			length = l1;
			if(!test_next(')'))
				asmerr(1);
			goto retval;
		case '<' :			/* Force 8 bit value */
			l = 1;
			break;
		case '>' :			/* Force 16 bit value */
			l = 2;
			break;
		case '-' :			/* Unary minus */
			base = oper1(&v, &l);
			*val = -v;
			*len = l;
			return base;
		case '=' :			/* Swap high and low */
			base = oper1(&v, &l);
			*val = (v<<8)+(v>>8);
			*len = l;
			return base;
		case '$' :			/* Hexidecimal number */
			base = 16;
			goto getn;
		case '%' :			/* Binary number */
			base = 2;
			goto getn;
		case '@' :			/* Octal number */
			base = 8;
			goto getn;
		case '0' :			/* Decimal number */
		case '1' :
		case '2' :
		case '3' :
		case '4' :
		case '5' :
		case '6' :
		case '7' :
		case '8' :
		case '9' :
			base = 10;
			--input_ptr;	/* First digit is part of the number */
		getn:
			for(;;) {
				if(isdigit(c = *input_ptr))		/* convert numeric digits */
					c -= '0';
				else if(c >= 'a')				/* convert lower case alphabetics */
					c -= ('a' - 10);
				else if(c >= 'A')				/* convert upper case alphabetics */
					c -= ('A' - 10);
				else							/* not a valid "digit" */
					break;
				if(c >= base)					/* outside of base */
					break;
				v = (v * base) + c;				/* include in total */
				++input_ptr; }
			goto retval;
		case '\'' :			/* Quoted value */
			while((c = *input_ptr) && (c != '\'')) {
				++input_ptr;
				v = (v << 8) + c; }
			if(*input_ptr)
				++input_ptr;
			else			/* Ran off the end of the line, no closing quote */
				asmerr(13);
			goto retval;
		case '*' :			/* Program counter */
			v = pc;
			goto retval;
		default :			/* Symbol value */
			--input_ptr;
			/*
			 * Collect the name. Characters beyond SYMSIZE are consumed
			 * but not stored, so an over-long identifier is truncated
			 * rather than overrunning symbol[].
			 * NOTE(review): assumes names are limited to SYMSIZE
			 * characters when they are defined - confirm.
			 */
			while(issymbol(c = *input_ptr)) {
				++input_ptr;
				if(v < SYMSIZE)
					symbol[v++] = chupper(c); }
			symbol[v] = 0;
			if(v) {
				for(v = 0; v < NUMREG; ++v)
					if(!strcmp(symbol, registers[v])) {
						if(l)		/* Size prefix on a register */
							asmerr(12);
						*val = v;
						*len = (v > 7) + 1;
						return REGIS; }
				/* On failure v is left at NUMREG from the loop above */
				if(!looksym(symbol, &v))
					asmerr(17); }
		retval:
			*val = v;
			*len = l;
			return VALUE; }
}

/*
 * Conditionally consume one character of input.
 *
 * If the character at input_ptr equals 'c', step past it and
 * return -1. Otherwise leave input_ptr alone and return 0.
 */
test_next(c)
	char c;
{
	if(*input_ptr != c)
		return 0;

	++input_ptr;
	return -1;
}

/*
 * Fold a character to upper case unless the 'casf' flag is set.
 *
 * With 'casf' non-zero the character is returned untouched.
 * Otherwise 'a'..'z' map to 'A'..'Z' and everything else is
 * returned as is.
 */
chupper(c)
	char c;
{
	if(casf)
		return c;
	if((c < 'a') || (c > 'z'))
		return c;
	return c - ('a'-'A');
}

/*
 * Test for a valid INDEX register combination
 *
 * Scans the next element with oper1() and combines it with 'reg':
 *   reg == 255     no previous register, the element must be one of
 *                  the register numbers 0x0B, 0x0D, 0x0E or 0x0F
 *                  (BX, BP, SI, DI according to the case labels)
 *   reg == 4..7    the code returned by an earlier call for the
 *                  first register of a "[r1+r2]" pair
 *
 * Returns the index code 0-7 for the combination. Error 11 is raised
 * and 0 returned when the element is not a register or the
 * combination is not in the table.
 */
test_index(reg)
	int reg;
{
	int v, l;

	/*
	 * Reject a non-register outright: a plain number could otherwise
	 * happen to equal one of the case constants below and be accepted.
	 */
	if(oper1(&v, &l) == VALUE) {
		asmerr(11);
		return 0; }

	switch((reg << 8) + v) {
		case 0xFF0B :	/* [BX] */
			return 7;
		case 0xFF0D :	/* [BP] */
			return 6;
		case 0xFF0E :	/* [SI] */
			return 4;
		case 0xFF0F :	/* [DI] */
			return 5;
		case 0x070E :	/* [BX+SI] */
		case 0x040B :	/* [SI+BX] */
			return 0;
		case 0x070F :	/* [BX+DI] */
		case 0x050B :	/* [DI+BX] */
			return 1;
		case 0x060E :	/* [BP+SI] */
		case 0x040D :	/* [SI+BP] */
			return 2;
		case 0x060F :	/* [BP+DI] */
		case 0x050D :	/* [DI+BP] */
			return 3;
		default:
			asmerr(11); }

	/* Callers store the result, so give the error path a defined value */
	return 0;
}

/*
 * Record an assembly error.
 *
 * Only the first error is kept: once error_flag is non-zero,
 * later calls leave it unchanged.
 */
asmerr(errno)
	int errno;
{
	if(error_flag)
		return;

	error_flag = errno;
}

/*
 * Display an error
 */
show_error(errno)
	int errno;
{
	fprintf(lst_fp,"** Error %u in line %u : %s\n", errno, line, error_text[errno]);
	++error_count;
}

/*
 * Decide whether a character may appear in a symbol name.
 *
 * Letters, digits, '_' and '?' are accepted.
 * Returns 1 if accepted, 0 otherwise.
 */
issymbol(c)
	char c;
{
	if((c == '_') || (c == '?'))
		return 1;
	if(isalpha(c))
		return 1;
	return isdigit(c) ? 1 : 0;
}

/*
 * Decide whether a character is an expression operator.
 *
 * The operators are + - * / % & | ^ < >.
 * Returns -1 for an operator, 0 for anything else
 * (including the NUL character).
 */
isoper(c)
	char c;
{
	char *scan;

	/* Walk the operator table up to, but not including, its NUL */
	for(scan = "+-*/%&|^<>"; *scan; ++scan)
		if(*scan == c)
			return -1;

	return 0;
}

/*
 * Decide whether a character terminates an operand.
 *
 * Terminators are NUL, space, tab, ',', ':' and ')'.
 * Returns 1 for a terminator, 0 otherwise.
 */
isterm(c)
	char c;
{
	/* End of line or white space */
	if((c == 0) || (c == ' ') || (c == '\t'))
		return 1;

	/* Punctuation that closes an operand */
	if((c == ',') || (c == ':') || (c == ')'))
		return 1;

	return 0;
}

/*
 * Define a symbol
 *
 * If 'name' is already in the symbol table, the existing entry is
 * flagged with SDUP and nothing else changes. Otherwise a new entry
 * is appended holding 'name', 'value' and 'flags', and sindex is set
 * to its position.
 *
 * When the table already holds SYMBOLS entries, error 19 is shown
 * and the program exits.
 */
define(name, value, flags)
	char *name;
	int value, flags;
{
	int i;

	/* Look up the name being defined (looksym sets sindex on a hit) */
	if(looksym(name, &i))
		sflags[sindex] |= SDUP;
	else {
		if(scount >= SYMBOLS) {		/* Symbol table overflow */
			show_error(19);
			exit(-1); }
		/* NOTE(review): assumes the caller limits 'name' to what a symbols[] entry holds */
		strcpy(symbols[sindex = scount], name);
		svalue[scount] = value;
		sflags[scount++] = flags; }
}

/*
 * Find a symbol by name.
 *
 * Searches symbols[0..scount-1] in order for an exact match.
 * On a hit, sindex is set to the matching slot, *v receives the
 * symbol's value and -1 is returned. On a miss, 0 is returned and
 * neither sindex nor *v is touched.
 */
looksym(symbol, v)
	char *symbol;
	int *v;
{
	int slot;

	slot = 0;
	while(slot < scount) {
		if(strcmp(symbol, symbols[slot]) == 0) {
			sindex = slot;
			*v = svalue[slot];
			return -1; }
		++slot; }

	return 0;
}

/*
 * Find the text in instruction[] in the instruction table.
 *
 * As walked here, inst_table is a sequence of entries ending at a
 * zero byte. Each entry is a name whose last character has bit 7
 * set, followed by IDATSIZ bytes of data. An entry matches when the
 * whole name equals the start of instruction[] and the character
 * right after it there is a terminator.
 *
 * On a match, inst_ptr points at the first data byte of the entry
 * and 1 is returned. Otherwise inst_ptr points at the zero byte
 * ending the table and 0 is returned.
 */
lookup_inst()
{
	char *entry, *text;

	for(entry = inst_table; *entry; entry += IDATSIZ) {
		/* Step over the characters that match exactly */
		text = instruction;
		while(*entry == *text) {
			++entry;
			++text; }

		/* The final name character is stored with bit 7 set */
		if((*entry & 0x7f) == *text)
			if(isterm(text[1])) {
				inst_ptr = entry + 1;
				return 1; }

		/* No match: move past the rest of this name */
		while(!(*entry & 0x80))
			++entry;
		++entry; }

	inst_ptr = entry;
	return 0;
}
