/* =========================================================================== Copyright (C) 1999-2005 Id Software, Inc. This file is part of Quake III Arena source code. Quake III Arena source code is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. Quake III Arena source code is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Foobar; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA =========================================================================== */ #include "cmdlib.h" #include "mathlib.h" #include "qfiles.h" /* MSVC-ism fix. */ #ifdef _WIN32 #define atoi(s) strtoul(s,NULL,10) #endif /* 19079 total symbols in FI, 2002 Jan 23 */ #define Q3ASM_TURBO #define DEFAULT_HASHTABLE_SIZE 2048 char outputFilename[MAX_OS_PATH]; // the zero page size is just used for detecting run time faults #define ZERO_PAGE_SIZE 0 // 256 typedef enum { OP_UNDEF, OP_IGNORE, OP_BREAK, OP_ENTER, OP_LEAVE, OP_CALL, OP_PUSH, OP_POP, OP_CONST, OP_LOCAL, OP_JUMP, //------------------- OP_EQ, OP_NE, OP_LTI, OP_LEI, OP_GTI, OP_GEI, OP_LTU, OP_LEU, OP_GTU, OP_GEU, OP_EQF, OP_NEF, OP_LTF, OP_LEF, OP_GTF, OP_GEF, //------------------- OP_LOAD1, OP_LOAD2, OP_LOAD4, OP_STORE1, OP_STORE2, OP_STORE4, // *(stack[top-1]) = stack[yop OP_ARG, OP_BLOCK_COPY, //------------------- OP_SEX8, OP_SEX16, OP_NEGI, OP_ADD, OP_SUB, OP_DIVI, OP_DIVU, OP_MODI, OP_MODU, OP_MULI, OP_MULU, OP_BAND, OP_BOR, OP_BXOR, OP_BCOM, OP_LSH, OP_RSHI, OP_RSHU, OP_NEGF, OP_ADDF, OP_SUBF, OP_DIVF, OP_MULF, OP_CVIF, OP_CVFI } opcode_t; typedef struct { int imageBytes; // after decompression int entryPoint; int stackBase; int stackSize; } executableHeader_t; typedef enum { CODESEG, DATASEG, // initialized 32 bit data, will be byte swapped LITSEG, // strings BSSSEG, // 0 filled NUM_SEGMENTS } segmentName_t; #define MAX_IMAGE 0x400000 typedef struct { byte image[MAX_IMAGE]; int imageUsed; int segmentBase; // only valid on second pass } segment_t; typedef struct symbol_s { struct symbol_s *next; int hash; segment_t *segment; char *name; int value; } symbol_t; #ifdef Q3ASM_TURBO typedef struct hashchain_s { void *data; struct hashchain_s *next; } hashchain_t; typedef struct hashtable_s { int buckets; hashchain_t **table; } hashtable_t; int symtablelen = DEFAULT_HASHTABLE_SIZE; hashtable_t *symtable; hashtable_t *optable; #endif /* Q3ASM_TURBO */ segment_t segment[NUM_SEGMENTS]; segment_t *currentSegment; int passNumber; int numSymbols; int errorCount; qboolean optionVerbose = qfalse; qboolean optionWriteMapFile = qfalse; symbol_t *symbols; symbol_t *lastSymbol = 0; /* Most recent symbol defined. */ #define MAX_ASM_FILES 256 int numAsmFiles; char *asmFiles[MAX_ASM_FILES]; char *asmFileNames[MAX_ASM_FILES]; int currentFileIndex; char *currentFileName; int currentFileLine; //int stackSize = 16384; int stackSize = 0x10000; // we need to convert arg and ret instructions to // stores to the local stack frame, so we need to track the // characteristics of the current functions stack frame int currentLocals; // bytes of locals needed by this function int currentArgs; // bytes of largest argument list called from this function int currentArgOffset; // byte offset in currentArgs to store next arg, reset each call #define MAX_LINE_LENGTH 1024 char lineBuffer[MAX_LINE_LENGTH]; int lineParseOffset; char token[MAX_LINE_LENGTH]; int instructionCount; typedef struct { char *name; int opcode; } sourceOps_t; sourceOps_t sourceOps[] = { #include "opstrings.h" }; #define NUM_SOURCE_OPS ( sizeof( sourceOps ) / sizeof( sourceOps[0] ) ) int opcodesHash[ NUM_SOURCE_OPS ]; #ifdef Q3ASM_TURBO int vreport (const char* fmt, va_list vp) { if (optionVerbose != qtrue) return 0; return vprintf(fmt, vp); } int report (const char *fmt, ...) { va_list va; int retval; va_start(va, fmt); retval = vreport(fmt, va); va_end(va); return retval; } /* The chain-and-bucket hash table. -PH */ void hashtable_init (hashtable_t *H, int buckets) { H->buckets = buckets; H->table = calloc(H->buckets, sizeof(*(H->table))); return; } hashtable_t * hashtable_new (int buckets) { hashtable_t *H; H = malloc(sizeof(hashtable_t)); hashtable_init(H, buckets); return H; } /* No destroy/destructor. No need. */ void hashtable_add (hashtable_t *H, int hashvalue, void *datum) { hashchain_t *hc, **hb; hashvalue = (abs(hashvalue) % H->buckets); hb = &(H->table[hashvalue]); if (*hb == 0) { /* Empty bucket. Create new one. */ *hb = calloc(1, sizeof(**hb)); hc = *hb; } else { /* Get hc to point to last node in chain. */ for (hc = *hb; hc && hc->next; hc = hc->next); hc->next = calloc(1, sizeof(*hc)); hc = hc->next; } hc->data = datum; hc->next = 0; return; } hashchain_t * hashtable_get (hashtable_t *H, int hashvalue) { hashvalue = (abs(hashvalue) % H->buckets); return (H->table[hashvalue]); } void hashtable_stats (hashtable_t *H) { int len, empties, longest, nodes; int i; float meanlen; hashchain_t *hc; report("Stats for hashtable %08X", H); empties = 0; longest = 0; nodes = 0; for (i = 0; i < H->buckets; i++) { if (H->table[i] == 0) { empties++; continue; } for (hc = H->table[i], len = 0; hc; hc = hc->next, len++); if (len > longest) { longest = len; } nodes += len; } meanlen = (float)(nodes) / (H->buckets - empties); #if 0 /* Long stats display */ report(" Total buckets: %d\n", H->buckets); report(" Total stored nodes: %d\n", nodes); report(" Longest chain: %d\n", longest); report(" Empty chains: %d\n", empties); report(" Mean non-empty chain length: %f\n", meanlen); #else //0 /* Short stats display */ report(", %d buckets, %d nodes", H->buckets, nodes); report("\n"); report(" Longest chain: %d, empty chains: %d, mean non-empty: %f", longest, empties, meanlen); #endif //0 report("\n"); } /* Kludge. */ /* Check if symbol already exists. */ /* Returns 0 if symbol does NOT already exist, non-zero otherwise. */ int hashtable_symbol_exists (hashtable_t *H, int hash, char *sym) { hashchain_t *hc; symbol_t *s; hash = (abs(hash) % H->buckets); hc = H->table[hash]; if (hc == 0) { /* Empty chain means this symbol has not yet been defined. */ return 0; } for (; hc; hc = hc->next) { s = (symbol_t*)hc->data; // if ((hash == s->hash) && (strcmp(sym, s->name) == 0)) /* We _already_ know the hash is the same. That's why we're probing! */ if (strcmp(sym, s->name) == 0) { /* Symbol collisions -- symbol already exists. */ return 1; } } return 0; /* Can't find collision. */ } /* Comparator function for quicksorting. */ int symlist_cmp (const void *e1, const void *e2) { const symbol_t *a, *b; a = *(const symbol_t **)e1; b = *(const symbol_t **)e2; //crumb("Symbol comparison (1) %d to (2) %d\n", a->value, b->value); return ( a->value - b->value); } /* Sort the symbols list by using QuickSort (qsort()). This may take a LOT of memory (a few megabytes?), but memory is cheap these days. However, qsort(3) already exists, and I'm really lazy. -PH */ void sort_symbols () { int i, elems; symbol_t *s; symbol_t **symlist; //crumb("sort_symbols: Constructing symlist array\n"); for (elems = 0, s = symbols; s; s = s->next, elems++) /* nop */ ; symlist = malloc(elems * sizeof(symbol_t*)); for (i = 0, s = symbols; s; s = s->next, i++) { symlist[i] = s; } //crumbf("sort_symbols: Quick-sorting %d symbols\n", elems); qsort(symlist, elems, sizeof(symbol_t*), symlist_cmp); //crumbf("sort_symbols: Reconstructing symbols list\n"); s = symbols = symlist[0]; for (i = 1; i < elems; i++) { s->next = symlist[i]; s = s->next; } lastSymbol = s; s->next = 0; //crumbf("sort_symbols: verifying..."); fflush(stdout); for (i = 0, s = symbols; s; s = s->next, i++) /*nop*/ ; //crumbf(" %d elements\n", i); free(symlist); /* d'oh. no gc. */ } /* Problem: BYTE values are specified as signed decimal string. A properly functional atoi() will cap large signed values at 0x7FFFFFFF. Negative word values are often specified as very large decimal values by lcc. Therefore, values that should be between 0x7FFFFFFF and 0xFFFFFFFF come out as 0x7FFFFFFF when using atoi(). Bad. This function is one big evil hack to work around this problem. */ /* FIXME: Find out maximum token length for VC++ -PH */ int ThingToConvertDecimalIntoSigned32SoThatAtoiDoesntCapAt7FFFFFFF (const char *s) { /* Variable `l' should be an integer variant larger than 32 bits. On gnu-x86, "long long" is 64 bits. -PH */ long long int l; union { unsigned int u; signed int i; } retval; l = atoll(s); /* Now smash to signed 32 bits accordingly. */ if (l < 0) { retval.i = (int)l; } else { retval.u = (unsigned int)l; } return retval.i; /* <- union hackage. I feel dirty with this. -PH */ } /* Programmer Attribute #1: laziness */ #ifndef _WIN32 #define atoi ThingToConvertDecimalIntoSigned32SoThatAtoiDoesntCapAt7FFFFFFF #endif #endif /* Q3ASM_TURBO */ /* ============= HashString ============= */ #ifndef Q3ASM_TURBO int HashString( char *s ) { int v = 0; while ( *s ) { v += *s; s++; } return v; } #else /* Q3ASM_TURBO */ /* Default hash function of Kazlib 1.19, slightly modified. */ unsigned int HashString (const char *key) { static unsigned long randbox[] = { 0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U, 0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU, 0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU, 0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU, }; const char *str = key; unsigned int acc = 0; while (*str) { acc ^= randbox[(*str + acc) & 0xf]; acc = (acc << 1) | (acc >> 31); acc &= 0xffffffffU; acc ^= randbox[((*str++ >> 4) + acc) & 0xf]; acc = (acc << 2) | (acc >> 30); acc &= 0xffffffffU; } return abs(acc); } #endif /* Q3ASM_TURBO */ /* ============ CodeError ============ */ void CodeError( char *fmt, ... ) { va_list argptr; errorCount++; report( "%s:%i ", currentFileName, currentFileLine ); va_start( argptr,fmt ); vprintf( fmt,argptr ); va_end( argptr ); } /* ============ EmitByte ============ */ void EmitByte( segment_t *seg, int v ) { if ( seg->imageUsed >= MAX_IMAGE ) { Error( "MAX_IMAGE" ); } seg->image[ seg->imageUsed ] = v; seg->imageUsed++; } /* ============ EmitInt ============ */ void EmitInt( segment_t *seg, int v ) { if ( seg->imageUsed >= MAX_IMAGE - 4) { Error( "MAX_IMAGE" ); } seg->image[ seg->imageUsed ] = v & 255; seg->image[ seg->imageUsed + 1 ] = ( v >> 8 ) & 255; seg->image[ seg->imageUsed + 2 ] = ( v >> 16 ) & 255; seg->image[ seg->imageUsed + 3 ] = ( v >> 24 ) & 255; seg->imageUsed += 4; } /* ============ DefineSymbol Symbols can only be defined on pass 0 ============ */ void DefineSymbol( char *sym, int value ) { #ifndef Q3ASM_TURBO symbol_t *s, *after; char expanded[MAX_LINE_LENGTH]; int hash; if ( passNumber == 1 ) { return; } // TTimo // https://zerowing.idsoftware.com/bugzilla/show_bug.cgi?id=381 // as a security, bail out if vmMain entry point is not first if (!Q_stricmp(sym, "vmMain")) if (value) Error( "vmMain must be the first symbol in the qvm (got offset %d)\n", value ); // add the file prefix to local symbols to guarantee unique if ( sym[0] == '$' ) { sprintf( expanded, "%s_%i", sym, currentFileIndex ); sym = expanded; } hash = HashString( sym ); for ( s = symbols ; s ; s = s->next ) { if ( hash == s->hash && !strcmp( sym, s->name ) ) { CodeError( "Multiple definitions for %s\n", sym ); return; } } s = malloc( sizeof( *s ) ); s->name = copystring( sym ); s->hash = hash; s->value = value; s->segment = currentSegment; lastSymbol = s; /* for the move-to-lit-segment byteswap hack */ // insert it in order if ( !symbols || s->value < symbols->value ) { s->next = symbols; symbols = s; return; } for ( after = symbols ; after->next && after->next->value < value ; after = after->next ) { } s->next = after->next; after->next = s; #else /* Q3ASM_TURBO */ /* Hand optimization by PhaethonH */ symbol_t *s; char expanded[MAX_LINE_LENGTH]; int hash; if ( passNumber == 1 ) { return; } // add the file prefix to local symbols to guarantee unique if ( sym[0] == '$' ) { sprintf( expanded, "%s_%i", sym, currentFileIndex ); sym = expanded; } hash = HashString( sym ); if (hashtable_symbol_exists(symtable, hash, sym)) { CodeError( "Multiple definitions for %s\n", sym ); return; } s = malloc( sizeof( *s ) ); s->next = NULL; s->name = copystring( sym ); s->hash = hash; s->value = value; s->segment = currentSegment; hashtable_add(symtable, hash, s); /* Hash table lookup already speeds up symbol lookup enormously. We postpone sorting until end of pass 0. Since we're not doing the insertion sort, lastSymbol should always wind up pointing to the end of list. This allows constant time for adding to the list. -PH */ if (symbols == 0) { lastSymbol = symbols = s; } else { lastSymbol->next = s; lastSymbol = s; } #endif /* Q3ASM_TURBO */ } /* ============ LookupSymbol Symbols can only be evaluated on pass 1 ============ */ int LookupSymbol( char *sym ) { #ifndef Q3ASM_TURBO symbol_t *s; char expanded[MAX_LINE_LENGTH]; int hash; if ( passNumber == 0 ) { return 0; } // add the file prefix to local symbols to guarantee unique if ( sym[0] == '$' ) { sprintf( expanded, "%s_%i", sym, currentFileIndex ); sym = expanded; } hash = HashString( sym ); for ( s = symbols ; s ; s = s->next ) { if ( hash == s->hash && !strcmp( sym, s->name ) ) { return s->segment->segmentBase + s->value; } } CodeError( "ERROR: symbol %s undefined\n", sym ); passNumber = 0; DefineSymbol( sym, 0 ); // so more errors aren't printed passNumber = 1; return 0; #else /* Q3ASM_TURBO */ symbol_t *s; char expanded[MAX_LINE_LENGTH]; int hash; hashchain_t *hc; if ( passNumber == 0 ) { return 0; } // add the file prefix to local symbols to guarantee unique if ( sym[0] == '$' ) { sprintf( expanded, "%s_%i", sym, currentFileIndex ); sym = expanded; } hash = HashString( sym ); /* Hand optimization by PhaethonH Using a hash table with chain/bucket for lookups alone sped up q3asm by almost 3x for me. -PH */ for (hc = hashtable_get(symtable, hash); hc; hc = hc->next) { s = (symbol_t*)hc->data; /* ugly typecasting, but it's fast! */ if ( (hash == s->hash) && !strcmp(sym, s->name) ) { return s->segment->segmentBase + s->value; } } CodeError( "error: symbol %s undefined\n", sym ); passNumber = 0; DefineSymbol( sym, 0 ); // so more errors aren't printed passNumber = 1; return 0; #endif /* Q3ASM_TURBO */ } /* ============== ExtractLine Extracts the next line from the given text block. If a full line isn't parsed, returns NULL Otherwise returns the updated parse pointer =============== */ char *ExtractLine( char *data ) { #ifndef Q3ASM_TURBO int i; currentFileLine++; lineParseOffset = 0; token[0] = 0; if ( data[0] == 0 ) { lineBuffer[0] = 0; return NULL; } for ( i = 0 ; i < MAX_LINE_LENGTH ; i++ ) { if ( data[i] == 0 || data[i] == '\n' ) { break; } } if ( i == MAX_LINE_LENGTH ) { CodeError( "MAX_LINE_LENGTH" ); return data; } memcpy( lineBuffer, data, i ); lineBuffer[i] = 0; data += i; if ( data[0] == '\n' ) { data++; } return data; #else /* Q3ASM_TURBO */ /* Goal: Given a string `data', extract one text line into buffer `lineBuffer' that is no longer than MAX_LINE_LENGTH characters long. Return value is remainder of `data' that isn't part of `lineBuffer'. -PH */ /* Hand-optimized by PhaethonH */ char *p, *q; currentFileLine++; lineParseOffset = 0; token[0] = 0; *lineBuffer = 0; p = q = data; if (!*q) { return NULL; } for ( ; !((*p == 0) || (*p == '\n')); p++) /* nop */ ; if ((p - q) >= MAX_LINE_LENGTH) { CodeError( "MAX_LINE_LENGTH" ); return data; } memcpy( lineBuffer, data, (p - data) ); lineBuffer[(p - data)] = 0; p += (*p == '\n') ? 1 : 0; /* Skip over final newline. */ return p; #endif /* Q3ASM_TURBO */ } /* ============== Parse Parse a token out of linebuffer ============== */ qboolean Parse( void ) { #ifndef Q3ASM_TURBO int c; int len; len = 0; token[0] = 0; // skip whitespace while ( lineBuffer[ lineParseOffset ] <= ' ' ) { if ( lineBuffer[ lineParseOffset ] == 0 ) { return qfalse; } lineParseOffset++; } // skip ; comments c = lineBuffer[ lineParseOffset ]; if ( c == ';' ) { return qfalse; } // parse a regular word do { token[len] = c; len++; lineParseOffset++; c = lineBuffer[ lineParseOffset ]; } while (c>32); token[len] = 0; return qtrue; #else /* Q3ASM_TURBO */ /* Hand-optimized by PhaethonH */ const char *p, *q; /* Because lineParseOffset is only updated just before exit, this makes this code version somewhat harder to debug under a symbolic debugger. */ *token = 0; /* Clear token. */ // skip whitespace for (p = lineBuffer + lineParseOffset; *p && (*p <= ' '); p++) /* nop */ ; // skip ; comments /* die on end-of-string */ if ((*p == ';') || (*p == 0)) { lineParseOffset = p - lineBuffer; return qfalse; } q = p; /* Mark the start of token. */ /* Find separator first. */ for ( ; *p > 32; p++) /* nop */ ; /* XXX: unsafe assumptions. */ /* *p now sits on separator. Mangle other values accordingly. */ strncpy(token, q, p - q); token[p - q] = 0; lineParseOffset = p - lineBuffer; return qtrue; #endif /* Q3ASM_TURBO */ } /* ============== ParseValue ============== */ int ParseValue( void ) { Parse(); return atoi( token ); } /* ============== ParseExpression ============== */ int ParseExpression(void) { #ifndef Q3ASM_TURBO int i, j; char sym[MAX_LINE_LENGTH]; int v; if ( token[0] == '-' ) { i = 1; } else { i = 0; } for ( ; i < MAX_LINE_LENGTH ; i++ ) { if ( token[i] == '+' || token[i] == '-' || token[i] == 0 ) { break; } } memcpy( sym, token, i ); sym[i] = 0; if ( ( sym[0] >= '0' && sym[0] <= '9' ) || sym[0] == '-' ) { v = atoi( sym ); } else { v = LookupSymbol( sym ); } // parse add / subtract offsets while ( token[i] != 0 ) { for ( j = i + 1 ; j < MAX_LINE_LENGTH ; j++ ) { if ( token[j] == '+' || token[j] == '-' || token[j] == 0 ) { break; } } memcpy( sym, token+i+1, j-i-1 ); sym[j-i-1] = 0; if ( token[i] == '+' ) { v += atoi( sym ); } if ( token[i] == '-' ) { v -= atoi( sym ); } i = j; } return v; #else /* Q3ASM_TURBO */ /* Hand optimization, PhaethonH */ int i, j; char sym[MAX_LINE_LENGTH]; int v; /* Skip over a leading minus. */ for ( i = ((token[0] == '-') ? 1 : 0) ; i < MAX_LINE_LENGTH ; i++ ) { if ( token[i] == '+' || token[i] == '-' || token[i] == 0 ) { break; } } memcpy( sym, token, i ); sym[i] = 0; switch (*sym) { /* Resolve depending on first character. */ /* Optimizing compilers can convert cases into "calculated jumps". I think these are faster. -PH */ case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': v = atoi(sym); break; default: v = LookupSymbol(sym); break; } // parse add / subtract offsets while ( token[i] != 0 ) { for ( j = i + 1 ; j < MAX_LINE_LENGTH ; j++ ) { if ( token[j] == '+' || token[j] == '-' || token[j] == 0 ) { break; } } memcpy( sym, token+i+1, j-i-1 ); sym[j-i-1] = 0; switch (token[i]) { case '+': v += atoi(sym); break; case '-': v -= atoi(sym); break; } i = j; } return v; #endif /* Q3ASM_TURBO */ } /* ============== HackToSegment BIG HACK: I want to put all 32 bit values in the data segment so they can be byte swapped, and all char data in the lit segment, but switch jump tables are emited in the lit segment and initialized strng variables are put in the data segment. I can change segments here, but I also need to fixup the label that was just defined Note that the lit segment is read-write in the VM, so strings aren't read only as in some architectures. ============== */ void HackToSegment( segmentName_t seg ) { if ( currentSegment == &segment[seg] ) { return; } currentSegment = &segment[seg]; if ( passNumber == 0 ) { lastSymbol->segment = currentSegment; lastSymbol->value = currentSegment->imageUsed; } } #ifdef Q3ASM_TURBO //#define STAT(L) report("STAT " L "\n"); #define STAT(L) #define ASM(O) int TryAssemble##O () /* These clauses were moved out from AssembleLine() to allow reordering of if's. An optimizing compiler should reconstruct these back into inline code. -PH */ // call instructions reset currentArgOffset ASM(CALL) { if ( !strncmp( token, "CALL", 4 ) ) { STAT("CALL"); EmitByte( &segment[CODESEG], OP_CALL ); instructionCount++; currentArgOffset = 0; return 1; } return 0; } // arg is converted to a reversed store ASM(ARG) { if ( !strncmp( token, "ARG", 3 ) ) { STAT("ARG"); EmitByte( &segment[CODESEG], OP_ARG ); instructionCount++; if ( 8 + currentArgOffset >= 256 ) { CodeError( "currentArgOffset >= 256" ); return 1; } EmitByte( &segment[CODESEG], 8 + currentArgOffset ); currentArgOffset += 4; return 1; } return 0; } // ret just leaves something on the op stack ASM(RET) { if ( !strncmp( token, "RET", 3 ) ) { STAT("RET"); EmitByte( &segment[CODESEG], OP_LEAVE ); instructionCount++; EmitInt( &segment[CODESEG], 8 + currentLocals + currentArgs ); return 1; } return 0; } // pop is needed to discard the return value of // a function ASM(POP) { if ( !strncmp( token, "pop", 3 ) ) { STAT("POP"); EmitByte( &segment[CODESEG], OP_POP ); instructionCount++; return 1; } return 0; } // address of a parameter is converted to OP_LOCAL ASM(ADDRF) { int v; if ( !strncmp( token, "ADDRF", 5 ) ) { STAT("ADDRF"); instructionCount++; Parse(); v = ParseExpression(); v = 16 + currentArgs + currentLocals + v; EmitByte( &segment[CODESEG], OP_LOCAL ); EmitInt( &segment[CODESEG], v ); return 1; } return 0; } // address of a local is converted to OP_LOCAL ASM(ADDRL) { int v; if ( !strncmp( token, "ADDRL", 5 ) ) { STAT("ADDRL"); instructionCount++; Parse(); v = ParseExpression(); v = 8 + currentArgs + v; EmitByte( &segment[CODESEG], OP_LOCAL ); EmitInt( &segment[CODESEG], v ); return 1; } return 0; } ASM(PROC) { char name[1024]; if ( !strcmp( token, "proc" ) ) { STAT("PROC"); Parse(); // function name strcpy( name, token ); DefineSymbol( token, instructionCount ); // segment[CODESEG].imageUsed ); currentLocals = ParseValue(); // locals currentLocals = ( currentLocals + 3 ) & ~3; currentArgs = ParseValue(); // arg marshalling currentArgs = ( currentArgs + 3 ) & ~3; if ( 8 + currentLocals + currentArgs >= 32767 ) { CodeError( "Locals > 32k in %s\n", name ); } instructionCount++; EmitByte( &segment[CODESEG], OP_ENTER ); EmitInt( &segment[CODESEG], 8 + currentLocals + currentArgs ); return 1; } return 0; } ASM(ENDPROC) { int v, v2; if ( !strcmp( token, "endproc" ) ) { STAT("ENDPROC"); Parse(); // skip the function name v = ParseValue(); // locals v2 = ParseValue(); // arg marshalling // all functions must leave something on the opstack instructionCount++; EmitByte( &segment[CODESEG], OP_PUSH ); instructionCount++; EmitByte( &segment[CODESEG], OP_LEAVE ); EmitInt( &segment[CODESEG], 8 + currentLocals + currentArgs ); return 1; } return 0; } ASM(ADDRESS) { int v; if ( !strcmp( token, "address" ) ) { STAT("ADDRESS"); Parse(); v = ParseExpression(); /* Addresses are 32 bits wide, and therefore go into data segment. */ HackToSegment( DATASEG ); EmitInt( currentSegment, v ); return 1; } return 0; } ASM(EXPORT) { if ( !strcmp( token, "export" ) ) { STAT("EXPORT"); return 1; } return 0; } ASM(IMPORT) { if ( !strcmp( token, "import" ) ) { STAT("IMPORT"); return 1; } return 0; } ASM(CODE) { if ( !strcmp( token, "code" ) ) { STAT("CODE"); currentSegment = &segment[CODESEG]; return 1; } return 0; } ASM(BSS) { if ( !strcmp( token, "bss" ) ) { STAT("BSS"); currentSegment = &segment[BSSSEG]; return 1; } return 0; } ASM(DATA) { if ( !strcmp( token, "data" ) ) { STAT("DATA"); currentSegment = &segment[DATASEG]; return 1; } return 0; } ASM(LIT) { if ( !strcmp( token, "lit" ) ) { STAT("LIT"); currentSegment = &segment[LITSEG]; return 1; } return 0; } ASM(LINE) { if ( !strcmp( token, "line" ) ) { STAT("LINE"); return 1; } return 0; } ASM(FILE) { if ( !strcmp( token, "file" ) ) { STAT("FILE"); return 1; } return 0; } ASM(EQU) { char name[1024]; if ( !strcmp( token, "equ" ) ) { STAT("EQU"); Parse(); strcpy( name, token ); Parse(); DefineSymbol( name, atoi(token) ); return 1; } return 0; } ASM(ALIGN) { int v; if ( !strcmp( token, "align" ) ) { STAT("ALIGN"); v = ParseValue(); currentSegment->imageUsed = (currentSegment->imageUsed + v - 1 ) & ~( v - 1 ); return 1; } return 0; } ASM(SKIP) { int v; if ( !strcmp( token, "skip" ) ) { STAT("SKIP"); v = ParseValue(); currentSegment->imageUsed += v; return 1; } return 0; } ASM(BYTE) { int i, v, v2; if ( !strcmp( token, "byte" ) ) { STAT("BYTE"); v = ParseValue(); v2 = ParseValue(); if ( v == 1 ) { /* Character (1-byte) values go into lit(eral) segment. */ HackToSegment( LITSEG ); } else if ( v == 4 ) { /* 32-bit (4-byte) values go into data segment. */ HackToSegment( DATASEG ); } else if ( v == 2 ) { /* and 16-bit (2-byte) values will cause q3asm to barf. */ CodeError( "16 bit initialized data not supported" ); } // emit little endien for ( i = 0 ; i < v ; i++ ) { EmitByte( currentSegment, (v2 & 0xFF) ); /* paranoid ANDing -PH */ v2 >>= 8; } return 1; } return 0; } // code labels are emited as instruction counts, not byte offsets, // because the physical size of the code will change with // different run time compilers and we want to minimize the // size of the required translation table ASM(LABEL) { if ( !strncmp( token, "LABEL", 5 ) ) { STAT("LABEL"); Parse(); if ( currentSegment == &segment[CODESEG] ) { DefineSymbol( token, instructionCount ); } else { DefineSymbol( token, currentSegment->imageUsed ); } return 1; } return 0; } #endif /* Q3ASM_TURBO */ /* ============== AssembleLine ============== */ void AssembleLine( void ) { #ifndef Q3ASM_TURBO int v, v2; #else /* Q3ASM_TURBO */ hashchain_t *hc; sourceOps_t *op; #endif /* Q3ASM_TURBO */ int i; int hash; Parse(); if ( !token[0] ) { return; } hash = HashString( token ); #ifndef Q3ASM_TURBO for ( i = 0 ; i < NUM_SOURCE_OPS ; i++ ) { if ( hash == opcodesHash[i] && !strcmp( token, sourceOps[i].name ) ) { int opcode; int expression; if ( sourceOps[i].opcode == OP_UNDEF ) { CodeError( "Undefined opcode: %s\n", token ); } if ( sourceOps[i].opcode == OP_IGNORE ) { return; // we ignore most conversions } // sign extensions need to check next parm opcode = sourceOps[i].opcode; if ( opcode == OP_SEX8 ) { Parse(); if ( token[0] == '1' ) { opcode = OP_SEX8; } else if ( token[0] == '2' ) { opcode = OP_SEX16; } else { CodeError( "Bad sign extension: %s\n", token ); return; } } // check for expression Parse(); if ( token[0] && sourceOps[i].opcode != OP_CVIF && sourceOps[i].opcode != OP_CVFI ) { expression = ParseExpression(); // code like this can generate non-dword block copies: // auto char buf[2] = " "; // we are just going to round up. This might conceivably // be incorrect if other initialized chars follow. if ( opcode == OP_BLOCK_COPY ) { expression = ( expression + 3 ) & ~3; } EmitByte( &segment[CODESEG], opcode ); EmitInt( &segment[CODESEG], expression ); } else { EmitByte( &segment[CODESEG], opcode ); } instructionCount++; return; } } // call instructions reset currentArgOffset if ( !strncmp( token, "CALL", 4 ) ) { EmitByte( &segment[CODESEG], OP_CALL ); instructionCount++; currentArgOffset = 0; return; } // arg is converted to a reversed store if ( !strncmp( token, "ARG", 3 ) ) { EmitByte( &segment[CODESEG], OP_ARG ); instructionCount++; if ( 8 + currentArgOffset >= 256 ) { CodeError( "currentArgOffset >= 256" ); return; } EmitByte( &segment[CODESEG], 8 + currentArgOffset ); currentArgOffset += 4; return; } // ret just leaves something on the op stack if ( !strncmp( token, "RET", 3 ) ) { EmitByte( &segment[CODESEG], OP_LEAVE ); instructionCount++; EmitInt( &segment[CODESEG], 8 + currentLocals + currentArgs ); return; } // pop is needed to discard the return value of // a function if ( !strncmp( token, "pop", 3 ) ) { EmitByte( &segment[CODESEG], OP_POP ); instructionCount++; return; } // address of a parameter is converted to OP_LOCAL if ( !strncmp( token, "ADDRF", 5 ) ) { instructionCount++; Parse(); v = ParseExpression(); v = 16 + currentArgs + currentLocals + v; EmitByte( &segment[CODESEG], OP_LOCAL ); EmitInt( &segment[CODESEG], v ); return; } // address of a local is converted to OP_LOCAL if ( !strncmp( token, "ADDRL", 5 ) ) { instructionCount++; Parse(); v = ParseExpression(); v = 8 + currentArgs + v; EmitByte( &segment[CODESEG], OP_LOCAL ); EmitInt( &segment[CODESEG], v ); return; } if ( !strcmp( token, "proc" ) ) { char name[1024]; Parse(); // function name strcpy( name, token ); DefineSymbol( token, instructionCount ); // segment[CODESEG].imageUsed ); currentLocals = ParseValue(); // locals currentLocals = ( currentLocals + 3 ) & ~3; currentArgs = ParseValue(); // arg marshalling currentArgs = ( currentArgs + 3 ) & ~3; if ( 8 + currentLocals + currentArgs >= 32767 ) { CodeError( "Locals > 32k in %s\n", name ); } instructionCount++; EmitByte( &segment[CODESEG], OP_ENTER ); EmitInt( &segment[CODESEG], 8 + currentLocals + currentArgs ); return; } if ( !strcmp( token, "endproc" ) ) { Parse(); // skip the function name v = ParseValue(); // locals v2 = ParseValue(); // arg marshalling // all functions must leave something on the opstack instructionCount++; EmitByte( &segment[CODESEG], OP_PUSH ); instructionCount++; EmitByte( &segment[CODESEG], OP_LEAVE ); EmitInt( &segment[CODESEG], 8 + currentLocals + currentArgs ); return; } if ( !strcmp( token, "address" ) ) { Parse(); v = ParseExpression(); HackToSegment( DATASEG ); EmitInt( currentSegment, v ); return; } if ( !strcmp( token, "export" ) ) { return; } if ( !strcmp( token, "import" ) ) { return; } if ( !strcmp( token, "code" ) ) { currentSegment = &segment[CODESEG]; return; } if ( !strcmp( token, "bss" ) ) { currentSegment = &segment[BSSSEG]; return; } if ( !strcmp( token, "data" ) ) { currentSegment = &segment[DATASEG]; return; } if ( !strcmp( token, "lit" ) ) { currentSegment = &segment[LITSEG]; return; } if ( !strcmp( token, "line" ) ) { return; } if ( !strcmp( token, "file" ) ) { return; } if ( !strcmp( token, "equ" ) ) { char name[1024]; Parse(); strcpy( name, token ); Parse(); DefineSymbol( name, atoi(token) ); return; } if ( !strcmp( token, "align" ) ) { v = ParseValue(); currentSegment->imageUsed = (currentSegment->imageUsed + v - 1 ) & ~( v - 1 ); return; } if ( !strcmp( token, "skip" ) ) { v = ParseValue(); currentSegment->imageUsed += v; return; } if ( !strcmp( token, "byte" ) ) { v = ParseValue(); v2 = ParseValue(); if ( v == 1 ) { HackToSegment( LITSEG ); } else if ( v == 4 ) { HackToSegment( DATASEG ); } else if ( v == 2 ) { CodeError( "16 bit initialized data not supported" ); } // emit little endien for ( i = 0 ; i < v ; i++ ) { EmitByte( currentSegment, v2 ); v2 >>= 8; } return; } // code labels are emited as instruction counts, not byte offsets, // because the physical size of the code will change with // different run time compilers and we want to minimize the // size of the required translation table if ( !strncmp( token, "LABEL", 5 ) ) { Parse(); if ( currentSegment == &segment[CODESEG] ) { DefineSymbol( token, instructionCount ); } else { DefineSymbol( token, currentSegment->imageUsed ); } return; } #else /* Q3ASM_TURBO */ /* Opcode search using hash table. Since the opcodes stays mostly fixed, this may benefit even more from a tree. Always with the tree :) -PH */ for (hc = hashtable_get(optable, hash); hc; hc = hc->next) { op = (sourceOps_t*)(hc->data); i = op - sourceOps; if ((hash == opcodesHash[i]) && (!strcmp(token, op->name))) { int opcode; int expression; if ( op->opcode == OP_UNDEF ) { CodeError( "Undefined opcode: %s\n", token ); } if ( op->opcode == OP_IGNORE ) { return; // we ignore most conversions } // sign extensions need to check next parm opcode = op->opcode; if ( opcode == OP_SEX8 ) { Parse(); if ( token[0] == '1' ) { opcode = OP_SEX8; } else if ( token[0] == '2' ) { opcode = OP_SEX16; } else { CodeError( "Bad sign extension: %s\n", token ); return; } } // check for expression Parse(); if ( token[0] && op->opcode != OP_CVIF && op->opcode != OP_CVFI ) { expression = ParseExpression(); // code like this can generate non-dword block copies: // auto char buf[2] = " "; // we are just going to round up. This might conceivably // be incorrect if other initialized chars follow. if ( opcode == OP_BLOCK_COPY ) { expression = ( expression + 3 ) & ~3; } EmitByte( &segment[CODESEG], opcode ); EmitInt( &segment[CODESEG], expression ); } else { EmitByte( &segment[CODESEG], opcode ); } instructionCount++; return; } } /* This falls through if an assembly opcode is not found. -PH */ /* The following should be sorted in sequence of statistical frequency, most frequent first. -PH */ /* Empirical frequency statistics from FI 2001.01.23: 109892 STAT ADDRL 72188 STAT BYTE 51150 STAT LINE 50906 STAT ARG 43704 STAT IMPORT 34902 STAT LABEL 32066 STAT ADDRF 23704 STAT CALL 7720 STAT POP 7256 STAT RET 5198 STAT ALIGN 3292 STAT EXPORT 2878 STAT PROC 2878 STAT ENDPROC 2812 STAT ADDRESS 738 STAT SKIP 374 STAT EQU 280 STAT CODE 176 STAT LIT 102 STAT FILE 100 STAT BSS 68 STAT DATA -PH */ #undef ASM #define ASM(O) if (TryAssemble##O ()) return; ASM(ADDRL) ASM(BYTE) ASM(LINE) ASM(ARG) ASM(IMPORT) ASM(LABEL) ASM(ADDRF) ASM(CALL) ASM(POP) ASM(RET) ASM(ALIGN) ASM(EXPORT) ASM(PROC) ASM(ENDPROC) ASM(ADDRESS) ASM(SKIP) ASM(EQU) ASM(CODE) ASM(LIT) ASM(FILE) ASM(BSS) ASM(DATA) #endif /* Q3ASM_TURBO */ CodeError( "Unknown token: %s\n", token ); } /* ============== InitTables ============== */ void InitTables( void ) { #ifndef Q3ASM_TURBO int i; for ( i = 0 ; i < NUM_SOURCE_OPS ; i++ ) { opcodesHash[i] = HashString( sourceOps[i].name ); } #else /* Q3ASM_TURBO */ int i; symtable = hashtable_new(symtablelen); optable = hashtable_new(100); /* There's hardly 100 opcodes anyway. */ for ( i = 0 ; i < NUM_SOURCE_OPS ; i++ ) { opcodesHash[i] = HashString( sourceOps[i].name ); hashtable_add(optable, opcodesHash[i], sourceOps + i); } #endif /* Q3ASM_TURBO */ } /* ============== WriteMapFile ============== */ void WriteMapFile( void ) { FILE *f; symbol_t *s; char imageName[MAX_OS_PATH]; int seg; strcpy( imageName, outputFilename ); StripExtension( imageName ); strcat( imageName, ".map" ); report( "Writing %s...\n", imageName ); f = SafeOpenWrite( imageName ); for ( seg = CODESEG ; seg <= BSSSEG ; seg++ ) { for ( s = symbols ; s ; s = s->next ) { if ( s->name[0] == '$' ) { continue; // skip locals } if ( &segment[seg] != s->segment ) { continue; } fprintf( f, "%i %8x %s\n", seg, s->value, s->name ); } } fclose( f ); } /* =============== WriteVmFile =============== */ void WriteVmFile( void ) { char imageName[MAX_OS_PATH]; vmHeader_t header; FILE *f; report( "%i total errors\n", errorCount ); strcpy( imageName, outputFilename ); StripExtension( imageName ); strcat( imageName, ".qvm" ); remove( imageName ); report( "code segment: %7i\n", segment[CODESEG].imageUsed ); report( "data segment: %7i\n", segment[DATASEG].imageUsed ); report( "lit segment: %7i\n", segment[LITSEG].imageUsed ); report( "bss segment: %7i\n", segment[BSSSEG].imageUsed ); report( "instruction count: %i\n", instructionCount ); if ( errorCount != 0 ) { report( "Not writing a file due to errors\n" ); return; } header.vmMagic = VM_MAGIC; header.instructionCount = instructionCount; header.codeOffset = sizeof( header ); header.codeLength = segment[CODESEG].imageUsed; header.dataOffset = header.codeOffset + segment[CODESEG].imageUsed; header.dataLength = segment[DATASEG].imageUsed; header.litLength = segment[LITSEG].imageUsed; header.bssLength = segment[BSSSEG].imageUsed; report( "Writing to %s\n", imageName ); CreatePath( imageName ); f = SafeOpenWrite( imageName ); SafeWrite( f, &header, sizeof( header ) ); SafeWrite( f, &segment[CODESEG].image, segment[CODESEG].imageUsed ); SafeWrite( f, &segment[DATASEG].image, segment[DATASEG].imageUsed ); SafeWrite( f, &segment[LITSEG].image, segment[LITSEG].imageUsed ); fclose( f ); } /* =============== Assemble =============== */ void Assemble( void ) { int i; char filename[MAX_OS_PATH]; char *ptr; report( "outputFilename: %s\n", outputFilename ); for ( i = 0 ; i < numAsmFiles ; i++ ) { strcpy( filename, asmFileNames[ i ] ); DefaultExtension( filename, ".asm" ); LoadFile( filename, (void **)&asmFiles[i] ); } // assemble for ( passNumber = 0 ; passNumber < 2 ; passNumber++ ) { segment[LITSEG].segmentBase = segment[DATASEG].imageUsed; segment[BSSSEG].segmentBase = segment[LITSEG].segmentBase + segment[LITSEG].imageUsed; for ( i = 0 ; i < NUM_SEGMENTS ; i++ ) { segment[i].imageUsed = 0; } segment[DATASEG].imageUsed = 4; // skip the 0 byte, so NULL pointers are fixed up properly instructionCount = 0; for ( i = 0 ; i < numAsmFiles ; i++ ) { currentFileIndex = i; currentFileName = asmFileNames[ i ]; currentFileLine = 0; report("pass %i: %s\n", passNumber, currentFileName ); fflush( NULL ); ptr = asmFiles[i]; while ( ptr ) { ptr = ExtractLine( ptr ); AssembleLine(); } } // align all segment for ( i = 0 ; i < NUM_SEGMENTS ; i++ ) { segment[i].imageUsed = (segment[i].imageUsed + 3) & ~3; } #ifdef Q3ASM_TURBO if (passNumber == 0) { sort_symbols(); } #endif /* Q3ASM_TURBO */ } // reserve the stack in bss DefineSymbol( "_stackStart", segment[BSSSEG].imageUsed ); segment[BSSSEG].imageUsed += stackSize; DefineSymbol( "_stackEnd", segment[BSSSEG].imageUsed ); // write the image WriteVmFile(); // write the map file even if there were errors if( optionWriteMapFile ) { WriteMapFile(); } } /* ============= ParseOptionFile ============= */ void ParseOptionFile( const char *filename ) { char expanded[MAX_OS_PATH]; char *text, *text_p; strcpy( expanded, filename ); DefaultExtension( expanded, ".q3asm" ); LoadFile( expanded, (void **)&text ); if ( !text ) { return; } text_p = text; while( ( text_p = COM_Parse( text_p ) ) != 0 ) { if ( !strcmp( com_token, "-o" ) ) { // allow output override in option file text_p = COM_Parse( text_p ); if ( text_p ) { strcpy( outputFilename, com_token ); } continue; } asmFileNames[ numAsmFiles ] = copystring( com_token ); numAsmFiles++; } } /* ============== main ============== */ int main( int argc, char **argv ) { int i; double start, end; // _chdir( "/quake3/jccode/cgame/lccout" ); // hack for vc profiler if ( argc < 2 ) { #ifndef Q3ASM_TURBO Error( "usage: q3asm [-o output] or q3asm -f \n" ); #else /* Q3ASM_TURBO */ Error("Usage: %s [OPTION]... [FILES]...\n\ Assemble LCC bytecode assembly to Q3VM bytecode.\n\ \n\ -o OUTPUT Write assembled output to file OUTPUT.qvm\n\ -f LISTFILE Read options and list of files to assemble from LISTFILE\n\ -b BUCKETS Set symbol hash table to BUCKETS buckets\n\ -v Verbose compilation report\n\ ", argv[0]); #endif /* Q3ASM_TURBO */ } start = I_FloatTime (); #ifndef Q3ASM_TURBO InitTables(); #endif /* !Q3ASM_TURBO */ // default filename is "q3asm" strcpy( outputFilename, "q3asm" ); numAsmFiles = 0; for ( i = 1 ; i < argc ; i++ ) { if ( argv[i][0] != '-' ) { break; } if ( !strcmp( argv[i], "-o" ) ) { if ( i == argc - 1 ) { Error( "-o must preceed a filename" ); } /* Timbo of Tremulous pointed out -o not working; stock ID q3asm folded in the change. Yay. */ strcpy( outputFilename, argv[ i+1 ] ); i++; continue; } if ( !strcmp( argv[i], "-f" ) ) { if ( i == argc - 1 ) { Error( "-f must preceed a filename" ); } ParseOptionFile( argv[ i+1 ] ); i++; continue; } #ifdef Q3ASM_TURBO if (!strcmp(argv[i], "-b")) { if (i == argc - 1) { Error("-b requires an argument"); } i++; symtablelen = atoi(argv[i]); continue; } #endif /* Q3ASM_TURBO */ if( !strcmp( argv[ i ], "-v" ) ) { /* Verbosity option added by Timbo, 2002.09.14. By default (no -v option), q3asm remains silent except for critical errors. Verbosity turns on all messages, error or not. Motivation: not wanting to scrollback for pages to find asm error. */ optionVerbose = qtrue; continue; } if( !strcmp( argv[ i ], "-m" ) ) { optionWriteMapFile = qtrue; continue; } Error( "Unknown option: %s", argv[i] ); } // the rest of the command line args are asm files for ( ; i < argc ; i++ ) { asmFileNames[ numAsmFiles ] = copystring( argv[ i ] ); numAsmFiles++; } #ifdef Q3ASM_TURBO InitTables(); #endif /* Q3ASM_TURBO */ Assemble(); #ifdef Q3ASM_TURBO { symbol_t *s; for ( i = 0, s = symbols ; s ; s = s->next, i++ ) /* nop */ ; if (optionVerbose) { report("%d symbols defined\n", i); hashtable_stats(symtable); hashtable_stats(optable); } } #endif /* Q3ASM_TURBO */ end = I_FloatTime (); report ("%5.0f seconds elapsed\n", end-start); return errorCount; }