From 163e8dc4db61a3daf608edf693849f99e8f2f7ba Mon Sep 17 00:00:00 2001 From: ludwig Date: Thu, 27 Oct 2005 21:13:47 +0000 Subject: add x86_64 vm. experimental, not enabled by default. you need as for it to work. git-svn-id: svn://svn.icculus.org/quake3/trunk@188 edf5b092-35ff-0310-97b2-ce42778d08ea --- code/game/bg_lib.c | 2 + code/game/bg_lib.h | 7 + code/qcommon/cvar.c | 2 +- code/qcommon/qcommon.h | 4 +- code/qcommon/vm.c | 19 +- code/qcommon/vm_x86_64.c | 1264 ++++++++++++++++++++++++++++++++++++++++++++++ code/unix/Makefile | 13 +- code/unix/unix_main.c | 1 - 8 files changed, 1303 insertions(+), 9 deletions(-) create mode 100644 code/qcommon/vm_x86_64.c (limited to 'code') diff --git a/code/game/bg_lib.c b/code/game/bg_lib.c index b198f72..e313087 100644 --- a/code/game/bg_lib.c +++ b/code/game/bg_lib.c @@ -38,6 +38,8 @@ * SUCH DAMAGE. */ +#include "bg_lib.h" + #if defined(LIBC_SCCS) && !defined(lint) #if 0 static char sccsid[] = "@(#)qsort.c 8.1 (Berkeley) 6/4/93"; diff --git a/code/game/bg_lib.h b/code/game/bg_lib.h index d6deb60..70603d2 100644 --- a/code/game/bg_lib.h +++ b/code/game/bg_lib.h @@ -23,6 +23,12 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA // compiled for the virtual machine // This file is NOT included on native builds +#ifndef BG_LIB_H +#define BG_LIB_H + +#ifndef NULL +#define NULL ((void *)0) +#endif typedef int size_t; @@ -89,3 +95,4 @@ int abs( int n ); double fabs( double x ); double acos( double x ); +#endif // BG_LIB_H diff --git a/code/qcommon/cvar.c b/code/qcommon/cvar.c index f954f0d..d1fad9a 100644 --- a/code/qcommon/cvar.c +++ b/code/qcommon/cvar.c @@ -286,7 +286,7 @@ Cvar_Set2 cvar_t *Cvar_Set2( const char *var_name, const char *value, qboolean force ) { cvar_t *var; - Com_DPrintf( "Cvar_Set2: %s %s\n", var_name, value ); +// Com_DPrintf( "Cvar_Set2: %s %s\n", var_name, value ); if ( !Cvar_ValidateString( var_name ) ) { Com_Printf("invalid cvar name string: %s\n", var_name ); diff --git 
a/code/qcommon/qcommon.h b/code/qcommon/qcommon.h index a99bfcf..2530940 100644 --- a/code/qcommon/qcommon.h +++ b/code/qcommon/qcommon.h @@ -547,6 +547,8 @@ void FS_FreeFileList( char **list ); qboolean FS_FileExists( const char *file ); +char *FS_BuildOSPath( const char *base, const char *game, const char *qpath ); + int FS_LoadStack( void ); int FS_GetFileList( const char *path, const char *extension, char *listbuf, int bufsize ); @@ -603,7 +605,7 @@ int FS_FTell( fileHandle_t f ); void FS_Flush( fileHandle_t f ); -void QDECL FS_Printf( fileHandle_t f, const char *fmt, ... ); +void QDECL FS_Printf( fileHandle_t f, const char *fmt, ... ) __attribute__ ((format (printf, 2, 3))); // like fprintf int FS_FOpenFileByMode( const char *qpath, fileHandle_t *f, fsMode_t mode ); diff --git a/code/qcommon/vm.c b/code/qcommon/vm.c index 8ebe73d..a20e69f 100644 --- a/code/qcommon/vm.c +++ b/code/qcommon/vm.c @@ -745,12 +745,15 @@ long QDECL VM_Call( vm_t *vm, long callnum, ... ) { args[4], args[5], args[6], args[7], args[8], args[9], args[10], args[11], args[12], args[13], args[14], args[15]); + } else { +#ifdef __i386__ // i386 calling convention doesn't need conversion: relies on callnum and the varargs lying contiguously on the stack, so pass the address of callnum #if defined(HAVE_VM_COMPILED) - } else if ( vm->compiled ) { - // only used on 32bit machines so this cast is fine - r = VM_CallCompiled( vm, (int*)&callnum ); + if ( vm->compiled ) + r = VM_CallCompiled( vm, (int*)&callnum ); + else #endif - } else { + r = VM_CallInterpreted( vm, (int*)&callnum ); +#else struct { int callnum; int args[16]; @@ -763,7 +766,13 @@ long QDECL VM_Call( vm_t *vm, long callnum, ... 
) { a.args[i] = va_arg(ap, long); } va_end(ap); - r = VM_CallInterpreted( vm, &a.callnum ); +#if defined(HAVE_VM_COMPILED) + if ( vm->compiled ) + r = VM_CallCompiled( vm, &a.callnum ); + else +#endif + r = VM_CallInterpreted( vm, &a.callnum ); +#endif } if ( oldVM != NULL ) // bk001220 - assert(currentVM!=NULL) for oldVM==NULL diff --git a/code/qcommon/vm_x86_64.c b/code/qcommon/vm_x86_64.c new file mode 100644 index 0000000..e8192a5 --- /dev/null +++ b/code/qcommon/vm_x86_64.c @@ -0,0 +1,1264 @@ +/* +=========================================================================== +Copyright (C) 1999-2005 Id Software, Inc. +Copyright (C) 2005 Ludwig Nussel + +This file is part of Quake III Arena source code. + +Quake III Arena source code is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Quake III Arena source code is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with Foobar; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +=========================================================================== +*/ +// vm_x86_64.c -- load time compiler and execution environment for x86-64 + +#include "vm_local.h" + +#include +#include +#include +#include +#include +#include +#include + +#ifdef VM_X86_64_STANDALONE +#include +#include +#include + +static vmInterpret_t interpret = VMI_COMPILED; + +#define DEBUG_VM + +static FILE* _asout; +vm_t* currentVM; + +static cvar_t _com_developer; +cvar_t *com_developer = &_com_developer; + +char* mmapfile(const char* fn, size_t* size); + +long printsyscall(long* args) +{ + printf("callnr %ld, args: %ld %ld %ld %ld\n", + args[0], + args[1], + args[2], + args[3], + args[4]); + + switch( args[0] ) { + case 1: + fputs((VMA(1)?VMA(1):"(NULL)\n"), stderr); + return 0; + case 999: + { + int a[5]; + a[0] = 3; + a[1] = args[1]; + a[2] = args[2]; + a[3] = args[3]; + a[4] = args[4]; + if(!currentVM->compiled) + return VM_CallInterpreted(currentVM, a); + else + return VM_CallCompiled(currentVM, a); + } + case 1000: + { + int a[5]; + a[0] = 4; + a[1] = args[1]; + a[2] = args[2]; + a[3] = args[3]; + a[4] = args[4]; + if(!currentVM->compiled) + return VM_CallInterpreted(currentVM, a); + else + return VM_CallCompiled(currentVM, a); + } + case 1001: + printf("got buffer with content '%s', length %d\n", (char*)VMA(1), (int)args[2]); + strncpy(VMA(1), "blah\n", args[2]); + return 0; + } + + return 0x66; +} + +fileHandle_t FS_FOpenFileWrite( const char *filename ) +{ + _asout = fopen(filename, "w"); + return 0; +} + +int FS_Write( const void *buffer, int len, fileHandle_t h ) +{ + return fwrite(buffer, 1, len, _asout); +} + +void FS_Printf( fileHandle_t h, const char *fmt, ... 
) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(_asout, fmt, ap); + va_end(ap); +} + +void FS_Flush( fileHandle_t f ) +{ + fflush(_asout); +} + +void FS_FCloseFile( fileHandle_t f ) +{ + fclose(_asout); +} + +char *FS_BuildOSPath( const char *base, const char *game, const char *qpath ) +{ + static char buf[4096]; + strcpy(buf, "./"); + strcat(buf, qpath); + return buf; +} + +cvar_t *Cvar_Get( const char *var_name, const char *var_value, int flags ) { + static cvar_t c = { .string = "" }; + return &c; +} +#endif // VM_X86_64_STANDALONE + +#ifdef DEBUG_VM +#define Dfprintf(fd, args...) fprintf(fd, ##args) +static FILE* qdasmout; +#else +#define Dfprintf(args...) +#endif + +/* + + eax scratch + ebx scratch + ecx scratch (required for shifts) + edx scratch (required for divisions) + rsi stack pointer + rdi program frame pointer + r8 pointer to begin of real stack memory + r9 return address to real program + r10 start of generated code +*/ + + +static long callAsmCall(long callProgramStack, long callSyscallNum) +{ + vm_t *savedVM; + long ret = 0x77; + long args[11]; + int iargs[11]; + int i; + +// Dfprintf(stderr, "callAsmCall(%ld, %ld)\n", callProgramStack, callSyscallNum); +// Com_Printf("-> callAsmCall %s, level %d, num %ld\n", currentVM->name, currentVM->callLevel, callSyscallNum); + + savedVM = currentVM; + + // save the stack to allow recursive VM entry + currentVM->programStack = callProgramStack - 4; + + args[0] = callSyscallNum; + iargs[0] = callSyscallNum; + for(i = 0; i < 10; ++i) + { + iargs[i+1] = *(int *)((byte *)currentVM->dataBase + callProgramStack + 8 + 4*i); + args[i+1] = *(int *)((byte *)currentVM->dataBase + callProgramStack + 8 + 4*i); + } + ret = currentVM->systemCall(args); + + currentVM = savedVM; +// Com_Printf("<- callAsmCall %s, level %d, num %ld\n", currentVM->name, currentVM->callLevel, callSyscallNum); + + return ret; +} + +#ifdef DEBUG_VM // bk001204 +static char *opnames[256] = { + "OP_UNDEF", + + "OP_IGNORE", + + "OP_BREAK", + + 
"OP_ENTER", + "OP_LEAVE", + "OP_CALL", + "OP_PUSH", + "OP_POP", + + "OP_CONST", + + "OP_LOCAL", + + "OP_JUMP", + + //------------------- + + "OP_EQ", + "OP_NE", + + "OP_LTI", + "OP_LEI", + "OP_GTI", + "OP_GEI", + + "OP_LTU", + "OP_LEU", + "OP_GTU", + "OP_GEU", + + "OP_EQF", + "OP_NEF", + + "OP_LTF", + "OP_LEF", + "OP_GTF", + "OP_GEF", + + //------------------- + + "OP_LOAD1", + "OP_LOAD2", + "OP_LOAD4", + "OP_STORE1", + "OP_STORE2", + "OP_STORE4", + "OP_ARG", + + "OP_BLOCK_COPY", + + //------------------- + + "OP_SEX8", + "OP_SEX16", + + "OP_NEGI", + "OP_ADD", + "OP_SUB", + "OP_DIVI", + "OP_DIVU", + "OP_MODI", + "OP_MODU", + "OP_MULI", + "OP_MULU", + + "OP_BAND", + "OP_BOR", + "OP_BXOR", + "OP_BCOM", + + "OP_LSH", + "OP_RSHI", + "OP_RSHU", + + "OP_NEGF", + "OP_ADDF", + "OP_SUBF", + "OP_DIVF", + "OP_MULF", + + "OP_CVIF", + "OP_CVFI" +}; +#endif // DEBUG_VM + +static unsigned char op_argsize[256] = +{ + [OP_ENTER] = 4, + [OP_LEAVE] = 4, + [OP_CONST] = 4, + [OP_LOCAL] = 4, + [OP_EQ] = 4, + [OP_NE] = 4, + [OP_LTI] = 4, + [OP_LEI] = 4, + [OP_GTI] = 4, + [OP_GEI] = 4, + [OP_LTU] = 4, + [OP_LEU] = 4, + [OP_GTU] = 4, + [OP_GEU] = 4, + [OP_EQF] = 4, + [OP_NEF] = 4, + [OP_LTF] = 4, + [OP_LEF] = 4, + [OP_GTF] = 4, + [OP_GEF] = 4, + [OP_ARG] = 1, + [OP_BLOCK_COPY] = 4, +}; + +#define emit(x...) 
\ + do { FS_Printf(fh_s, ##x); FS_Write("\n", 1, fh_s); } while(0) + +// integer compare and jump +#define IJ(op) \ + emit("subq $8, %%rsi"); \ + emit("movl 4(%%rsi), %%eax"); \ + emit("cmpl 8(%%rsi), %%eax"); \ + emit(op " i_%08x", instruction+1); \ + emit("jmp i_%08x", iarg); + +#ifdef USE_X87 +#define FJ(bits, op) \ + emit("subq $8, %%rsi");\ + emit("flds 4(%%rsi)");\ + emit("fcomps 8(%%rsi)");\ + emit("fnstsw %%ax");\ + emit("testb $" #bits ", %%ah");\ + emit(op " i_%08x", instruction+1);\ + emit("jmp i_%08x", iarg); +#define XJ(x) +#else +#define FJ(x, y) +#define XJ(op) \ + emit("subq $8, %%rsi");\ + emit("movss 4(%%rsi), %%xmm0");\ + emit("ucomiss 8(%%rsi), %%xmm0");\ + emit("jp i_%08x", instruction+1);\ + emit(op " i_%08x", instruction+1);\ + emit("jmp i_%08x", iarg); +#endif + +#define SIMPLE(op) \ + emit("subq $4, %%rsi"); \ + emit("movl 4(%%rsi), %%eax"); \ + emit(op " %%eax, 0(%%rsi)"); + +#ifdef USE_X87 +#define FSIMPLE(op) \ + emit("subq $4, %%rsi"); \ + emit("flds 0(%%rsi)"); \ + emit(op " 4(%%rsi)"); \ + emit("fstps 0(%%rsi)"); +#define XSIMPLE(op) +#else +#define FSIMPLE(op) +#define XSIMPLE(op) \ + emit("subq $4, %%rsi"); \ + emit("movss 0(%%rsi), %%xmm0"); \ + emit(op " 4(%%rsi), %%xmm0"); \ + emit("movss %%xmm0, 0(%%rsi)"); +#endif + +#define SHIFT(op) \ + emit("subq $4, %%rsi"); \ + emit("movl 4(%%rsi), %%ecx"); \ + emit("movl 0(%%rsi), %%eax"); \ + emit(op " %%cl, %%eax"); \ + emit("movl %%eax, 0(%%rsi)"); + +#if 1 +#define RANGECHECK(reg) \ + emit("andl $0x%x, %%" #reg, vm->dataMask); +#else +#define RANGECHECK(reg) +#endif + +#ifdef DEBUG_VM +#define NOTIMPL(x) \ + do { Com_Error(ERR_DROP, "instruction not implemented: %s\n", opnames[x]); } while(0) +#else +#define NOTIMPL(x) \ + do { Com_Error(ERR_DROP, "instruction not implemented: %x\n", x); } while(0) +#endif + +static void* getentrypoint(vm_t* vm) +{ + return vm->codeBase+64; // skip ELF header +} + +char* mmapfile(const char* fn, size_t* size) +{ + int fd = -1; + char* mem = NULL; + 
struct stat stb; + + fd = open(fn, O_RDONLY); + if(fd == -1) + goto out; + + if(fstat(fd, &stb) == -1) + goto out; + + *size = stb.st_size; + + mem = mmap(NULL, stb.st_size, PROT_READ|PROT_EXEC, MAP_SHARED, fd, 0); + if(mem == (void*)-1) + mem = NULL; + +out: + if(fd != -1) + close(fd); + + return mem; +} + +static int doas(const char* in, const char* out, unsigned char** compiledcode) +{ + char rin[4096]; + char rout[4096]; + char* buf; + char* mem; + cvar_t* homedir; + size_t size = -1, allocsize; + int ps; + pid_t pid; + + homedir = Cvar_Get("fs_homepath", "", 0); + + buf = FS_BuildOSPath(homedir->string, NULL, in); + strcpy(rin, buf); + + buf = FS_BuildOSPath(homedir->string, NULL, out); + strcpy(rout, buf); + + Com_Printf("running assembler\n"); + pid = fork(); + if(pid == -1) + Com_Error(ERR_FATAL, "can't fork\n"); + + if(!pid) + { + char* const argv[] = { + "as", + "-o", + rout, + rin, + NULL + }; + + execvp(argv[0], argv); + _exit(-1); + } + else + { + int status; + if(waitpid(pid, &status, 0) == -1) + Com_Error(ERR_FATAL, "can't wait for as: %s\n", strerror(errno)); + + if(!WIFEXITED(status)) + Com_Error(ERR_FATAL, "as died\n"); + if(WEXITSTATUS(status)) + Com_Error(ERR_FATAL, "as failed with status %d\n", WEXITSTATUS(status)); + } + + Com_Printf("done\n"); + + mem = mmapfile(rout, &size); + if(!mem) + Com_Error(ERR_FATAL, "can't mmap object file %s: %s\n", rout, strerror(errno)); + + ps = sysconf(_SC_PAGE_SIZE); + if(ps == -1) + Com_Error(ERR_FATAL, "can't determine page size: %s\n", strerror(errno)); + + --ps; + + allocsize = (size+ps)&~ps; + buf = Hunk_Alloc(allocsize+ps, h_high); // pad by ps: the page alignment below may advance buf by up to ps bytes + + buf = (void*)(((unsigned long)buf+ps)&~ps); + + memcpy(buf, mem, size); + + munmap(mem, size); // release the whole mapping; a zero length unmaps nothing and leaks it + + if((*compiledcode = (unsigned char*)buf)) + { +#ifdef VM_X86_64_STANDALONE // no idea why + if(mprotect(buf, allocsize, PROT_READ|PROT_EXEC) == -1) + Com_Error(ERR_FATAL, "mprotect failed on %p+%x: %s\n", buf, allocsize, strerror(errno)); +#endif + return size; + } + + return -1; 
+} + +/* +================= +VM_Compile +================= +*/ +void VM_Compile( vm_t *vm, vmHeader_t *header ) { + unsigned char op; + int pc; + unsigned instruction; + char* code; + unsigned iarg = 0; + unsigned char barg = 0; + void* entryPoint; + + char fn_s[MAX_QPATH]; // output file for assembler code + char fn_o[MAX_QPATH]; // file written by as +#ifdef DEBUG_VM + char fn_d[MAX_QPATH]; // disassembled +#endif + fileHandle_t fh_s; + byte* compiledcode; + int compiledsize; + + Com_Printf("compiling %s\n", vm->name); + + strcpy(fn_s,vm->name); + strcpy(fn_o,vm->name); + strcat(fn_s, ".s"); + strcat(fn_o, ".o"); +#ifdef DEBUG_VM + strcpy(fn_d,vm->name); + strcat(fn_d, ".qdasm"); + + qdasmout = fopen(fn_d, "w"); +#endif + + fh_s = FS_FOpenFileWrite(fn_s); + if(fh_s == -1) + Com_Error(ERR_DROP, "can't write %s\n", fn_s); + + // translate all instructions + pc = 0; + code = (char *)header + header->codeOffset; + + emit("start:"); + emit("or %%r8, %%r8"); // check whether to set up instruction pointers + emit("jnz main"); + emit("jmp setupinstructionpointers"); + emit("exit:"); + emit("jmp *%%r9"); + + emit("main:"); + + for ( instruction = 0; instruction < header->instructionCount; ++instruction ) + { + op = code[ pc ]; + ++pc; + + vm->instructionPointers[instruction] = pc; + +#if 0 + emit("nop"); + emit("movq $%d, %%r15", instruction); + emit("nop"); +#endif + + if(op_argsize[op] == 4) + { + iarg = *(int*)(code+pc); + pc += 4; + Dfprintf(qdasmout, "%s %8u\n", opnames[op], iarg); + } + else if(op_argsize[op] == 1) + { + barg = code[pc++]; + Dfprintf(qdasmout, "%s %8hhu\n", opnames[op], barg); + } + else + { + Dfprintf(qdasmout, "%s\n", opnames[op]); + } + emit("i_%08x:", instruction); + switch ( op ) + { + case OP_UNDEF: + NOTIMPL(op); + break; + case OP_IGNORE: + emit("nop"); + break; + case OP_BREAK: + emit("int3"); + break; + case OP_ENTER: + emit("subl $%d, %%edi", iarg); + RANGECHECK(edi); + break; + case OP_LEAVE: + emit("addl $%d, %%edi", iarg); // get rid 
of stack frame + RANGECHECK(edi); + emit("movl 0(%%r8, %%rdi, 1), %%eax"); // get return address + emit("movq $%lu, %%rbx", (unsigned long)vm->instructionPointers); + emit("cmp $-1, %%eax"); + emit("je jumptoexit%d", instruction); + emit("movl (%%rbx, %%rax, 4), %%eax"); // load new relative jump address + emit("addq %%r10, %%rax"); + emit("jmp *%%rax"); + emit("jumptoexit%d:", instruction); + emit("jmp exit"); + break; + case OP_CALL: + emit("movl 0(%%rsi), %%eax"); // get instr from stack + emit("subq $4, %%rsi"); + emit("movl $%d, 0(%%r8, %%rdi, 1)", instruction+1); // save next instruction + emit("orl %%eax, %%eax"); + emit("jl callSyscall%d", instruction); + emit("movq $%lu, %%rbx", (unsigned long)vm->instructionPointers); + emit("movl (%%rbx, %%rax, 4), %%eax"); // load new relative jump address + emit("addq %%r10, %%rax"); + emit("jmp *%%rax"); + emit("callSyscall%d:", instruction); +// emit("fnsave 4(%%rsi)"); + emit("push %%rsi"); + emit("push %%rdi"); + emit("push %%r8"); + emit("push %%r9"); + emit("push %%r10"); + emit("negl %%eax"); // convert to actual number + emit("decl %%eax"); + // first argument already in rdi + emit("movq %%rax, %%rsi"); // second argument in rsi + emit("movq $%ld, %%rax", (unsigned long)callAsmCall); + emit("callq *%%rax"); + emit("pop %%r10"); + emit("pop %%r9"); + emit("pop %%r8"); + emit("pop %%rdi"); + emit("pop %%rsi"); +// emit("frstor 4(%%rsi)"); + emit("addq $4, %%rsi"); + emit("movl %%eax, (%%rsi)"); + break; + case OP_PUSH: + emit("addq $4, %%rsi"); + break; + case OP_POP: + emit("subq $4, %%rsi"); + break; + case OP_CONST: + emit("addq $4, %%rsi"); + emit("movl $%d, 0(%%rsi)", iarg); + break; + case OP_LOCAL: + emit("movl %%edi, %%ebx"); + emit("addl $%d,%%ebx", iarg); + emit("addq $4, %%rsi"); + emit("movl %%ebx, 0(%%rsi)"); + break; + case OP_JUMP: + emit("movl 0(%%rsi), %%eax"); // get instr from stack + emit("subq $4, %%rsi"); + emit("movq $%lu, %%rbx", (unsigned long)vm->instructionPointers); + emit("movl 
(%%rbx, %%rax, 4), %%eax"); // load new relative jump address + emit("addq %%r10, %%rax"); + emit("jmp *%%rax"); + break; + case OP_EQ: + IJ("jne"); + break; + case OP_NE: + IJ("je"); + break; + case OP_LTI: + IJ("jnl"); + break; + case OP_LEI: + IJ("jnle"); + break; + case OP_GTI: + IJ("jng"); + break; + case OP_GEI: + IJ("jnge"); + break; + case OP_LTU: + IJ("jnb"); + break; + case OP_LEU: + IJ("jnbe"); + break; + case OP_GTU: + IJ("jna"); + break; + case OP_GEU: + IJ("jnae"); + break; + case OP_EQF: + FJ(0x40, "jz"); + XJ("jnz"); + break; + case OP_NEF: + FJ(0x40, "jnz"); +#ifndef USE_X87 + emit("subq $8, %%rsi"); + emit("movss 4(%%rsi), %%xmm0"); + emit("ucomiss 8(%%rsi), %%xmm0"); + emit("jp dojump_i_%08x", instruction); + emit("jz i_%08x", instruction+1); + emit("dojump_i_%08x:", instruction); + emit("jmp i_%08x", iarg); +#endif + break; + case OP_LTF: + FJ(0x01, "jz"); + XJ("jnc"); + break; + case OP_LEF: + FJ(0x41, "jz"); + XJ("ja"); + break; + case OP_GTF: + FJ(0x41, "jnz"); + XJ("jbe"); + break; + case OP_GEF: + FJ(0x01, "jnz"); + XJ("jb"); + break; + case OP_LOAD1: + emit("movl 0(%%rsi), %%eax"); // get pointer from stack + RANGECHECK(eax); + emit("movb 0(%%r8, %%rax, 1), %%al"); // deref into eax + emit("andq $255, %%rax"); + emit("movl %%eax, 0(%%rsi)"); // store on stack + break; + case OP_LOAD2: + emit("movl 0(%%rsi), %%eax"); // get pointer from stack + RANGECHECK(eax); + emit("movzwl 0(%%r8, %%rax, 1), %%eax"); // deref into eax, zero-extended so no address bits remain in the upper half + emit("movl %%eax, 0(%%rsi)"); // store on stack + break; + case OP_LOAD4: + emit("movl 0(%%rsi), %%eax"); // get pointer from stack + RANGECHECK(eax); + emit("movl 0(%%r8, %%rax, 1), %%eax"); // deref into eax + emit("movl %%eax, 0(%%rsi)"); // store on stack + break; + case OP_STORE1: + emit("movl 0(%%rsi), %%eax"); // get value from stack + emit("andq $255, %%rax"); + emit("movl -4(%%rsi), %%ebx"); // get pointer from stack + RANGECHECK(ebx); + emit("movb %%al, 0(%%r8, %%rbx, 1)"); // store in memory + emit("subq $8, 
%%rsi"); + break; + case OP_STORE2: + emit("movl 0(%%rsi), %%eax"); // get value from stack + emit("movl -4(%%rsi), %%ebx"); // get pointer from stack + RANGECHECK(ebx); + emit("movw %%ax, 0(%%r8, %%rbx, 1)"); // store low 16 bits in memory + emit("subq $8, %%rsi"); + break; + case OP_STORE4: + emit("movl -4(%%rsi), %%ebx"); // get pointer from stack + RANGECHECK(ebx); + emit("movl 0(%%rsi), %%ecx"); // get value from stack + emit("movl %%ecx, 0(%%r8, %%rbx, 1)"); // store in memory + emit("subq $8, %%rsi"); + break; + case OP_ARG: + emit("subq $4, %%rsi"); + emit("movl 4(%%rsi), %%eax"); // get value from stack + emit("movl $0x%hhx, %%ebx", barg); + emit("addl %%edi, %%ebx"); + RANGECHECK(ebx); + emit("movl %%eax, 0(%%r8,%%rbx, 1)"); // store in args space + break; + case OP_BLOCK_COPY: + if(iarg % 4) Com_Error(ERR_DROP, + "argument to OP_BLOCK_COPY not multiple of 4\n"); + + emit("subq $8, %%rsi"); + emit("movl 8(%%rsi), %%ebx"); // get pointer from stack + + emit("movl %%ebx, %%ecx"); + RANGECHECK(ecx); + emit("cmp %%ebx, %%ecx"); + emit("jne broken%d", instruction); + + emit("movl %%ecx, %%edx"); + emit("addl $%d, %%edx", iarg); + emit("addl $%d, %%ecx", iarg); + RANGECHECK(edx); + emit("cmp %%ecx, %%edx"); + emit("jne broken%d", instruction); + + emit("movl 4(%%rsi), %%eax"); // get pointer from stack + + emit("movl %%eax, %%ecx"); + RANGECHECK(ecx); + emit("cmp %%eax, %%ecx"); + emit("jne broken%d", instruction); + + emit("movl %%ecx, %%edx"); + emit("addl $%d, %%edx", iarg); + emit("addl $%d, %%ecx", iarg); + RANGECHECK(edx); + emit("cmp %%ecx, %%edx"); + emit("jne broken%d", instruction); + + emit("addq %%r8, %%rax"); // calc real address + emit("addq %%r8, %%rbx"); // calc real address + emit("movl $%d, %%ecx", iarg); + emit("shrl $2, %%ecx"); + emit("block_copy_loop_%d:", instruction); + emit("decl %%ecx"); + emit("movl 0(%%rbx, %%rcx, 4), %%edx"); + emit("movl %%edx, 0(%%rax, %%rcx, 4)"); + emit("orl %%ecx, %%ecx"); + emit("jnz block_copy_loop_%d", instruction); + 
emit("jmp i_%08x", instruction+1); + + emit("broken%d:", instruction); + emit("int3"); + + break; + case OP_SEX8: + emit("movw 0(%%rsi), %%ax"); + emit("andq $255, %%rax"); + emit("cbw"); + emit("cwde"); + emit("movl %%eax, 0(%%rsi)"); + break; + case OP_SEX16: + emit("movw 0(%%rsi), %%ax"); + emit("cwde"); + emit("movl %%eax, 0(%%rsi)"); + break; + case OP_NEGI: + emit("negl 0(%%rsi)"); + break; + case OP_ADD: + SIMPLE("addl"); + break; + case OP_SUB: + SIMPLE("subl"); + break; + case OP_DIVI: + emit("subq $4, %%rsi"); + emit("movl 0(%%rsi), %%eax"); + emit("cdq"); + emit("idivl 4(%%rsi)"); + emit("movl %%eax, 0(%%rsi)"); + break; + case OP_DIVU: + emit("subq $4, %%rsi"); + emit("movl 0(%%rsi), %%eax"); + emit("xorq %%rdx, %%rdx"); + emit("divl 4(%%rsi)"); + emit("movl %%eax, 0(%%rsi)"); + break; + case OP_MODI: + emit("subq $4, %%rsi"); + emit("movl 0(%%rsi), %%eax"); + emit("cdq"); // signed modulo: sign-extend eax into edx like OP_DIVI + emit("idivl 4(%%rsi)"); + emit("movl %%edx, 0(%%rsi)"); + break; + case OP_MODU: + emit("subq $4, %%rsi"); + emit("movl 0(%%rsi), %%eax"); + emit("xorl %%edx, %%edx"); + emit("divl 4(%%rsi)"); // unsigned modulo: zero-extended dividend like OP_DIVU + emit("movl %%edx, 0(%%rsi)"); + break; + case OP_MULI: + emit("subq $4, %%rsi"); + emit("movl 0(%%rsi), %%eax"); + emit("imull 4(%%rsi)"); + emit("movl %%eax, 0(%%rsi)"); + break; + case OP_MULU: + emit("subq $4, %%rsi"); + emit("movl 0(%%rsi), %%eax"); + emit("mull 4(%%rsi)"); + emit("movl %%eax, 0(%%rsi)"); + break; + case OP_BAND: + SIMPLE("andl"); + break; + case OP_BOR: + SIMPLE("orl"); + break; + case OP_BXOR: + SIMPLE("xorl"); + break; + case OP_BCOM: + emit("notl 0(%%rsi)"); + break; + case OP_LSH: + SHIFT("shl"); + break; + case OP_RSHI: + SHIFT("sarl"); + break; + case OP_RSHU: + SHIFT("shrl"); + break; + case OP_NEGF: +#ifdef USE_X87 + emit("flds 0(%%rsi)"); + emit("fchs"); + emit("fstps 0(%%rsi)"); +#else + emit("movl $0x80000000, %%eax"); + emit("xorl %%eax, 0(%%rsi)"); +#endif + break; + case OP_ADDF: + FSIMPLE("fadds"); + XSIMPLE("addss"); + break; + case 
OP_SUBF: + FSIMPLE("fsubs"); + XSIMPLE("subss"); + break; + case OP_DIVF: + FSIMPLE("fdivs"); + XSIMPLE("divss"); + break; + case OP_MULF: + FSIMPLE("fmuls"); + XSIMPLE("mulss"); + break; + case OP_CVIF: +#ifdef USE_X87 + emit("filds 0(%%rsi)"); + emit("fstps 0(%%rsi)"); +#else + emit("movl 0(%%rsi), %%eax"); + emit("cvtsi2ss %%eax, %%xmm0"); + emit("movss %%xmm0, 0(%%rsi)"); +#endif + break; + case OP_CVFI: +#ifdef USE_X87 + emit("flds 0(%%rsi)"); + emit("fnstcw 4(%%rsi)"); + emit("movw $0x0F7F, 8(%%rsi)"); // round toward zero + emit("fldcw 8(%%rsi)"); + emit("fistpl 0(%%rsi)"); + emit("fldcw 4(%%rsi)"); +#else + emit("movss 0(%%rsi), %%xmm0"); + emit("cvttss2si %%xmm0, %%eax"); + emit("movl %%eax, 0(%%rsi)"); +#endif + break; + default: + NOTIMPL(op); + break; + } + } + + + emit("setupinstructionpointers:"); + emit("movq $%lu, %%rax", (unsigned long)vm->instructionPointers); + for ( instruction = 0; instruction < header->instructionCount; ++instruction ) + { + emit("movl $i_%08x-start, %d(%%rax)", instruction, instruction*4); + } + emit("jmp exit"); + + emit("debugger:"); + if(1); + { + int i = 6; + while(i--) + { + emit("nop"); + emit("int3"); + } + } + + FS_Flush(fh_s); + FS_FCloseFile(fh_s); + + compiledsize = doas(fn_s, fn_o, &compiledcode); + + vm->codeBase = compiledcode; // remember to skip ELF header! 
+ vm->codeLength = compiledsize; + + entryPoint = getentrypoint(vm); + +// __asm__ __volatile__ ("int3"); + Com_Printf("computing jump table\n"); + + // call code with r8 set to zero to set up instruction pointers + __asm__ __volatile__ ( + " xorq %%r8,%%r8 \r\n" \ + " movq $doneinit,%%r9 \r\n" \ + " movq %0,%%r10 \r\n" \ + " jmp *%%r10 \r\n" \ + "doneinit: \r\n" \ + : + : "m" (entryPoint) + : "%r8", "%r9", "%r10", "%rax" + ); + +#ifdef DEBUG_VM + fflush(qdasmout); +#endif + + Com_Printf( "VM file %s compiled to %i bytes of code (0x%lx - 0x%lx)\n", vm->name, vm->codeLength, vm->codeBase, vm->codeBase+vm->codeLength ); +} + +/* +============== +VM_CallCompiled + +This function is called directly by the generated code +============== +*/ + +#ifdef DEBUG_VM +static char* memData; +#endif + +int VM_CallCompiled( vm_t *vm, int *args ) { + int programCounter; + int programStack; + int stackOnEntry; + byte *image; + void *entryPoint; + void *opStack; + int stack[1024] = { 0xDEADBEEF }; + + currentVM = vm; + + ++vm->callLevel; +// Com_Printf("entering %s level %d, call %d, arg1 = 0x%x\n", vm->name, vm->callLevel, args[0], args[1]); + + // interpret the code + vm->currentlyInterpreting = qtrue; + +// callMask = vm->dataMask; + + // we might be called recursively, so this might not be the very top + programStack = vm->programStack; + stackOnEntry = programStack; + + // set up the stack frame + image = vm->dataBase; +#ifdef DEBUG_VM + memData = (char*)image; +#endif + + programCounter = 0; + + programStack -= 48; + + *(int *)&image[ programStack + 44] = args[9]; + *(int *)&image[ programStack + 40] = args[8]; + *(int *)&image[ programStack + 36] = args[7]; + *(int *)&image[ programStack + 32] = args[6]; + *(int *)&image[ programStack + 28] = args[5]; + *(int *)&image[ programStack + 24] = args[4]; + *(int *)&image[ programStack + 20] = args[3]; + *(int *)&image[ programStack + 16] = args[2]; + *(int *)&image[ programStack + 12] = args[1]; + *(int *)&image[ programStack + 8 ] 
= args[0]; + *(int *)&image[ programStack + 4 ] = 0x77777777; // return stack + *(int *)&image[ programStack ] = -1; // will terminate the loop on return + + // off we go into generated code... + entryPoint = getentrypoint(vm); + opStack = &stack; + + __asm__ __volatile__ ( + " movq %5,%%rsi \r\n" \ + " movl %4,%%edi \r\n" \ + " movq $done,%%r9 \r\n" \ + " movq %2,%%r10 \r\n" \ + " movq %3,%%r8 \r\n" \ + " jmp *%%r10 \r\n" \ + "done: \r\n" \ + " movl %%edi, %0 \r\n" \ + " movq %%rsi, %1 \r\n" \ + : "=m" (programStack), "=m" (opStack) + : "m" (entryPoint), "m" (vm->dataBase), "m" (programStack), "m" (opStack) + : "%rsi", "%rdi", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r15", "%xmm0" + ); + + if ( opStack != &stack[1] ) { + Com_Error( ERR_DROP, "opStack corrupted in compiled code (offset %d)\n", (void*)&stack[1] - opStack); + } + if ( programStack != stackOnEntry - 48 ) { + Com_Error( ERR_DROP, "programStack corrupted in compiled code\n" ); + } + +// Com_Printf("exiting %s level %d\n", vm->name, vm->callLevel); + --vm->callLevel; + vm->programStack = stackOnEntry; + + return *(int *)opStack; +} + +#ifdef VM_X86_64_STANDALONE + +#include + +int testops(vm_t* vm) +{ + int i1, i2, vmres; + int i; + float f1, f2, fres, fvmres; + int numitests = 26; + int numftests = 11; + int ret = 0; + int testno; + int res = 0xC0DEDBAD; + + srand(time(NULL)); + + i1 = 1 + (int) (1000.0 * (rand() / (RAND_MAX + 1.0))); + i2 = 1 + (int) (1000.0 * (rand() / (RAND_MAX + 1.0))); + + if(i1 < i2) + { + i = i1; + i1 = i2; + i2 = i; + } + + f1 = i1/1.5; + f2 = i2/1.5; + + if(!i2) i2=i1; + if(!f2) f2=f1; + + printf("i1: %d i2: %d\n", i1, i2); + printf("f1: %f f2: %f\n", f1, f2); + +testintops: + for (testno = 1; testno < numitests; ++testno) + { + printf("int test %d ... 
", testno); + fflush(stdout); + + res = test(testno, i1, i2); + vmres = VM_Call(vm, testno, i1, i2); + + if(vmres == res) + { + printf("ok: %d == %d\n", res, vmres); + } + else + { + printf("failed: %d != %d\n", res, vmres); + ret = 1; + } + } + if(i1 != i2) + { + i2 = i1; + goto testintops; + } + +testfops: + i1 = *(int*)&f1; + i2 = *(int*)&f2; + + for (testno = 100; testno < 100+numftests; ++testno) + { + printf("float test %d ... ", testno); + fflush(stdout); + + res = test(testno, i1, i2); + vmres = VM_Call(vm, testno, i1, i2); + + fres = *(float*)&res; + fvmres = *(float*)&vmres; + + if(fvmres == fres) + { + printf("ok: %f == %f\n", fres, fvmres); + } + else + { + printf("failed: %f != %f\n", fres, fvmres); + ret = 1; + } + } + if(f1 > f2) + { + float t = f1; + f1 = f2; + f2 = t; + goto testfops; + } + else if(f1 < f2) + { + f2 = f1; + goto testfops; + } + + return ret; +} + +void VM_VmInfo_f( void ); + +int main(int argc, char* argv[]) +{ + size_t size; + vmHeader_t *header; + vm_t* vm[3]; + unsigned dataLength; + int i; + long args[11] = {0}; + int ret = 0xDEADBEEF; + char* mem; + + char* file = argv[1]; + + char module[128]; + + if(argc < 2) + return -1; + + strcpy(module, file); + *strchr(module, '.') = '\0'; + + vm[0] = VM_Create( module, printsyscall, interpret ); + vm[1] = VM_Create( module, printsyscall, interpret ); + vm[2] = VM_Create( module, printsyscall, interpret ); + + VM_VmInfo_f(); + + if(argc > 2) + { + for(i = 2; i < argc; ++i) + { + args[i-2] = strtol(argv[i],NULL,0); + } + + ret = VM_Call(vm[0], args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7]); + ret += VM_Call(vm[1], args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7]); + ret += VM_Call(vm[2], args[0], args[1], args[2], args[3], args[4], args[5], args[6], args[7]); + } + else + { + ret = testops(vm[0]); + ret += testops(vm[1]); + ret += testops(vm[2]); + ret += testops(vm[1]); + ret += testops(vm[0]); + } + + +#ifdef DEBUG_VM + printf("ret: %d 
[%X]\n", ret, ret); +#endif + + return 0; +} +#endif diff --git a/code/unix/Makefile b/code/unix/Makefile index 71db2bd..5bf4777 100644 --- a/code/unix/Makefile +++ b/code/unix/Makefile @@ -135,6 +135,8 @@ ifeq ($(PLATFORM),linux) OPTIMIZE = -O3 -fomit-frame-pointer -ffast-math -falign-loops=2 \ -falign-jumps=2 -falign-functions=2 -fstrength-reduce \ -fno-strict-aliasing +# experimental! you need as +# BASE_CFLAGS += -DHAVE_VM_COMPILED else ifeq ($(ARCH),i386) OPTIMIZE = -O3 -march=i686 -fomit-frame-pointer -ffast-math \ @@ -404,7 +406,7 @@ endif DO_CC=$(CC) $(CFLAGS) -o $@ -c $< DO_CXX=$(CXX) $(CFLAGS) -o $@ -c $< DO_SMP_CC=$(CC) $(CFLAGS) -DSMP -o $@ -c $< -DO_BOT_CC=$(CC) $(CFLAGS) -DBOTLIB -o $@ -c $< # $(SHLIBCFLAGS) # bk001212 +DO_BOT_CC=$(CC) $(CFLAGS) -DBOTLIB -o $@ -c $< # $(SHLIBCFLAGS) # bk001212 DO_DEBUG_CC=$(CC) $(DEBUG_CFLAGS) -o $@ -c $< DO_SHLIB_CC=$(CC) $(CFLAGS) $(SHLIBCFLAGS) -o $@ -c $< DO_SHLIB_DEBUG_CC=$(CC) $(DEBUG_CFLAGS) $(SHLIBCFLAGS) -o $@ -c $< @@ -618,6 +620,9 @@ endif ifeq ($(ARCH),x86) Q3OBJ += $(B)/client/vm_x86.o endif +ifeq ($(ARCH),x86_64) + Q3OBJ += $(B)/client/vm_x86_64.o +endif ifeq ($(ARCH),ppc) ifneq ($(VM_PPC),) @@ -902,6 +907,7 @@ $(B)/client/win_wndproc.o : $(W32DIR)/win_wndproc.c; $(DO_CC) $(DX_CFLAGS) $(B)/client/win_resource.o : $(W32DIR)/winquake.rc; $(DO_WINDRES) $(B)/client/vm_x86.o : $(CMDIR)/vm_x86.c; $(DO_CC) +$(B)/client/vm_x86_64.o : $(CMDIR)/vm_x86_64.c; $(DO_CC) ifneq ($(VM_PPC),) $(B)/client/$(VM_PPC).o : $(CMDIR)/$(VM_PPC).c; $(DO_CC) endif @@ -989,6 +995,10 @@ ifeq ($(ARCH),i386) Q3DOBJ += $(B)/ded/vm_x86.o $(B)/ded/ftola.o $(B)/ded/snapvectora.o endif +ifeq ($(ARCH),x86_64) + Q3DOBJ += $(B)/ded/vm_x86_64.o +endif + ifeq ($(ARCH),ppc) ifneq ($(VM_PPC),) Q3DOBJ += $(B)/ded/$(VM_PPC).o @@ -1069,6 +1079,7 @@ $(B)/ded/ftola.o : $(UDIR)/ftola.s; $(DO_AS) $(B)/ded/snapvectora.o : $(UDIR)/snapvectora.s; $(DO_AS) $(B)/ded/vm_x86.o : $(CMDIR)/vm_x86.c; $(DO_DED_CC) +$(B)/ded/vm_x86_64.o : $(CMDIR)/vm_x86_64.c; 
$(DO_DED_CC) ifneq ($(VM_PPC),) $(B)/ded/$(VM_PPC).o : $(CMDIR)/$(VM_PPC).c; $(DO_DED_CC) endif diff --git a/code/unix/unix_main.c b/code/unix/unix_main.c index 5c51615..9a38302 100644 --- a/code/unix/unix_main.c +++ b/code/unix/unix_main.c @@ -730,7 +730,6 @@ changed the load procedure to match VFS logic, and allow developer use #3 look in fs_basepath ================= */ -extern char *FS_BuildOSPath( const char *base, const char *game, const char *qpath ); static void* try_dlopen(const char* base, const char* gamedir, const char* fname, char* fqpath ) { -- cgit v1.2.3