diff options
author | ludwig <ludwig@edf5b092-35ff-0310-97b2-ce42778d08ea> | 2008-11-10 09:46:01 +0000 |
---|---|---|
committer | ludwig <ludwig@edf5b092-35ff-0310-97b2-ce42778d08ea> | 2008-11-10 09:46:01 +0000 |
commit | c65cbabad3729e70c8dc1d24e9bf9141727785a5 (patch) | |
tree | 361da9e59248679498fad0746f6b01e62bbcf24e | |
parent | b31e8a67d9b1f11b9943b595a4fd66d3a07dd085 (diff) | |
download | ioquake3-aero-c65cbabad3729e70c8dc1d24e9bf9141727785a5.tar.gz ioquake3-aero-c65cbabad3729e70c8dc1d24e9bf9141727785a5.zip |
new PowerPC vm
Patch by Przemysław Iskra (#3796)
git-svn-id: svn://svn.icculus.org/quake3/trunk@1482 edf5b092-35ff-0310-97b2-ce42778d08ea
-rw-r--r-- | code/qcommon/vm_powerpc.c | 2152 | ||||
-rw-r--r-- | code/qcommon/vm_powerpc_asm.c | 1009 | ||||
-rw-r--r-- | code/qcommon/vm_powerpc_asm.h | 156 |
3 files changed, 3317 insertions, 0 deletions
diff --git a/code/qcommon/vm_powerpc.c b/code/qcommon/vm_powerpc.c new file mode 100644 index 0000000..19a56f7 --- /dev/null +++ b/code/qcommon/vm_powerpc.c @@ -0,0 +1,2152 @@ +/* +=========================================================================== +Copyright (C) 2008 Przemyslaw Iskra <sparky@pld-linux.org> + +This file is part of Quake III Arena source code. + +Quake III Arena source code is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Quake III Arena source code is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Quake III Arena source code; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +=========================================================================== +*/ + +#include <sys/types.h> /* needed by sys/mman.h on OSX */ +#include <sys/mman.h> +#include <sys/time.h> +#include <time.h> +#include <stddef.h> + +#ifndef MAP_ANONYMOUS +# define MAP_ANONYMOUS MAP_ANON +#endif + +#include "vm_local.h" +#include "vm_powerpc_asm.h" + +/* + * VM_TIMES enables showing information about time spent inside + * and outside generated code + */ +//#define VM_TIMES +#ifdef VM_TIMES +#include <sys/times.h> +static clock_t time_outside_vm = 0; +static clock_t time_total_vm = 0; +#endif + +/* exit() won't be called but use it because it is marked with noreturn */ +#define DIE( reason ) \ + do { \ + Com_Error(ERR_DROP, "vm_powerpc compiler error: " reason "\n"); \ + exit(1); \ + } while(0) + +/* + * vm_powerpc uses large quantities of memory during compilation, + * Z_Malloc memory may not be enough for some big qvm files + */ + +//#define VM_SYSTEM_MALLOC +#ifdef VM_SYSTEM_MALLOC +static inline void * +VM_Malloc( size_t size ) +{ + void *mem = malloc( size ); + if ( ! mem ) + DIE( "Not enough memory" ); + + return mem; +} +# define VM_Free free +#else +# define VM_Malloc Z_Malloc +# define VM_Free Z_Free +#endif + +/* + * optimizations: + * - hole: bubble optimization (OP_CONST+instruction) + * - copy: inline OP_BLOCK_COPY for lengths under 16/32 bytes + * - mask: use rlwinm instruction as dataMask + */ + +#ifdef __OPTIMIZE__ +# define OPTIMIZE_HOLE 1 +# define OPTIMIZE_COPY 1 +# define OPTIMIZE_MASK 1 +#else +# define OPTIMIZE_HOLE 0 +# define OPTIMIZE_COPY 0 +# define OPTIMIZE_MASK 0 +#endif + +/* + * SUPPORTED TARGETS: + * - Linux 32 bits + * ( http://refspecs.freestandards.org/elf/elfspec_ppc.pdf ) + * * LR at r0 + 4 + * * Local variable space not needed + * -> store caller safe regs at 16+ + * + * - Linux 64 bits (not fully conformant) + * ( http://www.ibm.com/developerworks/linux/library/l-powasm4.html ) + * * needs "official procedure descriptors" (only first function has one) + * * LR at r0 + 16 + * * local variable space required, min 64 bytes, starts at 48 + * -> store caller safe regs at 128+ + * + * - OS X 32 bits + * ( http://developer.apple.com/documentation/DeveloperTools/Conceptual/LowLevelABI/Articles/32bitPowerPC.html ) + * * LR at r0 + 8 + * * local variable space required, min 32 bytes (?), starts at 24 + * -> store caller safe regs at 64+ + * + * - OS X 64 bits (completely untested) + * ( http://developer.apple.com/documentation/DeveloperTools/Conceptual/LowLevelABI/Articles/64bitPowerPC.html ) + * * LR at r0 + 16 + * * local variable space required, min 64 bytes (?), starts at 48 + * -> store caller safe regs at 128+ + */ + +/* Select Length - first value on 32 bits, second on 64 */ +#ifdef __PPC64__ +# define SL( a, b ) (b) +#else +# define SL( a, b ) (a) +#endif + +/* Select ABI - first for ELF, second for OS X */ +#ifdef __ELF__ +# define SA( a, b ) (a) +#else +# define SA( a, b ) (b) +#endif + +#define ELF32 SL( SA( 1, 0 ), 0 ) +#define ELF64 SL( 0, SA( 1, 0 ) ) +#define OSX32 SL( SA( 0, 1 ), 0 ) +#define OSX64 SL( 0, SA( 0, 1 ) ) + +/* native length load/store instructions ( L stands for long ) */ +#define iSTLU SL( iSTWU, iSTDU ) +#define iSTL SL( iSTW, iSTD ) +#define iLL SL( iLWZ, iLD ) +#define iLLX SL( iLWZX, iLDX ) + +/* register length */ +#define GPRLEN SL( 4, 8 ) +#define FPRLEN (8) +/* shift that many bits to obtain value miltiplied by GPRLEN */ +#define GPRLEN_SHIFT SL( 2, 3 ) + +/* Link register position */ +#define STACK_LR SL( SA( 4, 8 ), 16 ) +/* register save position */ +#define STACK_SAVE SL( SA( 16, 64 ), 128 ) +/* temporary space, for float<->int exchange */ +#define STACK_TEMP SL( SA( 8, 24 ), 48 ) +/* red zone temporary space, used instead of STACK_TEMP if stack isn't + * prepared properly */ +#define STACK_RTEMP (-16) + +#if ELF64 +/* + * Official Procedure Descriptor + * we need to prepare one for generated code if we want to call it + * as function + */ +typedef struct { + void *function; + void *toc; + void *env; +} opd_t; +#endif + + +/* + * opcode information table: + * - length of immediate value + * - returned register type + * - required register(s) type + */ +#define opImm0 0x0000 /* no immediate */ +#define opImm1 0x0001 /* 1 byte immadiate value after opcode */ +#define opImm4 0x0002 /* 4 bytes immediate value after opcode */ + +#define opRet0 0x0000 /* returns nothing */ +#define opRetI 0x0004 /* returns integer */ +#define opRetF 0x0008 /* returns float */ +#define opRetIF (opRetI | opRetF) /* returns integer or float */ + +#define opArg0 0x0000 /* requires nothing */ +#define opArgI 0x0010 /* requires integer(s) */ +#define opArgF 0x0020 /* requires float(s) */ +#define opArgIF (opArgI | opArgF) /* requires integer or float */ + +#define opArg2I 0x0040 /* requires second argument, integer */ +#define opArg2F 0x0080 /* requires second argument, float */ +#define opArg2IF (opArg2I | opArg2F) /* requires second argument, integer or float */ + +static const unsigned char vm_opInfo[256] = +{ + [OP_UNDEF] = opImm0, + [OP_IGNORE] = opImm0, + [OP_BREAK] = opImm0, + [OP_ENTER] = opImm4, + /* OP_LEAVE has to accept floats, they will be converted to ints */ + [OP_LEAVE] = opImm4 | opRet0 | opArgIF, + /* only STORE4 and POP use values from OP_CALL, + * no need to convert floats back */ + [OP_CALL] = opImm0 | opRetI | opArgI, + [OP_PUSH] = opImm0 | opRetIF, + [OP_POP] = opImm0 | opRet0 | opArgIF, + [OP_CONST] = opImm4 | opRetIF, + [OP_LOCAL] = opImm4 | opRetI, + [OP_JUMP] = opImm0 | opRet0 | opArgI, + + [OP_EQ] = opImm4 | opRet0 | opArgI | opArg2I, + [OP_NE] = opImm4 | opRet0 | opArgI | opArg2I, + [OP_LTI] = opImm4 | opRet0 | opArgI | opArg2I, + [OP_LEI] = opImm4 | opRet0 | opArgI | opArg2I, + [OP_GTI] = opImm4 | opRet0 | opArgI | opArg2I, + [OP_GEI] = opImm4 | opRet0 | opArgI | opArg2I, + [OP_LTU] = opImm4 | opRet0 | opArgI | opArg2I, + [OP_LEU] = opImm4 | opRet0 | opArgI | opArg2I, + [OP_GTU] = opImm4 | opRet0 | opArgI | opArg2I, + [OP_GEU] = opImm4 | opRet0 | opArgI | opArg2I, + [OP_EQF] = opImm4 | opRet0 | opArgF | opArg2F, + [OP_NEF] = opImm4 | opRet0 | opArgF | opArg2F, + [OP_LTF] = opImm4 | opRet0 | opArgF | opArg2F, + [OP_LEF] = opImm4 | opRet0 | opArgF | opArg2F, + [OP_GTF] = opImm4 | opRet0 | opArgF | opArg2F, + [OP_GEF] = opImm4 | opRet0 | opArgF | opArg2F, + + [OP_LOAD1] = opImm0 | opRetI | opArgI, + [OP_LOAD2] = opImm0 | opRetI | opArgI, + [OP_LOAD4] = opImm0 | opRetIF| opArgI, + [OP_STORE1] = opImm0 | opRet0 | opArgI | opArg2I, + [OP_STORE2] = opImm0 | opRet0 | opArgI | opArg2I, + [OP_STORE4] = opImm0 | opRet0 | opArgIF| opArg2I, + [OP_ARG] = opImm1 | opRet0 | opArgIF, + [OP_BLOCK_COPY] = opImm4 | opRet0 | opArgI | opArg2I, + + [OP_SEX8] = opImm0 | opRetI | opArgI, + [OP_SEX16] = opImm0 | opRetI | opArgI, + [OP_NEGI] = opImm0 | opRetI | opArgI, + [OP_ADD] = opImm0 | opRetI | opArgI | opArg2I, + [OP_SUB] = opImm0 | opRetI | opArgI | opArg2I, + [OP_DIVI] = opImm0 | opRetI | opArgI | opArg2I, + [OP_DIVU] = opImm0 | opRetI | opArgI | opArg2I, + [OP_MODI] = opImm0 | opRetI | opArgI | opArg2I, + [OP_MODU] = opImm0 | opRetI | opArgI | opArg2I, + [OP_MULI] = opImm0 | opRetI | opArgI | opArg2I, + [OP_MULU] = opImm0 | opRetI | opArgI | opArg2I, + [OP_BAND] = opImm0 | opRetI | opArgI | opArg2I, + [OP_BOR] = opImm0 | opRetI | opArgI | opArg2I, + [OP_BXOR] = opImm0 | opRetI | opArgI | opArg2I, + [OP_BCOM] = opImm0 | opRetI | opArgI, + [OP_LSH] = opImm0 | opRetI | opArgI | opArg2I, + [OP_RSHI] = opImm0 | opRetI | opArgI | opArg2I, + [OP_RSHU] = opImm0 | opRetI | opArgI | opArg2I, + [OP_NEGF] = opImm0 | opRetF | opArgF, + [OP_ADDF] = opImm0 | opRetF | opArgF | opArg2F, + [OP_SUBF] = opImm0 | opRetF | opArgF | opArg2F, + [OP_DIVF] = opImm0 | opRetF | opArgF | opArg2F, + [OP_MULF] = opImm0 | opRetF | opArgF | opArg2F, + [OP_CVIF] = opImm0 | opRetF | opArgI, + [OP_CVFI] = opImm0 | opRetI | opArgF, +}; + +/* + * source instruction data + */ +typedef struct source_instruction_s source_instruction_t; +struct source_instruction_s { + // opcode + unsigned long int op; + + // number of instruction + unsigned long int i_count; + + // immediate value (if any) + union { + unsigned int i; + signed int si; + signed short ss[2]; + unsigned short us[2]; + unsigned char b; + } arg; + + // required and returned registers + unsigned char regA1; + unsigned char regA2; + unsigned char regR; + unsigned char regPos; + + // next instruction + source_instruction_t *next; +}; + + + +/* + * read-only data needed by the generated code + */ +typedef struct VM_Data { + // length of this struct + data + size_t dataLength; + // compiled code size (in bytes) + // it only is code size, without the data + size_t codeLength; + + // function pointers, no use to waste registers for them + long int (* AsmCall)( int, int ); + void (* BlockCopy )( unsigned int, unsigned int, unsigned int ); + + // instruction pointers, rarely used so don't waste register + ppc_instruction_t *iPointers; + + // data mask for load and store, not used if optimized + unsigned int dataMask; + + // fixed number used to convert from integer to float + unsigned int floatBase; // 0x59800004 + +#if ELF64 + // official procedure descriptor + opd_t opd; +#endif + + // additional constants, for floating point OP_CONST + // this data has dynamic length, thus '0' here + unsigned int data[0]; +} vm_data_t; + +#ifdef offsetof +# define VM_Data_Offset( field ) offsetof( vm_data_t, field ) +#else +# define OFFSET( structName, field ) \ + ( (void *)&(((structName *)NULL)->field) - NULL ) +# define VM_Data_Offset( field ) OFFSET( vm_data_t, field ) +#endif + + +/* + * functions used by generated code + */ +static long int +VM_AsmCall( int callSyscallInvNum, int callProgramStack ) +{ + vm_t *savedVM = currentVM; + long int i, ret; +#ifdef VM_TIMES + struct tms start_time, stop_time; + clock_t saved_time = time_outside_vm; + times( &start_time ); +#endif + + // save the stack to allow recursive VM entry + currentVM->programStack = callProgramStack - 4; + + // we need to convert ints to longs on 64bit powerpcs + if ( sizeof( intptr_t ) == sizeof( int ) ) { + intptr_t *argPosition = (int *)((byte *)currentVM->dataBase + callProgramStack + 4); + + // generated code does not invert syscall number + argPosition[ 0 ] = -1 - callSyscallInvNum; + + ret = currentVM->systemCall( argPosition ); + } else { + intptr_t args[11]; + + // generated code does not invert syscall number + args[0] = -1 - callSyscallInvNum; + + int *argPosition = (int *)((byte *)currentVM->dataBase + callProgramStack + 4); + for( i = 1; i < 11; i++ ) + args[ i ] = argPosition[ i ]; + + ret = currentVM->systemCall( args ); + } + + currentVM = savedVM; + +#ifdef VM_TIMES + times( &stop_time ); + time_outside_vm = saved_time + ( stop_time.tms_utime - start_time.tms_utime ); +#endif + + return ret; +} + +static void +VM_BlockCopy( unsigned int dest, unsigned int src, unsigned int count ) +{ + unsigned dataMask = currentVM->dataMask; + + if ( (dest & dataMask) != dest + || (src & dataMask) != src + || ((dest+count) & dataMask) != dest + count + || ((src+count) & dataMask) != src + count) + { + DIE( "OP_BLOCK_COPY out of range!"); + } + + memcpy( currentVM->dataBase+dest, currentVM->dataBase+src, count ); +} + + +/* + * code-block descriptors + */ +typedef struct dest_instruction dest_instruction_t; +typedef struct symbolic_jump symbolic_jump_t; + +struct symbolic_jump { + // number of source instruction it has to jump to + unsigned long int jump_to; + + // jump condition true/false, (4*cr7+(eq|gt..)) + long int bo, bi; + + // extensions / modifiers (branch-link) + unsigned long ext; + + // dest_instruction refering to this jump + dest_instruction_t *parent; + + // next jump + symbolic_jump_t *nextJump; +}; + +struct dest_instruction { + // position in the output chain + unsigned long int count; + + // source instruction number + unsigned long int i_count; + + // exact (for instructins), or maximum (for jump) length + unsigned short length; + + dest_instruction_t *next; + + // if the instruction is a jump than jump will be non NULL + symbolic_jump_t *jump; + + // if jump is NULL than all the instructions will be here + ppc_instruction_t code[0]; +}; + +// first and last instruction, +// di_first is a dummy instruction +static dest_instruction_t *di_first = NULL, *di_last = NULL; +// number of instructions +static unsigned long int di_count = 0; +// pointers needed to compute local jumps, those aren't pointers to +// actual instructions, just used to check how long the jump is going +// to be and whether it is positive or negative +static dest_instruction_t **di_pointers = NULL; + +// output instructions which does not come from source code +// use false i_count value +#define FALSE_ICOUNT 0xffffffff + + +/* + * append specified instructions at the end of instruction chain + */ +static void +PPC_Append( + dest_instruction_t *di_now, + unsigned long int i_count + ) +{ + di_now->count = di_count++; + di_now->i_count = i_count; + di_now->next = NULL; + + di_last->next = di_now; + di_last = di_now; + + if ( i_count != FALSE_ICOUNT ) { + if ( ! di_pointers[ i_count ] ) + di_pointers[ i_count ] = di_now; + } +} + +/* + * make space for instructions and append + */ +static void +PPC_AppendInstructions( + unsigned long int i_count, + size_t num_instructions, + const ppc_instruction_t *is + ) +{ + if ( num_instructions < 0 ) + num_instructions = 0; + size_t iBytes = sizeof( ppc_instruction_t ) * num_instructions; + dest_instruction_t *di_now = VM_Malloc( sizeof( dest_instruction_t ) + iBytes ); + + di_now->length = num_instructions; + di_now->jump = NULL; + + if ( iBytes > 0 ) + memcpy( &(di_now->code[0]), is, iBytes ); + + PPC_Append( di_now, i_count ); +} + +/* + * create symbolic jump and append + */ +static symbolic_jump_t *sj_first = NULL, *sj_last = NULL; +static void +PPC_PrepareJump( + unsigned long int i_count, + unsigned long int dest, + long int bo, + long int bi, + unsigned long int ext + ) +{ + dest_instruction_t *di_now = VM_Malloc( sizeof( dest_instruction_t ) ); + symbolic_jump_t *sj = VM_Malloc( sizeof( symbolic_jump_t ) ); + + sj->jump_to = dest; + sj->bo = bo; + sj->bi = bi; + sj->ext = ext; + sj->parent = di_now; + sj->nextJump = NULL; + + sj_last->nextJump = sj; + sj_last = sj; + + di_now->length = (bo == branchAlways ? 1 : 2); + di_now->jump = sj; + + PPC_Append( di_now, i_count ); +} + +/* + * simplyfy instruction emission + */ +#define emitStart( i_cnt ) \ + unsigned long int i_count = i_cnt; \ + size_t num_instructions = 0; \ + long int force_emit = 0; \ + ppc_instruction_t instructions[50]; + +#define pushIn( inst ) \ + (instructions[ num_instructions++ ] = inst) +#define in( inst, args... ) pushIn( IN( inst, args ) ) + +#define emitEnd() \ + do{ \ + if ( num_instructions || force_emit ) \ + PPC_AppendInstructions( i_count, num_instructions, instructions );\ + num_instructions = 0; \ + } while(0) + +#define emitJump( dest, bo, bi, ext ) \ + do { \ + emitEnd(); \ + PPC_PrepareJump( i_count, dest, bo, bi, ext ); \ + } while(0) + + +/* + * definitions for creating .data section, + * used in cases where constant float is needed + */ +#define LOCAL_DATA_CHUNK 50 +typedef struct local_data_s local_data_t; +struct local_data_s { + // number of data in this structure + long int count; + + // data placeholder + unsigned int data[ LOCAL_DATA_CHUNK ]; + + // next chunk, if this one wasn't enough + local_data_t *next; +}; + +// first data chunk +static local_data_t *data_first = NULL; +// total number of data +static long int data_acc = 0; + +/* + * append the data and return its offset + */ +static size_t +PPC_PushData( unsigned int datum ) +{ + local_data_t *d_now = data_first; + long int accumulated = 0; + + // check whether we have this one already + do { + long int i; + for ( i = 0; i < d_now->count; i++ ) { + if ( d_now->data[ i ] == datum ) { + accumulated += i; + return VM_Data_Offset( data[ accumulated ] ); + } + } + if ( !d_now->next ) + break; + + accumulated += d_now->count; + d_now = d_now->next; + } while (1); + + // not found, need to append + accumulated += d_now->count; + + // last chunk is full, create new one + if ( d_now->count >= LOCAL_DATA_CHUNK ) { + d_now->next = VM_Malloc( sizeof( local_data_t ) ); + d_now = d_now->next; + d_now->count = 0; + d_now->next = NULL; + } + + d_now->data[ d_now->count ] = datum; + d_now->count += 1; + + data_acc = accumulated + 1; + + return VM_Data_Offset( data[ accumulated ] ); +} + +/* + * find leading zeros in dataMask to implement it with + * "rotate and mask" instruction + */ +static long int fastMaskHi = 0, fastMaskLo = 31; +static void +PPC_MakeFastMask( int mask ) +{ +#if defined( __GNUC__ ) && ( __GNUC__ >= 4 || ( __GNUC__ == 3 && __GNUC_MINOR__ >= 4 ) ) + /* count leading zeros */ + fastMaskHi = __builtin_clz( mask ); + + /* count trailing zeros */ + fastMaskLo = 31 - __builtin_ctz( mask ); +#else + fastMaskHi = 0; + while ( ( mask & ( 0x80000000 >> fastMaskHi ) ) == 0 ) + fastMaskHi++; + + fastMaskLo = 31; + while ( ( mask & ( 0x80000000 >> fastMaskLo ) ) == 0 ) + fastMaskLo--; +#endif +} + + +/* + * register definitions + */ + +/* registers which are global for generated code */ + +// pointer to VM_Data (constant) +#define rVMDATA r14 +// vm->dataBase (constant) +#define rDATABASE r15 +// programStack (variable) +#define rPSTACK r16 + +/* + * function local registers, + * + * normally only volatile registers are used, but if there aren't enough + * or function has to preserve some value while calling annother one + * then caller safe registers are used as well + */ +static const long int gpr_list[] = { + /* caller safe registers, normally only one is used */ + r24, r23, r22, r21, + r20, r19, r18, r17, + /* volatile registers (preferred), + * normally no more than 5 is used */ + r3, r4, r5, r6, + r7, r8, r9, r10, +}; +static const long int gpr_vstart = 8; /* position of first volatile register */ +static const long int gpr_total = sizeof( gpr_list ) / sizeof( gpr_list[0] ); + +static const long int fpr_list[] = { + /* static registers, normally none is used */ + f20, f21, f19, f18, + f17, f16, f15, f14, + /* volatile registers (preferred), + * normally no more than 7 is used */ + f0, f1, f2, f3, + f4, f5, f6, f7, + f8, f9, f10, f11, + f12, f13, +}; +static const long int fpr_vstart = 8; +static const long int fpr_total = sizeof( fpr_list ) / sizeof( fpr_list[0] ); + +/* + * prepare some dummy structures and emit init code + */ +static void +PPC_CompileInit( void ) +{ + di_first = di_last = VM_Malloc( sizeof( dest_instruction_t ) ); + di_first->count = 0; + di_first->next = NULL; + di_first->jump = NULL; + + sj_first = sj_last = VM_Malloc( sizeof( symbolic_jump_t ) ); + sj_first->nextJump = NULL; + + data_first = VM_Malloc( sizeof( local_data_t ) ); + data_first->count = 0; + data_first->next = NULL; + + /* + * init function: + * saves old values of global registers and sets our values + * function prototype is: + * int begin( void *data, int programStack, void *vm->dataBase ) + */ + + /* first instruction must not be placed on instruction list */ + emitStart( FALSE_ICOUNT ); + + long int stack = STACK_SAVE + 4 * GPRLEN; + + in( iMFLR, r0 ); + in( iSTLU, r1, -stack, r1 ); + in( iSTL, rVMDATA, STACK_SAVE + 0 * GPRLEN, r1 ); + in( iSTL, rPSTACK, STACK_SAVE + 1 * GPRLEN, r1 ); + in( iSTL, rDATABASE, STACK_SAVE + 2 * GPRLEN, r1 ); + in( iSTL, r0, stack + STACK_LR, r1 ); + in( iMR, rVMDATA, r3 ); + in( iMR, rPSTACK, r4 ); + in( iMR, rDATABASE, r5 ); + in( iBL, +4*8 ); // LINK JUMP: first generated instruction | XXX jump ! + in( iLL, rVMDATA, STACK_SAVE + 0 * GPRLEN, r1 ); + in( iLL, rPSTACK, STACK_SAVE + 1 * GPRLEN, r1 ); + in( iLL, rDATABASE, STACK_SAVE + 2 * GPRLEN, r1 ); + in( iLL, r0, stack + STACK_LR, r1 ); + in( iMTLR, r0 ); + in( iADDI, r1, r1, stack ); + in( iBLR ); + + emitEnd(); +} + +// rFIRST is the copy of the top value on the opstack +#define rFIRST (gpr_list[ gpr_pos - 1]) +// second value on the opstack +#define rSECOND (gpr_list[ gpr_pos - 2 ]) +// temporary registers, not on the opstack +#define rTEMP(x) (gpr_list[ gpr_pos + x ]) +#define rTMP rTEMP(0) + +#define fFIRST (fpr_list[ fpr_pos - 1 ]) +#define fSECOND (fpr_list[ fpr_pos - 2 ]) +#define fTEMP(x) (fpr_list[ fpr_pos + x ]) +#define fTMP fTEMP(0) + +// register types +#define rTYPE_STATIC 0x01 +#define rTYPE_FLOAT 0x02 + +// what type should this opcode return +#define RET_INT ( !(i_now->regR & rTYPE_FLOAT) ) +#define RET_FLOAT ( i_now->regR & rTYPE_FLOAT ) +// what type should it accept +#define ARG_INT ( ! i_now->regA1 ) +#define ARG_FLOAT ( i_now->regA1 ) +#define ARG2_INT ( ! i_now->regA2 ) +#define ARG2_FLOAT ( i_now->regA2 ) + +/* + * emit OP_CONST, called if nothing has used the const value directly + */ +static void +PPC_EmitConst( source_instruction_t * const i_const ) +{ + emitStart( i_const->i_count ); + + if ( !(i_const->regR & rTYPE_FLOAT) ) { + // gpr_pos needed for "rFIRST" to work + long int gpr_pos = i_const->regPos; + + if ( i_const->arg.si >= -0x8000 && i_const->arg.si < 0x8000 ) { + in( iLI, rFIRST, i_const->arg.si ); + } else if ( i_const->arg.i < 0x10000 ) { + in( iLI, rFIRST, 0 ); + in( iORI, rFIRST, rFIRST, i_const->arg.i ); + } else { + in( iLIS, rFIRST, i_const->arg.ss[ 0 ] ); + if ( i_const->arg.us[ 1 ] != 0 ) + in( iORI, rFIRST, rFIRST, i_const->arg.us[ 1 ] ); + } + + } else { + // fpr_pos needed for "fFIRST" to work + long int fpr_pos = i_const->regPos; + + // there's no good way to generate the data, + // just read it from data section + in( iLFS, fFIRST, PPC_PushData( i_const->arg.i ), rVMDATA ); + } + + emitEnd(); +} +#define maybeEmitConst() if ( i_const ) PPC_EmitConst( i_const ) + +/* + * emit empty instruction, just sets the needed pointers + */ +static inline void +PPC_EmitNull( source_instruction_t * const i_null ) +{ + PPC_AppendInstructions( i_null->i_count, 0, NULL ); +} +#define emitFalseConst() PPC_EmitNull( i_const ) + + +/* + * analize function for register usage and whether it needs stack (r1) prepared + */ +static void +VM_AnalyzeFunction( + source_instruction_t * const i_first, + long int *prepareStack, + long int *gpr_start_pos, + long int *fpr_start_pos + ) +{ + source_instruction_t *i_now = i_first; + + source_instruction_t *value_provider[20] = { NULL }; + unsigned long int opstack_depth = 0; + + /* + * first step: + * remember what codes returned some value and mark the value type + * when we get to know what it should be + */ + while ( (i_now = i_now->next) ) { + unsigned long int op = i_now->op; + unsigned long int opi = vm_opInfo[ op ]; + + if ( opi & opArgIF ) { + assert( opstack_depth > 0 ); + + opstack_depth--; + source_instruction_t *vp = value_provider[ opstack_depth ]; + unsigned long int vpopi = vm_opInfo[ vp->op ]; + + if ( (opi & opArgI) && (vpopi & opRetI) ) { + // instruction accepts integer, provider returns integer + //vp->regR |= rTYPE_INT; + //i_now->regA1 = rTYPE_INT; + } else if ( (opi & opArgF) && (vpopi & opRetF) ) { + // instruction accepts float, provider returns float + vp->regR |= rTYPE_FLOAT; // use OR here - could be marked as static + i_now->regA1 = rTYPE_FLOAT; + } else { + // instruction arg type does not agree with + // provider return type + DIE( "unrecognized instruction combination" ); + } + + } + if ( opi & opArg2IF ) { + assert( opstack_depth > 0 ); + + opstack_depth--; + source_instruction_t *vp = value_provider[ opstack_depth ]; + unsigned long int vpopi = vm_opInfo[ vp->op ]; + + if ( (opi & opArg2I) && (vpopi & opRetI) ) { + // instruction accepts integer, provider returns integer + //vp->regR |= rTYPE_INT; + //i_now->regA2 = rTYPE_INT; + } else if ( (opi & opArg2F) && (vpopi & opRetF) ) { + // instruction accepts float, provider returns float + vp->regR |= rTYPE_FLOAT; // use OR here - could be marked as static + i_now->regA2 = rTYPE_FLOAT; + } else { + // instruction arg type does not agree with + // provider return type + DIE( "unrecognized instruction combination" ); + } + } + + + if ( + ( op == OP_CALL ) + || + ( op == OP_BLOCK_COPY && ( i_now->arg.i > SL( 16, 32 ) || !OPTIMIZE_COPY ) ) + ) { + long int i; + *prepareStack = 1; + // force caller safe registers so we won't have to save them + for ( i = 0; i < opstack_depth; i++ ) { + source_instruction_t *vp = value_provider[ i ]; + vp->regR |= rTYPE_STATIC; + } + } + + + if ( opi & opRetIF ) { + value_provider[ opstack_depth ] = i_now; + opstack_depth++; + } + } + + /* + * second step: + * now that we know register types; compute exactly how many registers + * of each type we need + */ + + i_now = i_first; + long int needed_reg[4] = {0,0,0,0}, max_reg[4] = {0,0,0,0}; + opstack_depth = 0; + while ( (i_now = i_now->next) ) { + unsigned long int op = i_now->op; + unsigned long int opi = vm_opInfo[ op ]; + + if ( opi & opArgIF ) { + assert( opstack_depth > 0 ); + opstack_depth--; + source_instruction_t *vp = value_provider[ opstack_depth ]; + + needed_reg[ ( vp->regR & 2 ) ] -= 1; + if ( vp->regR & 1 ) // static + needed_reg[ ( vp->regR & 3 ) ] -= 1; + } + if ( opi & opArg2IF ) { + assert( opstack_depth > 0 ); + opstack_depth--; + source_instruction_t *vp = value_provider[ opstack_depth ]; + + needed_reg[ ( vp->regR & 2 ) ] -= 1; + if ( vp->regR & 1 ) // static + needed_reg[ ( vp->regR & 3 ) ] -= 1; + } + + if ( opi & opRetIF ) { + long int i; + value_provider[ opstack_depth ] = i_now; + opstack_depth++; + + i = i_now->regR & 2; + needed_reg[ i ] += 1; + if ( max_reg[ i ] < needed_reg[ i ] ) + max_reg[ i ] = needed_reg[ i ]; + + i = i_now->regR & 3; + if ( i & 1 ) { + needed_reg[ i ] += 1; + if ( max_reg[ i ] < needed_reg[ i ] ) + max_reg[ i ] = needed_reg[ i ]; + } + } + } + + long int gpr_start = gpr_vstart; + const long int gpr_volatile = gpr_total - gpr_vstart; + if ( max_reg[ 1 ] > 0 || max_reg[ 0 ] > gpr_volatile ) { + // max_reg[ 0 ] - all gprs needed + // max_reg[ 1 ] - static gprs needed + long int max = max_reg[ 0 ] - gpr_volatile; + if ( max_reg[ 1 ] > max ) + max = max_reg[ 1 ]; + if ( max > gpr_vstart ) { + /* error */ + DIE( "Need more GPRs" ); + } + + gpr_start -= max; + + // need stack to save caller safe registers + *prepareStack = 1; + } + *gpr_start_pos = gpr_start; + + long int fpr_start = fpr_vstart; + const long int fpr_volatile = fpr_total - fpr_vstart; + if ( max_reg[ 3 ] > 0 || max_reg[ 2 ] > fpr_volatile ) { + // max_reg[ 2 ] - all fprs needed + // max_reg[ 3 ] - static fprs needed + long int max = max_reg[ 2 ] - fpr_volatile; + if ( max_reg[ 3 ] > max ) + max = max_reg[ 3 ]; + if ( max > fpr_vstart ) { + /* error */ + DIE( "Need more FPRs" ); + } + + fpr_start -= max; + + // need stack to save caller safe registers + *prepareStack = 1; + } + *fpr_start_pos = fpr_start; +} + +/* + * translate opcodes to ppc instructions, + * it works on functions, not on whole code at once + */ +static void +VM_CompileFunction( source_instruction_t * const i_first ) +{ + long int prepareStack = 0; + long int gpr_start_pos, fpr_start_pos; + + VM_AnalyzeFunction( i_first, &prepareStack, &gpr_start_pos, &fpr_start_pos ); + + long int gpr_pos = gpr_start_pos, fpr_pos = fpr_start_pos; + + // OP_CONST combines well with many opcodes so we treat it in a special way + source_instruction_t *i_const = NULL; + source_instruction_t *i_now = i_first; + + // how big the stack has to be + long int save_space = STACK_SAVE; + { + if ( gpr_start_pos < gpr_vstart ) + save_space += (gpr_vstart - gpr_start_pos) * GPRLEN; + save_space = ( save_space + 15 ) & ~0x0f; + + if ( fpr_start_pos < fpr_vstart ) + save_space += (fpr_vstart - fpr_start_pos) * FPRLEN; + save_space = ( save_space + 15 ) & ~0x0f; + } + + long int stack_temp = prepareStack ? STACK_TEMP : STACK_RTEMP; + + while ( (i_now = i_now->next) ) { + emitStart( i_now->i_count ); + + switch ( i_now->op ) + { + default: + case OP_UNDEF: + case OP_IGNORE: + maybeEmitConst(); + in( iNOP ); + break; + + case OP_BREAK: + maybeEmitConst(); + // force SEGV + in( iLWZ, r0, 0, r0 ); + break; + + case OP_ENTER: + if ( i_const ) + DIE( "Weird opcode order" ); + + // don't prepare stack if not needed + if ( prepareStack ) { + long int i, save_pos = STACK_SAVE; + + in( iMFLR, r0 ); + in( iSTLU, r1, -save_space, r1 ); + in( iSTL, r0, save_space + STACK_LR, r1 ); + + /* save registers */ + for ( i = gpr_start_pos; i < gpr_vstart; i++ ) { + in( iSTL, gpr_list[ i ], save_pos, r1 ); + save_pos += GPRLEN; + } + save_pos = ( save_pos + 15 ) & ~0x0f; + + for ( i = fpr_start_pos; i < fpr_vstart; i++ ) { + in( iSTFD, fpr_list[ i ], save_pos, r1 ); + save_pos += FPRLEN; + } + prepareStack = 2; + } + + in( iADDI, rPSTACK, rPSTACK, - i_now->arg.si ); + break; + + case OP_LEAVE: + if ( i_const ) { + emitFalseConst(); + + if ( i_const->regR & rTYPE_FLOAT) + DIE( "constant float in OP_LEAVE" ); + + if ( i_const->arg.si >= -0x8000 && i_const->arg.si < 0x8000 ) { + in( iLI, r3, i_const->arg.si ); + } else if ( i_const->arg.i < 0x10000 ) { + in( iLI, r3, 0 ); + in( iORI, r3, r3, i_const->arg.i ); + } else { + in( iLIS, r3, i_const->arg.ss[ 0 ] ); + if ( i_const->arg.us[ 1 ] != 0 ) + in( iORI, r3, r3, i_const->arg.us[ 1 ] ); + } + gpr_pos--; + } else { + /* place return value in r3 */ + if ( ARG_INT ) { + if ( rFIRST != r3 ) + in( iMR, r3, rFIRST ); + gpr_pos--; + } else { + in( iSTFS, fFIRST, stack_temp, r1 ); + in( iLWZ, r3, stack_temp, r1 ); + fpr_pos--; + } + } + + // don't undo stack if not prepared + if ( prepareStack >= 2 ) { + long int i, save_pos = STACK_SAVE; + + in( iLL, r0, save_space + STACK_LR, r1 ); + + + /* restore registers */ + for ( i = gpr_start_pos; i < gpr_vstart; i++ ) { + in( iLL, gpr_list[ i ], save_pos, r1 ); + save_pos += GPRLEN; + } + save_pos = ( save_pos + 15 ) & ~0x0f; + for ( i = fpr_start_pos; i < fpr_vstart; i++ ) { + in( iLFD, fpr_list[ i ], save_pos, r1 ); + save_pos += FPRLEN; + } + + in( iMTLR, r0 ); + in( iADDI, r1, r1, save_space ); + } + in( iADDI, rPSTACK, rPSTACK, i_now->arg.si); + in( iBLR ); + assert( gpr_pos == gpr_start_pos ); + assert( fpr_pos == fpr_start_pos ); + break; + + case OP_CALL: + if ( i_const ) { + emitFalseConst(); + + if ( i_const->arg.si >= 0 ) { + emitJump( + i_const->arg.i, + branchAlways, 0, branchExtLink + ); + } else { + /* syscall */ + in( iLL, r0, VM_Data_Offset( AsmCall ), rVMDATA ); + + in( iLI, r3, i_const->arg.si ); // negative value + in( iMR, r4, rPSTACK ); // push PSTACK on argument list + + in( iMTCTR, r0 ); + in( iBCTRL ); + } + if ( rFIRST != r3 ) + in( iMR, rFIRST, r3 ); + } else { + + in( iCMPWI, cr7, rFIRST, 0 ); + in( iBLTm, cr7, +4*5 /* syscall */ ); // XXX jump ! + /* instruction call */ + + // get instruction address + in( iLL, r0, VM_Data_Offset( iPointers ), rVMDATA ); + in( iRLWINM, rFIRST, rFIRST, GPRLEN_SHIFT, 0, 31-GPRLEN_SHIFT ); // mul * GPRLEN + in( iLLX, r0, rFIRST, r0 ); // load pointer + + in( iB, +4*(3 + (rFIRST != r3 ? 1 : 0) ) ); // XXX jump ! + + /* syscall */ + in( iLL, r0, VM_Data_Offset( AsmCall ), rVMDATA ); // get asmCall pointer + /* rFIRST can be r3 or some static register */ + if ( rFIRST != r3 ) + in( iMR, r3, rFIRST ); // push OPSTACK top value on argument list + in( iMR, r4, rPSTACK ); // push PSTACK on argument list + + /* common code */ + in( iMTCTR, r0 ); + in( iBCTRL ); + + if ( rFIRST != r3 ) + in( iMR, rFIRST, r3 ); // push return value on the top of the opstack + } + break; + + case OP_PUSH: + maybeEmitConst(); + if ( RET_INT ) + gpr_pos++; + else + fpr_pos++; + /* no instructions here */ + force_emit = 1; + break; + + case OP_POP: + maybeEmitConst(); + if ( ARG_INT ) + gpr_pos--; + else + fpr_pos--; + /* no instructions here */ + force_emit = 1; + break; + + case OP_CONST: + maybeEmitConst(); + /* nothing here */ + break; + + case OP_LOCAL: + maybeEmitConst(); + { + signed long int hi, lo; + hi = i_now->arg.ss[ 0 ]; + lo = i_now->arg.ss[ 1 ]; + if ( lo < 0 ) + hi += 1; + + gpr_pos++; + if ( hi == 0 ) { + in( iADDI, rFIRST, rPSTACK, lo ); + } else { + in( iADDIS, rFIRST, rPSTACK, hi ); + if ( lo != 0 ) + in( iADDI, rFIRST, rFIRST, lo ); + } + } + break; + + case OP_JUMP: + if ( i_const ) { + emitFalseConst(); + + emitJump( + i_const->arg.i, + branchAlways, 0, 0 + ); + } else { + in( iLL, r0, VM_Data_Offset( iPointers ), rVMDATA ); + in( iRLWINM, rFIRST, rFIRST, GPRLEN_SHIFT, 0, 31-GPRLEN_SHIFT ); // mul * GPRLEN + in( iLLX, r0, rFIRST, r0 ); // load pointer + in( iMTCTR, r0 ); + in( iBCTR ); + } + gpr_pos--; + break; + + case OP_EQ: + case OP_NE: + if ( i_const && i_const->arg.si >= -0x8000 && i_const->arg.si < 0x10000 ) { + emitFalseConst(); + if ( i_const->arg.si >= 0x8000 ) + in( iCMPLWI, cr7, rSECOND, i_const->arg.i ); + else + in( iCMPWI, cr7, rSECOND, i_const->arg.si ); + } else { + maybeEmitConst(); + in( iCMPW, cr7, rSECOND, rFIRST ); + } + emitJump( + i_now->arg.i, + (i_now->op == OP_EQ ? branchTrue : branchFalse), + 4*cr7+eq, 0 + ); + gpr_pos -= 2; + break; + + case OP_LTI: + case OP_GEI: + if ( i_const && i_const->arg.si >= -0x8000 && i_const->arg.si < 0x8000 ) { + emitFalseConst(); + in( iCMPWI, cr7, rSECOND, i_const->arg.si ); + } else { + maybeEmitConst(); + in( iCMPW, cr7, rSECOND, rFIRST ); + } + emitJump( + i_now->arg.i, + ( i_now->op == OP_LTI ? branchTrue : branchFalse ), + 4*cr7+lt, 0 + ); + gpr_pos -= 2; + break; + + case OP_GTI: + case OP_LEI: + if ( i_const && i_const->arg.si >= -0x8000 && i_const->arg.si < 0x8000 ) { + emitFalseConst(); + in( iCMPWI, cr7, rSECOND, i_const->arg.si ); + } else { + maybeEmitConst(); + in( iCMPW, cr7, rSECOND, rFIRST ); + } + emitJump( + i_now->arg.i, + ( i_now->op == OP_GTI ? branchTrue : branchFalse ), + 4*cr7+gt, 0 + ); + gpr_pos -= 2; + break; + + case OP_LTU: + case OP_GEU: + if ( i_const && i_const->arg.i < 0x10000 ) { + emitFalseConst(); + in( iCMPLWI, cr7, rSECOND, i_const->arg.i ); + } else { + maybeEmitConst(); + in( iCMPLW, cr7, rSECOND, rFIRST ); + } + emitJump( + i_now->arg.i, + ( i_now->op == OP_LTU ? branchTrue : branchFalse ), + 4*cr7+lt, 0 + ); + gpr_pos -= 2; + break; + + case OP_GTU: + case OP_LEU: + if ( i_const && i_const->arg.i < 0x10000 ) { + emitFalseConst(); + in( iCMPLWI, cr7, rSECOND, i_const->arg.i ); + } else { + maybeEmitConst(); + in( iCMPLW, cr7, rSECOND, rFIRST ); + } + emitJump( + i_now->arg.i, + ( i_now->op == OP_GTU ? branchTrue : branchFalse ), + 4*cr7+gt, 0 + ); + gpr_pos -= 2; + break; + + case OP_EQF: + case OP_NEF: + maybeEmitConst(); + in( iFCMPU, cr7, fSECOND, fFIRST ); + emitJump( + i_now->arg.i, + ( i_now->op == OP_EQF ? branchTrue : branchFalse ), + 4*cr7+eq, 0 + ); + fpr_pos -= 2; + break; + + case OP_LTF: + case OP_GEF: + maybeEmitConst(); + in( iFCMPU, cr7, fSECOND, fFIRST ); + emitJump( + i_now->arg.i, + ( i_now->op == OP_LTF ? branchTrue : branchFalse ), + 4*cr7+lt, 0 + ); + fpr_pos -= 2; + break; + + case OP_GTF: + case OP_LEF: + maybeEmitConst(); + in( iFCMPU, cr7, fSECOND, fFIRST ); + emitJump( + i_now->arg.i, + ( i_now->op == OP_GTF ? branchTrue : branchFalse ), + 4*cr7+gt, 0 + ); + fpr_pos -= 2; + break; + + case OP_LOAD1: + maybeEmitConst(); +#if OPTIMIZE_MASK + in( iRLWINM, rFIRST, rFIRST, 0, fastMaskHi, fastMaskLo ); +#else + in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA ); + in( iAND, rFIRST, rFIRST, r0 ); +#endif + in( iLBZX, rFIRST, rFIRST, rDATABASE ); + break; + + case OP_LOAD2: + maybeEmitConst(); +#if OPTIMIZE_MASK + in( iRLWINM, rFIRST, rFIRST, 0, fastMaskHi, fastMaskLo ); +#else + in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA ); + in( iAND, rFIRST, rFIRST, r0 ); +#endif + in( iLHZX, rFIRST, rFIRST, rDATABASE ); + break; + + case OP_LOAD4: + maybeEmitConst(); +#if OPTIMIZE_MASK + in( iRLWINM, rFIRST, rFIRST, 0, fastMaskHi, fastMaskLo ); +#else + in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA ); + in( iAND, rFIRST, rFIRST, r0 ); +#endif + if ( RET_INT ) { + in( iLWZX, rFIRST, rFIRST, rDATABASE ); + } else { + fpr_pos++; + in( iLFSX, fFIRST, rFIRST, rDATABASE ); + gpr_pos--; + } + break; + + case OP_STORE1: + maybeEmitConst(); +#if OPTIMIZE_MASK + in( iRLWINM, rSECOND, rSECOND, 0, fastMaskHi, fastMaskLo ); +#else + in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA ); + in( iAND, rSECOND, rSECOND, r0 ); +#endif + in( iSTBX, rFIRST, rSECOND, rDATABASE ); + gpr_pos -= 2; + break; + + case OP_STORE2: + maybeEmitConst(); +#if OPTIMIZE_MASK + in( iRLWINM, rSECOND, rSECOND, 0, fastMaskHi, fastMaskLo ); +#else + in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA ); + in( iAND, rSECOND, rSECOND, r0 ); +#endif + in( iSTHX, rFIRST, rSECOND, rDATABASE ); + gpr_pos -= 2; + break; + + case OP_STORE4: + maybeEmitConst(); + if ( ARG_INT ) { +#if OPTIMIZE_MASK + in( iRLWINM, rSECOND, rSECOND, 0, fastMaskHi, fastMaskLo ); +#else + in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA ); + in( iAND, rSECOND, rSECOND, r0 ); +#endif + + in( iSTWX, rFIRST, rSECOND, rDATABASE ); + gpr_pos--; + } else { +#if OPTIMIZE_MASK + in( iRLWINM, rFIRST, rFIRST, 0, fastMaskHi, fastMaskLo ); +#else + in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA ); + in( iAND, rFIRST, rFIRST, r0 ); +#endif + + in( iSTFSX, fFIRST, rFIRST, rDATABASE ); + fpr_pos--; + } + gpr_pos--; + break; + + case OP_ARG: + maybeEmitConst(); + in( iADDI, r0, rPSTACK, i_now->arg.b ); + if ( ARG_INT ) { + in( iSTWX, rFIRST, rDATABASE, r0 ); + gpr_pos--; + } else { + in( iSTFSX, fFIRST, rDATABASE, r0 ); + fpr_pos--; + } + break; + + case OP_BLOCK_COPY: + maybeEmitConst(); +#if OPTIMIZE_COPY + if ( i_now->arg.i <= SL( 16, 32 ) ) { + /* block is very short so copy it in-place */ + + unsigned int len = i_now->arg.i; + unsigned int copied = 0, left = len; + + in( iADD, rFIRST, rFIRST, rDATABASE ); + in( iADD, rSECOND, rSECOND, rDATABASE ); + + if ( len >= GPRLEN ) { + long int i, words = len / GPRLEN; + in( iLL, r0, 0, rFIRST ); + for ( i = 1; i < words; i++ ) + in( iLL, rTEMP( i - 1 ), GPRLEN * i, rFIRST ); + + in( iSTL, r0, 0, rSECOND ); + for ( i = 1; i < words; i++ ) + in( iSTL, rTEMP( i - 1 ), GPRLEN * i, rSECOND ); + + copied += words * GPRLEN; + left -= copied; + } + + if ( SL( 0, left >= 4 ) ) { + in( iLWZ, r0, copied+0, rFIRST ); + in( iSTW, r0, copied+0, rSECOND ); + copied += 4; + left -= 4; + } + if ( left >= 4 ) { + DIE("Bug in OP_BLOCK_COPY"); + } + if ( left == 3 ) { + in( iLHZ, r0, copied+0, rFIRST ); + in( iLBZ, rTMP, copied+2, rFIRST ); + in( iSTH, r0, copied+0, rSECOND ); + in( iSTB, rTMP, copied+2, rSECOND ); + } else if ( left == 2 ) { + in( iLHZ, r0, copied+0, rFIRST ); + in( iSTH, r0, copied+0, rSECOND ); + } else if ( left == 1 ) { + in( iLBZ, r0, copied+0, rFIRST ); + in( iSTB, r0, copied+0, rSECOND ); + } + } else +#endif + { + unsigned long int r5_ori = 0; + if ( i_now->arg.si >= -0x8000 && i_now->arg.si < 0x8000 ) { + in( iLI, r5, i_now->arg.si ); + } else if ( i_now->arg.i < 0x10000 ) { + in( iLI, r5, 0 ); + r5_ori = i_now->arg.i; + } else { + in( iLIS, r5, i_now->arg.ss[ 0 ] ); + r5_ori = i_now->arg.us[ 1 ]; + } + + in( iLL, r0, VM_Data_Offset( BlockCopy ), rVMDATA ); // get blockCopy pointer + + if ( r5_ori ) + in( iORI, r5, r5, r5_ori ); + + in( iMTCTR, r0 ); + + if ( rFIRST != r4 ) + in( iMR, r4, rFIRST ); + if ( rSECOND != r3 ) + in( iMR, r3, rSECOND ); + + in( iBCTRL ); + } + + gpr_pos -= 2; + break; + + case OP_SEX8: + maybeEmitConst(); + in( iEXTSB, rFIRST, rFIRST ); + break; + + case OP_SEX16: + maybeEmitConst(); + in( iEXTSH, rFIRST, rFIRST ); + break; + + case OP_NEGI: + maybeEmitConst(); + in( iNEG, rFIRST, rFIRST ); + break; + + case OP_ADD: + if ( i_const ) { + emitFalseConst(); + + signed short int hi, lo; + hi = i_const->arg.ss[ 0 ]; + lo = i_const->arg.ss[ 1 ]; + if ( lo < 0 ) + hi += 1; + + if ( hi != 0 ) + in( iADDIS, rSECOND, rSECOND, hi ); + if ( lo != 0 ) + in( iADDI, rSECOND, rSECOND, lo ); + } else { + in( iADD, rSECOND, rSECOND, rFIRST ); + } + gpr_pos--; + break; + + case OP_SUB: + maybeEmitConst(); + in( iSUB, rSECOND, rSECOND, rFIRST ); + gpr_pos--; + break; + + case OP_DIVI: + maybeEmitConst(); + in( iDIVW, rSECOND, rSECOND, rFIRST ); + gpr_pos--; + break; + + case OP_DIVU: + maybeEmitConst(); + in( iDIVWU, rSECOND, rSECOND, rFIRST ); + gpr_pos--; + break; + + case OP_MODI: + maybeEmitConst(); + in( iDIVW, r0, rSECOND, rFIRST ); + in( iMULLW, r0, r0, rFIRST ); + in( iSUB, rSECOND, rSECOND, r0 ); + gpr_pos--; + break; + + case OP_MODU: + maybeEmitConst(); + in( iDIVWU, r0, rSECOND, rFIRST ); + in( iMULLW, r0, r0, rFIRST ); + in( iSUB, rSECOND, rSECOND, r0 ); + gpr_pos--; + break; + + case OP_MULI: + case OP_MULU: + maybeEmitConst(); + in( iMULLW, rSECOND, rSECOND, rFIRST ); + gpr_pos--; + break; + + case OP_BAND: + maybeEmitConst(); + in( iAND, rSECOND, rSECOND, rFIRST ); + gpr_pos--; + break; + + case OP_BOR: + maybeEmitConst(); + in( iOR, rSECOND, rSECOND, rFIRST ); + gpr_pos--; + break; + + case OP_BXOR: + maybeEmitConst(); + in( iXOR, rSECOND, rSECOND, rFIRST ); + gpr_pos--; + break; + + case OP_BCOM: + maybeEmitConst(); + in( iNOT, rFIRST, rFIRST ); + break; + + case OP_LSH: + maybeEmitConst(); + in( iSLW, rSECOND, rSECOND, rFIRST ); + gpr_pos--; + break; + + case OP_RSHI: + maybeEmitConst(); + in( iSRAW, rSECOND, rSECOND, rFIRST ); + gpr_pos--; + break; + + case OP_RSHU: + maybeEmitConst(); + in( iSRW, rSECOND, rSECOND, rFIRST ); + gpr_pos--; + break; + + case OP_NEGF: + maybeEmitConst(); + in( iFNEG, fFIRST, fFIRST ); + break; + + case OP_ADDF: + maybeEmitConst(); + in( iFADDS, fSECOND, fSECOND, fFIRST ); + fpr_pos--; + break; + + case OP_SUBF: + maybeEmitConst(); + in( iFSUBS, fSECOND, fSECOND, fFIRST ); + fpr_pos--; + break; + + case OP_DIVF: + maybeEmitConst(); + in( iFDIVS, fSECOND, fSECOND, fFIRST ); + fpr_pos--; + break; + + case OP_MULF: + maybeEmitConst(); + in( iFMULS, fSECOND, fSECOND, fFIRST ); + fpr_pos--; + break; + + case OP_CVIF: + maybeEmitConst(); + fpr_pos++; + in( iXORIS, rFIRST, rFIRST, 0x8000 ); + in( iLIS, r0, 0x4330 ); + in( iSTW, rFIRST, stack_temp + 4, r1 ); + in( iSTW, r0, stack_temp, r1 ); + in( iLFS, fTMP, VM_Data_Offset( floatBase ), rVMDATA ); + in( iLFD, fFIRST, stack_temp, r1 ); + in( iFSUB, fFIRST, fFIRST, fTMP ); + in( iFRSP, fFIRST, fFIRST ); + gpr_pos--; + break; + + case OP_CVFI: + maybeEmitConst(); + gpr_pos++; + in( iFCTIWZ, fFIRST, fFIRST ); + in( iSTFD, fFIRST, stack_temp, r1 ); + in( iLWZ, rFIRST, stack_temp + 4, r1 ); + fpr_pos--; + break; + } + + i_const = NULL; + + if ( i_now->op != OP_CONST ) { + // emit the instructions if it isn't OP_CONST + emitEnd(); + } else { + // mark in what register the value should be saved + if ( RET_INT ) + i_now->regPos = ++gpr_pos; + else + i_now->regPos = ++fpr_pos; + +#if OPTIMIZE_HOLE + i_const = i_now; +#else + PPC_EmitConst( i_now ); +#endif + } + } + if ( i_const ) + DIE( "left (unused) OP_CONST" ); + + { + // free opcode information, don't free first dummy one + source_instruction_t *i_next = i_first->next; + while ( i_next ) { + i_now = i_next; + i_next = i_now->next; + VM_Free( i_now ); + } + } +} + + +/* + * check which jumps are short enough to use signed 16bit immediate branch + */ +static void +PPC_ShrinkJumps( void ) +{ + symbolic_jump_t *sj_now = sj_first; + while ( (sj_now = sj_now->nextJump) ) { + if ( sj_now->bo == branchAlways ) + // non-conditional branch has 26bit immediate + sj_now->parent->length = 1; + + else { + dest_instruction_t *di = di_pointers[ sj_now->jump_to ]; + dest_instruction_t *ji = sj_now->parent; + long int jump_length = 0; + if ( ! di ) + DIE( "No instruction to jump to" ); + if ( ji->count > di->count ) { + do { + jump_length += di->length; + } while ( ( di = di->next ) != ji ); + } else { + jump_length = 1; + while ( ( ji = ji->next ) != di ) + jump_length += ji->length; + } + if ( jump_length < 0x2000 ) + // jump is short, use normal instruction + sj_now->parent->length = 1; + } + } +} + +/* + * puts all the data in one place, it consists of many different tasks + */ +static void +PPC_ComputeCode( vm_t *vm ) +{ + dest_instruction_t *di_now = di_first; + + unsigned long int codeInstructions = 0; + // count total instruciton number + while ( (di_now = di_now->next ) ) + codeInstructions += di_now->length; + + size_t codeLength = sizeof( vm_data_t ) + + sizeof( unsigned int ) * data_acc + + sizeof( ppc_instruction_t ) * codeInstructions; + + // get the memory for the generated code, smarter ppcs need the + // mem to be marked as executable (whill change later) + unsigned char *dataAndCode = mmap( NULL, codeLength, + PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0 ); + + if ( ! dataAndCode ) + DIE( "Not enough memory" ); + + ppc_instruction_t *codeNow, *codeBegin; + codeNow = codeBegin = (ppc_instruction_t *)( dataAndCode + VM_Data_Offset( data[ data_acc ] ) ); + + ppc_instruction_t nop = IN( iNOP ); + + // copy instructions to the destination + // fills the jump instructions with nops + // saves pointers of all instructions + di_now = di_first; + while ( (di_now = di_now->next ) ) { + unsigned long int i_count = di_now->i_count; + if ( i_count != FALSE_ICOUNT ) { + if ( ! di_pointers[ i_count ] ) + di_pointers[ i_count ] = (void *) codeNow; + } + + if ( di_now->jump == NULL ) { + memcpy( codeNow, &(di_now->code[0]), di_now->length * sizeof( ppc_instruction_t ) ); + codeNow += di_now->length; + } else { + long int i; + symbolic_jump_t *sj; + for ( i = 0; i < di_now->length; i++ ) + codeNow[ i ] = nop; + codeNow += di_now->length; + + sj = di_now->jump; + // save position of jumping instruction + sj->parent = (void *)(codeNow - 1); + } + } + + // compute the jumps and write corresponding instructions + symbolic_jump_t *sj_now = sj_first; + while ( (sj_now = sj_now->nextJump ) ) { + ppc_instruction_t *jumpFrom = (void *) sj_now->parent; + ppc_instruction_t *jumpTo = (void *) di_pointers[ sj_now->jump_to ]; + signed long int jumpLength = jumpTo - jumpFrom; + + // if jump is short, just write it + if ( jumpLength >= - 8192 && jumpLength < 8192 ) { + powerpc_iname_t branchConditional = sj_now->ext & branchExtLink ? iBCL : iBC; + *jumpFrom = IN( branchConditional, sj_now->bo, sj_now->bi, jumpLength * 4 ); + continue; + } + + // jump isn't short so write it as two instructions + // + // the letter one is a non-conditional branch instruction which + // accepts immediate values big enough (26 bits) + *jumpFrom = IN( (sj_now->ext & branchExtLink ? iBL : iB), jumpLength * 4 ); + if ( sj_now->bo == branchAlways ) + continue; + + // there should have been additional space prepared for this case + if ( jumpFrom[ -1 ] != nop ) + DIE( "additional space for long jump not prepared" ); + + // invert instruction condition + long int bo = 0; + switch ( sj_now->bo ) { + case branchTrue: + bo = branchFalse; + break; + case branchFalse: + bo = branchTrue; + break; + default: + DIE( "unrecognized branch type" ); + break; + } + + // the former instruction is an inverted conditional branch which + // jumps over the non-conditional one + jumpFrom[ -1 ] = IN( iBC, bo, sj_now->bi, +2*4 ); + } + + vm->codeBase = dataAndCode; + vm->codeLength = codeLength; + + vm_data_t *data = (vm_data_t *)dataAndCode; + +#if ELF64 + // prepare Official Procedure Descriptor for the generated code + // and retrieve real function pointer for helper functions + + opd_t *ac = (void *)VM_AsmCall, *bc = (void *)VM_BlockCopy; + data->opd.function = codeBegin; + // trick it into using the same TOC + // this way we won't have to switch TOC before calling AsmCall or BlockCopy + data->opd.toc = ac->toc; + data->opd.env = ac->env; + + data->AsmCall = ac->function; + data->BlockCopy = bc->function; +#else + data->AsmCall = VM_AsmCall; + data->BlockCopy = VM_BlockCopy; +#endif + + data->dataMask = vm->dataMask; + data->iPointers = (ppc_instruction_t *)vm->instructionPointers; + data->dataLength = VM_Data_Offset( data[ data_acc ] ); + data->codeLength = ( codeNow - codeBegin ) * sizeof( ppc_instruction_t ); + data->floatBase = 0x59800004; + + + /* write dynamic data (float constants) */ + { + local_data_t *d_next, *d_now = data_first; + long int accumulated = 0; + + do { + long int i; + for ( i = 0; i < d_now->count; i++ ) + data->data[ accumulated + i ] = d_now->data[ i ]; + + accumulated += d_now->count; + d_next = d_now->next; + VM_Free( d_now ); + + if ( !d_next ) + break; + d_now = d_next; + } while (1); + data_first = NULL; + } + + /* free most of the compilation memory */ + { + di_now = di_first->next; + VM_Free( di_first ); + VM_Free( sj_first ); + + while ( di_now ) { + di_first = di_now->next; + if ( di_now->jump ) + VM_Free( di_now->jump ); + VM_Free( di_now ); + di_now = di_first; + } + } + + return; +} + +static void +VM_Destroy_Compiled( vm_t *self ) +{ + if ( self->codeBase ) { + if ( munmap( self->codeBase, self->codeLength ) ) + Com_Printf( S_COLOR_RED "Memory unmap failed, possible memory leak\n" ); + } + self->codeBase = NULL; +} + +void +VM_Compile( vm_t *vm, vmHeader_t *header ) +{ + long int pc = 0; + unsigned long int i_count; + char* code; + struct timeval tvstart = {0, 0}; + source_instruction_t *i_first /* dummy */, *i_last = NULL, *i_now; + + vm->compiled = qfalse; + + gettimeofday(&tvstart, NULL); + + PPC_MakeFastMask( vm->dataMask ); + + i_first = VM_Malloc( sizeof( source_instruction_t ) ); + i_first->next = NULL; + + di_pointers = (void *)vm->instructionPointers; + memset( di_pointers, 0, header->instructionCount * sizeof( void * ) ); + + + PPC_CompileInit(); + + /* + * read the input program + * divide it into functions and send each function to compiler + */ + code = (char *)header + header->codeOffset; + for ( i_count = 0; i_count < header->instructionCount; ++i_count ) + { + unsigned char op = code[ pc++ ]; + + if ( op == OP_ENTER ) { + if ( i_first->next ) + VM_CompileFunction( i_first ); + i_first->next = NULL; + i_last = i_first; + } + + i_now = VM_Malloc( sizeof( source_instruction_t ) ); + i_now->op = op; + i_now->i_count = i_count; + i_now->arg.i = 0; + i_now->regA1 = 0; + i_now->regA2 = 0; + i_now->regR = 0; + i_now->regPos = 0; + i_now->next = NULL; + + if ( vm_opInfo[op] & opImm4 ) { + union { + unsigned char b[4]; + unsigned int i; + } c = { { code[ pc + 3 ], code[ pc + 2 ], code[ pc + 1 ], code[ pc + 0 ] }, }; + + i_now->arg.i = c.i; + pc += 4; + } else if ( vm_opInfo[op] & opImm1 ) { + i_now->arg.b = code[ pc++ ]; + } + + i_last->next = i_now; + i_last = i_now; + } + VM_CompileFunction( i_first ); + VM_Free( i_first ); + + PPC_ShrinkJumps(); + memset( di_pointers, 0, header->instructionCount * sizeof( void * ) ); + PPC_ComputeCode( vm ); + + /* check for uninitialized pointers */ + long int i; + for ( i = 0; i < header->instructionCount; i++ ) + if ( vm->instructionPointers[ i ] == 0 ) + Com_Printf( S_COLOR_RED "Pointer %ld not initialized !\n", i ); + + /* mark memory as executable and not writeable */ + if ( mprotect( vm->codeBase, vm->codeLength, PROT_READ|PROT_EXEC ) ) { + + // it has failed, make sure memory is unmapped before throwing the error + VM_Destroy_Compiled( vm ); + DIE( "mprotect failed" ); + } + + vm->destroy = VM_Destroy_Compiled; + vm->compiled = qtrue; + + { + struct timeval tvdone = {0, 0}; + struct timeval dur = {0, 0}; + Com_Printf( "VM file %s compiled to %i bytes of code (%p - %p)\n", + vm->name, vm->codeLength, vm->codeBase, vm->codeBase+vm->codeLength ); + + gettimeofday(&tvdone, NULL); + timersub(&tvdone, &tvstart, &dur); + Com_Printf( "compilation took %lu.%06lu seconds\n", + (long unsigned int)dur.tv_sec, (long unsigned int)dur.tv_usec ); + } +} + +int +VM_CallCompiled( vm_t *vm, int *args ) +{ + int retVal; + int *argPointer; + + vm_data_t *vm_dataAndCode = (void *)( vm->codeBase ); + int programStack = vm->programStack; + int stackOnEntry = programStack; + + byte *image = vm->dataBase; + + currentVM = vm; + + vm->currentlyInterpreting = qtrue; + + programStack -= 48; + argPointer = (int *)&image[ programStack + 8 ]; + memcpy( argPointer, args, 4 * 9 ); + argPointer[ -1 ] = 0; + argPointer[ -2 ] = -1; + +#ifdef VM_TIMES + struct tms start_time, stop_time; + clock_t time_diff; + + times( &start_time ); + time_outside_vm = 0; +#endif + + /* call generated code */ + { + int ( *entry )( void *, int, void * ); +#ifdef __PPC64__ + entry = (void *)&(vm_dataAndCode->opd); +#else + entry = (void *)(vm->codeBase + vm_dataAndCode->dataLength); +#endif + retVal = entry( vm->codeBase, programStack, vm->dataBase ); + } + +#ifdef VM_TIMES + times( &stop_time ); + time_diff = stop_time.tms_utime - start_time.tms_utime; + time_total_vm += time_diff - time_outside_vm; + if ( time_diff > 100 ) { + printf( "App clock: %ld, vm total: %ld, vm this: %ld, vm real: %ld, vm out: %ld\n" + "Inside VM %f%% of app time\n", + stop_time.tms_utime, + time_total_vm, + time_diff, + time_diff - time_outside_vm, + time_outside_vm, + (double)100 * time_total_vm / stop_time.tms_utime ); + } +#endif + + vm->programStack = stackOnEntry; + vm->currentlyInterpreting = qfalse; + + return retVal; +} diff --git a/code/qcommon/vm_powerpc_asm.c b/code/qcommon/vm_powerpc_asm.c new file mode 100644 index 0000000..64c5977 --- /dev/null +++ b/code/qcommon/vm_powerpc_asm.c @@ -0,0 +1,1009 @@ +/* +=========================================================================== +Copyright (C) 2008 Przemyslaw Iskra <sparky@pld-linux.org> + +This file is part of Quake III Arena source code. + +Quake III Arena source code is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Quake III Arena source code is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Quake III Arena source code; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +=========================================================================== + + * File includes code from GNU binutils, exactly: + * - include/opcode/ppc.h - licensed under GPL v1 or later + * - opcodes/ppc-opc.c - licensed under GPL v2 or later + * + * ppc.h -- Header file for PowerPC opcode table + * Copyright 1994, 1995, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + * 2007 Free Software Foundation, Inc. + * Written by Ian Lance Taylor, Cygnus Suppor + * + * This file is part of GDB, GAS, and the GNU binutils. + * + * ppc-opc.c -- PowerPC opcode list + * Copyright 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2004, + * 2005, 2006, 2007 Free Software Foundation, Inc. + * Written by Ian Lance Taylor, Cygnus Support + * + * This file is part of GDB, GAS, and the GNU binutils. + * + */ + +#include "vm_powerpc_asm.h" + +#include <string.h> +#include <stdio.h> +#include <inttypes.h> + +/* return nop on error */ +#define ASM_ERROR_OPC (0x60000000) + +/* + * BEGIN OF ppc.h + */ + +#define ppc_cpu_t int + +struct powerpc_opcode +{ + const char *name; + unsigned long opcode; + unsigned long mask; + ppc_cpu_t flags; + unsigned char operands[8]; +}; + +static const struct powerpc_opcode powerpc_opcodes[]; +static const int powerpc_num_opcodes; + +#define PPC_OPCODE_PPC 1 +#define PPC_OPCODE_POWER 2 +#define PPC_OPCODE_POWER2 4 +#define PPC_OPCODE_32 8 +#define PPC_OPCODE_64 0x10 +#define PPC_OPCODE_601 0x20 +#define PPC_OPCODE_COMMON 0x40 +#define PPC_OPCODE_ANY 0x80 +#define PPC_OPCODE_64_BRIDGE 0x100 +#define PPC_OPCODE_ALTIVEC 0x200 +#define PPC_OPCODE_403 0x400 +#define PPC_OPCODE_BOOKE 0x800 +#define PPC_OPCODE_BOOKE64 0x1000 +#define PPC_OPCODE_440 0x2000 +#define PPC_OPCODE_POWER4 0x4000 +#define PPC_OPCODE_NOPOWER4 0x8000 +#define PPC_OPCODE_CLASSIC 0x10000 +#define PPC_OPCODE_SPE 0x20000 +#define PPC_OPCODE_ISEL 0x40000 +#define PPC_OPCODE_EFS 0x80000 +#define PPC_OPCODE_BRLOCK 0x100000 +#define PPC_OPCODE_PMR 0x200000 +#define PPC_OPCODE_CACHELCK 0x400000 +#define PPC_OPCODE_RFMCI 0x800000 +#define PPC_OPCODE_POWER5 0x1000000 +#define PPC_OPCODE_E300 0x2000000 +#define PPC_OPCODE_POWER6 0x4000000 +#define PPC_OPCODE_CELL 0x8000000 +#define PPC_OPCODE_PPCPS 0x10000000 +#define PPC_OPCODE_E500MC 0x20000000 +#define PPC_OPCODE_405 0x40000000 +#define PPC_OPCODE_VSX 0x80000000 + +#define PPC_OP(i) (((i) >> 26) & 0x3f) + +struct powerpc_operand +{ + unsigned int bitm; + int shift; + unsigned long (*insert) + (unsigned long, long, int, const char **); + unsigned long flags; +}; + +static const struct powerpc_operand powerpc_operands[]; +static const unsigned int num_powerpc_operands; + +#define PPC_OPERAND_SIGNED (0x1) +#define PPC_OPERAND_SIGNOPT (0x2) +#define PPC_OPERAND_FAKE (0x4) +#define PPC_OPERAND_PARENS (0x8) +#define PPC_OPERAND_CR (0x10) +#define PPC_OPERAND_GPR (0x20) +#define PPC_OPERAND_GPR_0 (0x40) +#define PPC_OPERAND_FPR (0x80) +#define PPC_OPERAND_RELATIVE (0x100) +#define PPC_OPERAND_ABSOLUTE (0x200) +#define PPC_OPERAND_OPTIONAL (0x400) +#define PPC_OPERAND_NEXT (0x800) +#define PPC_OPERAND_NEGATIVE (0x1000) +#define PPC_OPERAND_VR (0x2000) +#define PPC_OPERAND_DS (0x4000) +#define PPC_OPERAND_DQ (0x8000) +#define PPC_OPERAND_PLUS1 (0x10000) +#define PPC_OPERAND_FSL (0x20000) +#define PPC_OPERAND_FCR (0x40000) +#define PPC_OPERAND_UDI (0x80000) +#define PPC_OPERAND_VSR (0x100000) + +/* + * END OF ppc.h + */ + +#define PPC_DEST_ARCH PPC_OPCODE_PPC + +ppc_instruction_t +asm_instruction( powerpc_iname_t sname, const int argc, const long int *argv ) +{ + const char *errmsg = NULL; + const char *name; + unsigned long int ret; + const struct powerpc_opcode *opcode = NULL; + int argi, argj; + + opcode = &powerpc_opcodes[ sname ]; + name = opcode->name; + + if ( ! opcode ) { + printf( "Can't find opcode %d\n", sname ); + return ASM_ERROR_OPC; + } + if ( ( opcode->flags & PPC_DEST_ARCH ) != PPC_DEST_ARCH ) { + printf( "opcode %s not defined for this arch\n", name ); + return ASM_ERROR_OPC; + } + + ret = opcode->opcode; + + argi = argj = 0; + while ( opcode->operands[ argi ] != 0 ) { + long int op = 0; + const struct powerpc_operand *operand = &powerpc_operands[ opcode->operands[ argi ] ]; + + if ( ! (operand->flags & PPC_OPERAND_FAKE) ) { + if ( argj >= argc ) { + printf( "Not enough arguments for %s, got %d\n", name, argc ); + return ASM_ERROR_OPC; + } + + op = argv[ argj++ ]; + } + + if ( operand->insert ) { + errmsg = NULL; + ret = operand->insert( ret, op, PPC_DEST_ARCH, &errmsg ); + if ( errmsg ) { + printf( "%s: error while inserting operand %d (0x%.2lx): %s\n", + name, argi, op, errmsg ); + } + } else { + unsigned long int opu = *(unsigned long int *)&op; + unsigned long int bitm = operand->bitm; + unsigned long int bitm_full = bitm | ( bitm & 1 ? 0 : 0xf ); + + if ( operand->flags & PPC_OPERAND_SIGNED ) { + bitm_full >>= 1; + + if ( ( opu & ~bitm_full ) != 0 && ( opu | bitm_full ) != -1 ) + printf( "%s: signed operand nr.%d to wide. op: %.8lx, mask: %.8lx\n", + name, argi, opu, bitm ); + } else { + if ( ( opu & ~bitm_full ) != 0 ) + printf( "%s: unsigned operand nr.%d to wide. op: %.8lx, mask: %.8lx\n", + name, argi, opu, bitm ); + } + if ( (bitm & 1) == 0 ) { + if ( opu & 0xf & ~bitm ) + printf( "%s: operand nr.%d not aligned correctly. op: %.8lx, mask: %.8lx\n", + name, argi, opu, bitm ); + } + + ret |= ( op & operand->bitm ) << operand->shift; + } + argi++; + } + if ( argc > argj ) { + printf( "Too many arguments for %s, got %d\n", name, argc ); + return ASM_ERROR_OPC; + } + + return ret; +} + + +/* + * BEGIN OF ppc-opc.c + */ + +#define ATTRIBUTE_UNUSED +#define _(x) (x) + +/* Local insertion and extraction functions. */ + +static unsigned long insert_bdm (unsigned long, long, int, const char **); +static unsigned long insert_bo (unsigned long, long, int, const char **); +static unsigned long insert_ras (unsigned long, long, int, const char **); +static unsigned long insert_rbs (unsigned long, long, int, const char **); + +/* The operands table. + + The fields are bitm, shift, insert, extract, flags. + */ + +static const struct powerpc_operand powerpc_operands[] = +{ + /* The zero index is used to indicate the end of the list of + operands. */ +#define UNUSED 0 + { 0, 0, NULL, 0 }, + + /* The BA field in an XL form instruction. */ +#define BA UNUSED + 1 + /* The BI field in a B form or XL form instruction. */ +#define BI BA +#define BI_MASK (0x1f << 16) + { 0x1f, 16, NULL, PPC_OPERAND_CR }, + + /* The BD field in a B form instruction. The lower two bits are + forced to zero. */ +#define BD BA + 1 + { 0xfffc, 0, NULL, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, + + /* The BD field in a B form instruction when the - modifier is used. + This sets the y bit of the BO field appropriately. */ +#define BDM BD + 1 + { 0xfffc, 0, insert_bdm, + PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, + + /* The BF field in an X or XL form instruction. */ +#define BF BDM + 1 + /* The CRFD field in an X form instruction. */ +#define CRFD BF + { 0x7, 23, NULL, PPC_OPERAND_CR }, + + /* An optional BF field. This is used for comparison instructions, + in which an omitted BF field is taken as zero. */ +#define OBF BF + 1 + { 0x7, 23, NULL, PPC_OPERAND_CR | PPC_OPERAND_OPTIONAL }, + + /* The BO field in a B form instruction. Certain values are + illegal. */ +#define BO OBF + 1 +#define BO_MASK (0x1f << 21) + { 0x1f, 21, insert_bo, 0 }, + + /* The condition register number portion of the BI field in a B form + or XL form instruction. This is used for the extended + conditional branch mnemonics, which set the lower two bits of the + BI field. This field is optional. */ +#define CR BO + 1 + { 0x7, 18, NULL, PPC_OPERAND_CR | PPC_OPERAND_OPTIONAL }, + + /* The D field in a D form instruction. This is a displacement off + a register, and implies that the next operand is a register in + parentheses. */ +#define D CR + 1 + { 0xffff, 0, NULL, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED }, + + /* The DS field in a DS form instruction. This is like D, but the + lower two bits are forced to zero. */ +#define DS D + 1 + { 0xfffc, 0, NULL, + PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED | PPC_OPERAND_DS }, + + /* The FRA field in an X or A form instruction. */ +#define FRA DS + 1 +#define FRA_MASK (0x1f << 16) + { 0x1f, 16, NULL, PPC_OPERAND_FPR }, + + /* The FRB field in an X or A form instruction. */ +#define FRB FRA + 1 +#define FRB_MASK (0x1f << 11) + { 0x1f, 11, NULL, PPC_OPERAND_FPR }, + + /* The FRC field in an A form instruction. */ +#define FRC FRB + 1 +#define FRC_MASK (0x1f << 6) + { 0x1f, 6, NULL, PPC_OPERAND_FPR }, + + /* The FRS field in an X form instruction or the FRT field in a D, X + or A form instruction. */ +#define FRS FRC + 1 +#define FRT FRS + { 0x1f, 21, NULL, PPC_OPERAND_FPR }, + + /* The LI field in an I form instruction. The lower two bits are + forced to zero. */ +#define LI FRS + 1 + { 0x3fffffc, 0, NULL, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, + + /* The ME field in an M form instruction. */ +#define ME LI + 1 +#define ME_MASK (0x1f << 1) + { 0x1f, 1, NULL, 0 }, + + /* The MB and ME fields in an M form instruction expressed a single + operand which is a bitmask indicating which bits to select. This + is a two operand form using PPC_OPERAND_NEXT. See the + description in opcode/ppc.h for what this means. */ +#define MBE ME + 1 + { 0x1f, 6, NULL, PPC_OPERAND_OPTIONAL | PPC_OPERAND_NEXT }, + + /* The RA field in an D, DS, DQ, X, XO, M, or MDS form instruction. */ +#define RA MBE + 1 +#define RA_MASK (0x1f << 16) + { 0x1f, 16, NULL, PPC_OPERAND_GPR }, + + /* As above, but 0 in the RA field means zero, not r0. */ +#define RA0 RA + 1 + { 0x1f, 16, NULL, PPC_OPERAND_GPR_0 }, + + /* The RA field in a D or X form instruction which is an updating + store or an updating floating point load, which means that the RA + field may not be zero. */ +#define RAS RA0 + 1 + { 0x1f, 16, insert_ras, PPC_OPERAND_GPR_0 }, + + /* The RB field in an X, XO, M, or MDS form instruction. */ +#define RB RAS + 1 +#define RB_MASK (0x1f << 11) + { 0x1f, 11, NULL, PPC_OPERAND_GPR }, + + /* The RB field in an X form instruction when it must be the same as + the RS field in the instruction. This is used for extended + mnemonics like mr. */ +#define RBS RB + 1 + { 0x1f, 11, insert_rbs, PPC_OPERAND_FAKE }, + + /* The RS field in a D, DS, X, XFX, XS, M, MD or MDS form + instruction or the RT field in a D, DS, X, XFX or XO form + instruction. */ +#define RS RBS + 1 +#define RT RS +#define RT_MASK (0x1f << 21) + { 0x1f, 21, NULL, PPC_OPERAND_GPR }, + + /* The SH field in an X or M form instruction. */ +#define SH RS + 1 +#define SH_MASK (0x1f << 11) + /* The other UIMM field in a EVX form instruction. */ +#define EVUIMM SH + { 0x1f, 11, NULL, 0 }, + + /* The SI field in a D form instruction. */ +#define SI SH + 1 + { 0xffff, 0, NULL, PPC_OPERAND_SIGNED }, + + /* The UI field in a D form instruction. */ +#define UI SI + 1 + { 0xffff, 0, NULL, 0 }, + +}; + +static const unsigned int num_powerpc_operands = + (sizeof (powerpc_operands) / sizeof (powerpc_operands[0])); + +/* The functions used to insert and extract complicated operands. */ + +/* The BD field in a B form instruction when the - modifier is used. + This modifier means that the branch is not expected to be taken. + For chips built to versions of the architecture prior to version 2 + (ie. not Power4 compatible), we set the y bit of the BO field to 1 + if the offset is negative. When extracting, we require that the y + bit be 1 and that the offset be positive, since if the y bit is 0 + we just want to print the normal form of the instruction. + Power4 compatible targets use two bits, "a", and "t", instead of + the "y" bit. "at" == 00 => no hint, "at" == 01 => unpredictable, + "at" == 10 => not taken, "at" == 11 => taken. The "t" bit is 00001 + in BO field, the "a" bit is 00010 for branch on CR(BI) and 01000 + for branch on CTR. We only handle the taken/not-taken hint here. + Note that we don't relax the conditions tested here when + disassembling with -Many because insns using extract_bdm and + extract_bdp always occur in pairs. One or the other will always + be valid. */ + +static unsigned long +insert_bdm (unsigned long insn, + long value, + int dialect, + const char **errmsg ATTRIBUTE_UNUSED) +{ + if ((dialect & PPC_OPCODE_POWER4) == 0) + { + if ((value & 0x8000) != 0) + insn |= 1 << 21; + } + else + { + if ((insn & (0x14 << 21)) == (0x04 << 21)) + insn |= 0x02 << 21; + else if ((insn & (0x14 << 21)) == (0x10 << 21)) + insn |= 0x08 << 21; + } + return insn | (value & 0xfffc); +} + + +/* Check for legal values of a BO field. */ + +static int +valid_bo (long value, int dialect, int extract) +{ + if ((dialect & PPC_OPCODE_POWER4) == 0) + { + int valid; + /* Certain encodings have bits that are required to be zero. + These are (z must be zero, y may be anything): + 001zy + 011zy + 1z00y + 1z01y + 1z1zz + */ + switch (value & 0x14) + { + default: + case 0: + valid = 1; + break; + case 0x4: + valid = (value & 0x2) == 0; + break; + case 0x10: + valid = (value & 0x8) == 0; + break; + case 0x14: + valid = value == 0x14; + break; + } + /* When disassembling with -Many, accept power4 encodings too. */ + if (valid + || (dialect & PPC_OPCODE_ANY) == 0 + || !extract) + return valid; + } + + /* Certain encodings have bits that are required to be zero. + These are (z must be zero, a & t may be anything): + 0000z + 0001z + 0100z + 0101z + 001at + 011at + 1a00t + 1a01t + 1z1zz + */ + if ((value & 0x14) == 0) + return (value & 0x1) == 0; + else if ((value & 0x14) == 0x14) + return value == 0x14; + else + return 1; +} + +/* The BO field in a B form instruction. Warn about attempts to set + the field to an illegal value. */ + +static unsigned long +insert_bo (unsigned long insn, + long value, + int dialect, + const char **errmsg) +{ + if (!valid_bo (value, dialect, 0)) + *errmsg = _("invalid conditional option"); + return insn | ((value & 0x1f) << 21); +} + +/* The RA field in a D or X form instruction which is an updating + store or an updating floating point load, which means that the RA + field may not be zero. */ + +static unsigned long +insert_ras (unsigned long insn, + long value, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + if (value == 0) + *errmsg = _("invalid register operand when updating"); + return insn | ((value & 0x1f) << 16); +} + +/* The RB field in an X form instruction when it must be the same as + the RS field in the instruction. This is used for extended + mnemonics like mr. This operand is marked FAKE. The insertion + function just copies the BT field into the BA field, and the + extraction function just checks that the fields are the same. */ + +static unsigned long +insert_rbs (unsigned long insn, + long value ATTRIBUTE_UNUSED, + int dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | (((insn >> 21) & 0x1f) << 11); +} + + +/* Macros used to form opcodes. */ + +/* The main opcode. */ +#define OP(x) ((((unsigned long)(x)) & 0x3f) << 26) +#define OP_MASK OP (0x3f) + +/* The main opcode combined with a trap code in the TO field of a D + form instruction. Used for extended mnemonics for the trap + instructions. */ +#define OPTO(x,to) (OP (x) | ((((unsigned long)(to)) & 0x1f) << 21)) +#define OPTO_MASK (OP_MASK | TO_MASK) + +/* The main opcode combined with a comparison size bit in the L field + of a D form or X form instruction. Used for extended mnemonics for + the comparison instructions. */ +#define OPL(x,l) (OP (x) | ((((unsigned long)(l)) & 1) << 21)) +#define OPL_MASK OPL (0x3f,1) + +/* An A form instruction. */ +#define A(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0x1f) << 1) | (((unsigned long)(rc)) & 1)) +#define A_MASK A (0x3f, 0x1f, 1) + +/* An A_MASK with the FRB field fixed. */ +#define AFRB_MASK (A_MASK | FRB_MASK) + +/* An A_MASK with the FRC field fixed. */ +#define AFRC_MASK (A_MASK | FRC_MASK) + +/* An A_MASK with the FRA and FRC fields fixed. */ +#define AFRAFRC_MASK (A_MASK | FRA_MASK | FRC_MASK) + +/* An AFRAFRC_MASK, but with L bit clear. */ +#define AFRALFRC_MASK (AFRAFRC_MASK & ~((unsigned long) 1 << 16)) + +/* A B form instruction. */ +#define B(op, aa, lk) (OP (op) | ((((unsigned long)(aa)) & 1) << 1) | ((lk) & 1)) +#define B_MASK B (0x3f, 1, 1) + +/* A B form instruction setting the BO field. */ +#define BBO(op, bo, aa, lk) (B ((op), (aa), (lk)) | ((((unsigned long)(bo)) & 0x1f) << 21)) +#define BBO_MASK BBO (0x3f, 0x1f, 1, 1) + +/* A BBO_MASK with the y bit of the BO field removed. This permits + matching a conditional branch regardless of the setting of the y + bit. Similarly for the 'at' bits used for power4 branch hints. */ +#define Y_MASK (((unsigned long) 1) << 21) +#define AT1_MASK (((unsigned long) 3) << 21) +#define AT2_MASK (((unsigned long) 9) << 21) +#define BBOY_MASK (BBO_MASK &~ Y_MASK) +#define BBOAT_MASK (BBO_MASK &~ AT1_MASK) + +/* A B form instruction setting the BO field and the condition bits of + the BI field. */ +#define BBOCB(op, bo, cb, aa, lk) \ + (BBO ((op), (bo), (aa), (lk)) | ((((unsigned long)(cb)) & 0x3) << 16)) +#define BBOCB_MASK BBOCB (0x3f, 0x1f, 0x3, 1, 1) + +/* A BBOCB_MASK with the y bit of the BO field removed. */ +#define BBOYCB_MASK (BBOCB_MASK &~ Y_MASK) +#define BBOATCB_MASK (BBOCB_MASK &~ AT1_MASK) +#define BBOAT2CB_MASK (BBOCB_MASK &~ AT2_MASK) + +/* A BBOYCB_MASK in which the BI field is fixed. */ +#define BBOYBI_MASK (BBOYCB_MASK | BI_MASK) +#define BBOATBI_MASK (BBOAT2CB_MASK | BI_MASK) + +/* An Context form instruction. */ +#define CTX(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x7)) +#define CTX_MASK CTX(0x3f, 0x7) + +/* An User Context form instruction. */ +#define UCTX(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x1f)) +#define UCTX_MASK UCTX(0x3f, 0x1f) + +/* The main opcode mask with the RA field clear. */ +#define DRA_MASK (OP_MASK | RA_MASK) + +/* A DS form instruction. */ +#define DSO(op, xop) (OP (op) | ((xop) & 0x3)) +#define DS_MASK DSO (0x3f, 3) + +/* A DE form instruction. */ +#define DEO(op, xop) (OP (op) | ((xop) & 0xf)) +#define DE_MASK DEO (0x3e, 0xf) + +/* An EVSEL form instruction. */ +#define EVSEL(op, xop) (OP (op) | (((unsigned long)(xop)) & 0xff) << 3) +#define EVSEL_MASK EVSEL(0x3f, 0xff) + +/* An M form instruction. */ +#define M(op, rc) (OP (op) | ((rc) & 1)) +#define M_MASK M (0x3f, 1) + +/* An M form instruction with the ME field specified. */ +#define MME(op, me, rc) (M ((op), (rc)) | ((((unsigned long)(me)) & 0x1f) << 1)) + +/* An M_MASK with the MB and ME fields fixed. */ +#define MMBME_MASK (M_MASK | MB_MASK | ME_MASK) + +/* An M_MASK with the SH and ME fields fixed. */ +#define MSHME_MASK (M_MASK | SH_MASK | ME_MASK) + +/* An MD form instruction. */ +#define MD(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0x7) << 2) | ((rc) & 1)) +#define MD_MASK MD (0x3f, 0x7, 1) + +/* An MD_MASK with the MB field fixed. */ +#define MDMB_MASK (MD_MASK | MB6_MASK) + +/* An MD_MASK with the SH field fixed. */ +#define MDSH_MASK (MD_MASK | SH6_MASK) + +/* An MDS form instruction. */ +#define MDS(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0xf) << 1) | ((rc) & 1)) +#define MDS_MASK MDS (0x3f, 0xf, 1) + +/* An MDS_MASK with the MB field fixed. */ +#define MDSMB_MASK (MDS_MASK | MB6_MASK) + +/* An SC form instruction. */ +#define SC(op, sa, lk) (OP (op) | ((((unsigned long)(sa)) & 1) << 1) | ((lk) & 1)) +#define SC_MASK (OP_MASK | (((unsigned long)0x3ff) << 16) | (((unsigned long)1) << 1) | 1) + +/* An VX form instruction. */ +#define VX(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x7ff)) + +/* The mask for an VX form instruction. */ +#define VX_MASK VX(0x3f, 0x7ff) + +/* An VA form instruction. */ +#define VXA(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x03f)) + +/* The mask for an VA form instruction. */ +#define VXA_MASK VXA(0x3f, 0x3f) + +/* An VXR form instruction. */ +#define VXR(op, xop, rc) (OP (op) | (((rc) & 1) << 10) | (((unsigned long)(xop)) & 0x3ff)) + +/* The mask for a VXR form instruction. */ +#define VXR_MASK VXR(0x3f, 0x3ff, 1) + +/* An X form instruction. */ +#define X(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x3ff) << 1)) + +/* A Z form instruction. */ +#define Z(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x1ff) << 1)) + +/* An X form instruction with the RC bit specified. */ +#define XRC(op, xop, rc) (X ((op), (xop)) | ((rc) & 1)) + +/* A Z form instruction with the RC bit specified. */ +#define ZRC(op, xop, rc) (Z ((op), (xop)) | ((rc) & 1)) + +/* The mask for an X form instruction. */ +#define X_MASK XRC (0x3f, 0x3ff, 1) + +/* The mask for a Z form instruction. */ +#define Z_MASK ZRC (0x3f, 0x1ff, 1) +#define Z2_MASK ZRC (0x3f, 0xff, 1) + +/* An X_MASK with the RA field fixed. */ +#define XRA_MASK (X_MASK | RA_MASK) + +/* An XRA_MASK with the W field clear. */ +#define XWRA_MASK (XRA_MASK & ~((unsigned long) 1 << 16)) + +/* An X_MASK with the RB field fixed. */ +#define XRB_MASK (X_MASK | RB_MASK) + +/* An X_MASK with the RT field fixed. */ +#define XRT_MASK (X_MASK | RT_MASK) + +/* An XRT_MASK mask with the L bits clear. */ +#define XLRT_MASK (XRT_MASK & ~((unsigned long) 0x3 << 21)) + +/* An X_MASK with the RA and RB fields fixed. */ +#define XRARB_MASK (X_MASK | RA_MASK | RB_MASK) + +/* An XRARB_MASK, but with the L bit clear. */ +#define XRLARB_MASK (XRARB_MASK & ~((unsigned long) 1 << 16)) + +/* An X_MASK with the RT and RA fields fixed. */ +#define XRTRA_MASK (X_MASK | RT_MASK | RA_MASK) + +/* An XRTRA_MASK, but with L bit clear. */ +#define XRTLRA_MASK (XRTRA_MASK & ~((unsigned long) 1 << 21)) + +/* An X form instruction with the L bit specified. */ +#define XOPL(op, xop, l) (X ((op), (xop)) | ((((unsigned long)(l)) & 1) << 21)) + +/* The mask for an X form comparison instruction. */ +#define XCMP_MASK (X_MASK | (((unsigned long)1) << 22)) + +/* The mask for an X form comparison instruction with the L field + fixed. */ +#define XCMPL_MASK (XCMP_MASK | (((unsigned long)1) << 21)) + +/* An X form trap instruction with the TO field specified. */ +#define XTO(op, xop, to) (X ((op), (xop)) | ((((unsigned long)(to)) & 0x1f) << 21)) +#define XTO_MASK (X_MASK | TO_MASK) + +/* An X form tlb instruction with the SH field specified. */ +#define XTLB(op, xop, sh) (X ((op), (xop)) | ((((unsigned long)(sh)) & 0x1f) << 11)) +#define XTLB_MASK (X_MASK | SH_MASK) + +/* An X form sync instruction. */ +#define XSYNC(op, xop, l) (X ((op), (xop)) | ((((unsigned long)(l)) & 3) << 21)) + +/* An X form sync instruction with everything filled in except the LS field. */ +#define XSYNC_MASK (0xff9fffff) + +/* An X_MASK, but with the EH bit clear. */ +#define XEH_MASK (X_MASK & ~((unsigned long )1)) + +/* An X form AltiVec dss instruction. */ +#define XDSS(op, xop, a) (X ((op), (xop)) | ((((unsigned long)(a)) & 1) << 25)) +#define XDSS_MASK XDSS(0x3f, 0x3ff, 1) + +/* An XFL form instruction. */ +#define XFL(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0x3ff) << 1) | (((unsigned long)(rc)) & 1)) +#define XFL_MASK XFL (0x3f, 0x3ff, 1) + +/* An X form isel instruction. */ +#define XISEL(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x1f) << 1)) +#define XISEL_MASK XISEL(0x3f, 0x1f) + +/* An XL form instruction with the LK field set to 0. */ +#define XL(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x3ff) << 1)) + +/* An XL form instruction which uses the LK field. */ +#define XLLK(op, xop, lk) (XL ((op), (xop)) | ((lk) & 1)) + +/* The mask for an XL form instruction. */ +#define XL_MASK XLLK (0x3f, 0x3ff, 1) + +/* An XL form instruction which explicitly sets the BO field. */ +#define XLO(op, bo, xop, lk) \ + (XLLK ((op), (xop), (lk)) | ((((unsigned long)(bo)) & 0x1f) << 21)) +#define XLO_MASK (XL_MASK | BO_MASK) + +/* An XL form instruction which explicitly sets the y bit of the BO + field. */ +#define XLYLK(op, xop, y, lk) (XLLK ((op), (xop), (lk)) | ((((unsigned long)(y)) & 1) << 21)) +#define XLYLK_MASK (XL_MASK | Y_MASK) + +/* An XL form instruction which sets the BO field and the condition + bits of the BI field. */ +#define XLOCB(op, bo, cb, xop, lk) \ + (XLO ((op), (bo), (xop), (lk)) | ((((unsigned long)(cb)) & 3) << 16)) +#define XLOCB_MASK XLOCB (0x3f, 0x1f, 0x3, 0x3ff, 1) + +#define BB_MASK (0x1f << 11) +/* An XL_MASK or XLYLK_MASK or XLOCB_MASK with the BB field fixed. */ +#define XLBB_MASK (XL_MASK | BB_MASK) +#define XLYBB_MASK (XLYLK_MASK | BB_MASK) +#define XLBOCBBB_MASK (XLOCB_MASK | BB_MASK) + +/* A mask for branch instructions using the BH field. */ +#define XLBH_MASK (XL_MASK | (0x1c << 11)) + +/* An XL_MASK with the BO and BB fields fixed. */ +#define XLBOBB_MASK (XL_MASK | BO_MASK | BB_MASK) + +/* An XL_MASK with the BO, BI and BB fields fixed. */ +#define XLBOBIBB_MASK (XL_MASK | BO_MASK | BI_MASK | BB_MASK) + +/* An XO form instruction. */ +#define XO(op, xop, oe, rc) \ + (OP (op) | ((((unsigned long)(xop)) & 0x1ff) << 1) | ((((unsigned long)(oe)) & 1) << 10) | (((unsigned long)(rc)) & 1)) +#define XO_MASK XO (0x3f, 0x1ff, 1, 1) + +/* An XO_MASK with the RB field fixed. */ +#define XORB_MASK (XO_MASK | RB_MASK) + +/* An XS form instruction. */ +#define XS(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0x1ff) << 2) | (((unsigned long)(rc)) & 1)) +#define XS_MASK XS (0x3f, 0x1ff, 1) + +/* A mask for the FXM version of an XFX form instruction. */ +#define XFXFXM_MASK (X_MASK | (1 << 11) | (1 << 20)) + +/* An XFX form instruction with the FXM field filled in. */ +#define XFXM(op, xop, fxm, p4) \ + (X ((op), (xop)) | ((((unsigned long)(fxm)) & 0xff) << 12) \ + | ((unsigned long)(p4) << 20)) + +#define SPR_MASK (0x3ff << 11) +/* An XFX form instruction with the SPR field filled in. */ +#define XSPR(op, xop, spr) \ + (X ((op), (xop)) | ((((unsigned long)(spr)) & 0x1f) << 16) | ((((unsigned long)(spr)) & 0x3e0) << 6)) +#define XSPR_MASK (X_MASK | SPR_MASK) + +/* An XFX form instruction with the SPR field filled in except for the + SPRBAT field. */ +#define XSPRBAT_MASK (XSPR_MASK &~ SPRBAT_MASK) + +/* An XFX form instruction with the SPR field filled in except for the + SPRG field. */ +#define XSPRG_MASK (XSPR_MASK & ~(0x1f << 16)) + +/* An X form instruction with everything filled in except the E field. */ +#define XE_MASK (0xffff7fff) + +/* An X form user context instruction. */ +#define XUC(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x1f)) +#define XUC_MASK XUC(0x3f, 0x1f) + +/* The BO encodings used in extended conditional branch mnemonics. */ +#define BODNZF (0x0) +#define BODNZFP (0x1) +#define BODZF (0x2) +#define BODZFP (0x3) +#define BODNZT (0x8) +#define BODNZTP (0x9) +#define BODZT (0xa) +#define BODZTP (0xb) + +#define BOF (0x4) +#define BOFP (0x5) +#define BOFM4 (0x6) +#define BOFP4 (0x7) +#define BOT (0xc) +#define BOTP (0xd) +#define BOTM4 (0xe) +#define BOTP4 (0xf) + +#define BODNZ (0x10) +#define BODNZP (0x11) +#define BODZ (0x12) +#define BODZP (0x13) +#define BODNZM4 (0x18) +#define BODNZP4 (0x19) +#define BODZM4 (0x1a) +#define BODZP4 (0x1b) + +#define BOU (0x14) + +/* The BI condition bit encodings used in extended conditional branch + mnemonics. */ +#define CBLT (0) +#define CBGT (1) +#define CBEQ (2) +#define CBSO (3) + +/* The TO encodings used in extended trap mnemonics. */ +#define TOLGT (0x1) +#define TOLLT (0x2) +#define TOEQ (0x4) +#define TOLGE (0x5) +#define TOLNL (0x5) +#define TOLLE (0x6) +#define TOLNG (0x6) +#define TOGT (0x8) +#define TOGE (0xc) +#define TONL (0xc) +#define TOLT (0x10) +#define TOLE (0x14) +#define TONG (0x14) +#define TONE (0x18) +#define TOU (0x1f) + +/* Smaller names for the flags so each entry in the opcodes table will + fit on a single line. */ +#undef PPC +#define PPC PPC_OPCODE_PPC +#define PPCCOM PPC_OPCODE_PPC | PPC_OPCODE_COMMON +#define PPC64 PPC_OPCODE_64 | PPC_OPCODE_PPC +#define COM PPC_OPCODE_POWER | PPC_OPCODE_PPC | PPC_OPCODE_COMMON +#define COM32 PPC_OPCODE_POWER | PPC_OPCODE_PPC | PPC_OPCODE_COMMON | PPC_OPCODE_32 + +/* The opcode table. + + The format of the opcode table is: + + NAME OPCODE MASK FLAGS { OPERANDS } + + NAME is the name of the instruction. + OPCODE is the instruction opcode. + MASK is the opcode mask; this is used to tell the disassembler + which bits in the actual opcode must match OPCODE. + FLAGS are flags indicated what processors support the instruction. + OPERANDS is the list of operands. + + The disassembler reads the table in order and prints the first + instruction which matches, so this table is sorted to put more + specific instructions before more general instructions. It is also + sorted by major opcode. */ + +static const struct powerpc_opcode powerpc_opcodes[] = { + +{ "cmplwi", OPL(10,0), OPL_MASK, PPCCOM, { OBF, RA, UI } }, +{ "cmpwi", OPL(11,0), OPL_MASK, PPCCOM, { OBF, RA, SI } }, +{ "cmpw", XOPL(31,0,0), XCMPL_MASK, PPCCOM, { OBF, RA, RB } }, +{ "cmplw", XOPL(31,32,0), XCMPL_MASK, PPCCOM, { OBF, RA, RB } }, +{ "fcmpu", X(63,0), X_MASK|(3<<21), COM, { BF, FRA, FRB } }, + +{ "li", OP(14), DRA_MASK, PPCCOM, { RT, SI } }, +{ "lis", OP(15), DRA_MASK, PPCCOM, { RT, SI } }, + +{ "addi", OP(14), OP_MASK, PPCCOM, { RT, RA0, SI } }, +{ "addis", OP(15), OP_MASK, PPCCOM, { RT,RA0,SI } }, +{ "blt-", BBOCB(16,BOT,CBLT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, +{ "bc", B(16,0,0), B_MASK, COM, { BO, BI, BD } }, +{ "bcl", B(16,0,1), B_MASK, COM, { BO, BI, BD } }, +{ "b", B(18,0,0), B_MASK, COM, { LI } }, +{ "bl", B(18,0,1), B_MASK, COM, { LI } }, +{ "blr", XLO(19,BOU,16,0), XLBOBIBB_MASK, PPCCOM, { 0 } }, +{ "bctr", XLO(19,BOU,528,0), XLBOBIBB_MASK, COM, { 0 } }, +{ "bctrl", XLO(19,BOU,528,1), XLBOBIBB_MASK, COM, { 0 } }, + +{ "rlwinm", M(21,0), M_MASK, PPCCOM, { RA,RS,SH,MBE,ME } }, +{ "nop", OP(24), 0xffffffff, PPCCOM, { 0 } }, +{ "ori", OP(24), OP_MASK, PPCCOM, { RA, RS, UI } }, +{ "xoris", OP(27), OP_MASK, PPCCOM, { RA, RS, UI } }, +{ "ldx", X(31,21), X_MASK, PPC64, { RT, RA0, RB } }, +{ "lwzx", X(31,23), X_MASK, PPCCOM, { RT, RA0, RB } }, +{ "slw", XRC(31,24,0), X_MASK, PPCCOM, { RA, RS, RB } }, +{ "and", XRC(31,28,0), X_MASK, COM, { RA, RS, RB } }, +{ "sub", XO(31,40,0,0), XO_MASK, PPC, { RT, RB, RA } }, +{ "lbzx", X(31,87), X_MASK, COM, { RT, RA0, RB } }, +{ "neg", XO(31,104,0,0), XORB_MASK, COM, { RT, RA } }, +{ "not", XRC(31,124,0), X_MASK, COM, { RA, RS, RBS } }, +{ "stwx", X(31,151), X_MASK, PPCCOM, { RS, RA0, RB } }, +{ "stbx", X(31,215), X_MASK, COM, { RS, RA0, RB } }, +{ "mullw", XO(31,235,0,0), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "add", XO(31,266,0,0), XO_MASK, PPCCOM, { RT, RA, RB } }, +{ "lhzx", X(31,279), X_MASK, COM, { RT, RA0, RB } }, +{ "xor", XRC(31,316,0), X_MASK, COM, { RA, RS, RB } }, +{ "mflr", XSPR(31,339,8), XSPR_MASK, COM, { RT } }, +{ "sthx", X(31,407), X_MASK, COM, { RS, RA0, RB } }, +{ "mr", XRC(31,444,0), X_MASK, COM, { RA, RS, RBS } }, +{ "or", XRC(31,444,0), X_MASK, COM, { RA, RS, RB } }, +{ "divwu", XO(31,459,0,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "mtlr", XSPR(31,467,8), XSPR_MASK, COM, { RS } }, +{ "mtctr", XSPR(31,467,9), XSPR_MASK, COM, { RS } }, +{ "divw", XO(31,491,0,0), XO_MASK, PPC, { RT, RA, RB } }, +{ "lfsx", X(31,535), X_MASK, COM, { FRT, RA0, RB } }, +{ "srw", XRC(31,536,0), X_MASK, PPCCOM, { RA, RS, RB } }, +{ "stfsx", X(31,663), X_MASK, COM, { FRS, RA0, RB } }, +{ "sraw", XRC(31,792,0), X_MASK, PPCCOM, { RA, RS, RB } }, +{ "extsh", XRC(31,922,0), XRB_MASK, PPCCOM, { RA, RS } }, +{ "extsb", XRC(31,954,0), XRB_MASK, PPC, { RA, RS} }, + +{ "lwz", OP(32), OP_MASK, PPCCOM, { RT, D, RA0 } }, +{ "lbz", OP(34), OP_MASK, COM, { RT, D, RA0 } }, +{ "stw", OP(36), OP_MASK, PPCCOM, { RS, D, RA0 } }, +{ "stwu", OP(37), OP_MASK, PPCCOM, { RS, D, RAS } }, +{ "stb", OP(38), OP_MASK, COM, { RS, D, RA0 } }, +{ "lhz", OP(40), OP_MASK, COM, { RT, D, RA0 } }, +{ "sth", OP(44), OP_MASK, COM, { RS, D, RA0 } }, +{ "lfs", OP(48), OP_MASK, COM, { FRT, D, RA0 } }, +{ "lfd", OP(50), OP_MASK, COM, { FRT, D, RA0 } }, +{ "stfs", OP(52), OP_MASK, COM, { FRS, D, RA0 } }, +{ "stfd", OP(54), OP_MASK, COM, { FRS, D, RA0 } }, +{ "ld", DSO(58,0), DS_MASK, PPC64, { RT, DS, RA0 } }, + +{ "fdivs", A(59,18,0), AFRC_MASK, PPC, { FRT, FRA, FRB } }, +{ "fsubs", A(59,20,0), AFRC_MASK, PPC, { FRT, FRA, FRB } }, +{ "fadds", A(59,21,0), AFRC_MASK, PPC, { FRT, FRA, FRB } }, +{ "fmuls", A(59,25,0), AFRB_MASK, PPC, { FRT, FRA, FRC } }, +{ "std", DSO(62,0), DS_MASK, PPC64, { RS, DS, RA0 } }, +{ "stdu", DSO(62,1), DS_MASK, PPC64, { RS, DS, RAS } }, +{ "frsp", XRC(63,12,0), XRA_MASK, COM, { FRT, FRB } }, +{ "fctiwz", XRC(63,15,0), XRA_MASK, PPCCOM, { FRT, FRB } }, +{ "fsub", A(63,20,0), AFRC_MASK, PPCCOM, { FRT, FRA, FRB } }, +{ "fneg", XRC(63,40,0), XRA_MASK, COM, { FRT, FRB } }, +}; + +static const int powerpc_num_opcodes = + sizeof (powerpc_opcodes) / sizeof (powerpc_opcodes[0]); diff --git a/code/qcommon/vm_powerpc_asm.h b/code/qcommon/vm_powerpc_asm.h new file mode 100644 index 0000000..9bccf18 --- /dev/null +++ b/code/qcommon/vm_powerpc_asm.h @@ -0,0 +1,156 @@ +/* +=========================================================================== +Copyright (C) 2008 Przemyslaw Iskra <sparky@pld-linux.org> + +This file is part of Quake III Arena source code. + +Quake III Arena source code is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Quake III Arena source code is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Quake III Arena source code; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +=========================================================================== +*/ + +#ifndef VM_POWERPC_ASM_H +#define VM_POWERPC_ASM_H + +/* + * Register information according to: + * http://refspecs.freestandards.org/elf/elfspec_ppc.pdf + */ + +#define r0 0 // volatile +#define r1 1 // caller safe ( stack pointer ) +#define r2 2 // reserved +#define r3 3 // callee safe +#define r4 4 // callee safe +#define r5 5 // callee safe +#define r6 6 // callee safe +#define r7 7 // callee safe +#define r8 8 // callee safe +#define r9 9 // callee safe +#define r10 10 // callee safe +#define r11 11 // volatile +#define r12 12 // volatile +#define r13 13 // reserved ( small data area ) +#define r14 14 // caller safe +#define r15 15 // caller safe +#define r16 16 // caller safe +#define r17 17 // caller safe +#define r18 18 // caller safe +#define r19 19 // caller safe +#define r20 20 // caller safe +#define r21 21 // caller safe +#define r22 22 // caller safe +#define r23 23 // caller safe +#define r24 24 // caller safe +#define r25 25 // caller safe +#define r26 26 // caller safe +#define r27 27 // caller safe +#define r28 28 // caller safe +#define r29 29 // caller safe +#define r30 30 // caller safe +#define r31 31 // caller safe ( environment pointers ) + +#define f0 0 // callee safe +#define f1 1 // callee safe +#define f2 2 // callee safe +#define f3 3 // callee safe +#define f4 4 // callee safe +#define f5 5 // callee safe +#define f6 6 // callee safe +#define f7 7 // callee safe +#define f8 8 // callee safe +#define f9 9 // callee safe +#define f10 10 // callee safe +#define f11 11 // callee safe +#define f12 12 // callee safe +#define f13 13 // callee safe +#define f14 14 // caller safe +#define f15 15 // caller safe +#define f16 16 // caller safe +#define f17 17 // caller safe +#define f18 18 // caller safe +#define f19 19 // caller safe +#define f20 20 // caller safe +#define f21 21 // caller safe +#define f22 22 // caller safe +#define f23 23 // caller safe +#define f24 24 // caller safe +#define f25 25 // caller safe +#define f26 26 // caller safe +#define f27 27 // caller safe +#define f28 28 // caller safe +#define f29 29 // caller safe +#define f30 30 // caller safe +#define f31 31 // caller safe + +#define cr0 0 // volatile +#define cr1 1 // volatile +#define cr2 2 // caller safe +#define cr3 3 // caller safe +#define cr4 4 // caller safe +#define cr5 5 // volatile +#define cr6 6 // volatile +#define cr7 7 // volatile + +#define lt 0 +#define gt 1 +#define eq 2 +#define so 3 + +// branch bo field values +#define branchLikely 1 +#define branchFalse 4 +#define branchTrue 12 +#define branchAlways 20 + +// branch extensions (change branch type) +#define branchExtLink 0x0001 + + +/* + * This list must match exactly the powerpc_opcodes list from vm_powerpc_asm.c + * If you're changing the original list remember to regenerate this one. You + * may do so using this perl script: + perl -p -e 'BEGIN{%t=("-"=>m=>"+"=>p=>"."=>d=>);$l=""}$o=0 if/^}/; + if($o && s/^{ "(.*?)([\.+-])?".+/i\U$1\E$t{$2}/s){$_.="_" while$l{$_}; + $l{$_}=1;if(length $l.$_ > 70){$s=$_;$_="\t$l\n";$l="$s,"}else + {$l.=" $_,";$_=undef}}else{$o=1 if/powerpc_opcodes.*=/;$_=undef}; + END{print "\t$l\n"}' < vm_powerpc_asm.c + */ + +typedef enum powerpc_iname { + iCMPLWI, iCMPWI, iCMPW, iCMPLW, iFCMPU, iLI, iLIS, iADDI, iADDIS, + iBLTm, iBC, iBCL, iB, iBL, iBLR, iBCTR, iBCTRL, iRLWINM, iNOP, iORI, + iXORIS, iLDX, iLWZX, iSLW, iAND, iSUB, iLBZX, iNEG, iNOT, iSTWX, iSTBX, + iMULLW, iADD, iLHZX, iXOR, iMFLR, iSTHX, iMR, iOR, iDIVWU, iMTLR, + iMTCTR, iDIVW, iLFSX, iSRW, iSTFSX, iSRAW, iEXTSH, iEXTSB, iLWZ, iLBZ, + iSTW, iSTWU, iSTB, iLHZ, iSTH, iLFS, iLFD, iSTFS, iSTFD, iLD, iFDIVS, + iFSUBS, iFADDS, iFMULS, iSTD, iSTDU, iFRSP, iFCTIWZ, iFSUB, iFNEG, +} powerpc_iname_t; + +#include <stdint.h> + +typedef uint32_t ppc_instruction_t; + +extern ppc_instruction_t +asm_instruction( powerpc_iname_t, const int, const long int * ); + +#define IN( inst, args... ) \ +({\ + const long int argv[] = { args };\ + const int argc = sizeof( argv ) / sizeof( argv[0] ); \ + asm_instruction( inst, argc, argv );\ +}) + +#endif |