00001 /* 00002 * Copyright (C) 1996-2001, The University of Queensland 00003 * Copyright (C) 2001, Sun Microsystems, Inc 00004 * Copyright (C) 2002, Trent Waddington 00005 * 00006 * See the file "LICENSE.TERMS" for information on usage and 00007 * redistribution of this file, and for a DISCLAIMER OF ALL 00008 * WARRANTIES. 00009 * 00010 */ 00011 00012 /*============================================================================== 00013 * FILE: decoder.h 00014 * OVERVIEW: The interface to the instruction decoder. 00015 *============================================================================*/ 00016 00017 /* 00018 * $Revision: 1.18 $ // 1.16.2.2 00019 * 00020 * 08 Apr 02 - Mike: Mods for boomerang 00021 */ 00022 00023 #ifndef _DECODER_H_ 00024 #define _DECODER_H_ 00025 00026 #include <list> 00027 #include "types.h" 00028 #include "rtl.h" 00029 00030 class Exp; 00031 class RTL; 00032 class BinaryFile; 00033 class Prog; 00034 00035 // These are the instruction classes defined in "A Transformational Approach to 00036 // Binary Translation of Delayed Branches" for SPARC instructions. 00037 // Extended for HPPA. Ignored by machines with no delay slots 00038 enum ICLASS { 00039 NCT, // Non Control Transfer 00040 SD, // Static Delayed 00041 DD, // Dynamic Delayed 00042 SCD, // Static Conditional Delayed 00043 SCDAN, // Static Conditional Delayed, Anulled if Not taken 00044 SCDAT, // Static Conditional Delayed, Anulled if Taken 00045 SU, // Static Unconditional (not delayed) 00046 SKIP, // Skip successor 00047 // TRAP, // Trap 00048 NOP, // No operation (e.g. sparc BN,A) 00049 // HPPA only 00050 DU, // Dynamic Unconditional (not delayed) 00051 NCTA // Non Control Transfer, with following instr Anulled 00052 }; 00053 /*============================================================================== 00054 * The DecodeResult struct contains all the information that results from 00055 * calling the decoder. This prevents excessive use of confusing 00056 * reference parameters. 00057 *============================================================================*/ 00058 struct DecodeResult { 00059 /* 00060 * Resets all the fields to their default values. 00061 */ 00062 void reset(); 00063 00064 /* 00065 * The number of bytes decoded in the main instruction 00066 */ 00067 int numBytes; 00068 00069 /* 00070 * The RTL constructed (if any). 00071 */ 00072 RTL* rtl; 00073 00074 /* 00075 * Indicates whether or not a valid instruction was decoded. 00076 */ 00077 bool valid; 00078 00079 /* 00080 * The class of the instruction decoded. Will be one of the classes described in "A Transformational Approach 00081 * to Binary Translation of Delayed Branches" (plus two more HPPA specific entries). 00082 * Ignored by machines with no delay slots 00083 */ 00084 ICLASS type; 00085 00086 /* 00087 * If true, don't add numBytes and decode there; instead, re-decode the current instruction. Needed for 00088 * instructions like the Pentium BSF/BSR, which emit branches (so numBytes needs to be carefully set for the 00089 * fall through out edge after the branch) 00090 */ 00091 bool reDecode; 00092 00093 /* 00094 * If non zero, this field represents a new native address to be used as the out-edge for this instruction's BB. * At present, only used for the SPARC call/add caller prologue 00095 */ 00096 ADDRESS forceOutEdge; 00097 00098 }; 00099 00100 /*============================================================================== 00101 * The NJMCDecoder class is a class that contains NJMC generated decoding methods. 00102 *============================================================================*/ 00103 class NJMCDecoder { 00104 protected: 00105 Prog* prog; 00106 public: 00107 /* 00108 * Constructor and destructor 00109 */ 00110 NJMCDecoder(Prog* prog); 00111 virtual ~NJMCDecoder() {}; 00112 00113 /* 00114 * Decodes the machine instruction at pc and returns an RTL instance for the instruction. 00115 */ 00116 virtual DecodeResult& decodeInstruction (ADDRESS pc, int delta) = 0; 00117 00118 /* 00119 * Disassembles the machine instruction at pc and returns the number of bytes disassembled. 00120 * Assembler output goes to global _assembly 00121 */ 00122 virtual int decodeAssemblyInstruction (ADDRESS pc, int delta) = 0; 00123 00124 RTLInstDict& getRTLDict() { return RTLDict; } 00125 00126 void computedJump(const char* name, int size, Exp* dest, ADDRESS pc, std::list<Statement*>* stmts, 00127 DecodeResult& result); 00128 00129 void computedCall(const char* name, int size, Exp* dest, ADDRESS pc, std::list<Statement*>* stmts, 00130 DecodeResult& result); 00131 00132 Prog* getProg() {return prog;} 00133 00134 protected: 00135 00136 /* 00137 * Given an instruction name and a variable list of Exps representing the actual operands of the instruction, 00138 * use the RTL template dictionary to return the list of Statements representing the semantics of the 00139 * instruction. This method also displays a disassembly of the instruction if the relevant compilation flag 00140 * has been set. 00141 */ 00142 std::list<Statement*>* instantiate(ADDRESS pc, const char* name, ...); 00143 00144 /* 00145 * Similarly, given a parameter name and a list of Exp*'s 00146 * representing sub-parameters, return a fully substituted 00147 * Exp for the whole expression 00148 */ 00149 Exp* instantiateNamedParam(char *name, ...); 00150 00151 /* 00152 * In the event that it's necessary to synthesize the call of a named parameter generated with 00153 * instantiateNamedParam(), this method will substitute the arguments that * follow into the expression. 00154 * Should only be used after e = instantiateNamedParam(name, ..); 00155 */ 00156 void substituteCallArgs(char *name, Exp*& exp, ...); 00157 00158 /* 00159 * This used to be the UNCOND_JUMP macro; it's extended to handle jumps to other procedures 00160 */ 00161 void unconditionalJump(const char* name, int size, ADDRESS relocd, int delta, ADDRESS pc, 00162 std::list<Statement*>* stmts, DecodeResult& result); 00163 00164 /* 00165 * String for the constructor names (displayed with use "-c") 00166 */ 00167 char constrName[84]; 00168 00169 /* decodes a number */ 00170 Exp* dis_Num(unsigned num); 00171 /* decodes a register */ 00172 Exp* dis_Reg(int regNum); 00173 00174 // Public dictionary of instruction patterns, and other information summarised from the SSL file 00175 // (e.g. source machine's endianness) 00176 RTLInstDict RTLDict; 00177 }; 00178 00179 // Function used to guess whether a given pc-relative address is the start of a function 00180 00181 /* 00182 * Does the instruction at the given offset correspond to a caller prologue? 00183 * NOTE: Implemented in the decoder.m files 00184 */ 00185 bool isFuncPrologue(ADDRESS hostPC); 00186 00187 00188 /*============================================================================== 00189 * These are the macros that each of the .m files depend upon. 00190 *============================================================================*/ 00191 #define DEBUG_DECODER (Boomerang::get()->debugDecoder) 00192 #define SHOW_ASM(output) if (DEBUG_DECODER) \ 00193 std::cout << std::hex << pc << std::dec << ": " << output << std::endl; 00194 #define DEBUG_STMTS \ 00195 std::list<Statement*>& lst = result.rtl->getList(); \ 00196 if (DEBUG_DECODER) { \ 00197 std::list<Statement*>::iterator ii; \ 00198 for (ii = lst.begin(); ii != lst.end(); ii++) \ 00199 std::cout << " " << *ii << "\n"; \ 00200 } 00201 00202 /* 00203 * addresstoPC returns the raw number as the address. PC could be an 00204 * abstract type, in our case, PC is the raw address. 00205 */ 00206 #define addressToPC(pc) pc 00207 00208 // Macros for branches. Note: don't put inside a "match" statement, since 00209 // the ordering is changed and multiple copies may be made 00210 00211 #define COND_JUMP(name, size, relocd, cond) \ 00212 result.rtl = new RTL(pc, stmts); \ 00213 BranchStatement* jump = new BranchStatement; \ 00214 result.rtl->appendStmt(jump); \ 00215 result.numBytes = size; \ 00216 jump->setDest(relocd-delta); \ 00217 jump->setCondType(cond); \ 00218 SHOW_ASM(name<<" "<<relocd) 00219 00220 // This one is X86 specific 00221 #define SETS(name, dest, cond) \ 00222 BoolAssign* bs = new BoolAssign(8); \ 00223 bs->setLeftFromList(stmts); \ 00224 stmts->clear(); \ 00225 result.rtl = new RTL(pc, stmts); \ 00226 result.rtl->appendStmt(bs); \ 00227 bs->setCondType(cond); \ 00228 result.numBytes = 3; \ 00229 SHOW_ASM(name<<" "<<dest) 00230 00231 /*============================================================================== 00232 * These are arrays used to map register numbers to their names. 00233 *============================================================================*/ 00234 extern char *r32_names[]; 00235 extern char *sr16_names[]; 00236 extern char *r8_names[]; 00237 extern char *r16_names[]; 00238 extern char *fp_names[]; 00239 00240 /*============================================================================== 00241 * This array decodes scale field values in an index memory expression 00242 * to the scale factor they represent. 00243 *============================================================================*/ 00244 extern int scale[]; 00245 00246 00247 // General purpose 00248 void not_used(int unwanted); 00249 00250 /********************************** 00251 * These are the fetch routines. 00252 **********************************/ 00253 00254 /* 00255 * Returns the byte (8 bits) starting at the given address. 00256 */ 00257 Byte getByte(ADDRESS lc); 00258 00259 /* 00260 * Returns the word (16 bits) starting at the given address. 00261 */ 00262 SWord getWord(ADDRESS lc); 00263 00264 /* 00265 * Returns the double (32 bits) starting at the given address. 00266 */ 00267 DWord getDword(ADDRESS lc); 00268 00269 00270 #endif