decoder.h

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 1996-2001, The University of Queensland
00003  * Copyright (C) 2001, Sun Microsystems, Inc
00004  * Copyright (C) 2002, Trent Waddington
00005  *
00006  * See the file "LICENSE.TERMS" for information on usage and
00007  * redistribution of this file, and for a DISCLAIMER OF ALL
00008  * WARRANTIES.
00009  *
00010  */
00011 
00012 /*==============================================================================
00013  * FILE:       decoder.h
00014  * OVERVIEW:   The interface to the instruction decoder.
00015  *============================================================================*/
00016 
00017 /* 
00018  * $Revision: 1.18 $    // 1.16.2.2
00019  *
00020  * 08 Apr 02 - Mike: Mods for boomerang
00021  */
00022 
00023 #ifndef _DECODER_H_
00024 #define _DECODER_H_
00025 
00026 #include <list>
00027 #include "types.h"
00028 #include "rtl.h"
00029 
00030 class Exp;
00031 class RTL;
00032 class BinaryFile;
00033 class Prog;
00034 
/**
 * Instruction classes defined in "A Transformational Approach to Binary
 * Translation of Delayed Branches" for SPARC instructions, extended for HPPA.
 * Machines with no delay slots ignore this classification.
 *
 * The values are written out explicitly; they are the same values the
 * compiler would assign implicitly to this list.
 */
enum ICLASS {
    NCT   = 0,      ///< Non Control Transfer
    SD    = 1,      ///< Static Delayed
    DD    = 2,      ///< Dynamic Delayed
    SCD   = 3,      ///< Static Conditional Delayed
    SCDAN = 4,      ///< Static Conditional Delayed, Annulled if Not taken
    SCDAT = 5,      ///< Static Conditional Delayed, Annulled if Taken
    SU    = 6,      ///< Static Unconditional (not delayed)
    SKIP  = 7,      ///< Skip successor
//  TRAP,           // Trap (entry is commented out; it has no value)
    NOP   = 8,      ///< No operation (e.g. sparc BN,A)

    // HPPA only
    DU    = 9,      ///< Dynamic Unconditional (not delayed)
    NCTA  = 10      ///< Non Control Transfer, with following instr Annulled
};
00053 /*==============================================================================
00054  * The DecodeResult struct contains all the information that results from
00055  * calling the decoder. This prevents excessive use of confusing
00056  * reference parameters.
00057  *============================================================================*/
00058 struct DecodeResult {
00059         /*
00060          * Resets all the fields to their default values.
00061          */
00062         void        reset();
00063 
00064         /*
00065          * The number of bytes decoded in the main instruction
00066          */
00067         int         numBytes;
00068 
00069         /*
00070          * The RTL constructed (if any).
00071          */
00072         RTL*        rtl;
00073 
00074         /*
00075          * Indicates whether or not a valid instruction was decoded.
00076          */
00077         bool        valid;
00078 
00079         /*
00080          * The class of the instruction decoded. Will be one of the classes described in "A Transformational Approach
00081          * to Binary Translation of Delayed Branches" (plus two more HPPA specific entries).
00082          * Ignored by machines with no delay slots
00083          */
00084         ICLASS type;
00085 
00086         /*
00087          * If true, don't add numBytes and decode there; instead, re-decode the current instruction. Needed for
00088          * instructions like the Pentium BSF/BSR, which emit branches (so numBytes needs to be carefully set for the
00089          * fall through out edge after the branch)
00090          */
00091         bool reDecode;
00092 
00093         /*
00094          * If non zero, this field represents a new native address to be used as the out-edge for this instruction's BB.         * At present, only used for the SPARC call/add caller prologue
00095          */
00096         ADDRESS forceOutEdge;
00097 
00098 };
00099 
00100 /*==============================================================================
00101  * The NJMCDecoder class is a class that contains NJMC generated decoding methods.
00102  *============================================================================*/
00103 class NJMCDecoder {
00104 protected:
00105         Prog*       prog;
00106 public:
00107         /*
00108          * Constructor and destructor
00109          */
00110                     NJMCDecoder(Prog* prog);
00111 virtual             ~NJMCDecoder() {};
00112 
00113         /*
00114          * Decodes the machine instruction at pc and returns an RTL instance for the instruction.
00115          */
00116 virtual DecodeResult& decodeInstruction (ADDRESS pc, int delta) = 0;
00117 
00118         /*
00119          * Disassembles the machine instruction at pc and returns the number of bytes disassembled.
00120          * Assembler output goes to global _assembly
00121          */
00122 virtual int decodeAssemblyInstruction (ADDRESS pc, int delta) = 0;
00123 
00124         RTLInstDict& getRTLDict() { return RTLDict; }
00125 
00126         void        computedJump(const char* name, int size, Exp* dest, ADDRESS pc, std::list<Statement*>* stmts,
00127                         DecodeResult& result);
00128 
00129         void        computedCall(const char* name, int size, Exp* dest, ADDRESS pc, std::list<Statement*>* stmts,
00130                         DecodeResult& result);
00131 
00132         Prog*       getProg() {return prog;}
00133 
00134 protected:
00135 
00136         /*
00137          * Given an instruction name and a variable list of Exps representing the actual operands of the instruction,
00138          * use the RTL template dictionary to return the list of Statements representing the semantics of the
00139          * instruction. This method also displays a disassembly of the instruction if the relevant compilation flag
00140          * has been set.
00141          */
00142         std::list<Statement*>* instantiate(ADDRESS pc, const char* name, ...);
00143 
00144         /*
00145          * Similarly, given a parameter name and a list of Exp*'s
00146          * representing sub-parameters, return a fully substituted
00147          * Exp for the whole expression
00148          */
00149         Exp* instantiateNamedParam(char *name, ...);
00150 
00151         /*
00152          * In the event that it's necessary to synthesize the call of a named parameter generated with
00153          * instantiateNamedParam(), this method will substitute the arguments that * follow into the expression.
00154          * Should only be used after e = instantiateNamedParam(name, ..);
00155          */
00156         void        substituteCallArgs(char *name, Exp*& exp, ...);
00157 
00158         /*
00159          * This used to be the UNCOND_JUMP macro; it's extended to handle jumps to other procedures
00160          */
00161         void        unconditionalJump(const char* name, int size, ADDRESS relocd, int delta, ADDRESS pc,
00162                         std::list<Statement*>* stmts, DecodeResult& result);
00163 
00164         /*
00165          * String for the constructor names (displayed with use "-c")
00166          */
00167         char        constrName[84];
00168 
00169         /* decodes a number */
00170         Exp*        dis_Num(unsigned num);
00171         /* decodes a register */
00172         Exp*        dis_Reg(int regNum);
00173 
00174         // Public dictionary of instruction patterns, and other information summarised from the SSL file
00175         // (e.g. source machine's endianness)
00176         RTLInstDict RTLDict;
00177 };
00178 
00179 // Function used to guess whether a given pc-relative address is the start of a function
00180 
00181         /*
00182          * Does the instruction at the given offset correspond to a caller prologue?
00183          * NOTE: Implemented in the decoder.m files
00184          */
00185         bool        isFuncPrologue(ADDRESS hostPC);
00186 
00187 
/*==============================================================================
 * These are the macros that each of the .m files depends upon.
 *============================================================================*/

// Expands to the debugDecoder member of the object returned by
// Boomerang::get(). SHOW_ASM and DEBUG_STMTS test it before printing.
#define DEBUG_DECODER (Boomerang::get()->debugDecoder)
// Prints one line of the form "<pc in hex>: <output>" to std::cout when
// DEBUG_DECODER is true. `output` is spliced into the << chain, so it may
// itself be a chain such as name << " " << operand. `pc` is not a parameter:
// it is picked up from the scope in which the macro is expanded.
//
// The conditional is wrapped in do { } while (0) so that it can never capture
// an `else` that belongs to an enclosing if statement. The terminating
// semicolon is deliberately kept inside the macro: COND_JUMP and SETS end with
// SHOW_ASM(...) and supply no semicolon of their own, so existing expansions
// rely on it.
#define SHOW_ASM(output) \
    do { \
        if (DEBUG_DECODER) \
            std::cout << std::hex << pc << std::dec << ": " << output << std::endl; \
    } while (0);
// Binds `lst` (in the expanding scope) to the statement list of result.rtl
// and, when DEBUG_DECODER is true, streams every element of that list to
// std::cout, one per line, behind a fixed indent. `result` comes from the
// scope in which the macro is expanded. Because `lst` is declared at the
// level of the expansion, the macro can be used only once per scope.
#define DEBUG_STMTS \
    std::list<Statement*>& lst = result.rtl->getList(); \
    if (DEBUG_DECODER) { \
        for (std::list<Statement*>::iterator it = lst.begin(); it != lst.end(); ++it) \
            std::cout << "          " << *it << "\n"; \
    }
00201 
/*
 * addressToPC returns the raw number as the address.  PC could be an
 * abstract type, in our case, PC is the raw address.
 *
 * The expansion is parenthesized so that an expression argument keeps its
 * grouping when the result is used inside a larger expression, e.g.
 * addressToPC(a + b) * c.
 */
#define addressToPC(pc)  (pc)
00207 
// Macros for branches. Note: don't put inside a "match" statement, since
// the ordering is changed and multiple copies may be made.
//
// COND_JUMP builds the RTL for a conditional branch:
//   name   - text shown in the debug disassembly line
//   size   - value stored in result.numBytes
//   relocd - branch target; the destination is set to relocd minus delta
//   cond   - passed to setCondType() on the new BranchStatement
// It uses pc, stmts, delta and result from the expanding scope and declares
// `jump` there, so it can be expanded only once per scope. The size and relocd
// arguments are parenthesized wherever they are used so that expression
// arguments keep their grouping. The expansion ends with SHOW_ASM(...) and
// adds no semicolon of its own.
#define COND_JUMP(name, size, relocd, cond) \
    result.rtl = new RTL(pc, stmts); \
    BranchStatement* jump = new BranchStatement; \
    result.rtl->appendStmt(jump); \
    result.numBytes = (size); \
    jump->setDest((relocd) - delta); \
    jump->setCondType(cond); \
    SHOW_ASM(name << " " << (relocd))
00219 
// This one is X86 specific.
// Builds the RTL for a set-on-condition instruction:
//   name - text shown in the debug disassembly line
//   dest - operand shown after the name in that line (not used otherwise)
//   cond - passed to setCondType() on the new BoolAssign
// The BoolAssign takes its left-hand side from stmts; stmts is then cleared
// before it is handed to the new RTL, and the BoolAssign is appended to that
// RTL. result.numBytes is always set to 3. The macro uses pc, stmts and result
// from the expanding scope and declares `bs` there. The order of the
// statements matters and must not be changed.
#define SETS(name, dest, cond)          \
    BoolAssign* bs = new BoolAssign(8); \
    bs->setLeftFromList(stmts);         \
    stmts->clear();                     \
    result.rtl = new RTL(pc, stmts);    \
    result.rtl->appendStmt(bs);         \
    bs->setCondType(cond);              \
    result.numBytes = 3;                \
    SHOW_ASM(name<<" "<<dest)
00230 
/*==============================================================================
 * Lookup tables mapping register numbers to their names. They are only
 * declared here; the definitions live elsewhere.
 *============================================================================*/
extern char *r32_names[];
extern char *sr16_names[];
extern char *r8_names[];
extern char *r16_names[];
extern char *fp_names[];
00239 
/*==============================================================================
 * Table that decodes the scale field value of an index memory expression
 * into the scale factor it represents. Declared here, defined elsewhere.
 *============================================================================*/
extern int scale[];
00245 
00246 
// General purpose helper taking one int argument that it is named as not
// wanting.
// NOTE(review): presumably exists to silence unused-variable warnings;
// confirm against its definition.
void not_used(int unwanted);
00249 
00250 /**********************************
00251  * These are the fetch routines.
00252  **********************************/
00253 
00254 /*
00255  * Returns the byte (8 bits) starting at the given address.
00256  */
00257 Byte getByte(ADDRESS lc);
00258 
00259 /*
00260  * Returns the word (16 bits) starting at the given address.
00261  */
00262 SWord getWord(ADDRESS lc);
00263 
00264 /*
00265  * Returns the double (32 bits) starting at the given address.
00266  */
00267 DWord getDword(ADDRESS lc);
00268 
00269 
00270 #endif

Generated on Tue Sep 19 21:18:16 2006 for Boomerang by  doxygen 1.4.6