00001 /* 00002 * Copyright (C) 1998-2005, The University of Queensland 00003 * Copyright (C) 2000-2001, Sun Microsystems, Inc 00004 * Copyright (C) 2002, Trent Waddington 00005 * 00006 * 00007 * See the file "LICENSE.TERMS" for information on usage and 00008 * redistribution of this file, and for a DISCLAIMER OF ALL 00009 * WARRANTIES. 00010 * 00011 */ 00012 00013 /*============================================================================== 00014 * FILE: frontend.h 00015 * OVERVIEW: This file contains the definition for the FrontEnd class, which implements the source indendent parts of 00016 * the front end: decoding machine instructions into a control flow graph populated with low and high level 00017 * RTLs. 00018 *============================================================================*/ 00019 00020 /* $Revision: 1.35 $ // 1.29.2.2 00021 * 00022 * 17 Apr 02 - Mike: Mods to adapt UQBT code to boomerang 00023 * 28 Jun 05 - Mike: Added a map of previously decoded indirect jumps and calls needed when restarting the cfg 00024 */ 00025 00026 00027 #ifndef __FRONTEND_H__ 00028 #define __FRONTEND_H__ 00029 00030 #include <list> 00031 #include <map> 00032 #include <queue> 00033 #include <fstream> 00034 #include "types.h" 00035 #include "sigenum.h" // For enums platform and cc 00036 #include "BinaryFile.h" 00037 00038 class UserProc; 00039 class Proc; 00040 class RTL; 00041 class NJMCDecoder; 00042 class BasicBlock; 00043 typedef BasicBlock* PBB; 00044 class Exp; 00045 class TypedExp; 00046 class Cfg; 00047 class Prog; 00048 struct DecodeResult; 00049 class Signature; 00050 class Statement; 00051 class CallStatement; 00052 00053 // Control flow types 00054 enum INSTTYPE { 00055 I_UNCOND, // unconditional branch 00056 I_COND, // conditional branch 00057 I_N_COND, // case branch 00058 I_CALL, // procedure call 00059 I_RET, // return 00060 I_COMPJUMP, // computed jump 00061 I_COMPCALL // computed call 00062 }; 00063 00064 // Put the target queue logic into this small class 00065 class TargetQueue { 00066 std::queue<ADDRESS> targets; 00067 00068 public: 00069 00070 /* 00071 * FUNCTION: visit 00072 * OVERVIEW: Visit a destination as a label, i.e. check whether we need to queue it as a new BB to create later. 00073 * Note: at present, it is important to visit an address BEFORE an out edge is added to that address. 00074 * This is because adding an out edge enters the address into the Cfg's BB map, and it looks like the 00075 * BB has already been visited, and it gets overlooked. It would be better to have a scheme whereby 00076 * the order of calling these functions (i.e. visit() and AddOutEdge()) did not matter. 00077 * PARAMETERS: pCfg - the enclosing CFG 00078 * uNewAddr - the address to be checked 00079 * pNewBB - set to the lower part of the BB if the address already exists as a non explicit label 00080 * (i.e. the BB has to be split) 00081 * RETURNS: <nothing> 00082 */ 00083 void visit(Cfg* pCfg, ADDRESS uNewAddr, PBB& pNewBB); 00084 /* 00085 * Provide an initial address (can call several times if there are several entry points) 00086 */ 00087 void initial(ADDRESS uAddr); 00088 00089 00090 /* 00091 * FUNCTION: nextAddress 00092 * OVERVIEW: Return the next target from the queue of non-processed targets. 00093 * PARAMETERS: cfg - the enclosing CFG 00094 * RETURNS: The next address to process, or 0 if none (queue is empty) 00095 */ 00096 ADDRESS nextAddress(Cfg* cfg); 00097 00098 /* 00099 * Print (for debugging) 00100 */ 00101 void dump(); 00102 00103 }; // class TargetQueue 00104 00105 00106 typedef bool (*PHELPER)(ADDRESS dest, ADDRESS addr, std::list<RTL*>* lrtl); 00107 00108 class FrontEnd { 00109 protected: 00110 // const int NOP_SIZE; // Size of a no-op instruction (in bytes) 00111 // const int NOP_INST; // No-op pattern 00112 NJMCDecoder *decoder; // The decoder 00113 BinaryFile *pBF; // The binary file 00114 BinaryFileFactory* pbff; // The binary file factory (for closing properly) 00115 Prog* prog; // The Prog object 00116 // The queue of addresses still to be processed 00117 TargetQueue targetQueue; 00118 // Public map from function name (string) to signature. 00119 std::map<std::string, Signature*> librarySignatures; 00120 // Map from address to meaningful name 00121 std::map<ADDRESS, std::string> refHints; 00122 // Map from address to previously decoded RTLs for decoded indirect control transfer instructions 00123 std::map<ADDRESS, RTL*> previouslyDecoded; 00124 public: 00125 /* 00126 * Constructor. Takes some parameters to save passing these around a lot 00127 */ 00128 FrontEnd(BinaryFile *pBF, Prog* prog, BinaryFileFactory* pbff); 00129 // Create from a binary file 00130 static FrontEnd* instantiate(BinaryFile *pBF, Prog* prog, BinaryFileFactory* pbff); 00131 // Load a binary 00132 static FrontEnd* Load(const char *fname, Prog* prog); 00133 00134 // Add a symbol to the loader 00135 void AddSymbol(ADDRESS addr, const char *nam) { pBF->AddSymbol(addr, nam); } 00136 00137 // Add a "hint" that an instruction at the given address references a named global 00138 void addRefHint(ADDRESS addr, const char *nam) { refHints[addr] = nam; } 00139 00140 /** 00141 * Destructor. Virtual to mute a warning 00142 */ 00143 virtual ~FrontEnd(); 00144 00145 // returns a symbolic name for a register index 00146 const char *getRegName(int idx); 00147 int getRegSize(int idx); 00148 00149 // returns an enum identifer for this frontend's platform 00150 virtual platform getFrontEndId() = 0; 00151 00152 // returns a frontend given a string (unused?) 00153 static FrontEnd *createById(std::string &str, BinaryFile *pBFi, Prog* prog); 00154 00155 bool isWin32(); // Is this a win32 frontend? 00156 00157 static bool noReturnCallDest(const char *name); 00158 00159 BinaryFile *getBinaryFile() { return pBF; } 00160 00161 /* 00162 * Function to fetch the smallest machine instruction 00163 */ 00164 virtual int getInst(int addr); 00165 00166 virtual DecodeResult& decodeInstruction(ADDRESS pc); 00167 00168 virtual void extraProcessCall(CallStatement *call, std::list<RTL*> *BB_rtls) { } 00169 00170 /* 00171 * Accessor function to get the decoder. 00172 */ 00173 NJMCDecoder *getDecoder() { return decoder; } 00174 00175 /* 00176 * Read library signatures from a file. 00177 */ 00178 void readLibrarySignatures(const char *sPath, callconv cc); 00179 // read from a catalog 00180 void readLibraryCatalog(const char *sPath); 00181 // read from default catalog 00182 void readLibraryCatalog(); 00183 00184 // lookup a library signature by name 00185 Signature *getLibSignature(const char *name); 00186 00187 // return a signature that matches the architecture best 00188 Signature *getDefaultSignature(const char *name); 00189 00190 virtual std::vector<Exp*> &getDefaultParams() = 0; 00191 virtual std::vector<Exp*> &getDefaultReturns() = 0; 00192 00193 /* 00194 * Decode all undecoded procedures and return a new program containing them. 00195 */ 00196 void decode(Prog* prog, bool decodeMain = true, const char *pname = NULL); 00197 00198 /* Decode all procs starting at a given address in a given program. */ 00199 void decode(Prog *prog, ADDRESS a); 00200 00201 /* Decode one proc starting at a given address in a given program. */ 00202 void decodeOnly(Prog *prog, ADDRESS a); 00203 00204 /* Decode a fragment of a procedure, e.g. for each destination of a switch statement */ 00205 void decodeFragment(UserProc* proc, ADDRESS a); 00206 00207 /* 00208 * processProc. This is the main function for decoding a procedure. It is usually overridden in the derived 00209 * class to do source machine specific things. If frag is set, we are decoding just a fragment of the proc 00210 * (e.g. each arm of a switch statement is decoded). If spec is set, this is a speculative decode. 00211 * Returns true on a good decode 00212 */ 00213 virtual bool processProc(ADDRESS uAddr, UserProc* pProc, std::ofstream &os, bool frag = false, 00214 bool spec = false); 00215 00216 /* 00217 * Given the dest of a call, determine if this is a machine specific helper function with special semantics. 00218 * If so, return true and set the semantics in lrtl. addr is the native address of the call instruction 00219 */ 00220 virtual bool helperFunc(ADDRESS dest, ADDRESS addr, std::list<RTL*>* lrtl) {return false; } 00221 00222 /* 00223 * Locate the starting address of "main", returning a native address 00224 */ 00225 virtual ADDRESS getMainEntryPoint( bool &gotMain ) = 0; 00226 00227 /* 00228 * Returns a list of all available entrypoints. 00229 */ 00230 std::vector<ADDRESS> getEntryPoints(); 00231 00232 /* 00233 * getInstanceFor. Get an instance of a class derived from FrontEnd, returning a pointer to the object of 00234 * that class. Do this by guessing the machine for the binary file whose name is sName, loading the 00235 * appropriate library using dlopen/dlsym, running the "construct" function in that library, and returning 00236 * the result. 00237 */ 00238 static FrontEnd* getInstanceFor( const char* sName, void*& dlHandle, BinaryFile *pBF, NJMCDecoder*& decoder); 00239 00240 /* 00241 * Close the library opened by getInstanceFor 00242 */ 00243 static void closeInstance(void* dlHandle); 00244 00245 /* 00246 * Get a Prog object (for testing and not decoding) 00247 */ 00248 Prog* getProg(); 00249 00250 /* 00251 * Create a Return or a Oneway BB if a return statement already exists 00252 * PARAMETERS: pProc: pointer to enclosing UserProc 00253 * BB_rtls: list of RTLs for the current BB 00254 * pRtl: pointer to the current RTL with the semantics for the return statement (including a 00255 * ReturnStatement as the last statement) 00256 */ 00257 PBB createReturnBlock(UserProc* pProc, std::list<RTL*>* BB_rtls, RTL* pRtl); 00258 00259 /* 00260 * Add a synthetic return instruction and basic block (or a branch to the existing return instruction). 00261 * PARAMETERS: pCallBB: a pointer to the call BB that will be followed by the return or jump 00262 * pProc: pointer to the enclosing UserProc 00263 * pRtl: pointer to the current RTL with the call instruction 00264 */ 00265 void appendSyntheticReturn(PBB pCallBB, UserProc* pProc, RTL* pRtl); 00266 00267 /* 00268 * Add an RTL to the map from native address to previously-decoded-RTLs. Used to restore case statements and 00269 * decoded indirect call statements in a new decode following analysis of such instructions. The CFG is 00270 * incomplete in these cases, and needs to be restarted from scratch 00271 */ 00272 void addDecodedRtl(ADDRESS a, RTL* rtl) { 00273 previouslyDecoded[a] = rtl; } 00274 00275 }; // class FrontEnd 00276 00277 00278 /*============================================================================== 00279 * These functions are the machine specific parts of the front end. They consist 00280 * of those that actually drive the decoding and analysis of the procedures of 00281 * the program being translated. 00282 * These functions are implemented in the files front<XXX> where XXX is a 00283 * platform name such as sparc or pentium. 00284 *============================================================================*/ 00285 00286 /* 00287 * Intialise the procedure decoder and analyser. 00288 */ 00289 void initFront(); 00290 00291 /* 00292 * Decode one RTL 00293 */ 00294 RTL* decodeRtl(ADDRESS address, int delta, NJMCDecoder* decoder); 00295 00296 /* 00297 * This decodes a given procedure. It performs the analysis to recover switch statements, call 00298 * parameters and return types etc. 00299 * If keep is false, discard the decoded procedure (only need this to find code other than main that is 00300 * reachable from _start, for coverage and speculative decoding) 00301 * If spec is true, then we are speculatively decoding (i.e. if there is an illegal instruction, we just bail 00302 * out) 00303 */ 00304 bool decodeProc(ADDRESS uAddr, FrontEnd& fe, bool keep = true, bool spec = false); 00305 00306 00307 #endif // #ifndef __FRONTEND_H__