frontend.h

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 1998-2005, The University of Queensland
00003  * Copyright (C) 2000-2001, Sun Microsystems, Inc  
00004  * Copyright (C) 2002, Trent Waddington
00005  *
00006  *
00007  * See the file "LICENSE.TERMS" for information on usage and
00008  * redistribution of this file, and for a DISCLAIMER OF ALL
00009  * WARRANTIES.
00010  *
00011  */
00012 
00013 /*==============================================================================
00014  * FILE:        frontend.h
00015  * OVERVIEW:    This file contains the definition for the FrontEnd class, which implements the source independent parts of
00016  *              the front end: decoding machine instructions into a control flow graph populated with low and high level
00017  *              RTLs.
00018  *============================================================================*/
00019 
00020 /* $Revision: 1.35 $    // 1.29.2.2
00021  *
00022  * 17 Apr 02 - Mike: Mods to adapt UQBT code to boomerang
00023  * 28 Jun 05 - Mike: Added a map of previously decoded indirect jumps and calls needed when restarting the cfg
00024  */
00025 
00026 
00027 #ifndef __FRONTEND_H__
00028 #define __FRONTEND_H__
00029 
00030 #include <list>
00031 #include <map>
00032 #include <queue>
00033 #include <fstream>
00034 #include "types.h"
00035 #include "sigenum.h"   // For enums platform and cc
00036 #include "BinaryFile.h"
00037 
00038 class UserProc;
00039 class Proc;
00040 class RTL;
00041 class NJMCDecoder;
00042 class BasicBlock;
00043 typedef BasicBlock* PBB;
00044 class Exp;
00045 class TypedExp;
00046 class Cfg;
00047 class Prog;
00048 struct DecodeResult;
00049 class Signature;
00050 class Statement;
00051 class CallStatement;
00052 
00053 // Control flow types: one enumerator per kind of control transfer (not referenced elsewhere in this header)
00054 enum INSTTYPE {
00055     I_UNCOND,                // unconditional branch (first enumerator, so its value is 0)
00056     I_COND,                  // conditional branch
00057     I_N_COND,                // case branch
00058     I_CALL,                  // procedure call
00059     I_RET,                   // return
00060     I_COMPJUMP,              // computed jump
00061     I_COMPCALL               // computed call
00062 };
00063 
00064 // TargetQueue: small class that holds the target queue logic (the addresses still to be processed)
00065 class TargetQueue {
00066         std::queue<ADDRESS> targets;    // Targets not yet processed; private, std::queue is first in, first out
00067 
00068 public:
00069 
00070 /*
00071  * FUNCTION:    visit
00072  * OVERVIEW:    Visit a destination as a label, i.e. check whether it has to be queued as a new BB to create later.
00073  *              Note: at present, an address must be visited BEFORE an out edge is added to that address.
00074  *              Adding an out edge enters the address into the Cfg's BB map; the BB then looks as if it has
00075  *              already been visited, and it gets overlooked. A scheme in which the order of calling these
00076  *              functions (i.e. visit() and AddOutEdge()) did not matter would be better.
00077  * PARAMETERS:  pCfg - the enclosing CFG
00078  *              uNewAddr - the address to be checked
00079  *              pNewBB - (passed by reference) set to the lower part of the BB if the address already exists as
00080  *              a non explicit label (i.e. the BB has to be split)
00081  * RETURNS:     <nothing>
00082  */
00083     void visit(Cfg* pCfg, ADDRESS uNewAddr, PBB& pNewBB);
00084 /*
00085  * Provide an initial address; may be called several times when there are several entry points
00086  */
00087         void        initial(ADDRESS uAddr);
00088 
00089 
00090 /*
00091  * FUNCTION:      nextAddress
00092  * OVERVIEW:      Return the next target from the queue of targets that have not been processed yet.
00093  * PARAMETERS:    cfg - the enclosing CFG
00094  * RETURNS:       The next address to process, or 0 if none (queue is empty); 0 thus acts as the "no more" sentinel
00095  */
00096         ADDRESS     nextAddress(Cfg* cfg);
00097 
00098 /*
00099  * Print the queue (debugging aid); NOTE(review): the output destination is not visible in this header
00100  */
00101         void        dump();
00102 
00103 };  // class TargetQueue
00104 
00105 // PHELPER: pointer to a free function with the same parameter list and return type as FrontEnd::helperFunc
00106 typedef bool (*PHELPER)(ADDRESS dest, ADDRESS addr, std::list<RTL*>* lrtl); 
00107 
00108 class FrontEnd {    // Source independent part of the front end; derived classes supply the machine specifics
00109 protected:
00110 //    const int NOP_SIZE;           // Size of a no-op instruction (in bytes)
00111 //    const int NOP_INST;           // No-op pattern
00112         NJMCDecoder *decoder;       // The decoder (handed out by getDecoder())
00113         BinaryFile  *pBF;           // The binary file (handed out by getBinaryFile())
00114         BinaryFileFactory* pbff;    // The binary file factory (for closing properly)
00115         Prog*       prog;           // The Prog object
00116         // The queue of addresses still to be processed
00117         TargetQueue targetQueue;
00118         // Map from function name (string) to signature. Note: this member is protected, not public.
00119         std::map<std::string, Signature*> librarySignatures;
00120         // Map from address to meaningful name; entries are stored by addRefHint()
00121         std::map<ADDRESS, std::string> refHints;
00122         // Map from address to previously decoded RTLs for decoded indirect control transfer instructions (see addDecodedRtl())
00123         std::map<ADDRESS, RTL*> previouslyDecoded;
00124 public:
00125         /*
00126          * Constructor. Takes the binary file, the Prog and the binary file factory once, to save passing these around a lot
00127          */
00128                     FrontEnd(BinaryFile *pBF, Prog* prog, BinaryFileFactory* pbff);
00129         // Create a front end from a binary file (static factory)
00130 static  FrontEnd*   instantiate(BinaryFile *pBF, Prog* prog, BinaryFileFactory* pbff);
00131         // Load a binary given its file name (static factory)
00132 static  FrontEnd*   Load(const char *fname, Prog* prog);
00133 
00134         // Add a symbol to the loader (simply forwards to pBF->AddSymbol)
00135         void        AddSymbol(ADDRESS addr, const char *nam) { pBF->AddSymbol(addr, nam); }
00136 
00137         // Add a "hint" that an instruction at the given address references a named global (replaces any hint already stored for addr)
00138         void        addRefHint(ADDRESS addr, const char *nam) { refHints[addr] = nam; }
00139 
00140         /**
00141          * Destructor. Virtual to mute a warning (the class declares virtual functions and is meant to be derived from)
00142          */
00143 virtual             ~FrontEnd();
00144 
00145         // returns a symbolic name for a register index
00146         const char  *getRegName(int idx);
00147         int          getRegSize(int idx);      // size of the register with index idx; units not stated here - TODO confirm
00148 
00149         // returns an enum identifier for this frontend's platform (pure virtual)
00150 virtual platform    getFrontEndId() = 0;
00151 
00152         // returns a frontend given a string (unused?)
00153 static FrontEnd     *createById(std::string &str, BinaryFile *pBFi, Prog* prog);
00154 
00155         bool        isWin32();                  // Is this a win32 frontend?
00156 
00157 static  bool        noReturnCallDest(const char *name);  // presumably: does a call to `name` never return? - TODO confirm
00158 
00159         BinaryFile  *getBinaryFile() { return pBF; }     // Accessor for the binary file (pBF)
00160 
00161         /*
00162          * Function to fetch the smallest machine instruction. NOTE(review): addr is an int here, not an ADDRESS
00163          */
00164 virtual int         getInst(int addr);
00165 
00166 virtual DecodeResult& decodeInstruction(ADDRESS pc);     // Decode the instruction at pc; returns a reference to the result
00167 
00168 virtual void extraProcessCall(CallStatement *call, std::list<RTL*> *BB_rtls) { }  // Hook; this default does nothing
00169 
00170         /*
00171          * Accessor function to get the decoder.
00172          */
00173         NJMCDecoder *getDecoder() { return decoder; }
00174 
00175         /*
00176          * Read library signatures from a file, for the given calling convention (cc).
00177          */
00178         void        readLibrarySignatures(const char *sPath, callconv cc);
00179         // read from a catalog whose path is given
00180         void        readLibraryCatalog(const char *sPath);
00181         // read from default catalog
00182         void        readLibraryCatalog();
00183 
00184         // lookup a library signature by name
00185         Signature   *getLibSignature(const char *name);
00186 
00187         // return a signature that matches the architecture best
00188         Signature   *getDefaultSignature(const char *name);
00189 
00190 virtual std::vector<Exp*> &getDefaultParams() = 0;      // pure virtual; NOTE(review): <vector>/<string> are not #included directly
00191 virtual std::vector<Exp*> &getDefaultReturns() = 0;     // pure virtual; supplied by each derived front end
00192 
00193         /*
00194          * Decode all undecoded procedures. Note: declared void, so nothing is returned; a Prog is passed in instead.
00195          */
00196         void        decode(Prog* prog, bool decodeMain = true, const char *pname = NULL);
00197 
00198         /* Decode all procs starting at a given address in a given program. */
00199         void        decode(Prog *prog, ADDRESS a);
00200 
00201         /* Decode one proc starting at a given address in a given program. */
00202         void        decodeOnly(Prog *prog, ADDRESS a);
00203 
00204         /* Decode a fragment of a procedure, e.g. for each destination of a switch statement */
00205         void        decodeFragment(UserProc* proc, ADDRESS a);
00206 
00207         /*
00208          * processProc. This is the main function for decoding a procedure. It is usually overridden in the derived
00209          * class to do source machine specific things.  If frag is set, only a fragment of the proc is being decoded
00210          * (e.g. each arm of a switch statement is decoded). If spec is set, this is a speculative decode.
00211          * Returns true on a good decode. Both frag and spec default to false.
00212          */
00213 virtual bool        processProc(ADDRESS uAddr, UserProc* pProc, std::ofstream &os, bool frag = false,
00214                         bool spec = false);
00215 
00216         /*
00217          * Given the dest of a call, determine if this is a machine specific helper function with special semantics.
00218          * If so, return true and set the semantics in lrtl.  addr is the native address of the call instruction.
00219          */
00220 virtual bool        helperFunc(ADDRESS dest, ADDRESS addr, std::list<RTL*>* lrtl) {return false; }  // default: never a helper
00221 
00222         /*
00223          * Locate the starting address of "main", returning a native address (pure virtual; gotMain is passed by reference)
00224          */
00225 virtual ADDRESS     getMainEntryPoint( bool &gotMain ) = 0;
00226 
00227         /*
00228          * Returns a list of all available entrypoints (a std::vector of ADDRESS, returned by value).
00229          */
00230 std::vector<ADDRESS> getEntryPoints();
00231 
00232         /*
00233          * getInstanceFor. Get an instance of a class derived from FrontEnd, returning a pointer to the object of
00234          * that class. This is done by guessing the machine for the binary file whose name is sName, loading the
00235          * appropriate library using dlopen/dlsym, running the "construct" function in that library, and returning
00236          * the result. dlHandle and decoder are passed by reference.
00237          */
00238 static  FrontEnd*   getInstanceFor( const char* sName, void*& dlHandle, BinaryFile *pBF, NJMCDecoder*& decoder);
00239 
00240         /*
00241          * Close the library opened by getInstanceFor (pass the dlHandle that getInstanceFor was given)
00242          */
00243 static  void        closeInstance(void* dlHandle);
00244 
00245         /*
00246          * Get a Prog object (for testing and not decoding)
00247          */
00248         Prog*       getProg();
00249 
00250         /*
00251          * Create a Return or a Oneway BB if a return statement already exists
00252          * PARAMETERS:  pProc: pointer to enclosing UserProc
00253          *              BB_rtls: list of RTLs for the current BB
00254          *              pRtl: pointer to the current RTL with the semantics for the return statement (including a
00255          *                  ReturnStatement as the last statement)
00256          */
00257         PBB         createReturnBlock(UserProc* pProc, std::list<RTL*>* BB_rtls, RTL* pRtl);
00258 
00259         /*
00260          * Add a synthetic return instruction and basic block (or a branch to the existing return instruction).
00261          * PARAMETERS:  pCallBB: a pointer to the call BB that will be followed by the return or jump
00262          *              pProc: pointer to the enclosing UserProc
00263          *              pRtl: pointer to the current RTL with the call instruction
00264          */
00265         void        appendSyntheticReturn(PBB pCallBB, UserProc* pProc, RTL* pRtl);
00266 
00267         /*
00268          * Add an RTL to the map from native address to previously-decoded-RTLs. Used to restore case statements and
00269          * decoded indirect call statements in a new decode following analysis of such instructions. The CFG is
00270          * incomplete in these cases, and needs to be restarted from scratch. An existing entry for a is overwritten.
00271          */
00272         void        addDecodedRtl(ADDRESS a, RTL* rtl) {
00273                         previouslyDecoded[a] = rtl; }
00274 
00275 };  // class FrontEnd
00276 
00277 
00278 /*==============================================================================
00279  * These functions are the machine specific parts of the front end. They consist
00280  * of those that actually drive the decoding and analysis of the procedures of
00281  * the program being translated.
00282  * These functions are implemented in the files front<XXX> where XXX is a
00283  * platform name such as sparc or pentium.
00284  *============================================================================*/
00285 
00286         /*
00287          * Initialise the procedure decoder and analyser.
00288          */
00289         void        initFront();
00290 
00291         /*
00292          * Decode one RTL. NOTE(review): delta is not documented in this header - confirm its meaning in the front<XXX> file
00293          */
00294         RTL*        decodeRtl(ADDRESS address, int delta, NJMCDecoder* decoder);
00295 
00296         /*
00297          * This decodes a given procedure. It performs the analysis to recover switch statements, call
00298          * parameters and return types etc.
00299          * keep (default true): if false, discard the decoded procedure (only needed to find code other than main
00300          * that is reachable from _start, for coverage and speculative decoding).
00301          * spec (default false): if true, this is a speculative decode (i.e. on an illegal instruction, just bail out).
00302          * NOTE(review): the fe parameter and the meaning of the bool result are not described in this header.
00303          */
00304         bool        decodeProc(ADDRESS uAddr, FrontEnd& fe, bool keep = true, bool spec = false);
00305 
00306 
00307 #endif      // #ifndef __FRONTEND_H__

Generated on Tue Sep 19 21:18:24 2006 for Boomerang by  doxygen 1.4.6