cfg.h

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 1997-2005, The University of Queensland
00003  * Copyright (C) 2001, Sun Microsystems, Inc
00004  * Copyright (C) 2002, Trent Waddington
00005  *
00006  * See the file "LICENSE.TERMS" for information on usage and
00007  * redistribution of this file, and for a DISCLAIMER OF ALL
00008  * WARRANTIES.
00009  *
00010  */
00011 
00012 /*==============================================================================
00013  * FILE:       cfg.h
00014  * OVERVIEW:   Interface for a control flow graph, based on basic block nodes.
00015  *============================================================================*/
00016 
00017 /*
00018  * $Revision: 1.76 $    // 1.69.2.7
00019  * 18 Apr 02 - Mike: Mods for boomerang
00020  * 04 Dec 02 - Mike: Added isJmpZ
00021  */
00022  
00023 #ifndef _CFG_H_
00024 #define _CFG_H_
00025 
00026 #include <stdio.h>      // For FILE
00027 #include <list>
00028 #include <vector>
00029 #include <set>
00030 #include <map>
00031 #include <iostream>
00032 #include <string>
00033 
00034 #if defined(_MSC_VER)
00035 #pragma warning(disable:4290)
00036 #endif
00037 
00038 #include "types.h"
00039 #include "exphelp.h"    // For lessExpStar
00040 #include "basicblock.h" // For the BB nodes
00041 #include "dataflow.h"   // For embedded class DataFlow
00042 
00043 #define DEBUG_LIVENESS  (Boomerang::get()->debugLiveness)
00044 
00045 class Proc;
00046 class Prog;
00047 class UserProc;
00048 class UseSet;
00049 class LocationSet;
00050 class SSACounts;
00051 class BinaryFile;
00052 class BasicBlock;
00053 class HLLCode;
00054 class CallStatement;
00055 class BranchStatement;
00056 class RTL;
00057 struct DOM;
00058 class XMLProgParser;
00059 class Global;
00060 class Parameter;
00061 
00062 #define BTHEN 0
00063 #define BELSE 1
00064 
00065 
00066 
00067         // A type for the ADDRESS to BB map
00068 typedef std::map<ADDRESS, PBB, std::less<ADDRESS> >   MAPBB;
00069 
00070 /*==============================================================================
00071  * Control Flow Graph class. Contains all the BasicBlock objects for a procedure.  These BBs contain all the RTLs for
00072  * the procedure, so by traversing the Cfg, one traverses the whole procedure.
00073  *============================================================================*/
00074 class Cfg {
00075         /*
00076          * Pointer to the UserProc object that contains this CFG object
00077          */
00078         UserProc*   myProc;
00079 
00080         /*
00081          * The list of pointers to BBs.
00082          */
00083         std::list<PBB> m_listBB;
00084 
00085         /*
00086          * Ordering of BBs for control flow structuring
00087          */
00088         std::vector<PBB> Ordering;
00089         std::vector<PBB> revOrdering;
00090 
00091         /*
00092          * The ADDRESS to PBB map.
00093          */
00094         MAPBB       m_mapBB;
00095 
00096         /*
00097          * The entry and exit BBs.
00098          */
00099         BasicBlock* entryBB;
00100         BasicBlock* exitBB;
00101 
00102         /*
00103          * True if well formed.
00104          */
00105         bool        m_bWellFormed, structured;
00106 
00107         /*
00108          * Set of the call instructions in this procedure.
00109          */
00110         std::set<CallStatement*> callSites;
00111 
00112         /*
00113          * Last label (positive integer) used by any BB this Cfg
00114          */
00115         int         lastLabel;
00116 
00117         /*
00118          * Map from expression to implicit assignment. The purpose is to prevent multiple implicit assignments for
00119          * the same location.
00120          */
00121 
00122         std::map<Exp*, Statement*, lessExpStar> implicitMap;
00123 
00124         bool        bImplicitsDone;         // True when the implicits are done; they can cause problems (e.g. with
00125                                             // ad-hoc global assignment)
00126 
00127 public:
00128         /*
00129          * Constructor.
00130          */
00131                     Cfg();
00132 
00133         /*
00134          * Destructor.
00135          */
00136                     ~Cfg();
00137 
00138         /*
00139          * Set the pointer to the owning UserProc object
00140          */
00141         void        setProc(UserProc* proc);
00142 
00143         /*
00144          * clear this CFG of all basic blocks, ready for decode
00145          */
00146         void        clear();
00147 
00148         /*
00149          * Get the number of BBs
00150          */
00151         unsigned    getNumBBs() {return m_listBB.size();}
00152 
00153         /*
00154          * Equality operator.
00155          */
00156         const Cfg&  operator=(const Cfg& other); /* Copy constructor */
00157 
00158         class BBAlreadyExistsError : public std::exception { 
00159         public:
00160             PBB pBB;
00161             BBAlreadyExistsError(PBB pBB) : pBB(pBB) { }
00162         };
00163         
00164         /*
00165          * Checks to see if the address associated with pRtls is already in the map as an incomplete BB; if so, it is
00166          * completed now and a pointer to that BB is returned. Otherwise, allocates memory for a new basic block node,
00167          * initializes its list of RTLs with pRtls, its type to the given type, and allocates enough space to hold
00168          * pointers to the out-edges (based on given numOutEdges).
00169          * The native address associated with the start of the BB is taken from pRtls, and added to the map (unless 0).
00170          * NOTE: You cannot assume that the returned BB will have the RTL associated with pStart as its first RTL, since
00171          * the BB could be split. You can however assume that the returned BB is suitable for adding out edges (i.e. if
00172          * the BB is split, you get the "bottom" part of the BB, not the "top" (with lower addresses at the "top").
00173          * Returns NULL if not successful, or if there already exists a completed BB at this address (this can happen
00174          * with certain kinds of forward branches).
00175          */
00176         PBB         newBB ( std::list<RTL*>* pRtls, BBTYPE bbType, int iNumOutEdges) throw(BBAlreadyExistsError);
00177 
00178         /*
00179          * Allocates space for a new, incomplete BB, and the given address is added to the map. This BB will have to be
00180          * completed before calling WellFormCfg. This function will commonly be called via AddOutEdge()
00181          */
00182         PBB         newIncompleteBB(ADDRESS addr);
00183 
00184         /*
00185          * Remove the incomplete BB at uAddr, if any. Was used when dealing with the SKIP instruction, but no longer.
00186          */
00187         void        removeIncBB(ADDRESS uAddr);
00188 
00189         /*
00190          * Adds an out-edge to the basic block pBB by filling in the first slot that is empty.  Note: an address is
00191          * given here; the out edge will be filled in as a pointer to a BB. An incomplete BB will be created if
00192          * required. If bSetLabel is true, the destination BB will have its "label required" bit set.
00193          */
00194         void        addOutEdge(PBB pBB, ADDRESS adr, bool bSetLabel = false);
00195 
00196         /*
00197          * Adds an out-edge to the basic block pBB by filling in the first slot that is empty.  Note: a pointer to a BB
00198          * is given here.
00199          */
00200         void        addOutEdge(PBB pBB, PBB pDestBB, bool bSetLabel = false);
00201 
00202         /*
00203          * Add a label for the given basicblock. The label number must be a non-zero integer
00204          */
00205         void        setLabel(PBB pBB);
00206 
00207         /*
00208          * Gets a pointer to the first BB this cfg. Also initialises `it' so that calling GetNextBB will return the
00209          * second BB, etc.  Also, *it is the first BB.  Returns 0 if there are no BBs this CFG.
00210          */
00211         PBB         getFirstBB(BB_IT& it);
00212 
00213         /*
00214          * Gets a pointer to the next BB this cfg. `it' must be from a call to GetFirstBB(), or from a subsequent call
00215          * to GetNextBB().  Also, *it is the current BB.  Returns 0 if there are no more BBs this CFG.
00216          */
00217         PBB         getNextBB(BB_IT& it);
00218 
00219         /*
00220          * An alternative to the above is to use begin() and end():
00221          */
00222         typedef     BB_IT iterator;
00223         iterator    begin() {return m_listBB.begin();}
00224         iterator    end()    {return m_listBB.end();}
00225 
00226 
00227         /*
00228          * Checks whether the given native address is a label (explicit or non explicit) or not.  Explicit labels are
00229          * addresses that have already been tagged as being labels due to transfers of control to that address.
00230          * Non explicit labels are those that belong to basic blocks that have already been constructed (i.e. have
00231          * previously been parsed) and now need to be made explicit labels.  In the case of non explicit labels, the
00232          * basic block is split into two and types and edges are adjusted accordingly. pNewBB is set to the lower part
00233          * of the split BB.
00234          * Returns true if the native address is that of an explicit or non explicit label, false otherwise.
00235          */ 
00236         bool        label ( ADDRESS uNativeAddr, PBB& pNewBB );
00237 
00238         /*
00239          * Checks whether the given native address is in the map. If not, returns false. If so, returns true if it is
00240          * incomplete.  Otherwise, returns false.
00241          */
00242         bool        isIncomplete ( ADDRESS uNativeAddr );
00243 
00244         /*
00245          * Just checks to see if there exists a BB starting with this native address. If not, the address is NOT added
00246          * to the map of labels to BBs.
00247          */
00248         bool        existsBB ( ADDRESS uNativeAddr );
00249 
00250         /*
00251          * Sorts the BBs in the CFG according to the low address of each BB.  Useful because it makes printouts easier,
00252          * if they used iterators to traverse the list of BBs.
00253          */
00254         void        sortByAddress ();
00255 
00256         /*
00257          * Sorts the BBs in the CFG by their first DFT numbers.
00258          */
00259         void        sortByFirstDFT();
00260 
00261         /*
00262          * Sorts the BBs in the CFG by their last DFT numbers.
00263          */
00264         void        sortByLastDFT();
00265 
00266         /*
00267          * Updates m_vectorBB to m_listBB
00268          */
00269         void        updateVectorBB();
00270 
00271         /*
00272          * Transforms the input machine-dependent cfg, which has ADDRESS labels for each out-edge, into a machine-
00273          * independent cfg graph (i.e. a well-formed graph) which has references to basic blocks for each out-edge.
00274          * Returns false if not successful.
00275          */
00276         bool        wellFormCfg ( );
00277 
00278         /*
00279          * Given two basic blocks that belong to a well-formed graph, merges the second block onto the first one and
00280          * returns the new block.  The in and out edges links are updated accordingly. 
00281          * Note that two basic blocks can only be merged if each has a unique out-edge and in-edge respectively, and
00282          * these edges correspond to each other.  
00283          * Returns true if the blocks are merged.
00284          */
00285         bool        mergeBBs ( PBB pb1, PBB pb2 );
00286      
00287 
00288         /*
00289          * Given a well-formed cfg graph, optimizations are performed on the graph to reduce the number of basic blocks
00290          * and edges.  
00291          * Optimizations performed are: removal of branch chains (i.e. jumps to jumps), removal of redundant jumps (i.e.
00292          *  jump to the next instruction), merge basic blocks where possible, and remove redundant basic blocks created
00293          *  by the previous optimizations.  
00294          * Returns false if not successful.
00295          */
00296         bool        compressCfg ( );
00297 
00298 
00299         /*
00300          * Given a well-formed cfg graph, a partial ordering is established between the nodes. The ordering is based on
00301          * the final visit to each node during a depth first traversal such that if node n1 was visited for the last
00302          * time before node n2 was visited for the last time, n1 will be less than n2.
00303          * The return value indicates if all nodes where ordered. This will not be the case for incomplete CFGs (e.g.
00304          * switch table not completely recognised) or where there are nodes unreachable from the entry node.
00305          */
00306         bool        establishDFTOrder();
00307 
00308         /*
00309          * Performs establishDFTOrder on the inverse of the graph (ie, flips the graph)
00310          */
00311         bool        establishRevDFTOrder();
00312 
00313         /*
00314          * Given a pointer to a basic block, return an index (e.g. 0 for the first basic block, 1 for the next, ... n-1
00315          * for the last BB.
00316          */
00317         int         pbbToIndex (PBB pBB);
00318 
00319         /*
00320          * Reset all the traversed flags.
00321          * To make this a useful public function, we need access to the traversed flag with other public functions.
00322         */
00323         void        unTraverse ( );
00324 
00325         /*
00326          * Return true if the CFG is well formed.
00327          */
00328         bool        isWellFormed ( );
00329 
00330         /*
00331          * Return true if there is a BB at the address given whose first RTL is an orphan, i.e. GetAddress() returns 0.
00332          */
00333         bool        isOrphan ( ADDRESS uAddr);
00334 
00335         /*
00336          * This is called where a two-way branch is deleted, thereby joining a two-way BB with it's successor.
00337          * This happens for example when transforming Intel floating point branches, and a branch on parity is deleted.
00338          * The joined BB becomes the type of the successor.
00339          * Returns true if succeeds.
00340          */
00341         bool        joinBB( PBB pb1, PBB pb2);
00342 
00343         /*
00344          * Completely remove a BB from the CFG.
00345          */
00346         void        removeBB( PBB bb);
00347 
00348         /*
00349          * Resets the DFA sets of all the BBs.
00350          */
00351         void        resetDFASets();
00352 
00353         /*
00354          * Add a call to the set of calls within this procedure.
00355          */
00356         void        addCall(CallStatement* call);
00357 
00358         /*
00359          * Get the set of calls within this procedure.
00360          */
00361         std::set<CallStatement*>& getCalls();
00362 
00363         /*
00364          * Replace all instances of search with replace.  Can be type sensitive if reqd
00365          */
00366         void        searchAndReplace(Exp* search, Exp* replace);
00367         bool        searchAll(Exp* search, std::list<Exp*> &result);
00368 
00369         /*
00370          * Set the return value for this CFG (assumes there is only one exit bb)
00371          */
00372         void        setReturnVal(Exp *e);
00373         Exp         *getReturnVal();
00374 
00375         /*
00376          * Structures the control flow graph
00377          */
00378         void        structure();
00379 
00380         /*
00381          * Add/Remove Junction statements
00382          */
00383         void        addJunctionStatements();
00384         void        removeJunctionStatements();
00385 
00386         /*
00387          * Resolves goto/branch destinations to statements
00388          * Good to do this late, as removing statements doesn't update this information.
00389          */
00390         void        resolveGotos();
00391 
00392         /*
00393          * Virtual Function Call analysis
00394          */
00395         void        virtualFunctionCalls(Prog* prog);
00396 
00397         std::vector<PBB> m_vectorBB; // faster access
00398 
00399         /* return a bb given an address */
00400         PBB         bbForAddr(ADDRESS addr) { return m_mapBB[addr]; }
00401 
00402         /* Simplify all the expressions in the CFG
00403          */
00404         void        simplify();
00405 
00406         /*
00407          * Change the BB enclosing stmt to be CALL, not COMPCALL
00408          */
00409         void        undoComputedBB(Statement* stmt);
00410 
00411 private:
00412 
00413         /*
00414          * Split the given basic block at the RTL associated with uNativeAddr. The first node's type becomes
00415          * fall-through and ends at the RTL prior to that associated with uNativeAddr. The second node's type becomes
00416          * the type of the original basic block (pBB), and its out-edges are those of the original basic block.
00417          * Precondition: assumes uNativeAddr is an address within the boundaries of the given basic block.
00418          * If pNewBB is non zero, it is retained as the "bottom" part of the split, i.e. splitBB just changes the "top"
00419          * BB to not overlap the existing one.
00420          * Returns a pointer to the "bottom" (new) part of the BB.
00421          */
00422         PBB         splitBB (PBB pBB, ADDRESS uNativeAddr, PBB pNewBB = 0, bool bDelRtls = false);
00423 
00424         /*
00425          * Completes the merge of pb1 and pb2 by adjusting out edges. No checks are made that the merge is valid
00426          * (hence this is a private function) Deletes pb1 if bDelete is true
00427          */
00428         void        completeMerge(PBB pb1, PBB pb2, bool bDelete);
00429 
00430         /*
00431          * checkEntryBB: emit error message if this pointer is null
00432          */
00433         bool        checkEntryBB();
00434 
00435 public:
00436         /*
00437          * Split the given BB at the RTL given, and turn it into the BranchStatement given. Sort out all the in and out
00438          * edges.
00439          */
00440         PBB         splitForBranch(PBB pBB, RTL* rtl, BranchStatement* br1, BranchStatement* br2, BB_IT& it);
00441 
00442         /*
00443          * Control flow analysis stuff, lifted from Doug Simon's honours thesis.
00444          */
00445         void        setTimeStamps();
00446         PBB         commonPDom(PBB curImmPDom, PBB succImmPDom);
00447         void        findImmedPDom();
00448         void        structConds();
00449         void        structLoops();
00450         void        checkConds();
00451         void        determineLoopType(PBB header, bool* &loopNodes);
00452         void        findLoopFollow(PBB header, bool* &loopNodes);
00453         void        tagNodesInLoop(PBB header, bool* &loopNodes);
00454 
00455         void        removeUnneededLabels(HLLCode *hll);
00456         void        generateDotFile(std::ofstream& of);
00457 
00458 
00459         /*
00460          * Get the entry-point or exit BB
00461          */
00462         PBB         getEntryBB() { return entryBB;}
00463         PBB         getExitBB()  { return exitBB;}
00464 
00465         /*
00466          * Set the entry-point BB (and exit BB as well)
00467          */
00468         void        setEntryBB(PBB bb);
00469         void        setExitBB(PBB bb);
00470 
00471         PBB         findRetNode();
00472 
00473         /*
00474          * Set an additional new out edge to a given value
00475          */
00476         void        addNewOutEdge(PBB fromBB, PBB newOutEdge);
00477 
00478         /*
00479          * print this cfg, mainly for debugging
00480          */
00481         void        print(std::ostream &out, bool html = false);
00482         void        printToLog();
00483         void        dump();             // Dump to std::cerr
00484         void        dumpImplicitMap();  // Dump the implicit map to std::cerr
00485 
00486         /*
00487          * Check for indirect jumps and calls. If any found, decode the extra code and return true
00488          */
00489         bool decodeIndirectJmp(UserProc* proc);
00490 
00491         /*
00492          * Implicit assignments
00493          */
00494         Statement* findImplicitAssign(Exp* x);          // Find or create an implicit assign for x
00495         Statement* findTheImplicitAssign(Exp* x);       // Find the existing implicit assign for x (if any)
00496         Statement* findImplicitParamAssign(Parameter* p);// Find exiting implicit assign for parameter p
00497         void    removeImplicitAssign(Exp* x);           // Remove an existing implicit assignment for x
00498         bool    implicitsDone()                         // True if implicits have been created
00499                     {return bImplicitsDone;}
00500         void    setImplicitsDone() {                    // Call when implicits have been created
00501                     bImplicitsDone = true; }
00502 
00503         void    findInterferences(ConnectionGraph& ig);
00504         void    appendBBs(std::list<PBB>& worklist, std::set<PBB>& workset);
00505 
00506         void removeUsedGlobals(std::set<Global*> &unusedGlobals);
00507 
00508 protected:
00509         void    addBB(PBB bb) { m_listBB.push_back(bb); }
00510         friend class XMLProgParser;
00511 };              /* Cfg */
00512 
00513 #endif

Generated on Tue Sep 19 21:18:15 2006 for Boomerang by  doxygen 1.4.6