00001 /* 00002 * Copyright (C) 1997-2005, The University of Queensland 00003 * Copyright (C) 2001, Sun Microsystems, Inc 00004 * Copyright (C) 2002, Trent Waddington 00005 * 00006 * See the file "LICENSE.TERMS" for information on usage and 00007 * redistribution of this file, and for a DISCLAIMER OF ALL 00008 * WARRANTIES. 00009 * 00010 */ 00011 00012 /*============================================================================== 00013 * FILE: cfg.h 00014 * OVERVIEW: Interface for a control flow graph, based on basic block nodes. 00015 *============================================================================*/ 00016 00017 /* 00018 * $Revision: 1.76 $ // 1.69.2.7 00019 * 18 Apr 02 - Mike: Mods for boomerang 00020 * 04 Dec 02 - Mike: Added isJmpZ 00021 */ 00022 00023 #ifndef _CFG_H_ 00024 #define _CFG_H_ 00025 00026 #include <stdio.h> // For FILE 00027 #include <list> 00028 #include <vector> 00029 #include <set> 00030 #include <map> 00031 #include <iostream> 00032 #include <string> 00033 00034 #if defined(_MSC_VER) 00035 #pragma warning(disable:4290) 00036 #endif 00037 00038 #include "types.h" 00039 #include "exphelp.h" // For lessExpStar 00040 #include "basicblock.h" // For the BB nodes 00041 #include "dataflow.h" // For embedded class DataFlow 00042 00043 #define DEBUG_LIVENESS (Boomerang::get()->debugLiveness) 00044 00045 class Proc; 00046 class Prog; 00047 class UserProc; 00048 class UseSet; 00049 class LocationSet; 00050 class SSACounts; 00051 class BinaryFile; 00052 class BasicBlock; 00053 class HLLCode; 00054 class CallStatement; 00055 class BranchStatement; 00056 class RTL; 00057 struct DOM; 00058 class XMLProgParser; 00059 class Global; 00060 class Parameter; 00061 00062 #define BTHEN 0 00063 #define BELSE 1 00064 00065 00066 00067 // A type for the ADDRESS to BB map 00068 typedef std::map<ADDRESS, PBB, std::less<ADDRESS> > MAPBB; 00069 00070 /*============================================================================== 00071 * Control Flow Graph class. Contains all the BasicBlock objects for a procedure. These BBs contain all the RTLs for 00072 * the procedure, so by traversing the Cfg, one traverses the whole procedure. 00073 *============================================================================*/ 00074 class Cfg { 00075 /* 00076 * Pointer to the UserProc object that contains this CFG object 00077 */ 00078 UserProc* myProc; 00079 00080 /* 00081 * The list of pointers to BBs. 00082 */ 00083 std::list<PBB> m_listBB; 00084 00085 /* 00086 * Ordering of BBs for control flow structuring 00087 */ 00088 std::vector<PBB> Ordering; 00089 std::vector<PBB> revOrdering; 00090 00091 /* 00092 * The ADDRESS to PBB map. 00093 */ 00094 MAPBB m_mapBB; 00095 00096 /* 00097 * The entry and exit BBs. 00098 */ 00099 BasicBlock* entryBB; 00100 BasicBlock* exitBB; 00101 00102 /* 00103 * True if well formed. 00104 */ 00105 bool m_bWellFormed, structured; 00106 00107 /* 00108 * Set of the call instructions in this procedure. 00109 */ 00110 std::set<CallStatement*> callSites; 00111 00112 /* 00113 * Last label (positive integer) used by any BB this Cfg 00114 */ 00115 int lastLabel; 00116 00117 /* 00118 * Map from expression to implicit assignment. The purpose is to prevent multiple implicit assignments for 00119 * the same location. 00120 */ 00121 00122 std::map<Exp*, Statement*, lessExpStar> implicitMap; 00123 00124 bool bImplicitsDone; // True when the implicits are done; they can cause problems (e.g. with 00125 // ad-hoc global assignment) 00126 00127 public: 00128 /* 00129 * Constructor. 00130 */ 00131 Cfg(); 00132 00133 /* 00134 * Destructor. 00135 */ 00136 ~Cfg(); 00137 00138 /* 00139 * Set the pointer to the owning UserProc object 00140 */ 00141 void setProc(UserProc* proc); 00142 00143 /* 00144 * clear this CFG of all basic blocks, ready for decode 00145 */ 00146 void clear(); 00147 00148 /* 00149 * Get the number of BBs 00150 */ 00151 unsigned getNumBBs() {return m_listBB.size();} 00152 00153 /* 00154 * Equality operator. 00155 */ 00156 const Cfg& operator=(const Cfg& other); /* Copy constructor */ 00157 00158 class BBAlreadyExistsError : public std::exception { 00159 public: 00160 PBB pBB; 00161 BBAlreadyExistsError(PBB pBB) : pBB(pBB) { } 00162 }; 00163 00164 /* 00165 * Checks to see if the address associated with pRtls is already in the map as an incomplete BB; if so, it is 00166 * completed now and a pointer to that BB is returned. Otherwise, allocates memory for a new basic block node, 00167 * initializes its list of RTLs with pRtls, its type to the given type, and allocates enough space to hold 00168 * pointers to the out-edges (based on given numOutEdges). 00169 * The native address associated with the start of the BB is taken from pRtls, and added to the map (unless 0). 00170 * NOTE: You cannot assume that the returned BB will have the RTL associated with pStart as its first RTL, since 00171 * the BB could be split. You can however assume that the returned BB is suitable for adding out edges (i.e. if 00172 * the BB is split, you get the "bottom" part of the BB, not the "top" (with lower addresses at the "top"). 00173 * Returns NULL if not successful, or if there already exists a completed BB at this address (this can happen 00174 * with certain kinds of forward branches). 00175 */ 00176 PBB newBB ( std::list<RTL*>* pRtls, BBTYPE bbType, int iNumOutEdges) throw(BBAlreadyExistsError); 00177 00178 /* 00179 * Allocates space for a new, incomplete BB, and the given address is added to the map. This BB will have to be 00180 * completed before calling WellFormCfg. This function will commonly be called via AddOutEdge() 00181 */ 00182 PBB newIncompleteBB(ADDRESS addr); 00183 00184 /* 00185 * Remove the incomplete BB at uAddr, if any. Was used when dealing with the SKIP instruction, but no longer. 00186 */ 00187 void removeIncBB(ADDRESS uAddr); 00188 00189 /* 00190 * Adds an out-edge to the basic block pBB by filling in the first slot that is empty. Note: an address is 00191 * given here; the out edge will be filled in as a pointer to a BB. An incomplete BB will be created if 00192 * required. If bSetLabel is true, the destination BB will have its "label required" bit set. 00193 */ 00194 void addOutEdge(PBB pBB, ADDRESS adr, bool bSetLabel = false); 00195 00196 /* 00197 * Adds an out-edge to the basic block pBB by filling in the first slot that is empty. Note: a pointer to a BB 00198 * is given here. 00199 */ 00200 void addOutEdge(PBB pBB, PBB pDestBB, bool bSetLabel = false); 00201 00202 /* 00203 * Add a label for the given basicblock. The label number must be a non-zero integer 00204 */ 00205 void setLabel(PBB pBB); 00206 00207 /* 00208 * Gets a pointer to the first BB this cfg. Also initialises `it' so that calling GetNextBB will return the 00209 * second BB, etc. Also, *it is the first BB. Returns 0 if there are no BBs this CFG. 00210 */ 00211 PBB getFirstBB(BB_IT& it); 00212 00213 /* 00214 * Gets a pointer to the next BB this cfg. `it' must be from a call to GetFirstBB(), or from a subsequent call 00215 * to GetNextBB(). Also, *it is the current BB. Returns 0 if there are no more BBs this CFG. 00216 */ 00217 PBB getNextBB(BB_IT& it); 00218 00219 /* 00220 * An alternative to the above is to use begin() and end(): 00221 */ 00222 typedef BB_IT iterator; 00223 iterator begin() {return m_listBB.begin();} 00224 iterator end() {return m_listBB.end();} 00225 00226 00227 /* 00228 * Checks whether the given native address is a label (explicit or non explicit) or not. Explicit labels are 00229 * addresses that have already been tagged as being labels due to transfers of control to that address. 00230 * Non explicit labels are those that belong to basic blocks that have already been constructed (i.e. have 00231 * previously been parsed) and now need to be made explicit labels. In the case of non explicit labels, the 00232 * basic block is split into two and types and edges are adjusted accordingly. pNewBB is set to the lower part 00233 * of the split BB. 00234 * Returns true if the native address is that of an explicit or non explicit label, false otherwise. 00235 */ 00236 bool label ( ADDRESS uNativeAddr, PBB& pNewBB ); 00237 00238 /* 00239 * Checks whether the given native address is in the map. If not, returns false. If so, returns true if it is 00240 * incomplete. Otherwise, returns false. 00241 */ 00242 bool isIncomplete ( ADDRESS uNativeAddr ); 00243 00244 /* 00245 * Just checks to see if there exists a BB starting with this native address. If not, the address is NOT added 00246 * to the map of labels to BBs. 00247 */ 00248 bool existsBB ( ADDRESS uNativeAddr ); 00249 00250 /* 00251 * Sorts the BBs in the CFG according to the low address of each BB. Useful because it makes printouts easier, 00252 * if they used iterators to traverse the list of BBs. 00253 */ 00254 void sortByAddress (); 00255 00256 /* 00257 * Sorts the BBs in the CFG by their first DFT numbers. 00258 */ 00259 void sortByFirstDFT(); 00260 00261 /* 00262 * Sorts the BBs in the CFG by their last DFT numbers. 00263 */ 00264 void sortByLastDFT(); 00265 00266 /* 00267 * Updates m_vectorBB to m_listBB 00268 */ 00269 void updateVectorBB(); 00270 00271 /* 00272 * Transforms the input machine-dependent cfg, which has ADDRESS labels for each out-edge, into a machine- 00273 * independent cfg graph (i.e. a well-formed graph) which has references to basic blocks for each out-edge. 00274 * Returns false if not successful. 00275 */ 00276 bool wellFormCfg ( ); 00277 00278 /* 00279 * Given two basic blocks that belong to a well-formed graph, merges the second block onto the first one and 00280 * returns the new block. The in and out edges links are updated accordingly. 00281 * Note that two basic blocks can only be merged if each has a unique out-edge and in-edge respectively, and 00282 * these edges correspond to each other. 00283 * Returns true if the blocks are merged. 00284 */ 00285 bool mergeBBs ( PBB pb1, PBB pb2 ); 00286 00287 00288 /* 00289 * Given a well-formed cfg graph, optimizations are performed on the graph to reduce the number of basic blocks 00290 * and edges. 00291 * Optimizations performed are: removal of branch chains (i.e. jumps to jumps), removal of redundant jumps (i.e. 00292 * jump to the next instruction), merge basic blocks where possible, and remove redundant basic blocks created 00293 * by the previous optimizations. 00294 * Returns false if not successful. 00295 */ 00296 bool compressCfg ( ); 00297 00298 00299 /* 00300 * Given a well-formed cfg graph, a partial ordering is established between the nodes. The ordering is based on 00301 * the final visit to each node during a depth first traversal such that if node n1 was visited for the last 00302 * time before node n2 was visited for the last time, n1 will be less than n2. 00303 * The return value indicates if all nodes where ordered. This will not be the case for incomplete CFGs (e.g. 00304 * switch table not completely recognised) or where there are nodes unreachable from the entry node. 00305 */ 00306 bool establishDFTOrder(); 00307 00308 /* 00309 * Performs establishDFTOrder on the inverse of the graph (ie, flips the graph) 00310 */ 00311 bool establishRevDFTOrder(); 00312 00313 /* 00314 * Given a pointer to a basic block, return an index (e.g. 0 for the first basic block, 1 for the next, ... n-1 00315 * for the last BB. 00316 */ 00317 int pbbToIndex (PBB pBB); 00318 00319 /* 00320 * Reset all the traversed flags. 00321 * To make this a useful public function, we need access to the traversed flag with other public functions. 00322 */ 00323 void unTraverse ( ); 00324 00325 /* 00326 * Return true if the CFG is well formed. 00327 */ 00328 bool isWellFormed ( ); 00329 00330 /* 00331 * Return true if there is a BB at the address given whose first RTL is an orphan, i.e. GetAddress() returns 0. 00332 */ 00333 bool isOrphan ( ADDRESS uAddr); 00334 00335 /* 00336 * This is called where a two-way branch is deleted, thereby joining a two-way BB with it's successor. 00337 * This happens for example when transforming Intel floating point branches, and a branch on parity is deleted. 00338 * The joined BB becomes the type of the successor. 00339 * Returns true if succeeds. 00340 */ 00341 bool joinBB( PBB pb1, PBB pb2); 00342 00343 /* 00344 * Completely remove a BB from the CFG. 00345 */ 00346 void removeBB( PBB bb); 00347 00348 /* 00349 * Resets the DFA sets of all the BBs. 00350 */ 00351 void resetDFASets(); 00352 00353 /* 00354 * Add a call to the set of calls within this procedure. 00355 */ 00356 void addCall(CallStatement* call); 00357 00358 /* 00359 * Get the set of calls within this procedure. 00360 */ 00361 std::set<CallStatement*>& getCalls(); 00362 00363 /* 00364 * Replace all instances of search with replace. Can be type sensitive if reqd 00365 */ 00366 void searchAndReplace(Exp* search, Exp* replace); 00367 bool searchAll(Exp* search, std::list<Exp*> &result); 00368 00369 /* 00370 * Set the return value for this CFG (assumes there is only one exit bb) 00371 */ 00372 void setReturnVal(Exp *e); 00373 Exp *getReturnVal(); 00374 00375 /* 00376 * Structures the control flow graph 00377 */ 00378 void structure(); 00379 00380 /* 00381 * Add/Remove Junction statements 00382 */ 00383 void addJunctionStatements(); 00384 void removeJunctionStatements(); 00385 00386 /* 00387 * Resolves goto/branch destinations to statements 00388 * Good to do this late, as removing statements doesn't update this information. 00389 */ 00390 void resolveGotos(); 00391 00392 /* 00393 * Virtual Function Call analysis 00394 */ 00395 void virtualFunctionCalls(Prog* prog); 00396 00397 std::vector<PBB> m_vectorBB; // faster access 00398 00399 /* return a bb given an address */ 00400 PBB bbForAddr(ADDRESS addr) { return m_mapBB[addr]; } 00401 00402 /* Simplify all the expressions in the CFG 00403 */ 00404 void simplify(); 00405 00406 /* 00407 * Change the BB enclosing stmt to be CALL, not COMPCALL 00408 */ 00409 void undoComputedBB(Statement* stmt); 00410 00411 private: 00412 00413 /* 00414 * Split the given basic block at the RTL associated with uNativeAddr. The first node's type becomes 00415 * fall-through and ends at the RTL prior to that associated with uNativeAddr. The second node's type becomes 00416 * the type of the original basic block (pBB), and its out-edges are those of the original basic block. 00417 * Precondition: assumes uNativeAddr is an address within the boundaries of the given basic block. 00418 * If pNewBB is non zero, it is retained as the "bottom" part of the split, i.e. splitBB just changes the "top" 00419 * BB to not overlap the existing one. 00420 * Returns a pointer to the "bottom" (new) part of the BB. 00421 */ 00422 PBB splitBB (PBB pBB, ADDRESS uNativeAddr, PBB pNewBB = 0, bool bDelRtls = false); 00423 00424 /* 00425 * Completes the merge of pb1 and pb2 by adjusting out edges. No checks are made that the merge is valid 00426 * (hence this is a private function) Deletes pb1 if bDelete is true 00427 */ 00428 void completeMerge(PBB pb1, PBB pb2, bool bDelete); 00429 00430 /* 00431 * checkEntryBB: emit error message if this pointer is null 00432 */ 00433 bool checkEntryBB(); 00434 00435 public: 00436 /* 00437 * Split the given BB at the RTL given, and turn it into the BranchStatement given. Sort out all the in and out 00438 * edges. 00439 */ 00440 PBB splitForBranch(PBB pBB, RTL* rtl, BranchStatement* br1, BranchStatement* br2, BB_IT& it); 00441 00442 /* 00443 * Control flow analysis stuff, lifted from Doug Simon's honours thesis. 00444 */ 00445 void setTimeStamps(); 00446 PBB commonPDom(PBB curImmPDom, PBB succImmPDom); 00447 void findImmedPDom(); 00448 void structConds(); 00449 void structLoops(); 00450 void checkConds(); 00451 void determineLoopType(PBB header, bool* &loopNodes); 00452 void findLoopFollow(PBB header, bool* &loopNodes); 00453 void tagNodesInLoop(PBB header, bool* &loopNodes); 00454 00455 void removeUnneededLabels(HLLCode *hll); 00456 void generateDotFile(std::ofstream& of); 00457 00458 00459 /* 00460 * Get the entry-point or exit BB 00461 */ 00462 PBB getEntryBB() { return entryBB;} 00463 PBB getExitBB() { return exitBB;} 00464 00465 /* 00466 * Set the entry-point BB (and exit BB as well) 00467 */ 00468 void setEntryBB(PBB bb); 00469 void setExitBB(PBB bb); 00470 00471 PBB findRetNode(); 00472 00473 /* 00474 * Set an additional new out edge to a given value 00475 */ 00476 void addNewOutEdge(PBB fromBB, PBB newOutEdge); 00477 00478 /* 00479 * print this cfg, mainly for debugging 00480 */ 00481 void print(std::ostream &out, bool html = false); 00482 void printToLog(); 00483 void dump(); // Dump to std::cerr 00484 void dumpImplicitMap(); // Dump the implicit map to std::cerr 00485 00486 /* 00487 * Check for indirect jumps and calls. If any found, decode the extra code and return true 00488 */ 00489 bool decodeIndirectJmp(UserProc* proc); 00490 00491 /* 00492 * Implicit assignments 00493 */ 00494 Statement* findImplicitAssign(Exp* x); // Find or create an implicit assign for x 00495 Statement* findTheImplicitAssign(Exp* x); // Find the existing implicit assign for x (if any) 00496 Statement* findImplicitParamAssign(Parameter* p);// Find exiting implicit assign for parameter p 00497 void removeImplicitAssign(Exp* x); // Remove an existing implicit assignment for x 00498 bool implicitsDone() // True if implicits have been created 00499 {return bImplicitsDone;} 00500 void setImplicitsDone() { // Call when implicits have been created 00501 bImplicitsDone = true; } 00502 00503 void findInterferences(ConnectionGraph& ig); 00504 void appendBBs(std::list<PBB>& worklist, std::set<PBB>& workset); 00505 00506 void removeUsedGlobals(std::set<Global*> &unusedGlobals); 00507 00508 protected: 00509 void addBB(PBB bb) { m_listBB.push_back(bb); } 00510 friend class XMLProgParser; 00511 }; /* Cfg */ 00512 00513 #endif