proc.cpp

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 1997-2001, The University of Queensland
00003  * Copyright (C) 2000-2001, Sun Microsystems, Inc
00004  * Copyright (C) 2002-2006, Trent Waddington and Mike Van Emmerik
00005  *
00006  * See the file "LICENSE.TERMS" for information on usage and
00007  * redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
00008  *
00009  */
00010 
00011 /*==============================================================================
00012  * FILE:       proc.cc
00013  * OVERVIEW:   Implementation of the Proc hierachy (Proc, UserProc, LibProc).
00014  *             All aspects of a procedure, apart from the actual code in the
00015  *             Cfg, are stored here
00016  *
00017  * Copyright (C) 1997-2001, The University of Queensland, BT group
00018  * Copyright (C) 2000-2001, Sun Microsystems, Inc
00019  *============================================================================*/
00020 
00021 /*
00022  * $Revision: 1.346 $   // 1.238.2.44
00023  *
00024  * 14 Mar 02 - Mike: Fixed a problem caused with 16-bit pushes in richards2
00025  * 20 Apr 02 - Mike: Mods for boomerang
00026  * 31 Jan 03 - Mike: Tabs and indenting
00027  * 03 Feb 03 - Mike: removeStatement no longer linear searches for the BB
00028  * 13 Jul 05 - Mike: Fixed a segfault in processDecodedICTs with zero lentgth (!) BBs. Also one in the ad-hoc TA
00029  * 08 Mar 06 - Mike: fixed use of invalidated iterator in set/map::erase() (thanks, tamlin!)
00030  */
00031 
00032 /*==============================================================================
00033  * Dependencies.
00034  *============================================================================*/
00035 
00036 #include "proc.h"
00037 #include <types.h>
00038 #include <sstream>
00039 #include <algorithm>        // For find()
00040 #include "type.h"
00041 #include "cluster.h"
00042 #include "statement.h"
00043 #include "register.h"
00044 #include "rtl.h"
00045 #include "prog.h"
00046 #include "BinaryFile.h"
00047 #include "frontend.h"
00048 #include "util.h"
00049 #include "signature.h"
00050 #include "boomerang.h"
00051 #include "constraint.h"
00052 #include "visitor.h"
00053 #include "log.h"
00054 #include <iomanip>          // For std::setw etc
00055 #include <sstream>
00056 
00057 #ifdef _WIN32
00058 #undef NO_ADDRESS
00059 #include <windows.h>
00060 #ifndef __MINGW32__
00061 namespace dbghelp {
00062 #include <dbghelp.h>
00063 };
00064 #endif
00065 #undef NO_ADDRESS
00066 #define NO_ADDRESS ((ADDRESS)-1)
00067 #endif
00068 #if defined(_MSC_VER) && _MSC_VER >= 1400
00069 #pragma warning(disable:4996)       // Warnings about e.g. _strdup deprecated in VS 2005
00070 #endif
00071 
00072 typedef std::map<Statement*, int> RefCounter;
00073 
00074 extern char debug_buffer[];     // Defined in basicblock.cpp, size DEBUG_BUFSIZE
00075 
00076 /************************
00077  * Proc methods.
00078  ***********************/
00079 
00080 Proc::~Proc()
00081 {}
00082 
00083 /*==============================================================================
00084  * FUNCTION:        Proc::Proc
00085  * OVERVIEW:        Constructor with name, native address.
00086  * PARAMETERS:      uNative - Native address of entry point of procedure
00087  * RETURNS:         <nothing>
00088  *============================================================================*/
00089 Proc::Proc(Prog *prog, ADDRESS uNative, Signature *sig)
00090      : prog(prog), signature(sig), address(uNative), m_firstCaller(NULL) 
00091 {
00092     if (sig)
00093         cluster = prog->getDefaultCluster(sig->getName());
00094     else
00095         cluster = prog->getRootCluster();
00096 }
00097 
00098 /*==============================================================================
00099  * FUNCTION:        Proc::getName
00100  * OVERVIEW:        Returns the name of this procedure
00101  * PARAMETERS:      <none>
00102  * RETURNS:         the name of this procedure
00103  *============================================================================*/
00104 const char* Proc::getName() {
00105     assert(signature);
00106     return signature->getName();
00107 }
00108 
00109 /*==============================================================================
00110  * FUNCTION:        Proc::setName
00111  * OVERVIEW:        Sets the name of this procedure
00112  * PARAMETERS:      new name
00113  * RETURNS:         <nothing>
00114  *============================================================================*/
00115 void Proc::setName(const char *nam) {
00116     assert(signature);
00117     signature->setName(nam);
00118 }
00119 
00120 
00121 /*==============================================================================
00122  * FUNCTION:        Proc::getNativeAddress
00123  * OVERVIEW:        Get the native address (entry point).
00124  * PARAMETERS:      <none>
00125  * RETURNS:         the native address of this procedure (entry point)
00126  *============================================================================*/
00127 ADDRESS Proc::getNativeAddress() {
00128     return address;
00129 }
00130 
00131 void Proc::setNativeAddress(ADDRESS a) {
00132     address = a;
00133 }
00134 
00135 bool LibProc::isNoReturn()
00136 {
00137     return FrontEnd::noReturnCallDest(getName());
00138 }
00139 
00140 bool UserProc::isNoReturn()
00141 {
00142     // undecoded procs are assumed to always return (and define everything)
00143     if (!this->isDecoded())
00144         return false;
00145 
00146     PBB exitbb = cfg->getExitBB();
00147     if (exitbb == NULL)
00148         return true;
00149     if (exitbb->getNumInEdges() == 1) {
00150         Statement *s = exitbb->getInEdges()[0]->getLastStmt();
00151         CallStatement *call = (CallStatement*)s;
00152         if (s->isCall() && call->getDestProc() && call->getDestProc()->isNoReturn())
00153             return true;
00154     }
00155     return false;
00156 }
00157 
00158 /*==============================================================================
00159  * FUNCTION:      Proc::containsAddr
00160  * OVERVIEW:      Return true if this procedure contains the given address
00161  * PARAMETERS:    address
00162  * RETURNS:       true if it does
00163  *============================================================================*/
00164 bool UserProc::containsAddr(ADDRESS uAddr) {
00165     BB_IT it;
00166     for (PBB bb = cfg->getFirstBB(it); bb; bb = cfg->getNextBB(it))
00167         if (bb->getRTLs() && bb->getLowAddr() <= uAddr && bb->getHiAddr() >= uAddr)
00168             return true;    
00169     return false;
00170 }
00171 
00172 void Proc::renameParam(const char *oldName, const char *newName) { 
00173     signature->renameParam(oldName, newName); 
00174 }
00175 
00176 void UserProc::renameParam(const char *oldName, const char *newName)
00177 {
00178     oldName = strdup(oldName);
00179     Proc::renameParam(oldName, newName);
00180     //cfg->searchAndReplace(Location::param(strdup(oldName), this), Location::param(strdup(newName), this));
00181 }
00182 
00183 void UserProc::setParamType(const char* nam, Type* ty) {
00184     signature->setParamType(nam, ty);
00185 }
00186 
00187 void UserProc::setParamType(int idx, Type* ty) {
00188     int n = 0;
00189     StatementList::iterator it;
00190     for (it = parameters.begin(); n != idx && it != parameters.end(); it++, n++)
00191         ;
00192     if (it != parameters.end()) {
00193         Assign *a = (Assign*)*it;
00194         a->setType(ty);
00195         // Sometimes the signature isn't up to date with the latest parameters
00196         signature->setParamType(a->getLeft(), ty);
00197     }
00198 }
00199 
00200 void UserProc::renameLocal(const char *oldName, const char *newName) {
00201     Type *ty = locals[oldName];
00202     Exp *oldExp = expFromSymbol(oldName);
00203     locals.erase(oldName);
00204     Exp *oldLoc = getSymbolFor(oldExp, ty);
00205     Exp *newLoc = Location::local(strdup(newName), this);
00206     mapSymbolToRepl(oldExp, oldLoc, newLoc);
00207     locals[strdup(newName)] = ty;
00208     cfg->searchAndReplace(oldLoc, newLoc);
00209 }
00210 
00211 bool UserProc::searchAll(Exp* search, std::list<Exp*> &result)
00212 {
00213     return cfg->searchAll(search, result);  
00214 }
00215 
00216 void Proc::printCallGraphXML(std::ostream &os, int depth, bool recurse)
00217 {
00218     if (!DUMP_XML)
00219         return;
00220     visited = true;
00221     for (int i = 0; i < depth; i++)
00222         os << "   ";
00223     os << "<proc name=\"" << getName() << "\"/>\n";
00224 }
00225 
00226 void UserProc::printCallGraphXML(std::ostream &os, int depth, bool recurse)
00227 {
00228     if (!DUMP_XML)
00229         return;
00230     bool wasVisited = visited;
00231     visited = true;
00232     int i;
00233     for (i = 0; i < depth; i++)
00234         os << "   ";
00235     os << "<proc name=\"" << getName() << "\">\n";
00236     if (recurse) {
00237         for (std::list<Proc*>::iterator it = calleeList.begin(); it != calleeList.end(); it++) 
00238             (*it)->printCallGraphXML(os, depth+1, !wasVisited && !(*it)->isVisited());
00239     }
00240     for (i = 0; i < depth; i++)
00241         os << "   ";
00242     os << "</proc>\n";
00243 }
00244 
00245 void Proc::printDetailsXML() {
00246     if (!DUMP_XML)
00247         return;
00248     std::ofstream out((Boomerang::get()->getOutputPath() + getName() + "-details.xml").c_str());
00249     out << "<proc name=\"" << getName() << "\">\n";
00250     unsigned i;
00251     for (i = 0; i < signature->getNumParams(); i++)
00252         out << "   <param name=\"" << signature->getParamName(i) << "\" " << "exp=\"" << signature->getParamExp(i)
00253             << "\" " << "type=\"" << signature->getParamType(i)->getCtype() << "\"\n";
00254     for (i = 0; i < signature->getNumReturns(); i++)
00255         out << "   <return exp=\"" << signature->getReturnExp(i) << "\" "
00256             << "type=\"" << signature->getReturnType(i)->getCtype() << "\"/>\n";
00257     out << "</proc>\n";
00258     out.close();
00259 }
00260 
00261 void UserProc::printDecodedXML()
00262 {
00263     if (!DUMP_XML)
00264         return;
00265     std::ofstream out((Boomerang::get()->getOutputPath() + getName() + "-decoded.xml").c_str());
00266     out << "<proc name=\"" << getName() << "\">\n";
00267     out << "    <decoded>\n";
00268     std::ostringstream os;
00269     print(os);
00270     std::string s = os.str();
00271     escapeXMLChars(s);
00272     out << s;
00273     out << "    </decoded>\n";
00274     out << "</proc>\n";
00275     out.close();
00276 }
00277 
00278 void UserProc::printAnalysedXML()
00279 {
00280     if (!DUMP_XML)
00281         return;
00282     std::ofstream out((Boomerang::get()->getOutputPath() + getName() + "-analysed.xml").c_str());
00283     out << "<proc name=\"" << getName() << "\">\n";
00284     out << "    <analysed>\n";
00285     std::ostringstream os;
00286     print(os);
00287     std::string s = os.str();
00288     escapeXMLChars(s);
00289     out << s;
00290     out << "    </analysed>\n";
00291     out << "</proc>\n";
00292     out.close();
00293 }
00294 
00295 void UserProc::printSSAXML()
00296 {
00297     if (!DUMP_XML)
00298         return;
00299     std::ofstream out((Boomerang::get()->getOutputPath() + getName() + "-ssa.xml").c_str());
00300     out << "<proc name=\"" << getName() << "\">\n";
00301     out << "    <ssa>\n";
00302     std::ostringstream os;
00303     print(os);
00304     std::string s = os.str();
00305     escapeXMLChars(s);
00306     out << s;
00307     out << "    </ssa>\n";
00308     out << "</proc>\n";
00309     out.close();
00310 }
00311 
00312 
00313 void UserProc::printXML()
00314 {
00315     if (!DUMP_XML)
00316         return;
00317     printDetailsXML();
00318     printSSAXML();
00319     prog->printCallGraphXML();
00320     printUseGraph();
00321 }
00322 
00323 void UserProc::printUseGraph()
00324 {
00325     std::ofstream out((Boomerang::get()->getOutputPath() + getName() + "-usegraph.dot").c_str());
00326     out << "digraph " << getName() << " {\n";
00327     StatementList stmts;
00328     getStatements(stmts);
00329     StatementList::iterator it;
00330     for (it = stmts.begin(); it != stmts.end(); it++) {
00331         Statement* s = *it;
00332         if (s->isPhi())
00333             out << s->getNumber() << " [shape=diamond];\n";
00334         LocationSet refs;
00335         s->addUsedLocs(refs);
00336         LocationSet::iterator rr;
00337         for (rr = refs.begin(); rr != refs.end(); rr++) {
00338             if (((Exp*)*rr)->isSubscript()) {
00339                 RefExp *r = (RefExp*)*rr;
00340                 if (r->getDef())
00341                     out << r->getDef()->getNumber() << " -> " << s->getNumber() << ";\n";
00342             }
00343         }
00344     }
00345     out << "}\n";
00346     out.close();
00347 }
00348 
00349 /*==============================================================================
00350  * FUNCTION:        operator<<
00351  * OVERVIEW:        Output operator for a Proc object.
00352  * PARAMETERS:      os - output stream
00353  *                  proc -
00354  * RETURNS:         os
00355  *============================================================================*/
00356 //std::ostream& operator<<(std::ostream& os, Proc& proc) {
00357 //  return proc.put(os);
00358 //}
00359 
00360 
00361 Proc *Proc::getFirstCaller() { 
00362     if (m_firstCaller == NULL && m_firstCallerAddr != NO_ADDRESS) {
00363         m_firstCaller = prog->findProc(m_firstCallerAddr);
00364         m_firstCallerAddr = NO_ADDRESS;
00365     }
00366 
00367     return m_firstCaller; 
00368 }
00369 
00370 /**********************
00371  * LibProc methods.
00372  *********************/
00373 
00374 /*==============================================================================
00375  * FUNCTION:        LibProc::LibProc
00376  * OVERVIEW:        Constructor with name, native address.
00377  * PARAMETERS:      name - Name of procedure
00378  *                  uNative - Native address of entry point of procedure
00379  * RETURNS:         <nothing>
00380  *============================================================================*/
00381 LibProc::LibProc(Prog *prog, std::string& name, ADDRESS uNative) : Proc(prog, uNative, NULL) {
00382     Signature* sig = prog->getLibSignature(name.c_str());
00383     signature = sig;
00384 }
00385 
00386 LibProc::~LibProc()
00387 {}
00388 
00389 /*==============================================================================
00390  * FUNCTION:        LibProc::put
00391  * OVERVIEW:        Display on os.
00392  * PARAMETERS:      os -
00393  * RETURNS:         os
00394  *============================================================================*/
00395 //std::ostream& LibProc::put(std::ostream& os) {
00396 //  os << "library procedure `" << signature->getName() << "' resides at 0x";
00397 //  return os << std::hex << address << std::endl;
00398 //}
00399 
00400 Exp *LibProc::getProven(Exp *left) {
00401     // Just use the signature information (all we have, after all)
00402     return signature->getProven(left);
00403 }
00404 
00405 bool LibProc::isPreserved(Exp* e) {
00406     return signature->isPreserved(e);
00407 }
00408 
00409 /**********************
00410  * UserProc methods.
00411  *********************/
00412 
00413 /*==============================================================================
00414  * FUNCTION:        UserProc::UserProc
00415  * OVERVIEW:        Constructor with name, native address.
00416  * PARAMETERS:      name - Name of procedure
00417  *                  uNative - Native address of entry point of procedure
00418  * RETURNS:         <nothing>
00419  *============================================================================*/
00420 UserProc::UserProc() : Proc(), cfg(NULL), status(PROC_UNDECODED),
00421         // decoded(false), analysed(false),
00422         nextLocal(0), nextParam(0), // decompileSeen(false), decompiled(false), isRecursive(false)
00423         cycleGrp(NULL), theReturnStatement(NULL) {
00424     localTable.setProc(this);
00425 }
00426 UserProc::UserProc(Prog *prog, std::string& name, ADDRESS uNative) :
00427         // Not quite ready for the below fix:
00428         // Proc(prog, uNative, prog->getDefaultSignature(name.c_str())),
00429         Proc(prog, uNative, new Signature(name.c_str())),
00430         cfg(new Cfg()), status(PROC_UNDECODED),
00431         nextLocal(0),  nextParam(0),// decompileSeen(false), decompiled(false), isRecursive(false),
00432         cycleGrp(NULL), theReturnStatement(NULL), DFGcount(0)
00433 {
00434     cfg->setProc(this);              // Initialise cfg.myProc
00435     localTable.setProc(this);
00436 }
00437 
00438 UserProc::~UserProc() {
00439     if (cfg)
00440         delete cfg; 
00441 }
00442 
00443 /*==============================================================================
00444  * FUNCTION:        UserProc::deleteCFG
00445  * OVERVIEW:        Deletes the whole CFG for this proc object. Also clears the
00446  *                  cfg pointer, to prevent strange errors after this is called
00447  * PARAMETERS:      <none>
00448  * RETURNS:         <nothing>
00449  *============================================================================*/
00450 void UserProc::deleteCFG() {
00451     delete cfg;
00452     cfg = NULL;
00453 }
00454 
00455 class lessEvaluate : public std::binary_function<SyntaxNode*, SyntaxNode*, bool> {
00456 public:
00457     bool operator()(const SyntaxNode* x, const SyntaxNode* y) const
00458     {
00459         return ((SyntaxNode*)x)->getScore() > ((SyntaxNode*)y)->getScore();
00460     }
00461 };
00462 
00463 SyntaxNode *UserProc::getAST()
00464 {
00465     int numBBs = 0;
00466     BlockSyntaxNode *init = new BlockSyntaxNode();
00467     BB_IT it;
00468     for (PBB bb = cfg->getFirstBB(it); bb; bb = cfg->getNextBB(it)) {
00469         BlockSyntaxNode *b = new BlockSyntaxNode();
00470         b->setBB(bb);
00471         init->addStatement(b);
00472         numBBs++;
00473     }
00474     
00475     // perform a best first search for the nicest AST
00476     std::priority_queue<SyntaxNode*, std::vector<SyntaxNode*>, lessEvaluate > ASTs;
00477     ASTs.push(init);
00478 
00479     SyntaxNode *best = init;
00480     int best_score = init->getScore();
00481     int count = 0;
00482     while (ASTs.size()) {
00483         if (best_score < numBBs * 2) {
00484             LOG << "exit early: " << best_score << "\n";
00485             break;
00486         }
00487 
00488         SyntaxNode *top = ASTs.top();
00489         ASTs.pop();
00490         int score = top->evaluate(top);
00491 
00492         printAST(top); // debug
00493 
00494         if (score < best_score) {
00495             if (best && top != best)
00496                 delete best;
00497             best = top;
00498             best_score = score;
00499         }
00500 
00501         count++;
00502         if (count > 100)
00503             break;
00504 
00505         // add successors
00506         std::vector<SyntaxNode*> successors;
00507         top->addSuccessors(top, successors);
00508         for (unsigned i = 0; i < successors.size(); i++) {
00509             //successors[i]->addToScore(top->getScore());   // uncomment for A*
00510             successors[i]->addToScore(successors[i]->getDepth()); // or this
00511             ASTs.push(successors[i]);
00512         }
00513 
00514         if (top != best)
00515             delete top;
00516     }
00517 
00518     // clean up memory
00519     while(ASTs.size()) {
00520         SyntaxNode *top = ASTs.top();
00521         ASTs.pop();
00522         if (top != best)
00523             delete top;
00524     }
00525     
00526     return best;
00527 }
00528 
00529 int count = 1;
00530 
00531 void UserProc::printAST(SyntaxNode *a)
00532 {
00533     char s[1024];
00534     if (a == NULL)
00535         a = getAST();
00536     sprintf(s, "ast%i-%s.dot", count++, getName());
00537     std::ofstream of(s);
00538     of << "digraph " << getName() << " {" << std::endl;
00539     of << "  label=\"score: " << a->evaluate(a) << "\";" << std::endl;
00540     a->printAST(a, of);
00541     of << "}" << std::endl;
00542     of.close();
00543 }
00544 
00545 /*==============================================================================
00546  * FUNCTION:        UserProc::setDecoded
00547  * OVERVIEW:        
00548  * PARAMETERS:      
00549  * RETURNS:         
00550  *============================================================================*/
00551 void UserProc::setDecoded() {
00552     setStatus(PROC_DECODED);
00553     printDecodedXML();
00554 }
00555 
00556 /*==============================================================================
00557  * FUNCTION:        UserProc::unDecode
00558  * OVERVIEW:        
00559  * PARAMETERS:      
00560  * RETURNS:         
00561  *============================================================================*/
00562 void UserProc::unDecode() {
00563     cfg->clear();
00564     setStatus(PROC_UNDECODED);
00565 }
00566 
00567 /*==============================================================================
00568  * FUNCTION:    UserProc::getEntryBB
00569  * OVERVIEW:    Get the BB with the entry point address for this procedure
00570  * PARAMETERS:  
00571  * RETURNS:     Pointer to the entry point BB, or NULL if not found
00572  *============================================================================*/
00573 PBB UserProc::getEntryBB() {
00574     return cfg->getEntryBB();
00575 }
00576 
00577 /*==============================================================================
00578  * FUNCTION:        UserProc::setEntryBB
00579  * OVERVIEW:        Set the entry BB for this procedure
00580  * PARAMETERS:      <none>
00581  * RETURNS:         <nothing>
00582  *============================================================================*/
00583 void UserProc::setEntryBB() {
00584     std::list<PBB>::iterator bbit;
00585     PBB pBB = cfg->getFirstBB(bbit);        // Get an iterator to the first BB
00586     // Usually, but not always, this will be the first BB, or at least in the first few
00587     while (pBB && address != pBB->getLowAddr()) {
00588         pBB = cfg->getNextBB(bbit);
00589     }
00590     cfg->setEntryBB(pBB);
00591 }
00592 
00593 /*==============================================================================
00594  * FUNCTION:        UserProc::addCallee
00595  * OVERVIEW:        Add this callee to the set of callees for this proc
00596  * PARAMETERS:      A pointer to the Proc object for the callee
00597  * RETURNS:         <nothing>
00598  *============================================================================*/
00599 void UserProc::addCallee(Proc* callee) {
00600     // is it already in? (this is much slower than using a set)
00601     std::list<Proc*>::iterator cc;
00602     for (cc = calleeList.begin(); cc != calleeList.end(); cc++)
00603         if(*cc == callee)
00604             return; // it's already in
00605 
00606     calleeList.push_back(callee);
00607 }
00608 
00609 void UserProc::generateCode(HLLCode *hll) {
00610     assert(cfg);
00611     assert(getEntryBB());
00612 
00613     cfg->structure();
00614     removeUnusedLocals();
00615 
00616     // Note: don't try to remove unused statements here; that requires the
00617     // RefExps, which are all gone now (transformed out of SSA form)!
00618 
00619     if (VERBOSE || Boomerang::get()->printRtl)
00620         printToLog();
00621 
00622     hll->AddProcStart(this);
00623     
00624     // Local variables; print everything in the locals map
00625     std::map<std::string, Type*>::iterator last = locals.end();
00626     if (locals.size()) last--;
00627     for (std::map<std::string, Type*>::iterator it = locals.begin(); it != locals.end(); it++) {
00628         Type* locType = it->second;
00629         if (locType == NULL || locType->isVoid())
00630             locType = new IntegerType();
00631         hll->AddLocal(it->first.c_str(), locType, it == last);
00632     }
00633 
00634     if (Boomerang::get()->noDecompile && std::string(getName()) == "main") {
00635         StatementList args, results;
00636         if (prog->getFrontEndId() == PLAT_PENTIUM)
00637             hll->AddCallStatement(1, NULL, "PENTIUMSETUP", args, &results);
00638         else if (prog->getFrontEndId() == PLAT_SPARC)
00639             hll->AddCallStatement(1, NULL, "SPARCSETUP", args, &results);
00640     }
00641 
00642     std::list<PBB> followSet, gotoSet;
00643     getEntryBB()->generateCode(hll, 1, NULL, followSet, gotoSet, this);
00644     
00645     hll->AddProcEnd();
00646 
00647     if (!Boomerang::get()->noRemoveLabels)
00648         cfg->removeUnneededLabels(hll);
00649 
00650     setStatus(PROC_CODE_GENERATED);
00651 }
00652 
00653 // print this userproc, maining for debugging
00654 void UserProc::print(std::ostream &out, bool html) {
00655     signature->print(out, html);
00656     if (html)
00657         out << "<br>";
00658     out << "in cluster " << cluster->getName() << "\n";
00659     if (html)
00660         out << "<br>";
00661     std::ostringstream ost;
00662     printParams(ost, html);
00663     dumpLocals(ost, html);
00664     out << ost.str().c_str();
00665     printSymbolMap(out, html);
00666     if (html)
00667         out << "<br>";
00668     out << "live variables: ";
00669     std::ostringstream ost2;
00670     col.print(ost2);
00671     out << ost2.str().c_str() << "\n";
00672     if (html)
00673         out << "<br>";
00674     out << "end live variables\n";
00675     std::ostringstream ost3;
00676     cfg->print(ost3, html);
00677     out << ost3.str().c_str();
00678     out << "\n";
00679 }
00680 
00681 void UserProc::setStatus(ProcStatus s)
00682 {
00683     status = s;
00684     Boomerang::get()->alert_proc_status_change(this);
00685 }
00686 
00687 void UserProc::printParams(std::ostream& out, bool html) {
00688     if (html)
00689         out << "<br>";
00690     out << "parameters: ";
00691     bool first = true;
00692     for (StatementList::iterator pp = parameters.begin(); pp != parameters.end(); ++pp) {
00693         if (first)
00694             first = false;
00695         else
00696             out << ", ";
00697         out << ((Assign*)*pp)->getType() << " " << ((Assign*)*pp)->getLeft();
00698     }
00699     out << "\n";
00700     if (html)
00701         out << "<br>";
00702     out << "end parameters\n";
00703 }
00704 
00705 char* UserProc::prints() {
00706     std::ostringstream ost;
00707     print(ost);
00708     strncpy(debug_buffer, ost.str().c_str(), DEBUG_BUFSIZE);
00709     debug_buffer[DEBUG_BUFSIZE-1] = '\0';
00710     return debug_buffer;
00711 }
00712 
00713 void UserProc::dump() {
00714     print(std::cerr);
00715 }
00716 
00717 void UserProc::printToLog() {
00718     std::ostringstream ost;
00719     print(ost);
00720     LOG << ost.str().c_str();
00721 }
00722 
00723 void UserProc::printDFG() { 
00724     char fname[1024];
00725     sprintf(fname, "%s%s-%i-dfg.dot", Boomerang::get()->getOutputPath().c_str(), getName(), DFGcount);
00726     DFGcount++;
00727     if (VERBOSE)
00728         LOG << "outputing DFG to " << fname << "\n";
00729     std::ofstream out(fname);
00730     out << "digraph " << getName() << " {\n";
00731     StatementList stmts;
00732     getStatements(stmts);
00733     StatementList::iterator it;
00734     for (it = stmts.begin(); it != stmts.end(); it++) {
00735         Statement *s = *it;
00736         if (s->isPhi())
00737             out << s->getNumber() << " [shape=\"triangle\"];\n";
00738         if (s->isCall())
00739             out << s->getNumber() << " [shape=\"box\"];\n";
00740         if (s->isBranch())
00741             out << s->getNumber() << " [shape=\"diamond\"];\n";
00742         LocationSet refs;
00743         s->addUsedLocs(refs);
00744         LocationSet::iterator rr;
00745         for (rr = refs.begin(); rr != refs.end(); rr++) {
00746             RefExp* r = dynamic_cast<RefExp*>(*rr);
00747             if (r) {
00748                 if (r->getDef())
00749                     out << r->getDef()->getNumber();
00750                 else
00751                     out << "input";
00752                 out << " -> ";
00753                 if (s->isReturn())
00754                     out << "output";
00755                 else
00756                     out << s->getNumber();
00757                 out << ";\n";
00758             }
00759         }
00760     }
00761     out << "}\n";
00762     out.close();
00763 }
00764 
00765 // initialise all statements
00766 void UserProc::initStatements() {
00767     BB_IT it;
00768     BasicBlock::rtlit rit; StatementList::iterator sit;
00769     for (PBB bb = cfg->getFirstBB(it); bb; bb = cfg->getNextBB(it)) {
00770         for (Statement* s = bb->getFirstStmt(rit, sit); s; s = bb->getNextStmt(rit, sit)) {
00771             s->setProc(this);
00772             s->setBB(bb);
00773             CallStatement* call = dynamic_cast<CallStatement*>(s);
00774             if (call) {
00775                 call->setSigArguments();
00776                 if (call->getDestProc() && call->getDestProc()->isNoReturn() && bb->getNumOutEdges() == 1) {
00777                     PBB out = bb->getOutEdge(0);
00778                     if (out != cfg->getExitBB() || cfg->getExitBB()->getNumInEdges() != 1) {
00779                         out->deleteInEdge(bb);
00780                         bb->getOutEdges().clear();
00781                     }
00782                 }
00783             }
00784         }
00785     }
00786 }
00787 
00788 void UserProc::numberStatements() {
00789     BB_IT it;
00790     BasicBlock::rtlit rit; StatementList::iterator sit;
00791     for (PBB bb = cfg->getFirstBB(it); bb; bb = cfg->getNextBB(it)) {
00792         for (Statement* s = bb->getFirstStmt(rit, sit); s; s = bb->getNextStmt(rit, sit))
00793             if (!s->isImplicit() &&         // Don't renumber implicits (remain number 0)
00794                     s->getNumber() == 0)    // Don't renumber existing (or waste numbers)
00795                 s->setNumber(++stmtNumber);
00796     }
00797 }
00798 
00799 
00800 // get all statements
00801 // Get to a statement list, so they come out in a reasonable and consistent order
00802 void UserProc::getStatements(StatementList &stmts) {
00803     BB_IT it;
00804     for (PBB bb = cfg->getFirstBB(it); bb; bb = cfg->getNextBB(it)) 
00805         bb->getStatements(stmts);
00806 
00807     for (StatementList::iterator it = stmts.begin(); it != stmts.end(); it++)
00808         if ((*it)->getProc() == NULL)
00809             (*it)->setProc(this);
00810 }
00811 
00812 // Remove a statement. This is somewhat inefficient - we have to search the whole BB for the statement.
00813 // Should use iterators or other context to find out how to erase "in place" (without having to linearly search)
00814 void UserProc::removeStatement(Statement *stmt) {
00815     // remove anything proven about this statement
00816     for (std::map<Exp*, Exp*, lessExpStar>::iterator it = provenTrue.begin(); it != provenTrue.end(); ) {
00817         LocationSet refs;
00818         it->second->addUsedLocs(refs);
00819         it->first->addUsedLocs(refs);       // Could be say m[esp{99} - 4] on LHS and we are deleting stmt 99
00820         LocationSet::iterator rr;
00821         bool usesIt = false;
00822         for (rr = refs.begin(); rr != refs.end(); rr++) {
00823             Exp* r = *rr;
00824             if (r->isSubscript() && ((RefExp*)r)->getDef() == stmt) {
00825                 usesIt = true;
00826                 break;
00827             }
00828         }
00829         if (usesIt) {
00830             if (VERBOSE)
00831                 LOG << "removing proven true exp " << it->first << " = " << it->second <<
00832                     " that uses statement being removed.\n";
00833             provenTrue.erase(it++);
00834             // it = provenTrue.begin();
00835             continue;
00836         }
00837         ++it;           // it is incremented with the erase, or here
00838     }
00839 
00840     // remove from BB/RTL
00841     PBB bb = stmt->getBB();         // Get our enclosing BB
00842     std::list<RTL*> *rtls = bb->getRTLs();
00843     for (std::list<RTL*>::iterator rit = rtls->begin(); rit != rtls->end(); rit++) {
00844         std::list<Statement*>& stmts = (*rit)->getList();
00845         for (RTL::iterator it = stmts.begin(); it != stmts.end(); it++) {
00846             if (*it == stmt) {
00847                 stmts.erase(it);
00848                 return;
00849             }
00850         }
00851     }
00852 }
00853 
00854 void UserProc::insertAssignAfter(Statement* s, Exp* left, Exp* right) {
00855     std::list<Statement*>::iterator it;
00856     std::list<Statement*>* stmts;
00857     if (s == NULL) {
00858         // This means right is supposed to be a parameter. We can insert the assignment at the start of the entryBB
00859         PBB entryBB = cfg->getEntryBB();
00860         std::list<RTL*> *rtls = entryBB->getRTLs();
00861         assert(rtls->size());       // Entry BB should have at least 1 RTL
00862         stmts = &rtls->front()->getList();
00863         it = stmts->begin();
00864     } else {
00865         // An ordinary definition; put the assignment at the end of s's BB
00866         PBB bb = s->getBB();         // Get the enclosing BB for s
00867         std::list<RTL*> *rtls = bb->getRTLs();
00868         assert(rtls->size());       // If s is defined here, there should be
00869                                     // at least 1 RTL
00870         stmts = &rtls->back()->getList();
00871         it = stmts->end();          // Insert before the end
00872     }
00873     Assign* as = new Assign(left, right);
00874     as->setProc(this);
00875     stmts->insert(it, as);
00876     return;
00877 }
00878 
00879 // Note: this procedure is designed for the front end, where enclosing BBs are not set up yet.
00880 // So this is an inefficient linear search!
00881 void UserProc::insertStatementAfter(Statement* s, Statement* a) {
00882     BB_IT bb;
00883     for (bb = cfg->begin(); bb != cfg->end(); bb++) {
00884         std::list<RTL*>::iterator rr;
00885         std::list<RTL*>* rtls = (*bb)->getRTLs();
00886         if (rtls == NULL)
00887             continue;           // e.g. *bb is (as yet) invalid
00888         for (rr = rtls->begin(); rr != rtls->end(); rr++) {
00889             std::list<Statement*>& stmts = (*rr)->getList();
00890             std::list<Statement*>::iterator ss;
00891             for (ss = stmts.begin(); ss != stmts.end(); ss++) {
00892                 if (*ss == s) {
00893                     ss++;       // This is the point to insert before
00894                     stmts.insert(ss, a);
00895                     return;
00896                 }
00897             }
00898         }
00899     }
00900     assert(false);          // Should have found this statement in this BB
00901 }
00902 
00903 /* Cycle detection logic:
00904  * *********************
00905  * cycleGrp is an initially NULL pointer to a set of procedures, representing the procedures involved in the current
00906  * recursion group, if any. These procedures have to be analysed together as a group, after individual pre-group
00907  * analysis.
00908  * child is a set of procedures, cleared at the top of decompile(), representing the cycles associated with the
00909  * current procedure and all of its children. If this is empty, the current procedure is not involved in recursion,
00910  * and can be decompiled up to and including removing unused statements.
00911  * path is an initially empty list of procedures, representing the call path from the current entry point to the
00912  * current procedure, inclusive.
00913  * If (after all children have been processed: important!) the first element in path and also cycleGrp is the current
00914  * procedure, we have the maximal set of distinct cycles, so we can do the recursion group analysis and return an empty
00915  * set. At the end of the recursion group analysis, the whole group is complete, ready for the global analyses. 
00916  cycleSet decompile(ProcList path)      // path initially empty
00917     child = new ProcSet
00918     append this proc to path
00919     for each child c called by this proc
00920         if c has already been visited but not finished
00921             // have new cycle
00922             if c is in path
00923               // this is a completely new cycle
00924               insert every proc from c to the end of path into child
00925             else
00926               // this is a new branch of an existing cycle
00927               child = c->cycleGrp
00928               find first element f of path that is in cycleGrp
00929               insert every proc after f to the end of path into child
00930             for each element e of child
00931               insert e->cycleGrp into child
00932               e->cycleGrp = child
00933         else
00934             // no new cycle
00935             tmp = c->decompile(path)
00936             child = union(child, tmp)
00937             set return statement in call to that of c
00938     if (child empty)
00939         earlyDecompile()
00940         child = middleDecompile()
00941         removeUnusedStatments()         // Not involved in recursion
00942     else
00943         // Is involved in recursion
00944         find first element f in path that is also in cycleGrp
00945         if (f == this)                      // The big test: have we got the complete strongly connected component?
00946             recursionGroupAnalysis()        // Yes, we have
00947             child = new ProcSet         // Don't add these processed cycles to the parent
00948     remove last element (= this) from path
00949     return child
00950  */
00951 
00952 // Decompile this UserProc
00953 ProcSet* UserProc::decompile(ProcList* path, int& indent) {
00954     Boomerang::get()->alert_considering(path->empty() ? NULL : path->back(), this);
00955     std::cout << std::setw(++indent) << " " << (status >= PROC_VISITED ? "re" : "") << "considering " << getName() <<
00956         "\n";
00957     if (VERBOSE)
00958         LOG << "begin decompile(" << getName() << ")\n";
00959 
00960     // Prevent infinite loops when there are cycles in the call graph (should never happen now)
00961     if (status >= PROC_FINAL) {
00962         std::cerr << "Error: " << getName() << " already has status PROC_FINAL\n";
00963         return NULL;                            // Already decompiled
00964     }
00965     if (status < PROC_DECODED)
00966         // Can happen e.g. if a callee is visible only after analysing a switch statement
00967         prog->reDecode(this);                   // Actually decoding for the first time, not REdecoding
00968 
00969     if (status < PROC_VISITED)
00970         setStatus(PROC_VISITED);                    // We have at least visited this proc "on the way down"
00971     ProcSet* child = new ProcSet;
00972     path->push_back(this);                      // Append this proc to path
00973 
00974     /*  *   *   *   *   *   *   *   *   *   *   *
00975      *                                          *
00976      *  R e c u r s e   t o   c h i l d r e n   *
00977      *                                          *
00978      *  *   *   *   *   *   *   *   *   *   *   */
00979 
00980     if (!Boomerang::get()->noDecodeChildren) {
00981         // Recurse to children first, to perform a depth first search
00982         BB_IT it;
00983         // Look at each call, to do the DFS
00984         for (PBB bb = cfg->getFirstBB(it); bb; bb = cfg->getNextBB(it)) {
00985             if (bb->getType() == CALL) {
00986                 // The call Statement will be in the last RTL in this BB
00987                 CallStatement* call = (CallStatement*)bb->getRTLs()->back()->getHlStmt();
00988                 if (!call->isCall()) {
00989                     LOG << "bb at " << bb->getLowAddr() << " is a CALL but last stmt is not a call: " << call << "\n";
00990                 }
00991                 assert(call->isCall());
00992                 UserProc* c = (UserProc*)call->getDestProc();
00993                 if (c == NULL || c->isLib()) continue;
00994                 if (c->status == PROC_FINAL) {
00995                     // Already decompiled, but the return statement still needs to be set for this call
00996                     call->setCalleeReturn(c->getTheReturnStatement());
00997                     continue;
00998                 }
00999                 // if c has already been visited but not done (apart from global analyses, i.e. we have a new cycle)
01000                 if (c->status >= PROC_VISITED && c->status <= PROC_EARLYDONE) {
01001                     // if c is in path
01002                     ProcList::iterator pi;
01003                     bool inPath = false;
01004                     for (pi = path->begin(); pi != path->end(); ++pi) {
01005                         if (*pi == c) {
01006                             inPath = true;
01007                             break;
01008                         }
01009                     }
01010                     if (inPath) {
01011                         // This is a completely new cycle
01012                         // Insert every proc from c to the end of path into child
01013                         do {
01014                             child->insert(*pi);
01015                             ++pi;
01016                         } while (pi != path->end());
01017                     } else {
01018                         // This is new branch of an existing cycle
01019                         child = c->cycleGrp;
01020                         // Find first element f of path that is in c->cycleGrp
01021                         ProcList::iterator pi;
01022                         Proc* f = NULL;
01023                         for (pi = path->begin(); pi != path->end(); ++pi) {
01024                             if (c->cycleGrp->find(*pi) != c->cycleGrp->end()) {
01025                                 f = *pi;
01026                                 break;
01027                             }
01028                         }
01029                         assert(f);
01030                         // Insert every proc after f to the end of path into child
01031                         // There must be at least one element in the list (this proc), so the ++pi should be safe
01032                         while (++pi != path->end()) {
01033                             child->insert(*pi);
01034                         } 
01035                     }
01036                     // point cycleGrp for each element of child to child, unioning in each element's cycleGrp
01037                     ProcSet::iterator cc;
01038                     for (cc = child->begin(); cc != child->end(); ++cc) {
01039                         ProcSet*& cg = (*cc)->cycleGrp;
01040                         if (cg)
01041                             child->insert(cg->begin(), cg->end());
01042                         cg = child;
01043                     }
01044                     setStatus(PROC_INCYCLE);    
01045                 } else {
01046                     // No new cycle
01047                     if (VERBOSE)
01048                         LOG << "visiting on the way down child " << c->getName() << " from " << getName() << "\n";
01049                     ProcSet* tmp = c->decompile(path, indent);
01050                     child->insert(tmp->begin(), tmp->end());
01051                     // Child has at least done middleDecompile(), possibly more
01052                     call->setCalleeReturn(c->getTheReturnStatement());
01053                     if (tmp->size() > 0) {
01054                         setStatus(PROC_INCYCLE);
01055                     }
01056                 }
01057             }
01058         }
01059     }
01060 
01061 
01062     // if child is empty, i.e. no child involved in recursion
01063     if (child->size() == 0) {
01064         Boomerang::get()->alert_decompiling(this);
01065         std::cout << std::setw(indent) << " " << "decompiling " << getName() << "\n";
01066         initialiseDecompile();                  // Sort the CFG, number statements, etc
01067         earlyDecompile();
01068         child = middleDecompile(path, indent);
01069         // If there is a switch statement, middleDecompile could contribute some cycles. If so, we need to test for
01070         // the recursion logic again
01071         if (child->size() != 0)
01072             // We've just come back out of decompile(), so we've lost the current proc from the path.
01073             path->push_back(this);
01074     }
01075     if (child->size() == 0) {
01076         remUnusedStmtEtc(); // Do the whole works
01077         setStatus(PROC_FINAL);
01078         Boomerang::get()->alert_end_decompile(this);
01079     } else {
01080         // this proc's children, and hence this proc, is/are involved in recursion
01081         // find first element f in path that is also in cycleGrp
01082         ProcList::iterator f;
01083         for (f = path->begin(); f != path->end(); ++f)
01084             if (cycleGrp->find(*f) != cycleGrp->end())
01085                 break;
01086         // The big test: have we found all the strongly connected components (in the call graph)?
01087         if (*f == this) {
01088             // Yes, process these procs as a group
01089             recursionGroupAnalysis(path, indent);// Includes remUnusedStmtEtc on all procs in cycleGrp
01090             setStatus(PROC_FINAL);
01091             Boomerang::get()->alert_end_decompile(this);
01092             child = new ProcSet;
01093         }
01094     }
01095 
01096     // Remove last element (= this) from path
01097     // The if should not be neccesary, but nestedswitch needs it
01098     if (path->size())
01099         path->erase(--path->end());
01100     else
01101         LOG << "WARNING: UserProc::decompile: empty path when trying to remove last proc\n";
01102 
01103     --indent;
01104     if (VERBOSE)
01105         LOG << "end decompile(" << getName() << ")\n";
01106     return child;
01107 }
01108 
01109 /*  *   *   *   *   *   *   *   *   *   *   *
01110  *                                          *
01111  *      D e c o m p i l e   p r o p e r     *
01112  *          ( i n i t i a l )               *
01113  *                                          *
01114  *  *   *   *   *   *   *   *   *   *   *   */
01115 
01116 void UserProc::initialiseDecompile() {
01117 
01118     Boomerang::get()->alert_start_decompile(this);
01119 
01120     Boomerang::get()->alert_decompile_debug_point(this, "before initialise");
01121 
01122     if (VERBOSE) LOG << "initialise decompile for " << getName() << "\n";
01123 
01124     // Sort by address, so printouts make sense
01125     cfg->sortByAddress();
01126 
01127     // Initialise statements
01128     initStatements();
01129 
01130     if (VERBOSE) {
01131         LOG << "--- debug print before SSA for " << getName() << " ---\n";
01132         printToLog();
01133         LOG << "=== end debug print before SSA for " << getName() << " ===\n\n";
01134     }
01135 
01136     // Compute dominance frontier
01137     df.dominators(cfg);
01138 
01139     // Number the statements
01140     stmtNumber = 0;
01141     numberStatements(); 
01142 
01143     printXML();
01144 
01145     if (Boomerang::get()->noDecompile) {
01146         std::cout << "not decompiling.\n";
01147         setStatus(PROC_FINAL);              // ??!
01148         return;
01149     }
01150 
01151     if (VERBOSE) {
01152         LOG << "--- debug initial print after decoding for " << getName() << " ---\n";
01153         printToLog();
01154         LOG << "=== end initial debug print after decoding for " << getName() << " ===\n\n";
01155     }
01156 
01157     Boomerang::get()->alert_decompile_debug_point(this, "after initialise");
01158 }
01159 // Can merge these two now
01160 void UserProc::earlyDecompile() {
01161 
01162     if (status >= PROC_EARLYDONE)
01163         return; 
01164 
01165     Boomerang::get()->alert_decompile_debug_point(this, "before early");
01166     if (VERBOSE) LOG << "early decompile for " << getName() << "\n";
01167 
01168     // Update the defines in the calls. Will redo if involved in recursion
01169     updateCallDefines();
01170 
01171     // First placement of phi functions, renaming, and initial propagation. This is mostly for the stack pointer
01172     //maxDepth = findMaxDepth() + 1;
01173     //if (Boomerang::get()->maxMemDepth < maxDepth)
01174     //  maxDepth = Boomerang::get()->maxMemDepth;
01175     // TODO: Check if this makes sense. It seems to me that we only want to do one pass of propagation here, since
01176     // the status == check had been knobbled below. Hopefully, one call to placing phi functions etc will be
01177     // equivalent to depth 0 in the old scheme
01178     if (VERBOSE)
01179         LOG << "placing phi functions 1st pass\n";
01180     // Place the phi functions
01181     df.placePhiFunctions(this);
01182 
01183     if (VERBOSE)
01184         LOG << "numbering phi statements 1st pass\n";
01185     numberStatements();             // Number them
01186 
01187     if (VERBOSE)
01188         LOG << "renaming block variables 1st pass\n";
01189     // Rename variables
01190     doRenameBlockVars(1, true);
01191     if (VERBOSE) {
01192         LOG << "\n--- after rename (1) for " << getName() << " 1st pass\n";
01193         printToLog();
01194         LOG << "\n=== done after rename (1) for " << getName() << " 1st pass\n\n";
01195     }
01196 
01197     bool convert;
01198     propagateStatements(convert, 1);
01199     if (VERBOSE) {
01200         LOG << "\n--- after propagation (1) for " << getName() << " 1st pass ---\n";
01201         printToLog();
01202         LOG << "\n=== done after propagation (1) for " << getName() << " 1st pass ===\n\n";
01203     }
01204 
01205     Boomerang::get()->alert_decompile_debug_point(this, "after early");
01206 }
01207 
01208 ProcSet* UserProc::middleDecompile(ProcList* path, int indent) {
01209 
01210     Boomerang::get()->alert_decompile_debug_point(this, "before middle");
01211 
01212     // The call bypass logic should be staged as well. For example, consider m[r1{11}]{11} where 11 is a call.
01213     // The first stage bypass yields m[r1{2}]{11}, which needs another round of propagation to yield m[r1{-}-32]{11}
01214     // (which can safely be processed at depth 1).
01215     // Except that this is now inherent in the visitor nature of the latest algorithm.
01216     fixCallAndPhiRefs();            // Bypass children that are finalised (if any)
01217     bool convert;
01218     if (status != PROC_INCYCLE)     // FIXME: need this test?
01219         propagateStatements(convert, 2);
01220     if (VERBOSE) {
01221         LOG << "\n--- after call and phi bypass (1) of " << getName() << " ---\n";
01222         printToLog();
01223         LOG << "\n=== done after call and phi bypass (1) of " << getName() << " ===\n\n";
01224     }
01225 
01226     // This part used to be calle middleDecompile():
01227 
01228     findSpPreservation();
01229     // Oops - the idea of splitting the sp from the rest of the preservations was to allow correct naming of locals
01230     // so you are alias conservative. But of course some locals are ebp (etc) based, and so these will never be correct
01231     // until all the registers have preservation analysis done. So I may as well do them all together here.
01232     findPreserveds();
01233     fixCallAndPhiRefs();    // Propagate and bypass sp
01234     if (VERBOSE) {
01235         LOG << "--- after preservation, bypass and propagation ---\n";
01236         printToLog();
01237         LOG << "=== end after preservation, bypass and propagation ===\n";
01238     }
01239     // Oh, no, we keep doing preservations till almost the end...
01240     //setStatus(PROC_PRESERVEDS);       // Preservation done
01241 
01242     if (!Boomerang::get()->noPromote)
01243         // We want functions other than main to be promoted. Needed before mapExpressionsToLocals
01244         promoteSignature();
01245     // The problem with doing locals too early is that the symbol map ends up with some {-} and some {0}
01246     // Also, once named as a local, it is tempting to propagate the memory location, but that might be unsafe if the
01247     // address is taken. But see mapLocalsAndParams just a page below.
01248     //mapExpressionsToLocals();
01249     
01250     // Update the arguments for calls (mainly for the non recursion affected calls)
01251     // We have only done limited propagation and collecting to this point. Need e.g. to put m[esp-K]
01252     // into the collectors of calls, so when a stack parameter is created, it will be correctly localised
01253     // Note that we'd like to limit propagation before this point, because we have not yet created any arguments, so
01254     // it is possible to get "excessive propagation" to parameters. In fact, because uses vary so much throughout a
01255     // program, it may end up better not limiting propagation until very late in the decompilation, and undoing some
01256     // propagation just before removing unused statements. Or even later, if that is possible.
01257     // For now, we create the initial arguments here (relatively early), and live with the fact that some apparently
01258     // distinct memof argument expressions (e.g. m[eax{30}] and m[esp{40}-4]) will turn out to be duplicates, and so
01259     // the duplicates must be eliminated.
01260     bool change = df.placePhiFunctions(this);
01261     if (change) numberStatements();     // Number the new statements
01262     doRenameBlockVars(2);
01263     propagateStatements(convert, 2);    // Otherwise sometimes sp is not fully propagated
01264     // Map locals and temporary parameters as symbols, so that they can be propagated later
01265 //  mapLocalsAndParams();               // FIXME: unsure where this belongs
01266     updateArguments();
01267     reverseStrengthReduction();
01268     //processTypes();
01269 
01270     // Repeat until no change
01271     int pass;
01272     for (pass = 3; pass <= 12; ++pass) {
01273         // Redo the renaming process to take into account the arguments
01274         if (VERBOSE)
01275             LOG << "renaming block variables (2) pass " << pass << "\n";
01276         // Rename variables
01277         change = df.placePhiFunctions(this);
01278         if (change) numberStatements();     // Number the new statements
01279         change |= doRenameBlockVars(pass, false);       // E.g. for new arguments
01280 
01281         // Seed the return statement with reaching definitions
01282         // FIXME: does this have to be in this loop?
01283         if (theReturnStatement) {
01284             theReturnStatement->updateModifieds();      // Everything including new arguments reaching the exit
01285             theReturnStatement->updateReturns();
01286         }
01287 
01288         printXML();
01289 
01290         // Print if requested
01291         if (VERBOSE) {      // was if debugPrintSSA
01292             LOG << "--- debug print SSA for " << getName() << " pass " << pass << " (no propagations) ---\n";
01293             printToLog();
01294             LOG << "=== end debug print SSA for " << getName() << " pass " << pass << " (no propagations) ===\n\n";
01295         }
01296         
01297         if (Boomerang::get()->dotFile)                          // Require -gd now (though doesn't listen to file name)
01298             printDFG();
01299         Boomerang::get()->alert_decompile_SSADepth(this, pass); // FIXME: need depth -> pass in GUI code
01300 
01301         // (* Was: mapping expressions to Parameters as we go *)
01302 
01303 #if 1   // FIXME: Check if this is needed any more. At least fib seems to need it at present.
01304         if (!Boomerang::get()->noChangeSignatures) {
01305             // addNewReturns(depth);
01306             for (int i=0; i < 3; i++) {     // FIXME: should be iterate until no change
01307                 if (VERBOSE)
01308                     LOG << "### update returns loop iteration " << i << " ###\n";
01309                 if (status != PROC_INCYCLE)
01310                     doRenameBlockVars(pass, true);
01311                 findPreserveds();
01312                 updateCallDefines();        // Returns have uses which affect call defines (if childless)
01313                 fixCallAndPhiRefs();
01314                 findPreserveds();           // Preserveds subtract from returns
01315             }
01316             printXML();
01317             if (VERBOSE) {
01318                 LOG << "--- debug print SSA for " << getName() << " at pass " << pass <<
01319                     " (after updating returns) ---\n";
01320                 printToLog();
01321                 LOG << "=== end debug print SSA for " << getName() << " at pass " << pass << " ===\n\n";
01322             }
01323         }
01324 #endif
01325 
01326         printXML();
01327         // Print if requested
01328         if (VERBOSE) {      // was if debugPrintSSA
01329             LOG << "--- debug print SSA for " << getName() << " at pass " << pass <<
01330                 " (after trimming return set) ---\n";
01331             printToLog();
01332             LOG << "=== end debug print SSA for " << getName() << " at pass " << pass << " ===\n\n";
01333         }
01334 
01335         Boomerang::get()->alert_decompile_beforePropagate(this, pass);
01336         Boomerang::get()->alert_decompile_debug_point(this, "before propagating statements");
01337 
01338         // Propagate
01339         bool convert;           // True when indirect call converted to direct
01340         do {
01341             convert = false;
01342             if (VERBOSE)
01343                 LOG << "propagating at pass " << pass << "\n";
01344             change |= propagateStatements(convert, pass);
01345             change |= doRenameBlockVars(pass, true);
01346             // If you have an indirect to direct call conversion, some propagations that were blocked by
01347             // the indirect call might now succeed, and may be needed to prevent alias problems
01348             // FIXME: I think that the below, and even the convert parameter to propagateStatements(), is no longer
01349             // needed - MVE
01350             if (convert) {
01351                 if (VERBOSE)
01352                     LOG << "\nabout to restart propagations and dataflow at pass " << pass <<
01353                         " due to conversion of indirect to direct call(s)\n\n";
01354                 df.setRenameLocalsParams(false);
01355                 change |= doRenameBlockVars(0, true);           // Initial dataflow level 0
01356                 LOG << "\nafter rename (2) of " << getName() << ":\n";
01357                 printToLog();
01358                 LOG << "\ndone after rename (2) of " << getName() << ":\n\n";
01359             }
01360         } while (convert);
01361 
01362         printXML();
01363         if (VERBOSE) {
01364             LOG << "--- after propagate for " << getName() << " at pass " << pass << " ---\n";
01365             printToLog();
01366             LOG << "=== end propagate for " << getName() << " at pass " << pass << " ===\n\n";
01367         }
01368 
01369         Boomerang::get()->alert_decompile_afterPropagate(this, pass);
01370         Boomerang::get()->alert_decompile_debug_point(this, "after propagating statements");
01371 
01372         // this is just to make it readable, do NOT rely on these statements being removed 
01373         removeSpAssignsIfPossible();
01374         // The problem with removing %flags and %CF is that %CF is a subset of %flags
01375         //removeMatchingAssignsIfPossible(new Terminal(opFlags));
01376         //removeMatchingAssignsIfPossible(new Terminal(opCF));
01377         removeMatchingAssignsIfPossible(new Unary(opTemp, new Terminal(opWildStrConst)));
01378         removeMatchingAssignsIfPossible(new Terminal(opPC));
01379 
01380         //processTypes();
01381 
01382         if (!change)
01383             break;              // Until no change
01384     }
01385 
01386     // At this point, there will be some memofs that have still not been renamed. They have been prevented from
01387     // getting renamed so that they didn't get renamed incorrectly (usually as {-}), when propagation and/or bypassing
01388     // may have ended up changing the address expression. There is now no chance that this will happen, so we need
01389     // to rename the existing memofs. Note that this can still link uses to definitions, e.g.
01390     // 50 r26 := phi(...)
01391     // 51 m[r26{50}] := 99;
01392     //  ... := m[r26{50}]{should be 51}
01393 
01394     if (VERBOSE)
01395         LOG << "### allowing SSA renaming of all memof expressions ###\n";
01396     df.setRenameLocalsParams(true);
01397 
01398     // Now we need another pass to inert phis for the memofs, rename them and propagate them
01399     ++pass;
01400     if (VERBOSE)
01401         LOG << "setting phis, renaming block variables after memofs renamable pass " << pass << "\n";
01402     change = df.placePhiFunctions(this);
01403     if (change) numberStatements();     // Number the new statements
01404     doRenameBlockVars(pass, false);     // MVE: do we want this parameter false or not?
01405     if (VERBOSE) {
01406         LOG << "--- after setting phis for memofs, renaming them for " << getName() << "\n";
01407         printToLog();
01408         LOG << "=== done after setting phis for memofs, renaming them for " << getName() << "\n";
01409     }
01410     propagateStatements(convert, pass);
01411     // Now that memofs are renamed, the bypassing for memofs can work
01412     fixCallAndPhiRefs();            // Bypass children that are finalised (if any)
01413 
01414 #if 0           // Now also done where ellipsis processing is done for dfa-based TA
01415     // Note: processConstants is also where ellipsis processing is done
01416     if (processConstants()) {
01417         if (status != PROC_INCYCLE) {
01418             doRenameBlockVars(-1, true);            // Needed if there was an indirect call to an ellipsis function
01419         }
01420     }
01421     processTypes();
01422 #endif
01423 
01424     if (!Boomerang::get()->noParameterNames) {
01425         // ? Crazy time to do this... haven't even done "final" parameters as yet
01426         //mapExpressionsToParameters();
01427     }
01428 
01429     // Check for indirect jumps or calls not already removed by propagation of constants
01430     if (cfg->decodeIndirectJmp(this)) {
01431         // There was at least one indirect jump or call found and decoded. That means that most of what has been done
01432         // to this function so far is invalid. So redo everything. Very expensive!!
01433         // Code pointed to by the switch table entries has merely had FrontEnd::processFragment() called on it
01434         LOG << "=== about to restart decompilation of " << getName() <<
01435             " because indirect jumps or calls have been analysed\n\n";
01436         Boomerang::get()->alert_decompile_debug_point(this, "before restarting decompilation because indirect jumps or calls have been analysed");
01437 
01438         // First copy any new indirect jumps or calls that were decoded this time around. Just copy them all, the map
01439         // will prevent duplicates
01440         processDecodedICTs();
01441         // Now, decode from scratch
01442         theReturnStatement = NULL;
01443         cfg->clear();
01444         std::ofstream os;
01445         prog->reDecode(this);
01446         df.setRenameLocalsParams(false);        // Start again with memofs
01447         setStatus(PROC_VISITED);                // Back to only visited progress
01448         path->erase(--path->end());             // Remove self from path
01449         --indent;                               // Because this is not recursion
01450         ProcSet* ret = decompile(path, indent); // Restart decompiling this proc
01451         ++indent;                               // Restore indent
01452         path->push_back(this);                  // Restore self to path
01453         // It is important to keep the result of this call for the recursion analysis
01454         return ret;
01455     }
01456 
01457     findPreserveds();
01458 
01459 
01460     // Used to be later...
01461     if (!Boomerang::get()->noParameterNames) {
01462         //findPreserveds();     // FIXME: is this necessary here?
01463         //fixCallBypass();  // FIXME: surely this is not necessary now?
01464         //trimParameters(); // FIXME: surely there aren't any parameters to trim yet?
01465         if (VERBOSE) {
01466             LOG << "--- after replacing expressions, trimming params and returns for " << getName() << " ---\n";
01467             printToLog();
01468             LOG << "=== end after replacing expressions, trimming params and returns for " << getName() << " ===\n";
01469         }
01470     }
01471 
01472     eliminateDuplicateArgs();
01473 
01474     if (VERBOSE)
01475         LOG << "===== end early decompile for " << getName() << " =====\n\n";
01476     setStatus(PROC_EARLYDONE);
01477 
01478     Boomerang::get()->alert_decompile_debug_point(this, "after middle");
01479 
01480     return new ProcSet;
01481 }
01482 
01483 /*  *   *   *   *   *   *   *   *   *   *   *   *   *
01484  *                                                  *
01485  *  R e m o v e   u n u s e d   s t a t e m e n t s *
01486  *                                                  *
01487  *  *   *   *   *   *   *   *   *   *   *   *   *   */
01488 
01489 void UserProc::remUnusedStmtEtc() {
01490 
01491     // NO! Removing of unused statements is an important part of the global removing unused returns analysis, which
01492     // happens after UserProc::decompile is complete
01493     //if (status >= PROC_FINAL)
01494     //  return;
01495 
01496     Boomerang::get()->alert_decompiling(this);
01497     Boomerang::get()->alert_decompile_debug_point(this, "before final");
01498 
01499     if (VERBOSE)
01500         LOG << "--- remove unused statements for " << getName() << " ---\n";
01501     // A temporary hack to remove %CF = %CF{7} when 7 isn't a SUBFLAGS
01502 //  if (theReturnStatement)
01503 //      theReturnStatement->specialProcessing();
01504 
01505     // Perform type analysis. If we are relying (as we are at present) on TA to perform ellipsis processing,
01506     // do the local TA pass now. Ellipsis processing often reveals additional uses (e.g. additional parameters
01507     // to printf/scanf), and removing unused statements is unsafe without full use information
01508     if (status < PROC_FINAL) {
01509         typeAnalysis();
01510         // Now that locals are identified, redo the dataflow
01511         bool change = df.placePhiFunctions(this);
01512         if (change) numberStatements();     // Number the new statements
01513         doRenameBlockVars(20);              // Rename the locals
01514         bool convert;
01515         propagateStatements(convert, 20);   // Surely need propagation too
01516         if (VERBOSE) {
01517             LOG << "--- after propagating locals for " << getName() << " ---\n";
01518             printToLog();
01519             LOG << "=== end after propagating locals for " << getName() << " ===\n\n";
01520         }
01521 #if 0
01522         // Note: processConstants is also where ellipsis processing is done
01523         if (processConstants()) {
01524             if (status != PROC_INCYCLE) {
01525                 doRenameBlockVars(-1, true);            // Needed if there was an indirect call to an ellipsis function
01526             }
01527         }
01528 #endif
01529 
01530     }
01531 
01532     // Only remove unused statements after decompiling as much as possible of the proc
01533     // Remove unused statements
01534     RefCounter refCounts;           // The map
01535     // Count the references first
01536     countRefs(refCounts);
01537     // Now remove any that have no used
01538     if (!Boomerang::get()->noRemoveNull)
01539         remUnusedStmtEtc(refCounts);
01540 
01541     // Remove null statements
01542     if (!Boomerang::get()->noRemoveNull)
01543         removeNullStatements();
01544 
01545     printXML();
01546     if (VERBOSE && !Boomerang::get()->noRemoveNull) {
01547         LOG << "--- after removing unused and null statements pass " << 1 << " for " << getName() << " ---\n";
01548         printToLog();
01549         LOG << "=== end after removing unused statements for " << getName() << " ===\n\n";
01550     }
01551     Boomerang::get()->alert_decompile_afterRemoveStmts(this, 1);
01552 
01553     findFinalParameters();
01554     if (!Boomerang::get()->noParameterNames) {
01555         // Replace the existing temporary parameters with the final ones:
01556         //mapExpressionsToParameters();
01557         addParameterSymbols();
01558 
01559         if (VERBOSE) {
01560             LOG << "--- after adding new parameters ---\n";
01561             printToLog();
01562             LOG << "=== end after adding new parameters ===\n";
01563         }
01564     }
01565 
01566 #if 0               // Construction zone; pay no attention
01567     bool convert;
01568     propagateStatements(convert, 222);  // This is the first opportunity to safely propagate memory parameters
01569     if (VERBOSE) {
01570         LOG << "--- after propagating new parameters ---\n";
01571         printToLog();
01572         LOG << "=== end after propagating new parameters ===\n";
01573     }
01574 #endif
01575 
01576     updateCalls();              // Or just updateArguments?
01577 
01578     branchAnalysis();
01579     fixUglyBranches();
01580     
01581     if (VERBOSE) {
01582         LOG << "--- after remove unused statements etc for " << getName() << "\n";
01583         printToLog();
01584         LOG << "=== after remove unused statements etc for " << getName() << "\n";
01585     }
01586 
01587     Boomerang::get()->alert_decompile_debug_point(this, "after final");
01588 }
01589 
01590 void UserProc::remUnusedStmtEtc(RefCounter& refCounts) {
01591     
01592     Boomerang::get()->alert_decompile_debug_point(this, "before remUnusedStmtEtc");
01593 
01594     StatementList stmts;
01595     getStatements(stmts);
01596     bool change;
01597     do {                                // FIXME: check if this is ever needed
01598         change = false;
01599         StatementList::iterator ll = stmts.begin();
01600         while (ll != stmts.end()) {
01601             Statement* s = *ll;
01602             if (!s->isAssignment()) {
01603                 // Never delete a statement other than an assignment (e.g. nothing "uses" a Jcond)
01604                 ll++;
01605                 continue;
01606             }
01607             Assignment* as = (Assignment*)s;
01608             Exp* asLeft = as->getLeft();
01609             // If depth < 0, consider all depths
01610             //if (asLeft && depth >= 0 && asLeft->getMemDepth() > depth) {
01611             //  ll++;
01612             //  continue;
01613             //}
01614             if (asLeft && asLeft->getOper() == opGlobal) {
01615                 // assignments to globals must always be kept
01616                 ll++;
01617                 continue;
01618             }
01619             // If it's a memof and renameable it can still be deleted
01620             if (asLeft->getOper() == opMemOf && !canRename(asLeft)) {
01621                 // Assignments to memof-anything-but-local must always be kept.
01622                 ll++;
01623                 continue;
01624             }
01625             if (asLeft->getOper() == opMemberAccess || asLeft->getOper() == opArrayIndex) {
01626                 // can't say with these; conservatively never remove them
01627                 ll++;
01628                 continue;
01629             }
01630             if (refCounts.find(s) == refCounts.end() || refCounts[s] == 0) {    // Care not to insert unnecessarily
01631                 // First adjust the counts, due to statements only referenced by statements that are themselves unused.
01632                 // Need to be careful not to count two refs to the same def as two; refCounts is a count of the number
01633                 // of statements that use a definition, not the total number of refs
01634                 StatementSet stmtsRefdByUnused;
01635                 LocationSet components;
01636                 s->addUsedLocs(components, false);      // Second parameter false to ignore uses in collectors
01637                 LocationSet::iterator cc;
01638                 for (cc = components.begin(); cc != components.end(); cc++) {
01639                     if ((*cc)->isSubscript()) {
01640                         stmtsRefdByUnused.insert(((RefExp*)*cc)->getDef());
01641                     }
01642                 }
01643                 StatementSet::iterator dd;
01644                 for (dd = stmtsRefdByUnused.begin(); dd != stmtsRefdByUnused.end(); dd++) {
01645                     if (*dd == NULL) continue;
01646                     if (DEBUG_UNUSED)
01647                         LOG << "decrementing ref count of " << (*dd)->getNumber() << " because " << s->getNumber() <<
01648                             " is unused\n";
01649                     refCounts[*dd]--;
01650                 }
01651                 if (DEBUG_UNUSED)
01652                     LOG << "removing unused statement " << s->getNumber() << " " << s << "\n";
01653                 removeStatement(s);
01654                 ll = stmts.erase(ll);   // So we don't try to re-remove it
01655                 change = true;
01656                 continue;               // Don't call getNext this time
01657             }
01658             ll++;
01659         }
01660     } while (change);
01661     // Recaluclate at least the livenesses. Example: first call to printf in test/pentium/fromssa2, eax used only in a
01662     // removed statement, so liveness in the call needs to be removed
01663     removeCallLiveness();       // Kill all existing livenesses
01664     doRenameBlockVars(-2);      // Recalculate new livenesses
01665     setStatus(PROC_FINAL);      // Now fully decompiled (apart from one final pass, and transforming out of SSA form)
01666 
01667     Boomerang::get()->alert_decompile_debug_point(this, "after remUnusedStmtEtc");
01668 }
01669 
01670 void UserProc::recursionGroupAnalysis(ProcList* path, int indent) {
01671     /* Overall algorithm:
01672         for each proc in the group
01673             initialise
01674             earlyDecompile
01675         for eac proc in the group
01676             middleDecompile
01677         mark all calls involved in cs as non-childless
01678         for each proc in cs
01679             update parameters and returns, redoing call bypass, until no change
01680         for each proc in cs
01681             remove unused statements
01682         for each proc in cs
01683             update parameters and returns, redoing call bypass, until no change
01684     */
01685     if (VERBOSE) {
01686         LOG << "\n\n# # # recursion group analysis for ";
01687         ProcSet::iterator csi;
01688         for (csi = cycleGrp->begin(); csi != cycleGrp->end(); ++csi)
01689             LOG << (*csi)->getName() << ", ";
01690         LOG << "# # #\n";
01691     }
01692 
01693     // First, do the initial decompile, and call earlyDecompile
01694     ProcSet::iterator curp;
01695     for (curp = cycleGrp->begin(); curp != cycleGrp->end(); ++curp) {
01696         (*curp)->setStatus(PROC_INCYCLE);               // So the calls are treated as childless
01697         Boomerang::get()->alert_decompiling(*curp);
01698         (*curp)->initialiseDecompile();                 // Sort the CFG, number statements, etc
01699         (*curp)->earlyDecompile();
01700     }
01701 
01702     // Now all the procs in the group should be ready for preservation analysis
01703     // The standard preservation analysis should automatically perform conditional preservation
01704     for (curp = cycleGrp->begin(); curp != cycleGrp->end(); ++curp) {
01705         (*curp)->middleDecompile(path, indent);
01706         (*curp)->setStatus(PROC_PRESERVEDS);
01707     }
01708 
01709 
01710     // FIXME: why exactly do we do this?
01711     // Mark all the relevant calls as non childless (will harmlessly get done again later)
01712     ProcSet::iterator it;
01713     for (it = cycleGrp->begin(); it != cycleGrp->end(); it++)
01714         (*it)->markAsNonChildless(cycleGrp);
01715 
01716     ProcSet::iterator p;
01717     // Need to propagate into the initial arguments, since arguments are uses, and we are about to remove unused
01718     // statements.
01719     bool convert;
01720     for (p = cycleGrp->begin(); p != cycleGrp->end(); ++p) {
01721         //(*p)->initialParameters();                    // FIXME: I think this needs to be mapping locals and params now
01722         (*p)->mapLocalsAndParams();
01723         (*p)->updateArguments();
01724         (*p)->propagateStatements(convert, 0);      // Need to propagate into arguments
01725     }
01726 
01727     // while no change
01728 for (int i=0; i < 2; i++) {
01729     for (p = cycleGrp->begin(); p != cycleGrp->end(); ++p) {
01730         (*p)->remUnusedStmtEtc();               // Also does final parameters and arguments at present
01731     }
01732 }
01733     if (VERBOSE)
01734         LOG << "=== end recursion group analysis ===\n";
01735     Boomerang::get()->alert_end_decompile(this);
01736 
01737 }
01738 
01739 void UserProc::updateCalls() {
01740     if (VERBOSE)
01741         LOG << "### updateCalls for " << getName() << " ###\n";
01742     updateCallDefines();
01743     updateArguments();
01744     if (VERBOSE) {
01745         LOG << "--- after update calls for " << getName() << "\n";
01746         printToLog();
01747         LOG << "=== after update calls for " << getName() << "\n";
01748     }
01749 }
01750 
01751 void UserProc::branchAnalysis() {
01752     Boomerang::get()->alert_decompile_debug_point(this, "before branch analysis.");
01753 
01754     StatementList stmts;
01755     getStatements(stmts);
01756     for (StatementList::iterator it = stmts.begin(); it != stmts.end(); it++) {
01757         Statement *stmt = *it;
01758         if (stmt->isBranch()) {
01759             BranchStatement *branch = (BranchStatement*)stmt;
01760             if (branch->getFallBB() && branch->getTakenBB()) {
01761                 StatementList fallstmts;
01762                 branch->getFallBB()->getStatements(fallstmts);
01763                 if (fallstmts.size() == 1 && (*fallstmts.begin())->isBranch()) {
01764                     BranchStatement *fallto = (BranchStatement*)*fallstmts.begin();
01765                     //   branch to A if cond1
01766                     //   branch to B if cond2
01767                     // A: something
01768                     // B:
01769                     // ->
01770                     //   branch to B if !cond1 && cond2
01771                     // A: something
01772                     // B:
01773                     if (fallto->getFallBB() == branch->getTakenBB() && fallto->getBB()->getNumInEdges() == 1) {
01774                         branch->setFallBB(fallto->getFallBB());
01775                         branch->setTakenBB(fallto->getTakenBB());
01776                         branch->setDest(fallto->getFixedDest());
01777                         Exp* cond = new Binary(opAnd,
01778                             new Unary(opNot, branch->getCondExpr()),
01779                             fallto->getCondExpr()->clone());
01780                         branch->setCondExpr(cond->simplify());
01781                         assert(fallto->getBB()->getNumInEdges() == 0);
01782                         fallto->getBB()->deleteEdge(fallto->getBB()->getOutEdge(0));
01783                         fallto->getBB()->deleteEdge(fallto->getBB()->getOutEdge(0));
01784                         assert(fallto->getBB()->getNumOutEdges() == 0);
01785                         cfg->removeBB(fallto->getBB());
01786                     }
01787                     //   branch to B if cond1
01788                     //   branch to B if cond2
01789                     // A: something
01790                     // B:
01791                     // ->
01792                     //   branch to B if cond1 || cond2
01793                     // A: something
01794                     // B:
01795                     if (fallto->getTakenBB() == branch->getTakenBB() && fallto->getBB()->getNumInEdges() == 1) {
01796                         branch->setFallBB(fallto->getFallBB());
01797                         branch->setCondExpr(new Binary(opOr,
01798                             branch->getCondExpr(),
01799                             fallto->getCondExpr()->clone()));
01800                         assert(fallto->getBB()->getNumInEdges() == 0);
01801                         fallto->getBB()->deleteEdge(fallto->getBB()->getOutEdge(0));
01802                         fallto->getBB()->deleteEdge(fallto->getBB()->getOutEdge(0));
01803                         assert(fallto->getBB()->getNumOutEdges() == 0);
01804                         cfg->removeBB(fallto->getBB());
01805                     }
01806 
01807                 }
01808             }           
01809         }
01810     }
01811 
01812     Boomerang::get()->alert_decompile_debug_point(this, "after branch analysis.");
01813 }
01814 
01815 void UserProc::fixUglyBranches() {
01816     if (VERBOSE)
01817         LOG << "### fixUglyBranches for " << getName() << " ###\n";
01818 
01819     StatementList stmts;
01820     getStatements(stmts);
01821     for (StatementList::iterator it = stmts.begin(); it != stmts.end(); it++) {
01822         Statement *stmt = *it;
01823         if (stmt->isBranch()) {
01824             Exp *hl = ((BranchStatement*)stmt)->getCondExpr();
01825             // of the form: x{n} - 1 >= 0
01826             if (hl && hl->getOper() == opGtrEq && 
01827                     hl->getSubExp2()->isIntConst() &&
01828                     ((Const*)hl->getSubExp2())->getInt() == 0 &&
01829                     hl->getSubExp1()->getOper() == opMinus &&
01830                     hl->getSubExp1()->getSubExp2()->isIntConst() &&
01831                     ((Const*)hl->getSubExp1()->getSubExp2())->getInt() == 1 &&
01832                     hl->getSubExp1()->getSubExp1()->isSubscript()) {
01833                 Statement *n = ((RefExp*)hl->getSubExp1()->getSubExp1())->getDef();
01834                 if (n && n->isPhi()) {
01835                     PhiAssign *p = (PhiAssign*)n;
01836                     for (int i = 0; i < p->getNumDefs(); i++)
01837                         if (p->getStmtAt(i)->isAssign()) {
01838                             Assign *a = (Assign*)p->getStmtAt(i);
01839                             if (*a->getRight() == *hl->getSubExp1()) {
01840                                 hl->setSubExp1(new RefExp(a->getLeft(), a));
01841                                 break;
01842                             }
01843                         }
01844                 }
01845             }
01846         }
01847     }
01848     
01849     if (VERBOSE) {
01850         LOG << "--- after fixUglyBranches for " << getName() << "\n";
01851         printToLog();
01852         LOG << "=== after fixUglyBranches for " << getName() << "\n";
01853     }
01854 }
01855 
01856 bool UserProc::doRenameBlockVars(int pass, bool clearStacks) {
01857     if (VERBOSE)
01858         LOG << "### rename block vars for " << getName() << " pass " << pass << ", clear = " << clearStacks << " ###\n";
01859     bool b = df.renameBlockVars(this, 0, clearStacks);
01860     if (VERBOSE)
01861         LOG << "df.renameBlockVars return " << (b ? "true" : "false") << "\n";
01862     return b;
01863 }
01864 
01865 void UserProc::findSpPreservation() {
01866     if (VERBOSE)
01867         LOG << "finding stack pointer preservation for " << getName() << "\n";
01868 
01869     bool stdsp = false;     // FIXME: are these really used?
01870     // Note: need this non-virtual version most of the time, since nothing proved yet
01871     int sp = signature->getStackRegister(prog);
01872 
01873     for (int n = 0; n < 2; n++) {
01874         // may need to do multiple times due to dependencies FIXME: efficiency! Needed any more?
01875 
01876         // Special case for 32-bit stack-based machines (e.g. Pentium).
01877         // RISC machines generally preserve the stack pointer (so no special case required)
01878         for (int p = 0; !stdsp && p < 8; p++) {
01879             if (DEBUG_PROOF)
01880                 LOG << "attempting to prove sp = sp + " << p*4 << " for " << getName() << "\n";
01881             stdsp = prove(new Binary(opEquals,
01882                 Location::regOf(sp),
01883                 new Binary(opPlus,
01884                     Location::regOf(sp),
01885                     new Const(p * 4))));
01886         }
01887     }
01888 
01889     if (DEBUG_PROOF) {
01890         LOG << "proven for " << getName() << ":\n";
01891         for (std::map<Exp*, Exp*, lessExpStar>::iterator it = provenTrue.begin(); it != provenTrue.end(); it++)
01892             LOG << it->first << " = " << it->second << "\n";
01893     }
01894 
01895 }
01896 
01897 void UserProc::findPreserveds() {
01898     std::set<Exp*> removes;
01899 
01900     if (VERBOSE)
01901         LOG << "finding preserveds for " << getName() << "\n";
01902 
01903     Boomerang::get()->alert_decompile_debug_point(this, "before finding preserveds");
01904 
01905     if (theReturnStatement == NULL) {
01906         if (DEBUG_PROOF)
01907             LOG << "can't find preservations as there is no return statement!\n";
01908         Boomerang::get()->alert_decompile_debug_point(this, "after finding preserveds (no return)");
01909         return;
01910     }
01911 
01912     // prove preservation for all modifieds in the return statement
01913     ReturnStatement::iterator mm;
01914     StatementList& modifieds = theReturnStatement->getModifieds();
01915     for (mm = modifieds.begin(); mm != modifieds.end(); ++mm) {
01916         Exp* lhs = ((Assignment*)*mm)->getLeft();
01917         Exp* equation = new Binary(opEquals, lhs, lhs);
01918         if (DEBUG_PROOF)
01919             LOG << "attempting to prove " << equation << " is preserved by " << getName() << "\n";
01920         if (prove(equation)) {
01921             removes.insert(equation);   
01922         }
01923     }
01924 
01925     if (DEBUG_PROOF) {
01926         LOG << "### proven true for procedure " << getName() << ":\n";
01927         for (std::map<Exp*, Exp*, lessExpStar>::iterator it = provenTrue.begin(); it != provenTrue.end(); it++)
01928             LOG << it->first << " = " << it->second << "\n";
01929         LOG << "### end proven true for procedure " << getName() << "\n\n";
01930 #if PROVEN_FALSE
01931         LOG << "### proven false for procedure " << getName() << ":\n";
01932         for (std::map<Exp*, Exp*, lessExpStar>::iterator it = provenFalse.begin(); it != provenFalse.end(); it++)
01933             LOG << it->first << " != " << it->second << "\n";
01934         LOG << "### end proven false for procedure " << getName() << "\n\n";
01935 #endif
01936     }
01937 
01938     // Remove the preserved locations from the modifieds and the returns
01939     std::map<Exp*, Exp*, lessExpStar>::iterator pp;
01940     for (pp = provenTrue.begin(); pp != provenTrue.end(); ++pp) {
01941         Exp* lhs = pp->first;
01942         Exp* rhs = pp->second;
01943         // Has to be of the form loc = loc, not say loc+4, otherwise the bypass logic won't see the add of 4
01944         if (!(*lhs == *rhs)) continue;
01945         theReturnStatement->removeModified(lhs);
01946     }
01947 
01948     Boomerang::get()->alert_decompile_debug_point(this, "after finding preserveds");
01949 }
01950 
01951 void UserProc::removeSpAssignsIfPossible()
01952 {
01953     // if there are no uses of sp other than sp{-} in the whole procedure,
01954     // we can safely remove all assignments to sp, this will make the output
01955     // more readable for human eyes.
01956     
01957     Exp *sp = Location::regOf(signature->getStackRegister(prog));
01958     bool foundone = false;
01959     
01960     StatementList stmts;
01961     getStatements(stmts);
01962     for (StatementList::iterator it = stmts.begin(); it != stmts.end(); it++) {
01963         Statement *stmt = *it;
01964         if (stmt->isAssign() && *((Assign*)stmt)->getLeft() == *sp)
01965             foundone = true;
01966         LocationSet refs;
01967         stmt->addUsedLocs(refs);
01968         LocationSet::iterator rr;
01969         for (rr = refs.begin(); rr != refs.end(); rr++)
01970             if ((*rr)->isSubscript() && *(*rr)->getSubExp1() == *sp) {
01971                 Statement *def = ((RefExp*)(*rr))->getDef();
01972                 if (def && def->getProc() == this)
01973                     return;
01974             }
01975     }
01976 
01977     if (!foundone)
01978         return;
01979     
01980     Boomerang::get()->alert_decompile_debug_point(this, "before removing stack pointer assigns.");
01981 
01982     for (StatementList::iterator it = stmts.begin(); it != stmts.end(); it++)
01983         if ((*it)->isAssign()) {
01984             Assign *a = (Assign*)*it;
01985             if (*a->getLeft() == *sp) {
01986                 removeStatement(a);
01987             }
01988         }
01989     
01990     Boomerang::get()->alert_decompile_debug_point(this, "after removing stack pointer assigns.");
01991 }
01992 
01993 void UserProc::removeMatchingAssignsIfPossible(Exp *e)
01994 {
01995     // if there are no uses of %flags in the whole procedure,
01996     // we can safely remove all assignments to %flags, this will make the output
01997     // more readable for human eyes and makes short circuit analysis easier.
01998     
01999     bool foundone = false;
02000     
02001     StatementList stmts;
02002     getStatements(stmts);
02003     for (StatementList::iterator it = stmts.begin(); it != stmts.end(); it++) {
02004         Statement *stmt = *it;
02005         if (stmt->isAssign() && *((Assign*)stmt)->getLeft() == *e)
02006             foundone = true;
02007         if (stmt->isPhi()) {
02008             if (*((PhiAssign*)stmt)->getLeft() == *e)
02009                 foundone = true;
02010             continue;
02011         }
02012         LocationSet refs;
02013         stmt->addUsedLocs(refs);
02014         LocationSet::iterator rr;
02015         for (rr = refs.begin(); rr != refs.end(); rr++)
02016             if ((*rr)->isSubscript() && *(*rr)->getSubExp1() == *e) {
02017                 Statement *def = ((RefExp*)(*rr))->getDef();
02018                 if (def && def->getProc() == this)
02019                     return;
02020             }
02021     }
02022 
02023     if (!foundone)
02024         return;
02025     
02026     std::ostringstream str;
02027     str << "before removing matching assigns (" << e << ").";
02028     Boomerang::get()->alert_decompile_debug_point(this, str.str().c_str());
02029     if (VERBOSE)
02030         LOG << str.str().c_str() << "\n";
02031 
02032     for (StatementList::iterator it = stmts.begin(); it != stmts.end(); it++)
02033         if ((*it)->isAssign()) {
02034             Assign *a = (Assign*)*it;
02035             if (*a->getLeft() == *e)
02036                 removeStatement(a);
02037         } else if ((*it)->isPhi()) {
02038             PhiAssign *a = (PhiAssign*)*it;
02039             if (*a->getLeft() == *e)
02040                 removeStatement(a);
02041         }
02042     
02043     str.str("");
02044     str << "after removing matching assigns (" << e << ").";
02045     Boomerang::get()->alert_decompile_debug_point(this, str.str().c_str());
02046         LOG << str.str().c_str() << "\n";
02047 
02048 }
02049 
02050 /*
02051  * Find the procs the calls point to.
02052  * To be called after decoding all procs.
02053  * was in: analyis.cpp
02054  */
02055 void UserProc::assignProcsToCalls()
02056 {
02057     std::list<PBB>::iterator it;
02058     PBB pBB = cfg->getFirstBB(it);
02059     while (pBB)
02060     {
02061         std::list<RTL*>* rtls = pBB->getRTLs();
02062         if (rtls == NULL) {
02063             pBB = cfg->getNextBB(it);
02064             continue;
02065         }
02066         for (std::list<RTL*>::iterator it2 = rtls->begin(); it2 != rtls->end(); it2++) {
02067             if (!(*it2)->isCall()) continue;
02068             CallStatement* call = (CallStatement*)(*it2)->getList().back();
02069             if (call->getDestProc() == NULL && !call->isComputed()) {
02070                 Proc *p = prog->findProc(call->getFixedDest());
02071                 if (p == NULL) {
02072                     std::cerr << "Cannot find proc for dest " << call->getFixedDest() << " in call at "
02073                         << (*it2)->getAddress() << "\n";
02074                     assert(p);
02075                 }
02076                 call->setDestProc(p);
02077             }
02078             // call->setSigArguments();     // But BBs not set yet; will get done in initStatements()
02079         }
02080 
02081         pBB = cfg->getNextBB(it);
02082     }
02083 }
02084 
02085 /*
02086  * Perform final simplifications
02087  * was in: analyis.cpp
02088  */
02089 void UserProc::finalSimplify()
02090 {
02091     std::list<PBB>::iterator it;
02092     PBB pBB = cfg->getFirstBB(it);
02093     while (pBB)
02094     {
02095         std::list<RTL*>* pRtls = pBB->getRTLs();
02096         if (pRtls == NULL) {
02097             pBB = cfg->getNextBB(it);
02098             continue;
02099         }
02100         std::list<RTL*>::iterator rit;
02101         for (rit = pRtls->begin(); rit != pRtls->end(); rit++) {
02102             for (int i=0; i < (*rit)->getNumStmt(); i++) {
02103                 Statement* rt = (*rit)->elementAt(i);
02104                 rt->simplifyAddr();
02105                 // Also simplify everything; in particular, stack offsets are
02106                 // often negative, so we at least canonicalise [esp + -8] to [esp-8]
02107                 rt->simplify();
02108             }
02109         }
02110         pBB = cfg->getNextBB(it);
02111     }
02112 }
02113 
02114 
02115 // m[WILD]{-}
02116 static RefExp *memOfWild = new RefExp(
02117     Location::memOf(new Terminal(opWild)), NULL);
02118 // r[WILD INT]{-}
02119 static RefExp* regOfWild = new RefExp(
02120     Location::regOf(new Terminal(opWildIntConst)), NULL);
02121 
02122 // Search for expressions without explicit definitions (i.e. WILDCARD{0}), which represent parameters (use before
02123 // definition).
02124 // These are called final parameters, because they are determined from implicit references, not from the use collector
02125 // at the start of the proc, which include some caused by recursive calls
02126 #define DEBUG_PARAMS 1
02127 void UserProc::findFinalParameters() {
02128 
02129     Boomerang::get()->alert_decompile_debug_point(this, "before find final parameters.");
02130 
02131     parameters.clear();
02132 
02133     if (signature->isForced()) {
02134         // Copy from signature
02135         int n = signature->getNumParams();
02136         ImplicitConverter ic(cfg);
02137         for (int i=0; i < n; ++i) {
02138             Exp* paramLoc = signature->getParamExp(i)->clone();     // E.g. m[r28 + 4]
02139             LocationSet components;
02140             LocationSet::iterator cc;
02141             paramLoc->addUsedLocs(components);
02142             for (cc = components.begin(); cc != components.end(); ++cc) {
02143                 if (*cc != paramLoc) {                              // Don't subscript outer level
02144                     paramLoc->expSubscriptVar(*cc, NULL);           // E.g. r28 -> r28{-}
02145                     paramLoc->accept(&ic);                          // E.g. r28{-} -> r28{0}
02146                 }
02147             }
02148             ImplicitAssign* ia = new ImplicitAssign(signature->getParamType(i), paramLoc);
02149             parameters.append(ia);
02150             const char* name = signature->getParamName(i);
02151             Exp* param = Location::param(name, this);
02152             RefExp* reParamLoc = new RefExp(paramLoc, cfg->findImplicitAssign(paramLoc));
02153             mapSymbolTo(reParamLoc, param);                         // Update name map
02154         }
02155         return;
02156     }
02157     if (VERBOSE || DEBUG_PARAMS)
02158         LOG << "finding final parameters for " << getName() << "\n";
02159 
02160 //  int sp = signature->getStackRegister();
02161     signature->setNumParams(0);         // Clear any old ideas
02162     StatementList stmts;
02163     getStatements(stmts);
02164 
02165     StatementList::iterator it;
02166     for (it = stmts.begin(); it != stmts.end(); ++it) {
02167         Statement* s = *it;
02168         // Assume that all parameters will be m[]{0} or r[]{0}, and in the implicit definitions at the start of the
02169         // program
02170         if (!s->isImplicit())
02171             // Note: phis can get converted to assignments, but I hope that this is only later on: check this!
02172             break;                  // Stop after reading all implicit assignments
02173         Exp *e = ((ImplicitAssign*)s)->getLeft();
02174         if (signature->findParam(e) == -1) {
02175             if (VERBOSE || DEBUG_PARAMS)
02176                 LOG << "potential param " << e << "\n";
02177 #if 1       // I believe that the only true parameters will be registers or memofs that look like locals (stack
02178             // pararameters)
02179             if (!(e->isRegOf() || isLocalOrParamPattern(e)))
02180                 continue;
02181 #else
02182             if (signature->isStackLocal(prog, e) || e->getOper() == opLocal) {
02183                 if (VERBOSE || DEBUG_PARAMS)
02184                     LOG << "ignoring local " << e << "\n";
02185                 continue;
02186             }
02187             if (e->isGlobal()) {
02188                 if (VERBOSE || DEBUG_PARAMS)
02189                     LOG << "ignoring global " << e << "\n";
02190                 continue;
02191             }
02192             if (e->getMemDepth() > 1) {
02193                 if (VERBOSE || DEBUG_PARAMS)
02194                     LOG << "ignoring complex " << e << "\n";
02195                 continue;
02196             }
02197             if (e->isMemOf() && e->getSubExp1()->isGlobal()) {
02198                 if (VERBOSE || DEBUG_PARAMS)
02199                     LOG << "ignoring m[global] " << e << "\n";
02200                 continue;
02201             }
02202             if (e->isMemOf() && e->getSubExp1()->getOper() == opParam) {
02203                 if (VERBOSE || DEBUG_PARAMS)
02204                     LOG << "ignoring m[param] " << e << "\n";
02205                 continue;
02206             }
02207             if (e->isMemOf() &&
02208                     e->getSubExp1()->getOper() == opPlus &&
02209                     e->getSubExp1()->getSubExp1()->isGlobal() &&
02210                     e->getSubExp1()->getSubExp2()->isIntConst()) {
02211                 if (VERBOSE || DEBUG_PARAMS)
02212                     LOG << "ignoring m[global + int] " << e << "\n";
02213                 continue;
02214             }
02215             if (e->isRegN(sp)) {
02216                 if (VERBOSE || DEBUG_PARAMS)
02217                     LOG << "ignoring stack pointer register\n";
02218                 continue;
02219             }
02220             if (e->isMemOf() && e->getSubExp1()->isConst()) {
02221                 if (VERBOSE || DEBUG_PARAMS)
02222                     LOG << "ignoring m[const]\n";
02223                 continue;
02224             }
02225             bool allZero;
02226             e->clone()->removeSubscripts(allZero);
02227             if (!allZero) {
02228                 if (VERBOSE || DEBUG_PARAMS)
02229                     LOG << "ignoring not all-zero\n";
02230                 continue;
02231             }
02232 #endif
02233             if (VERBOSE || DEBUG_PARAMS)
02234                 LOG << "found new parameter " << e << "\n";
02235 
02236             Type* ty = ((ImplicitAssign*)s)->getType();
02237             // Add this parameter to the signature (for now; creates parameter names)
02238             addParameter(e, ty);
02239             // Insert it into the parameters StatementList, in sensible order
02240             insertParameter(e, ty);
02241         }
02242     }
02243 
02244     Boomerang::get()->alert_decompile_debug_point(this, "after find final parameters.");
02245 }
02246 
02247 #if 0       // FIXME: not currently used; do we want this any more?
02248 void UserProc::trimParameters(int depth) {
02249 
02250     if (signature->isForced())
02251         return;
02252 
02253     if (VERBOSE)
02254         LOG << "trimming parameters for " << getName() << "\n";
02255 
02256     StatementList stmts;
02257     getStatements(stmts);
02258 
02259     // find parameters that are referenced (ignore calls to this)
02260     int nparams = signature->getNumParams();
02261     int totparams = nparams;
02262     std::vector<Exp*> params;
02263     bool referenced[64];
02264     assert(totparams <= (int)(sizeof(referenced)/sizeof(bool)));
02265     int i;
02266     for (i = 0; i < nparams; i++) {
02267         referenced[i] = false;
02268         // Push parameters implicitly defined e.g. m[r28{0}+8]{0}, (these are the parameters for the current proc)
02269         params.push_back(signature->getParamExp(i)->clone()->expSubscriptAllNull());
02270     }
02271 
02272     std::set<Statement*> excluded;
02273     StatementList::iterator it;
02274     
02275     for (it = stmts.begin(); it != stmts.end(); it++) {
02276         Statement* s = *it;
02277         if (!s->isCall() || ((CallStatement*)s)->getDestProc() != this) {
02278             for (int i = 0; i < totparams; i++) {
02279                 Exp *p, *pe;
02280                 //if (i < nparams) {
02281                     p = Location::param(signature->getParamName(i), this);
02282                     pe = signature->getParamExp(i);
02283                 //} else {
02284                 //  p = Location::param(signature->getImplicitParamName( i - nparams), this);
02285                 //  pe = signature->getImplicitParamExp(i - nparams);
02286                 //}
02287                 if (!referenced[i] && excluded.find(s) == excluded.end() && 
02288                         // Search for the named parameter (e.g. param1), and just in case, also for the expression
02289                         // (e.g. r8{0})
02290                         (s->usesExp(p) || s->usesExp(params[i]))) {
02291                     referenced[i] = true;
02292                     if (DEBUG_UNUSED) {
02293                         LOG << "parameter " << p << " used by statement " << s->getNumber() << " : " << s->getKind() <<
02294                             "\n";
02295                     }
02296                 }
02297                 if (!referenced[i] && excluded.find(s) == excluded.end() &&
02298                         s->isPhi() && *((PhiAssign*)s)->getLeft() == *pe) {
02299                     if (DEBUG_UNUSED)
02300                         LOG << "searching " << s << " for uses of " << params[i] << "\n";
02301                     PhiAssign *pa = (PhiAssign*)s;
02302                     PhiAssign::iterator it1;
02303                     for (it1 = pa->begin(); it1 != pa->end(); it1++)
02304                         if (it1->def == NULL) {
02305                             referenced[i] = true;
02306                             if (DEBUG_UNUSED)
02307                                 LOG << "parameter " << p << " used by phi statement " << s->getNumber() << "\n";
02308                             break;
02309                         }
02310                 }
02311                 // delete p;
02312             }
02313         }
02314     }
02315 
02316     for (i = 0; i < totparams; i++) {
02317         if (!referenced[i] && (depth == -1 || params[i]->getMemDepth() == depth)) {
02318             bool allZero;
02319             Exp *e = params[i]->removeSubscripts(allZero);
02320             if (VERBOSE) 
02321                 LOG << "removing unused parameter " << e << "\n";
02322             removeParameter(e);
02323         }
02324     }
02325 }
02326 #endif
02327 
02328 void UserProc::removeReturn(Exp *e) {
02329     if (theReturnStatement)
02330         theReturnStatement->removeReturn(e);
02331 }
02332 
02333 void Proc::removeParameter(Exp *e) {
02334     int n = signature->findParam(e);
02335     if (n != -1) {
02336         signature->removeParameter(n);
02337         for (std::set<CallStatement*>::iterator it = callerSet.begin(); it != callerSet.end(); it++) {
02338             if (DEBUG_UNUSED)
02339                 LOG << "removing argument " << e << " in pos " << n << " from " << *it << "\n";
02340             (*it)->removeArgument(n);
02341         }
02342     }
02343 }
02344 
02345 void Proc::removeReturn(Exp *e)
02346 {
02347     signature->removeReturn(e);
02348 }
02349 
02350 // Add the parameter to the signature.
02351 void UserProc::addParameter(Exp *e, Type* ty) {
02352     // In case it's already an implicit argument:
02353     removeParameter(e);
02354 
02355     signature->addParameter(e, ty);
02356 }
02357 
02358 void UserProc::processFloatConstants()
02359 {
02360     StatementList stmts;
02361     getStatements(stmts);
02362 
02363     Exp *match = new Ternary(opFsize,
02364                         new Terminal(opWild), 
02365                         new Terminal(opWild), 
02366                         Location::memOf(new Terminal(opWild)));
02367     
02368     StatementList::iterator it;
02369     for (it = stmts.begin(); it != stmts.end(); it++) {
02370         Statement *s = *it;
02371 
02372         std::list<Exp*> results;
02373         s->searchAll(match, results);
02374         for (std::list<Exp*>::iterator it1 = results.begin(); it1 != results.end(); it1++) {
02375             Ternary *fsize = (Ternary*) *it1;
02376             if (fsize->getSubExp3()->getOper() == opMemOf &&
02377                     fsize->getSubExp3()->getSubExp1()->getOper() == opIntConst) {
02378                 Exp *memof = fsize->getSubExp3();
02379                 ADDRESS u = ((Const*)memof->getSubExp1())->getInt();
02380                 bool ok;
02381                 double d = prog->getFloatConstant(u, ok);
02382                 if (ok) {
02383                     LOG << "replacing " << memof << " with " << d << " in " << fsize << "\n";
02384                     fsize->setSubExp3(new Const(d));
02385                 }
02386             }
02387         }
02388         s->simplify();
02389     }
02390 }
02391 
02392 
02393 void UserProc::addParameterSymbols() {
02394     StatementList::iterator it;
02395     ImplicitConverter ic(cfg);
02396     int i=0;
02397     for (it = parameters.begin(); it != parameters.end(); ++it, ++i) {
02398         Exp* lhs = ((Assignment*)*it)->getLeft();
02399         lhs = lhs->expSubscriptAllNull();
02400         lhs = lhs->accept(&ic);
02401         Location* to = Location::param(strdup((char*)signature->getParamName(i)), this);
02402         mapSymbolTo(lhs, to);
02403     }
02404 }
02405 
02406 // Return an expression that is equivilent to e in terms of symbols. Creates new symbols as needed.
02407 Exp *UserProc::getSymbolExp(Exp *le, Type *ty, bool lastPass) {
02408     Exp *e = NULL;
02409 
02410     // check for references to the middle of a local
02411     if (    le->isMemOf() &&
02412             le->getSubExp1()->getOper() == opMinus &&
02413             le->getSubExp1()->getSubExp1()->isSubscript() &&
02414             le->getSubExp1()->getSubExp1()->getSubExp1()->isRegN(signature->getStackRegister()) &&
02415             le->getSubExp1()->getSubExp2()->isIntConst()) {
02416         for (SymbolMap::iterator it = symbolMap.begin(); it != symbolMap.end(); it++) {
02417             if ((*it).second->isLocal()) {
02418                 char *nam = ((Const*)(*it).second->getSubExp1())->getStr();
02419                 if (locals.find(nam) != locals.end()) {
02420                     Type *lty = locals[nam];
02421                     Exp *loc = (*it).first;
02422                     if (    loc->isMemOf() &&
02423                             loc->getSubExp1()->getOper() == opMinus &&
02424                             loc->getSubExp1()->getSubExp1()->isSubscript() &&
02425                             loc->getSubExp1()->getSubExp1()->getSubExp1()->isRegN(signature->getStackRegister()) &&
02426                             loc->getSubExp1()->getSubExp2()->isIntConst()) {
02427                         int n = -((Const*)loc->getSubExp1()->getSubExp2())->getInt();
02428                         int m = -((Const*)le->getSubExp1()->getSubExp2())->getInt();
02429                         if (m > n && m < n + (int)(lty->getSize() / 8)) {
02430                             e = Location::memOf(
02431                                 new Binary(opPlus,
02432                                     new Unary(opAddrOf, (*it).second->clone()),
02433                                     new Const(m - n)));
02434                             if (VERBOSE)
02435                                 LOG << "seems " << le << " is in the middle of " << loc << " returning " << e << "\n";
02436                             return e;
02437                         }   
02438                     }
02439                 }
02440             }
02441         }
02442     }
02443     
02444     if (symbolMap.find(le) == symbolMap.end()) {
02445         if (ty == NULL) {
02446             if (lastPass)
02447                 ty = new IntegerType();
02448             else
02449                 ty = new VoidType();            // HACK MVE
02450         }
02451 
02452         // the default of just assigning an int type is bad..  if the locals is not an int then assigning it this
02453         // type early results in aliases to this local not being recognised 
02454         if (ty) {
02455             Exp* base = le;
02456             if (le->isSubscript()) base = ((RefExp*)le)->getSubExp1();
02457             e = newLocal(ty->clone(), le);
02458             mapSymbolTo(le->clone(), e);
02459             e = e->clone();
02460         }
02461     } else {
02462 #if 0   // This code was assuming that locations only every have one type associated with them. The reality is that
02463         // compilers and machine language programmers sometimes re-use the same locations with different types.
02464         // Let type analysis sort out which live ranges have which type
02465         e = symbolMap[le]->clone();
02466         if (e->getOper() == opLocal && e->getSubExp1()->getOper() == opStrConst) {
02467             std::string name = ((Const*)e->getSubExp1())->getStr();
02468             Type *nty = ty;
02469             Type *ty = locals[name];
02470             assert(ty);
02471             if (nty && !(*ty == *nty) && nty->getSize() > ty->getSize()) {
02472                 // FIXME: should this be a type meeting?
02473                 if (DEBUG_TA)
02474                     LOG << "getSymbolExp: updating type of " << name.c_str() << " to " << nty->getCtype() << "\n";
02475                 ty = nty;
02476                 locals[name] = ty;
02477             }
02478             if (ty->resolvesToCompound()) {
02479                 CompoundType *compound = ty->asCompound();
02480                 if (VERBOSE)
02481                     LOG << "found reference to first member of compound " << name.c_str() << ": " << le << "\n";
02482                 char* nam = (char*)compound->getName(0);
02483                 if (nam == NULL) nam = "??";
02484                 return new TypedExp(ty, new Binary(opMemberAccess, e, new Const(nam)));
02485             }
02486         }
02487 #else
02488         e = getSymbolFor(le, ty);
02489 #endif
02490     }
02491     return e;
02492 }
02493 
02494 // Not used with DFA Type Analysis; the equivalent thing happens in mapLocalsAndParams() now
02495 void UserProc::mapExpressionsToLocals(bool lastPass) {
02496     StatementList stmts;
02497     getStatements(stmts);
02498 
02499     Boomerang::get()->alert_decompile_debug_point(this, "before mapping expressions to locals");
02500 
02501     if (VERBOSE) {
02502         LOG << "mapping expressions to locals for " << getName();
02503         if (lastPass)
02504             LOG << " last pass";
02505         LOG << "\n";
02506     }
02507 
02508     int sp = signature->getStackRegister(prog);
02509     if (getProven(Location::regOf(sp)) == NULL) {
02510         if (VERBOSE)
02511             LOG << "can't map locals since sp unproven\n";
02512         return;     // can't replace if nothing proven about sp
02513     }
02514 
02515     // start with calls because that's where we have the most types
02516     StatementList::iterator it;
02517     for (it = stmts.begin(); it != stmts.end(); it++) {
02518         if ((*it)->isCall()) {
02519             CallStatement *call = (CallStatement*)*it;
02520             for (int i = 0; i < call->getNumArguments(); i++) {
02521                 Type *ty = call->getArgumentType(i);
02522                 Exp *e = call->getArgumentExp(i);
02523                 // If a pointer type and e is of the form m[sp{0} - K]:
02524                 if (ty && ty->resolvesToPointer() && signature->isAddrOfStackLocal(prog, e)) {
02525                     LOG << "argument " << e << " is an addr of stack local and the type resolves to a pointer\n";
02526                     Exp *olde = e->clone();
02527                     Type *pty = ty->asPointer()->getPointsTo();
02528                     if (e->isAddrOf() && e->getSubExp1()->isSubscript() && 
02529                             e->getSubExp1()->getSubExp1()->isMemOf())
02530                         e = e->getSubExp1()->getSubExp1()->getSubExp1();
02531                     if (pty->resolvesToArray() && pty->asArray()->isUnbounded()) {
02532                         ArrayType *a = (ArrayType*)pty->asArray()->clone();
02533                         pty = a;
02534                         a->setLength(1024);     // just something arbitrary
02535                         if (i+1 < call->getNumArguments()) {
02536                             Type *nt = call->getArgumentType(i+1);
02537                             if (nt->isNamed())
02538                                 nt = ((NamedType*)nt)->resolvesTo();
02539                             if (nt->isInteger() && call->getArgumentExp(i+1)->isIntConst())
02540                                 a->setLength(((Const*)call->getArgumentExp(i+1)) ->getInt());
02541                         }
02542                     }
02543                     e = getSymbolExp(Location::memOf(e->clone(), this), pty);
02544                     if (e) {
02545                         Exp *ne = new Unary(opAddrOf, e);
02546                         if (VERBOSE)
02547                             LOG << "replacing argument " << olde << " with " << ne << " in " << call << "\n";
02548                         call->setArgumentExp(i, ne);
02549                     }
02550                 }
02551             }
02552         }
02553     }
02554 
02555     Boomerang::get()->alert_decompile_debug_point(this, "after processing locals in calls");
02556 
02557     // normalise sp usage (turn WILD + sp{0} into sp{0} + WILD)
02558     Exp *nn = new Binary(opPlus, new Terminal(opWild), new RefExp(Location::regOf(sp), NULL));
02559     for (it = stmts.begin(); it != stmts.end(); it++) {
02560         Statement* s = *it;
02561         std::list<Exp*> results;
02562         s->searchAll(nn, results);
02563         for (std::list<Exp*>::iterator it1 = results.begin(); it1 != results.end(); it1++) {
02564             Exp *wild = (*it1)->getSubExp1();
02565             (*it1)->setSubExp1((*it1)->getSubExp2());
02566             (*it1)->setSubExp2(wild);
02567         }
02568     }
02569 
02570     // FIXME: this is probably part of the ADHOC TA
02571     // look for array locals
02572     // l = m[(sp{0} + WILD1) - K2]
02573     Exp *l = Location::memOf(new Binary(opMinus, 
02574                 new Binary(opPlus,
02575                     new RefExp(Location::regOf(sp), NULL),
02576                     new Terminal(opWild)),
02577                 new Terminal(opWildIntConst)));
02578     for (it = stmts.begin(); it != stmts.end(); it++) {
02579         Statement* s = *it;
02580         std::list<Exp*> results;
02581         s->searchAll(l, results);
02582         for (std::list<Exp*>::iterator it1 = results.begin(); it1 != results.end(); it1++) {
02583             Exp *result = *it1;
02584             // arr = m[sp{0} - K2]
02585             Location *arr = Location::memOf(
02586                 new Binary(opMinus, 
02587                     new RefExp(Location::regOf(sp), NULL),
02588                     result->getSubExp1()->getSubExp2()->clone()));
02589             int n = ((Const*)result->getSubExp1()->getSubExp2())->getInt();
02590             arr->setProc(this);
02591             Type *base = new IntegerType();
02592             if (s->isAssign() && ((Assign*)s)->getLeft() == result) {
02593                 Type* at = ((Assign*)s)->getType();
02594                 if(at && at->getSize() != 0)
02595                     base = ((Assign*)s)->getType()->clone();
02596             }
02597             //arr->setType(new ArrayType(base, n / (base->getSize() / 8)));
02598             if (VERBOSE)
02599                 LOG << "found a local array using " << n << " bytes\n";
02600             Exp *replace = Location::memOf(
02601                 new Binary(opPlus,
02602                     new Unary(opAddrOf, arr),
02603                     result->getSubExp1()->getSubExp1()->getSubExp2()->clone()), this);
02604             if (VERBOSE)
02605                 LOG << "replacing " << result << " with " << replace << " in " << s << "\n";
02606             s->searchAndReplace(result->clone(), replace);
02607         }
02608     }
02609 
02610     Boomerang::get()->alert_decompile_debug_point(this, "after processing array locals");
02611 
02612     // Stack offsets for local variables could be negative (most machines), positive (PA/RISC), or both (SPARC)
02613     if (signature->isLocalOffsetNegative())
02614         searchRegularLocals(opMinus, lastPass, sp, stmts);
02615     if (signature->isLocalOffsetPositive())
02616         searchRegularLocals(opPlus, lastPass, sp, stmts);
02617     // Ugh - m[sp] is a special case: neither positive or negative.  SPARC uses this to save %i0
02618     if (signature->isLocalOffsetPositive() && signature->isLocalOffsetNegative())
02619         searchRegularLocals(opWild, lastPass, sp, stmts);
02620 
02621     Boomerang::get()->alert_decompile_debug_point(this, "after mapping expressions to locals");
02622 }
02623 
02624 void UserProc::searchRegularLocals(OPER minusOrPlus, bool lastPass, int sp, StatementList& stmts) {
02625     // replace expressions in regular statements with locals
02626     Location* l;
02627     if (minusOrPlus == opWild)
02628         // l = m[sp{0}]
02629         l = Location::memOf(
02630             new RefExp(Location::regOf(sp), NULL));
02631     else
02632         // l = m[sp{0} +/- K]
02633         l = Location::memOf(
02634             new Binary(minusOrPlus, 
02635                 new RefExp(Location::regOf(sp), NULL),
02636                 new Terminal(opWildIntConst)));
02637     StatementList::iterator it;
02638     for (it = stmts.begin(); it != stmts.end(); it++) {
02639         Statement* s = *it;
02640         std::list<Exp*> results;
02641         s->searchAll(l, results);
02642         for (std::list<Exp*>::iterator it1 = results.begin(); it1 != results.end(); it1++) {
02643             Exp *result = *it1;
02644             Type* ty = s->getTypeFor(result);
02645             Exp *e = getSymbolExp(result, ty, lastPass);
02646             if (e) {
02647                 Exp* search = result->clone();
02648                 if (VERBOSE)
02649                     LOG << "mapping " << search << " to " << e << " in " << s << "\n";
02650                 //s->searchAndReplace(search, e);
02651             }
02652         }
02653         //s->simplify();
02654     }
02655 }
02656 
02657 bool UserProc::removeNullStatements() {
02658     bool change = false;
02659     StatementList stmts;
02660     getStatements(stmts);
02661     // remove null code
02662     StatementList::iterator it;
02663     for (it = stmts.begin(); it != stmts.end(); it++) {
02664         Statement* s = *it;
02665         if (s->isNullStatement()) {
02666             // A statement of the form x := x
02667             if (VERBOSE) {
02668                 LOG << "removing null statement: " << s->getNumber() <<
02669                 " " << s << "\n";
02670             }
02671             removeStatement(s);
02672             change = true;
02673         }
02674     }
02675     return change;
02676 }
02677 
02678 // Propagate statements, but don't remove
02679 // Return true if change; set convert if an indirect call is converted to direct (else clear)
02680 bool UserProc::propagateStatements(bool& convert, int pass) {
02681     if (VERBOSE)
02682         LOG << "--- begin propagating statements pass " << pass << " ---\n";
02683     StatementList stmts;
02684     getStatements(stmts);
02685     // propagate any statements that can be
02686     StatementList::iterator it;
02687     // Find the locations that are used by a live, dominating phi-function
02688     LocationSet usedByDomPhi;
02689     findLiveAtDomPhi(usedByDomPhi);
02690     // Next pass: count the number of times each assignment LHS would be propagated somewhere
02691     std::map<Exp*, int, lessExpStar> destCounts;
02692     // Also maintain a set of locations which are used by phi statements
02693     for (it = stmts.begin(); it != stmts.end(); it++) {
02694         Statement* s = *it;
02695         ExpDestCounter edc(destCounts);
02696         StmtDestCounter sdc(&edc);
02697         s->accept(&sdc);
02698     }
02699 #if USE_DOMINANCE_NUMS
02700     // A third pass for dominance numbers
02701     setDominanceNumbers();
02702 #endif
02703     // A fourth pass to propagate only the flags (these must be propagated even if it results in extra locals)
02704     bool change = false;
02705     for (it = stmts.begin(); it != stmts.end(); it++) {
02706         Statement* s = *it;
02707         if (s->isPhi()) continue;
02708         change |= s->propagateFlagsTo();
02709     }
02710     // Finally the actual propagation
02711     convert = false;
02712     for (it = stmts.begin(); it != stmts.end(); it++) {
02713         Statement* s = *it;
02714         if (s->isPhi()) continue;
02715         change |= s->propagateTo(convert, &destCounts, &usedByDomPhi);
02716     }
02717     simplify();
02718     propagateToCollector();
02719     if (VERBOSE)
02720         LOG << "=== end propagating statements at pass " << pass << " ===\n";
02721     return change;
02722 }   // propagateStatements
02723 
02724 Statement *UserProc::getStmtAtLex(unsigned int begin, unsigned int end)
02725 {
02726     StatementList stmts;
02727     getStatements(stmts);
02728     
02729     unsigned int lowest = begin;
02730     Statement *loweststmt = NULL;
02731     for (StatementList::iterator it = stmts.begin(); it != stmts.end(); it++)
02732         if (begin >= (*it)->getLexBegin() && begin <= lowest && begin <= (*it)->getLexEnd() &&
02733                 (end == (unsigned)-1 || end < (*it)->getLexEnd())) {
02734             loweststmt = (*it);
02735             lowest = (*it)->getLexBegin();
02736         }
02737     return loweststmt;
02738 }
02739 
02740 void UserProc::promoteSignature() {
02741     signature = signature->promote(this);
02742 }
02743 
02744 
02745 char* UserProc::newLocalName(Exp* e) {
02746     std::ostringstream ost;
02747     if (e->isSubscript() && ((RefExp*)e)->getSubExp1()->isRegOf()) {
02748         // Assume that it's better to know what register this location was created from
02749         char* regName = getRegName(((RefExp*)e)->getSubExp1());
02750         int tag = 0;
02751         do {
02752             ost.str("");
02753             ost << regName << "_" << ++tag;
02754         } while (locals.find(ost.str()) != locals.end());
02755         return strdup (ost.str().c_str());
02756     }
02757     ost << "local" << nextLocal++;
02758     return strdup(ost.str().c_str());
02759 }
02760 
02761 Exp* UserProc::newLocal(Type* ty, Exp* e, char* nam /* = NULL */) {
02762     std::string name;
02763     if (nam == NULL)
02764         name = newLocalName(e);
02765     else
02766         name = nam;             // Use provided name
02767     locals[name] = ty;
02768     if (ty == NULL) {
02769         std::cerr << "null type passed to newLocal\n";
02770         assert(false);
02771     }
02772     if (VERBOSE)
02773         LOG << "assigning type " << ty->getCtype() << " to new " << name.c_str() << "\n";
02774     return Location::local(strdup(name.c_str()), this);
02775 }
02776 
02777 void UserProc::addLocal(Type *ty, const char *nam, Exp *e)
02778 {
02779     // symbolMap is a multimap now; you might have r8->o0 for integers and r8->o0_1 for char*
02780     //assert(symbolMap.find(e) == symbolMap.end());
02781     mapSymbolTo(e, Location::local(strdup(nam), this));
02782     //assert(locals.find(nam) == locals.end());     // Could be r10{20} -> o2, r10{30}->o2 now
02783     locals[nam] = ty;
02784 }
02785 
02786 Type *UserProc::getLocalType(const char *nam)
02787 {
02788     if (locals.find(nam) == locals.end())
02789         return NULL;
02790     Type *ty = locals[nam];
02791     return ty;
02792 }
02793 
02794 void UserProc::setLocalType(const char *nam, Type *ty)
02795 {
02796     locals[nam] = ty;
02797     if (VERBOSE)
02798         LOG << "setLocalType: updating type of " << nam << " to " << ty->getCtype() << "\n";
02799 }
02800 
02801 Type *UserProc::getParamType(const char *nam)
02802 {
02803     for (unsigned int i = 0; i < signature->getNumParams(); i++)
02804         if (std::string(nam) == signature->getParamName(i))
02805             return signature->getParamType(i);
02806     return NULL;
02807 }
02808 
02809 void UserProc::setExpSymbol(const char *nam, Exp *e, Type* ty)
02810 {
02811     TypedExp *te = new TypedExp(ty, Location::local(strdup(nam), this));
02812     mapSymbolTo(e, te);
02813 }
02814 
02815 void UserProc::mapSymbolToRepl(Exp* from, Exp* oldTo, Exp* newTo) {
02816     removeSymbolMapping(from, oldTo);
02817     mapSymbolTo(from, newTo);       // The compiler could optimise this call to a fall through
02818 }
02819 
02820 void UserProc::mapSymbolTo(Exp* from, Exp* to) {
02821     SymbolMap::iterator it = symbolMap.find(from);
02822     while (it != symbolMap.end() && *it->first == *from) {
02823         if (*it->second == *to)
02824             return;             // Already in the multimap
02825         ++it;
02826     }
02827     std::pair<Exp*, Exp*> pr;
02828     pr.first = from;
02829     pr.second = to;
02830     symbolMap.insert(pr);
02831 }
02832 
02833 // FIXME: is this the same as lookupSym() now?
02834 Exp* UserProc::getSymbolFor(Exp* from, Type* ty) {
02835     SymbolMap::iterator ff = symbolMap.find(from);
02836     while (ff != symbolMap.end() && *ff->first == *from) {
02837         Exp* currTo = ff->second;
02838         assert(currTo->isLocal() || currTo->isParam());
02839         char* name = ((Const*)((Location*)currTo)->getSubExp1())->getStr();
02840         Type* currTy = getLocalType(name);
02841         if (currTy == NULL) currTy = getParamType(name);
02842         if (currTy && currTy->isCompatibleWith(ty))
02843             return currTo;
02844         ++ff;
02845     }
02846     return NULL;
02847 }
02848 
02849 void UserProc::removeSymbolMapping(Exp* from, Exp* to) {
02850     SymbolMap::iterator it = symbolMap.find(from);
02851     while (it != symbolMap.end() && *it->first == *from) {
02852         if (*it->second == *to) {
02853             symbolMap.erase(it);
02854             return;
02855         }
02856         it++;
02857     }
02858 }
02859 
02860 
02861 
02862 // NOTE: linear search!!
02863 Exp *UserProc::expFromSymbol(const char *nam)
02864 {
02865     for (SymbolMap::iterator it = symbolMap.begin(); it != symbolMap.end(); it++) {
02866         Exp* e = it->second;
02867         if (e->isLocal() && !strcmp(((Const*)((Location*)e)->getSubExp1())->getStr(), nam))
02868             return it->first;
02869     }
02870     return NULL;
02871 }
02872 
02873 const char* UserProc::getLocalName(int n) { 
02874     int i = 0;
02875     for (std::map<std::string, Type*>::iterator it = locals.begin(); it != locals.end(); it++, i++)
02876         if (i == n)
02877             return it->first.c_str();
02878     return NULL;
02879 }
02880 
02881 char* UserProc::getSymbolName(Exp* e) {
02882     SymbolMap::iterator it = symbolMap.find(e);
02883     if (it == symbolMap.end()) return NULL;
02884     Exp* loc = it->second;
02885     if (!loc->isLocal() && !loc->isParam()) return NULL;
02886     return ((Const*)loc->getSubExp1())->getStr();
02887 }
02888 
02889 
02890 // Count references to the things that are under SSA control. For each SSA subscripting, increment a counter for that
02891 // definition
02892 void UserProc::countRefs(RefCounter& refCounts) {
02893     StatementList stmts;
02894     getStatements(stmts);
02895     StatementList::iterator it;
02896     for (it = stmts.begin(); it != stmts.end(); it++) {
02897         Statement* s = *it;
02898         // Don't count uses in implicit statements. There is no RHS of course, but you can still have x from m[x] on the
02899         // LHS and so on, and these are not real uses
02900         if (s->isImplicit()) continue;
02901         if (DEBUG_UNUSED)
02902             LOG << "counting references in " << s << "\n";
02903         LocationSet refs;
02904         s->addUsedLocs(refs, false);            // Ignore uses in collectors
02905         LocationSet::iterator rr;
02906         for (rr = refs.begin(); rr != refs.end(); rr++) {
02907             if (((Exp*)*rr)->isSubscript()) {
02908                 Statement *def = ((RefExp*)*rr)->getDef();
02909                 // Used to not count implicit refs here (def->getNumber() == 0), meaning that implicit definitions get
02910                 // removed as dead code! But these are the ideal place to read off final parameters, and it is
02911                 // guaranteed now that implicit statements are sorted out for us by now (for dfa type analysis)
02912                 if (def /* && def->getNumber() */ ) {
02913                     refCounts[def]++;
02914                     if (DEBUG_UNUSED)
02915                         LOG << "counted ref to " << *rr << "\n";
02916                 }
02917             }
02918         }
02919     }
02920     if (DEBUG_UNUSED) {
02921         RefCounter::iterator rr;
02922         LOG << "### reference counts for " << getName() << ":\n";
02923         for (rr = refCounts.begin(); rr != refCounts.end(); ++rr)
02924             LOG << "  " << rr->first->getNumber() << ":" << rr->second << "\t";
02925         LOG << "\n### end reference counts\n"; 
02926     }
02927 }
02928 
02929 // Note: call the below after translating from SSA form
02930 // FIXME: this can be done before transforming out of SSA form now, surely...
02931 void UserProc::removeUnusedLocals() {
02932     Boomerang::get()->alert_decompile_debug_point(this, "before removing unused locals");
02933     if (VERBOSE)
02934         LOG << "removing unused locals (final) for " << getName() << "\n";
02935 
02936     std::set<std::string> usedLocals;
02937     StatementList stmts;
02938     getStatements(stmts);
02939     // First count any uses of the locals
02940     StatementList::iterator ss;
02941     bool all = false;
02942     for (ss = stmts.begin(); ss != stmts.end(); ss++) {
02943         Statement* s = *ss;
02944         LocationSet locs;
02945         all |= s->addUsedLocals(locs);
02946         LocationSet::iterator uu;
02947         for (uu = locs.begin(); uu != locs.end(); uu++) {
02948             Exp* u = *uu;
02949             // Must be a real symbol, and not defined in this statement, unless it is a return statement (in which case
02950             // it is used outside this procedure), or a call statement. Consider local7 = local7+1 and
02951             // return local7 = local7+1 and local7 = call(local7+1), where in all cases, local7 is not used elsewhere
02952             // outside this procedure. With the assign, it can be deleted, but with the return or call statements, it
02953             // can't.
02954             if ((s->isReturn() || s->isCall() || !s->definesLoc(u))) {
02955                 if (!u->isLocal()) continue;
02956                 std::string name(((Const*)((Location*)u)->getSubExp1())->getStr());
02957                 usedLocals.insert(name);
02958                 if (DEBUG_UNUSED)
02959                     LOG << "counted local " << name << " in " << s << "\n";
02960             }
02961         }
02962         if (s->isAssignment() && !s->isImplicit() && ((Assignment*)s)->getLeft()->isLocal()) {
02963             Assignment* as = (Assignment*)s;
02964             Const* c = (Const*)((Unary*)as->getLeft())->getSubExp1();
02965             std::string name(c->getStr());
02966             usedLocals.insert(name);
02967             if (DEBUG_UNUSED) LOG << "counted local " << name.c_str() << " on left of " << s << "\n";
02968 
02969         }
02970     }
02971     // Now record the unused ones in set removes
02972     std::map<std::string, Type*>::iterator it;
02973     std::set<std::string> removes;
02974     for (it = locals.begin(); it != locals.end(); it++) {
02975         std::string& name = const_cast<std::string&>(it->first);
02976         // LOG << "Considering local " << name << "\n";
02977         if (VERBOSE && all && removes.size())
02978             LOG << "WARNING: defineall seen in procedure " << name.c_str() << " so not removing " << (int)removes.size()
02979                  << " locals\n";
02980         if (usedLocals.find(name) == usedLocals.end() && !all) {
02981             if (VERBOSE)
02982                 LOG << "removed unused local " << name.c_str() << "\n";
02983             removes.insert(name);
02984         }
02985     }
02986     // Remove any definitions of the removed locals
02987     for (ss = stmts.begin(); ss != stmts.end(); ++ss) {
02988         Statement* s = *ss;
02989         LocationSet ls;
02990         LocationSet::iterator ll;
02991         s->getDefinitions(ls);
02992         for (ll = ls.begin(); ll != ls.end(); ++ll) {
02993             Type* ty = s->getTypeFor(*ll);
02994             char* name = findLocal(*ll, ty);
02995             if (name == NULL) continue;
02996             std::string str(name);
02997             if (removes.find(str) != removes.end()) {
02998                 // Remove it. If an assign, delete it; otherwise (call), remove the define
02999                 if (s->isAssignment()) {
03000                     removeStatement(s);
03001                     break;              // Break to next statement
03002                 } else if (s->isCall())
03003                     // Remove just this define. May end up removing several defines from this call.
03004                     ((CallStatement*)s)->removeDefine(*ll);
03005                 // else if a ReturnStatement, don't attempt to remove it. The definition is used *outside* this proc.
03006             }
03007         }
03008     }
03009     // Finally, remove them from locals, so they don't get declared
03010     for (std::set<std::string>::iterator it1 = removes.begin(); it1 != removes.end(); )
03011         locals.erase(*it1++);
03012     // Also remove them from the symbols, since symbols are a superset of locals at present
03013     for (SymbolMap::iterator sm = symbolMap.begin(); sm != symbolMap.end(); ) {
03014         Exp* mapsTo = sm->second;
03015         if (mapsTo->isLocal()) {
03016             char* tmpName = ((Const*)((Location*)mapsTo)->getSubExp1())->getStr();
03017             if (removes.find(tmpName) != removes.end()) {
03018                 symbolMap.erase(sm++);
03019                 continue;
03020             }
03021         }
03022         ++sm;           // sm is itcremented with the erase, or here
03023     }
03024     Boomerang::get()->alert_decompile_debug_point(this, "after removing unused locals");
03025 }
03026 
03027 //
03028 //  SSA code
03029 //
03030 
03031 void UserProc::fromSSAform() {
03032     Boomerang::get()->alert_decompiling(this);
03033 
03034     if (VERBOSE)
03035         LOG << "transforming " << getName() << " from SSA\n";
03036 
03037     Boomerang::get()->alert_decompile_debug_point(this, "before transforming from SSA form");
03038 
03039     if (cfg->getNumBBs() >= 100)        // Only for the larger procs
03040         // Note: emit newline at end of this proc, so we can distinguish getting stuck in this proc with doing a lot of
03041         // little procs that don't get messages. Also, looks better with progress dots
03042         std::cout << " transforming out of SSA form " << getName() << " with " << cfg->getNumBBs() << " BBs";
03043 
03044     StatementList stmts;
03045     getStatements(stmts);
03046     StatementList::iterator it;
03047 
03048     for (it = stmts.begin(); it != stmts.end(); it++) {
03049         // Map registers to initial local variables
03050         (*it)->mapRegistersToLocals();
03051         // Insert casts where needed, as types are about to become inaccessible
03052         (*it)->insertCasts();
03053     }
03054 
03055 
03056     // First split the live ranges where needed by reason of type incompatibility, i.e. when the type of a subscripted
03057     // variable is different to its previous type. Start at the top, because we don't want to rename parameters (e.g.
03058     // argc)
03059     typedef std::pair<Type*, Exp*> FirstTypeEnt;
03060     typedef std::map<Exp*, FirstTypeEnt, lessExpStar> FirstTypesMap;
03061     FirstTypesMap firstTypes;
03062     FirstTypesMap::iterator ff;
03063     ConnectionGraph ig;             // The interference graph; these can't have the same local variable
03064     ConnectionGraph pu;             // The Phi Unites: these need the same local variable or copies
03065 #if 0
03066     // Start with the parameters. There is not always a use of every parameter, yet that location may be used with
03067     // a different type (e.g. envp used as int in test/sparc/fibo-O4)
03068     int n = signature->getNumParams();
03069     
03070     for (int i=0; i < n; i++) {
03071         Exp* paramExp = signature->getParamExp(i);
03072         FirstTypeEnt fte;
03073         fte.first = signature->getParamType(i);
03074         fte.second = new RefExp(paramExp, cfg->findTheImplicitAssign(paramExp));
03075         firstTypes[paramExp] = fte;
03076     }
03077 #endif
03078     int progress = 0;
03079     for (it = stmts.begin(); it != stmts.end(); it++) {
03080         if (++progress > 2000) {
03081             std::cout << "." << std::flush;
03082             progress = 0;
03083         }
03084         Statement* s = *it;
03085         LocationSet defs;
03086         s->getDefinitions(defs);
03087         LocationSet::iterator dd;
03088         for (dd = defs.begin(); dd != defs.end(); dd++) {
03089             Exp* base = *dd;
03090             Type* ty = s->getTypeFor(base);
03091             if (VERBOSE)
03092                 LOG << "got type " << ty << " for " << base << " from " << s << "\n";
03093             if (ty == NULL)             // Can happen e.g. when getting the type for %flags
03094                 ty = new VoidType();
03095             ff = firstTypes.find(base);
03096             RefExp* ref = new RefExp(base, s);
03097             if (ff == firstTypes.end()) {
03098                 // There is no first type yet. Record it.
03099                 FirstTypeEnt fte;
03100                 fte.first = ty;
03101                 fte.second = ref;
03102                 firstTypes[base] = fte;
03103             } else if (ff->second.first && !ty->isCompatibleWith(ff->second.first)) {
03104                 if (DEBUG_LIVENESS)
03105                     LOG << "def of " << base << " at " << s->getNumber() << " type " << ty <<
03106                         " is not compatible with first type " << ff->second.first << ".\n";
03107                 // There already is a type for base, and it is different to the type for this definition.
03108                 // Record an "interference" so it will get a new variable
03109                 if (!ty->isVoid())      // just ignore void interferences ??!!
03110                     ig.connect(ref, ff->second.second);
03111             }
03112         }
03113     }
03114     // Find the interferences generated by more than one version of a variable being live at the same program point
03115     cfg->findInterferences(ig);
03116 
03117     // Find the set of locations that are "united" by phi-functions
03118     // FIXME: are these going to be trivially predictable?
03119     findPhiUnites(pu);
03120 
03121     ConnectionGraph::iterator ii;
03122     if (DEBUG_LIVENESS) {
03123         LOG << "## ig interference graph:\n";
03124         for (ii = ig.begin(); ii != ig.end(); ii++)
03125             LOG << "   ig " << ii->first << " -> " << ii->second << "\n";
03126         LOG << "## pu phi unites graph:\n";
03127         for (ii = pu.begin(); ii != pu.end(); ii++)
03128             LOG << "   pu " << ii->first << " -> " << ii->second << "\n";
03129         LOG << "  ---\n";
03130     }
03131 
03132     // Choose one of each interfering location to give a new name to
03133     
03134     for (ii = ig.begin(); ii != ig.end(); ++ii) {
03135         RefExp* r1, *r2;
03136         r1 = (RefExp*)ii->first;
03137         r2 = (RefExp*)ii->second;           // r1 -> r2 and vice versa
03138         char* name1 = lookupSymFromRefAny(r1);
03139         char* name2 = lookupSymFromRefAny(r2);
03140         if (name1 && name2 && strcmp(name1, name2) != 0)
03141             continue;                       // Already different names, probably because of the redundant mapping
03142         RefExp* rename = NULL;
03143         if (r1->isImplicitDef())
03144             // If r1 is an implicit definition, don't rename it (it is probably a parameter, and should retain its
03145             // current name)
03146             rename = r2;
03147         else if (r2->isImplicitDef())
03148             rename = r1;        // Ditto for r2
03149         if (rename == NULL) {
03150 #if 0       // By preferring to rename the destination of the phi, we have more chance that all the phi operands will
03151             // end up being the same, so the phi can end up as one copy assignment
03152 
03153             // In general, it is best to rename the location (r1 or r2) which has the smallest number of uniting
03154             // connections (or at least, this seems like a reasonable criterion)
03155             int n1 = pu.count(r1);
03156             int n2 = pu.count(r2);
03157             if (n2 <= n1)
03158 #else
03159             Statement* def2 = r2->getDef();
03160             if (def2->isPhi())          // Prefer the destinations of phis
03161 #endif
03162                 rename = r2;
03163             else
03164                 rename = r1;
03165         }
03166         Type *ty;
03167         ty = rename->getDef()->getTypeFor(rename->getSubExp1());
03168         Exp* local = newLocal(ty, rename);
03169         if (DEBUG_LIVENESS)
03170             LOG << "renaming " << rename << " to " << local << "\n";
03171         mapSymbolTo(rename, local);
03172     }
03173 
03174     // Implement part of the Phi Unites list, where renamings or parameters have broken them, by renaming
03175     // The rest of them will be done as phis are removed
03176     // The idea is that where l1 and l2 have to unite, and exactly one of them already has a local/name, you can
03177     // implement the unification by giving the unnamed one the same name as the named one, as long as they don't
03178     // interfere
03179     for (ii = pu.begin(); ii != pu.end(); ++ii) {
03180         RefExp* r1 = (RefExp*)ii->first;
03181         RefExp* r2 = (RefExp*)ii->second;
03182         char* name1 = lookupSymFromRef(r1);
03183         char* name2 = lookupSymFromRef(r2);
03184         if (name1 && !name2 && !ig.isConnected(r1, r2)) {
03185             // There is a case where this is unhelpful, and it happen in test/pentium/fromssa2. We have renamed the
03186             // destination of the phi to ebx_1, and that leaves the two phi operands as ebx. However, we attempt to
03187             // unite them here, which will cause one of the operands to become ebx_1, so the neat oprimisation of
03188             // replacing the phi with one copy doesn't work. The result is an extra copy.
03189             // So check of r1 is a phi and r2 one of its operands, and all other operands for the phi have the same
03190             // name. If so, don't rename.
03191             Statement* def1 = r1->getDef();
03192             if (def1->isPhi()) {
03193                 bool allSame = true;
03194                 bool r2IsOperand = false;
03195                 char* firstName = NULL;
03196                 PhiAssign::iterator rr;
03197                 PhiAssign* pi = (PhiAssign*)def1;
03198                 for (rr = pi->begin(); rr != pi->end(); ++rr) {
03199                     RefExp* re = new RefExp(rr->e, rr->def);
03200                     if (*re == *r2)
03201                         r2IsOperand = true;
03202                     if (firstName == NULL)
03203                         firstName = lookupSymFromRefAny(re);
03204                     else if (strcmp(firstName, lookupSymFromRefAny(re)) != 0) {
03205                         allSame = false;
03206                         break;
03207                     }
03208                 }
03209                 if (allSame && r2IsOperand)
03210                     continue;                       // This situation has happened, don't map now
03211             }
03212             mapSymbolTo(r2, Location::local(name1, this));
03213             continue;
03214         }
03215     }
03216 
03217 
03218 /*  *   *   *   *   *   *   *   *   *   *   *   *   *   *\
03219  *                                                      *
03220  *   IR gets changed with hard locals and params here   *
03221  *                                                      *
03222 \*  *   *   *   *   *   *   *   *   *   *   *   *   *   */
03223 
03224     // First rename the variables (including phi's, but don't remove).
03225     // NOTE: it is not possible to postpone renaming these locals till the back end, since the same base location
03226     // may require different names at different locations, e.g. r28{0} is local0, r28{16} is local1
03227     // Update symbols and parameters, particularly for the stack pointer inside memofs.
03228     // NOTE: the ordering of the below operations is critical! Re-ordering may well prevent e.g. parameters from
03229     // renaming successfully.
03230     nameParameterPhis();
03231     mapLocalsAndParams();
03232     mapParameters();
03233     removeSubscriptsFromSymbols();
03234     removeSubscriptsFromParameters();
03235     for (it = stmts.begin(); it != stmts.end(); it++) {
03236         Statement* s = *it;
03237         s->replaceSubscriptsWithLocals();
03238     }
03239 
03240     // Now remove the phis
03241     for (it = stmts.begin(); it != stmts.end(); it++) {
03242         Statement* s = *it;
03243         if (!s->isPhi()) continue;
03244         // Check if the base variables are all the same
03245         PhiAssign* pa = (PhiAssign*)s;
03246         if (pa->begin() == pa->end()) {
03247             // no params to this phi, just remove it
03248             if (VERBOSE)
03249                 LOG << "phi with no params, removing: " << s << "\n";
03250             removeStatement(s);
03251             continue;
03252         }
03253         LocationSet refs;
03254         pa->addUsedLocs(refs);
03255         bool phiParamsSame = true;
03256         Exp* first = NULL;
03257         if (pa->getNumDefs() > 1) {
03258             PhiAssign::iterator uu;
03259             for (uu = pa->begin(); uu != pa->end(); uu++) {
03260                 if (uu->e == NULL) continue;
03261                 if (first == NULL) { first = uu->e; continue; }
03262                 if (!(*uu->e == *first)) {
03263                     phiParamsSame = false;
03264                     break;
03265                 }
03266             }
03267         }
03268         if (phiParamsSame && first) {
03269             // Is the left of the phi assignment the same base variable as all the operands?
03270             if (*pa->getLeft() == *first) {
03271                 if (DEBUG_LIVENESS || DEBUG_UNUSED)
03272                     LOG << "removing phi: left and all refs same or 0: " << s << "\n";
03273                 // Just removing the refs will work, or removing the whole phi
03274                 // NOTE: Removing the phi here may cause other statments to be not used.
03275                 removeStatement(s);
03276             } else
03277                 // Need to replace the phi by an expression,
03278                 // e.g. local0 = phi(r24{3}, r24{5}) becomes 
03279                 //      local0 = r24
03280                 pa->convertToAssign(first->clone());
03281         }
03282         else {
03283             // Need new local(s) for phi operands that have different names from the lhs
03284 #if 0       // The big problem with this idea is that it extends the range of the phi destination, and it could "pass"
03285             // definitions and uses of that variable. It could be that the dominance numbers could solve this, but it
03286             // seems unlikely
03287             Exp* lhs = pa->getLeft();
03288             RefExp* wrappedLeft = new RefExp(lhs, pa)
03289             char* lhsName = lookupSymForRef(wrappedLeft);
03290             PhiAssign::iterator rr;
03291             for (rr = pa->begin(); rr != pa->end(); rr++) {
03292                 RefExp* operand = new RefExp(rr->e, rr->def);
03293                 char* operName = lookupSymFromRef(operand);
03294                 if (strcmp(operName, lhsName) == 0)
03295                     continue;                   // No need for copies for this operand
03296                 // Consider possible optimisation here: if have a = phi(b, b, b, b, a), then there is only one copy
03297                 // needed. If by some fluke the program already had a=b after the definition for b, then no copy is
03298                 // needed at all. So a map of existing copies could be useful
03299                 insertAssignAfter(rr->def, lhs, operand);
03300             }
03301 #else       // This way is costly in copies, but has no problems with extending live ranges
03302             // Exp* tempLoc = newLocal(pa->getType());
03303             Exp* tempLoc = getSymbolExp(new RefExp(pa->getLeft(), pa), pa->getType());
03304             if (DEBUG_LIVENESS)
03305                 LOG << "phi statement " << s << " requires local, using " << tempLoc << "\n";
03306             // For each definition ref'd in the phi
03307             PhiAssign::iterator rr;
03308             for (rr = pa->begin(); rr != pa->end(); rr++) {
03309                 if (rr->e == NULL) continue;
03310                 insertAssignAfter(rr->def, tempLoc, rr->e);
03311             }
03312             // Replace the RHS of the phi with tempLoc
03313             pa->convertToAssign(tempLoc);
03314 #endif
03315         }
03316     }
03317 
03318 
03319     if (cfg->getNumBBs() >= 100)        // Only for the larger procs
03320         std::cout << "\n";
03321 
03322     Boomerang::get()->alert_decompile_debug_point(this, "after transforming from SSA form");
03323 }
03324 
03325 void UserProc::mapParameters() {
03326     // Replace the parameters with their mappings
03327     StatementList::iterator pp;
03328     for (pp = parameters.begin(); pp != parameters.end(); ++pp) {
03329         Exp* lhs = ((Assignment*)*pp)->getLeft();
03330         char* mappedName = lookupParam(lhs);
03331         if (mappedName == NULL) {
03332             LOG << "WARNING! No symbol mapping for parameter " << lhs << "\n";
03333             bool allZero;
03334             Exp* clean = lhs->clone()->removeSubscripts(allZero);
03335             if (allZero)
03336                 ((Assignment*)*pp)->setLeft(clean);
03337             // Else leave them alone
03338         } else
03339             ((Assignment*)*pp)->setLeft(Location::param(mappedName, this));
03340     }
03341 }
03342 
03343 void UserProc::removeSubscriptsFromSymbols() {
03344     // Basically, use the symbol map to map the symbols in the symbol map!
03345     // However, do not remove subscripts from the outer level; they are still needed for comments in the output and also
03346     // for when removing subscripts from parameters (still need the {0})
03347     // Since this will potentially change the ordering of entries, need to copy the map
03348     SymbolMap sm2 = symbolMap;              // Object copy
03349     SymbolMap::iterator it;
03350     symbolMap.clear();
03351     ExpSsaXformer esx(this);
03352     for (it = sm2.begin(); it != sm2.end(); ++it) {
03353         Exp* from = it->first;
03354         if (from->isSubscript()) {
03355             // As noted above, don't touch the outer level of subscripts
03356             Exp*& sub = ((RefExp*)from)->refSubExp1();
03357             sub = sub->accept(&esx);
03358         } else
03359             from = it->first->accept(&esx);
03360         mapSymbolTo(from, it->second);
03361     }
03362 }
03363 
03364 void UserProc::removeSubscriptsFromParameters() {
03365     StatementList::iterator it;
03366     ExpSsaXformer esx(this);
03367     for (it = parameters.begin(); it != parameters.end(); ++it) {
03368         Exp* left = ((Assignment*)*it)->getLeft();
03369         left = left->accept(&esx);
03370         ((Assignment*)*it)->setLeft(left);
03371     }
03372 }
03373 
03374 
03375 static Binary allEqAll(opEquals,
03376     new Terminal(opDefineAll),
03377     new Terminal(opDefineAll));
03378 
03379 // this function was non-reentrant, but now reentrancy is frequently used
03380 bool UserProc::prove(Exp *query, bool conditional /* = false */) {
03381 
03382     assert(query->isEquality());
03383     Exp* queryLeft = ((Binary*)query)->getSubExp1();
03384     Exp* queryRight = ((Binary*)query)->getSubExp2();
03385     if (provenTrue.find(queryLeft) != provenTrue.end() && *provenTrue[queryLeft] == *queryRight) {
03386         if (DEBUG_PROOF) LOG << "found true in provenTrue cache " << query << " in " << getName() << "\n";
03387         return true;
03388     }
03389 #if PROVEN_FALSE            // Maybe not so smart... may prove true after some iterations
03390     if (provenFalse.find(queryLeft) != provenFalse.end() && *provenFalse[queryLeft] == *queryRight) {
03391         if (DEBUG_PROOF) LOG << "found false in provenFalse cache " << query << " in " << getName() << "\n";
03392         return false;
03393     }
03394 #endif
03395 
03396     if (Boomerang::get()->noProve)
03397         return false;
03398 
03399     Exp *original = query->clone();
03400     Exp* origLeft = ((Binary*)original)->getSubExp1();
03401     Exp* origRight = ((Binary*)original)->getSubExp2();
03402 
03403     
03404     // subscript locs on the right with {-} (NULL reference)
03405     LocationSet locs;
03406     query->getSubExp2()->addUsedLocs(locs);
03407     LocationSet::iterator xx;
03408     for (xx = locs.begin(); xx != locs.end(); xx++) {
03409         query->setSubExp2(query->getSubExp2()->expSubscriptValNull(*xx));
03410     }
03411 
03412     if (query->getSubExp1()->getOper() != opSubscript) {
03413         bool gotDef = false;
03414         // replace expression from return set with expression in the collector of the return 
03415         if (theReturnStatement) {
03416             Exp* def = theReturnStatement->findDefFor(query->getSubExp1());
03417             if (def) {
03418                 query->setSubExp1(def);
03419                 gotDef = true;
03420             }
03421         }
03422         if (!gotDef) {
03423             // OK, the thing I'm looking for isn't in the return collector, but perhaps there is an entry for <all>
03424             // If this is proved, then it is safe to say that x == x for any x with no definition reaching the exit
03425             Exp* right = origRight->clone()->simplify();        // In case it's sp+0
03426             if (*origLeft == *right &&                          // x == x
03427                     origLeft->getOper() != opDefineAll &&           // Beware infinite recursion
03428                     prove(&allEqAll)) {                         // Recurse in case <all> not proven yet
03429                 if (DEBUG_PROOF)
03430                     LOG << "Using all=all for " << query->getSubExp1() << "\n" << "prove returns true\n";
03431                 provenTrue[origLeft->clone()] = right;
03432                 return true;
03433             }
03434             if (DEBUG_PROOF)
03435                 LOG << "not in return collector: " << query->getSubExp1() << "\n" << "prove returns false\n";
03436             return false;
03437         }
03438     }
03439 
03440     if (cycleGrp)           // If in involved in a recursion cycle
03441                             //  then save the original query as a premise for bypassing calls
03442         recurPremises[origLeft->clone()] = origRight;
03443 
03444     std::set<PhiAssign*> lastPhis;
03445     std::map<PhiAssign*, Exp*> cache;
03446     bool result = prover(query, lastPhis, cache, original);
03447     if (cycleGrp)
03448         recurPremises.erase(origLeft);          // Remove the premise, regardless of result
03449     if (DEBUG_PROOF) LOG << "prove returns " << (result ? "true" : "false") << " for " << query << " in " << getName()
03450                             << "\n";
03451  
03452     if (!conditional) {
03453         if (result)
03454             provenTrue[origLeft] = origRight;   // Save the now proven equation
03455 #if PROVEN_FALSE
03456         else
03457             provenFalse[origLeft] = origRight;  // Save the now proven-to-be-false equation
03458 #endif
03459     }
03460     return result;
03461 }
03462 
03463 bool UserProc::prover(Exp *query, std::set<PhiAssign*>& lastPhis, std::map<PhiAssign*, Exp*> &cache, Exp* original,
03464         PhiAssign* lastPhi /* = NULL */) {
03465     // A map that seems to be used to detect loops in the call graph:
03466     std::map<CallStatement*, Exp*> called;
03467     Exp *phiInd = query->getSubExp2()->clone();
03468 
03469     if (lastPhi && cache.find(lastPhi) != cache.end() && *cache[lastPhi] == *phiInd) {
03470         if (DEBUG_PROOF)
03471             LOG << "true - in the phi cache\n";
03472         return true;
03473     } 
03474 
03475     std::set<Statement*> refsTo;
03476 
03477     query = query->clone();
03478     bool change = true;
03479     bool swapped = false;
03480     while (change) {
03481         if (DEBUG_PROOF) {
03482             LOG << query << "\n";
03483         }
03484     
03485         change = false;
03486         if (query->getOper() == opEquals) {
03487 
03488             // same left and right means true
03489             if (*query->getSubExp1() == *query->getSubExp2()) {
03490                 query = new Terminal(opTrue);
03491                 change = true;
03492             }
03493 
03494             // move constants to the right
03495             Exp *plus = query->getSubExp1();
03496             Exp *s1s2 = plus ? plus->getSubExp2() : NULL;
03497             if (!change && plus->getOper() == opPlus && s1s2->isIntConst()) {
03498                 query->setSubExp2(new Binary(opPlus,
03499                     query->getSubExp2(),
03500                     new Unary(opNeg, s1s2->clone())));
03501                 query->setSubExp1(((Binary*)plus)->getSubExp1());
03502                 change = true;
03503             }
03504             if (!change && plus->getOper() == opMinus && s1s2->isIntConst()) {
03505                 query->setSubExp2(new Binary(opPlus, query->getSubExp2(), s1s2->clone()));
03506                 query->setSubExp1(((Binary*)plus)->getSubExp1());
03507                 change = true;
03508             }
03509 
03510 
03511             // substitute using a statement that has the same left as the query
03512             if (!change && query->getSubExp1()->getOper() == opSubscript) {
03513                 RefExp *r = (RefExp*)query->getSubExp1();
03514                 Statement *s = r->getDef();
03515                 CallStatement *call = dynamic_cast<CallStatement*>(s);
03516                 if (call) {
03517                     // See if we can prove something about this register.
03518                     UserProc* destProc = (UserProc*)call->getDestProc();
03519                     Exp* base = r->getSubExp1();
03520                     if (destProc && !destProc->isLib() && ((UserProc*)destProc)->cycleGrp != NULL &&
03521                             ((UserProc*)destProc)->cycleGrp->find(this) != ((UserProc*)destProc)->cycleGrp->end()) {
03522                         // The destination procedure may not have preservation proved as yet, because it is involved
03523                         // in our recursion group. Use the conditional preservation logic to determine whether query is
03524                         // true for this procedure
03525                         Exp* provenTo = destProc->getProven(base);
03526                         if (provenTo) {
03527                             // There is a proven preservation. Use it to bypass the call
03528                             Exp* queryLeft = call->localiseExp(provenTo->clone());
03529                             query->setSubExp1(queryLeft);
03530                             // Now try everything on the result
03531                             return prover(query, lastPhis, cache, original, lastPhi);
03532                         } else {
03533                             // Check if the required preservation is one of the premises already assumed
03534                             Exp* premisedTo = destProc->getPremised(base);
03535                             if (premisedTo) {
03536                                 if (DEBUG_PROOF)
03537                                     LOG << "conditional preservation for call from " << getName() << " to " << 
03538                                         destProc->getName() << ", allows bypassing\n";
03539                                 Exp* queryLeft = call->localiseExp(premisedTo->clone());
03540                                 query->setSubExp1(queryLeft);
03541                                 return prover(query, lastPhis, cache, original, lastPhi);
03542                             } else {
03543                                 // There is no proof, and it's not one of the premises. It may yet succeed, by making
03544                                 // another premise! Example: try to prove esp, depends on whether ebp is preserved, so
03545                                 // recurse to check ebp's preservation. Won't infinitely loop because of the premise map
03546                                 // FIXME: what if it needs a rx = rx + K preservation?
03547                                 Exp* newQuery = new Binary(opEquals,
03548                                     base->clone(),
03549                                     base->clone());
03550                                 destProc->setPremise(base);
03551                                 if (DEBUG_PROOF)
03552                                     LOG << "new required premise " << newQuery << " for " << destProc->getName() <<
03553                                         "\n";
03554                                 // Pass conditional as true, since even if proven, this is conditional on other things
03555                                 bool result = destProc->prove(newQuery, true);
03556                                 destProc->killPremise(base);
03557                                 if (result) {
03558                                     if (DEBUG_PROOF)
03559                                         LOG << "conditional preservation with new premise " << newQuery <<
03560                                             " succeeds for " << destProc->getName() << "\n";
03561                                     // Use the new conditionally proven result
03562                                     Exp* queryLeft = call->localiseExp(base->clone());
03563                                     query->setSubExp1(queryLeft);
03564                                     return destProc->prover(query, lastPhis, cache, original, lastPhi);
03565                                 } else {
03566                                     if (DEBUG_PROOF)
03567                                         LOG << "conditional preservation required premise " << newQuery << " fails!\n";
03568                                     // Do nothing else; the outer proof will likely fail
03569                                 }
03570                             }
03571                         }
03572                             
03573                     } // End call involved in this recursion group
03574                     // Seems reasonable that recursive procs need protection from call loops too
03575                     Exp *right = call->getProven(r->getSubExp1());  // getProven returns the right side of what is
03576                     if (right) {                                    //  proven about r (the LHS of query)
03577                         right = right->clone();
03578                         if (called.find(call) != called.end() && *called[call] == *query) {
03579                             LOG << "found call loop to " << call->getDestProc()->getName() << " " << query << "\n";
03580                             query = new Terminal(opFalse);
03581                             change = true;
03582                         } else {
03583                             called[call] = query->clone();
03584                             if (DEBUG_PROOF)
03585                                 LOG << "using proven for " << call->getDestProc()->getName() << " " 
03586                                     << r->getSubExp1() << " = " << right << "\n";
03587                             right = call->localiseExp(right);
03588                             if (DEBUG_PROOF)
03589                                 LOG << "right with subs: " << right << "\n";
03590                             query->setSubExp1(right);               // Replace LHS of query with right
03591                             change = true;
03592                         }
03593                     }
03594                 } else if (s && s->isPhi()) {
03595                     // for a phi, we have to prove the query for every statement
03596                     PhiAssign *pa = (PhiAssign*)s;
03597                     PhiAssign::iterator it;
03598                     bool ok = true;
03599                     if (lastPhis.find(pa) != lastPhis.end() || pa == lastPhi) {
03600                         if (DEBUG_PROOF)
03601                             LOG << "phi loop detected ";
03602                         ok = (*query->getSubExp2() == *phiInd);
03603                         if (ok && DEBUG_PROOF)
03604                             LOG << "(set true due to induction)\n";     // FIXME: induction??!
03605                         if (!ok && DEBUG_PROOF)
03606                             LOG << "(set false " << query->getSubExp2() << " != " << phiInd << ")\n";
03607                     } else {
03608                         if (DEBUG_PROOF)
03609                             LOG << "found " << s << " prove for each\n";
03610                         for (it = pa->begin(); it != pa->end(); it++) {
03611                             Exp *e = query->clone();
03612                             RefExp *r1 = (RefExp*)e->getSubExp1();
03613                             r1->setDef(it->def);
03614                             if (DEBUG_PROOF)
03615                                 LOG << "proving for " << e << "\n";
03616                             lastPhis.insert(lastPhi);
03617                             if (!prover(e, lastPhis, cache, original, pa)) { 
03618                                 ok = false; 
03619                                 //delete e; 
03620                                 break; 
03621                             }
03622                             lastPhis.erase(lastPhi);
03623                             //delete e;
03624                         }
03625                         if (ok)
03626                             cache[pa] = query->getSubExp2()->clone();
03627                     }
03628                     if (ok)
03629                         query = new Terminal(opTrue);
03630                     else 
03631                         query = new Terminal(opFalse);
03632                     change = true;
03633                 } else if (s && s->isAssign()) {
03634                     if (s && refsTo.find(s) != refsTo.end()) {
03635                         LOG << "detected ref loop " << s << "\n";
03636                         LOG << "refsTo: ";
03637                         std::set<Statement*>::iterator ll;
03638                         for (ll = refsTo.begin(); ll != refsTo.end(); ++ll)
03639                             LOG << (*ll)->getNumber() << ", ";
03640                         LOG << "\n";
03641                         assert(false);
03642                     } else {
03643                         refsTo.insert(s);
03644                         query->setSubExp1(((Assign*)s)->getRight()->clone());
03645                         change = true;
03646                     }
03647                 }
03648             }
03649 
03650             // remove memofs from both sides if possible
03651             if (!change && query->getSubExp1()->getOper() == opMemOf && query->getSubExp2()->getOper() == opMemOf) {
03652                 query->setSubExp1(((Unary*)query->getSubExp1())->getSubExp1());
03653                 query->setSubExp2(((Unary*)query->getSubExp2())->getSubExp1());
03654                 change = true;
03655             }
03656 
03657             // is ok if both of the memofs are subscripted with NULL
03658             if (!change && query->getSubExp1()->getOper() == opSubscript &&
03659                     query->getSubExp1()->getSubExp1()->getOper() == opMemOf &&
03660                     ((RefExp*)query->getSubExp1())->getDef() == NULL &&
03661                     query->getSubExp2()->getOper() == opSubscript &&
03662                     query->getSubExp2()->getSubExp1()->getOper() == opMemOf &&
03663                     ((RefExp*)query->getSubExp2())->getDef() == NULL) {
03664                 query->setSubExp1(((Unary*)query->getSubExp1()->getSubExp1())->getSubExp1());
03665                 query->setSubExp2(((Unary*)query->getSubExp2()->getSubExp1())->getSubExp1());
03666                 change = true;
03667             }
03668 
03669             // find a memory def for the right if there is a memof on the left
03670             // FIXME: this seems pretty much like a bad hack!
03671             if (!change && query->getSubExp1()->getOper() == opMemOf) {
03672                 StatementList stmts;
03673                 getStatements(stmts);
03674                 StatementList::iterator it;
03675                 for (it = stmts.begin(); it != stmts.end(); it++) {
03676                     Assign* s = (Assign*)*it;
03677                     if (s->isAssign() && *s->getRight() == *query->getSubExp2() && s->getLeft()->getOper() == opMemOf) {
03678                         query->setSubExp2(s->getLeft()->clone());
03679                         change = true;
03680                         break;
03681                     }
03682                 }
03683             }
03684 
03685             // last chance, swap left and right if haven't swapped before
03686             if (!change && !swapped) {
03687                 Exp *e = query->getSubExp1();
03688                 query->setSubExp1(query->getSubExp2());
03689                 query->setSubExp2(e);
03690                 change = true;
03691                 swapped = true;
03692                 refsTo.clear();
03693             }
03694         } else if (query->isIntConst()) {
03695             Const *c = (Const*)query;
03696             query = new Terminal(c->getInt() ? opTrue : opFalse);
03697         }
03698 
03699         Exp *old = query->clone();
03700 
03701         query = query->clone()->simplify();
03702 
03703         if (change && !(*old == *query) && DEBUG_PROOF) {
03704             LOG << old << "\n";
03705         }
03706         //delete old;
03707     }
03708     
03709     return query->getOper() == opTrue;
03710 }
03711 
03712 // Get the set of locations defined by this proc. In other words, the define set, currently called returns
03713 void UserProc::getDefinitions(LocationSet& ls) {
03714     int n = signature->getNumReturns();
03715     for (int j=0; j < n; j++) {
03716         ls.insert(signature->getReturnExp(j));
03717     }
03718 }
03719 
03720 void Proc::addCallers(std::set<UserProc*>& callers) {
03721     std::set<CallStatement*>::iterator it;
03722     for (it = callerSet.begin(); it != callerSet.end(); it++) {
03723         UserProc* callerProc = (*it)->getProc();
03724         callers.insert(callerProc);
03725     }
03726 }
03727 
03728 void UserProc::addCallees(std::list<UserProc*>& callees) {
03729     // SLOW SLOW SLOW
03730     // this function is evil now... REALLY evil... hope it doesn't get called too often
03731     std::list<Proc*>::iterator it;
03732     for (it = calleeList.begin(); it != calleeList.end(); it++) {
03733         UserProc* callee = (UserProc*)(*it);
03734         if (callee->isLib()) continue;
03735         addCallee(callee);
03736     }
03737 }
03738 
03739 void UserProc::conTypeAnalysis() {
03740     if (DEBUG_TA)
03741         LOG << "type analysis for procedure " << getName() << "\n";
03742     Constraints consObj;
03743     LocationSet cons;
03744     StatementList stmts;
03745     getStatements(stmts);
03746     StatementList::iterator ss;
03747     // For each statement this proc
03748     int conscript = 0;
03749     for (ss = stmts.begin(); ss != stmts.end(); ss++) {
03750         cons.clear();
03751         // So we can co-erce constants:
03752         conscript = (*ss)->setConscripts(conscript);
03753         (*ss)->genConstraints(cons);
03754         consObj.addConstraints(cons);
03755         if (DEBUG_TA)
03756             LOG << (*ss) << "\n" << &cons << "\n";
03757         // Remove the sizes immediately the constraints are generated.
03758         // Otherwise, x and x*8* look like different expressions
03759         (*ss)->stripSizes();
03760     }
03761 
03762     std::list<ConstraintMap> solns;
03763     bool ret = consObj.solve(solns);
03764     if (VERBOSE || DEBUG_TA) {
03765         if (!ret)
03766             LOG << "** could not solve type constraints for proc " << getName() << "!\n";
03767         else if (solns.size() > 1)
03768             // Note: require cast to unsigned for OS X and 64-bit hosts
03769             LOG << "** " << (unsigned)solns.size() << " solutions to type constraints for proc " << getName() << "!\n";
03770     }
03771         
03772     std::list<ConstraintMap>::iterator it;
03773     int solnNum = 0;
03774     ConstraintMap::iterator cc;
03775     if (DEBUG_TA) {
03776         for (it = solns.begin(); it != solns.end(); it++) {
03777             LOG << "solution " << ++solnNum << " for proc " << getName() << "\n";
03778             ConstraintMap& cm = *it;
03779             for (cc = cm.begin(); cc != cm.end(); cc++)
03780                 LOG << cc->first << " = " << cc->second << "\n";
03781             LOG << "\n";
03782         }
03783     }
03784 
03785     // Just use the first solution, if there is one
03786     Prog* prog = getProg();
03787     if (solns.size()) {
03788         ConstraintMap& cm = *solns.begin();
03789         for (cc = cm.begin(); cc != cm.end(); cc++) {
03790             // Ick. A workaround for now (see test/pentium/sumarray-O4)
03791             //assert(cc->first->isTypeOf());
03792 if (!cc->first->isTypeOf()) continue;
03793             Exp* loc = ((Unary*)cc->first)->getSubExp1();
03794             assert(cc->second->isTypeVal());
03795             Type* ty = ((TypeVal*)cc->second)->getType();
03796             if (loc->isSubscript())
03797                 loc = ((RefExp*)loc)->getSubExp1();
03798             if (loc->isGlobal()) {
03799                 char* nam = ((Const*)((Unary*)loc)->getSubExp1())->getStr();
03800                 if (!ty->resolvesToVoid())
03801                     prog->setGlobalType(nam, ty->clone());
03802             } else if (loc->isLocal()) {
03803                 char* nam = ((Const*)((Unary*)loc)->getSubExp1())->getStr();
03804                 setLocalType(nam, ty);
03805             } else if (loc->isIntConst()) {
03806                 Const* con = (Const*)loc;
03807                 int val = con->getInt();
03808                 if (ty->isFloat()) {
03809                     // Need heavy duty cast here
03810                     // MVE: check this! Especially when a double prec float
03811                     con->setFlt(*(float*)&val);
03812                     con->setOper(opFltConst);
03813                 } else if (ty->isCString()) {
03814                     // Convert to a string
03815                     char* str = prog->getStringConstant(val, true);
03816                     if (str) {
03817                         // Make a string
03818                         con->setStr(str);
03819                         con->setOper(opStrConst);
03820                     }
03821                 }
03822                 else {
03823                     if (ty->isInteger() && ty->getSize() && ty->getSize() != STD_SIZE)
03824                         // Wrap the constant in a TypedExp (for a cast)
03825                         castConst(con->getConscript(), ty);
03826                 }
03827             }
03828         }
03829     }
03830 
03831     // Clear the conscripts. These confuse the fromSSA logic, causing infinite
03832     // loops
03833     for (ss = stmts.begin(); ss != stmts.end(); ss++) {
03834         (*ss)->clearConscripts();
03835     }
03836 }
03837 
03838 
03839 
03840 
03841 bool UserProc::searchAndReplace(Exp *search, Exp *replace)
03842 {
03843     bool ch = false;
03844     StatementList stmts;
03845     getStatements(stmts);
03846     StatementList::iterator it;
03847     for (it = stmts.begin(); it != stmts.end(); it++) {
03848         Statement* s = *it;
03849         ch |= s->searchAndReplace(search, replace); 
03850     }
03851     return ch; 
03852 }
03853 
03854 unsigned fudge(StatementList::iterator x) {
03855     StatementList::iterator y = x;
03856     return *(unsigned*)&y;
03857 }
03858 
03859 Exp *UserProc::getProven(Exp *left) {
03860     // Note: proven information is in the form r28 mapsto (r28 + 4)
03861     std::map<Exp*, Exp*, lessExpStar>::iterator it = provenTrue.find(left);
03862     if (it != provenTrue.end())
03863         return it->second;
03864     //  not found, try the signature
03865     // No! The below should only be for library functions!
03866     // return signature->getProven(left);
03867     return NULL;
03868 }
03869 
03870 Exp* UserProc::getPremised(Exp* left) {
03871     std::map<Exp*, Exp*, lessExpStar>::iterator it = recurPremises.find(left);
03872     if (it != recurPremises.end())
03873         return it->second;
03874     return NULL;
03875 }
03876 
03877 bool UserProc::isPreserved(Exp* e) {
03878     return provenTrue.find(e) != provenTrue.end() && *provenTrue[e] == *e;
03879 }
03880 
03881 void UserProc::castConst(int num, Type* ty) {
03882     StatementList stmts;
03883     getStatements(stmts);
03884     StatementList::iterator it;
03885     for (it = stmts.begin(); it != stmts.end(); it++) {
03886         if ((*it)->castConst(num, ty))
03887             break;
03888     }
03889 }
03890 
03891 // Process calls with ellipsis parameters. Return true if any signature parameter types added.
03892 bool UserProc::ellipsisProcessing() {
03893     BB_IT it;
03894     BasicBlock::rtlrit rrit; StatementList::reverse_iterator srit;
03895     bool ch = false;
03896     for (it = cfg->begin(); it != cfg->end(); ++it) {
03897         CallStatement* c = (CallStatement*) (*it)->getLastStmt(rrit, srit);
03898         // Note: we may have removed some statements, so there may no longer be a last statement!
03899         if (c == NULL || !c->isCall()) continue;
03900         ch |= c->ellipsisProcessing(prog);
03901     }
03902     if (ch)
03903         fixCallAndPhiRefs();
03904     return ch;
03905 }
03906 
03907 // Before Type Analysis, refs like r28{0} have a NULL Statement pointer. After this, they will point to an
03908 // implicit assignment for the location. Thus, during and after type analysis, you can find the type of any
03909 // location by following the reference to the definition
03910 // Note: you need something recursive to make sure that child subexpressions are processed before parents
03911 // Example: m[r28{0} - 12]{0} could end up adding an implicit assignment for r28{-} with a null reference, when other
03912 // pieces of code add r28{0}
03913 void UserProc::addImplicitAssigns() {
03914     Boomerang::get()->alert_decompile_debug_point(this, "before adding implicit assigns");
03915 
03916     StatementList stmts;
03917     getStatements(stmts);
03918     StatementList::iterator it;
03919     ImplicitConverter ic(cfg);
03920     StmtImplicitConverter sm(&ic, cfg);
03921     for (it = stmts.begin(); it != stmts.end(); it++) {
03922         (*it)->accept(&sm);
03923     }
03924     cfg->setImplicitsDone();
03925     df.convertImplicits(cfg);           // Some maps have m[...]{-} need to be m[...]{0} now
03926     makeSymbolsImplicit();
03927     // makeParamsImplicit();            // Not necessary yet, since registers are not yet mapped
03928 
03929     Boomerang::get()->alert_decompile_debug_point(this, "after adding implicit assigns");
03930 }
03931 
03932 // e is a parameter location, e.g. r8 or m[r28{0}+8]. Lookup a symbol for it
03933 char* UserProc::lookupParam(Exp* e) {
03934                                                 // Originally e.g. m[esp+K]
03935     Statement* def = cfg->findTheImplicitAssign(e);
03936     if (def == NULL) {
03937         LOG << "ERROR: no implicit definition for parameter " << e << " !\n";
03938         return NULL;
03939     }
03940     RefExp* re = new RefExp(e, def);
03941     Type* ty = def->getTypeFor(e);
03942     return lookupSym(re, ty);
03943 }
03944 
03945 char* UserProc::lookupSymFromRef(RefExp* r) {
03946     Statement* def = r->getDef();
03947     Exp* base = r->getSubExp1();
03948     Type* ty = def->getTypeFor(base);
03949     return lookupSym(r, ty);
03950 }
03951 
03952 char* UserProc::lookupSymFromRefAny(RefExp* r) {
03953     Statement* def = r->getDef();
03954     Exp* base = r->getSubExp1();
03955     Type* ty = def->getTypeFor(base);
03956     char* ret = lookupSym(r, ty);
03957     if (ret)
03958         return ret;                     // Found a specific symbol
03959     return lookupSym(base, ty);         // Check for a general symbol
03960 }
03961 
03962 char* UserProc::lookupSym(Exp* e, Type* ty) {
03963     if (e->isTypedExp())
03964         e = ((TypedExp*)e)->getSubExp1();
03965     SymbolMap::iterator it;
03966     it = symbolMap.find(e);
03967     while (it != symbolMap.end() && *it->first == *e) {
03968         Exp* sym = it->second;
03969         assert(sym->isLocal() || sym->isParam());
03970         char* name = ((Const*)((Location*)sym)->getSubExp1())->getStr();
03971         Type* type = getLocalType(name);
03972         if (type == NULL) type = getParamType(name);    // Ick currently linear search
03973         if (type && type->isCompatibleWith(ty))
03974             return name;
03975         ++it;
03976     }
03977 #if 0
03978     if (e->isSubscript())
03979         // A subscripted location, where there was no specific mapping. Check for a general mapping covering all
03980         // versions of the location
03981         return lookupSym(((RefExp*)e)->getSubExp1(), ty);
03982 #endif
03983     // Else there is no symbol
03984     return NULL;
03985 }
03986 
03987 void UserProc::printSymbolMap(std::ostream &out, bool html) {
03988     if (html)
03989         out << "<br>";
03990     out << "symbols:\n";
03991     SymbolMap::iterator it;
03992     for (it = symbolMap.begin(); it != symbolMap.end(); it++) {
03993         Type* ty = getTypeForLocation(it->second);
03994         out << "  " << it->first << " maps to " << it->second << " type " << (ty ? ty->getCtype() : "NULL") << "\n";
03995         if (html)
03996             out << "<br>";
03997     }
03998     if (html)
03999         out << "<br>";
04000     out << "end symbols\n";
04001 }
04002 
04003 void UserProc::dumpLocals(std::ostream& os, bool html) {
04004     if (html)
04005         os << "<br>";
04006     os << "locals:\n";
04007     for (std::map<std::string, Type*>::iterator it = locals.begin(); it != locals.end(); it++) {
04008         os << it->second->getCtype() << " " << it->first.c_str() << " ";
04009         Exp *e = expFromSymbol((*it).first.c_str());
04010         // Beware: for some locals, expFromSymbol() returns NULL (? No longer?)
04011         if (e)
04012             os << e << "\n";
04013         else
04014             os << "-\n";
04015     }
04016     if (html)
04017         os << "<br>";
04018     os << "end locals\n";
04019 }
04020 
04021 void UserProc::dumpSymbolMap() {
04022     SymbolMap::iterator it;
04023     for (it = symbolMap.begin(); it != symbolMap.end(); it++) {
04024         Type* ty = getTypeForLocation(it->second);
04025         std::cerr << "  " << it->first << " maps to " << it->second << " type " << (ty ? ty->getCtype() : "NULL") <<
04026             "\n";
04027     }
04028 }
04029 
04030 void UserProc::dumpSymbolMapx() {
04031     SymbolMap::iterator it;
04032     for (it = symbolMap.begin(); it != symbolMap.end(); it++) {
04033         Type* ty = getTypeForLocation(it->second);
04034         std::cerr << "  " << it->first << " maps to " << it->second << " type " << (ty ? ty->getCtype() : "NULL") <<
04035             "\n";
04036         it->first->printx(2);
04037     }
04038 }
04039 
04040 void UserProc::testSymbolMap() {
04041     SymbolMap::iterator it1, it2;
04042     bool OK = true;
04043     it1 = symbolMap.begin();
04044     if (it1 != symbolMap.end()) {
04045         it2 = it1;
04046         ++it2;
04047         while (it2 != symbolMap.end()) {
04048             if (*it2->first < *it1->first) {        // Compare keys
04049                 OK = false;
04050                 std::cerr << "*it2->first < *it1->first: " << it2->first << " < " << it1->first << "!\n";
04051                 // it2->first->printx(0); it1->first->printx(5);
04052             }
04053             ++it1;
04054             ++it2;
04055         }
04056     }
04057     std::cerr << "Symbolmap is " << (OK ? "OK" : "NOT OK!!!!!") << "\n";
04058 }
04059 
04060 void UserProc::dumpLocals() {
04061     std::stringstream ost;
04062     dumpLocals(ost);
04063     std::cerr << ost.str();
04064 }
04065 
04066 void UserProc::updateArguments() {
04067     Boomerang::get()->alert_decompiling(this);
04068     if (VERBOSE)
04069         LOG << "### update arguments for " << getName() << " ###\n";
04070     Boomerang::get()->alert_decompile_debug_point(this, "before updating arguments");
04071     BB_IT it;
04072     BasicBlock::rtlrit rrit; StatementList::reverse_iterator srit;
04073     for (it = cfg->begin(); it != cfg->end(); ++it) {
04074         CallStatement* c = (CallStatement*) (*it)->getLastStmt(rrit, srit);
04075         // Note: we may have removed some statements, so there may no longer be a last statement!
04076         if (c == NULL || !c->isCall()) continue;
04077         c->updateArguments();
04078         //c->bypass();
04079         if (VERBOSE) {
04080             std::ostringstream ost;
04081             c->print(ost);
04082             LOG << ost.str().c_str() << "\n";
04083         }
04084     }
04085     if (VERBOSE)
04086         LOG << "=== end update arguments for " << getName() << "\n";
04087     Boomerang::get()->alert_decompile_debug_point(this, "after updating arguments");
04088 }
04089 
04090 void UserProc::updateCallDefines() {
04091     if (VERBOSE)
04092         LOG << "### update call defines for " << getName() << " ###\n";
04093     StatementList stmts;
04094     getStatements(stmts);
04095     StatementList::iterator it;
04096     for (it = stmts.begin(); it != stmts.end(); it++) {
04097         CallStatement* call = dynamic_cast<CallStatement*>(*it);
04098         if (call == NULL) continue;
04099         call->updateDefines();
04100     }
04101 }
04102 
04103 void UserProc::reverseStrengthReduction()
04104 {
04105     Boomerang::get()->alert_decompile_debug_point(this, "before reversing strength reduction");
04106 
04107     StatementList stmts;
04108     getStatements(stmts);
04109     StatementList::iterator it;
04110     for (it = stmts.begin(); it != stmts.end(); it++) 
04111         if ((*it)->isAssign()) {
04112             Assign *as = (Assign*)*it;
04113             // of the form x = x{p} + c
04114             if (as->getRight()->getOper() == opPlus && 
04115                     as->getRight()->getSubExp1()->isSubscript() &&
04116                     *as->getLeft() == *as->getRight()->getSubExp1()->getSubExp1() &&
04117                     as->getRight()->getSubExp2()->isIntConst()) {
04118                 int c = ((Const*)as->getRight()->getSubExp2())->getInt();
04119                 RefExp *r = (RefExp*)as->getRight()->getSubExp1();
04120                 if (r->getDef() && r->getDef()->isPhi()) {
04121                     PhiAssign *p = (PhiAssign*)r->getDef();
04122                     if (p->getNumDefs() == 2) {
04123                         Statement *first = p->getDefs().front().def;
04124                         Statement *second = p->getDefs().back().def;
04125                         if (first == as) {
04126                             // want the increment in second
04127                             Statement *tmp = first;
04128                             first = second;
04129                             second = tmp;
04130                         }
04131                         // first must be of form x := 0
04132                         if (first && first->isAssign() && 
04133                                 ((Assign*)first)->getRight()->isIntConst() &&
04134                                 ((Const*)((Assign*)first)->getRight())->getInt() == 0) {
04135                             // ok, fun, now we need to find every reference to p and
04136                             // replace with x{p} * c
04137                             StatementList stmts2;
04138                             getStatements(stmts2);
04139                             StatementList::iterator it2;
04140                             for (it2 = stmts2.begin(); it2 != stmts2.end(); it2++)
04141                                 if (*it2 != as)
04142                                     (*it2)->searchAndReplace(r, new Binary(opMult, r->clone(), new Const(c)));
04143                             // that done we can replace c with 1 in as
04144                             ((Const*)as->getRight()->getSubExp2())->setInt(1);
04145                         }
04146                     }
04147                 }
04148             }       
04149         }
04150     Boomerang::get()->alert_decompile_debug_point(this, "after reversing strength reduction");
04151 }
04152 
04153 // Update the parameters, in case the signature and hence ordering and filtering has changed, or the locations in the
04154 // collector have changed
04155 void UserProc::insertParameter(Exp* e, Type* ty) {
04156 
04157     if (filterParams(e))
04158         return;                     // Filtered out
04159     // Used to filter out preserved locations here: no! Propagation and dead code elimination solve the problem.
04160     // See test/pentium/restoredparam for an example where you must not remove restored locations
04161             
04162     // Wrap it in an implicit assignment; DFA based TA should update the type later
04163     ImplicitAssign* as = new ImplicitAssign(ty->clone(), e->clone());
04164     // Insert as, in order, into the existing set of parameters
04165     StatementList::iterator nn;
04166     bool inserted = false;
04167     for (nn = parameters.begin(); nn != parameters.end(); ++nn) {
04168         // If the new assignment is less than the current one ...
04169         if (signature->argumentCompare(*as, *(Assignment*)*nn)) {
04170             nn = parameters.insert(nn, as);     // ... then insert before this position
04171             inserted = true;
04172             break;
04173         }
04174     }
04175     if (!inserted)
04176         parameters.insert(parameters.end(), as);    // In case larger than all existing elements
04177 
04178     // update the signature
04179     signature->setNumParams(0);
04180     int i = 1;
04181     for (nn = parameters.begin(); nn != parameters.end(); ++nn, ++i) {
04182         Assignment *a = (Assignment*)*nn;
04183         char tmp[20];
04184         sprintf(tmp, "param%i", i);
04185         signature->addParameter(a->getType(), tmp, a->getLeft());
04186     }
04187 }
04188 
04189 
04190 // Filter out locations not possible as return locations. Return true to *remove* (filter *out*)
04191 bool UserProc::filterReturns(Exp* e) {
04192     if (isPreserved(e))
04193         // If it is preserved, then it can't be a return (since we don't change it)
04194         return true;
04195     switch (e->getOper()) {
04196         case opPC:  return true;            // Ignore %pc
04197         case opDefineAll: return true;      // Ignore <all>
04198         case opTemp: return true;           // Ignore all temps (should be local to one instruction)
04199         // Would like to handle at least %ZF, %CF one day. For now, filter them out
04200         case opZF: case opCF: case opFlags:
04201             return true;
04202         case opMemOf: {
04203             // return signature->isStackLocal(prog, e);     // Filter out local variables
04204             // Actually, surely all sensible architectures will only every return in registers. So for now, just
04205             // filter out all mem-ofs
04206             return true;
04207         }
04208         case opGlobal:
04209             return true;                // Never return in globals
04210         default:
04211             return false;
04212     }
04213     return false;
04214 }
04215 
04216 // Filter out locations not possible as parameters or arguments. Return true to remove
04217 bool UserProc::filterParams(Exp* e) {
04218     switch (e->getOper()) {
04219         case opPC:  return true;
04220         case opTemp: return true;
04221         case opRegOf: {
04222             int sp = 999;
04223             if (signature) sp = signature->getStackRegister(prog);
04224             int r = ((Const*)((Location*)e)->getSubExp1())->getInt();
04225             return r == sp;
04226         }
04227         case opMemOf: {
04228             Exp* addr = ((Location*)e)->getSubExp1();
04229             if (addr->isIntConst())
04230                 return true;            // Global memory location
04231             if (addr->isSubscript() && ((RefExp*)addr)->isImplicitDef()) {
04232                 Exp* reg = ((RefExp*)addr)->getSubExp1();
04233                 int sp = 999;
04234                 if (signature) sp = signature->getStackRegister(prog);
04235                 if (reg->isRegN(sp))
04236                     return true;        // Filter out m[sp{-}] assuming it is the return address
04237             }
04238             return false;               // Might be some weird memory expression that is not a local
04239         }
04240         case opGlobal:
04241             return true;                // Never use globals as argument locations (could appear on RHS of args)
04242         default:
04243             return false;
04244     }
04245     return false;
04246 }
04247 
04248 char* UserProc::findLocal(Exp* e, Type* ty) {
04249     if (e->isLocal())
04250         return ((Const*)((Unary*)e)->getSubExp1())->getStr();
04251     // Look it up in the symbol map
04252     char* name = lookupSym(e, ty);
04253     if (name == NULL)
04254         return NULL;
04255     // Now make sure it is a local; some symbols (e.g. parameters) are in the symbol map but not locals
04256     std::string str(name);
04257     if (locals.find(str) != locals.end())
04258         return name;
04259     return NULL;
04260 }
04261 
04262 char* UserProc::findLocalFromRef(RefExp* r) {
04263     Statement* def = r->getDef();
04264     Exp* base = r->getSubExp1();
04265     Type* ty = def->getTypeFor(base);
04266     char* name = lookupSym(r, ty);
04267     if (name == NULL)
04268         return NULL;
04269     // Now make sure it is a local; some symbols (e.g. parameters) are in the symbol map but not locals
04270     std::string str(name);
04271     if (locals.find(str) != locals.end())
04272         return name;
04273     return NULL;
04274 }
04275 
04276 char* UserProc::findFirstSymbol(Exp* e) {
04277     SymbolMap::iterator ff = symbolMap.find(e);
04278     if (ff == symbolMap.end()) return NULL;
04279     return ((Const*)((Location*)ff->second)->getSubExp1())->getStr();
04280 }
04281 
04282 
04283 // Algorithm:
04284 /* fixCallAndPhiRefs
04285     for each statement s in this proc
04286       if s is a phi statement ps
04287         let r be a ref made up of lhs and s
04288         for each parameter p of ps
04289           if p == r                     // e.g. test/pentium/fromssa2 r28{56}
04290             remove p from ps
04291         let lhs be left hand side of ps
04292         allSame = true
04293         let first be a ref built from first p
04294         do bypass but not propagation on first
04295         if result is of the form lhs{x}
04296           replace first with x
04297         for each parameter p of ps after the first
04298           let current be a ref built from p
04299           do bypass but not propagation on current
04300           if result is of form lhs{x}
04301             replace cur with x
04302           if first != current
04303             allSame = false
04304         if allSame
04305           let best be ref built from the "best" parameter p in ps ({-} better than {assign} better than {call})
04306           replace ps with an assignment lhs := best
04307     else (ordinary statement)
04308       do bypass and propagation for s
04309 */
04310 
04311 void UserProc::fixCallAndPhiRefs() {
04312     if (VERBOSE)
04313         LOG << "### start fix call and phi bypass analysis for " << getName() << " ###\n";
04314 
04315     Boomerang::get()->alert_decompile_debug_point(this, "before fixing call and phi refs");
04316 
04317     std::map<Exp*, int, lessExpStar> destCounts;
04318     StatementList::iterator it;
04319     Statement* s;
04320     StatementList stmts;
04321     getStatements(stmts);
04322 
04323     // a[m[]] hack, aint nothing better.
04324     bool found = true;
04325     for (it = stmts.begin(); it != stmts.end(); it++) 
04326         if ((*it)->isCall()) {
04327             CallStatement *call = (CallStatement*)*it;
04328             for (StatementList::iterator it1 = call->getArguments().begin(); it1 != call->getArguments().end(); it1++) {
04329                 Assign *a = (Assign*)*it1;
04330                 if (a->getType()->resolvesToPointer()) {
04331                     Exp *e = a->getRight();
04332                     if (e->getOper() == opPlus || e->getOper() == opMinus)
04333                         if (e->getSubExp2()->isIntConst())
04334                             if (e->getSubExp1()->isSubscript() &&
04335                                     e->getSubExp1()->getSubExp1()->isRegN(signature->getStackRegister()) &&
04336                                     (((RefExp*)e->getSubExp1())->getDef() == NULL ||
04337                                         ((RefExp*)e->getSubExp1())->getDef()->isImplicit())) {
04338                                 a->setRight(new Unary(opAddrOf, Location::memOf(e->clone())));
04339                                 found = true;
04340                             }
04341                 }
04342             }
04343         }
04344     if (found)
04345         doRenameBlockVars(2);
04346 
04347     // Scan for situations like this:
04348     // 56 r28 := phi{6, 26}
04349     // ...
04350     // 26 r28 := r28{56}
04351     // So we can remove the second parameter, then reduce the phi to an assignment, then propagate it
04352     for (it = stmts.begin(); it != stmts.end(); it++) {
04353         s = *it;
04354         if (s->isPhi()) {
04355             PhiAssign* ps = (PhiAssign*)s;
04356             RefExp* r = new RefExp(ps->getLeft(), ps);
04357             for (PhiAssign::iterator p = ps->begin(); p != ps->end(); ) {
04358                 if (p->e == NULL) {                     // Can happen due to PhiAssign::setAt
04359                     ++p;
04360                     continue;
04361                 }
04362                 Exp* current = new RefExp(p->e, p->def);
04363                 if (*current == *r) {                   // Will we ever see this?
04364                     p = ps->erase(p);                   // Erase this phi parameter
04365                     continue;
04366                 }
04367                 // Chase the definition
04368                 if (p->def) {
04369                     if (!p->def->isAssign()) {
04370                         ++p;
04371                         continue;
04372                     }
04373                     Exp* rhs = ((Assign*)p->def)->getRight();
04374                     if (*rhs == *r) {                   // Check if RHS is a single reference to ps
04375                         p = ps->erase(p);               // Yes, erase this phi parameter
04376                         continue;
04377                     }
04378                 }
04379                 ++p;
04380             }
04381         }
04382     }
04383         
04384     // Second pass
04385     for (it = stmts.begin(); it != stmts.end(); it++) {
04386         s = *it;
04387         if (s->isPhi()) {
04388             PhiAssign* ps = (PhiAssign*)s;
04389             if (ps->getNumDefs() == 0) continue;        // Can happen e.g. for m[...] := phi {} when this proc is
04390                                                         // involved in a recursion group
04391             Exp* lhs = ps->getLeft();
04392             bool allSame = true;
04393             // Let first be a reference built from the first parameter
04394             PhiAssign::iterator p = ps->begin();
04395             while (p->e == NULL && p != ps->end())
04396                 ++p;                                    // Skip any null parameters
04397             assert(p != ps->end());                     // Should have been deleted
04398             Exp* first = new RefExp(p->e, p->def);
04399             // bypass to first
04400             CallBypasser cb(ps);
04401             first = first->accept(&cb);
04402             if (cb.isTopChanged())
04403                 first = first->simplify();
04404             first = first->propagateAll();              // Propagate everything repeatedly
04405             if (cb.isMod()) {                       // Modified? 
04406                 // if first is of the form lhs{x}
04407                 if (first->isSubscript() && *((RefExp*)first)->getSubExp1() == *lhs)
04408                     // replace first with x
04409                     p->def = ((RefExp*)first)->getDef();
04410             }
04411             // For each parameter p of ps after the first
04412             for (++p; p != ps->end(); ++p) {
04413                 if (p->e == NULL) continue;
04414                 Exp* current = new RefExp(p->e, p->def);
04415                 CallBypasser cb2(ps);
04416                 current = current->accept(&cb2);
04417                 if (cb2.isTopChanged())
04418                     current = current->simplify();
04419                 current = current->propagateAll();
04420                 if (cb2.isMod() )                   // Modified?
04421                     // if current is of the form lhs{x}
04422                     if (current->isSubscript() && *((RefExp*)current)->getSubExp1() == *lhs)
04423                         // replace current with x
04424                         p->def = ((RefExp*)current)->getDef();
04425                 if (!(*first == *current))
04426                     allSame = false;
04427             }
04428 
04429             if (allSame) {
04430                 // let best be ref built from the "best" parameter p in ps ({-} better than {assign} better than {call})
04431                 p = ps->begin();
04432                 while (p->e == NULL && p != ps->end())
04433                     ++p;                                    // Skip any null parameters
04434                 assert(p != ps->end());                     // Should have been deleted
04435                 RefExp* best = new RefExp(p->e, p->def);
04436                 for (++p; p != ps->end(); ++p) {
04437                     if (p->e == NULL) continue;
04438                     RefExp* current = new RefExp(p->e, p->def);
04439                     if (current->isImplicitDef()) {
04440                         best = current;
04441                         break;
04442                     }
04443                     if (p->def->isAssign())
04444                         best = current;
04445                     // If p->def is a call, this is the worst case; keep only (via first) if all parameters are calls
04446                 }
04447                 ps->convertToAssign(best);
04448                 if (VERBOSE)
04449                     LOG << "redundant phi replaced with copy assign; now " << ps << "\n";
04450             }
04451         } else {    // Ordinary statement
04452             s->bypass();
04453         }
04454     }
04455 
04456     // Also do xxx in m[xxx] in the use collector
04457     UseCollector::iterator cc;
04458     for (cc = col.begin(); cc != col.end(); ++cc) {
04459         if (!(*cc)->isMemOf()) continue;
04460         Exp* addr = ((Location*)*cc)->getSubExp1();
04461         CallBypasser cb(NULL);
04462         addr = addr->accept(&cb);
04463         if (cb.isMod())
04464             ((Location*)*cc)->setSubExp1(addr);
04465     }
04466 
04467     if (VERBOSE)
04468         LOG << "### end fix call and phi bypass analysis for " << getName() << " ###\n";
04469 
04470     Boomerang::get()->alert_decompile_debug_point(this, "after fixing call and phi refs");
04471 }
04472 
04473 // Not sure that this is needed...
04474 void UserProc::markAsNonChildless(ProcSet* cs) {
04475     BasicBlock::rtlrit rrit; StatementList::reverse_iterator srit;
04476     BB_IT it;
04477     for (PBB bb = cfg->getFirstBB(it); bb; bb = cfg->getNextBB(it)) {
04478         CallStatement* c = (CallStatement*) bb->getLastStmt(rrit, srit);
04479         if (c && c->isCall() && c->isChildless()) {
04480             UserProc* dest = (UserProc*)c->getDestProc();
04481             if (cs->find(dest) != cs->end())    // Part of the cycle?
04482                 // Yes, set the callee return statement (making it non childless)
04483                 c->setCalleeReturn(dest->getTheReturnStatement());
04484         }
04485     }
04486 }
04487 
04488 // Propagate into xxx of m[xxx] in the UseCollector (locations live at the entry of this proc)
04489 void UserProc::propagateToCollector() {
04490     UseCollector::iterator it;
04491     for (it = col.begin(); it != col.end(); ) {
04492         if (!(*it)->isMemOf()) {
04493             ++it;
04494             continue;
04495         }
04496         Exp* addr = ((Location*)*it)->getSubExp1();
04497         LocationSet used;
04498         LocationSet::iterator uu;
04499         addr->addUsedLocs(used);
04500         for (uu = used.begin(); uu != used.end(); uu++) {
04501             RefExp* r = (RefExp*)*uu;
04502             if (!r->isSubscript()) continue;
04503             Assign* as = (Assign*)r->getDef();
04504             if (as == NULL || !as->isAssign()) continue;
04505             bool ch;
04506             Exp* res = addr->clone()->searchReplaceAll(r, as->getRight(), ch);
04507             if (!ch) continue;              // No change
04508             Exp* memOfRes = Location::memOf(res)->simplify();
04509             // First check to see if memOfRes is already in the set
04510             if (col.exists(memOfRes)) {
04511                 // Take care not to use an iterator to the newly erased element.
04512                 /* it = */ col.remove(it++);        // Already exists; just remove the old one
04513                 continue;
04514             } else {
04515                 if (VERBOSE)
04516                     LOG << "propagating " << r << " to " << as->getRight() << " in collector; result " << memOfRes <<
04517                         "\n";
04518                 ((Location*)*it)->setSubExp1(res);  // Change the child of the memof
04519             }
04520         }
04521         ++it;           // it is iterated either with the erase, or the continue, or here
04522     }
04523 }
04524 
04525 // Get the initial parameters, based on this UserProc's use collector
04526 // Probably unused now
04527 void UserProc::initialParameters() {
04528     if (VERBOSE)
04529         LOG << "### initial parameters for " << getName() << "\n";
04530     parameters.clear();
04531     UseCollector::iterator cc;
04532     for (cc = col.begin(); cc != col.end(); ++cc)
04533         parameters.append(new ImplicitAssign((*cc)->clone()));
04534     if (VERBOSE) {
04535         std::ostringstream ost;
04536         printParams(ost);
04537         LOG << ost.str().c_str();
04538     }
04539 }
04540 
04541 bool UserProc::inductivePreservation(UserProc* topOfCycle) {
04542     // FIXME: This is not correct in general!! It should work OK for self recursion, but not for general mutual
04543     //recursion. Not that hard, just not done yet.
04544     return true;
04545 }
04546 
04547 bool UserProc::isLocal(Exp* e) {
04548     if (!e->isMemOf()) return false;            // Don't want say a register
04549     SymbolMap::iterator ff = symbolMap.find(e);
04550     if (ff == symbolMap.end()) return false;
04551     Exp* mapTo = ff->second;
04552     return mapTo->isLocal();
04553 }
04554 
04555 bool UserProc::isPropagatable(Exp* e) {
04556     if (addressEscapedVars.exists(e)) return false;
04557     return isLocalOrParam(e);
04558 }
04559 
04560 bool UserProc::isLocalOrParam(Exp* e) {
04561     if (isLocal(e)) return true;
04562     return parameters.existsOnLeft(e);
04563 }
04564 
04565 // Is this m[sp{-} +/- K]?
04566 bool UserProc::isLocalOrParamPattern(Exp* e) {
04567     if (!e->isMemOf()) return false;            // Don't want say a register
04568     Exp* addr = ((Location*)e)->getSubExp1();
04569     if (!signature->isPromoted()) return false; // Prevent an assert failure if using -E
04570     int sp = signature->getStackRegister();
04571     Exp* initSp = new RefExp(Location::regOf(sp), NULL);    // sp{-}
04572     if (*addr == *initSp) return true;          // Accept m[sp{-}]
04573     if (addr->getArity() != 2) return false;    // Require sp +/- K
04574     OPER op = ((Binary*)addr)->getOper();
04575     if (op != opPlus && op != opMinus) return false;
04576     Exp* left =  ((Binary*)addr)->getSubExp1();
04577     if (!(*left == *initSp)) return false;
04578     Exp* right = ((Binary*)addr)->getSubExp2();
04579     return right->isIntConst();
04580 }
04581 
04582 // Remove the unused parameters. Check for uses for each parameter as param{0}.
04583 // Some parameters are apparently used when in fact they are only used as parameters to calls to procedures in the
04584 // recursion set. So don't count components of arguments of calls in the current recursion group that chain through to
04585 // ultimately use the argument as a parameter to the current procedure.
04586 // Some parameters are apparently used when in fact they are only used by phi statements which transmit a return from
04587 // a recursive call ultimately to the current procedure, to the exit of the current procedure, and the return exists
04588 // only because of a liveness created by a parameter to a recursive call. So when examining phi statements, check if
04589 // referenced from a return of the current procedure, and has an implicit operand, and all the others satisfy a call
04590 // to doesReturnChainToCall(param, this proc).
04591 
04592 // visited is a set of procs already visited, to prevent infinite recursion
04593 bool UserProc::doesParamChainToCall(Exp* param, UserProc* p, ProcSet* visited) {
04594     BB_IT it;
04595     BasicBlock::rtlrit rrit; StatementList::reverse_iterator srit;
04596     for (it = cfg->begin(); it != cfg->end(); ++it) {
04597         CallStatement* c = (CallStatement*) (*it)->getLastStmt(rrit, srit);
04598         if (c == NULL || !c->isCall())  continue;       // Only interested in calls
04599         UserProc* dest = (UserProc*)c->getDestProc();
04600         if (dest == NULL || dest->isLib()) continue;  // Only interested in calls to UserProcs
04601         if (dest == p) {                // Pointer comparison is OK here
04602             // This is a recursive call to p. Check for an argument of the form param{-} FIXME: should be looking for
04603             // component
04604             StatementList& args = c->getArguments();
04605             StatementList::iterator aa;
04606             for (aa = args.begin(); aa != args.end(); ++aa) {
04607                 Exp* rhs = ((Assign*)*aa)->getRight();
04608                 if (rhs && rhs->isSubscript() && ((RefExp*)rhs)->isImplicitDef()) {
04609                     Exp* base = ((RefExp*)rhs)->getSubExp1();
04610                     // Check if this argument location matches loc
04611                     if (*base == *param)
04612                         // We have a call to p that takes param{-} as an argument
04613                         return true;
04614                 }
04615             }
04616         } else {
04617             if (dest->doesRecurseTo(p)) {
04618                 // We have come to a call that is not to p, but is in the same recursion group as p and this proc.
04619                 visited->insert(this);
04620                 if (visited->find(dest) != visited->end()) {
04621                     // Recurse to the next proc
04622                     bool res = dest->doesParamChainToCall(param, p, visited);
04623                     if (res)
04624                         return true;
04625                     // Else consider more calls this proc
04626                 }
04627             }
04628         }
04629     }
04630     return false;
04631 }
04632 
04633 bool UserProc::isRetNonFakeUsed(CallStatement* c, Exp* retLoc, UserProc* p, ProcSet* visited) {
04634     // Ick! This algorithm has to search every statement for uses of the return location retLoc defined at call c that
04635     // are not arguments of calls to p. If we had def-use information, it would be much more efficient
04636     StatementList stmts;
04637     getStatements(stmts);
04638     StatementList::iterator it;
04639     for (it = stmts.begin(); it != stmts.end(); it++) {
04640         Statement* s = *it;
04641         LocationSet ls;
04642         LocationSet::iterator ll;
04643         s->addUsedLocs(ls);
04644         bool found = false;
04645         for (ll = ls.begin(); ll != ls.end(); ++ll) {
04646             if (!(*ll)->isSubscript()) continue;
04647             Statement* def = ((RefExp*)*ll)->getDef();
04648             if (def != c) continue;                         // Not defined at c, ignore
04649             Exp* base = ((RefExp*)*ll)->getSubExp1();
04650             if (!(*base == *retLoc)) continue;              // Defined at c, but not the right location
04651             found = true;
04652             break;
04653         }
04654         if (!found)
04655             continue;
04656         if (!s->isCall())
04657             // This non-call uses the return; return true as it is non-fake used
04658             return true;
04659         UserProc* dest = (UserProc*)((CallStatement*)s)->getDestProc();
04660         if (dest == NULL)
04661             // This childless call seems to use the return. Count it as a non-fake use
04662             return true;
04663         if (dest == p)
04664             // This procedure uses the parameter, but it's a recursive call to p, so ignore it
04665             continue;
04666         if (dest->isLib())
04667             // Can't be a recursive call
04668             return true;
04669         if (!dest->doesRecurseTo(p))
04670             return true;
04671         // We have a call that uses the return, but it may well recurse to p
04672         visited->insert(this);
04673         if (visited->find(dest) != visited->end())
04674             // We've not found any way for loc to be fake-used. Count it as non-fake
04675             return true;
04676         if (!doesParamChainToCall(retLoc, p, visited))
04677             // It is a recursive call, but it doesn't end up passing param as an argument in a call to p
04678             return true;
04679     }
04680     return false;
04681 }
04682 
04683 // Check for a gainful use of bparam{0} in this proc. Return with true when the first such use is found.
04684 // Ignore uses in return statements of recursive functions, and phi statements that define them
04685 // Procs in visited are already visited
04686 bool UserProc::checkForGainfulUse(Exp* bparam, ProcSet& visited) {
04687     visited.insert(this);                   // Prevent infinite recursion
04688     StatementList::iterator pp;
04689     StatementList stmts;
04690     getStatements(stmts);
04691     StatementList::iterator it;
04692     for (it = stmts.begin(); it != stmts.end(); it++) {
04693         Statement* s = *it;
04694         // Special checking for recursive calls
04695         if (s->isCall()) {
04696             CallStatement* c = (CallStatement*)s;
04697             UserProc* dest = (UserProc*)c->getDestProc();
04698             if (dest && !dest->isLib() && dest->doesRecurseTo(this)) {
04699                 // In the destination expression?
04700                 LocationSet u;
04701                 c->getDest()->addUsedLocs(u);
04702                 if (u.existsImplicit(bparam))
04703                     return true;            // Used by the destination expression
04704                 // Else check for arguments of the form lloc := f(bparam{0})
04705                 StatementList& args = c->getArguments();
04706                 StatementList::iterator aa;
04707                 for (aa = args.begin(); aa != args.end(); ++aa) {
04708                     Exp* rhs = ((Assign*)*aa)->getRight();
04709                     LocationSet argUses;
04710                     rhs->addUsedLocs(argUses);
04711                     if (argUses.existsImplicit(bparam)) {
04712                         Exp* lloc = ((Assign*)*aa)->getLeft();
04713                         if (visited.find(dest) == visited.end() && dest->checkForGainfulUse(lloc, visited))
04714                             return true;
04715                     }
04716                 }
04717                 // If get to here, then none of the arguments is of this form, and we can ignore this call
04718                 continue;
04719             }
04720         }
04721         else if (s->isReturn()) {
04722             if (cycleGrp && cycleGrp->size())       // If this function is involved in recursion
04723                 continue;               //  then ignore this return statement
04724         } else if (s->isPhi() && theReturnStatement != NULL && cycleGrp && cycleGrp->size()) {
04725             Exp* phiLeft = ((PhiAssign*)s)->getLeft();
04726             RefExp* refPhi = new RefExp(phiLeft, s);
04727             ReturnStatement::iterator rr;
04728             bool foundPhi = false;
04729             for (rr = theReturnStatement->begin(); rr != theReturnStatement->end(); ++rr) {
04730                 Exp* rhs = ((Assign*)*rr)->getRight();
04731                 LocationSet uses;
04732                 rhs->addUsedLocs(uses);
04733                 if (uses.exists(refPhi)) {
04734                     // s is a phi that defines a component of a recursive return. Ignore it
04735                     foundPhi = true;
04736                     break;
04737                 }
04738             }
04739             if (foundPhi)
04740                 continue;           // Ignore this phi
04741         }
04742 
04743         // Otherwise, consider uses in s
04744         LocationSet uses;
04745         s->addUsedLocs(uses);
04746         if (uses.existsImplicit(bparam))
04747             return true;                // A gainful use
04748     }   // for each statement s
04749 
04750     return false;
04751 }
04752 
04753 // See comments three procedures above
04754 bool UserProc::removeRedundantParameters() {
04755     if (signature->isForced())
04756         // Assume that no extra parameters would have been inserted... not sure always valid
04757         return false;
04758 
04759     bool ret = false;
04760     StatementList newParameters;
04761     
04762     Boomerang::get()->alert_decompile_debug_point(this, "before removing redundant parameters");
04763 
04764     if (DEBUG_UNUSED)
04765         LOG << "%%% removing unused parameters for " << getName() << "\n";
04766     // Note: this would be far more efficient if we had def-use information
04767     StatementList::iterator pp;
04768     for (pp = parameters.begin(); pp != parameters.end(); ++pp) {
04769         Exp* param = ((Assign*)*pp)->getLeft();
04770         bool az;
04771         Exp* bparam = param->clone()->removeSubscripts(az);     // FIXME: why does main have subscripts on parameters?
04772         // Memory parameters will be of the form m[sp + K]; convert to m[sp{0} + K] as will be found in uses
04773         bparam = bparam->expSubscriptAllNull();     // Now m[sp{-}+K]{-}
04774         ImplicitConverter ic(cfg);
04775         bparam = bparam->accept(&ic);               // Now m[sp{0}+K]{0}
04776         assert(bparam->isSubscript());
04777         bparam = ((RefExp*)bparam)->getSubExp1();   // now m[sp{0}+K] (bare parameter)
04778 
04779         ProcSet visited;
04780         if (checkForGainfulUse(bparam, visited))
04781             newParameters.append(*pp);              // Keep this parameter
04782         else {
04783             // Remove the parameter
04784             ret = true;
04785             if (DEBUG_UNUSED)
04786                 LOG << " %%% removing unused parameter " << param << " in " << getName() << "\n";
04787             // Check if it is in the symbol map. If so, delete it; a local will be created later
04788             SymbolMap::iterator ss = symbolMap.find(param);
04789             if (ss != symbolMap.end())
04790                 symbolMap.erase(ss);            // Kill the symbol
04791             signature->removeParameter(param);  // Also remove from the signature
04792             cfg->removeImplicitAssign(param);   // Remove the implicit assignment so it doesn't come back
04793         }
04794     }
04795     parameters = newParameters;
04796     if (DEBUG_UNUSED)
04797         LOG << "%%% end removing unused parameters for " << getName() << "\n";
04798 
04799     Boomerang::get()->alert_decompile_debug_point(this, "after removing redundant parameters");
04800 
04801     return ret;
04802 }
04803 
04804 // Remove unused returns for this procedure, based on the equation returns = modifieds isect union(live at c) for all
04805 // c calling this procedure.
04806 // The intersection operation will only remove locations. Removing returns can have three effects for each component y
04807 // used by that return (e.g. if return r24 := r25{10} + r26{20} is removed, statements 10 and 20 will be affected and
04808 // y will take the values r25{10} and r26{20}):
04809 // 1) a statement s defining a return becomes unused if the only use of its definition was y
04810 // 2) a call statement c defining y will no longer have y live if the return was the only use of y. This could cause a
04811 //  change to the returns of c's destination, so removeRedundantReturns has to be called for c's destination proc (if it
04812 //  turns out to be the only definition, and that proc was not already scheduled for return removing).
04813 // 3) if y is a parameter (i.e. y is of the form loc{0}), then the signature of this procedure changes, and all callers
04814 //  have to have their arguments trimmed, and a similar process has to be applied to all those caller's removed
04815 //  arguments as is applied here to the removed returns.
04816 // The removeRetSet is the set of procedures to process with this logic; caller in Prog calls all elements in this set
04817 // (only add procs to this set, never remove)
04818 // Return true if any change
04819 bool UserProc::removeRedundantReturns(std::set<UserProc*>& removeRetSet) {
04820     Boomerang::get()->alert_decompiling(this);
04821     Boomerang::get()->alert_decompile_debug_point(this, "before removing unused returns");
04822     // First remove the unused parameters
04823     bool removedParams = removeRedundantParameters();
04824     if (theReturnStatement == NULL)
04825         return removedParams;
04826     if (DEBUG_UNUSED)
04827         LOG << "%%% removing unused returns for " << getName() << " %%%\n";
04828 
04829     if (signature->isForced()) {
04830         // Respect the forced signature, but use it to remove returns if necessary
04831         bool removedRets = false;
04832         ReturnStatement::iterator rr;
04833         for (rr = theReturnStatement->begin(); rr != theReturnStatement->end(); ) {
04834             Assign* a = (Assign*)*rr;
04835             Exp *lhs = a->getLeft();
04836             // For each location in the returns, check if in the signature
04837             bool found = false;
04838             for (unsigned int i = 0; i < signature->getNumReturns(); i++)
04839                 if (*signature->getReturnExp(i) == *lhs) {
04840                     found = true;
04841                     break;
04842                 }
04843             if (found)
04844                 rr++;       // Yes, in signature; OK
04845             else {
04846                 // This return is not in the signature. Remove it
04847                 rr = theReturnStatement->erase(rr);
04848                 removedRets = true;
04849                 if (DEBUG_UNUSED)
04850                     LOG << "%%%  removing unused return " << a << " from proc " << getName() << " (forced signature)\n";
04851             }
04852         }
04853         if (removedRets)
04854             // Still may have effects on calls or now unused statements
04855             updateForUseChange(removeRetSet);
04856         return removedRets;
04857     }
04858 
04859     // FIXME: this needs to be more sensible when we don't decompile down from main! Probably should assume just the
04860     // first return is valid, for example (presently assume none are valid)
04861     LocationSet unionOfCallerLiveLocs;
04862     if (strcmp(getName(), "main") == 0)     // Probably not needed: main is forced so handled above
04863         // Just insert one return for main. Note: at present, the first parameter is still the stack pointer
04864         unionOfCallerLiveLocs.insert(signature->getReturnExp(1));
04865     else {
04866         // For each caller
04867         std::set<CallStatement*>& callers = getCallers();
04868         std::set<CallStatement*>::iterator cc;
04869         for (cc = callers.begin(); cc != callers.end(); ++cc) {
04870             // Union in the set of locations live at this call
04871             UseCollector* useCol = (*cc)->getUseCollector();
04872             unionOfCallerLiveLocs.makeUnion(useCol->getLocSet());
04873         }
04874     }
04875     // Intersect with the current returns
04876     bool removedRets = false;
04877     ReturnStatement::iterator rr;
04878     for (rr = theReturnStatement->begin(); rr != theReturnStatement->end(); ) {
04879         Assign* a = (Assign*)*rr;
04880         if (!unionOfCallerLiveLocs.exists(a->getLeft())) {
04881             if (DEBUG_UNUSED)
04882                 LOG << "%%%  removing unused return " << a << " from proc " << getName() << "\n";
04883             // If a component of the RHS referenced a call statement, the liveness used to be killed here.
04884             // This was wrong; you need to notice the liveness changing inside updateForUseChange() to correctly
04885             // recurse to callee
04886             rr = theReturnStatement->erase(rr);
04887             removedRets = true;
04888         }
04889         else
04890             rr++;
04891     }
04892 
04893     if (DEBUG_UNUSED) {
04894         std::ostringstream ost;
04895         unionOfCallerLiveLocs.print(ost);
04896         LOG << "%%%  union of caller live locations for " << getName() << ": " << ost.str().c_str() << "\n";
04897         LOG << "%%%  final returns for " << getName() << ": " << theReturnStatement->getReturns().prints() << "\n";
04898     }
04899 
04900     // removing returns might result in params that can be removed, might as well do it now.
04901     removedParams |= removeRedundantParameters();
04902 
04903     ProcSet updateSet;          // Set of procs to update
04904 
04905     if (removedParams || removedRets) {
04906         // Update the statements that call us
04907         std::set<CallStatement*>::iterator it;
04908         for (it = callerSet.begin(); it != callerSet.end() ; it++) {
04909             (*it)->updateArguments();               // Update caller's arguments
04910             updateSet.insert((*it)->getProc());     // Make sure we redo the dataflow
04911             removeRetSet.insert((*it)->getProc());  // Also schedule caller proc for more analysis
04912         }
04913 
04914         // Now update myself
04915         updateForUseChange(removeRetSet);
04916 
04917         // Update any other procs that need updating
04918         updateSet.erase(this);      // Already done this proc
04919         while (updateSet.size()) {
04920             UserProc* proc = *updateSet.begin();
04921             updateSet.erase(proc);
04922             proc->updateForUseChange(removeRetSet);
04923         }
04924     }
04925 
04926     if (theReturnStatement->getNumReturns() == 1) {
04927         Assign *a = (Assign*)*theReturnStatement->getReturns().begin();
04928         signature->setRetType(a->getType());
04929     }
04930 
04931     Boomerang::get()->alert_decompile_debug_point(this, "after removing unused and redundant returns");
04932     return removedRets || removedParams;
04933 }
04934 
04935 // See comments above for removeRedundantReturns(). Need to save the old parameters and call livenesses, redo the
04936 // dataflow and removal of unused statements, recalculate the parameters and call livenesses, and if either or both of
04937 // these are changed, recurse to parents or those calls' children respectively. (When call livenesses change like this,
04938 // it means that the recently removed return was the only use of that liveness, i.e. there was a return chain.)
04939 void UserProc::updateForUseChange(std::set<UserProc*>& removeRetSet) {
04940     // We need to remember the parameters, and all the livenesses for all the calls, to see if these are changed
04941     // by removing returns
04942     if (DEBUG_UNUSED) {
04943         LOG << "%%% updating " << getName() << " for changes to uses (returns or arguments)\n";
04944         LOG << "%%% updating dataflow:\n";
04945     }
04946 
04947     // Save the old parameters and call liveness
04948     StatementList oldParameters(parameters);
04949     std::map<CallStatement*, UseCollector> callLiveness;
04950     BasicBlock::rtlrit rrit; StatementList::reverse_iterator srit;
04951     BB_IT it;
04952     for (PBB bb = cfg->getFirstBB(it); bb; bb = cfg->getNextBB(it)) {
04953         CallStatement* c = (CallStatement*) bb->getLastStmt(rrit, srit);
04954         // Note: we may have removed some statements, so there may no longer be a last statement!
04955         if (c == NULL || !c->isCall()) continue;
04956         UserProc* dest = (UserProc*)c->getDestProc();
04957         // Not interested in unanalysed indirect calls (not sure) or calls to lib procs
04958         if (dest == NULL || dest->isLib()) continue;
04959         callLiveness[c].makeCloneOf(*c->getUseCollector());
04960     }
04961 
04962     // Have to redo dataflow to get the liveness at the calls correct
04963     removeCallLiveness();           // Want to recompute the call livenesses
04964     doRenameBlockVars(-3, true);
04965 
04966     remUnusedStmtEtc();             // Also redoes parameters
04967 
04968     // Have the parameters changed? If so, then all callers will need to update their arguments, and do similar
04969     // analysis to the removal of returns
04970     //findFinalParameters();
04971     removeRedundantParameters();
04972     if (parameters.size() != oldParameters.size()) {
04973         if (DEBUG_UNUSED)
04974             LOG << "%%%  parameters changed for " << getName() << "\n";
04975         std::set<CallStatement*>& callers = getCallers();
04976         std::set<CallStatement*>::iterator cc;
04977         std::set<UserProc*> callerProcs;
04978         std::set<UserProc*>::iterator pp;
04979         for (cc = callers.begin(); cc != callers.end(); ++cc) {
04980             (*cc)->updateArguments();
04981             // Schedule the callers for analysis
04982             removeRetSet.insert((*cc)->getProc());
04983         }
04984     }
04985     // Check if the liveness of any calls has changed
04986     std::map<CallStatement*, UseCollector>::iterator ll;
04987     for (ll = callLiveness.begin(); ll != callLiveness.end(); ++ll) {
04988         CallStatement* call = ll->first;
04989         UseCollector& oldLiveness = ll->second;
04990         UseCollector& newLiveness = *call->getUseCollector();
04991         if (!(newLiveness == oldLiveness)) {
04992             if (DEBUG_UNUSED)
04993                 LOG << "%%%  liveness for call to " << call->getDestProc()->getName() << " in " << getName() <<
04994                     " changed\n";
04995             removeRetSet.insert((UserProc*)call->getDestProc());
04996         }
04997     }
04998 }
04999 
05000 void UserProc::clearUses() {
05001     if (VERBOSE)
05002         LOG << "### clearing usage for " << getName() << " ###\n";
05003     col.clear();
05004     BB_IT it;
05005     BasicBlock::rtlrit rrit; StatementList::reverse_iterator srit;
05006     for (it = cfg->begin(); it != cfg->end(); ++it) {
05007         CallStatement* c = (CallStatement*) (*it)->getLastStmt(rrit, srit);
05008         // Note: we may have removed some statements, so there may no longer be a last statement!
05009         if (c == NULL || !c->isCall()) continue;
05010         c->clearUseCollector();
05011     }
05012 }
05013 
05014 void UserProc::typeAnalysis() {
05015     if (VERBOSE)
05016         LOG << "### type analysis for " << getName() << " ###\n";
05017 
05018     // Now we need to add the implicit assignments. Doing this earlier is extremely problematic, because
05019     // of all the m[...] that change their sorting order as their arguments get subscripted or propagated into
05020     // Do this regardless of whether doing dfa-based TA, so things like finding parameters can rely on implicit assigns
05021     addImplicitAssigns();
05022 
05023     // Data flow based type analysis
05024     // Want to be after all propagation, but before converting expressions to locals etc
05025     if (DFA_TYPE_ANALYSIS) {
05026         if (VERBOSE || DEBUG_TA)
05027             LOG << "--- start data flow based type analysis for " << getName() << " ---\n";
05028 
05029         bool first = true;
05030         do {
05031             if (!first) {
05032                 doRenameBlockVars(-1, true);        // Subscript the discovered extra parameters
05033                 //propagateAtDepth(maxDepth);       // Hack: Can sometimes be needed, if call was indirect
05034                 bool convert;
05035                 propagateStatements(convert, 0);
05036             }
05037             first = false;
05038             dfaTypeAnalysis();
05039 
05040             // There used to be a pass here to insert casts. This is best left until global type analysis is complete,
05041             // so do it just before translating from SSA form (which is the where type information becomes inaccessible)
05042 
05043         } while (ellipsisProcessing());
05044         simplify();                     // In case there are new struct members
05045         if (VERBOSE || DEBUG_TA)
05046             LOG << "=== end type analysis for " << getName() << " ===\n";
05047     }
05048 
05049     else if (CON_TYPE_ANALYSIS) {
05050         // FIXME: if we want to do comparison
05051     }
05052 
05053     printXML();
05054 }
05055 
05056 void UserProc::clearRanges()
05057 {
05058     StatementList stmts;
05059     getStatements(stmts);
05060     StatementList::iterator it;
05061     for (it = stmts.begin(); it != stmts.end(); it++)
05062         (*it)->clearRanges();
05063 }
05064 
05065 void UserProc::rangeAnalysis()
05066 {
05067     std::cout << "performing range analysis on " << getName() << "\n";
05068 
05069     // this helps
05070     cfg->sortByAddress();
05071 
05072     cfg->addJunctionStatements();
05073     cfg->establishDFTOrder();
05074 
05075     clearRanges();
05076 
05077     if (VERBOSE) {
05078         LOG << "=== Before performing range analysis for " << getName() << " ===\n";
05079         printToLog();
05080         LOG << "=== end before performing range analysis for " << getName() << " ===\n\n";
05081     }
05082 
05083     std::list<Statement*> execution_paths;
05084     std::list<Statement*> junctions;
05085 
05086     assert(cfg->getEntryBB());
05087     assert(cfg->getEntryBB()->getFirstStmt());
05088     execution_paths.push_back(cfg->getEntryBB()->getFirstStmt());
05089 
05090     int watchdog = 0;
05091 
05092     while(execution_paths.size()) {
05093         while(execution_paths.size()) {
05094             Statement *stmt = execution_paths.front();
05095             execution_paths.pop_front();
05096             if (stmt == NULL)
05097                 continue;  // ??
05098             if (stmt->isJunction())
05099                 junctions.push_back(stmt);
05100             else
05101                 stmt->rangeAnalysis(execution_paths);
05102         }
05103         if (watchdog > 45) 
05104             LOG << "processing execution paths resulted in " << (int)junctions.size() << " junctions to process\n";
05105         while(junctions.size()) {
05106             Statement *junction = junctions.front();
05107             junctions.pop_front();
05108             if (watchdog > 45)
05109                 LOG << "processing junction " << junction << "\n";
05110             assert(junction->isJunction());
05111             junction->rangeAnalysis(execution_paths);
05112         }
05113 
05114         watchdog++;
05115         if (watchdog > 10) {
05116             LOG << "  watchdog " << watchdog << "\n";
05117             if (watchdog > 45) {
05118                 LOG << (int)execution_paths.size() << " execution paths remaining.\n";
05119                 LOG << "=== After range analysis watchdog " << watchdog << " for " << getName() << " ===\n";
05120                 printToLog();
05121                 LOG << "=== end after range analysis watchdog " << watchdog << " for " << getName() << " ===\n\n";
05122             }
05123         }
05124         if (watchdog > 50) {
05125             LOG << "  watchdog expired\n";
05126             break;
05127         }
05128     }
05129 
05130     LOG << "=== After range analysis for " << getName() << " ===\n";
05131     printToLog();
05132     LOG << "=== end after range analysis for " << getName() << " ===\n\n";
05133 
05134     cfg->removeJunctionStatements();
05135 }
05136 
05137 void UserProc::logSuspectMemoryDefs()
05138 {
05139     StatementList stmts;
05140     getStatements(stmts);
05141     StatementList::iterator it;
05142     for (it = stmts.begin(); it != stmts.end(); it++)
05143         if ((*it)->isAssign()) {
05144             Assign *a = (Assign*)*it;
05145             if (a->getLeft()->isMemOf()) {
05146                 RangeMap &rm = a->getRanges();
05147                 Exp *p = rm.substInto(a->getLeft()->getSubExp1()->clone());
05148                 if (rm.hasRange(p)) {
05149                     Range &r = rm.getRange(p);
05150                     LOG << "got p " << p << " with range " << r << "\n";
05151                     if (r.getBase()->getOper() == opInitValueOf &&
05152                         r.getBase()->getSubExp1()->isRegOfK() &&
05153                         ((Const*)r.getBase()->getSubExp1()->getSubExp1())->getInt() == 28) {
05154                         RTL *rtl = a->getBB()->getRTLWithStatement(a);
05155                         LOG << "interesting stack reference at " << rtl->getAddress() << " " << a << "\n";
05156                     }
05157                 }
05158             }
05159         }
05160 }
05161 
05162 // Copy the RTLs for the already decoded Indirect Control Transfer instructions, and decode any new targets in this CFG
05163 // Note that we have to delay the new target decoding till now, because otherwise we will attempt to decode nested
05164 // switch statements without having any SSA renaming, propagation, etc
05165 RTL* globalRtl = 0;
05166 void UserProc::processDecodedICTs() {
05167     BB_IT it;
05168     BasicBlock::rtlrit rrit; StatementList::reverse_iterator srit;
05169     for (PBB bb = cfg->getFirstBB(it); bb; bb = cfg->getNextBB(it)) {
05170         Statement* last = bb->getLastStmt(rrit, srit);
05171         if (last == NULL) continue;         // e.g. a BB with just a NOP in it
05172         if (!last->isHL_ICT()) continue;
05173         RTL* rtl = bb->getLastRtl();
05174         if (DEBUG_SWITCH)
05175             LOG << "Saving high level switch statement " << rtl << "\n";
05176         prog->addDecodedRtl(bb->getHiAddr(), rtl);
05177         // Now decode those new targets, adding out edges as well
05178 //      if (last->isCase())
05179 //          bb->processSwitch(this);
05180     }
05181 }
05182 
05183 // Find or insert a new implicit reference just before statement s, for address expression a with type t.
05184 // Meet types if necessary
05185 void UserProc::setImplicitRef(Statement* s, Exp* a, Type* ty) {
05186     PBB bb = s->getBB();            // Get s' enclosing BB
05187     std::list<RTL*> *rtls = bb->getRTLs();
05188     for (std::list<RTL*>::iterator rit = rtls->begin(); rit != rtls->end(); rit++) {
05189         std::list<Statement*>& stmts = (*rit)->getList();
05190         RTL::iterator it, itForS;
05191         RTL* rtlForS;
05192         for (it = stmts.begin(); it != stmts.end(); it++) {
05193             if (*it == s ||
05194                     // Not the searched for statement. But if it is a call or return statement, it will be the last, and
05195                     // s must be a substatement (e.g. argument, return, define, etc).
05196                     ((*it)->isCall() || (*it)->isReturn())) {
05197                 // Found s. Search preceeding statements for an implicit reference with address a
05198                 itForS = it;
05199                 rtlForS = *rit;
05200                 bool found = false;
05201                 bool searchEarlierRtls = true;
05202                 while (it != stmts.begin()) {
05203                     ImpRefStatement* irs = (ImpRefStatement*) *--it;
05204                     if (!irs->isImpRef()) {
05205                         searchEarlierRtls = false;
05206                         break;
05207                     }
05208                     if (*irs->getAddressExp() == *a) {
05209                         found = true;
05210                         searchEarlierRtls = false;
05211                         break;
05212                     }
05213                 }
05214                 while (searchEarlierRtls && rit != rtls->begin()) {
05215                     for (std::list<RTL*>::reverse_iterator revit = rtls->rbegin(); revit != rtls->rend(); ++revit) {
05216                         std::list<Statement*>& stmts2 = (*revit)->getList();
05217                         it = stmts2.end();
05218                         while (it != stmts2.begin()) {
05219                             ImpRefStatement* irs = (ImpRefStatement*) *--it;
05220                             if (!irs->isImpRef()) {
05221                                 searchEarlierRtls = false;
05222                                 break;
05223                             }
05224                             if (*irs->getAddressExp() == *a) {
05225                                 found = true;
05226                                 searchEarlierRtls = false;
05227                                 break;
05228                             }
05229                         }
05230                         if (!searchEarlierRtls) break;
05231                     }
05232                 }
05233                 if (found) {
05234                     ImpRefStatement* irs = (ImpRefStatement*)*it;
05235                     bool ch;
05236                     irs->meetWith(ty, ch);
05237                 } else {
05238                     ImpRefStatement* irs = new ImpRefStatement(ty, a);
05239                     rtlForS->insertStmt(irs, itForS);
05240                 }
05241                 return;
05242             }
05243         }
05244     }
05245     assert(0);              // Could not find s withing its enclosing BB
05246 }
05247 
05248 void UserProc::eliminateDuplicateArgs() {
05249     if (VERBOSE)
05250         LOG << "### eliminate duplicate args for " << getName() << " ###\n";
05251     BB_IT it;
05252     BasicBlock::rtlrit rrit; StatementList::reverse_iterator srit;
05253     for (it = cfg->begin(); it != cfg->end(); ++it) {
05254         CallStatement* c = (CallStatement*) (*it)->getLastStmt(rrit, srit);
05255         // Note: we may have removed some statements, so there may no longer be a last statement!
05256         if (c == NULL || !c->isCall()) continue;
05257         c->eliminateDuplicateArgs();
05258     }
05259 }
05260 
05261 void UserProc::removeCallLiveness() {
05262     if (VERBOSE)
05263         LOG << "### removing call livenesses for " << getName() << " ###\n";
05264     BB_IT it;
05265     BasicBlock::rtlrit rrit; StatementList::reverse_iterator srit;
05266     for (it = cfg->begin(); it != cfg->end(); ++it) {
05267         CallStatement* c = (CallStatement*) (*it)->getLastStmt(rrit, srit);
05268         // Note: we may have removed some statements, so there may no longer be a last statement!
05269         if (c == NULL || !c->isCall()) continue;
05270         c->removeAllLive();
05271     }
05272 }
05273 
05274 void UserProc::mapTempsToLocals() {
05275     StatementList stmts;
05276     getStatements(stmts);
05277     StatementList::iterator it;
05278     TempToLocalMapper ttlm(this);
05279     StmtExpVisitor sv(&ttlm);
05280     for (it = stmts.begin(); it != stmts.end(); it++) {
05281         Statement* s = *it;
05282         s->accept(&sv);
05283     }
05284 }
05285 
05286 // For debugging:
05287 void dumpProcList(ProcList* pc) {
05288     ProcList::iterator pi;
05289     for (pi = pc->begin(); pi != pc->end(); ++pi)
05290         std::cerr << (*pi)->getName() << ", ";
05291     std::cerr << "\n";
05292 }
05293 
05294 void dumpProcSet(ProcSet* pc) {
05295     ProcSet::iterator pi;
05296     for (pi = pc->begin(); pi != pc->end(); ++pi)
05297         std::cerr << (*pi)->getName() << ", ";
05298     std::cerr << "\n";
05299 }
05300 
05301 void Proc::setProvenTrue(Exp* fact) {
05302     assert(fact->isEquality());
05303     Exp* lhs = ((Binary*)fact)->getSubExp1();
05304     Exp* rhs = ((Binary*)fact)->getSubExp2();
05305     provenTrue[lhs] = rhs;
05306 }
05307 
05308 void UserProc::mapLocalsAndParams() {
05309     Boomerang::get()->alert_decompile_debug_point(this, "before mapping locals from dfa type analysis");
05310     if (DEBUG_TA)
05311         LOG << " ### mapping expressions to local variables for " << getName() << " ###\n";
05312     StatementList stmts;
05313     getStatements(stmts);
05314     StatementList::iterator it;
05315     for (it = stmts.begin(); it != stmts.end(); it++) {
05316         Statement* s = *it;
05317         s->dfaMapLocals();
05318     }
05319     if (DEBUG_TA)
05320         LOG << " ### end mapping expressions to local variables for " << getName() << " ###\n";
05321 }
05322 
05323 void UserProc::makeSymbolsImplicit() {
05324     SymbolMap::iterator it;
05325     SymbolMap sm2 = symbolMap;          // Copy the whole map; necessary because the keys (Exps) change
05326     symbolMap.clear();
05327     ImplicitConverter ic(cfg);
05328     for (it = sm2.begin(); it != sm2.end(); ++it) {
05329         Exp* impFrom = it->first->accept(&ic);
05330         mapSymbolTo(impFrom, it->second);
05331     }
05332 }
05333 
05334 void UserProc::makeParamsImplicit() {
05335     StatementList::iterator it;
05336     ImplicitConverter ic(cfg);
05337     for (it = parameters.begin(); it != parameters.end(); ++it) {
05338         Exp* lhs = ((Assignment*)*it)->getLeft();
05339         lhs = lhs->accept(&ic);
05340         ((Assignment*)*it)->setLeft(lhs);
05341     }
05342 }
05343 
05344 void UserProc::findLiveAtDomPhi(LocationSet& usedByDomPhi) {
05345     LocationSet usedByDomPhi0;
05346     std::map<Exp*, PhiAssign*, lessExpStar> defdByPhi;
05347     df.findLiveAtDomPhi(0, usedByDomPhi, usedByDomPhi0, defdByPhi);
05348     // Note that the above is not the complete algorithm; it has found the dead phi-functions in the defdAtPhi
05349     std::map<Exp*, PhiAssign*, lessExpStar>::iterator it;
05350     for (it = defdByPhi.begin(); it != defdByPhi.end(); ++it) {
05351         // For each phi parameter, remove from the final usedByDomPhi set
05352         PhiAssign::iterator pp;
05353         for (pp = it->second->begin(); pp != it->second->end(); ++pp) {
05354             if (pp->e == NULL) continue;
05355             RefExp* wrappedParam = new RefExp(pp->e, pp->def);
05356             usedByDomPhi.remove(wrappedParam);
05357         }
05358         // Now remove the actual phi-function (a PhiAssign Statement)
05359         // Ick - some problem with return statements not using their returns until more analysis is done
05360         //removeStatement(it->second);
05361     }
05362 }
05363 
05364 #if USE_DOMINANCE_NUMS
05365 void UserProc::setDominanceNumbers() {
05366     int currNum = 1;
05367     df.setDominanceNums(0, currNum);
05368 }
05369 #endif
05370 
05371 void UserProc::findPhiUnites(ConnectionGraph& pu) {
05372     StatementList stmts;
05373     getStatements(stmts);
05374     StatementList::iterator it;
05375     for (it = stmts.begin(); it != stmts.end(); it++) {
05376         PhiAssign* pa = (PhiAssign*)*it;
05377         if (!pa->isPhi()) continue;
05378         Exp* lhs = pa->getLeft();
05379         RefExp* reLhs = new RefExp(lhs, pa);
05380         PhiAssign::iterator pp;
05381         for (pp = pa->begin(); pp != pa->end(); ++pp) {
05382             if (pp->e == NULL) continue;
05383             RefExp* re = new RefExp(pp->e, pp->def);
05384             pu.connect(reLhs, re);
05385         }
05386     }
05387 }
05388 
05389 char* UserProc::getRegName(Exp* r) {
05390     assert(r->isRegOf());
05391     int regNum = ((Const*)((Location*)r)->getSubExp1())->getInt();
05392     char* regName = const_cast<char*>(prog->getRegName(regNum));
05393     if (regName[0] == '%') regName++;       // Skip % if %eax
05394     return regName;
05395 }
05396 
05397 Type* UserProc::getTypeForLocation(Exp* e) {
05398     char* name;
05399     if (e->isLocal()) {
05400         name = ((Const*)((Unary*)e)->getSubExp1())->getStr();
05401         if (locals.find(name) != locals.end())
05402             return locals[name];
05403     }
05404     // Sometimes parameters use opLocal, so fall through
05405     name = ((Const*)((Unary*)e)->getSubExp1())->getStr();
05406     return getParamType(name);
05407 }
05408 
05409 // The idea here is to give a name to those SSA variables that have one and only one parameter amongst the phi
05410 // arguments. For example, in test/source/param1, there is 18 *v* m[r28{-} + 8] := phi{- 7} with m[r28{-} + 8]{0} mapped
05411 // to param1; insert a mapping for m[r28{-} + 8]{18} to param1. This will avoid a copy, and will use the name of the
05412 // parameter only when it is acually used as a parameter
05413 void UserProc::nameParameterPhis() {
05414     StatementList stmts;
05415     getStatements(stmts);
05416     StatementList::iterator it;
05417     for (it = stmts.begin(); it != stmts.end(); it++) {
05418         PhiAssign* pi = (PhiAssign*)*it;
05419         if (!pi->isPhi()) continue;         // Might be able to optimise this a bit
05420         // See if the destination has a symbol already
05421         Exp* lhs = pi->getLeft();
05422         RefExp* lhsRef = new RefExp(lhs, pi);
05423         if (findFirstSymbol(lhsRef) != NULL)
05424             continue;                       // Already mapped to something
05425         bool multiple = false;              // True if find more than one unique parameter
05426         char* firstName = NULL;             // The name for the first parameter found
05427         Type* ty = pi->getType();
05428         PhiAssign::iterator pp;
05429         for (pp = pi->begin(); pp != pi->end(); ++pp) {
05430             if (pp->def->isImplicit()) {
05431                 RefExp* phiArg = new RefExp(pp->e, pp->def);
05432                 char* name = lookupSym(phiArg, ty);
05433                 if (name != NULL) {
05434                     if (firstName != NULL && strcmp(firstName, name) != 0) {
05435                         multiple = true;
05436                         break;
05437                     }
05438                     firstName = name;       // Remember this candidate
05439                 }
05440             }
05441         } 
05442         if (multiple || firstName == NULL) continue;
05443         mapSymbolTo(lhsRef, Location::param(firstName, this));
05444     }
05445 }
05446 
05447 bool UserProc::existsLocal(char* name) {
05448     std::string s(name);
05449     return locals.find(s) != locals.end();
05450 }
05451 
05452 void UserProc::checkLocalFor(RefExp* r) {
05453     if (lookupSymFromRefAny(r)) return;         // Already have a symbol for r
05454     Statement* def = r->getDef();
05455     if (def) {
05456         Exp* base = r->getSubExp1();
05457         Type* ty = def->getTypeFor(base);
05458         // No, get its name from the front end
05459         if (base->isRegOf()) {
05460             char* regName = getRegName(base);
05461             // Create a new local, for the base name if it doesn't exist yet, so we don't need several names for the
05462             // same combination of location and type. However if it does already exist, addLocal will allocate a
05463             // new name. Example: r8{0}->argc type int, r8->o0 type int, now r8->o0_1 type char*.
05464             if (existsLocal(regName))
05465                 regName = newLocalName(r);
05466             addLocal(ty, regName, base);
05467         }
05468         else {
05469             char* locName = newLocalName(r);
05470             addLocal(ty, locName, base);
05471         }
05472     }
05473 }
05474 
05475 
05476 //  -   -   -   -   -   -   -   -   -
05477 
05478 #ifdef USING_MEMOS
05479 class LibProcMemo : public Memo {
05480 public:
05481     LibProcMemo(int mId) : Memo(mId) { }
05482 
05483     bool visited;
05484     Prog *prog;
05485     Signature *signature;                       // r
05486     ADDRESS address;
05487     Proc *m_firstCaller;
05488     ADDRESS m_firstCallerAddr;
05489     std::set<Exp*, lessExpStar> provenTrue;         // r
05490     std::set<CallStatement*> callerSet;
05491     Cluster *cluster;
05492 };
05493 
05494 Memo *LibProc::makeMemo(int mId)
05495 {
05496     LibProcMemo *m = new LibProcMemo(mId);
05497     m->visited = visited;
05498     m->prog = prog;
05499     m->signature = signature;
05500     m->address = address;
05501     m->m_firstCaller = m_firstCaller;
05502     m->m_firstCallerAddr = m_firstCallerAddr;
05503     m->provenTrue = provenTrue;
05504     m->callerSet = callerSet;
05505     m->cluster = cluster;
05506 
05507 //  signature->takeMemo(mId);
05508 //  for (std::set<Exp*, lessExpStar>::iterator it = provenTrue.begin(); it != provenTrue.end(); it++)
05509 //      (*it)->takeMemo(mId);
05510 
05511     return m;
05512 }
05513 
05514 void LibProc::readMemo(Memo *mm, bool dec)
05515 {
05516     LibProcMemo *m = dynamic_cast<LibProcMemo*>(mm);
05517     visited = m->visited;
05518     prog = m->prog;
05519     signature = m->signature;
05520     address = m->address;
05521     m_firstCaller = m->m_firstCaller;
05522     m_firstCallerAddr = m->m_firstCallerAddr;
05523     provenTrue = m->provenTrue;
05524     callerSet = m->callerSet;
05525     cluster = m->cluster;
05526 
05527 //  signature->restoreMemo(m->mId, dec);
05528 //  for (std::set<Exp*, lessExpStar>::iterator it = provenTrue.begin(); it != provenTrue.end(); it++)
05529 //      (*it)->restoreMemo(m->mId, dec);
05530 }
05531 
05532 class UserProcMemo : public Memo {
05533 public:
05534     UserProcMemo(int mId) : Memo(mId) { }
05535 
05536     bool visited;
05537     Prog *prog;
05538     Signature *signature;                       // r
05539     ADDRESS address;
05540     Proc *m_firstCaller;
05541     ADDRESS m_firstCallerAddr;
05542     std::set<Exp*, lessExpStar> provenTrue;         // r
05543     std::set<CallStatement*> callerSet;
05544     Cluster *cluster;
05545 
05546     Cfg* cfg;
05547     ProcStatus status;
05548     std::map<std::string, Type*> locals;        // r
05549     UserProc::SymbolMap symbolMap;          // r
05550     std::list<Proc*> calleeList;
05551 };
05552 
05553 Memo *UserProc::makeMemo(int mId)
05554 {
05555     UserProcMemo *m = new UserProcMemo(mId);
05556     m->visited = visited;
05557     m->prog = prog;
05558     m->signature = signature;
05559     m->address = address;
05560     m->m_firstCaller = m_firstCaller;
05561     m->m_firstCallerAddr = m_firstCallerAddr;
05562     m->provenTrue = provenTrue;
05563     m->callerSet = callerSet;
05564     m->cluster = cluster;
05565 
05566     m->cfg = cfg;
05567     m->status = status;
05568     m->locals = locals;
05569     m->symbolMap = symbolMap;
05570     m->calleeList = calleeList;
05571 
05572     signature->takeMemo(mId);
05573     for (std::set<Exp*, lessExpStar>::iterator it = provenTrue.begin(); it != provenTrue.end(); it++)
05574         (*it)->takeMemo(mId);
05575 
05576     for (std::map<std::string, Type*>::iterator it = locals.begin(); it != locals.end(); it++)
05577         (*it).second->takeMemo(mId);
05578 
05579     for (SymbolMap::iterator it = symbolMap.begin(); it != symbolMap.end(); it++) {
05580         (*it).first->takeMemo(mId);
05581         (*it).second->takeMemo(mId);
05582     }
05583 
05584     return m;
05585 }
05586 
05587 void UserProc::readMemo(Memo *mm, bool dec)
05588 {
05589     UserProcMemo *m = dynamic_cast<UserProcMemo*>(mm);
05590     visited = m->visited;
05591     prog = m->prog;
05592     signature = m->signature;
05593     address = m->address;
05594     m_firstCaller = m->m_firstCaller;
05595     m_firstCallerAddr = m->m_firstCallerAddr;
05596     provenTrue = m->provenTrue;
05597     callerSet = m->callerSet;
05598     cluster = m->cluster;
05599 
05600     cfg = m->cfg;
05601     status = m->status;
05602     locals = m->locals;
05603     symbolMap = m->symbolMap;
05604     calleeList = m->calleeList;
05605 
05606     signature->restoreMemo(m->mId, dec);
05607     for (std::set<Exp*, lessExpStar>::iterator it = provenTrue.begin(); it != provenTrue.end(); it++)
05608         (*it)->restoreMemo(m->mId, dec);
05609 
05610     for (std::map<std::string, Type*>::iterator it = locals.begin(); it != locals.end(); it++)
05611         (*it).second->restoreMemo(m->mId, dec);
05612 
05613     for (SymbolMap::iterator it = symbolMap.begin(); it != symbolMap.end(); it++) {
05614         (*it).first->restoreMemo(m->mId, dec);
05615         (*it).second->restoreMemo(m->mId, dec);
05616     }
05617 }
05618 #endif      // #ifdef USING_MEMOS

Generated on Tue Sep 19 21:18:30 2006 for Boomerang by  doxygen 1.4.6