frontend.cpp

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 1999-2001, The University of Queensland
00003  * Copyright (C) 2001, Sun Microsystems, Inc
00004  * Copyright (C) 2002, Trent Waddington
00005  *
00006  * See the file "LICENSE.TERMS" for information on usage and
00007  * redistribution of this file, and for a DISCLAIMER OF ALL
00008  * WARRANTIES.
00009  *
00010  */
00011 
00012 /*==============================================================================
00013  * FILE:       frontend.cpp
00014  * OVERVIEW:   This file contains common code for all front ends. The majority
00015  *              of frontend logic remains in the source dependent files such as
00016  *              frontsparc.cpp
00017  *============================================================================*/
00018 
00019 /*
00020  * $Revision: 1.128 $   // 1.89.2.7
00021  * 08 Apr 02 - Mike: Mods to adapt UQBT code to boomerang
00022  * 16 May 02 - Mike: Moved getMainEntry point here from prog
00023  * 09 Jul 02 - Mike: Fixed machine check for elf files (was checking endianness rather than machine type)
00024  * 22 Nov 02 - Mike: Quelched warnings
00025  * 16 Apr 03 - Mike: trace (-t) to cerr not cout now
00026  * 02 Feb 05 - Gerard: Check for thunks to library functions and don't create procs for these
00027  */
00028 
00029 #include <assert.h>
00030 #if defined(_MSC_VER) && _MSC_VER <= 1200
00031 #pragma warning(disable:4786)
00032 #endif
00033 
00034 #include "frontend.h"
00035 #include <queue>
00036 #include <stdarg.h>         // For varargs
00037 #include <sstream>
00038 #ifndef _WIN32
00039 #include <dlfcn.h>          // dlopen, dlsym
00040 #endif
00041 
00042 #include "types.h"
00043 #include "exp.h"
00044 #include "cfg.h"
00045 #include "proc.h"
00046 #include "register.h"
00047 #include "rtl.h"
00048 #include "BinaryFile.h"
00049 #include "decoder.h"
00050 #include "sparcfrontend.h"
00051 #include "pentiumfrontend.h"
00052 #include "ppcfrontend.h"
00053 #include "st20frontend.h"
00054 #include "prog.h"
00055 #include "signature.h"
00056 #include "boomerang.h"
00057 #include "log.h"
00058 #include "ansi-c-parser.h"
00059 
00060 /*==============================================================================
00061  * FUNCTION:      FrontEnd::FrontEnd
00062  * OVERVIEW:      Construct the FrontEnd object
00063  * PARAMETERS:    pBF: pointer to the BinaryFile object (loader)
00064  *                prog: program being decoded
00065  *                pbff: pointer to a BinaryFileFactory object (so the library can be unloaded)
00066  * RETURNS:       <N/a>
00067  *============================================================================*/
00068 FrontEnd::FrontEnd(BinaryFile *pBF, Prog* prog, BinaryFileFactory* pbff) : pBF(pBF), pbff(pbff), prog(prog)
00069 {}
00070 
00071 // Static function to instantiate an appropriate concrete front end
00072 FrontEnd* FrontEnd::instantiate(BinaryFile *pBF, Prog* prog, BinaryFileFactory* pbff) {
00073     switch(pBF->GetMachine()) {
00074         case MACHINE_PENTIUM:
00075             return new PentiumFrontEnd(pBF, prog, pbff);
00076         case MACHINE_SPARC:
00077             return new SparcFrontEnd(pBF, prog, pbff);
00078         case MACHINE_PPC:
00079             return new PPCFrontEnd(pBF, prog, pbff);
00080         case MACHINE_ST20:
00081             return new ST20FrontEnd(pBF, prog, pbff);
00082         default:
00083             std::cerr << "Machine architecture not supported!\n";
00084     }
00085     return NULL;
00086 }
00087 
00088 FrontEnd* FrontEnd::Load(const char *fname, Prog* prog) {
00089     BinaryFileFactory* pbff = new BinaryFileFactory;
00090     if (pbff == NULL) return NULL;
00091     BinaryFile *pBF = pbff->Load(fname);
00092     if (pBF == NULL) return NULL;
00093     return instantiate(pBF, prog, pbff);
00094 }
00095 
00096 // destructor
00097 FrontEnd::~FrontEnd() {
00098     if (pbff)
00099         pbff->UnLoad();         // Unload the BinaryFile library with dlclose() or FreeLibrary()
00100 }
00101 
00102 const char *FrontEnd::getRegName(int idx) { 
00103     std::map<std::string, int, std::less<std::string> >::iterator it;
00104     for (it = decoder->getRTLDict().RegMap.begin();  it != decoder->getRTLDict().RegMap.end(); it++)
00105         if ((*it).second == idx) 
00106             return (*it).first.c_str();
00107     return NULL;
00108 }
00109 
00110 int FrontEnd::getRegSize(int idx) {
00111     if (decoder->getRTLDict().DetRegMap.find(idx) == decoder->getRTLDict().DetRegMap.end())
00112         return 32;
00113     return decoder->getRTLDict().DetRegMap[idx].g_size();
00114 }
00115 
00116 bool FrontEnd::isWin32() {
00117     return pBF->GetFormat() == LOADFMT_PE;
00118 }
00119 
00120 bool FrontEnd::noReturnCallDest(const char *name)
00121 {
00122     return ((strcmp(name, "_exit") == 0) || (strcmp(name,   "exit") == 0) || (strcmp(name, "ExitProcess") == 0) || (strcmp(name, "abort") == 0) || (strcmp(name, "_assert") == 0));
00123 }
00124 
00125 // FIXME: Is this ever used? Need to pass a real pbff?
00126 FrontEnd *FrontEnd::createById(std::string &str, BinaryFile *pBF, Prog* prog) {
00127     if (str == "pentium")
00128         return new PentiumFrontEnd(pBF, prog, NULL);
00129     if (str == "sparc")
00130         return new SparcFrontEnd(pBF, prog, NULL);
00131     if (str == "ppc")
00132         return new PPCFrontEnd(pBF, prog, NULL);
00133     if (str == "st20")
00134         return new ST20FrontEnd(pBF, prog, NULL);
00135     return NULL;
00136 }
00137 
00138 void FrontEnd::readLibraryCatalog(const char *sPath) {
00139     std::ifstream inf(sPath);
00140     if (!inf.good()) {
00141         std::cerr << "can't open `" << sPath << "'\n";
00142         exit(1);
00143     }
00144 
00145     while (!inf.eof()) {
00146         std::string sFile;
00147         inf >> sFile;
00148         size_t j = sFile.find('#');
00149         if (j != (size_t)-1)
00150             sFile = sFile.substr(0, j);
00151         if (sFile.size() > 0 && sFile[sFile.size()-1] == '\n')
00152             sFile = sFile.substr(0, sFile.size()-1);
00153         if (sFile == "") continue;
00154         std::string sPath = Boomerang::get()->getProgPath() + "signatures/" + sFile;
00155         callconv cc = CONV_C;           // Most APIs are C calling convention
00156         if (sFile == "windows.h")   cc = CONV_PASCAL;       // One exception
00157         if (sFile == "mfc.h")       cc = CONV_THISCALL;     // Another exception
00158         readLibrarySignatures(sPath.c_str(), cc);
00159     }
00160     inf.close();
00161 }
00162 
00163 void FrontEnd::readLibraryCatalog() {
00164     librarySignatures.clear();
00165     std::string sList = Boomerang::get()->getProgPath() + "signatures/common.hs";
00166 
00167     readLibraryCatalog(sList.c_str());
00168     sList = Boomerang::get()->getProgPath() + "signatures/" + Signature::platformName(getFrontEndId()) + ".hs";
00169     readLibraryCatalog(sList.c_str());
00170     if (isWin32()) {
00171         sList = Boomerang::get()->getProgPath() + "signatures/win32.hs";
00172         readLibraryCatalog(sList.c_str());
00173     }
00174 }
00175 
00176 std::vector<ADDRESS> FrontEnd::getEntryPoints()
00177 {
00178     std::vector<ADDRESS> entrypoints;
00179     bool gotMain = false;
00180     ADDRESS a = getMainEntryPoint(gotMain);
00181     if (a != NO_ADDRESS)
00182         entrypoints.push_back(a);
00183     else {  // try some other tricks
00184         const char *fname = pBF->getFilename();
00185         // X11 Module
00186         if (!strcmp(fname + strlen(fname) - 6, "_drv.o")) {
00187             const char *p = fname + strlen(fname) - 6;
00188             while (*p != '/' && *p != '\\' && p != fname)
00189                 p--;
00190             if (p != fname) {
00191                 p++;
00192                 char *name = (char*)malloc(strlen(p) + 30);
00193                 strcpy(name, p);
00194                 name[strlen(name)-6] = 0;
00195                 strcat(name, "ModuleData");
00196                 ADDRESS a = pBF->GetAddressByName(name, true);
00197                 if (a != NO_ADDRESS) {
00198                     ADDRESS vers, setup, teardown;
00199                     vers = pBF->readNative4(a);
00200                     setup = pBF->readNative4(a+4);
00201                     teardown = pBF->readNative4(a+8);
00202                     if (setup) {
00203                         Type *ty = NamedType::getNamedType("ModuleSetupProc");
00204                         assert(ty->isFunc());
00205                         UserProc *proc = (UserProc*)prog->setNewProc(setup);
00206                         assert(proc);
00207                         Signature *sig = ty->asFunc()->getSignature()->clone();
00208                         const char *sym = pBF->SymbolByAddress(setup);
00209                         if (sym)
00210                             sig->setName(sym);
00211                         sig->setForced(true);
00212                         proc->setSignature(sig);
00213                         entrypoints.push_back(setup);
00214                     }
00215                     if (teardown) {
00216                         Type *ty = NamedType::getNamedType("ModuleTearDownProc");
00217                         assert(ty->isFunc());
00218                         UserProc *proc = (UserProc*)prog->setNewProc(teardown);
00219                         assert(proc);
00220                         Signature *sig = ty->asFunc()->getSignature()->clone();
00221                         const char *sym = pBF->SymbolByAddress(teardown);
00222                         if (sym)
00223                             sig->setName(sym);
00224                         sig->setForced(true);
00225                         proc->setSignature(sig);                        
00226                         entrypoints.push_back(teardown);
00227                     }
00228                 }
00229             }
00230         }
00231         // Linux kernel module
00232         if (!strcmp(fname + strlen(fname) - 3, ".ko")) {
00233             a = pBF->GetAddressByName("init_module");
00234             if (a != NO_ADDRESS)
00235                 entrypoints.push_back(a);
00236             a = pBF->GetAddressByName("cleanup_module");
00237             if (a != NO_ADDRESS)
00238                 entrypoints.push_back(a);
00239         }
00240     }
00241     return entrypoints;
00242 }
00243 
00244 void FrontEnd::decode(Prog* prog, bool decodeMain, const char *pname) {
00245     if (pname)
00246         prog->setName(pname);
00247 
00248     if (!decodeMain)
00249         return;
00250     
00251     Boomerang::get()->alert_start_decode(pBF->getLimitTextLow(), pBF->getLimitTextHigh() - pBF->getLimitTextLow());
00252 
00253     bool gotMain;
00254     ADDRESS a = getMainEntryPoint(gotMain);
00255     if (VERBOSE)
00256         LOG << "start: " << a << " gotmain: " << (gotMain ? "true" : "false") << "\n";
00257     if (a == NO_ADDRESS) {
00258         std::vector<ADDRESS> entrypoints = getEntryPoints();
00259         for (std::vector<ADDRESS>::iterator it = entrypoints.begin(); it != entrypoints.end(); it++)
00260             decode(prog, *it);
00261         return;
00262     }
00263 
00264     decode(prog, a);
00265     prog->setEntryPoint(a);
00266 
00267     if (gotMain) {
00268         static const char *mainName[] = { "main", "WinMain", "DriverEntry" };
00269         const char *name = pBF->SymbolByAddress(a);
00270         if (name == NULL)
00271             name = mainName[0];
00272         for (size_t i = 0; i < sizeof(mainName)/sizeof(char*); i++) {
00273             if (!strcmp(name, mainName[i])) {
00274                 Proc *proc = prog->findProc(a);
00275                 if (proc == NULL) {
00276                     if (VERBOSE)
00277                         LOG << "no proc found for address " << a << "\n";
00278                     return;
00279                 }
00280                 FuncType *fty = dynamic_cast<FuncType*>(Type::getNamedType(name));
00281                 if (fty == NULL)
00282                     LOG << "unable to find signature for known entrypoint " << name << "\n";
00283                 else {
00284                     proc->setSignature(fty->getSignature()->clone());
00285                     proc->getSignature()->setName(name);
00286                     //proc->getSignature()->setFullSig(true);       // Don't add or remove parameters
00287                     proc->getSignature()->setForced(true);          // Don't add or remove parameters
00288                 }
00289                 break;
00290             }
00291         }
00292     }
00293     return;
00294 }
00295 
00296 // Somehow, a == NO_ADDRESS has come to mean decode anything not already decoded
00297 void FrontEnd::decode(Prog *prog, ADDRESS a) {
00298     if (a != NO_ADDRESS) {
00299         prog->setNewProc(a);
00300         if (VERBOSE)
00301             LOG << "starting decode at address " << a << "\n";
00302         UserProc* p = (UserProc*)prog->findProc(a);
00303         if (p == NULL) {
00304             if (VERBOSE)
00305                 LOG << "no proc found at address " << a << "\n";
00306             return;
00307         }
00308         if (p->isLib()) {
00309             LOG << "NOT decoding library proc at address 0x" << a << "\n";
00310             return;
00311         }
00312         std::ofstream os;
00313         processProc(a, p, os);
00314         p->setDecoded();
00315 
00316     } else {                        // a == NO_ADDRESS
00317         bool change = true;
00318         while (change) {
00319             change = false;
00320             PROGMAP::const_iterator it;
00321             for (Proc *pProc = prog->getFirstProc(it); pProc != NULL; pProc = prog->getNextProc(it)) {
00322                 if (pProc->isLib()) continue;
00323                 UserProc *p = (UserProc*)pProc;
00324                 if (p->isDecoded()) continue;
00325 
00326                 // undecoded userproc.. decode it           
00327                 change = true;
00328                 std::ofstream os;
00329                 int res = processProc(p->getNativeAddress(), p, os);
00330                 if (res == 1)
00331                     p->setDecoded();
00332                 else
00333                     break;
00334                 // Break out of the loops if not decoding children
00335                 if (Boomerang::get()->noDecodeChildren)
00336                     break;
00337             }
00338             if (Boomerang::get()->noDecodeChildren)
00339                 break;
00340         }
00341     }
00342     prog->wellForm();
00343 }
00344 
00345 // a should be the address of a UserProc
00346 void FrontEnd::decodeOnly(Prog *prog, ADDRESS a) {
00347     UserProc* p = (UserProc*)prog->setNewProc(a);
00348     assert(!p->isLib());
00349     std::ofstream os;
00350     if (processProc(p->getNativeAddress(), p, os))
00351         p->setDecoded();
00352     prog->wellForm();
00353 }
00354 
00355 
00356 void FrontEnd::decodeFragment(UserProc* proc, ADDRESS a) {
00357     if (Boomerang::get()->traceDecoder)
00358         LOG << "decoding fragment at 0x" << a << "\n";
00359     std::ofstream os;
00360     processProc(a, proc, os, true);
00361 }
00362 
00363 DecodeResult& FrontEnd::decodeInstruction(ADDRESS pc) {
00364     if (pBF->GetSectionInfoByAddr(pc) == NULL) {
00365         LOG << "ERROR: attempted to decode outside any known segment " << pc << "\n";
00366         static DecodeResult invalid;
00367         invalid.reset();
00368         invalid.valid = false;
00369         return invalid;
00370     }
00371     return decoder->decodeInstruction(pc, pBF->getTextDelta());
00372 }
00373 
00374 /*==============================================================================
00375  * FUNCTION:       FrontEnd::readLibrarySignatures
00376  * OVERVIEW:       Read the library signatures from a file
00377  * PARAMETERS:     sPath: The file to read from
00378  *                 cc: the calling convention assumed
00379  * RETURNS:        <nothing>
00380  *============================================================================*/
00381 void FrontEnd::readLibrarySignatures(const char *sPath, callconv cc) {
00382     std::ifstream ifs;
00383 
00384     ifs.open(sPath);
00385 
00386     if (!ifs.good()) {
00387         std::cerr << "can't open `" << sPath << "'\n";
00388         exit(1);
00389     }
00390 
00391     AnsiCParser *p = new AnsiCParser(ifs, false);
00392     
00393     platform plat = getFrontEndId();
00394     p->yyparse(plat, cc);
00395 
00396     for (std::list<Signature*>::iterator it = p->signatures.begin(); it != p->signatures.end(); it++) {
00397 #if 0
00398         std::cerr << "readLibrarySignatures from " << sPath << ": " << (*it)->getName() << "\n";
00399 #endif
00400         librarySignatures[(*it)->getName()] = *it;
00401         (*it)->setSigFile(sPath);
00402     }
00403 
00404     delete p;
00405     ifs.close();
00406 }
00407 
00408 Signature *FrontEnd::getDefaultSignature(const char *name)
00409 {
00410     Signature *signature = NULL;
00411     // Get a default library signature
00412     if (isWin32())
00413         signature = Signature::instantiate(PLAT_PENTIUM, CONV_PASCAL, name);
00414     else {
00415         signature = Signature::instantiate(getFrontEndId(), CONV_C, name);
00416     } 
00417     return signature;
00418 }
00419 
00420 // get a library signature by name
00421 Signature *FrontEnd::getLibSignature(const char *name) {
00422     Signature *signature;
00423     // Look up the name in the librarySignatures map
00424     std::map<std::string, Signature*>::iterator it;
00425     it = librarySignatures.find(name);
00426     if (it == librarySignatures.end()) {
00427         LOG << "Unknown library function " << name << "\n";
00428         signature = getDefaultSignature(name);
00429     }
00430     else {
00431         // Don't clone here; cloned in CallStatement::setSigArguments
00432         signature = (*it).second;
00433         signature->setUnknown(false);
00434     }
00435     return signature;
00436 }
00437 
00438 /*==============================================================================
00439  * FUNCTION:      FrontEnd::processProc
00440  * OVERVIEW:      Process a procedure, given a native (source machine) address.
00441  * PARAMETERS:    uAddr - the address at which the procedure starts
00442  *                pProc - the procedure object
00443  *                os - the output stream for .rtl output
00444  *                frag - if true, this is just a fragment of a procedure
00445  *                spec - if true, this is a speculative decode
00446  * NOTE:          This is a sort of generic front end. For many processors, this will be overridden
00447  *                  in the FrontEnd derived class, sometimes calling this function to do most of the work
00448  * RETURNS:       true for a good decode (no illegal instructions)
00449  *============================================================================*/
00450 bool FrontEnd::processProc(ADDRESS uAddr, UserProc* pProc, std::ofstream &os, bool frag /* = false */,
00451         bool spec /* = false */) {
00452     PBB pBB;                    // Pointer to the current basic block
00453 
00454     // just in case you missed it
00455     Boomerang::get()->alert_new(pProc);
00456 
00457     // We have a set of CallStatement pointers. These may be disregarded if this is a speculative decode
00458     // that fails (i.e. an illegal instruction is found). If not, this set will be used to add to the set of calls
00459     // to be analysed in the cfg, and also to call newProc()
00460     std::list<CallStatement*> callList;
00461 
00462     // Indicates whether or not the next instruction to be decoded is the lexical successor of the current one.
00463     // Will be true for all NCTs and for CTIs with a fall through branch.
00464     bool sequentialDecode = true;
00465 
00466     Cfg* pCfg = pProc->getCFG();
00467 
00468     // If this is a speculative decode, the second time we decode the same address, we get no cfg. Else an error.
00469     if (spec && (pCfg == 0))
00470         return false;
00471     assert(pCfg);
00472 
00473     // Initialise the queue of control flow targets that have yet to be decoded.
00474     targetQueue.initial(uAddr);
00475 
00476     // Clear the pointer used by the caller prologue code to access the last call rtl of this procedure
00477     //decoder.resetLastCall();
00478 
00479     // ADDRESS initAddr = uAddr;
00480     int nTotalBytes = 0;
00481     ADDRESS startAddr = uAddr;
00482     ADDRESS lastAddr = uAddr;
00483 
00484     while ((uAddr = targetQueue.nextAddress(pCfg)) != NO_ADDRESS) {
00485         // The list of RTLs for the current basic block
00486         std::list<RTL*>* BB_rtls = new std::list<RTL*>();
00487 
00488         // Keep decoding sequentially until a CTI without a fall through branch is decoded
00489         //ADDRESS start = uAddr;
00490         DecodeResult inst;
00491         while (sequentialDecode) {
00492 
00493             // Decode and classify the current source instruction
00494             if (Boomerang::get()->traceDecoder)
00495                 LOG << "*" << uAddr << "\t";
00496 
00497             // Decode the inst at uAddr.
00498             inst = decodeInstruction(uAddr);
00499 
00500             // If invalid and we are speculating, just exit
00501             if (spec && !inst.valid)
00502                 return false;
00503 
00504             // Need to construct a new list of RTLs if a basic block has just been finished but decoding is
00505             // continuing from its lexical successor
00506             if (BB_rtls == NULL)
00507                 BB_rtls = new std::list<RTL*>();
00508 
00509             RTL* pRtl = inst.rtl;
00510             if (inst.valid == false) {
00511                 // Alert the watchers to the problem
00512                 Boomerang::get()->alert_baddecode(uAddr);
00513 
00514                 // An invalid instruction. Most likely because a call did not return (e.g. call _exit()), etc.
00515                 // Best thing is to emit a INVALID BB, and continue with valid instructions
00516                 if (VERBOSE) {
00517                     LOG << "Warning: invalid instruction at " << uAddr << ": ";
00518                     // Emit the next 4 bytes for debugging
00519                     for (int ii=0; ii < 4; ii++)
00520                         LOG << (unsigned)(pBF->readNative1(uAddr + ii) & 0xFF) << " ";
00521                     LOG << "\n";
00522                 }
00523                 // Emit the RTL anyway, so we have the address and maybe some other clues
00524                 BB_rtls->push_back(new RTL(uAddr));  
00525                 pBB = pCfg->newBB(BB_rtls, INVALID, 0);
00526                 sequentialDecode = false; BB_rtls = NULL; continue;
00527             }
00528 
00529             // alert the watchers that we have decoded an instruction
00530             Boomerang::get()->alert_decode(uAddr, inst.numBytes);
00531             nTotalBytes += inst.numBytes;           
00532     
00533             // Check if this is an already decoded jump instruction (from a previous pass with propagation etc)
00534             // If so, we throw away the just decoded RTL (but we still may have needed to calculate the number
00535             // of bytes.. ick.)
00536             std::map<ADDRESS, RTL*>::iterator ff = previouslyDecoded.find(uAddr);
00537             if (ff != previouslyDecoded.end())
00538                 pRtl = ff->second;
00539 
00540             if (pRtl == NULL) {
00541                 // This can happen if an instruction is "cancelled", e.g. call to __main in a hppa program
00542                 // Just ignore the whole instruction
00543                 if (inst.numBytes > 0)
00544                     uAddr += inst.numBytes;
00545                 continue;
00546             }
00547 
00548             // Display RTL representation if asked
00549             if (Boomerang::get()->printRtl) {
00550                 std::ostringstream st;
00551                 pRtl->print(st);
00552                 LOG << st.str().c_str();
00553             }
00554     
00555             ADDRESS uDest;
00556 
00557             // For each Statement in the RTL
00558             //std::list<Statement*>& sl = pRtl->getList();
00559             std::list<Statement*> sl = pRtl->getList();
00560             // Make a copy (!) of the list. This is needed temporarily to work around the following problem.
00561             // We are currently iterating an RTL, which could be a return instruction. The RTL is passed to
00562             // createReturnBlock; if this is not the first return statement, it will get cleared, and this will
00563             // cause problems with the current iteration. The effects seem to be worse for MSVC/Windows.
00564             // This problem will likely be easier to cope with when the RTLs are removed, and there are special
00565             // Statements to mark the start of instructions (and their native address).
00566             // FIXME: However, this workaround breaks logic below where a GOTO is changed to a CALL followed by a return
00567             // if it points to the start of a known procedure
00568             std::list<Statement*>::iterator ss;
00569 #if 1
00570             for (ss = sl.begin(); ss != sl.end(); ss++) { // }
00571 #else
00572             // The counter is introduced because ss != sl.end() does not work as it should
00573             // FIXME: why? Does this really fix the problem?
00574             int counter = sl.size();
00575             for (ss = sl.begin(); counter > 0; ss++, counter--) {
00576 #endif
00577                 Statement* s = *ss;
00578                 s->setProc(pProc);      // let's do this really early!
00579                 if (refHints.find(pRtl->getAddress()) != refHints.end()) {
00580                     const char *nam = refHints[pRtl->getAddress()].c_str();
00581                     ADDRESS gu = prog->getGlobalAddr((char*)nam);
00582                     if (gu != NO_ADDRESS) {
00583                         s->searchAndReplace(new Const((int)gu), new Unary(opAddrOf, Location::global(nam, pProc)));
00584                     }
00585                 }
00586                 s->simplify();
00587                 GotoStatement* stmt_jump = static_cast<GotoStatement*>(s);
00588 
00589                 // Check for a call to an already existing procedure (including self recursive jumps), or to the PLT
00590                 // (note that a LibProc entry for the PLT function may not yet exist)
00591                 ADDRESS dest;
00592                 Proc* proc;
00593                 if (s->getKind() == STMT_GOTO) {
00594                     dest = stmt_jump->getFixedDest();
00595                     if (dest != NO_ADDRESS) {
00596                         proc = prog->findProc(dest);
00597                         if (proc == NULL) {
00598                             if (pBF->IsDynamicLinkedProc(dest))
00599                                 proc = prog->setNewProc(dest);
00600                         }
00601                         if (proc != NULL && proc != (Proc*)-1) {
00602                             s = new CallStatement();
00603                             CallStatement *call = static_cast<CallStatement*>(s);
00604                             call->setDest(dest);
00605                             call->setDestProc(proc);
00606                             call->setReturnAfterCall(true);
00607                             // also need to change it in the actual RTL
00608                             std::list<Statement*>::iterator ss1 = ss;
00609                             ss1++;
00610                             assert(ss1 == sl.end());
00611                             pRtl->replaceLastStmt(s);
00612                             *ss = s;
00613                         }
00614                     }
00615                 }
00616 
00617                 switch (s->getKind())
00618                 {
00619 
00620                 case STMT_GOTO: {
00621                     uDest = stmt_jump->getFixedDest();
00622     
00623                     // Handle one way jumps and computed jumps separately
00624                     if (uDest != NO_ADDRESS) {
00625 
00626                         BB_rtls->push_back(pRtl);
00627                         sequentialDecode = false;
00628 
00629                         pBB = pCfg->newBB(BB_rtls,ONEWAY,1);
00630                         BB_rtls = NULL;     // Clear when make new BB
00631 
00632                         // Exit the switch now if the basic block already existed
00633                         if (pBB == 0) {
00634                             break;
00635                         }
00636 
00637                         // Add the out edge if it is to a destination within the
00638                         // procedure
00639                         if (uDest < pBF->getLimitTextHigh()) {
00640                             targetQueue.visit(pCfg, uDest, pBB);
00641                             pCfg->addOutEdge(pBB, uDest, true);
00642                         }
00643                         else {
00644                             LOG << "Error: Instruction at " << uAddr << " branches beyond end of section, to "
00645                                 << uDest << "\n";
00646                         }
00647                     }
00648                     break;
00649                 }
00650 
00651                 case STMT_CASE: {
00652                     Exp* pDest = stmt_jump->getDest();
00653                     if (pDest == NULL) {                // Happens if already analysed (now redecoding)
00654                         // SWITCH_INFO* psi = ((CaseStatement*)stmt_jump)->getSwitchInfo();
00655                         BB_rtls->push_back(pRtl);
00656                         pBB = pCfg->newBB(BB_rtls, NWAY, 0);    // processSwitch will update num outedges
00657                         pBB->processSwitch(pProc);      // decode arms, set out edges, etc
00658                         sequentialDecode = false;       // Don't decode after the jump
00659                         BB_rtls = NULL;                 // New RTLList for next BB
00660                         break;                          // Just leave it alone
00661                     }
00662                     // Check for indirect calls to library functions, especially in Win32 programs
00663                     if (pDest && pDest->getOper() == opMemOf &&
00664                             pDest->getSubExp1()->getOper() == opIntConst && 
00665                             pBF->IsDynamicLinkedProcPointer(((Const*)pDest->getSubExp1())->getAddr())) {
00666                         if (VERBOSE)
00667                             LOG << "jump to a library function: " << stmt_jump << ", replacing with a call/ret.\n";
00668                         // jump to a library function
00669                         // replace with a call ret
00670                         std::string func = pBF->GetDynamicProcName(
00671                             ((Const*)stmt_jump->getDest()->getSubExp1())->getAddr());
00672                         CallStatement *call = new CallStatement;
00673                         call->setDest(stmt_jump->getDest()->clone());
00674                         LibProc *lp = pProc->getProg()->getLibraryProc(func.c_str());
00675                         if (lp == NULL)
00676                             LOG << "getLibraryProc returned NULL, aborting\n";
00677                         assert(lp);
00678                         call->setDestProc(lp);
00679                         std::list<Statement*>* stmt_list = new std::list<Statement*>;
00680                         stmt_list->push_back(call);
00681                         BB_rtls->push_back(new RTL(pRtl->getAddress(), stmt_list));
00682                         pBB = pCfg->newBB(BB_rtls, CALL, 1);
00683                         appendSyntheticReturn(pBB, pProc, pRtl);
00684                         sequentialDecode = false;
00685                         BB_rtls = NULL;
00686                         if (pRtl->getAddress() == pProc->getNativeAddress()) {
00687                             // it's a thunk
00688                             // Proc *lp = prog->findProc(func.c_str());
00689                             func = std::string("__imp_") + func;
00690                             pProc->setName(func.c_str());
00691                             //lp->setName(func.c_str());
00692                             Boomerang::get()->alert_update_signature(pProc);
00693                         }
00694                         callList.push_back(call);
00695                         ss = sl.end(); ss--;    // get out of the loop
00696                         break;
00697                     }
00698                     BB_rtls->push_back(pRtl);
00699                     // We create the BB as a COMPJUMP type, then change to an NWAY if it turns out to be a switch stmt
00700                     pBB = pCfg->newBB(BB_rtls, COMPJUMP, 0);
00701                     LOG << "COMPUTED JUMP at " << uAddr << ", pDest = " << pDest << "\n";
00702                     if (Boomerang::get()->noDecompile) {
00703                         // try some hacks
00704                         if (pDest->isMemOf() && pDest->getSubExp1()->getOper() == opPlus &&
00705                                 pDest->getSubExp1()->getSubExp2()->isIntConst()) {
00706                             // assume subExp2 is a jump table
00707                             ADDRESS jmptbl = ((Const*)pDest->getSubExp1()->getSubExp2())->getInt();
00708                             unsigned int i;
00709                             for (i = 0; ; i++) {
00710                                 ADDRESS uDest = pBF->readNative4(jmptbl + i * 4);
00711                                 if (pBF->getLimitTextLow() <= uDest && uDest < pBF->getLimitTextHigh()) {
00712                                     LOG << "  guessed uDest " << uDest << "\n";
00713                                     targetQueue.visit(pCfg, uDest, pBB);
00714                                     pCfg->addOutEdge(pBB, uDest, true);
00715                                 } else
00716                                     break;
00717                             }
00718                             pBB->updateType(NWAY, i);
00719                         }
00720                     }
00721                     sequentialDecode = false;
00722                     BB_rtls = NULL;     // New RTLList for next BB
00723                     break;
00724                 }
00725 
00726 
00727                 case STMT_BRANCH: {
00728                     uDest = stmt_jump->getFixedDest();
00729                     BB_rtls->push_back(pRtl);
00730                     pBB = pCfg->newBB(BB_rtls, TWOWAY, 2);
00731 
00732                     // Stop decoding sequentially if the basic block already existed otherwise complete the basic block
00733                     if (pBB == 0)
00734                         sequentialDecode = false;
00735                     else {
00736 
00737                         // Add the out edge if it is to a destination within the procedure
00738                         if (uDest < pBF->getLimitTextHigh()) {
00739                             targetQueue.visit(pCfg, uDest, pBB);
00740                             pCfg->addOutEdge(pBB, uDest, true);
00741                         }
00742                         else {
00743                             LOG << "Error: Instruction at " << uAddr << " branches beyond end of section, to "
00744                                 << uDest << "\n";
00745                         }
00746 
00747                         // Add the fall-through outedge
00748                         pCfg->addOutEdge(pBB, uAddr + inst.numBytes); 
00749                     }
00750 
00751                     // Create the list of RTLs for the next basic block and continue with the next instruction.
00752                     BB_rtls = NULL;
00753                     break;
00754                 }
00755 
00756                 case STMT_CALL: {
00757                     CallStatement* call = static_cast<CallStatement*>(s);
00758                     
00759                     // Check for a dynamic linked library function
00760                     if (call->getDest()->getOper() == opMemOf &&
00761                             call->getDest()->getSubExp1()->getOper() == opIntConst &&
00762                             pBF->IsDynamicLinkedProcPointer(((Const*)call->getDest()->getSubExp1())->getAddr())) {
00763                         // Dynamic linked proc pointers are treated as static.
00764                         const char *nam = pBF->GetDynamicProcName( ((Const*)call->getDest()->getSubExp1())->getAddr());
00765                         Proc *p = pProc->getProg()->getLibraryProc(nam);
00766                         call->setDestProc(p);
00767                         call->setIsComputed(false);
00768                     }
00769 
00770                     // Is the called function a thunk calling a library function?
00771                     // A "thunk" is a function which only consists of: "GOTO library_function"
00772                     if( call && call->getFixedDest() != NO_ADDRESS ) {
00773                         // Get the address of the called function.
00774                         ADDRESS callAddr=call->getFixedDest();
00775                         // It should not be in the PLT either, but getLimitTextHigh() takes this into account
00776                         if (callAddr < pBF->getLimitTextHigh()) {
00777                             // Decode it.
00778                             DecodeResult decoded=decodeInstruction(callAddr);
00779                             if (decoded.valid) { // is the instruction decoded succesfully?
00780                                 // Yes, it is. Create a Statement from it.
00781                                 RTL *rtl = decoded.rtl;
00782                                 Statement* first_statement = *rtl->getList().begin();
00783                                 if (first_statement) {
00784                                     first_statement->setProc(pProc);
00785                                     first_statement->simplify();
00786                                     GotoStatement* stmt_jump = static_cast<GotoStatement*>(first_statement);
00787                                     // In fact it's a computed (looked up) jump, so the jump seems to be a case
00788                                     // statement.
00789                                     if ( first_statement->getKind() == STMT_CASE &&
00790                                         stmt_jump->getDest()->getOper() == opMemOf &&
00791                                         stmt_jump->getDest()->getSubExp1()->getOper() == opIntConst &&
00792                                         pBF->IsDynamicLinkedProcPointer(((Const*)stmt_jump->getDest()->getSubExp1())->
00793                                             getAddr())) // Is it an "DynamicLinkedProcPointer"?
00794                                     {
00795                                         // Yes, it's a library function. Look up it's name.
00796                                         ADDRESS a = ((Const*)stmt_jump->getDest()->getSubExp1())->getAddr();
00797                                         const char *nam = pBF->GetDynamicProcName(a);
00798                                         // Assign the proc to the call
00799                                         Proc *p = pProc->getProg()->getLibraryProc(nam);
00800                                         if (call->getDestProc()) {
00801                                             // prevent unnecessary __imp procs
00802                                             prog->removeProc(call->getDestProc()->getName());
00803                                         }
00804                                         call->setDestProc(p);
00805                                         call->setIsComputed(false);
00806                                         call->setDest(Location::memOf(new Const(a)));
00807                                     }
00808                                 }
00809                             }
00810                         }
00811                     }
00812 
00813                     // Treat computed and static calls separately
00814                     if (call->isComputed()) {
00815                         BB_rtls->push_back(pRtl);
00816                         pBB = pCfg->newBB(BB_rtls, COMPCALL, 1);
00817 
00818                         // Stop decoding sequentially if the basic block already
00819                         // existed otherwise complete the basic block
00820                         if (pBB == 0)
00821                             sequentialDecode = false;
00822                         else
00823                             pCfg->addOutEdge(pBB, uAddr + inst.numBytes);
00824                         // Add this call to the list of calls to analyse. We won't
00825                         // be able to analyse it's callee(s), of course.
00826                         callList.push_back(call);
00827                     }
00828                     else {      // Static call
00829                         // Find the address of the callee.
00830                         ADDRESS uNewAddr = call->getFixedDest();
00831 
00832                         // Calls with 0 offset (i.e. call the next instruction) are simply pushing the PC to the
00833                         // stack. Treat these as non-control flow instructions and continue.
00834                         if (uNewAddr == uAddr + inst.numBytes)
00835                             break;
00836 
00837                         // Call the virtual helper function. If implemented, will check for machine specific funcion
00838                         // calls
00839                         if (helperFunc(uNewAddr, uAddr, BB_rtls)) {
00840                             // We have already added to BB_rtls
00841                             pRtl = NULL;        // Discard the call semantics
00842                             break;
00843                         }
00844 
00845                         BB_rtls->push_back(pRtl);
00846 
00847                         // Add this non computed call site to the set of call sites which need to be analysed later.
00848                         //pCfg->addCall(call);
00849                         callList.push_back(call);
00850 
00851                         // Record the called address as the start of a new procedure if it didn't already exist.
00852                         if (uNewAddr && uNewAddr != NO_ADDRESS && pProc->getProg()->findProc(uNewAddr) == NULL) {
00853                             callList.push_back(call);
00854                             //newProc(pProc->getProg(), uNewAddr);
00855                             if (Boomerang::get()->traceDecoder)
00856                                 LOG << "p" << uNewAddr << "\t";
00857                         }
00858 
00859                         // Check if this is the _exit or exit function. May prevent us from attempting to decode
00860                         // invalid instructions, and getting invalid stack height errors
00861                         const char* name = pBF->SymbolByAddress(uNewAddr);
00862                         if (name == NULL && call->getDest()->isMemOf() && 
00863                                             call->getDest()->getSubExp1()->isIntConst()) {
00864                             ADDRESS a = ((Const*)call->getDest()->getSubExp1())->getInt();
00865                             if (pBF->IsDynamicLinkedProcPointer(a))
00866                                 name = pBF->GetDynamicProcName(a);
00867                         }   
00868                         if (name && noReturnCallDest(name)) {
00869                             // Make sure it has a return appended (so there is only one exit from the function)
00870                             //call->setReturnAfterCall(true);       // I think only the Sparc frontend cares
00871                             // Create the new basic block
00872                             pBB = pCfg->newBB(BB_rtls, CALL, 1);
00873                             appendSyntheticReturn(pBB, pProc, pRtl);
00874 
00875                             // Stop decoding sequentially
00876                             sequentialDecode = false;
00877                         }
00878                         else {
00879                             // Create the new basic block
00880                             pBB = pCfg->newBB(BB_rtls, CALL, 1);
00881 
00882                             if (call->isReturnAfterCall()) {
00883                                 // Constuct the RTLs for the new basic block
00884                                 std::list<RTL*>* rtls = new std::list<RTL*>();
00885                                 // The only RTL in the basic block is one with a ReturnStatement
00886                                 std::list<Statement*>* sl = new std::list<Statement*>;
00887                                 sl->push_back(new ReturnStatement());
00888                                 rtls->push_back(new RTL(pRtl->getAddress()+1, sl));
00889         
00890                                 BasicBlock* returnBB = pCfg->newBB(rtls, RET, 0);
00891                                 // Add out edge from call to return
00892                                 pCfg->addOutEdge(pBB, returnBB);
00893                                 // Put a label on the return BB (since it's an orphan); a jump will be reqd
00894                                 pCfg->setLabel(returnBB);
00895                                 pBB->setJumpReqd();
00896                                 // Mike: do we need to set return locations?
00897                                 // This ends the function
00898                                 sequentialDecode = false;
00899                             }
00900                             else {
00901                                 // Add the fall through edge if the block didn't
00902                                 // already exist
00903                                 if (pBB != NULL)
00904                                     pCfg->addOutEdge(pBB, uAddr+inst.numBytes);
00905                             }
00906                         }
00907                     }
00908 
00909                     extraProcessCall(call, BB_rtls);
00910 
00911                     // Create the list of RTLs for the next basic block and continue with the next instruction.
00912                     BB_rtls = NULL;
00913                     break;  
00914                 }
00915 
00916                 case STMT_RET: {
00917                     // Stop decoding sequentially
00918                     sequentialDecode = false;
00919 
00920                     pBB = createReturnBlock(pProc, BB_rtls, pRtl);
00921 
00922                     // Create the list of RTLs for the next basic block and
00923                     // continue with the next instruction.
00924                     BB_rtls = NULL;     // New RTLList for next BB
00925                 }
00926                 break;
00927 
00928                 case STMT_BOOLASSIGN:
00929                     // This is just an ordinary instruction; no control transfer
00930                     // Fall through
00931                 case STMT_JUNCTION:
00932                     // FIXME: Do we need to do anything here?
00933                 case STMT_ASSIGN:
00934                 case STMT_PHIASSIGN:
00935                 case STMT_IMPASSIGN:
00936                 case STMT_IMPREF:
00937                     // Do nothing
00938                     break;
00939         
00940                 } // switch (s->getKind())
00941             }
00942             if (BB_rtls && pRtl)
00943                 // If non null, we haven't put this RTL into a the current BB as yet
00944                 BB_rtls->push_back(pRtl);
00945 
00946             if (inst.reDecode)
00947                 // Special case: redecode the last instruction, without advancing uAddr by numBytes
00948                 continue;
00949             uAddr += inst.numBytes;
00950             if (uAddr > lastAddr)
00951                 lastAddr = uAddr;
00952 
00953             // If sequentially decoding, check if the next address happens to be the start of an existing BB. If so,
00954             // finish off the current BB (if any RTLs) as a fallthrough, and no need to decode again (unless it's an
00955             // incomplete BB, then we do decode it).
00956             // In fact, mustn't decode twice, because it will muck up the coverage, but also will cause subtle problems
00957             // like add a call to the list of calls to be processed, then delete the call RTL (e.g. Pentium 134.perl
00958             // benchmark)
00959             if (sequentialDecode && pCfg->existsBB(uAddr)) {
00960                 // Create the fallthrough BB, if there are any RTLs at all
00961                 if (BB_rtls) {
00962                     PBB pBB = pCfg->newBB(BB_rtls, FALL, 1);
00963                     // Add an out edge to this address
00964                     if (pBB) {
00965                         pCfg->addOutEdge(pBB, uAddr);
00966                         BB_rtls = NULL;         // Need new list of RTLs
00967                     }
00968                 }
00969                 // Pick a new address to decode from, if the BB is complete
00970                 if (!pCfg->isIncomplete(uAddr))
00971                     sequentialDecode = false;
00972             }
00973         }   // while sequentialDecode
00974 
00975         // Add this range to the coverage
00976 //        pProc->addRange(start, uAddr);
00977 
00978         // Must set sequentialDecode back to true
00979         sequentialDecode = true;
00980 
00981     }   // while nextAddress() != NO_ADDRESS
00982 
00983     //ProgWatcher *w = prog->getWatcher();
00984     //if (w)
00985     //    w->alert_done(pProc, initAddr, lastAddr, nTotalBytes);
00986 
00987     // Add the callees to the set of CallStatements, and also to the Prog object
00988     std::list<CallStatement*>::iterator it;
00989     for (it = callList.begin(); it != callList.end(); it++) {
00990         ADDRESS dest = (*it)->getFixedDest();
00991         // Don't speculatively decode procs that are outside of the main text section, apart from dynamically
00992         // linked ones (in the .plt)
00993         if (pBF->IsDynamicLinkedProc(dest) || !spec || (dest < pBF->getLimitTextHigh())) {
00994             pCfg->addCall(*it);
00995             // Don't visit the destination of a register call
00996             Proc *np = (*it)->getDestProc();
00997             if (np == NULL && dest != NO_ADDRESS) {
00998                 //np = newProc(pProc->getProg(), dest);
00999                 np = pProc->getProg()->setNewProc(dest);
01000             }
01001             if (np != NULL) {
01002                 np->setFirstCaller(pProc);
01003                 pProc->addCallee(np);
01004             }           
01005         }
01006     }
01007 
01008     Boomerang::get()->alert_decode(pProc, startAddr, lastAddr, nTotalBytes);
01009 
01010     if (VERBOSE)
01011         LOG << "finished processing proc " << pProc->getName() << " at address " << pProc->getNativeAddress() << "\n";
01012 
01013     return true;
01014 }
01015 
01016 /*==============================================================================
01017  * FUNCTION:    FrontEnd::getInst
01018  * OVERVIEW:    Fetch the smallest (nop-sized) instruction, in an endianness independent manner
01019  * NOTE:        Frequently overridden
01020  * PARAMETERS:  addr - host address to getch from
01021  * RETURNS:     An integer with the instruction in it
01022  *============================================================================*/
01023 int FrontEnd::getInst(int addr)
01024 {
01025     return (int)(*(unsigned char*)addr);
01026 }
01027 
01028 
01029 /*==============================================================================
01030  * FUNCTION:    TargetQueue::visit
01031  * OVERVIEW:    Visit a destination as a label, i.e. check whether we need to queue it as a new BB to create later.
01032  *              Note: at present, it is important to visit an address BEFORE an out edge is added to that address.
01033  *              This is because adding an out edge enters the address into the Cfg's BB map, and it looks like the
01034  *              BB has already been visited, and it gets overlooked. It would be better to have a scheme whereby
01035  *              the order of calling these functions (i.e. visit() and AddOutEdge()) did not matter.
01036  * PARAMETERS:  pCfg - the enclosing CFG
01037  *              uNewAddr - the address to be checked
01038  *              pNewBB - set to the lower part of the BB if the address
01039  *              already exists as a non explicit label (BB has to be split)
01040  * RETURNS:     <nothing>
01041  *============================================================================*/
01042 void TargetQueue::visit(Cfg* pCfg, ADDRESS uNewAddr, PBB& pNewBB) {
01043     // Find out if we've already parsed the destination
01044     bool bParsed = pCfg->label(uNewAddr, pNewBB);
01045     // Add this address to the back of the local queue,
01046     // if not already processed
01047     if (!bParsed) {
01048         targets.push(uNewAddr);
01049         if (Boomerang::get()->traceDecoder)
01050             LOG << ">" << uNewAddr << "\t";
01051     }
01052 }
01053 
01054 /*==============================================================================
01055  * FUNCTION:    TargetQueue::initial
01056  * OVERVIEW:    Seed the queue with an initial address
01057  * NOTE:        Can be some targets already in the queue now
01058  * PARAMETERS:  uAddr: Native address to seed the queue with
01059  * RETURNS:     <nothing>
01060  *============================================================================*/
01061 void TargetQueue::initial(ADDRESS uAddr) {
01062     targets.push(uAddr);
01063 }
01064 
01065 /*==============================================================================
01066  * FUNCTION:          TergetQueue::nextAddress
01067  * OVERVIEW:          Return the next target from the queue of non-processed
01068  *                    targets.
01069  * PARAMETERS:        cfg - the enclosing CFG
01070  * RETURNS:           The next address to process, or NO_ADDRESS if none
01071  *                      (targets is empty)
01072  *============================================================================*/
01073 ADDRESS TargetQueue::nextAddress(Cfg* cfg) {
01074     while (!targets.empty())
01075     {
01076         ADDRESS address = targets.front();
01077         targets.pop();
01078         if (Boomerang::get()->traceDecoder)
01079             LOG << "<" << address << "\t";
01080 
01081         // If no label there at all, or if there is a BB, it's incomplete, then we can parse this address next
01082         if (!cfg->existsBB(address) || cfg->isIncomplete(address))
01083             return address;
01084     }
01085     return NO_ADDRESS;
01086 }
01087 
01088 void TargetQueue::dump() {
01089     std::queue<ADDRESS> copy(targets);
01090     while (!copy.empty()) {
01091         ADDRESS a = copy.front();
01092         copy.pop();
01093         std::cerr << std::hex << a << ", ";
01094     }
01095     std::cerr << std::dec << "\n";
01096 }
01097 
01098 
01099 /*==============================================================================
01100  * FUNCTION:      decodeRtl
01101  * OVERVIEW:      Decode the RTL at the given address
01102  * PARAMETERS:    address - native address of the instruction
01103  *                delta - difference between host and native addresses
01104  *                decoder - decoder object
01105  * NOTE:          Only called from findCoverage()
01106  * RETURNS:       a pointer to the decoded RTL
01107  *============================================================================*/
01108 RTL* decodeRtl(ADDRESS address, int delta, NJMCDecoder* decoder) {
01109     DecodeResult inst = 
01110         decoder->decodeInstruction(address, delta);
01111 
01112     RTL*    rtl = inst.rtl;
01113 
01114     return rtl;
01115 }
01116 
01117 /*==============================================================================
01118  * FUNCTION:    FrontEnd::getProg
01119  * OVERVIEW:    Get a Prog object (mainly for testing and not decoding)
01120  * PARAMETERS:  None
01121  * RETURNS:     Pointer to a Prog object (with pFE and pBF filled in)
01122  *============================================================================*/
01123 Prog* FrontEnd::getProg() {
01124     return prog;
01125 }
01126 
01127 /*==============================================================================
01128  * FUNCTION:    createReturnBlock
01129  * OVERVIEW:    Create a Return or a Oneway BB if a return statement already exists
01130  * PARAMETERS:  pProc: pointer to enclosing UserProc
01131  *              BB_rtls: list of RTLs for the current BB (not including pRtl)
01132  *              pRtl: pointer to the current RTL with the semantics for the return statement (including a
01133  *                  ReturnStatement as the last statement)
01134  * RETURNS:     Pointer to the newly created BB
01135  *============================================================================*/
01136 PBB FrontEnd::createReturnBlock(UserProc* pProc, std::list<RTL*>* BB_rtls, RTL* pRtl) {
01137     Cfg* pCfg = pProc->getCFG();
01138     PBB pBB;
01139     // Add the RTL to the list; this has the semantics for the return instruction as well as the ReturnStatement
01140     // The last Statement may get replaced with a GotoStatement
01141     if (BB_rtls == NULL) BB_rtls = new std::list<RTL*>;     // In case no other semantics
01142     BB_rtls->push_back(pRtl);
01143     ADDRESS retAddr = pProc->getTheReturnAddr();
01144     // LOG << "retAddr = " << retAddr << " rtl = " << pRtl->getAddress() << "\n";
01145     if (retAddr == NO_ADDRESS) {
01146         // Create the basic block
01147         pBB = pCfg->newBB(BB_rtls, RET, 0);
01148         Statement* s = pRtl->getList().back();      // The last statement should be the ReturnStatement
01149         pProc->setTheReturnAddr((ReturnStatement*)s, pRtl->getAddress());
01150     } else {
01151         // We want to replace the *whole* RTL with a branch to THE first return's RTL. There can sometimes be extra
01152         // semantics associated with a return (e.g. Pentium return adds to the stack pointer before setting %pc and
01153         // branching). Other semantics (e.g. SPARC returning a value as part of the restore instruction) are assumed to
01154         // appear in a previous RTL. It is assumed that THE return statement will have the same semantics (NOTE: may
01155         // not always be valid). To avoid this assumption, we need branches to statements, not just to native addresses
01156         // (RTLs).
01157         PBB retBB = pProc->getCFG()->findRetNode();
01158         assert(retBB);
01159         if (retBB->getFirstStmt()->isReturn()) {
01160             // ret node has no semantics, clearly we need to keep ours
01161             pRtl->deleteLastStmt();
01162         } else
01163             pRtl->clear();
01164         pRtl->appendStmt(new GotoStatement(retAddr));
01165         try {
01166             pBB = pCfg->newBB(BB_rtls, ONEWAY, 1);
01167             // if BB already exists but is incomplete, exception is thrown
01168             pCfg->addOutEdge(pBB, retAddr, true);
01169             // Visit the return instruction. This will be needed in most cases to split the return BB (if it has other
01170             // instructions before the return instruction).
01171             targetQueue.visit(pCfg, retAddr, pBB);
01172         } catch(Cfg::BBAlreadyExistsError &) {
01173             if (VERBOSE)
01174                 LOG << "not visiting " << retAddr << " due to exception\n";
01175         }
01176     }
01177     return pBB;
01178 }
01179 
01180 // Add a synthetic return instruction (or branch to the existing return instruction).
01181 // NOTE: the call BB should be created with one out edge (the return or branch BB)
01182 void FrontEnd::appendSyntheticReturn(PBB pCallBB, UserProc* pProc, RTL* pRtl) {
01183     ReturnStatement *ret = new ReturnStatement();
01184     std::list<RTL*> *ret_rtls = new std::list<RTL*>();
01185     std::list<Statement*>* stmt_list = new std::list<Statement*>;
01186     stmt_list->push_back(ret);
01187     PBB pret = createReturnBlock(pProc, ret_rtls, new RTL(pRtl->getAddress()+1, stmt_list));
01188     pret->addInEdge(pCallBB);
01189     pCallBB->setOutEdge(0, pret);
01190 }

Generated on Tue Sep 19 21:18:24 2006 for Boomerang by  doxygen 1.4.6