boomerang.cpp

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2002-2006, Mike Van Emmerik and Trent Waddington
00003  */
00004 /*==============================================================================
00005  * FILE:       boomerang.cpp
00006  * OVERVIEW:   Command line processing for the Boomerang decompiler
00007  *============================================================================*/
00008 /*
00009  * $Revision: 1.185 $   // 1.115.2.5
00010  *
00011  * 28 Jan 05 - G. Krol: Separated -h output into sections and neatened
00012  * 02 Sep 06 - Mike: introduced USE_XML to make it easy to disable use of the expat library
00013 */
00014 
00015 #define VERSION "alpha 0.3.1 09/Sep/2006"
00016 
00017 #if __CYGWIN__
00018 #define USE_XML 0           // Cygwin has a weird problem that causes libBinaryFile.dll not to load if the expat library
00019                             // is used. Note that other Windows versions require expat.
00020 #else                       // For all platforms other than Cygwin:
00021 #define USE_XML 0           // Set to 0 to not use the expat library for XML loading and saving
00022 #endif
00023 
00024 #include <iostream>
00025 #include <fstream>
00026 #include <time.h>
00027 #ifdef _WIN32
00028 #include <direct.h>         // mkdir under Windows
00029 #else
00030 #include <sys/stat.h>       // For mkdir
00031 #include <unistd.h>         // For unlink
00032 #include <signal.h>
00033 #endif
00034 #if defined(_MSC_VER) || defined(__MINGW32__)
00035 #include <windows.h>
00036 #endif
00037 #include "prog.h"
00038 #include "proc.h"
00039 #include "BinaryFile.h"
00040 #include "frontend.h"
00041 #include "hllcode.h"
00042 #include "codegen/chllcode.h"
00043 //#include "transformer.h"
00044 #include "boomerang.h"
00045 #include "log.h"
00046 #if USE_XML
00047 #include "xmlprogparser.h"
00048 #endif
00049 #if defined(_MSC_VER) && _MSC_VER >= 1400
00050 #pragma warning(disable:4996)       // Warnings about e.g. _strdup deprecated in VS 2005
00051 #endif
00052 
00053 
00054 // For the -nG switch to disable the garbage collector
00055 #include "gc.h"
00056 
00057 Boomerang *Boomerang::boomerang = NULL;
00058 
00059 /**
00060  * Initializes the Boomerang object.
00061  * The default settings are:
00062  * - All options disabled
00063  * - Infinite propagations
00064  * - A maximum memory depth of 99
00065  * - The path to the executable is "./"
00066  * - The output directory is "./output/"
00067  */
00068 Boomerang::Boomerang() : logger(NULL), vFlag(false), printRtl(false), 
00069     noBranchSimplify(false), noRemoveNull(false), noLocals(false),
00070     noRemoveLabels(false), noDataflow(false), noDecompile(false), stopBeforeDecompile(false),
00071     traceDecoder(false), dotFile(NULL), numToPropagate(-1),
00072     noPromote(false), propOnlyToAll(false), debugGen(false),
00073     maxMemDepth(99), debugSwitch(false), noParameterNames(false), debugLiveness(false),
00074     stopAtDebugPoints(false), debugTA(false), decodeMain(true), printAST(false), dumpXML(false),
00075     noRemoveReturns(false), debugDecoder(false), decodeThruIndCall(false), ofsIndCallReport(NULL),
00076     noDecodeChildren(false), debugProof(false), debugUnused(false),
00077     loadBeforeDecompile(false), saveBeforeDecompile(false),
00078     noProve(false), noChangeSignatures(false), conTypeAnalysis(false), dfaTypeAnalysis(true),
00079     propMaxDepth(3), generateCallGraph(false), generateSymbols(false), noGlobals(false), assumeABI(false),
00080     experimental(false), minsToStopAfter(0)
00081 {
00082     progPath = "./";
00083     outputPath = "./output/";
00084 }
00085 
00086 /**
00087  * Returns the Log object associated with the object.
00088  */
00089 Log &Boomerang::log() {
00090     return *logger;
00091 }
00092 
00093 /**
00094  * Sets the outputfile to be the file "log" in the default output directory.
00095  */
00096 FileLogger::FileLogger() : out((Boomerang::get()->getOutputPath() + "log").c_str()) {
00097 }
00098 
00099 /**
00100  * Returns the HLLCode for the given proc.
00101  */
00102 HLLCode *Boomerang::getHLLCode(UserProc *p) {
00103     return new CHLLCode(p);
00104 }
00105 
00106 /**
00107  * Prints a short usage statement.
00108  */
00109 void Boomerang::usage() {
00110     std::cout << "Usage: boomerang [ switches ] <program>" << std::endl;
00111     std::cout << "boomerang -h for switch help" << std::endl;
00112     exit(1);
00113 }
00114 
00115 /**
00116  * Prints help for the interactive mode.
00117  */
00118 void Boomerang::helpcmd() {
00119     // Column 98 of this source file is column 80 of output (don't use tabs)
00120     //            ____.____1____.____2____.____3____.____4____.____5____.____6____.____7____.____8
00121     std::cout << "Available commands (for use with -k):\n";
00122     std::cout << "  decode                             : Loads and decodes the specified binary.\n";
00123     std::cout << "  decompile [proc]                   : Decompiles the program or specified proc.\n";
00124     std::cout << "  codegen [cluster]                  : Generates code for the program or a\n";
00125     std::cout << "                                       specified cluster.\n";
00126     std::cout << "  move proc <proc> <cluster>         : Moves the specified proc to the specified\n";
00127     std::cout << "                                       cluster.\n";
00128     std::cout << "  move cluster <cluster> <parent>    : Moves the specified cluster to the\n";
00129     std::cout << "                                       specified parent cluster.\n";
00130     std::cout << "  add cluster <cluster> [parent]     : Adds a new cluster to the root/specified\n";
00131     std::cout << "                                       cluster.\n";
00132     std::cout << "  delete cluster <cluster>           : Deletes an empty cluster.\n";
00133     std::cout << "  rename proc <proc> <newname>       : Renames the specified proc.\n";
00134     std::cout << "  rename cluster <cluster> <newname> : Renames the specified cluster.\n";
00135     std::cout << "  info prog                          : Print info about the program.\n";
00136     std::cout << "  info cluster <cluster>             : Print info about a cluster.\n";
00137     std::cout << "  info proc <proc>                   : Print info about a proc.\n";
00138     std::cout << "  print <proc>                       : Print the RTL for a proc.\n";
00139     std::cout << "  help                               : This help.\n";
00140     std::cout << "  exit                               : Quit the shell.\n";
00141 }
00142 
00143 /**
00144  * Prints help about the command line switches.
00145  */
00146 void Boomerang::help() {
00147     std::cout << "Symbols\n";
00148     std::cout << "  -s <addr> <name> : Define a symbol\n";
00149     std::cout << "  -sf <filename>   : Read a symbol/signature file\n";
00150     std::cout << "Decoding/decompilation options\n";
00151     std::cout << "  -e <addr>        : Decode the procedure beginning at addr, and callees\n";
00152     std::cout << "  -E <addr>        : Decode the procedure at addr, no callees\n";
00153     std::cout << "                     Use -e and -E repeatedly for multiple entry points\n";
00154     std::cout << "  -ic              : Decode through type 0 Indirect Calls\n";
00155     std::cout << "  -S <min>         : Stop decompilation after specified number of minutes\n";
00156     std::cout << "  -t               : Trace (print address of) every instruction decoded\n";
00157     std::cout << "  -Tc              : Use old constraint-based type analysis\n";
00158     std::cout << "  -Td              : Use data-flow-based type analysis\n";
00159 #if USE_XML
00160     std::cout << "  -LD              : Load before decompile (<program> becomes xml input file)\n";
00161     std::cout << "  -SD              : Save before decompile\n";
00162 #endif
00163     std::cout << "  -a               : Assume ABI compliance\n";
00164     std::cout << "  -W               : Windows specific decompilation mode (requires pdb information)\n";
00165 //  std::cout << "  -pa              : only propagate if can propagate to all\n";
00166     std::cout << "Output\n";
00167     std::cout << "  -v               : Verbose\n";
00168     std::cout << "  -h               : This help\n";
00169     std::cout << "  -o <output path> : Where to generate output (defaults to ./output/)\n";
00170     std::cout << "  -x               : Dump XML files\n";
00171     std::cout << "  -r               : Print RTL for each proc to log before code generation\n";
00172     std::cout << "  -gd <dot file>   : Generate a dotty graph of the program's CFG and DFG\n";
00173     std::cout << "  -gc              : Generate a call graph (callgraph.out and callgraph.dot)\n";
00174     std::cout << "  -gs              : Generate a symbol file (symbols.h)\n";
00175     std::cout << "  -iw              : Write indirect call report to output/indirect.txt\n";
00176     std::cout << "Misc.\n";
00177     std::cout << "  -k               : Command mode, for available commands see -h cmd\n";
00178     std::cout << "  -P <path>        : Path to Boomerang files, defaults to where you run\n";
00179     std::cout << "                     Boomerang from\n";
00180     std::cout << "  -X               : activate eXperimental code; errors likely\n";
00181     std::cout << "  --               : No effect (used for testing)\n";
00182     std::cout << "Debug\n";
00183     std::cout << "  -da              : Print AST before code generation\n";
00184     std::cout << "  -dc              : Debug switch (Case) analysis\n";
00185     std::cout << "  -dd              : Debug decoder to stdout\n";
00186     std::cout << "  -dg              : Debug code Generation\n";
00187     std::cout << "  -dl              : Debug liveness (from SSA) code\n";
00188     std::cout << "  -dp              : Debug proof engine\n";
00189     std::cout << "  -ds              : Stop at debug points for keypress\n";
00190     std::cout << "  -dt              : Debug type analysis\n";
00191     std::cout << "  -du              : Debug removing unused statements etc\n";
00192     std::cout << "Restrictions\n";
00193     std::cout << "  -nb              : No simplifications for branches\n";
00194     std::cout << "  -nc              : No decode children in the call graph (callees)\n";
00195     std::cout << "  -nd              : No (reduced) dataflow analysis\n";
00196     std::cout << "  -nD              : No decompilation (at all!)\n";
00197     std::cout << "  -nl              : No creation of local variables\n";
00198 //  std::cout << "  -nm              : No decoding of the 'main' procedure\n";
00199     std::cout << "  -ng              : No replacement of expressions with Globals\n";
00200     std::cout << "  -nG              : No garbage collection\n";
00201     std::cout << "  -nn              : No removal of NULL and unused statements\n";
00202     std::cout << "  -np              : No replacement of expressions with Parameter names\n";
00203     std::cout << "  -nP              : No promotion of signatures (other than main/WinMain/\n";
00204     std::cout << "                     DriverMain)\n";
00205     std::cout << "  -nr              : No removal of unneeded labels\n";
00206     std::cout << "  -nR              : No removal of unused Returns\n";
00207     std::cout << "  -l <depth>       : Limit multi-propagations to expressions with depth <depth>\n";
00208     std::cout << "  -p <num>         : Only do num propagations\n";
00209     std::cout << "  -m <num>         : Max memory depth\n";
00210     exit(1);
00211 }
00212         
/**
 * Creates one directory level. Failures (for example "already exists") are deliberately ignored;
 * createDirectory() judges the final result by trying to write a file.
 */
static void makeSingleDirectory(const std::string &path) {
#ifdef _WIN32
    mkdir(path.c_str());
#else
    mkdir(path.c_str(), 0777);
#endif
}

/**
 * Creates a directory, including any missing parent directories, and tests that a file can be
 * written inside it.
 *
 * The path is split at every '/' and each prefix is created in turn. The directory is then probed
 * by writing a temporary file called "test.file" inside it, which is removed again afterwards.
 *
 * \param dir   The name of the directory, with or without a trailing slash.
 *
 * \retval true The directory is valid: the probe file could be opened, written and closed.
 * \retval false The directory is invalid.
 */
bool createDirectory(std::string dir) {
    std::string path;
    // size_type, not unsigned: a narrower type can never compare equal to npos on 64 bit targets,
    // which would make this loop spin forever.
    std::string::size_type start = 0;
    std::string::size_type slash;
    while ((slash = dir.find('/', start)) != std::string::npos) {
        path.assign(dir, 0, slash + 1);
        makeSingleDirectory(path);              // Doesn't matter if already exists
        start = slash + 1;
    }
    path = dir;
    if (start < dir.size())
        makeSingleDirectory(path);              // Make the last dir if needed

    // Make sure the probe file is created inside the directory, not next to it
    if (!path.empty()) {
        const char lastCh = path[path.size() - 1];
        if (lastCh != '/' && lastCh != '\\')
            path += '/';
    }
    path += "test.file";

    std::ofstream test;
    test.open(path.c_str(), std::ios::out);
    const bool opened = test.is_open();
    test << "testing\n";
    test.close();
    // fail() covers failbit as well as badbit; a failed open only sets failbit
    const bool pathOK = opened && !test.fail();
    if (opened)
        remove(path.c_str());                   // Never leave the probe file behind
    return pathOK;
}
00251 
00252 /**
00253  * Prints a tree graph.
00254  */
00255 void Cluster::printTree(std::ostream &out)
00256 {
00257     out << "\t\t" << name << "\n";
00258     for (unsigned i = 0; i < children.size(); i++)
00259     children[i]->printTree(out);
00260 }
00261 
00262 typedef char *crazy_vc_bug;
00263 
00264 /**
00265  * Splits a string up in different words.
00266  * use like: argc = splitLine(line, &argv);
00267  *
00268  * \param[in] line      the string to parse
00269  * \param[out] pargc    &argv
00270  *
00271  * \return The number of words found (argc).
00272  */
00273 int Boomerang::splitLine(char *line, char ***pargv)
00274 {
00275     int argc = 0;
00276     *pargv = new crazy_vc_bug[100];
00277     const char *p = strtok(line, " \r\n");
00278     while(p) {
00279         (*pargv)[argc++] = (char*)p;
00280         p = strtok(NULL, " \r\n");
00281     }
00282     return argc;
00283 }
00284 
00285 /**
00286  * Parse and execute a command supplied in interactive mode.
00287  *
00288  * \param argc      The number of arguments.
00289  * \param argv      Pointers to the arguments.
00290  *
00291  * \return A value indicating what happened.
00292  *
00293  * \retval 0 Success
00294  * \retval 1 Faillure
00295  * \retval 2 The user exited with \a quit or \a exit
00296  */
00297 int Boomerang::parseCmd(int argc, const char **argv)
00298 {
00299     static Prog *prog = NULL;
00300     if (!strcmp(argv[0], "decode")) {
00301         if (argc <= 1) {
00302             std::cerr << "not enough arguments for cmd\n";
00303             return 1;
00304         }
00305         const char *fname = argv[1];
00306         Prog *p = loadAndDecode(fname);
00307             if (p == NULL) {
00308                 std::cerr << "failed to load " << fname << "\n";
00309                 return 1;
00310             }
00311         prog = p;
00312 #if USE_XML
00313     } else if (!strcmp(argv[0], "load")) {
00314         if (argc <= 1) {
00315             std::cerr << "not enough arguments for cmd\n";
00316             return 1;
00317         }
00318         const char *fname = argv[1];
00319         XMLProgParser *p = new XMLProgParser();
00320         Prog *pr = p->parse(fname);
00321         if (pr == NULL) {
00322             // try guessing
00323             pr = p->parse((outputPath + fname + "/" + fname + ".xml").c_str());
00324             if (pr == NULL) {
00325             std::cerr << "failed to read xml " << fname << "\n";
00326             return 1;
00327             }
00328         }
00329         prog = pr;
00330     } else if (!strcmp(argv[0], "save")) {
00331         if (prog == NULL) {
00332             std::cerr << "need to load or decode before save!\n";
00333             return 1;
00334         }
00335         XMLProgParser *p = new XMLProgParser();
00336         p->persistToXML(prog);
00337 #endif
00338     } else if (!strcmp(argv[0], "decompile")) {
00339         if (argc > 1) {
00340             Proc *proc = prog->findProc(argv[1]);
00341             if (proc == NULL) {
00342                 std::cerr << "cannot find proc " << argv[1] << "\n";
00343                 return 1;
00344             }
00345             if (proc->isLib()) {
00346                 std::cerr << "cannot decompile a lib proc\n";
00347                 return 1;
00348             }
00349             int indent = 0;
00350             ((UserProc*)proc)->decompile(new ProcList, indent);
00351         } else {
00352             prog->decompile();
00353         }
00354     } else if (!strcmp(argv[0], "codegen")) {
00355         if (argc > 1 ) {
00356             Cluster *cluster = prog->findCluster(argv[1]);
00357             if (cluster == NULL) {
00358             std::cerr << "cannot find cluster " << argv[1] << "\n";
00359             return 1;
00360             }
00361             prog->generateCode(cluster);
00362         } else {
00363             prog->generateCode();
00364         }
00365     } else if (!strcmp(argv[0], "move")) {
00366         if (argc <= 1) {
00367             std::cerr << "not enough arguments for cmd\n";
00368             return 1;
00369         }
00370         if (!strcmp(argv[1], "proc")) {
00371             if (argc <= 3) {
00372                 std::cerr << "not enough arguments for cmd\n";
00373                 return 1;
00374             }
00375 
00376             Proc *proc = prog->findProc(argv[2]);
00377             if (proc == NULL) {
00378                 std::cerr << "cannot find proc " << argv[2] << "\n";
00379                 return 1;
00380             }
00381 
00382             Cluster *cluster = prog->findCluster(argv[3]);
00383             if (cluster == NULL) {
00384                 std::cerr << "cannot find cluster " << argv[3] << "\n";
00385                 return 1;
00386             }
00387             proc->setCluster(cluster);
00388         } else if (!strcmp(argv[1], "cluster")) {
00389             if (argc <= 3) {
00390                 std::cerr << "not enough arguments for cmd\n";
00391                 return 1;
00392             }
00393 
00394             Cluster *cluster = prog->findCluster(argv[2]);
00395             if (cluster == NULL) {
00396                 std::cerr << "cannot find cluster " << argv[2] << "\n";
00397                 return 1;
00398             }
00399 
00400             Cluster *parent = prog->findCluster(argv[3]);
00401             if (parent == NULL) {
00402                 std::cerr << "cannot find cluster " << argv[3] << "\n";
00403                 return 1;
00404             }
00405 
00406             parent->addChild(cluster);
00407         } else {
00408             std::cerr << "don't know how to move a " << argv[1] << "\n";
00409             return 1;
00410         }
00411     } else if (!strcmp(argv[0], "add")) {
00412         if (argc <= 1) {
00413             std::cerr << "not enough arguments for cmd\n";
00414             return 1;
00415         }
00416         if (!strcmp(argv[1], "cluster")) {
00417             if (argc <= 2) {
00418                 std::cerr << "not enough arguments for cmd\n";
00419                 return 1;
00420             }
00421 
00422             Cluster *cluster = new Cluster(argv[2]);
00423             if (cluster == NULL) {
00424                 std::cerr << "cannot create cluster " << argv[2] << "\n";
00425                 return 1;
00426             }
00427 
00428             Cluster *parent = prog->getRootCluster();
00429             if (argc > 3) {
00430                 parent = prog->findCluster(argv[3]);
00431                 if (cluster == NULL) {
00432                     std::cerr << "cannot find cluster " << argv[3] << "\n";
00433                     return 1;
00434                 }
00435             }
00436 
00437             parent->addChild(cluster);
00438         } else {
00439             std::cerr << "don't know how to add a " << argv[1] << "\n";
00440             return 1;
00441         }
00442     } else if (!strcmp(argv[0], "delete")) {
00443         if (argc <= 1) {
00444             std::cerr << "not enough arguments for cmd\n";
00445             return 1;
00446         }
00447         if (!strcmp(argv[1], "cluster")) {
00448             if (argc <= 2) {
00449                 std::cerr << "not enough arguments for cmd\n";
00450                 return 1;
00451             }
00452 
00453             Cluster *cluster = prog->findCluster(argv[2]);
00454             if (cluster == NULL) {
00455                 std::cerr << "cannot find cluster " << argv[2] << "\n";
00456                 return 1;
00457             }
00458 
00459             if (cluster->hasChildren() || cluster == prog->getRootCluster()) {
00460                 std::cerr << "cluster " << argv[2] << " is not empty\n";
00461                 return 1;
00462             }
00463 
00464             if (prog->clusterUsed(cluster)) {
00465                 std::cerr << "cluster " << argv[2] << " is not empty\n";
00466                 return 1;
00467             }
00468 
00469             unlink(cluster->getOutPath("xml"));
00470             unlink(cluster->getOutPath("c"));
00471             assert(cluster->getParent());
00472             cluster->getParent()->removeChild(cluster);
00473         } else {
00474             std::cerr << "don't know how to delete a " << argv[1] << "\n";
00475             return 1;
00476         }
00477     } else if (!strcmp(argv[0], "rename")) {
00478         if (argc <= 1) {
00479             std::cerr << "not enough arguments for cmd\n";
00480             return 1;
00481         }
00482         if (!strcmp(argv[1], "proc")) {
00483             if (argc <= 3) {
00484                 std::cerr << "not enough arguments for cmd\n";
00485                 return 1;
00486             }
00487 
00488             Proc *proc = prog->findProc(argv[2]);
00489             if (proc == NULL) {
00490                 std::cerr << "cannot find proc " << argv[2] << "\n";
00491                 return 1;
00492             }
00493 
00494             Proc *nproc = prog->findProc(argv[3]);
00495             if (nproc != NULL) {
00496                 std::cerr << "proc " << argv[3] << " already exists\n";
00497                 return 1;
00498             }
00499 
00500             proc->setName(argv[3]);
00501         } else if (!strcmp(argv[1], "cluster")) {
00502             if (argc <= 3) {
00503                 std::cerr << "not enough arguments for cmd\n";
00504                 return 1;
00505             }
00506 
00507             Cluster *cluster = prog->findCluster(argv[2]);
00508             if (cluster == NULL) {
00509                 std::cerr << "cannot find cluster " << argv[2] << "\n";
00510                 return 1;
00511             }
00512 
00513             Cluster *ncluster = prog->findCluster(argv[3]);
00514             if (ncluster == NULL) {
00515                 std::cerr << "cluster " << argv[3] << " already exists\n";
00516                 return 1;
00517             }
00518 
00519             cluster->setName(argv[3]);
00520         } else {
00521             std::cerr << "don't know how to rename a " << argv[1] << "\n";
00522             return 1;
00523         }
00524     } else if (!strcmp(argv[0], "info")) {
00525         if (argc <= 1) {
00526             std::cerr << "not enough arguments for cmd\n";
00527             return 1;
00528         }
00529         if (!strcmp(argv[1], "prog")) {
00530 
00531             std::cout << "prog " << prog->getName() << ":\n";
00532             std::cout << "\tclusters:\n";
00533             prog->getRootCluster()->printTree(std::cout);
00534             std::cout << "\n\tlibprocs:\n";
00535             PROGMAP::const_iterator it;
00536             for (Proc *p = prog->getFirstProc(it); p; p = prog->getNextProc(it))
00537             if (p->isLib())
00538                 std::cout << "\t\t" << p->getName() << "\n";
00539             std::cout << "\n\tuserprocs:\n";
00540             for (Proc *p = prog->getFirstProc(it); p; p = prog->getNextProc(it))
00541             if (!p->isLib())
00542                 std::cout << "\t\t" << p->getName() << "\n";
00543             std::cout << "\n";
00544             
00545             return 0;
00546         } else if (!strcmp(argv[1], "cluster")) {
00547             if (argc <= 2) {
00548                 std::cerr << "not enough arguments for cmd\n";
00549                 return 1;
00550             }
00551 
00552             Cluster *cluster = prog->findCluster(argv[2]);
00553             if (cluster == NULL) {
00554                 std::cerr << "cannot find cluster " << argv[2] << "\n";
00555                 return 1;
00556             }
00557 
00558             std::cout << "cluster " << cluster->getName() << ":\n";
00559             if (cluster->getParent())
00560                 std::cout << "\tparent = " << cluster->getParent()->getName() << "\n";
00561             else
00562                 std::cout << "\troot cluster.\n";
00563             std::cout << "\tprocs:\n";
00564             PROGMAP::const_iterator it;
00565             for (Proc *p = prog->getFirstProc(it); p; p = prog->getNextProc(it))
00566                 if (p->getCluster() == cluster)
00567                     std::cout << "\t\t" << p->getName() << "\n";
00568             std::cout << "\n";
00569             
00570             return 0;
00571         } else if (!strcmp(argv[1], "proc")) {
00572             if (argc <= 2) {
00573                 std::cerr << "not enough arguments for cmd\n";
00574                 return 1;
00575             }
00576 
00577             Proc *proc = prog->findProc(argv[2]);
00578             if (proc == NULL) {
00579                 std::cerr << "cannot find proc " << argv[2] << "\n";
00580                 return 1;
00581             }
00582 
00583             std::cout << "proc " << proc->getName() << ":\n";
00584             std::cout << "\tbelongs to cluster " << proc->getCluster()->getName() << "\n";
00585             std::cout << "\tnative address " << std::hex << proc->getNativeAddress() << std::dec << "\n";
00586             if (proc->isLib())
00587                 std::cout << "\tis a library proc.\n";
00588             else {
00589                 std::cout << "\tis a user proc.\n";
00590                 UserProc *p = (UserProc*)proc;
00591                 if (p->isDecoded())
00592                     std::cout << "\thas been decoded.\n";
00593                 //if (p->isAnalysed())
00594                 //  std::cout << "\thas been analysed.\n";
00595             }
00596             std::cout << "\n";
00597 
00598             return 0;
00599         } else {
00600             std::cerr << "don't know how to print info about a " << argv[1] << "\n";
00601             return 1;
00602         }
00603     } else if (!strcmp(argv[0], "print")) {
00604         if (argc <= 1) {
00605             std::cerr << "not enough arguments for cmd\n";
00606             return 1;
00607         }
00608 
00609         Proc *proc = prog->findProc(argv[1]);
00610         if (proc == NULL) {
00611             std::cerr << "cannot find proc " << argv[1] << "\n";
00612             return 1;
00613         }
00614         if (proc->isLib()) {
00615             std::cerr << "cannot print a libproc.\n";
00616             return 1;
00617         }
00618 
00619         ((UserProc*)proc)->print(std::cout);
00620         std::cout << "\n";
00621         return 0;
00622     } else if (!strcmp(argv[0], "exit")) {
00623         return 2;
00624     } else if (!strcmp(argv[0], "quit")) {
00625         return 2;
00626     } else if (!strcmp(argv[0], "help")) {
00627         helpcmd();
00628         return 0;
00629     } else {
00630         std::cerr << "unknown cmd " << argv[0] << ".\n";
00631         return 1;
00632     }
00633 
00634     return 0;
00635 }
00636 
00637 /**
00638  * Displays a command line and processes the commands entered.
00639  *
00640  * \retval 0 stdin was closed.
00641  * \retval 2 The user typed exit or quit.
00642  */
00643 int Boomerang::cmdLine()
00644 {
00645     char line[1024];
00646     printf("boomerang: ");
00647     fflush(stdout);
00648     while (fgets(line, sizeof(line), stdin)) {
00649         char **argv;
00650         int argc = splitLine(line, &argv);
00651         if (parseCmd(argc, (const char **)argv) == 2) 
00652             return 2;
00653         printf("boomerang: ");
00654         fflush(stdout);
00655     }
00656     return 0;
00657 }
00658 
00659 /**
00660  * The main function for the command line mode. Parses switches and runs decompile(filename).
00661  *
00662  * \return Zero on success, nonzero on failure.
00663  */
00664 int Boomerang::commandLine(int argc, const char **argv) 
00665 {
00666     printf("Boomerang %s\n", VERSION);      // Display a version and date (mainly for release versions)
00667     if (argc < 2) usage();
00668     progPath = argv[0];
00669     size_t j = progPath.rfind('/');         // Chop off after the last slash
00670     if (j == (size_t)-1) 
00671         j = progPath.rfind('\\');           // .. or reverse slash
00672     if (j != (size_t)-1) {
00673         // Do the chop; keep the trailing slash or reverse slash
00674         progPath = progPath.substr(0, j+1);
00675     }
00676     else {
00677         progPath = "./";            // Just assume the current directory
00678     }
00679 #ifdef _MSC_VER                     // For the console mode version; Windows GUI will override in windows.cpp
00680     // As a special case for MSVC testing, make the program path the parent of the dir with the .exe
00681     j = progPath.find("ebug\\", progPath.length() - (4+1));
00682     if (j != std::string::npos)
00683         j--;            // Point to the 'd' or 'D'
00684     if (j == std::string::npos) {
00685             j = progPath.rfind("elease\\", progPath.length() - (6+1));
00686             if (j != std::string::npos)
00687                 j--;            // Point to the 'r' or 'R'
00688     }
00689     if (j != std::string::npos)
00690         progPath = progPath.substr(0, j);           // Chop off "Release\" or "Debug\"
00691     SetCurrentDirectoryA(progPath.c_str());         // Note: setcwd() doesn't seem to work
00692 #endif
00693     outputPath = progPath + "output/";              // Default output path (can be overridden with -o below)
00694 
00695     // Parse switches on command line
00696     if ((argc == 2) && (strcmp(argv[1], "-h") == 0)) {
00697         help();
00698         return 1;
00699     }
00700     if (argc == 3 && !strcmp(argv[1], "-h") && !strcmp(argv[2], "cmd")) {
00701         helpcmd();
00702         return 1;
00703     }
00704 
00705     int kmd = 0;
00706 
00707     for (int i=1; i < argc; i++) {
00708         if (argv[i][0] != '-' && i == argc - 1)
00709             break;
00710         if (argv[i][0] != '-')
00711             usage();
00712         switch (argv[i][1]) {
00713             case '-': break;        // No effect: ignored
00714             case 'h': help(); break;
00715             case 'v': vFlag = true; break;
00716             case 'x': dumpXML = true; break;
00717             case 'X': experimental = true;
00718                 std::cout << "Warning: experimental code active!\n"; break;
00719             case 'r': printRtl = true; break;
00720             case 't': traceDecoder = true; break;
00721             case 'T':
00722                 if (argv[i][2] == 'c') {
00723                     conTypeAnalysis = true;     // -Tc: use old constraint-based type analysis
00724                     dfaTypeAnalysis = false;
00725                 }
00726                 else if (argv[i][2] == 'd')
00727                     dfaTypeAnalysis = true;     // -Td: use data-flow-based type analysis (now default)
00728                 break;
00729             case 'g': 
00730                 if(argv[i][2]=='d')
00731                     dotFile = argv[++i];
00732                 else if(argv[i][2]=='c')
00733                     generateCallGraph=true;
00734                 else if(argv[i][2]=='s') {
00735                     generateSymbols=true;
00736                     stopBeforeDecompile=true;
00737                 }
00738                 break;
00739             case 'o': {
00740                 outputPath = argv[++i];
00741                 char lastCh = outputPath[outputPath.size()-1];
00742                 if (lastCh != '/' && lastCh != '\\')
00743                     outputPath += '/';      // Maintain the convention of a trailing slash
00744                 break;
00745             }
00746             case 'p':
00747                 if (argv[i][2] == 'a') {
00748                     propOnlyToAll = true;
00749                     std::cerr << " * * Warning! -pa is not implemented yet!\n";
00750                 }
00751                 else {
00752                     if (++i == argc) {
00753                         usage();
00754                         return 1;
00755                     }
00756                     sscanf(argv[i], "%i", &numToPropagate);
00757                 }
00758                 break;
00759             case 'n':
00760                 switch(argv[i][2]) {
00761                     case 'b':
00762                         noBranchSimplify = true;
00763                         break;
00764                     case 'c':
00765                         noDecodeChildren = true;
00766                         break;
00767                     case 'd':
00768                         noDataflow = true;
00769                         break;
00770                     case 'D':
00771                         noDecompile = true;
00772                         break;
00773                     case 'l':
00774                         noLocals = true;
00775                         break;
00776                     case 'n':
00777                         noRemoveNull = true;
00778                         break;
00779                     case 'P':
00780                         noPromote = true;
00781                         break;
00782                     case 'p':
00783                         noParameterNames = true;
00784                         break;
00785                     case 'r':
00786                         noRemoveLabels = true;
00787                         break;
00788                     case 'R':
00789                         noRemoveReturns = true;
00790                         break;
00791                     case 'g':
00792                         noGlobals = true;
00793                         break;
00794                     case 'G':
00795 #ifndef NO_GARBAGE_COLLECTOR
00796                         GC_disable();
00797 #endif
00798                         break;
00799                     default:
00800                         help();
00801                 }
00802                 break;
00803             case 'E':
00804                 noDecodeChildren = true;
00805                 // Fall through
00806             case 'e':
00807                 {
00808                     ADDRESS addr;
00809                     int n;
00810                     decodeMain = false;
00811                     if (++i == argc) {
00812                         usage();
00813                         return 1;
00814                     }
00815                     if (argv[i][0] == '0' && argv[i+1][1] == 'x') {
00816                         n = sscanf(argv[i], "0x%x", &addr);
00817                     } else {
00818                         n = sscanf(argv[i], "%i", &addr);
00819                     }
00820                     if (n != 1) {
00821                         std::cerr << "bad address: " << argv[i] << std::endl;
00822                         exit(1);
00823                     }
00824                     entrypoints.push_back(addr);
00825                 }
00826                 break;
00827             case 's':
00828                 {
00829                     if (argv[i][2] == 'f') {
00830                         symbolFiles.push_back(argv[i+1]);
00831                         i++;
00832                         break;
00833                     }
00834                     ADDRESS addr;
00835                     int n;
00836                     if (++i == argc) {
00837                         usage();
00838                         return 1;
00839                     }
00840                     if (argv[i][0] == '0' && argv[i+1][1] == 'x') {
00841                         n = sscanf(argv[i], "0x%x", &addr);
00842                     } else {
00843                         n = sscanf(argv[i], "%i", &addr);
00844                     }
00845                     if (n != 1) {
00846                         std::cerr << "bad address: " << argv[i+1] << std::endl;
00847                         exit(1);
00848                     }
00849                     const char *nam = argv[++i];
00850                     symbols[addr] = nam;
00851                 }
00852                 break;
00853             case 'd':
00854                 switch(argv[i][2]) {
00855                     case 'a':
00856                         printAST = true;
00857                         break;
00858                     case 'c':
00859                         debugSwitch = true;
00860                         break;
00861                     case 'd':
00862                         debugDecoder = true;
00863                         break;
00864                     case 'g':
00865                         debugGen = true;
00866                         break;
00867                     case 'l':
00868                         debugLiveness = true;
00869                         break;
00870                     case 'p':
00871                         debugProof = true;
00872                         break;
00873                     case 's':
00874                         stopAtDebugPoints = true;
00875                         break;
00876                     case 't':       // debug type analysis
00877                         debugTA = true;
00878                         break;
00879                     case 'u':       // debug unused locations (including returns and parameters now)
00880                         debugUnused = true;
00881                         break;
00882                     default:
00883                         help();
00884                 }
00885                 break;
00886             case 'm':
00887                 if (++i == argc) {
00888                     usage();
00889                     return 1;
00890                 }
00891                 sscanf(argv[i], "%i", &maxMemDepth);
00892                 break;
00893             case 'i':
00894                 if (argv[i][2] == 'c')
00895                     decodeThruIndCall = true;       // -ic;
00896                 if (argv[i][2] == 'w')              // -iw
00897                     if (ofsIndCallReport) {
00898                         std::string fname = getOutputPath() + "indirect.txt";
00899                         ofsIndCallReport = new std::ofstream(fname.c_str());
00900                     }
00901                 break;
00902             case 'L':
00903                 if (argv[i][2] == 'D')
00904                     #if USE_XML
00905                     loadBeforeDecompile = true;
00906                     #else
00907                     std::cerr << "LD command not enabled since compiled without USE_XML\n";
00908                     #endif
00909                 break;
00910             case 'S':
00911                 if (argv[i][2] == 'D')
00912                     #if USE_XML
00913                     saveBeforeDecompile = true;
00914                     #else
00915                     std::cerr << "SD command not enabled since compiled without USE_XML\n";
00916                     #endif
00917                 else {
00918                     sscanf(argv[++i], "%i", &minsToStopAfter);                  
00919                 }
00920                 break;
00921             case 'k':
00922                 kmd = 1;
00923                 break;
00924             case 'P':
00925                 progPath = argv[++i];
00926                 if (progPath[progPath.length()-1] != '\\')
00927                     progPath += "\\";
00928                 break;
00929             case 'a':
00930                 assumeABI = true;
00931                 break;
00932             case 'l':
00933                 if (++i == argc) {
00934                     usage();
00935                     return 1;
00936                 }
00937                 sscanf(argv[i], "%i", &propMaxDepth);
00938                 break;
00939             default:
00940                 help();
00941         }
00942     }
00943 
00944     setOutputDirectory(outputPath.c_str());
00945     
00946     if (kmd)
00947         return cmdLine();
00948 
00949     return decompile(argv[argc-1]);    
00950 }
00951 
00952 /**
00953  * Sets the directory in which Boomerang creates its output files.  The directory will be created if it doesn't exist.
00954  *
00955  * \param path      the path to the directory
00956  *
00957  * \retval true Success.
00958  * \retval false The directory could not be created.
00959  */
00960 bool Boomerang::setOutputDirectory(const char *path)
00961 {
00962     outputPath = path;
00963     // Create the output directory, if needed
00964     if (!createDirectory(outputPath)) {
00965         std::cerr << "Warning! Could not create path " << outputPath << "!\n";
00966         return false;
00967     }
00968     if (logger == NULL)
00969         setLogger(new FileLogger());
00970     return true;
00971 }
00972 
00973 /**
00974  * Adds information about functions and classes from Objective-C modules to the Prog object.
00975  *
00976  * \param modules A map from name to the Objective-C modules.
00977  * \param prog The Prog object to add the information to.
00978  */
00979 void Boomerang::objcDecode(std::map<std::string, ObjcModule> &modules, Prog *prog)
00980 {
00981     if (VERBOSE)
00982         LOG << "Adding Objective-C information to Prog.\n";
00983     Cluster *root = prog->getRootCluster();
00984     for (std::map<std::string, ObjcModule>::iterator it = modules.begin(); it != modules.end(); it++) {
00985         ObjcModule &mod = (*it).second;
00986         Module *module = new Module(mod.name.c_str());
00987         root->addChild(module);
00988         if (VERBOSE)
00989             LOG << "\tModule: " << mod.name.c_str() << "\n";
00990         for (std::map<std::string, ObjcClass>::iterator it1 = mod.classes.begin(); it1 != mod.classes.end(); it1++) {
00991             ObjcClass &c = (*it1).second;
00992             Class *cl = new Class(c.name.c_str());
00993             root->addChild(cl);
00994             if (VERBOSE)
00995                 LOG << "\t\tClass: " << c.name.c_str() << "\n";
00996             for (std::map<std::string, ObjcMethod>::iterator it2 = c.methods.begin(); it2 != c.methods.end(); it2++) {
00997                 ObjcMethod &m = (*it2).second;
00998                 // TODO: parse :'s in names
00999                 Proc *p = prog->newProc(m.name.c_str(), m.addr);
01000                 p->setCluster(cl);
01001                 // TODO: decode types in m.types
01002                 if (VERBOSE)
01003                     LOG << "\t\t\tMethod: " << m.name.c_str() << "\n";
01004             }
01005         }
01006     }
01007     if (VERBOSE)
01008         LOG << "\n";
01009 }
01010 
01011 /**
01012  * Loads the executable file and decodes it.
01013  *
01014  * \param fname The name of the file to load.
01015  * \param pname How the Prog will be named.
01016  *
01017  * \returns A Prog object.
01018  */
01019 Prog *Boomerang::loadAndDecode(const char *fname, const char *pname)
01020 {
01021     std::cout << "loading...\n";
01022     Prog *prog = new Prog();
01023     FrontEnd *fe = FrontEnd::Load(fname, prog);
01024     if (fe == NULL) {
01025         std::cerr << "failed.\n";
01026         return NULL;
01027     }
01028     prog->setFrontEnd(fe);
01029 
01030     // Add symbols from -s switch(es)
01031     for (std::map<ADDRESS, std::string>::iterator it = symbols.begin();
01032          it != symbols.end(); it++) {
01033         fe->AddSymbol((*it).first, (*it).second.c_str());
01034     }
01035     fe->readLibraryCatalog();       // Needed before readSymbolFile()
01036 
01037     for (unsigned i = 0; i < symbolFiles.size(); i++) {
01038         std::cout << "reading symbol file " << symbolFiles[i].c_str() << "\n";
01039         prog->readSymbolFile(symbolFiles[i].c_str());
01040     }
01041 
01042     std::map<std::string, ObjcModule> &objcmodules = fe->getBinaryFile()->getObjcModules();
01043     if (objcmodules.size())
01044         objcDecode(objcmodules, prog);
01045 
01046     // Entry points from -e (and -E) switch(es)
01047     for (unsigned i = 0; i < entrypoints.size(); i++) {
01048         std::cout<< "decoding specified entrypoint " << std::hex << entrypoints[i] << "\n";
01049         prog->decodeEntryPoint(entrypoints[i]);
01050     }
01051 
01052     if (entrypoints.size() == 0) {      // no -e or -E given
01053         if (decodeMain)
01054             std::cout << "decoding entry point...\n";
01055         fe->decode(prog, decodeMain, pname);
01056 
01057         if (!noDecodeChildren) {
01058             // this causes any undecoded userprocs to be decoded
01059             std::cout << "decoding anything undecoded...\n";
01060             fe->decode(prog, NO_ADDRESS);
01061         }
01062     }
01063 
01064     std::cout << "finishing decode...\n";
01065     prog->finishDecode();
01066 
01067     Boomerang::get()->alert_end_decode();
01068 
01069     std::cout << "found " << std::dec << prog->getNumUserProcs() << " procs\n";
01070 
01071     // GK: The analysis which was performed was not exactly very "analysing", and so it has been moved to
01072     // prog::finishDecode, UserProc::assignProcsToCalls and UserProc::finalSimplify
01073     //std::cout << "analysing...\n";
01074     //prog->analyse();
01075 
01076     if (generateSymbols) {
01077         prog->printSymbolsToFile();
01078     }
01079     if (generateCallGraph) {
01080         prog->printCallGraph();
01081         prog->printCallGraphXML();
01082     }
01083     return prog;
01084 }
01085 
#if defined(_WIN32) && !defined(__MINGW32__)
/**
 * Watchdog loop: polls the clock once a second and terminates the process as soon as more than
 * Boomerang's minsToStopAfter minutes have elapsed since \a start.
 *
 * \param start     the time the decompilation started
 */
DWORD WINAPI stopProcess(
    time_t start
)
{
    const int limitMins = Boomerang::get()->minsToStopAfter;
    for (;;) {
        time_t current;
        time(&current);
        if ((current - start) > limitMins * 60) {
            std::cerr << "\n\n Stopping process, timeout.\n";
            ExitProcess(1);
        }
        Sleep(1000);
    }
}
#else
/**
 * Timeout handler with a signal-handler signature: reports the timeout on std::cerr and exits with status 1.
 * NOTE(review): decompile() hands this function to CreateThread whenever _WIN32 is defined (including MinGW),
 * where this variant is the one that gets compiled -- confirm that is intended.
 *
 * \param n         unused
 */
void stopProcess(int n)
{
    (void)n;    // Required by the handler signature only
    std::cerr << "\n\n Stopping process, timeout.\n";
    exit(1);
}
#endif
01109 
01110 /**
01111  * The program will be subsequently be loaded, decoded, decompiled and written to a source file.
01112  * After decompilation the elapsed time is printed to std::cerr.
01113  *
01114  * \param fname The name of the file to load.
01115  * \param pname The name that will be given to the Proc.
01116  *
01117  * \return Zero on success, nonzero on faillure.
01118  */
01119 int Boomerang::decompile(const char *fname, const char *pname)
01120 {
01121     Prog *prog;
01122     time_t start;
01123     time(&start);
01124 
01125     if (minsToStopAfter) {
01126         std::cout << "stopping decompile after " << minsToStopAfter << " minutes.\n";
01127 #if defined(_WIN32)             // Includes MinGW
01128         DWORD id;
01129         CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)stopProcess, (LPVOID)start, 0, &id);
01130 #else
01131         signal(SIGALRM, stopProcess);
01132         alarm(minsToStopAfter * 60);
01133 #endif
01134     }
01135 
01136 //  std::cout << "setting up transformers...\n";
01137 //  ExpTransformer::loadAll();
01138 
01139 #if USE_XML
01140     if (loadBeforeDecompile) {
01141         std::cout << "loading persisted state...\n";
01142         XMLProgParser *p = new XMLProgParser();
01143         prog = p->parse(fname);
01144     } else
01145 #endif
01146     {
01147         prog = loadAndDecode(fname, pname);
01148         if (prog == NULL)
01149             return 1;
01150     }
01151 
01152 #if USE_XML
01153     if (saveBeforeDecompile) {
01154         std::cout << "saving persistable state...\n";
01155         XMLProgParser *p = new XMLProgParser();
01156         p->persistToXML(prog);
01157     }
01158 #endif
01159 
01160     if (stopBeforeDecompile)
01161         return 0;
01162 
01163     std::cout << "decompiling...\n";
01164     prog->decompile();
01165 
01166     if (dotFile)
01167         prog->generateDotFile();
01168 
01169     if (printAST) {
01170         std::cout << "printing AST...\n";
01171         PROGMAP::const_iterator it;
01172         for (Proc *p = prog->getFirstProc(it); p; p = prog->getNextProc(it))
01173             if (!p->isLib()) {
01174                 UserProc *u = (UserProc*)p;
01175                 u->getCFG()->compressCfg();
01176                 u->printAST();
01177             }
01178     }
01179 
01180     std::cout << "generating code...\n";
01181     prog->generateCode();
01182 
01183     std::cout << "output written to " << outputPath << prog->getRootCluster()->getName() << "\n";
01184 
01185     if (Boomerang::get()->ofsIndCallReport)
01186         ofsIndCallReport->close();
01187 
01188     time_t end;
01189     time(&end);
01190     int hours = (int)((end-start) / 60 / 60);
01191     int mins = (int)((end-start) / 60 - hours * 60);
01192     int secs = (int)((end-start) - hours * 60 * 60 - mins * 60);
01193     std::cout << "completed in " << std::dec;
01194     if (hours)
01195         std::cout << hours << " hours ";
01196     if (hours || mins)
01197         std::cout << mins << " mins ";
01198     std::cout << secs << " sec" << (secs == 1 ? "" : "s") << ".\n";
01199 
01200     return 0;
01201 }
01202 
01203 #if USE_XML
01204 /**
01205  * Saves the state of the Prog object to a XML file.
01206  * \param prog The Prog object to save.
01207  */
01208 void Boomerang::persistToXML(Prog *prog)
01209 {
01210     LOG << "saving persistable state...\n";
01211     XMLProgParser *p = new XMLProgParser();
01212     p->persistToXML(prog);
01213 }
01214 /**
01215  * Loads the state of a Prog object from a XML file.
01216  * \param fname The name of the XML file.
01217  * \return The loaded Prog object.
01218  */
01219 Prog *Boomerang::loadFromXML(const char *fname)
01220 {
01221     LOG << "loading persistable state...\n";
01222     XMLProgParser *p = new XMLProgParser();
01223     return p->parse(fname);
01224 }
01225 #endif
01226 
01227 /**
01228  * Prints the last lines of the log file.
01229  */
01230 void Boomerang::logTail() {
01231     logger->tail();
01232 }
01233 
01234 void Boomerang::alert_decompile_debug_point(UserProc *p, const char *description) {
01235     if (stopAtDebugPoints) {
01236         std::cout << "decompiling " << p->getName() << ": " << description << "\n";
01237         static char *stopAt = NULL;
01238         static std::set<Statement*> watches;
01239         if (stopAt == NULL || !strcmp(p->getName(), stopAt)) {
01240             // This is a mini command line debugger.  Feel free to expand it.
01241             for (std::set<Statement*>::iterator it = watches.begin(); it != watches.end(); it++) {
01242                 (*it)->print(std::cout);
01243                 std::cout << "\n";
01244             }
01245             std::cout << " <press enter to continue> \n";
01246             char line[1024];
01247             while(1) {
01248                 *line = 0;
01249                 fgets(line, 1024, stdin);
01250                 if (!strncmp(line, "print", 5))
01251                     p->print(std::cout);
01252                 else if (!strncmp(line, "fprint", 6)) {
01253                     std::ofstream of("out.proc");
01254                     p->print(of);
01255                     of.close();
01256                 } else if (!strncmp(line, "run ", 4)) {
01257                     stopAt = strdup(line + 4);
01258                     if (strchr(stopAt, '\n'))
01259                         *strchr(stopAt, '\n') = 0;
01260                     if (strchr(stopAt, ' '))
01261                         *strchr(stopAt, ' ') = 0;
01262                     break;
01263                 } else if (!strncmp(line, "watch ", 6)) {
01264                     int n = atoi(line + 6);
01265                     StatementList stmts;
01266                     p->getStatements(stmts);
01267                     StatementList::iterator it;
01268                     for (it = stmts.begin(); it != stmts.end(); it++) 
01269                         if ((*it)->getNumber() == n) {
01270                             watches.insert(*it);
01271                             std::cout << "watching " << *it << "\n";
01272                         }
01273                 } else
01274                     break;
01275             }
01276         }
01277     }
01278     for (std::set<Watcher*>::iterator it = watchers.begin(); it != watchers.end(); it++)
01279         (*it)->alert_decompile_debug_point(p, description);
01280 }
01281 
01282 char* Boomerang::getVersionStr() {
01283     return VERSION;
01284 }

Generated on Tue Sep 19 21:18:15 2006 for Boomerang by  doxygen 1.4.6