BinaryFile.h

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 1997-2001, The University of Queensland
00003  * Copyright (C) 2001, Sun Microsystems, Inc
00004  * Copyright (C) 2002, Trent Waddington
00005  *
00006  * See the file "LICENSE.TERMS" for information on usage and
00007  * redistribution of this file, and for a DISCLAIMER OF ALL
00008  * WARRANTIES.
00009  *
00010  */
00011 
00012 /* File: BinaryFile.h
00013  * Desc: This file contains the definition of the abstract class BinaryFile
00014 */
00015 
00016 /* $Revision: 1.31 $
00017  * This class attempts to provide a relatively machine independent interface for programs that read binary files.
00018  * Created by Mike, 97
00019  * 01 Aug 01 - Mike: Changed the definition of GetGlobalPointerInfo()
00020  * 10 Aug 01 - Mike: Added GetDynamicGlobalMap()
00021  */
00022 
00023 #ifndef __BINARYFILE_H__
00024 #define __BINARYFILE_H__
00025 
00026 /*==============================================================================
00027  * Dependencies.
00028  *============================================================================*/
00029 
00030 #include "types.h"
00031 //#include "SymTab.h"   // Was used for relocation stuff
00032 #include <list>
00033 #include <map>
00034 #include <string>
00035 #include <vector>
00036 #include <stdio.h>      // For FILE
00037 
00038 // Note: #including windows.h causes problems later in the objective C code.
00039 
00040 // Given a pointer p, returns as an int the 16 bits (halfword) held in the two bytes
00041 // starting at p, read little-endian: p[0] supplies the low 8 bits and p[1] the high 8 bits.
00042 #define LH(p)  ((int)((Byte *)(p))[0] + ((int)((Byte *)(p))[1] << 8))
00043 
00044 // IMPORT_BINARYFILE marks classes for DLL export/import; Windows doesn't seem to export everything unless you tell it to
00045 #ifdef _WIN32
00046 #if defined _MSC_VER || defined BUILDING_LIBBINARYFILE          // If don't use dllexport, get Vtable undefined!
00047 #define IMPORT_BINARYFILE __declspec(dllexport)
00048 #else                                                           // Neither _MSC_VER nor BUILDING_LIBBINARYFILE is defined
00049 #define IMPORT_BINARYFILE __declspec(dllimport)
00050 #endif
00051 #else                                                           // Not Windows: the macro expands to nothing
00052 #define IMPORT_BINARYFILE
00053 #endif
00054 
00055 // SectionInfo structure. Describes one section of a binary file: its name, native and host
00056 // addresses, sizes, a format dependent type and a few one-bit flags. BinaryFile::GetSectionInfo()
00057 // and its ByName / ByAddr variants return pointers (PSectionInfo) to these structs.
00058 
00059 struct IMPORT_BINARYFILE SectionInfo
00060 {
00061                 SectionInfo();      // Constructor (only declared here; defined elsewhere)
00062     virtual     ~SectionInfo();     // Virtual to quell a warning in gcc
00063 
00064     // Windows's PE file sections can contain any combination of code, data and bss.
00065     // Such a section can't be correctly described by the flags below, which is why a subclass
00066     // may need to override the answer to (at least) the question "Is this address in BSS".
00067     virtual bool isAddressBss(ADDRESS a) const   // This default ignores a and reports the bBss flag
00068     {
00069         return bBss != 0;
00070     }
00071 
00072     char*       pSectionName;       // Name of section
00073     ADDRESS     uNativeAddr;        // Logical or native load address
00074     ADDRESS     uHostAddr;          // Host or actual address of data
00075     ADDRESS     uSectionSize;       // Size of section in bytes
00076     ADDRESS     uSectionEntrySize;  // Size of one section entry (if applicable)
00077     unsigned    uType;               // Type of section (format dependent)
00078     unsigned    bCode:1;            // Set if section contains instructions
00079     unsigned    bData:1;            // Set if section contains data
00080     unsigned    bBss:1;             // Set if section is BSS (allocated only)
00081     unsigned    bReadOnly:1;        // Set if this is a read only section
00082 };
00083 
00084 typedef SectionInfo* PSectionInfo;      // Pointer to a SectionInfo
00085     
00086 // Objective-C stuff: plain records describing Objective-C modules, classes, methods and ivars
00087 class ObjcIvar {        // One ivar (presumably "instance variable") entry of an ObjcClass
00088 public:
00089     std::string name, type;     // Name and type of the ivar, both held as strings
00090     unsigned offset;            // presumably the ivar's offset within an object -- TODO confirm units
00091 };
00092 
00093 class ObjcMethod {      // One method entry of an ObjcClass
00094 public:
00095     std::string name, types;    // Method name; types is presumably its type signature string -- TODO confirm
00096     ADDRESS addr;               // Address associated with the method (presumably where its code is -- confirm)
00097 };
00098 
00099 class ObjcClass {       // One Objective-C class: its name plus its ivars and methods
00100 public:
00101     std::string name;                               // Name of the class
00102     std::map<std::string, ObjcIvar> ivars;          // Ivars by string key (presumably the ivar name -- confirm)
00103     std::map<std::string, ObjcMethod> methods;      // Methods by string key (presumably the method name -- confirm)
00104 };
00105 
00106 class ObjcModule {      // One Objective-C module: its name plus the classes it contains
00107 public:
00108     std::string name;                               // Name of the module
00109     std::map<std::string, ObjcClass> classes;       // Classes by string key (presumably the class name -- confirm)
00110 };
00111 
00112 /*
00113  * get_library_callback_t: pointer to a callback function which, when given the name of a library,
00114  * should return a pointer to an opened BinaryFile, or NULL if the name cannot be resolved.
00115  */
00116 class BinaryFile;       // Forward declaration, so the typedef below can name the class
00117 typedef BinaryFile *(*get_library_callback_t)(char *name);
00118 
00119 // These enums allow a sort of run time type identification, without using compiler specific
00120 // features: BinaryFile::GetFormat() returns a LOAD_FMT and BinaryFile::GetMachine() a MACHINE.
00121 enum LOAD_FMT {LOADFMT_ELF, LOADFMT_PE, LOADFMT_PALM, LOADFMT_PAR, LOADFMT_EXE, LOADFMT_MACHO, LOADFMT_LX};
00122 enum MACHINE {MACHINE_PENTIUM, MACHINE_SPARC, MACHINE_HPRISC, MACHINE_PALM, MACHINE_PPC, MACHINE_ST20};
00123 
00124 class BinaryFileFactory {       // Hands out BinaryFile objects for named files; see Load()
00125 #ifdef _WIN32
00126 // The below should be of type HINSTANCE, but #including windows.h here causes problems later compiling the Objective-C
00127 // code. So just cast as needed.
00128         void*       hModule;        // NOTE(review): nothing in this header initialises this handle -- check the .cpp
00129 #else
00130         void*       dlHandle;       // Needed for UnLoading the library; NOTE(review): not initialised in this header
00131 #endif
00132 public:
00133         BinaryFile  *Load( const char *sName );     // presumably returns a loaded BinaryFile for file sName -- TODO confirm failure value
00134         void        UnLoad();                       // presumably releases the library handle held above -- TODO confirm
00135 private:
00136     /*
00137      * Perform simple magic on the file by the given name in order to determine the appropriate type, and then return an
00138      * instance of the appropriate subclass.
00139      */
00140         BinaryFile  *getInstanceFor(const char *sName);
00141 };
00142 
00143 
00144 class IMPORT_BINARYFILE BinaryFile {    // Abstract base class; the pure virtuals below are supplied by subclasses
00145 
00146   friend class ArchiveFile;         // So can use the protected Load()
00147   friend class BinaryFileFactory;   // So can use getTextLimits
00148 
00149   public:
00150 
00151 virtual         ~BinaryFile() {}
00152 
00153         // General loader functions
00154                 BinaryFile(bool bArchive = false);  // Constructor
00155         // Unload the file. Pure virtual
00156 virtual void    UnLoad() = 0;
00157         // Open the file for r/w; pure virt
00158 virtual bool    Open(const char* sName) = 0;
00159         // Close file opened with Open()
00160 virtual void    Close() = 0;
00161         // Get the format (e.g. LOADFMT_ELF)
00162 virtual LOAD_FMT GetFormat() const = 0;
00163         // Get the expected machine (e.g. MACHINE_PENTIUM)
00164 virtual MACHINE GetMachine() const = 0;
00165 virtual const char *getFilename() const = 0;
00166 
00167         // Return whether or not the object is a library file.
00168 virtual bool    isLibrary() const = 0;
00169         // Return whether the object can be relocated if necessary
00170         // (ie if it is not tied to a particular base address). If not, the object
00171         // must be loaded at the address given by getImageBase(). By default, only libraries are relocatable.
00172 virtual bool    isRelocatable() const { return isLibrary(); }
00173         // Return a list of library names which the binary file depends on
00174 virtual std::list<const char *> getDependencyList() = 0;
00175         // Return the virtual address at which the binary expects to be loaded.
00176         // For position independent / relocatable code this should be NO_ADDRESS
00177 virtual ADDRESS getImageBase() = 0;
00178         // Return the total size of the loaded image
00179 virtual size_t  getImageSize() = 0;
00180 
00181 // Section functions
00182         int         GetNumSections() const;     // Return number of sections
00183         PSectionInfo GetSectionInfo(int idx) const; // Return section struct
00184         // Find section info given name, or 0 if not found
00185         PSectionInfo GetSectionInfoByName(const char* sName);
00186         // Find section info given an address in the section; can return 0 (isReadOnly below checks for that)
00187         PSectionInfo GetSectionInfoByAddr(ADDRESS uEntry) const;
00188 
00189         // returns true if the given address is in a read only section; false if no section was found for it
00190         bool isReadOnly(ADDRESS uEntry) const {
00191             PSectionInfo p = GetSectionInfoByAddr(uEntry);
00192             return p && p->bReadOnly;
00193         }
00194 virtual int         readNative1(ADDRESS a) {return 0;}  // Read 1 byte from given native address a. These defaults all return 0
00195         // Read 2 bytes from given native address a; considers endianness
00196 virtual int         readNative2(ADDRESS a) {return 0;}
00197         // Read 4 bytes from given native address a; considers endianness
00198 virtual int         readNative4(ADDRESS a) {return 0;}
00199         // Read 8 bytes from given native address a; considers endianness
00200 virtual QWord       readNative8(ADDRESS a) {return 0;}
00201         // Read 4 bytes as a float; consider endianness
00202 virtual float       readNativeFloat4(ADDRESS a) {return 0.;}
00203         // Read 8 bytes as a float; consider endianness
00204 virtual double      readNativeFloat8(ADDRESS a) {return 0.;}
00205 
00206 // Symbol table functions
00207         // Lookup the address, return the name, or 0 if not found
00208 virtual const char* SymbolByAddress(ADDRESS uNative);
00209         // Lookup the name, return the address. If not found, return NO_ADDRESS
00210 virtual ADDRESS     GetAddressByName(const char* pName, bool bNoTypeOK = false);
00211 virtual void        AddSymbol(ADDRESS uNative, const char *pName) { }   // Default does nothing
00212         // Lookup the name, return the size
00213 virtual int GetSizeByName(const char* pName, bool bTypeOK = false);
00214     // Get an array of addresses of imported function stubs
00215     // Set number of these to numImports
00216 virtual ADDRESS* GetImportStubs(int& numImports);
00217 virtual const char *getFilenameSymbolFor(const char *sym) { return NULL; }  // Default: NULL
00218 virtual std::vector<ADDRESS> GetExportedAddresses(bool funcsOnly = true) { return std::vector<ADDRESS>(); }  // Default: empty
00219 
00220 // Relocation table functions
00221 //virtual bool  IsAddressRelocatable(ADDRESS uNative);
00222 //virtual ADDRESS GetRelocatedAddress(ADDRESS uNative);
00223 //virtual   ADDRESS ApplyRelocation(ADDRESS uNative, ADDRESS uWord);
00224         // Get symbol associated with relocation at address, if any
00225 //virtual const char* GetRelocSym(ADDRESS uNative, ADDRESS *a = NULL, unsigned int *sz = NULL) { return NULL; }
00226 virtual bool IsRelocationAt(ADDRESS uNative) { return false; }  // Default: false for every address
00227 
00228         // Specific to BinaryFile objects that implement a "global pointer"
00229         // Gets a pair of unsigned integers representing the address of the
00230         // abstract global pointer (%agp) (in first) and a constant that will
00231         // be available in the csrparser as GLOBALOFFSET (second). At present,
00232         // the latter is only used by the Palm machine, to represent the space
00233         // allocated below the %a5 register (i.e. the difference between %a5 and
00234         // %agp). This value could possibly be used for other purposes.
00235 virtual std::pair<unsigned,unsigned> GetGlobalPointerInfo();
00236 
00237         // Get a map from ADDRESS to const char*. This map contains the native addresses and symbolic names of global
00238         // data items (if any) which are shared with dynamically linked libraries. Example: __iob (basis for stdout).
00239         // The ADDRESS is the native address of a pointer to the real dynamic data object.
00240 virtual std::map<ADDRESS, const char*>* GetDynamicGlobalMap();
00241 
00242 //
00243 //  --  --  --  --  --  --  --  --  --  --  --
00244 //
00245 
00246 // Internal information
00247         // Dump headers, etc
00248 virtual bool    DisplayDetails(const char* fileName, FILE* f = stdout);
00249 
00250         // Analysis functions
00251 virtual bool        IsDynamicLinkedProc(ADDRESS uNative);
00252 virtual bool        IsStaticLinkedLibProc(ADDRESS uNative);
00253 virtual bool        IsDynamicLinkedProcPointer(ADDRESS uNative);
00254 virtual ADDRESS     IsJumpToAnotherAddr(ADDRESS uNative);
00255 virtual const char* GetDynamicProcName(ADDRESS uNative);
00256 virtual std::list<SectionInfo*>& GetEntryPoints(const char* pEntry = "main") = 0;
00257 virtual ADDRESS     GetMainEntryPoint() = 0;
00258 
00259         /*
00260          * Return the "real" entry point, ie where execution of the program begins
00261          */
00262 virtual ADDRESS GetEntryPoint() = 0;
00263         // Find section index given name, or -1 if not found
00264         int         GetSectionIndexByName(const char* sName);
00265 
00266 
00267 virtual bool    RealLoad(const char* sName) = 0;
00268         // Defaults for the three table getters: each returns one shared, initially empty map (no heap allocation per call)
00269 virtual std::map<ADDRESS, std::string> &getFuncSymbols() { static std::map<ADDRESS, std::string> noFuncSymbols; return noFuncSymbols; }
00270 
00271 virtual std::map<ADDRESS, std::string> &getSymbols() { static std::map<ADDRESS, std::string> noSymbols; return noSymbols; }
00272 
00273 virtual std::map<std::string, ObjcModule> &getObjcModules() { static std::map<std::string, ObjcModule> noObjcModules; return noObjcModules; }
00274 
00275         ADDRESS     getLimitTextLow() { return limitTextLow; }
00276         ADDRESS     getLimitTextHigh() { return limitTextHigh; }
00277 
00278         int         getTextDelta() { return textDelta; }
00279 
00280 virtual bool        hasDebugInfo() { return false; }    // Default: false
00281 
00282 //
00283 //  --  --  --  --  --  --  --  --  --  --  --
00284 //
00285 
00286 protected:
00287         // Special load function for archive members
00288 virtual bool        PostLoad(void* handle) = 0;     // Called after loading archive member
00289 
00290         // Get the lower and upper limits of the text segment
00291         void        getTextLimits();
00292 
00293         // Data
00294         bool        m_bArchive;                 // True if archive member
00295         int         m_iNumSections;             // Number of sections
00296         PSectionInfo m_pSections;               // The section info
00297         ADDRESS     m_uInitPC;                  // Initial program counter
00298         ADDRESS     m_uInitSP;                  // Initial stack pointer
00299 
00300         // Limits of the text segment, being the lowest used native address (inclusive), and the highest
00301         // used address (not inclusive). Read through getLimitTextLow() and getLimitTextHigh() above
00302         ADDRESS     limitTextLow;
00303         ADDRESS     limitTextHigh;
00304         // Also the difference between the host and native addresses (host - native)
00305         // At this stage, we are assuming that the difference is the same for all
00306         // text sections of the BinaryFile image
00307         int         textDelta;
00308 
00309 };
00310 
00311 #endif      // #ifndef __BINARYFILE_H__

Generated on Tue Sep 19 21:18:14 2006 for Boomerang by  doxygen 1.4.6