MachOBinaryFile.cpp

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2000, The University of Queensland
00003  * Copyright (C) 2001, Sun Microsystems, Inc
00004  * Copyright (C) 2002, Trent Waddington
00005  *
00006  * See the file "LICENSE.TERMS" for information on usage and
00007  * redistribution of this file, and for a DISCLAIMER OF ALL
00008  * WARRANTIES.
00009  *
00010  */
00011 
00012 /* File: MachOBinaryFile.cc
00013  * $Revision: 1.7 $
00014  * Desc: This file contains the implementation of the class MachOBinaryFile.
00015  */
00016 
00017 /* MachO binary file format.
00018  *  This file implements the class MachOBinaryFile, derived from class
00019  *  BinaryFile. See MachOBinaryFile.h and BinaryFile.h for details.
00020  * 13 Jan 05 - Trent: Created.
00021  */
00022 
00023 #if defined(_MSC_VER) && _MSC_VER <= 1200
00024 #pragma warning(disable:4786)
00025 #endif
00026 
00027 #include "BinaryFile.h"
00028 #include "MachOBinaryFile.h"
00029 #include "config.h"
00030 #include <iostream>
00031 #include <sstream>
00032 #include <assert.h>
00033 
00034 #include "nlist.h"
00035 #include "macho-apple.h"
00036 
00037 #include "objc/objc-class.h"
00038 #include <stdarg.h>                 // For va_list for MinGW at least
00039 #include "objc/objc-runtime.h"
00040 
00041 //#define DEBUG_MACHO_LOADER
00042 //#define DEBUG_MACHO_LOADER_OBJC
00043 
00044 MachOBinaryFile::MachOBinaryFile() : m_pFileName(0)
00045 { }
00046 
00047 MachOBinaryFile::~MachOBinaryFile()
00048 {
00049     for (int i=0; i < m_iNumSections; i++) {
00050         if (m_pSections[i].pSectionName)
00051             delete [] m_pSections[i].pSectionName;
00052     }
00053     if (m_pSections) delete [] m_pSections;
00054 }
00055 
00056 bool MachOBinaryFile::Open(const char* sName) {
00057     //return Load(sName) != 0;
00058     return false;
00059 }
00060 
00061 void MachOBinaryFile::Close() {
00062     UnLoad();
00063 }
00064 
00065 std::list<SectionInfo*>& MachOBinaryFile::GetEntryPoints(
00066     const char* pEntry)
00067 {
00068     fprintf(stderr,"really don't know how to implement GetEntryPoints\n");
00069     exit(0);
00070     static std::list<SectionInfo*> l;
00071     return l;
00072 }
00073 
00074 ADDRESS MachOBinaryFile::GetEntryPoint()
00075 {
00076     return entrypoint;
00077 }
00078 
00079 ADDRESS MachOBinaryFile::GetMainEntryPoint() {
00080     ADDRESS aMain = GetAddressByName ("main", true);
00081     if (aMain != NO_ADDRESS)
00082         return aMain;
00083     aMain = GetAddressByName ("_main", true);
00084     if (aMain != NO_ADDRESS)
00085         return aMain;
00086 
00087     return NO_ADDRESS;
00088 }
00089 
00090 
00091 bool MachOBinaryFile::RealLoad(const char* sName)
00092 {
00093     m_pFileName = sName;
00094     FILE *fp = fopen(sName,"rb");
00095 
00096     header = new struct mach_header;
00097     fread(header, sizeof(*header), 1, fp);
00098 
00099     if (BMMH(header->magic) != MH_MAGIC) {
00100         fclose(fp);
00101         fprintf(stderr,"error loading file %s, bad Mach-O magic\n", sName);
00102         return false;
00103     }
00104 
00105     std::vector<struct segment_command> segments;
00106     std::vector<struct nlist> symbols;
00107     unsigned startlocal, nlocal, startdef, ndef, startundef, nundef;
00108     std::vector<struct section> stubs_sects;
00109     char *strtbl = NULL;
00110     unsigned *indirectsymtbl = NULL;
00111     ADDRESS objc_symbols = NO_ADDRESS, objc_modules = NO_ADDRESS, objc_strings = NO_ADDRESS, objc_refs = NO_ADDRESS;
00112     unsigned objc_modules_size = 0;
00113 
00114     fseek(fp, sizeof(*header), SEEK_SET);
00115     for (unsigned i = 0; i < BMMH(header->ncmds); i++) {
00116         struct load_command cmd;
00117         long pos = ftell(fp);
00118         fread(&cmd, 1, sizeof(struct load_command), fp);
00119 
00120         fseek(fp, pos, SEEK_SET);
00121         switch(BMMH(cmd.cmd)) {
00122             case LC_SEGMENT:
00123                 {
00124                     struct segment_command seg;
00125                     fread(&seg, 1, sizeof(seg), fp);
00126                     segments.push_back(seg);
00127 #ifdef DEBUG_MACHO_LOADER
00128                     fprintf(stdout, "seg addr %x size %i fileoff %x filesize %i flags %x\n", BMMH(seg.vmaddr), BMMH(seg.vmsize), BMMH(seg.fileoff), BMMH(seg.filesize), BMMH(seg.flags));
00129 #endif
00130                     for (unsigned n = 0; n < BMMH(seg.nsects); n++) {
00131                         struct section sect;
00132                         fread(&sect, 1, sizeof(sect), fp);
00133 #ifdef DEBUG_MACHO_LOADER
00134                         fprintf(stdout, "    sectname %s segname %s addr %x size %i flags %x\n", sect.sectname, sect.segname, BMMH(sect.addr), BMMH(sect.size), BMMH(sect.flags));
00135 #endif
00136                         if ((BMMH(sect.flags) & SECTION_TYPE) == S_SYMBOL_STUBS) {
00137                             stubs_sects.push_back(sect);
00138 #ifdef DEBUG_MACHO_LOADER
00139                             fprintf(stdout, "        symbol stubs section, start index %i, stub size %i\n", BMMH(sect.reserved1), BMMH(sect.reserved2));
00140 #endif
00141                         }
00142                         if (!strcmp(sect.sectname, SECT_OBJC_SYMBOLS)) {
00143                             assert(objc_symbols == NO_ADDRESS);
00144                             objc_symbols = BMMH(sect.addr);
00145                         }
00146                         if (!strcmp(sect.sectname, SECT_OBJC_MODULES)) {
00147                             assert(objc_modules == NO_ADDRESS);
00148                             objc_modules = BMMH(sect.addr);
00149                             objc_modules_size = BMMH(sect.size);
00150                         }
00151                         if (!strcmp(sect.sectname, SECT_OBJC_STRINGS)) {
00152                             assert(objc_strings == NO_ADDRESS);
00153                             objc_strings = BMMH(sect.addr);
00154                         }
00155                         if (!strcmp(sect.sectname, SECT_OBJC_REFS)) {
00156                             assert(objc_refs == NO_ADDRESS);
00157                             objc_refs = BMMH(sect.addr);
00158                         }
00159                     }
00160                 }
00161                 break;
00162             case LC_SYMTAB:
00163                 {
00164                     struct symtab_command syms;
00165                     fread(&syms, 1, sizeof(syms), fp);
00166                     fseek(fp, BMMH(syms.stroff), SEEK_SET);
00167                     strtbl = new char[BMMH(syms.strsize)];
00168                     fread(strtbl, 1, BMMH(syms.strsize), fp);
00169                     fseek(fp, BMMH(syms.symoff), SEEK_SET);
00170                     for (unsigned n = 0; n < BMMH(syms.nsyms); n++) {
00171                         struct nlist sym;
00172                         fread(&sym, 1, sizeof(sym), fp);
00173                         symbols.push_back(sym);
00174 #ifdef DEBUG_MACHO_LOADER
00175                         //fprintf(stdout, "got sym %s flags %x value %x\n", strtbl + BMMH(sym.n_un.n_strx), sym.n_type, BMMH(sym.n_value));
00176 #endif
00177                     }
00178 #ifdef DEBUG_MACHO_LOADER
00179                     fprintf(stdout, "symtab contains %i symbols\n", BMMH(syms.nsyms));
00180 #endif
00181                 }
00182                 break;
00183             case LC_DYSYMTAB:
00184                 {
00185                     struct dysymtab_command syms;
00186                     fread(&syms, 1, sizeof(syms), fp);
00187 #ifdef DEBUG_MACHO_LOADER
00188                     fprintf(stdout, "dysymtab local %i %i defext %i %i undef %i %i\n", 
00189                         BMMH(syms.ilocalsym), BMMH(syms.nlocalsym), 
00190                         BMMH(syms.iextdefsym), BMMH(syms.nextdefsym),
00191                         BMMH(syms.iundefsym), BMMH(syms.nundefsym));
00192 #endif
00193                     startlocal = BMMH(syms.ilocalsym);
00194                     nlocal = BMMH(syms.nlocalsym);
00195                     startdef = BMMH(syms.iextdefsym);
00196                     ndef = BMMH(syms.nextdefsym);
00197                     startundef = BMMH(syms.iundefsym);
00198                     nundef = BMMH(syms.nundefsym);
00199 
00200 #ifdef DEBUG_MACHO_LOADER
00201                     fprintf(stdout, "dysymtab has %i indirect symbols: ", BMMH(syms.nindirectsyms));
00202 #endif
00203                     indirectsymtbl = new unsigned[BMMH(syms.nindirectsyms)];
00204                     fseek(fp, BMMH(syms.indirectsymoff), SEEK_SET);
00205                     fread(indirectsymtbl, 1, BMMH(syms.nindirectsyms)*sizeof(unsigned), fp);
00206 #ifdef DEBUG_MACHO_LOADER
00207                     for (unsigned j = 0; j < BMMH(syms.nindirectsyms); j++) {
00208                         fprintf(stdout, "%i ", BMMH(indirectsymtbl[j]));
00209                     }
00210                     fprintf(stdout, "\n");
00211 #endif
00212                 }
00213                 break;
00214             default:
00215 #ifdef DEBUG_MACHO_LOADER
00216                 fprintf(stderr, "not handled load command %x\n", BMMH(cmd.cmd));
00217 #endif
00218                 // yep, there's lots of em
00219                 break;
00220         }
00221 
00222         fseek(fp, pos + BMMH(cmd.cmdsize), SEEK_SET);
00223     }
00224 
00225     struct segment_command *lowest = &segments[0], *highest = &segments[0];
00226     for (unsigned i = 1; i < segments.size(); i++) {
00227         if (BMMH(segments[i].vmaddr) < BMMH(lowest->vmaddr))
00228             lowest = &segments[i];
00229         if (BMMH(segments[i].vmaddr) > BMMH(highest->vmaddr))
00230             highest = &segments[i];
00231     }
00232 
00233     loaded_addr = BMMH(lowest->vmaddr);
00234     loaded_size = BMMH(highest->vmaddr) - BMMH(lowest->vmaddr) + BMMH(highest->vmsize);        
00235 
00236     base = (char *)malloc(loaded_size);
00237 
00238     if (!base) {
00239         fclose(fp);
00240         fprintf(stderr,"Cannot allocate memory for copy of image\n");
00241         return false;
00242     }
00243 
00244     m_iNumSections = segments.size();
00245     m_pSections = new SectionInfo[m_iNumSections];
00246 
00247     for (unsigned i = 0; i < segments.size(); i++) {
00248         fseek(fp, BMMH(segments[i].fileoff), SEEK_SET);
00249         ADDRESS a = BMMH(segments[i].vmaddr);
00250         unsigned sz = BMMH(segments[i].vmsize);
00251         unsigned fsz = BMMH(segments[i].filesize);
00252         memset(base + a - loaded_addr, 0, sz);
00253         fread(base + a - loaded_addr, 1, fsz, fp);
00254 #ifdef DEBUG_MACHO_LOADER
00255         fprintf(stderr, "loaded segment %x %i in mem %i in file\n", a, sz, fsz);
00256 #endif
00257 
00258         m_pSections[i].pSectionName = new char[17];
00259         strncpy(m_pSections[i].pSectionName, segments[i].segname, 16);
00260         m_pSections[i].pSectionName[16] = 0;
00261         m_pSections[i].uNativeAddr = BMMH(segments[i].vmaddr);
00262         m_pSections[i].uHostAddr = (ADDRESS)base + BMMH(segments[i].vmaddr) - loaded_addr;
00263         m_pSections[i].uSectionSize = BMMH(segments[i].vmsize);
00264 
00265         unsigned long l = BMMH(segments[i].initprot);
00266         m_pSections[i].bBss     = false; // TODO
00267         m_pSections[i].bCode        = l&VM_PROT_EXECUTE?1:0;
00268         m_pSections[i].bData        = l&VM_PROT_READ?1:0;
00269         m_pSections[i].bReadOnly    = ~(l&VM_PROT_WRITE)?0:1;
00270     }
00271 
00272     // process stubs_sects
00273     for (unsigned j = 0; j < stubs_sects.size(); j++) {
00274         for (unsigned i = 0; i < BMMH(stubs_sects[j].size) / BMMH(stubs_sects[j].reserved2); i++) {
00275             unsigned startidx = BMMH(stubs_sects[j].reserved1);
00276             unsigned symbol = BMMH(indirectsymtbl[startidx + i]);
00277             ADDRESS addr = BMMH(stubs_sects[j].addr) + i * BMMH(stubs_sects[j].reserved2);
00278 #ifdef DEBUG_MACHO_LOADER
00279             fprintf(stdout, "stub for %s at %x\n", strtbl + BMMH(symbols[symbol].n_un.n_strx), addr);
00280 #endif
00281             char *name = strtbl + BMMH(symbols[symbol].n_un.n_strx);
00282             if (*name == '_')  // we want printf not _printf
00283                 name++;
00284             m_SymA[addr] = name;
00285             dlprocs[addr] = name;
00286         }
00287     }
00288 
00289     // process the remaining symbols
00290     for (unsigned i = 0; i < symbols.size(); i++) {
00291         char *name = strtbl + BMMH(symbols[i].n_un.n_strx);
00292         if (BMMH(symbols[i].n_un.n_strx) != 0 && BMMH(symbols[i].n_value) != 0 && *name != 0) {
00293             
00294 #ifdef DEBUG_MACHO_LOADER
00295             fprintf(stdout, "symbol %s at %x type %x\n", name, 
00296                                                     BMMH(symbols[i].n_value), 
00297                                                     BMMH(symbols[i].n_type) & N_TYPE);
00298 #endif
00299             if (*name == '_')  // we want main not _main
00300                 name++;
00301             m_SymA[BMMH(symbols[i].n_value)] = name;
00302         }
00303     }
00304 
00305     // process objective-c section
00306     if (objc_modules != NO_ADDRESS) {
00307 #ifdef DEBUG_MACHO_LOADER_OBJC
00308         fprintf(stdout, "processing objective-c section\n");
00309 #endif
00310         for (unsigned i = 0; i < objc_modules_size; ) {
00311             struct objc_module *module = (struct objc_module *)((ADDRESS)base + objc_modules - loaded_addr + i);
00312             char *name = (char *)((ADDRESS)base + BMMH(module->name) - loaded_addr);
00313             Symtab symtab = (Symtab)((ADDRESS)base + BMMH(module->symtab) - loaded_addr);
00314 #ifdef DEBUG_MACHO_LOADER_OBJC
00315             fprintf(stdout, "module %s (%i classes)\n", name, BMMHW(symtab->cls_def_cnt));
00316 #endif
00317             ObjcModule *m = &modules[name];
00318             m->name = name;
00319             for (unsigned j = 0; j < BMMHW(symtab->cls_def_cnt); j++) {
00320                 struct objc_class *def = (struct objc_class *)((ADDRESS)base + BMMH(symtab->defs[j]) - loaded_addr);
00321                 char *name = (char *)((ADDRESS)base + BMMH(def->name) - loaded_addr);
00322 #ifdef DEBUG_MACHO_LOADER_OBJC
00323                 fprintf(stdout, "  class %s\n", name);
00324 #endif
00325                 ObjcClass *cl = &m->classes[name];
00326                 cl->name = name;
00327                 struct objc_ivar_list *ivars = (struct objc_ivar_list *)((ADDRESS)base + BMMH(def->ivars) - loaded_addr);
00328                 for (unsigned k = 0; k < BMMH(ivars->ivar_count); k++) {
00329                     struct objc_ivar *ivar = &ivars->ivar_list[k];
00330                     char *name = (char*)((ADDRESS)base + BMMH(ivar->ivar_name) - loaded_addr);
00331                     char *types = (char*)((ADDRESS)base + BMMH(ivar->ivar_type) - loaded_addr);
00332 #ifdef DEBUG_MACHO_LOADER_OBJC
00333                     fprintf(stdout, "    ivar %s %s %x\n", name, types, BMMH(ivar->ivar_offset));
00334 #endif
00335                     ObjcIvar *iv = &cl->ivars[name];
00336                     iv->name = name;
00337                     iv->type = types;
00338                     iv->offset = BMMH(ivar->ivar_offset);
00339                 }
00340                 // this is weird, why is it defined as a ** in the struct but used as a * in otool?
00341                 struct objc_method_list *methods = (struct objc_method_list *)((ADDRESS)base + BMMH(def->methodLists) - loaded_addr);
00342                 for (unsigned k = 0; k < BMMH(methods->method_count); k++) {
00343                     struct objc_method *method = &methods->method_list[k];
00344                     char *name = (char*)((ADDRESS)base + BMMH(method->method_name) - loaded_addr);
00345                     char *types = (char*)((ADDRESS)base + BMMH(method->method_types) - loaded_addr);
00346 #ifdef DEBUG_MACHO_LOADER_OBJC
00347                     fprintf(stdout, "    method %s %s %x\n", name, types, BMMH(method->method_imp));
00348 #endif
00349                     ObjcMethod *me = &cl->methods[name];
00350                     me->name = name;
00351                     me->types = types;
00352                     me->addr = BMMH(method->method_imp);
00353                 }
00354             }
00355             i += BMMH(module->size);
00356         }
00357     }
00358 
00359     // Give the entry point a symbol
00360     // ADDRESS entry = GetMainEntryPoint();
00361     entrypoint = GetMainEntryPoint();
00362 
00363     fclose(fp);
00364     return true;
00365 }
00366 
00367 // Clean up and unload the binary image
00368 void MachOBinaryFile::UnLoad()
00369 {
00370 } 
00371 
00372 bool MachOBinaryFile::PostLoad(void* handle)
00373 {
00374     return false;
00375 }
00376 
00377 const char* MachOBinaryFile::SymbolByAddress(ADDRESS dwAddr) {
00378     std::map<ADDRESS, std::string>::iterator it = m_SymA.find(dwAddr);
00379     if (it == m_SymA.end())
00380             return 0;
00381     return (char*) it->second.c_str();
00382 }
00383 
00384 ADDRESS MachOBinaryFile::GetAddressByName(const char* pName,
00385     bool bNoTypeOK /* = false */) {
00386     // This is "looking up the wrong way" and hopefully is uncommon
00387     // Use linear search
00388     std::map<ADDRESS, std::string>::iterator it = m_SymA.begin();
00389     while (it != m_SymA.end()) {
00390         // std::cerr << "Symbol: " << it->second.c_str() << " at 0x" << std::hex << it->first << "\n";
00391         if (strcmp(it->second.c_str(), pName) == 0)
00392             return it->first;
00393         it++;
00394     }
00395     return NO_ADDRESS;
00396 }
00397 
00398 void MachOBinaryFile::AddSymbol(ADDRESS uNative, const char *pName)
00399 {
00400     m_SymA[uNative] = pName;
00401 }
00402 
00403 bool MachOBinaryFile::DisplayDetails(const char* fileName, FILE* f
00404      /* = stdout */)
00405 {
00406     return false;
00407 }
00408 
00409 int MachOBinaryFile::machORead2(short* ps) const {
00410     unsigned char* p = (unsigned char*)ps;
00411     // Big endian
00412     int n = (int)(p[1] + (p[0] << 8));
00413     return n;
00414 }
00415 
00416 int MachOBinaryFile::machORead4(int* pi) const{
00417     short* p = (short*)pi;
00418     int n1 = machORead2(p);
00419     int n2 = machORead2(p+1);
00420     int n = (int) (n2 | (n1 << 16));
00421     return n;
00422 }
00423 
00424 // Read 2 bytes from given native address
00425 int MachOBinaryFile::readNative1(ADDRESS nat) {
00426     PSectionInfo si = GetSectionInfoByAddr(nat);
00427     if (si == 0) 
00428         si = GetSectionInfo(0);
00429     ADDRESS host = si->uHostAddr - si->uNativeAddr + nat;
00430     return *(char*)host;
00431 }
00432 
00433 // Read 2 bytes from given native address
00434 int MachOBinaryFile::readNative2(ADDRESS nat) {
00435     PSectionInfo si = GetSectionInfoByAddr(nat);
00436     if (si == 0) return 0;
00437     ADDRESS host = si->uHostAddr - si->uNativeAddr + nat;
00438     int n = machORead2((short*)host);
00439     return n;
00440 }
00441 
00442 // Read 4 bytes from given native address
00443 int MachOBinaryFile::readNative4(ADDRESS nat) {
00444     PSectionInfo si = GetSectionInfoByAddr(nat);
00445     if (si == 0) return 0;
00446     ADDRESS host = si->uHostAddr - si->uNativeAddr + nat;
00447     int n = machORead4((int*)host);
00448     return n;
00449 }
00450 
00451 // Read 8 bytes from given native address
00452 QWord MachOBinaryFile::readNative8(ADDRESS nat) {
00453     int raw[2];
00454 #ifdef WORDS_BIGENDIAN      // This tests the host machine
00455     // Source and host are different endianness
00456     raw[1] = readNative4(nat);
00457     raw[0] = readNative4(nat+4);
00458 #else
00459     // Source and host are same endianness
00460     raw[0] = readNative4(nat);
00461     raw[1] = readNative4(nat+4);
00462 #endif
00463     return *(QWord*)raw;
00464 }
00465 
00466 // Read 4 bytes as a float
00467 float MachOBinaryFile::readNativeFloat4(ADDRESS nat) {
00468     int raw = readNative4(nat);
00469     // Ugh! gcc says that reinterpreting from int to float is invalid!!
00470     //return reinterpret_cast<float>(raw);      // Note: cast, not convert!!
00471     return *(float*)&raw;                       // Note: cast, not convert
00472 }
00473 
00474 // Read 8 bytes as a float
00475 double MachOBinaryFile::readNativeFloat8(ADDRESS nat) {
00476     int raw[2];
00477 #ifdef WORDS_BIGENDIAN      // This tests the host machine
00478     // Source and host are different endianness
00479     raw[1] = readNative4(nat);
00480     raw[0] = readNative4(nat+4);
00481 #else
00482     // Source and host are same endianness
00483     raw[0] = readNative4(nat);
00484     raw[1] = readNative4(nat+4);
00485 #endif
00486     //return reinterpret_cast<double>(*raw);    // Note: cast, not convert!!
00487     return *(double*)raw;
00488 }
00489 
00490 const char *MachOBinaryFile::GetDynamicProcName(ADDRESS uNative)
00491 {
00492     return dlprocs[uNative].c_str();
00493 }
00494 
00495 LOAD_FMT MachOBinaryFile::GetFormat() const
00496 {
00497     return LOADFMT_MACHO;
00498 }
00499 
00500 MACHINE MachOBinaryFile::GetMachine() const
00501 {
00502     return MACHINE_PPC;
00503 }
00504 
00505 bool MachOBinaryFile::isLibrary() const
00506 {
00507     return false;
00508 }
00509 
00510 ADDRESS MachOBinaryFile::getImageBase()
00511 {
00512     return loaded_addr;
00513 }
00514 
00515 size_t MachOBinaryFile::getImageSize()
00516 {
00517     return loaded_size;
00518 }
00519 
00520 std::list<const char *> MachOBinaryFile::getDependencyList()
00521 {
00522     return std::list<const char *>(); /* FIXME */
00523 }
00524 
00525 DWord MachOBinaryFile::getDelta()
00526 {
00527     // Stupid function anyway: delta depends on section
00528     // This should work for the header only
00529     //  return (DWord)base - LMMH(m_pPEHeader->Imagebase); 
00530     return (DWord)base - (DWord)loaded_addr; 
00531 }
00532 
00533 // This function is called via dlopen/dlsym; it returns a new BinaryFile
00534 // derived concrete object. After this object is returned, the virtual function
00535 // call mechanism will call the rest of the code in this library
00536 // It needs to be C linkage so that it its name is not mangled
00537 extern "C" {
00538 #ifdef _WIN32
00539     __declspec(dllexport)
00540 #endif
00541     BinaryFile* construct()
00542     {
00543         return new MachOBinaryFile;
00544     }    
00545 }

Generated on Tue Sep 19 21:18:25 2006 for Boomerang by  doxygen 1.4.6