ElfBinaryFile.cpp

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 1997-2001, The University of Queensland
00003  *
00004  * See the file "LICENSE.TERMS" for information on usage and
00005  * redistribution of this file, and for a DISCLAIMER OF ALL
00006  * WARRANTIES.
00007  *
00008  */
00009 
00010 /*******************************************************************************
00011  * File: ElfBinaryFile.cc
00012  * Desc: This file contains the implementation of the class ElfBinaryFile.
00013  ******************************************************************************/
00014 
00015 /*
00016  * $Revision: 1.51 $
00017  *
00018  * ELF binary file format.
00019  *  This file implements the class ElfBinaryFile, derived from class BinaryFile.
00020  * See ElfBinaryFile.h and BinaryFile.h for details
00021  *  MVE 30/9/97
00022  * 10 Mar 02 - Mike: Mods for stand alone operation; construct function
00023  * 21 May 02 - Mike: Slight mod for gcc 3.1
00024  * 01 Oct 02 - Mike: Removed elf library (and include file) dependencies
00025  * 02 Oct 02 - Mike: Fixed some more endianness issues
00026  * 24 Mar 03 - Mike: GetAddressByName returns NO_ADDRESS on failure now
00027  * 12 Jul 05 - Mike: fixed an endless loop in findRelPltOffset for pre-3.3.3 gcc compiled input files
00028 */
00029 
00030 /*==============================================================================
00031  * Dependencies.
00032  *============================================================================*/
00033 
00034 #include "ElfBinaryFile.h"
00035 #include <sys/types.h>      // Next three for open()
00036 #include <sys/stat.h>
00037 #include <fcntl.h>
00038 #include <iostream>
00039 #include <assert.h>
00040 #include "config.h"
00041 #if defined(_MSC_VER) && _MSC_VER >= 1400
00042 #pragma warning(disable:4996)       // Warnings about e.g. _strdup deprecated in VS 2005
00043 #endif
00044 
// Ordered map from a string key to an int (std::less<std::string> is the default ordering)
typedef std::map<std::string, int> StrIntMap;
00046 
00047 ElfBinaryFile::ElfBinaryFile(bool bArchive /* = false */)
00048     : BinaryFile(bArchive), // Initialise base class
00049       next_extern(0)
00050 {
00051     m_fd = 0;
00052     m_pFileName = 0;
00053     Init();                 // Initialise all the common stuff
00054 }
00055 
00056 ElfBinaryFile::~ElfBinaryFile()
00057 {
00058     if (m_pImportStubs)
00059         // Delete the array of import stubs
00060         delete [] m_pImportStubs;
00061 }
00062 
00063 // Reset internal state, except for those that keep track of which member
00064 // we're up to
00065 void ElfBinaryFile::Init()
00066 {
00067     m_pImage = 0;
00068     m_pPhdrs = 0;           // No program headers
00069     m_pShdrs = 0;           // No section headers
00070     m_pStrings = 0;         // No strings
00071     m_pReloc = 0;
00072     m_pSym = 0;
00073     m_uPltMin = 0;          // No PLT limits
00074     m_uPltMax = 0;
00075     m_iLastSize = 0;
00076     m_pImportStubs = 0;
00077 }
00078 
// ELF symbol name hash, as used to index the buckets of a .hash section.
// o0 is the NUL terminated symbol name; the result always has its top 4 bits clear.
// The name is read as unsigned bytes: reading it through plain (possibly signed) char sign-extends any
// byte >= 0x80 and yields a different hash from the one stored in the file.
extern "C" {        // So we can call this with dlopen()
unsigned elf_hash(const char* o0) {
    const unsigned char* p = reinterpret_cast<const unsigned char*>(o0);
    unsigned h = 0;
    while (*p != 0) {
        h = (h << 4) + *p++;
        // Fold the top nibble back into the low bits, then clear it
        const unsigned top = h & 0xf0000000;
        if (top != 0)
            h ^= top >> 24;
        h &= ~top;
    }
    return h;
}
}   // extern "C"
00100 
00101 // Return true for a good load
00102 bool ElfBinaryFile::RealLoad(const char* sName)
00103 {
00104     int i;
00105 
00106     if (m_bArchive) {
00107         // This is a member of an archive. Should not be using this function at all
00108         return false;
00109     }
00110 
00111     m_pFileName = sName;
00112     m_fd = fopen (sName, "rb");
00113     if (m_fd == NULL) return 0;
00114 
00115     // Determine file size
00116     if (fseek(m_fd, 0, SEEK_END)) {
00117         fprintf(stderr, "Error seeking to end of binary file\n");
00118         return false;
00119     }
00120     m_lImageSize = ftell(m_fd);
00121 
00122     // Allocate memory to hold the file
00123     m_pImage = new char[m_lImageSize];
00124     if (m_pImage == 0) {
00125         fprintf(stderr, "Could not allocate %ld bytes for program image\n", m_lImageSize);
00126         return false;
00127     }
00128     Elf32_Ehdr* pHeader = (Elf32_Ehdr*)m_pImage;    // Save a lot of casts
00129 
00130     // Read the whole file in
00131     fseek(m_fd, 0, SEEK_SET);
00132     size_t size = fread(m_pImage, 1, m_lImageSize, m_fd);
00133     if (size != (size_t)m_lImageSize)
00134         fprintf(stderr, "WARNING! Only read %ud of %ld bytes of binary file!\n", size, m_lImageSize);
00135 
00136     // Basic checks
00137     if (strncmp(m_pImage, "\x7F""ELF", 4) != 0) {
00138         fprintf(stderr, "Incorrect header: %02X %02X %02X %02X\n",
00139           pHeader->e_ident[0], pHeader->e_ident[1], pHeader->e_ident[2],
00140           pHeader->e_ident[3]);
00141         return 0;
00142     }
00143     if ((pHeader->endianness != 1) && (pHeader->endianness != 2)) {
00144         fprintf(stderr, "Unknown endianness %02X\n", pHeader->endianness);
00145         return 0;
00146     }
00147     // Needed for elfRead4 to work:
00148     m_elfEndianness = pHeader->endianness - 1;
00149 
00150     // Set up program header pointer (in case needed)
00151     i = elfRead4(&pHeader->e_phoff);
00152     if (i) m_pPhdrs = (Elf32_Phdr*)(m_pImage + i);
00153 
00154     // Set up section header pointer
00155     i = elfRead4(&pHeader->e_shoff);
00156     if (i) m_pShdrs = (Elf32_Shdr*)(m_pImage + i);
00157 
00158     // Set up section header string table pointer
00159     // NOTE: it does not appear that endianness affects shorts.. they are always in little endian format
00160     // Gerard: I disagree. I need the elfRead on linux/i386
00161     i = elfRead2(&pHeader->e_shstrndx); // pHeader->e_shstrndx;
00162     if (i) m_pStrings = m_pImage + elfRead4(&m_pShdrs[i].sh_offset);
00163 
00164     i = 1;              // counter - # sects. Start @ 1, total m_iNumSections
00165     char* pName;        // Section's name
00166 
00167     // Number of sections
00168     m_iNumSections = elfRead2(&pHeader->e_shnum);
00169 
00170     // Allocate room for all the Elf sections (including the silly first one)
00171     m_pSections = new SectionInfo[m_iNumSections];
00172     if (m_pSections == 0) return false;     // Failed!
00173 
00174     // Set up the m_sh_link and m_sh_info arrays
00175     m_sh_link = new int[m_iNumSections];
00176     m_sh_info = new int[m_iNumSections];
00177 
00178     // Number of elf sections
00179     bool bGotCode = false;                  // True when have seen a code sect
00180     ADDRESS arbitaryLoadAddr = 0x08000000;
00181     for (i=0; i < m_iNumSections; i++) {
00182         // Get section information.
00183         Elf32_Shdr* pShdr = m_pShdrs + i;
00184         if ((char*)pShdr > m_pImage + m_lImageSize) {
00185             std::cerr << "section " << i << " header is outside the image size\n";
00186             return false;
00187         }
00188         pName = m_pStrings + elfRead4(&pShdr->sh_name);
00189         if (pName > m_pImage + m_lImageSize) {
00190             std::cerr << "name for section " << i << " is outside the image size\n";
00191             return false;
00192         }
00193         m_pSections[i].pSectionName = pName;
00194         int off = elfRead4(&pShdr->sh_offset);
00195         if (off) m_pSections[i].uHostAddr = (ADDRESS)(m_pImage + off);
00196         m_pSections[i].uNativeAddr = elfRead4(&pShdr->sh_addr);
00197         m_pSections[i].uSectionSize = elfRead4(&pShdr->sh_size);
00198         if (m_pSections[i].uNativeAddr == 0 && strncmp(pName, ".rel", 4)) {
00199             int align = elfRead4(&pShdr->sh_addralign);
00200             if (align > 1) {
00201                 if (arbitaryLoadAddr % align)
00202                     arbitaryLoadAddr += align - (arbitaryLoadAddr % align);
00203             }
00204             m_pSections[i].uNativeAddr = arbitaryLoadAddr;
00205             arbitaryLoadAddr += m_pSections[i].uSectionSize;
00206         }
00207         m_pSections[i].uType = elfRead4(&pShdr->sh_type);
00208         m_sh_link[i] = elfRead4(&pShdr->sh_link);
00209         m_sh_info[i] = elfRead4(&pShdr->sh_info);
00210         m_pSections[i].uSectionEntrySize = elfRead4(&pShdr->sh_entsize);
00211         if (m_pSections[i].uNativeAddr + m_pSections[i].uSectionSize > next_extern)
00212             first_extern = next_extern = m_pSections[i].uNativeAddr + m_pSections[i].uSectionSize;
00213         if ((elfRead4(&pShdr->sh_flags) & SHF_WRITE) == 0)
00214             m_pSections[i].bReadOnly = true;
00215         // Can't use the SHF_ALLOC bit to determine bss section; the bss section has SHF_ALLOC but also SHT_NOBITS.
00216         // (But many other sections, such as .comment, also have SHT_NOBITS). So for now, just use the name
00217 //      if ((elfRead4(&pShdr->sh_flags) & SHF_ALLOC) == 0)
00218         if (strcmp(pName, ".bss") == 0)
00219             m_pSections[i].bBss = true;
00220         if (elfRead4(&pShdr->sh_flags) & SHF_EXECINSTR) {
00221             m_pSections[i].bCode = true;
00222             bGotCode = true;            // We've got to a code section
00223         }
00224         // Deciding what is data and what is not is actually quite tricky but important.
00225         // For example, it's crucial to flag the .exception_ranges section as data, otherwise there is a "hole" in the
00226         // allocation map, that means that there is more than one "delta" from a read-only section to a page, and in the
00227         // end using -C results in a file that looks OK but when run just says "Killed".
00228         // So we use the Elf designations; it seems that ALLOC.!EXEC -> data
00229         // But we don't want sections before the .text section, like .interp, .hash, etc etc. Hence bGotCode.
00230         // NOTE: this ASSUMES that sections appear in a sensible order in the input binary file:
00231         // junk, code, rodata, data, bss
00232         if (bGotCode && ((elfRead4(&pShdr->sh_flags) & (SHF_EXECINSTR | SHF_ALLOC)) == SHF_ALLOC) &&
00233                 (elfRead4(&pShdr->sh_type) != SHT_NOBITS))
00234             m_pSections[i].bData = true;
00235     }   // for each section
00236 
00237     // assign arbitary addresses to .rel.* sections too
00238     for (i=0; i < m_iNumSections; i++)
00239         if (m_pSections[i].uNativeAddr == 0 && !strncmp(m_pSections[i].pSectionName, ".rel", 4)) {
00240             m_pSections[i].uNativeAddr = arbitaryLoadAddr;
00241             arbitaryLoadAddr += m_pSections[i].uSectionSize;
00242         }
00243 
00244     // Add symbol info. Note that some symbols will be in the main table only, and others in the dynamic table only.
00245     // So the best idea is to add symbols for all sections of the appropriate type
00246     for (i=1; i < m_iNumSections; ++i) {
00247         unsigned uType = m_pSections[i].uType;
00248         if (uType == SHT_SYMTAB || uType == SHT_DYNSYM)
00249             AddSyms(i);
00250 #if 0   // Ick; bad logic. Done with fake library function pointers now (-2 .. -1024)
00251         if (uType == SHT_REL || uType == SHT_RELA)
00252             AddRelocsAsSyms(i);
00253 #endif
00254     }
00255 
00256     // Save the relocation to symbol table info
00257     PSectionInfo pRel = GetSectionInfoByName(".rela.text"); 
00258     if (pRel) {
00259         m_bAddend = true;               // Remember its a relA table
00260         m_pReloc = (Elf32_Rel*)pRel->uHostAddr;     // Save pointer to reloc table
00261         //SetRelocInfo(pRel);
00262     }
00263     else {
00264         m_bAddend = false;
00265         pRel = GetSectionInfoByName(".rel.text");
00266         if (pRel) {
00267             //SetRelocInfo(pRel);
00268             m_pReloc = (Elf32_Rel*)pRel->uHostAddr;     // Save pointer to reloc table
00269         }
00270     }
00271 
00272     // Find the PLT limits. Required for IsDynamicLinkedProc(), e.g.
00273     PSectionInfo pPlt = GetSectionInfoByName(".plt");
00274     if (pPlt) {
00275         m_uPltMin = pPlt->uNativeAddr;
00276         m_uPltMax = pPlt->uNativeAddr + pPlt->uSectionSize;
00277     }
00278 
00279     // Apply relocations; important when the input program is not compiled with -fPIC
00280     applyRelocations();
00281 
00282     return true;                        // Success
00283 }
00284 
00285 // Clean up and unload the binary image
00286 void ElfBinaryFile::UnLoad()
00287 {
00288     if (m_pImage) delete [] m_pImage;
00289     fclose (m_fd);
00290     Init();                     // Set all internal state to 0
00291 } 
00292 
00293 // Like a replacement for elf_strptr()
00294 char* ElfBinaryFile::GetStrPtr(int idx, int offset)
00295 {
00296     if (idx < 0)
00297     {
00298         // Most commonly, this will be an index of -1, because a call to GetSectionIndexByName() failed
00299         fprintf(stderr, "Error! GetStrPtr passed index of %d\n", idx);
00300         return "Error!";
00301     }
00302     // Get a pointer to the start of the string table
00303     char* pSym = (char*)m_pSections[idx].uHostAddr;
00304     // Just add the offset
00305     return pSym + offset;
00306 }
00307 
00308 // Search the .rel[a].plt section for an entry with symbol table index i.
00309 // If found, return the native address of the associated PLT entry.
00310 // A linear search will be needed. However, starting at offset i and searching backwards with wraparound should
00311 // typically minimise the number of entries to search
00312 ADDRESS ElfBinaryFile::findRelPltOffset(int i, ADDRESS addrRelPlt, int sizeRelPlt, int numRelPlt, ADDRESS addrPlt) {
00313     int first = i;
00314     if (first >= numRelPlt)
00315         first = numRelPlt-1;
00316     int curr = first;
00317     do {
00318         // Each entry is sizeRelPlt bytes, and will contain the offset, then the info (addend optionally follows)
00319         int* pEntry = (int*)(addrRelPlt + (curr*sizeRelPlt));
00320         int entry = elfRead4(pEntry+1);     // Read pEntry[1]
00321         int sym = entry >> 8;               // The symbol index is in the top 24 bits (Elf32 only)
00322         if (sym == i) {
00323             // Found! Now we want the native address of the associated PLT entry.
00324             // For now, assume a size of 0x10 for each PLT entry, and assume that each entry in the .rel.plt section
00325             // corresponds exactly to an entry in the .plt (except there is one dummy .plt entry)
00326             return addrPlt + 0x10 * (curr+1);
00327         }
00328         if (--curr < 0)
00329             curr = numRelPlt - 1;
00330     } while (curr != first);            // Will eventually wrap around to first if not present
00331     return 0;                           // Exit if this happens
00332 }
00333 
00334 // Add appropriate symbols to the symbol table.  secIndex is the section index of the symbol table.
00335 void ElfBinaryFile::AddSyms(int secIndex) {
00336     int e_type = elfRead2(&((Elf32_Ehdr*)m_pImage)->e_type);
00337     PSectionInfo pSect = &m_pSections[secIndex];
00338     // Calc number of symbols
00339     int nSyms = pSect->uSectionSize / pSect->uSectionEntrySize;
00340     m_pSym = (Elf32_Sym*) pSect->uHostAddr;         // Pointer to symbols
00341     int strIdx = m_sh_link[secIndex];               // sh_link points to the string table
00342 
00343     PSectionInfo siPlt = GetSectionInfoByName(".plt");
00344     ADDRESS addrPlt = siPlt ? siPlt->uNativeAddr : 0;
00345     PSectionInfo siRelPlt = GetSectionInfoByName(".rel.plt");
00346     int sizeRelPlt = 8;         // Size of each entry in the .rel.plt table
00347     if (siRelPlt == NULL) {
00348         siRelPlt = GetSectionInfoByName(".rela.plt");
00349         sizeRelPlt = 12;        // Size of each entry in the .rela.plt table is 12 bytes
00350     }
00351     ADDRESS addrRelPlt = 0;
00352     int numRelPlt = 0;
00353     if (siRelPlt) {
00354         addrRelPlt = siRelPlt->uHostAddr;
00355         numRelPlt = sizeRelPlt ? siRelPlt->uSectionSize / sizeRelPlt : 0;
00356     }
00357     // Number of entries in the PLT:
00358     // int max_i_for_hack = siPlt ? (int)siPlt->uSectionSize / 0x10 : 0;
00359     // Index 0 is a dummy entry
00360     for (int i = 1; i < nSyms; i++) {
00361         ADDRESS val = (ADDRESS) elfRead4((int*)&m_pSym[i].st_value);
00362         int name = elfRead4(&m_pSym[i].st_name);
00363         if (name == 0)  /* Silly symbols with no names */ continue;
00364         std::string str(GetStrPtr(strIdx, name));
00365         // Hack off the "@@GLIBC_2.0" of Linux, if present
00366         unsigned pos;
00367         if ((pos = str.find("@@")) != std::string::npos)
00368             str.erase(pos);
00369         std::map<ADDRESS, std::string>::iterator aa = m_SymTab.find(val);
00370         // Ensure no overwriting (except functions)
00371         if (aa == m_SymTab.end() || ELF32_ST_TYPE(m_pSym[i].st_info) == STT_FUNC) {
00372             if (val == 0 && siPlt) { //&& i < max_i_for_hack) {
00373                 // Special hack for gcc circa 3.3.3: (e.g. test/pentium/settest).  The value in the dynamic symbol table
00374                 // is zero!  I was assuming that index i in the dynamic symbol table would always correspond to index i
00375                 // in the .plt section, but for fedora2_true, this doesn't work. So we have to look in the .rel[a].plt
00376                 // section. Thanks, gcc!  Note that this hack can cause strange symbol names to appear
00377                 val = findRelPltOffset(i, addrRelPlt, sizeRelPlt, numRelPlt, addrPlt);
00378             } else if (e_type == E_REL) {
00379                 int nsec = elfRead2(&m_pSym[i].st_shndx);
00380                 if (nsec >= 0 && nsec < m_iNumSections)
00381                     val += GetSectionInfo(nsec)->uNativeAddr;
00382             }
00383 
00384 #define ECHO_SYMS 0
00385 #if     ECHO_SYMS
00386             std::cerr << "Elf AddSym: about to add " << str << " to address " << std::hex << val << std::dec << "\n";
00387 #endif
00388             m_SymTab[val] = str;
00389         }
00390     }
00391     ADDRESS uMain = GetMainEntryPoint();
00392     if (uMain != NO_ADDRESS && m_SymTab.find(uMain) == m_SymTab.end()) {
00393         // Ugh - main mustn't have the STT_FUNC attribute. Add it
00394         std::string sMain("main");
00395         m_SymTab[uMain] = sMain;
00396     }
00397     return;
00398 }
00399 
00400 std::vector<ADDRESS> ElfBinaryFile::GetExportedAddresses(bool funcsOnly)
00401 {
00402     std::vector<ADDRESS> exported;
00403 
00404     int i;
00405     int secIndex = 0;
00406     for (i=1; i < m_iNumSections; ++i) {
00407         unsigned uType = m_pSections[i].uType;
00408         if (uType == SHT_SYMTAB) {
00409             secIndex = i;
00410             break;
00411         }
00412     }
00413     if (secIndex == 0)
00414         return exported;
00415     
00416     int e_type = elfRead2(&((Elf32_Ehdr*)m_pImage)->e_type);
00417     PSectionInfo pSect = &m_pSections[secIndex];
00418     // Calc number of symbols
00419     int nSyms = pSect->uSectionSize / pSect->uSectionEntrySize;
00420     m_pSym = (Elf32_Sym*) pSect->uHostAddr;         // Pointer to symbols
00421     int strIdx = m_sh_link[secIndex];               // sh_link points to the string table
00422 
00423     // Index 0 is a dummy entry
00424     for (int i = 1; i < nSyms; i++) {
00425         ADDRESS val = (ADDRESS) elfRead4((int*)&m_pSym[i].st_value);
00426         int name = elfRead4(&m_pSym[i].st_name);
00427         if (name == 0)  /* Silly symbols with no names */ continue;
00428         std::string str(GetStrPtr(strIdx, name));
00429         // Hack off the "@@GLIBC_2.0" of Linux, if present
00430         unsigned pos;
00431         if ((pos = str.find("@@")) != std::string::npos)
00432             str.erase(pos);
00433         if (ELF32_ST_BIND(m_pSym[i].st_info) == STB_GLOBAL || ELF32_ST_BIND(m_pSym[i].st_info) == STB_WEAK) {
00434             if (funcsOnly == false || ELF32_ST_TYPE(m_pSym[i].st_info) == STT_FUNC) {
00435                 if (e_type == E_REL) {
00436                     int nsec = elfRead2(&m_pSym[i].st_shndx);
00437                     if (nsec >= 0 && nsec < m_iNumSections)
00438                         val += GetSectionInfo(nsec)->uNativeAddr;
00439                 }
00440                 exported.push_back(val);
00441             }
00442         }
00443     }
00444     return exported;
00445 
00446 }
00447 
00448 
00449 // FIXME: this function is way off the rails. It seems to always overwrite the relocation entry with the 32 bit value
00450 // from the symbol table. Totally invalid for SPARC, and most X86 relocations!
00451 // So currently not called
00452 void ElfBinaryFile::AddRelocsAsSyms(int relSecIdx) {
00453     PSectionInfo pSect = &m_pSections[relSecIdx];
00454     if (pSect == 0) return;
00455    // Calc number of relocations
00456     int nRelocs = pSect->uSectionSize / pSect->uSectionEntrySize;
00457     m_pReloc = (Elf32_Rel*) pSect->uHostAddr;       // Pointer to symbols
00458     int symSecIdx = m_sh_link[relSecIdx];
00459     int strSecIdx = m_sh_link[symSecIdx];
00460 
00461     // Index 0 is a dummy entry
00462     for (int i = 1; i < nRelocs; i++) {
00463         ADDRESS val = (ADDRESS) elfRead4((int*)&m_pReloc[i].r_offset);
00464         int symIndex = elfRead4(&m_pReloc[i].r_info) >> 8;
00465         int flags = elfRead4(&m_pReloc[i].r_info);
00466         if ((flags & 0xFF) == R_386_32) {
00467             // Lookup the value of the symbol table entry
00468             ADDRESS a = elfRead4((int*)&m_pSym[symIndex].st_value);
00469             if (m_pSym[symIndex].st_info & STT_SECTION)
00470                 a = GetSectionInfo(elfRead2(&m_pSym[symIndex].st_shndx))->uNativeAddr;
00471             // Overwrite the relocation value... ?
00472             writeNative4(val, a);
00473             continue;
00474         }
00475         if ((flags & R_386_PC32) == 0)
00476             continue;
00477         if (symIndex == 0)  /* Silly symbols with no names */ continue;
00478         std::string str(GetStrPtr(strSecIdx, elfRead4(&m_pSym[symIndex].st_name)));
00479         // Hack off the "@@GLIBC_2.0" of Linux, if present
00480         unsigned pos;
00481         if ((pos = str.find("@@")) != std::string::npos)
00482             str.erase(pos);
00483         std::map<ADDRESS, std::string>::iterator it;
00484         // Linear search!
00485         for (it = m_SymTab.begin(); it != m_SymTab.end(); it++)
00486             if ((*it).second == str)
00487                 break;
00488         // Add new extern
00489         if (it == m_SymTab.end()) {
00490             m_SymTab[next_extern] = str;
00491             it = m_SymTab.find(next_extern);
00492             next_extern += 4;
00493         }
00494         writeNative4(val, (*it).first - val - 4);
00495     }
00496     return;
00497 }
00498 
00499 // Note: this function overrides a simple "return 0" function in the base class (i.e. BinaryFile::SymbolByAddress())
00500 const char* ElfBinaryFile::SymbolByAddress(const ADDRESS dwAddr) {
00501     std::map<ADDRESS, std::string>::iterator aa = m_SymTab.find(dwAddr);
00502     if (aa == m_SymTab.end())
00503         return 0;
00504     return (char*)aa->second.c_str();
00505 }
00506 
00507 bool ElfBinaryFile::ValueByName(const char* pName, SymValue* pVal, bool bNoTypeOK /* = false */) {
00508     int  hash, numBucket, numChain, y;
00509     int  *pBuckets, *pChains;   // For symbol table work
00510     int  found;
00511     int* pHash;                 // Pointer to hash table
00512     Elf32_Sym*  pSym;           // Pointer to the symbol table
00513     int  iStr;                  // Section index of the string table
00514     PSectionInfo pSect;
00515 
00516     pSect = GetSectionInfoByName(".dynsym");
00517     if (pSect == 0)
00518     {
00519         // We have a file with no .dynsym section, and hence no .hash section (from my understanding - MVE).
00520         // It seems that the only alternative is to linearly search the symbol tables.
00521         // This must be one of the big reasons that linking is so slow! (at least, for statically linked files)
00522         // Note MVE: We can't use m_SymTab because we may need the size
00523         return SearchValueByName(pName, pVal);
00524     }
00525     pSym = (Elf32_Sym*)pSect->uHostAddr;
00526     if (pSym == 0) return false;
00527     pSect = GetSectionInfoByName(".hash");
00528     if (pSect == 0) return false;
00529     pHash = (int*) pSect->uHostAddr;
00530     iStr = GetSectionIndexByName(".dynstr");
00531     
00532     // First organise the hash table
00533     numBucket = elfRead4(&pHash[0]);
00534     numChain  = elfRead4(&pHash[1]);
00535     pBuckets = &pHash[2];
00536     pChains  = &pBuckets[numBucket];
00537 
00538     // Hash the symbol
00539     hash = elf_hash(pName) % numBucket;
00540     y = elfRead4(&pBuckets[hash]);      // Look it up in the bucket list
00541     // Beware of symbol tables with 0 in the buckets, e.g. libstdc++.
00542     // In that case, set found to false.
00543     found = (y != 0);
00544     if (y) {
00545         while (strcmp(pName, GetStrPtr(iStr, elfRead4(&pSym[y].st_name))) != 0) {
00546             y = elfRead4(&pChains[y]);
00547             if (y == 0) {
00548                 found = false;
00549                 break;
00550             }
00551         }
00552     }
00553     // Beware of symbols with STT_NOTYPE, e.g. "open" in libstdc++ !
00554     // But sometimes "main" has the STT_NOTYPE attribute, so if bNoTypeOK is passed as true, return true
00555     if (found && (bNoTypeOK || (ELF32_ST_TYPE(pSym[y].st_info) != STT_NOTYPE))) {
00556         pVal->uSymAddr = elfRead4((int*)&pSym[y].st_value);
00557         int e_type = elfRead2(&((Elf32_Ehdr*)m_pImage)->e_type);
00558         if (e_type == E_REL) {
00559             int nsec = elfRead2(&pSym[y].st_shndx);
00560             if (nsec >= 0 && nsec < m_iNumSections)
00561                 pVal->uSymAddr += GetSectionInfo(nsec)->uNativeAddr;
00562         }
00563         pVal->iSymSize = elfRead4(&pSym[y].st_size);
00564         return true;
00565     }
00566     else {
00567         // We may as well do a linear search of the main symbol table. Some symbols (e.g. init_dummy) are
00568         // in the main symbol table, but not in the hash table
00569         return SearchValueByName(pName, pVal);
00570     }
00571 }
00572 
00573 // Lookup the symbol table using linear searching. See comments above for why this appears to be needed.
00574 bool ElfBinaryFile::SearchValueByName(const char* pName, SymValue* pVal, const char* pSectName, const char* pStrName)
00575 {
00576     // Note: this assumes .symtab. Many files don't have this section!!!
00577     PSectionInfo pSect, pStrSect;
00578 
00579     pSect = GetSectionInfoByName(pSectName);
00580     if (pSect == 0) return false;
00581     pStrSect = GetSectionInfoByName(pStrName);
00582     if (pStrSect == 0) return false;
00583     const char* pStr = (const char*) pStrSect->uHostAddr;
00584     // Find number of symbols
00585     int n = pSect->uSectionSize / pSect->uSectionEntrySize;
00586     Elf32_Sym* pSym = (Elf32_Sym*)pSect->uHostAddr;
00587     // Search all the symbols. It may be possible to start later than index 0
00588     for (int i=0; i < n; i++) {
00589         int idx = elfRead4(&pSym[i].st_name);
00590         if (strcmp(pName, pStr+idx) == 0) {
00591             // We have found the symbol
00592             pVal->uSymAddr = elfRead4((int*)&pSym[i].st_value);
00593             int e_type = elfRead2(&((Elf32_Ehdr*)m_pImage)->e_type);
00594             if (e_type == E_REL) {
00595                 int nsec = elfRead2(&pSym[i].st_shndx);
00596                 if (nsec >= 0 && nsec < m_iNumSections)
00597                     pVal->uSymAddr += GetSectionInfo(nsec)->uNativeAddr;
00598             }
00599             pVal->iSymSize = elfRead4(      &pSym[i].st_size);
00600             return true;
00601         }
00602     }
00603     return false;           // Not found (this table)
00604 }
00605 
00606 // Search for the given symbol. First search .symtab (if present); if not found or the table has been stripped,
00607 // search .dynstr
00608 bool ElfBinaryFile::SearchValueByName(const char* pName, SymValue* pVal) {
00609     if (SearchValueByName(pName, pVal, ".symtab", ".strtab"))
00610         return true;
00611     return SearchValueByName(pName, pVal, ".dynsym", ".dynstr");
00612 }
00613 
00614 
00615 ADDRESS ElfBinaryFile::GetAddressByName(const char* pName,
00616     bool bNoTypeOK /* = false */) {
00617     SymValue Val;
00618     bool bSuccess = ValueByName(pName, &Val, bNoTypeOK);
00619     if (bSuccess) {
00620         m_iLastSize = Val.iSymSize;
00621         m_uLastAddr = Val.uSymAddr;
00622         return Val.uSymAddr;
00623     }
00624     else return NO_ADDRESS;
00625 }
00626 
00627 int ElfBinaryFile::GetSizeByName(const char* pName, bool bNoTypeOK /* = false */) {
00628     SymValue Val;
00629     bool bSuccess = ValueByName(pName, &Val, bNoTypeOK);
00630     if (bSuccess) {
00631         m_iLastSize = Val.iSymSize;
00632         m_uLastAddr = Val.uSymAddr;
00633         return Val.iSymSize;
00634     }
00635     else return 0;
00636 }
00637 
00638 // Guess the size of a function by finding the next symbol after it, and subtracting the distance.
00639 // This function is NOT efficient; it has to compare the closeness of ALL symbols in the symbol table
00640 int ElfBinaryFile::GetDistanceByName(const char* sName, const char* pSectName)
00641 {
00642     int size = GetSizeByName(sName);
00643     if (size) return size;          // No need to guess!
00644     // No need to guess, but if there are fillers, then subtracting labels will give a better answer for coverage
00645     // purposes. For example, switch_cc. But some programs (e.g. switch_ps) have the switch tables between the
00646     // end of _start and main! So we are better off overall not trying to guess the size of _start
00647     unsigned value = GetAddressByName(sName);
00648     if (value == 0) return 0;       // Symbol doesn't even exist!
00649 
00650     PSectionInfo pSect;
00651     pSect = GetSectionInfoByName(pSectName);
00652     if (pSect == 0) return 0;
00653     // Find number of symbols
00654     int n = pSect->uSectionSize / pSect->uSectionEntrySize;
00655     Elf32_Sym* pSym = (Elf32_Sym*)pSect->uHostAddr;
00656     // Search all the symbols. It may be possible to start later than index 0
00657     unsigned closest = 0xFFFFFFFF;
00658     int idx = -1;
00659     for (int i=0; i < n; i++) {
00660         if ((pSym[i].st_value > value) && (pSym[i].st_value < closest)) {
00661             idx = i;
00662             closest = pSym[i].st_value;
00663         }
00664     }
00665     if (idx == -1) return 0;
00666     // Do some checks on the symbol's value; it might be at the end of the .text section
00667     pSect = GetSectionInfoByName(".text");
00668     ADDRESS low = pSect->uNativeAddr;
00669     ADDRESS hi = low + pSect->uSectionSize;
00670     if ((value >= low) && (value < hi)) {
00671         // Our symbol is in the .text section. Put a ceiling of the end of the section on closest.
00672         if (closest > hi) closest = hi;
00673     }
00674     return closest - value;
00675 }
00676 
00677 int ElfBinaryFile::GetDistanceByName(const char* sName) {
00678     int val = GetDistanceByName(sName, ".symtab");
00679     if (val) return val;
00680     return GetDistanceByName(sName, ".dynsym");
00681 }
00682 
00683 bool ElfBinaryFile::IsDynamicLinkedProc(ADDRESS uNative) {
00684     if (uNative > (unsigned)-1024 && uNative != (unsigned)-1)
00685         return true;                                // Say yes for fake library functions
00686     if (uNative >= first_extern && uNative < next_extern)
00687         return true;                                // Yes for externs (not currently used)
00688     if (m_uPltMin == 0) return false;
00689     return (uNative >= m_uPltMin) && (uNative < m_uPltMax); // Yes if a call to the PLT (false otherwise)
00690 }
00691 
00692 
00693 //
00694 // GetEntryPoints()
00695 // Returns a list of pointers to SectionInfo structs representing entry points to the program
00696 // Item 0 is the main() function; items 1 and 2 are .init and .fini
00697 //
00698 std::list<SectionInfo*>& ElfBinaryFile::GetEntryPoints(
00699     const char* pEntry /* = "main" */) {
00700     SectionInfo* pSect = GetSectionInfoByName(".text");
00701     ADDRESS uMain = GetAddressByName(pEntry, true);
00702     ADDRESS delta = uMain - pSect->uNativeAddr;
00703     pSect->uNativeAddr += delta;
00704     pSect->uHostAddr += delta;
00705     // Adjust uSectionSize so uNativeAddr + uSectionSize still is end of sect
00706     pSect->uSectionSize -= delta;       
00707     m_EntryPoint.push_back(pSect);
00708     // .init and .fini sections
00709     pSect = GetSectionInfoByName(".init");
00710     m_EntryPoint.push_back(pSect);
00711     pSect = GetSectionInfoByName(".fini");
00712     m_EntryPoint.push_back(pSect);
00713     return m_EntryPoint;
00714 }
00715 
00716 
00717 //
00718 // GetMainEntryPoint()
00719 // Returns the entry point to main (this should be a label in elf binaries generated by compilers).
00720 //
00721 ADDRESS ElfBinaryFile::GetMainEntryPoint() {
00722     return GetAddressByName ("main", true);
00723 }
00724 
00725 ADDRESS ElfBinaryFile::GetEntryPoint() {
00726     return (ADDRESS) elfRead4(&((Elf32_Ehdr*)m_pImage)->e_entry);
00727 }
00728 
00729 // FIXME: the below assumes a fixed delta
00730 ADDRESS ElfBinaryFile::NativeToHostAddress(ADDRESS uNative) {
00731     if (m_iNumSections == 0) return 0;
00732     return m_pSections[1].uHostAddr - m_pSections[1].uNativeAddr + uNative; 
00733 }
00734 
00735 ADDRESS ElfBinaryFile::GetRelocatedAddress(ADDRESS uNative) {
00736     // Not implemented yet. But we need the function to make it all link
00737     return 0;
00738 }
00739 
00740 bool ElfBinaryFile::PostLoad(void* handle) {
00741     // This function is called after an archive member has been loaded by ElfArchiveFile
00742 
00743     // Save the elf pointer
00744     //m_elf = (Elf*) handle;
00745 
00746     //return ProcessElfFile();
00747     return false;
00748 }
00749 
00750 
00751 // Open this binaryfile for reading AND writing
00752 bool ElfBinaryFile::Open(const char* sName) {
00753     return false;
00754 }
00755 
00756 
00757 void ElfBinaryFile::Close() {
00758     UnLoad();
00759 }
00760 
00761 LOAD_FMT ElfBinaryFile::GetFormat() const {
00762     return LOADFMT_ELF;
00763 }
00764 
00765 MACHINE ElfBinaryFile::GetMachine() const {
00766     int machine = elfRead2(&((Elf32_Ehdr*)m_pImage)->e_machine);
00767     if ((machine == EM_SPARC) || (machine == EM_SPARC32PLUS)) return MACHINE_SPARC;
00768     else if (machine == EM_386)     return MACHINE_PENTIUM;
00769     else if (machine == EM_PA_RISC) return MACHINE_HPRISC;
00770     else if (machine == EM_68K)     return MACHINE_PALM;    // Unlikely
00771     else if (machine == EM_PPC)     return MACHINE_PPC;
00772     else if (machine == EM_ST20)        return MACHINE_ST20;
00773     else if (machine == EM_X86_64)  {
00774         std::cerr << "Error: ElfBinaryFile::GetMachine: The AMD x86-64 architecture is not supported yet\n";
00775         return (MACHINE)-1;
00776     }
00777     // An unknown machine type
00778     std::cerr << "Error: ElfBinaryFile::GetMachine: Unsupported machine type: " 
00779         << machine << " (0x" << std::hex << machine << ")\n";
00780     std::cerr << "(Please add a description for this type, thanks!)\n";
00781     return (MACHINE)-1;
00782 }
00783 
00784 bool ElfBinaryFile::isLibrary() const {
00785     int type = elfRead2(&((Elf32_Ehdr*)m_pImage)->e_type);
00786     return (type == ET_DYN);
00787 }
00788 
00789 std::list<const char *> ElfBinaryFile::getDependencyList() {
00790     std::list<const char *> result;
00791     ADDRESS stringtab = NO_ADDRESS;
00792     PSectionInfo dynsect = GetSectionInfoByName(".dynamic");
00793     if( dynsect == NULL )
00794         return result; /* no dynamic section = statically linked */
00795 
00796     Elf32_Dyn *dyn;
00797     for( dyn = (Elf32_Dyn *)dynsect->uHostAddr; dyn->d_tag != DT_NULL; dyn++ ) {
00798         if( dyn->d_tag == DT_STRTAB ) {
00799             stringtab = (ADDRESS)dyn->d_un.d_ptr;
00800             break;
00801         }
00802     }
00803     
00804     if( stringtab == NO_ADDRESS ) /* No string table = no names */
00805         return result;
00806     stringtab = NativeToHostAddress( stringtab );
00807     
00808     for( dyn = (Elf32_Dyn *)dynsect->uHostAddr; dyn->d_tag != DT_NULL; dyn++ ) {
00809         if( dyn->d_tag == DT_NEEDED ) {
00810             const char *need = (char *)stringtab + dyn->d_un.d_val;
00811             if( need != NULL )
00812                 result.push_back( need );
00813         }
00814     }
00815     return result;
00816 }
00817 
00818 ADDRESS ElfBinaryFile::getImageBase() {
00819     return m_uBaseAddr;
00820 }
00821 
00822 size_t ElfBinaryFile::getImageSize() {
00823     return m_uImageSize;
00824 }
00825 
00826 /*==============================================================================
00827  * FUNCTION:      ElfBinaryFile::GetImportStubs
00828  * OVERVIEW:      Get an array of addresses of imported function stubs
00829  *                  This function relies on the fact that the symbols are sorted by address, and that Elf PLT
00830  *                  entries have successive addresses beginning soon after m_PltMin
00831  * PARAMETERS:    numImports - reference to integer set to the number of these
00832  * RETURNS:       An array of native ADDRESSes
00833  *============================================================================*/
00834 ADDRESS* ElfBinaryFile::GetImportStubs(int& numImports) {
00835     ADDRESS a = m_uPltMin;
00836     int n = 0;
00837     std::map<ADDRESS, std::string>::iterator aa = m_SymTab.find(a);
00838     std::map<ADDRESS, std::string>::iterator ff = aa;
00839     bool delDummy = false;
00840     if (aa == m_SymTab.end()) {
00841         // Need to insert a dummy entry at m_uPltMin
00842         delDummy = true;
00843         m_SymTab[a] = std::string();
00844         ff = m_SymTab.find(a);
00845         aa = ff;
00846         aa++;
00847     }
00848     while ((aa != m_SymTab.end()) && (a < m_uPltMax)) {
00849         n++;
00850         a = aa->first;
00851         aa++;
00852     }
00853     // Allocate an array of ADDRESSESes
00854     m_pImportStubs = new ADDRESS[n];
00855     aa = ff;                // Start at first
00856     a = aa->first;
00857     int i=0;
00858     while ((aa != m_SymTab.end()) && (a < m_uPltMax)) {
00859         m_pImportStubs[i++] = a;
00860         a = aa->first;
00861         aa++;
00862     }
00863     if (delDummy)
00864         m_SymTab.erase(ff);         // Delete dummy entry
00865     numImports = n;
00866     return m_pImportStubs;
00867 }
00868 
00869 /*==============================================================================
00870  * FUNCTION:    ElfBinaryFile::GetDynamicGlobalMap
00871  * OVERVIEW:    Get a map from ADDRESS to const char*. This map contains the native addresses
00872  *                  and symbolic names of global data items (if any) which are shared with dynamically
00873  *                  linked libraries.
00874  *                  Example: __iob (basis for stdout). The ADDRESS is the native address of a pointer
00875  *                  to the real dynamic data object.
00876  * NOTE:        The caller should delete the returned map.
00877  * PARAMETERS:  None
00878  * RETURNS:     Pointer to a new map with the info, or 0 if none
00879  *============================================================================*/
00880 std::map<ADDRESS, const char*>* ElfBinaryFile::GetDynamicGlobalMap() {
00881     std::map<ADDRESS, const char*>* ret = new std::map<ADDRESS, const char*>;
00882     SectionInfo* pSect = GetSectionInfoByName(".rel.bss");
00883     if (pSect == 0)
00884         pSect = GetSectionInfoByName(".rela.bss");
00885     if (pSect == 0) {
00886         // This could easily mean that this file has no dynamic globals, and
00887         // that is fine.
00888         return ret;
00889     }
00890     int numEnt = pSect->uSectionSize / pSect->uSectionEntrySize;
00891     SectionInfo* sym = GetSectionInfoByName(".dynsym");
00892     if (sym == 0) {
00893         fprintf(stderr, "Could not find section .dynsym in source binary file");
00894         return ret;
00895     }
00896     Elf32_Sym* pSym = (Elf32_Sym*)sym->uHostAddr;
00897     int idxStr = GetSectionIndexByName(".dynstr");
00898     if (idxStr == -1) {
00899         fprintf(stderr, "Could not find section .dynstr in source binary file");
00900         return ret;
00901     }
00902 
00903     unsigned p = pSect->uHostAddr;
00904     for (int i=0; i < numEnt; i++) {
00905         // The ugly p[1] below is because it p might point to an Elf32_Rela struct, or an Elf32_Rel struct
00906         int sym = ELF32_R_SYM(((int*)p)[1]);
00907         int name = pSym[sym].st_name;       // Index into string table
00908         const char* s = GetStrPtr(idxStr, name);
00909         ADDRESS val = ((int*)p)[0];
00910         (*ret)[val] = s;            // Add the (val, s) mapping to ret
00911         p += pSect->uSectionEntrySize;
00912     }
00913 
00914     return ret;
00915 }
00916 
00917 /*==============================================================================
00918  * FUNCTION:    ElfBinaryFile::elfRead2 and elfRead4
00919  * OVERVIEW:    Read a 2 or 4 byte quantity from host address (C pointer) p
00920  * NOTE:        Takes care of reading the correct endianness, set early on into m_elfEndianness
00921  * PARAMETERS:  ps or pi: host pointer to the data
00922  * RETURNS:     An integer representing the data
00923  *============================================================================*/
00924 int ElfBinaryFile::elfRead2(short* ps) const {
00925     unsigned char* p = (unsigned char*)ps;
00926     if (m_elfEndianness) {
00927         // Big endian
00928         return (int)((p[0] << 8) + p[1]);
00929     } else {
00930         // Little endian
00931         return (int)(p[0] + (p[1] << 8));
00932     }
00933 }
00934 int ElfBinaryFile::elfRead4(int* pi) const{
00935     short* p = (short*)pi;
00936     if (m_elfEndianness) {
00937         return (int)((elfRead2(p) << 16) + elfRead2(p+1));
00938     } else
00939         return (int) (elfRead2(p) + (elfRead2(p+1) << 16));
00940 }
00941 
00942 void ElfBinaryFile::elfWrite4(int* pi, int val) {
00943     char* p = (char*)pi;
00944     if (m_elfEndianness) {
00945         // Big endian
00946         *p++ = (char)(val >> 24);
00947         *p++ = (char)(val >> 16);
00948         *p++ = (char)(val >> 8);
00949         *p   = (char)val;
00950     } else {
00951         *p++ = (char)val;
00952         *p++ = (char)(val >> 8);
00953         *p++ = (char)(val >> 16);
00954         *p   = (char)(val >> 24);
00955     }
00956 }
00957 
00958 int ElfBinaryFile::readNative1(ADDRESS nat) {
00959     PSectionInfo si = GetSectionInfoByAddr(nat);
00960     if (si == 0) {
00961         si = GetSectionInfo(0);
00962     }
00963     ADDRESS host = si->uHostAddr - si->uNativeAddr + nat;
00964     return *(char *)host;
00965 }
00966 
00967 // Read 2 bytes from given native address
00968 int ElfBinaryFile::readNative2(ADDRESS nat) {
00969     PSectionInfo si = GetSectionInfoByAddr(nat);
00970     if (si == 0) return 0;
00971     ADDRESS host = si->uHostAddr - si->uNativeAddr + nat;
00972     return elfRead2((short*)host);
00973 }
00974 
00975 // Read 4 bytes from given native address
00976 int ElfBinaryFile::readNative4(ADDRESS nat) {
00977     PSectionInfo si = GetSectionInfoByAddr(nat);
00978     if (si == 0) return 0;
00979     ADDRESS host = si->uHostAddr - si->uNativeAddr + nat;
00980     return elfRead4((int*)host);
00981 }
00982 
00983 void ElfBinaryFile::writeNative4(ADDRESS nat, unsigned int n) {
00984     PSectionInfo si = GetSectionInfoByAddr(nat);
00985     if (si == 0) return;
00986     ADDRESS host = si->uHostAddr - si->uNativeAddr + nat;
00987     if (m_elfEndianness) {
00988         *(unsigned char*)host     = (n >> 24) & 0xff;
00989         *(unsigned char*)(host+1) = (n >> 16) & 0xff;
00990         *(unsigned char*)(host+2) = (n >> 8)  & 0xff;
00991         *(unsigned char*)(host+3) =  n        & 0xff;
00992     } else {
00993         *(unsigned char*)(host+3) = (n >> 24) & 0xff;
00994         *(unsigned char*)(host+2) = (n >> 16) & 0xff;
00995         *(unsigned char*)(host+1) = (n >> 8)  & 0xff;
00996         *(unsigned char*)host     =  n        & 0xff;
00997     }
00998 }
00999 
01000 // Read 8 bytes from given native address
01001 QWord ElfBinaryFile::readNative8(ADDRESS nat) {
01002     int raw[2];
01003 #ifdef WORDS_BIGENDIAN      // This tests the  host  machine
01004     if (m_elfEndianness) {  // This tests the source machine
01005 #else
01006     if (!m_elfEndianness) {
01007 #endif  // Balance }
01008         // Source and host are same endianness
01009         raw[0] = readNative4(nat);
01010         raw[1] = readNative4(nat+4);
01011     } else {
01012         // Source and host are different endianness
01013         raw[1] = readNative4(nat);
01014         raw[0] = readNative4(nat+4);
01015     }
01016     //return reinterpret_cast<long long>(*raw);    // Note: cast, not convert!!
01017     return *(QWord*)raw;
01018 }
01019 
01020 // Read 4 bytes as a float
01021 float ElfBinaryFile::readNativeFloat4(ADDRESS nat) {
01022     int raw = readNative4(nat);
01023     // Ugh! gcc says that reinterpreting from int to float is invalid!!
01024     //return reinterpret_cast<float>(raw);    // Note: cast, not convert!!
01025     return *(float*)&raw;           // Note: cast, not convert
01026 }
01027 
01028 // Read 8 bytes as a float
01029 double ElfBinaryFile::readNativeFloat8(ADDRESS nat) {
01030     int raw[2];
01031 #ifdef WORDS_BIGENDIAN      // This tests the  host  machine
01032     if (m_elfEndianness) {  // This tests the source machine
01033 #else
01034     if (!m_elfEndianness) {
01035 #endif  // Balance }
01036         // Source and host are same endianness
01037         raw[0] = readNative4(nat);
01038         raw[1] = readNative4(nat+4);
01039     } else {
01040         // Source and host are different endianness
01041         raw[1] = readNative4(nat);
01042         raw[0] = readNative4(nat+4);
01043     }
01044     //return reinterpret_cast<double>(*raw);    // Note: cast, not convert!!
01045     return *(double*)raw;
01046 }
01047 
01048 // This function is called via dlopen/dlsym; it returns a new BinaryFile derived concrete object.
01049 // After this object is returned, the virtual function call mechanism will call the rest of the code
01050 // in this library. It needs to be C linkage so that it its name is not mangled
01051 extern "C" {
01052 #ifdef _WIN32
01053     __declspec(dllexport)
01054 #endif
01055     BinaryFile* construct() {
01056         return new ElfBinaryFile;
01057     }    
01058 }
01059 
01060 void ElfBinaryFile::applyRelocations() {
01061     int nextFakeLibAddr = -2;           // See R_386_PC32 below; -1 sometimes used for main
01062     if (m_pImage == 0) return;          // No file loaded
01063     int machine = elfRead2(&((Elf32_Ehdr*)m_pImage)->e_machine);
01064     int e_type = elfRead2(&((Elf32_Ehdr*)m_pImage)->e_type);
01065     switch (machine) {
01066         case EM_SPARC:
01067             break;                      // Not implemented yet
01068         case EM_386: {
01069             for (int i=1; i < m_iNumSections; ++i) {
01070                 SectionInfo* ps = &m_pSections[i];
01071                 if (ps->uType == SHT_REL) {
01072                     // A section such as .rel.dyn or .rel.plt (without an addend field).
01073                     // Each entry has 2 words: r_offet and r_info. The r_offset is just the offset from the beginning
01074                     // of the section (section given by the section header's sh_info) to the word to be modified.
01075                     // r_info has the type in the bottom byte, and a symbol table index in the top 3 bytes.
01076                     // A symbol table offset of 0 (STN_UNDEF) means use value 0. The symbol table involved comes from 
01077                     // the section header's sh_link field.
01078                     int* pReloc = (int*)ps->uHostAddr;
01079                     unsigned size = ps->uSectionSize;
01080                     // NOTE: the r_offset is different for .o files (E_REL in the e_type header field) than for exe's
01081                     // and shared objects!
01082                     ADDRESS destNatOrigin=0, destHostOrigin=0;
01083                     if (e_type == E_REL) {
01084                         int destSection = m_sh_info[i];
01085                         destNatOrigin   = m_pSections[destSection].uNativeAddr;
01086                         destHostOrigin  = m_pSections[destSection].uHostAddr;
01087                     }
01088                     int symSection = m_sh_link[i];          // Section index for the associated symbol table
01089                     int strSection = m_sh_link[symSection]; // Section index for the string section assoc with this
01090                     char* pStrSection = (char*)m_pSections[strSection].uHostAddr;
01091                     Elf32_Sym* symOrigin = (Elf32_Sym*) m_pSections[symSection].uHostAddr;
01092                     for (unsigned u=0; u < size; u+= 2*sizeof(unsigned)) {
01093                         unsigned r_offset = elfRead4(pReloc++);
01094                         unsigned info   = elfRead4(pReloc++);
01095                         unsigned char relType = (unsigned char) info;
01096                         unsigned symTabIndex = info >> 8;
01097                         int* pRelWord;              // Pointer to the word to be relocated
01098                         if (e_type == E_REL)
01099                             pRelWord = ((int*)(destHostOrigin + r_offset));
01100                         else {
01101                             SectionInfo* destSec = GetSectionInfoByAddr(r_offset);
01102                             pRelWord = (int*)(destSec->uHostAddr - destSec->uNativeAddr + r_offset);
01103                             destNatOrigin = 0;
01104                         }
01105                         ADDRESS A, S=0, P;
01106                         int nsec;
01107                         switch (relType) {
01108                             case 0:             // R_386_NONE: just ignore (common)
01109                                 break;
01110                             case 1:             // R_386_32: S + A
01111                                 S = elfRead4((int*)&symOrigin[symTabIndex].st_value);
01112                                 if (e_type == E_REL) {
01113                                     nsec = elfRead2(&symOrigin[symTabIndex].st_shndx);
01114                                     if (nsec >= 0 && nsec < m_iNumSections)
01115                                         S += GetSectionInfo(nsec)->uNativeAddr;
01116                                 }
01117                                 A = elfRead4(pRelWord);
01118                                 elfWrite4(pRelWord, S+A);
01119                                 break;
01120                             case 2:             // R_386_PC32: S + A - P
01121                                 if (ELF32_ST_TYPE(symOrigin[symTabIndex].st_info) == STT_SECTION) {
01122                                     nsec = elfRead2(&symOrigin[symTabIndex].st_shndx);
01123                                     if (nsec >= 0 && nsec < m_iNumSections)
01124                                         S = GetSectionInfo(nsec)->uNativeAddr;
01125                                 } else {
01126                                     S = elfRead4((int*)&symOrigin[symTabIndex].st_value);
01127                                     if (S == 0) {
01128                                         // This means that the symbol doesn't exist in this module, and is not accessed
01129                                         // through the PLT, i.e. it will be statically linked, e.g. strcmp. We have the
01130                                         // name of the symbol right here in the symbol table entry, but the only way
01131                                         // to communicate with the loader is through the target address of the call.
01132                                         // So we use some very improbable addresses (e.g. -1, -2, etc) and give them entries
01133                                         // in the symbol table
01134                                         int nameOffset = elfRead4((int*)&symOrigin[symTabIndex].st_name);
01135                                         char* pName = pStrSection + nameOffset;
01136                                         // this is too slow, I'm just going to assume it is 0
01137                                         //S = GetAddressByName(pName);
01138                                         //if (S == (e_type == E_REL ? 0x8000000 : 0)) {
01139                                             S = nextFakeLibAddr--;      // Allocate a new fake address
01140                                             AddSymbol(S, pName);
01141                                         //}
01142                                     } else if (e_type == E_REL) {
01143                                         nsec = elfRead2(&symOrigin[symTabIndex].st_shndx);
01144                                         if (nsec >= 0 && nsec < m_iNumSections)
01145                                             S += GetSectionInfo(nsec)->uNativeAddr;
01146                                     }
01147                                 }
01148                                 A = elfRead4(pRelWord);
01149                                 P = destNatOrigin + r_offset;
01150                                 elfWrite4(pRelWord, S+A-P);
01151                                 break;
01152                             case 7:
01153                             case 8:             // R_386_RELATIVE
01154                                 break;          // No need to do anything with these, if a shared object
01155                             default:
01156                                 // std::cout << "Relocation type " << (int)relType << " not handled yet\n";
01157                                 ;
01158                         }
01159                     }
01160                 }
01161             }
01162         }
01163         default:
01164             break;                      // Not implemented
01165     }
01166 }   
01167 
01168 bool ElfBinaryFile::IsRelocationAt(ADDRESS uNative) 
01169 {
01170     //int nextFakeLibAddr = -2;         // See R_386_PC32 below; -1 sometimes used for main
01171     if (m_pImage == 0) return false;            // No file loaded
01172     int machine = elfRead2(&((Elf32_Ehdr*)m_pImage)->e_machine);
01173     int e_type = elfRead2(&((Elf32_Ehdr*)m_pImage)->e_type);
01174     switch (machine) {
01175         case EM_SPARC:
01176             break;                      // Not implemented yet
01177         case EM_386: {
01178             for (int i=1; i < m_iNumSections; ++i) {
01179                 SectionInfo* ps = &m_pSections[i];
01180                 if (ps->uType == SHT_REL) {
01181                     // A section such as .rel.dyn or .rel.plt (without an addend field).
01182                     // Each entry has 2 words: r_offet and r_info. The r_offset is just the offset from the beginning
01183                     // of the section (section given by the section header's sh_info) to the word to be modified.
01184                     // r_info has the type in the bottom byte, and a symbol table index in the top 3 bytes.
01185                     // A symbol table offset of 0 (STN_UNDEF) means use value 0. The symbol table involved comes from 
01186                     // the section header's sh_link field.
01187                     int* pReloc = (int*)ps->uHostAddr;
01188                     unsigned size = ps->uSectionSize;
01189                     // NOTE: the r_offset is different for .o files (E_REL in the e_type header field) than for exe's
01190                     // and shared objects!
01191                     ADDRESS destNatOrigin=0, destHostOrigin;
01192                     if (e_type == E_REL) {
01193                         int destSection = m_sh_info[i];
01194                         destNatOrigin   = m_pSections[destSection].uNativeAddr;
01195                         destHostOrigin  = m_pSections[destSection].uHostAddr;
01196                     }
01197                     //int symSection = m_sh_link[i];            // Section index for the associated symbol table
01198                     //int strSection = m_sh_link[symSection];   // Section index for the string section assoc with this
01199                     //char* pStrSection = (char*)m_pSections[strSection].uHostAddr;
01200                     //Elf32_Sym* symOrigin = (Elf32_Sym*) m_pSections[symSection].uHostAddr;
01201                     for (unsigned u=0; u < size; u+= 2*sizeof(unsigned)) {
01202                         unsigned r_offset = elfRead4(pReloc++);
01203                         //unsigned info = elfRead4(pReloc);
01204                         pReloc++;
01205                         //unsigned char relType = (unsigned char) info;
01206                         //unsigned symTabIndex = info >> 8;
01207                         ADDRESS pRelWord;               // Pointer to the word to be relocated
01208                         if (e_type == E_REL)
01209                             pRelWord = destNatOrigin + r_offset;
01210                         else {
01211                             SectionInfo* destSec = GetSectionInfoByAddr(r_offset);
01212                             pRelWord = destSec->uNativeAddr + r_offset;
01213                             destNatOrigin = 0;
01214                         }
01215                         if (uNative == pRelWord)
01216                             return true;
01217                     }
01218                 }
01219             }
01220         }
01221         default:
01222             break;                      // Not implemented
01223     }
01224     return false;
01225 }   
01226 
01227 const char *ElfBinaryFile::getFilenameSymbolFor(const char *sym)
01228 {
01229     int i;
01230     int secIndex = 0;
01231     for (i=1; i < m_iNumSections; ++i) {
01232         unsigned uType = m_pSections[i].uType;
01233         if (uType == SHT_SYMTAB) {
01234             secIndex = i;
01235             break;
01236         }
01237     }
01238     if (secIndex == 0)
01239         return NULL;
01240     
01241     //int e_type = elfRead2(&((Elf32_Ehdr*)m_pImage)->e_type);
01242     PSectionInfo pSect = &m_pSections[secIndex];
01243     // Calc number of symbols
01244     int nSyms = pSect->uSectionSize / pSect->uSectionEntrySize;
01245     m_pSym = (Elf32_Sym*) pSect->uHostAddr;         // Pointer to symbols
01246     int strIdx = m_sh_link[secIndex];               // sh_link points to the string table
01247 
01248     std::string filename;
01249 
01250     // Index 0 is a dummy entry
01251     for (int i = 1; i < nSyms; i++) {
01252         //ADDRESS val = (ADDRESS) elfRead4((int*)&m_pSym[i].st_value);
01253         int name = elfRead4(&m_pSym[i].st_name);
01254         if (name == 0)  /* Silly symbols with no names */ continue;
01255         std::string str(GetStrPtr(strIdx, name));
01256         // Hack off the "@@GLIBC_2.0" of Linux, if present
01257         unsigned pos;
01258         if ((pos = str.find("@@")) != std::string::npos)
01259             str.erase(pos);
01260         if (ELF32_ST_TYPE(m_pSym[i].st_info) == STT_FILE) {
01261             filename = str;
01262             continue;
01263         }
01264         if (str == sym) {
01265             if (filename.length())
01266                 return strdup(filename.c_str());
01267             return NULL;
01268         }
01269     }
01270     return NULL;
01271 }
01272 
01273 // A map for extra symbols, those not in the usual Elf symbol tables
01274 void ElfBinaryFile::AddSymbol(ADDRESS uNative, const char *pName)
01275 {
01276     m_SymTab[uNative] = pName;
01277 }
01278 
01279 void ElfBinaryFile::dumpSymbols() {
01280     std::map<ADDRESS, std::string>::iterator it;
01281     std::cerr << std::hex;
01282     for (it = m_SymTab.begin(); it != m_SymTab.end(); ++it)
01283         std::cerr << "0x" << it->first << " " << it->second << "        ";
01284     std::cerr << std::dec << "\n";
01285 }

Generated on Tue Sep 19 21:18:22 2006 for Boomerang by  doxygen 1.4.6