macho-apple.h

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
00003  *
00004  * @APPLE_LICENSE_HEADER_START@
00005  * 
00006  * This file contains Original Code and/or Modifications of Original Code
00007  * as defined in and that are subject to the Apple Public Source License
00008  * Version 2.0 (the 'License'). You may not use this file except in
00009  * compliance with the License. Please obtain a copy of the License at
00010  * http://www.opensource.apple.com/apsl/ and read it before using this
00011  * file.
00012  * 
00013  * The Original Code and all software distributed under the License are
00014  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
00015  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
00016  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
00017  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
00018  * Please see the License for the specific language governing rights and
00019  * limitations under the License.
00020  * 
00021  * @APPLE_LICENSE_HEADER_END@
00022  */
00023 #ifndef _MACHO_LOADER_H_
00024 #define _MACHO_LOADER_H_
00025 
00026 /*
00027  * This file describes the format of mach object files.
00028  */
00029 
00030 /*
00031  * <mach/machine.h> is needed here for the cpu_type_t and cpu_subtype_t types
00032  * and contains the constants for the possible values of these types.
00033  */
//#include <mach/machine.h>
#include <stdint.h> /* uint32_t: fixed-width type for the on-disk structure fields */
00035 
00036 /*
00037  * <mach/vm_prot.h> is needed here for the vm_prot_t type and contains the 
00038  * constants that are or'ed together for the possible values of this type.
00039  */
00040 //#include <mach/vm_prot.h>
/*
 * Local copies of the protection bits; each value is cast to vm_prot_t,
 * which must be declared by whoever includes this header.
 */
/* no permission bits set */
#define VM_PROT_NONE    ((vm_prot_t)0x00)
/* read permission */
#define VM_PROT_READ    ((vm_prot_t)0x01)
/* write permission */
#define VM_PROT_WRITE   ((vm_prot_t)0x02)
/* execute permission */
#define VM_PROT_EXECUTE ((vm_prot_t)0x04)
00045 
00046 /*
00047  * <machine/thread_status.h> is expected to define the flavors of the thread
00048  * states and the structures of those flavors for each machine.
00049  */
00050 //#include <mach/machine/thread_status.h>
00051 //#include <architecture/byte_order.h>
00052 
00053 /*
00054  * The mach header appears at the very beginning of the object file.
00055  */
00056 struct mach_header {
00057     unsigned long   magic;      /* mach magic number identifier */
00058     cpu_type_t  cputype;    /* cpu specifier */
00059     cpu_subtype_t   cpusubtype; /* machine specifier */
00060     unsigned long   filetype;   /* type of file */
00061     unsigned long   ncmds;      /* number of load commands */
00062     unsigned long   sizeofcmds; /* the size of all the load commands */
00063     unsigned long   flags;      /* flags */
00064 };
00065 
00066 /* Constant for the magic field of the mach_header */
/*
 * MH_MAGIC is the magic number as seen on a host with the file's byte order.
 * MH_CIGAM is the same value with its four bytes reversed, i.e. what a host
 * of the opposite byte order reads.  It is spelled as a literal (rather than
 * NXSwapInt(MH_MAGIC)) so it needs no byte-order header and can be used in
 * constant expressions such as case labels and #if.
 */
#define MH_MAGIC    0xfeedface  /* the mach magic number */
#define MH_CIGAM    0xcefaedfe  /* MH_MAGIC byte-swapped */
00069 
00070 /*
00071  * The layout of the file depends on the filetype.  For all but the MH_OBJECT
00072  * file type the segments are padded out and aligned on a segment alignment
00073  * boundary for efficient demand paging.  The MH_EXECUTE, MH_FVMLIB, MH_DYLIB,
00074  * MH_DYLINKER and MH_BUNDLE file types also have the headers included as part
00075  * of their first segment.
00076  * 
00077  * The file type MH_OBJECT is a compact format intended as output of the
00078  * assembler and input (and possibly output) of the link editor (the .o
00079  * format).  All sections are in one unnamed segment with no segment padding. 
00080  * This format is used as an executable format when the file is so small the
00081  * segment padding greatly increases its size.
00082  *
00083  * The file type MH_PRELOAD is an executable format intended for things that
00084  * are not executed under the kernel (proms, stand alones, kernels, etc).  The
00085  * format can be executed under the kernel, but the kernel may demand page it
00086  * rather than preload it before execution.
00087  *
00088  * A core file is in MH_CORE format and can be any arbitrary legal
00089  * Mach-O file.
00090  *
00091  * Constants for the filetype field of the mach_header
00092  */
/* Values stored in mach_header.filetype. */

/* relocatable object file */
#define MH_OBJECT       0x1
/* demand paged executable file */
#define MH_EXECUTE      0x2
/* fixed VM shared library file */
#define MH_FVMLIB       0x3
/* core file */
#define MH_CORE         0x4
/* preloaded executable file */
#define MH_PRELOAD      0x5
/* dynamically bound shared library */
#define MH_DYLIB        0x6
/* dynamic link editor */
#define MH_DYLINKER     0x7
/* dynamically bound bundle file */
#define MH_BUNDLE       0x8
/* shared library stub for static linking only, no section contents */
#define MH_DYLIB_STUB   0x9
00103 
00104 /* Constants for the flags field of the mach_header */
/* Bit values or'ed together in mach_header.flags. */

/* the object file has no undefined references */
#define MH_NOUNDEFS         0x1

/* the object file is the output of an incremental link against a base file
   and can't be link edited again */
#define MH_INCRLINK         0x2

/* the object file is input for the dynamic linker and can't be statically
   link edited again */
#define MH_DYLDLINK         0x4

/* the object file's undefined references are bound by the dynamic linker
   when loaded */
#define MH_BINDATLOAD       0x8

/* the file has its dynamic undefined references prebound */
#define MH_PREBOUND         0x10

/* the file has its read-only and read-write segments split */
#define MH_SPLIT_SEGS       0x20

/* the shared library init routine is to be run lazily via catching memory
   faults to its writeable segments (obsolete) */
#define MH_LAZY_INIT        0x40

/* the image is using two-level name space bindings */
#define MH_TWOLEVEL         0x80

/* the executable is forcing all images to use flat name space bindings */
#define MH_FORCE_FLAT       0x100

/* this umbrella guarantees no multiple definitions of symbols in its
   sub-images so the two-level namespace hints can always be used */
#define MH_NOMULTIDEFS      0x200

/* do not have dyld notify the prebinding agent about this executable */
#define MH_NOFIXPREBINDING  0x400

/* the binary is not prebound but can have its prebinding redone; only used
   when MH_PREBOUND is not set */
#define MH_PREBINDABLE      0x800

/* indicates that this binary binds to all two-level namespace modules of
   its dependent libraries; only used when MH_PREBINDABLE and MH_TWOLEVEL
   are both set */
#define MH_ALLMODSBOUND     0x1000

/* the binary has been canonicalized via the unprebind operation */
#define MH_CANONICAL        0x4000
00145 /*
00146  * The load commands directly follow the mach_header.  The total size of all
00147  * of the commands is given by the sizeofcmds field in the mach_header.  All
00148  * load commands must have as their first two fields cmd and cmdsize.  The cmd
00149  * field is filled in with a constant for that command type.  Each command type
00150  * has a structure specifically for it.  The cmdsize field is the size in bytes
00151  * of the particular load command structure plus anything that follows it that
00152  * is a part of the load command (i.e. section structures, strings, etc.).  To
00153  * advance to the next load command the cmdsize can be added to the offset or
00154  * pointer of the current load command.  The cmdsize MUST be a multiple of
00155  * 4 bytes (this is forever the maximum alignment of any load commands).
00156  * The padded bytes must be zero.  All tables in the object file must also
00157  * follow these rules so the file can be memory mapped.  Otherwise the pointers
00158  * to these tables will not work well or at all on some machines.  With all
00159  * padding zeroed like objects will compare byte for byte.
00160  */
/*
 * Common prefix of every load command: the command type followed by the
 * total size of the command.
 *
 * Both fields are uint32_t (not unsigned long) so the prefix is exactly
 * 8 bytes on every host, including LP64 ones where unsigned long is 8 bytes.
 */
struct load_command {
    uint32_t cmd;       /* type of load command (one of the LC_* constants) */
    uint32_t cmdsize;   /* total size of command in bytes */
};
00165 
00166 /*
00167  * After MacOS X 10.1 when a new load command is added that is required to be
00168  * understood by the dynamic linker for the image to execute properly the
00169  * LC_REQ_DYLD bit will be or'ed into the load command constant.  If the dynamic
00170  * linker sees such a load command that it does not understand, it will issue an
00171  * "unknown load command required for execution" error and refuse to use the
00172  * image.  Other load commands without this bit that are not understood will
00173  * simply be ignored.
00174  */
/* bit or'ed into load command constants the dynamic linker must understand */
#define LC_REQ_DYLD         0x80000000

/* Values for the cmd field of every load command (the command type). */

/* segment of this file to be mapped */
#define LC_SEGMENT          0x1
/* link-edit stab symbol table info */
#define LC_SYMTAB           0x2
/* link-edit gdb symbol table info (obsolete) */
#define LC_SYMSEG           0x3
/* thread */
#define LC_THREAD           0x4
/* unix thread (includes a stack) */
#define LC_UNIXTHREAD       0x5
/* load a specified fixed VM shared library */
#define LC_LOADFVMLIB       0x6
/* fixed VM shared library identification */
#define LC_IDFVMLIB         0x7
/* object identification info (obsolete) */
#define LC_IDENT            0x8
/* fixed VM file inclusion (internal use) */
#define LC_FVMFILE          0x9
/* prepage command (internal use) */
#define LC_PREPAGE          0xa
/* dynamic link-edit symbol table info */
#define LC_DYSYMTAB         0xb
/* load a dynamically linked shared library */
#define LC_LOAD_DYLIB       0xc
/* dynamically linked shared lib ident */
#define LC_ID_DYLIB         0xd
/* load a dynamic linker */
#define LC_LOAD_DYLINKER    0xe
/* dynamic linker identification */
#define LC_ID_DYLINKER      0xf
/* modules prebound for a dynamically linked shared library */
#define LC_PREBOUND_DYLIB   0x10
/* image routines */
#define LC_ROUTINES         0x11
/* sub framework */
#define LC_SUB_FRAMEWORK    0x12
/* sub umbrella */
#define LC_SUB_UMBRELLA     0x13
/* sub client */
#define LC_SUB_CLIENT       0x14
/* sub library */
#define LC_SUB_LIBRARY      0x15
/* two-level namespace lookup hints */
#define LC_TWOLEVEL_HINTS   0x16
/* prebind checksum */
#define LC_PREBIND_CKSUM    0x17

/*
 * load a dynamically linked shared library that is allowed to be missing
 * (all symbols are weak imported).
 */
#define LC_LOAD_WEAK_DYLIB  (0x18 | LC_REQ_DYLD)
00207 
00208 /*
00209  * A variable length string in a load command is represented by an lc_str
00210  * union.  The strings are stored just after the load command structure and
00211  * the offset is from the start of the load command structure.  The size
00212  * of the string is reflected in the cmdsize field of the load command.
00213  * Once again any padded bytes to bring the cmdsize field to a multiple
00214  * of 4 bytes must be zero.
00215  */
/*
 * Variable length string reference inside a load command.
 *
 * In the file the union holds a 4-byte offset, measured from the start of
 * the load command, so `offset` is uint32_t rather than unsigned long.
 * The in-memory `ptr` alias is only provided where a pointer is no wider
 * than 32 bits; on wider hosts it would enlarge the union and shift every
 * field that follows it in the enclosing command structure.
 */
union lc_str {
    uint32_t    offset; /* offset to the string */
#if UINTPTR_MAX <= UINT32_MAX
    char        *ptr;   /* pointer to the string */
#endif
};
00220 
00221 /*
00222  * The segment load command indicates that a part of this file is to be
00223  * mapped into the task's address space.  The size of this segment in memory,
00224  * vmsize, may be equal to or larger than the amount to map from this file,
00225  * filesize.  The file is mapped starting at fileoff to the beginning of
00226  * the segment in memory, vmaddr.  The rest of the memory of the segment,
00227  * if any, is allocated zero fill on demand.  The segment's maximum virtual
00228  * memory protection and initial virtual memory protection are specified
00229  * by the maxprot and initprot fields.  If the segment has sections then the
00230  * section structures directly follow the segment command and their size is
00231  * reflected in cmdsize.
00232  */
00233 struct segment_command {
00234     unsigned long   cmd;        /* LC_SEGMENT */
00235     unsigned long   cmdsize;    /* includes sizeof section structs */
00236     char        segname[16];    /* segment name */
00237     unsigned long   vmaddr;     /* memory address of this segment */
00238     unsigned long   vmsize;     /* memory size of this segment */
00239     unsigned long   fileoff;    /* file offset of this segment */
00240     unsigned long   filesize;   /* amount to map from the file */
00241     vm_prot_t   maxprot;    /* maximum VM protection */
00242     vm_prot_t   initprot;   /* initial VM protection */
00243     unsigned long   nsects;     /* number of sections in segment */
00244     unsigned long   flags;      /* flags */
00245 };
00246 
00247 /* Constants for the flags field of the segment_command */
/* Bit values for segment_command.flags. */

/* the file contents for this segment is for the high part of the VM space,
   the low part is zero filled (for stacks in core files) */
#define SG_HIGHVM   0x1

/* this segment is the VM that is allocated by a fixed VM library, for
   overlap checking in the link editor */
#define SG_FVMLIB   0x2

/* this segment has nothing that was relocated in it and nothing relocated
   to it, that is it may be safely replaced without relocation */
#define SG_NORELOC  0x4
00257 
00258 /*
00259  * A segment is made up of zero or more sections.  Non-MH_OBJECT files have
00260  * all of their segments with the proper sections in each, and padded to the
00261  * specified segment alignment when produced by the link editor.  The first
00262  * segment of a MH_EXECUTE and MH_FVMLIB format file contains the mach_header
00263  * and load commands of the object file before its first section.  The zero
00264  * fill sections are always last in their segment (in all formats).  This
00265  * allows the zeroed segment padding to be mapped into memory where zero fill
00266  * sections might be.
00267  *
00268  * The MH_OBJECT format has all of its sections in one segment for
00269  * compactness.  There is no padding to a specified segment boundary and the
00270  * mach_header and load commands are not part of the segment.
00271  *
00272  * Sections with the same section name, sectname, going into the same segment,
00273  * segname, are combined by the link editor.  The resulting section is aligned
00274  * to the maximum alignment of the combined sections and is the new section's
00275  * alignment.  The combined sections are aligned to their original alignment in
00276  * the combined section.  Any padded bytes to get the specified alignment are
00277  * zeroed.
00278  *
00279  * The format of the relocation entries referenced by the reloff and nreloc
00280  * fields of the section structure for mach object files is described in the
00281  * header file <reloc.h>.
00282  */
/*
 * Describes one section of a segment; section structures directly follow
 * their segment_command.
 *
 * Integer fields are uint32_t (not unsigned long) so the structure is 68
 * bytes on every host; with unsigned long it grows on LP64 hosts and no
 * longer lines up with the data in the file.
 */
struct section {
    char        sectname[16];   /* name of this section */
    char        segname[16];    /* segment this section goes in */
    uint32_t    addr;           /* memory address of this section */
    uint32_t    size;           /* size in bytes of this section */
    uint32_t    offset;         /* file offset of this section */
    uint32_t    align;          /* section alignment (power of 2) */
    uint32_t    reloff;         /* file offset of relocation entries */
    uint32_t    nreloc;         /* number of relocation entries */
    uint32_t    flags;          /* flags (section type and attributes) */
    uint32_t    reserved1;      /* reserved */
    uint32_t    reserved2;      /* reserved */
};
00296 
00297 /*
00298  * The flags field of a section structure is separated into two parts a section
00299  * type and section attributes.  The section types are mutually exclusive (it
00300  * can only have one type) but the section attributes are not (it may have more
00301  * than one attribute).
00302  */
/* mask selecting the section type: 256 section types */
#define SECTION_TYPE        0x000000ff
/* mask selecting the section attributes: 24 section attributes */
#define SECTION_ATTRIBUTES  0xffffff00

/* Constants for the type of a section */

/* regular section */
#define S_REGULAR           0x0
/* zero fill on demand section */
#define S_ZEROFILL          0x1
/* section with only literal C strings */
#define S_CSTRING_LITERALS  0x2
/* section with only 4 byte literals */
#define S_4BYTE_LITERALS    0x3
/* section with only 8 byte literals */
#define S_8BYTE_LITERALS    0x4
/* section with only pointers to literals */
#define S_LITERAL_POINTERS  0x5
00314 /*
00315  * For the two types of symbol pointers sections and the symbol stubs section
00316  * they have indirect symbol table entries.  For each of the entries in the
00317  * section the indirect symbol table entries, in corresponding order in the
00318  * indirect symbol table, start at the index stored in the reserved1 field
00319  * of the section structure.  Since the indirect symbol table entries
00320  * correspond to the entries in the section the number of indirect symbol table
00321  * entries is inferred from the size of the section divided by the size of the
00322  * entries in the section.  For symbol pointers sections the size of the entries
00323  * in the section is 4 bytes and for symbol stubs sections the byte size of the
00324  * stubs is stored in the reserved2 field of the section structure.
00325  */
/* section with only non-lazy symbol pointers */
#define S_NON_LAZY_SYMBOL_POINTERS  0x6
/* section with only lazy symbol pointers */
#define S_LAZY_SYMBOL_POINTERS      0x7
/* section with only symbol stubs, byte size of stub in the reserved2 field */
#define S_SYMBOL_STUBS              0x8
/* section with only function pointers for initialization */
#define S_MOD_INIT_FUNC_POINTERS    0x9
/* section with only function pointers for termination */
#define S_MOD_TERM_FUNC_POINTERS    0xa
/* section contains symbols that are to be coalesced */
#define S_COALESCED                 0xb
00339 /*
00340  * Constants for the section attributes part of the flags field of a section
00341  * structure.
00342  */
/* User setable attributes */
#define SECTION_ATTRIBUTES_USR      0xff000000
/* section contains only true machine instructions */
#define S_ATTR_PURE_INSTRUCTIONS    0x80000000
/* section contains coalesced symbols that are not to be in a ranlib table
   of contents */
#define S_ATTR_NO_TOC               0x40000000
/* ok to strip static symbols in this section in files with the
   MH_DYLDLINK flag */
#define S_ATTR_STRIP_STATIC_SYMS    0x20000000

/* system setable attributes */
#define SECTION_ATTRIBUTES_SYS      0x00ffff00
/* section contains some machine instructions */
#define S_ATTR_SOME_INSTRUCTIONS    0x00000400
/* section has external relocation entries */
#define S_ATTR_EXT_RELOC            0x00000200
/* section has local relocation entries */
#define S_ATTR_LOC_RELOC            0x00000100
00360 
00361 
00362 /*
00363  * The names of segments and sections in them are mostly meaningless to the
00364  * link-editor.  But there are few things to support traditional UNIX
00365  * executables that require the link-editor and assembler to use some names
00366  * agreed upon by convention.
00367  *
00368  * The initial protection of the "__TEXT" segment has write protection turned
00369  * off (not writeable).
00370  *
00371  * The link-editor will allocate common symbols at the end of the "__common"
00372  * section in the "__DATA" segment.  It will create the section and segment
00373  * if needed.
00374  */
00375 
00376 /* The currently known segment names and the section names in those segments */
00377 
/* the pagezero segment which has no protections and catches NULL
   references for MH_EXECUTE files */
#define SEG_PAGEZERO        "__PAGEZERO"

/* the traditional UNIX text segment */
#define SEG_TEXT            "__TEXT"
/* the real text part of the text section: no headers, and no padding */
#define SECT_TEXT           "__text"
/* the fvmlib initialization section */
#define SECT_FVMLIB_INIT0   "__fvmlib_init0"
/* the section following the fvmlib initialization section */
#define SECT_FVMLIB_INIT1   "__fvmlib_init1"

/* the traditional UNIX data segment */
#define SEG_DATA            "__DATA"
/* the real initialized data section: no padding, no bss overlap */
#define SECT_DATA           "__data"
/* the real uninitialized data section: no padding */
#define SECT_BSS            "__bss"
/* the section common symbols are allocated in by the link editor */
#define SECT_COMMON         "__common"

/* objective-C runtime segment */
#define SEG_OBJC            "__OBJC"
/* symbol table */
#define SECT_OBJC_SYMBOLS   "__symbol_table"
/* module information */
#define SECT_OBJC_MODULES   "__module_info"
/* string table */
#define SECT_OBJC_STRINGS   "__selector_strs"
/* string table */
#define SECT_OBJC_REFS      "__selector_refs"

/* the icon segment */
#define SEG_ICON            "__ICON"
/* the icon headers */
#define SECT_ICON_HEADER    "__header"
/* the icons in tiff format */
#define SECT_ICON_TIFF      "__tiff"

/* the segment containing all structs created and maintained by the link
   editor.  Created with -seglinkedit option to ld(1) for MH_EXECUTE and
   FVMLIB file types only */
#define SEG_LINKEDIT        "__LINKEDIT"

/* the unix stack segment */
#define SEG_UNIXSTACK       "__UNIXSTACK"
00417 
00418 /*
00419  * Fixed virtual memory shared libraries are identified by two things.  The
00420  * target pathname (the name of the library as found for execution), and the
00421  * minor version number.  The address of where the headers are loaded is in
00422  * header_addr.
00423  */
00424 struct fvmlib {
00425     union lc_str    name;       /* library's target pathname */
00426     unsigned long   minor_version;  /* library's minor version number */
00427     unsigned long   header_addr;    /* library's header address */
00428 };
00429 
00430 /*
00431  * A fixed virtual shared library (filetype == MH_FVMLIB in the mach header)
00432  * contains a fvmlib_command (cmd == LC_IDFVMLIB) to identify the library.
00433  * An object that uses a fixed virtual shared library also contains a
00434  * fvmlib_command (cmd == LC_LOADFVMLIB) for each library it uses.
00435  */
00436 struct fvmlib_command {
00437     unsigned long   cmd;        /* LC_IDFVMLIB or LC_LOADFVMLIB */
00438     unsigned long   cmdsize;    /* includes pathname string */
00439     struct fvmlib   fvmlib;     /* the library identification */
00440 };
00441 
00442 /*
00443  * Dynamically linked shared libraries are identified by two things.  The
00444  * pathname (the name of the library as found for execution), and the
00445  * compatibility version number.  The pathname must match and the compatibility
00446  * number in the user of the library must be greater than or equal to the
00447  * library being used.  The time stamp is used to record the time a library was
00448  * built and copied into user so it can be used to determine if the library used
00449  * at runtime is exactly the same as the one used to build the program.
00450  */
00451 struct dylib {
00452     union lc_str  name;         /* library's path name */
00453     unsigned long timestamp;        /* library's build time stamp */
00454     unsigned long current_version;  /* library's current version number */
00455     unsigned long compatibility_version;/* library's compatibility vers number*/
00456 };
00457 
00458 /*
00459  * A dynamically linked shared library (filetype == MH_DYLIB in the mach header)
00460  * contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library.
00461  * An object that uses a dynamically linked shared library also contains a
00462  * dylib_command (cmd == LC_LOAD_DYLIB or cmd == LC_LOAD_WEAK_DYLIB) for each
00463  * library it uses.
00464  */
00465 struct dylib_command {
00466     unsigned long   cmd;        /* LC_ID_DYLIB, LC_LOAD_{,WEAK_}DYLIB */
00467     unsigned long   cmdsize;    /* includes pathname string */
00468     struct dylib    dylib;      /* the library identification */
00469 };
00470 
00471 /*
00472  * A dynamically linked shared library may be a subframework of an umbrella
00473  * framework.  If so it will be linked with "-umbrella umbrella_name" where
00474  * "umbrella_name" is the name of the umbrella framework. A subframework
00475  * can only be linked against by its umbrella framework or other subframeworks
00476  * that are part of the same umbrella framework.  Otherwise the static link
00477  * editor produces an error and states to link against the umbrella framework.
00478  * The name of the umbrella framework for subframeworks is recorded in the
00479  * following structure.
00480  */
00481 struct sub_framework_command {
00482     unsigned long   cmd;        /* LC_SUB_FRAMEWORK */
00483     unsigned long   cmdsize;    /* includes umbrella string */
00484     union lc_str    umbrella;   /* the umbrella framework name */
00485 };
00486 
00487 /*
00488  * For dynamically linked shared libraries that are subframeworks of an umbrella
00489  * framework they can allow clients other than the umbrella framework or other
00490  * subframeworks in the same umbrella framework.  To do this the subframework
00491  * is built with "-allowable_client client_name" and an LC_SUB_CLIENT load
00492  * command is created for each -allowable_client flag.  The client_name is
00493  * usually a framework name.  It can also be a name used for bundle clients
00494  * where the bundle is built with "-client_name client_name".
00495  */
00496 struct sub_client_command {
00497     unsigned long   cmd;        /* LC_SUB_CLIENT */
00498     unsigned long   cmdsize;    /* includes client string */
00499     union lc_str    client;     /* the client name */
00500 };
00501 
00502 /*
00503  * A dynamically linked shared library may be a sub_umbrella of an umbrella
00504  * framework.  If so it will be linked with "-sub_umbrella umbrella_name" where
00505  * "umbrella_name" is the name of the sub_umbrella framework.  When
00506  * statically linking with -twolevel_namespace in effect, a twolevel namespace
00507  * umbrella framework will only cause its subframeworks and those frameworks
00508  * listed as sub_umbrella frameworks to be implicitly linked in.  Any other
00509  * dependent dynamic libraries will not be linked in when -twolevel_namespace
00510  * is in effect.  The primary library recorded by the static linker when
00511  * resolving a symbol in these libraries will be the umbrella framework.
00512  * Zero or more sub_umbrella frameworks may be used by an umbrella framework.
00513  * The name of a sub_umbrella framework is recorded in the following structure.
00514  */
00515 struct sub_umbrella_command {
00516     unsigned long   cmd;        /* LC_SUB_UMBRELLA */
00517     unsigned long   cmdsize;    /* includes sub_umbrella string */
00518     union lc_str    sub_umbrella;   /* the sub_umbrella framework name */
00519 };
00520 
00521 /*
00522  * A dynamically linked shared library may be a sub_library of another shared
00523  * library.  If so it will be linked with "-sub_library library_name" where
00524  * "library_name" is the name of the sub_library shared library.  When
00525  * statically linking with -twolevel_namespace in effect, a twolevel namespace
00526  * shared library will only cause its subframeworks and those frameworks
00527  * listed as sub_umbrella frameworks and libraries listed as sub_libraries to
00528  * be implicitly linked in.  Any other dependent dynamic libraries will not be
00529  * linked in when -twolevel_namespace is in effect.  The primary library
00530  * recorded by the static linker when resolving a symbol in these libraries
00531  * will be the umbrella framework (or dynamic library). Zero or more sub_library
00532  * shared libraries may be used by an umbrella framework (or dynamic library).
00533  * The name of a sub_library framework is recorded in the following structure.
00534  * For example /usr/lib/libobjc_profile.A.dylib would be recorded as "libobjc".
00535  */
00536 struct sub_library_command {
00537     unsigned long   cmd;        /* LC_SUB_LIBRARY */
00538     unsigned long   cmdsize;    /* includes sub_library string */
00539     union lc_str    sub_library;    /* the sub_library name */
00540 };
00541 
00542 /*
00543  * A program (filetype == MH_EXECUTE) that is
00544  * prebound to its dynamic libraries has one of these for each library that
00545  * the static linker used in prebinding.  It contains a bit vector for the
00546  * modules in the library.  The bits indicate which modules are bound (1) and
00547  * which are not (0) from the library.  The bit for module 0 is the low bit
00548  * of the first byte.  So the bit for the Nth module is:
00549  * (linked_modules[N/8] >> N%8) & 1
00550  */
00551 struct prebound_dylib_command {
00552     unsigned long   cmd;        /* LC_PREBOUND_DYLIB */
00553     unsigned long   cmdsize;    /* includes strings */
00554     union lc_str    name;       /* library's path name */
00555     unsigned long   nmodules;   /* number of modules in library */
00556     union lc_str    linked_modules; /* bit vector of linked modules */
00557 };
00558 
00559 /*
00560  * A program that uses a dynamic linker contains a dylinker_command to identify
00561  * the name of the dynamic linker (LC_LOAD_DYLINKER).  And a dynamic linker
00562  * contains a dylinker_command to identify the dynamic linker (LC_ID_DYLINKER).
00563  * A file can have at most one of these.
00564  */
00565 struct dylinker_command {
00566     unsigned long   cmd;        /* LC_ID_DYLINKER or LC_LOAD_DYLINKER */
00567     unsigned long   cmdsize;    /* includes pathname string */
00568     union lc_str    name;       /* dynamic linker's path name */
00569 };
00570 
00571 /*
00572  * Thread commands contain machine-specific data structures suitable for
00573  * use in the thread state primitives.  The machine specific data structures
00574  * follow the struct thread_command as follows.
00575  * Each flavor of machine specific data structure is preceded by an unsigned
00576  * long constant for the flavor of that data structure, an unsigned long
00577  * that is the count of longs of the size of the state data structure and then
00578  * the state data structure follows.  This triple may be repeated for many
00579  * flavors.  The constants for the flavors, counts and state data structure
00580  * definitions are expected to be in the header file <machine/thread_status.h>.
00581  * These machine specific data structures sizes must be multiples of
00582  * 4 bytes.  The cmdsize reflects the total size of the thread_command
00583  * and all of the sizes of the constants for the flavors, counts and state
00584  * data structures.
00585  *
00586  * For executable objects that are unix processes there will be one
00587  * thread_command (cmd == LC_UNIXTHREAD) created for it by the link-editor.
00588  * This is the same as a LC_THREAD, except that a stack is automatically
00589  * created (based on the shell's limit for the stack size).  Command arguments
00590  * and environment variables are copied onto that stack.
00591  */
00592 struct thread_command {
00593     unsigned long   cmd;        /* command type: LC_THREAD or LC_UNIXTHREAD */
00594     unsigned long   cmdsize;    /* total size of this command */
00595     /* unsigned long flavor        flavor of thread state */
00596     /* unsigned long count         count of longs in thread state */
00597     /* struct XXX_thread_state state   thread state for this flavor */
00598     /* ... */
00599 };
00600 
00601 /*
00602  * The routines command contains the address of the dynamic shared library 
00603  * initialization routine and an index into the module table for the module
00604  * that defines the routine.  Before any modules are used from the library the
00605  * dynamic linker fully binds the module that defines the initialization routine
00606  * and then calls it.  This gets called before any module initialization
00607  * routines (used for C++ static constructors) in the library.
00608  */
00609 struct routines_command {
00610     unsigned long   cmd;        /* command type: LC_ROUTINES */
00611     unsigned long   cmdsize;    /* total size of this command */
00612     unsigned long   init_address;   /* address of initialization routine */
00613     unsigned long   init_module;    /* index into the module table that */
00614                         /*  the init routine is defined in */
00615     unsigned long   reserved1;  /* reserved1-6: purpose not documented in this header */
00616     unsigned long   reserved2;
00617     unsigned long   reserved3;
00618     unsigned long   reserved4;
00619     unsigned long   reserved5;
00620     unsigned long   reserved6;
00621 };
00622 
00623 /*
00624  * The symtab_command contains the offsets and sizes of the link-edit 4.3BSD
00625  * "stab" style symbol table information as described in the header files
00626  * <nlist.h> and <stab.h>.
00627  */
00628 struct symtab_command {
00629     unsigned long   cmd;        /* command type: LC_SYMTAB */
00630     unsigned long   cmdsize;    /* command size: sizeof(struct symtab_command) */
00631     unsigned long   symoff;     /* symbol table offset */
00632     unsigned long   nsyms;      /* number of symbol table entries */
00633     unsigned long   stroff;     /* string table offset */
00634     unsigned long   strsize;    /* string table size in bytes */
00635 };
00636 
00637 /*
00638  * This is the second set of the symbolic information which is used to support
00639  * the data structures for the dynamic link editor.
00640  *
00641  * The original set of symbolic information in the symtab_command which contains
00642  * the symbol and string tables must also be present when this load command is
00643  * present.  When this load command is present the symbol table is organized
00644  * into three groups of symbols:
00645  *  local symbols (static and debugging symbols) - grouped by module
00646  *  defined external symbols - grouped by module (sorted by name if not lib)
00647  *  undefined external symbols (sorted by name if MH_BINDATLOAD is not set,
00648  *                      and in the order they were seen by the static
00649  *                  linker if MH_BINDATLOAD is set)
00650  * In this load command there are offsets and counts to each of the three groups
00651  * of symbols.
00652  *
00653  * This load command contains the offsets and sizes of the following new
00654  * symbolic information tables:
00655  *  table of contents
00656  *  module table
00657  *  reference symbol table
00658  *  indirect symbol table
00659  * The first three tables above (the table of contents, module table and
00660  * reference symbol table) are only present if the file is a dynamically linked
00661  * shared library.  For executable and object modules, which are files
00662  * containing only one module, the information that would be in these three
00663  * tables is determined as follows:
00664  *  table of contents - the defined external symbols are sorted by name
00665  *  module table - the file contains only one module so everything in the
00666  *             file is part of the module.
00667  *  reference symbol table - is the defined and undefined external symbols
00668  *
00669  * For dynamically linked shared library files this load command also contains
00670  * offsets and sizes to the pool of relocation entries for all sections
00671  * separated into two groups:
00672  *  external relocation entries
00673  *  local relocation entries
00674  * For executable and object modules the relocation entries continue to hang
00675  * off the section structures.
00676  */
00677 struct dysymtab_command {
00678     unsigned long cmd;      /* command type: LC_DYSYMTAB */
00679     unsigned long cmdsize;  /* command size: sizeof(struct dysymtab_command) */
00680 
00681     /*
00682      * The symbols indicated by symoff and nsyms of the LC_SYMTAB load command
00683      * are grouped into the following three groups:
00684      *    local symbols (further grouped by the module they are from)
00685      *    defined external symbols (further grouped by the module they are from)
00686      *    undefined symbols
00687      *
00688      * The local symbols are used only for debugging.  The dynamic binding
00689      * process may have to use them to indicate to the debugger the local
00690      * symbols for a module that is being bound.
00691      *
00692      * The last two groups are used by the dynamic binding process to do the
00693      * binding (indirectly through the module table and the reference symbol
00694      * table when this is a dynamically linked shared library file).
00695      */
00696     unsigned long ilocalsym;    /* index to local symbols */
00697     unsigned long nlocalsym;    /* number of local symbols */
00698 
00699     unsigned long iextdefsym;   /* index to externally defined symbols */
00700     unsigned long nextdefsym;   /* number of externally defined symbols */
00701 
00702     unsigned long iundefsym;    /* index to undefined symbols */
00703     unsigned long nundefsym;    /* number of undefined symbols */
00704 
00705     /*
00706      * For the dynamic binding process to find which module a symbol
00707      * is defined in the table of contents is used (analogous to the ranlib
00708      * structure in an archive) which maps defined external symbols to modules
00709      * they are defined in.  This exists only in a dynamically linked shared
00710      * library file.  For executable and object modules the defined external
00711      * symbols are sorted by name and are used as the table of contents.
00712      */
00713     unsigned long tocoff;   /* file offset to table of contents */
00714     unsigned long ntoc;     /* number of entries in table of contents */
00715 
00716     /*
00717      * To support dynamic binding of "modules" (whole object files) the symbol
00718      * table must reflect the modules that the file was created from.  This is
00719      * done by having a module table that has indexes and counts into the merged
00720      * tables for each module.  The module structure that these two entries
00721      * refer to is described below.  This exists only in a dynamically linked
00722      * shared library file.  For executable and object modules the file only
00723      * contains one module so everything in the file belongs to the module.
00724      */
00725     unsigned long modtaboff;    /* file offset to module table */
00726     unsigned long nmodtab;  /* number of module table entries */
00727 
00728     /*
00729      * To support dynamic module binding the module structure for each module
00730      * indicates the external references (defined and undefined) each module
00731      * makes.  For each module there is an offset and a count into the
00732      * reference symbol table for the symbols that the module references.
00733      * This exists only in a dynamically linked shared library file.  For
00734      * executable and object modules the defined external symbols and the
00735      * undefined external symbols indicate the external references.
00736      */
00737     unsigned long extrefsymoff;  /* offset to referenced symbol table */
00738     unsigned long nextrefsyms;   /* number of referenced symbol table entries */
00739 
00740     /*
00741      * The sections that contain "symbol pointers" and "routine stubs" have
00742      * indexes and (implied counts based on the size of the section and fixed
00743      * size of the entry) into the "indirect symbol" table for each pointer
00744      * and stub.  For every section of these two types the index into the
00745      * indirect symbol table is stored in the section header in the field
00746      * reserved1.  An indirect symbol table entry is simply a 32bit index into
00747      * the symbol table to the symbol that the pointer or stub is referring to.
00748      * The indirect symbol table is ordered to match the entries in the section.
00749      */
00750     unsigned long indirectsymoff; /* file offset to the indirect symbol table */
00751     unsigned long nindirectsyms;  /* number of indirect symbol table entries */
00752 
00753     /*
00754      * To support relocating an individual module in a library file quickly the
00755      * external relocation entries for each module in the library need to be
00756      * accessed efficiently.  Since the relocation entries can't be accessed
00757      * through the section headers for a library file they are separated into
00758      * groups of local and external entries further grouped by module.  In this
00759      * case the presence of this load command whose extreloff, nextrel,
00760      * locreloff and nlocrel fields are non-zero indicates that the relocation
00761      * entries of non-merged sections are not referenced through the section
00762      * structures (and the reloff and nreloc fields in the section headers are
00763      * set to zero).
00764      *
00765      * Since the relocation entries are not accessed through the section headers
00766      * this requires the r_address field to be something other than a section
00767      * offset to identify the item to be relocated.  In this case r_address is
00768      * set to the offset from the vmaddr of the first LC_SEGMENT command.
00769      * For MH_SPLIT_SEGS images r_address is set to the offset from the
00770      * vmaddr of the first read-write LC_SEGMENT command.
00771      *
00772      * The relocation entries are grouped by module and the module table
00773      * entries have indexes and counts into them for the group of external
00774      * relocation entries for that module.
00775      *
00776      * For sections that are merged across modules there must not be any
00777      * remaining external relocation entries for them (for merged sections
00778      * remaining relocation entries must be local).
00779      */
00780     unsigned long extreloff;    /* offset to external relocation entries */
00781     unsigned long nextrel;  /* number of external relocation entries */
00782 
00783     /*
00784      * All the local relocation entries are grouped together (they are not
00785      * grouped by their module since they are only used if the object is moved
00786      * from its statically link edited address).
00787      */
00788     unsigned long locreloff;    /* offset to local relocation entries */
00789     unsigned long nlocrel;  /* number of local relocation entries */
00790 
00791 };  
00792 
00793 /*
00794  * An indirect symbol table entry is simply a 32bit index into the symbol table 
00795  * to the symbol that the pointer or stub is referring to.  Unless it is for a
00796  * non-lazy symbol pointer section for a defined symbol which strip(1) has 
00797  * removed.  In which case it has the value INDIRECT_SYMBOL_LOCAL.  If the
00798  * symbol was also absolute INDIRECT_SYMBOL_ABS is or'ed with that.
00799  */
00800 #define INDIRECT_SYMBOL_LOCAL   0x80000000
00801 #define INDIRECT_SYMBOL_ABS 0x40000000
00802 
00803 
00804 /* a table of contents entry: maps a defined external symbol to its module */
00805 struct dylib_table_of_contents {
00806     unsigned long symbol_index; /* the defined external symbol
00807                    (index into the symbol table) */
00808     unsigned long module_index; /* index into the module table this symbol
00809                    is defined in */
00810 };  
00811 
00812 /* a module table entry */
00813 struct dylib_module {
00814     unsigned long module_name;  /* the module name (index into string table) */
00815 
00816     unsigned long iextdefsym;   /* index into externally defined symbols */
00817     unsigned long nextdefsym;   /* number of externally defined symbols */
00818     unsigned long irefsym;  /* index into reference symbol table */
00819     unsigned long nrefsym;  /* number of reference symbol table entries */
00820     unsigned long ilocalsym;    /* index into symbols for local symbols */
00821     unsigned long nlocalsym;    /* number of local symbols */
00822 
00823     unsigned long iextrel;  /* index into external relocation entries */
00824     unsigned long nextrel;  /* number of external relocation entries */
00825 
00826     unsigned long iinit_iterm;  /* low 16 bits are the index into the init
00827                    section, high 16 bits are the index into
00828                        the term section */
00829     unsigned long ninit_nterm;  /* low 16 bits are the number of init section
00830                    entries, high 16 bits are the number of
00831                    term section entries */
00832 
00833     unsigned long       /* for this module, the address of the start of */
00834     objc_module_info_addr;  /*  the (__OBJC,__module_info) section */
00835     unsigned long       /* for this module, the size of */
00836     objc_module_info_size;  /*  the (__OBJC,__module_info) section */
00837 };  
00838 
00839 /* 
00840  * The entries in the reference symbol table are used when loading the module
00841  * (both by the static and dynamic link editors) and if the module is unloaded
00842  * or replaced.  Therefore all external symbols (defined and undefined) are
00843  * listed in the module's reference table.  The flags describe the type of
00844  * reference that is being made.  The constants for the flags are defined in
00845  * <mach-o/nlist.h> as they are also used for symbol table entries.
00846  */
00847 struct dylib_reference {
00848     unsigned long isym:24,  /* index into the symbol table */
00849               flags:8;  /* flags to indicate the type of reference */
00850 };  /* NOTE(review): isym:24 + flags:8 = 32 bits; confirm unsigned long bit-fields pack into 4 bytes on the target ABI */
00851 
00852 /*
00853  * The twolevel_hints_command contains the offset and number of hints in the
00854  * two-level namespace lookup hints table.
00855  */
00856 struct twolevel_hints_command {
00857     unsigned long cmd;      /* command type: LC_TWOLEVEL_HINTS */
00858     unsigned long cmdsize;  /* command size: sizeof(struct twolevel_hints_command) */
00859     unsigned long offset;   /* offset to the hint table */
00860     unsigned long nhints;   /* number of hints in the hint table */
00861 };
00862 
00863 /*
00864  * The entries in the two-level namespace lookup hints table are twolevel_hint
00865  * structs.  These provide hints to the dynamic link editor where to start
00866  * looking for an undefined symbol in a two-level namespace image.  The
00867  * isub_image field is an index into the sub-images (sub-frameworks and
00868  * sub-umbrellas list) that made up the two-level image that the undefined
00869  * symbol was found in when it was built by the static link editor.  If
00870  * isub_image is 0 then the symbol is expected to be defined in the library and
00871  * not in the sub-images.  If isub_image is non-zero it is an index into the array
00872  * of sub-images for the umbrella with the first index in the sub-images being
00873  * 1. The array of sub-images is the ordered list of sub-images of the umbrella
00874  * that would be searched for a symbol that has the umbrella recorded as its
00875  * primary library.  The table of contents index is an index into the
00876  * library's table of contents.  This is used as the starting point of the
00877  * binary search or a directed linear search.
00878  */
00879 struct twolevel_hint {
00880     unsigned long 
00881     isub_image:8,   /* index into the sub images */
00882     itoc:24;    /* index into the table of contents */
00883 };  /* NOTE(review): isub_image:8 + itoc:24 = 32 bits; confirm unsigned long bit-fields pack into 4 bytes on the target ABI */
00884 
00885 /*
00886  * The prebind_cksum_command contains the value of the original check sum for
00887  * prebound files or zero.  When a prebound file is first created or modified
00888  * for other than updating its prebinding information the value of the check sum
00889  * is set to zero.  When the file has its prebinding re-done and if the value of
00890  * the check sum is zero the original check sum is calculated and stored in
00891  * cksum field of this load command in the output file.  If when the prebinding
00892  * is re-done and the cksum field is non-zero it is left unchanged from the
00893  * input file.
00894  */
00895 struct prebind_cksum_command {
00896     unsigned long cmd;      /* command type: LC_PREBIND_CKSUM */
00897     unsigned long cmdsize;  /* command size: sizeof(struct prebind_cksum_command) */
00898     unsigned long cksum;    /* the check sum or zero */
00899 };
00900 
00901 /*
00902  * The symseg_command contains the offset and size of the GNU style
00903  * symbol table information as described in the header file <symseg.h>.
00904  * The symbol roots of the symbol segments must also be aligned properly
00905  * in the file.  So the requirement of keeping the offsets aligned to a
00906  * multiple of 4 bytes translates to the length field of the symbol
00907  * roots also being a multiple of a long.  Also the padding must again be
00908  * zeroed. (THIS IS OBSOLETE and no longer supported).
00909  */
00910 struct symseg_command {
00911     unsigned long   cmd;        /* command type: LC_SYMSEG */
00912     unsigned long   cmdsize;    /* command size: sizeof(struct symseg_command) */
00913     unsigned long   offset;     /* symbol segment offset */
00914     unsigned long   size;       /* symbol segment size in bytes */
00915 };
00916 
00917 /*
00918  * The ident_command contains a free format string table following the
00919  * ident_command structure.  The strings are null terminated and the size of
00920  * the command is padded out with zero bytes to a multiple of 4 bytes.
00921  * (THIS IS OBSOLETE and no longer supported).
00922  */
00923 struct ident_command {
00924     unsigned long cmd;      /* command type: LC_IDENT */
00925     unsigned long cmdsize;      /* strings that follow this command */
00926 };
00927 
00928 /*
00929  * The fvmfile_command contains a reference to a file to be loaded at the
00930  * specified virtual address.  (Presently, this command is reserved for
00931  * internal use.  The kernel ignores this command when loading a program into
00932  * memory).
00933  */
00934 struct fvmfile_command {
00935     unsigned long cmd;      /* command type: LC_FVMFILE */
00936     unsigned long cmdsize;      /* command size, includes pathname string */
00937     union lc_str    name;       /* file's pathname */
00938     unsigned long   header_addr;    /* file's virtual address */
00939 };
00940 
00941 #endif /* _MACHO_LOADER_H_ */

Generated on Tue Sep 19 21:18:25 2006 for Boomerang by  doxygen 1.4.6