changes: document fixed crash on invalid UTF-8 sequences
[nasm.git] / output / outmacho32.c
blobc9e4365951f6e92318f3867595be288461ad7849
1 /* ----------------------------------------------------------------------- *
2 *
3 * Copyright 1996-2009 The NASM Authors - All Rights Reserved
4 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
9 * conditions are met:
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * ----------------------------------------------------------------------- */
/*
 * outmacho.c   output routines for the Netwide Assembler to produce
 *              NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X object files
 */
39 /* Most of this file is, like Mach-O itself, based on a.out. For more
40 * guidelines see outaout.c. */
42 #include "compiler.h"
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <ctype.h>
48 #include <inttypes.h>
50 #include "nasm.h"
51 #include "nasmlib.h"
52 #include "saa.h"
53 #include "raa.h"
54 #include "output/outform.h"
55 #include "output/outlib.h"
57 #if defined(OF_MACHO32)
59 /* Mach-O in-file header structure sizes */
60 #define MACHO_HEADER_SIZE (28)
61 #define MACHO_SEGCMD_SIZE (56)
62 #define MACHO_SECTCMD_SIZE (68)
63 #define MACHO_SYMCMD_SIZE (24)
64 #define MACHO_NLIST_SIZE (12)
65 #define MACHO_RELINFO_SIZE (8)
67 /* Mach-O file header values */
68 #define MH_MAGIC (0xfeedface)
69 #define CPU_TYPE_I386 (7) /* x86 platform */
70 #define CPU_SUBTYPE_I386_ALL (3) /* all-x86 compatible */
71 #define MH_OBJECT (0x1) /* object file */
73 #define LC_SEGMENT (0x1) /* segment load command */
74 #define LC_SYMTAB (0x2) /* symbol table load command */
76 #define VM_PROT_NONE (0x00)
77 #define VM_PROT_READ (0x01)
78 #define VM_PROT_WRITE (0x02)
79 #define VM_PROT_EXECUTE (0x04)
81 #define VM_PROT_DEFAULT (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
82 #define VM_PROT_ALL (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
/*
 * One output section.  Sections are kept in a singly linked list in
 * creation order; a section's position in that list (starting at 1)
 * is what the rest of this file uses as its in-file section number.
 */
struct section {
    /* nasm internal data */
    struct section *next;       /* next section in creation order */
    struct SAA *data;           /* accumulated section contents */
    int32_t index;              /* nasm segment index (from seg_alloc()) */
    struct reloc *relocs;       /* relocation list, newest entry first */
    int align;                  /* log2 of the alignment, -1 if not yet set */

    /* data that goes into the file */
    char sectname[16];          /* what this section is called */
    char segname[16];           /* segment this section will be in */
    uint32_t addr;              /* in-memory address (subject to alignment) */
    uint32_t size;              /* in-memory and -file size */
    uint32_t nreloc;            /* relocation entry count */
    uint32_t flags;             /* type and attributes (masked) */
};
101 #define SECTION_TYPE 0x000000ff /* section type mask */
103 #define S_REGULAR (0x0) /* standard section */
104 #define S_ZEROFILL (0x1) /* zerofill, in-memory only */
106 #define SECTION_ATTRIBUTES_SYS 0x00ffff00 /* system setable attributes */
107 #define S_ATTR_SOME_INSTRUCTIONS 0x00000400 /* section contains some
108 machine instructions */
109 #define S_ATTR_EXT_RELOC 0x00000200 /* section has external
110 relocation entries */
111 #define S_ATTR_LOC_RELOC 0x00000100 /* section has local
112 relocation entries */
115 static struct sectmap {
116 const char *nasmsect;
117 const char *segname;
118 const char *sectname;
119 const int32_t flags;
120 } sectmap[] = {
121 {".text", "__TEXT", "__text", S_REGULAR|S_ATTR_SOME_INSTRUCTIONS},
122 {".data", "__DATA", "__data", S_REGULAR},
123 {".rodata", "__DATA", "__const", S_REGULAR},
124 {".bss", "__DATA", "__bss", S_ZEROFILL},
125 {NULL, NULL, NULL, 0}
/*
 * One relocation entry.  The bit-fields after addr are combined into
 * the second 32-bit word of the on-disk entry by macho_write_relocs().
 */
struct reloc {
    /* nasm internal data */
    struct reloc *next;

    /* data that goes into the file */
    int32_t addr;               /* op's offset in section */
    unsigned int snum:24,       /* contains symbol index if
                                ** ext otherwise in-file
                                ** section number */
        pcrel:1,                /* relative relocation */
        length:2,               /* 0=byte, 1=word, 2=int32_t */
        ext:1,                  /* external symbol referenced */
        type:4;                 /* reloc type, 0 for us */
};
143 #define R_ABS 0 /* absolute relocation */
144 #define R_SCATTERED 0x80000000 /* reloc entry is scattered if
145 ** highest bit == 1 */
/*
 * One symbol table entry.  Symbols are appended to a singly linked
 * list as they are defined; their final symbol-table position (snum)
 * is assigned later by macho_layout_symbols().
 */
struct symbol {
    /* nasm internal data */
    struct symbol *next;        /* next symbol in the list */
    char *name;                 /* name of this symbol */
    int32_t initial_snum;       /* symbol number used above in
                                   reloc */
    int32_t snum;               /* true snum for reloc */

    /* data that goes into the file */
    int32_t strx;               /* string table index */
    uint8_t type;               /* symbol type */
    uint8_t sect;               /* NO_SECT or section number */
    int16_t desc;               /* for stab debugging, 0 for us */
    uint32_t value;             /* offset of symbol in section */
};
163 /* symbol type bits */
164 #define N_EXT 0x01 /* global or external symbol */
166 #define N_UNDF 0x0 /* undefined symbol | n_sect == */
167 #define N_ABS 0x2 /* absolute symbol | NO_SECT */
168 #define N_SECT 0xe /* defined symbol, n_sect holds
169 ** section number */
171 #define N_TYPE 0x0e /* type bit mask */
173 #define DEFAULT_SECTION_ALIGNMENT 0 /* byte (i.e. no) alignment */
175 /* special section number values */
176 #define NO_SECT 0 /* no section, invalid */
177 #define MAX_SECT 255 /* maximum number of sections */
179 static struct section *sects, **sectstail;
180 static struct symbol *syms, **symstail;
181 static uint32_t nsyms;
/* These variables are set by macho_layout_symbols() to organize
   the symbol table and string table in the order the dynamic linker
   expects.  They are then used in macho_write() to put out the
   symbols and strings in that order.

   The order of the symbol table is:
     local symbols
     defined external symbols (sorted by name)
     undefined external symbols (sorted by name)

   The order of the string table is:
     strings for external symbols
     strings for local symbols
 */
197 static uint32_t ilocalsym = 0;
198 static uint32_t iextdefsym = 0;
199 static uint32_t iundefsym = 0;
200 static uint32_t nlocalsym;
201 static uint32_t nextdefsym;
202 static uint32_t nundefsym;
203 static struct symbol **extdefsyms = NULL;
204 static struct symbol **undefsyms = NULL;
206 static struct RAA *extsyms;
207 static struct SAA *strs;
208 static uint32_t strslen;
210 static FILE *machofp;
211 static efunc error;
212 static evalfunc evaluate;
214 extern struct ofmt of_macho;
216 /* Global file information. This should be cleaned up into either
217 a structure or as function arguments. */
218 uint32_t head_ncmds = 0;
219 uint32_t head_sizeofcmds = 0;
220 uint32_t seg_filesize = 0;
221 uint32_t seg_vmsize = 0;
222 uint32_t seg_nsects = 0;
223 uint32_t rel_padcnt = 0;
/*
 * Copy the string xsrc into the character ARRAY xdst (sizeof is taken
 * of xdst, so it must not be a pointer).  The whole array is zeroed
 * first and at most sizeof(xdst) - 1 characters are copied, so the
 * result is always NUL-terminated and NUL-padded.
 *
 * Wrapped in do { } while (0) so the macro behaves as one statement,
 * e.g. as the body of an unbraced if/else.
 */
#define xstrncpy(xdst, xsrc)                                            \
    do {                                                                \
        memset(xdst, '\0', sizeof(xdst));     /* zero out whole buffer */ \
        strncpy(xdst, xsrc, sizeof(xdst) - 1); /* keep the last NUL */  \
    } while (0)

/* align x to multiple of y; y must be a power of two */
#define align(x, y)                                                     \
    (((x) + (y) - 1) & ~((y) - 1))

/* align x to int32_t boundary */
#define alignint32_t(x)                                                 \
    align(x, sizeof(int32_t))
237 static void debug_reloc (struct reloc *);
238 static void debug_section_relocs (struct section *) _unused;
/*
 * Return log2(align) when align is an exact power of two.
 *
 * Returns 0 for an input of 0 and -1 when align is not a power of two.
 */
static int exact_log2 (uint32_t align)
{
    if (align == 0)
        return 0;

    if (align & (align - 1))
        return -1;              /* Not a power of 2 */

#ifdef HAVE_GNUC_4
    return __builtin_ctzl (align);
#else
    {
        uint32_t result = 0;

        /* We know exactly one bit is set at this point, so each mask
           contributes one bit of the bit position. */
        if (align & 0xffff0000)
            result |= 16;
        if (align & 0xff00ff00)
            result |= 8;
        if (align & 0xf0f0f0f0)
            result |= 4;
        if (align & 0xcccccccc)
            result |= 2;
        if (align & 0xaaaaaaaa)
            result |= 1;

        return result;
    }
#endif
}
269 static struct section *get_section_by_name(const char *segname,
270 const char *sectname)
272 struct section *s;
274 for (s = sects; s != NULL; s = s->next)
275 if (!strcmp(s->segname, segname) && !strcmp(s->sectname, sectname))
276 break;
278 return s;
281 static struct section *get_section_by_index(const int32_t index)
283 struct section *s;
285 for (s = sects; s != NULL; s = s->next)
286 if (index == s->index)
287 break;
289 return s;
292 static int32_t get_section_index_by_name(const char *segname,
293 const char *sectname)
295 struct section *s;
297 for (s = sects; s != NULL; s = s->next)
298 if (!strcmp(s->segname, segname) && !strcmp(s->sectname, sectname))
299 return s->index;
301 return -1;
304 static char *get_section_name_by_index(const int32_t index)
306 struct section *s;
308 for (s = sects; s != NULL; s = s->next)
309 if (index == s->index)
310 return s->sectname;
312 return NULL;
315 static uint8_t get_section_fileindex_by_index(const int32_t index)
317 struct section *s;
318 uint8_t i = 1;
320 for (s = sects; s != NULL && i < MAX_SECT; s = s->next, ++i)
321 if (index == s->index)
322 return i;
324 if (i == MAX_SECT)
325 error(ERR_WARNING,
326 "too many sections (>255) - clipped by fileindex");
328 return NO_SECT;
331 static void macho_init(FILE * fp, efunc errfunc, ldfunc ldef,
332 evalfunc eval)
334 char zero = 0;
336 machofp = fp;
337 error = errfunc;
338 evaluate = eval;
340 (void)ldef; /* placate optimisers */
342 sects = NULL;
343 sectstail = &sects;
345 syms = NULL;
346 symstail = &syms;
347 nsyms = 0;
348 nlocalsym = 0;
349 nextdefsym = 0;
350 nundefsym = 0;
352 extsyms = raa_init();
353 strs = saa_init(1L);
355 /* string table starts with a zero byte - don't ask why */
356 saa_wbytes(strs, &zero, sizeof(char));
357 strslen = 1;
360 static int macho_setinfo(enum geninfo type, char **val)
362 (void)type;
363 (void)val;
364 return 0;
367 static void sect_write(struct section *sect,
368 const uint8_t *data, uint32_t len)
370 saa_wbytes(sect->data, data, len);
371 sect->size += len;
374 static void add_reloc(struct section *sect, int32_t section,
375 int pcrel, int bytes)
377 struct reloc *r;
378 int32_t fi;
380 /* NeXT as puts relocs in reversed order (address-wise) into the
381 ** files, so we do the same, doesn't seem to make much of a
382 ** difference either way */
383 r = nasm_malloc(sizeof(struct reloc));
384 r->next = sect->relocs;
385 sect->relocs = r;
387 /* the current end of the section will be the symbol's address for
388 ** now, might have to be fixed by macho_fixup_relocs() later on. make
389 ** sure we don't make the symbol scattered by setting the highest
390 ** bit by accident */
391 r->addr = sect->size & ~R_SCATTERED;
392 r->ext = 0;
393 r->pcrel = pcrel;
395 /* match byte count 1, 2, 4 to length codes 0, 1, 2 respectively */
396 r->length = bytes >> 1;
398 /* vanilla relocation (GENERIC_RELOC_VANILLA) */
399 r->type = 0;
401 if (section == NO_SEG) {
402 /* absolute local symbol if no section index given */
403 r->snum = R_ABS;
404 } else {
405 fi = get_section_fileindex_by_index(section);
407 if (fi == NO_SECT) {
408 /* external symbol if no section with that index known,
409 ** symbol number was saved in macho_symdef() */
410 r->snum = raa_read(extsyms, section);
411 r->ext = 1;
412 } else {
413 /* local symbol in section fi */
414 r->snum = fi;
418 ++sect->nreloc;
421 static void macho_output(int32_t secto, const void *data,
422 enum out_type type, uint64_t size,
423 int32_t section, int32_t wrt)
425 struct section *s, *sbss;
426 int32_t addr;
427 uint8_t mydata[4], *p;
429 if (wrt != NO_SEG) {
430 wrt = NO_SEG;
431 error(ERR_NONFATAL, "WRT not supported by Mach-O output format");
432 /* continue to do _something_ */
435 if (secto == NO_SEG) {
436 if (type != OUT_RESERVE)
437 error(ERR_NONFATAL, "attempt to assemble code in "
438 "[ABSOLUTE] space");
440 return;
443 s = get_section_by_index(secto);
445 if (s == NULL) {
446 error(ERR_WARNING, "attempt to assemble code in"
447 " section %d: defaulting to `.text'", secto);
448 s = get_section_by_name("__TEXT", "__text");
450 /* should never happen */
451 if (s == NULL)
452 error(ERR_PANIC, "text section not found");
455 sbss = get_section_by_name("__DATA", "__bss");
457 if (s == sbss && type != OUT_RESERVE) {
458 error(ERR_WARNING, "attempt to initialize memory in the"
459 " BSS section: ignored");
460 s->size += realsize(type, size);
461 return;
464 switch (type) {
465 case OUT_RESERVE:
466 if (s != sbss) {
467 error(ERR_WARNING, "uninitialized space declared in"
468 " %s section: zeroing",
469 get_section_name_by_index(secto));
471 sect_write(s, NULL, size);
472 } else
473 s->size += size;
475 break;
477 case OUT_RAWDATA:
478 if (section != NO_SEG)
479 error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG");
481 sect_write(s, data, size);
482 break;
484 case OUT_ADDRESS:
485 addr = *(int64_t *)data;
487 if (section != NO_SEG) {
488 if (section % 2) {
489 error(ERR_NONFATAL, "Mach-O format does not support"
490 " section base references");
491 } else
492 add_reloc(s, section, 0, size);
495 p = mydata;
496 WRITEADDR(p, addr, size);
497 sect_write(s, mydata, size);
498 break;
500 case OUT_REL2ADR:
501 if (section == secto)
502 error(ERR_PANIC, "intra-section OUT_REL2ADR");
504 if (section != NO_SEG && section % 2) {
505 error(ERR_NONFATAL, "Mach-O format does not support"
506 " section base references");
507 } else
508 add_reloc(s, section, 1, 2);
510 p = mydata;
511 WRITESHORT(p, *(int32_t *)data - (size + s->size));
512 sect_write(s, mydata, 2L);
513 break;
515 case OUT_REL4ADR:
516 if (section == secto)
517 error(ERR_PANIC, "intra-section OUT_REL4ADR");
519 if (section != NO_SEG && section % 2) {
520 error(ERR_NONFATAL, "Mach-O format does not support"
521 " section base references");
522 } else
523 add_reloc(s, section, 1, 4);
525 p = mydata;
526 WRITELONG(p, *(int32_t *)data - (size + s->size));
527 sect_write(s, mydata, 4L);
528 break;
530 default:
531 error(ERR_PANIC, "unknown output type?");
532 break;
536 static int32_t macho_section(char *name, int pass, int *bits)
538 int32_t index, originalIndex;
539 char *sectionAttributes;
540 struct sectmap *sm;
541 struct section *s;
543 (void)pass;
545 /* Default to 32 bits. */
546 if (!name) {
547 *bits = 32;
548 name = ".text";
549 sectionAttributes = NULL;
550 } else {
551 sectionAttributes = name;
552 name = nasm_strsep(&sectionAttributes, " \t");
555 for (sm = sectmap; sm->nasmsect != NULL; ++sm) {
556 /* make lookup into section name translation table */
557 if (!strcmp(name, sm->nasmsect)) {
558 char *currentAttribute;
560 /* try to find section with that name */
561 originalIndex = index = get_section_index_by_name(sm->segname,
562 sm->sectname);
564 /* create it if it doesn't exist yet */
565 if (index == -1) {
566 s = *sectstail = nasm_malloc(sizeof(struct section));
567 s->next = NULL;
568 sectstail = &s->next;
570 s->data = saa_init(1L);
571 s->index = seg_alloc();
572 s->relocs = NULL;
573 s->align = -1;
575 xstrncpy(s->segname, sm->segname);
576 xstrncpy(s->sectname, sm->sectname);
577 s->size = 0;
578 s->nreloc = 0;
579 s->flags = sm->flags;
581 index = s->index;
582 } else {
583 s = get_section_by_index(index);
586 while ((NULL != sectionAttributes)
587 && (currentAttribute = nasm_strsep(&sectionAttributes, " \t"))) {
588 if (0 != *currentAttribute) {
589 if (!nasm_strnicmp("align=", currentAttribute, 6)) {
590 char *end;
591 int newAlignment, value;
593 value = strtoul(currentAttribute + 6, (char**)&end, 0);
594 newAlignment = exact_log2(value);
596 if (0 != *end) {
597 error(ERR_PANIC,
598 "unknown or missing alignment value \"%s\" "
599 "specified for section \"%s\"",
600 currentAttribute + 6,
601 name);
602 return NO_SEG;
603 } else if (0 > newAlignment) {
604 error(ERR_PANIC,
605 "alignment of %d (for section \"%s\") is not "
606 "a power of two",
607 value,
608 name);
609 return NO_SEG;
612 if ((-1 != originalIndex)
613 && (s->align != newAlignment)
614 && (s->align != -1)) {
615 error(ERR_PANIC,
616 "section \"%s\" has already been specified "
617 "with alignment %d, conflicts with new "
618 "alignment of %d",
619 name,
620 (1 << s->align),
621 value);
622 return NO_SEG;
625 s->align = newAlignment;
626 } else if (!nasm_stricmp("data", currentAttribute)) {
627 /* Do nothing; 'data' is implicit */
628 } else {
629 error(ERR_PANIC,
630 "unknown section attribute %s for section %s",
631 currentAttribute,
632 name);
633 return NO_SEG;
638 return index;
642 error(ERR_PANIC, "invalid section name %s", name);
643 return NO_SEG;
646 static void macho_symdef(char *name, int32_t section, int64_t offset,
647 int is_global, char *special)
649 struct symbol *sym;
651 if (special) {
652 error(ERR_NONFATAL, "The Mach-O output format does "
653 "not support any special symbol types");
654 return;
657 if (is_global == 3) {
658 error(ERR_NONFATAL, "The Mach-O format does not "
659 "(yet) support forward reference fixups.");
660 return;
663 sym = *symstail = nasm_malloc(sizeof(struct symbol));
664 sym->next = NULL;
665 symstail = &sym->next;
667 sym->name = name;
668 sym->strx = strslen;
669 sym->type = 0;
670 sym->desc = 0;
671 sym->value = offset;
672 sym->initial_snum = -1;
674 /* external and common symbols get N_EXT */
675 if (is_global != 0)
676 sym->type |= N_EXT;
678 if (section == NO_SEG) {
679 /* symbols in no section get absolute */
680 sym->type |= N_ABS;
681 sym->sect = NO_SECT;
682 } else {
683 sym->type |= N_SECT;
685 /* get the in-file index of the section the symbol was defined in */
686 sym->sect = get_section_fileindex_by_index(section);
688 if (sym->sect == NO_SECT) {
689 /* remember symbol number of references to external
690 ** symbols, this works because every external symbol gets
691 ** its own section number allocated internally by nasm and
692 ** can so be used as a key */
693 extsyms = raa_write(extsyms, section, nsyms);
694 sym->initial_snum = nsyms;
696 switch (is_global) {
697 case 1:
698 case 2:
699 /* there isn't actually a difference between global
700 ** and common symbols, both even have their size in
701 ** sym->value */
702 sym->type = N_EXT;
703 break;
705 default:
706 /* give an error on unfound section if it's not an
707 ** external or common symbol (assemble_file() does a
708 ** seg_alloc() on every call for them) */
709 error(ERR_PANIC, "in-file index for section %d not found",
710 section);
715 ++nsyms;
/* Returns its argument unchanged. */
static int32_t macho_segbase(int32_t section)
{
    return section;
}
/* All arguments are ignored; always returns 0. */
static int macho_directive(char *directive, char *value, int pass)
{
    (void)directive;
    (void)value;
    (void)pass;
    return 0;
}
731 static void macho_filename(char *inname, char *outname, efunc error)
733 standard_extension(inname, outname, ".o", error);
736 extern macros_t macho_stdmac[];
738 /* Comparison function for qsort symbol layout. */
739 static int layout_compare (const struct symbol **s1,
740 const struct symbol **s2)
742 return (strcmp ((*s1)->name, (*s2)->name));
745 /* The native assembler does a few things in a similar function
747 * Remove temporary labels
748 * Sort symbols according to local, external, undefined (by name)
749 * Order the string table
751 We do not remove temporary labels right now.
753 numsyms is the total number of symbols we have. strtabsize is the
754 number entries in the string table. */
756 static void macho_layout_symbols (uint32_t *numsyms,
757 uint32_t *strtabsize)
759 struct symbol *sym, **symp;
760 uint32_t i,j;
762 *numsyms = 0;
763 *strtabsize = sizeof (char);
765 symp = &syms;
767 while ((sym = *symp)) {
768 /* Undefined symbols are now external. */
769 if (sym->type == N_UNDF)
770 sym->type |= N_EXT;
772 if ((sym->type & N_EXT) == 0) {
773 sym->snum = *numsyms;
774 *numsyms = *numsyms + 1;
775 nlocalsym++;
777 else {
778 if ((sym->type & N_TYPE) != N_UNDF)
779 nextdefsym++;
780 else
781 nundefsym++;
783 /* If we handle debug info we'll want
784 to check for it here instead of just
785 adding the symbol to the string table. */
786 sym->strx = *strtabsize;
787 saa_wbytes (strs, sym->name, (int32_t)(strlen(sym->name) + 1));
788 *strtabsize += strlen(sym->name) + 1;
790 symp = &(sym->next);
793 /* Next, sort the symbols. Most of this code is a direct translation from
794 the Apple cctools symbol layout. We need to keep compatibility with that. */
795 /* Set the indexes for symbol groups into the symbol table */
796 ilocalsym = 0;
797 iextdefsym = nlocalsym;
798 iundefsym = nlocalsym + nextdefsym;
800 /* allocate arrays for sorting externals by name */
801 extdefsyms = nasm_malloc(nextdefsym * sizeof(struct symbol *));
802 undefsyms = nasm_malloc(nundefsym * sizeof(struct symbol *));
804 i = 0;
805 j = 0;
807 symp = &syms;
809 while ((sym = *symp)) {
811 if((sym->type & N_EXT) == 0) {
812 sym->strx = *strtabsize;
813 saa_wbytes (strs, sym->name, (int32_t)(strlen (sym->name) + 1));
814 *strtabsize += strlen(sym->name) + 1;
816 else {
817 if((sym->type & N_TYPE) != N_UNDF)
818 extdefsyms[i++] = sym;
819 else
820 undefsyms[j++] = sym;
822 symp = &(sym->next);
825 qsort(extdefsyms, nextdefsym, sizeof(struct symbol *),
826 (int (*)(const void *, const void *))layout_compare);
827 qsort(undefsyms, nundefsym, sizeof(struct symbol *),
828 (int (*)(const void *, const void *))layout_compare);
830 for(i = 0; i < nextdefsym; i++) {
831 extdefsyms[i]->snum = *numsyms;
832 *numsyms += 1;
834 for(j = 0; j < nundefsym; j++) {
835 undefsyms[j]->snum = *numsyms;
836 *numsyms += 1;
840 /* Calculate some values we'll need for writing later. */
842 static void macho_calculate_sizes (void)
844 struct section *s;
846 /* count sections and calculate in-memory and in-file offsets */
847 for (s = sects; s != NULL; s = s->next) {
848 uint32_t pad = 0;
850 /* zerofill sections aren't actually written to the file */
851 if ((s->flags & SECTION_TYPE) != S_ZEROFILL)
852 seg_filesize += s->size;
854 /* recalculate segment address based on alignment and vm size */
855 s->addr = seg_vmsize;
856 /* we need section alignment to calculate final section address */
857 if (s->align == -1)
858 s->align = DEFAULT_SECTION_ALIGNMENT;
859 if(s->align) {
860 uint32_t newaddr = align(s->addr, 1 << s->align);
861 pad = newaddr - s->addr;
862 s->addr = newaddr;
865 seg_vmsize += s->size + pad;
866 ++seg_nsects;
869 /* calculate size of all headers, load commands and sections to
870 ** get a pointer to the start of all the raw data */
871 if (seg_nsects > 0) {
872 ++head_ncmds;
873 head_sizeofcmds +=
874 MACHO_SEGCMD_SIZE + seg_nsects * MACHO_SECTCMD_SIZE;
877 if (nsyms > 0) {
878 ++head_ncmds;
879 head_sizeofcmds += MACHO_SYMCMD_SIZE;
883 /* Write out the header information for the file. */
885 static void macho_write_header (void)
887 fwriteint32_t(MH_MAGIC, machofp); /* magic */
888 fwriteint32_t(CPU_TYPE_I386, machofp); /* CPU type */
889 fwriteint32_t(CPU_SUBTYPE_I386_ALL, machofp); /* CPU subtype */
890 fwriteint32_t(MH_OBJECT, machofp); /* Mach-O file type */
891 fwriteint32_t(head_ncmds, machofp); /* number of load commands */
892 fwriteint32_t(head_sizeofcmds, machofp); /* size of load commands */
893 fwriteint32_t(0, machofp); /* no flags */
896 /* Write out the segment load command at offset. */
898 static uint32_t macho_write_segment (uint32_t offset)
900 uint32_t rel_base = alignint32_t (offset + seg_filesize);
901 uint32_t s_reloff = 0;
902 struct section *s;
904 fwriteint32_t(LC_SEGMENT, machofp); /* cmd == LC_SEGMENT */
906 /* size of load command including section load commands */
907 fwriteint32_t(MACHO_SEGCMD_SIZE + seg_nsects *
908 MACHO_SECTCMD_SIZE, machofp);
910 /* in an MH_OBJECT file all sections are in one unnamed (name
911 ** all zeros) segment */
912 fwritezero(16, machofp);
913 fwriteint32_t(0, machofp); /* in-memory offset */
914 fwriteint32_t(seg_vmsize, machofp); /* in-memory size */
915 fwriteint32_t(offset, machofp); /* in-file offset to data */
916 fwriteint32_t(seg_filesize, machofp); /* in-file size */
917 fwriteint32_t(VM_PROT_DEFAULT, machofp); /* maximum vm protection */
918 fwriteint32_t(VM_PROT_DEFAULT, machofp); /* initial vm protection */
919 fwriteint32_t(seg_nsects, machofp); /* number of sections */
920 fwriteint32_t(0, machofp); /* no flags */
922 /* emit section headers */
923 for (s = sects; s != NULL; s = s->next) {
924 fwrite(s->sectname, sizeof(s->sectname), 1, machofp);
925 fwrite(s->segname, sizeof(s->segname), 1, machofp);
926 fwriteint32_t(s->addr, machofp);
927 fwriteint32_t(s->size, machofp);
929 /* dummy data for zerofill sections or proper values */
930 if ((s->flags & SECTION_TYPE) != S_ZEROFILL) {
931 fwriteint32_t(offset, machofp);
932 /* Write out section alignment, as a power of two.
933 e.g. 32-bit word alignment would be 2 (2^^2 = 4). */
934 if (s->align == -1)
935 s->align = DEFAULT_SECTION_ALIGNMENT;
936 fwriteint32_t(s->align, machofp);
937 /* To be compatible with cctools as we emit
938 a zero reloff if we have no relocations. */
939 fwriteint32_t(s->nreloc ? rel_base + s_reloff : 0, machofp);
940 fwriteint32_t(s->nreloc, machofp);
942 offset += s->size;
943 s_reloff += s->nreloc * MACHO_RELINFO_SIZE;
944 } else {
945 fwriteint32_t(0, machofp);
946 fwriteint32_t(0, machofp);
947 fwriteint32_t(0, machofp);
948 fwriteint32_t(0, machofp);
951 fwriteint32_t(s->flags, machofp); /* flags */
952 fwriteint32_t(0, machofp); /* reserved */
953 fwriteint32_t(0, machofp); /* reserved */
956 rel_padcnt = rel_base - offset;
957 offset = rel_base + s_reloff;
959 return offset;
962 /* For a given chain of relocs r, write out the entire relocation
963 chain to the object file. */
965 static void macho_write_relocs (struct reloc *r)
967 while (r) {
968 uint32_t word2;
970 fwriteint32_t(r->addr, machofp); /* reloc offset */
972 word2 = r->snum;
973 word2 |= r->pcrel << 24;
974 word2 |= r->length << 25;
975 word2 |= r->ext << 27;
976 word2 |= r->type << 28;
977 fwriteint32_t(word2, machofp); /* reloc data */
979 r = r->next;
983 /* Write out the section data. */
984 static void macho_write_section (void)
986 struct section *s, *s2;
987 struct reloc *r;
988 uint8_t fi, *p, *q, blk[4];
989 int32_t l;
991 for (s = sects; s != NULL; s = s->next) {
992 if ((s->flags & SECTION_TYPE) == S_ZEROFILL)
993 continue;
995 /* no padding needs to be done to the sections */
997 /* Like a.out Mach-O references things in the data or bss
998 * sections by addresses which are actually relative to the
999 * start of the _text_ section, in the _file_. See outaout.c
1000 * for more information. */
1001 saa_rewind(s->data);
1002 for (r = s->relocs; r != NULL; r = r->next) {
1003 saa_fread(s->data, r->addr, blk, (int32_t)r->length << 1);
1004 p = q = blk;
1005 l = *p++;
1007 /* get offset based on relocation type */
1008 if (r->length > 0) {
1009 l += ((int32_t)*p++) << 8;
1011 if (r->length == 2) {
1012 l += ((int32_t)*p++) << 16;
1013 l += ((int32_t)*p++) << 24;
1017 /* If the relocation is internal add to the current section
1018 offset. Otherwise the only value we need is the symbol
1019 offset which we already have. The linker takes care
1020 of the rest of the address. */
1021 if (!r->ext) {
1022 /* generate final address by section address and offset */
1023 for (s2 = sects, fi = 1;
1024 s2 != NULL; s2 = s2->next, fi++) {
1025 if (fi == r->snum) {
1026 l += s2->addr;
1027 break;
1032 /* write new offset back */
1033 if (r->length == 2)
1034 WRITELONG(q, l);
1035 else if (r->length == 1)
1036 WRITESHORT(q, l);
1037 else
1038 *q++ = l & 0xFF;
1040 saa_fwrite(s->data, r->addr, blk, (int32_t)r->length << 1);
1043 /* dump the section data to file */
1044 saa_fpwrite(s->data, machofp);
1047 /* pad last section up to reloc entries on int32_t boundary */
1048 fwritezero(rel_padcnt, machofp);
1050 /* emit relocation entries */
1051 for (s = sects; s != NULL; s = s->next)
1052 macho_write_relocs (s->relocs);
1055 /* Write out the symbol table. We should already have sorted this
1056 before now. */
1057 static void macho_write_symtab (void)
1059 struct symbol *sym;
1060 struct section *s;
1061 int32_t fi;
1062 uint32_t i;
1064 /* we don't need to pad here since MACHO_RELINFO_SIZE == 8 */
1066 for (sym = syms; sym != NULL; sym = sym->next) {
1067 if ((sym->type & N_EXT) == 0) {
1068 fwriteint32_t(sym->strx, machofp); /* string table entry number */
1069 fwrite(&sym->type, 1, 1, machofp); /* symbol type */
1070 fwrite(&sym->sect, 1, 1, machofp); /* section */
1071 fwriteint16_t(sym->desc, machofp); /* description */
1073 /* Fix up the symbol value now that we know the final section
1074 sizes. */
1075 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1076 for (s = sects, fi = 1;
1077 s != NULL && fi < sym->sect; s = s->next, ++fi)
1078 sym->value += s->size;
1081 fwriteint32_t(sym->value, machofp); /* value (i.e. offset) */
1085 for (i = 0; i < nextdefsym; i++) {
1086 sym = extdefsyms[i];
1087 fwriteint32_t(sym->strx, machofp);
1088 fwrite(&sym->type, 1, 1, machofp); /* symbol type */
1089 fwrite(&sym->sect, 1, 1, machofp); /* section */
1090 fwriteint16_t(sym->desc, machofp); /* description */
1092 /* Fix up the symbol value now that we know the final section
1093 sizes. */
1094 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1095 for (s = sects, fi = 1;
1096 s != NULL && fi < sym->sect; s = s->next, ++fi)
1097 sym->value += s->size;
1100 fwriteint32_t(sym->value, machofp); /* value (i.e. offset) */
1103 for (i = 0; i < nundefsym; i++) {
1104 sym = undefsyms[i];
1105 fwriteint32_t(sym->strx, machofp);
1106 fwrite(&sym->type, 1, 1, machofp); /* symbol type */
1107 fwrite(&sym->sect, 1, 1, machofp); /* section */
1108 fwriteint16_t(sym->desc, machofp); /* description */
1110 /* Fix up the symbol value now that we know the final section
1111 sizes. */
1112 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1113 for (s = sects, fi = 1;
1114 s != NULL && fi < sym->sect; s = s->next, ++fi)
1115 sym->value += s->size;
1118 fwriteint32_t(sym->value, machofp); /* value (i.e. offset) */
1122 /* Fixup the snum in the relocation entries, we should be
1123 doing this only for externally undefined symbols. */
1124 static void macho_fixup_relocs (struct reloc *r)
1126 struct symbol *sym;
1127 uint32_t i;
1129 while (r != NULL) {
1130 if (r->ext) {
1131 for (i = 0; i < nundefsym; i++) {
1132 sym = undefsyms[i];
1133 if (sym->initial_snum == r->snum) {
1134 r->snum = sym->snum;
1135 break;
1139 r = r->next;
1143 /* Write out the object file. */
1145 static void macho_write (void)
1147 uint32_t offset = 0;
1149 /* mach-o object file structure:
1151 ** mach header
1152 ** uint32_t magic
1153 ** int cpu type
1154 ** int cpu subtype
1155 ** uint32_t mach file type
1156 ** uint32_t number of load commands
1157 ** uint32_t size of all load commands
1158 ** (includes section struct size of segment command)
1159 ** uint32_t flags
1161 ** segment command
1162 ** uint32_t command type == LC_SEGMENT
1163 ** uint32_t size of load command
1164 ** (including section load commands)
1165 ** char[16] segment name
1166 ** uint32_t in-memory offset
1167 ** uint32_t in-memory size
1168 ** uint32_t in-file offset to data area
1169 ** uint32_t in-file size
1170 ** (in-memory size excluding zerofill sections)
1171 ** int maximum vm protection
1172 ** int initial vm protection
1173 ** uint32_t number of sections
1174 ** uint32_t flags
1176 ** section commands
1177 ** char[16] section name
1178 ** char[16] segment name
1179 ** uint32_t in-memory offset
1180 ** uint32_t in-memory size
1181 ** uint32_t in-file offset
1182 ** uint32_t alignment
1183 ** (irrelevant in MH_OBJECT)
1184 ** uint32_t in-file offset of relocation entires
1185 ** uint32_t number of relocations
1186 ** uint32_t flags
1187 ** uint32_t reserved
1188 ** uint32_t reserved
1190 ** symbol table command
1191 ** uint32_t command type == LC_SYMTAB
1192 ** uint32_t size of load command
1193 ** uint32_t symbol table offset
1194 ** uint32_t number of symbol table entries
1195 ** uint32_t string table offset
1196 ** uint32_t string table size
1198 ** raw section data
1200 ** padding to int32_t boundary
1202 ** relocation data (struct reloc)
1203 ** int32_t offset
1204 ** uint data (symbolnum, pcrel, length, extern, type)
1206 ** symbol table data (struct nlist)
1207 ** int32_t string table entry number
1208 ** uint8_t type
1209 ** (extern, absolute, defined in section)
1210 ** uint8_t section
1211 ** (0 for global symbols, section number of definition (>= 1, <=
1212 ** 254) for local symbols, size of variable for common symbols
1213 ** [type == extern])
1214 ** int16_t description
1215 ** (for stab debugging format)
1216 ** uint32_t value (i.e. file offset) of symbol or stab offset
1218 ** string table data
1219 ** list of null-terminated strings
1222 /* Emit the Mach-O header. */
1223 macho_write_header();
1225 offset = MACHO_HEADER_SIZE + head_sizeofcmds;
1227 /* emit the segment load command */
1228 if (seg_nsects > 0)
1229 offset = macho_write_segment (offset);
1230 else
1231 error(ERR_WARNING, "no sections?");
1233 if (nsyms > 0) {
1234 /* write out symbol command */
1235 fwriteint32_t(LC_SYMTAB, machofp); /* cmd == LC_SYMTAB */
1236 fwriteint32_t(MACHO_SYMCMD_SIZE, machofp); /* size of load command */
1237 fwriteint32_t(offset, machofp); /* symbol table offset */
1238 fwriteint32_t(nsyms, machofp); /* number of symbol
1239 ** table entries */
1241 offset += nsyms * MACHO_NLIST_SIZE;
1242 fwriteint32_t(offset, machofp); /* string table offset */
1243 fwriteint32_t(strslen, machofp); /* string table size */
1246 /* emit section data */
1247 if (seg_nsects > 0)
1248 macho_write_section ();
1250 /* emit symbol table if we have symbols */
1251 if (nsyms > 0)
1252 macho_write_symtab ();
1254 /* we don't need to pad here since MACHO_NLIST_SIZE == 12 */
1256 /* emit string table */
1257 saa_fpwrite(strs, machofp);
1259 /* We do quite a bit here, starting with finalizing all of the data
1260 for the object file, writing, and then freeing all of the data from
1261 the file. */
1263 static void macho_cleanup(int debuginfo)
1265 struct section *s;
1266 struct reloc *r;
1267 struct symbol *sym;
1269 (void)debuginfo;
1271 /* Sort all symbols. */
1272 macho_layout_symbols (&nsyms, &strslen);
1274 /* Fixup relocation entries */
1275 for (s = sects; s != NULL; s = s->next) {
1276 macho_fixup_relocs (s->relocs);
1279 /* First calculate and finalize needed values. */
1280 macho_calculate_sizes();
1281 macho_write();
1283 /* done - yay! */
1284 fclose(machofp);
1286 /* free up everything */
1287 while (sects->next) {
1288 s = sects;
1289 sects = sects->next;
1291 saa_free(s->data);
1292 while (s->relocs != NULL) {
1293 r = s->relocs;
1294 s->relocs = s->relocs->next;
1295 nasm_free(r);
1298 nasm_free(s);
1301 saa_free(strs);
1302 raa_free(extsyms);
1304 if (syms) {
1305 while (syms->next) {
1306 sym = syms;
1307 syms = syms->next;
1309 nasm_free (sym);
1314 /* Debugging routines. */
1315 static void debug_reloc (struct reloc *r)
1317 fprintf (stdout, "reloc:\n");
1318 fprintf (stdout, "\taddr: %"PRId32"\n", r->addr);
1319 fprintf (stdout, "\tsnum: %d\n", r->snum);
1320 fprintf (stdout, "\tpcrel: %d\n", r->pcrel);
1321 fprintf (stdout, "\tlength: %d\n", r->length);
1322 fprintf (stdout, "\text: %d\n", r->ext);
1323 fprintf (stdout, "\ttype: %d\n", r->type);
1326 static void debug_section_relocs (struct section *s)
1328 struct reloc *r = s->relocs;
1330 fprintf (stdout, "relocs for section %s:\n\n", s->sectname);
1332 while (r != NULL) {
1333 debug_reloc (r);
1334 r = r->next;
/* Output-format descriptor with the short name "macho32".  The entries
   are positional initializers for struct ofmt, which is declared outside
   this part of the file, so their order has to follow that declaration.
   NOTE(review): the macho_* entries are presumably the callbacks the
   assembler driver invokes for this format (macho_cleanup is the
   finalizer defined above) -- confirm against the struct ofmt
   declaration. */
1338 struct ofmt of_macho32 = {
1339 "NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (i386) object files",
1340 "macho32",
1342 null_debug_arr,
1343 &null_debug_form,
1344 macho_stdmac,
1345 macho_init,
1346 macho_setinfo,
1347 macho_output,
1348 macho_symdef,
1349 macho_section,
1350 macho_segbase,
1351 macho_directive,
1352 macho_filename,
1353 macho_cleanup
/* Output-format descriptor with the short name "macho".  Apart from the
   two leading strings, the visible entries are the same ones used by
   of_macho32, so both names select the same macho_* routines.  The
   entries are positional initializers for struct ofmt (declared outside
   this part of the file). */
1356 struct ofmt of_macho = {
1357 "MACHO (short name for MACHO32)",
1358 "macho",
1360 null_debug_arr,
1361 &null_debug_form,
1362 macho_stdmac,
1363 macho_init,
1364 macho_setinfo,
1365 macho_output,
1366 macho_symdef,
1367 macho_section,
1368 macho_segbase,
1369 macho_directive,
1370 macho_filename,
1371 macho_cleanup
1374 #endif
1377 * Local Variables:
1378 * mode:c
1379 * c-basic-offset:4
1380 * End:
1382 * end of file */