preproc: formatting cleanups
[nasm.git] / output / outmacho64.c
blobd3f5a8e8cde2e24b2758c203d56e16fa0efa7c18
1 /* ----------------------------------------------------------------------- *
2 *
3 * Copyright 1996-2009 The NASM Authors - All Rights Reserved
4 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
9 * conditions are met:
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * ----------------------------------------------------------------------- */
35 * outmacho64.c output routines for the Netwide Assembler to produce
36 * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (x86_64) object files
39 /* Most of this file is, like Mach-O itself, based on a.out. For more
40 * guidelines see outaout.c. */
42 #include "compiler.h"
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <ctype.h>
48 #include <inttypes.h>
50 #include "nasm.h"
51 #include "nasmlib.h"
52 #include "saa.h"
53 #include "raa.h"
54 #include "output/outform.h"
55 #include "output/outlib.h"
57 #if defined(OF_MACHO64)
/* Sizes of the structures as they appear in a 64-bit Mach-O file */
#define MACHO_HEADER64_SIZE     (32)
#define MACHO_SEGCMD64_SIZE     (72)
#define MACHO_SECTCMD64_SIZE    (80)
#define MACHO_SYMCMD_SIZE       (24)
#define MACHO_NLIST64_SIZE      (16)
#define MACHO_RELINFO64_SIZE    (8)

/* Values written into the Mach-O file header */
#define MH_MAGIC_64             (0xfeedfacf)
#define CPU_TYPE_X86_64         (0x01000007)    /* x86-64 platform */
#define CPU_SUBTYPE_I386_ALL    (3)             /* all-x86 compatible */
#define MH_OBJECT               (0x1)           /* object file */

/* Load command identifiers */
#define LC_SEGMENT_64           (0x19)          /* segment load command */
#define LC_SYMTAB               (0x2)           /* symbol table load command */

/* Virtual memory protection bits */
#define VM_PROT_NONE            (0x00)
#define VM_PROT_READ            (0x01)
#define VM_PROT_WRITE           (0x02)
#define VM_PROT_EXECUTE         (0x04)

/* Both combinations below grant read, write and execute access */
#define VM_PROT_DEFAULT (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
#define VM_PROT_ALL     (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
/* One output section: NASM bookkeeping plus the values written to disk. */
struct section {
    /* --- used only inside NASM --- */
    struct section *next;       /* next section in the list */
    struct SAA *data;           /* section contents */
    int32_t index;              /* NASM segment index of this section */
    struct reloc *relocs;       /* head of this section's relocation list */
    int align;                  /* log2 alignment, -1 while unspecified */

    /* --- emitted into the object file --- */
    char sectname[16];          /* what this section is called */
    char segname[16];           /* segment this section will be in */
    uint64_t addr;              /* in-memory address (subject to alignment) */
    uint64_t size;              /* in-memory and -file size */
    uint32_t nreloc;            /* relocation entry count */
    uint32_t flags;             /* type and attributes (masked) */
    uint32_t extreloc;          /* external relocations */
};
/* Mask selecting the section type within the flags word */
#define SECTION_TYPE                0x000000ff

/* Section types */
#define S_REGULAR                   (0x0)       /* standard section */
#define S_ZEROFILL                  (0x1)       /* zerofill, in-memory only */

/* Mask of the system setable attributes */
#define SECTION_ATTRIBUTES_SYS      0x00ffff00

/* section contains some machine instructions */
#define S_ATTR_SOME_INSTRUCTIONS    0x00000400
/* section has external relocation entries */
#define S_ATTR_EXT_RELOC            0x00000200
/* section has local relocation entries */
#define S_ATTR_LOC_RELOC            0x00000100
/* section uses pure machine instructions */
#define S_ATTR_PURE_INSTRUCTIONS    0x80000000
117 static struct sectmap {
118 const char *nasmsect;
119 const char *segname;
120 const char *sectname;
121 const int32_t flags;
122 } sectmap[] = {
123 {".text", "__TEXT", "__text", S_REGULAR|S_ATTR_SOME_INSTRUCTIONS|S_ATTR_PURE_INSTRUCTIONS},
124 {".data", "__DATA", "__data", S_REGULAR},
125 {".rodata", "__DATA", "__const", S_REGULAR},
126 {".bss", "__DATA", "__bss", S_ZEROFILL},
127 {NULL, NULL, NULL, 0}
/* One relocation entry: list linkage plus the two words written to disk. */
struct reloc {
    /* --- used only inside NASM --- */
    struct reloc *next;

    /* --- emitted into the object file --- */
    int32_t addr;               /* op's offset in section */
    uint32_t snum:24,           /* symbol index if ext, otherwise the
                                 * in-file section number */
        pcrel:1,                /* relative relocation */
        length:2,               /* 0=byte, 1=word, 2=int32_t, 3=int64_t */
        ext:1,                  /* external symbol referenced */
        type:4;                 /* reloc type */
};

#define R_ABS           0               /* absolute relocation */
#define R_SCATTERED     0x80000000      /* reloc entry is scattered if
                                         * highest bit == 1 */
/* One symbol: NASM bookkeeping followed by the symbol table entry fields. */
struct symbol {
    /* --- used only inside NASM --- */
    struct symbol *next;        /* next symbol in the list */
    char *name;                 /* name of this symbol */
    int32_t initial_snum;       /* symbol number used above in reloc */
    int32_t snum;               /* true snum for reloc */

    /* --- emitted into the object file --- */
    uint32_t strx;              /* string table index */
    uint8_t type;               /* symbol type */
    uint8_t sect;               /* NO_SECT or section number */
    uint16_t desc;              /* for stab debugging, 0 for us */
    uint64_t value;             /* offset of symbol in section */
};
/* Bits of the symbol type byte */
#define N_EXT   0x01            /* global or external symbol */

#define N_UNDF  0x0             /* undefined symbol | n_sect ==  */
#define N_ABS   0x2             /* absolute symbol  | NO_SECT    */
#define N_SECT  0xe             /* defined symbol, n_sect holds
                                 * section number */

#define N_TYPE  0x0e            /* type bit mask */

/* Alignment used when a section does not request one */
#define DEFAULT_SECTION_ALIGNMENT 0     /* byte (i.e. no) alignment */

/* Special section number values */
#define NO_SECT         0       /* no section, invalid */
#define MAX_SECT        255     /* maximum number of sections */
181 static struct section *sects, **sectstail;
182 static struct symbol *syms, **symstail;
183 static uint32_t nsyms;
185 /* These variables are set by macho_layout_symbols() to organize
186 the symbol table and string table in order the dynamic linker
187 expects. They are then used in macho_write() to put out the
188 symbols and strings in that order.
190 The order of the symbol table is:
191 local symbols
192 defined external symbols (sorted by name)
193 undefined external symbols (sorted by name)
195 The order of the string table is:
196 strings for external symbols
197 strings for local symbols
199 static uint32_t ilocalsym = 0;
200 static uint32_t iextdefsym = 0;
201 static uint32_t iundefsym = 0;
202 static uint32_t nlocalsym;
203 static uint32_t nextdefsym;
204 static uint32_t nundefsym;
205 static struct symbol **extdefsyms = NULL;
206 static struct symbol **undefsyms = NULL;
208 static struct RAA *extsyms;
209 static struct SAA *strs;
210 static uint32_t strslen;
212 static FILE *machofp;
213 static efunc error;
214 static evalfunc evaluate;
216 extern struct ofmt of_macho64;
218 /* Global file information. This should be cleaned up into either
219 a structure or as function arguments. */
220 uint32_t head_ncmds64 = 0;
221 uint32_t head_sizeofcmds64 = 0;
222 uint64_t seg_filesize64 = 0;
223 uint64_t seg_vmsize64 = 0;
224 uint32_t seg_nsects64 = 0;
225 uint64_t rel_padcnt64 = 0;
/*
 * Copy the string xsrc into the character ARRAY xdst, zero-padding the
 * whole buffer and always leaving it NUL-terminated (truncating xsrc if
 * it does not fit).  xdst must be an actual array, not a pointer, because
 * its capacity is taken with sizeof.
 *
 * Wrapped in do { } while (0) so the macro acts as a single statement and
 * stays correct under an unbraced if/else.
 */
#define xstrncpy(xdst, xsrc)                                               \
    do {                                                                   \
        memset((xdst), '\0', sizeof(xdst)); /* zero out whole buffer */    \
        strncpy((xdst), (xsrc), sizeof(xdst)); /* copy over string */      \
        (xdst)[sizeof(xdst) - 1] = '\0';    /* proper null-termination */  \
    } while (0)
/* Round x up to the next multiple of y; y must be a power of two. */
#define align(x, y) \
    (((x) + (y) - 1) & ~((y) - 1))

/* Round x up to an int32_t boundary. */
#define alignint32_t(x) \
    align(x, sizeof(int32_t))

/* Round x up to an int64_t boundary. */
#define alignint64_t(x) \
    align(x, sizeof(int64_t))
242 static void debug_reloc (struct reloc *);
243 static void debug_section_relocs (struct section *) _unused;
/*
 * Return the base-2 logarithm of align when it is a power of two.
 * An input of 0 yields 0; any value that is not a power of two yields -1.
 */
static int exact_log2 (uint32_t align)
{
    if (align == 0)
        return 0;

    if ((align & (align - 1)) != 0)
        return -1;              /* Not a power of 2 */

#ifdef HAVE_GNUC_4
    return __builtin_ctzl (align);
#else
    {
        /* Exactly one bit is set here: count how far up it sits. */
        int shift = 0;

        while ((align >>= 1) != 0)
            shift++;

        return shift;
    }
#endif
}
274 static struct section *get_section_by_name(const char *segname,
275 const char *sectname)
277 struct section *s;
279 for (s = sects; s != NULL; s = s->next)
280 if (!strcmp(s->segname, segname) && !strcmp(s->sectname, sectname))
281 break;
283 return s;
286 static struct section *get_section_by_index(const int32_t index)
288 struct section *s;
290 for (s = sects; s != NULL; s = s->next)
291 if (index == s->index)
292 break;
294 return s;
297 static int32_t get_section_index_by_name(const char *segname,
298 const char *sectname)
300 struct section *s;
302 for (s = sects; s != NULL; s = s->next)
303 if (!strcmp(s->segname, segname) && !strcmp(s->sectname, sectname))
304 return s->index;
306 return -1;
309 static char *get_section_name_by_index(const int32_t index)
311 struct section *s;
313 for (s = sects; s != NULL; s = s->next)
314 if (index == s->index)
315 return s->sectname;
317 return NULL;
320 static uint8_t get_section_fileindex_by_index(const int32_t index)
322 struct section *s;
323 uint8_t i = 1;
325 for (s = sects; s != NULL && i < MAX_SECT; s = s->next, ++i)
326 if (index == s->index)
327 return i;
329 if (i == MAX_SECT)
330 error(ERR_WARNING,
331 "too many sections (>255) - clipped by fileindex");
333 return NO_SECT;
336 static struct symbol *get_closest_section_symbol_by_offset(uint8_t fileindex, int64_t offset)
338 struct symbol *sym;
340 for (sym = syms; sym != NULL; sym = sym->next) {
341 if ((sym->sect != NO_SECT) &&
342 (sym->sect == fileindex) &&
343 ((int64_t)sym->value >= offset))
344 return sym;
347 return NULL;
/*
 * Section number reserved for the Mach-O special symbol `..gotpcrel',
 * which can be used with WRT to request PIC relocation types.
 */
static int32_t macho_gotpcrel_sect;
358 static void macho_init(FILE * fp, efunc errfunc, ldfunc ldef,
359 evalfunc eval)
361 char zero = 0;
363 maxbits = 64;
364 machofp = fp;
365 error = errfunc;
366 evaluate = eval;
368 (void)ldef; /* placate optimizers */
370 sects = NULL;
371 sectstail = &sects;
373 syms = NULL;
374 symstail = &syms;
375 nsyms = 0;
376 nlocalsym = 0;
377 nextdefsym = 0;
378 nundefsym = 0;
380 extsyms = raa_init();
381 strs = saa_init(1L);
383 /* string table starts with a zero byte - don't ask why */
384 saa_wbytes(strs, &zero, sizeof(char));
385 strslen = 1;
387 /* add special symbol for ..gotpcrel */
388 macho_gotpcrel_sect = seg_alloc();
389 macho_gotpcrel_sect ++;
390 ldef("..gotpcrel", macho_gotpcrel_sect, 0L, NULL, false, false, &of_macho64, error);
393 static void sect_write(struct section *sect,
394 const uint8_t *data, uint32_t len)
396 saa_wbytes(sect->data, data, len);
397 sect->size += len;
400 static int32_t add_reloc(struct section *sect, int32_t section,
401 int pcrel, int bytes, int64_t reloff)
403 struct reloc *r;
404 struct symbol *sym;
405 int32_t fi;
406 int32_t adjustment = 0;
408 /* NeXT as puts relocs in reversed order (address-wise) into the
409 ** files, so we do the same, doesn't seem to make much of a
410 ** difference either way */
411 r = nasm_malloc(sizeof(struct reloc));
412 r->next = sect->relocs;
413 sect->relocs = r;
415 /* the current end of the section will be the symbol's address for
416 ** now, might have to be fixed by macho_fixup_relocs() later on. make
417 ** sure we don't make the symbol scattered by setting the highest
418 ** bit by accident */
419 r->addr = sect->size & ~R_SCATTERED;
420 r->ext = 1;
421 r->pcrel = (pcrel ? 1 : 0);
423 /* match byte count 1, 2, 4, 8 to length codes 0, 1, 2, 3 respectively */
424 switch(bytes){
425 case 1:
426 r->length = 0;
427 break;
428 case 2:
429 r->length = 1;
430 break;
431 case 4:
432 r->length = 2;
433 break;
434 case 8:
435 r->length = 3;
436 break;
437 default:
438 break;
441 /* set default relocation values */
442 r->type = 0; // X86_64_RELOC_UNSIGNED
443 r->snum = R_ABS; // Absolute Symbol (indicates no relocation)
445 /* absolute relocation */
446 if (pcrel == 0) {
448 /* intra-section */
449 if (section == NO_SEG) {
450 // r->snum = R_ABS; // Set above
452 /* inter-section */
453 } else {
454 fi = get_section_fileindex_by_index(section);
456 /* external */
457 if (fi == NO_SECT) {
458 r->snum = raa_read(extsyms, section);
460 /* local */
461 } else {
462 sym = get_closest_section_symbol_by_offset(fi, reloff);
463 r->snum = sym->initial_snum;
464 adjustment = sym->value;
468 /* relative relocation */
469 } else if (pcrel == 1) {
471 /* intra-section */
472 if (section == NO_SEG) {
473 r->type = 1; // X86_64_RELOC_SIGNED
475 /* inter-section */
476 } else {
477 r->type = 2; // X86_64_RELOC_BRANCH
478 fi = get_section_fileindex_by_index(section);
480 /* external */
481 if (fi == NO_SECT) {
482 sect->extreloc = 1;
483 r->snum = raa_read(extsyms, section);
485 /* local */
486 } else {
487 sym = get_closest_section_symbol_by_offset(fi, reloff);
488 r->snum = sym->initial_snum;
489 adjustment = sym->value;
493 /* subtractor */
494 } else if (pcrel == 2) {
495 r->pcrel = 0;
496 r->type = 5; // X86_64_RELOC_SUBTRACTOR
498 /* gotpcrel */
499 } else if (pcrel == 3) {
500 r->type = 4; // X86_64_RELOC_GOT
501 r->snum = macho_gotpcrel_sect;
503 /* gotpcrel MOVQ load */
504 } else if (pcrel == 4) {
505 r->type = 3; // X86_64_RELOC_GOT_LOAD
506 r->snum = macho_gotpcrel_sect;
509 ++sect->nreloc;
511 return adjustment;
514 static void macho_output(int32_t secto, const void *data,
515 enum out_type type, uint64_t size,
516 int32_t section, int32_t wrt)
518 struct section *s, *sbss;
519 int64_t addr;
520 uint8_t mydata[16], *p, gotload;
522 if (secto == NO_SEG) {
523 if (type != OUT_RESERVE)
524 error(ERR_NONFATAL, "attempt to assemble code in "
525 "[ABSOLUTE] space");
527 return;
530 s = get_section_by_index(secto);
532 if (s == NULL) {
533 error(ERR_WARNING, "attempt to assemble code in"
534 " section %d: defaulting to `.text'", secto);
535 s = get_section_by_name("__TEXT", "__text");
537 /* should never happen */
538 if (s == NULL)
539 error(ERR_PANIC, "text section not found");
542 sbss = get_section_by_name("__DATA", "__bss");
544 if (s == sbss && type != OUT_RESERVE) {
545 error(ERR_WARNING, "attempt to initialize memory in the"
546 " BSS section: ignored");
547 s->size += realsize(type, size);
548 return;
551 switch (type) {
552 case OUT_RESERVE:
553 if (s != sbss) {
554 error(ERR_WARNING, "uninitialized space declared in"
555 " %s section: zeroing",
556 get_section_name_by_index(secto));
558 sect_write(s, NULL, size);
559 } else
560 s->size += size;
562 break;
564 case OUT_RAWDATA:
565 if (section != NO_SEG)
566 error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG");
568 sect_write(s, data, size);
569 break;
571 case OUT_ADDRESS:
572 addr = *(int64_t *)data;
573 if (section != NO_SEG) {
574 if (section % 2) {
575 error(ERR_NONFATAL, "Mach-O format does not support"
576 " section base references");
577 } else {
578 if (wrt == NO_SEG) {
579 if (size < 8) {
580 error(ERR_NONFATAL, "Mach-O 64-bit format does not support"
581 " 32-bit absolute addresses");
583 Seemingly, Mach-O's X86_64_RELOC_SUBTRACTOR would require
584 pre-determined knowledge of where the image base would be,
585 making it impractical for use in intermediate object files
587 } else {
588 addr -= add_reloc(s, section, 0, size, addr); // X86_64_RELOC_UNSIGNED
590 } else {
591 error(ERR_NONFATAL, "Mach-O format does not support"
592 " this use of WRT");
597 p = mydata;
598 WRITEADDR(p, addr, size);
599 sect_write(s, mydata, size);
600 break;
602 case OUT_REL2ADR:
603 p = mydata;
604 WRITESHORT(p, *(int64_t *)data);
606 if (section == secto)
607 error(ERR_PANIC, "intra-section OUT_REL2ADR");
609 if (section == NO_SEG) {
610 /* Do nothing */
611 } else if (section % 2) {
612 error(ERR_NONFATAL, "Mach-O format does not support"
613 " section base references");
614 } else {
615 error(ERR_NONFATAL, "Unsupported non-32-bit"
616 " Macho-O relocation [2]");
619 sect_write(s, mydata, 2L);
620 break;
622 case OUT_REL4ADR:
623 p = mydata;
624 WRITELONG(p, *(int64_t *)data);
626 if (section == secto)
627 error(ERR_PANIC, "intra-section OUT_REL4ADR");
629 if (section != NO_SEG && section % 2) {
630 error(ERR_NONFATAL, "Mach-O format does not support"
631 " section base references");
632 } else {
633 if (wrt == NO_SEG) {
634 *mydata -= add_reloc(s, section, 1, 4, (int64_t)*mydata); // X86_64_RELOC_SIGNED/BRANCH
635 } else if (wrt == macho_gotpcrel_sect) {
636 if (s->data->datalen > 1) {
637 saa_fread(s->data, s->data->datalen-2, &gotload, 1); // Retrieve Instruction Opcode
638 } else {
639 gotload = 0;
641 if (gotload == 0x8B) { // Check for MOVQ Opcode
642 *mydata -= add_reloc(s, section, 4, 4, (int64_t)*mydata); // X86_64_GOT_LOAD (MOVQ load)
643 } else {
644 *mydata -= add_reloc(s, section, 3, 4, (int64_t)*mydata); // X86_64_GOT
646 } else {
647 error(ERR_NONFATAL, "Mach-O format does not support"
648 " this use of WRT");
649 wrt = NO_SEG; /* we can at least _try_ to continue */
653 sect_write(s, mydata, 4L);
654 break;
656 default:
657 error(ERR_PANIC, "unknown output type?");
658 break;
662 static int32_t macho_section(char *name, int pass, int *bits)
664 int32_t index, originalIndex;
665 char *sectionAttributes;
666 struct sectmap *sm;
667 struct section *s;
669 (void)pass;
671 /* Default to 64 bits. */
672 if (!name) {
673 *bits = 64;
674 name = ".text";
675 sectionAttributes = NULL;
676 } else {
677 sectionAttributes = name;
678 name = nasm_strsep(&sectionAttributes, " \t");
681 for (sm = sectmap; sm->nasmsect != NULL; ++sm) {
682 /* make lookup into section name translation table */
683 if (!strcmp(name, sm->nasmsect)) {
684 char *currentAttribute;
686 /* try to find section with that name */
687 originalIndex = index = get_section_index_by_name(sm->segname,
688 sm->sectname);
690 /* create it if it doesn't exist yet */
691 if (index == -1) {
692 s = *sectstail = nasm_malloc(sizeof(struct section));
693 s->next = NULL;
694 sectstail = &s->next;
696 s->data = saa_init(1L);
697 s->index = seg_alloc();
698 s->relocs = NULL;
699 s->align = -1;
701 xstrncpy(s->segname, sm->segname);
702 xstrncpy(s->sectname, sm->sectname);
703 s->size = 0;
704 s->nreloc = 0;
705 s->flags = sm->flags;
707 index = s->index;
708 } else {
709 s = get_section_by_index(index);
712 while ((NULL != sectionAttributes)
713 && (currentAttribute = nasm_strsep(&sectionAttributes, " \t"))) {
714 if (0 != *currentAttribute) {
715 if (!nasm_strnicmp("align=", currentAttribute, 6)) {
716 char *end;
717 int newAlignment, value;
719 value = strtoul(currentAttribute + 6, (char**)&end, 0);
720 newAlignment = exact_log2(value);
722 if (0 != *end) {
723 error(ERR_PANIC,
724 "unknown or missing alignment value \"%s\" "
725 "specified for section \"%s\"",
726 currentAttribute + 6,
727 name);
728 return NO_SEG;
729 } else if (0 > newAlignment) {
730 error(ERR_PANIC,
731 "alignment of %d (for section \"%s\") is not "
732 "a power of two",
733 value,
734 name);
735 return NO_SEG;
738 if ((-1 != originalIndex)
739 && (s->align != newAlignment)
740 && (s->align != -1)) {
741 error(ERR_PANIC,
742 "section \"%s\" has already been specified "
743 "with alignment %d, conflicts with new "
744 "alignment of %d",
745 name,
746 (1 << s->align),
747 value);
748 return NO_SEG;
751 s->align = newAlignment;
752 } else if (!nasm_stricmp("data", currentAttribute)) {
753 /* Do nothing; 'data' is implicit */
754 } else {
755 error(ERR_PANIC,
756 "unknown section attribute %s for section %s",
757 currentAttribute,
758 name);
759 return NO_SEG;
764 return index;
768 error(ERR_PANIC, "invalid section name %s", name);
769 return NO_SEG;
772 static void macho_symdef(char *name, int32_t section, int64_t offset,
773 int is_global, char *special)
775 struct symbol *sym;
777 if (special) {
778 error(ERR_NONFATAL, "The Mach-O output format does "
779 "not support any special symbol types");
780 return;
783 if (is_global == 3) {
784 error(ERR_NONFATAL, "The Mach-O format does not "
785 "(yet) support forward reference fixups.");
786 return;
789 if (name[0] == '.' && name[1] == '.' && name[2] != '@') {
791 * This is a NASM special symbol. We never allow it into
792 * the Macho-O symbol table, even if it's a valid one. If it
793 * _isn't_ a valid one, we should barf immediately.
795 if (strcmp(name, "..gotpcrel"))
796 error(ERR_NONFATAL, "unrecognized special symbol `%s'", name);
797 return;
800 sym = *symstail = nasm_malloc(sizeof(struct symbol));
801 sym->next = NULL;
802 symstail = &sym->next;
804 sym->name = name;
805 sym->strx = strslen;
806 sym->type = 0;
807 sym->desc = 0;
808 sym->value = offset;
809 sym->initial_snum = -1;
811 /* external and common symbols get N_EXT */
812 if (is_global != 0) {
813 sym->type |= N_EXT;
816 if (section == NO_SEG) {
817 /* symbols in no section get absolute */
818 sym->type |= N_ABS;
819 sym->sect = NO_SECT;
820 } else {
821 sym->type |= N_SECT;
823 /* get the in-file index of the section the symbol was defined in */
824 sym->sect = get_section_fileindex_by_index(section);
826 /* track the initially allocated symbol number for use in future fix-ups */
827 sym->initial_snum = nsyms;
829 if (sym->sect == NO_SECT) {
831 /* remember symbol number of references to external
832 ** symbols, this works because every external symbol gets
833 ** its own section number allocated internally by nasm and
834 ** can so be used as a key */
835 extsyms = raa_write(extsyms, section, nsyms);
837 switch (is_global) {
838 case 1:
839 case 2:
840 /* there isn't actually a difference between global
841 ** and common symbols, both even have their size in
842 ** sym->value */
843 sym->type = N_EXT;
844 break;
846 default:
847 /* give an error on unfound section if it's not an
848 ** external or common symbol (assemble_file() does a
849 ** seg_alloc() on every call for them) */
850 error(ERR_PANIC, "in-file index for section %d not found",
851 section);
855 ++nsyms;
/* Return the segment base for a section: the index is handed back
   unchanged. */
static int32_t macho_segbase(int32_t section)
{
    const int32_t base = section;

    return base;
}
863 static void macho_filename(char *inname, char *outname, efunc error)
865 standard_extension(inname, outname, ".o", error);
868 extern macros_t macho_stdmac[];
870 /* Comparison function for qsort symbol layout. */
871 static int layout_compare (const struct symbol **s1,
872 const struct symbol **s2)
874 return (strcmp ((*s1)->name, (*s2)->name));
877 /* The native assembler does a few things in a similar function
879 * Remove temporary labels
880 * Sort symbols according to local, external, undefined (by name)
881 * Order the string table
883 We do not remove temporary labels right now.
885 numsyms is the total number of symbols we have. strtabsize is the
886 number entries in the string table. */
888 static void macho_layout_symbols (uint32_t *numsyms,
889 uint32_t *strtabsize)
891 struct symbol *sym, **symp;
892 uint32_t i,j;
894 *numsyms = 0;
895 *strtabsize = sizeof (char);
897 symp = &syms;
899 while ((sym = *symp)) {
900 /* Undefined symbols are now external. */
901 if (sym->type == N_UNDF)
902 sym->type |= N_EXT;
904 if ((sym->type & N_EXT) == 0) {
905 sym->snum = *numsyms;
906 *numsyms = *numsyms + 1;
907 nlocalsym++;
909 else {
910 if ((sym->type & N_TYPE) != N_UNDF) {
911 nextdefsym++;
912 } else {
913 nundefsym++;
916 /* If we handle debug info we'll want
917 to check for it here instead of just
918 adding the symbol to the string table. */
919 sym->strx = *strtabsize;
920 saa_wbytes (strs, sym->name, (int32_t)(strlen(sym->name) + 1));
921 *strtabsize += strlen(sym->name) + 1;
923 symp = &(sym->next);
926 /* Next, sort the symbols. Most of this code is a direct translation from
927 the Apple cctools symbol layout. We need to keep compatibility with that. */
928 /* Set the indexes for symbol groups into the symbol table */
929 ilocalsym = 0;
930 iextdefsym = nlocalsym;
931 iundefsym = nlocalsym + nextdefsym;
933 /* allocate arrays for sorting externals by name */
934 extdefsyms = nasm_malloc(nextdefsym * sizeof(struct symbol *));
935 undefsyms = nasm_malloc(nundefsym * sizeof(struct symbol *));
937 i = 0;
938 j = 0;
940 symp = &syms;
942 while ((sym = *symp)) {
944 if((sym->type & N_EXT) == 0) {
945 sym->strx = *strtabsize;
946 saa_wbytes (strs, sym->name, (int32_t)(strlen (sym->name) + 1));
947 *strtabsize += strlen(sym->name) + 1;
949 else {
950 if((sym->type & N_TYPE) != N_UNDF) {
951 extdefsyms[i++] = sym;
952 } else {
953 undefsyms[j++] = sym;
956 symp = &(sym->next);
959 qsort(extdefsyms, nextdefsym, sizeof(struct symbol *),
960 (int (*)(const void *, const void *))layout_compare);
961 qsort(undefsyms, nundefsym, sizeof(struct symbol *),
962 (int (*)(const void *, const void *))layout_compare);
964 for(i = 0; i < nextdefsym; i++) {
965 extdefsyms[i]->snum = *numsyms;
966 *numsyms += 1;
968 for(j = 0; j < nundefsym; j++) {
969 undefsyms[j]->snum = *numsyms;
970 *numsyms += 1;
974 /* Calculate some values we'll need for writing later. */
976 static void macho_calculate_sizes (void)
978 struct section *s;
980 /* count sections and calculate in-memory and in-file offsets */
981 for (s = sects; s != NULL; s = s->next) {
982 uint64_t pad = 0;
984 /* zerofill sections aren't actually written to the file */
985 if ((s->flags & SECTION_TYPE) != S_ZEROFILL)
986 seg_filesize64 += s->size;
988 /* recalculate segment address based on alignment and vm size */
989 s->addr = seg_vmsize64;
990 /* we need section alignment to calculate final section address */
991 if (s->align == -1)
992 s->align = DEFAULT_SECTION_ALIGNMENT;
993 if(s->align) {
994 uint64_t newaddr = align(s->addr, 1 << s->align);
995 pad = newaddr - s->addr;
996 s->addr = newaddr;
999 seg_vmsize64 += s->size + pad;
1000 ++seg_nsects64;
1003 /* calculate size of all headers, load commands and sections to
1004 ** get a pointer to the start of all the raw data */
1005 if (seg_nsects64 > 0) {
1006 ++head_ncmds64;
1007 head_sizeofcmds64 +=
1008 MACHO_SEGCMD64_SIZE + seg_nsects64 * MACHO_SECTCMD64_SIZE;
1011 if (nsyms > 0) {
1012 ++head_ncmds64;
1013 head_sizeofcmds64 += MACHO_SYMCMD_SIZE;
1017 /* Write out the header information for the file. */
1019 static void macho_write_header (void)
1021 fwriteint32_t(MH_MAGIC_64, machofp); /* magic */
1022 fwriteint32_t(CPU_TYPE_X86_64, machofp); /* CPU type */
1023 fwriteint32_t(CPU_SUBTYPE_I386_ALL, machofp); /* CPU subtype */
1024 fwriteint32_t(MH_OBJECT, machofp); /* Mach-O file type */
1025 fwriteint32_t(head_ncmds64, machofp); /* number of load commands */
1026 fwriteint32_t(head_sizeofcmds64, machofp); /* size of load commands */
1027 fwriteint32_t(0, machofp); /* no flags */
1028 fwriteint32_t(0, machofp); /* reserved for future use */
1031 /* Write out the segment load command at offset. */
1033 static uint32_t macho_write_segment (uint64_t offset)
1035 uint64_t rel_base = alignint64_t (offset + seg_filesize64);
1036 uint32_t s_reloff = 0;
1037 struct section *s;
1039 fwriteint32_t(LC_SEGMENT_64, machofp); /* cmd == LC_SEGMENT_64 */
1041 /* size of load command including section load commands */
1042 fwriteint32_t(MACHO_SEGCMD64_SIZE + seg_nsects64 *
1043 MACHO_SECTCMD64_SIZE, machofp);
1045 /* in an MH_OBJECT file all sections are in one unnamed (name
1046 ** all zeros) segment */
1047 fwritezero(16, machofp);
1048 fwriteint64_t(0, machofp); /* in-memory offset */
1049 fwriteint64_t(seg_vmsize64, machofp); /* in-memory size */
1050 fwriteint64_t(offset, machofp); /* in-file offset to data */
1051 fwriteint64_t(seg_filesize64, machofp); /* in-file size */
1052 fwriteint32_t(VM_PROT_DEFAULT, machofp); /* maximum vm protection */
1053 fwriteint32_t(VM_PROT_DEFAULT, machofp); /* initial vm protection */
1054 fwriteint32_t(seg_nsects64, machofp); /* number of sections */
1055 fwriteint32_t(0, machofp); /* no flags */
1057 /* emit section headers */
1058 for (s = sects; s != NULL; s = s->next) {
1059 fwrite(s->sectname, sizeof(s->sectname), 1, machofp);
1060 fwrite(s->segname, sizeof(s->segname), 1, machofp);
1061 fwriteint64_t(s->addr, machofp);
1062 fwriteint64_t(s->size, machofp);
1064 /* dummy data for zerofill sections or proper values */
1065 if ((s->flags & SECTION_TYPE) != S_ZEROFILL) {
1066 fwriteint32_t(offset, machofp);
1067 /* Write out section alignment, as a power of two.
1068 e.g. 32-bit word alignment would be 2 (2^2 = 4). */
1069 if (s->align == -1)
1070 s->align = DEFAULT_SECTION_ALIGNMENT;
1071 fwriteint32_t(s->align, machofp);
1072 /* To be compatible with cctools as we emit
1073 a zero reloff if we have no relocations. */
1074 fwriteint32_t(s->nreloc ? rel_base + s_reloff : 0, machofp);
1075 fwriteint32_t(s->nreloc, machofp);
1077 offset += s->size;
1078 s_reloff += s->nreloc * MACHO_RELINFO64_SIZE;
1079 } else {
1080 fwriteint32_t(0, machofp);
1081 fwriteint32_t(0, machofp);
1082 fwriteint32_t(0, machofp);
1083 fwriteint32_t(0, machofp);
1086 if (s->nreloc) {
1087 s->flags |= S_ATTR_LOC_RELOC;
1088 if (s->extreloc)
1089 s->flags |= S_ATTR_EXT_RELOC;
1092 fwriteint32_t(s->flags, machofp); /* flags */
1093 fwriteint32_t(0, machofp); /* reserved */
1094 fwriteint32_t(0, machofp); /* reserved */
1096 fwriteint32_t(0, machofp); /* align */
1099 rel_padcnt64 = rel_base - offset;
1100 offset = rel_base + s_reloff;
1102 return offset;
1105 /* For a given chain of relocs r, write out the entire relocation
1106 chain to the object file. */
1108 static void macho_write_relocs (struct reloc *r)
1110 while (r) {
1111 uint32_t word2;
1113 fwriteint32_t(r->addr, machofp); /* reloc offset */
1115 word2 = r->snum;
1116 word2 |= r->pcrel << 24;
1117 word2 |= r->length << 25;
1118 word2 |= r->ext << 27;
1119 word2 |= r->type << 28;
1120 fwriteint32_t(word2, machofp); /* reloc data */
1121 r = r->next;
1125 /* Write out the section data. */
1126 static void macho_write_section (void)
1128 struct section *s, *s2;
1129 struct reloc *r;
1130 uint8_t fi, *p, *q, blk[8];
1131 int32_t len;
1132 int64_t l;
1134 for (s = sects; s != NULL; s = s->next) {
1135 if ((s->flags & SECTION_TYPE) == S_ZEROFILL)
1136 continue;
1138 /* no padding needs to be done to the sections */
1140 /* Like a.out Mach-O references things in the data or bss
1141 * sections by addresses which are actually relative to the
1142 * start of the _text_ section, in the _file_. See outaout.c
1143 * for more information. */
1144 saa_rewind(s->data);
1145 for (r = s->relocs; r != NULL; r = r->next) {
1146 len = (int32_t)r->length << 1;
1147 if(len > 4) len = 8;
1148 saa_fread(s->data, r->addr, blk, len);
1149 p = q = blk;
1150 l = *p++;
1152 /* get offset based on relocation type */
1153 if (r->length > 0) {
1154 l += ((int64_t)*p++) << 8;
1156 if (r->length > 1) {
1157 l += ((int64_t)*p++) << 16;
1158 l += ((int64_t)*p++) << 24;
1161 if (r->length > 2) {
1162 l += ((int64_t)*p++) << 32;
1163 l += ((int64_t)*p++) << 40;
1164 l += ((int64_t)*p++) << 48;
1165 l += ((int64_t)*p++) << 56;
1171 /* If the relocation is internal add to the current section
1172 offset. Otherwise the only value we need is the symbol
1173 offset which we already have. The linker takes care
1174 of the rest of the address. */
1175 if (!r->ext) {
1176 /* generate final address by section address and offset */
1177 for (s2 = sects, fi = 1;
1178 s2 != NULL; s2 = s2->next, fi++) {
1179 if (fi == r->snum) {
1180 l += s2->addr;
1181 break;
1186 /* write new offset back */
1187 if (r->length == 3)
1188 WRITEDLONG(q, l);
1189 else if (r->length == 2)
1190 WRITELONG(q, l);
1191 else if (r->length == 1)
1192 WRITESHORT(q, l);
1193 else
1194 *q++ = l & 0xFF;
1196 saa_fwrite(s->data, r->addr, blk, len);
1199 /* dump the section data to file */
1200 saa_fpwrite(s->data, machofp);
1203 /* pad last section up to reloc entries on int64_t boundary */
1204 fwritezero(rel_padcnt64, machofp);
1206 /* emit relocation entries */
1207 for (s = sects; s != NULL; s = s->next)
1208 macho_write_relocs (s->relocs);
1211 /* Write out the symbol table. We should already have sorted this
1212 before now. */
1213 static void macho_write_symtab (void)
1215 struct symbol *sym;
1216 struct section *s;
1217 int64_t fi;
1218 uint64_t i;
1220 /* we don't need to pad here since MACHO_RELINFO_SIZE == 8 */
1222 for (sym = syms; sym != NULL; sym = sym->next) {
1223 if ((sym->type & N_EXT) == 0) {
1224 fwriteint32_t(sym->strx, machofp); /* string table entry number */
1225 fwrite(&sym->type, 1, 1, machofp); /* symbol type */
1226 fwrite(&sym->sect, 1, 1, machofp); /* section */
1227 fwriteint16_t(sym->desc, machofp); /* description */
1229 /* Fix up the symbol value now that we know the final section
1230 sizes. */
1231 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1232 for (s = sects, fi = 1;
1233 s != NULL && fi < sym->sect; s = s->next, ++fi)
1234 sym->value += s->size;
1237 fwriteint64_t(sym->value, machofp); /* value (i.e. offset) */
1241 for (i = 0; i < nextdefsym; i++) {
1242 sym = extdefsyms[i];
1243 fwriteint32_t(sym->strx, machofp);
1244 fwrite(&sym->type, 1, 1, machofp); /* symbol type */
1245 fwrite(&sym->sect, 1, 1, machofp); /* section */
1246 fwriteint16_t(sym->desc, machofp); /* description */
1248 /* Fix up the symbol value now that we know the final section
1249 sizes. */
1250 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1251 for (s = sects, fi = 1;
1252 s != NULL && fi < sym->sect; s = s->next, ++fi)
1253 sym->value += s->size;
1256 fwriteint64_t(sym->value, machofp); /* value (i.e. offset) */
1259 for (i = 0; i < nundefsym; i++) {
1260 sym = undefsyms[i];
1261 fwriteint32_t(sym->strx, machofp);
1262 fwrite(&sym->type, 1, 1, machofp); /* symbol type */
1263 fwrite(&sym->sect, 1, 1, machofp); /* section */
1264 fwriteint16_t(sym->desc, machofp); /* description */
1266 // Fix up the symbol value now that we know the final section sizes.
1267 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1268 for (s = sects, fi = 1;
1269 s != NULL && fi < sym->sect; s = s->next, ++fi)
1270 sym->value += s->size;
1273 fwriteint64_t(sym->value, machofp); // value (i.e. offset)
1278 /* Fixup the snum in the relocation entries, we should be
1279 doing this only for externally referenced symbols. */
1280 static void macho_fixup_relocs (struct reloc *r)
1282 struct symbol *sym;
1284 while (r != NULL) {
1285 if (r->ext) {
1286 for (sym = syms; sym != NULL; sym = sym->next) {
1287 if (sym->initial_snum == r->snum) {
1288 r->snum = sym->snum;
1289 break;
1293 r = r->next;
1297 /* Write out the object file. */
1299 static void macho_write (void)
1301 uint64_t offset = 0;
1303 /* mach-o object file structure:
1305 ** mach header
1306 ** uint32_t magic
1307 ** int cpu type
1308 ** int cpu subtype
1309 ** uint32_t mach file type
1310 ** uint32_t number of load commands
1311 ** uint32_t size of all load commands
1312 ** (includes section struct size of segment command)
1313 ** uint32_t flags
1315 ** segment command
1316 ** uint32_t command type == LC_SEGMENT_64
1317 ** uint32_t size of load command
1318 ** (including section load commands)
1319 ** char[16] segment name
1320 ** uint64_t in-memory offset
1321 ** uint64_t in-memory size
1322 ** uint64_t in-file offset to data area
1323 ** uint64_t in-file size
1324 ** (in-memory size excluding zerofill sections)
1325 ** int maximum vm protection
1326 ** int initial vm protection
1327 ** uint32_t number of sections
1328 ** uint32_t flags
1330 ** section commands
1331 ** char[16] section name
1332 ** char[16] segment name
1333 ** uint64_t in-memory offset
1334 ** uint64_t in-memory size
1335 ** uint32_t in-file offset
1336 ** uint32_t alignment
1337 ** (irrelevant in MH_OBJECT)
1338 ** uint32_t in-file offset of relocation entires
1339 ** uint32_t number of relocations
1340 ** uint32_t flags
1341 ** uint32_t reserved
1342 ** uint32_t reserved
1344 ** symbol table command
1345 ** uint32_t command type == LC_SYMTAB
1346 ** uint32_t size of load command
1347 ** uint32_t symbol table offset
1348 ** uint32_t number of symbol table entries
1349 ** uint32_t string table offset
1350 ** uint32_t string table size
1352 ** raw section data
1354 ** padding to int64_t boundary
1356 ** relocation data (struct reloc)
1357 ** int32_t offset
1358 ** uint data (symbolnum, pcrel, length, extern, type)
1360 ** symbol table data (struct nlist)
1361 ** int32_t string table entry number
1362 ** uint8_t type
1363 ** (extern, absolute, defined in section)
1364 ** uint8_t section
1365 ** (0 for global symbols, section number of definition (>= 1, <=
1366 ** 254) for local symbols, size of variable for common symbols
1367 ** [type == extern])
1368 ** int16_t description
1369 ** (for stab debugging format)
1370 ** uint64_t value (i.e. file offset) of symbol or stab offset
1372 ** string table data
1373 ** list of null-terminated strings
1376 /* Emit the Mach-O header. */
1377 macho_write_header();
1379 offset = MACHO_HEADER64_SIZE + head_sizeofcmds64;
1381 /* emit the segment load command */
1382 if (seg_nsects64 > 0)
1383 offset = macho_write_segment (offset);
1384 else
1385 error(ERR_WARNING, "no sections?");
1387 if (nsyms > 0) {
1388 /* write out symbol command */
1389 fwriteint32_t(LC_SYMTAB, machofp); /* cmd == LC_SYMTAB */
1390 fwriteint32_t(MACHO_SYMCMD_SIZE, machofp); /* size of load command */
1391 fwriteint32_t(offset, machofp); /* symbol table offset */
1392 fwriteint32_t(nsyms, machofp); /* number of symbol
1393 ** table entries */
1395 offset += nsyms * MACHO_NLIST64_SIZE;
1396 fwriteint32_t(offset, machofp); /* string table offset */
1397 fwriteint32_t(strslen, machofp); /* string table size */
1400 /* emit section data */
1401 if (seg_nsects64 > 0)
1402 macho_write_section ();
1404 /* emit symbol table if we have symbols */
1405 if (nsyms > 0)
1406 macho_write_symtab ();
1408 /* we don't need to pad here since MACHO_NLIST64_SIZE == 16 */
1410 /* emit string table */
1411 saa_fpwrite(strs, machofp);
1413 /* We do quite a bit here, starting with finalizing all of the data
1414 for the object file, writing, and then freeing all of the data from
1415 the file. */
1417 static void macho_cleanup(int debuginfo)
1419 struct section *s;
1420 struct reloc *r;
1421 struct symbol *sym;
1423 (void)debuginfo;
1425 /* Sort all symbols. */
1426 macho_layout_symbols (&nsyms, &strslen);
1428 /* Fixup relocation entries */
1429 for (s = sects; s != NULL; s = s->next) {
1430 macho_fixup_relocs (s->relocs);
1433 /* First calculate and finalize needed values. */
1434 macho_calculate_sizes();
1435 macho_write();
1437 /* free up everything */
1438 while (sects->next) {
1439 s = sects;
1440 sects = sects->next;
1442 saa_free(s->data);
1443 while (s->relocs != NULL) {
1444 r = s->relocs;
1445 s->relocs = s->relocs->next;
1446 nasm_free(r);
1449 nasm_free(s);
1452 saa_free(strs);
1453 raa_free(extsyms);
1455 if (syms) {
1456 while (syms->next) {
1457 sym = syms;
1458 syms = syms->next;
1460 nasm_free (sym);
1465 /* Debugging routines. */
1466 static void debug_reloc (struct reloc *r)
1468 fprintf (stdout, "reloc:\n");
1469 fprintf (stdout, "\taddr: %"PRId32"\n", r->addr);
1470 fprintf (stdout, "\tsnum: %d\n", r->snum);
1471 fprintf (stdout, "\tpcrel: %d\n", r->pcrel);
1472 fprintf (stdout, "\tlength: %d\n", r->length);
1473 fprintf (stdout, "\text: %d\n", r->ext);
1474 fprintf (stdout, "\ttype: %d\n", r->type);
1477 static void debug_section_relocs (struct section *s)
1479 struct reloc *r = s->relocs;
1481 fprintf (stdout, "relocs for section %s:\n\n", s->sectname);
1483 while (r != NULL) {
1484 debug_reloc (r);
1485 r = r->next;
/* Output-format descriptor for the "macho64" format.  It wires this
 * file's macho_* handlers (init, output, symdef, section, segbase,
 * filename, cleanup) into a struct ofmt and uses the shared null_*
 * entries for debug info, setinfo and directives.
 *
 * NOTE(review): the numbering embedded in this listing skips from 1491
 * to 1493, so one initializer entry may be missing from this view --
 * confirm against struct ofmt before relying on the field order. */
1489 struct ofmt of_macho64 = {
1490 "NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (x86_64) object files",
1491 "macho64",
1493 null_debug_arr,
1494 &null_debug_form,
1495 macho_stdmac,
1496 macho_init,
1497 null_setinfo,
1498 macho_output,
1499 macho_symdef,
1500 macho_section,
1501 macho_segbase,
1502 null_directive,
1503 macho_filename,
1504 macho_cleanup
1507 #endif
1510 * Local Variables:
1511 * mode:c
1512 * c-basic-offset:4
1513 * End:
1515 * end of file */