BR3079777: Explain %00 in documentation
[nasm.git] / output / outmac64.c
blobf633db038f3607b3e39a213363337271fdd9b898
1 /* ----------------------------------------------------------------------- *
2 *
3 * Copyright 1996-2009 The NASM Authors - All Rights Reserved
4 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
9 * conditions are met:
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * ----------------------------------------------------------------------- */
35 * outmac64.c output routines for the Netwide Assembler to produce
36 * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (x86_64) object files
39 /* Most of this file is, like Mach-O itself, based on a.out. For more
40 * guidelines see outaout.c. */
42 #include "compiler.h"
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <ctype.h>
48 #include <inttypes.h>
50 #include "nasm.h"
51 #include "nasmlib.h"
52 #include "saa.h"
53 #include "raa.h"
54 #include "output/outform.h"
55 #include "output/outlib.h"
57 #if defined(OF_MACHO64)
59 /* Mach-O in-file header structure sizes */
60 #define MACHO_HEADER64_SIZE (32)
61 #define MACHO_SEGCMD64_SIZE (72)
62 #define MACHO_SECTCMD64_SIZE (80)
63 #define MACHO_SYMCMD_SIZE (24)
64 #define MACHO_NLIST64_SIZE (16)
65 #define MACHO_RELINFO64_SIZE (8)
67 /* Mach-O file header values */
68 #define MH_MAGIC_64 (0xfeedfacf)
69 #define CPU_TYPE_X86_64 (0x01000007) /* x86-64 platform */
70 #define CPU_SUBTYPE_I386_ALL (3) /* all-x86 compatible */
71 #define MH_OBJECT (0x1) /* object file */
73 #define LC_SEGMENT_64 (0x19) /* segment load command */
74 #define LC_SYMTAB (0x2) /* symbol table load command */
76 #define VM_PROT_NONE (0x00)
77 #define VM_PROT_READ (0x01)
78 #define VM_PROT_WRITE (0x02)
79 #define VM_PROT_EXECUTE (0x04)
81 #define VM_PROT_DEFAULT (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
82 #define VM_PROT_ALL (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
/* One output section: NASM-side bookkeeping followed by the values that are
 * written into the Mach-O section header. All sections are chained through
 * `next` into the list headed by the file-scope `sects` pointer. */
84 struct section {
85 /* nasm internal data */
86 struct section *next;
/* SAA buffer that accumulates the section's bytes (see sect_write) */
87 struct SAA *data;
/* NASM segment number obtained from seg_alloc() */
88 int32_t index;
/* head of this section's relocation list; add_reloc() pushes at the front */
89 struct reloc *relocs;
/* log2 of the requested alignment; -1 until an alignment has been set */
90 int align;
92 /* data that goes into the file */
93 char sectname[16]; /* what this section is called */
94 char segname[16]; /* segment this section will be in */
95 uint64_t addr; /* in-memory address (subject to alignment) */
96 uint64_t size; /* in-memory and -file size */
97 uint32_t nreloc; /* relocation entry count */
98 uint32_t flags; /* type and attributes (masked) */
99 uint32_t extreloc; /* external relocations */
102 #define SECTION_TYPE 0x000000ff /* section type mask */
104 #define S_REGULAR (0x0) /* standard section */
105 #define S_ZEROFILL (0x1) /* zerofill, in-memory only */
107 #define SECTION_ATTRIBUTES_SYS 0x00ffff00 /* system setable attributes */
108 #define S_ATTR_SOME_INSTRUCTIONS 0x00000400 /* section contains some
109 machine instructions */
110 #define S_ATTR_EXT_RELOC 0x00000200 /* section has external
111 relocation entries */
112 #define S_ATTR_LOC_RELOC 0x00000100 /* section has local
113 relocation entries */
114 #define S_ATTR_PURE_INSTRUCTIONS 0x80000000 /* section uses pure
115 machine instructions */
/* Translation table from NASM section names to Mach-O segment/section name
 * pairs and their initial section flags. macho_section() scans it linearly;
 * the entry whose nasmsect is NULL terminates the table. */
117 static struct sectmap {
118 const char *nasmsect;
119 const char *segname;
120 const char *sectname;
121 const int32_t flags;
122 } sectmap[] = {
123 {".text", "__TEXT", "__text", S_REGULAR|S_ATTR_SOME_INSTRUCTIONS|S_ATTR_PURE_INSTRUCTIONS},
124 {".data", "__DATA", "__data", S_REGULAR},
125 {".rodata", "__DATA", "__const", S_REGULAR},
126 {".bss", "__DATA", "__bss", S_ZEROFILL},
127 {NULL, NULL, NULL, 0}
/* One relocation entry. Entries are kept per section in a singly linked list
 * and are emitted by macho_write_relocs() as two 32-bit words: `addr`, then
 * the bit-fields packed as snum | pcrel<<24 | length<<25 | ext<<27 | type<<28. */
130 struct reloc {
131 /* nasm internal data */
132 struct reloc *next;
134 /* data that goes into the file */
135 int32_t addr; /* op's offset in section */
136 uint32_t snum:24, /* contains symbol index if
137 ** ext otherwise in-file
138 ** section number */
139 pcrel:1, /* relative relocation */
140 length:2, /* 0=byte, 1=word, 2=int32_t, 3=int64_t */
141 ext:1, /* external symbol referenced */
142 type:4; /* reloc type */
145 #define R_ABS 0 /* absolute relocation */
146 #define R_SCATTERED 0x80000000 /* reloc entry is scattered if
147 ** highest bit == 1 */
/* One symbol-table entry. Symbols are appended to the list headed by `syms`
 * in definition order by macho_symdef(); macho_layout_symbols() later assigns
 * the final table position (`snum`) and string-table offset (`strx`).
 * `name` is stored as the pointer handed to macho_symdef(), not a copy. */
149 struct symbol {
150 /* nasm internal data */
151 struct symbol *next; /* next symbol in the list */
152 char *name; /* name of this symbol */
153 int32_t initial_snum; /* symbol number used above in
154 reloc */
155 int32_t snum; /* true snum for reloc */
157 /* data that goes into the file */
158 uint32_t strx; /* string table index */
159 uint8_t type; /* symbol type */
160 uint8_t sect; /* NO_SECT or section number */
161 uint16_t desc; /* for stab debugging, 0 for us */
162 uint64_t value; /* offset of symbol in section */
165 /* symbol type bits */
166 #define N_EXT 0x01 /* global or external symbol */
168 #define N_UNDF 0x0 /* undefined symbol | n_sect == */
169 #define N_ABS 0x2 /* absolute symbol | NO_SECT */
170 #define N_SECT 0xe /* defined symbol, n_sect holds
171 ** section number */
173 #define N_TYPE 0x0e /* type bit mask */
175 #define DEFAULT_SECTION_ALIGNMENT 0 /* byte (i.e. no) alignment */
177 /* special section number values */
178 #define NO_SECT 0 /* no section, invalid */
179 #define MAX_SECT 255 /* maximum number of sections */
181 static struct section *sects, **sectstail;
182 static struct symbol *syms, **symstail;
183 static uint32_t nsyms;
185 /* These variables are set by macho_layout_symbols() to organize
186 the symbol table and string table in order the dynamic linker
187 expects. They are then used in macho_write() to put out the
188 symbols and strings in that order.
190 The order of the symbol table is:
191 local symbols
192 defined external symbols (sorted by name)
193 undefined external symbols (sorted by name)
195 The order of the string table is:
196 strings for external symbols
197 strings for local symbols
199 static uint32_t ilocalsym = 0;
200 static uint32_t iextdefsym = 0;
201 static uint32_t iundefsym = 0;
202 static uint32_t nlocalsym;
203 static uint32_t nextdefsym;
204 static uint32_t nundefsym;
205 static struct symbol **extdefsyms = NULL;
206 static struct symbol **undefsyms = NULL;
208 static struct RAA *extsyms;
209 static struct SAA *strs;
210 static uint32_t strslen;
212 extern struct ofmt of_macho64;
214 /* Global file information. This should be cleaned up into either
215 a structure or as function arguments. */
216 uint32_t head_ncmds64 = 0;
217 uint32_t head_sizeofcmds64 = 0;
218 uint64_t seg_filesize64 = 0;
219 uint64_t seg_vmsize64 = 0;
220 uint32_t seg_nsects64 = 0;
221 uint64_t rel_padcnt64 = 0;
/*
 * Copy the C string xsrc into the fixed-size char ARRAY xdst, truncating if
 * necessary. The whole destination is zeroed first, so the result is always
 * NUL-terminated and NUL-padded to sizeof(xdst).
 *
 * xdst must be a real array (sizeof is applied to it), never a pointer.
 * The body is wrapped in do { } while (0) so that the macro expands to exactly
 * one statement and can be used safely in an unbraced if/else.
 */
#define xstrncpy(xdst, xsrc)                                                \
    do {                                                                    \
        memset((xdst), '\0', sizeof(xdst));    /* zero out whole buffer */  \
        strncpy((xdst), (xsrc), sizeof(xdst) - 1); /* last byte stays NUL */\
    } while (0)
229 #define alignint32_t(x) \
230 ALIGN(x, sizeof(int32_t)) /* align x to int32_t boundary */
232 #define alignint64_t(x) \
233 ALIGN(x, sizeof(int64_t)) /* align x to int64_t boundary */
235 static void debug_reloc (struct reloc *);
236 static void debug_section_relocs (struct section *) _unused;
238 static struct section *get_section_by_name(const char *segname,
239 const char *sectname)
241 struct section *s;
243 for (s = sects; s != NULL; s = s->next)
244 if (!strcmp(s->segname, segname) && !strcmp(s->sectname, sectname))
245 break;
247 return s;
250 static struct section *get_section_by_index(const int32_t index)
252 struct section *s;
254 for (s = sects; s != NULL; s = s->next)
255 if (index == s->index)
256 break;
258 return s;
261 static int32_t get_section_index_by_name(const char *segname,
262 const char *sectname)
264 struct section *s;
266 for (s = sects; s != NULL; s = s->next)
267 if (!strcmp(s->segname, segname) && !strcmp(s->sectname, sectname))
268 return s->index;
270 return -1;
273 static char *get_section_name_by_index(const int32_t index)
275 struct section *s;
277 for (s = sects; s != NULL; s = s->next)
278 if (index == s->index)
279 return s->sectname;
281 return NULL;
284 static uint8_t get_section_fileindex_by_index(const int32_t index)
286 struct section *s;
287 uint8_t i = 1;
289 for (s = sects; s != NULL && i < MAX_SECT; s = s->next, ++i)
290 if (index == s->index)
291 return i;
293 if (i == MAX_SECT)
294 nasm_error(ERR_WARNING,
295 "too many sections (>255) - clipped by fileindex");
297 return NO_SECT;
300 static struct symbol *get_closest_section_symbol_by_offset(uint8_t fileindex, int64_t offset)
302 struct symbol *sym;
304 for (sym = syms; sym != NULL; sym = sym->next) {
305 if ((sym->sect != NO_SECT) &&
306 (sym->sect == fileindex) &&
307 ((int64_t)sym->value >= offset))
308 return sym;
311 return NULL;
316 * Special section numbers which are used to define Mach-O special
317 * symbols, which can be used with WRT to provide PIC relocation
318 * types.
320 static int32_t macho_gotpcrel_sect;
322 static void macho_init(void)
324 char zero = 0;
326 maxbits = 64;
328 sects = NULL;
329 sectstail = &sects;
331 syms = NULL;
332 symstail = &syms;
333 nsyms = 0;
334 nlocalsym = 0;
335 nextdefsym = 0;
336 nundefsym = 0;
338 extsyms = raa_init();
339 strs = saa_init(1L);
341 /* string table starts with a zero byte - don't ask why */
342 saa_wbytes(strs, &zero, sizeof(char));
343 strslen = 1;
345 /* add special symbol for ..gotpcrel */
346 macho_gotpcrel_sect = seg_alloc();
347 macho_gotpcrel_sect++;
348 define_label("..gotpcrel", macho_gotpcrel_sect, 0L, NULL, false, false);
351 static void sect_write(struct section *sect,
352 const uint8_t *data, uint32_t len)
354 saa_wbytes(sect->data, data, len);
355 sect->size += len;
358 static int32_t add_reloc(struct section *sect, int32_t section,
359 int pcrel, int bytes, int64_t reloff)
361 struct reloc *r;
362 struct symbol *sym;
363 int32_t fi;
364 int32_t adjustment = 0;
366 /* NeXT as puts relocs in reversed order (address-wise) into the
367 ** files, so we do the same, doesn't seem to make much of a
368 ** difference either way */
369 r = nasm_malloc(sizeof(struct reloc));
370 r->next = sect->relocs;
371 sect->relocs = r;
373 /* the current end of the section will be the symbol's address for
374 ** now, might have to be fixed by macho_fixup_relocs() later on. make
375 ** sure we don't make the symbol scattered by setting the highest
376 ** bit by accident */
377 r->addr = sect->size & ~R_SCATTERED;
378 r->ext = 1;
379 r->pcrel = (pcrel ? 1 : 0);
381 /* match byte count 1, 2, 4, 8 to length codes 0, 1, 2, 3 respectively */
382 switch(bytes){
383 case 1:
384 r->length = 0;
385 break;
386 case 2:
387 r->length = 1;
388 break;
389 case 4:
390 r->length = 2;
391 break;
392 case 8:
393 r->length = 3;
394 break;
395 default:
396 break;
399 /* set default relocation values */
400 r->type = 0; // X86_64_RELOC_UNSIGNED
401 r->snum = R_ABS; // Absolute Symbol (indicates no relocation)
403 /* absolute relocation */
404 if (pcrel == 0) {
406 /* intra-section */
407 if (section == NO_SEG) {
408 // r->snum = R_ABS; // Set above
410 /* inter-section */
411 } else {
412 fi = get_section_fileindex_by_index(section);
414 /* external */
415 if (fi == NO_SECT) {
416 r->snum = raa_read(extsyms, section);
418 /* local */
419 } else {
420 sym = get_closest_section_symbol_by_offset(fi, reloff);
421 r->snum = sym->initial_snum;
422 adjustment = sym->value;
426 /* relative relocation */
427 } else if (pcrel == 1) {
429 /* intra-section */
430 if (section == NO_SEG) {
431 r->type = 1; // X86_64_RELOC_SIGNED
433 /* inter-section */
434 } else {
435 r->type = 2; // X86_64_RELOC_BRANCH
436 fi = get_section_fileindex_by_index(section);
438 /* external */
439 if (fi == NO_SECT) {
440 sect->extreloc = 1;
441 r->snum = raa_read(extsyms, section);
443 /* local */
444 } else {
445 sym = get_closest_section_symbol_by_offset(fi, reloff);
446 r->snum = sym->initial_snum;
447 adjustment = sym->value;
451 /* subtractor */
452 } else if (pcrel == 2) {
453 r->pcrel = 0;
454 r->type = 5; // X86_64_RELOC_SUBTRACTOR
456 /* gotpcrel */
457 } else if (pcrel == 3) {
458 r->type = 4; // X86_64_RELOC_GOT
459 r->snum = macho_gotpcrel_sect;
461 /* gotpcrel MOVQ load */
462 } else if (pcrel == 4) {
463 r->type = 3; // X86_64_RELOC_GOT_LOAD
464 r->snum = macho_gotpcrel_sect;
467 ++sect->nreloc;
469 return adjustment;
/*
 * Emit one chunk of assembler output into the section with NASM index `secto`.
 *
 *   secto   - NASM segment number of the destination section
 *   data    - payload; read as raw bytes for OUT_RAWDATA and as an int64_t
 *             for OUT_ADDRESS / OUT_REL2ADR / OUT_REL4ADR
 *   type    - kind of output (OUT_RESERVE, OUT_RAWDATA, OUT_ADDRESS,
 *             OUT_REL2ADR, OUT_REL4ADR)
 *   size    - byte count (for OUT_ADDRESS, the width of the address field)
 *   section - NASM segment number the value refers to, or NO_SEG
 *   wrt     - WRT segment, or NO_SEG
 *
 * Behaviour visible in this function:
 *  - secto == NO_SEG: nothing is written; anything but OUT_RESERVE is an error.
 *  - Unknown secto: a warning is issued and output goes to __TEXT,__text.
 *  - Non-reserve output aimed at __DATA,__bss: a warning is issued and only
 *    the section size grows (by realsize(type, size)); no bytes are stored.
 *  - OUT_RESERVE outside bss writes via sect_write() with a NULL data pointer
 *    (after a warning); inside bss only the section size grows.
 *  - OUT_ADDRESS with a segment reference and no WRT requires size >= 8 and
 *    records an absolute relocation through add_reloc(), subtracting the
 *    returned adjustment from the stored address.
 *  - OUT_REL4ADR records a PC-relative relocation, or a GOT / GOT-load
 *    relocation when wrt is the ..gotpcrel special segment. The GOT-load form
 *    is chosen when the byte two positions before the end of the section data
 *    is 0x8B.
 *
 * NOTE(review): in the OUT_REL4ADR branch both the offset handed to
 * add_reloc() and the subtraction of its result use `*mydata`, which is a
 * single uint8_t - i.e. only the lowest byte of the 4-byte field takes part.
 * Confirm whether the full 32-bit value was intended.
 */
472 static void macho_output(int32_t secto, const void *data,
473 enum out_type type, uint64_t size,
474 int32_t section, int32_t wrt)
476 struct section *s, *sbss;
477 int64_t addr;
478 uint8_t mydata[16], *p, gotload;
480 if (secto == NO_SEG) {
481 if (type != OUT_RESERVE)
482 nasm_error(ERR_NONFATAL, "attempt to assemble code in "
483 "[ABSOLUTE] space");
485 return;
488 s = get_section_by_index(secto);
490 if (s == NULL) {
491 nasm_error(ERR_WARNING, "attempt to assemble code in"
492 " section %d: defaulting to `.text'", secto);
493 s = get_section_by_name("__TEXT", "__text");
495 /* should never happen */
496 if (s == NULL)
497 nasm_error(ERR_PANIC, "text section not found");
500 sbss = get_section_by_name("__DATA", "__bss");
502 if (s == sbss && type != OUT_RESERVE) {
503 nasm_error(ERR_WARNING, "attempt to initialize memory in the"
504 " BSS section: ignored");
505 s->size += realsize(type, size);
506 return;
509 switch (type) {
510 case OUT_RESERVE:
511 if (s != sbss) {
512 nasm_error(ERR_WARNING, "uninitialized space declared in"
513 " %s section: zeroing",
514 get_section_name_by_index(secto));
516 sect_write(s, NULL, size);
517 } else
518 s->size += size;
520 break;
522 case OUT_RAWDATA:
523 if (section != NO_SEG)
524 nasm_error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG");
526 sect_write(s, data, size);
527 break;
529 case OUT_ADDRESS:
530 addr = *(int64_t *)data;
531 if (section != NO_SEG) {
532 if (section % 2) {
533 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
534 " section base references");
535 } else {
536 if (wrt == NO_SEG) {
537 if (size < 8) {
538 nasm_error(ERR_NONFATAL, "Mach-O 64-bit format does not support"
539 " 32-bit absolute addresses");
541 Seemingly, Mach-O's X86_64_RELOC_SUBTRACTOR would require
542 pre-determined knowledge of where the image base would be,
543 making it impractical for use in intermediate object files
545 } else {
546 addr -= add_reloc(s, section, 0, size, addr); // X86_64_RELOC_UNSIGNED
548 } else {
549 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
550 " this use of WRT");
555 p = mydata;
556 WRITEADDR(p, addr, size);
557 sect_write(s, mydata, size);
558 break;
560 case OUT_REL2ADR:
561 p = mydata;
562 WRITESHORT(p, *(int64_t *)data);
564 if (section == secto)
565 nasm_error(ERR_PANIC, "intra-section OUT_REL2ADR");
567 if (section == NO_SEG) {
568 /* Do nothing */
569 } else if (section % 2) {
570 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
571 " section base references");
572 } else {
573 nasm_error(ERR_NONFATAL, "Unsupported non-32-bit"
574 " Macho-O relocation [2]");
577 sect_write(s, mydata, 2L);
578 break;
580 case OUT_REL4ADR:
581 p = mydata;
582 WRITELONG(p, *(int64_t *)data);
584 if (section == secto)
585 nasm_error(ERR_PANIC, "intra-section OUT_REL4ADR");
587 if (section != NO_SEG && section % 2) {
588 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
589 " section base references");
590 } else {
591 if (wrt == NO_SEG) {
592 *mydata -= add_reloc(s, section, 1, 4, (int64_t)*mydata); // X86_64_RELOC_SIGNED/BRANCH
593 } else if (wrt == macho_gotpcrel_sect) {
594 if (s->data->datalen > 1) {
595 saa_fread(s->data, s->data->datalen-2, &gotload, 1); // Retrieve Instruction Opcode
596 } else {
597 gotload = 0;
599 if (gotload == 0x8B) { // Check for MOVQ Opcode
600 *mydata -= add_reloc(s, section, 4, 4, (int64_t)*mydata); // X86_64_GOT_LOAD (MOVQ load)
601 } else {
602 *mydata -= add_reloc(s, section, 3, 4, (int64_t)*mydata); // X86_64_GOT
604 } else {
605 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
606 " this use of WRT");
607 wrt = NO_SEG; /* we can at least _try_ to continue */
611 sect_write(s, mydata, 4L);
612 break;
614 default:
615 nasm_error(ERR_PANIC, "unknown output type?");
616 break;
/*
 * Select (creating it on first use) the section named by a SECTION directive
 * and return its NASM segment index.
 *
 *   name - directive text: a NASM section name optionally followed by
 *          space/tab separated attributes; NULL selects ".text" and sets
 *          *bits to 64. The string is split in place by nasm_strsep().
 *   pass - unused
 *   bits - written only when name is NULL
 *
 * The NASM name is looked up in sectmap[]; a name that is not in the table is
 * reported with ERR_PANIC. A section that does not exist yet is appended to
 * the section list with empty data, a fresh seg_alloc() index, no relocations
 * and align == -1 (meaning "not specified").
 *
 * Recognised attributes:
 *   align=N - N is parsed with strtoul (base auto-detected) and converted
 *             with alignlog2_32(); trailing junk, a negative conversion
 *             result, or a conflict with an alignment already recorded for an
 *             existing section are each reported with ERR_PANIC.
 *   data    - accepted and ignored.
 * Any other attribute is reported with ERR_PANIC.
 *
 * NOTE(review): the `return NO_SEG;` statements after ERR_PANIC presumably
 * exist only for the case that nasm_error() returns - confirm.
 */
620 static int32_t macho_section(char *name, int pass, int *bits)
622 int32_t index, originalIndex;
623 char *sectionAttributes;
624 struct sectmap *sm;
625 struct section *s;
627 (void)pass;
629 /* Default to 64 bits. */
630 if (!name) {
631 *bits = 64;
632 name = ".text";
633 sectionAttributes = NULL;
634 } else {
635 sectionAttributes = name;
636 name = nasm_strsep(&sectionAttributes, " \t");
639 for (sm = sectmap; sm->nasmsect != NULL; ++sm) {
640 /* make lookup into section name translation table */
641 if (!strcmp(name, sm->nasmsect)) {
642 char *currentAttribute;
644 /* try to find section with that name */
645 originalIndex = index = get_section_index_by_name(sm->segname,
646 sm->sectname);
648 /* create it if it doesn't exist yet */
649 if (index == -1) {
650 s = *sectstail = nasm_malloc(sizeof(struct section));
651 s->next = NULL;
652 sectstail = &s->next;
654 s->data = saa_init(1L);
655 s->index = seg_alloc();
656 s->relocs = NULL;
657 s->align = -1;
659 xstrncpy(s->segname, sm->segname);
660 xstrncpy(s->sectname, sm->sectname);
661 s->size = 0;
662 s->nreloc = 0;
663 s->flags = sm->flags;
665 index = s->index;
666 } else {
667 s = get_section_by_index(index);
670 while ((NULL != sectionAttributes)
671 && (currentAttribute = nasm_strsep(&sectionAttributes, " \t"))) {
672 if (0 != *currentAttribute) {
673 if (!nasm_strnicmp("align=", currentAttribute, 6)) {
674 char *end;
675 int newAlignment, value;
677 value = strtoul(currentAttribute + 6, (char**)&end, 0);
678 newAlignment = alignlog2_32(value);
680 if (0 != *end) {
681 nasm_error(ERR_PANIC,
682 "unknown or missing alignment value \"%s\" "
683 "specified for section \"%s\"",
684 currentAttribute + 6,
685 name);
686 return NO_SEG;
687 } else if (0 > newAlignment) {
688 nasm_error(ERR_PANIC,
689 "alignment of %d (for section \"%s\") is not "
690 "a power of two",
691 value,
692 name);
693 return NO_SEG;
696 if ((-1 != originalIndex)
697 && (s->align != newAlignment)
698 && (s->align != -1)) {
699 nasm_error(ERR_PANIC,
700 "section \"%s\" has already been specified "
701 "with alignment %d, conflicts with new "
702 "alignment of %d",
703 name,
704 (1 << s->align),
705 value);
706 return NO_SEG;
709 s->align = newAlignment;
710 } else if (!nasm_stricmp("data", currentAttribute)) {
711 /* Do nothing; 'data' is implicit */
712 } else {
713 nasm_error(ERR_PANIC,
714 "unknown section attribute %s for section %s",
715 currentAttribute,
716 name);
717 return NO_SEG;
722 return index;
726 nasm_error(ERR_PANIC, "invalid section name %s", name);
727 return NO_SEG;
/*
 * Record a symbol definition in the backend's symbol list.
 *
 *   name      - symbol name; the pointer itself is stored, not a copy
 *   section   - NASM segment number the symbol lives in, or NO_SEG
 *   offset    - stored as the symbol's value
 *   is_global - 0 for a local symbol; any non-zero value sets N_EXT.
 *               The value 3 is rejected with an error, and 1 and 2 are the
 *               only values accepted for a symbol whose segment is not one of
 *               this file's sections.
 *   special   - must be NULL; any special text is rejected with an error
 *
 * Names that start with ".." and whose third character is not '@' are never
 * entered into the table; of those, only "..gotpcrel" is accepted silently.
 *
 * For an accepted symbol:
 *   - section == NO_SEG: type gets N_ABS, sect is NO_SECT and initial_snum
 *     stays -1.
 *   - otherwise: type gets N_SECT, sect becomes the section's in-file index
 *     and initial_snum becomes the current symbol count. If the segment is
 *     not one of this file's sections (in-file index NO_SECT), the symbol
 *     number is stored in `extsyms` keyed by the segment number and the type
 *     is reset to plain N_EXT; ERR_PANIC is raised if is_global is neither 1
 *     nor 2 in that case.
 * The symbol count `nsyms` is incremented for every accepted symbol.
 * The `snum` field is not set here.
 */
730 static void macho_symdef(char *name, int32_t section, int64_t offset,
731 int is_global, char *special)
733 struct symbol *sym;
735 if (special) {
736 nasm_error(ERR_NONFATAL, "The Mach-O output format does "
737 "not support any special symbol types");
738 return;
741 if (is_global == 3) {
742 nasm_error(ERR_NONFATAL, "The Mach-O format does not "
743 "(yet) support forward reference fixups.");
744 return;
747 if (name[0] == '.' && name[1] == '.' && name[2] != '@') {
749 * This is a NASM special symbol. We never allow it into
750 * the Macho-O symbol table, even if it's a valid one. If it
751 * _isn't_ a valid one, we should barf immediately.
753 if (strcmp(name, "..gotpcrel"))
754 nasm_error(ERR_NONFATAL, "unrecognized special symbol `%s'", name);
755 return;
758 sym = *symstail = nasm_malloc(sizeof(struct symbol));
759 sym->next = NULL;
760 symstail = &sym->next;
762 sym->name = name;
763 sym->strx = strslen;
764 sym->type = 0;
765 sym->desc = 0;
766 sym->value = offset;
767 sym->initial_snum = -1;
769 /* external and common symbols get N_EXT */
770 if (is_global != 0) {
771 sym->type |= N_EXT;
774 if (section == NO_SEG) {
775 /* symbols in no section get absolute */
776 sym->type |= N_ABS;
777 sym->sect = NO_SECT;
778 } else {
779 sym->type |= N_SECT;
781 /* get the in-file index of the section the symbol was defined in */
782 sym->sect = get_section_fileindex_by_index(section);
784 /* track the initially allocated symbol number for use in future fix-ups */
785 sym->initial_snum = nsyms;
787 if (sym->sect == NO_SECT) {
789 /* remember symbol number of references to external
790 ** symbols, this works because every external symbol gets
791 ** its own section number allocated internally by nasm and
792 ** can so be used as a key */
793 extsyms = raa_write(extsyms, section, nsyms);
795 switch (is_global) {
796 case 1:
797 case 2:
798 /* there isn't actually a difference between global
799 ** and common symbols, both even have their size in
800 ** sym->value */
801 sym->type = N_EXT;
802 break;
804 default:
805 /* give an error on unfound section if it's not an
806 ** external or common symbol (assemble_file() does a
807 ** seg_alloc() on every call for them) */
808 nasm_error(ERR_PANIC, "in-file index for section %d not found",
809 section);
813 ++nsyms;
816 static void macho_sectalign(int32_t seg, unsigned int value)
818 struct section *s;
820 list_for_each(s, sects) {
821 if (s->index == seg)
822 break;
825 if (!s || !is_power2(value))
826 return;
828 value = alignlog2_32(value);
829 if (s->align < (int)value)
830 s->align = value;
/* Segment-base hook: this backend returns the segment number unchanged. */
static int32_t macho_segbase(int32_t section)
{
    const int32_t base = section;

    return base;
}
/*
 * Output-filename hook: hands the input and output name buffers to
 * standard_extension() together with the ".o" extension.
 */
static void macho_filename(char *inname, char *outname)
{
    static const char objext[] = ".o";

    standard_extension(inname, outname, objext);
}
843 extern macros_t macho_stdmac[];
845 /* Comparison function for qsort symbol layout. */
846 static int layout_compare (const struct symbol **s1,
847 const struct symbol **s2)
849 return (strcmp ((*s1)->name, (*s2)->name));
852 /* The native assembler does a few things in a similar function
854 * Remove temporary labels
855 * Sort symbols according to local, external, undefined (by name)
856 * Order the string table
858 We do not remove temporary labels right now.
860 numsyms is the total number of symbols we have. strtabsize is the
861 number entries in the string table. */
/*
 * Assign final symbol-table positions and string-table offsets.
 *
 *   numsyms    - out: total number of symbols that received a position
 *   strtabsize - out: string-table size in bytes; starts at sizeof(char)
 *                for the leading zero byte
 *
 * First pass over `syms`: a symbol whose type is exactly N_UNDF is made
 * external. Local symbols (no N_EXT) are numbered immediately, in list
 * order. External symbols are counted as defined or undefined and their
 * names are appended to the string table straight away, so external names
 * precede local names in the string table.
 *
 * Second pass: local names are appended to the string table, and external
 * symbols are collected into the freshly allocated `extdefsyms` and
 * `undefsyms` arrays, which are then sorted by name and numbered - defined
 * externals first, undefined externals after them.
 *
 * The counters nlocalsym / nextdefsym / nundefsym are only incremented
 * here, never reset; within the visible code they are zeroed in
 * macho_init().
 */
863 static void macho_layout_symbols (uint32_t *numsyms,
864 uint32_t *strtabsize)
866 struct symbol *sym, **symp;
867 uint32_t i,j;
869 *numsyms = 0;
870 *strtabsize = sizeof (char);
872 symp = &syms;
874 while ((sym = *symp)) {
875 /* Undefined symbols are now external. */
876 if (sym->type == N_UNDF)
877 sym->type |= N_EXT;
879 if ((sym->type & N_EXT) == 0) {
880 sym->snum = *numsyms;
881 *numsyms = *numsyms + 1;
882 nlocalsym++;
884 else {
885 if ((sym->type & N_TYPE) != N_UNDF) {
886 nextdefsym++;
887 } else {
888 nundefsym++;
891 /* If we handle debug info we'll want
892 to check for it here instead of just
893 adding the symbol to the string table. */
894 sym->strx = *strtabsize;
895 saa_wbytes (strs, sym->name, (int32_t)(strlen(sym->name) + 1));
896 *strtabsize += strlen(sym->name) + 1;
898 symp = &(sym->next);
901 /* Next, sort the symbols. Most of this code is a direct translation from
902 the Apple cctools symbol layout. We need to keep compatibility with that. */
903 /* Set the indexes for symbol groups into the symbol table */
904 ilocalsym = 0;
905 iextdefsym = nlocalsym;
906 iundefsym = nlocalsym + nextdefsym;
908 /* allocate arrays for sorting externals by name */
909 extdefsyms = nasm_malloc(nextdefsym * sizeof(struct symbol *));
910 undefsyms = nasm_malloc(nundefsym * sizeof(struct symbol *));
912 i = 0;
913 j = 0;
915 symp = &syms;
917 while ((sym = *symp)) {
919 if((sym->type & N_EXT) == 0) {
920 sym->strx = *strtabsize;
921 saa_wbytes (strs, sym->name, (int32_t)(strlen (sym->name) + 1));
922 *strtabsize += strlen(sym->name) + 1;
924 else {
925 if((sym->type & N_TYPE) != N_UNDF) {
926 extdefsyms[i++] = sym;
927 } else {
928 undefsyms[j++] = sym;
931 symp = &(sym->next);
934 qsort(extdefsyms, nextdefsym, sizeof(struct symbol *),
935 (int (*)(const void *, const void *))layout_compare);
936 qsort(undefsyms, nundefsym, sizeof(struct symbol *),
937 (int (*)(const void *, const void *))layout_compare);
939 for(i = 0; i < nextdefsym; i++) {
940 extdefsyms[i]->snum = *numsyms;
941 *numsyms += 1;
943 for(j = 0; j < nundefsym; j++) {
944 undefsyms[j]->snum = *numsyms;
945 *numsyms += 1;
949 /* Calculate some values we'll need for writing later. */
951 static void macho_calculate_sizes (void)
953 struct section *s;
955 /* count sections and calculate in-memory and in-file offsets */
956 for (s = sects; s != NULL; s = s->next) {
957 uint64_t pad = 0;
959 /* zerofill sections aren't actually written to the file */
960 if ((s->flags & SECTION_TYPE) != S_ZEROFILL)
961 seg_filesize64 += s->size;
963 /* recalculate segment address based on alignment and vm size */
964 s->addr = seg_vmsize64;
965 /* we need section alignment to calculate final section address */
966 if (s->align == -1)
967 s->align = DEFAULT_SECTION_ALIGNMENT;
968 if(s->align) {
969 uint64_t newaddr = ALIGN(s->addr, 1 << s->align);
970 pad = newaddr - s->addr;
971 s->addr = newaddr;
974 seg_vmsize64 += s->size + pad;
975 ++seg_nsects64;
978 /* calculate size of all headers, load commands and sections to
979 ** get a pointer to the start of all the raw data */
980 if (seg_nsects64 > 0) {
981 ++head_ncmds64;
982 head_sizeofcmds64 +=
983 MACHO_SEGCMD64_SIZE + seg_nsects64 * MACHO_SECTCMD64_SIZE;
986 if (nsyms > 0) {
987 ++head_ncmds64;
988 head_sizeofcmds64 += MACHO_SYMCMD_SIZE;
992 /* Write out the header information for the file. */
994 static void macho_write_header (void)
996 fwriteint32_t(MH_MAGIC_64, ofile); /* magic */
997 fwriteint32_t(CPU_TYPE_X86_64, ofile); /* CPU type */
998 fwriteint32_t(CPU_SUBTYPE_I386_ALL, ofile); /* CPU subtype */
999 fwriteint32_t(MH_OBJECT, ofile); /* Mach-O file type */
1000 fwriteint32_t(head_ncmds64, ofile); /* number of load commands */
1001 fwriteint32_t(head_sizeofcmds64, ofile); /* size of load commands */
1002 fwriteint32_t(0, ofile); /* no flags */
1003 fwriteint32_t(0, ofile); /* reserved for future use */
1006 /* Write out the segment load command at offset. */
/*
 * Write the LC_SEGMENT_64 load command followed by one section header per
 * section.
 *
 *   offset - file offset at which the section data starts
 *
 * Returns the file offset just past the relocation entries, i.e.
 * rel_base + total size of all relocation entries, where rel_base is
 * (offset + seg_filesize64) rounded up to a multiple of sizeof(int64_t).
 *
 * Side effects: sets rel_padcnt64 to the gap between the end of the section
 * data and rel_base, and ORs S_ATTR_LOC_RELOC (plus S_ATTR_EXT_RELOC when
 * extreloc is set) into the flags of every section that has relocations
 * before the flags are written.
 *
 * Each section header is: sectname[16], segname[16], 64-bit addr, 64-bit
 * size, then eight 32-bit words. For non-zerofill sections the first four
 * words are file offset, alignment (log2), relocation offset (0 when there
 * are no relocations) and relocation count; for zerofill sections all four
 * are written as 0, so s->align is not emitted for them.
 *
 * NOTE(review): the last word written per section carries the comment
 * "align" although the alignment was already written earlier; it looks like
 * the final reserved/padding word that brings the header to
 * MACHO_SECTCMD64_SIZE bytes - confirm against the Mach-O section_64 layout.
 * NOTE(review): the return type is uint32_t while `offset` is uint64_t, so
 * the returned offset is truncated to 32 bits - confirm this is intended.
 */
1008 static uint32_t macho_write_segment (uint64_t offset)
1010 uint64_t rel_base = alignint64_t (offset + seg_filesize64);
1011 uint32_t s_reloff = 0;
1012 struct section *s;
1014 fwriteint32_t(LC_SEGMENT_64, ofile); /* cmd == LC_SEGMENT_64 */
1016 /* size of load command including section load commands */
1017 fwriteint32_t(MACHO_SEGCMD64_SIZE + seg_nsects64 *
1018 MACHO_SECTCMD64_SIZE, ofile);
1020 /* in an MH_OBJECT file all sections are in one unnamed (name
1021 ** all zeros) segment */
1022 fwritezero(16, ofile);
1023 fwriteint64_t(0, ofile); /* in-memory offset */
1024 fwriteint64_t(seg_vmsize64, ofile); /* in-memory size */
1025 fwriteint64_t(offset, ofile); /* in-file offset to data */
1026 fwriteint64_t(seg_filesize64, ofile); /* in-file size */
1027 fwriteint32_t(VM_PROT_DEFAULT, ofile); /* maximum vm protection */
1028 fwriteint32_t(VM_PROT_DEFAULT, ofile); /* initial vm protection */
1029 fwriteint32_t(seg_nsects64, ofile); /* number of sections */
1030 fwriteint32_t(0, ofile); /* no flags */
1032 /* emit section headers */
1033 for (s = sects; s != NULL; s = s->next) {
1034 fwrite(s->sectname, sizeof(s->sectname), 1, ofile);
1035 fwrite(s->segname, sizeof(s->segname), 1, ofile);
1036 fwriteint64_t(s->addr, ofile);
1037 fwriteint64_t(s->size, ofile);
1039 /* dummy data for zerofill sections or proper values */
1040 if ((s->flags & SECTION_TYPE) != S_ZEROFILL) {
1041 fwriteint32_t(offset, ofile);
1042 /* Write out section alignment, as a power of two.
1043 e.g. 32-bit word alignment would be 2 (2^2 = 4). */
1044 if (s->align == -1)
1045 s->align = DEFAULT_SECTION_ALIGNMENT;
1046 fwriteint32_t(s->align, ofile);
1047 /* To be compatible with cctools as we emit
1048 a zero reloff if we have no relocations. */
1049 fwriteint32_t(s->nreloc ? rel_base + s_reloff : 0, ofile);
1050 fwriteint32_t(s->nreloc, ofile);
1052 offset += s->size;
1053 s_reloff += s->nreloc * MACHO_RELINFO64_SIZE;
1054 } else {
1055 fwriteint32_t(0, ofile);
1056 fwriteint32_t(0, ofile);
1057 fwriteint32_t(0, ofile);
1058 fwriteint32_t(0, ofile);
1061 if (s->nreloc) {
1062 s->flags |= S_ATTR_LOC_RELOC;
1063 if (s->extreloc)
1064 s->flags |= S_ATTR_EXT_RELOC;
1067 fwriteint32_t(s->flags, ofile); /* flags */
1068 fwriteint32_t(0, ofile); /* reserved */
1069 fwriteint32_t(0, ofile); /* reserved */
1071 fwriteint32_t(0, ofile); /* align */
1074 rel_padcnt64 = rel_base - offset;
1075 offset = rel_base + s_reloff;
1077 return offset;
1080 /* For a given chain of relocs r, write out the entire relocation
1081 chain to the object file. */
1083 static void macho_write_relocs (struct reloc *r)
1085 while (r) {
1086 uint32_t word2;
1088 fwriteint32_t(r->addr, ofile); /* reloc offset */
1090 word2 = r->snum;
1091 word2 |= r->pcrel << 24;
1092 word2 |= r->length << 25;
1093 word2 |= r->ext << 27;
1094 word2 |= r->type << 28;
1095 fwriteint32_t(word2, ofile); /* reloc data */
1096 r = r->next;
1100 /* Write out the section data. */
1101 static void macho_write_section (void)
1103 struct section *s, *s2;
1104 struct reloc *r;
1105 uint8_t fi, *p, *q, blk[8];
1106 int32_t len;
1107 int64_t l;
1109 for (s = sects; s != NULL; s = s->next) {
1110 if ((s->flags & SECTION_TYPE) == S_ZEROFILL)
1111 continue;
1113 /* no padding needs to be done to the sections */
1115 /* Like a.out Mach-O references things in the data or bss
1116 * sections by addresses which are actually relative to the
1117 * start of the _text_ section, in the _file_. See outaout.c
1118 * for more information. */
1119 saa_rewind(s->data);
1120 for (r = s->relocs; r != NULL; r = r->next) {
1121 len = (int32_t)r->length << 1;
1122 if(len > 4) len = 8;
1123 saa_fread(s->data, r->addr, blk, len);
1124 p = q = blk;
1125 l = *p++;
1127 /* get offset based on relocation type */
1128 if (r->length > 0) {
1129 l += ((int64_t)*p++) << 8;
1131 if (r->length > 1) {
1132 l += ((int64_t)*p++) << 16;
1133 l += ((int64_t)*p++) << 24;
1136 if (r->length > 2) {
1137 l += ((int64_t)*p++) << 32;
1138 l += ((int64_t)*p++) << 40;
1139 l += ((int64_t)*p++) << 48;
1140 l += ((int64_t)*p++) << 56;
1146 /* If the relocation is internal add to the current section
1147 offset. Otherwise the only value we need is the symbol
1148 offset which we already have. The linker takes care
1149 of the rest of the address. */
1150 if (!r->ext) {
1151 /* generate final address by section address and offset */
1152 for (s2 = sects, fi = 1;
1153 s2 != NULL; s2 = s2->next, fi++) {
1154 if (fi == r->snum) {
1155 l += s2->addr;
1156 break;
1161 /* write new offset back */
1162 if (r->length == 3)
1163 WRITEDLONG(q, l);
1164 else if (r->length == 2)
1165 WRITELONG(q, l);
1166 else if (r->length == 1)
1167 WRITESHORT(q, l);
1168 else
1169 *q++ = l & 0xFF;
1171 saa_fwrite(s->data, r->addr, blk, len);
1174 /* dump the section data to file */
1175 saa_fpwrite(s->data, ofile);
1178 /* pad last section up to reloc entries on int64_t boundary */
1179 fwritezero(rel_padcnt64, ofile);
1181 /* emit relocation entries */
1182 for (s = sects; s != NULL; s = s->next)
1183 macho_write_relocs (s->relocs);
1186 /* Write out the symbol table. We should already have sorted this
1187 before now. */
1188 static void macho_write_symtab (void)
1190 struct symbol *sym;
1191 struct section *s;
1192 int64_t fi;
1193 uint64_t i;
1195 /* we don't need to pad here since MACHO_RELINFO_SIZE == 8 */
1197 for (sym = syms; sym != NULL; sym = sym->next) {
1198 if ((sym->type & N_EXT) == 0) {
1199 fwriteint32_t(sym->strx, ofile); /* string table entry number */
1200 fwrite(&sym->type, 1, 1, ofile); /* symbol type */
1201 fwrite(&sym->sect, 1, 1, ofile); /* section */
1202 fwriteint16_t(sym->desc, ofile); /* description */
1204 /* Fix up the symbol value now that we know the final section
1205 sizes. */
1206 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1207 for (s = sects, fi = 1; s != NULL; s = s->next, fi++) {
1208 if (fi == sym->sect) {
1209 sym->value += s->addr;
1210 break;
1215 fwriteint64_t(sym->value, ofile); /* value (i.e. offset) */
1219 for (i = 0; i < nextdefsym; i++) {
1220 sym = extdefsyms[i];
1221 fwriteint32_t(sym->strx, ofile);
1222 fwrite(&sym->type, 1, 1, ofile); /* symbol type */
1223 fwrite(&sym->sect, 1, 1, ofile); /* section */
1224 fwriteint16_t(sym->desc, ofile); /* description */
1226 /* Fix up the symbol value now that we know the final section
1227 sizes. */
1228 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1229 for (s = sects, fi = 1;
1230 s != NULL && fi < sym->sect; s = s->next, ++fi)
1231 sym->value += s->size;
1234 fwriteint64_t(sym->value, ofile); /* value (i.e. offset) */
1237 for (i = 0; i < nundefsym; i++) {
1238 sym = undefsyms[i];
1239 fwriteint32_t(sym->strx, ofile);
1240 fwrite(&sym->type, 1, 1, ofile); /* symbol type */
1241 fwrite(&sym->sect, 1, 1, ofile); /* section */
1242 fwriteint16_t(sym->desc, ofile); /* description */
1244 // Fix up the symbol value now that we know the final section sizes.
1245 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1246 for (s = sects, fi = 1;
1247 s != NULL && fi < sym->sect; s = s->next, ++fi)
1248 sym->value += s->size;
1251 fwriteint64_t(sym->value, ofile); // value (i.e. offset)
1256 /* Fixup the snum in the relocation entries, we should be
1257 doing this only for externally referenced symbols. */
1258 static void macho_fixup_relocs (struct reloc *r)
1260 struct symbol *sym;
1262 while (r != NULL) {
1263 if (r->ext) {
1264 for (sym = syms; sym != NULL; sym = sym->next) {
1265 if (sym->initial_snum == r->snum) {
1266 r->snum = sym->snum;
1267 break;
1271 r = r->next;
1275 /* Write out the object file. */
1277 static void macho_write (void)
1279 uint64_t offset = 0;
1281 /* mach-o object file structure:
1283 ** mach header
1284 ** uint32_t magic
1285 ** int cpu type
1286 ** int cpu subtype
1287 ** uint32_t mach file type
1288 ** uint32_t number of load commands
1289 ** uint32_t size of all load commands
1290 ** (includes section struct size of segment command)
1291 ** uint32_t flags
1293 ** segment command
1294 ** uint32_t command type == LC_SEGMENT_64
1295 ** uint32_t size of load command
1296 ** (including section load commands)
1297 ** char[16] segment name
1298 ** uint64_t in-memory offset
1299 ** uint64_t in-memory size
1300 ** uint64_t in-file offset to data area
1301 ** uint64_t in-file size
1302 ** (in-memory size excluding zerofill sections)
1303 ** int maximum vm protection
1304 ** int initial vm protection
1305 ** uint32_t number of sections
1306 ** uint32_t flags
1308 ** section commands
1309 ** char[16] section name
1310 ** char[16] segment name
1311 ** uint64_t in-memory offset
1312 ** uint64_t in-memory size
1313 ** uint32_t in-file offset
1314 ** uint32_t alignment
1315 ** (irrelevant in MH_OBJECT)
1316 ** uint32_t in-file offset of relocation entires
1317 ** uint32_t number of relocations
1318 ** uint32_t flags
1319 ** uint32_t reserved
1320 ** uint32_t reserved
1322 ** symbol table command
1323 ** uint32_t command type == LC_SYMTAB
1324 ** uint32_t size of load command
1325 ** uint32_t symbol table offset
1326 ** uint32_t number of symbol table entries
1327 ** uint32_t string table offset
1328 ** uint32_t string table size
1330 ** raw section data
1332 ** padding to int64_t boundary
1334 ** relocation data (struct reloc)
1335 ** int32_t offset
1336 ** uint data (symbolnum, pcrel, length, extern, type)
1338 ** symbol table data (struct nlist)
1339 ** int32_t string table entry number
1340 ** uint8_t type
1341 ** (extern, absolute, defined in section)
1342 ** uint8_t section
1343 ** (0 for global symbols, section number of definition (>= 1, <=
1344 ** 254) for local symbols, size of variable for common symbols
1345 ** [type == extern])
1346 ** int16_t description
1347 ** (for stab debugging format)
1348 ** uint64_t value (i.e. file offset) of symbol or stab offset
1350 ** string table data
1351 ** list of null-terminated strings
1354 /* Emit the Mach-O header. */
1355 macho_write_header();
1357 offset = MACHO_HEADER64_SIZE + head_sizeofcmds64;
1359 /* emit the segment load command */
1360 if (seg_nsects64 > 0)
1361 offset = macho_write_segment (offset);
1362 else
1363 nasm_error(ERR_WARNING, "no sections?");
1365 if (nsyms > 0) {
1366 /* write out symbol command */
1367 fwriteint32_t(LC_SYMTAB, ofile); /* cmd == LC_SYMTAB */
1368 fwriteint32_t(MACHO_SYMCMD_SIZE, ofile); /* size of load command */
1369 fwriteint32_t(offset, ofile); /* symbol table offset */
1370 fwriteint32_t(nsyms, ofile); /* number of symbol
1371 ** table entries */
1373 offset += nsyms * MACHO_NLIST64_SIZE;
1374 fwriteint32_t(offset, ofile); /* string table offset */
1375 fwriteint32_t(strslen, ofile); /* string table size */
1378 /* emit section data */
1379 if (seg_nsects64 > 0)
1380 macho_write_section ();
1382 /* emit symbol table if we have symbols */
1383 if (nsyms > 0)
1384 macho_write_symtab ();
1386 /* we don't need to pad here since MACHO_NLIST64_SIZE == 16 */
1388 /* emit string table */
1389 saa_fpwrite(strs, ofile);
1391 /* We do quite a bit here, starting with finalizing all of the data
1392 for the object file, writing, and then freeing all of the data from
1393 the file. */
1395 static void macho_cleanup(int debuginfo)
1397 struct section *s;
1398 struct reloc *r;
1399 struct symbol *sym;
1401 (void)debuginfo;
1403 /* Sort all symbols. */
1404 macho_layout_symbols (&nsyms, &strslen);
1406 /* Fixup relocation entries */
1407 for (s = sects; s != NULL; s = s->next) {
1408 macho_fixup_relocs (s->relocs);
1411 /* First calculate and finalize needed values. */
1412 macho_calculate_sizes();
1413 macho_write();
1415 /* free up everything */
1416 while (sects->next) {
1417 s = sects;
1418 sects = sects->next;
1420 saa_free(s->data);
1421 while (s->relocs != NULL) {
1422 r = s->relocs;
1423 s->relocs = s->relocs->next;
1424 nasm_free(r);
1427 nasm_free(s);
1430 saa_free(strs);
1431 raa_free(extsyms);
1433 if (syms) {
1434 while (syms->next) {
1435 sym = syms;
1436 syms = syms->next;
1438 nasm_free (sym);
1443 /* Debugging routines. */
1444 static void debug_reloc (struct reloc *r)
1446 fprintf (stdout, "reloc:\n");
1447 fprintf (stdout, "\taddr: %"PRId32"\n", r->addr);
1448 fprintf (stdout, "\tsnum: %d\n", r->snum);
1449 fprintf (stdout, "\tpcrel: %d\n", r->pcrel);
1450 fprintf (stdout, "\tlength: %d\n", r->length);
1451 fprintf (stdout, "\text: %d\n", r->ext);
1452 fprintf (stdout, "\ttype: %d\n", r->type);
1455 static void debug_section_relocs (struct section *s)
1457 struct reloc *r = s->relocs;
1459 fprintf (stdout, "relocs for section %s:\n\n", s->sectname);
1461 while (r != NULL) {
1462 debug_reloc (r);
1463 r = r->next;
1467 struct ofmt of_macho64 = {
1468 "NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (x86_64) object files",
1469 "macho64",
1471 null_debug_arr,
1472 &null_debug_form,
1473 macho_stdmac,
1474 macho_init,
1475 null_setinfo,
1476 macho_output,
1477 macho_symdef,
1478 macho_section,
1479 macho_sectalign,
1480 macho_segbase,
1481 null_directive,
1482 macho_filename,
1483 macho_cleanup
1486 #endif
1489 * Local Variables:
1490 * mode:c
1491 * c-basic-offset:4
1492 * End:
1494 * end of file */