macho64: remove LC_DATA_IN_CODE, change reloc type to 1
[nasm.git] / output / outmac64.c
blob76fa7ad3a354d947ff8c27336c92db5efbd3504a
1 /* ----------------------------------------------------------------------- *
2 *
3 * Copyright 1996-2016 The NASM Authors - All Rights Reserved
4 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
9 * conditions are met:
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * ----------------------------------------------------------------------- */
35 * outmac64.c output routines for the Netwide Assembler to produce
36 * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (x86_64) object files
39 /* Most of this file is, like Mach-O itself, based on a.out. For more
40 * guidelines see outaout.c. */
42 #include "compiler.h"
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <ctype.h>
48 #include <inttypes.h>
50 #include "nasm.h"
51 #include "nasmlib.h"
52 #include "saa.h"
53 #include "raa.h"
54 #include "output/outform.h"
55 #include "output/outlib.h"
57 #if defined(OF_MACHO64)
59 /* Mach-O in-file header structure sizes */
60 #define MACHO_HEADER64_SIZE (32)
61 #define MACHO_SEGCMD64_SIZE (72)
62 #define MACHO_SECTCMD64_SIZE (80)
63 #define MACHO_SYMCMD_SIZE (24)
64 #define MACHO_NLIST64_SIZE (16)
65 #define MACHO_RELINFO64_SIZE (8)
66 #define MACHO_DATA_IN_CODE_CMD_SIZE (16)
68 /* Mach-O file header values */
69 #define MH_MAGIC_64 (0xfeedfacf)
70 #define CPU_TYPE_X86_64 (0x01000007) /* x86-64 platform */
71 #define CPU_SUBTYPE_I386_ALL (3) /* all-x86 compatible */
72 #define MH_OBJECT (0x1) /* object file */
74 #define LC_SEGMENT_64 (0x19) /* segment load command */
75 #define LC_SYMTAB (0x2) /* symbol table load command */
76 #define LC_DATA_IN_CODE (0x29) /* data in code command */
78 #define VM_PROT_NONE (0x00)
79 #define VM_PROT_READ (0x01)
80 #define VM_PROT_WRITE (0x02)
81 #define VM_PROT_EXECUTE (0x04)
83 #define VM_PROT_DEFAULT (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
84 #define VM_PROT_ALL (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
/*
 * One output section.  The first group of members is bookkeeping used
 * only while assembling; the second group holds the values that end up
 * in the section header of the object file.
 */
struct section {
    /* --- assembler-side bookkeeping --- */
    struct section *next;       /* next section in the singly linked list */
    struct SAA *data;           /* accumulated section contents */
    int32_t index;              /* NASM segment number of this section */
    struct reloc *relocs;       /* head of this section's relocation list */
    int align;                  /* log2 of the alignment, -1 while unset */

    /* --- values emitted into the object file --- */
    char sectname[16];          /* section name */
    char segname[16];           /* name of the segment holding the section */
    uint64_t addr;              /* address in memory (after alignment) */
    uint64_t size;              /* size, both in memory and in the file */
    uint64_t offset;            /* offset of the data within the file */
    uint32_t pad;               /* zero bytes emitted before the data */
    uint32_t nreloc;            /* number of relocation entries */
    uint32_t flags;             /* section type and attribute bits */
    uint32_t extreloc;          /* non-zero if an external reloc was added */
};
106 #define SECTION_TYPE 0x000000ff /* section type mask */
108 #define S_REGULAR (0x0) /* standard section */
109 #define S_ZEROFILL (0x1) /* zerofill, in-memory only */
111 #define SECTION_ATTRIBUTES_SYS 0x00ffff00 /* system setable attributes */
112 #define S_ATTR_SOME_INSTRUCTIONS 0x00000400 /* section contains some
113 machine instructions */
114 #define S_ATTR_EXT_RELOC 0x00000200 /* section has external
115 relocation entries */
116 #define S_ATTR_LOC_RELOC 0x00000100 /* section has local
117 relocation entries */
118 #define S_ATTR_PURE_INSTRUCTIONS 0x80000000 /* section uses pure
119 machine instructions */
121 static struct sectmap {
122 const char *nasmsect;
123 const char *segname;
124 const char *sectname;
125 const int32_t flags;
126 } sectmap[] = {
127 {".text", "__TEXT", "__text", S_REGULAR|S_ATTR_SOME_INSTRUCTIONS|S_ATTR_PURE_INSTRUCTIONS},
128 {".data", "__DATA", "__data", S_REGULAR},
129 {".rodata", "__DATA", "__const", S_REGULAR},
130 {".bss", "__DATA", "__bss", S_ZEROFILL},
131 {NULL, NULL, NULL, 0}
/*
 * One relocation entry.  Entries are chained per section through next;
 * the remaining members are the values emitted into the object file.
 */
struct reloc {
    /* --- assembler-side bookkeeping --- */
    struct reloc *next;         /* next relocation of the same section */

    /* --- values emitted into the object file --- */
    int32_t addr;               /* offset of the patched field in the section */
    uint32_t snum:24,           /* symbol index when ext is set, otherwise
                                 * the in-file section number */
             pcrel:1,           /* set for a PC-relative relocation */
             length:2,          /* 0=byte, 1=word, 2=int32_t, 3=int64_t */
             ext:1,             /* set when an external symbol is referenced */
             type:4;            /* relocation type */
};
149 #define R_ABS 0 /* absolute relocation */
150 #define R_SCATTERED 0x80000000 /* reloc entry is scattered if
151 ** highest bit == 1 */
/*
 * One symbol table entry.  Symbols are kept in definition order in a
 * singly linked list; the second group of members is what is emitted
 * into the symbol table of the object file.
 */
struct symbol {
    /* --- assembler-side bookkeeping --- */
    struct symbol *next;        /* following symbol in the list */
    char *name;                 /* symbol name */
    int32_t initial_snum;       /* symbol number at definition time, as
                                 * stored in relocations; -1 if none */
    int32_t snum;               /* final symbol number after layout */

    /* --- values emitted into the object file --- */
    uint32_t strx;              /* offset of the name in the string table */
    uint8_t type;               /* symbol type bits (N_EXT, N_SECT, ...) */
    uint8_t sect;               /* in-file section number, or NO_SECT */
    uint16_t desc;              /* for stab debugging, 0 for us */
    uint64_t value;             /* offset of the symbol in its section */
};
169 /* symbol type bits */
170 #define N_EXT 0x01 /* global or external symbol */
172 #define N_UNDF 0x0 /* undefined symbol | n_sect == */
173 #define N_ABS 0x2 /* absolute symbol | NO_SECT */
174 #define N_SECT 0xe /* defined symbol, n_sect holds
175 ** section number */
177 #define N_TYPE 0x0e /* type bit mask */
179 #define DEFAULT_SECTION_ALIGNMENT 0 /* byte (i.e. no) alignment */
181 /* special section number values */
182 #define NO_SECT 0 /* no section, invalid */
183 #define MAX_SECT 255 /* maximum number of sections */
185 static struct section *sects, **sectstail, **sectstab;
186 static struct symbol *syms, **symstail;
187 static uint32_t nsyms;
/* These variables are set by macho_layout_symbols() to organize
   the symbol table and string table in the order the dynamic linker
   expects.  They are then used in macho_write() to put out the
   symbols and strings in that order.

   The order of the symbol table is:
     local symbols
     defined external symbols (sorted by name)
     undefined external symbols (sorted by name)

   The order of the string table is:
     strings for external symbols
     strings for local symbols
 */
203 static uint32_t ilocalsym = 0;
204 static uint32_t iextdefsym = 0;
205 static uint32_t iundefsym = 0;
206 static uint32_t nlocalsym;
207 static uint32_t nextdefsym;
208 static uint32_t nundefsym;
209 static struct symbol **extdefsyms = NULL;
210 static struct symbol **undefsyms = NULL;
212 static struct RAA *extsyms;
213 static struct SAA *strs;
214 static uint32_t strslen;
216 extern struct ofmt of_macho64;
218 /* Global file information. This should be cleaned up into either
219 a structure or as function arguments. */
220 uint32_t head_ncmds64 = 0;
221 uint32_t head_sizeofcmds64 = 0;
222 uint64_t seg_filesize64 = 0;
223 uint64_t seg_vmsize64 = 0;
224 uint32_t seg_nsects64 = 0;
225 uint64_t rel_padcnt64 = 0;
/*
 * Copy the string xsrc into the character ARRAY xdst (sizeof is applied
 * to xdst, so it must not be a pointer).  The destination is zero-filled
 * first and is always NUL-terminated, truncating xsrc if necessary.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement
 * and is safe in an unbraced if/else.
 */
#define xstrncpy(xdst, xsrc)                                            \
    do {                                                                \
        memset((xdst), '\0', sizeof(xdst)); /* zero out whole buffer */ \
        strncpy((xdst), (xsrc), sizeof(xdst)); /* copy over string */   \
        (xdst)[sizeof(xdst) - 1] = '\0'; /* proper null-termination */  \
    } while (0)
233 #define alignint32_t(x) \
234 ALIGN(x, sizeof(int32_t)) /* align x to int32_t boundary */
236 #define alignint64_t(x) \
237 ALIGN(x, sizeof(int64_t)) /* align x to int64_t boundary */
239 static void debug_reloc (struct reloc *);
240 static void debug_section_relocs (struct section *) _unused;
242 static struct section *get_section_by_name(const char *segname,
243 const char *sectname)
245 struct section *s;
247 for (s = sects; s != NULL; s = s->next)
248 if (!strcmp(s->segname, segname) && !strcmp(s->sectname, sectname))
249 break;
251 return s;
254 static struct section *get_section_by_index(const int32_t index)
256 struct section *s;
258 for (s = sects; s != NULL; s = s->next)
259 if (index == s->index)
260 break;
262 return s;
265 static int32_t get_section_index_by_name(const char *segname,
266 const char *sectname)
268 struct section *s;
270 for (s = sects; s != NULL; s = s->next)
271 if (!strcmp(s->segname, segname) && !strcmp(s->sectname, sectname))
272 return s->index;
274 return -1;
277 static char *get_section_name_by_index(const int32_t index)
279 struct section *s;
281 for (s = sects; s != NULL; s = s->next)
282 if (index == s->index)
283 return s->sectname;
285 return NULL;
288 static uint8_t get_section_fileindex_by_index(const int32_t index)
290 struct section *s;
291 uint8_t i = 1;
293 for (s = sects; s != NULL && i < MAX_SECT; s = s->next, ++i)
294 if (index == s->index)
295 return i;
297 if (i == MAX_SECT)
298 nasm_error(ERR_WARNING,
299 "too many sections (>255) - clipped by fileindex");
301 return NO_SECT;
304 static struct symbol *get_closest_section_symbol_by_offset(uint8_t fileindex, int64_t offset)
306 struct symbol *nearest = NULL;
307 struct symbol *sym;
309 for (sym = syms; sym; sym = sym->next) {
310 if ((sym->sect != NO_SECT) && (sym->sect == fileindex)) {
311 if ((int64_t)sym->value > offset)
312 break;
313 nearest = sym;
317 if (!nearest)
318 nasm_error(ERR_FATAL, "No section for index %x offset %llx found\n",
319 fileindex, (long long)offset);
321 return nearest;
325 * Special section numbers which are used to define Mach-O special
326 * symbols, which can be used with WRT to provide PIC relocation
327 * types.
329 static int32_t macho_gotpcrel_sect;
331 static void macho_init(void)
333 char zero = 0;
335 maxbits = 64;
337 sects = NULL;
338 sectstail = &sects;
340 syms = NULL;
341 symstail = &syms;
342 nsyms = 0;
343 nlocalsym = 0;
344 nextdefsym = 0;
345 nundefsym = 0;
347 extsyms = raa_init();
348 strs = saa_init(1L);
350 /* string table starts with a zero byte - don't ask why */
351 saa_wbytes(strs, &zero, sizeof(char));
352 strslen = 1;
354 /* add special symbol for ..gotpcrel */
355 macho_gotpcrel_sect = seg_alloc();
356 macho_gotpcrel_sect++;
357 define_label("..gotpcrel", macho_gotpcrel_sect, 0L, NULL, false, false);
360 static void sect_write(struct section *sect,
361 const uint8_t *data, uint32_t len)
363 saa_wbytes(sect->data, data, len);
364 sect->size += len;
367 static int32_t add_reloc(struct section *sect, int32_t section,
368 int pcrel, int bytes, int64_t reloff)
370 struct reloc *r;
371 struct symbol *sym;
372 int32_t fi;
373 int32_t adjustment = 0;
375 /* NeXT as puts relocs in reversed order (address-wise) into the
376 ** files, so we do the same, doesn't seem to make much of a
377 ** difference either way */
378 r = nasm_malloc(sizeof(struct reloc));
379 r->next = sect->relocs;
380 sect->relocs = r;
382 /* the current end of the section will be the symbol's address for
383 ** now, might have to be fixed by macho_fixup_relocs() later on. make
384 ** sure we don't make the symbol scattered by setting the highest
385 ** bit by accident */
386 r->addr = sect->size & ~R_SCATTERED;
387 r->ext = 1;
388 r->pcrel = (pcrel ? 1 : 0);
390 /* match byte count 1, 2, 4, 8 to length codes 0, 1, 2, 3 respectively */
391 switch(bytes){
392 case 1:
393 r->length = 0;
394 break;
395 case 2:
396 r->length = 1;
397 break;
398 case 4:
399 r->length = 2;
400 break;
401 case 8:
402 r->length = 3;
403 break;
404 default:
405 break;
408 /* set default relocation values */
409 r->type = 0; // X86_64_RELOC_UNSIGNED
410 r->snum = R_ABS; // Absolute Symbol (indicates no relocation)
412 /* absolute relocation */
413 if (pcrel == 0) {
415 /* intra-section */
416 if (section == NO_SEG) {
417 // r->snum = R_ABS; // Set above
419 /* inter-section */
420 } else {
421 fi = get_section_fileindex_by_index(section);
423 /* external */
424 if (fi == NO_SECT) {
425 r->snum = raa_read(extsyms, section);
427 /* local */
428 } else {
429 sym = get_closest_section_symbol_by_offset(fi, reloff);
430 r->snum = sym->initial_snum;
431 adjustment = sym->value;
435 /* relative relocation */
436 } else if (pcrel == 1) {
438 /* intra-section */
439 if (section == NO_SEG) {
440 r->type = 1; // X86_64_RELOC_SIGNED
442 /* inter-section */
443 } else {
444 r->type = 1; // X86_64_RELOC_SIGNED
445 fi = get_section_fileindex_by_index(section);
447 /* external */
448 if (fi == NO_SECT) {
449 sect->extreloc = 1;
450 r->snum = raa_read(extsyms, section);
452 /* local */
453 } else {
454 sym = get_closest_section_symbol_by_offset(fi, reloff);
455 r->snum = sym->initial_snum;
456 adjustment = sym->value;
460 /* subtractor */
461 } else if (pcrel == 2) {
462 r->pcrel = 0;
463 r->type = 5; // X86_64_RELOC_SUBTRACTOR
465 /* gotpcrel */
466 } else if (pcrel == 3) {
467 r->type = 4; // X86_64_RELOC_GOT
468 r->snum = macho_gotpcrel_sect;
470 /* gotpcrel MOVQ load */
471 } else if (pcrel == 4) {
472 r->type = 3; // X86_64_RELOC_GOT_LOAD
473 r->snum = macho_gotpcrel_sect;
476 ++sect->nreloc;
478 return adjustment;
481 static void macho_output(int32_t secto, const void *data,
482 enum out_type type, uint64_t size,
483 int32_t section, int32_t wrt)
485 struct section *s, *sbss;
486 int64_t addr;
487 uint8_t mydata[16], *p, gotload;
489 if (secto == NO_SEG) {
490 if (type != OUT_RESERVE)
491 nasm_error(ERR_NONFATAL, "attempt to assemble code in "
492 "[ABSOLUTE] space");
494 return;
497 s = get_section_by_index(secto);
499 if (s == NULL) {
500 nasm_error(ERR_WARNING, "attempt to assemble code in"
501 " section %d: defaulting to `.text'", secto);
502 s = get_section_by_name("__TEXT", "__text");
504 /* should never happen */
505 if (s == NULL)
506 nasm_error(ERR_PANIC, "text section not found");
509 sbss = get_section_by_name("__DATA", "__bss");
511 if (s == sbss && type != OUT_RESERVE) {
512 nasm_error(ERR_WARNING, "attempt to initialize memory in the"
513 " BSS section: ignored");
514 s->size += realsize(type, size);
515 return;
518 memset(mydata, 0, sizeof(mydata));
520 switch (type) {
521 case OUT_RESERVE:
522 if (s != sbss) {
523 nasm_error(ERR_WARNING, "uninitialized space declared in"
524 " %s section: zeroing",
525 get_section_name_by_index(secto));
527 sect_write(s, NULL, size);
528 } else
529 s->size += size;
531 break;
533 case OUT_RAWDATA:
534 if (section != NO_SEG)
535 nasm_error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG");
537 sect_write(s, data, size);
538 break;
540 case OUT_ADDRESS:
542 int asize = abs((int)size);
544 addr = *(int64_t *)data;
545 if (section != NO_SEG) {
546 if (section % 2) {
547 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
548 " section base references");
549 } else {
550 if (wrt == NO_SEG) {
551 if (asize < 8) {
552 nasm_error(ERR_NONFATAL, "Mach-O 64-bit format does not support"
553 " 32-bit absolute addresses");
555 Seemingly, Mach-O's X86_64_RELOC_SUBTRACTOR would require
556 pre-determined knowledge of where the image base would be,
557 making it impractical for use in intermediate object files
559 } else {
560 addr -= add_reloc(s, section, 0, asize, addr); // X86_64_RELOC_UNSIGNED
562 } else {
563 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
564 " this use of WRT");
569 p = mydata;
570 WRITEADDR(p, addr, asize);
571 sect_write(s, mydata, asize);
572 break;
575 case OUT_REL2ADR:
576 p = mydata;
577 WRITESHORT(p, *(int64_t *)data);
579 if (section == secto)
580 nasm_error(ERR_PANIC, "intra-section OUT_REL2ADR");
582 if (section == NO_SEG) {
583 /* Do nothing */
584 } else if (section % 2) {
585 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
586 " section base references");
587 } else {
588 nasm_error(ERR_NONFATAL, "Unsupported non-32-bit"
589 " Macho-O relocation [2]");
592 sect_write(s, mydata, 2L);
593 break;
595 case OUT_REL4ADR:
596 p = mydata;
597 addr = *(int64_t *)data + 4 - size;
599 if (section == secto)
600 nasm_error(ERR_PANIC, "intra-section OUT_REL4ADR");
602 if (section != NO_SEG && section % 2) {
603 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
604 " section base references");
605 } else {
606 if (wrt == NO_SEG) {
607 addr -= add_reloc(s, section, 1, 4, addr); // X86_64_RELOC_SIGNED/BRANCH
608 } else if (wrt == macho_gotpcrel_sect) {
609 if (s->data->datalen > 1) {
610 saa_fread(s->data, s->data->datalen-2, &gotload, 1); // Retrieve Instruction Opcode
611 } else {
612 gotload = 0;
614 if (gotload == 0x8B) { // Check for MOVQ Opcode
615 addr -= add_reloc(s, section, 4, 4, addr); // X86_64_GOT_LOAD (MOVQ load)
616 } else {
617 addr -= add_reloc(s, section, 3, 4, addr); // X86_64_GOT
619 } else {
620 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
621 " this use of WRT");
622 wrt = NO_SEG; /* we can at least _try_ to continue */
626 WRITELONG(p, addr);
627 sect_write(s, mydata, 4L);
628 break;
630 default:
631 nasm_error(ERR_PANIC, "unknown output type?");
632 break;
636 static int32_t macho_section(char *name, int pass, int *bits)
638 int32_t index, originalIndex;
639 char *sectionAttributes;
640 struct sectmap *sm;
641 struct section *s;
643 (void)pass;
645 /* Default to 64 bits. */
646 if (!name) {
647 *bits = 64;
648 name = ".text";
649 sectionAttributes = NULL;
650 } else {
651 sectionAttributes = name;
652 name = nasm_strsep(&sectionAttributes, " \t");
655 for (sm = sectmap; sm->nasmsect != NULL; ++sm) {
656 /* make lookup into section name translation table */
657 if (!strcmp(name, sm->nasmsect)) {
658 char *currentAttribute;
660 /* try to find section with that name */
661 originalIndex = index = get_section_index_by_name(sm->segname,
662 sm->sectname);
664 /* create it if it doesn't exist yet */
665 if (index == -1) {
666 s = *sectstail = nasm_malloc(sizeof(struct section));
667 s->next = NULL;
668 sectstail = &s->next;
670 s->data = saa_init(1L);
671 s->index = seg_alloc();
672 s->relocs = NULL;
673 s->align = -1;
674 s->pad = -1;
675 s->offset = -1;
677 xstrncpy(s->segname, sm->segname);
678 xstrncpy(s->sectname, sm->sectname);
679 s->size = 0;
680 s->nreloc = 0;
681 s->flags = sm->flags;
683 index = s->index;
684 } else {
685 s = get_section_by_index(index);
688 while ((NULL != sectionAttributes)
689 && (currentAttribute = nasm_strsep(&sectionAttributes, " \t"))) {
690 if (0 != *currentAttribute) {
691 if (!nasm_strnicmp("align=", currentAttribute, 6)) {
692 char *end;
693 int newAlignment, value;
695 value = strtoul(currentAttribute + 6, (char**)&end, 0);
696 newAlignment = alignlog2_32(value);
698 if (0 != *end) {
699 nasm_error(ERR_PANIC,
700 "unknown or missing alignment value \"%s\" "
701 "specified for section \"%s\"",
702 currentAttribute + 6,
703 name);
704 return NO_SEG;
705 } else if (0 > newAlignment) {
706 nasm_error(ERR_PANIC,
707 "alignment of %d (for section \"%s\") is not "
708 "a power of two",
709 value,
710 name);
711 return NO_SEG;
714 if ((-1 != originalIndex)
715 && (s->align != newAlignment)
716 && (s->align != -1)) {
717 nasm_error(ERR_PANIC,
718 "section \"%s\" has already been specified "
719 "with alignment %d, conflicts with new "
720 "alignment of %d",
721 name,
722 (1 << s->align),
723 value);
724 return NO_SEG;
727 s->align = newAlignment;
728 } else if (!nasm_stricmp("data", currentAttribute)) {
729 /* Do nothing; 'data' is implicit */
730 } else {
731 nasm_error(ERR_PANIC,
732 "unknown section attribute %s for section %s",
733 currentAttribute,
734 name);
735 return NO_SEG;
740 return index;
744 nasm_error(ERR_PANIC, "invalid section name %s", name);
745 return NO_SEG;
748 static void macho_symdef(char *name, int32_t section, int64_t offset,
749 int is_global, char *special)
751 struct symbol *sym;
753 if (special) {
754 nasm_error(ERR_NONFATAL, "The Mach-O output format does "
755 "not support any special symbol types");
756 return;
759 if (is_global == 3) {
760 nasm_error(ERR_NONFATAL, "The Mach-O format does not "
761 "(yet) support forward reference fixups.");
762 return;
765 if (name[0] == '.' && name[1] == '.' && name[2] != '@') {
767 * This is a NASM special symbol. We never allow it into
768 * the Macho-O symbol table, even if it's a valid one. If it
769 * _isn't_ a valid one, we should barf immediately.
771 if (strcmp(name, "..gotpcrel"))
772 nasm_error(ERR_NONFATAL, "unrecognized special symbol `%s'", name);
773 return;
776 sym = *symstail = nasm_malloc(sizeof(struct symbol));
777 sym->next = NULL;
778 symstail = &sym->next;
780 sym->name = name;
781 sym->strx = strslen;
782 sym->type = 0;
783 sym->desc = 0;
784 sym->value = offset;
785 sym->initial_snum = -1;
787 /* external and common symbols get N_EXT */
788 if (is_global != 0) {
789 sym->type |= N_EXT;
792 if (section == NO_SEG) {
793 /* symbols in no section get absolute */
794 sym->type |= N_ABS;
795 sym->sect = NO_SECT;
796 } else {
797 sym->type |= N_SECT;
799 /* get the in-file index of the section the symbol was defined in */
800 sym->sect = get_section_fileindex_by_index(section);
802 /* track the initially allocated symbol number for use in future fix-ups */
803 sym->initial_snum = nsyms;
805 if (sym->sect == NO_SECT) {
807 /* remember symbol number of references to external
808 ** symbols, this works because every external symbol gets
809 ** its own section number allocated internally by nasm and
810 ** can so be used as a key */
811 extsyms = raa_write(extsyms, section, nsyms);
813 switch (is_global) {
814 case 1:
815 case 2:
816 /* there isn't actually a difference between global
817 ** and common symbols, both even have their size in
818 ** sym->value */
819 sym->type = N_EXT;
820 break;
822 default:
823 /* give an error on unfound section if it's not an
824 ** external or common symbol (assemble_file() does a
825 ** seg_alloc() on every call for them) */
826 nasm_error(ERR_PANIC, "in-file index for section %d not found",
827 section);
831 ++nsyms;
834 static void macho_sectalign(int32_t seg, unsigned int value)
836 struct section *s;
838 list_for_each(s, sects) {
839 if (s->index == seg)
840 break;
843 if (!s || !is_power2(value))
844 return;
846 value = alignlog2_32(value);
847 if (s->align < (int)value)
848 s->align = value;
/*
 * Segment-base callback of the backend: the segment number is returned
 * unchanged.
 */
static int32_t macho_segbase(int32_t section)
{
    const int32_t base = section;

    return base;
}
/*
 * File-name callback of the backend: hands the input and output name
 * buffers to standard_extension() with ".o" as the extension.
 */
static void macho_filename(char *inname, char *outname)
{
    static const char object_ext[] = ".o";

    standard_extension(inname, outname, object_ext);
}
861 extern macros_t macho_stdmac[];
863 /* Comparison function for qsort symbol layout. */
864 static int layout_compare (const struct symbol **s1,
865 const struct symbol **s2)
867 return (strcmp ((*s1)->name, (*s2)->name));
870 /* The native assembler does a few things in a similar function
872 * Remove temporary labels
873 * Sort symbols according to local, external, undefined (by name)
874 * Order the string table
876 We do not remove temporary labels right now.
878 numsyms is the total number of symbols we have. strtabsize is the
879 number entries in the string table. */
881 static void macho_layout_symbols (uint32_t *numsyms,
882 uint32_t *strtabsize)
884 struct symbol *sym, **symp;
885 uint32_t i,j;
887 *numsyms = 0;
888 *strtabsize = sizeof (char);
890 symp = &syms;
892 while ((sym = *symp)) {
893 /* Undefined symbols are now external. */
894 if (sym->type == N_UNDF)
895 sym->type |= N_EXT;
897 if ((sym->type & N_EXT) == 0) {
898 sym->snum = *numsyms;
899 *numsyms = *numsyms + 1;
900 nlocalsym++;
902 else {
903 if ((sym->type & N_TYPE) != N_UNDF) {
904 nextdefsym++;
905 } else {
906 nundefsym++;
909 /* If we handle debug info we'll want
910 to check for it here instead of just
911 adding the symbol to the string table. */
912 sym->strx = *strtabsize;
913 saa_wbytes (strs, sym->name, (int32_t)(strlen(sym->name) + 1));
914 *strtabsize += strlen(sym->name) + 1;
916 symp = &(sym->next);
919 /* Next, sort the symbols. Most of this code is a direct translation from
920 the Apple cctools symbol layout. We need to keep compatibility with that. */
921 /* Set the indexes for symbol groups into the symbol table */
922 ilocalsym = 0;
923 iextdefsym = nlocalsym;
924 iundefsym = nlocalsym + nextdefsym;
926 /* allocate arrays for sorting externals by name */
927 extdefsyms = nasm_malloc(nextdefsym * sizeof(struct symbol *));
928 undefsyms = nasm_malloc(nundefsym * sizeof(struct symbol *));
930 i = 0;
931 j = 0;
933 symp = &syms;
935 while ((sym = *symp)) {
937 if((sym->type & N_EXT) == 0) {
938 sym->strx = *strtabsize;
939 saa_wbytes (strs, sym->name, (int32_t)(strlen (sym->name) + 1));
940 *strtabsize += strlen(sym->name) + 1;
942 else {
943 if((sym->type & N_TYPE) != N_UNDF) {
944 extdefsyms[i++] = sym;
945 } else {
946 undefsyms[j++] = sym;
949 symp = &(sym->next);
952 qsort(extdefsyms, nextdefsym, sizeof(struct symbol *),
953 (int (*)(const void *, const void *))layout_compare);
954 qsort(undefsyms, nundefsym, sizeof(struct symbol *),
955 (int (*)(const void *, const void *))layout_compare);
957 for(i = 0; i < nextdefsym; i++) {
958 extdefsyms[i]->snum = *numsyms;
959 *numsyms += 1;
961 for(j = 0; j < nundefsym; j++) {
962 undefsyms[j]->snum = *numsyms;
963 *numsyms += 1;
967 /* Calculate some values we'll need for writing later. */
969 static void macho_calculate_sizes (void)
971 struct section *s;
972 int fi;
974 /* count sections and calculate in-memory and in-file offsets */
975 for (s = sects; s != NULL; s = s->next) {
976 uint64_t newaddr;
978 /* recalculate segment address based on alignment and vm size */
979 s->addr = seg_vmsize64;
981 /* we need section alignment to calculate final section address */
982 if (s->align == -1)
983 s->align = DEFAULT_SECTION_ALIGNMENT;
985 newaddr = ALIGN(s->addr, 1 << s->align);
986 s->addr = newaddr;
988 seg_vmsize64 = newaddr + s->size;
990 /* zerofill sections aren't actually written to the file */
991 if ((s->flags & SECTION_TYPE) != S_ZEROFILL) {
993 * LLVM/Xcode as always aligns the section data to 4
994 * bytes; there is a comment in the LLVM source code that
995 * perhaps aligning to pointer size would be better.
997 s->pad = ALIGN(seg_filesize64, 4) - seg_filesize64;
998 s->offset = seg_filesize64 + s->pad;
999 seg_filesize64 += s->size + s->pad;
1002 ++seg_nsects64;
1005 /* calculate size of all headers, load commands and sections to
1006 ** get a pointer to the start of all the raw data */
1007 if (seg_nsects64 > 0) {
1008 ++head_ncmds64;
1009 head_sizeofcmds64 +=
1010 MACHO_SEGCMD64_SIZE + seg_nsects64 * MACHO_SECTCMD64_SIZE;
1013 if (nsyms > 0) {
1014 ++head_ncmds64;
1015 head_sizeofcmds64 += MACHO_SYMCMD_SIZE;
1018 /* Create a table of sections by file index to avoid linear search */
1019 sectstab = nasm_malloc(seg_nsects64 + 1);
1020 sectstab[0] = NULL;
1021 for (s = sects, fi = 1; s != NULL; s = s->next, fi++)
1022 sectstab[fi] = s;
1025 /* Write out the header information for the file. */
1027 static void macho_write_header (void)
1029 fwriteint32_t(MH_MAGIC_64, ofile); /* magic */
1030 fwriteint32_t(CPU_TYPE_X86_64, ofile); /* CPU type */
1031 fwriteint32_t(CPU_SUBTYPE_I386_ALL, ofile); /* CPU subtype */
1032 fwriteint32_t(MH_OBJECT, ofile); /* Mach-O file type */
1033 fwriteint32_t(head_ncmds64, ofile); /* number of load commands */
1034 fwriteint32_t(head_sizeofcmds64, ofile); /* size of load commands */
1035 fwriteint32_t(0, ofile); /* no flags */
1036 fwriteint32_t(0, ofile); /* reserved for future use */
1039 /* Write out the segment load command at offset. */
1041 static uint32_t macho_write_segment (uint64_t offset)
1043 uint64_t rel_base = alignint64_t (offset + seg_filesize64);
1044 uint32_t s_reloff = 0;
1045 struct section *s;
1047 fwriteint32_t(LC_SEGMENT_64, ofile); /* cmd == LC_SEGMENT_64 */
1049 /* size of load command including section load commands */
1050 fwriteint32_t(MACHO_SEGCMD64_SIZE +
1051 seg_nsects64 * MACHO_SECTCMD64_SIZE,
1052 ofile);
1054 /* in an MH_OBJECT file all sections are in one unnamed (name
1055 ** all zeros) segment */
1056 fwritezero(16, ofile);
1057 fwriteint64_t(0, ofile); /* in-memory offset */
1058 fwriteint64_t(seg_vmsize64, ofile); /* in-memory size */
1059 fwriteint64_t(offset, ofile); /* in-file offset to data */
1060 fwriteint64_t(seg_filesize64, ofile); /* in-file size */
1061 fwriteint32_t(VM_PROT_DEFAULT, ofile); /* maximum vm protection */
1062 fwriteint32_t(VM_PROT_DEFAULT, ofile); /* initial vm protection */
1063 fwriteint32_t(seg_nsects64, ofile); /* number of sections */
1064 fwriteint32_t(0, ofile); /* no flags */
1066 /* emit section headers */
1067 for (s = sects; s != NULL; s = s->next) {
1068 nasm_write(s->sectname, sizeof(s->sectname), ofile);
1069 nasm_write(s->segname, sizeof(s->segname), ofile);
1070 fwriteint64_t(s->addr, ofile);
1071 fwriteint64_t(s->size, ofile);
1073 /* dummy data for zerofill sections or proper values */
1074 if ((s->flags & SECTION_TYPE) != S_ZEROFILL) {
1075 nasm_assert(s->pad != (uint32_t)-1);
1076 offset += s->pad;
1077 fwriteint32_t(offset, ofile);
1078 offset += s->size;
1079 /* Write out section alignment, as a power of two.
1080 e.g. 32-bit word alignment would be 2 (2^2 = 4). */
1081 if (s->align == -1)
1082 s->align = DEFAULT_SECTION_ALIGNMENT;
1083 fwriteint32_t(s->align, ofile);
1084 /* To be compatible with cctools as we emit
1085 a zero reloff if we have no relocations. */
1086 fwriteint32_t(s->nreloc ? rel_base + s_reloff : 0, ofile);
1087 fwriteint32_t(s->nreloc, ofile);
1089 s_reloff += s->nreloc * MACHO_RELINFO64_SIZE;
1090 } else {
1091 fwriteint32_t(0, ofile);
1092 fwriteint32_t(0, ofile); /* No alignment?! */
1093 fwriteint32_t(0, ofile);
1094 fwriteint32_t(0, ofile);
1097 if (s->nreloc) {
1098 s->flags |= S_ATTR_LOC_RELOC;
1099 if (s->extreloc)
1100 s->flags |= S_ATTR_EXT_RELOC;
1103 fwriteint32_t(s->flags, ofile); /* flags */
1104 fwriteint32_t(0, ofile); /* reserved */
1105 fwriteint32_t(0, ofile); /* reserved */
1107 fwriteint32_t(0, ofile); /* align */
1110 rel_padcnt64 = rel_base - offset;
1111 offset = rel_base + s_reloff;
1113 return offset;
1116 /* For a given chain of relocs r, write out the entire relocation
1117 chain to the object file. */
1119 static void macho_write_relocs (struct reloc *r)
1121 while (r) {
1122 uint32_t word2;
1124 fwriteint32_t(r->addr, ofile); /* reloc offset */
1126 word2 = r->snum;
1127 word2 |= r->pcrel << 24;
1128 word2 |= r->length << 25;
1129 word2 |= r->ext << 27;
1130 word2 |= r->type << 28;
1131 fwriteint32_t(word2, ofile); /* reloc data */
1132 r = r->next;
1136 /* Write out the section data. */
1137 static void macho_write_section (void)
1139 struct section *s, *s2;
1140 struct reloc *r;
1141 uint8_t fi, *p, *q, blk[8];
1142 int32_t len;
1143 int64_t l;
1145 for (s = sects; s != NULL; s = s->next) {
1146 if ((s->flags & SECTION_TYPE) == S_ZEROFILL)
1147 continue;
1149 /* Like a.out Mach-O references things in the data or bss
1150 * sections by addresses which are actually relative to the
1151 * start of the _text_ section, in the _file_. See outaout.c
1152 * for more information. */
1153 saa_rewind(s->data);
1154 for (r = s->relocs; r != NULL; r = r->next) {
1155 len = (int32_t)r->length << 1;
1156 if(len > 4) len = 8;
1157 saa_fread(s->data, r->addr, blk, len);
1158 p = q = blk;
1159 l = *p++;
1161 /* get offset based on relocation type */
1162 if (r->length > 0) {
1163 l += ((int64_t)*p++) << 8;
1165 if (r->length > 1) {
1166 l += ((int64_t)*p++) << 16;
1167 l += ((int64_t)*p++) << 24;
1170 if (r->length > 2) {
1171 l += ((int64_t)*p++) << 32;
1172 l += ((int64_t)*p++) << 40;
1173 l += ((int64_t)*p++) << 48;
1174 l += ((int64_t)*p++) << 56;
1180 /* If the relocation is internal add to the current section
1181 offset. Otherwise the only value we need is the symbol
1182 offset which we already have. The linker takes care
1183 of the rest of the address. */
1184 if (!r->ext) {
1185 /* generate final address by section address and offset */
1186 for (s2 = sects, fi = 1;
1187 s2 != NULL; s2 = s2->next, fi++) {
1188 if (fi == r->snum) {
1189 l += s2->addr;
1190 break;
1195 /* write new offset back */
1196 if (r->length == 3)
1197 WRITEDLONG(q, l);
1198 else if (r->length == 2)
1199 WRITELONG(q, l);
1200 else if (r->length == 1)
1201 WRITESHORT(q, l);
1202 else
1203 *q++ = l & 0xFF;
1205 saa_fwrite(s->data, r->addr, blk, len);
1208 /* dump the section data to file */
1209 fwritezero(s->pad, ofile);
1210 saa_fpwrite(s->data, ofile);
1213 /* pad last section up to reloc entries on int64_t boundary */
1214 fwritezero(rel_padcnt64, ofile);
1216 /* emit relocation entries */
1217 for (s = sects; s != NULL; s = s->next)
1218 macho_write_relocs (s->relocs);
1221 /* Write out the symbol table. We should already have sorted this
1222 before now. */
1223 static void macho_write_symtab (void)
1225 struct symbol *sym;
1226 uint64_t i;
1228 /* we don't need to pad here since MACHO_RELINFO_SIZE == 8 */
1230 for (sym = syms; sym != NULL; sym = sym->next) {
1231 if ((sym->type & N_EXT) == 0) {
1232 fwriteint32_t(sym->strx, ofile); /* string table entry number */
1233 nasm_write(&sym->type, 1, ofile); /* symbol type */
1234 nasm_write(&sym->sect, 1, ofile); /* section */
1235 fwriteint16_t(sym->desc, ofile); /* description */
1237 /* Fix up the symbol value now that we know the final section
1238 sizes. */
1239 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1240 nasm_assert(sym->sect <= seg_nsects64);
1241 sym->value += sectstab[sym->sect]->addr;
1244 fwriteint64_t(sym->value, ofile); /* value (i.e. offset) */
1248 for (i = 0; i < nextdefsym; i++) {
1249 sym = extdefsyms[i];
1250 fwriteint32_t(sym->strx, ofile);
1251 nasm_write(&sym->type, 1, ofile); /* symbol type */
1252 nasm_write(&sym->sect, 1, ofile); /* section */
1253 fwriteint16_t(sym->desc, ofile); /* description */
1255 /* Fix up the symbol value now that we know the final section
1256 sizes. */
1257 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1258 nasm_assert(sym->sect <= seg_nsects64);
1259 sym->value += sectstab[sym->sect]->addr;
1262 fwriteint64_t(sym->value, ofile); /* value (i.e. offset) */
1265 for (i = 0; i < nundefsym; i++) {
1266 sym = undefsyms[i];
1267 fwriteint32_t(sym->strx, ofile);
1268 nasm_write(&sym->type, 1, ofile); /* symbol type */
1269 nasm_write(&sym->sect, 1, ofile); /* section */
1270 fwriteint16_t(sym->desc, ofile); /* description */
1272 // Fix up the symbol value now that we know the final section sizes.
1273 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1274 nasm_assert(sym->sect <= seg_nsects64);
1275 sym->value += sectstab[sym->sect]->addr;
1278 fwriteint64_t(sym->value, ofile); // value (i.e. offset)
1283 /* Fixup the snum in the relocation entries, we should be
1284 doing this only for externally referenced symbols. */
1285 static void macho_fixup_relocs (struct reloc *r)
1287 struct symbol *sym;
1289 while (r != NULL) {
1290 if (r->ext) {
1291 for (sym = syms; sym != NULL; sym = sym->next) {
1292 if (sym->initial_snum == r->snum) {
1293 r->snum = sym->snum;
1294 break;
1298 r = r->next;
1302 /* Write out the object file. */
1304 static void macho_write (void)
1306 uint64_t offset = 0;
1308 /* mach-o object file structure:
1310 ** mach header
1311 ** uint32_t magic
1312 ** int cpu type
1313 ** int cpu subtype
1314 ** uint32_t mach file type
1315 ** uint32_t number of load commands
1316 ** uint32_t size of all load commands
1317 ** (includes section struct size of segment command)
1318 ** uint32_t flags
1320 ** segment command
1321 ** uint32_t command type == LC_SEGMENT_64
1322 ** uint32_t size of load command
1323 ** (including section load commands)
1324 ** char[16] segment name
1325 ** uint64_t in-memory offset
1326 ** uint64_t in-memory size
1327 ** uint64_t in-file offset to data area
1328 ** uint64_t in-file size
1329 ** (in-memory size excluding zerofill sections)
1330 ** int maximum vm protection
1331 ** int initial vm protection
1332 ** uint32_t number of sections
1333 ** uint32_t flags
1335 ** section commands
1336 ** char[16] section name
1337 ** char[16] segment name
1338 ** uint64_t in-memory offset
1339 ** uint64_t in-memory size
1340 ** uint32_t in-file offset
1341 ** uint32_t alignment
1342 ** (irrelevant in MH_OBJECT)
1343 ** uint32_t in-file offset of relocation entires
1344 ** uint32_t number of relocations
1345 ** uint32_t flags
1346 ** uint32_t reserved
1347 ** uint32_t reserved
1349 ** symbol table command
1350 ** uint32_t command type == LC_SYMTAB
1351 ** uint32_t size of load command
1352 ** uint32_t symbol table offset
1353 ** uint32_t number of symbol table entries
1354 ** uint32_t string table offset
1355 ** uint32_t string table size
1357 ** raw section data
1359 ** padding to int64_t boundary
1361 ** relocation data (struct reloc)
1362 ** int32_t offset
1363 ** uint data (symbolnum, pcrel, length, extern, type)
1365 ** symbol table data (struct nlist)
1366 ** int32_t string table entry number
1367 ** uint8_t type
1368 ** (extern, absolute, defined in section)
1369 ** uint8_t section
1370 ** (0 for global symbols, section number of definition (>= 1, <=
1371 ** 254) for local symbols, size of variable for common symbols
1372 ** [type == extern])
1373 ** int16_t description
1374 ** (for stab debugging format)
1375 ** uint64_t value (i.e. file offset) of symbol or stab offset
1377 ** string table data
1378 ** list of null-terminated strings
1381 /* Emit the Mach-O header. */
1382 macho_write_header();
1384 offset = MACHO_HEADER64_SIZE + head_sizeofcmds64;
1386 /* emit the segment load command */
1387 if (seg_nsects64 > 0)
1388 offset = macho_write_segment (offset);
1389 else
1390 nasm_error(ERR_WARNING, "no sections?");
1392 if (nsyms > 0) {
1393 /* write out symbol command */
1394 fwriteint32_t(LC_SYMTAB, ofile); /* cmd == LC_SYMTAB */
1395 fwriteint32_t(MACHO_SYMCMD_SIZE, ofile); /* size of load command */
1396 fwriteint32_t(offset, ofile); /* symbol table offset */
1397 fwriteint32_t(nsyms, ofile); /* number of symbol
1398 ** table entries */
1399 offset += nsyms * MACHO_NLIST64_SIZE;
1400 fwriteint32_t(offset, ofile); /* string table offset */
1401 fwriteint32_t(strslen, ofile); /* string table size */
1404 /* emit section data */
1405 if (seg_nsects64 > 0)
1406 macho_write_section ();
1408 /* emit symbol table if we have symbols */
1409 if (nsyms > 0)
1410 macho_write_symtab ();
1412 /* we don't need to pad here since MACHO_NLIST64_SIZE == 16 */
1414 /* emit string table */
1415 saa_fpwrite(strs, ofile);
1417 /* We do quite a bit here, starting with finalizing all of the data
1418 for the object file, writing, and then freeing all of the data from
1419 the file. */
1421 static void macho_cleanup(int debuginfo)
1423 struct section *s;
1424 struct reloc *r;
1425 struct symbol *sym;
1427 (void)debuginfo;
1429 /* Sort all symbols. */
1430 macho_layout_symbols (&nsyms, &strslen);
1432 /* Fixup relocation entries */
1433 for (s = sects; s != NULL; s = s->next) {
1434 macho_fixup_relocs (s->relocs);
1437 /* First calculate and finalize needed values. */
1438 macho_calculate_sizes();
1439 macho_write();
1441 /* free up everything */
1442 while (sects->next) {
1443 s = sects;
1444 sects = sects->next;
1446 saa_free(s->data);
1447 while (s->relocs != NULL) {
1448 r = s->relocs;
1449 s->relocs = s->relocs->next;
1450 nasm_free(r);
1453 nasm_free(s);
1456 saa_free(strs);
1457 raa_free(extsyms);
1459 if (syms) {
1460 while (syms->next) {
1461 sym = syms;
1462 syms = syms->next;
1464 nasm_free (sym);
1469 /* Debugging routines. */
1470 static void debug_reloc (struct reloc *r)
1472 fprintf (stdout, "reloc:\n");
1473 fprintf (stdout, "\taddr: %"PRId32"\n", r->addr);
1474 fprintf (stdout, "\tsnum: %d\n", r->snum);
1475 fprintf (stdout, "\tpcrel: %d\n", r->pcrel);
1476 fprintf (stdout, "\tlength: %d\n", r->length);
1477 fprintf (stdout, "\text: %d\n", r->ext);
1478 fprintf (stdout, "\ttype: %d\n", r->type);
1481 static void debug_section_relocs (struct section *s)
1483 struct reloc *r = s->relocs;
1485 fprintf (stdout, "relocs for section %s:\n\n", s->sectname);
1487 while (r != NULL) {
1488 debug_reloc (r);
1489 r = r->next;
1493 struct ofmt of_macho64 = {
1494 "NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (x86_64) object files",
1495 "macho64",
1497 null_debug_arr,
1498 &null_debug_form,
1499 macho_stdmac,
1500 macho_init,
1501 null_setinfo,
1502 macho_output,
1503 macho_symdef,
1504 macho_section,
1505 macho_sectalign,
1506 macho_segbase,
1507 null_directive,
1508 macho_filename,
1509 macho_cleanup
1512 #endif
1515 * Local Variables:
1516 * mode:c
1517 * c-basic-offset:4
1518 * End:
1520 * end of file */