realpath: prefer the buffer size given by pathconf()
[nasm.git] / output / outmac64.c
blobcc8f6ba20b0fa320512bd7d15d0b38b60dd8ebcf
1 /* ----------------------------------------------------------------------- *
2 *
3 * Copyright 1996-2016 The NASM Authors - All Rights Reserved
4 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
9 * conditions are met:
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * ----------------------------------------------------------------------- */
/*
 * outmac64.c   output routines for the Netwide Assembler to produce
 *              NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (x86_64) object files
 */

/* Most of this file is, like Mach-O itself, based on a.out. For more
 * guidelines see outaout.c. */
42 #include "compiler.h"
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <ctype.h>
48 #include <inttypes.h>
50 #include "nasm.h"
51 #include "nasmlib.h"
52 #include "saa.h"
53 #include "raa.h"
54 #include "output/outform.h"
55 #include "output/outlib.h"
57 #if defined(OF_MACHO64)
/* On-disk sizes, in bytes, of the Mach-O structures this backend emits */
#define MACHO_HEADER64_SIZE             32
#define MACHO_SEGCMD64_SIZE             72
#define MACHO_SECTCMD64_SIZE            80
#define MACHO_SYMCMD_SIZE               24
#define MACHO_NLIST64_SIZE              16
#define MACHO_RELINFO64_SIZE            8
#define MACHO_DATA_IN_CODE_CMD_SIZE     16

/* Values placed in the Mach-O file header */
#define MH_MAGIC_64                     0xfeedfacf
#define CPU_TYPE_X86_64                 0x01000007  /* x86-64 platform */
#define CPU_SUBTYPE_I386_ALL            3           /* all-x86 compatible */
#define MH_OBJECT                       0x1         /* object file */

/* Load command identifiers */
#define LC_SEGMENT_64                   0x19        /* segment load command */
#define LC_SYMTAB                       0x2         /* symbol table load command */
#define LC_DATA_IN_CODE                 0x29        /* data in code command */

/* Virtual memory protection bits */
#define VM_PROT_NONE                    0x00
#define VM_PROT_READ                    0x01
#define VM_PROT_WRITE                   0x02
#define VM_PROT_EXECUTE                 0x04

/* Both names expand to read + write + execute */
#define VM_PROT_ALL     (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
#define VM_PROT_DEFAULT VM_PROT_ALL
/*
 * One output section.  The leading members are bookkeeping used only
 * inside the assembler; the trailing members hold the values that are
 * emitted into the object file.
 */
struct section {
    /* ---- assembler-internal state ---- */
    struct section *next;       /* next section in the list */
    struct SAA *data;           /* accumulated section contents */
    int32_t index;              /* NASM segment index of this section */
    struct reloc *relocs;       /* relocation list for this section */
    int align;                  /* log2 of the alignment; -1 until set */

    /* ---- values written to the file ---- */
    char sectname[16];          /* name of this section */
    char segname[16];           /* name of the segment holding it */
    uint64_t addr;              /* in-memory address (subject to alignment) */
    uint64_t size;              /* in-memory and in-file size */
    uint64_t offset;            /* in-file offset */
    uint32_t pad;               /* padding bytes before section */
    uint32_t nreloc;            /* relocation entry count */
    uint32_t flags;             /* type and attributes (masked) */
    uint32_t extreloc;          /* external relocations */
};
#define SECTION_TYPE                0x000000ff  /* section type mask */

#define S_REGULAR                   (0x0)       /* standard section */
#define S_ZEROFILL                  (0x1)       /* zerofill, in-memory only */

#define SECTION_ATTRIBUTES_SYS      0x00ffff00  /* system setable attributes */
#define S_ATTR_SOME_INSTRUCTIONS    0x00000400  /* section contains some
                                                   machine instructions */
#define S_ATTR_EXT_RELOC            0x00000200  /* section has external
                                                   relocation entries */
#define S_ATTR_LOC_RELOC            0x00000100  /* section has local
                                                   relocation entries */
#define S_ATTR_PURE_INSTRUCTIONS    0x80000000  /* section uses pure
                                                   machine instructions */

/*
 * Translation table from NASM section names to the Mach-O segment and
 * section names plus the initial section flags.  The table ends with an
 * all-NULL sentinel entry.
 *
 * flags is unsigned: S_ATTR_PURE_INSTRUCTIONS occupies bit 31, which
 * does not fit in an int32_t, and struct section stores the value in a
 * uint32_t anyway.
 */
static struct sectmap {
    const char *nasmsect;       /* name used in NASM source */
    const char *segname;        /* Mach-O segment name */
    const char *sectname;       /* Mach-O section name */
    const uint32_t flags;       /* initial section type and attributes */
} sectmap[] = {
    {".text",   "__TEXT", "__text",
     S_REGULAR|S_ATTR_SOME_INSTRUCTIONS|S_ATTR_PURE_INSTRUCTIONS},
    {".data",   "__DATA", "__data",  S_REGULAR},
    {".rodata", "__DATA", "__const", S_REGULAR},
    {".bss",    "__DATA", "__bss",   S_ZEROFILL},
    {NULL, NULL, NULL, 0}
};
/*
 * One relocation entry.  Entries are chained per section through `next';
 * the remaining members are the values written to the file.
 */
struct reloc {
    /* ---- assembler-internal state ---- */
    struct reloc *next;

    /* ---- values written to the file ---- */
    int32_t addr;               /* op's offset in section */
    uint32_t snum:24;           /* symbol index if ext, otherwise the
                                 * in-file section number */
    uint32_t pcrel:1;           /* relative relocation */
    uint32_t length:2;          /* 0=byte, 1=word, 2=int32_t, 3=int64_t */
    uint32_t ext:1;             /* external symbol referenced */
    uint32_t type:4;            /* reloc type */
};

#define R_ABS           0               /* absolute relocation */
#define R_SCATTERED     0x80000000      /* reloc entry is scattered if
                                         * highest bit == 1 */
/*
 * One symbol table entry.  The leading members are bookkeeping used only
 * inside the assembler; the trailing members are written to the file.
 */
struct symbol {
    /* ---- assembler-internal state ---- */
    struct symbol *next;        /* next symbol in the list */
    char *name;                 /* name of this symbol */
    int32_t initial_snum;       /* symbol number used above in reloc */
    int32_t snum;               /* true snum for reloc */

    /* ---- values written to the file ---- */
    uint32_t strx;              /* string table index */
    uint8_t type;               /* symbol type */
    uint8_t sect;               /* NO_SECT or section number */
    uint16_t desc;              /* for stab debugging, 0 for us */
    uint64_t value;             /* offset of symbol in section */
};

/* symbol type bits */
#define N_EXT   0x01            /* global or external symbol */

#define N_UNDF  0x0             /* undefined symbol | n_sect == */
#define N_ABS   0x2             /* absolute symbol  |  NO_SECT  */
#define N_SECT  0xe             /* defined symbol, n_sect holds
                                 * section number */

#define N_TYPE  0x0e            /* type bit mask */

#define DEFAULT_SECTION_ALIGNMENT 0     /* byte (i.e. no) alignment */

/* special section number values */
#define NO_SECT         0       /* no section, invalid */
#define MAX_SECT        255     /* maximum number of sections */
/* Section list (head and tail link) and the table indexed by file index */
static struct section *sects;
static struct section **sectstail;
static struct section **sectstab;

/* Symbol list (head and tail link) and the number of symbols defined */
static struct symbol *syms;
static struct symbol **symstail;
static uint32_t nsyms;

/* These variables are set by macho_layout_symbols() to organize
   the symbol table and string table in order the dynamic linker
   expects. They are then used in macho_write() to put out the
   symbols and strings in that order.

   The order of the symbol table is:
     local symbols
     defined external symbols (sorted by name)
     undefined external symbols (sorted by name)

   The order of the string table is:
     strings for external symbols
     strings for local symbols
 */
static uint32_t ilocalsym = 0;
static uint32_t iextdefsym = 0;
static uint32_t iundefsym = 0;
static uint32_t nlocalsym;
static uint32_t nextdefsym;
static uint32_t nundefsym;
static struct symbol **extdefsyms = NULL;
static struct symbol **undefsyms = NULL;

static struct RAA *extsyms;
static struct SAA *strs;
static uint32_t strslen;

extern struct ofmt of_macho64;

/* Global file information. This should be cleaned up into either
   a structure or as function arguments.  */
static uint32_t head_ncmds = 0;
static uint32_t head_sizeofcmds = 0;
static uint64_t seg_filesize = 0;
static uint64_t seg_vmsize = 0;
static uint32_t seg_nsects = 0;
static uint64_t rel_padcnt = 0;
/*
 * Copy the string xsrc into the char ARRAY xdst, truncating if needed.
 * The whole buffer is zeroed first, so the result is always
 * NUL-terminated and NUL-padded.  xdst must be a real array (sizeof is
 * applied to it) and is evaluated more than once.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a
 * single statement, e.g. as the body of an unbraced if/else.
 */
#define xstrncpy(xdst, xsrc)                                    \
    do {                                                        \
        memset((xdst), '\0', sizeof(xdst));                     \
        strncpy((xdst), (xsrc), sizeof(xdst) - 1);              \
    } while (0)

#define alignint32_t(x)                                         \
    ALIGN(x, sizeof(int32_t))   /* align x to int32_t boundary */

#define alignint64_t(x)                                         \
    ALIGN(x, sizeof(int64_t))   /* align x to int64_t boundary */
239 static void debug_reloc (struct reloc *);
240 static void debug_section_relocs (struct section *) _unused;
242 static struct section *get_section_by_name(const char *segname,
243 const char *sectname)
245 struct section *s;
247 for (s = sects; s != NULL; s = s->next)
248 if (!strcmp(s->segname, segname) && !strcmp(s->sectname, sectname))
249 break;
251 return s;
254 static struct section *get_section_by_index(const int32_t index)
256 struct section *s;
258 for (s = sects; s != NULL; s = s->next)
259 if (index == s->index)
260 break;
262 return s;
265 static int32_t get_section_index_by_name(const char *segname,
266 const char *sectname)
268 struct section *s;
270 for (s = sects; s != NULL; s = s->next)
271 if (!strcmp(s->segname, segname) && !strcmp(s->sectname, sectname))
272 return s->index;
274 return -1;
277 static char *get_section_name_by_index(const int32_t index)
279 struct section *s;
281 for (s = sects; s != NULL; s = s->next)
282 if (index == s->index)
283 return s->sectname;
285 return NULL;
288 static uint8_t get_section_fileindex_by_index(const int32_t index)
290 struct section *s;
291 uint8_t i = 1;
293 for (s = sects; s != NULL && i < MAX_SECT; s = s->next, ++i)
294 if (index == s->index)
295 return i;
297 if (i == MAX_SECT)
298 nasm_error(ERR_WARNING,
299 "too many sections (>255) - clipped by fileindex");
301 return NO_SECT;
304 static struct symbol *get_closest_section_symbol_by_offset(uint8_t fileindex, int64_t offset)
306 struct symbol *nearest = NULL;
307 struct symbol *sym;
309 for (sym = syms; sym; sym = sym->next) {
310 if ((sym->sect != NO_SECT) && (sym->sect == fileindex)) {
311 if ((int64_t)sym->value > offset)
312 break;
313 nearest = sym;
317 if (!nearest)
318 nasm_error(ERR_FATAL, "No section for index %x offset %llx found\n",
319 fileindex, (long long)offset);
321 return nearest;
/*
 * Segment number reserved for the Mach-O special symbol `..gotpcrel'.
 * Special symbols like this one can be used with WRT to provide PIC
 * relocation types.
 */
static int32_t macho_gotpcrel_sect;
331 static void macho_init(void)
333 char zero = 0;
335 sects = NULL;
336 sectstail = &sects;
338 syms = NULL;
339 symstail = &syms;
340 nsyms = 0;
341 nlocalsym = 0;
342 nextdefsym = 0;
343 nundefsym = 0;
345 extsyms = raa_init();
346 strs = saa_init(1L);
348 /* string table starts with a zero byte - don't ask why */
349 saa_wbytes(strs, &zero, sizeof(char));
350 strslen = 1;
352 /* add special symbol for ..gotpcrel */
353 macho_gotpcrel_sect = seg_alloc();
354 macho_gotpcrel_sect++;
355 define_label("..gotpcrel", macho_gotpcrel_sect, 0L, NULL, false, false);
358 static void sect_write(struct section *sect,
359 const uint8_t *data, uint32_t len)
361 saa_wbytes(sect->data, data, len);
362 sect->size += len;
365 static int32_t add_reloc(struct section *sect, int32_t section,
366 int pcrel, int bytes, int64_t reloff)
368 struct reloc *r;
369 struct symbol *sym;
370 int32_t fi;
371 int32_t adjustment = 0;
373 /* NeXT as puts relocs in reversed order (address-wise) into the
374 ** files, so we do the same, doesn't seem to make much of a
375 ** difference either way */
376 r = nasm_malloc(sizeof(struct reloc));
377 r->next = sect->relocs;
378 sect->relocs = r;
380 /* the current end of the section will be the symbol's address for
381 ** now, might have to be fixed by macho_fixup_relocs() later on. make
382 ** sure we don't make the symbol scattered by setting the highest
383 ** bit by accident */
384 r->addr = sect->size & ~R_SCATTERED;
385 r->ext = 1;
386 r->pcrel = (pcrel ? 1 : 0);
388 /* match byte count 1, 2, 4, 8 to length codes 0, 1, 2, 3 respectively */
389 switch(bytes){
390 case 1:
391 r->length = 0;
392 break;
393 case 2:
394 r->length = 1;
395 break;
396 case 4:
397 r->length = 2;
398 break;
399 case 8:
400 r->length = 3;
401 break;
402 default:
403 break;
406 /* set default relocation values */
407 r->type = 0; // X86_64_RELOC_UNSIGNED
408 r->snum = R_ABS; // Absolute Symbol (indicates no relocation)
410 /* absolute relocation */
411 if (pcrel == 0) {
413 /* intra-section */
414 if (section == NO_SEG) {
415 // r->snum = R_ABS; // Set above
417 /* inter-section */
418 } else {
419 fi = get_section_fileindex_by_index(section);
421 /* external */
422 if (fi == NO_SECT) {
423 r->snum = raa_read(extsyms, section);
425 /* local */
426 } else {
427 sym = get_closest_section_symbol_by_offset(fi, reloff);
428 r->snum = sym->initial_snum;
429 adjustment = sym->value;
433 /* relative relocation */
434 } else if (pcrel == 1) {
436 /* intra-section */
437 if (section == NO_SEG) {
438 r->type = 1; // X86_64_RELOC_SIGNED
440 /* inter-section */
441 } else {
442 r->type = 1; // X86_64_RELOC_SIGNED
443 fi = get_section_fileindex_by_index(section);
445 /* external */
446 if (fi == NO_SECT) {
447 sect->extreloc = 1;
448 r->snum = raa_read(extsyms, section);
450 /* local */
451 } else {
452 sym = get_closest_section_symbol_by_offset(fi, reloff);
453 r->snum = sym->initial_snum;
454 adjustment = sym->value;
458 /* subtractor */
459 } else if (pcrel == 2) {
460 r->pcrel = 0;
461 r->type = 5; // X86_64_RELOC_SUBTRACTOR
463 /* gotpcrel */
464 } else if (pcrel == 3) {
465 r->type = 4; // X86_64_RELOC_GOT
466 r->snum = macho_gotpcrel_sect;
468 /* gotpcrel MOVQ load */
469 } else if (pcrel == 4) {
470 r->type = 3; // X86_64_RELOC_GOT_LOAD
471 r->snum = macho_gotpcrel_sect;
474 ++sect->nreloc;
476 return adjustment;
479 static void macho_output(int32_t secto, const void *data,
480 enum out_type type, uint64_t size,
481 int32_t section, int32_t wrt)
483 struct section *s, *sbss;
484 int64_t addr;
485 uint8_t mydata[16], *p, gotload;
487 if (secto == NO_SEG) {
488 if (type != OUT_RESERVE)
489 nasm_error(ERR_NONFATAL, "attempt to assemble code in "
490 "[ABSOLUTE] space");
492 return;
495 s = get_section_by_index(secto);
497 if (s == NULL) {
498 nasm_error(ERR_WARNING, "attempt to assemble code in"
499 " section %d: defaulting to `.text'", secto);
500 s = get_section_by_name("__TEXT", "__text");
502 /* should never happen */
503 if (s == NULL)
504 nasm_error(ERR_PANIC, "text section not found");
507 sbss = get_section_by_name("__DATA", "__bss");
509 if (s == sbss && type != OUT_RESERVE) {
510 nasm_error(ERR_WARNING, "attempt to initialize memory in the"
511 " BSS section: ignored");
512 s->size += realsize(type, size);
513 return;
516 memset(mydata, 0, sizeof(mydata));
518 switch (type) {
519 case OUT_RESERVE:
520 if (s != sbss) {
521 nasm_error(ERR_WARNING, "uninitialized space declared in"
522 " %s section: zeroing",
523 get_section_name_by_index(secto));
525 sect_write(s, NULL, size);
526 } else
527 s->size += size;
529 break;
531 case OUT_RAWDATA:
532 if (section != NO_SEG)
533 nasm_error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG");
535 sect_write(s, data, size);
536 break;
538 case OUT_ADDRESS:
540 int asize = abs((int)size);
542 addr = *(int64_t *)data;
543 if (section != NO_SEG) {
544 if (section % 2) {
545 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
546 " section base references");
547 } else {
548 if (wrt == NO_SEG) {
549 if (asize < 8) {
550 nasm_error(ERR_NONFATAL, "Mach-O 64-bit format does not support"
551 " 32-bit absolute addresses");
553 Seemingly, Mach-O's X86_64_RELOC_SUBTRACTOR would require
554 pre-determined knowledge of where the image base would be,
555 making it impractical for use in intermediate object files
557 } else {
558 addr -= add_reloc(s, section, 0, asize, addr); // X86_64_RELOC_UNSIGNED
560 } else {
561 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
562 " this use of WRT");
567 p = mydata;
568 WRITEADDR(p, addr, asize);
569 sect_write(s, mydata, asize);
570 break;
573 case OUT_REL2ADR:
574 p = mydata;
575 WRITESHORT(p, *(int64_t *)data);
577 if (section == secto)
578 nasm_error(ERR_PANIC, "intra-section OUT_REL2ADR");
580 if (section == NO_SEG) {
581 /* Do nothing */
582 } else if (section % 2) {
583 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
584 " section base references");
585 } else {
586 nasm_error(ERR_NONFATAL, "Unsupported non-32-bit"
587 " Macho-O relocation [2]");
590 sect_write(s, mydata, 2L);
591 break;
593 case OUT_REL4ADR:
594 p = mydata;
595 addr = *(int64_t *)data + 4 - size;
597 if (section == secto)
598 nasm_error(ERR_PANIC, "intra-section OUT_REL4ADR");
600 if (section != NO_SEG && section % 2) {
601 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
602 " section base references");
603 } else {
604 if (wrt == NO_SEG) {
605 addr -= add_reloc(s, section, 1, 4, addr); // X86_64_RELOC_SIGNED/BRANCH
606 } else if (wrt == macho_gotpcrel_sect) {
607 if (s->data->datalen > 1) {
608 saa_fread(s->data, s->data->datalen-2, &gotload, 1); // Retrieve Instruction Opcode
609 } else {
610 gotload = 0;
612 if (gotload == 0x8B) { // Check for MOVQ Opcode
613 addr -= add_reloc(s, section, 4, 4, addr); // X86_64_GOT_LOAD (MOVQ load)
614 } else {
615 addr -= add_reloc(s, section, 3, 4, addr); // X86_64_GOT
617 } else {
618 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
619 " this use of WRT");
620 wrt = NO_SEG; /* we can at least _try_ to continue */
624 WRITELONG(p, addr);
625 sect_write(s, mydata, 4L);
626 break;
628 default:
629 nasm_error(ERR_PANIC, "unknown output type?");
630 break;
634 static int32_t macho_section(char *name, int pass, int *bits)
636 int32_t index, originalIndex;
637 char *sectionAttributes;
638 struct sectmap *sm;
639 struct section *s;
641 (void)pass;
643 /* Default to 64 bits. */
644 if (!name) {
645 *bits = 64;
646 name = ".text";
647 sectionAttributes = NULL;
648 } else {
649 sectionAttributes = name;
650 name = nasm_strsep(&sectionAttributes, " \t");
653 for (sm = sectmap; sm->nasmsect != NULL; ++sm) {
654 /* make lookup into section name translation table */
655 if (!strcmp(name, sm->nasmsect)) {
656 char *currentAttribute;
658 /* try to find section with that name */
659 originalIndex = index = get_section_index_by_name(sm->segname,
660 sm->sectname);
662 /* create it if it doesn't exist yet */
663 if (index == -1) {
664 s = *sectstail = nasm_malloc(sizeof(struct section));
665 s->next = NULL;
666 sectstail = &s->next;
668 s->data = saa_init(1L);
669 s->index = seg_alloc();
670 s->relocs = NULL;
671 s->align = -1;
672 s->pad = -1;
673 s->offset = -1;
675 xstrncpy(s->segname, sm->segname);
676 xstrncpy(s->sectname, sm->sectname);
677 s->size = 0;
678 s->nreloc = 0;
679 s->flags = sm->flags;
681 index = s->index;
682 } else {
683 s = get_section_by_index(index);
686 while ((NULL != sectionAttributes)
687 && (currentAttribute = nasm_strsep(&sectionAttributes, " \t"))) {
688 if (0 != *currentAttribute) {
689 if (!nasm_strnicmp("align=", currentAttribute, 6)) {
690 char *end;
691 int newAlignment, value;
693 value = strtoul(currentAttribute + 6, (char**)&end, 0);
694 newAlignment = alignlog2_32(value);
696 if (0 != *end) {
697 nasm_error(ERR_PANIC,
698 "unknown or missing alignment value \"%s\" "
699 "specified for section \"%s\"",
700 currentAttribute + 6,
701 name);
702 return NO_SEG;
703 } else if (0 > newAlignment) {
704 nasm_error(ERR_PANIC,
705 "alignment of %d (for section \"%s\") is not "
706 "a power of two",
707 value,
708 name);
709 return NO_SEG;
712 if ((-1 != originalIndex)
713 && (s->align != newAlignment)
714 && (s->align != -1)) {
715 nasm_error(ERR_PANIC,
716 "section \"%s\" has already been specified "
717 "with alignment %d, conflicts with new "
718 "alignment of %d",
719 name,
720 (1 << s->align),
721 value);
722 return NO_SEG;
725 s->align = newAlignment;
726 } else if (!nasm_stricmp("data", currentAttribute)) {
727 /* Do nothing; 'data' is implicit */
728 } else {
729 nasm_error(ERR_PANIC,
730 "unknown section attribute %s for section %s",
731 currentAttribute,
732 name);
733 return NO_SEG;
738 return index;
742 nasm_error(ERR_PANIC, "invalid section name %s", name);
743 return NO_SEG;
746 static void macho_symdef(char *name, int32_t section, int64_t offset,
747 int is_global, char *special)
749 struct symbol *sym;
751 if (special) {
752 nasm_error(ERR_NONFATAL, "The Mach-O output format does "
753 "not support any special symbol types");
754 return;
757 if (is_global == 3) {
758 nasm_error(ERR_NONFATAL, "The Mach-O format does not "
759 "(yet) support forward reference fixups.");
760 return;
763 if (name[0] == '.' && name[1] == '.' && name[2] != '@') {
765 * This is a NASM special symbol. We never allow it into
766 * the Macho-O symbol table, even if it's a valid one. If it
767 * _isn't_ a valid one, we should barf immediately.
769 if (strcmp(name, "..gotpcrel"))
770 nasm_error(ERR_NONFATAL, "unrecognized special symbol `%s'", name);
771 return;
774 sym = *symstail = nasm_malloc(sizeof(struct symbol));
775 sym->next = NULL;
776 symstail = &sym->next;
778 sym->name = name;
779 sym->strx = strslen;
780 sym->type = 0;
781 sym->desc = 0;
782 sym->value = offset;
783 sym->initial_snum = -1;
785 /* external and common symbols get N_EXT */
786 if (is_global != 0) {
787 sym->type |= N_EXT;
790 if (section == NO_SEG) {
791 /* symbols in no section get absolute */
792 sym->type |= N_ABS;
793 sym->sect = NO_SECT;
794 } else {
795 sym->type |= N_SECT;
797 /* get the in-file index of the section the symbol was defined in */
798 sym->sect = get_section_fileindex_by_index(section);
800 /* track the initially allocated symbol number for use in future fix-ups */
801 sym->initial_snum = nsyms;
803 if (sym->sect == NO_SECT) {
805 /* remember symbol number of references to external
806 ** symbols, this works because every external symbol gets
807 ** its own section number allocated internally by nasm and
808 ** can so be used as a key */
809 extsyms = raa_write(extsyms, section, nsyms);
811 switch (is_global) {
812 case 1:
813 case 2:
814 /* there isn't actually a difference between global
815 ** and common symbols, both even have their size in
816 ** sym->value */
817 sym->type = N_EXT;
818 break;
820 default:
821 /* give an error on unfound section if it's not an
822 ** external or common symbol (assemble_file() does a
823 ** seg_alloc() on every call for them) */
824 nasm_error(ERR_PANIC, "in-file index for section %d not found",
825 section);
829 ++nsyms;
832 static void macho_sectalign(int32_t seg, unsigned int value)
834 struct section *s;
836 list_for_each(s, sects) {
837 if (s->index == seg)
838 break;
841 if (!s || !is_power2(value))
842 return;
844 value = alignlog2_32(value);
845 if (s->align < (int)value)
846 s->align = value;
/* The segment base of a section is the section itself. */
static int32_t macho_segbase(int32_t section)
{
    const int32_t base = section;

    return base;
}
/* Hand both names to standard_extension() with the extension ".o". */
static void macho_filename(char *inname, char *outname)
{
    static const char objext[] = ".o";

    standard_extension(inname, outname, objext);
}
859 extern macros_t macho_stdmac[];
861 /* Comparison function for qsort symbol layout. */
862 static int layout_compare (const struct symbol **s1,
863 const struct symbol **s2)
865 return (strcmp ((*s1)->name, (*s2)->name));
868 /* The native assembler does a few things in a similar function
870 * Remove temporary labels
871 * Sort symbols according to local, external, undefined (by name)
872 * Order the string table
874 We do not remove temporary labels right now.
876 numsyms is the total number of symbols we have. strtabsize is the
877 number entries in the string table. */
879 static void macho_layout_symbols (uint32_t *numsyms,
880 uint32_t *strtabsize)
882 struct symbol *sym, **symp;
883 uint32_t i,j;
885 *numsyms = 0;
886 *strtabsize = sizeof (char);
888 symp = &syms;
890 while ((sym = *symp)) {
891 /* Undefined symbols are now external. */
892 if (sym->type == N_UNDF)
893 sym->type |= N_EXT;
895 if ((sym->type & N_EXT) == 0) {
896 sym->snum = *numsyms;
897 *numsyms = *numsyms + 1;
898 nlocalsym++;
900 else {
901 if ((sym->type & N_TYPE) != N_UNDF) {
902 nextdefsym++;
903 } else {
904 nundefsym++;
907 /* If we handle debug info we'll want
908 to check for it here instead of just
909 adding the symbol to the string table. */
910 sym->strx = *strtabsize;
911 saa_wbytes (strs, sym->name, (int32_t)(strlen(sym->name) + 1));
912 *strtabsize += strlen(sym->name) + 1;
914 symp = &(sym->next);
917 /* Next, sort the symbols. Most of this code is a direct translation from
918 the Apple cctools symbol layout. We need to keep compatibility with that. */
919 /* Set the indexes for symbol groups into the symbol table */
920 ilocalsym = 0;
921 iextdefsym = nlocalsym;
922 iundefsym = nlocalsym + nextdefsym;
924 /* allocate arrays for sorting externals by name */
925 extdefsyms = nasm_malloc(nextdefsym * sizeof(struct symbol *));
926 undefsyms = nasm_malloc(nundefsym * sizeof(struct symbol *));
928 i = 0;
929 j = 0;
931 symp = &syms;
933 while ((sym = *symp)) {
935 if((sym->type & N_EXT) == 0) {
936 sym->strx = *strtabsize;
937 saa_wbytes (strs, sym->name, (int32_t)(strlen (sym->name) + 1));
938 *strtabsize += strlen(sym->name) + 1;
940 else {
941 if((sym->type & N_TYPE) != N_UNDF) {
942 extdefsyms[i++] = sym;
943 } else {
944 undefsyms[j++] = sym;
947 symp = &(sym->next);
950 qsort(extdefsyms, nextdefsym, sizeof(struct symbol *),
951 (int (*)(const void *, const void *))layout_compare);
952 qsort(undefsyms, nundefsym, sizeof(struct symbol *),
953 (int (*)(const void *, const void *))layout_compare);
955 for(i = 0; i < nextdefsym; i++) {
956 extdefsyms[i]->snum = *numsyms;
957 *numsyms += 1;
959 for(j = 0; j < nundefsym; j++) {
960 undefsyms[j]->snum = *numsyms;
961 *numsyms += 1;
965 /* Calculate some values we'll need for writing later. */
967 static void macho_calculate_sizes (void)
969 struct section *s;
970 int fi;
972 /* count sections and calculate in-memory and in-file offsets */
973 for (s = sects; s != NULL; s = s->next) {
974 uint64_t newaddr;
976 /* recalculate segment address based on alignment and vm size */
977 s->addr = seg_vmsize;
979 /* we need section alignment to calculate final section address */
980 if (s->align == -1)
981 s->align = DEFAULT_SECTION_ALIGNMENT;
983 newaddr = ALIGN(s->addr, 1 << s->align);
984 s->addr = newaddr;
986 seg_vmsize = newaddr + s->size;
988 /* zerofill sections aren't actually written to the file */
989 if ((s->flags & SECTION_TYPE) != S_ZEROFILL) {
991 * LLVM/Xcode as always aligns the section data to 4
992 * bytes; there is a comment in the LLVM source code that
993 * perhaps aligning to pointer size would be better.
995 s->pad = ALIGN(seg_filesize, 4) - seg_filesize;
996 s->offset = seg_filesize + s->pad;
997 seg_filesize += s->size + s->pad;
1000 ++seg_nsects;
1003 /* calculate size of all headers, load commands and sections to
1004 ** get a pointer to the start of all the raw data */
1005 if (seg_nsects > 0) {
1006 ++head_ncmds;
1007 head_sizeofcmds +=
1008 MACHO_SEGCMD64_SIZE + seg_nsects * MACHO_SECTCMD64_SIZE;
1011 if (nsyms > 0) {
1012 ++head_ncmds;
1013 head_sizeofcmds += MACHO_SYMCMD_SIZE;
1016 /* Create a table of sections by file index to avoid linear search */
1017 sectstab = nasm_malloc((seg_nsects + 1) * sizeof(*sectstab));
1018 sectstab[0] = NULL;
1019 for (s = sects, fi = 1; s != NULL; s = s->next, fi++)
1020 sectstab[fi] = s;
1023 /* Write out the header information for the file. */
1025 static void macho_write_header (void)
1027 fwriteint32_t(MH_MAGIC_64, ofile); /* magic */
1028 fwriteint32_t(CPU_TYPE_X86_64, ofile); /* CPU type */
1029 fwriteint32_t(CPU_SUBTYPE_I386_ALL, ofile); /* CPU subtype */
1030 fwriteint32_t(MH_OBJECT, ofile); /* Mach-O file type */
1031 fwriteint32_t(head_ncmds, ofile); /* number of load commands */
1032 fwriteint32_t(head_sizeofcmds, ofile); /* size of load commands */
1033 fwriteint32_t(0, ofile); /* no flags */
1034 fwriteint32_t(0, ofile); /* reserved for future use */
1037 /* Write out the segment load command at offset. */
1039 static uint32_t macho_write_segment (uint64_t offset)
1041 uint64_t rel_base = alignint64_t (offset + seg_filesize);
1042 uint32_t s_reloff = 0;
1043 struct section *s;
1045 fwriteint32_t(LC_SEGMENT_64, ofile); /* cmd == LC_SEGMENT_64 */
1047 /* size of load command including section load commands */
1048 fwriteint32_t(MACHO_SEGCMD64_SIZE +
1049 seg_nsects * MACHO_SECTCMD64_SIZE,
1050 ofile);
1052 /* in an MH_OBJECT file all sections are in one unnamed (name
1053 ** all zeros) segment */
1054 fwritezero(16, ofile);
1055 fwriteint64_t(0, ofile); /* in-memory offset */
1056 fwriteint64_t(seg_vmsize, ofile); /* in-memory size */
1057 fwriteint64_t(offset, ofile); /* in-file offset to data */
1058 fwriteint64_t(seg_filesize, ofile); /* in-file size */
1059 fwriteint32_t(VM_PROT_DEFAULT, ofile); /* maximum vm protection */
1060 fwriteint32_t(VM_PROT_DEFAULT, ofile); /* initial vm protection */
1061 fwriteint32_t(seg_nsects, ofile); /* number of sections */
1062 fwriteint32_t(0, ofile); /* no flags */
1064 /* emit section headers */
1065 for (s = sects; s != NULL; s = s->next) {
1066 nasm_write(s->sectname, sizeof(s->sectname), ofile);
1067 nasm_write(s->segname, sizeof(s->segname), ofile);
1068 fwriteint64_t(s->addr, ofile);
1069 fwriteint64_t(s->size, ofile);
1071 /* dummy data for zerofill sections or proper values */
1072 if ((s->flags & SECTION_TYPE) != S_ZEROFILL) {
1073 nasm_assert(s->pad != (uint32_t)-1);
1074 offset += s->pad;
1075 fwriteint32_t(offset, ofile);
1076 offset += s->size;
1077 /* Write out section alignment, as a power of two.
1078 e.g. 32-bit word alignment would be 2 (2^2 = 4). */
1079 if (s->align == -1)
1080 s->align = DEFAULT_SECTION_ALIGNMENT;
1081 fwriteint32_t(s->align, ofile);
1082 /* To be compatible with cctools as we emit
1083 a zero reloff if we have no relocations. */
1084 fwriteint32_t(s->nreloc ? rel_base + s_reloff : 0, ofile);
1085 fwriteint32_t(s->nreloc, ofile);
1087 s_reloff += s->nreloc * MACHO_RELINFO64_SIZE;
1088 } else {
1089 fwriteint32_t(0, ofile);
1090 fwriteint32_t(0, ofile); /* No alignment?! */
1091 fwriteint32_t(0, ofile);
1092 fwriteint32_t(0, ofile);
1095 if (s->nreloc) {
1096 s->flags |= S_ATTR_LOC_RELOC;
1097 if (s->extreloc)
1098 s->flags |= S_ATTR_EXT_RELOC;
1101 fwriteint32_t(s->flags, ofile); /* flags */
1102 fwriteint32_t(0, ofile); /* reserved */
1103 fwriteint32_t(0, ofile); /* reserved */
1105 fwriteint32_t(0, ofile); /* align */
1108 rel_padcnt = rel_base - offset;
1109 offset = rel_base + s_reloff;
1111 return offset;
1114 /* For a given chain of relocs r, write out the entire relocation
1115 chain to the object file. */
1117 static void macho_write_relocs (struct reloc *r)
1119 while (r) {
1120 uint32_t word2;
1122 fwriteint32_t(r->addr, ofile); /* reloc offset */
1124 word2 = r->snum;
1125 word2 |= r->pcrel << 24;
1126 word2 |= r->length << 25;
1127 word2 |= r->ext << 27;
1128 word2 |= r->type << 28;
1129 fwriteint32_t(word2, ofile); /* reloc data */
1130 r = r->next;
1134 /* Write out the section data. */
1135 static void macho_write_section (void)
1137 struct section *s, *s2;
1138 struct reloc *r;
1139 uint8_t fi, *p, *q, blk[8];
1140 int32_t len;
1141 int64_t l;
1143 for (s = sects; s != NULL; s = s->next) {
1144 if ((s->flags & SECTION_TYPE) == S_ZEROFILL)
1145 continue;
1147 /* Like a.out Mach-O references things in the data or bss
1148 * sections by addresses which are actually relative to the
1149 * start of the _text_ section, in the _file_. See outaout.c
1150 * for more information. */
1151 saa_rewind(s->data);
1152 for (r = s->relocs; r != NULL; r = r->next) {
1153 len = (int32_t)r->length << 1;
1154 if(len > 4) len = 8;
1155 saa_fread(s->data, r->addr, blk, len);
1156 p = q = blk;
1157 l = *p++;
1159 /* get offset based on relocation type */
1160 if (r->length > 0) {
1161 l += ((int64_t)*p++) << 8;
1163 if (r->length > 1) {
1164 l += ((int64_t)*p++) << 16;
1165 l += ((int64_t)*p++) << 24;
1168 if (r->length > 2) {
1169 l += ((int64_t)*p++) << 32;
1170 l += ((int64_t)*p++) << 40;
1171 l += ((int64_t)*p++) << 48;
1172 l += ((int64_t)*p++) << 56;
1178 /* If the relocation is internal add to the current section
1179 offset. Otherwise the only value we need is the symbol
1180 offset which we already have. The linker takes care
1181 of the rest of the address. */
1182 if (!r->ext) {
1183 /* generate final address by section address and offset */
1184 for (s2 = sects, fi = 1;
1185 s2 != NULL; s2 = s2->next, fi++) {
1186 if (fi == r->snum) {
1187 l += s2->addr;
1188 break;
1193 /* write new offset back */
1194 if (r->length == 3)
1195 WRITEDLONG(q, l);
1196 else if (r->length == 2)
1197 WRITELONG(q, l);
1198 else if (r->length == 1)
1199 WRITESHORT(q, l);
1200 else
1201 *q++ = l & 0xFF;
1203 saa_fwrite(s->data, r->addr, blk, len);
1206 /* dump the section data to file */
1207 fwritezero(s->pad, ofile);
1208 saa_fpwrite(s->data, ofile);
1211 /* pad last section up to reloc entries on int64_t boundary */
1212 fwritezero(rel_padcnt, ofile);
1214 /* emit relocation entries */
1215 for (s = sects; s != NULL; s = s->next)
1216 macho_write_relocs (s->relocs);
1219 /* Write out the symbol table. We should already have sorted this
1220 before now. */
1221 static void macho_write_symtab (void)
1223 struct symbol *sym;
1224 uint64_t i;
1226 /* we don't need to pad here since MACHO_RELINFO_SIZE == 8 */
1228 for (sym = syms; sym != NULL; sym = sym->next) {
1229 if ((sym->type & N_EXT) == 0) {
1230 fwriteint32_t(sym->strx, ofile); /* string table entry number */
1231 nasm_write(&sym->type, 1, ofile); /* symbol type */
1232 nasm_write(&sym->sect, 1, ofile); /* section */
1233 fwriteint16_t(sym->desc, ofile); /* description */
1235 /* Fix up the symbol value now that we know the final section
1236 sizes. */
1237 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1238 nasm_assert(sym->sect <= seg_nsects);
1239 sym->value += sectstab[sym->sect]->addr;
1242 fwriteint64_t(sym->value, ofile); /* value (i.e. offset) */
1246 for (i = 0; i < nextdefsym; i++) {
1247 sym = extdefsyms[i];
1248 fwriteint32_t(sym->strx, ofile);
1249 nasm_write(&sym->type, 1, ofile); /* symbol type */
1250 nasm_write(&sym->sect, 1, ofile); /* section */
1251 fwriteint16_t(sym->desc, ofile); /* description */
1253 /* Fix up the symbol value now that we know the final section
1254 sizes. */
1255 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1256 nasm_assert(sym->sect <= seg_nsects);
1257 sym->value += sectstab[sym->sect]->addr;
1260 fwriteint64_t(sym->value, ofile); /* value (i.e. offset) */
1263 for (i = 0; i < nundefsym; i++) {
1264 sym = undefsyms[i];
1265 fwriteint32_t(sym->strx, ofile);
1266 nasm_write(&sym->type, 1, ofile); /* symbol type */
1267 nasm_write(&sym->sect, 1, ofile); /* section */
1268 fwriteint16_t(sym->desc, ofile); /* description */
1270 // Fix up the symbol value now that we know the final section sizes.
1271 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1272 nasm_assert(sym->sect <= seg_nsects);
1273 sym->value += sectstab[sym->sect]->addr;
1276 fwriteint64_t(sym->value, ofile); // value (i.e. offset)
1281 /* Fixup the snum in the relocation entries, we should be
1282 doing this only for externally referenced symbols. */
1283 static void macho_fixup_relocs (struct reloc *r)
1285 struct symbol *sym;
1287 while (r != NULL) {
1288 if (r->ext) {
1289 for (sym = syms; sym != NULL; sym = sym->next) {
1290 if (sym->initial_snum == r->snum) {
1291 r->snum = sym->snum;
1292 break;
1296 r = r->next;
1300 /* Write out the object file. */
1302 static void macho_write (void)
1304 uint64_t offset = 0;
1306 /* mach-o object file structure:
1308 ** mach header
1309 ** uint32_t magic
1310 ** int cpu type
1311 ** int cpu subtype
1312 ** uint32_t mach file type
1313 ** uint32_t number of load commands
1314 ** uint32_t size of all load commands
1315 ** (includes section struct size of segment command)
1316 ** uint32_t flags
1318 ** segment command
1319 ** uint32_t command type == LC_SEGMENT_64
1320 ** uint32_t size of load command
1321 ** (including section load commands)
1322 ** char[16] segment name
1323 ** uint64_t in-memory offset
1324 ** uint64_t in-memory size
1325 ** uint64_t in-file offset to data area
1326 ** uint64_t in-file size
1327 ** (in-memory size excluding zerofill sections)
1328 ** int maximum vm protection
1329 ** int initial vm protection
1330 ** uint32_t number of sections
1331 ** uint32_t flags
1333 ** section commands
1334 ** char[16] section name
1335 ** char[16] segment name
1336 ** uint64_t in-memory offset
1337 ** uint64_t in-memory size
1338 ** uint32_t in-file offset
1339 ** uint32_t alignment
1340 ** (irrelevant in MH_OBJECT)
1341 ** uint32_t in-file offset of relocation entires
1342 ** uint32_t number of relocations
1343 ** uint32_t flags
1344 ** uint32_t reserved
1345 ** uint32_t reserved
1347 ** symbol table command
1348 ** uint32_t command type == LC_SYMTAB
1349 ** uint32_t size of load command
1350 ** uint32_t symbol table offset
1351 ** uint32_t number of symbol table entries
1352 ** uint32_t string table offset
1353 ** uint32_t string table size
1355 ** raw section data
1357 ** padding to int64_t boundary
1359 ** relocation data (struct reloc)
1360 ** int32_t offset
1361 ** uint data (symbolnum, pcrel, length, extern, type)
1363 ** symbol table data (struct nlist)
1364 ** int32_t string table entry number
1365 ** uint8_t type
1366 ** (extern, absolute, defined in section)
1367 ** uint8_t section
1368 ** (0 for global symbols, section number of definition (>= 1, <=
1369 ** 254) for local symbols, size of variable for common symbols
1370 ** [type == extern])
1371 ** int16_t description
1372 ** (for stab debugging format)
1373 ** uint64_t value (i.e. file offset) of symbol or stab offset
1375 ** string table data
1376 ** list of null-terminated strings
1379 /* Emit the Mach-O header. */
1380 macho_write_header();
1382 offset = MACHO_HEADER64_SIZE + head_sizeofcmds;
1384 /* emit the segment load command */
1385 if (seg_nsects > 0)
1386 offset = macho_write_segment (offset);
1387 else
1388 nasm_error(ERR_WARNING, "no sections?");
1390 if (nsyms > 0) {
1391 /* write out symbol command */
1392 fwriteint32_t(LC_SYMTAB, ofile); /* cmd == LC_SYMTAB */
1393 fwriteint32_t(MACHO_SYMCMD_SIZE, ofile); /* size of load command */
1394 fwriteint32_t(offset, ofile); /* symbol table offset */
1395 fwriteint32_t(nsyms, ofile); /* number of symbol
1396 ** table entries */
1397 offset += nsyms * MACHO_NLIST64_SIZE;
1398 fwriteint32_t(offset, ofile); /* string table offset */
1399 fwriteint32_t(strslen, ofile); /* string table size */
1402 /* emit section data */
1403 if (seg_nsects > 0)
1404 macho_write_section ();
1406 /* emit symbol table if we have symbols */
1407 if (nsyms > 0)
1408 macho_write_symtab ();
1410 /* we don't need to pad here since MACHO_NLIST64_SIZE == 16 */
1412 /* emit string table */
1413 saa_fpwrite(strs, ofile);
1415 /* We do quite a bit here, starting with finalizing all of the data
1416 for the object file, writing, and then freeing all of the data from
1417 the file. */
1419 static void macho_cleanup(int debuginfo)
1421 struct section *s;
1422 struct reloc *r;
1423 struct symbol *sym;
1425 (void)debuginfo;
1427 /* Sort all symbols. */
1428 macho_layout_symbols (&nsyms, &strslen);
1430 /* Fixup relocation entries */
1431 for (s = sects; s != NULL; s = s->next) {
1432 macho_fixup_relocs (s->relocs);
1435 /* First calculate and finalize needed values. */
1436 macho_calculate_sizes();
1437 macho_write();
1439 /* free up everything */
1440 while (sects->next) {
1441 s = sects;
1442 sects = sects->next;
1444 saa_free(s->data);
1445 while (s->relocs != NULL) {
1446 r = s->relocs;
1447 s->relocs = s->relocs->next;
1448 nasm_free(r);
1451 nasm_free(s);
1454 saa_free(strs);
1455 raa_free(extsyms);
1457 while (syms) {
1458 sym = syms;
1459 syms = syms->next;
1460 nasm_free (sym);
1463 nasm_free(extdefsyms);
1464 nasm_free(undefsyms);
1465 nasm_free(sectstab);
1468 /* Debugging routines. */
1469 static void debug_reloc (struct reloc *r)
1471 fprintf (stdout, "reloc:\n");
1472 fprintf (stdout, "\taddr: %"PRId32"\n", r->addr);
1473 fprintf (stdout, "\tsnum: %d\n", r->snum);
1474 fprintf (stdout, "\tpcrel: %d\n", r->pcrel);
1475 fprintf (stdout, "\tlength: %d\n", r->length);
1476 fprintf (stdout, "\text: %d\n", r->ext);
1477 fprintf (stdout, "\ttype: %d\n", r->type);
1480 static void debug_section_relocs (struct section *s)
1482 struct reloc *r = s->relocs;
1484 fprintf (stdout, "relocs for section %s:\n\n", s->sectname);
1486 while (r != NULL) {
1487 debug_reloc (r);
1488 r = r->next;
/*
 * Output format descriptor for the "macho64" format.  It bundles two
 * descriptive strings with the debug-format entries and the macho_*
 * callbacks defined in this file; the last entry is macho_cleanup.
 *
 * NOTE(review): the embedded line numbers jump from 1494 to 1497, so
 * two initializer entries are not visible in this view -- check the
 * initializer against the declaration of struct ofmt before editing.
 * The meaning of each position is fixed by that declaration, which is
 * not shown here.
 */
1492 struct ofmt of_macho64 = {
1493 "NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (x86_64) object files",
1494 "macho64",
1497 null_debug_arr,
1498 &null_debug_form,
1499 macho_stdmac,
1500 macho_init,
1501 null_setinfo,
1502 macho_output,
1503 macho_symdef,
1504 macho_section,
1505 macho_sectalign,
1506 macho_segbase,
1507 null_directive,
1508 macho_filename,
1509 macho_cleanup
1512 #endif
1515 * Local Variables:
1516 * mode:c
1517 * c-basic-offset:4
1518 * End:
1520 * end of file */