outmacho: dwarf debug (2/4)
[nasm.git] / output / outmacho.c
blob75bcedf6764b8a430911b535ae88430110cd37ce
1 /* ----------------------------------------------------------------------- *
3 * Copyright 1996-2017 The NASM Authors - All Rights Reserved
4 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
9 * conditions are met:
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * ----------------------------------------------------------------------- */
/*
 * outmacho.c   output routines for the Netwide Assembler to produce
 *              NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X object files
 */
39 #include "compiler.h"
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <ctype.h>
46 #include "nasm.h"
47 #include "nasmlib.h"
48 #include "labels.h"
49 #include "error.h"
50 #include "saa.h"
51 #include "raa.h"
52 #include "rbtree.h"
53 #include "outform.h"
54 #include "outlib.h"
56 #if defined(OF_MACHO) || defined(OF_MACHO64)
58 /* Mach-O in-file header structure sizes */
59 #define MACHO_HEADER_SIZE 28
60 #define MACHO_SEGCMD_SIZE 56
61 #define MACHO_SECTCMD_SIZE 68
62 #define MACHO_SYMCMD_SIZE 24
63 #define MACHO_NLIST_SIZE 12
64 #define MACHO_RELINFO_SIZE 8
66 #define MACHO_HEADER64_SIZE 32
67 #define MACHO_SEGCMD64_SIZE 72
68 #define MACHO_SECTCMD64_SIZE 80
69 #define MACHO_NLIST64_SIZE 16
71 /* Mach-O file header values */
72 #define MH_MAGIC 0xfeedface
73 #define MH_MAGIC_64 0xfeedfacf
74 #define CPU_TYPE_I386 7 /* x86 platform */
75 #define CPU_TYPE_X86_64 0x01000007 /* x86-64 platform */
76 #define CPU_SUBTYPE_I386_ALL 3 /* all-x86 compatible */
77 #define MH_OBJECT 0x1 /* object file */
79 /* Mach-O header flags */
80 #define MH_SUBSECTIONS_VIA_SYMBOLS 0x2000
82 /* Mach-O load commands */
83 #define LC_SEGMENT 0x1 /* 32-bit segment load cmd */
84 #define LC_SEGMENT_64 0x19 /* 64-bit segment load cmd */
85 #define LC_SYMTAB 0x2 /* symbol table load command */
87 /* Mach-O relocations numbers */
89 /* Generic relocs, used by i386 Mach-O */
90 #define GENERIC_RELOC_VANILLA 0 /* Generic relocation */
91 #define GENERIC_RELOC_TLV 5 /* Thread local */
93 #define X86_64_RELOC_UNSIGNED 0 /* Absolute address */
94 #define X86_64_RELOC_SIGNED 1 /* Signed 32-bit disp */
95 #define X86_64_RELOC_BRANCH 2 /* CALL/JMP with 32-bit disp */
96 #define X86_64_RELOC_GOT_LOAD 3 /* MOVQ of GOT entry */
97 #define X86_64_RELOC_GOT 4 /* Different GOT entry */
98 #define X86_64_RELOC_SUBTRACTOR 5 /* Subtracting two symbols */
99 #define X86_64_RELOC_SIGNED_1 6 /* SIGNED with -1 addend */
100 #define X86_64_RELOC_SIGNED_2 7 /* SIGNED with -2 addend */
101 #define X86_64_RELOC_SIGNED_4 8 /* SIGNED with -4 addend */
102 #define X86_64_RELOC_TLV 9 /* Thread local */
104 /* Mach-O VM permission constants */
105 #define VM_PROT_NONE (0x00)
106 #define VM_PROT_READ (0x01)
107 #define VM_PROT_WRITE (0x02)
108 #define VM_PROT_EXECUTE (0x04)
110 #define VM_PROT_DEFAULT (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
111 #define VM_PROT_ALL (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
113 /* Our internal relocation types */
/*
 * Relocation kinds used internally by this backend.  The numeric order
 * matters: RL_MAX_32 / RL_MAX_64 name the highest enumerator each
 * format accepts, and add_reloc() compares against that limit.
 */
enum reltype {
    RL_ABS     = 0,     /* Absolute relocation */
    RL_REL     = 1,     /* Relative relocation */
    RL_TLV     = 2,     /* Thread local */
    RL_BRANCH  = 3,     /* Relative direct branch */
    RL_SUB     = 4,     /* X86_64_RELOC_SUBTRACT */
    RL_GOT     = 5,     /* X86_64_RELOC_GOT */
    RL_GOTLOAD = 6      /* X86_64_RELOC_GOT_LOAD */
};
123 #define RL_MAX_32 RL_TLV
124 #define RL_MAX_64 RL_GOTLOAD
126 struct macho_fmt {
127 uint32_t ptrsize; /* Pointer size in bytes */
128 uint32_t mh_magic; /* Which magic number to use */
129 uint32_t cpu_type; /* Which CPU type */
130 uint32_t lc_segment; /* Which segment load command */
131 uint32_t header_size; /* Header size */
132 uint32_t segcmd_size; /* Segment command size */
133 uint32_t sectcmd_size; /* Section command size */
134 uint32_t nlist_size; /* Nlist (symbol) size */
135 enum reltype maxreltype; /* Maximum entry in enum reltype permitted */
136 uint32_t reloc_abs; /* Absolute relocation type */
137 uint32_t reloc_rel; /* Relative relocation type */
138 uint32_t reloc_tlv; /* Thread local relocation type */
141 static struct macho_fmt fmt;
143 static void fwriteptr(uint64_t data, FILE * fp)
145 fwriteaddr(data, fmt.ptrsize, fp);
/*
 * One output section.  The first group of members is bookkeeping for
 * the assembler; the second group is what ends up in the object file.
 */
struct section {
    /* --- nasm internal data --- */
    struct section *next;       /* next section in the `sects' list */
    struct SAA *data;           /* accumulated section contents */
    int32_t index;              /* nasm segment index */
    int32_t fileindex;          /* section number inside the file */
    struct reloc *relocs;       /* relocation list for this section */
    struct rbtree *gsyms;       /* Global symbols in section */
    int align;                  /* log2 alignment; -1 until one is chosen */
    bool by_name;               /* This section was specified by full MachO name */

    /* --- data that goes into the file --- */
    char sectname[16];          /* what this section is called */
    char segname[16];           /* segment this section will be in */
    uint64_t addr;              /* in-memory address (subject to alignment) */
    uint64_t size;              /* in-memory and -file size */
    uint64_t offset;            /* in-file offset */
    uint32_t pad;               /* padding bytes before section */
    uint32_t nreloc;            /* relocation entry count */
    uint32_t flags;             /* type and attributes (masked) */
    uint32_t extreloc;          /* external relocations */
};
171 #define SECTION_TYPE 0x000000ff /* section type mask */
173 #define S_REGULAR (0x0) /* standard section */
174 #define S_ZEROFILL (0x1) /* zerofill, in-memory only */
176 #define SECTION_ATTRIBUTES_SYS 0x00ffff00 /* system setable attributes */
177 #define S_ATTR_SOME_INSTRUCTIONS 0x00000400 /* section contains some
178 machine instructions */
179 #define S_ATTR_EXT_RELOC 0x00000200 /* section has external relocation entries */
180 #define S_ATTR_LOC_RELOC 0x00000100 /* section has local relocation entries */
181 #define S_ATTR_DEBUG 0x02000000
182 #define S_ATTR_SELF_MODIFYING_CODE 0x04000000
183 #define S_ATTR_LIVE_SUPPORT 0x08000000
184 #define S_ATTR_NO_DEAD_STRIP 0x10000000 /* no dead stripping */
185 #define S_ATTR_STRIP_STATIC_SYMS 0x20000000
186 #define S_ATTR_NO_TOC 0x40000000
187 #define S_ATTR_PURE_INSTRUCTIONS 0x80000000 /* section uses pure machine instructions */
189 #define S_NASM_TYPE_MASK 0x800004ff /* we consider these bits "section type" */
191 /* fake section for absolute symbols, *not* part of the section linked list */
192 static struct section absolute_sect;
/*
 * One relocation entry.  The bit-fields together occupy 32 bits
 * (24 + 1 + 2 + 1 + 4).
 */
struct reloc {
    /* nasm internal data */
    struct reloc *next;         /* next relocation of the same section */

    /* data that goes into the file */
    int32_t addr;               /* op's offset in section */
    uint32_t snum:24;           /* symbol index if ext is set, otherwise
                                 * the in-file section number */
    uint32_t pcrel:1;           /* relative relocation */
    uint32_t length:2;          /* 0=byte, 1=word, 2=int32_t, 3=int64_t */
    uint32_t ext:1;             /* external symbol referenced */
    uint32_t type:4;            /* reloc type */
};
209 #define R_ABS 0 /* absolute relocation */
210 #define R_SCATTERED 0x80000000 /* reloc entry is scattered if
211 ** highest bit == 1 */
213 struct symbol {
214 /* nasm internal data */
215 struct rbtree symv; /* Global symbol rbtree; "key" contains the
216 symbol offset. */
217 struct symbol *next; /* next symbol in the list */
218 char *name; /* name of this symbol */
219 int32_t initial_snum; /* symbol number used above in reloc */
220 int32_t snum; /* true snum for reloc */
222 /* data that goes into the file */
223 uint32_t strx; /* string table index */
224 uint8_t type; /* symbol type */
225 uint8_t sect; /* NO_SECT or section number */
226 uint16_t desc; /* for stab debugging, 0 for us */
229 /* symbol type bits */
230 #define N_EXT 0x01 /* global or external symbol */
232 #define N_UNDF 0x0 /* undefined symbol | n_sect == */
233 #define N_ABS 0x2 /* absolute symbol | NO_SECT */
234 #define N_SECT 0xe /* defined symbol, n_sect holds
235 ** section number */
237 #define N_TYPE 0x0e /* type bit mask */
239 #define DEFAULT_SECTION_ALIGNMENT 0 /* byte (i.e. no) alignment */
241 /* special section number values */
242 #define NO_SECT 0 /* no section, invalid */
243 #define MAX_SECT 255 /* maximum number of sections */
245 static struct section *sects, **sectstail, **sectstab;
246 static struct symbol *syms, **symstail;
247 static uint32_t nsyms;
/* These variables are set by macho_layout_symbols() to organize
   the symbol table and string table in the order the dynamic linker
   expects.  They are then used in macho_write() to put out the
   symbols and strings in that order.

   The order of the symbol table is:
     local symbols
     defined external symbols (sorted by name)
     undefined external symbols (sorted by name)

   The order of the string table is:
     strings for external symbols
     strings for local symbols
 */
263 static uint32_t ilocalsym = 0;
264 static uint32_t iextdefsym = 0;
265 static uint32_t iundefsym = 0;
266 static uint32_t nlocalsym;
267 static uint32_t nextdefsym;
268 static uint32_t nundefsym;
269 static struct symbol **extdefsyms = NULL;
270 static struct symbol **undefsyms = NULL;
272 static struct RAA *extsyms;
273 static struct SAA *strs;
274 static uint32_t strslen;
276 /* Global file information. This should be cleaned up into either
277 a structure or as function arguments. */
278 static uint32_t head_ncmds = 0;
279 static uint32_t head_sizeofcmds = 0;
280 static uint32_t head_flags = 0;
281 static uint64_t seg_filesize = 0;
282 static uint64_t seg_vmsize = 0;
283 static uint32_t seg_nsects = 0;
284 static uint64_t rel_padcnt = 0;
/*
 * Copy the string xsrc into the fixed-size character ARRAY xdst,
 * zero-filling the whole buffer first and always leaving it
 * NUL-terminated (over-long strings are truncated).
 *
 * xdst must be a real array, not a pointer, because the buffer size is
 * taken with sizeof().  The body is wrapped in do { } while (0) so the
 * macro behaves as a single statement, e.g. under an unbraced if/else.
 */
#define xstrncpy(xdst, xsrc)                                            \
    do {                                                                \
        memset((xdst), '\0', sizeof(xdst)); /* zero out whole buffer */ \
        strncpy((xdst), (xsrc), sizeof(xdst)); /* copy over string */   \
        (xdst)[sizeof(xdst) - 1] = '\0'; /* proper null-termination */  \
    } while (0)
291 #define alignint32_t(x) \
292 ALIGN(x, sizeof(int32_t)) /* align x to int32_t boundary */
294 #define alignint64_t(x) \
295 ALIGN(x, sizeof(int64_t)) /* align x to int64_t boundary */
297 #define alignptr(x) \
298 ALIGN(x, fmt.ptrsize) /* align x to output format width */
300 static struct section *get_section_by_name(const char *segname,
301 const char *sectname)
303 struct section *s;
305 for (s = sects; s != NULL; s = s->next)
306 if (!strcmp(s->segname, segname) && !strcmp(s->sectname, sectname))
307 break;
309 return s;
312 static struct section *get_section_by_index(const int32_t index)
314 struct section *s;
316 for (s = sects; s != NULL; s = s->next)
317 if (index == s->index)
318 break;
320 return s;
/* List node describing one source file for the debug information */
struct file_list {
    struct file_list *next;     /* following node */
    struct file_list *last;     /* companion link used by the list code */
    char *file_name;            /* name of the source file */
    uint32_t file;              /* number assigned to this file */
};
/* List node holding the per-section state of the debug information */
struct dw_sect_list {
    struct SAA *psaa;           /* data accumulated for this section */
    int32_t section;            /* section this node belongs to */
    uint32_t line;              /* source line number */
    uint64_t offset;            /* offset within the section */
    uint32_t file;              /* source file number */
    struct dw_sect_list *next;  /* following node */
    struct dw_sect_list *last;  /* companion link used by the list code */
};
/*
 * Snapshot handed to the debug backend by macho_output(): the size of
 * the section at that moment and the segment index being assembled.
 */
struct section_info {
    uint64_t size;              /* section size at the time of the call */
    int32_t secto;              /* segment index being assembled into */
};
345 static struct file_list *dw_head_list = 0, *dw_cur_list = 0, *dw_last_list = 0;
346 static struct dw_sect_list *dw_head_sect = 0, *dw_cur_sect = 0, *dw_last_sect = 0;
347 static uint32_t cur_line = 0, dw_num_files = 0, dw_num_sects = 0;
348 static bool dbg_immcall = false;
/*
 * Special section numbers which are used to define Mach-O special
 * symbols, which can be used with WRT to provide PIC relocation
 * types.
 */
355 static int32_t macho_tlvp_sect;
356 static int32_t macho_gotpcrel_sect;
358 static void macho_init(void)
360 sects = NULL;
361 sectstail = &sects;
363 /* Fake section for absolute symbols */
364 absolute_sect.index = NO_SEG;
366 syms = NULL;
367 symstail = &syms;
368 nsyms = 0;
369 nlocalsym = 0;
370 nextdefsym = 0;
371 nundefsym = 0;
373 extsyms = raa_init();
374 strs = saa_init(1L);
376 /* string table starts with a zero byte so index 0 is an empty string */
377 saa_wbytes(strs, zero_buffer, 1);
378 strslen = 1;
380 /* add special symbol for TLVP */
381 macho_tlvp_sect = seg_alloc() + 1;
382 define_label("..tlvp", macho_tlvp_sect, 0L, NULL, false, false);
386 static void sect_write(struct section *sect,
387 const uint8_t *data, uint32_t len)
389 saa_wbytes(sect->data, data, len);
390 sect->size += len;
394 * Find a suitable global symbol for a ..gotpcrel or ..tlvp reference
396 static struct symbol *macho_find_gsym(struct section *s,
397 uint64_t offset, bool exact)
399 struct rbtree *srb;
401 srb = rb_search(s->gsyms, offset);
403 if (!srb || (exact && srb->key != offset)) {
404 nasm_error(ERR_NONFATAL, "unable to find a suitable %s symbol"
405 " for this reference",
406 s == &absolute_sect ? "absolute" : "global");
407 return NULL;
410 return container_of(srb, struct symbol, symv);
413 static int64_t add_reloc(struct section *sect, int32_t section,
414 int64_t offset,
415 enum reltype reltype, int bytes)
417 struct reloc *r;
418 struct section *s;
419 int32_t fi;
420 int64_t adjust;
422 /* Double check this is a valid relocation type for this platform */
423 nasm_assert(reltype <= fmt.maxreltype);
425 /* the current end of the section will be the symbol's address for
426 ** now, might have to be fixed by macho_fixup_relocs() later on. make
427 ** sure we don't make the symbol scattered by setting the highest
428 ** bit by accident */
429 r = nasm_malloc(sizeof(struct reloc));
430 r->addr = sect->size & ~R_SCATTERED;
431 r->ext = 1;
432 adjust = bytes;
434 /* match byte count 1, 2, 4, 8 to length codes 0, 1, 2, 3 respectively */
435 r->length = ilog2_32(bytes);
437 /* set default relocation values */
438 r->type = fmt.reloc_abs;
439 r->pcrel = 0;
440 r->snum = R_ABS;
442 s = NULL;
443 if (section != NO_SEG)
444 s = get_section_by_index(section);
445 fi = s ? s->fileindex : NO_SECT;
447 /* absolute relocation */
448 switch (reltype) {
449 case RL_ABS:
450 if (section == NO_SEG) {
451 /* absolute (can this even happen?) */
452 r->ext = 0;
453 r->snum = R_ABS;
454 } else if (fi == NO_SECT) {
455 /* external */
456 r->snum = raa_read(extsyms, section);
457 } else {
458 /* local */
459 r->ext = 0;
460 r->snum = fi;
461 adjust = -sect->size;
463 break;
465 case RL_REL:
466 case RL_BRANCH:
467 r->type = fmt.reloc_rel;
468 r->pcrel = 1;
469 if (section == NO_SEG) {
470 /* absolute - seems to produce garbage no matter what */
471 nasm_error(ERR_NONFATAL, "Mach-O does not support relative "
472 "references to absolute addresses");
473 goto bail;
474 #if 0
475 /* This "seems" to be how it ought to work... */
477 struct symbol *sym = macho_find_gsym(&absolute_sect,
478 offset, false);
479 if (!sym)
480 goto bail;
482 sect->extreloc = 1;
483 r->snum = NO_SECT;
484 adjust = -sect->size;
485 #endif
486 } else if (fi == NO_SECT) {
487 /* external */
488 sect->extreloc = 1;
489 r->snum = raa_read(extsyms, section);
490 if (reltype == RL_BRANCH)
491 r->type = X86_64_RELOC_BRANCH;
492 else if (r->type == GENERIC_RELOC_VANILLA)
493 adjust = -sect->size;
494 } else {
495 /* local */
496 r->ext = 0;
497 r->snum = fi;
498 adjust = -sect->size;
500 break;
502 case RL_SUB:
503 r->pcrel = 0;
504 r->type = X86_64_RELOC_SUBTRACTOR;
505 break;
507 case RL_GOT:
508 r->type = X86_64_RELOC_GOT;
509 goto needsym;
511 case RL_GOTLOAD:
512 r->type = X86_64_RELOC_GOT_LOAD;
513 goto needsym;
515 case RL_TLV:
516 r->type = fmt.reloc_tlv;
517 goto needsym;
519 needsym:
520 r->pcrel = 1;
521 if (section == NO_SEG) {
522 nasm_error(ERR_NONFATAL, "Unsupported use of use of WRT");
523 } else if (fi == NO_SECT) {
524 /* external */
525 r->snum = raa_read(extsyms, section);
526 } else {
527 /* internal */
528 struct symbol *sym = macho_find_gsym(s, offset, reltype != RL_TLV);
529 if (!sym)
530 goto bail;
531 r->snum = sym->initial_snum;
533 break;
536 /* NeXT as puts relocs in reversed order (address-wise) into the
537 ** files, so we do the same, doesn't seem to make much of a
538 ** difference either way */
539 r->next = sect->relocs;
540 sect->relocs = r;
541 if (r->ext)
542 sect->extreloc = 1;
543 ++sect->nreloc;
545 return adjust;
547 bail:
548 nasm_free(r);
549 return 0;
552 static void macho_output(int32_t secto, const void *data,
553 enum out_type type, uint64_t size,
554 int32_t section, int32_t wrt)
556 struct section *s;
557 int64_t addr, offset;
558 uint8_t mydata[16], *p;
559 bool is_bss;
560 enum reltype reltype;
562 if (secto == NO_SEG) {
563 if (type != OUT_RESERVE)
564 nasm_error(ERR_NONFATAL, "attempt to assemble code in "
565 "[ABSOLUTE] space");
566 return;
569 s = get_section_by_index(secto);
571 if (s == NULL) {
572 nasm_error(ERR_WARNING, "attempt to assemble code in"
573 " section %d: defaulting to `.text'", secto);
574 s = get_section_by_name("__TEXT", "__text");
576 /* should never happen */
577 if (s == NULL)
578 nasm_panic(0, "text section not found");
581 /* debug code generation only for sections tagged with
582 * instruction attribute */
583 if (s->flags & S_ATTR_SOME_INSTRUCTIONS)
585 struct section_info sinfo;
586 sinfo.size = s->size;
587 sinfo.secto = secto;
588 dfmt->debug_output(0, &sinfo);
591 is_bss = (s->flags & SECTION_TYPE) == S_ZEROFILL;
593 if (is_bss && type != OUT_RESERVE) {
594 nasm_error(ERR_WARNING, "attempt to initialize memory in "
595 "BSS section: ignored");
596 s->size += realsize(type, size);
597 return;
600 memset(mydata, 0, sizeof(mydata));
602 switch (type) {
603 case OUT_RESERVE:
604 if (!is_bss) {
605 nasm_error(ERR_WARNING, "uninitialized space declared in"
606 " %s,%s section: zeroing", s->segname, s->sectname);
608 sect_write(s, NULL, size);
609 } else
610 s->size += size;
612 break;
614 case OUT_RAWDATA:
615 if (section != NO_SEG)
616 nasm_panic(0, "OUT_RAWDATA with other than NO_SEG");
618 sect_write(s, data, size);
619 break;
621 case OUT_ADDRESS:
623 int asize = abs((int)size);
625 addr = *(int64_t *)data;
626 if (section != NO_SEG) {
627 if (section % 2) {
628 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
629 " section base references");
630 } else if (wrt == NO_SEG) {
631 if (fmt.ptrsize == 8 && asize != 8) {
632 nasm_error(ERR_NONFATAL,
633 "Mach-O 64-bit format does not support"
634 " 32-bit absolute addresses");
635 } else {
636 add_reloc(s, section, addr, RL_ABS, asize);
638 } else {
639 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
640 " this use of WRT");
644 p = mydata;
645 WRITEADDR(p, addr, asize);
646 sect_write(s, mydata, asize);
647 break;
650 case OUT_REL2ADR:
651 nasm_assert(section != secto);
653 p = mydata;
654 offset = *(int64_t *)data;
655 addr = offset - size;
657 if (section != NO_SEG && section % 2) {
658 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
659 " section base references");
660 } else if (fmt.ptrsize == 8) {
661 nasm_error(ERR_NONFATAL, "Unsupported non-32-bit"
662 " Macho-O relocation [2]");
663 } else if (wrt != NO_SEG) {
664 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
665 " this use of WRT");
666 wrt = NO_SEG; /* we can at least _try_ to continue */
667 } else {
668 addr += add_reloc(s, section, addr+size, RL_REL, 2);
671 WRITESHORT(p, addr);
672 sect_write(s, mydata, 2);
673 break;
675 case OUT_REL4ADR:
676 nasm_assert(section != secto);
678 p = mydata;
679 offset = *(int64_t *)data;
680 addr = offset - size;
681 reltype = RL_REL;
683 if (section != NO_SEG && section % 2) {
684 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
685 " section base references");
686 } else if (wrt == NO_SEG) {
687 if (fmt.ptrsize == 8 &&
688 (s->flags & S_ATTR_SOME_INSTRUCTIONS)) {
689 uint8_t opcode[2];
691 opcode[0] = opcode[1] = 0;
693 /* HACK: Retrieve instruction opcode */
694 if (likely(s->data->datalen >= 2)) {
695 saa_fread(s->data, s->data->datalen-2, opcode, 2);
696 } else if (s->data->datalen == 1) {
697 saa_fread(s->data, 0, opcode+1, 1);
700 if ((opcode[0] != 0x0f && (opcode[1] & 0xfe) == 0xe8) ||
701 (opcode[0] == 0x0f && (opcode[1] & 0xf0) == 0x80)) {
702 /* Direct call, jmp, or jcc */
703 reltype = RL_BRANCH;
706 } else if (wrt == macho_gotpcrel_sect) {
707 reltype = RL_GOT;
709 if ((s->flags & S_ATTR_SOME_INSTRUCTIONS) &&
710 s->data->datalen >= 3) {
711 uint8_t gotload[3];
713 /* HACK: Retrieve instruction opcode */
714 saa_fread(s->data, s->data->datalen-3, gotload, 3);
715 if ((gotload[0] & 0xf8) == 0x48 &&
716 gotload[1] == 0x8b &&
717 (gotload[2] & 0307) == 0005) {
718 /* movq <reg>,[rel sym wrt ..gotpcrel] */
719 reltype = RL_GOTLOAD;
722 } else if (wrt == macho_tlvp_sect) {
723 reltype = RL_TLV;
724 } else {
725 nasm_error(ERR_NONFATAL, "Mach-O format does not support"
726 " this use of WRT");
727 /* continue with RL_REL */
730 addr += add_reloc(s, section, offset, reltype, 4);
731 WRITELONG(p, addr);
732 sect_write(s, mydata, 4);
733 break;
735 default:
736 nasm_error(ERR_NONFATAL, "Unrepresentable relocation in Mach-O");
737 break;
741 /* Translation table from traditional Unix section names to Mach-O */
742 static const struct sectmap {
743 const char *nasmsect;
744 const char *segname;
745 const char *sectname;
746 const uint32_t flags;
747 } sectmap[] = {
748 {".text", "__TEXT", "__text",
749 S_REGULAR|S_ATTR_SOME_INSTRUCTIONS|S_ATTR_PURE_INSTRUCTIONS},
750 {".data", "__DATA", "__data", S_REGULAR},
751 {".rodata", "__DATA", "__const", S_REGULAR},
752 {".bss", "__DATA", "__bss", S_ZEROFILL},
753 {NULL, NULL, NULL, 0}
756 #define NO_TYPE S_NASM_TYPE_MASK
758 /* Section type or attribute directives */
759 static const struct sect_attribs {
760 const char *name;
761 uint32_t flags;
762 } sect_attribs[] = {
763 { "data", S_REGULAR },
764 { "code", S_REGULAR|S_ATTR_SOME_INSTRUCTIONS|S_ATTR_PURE_INSTRUCTIONS },
765 { "mixed", S_REGULAR|S_ATTR_SOME_INSTRUCTIONS },
766 { "bss", S_ZEROFILL },
767 { "zerofill", S_ZEROFILL },
768 { "no_dead_strip", NO_TYPE|S_ATTR_NO_DEAD_STRIP },
769 { "live_support", NO_TYPE|S_ATTR_LIVE_SUPPORT },
770 { "strip_static_syms", NO_TYPE|S_ATTR_STRIP_STATIC_SYMS },
771 { NULL, 0 }
774 static int32_t macho_section(char *name, int pass, int *bits)
776 char *sectionAttributes;
777 const struct sectmap *sm;
778 struct section *s;
779 const char *section, *segment;
780 uint32_t flags;
781 const struct sect_attribs *sa;
782 char *currentAttribute;
783 char *comma;
785 bool new_seg;
787 (void)pass;
789 /* Default to the appropriate number of bits. */
790 if (!name) {
791 *bits = fmt.ptrsize << 3;
792 name = ".text";
793 sectionAttributes = NULL;
794 } else {
795 sectionAttributes = name;
796 name = nasm_strsep(&sectionAttributes, " \t");
799 section = segment = NULL;
800 flags = 0;
802 comma = strchr(name, ',');
803 if (comma) {
804 int len;
806 *comma = '\0';
807 segment = name;
808 section = comma+1;
810 len = strlen(segment);
811 if (len == 0) {
812 nasm_error(ERR_NONFATAL, "empty segment name\n");
813 } else if (len >= 16) {
814 nasm_error(ERR_NONFATAL, "segment name %s too long\n", segment);
817 len = strlen(section);
818 if (len == 0) {
819 nasm_error(ERR_NONFATAL, "empty section name\n");
820 } else if (len >= 16) {
821 nasm_error(ERR_NONFATAL, "section name %s too long\n", section);
824 if (!strcmp(section, "__text")) {
825 flags = S_REGULAR | S_ATTR_SOME_INSTRUCTIONS |
826 S_ATTR_PURE_INSTRUCTIONS;
827 } else if (!strcmp(section, "__bss")) {
828 flags = S_ZEROFILL;
829 } else {
830 flags = S_REGULAR;
832 } else {
833 for (sm = sectmap; sm->nasmsect != NULL; ++sm) {
834 /* make lookup into section name translation table */
835 if (!strcmp(name, sm->nasmsect)) {
836 segment = sm->segname;
837 section = sm->sectname;
838 flags = sm->flags;
839 goto found;
842 nasm_error(ERR_NONFATAL, "unknown section name\n");
843 return NO_SEG;
846 found:
847 /* try to find section with that name */
848 s = get_section_by_name(segment, section);
850 /* create it if it doesn't exist yet */
851 if (!s) {
852 new_seg = true;
854 s = *sectstail = nasm_zalloc(sizeof(struct section));
855 sectstail = &s->next;
857 s->data = saa_init(1L);
858 s->index = seg_alloc();
859 s->fileindex = ++seg_nsects;
860 s->align = -1;
861 s->pad = -1;
862 s->offset = -1;
863 s->by_name = false;
865 xstrncpy(s->segname, segment);
866 xstrncpy(s->sectname, section);
867 s->size = 0;
868 s->nreloc = 0;
869 s->flags = flags;
870 } else {
871 new_seg = false;
874 if (comma)
875 *comma = ','; /* Restore comma */
877 s->by_name = s->by_name || comma; /* Was specified by name */
879 flags = NO_TYPE;
881 while (sectionAttributes &&
882 (currentAttribute = nasm_strsep(&sectionAttributes, " \t"))) {
883 if (!*currentAttribute)
884 continue;
886 if (!nasm_strnicmp("align=", currentAttribute, 6)) {
887 char *end;
888 int newAlignment, value;
890 value = strtoul(currentAttribute + 6, (char**)&end, 0);
891 newAlignment = alignlog2_32(value);
893 if (0 != *end) {
894 nasm_error(ERR_NONFATAL,
895 "unknown or missing alignment value \"%s\" "
896 "specified for section \"%s\"",
897 currentAttribute + 6,
898 name);
899 } else if (0 > newAlignment) {
900 nasm_error(ERR_NONFATAL,
901 "alignment of %d (for section \"%s\") is not "
902 "a power of two",
903 value,
904 name);
907 if (s->align < newAlignment)
908 s->align = newAlignment;
909 } else {
910 for (sa = sect_attribs; sa->name; sa++) {
911 if (!nasm_stricmp(sa->name, currentAttribute)) {
912 if ((sa->flags & S_NASM_TYPE_MASK) != NO_TYPE) {
913 flags = (flags & ~S_NASM_TYPE_MASK)
914 | (sa->flags & S_NASM_TYPE_MASK);
916 flags |= sa->flags & ~S_NASM_TYPE_MASK;
917 break;
921 if (!sa->name) {
922 nasm_error(ERR_NONFATAL,
923 "unknown section attribute %s for section %s",
924 currentAttribute, name);
929 if ((flags & S_NASM_TYPE_MASK) != NO_TYPE) {
930 if (!new_seg && ((s->flags ^ flags) & S_NASM_TYPE_MASK)) {
931 nasm_error(ERR_NONFATAL,
932 "inconsistent section attributes for section %s\n",
933 name);
934 } else {
935 s->flags = (s->flags & ~S_NASM_TYPE_MASK) | flags;
937 } else {
938 s->flags |= flags & ~S_NASM_TYPE_MASK;
941 return s->index;
944 static void macho_symdef(char *name, int32_t section, int64_t offset,
945 int is_global, char *special)
947 struct symbol *sym;
949 if (special) {
950 nasm_error(ERR_NONFATAL, "The Mach-O output format does "
951 "not support any special symbol types");
952 return;
955 if (is_global == 3) {
956 nasm_error(ERR_NONFATAL, "The Mach-O format does not "
957 "(yet) support forward reference fixups.");
958 return;
961 if (name[0] == '.' && name[1] == '.' && name[2] != '@') {
963 * This is a NASM special symbol. We never allow it into
964 * the Macho-O symbol table, even if it's a valid one. If it
965 * _isn't_ a valid one, we should barf immediately.
967 if (strcmp(name, "..gotpcrel") && strcmp(name, "..tlvp"))
968 nasm_error(ERR_NONFATAL, "unrecognized special symbol `%s'", name);
969 return;
972 sym = *symstail = nasm_zalloc(sizeof(struct symbol));
973 sym->next = NULL;
974 symstail = &sym->next;
976 sym->name = name;
977 sym->strx = strslen;
978 sym->type = 0;
979 sym->desc = 0;
980 sym->symv.key = offset;
981 sym->initial_snum = -1;
983 /* external and common symbols get N_EXT */
984 if (is_global != 0) {
985 sym->type |= N_EXT;
988 if (section == NO_SEG) {
989 /* symbols in no section get absolute */
990 sym->type |= N_ABS;
991 sym->sect = NO_SECT;
993 /* all absolute symbols are available to use as references */
994 absolute_sect.gsyms = rb_insert(absolute_sect.gsyms, &sym->symv);
995 } else {
996 struct section *s = get_section_by_index(section);
998 sym->type |= N_SECT;
1000 /* get the in-file index of the section the symbol was defined in */
1001 sym->sect = s ? s->fileindex : NO_SECT;
1003 /* track the initially allocated symbol number for use in future fix-ups */
1004 sym->initial_snum = nsyms;
1006 if (!s) {
1007 /* remember symbol number of references to external
1008 ** symbols, this works because every external symbol gets
1009 ** its own section number allocated internally by nasm and
1010 ** can so be used as a key */
1011 extsyms = raa_write(extsyms, section, nsyms);
1013 switch (is_global) {
1014 case 1:
1015 case 2:
1016 /* there isn't actually a difference between global
1017 ** and common symbols, both even have their size in
1018 ** sym->symv.key */
1019 sym->type = N_EXT;
1020 break;
1022 default:
1023 /* give an error on unfound section if it's not an
1024 ** external or common symbol (assemble_file() does a
1025 ** seg_alloc() on every call for them) */
1026 nasm_panic(0, "in-file index for section %d not found, is_global = %d", section, is_global);
1027 break;
1029 } else if (is_global) {
1030 s->gsyms = rb_insert(s->gsyms, &sym->symv);
1033 ++nsyms;
1036 static void macho_sectalign(int32_t seg, unsigned int value)
1038 struct section *s;
1039 int align;
1041 nasm_assert(!(seg & 1));
1043 s = get_section_by_index(seg);
1045 if (!s || !is_power2(value))
1046 return;
1048 align = alignlog2_32(value);
1049 if (s->align < align)
1050 s->align = align;
/* Segment-base hook: this backend returns the segment index unchanged. */
static int32_t macho_segbase(int32_t section)
{
    const int32_t base = section;

    return base;
}
/*
 * Output-file-name hook: passes both name buffers to
 * standard_extension() together with the ".o" extension.
 */
static void macho_filename(char *inname, char *outname)
{
    standard_extension(inname, outname, ".o");
}
1063 extern macros_t macho_stdmac[];
1065 /* Comparison function for qsort symbol layout. */
1066 static int layout_compare (const struct symbol **s1,
1067 const struct symbol **s2)
1069 return (strcmp ((*s1)->name, (*s2)->name));
1072 /* The native assembler does a few things in a similar function
1074 * Remove temporary labels
1075 * Sort symbols according to local, external, undefined (by name)
1076 * Order the string table
1078 We do not remove temporary labels right now.
1080 numsyms is the total number of symbols we have. strtabsize is the
1081 number entries in the string table. */
1083 static void macho_layout_symbols (uint32_t *numsyms,
1084 uint32_t *strtabsize)
1086 struct symbol *sym, **symp;
1087 uint32_t i,j;
1089 *numsyms = 0;
1090 *strtabsize = sizeof (char);
1092 symp = &syms;
1094 while ((sym = *symp)) {
1095 /* Undefined symbols are now external. */
1096 if (sym->type == N_UNDF)
1097 sym->type |= N_EXT;
1099 if ((sym->type & N_EXT) == 0) {
1100 sym->snum = *numsyms;
1101 *numsyms = *numsyms + 1;
1102 nlocalsym++;
1104 else {
1105 if ((sym->type & N_TYPE) != N_UNDF) {
1106 nextdefsym++;
1107 } else {
1108 nundefsym++;
1111 /* If we handle debug info we'll want
1112 to check for it here instead of just
1113 adding the symbol to the string table. */
1114 sym->strx = *strtabsize;
1115 saa_wbytes (strs, sym->name, (int32_t)(strlen(sym->name) + 1));
1116 *strtabsize += strlen(sym->name) + 1;
1118 symp = &(sym->next);
1121 /* Next, sort the symbols. Most of this code is a direct translation from
1122 the Apple cctools symbol layout. We need to keep compatibility with that. */
1123 /* Set the indexes for symbol groups into the symbol table */
1124 ilocalsym = 0;
1125 iextdefsym = nlocalsym;
1126 iundefsym = nlocalsym + nextdefsym;
1128 /* allocate arrays for sorting externals by name */
1129 extdefsyms = nasm_malloc(nextdefsym * sizeof(struct symbol *));
1130 undefsyms = nasm_malloc(nundefsym * sizeof(struct symbol *));
1132 i = 0;
1133 j = 0;
1135 symp = &syms;
1137 while ((sym = *symp)) {
1139 if((sym->type & N_EXT) == 0) {
1140 sym->strx = *strtabsize;
1141 saa_wbytes (strs, sym->name, (int32_t)(strlen (sym->name) + 1));
1142 *strtabsize += strlen(sym->name) + 1;
1144 else {
1145 if ((sym->type & N_TYPE) != N_UNDF) {
1146 extdefsyms[i++] = sym;
1147 } else {
1148 undefsyms[j++] = sym;
1151 symp = &(sym->next);
1154 qsort(extdefsyms, nextdefsym, sizeof(struct symbol *),
1155 (int (*)(const void *, const void *))layout_compare);
1156 qsort(undefsyms, nundefsym, sizeof(struct symbol *),
1157 (int (*)(const void *, const void *))layout_compare);
1159 for(i = 0; i < nextdefsym; i++) {
1160 extdefsyms[i]->snum = *numsyms;
1161 *numsyms += 1;
1163 for(j = 0; j < nundefsym; j++) {
1164 undefsyms[j]->snum = *numsyms;
1165 *numsyms += 1;
1169 /* Calculate some values we'll need for writing later. */
1171 static void macho_calculate_sizes (void)
1173 struct section *s;
1174 int fi;
1176 /* count sections and calculate in-memory and in-file offsets */
1177 for (s = sects; s != NULL; s = s->next) {
1178 uint64_t newaddr;
1180 /* recalculate segment address based on alignment and vm size */
1181 s->addr = seg_vmsize;
1183 /* we need section alignment to calculate final section address */
1184 if (s->align == -1)
1185 s->align = DEFAULT_SECTION_ALIGNMENT;
1187 newaddr = ALIGN(s->addr, UINT64_C(1) << s->align);
1188 s->addr = newaddr;
1190 seg_vmsize = newaddr + s->size;
1192 /* zerofill sections aren't actually written to the file */
1193 if ((s->flags & SECTION_TYPE) != S_ZEROFILL) {
1195 * LLVM/Xcode as always aligns the section data to 4
1196 * bytes; there is a comment in the LLVM source code that
1197 * perhaps aligning to pointer size would be better.
1199 s->pad = ALIGN(seg_filesize, 4) - seg_filesize;
1200 s->offset = seg_filesize + s->pad;
1201 seg_filesize += s->size + s->pad;
1203 /* filesize and vmsize needs to be aligned */
1204 seg_vmsize += s->pad;
1209 /* calculate size of all headers, load commands and sections to
1210 ** get a pointer to the start of all the raw data */
1211 if (seg_nsects > 0) {
1212 ++head_ncmds;
1213 head_sizeofcmds += fmt.segcmd_size + seg_nsects * fmt.sectcmd_size;
1216 if (nsyms > 0) {
1217 ++head_ncmds;
1218 head_sizeofcmds += MACHO_SYMCMD_SIZE;
1221 if (seg_nsects > MAX_SECT) {
1222 nasm_fatal(0, "MachO output is limited to %d sections\n",
1223 MAX_SECT);
1226 /* Create a table of sections by file index to avoid linear search */
1227 sectstab = nasm_malloc((seg_nsects + 1) * sizeof(*sectstab));
1228 sectstab[NO_SECT] = &absolute_sect;
1229 for (s = sects, fi = 1; s != NULL; s = s->next, fi++)
1230 sectstab[fi] = s;
1233 /* Write out the header information for the file. */
1235 static void macho_write_header (void)
1237 fwriteint32_t(fmt.mh_magic, ofile); /* magic */
1238 fwriteint32_t(fmt.cpu_type, ofile); /* CPU type */
1239 fwriteint32_t(CPU_SUBTYPE_I386_ALL, ofile); /* CPU subtype */
1240 fwriteint32_t(MH_OBJECT, ofile); /* Mach-O file type */
1241 fwriteint32_t(head_ncmds, ofile); /* number of load commands */
1242 fwriteint32_t(head_sizeofcmds, ofile); /* size of load commands */
1243 fwriteint32_t(head_flags, ofile); /* flags, if any */
1244 fwritezero(fmt.header_size - 7*4, ofile); /* reserved fields */
1247 /* Write out the segment load command at offset. */
1249 static uint32_t macho_write_segment (uint64_t offset)
1251 uint64_t rel_base = alignptr(offset + seg_filesize);
1252 uint32_t s_reloff = 0;
1253 struct section *s;
1255 fwriteint32_t(fmt.lc_segment, ofile); /* cmd == LC_SEGMENT_64 */
1257 /* size of load command including section load commands */
1258 fwriteint32_t(fmt.segcmd_size + seg_nsects * fmt.sectcmd_size,
1259 ofile);
1261 /* in an MH_OBJECT file all sections are in one unnamed (name
1262 ** all zeros) segment */
1263 fwritezero(16, ofile);
1264 fwriteptr(0, ofile); /* in-memory offset */
1265 fwriteptr(seg_vmsize, ofile); /* in-memory size */
1266 fwriteptr(offset, ofile); /* in-file offset to data */
1267 fwriteptr(seg_filesize, ofile); /* in-file size */
1268 fwriteint32_t(VM_PROT_DEFAULT, ofile); /* maximum vm protection */
1269 fwriteint32_t(VM_PROT_DEFAULT, ofile); /* initial vm protection */
1270 fwriteint32_t(seg_nsects, ofile); /* number of sections */
1271 fwriteint32_t(0, ofile); /* no flags */
1273 /* emit section headers */
1274 for (s = sects; s != NULL; s = s->next) {
1275 if (s->nreloc) {
1276 nasm_assert((s->flags & SECTION_TYPE) != S_ZEROFILL);
1277 s->flags |= S_ATTR_LOC_RELOC;
1278 if (s->extreloc)
1279 s->flags |= S_ATTR_EXT_RELOC;
1280 } else if (!strcmp(s->segname, "__DATA") &&
1281 !strcmp(s->sectname, "__const") &&
1282 !s->by_name &&
1283 !get_section_by_name("__TEXT", "__const")) {
1285 * The MachO equivalent to .rodata can be either
1286 * __DATA,__const or __TEXT,__const; the latter only if
1287 * there are no relocations. However, when mixed it is
1288 * better to specify the segments explicitly.
1290 xstrncpy(s->segname, "__TEXT");
1293 nasm_write(s->sectname, sizeof(s->sectname), ofile);
1294 nasm_write(s->segname, sizeof(s->segname), ofile);
1295 fwriteptr(s->addr, ofile);
1296 fwriteptr(s->size, ofile);
1298 /* dummy data for zerofill sections or proper values */
1299 if ((s->flags & SECTION_TYPE) != S_ZEROFILL) {
1300 nasm_assert(s->pad != (uint32_t)-1);
1301 offset += s->pad;
1302 fwriteint32_t(offset, ofile);
1303 offset += s->size;
1304 /* Write out section alignment, as a power of two.
1305 e.g. 32-bit word alignment would be 2 (2^2 = 4). */
1306 fwriteint32_t(s->align, ofile);
1307 /* To be compatible with cctools as we emit
1308 a zero reloff if we have no relocations. */
1309 fwriteint32_t(s->nreloc ? rel_base + s_reloff : 0, ofile);
1310 fwriteint32_t(s->nreloc, ofile);
1312 s_reloff += s->nreloc * MACHO_RELINFO_SIZE;
1313 } else {
1314 fwriteint32_t(0, ofile);
1315 fwriteint32_t(s->align, ofile);
1316 fwriteint32_t(0, ofile);
1317 fwriteint32_t(0, ofile);
1320 fwriteint32_t(s->flags, ofile); /* flags */
1321 fwriteint32_t(0, ofile); /* reserved */
1322 fwriteptr(0, ofile); /* reserved */
1325 rel_padcnt = rel_base - offset;
1326 offset = rel_base + s_reloff;
1328 return offset;
1331 /* For a given chain of relocs r, write out the entire relocation
1332 chain to the object file. */
1334 static void macho_write_relocs (struct reloc *r)
1336 while (r) {
1337 uint32_t word2;
1339 fwriteint32_t(r->addr, ofile); /* reloc offset */
1341 word2 = r->snum;
1342 word2 |= r->pcrel << 24;
1343 word2 |= r->length << 25;
1344 word2 |= r->ext << 27;
1345 word2 |= r->type << 28;
1346 fwriteint32_t(word2, ofile); /* reloc data */
1347 r = r->next;
1351 /* Write out the section data. */
1352 static void macho_write_section (void)
1354 struct section *s;
1355 struct reloc *r;
1356 uint8_t *p;
1357 int32_t len;
1358 int64_t l;
1359 union offset {
1360 uint64_t val;
1361 uint8_t buf[8];
1362 } blk;
1364 for (s = sects; s != NULL; s = s->next) {
1365 if ((s->flags & SECTION_TYPE) == S_ZEROFILL)
1366 continue;
1368 /* Like a.out Mach-O references things in the data or bss
1369 * sections by addresses which are actually relative to the
1370 * start of the _text_ section, in the _file_. See outaout.c
1371 * for more information. */
1372 saa_rewind(s->data);
1373 for (r = s->relocs; r != NULL; r = r->next) {
1374 len = (uint32_t)1 << r->length;
1375 if (len > 4) /* Can this ever be an issue?! */
1376 len = 8;
1377 blk.val = 0;
1378 saa_fread(s->data, r->addr, blk.buf, len);
1380 /* get offset based on relocation type */
1381 #ifdef WORDS_LITTLEENDIAN
1382 l = blk.val;
1383 #else
1384 l = blk.buf[0];
1385 l += ((int64_t)blk.buf[1]) << 8;
1386 l += ((int64_t)blk.buf[2]) << 16;
1387 l += ((int64_t)blk.buf[3]) << 24;
1388 l += ((int64_t)blk.buf[4]) << 32;
1389 l += ((int64_t)blk.buf[5]) << 40;
1390 l += ((int64_t)blk.buf[6]) << 48;
1391 l += ((int64_t)blk.buf[7]) << 56;
1392 #endif
1394 /* If the relocation is internal add to the current section
1395 offset. Otherwise the only value we need is the symbol
1396 offset which we already have. The linker takes care
1397 of the rest of the address. */
1398 if (!r->ext) {
1399 /* generate final address by section address and offset */
1400 nasm_assert(r->snum <= seg_nsects);
1401 l += sectstab[r->snum]->addr;
1402 if (r->pcrel)
1403 l -= s->addr;
1404 } else if (r->pcrel && r->type == GENERIC_RELOC_VANILLA) {
1405 l -= s->addr;
1408 /* write new offset back */
1409 p = blk.buf;
1410 WRITEDLONG(p, l);
1411 saa_fwrite(s->data, r->addr, blk.buf, len);
1414 /* dump the section data to file */
1415 fwritezero(s->pad, ofile);
1416 saa_fpwrite(s->data, ofile);
1419 /* pad last section up to reloc entries on pointer boundary */
1420 fwritezero(rel_padcnt, ofile);
1422 /* emit relocation entries */
1423 for (s = sects; s != NULL; s = s->next)
1424 macho_write_relocs (s->relocs);
1427 /* Write out the symbol table. We should already have sorted this
1428 before now. */
1429 static void macho_write_symtab (void)
1431 struct symbol *sym;
1432 uint64_t i;
1434 /* we don't need to pad here since MACHO_RELINFO_SIZE == 8 */
1436 for (sym = syms; sym != NULL; sym = sym->next) {
1437 if ((sym->type & N_EXT) == 0) {
1438 fwriteint32_t(sym->strx, ofile); /* string table entry number */
1439 nasm_write(&sym->type, 1, ofile); /* symbol type */
1440 nasm_write(&sym->sect, 1, ofile); /* section */
1441 fwriteint16_t(sym->desc, ofile); /* description */
1443 /* Fix up the symbol value now that we know the final section
1444 sizes. */
1445 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1446 nasm_assert(sym->sect <= seg_nsects);
1447 sym->symv.key += sectstab[sym->sect]->addr;
1450 fwriteptr(sym->symv.key, ofile); /* value (i.e. offset) */
1454 for (i = 0; i < nextdefsym; i++) {
1455 sym = extdefsyms[i];
1456 fwriteint32_t(sym->strx, ofile);
1457 nasm_write(&sym->type, 1, ofile); /* symbol type */
1458 nasm_write(&sym->sect, 1, ofile); /* section */
1459 fwriteint16_t(sym->desc, ofile); /* description */
1461 /* Fix up the symbol value now that we know the final section
1462 sizes. */
1463 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1464 nasm_assert(sym->sect <= seg_nsects);
1465 sym->symv.key += sectstab[sym->sect]->addr;
1468 fwriteptr(sym->symv.key, ofile); /* value (i.e. offset) */
1471 for (i = 0; i < nundefsym; i++) {
1472 sym = undefsyms[i];
1473 fwriteint32_t(sym->strx, ofile);
1474 nasm_write(&sym->type, 1, ofile); /* symbol type */
1475 nasm_write(&sym->sect, 1, ofile); /* section */
1476 fwriteint16_t(sym->desc, ofile); /* description */
1478 /* Fix up the symbol value now that we know the final section
1479 sizes. */
1480 if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
1481 nasm_assert(sym->sect <= seg_nsects);
1482 sym->symv.key += sectstab[sym->sect]->addr;
1485 fwriteptr(sym->symv.key, ofile); /* value (i.e. offset) */
1490 /* Fixup the snum in the relocation entries, we should be
1491 doing this only for externally referenced symbols. */
1492 static void macho_fixup_relocs (struct reloc *r)
1494 struct symbol *sym;
1496 while (r != NULL) {
1497 if (r->ext) {
1498 for (sym = syms; sym != NULL; sym = sym->next) {
1499 if (sym->initial_snum == r->snum) {
1500 r->snum = sym->snum;
1501 break;
1505 r = r->next;
1509 /* Write out the object file. */
1511 static void macho_write (void)
1513 uint64_t offset = 0;
1515 /* mach-o object file structure:
1517 ** mach header
1518 ** uint32_t magic
1519 ** int cpu type
1520 ** int cpu subtype
1521 ** uint32_t mach file type
1522 ** uint32_t number of load commands
1523 ** uint32_t size of all load commands
1524 ** (includes section struct size of segment command)
1525 ** uint32_t flags
1527 ** segment command
1528 ** uint32_t command type == LC_SEGMENT[_64]
1529 ** uint32_t size of load command
1530 ** (including section load commands)
1531 ** char[16] segment name
1532 ** pointer in-memory offset
1533 ** pointer in-memory size
1534 ** pointer in-file offset to data area
1535 ** pointer in-file size
1536 ** (in-memory size excluding zerofill sections)
1537 ** int maximum vm protection
1538 ** int initial vm protection
1539 ** uint32_t number of sections
1540 ** uint32_t flags
1542 ** section commands
1543 ** char[16] section name
1544 ** char[16] segment name
1545 ** pointer in-memory offset
1546 ** pointer in-memory size
1547 ** uint32_t in-file offset
1548 ** uint32_t alignment
1549 ** (irrelevant in MH_OBJECT)
1550 ** uint32_t in-file offset of relocation entires
1551 ** uint32_t number of relocations
1552 ** uint32_t flags
1553 ** uint32_t reserved
1554 ** uint32_t reserved
1556 ** symbol table command
1557 ** uint32_t command type == LC_SYMTAB
1558 ** uint32_t size of load command
1559 ** uint32_t symbol table offset
1560 ** uint32_t number of symbol table entries
1561 ** uint32_t string table offset
1562 ** uint32_t string table size
1564 ** raw section data
1566 ** padding to pointer boundary
1568 ** relocation data (struct reloc)
1569 ** int32_t offset
1570 ** uint data (symbolnum, pcrel, length, extern, type)
1572 ** symbol table data (struct nlist)
1573 ** int32_t string table entry number
1574 ** uint8_t type
1575 ** (extern, absolute, defined in section)
1576 ** uint8_t section
1577 ** (0 for global symbols, section number of definition (>= 1, <=
1578 ** 254) for local symbols, size of variable for common symbols
1579 ** [type == extern])
1580 ** int16_t description
1581 ** (for stab debugging format)
1582 ** pointer value (i.e. file offset) of symbol or stab offset
1584 ** string table data
1585 ** list of null-terminated strings
1588 /* Emit the Mach-O header. */
1589 macho_write_header();
1591 offset = fmt.header_size + head_sizeofcmds;
1593 /* emit the segment load command */
1594 if (seg_nsects > 0)
1595 offset = macho_write_segment (offset);
1596 else
1597 nasm_error(ERR_WARNING, "no sections?");
1599 if (nsyms > 0) {
1600 /* write out symbol command */
1601 fwriteint32_t(LC_SYMTAB, ofile); /* cmd == LC_SYMTAB */
1602 fwriteint32_t(MACHO_SYMCMD_SIZE, ofile); /* size of load command */
1603 fwriteint32_t(offset, ofile); /* symbol table offset */
1604 fwriteint32_t(nsyms, ofile); /* number of symbol
1605 ** table entries */
1606 offset += nsyms * fmt.nlist_size;
1607 fwriteint32_t(offset, ofile); /* string table offset */
1608 fwriteint32_t(strslen, ofile); /* string table size */
1611 /* emit section data */
1612 if (seg_nsects > 0)
1613 macho_write_section ();
1615 /* emit symbol table if we have symbols */
1616 if (nsyms > 0)
1617 macho_write_symtab ();
1619 /* we don't need to pad here, we are already aligned */
1621 /* emit string table */
1622 saa_fpwrite(strs, ofile);
1624 /* We do quite a bit here, starting with finalizing all of the data
1625 for the object file, writing, and then freeing all of the data from
1626 the file. */
1628 static void macho_cleanup(void)
1630 struct section *s;
1631 struct reloc *r;
1632 struct symbol *sym;
1634 dfmt->cleanup();
1636 /* Sort all symbols. */
1637 macho_layout_symbols (&nsyms, &strslen);
1639 /* Fixup relocation entries */
1640 for (s = sects; s != NULL; s = s->next) {
1641 macho_fixup_relocs (s->relocs);
1644 /* First calculate and finalize needed values. */
1645 macho_calculate_sizes();
1646 macho_write();
1648 /* free up everything */
1649 while (sects->next) {
1650 s = sects;
1651 sects = sects->next;
1653 saa_free(s->data);
1654 while (s->relocs != NULL) {
1655 r = s->relocs;
1656 s->relocs = s->relocs->next;
1657 nasm_free(r);
1660 nasm_free(s);
1663 saa_free(strs);
1664 raa_free(extsyms);
1666 while (syms) {
1667 sym = syms;
1668 syms = syms->next;
1669 nasm_free (sym);
1672 nasm_free(extdefsyms);
1673 nasm_free(undefsyms);
1674 nasm_free(sectstab);
1677 static bool macho_set_section_attribute_by_symbol(const char *label, uint32_t flags)
1679 struct section *s;
1680 int32_t nasm_seg;
1681 int64_t offset;
1683 if (!lookup_label(label, &nasm_seg, &offset)) {
1684 nasm_error(ERR_NONFATAL, "unknown symbol `%s' in no_dead_strip", label);
1685 return false;
1688 s = get_section_by_index(nasm_seg);
1689 if (!s) {
1690 nasm_error(ERR_NONFATAL, "symbol `%s' is external or absolute", label);
1691 return false;
1694 s->flags |= flags;
1695 return true;
1699 * Mark a symbol for no dead stripping
1701 static enum directive_result macho_no_dead_strip(const char *labels)
1703 char *s, *p, *ep;
1704 char ec;
1705 enum directive_result rv = DIRR_ERROR;
1706 bool real = passn > 1;
1708 p = s = nasm_strdup(labels);
1709 while (*p) {
1710 ep = nasm_skip_identifier(p);
1711 if (!ep) {
1712 nasm_error(ERR_NONFATAL, "invalid symbol in NO_DEAD_STRIP");
1713 goto err;
1715 ec = *ep;
1716 if (ec && ec != ',' && !nasm_isspace(ec)) {
1717 nasm_error(ERR_NONFATAL, "cannot parse contents after symbol");
1718 goto err;
1720 *ep = '\0';
1721 if (real) {
1722 if (!macho_set_section_attribute_by_symbol(p, S_ATTR_NO_DEAD_STRIP))
1723 rv = DIRR_ERROR;
1725 *ep = ec;
1726 p = nasm_skip_spaces(ep);
1727 if (*p == ',')
1728 p = nasm_skip_spaces(++p);
1731 rv = DIRR_OK;
1733 err:
1734 nasm_free(s);
1735 return rv;
1739 * Mach-O pragmas
1741 static enum directive_result
1742 macho_pragma(const struct pragma *pragma)
1744 bool real = passn > 1;
1746 switch (pragma->opcode) {
1747 case D_SUBSECTIONS_VIA_SYMBOLS:
1748 if (*pragma->tail)
1749 return DIRR_BADPARAM;
1751 if (real)
1752 head_flags |= MH_SUBSECTIONS_VIA_SYMBOLS;
1754 return DIRR_OK;
1756 case D_NO_DEAD_STRIP:
1757 return macho_no_dead_strip(pragma->tail);
1759 default:
1760 return DIRR_UNKNOWN; /* Not a Mach-O directive */
1764 static const struct pragma_facility macho_pragma_list[] = {
1765 { "macho", macho_pragma },
1766 { NULL, macho_pragma } /* Implements macho32/macho64 namespaces */
/* Debug format initialization hook; there is nothing to set up. */
static void macho_dbg_init(void)
{
}
1773 static void macho_dbg_linenum(const char *file_name, int32_t line_num, int32_t segto)
1775 bool need_new_list = true;
1776 (void)segto;
1778 if(!dw_cur_list || strcmp(file_name, dw_cur_list->file_name)) {
1779 if(dw_head_list) {
1780 struct file_list *match = dw_head_list;
1781 uint32_t idx = 0;
1783 for (; idx < dw_num_files; idx++ ) {
1784 if(!(strcmp(file_name, match->file_name))) {
1785 dw_cur_list = match;
1786 need_new_list = false;
1787 break;
1789 match = match->next;
1793 if(need_new_list) {
1794 nasm_new(dw_cur_list);
1795 dw_cur_list->file = ++dw_num_files;
1796 dw_cur_list->file_name = (char*)file_name;
1798 if(!dw_head_list) {
1799 dw_head_list = dw_last_list = dw_cur_list;
1800 } else {
1801 dw_last_list->next = dw_cur_list;
1802 dw_last_list = dw_cur_list;
1807 dbg_immcall = true;
1808 cur_line = line_num;
1811 static void macho_dbg_output(int type, void *param)
1813 struct section_info *sinfo_param = (struct section_info *)param;
1814 int32_t secto = sinfo_param->secto;
1815 bool need_new_sect = false;
1816 (void)type;
1818 if(!(dw_cur_sect && (dw_cur_sect->section == secto))) {
1819 need_new_sect = true;
1820 if(dw_head_sect) {
1821 struct dw_sect_list *match = dw_head_sect;
1822 uint32_t idx = 0;
1824 for(; idx < dw_num_sects; idx++) {
1825 if(match->section == secto) {
1826 dw_cur_sect = match;
1827 need_new_sect = false;
1828 break;
1830 match = match->next;
1835 if(need_new_sect) {
1836 nasm_new(dw_cur_sect);
1837 dw_num_sects ++;
1838 dw_cur_sect->line = dw_cur_sect->file = 1;
1839 dw_cur_sect->offset = 0;
1840 dw_cur_sect->next = NULL;
1841 dw_cur_sect->section = secto;
1843 if(!dw_head_sect) {
1844 dw_head_sect = dw_last_sect = dw_cur_sect;
1845 } else {
1846 dw_last_sect->next = dw_cur_sect;
1847 dw_last_sect = dw_cur_sect;
1851 if(dbg_immcall == true) {
1852 int32_t line_delta = cur_line - dw_cur_sect->line;
1853 uint32_t cur_file = dw_cur_list->file;
1855 if(cur_file != dw_cur_sect->file) {
1856 dw_cur_sect->file = cur_file;
1859 if(line_delta) {
1860 dw_cur_sect->line = cur_line;
1861 dw_cur_sect->offset = sinfo_param->size;
1864 dbg_immcall = false;
1868 static void macho_dbg_cleanup(void)
1871 struct dw_sect_list *p_sect = dw_head_sect;
1872 struct file_list *p_file = dw_head_list;
1873 uint32_t idx = 0;
1875 for(; idx < dw_num_sects; idx++) {
1876 struct dw_sect_list *next = p_sect->next;
1877 nasm_free(p_sect);
1878 p_sect = next;
1881 for(idx = 0; idx < dw_num_files; idx++) {
1882 struct file_list *next = p_file->next;
1883 nasm_free(p_file);
1884 p_file = next;
1889 #ifdef OF_MACHO32
1890 static const struct macho_fmt macho32_fmt = {
1892 MH_MAGIC,
1893 CPU_TYPE_I386,
1894 LC_SEGMENT,
1895 MACHO_HEADER_SIZE,
1896 MACHO_SEGCMD_SIZE,
1897 MACHO_SECTCMD_SIZE,
1898 MACHO_NLIST_SIZE,
1899 RL_MAX_32,
1900 GENERIC_RELOC_VANILLA,
1901 GENERIC_RELOC_VANILLA,
1902 GENERIC_RELOC_TLV
1905 static void macho32_init(void)
1907 fmt = macho32_fmt;
1908 macho_init();
1910 macho_gotpcrel_sect = NO_SEG;
1913 static const struct dfmt macho32_df_dwarf = {
1914 "MachO32 (i386) dwarf debug format for Darwin/MacOS",
1915 "dwarf",
1916 macho_dbg_init,
1917 macho_dbg_linenum,
1918 null_debug_deflabel,
1919 null_debug_directive,
1920 null_debug_typevalue,
1921 macho_dbg_output,
1922 macho_dbg_cleanup,
1923 NULL /*pragma list*/
1926 static const struct dfmt * const macho32_df_arr[2] =
1927 { &macho32_df_dwarf, NULL };
1929 const struct ofmt of_macho32 = {
1930 "NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (i386) object files",
1931 "macho32",
1934 macho32_df_arr,
1935 &macho32_df_dwarf,
1936 macho_stdmac,
1937 macho32_init,
1938 nasm_do_legacy_output,
1939 macho_output,
1940 macho_symdef,
1941 macho_section,
1942 macho_sectalign,
1943 macho_segbase,
1944 null_directive,
1945 macho_filename,
1946 macho_cleanup,
1947 macho_pragma_list,
1949 #endif
1951 #ifdef OF_MACHO64
1952 static const struct macho_fmt macho64_fmt = {
1954 MH_MAGIC_64,
1955 CPU_TYPE_X86_64,
1956 LC_SEGMENT_64,
1957 MACHO_HEADER64_SIZE,
1958 MACHO_SEGCMD64_SIZE,
1959 MACHO_SECTCMD64_SIZE,
1960 MACHO_NLIST64_SIZE,
1961 RL_MAX_64,
1962 X86_64_RELOC_UNSIGNED,
1963 X86_64_RELOC_SIGNED,
1964 X86_64_RELOC_TLV
1967 static void macho64_init(void)
1969 fmt = macho64_fmt;
1970 macho_init();
1972 /* add special symbol for ..gotpcrel */
1973 macho_gotpcrel_sect = seg_alloc() + 1;
1974 define_label("..gotpcrel", macho_gotpcrel_sect, 0L, NULL, false, false);
1977 static const struct dfmt macho64_df_dwarf = {
1978 "MachO64 (x86-64) dwarf debug format for Darwin/MacOS",
1979 "dwarf",
1980 macho_dbg_init,
1981 macho_dbg_linenum,
1982 null_debug_deflabel,
1983 null_debug_directive,
1984 null_debug_typevalue,
1985 macho_dbg_output,
1986 macho_dbg_cleanup,
1987 NULL /*pragma list*/
1990 static const struct dfmt * const macho64_df_arr[2] =
1991 { &macho64_df_dwarf, NULL };
1993 const struct ofmt of_macho64 = {
1994 "NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (x86_64) object files",
1995 "macho64",
1998 macho64_df_arr,
1999 &macho64_df_dwarf,
2000 macho_stdmac,
2001 macho64_init,
2002 nasm_do_legacy_output,
2003 macho_output,
2004 macho_symdef,
2005 macho_section,
2006 macho_sectalign,
2007 macho_segbase,
2008 null_directive,
2009 macho_filename,
2010 macho_cleanup,
2011 macho_pragma_list,
2013 #endif
2015 #endif
2018 * Local Variables:
2019 * mode:c
2020 * c-basic-offset:4
2021 * End:
2023 * end of file */