Fix bitfield long types for stabs/dwarf
[tinycc.git] / tccmacho.c
blob8e86b75f487f0533f122547a7c0a08a7c5e3cb5c
1 /*
2 * Mach-O file handling for TCC
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 #include "tcc.h"
20 /* In order to make life easy for us we are generating Mach-O files which
21 don't make use of some modern features, but which aren't entirely classic
22 either in that they do use some modern features. We're also only
23 generating 64bit Mach-O files, and only native endian at that.
25 In particular we're generating executables that don't make use of
26 DYLD_INFO for dynamic linking info, as that requires us building a
27 trie of exported names. We're simply using classic symbol tables which
28 are still supported by modern dyld.
30 But we do use LC_MAIN, which is a "modern" feature in order to not have
31 to setup our own crt code. We're not using lazy linking, so even function
32 calls are resolved at startup. */
/* Set DEBUG_MACHO to non-zero to get trace output from this file. */
#define DEBUG_MACHO 0
/* Used like printf(); the call is only executed when DEBUG_MACHO is
   non-zero.  Note that this expands to an unbraced `if', so do not put an
   `else' directly after a dprintf statement. */
#define dprintf if (DEBUG_MACHO) printf
/* Leading record of a universal ("fat") file; it is followed by
   nfat_arch records of type struct fat_arch. */
struct fat_header {
    uint32_t magic;      /* FAT_MAGIC or FAT_MAGIC_64 */
    uint32_t nfat_arch;  /* number of structs that follow */
};

/* Describes one architecture slice contained in a fat file. */
struct fat_arch {
    int cputype;         /* cpu specifier (int) */
    int cpusubtype;      /* machine specifier (int) */
    uint32_t offset;     /* file offset to this object file */
    uint32_t size;       /* size of this object file */
    uint32_t align;      /* alignment as a power of 2 */
};

/* The *_CIGAM values are the corresponding *_MAGIC values with their
   four bytes reversed. */
#define FAT_MAGIC    0xcafebabe
#define FAT_CIGAM    0xbebafeca
#define FAT_MAGIC_64 0xcafebabf
#define FAT_CIGAM_64 0xbfbafeca
/* Fields shared by the 32-bit and the 64-bit Mach-O file header. */
struct mach_header {
    uint32_t magic;       /* mach magic number identifier */
    int cputype;          /* cpu specifier */
    int cpusubtype;       /* machine specifier */
    uint32_t filetype;    /* type of file */
    uint32_t ncmds;       /* number of load commands */
    uint32_t sizeofcmds;  /* the size of all the load commands */
    uint32_t flags;       /* flags */
};

/* 64-bit header: the common header plus one padding word. */
struct mach_header_64 {
    struct mach_header mh;
    uint32_t reserved;    /* reserved, pad to 64bit */
};

/* Values for the magic field of the Mach-O headers */
#define MH_MAGIC    0xfeedface  /* the (32-bit) mach magic number */
#define MH_CIGAM    0xcefaedfe  /* NXSwapInt(MH_MAGIC) */
#define MH_MAGIC_64 0xfeedfacf  /* the 64-bit mach magic number */
#define MH_CIGAM_64 0xcffaedfe  /* NXSwapInt(MH_MAGIC_64) */
/* Common prefix of every load command; cmdsize covers the whole command
   including this prefix. */
struct load_command {
    uint32_t cmd;      /* type of load command */
    uint32_t cmdsize;  /* total size of command in bytes */
};

/* Load command types used in this file.  LC_REQ_DYLD is a flag bit that
   is or'ed into some of the command numbers. */
#define LC_REQ_DYLD        0x80000000
#define LC_SYMTAB          0x2
#define LC_DYSYMTAB        0xb
#define LC_LOAD_DYLIB      0xc
#define LC_ID_DYLIB        0xd
#define LC_LOAD_DYLINKER   0xe
#define LC_SEGMENT_64      0x19
#define LC_REEXPORT_DYLIB  (0x1f | LC_REQ_DYLD)
#define LC_MAIN            (0x28|LC_REQ_DYLD)
typedef int vm_prot_t;

/* LC_SEGMENT_64 load command.  In memory it is immediately followed by
   nsects struct section_64 records, which cmdsize accounts for. */
struct segment_command_64 { /* for 64-bit architectures */
    uint32_t cmd;        /* LC_SEGMENT_64 */
    uint32_t cmdsize;    /* includes sizeof section_64 structs */
    char segname[16];    /* segment name */
    uint64_t vmaddr;     /* memory address of this segment */
    uint64_t vmsize;     /* memory size of this segment */
    uint64_t fileoff;    /* file offset of this segment */
    uint64_t filesize;   /* amount to map from the file */
    vm_prot_t maxprot;   /* maximum VM protection */
    vm_prot_t initprot;  /* initial VM protection */
    uint32_t nsects;     /* number of sections in segment */
    uint32_t flags;      /* flags */
};
/* One section record; an array of these trails a segment_command_64. */
struct section_64 { /* for 64-bit architectures */
    char sectname[16];   /* name of this section */
    char segname[16];    /* segment this section goes in */
    uint64_t addr;       /* memory address of this section */
    uint64_t size;       /* size in bytes of this section */
    uint32_t offset;     /* file offset of this section */
    uint32_t align;      /* section alignment (power of 2) */
    uint32_t reloff;     /* file offset of relocation entries */
    uint32_t nreloc;     /* number of relocation entries */
    uint32_t flags;      /* flags (section type and attributes)*/
    uint32_t reserved1;  /* reserved (for offset or index) */
    uint32_t reserved2;  /* reserved (for count or sizeof) */
    uint32_t reserved3;  /* reserved */
};

/* Section types (stored in section_64.flags) */
#define S_REGULAR                  0x0
#define S_ZEROFILL                 0x1
#define S_NON_LAZY_SYMBOL_POINTERS 0x6
#define S_MOD_INIT_FUNC_POINTERS   0x9
#define S_MOD_TERM_FUNC_POINTERS   0xa

/* Section attributes, or'ed onto the section type */
#define S_ATTR_PURE_INSTRUCTIONS   0x80000000
#define S_ATTR_SOME_INSTRUCTIONS   0x00000400
/* A string inside a load command, given as the byte offset of the string
   from the start of that command. */
typedef uint32_t lc_str;

/* Names a dynamic library; the path string is stored after the fixed
   fields and is counted in cmdsize. */
struct dylib_command {
    uint32_t cmd;                    /* LC_ID_DYLIB, LC_LOAD_{,WEAK_}DYLIB,
                                        LC_REEXPORT_DYLIB */
    uint32_t cmdsize;                /* includes pathname string */
    lc_str name;                     /* library's path name */
    uint32_t timestamp;              /* library's build time stamp */
    uint32_t current_version;        /* library's current version number */
    uint32_t compatibility_version;  /* library's compatibility vers number*/
};

/* Names the dynamic linker; the path string follows the fixed fields. */
struct dylinker_command {
    uint32_t cmd;      /* LC_ID_DYLINKER, LC_LOAD_DYLINKER or
                          LC_DYLD_ENVIRONMENT */
    uint32_t cmdsize;  /* includes pathname string */
    lc_str name;       /* dynamic linker's path name */
};
/* LC_SYMTAB: locates the symbol table and its string table in the file. */
struct symtab_command {
    uint32_t cmd;      /* LC_SYMTAB */
    uint32_t cmdsize;  /* sizeof(struct symtab_command) */
    uint32_t symoff;   /* symbol table offset */
    uint32_t nsyms;    /* number of symbol table entries */
    uint32_t stroff;   /* string table offset */
    uint32_t strsize;  /* string table size in bytes */
};
/* LC_DYSYMTAB: partitions the symbol table into local, externally defined
   and undefined ranges and locates the auxiliary dynamic-linking tables. */
struct dysymtab_command {
    uint32_t cmd;             /* LC_DYSYMTAB */
    uint32_t cmdsize;         /* sizeof(struct dysymtab_command) */

    uint32_t ilocalsym;       /* index to local symbols */
    uint32_t nlocalsym;       /* number of local symbols */

    uint32_t iextdefsym;      /* index to externally defined symbols */
    uint32_t nextdefsym;      /* number of externally defined symbols */

    uint32_t iundefsym;       /* index to undefined symbols */
    uint32_t nundefsym;       /* number of undefined symbols */

    uint32_t tocoff;          /* file offset to table of contents */
    uint32_t ntoc;            /* number of entries in table of contents */

    uint32_t modtaboff;       /* file offset to module table */
    uint32_t nmodtab;         /* number of module table entries */

    uint32_t extrefsymoff;    /* offset to referenced symbol table */
    uint32_t nextrefsyms;     /* number of referenced symbol table entries */

    uint32_t indirectsymoff;  /* file offset to the indirect symbol table */
    uint32_t nindirectsyms;   /* number of indirect symbol table entries */

    uint32_t extreloff;       /* offset to external relocation entries */
    uint32_t nextrel;         /* number of external relocation entries */
    uint32_t locreloff;       /* offset to local relocation entries */
    uint32_t nlocrel;         /* number of local relocation entries */
};

/* Indirect symbol table entry used for pointer slots that refer to a
   local symbol instead of a symbol table index. */
#define INDIRECT_SYMBOL_LOCAL 0x80000000
/* LC_MAIN: tells the loader where main() lives. */
struct entry_point_command {
    uint32_t cmd;        /* LC_MAIN only used in MH_EXECUTE filetypes */
    uint32_t cmdsize;    /* 24 */
    uint64_t entryoff;   /* file (__TEXT) offset of main() */
    uint64_t stacksize;  /* if not zero, initial stack size */
};
/* Section kinds.  Each ELF section is classified as one of these; the
   enumerator order is also the order in which the kinds are laid out and
   written, and it indexes the skinfo[] table. */
enum skind {
    sk_unknown = 0,
    sk_discard,
    sk_text,
    sk_stubs,
    sk_ro_data,
    sk_uw_info,
    sk_nl_ptr,    // non-lazy pointers, aka GOT
    sk_la_ptr,    // lazy pointers
    sk_init,
    sk_fini,
    sk_rw_data,
    sk_bss,
    sk_linkedit,
    sk_last       // number of kinds, not a kind itself
};
/* One 64-bit Mach-O symbol table entry. */
struct nlist_64 {
    uint32_t n_strx;   /* index into the string table */
    uint8_t n_type;    /* type flag, see below */
    uint8_t n_sect;    /* section number or NO_SECT */
    uint16_t n_desc;   /* see <mach-o/stab.h> */
    uint64_t n_value;  /* value of this symbol (or stab offset) */
};

/* Values for n_type; N_EXT is a flag or'ed onto one of the others. */
#define N_UNDF 0x0
#define N_ABS  0x2
#define N_EXT  0x1
#define N_SECT 0xe

/* Flags for n_desc */
#define N_WEAK_REF 0x0040
#define N_WEAK_DEF 0x0080
232 struct macho {
233 struct mach_header_64 mh;
234 int seg2lc[4], nseg;
235 struct load_command **lc;
236 struct entry_point_command *ep;
237 int nlc;
238 struct {
239 Section *s;
240 int machosect;
241 } sk_to_sect[sk_last];
242 int *elfsectomacho;
243 int *e2msym;
244 Section *symtab, *strtab, *wdata, *indirsyms, *stubs;
245 int stubsym;
246 uint32_t ilocal, iextdef, iundef;
249 #define SHT_LINKEDIT (SHT_LOOS + 42)
250 #define SHN_FROMDLL (SHN_LOOS + 2) /* Symbol is undefined, comes from a DLL */
252 static void * add_lc(struct macho *mo, uint32_t cmd, uint32_t cmdsize)
254 struct load_command *lc = tcc_mallocz(cmdsize);
255 lc->cmd = cmd;
256 lc->cmdsize = cmdsize;
257 mo->lc = tcc_realloc(mo->lc, sizeof(mo->lc[0]) * (mo->nlc + 1));
258 mo->lc[mo->nlc++] = lc;
259 return lc;
262 static struct segment_command_64 * add_segment(struct macho *mo, const char *name)
264 struct segment_command_64 *sc = add_lc(mo, LC_SEGMENT_64, sizeof(*sc));
265 strncpy(sc->segname, name, 16);
266 mo->seg2lc[mo->nseg++] = mo->nlc - 1;
267 return sc;
270 static struct segment_command_64 * get_segment(struct macho *mo, int i)
272 return (struct segment_command_64 *) (mo->lc[mo->seg2lc[i]]);
275 static int add_section(struct macho *mo, struct segment_command_64 **_seg, const char *name)
277 struct segment_command_64 *seg = *_seg;
278 int ret = seg->nsects;
279 struct section_64 *sec;
280 seg->nsects++;
281 seg->cmdsize += sizeof(*sec);
282 seg = tcc_realloc(seg, sizeof(*seg) + seg->nsects * sizeof(*sec));
283 sec = (struct section_64*)((char*)seg + sizeof(*seg)) + ret;
284 memset(sec, 0, sizeof(*sec));
285 strncpy(sec->sectname, name, 16);
286 strncpy(sec->segname, seg->segname, 16);
287 *_seg = seg;
288 return ret;
291 static struct section_64 *get_section(struct segment_command_64 *seg, int i)
293 return (struct section_64*)((char*)seg + sizeof(*seg)) + i;
296 static void * add_dylib(struct macho *mo, char *name)
298 struct dylib_command *lc;
299 int sz = (sizeof(*lc) + strlen(name) + 1 + 7) & -8;
300 lc = add_lc(mo, LC_LOAD_DYLIB, sz);
301 lc->name = sizeof(*lc);
302 strcpy((char*)lc + lc->name, name);
303 lc->timestamp = 2;
304 lc->current_version = 1 << 16;
305 lc->compatibility_version = 1 << 16;
306 return lc;
309 static void check_relocs(TCCState *s1, struct macho *mo)
311 Section *s;
312 ElfW_Rel *rel;
313 ElfW(Sym) *sym;
314 int i, type, gotplt_entry, sym_index, for_code;
315 struct sym_attr *attr;
317 s1->got = new_section(s1, ".got", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE);
318 mo->indirsyms = new_section(s1, "LEINDIR", SHT_LINKEDIT, SHF_ALLOC | SHF_WRITE);
319 for (i = 1; i < s1->nb_sections; i++) {
320 s = s1->sections[i];
321 if (s->sh_type != SHT_RELX)
322 continue;
323 for_each_elem(s, 0, rel, ElfW_Rel) {
324 type = ELFW(R_TYPE)(rel->r_info);
325 gotplt_entry = gotplt_entry_type(type);
326 for_code = code_reloc(type);
327 /* We generate a non-lazy pointer for used undefined symbols
328 and for defined symbols that must have a place for their
329 address due to codegen (i.e. a reloc requiring a got slot). */
330 sym_index = ELFW(R_SYM)(rel->r_info);
331 sym = &((ElfW(Sym) *)symtab_section->data)[sym_index];
332 if (sym->st_shndx == SHN_UNDEF
333 || gotplt_entry == ALWAYS_GOTPLT_ENTRY) {
334 attr = get_sym_attr(s1, sym_index, 1);
335 if (!attr->dyn_index) {
336 uint32_t *pi = section_ptr_add(mo->indirsyms, sizeof(*pi));
337 attr->got_offset = s1->got->data_offset;
338 attr->plt_offset = -1;
339 attr->dyn_index = 1; /* used as flag */
340 section_ptr_add(s1->got, PTR_SIZE);
341 if (ELFW(ST_BIND)(sym->st_info) == STB_LOCAL) {
342 if (sym->st_shndx == SHN_UNDEF)
343 tcc_error("undefined local symbol???");
344 *pi = INDIRECT_SYMBOL_LOCAL;
345 /* The pointer slot we generated must point to the
346 symbol, whose address is only known after layout,
347 so register a simple relocation for that. */
348 put_elf_reloc(s1->symtab, s1->got, attr->got_offset,
349 R_DATA_PTR, sym_index);
350 } else
351 *pi = mo->e2msym[sym_index];
353 if (for_code) {
354 if (attr->plt_offset == -1) {
355 uint8_t *jmp;
356 attr->plt_offset = mo->stubs->data_offset;
357 jmp = section_ptr_add(mo->stubs, 6);
358 jmp[0] = 0xff; /* jmpq *ofs(%rip) */
359 jmp[1] = 0x25;
360 put_elf_reloc(s1->symtab, mo->stubs,
361 attr->plt_offset + 2,
362 R_X86_64_GOTPCREL, sym_index);
364 rel->r_info = ELFW(R_INFO)(mo->stubsym, type);
365 rel->r_addend += attr->plt_offset;
372 static int check_symbols(TCCState *s1, struct macho *mo)
374 int sym_index, sym_end;
375 int ret = 0;
377 mo->ilocal = mo->iextdef = mo->iundef = -1;
378 sym_end = symtab_section->data_offset / sizeof(ElfW(Sym));
379 for (sym_index = 1; sym_index < sym_end; ++sym_index) {
380 int elf_index = ((struct nlist_64 *)mo->symtab->data + sym_index - 1)->n_value;
381 ElfW(Sym) *sym = (ElfW(Sym) *)symtab_section->data + elf_index;
382 const char *name = (char*)symtab_section->link->data + sym->st_name;
383 unsigned type = ELFW(ST_TYPE)(sym->st_info);
384 unsigned bind = ELFW(ST_BIND)(sym->st_info);
385 unsigned vis = ELFW(ST_VISIBILITY)(sym->st_other);
387 dprintf("%4d (%4d): %09lx %4d %4d %4d %3d %s\n",
388 sym_index, elf_index, (long)sym->st_value,
389 type, bind, vis, sym->st_shndx, name);
390 if (bind == STB_LOCAL) {
391 if (mo->ilocal == -1)
392 mo->ilocal = sym_index - 1;
393 if (mo->iextdef != -1 || mo->iundef != -1)
394 tcc_error("local syms after global ones");
395 } else if (sym->st_shndx != SHN_UNDEF) {
396 if (mo->iextdef == -1)
397 mo->iextdef = sym_index - 1;
398 if (mo->iundef != -1)
399 tcc_error("external defined symbol after undefined");
400 } else if (sym->st_shndx == SHN_UNDEF) {
401 if (mo->iundef == -1)
402 mo->iundef = sym_index - 1;
403 if (ELFW(ST_BIND)(sym->st_info) == STB_WEAK
404 || find_elf_sym(s1->dynsymtab_section, name)) {
405 /* Mark the symbol as coming from a dylib so that
406 relocate_syms doesn't complain. Normally bind_exe_dynsyms
407 would do this check, and place the symbol into dynsym
408 which is checked by relocate_syms. But Mach-O doesn't use
409 bind_exe_dynsyms. */
410 sym->st_shndx = SHN_FROMDLL;
411 continue;
413 tcc_error_noabort("undefined symbol '%s'", name);
414 ret = -1;
417 return ret;
420 static void convert_symbol(TCCState *s1, struct macho *mo, struct nlist_64 *pn)
422 struct nlist_64 n = *pn;
423 ElfSym *sym = (ElfW(Sym) *)symtab_section->data + pn->n_value;
424 const char *name = (char*)symtab_section->link->data + sym->st_name;
425 switch(ELFW(ST_TYPE)(sym->st_info)) {
426 case STT_NOTYPE:
427 case STT_OBJECT:
428 case STT_FUNC:
429 case STT_SECTION:
430 n.n_type = N_SECT;
431 break;
432 case STT_FILE:
433 n.n_type = N_ABS;
434 break;
435 default:
436 tcc_error("unhandled ELF symbol type %d %s",
437 ELFW(ST_TYPE)(sym->st_info), name);
439 if (sym->st_shndx == SHN_UNDEF)
440 tcc_error("should have been rewritten to SHN_FROMDLL: %s", name);
441 else if (sym->st_shndx == SHN_FROMDLL)
442 n.n_type = N_UNDF, n.n_sect = 0;
443 else if (sym->st_shndx == SHN_ABS)
444 n.n_type = N_ABS, n.n_sect = 0;
445 else if (sym->st_shndx >= SHN_LORESERVE)
446 tcc_error("unhandled ELF symbol section %d %s", sym->st_shndx, name);
447 else if (!mo->elfsectomacho[sym->st_shndx])
448 tcc_error("ELF section %d not mapped into Mach-O for symbol %s",
449 sym->st_shndx, name);
450 else
451 n.n_sect = mo->elfsectomacho[sym->st_shndx];
452 if (ELFW(ST_BIND)(sym->st_info) == STB_GLOBAL)
453 n.n_type |= N_EXT;
454 else if (ELFW(ST_BIND)(sym->st_info) == STB_WEAK)
455 n.n_desc |= N_WEAK_REF | (n.n_type != N_UNDF ? N_WEAK_DEF : 0);
456 n.n_strx = pn->n_strx;
457 n.n_value = sym->st_value;
458 *pn = n;
461 static void convert_symbols(TCCState *s1, struct macho *mo)
463 struct nlist_64 *pn;
464 for_each_elem(mo->symtab, 0, pn, struct nlist_64)
465 convert_symbol(s1, mo, pn);
468 static int machosymcmp(const void *_a, const void *_b)
470 TCCState *s1 = tcc_state;
471 int ea = ((struct nlist_64 *)_a)->n_value;
472 int eb = ((struct nlist_64 *)_b)->n_value;
473 ElfSym *sa = (ElfSym *)symtab_section->data + ea;
474 ElfSym *sb = (ElfSym *)symtab_section->data + eb;
475 int r;
476 /* locals, then defined externals, then undefined externals, the
477 last two sections also by name, otherwise stable sort */
478 r = (ELFW(ST_BIND)(sb->st_info) == STB_LOCAL)
479 - (ELFW(ST_BIND)(sa->st_info) == STB_LOCAL);
480 if (r)
481 return r;
482 r = (sa->st_shndx == SHN_UNDEF) - (sb->st_shndx == SHN_UNDEF);
483 if (r)
484 return r;
485 if (ELFW(ST_BIND)(sa->st_info) != STB_LOCAL) {
486 const char * na = (char*)symtab_section->link->data + sa->st_name;
487 const char * nb = (char*)symtab_section->link->data + sb->st_name;
488 r = strcmp(na, nb);
489 if (r)
490 return r;
492 return ea - eb;
495 static void create_symtab(TCCState *s1, struct macho *mo)
497 int sym_index, sym_end;
498 struct nlist_64 *pn;
500 /* Stub creation belongs to check_relocs, but we need to create
501 the symbol now, so its included in the sorting. */
502 mo->stubs = new_section(s1, "__stubs", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR);
503 mo->stubsym = put_elf_sym(s1->symtab, 0, 0,
504 ELFW(ST_INFO)(STB_LOCAL, STT_SECTION), 0,
505 mo->stubs->sh_num, ".__stubs");
507 mo->symtab = new_section(s1, "LESYMTAB", SHT_LINKEDIT, SHF_ALLOC | SHF_WRITE);
508 mo->strtab = new_section(s1, "LESTRTAB", SHT_LINKEDIT, SHF_ALLOC | SHF_WRITE);
509 put_elf_str(mo->strtab, " "); /* Mach-O starts strtab with a space */
510 sym_end = symtab_section->data_offset / sizeof(ElfW(Sym));
511 pn = section_ptr_add(mo->symtab, sizeof(*pn) * (sym_end - 1));
512 for (sym_index = 1; sym_index < sym_end; ++sym_index) {
513 ElfW(Sym) *sym = (ElfW(Sym) *)symtab_section->data + sym_index;
514 const char *name = (char*)symtab_section->link->data + sym->st_name;
515 pn[sym_index - 1].n_strx = put_elf_str(mo->strtab, name);
516 pn[sym_index - 1].n_value = sym_index;
518 tcc_enter_state(s1); /* qsort needs global state */
519 qsort(pn, sym_end - 1, sizeof(*pn), machosymcmp);
520 tcc_exit_state(s1);
521 mo->e2msym = tcc_malloc(sym_end * sizeof(*mo->e2msym));
522 mo->e2msym[0] = -1;
523 for (sym_index = 1; sym_index < sym_end; ++sym_index) {
524 mo->e2msym[pn[sym_index - 1].n_value] = sym_index - 1;
528 const struct {
529 int seg;
530 uint32_t flags;
531 const char *name;
532 } skinfo[sk_last] = {
533 /*[sk_unknown] =*/ { 0 },
534 /*[sk_discard] =*/ { 0 },
535 /*[sk_text] =*/ { 1, S_REGULAR | S_ATTR_PURE_INSTRUCTIONS
536 | S_ATTR_SOME_INSTRUCTIONS, "__text" },
537 /*[sk_stubs] =*/ { 0 },
538 /*[sk_ro_data] =*/ { 1, S_REGULAR, "__rodata" },
539 /*[sk_uw_info] =*/ { 0 },
540 /*[sk_nl_ptr] =*/ { 2, S_NON_LAZY_SYMBOL_POINTERS, "__got" },
541 /*[sk_la_ptr] =*/ { 0 },
542 /*[sk_init] =*/ { 2, S_MOD_INIT_FUNC_POINTERS, "__mod_init_func" },
543 /*[sk_fini] =*/ { 2, S_MOD_TERM_FUNC_POINTERS, "__mod_term_func" },
544 /*[sk_rw_data] =*/ { 2, S_REGULAR, "__data" },
545 /*[sk_bss] =*/ { 2, S_ZEROFILL, "__bss" },
546 /*[sk_linkedit] =*/ { 3, S_REGULAR, NULL },
549 static void collect_sections(TCCState *s1, struct macho *mo)
551 int i, sk, numsec;
552 uint64_t curaddr, fileofs;
553 Section *s;
554 struct segment_command_64 *seg = NULL;
555 struct dylinker_command *dyldlc;
556 struct symtab_command *symlc;
557 struct dysymtab_command *dysymlc;
558 char *str;
560 seg = add_segment(mo, "__PAGEZERO");
561 seg->vmsize = (uint64_t)1 << 32;
563 seg = add_segment(mo, "__TEXT");
564 seg->vmaddr = (uint64_t)1 << 32;
565 seg->maxprot = 7; // rwx
566 seg->initprot = 5; // r-x
568 seg = add_segment(mo, "__DATA");
569 seg->vmaddr = -1;
570 seg->maxprot = 7; // rwx
571 seg->initprot = 3; // rw-
573 seg = add_segment(mo, "__LINKEDIT");
574 seg->vmaddr = -1;
575 seg->maxprot = 7; // rwx
576 seg->initprot = 1; // r--
578 mo->ep = add_lc(mo, LC_MAIN, sizeof(*mo->ep));
579 mo->ep->entryoff = 4096;
581 i = (sizeof(*dyldlc) + strlen("/usr/lib/dyld") + 1 + 7) &-8;
582 dyldlc = add_lc(mo, LC_LOAD_DYLINKER, i);
583 dyldlc->name = sizeof(*dyldlc);
584 str = (char*)dyldlc + dyldlc->name;
585 strcpy(str, "/usr/lib/dyld");
587 symlc = add_lc(mo, LC_SYMTAB, sizeof(*symlc));
588 dysymlc = add_lc(mo, LC_DYSYMTAB, sizeof(*dysymlc));
590 for(i = 0; i < s1->nb_loaded_dlls; i++) {
591 DLLReference *dllref = s1->loaded_dlls[i];
592 if (dllref->level == 0)
593 add_dylib(mo, dllref->name);
596 /* dyld requires a writable segment with classic Mach-O, but it ignores
597 zero-sized segments for this, so force to have some data. */
598 section_ptr_add(data_section, 1);
599 memset (mo->sk_to_sect, 0, sizeof(mo->sk_to_sect));
600 for (i = s1->nb_sections; i-- > 1;) {
601 int type, flags;
602 s = s1->sections[i];
603 type = s->sh_type;
604 flags = s->sh_flags;
605 sk = sk_unknown;
606 if (flags & SHF_ALLOC) {
607 switch (type) {
608 default: sk = sk_unknown; break;
609 case SHT_INIT_ARRAY: sk = sk_init; break;
610 case SHT_FINI_ARRAY: sk = sk_fini; break;
611 case SHT_NOBITS: sk = sk_bss; break;
612 case SHT_SYMTAB: sk = sk_discard; break;
613 case SHT_STRTAB: sk = s == stabstr_section ? sk_ro_data : sk_discard; break;
614 case SHT_RELX: sk = sk_discard; break;
615 case SHT_LINKEDIT: sk = sk_linkedit; break;
616 case SHT_PROGBITS:
617 if (s == s1->got)
618 sk = sk_nl_ptr;
619 else if (flags & SHF_EXECINSTR)
620 sk = sk_text;
621 else if (flags & SHF_WRITE)
622 sk = sk_rw_data;
623 else
624 sk = sk_ro_data;
625 break;
627 } else
628 sk = sk_discard;
629 s->prev = mo->sk_to_sect[sk].s;
630 mo->sk_to_sect[sk].s = s;
632 fileofs = 4096; /* leave space for mach-o headers */
633 curaddr = get_segment(mo, 1)->vmaddr;
634 curaddr += 4096;
635 seg = NULL;
636 numsec = 0;
637 mo->elfsectomacho = tcc_mallocz(sizeof(*mo->elfsectomacho) * s1->nb_sections);
638 for (sk = sk_unknown; sk < sk_last; sk++) {
639 struct section_64 *sec = NULL;
640 if (seg) {
641 seg->vmsize = curaddr - seg->vmaddr;
642 seg->filesize = fileofs - seg->fileoff;
644 if (skinfo[sk].seg && mo->sk_to_sect[sk].s) {
645 uint64_t al = 0;
646 int si;
647 seg = get_segment(mo, skinfo[sk].seg);
648 if (skinfo[sk].name) {
649 si = add_section(mo, &seg, skinfo[sk].name);
650 numsec++;
651 mo->lc[mo->seg2lc[skinfo[sk].seg]] = (struct load_command*)seg;
652 mo->sk_to_sect[sk].machosect = si;
653 sec = get_section(seg, si);
654 sec->flags = skinfo[sk].flags;
656 if (seg->vmaddr == -1) {
657 curaddr = (curaddr + 4095) & -4096;
658 seg->vmaddr = curaddr;
659 fileofs = (fileofs + 4095) & -4096;
660 seg->fileoff = fileofs;
663 for (s = mo->sk_to_sect[sk].s; s; s = s->prev) {
664 int a = exact_log2p1(s->sh_addralign);
665 if (a && al < (a - 1))
666 al = a - 1;
667 s->sh_size = s->data_offset;
669 if (sec)
670 sec->align = al;
671 al = 1ULL << al;
672 if (al > 4096)
673 tcc_warning("alignment > 4096"), sec->align = 12, al = 4096;
674 curaddr = (curaddr + al - 1) & -al;
675 fileofs = (fileofs + al - 1) & -al;
676 if (sec) {
677 sec->addr = curaddr;
678 sec->offset = fileofs;
680 for (s = mo->sk_to_sect[sk].s; s; s = s->prev) {
681 al = s->sh_addralign;
682 curaddr = (curaddr + al - 1) & -al;
683 dprintf("curaddr now 0x%lx\n", (long)curaddr);
684 s->sh_addr = curaddr;
685 curaddr += s->sh_size;
686 if (s->sh_type != SHT_NOBITS) {
687 fileofs = (fileofs + al - 1) & -al;
688 s->sh_offset = fileofs;
689 fileofs += s->sh_size;
690 dprintf("fileofs now %ld\n", (long)fileofs);
692 if (sec)
693 mo->elfsectomacho[s->sh_num] = numsec;
695 if (sec)
696 sec->size = curaddr - sec->addr;
698 if (DEBUG_MACHO)
699 for (s = mo->sk_to_sect[sk].s; s; s = s->prev) {
700 int type = s->sh_type;
701 int flags = s->sh_flags;
702 printf("%d section %-16s %-10s %09lx %04x %02d %s,%s,%s\n",
704 s->name,
705 type == SHT_PROGBITS ? "progbits" :
706 type == SHT_NOBITS ? "nobits" :
707 type == SHT_SYMTAB ? "symtab" :
708 type == SHT_STRTAB ? "strtab" :
709 type == SHT_INIT_ARRAY ? "init" :
710 type == SHT_FINI_ARRAY ? "fini" :
711 type == SHT_RELX ? "rel" : "???",
712 (long)s->sh_addr,
713 (unsigned)s->data_offset,
714 s->sh_addralign,
715 flags & SHF_ALLOC ? "alloc" : "",
716 flags & SHF_WRITE ? "write" : "",
717 flags & SHF_EXECINSTR ? "exec" : ""
721 if (seg) {
722 seg->vmsize = curaddr - seg->vmaddr;
723 seg->filesize = fileofs - seg->fileoff;
726 /* Fill symtab info */
727 symlc->symoff = mo->symtab->sh_offset;
728 symlc->nsyms = mo->symtab->data_offset / sizeof(struct nlist_64);
729 symlc->stroff = mo->strtab->sh_offset;
730 symlc->strsize = mo->strtab->data_offset;
732 dysymlc->iundefsym = mo->iundef == -1 ? symlc->nsyms : mo->iundef;
733 dysymlc->iextdefsym = mo->iextdef == -1 ? dysymlc->iundefsym : mo->iextdef;
734 dysymlc->ilocalsym = mo->ilocal == -1 ? dysymlc->iextdefsym : mo->ilocal;
735 dysymlc->nlocalsym = dysymlc->iextdefsym - dysymlc->ilocalsym;
736 dysymlc->nextdefsym = dysymlc->iundefsym - dysymlc->iextdefsym;
737 dysymlc->nundefsym = symlc->nsyms - dysymlc->iundefsym;
738 dysymlc->indirectsymoff = mo->indirsyms->sh_offset;
739 dysymlc->nindirectsyms = mo->indirsyms->data_offset / sizeof(uint32_t);
742 static void macho_write(TCCState *s1, struct macho *mo, FILE *fp)
744 int i, sk;
745 uint64_t fileofs = 0;
746 Section *s;
747 mo->mh.mh.magic = MH_MAGIC_64;
748 mo->mh.mh.cputype = 0x1000007; // x86_64
749 mo->mh.mh.cpusubtype = 0x80000003;// all | CPU_SUBTYPE_LIB64
750 mo->mh.mh.filetype = 2; // MH_EXECUTE
751 mo->mh.mh.flags = 4; // DYLDLINK
752 mo->mh.mh.ncmds = mo->nlc;
753 mo->mh.mh.sizeofcmds = 0;
754 for (i = 0; i < mo->nlc; i++)
755 mo->mh.mh.sizeofcmds += mo->lc[i]->cmdsize;
757 fwrite(&mo->mh, 1, sizeof(mo->mh), fp);
758 fileofs += sizeof(mo->mh);
759 for (i = 0; i < mo->nlc; i++) {
760 fwrite(mo->lc[i], 1, mo->lc[i]->cmdsize, fp);
761 fileofs += mo->lc[i]->cmdsize;
764 for (sk = sk_unknown; sk < sk_last; sk++) {
765 //struct segment_command_64 *seg;
766 if (!skinfo[sk].seg || !mo->sk_to_sect[sk].s)
767 continue;
768 /*seg =*/ get_segment(mo, skinfo[sk].seg);
769 for (s = mo->sk_to_sect[sk].s; s; s = s->prev) {
770 if (s->sh_type != SHT_NOBITS) {
771 while (fileofs < s->sh_offset)
772 fputc(0, fp), fileofs++;
773 if (s->sh_size) {
774 fwrite(s->data, 1, s->sh_size, fp);
775 fileofs += s->sh_size;
782 ST_FUNC int macho_output_file(TCCState *s1, const char *filename)
784 int fd, mode, file_type;
785 FILE *fp;
786 int i, ret = -1;
787 struct macho mo;
789 (void)memset(&mo, 0, sizeof(mo));
791 file_type = s1->output_type;
792 if (file_type == TCC_OUTPUT_OBJ)
793 mode = 0666;
794 else
795 mode = 0777;
796 unlink(filename);
797 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, mode);
798 if (fd < 0 || (fp = fdopen(fd, "wb")) == NULL) {
799 tcc_error_noabort("could not write '%s: %s'", filename, strerror(errno));
800 return -1;
802 if (s1->verbose)
803 printf("<- %s\n", filename);
805 tcc_add_runtime(s1);
806 resolve_common_syms(s1);
807 create_symtab(s1, &mo);
808 check_relocs(s1, &mo);
809 ret = check_symbols(s1, &mo);
810 if (!ret) {
811 collect_sections(s1, &mo);
812 relocate_syms(s1, s1->symtab, 0);
813 mo.ep->entryoff = get_sym_addr(s1, "main", 1, 1)
814 - get_segment(&mo, 1)->vmaddr;
815 if (s1->nb_errors)
816 goto do_ret;
817 relocate_sections(s1);
818 convert_symbols(s1, &mo);
819 macho_write(s1, &mo, fp);
822 do_ret:
823 for (i = 0; i < mo.nlc; i++)
824 tcc_free(mo.lc[i]);
825 tcc_free(mo.lc);
826 tcc_free(mo.elfsectomacho);
827 tcc_free(mo.e2msym);
829 fclose(fp);
830 return ret;
/* Return X with the order of its four bytes reversed. */
static uint32_t macho_swap32(uint32_t x)
{
    uint32_t b0 = x & 0xff;
    uint32_t b1 = (x >> 8) & 0xff;
    uint32_t b2 = (x >> 16) & 0xff;
    uint32_t b3 = x >> 24;

    return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}

/* Byte-swap X when the variable `swap' of the enclosing scope is
   non-zero, otherwise yield X unchanged. */
#define SWAP(x) (swap ? macho_swap32(x) : (x))
839 ST_FUNC int macho_add_dllref(TCCState* s1, int lev, const char* soname)
841 /* if the dll is already loaded, do not load it */
842 DLLReference *dllref;
843 int i;
844 for(i = 0; i < s1->nb_loaded_dlls; i++) {
845 dllref = s1->loaded_dlls[i];
846 if (!strcmp(soname, dllref->name)) {
847 /* but update level if needed */
848 if (lev < dllref->level)
849 dllref->level = lev;
850 return -1;
853 tcc_add_dllref(s1, soname)->level = lev;
854 return 0;
/* Helpers for scanning the text of a .tbd file in place.  All of them
   operate on a local variable `char *pos' of the using function.  The
   "trample" macros overwrite the current character with NUL and step over
   it, which terminates the token that ends there.  Several of them expand
   to an unbraced statement, so use them only as complete statements. */

/* move pos behind the next occurrence of S; pos = NULL if there is none */
#define tbd_parse_movepast(s) \
    (pos = (pos = strstr(pos, s)) ? pos + strlen(s) : NULL)
/* move pos to the next character contained in CS; NULL if there is none */
#define tbd_parse_movetoany(cs) (pos = strpbrk(pos, cs))
/* skip spaces and newlines */
#define tbd_parse_skipws while (*pos && (*pos==' '||*pos=='\n')) ++pos
/* trample the current character if it is a single or double quote */
#define tbd_parse_tramplequote if(*pos=='\''||*pos=='"') tbd_parse_trample
/* trample the current character if it is a space */
#define tbd_parse_tramplespace if(*pos==' ') tbd_parse_trample
/* replace the current character by NUL and advance */
#define tbd_parse_trample *pos++=0
865 #ifdef TCC_IS_NATIVE
866 /* Looks for the active developer SDK set by xcode-select (or the default
867 one set during installation.) */
868 ST_FUNC void tcc_add_macos_sdkpath(TCCState* s)
870 char *sdkroot = NULL, *pos = NULL;
871 void* xcs = dlopen("libxcselect.dylib", RTLD_GLOBAL | RTLD_LAZY);
872 CString path;
873 int (*f)(unsigned int, char**) = dlsym(xcs, "xcselect_host_sdk_path");
874 cstr_new(&path);
875 if (f) f(1, &sdkroot);
876 if (sdkroot)
877 pos = strstr(sdkroot,"SDKs/MacOSX");
878 if (pos)
879 cstr_printf(&path, "%.*s.sdk/usr/lib", (int)(pos - sdkroot + 11), sdkroot);
880 /* must use free from libc directly */
881 #pragma push_macro("free")
882 #undef free
883 free(sdkroot);
884 #pragma pop_macro("free")
885 if (path.size)
886 tcc_add_library_path(s, (char*)path.data);
887 else
888 tcc_add_library_path(s,
889 "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib"
890 ":" "/Applications/Xcode.app/Developer/SDKs/MacOSX.sdk/usr/lib"
892 cstr_free(&path);
895 ST_FUNC const char* macho_tbd_soname(const char* filename) {
896 char *soname, *data, *pos;
897 const char *ret = filename;
899 int fd = open(filename,O_RDONLY);
900 if (fd<0) return ret;
901 pos = data = tcc_load_text(fd);
902 if (!tbd_parse_movepast("install-name: ")) goto the_end;
903 tbd_parse_skipws;
904 tbd_parse_tramplequote;
905 soname = pos;
906 if (!tbd_parse_movetoany("\n \"'")) goto the_end;
907 tbd_parse_trample;
908 ret = tcc_strdup(soname);
909 the_end:
910 tcc_free(data);
911 return ret;
913 #endif /* TCC_IS_NATIVE */
915 ST_FUNC int macho_load_tbd(TCCState* s1, int fd, const char* filename, int lev)
917 char *soname, *data, *pos;
918 int ret = -1;
920 pos = data = tcc_load_text(fd);
921 if (!tbd_parse_movepast("install-name: ")) goto the_end;
922 tbd_parse_skipws;
923 tbd_parse_tramplequote;
924 soname = pos;
925 if (!tbd_parse_movetoany("\n \"'")) goto the_end;
926 tbd_parse_trample;
927 ret = 0;
928 if (macho_add_dllref(s1, lev, soname) != 0) goto the_end;
929 while(pos) {
930 char* sym = NULL;
931 int cont = 1;
932 if (!tbd_parse_movepast("symbols: ")) break;
933 if (!tbd_parse_movepast("[")) break;
934 while (cont) {
935 tbd_parse_skipws;
936 tbd_parse_tramplequote;
937 sym = pos;
938 if (!tbd_parse_movetoany(",] \"'")) break;
939 tbd_parse_tramplequote;
940 tbd_parse_tramplespace;
941 tbd_parse_skipws;
942 if (*pos==0||*pos==']') cont=0;
943 tbd_parse_trample;
944 set_elf_sym(s1->dynsymtab_section, 0, 0,
945 ELFW(ST_INFO)(STB_GLOBAL, STT_NOTYPE), 0, SHN_UNDEF, sym);
949 the_end:
950 tcc_free(data);
951 return ret;
954 ST_FUNC int macho_load_dll(TCCState * s1, int fd, const char* filename, int lev)
956 unsigned char buf[sizeof(struct mach_header_64)];
957 void *buf2;
958 uint32_t machofs = 0;
959 struct fat_header fh;
960 struct mach_header mh;
961 struct load_command *lc;
962 int i, swap = 0;
963 const char *soname = filename;
964 struct nlist_64 *symtab = 0;
965 uint32_t nsyms = 0;
966 char *strtab = 0;
967 uint32_t strsize = 0;
968 uint32_t iextdef = 0;
969 uint32_t nextdef = 0;
971 again:
972 if (full_read(fd, buf, sizeof(buf)) != sizeof(buf))
973 return -1;
974 memcpy(&fh, buf, sizeof(fh));
975 if (fh.magic == FAT_MAGIC || fh.magic == FAT_CIGAM) {
976 struct fat_arch *fa = load_data(fd, sizeof(fh),
977 fh.nfat_arch * sizeof(*fa));
978 swap = fh.magic == FAT_CIGAM;
979 for (i = 0; i < SWAP(fh.nfat_arch); i++)
980 if (SWAP(fa[i].cputype) == 0x01000007 /* CPU_TYPE_X86_64 */
981 && SWAP(fa[i].cpusubtype) == 3) /* CPU_SUBTYPE_X86_ALL */
982 break;
983 if (i == SWAP(fh.nfat_arch)) {
984 tcc_free(fa);
985 return -1;
987 machofs = SWAP(fa[i].offset);
988 tcc_free(fa);
989 lseek(fd, machofs, SEEK_SET);
990 goto again;
991 } else if (fh.magic == FAT_MAGIC_64 || fh.magic == FAT_CIGAM_64) {
992 tcc_warning("%s: Mach-O fat 64bit files of type 0x%x not handled",
993 filename, fh.magic);
994 return -1;
997 memcpy(&mh, buf, sizeof(mh));
998 if (mh.magic != MH_MAGIC_64)
999 return -1;
1000 dprintf("found Mach-O at %d\n", machofs);
1001 buf2 = load_data(fd, machofs + sizeof(struct mach_header_64), mh.sizeofcmds);
1002 for (i = 0, lc = buf2; i < mh.ncmds; i++) {
1003 dprintf("lc %2d: 0x%08x\n", i, lc->cmd);
1004 switch (lc->cmd) {
1005 case LC_SYMTAB:
1007 struct symtab_command *sc = (struct symtab_command*)lc;
1008 nsyms = sc->nsyms;
1009 symtab = load_data(fd, machofs + sc->symoff, nsyms * sizeof(*symtab));
1010 strsize = sc->strsize;
1011 strtab = load_data(fd, machofs + sc->stroff, strsize);
1012 break;
1014 case LC_ID_DYLIB:
1016 struct dylib_command *dc = (struct dylib_command*)lc;
1017 soname = (char*)lc + dc->name;
1018 dprintf(" ID_DYLIB %d 0x%x 0x%x %s\n",
1019 dc->timestamp, dc->current_version,
1020 dc->compatibility_version, soname);
1021 break;
1023 case LC_REEXPORT_DYLIB:
1025 struct dylib_command *dc = (struct dylib_command*)lc;
1026 char *name = (char*)lc + dc->name;
1027 int subfd = open(name, O_RDONLY | O_BINARY);
1028 dprintf(" REEXPORT %s\n", name);
1029 if (subfd < 0)
1030 tcc_warning("can't open %s (reexported from %s)", name, filename);
1031 else {
1032 /* Hopefully the REEXPORTs never form a cycle, we don't check
1033 for that! */
1034 macho_load_dll(s1, subfd, name, lev + 1);
1035 close(subfd);
1037 break;
1039 case LC_DYSYMTAB:
1041 struct dysymtab_command *dc = (struct dysymtab_command*)lc;
1042 iextdef = dc->iextdefsym;
1043 nextdef = dc->nextdefsym;
1044 break;
1047 lc = (struct load_command*) ((char*)lc + lc->cmdsize);
1050 if (0 != macho_add_dllref(s1, lev, soname))
1051 goto the_end;
1053 if (!nsyms || !nextdef)
1054 tcc_warning("%s doesn't export any symbols?", filename);
1056 //dprintf("symbols (all):\n");
1057 dprintf("symbols (exported):\n");
1058 dprintf(" n: typ sec desc value name\n");
1059 //for (i = 0; i < nsyms; i++) {
1060 for (i = iextdef; i < iextdef + nextdef; i++) {
1061 struct nlist_64 *sym = symtab + i;
1062 dprintf("%5d: %3d %3d 0x%04x 0x%016lx %s\n",
1063 i, sym->n_type, sym->n_sect, sym->n_desc, (long)sym->n_value,
1064 strtab + sym->n_strx);
1065 set_elf_sym(s1->dynsymtab_section, 0, 0,
1066 ELFW(ST_INFO)(STB_GLOBAL, STT_NOTYPE),
1067 0, SHN_UNDEF, strtab + sym->n_strx);
1070 the_end:
1071 tcc_free(strtab);
1072 tcc_free(symtab);
1073 tcc_free(buf2);
1074 return 0;