/* Source: [tinycc.git] / tccmacho.c
   blob 85e6459ac3da6ef31b1f7022c293632522eb29f8
   commit subject: "macos: Fix memtest" */
/*
 * Mach-O file handling for TCC
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#include "tcc.h"
/* Set to 1 to trace Mach-O generation and loading on stdout. */
#define DEBUG_MACHO 0
/* Debug printf.  Note that this expands to a bare `if`, so it must only be
   used as a complete statement and never directly before an `else`. */
#define dprintf if (DEBUG_MACHO) printf
/* Header found at the start of a fat (multi-architecture) file. */
struct fat_header {
    uint32_t magic;         /* FAT_MAGIC or FAT_MAGIC_64 */
    uint32_t nfat_arch;     /* number of structs that follow */
};
/* One per-architecture entry following a fat_header. */
struct fat_arch {
    int cputype;            /* cpu specifier (int) */
    int cpusubtype;         /* machine specifier (int) */
    uint32_t offset;        /* file offset to this object file */
    uint32_t size;          /* size of this object file */
    uint32_t align;         /* alignment as a power of 2 */
};
/* Fat header magics; each CIGAM value is the byte-swapped form of the
   corresponding MAGIC value. */
#define FAT_MAGIC       0xcafebabe
#define FAT_CIGAM       0xbebafeca
#define FAT_MAGIC_64    0xcafebabf
#define FAT_CIGAM_64    0xbfbafeca
/* Mach-O file header (32-bit layout). */
struct mach_header {
    uint32_t magic;         /* mach magic number identifier */
    int cputype;            /* cpu specifier */
    int cpusubtype;         /* machine specifier */
    uint32_t filetype;      /* type of file */
    uint32_t ncmds;         /* number of load commands */
    uint32_t sizeofcmds;    /* the size of all the load commands */
    uint32_t flags;         /* flags */
};

/* 64-bit header: the 32-bit header followed by one padding word. */
struct mach_header_64 {
    struct mach_header mh;
    uint32_t reserved;      /* reserved, pad to 64bit */
};
/* Constants for the magic field of the mach_header */
#define MH_MAGIC        0xfeedface  /* the mach magic number */
#define MH_CIGAM        0xcefaedfe  /* NXSwapInt(MH_MAGIC) */
#define MH_MAGIC_64     0xfeedfacf  /* the 64-bit mach magic number */
#define MH_CIGAM_64     0xcffaedfe  /* NXSwapInt(MH_MAGIC_64) */
/* Common prefix shared by every load command. */
struct load_command {
    uint32_t cmd;           /* type of load command */
    uint32_t cmdsize;       /* total size of command in bytes */
};
/* Load command identifiers used by this file.  LC_REQ_DYLD is a flag bit
   that is or-ed into some of the command numbers below. */
#define LC_REQ_DYLD         0x80000000
#define LC_SYMTAB           0x2
#define LC_DYSYMTAB         0xb
#define LC_LOAD_DYLIB       0xc
#define LC_ID_DYLIB         0xd
#define LC_LOAD_DYLINKER    0xe
#define LC_SEGMENT_64       0x19
#define LC_REEXPORT_DYLIB   (0x1f | LC_REQ_DYLD)
#define LC_MAIN             (0x28|LC_REQ_DYLD)
typedef int vm_prot_t;

/* LC_SEGMENT_64 load command; the segment's section_64 records follow it
   directly in memory (see add_section/get_section). */
struct segment_command_64 { /* for 64-bit architectures */
    uint32_t cmd;           /* LC_SEGMENT_64 */
    uint32_t cmdsize;       /* includes sizeof section_64 structs */
    char segname[16];       /* segment name */
    uint64_t vmaddr;        /* memory address of this segment */
    uint64_t vmsize;        /* memory size of this segment */
    uint64_t fileoff;       /* file offset of this segment */
    uint64_t filesize;      /* amount to map from the file */
    vm_prot_t maxprot;      /* maximum VM protection */
    vm_prot_t initprot;     /* initial VM protection */
    uint32_t nsects;        /* number of sections in segment */
    uint32_t flags;         /* flags */
};
/* Section record stored after its owning segment_command_64. */
struct section_64 { /* for 64-bit architectures */
    char sectname[16];      /* name of this section */
    char segname[16];       /* segment this section goes in */
    uint64_t addr;          /* memory address of this section */
    uint64_t size;          /* size in bytes of this section */
    uint32_t offset;        /* file offset of this section */
    uint32_t align;         /* section alignment (power of 2) */
    uint32_t reloff;        /* file offset of relocation entries */
    uint32_t nreloc;        /* number of relocation entries */
    uint32_t flags;         /* flags (section type and attributes)*/
    uint32_t reserved1;     /* reserved (for offset or index) */
    uint32_t reserved2;     /* reserved (for count or sizeof) */
    uint32_t reserved3;     /* reserved */
};
/* Section types (stored in section_64.flags) */
#define S_REGULAR                   0x0
#define S_ZEROFILL                  0x1
#define S_NON_LAZY_SYMBOL_POINTERS  0x6
#define S_MOD_INIT_FUNC_POINTERS    0x9
#define S_MOD_TERM_FUNC_POINTERS    0xa

/* Section attributes (or-ed into section_64.flags) */
#define S_ATTR_PURE_INSTRUCTIONS    0x80000000
#define S_ATTR_SOME_INSTRUCTIONS    0x00000400
/* A string inside a load command, stored as the byte offset of the string
   from the start of that command (see add_dylib). */
typedef uint32_t lc_str;

struct dylib_command {
    uint32_t cmd;                   /* LC_ID_DYLIB, LC_LOAD_{,WEAK_}DYLIB,
                                       LC_REEXPORT_DYLIB */
    uint32_t cmdsize;               /* includes pathname string */
    lc_str name;                    /* library's path name */
    uint32_t timestamp;             /* library's build time stamp */
    uint32_t current_version;       /* library's current version number */
    uint32_t compatibility_version; /* library's compatibility vers number*/
};

struct dylinker_command {
    uint32_t cmd;                   /* LC_ID_DYLINKER, LC_LOAD_DYLINKER or
                                       LC_DYLD_ENVIRONMENT */
    uint32_t cmdsize;               /* includes pathname string */
    lc_str name;                    /* dynamic linker's path name */
};
/* LC_SYMTAB: location of the symbol table and its string table. */
struct symtab_command {
    uint32_t cmd;           /* LC_SYMTAB */
    uint32_t cmdsize;       /* sizeof(struct symtab_command) */
    uint32_t symoff;        /* symbol table offset */
    uint32_t nsyms;         /* number of symbol table entries */
    uint32_t stroff;        /* string table offset */
    uint32_t strsize;       /* string table size in bytes */
};
/* LC_DYSYMTAB: partition of the symbol table into local, externally
   defined and undefined symbols, plus the auxiliary dynamic tables. */
struct dysymtab_command {
    uint32_t cmd;           /* LC_DYSYMTAB */
    uint32_t cmdsize;       /* sizeof(struct dysymtab_command) */

    uint32_t ilocalsym;     /* index to local symbols */
    uint32_t nlocalsym;     /* number of local symbols */

    uint32_t iextdefsym;    /* index to externally defined symbols */
    uint32_t nextdefsym;    /* number of externally defined symbols */

    uint32_t iundefsym;     /* index to undefined symbols */
    uint32_t nundefsym;     /* number of undefined symbols */

    uint32_t tocoff;        /* file offset to table of contents */
    uint32_t ntoc;          /* number of entries in table of contents */

    uint32_t modtaboff;     /* file offset to module table */
    uint32_t nmodtab;       /* number of module table entries */

    uint32_t extrefsymoff;  /* offset to referenced symbol table */
    uint32_t nextrefsyms;   /* number of referenced symbol table entries */

    uint32_t indirectsymoff;/* file offset to the indirect symbol table */
    uint32_t nindirectsyms; /* number of indirect symbol table entries */

    uint32_t extreloff;     /* offset to external relocation entries */
    uint32_t nextrel;       /* number of external relocation entries */
    uint32_t locreloff;     /* offset to local relocation entries */
    uint32_t nlocrel;       /* number of local relocation entries */
};

/* Indirect symbol table entry used for pointers to local symbols
   (see check_relocs). */
#define INDIRECT_SYMBOL_LOCAL   0x80000000
/* LC_MAIN: where execution of the program starts. */
struct entry_point_command {
    uint32_t cmd;           /* LC_MAIN only used in MH_EXECUTE filetypes */
    uint32_t cmdsize;       /* 24 */
    uint64_t entryoff;      /* file (__TEXT) offset of main() */
    uint64_t stacksize;     /* if not zero, initial stack size */
};
/* Section kinds.  ELF sections are classified into one of these and then
   laid out in ascending enum order (see collect_sections), so the order of
   the enumerators is significant.  sk_last is the number of kinds and
   sizes the per-kind tables. */
enum skind {
    sk_unknown = 0,
    sk_discard,
    sk_text,
    sk_stubs,
    sk_ro_data,
    sk_uw_info,
    sk_nl_ptr,  // non-lazy pointers, aka GOT
    sk_la_ptr,  // lazy pointers
    sk_init,
    sk_fini,
    sk_rw_data,
    sk_bss,
    sk_linkedit,
    sk_last
};
/* 64-bit Mach-O symbol table entry. */
struct nlist_64 {
    uint32_t  n_strx;       /* index into the string table */
    uint8_t   n_type;       /* type flag, see below */
    uint8_t   n_sect;       /* section number or NO_SECT */
    uint16_t  n_desc;       /* see <mach-o/stab.h> */
    uint64_t  n_value;      /* value of this symbol (or stab offset) */
};
/* Values for nlist_64.n_type */
#define N_UNDF      0x0
#define N_ABS       0x2
#define N_EXT       0x1
#define N_SECT      0xe

/* Bits for nlist_64.n_desc */
#define N_WEAK_REF  0x0040
#define N_WEAK_DEF  0x0080
218 struct macho {
219 struct mach_header_64 mh;
220 int seg2lc[4], nseg;
221 struct load_command **lc;
222 struct entry_point_command *ep;
223 int nlc;
224 struct {
225 Section *s;
226 int machosect;
227 } sk_to_sect[sk_last];
228 int *elfsectomacho;
229 int *e2msym;
230 Section *symtab, *strtab, *wdata, *indirsyms, *stubs;
231 int stubsym;
232 uint32_t ilocal, iextdef, iundef;
/* Private section type for sections that end up in __LINKEDIT */
#define SHT_LINKEDIT (SHT_LOOS + 42)
#define SHN_FROMDLL  (SHN_LOOS + 2)  /* Symbol is undefined, comes from a DLL */
238 static void * add_lc(struct macho *mo, uint32_t cmd, uint32_t cmdsize)
240 struct load_command *lc = tcc_mallocz(cmdsize);
241 lc->cmd = cmd;
242 lc->cmdsize = cmdsize;
243 mo->lc = tcc_realloc(mo->lc, sizeof(mo->lc[0]) * (mo->nlc + 1));
244 mo->lc[mo->nlc++] = lc;
245 return lc;
248 static struct segment_command_64 * add_segment(struct macho *mo, char *name)
250 struct segment_command_64 *sc = add_lc(mo, LC_SEGMENT_64, sizeof(*sc));
251 strncpy(sc->segname, name, 16);
252 mo->seg2lc[mo->nseg++] = mo->nlc - 1;
253 return sc;
256 static struct segment_command_64 * get_segment(struct macho *mo, int i)
258 return (struct segment_command_64 *) (mo->lc[mo->seg2lc[i]]);
261 static int add_section(struct macho *mo, struct segment_command_64 **_seg, char *name)
263 struct segment_command_64 *seg = *_seg;
264 int ret = seg->nsects;
265 struct section_64 *sec;
266 seg->nsects++;
267 seg->cmdsize += sizeof(*sec);
268 seg = tcc_realloc(seg, sizeof(*seg) + seg->nsects * sizeof(*sec));
269 sec = (struct section_64*)((char*)seg + sizeof(*seg)) + ret;
270 memset(sec, 0, sizeof(*sec));
271 strncpy(sec->sectname, name, 16);
272 strncpy(sec->segname, seg->segname, 16);
273 *_seg = seg;
274 return ret;
277 static struct section_64 *get_section(struct segment_command_64 *seg, int i)
279 return (struct section_64*)((char*)seg + sizeof(*seg)) + i;
282 static void * add_dylib(struct macho *mo, char *name)
284 struct dylib_command *lc;
285 int sz = (sizeof(*lc) + strlen(name) + 1 + 7) & -8;
286 lc = add_lc(mo, LC_LOAD_DYLIB, sz);
287 lc->name = sizeof(*lc);
288 strcpy((char*)lc + lc->name, name);
289 lc->timestamp = 2;
290 lc->current_version = 1 << 16;
291 lc->compatibility_version = 1 << 16;
292 return lc;
295 static void check_relocs(TCCState *s1, struct macho *mo)
297 Section *s;
298 ElfW_Rel *rel;
299 ElfW(Sym) *sym;
300 int i, type, gotplt_entry, sym_index, for_code;
301 struct sym_attr *attr;
303 s1->got = new_section(s1, ".got", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE);
304 mo->indirsyms = new_section(s1, "LEINDIR", SHT_LINKEDIT, SHF_ALLOC | SHF_WRITE);
305 for (i = 1; i < s1->nb_sections; i++) {
306 s = s1->sections[i];
307 if (s->sh_type != SHT_RELX)
308 continue;
309 for_each_elem(s, 0, rel, ElfW_Rel) {
310 type = ELFW(R_TYPE)(rel->r_info);
311 gotplt_entry = gotplt_entry_type(type);
312 for_code = code_reloc(type);
313 /* We generate a non-lazy pointer for used undefined symbols
314 and for defined symbols that must have a place for their
315 address due to codegen (i.e. a reloc requiring a got slot). */
316 sym_index = ELFW(R_SYM)(rel->r_info);
317 sym = &((ElfW(Sym) *)symtab_section->data)[sym_index];
318 if (sym->st_shndx == SHN_UNDEF
319 || gotplt_entry == ALWAYS_GOTPLT_ENTRY) {
320 attr = get_sym_attr(s1, sym_index, 1);
321 if (!attr->dyn_index) {
322 uint32_t *pi = section_ptr_add(mo->indirsyms, sizeof(*pi));
323 attr->got_offset = s1->got->data_offset;
324 attr->plt_offset = -1;
325 attr->dyn_index = 1; /* used as flag */
326 section_ptr_add(s1->got, PTR_SIZE);
327 if (ELFW(ST_BIND)(sym->st_info) == STB_LOCAL) {
328 if (sym->st_shndx == SHN_UNDEF)
329 tcc_error("undefined local symbol???");
330 *pi = INDIRECT_SYMBOL_LOCAL;
331 /* The pointer slot we generated must point to the
332 symbol, whose address is only known after layout,
333 so register a simple relocation for that. */
334 put_elf_reloc(s1->symtab, s1->got, attr->got_offset,
335 R_DATA_PTR, sym_index);
336 } else
337 *pi = mo->e2msym[sym_index];
339 if (for_code) {
340 if (attr->plt_offset == -1) {
341 uint8_t *jmp;
342 attr->plt_offset = mo->stubs->data_offset;
343 jmp = section_ptr_add(mo->stubs, 6);
344 jmp[0] = 0xff; /* jmpq *ofs(%rip) */
345 jmp[1] = 0x25;
346 put_elf_reloc(s1->symtab, mo->stubs,
347 attr->plt_offset + 2,
348 R_X86_64_GOTPCREL, sym_index);
350 rel->r_info = ELFW(R_INFO)(mo->stubsym, type);
351 rel->r_addend += attr->plt_offset;
358 static int check_symbols(TCCState *s1, struct macho *mo)
360 int sym_index, sym_end;
361 int ret = 0;
363 mo->ilocal = mo->iextdef = mo->iundef = -1;
364 sym_end = symtab_section->data_offset / sizeof(ElfW(Sym));
365 for (sym_index = 1; sym_index < sym_end; ++sym_index) {
366 int elf_index = ((struct nlist_64 *)mo->symtab->data + sym_index - 1)->n_value;
367 ElfW(Sym) *sym = (ElfW(Sym) *)symtab_section->data + elf_index;
368 const char *name = (char*)symtab_section->link->data + sym->st_name;
369 unsigned type = ELFW(ST_TYPE)(sym->st_info);
370 unsigned bind = ELFW(ST_BIND)(sym->st_info);
371 unsigned vis = ELFW(ST_VISIBILITY)(sym->st_other);
373 dprintf("%4d (%4d): %09llx %4d %4d %4d %3d %s\n",
374 sym_index, elf_index, sym->st_value,
375 type, bind, vis, sym->st_shndx, name);
376 if (bind == STB_LOCAL) {
377 if (mo->ilocal == -1)
378 mo->ilocal = sym_index - 1;
379 if (mo->iextdef != -1 || mo->iundef != -1)
380 tcc_error("local syms after global ones");
381 } else if (sym->st_shndx != SHN_UNDEF) {
382 if (mo->iextdef == -1)
383 mo->iextdef = sym_index - 1;
384 if (mo->iundef != -1)
385 tcc_error("external defined symbol after undefined");
386 } else if (sym->st_shndx == SHN_UNDEF) {
387 if (mo->iundef == -1)
388 mo->iundef = sym_index - 1;
389 if (ELFW(ST_BIND)(sym->st_info) == STB_WEAK
390 || find_elf_sym(s1->dynsymtab_section, name)) {
391 /* Mark the symbol as coming from a dylib so that
392 relocate_syms doesn't complain. Normally bind_exe_dynsyms
393 would do this check, and place the symbol into dynsym
394 which is checked by relocate_syms. But Mach-O doesn't use
395 bind_exe_dynsyms. */
396 sym->st_shndx = SHN_FROMDLL;
397 continue;
399 tcc_error_noabort("undefined symbol '%s'", name);
400 ret = -1;
403 return ret;
406 static void convert_symbol(TCCState *s1, struct macho *mo, struct nlist_64 *pn)
408 struct nlist_64 n = *pn;
409 ElfSym *sym = (ElfW(Sym) *)symtab_section->data + pn->n_value;
410 const char *name = (char*)symtab_section->link->data + sym->st_name;
411 switch(ELFW(ST_TYPE)(sym->st_info)) {
412 case STT_NOTYPE:
413 case STT_OBJECT:
414 case STT_FUNC:
415 case STT_SECTION:
416 n.n_type = N_SECT;
417 break;
418 case STT_FILE:
419 n.n_type = N_ABS;
420 break;
421 default:
422 tcc_error("unhandled ELF symbol type %d %s",
423 ELFW(ST_TYPE)(sym->st_info), name);
425 if (sym->st_shndx == SHN_UNDEF)
426 tcc_error("should have been rewritten to SHN_FROMDLL: %s", name);
427 else if (sym->st_shndx == SHN_FROMDLL)
428 n.n_type = N_UNDF, n.n_sect = 0;
429 else if (sym->st_shndx == SHN_ABS)
430 n.n_type = N_ABS, n.n_sect = 0;
431 else if (sym->st_shndx >= SHN_LORESERVE)
432 tcc_error("unhandled ELF symbol section %d %s", sym->st_shndx, name);
433 else if (!mo->elfsectomacho[sym->st_shndx])
434 tcc_error("ELF section %d not mapped into Mach-O for symbol %s",
435 sym->st_shndx, name);
436 else
437 n.n_sect = mo->elfsectomacho[sym->st_shndx];
438 if (ELFW(ST_BIND)(sym->st_info) == STB_GLOBAL)
439 n.n_type |= N_EXT;
440 else if (ELFW(ST_BIND)(sym->st_info) == STB_WEAK)
441 n.n_desc |= N_WEAK_REF | (n.n_type != N_UNDF ? N_WEAK_DEF : 0);
442 n.n_strx = pn->n_strx;
443 n.n_value = sym->st_value;
444 *pn = n;
447 static void convert_symbols(TCCState *s1, struct macho *mo)
449 struct nlist_64 *pn;
450 for_each_elem(mo->symtab, 0, pn, struct nlist_64)
451 convert_symbol(s1, mo, pn);
454 static int machosymcmp(const void *_a, const void *_b)
456 TCCState *s1 = tcc_state;
457 int ea = ((struct nlist_64 *)_a)->n_value;
458 int eb = ((struct nlist_64 *)_b)->n_value;
459 ElfSym *sa = (ElfSym *)symtab_section->data + ea;
460 ElfSym *sb = (ElfSym *)symtab_section->data + eb;
461 int r;
462 /* locals, then defined externals, then undefined externals, the
463 last two sections also by name, otherwise stable sort */
464 r = (ELFW(ST_BIND)(sb->st_info) == STB_LOCAL)
465 - (ELFW(ST_BIND)(sa->st_info) == STB_LOCAL);
466 if (r)
467 return r;
468 r = (sa->st_shndx == SHN_UNDEF) - (sb->st_shndx == SHN_UNDEF);
469 if (r)
470 return r;
471 if (ELFW(ST_BIND)(sa->st_info) != STB_LOCAL) {
472 const char * na = (char*)symtab_section->link->data + sa->st_name;
473 const char * nb = (char*)symtab_section->link->data + sb->st_name;
474 r = strcmp(na, nb);
475 if (r)
476 return r;
478 return ea - eb;
481 static void create_symtab(TCCState *s1, struct macho *mo)
483 int sym_index, sym_end;
484 struct nlist_64 *pn;
486 /* Stub creation belongs to check_relocs, but we need to create
487 the symbol now, so its included in the sorting. */
488 mo->stubs = new_section(s1, "__stubs", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR);
489 mo->stubsym = put_elf_sym(s1->symtab, 0, 0,
490 ELFW(ST_INFO)(STB_LOCAL, STT_SECTION), 0,
491 mo->stubs->sh_num, ".__stubs");
493 mo->symtab = new_section(s1, "LESYMTAB", SHT_LINKEDIT, SHF_ALLOC | SHF_WRITE);
494 mo->strtab = new_section(s1, "LESTRTAB", SHT_LINKEDIT, SHF_ALLOC | SHF_WRITE);
495 put_elf_str(mo->strtab, " "); /* Mach-O starts strtab with a space */
496 sym_end = symtab_section->data_offset / sizeof(ElfW(Sym));
497 pn = section_ptr_add(mo->symtab, sizeof(*pn) * (sym_end - 1));
498 for (sym_index = 1; sym_index < sym_end; ++sym_index) {
499 ElfW(Sym) *sym = (ElfW(Sym) *)symtab_section->data + sym_index;
500 const char *name = (char*)symtab_section->link->data + sym->st_name;
501 pn[sym_index - 1].n_strx = put_elf_str(mo->strtab, name);
502 pn[sym_index - 1].n_value = sym_index;
504 tcc_enter_state(s1); /* qsort needs global state */
505 qsort(pn, sym_end - 1, sizeof(*pn), machosymcmp);
506 tcc_exit_state();
507 mo->e2msym = tcc_malloc(sym_end * sizeof(*mo->e2msym));
508 mo->e2msym[0] = -1;
509 for (sym_index = 1; sym_index < sym_end; ++sym_index) {
510 mo->e2msym[pn[sym_index - 1].n_value] = sym_index - 1;
514 struct {
515 int seg;
516 uint32_t flags;
517 char *name;
518 } skinfo[sk_last] = {
519 [sk_text] = { 1, S_REGULAR | S_ATTR_PURE_INSTRUCTIONS
520 | S_ATTR_SOME_INSTRUCTIONS, "__text" },
521 [sk_ro_data] = { 1, S_REGULAR, "__rodata" },
522 [sk_nl_ptr] = { 2, S_NON_LAZY_SYMBOL_POINTERS, "__got" },
523 [sk_init] = { 2, S_MOD_INIT_FUNC_POINTERS, "__mod_init_func" },
524 [sk_fini] = { 2, S_MOD_TERM_FUNC_POINTERS, "__mod_term_func" },
525 [sk_rw_data] = { 2, S_REGULAR, "__data" },
526 [sk_bss] = { 2, S_ZEROFILL, "__bss" },
527 [sk_linkedit] = { 3, S_REGULAR, NULL },
530 static void collect_sections(TCCState *s1, struct macho *mo)
532 int i, sk, numsec;
533 uint64_t curaddr, fileofs;
534 Section *s;
535 struct segment_command_64 *seg = NULL;
536 struct dylinker_command *dyldlc;
537 struct symtab_command *symlc;
538 struct dysymtab_command *dysymlc;
539 char *str;
541 seg = add_segment(mo, "__PAGEZERO");
542 seg->vmsize = (uint64_t)1 << 32;
544 seg = add_segment(mo, "__TEXT");
545 seg->vmaddr = (uint64_t)1 << 32;
546 seg->maxprot = 7; // rwx
547 seg->initprot = 5; // r-x
549 seg = add_segment(mo, "__DATA");
550 seg->vmaddr = -1;
551 seg->maxprot = 7; // rwx
552 seg->initprot = 3; // rw-
554 seg = add_segment(mo, "__LINKEDIT");
555 seg->vmaddr = -1;
556 seg->maxprot = 7; // rwx
557 seg->initprot = 1; // r--
559 mo->ep = add_lc(mo, LC_MAIN, sizeof(*mo->ep));
560 mo->ep->entryoff = 4096;
562 i = (sizeof(*dyldlc) + strlen("/usr/lib/dyld") + 1 + 7) &-8;
563 dyldlc = add_lc(mo, LC_LOAD_DYLINKER, i);
564 dyldlc->name = sizeof(*dyldlc);
565 str = (char*)dyldlc + dyldlc->name;
566 strcpy(str, "/usr/lib/dyld");
568 symlc = add_lc(mo, LC_SYMTAB, sizeof(*symlc));
569 dysymlc = add_lc(mo, LC_DYSYMTAB, sizeof(*dysymlc));
571 for(i = 0; i < s1->nb_loaded_dlls; i++) {
572 DLLReference *dllref = s1->loaded_dlls[i];
573 if (dllref->level == 0)
574 add_dylib(mo, dllref->name);
577 /* dyld requires a writable segment with classic Mach-O, but it ignores
578 zero-sized segments for this, so force to have some data. */
579 section_ptr_add(data_section, 1);
580 memset (mo->sk_to_sect, 0, sizeof(mo->sk_to_sect));
581 for (i = s1->nb_sections; i-- > 1;) {
582 int type, flags;
583 s = s1->sections[i];
584 type = s->sh_type;
585 flags = s->sh_flags;
586 sk = sk_unknown;
587 if (flags & SHF_ALLOC) {
588 switch (type) {
589 default: sk = sk_unknown; break;
590 case SHT_INIT_ARRAY: sk = sk_init; break;
591 case SHT_FINI_ARRAY: sk = sk_fini; break;
592 case SHT_NOBITS: sk = sk_bss; break;
593 case SHT_SYMTAB: sk = sk_discard; break;
594 case SHT_STRTAB: sk = s == stabstr_section ? sk_ro_data : sk_discard; break;
595 case SHT_RELX: sk = sk_discard; break;
596 case SHT_LINKEDIT: sk = sk_linkedit; break;
597 case SHT_PROGBITS:
598 if (s == s1->got)
599 sk = sk_nl_ptr;
600 else if (flags & SHF_EXECINSTR)
601 sk = sk_text;
602 else if (flags & SHF_WRITE)
603 sk = sk_rw_data;
604 else
605 sk = sk_ro_data;
606 break;
608 } else
609 sk = sk_discard;
610 s->prev = mo->sk_to_sect[sk].s;
611 mo->sk_to_sect[sk].s = s;
613 fileofs = 4096; /* leave space for mach-o headers */
614 curaddr = get_segment(mo, 1)->vmaddr;
615 curaddr += 4096;
616 seg = NULL;
617 numsec = 0;
618 mo->elfsectomacho = tcc_mallocz(sizeof(*mo->elfsectomacho) * s1->nb_sections);
619 for (sk = sk_unknown; sk < sk_last; sk++) {
620 struct section_64 *sec = NULL;
621 if (seg) {
622 seg->vmsize = curaddr - seg->vmaddr;
623 seg->filesize = fileofs - seg->fileoff;
625 if (skinfo[sk].seg && mo->sk_to_sect[sk].s) {
626 uint64_t al = 0;
627 int si;
628 seg = get_segment(mo, skinfo[sk].seg);
629 if (skinfo[sk].name) {
630 si = add_section(mo, &seg, skinfo[sk].name);
631 numsec++;
632 mo->lc[mo->seg2lc[skinfo[sk].seg]] = (struct load_command*)seg;
633 mo->sk_to_sect[sk].machosect = si;
634 sec = get_section(seg, si);
635 sec->flags = skinfo[sk].flags;
637 if (seg->vmaddr == -1) {
638 curaddr = (curaddr + 4095) & -4096;
639 seg->vmaddr = curaddr;
640 fileofs = (fileofs + 4095) & -4096;
641 seg->fileoff = fileofs;
644 for (s = mo->sk_to_sect[sk].s; s; s = s->prev) {
645 int a = exact_log2p1(s->sh_addralign);
646 if (a && al < (a - 1))
647 al = a - 1;
648 s->sh_size = s->data_offset;
650 if (sec)
651 sec->align = al;
652 al = 1U << al;
653 if (al > 4096)
654 tcc_warning("alignment > 4096"), sec->align = 12, al = 4096;
655 curaddr = (curaddr + al - 1) & -al;
656 fileofs = (fileofs + al - 1) & -al;
657 if (sec) {
658 sec->addr = curaddr;
659 sec->offset = fileofs;
661 for (s = mo->sk_to_sect[sk].s; s; s = s->prev) {
662 al = s->sh_addralign;
663 curaddr = (curaddr + al - 1) & -al;
664 dprintf("curaddr now 0x%llx\n", curaddr);
665 s->sh_addr = curaddr;
666 curaddr += s->sh_size;
667 if (s->sh_type != SHT_NOBITS) {
668 fileofs = (fileofs + al - 1) & -al;
669 s->sh_offset = fileofs;
670 fileofs += s->sh_size;
671 dprintf("fileofs now %lld\n", fileofs);
673 if (sec)
674 mo->elfsectomacho[s->sh_num] = numsec;
676 if (sec)
677 sec->size = curaddr - sec->addr;
679 if (DEBUG_MACHO)
680 for (s = mo->sk_to_sect[sk].s; s; s = s->prev) {
681 int type = s->sh_type;
682 int flags = s->sh_flags;
683 printf("%d section %-16s %-10s %09llx %04x %02d %s,%s,%s\n",
685 s->name,
686 type == SHT_PROGBITS ? "progbits" :
687 type == SHT_NOBITS ? "nobits" :
688 type == SHT_SYMTAB ? "symtab" :
689 type == SHT_STRTAB ? "strtab" :
690 type == SHT_INIT_ARRAY ? "init" :
691 type == SHT_FINI_ARRAY ? "fini" :
692 type == SHT_RELX ? "rel" : "???",
693 s->sh_addr,
694 (unsigned)s->data_offset,
695 s->sh_addralign,
696 flags & SHF_ALLOC ? "alloc" : "",
697 flags & SHF_WRITE ? "write" : "",
698 flags & SHF_EXECINSTR ? "exec" : ""
702 if (seg) {
703 seg->vmsize = curaddr - seg->vmaddr;
704 seg->filesize = fileofs - seg->fileoff;
707 /* Fill symtab info */
708 symlc->symoff = mo->symtab->sh_offset;
709 symlc->nsyms = mo->symtab->data_offset / sizeof(struct nlist_64);
710 symlc->stroff = mo->strtab->sh_offset;
711 symlc->strsize = mo->strtab->data_offset;
713 dysymlc->iundefsym = mo->iundef == -1 ? symlc->nsyms : mo->iundef;
714 dysymlc->iextdefsym = mo->iextdef == -1 ? dysymlc->iundefsym : mo->iextdef;
715 dysymlc->ilocalsym = mo->ilocal == -1 ? dysymlc->iextdefsym : mo->ilocal;
716 dysymlc->nlocalsym = dysymlc->iextdefsym - dysymlc->ilocalsym;
717 dysymlc->nextdefsym = dysymlc->iundefsym - dysymlc->iextdefsym;
718 dysymlc->nundefsym = symlc->nsyms - dysymlc->iundefsym;
719 dysymlc->indirectsymoff = mo->indirsyms->sh_offset;
720 dysymlc->nindirectsyms = mo->indirsyms->data_offset / sizeof(uint32_t);
723 static void macho_write(TCCState *s1, struct macho *mo, FILE *fp)
725 int i, sk;
726 uint64_t fileofs = 0;
727 Section *s;
728 mo->mh.mh.magic = MH_MAGIC_64;
729 mo->mh.mh.cputype = 0x1000007; // x86_64
730 mo->mh.mh.cpusubtype = 0x80000003;// all | CPU_SUBTYPE_LIB64
731 mo->mh.mh.filetype = 2; // MH_EXECUTE
732 mo->mh.mh.flags = 4; // DYLDLINK
733 mo->mh.mh.ncmds = mo->nlc;
734 mo->mh.mh.sizeofcmds = 0;
735 for (i = 0; i < mo->nlc; i++)
736 mo->mh.mh.sizeofcmds += mo->lc[i]->cmdsize;
738 fwrite(&mo->mh, 1, sizeof(mo->mh), fp);
739 fileofs += sizeof(mo->mh);
740 for (i = 0; i < mo->nlc; i++) {
741 fwrite(mo->lc[i], 1, mo->lc[i]->cmdsize, fp);
742 fileofs += mo->lc[i]->cmdsize;
745 for (sk = sk_unknown; sk < sk_last; sk++) {
746 struct segment_command_64 *seg;
747 if (!skinfo[sk].seg || !mo->sk_to_sect[sk].s)
748 continue;
749 seg = get_segment(mo, skinfo[sk].seg);
750 for (s = mo->sk_to_sect[sk].s; s; s = s->prev) {
751 if (s->sh_type != SHT_NOBITS) {
752 while (fileofs < s->sh_offset)
753 fputc(0, fp), fileofs++;
754 if (s->sh_size) {
755 fwrite(s->data, 1, s->sh_size, fp);
756 fileofs += s->sh_size;
763 ST_FUNC int macho_output_file(TCCState *s1, const char *filename)
765 int fd, mode, file_type;
766 FILE *fp;
767 int i, ret = -1;
768 struct macho mo = {};
770 file_type = s1->output_type;
771 if (file_type == TCC_OUTPUT_OBJ)
772 mode = 0666;
773 else
774 mode = 0777;
775 unlink(filename);
776 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, mode);
777 if (fd < 0) {
778 tcc_error_noabort("could not write '%s: %s'", filename, strerror(errno));
779 return -1;
781 fp = fdopen(fd, "wb");
782 if (s1->verbose)
783 printf("<- %s\n", filename);
785 tcc_add_runtime(s1);
786 resolve_common_syms(s1);
787 create_symtab(s1, &mo);
788 check_relocs(s1, &mo);
789 ret = check_symbols(s1, &mo);
790 if (!ret) {
791 int i;
792 Section *s;
793 collect_sections(s1, &mo);
794 relocate_syms(s1, s1->symtab, 0);
795 mo.ep->entryoff = get_sym_addr(s1, "_main", 1, 0)
796 - get_segment(&mo, 1)->vmaddr;
797 if (s1->nb_errors)
798 goto do_ret;
800 for(i = 1; i < s1->nb_sections; i++) {
801 s = s1->sections[i];
802 if (s->reloc)
803 relocate_section(s1, s);
805 convert_symbols(s1, &mo);
807 macho_write(s1, &mo, fp);
809 ret = 0;
811 do_ret:
812 for (i = 0; i < mo.nlc; i++)
813 tcc_free(mo.lc[i]);
814 tcc_free(mo.lc);
815 tcc_free(mo.elfsectomacho);
816 tcc_free(mo.e2msym);
818 fclose(fp);
819 return ret;
/* Return X with its four bytes in reversed order. */
static uint32_t swap32(uint32_t x)
{
    return (x >> 24) | (x << 24) | ((x >> 8) & 0xff00) | ((x & 0xff00) << 8);
}

/* Byte-swap X when the variable `swap`, which must be in scope where the
   macro is used, is non-zero. */
#define SWAP(x) (swap ? swap32(x) : (x))
828 ST_FUNC int macho_load_dll(TCCState *s1, int fd, const char *filename, int lev)
830 unsigned char buf[sizeof(struct mach_header_64)];
831 void *buf2;
832 uint32_t machofs = 0;
833 struct fat_header fh;
834 struct mach_header mh;
835 struct load_command *lc;
836 int i, swap = 0;
837 const char *soname = filename;
838 struct nlist_64 *symtab = 0;
839 uint32_t nsyms = 0;
840 char *strtab = 0;
841 uint32_t strsize = 0;
842 uint32_t iextdef = 0;
843 uint32_t nextdef = 0;
844 DLLReference *dllref;
846 again:
847 if (full_read(fd, buf, sizeof(buf)) != sizeof(buf))
848 return -1;
849 memcpy(&fh, buf, sizeof(fh));
850 if (fh.magic == FAT_MAGIC || fh.magic == FAT_CIGAM) {
851 struct fat_arch *fa = load_data(fd, sizeof(fh),
852 fh.nfat_arch * sizeof(*fa));
853 swap = fh.magic == FAT_CIGAM;
854 for (i = 0; i < SWAP(fh.nfat_arch); i++)
855 if (SWAP(fa[i].cputype) == 0x01000007 /* CPU_TYPE_X86_64 */
856 && SWAP(fa[i].cpusubtype) == 3) /* CPU_SUBTYPE_X86_ALL */
857 break;
858 if (i == SWAP(fh.nfat_arch)) {
859 tcc_free(fa);
860 return -1;
862 machofs = SWAP(fa[i].offset);
863 tcc_free(fa);
864 lseek(fd, machofs, SEEK_SET);
865 goto again;
866 } else if (fh.magic == FAT_MAGIC_64 || fh.magic == FAT_CIGAM_64) {
867 tcc_warning("%s: Mach-O fat 64bit files of type 0x%x not handled",
868 filename, fh.magic);
869 return -1;
872 memcpy(&mh, buf, sizeof(mh));
873 if (mh.magic != MH_MAGIC_64)
874 return -1;
875 dprintf("found Mach-O at %d\n", machofs);
876 buf2 = load_data(fd, machofs + sizeof(struct mach_header_64), mh.sizeofcmds);
877 for (i = 0, lc = buf2; i < mh.ncmds; i++) {
878 dprintf("lc %2d: 0x%08x\n", i, lc->cmd);
879 switch (lc->cmd) {
880 case LC_SYMTAB:
882 struct symtab_command *sc = (struct symtab_command*)lc;
883 nsyms = sc->nsyms;
884 symtab = load_data(fd, machofs + sc->symoff, nsyms * sizeof(*symtab));
885 strsize = sc->strsize;
886 strtab = load_data(fd, machofs + sc->stroff, strsize);
887 break;
889 case LC_ID_DYLIB:
891 struct dylib_command *dc = (struct dylib_command*)lc;
892 soname = (char*)lc + dc->name;
893 dprintf(" ID_DYLIB %d 0x%x 0x%x %s\n",
894 dc->timestamp, dc->current_version,
895 dc->compatibility_version, soname);
896 break;
898 case LC_REEXPORT_DYLIB:
900 struct dylib_command *dc = (struct dylib_command*)lc;
901 char *name = (char*)lc + dc->name;
902 dprintf(" REEXPORT %s\n", name);
903 int subfd = open(name, O_RDONLY | O_BINARY);
904 if (subfd < 0)
905 tcc_warning("can't open %s (reexported from %s)", name, filename);
906 else {
907 /* Hopefully the REEXPORTs never form a cycle, we don't check
908 for that! */
909 macho_load_dll(s1, subfd, name, lev + 1);
910 close(subfd);
912 break;
914 case LC_DYSYMTAB:
916 struct dysymtab_command *dc = (struct dysymtab_command*)lc;
917 iextdef = dc->iextdefsym;
918 nextdef = dc->nextdefsym;
919 break;
922 lc = (struct load_command*) ((char*)lc + lc->cmdsize);
925 /* if the dll is already loaded, do not load it */
926 for(i = 0; i < s1->nb_loaded_dlls; i++) {
927 dllref = s1->loaded_dlls[i];
928 if (!strcmp(soname, dllref->name)) {
929 /* but update level if needed */
930 if (lev < dllref->level)
931 dllref->level = lev;
932 goto the_end;
935 dllref = tcc_mallocz(sizeof(DLLReference) + strlen(soname));
936 dllref->level = lev;
937 strcpy(dllref->name, soname);
938 dynarray_add(&s1->loaded_dlls, &s1->nb_loaded_dlls, dllref);
940 if (!nsyms || !nextdef)
941 tcc_warning("%s doesn't export any symbols?", filename);
943 //dprintf("symbols (all):\n");
944 dprintf("symbols (exported):\n");
945 dprintf(" n: typ sec desc value name\n");
946 //for (i = 0; i < nsyms; i++) {
947 for (i = iextdef; i < iextdef + nextdef; i++) {
948 struct nlist_64 *sym = symtab + i;
949 dprintf("%5d: %3d %3d 0x%04x 0x%016llx %s\n",
950 i, sym->n_type, sym->n_sect, sym->n_desc, sym->n_value,
951 strtab + sym->n_strx);
952 set_elf_sym(s1->dynsymtab_section, 0, 0,
953 ELFW(ST_INFO)(STB_GLOBAL, STT_NOTYPE),
954 0, SHN_UNDEF, strtab + sym->n_strx);
957 the_end:
958 tcc_free(strtab);
959 tcc_free(symtab);
960 tcc_free(buf2);
961 return 0;