Working on a kernel elf loader.
[newos.git] / boot / i386 / stage2.c
blob 473ae6059253b470545f492f48e288f10e8b0bf8
1 /*
2 ** Copyright 2001, Travis Geiselbrecht. All rights reserved.
3 ** Distributed under the terms of the NewOS License.
4 */
5 #include <boot/bootdir.h>
6 #include <boot/stage2.h>
7 #include "stage2_priv.h"
9 #include <libc/string.h>
10 #include <libc/stdarg.h>
11 #include <libc/printf.h>
12 #include <sys/elf32.h>
// size of the kernel's BSS region, in bytes
// NOTE(review): must agree with the linked kernel image -- confirm against kernel ldscript
const unsigned kBSSSize = 0x9000;

// we're running out of the first 'file' contained in the bootdir, which is
// a set of binaries and data packed back to back, described by an array
// of boot_entry structures at the beginning. The load address is fixed.
#define BOOTDIR_ADDR 0x100000
const boot_entry *bootdir = (boot_entry*)BOOTDIR_ADDR;

// stick the kernel arguments in a pseudo-random page that will be mapped
// at least during the call into the kernel. The kernel should copy the
// data out and unmap the page.
kernel_args *ka = (kernel_args *)0x20000;

// needed for message output: VGA text-mode framebuffer and cursor state
unsigned short *kScreenBase = (unsigned short*) 0xb8000;
unsigned screenOffset = 0;	// current cell index into the screen
unsigned int line = 0;		// current output line

// conversion factor from rdtsc ticks to microseconds,
// written by system_time_setup(), read by the system_time asm routine
unsigned int cv_factor = 0;

// size of bootdir in pages
unsigned int bootdir_pages = 0;

// working pagedir and pagetable (physical addresses, set up by mmu_init)
unsigned int *pgdir = 0;
unsigned int *pgtable = 0;

// function decls for this module
void calculate_cpu_conversion_factor();
void load_elf_image(void *data, unsigned int *next_paddr,
	addr_range *ar0, addr_range *ar1, unsigned int *start_addr, addr_range *dynamic_section);
int mmu_init(kernel_args *ka, unsigned int *next_paddr);
void mmu_map_page(unsigned int vaddr, unsigned int paddr);
// called by the stage1 bootloader.
// State:
//   32-bit
//   mmu turned on, first 4 megs or so identity mapped
//   stack somewhere below 1 MB
//   supervisor mode
void _start(unsigned int mem, char *str)
{
	unsigned int new_stack;		// NOTE(review): unused in visible code
	unsigned int *idt;
	unsigned int *gdt;
	unsigned int next_vaddr;	// next free kernel virtual address
	unsigned int next_paddr;	// next free physical page
	unsigned int nextAllocPage;	// NOTE(review): unused in visible code
	unsigned int kernelSize;	// NOTE(review): unused in visible code
	unsigned int i;
	unsigned int kernel_entry;	// kernel's ELF entry point, from load_elf_image()

	// Important. Make sure supervisor threads can fault on read only pages...
	// CR0 := PG (1<<31) | WP (1<<16) | NE (1<<5) | PE (1)
	asm("movl %%eax, %%cr0" : : "a" ((1 << 31) | (1 << 16) | (1 << 5) | 1));
	asm("cld");			// Ain't nothing but a GCC thang.
	asm("fninit");		// initialize floating point unit

	clearscreen();
	dprintf("stage2 bootloader entry.\n");
	dprintf("memsize = 0x%x\n", mem);

	// calculate the conversion factor that translates rdtsc time to real microseconds
	calculate_cpu_conversion_factor();
	// system_time() returns a long long; the two %d's pick up its low and
	// high 32-bit halves as pushed on the vararg stack
	dprintf("system_time = %d %d\n", system_time());

	// calculate how big the bootdir is so we know where we can start grabbing pages
	{
		int entry;
		for (entry = 0; entry < 64; entry++) {
			if (bootdir[entry].be_type == BE_TYPE_NONE)
				break;

			bootdir_pages += bootdir[entry].be_size;
		}

//		nmessage("bootdir is ", bootdir_pages, " pages long\n");
	}

	ka->bootdir_addr.start = (unsigned long)bootdir;
	ka->bootdir_addr.size = bootdir_pages * PAGE_SIZE;

	// physical page allocation continues right after the end of the bootdir
	next_paddr = BOOTDIR_ADDR + bootdir_pages * PAGE_SIZE;

	mmu_init(ka, &next_paddr);

	// load the kernel (3rd entry in the bootdir)
	load_elf_image((void *)(bootdir[2].be_offset * PAGE_SIZE + BOOTDIR_ADDR), &next_paddr,
			&ka->kernel_seg0_addr, &ka->kernel_seg1_addr, &kernel_entry, &ka->kernel_dynamic_section_addr);

	// next free virtual address is just past the highest loaded kernel segment
	if(ka->kernel_seg1_addr.size > 0)
		next_vaddr = ROUNDUP(ka->kernel_seg1_addr.start + ka->kernel_seg1_addr.size, PAGE_SIZE);
	else
		next_vaddr = ROUNDUP(ka->kernel_seg0_addr.start + ka->kernel_seg0_addr.size, PAGE_SIZE);

	// map in a kernel stack (STACK_SIZE is a page count here)
	ka->cpu_kstack[0].start = next_vaddr;
	for(i=0; i<STACK_SIZE; i++) {
		mmu_map_page(next_vaddr, next_paddr);
		next_vaddr += PAGE_SIZE;
		next_paddr += PAGE_SIZE;
	}
	ka->cpu_kstack[0].size = next_vaddr - ka->cpu_kstack[0].start;

	dprintf("new stack at 0x%x to 0x%x\n", ka->cpu_kstack[0].start, ka->cpu_kstack[0].start + ka->cpu_kstack[0].size);

	// set up a new idt
	{
		struct gdt_idt_descr idt_descr;

		// find a new idt
		idt = (unsigned int *)next_paddr;
		ka->arch_args.phys_idt = (unsigned int)idt;
		next_paddr += PAGE_SIZE;

//		nmessage("idt at ", (unsigned int)idt, "\n");

		// clear it out (IDT_LIMIT is in bytes; entries are 4-byte words here)
		for(i=0; i<IDT_LIMIT/4; i++) {
			idt[i] = 0;
		}

		// map the idt into virtual space
		mmu_map_page(next_vaddr, (unsigned int)idt);
		ka->arch_args.vir_idt = (unsigned int)next_vaddr;
		next_vaddr += PAGE_SIZE;

		// load the idt
		idt_descr.a = IDT_LIMIT - 1;
		idt_descr.b = (unsigned int *)ka->arch_args.vir_idt;

		asm("lidt %0;"
			: : "m" (idt_descr));

//		nmessage("idt at virtual address ", next_vpage, "\n");
	}

	// set up a new gdt
	{
		struct gdt_idt_descr gdt_descr;

		// find a new gdt
		gdt = (unsigned int *)next_paddr;
		ka->arch_args.phys_gdt = (unsigned int)gdt;
		next_paddr += PAGE_SIZE;

//		nmessage("gdt at ", (unsigned int)gdt, "\n");

		// put segment descriptors in it (two 32-bit words per descriptor)
		gdt[0] = 0;
		gdt[1] = 0;
		gdt[2] = 0x0000ffff; // seg 0x8 -- kernel 4GB code
		gdt[3] = 0x00cf9a00;
		gdt[4] = 0x0000ffff; // seg 0x10 -- kernel 4GB data
		gdt[5] = 0x00cf9200;
		gdt[6] = 0x0000ffff; // seg 0x1b -- ring 3 4GB code
		gdt[7] = 0x00cffa00;
		gdt[8] = 0x0000ffff; // seg 0x23 -- ring 3 4GB data
		gdt[9] = 0x00cff200;
		// gdt[10] & gdt[11] will be filled later by the kernel

		// map the gdt into virtual space
		mmu_map_page(next_vaddr, (unsigned int)gdt);
		ka->arch_args.vir_gdt = (unsigned int)next_vaddr;
		next_vaddr += PAGE_SIZE;

		// load the GDT
		gdt_descr.a = GDT_LIMIT - 1;
		gdt_descr.b = (unsigned int *)ka->arch_args.vir_gdt;

		asm("lgdt %0;"
			: : "m" (gdt_descr));

//		nmessage("gdt at virtual address ", next_vpage, "\n");
	}

	// Map the pg_dir into kernel space at 0xffc00000-0xffffffff
	// this enables a mmu trick where the 4 MB region that this pgdir entry
	// represents now maps the 4MB of potential pagetables that the pgdir
	// points to. Thrown away later in VM bringup, but useful for now.
	pgdir[1023] = (unsigned int)pgdir | DEFAULT_PAGE_FLAGS;

	// also map it on the next vpage
	mmu_map_page(next_vaddr, (unsigned int)pgdir);
	ka->arch_args.vir_pgdir = next_vaddr;
	next_vaddr += PAGE_SIZE;

	// save the kernel args
	ka->arch_args.system_time_cv_factor = cv_factor;
	ka->phys_mem_range[0].start = 0;
	ka->phys_mem_range[0].size = mem;
	ka->num_phys_mem_ranges = 1;
	ka->str = str;
	// everything physically allocated so far lives in one contiguous run
	ka->phys_alloc_range[0].start = BOOTDIR_ADDR;
	ka->phys_alloc_range[0].size = next_paddr - BOOTDIR_ADDR;
	ka->num_phys_alloc_ranges = 1;
	ka->virt_alloc_range[0].start = KERNEL_BASE;
	ka->virt_alloc_range[0].size = next_vaddr - KERNEL_BASE;
	ka->num_virt_alloc_ranges = 1;
	ka->arch_args.page_hole = 0xffc00000;
	ka->num_cpus = 1;
#if 0
	dprintf("kernel args at 0x%x\n", ka);
	dprintf("pgdir = 0x%x\n", ka->pgdir);
	dprintf("pgtables[0] = 0x%x\n", ka->pgtables[0]);
	dprintf("phys_idt = 0x%x\n", ka->phys_idt);
	dprintf("vir_idt = 0x%x\n", ka->vir_idt);
	dprintf("phys_gdt = 0x%x\n", ka->phys_gdt);
	dprintf("vir_gdt = 0x%x\n", ka->vir_gdt);
	dprintf("mem_size = 0x%x\n", ka->mem_size);
	dprintf("str = 0x%x\n", ka->str);
	dprintf("bootdir = 0x%x\n", ka->bootdir);
	dprintf("bootdir_size = 0x%x\n", ka->bootdir_size);
	dprintf("phys_alloc_range_low = 0x%x\n", ka->phys_alloc_range_low);
	dprintf("phys_alloc_range_high = 0x%x\n", ka->phys_alloc_range_high);
	dprintf("virt_alloc_range_low = 0x%x\n", ka->virt_alloc_range_low);
	dprintf("virt_alloc_range_high = 0x%x\n", ka->virt_alloc_range_high);
	dprintf("page_hole = 0x%x\n", ka->page_hole);
#endif
	dprintf("finding and booting other cpus...\n");
	smp_boot(ka, kernel_entry);

	dprintf("jumping into kernel at 0x%x\n", kernel_entry);

	ka->cons_line = line;

	// switch onto the freshly mapped kernel stack...
	asm("movl %0, %%eax; "	// move stack out of way
		"movl %%eax, %%esp; "
		: : "m" (ka->cpu_kstack[0].start + ka->cpu_kstack[0].size));
	// ...then fake a call frame (cpu number, kernel args, dummy return
	// address) and 'ret' into the kernel entry point -- never returns
	asm("pushl $0x0; "	// we're the BSP cpu (0)
		"pushl %0; "	// kernel args
		"pushl $0x0;"	// dummy retval for call to main
		"pushl %1; "	// this is the start address
		"ret; "		// jump.
		: : "g" (ka), "g" (kernel_entry));
}
// Load an ELF32 executable image that sits in memory at 'data' into its
// linked virtual addresses, allocating physical pages from *next_paddr.
// Records the first two PT_LOAD segments in ar0/ar1 (size 0 if absent),
// the PT_DYNAMIC range in dynamic_section, and the entry point in *start_addr.
void load_elf_image(void *data, unsigned int *next_paddr, addr_range *ar0, addr_range *ar1, unsigned int *start_addr, addr_range *dynamic_section)
{
	struct Elf32_Ehdr *imageHeader = (struct Elf32_Ehdr*) data;
	// program headers are at file offset e_phoff from the image start
	struct Elf32_Phdr *segments = (struct Elf32_Phdr*)(imageHeader->e_phoff + (unsigned) imageHeader);
	int segmentIndex;
	int foundSegmentIndex = 0;

	ar0->size = 0;
	ar1->size = 0;
	dynamic_section->size = 0;

	for (segmentIndex = 0; segmentIndex < imageHeader->e_phnum; segmentIndex++) {
		struct Elf32_Phdr *segment = &segments[segmentIndex];
		unsigned segmentOffset;

		switch(segment->p_type) {
			case PT_LOAD:
				break;
			case PT_DYNAMIC:
				dynamic_section->start = segment->p_vaddr;
				dynamic_section->size = segment->p_memsz;
				// fall through: the dynamic segment is only recorded, not mapped
			default:
				continue;
		}

//		dprintf("segment %d\n", segmentIndex);
//		dprintf("p_vaddr 0x%x p_paddr 0x%x p_filesz 0x%x p_memsz 0x%x\n",
//			segment->p_vaddr, segment->p_paddr, segment->p_filesz, segment->p_memsz);

		/* Map initialized portion */
		for (segmentOffset = 0;
			segmentOffset < ROUNDUP(segment->p_filesz, PAGE_SIZE);
			segmentOffset += PAGE_SIZE) {

			mmu_map_page(segment->p_vaddr + segmentOffset, *next_paddr);
			// copy a whole page of file data into the freshly mapped page
			memcpy((void *)ROUNDOWN(segment->p_vaddr + segmentOffset, PAGE_SIZE),
				(void *)ROUNDOWN((unsigned)data + segment->p_offset + segmentOffset, PAGE_SIZE), PAGE_SIZE);
			(*next_paddr) += PAGE_SIZE;
		}

		/* Clean out the leftover part of the last page */
		if(segment->p_filesz % PAGE_SIZE > 0) {
//			dprintf("memsetting 0 to va 0x%x, size %d\n", (void*)((unsigned)segment->p_vaddr + segment->p_filesz), PAGE_SIZE - (segment->p_filesz % PAGE_SIZE));
			memset((void*)((unsigned)segment->p_vaddr + segment->p_filesz), 0, PAGE_SIZE
				- (segment->p_filesz % PAGE_SIZE));
		}

		/* Map uninitialized portion (BSS beyond p_filesz, up to p_memsz) */
		for (; segmentOffset < ROUNDUP(segment->p_memsz, PAGE_SIZE); segmentOffset += PAGE_SIZE) {
//			dprintf("mapping zero page at va 0x%x\n", segment->p_vaddr + segmentOffset);
			mmu_map_page(segment->p_vaddr + segmentOffset, *next_paddr);
			memset((void *)(segment->p_vaddr + segmentOffset), 0, PAGE_SIZE);
			(*next_paddr) += PAGE_SIZE;
		}

		// remember the address ranges of the first two loadable segments;
		// any further PT_LOAD segments are mapped but not recorded
		switch(foundSegmentIndex) {
			case 0:
				ar0->start = segment->p_vaddr;
				ar0->size = segment->p_memsz;
				break;
			case 1:
				ar1->start = segment->p_vaddr;
				ar1->size = segment->p_memsz;
				break;
			default:
				;
		}
		foundSegmentIndex++;
	}
	*start_addr = imageHeader->e_entry;
}
// allocate a page directory and page table to facilitate mapping
// pages to the 0x80000000 - 0x80400000 region.
// Takes over paging from stage1: clones the low half of the current pgdir
// (preserving the identity mapping), switches CR3 to the clone, and hangs
// one empty page table at KERNEL_BASE. Consumes two pages from *next_paddr.
int mmu_init(kernel_args *ka, unsigned int *next_paddr)
{
	unsigned int *old_pgdir;
	int i;

	// get the current page directory
	asm("movl %%cr3, %%eax" : "=a" (old_pgdir));

	// allocate a new pgdir and
	// copy the old pgdir to the new one
	pgdir = (unsigned int *)*next_paddr;
	(*next_paddr) += PAGE_SIZE;
	ka->arch_args.phys_pgdir = (unsigned int)pgdir;
	// keep the low 2GB entries (entries 0..511) so the identity map survives
	for(i = 0; i < 512; i++)
		pgdir[i] = old_pgdir[i];

	// clear out the top part of the pgdir
	for(; i < 1024; i++)
		pgdir[i] = 0;

	// switch to the new pgdir
	asm("movl %0, %%eax;"
		"movl %%eax, %%cr3;" :: "m" (pgdir) : "eax");

	// Get new page table and clear it out
	pgtable = (unsigned int *)*next_paddr;
	ka->arch_args.pgtables[0] = (unsigned int)pgtable;
	ka->arch_args.num_pgtables = 1;
	(*next_paddr) += PAGE_SIZE;
	for (i = 0; i < 1024; i++)
		pgtable[i] = 0;

	// put the new page table into the page directory
	// this maps the kernel at KERNEL_BASE
	pgdir[KERNEL_BASE/(4*1024*1024)] = (unsigned int)pgtable | DEFAULT_PAGE_FLAGS;

	return 0;
}
362 // can only map the 4 meg region right after KERNEL_BASE, may fix this later
363 // if need arises.
364 void mmu_map_page(unsigned int vaddr, unsigned int paddr)
366 // dprintf("mmu_map_page: vaddr 0x%x, paddr 0x%x\n", vaddr, paddr);
367 if(vaddr < KERNEL_BASE || vaddr >= (KERNEL_BASE + 4096*1024)) {
368 dprintf("mmu_map_page: asked to map invalid page!\n");
369 for(;;);
371 paddr &= ~(PAGE_SIZE-1);
372 // dprintf("paddr 0x%x @ index %d\n", paddr, (vaddr % (PAGE_SIZE * 1024)) / PAGE_SIZE);
373 pgtable[(vaddr % (PAGE_SIZE * 1024)) / PAGE_SIZE] = paddr | DEFAULT_PAGE_FLAGS;
// read the CPU's 64-bit time stamp counter; the rdtsc instruction leaves
// the result in %edx:%eax, which is exactly the cdecl long long return pair
long long rdtsc();
asm("
rdtsc:
	rdtsc
	ret
");
// burn roughly 'count' instructions for TSC calibration: count/16 loop
// iterations, each executing 15 xors plus the loop instruction itself
//void execute_n_instructions(int count);
asm("
.global execute_n_instructions
execute_n_instructions:
	movl 4(%esp), %ecx
	shrl $4, %ecx /* divide count by 16 */
.again:
	xorl %eax, %eax
	xorl %eax, %eax
	xorl %eax, %eax
	xorl %eax, %eax
	xorl %eax, %eax
	xorl %eax, %eax
	xorl %eax, %eax
	xorl %eax, %eax
	xorl %eax, %eax
	xorl %eax, %eax
	xorl %eax, %eax
	xorl %eax, %eax
	xorl %eax, %eax
	xorl %eax, %eax
	xorl %eax, %eax
	loop .again
	ret
");
// compute cv_factor = (1000000 << 32) / proc_clock, i.e. the fixed-point
// (32.32) number of microseconds per TSC tick, used by system_time below
void system_time_setup(long a);
asm("
system_time_setup:
	/* First divide 1M * 2^32 by proc_clock */
	movl $0x0F4240, %ecx
	movl %ecx, %edx
	subl %eax, %eax
	movl 4(%esp), %ebx
	divl %ebx, %eax /* should be 64 / 32 */
	movl %eax, cv_factor
	ret
");
// long long system_time();
// current time in microseconds since boot: the high 64 bits of the
// 96-bit product tsc(64) * cv_factor(32), assembled from two 32x32 mults
asm("
.global system_time
system_time:
	/* load 64-bit factor into %eax (low), %edx (high) */
	/* hand-assemble rdtsc -- read time stamp counter */
	rdtsc /* time in %edx,%eax */

	pushl %ebx
	pushl %ecx
	movl cv_factor, %ebx
	movl %edx, %ecx /* save high half */
	mull %ebx /* truncate %eax, but keep %edx */
	movl %ecx, %eax
	movl %edx, %ecx /* save high half of low */
	mull %ebx /*, %eax*/
	/* now compute [%edx, %eax] + [%ecx], propagating carry */
	subl %ebx, %ebx /* need zero to propagate carry */
	addl %ecx, %eax
	adc %ebx, %edx
	popl %ecx
	popl %ebx
	ret
");
// busy-wait for at least 'time' microseconds (granularity of system_time())
void sleep(long long time)
{
	long long begin = system_time();

	// spin until the requested interval has elapsed
	for (;;) {
		if (system_time() - begin > time)
			break;
	}
}
// write byte 'value' to I/O port 'port'
#define outb(value,port) \
	asm("outb %%al,%%dx"::"a" (value),"d" (port))

// read a byte from I/O port 'port' (statement-expression yields the byte)
#define inb(port) ({ \
	unsigned char _v; \
	asm volatile("inb %%dx,%%al":"=a" (_v):"d" (port)); \
	_v; \
})

// i8253/8254 PIT input clock frequency in Hz
// NOTE(review): the canonical value is 1193182; this file uses 1193167 -- verify
#define TIMER_CLKNUM_HZ 1193167
// Calibrate the TSC against the PIT: let the PIT count down while a known
// number of instructions executes, then derive the CPU clock rate and feed
// it to system_time_setup() so system_time() can report microseconds.
void calculate_cpu_conversion_factor()
{
	unsigned char low, high;
	unsigned long expired;		// PIT ticks consumed during the busy loop
	long long t1, t2;
	long long time_base_ticks;	// TSC ticks consumed during the busy loop
	double timer_usecs;

	/* program the timer to count down mode */
	// 0x34 -> channel 0, lobyte/hibyte access, presumably mode 2 -- TODO confirm
	outb(0x34, 0x43);

	outb(0xff, 0x40); /* low and then high */
	outb(0xff, 0x40);
	t1 = rdtsc();

	execute_n_instructions(32*20000);

	t2 = rdtsc();

	outb(0x00, 0x43); /* latch counter value */
	low = inb(0x40);
	high = inb(0x40);

	// the PIT counts DOWN from 0xffff; elapsed ticks = start - current
	expired = (unsigned long)0xffff - ((((unsigned long)high) << 8) + low);

	timer_usecs = (expired * 1.0) / (TIMER_CLKNUM_HZ/1000000.0);
	time_base_ticks = t2 -t1;

	// ticks-per-microsecond * 1e6 = ticks per second = CPU Hz
	dprintf("CPU at %d Hz\n", (int)((time_base_ticks / timer_usecs) * 1000000));

	system_time_setup((int)((time_base_ticks / timer_usecs) * 1000000));
}
501 void clearscreen()
503 int i;
505 for(i=0; i< SCREEN_WIDTH*SCREEN_HEIGHT*2; i++) {
506 kScreenBase[i] = 0xf20;
510 static void scrup()
512 int i;
513 memcpy(kScreenBase, kScreenBase + SCREEN_WIDTH,
514 SCREEN_WIDTH * SCREEN_HEIGHT * 2 - SCREEN_WIDTH * 2);
515 screenOffset = (SCREEN_HEIGHT - 1) * SCREEN_WIDTH;
516 for(i=0; i<SCREEN_WIDTH; i++)
517 kScreenBase[screenOffset + i] = 0x0720;
518 line = SCREEN_HEIGHT - 1;
521 void puts(const char *str)
523 while (*str) {
524 if (*str == '\n') {
525 line++;
526 if(line > SCREEN_HEIGHT - 1)
527 scrup();
528 else
529 screenOffset += SCREEN_WIDTH - (screenOffset % 80);
530 } else {
531 kScreenBase[screenOffset++] = 0xf00 | *str;
533 if (screenOffset > SCREEN_WIDTH * SCREEN_HEIGHT)
534 scrup();
536 str++;
// printf-style debug output to the VGA console; returns the number of
// characters formatted (vsprintf's return value).
int dprintf(const char *fmt, ...)
{
	int ret;
	va_list args;
	char temp[256];

	// NOTE(review): vsprintf is unbounded -- callers must keep formatted
	// messages under 256 bytes or this overruns the stack buffer
	va_start(args, fmt);
	ret = vsprintf(temp,fmt,args);
	va_end(args);

	puts(temp);
	return ret;
}