Work on the stage2 loader:
[newos.git] / boot / i386 / smp_boot.c
blob f34fb60a4c866e641d9fcf6e3f8bba7f393f4277
1 /*
2 ** Copyright 2001, Travis Geiselbrecht. All rights reserved.
3 ** Distributed under the terms of the NewOS License.
4 */
5 #include <boot/stage2.h>
6 #include "stage2_priv.h"
8 #include <libc/string.h>
9 #include <libc/printf.h>
11 #define NO_SMP 0
12 #define CHATTY_SMP 0
14 static unsigned int mp_mem_phys = 0;
15 static unsigned int mp_mem_virt = 0;
16 static struct mp_flt_struct *mp_flt_ptr = NULL;
17 static kernel_args *saved_ka = NULL;
18 static unsigned int kernel_entry_point = 0;
20 void smp_trampoline();
21 void smp_trampoline_end();
22 int smp_get_current_cpu(kernel_args *ka);
24 static unsigned int map_page(kernel_args *ka, unsigned int paddr, unsigned int vaddr)
26 unsigned int *pentry;
27 unsigned int *pgdir = (unsigned int *)(ka->arch_args.page_hole + (4*1024*1024-PAGE_SIZE));
29 // check to see if a page table exists for this range
30 if(pgdir[vaddr / PAGE_SIZE / 1024] == 0) {
31 unsigned int pgtable;
32 // we need to allocate a pgtable
33 pgtable = ka->phys_alloc_range[0].start + ka->phys_alloc_range[0].size;
34 ka->phys_alloc_range[0].size += PAGE_SIZE;
35 ka->arch_args.pgtables[ka->arch_args.num_pgtables++] = pgtable;
37 // put it in the pgdir
38 pgdir[vaddr / PAGE_SIZE / 1024] = (pgtable & ADDR_MASK) | DEFAULT_PAGE_FLAGS;
40 // zero it out in it's new mapping
41 memset((unsigned int *)((unsigned int *)ka->arch_args.page_hole + (vaddr / PAGE_SIZE / 1024) * PAGE_SIZE), 0, PAGE_SIZE);
43 // now, fill in the pentry
44 pentry = (unsigned int *)((unsigned int *)ka->arch_args.page_hole + vaddr / PAGE_SIZE);
46 *pentry = (paddr & ADDR_MASK) | DEFAULT_PAGE_FLAGS;
48 asm volatile("invlpg (%0)" : : "r" (vaddr));
50 return 0;
/*
 * Reads the 32-bit register at 'addr' and returns its value.
 *
 * The load goes through a volatile lvalue: callers poll registers in
 * busy-wait loops and issue reads whose result is discarded, and a plain
 * load could legally be hoisted out of the loop or removed by the compiler.
 */
static unsigned int apic_read(unsigned int *addr)
{
	return *(volatile unsigned int *)addr;
}
/*
 * Stores 'data' into the 32-bit register at 'addr'.
 *
 * The store goes through a volatile lvalue so that the compiler can neither
 * merge back-to-back writes to the same register nor drop a write whose
 * value is never read back by the program.
 */
static void apic_write(unsigned int *addr, unsigned int data)
{
	*(volatile unsigned int *)addr = data;
}
64 static void *mp_virt_to_phys(void *ptr)
66 return ((void *)(((unsigned int)ptr - mp_mem_virt) + mp_mem_phys));
69 static void *mp_phys_to_virt(void *ptr)
71 return ((void *)(((unsigned int)ptr - mp_mem_phys) + mp_mem_virt));
74 static unsigned int *smp_probe(unsigned int base, unsigned int limit)
76 unsigned int *ptr;
78 // dprintf("smp_probe: entry base 0x%x, limit 0x%x\n", base, limit);
80 for (ptr = (unsigned int *) base; (unsigned int) ptr < limit; ptr++) {
81 if (*ptr == MP_FLT_SIGNATURE) {
82 // dprintf("smp_probe: found floating pointer structure at 0x%x\n", ptr);
83 return ptr;
86 return NULL;
89 static void smp_do_config(kernel_args *ka)
91 char *ptr;
92 int i;
93 struct mp_config_table *mpc;
94 struct mp_ext_pe *pe;
95 struct mp_ext_ioapic *io;
96 struct mp_ext_bus *bus;
97 const char *cpu_family[] = { "", "", "", "", "Intel 486",
98 "Intel Pentium", "Intel Pentium Pro", "Intel Pentium II" };
101 * we are not running in standard configuration, so we have to look through
102 * all of the mp configuration table crap to figure out how many processors
103 * we have, where our apics are, etc.
105 ka->num_cpus = 0;
107 mpc = mp_phys_to_virt(mp_flt_ptr->mpc);
109 /* print out our new found configuration. */
110 ptr = (char *) &(mpc->oem[0]);
111 #if CHATTY_SMP
112 dprintf ("smp: oem id: %c%c%c%c%c%c%c%c product id: "
113 "%c%c%c%c%c%c%c%c%c%c%c%c\n", ptr[0], ptr[1], ptr[2], ptr[3], ptr[4],
114 ptr[5], ptr[6], ptr[7], ptr[8], ptr[9], ptr[10], ptr[11], ptr[12],
115 ptr[13], ptr[14], ptr[15], ptr[16], ptr[17], ptr[18], ptr[19],
116 ptr[20]);
117 dprintf("smp: base table has %d entries, extended section %d bytes\n",
118 mpc->num_entries, mpc->ext_len);
119 #endif
120 ka->arch_args.apic_phys = (unsigned int)mpc->apic;
122 ptr = (char *) ((unsigned int) mpc + sizeof (struct mp_config_table));
123 for (i = 0; i < mpc->num_entries; i++) {
124 switch (*ptr) {
125 case MP_EXT_PE:
126 pe = (struct mp_ext_pe *) ptr;
127 ka->arch_args.cpu_apic_id[ka->num_cpus] = pe->apic_id;
128 ka->arch_args.cpu_os_id[pe->apic_id] = ka->num_cpus;
129 ka->arch_args.cpu_apic_version[ka->num_cpus] = pe->apic_version;
130 #if CHATTY_SMP
131 dprintf ("smp: cpu#%d: %s, apic id %d, version %d%s\n",
132 ka->num_cpus, cpu_family[(pe->signature & 0xf00) >> 8],
133 pe->apic_id, pe->apic_version, (pe->cpu_flags & 0x2) ?
134 ", BSP" : "");
135 #endif
136 ptr += 20;
137 ka->num_cpus++;
138 break;
139 case MP_EXT_BUS:
140 bus = (struct mp_ext_bus *)ptr;
141 #if CHATTY_SMP
142 dprintf("smp: bus%d: %c%c%c%c%c%c\n", bus->bus_id,
143 bus->name[0], bus->name[1], bus->name[2], bus->name[3],
144 bus->name[4], bus->name[5]);
145 #endif
146 ptr += 8;
147 break;
148 case MP_EXT_IO_APIC:
149 io = (struct mp_ext_ioapic *) ptr;
150 ka->arch_args.ioapic_phys = (unsigned int)io->addr;
151 #if CHATTY_SMP
152 dprintf("smp: found io apic with apic id %d, version %d\n",
153 io->ioapic_id, io->ioapic_version);
154 #endif
155 ptr += 8;
156 break;
157 case MP_EXT_IO_INT:
158 ptr += 8;
159 break;
160 case MP_EXT_LOCAL_INT:
161 ptr += 8;
162 break;
165 dprintf("smp: apic @ 0x%x, i/o apic @ 0x%x, total %d processors detected\n",
166 (unsigned int)ka->arch_args.apic_phys, (unsigned int)ka->arch_args.ioapic_phys, ka->num_cpus);
168 // this BIOS looks broken, because it didn't report any cpus (VMWare)
169 if(ka->num_cpus == 0) {
170 ka->num_cpus = 1;
/* one memory window that is searched for the MP floating pointer structure */
struct smp_scan_spots_struct {
	unsigned int start;	/* first address of the window */
	unsigned int stop;	/* end of the window, not searched itself */
	unsigned int len;	/* stop - start; 0 terminates the table */
};

/* builds a table entry whose length is derived from its two bounds */
#define SMP_SCAN_SPOT(first, last) { (first), (last), (last) - (first) }

/*
 * Windows searched by smp_find_mp_config(), in order: the last kilobyte
 * below 640kB, then the 64kB below 1MB. A zero-length entry ends the list.
 */
static struct smp_scan_spots_struct smp_scan_spots[] = {
	SMP_SCAN_SPOT(0x9fc00, 0xa0000),
	SMP_SCAN_SPOT(0xf0000, 0x100000),
	{ 0, 0, 0 }
};
186 static int smp_find_mp_config(kernel_args *ka)
188 int i;
190 // XXX for now, assume the memory is identity mapped by the 1st stage
191 for(i=0; smp_scan_spots[i].len > 0; i++) {
192 mp_flt_ptr = (struct mp_flt_struct *)smp_probe(smp_scan_spots[i].start,
193 smp_scan_spots[i].stop);
194 if(mp_flt_ptr != NULL)
195 break;
197 #if NO_SMP
198 if(0) {
199 #else
200 if(mp_flt_ptr != NULL) {
201 #endif
202 mp_mem_phys = smp_scan_spots[i].start;
203 mp_mem_virt = smp_scan_spots[i].start;
205 #if CHATTY_SMP
206 dprintf ("smp_boot: intel mp version %s, %s", (mp_flt_ptr->mp_rev == 1) ? "1.1" :
207 "1.4", (mp_flt_ptr->mp_feature_2 & 0x80) ?
208 "imcr and pic compatibility mode.\n" : "virtual wire compatibility mode.\n");
209 #endif
210 if (mp_flt_ptr->mpc == 0) {
211 // XXX need to implement
212 #if 1
213 ka->num_cpus = 1;
214 return 1;
215 #else
216 /* this system conforms to one of the default configurations */
217 // mp_num_def_config = mp_flt_ptr->mp_feature_1;
218 dprintf ("smp: standard configuration %d\n", mp_flt_ptr->mp_feature_1);
219 /* num_cpus = 2;
220 ka->cpu_apic_id[0] = 0;
221 ka->cpu_apic_id[1] = 1;
222 apic_phys = (unsigned int *) 0xfee00000;
223 ioapic_phys = (unsigned int *) 0xfec00000;
224 kprintf ("smp: WARNING: standard configuration code is untested");
226 #endif
227 } else {
228 smp_do_config(ka);
230 return ka->num_cpus;
231 } else {
232 ka->num_cpus = 1;
233 return 1;
237 static int smp_setup_apic(kernel_args *ka)
239 unsigned int config;
240 // dprintf("setting up the apic...");
242 /* set spurious interrupt vector to 0xff */
243 config = apic_read(APIC_SIVR) & 0xfffffc00;
244 config |= APIC_ENABLE | 0xff;
245 apic_write(APIC_SIVR, config);
246 #if 0
247 /* setup LINT0 as ExtINT */
248 config = (apic_read(APIC_LINT0) & 0xffff1c00);
249 config |= APIC_LVT_DM_ExtINT | APIC_LVT_IIPP | APIC_LVT_TM;
250 apic_write(APIC_LINT0, config);
252 /* setup LINT1 as NMI */
253 config = (apic_read(APIC_LINT1) & 0xffff1c00);
254 config |= APIC_LVT_DM_NMI | APIC_LVT_IIPP;
255 apic_write(APIC_LINT1, config);
256 #endif
258 /* setup timer */
259 config = apic_read(APIC_LVTT) & ~APIC_LVTT_MASK;
260 config |= 0xfb | APIC_LVTT_M; // vector 0xfb, timer masked
261 apic_write(APIC_LVTT, config);
263 apic_write(APIC_ICRT, 0); // zero out the clock
265 config = apic_read(APIC_TDCR) & ~0x0000000f;
266 config |= APIC_TDCR_1; // clock division by 1
267 apic_write(APIC_TDCR, config);
269 /* setup error vector to 0xfe */
270 config = (apic_read(APIC_LVT3) & 0xffffff00) | 0xfe;
271 apic_write(APIC_LVT3, config);
273 /* accept all interrupts */
274 config = apic_read(APIC_TPRI) & 0xffffff00;
275 apic_write(APIC_TPRI, config);
277 config = apic_read(APIC_SIVR);
278 apic_write(APIC_EOI, 0);
280 // dprintf("done\n");
281 return 0;
284 // target function of the trampoline code
285 // The trampoline code should have the pgdir and a gdt set up for us,
286 // along with us being on the final stack for this processor. We need
287 // to set up the local APIC and load the global idt and gdt. When we're
288 // done, we'll jump into the kernel with the cpu number as an argument.
289 static int smp_cpu_ready()
291 kernel_args *ka = saved_ka;
292 unsigned int curr_cpu = smp_get_current_cpu(ka);
293 struct gdt_idt_descr idt_descr;
294 struct gdt_idt_descr gdt_descr;
296 // dprintf("smp_cpu_ready: entry cpu %d\n", curr_cpu);
298 // Important. Make sure supervisor threads can fault on read only pages...
299 asm("movl %%eax, %%cr0" : : "a" ((1 << 31) | (1 << 16) | (1 << 5) | 1));
300 asm("cld");
301 asm("fninit");
303 smp_setup_apic(ka);
305 // Set up the final idt
306 idt_descr.a = IDT_LIMIT - 1;
307 idt_descr.b = (unsigned int *)ka->arch_args.vir_idt;
309 asm("lidt %0;"
310 : : "m" (idt_descr));
312 // Set up the final gdt
313 gdt_descr.a = GDT_LIMIT - 1;
314 gdt_descr.b = (unsigned int *)ka->arch_args.vir_gdt;
316 asm("lgdt %0;"
317 : : "m" (gdt_descr));
319 asm("pushl %0; " // push the cpu number
320 "pushl %1; " // kernel args
321 "pushl $0x0;" // dummy retval for call to main
322 "pushl %2; " // this is the start address
323 "ret; " // jump.
324 : : "r" (curr_cpu), "m" (ka), "g" (kernel_entry_point));
326 // no where to return to
327 return 0;
330 static int smp_boot_all_cpus(kernel_args *ka)
332 unsigned int trampoline_code;
333 unsigned int trampoline_stack;
334 unsigned int i;
336 // XXX assume low 1 meg is identity mapped by the 1st stage bootloader
337 // and nothing important is in 0x9e000 & 0x9f000
339 // allocate a stack and a code area for the smp trampoline
340 // (these have to be < 1M physical)
341 trampoline_code = 0x9f000; // 640kB - 4096 == 0x9f000
342 trampoline_stack = 0x9e000; // 640kB - 8192 == 0x9e000
343 map_page(ka, 0x9f000, 0x9f000);
344 map_page(ka, 0x9e000, 0x9e000);
346 // copy the trampoline code over
347 memcpy((char *)trampoline_code, &smp_trampoline,
348 (unsigned int)&smp_trampoline_end - (unsigned int)&smp_trampoline);
350 // boot the cpus
351 for(i = 1; i < ka->num_cpus; i++) {
352 unsigned int *final_stack;
353 unsigned int *final_stack_ptr;
354 unsigned int *tramp_stack_ptr;
355 unsigned int config;
356 unsigned int num_startups;
357 unsigned int j;
359 // create a final stack the trampoline code will put the ap processor on
360 ka->cpu_kstack[i].start = ka->virt_alloc_range[0].start + ka->virt_alloc_range[0].size;
361 ka->cpu_kstack[i].size = STACK_SIZE * PAGE_SIZE;
362 for(j=0; j<ka->cpu_kstack[i].size/PAGE_SIZE; j++) {
363 // map the pages in
364 map_page(ka, ka->phys_alloc_range[0].start + ka->phys_alloc_range[0].size,
365 ka->virt_alloc_range[0].start + ka->virt_alloc_range[0].size);
366 ka->phys_alloc_range[0].size += PAGE_SIZE;
367 ka->virt_alloc_range[0].size += PAGE_SIZE;
370 // set this stack up
371 final_stack = (unsigned int *)ka->cpu_kstack[i].start;
372 memset(final_stack, 0, STACK_SIZE * PAGE_SIZE);
373 final_stack_ptr = (final_stack + (STACK_SIZE * PAGE_SIZE) / sizeof(unsigned int)) - 1;
374 *final_stack_ptr = (unsigned int)&smp_cpu_ready;
375 final_stack_ptr--;
377 // set the trampoline stack up
378 tramp_stack_ptr = (unsigned int *)(trampoline_stack + PAGE_SIZE - 4);
379 // final location of the stack
380 *tramp_stack_ptr = ((unsigned int)final_stack) + STACK_SIZE * PAGE_SIZE - sizeof(unsigned int);
381 tramp_stack_ptr--;
382 // page dir
383 *tramp_stack_ptr = ka->arch_args.phys_pgdir;
384 tramp_stack_ptr--;
386 // put a gdt descriptor at the bottom of the stack
387 *((unsigned short *)trampoline_stack) = 0x18-1; // LIMIT
388 *((unsigned int *)(trampoline_stack + 2)) = trampoline_stack + 8;
389 // put the gdt at the bottom
390 memcpy(&((unsigned int *)trampoline_stack)[2], (void *)ka->arch_args.vir_gdt, 6*4);
392 /* clear apic errors */
393 if(ka->arch_args.cpu_apic_version[i] & 0xf0) {
394 apic_write(APIC_ESR, 0);
395 apic_read(APIC_ESR);
398 /* send (aka assert) INIT IPI */
399 config = (apic_read(APIC_ICR2) & 0x00ffffff) | (ka->arch_args.cpu_apic_id[i] << 24);
400 apic_write(APIC_ICR2, config); /* set target pe */
401 config = (apic_read(APIC_ICR1) & 0xfff00000) | 0x0000c500;
402 apic_write(APIC_ICR1, config);
404 // wait for pending to end
405 while((apic_read(APIC_ICR1) & 0x00001000) == 0x00001000);
407 /* deassert INIT */
408 config = (apic_read(APIC_ICR2) & 0x00ffffff) | (ka->arch_args.cpu_apic_id[i] << 24);
409 apic_write(APIC_ICR2, config);
410 config = (apic_read(APIC_ICR1) & 0xfff00000) | 0x00008500;
412 // wait for pending to end
413 while((apic_read(APIC_ICR1) & 0x00001000) == 0x00001000);
414 // dprintf("0x%x\n", apic_read(APIC_ICR1));
416 /* wait 10ms */
417 sleep(10000);
419 /* is this a local apic or an 82489dx ? */
420 num_startups = (ka->arch_args.cpu_apic_version[i] & 0xf0) ? 2 : 0;
421 for (j = 0; j < num_startups; j++) {
422 /* it's a local apic, so send STARTUP IPIs */
423 apic_write(APIC_ESR, 0);
425 /* set target pe */
426 config = (apic_read(APIC_ICR2) & 0xf0ffffff) | (ka->arch_args.cpu_apic_id[i] << 24);
427 apic_write(APIC_ICR2, config);
429 /* send the IPI */
430 config = (apic_read(APIC_ICR1) & 0xfff0f800) | APIC_DM_STARTUP |
431 (0x9f000 >> 12);
432 apic_write(APIC_ICR1, config);
434 /* wait */
435 sleep(200);
437 while((apic_read(APIC_ICR1)& 0x00001000) == 0x00001000);
441 return 0;
444 void calculate_apic_timer_conversion_factor(kernel_args *ka)
446 long long t1, t2;
447 unsigned int config;
448 unsigned int count;
450 // setup the timer
451 config = apic_read(APIC_LVTT);
452 config = (config & ~APIC_LVTT_MASK) + APIC_LVTT_M; // timer masked, vector 0
453 apic_write(APIC_LVTT, config);
455 config = (apic_read(APIC_TDCR) & ~0x0000000f) + 0xb; // divide clock by one
456 apic_write(APIC_TDCR, config);
458 t1 = system_time();
459 apic_write(APIC_ICRT, 0xffffffff); // start the counter
461 execute_n_instructions(128*20000);
463 count = apic_read(APIC_CCRT);
464 t2 = system_time();
466 count = 0xffffffff - count;
468 ka->arch_args.apic_time_cv_factor = (unsigned int)((1000000.0/(t2 - t1)) * count);
470 dprintf("APIC ticks/sec = %d\n", ka->arch_args.apic_time_cv_factor);
473 int smp_boot(kernel_args *ka, unsigned int kernel_entry)
475 // dprintf("smp_boot: entry\n");
477 kernel_entry_point = kernel_entry;
478 saved_ka = ka;
480 if(smp_find_mp_config(ka) > 1) {
481 // dprintf("smp_boot: had found > 1 cpus\n");
482 // dprintf("post config:\n");
483 // dprintf("num_cpus = 0x%p\n", ka->num_cpus);
484 // dprintf("apic_phys = 0x%p\n", ka->arch_args.apic_phys);
485 // dprintf("ioapic_phys = 0x%p\n", ka->arch_args.ioapic_phys);
487 // map in the apic & ioapic
488 map_page(ka, ka->arch_args.apic_phys, ka->virt_alloc_range[0].start + ka->virt_alloc_range[0].size);
489 ka->arch_args.apic = (unsigned int *)(ka->virt_alloc_range[0].start + ka->virt_alloc_range[0].size);
490 ka->virt_alloc_range[0].size += PAGE_SIZE;
492 map_page(ka, ka->arch_args.ioapic_phys, ka->virt_alloc_range[0].start + ka->virt_alloc_range[0].size);
493 ka->arch_args.ioapic = (unsigned int *)(ka->virt_alloc_range[0].start + ka->virt_alloc_range[0].size);
494 ka->virt_alloc_range[0].size += PAGE_SIZE;
496 // dprintf("apic = 0x%p\n", ka->arch_args.apic);
497 // dprintf("ioapic = 0x%p\n", ka->arch_args.ioapic);
499 // set up the apic
500 smp_setup_apic(ka);
502 // calculate how fast the apic timer is
503 calculate_apic_timer_conversion_factor(ka);
505 // dprintf("trampolining other cpus\n");
506 smp_boot_all_cpus(ka);
507 // dprintf("done trampolining\n");
510 // dprintf("smp_boot: exit\n");
512 return 0;
515 int smp_get_current_cpu(kernel_args *ka)
517 if(ka->arch_args.apic == NULL)
518 return 0;
519 else
520 return ka->arch_args.cpu_os_id[(apic_read(APIC_ID) & 0xffffffff) >> 24];