preprocessor cleanup: __xpv
[unleashed.git] / arch / x86 / kernel / platform / i86pc / dboot / dboot_startkern.c
blobcaac555fdb582586d57eb203ec6bf8deb4f4743d
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright 2013 Joyent, Inc. All rights reserved.
30 #include <sys/types.h>
31 #include <sys/machparam.h>
32 #include <sys/x86_archext.h>
33 #include <sys/systm.h>
34 #include <sys/mach_mmu.h>
35 #include <sys/multiboot.h>
36 #include <sys/multiboot2.h>
37 #include <sys/multiboot2_impl.h>
38 #include <sys/sysmacros.h>
39 #include <sys/sha1.h>
40 #include <util/string.h>
41 #include <util/strtolctype.h>
44 extern multiboot_header_t mb_header;
45 extern uint32_t mb2_load_addr;
46 extern int have_cpuid(void);
49 #include <sys/inttypes.h>
50 #include <sys/bootinfo.h>
51 #include <sys/mach_mmu.h>
52 #include <sys/boot_console.h>
54 #include "dboot_asm.h"
55 #include "dboot_printf.h"
56 #include "dboot_xboot.h"
57 #include "dboot_elfload.h"
59 #define SHA1_ASCII_LENGTH (SHA1_DIGEST_LENGTH * 2)
62 * This file contains code that runs to transition us from either a multiboot
63 * compliant loader (32 bit non-paging) or a XPV domain loader to
64 * regular kernel execution. Its task is to setup the kernel memory image
65 * and page tables.
67 * The code executes as:
68 * - 32 bits under GRUB (for 32 or 64 bit Solaris)
69 * - a 32 bit program for the 32-bit PV hypervisor
70 * - a 64 bit program for the 64-bit PV hypervisor (at least for now)
72 * Under the PV hypervisor, we must create mappings for any memory beyond the
73 * initial start of day allocation (such as the kernel itself).
75 * When on the metal, the mapping between maddr_t and paddr_t is 1:1.
76 * Since we are running in real mode, so all such memory is accessible.
80 * Standard bits used in PTE (page level) and PTP (internal levels)
82 x86pte_t ptp_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_USER;
83 x86pte_t pte_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_MOD | PT_NOCONSIST;
86 * This is the target addresses (physical) where the kernel text and data
87 * nucleus pages will be unpacked. On the hypervisor this is actually a
88 * virtual address.
90 paddr_t ktext_phys;
91 uint32_t ksize = 2 * FOUR_MEG; /* kernel nucleus is 8Meg */
93 static uint64_t target_kernel_text; /* value to use for KERNEL_TEXT */
96 * The stack is setup in assembler before entering startup_kernel()
98 char stack_space[STACK_SIZE];
101 * Used to track physical memory allocation
103 static paddr_t next_avail_addr = 0;
107 * If on the metal, then we have a multiboot loader.
109 uint32_t mb_magic; /* magic from boot loader */
110 uint32_t mb_addr; /* multiboot info package from loader */
111 int multiboot_version;
112 multiboot_info_t *mb_info;
113 multiboot2_info_header_t *mb2_info;
114 multiboot_tag_mmap_t *mb2_mmap_tagp;
115 int num_entries; /* mmap entry count */
116 boolean_t num_entries_set; /* is mmap entry count set */
117 uintptr_t load_addr;
121 * This contains information passed to the kernel
123 struct xboot_info boot_info[2]; /* extra space to fix alignement for amd64 */
124 struct xboot_info *bi;
127 * Page table and memory stuff.
129 static paddr_t max_mem; /* maximum memory address */
132 * Information about processor MMU
134 int amd64_support = 0;
135 int largepage_support = 0;
136 int pae_support = 0;
137 int pge_support = 0;
138 int NX_support = 0;
141 * Low 32 bits of kernel entry address passed back to assembler.
142 * When running a 64 bit kernel, the high 32 bits are 0xffffffff.
144 uint32_t entry_addr_low;
147 * Memlists for the kernel. We shouldn't need a lot of these.
149 #define MAX_MEMLIST (50)
150 struct boot_memlist memlists[MAX_MEMLIST];
151 uint_t memlists_used = 0;
152 struct boot_memlist pcimemlists[MAX_MEMLIST];
153 uint_t pcimemlists_used = 0;
154 struct boot_memlist rsvdmemlists[MAX_MEMLIST];
155 uint_t rsvdmemlists_used = 0;
158 * This should match what's in the bootloader. It's arbitrary, but GRUB
159 * in particular has limitations on how much space it can use before it
160 * stops working properly. This should be enough.
162 struct boot_modules modules[MAX_BOOT_MODULES];
163 uint_t modules_used = 0;
165 typedef mb_memory_map_t mmap_t;
168 * Debugging macros
170 uint_t prom_debug = 0;
171 uint_t map_debug = 0;
173 static char noname[2] = "-";
176 * Either hypervisor-specific or grub-specific code builds the initial
177 * memlists. This code does the sort/merge/link for final use.
179 static void
180 sort_physinstall(void)
182 int i;
183 int j;
184 struct boot_memlist tmp;
187 * Now sort the memlists, in case they weren't in order.
188 * Yeah, this is a bubble sort; small, simple and easy to get right.
190 DBG_MSG("Sorting phys-installed list\n");
191 for (j = memlists_used - 1; j > 0; --j) {
192 for (i = 0; i < j; ++i) {
193 if (memlists[i].addr < memlists[i + 1].addr)
194 continue;
195 tmp = memlists[i];
196 memlists[i] = memlists[i + 1];
197 memlists[i + 1] = tmp;
202 * Merge any memlists that don't have holes between them.
204 for (i = 0; i <= memlists_used - 1; ++i) {
205 if (memlists[i].addr + memlists[i].size != memlists[i + 1].addr)
206 continue;
208 if (prom_debug)
209 dboot_printf(
210 "merging mem segs %" PRIx64 "...%" PRIx64
211 " w/ %" PRIx64 "...%" PRIx64 "\n",
212 memlists[i].addr,
213 memlists[i].addr + memlists[i].size,
214 memlists[i + 1].addr,
215 memlists[i + 1].addr + memlists[i + 1].size);
217 memlists[i].size += memlists[i + 1].size;
218 for (j = i + 1; j < memlists_used - 1; ++j)
219 memlists[j] = memlists[j + 1];
220 --memlists_used;
221 DBG(memlists_used);
222 --i; /* after merging we need to reexamine, so do this */
225 if (prom_debug) {
226 dboot_printf("\nFinal memlists:\n");
227 for (i = 0; i < memlists_used; ++i) {
228 dboot_printf("\t%d: addr=%" PRIx64 " size=%"
229 PRIx64 "\n", i, memlists[i].addr, memlists[i].size);
234 * link together the memlists with native size pointers
236 memlists[0].next = 0;
237 memlists[0].prev = 0;
238 for (i = 1; i < memlists_used; ++i) {
239 memlists[i].prev = (native_ptr_t)(uintptr_t)(memlists + i - 1);
240 memlists[i].next = 0;
241 memlists[i - 1].next = (native_ptr_t)(uintptr_t)(memlists + i);
243 bi->bi_phys_install = (native_ptr_t)(uintptr_t)memlists;
244 DBG(bi->bi_phys_install);
248 * build bios reserved memlists
250 static void
251 build_rsvdmemlists(void)
253 int i;
255 rsvdmemlists[0].next = 0;
256 rsvdmemlists[0].prev = 0;
257 for (i = 1; i < rsvdmemlists_used; ++i) {
258 rsvdmemlists[i].prev =
259 (native_ptr_t)(uintptr_t)(rsvdmemlists + i - 1);
260 rsvdmemlists[i].next = 0;
261 rsvdmemlists[i - 1].next =
262 (native_ptr_t)(uintptr_t)(rsvdmemlists + i);
264 bi->bi_rsvdmem = (native_ptr_t)(uintptr_t)rsvdmemlists;
265 DBG(bi->bi_rsvdmem);
269 x86pte_t
270 get_pteval(paddr_t table, uint_t index)
272 if (pae_support)
273 return (((x86pte_t *)(uintptr_t)table)[index]);
274 return (((x86pte32_t *)(uintptr_t)table)[index]);
277 /*ARGSUSED*/
278 void
279 set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
281 uintptr_t tab_addr = (uintptr_t)table;
283 if (pae_support)
284 ((x86pte_t *)tab_addr)[index] = pteval;
285 else
286 ((x86pte32_t *)tab_addr)[index] = (x86pte32_t)pteval;
287 if (level == top_level && level == 2)
288 reload_cr3();
291 paddr_t
292 make_ptable(x86pte_t *pteval, uint_t level)
294 paddr_t new_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
296 if (level == top_level && level == 2)
297 *pteval = pa_to_ma((uintptr_t)new_table) | PT_VALID;
298 else
299 *pteval = pa_to_ma((uintptr_t)new_table) | ptp_bits;
302 if (map_debug)
303 dboot_printf("new page table lvl=%d paddr=0x%lx ptp=0x%"
304 PRIx64 "\n", level, (ulong_t)new_table, *pteval);
305 return (new_table);
308 x86pte_t *
309 map_pte(paddr_t table, uint_t index)
311 return ((x86pte_t *)(uintptr_t)(table + index * pte_size));
315 * dump out the contents of page tables...
317 static void
318 dump_tables(void)
320 uint_t save_index[4]; /* for recursion */
321 char *save_table[4]; /* for recursion */
322 uint_t l;
323 uint64_t va;
324 uint64_t pgsize;
325 int index;
326 int i;
327 x86pte_t pteval;
328 char *table;
329 static char *tablist = "\t\t\t";
330 char *tabs = tablist + 3 - top_level;
331 uint_t pa, pa1;
332 #define maddr_t paddr_t
334 dboot_printf("Finished pagetables:\n");
335 table = (char *)(uintptr_t)top_page_table;
336 l = top_level;
337 va = 0;
338 for (index = 0; index < ptes_per_table; ++index) {
339 pgsize = 1ull << shift_amt[l];
340 if (pae_support)
341 pteval = ((x86pte_t *)table)[index];
342 else
343 pteval = ((x86pte32_t *)table)[index];
344 if (pteval == 0)
345 goto next_entry;
347 dboot_printf("%s %p[0x%x] = %" PRIx64 ", va=%" PRIx64,
348 tabs + l, (void *)table, index, (uint64_t)pteval, va);
349 pa = ma_to_pa(pteval & MMU_PAGEMASK);
350 dboot_printf(" physaddr=%x\n", pa);
353 * Don't try to walk hypervisor private pagetables
355 if ((l > 1 || (l == 1 && (pteval & PT_PAGESIZE) == 0))) {
356 save_table[l] = table;
357 save_index[l] = index;
358 --l;
359 index = -1;
360 table = (char *)(uintptr_t)
361 ma_to_pa(pteval & MMU_PAGEMASK);
362 goto recursion;
366 * shorten dump for consecutive mappings
368 for (i = 1; index + i < ptes_per_table; ++i) {
369 if (pae_support)
370 pteval = ((x86pte_t *)table)[index + i];
371 else
372 pteval = ((x86pte32_t *)table)[index + i];
373 if (pteval == 0)
374 break;
375 pa1 = ma_to_pa(pteval & MMU_PAGEMASK);
376 if (pa1 != pa + i * pgsize)
377 break;
379 if (i > 2) {
380 dboot_printf("%s...\n", tabs + l);
381 va += pgsize * (i - 2);
382 index += i - 2;
384 next_entry:
385 va += pgsize;
386 if (l == 3 && index == 256) /* VA hole */
387 va = 0xffff800000000000ull;
388 recursion:
391 if (l < top_level) {
392 ++l;
393 index = save_index[l];
394 table = save_table[l];
395 goto recursion;
400 * Add a mapping for the machine page at the given virtual address.
402 static void
403 map_ma_at_va(maddr_t ma, native_ptr_t va, uint_t level)
405 x86pte_t *ptep;
406 x86pte_t pteval;
408 pteval = ma | pte_bits;
409 if (level > 0)
410 pteval |= PT_PAGESIZE;
411 if (va >= target_kernel_text && pge_support)
412 pteval |= PT_GLOBAL;
414 if (map_debug && ma != va)
415 dboot_printf("mapping ma=0x%" PRIx64 " va=0x%" PRIx64
416 " pte=0x%" PRIx64 " l=%d\n",
417 (uint64_t)ma, (uint64_t)va, pteval, level);
421 * Find the pte that will map this address. This creates any
422 * missing intermediate level page tables
424 ptep = find_pte(va, NULL, level, 0);
427 * When paravirtualized, we must use hypervisor calls to modify the
428 * PTE, since paging is active. On real hardware we just write to
429 * the pagetables which aren't in use yet.
431 if (va < 1024 * 1024)
432 pteval |= PT_NOCACHE; /* for video RAM */
433 if (pae_support)
434 *ptep = pteval;
435 else
436 *((x86pte32_t *)ptep) = (x86pte32_t)pteval;
440 * Add a mapping for the physical page at the given virtual address.
442 static void
443 map_pa_at_va(paddr_t pa, native_ptr_t va, uint_t level)
445 map_ma_at_va(pa_to_ma(pa), va, level);
449 * This is called to remove start..end from the
450 * possible range of PCI addresses.
452 const uint64_t pci_lo_limit = 0x00100000ul;
453 const uint64_t pci_hi_limit = 0xfff00000ul;
454 static void
455 exclude_from_pci(uint64_t start, uint64_t end)
457 int i;
458 int j;
459 struct boot_memlist *ml;
461 for (i = 0; i < pcimemlists_used; ++i) {
462 ml = &pcimemlists[i];
464 /* delete the entire range? */
465 if (start <= ml->addr && ml->addr + ml->size <= end) {
466 --pcimemlists_used;
467 for (j = i; j < pcimemlists_used; ++j)
468 pcimemlists[j] = pcimemlists[j + 1];
469 --i; /* to revisit the new one at this index */
472 /* split a range? */
473 else if (ml->addr < start && end < ml->addr + ml->size) {
475 ++pcimemlists_used;
476 if (pcimemlists_used > MAX_MEMLIST)
477 dboot_panic("too many pcimemlists");
479 for (j = pcimemlists_used - 1; j > i; --j)
480 pcimemlists[j] = pcimemlists[j - 1];
481 ml->size = start - ml->addr;
483 ++ml;
484 ml->size = (ml->addr + ml->size) - end;
485 ml->addr = end;
486 ++i; /* skip on to next one */
489 /* cut memory off the start? */
490 else if (ml->addr < end && end < ml->addr + ml->size) {
491 ml->size -= end - ml->addr;
492 ml->addr = end;
495 /* cut memory off the end? */
496 else if (ml->addr <= start && start < ml->addr + ml->size) {
497 ml->size = start - ml->addr;
503 * During memory allocation, find the highest address not used yet.
505 static void
506 check_higher(paddr_t a)
508 if (a < next_avail_addr)
509 return;
510 next_avail_addr = RNDUP(a + 1, MMU_PAGESIZE);
511 DBG(next_avail_addr);
514 static int
515 dboot_loader_mmap_entries(void)
517 if (num_entries_set == B_TRUE)
518 return (num_entries);
520 switch (multiboot_version) {
521 case 1:
522 DBG(mb_info->flags);
523 if (mb_info->flags & 0x40) {
524 mb_memory_map_t *mmap;
526 DBG(mb_info->mmap_addr);
527 DBG(mb_info->mmap_length);
528 check_higher(mb_info->mmap_addr + mb_info->mmap_length);
530 for (mmap = (mb_memory_map_t *)mb_info->mmap_addr;
531 (uint32_t)mmap < mb_info->mmap_addr +
532 mb_info->mmap_length;
533 mmap = (mb_memory_map_t *)((uint32_t)mmap +
534 mmap->size + sizeof (mmap->size)))
535 ++num_entries;
537 num_entries_set = B_TRUE;
539 break;
540 case 2:
541 num_entries_set = B_TRUE;
542 num_entries = dboot_multiboot2_mmap_nentries(mb2_info,
543 mb2_mmap_tagp);
544 break;
545 default:
546 dboot_panic("Unknown multiboot version: %d\n",
547 multiboot_version);
548 break;
550 return (num_entries);
553 static uint32_t
554 dboot_loader_mmap_get_type(int index)
556 mb_memory_map_t *mp, *mpend;
557 int i;
559 switch (multiboot_version) {
560 case 1:
561 mp = (mb_memory_map_t *)mb_info->mmap_addr;
562 mpend = (mb_memory_map_t *)
563 (mb_info->mmap_addr + mb_info->mmap_length);
565 for (i = 0; mp < mpend && i != index; i++)
566 mp = (mb_memory_map_t *)((uint32_t)mp + mp->size +
567 sizeof (mp->size));
568 if (mp >= mpend) {
569 dboot_panic("dboot_loader_mmap_get_type(): index "
570 "out of bounds: %d\n", index);
572 return (mp->type);
574 case 2:
575 return (dboot_multiboot2_mmap_get_type(mb2_info,
576 mb2_mmap_tagp, index));
578 default:
579 dboot_panic("Unknown multiboot version: %d\n",
580 multiboot_version);
581 break;
583 return (0);
586 static uint64_t
587 dboot_loader_mmap_get_base(int index)
589 mb_memory_map_t *mp, *mpend;
590 int i;
592 switch (multiboot_version) {
593 case 1:
594 mp = (mb_memory_map_t *)mb_info->mmap_addr;
595 mpend = (mb_memory_map_t *)
596 (mb_info->mmap_addr + mb_info->mmap_length);
598 for (i = 0; mp < mpend && i != index; i++)
599 mp = (mb_memory_map_t *)((uint32_t)mp + mp->size +
600 sizeof (mp->size));
601 if (mp >= mpend) {
602 dboot_panic("dboot_loader_mmap_get_base(): index "
603 "out of bounds: %d\n", index);
605 return (((uint64_t)mp->base_addr_high << 32) +
606 (uint64_t)mp->base_addr_low);
608 case 2:
609 return (dboot_multiboot2_mmap_get_base(mb2_info,
610 mb2_mmap_tagp, index));
612 default:
613 dboot_panic("Unknown multiboot version: %d\n",
614 multiboot_version);
615 break;
617 return (0);
620 static uint64_t
621 dboot_loader_mmap_get_length(int index)
623 mb_memory_map_t *mp, *mpend;
624 int i;
626 switch (multiboot_version) {
627 case 1:
628 mp = (mb_memory_map_t *)mb_info->mmap_addr;
629 mpend = (mb_memory_map_t *)
630 (mb_info->mmap_addr + mb_info->mmap_length);
632 for (i = 0; mp < mpend && i != index; i++)
633 mp = (mb_memory_map_t *)((uint32_t)mp + mp->size +
634 sizeof (mp->size));
635 if (mp >= mpend) {
636 dboot_panic("dboot_loader_mmap_get_length(): index "
637 "out of bounds: %d\n", index);
639 return (((uint64_t)mp->length_high << 32) +
640 (uint64_t)mp->length_low);
642 case 2:
643 return (dboot_multiboot2_mmap_get_length(mb2_info,
644 mb2_mmap_tagp, index));
646 default:
647 dboot_panic("Unknown multiboot version: %d\n",
648 multiboot_version);
649 break;
651 return (0);
654 static void
655 build_pcimemlists(void)
657 uint64_t page_offset = MMU_PAGEOFFSET; /* needs to be 64 bits */
658 uint64_t start;
659 uint64_t end;
660 int i, num;
663 * initialize
665 pcimemlists[0].addr = pci_lo_limit;
666 pcimemlists[0].size = pci_hi_limit - pci_lo_limit;
667 pcimemlists_used = 1;
669 num = dboot_loader_mmap_entries();
671 * Fill in PCI memlists.
673 for (i = 0; i < num; ++i) {
674 start = dboot_loader_mmap_get_base(i);
675 end = start + dboot_loader_mmap_get_length(i);
677 if (prom_debug)
678 dboot_printf("\ttype: %d %" PRIx64 "..%"
679 PRIx64 "\n", dboot_loader_mmap_get_type(i),
680 start, end);
683 * page align start and end
685 start = (start + page_offset) & ~page_offset;
686 end &= ~page_offset;
687 if (end <= start)
688 continue;
690 exclude_from_pci(start, end);
694 * Finish off the pcimemlist
696 if (prom_debug) {
697 for (i = 0; i < pcimemlists_used; ++i) {
698 dboot_printf("pcimemlist entry 0x%" PRIx64 "..0x%"
699 PRIx64 "\n", pcimemlists[i].addr,
700 pcimemlists[i].addr + pcimemlists[i].size);
703 pcimemlists[0].next = 0;
704 pcimemlists[0].prev = 0;
705 for (i = 1; i < pcimemlists_used; ++i) {
706 pcimemlists[i].prev =
707 (native_ptr_t)(uintptr_t)(pcimemlists + i - 1);
708 pcimemlists[i].next = 0;
709 pcimemlists[i - 1].next =
710 (native_ptr_t)(uintptr_t)(pcimemlists + i);
712 bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
713 DBG(bi->bi_pcimem);
/*
 * Multiboot 1 passes no framebuffer/console information; nothing to
 * record in xboot_info here.
 */
static void
dboot_multiboot1_xboot_consinfo(void)
{
}
/*
 * Console information from multiboot 2 is not consumed here; stub kept
 * so both protocols have matching hooks.
 */
static void
dboot_multiboot2_xboot_consinfo(void)
{
}
727 static int
728 dboot_multiboot_modcount(void)
730 switch (multiboot_version) {
731 case 1:
732 return (mb_info->mods_count);
734 case 2:
735 return (dboot_multiboot2_modcount(mb2_info));
737 default:
738 dboot_panic("Unknown multiboot version: %d\n",
739 multiboot_version);
740 break;
742 return (0);
745 static uint32_t
746 dboot_multiboot_modstart(int index)
748 switch (multiboot_version) {
749 case 1:
750 return (((mb_module_t *)mb_info->mods_addr)[index].mod_start);
752 case 2:
753 return (dboot_multiboot2_modstart(mb2_info, index));
755 default:
756 dboot_panic("Unknown multiboot version: %d\n",
757 multiboot_version);
758 break;
760 return (0);
763 static uint32_t
764 dboot_multiboot_modend(int index)
766 switch (multiboot_version) {
767 case 1:
768 return (((mb_module_t *)mb_info->mods_addr)[index].mod_end);
770 case 2:
771 return (dboot_multiboot2_modend(mb2_info, index));
773 default:
774 dboot_panic("Unknown multiboot version: %d\n",
775 multiboot_version);
776 break;
778 return (0);
781 static char *
782 dboot_multiboot_modcmdline(int index)
784 switch (multiboot_version) {
785 case 1:
786 return ((char *)((mb_module_t *)
787 mb_info->mods_addr)[index].mod_name);
789 case 2:
790 return (dboot_multiboot2_modcmdline(mb2_info, index));
792 default:
793 dboot_panic("Unknown multiboot version: %d\n",
794 multiboot_version);
795 break;
797 return (0);
801 * Find the environment module for console setup.
802 * Since we need the console to print early boot messages, the console is set up
803 * before anything else and therefore we need to pick up the environment module
804 * early too.
806 * Note, we just will search for and if found, will pass the env
807 * module to console setup, the proper module list processing will happen later.
809 static void
810 dboot_find_env(void)
812 int i, modcount;
813 uint32_t mod_start, mod_end;
814 char *cmdline;
816 modcount = dboot_multiboot_modcount();
818 for (i = 0; i < modcount; ++i) {
819 cmdline = dboot_multiboot_modcmdline(i);
820 if (cmdline == NULL)
821 continue;
823 if (strstr(cmdline, "type=environment") == NULL)
824 continue;
826 mod_start = dboot_multiboot_modstart(i);
827 mod_end = dboot_multiboot_modend(i);
828 modules[0].bm_addr = mod_start;
829 modules[0].bm_size = mod_end - mod_start;
830 modules[0].bm_name = (uintptr_t)NULL;
831 modules[0].bm_hash = (uintptr_t)NULL;
832 modules[0].bm_type = BMT_ENV;
833 bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
834 bi->bi_module_cnt = 1;
835 return;
839 static boolean_t
840 dboot_multiboot_basicmeminfo(uint32_t *lower, uint32_t *upper)
842 boolean_t rv = B_FALSE;
844 switch (multiboot_version) {
845 case 1:
846 if (mb_info->flags & 0x01) {
847 *lower = mb_info->mem_lower;
848 *upper = mb_info->mem_upper;
849 rv = B_TRUE;
851 break;
853 case 2:
854 return (dboot_multiboot2_basicmeminfo(mb2_info, lower, upper));
856 default:
857 dboot_panic("Unknown multiboot version: %d\n",
858 multiboot_version);
859 break;
861 return (rv);
864 static uint8_t
865 dboot_a2h(char v)
867 if (v >= 'a')
868 return (v - 'a' + 0xa);
869 else if (v >= 'A')
870 return (v - 'A' + 0xa);
871 else if (v >= '0')
872 return (v - '0');
873 else
874 dboot_panic("bad ASCII hex character %c\n", v);
876 return (0);
879 static void
880 digest_a2h(const char *ascii, uint8_t *digest)
882 unsigned int i;
884 for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
885 digest[i] = dboot_a2h(ascii[i * 2]) << 4;
886 digest[i] |= dboot_a2h(ascii[i * 2 + 1]);
891 * Generate a SHA-1 hash of the first len bytes of image, and compare it with
892 * the ASCII-format hash found in the 40-byte buffer at ascii. If they
893 * match, return 0, otherwise -1. This works only for images smaller than
894 * 4 GB, which should not be a problem.
896 static int
897 check_image_hash(uint_t midx)
899 const char *ascii;
900 const void *image;
901 size_t len;
902 SHA1_CTX ctx;
903 uint8_t digest[SHA1_DIGEST_LENGTH];
904 uint8_t baseline[SHA1_DIGEST_LENGTH];
905 unsigned int i;
907 ascii = (const char *)(uintptr_t)modules[midx].bm_hash;
908 image = (const void *)(uintptr_t)modules[midx].bm_addr;
909 len = (size_t)modules[midx].bm_size;
911 digest_a2h(ascii, baseline);
913 SHA1Init(&ctx);
914 SHA1Update(&ctx, image, len);
915 SHA1Final(digest, &ctx);
917 for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
918 if (digest[i] != baseline[i])
919 return (-1);
922 return (0);
925 static const char *
926 type_to_str(boot_module_type_t type)
928 switch (type) {
929 case BMT_ROOTFS:
930 return ("rootfs");
931 case BMT_FILE:
932 return ("file");
933 case BMT_HASH:
934 return ("hash");
935 case BMT_ENV:
936 return ("environment");
937 default:
938 return ("unknown");
942 static void
943 check_images(void)
945 uint_t i;
946 char displayhash[SHA1_ASCII_LENGTH + 1];
948 for (i = 0; i < modules_used; i++) {
949 if (prom_debug) {
950 dboot_printf("module #%d: name %s type %s "
951 "addr %lx size %lx\n",
952 i, (char *)(uintptr_t)modules[i].bm_name,
953 type_to_str(modules[i].bm_type),
954 (ulong_t)modules[i].bm_addr,
955 (ulong_t)modules[i].bm_size);
958 if (modules[i].bm_type == BMT_HASH ||
959 modules[i].bm_hash == (uintptr_t)NULL) {
960 DBG_MSG("module has no hash; skipping check\n");
961 continue;
963 (void) memcpy(displayhash,
964 (void *)(uintptr_t)modules[i].bm_hash,
965 SHA1_ASCII_LENGTH);
966 displayhash[SHA1_ASCII_LENGTH] = '\0';
967 if (prom_debug) {
968 dboot_printf("checking expected hash [%s]: ",
969 displayhash);
972 if (check_image_hash(i) != 0)
973 dboot_panic("hash mismatch!\n");
974 else
975 DBG_MSG("OK\n");
980 * Determine the module's starting address, size, name, and type, and fill the
981 * boot_modules structure. This structure is used by the bop code, except for
982 * hashes which are checked prior to transferring control to the kernel.
984 static void
985 process_module(int midx)
987 uint32_t mod_start = dboot_multiboot_modstart(midx);
988 uint32_t mod_end = dboot_multiboot_modend(midx);
989 char *cmdline = dboot_multiboot_modcmdline(midx);
990 char *p, *q;
992 check_higher(mod_end);
993 if (prom_debug) {
994 dboot_printf("\tmodule #%d: '%s' at 0x%lx, end 0x%lx\n",
995 midx, cmdline, (ulong_t)mod_start, (ulong_t)mod_end);
998 if (mod_start > mod_end) {
999 dboot_panic("module #%d: module start address 0x%lx greater "
1000 "than end address 0x%lx", midx,
1001 (ulong_t)mod_start, (ulong_t)mod_end);
1005 * A brief note on lengths and sizes: GRUB, for reasons unknown, passes
1006 * the address of the last valid byte in a module plus 1 as mod_end.
1007 * This is of course a bug; the multiboot specification simply states
1008 * that mod_start and mod_end "contain the start and end addresses of
1009 * the boot module itself" which is pretty obviously not what GRUB is
1010 * doing. However, fixing it requires that not only this code be
1011 * changed but also that other code consuming this value and values
1012 * derived from it be fixed, and that the kernel and GRUB must either
1013 * both have the bug or neither. While there are a lot of combinations
1014 * that will work, there are also some that won't, so for simplicity
1015 * we'll just cope with the bug. That means we won't actually hash the
1016 * byte at mod_end, and we will expect that mod_end for the hash file
1017 * itself is one greater than some multiple of 41 (40 bytes of ASCII
1018 * hash plus a newline for each module). We set bm_size to the true
1019 * correct number of bytes in each module, achieving exactly this.
1022 modules[midx].bm_addr = mod_start;
1023 modules[midx].bm_size = mod_end - mod_start;
1024 modules[midx].bm_name = (native_ptr_t)(uintptr_t)cmdline;
1025 modules[midx].bm_hash = (uintptr_t)NULL;
1026 modules[midx].bm_type = BMT_FILE;
1028 if (cmdline == NULL) {
1029 modules[midx].bm_name = (native_ptr_t)(uintptr_t)noname;
1030 return;
1033 p = cmdline;
1034 modules[midx].bm_name =
1035 (native_ptr_t)(uintptr_t)strsep(&p, " \t\f\n\r");
1037 while (p != NULL) {
1038 q = strsep(&p, " \t\f\n\r");
1039 if (strncmp(q, "name=", 5) == 0) {
1040 if (q[5] != '\0' && !isspace(q[5])) {
1041 modules[midx].bm_name =
1042 (native_ptr_t)(uintptr_t)(q + 5);
1044 continue;
1047 if (strncmp(q, "type=", 5) == 0) {
1048 if (q[5] == '\0' || isspace(q[5]))
1049 continue;
1050 q += 5;
1051 if (strcmp(q, "rootfs") == 0) {
1052 modules[midx].bm_type = BMT_ROOTFS;
1053 } else if (strcmp(q, "hash") == 0) {
1054 modules[midx].bm_type = BMT_HASH;
1055 } else if (strcmp(q, "environment") == 0) {
1056 modules[midx].bm_type = BMT_ENV;
1057 } else if (strcmp(q, "file") != 0) {
1058 dboot_printf("\tmodule #%d: unknown module "
1059 "type '%s'; defaulting to 'file'",
1060 midx, q);
1062 continue;
1065 if (strncmp(q, "hash=", 5) == 0) {
1066 if (q[5] != '\0' && !isspace(q[5])) {
1067 modules[midx].bm_hash =
1068 (native_ptr_t)(uintptr_t)(q + 5);
1070 continue;
1073 dboot_printf("ignoring unknown option '%s'\n", q);
1078 * Backward compatibility: if there are exactly one or two modules, both
1079 * of type 'file' and neither with an embedded hash value, we have been
1080 * given the legacy style modules. In this case we need to treat the first
1081 * module as a rootfs and the second as a hash referencing that module.
1082 * Otherwise, even if the configuration is invalid, we assume that the
1083 * operator knows what he's doing or at least isn't being bitten by this
1084 * interface change.
1086 static void
1087 fixup_modules(void)
1089 if (modules_used == 0 || modules_used > 2)
1090 return;
1092 if (modules[0].bm_type != BMT_FILE ||
1093 modules_used > 1 && modules[1].bm_type != BMT_FILE) {
1094 return;
1097 if (modules[0].bm_hash != (uintptr_t)NULL ||
1098 modules_used > 1 && modules[1].bm_hash != (uintptr_t)NULL) {
1099 return;
1102 modules[0].bm_type = BMT_ROOTFS;
1103 if (modules_used > 1) {
1104 modules[1].bm_type = BMT_HASH;
1105 modules[1].bm_name = modules[0].bm_name;
1110 * For modules that do not have assigned hashes but have a separate hash module,
1111 * find the assigned hash module and set the primary module's bm_hash to point
1112 * to the hash data from that module. We will then ignore modules of type
1113 * BMT_HASH from this point forward.
1115 static void
1116 assign_module_hashes(void)
1118 uint_t i, j;
1120 for (i = 0; i < modules_used; i++) {
1121 if (modules[i].bm_type == BMT_HASH ||
1122 modules[i].bm_hash != (uintptr_t)NULL) {
1123 continue;
1126 for (j = 0; j < modules_used; j++) {
1127 if (modules[j].bm_type != BMT_HASH ||
1128 strcmp((char *)(uintptr_t)modules[j].bm_name,
1129 (char *)(uintptr_t)modules[i].bm_name) != 0) {
1130 continue;
1133 if (modules[j].bm_size < SHA1_ASCII_LENGTH) {
1134 dboot_printf("Short hash module of length "
1135 "0x%lx bytes; ignoring\n",
1136 (ulong_t)modules[j].bm_size);
1137 } else {
1138 modules[i].bm_hash = modules[j].bm_addr;
1140 break;
1146 * Walk through the module information finding the last used address.
1147 * The first available address will become the top level page table.
1149 static void
1150 dboot_process_modules(void)
1152 int i, modcount;
1153 extern char _end[];
1155 DBG_MSG("\nFinding Modules\n");
1156 modcount = dboot_multiboot_modcount();
1157 if (modcount > MAX_BOOT_MODULES) {
1158 dboot_panic("Too many modules (%d) -- the maximum is %d.",
1159 modcount, MAX_BOOT_MODULES);
1162 * search the modules to find the last used address
1163 * we'll build the module list while we're walking through here
1165 check_higher((paddr_t)(uintptr_t)&_end);
1166 for (i = 0; i < modcount; ++i) {
1167 process_module(i);
1168 modules_used++;
1170 bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
1171 DBG(bi->bi_modules);
1172 bi->bi_module_cnt = modcount;
1173 DBG(bi->bi_module_cnt);
1175 fixup_modules();
1176 assign_module_hashes();
1177 check_images();
1181 * We then build the phys_install memlist from the multiboot information.
1183 static void
1184 dboot_process_mmap(void)
1186 uint64_t start;
1187 uint64_t end;
1188 uint64_t page_offset = MMU_PAGEOFFSET; /* needs to be 64 bits */
1189 uint32_t lower, upper;
1190 int i, mmap_entries;
1193 * Walk through the memory map from multiboot and build our memlist
1194 * structures. Note these will have native format pointers.
1196 DBG_MSG("\nFinding Memory Map\n");
1197 num_entries = 0;
1198 num_entries_set = B_FALSE;
1199 max_mem = 0;
1200 if ((mmap_entries = dboot_loader_mmap_entries()) > 0) {
1201 for (i = 0; i < mmap_entries; i++) {
1202 uint32_t type = dboot_loader_mmap_get_type(i);
1203 start = dboot_loader_mmap_get_base(i);
1204 end = start + dboot_loader_mmap_get_length(i);
1206 if (prom_debug)
1207 dboot_printf("\ttype: %d %" PRIx64 "..%"
1208 PRIx64 "\n", type, start, end);
1211 * page align start and end
1213 start = (start + page_offset) & ~page_offset;
1214 end &= ~page_offset;
1215 if (end <= start)
1216 continue;
1219 * only type 1 is usable RAM
1221 switch (type) {
1222 case 1:
1223 if (end > max_mem)
1224 max_mem = end;
1225 memlists[memlists_used].addr = start;
1226 memlists[memlists_used].size = end - start;
1227 ++memlists_used;
1228 if (memlists_used > MAX_MEMLIST)
1229 dboot_panic("too many memlists");
1230 break;
1231 case 2:
1232 rsvdmemlists[rsvdmemlists_used].addr = start;
1233 rsvdmemlists[rsvdmemlists_used].size =
1234 end - start;
1235 ++rsvdmemlists_used;
1236 if (rsvdmemlists_used > MAX_MEMLIST)
1237 dboot_panic("too many rsvdmemlists");
1238 break;
1239 default:
1240 continue;
1243 build_pcimemlists();
1244 } else if (dboot_multiboot_basicmeminfo(&lower, &upper)) {
1245 DBG(lower);
1246 memlists[memlists_used].addr = 0;
1247 memlists[memlists_used].size = lower * 1024;
1248 ++memlists_used;
1249 DBG(upper);
1250 memlists[memlists_used].addr = 1024 * 1024;
1251 memlists[memlists_used].size = upper * 1024;
1252 ++memlists_used;
1255 * Old platform - assume I/O space at the end of memory.
1257 pcimemlists[0].addr = (upper * 1024) + (1024 * 1024);
1258 pcimemlists[0].size = pci_hi_limit - pcimemlists[0].addr;
1259 pcimemlists[0].next = 0;
1260 pcimemlists[0].prev = 0;
1261 bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
1262 DBG(bi->bi_pcimem);
1263 } else {
1264 dboot_panic("No memory info from boot loader!!!");
1268 * finish processing the physinstall list
1270 sort_physinstall();
1273 * build bios reserved mem lists
1275 build_rsvdmemlists();
1279 * The highest address is used as the starting point for dboot's simple
1280 * memory allocator.
1282 * Finding the highest address in case of Multiboot 1 protocol is
1283 * quite painful in the sense that some information provided by
1284 * the multiboot info structure points to BIOS data, and some to RAM.
1286 * The module list was processed and checked already by dboot_process_modules(),
1287 * so we will check the command line string and the memory map.
1289 * This list of to be checked items is based on our current knowledge of
1290 * allocations made by grub1 and will need to be reviewed if there
1291 * are updates about the information provided by Multiboot 1.
1293 * In the case of the Multiboot 2, our life is much simpler, as the MB2
1294 * information tag list is one contiguous chunk of memory.
1296 static paddr_t
1297 dboot_multiboot1_highest_addr(void)
1299 paddr_t addr = (uintptr_t)NULL;
1300 char *cmdl = (char *)mb_info->cmdline;
1302 if (mb_info->flags & MB_INFO_CMDLINE)
1303 addr = ((paddr_t)((uintptr_t)cmdl + strlen(cmdl) + 1));
1305 if (mb_info->flags & MB_INFO_MEM_MAP)
1306 addr = MAX(addr,
1307 ((paddr_t)(mb_info->mmap_addr + mb_info->mmap_length)));
1308 return (addr);
1311 static void
1312 dboot_multiboot_highest_addr(void)
1314 paddr_t addr;
1316 switch (multiboot_version) {
1317 case 1:
1318 addr = dboot_multiboot1_highest_addr();
1319 if (addr != (uintptr_t)NULL)
1320 check_higher(addr);
1321 break;
1322 case 2:
1323 addr = dboot_multiboot2_highest_addr(mb2_info);
1324 if (addr != (uintptr_t)NULL)
1325 check_higher(addr);
1326 break;
1327 default:
1328 dboot_panic("Unknown multiboot version: %d\n",
1329 multiboot_version);
1330 break;
/*
 * Walk the boot loader provided information and find the highest free address.
 */
static void
init_mem_alloc(void)
{
	DBG_MSG("Entered init_mem_alloc()\n");
	/* Account for loaded modules (presumably via check_higher()). */
	dboot_process_modules();
	/* Build memlists/rsvdmemlists/pcimemlists from the loader memory map. */
	dboot_process_mmap();
	/*
	 * Finally push the allocation floor above any remaining
	 * loader-owned data (command line, memory map / MB2 tag list).
	 * NOTE(review): the ordering of these three calls looks
	 * deliberate -- confirm before reordering.
	 */
	dboot_multiboot_highest_addr();
}
1346 static void
1347 dboot_multiboot_get_fwtables(void)
1349 multiboot_tag_new_acpi_t *nacpitagp;
1350 multiboot_tag_old_acpi_t *oacpitagp;
1352 /* no fw tables from multiboot 1 */
1353 if (multiboot_version != 2)
1354 return;
1356 /* only provide SMBIOS pointer in case of UEFI */
1357 bi->bi_smbios = (uintptr_t)NULL;
1359 nacpitagp = (multiboot_tag_new_acpi_t *)
1360 dboot_multiboot2_find_tag(mb2_info,
1361 MULTIBOOT_TAG_TYPE_ACPI_NEW);
1362 oacpitagp = (multiboot_tag_old_acpi_t *)
1363 dboot_multiboot2_find_tag(mb2_info,
1364 MULTIBOOT_TAG_TYPE_ACPI_OLD);
1366 if (nacpitagp != NULL) {
1367 bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1368 &nacpitagp->mb_rsdp[0];
1369 } else if (oacpitagp != NULL) {
1370 bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1371 &oacpitagp->mb_rsdp[0];
1372 } else {
1373 bi->bi_acpi_rsdp = (uintptr_t)NULL;
1378 * Simple memory allocator, allocates aligned physical memory.
1379 * Note that startup_kernel() only allocates memory, never frees.
1380 * Memory usage just grows in an upward direction.
1382 static void *
1383 do_mem_alloc(uint32_t size, uint32_t align)
1385 uint_t i;
1386 uint64_t best;
1387 uint64_t start;
1388 uint64_t end;
1391 * make sure size is a multiple of pagesize
1393 size = RNDUP(size, MMU_PAGESIZE);
1394 next_avail_addr = RNDUP(next_avail_addr, align);
1397 * XXPV fixme joe
1399 * a really large bootarchive that causes you to run out of memory
1400 * may cause this to blow up
1402 /* LINTED E_UNEXPECTED_UINT_PROMOTION */
1403 best = (uint64_t)-size;
1404 for (i = 0; i < memlists_used; ++i) {
1405 start = memlists[i].addr;
1406 end = start + memlists[i].size;
1409 * did we find the desired address?
1411 if (start <= next_avail_addr && next_avail_addr + size <= end) {
1412 best = next_avail_addr;
1413 goto done;
1417 * if not is this address the best so far?
1419 if (start > next_avail_addr && start < best &&
1420 RNDUP(start, align) + size <= end)
1421 best = RNDUP(start, align);
1425 * We didn't find exactly the address we wanted, due to going off the
1426 * end of a memory region. Return the best found memory address.
1428 done:
1429 next_avail_addr = best + size;
1430 (void) memset((void *)(uintptr_t)best, 0, size);
1431 return ((void *)(uintptr_t)best);
1434 void *
1435 mem_alloc(uint32_t size)
1437 return (do_mem_alloc(size, MMU_PAGESIZE));
/*
 * Build page tables to map all of memory used so far as well as the kernel.
 *
 * Mappings created, in order:
 *   1. the kernel nucleus at target_kernel_text (large pages if possible),
 *   2. a one-page PT window for the kernel's own page table edits,
 *   3. 1:1 mappings for the low 1M (BIOS tables),
 *   4. 1:1 mappings for every memlist page below next_avail_addr.
 * The order matters: mem_alloc() advances next_avail_addr, and step 4
 * relies on that to cover everything allocated so far.
 */
static void
build_page_tables(void)
{
	uint32_t psize;
	uint32_t level;
	uint32_t off;
	uint64_t start;
	uint32_t i;
	uint64_t end;

	/*
	 * If we're on metal, we need to create the top level pagetable.
	 */
	top_page_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
	DBG((uintptr_t)top_page_table);

	/*
	 * Determine if we'll use large mappings for kernel, then map it.
	 * level 1 = one level above the 4K leaf (2M or 4M pages).
	 */
	if (largepage_support) {
		psize = lpagesize;
		level = 1;
	} else {
		psize = MMU_PAGESIZE;
		level = 0;
	}

	DBG_MSG("Mapping kernel\n");
	DBG(ktext_phys);
	DBG(target_kernel_text);
	DBG(ksize);
	DBG(psize);
	for (off = 0; off < ksize; off += psize)
		map_pa_at_va(ktext_phys + off, target_kernel_text + off, level);

	/*
	 * The kernel will need a 1 page window to work with page tables
	 */
	bi->bi_pt_window = (uintptr_t)mem_alloc(MMU_PAGESIZE);
	DBG(bi->bi_pt_window);
	bi->bi_pte_to_pt_window =
	    (uintptr_t)find_pte(bi->bi_pt_window, NULL, 0, 0);
	DBG(bi->bi_pte_to_pt_window);

	/*
	 * We need 1:1 mappings for the lower 1M of memory to access
	 * BIOS tables used by a couple of drivers during boot.
	 *
	 * The following code works because our simple memory allocator
	 * only grows usage in an upwards direction.
	 *
	 * Note that by this point in boot some mappings for low memory
	 * may already exist because we've already accessed device in low
	 * memory.  (Specifically the video frame buffer and keyboard
	 * status ports.)  If we're booting on raw hardware then GRUB
	 * created these mappings for us.  If we're booting under a
	 * hypervisor then we went ahead and remapped these devices into
	 * memory allocated within dboot itself.
	 */
	if (map_debug)
		dboot_printf("1:1 map pa=0..1Meg\n");
	for (start = 0; start < 1024 * 1024; start += MMU_PAGESIZE) {
		map_pa_at_va(start, start, 0);
	}

	/* 1:1 map every usable page already consumed by dboot/kernel. */
	for (i = 0; i < memlists_used; ++i) {
		start = memlists[i].addr;

		end = start + memlists[i].size;

		if (map_debug)
			dboot_printf("1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
			    start, end);
		/* stop at next_avail_addr: nothing above it is in use yet */
		while (start < end && start < next_avail_addr) {
			map_pa_at_va(start, start, 0);
			start += MMU_PAGESIZE;
		}
	}

	DBG_MSG("\nPage tables constructed\n");
}
1527 static void
1528 dboot_init_xboot_consinfo(void)
1530 uintptr_t addr;
1532 * boot info must be 16 byte aligned for 64 bit kernel ABI
1534 addr = (uintptr_t)boot_info;
1535 addr = (addr + 0xf) & ~0xf;
1536 bi = (struct xboot_info *)addr;
1538 switch (multiboot_version) {
1539 case 1:
1540 dboot_multiboot1_xboot_consinfo();
1541 break;
1542 case 2:
1543 dboot_multiboot2_xboot_consinfo();
1544 break;
1545 default:
1546 dboot_panic("Unknown multiboot version: %d\n",
1547 multiboot_version);
1548 break;
1551 * Lookup environment module for the console. Complete module list
1552 * will be built after console setup.
1554 dboot_find_env();
1558 * Set up basic data from the boot loader.
1559 * The load_addr is part of AOUT kludge setup in dboot_grub.s, to support
1560 * 32-bit dboot code setup used to set up and start 64-bit kernel.
1561 * AOUT kludge does allow 32-bit boot loader, such as grub1, to load and
1562 * start 64-bit illumos kernel.
1564 static void
1565 dboot_loader_init(void)
1567 mb_info = NULL;
1568 mb2_info = NULL;
1570 switch (mb_magic) {
1571 case MB_BOOTLOADER_MAGIC:
1572 multiboot_version = 1;
1573 mb_info = (multiboot_info_t *)(uintptr_t)mb_addr;
1574 #if defined(_BOOT_TARGET_amd64)
1575 load_addr = mb_header.load_addr;
1576 #endif
1577 break;
1579 case MULTIBOOT2_BOOTLOADER_MAGIC:
1580 multiboot_version = 2;
1581 mb2_info = (multiboot2_info_header_t *)(uintptr_t)mb_addr;
1582 mb2_mmap_tagp = dboot_multiboot2_get_mmap_tagp(mb2_info);
1583 #if defined(_BOOT_TARGET_amd64)
1584 load_addr = mb2_load_addr;
1585 #endif
1586 break;
1588 default:
1589 dboot_panic("Unknown bootloader magic: 0x%x\n", mb_magic);
1590 break;
1594 /* Extract the kernel command line from [multi]boot information. */
1595 static char *
1596 dboot_loader_cmdline(void)
1598 char *line = NULL;
1601 switch (multiboot_version) {
1602 case 1:
1603 if (mb_info->flags & MB_INFO_CMDLINE)
1604 line = (char *)mb_info->cmdline;
1605 break;
1607 case 2:
1608 line = dboot_multiboot2_cmdline(mb2_info);
1609 break;
1611 default:
1612 dboot_panic("Unknown multiboot version: %d\n",
1613 multiboot_version);
1614 break;
1619 * Make sure we have valid pointer so the string operations
1620 * will not crash us.
1622 if (line == NULL)
1623 line = "";
1625 return (line);
1628 static char *
1629 dboot_loader_name(void)
1631 multiboot_tag_string_t *tag;
1633 switch (multiboot_version) {
1634 case 1:
1635 return ((char *)mb_info->boot_loader_name);
1637 case 2:
1638 tag = dboot_multiboot2_find_tag(mb2_info,
1639 MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME);
1640 return (tag->mb_string);
1641 default:
1642 dboot_panic("Unknown multiboot version: %d\n",
1643 multiboot_version);
1644 break;
1647 return (NULL);
/*
 * startup_kernel has a pretty simple job. It builds pagetables which reflect
 * 1:1 mappings for all memory in use. It then also adds mappings for
 * the kernel nucleus at virtual address of target_kernel_text using large page
 * mappings. The page table pages are also accessible at 1:1 mapped
 * virtual addresses.
 *
 * On return the assembly caller enables paging (and long mode on amd64)
 * and jumps to entry_addr_low.
 */
/*ARGSUSED*/
void
startup_kernel(void)
{
	char *cmdline;
	char *bootloader;

	dboot_loader_init();
	/*
	 * At this point we are executing in the 32-bit, paging-disabled
	 * state a multiboot loader hands us (the multiboot spec
	 * guarantees protected mode).
	 */

	bootloader = dboot_loader_name();	/* may be NULL */
	cmdline = dboot_loader_cmdline();

	/* Set up "bi" and console info, then bring up the boot console. */
	dboot_init_xboot_consinfo();
	bi->bi_cmdline = (native_ptr_t)(uintptr_t)cmdline;
	bcons_init(bi);

	/* Debug knobs from the boot command line / environment. */
	prom_debug = (find_boot_prop("prom_debug") != NULL);
	map_debug = (find_boot_prop("map_debug") != NULL);

	dboot_multiboot_get_fwtables();
	DBG_MSG("\n\nillumos prekernel set: ");
	DBG_MSG(cmdline);
	DBG_MSG("\n");

	if (bootloader != NULL && prom_debug) {
		dboot_printf("Kernel loaded by: %s\n", bootloader);
		dboot_printf("Using multiboot %d boot protocol.\n",
		    multiboot_version);
	}

	DBG((uintptr_t)bi);
	DBG((uintptr_t)mb_info);
	DBG((uintptr_t)mb2_info);
	if (mb2_info != NULL)
		DBG(mb2_info->mbi_total_size);
	DBG(bi->bi_acpi_rsdp);
	DBG(bi->bi_smbios);

	/*
	 * Need correct target_kernel_text value
	 */
#if defined(_BOOT_TARGET_amd64)
	target_kernel_text = KERNEL_TEXT_amd64;
#elif defined(__xpv)
	target_kernel_text = KERNEL_TEXT_i386_xpv;
#else
	target_kernel_text = KERNEL_TEXT_i386;
#endif
	DBG(target_kernel_text);

	/*
	 * use cpuid to enable MMU features: large pages (PSE), global
	 * pages (PGE), PAE, and -- from the extended leaves -- long
	 * mode (LM) and no-execute (NX).
	 */
	if (have_cpuid()) {
		uint32_t eax, edx;

		eax = 1;
		edx = get_cpuid_edx(&eax);
		if (edx & CPUID_INTC_EDX_PSE)
			largepage_support = 1;
		if (edx & CPUID_INTC_EDX_PGE)
			pge_support = 1;
		if (edx & CPUID_INTC_EDX_PAE)
			pae_support = 1;

		/* extended leaves are only valid if 0x80000001 exists */
		eax = 0x80000000;
		edx = get_cpuid_edx(&eax);
		if (eax >= 0x80000001) {
			eax = 0x80000001;
			edx = get_cpuid_edx(&eax);
			if (edx & CPUID_AMD_EDX_LM)
				amd64_support = 1;
			if (edx & CPUID_AMD_EDX_NX)
				NX_support = 1;
		}
	} else {
		dboot_printf("cpuid not supported\n");
	}

#if defined(_BOOT_TARGET_amd64)
	/* A 64-bit kernel is useless without long mode and PAE. */
	if (amd64_support == 0)
		dboot_panic("long mode not supported, rebooting");
	else if (pae_support == 0)
		dboot_panic("long mode, but no PAE; rebooting");
#else
	/*
	 * Allow the command line to over-ride use of PAE for 32 bit.
	 */
	if (strstr(cmdline, "disablePAE=true") != NULL) {
		pae_support = 0;
		NX_support = 0;
		amd64_support = 0;
	}
#endif

	/*
	 * initialize the simple memory allocator
	 */
	init_mem_alloc();

#if !defined(__xpv) && !defined(_BOOT_TARGET_amd64)
	/*
	 * disable PAE on 32 bit h/w w/o NX and < 4Gig of memory
	 */
	if (max_mem < FOUR_GIG && NX_support == 0)
		pae_support = 0;
#endif

	/*
	 * configure mmu information: page table geometry depends on
	 * PAE (8-byte vs. 4-byte PTEs) and on 64- vs. 32-bit target
	 * (4- vs. 3- vs. 2-level paging).
	 */
	if (pae_support) {
		shift_amt = shift_amt_pae;
		ptes_per_table = 512;
		pte_size = 8;
		lpagesize = TWO_MEG;
#if defined(_BOOT_TARGET_amd64)
		top_level = 3;
#else
		top_level = 2;
#endif
	} else {
		pae_support = 0;
		NX_support = 0;
		shift_amt = shift_amt_nopae;
		ptes_per_table = 1024;
		pte_size = 4;
		lpagesize = FOUR_MEG;
		top_level = 1;
	}

	DBG(pge_support);
	DBG(NX_support);
	DBG(largepage_support);
	DBG(amd64_support);
	DBG(top_level);
	DBG(pte_size);
	DBG(ptes_per_table);
	DBG(lpagesize);

	ktext_phys = FOUR_MEG;	/* from UNIX Mapfile */

#if !defined(__xpv) && defined(_BOOT_TARGET_amd64)
	/*
	 * For grub, copy kernel bits from the ELF64 file to final place.
	 */
	DBG_MSG("\nAllocating nucleus pages.\n");
	ktext_phys = (uintptr_t)do_mem_alloc(ksize, FOUR_MEG);
	if (ktext_phys == 0)
		dboot_panic("failed to allocate aligned kernel memory");
	DBG(load_addr);
	if (dboot_elfload64(load_addr) != 0)
		dboot_panic("failed to parse kernel ELF image, rebooting");
#endif

	DBG(ktext_phys);

	/*
	 * Allocate page tables.
	 */
	build_page_tables();

	/*
	 * return to assembly code to switch to running kernel
	 */
	entry_addr_low = (uint32_t)target_kernel_text;
	DBG(entry_addr_low);
	bi->bi_use_largepage = largepage_support;
	bi->bi_use_pae = pae_support;
	bi->bi_use_pge = pge_support;
	bi->bi_use_nx = NX_support;

	/* Tell the kernel where dboot's bump allocator left off. */
	bi->bi_next_paddr = next_avail_addr;
	DBG(bi->bi_next_paddr);
	bi->bi_next_vaddr = (uintptr_t)next_avail_addr;
	DBG(bi->bi_next_vaddr);
	bi->bi_mb_version = multiboot_version;

	/* Pass the raw multiboot info through for the kernel's use. */
	switch (multiboot_version) {
	case 1:
		bi->bi_mb_info = (uintptr_t)mb_info;
		break;
	case 2:
		bi->bi_mb_info = (uintptr_t)mb2_info;
		break;
	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	bi->bi_top_page_table = (uintptr_t)top_page_table;

	bi->bi_kseg_size = FOUR_MEG;
	DBG(bi->bi_kseg_size);

	if (map_debug)
		dump_tables();

	DBG_MSG("\n\n*** DBOOT DONE -- back to asm to jump to kernel\n\n");
}