2 * Copyright (c) 1998 Robert Nordier
5 * Redistribution and use in source and binary forms are freely
6 * permitted provided that the above copyright notice and this
7 * paragraph and the following disclaimer are duplicated in all
10 * This software is provided "AS IS" and without any express or
11 * implied warranties, including, without limitation, the implied
12 * warranties of merchantability and fitness for a particular
16 #include <sys/cdefs.h>
19 #include <sys/param.h>
20 #include <sys/errno.h>
21 #include <sys/diskmbr.h>
24 #include <sys/reboot.h>
25 #include <sys/queue.h>
26 #include <sys/multiboot.h>
28 #include <machine/bootinfo.h>
29 #include <machine/elf.h>
30 #include <machine/pc/bios.h>
36 #include "bootstrap.h"
54 #define BIOS_NUMDRIVES 0x475
60 #define TYPE_MAXHARD TYPE_DA
66 * Fake multiboot header to provide versioning and to pass
67 * partition start LBA. Partition is either GPT partition or
70 extern const struct multiboot_header mb_header
;
71 extern uint64_t start_sector
;
73 static const char optstr
[NOPT
] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */
74 static const unsigned char flags
[NOPT
] = {
92 static const unsigned char dev_maj
[NDEV
] = {30, 4, 2};
94 static struct i386_devdesc
*bdev
;
96 static char cmddup
[512];
97 static char kname
[1024];
98 static int comspeed
= SIOSPD
;
99 static struct bootinfo bootinfo
;
100 static uint32_t bootdev
;
101 static struct zfs_boot_args zfsargs
;
103 extern vm_offset_t high_heap_base
;
104 extern uint32_t bios_basemem
, bios_extmem
, high_heap_size
;
106 static char *heap_top
;
107 static char *heap_bottom
;
109 static void i386_zfs_probe(void);
111 static void load(void);
112 static int parse_cmd(void);
114 struct arch_switch archsw
; /* MI/MD interface boundary */
115 static char boot_devname
[2 * ZFS_MAXNAMELEN
+ 8]; /* disk or pool:dataset */
117 struct devsw
*devsw
[] = {
123 struct fs_ops
*file_system
[] = {
133 int auto_boot
, i
, fd
;
134 struct disk_devdesc devdesc
;
138 if (high_heap_size
> 0) {
139 heap_top
= PTOV(high_heap_base
+ high_heap_size
);
140 heap_bottom
= PTOV(high_heap_base
);
142 heap_bottom
= (char *)
143 (roundup2(__base
+ (int32_t)&_end
, 0x10000) - __base
);
144 heap_top
= (char *) PTOV(bios_basemem
);
146 setheap(heap_bottom
, heap_top
);
149 * Initialise the block cache. Set the upper limit.
151 bcache_init(32768, 512);
153 archsw
.arch_autoload
= NULL
;
154 archsw
.arch_getdev
= i386_getdev
;
155 archsw
.arch_copyin
= NULL
;
156 archsw
.arch_copyout
= NULL
;
157 archsw
.arch_readin
= NULL
;
158 archsw
.arch_isainb
= NULL
;
159 archsw
.arch_isaoutb
= NULL
;
160 archsw
.arch_zfs_probe
= i386_zfs_probe
;
162 bootinfo
.bi_version
= BOOTINFO_VERSION
;
163 bootinfo
.bi_size
= sizeof(bootinfo
);
164 bootinfo
.bi_basemem
= bios_basemem
/ 1024;
165 bootinfo
.bi_extmem
= bios_extmem
/ 1024;
166 bootinfo
.bi_memsizes_valid
++;
167 bootinfo
.bi_bios_dev
= *(uint8_t *)PTOV(ARGS
);
169 /* Set up fall back device name. */
170 snprintf(boot_devname
, sizeof (boot_devname
), "disk%d:",
171 bd_bios2unit(bootinfo
.bi_bios_dev
));
173 for (i
= 0; devsw
[i
] != NULL
; i
++)
174 if (devsw
[i
]->dv_init
!= NULL
)
175 (devsw
[i
]->dv_init
)();
177 disk_parsedev(&devdesc
, boot_devname
+4, NULL
);
179 bootdev
= MAKEBOOTDEV(dev_maj
[DEVT_DISK
], devdesc
.d_slice
+ 1,
180 devdesc
.dd
.d_unit
, devdesc
.d_partition
>= 0? devdesc
.d_partition
:0xff);
183 * zfs_fmtdev() can be called only after dv_init
185 if (bdev
!= NULL
&& bdev
->dd
.d_dev
->dv_type
== DEVT_ZFS
) {
186 /* set up proper device name string for ZFS */
187 strncpy(boot_devname
, zfs_fmtdev(bdev
), sizeof (boot_devname
));
190 /* now make sure we have bdev in all cases */
193 i386_getdev((void **)&bdev
, boot_devname
, NULL
);
195 env_setenv("currdev", EV_VOLATILE
, boot_devname
, i386_setcurrdev
,
198 /* Process configuration file */
199 setenv("LINES", "24", 1);
202 fd
= open(PATH_CONFIG
, O_RDONLY
);
204 fd
= open(PATH_DOTCONFIG
, O_RDONLY
);
207 read(fd
, cmd
, sizeof(cmd
));
213 * Note that parse_cmd() is destructive to cmd[] and we also want
214 * to honor RBX_QUIET option that could be present in cmd[].
216 memcpy(cmddup
, cmd
, sizeof(cmd
));
219 if (!OPT_CHECK(RBX_QUIET
))
220 printf("%s: %s\n", PATH_CONFIG
, cmddup
);
221 /* Do not process this command twice */
226 * Try to exec stage 3 boot loader. If interrupted by a keypress,
227 * or in case of failure, switch off auto boot.
230 if (auto_boot
&& !*kname
) {
231 memcpy(kname
, PATH_LOADER_ZFS
, sizeof(PATH_LOADER_ZFS
));
238 /* Present the user with the boot2 prompt. */
241 if (!auto_boot
|| !OPT_CHECK(RBX_QUIET
)) {
242 printf("\nillumos/x86 boot\n");
243 printf("Default: %s%s\nboot: ", boot_devname
, kname
);
245 if (ioctrl
& IO_SERIAL
)
247 if (!auto_boot
|| keyhit(5))
248 getstr(cmd
, sizeof(cmd
));
249 else if (!auto_boot
|| !OPT_CHECK(RBX_QUIET
))
259 /* XXX - Needed for btxld to link the boot2 binary; do not remove. */
272 static Elf32_Phdr ep
[2];
273 static Elf32_Shdr es
[2];
278 if ((fd
= open(kname
, O_RDONLY
)) == -1) {
279 printf("\nCan't find %s\n", kname
);
282 if (read(fd
, &hdr
, sizeof(hdr
)) != sizeof(hdr
)) {
286 if (N_GETMAGIC(hdr
.ex
) == ZMAGIC
)
288 else if (IS_ELF(hdr
.eh
))
291 printf("Invalid %s\n", "format");
296 addr
= hdr
.ex
.a_entry
& 0xffffff;
298 lseek(fd
, PAGE_SIZE
, SEEK_SET
);
299 if (read(fd
, p
, hdr
.ex
.a_text
) != hdr
.ex
.a_text
) {
303 p
+= roundup2(hdr
.ex
.a_text
, PAGE_SIZE
);
304 if (read(fd
, p
, hdr
.ex
.a_data
) != hdr
.ex
.a_data
) {
308 p
+= hdr
.ex
.a_data
+ roundup2(hdr
.ex
.a_bss
, PAGE_SIZE
);
309 bootinfo
.bi_symtab
= VTOP(p
);
310 memcpy(p
, &hdr
.ex
.a_syms
, sizeof(hdr
.ex
.a_syms
));
311 p
+= sizeof(hdr
.ex
.a_syms
);
313 if (read(fd
, p
, hdr
.ex
.a_syms
) != hdr
.ex
.a_syms
) {
318 if (read(fd
, p
, sizeof(int)) != sizeof(int)) {
325 if (read(fd
, p
, x
) != x
) {
332 lseek(fd
, hdr
.eh
.e_phoff
, SEEK_SET
);
333 for (j
= i
= 0; i
< hdr
.eh
.e_phnum
&& j
< 2; i
++) {
334 if (read(fd
, ep
+ j
, sizeof(ep
[0])) != sizeof(ep
[0])) {
338 if (ep
[j
].p_type
== PT_LOAD
)
341 for (i
= 0; i
< 2; i
++) {
342 p
= PTOV(ep
[i
].p_paddr
& 0xffffff);
343 lseek(fd
, ep
[i
].p_offset
, SEEK_SET
);
344 if (read(fd
, p
, ep
[i
].p_filesz
) != ep
[i
].p_filesz
) {
349 p
+= roundup2(ep
[1].p_memsz
, PAGE_SIZE
);
350 bootinfo
.bi_symtab
= VTOP(p
);
351 if (hdr
.eh
.e_shnum
== hdr
.eh
.e_shstrndx
+ 3) {
352 lseek(fd
, hdr
.eh
.e_shoff
+ sizeof(es
[0]) * (hdr
.eh
.e_shstrndx
+ 1),
354 if (read(fd
, &es
, sizeof(es
)) != sizeof(es
)) {
358 for (i
= 0; i
< 2; i
++) {
359 memcpy(p
, &es
[i
].sh_size
, sizeof(es
[i
].sh_size
));
360 p
+= sizeof(es
[i
].sh_size
);
361 lseek(fd
, es
[i
].sh_offset
, SEEK_SET
);
362 if (read(fd
, p
, es
[i
].sh_size
) != es
[i
].sh_size
) {
369 addr
= hdr
.eh
.e_entry
& 0xffffff;
373 bootinfo
.bi_esymtab
= VTOP(p
);
374 bootinfo
.bi_kernelname
= VTOP(kname
);
376 if (bdev
->dd
.d_dev
->dv_type
== DEVT_ZFS
) {
377 zfsargs
.size
= sizeof(zfsargs
);
378 zfsargs
.pool
= bdev
->d_kind
.zfs
.pool_guid
;
379 zfsargs
.root
= bdev
->d_kind
.zfs
.root_guid
;
380 __exec((caddr_t
)addr
, RB_BOOTINFO
| (opts
& RBX_MASK
),
382 KARGS_FLAGS_ZFS
| KARGS_FLAGS_EXTARG
,
383 (uint32_t) bdev
->d_kind
.zfs
.pool_guid
,
384 (uint32_t) (bdev
->d_kind
.zfs
.pool_guid
>> 32),
388 __exec((caddr_t
)addr
, RB_BOOTINFO
| (opts
& RBX_MASK
),
389 bootdev
, 0, 0, 0, VTOP(&bootinfo
));
393 mount_root(char *arg
)
396 struct i386_devdesc
*ddesc
;
399 root
= malloc(strlen(arg
) + 2);
402 sprintf(root
, "%s:", arg
);
403 if (i386_getdev((void **)&ddesc
, root
, NULL
)) {
408 /* we should have new device descriptor, free old and replace it. */
412 if (bdev
->dd
.d_dev
->dv_type
== DEVT_DISK
) {
413 if (bdev
->d_kind
.biosdisk
.partition
== -1)
416 part
= bdev
->d_kind
.biosdisk
.partition
;
417 bootdev
= MAKEBOOTDEV(dev_maj
[bdev
->dd
.d_dev
->dv_type
],
418 bdev
->d_kind
.biosdisk
.slice
+ 1,
419 bdev
->dd
.d_unit
, part
);
420 bootinfo
.bi_bios_dev
= bd_unit2bios(bdev
->dd
.d_unit
);
422 setenv("currdev", root
, 1);
434 fd
= open(arg
, O_RDONLY
);
438 while ((d
= readdirfd(fd
)) != NULL
) {
439 sprintf(line
, "%s\n", d
->d_name
);
440 if (pager_output(line
))
456 while ((c
= *arg
++)) {
457 if (c
== ' ' || c
== '\t' || c
== '\n')
459 for (p
= arg
; *p
&& *p
!= '\n' && *p
!= ' ' && *p
!= '\t'; p
++);
464 while ((c
= *arg
++)) {
466 if (*(uint8_t *)PTOV(0x496) & 0x10) {
469 opts
|= OPT_SET(RBX_DUAL
) | OPT_SET(RBX_SERIAL
);
472 printf("Keyboard: %s\n", cp
);
474 } else if (c
== 'S') {
476 while ((unsigned int)(i
= *arg
++ - '0') <= 9)
478 if (j
> 0 && i
== -'0') {
482 /* Fall through to error below ('S' not in optstr[]). */
484 for (i
= 0; c
!= optstr
[i
]; i
++)
487 opts
^= OPT_SET(flags
[i
]);
489 ioctrl
= OPT_CHECK(RBX_DUAL
) ? (IO_SERIAL
|IO_KEYBOARD
) :
490 OPT_CHECK(RBX_SERIAL
) ? IO_SERIAL
: IO_KEYBOARD
;
491 if (ioctrl
& IO_SERIAL
) {
492 if (sio_init(115200 / comspeed
) != 0)
493 ioctrl
&= ~IO_SERIAL
;
504 * Report pool status if the command is 'status'. Let's
505 * hope no-one wants to load /status as a kernel.
507 if (!strcmp(arg
, "status")) {
509 for (i
= 0; devsw
[i
] != NULL
; i
++) {
510 if (devsw
[i
]->dv_print
!= NULL
) {
511 if (devsw
[i
]->dv_print(1))
514 sprintf(line
, "%s: (unknown)\n", devsw
[i
]->dv_name
);
515 if (pager_output(line
))
524 * If there is a colon, switch pools.
526 if (strncmp(arg
, "zfs:", 4) == 0)
527 q
= strchr(arg
+ 4, ':');
529 q
= strchr(arg
, ':');
532 if (mount_root(arg
) != 0)
536 if ((i
= ep
- arg
)) {
537 if ((size_t)i
>= sizeof(kname
))
539 memcpy(kname
, arg
, i
+ 1);
548 * probe arguments for partition iterator (see below)
558 * simple wrapper around read() to avoid using device specific
559 * strategy() directly.
562 parttblread(void *arg
, void *buf
, size_t blocks
, uint64_t offset
)
564 struct probe_args
*ppa
= arg
;
565 size_t size
= ppa
->secsz
* blocks
;
567 lseek(ppa
->fd
, offset
* ppa
->secsz
, SEEK_SET
);
568 if (read(ppa
->fd
, buf
, size
) == size
)
574 * scan partition entries to find boot partition starting at start_sector.
575 * in case of MBR partition type PART_SOLARIS2, read VTOC and recurse.
578 probe_partition(void *arg
, const char *partname
,
579 const struct ptable_entry
*part
)
581 struct probe_args pa
, *ppa
= arg
;
582 struct ptable
*table
;
583 uint64_t *pool_guid_ptr
= NULL
;
584 uint64_t pool_guid
= 0;
588 len
= strlen(ppa
->devname
);
589 if (len
> sizeof (devname
))
590 len
= sizeof (devname
);
592 strncpy(devname
, ppa
->devname
, len
- 1);
593 devname
[len
- 1] = '\0';
594 snprintf(devname
, sizeof (devname
), "%s%s:", devname
, partname
);
596 /* filter out partitions *not* used by zfs */
597 switch (part
->type
) {
598 case PART_RESERVED
: /* efi reserved */
599 case PART_VTOC_BOOT
: /* vtoc boot area */
606 if (part
->type
== PART_SOLARIS2
) {
607 pa
.offset
= part
->start
;
608 pa
.fd
= open(devname
, O_RDONLY
);
611 pa
.devname
= devname
;
612 pa
.secsz
= ppa
->secsz
;
613 table
= ptable_open(&pa
, part
->end
- part
->start
+ 1,
614 ppa
->secsz
, parttblread
);
616 enum ptable_type pt
= ptable_gettype(table
);
618 if (pt
== PTABLE_VTOC8
|| pt
== PTABLE_VTOC
) {
619 ret
= ptable_iterate(table
, &pa
,
630 if (ppa
->offset
+ part
->start
== start_sector
) {
631 /* Ask zfs_probe_dev to provide guid. */
632 pool_guid_ptr
= &pool_guid
;
633 /* Set up boot device name for non-zfs case. */
634 strncpy(boot_devname
, devname
, sizeof (boot_devname
));
637 ret
= zfs_probe_dev(devname
, pool_guid_ptr
);
638 if (pool_guid
!= 0 && bdev
== NULL
) {
639 bdev
= malloc(sizeof (struct i386_devdesc
));
640 bzero(bdev
, sizeof (struct i386_devdesc
));
641 bdev
->dd
.d_dev
= &zfs_dev
;
642 bdev
->d_kind
.zfs
.pool_guid
= pool_guid
;
645 * We can not set up zfs boot device name yet, as the
646 * zfs dv_init() is not completed. We will set boot_devname
647 * in main, after devsw setup.
655 * open partition table on disk and scan partition entries to find
656 * boot partition starting at start_sector (recorded by installboot).
659 probe_disk(char *devname
)
661 struct ptable
*table
;
662 struct probe_args pa
;
667 pa
.devname
= devname
;
668 pa
.fd
= open(devname
, O_RDONLY
);
673 ret
= ioctl(pa
.fd
, DIOCGMEDIASIZE
, &mediasz
);
675 ret
= ioctl(pa
.fd
, DIOCGSECTORSIZE
, &pa
.secsz
);
677 table
= ptable_open(&pa
, mediasz
/ pa
.secsz
, pa
.secsz
,
680 ret
= ptable_iterate(table
, &pa
, probe_partition
);
689 * Probe all disks to discover ZFS pools. The idea is to walk all possible
690 * disk devices, however, we also need to identify possible boot pool.
691 * For boot pool detection we have boot disk passed us from BIOS, recorded
692 * in bootinfo.bi_bios_dev, and start_sector LBA recorded by installboot.
694 * To detect boot pool, we can not use generic zfs_probe_dev() on boot disk,
695 * but we need to walk partitions, as we have no way to pass start_sector
696 * to zfs_probe_dev(). Note we do need to detect the partition corresponding
697 * to non-zfs case, so here we can set boot_devname for both cases.
705 /* Translate bios dev to our unit number. */
706 boot_unit
= bd_bios2unit(bootinfo
.bi_bios_dev
);
709 * Open all the disks we can find and see if we can reconstruct
710 * ZFS pools from them.
712 for (unit
= 0; unit
< MAXBDDEV
; unit
++) {
713 if (bd_unit2bios(unit
) == -1)
716 sprintf(devname
, "disk%d:", unit
);
717 /* If this is not boot disk, use generic probe. */
718 if (unit
!= boot_unit
)
719 zfs_probe_dev(devname
, NULL
);
726 ldi_get_size(void *priv
)
728 int fd
= (uintptr_t) priv
;
731 ioctl(fd
, DIOCGMEDIASIZE
, &size
);