9703 loader: stage1 should not probe floppies for zfs
[unleashed.git] / usr / src / boot / sys / boot / i386 / gptzfsboot / zfsboot.c
blob3888766ad8354d0719cd981f89d74f18ea0fe41a
1 /*-
2 * Copyright (c) 1998 Robert Nordier
3 * All rights reserved.
5 * Redistribution and use in source and binary forms are freely
6 * permitted provided that the above copyright notice and this
7 * paragraph and the following disclaimer are duplicated in all
8 * such forms.
10 * This software is provided "AS IS" and without any express or
11 * implied warranties, including, without limitation, the implied
12 * warranties of merchantability and fitness for a particular
13 * purpose.
16 #include <sys/cdefs.h>
17 #include <stand.h>
19 #include <sys/param.h>
20 #include <sys/errno.h>
21 #include <sys/diskmbr.h>
22 #include <sys/vtoc.h>
23 #include <sys/disk.h>
24 #include <sys/reboot.h>
25 #include <sys/queue.h>
26 #include <sys/multiboot.h>
28 #include <machine/bootinfo.h>
29 #include <machine/elf.h>
30 #include <machine/pc/bios.h>
32 #include <stdarg.h>
33 #include <stddef.h>
35 #include <a.out.h>
36 #include "bootstrap.h"
37 #include "libi386.h"
38 #include <btxv86.h>
40 #include "lib.h"
41 #include "rbx.h"
42 #include "cons.h"
43 #include "bootargs.h"
44 #include "disk.h"
45 #include "part.h"
46 #include "paths.h"
48 #include "libzfs.h"
/* Physical address from which main() reads the BIOS boot drive number. */
#define	ARGS		0x900
#define	NOPT		14	/* number of entries in optstr[] and flags[] */
#define	NDEV		3	/* number of entries in dev_maj[] */

/*
 * NOTE(review): the constants below are not referenced by the code in this
 * file; presumably BIOS drive-number conventions kept for reference --
 * confirm before removing.
 */
#define	BIOS_NUMDRIVES	0x475
#define	DRV_HARD	0x80
#define	DRV_MASK	0x7f

#define	TYPE_AD		0
#define	TYPE_DA		1
#define	TYPE_MAXHARD	TYPE_DA
#define	TYPE_FD		2
/* Linker-provided symbol; main() uses its address to place the low heap. */
extern uint32_t _end;

/*
 * Fake multiboot header to provide versioning and to pass
 * partition start LBA. Partition is either GPT partition or
 * VTOC slice.
 */
extern const struct multiboot_header mb_header;
extern uint64_t start_sector;
73 static const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */
74 static const unsigned char flags[NOPT] = {
75 RBX_DUAL,
76 RBX_SERIAL,
77 RBX_ASKNAME,
78 RBX_CDROM,
79 RBX_CONFIG,
80 RBX_KDB,
81 RBX_GDB,
82 RBX_MUTE,
83 RBX_NOINTR,
84 RBX_PAUSE,
85 RBX_QUIET,
86 RBX_DFLTROOT,
87 RBX_SINGLE,
88 RBX_VERBOSE
90 uint32_t opts;
92 static const unsigned char dev_maj[NDEV] = {30, 4, 2};
94 static struct i386_devdesc *bdev;
95 static char cmd[512];
96 static char cmddup[512];
97 static char kname[1024];
98 static int comspeed = SIOSPD;
99 static struct bootinfo bootinfo;
100 static uint32_t bootdev;
101 static struct zfs_boot_args zfsargs;
103 extern vm_offset_t high_heap_base;
104 extern uint32_t bios_basemem, bios_extmem, high_heap_size;
106 static char *heap_top;
107 static char *heap_bottom;
109 static void i386_zfs_probe(void);
110 void exit(int);
111 static void load(void);
112 static int parse_cmd(void);
114 struct arch_switch archsw; /* MI/MD interface boundary */
115 static char boot_devname[2 * ZFS_MAXNAMELEN + 8]; /* disk or pool:dataset */
117 struct devsw *devsw[] = {
118 &biosdisk,
119 &zfs_dev,
120 NULL
123 struct fs_ops *file_system[] = {
124 &zfs_fsops,
125 &ufs_fsops,
126 &dosfs_fsops,
127 NULL
131 main(void)
133 int auto_boot, i, fd;
134 struct disk_devdesc devdesc;
136 bios_getmem();
138 if (high_heap_size > 0) {
139 heap_top = PTOV(high_heap_base + high_heap_size);
140 heap_bottom = PTOV(high_heap_base);
141 } else {
142 heap_bottom = (char *)
143 (roundup2(__base + (int32_t)&_end, 0x10000) - __base);
144 heap_top = (char *) PTOV(bios_basemem);
146 setheap(heap_bottom, heap_top);
149 * Initialise the block cache. Set the upper limit.
151 bcache_init(32768, 512);
153 archsw.arch_autoload = NULL;
154 archsw.arch_getdev = i386_getdev;
155 archsw.arch_copyin = NULL;
156 archsw.arch_copyout = NULL;
157 archsw.arch_readin = NULL;
158 archsw.arch_isainb = NULL;
159 archsw.arch_isaoutb = NULL;
160 archsw.arch_zfs_probe = i386_zfs_probe;
162 bootinfo.bi_version = BOOTINFO_VERSION;
163 bootinfo.bi_size = sizeof(bootinfo);
164 bootinfo.bi_basemem = bios_basemem / 1024;
165 bootinfo.bi_extmem = bios_extmem / 1024;
166 bootinfo.bi_memsizes_valid++;
167 bootinfo.bi_bios_dev = *(uint8_t *)PTOV(ARGS);
169 /* Set up fall back device name. */
170 snprintf(boot_devname, sizeof (boot_devname), "disk%d:",
171 bd_bios2unit(bootinfo.bi_bios_dev));
173 for (i = 0; devsw[i] != NULL; i++)
174 if (devsw[i]->dv_init != NULL)
175 (devsw[i]->dv_init)();
177 disk_parsedev(&devdesc, boot_devname+4, NULL);
179 bootdev = MAKEBOOTDEV(dev_maj[DEVT_DISK], devdesc.d_slice + 1,
180 devdesc.dd.d_unit, devdesc.d_partition >= 0? devdesc.d_partition:0xff);
183 * zfs_fmtdev() can be called only after dv_init
185 if (bdev != NULL && bdev->dd.d_dev->dv_type == DEVT_ZFS) {
186 /* set up proper device name string for ZFS */
187 strncpy(boot_devname, zfs_fmtdev(bdev), sizeof (boot_devname));
190 /* now make sure we have bdev on all cases */
191 if (bdev != NULL)
192 free(bdev);
193 i386_getdev((void **)&bdev, boot_devname, NULL);
195 env_setenv("currdev", EV_VOLATILE, boot_devname, i386_setcurrdev,
196 env_nounset);
198 /* Process configuration file */
199 setenv("LINES", "24", 1);
200 auto_boot = 1;
202 fd = open(PATH_CONFIG, O_RDONLY);
203 if (fd == -1)
204 fd = open(PATH_DOTCONFIG, O_RDONLY);
206 if (fd != -1) {
207 read(fd, cmd, sizeof(cmd));
208 close(fd);
211 if (*cmd) {
213 * Note that parse_cmd() is destructive to cmd[] and we also want
214 * to honor RBX_QUIET option that could be present in cmd[].
216 memcpy(cmddup, cmd, sizeof(cmd));
217 if (parse_cmd())
218 auto_boot = 0;
219 if (!OPT_CHECK(RBX_QUIET))
220 printf("%s: %s\n", PATH_CONFIG, cmddup);
221 /* Do not process this command twice */
222 *cmd = 0;
226 * Try to exec stage 3 boot loader. If interrupted by a keypress,
227 * or in case of failure, switch off auto boot.
230 if (auto_boot && !*kname) {
231 memcpy(kname, PATH_LOADER, sizeof(PATH_LOADER));
232 if (!keyhit(3)) {
233 load();
234 auto_boot = 0;
236 * Try to fall back to /boot/zfsloader.
237 * This fallback should be eventually removed.
238 * Created: 08/03/2018
240 #define PATH_ZFSLOADER "/boot/zfsloader"
241 memcpy(kname, PATH_ZFSLOADER, sizeof(PATH_ZFSLOADER));
242 load();
244 * Still there? restore default loader name for prompt.
246 memcpy(kname, PATH_LOADER, sizeof(PATH_LOADER));
250 /* Present the user with the boot2 prompt. */
252 for (;;) {
253 if (!auto_boot || !OPT_CHECK(RBX_QUIET)) {
254 printf("\nillumos/x86 boot\n");
255 printf("Default: %s%s\nboot: ", boot_devname, kname);
257 if (ioctrl & IO_SERIAL)
258 sio_flush();
259 if (!auto_boot || keyhit(5))
260 getstr(cmd, sizeof(cmd));
261 else if (!auto_boot || !OPT_CHECK(RBX_QUIET))
262 putchar('\n');
263 auto_boot = 0;
264 if (parse_cmd())
265 putchar('\a');
266 else
267 load();
/*
 * XXX - Needed for btxld to link the boot2 binary; do not remove.
 * Deliberately does nothing with its argument.
 */
void
exit(int x)
{
	(void)x;
}
277 static void
278 load(void)
280 union {
281 struct exec ex;
282 Elf32_Ehdr eh;
283 } hdr;
284 static Elf32_Phdr ep[2];
285 static Elf32_Shdr es[2];
286 caddr_t p;
287 uint32_t addr, x;
288 int fd, fmt, i, j;
290 if ((fd = open(kname, O_RDONLY)) == -1) {
291 printf("\nCan't find %s\n", kname);
292 return;
294 if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
295 close(fd);
296 return;
298 if (N_GETMAGIC(hdr.ex) == ZMAGIC)
299 fmt = 0;
300 else if (IS_ELF(hdr.eh))
301 fmt = 1;
302 else {
303 printf("Invalid %s\n", "format");
304 close(fd);
305 return;
307 if (fmt == 0) {
308 addr = hdr.ex.a_entry & 0xffffff;
309 p = PTOV(addr);
310 lseek(fd, PAGE_SIZE, SEEK_SET);
311 if (read(fd, p, hdr.ex.a_text) != hdr.ex.a_text) {
312 close(fd);
313 return;
315 p += roundup2(hdr.ex.a_text, PAGE_SIZE);
316 if (read(fd, p, hdr.ex.a_data) != hdr.ex.a_data) {
317 close(fd);
318 return;
320 p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE);
321 bootinfo.bi_symtab = VTOP(p);
322 memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms));
323 p += sizeof(hdr.ex.a_syms);
324 if (hdr.ex.a_syms) {
325 if (read(fd, p, hdr.ex.a_syms) != hdr.ex.a_syms) {
326 close(fd);
327 return;
329 p += hdr.ex.a_syms;
330 if (read(fd, p, sizeof(int)) != sizeof(int)) {
331 close(fd);
332 return;
334 x = *(uint32_t *)p;
335 p += sizeof(int);
336 x -= sizeof(int);
337 if (read(fd, p, x) != x) {
338 close(fd);
339 return;
341 p += x;
343 } else {
344 lseek(fd, hdr.eh.e_phoff, SEEK_SET);
345 for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) {
346 if (read(fd, ep + j, sizeof(ep[0])) != sizeof(ep[0])) {
347 close(fd);
348 return;
350 if (ep[j].p_type == PT_LOAD)
351 j++;
353 for (i = 0; i < 2; i++) {
354 p = PTOV(ep[i].p_paddr & 0xffffff);
355 lseek(fd, ep[i].p_offset, SEEK_SET);
356 if (read(fd, p, ep[i].p_filesz) != ep[i].p_filesz) {
357 close(fd);
358 return;
361 p += roundup2(ep[1].p_memsz, PAGE_SIZE);
362 bootinfo.bi_symtab = VTOP(p);
363 if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) {
364 lseek(fd, hdr.eh.e_shoff + sizeof(es[0]) * (hdr.eh.e_shstrndx + 1),
365 SEEK_SET);
366 if (read(fd, &es, sizeof(es)) != sizeof(es)) {
367 close(fd);
368 return;
370 for (i = 0; i < 2; i++) {
371 memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size));
372 p += sizeof(es[i].sh_size);
373 lseek(fd, es[i].sh_offset, SEEK_SET);
374 if (read(fd, p, es[i].sh_size) != es[i].sh_size) {
375 close(fd);
376 return;
378 p += es[i].sh_size;
381 addr = hdr.eh.e_entry & 0xffffff;
383 close(fd);
385 bootinfo.bi_esymtab = VTOP(p);
386 bootinfo.bi_kernelname = VTOP(kname);
388 if (bdev->dd.d_dev->dv_type == DEVT_ZFS) {
389 zfsargs.size = sizeof(zfsargs);
390 zfsargs.pool = bdev->d_kind.zfs.pool_guid;
391 zfsargs.root = bdev->d_kind.zfs.root_guid;
392 __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
393 bootdev,
394 KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG,
395 (uint32_t) bdev->d_kind.zfs.pool_guid,
396 (uint32_t) (bdev->d_kind.zfs.pool_guid >> 32),
397 VTOP(&bootinfo),
398 zfsargs);
399 } else
400 __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
401 bootdev, 0, 0, 0, VTOP(&bootinfo));
404 static int
405 mount_root(char *arg)
407 char *root;
408 struct i386_devdesc *ddesc;
409 uint8_t part;
411 root = malloc(strlen(arg) + 2);
412 if (root == NULL)
413 return (1);
414 sprintf(root, "%s:", arg);
415 if (i386_getdev((void **)&ddesc, root, NULL)) {
416 free(root);
417 return (1);
420 /* we should have new device descriptor, free old and replace it. */
421 if (bdev != NULL)
422 free(bdev);
423 bdev = ddesc;
424 if (bdev->dd.d_dev->dv_type == DEVT_DISK) {
425 if (bdev->d_kind.biosdisk.partition == -1)
426 part = 0xff;
427 else
428 part = bdev->d_kind.biosdisk.partition;
429 bootdev = MAKEBOOTDEV(dev_maj[bdev->dd.d_dev->dv_type],
430 bdev->d_kind.biosdisk.slice + 1,
431 bdev->dd.d_unit, part);
432 bootinfo.bi_bios_dev = bd_unit2bios(bdev->dd.d_unit);
434 setenv("currdev", root, 1);
435 free(root);
436 return (0);
/*
 * List the entries of directory arg through the pager, one name per line.
 * Silently returns if the directory cannot be opened.
 */
static void
fs_list(char *arg)
{
	int fd;
	struct dirent *d;

	fd = open(arg, O_RDONLY);
	if (fd < 0)
		return;
	pager_open();
	while ((d = readdirfd(fd)) != NULL) {
		/*
		 * Hand the name to the pager directly rather than formatting
		 * it into a small stack buffer: entry names can be longer
		 * than any fixed 80-byte line.
		 */
		if (pager_output(d->d_name) || pager_output("\n"))
			break;
	}
	pager_close();
	close(fd);
}
459 static int
460 parse_cmd(void)
462 char *arg = cmd;
463 char *ep, *p, *q;
464 const char *cp;
465 char line[80];
466 int c, i, j;
468 while ((c = *arg++)) {
469 if (c == ' ' || c == '\t' || c == '\n')
470 continue;
471 for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++);
472 ep = p;
473 if (*p)
474 *p++ = 0;
475 if (c == '-') {
476 while ((c = *arg++)) {
477 if (c == 'P') {
478 if (*(uint8_t *)PTOV(0x496) & 0x10) {
479 cp = "yes";
480 } else {
481 opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL);
482 cp = "no";
484 printf("Keyboard: %s\n", cp);
485 continue;
486 } else if (c == 'S') {
487 j = 0;
488 while ((unsigned int)(i = *arg++ - '0') <= 9)
489 j = j * 10 + i;
490 if (j > 0 && i == -'0') {
491 comspeed = j;
492 break;
494 /* Fall through to error below ('S' not in optstr[]). */
496 for (i = 0; c != optstr[i]; i++)
497 if (i == NOPT - 1)
498 return -1;
499 opts ^= OPT_SET(flags[i]);
501 ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) :
502 OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD;
503 if (ioctrl & IO_SERIAL) {
504 if (sio_init(115200 / comspeed) != 0)
505 ioctrl &= ~IO_SERIAL;
507 } if (c == '?') {
508 printf("\n");
509 fs_list(arg);
510 zfs_list(arg);
511 return -1;
512 } else {
513 arg--;
516 * Report pool status if the comment is 'status'. Lets
517 * hope no-one wants to load /status as a kernel.
519 if (!strcmp(arg, "status")) {
520 pager_open();
521 for (i = 0; devsw[i] != NULL; i++) {
522 if (devsw[i]->dv_print != NULL) {
523 if (devsw[i]->dv_print(1))
524 break;
525 } else {
526 sprintf(line, "%s: (unknown)\n", devsw[i]->dv_name);
527 if (pager_output(line))
528 break;
531 pager_close();
532 return -1;
536 * If there is a colon, switch pools.
538 if (strncmp(arg, "zfs:", 4) == 0)
539 q = strchr(arg + 4, ':');
540 else
541 q = strchr(arg, ':');
542 if (q) {
543 *q++ = '\0';
544 if (mount_root(arg) != 0)
545 return -1;
546 arg = q;
548 if ((i = ep - arg)) {
549 if ((size_t)i >= sizeof(kname))
550 return -1;
551 memcpy(kname, arg, i + 1);
554 arg = p;
556 return 0;
560 * probe arguments for partition iterator (see below)
562 struct probe_args {
563 int fd;
564 char *devname;
565 u_int secsz;
566 uint64_t offset;
570 * simple wrapper around read() to avoid using device specific
571 * strategy() directly.
573 static int
574 parttblread(void *arg, void *buf, size_t blocks, uint64_t offset)
576 struct probe_args *ppa = arg;
577 size_t size = ppa->secsz * blocks;
579 lseek(ppa->fd, offset * ppa->secsz, SEEK_SET);
580 if (read(ppa->fd, buf, size) == size)
581 return (0);
582 return (EIO);
586 * scan partition entries to find boot partition starting at start_sector.
587 * in case of MBR partition type PART_SOLARIS2, read VTOC and recurse.
589 static int
590 probe_partition(void *arg, const char *partname,
591 const struct ptable_entry *part)
593 struct probe_args pa, *ppa = arg;
594 struct ptable *table;
595 uint64_t *pool_guid_ptr = NULL;
596 uint64_t pool_guid = 0;
597 char devname[32];
598 int len, ret = 0;
600 len = strlen(ppa->devname);
601 if (len > sizeof (devname))
602 len = sizeof (devname);
604 strncpy(devname, ppa->devname, len - 1);
605 devname[len - 1] = '\0';
606 snprintf(devname, sizeof (devname), "%s%s:", devname, partname);
608 /* filter out partitions *not* used by zfs */
609 switch (part->type) {
610 case PART_RESERVED: /* efi reserverd */
611 case PART_VTOC_BOOT: /* vtoc boot area */
612 case PART_VTOC_SWAP:
613 return (ret);
614 default:
615 break;
618 if (part->type == PART_SOLARIS2) {
619 pa.offset = part->start;
620 pa.fd = open(devname, O_RDONLY);
621 if (pa.fd == -1)
622 return (ret);
623 pa.devname = devname;
624 pa.secsz = ppa->secsz;
625 table = ptable_open(&pa, part->end - part->start + 1,
626 ppa->secsz, parttblread);
627 if (table != NULL) {
628 enum ptable_type pt = ptable_gettype(table);
630 if (pt == PTABLE_VTOC8 || pt == PTABLE_VTOC) {
631 ret = ptable_iterate(table, &pa,
632 probe_partition);
633 ptable_close(table);
634 close(pa.fd);
635 return (ret);
637 ptable_close(table);
639 close(pa.fd);
642 if (ppa->offset + part->start == start_sector) {
643 /* Ask zfs_probe_dev to provide guid. */
644 pool_guid_ptr = &pool_guid;
645 /* Set up boot device name for non-zfs case. */
646 strncpy(boot_devname, devname, sizeof (boot_devname));
649 ret = zfs_probe_dev(devname, pool_guid_ptr);
650 if (pool_guid != 0 && bdev == NULL) {
651 bdev = malloc(sizeof (struct i386_devdesc));
652 bzero(bdev, sizeof (struct i386_devdesc));
653 bdev->dd.d_dev = &zfs_dev;
654 bdev->d_kind.zfs.pool_guid = pool_guid;
657 * We can not set up zfs boot device name yet, as the
658 * zfs dv_init() is not completed. We will set boot_devname
659 * in main, after devsw setup.
663 return (0);
667 * open partition table on disk and scan partition entries to find
668 * boot partition starting at start_sector (recorded by installboot).
670 static int
671 probe_disk(char *devname)
673 struct ptable *table;
674 struct probe_args pa;
675 uint64_t mediasz;
676 int ret;
678 pa.offset = 0;
679 pa.devname = devname;
680 pa.fd = open(devname, O_RDONLY);
681 if (pa.fd == -1) {
682 return (ENXIO);
685 ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
686 if (ret == 0)
687 ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
688 if (ret == 0) {
689 table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
690 parttblread);
691 if (table != NULL) {
692 ret = ptable_iterate(table, &pa, probe_partition);
693 ptable_close(table);
696 close(pa.fd);
697 return (ret);
701 * Probe all disks to discover ZFS pools. The idea is to walk all possible
702 * disk devices, however, we also need to identify possible boot pool.
703 * For boot pool detection we have boot disk passed us from BIOS, recorded
704 * in bootinfo.bi_bios_dev, and start_sector LBA recorded by installboot.
706 * To detect boot pool, we can not use generic zfs_probe_dev() on boot disk,
707 * but we need to walk partitions, as we have no way to pass start_sector
708 * to zfs_probe_dev(). Note we do need to detect the partition correcponding
709 * to non-zfs case, so here we can set boot_devname for both cases.
711 static void
712 i386_zfs_probe(void)
714 char devname[32];
715 int boot_unit, unit;
717 /* Translate bios dev to our unit number. */
718 boot_unit = bd_bios2unit(bootinfo.bi_bios_dev);
721 * Open all the disks we can find and see if we can reconstruct
722 * ZFS pools from them.
724 for (unit = 0; unit < MAXBDDEV; unit++) {
725 if (bd_unit2bios(unit) == -1)
726 break;
727 if (bd_unit2bios(unit) < 0x80)
728 continue;
730 sprintf(devname, "disk%d:", unit);
731 /* If this is not boot disk, use generic probe. */
732 if (unit != boot_unit)
733 zfs_probe_dev(devname, NULL);
734 else
735 probe_disk(devname);
739 uint64_t
740 ldi_get_size(void *priv)
742 int fd = (uintptr_t) priv;
743 uint64_t size;
745 ioctl(fd, DIOCGMEDIASIZE, &size);
746 return (size);