loader: Remove d_type from devdesc.
[unleashed.git] / usr / src / boot / sys / boot / i386 / gptzfsboot / zfsboot.c
blob89eb2281a4ab6d6ba388966883b6abcdc7b6917c
1 /*-
2 * Copyright (c) 1998 Robert Nordier
3 * All rights reserved.
5 * Redistribution and use in source and binary forms are freely
6 * permitted provided that the above copyright notice and this
7 * paragraph and the following disclaimer are duplicated in all
8 * such forms.
10 * This software is provided "AS IS" and without any express or
11 * implied warranties, including, without limitation, the implied
12 * warranties of merchantability and fitness for a particular
13 * purpose.
16 #include <sys/cdefs.h>
17 #include <stand.h>
19 #include <sys/param.h>
20 #include <sys/errno.h>
21 #include <sys/diskmbr.h>
22 #include <sys/vtoc.h>
23 #include <sys/disk.h>
24 #include <sys/reboot.h>
25 #include <sys/queue.h>
26 #include <sys/multiboot.h>
28 #include <machine/bootinfo.h>
29 #include <machine/elf.h>
30 #include <machine/pc/bios.h>
32 #include <stdarg.h>
33 #include <stddef.h>
35 #include <a.out.h>
36 #include "bootstrap.h"
37 #include "libi386.h"
38 #include <btxv86.h>
40 #include "lib.h"
41 #include "rbx.h"
42 #include "cons.h"
43 #include "bootargs.h"
44 #include "disk.h"
45 #include "part.h"
46 #include "paths.h"
48 #include "libzfs.h"
/* Address read (through PTOV()) by main() to obtain the BIOS boot drive. */
#define ARGS 0x900
/* Number of option letters in optstr[] and of matching entries in flags[]. */
#define NOPT 14
/* Number of entries in dev_maj[]. */
#define NDEV 3

/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * the BIOS data area drive count and BIOS drive number bits - confirm.
 */
#define BIOS_NUMDRIVES 0x475
#define DRV_HARD 0x80
#define DRV_MASK 0x7f

/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * legacy device type indices - confirm before relying on them.
 */
#define TYPE_AD 0
#define TYPE_DA 1
#define TYPE_MAXHARD TYPE_DA
#define TYPE_FD 2
/* Its address is used by main() to place the heap when no high heap exists. */
extern uint32_t _end;

/*
 * Fake multiboot header to provide versioning and to pass
 * partition start LBA. Partition is either GPT partition or
 * VTOC slice.
 */
extern const struct multiboot_header mb_header;
/* Start LBA of the boot partition; compared against in probe_partition(). */
extern uint64_t start_sector;

/*
 * Option letters accepted by parse_cmd(); optstr[i] toggles flags[i].
 * The array holds exactly NOPT characters and no terminating NUL.
 */
static const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */
static const unsigned char flags[NOPT] = {
	RBX_DUAL,
	RBX_SERIAL,
	RBX_ASKNAME,
	RBX_CDROM,
	RBX_CONFIG,
	RBX_KDB,
	RBX_GDB,
	RBX_MUTE,
	RBX_NOINTR,
	RBX_PAUSE,
	RBX_QUIET,
	RBX_DFLTROOT,
	RBX_SINGLE,
	RBX_VERBOSE
};
/* Boot option bits, updated by parse_cmd() and passed on by load(). */
uint32_t opts;

/* Major numbers handed to MAKEBOOTDEV(), indexed by device type. */
static const unsigned char dev_maj[NDEV] = {30, 4, 2};

/* Current boot device descriptor; replaced by mount_root(). */
static struct i386_devdesc *bdev;
/* Command line being parsed (config file content or user input). */
static char cmd[512];
/* Unmodified copy of cmd[] kept for printing, as parse_cmd() alters cmd[]. */
static char cmddup[512];
/* Name of the file load() will open and execute. */
static char kname[1024];
/* Serial console speed; parse_cmd() passes 115200 / comspeed to sio_init(). */
static int comspeed = SIOSPD;
/* Boot information block whose address is passed to the loaded program. */
static struct bootinfo bootinfo;
/* Boot device value built with MAKEBOOTDEV() and passed to __exec(). */
static uint32_t bootdev;
/* Extended ZFS arguments passed to __exec() when booting from ZFS. */
static struct zfs_boot_args zfsargs;

extern vm_offset_t high_heap_base;
extern uint32_t bios_basemem, bios_extmem, high_heap_size;

/* Heap limits computed in main() and handed to setheap(). */
static char *heap_top;
static char *heap_bottom;

static void i386_zfs_probe(void);
void exit(int);
static void load(void);
static int parse_cmd(void);

struct arch_switch archsw; /* MI/MD interface boundary */
static char boot_devname[2 * ZFS_MAXNAMELEN + 8]; /* disk or pool:dataset */

/* NULL-terminated list of device switches; main() calls each dv_init. */
struct devsw *devsw[] = {
	&biosdisk,
	&zfs_dev,
	NULL
};

/* NULL-terminated list of supported file systems. */
struct fs_ops *file_system[] = {
	&zfs_fsops,
	&ufs_fsops,
	&dosfs_fsops,
	NULL
};
131 main(void)
133 int auto_boot, i, fd;
134 struct disk_devdesc devdesc;
136 bios_getmem();
138 if (high_heap_size > 0) {
139 heap_top = PTOV(high_heap_base + high_heap_size);
140 heap_bottom = PTOV(high_heap_base);
141 } else {
142 heap_bottom = (char *)
143 (roundup2(__base + (int32_t)&_end, 0x10000) - __base);
144 heap_top = (char *) PTOV(bios_basemem);
146 setheap(heap_bottom, heap_top);
149 * Initialise the block cache. Set the upper limit.
151 bcache_init(32768, 512);
153 archsw.arch_autoload = NULL;
154 archsw.arch_getdev = i386_getdev;
155 archsw.arch_copyin = NULL;
156 archsw.arch_copyout = NULL;
157 archsw.arch_readin = NULL;
158 archsw.arch_isainb = NULL;
159 archsw.arch_isaoutb = NULL;
160 archsw.arch_zfs_probe = i386_zfs_probe;
162 bootinfo.bi_version = BOOTINFO_VERSION;
163 bootinfo.bi_size = sizeof(bootinfo);
164 bootinfo.bi_basemem = bios_basemem / 1024;
165 bootinfo.bi_extmem = bios_extmem / 1024;
166 bootinfo.bi_memsizes_valid++;
167 bootinfo.bi_bios_dev = *(uint8_t *)PTOV(ARGS);
169 /* Set up fall back device name. */
170 snprintf(boot_devname, sizeof (boot_devname), "disk%d:",
171 bd_bios2unit(bootinfo.bi_bios_dev));
173 for (i = 0; devsw[i] != NULL; i++)
174 if (devsw[i]->dv_init != NULL)
175 (devsw[i]->dv_init)();
177 disk_parsedev(&devdesc, boot_devname+4, NULL);
179 bootdev = MAKEBOOTDEV(dev_maj[DEVT_DISK], devdesc.d_slice + 1,
180 devdesc.dd.d_unit, devdesc.d_partition >= 0? devdesc.d_partition:0xff);
183 * zfs_fmtdev() can be called only after dv_init
185 if (bdev != NULL && bdev->dd.d_dev->dv_type == DEVT_ZFS) {
186 /* set up proper device name string for ZFS */
187 strncpy(boot_devname, zfs_fmtdev(bdev), sizeof (boot_devname));
190 /* now make sure we have bdev on all cases */
191 if (bdev != NULL)
192 free(bdev);
193 i386_getdev((void **)&bdev, boot_devname, NULL);
195 env_setenv("currdev", EV_VOLATILE, boot_devname, i386_setcurrdev,
196 env_nounset);
198 /* Process configuration file */
199 setenv("LINES", "24", 1);
200 auto_boot = 1;
202 fd = open(PATH_CONFIG, O_RDONLY);
203 if (fd == -1)
204 fd = open(PATH_DOTCONFIG, O_RDONLY);
206 if (fd != -1) {
207 read(fd, cmd, sizeof(cmd));
208 close(fd);
211 if (*cmd) {
213 * Note that parse_cmd() is destructive to cmd[] and we also want
214 * to honor RBX_QUIET option that could be present in cmd[].
216 memcpy(cmddup, cmd, sizeof(cmd));
217 if (parse_cmd())
218 auto_boot = 0;
219 if (!OPT_CHECK(RBX_QUIET))
220 printf("%s: %s\n", PATH_CONFIG, cmddup);
221 /* Do not process this command twice */
222 *cmd = 0;
226 * Try to exec stage 3 boot loader. If interrupted by a keypress,
227 * or in case of failure, switch off auto boot.
230 if (auto_boot && !*kname) {
231 memcpy(kname, PATH_LOADER_ZFS, sizeof(PATH_LOADER_ZFS));
232 if (!keyhit(3)) {
233 load();
234 auto_boot = 0;
238 /* Present the user with the boot2 prompt. */
240 for (;;) {
241 if (!auto_boot || !OPT_CHECK(RBX_QUIET)) {
242 printf("\nillumos/x86 boot\n");
243 printf("Default: %s%s\nboot: ", boot_devname, kname);
245 if (ioctrl & IO_SERIAL)
246 sio_flush();
247 if (!auto_boot || keyhit(5))
248 getstr(cmd, sizeof(cmd));
249 else if (!auto_boot || !OPT_CHECK(RBX_QUIET))
250 putchar('\n');
251 auto_boot = 0;
252 if (parse_cmd())
253 putchar('\a');
254 else
255 load();
/*
 * XXX - Needed for btxld to link the boot2 binary; do not remove.
 *
 * Intentionally empty: the argument is ignored and the function simply
 * returns to its caller.
 */
void
exit(int x)
{
}
265 static void
266 load(void)
268 union {
269 struct exec ex;
270 Elf32_Ehdr eh;
271 } hdr;
272 static Elf32_Phdr ep[2];
273 static Elf32_Shdr es[2];
274 caddr_t p;
275 uint32_t addr, x;
276 int fd, fmt, i, j;
278 if ((fd = open(kname, O_RDONLY)) == -1) {
279 printf("\nCan't find %s\n", kname);
280 return;
282 if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
283 close(fd);
284 return;
286 if (N_GETMAGIC(hdr.ex) == ZMAGIC)
287 fmt = 0;
288 else if (IS_ELF(hdr.eh))
289 fmt = 1;
290 else {
291 printf("Invalid %s\n", "format");
292 close(fd);
293 return;
295 if (fmt == 0) {
296 addr = hdr.ex.a_entry & 0xffffff;
297 p = PTOV(addr);
298 lseek(fd, PAGE_SIZE, SEEK_SET);
299 if (read(fd, p, hdr.ex.a_text) != hdr.ex.a_text) {
300 close(fd);
301 return;
303 p += roundup2(hdr.ex.a_text, PAGE_SIZE);
304 if (read(fd, p, hdr.ex.a_data) != hdr.ex.a_data) {
305 close(fd);
306 return;
308 p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE);
309 bootinfo.bi_symtab = VTOP(p);
310 memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms));
311 p += sizeof(hdr.ex.a_syms);
312 if (hdr.ex.a_syms) {
313 if (read(fd, p, hdr.ex.a_syms) != hdr.ex.a_syms) {
314 close(fd);
315 return;
317 p += hdr.ex.a_syms;
318 if (read(fd, p, sizeof(int)) != sizeof(int)) {
319 close(fd);
320 return;
322 x = *(uint32_t *)p;
323 p += sizeof(int);
324 x -= sizeof(int);
325 if (read(fd, p, x) != x) {
326 close(fd);
327 return;
329 p += x;
331 } else {
332 lseek(fd, hdr.eh.e_phoff, SEEK_SET);
333 for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) {
334 if (read(fd, ep + j, sizeof(ep[0])) != sizeof(ep[0])) {
335 close(fd);
336 return;
338 if (ep[j].p_type == PT_LOAD)
339 j++;
341 for (i = 0; i < 2; i++) {
342 p = PTOV(ep[i].p_paddr & 0xffffff);
343 lseek(fd, ep[i].p_offset, SEEK_SET);
344 if (read(fd, p, ep[i].p_filesz) != ep[i].p_filesz) {
345 close(fd);
346 return;
349 p += roundup2(ep[1].p_memsz, PAGE_SIZE);
350 bootinfo.bi_symtab = VTOP(p);
351 if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) {
352 lseek(fd, hdr.eh.e_shoff + sizeof(es[0]) * (hdr.eh.e_shstrndx + 1),
353 SEEK_SET);
354 if (read(fd, &es, sizeof(es)) != sizeof(es)) {
355 close(fd);
356 return;
358 for (i = 0; i < 2; i++) {
359 memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size));
360 p += sizeof(es[i].sh_size);
361 lseek(fd, es[i].sh_offset, SEEK_SET);
362 if (read(fd, p, es[i].sh_size) != es[i].sh_size) {
363 close(fd);
364 return;
366 p += es[i].sh_size;
369 addr = hdr.eh.e_entry & 0xffffff;
371 close(fd);
373 bootinfo.bi_esymtab = VTOP(p);
374 bootinfo.bi_kernelname = VTOP(kname);
376 if (bdev->dd.d_dev->dv_type == DEVT_ZFS) {
377 zfsargs.size = sizeof(zfsargs);
378 zfsargs.pool = bdev->d_kind.zfs.pool_guid;
379 zfsargs.root = bdev->d_kind.zfs.root_guid;
380 __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
381 bootdev,
382 KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG,
383 (uint32_t) bdev->d_kind.zfs.pool_guid,
384 (uint32_t) (bdev->d_kind.zfs.pool_guid >> 32),
385 VTOP(&bootinfo),
386 zfsargs);
387 } else
388 __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
389 bootdev, 0, 0, 0, VTOP(&bootinfo));
392 static int
393 mount_root(char *arg)
395 char *root;
396 struct i386_devdesc *ddesc;
397 uint8_t part;
399 root = malloc(strlen(arg) + 2);
400 if (root == NULL)
401 return (1);
402 sprintf(root, "%s:", arg);
403 if (i386_getdev((void **)&ddesc, root, NULL)) {
404 free(root);
405 return (1);
408 /* we should have new device descriptor, free old and replace it. */
409 if (bdev != NULL)
410 free(bdev);
411 bdev = ddesc;
412 if (bdev->dd.d_dev->dv_type == DEVT_DISK) {
413 if (bdev->d_kind.biosdisk.partition == -1)
414 part = 0xff;
415 else
416 part = bdev->d_kind.biosdisk.partition;
417 bootdev = MAKEBOOTDEV(dev_maj[bdev->dd.d_dev->dv_type],
418 bdev->d_kind.biosdisk.slice + 1,
419 bdev->dd.d_unit, part);
420 bootinfo.bi_bios_dev = bd_unit2bios(bdev->dd.d_unit);
422 setenv("currdev", root, 1);
423 free(root);
424 return (0);
/*
 * Print the names of the entries of directory arg through the pager.
 *
 * Nothing is printed when arg cannot be opened. Listing stops early when
 * pager_output() returns non-zero.
 */
static void
fs_list(char *arg)
{
	int fd;
	struct dirent *d;

	fd = open(arg, O_RDONLY);
	if (fd < 0)
		return;
	pager_open();
	while ((d = readdirfd(fd)) != NULL) {
		/*
		 * Emit the name and the newline separately instead of
		 * formatting them into a fixed 80 byte buffer, which a long
		 * directory entry name would overflow.
		 */
		if (pager_output(d->d_name) || pager_output("\n"))
			break;
	}
	pager_close();
	close(fd);
}
447 static int
448 parse_cmd(void)
450 char *arg = cmd;
451 char *ep, *p, *q;
452 const char *cp;
453 char line[80];
454 int c, i, j;
456 while ((c = *arg++)) {
457 if (c == ' ' || c == '\t' || c == '\n')
458 continue;
459 for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++);
460 ep = p;
461 if (*p)
462 *p++ = 0;
463 if (c == '-') {
464 while ((c = *arg++)) {
465 if (c == 'P') {
466 if (*(uint8_t *)PTOV(0x496) & 0x10) {
467 cp = "yes";
468 } else {
469 opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL);
470 cp = "no";
472 printf("Keyboard: %s\n", cp);
473 continue;
474 } else if (c == 'S') {
475 j = 0;
476 while ((unsigned int)(i = *arg++ - '0') <= 9)
477 j = j * 10 + i;
478 if (j > 0 && i == -'0') {
479 comspeed = j;
480 break;
482 /* Fall through to error below ('S' not in optstr[]). */
484 for (i = 0; c != optstr[i]; i++)
485 if (i == NOPT - 1)
486 return -1;
487 opts ^= OPT_SET(flags[i]);
489 ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) :
490 OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD;
491 if (ioctrl & IO_SERIAL) {
492 if (sio_init(115200 / comspeed) != 0)
493 ioctrl &= ~IO_SERIAL;
495 } if (c == '?') {
496 printf("\n");
497 fs_list(arg);
498 zfs_list(arg);
499 return -1;
500 } else {
501 arg--;
504 * Report pool status if the comment is 'status'. Lets
505 * hope no-one wants to load /status as a kernel.
507 if (!strcmp(arg, "status")) {
508 pager_open();
509 for (i = 0; devsw[i] != NULL; i++) {
510 if (devsw[i]->dv_print != NULL) {
511 if (devsw[i]->dv_print(1))
512 break;
513 } else {
514 sprintf(line, "%s: (unknown)\n", devsw[i]->dv_name);
515 if (pager_output(line))
516 break;
519 pager_close();
520 return -1;
524 * If there is a colon, switch pools.
526 if (strncmp(arg, "zfs:", 4) == 0)
527 q = strchr(arg + 4, ':');
528 else
529 q = strchr(arg, ':');
530 if (q) {
531 *q++ = '\0';
532 if (mount_root(arg) != 0)
533 return -1;
534 arg = q;
536 if ((i = ep - arg)) {
537 if ((size_t)i >= sizeof(kname))
538 return -1;
539 memcpy(kname, arg, i + 1);
542 arg = p;
544 return 0;
/*
 * probe arguments for partition iterator (see below)
 */
struct probe_args {
	int		fd;		/* open descriptor used by parttblread() */
	char		*devname;	/* name of the device fd was opened on */
	u_int		secsz;		/* sector size, multiplier for offsets */
	uint64_t	offset;		/* start of this table, added to */
					/* part->start for start_sector match */
};
558 * simple wrapper around read() to avoid using device specific
559 * strategy() directly.
561 static int
562 parttblread(void *arg, void *buf, size_t blocks, uint64_t offset)
564 struct probe_args *ppa = arg;
565 size_t size = ppa->secsz * blocks;
567 lseek(ppa->fd, offset * ppa->secsz, SEEK_SET);
568 if (read(ppa->fd, buf, size) == size)
569 return (0);
570 return (EIO);
574 * scan partition entries to find boot partition starting at start_sector.
575 * in case of MBR partition type PART_SOLARIS2, read VTOC and recurse.
577 static int
578 probe_partition(void *arg, const char *partname,
579 const struct ptable_entry *part)
581 struct probe_args pa, *ppa = arg;
582 struct ptable *table;
583 uint64_t *pool_guid_ptr = NULL;
584 uint64_t pool_guid = 0;
585 char devname[32];
586 int len, ret = 0;
588 len = strlen(ppa->devname);
589 if (len > sizeof (devname))
590 len = sizeof (devname);
592 strncpy(devname, ppa->devname, len - 1);
593 devname[len - 1] = '\0';
594 snprintf(devname, sizeof (devname), "%s%s:", devname, partname);
596 /* filter out partitions *not* used by zfs */
597 switch (part->type) {
598 case PART_RESERVED: /* efi reserverd */
599 case PART_VTOC_BOOT: /* vtoc boot area */
600 case PART_VTOC_SWAP:
601 return (ret);
602 default:
603 break;
606 if (part->type == PART_SOLARIS2) {
607 pa.offset = part->start;
608 pa.fd = open(devname, O_RDONLY);
609 if (pa.fd == -1)
610 return (ret);
611 pa.devname = devname;
612 pa.secsz = ppa->secsz;
613 table = ptable_open(&pa, part->end - part->start + 1,
614 ppa->secsz, parttblread);
615 if (table != NULL) {
616 enum ptable_type pt = ptable_gettype(table);
618 if (pt == PTABLE_VTOC8 || pt == PTABLE_VTOC) {
619 ret = ptable_iterate(table, &pa,
620 probe_partition);
621 ptable_close(table);
622 close(pa.fd);
623 return (ret);
625 ptable_close(table);
627 close(pa.fd);
630 if (ppa->offset + part->start == start_sector) {
631 /* Ask zfs_probe_dev to provide guid. */
632 pool_guid_ptr = &pool_guid;
633 /* Set up boot device name for non-zfs case. */
634 strncpy(boot_devname, devname, sizeof (boot_devname));
637 ret = zfs_probe_dev(devname, pool_guid_ptr);
638 if (pool_guid != 0 && bdev == NULL) {
639 bdev = malloc(sizeof (struct i386_devdesc));
640 bzero(bdev, sizeof (struct i386_devdesc));
641 bdev->dd.d_dev = &zfs_dev;
642 bdev->d_kind.zfs.pool_guid = pool_guid;
645 * We can not set up zfs boot device name yet, as the
646 * zfs dv_init() is not completed. We will set boot_devname
647 * in main, after devsw setup.
651 return (0);
655 * open partition table on disk and scan partition entries to find
656 * boot partition starting at start_sector (recorded by installboot).
658 static int
659 probe_disk(char *devname)
661 struct ptable *table;
662 struct probe_args pa;
663 uint64_t mediasz;
664 int ret;
666 pa.offset = 0;
667 pa.devname = devname;
668 pa.fd = open(devname, O_RDONLY);
669 if (pa.fd == -1) {
670 return (ENXIO);
673 ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
674 if (ret == 0)
675 ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
676 if (ret == 0) {
677 table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
678 parttblread);
679 if (table != NULL) {
680 ret = ptable_iterate(table, &pa, probe_partition);
681 ptable_close(table);
684 close(pa.fd);
685 return (ret);
689 * Probe all disks to discover ZFS pools. The idea is to walk all possible
690 * disk devices, however, we also need to identify possible boot pool.
691 * For boot pool detection we have boot disk passed us from BIOS, recorded
692 * in bootinfo.bi_bios_dev, and start_sector LBA recorded by installboot.
694 * To detect boot pool, we can not use generic zfs_probe_dev() on boot disk,
695 * but we need to walk partitions, as we have no way to pass start_sector
696 * to zfs_probe_dev(). Note we do need to detect the partition correcponding
697 * to non-zfs case, so here we can set boot_devname for both cases.
699 static void
700 i386_zfs_probe(void)
702 char devname[32];
703 int boot_unit, unit;
705 /* Translate bios dev to our unit number. */
706 boot_unit = bd_bios2unit(bootinfo.bi_bios_dev);
709 * Open all the disks we can find and see if we can reconstruct
710 * ZFS pools from them.
712 for (unit = 0; unit < MAXBDDEV; unit++) {
713 if (bd_unit2bios(unit) == -1)
714 break;
716 sprintf(devname, "disk%d:", unit);
717 /* If this is not boot disk, use generic probe. */
718 if (unit != boot_unit)
719 zfs_probe_dev(devname, NULL);
720 else
721 probe_disk(devname);
725 uint64_t
726 ldi_get_size(void *priv)
728 int fd = (uintptr_t) priv;
729 uint64_t size;
731 ioctl(fd, DIOCGMEDIASIZE, &size);
732 return (size);