2 * Copyright (c) 2006 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/platform/vkernel/platform/init.c,v 1.56 2008/05/27 07:48:00 dillon Exp $
37 #include <sys/types.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
43 #include <sys/random.h>
44 #include <sys/vkernel.h>
46 #include <sys/reboot.h>
48 #include <sys/msgbuf.h>
49 #include <sys/vmspace.h>
50 #include <sys/socket.h>
51 #include <sys/sockio.h>
52 #include <sys/sysctl.h>
54 #include <vm/vm_page.h>
55 #include <vm/vm_map.h>
56 #include <sys/mplock2.h>
58 #include <machine/cpu.h>
59 #include <machine/globaldata.h>
60 #include <machine/tls.h>
61 #include <machine/md_var.h>
62 #include <machine/vmparam.h>
63 #include <cpu/specialreg.h>
66 #include <net/if_arp.h>
67 #include <net/ethernet.h>
68 #include <net/bridge/if_bridgevar.h>
69 #include <netinet/in.h>
70 #include <arpa/inet.h>
84 vm_paddr_t phys_avail
[16];
86 vm_paddr_t Maxmem_bytes
;
89 struct vkdisk_info DiskInfo
[VKDISK_MAX
];
91 struct vknetif_info NetifInfo
[VKNETIF_MAX
];
97 vm_offset_t virtual_start
;
98 vm_offset_t virtual_end
;
99 vm_offset_t virtual2_start
;
100 vm_offset_t virtual2_end
;
101 vm_offset_t kernel_vm_end
;
102 vm_offset_t crashdumpmap
;
103 vm_offset_t clean_sva
;
104 vm_offset_t clean_eva
;
105 struct msgbuf
*msgbufp
;
108 vpte_t
*KernelPTA
; /* Warning: Offset for direct VA translation */
109 void *dmap_min_address
;
110 u_int cpu_feature
; /* XXX */
112 int64_t tsc_frequency
;
113 int optcpus
; /* number of cpus - see mp_start() */
114 int lwp_cpu_lock
; /* if/how to lock virtual CPUs to real CPUs */
115 int real_ncpus
; /* number of real CPUs */
116 int next_cpu
; /* next real CPU to lock a virtual CPU to */
118 struct privatespace
*CPU_prvspace
;
120 static struct trapframe proc0_tf
;
121 static void *proc0paddr
;
123 static void init_sys_memory(char *imageFile
);
124 static void init_kern_memory(void);
125 static void init_globaldata(void);
126 static void init_vkernel(void);
127 static void init_disk(char *diskExp
[], int diskFileNum
, enum vkdisk_type type
);
128 static void init_netif(char *netifExp
[], int netifFileNum
);
129 static void writepid(void);
130 static void cleanpid(void);
131 static int unix_connect(const char *path
);
132 static void usage_err(const char *ctl
, ...);
133 static void usage_help(_Bool
);
136 static char **save_av
;
139 * Kernel startup for virtual kernels - standard main()
142 main(int ac
, char **av
)
144 char *memImageFile
= NULL
;
145 char *netifFile
[VKNETIF_MAX
];
146 char *diskFile
[VKDISK_MAX
];
147 char *cdFile
[VKDISK_MAX
];
150 int netifFileNum
= 0;
153 int bootOnDisk
= -1; /* set below to vcd (0) or vkd (1) */
159 int real_vkernel_enable
;
169 kernel_mem_readonly
= 1;
173 lwp_cpu_lock
= LCL_NONE
;
175 real_vkernel_enable
= 0;
176 vsize
= sizeof(real_vkernel_enable
);
177 sysctlbyname("vm.vkernel_enable", &real_vkernel_enable
, &vsize
, NULL
,0);
179 if (real_vkernel_enable
== 0) {
180 errx(1, "vm.vkernel_enable is 0, must be set "
181 "to 1 to execute a vkernel!");
185 vsize
= sizeof(real_ncpus
);
186 sysctlbyname("hw.ncpu", &real_ncpus
, &vsize
, NULL
, 0);
191 while ((c
= getopt(ac
, av
, "c:hsvl:m:n:r:e:i:p:I:U")) != -1) {
195 * name=value:name=value:name=value...
198 * Allow values to be quoted but note that shells
199 * may remove the quotes, so using this feature
200 * to embed colons may require a backslash.
204 kern_envp
= malloc(n
+ 2);
205 for (i
= j
= 0; i
< n
; ++i
) {
206 if (optarg
[i
] == '"')
208 else if (optarg
[i
] == '\'')
210 else if (isq
== 0 && optarg
[i
] == ':')
213 kern_envp
[j
++] = optarg
[i
];
219 boothowto
|= RB_SINGLE
;
225 memImageFile
= optarg
;
228 if (netifFileNum
< VKNETIF_MAX
)
229 netifFile
[netifFileNum
++] = strdup(optarg
);
234 if (diskFileNum
+ cdFileNum
< VKDISK_MAX
)
235 diskFile
[diskFileNum
++] = strdup(optarg
);
240 if (diskFileNum
+ cdFileNum
< VKDISK_MAX
)
241 cdFile
[cdFileNum
++] = strdup(optarg
);
244 Maxmem_bytes
= strtoull(optarg
, &suffix
, 0);
261 usage_err("Bad maxmem option");
269 if (strncmp("map", optarg
, 3) == 0) {
270 lwp_cpu_lock
= LCL_PER_CPU
;
271 if (optarg
[3] == ',') {
272 next_cpu
= strtol(optarg
+4, &endp
, 0);
274 usage_err("Bad target CPU number at '%s'", endp
);
278 if (next_cpu
< 0 || next_cpu
> real_ncpus
- 1)
279 usage_err("Bad target CPU, valid range is 0-%d", real_ncpus
- 1);
280 } else if (strncmp("any", optarg
, 3) == 0) {
281 lwp_cpu_lock
= LCL_NONE
;
283 lwp_cpu_lock
= LCL_SINGLE_CPU
;
284 next_cpu
= strtol(optarg
, &endp
, 0);
286 usage_err("Bad target CPU number at '%s'", endp
);
287 if (next_cpu
< 0 || next_cpu
> real_ncpus
- 1)
288 usage_err("Bad target CPU, valid range is 0-%d", real_ncpus
- 1);
293 * This value is set up by mp_start(), don't just
297 optcpus
= strtol(optarg
, NULL
, 0);
298 if (optcpus
< 1 || optcpus
> MAXCPU
)
299 usage_err("Bad ncpus, valid range is 1-%d", MAXCPU
);
301 if (strtol(optarg
, NULL
, 0) != 1) {
302 usage_err("You built a UP vkernel, only 1 cpu!");
311 kernel_mem_readonly
= 0;
323 init_sys_memory(memImageFile
);
333 vsize
= sizeof(tsc_present
);
334 sysctlbyname("hw.tsc_present", &tsc_present
, &vsize
, NULL
, 0);
335 vsize
= sizeof(tsc_frequency
);
336 sysctlbyname("hw.tsc_frequency", &tsc_frequency
, &vsize
, NULL
, 0);
338 cpu_feature
|= CPUID_TSC
;
343 vsize
= sizeof(supports_sse
);
345 sysctlbyname("hw.instruction_sse", &supports_sse
, &vsize
, NULL
, 0);
346 init_fpu(supports_sse
);
348 cpu_feature
|= CPUID_SSE
| CPUID_FXSR
;
351 * We boot from the first installed disk.
353 if (bootOnDisk
== 1) {
354 init_disk(diskFile
, diskFileNum
, VKD_DISK
);
355 init_disk(cdFile
, cdFileNum
, VKD_CD
);
357 init_disk(cdFile
, cdFileNum
, VKD_CD
);
358 init_disk(diskFile
, diskFileNum
, VKD_DISK
);
360 init_netif(netifFile
, netifFileNum
);
368 * Initialize system memory. This is the virtual kernel's 'RAM'.
372 init_sys_memory(char *imageFile
)
379 * Figure out the system memory image size. If an image file was
380 * specified and -m was not specified, use the image file's size.
383 if (imageFile
&& stat(imageFile
, &st
) == 0 && Maxmem_bytes
== 0)
384 Maxmem_bytes
= (vm_paddr_t
)st
.st_size
;
385 if ((imageFile
== NULL
|| stat(imageFile
, &st
) < 0) &&
387 errx(1, "Cannot create new memory file %s unless "
388 "system memory size is specified with -m",
394 * Maxmem must be known at this time
396 if (Maxmem_bytes
< 64 * 1024 * 1024 || (Maxmem_bytes
& SEG_MASK
)) {
397 errx(1, "Bad maxmem specification: 64MB minimum, "
398 "multiples of %dMB only",
399 SEG_SIZE
/ 1024 / 1024);
404 * Generate an image file name if necessary, then open/create the
405 * file exclusively locked. Do not allow multiple virtual kernels
406 * to use the same image file.
408 if (imageFile
== NULL
) {
409 for (i
= 0; i
< 1000000; ++i
) {
410 asprintf(&imageFile
, "/var/vkernel/memimg.%06d", i
);
412 O_RDWR
|O_CREAT
|O_EXLOCK
|O_NONBLOCK
, 0644);
413 if (fd
< 0 && errno
== EWOULDBLOCK
) {
420 fd
= open(imageFile
, O_RDWR
|O_CREAT
|O_EXLOCK
|O_NONBLOCK
, 0644);
422 fprintf(stderr
, "Using memory file: %s\n", imageFile
);
423 if (fd
< 0 || fstat(fd
, &st
) < 0) {
424 err(1, "Unable to open/create %s", imageFile
);
429 * Truncate or extend the file as necessary. Clean out the contents
430 * of the file, we want it to be full of holes so we don't waste
431 * time reading in data from an old file that we no longer care
435 ftruncate(fd
, Maxmem_bytes
);
438 Maxmem
= Maxmem_bytes
>> PAGE_SHIFT
;
443 * Initialize kernel memory. This reserves kernel virtual memory by using
449 init_kern_memory(void)
454 char *topofstack
= &dummy
;
459 * Memory map our kernel virtual memory space. Note that the
460 * kernel image itself is not made part of this memory for the
463 * The memory map must be segment-aligned so we can properly
466 * If the system kernel has a different MAXDSIZ, it might not
467 * be possible to map kernel memory in its preferred location.
468 * Try a number of different locations.
470 try = (void *)(512UL << 30);
472 while ((char *)try + KERNEL_KVA_SIZE
< topofstack
) {
473 base
= mmap(try, KERNEL_KVA_SIZE
, PROT_READ
|PROT_WRITE
,
474 MAP_FILE
|MAP_SHARED
|MAP_VPAGETABLE
,
475 MemImageFd
, (off_t
)try);
478 if (base
!= MAP_FAILED
)
479 munmap(base
, KERNEL_KVA_SIZE
);
480 try = (char *)try + (512UL << 30);
483 err(1, "Unable to mmap() kernel virtual memory!");
486 madvise(base
, KERNEL_KVA_SIZE
, MADV_NOSYNC
);
487 KvaStart
= (vm_offset_t
)base
;
488 KvaSize
= KERNEL_KVA_SIZE
;
489 KvaEnd
= KvaStart
+ KvaSize
;
491 /* cannot use kprintf yet */
492 printf("KVM mapped at %p-%p\n", (void *)KvaStart
, (void *)KvaEnd
);
495 dmap_min_address
= mmap(0, DMAP_SIZE
, PROT_READ
|PROT_WRITE
,
496 MAP_NOCORE
|MAP_NOSYNC
|MAP_SHARED
,
498 if (dmap_min_address
== MAP_FAILED
) {
499 err(1, "Unable to mmap() kernel DMAP region!");
504 pmap_bootstrap((vm_paddr_t
*)&firstfree
, (int64_t)base
);
506 mcontrol(base
, KERNEL_KVA_SIZE
, MADV_SETMAP
,
507 0 | VPTE_R
| VPTE_W
| VPTE_V
);
510 * phys_avail[] represents unallocated physical memory. MI code
511 * will use phys_avail[] to create the vm_page array.
513 phys_avail
[0] = (vm_paddr_t
)firstfree
;
514 phys_avail
[0] = (phys_avail
[0] + PAGE_MASK
) & ~(vm_paddr_t
)PAGE_MASK
;
515 phys_avail
[1] = Maxmem_bytes
;
519 * (virtual_start, virtual_end) represent unallocated kernel virtual
520 * memory. MI code will create kernel_map using these parameters.
522 virtual_start
= KvaStart
+ (long)firstfree
;
523 virtual_start
= (virtual_start
+ PAGE_MASK
) & ~(vm_offset_t
)PAGE_MASK
;
524 virtual_end
= KvaStart
+ KERNEL_KVA_SIZE
;
528 * pmap_growkernel() will set the correct value.
533 * Allocate space for process 0's UAREA.
535 proc0paddr
= (void *)virtual_start
;
536 for (i
= 0; i
< UPAGES
; ++i
) {
537 pmap_kenter_quick(virtual_start
, phys_avail
[0]);
538 virtual_start
+= PAGE_SIZE
;
539 phys_avail
[0] += PAGE_SIZE
;
545 crashdumpmap
= virtual_start
;
546 virtual_start
+= MAXDUMPPGS
* PAGE_SIZE
;
549 * msgbufp maps the system message buffer
551 assert((MSGBUF_SIZE
& PAGE_MASK
) == 0);
552 msgbufp
= (void *)virtual_start
;
553 for (i
= 0; i
< (MSGBUF_SIZE
>> PAGE_SHIFT
); ++i
) {
554 pmap_kenter_quick(virtual_start
, phys_avail
[0]);
555 virtual_start
+= PAGE_SIZE
;
556 phys_avail
[0] += PAGE_SIZE
;
558 msgbufinit(msgbufp
, MSGBUF_SIZE
);
561 * used by kern_memio for /dev/mem access
563 ptvmmap
= (caddr_t
)virtual_start
;
564 virtual_start
+= PAGE_SIZE
;
567 * Bootstrap the kernel_pmap
575 * Map the per-cpu globaldata for cpu #0. Allocate the space using
576 * virtual_start and phys_avail[0]
580 init_globaldata(void)
587 * Reserve enough KVA to cover possible cpus. This is a considerable
588 * amount of KVA since the privatespace structure includes two
589 * whole page table mappings.
591 virtual_start
= (virtual_start
+ SEG_MASK
) & ~(vm_offset_t
)SEG_MASK
;
592 CPU_prvspace
= (void *)virtual_start
;
593 virtual_start
+= sizeof(struct privatespace
) * SMP_MAXCPU
;
596 * Allocate enough physical memory to cover the mdglobaldata
597 * portion of the space and the idle stack and map the pages
598 * into KVA. For cpu #0 only.
600 for (i
= 0; i
< sizeof(struct mdglobaldata
); i
+= PAGE_SIZE
) {
602 va
= (vm_offset_t
)&CPU_prvspace
[0].mdglobaldata
+ i
;
603 pmap_kenter_quick(va
, pa
);
604 phys_avail
[0] += PAGE_SIZE
;
606 for (i
= 0; i
< sizeof(CPU_prvspace
[0].idlestack
); i
+= PAGE_SIZE
) {
608 va
= (vm_offset_t
)&CPU_prvspace
[0].idlestack
+ i
;
609 pmap_kenter_quick(va
, pa
);
610 phys_avail
[0] += PAGE_SIZE
;
614 * Setup the %gs for cpu #0. The mycpu macro works after this
615 * point. Note that %fs is used by pthreads.
617 tls_set_gs(&CPU_prvspace
[0], sizeof(struct privatespace
));
621 * Initialize very low level systems including thread0, proc0, etc.
627 struct mdglobaldata
*gd
;
629 gd
= &CPU_prvspace
[0].mdglobaldata
;
630 bzero(gd
, sizeof(*gd
));
632 gd
->mi
.gd_curthread
= &thread0
;
633 thread0
.td_gd
= &gd
->mi
;
635 ncpus2
= 1; /* rounded down power of 2 */
636 ncpus_fit
= 1; /* rounded up power of 2 */
637 /* ncpus2_mask and ncpus_fit_mask are 0 */
639 gd
->mi
.gd_prvspace
= &CPU_prvspace
[0];
640 mi_gdinit(&gd
->mi
, 0);
642 mi_proc0init(&gd
->mi
, proc0paddr
);
643 lwp0
.lwp_md
.md_regs
= &proc0_tf
;
648 * Get the initial mplock with a count of 1 for the BSP.
649 * This uses a LOGICAL cpu ID, ie BSP == 0.
651 cpu_get_initial_mplock();
655 #if 0 /* #ifdef DDB */
657 if (boothowto
& RB_KDB
)
658 Debugger("Boot flags requested debugger");
662 initializecpu(); /* Initialize CPU registers */
664 init_param2((phys_avail
[1] - phys_avail
[0]) / PAGE_SIZE
);
668 * Map the message buffer
670 for (off
= 0; off
< round_page(MSGBUF_SIZE
); off
+= PAGE_SIZE
)
671 pmap_kenter((vm_offset_t
)msgbufp
+ off
, avail_end
+ off
);
672 msgbufinit(msgbufp
, MSGBUF_SIZE
);
675 thread0
.td_pcb_cr3
... MMU
676 lwp0
.lwp_md
.md_regs
= &proc0_tf
;
681 * Filesystem image paths for the virtual kernel are optional.
682 * If specified they each should point to a disk image,
683 * the first of which will become the root disk.
685 * The virtual kernel caches data from our 'disk' just like a normal kernel,
686 * so we do not really want the real kernel to cache the data too. Use
687 * O_DIRECT to remove the duplication.
691 init_disk(char *diskExp
[], int diskFileNum
, enum vkdisk_type type
)
695 if (diskFileNum
== 0)
698 for(i
=0; i
< diskFileNum
; i
++){
703 warnx("Invalid argument to '-r'");
707 if (DiskNum
< VKDISK_MAX
) {
709 struct vkdisk_info
* info
= NULL
;
713 if (type
== VKD_DISK
)
714 fd
= open(fname
, O_RDWR
|O_DIRECT
, 0644);
716 fd
= open(fname
, O_RDONLY
|O_DIRECT
, 0644);
717 if (fd
< 0 || fstat(fd
, &st
) < 0) {
718 err(1, "Unable to open/create %s", fname
);
721 if (S_ISREG(st
.st_mode
)) {
722 if (flock(fd
, LOCK_EX
|LOCK_NB
) < 0) {
723 errx(1, "Disk image %s is already "
729 info
= &DiskInfo
[DiskNum
];
735 memcpy(info
->fname
, fname
, l
);
738 if (type
== VKD_CD
) {
739 rootdevnames
[0] = "cd9660:vcd0a";
740 } else if (type
== VKD_DISK
) {
741 rootdevnames
[0] = "ufs:vkd0s0a";
742 rootdevnames
[1] = "ufs:vkd0s1a";
748 warnx("vkd%d (%s) > VKDISK_MAX", DiskNum
, fname
);
756 netif_set_tapflags(int tap_unit
, int f
, int s
)
761 bzero(&ifr
, sizeof(ifr
));
763 snprintf(ifr
.ifr_name
, sizeof(ifr
.ifr_name
), "tap%d", tap_unit
);
764 if (ioctl(s
, SIOCGIFFLAGS
, &ifr
) < 0) {
765 warn("tap%d: ioctl(SIOCGIFFLAGS) failed", tap_unit
);
772 * If the flags are already set/cleared, then we return
773 * immediately to avoid extra syscalls
775 flags
= (ifr
.ifr_flags
& 0xffff) | (ifr
.ifr_flagshigh
<< 16);
779 if ((flags
& f
) == 0)
790 * Fix up ifreq.ifr_name, since it may be trashed
791 * in previous ioctl(SIOCGIFFLAGS)
793 snprintf(ifr
.ifr_name
, sizeof(ifr
.ifr_name
), "tap%d", tap_unit
);
795 ifr
.ifr_flags
= flags
& 0xffff;
796 ifr
.ifr_flagshigh
= flags
>> 16;
797 if (ioctl(s
, SIOCSIFFLAGS
, &ifr
) < 0) {
798 warn("tap%d: ioctl(SIOCSIFFLAGS) failed", tap_unit
);
806 netif_set_tapaddr(int tap_unit
, in_addr_t addr
, in_addr_t mask
, int s
)
808 struct ifaliasreq ifra
;
809 struct sockaddr_in
*in
;
811 bzero(&ifra
, sizeof(ifra
));
812 snprintf(ifra
.ifra_name
, sizeof(ifra
.ifra_name
), "tap%d", tap_unit
);
815 in
= (struct sockaddr_in
*)&ifra
.ifra_addr
;
816 in
->sin_family
= AF_INET
;
817 in
->sin_len
= sizeof(*in
);
818 in
->sin_addr
.s_addr
= addr
;
822 in
= (struct sockaddr_in
*)&ifra
.ifra_mask
;
823 in
->sin_len
= sizeof(*in
);
824 in
->sin_addr
.s_addr
= mask
;
827 if (ioctl(s
, SIOCAIFADDR
, &ifra
) < 0) {
828 warn("tap%d: ioctl(SIOCAIFADDR) failed", tap_unit
);
836 netif_add_tap2brg(int tap_unit
, const char *ifbridge
, int s
)
841 bzero(&ifbr
, sizeof(ifbr
));
842 snprintf(ifbr
.ifbr_ifsname
, sizeof(ifbr
.ifbr_ifsname
),
845 bzero(&ifd
, sizeof(ifd
));
846 strlcpy(ifd
.ifd_name
, ifbridge
, sizeof(ifd
.ifd_name
));
847 ifd
.ifd_cmd
= BRDGADD
;
848 ifd
.ifd_len
= sizeof(ifbr
);
849 ifd
.ifd_data
= &ifbr
;
851 if (ioctl(s
, SIOCSDRVSPEC
, &ifd
) < 0) {
853 * 'errno == EEXIST' means that the tap(4) is already
854 * a member of the bridge(4)
856 if (errno
!= EEXIST
) {
857 warn("ioctl(%s, SIOCSDRVSPEC) failed", ifbridge
);
864 #define TAPDEV_OFLAGS (O_RDWR | O_NONBLOCK)
867 * Locate the first unused tap(4) device file if auto mode is requested,
868 * or open the user supplied device file, and bring up the corresponding
871 * NOTE: Only tap(4) device file is supported currently
875 netif_open_tap(const char *netif
, int *tap_unit
, int s
)
877 char tap_dev
[MAXPATHLEN
];
884 if (strcmp(netif
, "auto") == 0) {
886 * Find first unused tap(4) device file
888 tap_fd
= open("/dev/tap", TAPDEV_OFLAGS
);
890 warnc(errno
, "Unable to find a free tap(4)");
895 * User supplied tap(4) device file or unix socket.
897 if (netif
[0] == '/') /* Absolute path */
898 strlcpy(tap_dev
, netif
, sizeof(tap_dev
));
900 snprintf(tap_dev
, sizeof(tap_dev
), "/dev/%s", netif
);
902 tap_fd
= open(tap_dev
, TAPDEV_OFLAGS
);
905 * If we cannot open normally try to connect to it.
908 tap_fd
= unix_connect(tap_dev
);
911 warn("Unable to open %s", tap_dev
);
917 * Check whether the device file is a tap(4)
919 if (fstat(tap_fd
, &st
) < 0) {
921 } else if (S_ISCHR(st
.st_mode
)) {
922 dname
= fdevname(tap_fd
);
924 dname
= strstr(dname
, "tap");
927 * Bring up the corresponding tap(4) interface
929 *tap_unit
= strtol(dname
+ 3, NULL
, 10);
930 printf("TAP UNIT %d\n", *tap_unit
);
931 if (netif_set_tapflags(*tap_unit
, IFF_UP
, s
) == 0)
938 } else if (S_ISSOCK(st
.st_mode
)) {
940 * Special socket connection (typically to vknet). We
941 * do not have to do anything.
949 warnx("%s is not a tap(4) device or socket", tap_dev
);
958 unix_connect(const char *path
)
960 struct sockaddr_un sunx
;
966 snprintf(sunx
.sun_path
, sizeof(sunx
.sun_path
), "%s", path
);
967 len
= offsetof(struct sockaddr_un
, sun_path
[strlen(sunx
.sun_path
)]);
968 ++len
; /* include nul */
969 sunx
.sun_family
= AF_UNIX
;
972 net_fd
= socket(AF_UNIX
, SOCK_SEQPACKET
, 0);
975 if (connect(net_fd
, (void *)&sunx
, len
) < 0) {
979 setsockopt(net_fd
, SOL_SOCKET
, SO_SNDBUF
, &sndbuf
, sizeof(sndbuf
));
980 if (fstat(net_fd
, &st
) == 0)
981 printf("Network socket buffer: %d bytes\n", st
.st_blksize
);
982 fcntl(net_fd
, F_SETFL
, O_NONBLOCK
);
991 * The following syntaxes are supported:
992 * 1) x.x.x.x tap(4)'s address is x.x.x.x
994 * 2) x.x.x.x/z tap(4)'s address is x.x.x.x
995 * tap(4)'s netmask len is z
997 * 3) x.x.x.x:y.y.y.y tap(4)'s address is x.x.x.x
998 * pseudo netif's address is y.y.y.y
1000 * 4) x.x.x.x:y.y.y.y/z tap(4)'s address is x.x.x.x
1001 * pseudo netif's address is y.y.y.y
1002 * tap(4) and pseudo netif's netmask len are z
1004 * 5) bridgeX tap(4) will be added to bridgeX
1006 * 6) bridgeX:y.y.y.y tap(4) will be added to bridgeX
1007 * pseudo netif's address is y.y.y.y
1009 * 7) bridgeX:y.y.y.y/z tap(4) will be added to bridgeX
1010 * pseudo netif's address is y.y.y.y
1011 * pseudo netif's netmask len is z
1015 netif_init_tap(int tap_unit
, in_addr_t
*addr
, in_addr_t
*mask
, int s
)
1017 in_addr_t tap_addr
, netmask
, netif_addr
;
1018 int next_netif_addr
;
1019 char *tok
, *masklen_str
, *ifbridge
;
1024 tok
= strtok(NULL
, ":/");
1027 * Nothing special, simply use tap(4) as backend
1032 if (inet_pton(AF_INET
, tok
, &tap_addr
) > 0) {
1034 * tap(4)'s address is supplied
1039 * If there is a next token, it may be the pseudo
1040 * netif's address or the netmask len for tap(4)
1042 next_netif_addr
= 0;
1045 * Not a tap(4) address; assume it names a bridge(4)
1052 * If there is next token, then it must be pseudo
1055 next_netif_addr
= 1;
1058 netmask
= netif_addr
= 0;
1060 tok
= strtok(NULL
, ":/");
1064 if (inet_pton(AF_INET
, tok
, &netif_addr
) <= 0) {
1065 if (next_netif_addr
) {
1066 warnx("Invalid pseudo netif address: %s", tok
);
1072 * Current token is not an address, so it must be the netmask len
1077 * Current token is the pseudo netif address; if there is a next token
1078 * it must be the netmask len
1080 masklen_str
= strtok(NULL
, "/");
1083 /* Calculate netmask */
1084 if (masklen_str
!= NULL
) {
1087 masklen
= strtoul(masklen_str
, NULL
, 10);
1088 if (masklen
< 32 && masklen
> 0) {
1089 netmask
= htonl(~((1LL << (32 - masklen
)) - 1)
1092 warnx("Invalid netmask len: %lu", masklen
);
1097 /* Make sure there are no more tokens left */
1098 if (strtok(NULL
, ":/") != NULL
) {
1099 warnx("Invalid argument to '-I'");
1106 } else if (ifbridge
== NULL
) {
1107 /* Set tap(4) address/netmask */
1108 if (netif_set_tapaddr(tap_unit
, tap_addr
, netmask
, s
) < 0)
1111 /* Tie tap(4) to bridge(4) */
1112 if (netif_add_tap2brg(tap_unit
, ifbridge
, s
) < 0)
1122 * NetifInfo[] will be filled for pseudo netif initialization.
1123 * NetifNum will be bumped to reflect the number of valid entries
1128 init_netif(char *netifExp
[], int netifExpNum
)
1132 if (netifExpNum
== 0)
1135 s
= socket(AF_INET
, SOCK_DGRAM
, 0); /* for ioctl(SIOC) */
1139 for (i
= 0; i
< netifExpNum
; ++i
) {
1140 struct vknetif_info
*info
;
1141 in_addr_t netif_addr
, netif_mask
;
1142 int tap_fd
, tap_unit
;
1145 netif
= strtok(netifExp
[i
], ":");
1146 if (netif
== NULL
) {
1147 warnx("Invalid argument to '-I'");
1152 * Open tap(4) device file and bring up the
1153 * corresponding interface
1155 tap_fd
= netif_open_tap(netif
, &tap_unit
, s
);
1160 * Initialize tap(4) and get address/netmask
1163 * NB: The rest of netifExp[i] is passed
1164 * to netif_init_tap() implicitly, through strtok()'s saved state.
1166 if (netif_init_tap(tap_unit
, &netif_addr
, &netif_mask
, s
) < 0) {
1168 * NB: Closing tap(4) device file will bring
1169 * down the corresponding interface
1175 info
= &NetifInfo
[NetifNum
];
1176 info
->tap_fd
= tap_fd
;
1177 info
->tap_unit
= tap_unit
;
1178 info
->netif_addr
= netif_addr
;
1179 info
->netif_mask
= netif_mask
;
1182 if (NetifNum
>= VKNETIF_MAX
) /* XXX will this happen? */
1195 if (pid_file
!= NULL
) {
1197 fp
= fopen(pid_file
, "w");
1200 fprintf(fp
, "%ld\n", (long)self
);
1204 perror("Warning: couldn't open pidfile");
1213 if (pid_file
!= NULL
) {
1214 if ( unlink(pid_file
) != 0 )
1215 perror("Warning: couldn't remove pidfile");
1221 usage_err(const char *ctl
, ...)
1226 vfprintf(stderr
, ctl
, va
);
1228 fprintf(stderr
, "\n");
1234 usage_help(_Bool help
)
1236 fprintf(stderr
, "Usage: %s [-hsUv] [-c file] [-e name=value:name=value:...]\n"
1237 "\t[-i file] [-I interface[:address1[:address2][/netmask]]] [-l cpulock]\n"
1238 "\t[-m size] [-n numcpus] [-p file] [-r file]\n", save_av
[0]);
1241 fprintf(stderr
, "\nArguments:\n"
1242 "\t-c\tSpecify a readonly CD-ROM image file to be used by the kernel.\n"
1243 "\t-e\tSpecify an environment to be used by the kernel.\n"
1244 "\t-h\tThis list of options.\n"
1245 "\t-i\tSpecify a memory image file to be used by the virtual kernel.\n"
1246 "\t-I\tCreate a virtual network device.\n"
1247 "\t-l\tSpecify which, if any, real CPUs to lock virtual CPUs to.\n"
1248 "\t-m\tSpecify the amount of memory to be used by the kernel in bytes.\n"
1249 "\t-n\tSpecify the number of CPUs you wish to emulate.\n"
1250 "\t-p\tSpecify a file in which to store the process ID.\n"
1251 "\t-r\tSpecify a R/W disk image file to be used by the kernel.\n"
1252 "\t-s\tBoot into single-user mode.\n"
1253 "\t-U\tEnable writing to kernel memory and module loading.\n"
1254 "\t-v\tTurn on verbose booting.\n");
1262 kprintf("cpu reset, rebooting vkernel\n");
1265 execv(save_av
[0], save_av
);
1271 kprintf("cpu halt, exiting vkernel\n");
1279 switch(lwp_cpu_lock
) {
1282 kprintf("Locking CPU%d to real cpu %d\n",
1284 usched_set(getpid(), USCHED_SET_CPU
, &next_cpu
, sizeof(next_cpu
));
1286 if (next_cpu
>= real_ncpus
)
1289 case LCL_SINGLE_CPU
:
1291 kprintf("Locking CPU%d to real cpu %d\n",
1293 usched_set(getpid(), USCHED_SET_CPU
, &next_cpu
, sizeof(next_cpu
));
1296 /* do not map virtual cpus to real cpus */
1302 * Allocate and free memory for module loading. The loaded module
1303 * has to be placed somewhere near the current kernel binary load
1304 * point or the relocations will not work.
1306 * I'm not sure why this isn't working.
1309 vkernel_module_memory_alloc(vm_offset_t
*basep
, size_t bytes
)
1311 kprintf("module loading for vkernel64's not currently supported\n");
1317 xtra
= (PAGE_SIZE
- (vm_offset_t
)sbrk(0)) & PAGE_MASK
;
1318 *basep
= (vm_offset_t
)sbrk(xtra
+ bytes
) + xtra
;
1319 bzero((void *)*basep
, bytes
);
1321 *basep
= (vm_offset_t
)mmap((void *)0x000000000, bytes
,
1322 PROT_READ
|PROT_WRITE
|PROT_EXEC
,
1323 MAP_ANON
|MAP_SHARED
, -1, 0);
1324 if ((void *)*basep
== MAP_FAILED
)
1327 kprintf("basep %p %p %zd\n",
1328 (void *)vkernel_module_memory_alloc
, (void *)*basep
, bytes
);
1334 vkernel_module_memory_free(vm_offset_t base
, size_t bytes
)
1338 munmap((void *)base
, bytes
);