2 * Copyright (c) 2006 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/platform/vkernel/platform/init.c,v 1.48 2008/01/29 19:54:59 dillon Exp $
37 #include <sys/types.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
43 #include <sys/random.h>
44 #include <sys/vkernel.h>
46 #include <sys/reboot.h>
48 #include <sys/msgbuf.h>
49 #include <sys/vmspace.h>
50 #include <sys/socket.h>
51 #include <sys/sockio.h>
52 #include <sys/sysctl.h>
53 #include <vm/vm_page.h>
55 #include <machine/cpu.h>
56 #include <machine/globaldata.h>
57 #include <machine/tls.h>
58 #include <machine/md_var.h>
59 #include <machine/vmparam.h>
62 #include <net/if_arp.h>
63 #include <net/ethernet.h>
64 #include <net/bridge/if_bridgevar.h>
65 #include <netinet/in.h>
66 #include <arpa/inet.h>
78 vm_paddr_t phys_avail
[16];
80 vm_paddr_t Maxmem_bytes
;
82 struct vkdisk_info DiskInfo
[VKDISK_MAX
];
84 struct vknetif_info NetifInfo
[VKNETIF_MAX
];
90 vm_offset_t virtual_start
;
91 vm_offset_t virtual_end
;
92 vm_offset_t kernel_vm_end
;
93 vm_offset_t crashdumpmap
;
94 vm_offset_t clean_sva
;
95 vm_offset_t clean_eva
;
96 struct msgbuf
*msgbufp
;
99 vpte_t
*KernelPTA
; /* Warning: Offset for direct VA translation */
100 u_int cpu_feature
; /* XXX */
101 u_int tsc_present
; /* XXX */
102 int optcpus
; /* number of cpus - see mp_start() */
103 int lwp_cpu_lock
; /* if/how to lock virtual CPUs to real CPUs */
104 int real_ncpus
; /* number of real CPUs */
105 int next_cpu
; /* next real CPU to lock a virtual CPU to */
107 struct privatespace
*CPU_prvspace
;
109 static struct trapframe proc0_tf
;
110 static void *proc0paddr
;
112 static void init_sys_memory(char *imageFile
);
113 static void init_kern_memory(void);
114 static void init_globaldata(void);
115 static void init_vkernel(void);
116 static void init_disk(char *diskExp
[], int diskFileNum
, enum vkdisk_type type
);
117 static void init_netif(char *netifExp
[], int netifFileNum
);
118 static void writepid( void );
119 static void cleanpid( void );
120 static void usage(const char *ctl
, ...);
123 static char **save_av
;
126 * Kernel startup for virtual kernels - standard main()
129 main(int ac
, char **av
)
131 char *memImageFile
= NULL
;
132 char *netifFile
[VKNETIF_MAX
];
133 char *diskFile
[VKDISK_MAX
];
134 char *cdFile
[VKDISK_MAX
];
137 size_t real_ncpus_size
;
138 int netifFileNum
= 0;
141 int bootOnDisk
= -1; /* set below to vcd (0) or vkd (1) */
145 int real_vkernel_enable
;
146 size_t real_vkernel_enable_size
;
148 int supports_sse_size
;
156 kernel_mem_readonly
= 1;
160 lwp_cpu_lock
= LCL_NONE
;
162 real_vkernel_enable_size
= sizeof(real_vkernel_enable
);
163 sysctlbyname("vm.vkernel_enable", &real_vkernel_enable
, &real_vkernel_enable_size
, NULL
, 0);
165 if (real_vkernel_enable
== 0)
166 errx(1,"vm.vkernel_enable is %d, must be set to 1 to execute a vkernel!", real_vkernel_enable
);
168 real_ncpus_size
= sizeof(real_ncpus
);
169 sysctlbyname("hw.ncpu", &real_ncpus
, &real_ncpus_size
, NULL
, 0);
171 while ((c
= getopt(ac
, av
, "c:svl:m:n:r:e:i:p:I:U")) != -1) {
175 * name=value:name=value:name=value...
178 kern_envp
= malloc(n
+ 2);
179 for (i
= 0; i
< n
; ++i
) {
180 if (optarg
[i
] == ':')
183 kern_envp
[i
] = optarg
[i
];
189 boothowto
|= RB_SINGLE
;
195 memImageFile
= optarg
;
198 if (netifFileNum
< VKNETIF_MAX
)
199 netifFile
[netifFileNum
++] = strdup(optarg
);
204 if (diskFileNum
+ cdFileNum
< VKDISK_MAX
)
205 diskFile
[diskFileNum
++] = strdup(optarg
);
210 if (diskFileNum
+ cdFileNum
< VKDISK_MAX
)
211 cdFile
[cdFileNum
++] = strdup(optarg
);
214 Maxmem_bytes
= strtoull(optarg
, &suffix
, 0);
231 usage("Bad maxmem option");
239 if (strncmp("map", optarg
, 3) == 0) {
240 lwp_cpu_lock
= LCL_PER_CPU
;
241 if (optarg
[3] == ',') {
242 next_cpu
= strtol(optarg
+4, &endp
, 0);
244 usage("Bad target CPU number at '%s'", endp
);
248 if (next_cpu
< 0 || next_cpu
> real_ncpus
- 1)
249 usage("Bad target CPU, valid range is 0-%d", real_ncpus
- 1);
250 } else if (strncmp("any", optarg
, 3) == 0) {
251 lwp_cpu_lock
= LCL_NONE
;
253 lwp_cpu_lock
= LCL_SINGLE_CPU
;
254 next_cpu
= strtol(optarg
, &endp
, 0);
256 usage("Bad target CPU number at '%s'", endp
);
257 if (next_cpu
< 0 || next_cpu
> real_ncpus
- 1)
258 usage("Bad target CPU, valid range is 0-%d", real_ncpus
- 1);
263 * This value is set up by mp_start(), don't just
267 optcpus
= strtol(optarg
, NULL
, 0);
268 if (optcpus
< 1 || optcpus
> MAXCPU
)
269 usage("Bad ncpus, valid range is 1-%d", MAXCPU
);
271 if (strtol(optarg
, NULL
, 0) != 1) {
272 usage("You built a UP vkernel, only 1 cpu!");
281 kernel_mem_readonly
= 0;
288 init_sys_memory(memImageFile
);
295 supports_sse_size
= sizeof(supports_sse_size
);
296 sysctlbyname("hw.instruction_sse", &supports_sse
, &supports_sse_size
,
298 init_fpu(supports_sse
);
301 * We boot from the first installed disk.
303 if (bootOnDisk
== 1) {
304 init_disk(diskFile
, diskFileNum
, VKD_DISK
);
305 init_disk(cdFile
, cdFileNum
, VKD_CD
);
307 init_disk(cdFile
, cdFileNum
, VKD_CD
);
308 init_disk(diskFile
, diskFileNum
, VKD_DISK
);
310 init_netif(netifFile
, netifFileNum
);
318 * Initialize system memory. This is the virtual kernel's 'RAM'.
322 init_sys_memory(char *imageFile
)
329 * Figure out the system memory image size. If an image file was
330 * specified and -m was not specified, use the image file's size.
333 if (imageFile
&& stat(imageFile
, &st
) == 0 && Maxmem_bytes
== 0)
334 Maxmem_bytes
= (vm_paddr_t
)st
.st_size
;
335 if ((imageFile
== NULL
|| stat(imageFile
, &st
) < 0) &&
337 err(1, "Cannot create new memory file %s unless "
338 "system memory size is specified with -m",
344 * Maxmem must be known at this time
346 if (Maxmem_bytes
< 32 * 1024 * 1024 || (Maxmem_bytes
& SEG_MASK
)) {
347 err(1, "Bad maxmem specification: 32MB minimum, "
348 "multiples of %dMB only",
349 SEG_SIZE
/ 1024 / 1024);
354 * Generate an image file name if necessary, then open/create the
355 * file exclusively locked. Do not allow multiple virtual kernels
356 * to use the same image file.
358 if (imageFile
== NULL
) {
359 for (i
= 0; i
< 1000000; ++i
) {
360 asprintf(&imageFile
, "/var/vkernel/memimg.%06d", i
);
362 O_RDWR
|O_CREAT
|O_EXLOCK
|O_NONBLOCK
, 0644);
363 if (fd
< 0 && errno
== EWOULDBLOCK
) {
370 fd
= open(imageFile
, O_RDWR
|O_CREAT
|O_EXLOCK
|O_NONBLOCK
, 0644);
372 printf("Using memory file: %s\n", imageFile
);
373 if (fd
< 0 || fstat(fd
, &st
) < 0) {
374 err(1, "Unable to open/create %s", imageFile
);
379 * Truncate or extend the file as necessary.
381 if (st
.st_size
> Maxmem_bytes
) {
382 ftruncate(fd
, Maxmem_bytes
);
383 } else if (st
.st_size
< Maxmem_bytes
) {
385 off_t off
= st
.st_size
& ~SEG_MASK
;
387 kprintf("%s: Reserving blocks for memory image\n", imageFile
);
388 zmem
= malloc(SEG_SIZE
);
389 bzero(zmem
, SEG_SIZE
);
390 lseek(fd
, off
, SEEK_SET
);
391 while (off
< Maxmem_bytes
) {
392 if (write(fd
, zmem
, SEG_SIZE
) != SEG_SIZE
) {
393 err(1, "Unable to reserve blocks for memory image");
399 err(1, "Unable to reserve blocks for memory image");
403 Maxmem
= Maxmem_bytes
>> PAGE_SHIFT
;
407 * Initialize kernel memory. This reserves kernel virtual memory by using
413 init_kern_memory(void)
419 char *topofstack
= &dummy
;
424 * Memory map our kernel virtual memory space. Note that the
425 * kernel image itself is not made part of this memory for the
428 * The memory map must be segment-aligned so we can properly
431 * If the system kernel has a different MAXDSIZ, it might not
432 * be possible to map kernel memory in its preferred location.
433 * Try a number of different locations.
435 try = (void *)0x40000000;
437 while ((char *)try + KERNEL_KVA_SIZE
< topofstack
) {
438 base
= mmap(try, KERNEL_KVA_SIZE
, PROT_READ
|PROT_WRITE
,
439 MAP_FILE
|MAP_SHARED
|MAP_VPAGETABLE
,
443 if (base
!= MAP_FAILED
)
444 munmap(base
, KERNEL_KVA_SIZE
);
445 try = (char *)try + 0x10000000;
448 err(1, "Unable to mmap() kernel virtual memory!");
451 madvise(base
, KERNEL_KVA_SIZE
, MADV_NOSYNC
);
452 KvaStart
= (vm_offset_t
)base
;
453 KvaSize
= KERNEL_KVA_SIZE
;
454 KvaEnd
= KvaStart
+ KvaSize
;
455 printf("KVM mapped at %p-%p\n", (void *)KvaStart
, (void *)KvaEnd
);
458 * Create a top-level page table self-mapping itself.
460 * Initialize the page directory at physical page index 0 to point
461 * to an array of page table pages starting at physical page index 1
463 lseek(MemImageFd
, 0L, 0);
464 for (i
= 0; i
< KERNEL_KVA_SIZE
/ SEG_SIZE
; ++i
) {
465 pte
= ((i
+ 1) * PAGE_SIZE
) | VPTE_V
| VPTE_R
| VPTE_W
;
466 write(MemImageFd
, &pte
, sizeof(pte
));
470 * Initialize the PTEs in the page table pages required to map the
471 * page table itself. This includes mapping the page directory page
472 * at the base so we go one more loop than normal.
474 lseek(MemImageFd
, PAGE_SIZE
, 0);
475 for (i
= 0; i
<= KERNEL_KVA_SIZE
/ SEG_SIZE
* sizeof(vpte_t
); ++i
) {
476 pte
= (i
* PAGE_SIZE
) | VPTE_V
| VPTE_R
| VPTE_W
;
477 write(MemImageFd
, &pte
, sizeof(pte
));
481 * Initialize remaining PTEs to 0. We may be reusing a memory image
482 * file. This is approximately a megabyte.
484 i
= (KERNEL_KVA_SIZE
/ PAGE_SIZE
- i
) * sizeof(pte
);
485 zero
= malloc(PAGE_SIZE
);
486 bzero(zero
, PAGE_SIZE
);
488 write(MemImageFd
, zero
, (i
> PAGE_SIZE
) ? PAGE_SIZE
: i
);
489 i
= i
- ((i
> PAGE_SIZE
) ? PAGE_SIZE
: i
);
494 * Enable the page table and calculate pointers to our self-map
495 * for easy kernel page table manipulation.
497 * KernelPTA must be offset so we can do direct VA translations
499 mcontrol(base
, KERNEL_KVA_SIZE
, MADV_SETMAP
,
500 0 | VPTE_R
| VPTE_W
| VPTE_V
);
501 KernelPTD
= (vpte_t
*)base
; /* pg directory */
502 KernelPTA
= (vpte_t
*)((char *)base
+ PAGE_SIZE
); /* pg table pages */
503 KernelPTA
-= KvaStart
>> PAGE_SHIFT
;
506 * phys_avail[] represents unallocated physical memory. MI code
507 * will use phys_avail[] to create the vm_page array.
509 phys_avail
[0] = PAGE_SIZE
+
510 KERNEL_KVA_SIZE
/ PAGE_SIZE
* sizeof(vpte_t
);
511 phys_avail
[0] = (phys_avail
[0] + PAGE_MASK
) & ~(vm_paddr_t
)PAGE_MASK
;
512 phys_avail
[1] = Maxmem_bytes
;
515 * (virtual_start, virtual_end) represent unallocated kernel virtual
516 * memory. MI code will create kernel_map using these parameters.
518 virtual_start
= KvaStart
+ PAGE_SIZE
+
519 KERNEL_KVA_SIZE
/ PAGE_SIZE
* sizeof(vpte_t
);
520 virtual_start
= (virtual_start
+ PAGE_MASK
) & ~(vm_offset_t
)PAGE_MASK
;
521 virtual_end
= KvaStart
+ KERNEL_KVA_SIZE
;
524 * kernel_vm_end could be set to virtual_end but we want some
525 * indication of how much of the kernel_map we've used, so
526 * set it low and let pmap_growkernel increase it even though we
527 * don't need to create any new page table pages.
529 kernel_vm_end
= virtual_start
;
532 * Allocate space for process 0's UAREA.
534 proc0paddr
= (void *)virtual_start
;
535 for (i
= 0; i
< UPAGES
; ++i
) {
536 pmap_kenter_quick(virtual_start
, phys_avail
[0]);
537 virtual_start
+= PAGE_SIZE
;
538 phys_avail
[0] += PAGE_SIZE
;
544 crashdumpmap
= virtual_start
;
545 virtual_start
+= MAXDUMPPGS
* PAGE_SIZE
;
548 * msgbufp maps the system message buffer
550 assert((MSGBUF_SIZE
& PAGE_MASK
) == 0);
551 msgbufp
= (void *)virtual_start
;
552 for (i
= 0; i
< (MSGBUF_SIZE
>> PAGE_SHIFT
); ++i
) {
553 pmap_kenter_quick(virtual_start
, phys_avail
[0]);
554 virtual_start
+= PAGE_SIZE
;
555 phys_avail
[0] += PAGE_SIZE
;
557 msgbufinit(msgbufp
, MSGBUF_SIZE
);
560 * used by kern_memio for /dev/mem access
562 ptvmmap
= (caddr_t
)virtual_start
;
563 virtual_start
+= PAGE_SIZE
;
566 * Bootstrap the kernel_pmap
572 * Map the per-cpu globaldata for cpu #0. Allocate the space using
573 * virtual_start and phys_avail[0]
577 init_globaldata(void)
584 * Reserve enough KVA to cover possible cpus. This is a considerable
585 * amount of KVA since the privatespace structure includes two
586 * whole page table mappings.
588 virtual_start
= (virtual_start
+ SEG_MASK
) & ~(vm_offset_t
)SEG_MASK
;
589 CPU_prvspace
= (void *)virtual_start
;
590 virtual_start
+= sizeof(struct privatespace
) * SMP_MAXCPU
;
593 * Allocate enough physical memory to cover the mdglobaldata
594 * portion of the space and the idle stack and map the pages
595 * into KVA. For cpu #0 only.
597 for (i
= 0; i
< sizeof(struct mdglobaldata
); i
+= PAGE_SIZE
) {
599 va
= (vm_offset_t
)&CPU_prvspace
[0].mdglobaldata
+ i
;
600 pmap_kenter_quick(va
, pa
);
601 phys_avail
[0] += PAGE_SIZE
;
603 for (i
= 0; i
< sizeof(CPU_prvspace
[0].idlestack
); i
+= PAGE_SIZE
) {
605 va
= (vm_offset_t
)&CPU_prvspace
[0].idlestack
+ i
;
606 pmap_kenter_quick(va
, pa
);
607 phys_avail
[0] += PAGE_SIZE
;
611 * Set up the %fs for cpu #0. The mycpu macro works after this
614 tls_set_fs(&CPU_prvspace
[0], sizeof(struct privatespace
));
618 * Initialize very low level systems including thread0, proc0, etc.
624 struct mdglobaldata
*gd
;
626 gd
= &CPU_prvspace
[0].mdglobaldata
;
627 bzero(gd
, sizeof(*gd
));
629 gd
->mi
.gd_curthread
= &thread0
;
630 thread0
.td_gd
= &gd
->mi
;
632 ncpus2
= 1; /* rounded down power of 2 */
633 ncpus_fit
= 1; /* rounded up power of 2 */
634 /* ncpus2_mask and ncpus_fit_mask are 0 */
636 gd
->mi
.gd_prvspace
= &CPU_prvspace
[0];
637 mi_gdinit(&gd
->mi
, 0);
639 mi_proc0init(&gd
->mi
, proc0paddr
);
640 lwp0
.lwp_md
.md_regs
= &proc0_tf
;
645 #if 0 /* #ifdef DDB */
647 if (boothowto
& RB_KDB
)
648 Debugger("Boot flags requested debugger");
651 initializecpu(); /* Initialize CPU registers */
653 init_param2((phys_avail
[1] - phys_avail
[0]) / PAGE_SIZE
);
657 * Map the message buffer
659 for (off
= 0; off
< round_page(MSGBUF_SIZE
); off
+= PAGE_SIZE
)
660 pmap_kenter((vm_offset_t
)msgbufp
+ off
, avail_end
+ off
);
661 msgbufinit(msgbufp
, MSGBUF_SIZE
);
664 thread0
.td_pcb_cr3
... MMU
665 lwp0
.lwp_md
.md_regs
= &proc0_tf
;
670 * Filesystem image paths for the virtual kernel are optional.
671 * If specified they each should point to a disk image,
672 * the first of which will become the root disk.
674 * The virtual kernel caches data from our 'disk' just like a normal kernel,
675 * so we do not really want the real kernel to cache the data too. Use
676 * O_DIRECT to remove the duplication.
680 init_disk(char *diskExp
[], int diskFileNum
, enum vkdisk_type type
)
684 if (diskFileNum
== 0)
687 for(i
=0; i
< diskFileNum
; i
++){
692 warnx("Invalid argument to '-r'");
696 if (DiskNum
< VKDISK_MAX
) {
698 struct vkdisk_info
* info
= NULL
;
702 if (type
== VKD_DISK
)
703 fd
= open(fname
, O_RDWR
|O_DIRECT
|O_EXLOCK
|O_NONBLOCK
, 0644);
705 fd
= open(fname
, O_RDONLY
|O_DIRECT
, 0644);
706 if (fd
< 0 || fstat(fd
, &st
) < 0) {
708 fprintf(stderr
, "You may already have a vkernel using this disk image!\n");
709 err(1, "Unable to open/create %s", fname
);
712 /* get rid of O_NONBLOCK, keep O_DIRECT */
713 if (type
== VKD_DISK
)
714 fcntl(fd
, F_SETFL
, O_DIRECT
);
716 info
= &DiskInfo
[DiskNum
];
722 memcpy(info
->fname
, fname
, l
);
726 rootdevnames
[0] = "cd9660:vcd0a";
727 else if (type
== VKD_DISK
)
728 rootdevnames
[0] = "ufs:vkd0s0a";
733 warnx("vkd%d (%s) > VKDISK_MAX", DiskNum
, fname
);
741 netif_set_tapflags(int tap_unit
, int f
, int s
)
746 bzero(&ifr
, sizeof(ifr
));
748 snprintf(ifr
.ifr_name
, sizeof(ifr
.ifr_name
), "tap%d", tap_unit
);
749 if (ioctl(s
, SIOCGIFFLAGS
, &ifr
) < 0) {
750 warn("tap%d: ioctl(SIOCGIFFLAGS) failed", tap_unit
);
757 * If the flags are already set/cleared, then we return
758 * immediately to avoid extra syscalls
760 flags
= (ifr
.ifr_flags
& 0xffff) | (ifr
.ifr_flagshigh
<< 16);
764 if ((flags
& f
) == 0)
775 * Fix up ifreq.ifr_name, since it may be trashed
776 * in previous ioctl(SIOCGIFFLAGS)
778 snprintf(ifr
.ifr_name
, sizeof(ifr
.ifr_name
), "tap%d", tap_unit
);
780 ifr
.ifr_flags
= flags
& 0xffff;
781 ifr
.ifr_flagshigh
= flags
>> 16;
782 if (ioctl(s
, SIOCSIFFLAGS
, &ifr
) < 0) {
783 warn("tap%d: ioctl(SIOCSIFFLAGS) failed", tap_unit
);
791 netif_set_tapaddr(int tap_unit
, in_addr_t addr
, in_addr_t mask
, int s
)
793 struct ifaliasreq ifra
;
794 struct sockaddr_in
*in
;
796 bzero(&ifra
, sizeof(ifra
));
797 snprintf(ifra
.ifra_name
, sizeof(ifra
.ifra_name
), "tap%d", tap_unit
);
800 in
= (struct sockaddr_in
*)&ifra
.ifra_addr
;
801 in
->sin_family
= AF_INET
;
802 in
->sin_len
= sizeof(*in
);
803 in
->sin_addr
.s_addr
= addr
;
807 in
= (struct sockaddr_in
*)&ifra
.ifra_mask
;
808 in
->sin_len
= sizeof(*in
);
809 in
->sin_addr
.s_addr
= mask
;
812 if (ioctl(s
, SIOCAIFADDR
, &ifra
) < 0) {
813 warn("tap%d: ioctl(SIOCAIFADDR) failed", tap_unit
);
821 netif_add_tap2brg(int tap_unit
, const char *ifbridge
, int s
)
826 bzero(&ifbr
, sizeof(ifbr
));
827 snprintf(ifbr
.ifbr_ifsname
, sizeof(ifbr
.ifbr_ifsname
),
830 bzero(&ifd
, sizeof(ifd
));
831 strlcpy(ifd
.ifd_name
, ifbridge
, sizeof(ifd
.ifd_name
));
832 ifd
.ifd_cmd
= BRDGADD
;
833 ifd
.ifd_len
= sizeof(ifbr
);
834 ifd
.ifd_data
= &ifbr
;
836 if (ioctl(s
, SIOCSDRVSPEC
, &ifd
) < 0) {
838 * 'errno == EEXIST' means that the tap(4) is already
839 * a member of the bridge(4)
841 if (errno
!= EEXIST
) {
842 warn("ioctl(%s, SIOCSDRVSPEC) failed", ifbridge
);
849 #define TAPDEV_OFLAGS (O_RDWR | O_NONBLOCK)
851 /* XXX major()/minor() can't be used in vkernel */
852 #define TAPDEV_MAJOR(x) ((int)(((u_int)(x) >> 8) & 0xff))
853 #define TAPDEV_MINOR(x) ((int)((x) & 0xffff00ff))
855 #ifndef TAP_CDEV_MAJOR
856 #define TAP_CDEV_MAJOR 149
860 * Locate the first unused tap(4) device file if auto mode is requested,
861 * or open the user supplied device file, and bring up the corresponding
864 * NOTE: Only tap(4) device file is supported currently
868 netif_open_tap(const char *netif
, int *tap_unit
, int s
)
870 char tap_dev
[MAXPATHLEN
];
876 if (strcmp(netif
, "auto") == 0) {
880 * Find first unused tap(4) device file
883 snprintf(tap_dev
, sizeof(tap_dev
), "/dev/tap%d", i
);
884 tap_fd
= open(tap_dev
, TAPDEV_OFLAGS
);
885 if (tap_fd
>= 0 || errno
== ENOENT
)
889 warnx("Unable to find a free tap(4)");
894 * User supplied tap(4) device file
896 if (netif
[0] == '/') /* Absolute path */
897 strlcpy(tap_dev
, netif
, sizeof(tap_dev
));
899 snprintf(tap_dev
, sizeof(tap_dev
), "/dev/%s", netif
);
901 tap_fd
= open(tap_dev
, TAPDEV_OFLAGS
);
903 warn("Unable to open %s", tap_dev
);
909 * Check whether the device file is a tap(4)
912 if (fstat(tap_fd
, &st
) == 0 && S_ISCHR(st
.st_mode
) &&
913 TAPDEV_MAJOR(st
.st_rdev
) == TAP_CDEV_MAJOR
) {
914 *tap_unit
= TAPDEV_MINOR(st
.st_rdev
);
917 * Bring up the corresponding tap(4) interface
919 if (netif_set_tapflags(*tap_unit
, IFF_UP
, s
) == 0)
922 warnx("%s is not a tap(4) device", tap_dev
);
938 * The following syntax is supported:
939 * 1) x.x.x.x tap(4)'s address is x.x.x.x
941 * 2) x.x.x.x/z tap(4)'s address is x.x.x.x
942 * tap(4)'s netmask len is z
944 * 3) x.x.x.x:y.y.y.y tap(4)'s address is x.x.x.x
945 * pseudo netif's address is y.y.y.y
947 * 4) x.x.x.x:y.y.y.y/z tap(4)'s address is x.x.x.x
948 * pseudo netif's address is y.y.y.y
949 * tap(4) and pseudo netif's netmask len are z
951 * 5) bridgeX tap(4) will be added to bridgeX
953 * 6) bridgeX:y.y.y.y tap(4) will be added to bridgeX
954 * pseudo netif's address is y.y.y.y
956 * 7) bridgeX:y.y.y.y/z tap(4) will be added to bridgeX
957 * pseudo netif's address is y.y.y.y
958 * pseudo netif's netmask len is z
962 netif_init_tap(int tap_unit
, in_addr_t
*addr
, in_addr_t
*mask
, int s
)
964 in_addr_t tap_addr
, netmask
, netif_addr
;
966 char *tok
, *masklen_str
, *ifbridge
;
971 tok
= strtok(NULL
, ":/");
974 * Nothing special, simply use tap(4) as backend
979 if (inet_pton(AF_INET
, tok
, &tap_addr
) > 0) {
981 * tap(4)'s address is supplied
986 * If there is next token, then it may be pseudo
987 * netif's address or netmask len for tap(4)
992 * Not tap(4)'s address; assume it is a bridge(4)
999 * If there is next token, then it must be pseudo
1002 next_netif_addr
= 1;
1005 netmask
= netif_addr
= 0;
1007 tok
= strtok(NULL
, ":/");
1011 if (inet_pton(AF_INET
, tok
, &netif_addr
) <= 0) {
1012 if (next_netif_addr
) {
1013 warnx("Invalid pseudo netif address: %s", tok
);
1019 * Current token is not an address, so it must be the netmask len
1024 * Current token is the pseudo netif address; if there is a next token
1025 * it must be the netmask len
1027 masklen_str
= strtok(NULL
, "/");
1030 /* Calculate netmask */
1031 if (masklen_str
!= NULL
) {
1034 masklen
= strtoul(masklen_str
, NULL
, 10);
1035 if (masklen
< 32 && masklen
> 0) {
1036 netmask
= htonl(~((1LL << (32 - masklen
)) - 1)
1039 warnx("Invalid netmask len: %lu", masklen
);
1044 /* Make sure there are no more tokens left */
1045 if (strtok(NULL
, ":/") != NULL
) {
1046 warnx("Invalid argument to '-I'");
1051 if (ifbridge
== NULL
) {
1052 /* Set tap(4) address/netmask */
1053 if (netif_set_tapaddr(tap_unit
, tap_addr
, netmask
, s
) < 0)
1056 /* Tie tap(4) to bridge(4) */
1057 if (netif_add_tap2brg(tap_unit
, ifbridge
, s
) < 0)
1067 * NetifInfo[] will be filled for pseudo netif initialization.
1068 * NetifNum will be bumped to reflect the number of valid entries
1073 init_netif(char *netifExp
[], int netifExpNum
)
1077 if (netifExpNum
== 0)
1080 s
= socket(AF_INET
, SOCK_DGRAM
, 0); /* for ioctl(SIOC) */
1084 for (i
= 0; i
< netifExpNum
; ++i
) {
1085 struct vknetif_info
*info
;
1086 in_addr_t netif_addr
, netif_mask
;
1087 int tap_fd
, tap_unit
;
1090 netif
= strtok(netifExp
[i
], ":");
1091 if (netif
== NULL
) {
1092 warnx("Invalid argument to '-I'");
1097 * Open tap(4) device file and bring up the
1098 * corresponding interface
1100 tap_fd
= netif_open_tap(netif
, &tap_unit
, s
);
1105 * Initialize tap(4) and get address/netmask
1108 * NB: The rest of netifExp[i] is passed
1109 * to netif_init_tap() implicitly.
1111 if (netif_init_tap(tap_unit
, &netif_addr
, &netif_mask
, s
) < 0) {
1113 * NB: Closing tap(4) device file will bring
1114 * down the corresponding interface
1120 info
= &NetifInfo
[NetifNum
];
1121 info
->tap_fd
= tap_fd
;
1122 info
->tap_unit
= tap_unit
;
1123 info
->netif_addr
= netif_addr
;
1124 info
->netif_mask
= netif_mask
;
1127 if (NetifNum
>= VKNETIF_MAX
) /* XXX will this happen? */
1140 if (pid_file
!= NULL
) {
1142 fp
= fopen(pid_file
, "w");
1145 fprintf(fp
, "%ld\n", (long)self
);
1149 perror("Warning: couldn't open pidfile");
1158 if (pid_file
!= NULL
) {
1159 if ( unlink(pid_file
) != 0 )
1160 perror("Warning: couldn't remove pidfile");
1166 usage(const char *ctl
, ...)
1171 vfprintf(stderr
, ctl
, va
);
1173 fprintf(stderr
, "\n");
1180 kprintf("cpu reset, rebooting vkernel\n");
1183 execv(save_av
[0], save_av
);
1189 kprintf("cpu halt, exiting vkernel\n");
1197 switch(lwp_cpu_lock
) {
1200 kprintf("Locking CPU%d to real cpu %d\n",
1202 usched_set(getpid(), USCHED_SET_CPU
, &next_cpu
, sizeof(next_cpu
));
1204 if (next_cpu
>= real_ncpus
)
1207 case LCL_SINGLE_CPU
:
1209 kprintf("Locking CPU%d to real cpu %d\n",
1211 usched_set(getpid(), USCHED_SET_CPU
, &next_cpu
, sizeof(next_cpu
));
1214 /* do not map virtual cpus to real cpus */