4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
26 /* Copyright (c) 1988 AT&T */
27 /* All Rights Reserved */
30 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/sysmacros.h>
37 #include <sys/systm.h>
38 #include <sys/signal.h>
42 #include <sys/vnode.h>
46 #include <sys/priocntl.h>
47 #include <sys/procset.h>
49 #include <sys/callo.h>
50 #include <sys/callb.h>
51 #include <sys/debug.h>
53 #include <sys/bootconf.h>
54 #include <sys/utsname.h>
55 #include <sys/cmn_err.h>
56 #include <sys/vmparam.h>
57 #include <sys/modctl.h>
59 #include <sys/callb.h>
60 #include <sys/ddi_periodic.h>
63 #include <sys/cpuvar.h>
64 #include <sys/cladm.h>
65 #include <sys/corectl.h>
67 #include <sys/syscall.h>
68 #include <sys/reboot.h>
70 #include <sys/exacct.h>
71 #include <sys/autoconf.h>
72 #include <sys/errorq.h>
73 #include <sys/class.h>
74 #include <sys/stack.h>
75 #include <sys/brand.h>
76 #include <sys/mmapobj.h>
79 #include <vm/seg_kmem.h>
80 #include <sys/dc_ki.h>
83 #include <sys/bootprops.h>
/*
 * Kernel-wide globals defined by this file: pointers to the well-known
 * processes, the page counters, and two pieces of boot-time bookkeeping.
 */
85 /* well known processes */
86 proc_t
*proc_sched
; /* memory scheduler */
87 proc_t
*proc_init
; /* init */
88 proc_t
*proc_pageout
; /* pageout daemon */
89 proc_t
*proc_fsflush
; /* fsflush daemon */
91 pgcnt_t maxmem
; /* Maximum available memory in pages. */
92 pgcnt_t freemem
; /* Current available memory in pages. */
/* main() stores 1 here at its "drop the interrupt level" step. */
93 int interrupts_unleashed
; /* set when we do the first spl0() */
/*
 * Created in main() with kmem_cache_create("process_cache",
 * sizeof (proc_t), ...), just before vfs_mountroot() is called.
 */
95 kmem_cache_t
*process_cache
; /* kmem cache for proc structures */
98 * Indicates whether the auditing module (c2audit) is loaded. Possible
100 * 0 - c2audit module is excluded in /etc/system and cannot be loaded
101 * 1 - c2audit module is not loaded but can be anytime
102 * 2 - c2audit module is loaded
/*
 * Current c2audit state; starts out as C2AUDIT_DISABLED.
 * NOTE(review): which of the numeric states listed above C2AUDIT_DISABLED
 * corresponds to is not visible here -- confirm against its definition.
 */
104 int audit_active
= C2AUDIT_DISABLED
;
107 * Process 0's lwp directory and lwpid hash table.
/*
 * Statically allocated, two entries each.  main() installs p0_lwpdir as
 * both p_lwpdir and p_lwpfree of process 0, installs p0_tidhash as its
 * p_tidhash, and passes p0_tidhash together with the value 2 to
 * lwp_hash_in().
 */
109 lwpdir_t p0_lwpdir
[2];
110 tidhash_t p0_tidhash
[2];
114 * Machine-independent initialization code
115 * Called from cold start routine as
116 * soon as a stack and segmentation
117 * have been established.
119 * clear and free user core
121 * hand craft 0th process
122 * call all initialization routines
123 * fork - process 0 to schedule
124 * - process 1 execute bootstrap
125 * - process 2 to page out
126 * create system threads
/*
 * Cluster boot flags, zero by default.  main() tests the CLUSTER_BOOTED
 * bit and, when it is set, forks a process running cluster_wrapper().
 */
129 int cluster_bootflags
= 0;
132 cluster_wrapper(void)
135 panic("cluster() returned");
/*
 * Default path of the init program ("/sbin/init") and its argument string
 * (empty by default).
 * NOTE(review): start_init_common() hands p->p_zone->zone_initname and
 * p->p_zone->zone_bootargs to exec_init() rather than these arrays; the
 * link between the two is presumably made where zone0 is set up -- confirm.
 */
138 char initname
[INITNAME_SZ
] = "/sbin/init"; /* also referenced by zone0 */
139 char initargs
[BOOTARGS_MAX
] = ""; /* also referenced by zone0 */
142 * Construct a stack for init containing the arguments to it, then
143 * pass control to exec_common.
146 exec_init(const char *initpath
, const char *args
)
151 caddr32_t exec_fnamep
;
154 size_t argvlen
, alen
;
157 int error
= 0, count
= 0;
158 proc_t
*p
= ttoproc(curthread
);
159 klwp_t
*lwp
= ttolwp(curthread
);
165 alen
= strlen(initpath
) + 1 + strlen(args
) + 1;
166 scratchargs
= kmem_alloc(alen
, KM_SLEEP
);
167 (void) snprintf(scratchargs
, alen
, "%s %s", initpath
, args
);
170 * We do a quick two state parse of the string to sort out how big
174 for (i
= 0; i
< strlen(scratchargs
); i
++) {
175 if (scratchargs
[i
] == ' ' || scratchargs
[i
] == '\0') {
184 argvlen
= sizeof (caddr32_t
) * (argc
+ 1);
185 argv
= kmem_zalloc(argvlen
, KM_SLEEP
);
188 * We pull off a bit of a hack here. We work our way through the
189 * args string, putting nulls at the ends of space delimited tokens
190 * (boot args don't support quoting at this time). Then we just
191 * copy the whole mess to userland in one go. In other words, we
192 * transform this: "init -s -r\0" into this on the stack:
205 * -0x10 NULL | | | (argv[3])
206 * -0x14 -----|--|-' (argv[2])
207 * -0x18 ------|--' (argv[1])
208 * -0x1c -------' (argv[0])
210 * Since we know the value of ucp at the beginning of this process,
211 * we can trivially compute the argv[] array which we also need to
212 * place in userland: argv[i] = ucp - sarg(i), where ucp is the
213 * stack ptr, and sarg is the string index of the start of the
216 ucp
= (caddr32_t
)(uintptr_t)p
->p_usrstack
;
222 for (i
= 0; i
< alen
; i
++) {
223 if (scratchargs
[i
] == ' ' || scratchargs
[i
] == '\0') {
224 if (in_arg
== B_TRUE
) {
226 scratchargs
[i
] = '\0';
227 argv
[argc
++] = ucp
- (alen
- sarg
);
229 } else if (in_arg
== B_FALSE
) {
235 error
|= copyout(scratchargs
, (caddr_t
)(uintptr_t)ucp
, alen
);
237 uap
= (caddr32_t
*)P2ALIGN((uintptr_t)ucp
, sizeof (caddr32_t
));
238 uap
--; /* advance to be below the word we're in */
239 uap
-= (argc
+ 1); /* advance argc words down, plus one for NULL */
240 error
|= copyout(argv
, uap
, argvlen
);
243 zcmn_err(p
->p_zone
->zone_id
, CE_WARN
,
244 "Could not construct stack for init.\n");
245 kmem_free(argv
, argvlen
);
246 kmem_free(scratchargs
, alen
);
250 exec_fnamep
= argv
[0];
251 kmem_free(argv
, argvlen
);
252 kmem_free(scratchargs
, alen
);
255 * Point at the arguments.
257 lwp
->lwp_ap
= lwp
->lwp_arg
;
258 lwp
->lwp_arg
[0] = (uintptr_t)exec_fnamep
;
259 lwp
->lwp_arg
[1] = (uintptr_t)uap
;
260 lwp
->lwp_arg
[2] = NULL
;
261 curthread
->t_post_sys
= 1;
262 curthread
->t_sysnum
= SYS_execve
;
265 * If we are executing init from zsched, we may have inherited its
266 * parent process's signal mask. Clear it now so that we behave in
267 * the same way as when started from the global zone.
269 sigemptyset(&curthread
->t_hold
);
271 brand_action
= ZONE_IS_BRANDED(p
->p_zone
) ? EBA_BRAND
: EBA_NONE
;
273 error
= exec_common((const char *)(uintptr_t)exec_fnamep
,
274 (const char **)(uintptr_t)uap
, NULL
, brand_action
);
277 * Normally we would just set lwp_argsaved and t_post_sys and
278 * let post_syscall reset lwp_ap for us. Unfortunately,
279 * exec_init isn't always called from a system call. Instead
280 * of making a mess of trap_cleanup, we just reset the args
283 reset_syscall_args();
290 zcmn_err(p
->p_zone
->zone_id
, CE_WARN
,
291 "exec(%s) failed (file not found).\n", initpath
);
298 zcmn_err(p
->p_zone
->zone_id
, CE_WARN
,
299 "exec(%s) failed with errno %d. Retrying...\n",
305 zcmn_err(p
->p_zone
->zone_id
, CE_WARN
,
306 "exec(%s) failed with errno %d.", initpath
, error
);
311 * This routine does all of the common setup for invoking init; global
312 * and non-global zones employ this routine for the functionality which is
315 * This program (init, presumably) must be a 32-bit process.
321 ASSERT_STACK_ALIGNED();
322 p
->p_zone
->zone_proc_initpid
= p
->p_pid
;
324 p
->p_cstime
= p
->p_stime
= p
->p_cutime
= p
->p_utime
= 0;
325 p
->p_usrstack
= (caddr_t
)USRSTACK32
;
326 p
->p_model
= DATAMODEL_ILP32
;
327 p
->p_stkprot
= PROT_ZFOD
& ~PROT_EXEC
;
328 p
->p_datprot
= PROT_ZFOD
& ~PROT_EXEC
;
329 p
->p_stk_ctl
= INT32_MAX
;
331 p
->p_as
= as_alloc();
333 p
->p_as
->a_userlimit
= (caddr_t
)USERLIMIT32
;
334 (void) hat_setup(p
->p_as
->a_hat
, HAT_INIT
);
338 init_mstate(curthread
, LMS_SYSTEM
);
339 return (exec_init(p
->p_zone
->zone_initname
, p
->p_zone
->zone_bootargs
));
343 * Start the initial user process for the global zone; once running, if
344 * init should subsequently fail, it will automatically be caught in the
345 * exit(2) path, and restarted by restart_init().
352 ASSERT(curproc
->p_zone
->zone_initname
!= NULL
);
354 if (start_init_common() != 0)
355 halt("unix: Could not start init");
362 proc_t
*p
= ttoproc(curthread
); /* &p0 */
365 extern void fsflush();
366 extern int (*init_tbl
[])();
367 extern int (*mp_init_tbl
[])();
368 extern id_t syscid
, defaultcid
;
369 extern int swaploaded
;
371 extern ib_boot_prop_t
*iscsiboot_prop
;
372 extern void vm_init(void);
373 extern void cbe_init_pre(void);
374 extern void cbe_init(void);
375 extern void clock_tick_init_pre(void);
376 extern void clock_tick_init_post(void);
377 extern void clock_init(void);
378 extern void physio_bufs_init(void);
379 extern void pm_cfb_setup_intr(void);
380 extern int pm_adjust_timestamps(dev_info_t
*, void *);
381 extern void start_other_cpus(int);
382 extern void sysevent_evc_thrinit();
383 extern kmutex_t ualock
;
385 extern void fastboot_post_startup(void);
386 extern void progressbar_start(void);
389 * In the horrible world of x86 in-lines, you can't get symbolic
390 * structure offsets a la genassym. This assertion is here so
391 * that the next poor slob who innocently changes the offset of
392 * cpu_thread doesn't waste as much time as I just did finding
393 * out that it's hard-coded in i86/ml/i86.il. Similarly for
394 * curcpup. You're welcome.
396 ASSERT(CPU
== CPU
->cpu_self
);
397 ASSERT(curthread
== CPU
->cpu_thread
);
398 ASSERT_STACK_ALIGNED();
401 * We take the ualock until we have completed the startup
402 * to prevent kadmin() from disrupting this work. In particular,
403 * we don't want kadmin() to bring the system down while we are
404 * trying to start it up.
406 mutex_enter(&ualock
);
409 * Setup root lgroup and leaf lgroup for CPU 0
411 lgrp_init(LGRP_INIT_STAGE2
);
414 * Once 'startup()' completes, the thread_reaper() daemon would be
415 * created (in thread_init()). After that, it is safe to create threads
416 * that could exit. These exited threads will get reaped.
421 cbe_init_pre(); /* x86 must initialize gethrtimef before timer_init */
424 callout_init(); /* callout table MUST be init'd after cyclics */
425 clock_tick_init_pre();
430 * The progressbar thread uses cv_reltimedwait() and hence needs to be
431 * started after the callout mechanism has been initialized.
436 * On some platforms, clkinitf() changes the timing source that
437 * gethrtime_unscaled() uses to generate timestamps. cbe_init() calls
438 * clkinitf(), so re-initialize the microstate counters after the
439 * timesource has been chosen.
441 init_mstate(&t0
, LMS_SYSTEM
);
442 init_cpu_mstate(CPU
, CMS_SYSTEM
);
445 * May need to probe to determine latencies from CPU 0 after
446 * gethrtime() comes alive in cbe_init() and before enabling interrupts
447 * and copy and release any temporary memory allocated with BOP_ALLOC()
448 * before release_bootstrap() frees boot memory
450 lgrp_init(LGRP_INIT_STAGE3
);
453 * Call all system initialization functions.
455 for (initptr
= &init_tbl
[0]; *initptr
; initptr
++)
458 * Load iSCSI boot properties
462 * initialize vm related stuff.
467 * initialize buffer pool for raw I/O requests
471 ttolwp(curthread
)->lwp_error
= 0; /* XXX kludge for SCSI driver */
474 * Drop the interrupt level and allow interrupts. At this point
475 * the DDI guarantees that interrupts are enabled.
478 interrupts_unleashed
= 1;
481 * Create kmem cache for proc structures
483 process_cache
= kmem_cache_create("process_cache", sizeof (proc_t
),
484 0, NULL
, NULL
, NULL
, NULL
, NULL
, 0);
486 vfs_mountroot(); /* Mount the root file system */
487 errorq_init(); /* after vfs_mountroot() so DDI root is ready */
488 cpu_kstat_init(CPU
); /* after vfs_mountroot() so TOD is valid */
489 ddi_walk_devs(ddi_root_node(), pm_adjust_timestamps
, NULL
);
490 /* after vfs_mountroot() so hrestime is valid */
496 * Initialize Solaris Audit Subsystem
501 * Plumb the protocol modules and drivers only if we are not
502 * network booted; in that case we already did it in rootconf().
504 if (netboot
== 0 && iscsiboot_prop
== NULL
)
507 gethrestime(&PTOU(curproc
)->u_start
);
508 curthread
->t_start
= PTOU(curproc
)->u_start
.tv_sec
;
509 p
->p_mstart
= gethrtime();
512 * Perform setup functions that can only be done after root
513 * and swap have been set up.
521 * attach drivers with ddi-forceattach prop
522 * It must be done early enough to load hotplug drivers (e.g.
523 * pcmcia nexus) so that devices enumerated via hotplug are
524 * available before the I/O subsystem is fully initialized.
526 i_ddi_forceattach_drivers();
529 * Set the scan rate and other parameters of the paging subsystem.
534 * Initialize process 0's lwp directory and lwpid hash table.
536 p
->p_lwpdir
= p
->p_lwpfree
= p0_lwpdir
;
537 p
->p_lwpdir
->ld_next
= p
->p_lwpdir
+ 1;
539 p
->p_tidhash
= p0_tidhash
;
541 p0_lep
.le_thread
= curthread
;
542 p0_lep
.le_lwpid
= curthread
->t_tid
;
543 p0_lep
.le_start
= curthread
->t_start
;
544 lwp_hash_in(p
, &p0_lep
, p0_tidhash
, 2, 0);
547 * Initialize extended accounting.
552 * Initialize threads of sysevent event channels
554 sysevent_evc_thrinit();
557 * This must be done after post_startup() but before
560 lgrp_init(LGRP_INIT_STAGE4
);
563 * Perform MP initialization, if any.
569 * Release bootstrap here since PROM interfaces are
570 * used to start other CPUs above.
576 * Finish lgrp initialization after all CPUS are brought online.
578 lgrp_init(LGRP_INIT_STAGE5
);
581 * After mp_init(), the number of cpus is known (this is
582 * true for the time being; once there are actually
583 * hot pluggable cpus this scheme will not do).
584 * Any per cpu initialization is done here.
589 clock_tick_init_post();
591 for (initptr
= &mp_init_tbl
[0]; *initptr
; initptr
++)
595 * These must be called after start_other_cpus
599 fastboot_post_startup();
603 * Make init process; enter scheduling loop with system process.
605 * Note that we manually assign the pids for these processes, for
606 * historical reasons. If more pre-assigned pids are needed,
607 * FAMOUS_PIDS will have to be updated.
610 /* create init process */
611 if (newproc(start_init
, NULL
, defaultcid
, 59, NULL
,
613 panic("main: unable to fork init.");
615 /* create pageout daemon */
616 if (newproc(pageout
, NULL
, syscid
, maxclsyspri
- 1, NULL
,
618 panic("main: unable to fork pageout()");
620 /* create fsflush daemon */
621 if (newproc(fsflush
, NULL
, syscid
, minclsyspri
, NULL
,
623 panic("main: unable to fork fsflush()");
625 /* create cluster process if we're a member of one */
626 if (cluster_bootflags
& CLUSTER_BOOTED
) {
627 if (newproc(cluster_wrapper
, NULL
, syscid
, minclsyspri
,
629 panic("main: unable to fork cluster()");
634 * Create system threads (threads are associated with p0)
637 /* create module uninstall daemon */
638 /* BugID 1132273. If swapping over NFS need a bigger stack */
639 (void) thread_create(NULL
, 0, (void (*)())mod_uninstall_daemon
,
640 NULL
, 0, &p0
, TS_RUN
, minclsyspri
);
642 (void) thread_create(NULL
, 0, seg_pasync_thread
,
643 NULL
, 0, &p0
, TS_RUN
, minclsyspri
);
647 /* system is now ready */
650 bcopy("sched", PTOU(curproc
)->u_psargs
, 6);
651 bcopy("sched", PTOU(curproc
)->u_comm
, 5);