/*-
 * Copyright (c) 2001, John Baldwin <jhb@FreeBSD.org>.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY JOHN BALDWIN AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL JOHN BALDWIN OR THE VOICES IN HIS HEAD
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * This module holds the global variables and machine independent functions
 * used for the kernel SMP support.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>
#include <machine/smp.h>

#include "opt_sched.h"
#ifdef SMP
volatile cpumask_t stopped_cpus;
volatile cpumask_t started_cpus;
cpumask_t idle_cpus_mask;
cpumask_t hlt_cpus_mask;
cpumask_t logical_cpus_mask;

void (*cpustop_restartfunc)(void);
#endif
/* This is used in modules that need to work in both SMP and UP. */
cpumask_t all_cpus;

int mp_ncpus;
/* export this for libkvm consumers. */
int mp_maxcpus = MAXCPU;

volatile int smp_started;
u_int mp_maxid;
SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD, NULL, "Kernel SMP");

SYSCTL_INT(_kern_smp, OID_AUTO, maxid, CTLFLAG_RD, &mp_maxid, 0,
    "Max CPU ID.");

SYSCTL_INT(_kern_smp, OID_AUTO, maxcpus, CTLFLAG_RD, &mp_maxcpus, 0,
    "Max number of CPUs that the system was compiled for.");
int smp_active = 0;	/* are the APs allowed to run? */
SYSCTL_INT(_kern_smp, OID_AUTO, active, CTLFLAG_RW, &smp_active, 0,
    "Number of Auxiliary Processors (APs) that were successfully started");
int smp_disabled = 0;	/* has smp been disabled? */
SYSCTL_INT(_kern_smp, OID_AUTO, disabled, CTLFLAG_RDTUN, &smp_disabled, 0,
    "SMP has been disabled from the loader");
TUNABLE_INT("kern.smp.disabled", &smp_disabled);
int smp_cpus = 1;	/* how many CPUs are running */
SYSCTL_INT(_kern_smp, OID_AUTO, cpus, CTLFLAG_RD, &smp_cpus, 0,
    "Number of CPUs online");
int smp_topology = 0;	/* Which topology we're using. */
SYSCTL_INT(_kern_smp, OID_AUTO, topology, CTLFLAG_RD, &smp_topology, 0,
    "Topology override setting; 0 is default provided by hardware.");
TUNABLE_INT("kern.smp.topology", &smp_topology);
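/*
 * Illustrative example (not from this file): since kern.smp.topology is
 * registered as a tunable, the override can be set from the boot loader,
 * e.g. in /boot/loader.conf, before the topology is first queried:
 *
 *	kern.smp.topology="2"	# treat all CPUs as equal, nothing shared
 */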
#ifdef SMP

/* Enable forwarding of a signal to a process running on a different CPU */
static int forward_signal_enabled = 1;
SYSCTL_INT(_kern_smp, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
    &forward_signal_enabled, 0,
    "Forwarding of a signal to a process on a different CPU");
/* Enable forwarding of roundrobin to all other CPUs */
static int forward_roundrobin_enabled = 1;
SYSCTL_INT(_kern_smp, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
    &forward_roundrobin_enabled, 0,
    "Forwarding of roundrobin to all other CPUs");
/* Variables needed for SMP rendezvous. */
static volatile int smp_rv_ncpus;
static void (*volatile smp_rv_setup_func)(void *arg);
static void (*volatile smp_rv_action_func)(void *arg);
static void (*volatile smp_rv_teardown_func)(void *arg);
static void *volatile smp_rv_func_arg;
static volatile int smp_rv_waiters[3];
/*
 * Shared mutex to restrict busywaits between smp_rendezvous() and
 * smp(_targeted)_tlb_shootdown().  A deadlock occurs if both of these
 * functions trigger at once and cause multiple CPUs to busywait with
 * interrupts disabled.
 */
struct mtx smp_ipi_mtx;
/*
 * Let the MD SMP code initialize mp_maxid very early if it can.
 */
static void
mp_setmaxid(void *dummy)
{

	cpu_mp_setmaxid();
}
SYSINIT(cpu_mp_setmaxid, SI_SUB_TUNABLES, SI_ORDER_FIRST, mp_setmaxid, NULL);
/*
 * Call the MD SMP initialization code.
 */
static void
mp_start(void *dummy)
{

	/* Probe for MP hardware. */
	if (smp_disabled != 0 || cpu_mp_probe() == 0) {
		mp_ncpus = 1;
		all_cpus = PCPU_GET(cpumask);
		return;
	}

	mtx_init(&smp_ipi_mtx, "smp rendezvous", NULL, MTX_SPIN);
	cpu_mp_start();
	printf("FreeBSD/SMP: Multiprocessor System Detected: %d CPUs\n",
	    mp_ncpus);
	cpu_mp_announce();
}
SYSINIT(cpu_mp, SI_SUB_CPU, SI_ORDER_THIRD, mp_start, NULL);
void
forward_signal(struct thread *td)
{
	int id;

	/*
	 * signotify() has already set TDF_ASTPENDING and TDF_NEEDSIGCHECK on
	 * this thread, so all we need to do is poke it if it is currently
	 * executing so that it executes ast().
	 */
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT(TD_IS_RUNNING(td),
	    ("forward_signal: thread is not TDS_RUNNING"));

	CTR1(KTR_SMP, "forward_signal(%p)", td->td_proc);

	if (!smp_started || cold || panicstr)
		return;
	if (!forward_signal_enabled)
		return;

	/* No need to IPI ourself. */
	if (td == curthread)
		return;

	id = td->td_oncpu;
	if (id == NOCPU)
		return;
	ipi_selected(1 << id, IPI_AST);
}
void
forward_roundrobin(void)
{
	struct pcpu *pc;
	struct thread *td;
	cpumask_t id, map, me;

	CTR0(KTR_SMP, "forward_roundrobin()");

	if (!smp_started || cold || panicstr)
		return;
	if (!forward_roundrobin_enabled)
		return;
	map = 0;
	me = PCPU_GET(cpumask);
	SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
		td = pc->pc_curthread;
		id = pc->pc_cpumask;
		if (id != me && (id & stopped_cpus) == 0 &&
		    !TD_IS_IDLETHREAD(td)) {
			td->td_flags |= TDF_NEEDRESCHED;
			map |= id;
		}
	}
	ipi_selected(map, IPI_AST);
}
/*
 * When called the executing CPU will send an IPI to all other CPUs
 * requesting that they halt execution.
 *
 * Usually (but not necessarily) called with 'other_cpus' as its arg.
 *
 *  - Signals all CPUs in map to stop.
 *  - Waits for each to stop.
 *
 * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
 *            from executing at same time.
 */
int
stop_cpus(cpumask_t map)
{
	int i;

	if (!smp_started)
		return (0);

	CTR1(KTR_SMP, "stop_cpus(%x)", map);

	/* send the stop IPI to all CPUs in map */
	ipi_selected(map, IPI_STOP);

	i = 0;
	while ((stopped_cpus & map) != map) {
		/* spin */
		cpu_spinwait();
		i++;
#ifdef DIAGNOSTIC
		if (i == 100000) {
			printf("timeout stopping cpus\n");
			break;
		}
#endif
	}

	return (1);
}
/*
 * Called by a CPU to restart stopped CPUs.
 *
 * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
 *
 *  - Signals all CPUs in map to restart.
 *  - Waits for each to restart.
 */
int
restart_cpus(cpumask_t map)
{

	if (!smp_started)
		return (0);

	CTR1(KTR_SMP, "restart_cpus(%x)", map);

	/* signal other cpus to restart */
	atomic_store_rel_int(&started_cpus, map);

	/* wait for each to clear its bit */
	while ((stopped_cpus & map) != 0)
		cpu_spinwait();

	return (1);
}
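/*
 * Minimal usage sketch (illustrative only, not part of this file): a
 * caller typically stops every CPU but itself, does work that must not
 * race with the others, then restarts exactly the CPUs that stopped.
 * The pause_other_cpus() wrapper below is a hypothetical example.
 */
#ifdef notyet
static void
pause_other_cpus(void)
{
	cpumask_t map;

	map = all_cpus & ~PCPU_GET(cpumask);	/* every CPU but ourselves */
	stop_cpus(map);				/* send IPI_STOP and spin-wait */
	/* ... inspect or modify shared state safely here ... */
	restart_cpus(stopped_cpus);		/* let the stopped CPUs resume */
}
#endif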
/*
 * All-CPU rendezvous.  CPUs are signalled, all execute the setup function
 * (if specified), rendezvous, execute the action function (if specified),
 * rendezvous again, execute the teardown function (if specified), and then
 * resume.
 *
 * Note that the supplied external functions _must_ be reentrant and aware
 * that they are running in parallel and in an unknown lock context.
 */
void
smp_rendezvous_action(void)
{
	void *local_func_arg = smp_rv_func_arg;
	void (*local_setup_func)(void *) = smp_rv_setup_func;
	void (*local_action_func)(void *) = smp_rv_action_func;
	void (*local_teardown_func)(void *) = smp_rv_teardown_func;

	/* Ensure we have up-to-date values. */
	atomic_add_acq_int(&smp_rv_waiters[0], 1);
	while (smp_rv_waiters[0] < smp_rv_ncpus)
		cpu_spinwait();

	/* setup function */
	if (local_setup_func != smp_no_rendevous_barrier) {
		if (smp_rv_setup_func != NULL)
			smp_rv_setup_func(smp_rv_func_arg);

		/* spin on entry rendezvous */
		atomic_add_int(&smp_rv_waiters[1], 1);
		while (smp_rv_waiters[1] < smp_rv_ncpus)
			cpu_spinwait();
	}

	/* action function */
	if (local_action_func != NULL)
		local_action_func(local_func_arg);

	/* spin on exit rendezvous */
	atomic_add_int(&smp_rv_waiters[2], 1);
	if (local_teardown_func == smp_no_rendevous_barrier)
		return;
	while (smp_rv_waiters[2] < smp_rv_ncpus)
		cpu_spinwait();

	/* teardown function */
	if (local_teardown_func != NULL)
		local_teardown_func(local_func_arg);
}
void
smp_rendezvous_cpus(cpumask_t map,
	void (* setup_func)(void *),
	void (* action_func)(void *),
	void (* teardown_func)(void *),
	void *arg)
{
	int i, ncpus = 0;

	if (!smp_started) {
		if (setup_func != NULL)
			setup_func(arg);
		if (action_func != NULL)
			action_func(arg);
		if (teardown_func != NULL)
			teardown_func(arg);
		return;
	}

	/* mp_maxid is the highest valid CPU ID, so the bound is inclusive. */
	for (i = 0; i <= mp_maxid; i++)
		if (((1 << i) & map) != 0 && !CPU_ABSENT(i))
			ncpus++;

	/* obtain rendezvous lock */
	mtx_lock_spin(&smp_ipi_mtx);

	/* set static function pointers */
	smp_rv_ncpus = ncpus;
	smp_rv_setup_func = setup_func;
	smp_rv_action_func = action_func;
	smp_rv_teardown_func = teardown_func;
	smp_rv_func_arg = arg;
	smp_rv_waiters[1] = 0;
	smp_rv_waiters[2] = 0;
	atomic_store_rel_int(&smp_rv_waiters[0], 0);

	/* signal other processors, which will enter the IPI with interrupts off */
	ipi_selected(map & ~(1 << curcpu), IPI_RENDEZVOUS);

	/* Check if the current CPU is in the map */
	if ((map & (1 << curcpu)) != 0)
		smp_rendezvous_action();

	if (teardown_func == smp_no_rendevous_barrier)
		while (atomic_load_acq_int(&smp_rv_waiters[2]) < ncpus)
			cpu_spinwait();

	/* release lock */
	mtx_unlock_spin(&smp_ipi_mtx);
}
void
smp_rendezvous(void (* setup_func)(void *),
	       void (* action_func)(void *),
	       void (* teardown_func)(void *),
	       void *arg)
{

	smp_rendezvous_cpus(all_cpus, setup_func, action_func, teardown_func, arg);
}
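/*
 * Minimal usage sketch (illustrative only, not part of this file): run an
 * action on every CPU with no setup or teardown step.  Passing
 * smp_no_rendevous_barrier skips the corresponding barrier; the
 * bump_counter() action and counters[] array are hypothetical.
 */
#ifdef notyet
static int counters[MAXCPU];

static void
bump_counter(void *arg)
{

	/* Runs once on each CPU; on remote CPUs, from the rendezvous IPI. */
	counters[curcpu] += *(int *)arg;
}

static void
bump_all_counters(int n)
{

	smp_rendezvous(smp_no_rendevous_barrier, bump_counter,
	    smp_no_rendevous_barrier, &n);
}
#endif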
static struct cpu_group group[MAXCPU];
struct cpu_group *
smp_topo(void)
{
	struct cpu_group *top;

	/*
	 * Check for a fake topology request for debugging purposes.
	 */
	switch (smp_topology) {
	case 1:
		/* Dual core with no sharing. */
		top = smp_topo_1level(CG_SHARE_NONE, 2, 0);
		break;
	case 2:
		/* No topology, all cpus are equal. */
		top = smp_topo_none();
		break;
	case 3:
		/* Dual core with shared L2. */
		top = smp_topo_1level(CG_SHARE_L2, 2, 0);
		break;
	case 4:
		/* quad core, shared l3 among each package, private l2. */
		top = smp_topo_1level(CG_SHARE_L3, 4, 0);
		break;
	case 5:
		/* quad core, 2 dualcore parts on each package share l2. */
		top = smp_topo_2level(CG_SHARE_NONE, 2, CG_SHARE_L2, 2, 0);
		break;
	case 6:
		/* Single-core 2xHTT */
		top = smp_topo_1level(CG_SHARE_L1, 2, CG_FLAG_HTT);
		break;
	case 7:
		/* quad core with a shared l3, 8 threads sharing L2. */
		top = smp_topo_2level(CG_SHARE_L3, 4, CG_SHARE_L2, 8,
		    CG_FLAG_SMT);
		break;
	default:
		/* Default, ask the system what it wants. */
		top = cpu_topo();
		break;
	}
	/*
	 * Verify the returned topology.
	 */
	if (top->cg_count != mp_ncpus)
		panic("Built bad topology at %p.  CPU count %d != %d",
		    top, top->cg_count, mp_ncpus);
	if (top->cg_mask != all_cpus)
		panic("Built bad topology at %p.  CPU mask 0x%X != 0x%X",
		    top, top->cg_mask, all_cpus);
	return (top);
}
struct cpu_group *
smp_topo_none(void)
{
	struct cpu_group *top;

	top = &group[0];
	top->cg_parent = NULL;
	top->cg_child = NULL;
	top->cg_mask = (1 << mp_ncpus) - 1;
	top->cg_count = mp_ncpus;
	top->cg_children = 0;
	top->cg_level = CG_SHARE_NONE;
	top->cg_flags = 0;

	return (top);
}
static int
smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share,
    int count, int flags, int start)
{
	cpumask_t mask;
	int i;

	for (mask = 0, i = 0; i < count; i++, start++)
		mask |= (1 << start);
	child->cg_parent = parent;
	child->cg_child = NULL;
	child->cg_children = 0;
	child->cg_level = share;
	child->cg_count = count;
	child->cg_flags = flags;
	child->cg_mask = mask;
	parent->cg_children++;
	for (; parent != NULL; parent = parent->cg_parent) {
		if ((parent->cg_mask & child->cg_mask) != 0)
			panic("Duplicate children in %p.  mask 0x%X child 0x%X",
			    parent, parent->cg_mask, child->cg_mask);
		parent->cg_mask |= child->cg_mask;
		parent->cg_count += child->cg_count;
	}

	return (start);
}
struct cpu_group *
smp_topo_1level(int share, int count, int flags)
{
	struct cpu_group *child;
	struct cpu_group *top;
	int packages;
	int cpu;
	int i;

	cpu = 0;
	top = &group[0];
	packages = mp_ncpus / count;
	top->cg_child = child = &group[1];
	top->cg_level = CG_SHARE_NONE;
	for (i = 0; i < packages; i++, child++)
		cpu = smp_topo_addleaf(top, child, share, count, flags, cpu);
	return (top);
}
struct cpu_group *
smp_topo_2level(int l2share, int l2count, int l1share, int l1count,
    int l1flags)
{
	struct cpu_group *top;
	struct cpu_group *l1g;
	struct cpu_group *l2g;
	int cpu;
	int i;
	int j;

	cpu = 0;
	top = &group[0];
	l2g = &group[1];
	top->cg_child = l2g;
	top->cg_level = CG_SHARE_NONE;
	top->cg_children = mp_ncpus / (l2count * l1count);
	l1g = l2g + top->cg_children;
	for (i = 0; i < top->cg_children; i++, l2g++) {
		l2g->cg_parent = top;
		l2g->cg_child = l1g;
		l2g->cg_level = l2share;
		for (j = 0; j < l2count; j++, l1g++)
			cpu = smp_topo_addleaf(l2g, l1g, l1share, l1count,
			    l1flags, cpu);
	}
	return (top);
}
struct cpu_group *
smp_topo_find(struct cpu_group *top, int cpu)
{
	struct cpu_group *cg;
	cpumask_t mask;
	int children;
	int i;

	mask = (1 << cpu);
	cg = top;
	for (;;) {
		if ((cg->cg_mask & mask) == 0)
			return (NULL);
		if (cg->cg_children == 0)
			return (cg);
		children = cg->cg_children;
		for (i = 0, cg = cg->cg_child; i < children; cg++, i++)
			if ((cg->cg_mask & mask) != 0)
				break;
	}
	return (NULL);
}
#else /* !SMP */

void
smp_rendezvous_cpus(cpumask_t map,
	void (*setup_func)(void *),
	void (*action_func)(void *),
	void (*teardown_func)(void *),
	void *arg)
{

	if (setup_func != NULL)
		setup_func(arg);
	if (action_func != NULL)
		action_func(arg);
	if (teardown_func != NULL)
		teardown_func(arg);
}
void
smp_rendezvous(void (*setup_func)(void *),
	       void (*action_func)(void *),
	       void (*teardown_func)(void *),
	       void *arg)
{

	if (setup_func != NULL)
		setup_func(arg);
	if (action_func != NULL)
		action_func(arg);
	if (teardown_func != NULL)
		teardown_func(arg);
}
/*
 * Provide dummy SMP support for UP kernels.  Modules that need to use SMP
 * APIs will still work using this dummy support.
 */
static void
mp_setvariables_for_up(void *dummy)
{

	mp_ncpus = 1;
	mp_maxid = PCPU_GET(cpuid);
	all_cpus = PCPU_GET(cpumask);
	KASSERT(PCPU_GET(cpuid) == 0, ("UP must have a CPU ID of zero"));
}
SYSINIT(cpu_mp_setvariables, SI_SUB_TUNABLES, SI_ORDER_FIRST,
    mp_setvariables_for_up, NULL);
#endif /* SMP */
void
smp_no_rendevous_barrier(void *dummy)
{
#ifdef SMP
	KASSERT((!smp_started),
	    ("smp_no_rendevous called and smp is started"));
#endif
}