man2/sched_setaffinity.2

   1 .\" Copyright (C) 2002 Robert Love
   2 .\" and Copyright (C) 2006, 2015 Michael Kerrisk
   3 .\"
   4 .\" %%%LICENSE_START(GPLv2+_DOC_FULL)
   5 .\" This is free documentation; you can redistribute it and/or
   6 .\" modify it under the terms of the GNU General Public License as
   7 .\" published by the Free Software Foundation; either version 2 of
   8 .\" the License, or (at your option) any later version.
   9 .\"
  10 .\" The GNU General Public License's references to "object code"
  11 .\" and "executables" are to be interpreted as the output of any
  12 .\" document formatting or typesetting system, including
  13 .\" intermediate and printed output.
  14 .\"
  15 .\" This manual is distributed in the hope that it will be useful,
  16 .\" but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 .\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 .\" GNU General Public License for more details.
  19 .\"
  20 .\" You should have received a copy of the GNU General Public
  21 .\" License along with this manual; if not, see
  22 .\" <http://www.gnu.org/licenses/>.
  23 .\" %%%LICENSE_END
  24 .\"
  25 .\" 2002-11-19 Robert Love <rml@tech9.net> - initial version
  26 .\" 2004-04-20 mtk - fixed description of return value
  27 .\" 2004-04-22 aeb - added glibc prototype history
  28 .\" 2005-05-03 mtk - noted that sched_setaffinity may cause thread
  29 .\"     migration and that CPU affinity is a per-thread attribute.
  30 .\" 2006-02-03 mtk -- Major rewrite
  31 .\" 2008-11-12, mtk, removed CPU_*() macro descriptions to a
  32 .\" separate CPU_SET(3) page.
  33 .\"
  34 .TH SCHED_SETAFFINITY 2 2021-03-22 "Linux" "Linux Programmer's Manual"
  35 .SH NAME
  36 sched_setaffinity, sched_getaffinity \- \
  37 set and get a thread's CPU affinity mask
  38 .SH SYNOPSIS
  39 .nf
  40 .BR "#define _GNU_SOURCE" "             /* See feature_test_macros(7) */"
  41 .B #include <sched.h>
  42 .PP
  43 .BI "int sched_setaffinity(pid_t " pid ", size_t " cpusetsize ,
  44 .BI "                      const cpu_set_t *" mask );
  45 .BI "int sched_getaffinity(pid_t " pid ", size_t " cpusetsize ,
  46 .BI "                      cpu_set_t *" mask );
  47 .fi
  48 .SH DESCRIPTION
  49 A thread's CPU affinity mask determines the set of CPUs on which
  50 it is eligible to run.
  51 On a multiprocessor system, setting the CPU affinity mask
  52 can be used to obtain performance benefits.
  53 For example,
  54 by dedicating one CPU to a particular thread
  55 (i.e., setting the affinity mask of that thread to specify a single CPU,
  56 and setting the affinity mask of all other threads to exclude that CPU),
  57 it is possible to ensure maximum execution speed for that thread.
  58 Restricting a thread to run on a single CPU also avoids
  59 the performance cost caused by the cache invalidation that occurs
  60 when a thread ceases to execute on one CPU and then
  61 recommences execution on a different CPU.
  62 .PP
  63 A CPU affinity mask is represented by the
  64 .I cpu_set_t
  65 structure, a "CPU set", pointed to by
  66 .IR mask .
  67 A set of macros for manipulating CPU sets is described in
  68 .BR CPU_SET (3).
  69 .PP
  70 .BR sched_setaffinity ()
  71 sets the CPU affinity mask of the thread whose ID is
  72 .I pid
  73 to the value specified by
  74 .IR mask .
  75 If
  76 .I pid
  77 is zero, then the calling thread is used.
  78 The argument
  79 .I cpusetsize
  80 is the length (in bytes) of the data pointed to by
  81 .IR mask .
  82 Normally this argument would be specified as
  83 .IR "sizeof(cpu_set_t)" .
  84 .PP
  85 If the thread specified by
  86 .I pid
  87 is not currently running on one of the CPUs specified in
  88 .IR mask ,
  89 then that thread is migrated to one of the CPUs specified in
  90 .IR mask .
  91 .PP
  92 .BR sched_getaffinity ()
  93 writes the affinity mask of the thread whose ID is
  94 .I pid
  95 into the
  96 .I cpu_set_t
  97 structure pointed to by
  98 .IR mask .
  99 The
 100 .I cpusetsize
 101 argument specifies the size (in bytes) of
 102 .IR mask .
 103 If
 104 .I pid
 105 is zero, then the mask of the calling thread is returned.
 106 .SH RETURN VALUE
 107 On success,
 108 .BR sched_setaffinity ()
 109 and
 110 .BR sched_getaffinity ()
 111 return 0 (but see "C library/kernel differences" below,
 112 which notes that the underlying
 113 .BR sched_getaffinity ()
 114 differs in its return value).
 115 On failure, \-1 is returned, and
 116 .I errno
 117 is set to indicate the error.
 118 .SH ERRORS
 119 .TP
 120 .B EFAULT
 121 A supplied memory address was invalid.
 122 .TP
 123 .B EINVAL
 124 The affinity bit mask
 125 .I mask
 126 contains no processors that are currently physically on the system
 127 and permitted to the thread according to any restrictions that
 128 may be imposed by
 129 .I cpuset
 130 cgroups or the "cpuset" mechanism described in
 131 .BR cpuset (7).
 132 .TP
 133 .B EINVAL
 134 .RB ( sched_getaffinity ()
 135 and, in kernels before 2.6.9,
 136 .BR sched_setaffinity ())
 137 .I cpusetsize
 138 is smaller than the size of the affinity mask used by the kernel.
 139 .TP
 140 .B EPERM
 141 .RB ( sched_setaffinity ())
 142 The calling thread does not have appropriate privileges.
 143 The caller needs an effective user ID equal to the real user ID
 144 or effective user ID of the thread identified by
 145 .IR pid ,
 146 or it must possess the
 147 .B CAP_SYS_NICE
 148 capability in the user namespace of the thread
 149 .IR pid .
 150 .TP
 151 .B ESRCH
 152 The thread whose ID is \fIpid\fP could not be found.
 153 .SH VERSIONS
 154 The CPU affinity system calls were introduced in Linux kernel 2.5.8.
 155 The system call wrappers were introduced in glibc 2.3.
 156 Initially, the glibc interfaces included a
 157 .I cpusetsize
 158 argument, typed as
 159 .IR "unsigned int" .
 160 In glibc 2.3.3, the
 161 .I cpusetsize
 162 argument was removed, but was then restored in glibc 2.3.4, with type
 163 .IR size_t .
 164 .SH CONFORMING TO
 165 These system calls are Linux-specific.
 166 .SH NOTES
 167 After a call to
 168 .BR sched_setaffinity (),
 169 the set of CPUs on which the thread will actually run is
 170 the intersection of the set specified in the
 171 .I mask
 172 argument and the set of CPUs actually present on the system.
 173 The system may further restrict the set of CPUs on which the thread
 174 runs if the "cpuset" mechanism described in
 175 .BR cpuset (7)
 176 is being used.
 177 These restrictions on the actual set of CPUs on which the thread
 178 will run are silently imposed by the kernel.
 179 .PP
 180 There are various ways of determining the number of CPUs
 181 available on the system, including: inspecting the contents of
 182 .IR /proc/cpuinfo ;
 183 using
 184 .BR sysconf (3)
 185 to obtain the values of the
 186 .BR _SC_NPROCESSORS_CONF
 187 and
 188 .BR _SC_NPROCESSORS_ONLN
 189 parameters; and inspecting the list of CPU directories under
 190 .IR /sys/devices/system/cpu/ .
 191 .PP
 192 .BR sched (7)
 193 has a description of the Linux scheduling scheme.
 194 .PP
 195 The affinity mask is a per-thread attribute that can be
 196 adjusted independently for each of the threads in a thread group.
 197 The value returned from a call to
 198 .BR gettid (2)
 199 can be passed in the argument
 200 .IR pid .
 201 Specifying
 202 .I pid
 203 as 0 will set the attribute for the calling thread,
 204 and passing the value returned from a call to
 205 .BR getpid (2)
 206 will set the attribute for the main thread of the thread group.
 207 (If you are using the POSIX threads API, then use
 208 .BR pthread_setaffinity_np (3)
 209 instead of
 210 .BR sched_setaffinity ().)
 211 .PP
 212 The
 213 .I isolcpus
 214 boot option can be used to isolate one or more CPUs at boot time,
 215 so that no processes are scheduled onto those CPUs.
 216 Following the use of this boot option,
 217 the only way to schedule processes onto the isolated CPUs is via
 218 .BR sched_setaffinity ()
 219 or the
 220 .BR cpuset (7)
 221 mechanism.
 222 For further information, see the kernel source file
 223 .IR Documentation/admin\-guide/kernel\-parameters.txt .
 224 As noted in that file,
 225 .I isolcpus
 226 is the preferred mechanism of isolating CPUs
 227 (versus the alternative of manually setting the CPU affinity
 228 of all processes on the system).
 229 .PP
 230 A child created via
 231 .BR fork (2)
 232 inherits its parent's CPU affinity mask.
 233 The affinity mask is preserved across an
 234 .BR execve (2).
 235 .SS C library/kernel differences
 236 This manual page describes the glibc interface for the CPU affinity calls.
 237 The actual system call interface is slightly different, with the
 238 .I mask
 239 being typed as
 240 .IR "unsigned long\ *" ,
 241 reflecting the fact that the underlying implementation of CPU
 242 sets is a simple bit mask.
 243 .PP
 244 On success, the raw
 245 .BR sched_getaffinity ()
 246 system call returns the number of bytes copied into the
 247 .I mask
 248 buffer;
 249 this will be the minimum of
 250 .I cpusetsize
 251 and the size (in bytes) of the
 252 .I cpumask_t
 253 data type that is used internally by the kernel to
 254 represent the CPU set bit mask.
 255 .SS Handling systems with large CPU affinity masks
 256 The underlying system calls (which represent CPU masks as bit masks of type
 257 .IR "unsigned long\ *" )
 258 impose no restriction on the size of the CPU mask.
 259 However, the
 260 .I cpu_set_t
 261 data type used by glibc has a fixed size of 128 bytes,
 262 meaning that the maximum CPU number that can be represented is 1023.
 263 .\" FIXME . See https://sourceware.org/bugzilla/show_bug.cgi?id=15630
 264 .\" and https://sourceware.org/ml/libc-alpha/2013-07/msg00288.html
 265 If the kernel CPU affinity mask is larger than 1024 bits,
 266 then calls of the form:
 267 .PP
 268     sched_getaffinity(pid, sizeof(cpu_set_t), &mask);
 269 .PP
 270 fail with the error
 271 .BR EINVAL ,
 272 the error produced by the underlying system call for the case where the
 273 .I mask
 274 size specified in
 275 .I cpusetsize
 276 is smaller than the size of the affinity mask used by the kernel.
 277 (Depending on the system CPU topology, the kernel affinity mask can
 278 be substantially larger than the number of active CPUs in the system.)
 279 .PP
 280 When working on systems with large kernel CPU affinity masks,
 281 one must dynamically allocate the
 282 .I mask
 283 argument (see
 284 .BR CPU_ALLOC (3)).
 285 Currently, the only way to do this is by probing for the size
 286 of the required mask using
 287 .BR sched_getaffinity ()
 288 calls with increasing mask sizes (until the call does not fail with the error
 289 .BR EINVAL ).
 290 .PP
 291 Be aware that
 292 .BR CPU_ALLOC (3)
 293 may allocate a slightly larger CPU set than requested
 294 (because CPU sets are implemented as bit masks allocated in units of
 295 .IR sizeof(long) ).
 296 Consequently,
 297 .BR sched_getaffinity ()
 298 can set bits beyond the requested allocation size, because the kernel
 299 sees a few additional bits.
 300 Therefore, the caller should iterate over the bits in the returned set,
 301 counting those which are set, and stop upon reaching the value returned by
 302 .BR CPU_COUNT (3)
 303 (rather than iterating over the number of bits
 304 requested to be allocated).
 305 .SH EXAMPLES
 306 The program below creates a child process.
 307 The parent and child then each assign themselves to a specified CPU
 308 and execute identical loops that consume some CPU time.
 309 Before terminating, the parent waits for the child to complete.
 310 The program takes three command-line arguments:
 311 the CPU number for the parent,
 312 the CPU number for the child,
 313 and the number of loop iterations that both processes should perform.
 314 .PP
 315 As the sample runs below demonstrate, the amount of real and CPU time
 316 consumed when running the program will depend on intra-core caching effects
 317 and whether the processes are using the same CPU.
 318 .PP
 319 We first employ
 320 .BR lscpu (1)
 321 to determine that this (x86)
 322 system has two cores, each with two CPUs:
 323 .PP
 324 .in +4n
 325 .EX
 326 $ \fBlscpu | egrep \-i \(aqcore.*:|socket\(aq\fP
 327 Thread(s) per core:    2
 328 Core(s) per socket:    2
 329 Socket(s):             1
 330 .EE
 331 .in
 332 .PP
 333 We then time the operation of the example program for three cases:
 334 both processes running on the same CPU;
 335 both processes running on different CPUs on the same core;
 336 and both processes running on different CPUs on different cores.
 337 .PP
 338 .in +4n
 339 .EX
 340 $ \fBtime \-p ./a.out 0 0 100000000\fP
 341 real 14.75
 342 user 3.02
 343 sys 11.73
 344 $ \fBtime \-p ./a.out 0 1 100000000\fP
 345 real 11.52
 346 user 3.98
 347 sys 19.06
 348 $ \fBtime \-p ./a.out 0 3 100000000\fP
 349 real 7.89
 350 user 3.29
 351 sys 12.07
 352 .EE
 353 .in
 354 .SS Program source
 355 \&
 356 .EX
 357 #define _GNU_SOURCE
 358 #include <sched.h>
 359 #include <stdio.h>
 360 #include <stdlib.h>
 361 #include <unistd.h>
 362 #include <sys/wait.h>
 363
 364 #define errExit(msg)    do { perror(msg); exit(EXIT_FAILURE); \e
 365                         } while (0)     /* Report msg via perror(), then exit with failure */
 366
 367 int
 368 main(int argc, char *argv[])
 369 {   /* Fork, pin parent and child to the requested CPUs, run equal loops */
 370     cpu_set_t set;              /* Mask handed to sched_setaffinity() */
 371     int parentCPU, childCPU;
 372     int nloops;
 373
 374     if (argc != 4) {
 375         fprintf(stderr, "Usage: %s parent\-cpu child\-cpu num\-loops\en",
 376                 argv[0]);
 377         exit(EXIT_FAILURE);
 378     }
 379
 380     parentCPU = atoi(argv[1]);  /* atoi() reports no conversion errors */
 381     childCPU = atoi(argv[2]);
 382     nloops = atoi(argv[3]);
 383
 384     CPU_ZERO(&set);             /* Cleared before fork(); each branch adds one CPU */
 385
 386     switch (fork()) {
 387     case \-1:            /* Error */
 388         errExit("fork");        /* errExit() calls exit(), so no fall through */
 389
 390     case 0:             /* Child */
 391         CPU_SET(childCPU, &set);
 392
 393         if (sched_setaffinity(getpid(), sizeof(set), &set) == \-1)
 394             errExit("sched_setaffinity");
 395
 396         for (int j = 0; j < nloops; j++)
 397             getppid();          /* Result unused; the call only consumes CPU time */
 398
 399         exit(EXIT_SUCCESS);
 400
 401     default:            /* Parent */
 402         CPU_SET(parentCPU, &set);
 403
 404         if (sched_setaffinity(getpid(), sizeof(set), &set) == \-1)
 405             errExit("sched_setaffinity");
 406
 407         for (int j = 0; j < nloops; j++)
 408             getppid();          /* Same loop as the child */
 409
 410         wait(NULL);     /* Wait for child to terminate */
 411         exit(EXIT_SUCCESS);
 412     }
 413 }
 414 .EE
 415 .SH SEE ALSO
 416 .ad l
 417 .nh
 418 .BR lscpu (1),
 419 .BR nproc (1),
 420 .BR taskset (1),
 421 .BR clone (2),
 422 .BR getcpu (2),
 423 .BR getpriority (2),
 424 .BR gettid (2),
 425 .BR nice (2),
 426 .BR sched_get_priority_max (2),
 427 .BR sched_get_priority_min (2),
 428 .BR sched_getscheduler (2),
 429 .BR sched_setscheduler (2),
 430 .BR setpriority (2),
 431 .BR CPU_SET (3),
 432 .BR get_nprocs (3),
 433 .BR pthread_setaffinity_np (3),
 434 .BR sched_getcpu (3),
 435 .BR capabilities (7),
 436 .BR cpuset (7),
 437 .BR sched (7),
 438 .BR numactl (8)