man2/sched_setaffinity.2

   1 .\" Copyright (C) 2002 Robert Love
   2 .\" and Copyright (C) 2006, 2015 Michael Kerrisk
   3 .\"
   4 .\" %%%LICENSE_START(GPLv2+_DOC_FULL)
   5 .\" This is free documentation; you can redistribute it and/or
   6 .\" modify it under the terms of the GNU General Public License as
   7 .\" published by the Free Software Foundation; either version 2 of
   8 .\" the License, or (at your option) any later version.
   9 .\"
  10 .\" The GNU General Public License's references to "object code"
  11 .\" and "executables" are to be interpreted as the output of any
  12 .\" document formatting or typesetting system, including
  13 .\" intermediate and printed output.
  14 .\"
  15 .\" This manual is distributed in the hope that it will be useful,
  16 .\" but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 .\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 .\" GNU General Public License for more details.
  19 .\"
  20 .\" You should have received a copy of the GNU General Public
  21 .\" License along with this manual; if not, see
  22 .\" <http://www.gnu.org/licenses/>.
  23 .\" %%%LICENSE_END
  24 .\"
  25 .\" 2002-11-19 Robert Love <rml@tech9.net> - initial version
  26 .\" 2004-04-20 mtk - fixed description of return value
  27 .\" 2004-04-22 aeb - added glibc prototype history
  28 .\" 2005-05-03 mtk - noted that sched_setaffinity may cause thread
  29 .\"     migration and that CPU affinity is a per-thread attribute.
  30 .\" 2006-02-03 mtk -- Major rewrite
  31 .\" 2008-11-12, mtk, removed CPU_*() macro descriptions to a
  32 .\" separate CPU_SET(3) page.
  33 .\"
  34 .TH SCHED_SETAFFINITY 2 2017-09-15 "Linux" "Linux Programmer's Manual"
  35 .SH NAME
  36 sched_setaffinity, sched_getaffinity \- \
  37 set and get a thread's CPU affinity mask
  38 .SH SYNOPSIS
  39 .nf
  40 .BR "#define _GNU_SOURCE" "             /* See feature_test_macros(7) */"
  41 .B #include <sched.h>
  42 .PP
  43 .BI "int sched_setaffinity(pid_t " pid ", size_t " cpusetsize ,
  44 .BI "                      const cpu_set_t *" mask );
  45 .PP
  46 .BI "int sched_getaffinity(pid_t " pid ", size_t " cpusetsize ,
  47 .BI "                      cpu_set_t *" mask );
  48 .fi
  49 .SH DESCRIPTION
  50 A thread's CPU affinity mask determines the set of CPUs on which
  51 it is eligible to run.
  52 On a multiprocessor system, setting the CPU affinity mask
  53 can be used to obtain performance benefits.
  54 For example,
  55 by dedicating one CPU to a particular thread
  56 (i.e., setting the affinity mask of that thread to specify a single CPU,
  57 and setting the affinity mask of all other threads to exclude that CPU),
  58 it is possible to ensure maximum execution speed for that thread.
  59 Restricting a thread to run on a single CPU also avoids
  60 the performance cost caused by the cache invalidation that occurs
  61 when a thread ceases to execute on one CPU and then
  62 recommences execution on a different CPU.
  63 .PP
  64 A CPU affinity mask is represented by the
  65 .I cpu_set_t
  66 structure, a "CPU set", pointed to by
  67 .IR mask .
  68 A set of macros for manipulating CPU sets is described in
  69 .BR CPU_SET (3).
  70 .PP
  71 .BR sched_setaffinity ()
  72 sets the CPU affinity mask of the thread whose ID is
  73 .I pid
  74 to the value specified by
  75 .IR mask .
  76 If
  77 .I pid
  78 is zero, then the calling thread is used.
  79 The argument
  80 .I cpusetsize
  81 is the length (in bytes) of the data pointed to by
  82 .IR mask .
  83 Normally this argument would be specified as
  84 .IR "sizeof(cpu_set_t)" .
  85 .PP
  86 If the thread specified by
  87 .I pid
  88 is not currently running on one of the CPUs specified in
  89 .IR mask ,
  90 then that thread is migrated to one of the CPUs specified in
  91 .IR mask .
  92 .PP
  93 .BR sched_getaffinity ()
  94 writes the affinity mask of the thread whose ID is
  95 .I pid
  96 into the
  97 .I cpu_set_t
  98 structure pointed to by
  99 .IR mask .
 100 The
 101 .I cpusetsize
 102 argument specifies the size (in bytes) of
 103 .IR mask .
 104 If
 105 .I pid
 106 is zero, then the mask of the calling thread is returned.
 107 .SH RETURN VALUE
 108 On success,
 109 .BR sched_setaffinity ()
 110 and
 111 .BR sched_getaffinity ()
 112 return 0.
 113 On error, \-1 is returned, and
 114 .I errno
 115 is set appropriately.
 116 .SH ERRORS
 117 .TP
 118 .B EFAULT
 119 A supplied memory address was invalid.
 120 .TP
 121 .B EINVAL
 122 The affinity bit mask
 123 .I mask
 124 contains no processors that are currently physically on the system
 125 and permitted to the thread according to any restrictions that
 126 may be imposed by
 127 .I cpuset
 128 cgroups or the "cpuset" mechanism described in
 129 .BR cpuset (7).
 130 .TP
 131 .B EINVAL
 132 .RB ( sched_getaffinity ()
 133 and, in kernels before 2.6.9,
 134 .BR sched_setaffinity ())
 135 .I cpusetsize
 136 is smaller than the size of the affinity mask used by the kernel.
 137 .TP
 138 .B EPERM
 139 .RB ( sched_setaffinity ())
 140 The calling thread does not have appropriate privileges.
 141 The caller needs an effective user ID equal to the real user ID
 142 or effective user ID of the thread identified by
 143 .IR pid ,
 144 or it must possess the
 145 .B CAP_SYS_NICE
 146 capability in the user namespace of the thread
 147 .IR pid .
 148 .TP
 149 .B ESRCH
 150 The thread whose ID is \fIpid\fP could not be found.
 151 .SH VERSIONS
 152 The CPU affinity system calls were introduced in Linux kernel 2.5.8.
 153 The system call wrappers were introduced in glibc 2.3.
 154 Initially, the glibc interfaces included a
 155 .I cpusetsize
 156 argument, typed as
 157 .IR "unsigned int" .
 158 In glibc 2.3.3, the
 159 .I cpusetsize
 160 argument was removed, but was then restored in glibc 2.3.4, with type
 161 .IR size_t .
 162 .SH CONFORMING TO
 163 These system calls are Linux-specific.
 164 .SH NOTES
 165 After a call to
 166 .BR sched_setaffinity (),
 167 the set of CPUs on which the thread will actually run is
 168 the intersection of the set specified in the
 169 .I mask
 170 argument and the set of CPUs actually present on the system.
 171 The system may further restrict the set of CPUs on which the thread
 172 runs if the "cpuset" mechanism described in
 173 .BR cpuset (7)
 174 is being used.
 175 These restrictions on the actual set of CPUs on which the thread
 176 will run are silently imposed by the kernel.
 177 .PP
 178 There are various ways of determining the number of CPUs
 179 available on the system, including: inspecting the contents of
 180 .IR /proc/cpuinfo ;
 181 using
 182 .BR sysconf (3)
 183 to obtain the values of the
 184 .BR _SC_NPROCESSORS_CONF
 185 and
 186 .BR _SC_NPROCESSORS_ONLN
 187 parameters; and inspecting the list of CPU directories under
 188 .IR /sys/devices/system/cpu/ .
 189 .PP
 190 .BR sched (7)
 191 has a description of the Linux scheduling scheme.
 192 .PP
 193 The affinity mask is a per-thread attribute that can be
 194 adjusted independently for each of the threads in a thread group.
 195 The value returned from a call to
 196 .BR gettid (2)
 197 can be passed in the argument
 198 .IR pid .
 199 Specifying
 200 .I pid
 201 as 0 will set the attribute for the calling thread,
 202 and passing the value returned from a call to
 203 .BR getpid (2)
 204 will set the attribute for the main thread of the thread group.
 205 (If you are using the POSIX threads API, then use
 206 .BR pthread_setaffinity_np (3)
 207 instead of
 208 .BR sched_setaffinity ().)
 209 .PP
 210 The
 211 .I isolcpus
 212 boot option can be used to isolate one or more CPUs at boot time,
 213 so that no processes are scheduled onto those CPUs.
 214 Following the use of this boot option,
 215 the only way to schedule processes onto the isolated CPUs is via
 216 .BR sched_setaffinity ()
 217 or the
 218 .BR cpuset (7)
 219 mechanism.
 220 For further information, see the kernel source file
 221 .IR Documentation/admin-guide/kernel-parameters.txt .
 222 As noted in that file,
 223 .I isolcpus
  224 is the preferred mechanism for isolating CPUs
 225 (versus the alternative of manually setting the CPU affinity
 226 of all processes on the system).
 227 .PP
 228 A child created via
 229 .BR fork (2)
 230 inherits its parent's CPU affinity mask.
 231 The affinity mask is preserved across an
 232 .BR execve (2).
 233 .SS C library/kernel differences
 234 This manual page describes the glibc interface for the CPU affinity calls.
 235 The actual system call interface is slightly different, with the
 236 .I mask
 237 being typed as
 238 .IR "unsigned long\ *" ,
 239 reflecting the fact that the underlying implementation of CPU
 240 sets is a simple bit mask.
 241 On success, the raw
 242 .BR sched_getaffinity ()
 243 system call returns the size (in bytes) of the
 244 .I cpumask_t
 245 data type that is used internally by the kernel to
 246 represent the CPU set bit mask.
 247 .SS Handling systems with large CPU affinity masks
 248 The underlying system calls (which represent CPU masks as bit masks of type
 249 .IR "unsigned long\ *" )
 250 impose no restriction on the size of the CPU mask.
 251 However, the
 252 .I cpu_set_t
 253 data type used by glibc has a fixed size of 128 bytes,
 254 meaning that the maximum CPU number that can be represented is 1023.
 255 .\" FIXME . See https://sourceware.org/bugzilla/show_bug.cgi?id=15630
 256 .\" and https://sourceware.org/ml/libc-alpha/2013-07/msg00288.html
  257 If the kernel CPU affinity mask is larger than 1024 bits,
 258 then calls of the form:
 259 .PP
 260     sched_getaffinity(pid, sizeof(cpu_set_t), &mask);
 261 .PP
 262 fail with the error
 263 .BR EINVAL ,
 264 the error produced by the underlying system call for the case where the
 265 .I mask
 266 size specified in
 267 .I cpusetsize
 268 is smaller than the size of the affinity mask used by the kernel.
 269 (Depending on the system CPU topology, the kernel affinity mask can
 270 be substantially larger than the number of active CPUs in the system.)
 271 .PP
 272 When working on systems with large kernel CPU affinity masks,
 273 one must dynamically allocate the
 274 .I mask
 275 argument (see
 276 .BR CPU_ALLOC (3)).
 277 Currently, the only way to do this is by probing for the size
 278 of the required mask using
 279 .BR sched_getaffinity ()
 280 calls with increasing mask sizes (until the call does not fail with the error
 281 .BR EINVAL ).
 282 .PP
 283 Be aware that
 284 .BR CPU_ALLOC (3)
 285 may allocate a slightly larger CPU set than requested
 286 (because CPU sets are implemented as bit masks allocated in units of
 287 .IR sizeof(long) ).
 288 Consequently,
 289 .BR sched_getaffinity ()
 290 can set bits beyond the requested allocation size, because the kernel
 291 sees a few additional bits.
 292 Therefore, the caller should iterate over the bits in the returned set,
 293 counting those which are set, and stop upon reaching the value returned by
 294 .BR CPU_COUNT (3)
 295 (rather than iterating over the number of bits
 296 requested to be allocated).
 297 .SH EXAMPLE
 298 The program below creates a child process.
 299 The parent and child then each assign themselves to a specified CPU
 300 and execute identical loops that consume some CPU time.
 301 Before terminating, the parent waits for the child to complete.
 302 The program takes three command-line arguments:
 303 the CPU number for the parent,
 304 the CPU number for the child,
 305 and the number of loop iterations that both processes should perform.
 306 .PP
 307 As the sample runs below demonstrate, the amount of real and CPU time
 308 consumed when running the program will depend on intra-core caching effects
 309 and whether the processes are using the same CPU.
 310 .PP
 311 We first employ
 312 .BR lscpu (1)
 313 to determine that this (x86)
 314 system has two cores, each with two CPUs:
 315 .PP
 316 .in +4n
 317 .EX
  318 $ \fBlscpu | grep \-E \-i 'core.*:|socket'\fP
 319 Thread(s) per core:    2
 320 Core(s) per socket:    2
 321 Socket(s):             1
 322 .EE
 323 .in
 324 .PP
 325 We then time the operation of the example program for three cases:
 326 both processes running on the same CPU;
 327 both processes running on different CPUs on the same core;
 328 and both processes running on different CPUs on different cores.
 329 .PP
 330 .in +4n
 331 .EX
 332 $ \fBtime \-p ./a.out 0 0 100000000\fP
 333 real 14.75
 334 user 3.02
 335 sys 11.73
 336 $ \fBtime \-p ./a.out 0 1 100000000\fP
 337 real 11.52
 338 user 3.98
 339 sys 19.06
 340 $ \fBtime \-p ./a.out 0 3 100000000\fP
 341 real 7.89
 342 user 3.29
 343 sys 12.07
 344 .EE
 345 .in
 346 .SS Program source
 347 \&
 348 .EX
 349 #define _GNU_SOURCE
 350 #include <sched.h>
 351 #include <stdio.h>
 352 #include <stdlib.h>
 353 #include <unistd.h>
 354 #include <sys/wait.h>
 355
 356 #define errExit(msg)    do { perror(msg); exit(EXIT_FAILURE); \\
 357                         } while (0)
 358
 359 int
 360 main(int argc, char *argv[])
 361 {
 362     cpu_set_t set;
 363     int parentCPU, childCPU;
 364     int nloops, j;
 365
 366     if (argc != 4) {
 367         fprintf(stderr, "Usage: %s parent\-cpu child\-cpu num\-loops\\n",
 368                 argv[0]);
 369         exit(EXIT_FAILURE);
 370     }
 371
 372     parentCPU = atoi(argv[1]);
 373     childCPU = atoi(argv[2]);
 374     nloops = atoi(argv[3]);
 375
 376     CPU_ZERO(&set);
 377
 378     switch (fork()) {
 379     case \-1:            /* Error */
 380         errExit("fork");
 381
 382     case 0:             /* Child */
 383         CPU_SET(childCPU, &set);
 384
 385         if (sched_setaffinity(getpid(), sizeof(set), &set) == \-1)
 386             errExit("sched_setaffinity");
 387
 388         for (j = 0; j < nloops; j++)
 389             getppid();
 390
 391         exit(EXIT_SUCCESS);
 392
 393     default:            /* Parent */
 394         CPU_SET(parentCPU, &set);
 395
 396         if (sched_setaffinity(getpid(), sizeof(set), &set) == \-1)
 397             errExit("sched_setaffinity");
 398
 399         for (j = 0; j < nloops; j++)
 400             getppid();
 401
 402         wait(NULL);     /* Wait for child to terminate */
 403         exit(EXIT_SUCCESS);
 404     }
 405 }
 406 .EE
 407 .SH SEE ALSO
 408 .ad l
 409 .nh
 410 .BR lscpu (1),
 411 .BR nproc (1),
 412 .BR taskset (1),
 413 .BR clone (2),
 414 .BR getcpu (2),
 415 .BR getpriority (2),
 416 .BR gettid (2),
 417 .BR nice (2),
 418 .BR sched_get_priority_max (2),
 419 .BR sched_get_priority_min (2),
 420 .BR sched_getscheduler (2),
 421 .BR sched_setscheduler (2),
 422 .BR setpriority (2),
 423 .BR CPU_SET (3),
 424 .BR get_nprocs (3),
 425 .BR pthread_setaffinity_np (3),
 426 .BR sched_getcpu (3),
 427 .BR capabilities (7),
 428 .BR cpuset (7),
 429 .BR sched (7),
 430 .BR numactl (8)