man2/membarrier.2

   1 .\" Copyright 2015-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
   2 .\"
   3 .\" %%%LICENSE_START(VERBATIM)
   4 .\" Permission is granted to make and distribute verbatim copies of this
   5 .\" manual provided the copyright notice and this permission notice are
   6 .\" preserved on all copies.
   7 .\"
   8 .\" Permission is granted to copy and distribute modified versions of this
   9 .\" manual under the conditions for verbatim copying, provided that the
  10 .\" entire resulting derived work is distributed under the terms of a
  11 .\" permission notice identical to this one.
  12 .\"
  13 .\" Since the Linux kernel and libraries are constantly changing, this
  14 .\" manual page may be incorrect or out-of-date.  The author(s) assume no
  15 .\" responsibility for errors or omissions, or for damages resulting from
  16 .\" the use of the information contained herein.  The author(s) may not
  17 .\" have taken the same level of care in the production of this manual,
  18 .\" which is licensed free of charge, as they might when working
  19 .\" professionally.
  20 .\"
  21 .\" Formatted or processed versions of this manual, if unaccompanied by
  22 .\" the source, must acknowledge the copyright and authors of this work.
  23 .\" %%%LICENSE_END
  24 .\"
  25 .TH MEMBARRIER 2 2021-03-22 "Linux" "Linux Programmer's Manual"
  26 .SH NAME
  27 membarrier \- issue memory barriers on a set of threads
  28 .SH SYNOPSIS
  29 .nf
  30 .PP
  31 .BR "#include <linux/membarrier.h>" \
  32 " /* Definition of " MEMBARRIER_* " constants */"
  33 .BR "#include <sys/syscall.h>" "      /* Definition of " SYS_* " constants */"
  34 .B #include <unistd.h>
  35 .PP
  36 .BI "int syscall(SYS_membarrier, int " cmd ", unsigned int " flags \
  37 ", int " cpu_id );
  38 .fi
  39 .PP
  40 .IR Note :
  41 glibc provides no wrapper for
  42 .BR membarrier (),
  43 necessitating the use of
  44 .BR syscall (2).
  45 .SH DESCRIPTION
  46 The
  47 .BR membarrier ()
  48 system call helps reduce the overhead of the memory barrier
  49 instructions required to order memory accesses on multi-core systems.
  50 However, this system call is heavier than a memory barrier, so using it
  51 effectively is
  52 .I not
  53 as simple as replacing memory barriers with this
  54 system call, but requires understanding of the details below.
  55 .PP
  56 Correct use of memory barriers requires taking into account that a
  57 memory barrier must either be matched with its memory barrier
  58 counterparts, or be used on an architecture whose memory model doesn't
  59 require the matching barriers.
  60 .PP
  61 There are cases where one side of the matching barriers (which we will
  62 refer to as "fast side") is executed much more often than the other
  63 (which we will refer to as "slow side").
  64 This is a prime target for the use of
  65 .BR membarrier ().
  66 The key idea is to replace, for these matching
  67 barriers, the fast-side memory barriers by simple compiler barriers,
  68 for example:
  69 .PP
  70 .in +4n
  71 .EX
  72 asm volatile ("" : : : "memory")
  73 .EE
  74 .in
  75 .PP
  76 and replace the slow-side memory barriers by calls to
  77 .BR membarrier ().
  78 .PP
  79 This will add overhead to the slow side, and remove overhead from the
  80 fast side, thus resulting in an overall performance increase as long as
  81 the slow side is infrequent enough that the overhead of the
  82 .BR membarrier ()
  83 calls does not outweigh the performance gain on the fast side.
  84 .PP
  85 The
  86 .I cmd
  87 argument is one of the following:
  88 .TP
  89 .BR MEMBARRIER_CMD_QUERY " (since Linux 4.3)"
  90 Query the set of supported commands.
  91 The return value of the call is a bit mask of supported
  92 commands.
  93 .BR MEMBARRIER_CMD_QUERY ,
  94 which has the value 0,
  95 is not itself included in this bit mask.
  96 This command is always supported (on kernels where
  97 .BR membarrier ()
  98 is provided).
  99 .TP
 100 .BR MEMBARRIER_CMD_GLOBAL " (since Linux 4.16)"
 101 Ensure that all threads from all processes on the system pass through a
 102 state where all memory accesses to user-space addresses match program
 103 order between entry to and return from the
 104 .BR membarrier ()
 105 system call.
 106 All threads on the system are targeted by this command.
 107 .TP
 108 .BR MEMBARRIER_CMD_GLOBAL_EXPEDITED " (since Linux 4.16)"
 109 Execute a memory barrier on all running threads of all processes that
 110 previously registered with
 111 .BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED .
 112 .IP
 113 Upon return from the system call, the calling thread has a guarantee that all
 114 running threads have passed through a state where all memory accesses to
 115 user-space addresses match program order between entry to and return
 116 from the system call (non-running threads are de facto in such a state).
 117 This guarantee is provided only for the threads of processes that
 118 previously registered with
 119 .BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED .
 120 .IP
 121 Given that registration is about the intent to receive the barriers, it
 122 is valid to invoke
 123 .BR MEMBARRIER_CMD_GLOBAL_EXPEDITED
 124 from a process that has not employed
 125 .BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED .
 126 .IP
 127 The "expedited" commands complete faster than the non-expedited ones;
 128 they never block, but have the downside of causing extra overhead.
 129 .TP
 130 .BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED " (since Linux 4.16)"
 131 Register the process's intent to receive
 132 .BR MEMBARRIER_CMD_GLOBAL_EXPEDITED
 133 memory barriers.
 134 .TP
 135 .BR MEMBARRIER_CMD_PRIVATE_EXPEDITED " (since Linux 4.14)"
 136 Execute a memory barrier on each running thread belonging to the same
 137 process as the calling thread.
 138 .IP
 139 Upon return from the system call, the calling
 140 thread has a guarantee that all its running thread siblings have passed
 141 through a state where all memory accesses to user-space addresses match
 142 program order between entry to and return from the system call
 143 (non-running threads are de facto in such a state).
 144 This guarantee is provided only for threads in
 145 the same process as the calling thread.
 146 .IP
 147 The "expedited" commands complete faster than the non-expedited ones;
 148 they never block, but have the downside of causing extra overhead.
 149 .IP
 150 A process must register its intent to use the private
 151 expedited command prior to using it.
 152 .TP
 153 .BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED " (since Linux 4.14)"
 154 Register the process's intent to use
 155 .BR MEMBARRIER_CMD_PRIVATE_EXPEDITED .
 156 .TP
 157 .BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE " (since Linux 4.16)"
 158 In addition to providing the memory ordering guarantees described in
 159 .BR MEMBARRIER_CMD_PRIVATE_EXPEDITED ,
 160 upon return from the system call, the calling thread has a guarantee that all its
 161 running thread siblings have executed a core serializing instruction.
 162 This guarantee is provided only for threads in
 163 the same process as the calling thread.
 164 .IP
 165 The "expedited" commands complete faster than the non-expedited ones;
 166 they never block, but have the downside of causing extra overhead.
 167 .IP
 168 A process must register its intent to use the private expedited sync
 169 core command prior to using it.
 170 .TP
 171 .BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE " (since Linux 4.16)"
 172 Register the process's intent to use
 173 .BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE .
 174 .TP
 175 .BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ " (since Linux 5.10)"
 176 Ensure that, upon return from the system call, all of the calling thread's
 177 running thread siblings have any currently running rseq critical sections
 178 restarted if the
 179 .I flags
 180 parameter is 0; if the
 181 .I flags
 182 parameter is
 183 .BR MEMBARRIER_CMD_FLAG_CPU ,
 184 then this operation is performed only on the CPU indicated by
 185 .IR cpu_id .
 186 This guarantee is provided only for threads in
 187 the same process as the calling thread.
 188 .IP
 189 RSEQ membarrier is only available in the "private expedited" form.
 190 .IP
 191 A process must register its intent to use the private expedited rseq
 192 command prior to using it.
 193 .TP
 194 .BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ " (since Linux 5.10)"
 195 Register the process's intent to use
 196 .BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ .
 197 .TP
 198 .BR MEMBARRIER_CMD_SHARED " (since Linux 4.3)"
 199 This is an alias for
 200 .BR MEMBARRIER_CMD_GLOBAL
 201 that exists for header backward compatibility.
 202 .PP
 203 The
 204 .I flags
 205 argument must be specified as 0 unless the command is
 206 .BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ ,
 207 in which case
 208 .I flags
 209 can be either 0 or
 210 .BR MEMBARRIER_CMD_FLAG_CPU .
 211 .PP
 212 The
 213 .I cpu_id
 214 argument is ignored unless
 215 .I flags
 216 is
 217 .BR MEMBARRIER_CMD_FLAG_CPU ,
 218 in which case it must specify the CPU targeted by this membarrier
 219 command.
 220 .PP
 221 All memory accesses performed in program order from each targeted thread
 222 are guaranteed to be ordered with respect to
 223 .BR membarrier ().
 224 .PP
 225 If we use the notation
 226 .I barrier()
 227 to represent a compiler barrier forcing memory
 228 accesses to be performed in program order across the barrier, and
 229 .I smp_mb()
 230 to represent explicit memory barriers forcing full memory
 231 ordering across the barrier, we have the following ordering table for
 232 each pairing of
 233 .IR barrier() ,
 234 .BR membarrier (),
 235 and
 236 .IR smp_mb() .
 237 The pair ordering is detailed as (O: ordered, X: not ordered):
 238 .PP
 239                        barrier()  smp_mb()  membarrier()
 240        barrier()          X          X          O
 241        smp_mb()           X          O          O
 242        membarrier()       O          O          O
 243 .SH RETURN VALUE
 244 On success, the
 245 .B MEMBARRIER_CMD_QUERY
 246 operation returns a bit mask of supported commands, and the
 247 .BR MEMBARRIER_CMD_GLOBAL ,
 248 .BR MEMBARRIER_CMD_GLOBAL_EXPEDITED ,
 249 .BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED ,
 250 .BR MEMBARRIER_CMD_PRIVATE_EXPEDITED ,
 251 .BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED ,
 252 .BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE ,
 253 .BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE ,
 254 .BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ ", and " MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ
 255 operations return zero.
 256 On error, \-1 is returned,
 257 and
 258 .I errno
 259 is set to indicate the error.
 260 .PP
 261 For a given command, with
 262 .I flags
 263 set to 0, this system call is
 264 guaranteed to always return the same value until reboot.
 265 Further calls with the same arguments will lead to the same result.
 266 Therefore, with
 267 .I flags
 268 set to 0, error handling is required only for the first call to
 269 .BR membarrier ().
 270 .SH ERRORS
 271 .TP
 272 .B EINVAL
 273 .I cmd
 274 is invalid, or
 275 .I flags
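 276 is nonzero (or, for \fBMEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ\fP, is neither 0 nor \fBMEMBARRIER_CMD_FLAG_CPU\fP), or the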
 277 .BR MEMBARRIER_CMD_GLOBAL
 278 command is disabled because the
 279 .I nohz_full
 280 CPU parameter has been set, or the
 281 .BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE
 282 and
 283 .BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE
 284 commands are not implemented by the architecture.
 285 .TP
 286 .B ENOSYS
 287 The
 288 .BR membarrier ()
 289 system call is not implemented by this kernel.
 290 .TP
 291 .B EPERM
 292 The current process was not registered prior to using private expedited
 293 commands.
 294 .SH VERSIONS
 295 The
 296 .BR membarrier ()
 297 system call was added in Linux 4.3.
 298 .PP
 299 Before Linux 5.10, the prototype for
 300 .BR membarrier ()
 301 was:
 302 .PP
 303 .in +4n
 304 .EX
 305 .BI "int membarrier(int " cmd ", int " flags );
 306 .EE
 307 .in
 308 .SH CONFORMING TO
 309 .BR membarrier ()
 310 is Linux-specific.
 311 .\" .SH SEE ALSO
 312 .\" FIXME See if the following syscalls make it into Linux 4.15 or later
 313 .\" .BR cpu_opv (2),
 314 .\" .BR rseq (2)
 315 .SH NOTES
 316 A memory barrier instruction is part of the instruction set of
 317 architectures with weakly ordered memory models.
 318 It orders memory
 319 accesses prior to the barrier and after the barrier with respect to
 320 matching barriers on other cores.
 321 For instance, a load fence can order
 322 loads prior to and following that fence with respect to stores ordered
 323 by store fences.
 324 .PP
 325 Program order is the order in which instructions appear in the
 326 program assembly code.
 327 .PP
 328 Examples where
 329 .BR membarrier ()
 330 can be useful include implementations
 331 of Read-Copy-Update libraries and garbage collectors.
 332 .SH EXAMPLES
 333 Assuming a multithreaded application where "fast_path()" is executed
 334 very frequently, and where "slow_path()" is executed infrequently, the
 335 following code (x86) can be transformed using
 336 .BR membarrier ():
 337 .PP
 338 .in +4n
 339 .EX
 340 #include <stdlib.h>
 341
 342 static volatile int a, b;
 343
 344 static void
 345 fast_path(int *read_b)
 346 {
 347     a = 1;
 348     asm volatile ("mfence" : : : "memory");
 349     *read_b = b;
 350 }
 351
 352 static void
 353 slow_path(int *read_a)
 354 {
 355     b = 1;
 356     asm volatile ("mfence" : : : "memory");
 357     *read_a = a;
 358 }
 359
 360 int
 361 main(int argc, char *argv[])
 362 {
 363     int read_a, read_b;
 364
 365     /*
 366      * Real applications would call fast_path() and slow_path()
 367      * from different threads. Call those from main() to keep
 368      * this example short.
 369      */
 370
 371     slow_path(&read_a);
 372     fast_path(&read_b);
 373
 374     /*
 375      * read_b == 0 implies read_a == 1 and
 376      * read_a == 0 implies read_b == 1.
 377      */
 378
 379     if (read_b == 0 && read_a == 0)
 380         abort();
 381
 382     exit(EXIT_SUCCESS);
 383 }
 384 .EE
 385 .in
 386 .PP
 387 The code above transformed to use
 388 .BR membarrier ()
 389 becomes:
 390 .PP
 391 .in +4n
 392 .EX
 393 #define _GNU_SOURCE
 394 #include <stdlib.h>
 395 #include <stdio.h>
 396 #include <unistd.h>
 397 #include <sys/syscall.h>
 398 #include <linux/membarrier.h>
 399
 400 static volatile int a, b;
 401
 402 static int
 403 membarrier(int cmd, unsigned int flags, int cpu_id)
 404 {
 405     return syscall(__NR_membarrier, cmd, flags, cpu_id);
 406 }
 407
 408 static int
 409 init_membarrier(void)
 410 {
 411     int ret;
 412
 413     /* Check that membarrier() is supported. */
 414
 415     ret = membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
 416     if (ret < 0) {
 417         perror("membarrier");
 418         return \-1;
 419     }
 420
 421     if (!(ret & MEMBARRIER_CMD_GLOBAL)) {
 422         fprintf(stderr,
 423             "membarrier does not support MEMBARRIER_CMD_GLOBAL\en");
 424         return \-1;
 425     }
 426
 427     return 0;
 428 }
 429
 430 static void
 431 fast_path(int *read_b)
 432 {
 433     a = 1;
 434     asm volatile ("" : : : "memory");
 435     *read_b = b;
 436 }
 437
 438 static void
 439 slow_path(int *read_a)
 440 {
 441     b = 1;
 442     membarrier(MEMBARRIER_CMD_GLOBAL, 0, 0);
 443     *read_a = a;
 444 }
 445
 446 int
 447 main(int argc, char *argv[])
 448 {
 449     int read_a, read_b;
 450
 451     if (init_membarrier())
 452         exit(EXIT_FAILURE);
 453
 454     /*
 455      * Real applications would call fast_path() and slow_path()
 456      * from different threads. Call those from main() to keep
 457      * this example short.
 458      */
 459
 460     slow_path(&read_a);
 461     fast_path(&read_b);
 462
 463     /*
 464      * read_b == 0 implies read_a == 1 and
 465      * read_a == 0 implies read_b == 1.
 466      */
 467
 468     if (read_b == 0 && read_a == 0)
 469         abort();
 470
 471     exit(EXIT_SUCCESS);
 472 }
 473 .EE
 474 .in