1 .\" Copyright (C) 2019 Michael Kerrisk <mtk.manpages@gmail.com>
2 .\" A very few fragments remain from an earlier page written by
3 .\" Werner Almesberger in 2000
5 .\" SPDX-License-Identifier: Linux-man-pages-copyleft
7 .TH pivot_root 2 (date) "Linux man-pages (unreleased)"
9 pivot_root \- change the root mount
12 .RI ( libc ", " \-lc )
15 .BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
16 .B #include <unistd.h>
18 .BI "int syscall(SYS_pivot_root, const char *" new_root \
19 ", const char *" put_old );
23 glibc provides no wrapper for
25 necessitating the use of
29 changes the root mount in the mount namespace of the calling process.
30 More precisely, it moves the root mount to the
31 directory \fIput_old\fP and makes \fInew_root\fP the new root mount.
32 The calling process must have the
34 capability in the user namespace that owns the caller's mount namespace.
37 changes the root directory and the current working directory
38 of each process or thread in the same mount namespace to
40 if they point to the old root directory.
44 does not change the caller's current working directory
45 (unless it is on the old root directory),
46 and thus it should be followed by a
47 \fBchdir("/")\fP call.
49 The following restrictions apply:
59 must not be on the same mount as the current root.
61 \fIput_old\fP must be at or underneath \fInew_root\fP;
62 that is, adding some nonnegative
63 number of "\fI/..\fP" suffixes to the pathname pointed to by
65 must yield the same directory as \fInew_root\fP.
68 must be a path to a mount point, but can't be
70 A path that is not already a mount point can be converted into one by
71 bind mounting the path onto itself.
73 The propagation type of the parent mount of
75 and the parent mount of the current root directory must not be
79 is an existing mount point, its propagation type must not be
81 These restrictions ensure that
83 never propagates any changes to another mount namespace.
85 The current root directory must be a mount point.
87 On success, zero is returned.
88 On error, \-1 is returned, and
89 \fIerrno\fP is set to indicate the error.
92 may fail with any of the same errors as
94 Additionally, it may fail with the following errors:
97 .\" Reconfirmed that the following error occurs on Linux 5.0 by
98 .\" specifying 'new_root' as "/rootfs" and 'put_old' as
99 .\" "/rootfs/oldrootfs", and *not* bind mounting "/rootfs" on top of
100 .\" itself. Of course, this is an odd situation, since a later check
101 .\" in the kernel code will in any case yield EINVAL if 'new_root' is
102 .\" not a mount point. However, when the system call was first added,
103 .\" 'new_root' was not required to be a mount point. So, this
104 .\" error is nowadays probably just the result of crufty accumulation.
105 .\" This error can also occur if we bind mount "/" on top of itself
106 .\" and try to specify "/" as 'new_root' (again, an odd situation). So,
107 .\" the EBUSY check in the kernel does still seem necessary to prevent
108 .\" that case. Furthermore, the "or put_old" piece is probably
109 .\" redundant text (although the check is in the kernel), since,
110 .\" in another check, 'put_old' is required to be under 'new_root'.
114 is on the current root mount.
115 (This error covers the pathological case where
122 is not a mount point.
125 \fIput_old\fP is not at or underneath \fInew_root\fP.
128 The current root directory is not a mount point
129 (because of an earlier
133 The current root is on the rootfs (initial ramfs) mount; see NOTES.
136 Either the mount point at
138 or the parent mount of that mount point,
144 is a mount point and has the propagation type
148 \fInew_root\fP or \fIput_old\fP is not a directory.
151 The calling process does not have the
159 A command-line interface for this system call is provided by
163 allows the caller to switch to a new root filesystem while at the same time
164 placing the old root mount at a location under
166 from where it can subsequently be unmounted.
167 (The fact that it moves all processes that have a root directory
168 or current working directory on the old root directory to the
169 new root frees the old root directory of users,
170 allowing the old root mount to be unmounted more easily.)
174 is during system startup, when the
175 system mounts a temporary root filesystem (e.g., an
177 then mounts the real root filesystem, and eventually turns the latter into
178 the root directory of all relevant processes and threads.
179 A modern use is to set up a root filesystem during
180 the creation of a container.
184 modifies process root and current working directories in the
185 manner noted in DESCRIPTION
186 is necessary in order to prevent kernel threads from keeping the old
187 root mount busy with their root and current working directories,
188 even if they never access
189 the filesystem in any way.
191 The rootfs (initial ramfs) cannot be
193 The recommended method of changing the root filesystem in this case is
194 to delete everything in rootfs, overmount rootfs with the new root, attach
195 .IR stdin / stdout / stderr
200 Helper programs for this process exist; see
203 .SS pivot_root(\[dq].\[dq], \[dq].\[dq])
207 may be the same directory.
208 In particular, the following sequence allows a pivot-root operation
209 without needing to create and remove a temporary directory:
214 pivot_root(".", ".");
215 umount2(".", MNT_DETACH);
219 This sequence succeeds because the
221 call stacks the old root mount point
222 on top of the new root mount point at
224 At that point, the calling process's root directory and current
225 working directory refer to the new root mount point
227 During the subsequent
233 and then moves up the list of mounts stacked at
235 with the result that the old root mount point is unmounted.
238 For many years, this manual page carried the following text:
242 may or may not change the current root and the current
243 working directory of any processes or threads which use the old
247 must ensure that processes with root or current working directory
248 at the old root operate correctly in either case.
249 An easy way to ensure this is to change their
250 root and current working directory to \fInew_root\fP before invoking
254 This text, written before the system call implementation was
255 even finalized in the kernel, was probably intended to warn users
256 at that time that the implementation might change before final release.
257 However, the behavior stated in DESCRIPTION
258 has remained consistent since this system call
259 was first implemented and will not change now.
262 .\" Would it be better, because simpler, to use unshare(2)
263 .\" rather than clone(2) in the example below?
264 The program below demonstrates the use of
266 inside a mount namespace that is created using
268 After pivoting to the root directory named in the program's
269 first command-line argument, the child created by
271 then executes the program named in the remaining command-line arguments.
273 We demonstrate the program by creating a directory that will serve as
274 the new root filesystem and placing a copy of the (statically linked)
276 executable in that directory.
280 $ \fBmkdir /tmp/rootfs\fP
281 $ \fBls \-id /tmp/rootfs\fP # Show inode number of new root directory
283 $ \fBcp $(which busybox) /tmp/rootfs\fP
284 $ \fBPS1=\[aq]bbsh$ \[aq] sudo ./pivot_root_demo /tmp/rootfs /busybox sh\fP
286 bbsh$ \fBbusybox ln busybox ln\fP
287 bbsh$ \fBln busybox echo\fP
288 bbsh$ \fBln busybox ls\fP
291 bbsh$ \fBls \-id /\fP # Compare with inode number above
293 bbsh$ \fBecho \[aq]hello world\[aq]\fP
300 .\" SRC BEGIN (pivot_root.c)
302 /* pivot_root_demo.c */
311 #include <sys/mman.h>
312 #include <sys/mount.h>
313 #include <sys/stat.h>
314 #include <sys/syscall.h>
315 #include <sys/wait.h>
319 pivot_root(const char *new_root, const char *put_old)
321 return syscall(SYS_pivot_root, new_root, put_old);
324 #define STACK_SIZE (1024 * 1024)
326 static int /* Startup function for cloned child */
331 char *new_root = args[0];
332 const char *put_old = "/oldrootfs";
334 /* Ensure that \[aq]new_root\[aq] and its parent mount don\[aq]t have
335 shared propagation (which would cause pivot_root() to
336 return an error), and prevent propagation of mount
337 events to the initial mount namespace. */
339 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) == \-1)
340 err(EXIT_FAILURE, "mount\-MS_PRIVATE");
342 /* Ensure that \[aq]new_root\[aq] is a mount point. */
344 if (mount(new_root, new_root, NULL, MS_BIND, NULL) == \-1)
345 err(EXIT_FAILURE, "mount\-MS_BIND");
347 /* Create directory to which old root will be pivoted. */
349 snprintf(path, sizeof(path), "%s/%s", new_root, put_old);
350 if (mkdir(path, 0777) == \-1)
351 err(EXIT_FAILURE, "mkdir");
353 /* And pivot the root filesystem. */
355 if (pivot_root(new_root, path) == \-1)
356 err(EXIT_FAILURE, "pivot_root");
358 /* Switch the current working directory to "/". */
360 if (chdir("/") == \-1)
361 err(EXIT_FAILURE, "chdir");
363 /* Unmount old root and remove mount point. */
365 if (umount2(put_old, MNT_DETACH) == \-1)
367 if (rmdir(put_old) == \-1)
370 /* Execute the command specified in argv[1]... */
372 execv(args[1], &args[1]);
373 err(EXIT_FAILURE, "execv");
377 main(int argc, char *argv[])
381 /* Create a child process in a new mount namespace. */
383 stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
384 MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, \-1, 0);
385 if (stack == MAP_FAILED)
386 err(EXIT_FAILURE, "mmap");
388 if (clone(child, stack + STACK_SIZE,
389 CLONE_NEWNS | SIGCHLD, &argv[1]) == \-1)
390 err(EXIT_FAILURE, "clone");
392 /* Parent falls through to here; wait for child. */
394 if (wait(NULL) == \-1)
395 err(EXIT_FAILURE, "wait");
407 .BR mount_namespaces (7),