1 .\" Copyright (c) 2021 by Christian Brauner <christian.brauner@ubuntu.com>
3 .\" %%%LICENSE_START(VERBATIM)
4 .\" Permission is granted to make and distribute verbatim copies of this
5 .\" manual provided the copyright notice and this permission notice are
6 .\" preserved on all copies.
8 .\" Permission is granted to copy and distribute modified versions of this
9 .\" manual under the conditions for verbatim copying, provided that the
10 .\" entire resulting derived work is distributed under the terms of a
11 .\" permission notice identical to this one.
13 .\" Since the Linux kernel and libraries are constantly changing, this
14 .\" manual page may be incorrect or out-of-date. The author(s) assume no
15 .\" responsibility for errors or omissions, or for damages resulting from
16 .\" the use of the information contained herein. The author(s) may not
17 .\" have taken the same level of care in the production of this manual,
18 .\" which is licensed free of charge, as they might when working
21 .\" Formatted or processed versions of this manual, if unaccompanied by
22 .\" the source, must acknowledge the copyright and authors of this work.
25 .TH MOUNT_SETATTR 2 2021-03-22 "Linux" "Linux Programmer's Manual"
27 mount_setattr \- change mount properties of a mount or mount tree
32 .BR "#include <linux/fcntl.h>" " /* Definition of " AT_* " constants */"
33 .BR "#include <linux/mount.h>" " /* Definition of struct mount_attr and MOUNT_ATTR_* constants */"
34 .BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
35 .B #include <unistd.h>
37 .BI "int syscall(SYS_mount_setattr, int " dfd ", const char *" path \
38 ", unsigned int " flags \
39 ", struct mount_attr *" attr ", size_t " size );
43 glibc provides no wrapper for
45 necessitating the use of
50 system call changes the mount properties of a mount or entire mount tree.
53 is a relative pathname,
54 then it is interpreted relative to the directory referred to by the file
63 is taken to be relative to the current working directory of the calling process.
66 is the empty string and
70 then the mount properties of the mount identified by
76 system call uses an extensible structure
77 .IR ( "struct mount_attr" )
78 to allow for future extensions.
79 Any non-flag extensions to
81 will be implemented as new fields appended to the above structure,
82 with a zero value in a new field resulting in the kernel behaving
83 as though that extension field was not present.
87 zero-fill this structure on initialization.
88 Please see the "Extensibility" section under
94 argument should usually be specified as
95 .IR "sizeof(struct mount_attr)" .
97 if the caller does not intend to make use of features that got
98 introduced after the initial version of
100 they are free to pass the size of the initial struct together with the larger
102 This allows the kernel to not copy later parts of the struct that aren't used
104 With each extension that changes the size of
106 the kernel will expose a define of the form
107 .BR MOUNT_ATTR_SIZE_VER<number> .
108 For example the macro for the size of the initial version of
111 .BR MOUNT_ATTR_SIZE_VER0 .
115 argument can be used to alter the path resolution behavior.
116 The supported values are:
121 is the empty string, change the mount properties on
126 Change the mount properties of the entire mount tree.
128 .B AT_SYMLINK_NOFOLLOW
129 Don't follow trailing symlinks.
132 Don't trigger automounts.
137 .BR mount_setattr (2)
138 is a structure of the following form:
143 __u64 attr_set; /* Mount properties to set. */
144 __u64 attr_clr; /* Mount properties to clear. */
145 __u64 propagation; /* Mount propagation type. */
146 __u64 userns_fd; /* User namespace file descriptor. */
155 members are used to specify the mount properties that are supposed to be set or
156 cleared for a mount or mount tree.
159 enable a property on a mount or mount tree and flags set in
161 remove a property from a mount or mount tree.
163 When changing mount properties the kernel will first clear the flags specified
166 field and then set the flags specified in the
172 struct mount_attr attr = {
173 .attr_clr = MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NODEV,
174 .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
176 unsigned int current_mnt_flags = mnt->mnt_flags;
179 * Clear all flags set in .attr_clr,
180 * clearing MOUNT_ATTR_NOEXEC and MOUNT_ATTR_NODEV.
182 current_mnt_flags &= ~attr->attr_clr;
185 * Now set all flags set in .attr_set,
186 * applying MOUNT_ATTR_RDONLY and MOUNT_ATTR_NOSUID.
188 current_mnt_flags |= attr->attr_set;
190 mnt->mnt_flags = current_mnt_flags;
194 The effect of this change will be a mount or mount tree that is read-only,
195 blocks the execution of set-user-ID and set-group-ID binaries, but does allow
196 execution of programs and access to device nodes.
197 Multiple changes with the same set of flags requested
202 are guaranteed to be idempotent after the changes have been applied.
204 The following mount attributes can be specified in the
213 makes the mount read-only and if set in
215 removes the read-only setting if set on the mount.
220 makes the mount not honor set-user-ID and set-group-ID binaries,
221 and file capabilities when executing programs.
224 clears the set-user-ID, set-group-ID,
225 and file capability restriction if set on this mount.
230 prevents access to devices on this mount and if set in
232 removes the device access restriction if set on this mount.
234 .BR MOUNT_ATTR_NOEXEC
237 prevents executing programs on this mount and if set in
239 removes the restriction to execute programs on this mount.
241 .BR MOUNT_ATTR_NOSYMFOLLOW
244 prevents following symlinks on this mount and if set in
246 removes the restriction to not follow symlinks on this mount.
248 .B MOUNT_ATTR_NODIRATIME
251 prevents updating access time for directories on this mount and if set in
253 removes access time restriction for directories.
255 .BR MOUNT_ATTR_NODIRATIME
256 can be combined with other access time settings and is implied
257 by the noatime setting.
258 All other access time settings are mutually exclusive.
260 .BR MOUNT_ATTR__ATIME " \- Changing access time settings"
261 In the new mount API, the access time values are an enum starting from 0.
262 Even though they are an enum, in contrast to the other mount flags such as
263 .BR MOUNT_ATTR_NOEXEC ,
264 they are nonetheless passed in
270 which introduced this behavior.
273 since access times are an enum,
275 users wanting to transition to a different access time setting cannot simply
276 specify the access time in
283 The kernel will verify that
284 .BR MOUNT_ATTR__ATIME
285 isn't partially set in
289 doesn't have any access time bits set if
290 .BR MOUNT_ATTR__ATIME
295 .B MOUNT_ATTR_RELATIME
296 When a file is accessed via this mount,
297 update the file's last access time
299 only if the current value of atime is less than or equal to the file's
300 last modification time (mtime) or last status change time (ctime).
302 To enable this access time setting on a mount or mount tree
303 .BR MOUNT_ATTR_RELATIME
307 .BR MOUNT_ATTR__ATIME
312 .BR MOUNT_ATTR_NOATIME
313 Do not update access times for (all types of) files on this mount.
315 To enable this access time setting on a mount or mount tree
316 .BR MOUNT_ATTR_NOATIME
320 .BR MOUNT_ATTR__ATIME
325 .BR MOUNT_ATTR_STRICTATIME
326 Always update the last access time (atime) when files are accessed on this
329 To enable this access time setting on a mount or mount tree
330 .BR MOUNT_ATTR_STRICTATIME
334 .BR MOUNT_ATTR__ATIME
343 creates an idmapped mount.
344 Since it is not supported to change the idmapping of a mount after it has been
346 it is invalid to specify
350 The idmapping is taken from the user namespace specified in
352 and attached to the mount.
353 More details can be found in subsequent paragraphs.
355 Creating an idmapped mount allows changing the ownership of all files located
357 Thus, idmapped mounts make it possible to change ownership in a temporary and
359 It is a localized change because ownership changes are restricted to a specific
361 All other users and locations where the filesystem is exposed are unaffected.
362 And it is a temporary change because ownership changes are tied to the lifetime
365 Whenever callers interact with the filesystem through an idmapped mount the
366 idmapping of the mount will be applied to user and group IDs associated with
368 This encompasses the user and group IDs associated with inodes and also
375 .IR security.capability
378 are stored or returned in the
379 .I VFS_CAP_REVISION_3
380 format which stores a rootid alongside the capabilities.
382 .I system.posix_acl_access
384 .I system.posix_acl_default
385 whenever user IDs or group IDs are stored in
393 The following conditions must be met in order to create an idmapped mount:
399 in the initial user namespace.
401 The filesystem must be mounted in the initial user namespace.
403 The underlying filesystem must support idmapped mounts.
409 filesystems support idmapped mounts with more filesystems being actively worked
412 The mount must not already be idmapped.
413 This also implies that the idmapping of a mount cannot be altered.
415 The mount must be a detached/anonymous mount,
417 it must have been created by calling
421 flag and it must not already have been visible in the filesystem.
425 Idmappings can be created for user IDs, group IDs, and project IDs.
426 An idmapping is essentially a mapping of a range of user or group IDs into
427 another or the same range of user or group IDs.
428 Idmappings are usually written as three numbers either separated by white space
430 The first two numbers specify the starting user or group ID in each of the two
432 The third number specifies the range of the idmapping.
433 For example, a mapping for user IDs such as 1000:1001:1 would indicate that
434 user ID 1000 in the caller's user namespace is mapped to user ID 1001 in its
435 ancestor user namespace.
436 Since the map range is 1, only user ID 1000 is mapped.
437 It is possible to specify up to 340 idmappings for each idmapping type.
438 If any user IDs or group IDs are not mapped, all files owned by that unmapped
439 user or group ID will appear as being owned by the overflow user ID or overflow
440 group ID, respectively.
441 Further details and instructions for setting up idmappings can be found in the
442 .BR user_namespaces (7)
445 In the common case the user namespace passed in
451 to create an idmapped mount will be the user namespace of a container.
452 In other scenarios it will be a dedicated user namespace associated with a
453 user's login session as is the case for portable home directories in
454 .BR systemd-homed.service (8).
455 It is also perfectly fine to create a dedicated user namespace for the sake of
458 Idmapped mounts can be useful in the following and a variety of other
463 sharing files between multiple users or multiple machines especially in
466 idmapped mounts are used to implement portable home directories in
467 .BR systemd-homed.service (8)
468 where they allow users to move their home directory to an external storage
469 device and use it on multiple computers where they are assigned different user IDs
471 This effectively makes it possible to assign random user IDs and group IDs at login time.
473 sharing files from the host with unprivileged containers.
474 This allows a user to avoid having to change ownership permanently through
477 idmapping a container's root filesystem.
478 Users don't need to change ownership
481 Especially for large root filesystems using
483 can be prohibitively expensive.
485 sharing files between containers with non-overlapping
488 implementing discretionary access control (DAC) permission checking for filesystems
489 lacking a concept of ownership.
491 efficiently change ownership on a per-mount basis.
494 changing ownership of large sets of files is instantaneous with idmapped mounts.
495 This is especially useful when ownership of an entire root filesystem of a
496 virtual machine or container is to be changed as we've mentioned above.
497 With idmapped mounts a single
498 .BR mount_setattr (2)
499 system call will be sufficient to change the ownership of all files.
501 taking the current ownership into account.
502 Idmappings specify precisely what a user or group ID is supposed to be
504 This contrasts with the
506 system call which cannot by itself take the current ownership of the files it
507 changes into account.
508 It simply changes the ownership to the specified user ID and group ID.
510 locally and temporarily restricted ownership changes.
511 Idmapped mounts allow changing ownership locally,
512 restricting it to specific mounts,
513 and temporarily as the ownership changes only apply as long as the mount exists.
515 changing ownership via the
517 system call changes the ownership globally and permanently.
523 field is used to specify the propagation type of the mount or mount tree.
524 Mount propagation options are mutually exclusive,
526 the propagation values behave like an enum.
527 The supported mount propagation settings are:
530 Turn all mounts into private mounts.
531 Mount and unmount events do not propagate into or out of this mount point.
534 Turn all mounts into shared mounts.
535 Mount points share events with members of a peer group.
536 Mount and unmount events immediately under this mount point
537 will propagate to the other mount points that are members of the peer group.
538 Propagation here means that the same mount or unmount will automatically occur
539 under all of the other mount points in the peer group.
541 mount and unmount events that take place under peer mount points will propagate
545 Turn all mounts into dependent mounts.
546 Mount and unmount events propagate into this mount point from a shared peer
548 Mount and unmount events under this mount point do not propagate to any peer.
551 This is like a private mount,
552 and in addition this mount can't be bind mounted.
553 Attempts to bind mount this mount will fail.
554 When a recursive bind mount is performed on a directory subtree,
555 any bind mounts within the subtree are automatically pruned
556 (i.e., not replicated)
557 when replicating that subtree to produce the target subtree.
561 .BR mount_setattr (2)
566 is set to indicate the cause of the error.
571 is not a valid file descriptor.
575 is not a valid file descriptor.
578 The caller tried to change the mount to
579 .BR MOUNT_ATTR_RDONLY
580 but the mount still has files open for writing.
583 The path specified via the
588 .BR mount_setattr (2)
592 An unsupported value was set in
596 An unsupported value was specified in the
602 An unsupported value was specified in the
608 An unsupported value was specified in the
626 An access time setting was specified in the
629 .BR MOUNT_ATTR__ATIME
640 A file descriptor value was specified in
646 A valid file descriptor value was specified in
648 but the file descriptor wasn't a namespace file descriptor or did not refer to
652 The underlying filesystem does not support idmapped mounts.
655 The mount to idmap is not a detached/anonymous mount,
657 the mount is already visible in the filesystem.
660 A partial access time setting was specified in
663 .BR MOUNT_ATTR__ATIME
667 The mount is located outside the caller's mount namespace.
670 The underlying filesystem is mounted in a user namespace.
673 A pathname was empty or had a nonexistent component.
676 When changing mount propagation to
678 a new peer group id needs to be allocated for all mounts without a peer group
680 Allocation of this peer group id has failed.
683 When changing mount propagation to
685 a new peer group id needs to be allocated for all mounts without a peer group
687 Allocation of this peer group id can fail.
688 Note that technically further error codes are possible that are specific to the
689 id allocation implementation used.
692 One of the mounts had at least one of
693 .BR MOUNT_ATTR_NOATIME ,
694 .BR MOUNT_ATTR_NODEV ,
695 .BR MOUNT_ATTR_NODIRATIME ,
696 .BR MOUNT_ATTR_NOEXEC ,
697 .BR MOUNT_ATTR_NOSUID ,
699 .BR MOUNT_ATTR_RDONLY
700 set and the flag is locked.
701 Mount attributes become locked on a mount if:
704 a new mount or mount tree is created causing mount propagation across user
706 The kernel will lock the aforementioned flags to protect these sensitive
707 properties from being altered.
709 a new mount and user namespace pair is created.
710 This happens for example when specifying
711 .BR CLONE_NEWUSER " | " CLONE_NEWNS
717 The aforementioned flags become locked to protect user namespaces from altering
718 sensitive mount properties.
722 A valid file descriptor value was specified in
724 but the file descriptor refers to the initial user namespace.
727 An already idmapped mount was supposed to be idmapped.
730 The caller does not have
732 in the initial user namespace.
734 .BR mount_setattr (2)
735 first appeared in Linux 5.12.
736 .\" commit 7d6beb71da3cc033649d641e1e608713b8220290
737 .\" commit 2a1867219c7b27f928e2545782b86daaf9ad50bd
738 .\" commit 9caccd41541a6f7d6279928d9f971f6642c361af
740 .BR mount_setattr (2)
744 In order to allow for future extensibility,
745 .BR mount_setattr (2)
746 along with other system calls such as
750 requires the user-space application to specify the size of the
752 structure that it is passing.
753 By providing this information, it is possible for
754 .BR mount_setattr (2)
755 to provide both forwards- and backwards-compatibility, with
757 acting as an implicit version number.
758 (Because new extension fields will always
759 be appended, the structure size will always increase.)
760 This extensibility design is very similar to other system calls such as
761 .BR sched_setattr (2),
762 .BR perf_event_open (2),
769 be the size of the structure as specified by the user-space application,
772 be the size of the structure which the kernel supports,
773 then there are three cases to consider:
780 then there is no version mismatch and
782 can be used verbatim.
788 then there are some extension fields that the kernel supports which the
789 user-space application is unaware of.
790 Because a zero value in any added extension field signifies a no-op,
791 the kernel treats all of the extension fields not provided by the user-space
792 application as having zero values.
793 This provides backwards-compatibility.
799 then there are some extension fields which the user-space application is aware
800 of but which the kernel does not support.
801 Because any extension field must have its zero values signify a no-op,
802 the kernel can safely ignore the unsupported extension fields if they are
804 If any unsupported extension fields are non-zero, then \-1 is returned and
808 This provides forwards-compatibility.
811 Because the definition of
813 may change in the future
814 (with new fields being added when system headers are updated),
815 user-space applications should zero-fill
817 to ensure that recompiling the program with new headers will not result in
818 spurious errors at runtime.
819 The simplest way is to use a designated initializer:
823 struct mount_attr attr = {
824 .attr_set = MOUNT_ATTR_RDONLY,
825 .attr_clr = MOUNT_ATTR_NODEV
832 or similar functions:
836 struct mount_attr attr;
837 memset(&attr, 0, sizeof(attr));
838 attr.attr_set = MOUNT_ATTR_RDONLY;
839 attr.attr_clr = MOUNT_ATTR_NODEV;
843 A user-space application that wishes to determine which extensions the running
844 kernel supports can do so by conducting a binary search on
846 with a structure which has every byte nonzero
847 (to find the largest value which doesn't produce an error of
852 * This program allows the caller to create a new detached mount and set
853 * various properties on it.
859 #include <linux/mount.h>
860 #include <linux/types.h>
865 #include <sys/syscall.h>
868 static inline int mount_setattr(int dfd,
871 struct mount_attr *attr,
874 return syscall(SYS_mount_setattr, dfd, path,
878 static inline int open_tree(int dfd, const char *filename,
881 return syscall(SYS_open_tree, dfd, filename, flags);
884 static inline int move_mount(int from_dfd,
885 const char *from_pathname,
887 const char *to_pathname,
890 return syscall(SYS_move_mount, from_dfd,
891 from_pathname, to_dfd, to_pathname, flags);
894 static const struct option longopts[] = {
895 {"map-mount", required_argument, NULL, 'a'},
896 {"recursive", no_argument, NULL, 'b'},
897 {"read-only", no_argument, NULL, 'c'},
898 {"block-setid", no_argument, NULL, 'd'},
899 {"block-devices", no_argument, NULL, 'e'},
900 {"block-exec", no_argument, NULL, 'f'},
901 {"no-access-time", no_argument, NULL, 'g'},
902 { NULL, 0, NULL, 0 },
905 #define exit_log(format, ...) \\
907 fprintf(stderr, format, ##__VA_ARGS__); \\
908 exit(EXIT_FAILURE); \\
911 int main(int argc, char *argv[])
913 int fd_userns = \-EBADF, index = 0;
914 bool recursive = false;
915 struct mount_attr *attr = &(struct mount_attr){};
916 const char *source, *target;
917 int fd_tree, new_argc, ret;
918 char *const *new_argv;
920 while ((ret = getopt_long_only(argc, argv, "",
921 longopts, &index)) != \-1) {
924 fd_userns = open(optarg, O_RDONLY | O_CLOEXEC);
925 if (fd_userns == \-1)
926 exit_log("%m - Failed to open %s\en", optarg);
932 attr->attr_set |= MOUNT_ATTR_RDONLY;
935 attr->attr_set |= MOUNT_ATTR_NOSUID;
938 attr->attr_set |= MOUNT_ATTR_NODEV;
941 attr->attr_set |= MOUNT_ATTR_NOEXEC;
944 attr->attr_set |= MOUNT_ATTR_NOATIME;
945 attr->attr_clr |= MOUNT_ATTR__ATIME;
948 exit_log("Invalid argument specified");
952 new_argv = &argv[optind];
953 new_argc = argc \- optind;
955 exit_log("Missing source or target mountpoint\en");
956 source = new_argv[0];
957 target = new_argv[1];
959 fd_tree = open_tree(\-EBADF, source,
963 (recursive ? AT_RECURSIVE : 0));
965 exit_log("%m - Failed to open %s\en", source);
967 if (fd_userns >= 0) {
968 attr->attr_set |= MOUNT_ATTR_IDMAP;
969 attr->userns_fd = fd_userns;
971 ret = mount_setattr(fd_tree, "",
973 (recursive ? AT_RECURSIVE : 0),
974 attr, sizeof(struct mount_attr));
976 exit_log("%m - Failed to change mount attributes\en");
979 ret = move_mount(fd_tree, "", \-EBADF, target,
980 MOVE_MOUNT_F_EMPTY_PATH);
982 exit_log("%m - Failed to attach mount to %s\en", target);
990 .BR capabilities (7),
995 .BR mount_namespaces (7),
1000 .BR user_namespaces (7),