1 .\" Copyright (c) 2021 by Christian Brauner <christian.brauner@ubuntu.com>
3 .\" %%%LICENSE_START(VERBATIM)
4 .\" Permission is granted to make and distribute verbatim copies of this
5 .\" manual provided the copyright notice and this permission notice are
6 .\" preserved on all copies.
8 .\" Permission is granted to copy and distribute modified versions of this
9 .\" manual under the conditions for verbatim copying, provided that the
10 .\" entire resulting derived work is distributed under the terms of a
11 .\" permission notice identical to this one.
13 .\" Since the Linux kernel and libraries are constantly changing, this
14 .\" manual page may be incorrect or out-of-date. The author(s) assume no
15 .\" responsibility for errors or omissions, or for damages resulting from
16 .\" the use of the information contained herein. The author(s) may not
17 .\" have taken the same level of care in the production of this manual,
18 .\" which is licensed free of charge, as they might when working
21 .\" Formatted or processed versions of this manual, if unaccompanied by
22 .\" the source, must acknowledge the copyright and authors of this work.
25 .TH MOUNT_SETATTR 2 2021-03-22 "Linux" "Linux Programmer's Manual"
27 mount_setattr \- change mount properties of a mount or mount tree
32 .BR "#include <linux/fcntl.h>" " /* Definition of " AT_* " constants */"
33 .BR "#include <linux/mount.h>" " /* Definition of " MOUNT_ATTR_* " constants */"
34 .BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
35 .B #include <unistd.h>
37 .BI "int syscall(SYS_mount_setattr, int " dirfd ", const char *" path ,
38 .BI " unsigned int " flags ", struct mount_attr *" attr \
43 glibc provides no wrapper for
45 necessitating the use of
50 system call changes the mount properties of a mount or an entire mount tree.
53 is a relative pathname,
54 then it is interpreted relative to
55 the directory referred to by the file descriptor
63 is interpreted relative to
64 the current working directory of the calling process.
67 is the empty string and
71 then the mount properties of the mount identified by
77 system call uses an extensible structure
78 .RI ( "struct mount_attr" )
79 to allow for future extensions.
80 Any non-flag extensions to
82 will be implemented as new fields appended to this structure,
83 with a zero value in a new field resulting in the kernel behaving
84 as though that extension field was not present.
88 zero-fill this structure on initialization.
89 See the "Extensibility" subsection under
95 argument should usually be specified as
96 .IR "sizeof(struct mount_attr)" .
98 if the caller does not intend to make use of features that
99 got introduced after the initial version of
100 .IR "struct mount_attr" ,
101 it is possible to pass
102 the size of the initial struct together with the larger struct.
103 This allows the kernel to not copy later parts of the struct
104 that aren't used anyway.
105 With each extension that changes the size of
106 .IR "struct mount_attr" ,
107 the kernel will expose a definition of the form
108 .BI MOUNT_ATTR_SIZE_VER number\c
110 For example, the macro for the size of the initial version of
113 .BR MOUNT_ATTR_SIZE_VER0 .
117 argument can be used to alter the path resolution behavior.
118 The supported values are:
124 change the mount properties on
129 Change the mount properties of the entire mount tree.
131 .B AT_SYMLINK_NOFOLLOW
132 Don't follow trailing symbolic links.
135 Don't trigger automounts.
141 is a structure of the following form:
146 __u64 attr_set; /* Mount properties to set */
147 __u64 attr_clr; /* Mount properties to clear */
148 __u64 propagation; /* Mount propagation type */
149 __u64 userns_fd; /* User namespace file descriptor */
158 members are used to specify the mount properties that
159 are supposed to be set or cleared for a mount or mount tree.
162 enable a property on a mount or mount tree,
165 remove a property from a mount or mount tree.
167 When changing mount properties,
168 the kernel will first clear the flags specified
172 and then set the flags specified in the
178 struct mount_attr attr = {
179 .attr_clr = MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NODEV,
180 .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
182 unsigned int current_mnt_flags = mnt->mnt_flags;
185 * Clear all flags set in .attr_clr,
186 * clearing MOUNT_ATTR_NOEXEC and MOUNT_ATTR_NODEV.
188 current_mnt_flags &= ~attr->attr_clr;
191 * Now set all flags set in .attr_set,
192 * applying MOUNT_ATTR_RDONLY and MOUNT_ATTR_NOSUID.
194 current_mnt_flags |= attr->attr_set;
196 mnt->mnt_flags = current_mnt_flags;
200 As a result of this change, the mount or mount tree (a) is read-only;
201 (b) blocks the execution of set-user-ID and set-group-ID programs;
202 (c) allows execution of programs; and (d) allows access to devices.
204 Multiple changes with the same set of flags requested
209 are guaranteed to be idempotent after the changes have been applied.
211 The following mount attributes can be specified in the
220 makes the mount read-only.
223 removes the read-only setting if set on the mount.
228 causes the mount not to honor the set-user-ID and set-group-ID mode bits and
229 file capabilities when executing programs.
232 clears the set-user-ID, set-group-ID,
233 and file capability restriction if set on this mount.
238 prevents access to devices on this mount.
241 removes the restriction that prevented accessing devices on this mount.
246 prevents executing programs on this mount.
249 removes the restriction that prevented executing programs on this mount.
251 .B MOUNT_ATTR_NOSYMFOLLOW
254 prevents following symbolic links on this mount.
257 removes the restriction that prevented following symbolic links on this mount.
259 .B MOUNT_ATTR_NODIRATIME
262 prevents updating access time for directories on this mount.
265 removes the restriction that prevented updating access time for directories.
267 .B MOUNT_ATTR_NODIRATIME
268 can be combined with other access-time settings
269 and is implied by the noatime setting.
270 All other access-time settings are mutually exclusive.
272 .BR MOUNT_ATTR__ATIME " - changing access-time settings"
273 In the new mount API, the access-time values are an enum starting from 0.
274 Even though they are an enum (in contrast to the other mount flags such as
275 .BR MOUNT_ATTR_NOEXEC ),
276 they are nonetheless passed in
282 which introduced this behavior.
285 since access times are an enum
287 users wanting to transition to a different access-time setting cannot simply
288 specify the access-time setting in
295 The kernel will verify that
297 isn't partially set in
301 doesn't have any access-time bits set if
307 .B MOUNT_ATTR_RELATIME
308 When a file is accessed via this mount,
309 update the file's last access time (atime)
310 only if the current value of atime is less than or equal to
311 the file's last modification time (mtime) or last status change time (ctime).
313 To enable this access-time setting on a mount or mount tree,
314 .B MOUNT_ATTR_RELATIME
323 .B MOUNT_ATTR_NOATIME
324 Do not update access times for (all types of) files on this mount.
326 To enable this access-time setting on a mount or mount tree,
327 .B MOUNT_ATTR_NOATIME
336 .B MOUNT_ATTR_STRICTATIME
337 Always update the last access time (atime)
338 when files are accessed on this mount.
340 To enable this access-time setting on a mount or mount tree,
341 .B MOUNT_ATTR_STRICTATIME
354 creates an ID-mapped mount.
355 The ID mapping is taken from the user namespace specified in
357 and attached to the mount.
359 Since it is not supported to
360 change the ID mapping of a mount after it has been ID mapped,
361 it is invalid to specify
366 For further details, see the subsection "ID-mapped mounts" under NOTES.
370 field is used to specify the propagation type of the mount or mount tree.
371 This field either has the value zero,
372 meaning leave the propagation type unchanged, or it has one of
373 the following values:
376 Turn all mounts into private mounts.
379 Turn all mounts into shared mounts.
382 Turn all mounts into dependent mounts.
385 Turn all mounts into unbindable mounts.
387 For further details on the above propagation types, see
388 .BR mount_namespaces (7).
396 is set to indicate the cause of the error.
401 is not a valid file descriptor.
405 is not a valid file descriptor.
408 The caller tried to change the mount to
409 .BR MOUNT_ATTR_RDONLY ,
410 but the mount still holds files open for writing.
413 The path specified via the
422 An unsupported value was set in
426 An unsupported value was specified in the
432 An unsupported value was specified in the
438 An unsupported value was specified in the
456 An access-time setting was specified in the
470 A file descriptor value was specified in
476 A valid file descriptor value was specified in
478 but the file descriptor wasn't a namespace file descriptor
479 or did not refer to a user namespace.
482 The underlying filesystem does not support ID-mapped mounts.
485 The mount that is to be ID mapped is not a detached/anonymous mount;
486 that is, the mount is already visible in the filesystem.
489 A partial access-time setting was specified in
496 The mount is located outside the caller's mount namespace.
499 The underlying filesystem is mounted in a user namespace.
502 A pathname was empty or had a nonexistent component.
505 When changing mount propagation to
507 a new peer group ID needs to be allocated for all mounts without a peer group
509 Allocation of this peer group ID has failed.
512 When changing mount propagation to
514 a new peer group ID needs to be allocated for all mounts without a peer group
516 Allocation of this peer group ID has failed.
517 Note that technically further error codes are possible that are specific to the
518 ID allocation implementation used.
521 One of the mounts had at least one of
522 .BR MOUNT_ATTR_NOATIME ,
523 .BR MOUNT_ATTR_NODEV ,
524 .BR MOUNT_ATTR_NODIRATIME ,
525 .BR MOUNT_ATTR_NOEXEC ,
526 .BR MOUNT_ATTR_NOSUID ,
529 set and the flag is locked.
530 Mount attributes become locked on a mount if:
533 A new mount or mount tree is created causing mount propagation across user
535 The kernel will lock the aforementioned flags to protect these sensitive
536 properties from being altered.
538 A new mount and user namespace pair is created.
539 This happens for example when specifying
540 .B CLONE_NEWUSER | CLONE_NEWNS
546 The aforementioned flags become locked to protect user namespaces from altering
547 sensitive mount properties.
551 A valid file descriptor value was specified in
553 but the file descriptor refers to the initial user namespace.
556 An already ID-mapped mount was supposed to be ID mapped.
559 The caller does not have
561 in the initial user namespace.
564 first appeared in Linux 5.12.
565 .\" commit 7d6beb71da3cc033649d641e1e608713b8220290
566 .\" commit 2a1867219c7b27f928e2545782b86daaf9ad50bd
567 .\" commit 9caccd41541a6f7d6279928d9f971f6642c361af
573 Creating an ID-mapped mount makes it possible to
574 change the ownership of all files located under a mount.
575 Thus, ID-mapped mounts make it possible to
576 change ownership in a temporary and localized way.
577 It is a localized change because
578 ownership changes are restricted to a specific mount.
579 All other users and locations where the filesystem is exposed are unaffected.
580 And it is a temporary change because
581 ownership changes are tied to the lifetime of the mount.
583 Whenever callers interact with the filesystem through an ID-mapped mount,
584 the ID mapping of the mount will be applied to
585 user and group IDs associated with filesystem objects.
586 This encompasses the user and group IDs associated with inodes
587 and also the following
591 .IR security.capability ,
592 whenever filesystem capabilities
593 are stored or returned in the
594 .B VFS_CAP_REVISION_3
596 which stores a root user ID alongside the capabilities
598 .BR capabilities (7)).
600 .I system.posix_acl_access
602 .IR system.posix_acl_default ,
603 whenever user IDs or group IDs are stored in
609 The following conditions must be met in order to create an ID-mapped mount:
611 The caller must have the
613 capability in the initial user namespace.
615 The filesystem must be mounted in the initial user namespace.
617 The underlying filesystem must support ID-mapped mounts.
623 filesystems support ID-mapped mounts
624 with more filesystems being actively worked on.
626 The mount must not already be ID-mapped.
627 This also implies that the ID mapping of a mount cannot be altered.
629 The mount must be a detached/anonymous mount;
631 it must have been created by calling
635 flag and it must not already have been visible in the filesystem.
637 ID mappings can be created for user IDs, group IDs, and project IDs.
638 An ID mapping is essentially a mapping of a range of user or group IDs into
639 another or the same range of user or group IDs.
640 ID mappings are usually written as three numbers
641 either separated by white space or a full stop.
642 The first two numbers specify the starting user or group ID
643 in each of the two user namespaces.
644 The third number specifies the range of the ID mapping.
645 For example, a mapping for user IDs such as "1000 1001 1" would indicate that
646 user ID 1000 in the caller's user namespace is mapped to
647 user ID 1001 in its ancestor user namespace.
648 Since the map range is 1,
649 only user ID 1000 is mapped.
651 It is possible to specify up to 340 ID mappings for each ID mapping type.
652 If any user IDs or group IDs are not mapped,
653 all files owned by that unmapped user or group ID will appear as
654 being owned by the overflow user ID or overflow group ID respectively.
656 Further details and instructions for setting up ID mappings can be found in the
657 .BR user_namespaces (7)
660 In the common case, the user namespace passed in
666 to create an ID-mapped mount will be the user namespace of a container.
667 In other scenarios it will be a dedicated user namespace associated with
668 a user's login session as is the case for portable home directories in
669 .BR systemd-homed.service (8).
670 It is also perfectly fine to create a dedicated user namespace
671 for the sake of ID mapping a mount.
673 ID-mapped mounts can be useful in the following
674 and a variety of other scenarios:
676 Sharing files between multiple users or multiple machines,
677 especially in complex scenarios.
679 ID-mapped mounts are used to implement portable home directories in
680 .BR systemd-homed.service (8),
681 where they allow users to move their home directory
682 to an external storage device
683 and use it on multiple computers
684 where they are assigned different user IDs and group IDs.
685 This effectively makes it possible to
686 assign random user IDs and group IDs at login time.
688 Sharing files from the host with unprivileged containers.
689 This allows a user to avoid having to change ownership permanently through
692 ID mapping a container's root filesystem.
693 Users don't need to change ownership permanently through
695 Especially for large root filesystems, using
697 can be prohibitively expensive.
699 Sharing files between containers with non-overlapping ID mappings.
701 Implementing discretionary access control (DAC) permission checking
702 for filesystems lacking a concept of ownership.
704 Efficiently changing ownership on a per-mount basis.
707 changing ownership of large sets of files is instantaneous with
709 This is especially useful when ownership of
710 an entire root filesystem of a virtual machine or container
711 is to be changed as mentioned above.
712 With ID-mapped mounts,
715 system call will be sufficient to change the ownership of all files.
717 Taking the current ownership into account.
718 ID mappings specify precisely
719 what a user or group ID is supposed to be mapped to.
720 This contrasts with the
722 system call which cannot by itself
723 take the current ownership of the files it changes into account.
724 It simply changes the ownership to the specified user ID and group ID.
726 Locally and temporarily restricted ownership changes.
727 ID-mapped mounts make it possible to change ownership locally,
728 restricting it to specific mounts,
729 and temporarily as the ownership changes only apply as long as the mount exists.
731 changing ownership via the
733 system call changes the ownership globally and permanently.
736 In order to allow for future extensibility,
738 requires the user-space application to specify the size of the
740 structure that it is passing.
741 By providing this information, it is possible for
743 to provide both forwards- and backwards-compatibility, with
745 acting as an implicit version number.
746 (Because new extension fields will always
747 be appended, the structure size will always increase.)
748 This extensibility design is very similar to other system calls such as
749 .BR sched_setattr (2),
750 .BR perf_event_open (2),
757 be the size of the structure as specified by the user-space application,
760 be the size of the structure which the kernel supports,
761 then there are three cases to consider:
767 then there is no version mismatch and
769 can be used verbatim.
775 then there are some extension fields that the kernel supports
776 which the user-space application is unaware of.
777 Because a zero value in any added extension field signifies a no-op,
778 the kernel treats all of the extension fields
779 not provided by the user-space application
780 as having zero values.
781 This provides backwards-compatibility.
787 then there are some extension fields which the user-space application is aware
788 of but which the kernel does not support.
789 Because any extension field must have its zero values signify a no-op,
790 the kernel can safely ignore the unsupported extension fields
791 if they are all zero.
792 If any unsupported extension fields are non-zero,
793 then \-1 is returned and
797 This provides forwards-compatibility.
799 Because the definition of
801 may change in the future
802 (with new fields being added when system headers are updated),
803 user-space applications should zero-fill
805 to ensure that recompiling the program with new headers will not result in
806 spurious errors at runtime.
807 The simplest way is to use a designated initializer:
811 struct mount_attr attr = {
812 .attr_set = MOUNT_ATTR_RDONLY,
813 .attr_clr = MOUNT_ATTR_NODEV
818 Alternatively, the structure can be zero-filled using
820 or similar functions:
824 struct mount_attr attr;
825 memset(&attr, 0, sizeof(attr));
826 attr.attr_set = MOUNT_ATTR_RDONLY;
827 attr.attr_clr = MOUNT_ATTR_NODEV;
831 A user-space application that wishes to determine which extensions the running
832 kernel supports can do so by conducting a binary search on
834 with a structure which has every byte nonzero
835 (to find the largest value which doesn't produce an error of
840 * This program allows the caller to create a new detached mount
841 * and set various properties on it.
847 #include <linux/mount.h>
848 #include <linux/types.h>
853 #include <sys/syscall.h>
857 mount_setattr(int dirfd, const char *path, unsigned int flags,
858 struct mount_attr *attr, size_t size)
860 return syscall(SYS_mount_setattr, dirfd, path, flags, attr, size);
864 open_tree(int dirfd, const char *filename, unsigned int flags)
866 return syscall(SYS_open_tree, dirfd, filename, flags);
870 move_mount(int from_dirfd, const char *from_pathname,
871 int to_dirfd, const char *to_pathname, unsigned int flags)
873 return syscall(SYS_move_mount, from_dirfd, from_pathname,
874 to_dirfd, to_pathname, flags);
877 static const struct option longopts[] = {
878 {"map\-mount", required_argument, NULL, 'a'},
879 {"recursive", no_argument, NULL, 'b'},
880 {"read\-only", no_argument, NULL, 'c'},
881 {"block\-setid", no_argument, NULL, 'd'},
882 {"block\-devices", no_argument, NULL, 'e'},
883 {"block\-exec", no_argument, NULL, 'f'},
884 {"no\-access\-time", no_argument, NULL, 'g'},
885 { NULL, 0, NULL, 0 },
888 #define exit_log(format, ...) do \e
890 fprintf(stderr, format, ##__VA_ARGS__); \e
891 exit(EXIT_FAILURE); \e
895 main(int argc, char *argv[])
897 struct mount_attr *attr = &(struct mount_attr){};
898 int fd_userns = \-EBADF;
899 bool recursive = false;
903 while ((ret = getopt_long_only(argc, argv, "",
904 longopts, &index)) != \-1) {
907 fd_userns = open(optarg, O_RDONLY | O_CLOEXEC);
908 if (fd_userns == \-1)
909 exit_log("%m \- Failed to open %s\en", optarg);
915 attr\->attr_set |= MOUNT_ATTR_RDONLY;
918 attr\->attr_set |= MOUNT_ATTR_NOSUID;
921 attr\->attr_set |= MOUNT_ATTR_NODEV;
924 attr\->attr_set |= MOUNT_ATTR_NOEXEC;
927 attr\->attr_set |= MOUNT_ATTR_NOATIME;
928 attr\->attr_clr |= MOUNT_ATTR__ATIME;
931 exit_log("Invalid argument specified");
935 if ((argc \- optind) < 2)
936 exit_log("Missing source or target mount point\en");
938 const char *source = argv[optind];
939 const char *target = argv[optind + 1];
941 int fd_tree = open_tree(\-EBADF, source,
942 OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC |
943 AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0));
945 exit_log("%m \- Failed to open %s\en", source);
947 if (fd_userns >= 0) {
948 attr\->attr_set |= MOUNT_ATTR_IDMAP;
949 attr\->userns_fd = fd_userns;
952 ret = mount_setattr(fd_tree, "",
953 AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0),
954 attr, sizeof(struct mount_attr));
956 exit_log("%m \- Failed to change mount attributes\en");
960 ret = move_mount(fd_tree, "", \-EBADF, target,
961 MOVE_MOUNT_F_EMPTY_PATH);
963 exit_log("%m \- Failed to attach mount to %s\en", target);
977 .BR mount_namespaces (7),
978 .BR capabilities (7),
979 .BR user_namespaces (7),