build: ensure sys/select.h is included
[coreutils.git] / src / extent-scan.c
blob2c9db4405a9f14f98d19396ef8dec80886541b2c
1 /* extent-scan.c -- core functions for scanning extents
2 Copyright (C) 2010-2019 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>.
17 Written by Jie Liu (jeff.liu@oracle.com). */
19 #include <config.h>
20 #include <sys/types.h>
21 #include <sys/ioctl.h>
22 #include <sys/utsname.h>
23 #include <assert.h>
25 #include "system.h"
26 #include "extent-scan.h"
27 #include "fiemap.h"
28 #include "xstrtol.h"
31 /* Work around Linux kernel issues on BTRFS and EXT4. */
32 static bool
33 extent_need_sync (void)
35 /* For now always return true, to be on the safe side.
36 If/when FIEMAP semantics are well defined (before SEEK_HOLE support
37 is usable) and kernels implementing them are in use, we may relax
38 this once again. */
39 return true;
41 #if FIEMAP_BEHAVIOR_IS_DEFINED_AND_USABLE
42 static int need_sync = -1;
44 if (need_sync == -1)
46 struct utsname name;
47 need_sync = 0; /* No workaround by default. */
49 # ifdef __linux__
50 if (uname (&name) != -1 && STRNCMP_LIT (name.release, "2.6.") == 0)
52 unsigned long val;
53 if (xstrtoul (name.release + 4, NULL, 10, &val, NULL) == LONGINT_OK)
55 if (val < 39)
56 need_sync = 1;
59 # endif
62 return need_sync;
63 #endif
66 /* Allocate space for struct extent_scan, initialize the entries if
67 necessary and return it as the input argument of extent_scan_read(). */
68 extern void
69 extent_scan_init (int src_fd, struct extent_scan *scan)
71 scan->fd = src_fd;
72 scan->ei_count = 0;
73 scan->ext_info = NULL;
74 scan->scan_start = 0;
75 scan->initial_scan_failed = false;
76 scan->hit_final_extent = false;
77 scan->fm_flags = extent_need_sync () ? FIEMAP_FLAG_SYNC : 0;
80 #ifdef __linux__
81 # ifndef FS_IOC_FIEMAP
82 # define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap)
83 # endif
84 /* Call ioctl(2) with FS_IOC_FIEMAP (available in linux 2.6.27) to
85 obtain a map of file extents excluding holes. */
86 extern bool
87 extent_scan_read (struct extent_scan *scan)
89 unsigned int si = 0;
90 struct extent_info *last_ei = scan->ext_info;
92 while (true)
94 union { struct fiemap f; char c[4096]; } fiemap_buf;
95 struct fiemap *fiemap = &fiemap_buf.f;
96 struct fiemap_extent *fm_extents = &fiemap->fm_extents[0];
97 enum { count = (sizeof fiemap_buf - sizeof *fiemap)/sizeof *fm_extents };
98 verify (count > 1);
100 /* This is required at least to initialize fiemap->fm_start,
101 but also serves (in mid 2010) to appease valgrind, which
102 appears not to know the semantics of the FIEMAP ioctl. */
103 memset (&fiemap_buf, 0, sizeof fiemap_buf);
105 fiemap->fm_start = scan->scan_start;
106 fiemap->fm_flags = scan->fm_flags;
107 fiemap->fm_extent_count = count;
108 fiemap->fm_length = FIEMAP_MAX_OFFSET - scan->scan_start;
110 /* Fall back to the standard copy if call ioctl(2) failed for
111 the first time. */
112 if (ioctl (scan->fd, FS_IOC_FIEMAP, fiemap) < 0)
114 if (scan->scan_start == 0)
115 scan->initial_scan_failed = true;
116 return false;
119 /* If 0 extents are returned, then no more scans are needed. */
120 if (fiemap->fm_mapped_extents == 0)
122 scan->hit_final_extent = true;
123 return scan->scan_start != 0;
126 assert (scan->ei_count <= SIZE_MAX - fiemap->fm_mapped_extents);
127 scan->ei_count += fiemap->fm_mapped_extents;
129 /* last_ei points into a buffer that may be freed via xnrealloc.
130 Record its offset and adjust after allocation. */
131 size_t prev_idx = last_ei - scan->ext_info;
132 scan->ext_info = xnrealloc (scan->ext_info, scan->ei_count,
133 sizeof (struct extent_info));
134 last_ei = scan->ext_info + prev_idx;
137 unsigned int i = 0;
138 for (i = 0; i < fiemap->fm_mapped_extents; i++)
140 assert (fm_extents[i].fe_logical
141 <= OFF_T_MAX - fm_extents[i].fe_length);
143 verify (sizeof last_ei->ext_flags >= sizeof fm_extents->fe_flags);
145 if (si && last_ei->ext_flags
146 == (fm_extents[i].fe_flags & ~FIEMAP_EXTENT_LAST)
147 && (last_ei->ext_logical + last_ei->ext_length
148 == fm_extents[i].fe_logical))
150 /* Merge previous with last. */
151 last_ei->ext_length += fm_extents[i].fe_length;
152 /* Copy flags in case different. */
153 last_ei->ext_flags = fm_extents[i].fe_flags;
155 else if ((si == 0 && scan->scan_start > fm_extents[i].fe_logical)
156 || (si && (last_ei->ext_logical + last_ei->ext_length
157 > fm_extents[i].fe_logical)))
159 /* BTRFS before 2.6.38 could return overlapping extents
160 for sparse files. We adjust the returned extents
161 rather than failing, as otherwise it would be inefficient
162 to detect this on the initial scan. */
163 uint64_t new_logical;
164 uint64_t length_adjust;
165 if (si == 0)
166 new_logical = scan->scan_start;
167 else
169 /* We could return here if scan->scan_start == 0
170 but don't so as to minimize special cases. */
171 new_logical = last_ei->ext_logical + last_ei->ext_length;
173 length_adjust = new_logical - fm_extents[i].fe_logical;
174 /* If an extent is contained within the previous one, fail. */
175 if (length_adjust < fm_extents[i].fe_length)
177 if (scan->scan_start == 0)
178 scan->initial_scan_failed = true;
179 return false;
181 fm_extents[i].fe_logical = new_logical;
182 fm_extents[i].fe_length -= length_adjust;
183 /* Process the adjusted extent again. */
184 i--;
185 continue;
187 else
189 last_ei = scan->ext_info + si;
190 last_ei->ext_logical = fm_extents[i].fe_logical;
191 last_ei->ext_length = fm_extents[i].fe_length;
192 last_ei->ext_flags = fm_extents[i].fe_flags;
193 si++;
197 if (last_ei->ext_flags & FIEMAP_EXTENT_LAST)
198 scan->hit_final_extent = true;
200 /* If we have enough extents, discard the last as it might
201 be merged with one from the next scan. */
202 if (si > count && !scan->hit_final_extent)
203 last_ei = scan->ext_info + --si - 1;
205 /* We don't bother reallocating any trailing slots. */
206 scan->ei_count = si;
208 if (scan->hit_final_extent)
209 break;
210 else
211 scan->scan_start = last_ei->ext_logical + last_ei->ext_length;
213 if (si >= count)
214 break;
217 return true;
219 #else
220 extern bool
221 extent_scan_read (struct extent_scan *scan _GL_UNUSED)
223 scan->initial_scan_failed = true;
224 errno = ENOTSUP;
225 return false;
227 #endif