Add fix-uninit-bitmap-blocks patch
[ext4-patch-queue.git] / ocfs2-fiemap.patch
blobd9360c5efd4a511b3e6af598e6efd247acdd4236
1 ocfs2: fiemap support
3 From: Mark Fasheh <mfasheh@suse.com>
5 Plug ocfs2 into ->fiemap. Some portions of ocfs2_get_clusters() had to be
6 refactored so that the extent cache can be skipped in favor of going
7 directly to the on-disk records. This makes it easier for us to determine
8 which extent is the last one in the btree. Also, I'm not sure we want to be
9 caching fiemap lookups anyway as they're not directly related to data
10 read/write.
12 Signed-off-by: Mark Fasheh <mfasheh@suse.com>
13 ---
14 fs/ocfs2/alloc.c | 9 --
15 fs/ocfs2/alloc.h | 9 ++
16 fs/ocfs2/extent_map.c | 346 +++++++++++++++++++++++++++++++++++++++++--------
17 fs/ocfs2/extent_map.h | 3 +
18 fs/ocfs2/file.c | 1 +
19 5 files changed, 306 insertions(+), 62 deletions(-)
21 diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
22 index 10bfb46..29ff57e 100644
23 --- a/fs/ocfs2/alloc.c
24 +++ b/fs/ocfs2/alloc.c
25 @@ -990,15 +990,6 @@ out:
29 - * This is only valid for leaf nodes, which are the only ones that can
30 - * have empty extents anyway.
31 - */
32 -static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
34 - return !rec->e_leaf_clusters;
37 -/*
38 * This function will discard the rightmost extent record.
40 static void ocfs2_shift_records_right(struct ocfs2_extent_list *el)
41 diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
42 index 42ff94b..60cd3d5 100644
43 --- a/fs/ocfs2/alloc.h
44 +++ b/fs/ocfs2/alloc.h
45 @@ -146,4 +146,13 @@ static inline unsigned int ocfs2_rec_clusters(struct ocfs2_extent_list *el,
46 return le16_to_cpu(rec->e_leaf_clusters);
49 +/*
50 + * This is only valid for leaf nodes, which are the only ones that can
51 + * have empty extents anyway.
52 + */
53 +static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
55 + return !rec->e_leaf_clusters;
58 #endif /* OCFS2_ALLOC_H */
59 diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
60 index c58668a..aed268e 100644
61 --- a/fs/ocfs2/extent_map.c
62 +++ b/fs/ocfs2/extent_map.c
63 @@ -25,6 +25,7 @@
64 #include <linux/fs.h>
65 #include <linux/init.h>
66 #include <linux/types.h>
67 +#include <linux/fiemap.h>
69 #define MLOG_MASK_PREFIX ML_EXTENT_MAP
70 #include <cluster/masklog.h>
71 @@ -32,6 +33,7 @@
72 #include "ocfs2.h"
74 #include "alloc.h"
75 +#include "dlmglue.h"
76 #include "extent_map.h"
77 #include "inode.h"
78 #include "super.h"
79 @@ -282,6 +284,51 @@ out:
80 kfree(new_emi);
83 +static int ocfs2_last_eb_is_empty(struct inode *inode,
84 + struct ocfs2_dinode *di)
86 + int ret, next_free;
87 + u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
88 + struct buffer_head *eb_bh = NULL;
89 + struct ocfs2_extent_block *eb;
90 + struct ocfs2_extent_list *el;
92 + ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), last_eb_blk,
93 + &eb_bh, OCFS2_BH_CACHED, inode);
94 + if (ret) {
95 + mlog_errno(ret);
96 + goto out;
97 + }
99 + eb = (struct ocfs2_extent_block *) eb_bh->b_data;
100 + el = &eb->h_list;
102 + if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
103 + ret = -EROFS;
104 + OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
105 + goto out;
108 + if (el->l_tree_depth) {
109 + ocfs2_error(inode->i_sb,
110 + "Inode %lu has non zero tree depth in "
111 + "leaf block %llu\n", inode->i_ino,
112 + (unsigned long long)eb_bh->b_blocknr);
113 + ret = -EROFS;
114 + goto out;
117 + next_free = le16_to_cpu(el->l_next_free_rec);
119 + if (next_free == 0 ||
120 + (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
121 + ret = 1;
123 +out:
124 + brelse(eb_bh);
125 + return ret;
129 * Return the 1st index within el which contains an extent start
130 * larger than v_cluster.
131 @@ -373,42 +420,28 @@ out:
132 return ret;
135 -int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
136 - u32 *p_cluster, u32 *num_clusters,
137 - unsigned int *extent_flags)
138 +static int ocfs2_get_clusters_nocache(struct inode *inode,
139 + struct buffer_head *di_bh,
140 + u32 v_cluster, unsigned int *hole_len,
141 + struct ocfs2_extent_rec *ret_rec,
142 + unsigned int *is_last)
144 - int ret, i;
145 - unsigned int flags = 0;
146 - struct buffer_head *di_bh = NULL;
147 - struct buffer_head *eb_bh = NULL;
148 + int i, ret, tree_height, len;
149 struct ocfs2_dinode *di;
150 - struct ocfs2_extent_block *eb;
151 + struct ocfs2_extent_block *uninitialized_var(eb);
152 struct ocfs2_extent_list *el;
153 struct ocfs2_extent_rec *rec;
154 - u32 coff;
156 - if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
157 - ret = -ERANGE;
158 - mlog_errno(ret);
159 - goto out;
162 - ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
163 - num_clusters, extent_flags);
164 - if (ret == 0)
165 - goto out;
166 + struct buffer_head *eb_bh = NULL;
168 - ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
169 - &di_bh, OCFS2_BH_CACHED, inode);
170 - if (ret) {
171 - mlog_errno(ret);
172 - goto out;
174 + memset(ret_rec, 0, sizeof(*ret_rec));
175 + if (is_last)
176 + *is_last = 0;
178 di = (struct ocfs2_dinode *) di_bh->b_data;
179 el = &di->id2.i_list;
180 + tree_height = le16_to_cpu(el->l_tree_depth);
182 - if (el->l_tree_depth) {
183 + if (tree_height > 0) {
184 ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh);
185 if (ret) {
186 mlog_errno(ret);
187 @@ -431,46 +464,143 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
188 i = ocfs2_search_extent_list(el, v_cluster);
189 if (i == -1) {
191 - * A hole was found. Return some canned values that
192 - * callers can key on. If asked for, num_clusters will
193 - * be populated with the size of the hole.
194 + * Holes can be larger than the maximum size of an
195 + * extent, so we return their lengths in a separate
196 + * field.
198 - *p_cluster = 0;
199 - if (num_clusters) {
200 + if (hole_len) {
201 ret = ocfs2_figure_hole_clusters(inode, el, eb_bh,
202 - v_cluster,
203 - num_clusters);
204 + v_cluster, &len);
205 if (ret) {
206 mlog_errno(ret);
207 goto out;
210 + *hole_len = len;
212 - } else {
213 - rec = &el->l_recs[i];
214 + goto out_hole;
217 - BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
218 + rec = &el->l_recs[i];
220 - if (!rec->e_blkno) {
221 - ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
222 - "record (%u, %u, 0)", inode->i_ino,
223 - le32_to_cpu(rec->e_cpos),
224 - ocfs2_rec_clusters(el, rec));
225 - ret = -EROFS;
226 - goto out;
227 + BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
229 + if (!rec->e_blkno) {
230 + ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
231 + "record (%u, %u, 0)", inode->i_ino,
232 + le32_to_cpu(rec->e_cpos),
233 + ocfs2_rec_clusters(el, rec));
234 + ret = -EROFS;
235 + goto out;
238 + *ret_rec = *rec;
240 + /*
241 + * Checking for last extent is potentially expensive - we
242 + * might have to look at the next leaf over to see if it's
243 + * empty.
245 + * The first two checks are to see whether the caller even
246 + * cares for this information, and if the extent is at least
247 + * the last in its list.
249 + * If those hold true, then the extent is last if any of the
250 + * additional conditions hold true:
251 + * - Extent list is in-inode
252 + * - Extent list is right-most
253 + * - Extent list is 2nd to rightmost, with empty right-most
254 + */
255 + if (is_last) {
256 + if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
257 + if (tree_height == 0)
258 + *is_last = 1;
259 + else if (eb->h_blkno == di->i_last_eb_blk)
260 + *is_last = 1;
261 + else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
262 + ret = ocfs2_last_eb_is_empty(inode, di);
263 + if (ret < 0) {
264 + mlog_errno(ret);
265 + goto out;
267 + if (ret == 1)
268 + *is_last = 1;
273 +out_hole:
274 + ret = 0;
275 +out:
276 + brelse(eb_bh);
277 + return ret;
280 +static void ocfs2_relative_extent_offsets(struct super_block *sb,
281 + u32 v_cluster,
282 + struct ocfs2_extent_rec *rec,
283 + u32 *p_cluster, u32 *num_clusters)
286 + u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);
288 + *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
289 + *p_cluster = *p_cluster + coff;
291 + if (num_clusters)
292 + *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
295 +int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
296 + u32 *p_cluster, u32 *num_clusters,
297 + unsigned int *extent_flags)
299 + int ret;
300 + unsigned int uninitialized_var(hole_len), flags = 0;
301 + struct buffer_head *di_bh = NULL;
302 + struct ocfs2_extent_rec rec;
304 - coff = v_cluster - le32_to_cpu(rec->e_cpos);
305 + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
306 + ret = -ERANGE;
307 + mlog_errno(ret);
308 + goto out;
311 - *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
312 - le64_to_cpu(rec->e_blkno));
313 - *p_cluster = *p_cluster + coff;
314 + ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
315 + num_clusters, extent_flags);
316 + if (ret == 0)
317 + goto out;
319 - if (num_clusters)
320 - *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
321 + ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
322 + &di_bh, OCFS2_BH_CACHED, inode);
323 + if (ret) {
324 + mlog_errno(ret);
325 + goto out;
328 - flags = rec->e_flags;
329 + ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
330 + &rec, NULL);
331 + if (ret) {
332 + mlog_errno(ret);
333 + goto out;
336 - ocfs2_extent_map_insert_rec(inode, rec);
337 + if (rec.e_blkno == 0ULL) {
338 + /*
339 + * A hole was found. Return some canned values that
340 + * callers can key on. If asked for, num_clusters will
341 + * be populated with the size of the hole.
342 + */
343 + *p_cluster = 0;
344 + if (num_clusters) {
345 + *num_clusters = hole_len;
347 + } else {
348 + ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
349 + p_cluster, num_clusters);
350 + flags = rec.e_flags;
352 + ocfs2_extent_map_insert_rec(inode, &rec);
355 if (extent_flags)
356 @@ -478,7 +608,6 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
358 out:
359 brelse(di_bh);
360 - brelse(eb_bh);
361 return ret;
364 @@ -521,3 +650,114 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
365 out:
366 return ret;
369 +static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
370 + struct fiemap_extent_info *fieinfo,
371 + u64 map_start)
373 + int ret;
374 + unsigned int id_count;
375 + struct ocfs2_dinode *di;
376 + u64 phys;
377 + u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
378 + struct ocfs2_inode_info *oi = OCFS2_I(inode);
380 + di = (struct ocfs2_dinode *)di_bh->b_data;
381 + id_count = le16_to_cpu(di->id2.i_data.id_count);
383 + if (map_start < id_count) {
384 + phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
385 + phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data);
387 + ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
388 + flags);
389 + if (ret < 0)
390 + return ret;
393 + return 0;
396 +#define OCFS2_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
398 +int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
399 + u64 map_start, u64 map_len)
401 + int ret, is_last;
402 + u32 mapping_end, cpos;
403 + unsigned int hole_size;
404 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
405 + u64 len_bytes, phys_bytes, virt_bytes;
406 + struct buffer_head *di_bh = NULL;
407 + struct ocfs2_extent_rec rec;
409 + ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS);
410 + if (ret)
411 + return ret;
413 + ret = ocfs2_inode_lock(inode, &di_bh, 0);
414 + if (ret) {
415 + mlog_errno(ret);
416 + goto out;
419 + down_read(&OCFS2_I(inode)->ip_alloc_sem);
421 + /*
422 + * Handle inline-data separately.
423 + */
424 + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
425 + ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
426 + goto out_unlock;
429 + cpos = map_start >> osb->s_clustersize_bits;
430 + mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
431 + map_start + map_len);
432 + mapping_end -= cpos;
433 + is_last = 0;
434 + while (cpos < mapping_end && !is_last) {
435 + u32 fe_flags;
437 + ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
438 + &hole_size, &rec, &is_last);
439 + if (ret) {
440 + mlog_errno(ret);
441 + goto out;
444 + if (rec.e_blkno == 0ULL) {
445 + cpos += hole_size;
446 + continue;
449 + fe_flags = 0;
450 + if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
451 + fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
452 + if (is_last)
453 + fe_flags |= FIEMAP_EXTENT_LAST;
454 + len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
455 + phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
456 + virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;
458 + ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
459 + len_bytes, fe_flags);
460 + if (ret)
461 + break;
463 + cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters);
466 + if (ret > 0)
467 + ret = 0;
469 +out_unlock:
470 + brelse(di_bh);
472 + up_read(&OCFS2_I(inode)->ip_alloc_sem);
474 + ocfs2_inode_unlock(inode, 0);
475 +out:
477 + return ret;
479 diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
480 index de91e3e..1b97490 100644
481 --- a/fs/ocfs2/extent_map.h
482 +++ b/fs/ocfs2/extent_map.h
483 @@ -50,4 +50,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster,
484 int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
485 u64 *ret_count, unsigned int *extent_flags);
487 +int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
488 + u64 map_start, u64 map_len);
490 #endif /* _EXTENT_MAP_H */
491 diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
492 index ec2ed15..ed38796 100644
493 --- a/fs/ocfs2/file.c
494 +++ b/fs/ocfs2/file.c
495 @@ -2228,6 +2228,7 @@ const struct inode_operations ocfs2_file_iops = {
496 .getattr = ocfs2_getattr,
497 .permission = ocfs2_permission,
498 .fallocate = ocfs2_fallocate,
499 + .fiemap = ocfs2_fiemap,
502 const struct inode_operations ocfs2_special_file_iops = {
504 1.5.6.1.205.ge2c7.dirty