3 From: Mark Fasheh <mfasheh@suse.com>
5 Plug ocfs2 into ->fiemap. Some portions of ocfs2_get_clusters() had to be
6 refactored so that the extent cache can be skipped in favor of going
7 directly to the on-disk records. This makes it easier for us to determine
8 which extent is the last one in the btree. Also, I'm not sure we want to be
9 caching fiemap lookups anyway as they're not directly related to data
12 Signed-off-by: Mark Fasheh <mfasheh@suse.com>
14 fs/ocfs2/alloc.c | 9 --
15 fs/ocfs2/alloc.h | 9 ++
16 fs/ocfs2/extent_map.c | 346 +++++++++++++++++++++++++++++++++++++++++--------
17 fs/ocfs2/extent_map.h | 3 +
19 5 files changed, 306 insertions(+), 62 deletions(-)
21 diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
22 index 10bfb46..29ff57e 100644
23 --- a/fs/ocfs2/alloc.c
24 +++ b/fs/ocfs2/alloc.c
25 @@ -990,15 +990,6 @@ out:
29 - * This is only valid for leaf nodes, which are the only ones that can
30 - * have empty extents anyway.
32 -static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
34 - return !rec->e_leaf_clusters;
38 * This function will discard the rightmost extent record.
40 static void ocfs2_shift_records_right(struct ocfs2_extent_list *el)
41 diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
42 index 42ff94b..60cd3d5 100644
43 --- a/fs/ocfs2/alloc.h
44 +++ b/fs/ocfs2/alloc.h
45 @@ -146,4 +146,13 @@ static inline unsigned int ocfs2_rec_clusters(struct ocfs2_extent_list *el,
46 return le16_to_cpu(rec->e_leaf_clusters);
50 + * This is only valid for leaf nodes, which are the only ones that can
51 + * have empty extents anyway.
53 +static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
55 + return !rec->e_leaf_clusters;
58 #endif /* OCFS2_ALLOC_H */
59 diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
60 index c58668a..aed268e 100644
61 --- a/fs/ocfs2/extent_map.c
62 +++ b/fs/ocfs2/extent_map.c
65 #include <linux/init.h>
66 #include <linux/types.h>
67 +#include <linux/fiemap.h>
69 #define MLOG_MASK_PREFIX ML_EXTENT_MAP
70 #include <cluster/masklog.h>
76 #include "extent_map.h"
79 @@ -282,6 +284,51 @@ out:
83 +static int ocfs2_last_eb_is_empty(struct inode *inode,
84 + struct ocfs2_dinode *di)
87 + u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
88 + struct buffer_head *eb_bh = NULL;
89 + struct ocfs2_extent_block *eb;
90 + struct ocfs2_extent_list *el;
92 + ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), last_eb_blk,
93 + &eb_bh, OCFS2_BH_CACHED, inode);
99 + eb = (struct ocfs2_extent_block *) eb_bh->b_data;
102 + if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
104 + OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
108 + if (el->l_tree_depth) {
109 + ocfs2_error(inode->i_sb,
110 + "Inode %lu has non zero tree depth in "
111 + "leaf block %llu\n", inode->i_ino,
112 + (unsigned long long)eb_bh->b_blocknr);
117 + next_free = le16_to_cpu(el->l_next_free_rec);
119 + if (next_free == 0 ||
120 + (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
129 * Return the 1st index within el which contains an extent start
130 * larger than v_cluster.
131 @@ -373,42 +420,28 @@ out:
135 -int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
136 - u32 *p_cluster, u32 *num_clusters,
137 - unsigned int *extent_flags)
138 +static int ocfs2_get_clusters_nocache(struct inode *inode,
139 + struct buffer_head *di_bh,
140 + u32 v_cluster, unsigned int *hole_len,
141 + struct ocfs2_extent_rec *ret_rec,
142 + unsigned int *is_last)
145 - unsigned int flags = 0;
146 - struct buffer_head *di_bh = NULL;
147 - struct buffer_head *eb_bh = NULL;
148 + int i, ret, tree_height, len;
149 struct ocfs2_dinode *di;
150 - struct ocfs2_extent_block *eb;
151 + struct ocfs2_extent_block *uninitialized_var(eb);
152 struct ocfs2_extent_list *el;
153 struct ocfs2_extent_rec *rec;
156 - if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
162 - ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
163 - num_clusters, extent_flags);
166 + struct buffer_head *eb_bh = NULL;
168 - ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
169 - &di_bh, OCFS2_BH_CACHED, inode);
174 + memset(ret_rec, 0, sizeof(*ret_rec));
178 di = (struct ocfs2_dinode *) di_bh->b_data;
179 el = &di->id2.i_list;
180 + tree_height = le16_to_cpu(el->l_tree_depth);
182 - if (el->l_tree_depth) {
183 + if (tree_height > 0) {
184 ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh);
187 @@ -431,46 +464,143 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
188 i = ocfs2_search_extent_list(el, v_cluster);
191 - * A hole was found. Return some canned values that
192 - * callers can key on. If asked for, num_clusters will
193 - * be populated with the size of the hole.
194 + * Holes can be larger than the maximum size of an
195 + * extent, so we return their lengths in a separate
199 - if (num_clusters) {
201 ret = ocfs2_figure_hole_clusters(inode, el, eb_bh,
213 - rec = &el->l_recs[i];
217 - BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
218 + rec = &el->l_recs[i];
220 - if (!rec->e_blkno) {
221 - ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
222 - "record (%u, %u, 0)", inode->i_ino,
223 - le32_to_cpu(rec->e_cpos),
224 - ocfs2_rec_clusters(el, rec));
227 + BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
229 + if (!rec->e_blkno) {
230 + ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
231 + "record (%u, %u, 0)", inode->i_ino,
232 + le32_to_cpu(rec->e_cpos),
233 + ocfs2_rec_clusters(el, rec));
241 + * Checking for last extent is potentially expensive - we
242 + * might have to look at the next leaf over to see if it's
245 + * The first two checks are to see whether the caller even
246 + * cares for this information, and if the extent is at least
247 + * the last in its list.
249 + * If those hold true, then the extent is last if any of the
250 + * additional conditions hold true:
251 + * - Extent list is in-inode
252 + * - Extent list is right-most
253 + * - Extent list is 2nd to rightmost, with empty right-most
256 + if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
257 + if (tree_height == 0)
259 + else if (eb->h_blkno == di->i_last_eb_blk)
261 + else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
262 + ret = ocfs2_last_eb_is_empty(inode, di);
280 +static void ocfs2_relative_extent_offsets(struct super_block *sb,
282 + struct ocfs2_extent_rec *rec,
283 + u32 *p_cluster, u32 *num_clusters)
286 + u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);
288 + *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
289 + *p_cluster = *p_cluster + coff;
292 + *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
295 +int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
296 + u32 *p_cluster, u32 *num_clusters,
297 + unsigned int *extent_flags)
300 + unsigned int uninitialized_var(hole_len), flags = 0;
301 + struct buffer_head *di_bh = NULL;
302 + struct ocfs2_extent_rec rec;
304 - coff = v_cluster - le32_to_cpu(rec->e_cpos);
305 + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
311 - *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
312 - le64_to_cpu(rec->e_blkno));
313 - *p_cluster = *p_cluster + coff;
314 + ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
315 + num_clusters, extent_flags);
320 - *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
321 + ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
322 + &di_bh, OCFS2_BH_CACHED, inode);
328 - flags = rec->e_flags;
329 + ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
336 - ocfs2_extent_map_insert_rec(inode, rec);
337 + if (rec.e_blkno == 0ULL) {
339 + * A hole was found. Return some canned values that
340 + * callers can key on. If asked for, num_clusters will
341 + * be populated with the size of the hole.
344 + if (num_clusters) {
345 + *num_clusters = hole_len;
348 + ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
349 + p_cluster, num_clusters);
350 + flags = rec.e_flags;
352 + ocfs2_extent_map_insert_rec(inode, &rec);
356 @@ -478,7 +608,6 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
364 @@ -521,3 +650,114 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
369 +static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
370 + struct fiemap_extent_info *fieinfo,
374 + unsigned int id_count;
375 + struct ocfs2_dinode *di;
377 + u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
378 + struct ocfs2_inode_info *oi = OCFS2_I(inode);
380 + di = (struct ocfs2_dinode *)di_bh->b_data;
381 + id_count = le16_to_cpu(di->id2.i_data.id_count);
383 + if (map_start < id_count) {
384 + phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
385 + phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data);
387 + ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
396 +#define OCFS2_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
398 +int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
399 + u64 map_start, u64 map_len)
402 + u32 mapping_end, cpos;
403 + unsigned int hole_size;
404 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
405 + u64 len_bytes, phys_bytes, virt_bytes;
406 + struct buffer_head *di_bh = NULL;
407 + struct ocfs2_extent_rec rec;
409 + ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS);
413 + ret = ocfs2_inode_lock(inode, &di_bh, 0);
419 + down_read(&OCFS2_I(inode)->ip_alloc_sem);
422 + * Handle inline-data separately.
424 + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
425 + ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
429 + cpos = map_start >> osb->s_clustersize_bits;
430 + mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
431 + map_start + map_len);
432 + mapping_end -= cpos;
434 + while (cpos < mapping_end && !is_last) {
437 + ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
438 + &hole_size, &rec, &is_last);
444 + if (rec.e_blkno == 0ULL) {
450 + if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
451 + fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
453 + fe_flags |= FIEMAP_EXTENT_LAST;
454 + len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
455 + phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
456 + virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;
458 + ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
459 + len_bytes, fe_flags);
463 + cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters);
472 + up_read(&OCFS2_I(inode)->ip_alloc_sem);
474 + ocfs2_inode_unlock(inode, 0);
479 diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
480 index de91e3e..1b97490 100644
481 --- a/fs/ocfs2/extent_map.h
482 +++ b/fs/ocfs2/extent_map.h
483 @@ -50,4 +50,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster,
484 int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
485 u64 *ret_count, unsigned int *extent_flags);
487 +int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
488 + u64 map_start, u64 map_len);
490 #endif /* _EXTENT_MAP_H */
491 diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
492 index ec2ed15..ed38796 100644
493 --- a/fs/ocfs2/file.c
494 +++ b/fs/ocfs2/file.c
495 @@ -2228,6 +2228,7 @@ const struct inode_operations ocfs2_file_iops = {
496 .getattr = ocfs2_getattr,
497 .permission = ocfs2_permission,
498 .fallocate = ocfs2_fallocate,
499 + .fiemap = ocfs2_fiemap,
502 const struct inode_operations ocfs2_special_file_iops = {
504 1.5.6.1.205.ge2c7.dirty