1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
6 * Block/Cluster mapping functions
8 * Copyright (C) 2004 Oracle. All rights reserved.
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License, version 2, as published by the Free Software Foundation.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
19 * You should have received a copy of the GNU General Public
20 * License along with this program; if not, write to the
21 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22 * Boston, MA 02111-1307, USA.
26 #include <linux/init.h>
27 #include <linux/types.h>
29 #define MLOG_MASK_PREFIX ML_EXTENT_MAP
30 #include <cluster/masklog.h>
35 #include "extent_map.h"
39 #include "buffer_head_io.h"
42 * The extent caching implementation is intentionally trivial.
44 * We only cache a small number of extents stored directly on the
45 * inode, so linear order operations are acceptable. If we ever want
46 * to increase the size of the extent map, then these algorithms must
50 void ocfs2_extent_map_init(struct inode
*inode
)
52 struct ocfs2_inode_info
*oi
= OCFS2_I(inode
);
54 oi
->ip_extent_map
.em_num_items
= 0;
55 INIT_LIST_HEAD(&oi
->ip_extent_map
.em_list
);
/*
 * Walk the cached items on em->em_list looking for one whose logical
 * cluster range [ei_cpos, ei_cpos + ei_clusters) contains cpos. A
 * matching item is moved to the head of em->em_list.
 *
 * NOTE(review): several lines of this function are not visible here,
 * including the declarations of cpos and range and the code that stores
 * the result. Presumably the match (or NULL) is handed back through
 * ret_emi - confirm against the full file.
 */
58 static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map
*em
,
60 struct ocfs2_extent_map_item
**ret_emi
)
63 struct ocfs2_extent_map_item
*emi
;
67 list_for_each_entry(emi
, &em
->em_list
, ei_list
) {
/* range is the first cluster past the end of this item. */
68 range
= emi
->ei_cpos
+ emi
->ei_clusters
;
70 if (cpos
>= emi
->ei_cpos
&& cpos
< range
) {
/* Hit: move the item to the front of the list. */
71 list_move(&emi
->ei_list
, &em
->em_list
);
/*
 * Look up logical cluster cpos in the extent map cached on inode.
 *
 * The search runs under oi->ip_lock. From the item found, the offset of
 * cpos into the item (coff) is used to fill in:
 *   *phys  - ei_phys + coff
 *   *len   - ei_clusters - coff
 *   *flags - the item's ei_flags
 *
 * NOTE(review): the flags parameter declaration, any NULL checks on emi
 * or on the output pointers, and the return statements are not visible
 * in this view; the return convention must be confirmed against the
 * full file.
 */
79 static int ocfs2_extent_map_lookup(struct inode
*inode
, unsigned int cpos
,
80 unsigned int *phys
, unsigned int *len
,
84 struct ocfs2_inode_info
*oi
= OCFS2_I(inode
);
85 struct ocfs2_extent_map_item
*emi
;
87 spin_lock(&oi
->ip_lock
);
89 __ocfs2_extent_map_lookup(&oi
->ip_extent_map
, cpos
, &emi
);
/* coff: how far cpos lies past the start of the cached item. */
91 coff
= cpos
- emi
->ei_cpos
;
92 *phys
= emi
->ei_phys
+ coff
;
94 *len
= emi
->ei_clusters
- coff
;
96 *flags
= emi
->ei_flags
;
99 spin_unlock(&oi
->ip_lock
);
108 * Forget about all clusters equal to or greater than cpos.
/*
 * Drop cached mapping information at and beyond logical cluster cpos.
 *
 * Under oi->ip_lock every cached item is visited:
 *   - an item starting at or after cpos is moved onto tmp_list;
 *   - otherwise its end (range) is computed and, in the partial
 *     truncate path, ei_clusters is shortened to cpos - ei_cpos.
 * After the lock is released the items collected on tmp_list are
 * unlinked one by one.
 *
 * NOTE(review): the declarations of tmp_list and range, the update of
 * em_num_items, the condition guarding the partial truncate and the
 * release of the unlinked items are not visible in this view.
 */
110 void ocfs2_extent_map_trunc(struct inode
*inode
, unsigned int cpos
)
112 struct list_head
*p
, *n
;
113 struct ocfs2_extent_map_item
*emi
;
114 struct ocfs2_inode_info
*oi
= OCFS2_I(inode
);
115 struct ocfs2_extent_map
*em
= &oi
->ip_extent_map
;
119 spin_lock(&oi
->ip_lock
);
120 list_for_each_safe(p
, n
, &em
->em_list
) {
121 emi
= list_entry(p
, struct ocfs2_extent_map_item
, ei_list
);
123 if (emi
->ei_cpos
>= cpos
) {
124 /* Full truncate of this record. */
125 list_move(&emi
->ei_list
, &tmp_list
);
/* An item is on the list, so the item count cannot be zero. */
126 BUG_ON(em
->em_num_items
== 0);
131 range
= emi
->ei_cpos
+ emi
->ei_clusters
;
133 /* Partial truncate */
134 emi
->ei_clusters
= cpos
- emi
->ei_cpos
;
137 spin_unlock(&oi
->ip_lock
);
/* Second pass, outside the lock, over the items removed above. */
139 list_for_each_safe(p
, n
, &tmp_list
) {
140 emi
= list_entry(p
, struct ocfs2_extent_map_item
, ei_list
);
141 list_del(&emi
->ei_list
);
147 * Is any part of emi2 contained within emi1
149 static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item
*emi1
,
150 struct ocfs2_extent_map_item
*emi2
)
152 unsigned int range1
, range2
;
155 * Check if logical start of emi2 is inside emi1
157 range1
= emi1
->ei_cpos
+ emi1
->ei_clusters
;
158 if (emi2
->ei_cpos
>= emi1
->ei_cpos
&& emi2
->ei_cpos
< range1
)
162 * Check if logical end of emi2 is inside emi1
164 range2
= emi2
->ei_cpos
+ emi2
->ei_clusters
;
165 if (range2
> emi1
->ei_cpos
&& range2
<= range1
)
171 static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item
*dest
,
172 struct ocfs2_extent_map_item
*src
)
174 dest
->ei_cpos
= src
->ei_cpos
;
175 dest
->ei_phys
= src
->ei_phys
;
176 dest
->ei_clusters
= src
->ei_clusters
;
177 dest
->ei_flags
= src
->ei_flags
;
181 * Try to merge emi with ins. Returns 1 if merge succeeds, zero
184 static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item
*emi
,
185 struct ocfs2_extent_map_item
*ins
)
188 * Handle contiguousness
190 if (ins
->ei_phys
== (emi
->ei_phys
+ emi
->ei_clusters
) &&
191 ins
->ei_cpos
== (emi
->ei_cpos
+ emi
->ei_clusters
) &&
192 ins
->ei_flags
== emi
->ei_flags
) {
193 emi
->ei_clusters
+= ins
->ei_clusters
;
195 } else if ((ins
->ei_phys
+ ins
->ei_clusters
) == emi
->ei_phys
&&
196 (ins
->ei_cpos
+ ins
->ei_clusters
) == emi
->ei_phys
&&
197 ins
->ei_flags
== emi
->ei_flags
) {
198 emi
->ei_phys
= ins
->ei_phys
;
199 emi
->ei_cpos
= ins
->ei_cpos
;
200 emi
->ei_clusters
+= ins
->ei_clusters
;
205 * Overlapping extents - this shouldn't happen unless we've
206 * split an extent to change it's flags. That is exceedingly
207 * rare, so there's no sense in trying to optimize it yet.
209 if (ocfs2_ei_is_contained(emi
, ins
) ||
210 ocfs2_ei_is_contained(ins
, emi
)) {
211 ocfs2_copy_emi_fields(emi
, ins
);
215 /* No merge was possible. */
220 * In order to reduce complexity on the caller, this insert function
221 * is intentionally liberal in what it will accept.
223 * The only rule is that the truncate call *must* be used whenever
224 * records have been deleted. This avoids inserting overlapping
225 * records with different physical mappings.
/*
 * Cache the mapping described by the on-disk extent record rec in the
 * extent map of inode.
 *
 * The record is first converted into a stack item (ins): e_cpos,
 * e_blkno (converted from blocks to clusters) and e_leaf_clusters are
 * read as little-endian values, e_flags is copied as is.
 *
 * Under oi->ip_lock the function then:
 *   1. tries to merge ins into each cached item; a merged item is moved
 *      to the head of the list;
 *   2. otherwise, while fewer than OCFS2_MAX_EXTENT_MAP_ITEMS items are
 *      cached, uses a freshly allocated item (allocated with GFP_NOFS
 *      after dropping the lock), fills it from ins and adds it at the
 *      head of the list;
 *   3. otherwise reuses the item at the tail of the list: it is moved
 *      to the head and overwritten with ins.
 *
 * NOTE(review): the lines that retry after the allocation, handle
 * allocation failure, update em_num_items and release an unused
 * new_emi are not visible in this view.
 */
227 void ocfs2_extent_map_insert_rec(struct inode
*inode
,
228 struct ocfs2_extent_rec
*rec
)
230 struct ocfs2_inode_info
*oi
= OCFS2_I(inode
);
231 struct ocfs2_extent_map
*em
= &oi
->ip_extent_map
;
232 struct ocfs2_extent_map_item
*emi
, *new_emi
= NULL
;
233 struct ocfs2_extent_map_item ins
;
235 ins
.ei_cpos
= le32_to_cpu(rec
->e_cpos
);
236 ins
.ei_phys
= ocfs2_blocks_to_clusters(inode
->i_sb
,
237 le64_to_cpu(rec
->e_blkno
));
238 ins
.ei_clusters
= le16_to_cpu(rec
->e_leaf_clusters
);
239 ins
.ei_flags
= rec
->e_flags
;
242 spin_lock(&oi
->ip_lock
);
244 list_for_each_entry(emi
, &em
->em_list
, ei_list
) {
245 if (ocfs2_try_to_merge_extent_map(emi
, &ins
)) {
/* Merged: move the item to the list head and drop the lock. */
246 list_move(&emi
->ei_list
, &em
->em_list
);
247 spin_unlock(&oi
->ip_lock
);
253 * No item could be merged.
255 * Either allocate and add a new item, or overwrite the last recently
259 if (em
->em_num_items
< OCFS2_MAX_EXTENT_MAP_ITEMS
) {
260 if (new_emi
== NULL
) {
/* The lock is released before the allocation is attempted. */
261 spin_unlock(&oi
->ip_lock
);
263 new_emi
= kmalloc(sizeof(*new_emi
), GFP_NOFS
);
270 ocfs2_copy_emi_fields(new_emi
, &ins
);
271 list_add(&new_emi
->ei_list
, &em
->em_list
);
275 BUG_ON(list_empty(&em
->em_list
) || em
->em_num_items
== 0);
/* Cache is full: recycle the item at the tail of the list. */
276 emi
= list_entry(em
->em_list
.prev
,
277 struct ocfs2_extent_map_item
, ei_list
);
278 list_move(&emi
->ei_list
, &em
->em_list
);
279 ocfs2_copy_emi_fields(emi
, &ins
);
282 spin_unlock(&oi
->ip_lock
);
290 * Return the 1st index within el which contains an extent start
291 * larger than v_cluster.
/*
 * Scan the records of el in index order, over the first
 * l_next_free_rec entries, comparing v_cluster against the starting
 * cluster (e_cpos) of each record.
 *
 * NOTE(review): the v_cluster parameter declaration, the action taken
 * when v_cluster < e_cpos and the return statement are not visible in
 * this view. Callers in this file compare the result against
 * l_next_free_rec, so presumably that value is returned when no record
 * starts beyond v_cluster - confirm.
 */
293 static int ocfs2_search_for_hole_index(struct ocfs2_extent_list
*el
,
297 struct ocfs2_extent_rec
*rec
;
299 for(i
= 0; i
< le16_to_cpu(el
->l_next_free_rec
); i
++) {
300 rec
= &el
->l_recs
[i
];
302 if (v_cluster
< le32_to_cpu(rec
->e_cpos
))
310 * Figure out the size of a hole which starts at v_cluster within the given
313 * If there is no more allocation past v_cluster, we return the maximum
314 * cluster size minus v_cluster.
316 * If we have in-inode extents, then el points to the dinode list and
317 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
/*
 * Compute the length of the hole beginning at v_cluster and store it in
 * *num_clusters.
 *
 * el is searched for the first record starting beyond v_cluster. If
 * none is found and an extent block buffer (eb_bh) was supplied, the
 * next leaf named by h_next_leaf_blk is read and validated, and its
 * record list is searched instead. When there is no next leaf, or still
 * no later record, the hole is reported as UINT_MAX - v_cluster;
 * otherwise it extends to the start of the record that was found.
 *
 * NOTE(review): the v_cluster and num_clusters parameter declarations,
 * the error handling after ocfs2_read_block() and the block validity
 * check, the release of next_eb_bh and the return value are not
 * visible in this view.
 */
320 static int ocfs2_figure_hole_clusters(struct inode
*inode
,
321 struct ocfs2_extent_list
*el
,
322 struct buffer_head
*eb_bh
,
327 struct buffer_head
*next_eb_bh
= NULL
;
328 struct ocfs2_extent_block
*eb
, *next_eb
;
330 i
= ocfs2_search_for_hole_index(el
, v_cluster
);
/* Nothing later in this list and we are in an extent block. */
332 if (i
== le16_to_cpu(el
->l_next_free_rec
) && eb_bh
) {
333 eb
= (struct ocfs2_extent_block
*)eb_bh
->b_data
;
336 * Check the next leaf for any extents.
/* A next-leaf block number of zero means there is no further leaf. */
339 if (le64_to_cpu(eb
->h_next_leaf_blk
) == 0ULL)
340 goto no_more_extents
;
342 ret
= ocfs2_read_block(OCFS2_SB(inode
->i_sb
),
343 le64_to_cpu(eb
->h_next_leaf_blk
),
344 &next_eb_bh
, OCFS2_BH_CACHED
, inode
);
349 next_eb
= (struct ocfs2_extent_block
*)next_eb_bh
->b_data
;
351 if (!OCFS2_IS_VALID_EXTENT_BLOCK(next_eb
)) {
353 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode
->i_sb
, next_eb
);
/* Continue the search in the record list of the next leaf. */
357 el
= &next_eb
->h_list
;
359 i
= ocfs2_search_for_hole_index(el
, v_cluster
);
363 if (i
== le16_to_cpu(el
->l_next_free_rec
)) {
365 * We're at the end of our existing allocation. Just
366 * return the maximum number of clusters we could
369 *num_clusters
= UINT_MAX
- v_cluster
;
/* Hole runs up to the start of the next allocated record. */
371 *num_clusters
= le32_to_cpu(el
->l_recs
[i
].e_cpos
) - v_cluster
;
381 * Return the index of the extent record which contains cluster #v_cluster.
382 * -1 is returned if it was not found.
384 * Should work fine on interior and exterior nodes.
/*
 * Scan the first l_next_free_rec records of el for one whose cluster
 * range [rec_start, rec_end) contains v_cluster. rec_start is the
 * record's e_cpos and the length comes from ocfs2_rec_clusters().
 *
 * NOTE(review): the v_cluster parameter declaration, the body of the
 * match branch and the return statement are not visible in this view.
 * The comment preceding this function says the index of the matching
 * record is returned, or -1 if none is found - confirm.
 */
386 static int ocfs2_search_extent_list(struct ocfs2_extent_list
*el
,
391 struct ocfs2_extent_rec
*rec
;
392 u32 rec_end
, rec_start
, clusters
;
394 for(i
= 0; i
< le16_to_cpu(el
->l_next_free_rec
); i
++) {
395 rec
= &el
->l_recs
[i
];
397 rec_start
= le32_to_cpu(rec
->e_cpos
);
398 clusters
= ocfs2_rec_clusters(el
, rec
);
/* rec_end is the first cluster past the end of this record. */
400 rec_end
= rec_start
+ clusters
;
402 if (v_cluster
>= rec_start
&& v_cluster
< rec_end
) {
/*
 * Map logical cluster v_cluster of inode to a physical cluster.
 *
 * Outputs:
 *   *p_cluster    - physical cluster corresponding to v_cluster
 *   *num_clusters - clusters remaining in the extent from v_cluster on,
 *                   or the size of the hole when v_cluster is unmapped
 *   *extent_flags - flags of the extent record (0 by default)
 *
 * Steps visible here:
 *   1. consult the in-memory extent map via ocfs2_extent_map_lookup();
 *   2. read the inode block and start from the extent list embedded in
 *      the dinode (id2.i_list);
 *   3. if that list has a non-zero tree depth, locate the leaf extent
 *      block with ocfs2_find_leaf(); a leaf whose list still has a
 *      non-zero depth is reported through ocfs2_error();
 *   4. search the list for v_cluster; for a hole the hole size comes
 *      from ocfs2_figure_hole_clusters(), otherwise the outputs are
 *      computed from the record and the record is inserted into the
 *      extent map cache.
 *
 * NOTE(review): error checks after each call, the switch of el to the
 * leaf's list, the conditions guarding the hole / bad-record paths,
 * NULL checks on the output pointers, buffer release and the return
 * value are not visible in this view.
 */
411 int ocfs2_get_clusters(struct inode
*inode
, u32 v_cluster
,
412 u32
*p_cluster
, u32
*num_clusters
,
413 unsigned int *extent_flags
)
416 unsigned int flags
= 0;
417 struct buffer_head
*di_bh
= NULL
;
418 struct buffer_head
*eb_bh
= NULL
;
419 struct ocfs2_dinode
*di
;
420 struct ocfs2_extent_block
*eb
;
421 struct ocfs2_extent_list
*el
;
422 struct ocfs2_extent_rec
*rec
;
/* Try the in-memory cache first. */
425 ret
= ocfs2_extent_map_lookup(inode
, v_cluster
, p_cluster
,
426 num_clusters
, extent_flags
);
/* Read the inode's own block to reach its extent list. */
430 ret
= ocfs2_read_block(OCFS2_SB(inode
->i_sb
), OCFS2_I(inode
)->ip_blkno
,
431 &di_bh
, OCFS2_BH_CACHED
, inode
);
437 di
= (struct ocfs2_dinode
*) di_bh
->b_data
;
438 el
= &di
->id2
.i_list
;
/* Non-zero depth: the records live in a leaf extent block. */
440 if (el
->l_tree_depth
) {
441 ret
= ocfs2_find_leaf(inode
, el
, v_cluster
, &eb_bh
);
447 eb
= (struct ocfs2_extent_block
*) eb_bh
->b_data
;
450 if (el
->l_tree_depth
) {
451 ocfs2_error(inode
->i_sb
,
452 "Inode %lu has non zero tree depth in "
453 "leaf block %llu\n", inode
->i_ino
,
454 (unsigned long long)eb_bh
->b_blocknr
);
460 i
= ocfs2_search_extent_list(el
, v_cluster
);
463 * A hole was found. Return some canned values that
464 * callers can key on. If asked for, num_clusters will
465 * be populated with the size of the hole.
469 ret
= ocfs2_figure_hole_clusters(inode
, el
, eb_bh
,
478 rec
= &el
->l_recs
[i
];
/* The record found must not start after the requested cluster. */
480 BUG_ON(v_cluster
< le32_to_cpu(rec
->e_cpos
));
483 ocfs2_error(inode
->i_sb
, "Inode %lu has bad extent "
484 "record (%u, %u, 0)", inode
->i_ino
,
485 le32_to_cpu(rec
->e_cpos
),
486 ocfs2_rec_clusters(el
, rec
));
/* coff: offset of v_cluster from the start of the record. */
491 coff
= v_cluster
- le32_to_cpu(rec
->e_cpos
);
493 *p_cluster
= ocfs2_blocks_to_clusters(inode
->i_sb
,
494 le64_to_cpu(rec
->e_blkno
));
495 *p_cluster
= *p_cluster
+ coff
;
498 *num_clusters
= ocfs2_rec_clusters(el
, rec
) - coff
;
500 flags
= rec
->e_flags
;
/* Remember this record in the in-memory extent map. */
502 ocfs2_extent_map_insert_rec(inode
, rec
);
506 *extent_flags
= flags
;
515 * This expects alloc_sem to be held. The allocation cannot change at
516 * all while the map is in the process of being updated.
518 int ocfs2_extent_map_get_blocks(struct inode
*inode
, u64 v_blkno
, u64
*p_blkno
,
519 u64
*ret_count
, unsigned int *extent_flags
)
522 int bpc
= ocfs2_clusters_to_blocks(inode
->i_sb
, 1);
523 u32 cpos
, num_clusters
, p_cluster
;
526 cpos
= ocfs2_blocks_to_clusters(inode
->i_sb
, v_blkno
);
528 ret
= ocfs2_get_clusters(inode
, cpos
, &p_cluster
, &num_clusters
,
536 * p_cluster == 0 indicates a hole.
539 boff
= ocfs2_clusters_to_blocks(inode
->i_sb
, p_cluster
);
540 boff
+= (v_blkno
& (u64
)(bpc
- 1));
546 *ret_count
= ocfs2_clusters_to_blocks(inode
->i_sb
, num_clusters
);
547 *ret_count
-= v_blkno
& (u64
)(bpc
- 1);