[PATCH] remove ext2 xattr permission checks
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / fs / ocfs2 / extent_map.c
blobf2fb40cd296a2890b1af082794c1324ba6969c0d
1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
4 * extent_map.c
6 * In-memory extent map for OCFS2. Man, this code was prettier in
7 * the library.
9 * Copyright (C) 2004 Oracle. All rights reserved.
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public
13 * License, version 2, as published by the Free Software Foundation.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 021110-1307, USA.
26 #include <linux/fs.h>
27 #include <linux/init.h>
28 #include <linux/types.h>
29 #include <linux/slab.h>
30 #include <linux/rbtree.h>
32 #define MLOG_MASK_PREFIX ML_EXTENT_MAP
33 #include <cluster/masklog.h>
35 #include "ocfs2.h"
37 #include "extent_map.h"
38 #include "inode.h"
39 #include "super.h"
41 #include "buffer_head_io.h"
45 * SUCK SUCK SUCK
46 * Our headers are so bad that struct ocfs2_extent_map is in ocfs.h
49 struct ocfs2_extent_map_entry {
50 struct rb_node e_node;
51 int e_tree_depth;
52 struct ocfs2_extent_rec e_rec;
55 struct ocfs2_em_insert_context {
56 int need_left;
57 int need_right;
58 struct ocfs2_extent_map_entry *new_ent;
59 struct ocfs2_extent_map_entry *old_ent;
60 struct ocfs2_extent_map_entry *left_ent;
61 struct ocfs2_extent_map_entry *right_ent;
64 static kmem_cache_t *ocfs2_em_ent_cachep = NULL;
67 static struct ocfs2_extent_map_entry *
68 ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
69 u32 cpos, u32 clusters,
70 struct rb_node ***ret_p,
71 struct rb_node **ret_parent);
72 static int ocfs2_extent_map_insert(struct inode *inode,
73 struct ocfs2_extent_rec *rec,
74 int tree_depth);
75 static int ocfs2_extent_map_insert_entry(struct ocfs2_extent_map *em,
76 struct ocfs2_extent_map_entry *ent);
77 static int ocfs2_extent_map_find_leaf(struct inode *inode,
78 u32 cpos, u32 clusters,
79 struct ocfs2_extent_list *el);
80 static int ocfs2_extent_map_lookup_read(struct inode *inode,
81 u32 cpos, u32 clusters,
82 struct ocfs2_extent_map_entry **ret_ent);
83 static int ocfs2_extent_map_try_insert(struct inode *inode,
84 struct ocfs2_extent_rec *rec,
85 int tree_depth,
86 struct ocfs2_em_insert_context *ctxt);
88 /* returns 1 only if the rec contains all the given clusters -- that is that
89 * rec's cpos is <= the cluster cpos and that the rec endpoint (cpos +
90 * clusters) is >= the argument's endpoint */
91 static int ocfs2_extent_rec_contains_clusters(struct ocfs2_extent_rec *rec,
92 u32 cpos, u32 clusters)
94 if (le32_to_cpu(rec->e_cpos) > cpos)
95 return 0;
96 if (cpos + clusters > le32_to_cpu(rec->e_cpos) +
97 le32_to_cpu(rec->e_clusters))
98 return 0;
99 return 1;
104 * Find an entry in the tree that intersects the region passed in.
105 * Note that this will find straddled intervals, it is up to the
106 * callers to enforce any boundary conditions.
108 * Callers must hold ip_lock. This lookup is not guaranteed to return
109 * a tree_depth 0 match, and as such can race inserts if the lock
110 * were not held.
112 * The rb_node garbage lets insertion share the search. Trivial
113 * callers pass NULL.
115 static struct ocfs2_extent_map_entry *
116 ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
117 u32 cpos, u32 clusters,
118 struct rb_node ***ret_p,
119 struct rb_node **ret_parent)
121 struct rb_node **p = &em->em_extents.rb_node;
122 struct rb_node *parent = NULL;
123 struct ocfs2_extent_map_entry *ent = NULL;
125 while (*p)
127 parent = *p;
128 ent = rb_entry(parent, struct ocfs2_extent_map_entry,
129 e_node);
130 if ((cpos + clusters) <= le32_to_cpu(ent->e_rec.e_cpos)) {
131 p = &(*p)->rb_left;
132 ent = NULL;
133 } else if (cpos >= (le32_to_cpu(ent->e_rec.e_cpos) +
134 le32_to_cpu(ent->e_rec.e_clusters))) {
135 p = &(*p)->rb_right;
136 ent = NULL;
137 } else
138 break;
141 if (ret_p != NULL)
142 *ret_p = p;
143 if (ret_parent != NULL)
144 *ret_parent = parent;
145 return ent;
149 * Find the leaf containing the interval we want. While we're on our
150 * way down the tree, fill in every record we see at any depth, because
151 * we might want it later.
153 * Note that this code is run without ip_lock. That's because it
154 * sleeps while reading. If someone is also filling the extent list at
155 * the same time we are, we might have to restart.
157 static int ocfs2_extent_map_find_leaf(struct inode *inode,
158 u32 cpos, u32 clusters,
159 struct ocfs2_extent_list *el)
161 int i, ret;
162 struct buffer_head *eb_bh = NULL;
163 u64 blkno;
164 u32 rec_end;
165 struct ocfs2_extent_block *eb;
166 struct ocfs2_extent_rec *rec;
169 * The bh data containing the el cannot change here, because
170 * we hold alloc_sem. So we can do this without other
171 * locks.
173 while (el->l_tree_depth)
175 blkno = 0;
176 for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
177 rec = &el->l_recs[i];
178 rec_end = (le32_to_cpu(rec->e_cpos) +
179 le32_to_cpu(rec->e_clusters));
181 ret = -EBADR;
182 if (rec_end > OCFS2_I(inode)->ip_clusters) {
183 mlog_errno(ret);
184 goto out_free;
187 if (rec_end <= cpos) {
188 ret = ocfs2_extent_map_insert(inode, rec,
189 le16_to_cpu(el->l_tree_depth));
190 if (ret && (ret != -EEXIST)) {
191 mlog_errno(ret);
192 goto out_free;
194 continue;
196 if ((cpos + clusters) <= le32_to_cpu(rec->e_cpos)) {
197 ret = ocfs2_extent_map_insert(inode, rec,
198 le16_to_cpu(el->l_tree_depth));
199 if (ret && (ret != -EEXIST)) {
200 mlog_errno(ret);
201 goto out_free;
203 continue;
207 * We've found a record that matches our
208 * interval. We don't insert it because we're
209 * about to traverse it.
212 /* Check to see if we're stradling */
213 ret = -ESRCH;
214 if (!ocfs2_extent_rec_contains_clusters(rec,
215 cpos,
216 clusters)) {
217 mlog_errno(ret);
218 goto out_free;
222 * If we've already found a record, the el has
223 * two records covering the same interval.
224 * EEEK!
226 ret = -EBADR;
227 if (blkno) {
228 mlog_errno(ret);
229 goto out_free;
232 blkno = le64_to_cpu(rec->e_blkno);
236 * We don't support holes, and we're still up
237 * in the branches, so we'd better have found someone
239 ret = -EBADR;
240 if (!blkno) {
241 mlog_errno(ret);
242 goto out_free;
245 if (eb_bh) {
246 brelse(eb_bh);
247 eb_bh = NULL;
249 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
250 blkno, &eb_bh, OCFS2_BH_CACHED,
251 inode);
252 if (ret) {
253 mlog_errno(ret);
254 goto out_free;
256 eb = (struct ocfs2_extent_block *)eb_bh->b_data;
257 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
258 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
259 ret = -EIO;
260 goto out_free;
262 el = &eb->h_list;
265 if (el->l_tree_depth)
266 BUG();
268 for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
269 rec = &el->l_recs[i];
270 ret = ocfs2_extent_map_insert(inode, rec,
271 le16_to_cpu(el->l_tree_depth));
272 if (ret) {
273 mlog_errno(ret);
274 goto out_free;
278 ret = 0;
280 out_free:
281 if (eb_bh)
282 brelse(eb_bh);
284 return ret;
288 * This lookup actually will read from disk. It has one invariant:
289 * It will never re-traverse blocks. This means that all inserts should
290 * be new regions or more granular regions (both allowed by insert).
292 static int ocfs2_extent_map_lookup_read(struct inode *inode,
293 u32 cpos,
294 u32 clusters,
295 struct ocfs2_extent_map_entry **ret_ent)
297 int ret;
298 u64 blkno;
299 struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
300 struct ocfs2_extent_map_entry *ent;
301 struct buffer_head *bh = NULL;
302 struct ocfs2_extent_block *eb;
303 struct ocfs2_dinode *di;
304 struct ocfs2_extent_list *el;
306 spin_lock(&OCFS2_I(inode)->ip_lock);
307 ent = ocfs2_extent_map_lookup(em, cpos, clusters, NULL, NULL);
308 if (ent) {
309 if (!ent->e_tree_depth) {
310 spin_unlock(&OCFS2_I(inode)->ip_lock);
311 *ret_ent = ent;
312 return 0;
314 blkno = le64_to_cpu(ent->e_rec.e_blkno);
315 spin_unlock(&OCFS2_I(inode)->ip_lock);
317 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno, &bh,
318 OCFS2_BH_CACHED, inode);
319 if (ret) {
320 mlog_errno(ret);
321 if (bh)
322 brelse(bh);
323 return ret;
325 eb = (struct ocfs2_extent_block *)bh->b_data;
326 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
327 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
328 brelse(bh);
329 return -EIO;
331 el = &eb->h_list;
332 } else {
333 spin_unlock(&OCFS2_I(inode)->ip_lock);
335 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
336 OCFS2_I(inode)->ip_blkno, &bh,
337 OCFS2_BH_CACHED, inode);
338 if (ret) {
339 mlog_errno(ret);
340 if (bh)
341 brelse(bh);
342 return ret;
344 di = (struct ocfs2_dinode *)bh->b_data;
345 if (!OCFS2_IS_VALID_DINODE(di)) {
346 brelse(bh);
347 OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, di);
348 return -EIO;
350 el = &di->id2.i_list;
353 ret = ocfs2_extent_map_find_leaf(inode, cpos, clusters, el);
354 brelse(bh);
355 if (ret) {
356 mlog_errno(ret);
357 return ret;
360 ent = ocfs2_extent_map_lookup(em, cpos, clusters, NULL, NULL);
361 if (!ent) {
362 ret = -ESRCH;
363 mlog_errno(ret);
364 return ret;
367 if (ent->e_tree_depth)
368 BUG(); /* FIXME: Make sure this isn't a corruption */
370 *ret_ent = ent;
372 return 0;
376 * Callers must hold ip_lock. This can insert pieces of the tree,
377 * thus racing lookup if the lock weren't held.
379 static int ocfs2_extent_map_insert_entry(struct ocfs2_extent_map *em,
380 struct ocfs2_extent_map_entry *ent)
382 struct rb_node **p, *parent;
383 struct ocfs2_extent_map_entry *old_ent;
385 old_ent = ocfs2_extent_map_lookup(em, le32_to_cpu(ent->e_rec.e_cpos),
386 le32_to_cpu(ent->e_rec.e_clusters),
387 &p, &parent);
388 if (old_ent)
389 return -EEXIST;
391 rb_link_node(&ent->e_node, parent, p);
392 rb_insert_color(&ent->e_node, &em->em_extents);
394 return 0;
399 * Simple rule: on any return code other than -EAGAIN, anything left
400 * in the insert_context will be freed.
402 static int ocfs2_extent_map_try_insert(struct inode *inode,
403 struct ocfs2_extent_rec *rec,
404 int tree_depth,
405 struct ocfs2_em_insert_context *ctxt)
407 int ret;
408 struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
409 struct ocfs2_extent_map_entry *old_ent;
411 ctxt->need_left = 0;
412 ctxt->need_right = 0;
413 ctxt->old_ent = NULL;
415 spin_lock(&OCFS2_I(inode)->ip_lock);
416 ret = ocfs2_extent_map_insert_entry(em, ctxt->new_ent);
417 if (!ret) {
418 ctxt->new_ent = NULL;
419 goto out_unlock;
422 old_ent = ocfs2_extent_map_lookup(em, le32_to_cpu(rec->e_cpos),
423 le32_to_cpu(rec->e_clusters), NULL,
424 NULL);
426 if (!old_ent)
427 BUG();
429 ret = -EEXIST;
430 if (old_ent->e_tree_depth < tree_depth)
431 goto out_unlock;
433 if (old_ent->e_tree_depth == tree_depth) {
434 if (!memcmp(rec, &old_ent->e_rec,
435 sizeof(struct ocfs2_extent_rec)))
436 ret = 0;
438 /* FIXME: Should this be ESRCH/EBADR??? */
439 goto out_unlock;
443 * We do it in this order specifically so that no actual tree
444 * changes occur until we have all the pieces we need. We
445 * don't want malloc failures to leave an inconsistent tree.
446 * Whenever we drop the lock, another process could be
447 * inserting. Also note that, if another process just beat us
448 * to an insert, we might not need the same pieces we needed
449 * the first go round. In the end, the pieces we need will
450 * be used, and the pieces we don't will be freed.
452 ctxt->need_left = !!(le32_to_cpu(rec->e_cpos) >
453 le32_to_cpu(old_ent->e_rec.e_cpos));
454 ctxt->need_right = !!((le32_to_cpu(old_ent->e_rec.e_cpos) +
455 le32_to_cpu(old_ent->e_rec.e_clusters)) >
456 (le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)));
457 ret = -EAGAIN;
458 if (ctxt->need_left) {
459 if (!ctxt->left_ent)
460 goto out_unlock;
461 *(ctxt->left_ent) = *old_ent;
462 ctxt->left_ent->e_rec.e_clusters =
463 cpu_to_le32(le32_to_cpu(rec->e_cpos) -
464 le32_to_cpu(ctxt->left_ent->e_rec.e_cpos));
466 if (ctxt->need_right) {
467 if (!ctxt->right_ent)
468 goto out_unlock;
469 *(ctxt->right_ent) = *old_ent;
470 ctxt->right_ent->e_rec.e_cpos =
471 cpu_to_le32(le32_to_cpu(rec->e_cpos) +
472 le32_to_cpu(rec->e_clusters));
473 ctxt->right_ent->e_rec.e_clusters =
474 cpu_to_le32((le32_to_cpu(old_ent->e_rec.e_cpos) +
475 le32_to_cpu(old_ent->e_rec.e_clusters)) -
476 le32_to_cpu(ctxt->right_ent->e_rec.e_cpos));
479 rb_erase(&old_ent->e_node, &em->em_extents);
480 /* Now that he's erased, set him up for deletion */
481 ctxt->old_ent = old_ent;
483 if (ctxt->need_left) {
484 ret = ocfs2_extent_map_insert_entry(em,
485 ctxt->left_ent);
486 if (ret)
487 goto out_unlock;
488 ctxt->left_ent = NULL;
491 if (ctxt->need_right) {
492 ret = ocfs2_extent_map_insert_entry(em,
493 ctxt->right_ent);
494 if (ret)
495 goto out_unlock;
496 ctxt->right_ent = NULL;
499 ret = ocfs2_extent_map_insert_entry(em, ctxt->new_ent);
501 if (!ret)
502 ctxt->new_ent = NULL;
504 out_unlock:
505 spin_unlock(&OCFS2_I(inode)->ip_lock);
507 return ret;
511 static int ocfs2_extent_map_insert(struct inode *inode,
512 struct ocfs2_extent_rec *rec,
513 int tree_depth)
515 int ret;
516 struct ocfs2_em_insert_context ctxt = {0, };
518 if ((le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)) >
519 OCFS2_I(inode)->ip_map.em_clusters) {
520 ret = -EBADR;
521 mlog_errno(ret);
522 return ret;
525 /* Zero e_clusters means a truncated tail record. It better be EOF */
526 if (!rec->e_clusters) {
527 if ((le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)) !=
528 OCFS2_I(inode)->ip_map.em_clusters) {
529 ret = -EBADR;
530 mlog_errno(ret);
531 return ret;
534 /* Ignore the truncated tail */
535 return 0;
538 ret = -ENOMEM;
539 ctxt.new_ent = kmem_cache_alloc(ocfs2_em_ent_cachep,
540 GFP_KERNEL);
541 if (!ctxt.new_ent) {
542 mlog_errno(ret);
543 return ret;
546 ctxt.new_ent->e_rec = *rec;
547 ctxt.new_ent->e_tree_depth = tree_depth;
549 do {
550 ret = -ENOMEM;
551 if (ctxt.need_left && !ctxt.left_ent) {
552 ctxt.left_ent =
553 kmem_cache_alloc(ocfs2_em_ent_cachep,
554 GFP_KERNEL);
555 if (!ctxt.left_ent)
556 break;
558 if (ctxt.need_right && !ctxt.right_ent) {
559 ctxt.right_ent =
560 kmem_cache_alloc(ocfs2_em_ent_cachep,
561 GFP_KERNEL);
562 if (!ctxt.right_ent)
563 break;
566 ret = ocfs2_extent_map_try_insert(inode, rec,
567 tree_depth, &ctxt);
568 } while (ret == -EAGAIN);
570 if (ret < 0)
571 mlog_errno(ret);
573 if (ctxt.left_ent)
574 kmem_cache_free(ocfs2_em_ent_cachep, ctxt.left_ent);
575 if (ctxt.right_ent)
576 kmem_cache_free(ocfs2_em_ent_cachep, ctxt.right_ent);
577 if (ctxt.old_ent)
578 kmem_cache_free(ocfs2_em_ent_cachep, ctxt.old_ent);
579 if (ctxt.new_ent)
580 kmem_cache_free(ocfs2_em_ent_cachep, ctxt.new_ent);
582 return ret;
586 * Append this record to the tail of the extent map. It must be
587 * tree_depth 0. The record might be an extension of an existing
588 * record, and as such that needs to be handled. eg:
590 * Existing record in the extent map:
592 * cpos = 10, len = 10
593 * |---------|
595 * New Record:
597 * cpos = 10, len = 20
598 * |------------------|
600 * The passed record is the new on-disk record. The new_clusters value
601 * is how many clusters were added to the file. If the append is a
602 * contiguous append, the new_clusters has been added to
603 * rec->e_clusters. If the append is an entirely new extent, then
604 * rec->e_clusters is == new_clusters.
606 int ocfs2_extent_map_append(struct inode *inode,
607 struct ocfs2_extent_rec *rec,
608 u32 new_clusters)
610 int ret;
611 struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
612 struct ocfs2_extent_map_entry *ent;
613 struct ocfs2_extent_rec *old;
615 BUG_ON(!new_clusters);
616 BUG_ON(le32_to_cpu(rec->e_clusters) < new_clusters);
618 if (em->em_clusters < OCFS2_I(inode)->ip_clusters) {
620 * Size changed underneath us on disk. Drop any
621 * straddling records and update our idea of
622 * i_clusters
624 ocfs2_extent_map_drop(inode, em->em_clusters - 1);
625 em->em_clusters = OCFS2_I(inode)->ip_clusters;
628 mlog_bug_on_msg((le32_to_cpu(rec->e_cpos) +
629 le32_to_cpu(rec->e_clusters)) !=
630 (em->em_clusters + new_clusters),
631 "Inode %"MLFu64":\n"
632 "rec->e_cpos = %u + rec->e_clusters = %u = %u\n"
633 "em->em_clusters = %u + new_clusters = %u = %u\n",
634 OCFS2_I(inode)->ip_blkno,
635 le32_to_cpu(rec->e_cpos), le32_to_cpu(rec->e_clusters),
636 le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters),
637 em->em_clusters, new_clusters,
638 em->em_clusters + new_clusters);
640 em->em_clusters += new_clusters;
642 ret = -ENOENT;
643 if (le32_to_cpu(rec->e_clusters) > new_clusters) {
644 /* This is a contiguous append */
645 ent = ocfs2_extent_map_lookup(em, le32_to_cpu(rec->e_cpos), 1,
646 NULL, NULL);
647 if (ent) {
648 old = &ent->e_rec;
649 BUG_ON((le32_to_cpu(rec->e_cpos) +
650 le32_to_cpu(rec->e_clusters)) !=
651 (le32_to_cpu(old->e_cpos) +
652 le32_to_cpu(old->e_clusters) +
653 new_clusters));
654 if (ent->e_tree_depth == 0) {
655 BUG_ON(le32_to_cpu(old->e_cpos) !=
656 le32_to_cpu(rec->e_cpos));
657 BUG_ON(le64_to_cpu(old->e_blkno) !=
658 le64_to_cpu(rec->e_blkno));
659 ret = 0;
662 * Let non-leafs fall through as -ENOENT to
663 * force insertion of the new leaf.
665 le32_add_cpu(&old->e_clusters, new_clusters);
669 if (ret == -ENOENT)
670 ret = ocfs2_extent_map_insert(inode, rec, 0);
671 if (ret < 0)
672 mlog_errno(ret);
673 return ret;
676 #if 0
677 /* Code here is included but defined out as it completes the extent
678 * map api and may be used in the future. */
681 * Look up the record containing this cluster offset. This record is
682 * part of the extent map. Do not free it. Any changes you make to
683 * it will reflect in the extent map. So, if your last extent
684 * is (cpos = 10, clusters = 10) and you truncate the file by 5
685 * clusters, you can do:
687 * ret = ocfs2_extent_map_get_rec(em, orig_size - 5, &rec);
688 * rec->e_clusters -= 5;
690 * The lookup does not read from disk. If the map isn't filled in for
691 * an entry, you won't find it.
693 * Also note that the returned record is valid until alloc_sem is
694 * dropped. After that, truncate and extend can happen. Caveat Emptor.
696 int ocfs2_extent_map_get_rec(struct inode *inode, u32 cpos,
697 struct ocfs2_extent_rec **rec,
698 int *tree_depth)
700 int ret = -ENOENT;
701 struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
702 struct ocfs2_extent_map_entry *ent;
704 *rec = NULL;
706 if (cpos >= OCFS2_I(inode)->ip_clusters)
707 return -EINVAL;
709 if (cpos >= em->em_clusters) {
711 * Size changed underneath us on disk. Drop any
712 * straddling records and update our idea of
713 * i_clusters
715 ocfs2_extent_map_drop(inode, em->em_clusters - 1);
716 em->em_clusters = OCFS2_I(inode)->ip_clusters ;
719 ent = ocfs2_extent_map_lookup(&OCFS2_I(inode)->ip_map, cpos, 1,
720 NULL, NULL);
722 if (ent) {
723 *rec = &ent->e_rec;
724 if (tree_depth)
725 *tree_depth = ent->e_tree_depth;
726 ret = 0;
729 return ret;
732 int ocfs2_extent_map_get_clusters(struct inode *inode,
733 u32 v_cpos, int count,
734 u32 *p_cpos, int *ret_count)
736 int ret;
737 u32 coff, ccount;
738 struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
739 struct ocfs2_extent_map_entry *ent = NULL;
741 *p_cpos = ccount = 0;
743 if ((v_cpos + count) > OCFS2_I(inode)->ip_clusters)
744 return -EINVAL;
746 if ((v_cpos + count) > em->em_clusters) {
748 * Size changed underneath us on disk. Drop any
749 * straddling records and update our idea of
750 * i_clusters
752 ocfs2_extent_map_drop(inode, em->em_clusters - 1);
753 em->em_clusters = OCFS2_I(inode)->ip_clusters;
757 ret = ocfs2_extent_map_lookup_read(inode, v_cpos, count, &ent);
758 if (ret)
759 return ret;
761 if (ent) {
762 /* We should never find ourselves straddling an interval */
763 if (!ocfs2_extent_rec_contains_clusters(&ent->e_rec,
764 v_cpos,
765 count))
766 return -ESRCH;
768 coff = v_cpos - le32_to_cpu(ent->e_rec.e_cpos);
769 *p_cpos = ocfs2_blocks_to_clusters(inode->i_sb,
770 le64_to_cpu(ent->e_rec.e_blkno)) +
771 coff;
773 if (ret_count)
774 *ret_count = le32_to_cpu(ent->e_rec.e_clusters) - coff;
776 return 0;
780 return -ENOENT;
783 #endif /* 0 */
785 int ocfs2_extent_map_get_blocks(struct inode *inode,
786 u64 v_blkno, int count,
787 u64 *p_blkno, int *ret_count)
789 int ret;
790 u64 boff;
791 u32 cpos, clusters;
792 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
793 struct ocfs2_extent_map_entry *ent = NULL;
794 struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
795 struct ocfs2_extent_rec *rec;
797 *p_blkno = 0;
799 cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);
800 clusters = ocfs2_blocks_to_clusters(inode->i_sb,
801 (u64)count + bpc - 1);
802 if ((cpos + clusters) > OCFS2_I(inode)->ip_clusters) {
803 ret = -EINVAL;
804 mlog_errno(ret);
805 return ret;
808 if ((cpos + clusters) > em->em_clusters) {
810 * Size changed underneath us on disk. Drop any
811 * straddling records and update our idea of
812 * i_clusters
814 ocfs2_extent_map_drop(inode, em->em_clusters - 1);
815 em->em_clusters = OCFS2_I(inode)->ip_clusters;
818 ret = ocfs2_extent_map_lookup_read(inode, cpos, clusters, &ent);
819 if (ret) {
820 mlog_errno(ret);
821 return ret;
824 if (ent)
826 rec = &ent->e_rec;
828 /* We should never find ourselves straddling an interval */
829 if (!ocfs2_extent_rec_contains_clusters(rec, cpos, clusters)) {
830 ret = -ESRCH;
831 mlog_errno(ret);
832 return ret;
835 boff = ocfs2_clusters_to_blocks(inode->i_sb, cpos -
836 le32_to_cpu(rec->e_cpos));
837 boff += (v_blkno & (u64)(bpc - 1));
838 *p_blkno = le64_to_cpu(rec->e_blkno) + boff;
840 if (ret_count) {
841 *ret_count = ocfs2_clusters_to_blocks(inode->i_sb,
842 le32_to_cpu(rec->e_clusters)) - boff;
845 return 0;
848 return -ENOENT;
851 int ocfs2_extent_map_init(struct inode *inode)
853 struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
855 em->em_extents = RB_ROOT;
856 em->em_clusters = 0;
858 return 0;
861 /* Needs the lock */
862 static void __ocfs2_extent_map_drop(struct inode *inode,
863 u32 new_clusters,
864 struct rb_node **free_head,
865 struct ocfs2_extent_map_entry **tail_ent)
867 struct rb_node *node, *next;
868 struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
869 struct ocfs2_extent_map_entry *ent;
871 *free_head = NULL;
873 ent = NULL;
874 node = rb_last(&em->em_extents);
875 while (node)
877 next = rb_prev(node);
879 ent = rb_entry(node, struct ocfs2_extent_map_entry,
880 e_node);
881 if (le32_to_cpu(ent->e_rec.e_cpos) < new_clusters)
882 break;
884 rb_erase(&ent->e_node, &em->em_extents);
886 node->rb_right = *free_head;
887 *free_head = node;
889 ent = NULL;
890 node = next;
893 /* Do we have an entry straddling new_clusters? */
894 if (tail_ent) {
895 if (ent &&
896 ((le32_to_cpu(ent->e_rec.e_cpos) +
897 le32_to_cpu(ent->e_rec.e_clusters)) > new_clusters))
898 *tail_ent = ent;
899 else
900 *tail_ent = NULL;
904 static void __ocfs2_extent_map_drop_cleanup(struct rb_node *free_head)
906 struct rb_node *node;
907 struct ocfs2_extent_map_entry *ent;
909 while (free_head) {
910 node = free_head;
911 free_head = node->rb_right;
913 ent = rb_entry(node, struct ocfs2_extent_map_entry,
914 e_node);
915 kmem_cache_free(ocfs2_em_ent_cachep, ent);
920 * Remove all entries past new_clusters, inclusive of an entry that
921 * contains new_clusters. This is effectively a cache forget.
923 * If you want to also clip the last extent by some number of clusters,
924 * you need to call ocfs2_extent_map_trunc().
925 * This code does not check or modify ip_clusters.
927 int ocfs2_extent_map_drop(struct inode *inode, u32 new_clusters)
929 struct rb_node *free_head = NULL;
930 struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
931 struct ocfs2_extent_map_entry *ent;
933 spin_lock(&OCFS2_I(inode)->ip_lock);
935 __ocfs2_extent_map_drop(inode, new_clusters, &free_head, &ent);
937 if (ent) {
938 rb_erase(&ent->e_node, &em->em_extents);
939 ent->e_node.rb_right = free_head;
940 free_head = &ent->e_node;
943 spin_unlock(&OCFS2_I(inode)->ip_lock);
945 if (free_head)
946 __ocfs2_extent_map_drop_cleanup(free_head);
948 return 0;
952 * Remove all entries past new_clusters and also clip any extent
953 * straddling new_clusters, if there is one. This does not check
954 * or modify ip_clusters
956 int ocfs2_extent_map_trunc(struct inode *inode, u32 new_clusters)
958 struct rb_node *free_head = NULL;
959 struct ocfs2_extent_map_entry *ent = NULL;
961 spin_lock(&OCFS2_I(inode)->ip_lock);
963 __ocfs2_extent_map_drop(inode, new_clusters, &free_head, &ent);
965 if (ent)
966 ent->e_rec.e_clusters = cpu_to_le32(new_clusters -
967 le32_to_cpu(ent->e_rec.e_cpos));
969 OCFS2_I(inode)->ip_map.em_clusters = new_clusters;
971 spin_unlock(&OCFS2_I(inode)->ip_lock);
973 if (free_head)
974 __ocfs2_extent_map_drop_cleanup(free_head);
976 return 0;
979 int __init init_ocfs2_extent_maps(void)
981 ocfs2_em_ent_cachep =
982 kmem_cache_create("ocfs2_em_ent",
983 sizeof(struct ocfs2_extent_map_entry),
984 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
985 if (!ocfs2_em_ent_cachep)
986 return -ENOMEM;
988 return 0;
991 void __exit exit_ocfs2_extent_maps(void)
993 kmem_cache_destroy(ocfs2_em_ent_cachep);