Btrfs: allow callers to specify if flushing can occur for btrfs_block_rsv_check
fs/btrfs/free-space-cache.c
1 /*
2 * Copyright (C) 2008 Red Hat. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <linux/pagemap.h>
20 #include <linux/sched.h>
21 #include <linux/slab.h>
22 #include <linux/math64.h>
23 #include <linux/ratelimit.h>
24 #include "ctree.h"
25 #include "free-space-cache.h"
26 #include "transaction.h"
27 #include "disk-io.h"
28 #include "extent_io.h"
29 #include "inode-map.h"
31 #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
32 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
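/*
 * A bitmap entry uses one full page.  As a worked example (assuming 4K
 * pages and 4K sectors, which is the common case but not guaranteed),
 * BITS_PER_BITMAP is 32768 and each bit covers one sector, so a single
 * bitmap describes 32768 * 4K = 128MB of the block group.
 * MAX_CACHE_BYTES_PER_GIG caps the in-memory tracking overhead at 32K per
 * 1GB of space; see recalculate_thresholds() below.
 */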
34 static int link_free_space(struct btrfs_free_space_ctl *ctl,
35 struct btrfs_free_space *info);
37 static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
38 struct btrfs_path *path,
39 u64 offset)
41 struct btrfs_key key;
42 struct btrfs_key location;
43 struct btrfs_disk_key disk_key;
44 struct btrfs_free_space_header *header;
45 struct extent_buffer *leaf;
46 struct inode *inode = NULL;
47 int ret;
49 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
50 key.offset = offset;
51 key.type = 0;
53 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
54 if (ret < 0)
55 return ERR_PTR(ret);
56 if (ret > 0) {
57 btrfs_release_path(path);
58 return ERR_PTR(-ENOENT);
61 leaf = path->nodes[0];
62 header = btrfs_item_ptr(leaf, path->slots[0],
63 struct btrfs_free_space_header);
64 btrfs_free_space_key(leaf, header, &disk_key);
65 btrfs_disk_key_to_cpu(&location, &disk_key);
66 btrfs_release_path(path);
68 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
69 if (!inode)
70 return ERR_PTR(-ENOENT);
71 if (IS_ERR(inode))
72 return inode;
73 if (is_bad_inode(inode)) {
74 iput(inode);
75 return ERR_PTR(-ENOENT);
78 inode->i_mapping->flags &= ~__GFP_FS;
80 return inode;
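/*
 * Wrapper around __lookup_free_space_inode() that caches the inode on the
 * block group.  An inode that still carries BTRFS_INODE_NODATASUM is an
 * old-style space cache, so the flag is dropped and the block group is
 * marked BTRFS_DC_CLEAR to force the cache to be rewritten in the new
 * format.
 */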
83 struct inode *lookup_free_space_inode(struct btrfs_root *root,
84 struct btrfs_block_group_cache
85 *block_group, struct btrfs_path *path)
87 struct inode *inode = NULL;
89 spin_lock(&block_group->lock);
90 if (block_group->inode)
91 inode = igrab(block_group->inode);
92 spin_unlock(&block_group->lock);
93 if (inode)
94 return inode;
96 inode = __lookup_free_space_inode(root, path,
97 block_group->key.objectid);
98 if (IS_ERR(inode))
99 return inode;
101 spin_lock(&block_group->lock);
102 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) {
103 printk(KERN_INFO "Old style space inode found, converting.\n");
104 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM;
105 block_group->disk_cache_state = BTRFS_DC_CLEAR;
108 if (!btrfs_fs_closing(root->fs_info)) {
109 block_group->inode = igrab(inode);
110 block_group->iref = 1;
112 spin_unlock(&block_group->lock);
114 return inode;
117 int __create_free_space_inode(struct btrfs_root *root,
118 struct btrfs_trans_handle *trans,
119 struct btrfs_path *path, u64 ino, u64 offset)
121 struct btrfs_key key;
122 struct btrfs_disk_key disk_key;
123 struct btrfs_free_space_header *header;
124 struct btrfs_inode_item *inode_item;
125 struct extent_buffer *leaf;
126 int ret;
128 ret = btrfs_insert_empty_inode(trans, root, path, ino);
129 if (ret)
130 return ret;
132 leaf = path->nodes[0];
133 inode_item = btrfs_item_ptr(leaf, path->slots[0],
134 struct btrfs_inode_item);
135 btrfs_item_key(leaf, &disk_key, path->slots[0]);
136 memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
137 sizeof(*inode_item));
138 btrfs_set_inode_generation(leaf, inode_item, trans->transid);
139 btrfs_set_inode_size(leaf, inode_item, 0);
140 btrfs_set_inode_nbytes(leaf, inode_item, 0);
141 btrfs_set_inode_uid(leaf, inode_item, 0);
142 btrfs_set_inode_gid(leaf, inode_item, 0);
143 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
144 btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
145 BTRFS_INODE_PREALLOC);
146 btrfs_set_inode_nlink(leaf, inode_item, 1);
147 btrfs_set_inode_transid(leaf, inode_item, trans->transid);
148 btrfs_set_inode_block_group(leaf, inode_item, offset);
149 btrfs_mark_buffer_dirty(leaf);
150 btrfs_release_path(path);
152 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
153 key.offset = offset;
154 key.type = 0;
156 ret = btrfs_insert_empty_item(trans, root, path, &key,
157 sizeof(struct btrfs_free_space_header));
158 if (ret < 0) {
159 btrfs_release_path(path);
160 return ret;
162 leaf = path->nodes[0];
163 header = btrfs_item_ptr(leaf, path->slots[0],
164 struct btrfs_free_space_header);
165 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
166 btrfs_set_free_space_key(leaf, header, &disk_key);
167 btrfs_mark_buffer_dirty(leaf);
168 btrfs_release_path(path);
170 return 0;
173 int create_free_space_inode(struct btrfs_root *root,
174 struct btrfs_trans_handle *trans,
175 struct btrfs_block_group_cache *block_group,
176 struct btrfs_path *path)
178 int ret;
179 u64 ino;
181 ret = btrfs_find_free_objectid(root, &ino);
182 if (ret < 0)
183 return ret;
185 return __create_free_space_inode(root, trans, path, ino,
186 block_group->key.objectid);
189 int btrfs_truncate_free_space_cache(struct btrfs_root *root,
190 struct btrfs_trans_handle *trans,
191 struct btrfs_path *path,
192 struct inode *inode)
194 struct btrfs_block_rsv *rsv;
195 loff_t oldsize;
196 int ret = 0;
198 rsv = trans->block_rsv;
199 trans->block_rsv = root->orphan_block_rsv;
200 ret = btrfs_block_rsv_check(trans, root,
201 root->orphan_block_rsv,
202 0, 5, 0);
203 if (ret)
204 return ret;
206 oldsize = i_size_read(inode);
207 btrfs_i_size_write(inode, 0);
208 truncate_pagecache(inode, oldsize, 0);
211 * We don't need an orphan item because truncating the free space cache
212 * will never be split across transactions.
214 ret = btrfs_truncate_inode_items(trans, root, inode,
215 0, BTRFS_EXTENT_DATA_KEY);
217 trans->block_rsv = rsv;
218 if (ret) {
219 WARN_ON(1);
220 return ret;
223 ret = btrfs_update_inode(trans, root, inode);
224 return ret;
227 static int readahead_cache(struct inode *inode)
229 struct file_ra_state *ra;
230 unsigned long last_index;
232 ra = kzalloc(sizeof(*ra), GFP_NOFS);
233 if (!ra)
234 return -ENOMEM;
236 file_ra_state_init(ra, inode->i_mapping);
237 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
239 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
241 kfree(ra);
243 return 0;
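/*
 * Load the on-disk cache into the in-memory tree.  The layout as read back
 * here: a btrfs_free_space_header item keyed by
 * (BTRFS_FREE_SPACE_OBJECTID, 0, block group offset) records the generation
 * and the number of extent and bitmap entries; the inode's pages hold
 * packed btrfs_free_space_entry records, with page 0 prefixed by a bogus
 * crc and the generation, and one full page of bitmap data per bitmap
 * entry after all of the entry pages.
 */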
246 int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
247 struct btrfs_free_space_ctl *ctl,
248 struct btrfs_path *path, u64 offset)
250 struct btrfs_free_space_header *header;
251 struct extent_buffer *leaf;
252 struct page *page;
253 struct btrfs_key key;
254 struct list_head bitmaps;
255 u64 num_entries;
256 u64 num_bitmaps;
257 u64 generation;
258 pgoff_t index = 0;
259 int ret = 0;
261 INIT_LIST_HEAD(&bitmaps);
263 /* Nothing in the space cache, goodbye */
264 if (!i_size_read(inode))
265 goto out;
267 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
268 key.offset = offset;
269 key.type = 0;
271 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
272 if (ret < 0)
273 goto out;
274 else if (ret > 0) {
275 btrfs_release_path(path);
276 ret = 0;
277 goto out;
280 ret = -1;
282 leaf = path->nodes[0];
283 header = btrfs_item_ptr(leaf, path->slots[0],
284 struct btrfs_free_space_header);
285 num_entries = btrfs_free_space_entries(leaf, header);
286 num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
287 generation = btrfs_free_space_generation(leaf, header);
288 btrfs_release_path(path);
290 if (BTRFS_I(inode)->generation != generation) {
291 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
292 " not match free space cache generation (%llu)\n",
293 (unsigned long long)BTRFS_I(inode)->generation,
294 (unsigned long long)generation);
295 goto out;
298 if (!num_entries)
299 goto out;
301 ret = readahead_cache(inode);
302 if (ret)
303 goto out;
305 while (1) {
306 struct btrfs_free_space_entry *entry;
307 struct btrfs_free_space *e;
308 void *addr;
309 unsigned long offset = 0;
310 int need_loop = 0;
312 if (!num_entries && !num_bitmaps)
313 break;
315 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
316 if (!page)
317 goto free_cache;
319 if (!PageUptodate(page)) {
320 btrfs_readpage(NULL, page);
321 lock_page(page);
322 if (!PageUptodate(page)) {
323 unlock_page(page);
324 page_cache_release(page);
325 printk(KERN_ERR "btrfs: error reading free "
326 "space cache\n");
327 goto free_cache;
330 addr = kmap(page);
332 if (index == 0) {
333 u64 *gen;
336 * We put a bogus crc in the front of the first page in
337 * case old kernels try to mount a fs with the new
338 * format to make sure they discard the cache.
340 addr += sizeof(u64);
341 offset += sizeof(u64);
343 gen = addr;
344 if (*gen != BTRFS_I(inode)->generation) {
345 printk_ratelimited(KERN_ERR "btrfs: space cache"
346 " generation (%llu) does not match "
347 "inode (%llu)\n",
348 (unsigned long long)*gen,
349 (unsigned long long)
350 BTRFS_I(inode)->generation);
351 kunmap(page);
352 unlock_page(page);
353 page_cache_release(page);
354 goto free_cache;
356 addr += sizeof(u64);
357 offset += sizeof(u64);
359 entry = addr;
361 while (1) {
362 if (!num_entries)
363 break;
365 need_loop = 1;
366 e = kmem_cache_zalloc(btrfs_free_space_cachep,
367 GFP_NOFS);
368 if (!e) {
369 kunmap(page);
370 unlock_page(page);
371 page_cache_release(page);
372 goto free_cache;
375 e->offset = le64_to_cpu(entry->offset);
376 e->bytes = le64_to_cpu(entry->bytes);
377 if (!e->bytes) {
378 kunmap(page);
379 kmem_cache_free(btrfs_free_space_cachep, e);
380 unlock_page(page);
381 page_cache_release(page);
382 goto free_cache;
385 if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
386 spin_lock(&ctl->tree_lock);
387 ret = link_free_space(ctl, e);
388 spin_unlock(&ctl->tree_lock);
389 if (ret) {
390 printk(KERN_ERR "Duplicate entries in "
391 "free space cache, dumping\n");
392 kunmap(page);
393 unlock_page(page);
394 page_cache_release(page);
395 goto free_cache;
397 } else {
398 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
399 if (!e->bitmap) {
400 kunmap(page);
401 kmem_cache_free(
402 btrfs_free_space_cachep, e);
403 unlock_page(page);
404 page_cache_release(page);
405 goto free_cache;
407 spin_lock(&ctl->tree_lock);
408 ret = link_free_space(ctl, e);
409 ctl->total_bitmaps++;
410 ctl->op->recalc_thresholds(ctl);
411 spin_unlock(&ctl->tree_lock);
412 if (ret) {
413 printk(KERN_ERR "Duplicate entries in "
414 "free space cache, dumping\n");
415 kunmap(page);
416 unlock_page(page);
417 page_cache_release(page);
418 goto free_cache;
420 list_add_tail(&e->list, &bitmaps);
423 num_entries--;
424 offset += sizeof(struct btrfs_free_space_entry);
425 if (offset + sizeof(struct btrfs_free_space_entry) >=
426 PAGE_CACHE_SIZE)
427 break;
428 entry++;
432 * We read an entry out of this page, so we need to move on to the
433 * next page.
435 if (need_loop) {
436 kunmap(page);
437 goto next;
441 * We add the bitmaps at the end of the entries so that the bitmap
442 * entries are filled in the order their pages appear in the cache.
444 e = list_entry(bitmaps.next, struct btrfs_free_space, list);
445 list_del_init(&e->list);
446 memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
447 kunmap(page);
448 num_bitmaps--;
449 next:
450 unlock_page(page);
451 page_cache_release(page);
452 index++;
455 ret = 1;
456 out:
457 return ret;
458 free_cache:
459 __btrfs_remove_free_space_cache(ctl);
460 goto out;
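/*
 * Top-level loader: the cache is skipped while unmounting or when the
 * block group is already marked for clearing.  After a successful load we
 * sanity check that the loaded free space equals the block group size
 * minus the used bytes and the superblock reservation; on any failure the
 * cache is dropped and the block group is flagged BTRFS_DC_CLEAR so it
 * gets rewritten.
 */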
463 int load_free_space_cache(struct btrfs_fs_info *fs_info,
464 struct btrfs_block_group_cache *block_group)
466 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
467 struct btrfs_root *root = fs_info->tree_root;
468 struct inode *inode;
469 struct btrfs_path *path;
470 int ret;
471 bool matched;
472 u64 used = btrfs_block_group_used(&block_group->item);
475 * If we're unmounting then just return, since this does a search on the
476 * normal root and not the commit root and we could deadlock.
478 if (btrfs_fs_closing(fs_info))
479 return 0;
482 * If this block group has been marked to be cleared for one reason or
483 * another then we can't trust the on disk cache, so just return.
485 spin_lock(&block_group->lock);
486 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
487 spin_unlock(&block_group->lock);
488 return 0;
490 spin_unlock(&block_group->lock);
492 path = btrfs_alloc_path();
493 if (!path)
494 return 0;
496 inode = lookup_free_space_inode(root, block_group, path);
497 if (IS_ERR(inode)) {
498 btrfs_free_path(path);
499 return 0;
502 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
503 path, block_group->key.objectid);
504 btrfs_free_path(path);
505 if (ret <= 0)
506 goto out;
508 spin_lock(&ctl->tree_lock);
509 matched = (ctl->free_space == (block_group->key.offset - used -
510 block_group->bytes_super));
511 spin_unlock(&ctl->tree_lock);
513 if (!matched) {
514 __btrfs_remove_free_space_cache(ctl);
515 printk(KERN_ERR "block group %llu has the wrong amount of free "
516 "space\n", block_group->key.objectid);
517 ret = -1;
519 out:
520 if (ret < 0) {
521 /* This cache is bogus, make sure it gets cleared */
522 spin_lock(&block_group->lock);
523 block_group->disk_cache_state = BTRFS_DC_CLEAR;
524 spin_unlock(&block_group->lock);
525 ret = 0;
527 printk(KERN_ERR "btrfs: failed to load free space cache "
528 "for block group %llu\n", block_group->key.objectid);
531 iput(inode);
532 return ret;
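/*
 * Write the in-memory free space tree back to the cache inode: lock every
 * page of the inode, stream out the extent entries (folding in any cluster
 * entries and still-pinned extents so that space isn't lost), then one page
 * per bitmap, zero the rest, dirty the pages and finally update the free
 * space header item with the entry counts and the transaction id.
 */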
535 int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
536 struct btrfs_free_space_ctl *ctl,
537 struct btrfs_block_group_cache *block_group,
538 struct btrfs_trans_handle *trans,
539 struct btrfs_path *path, u64 offset)
541 struct btrfs_free_space_header *header;
542 struct extent_buffer *leaf;
543 struct rb_node *node;
544 struct list_head *pos, *n;
545 struct page **pages;
546 struct page *page;
547 struct extent_state *cached_state = NULL;
548 struct btrfs_free_cluster *cluster = NULL;
549 struct extent_io_tree *unpin = NULL;
550 struct list_head bitmap_list;
551 struct btrfs_key key;
552 u64 start, end, len;
553 u64 bytes = 0;
554 u32 crc = ~(u32)0;
555 int index = 0, num_pages = 0;
556 int entries = 0;
557 int bitmaps = 0;
558 int ret = -1;
559 bool next_page = false;
560 bool out_of_space = false;
562 INIT_LIST_HEAD(&bitmap_list);
564 node = rb_first(&ctl->free_space_offset);
565 if (!node)
566 return 0;
568 if (!i_size_read(inode))
569 return -1;
571 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
572 PAGE_CACHE_SHIFT;
574 filemap_write_and_wait(inode->i_mapping);
575 btrfs_wait_ordered_range(inode, inode->i_size &
576 ~(root->sectorsize - 1), (u64)-1);
578 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
579 if (!pages)
580 return -1;
582 /* Get the cluster for this block_group if it exists */
583 if (block_group && !list_empty(&block_group->cluster_list))
584 cluster = list_entry(block_group->cluster_list.next,
585 struct btrfs_free_cluster,
586 block_group_list);
589 * We shouldn't have switched the pinned extents yet so this is the
590 * right one
592 unpin = root->fs_info->pinned_extents;
595 * Lock all pages first so we can lock the extent safely.
597 * NOTE: Because we hold the ref the entire time we're going to write to
598 * the page, find_get_page should never fail, so we don't do a check
599 * after find_get_page at this point. Just putting this here so people
600 * know and don't freak out.
602 while (index < num_pages) {
603 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
604 if (!page) {
605 int i;
607 for (i = 0; i < num_pages; i++) {
608 unlock_page(pages[i]);
609 page_cache_release(pages[i]);
611 goto out;
613 pages[index] = page;
614 index++;
617 index = 0;
618 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
619 0, &cached_state, GFP_NOFS);
622 * When searching for pinned extents, we need to start at our start
623 * offset.
625 if (block_group)
626 start = block_group->key.objectid;
628 /* Write out the extent entries */
629 do {
630 struct btrfs_free_space_entry *entry;
631 void *addr, *orig;
632 unsigned long offset = 0;
634 next_page = false;
636 if (index >= num_pages) {
637 out_of_space = true;
638 break;
641 page = pages[index];
643 orig = addr = kmap(page);
644 if (index == 0) {
645 u64 *gen;
648 * We're going to put in a bogus crc for this page to
649 * make sure that old kernels who aren't aware of this
650 * format will be sure to discard the cache.
652 addr += sizeof(u64);
653 offset += sizeof(u64);
655 gen = addr;
656 *gen = trans->transid;
657 addr += sizeof(u64);
658 offset += sizeof(u64);
660 entry = addr;
662 memset(addr, 0, PAGE_CACHE_SIZE - offset);
663 while (node && !next_page) {
664 struct btrfs_free_space *e;
666 e = rb_entry(node, struct btrfs_free_space, offset_index);
667 entries++;
669 entry->offset = cpu_to_le64(e->offset);
670 entry->bytes = cpu_to_le64(e->bytes);
671 if (e->bitmap) {
672 entry->type = BTRFS_FREE_SPACE_BITMAP;
673 list_add_tail(&e->list, &bitmap_list);
674 bitmaps++;
675 } else {
676 entry->type = BTRFS_FREE_SPACE_EXTENT;
678 node = rb_next(node);
679 if (!node && cluster) {
680 node = rb_first(&cluster->root);
681 cluster = NULL;
683 offset += sizeof(struct btrfs_free_space_entry);
684 if (offset + sizeof(struct btrfs_free_space_entry) >=
685 PAGE_CACHE_SIZE)
686 next_page = true;
687 entry++;
691 * We want to add any pinned extents to our free space cache
692 * so we don't leak the space
694 while (block_group && !next_page &&
695 (start < block_group->key.objectid +
696 block_group->key.offset)) {
697 ret = find_first_extent_bit(unpin, start, &start, &end,
698 EXTENT_DIRTY);
699 if (ret) {
700 ret = 0;
701 break;
704 /* This pinned extent is out of our range */
705 if (start >= block_group->key.objectid +
706 block_group->key.offset)
707 break;
709 len = block_group->key.objectid +
710 block_group->key.offset - start;
711 len = min(len, end + 1 - start);
713 entries++;
714 entry->offset = cpu_to_le64(start);
715 entry->bytes = cpu_to_le64(len);
716 entry->type = BTRFS_FREE_SPACE_EXTENT;
718 start = end + 1;
719 offset += sizeof(struct btrfs_free_space_entry);
720 if (offset + sizeof(struct btrfs_free_space_entry) >=
721 PAGE_CACHE_SIZE)
722 next_page = true;
723 entry++;
726 /* Generate bogus crc value */
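	/*
	 * The value stored at the front of page 0 is the page's checksum
	 * plus one, so it can never be a valid crc.  Old kernels expect the
	 * cache generation in this slot, see a mismatch, and discard the
	 * cache, which is the point (see the matching comment in
	 * __load_free_space_cache).
	 */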
727 if (index == 0) {
728 u32 *tmp;
729 crc = btrfs_csum_data(root, orig + sizeof(u64), crc,
730 PAGE_CACHE_SIZE - sizeof(u64));
731 btrfs_csum_final(crc, (char *)&crc);
732 crc++;
733 tmp = orig;
734 *tmp = crc;
737 kunmap(page);
739 bytes += PAGE_CACHE_SIZE;
741 index++;
742 } while (node || next_page);
744 /* Write out the bitmaps */
745 list_for_each_safe(pos, n, &bitmap_list) {
746 void *addr;
747 struct btrfs_free_space *entry =
748 list_entry(pos, struct btrfs_free_space, list);
750 if (index >= num_pages) {
751 out_of_space = true;
752 break;
754 page = pages[index];
756 addr = kmap(page);
757 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
758 kunmap(page);
759 bytes += PAGE_CACHE_SIZE;
761 list_del_init(&entry->list);
762 index++;
765 if (out_of_space) {
766 btrfs_drop_pages(pages, num_pages);
767 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
768 i_size_read(inode) - 1, &cached_state,
769 GFP_NOFS);
770 ret = 0;
771 goto out;
774 /* Zero out the rest of the pages just to make sure */
775 while (index < num_pages) {
776 void *addr;
778 page = pages[index];
779 addr = kmap(page);
780 memset(addr, 0, PAGE_CACHE_SIZE);
781 kunmap(page);
782 bytes += PAGE_CACHE_SIZE;
783 index++;
786 ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
787 bytes, &cached_state);
788 btrfs_drop_pages(pages, num_pages);
789 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
790 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
792 if (ret) {
793 ret = 0;
794 goto out;
797 BTRFS_I(inode)->generation = trans->transid;
799 filemap_write_and_wait(inode->i_mapping);
801 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
802 key.offset = offset;
803 key.type = 0;
805 ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
806 if (ret < 0) {
807 ret = -1;
808 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
809 EXTENT_DIRTY | EXTENT_DELALLOC |
810 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
811 goto out;
813 leaf = path->nodes[0];
814 if (ret > 0) {
815 struct btrfs_key found_key;
816 BUG_ON(!path->slots[0]);
817 path->slots[0]--;
818 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
819 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
820 found_key.offset != offset) {
821 ret = -1;
822 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
823 EXTENT_DIRTY | EXTENT_DELALLOC |
824 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
825 GFP_NOFS);
826 btrfs_release_path(path);
827 goto out;
830 header = btrfs_item_ptr(leaf, path->slots[0],
831 struct btrfs_free_space_header);
832 btrfs_set_free_space_entries(leaf, header, entries);
833 btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
834 btrfs_set_free_space_generation(leaf, header, trans->transid);
835 btrfs_mark_buffer_dirty(leaf);
836 btrfs_release_path(path);
838 ret = 1;
840 out:
841 kfree(pages);
842 if (ret != 1) {
843 invalidate_inode_pages2_range(inode->i_mapping, 0, index);
844 BTRFS_I(inode)->generation = 0;
846 btrfs_update_inode(trans, root, inode);
847 return ret;
850 int btrfs_write_out_cache(struct btrfs_root *root,
851 struct btrfs_trans_handle *trans,
852 struct btrfs_block_group_cache *block_group,
853 struct btrfs_path *path)
855 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
856 struct inode *inode;
857 int ret = 0;
859 root = root->fs_info->tree_root;
861 spin_lock(&block_group->lock);
862 if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
863 spin_unlock(&block_group->lock);
864 return 0;
866 spin_unlock(&block_group->lock);
868 inode = lookup_free_space_inode(root, block_group, path);
869 if (IS_ERR(inode))
870 return 0;
872 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
873 path, block_group->key.objectid);
874 if (ret < 0) {
875 spin_lock(&block_group->lock);
876 block_group->disk_cache_state = BTRFS_DC_ERROR;
877 spin_unlock(&block_group->lock);
878 ret = 0;
880 printk(KERN_ERR "btrfs: failed to write free space cache "
881 "for block group %llu\n", block_group->key.objectid);
884 iput(inode);
885 return ret;
888 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
889 u64 offset)
891 BUG_ON(offset < bitmap_start);
892 offset -= bitmap_start;
893 return (unsigned long)(div_u64(offset, unit));
896 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
898 return (unsigned long)(div_u64(bytes, unit));
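/*
 * Round an offset down to the start of the bitmap that would cover it.
 * For example (assuming ctl->unit == 4K and 4K pages), each bitmap spans
 * 32768 * 4K = 128MB, so an offset 200MB into the block group maps to the
 * bitmap starting at ctl->start + 128MB.
 */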
901 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
902 u64 offset)
904 u64 bitmap_start;
905 u64 bytes_per_bitmap;
907 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
908 bitmap_start = offset - ctl->start;
909 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
910 bitmap_start *= bytes_per_bitmap;
911 bitmap_start += ctl->start;
913 return bitmap_start;
916 static int tree_insert_offset(struct rb_root *root, u64 offset,
917 struct rb_node *node, int bitmap)
919 struct rb_node **p = &root->rb_node;
920 struct rb_node *parent = NULL;
921 struct btrfs_free_space *info;
923 while (*p) {
924 parent = *p;
925 info = rb_entry(parent, struct btrfs_free_space, offset_index);
927 if (offset < info->offset) {
928 p = &(*p)->rb_left;
929 } else if (offset > info->offset) {
930 p = &(*p)->rb_right;
931 } else {
933 * we could have a bitmap entry and an extent entry
934 * share the same offset. If this is the case, we want
935 * the extent entry to always be found first if we do a
936 * linear search through the tree, since we want to have
937 * the quickest allocation time, and allocating from an
938 * extent is faster than allocating from a bitmap. So
939 * if we're inserting a bitmap and we find an entry at
940 * this offset, we want to go right, or after this entry
941 * logically. If we are inserting an extent and we've
942 * found a bitmap, we want to go left, or before
943 * logically.
945 if (bitmap) {
946 if (info->bitmap) {
947 WARN_ON_ONCE(1);
948 return -EEXIST;
950 p = &(*p)->rb_right;
951 } else {
952 if (!info->bitmap) {
953 WARN_ON_ONCE(1);
954 return -EEXIST;
956 p = &(*p)->rb_left;
961 rb_link_node(node, parent, p);
962 rb_insert_color(node, root);
964 return 0;
968 * searches the tree for the given offset.
970 * fuzzy - If this is set, then we are trying to make an allocation, and we just
971 * want a section that is at least 'bytes' in size and comes at or after the given
972 * offset.
974 static struct btrfs_free_space *
975 tree_search_offset(struct btrfs_free_space_ctl *ctl,
976 u64 offset, int bitmap_only, int fuzzy)
978 struct rb_node *n = ctl->free_space_offset.rb_node;
979 struct btrfs_free_space *entry, *prev = NULL;
981 /* find entry that is closest to the 'offset' */
982 while (1) {
983 if (!n) {
984 entry = NULL;
985 break;
988 entry = rb_entry(n, struct btrfs_free_space, offset_index);
989 prev = entry;
991 if (offset < entry->offset)
992 n = n->rb_left;
993 else if (offset > entry->offset)
994 n = n->rb_right;
995 else
996 break;
999 if (bitmap_only) {
1000 if (!entry)
1001 return NULL;
1002 if (entry->bitmap)
1003 return entry;
1006 * bitmap entry and extent entry may share the same offset,
1007 * in that case, bitmap entry comes after extent entry.
1009 n = rb_next(n);
1010 if (!n)
1011 return NULL;
1012 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1013 if (entry->offset != offset)
1014 return NULL;
1016 WARN_ON(!entry->bitmap);
1017 return entry;
1018 } else if (entry) {
1019 if (entry->bitmap) {
1021 * if previous extent entry covers the offset,
1022 * we should return it instead of the bitmap entry
1024 n = &entry->offset_index;
1025 while (1) {
1026 n = rb_prev(n);
1027 if (!n)
1028 break;
1029 prev = rb_entry(n, struct btrfs_free_space,
1030 offset_index);
1031 if (!prev->bitmap) {
1032 if (prev->offset + prev->bytes > offset)
1033 entry = prev;
1034 break;
1038 return entry;
1041 if (!prev)
1042 return NULL;
1044 /* find last entry before the 'offset' */
1045 entry = prev;
1046 if (entry->offset > offset) {
1047 n = rb_prev(&entry->offset_index);
1048 if (n) {
1049 entry = rb_entry(n, struct btrfs_free_space,
1050 offset_index);
1051 BUG_ON(entry->offset > offset);
1052 } else {
1053 if (fuzzy)
1054 return entry;
1055 else
1056 return NULL;
1060 if (entry->bitmap) {
1061 n = &entry->offset_index;
1062 while (1) {
1063 n = rb_prev(n);
1064 if (!n)
1065 break;
1066 prev = rb_entry(n, struct btrfs_free_space,
1067 offset_index);
1068 if (!prev->bitmap) {
1069 if (prev->offset + prev->bytes > offset)
1070 return prev;
1071 break;
1074 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1075 return entry;
1076 } else if (entry->offset + entry->bytes > offset)
1077 return entry;
1079 if (!fuzzy)
1080 return NULL;
1082 while (1) {
1083 if (entry->bitmap) {
1084 if (entry->offset + BITS_PER_BITMAP *
1085 ctl->unit > offset)
1086 break;
1087 } else {
1088 if (entry->offset + entry->bytes > offset)
1089 break;
1092 n = rb_next(&entry->offset_index);
1093 if (!n)
1094 return NULL;
1095 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1097 return entry;
1100 static inline void
1101 __unlink_free_space(struct btrfs_free_space_ctl *ctl,
1102 struct btrfs_free_space *info)
1104 rb_erase(&info->offset_index, &ctl->free_space_offset);
1105 ctl->free_extents--;
1108 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1109 struct btrfs_free_space *info)
1111 __unlink_free_space(ctl, info);
1112 ctl->free_space -= info->bytes;
1115 static int link_free_space(struct btrfs_free_space_ctl *ctl,
1116 struct btrfs_free_space *info)
1118 int ret = 0;
1120 BUG_ON(!info->bitmap && !info->bytes);
1121 ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1122 &info->offset_index, (info->bitmap != NULL));
1123 if (ret)
1124 return ret;
1126 ctl->free_space += info->bytes;
1127 ctl->free_extents++;
1128 return ret;
1131 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1133 struct btrfs_block_group_cache *block_group = ctl->private;
1134 u64 max_bytes;
1135 u64 bitmap_bytes;
1136 u64 extent_bytes;
1137 u64 size = block_group->key.offset;
1138 u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1139 int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1141 BUG_ON(ctl->total_bitmaps > max_bitmaps);
1144 * The goal is to keep the total amount of memory used per 1gb of space
1145 * at or below 32k, so we need to adjust how much memory we allow to be
1146 * used by extent based free space tracking
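/*
 * Worked example (assuming 4K pages): a 1GB block group gets
 * max_bytes = 32K.  If it already holds one bitmap, then below
 * bitmap_bytes = 2 * 4K = 8K and extent_bytes = 24K, which is clamped to
 * max_bytes / 2 = 16K and divided by the size of a struct
 * btrfs_free_space to get extents_thresh.
 */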
1148 if (size < 1024 * 1024 * 1024)
1149 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1150 else
1151 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1152 div64_u64(size, 1024 * 1024 * 1024);
1155 * we want to account for 1 more bitmap than what we have so we can make
1156 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1157 * we add more bitmaps.
1159 bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1161 if (bitmap_bytes >= max_bytes) {
1162 ctl->extents_thresh = 0;
1163 return;
1167 * we want the extent entry threshold to always be at most 1/2 the max
1168 * bytes we can have, or whatever is less than that.
1170 extent_bytes = max_bytes - bitmap_bytes;
1171 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1173 ctl->extents_thresh =
1174 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1177 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1178 struct btrfs_free_space *info,
1179 u64 offset, u64 bytes)
1181 unsigned long start, count;
1183 start = offset_to_bit(info->offset, ctl->unit, offset);
1184 count = bytes_to_bits(bytes, ctl->unit);
1185 BUG_ON(start + count > BITS_PER_BITMAP);
1187 bitmap_clear(info->bitmap, start, count);
1189 info->bytes -= bytes;
1192 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1193 struct btrfs_free_space *info, u64 offset,
1194 u64 bytes)
1196 __bitmap_clear_bits(ctl, info, offset, bytes);
1197 ctl->free_space -= bytes;
1200 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1201 struct btrfs_free_space *info, u64 offset,
1202 u64 bytes)
1204 unsigned long start, count;
1206 start = offset_to_bit(info->offset, ctl->unit, offset);
1207 count = bytes_to_bits(bytes, ctl->unit);
1208 BUG_ON(start + count > BITS_PER_BITMAP);
1210 bitmap_set(info->bitmap, start, count);
1212 info->bytes += bytes;
1213 ctl->free_space += bytes;
1216 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1217 struct btrfs_free_space *bitmap_info, u64 *offset,
1218 u64 *bytes)
1220 unsigned long found_bits = 0;
1221 unsigned long bits, i;
1222 unsigned long next_zero;
1224 i = offset_to_bit(bitmap_info->offset, ctl->unit,
1225 max_t(u64, *offset, bitmap_info->offset));
1226 bits = bytes_to_bits(*bytes, ctl->unit);
1228 for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1229 i < BITS_PER_BITMAP;
1230 i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
1231 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1232 BITS_PER_BITMAP, i);
1233 if ((next_zero - i) >= bits) {
1234 found_bits = next_zero - i;
1235 break;
1237 i = next_zero;
1240 if (found_bits) {
1241 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1242 *bytes = (u64)(found_bits) * ctl->unit;
1243 return 0;
1246 return -1;
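/*
 * Walk the free space tree starting from the bitmap-aligned position for
 * *offset and return the first entry with at least *bytes available,
 * searching inside bitmap entries as needed; *offset and *bytes are
 * updated to describe the space that was found.
 */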
1249 static struct btrfs_free_space *
1250 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
1252 struct btrfs_free_space *entry;
1253 struct rb_node *node;
1254 int ret;
1256 if (!ctl->free_space_offset.rb_node)
1257 return NULL;
1259 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1260 if (!entry)
1261 return NULL;
1263 for (node = &entry->offset_index; node; node = rb_next(node)) {
1264 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1265 if (entry->bytes < *bytes)
1266 continue;
1268 if (entry->bitmap) {
1269 ret = search_bitmap(ctl, entry, offset, bytes);
1270 if (!ret)
1271 return entry;
1272 continue;
1275 *offset = entry->offset;
1276 *bytes = entry->bytes;
1277 return entry;
1280 return NULL;
1283 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1284 struct btrfs_free_space *info, u64 offset)
1286 info->offset = offset_to_bitmap(ctl, offset);
1287 info->bytes = 0;
1288 link_free_space(ctl, info);
1289 ctl->total_bitmaps++;
1291 ctl->op->recalc_thresholds(ctl);
1294 static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1295 struct btrfs_free_space *bitmap_info)
1297 unlink_free_space(ctl, bitmap_info);
1298 kfree(bitmap_info->bitmap);
1299 kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1300 ctl->total_bitmaps--;
1301 ctl->op->recalc_thresholds(ctl);
1304 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1305 struct btrfs_free_space *bitmap_info,
1306 u64 *offset, u64 *bytes)
1308 u64 end;
1309 u64 search_start, search_bytes;
1310 int ret;
1312 again:
1313 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1316 * XXX - this can go away after a few releases.
1318 * since the only user of btrfs_remove_free_space is the tree logging
1319 * stuff, and the only way to test that is under crash conditions, we
1320 * want to have this debug stuff here just in case something's not
1321 * working. Search the bitmap for the space we are trying to use to
1322 * make sure it's actually there. If it's not there then we need to stop
1323 * because something has gone wrong.
1325 search_start = *offset;
1326 search_bytes = *bytes;
1327 search_bytes = min(search_bytes, end - search_start + 1);
1328 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1329 BUG_ON(ret < 0 || search_start != *offset);
1331 if (*offset > bitmap_info->offset && *offset + *bytes > end) {
1332 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
1333 *bytes -= end - *offset + 1;
1334 *offset = end + 1;
1335 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
1336 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
1337 *bytes = 0;
1340 if (*bytes) {
1341 struct rb_node *next = rb_next(&bitmap_info->offset_index);
1342 if (!bitmap_info->bytes)
1343 free_bitmap(ctl, bitmap_info);
1346 * no entry after this bitmap, but we still have bytes to
1347 * remove, so something has gone wrong.
1349 if (!next)
1350 return -EINVAL;
1352 bitmap_info = rb_entry(next, struct btrfs_free_space,
1353 offset_index);
1356 * if the next entry isn't a bitmap we need to return to let the
1357 * extent stuff do its work.
1359 if (!bitmap_info->bitmap)
1360 return -EAGAIN;
1363 * Ok the next item is a bitmap, but it may not actually hold
1364 * the information for the rest of this free space stuff, so
1365 * look for it, and if we don't find it return so we can try
1366 * everything over again.
1368 search_start = *offset;
1369 search_bytes = *bytes;
1370 ret = search_bitmap(ctl, bitmap_info, &search_start,
1371 &search_bytes);
1372 if (ret < 0 || search_start != *offset)
1373 return -EAGAIN;
1375 goto again;
1376 } else if (!bitmap_info->bytes)
1377 free_bitmap(ctl, bitmap_info);
1379 return 0;
1382 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1383 struct btrfs_free_space *info, u64 offset,
1384 u64 bytes)
1386 u64 bytes_to_set = 0;
1387 u64 end;
1389 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1391 bytes_to_set = min(end - offset, bytes);
1393 bitmap_set_bits(ctl, info, offset, bytes_to_set);
1395 return bytes_to_set;
1399 static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1400 struct btrfs_free_space *info)
1402 struct btrfs_block_group_cache *block_group = ctl->private;
1405 * If we are below the extents threshold then we can add this as an
1406 * extent, and don't have to deal with the bitmap
1408 if (ctl->free_extents < ctl->extents_thresh) {
1410 * If this block group has some small extents we don't want to
1411 * use up all of our free slots in the cache with them, we want
1412 * to reserve them for larger extents, however if we have plenty
1413 * of cache left then go ahead and add them, no sense in adding
1414 * the overhead of a bitmap if we don't have to.
1416 if (info->bytes <= block_group->sectorsize * 4) {
1417 if (ctl->free_extents * 2 <= ctl->extents_thresh)
1418 return false;
1419 } else {
1420 return false;
1425 * some block groups are so tiny they can't be enveloped by a bitmap, so
1426 * don't even bother to create a bitmap for this
1428 if (BITS_PER_BITMAP * block_group->sectorsize >
1429 block_group->key.offset)
1430 return false;
1432 return true;
1435 static struct btrfs_free_space_op free_space_op = {
1436 .recalc_thresholds = recalculate_thresholds,
1437 .use_bitmap = use_bitmap,
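/*
 * Try to store the range described by 'info' in a bitmap: first the bitmap
 * at the head of the block group's cluster (if any), then a bitmap already
 * in the tree, allocating a fresh bitmap page when nothing covers the
 * offset.  Returns 1 if the range was fully absorbed, 0 if use_bitmap()
 * says it should stay an extent entry, or a negative error.
 */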
1440 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1441 struct btrfs_free_space *info)
1443 struct btrfs_free_space *bitmap_info;
1444 struct btrfs_block_group_cache *block_group = NULL;
1445 int added = 0;
1446 u64 bytes, offset, bytes_added;
1447 int ret;
1449 bytes = info->bytes;
1450 offset = info->offset;
1452 if (!ctl->op->use_bitmap(ctl, info))
1453 return 0;
1455 if (ctl->op == &free_space_op)
1456 block_group = ctl->private;
1457 again:
1459 * Since we link bitmaps right into the cluster we need to see if we
1460 * have a cluster here, and if so and it has our bitmap we need to add
1461 * the free space to that bitmap.
1463 if (block_group && !list_empty(&block_group->cluster_list)) {
1464 struct btrfs_free_cluster *cluster;
1465 struct rb_node *node;
1466 struct btrfs_free_space *entry;
1468 cluster = list_entry(block_group->cluster_list.next,
1469 struct btrfs_free_cluster,
1470 block_group_list);
1471 spin_lock(&cluster->lock);
1472 node = rb_first(&cluster->root);
1473 if (!node) {
1474 spin_unlock(&cluster->lock);
1475 goto no_cluster_bitmap;
1478 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1479 if (!entry->bitmap) {
1480 spin_unlock(&cluster->lock);
1481 goto no_cluster_bitmap;
1484 if (entry->offset == offset_to_bitmap(ctl, offset)) {
1485 bytes_added = add_bytes_to_bitmap(ctl, entry,
1486 offset, bytes);
1487 bytes -= bytes_added;
1488 offset += bytes_added;
1490 spin_unlock(&cluster->lock);
1491 if (!bytes) {
1492 ret = 1;
1493 goto out;
1497 no_cluster_bitmap:
1498 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1499 1, 0);
1500 if (!bitmap_info) {
1501 BUG_ON(added);
1502 goto new_bitmap;
1505 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1506 bytes -= bytes_added;
1507 offset += bytes_added;
1508 added = 0;
1510 if (!bytes) {
1511 ret = 1;
1512 goto out;
1513 } else
1514 goto again;
1516 new_bitmap:
1517 if (info && info->bitmap) {
1518 add_new_bitmap(ctl, info, offset);
1519 added = 1;
1520 info = NULL;
1521 goto again;
1522 } else {
1523 spin_unlock(&ctl->tree_lock);
1525 /* no pre-allocated info, allocate a new one */
1526 if (!info) {
1527 info = kmem_cache_zalloc(btrfs_free_space_cachep,
1528 GFP_NOFS);
1529 if (!info) {
1530 spin_lock(&ctl->tree_lock);
1531 ret = -ENOMEM;
1532 goto out;
1536 /* allocate the bitmap */
1537 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1538 spin_lock(&ctl->tree_lock);
1539 if (!info->bitmap) {
1540 ret = -ENOMEM;
1541 goto out;
1543 goto again;
1546 out:
1547 if (info) {
1548 if (info->bitmap)
1549 kfree(info->bitmap);
1550 kmem_cache_free(btrfs_free_space_cachep, info);
1553 return ret;
1556 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
1557 struct btrfs_free_space *info, bool update_stat)
1559 struct btrfs_free_space *left_info;
1560 struct btrfs_free_space *right_info;
1561 bool merged = false;
1562 u64 offset = info->offset;
1563 u64 bytes = info->bytes;
1566 * first we want to see if there is free space adjacent to the range we
1567 * are adding; if there is, remove that struct and add a new one to
1568 * cover the entire range
1570 right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1571 if (right_info && rb_prev(&right_info->offset_index))
1572 left_info = rb_entry(rb_prev(&right_info->offset_index),
1573 struct btrfs_free_space, offset_index);
1574 else
1575 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
1577 if (right_info && !right_info->bitmap) {
1578 if (update_stat)
1579 unlink_free_space(ctl, right_info);
1580 else
1581 __unlink_free_space(ctl, right_info);
1582 info->bytes += right_info->bytes;
1583 kmem_cache_free(btrfs_free_space_cachep, right_info);
1584 merged = true;
1587 if (left_info && !left_info->bitmap &&
1588 left_info->offset + left_info->bytes == offset) {
1589 if (update_stat)
1590 unlink_free_space(ctl, left_info);
1591 else
1592 __unlink_free_space(ctl, left_info);
1593 info->offset = left_info->offset;
1594 info->bytes += left_info->bytes;
1595 kmem_cache_free(btrfs_free_space_cachep, left_info);
1596 merged = true;
1599 return merged;
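/*
 * Add a free range to the in-memory cache: first try to merge it with the
 * extent entries on either side, then see if it should be folded into a
 * bitmap, and only if neither applies link it as a new extent entry.
 */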
1602 int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1603 u64 offset, u64 bytes)
1605 struct btrfs_free_space *info;
1606 int ret = 0;
1608 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
1609 if (!info)
1610 return -ENOMEM;
1612 info->offset = offset;
1613 info->bytes = bytes;
1615 spin_lock(&ctl->tree_lock);
1617 if (try_merge_free_space(ctl, info, true))
1618 goto link;
1621 * There was no extent directly to the left or right of this new
1622 * extent, so we know we're going to have to allocate a new extent entry;
1623 * before we do that, see if we need to drop this into a bitmap
1625 ret = insert_into_bitmap(ctl, info);
1626 if (ret < 0) {
1627 goto out;
1628 } else if (ret) {
1629 ret = 0;
1630 goto out;
1632 link:
1633 ret = link_free_space(ctl, info);
1634 if (ret)
1635 kmem_cache_free(btrfs_free_space_cachep, info);
1636 out:
1637 spin_unlock(&ctl->tree_lock);
1639 if (ret) {
1640 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
1641 BUG_ON(ret == -EEXIST);
1644 return ret;
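/*
 * Remove a range from the cache.  The extent entry cases handled inline
 * below: an exact match is freed, a match at the front of an entry trims
 * it, and a range in the middle of an entry punches a hole by re-adding
 * the leading piece; anything covered by a bitmap is handed to
 * remove_from_bitmap(), retrying while it returns -EAGAIN.
 */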
1647 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1648 u64 offset, u64 bytes)
1650 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1651 struct btrfs_free_space *info;
1652 struct btrfs_free_space *next_info = NULL;
1653 int ret = 0;
1655 spin_lock(&ctl->tree_lock);
1657 again:
1658 info = tree_search_offset(ctl, offset, 0, 0);
1659 if (!info) {
1661 * oops didn't find an extent that matched the space we wanted
1662 * to remove, look for a bitmap instead
1664 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1665 1, 0);
1666 if (!info) {
1667 WARN_ON(1);
1668 goto out_lock;
1672 if (info->bytes < bytes && rb_next(&info->offset_index)) {
1673 u64 end;
1674 next_info = rb_entry(rb_next(&info->offset_index),
1675 struct btrfs_free_space,
1676 offset_index);
1678 if (next_info->bitmap)
1679 end = next_info->offset +
1680 BITS_PER_BITMAP * ctl->unit - 1;
1681 else
1682 end = next_info->offset + next_info->bytes;
1684 if (next_info->bytes < bytes ||
1685 next_info->offset > offset || offset > end) {
1686 printk(KERN_CRIT "Found free space at %llu, size %llu,"
1687 " trying to use %llu\n",
1688 (unsigned long long)info->offset,
1689 (unsigned long long)info->bytes,
1690 (unsigned long long)bytes);
1691 WARN_ON(1);
1692 ret = -EINVAL;
1693 goto out_lock;
1696 info = next_info;
1699 if (info->bytes == bytes) {
1700 unlink_free_space(ctl, info);
1701 if (info->bitmap) {
1702 kfree(info->bitmap);
1703 ctl->total_bitmaps--;
1705 kmem_cache_free(btrfs_free_space_cachep, info);
1706 goto out_lock;
1709 if (!info->bitmap && info->offset == offset) {
1710 unlink_free_space(ctl, info);
1711 info->offset += bytes;
1712 info->bytes -= bytes;
1713 link_free_space(ctl, info);
1714 goto out_lock;
1717 if (!info->bitmap && info->offset <= offset &&
1718 info->offset + info->bytes >= offset + bytes) {
1719 u64 old_start = info->offset;
1721 * we're freeing space in the middle of the info,
1722 * this can happen during tree log replay
1724 * first unlink the old info and then
1725 * insert it again after the hole we're creating
1727 unlink_free_space(ctl, info);
1728 if (offset + bytes < info->offset + info->bytes) {
1729 u64 old_end = info->offset + info->bytes;
1731 info->offset = offset + bytes;
1732 info->bytes = old_end - info->offset;
1733 ret = link_free_space(ctl, info);
1734 WARN_ON(ret);
1735 if (ret)
1736 goto out_lock;
1737 } else {
1738 /* the hole we're creating ends at the end
1739 * of the info struct, just free the info
1741 kmem_cache_free(btrfs_free_space_cachep, info);
1743 spin_unlock(&ctl->tree_lock);
1745 /* step two, insert a new info struct to cover
1746 * anything before the hole
1748 ret = btrfs_add_free_space(block_group, old_start,
1749 offset - old_start);
1750 WARN_ON(ret);
1751 goto out;
1754 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1755 if (ret == -EAGAIN)
1756 goto again;
1757 BUG_ON(ret);
1758 out_lock:
1759 spin_unlock(&ctl->tree_lock);
1760 out:
1761 return ret;
1764 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1765 u64 bytes)
1767 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1768 struct btrfs_free_space *info;
1769 struct rb_node *n;
1770 int count = 0;
1772 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
1773 info = rb_entry(n, struct btrfs_free_space, offset_index);
1774 if (info->bytes >= bytes)
1775 count++;
1776 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
1777 (unsigned long long)info->offset,
1778 (unsigned long long)info->bytes,
1779 (info->bitmap) ? "yes" : "no");
1781 printk(KERN_INFO "block group has cluster?: %s\n",
1782 list_empty(&block_group->cluster_list) ? "no" : "yes");
1783 printk(KERN_INFO "%d blocks of free space at or bigger than bytes"
1784 "\n", count);
1787 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1789 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1791 spin_lock_init(&ctl->tree_lock);
1792 ctl->unit = block_group->sectorsize;
1793 ctl->start = block_group->key.objectid;
1794 ctl->private = block_group;
1795 ctl->op = &free_space_op;
1798 * we only want to have 32k of ram per block group for keeping
1799 * track of free space, and if we pass 1/2 of that we want to
1800 * start converting things over to using bitmaps
1802 ctl->extents_thresh = ((1024 * 32) / 2) /
1803 sizeof(struct btrfs_free_space);
1807 * for a given cluster, put all of its extents back into the free
1808 * space cache. If the block group passed doesn't match the block group
1809 * pointed to by the cluster, someone else raced in and freed the
1810 * cluster already. In that case, we just return without changing anything
1812 static int
1813 __btrfs_return_cluster_to_free_space(
1814 struct btrfs_block_group_cache *block_group,
1815 struct btrfs_free_cluster *cluster)
1817 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1818 struct btrfs_free_space *entry;
1819 struct rb_node *node;
1821 spin_lock(&cluster->lock);
1822 if (cluster->block_group != block_group)
1823 goto out;
1825 cluster->block_group = NULL;
1826 cluster->window_start = 0;
1827 list_del_init(&cluster->block_group_list);
1829 node = rb_first(&cluster->root);
1830 while (node) {
1831 bool bitmap;
1833 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1834 node = rb_next(&entry->offset_index);
1835 rb_erase(&entry->offset_index, &cluster->root);
1837 bitmap = (entry->bitmap != NULL);
1838 if (!bitmap)
1839 try_merge_free_space(ctl, entry, false);
1840 tree_insert_offset(&ctl->free_space_offset,
1841 entry->offset, &entry->offset_index, bitmap);
1843 cluster->root = RB_ROOT;
1845 out:
1846 spin_unlock(&cluster->lock);
1847 btrfs_put_block_group(block_group);
1848 return 0;
1851 void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
1853 struct btrfs_free_space *info;
1854 struct rb_node *node;
1856 while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
1857 info = rb_entry(node, struct btrfs_free_space, offset_index);
1858 if (!info->bitmap) {
1859 unlink_free_space(ctl, info);
1860 kmem_cache_free(btrfs_free_space_cachep, info);
1861 } else {
1862 free_bitmap(ctl, info);
1864 if (need_resched()) {
1865 spin_unlock(&ctl->tree_lock);
1866 cond_resched();
1867 spin_lock(&ctl->tree_lock);
1872 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
1874 spin_lock(&ctl->tree_lock);
1875 __btrfs_remove_free_space_cache_locked(ctl);
1876 spin_unlock(&ctl->tree_lock);
1879 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1881 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1882 struct btrfs_free_cluster *cluster;
1883 struct list_head *head;
1885 spin_lock(&ctl->tree_lock);
1886 while ((head = block_group->cluster_list.next) !=
1887 &block_group->cluster_list) {
1888 cluster = list_entry(head, struct btrfs_free_cluster,
1889 block_group_list);
1891 WARN_ON(cluster->block_group != block_group);
1892 __btrfs_return_cluster_to_free_space(block_group, cluster);
1893 if (need_resched()) {
1894 spin_unlock(&ctl->tree_lock);
1895 cond_resched();
1896 spin_lock(&ctl->tree_lock);
1899 __btrfs_remove_free_space_cache_locked(ctl);
1900 spin_unlock(&ctl->tree_lock);
1904 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
1905 u64 offset, u64 bytes, u64 empty_size)
1907 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1908 struct btrfs_free_space *entry = NULL;
1909 u64 bytes_search = bytes + empty_size;
1910 u64 ret = 0;
1912 spin_lock(&ctl->tree_lock);
1913 entry = find_free_space(ctl, &offset, &bytes_search);
1914 if (!entry)
1915 goto out;
1917 ret = offset;
1918 if (entry->bitmap) {
1919 bitmap_clear_bits(ctl, entry, offset, bytes);
1920 if (!entry->bytes)
1921 free_bitmap(ctl, entry);
1922 } else {
1923 unlink_free_space(ctl, entry);
1924 entry->offset += bytes;
1925 entry->bytes -= bytes;
1926 if (!entry->bytes)
1927 kmem_cache_free(btrfs_free_space_cachep, entry);
1928 else
1929 link_free_space(ctl, entry);
1932 out:
1933 spin_unlock(&ctl->tree_lock);
1935 return ret;
1939 * given a cluster, put all of its extents back into the free space
1940 * cache. If a block group is passed, this function will only free
1941 * a cluster that belongs to the passed block group.
1943 * Otherwise, it'll get a reference on the block group pointed to by the
1944 * cluster and remove the cluster from it.
1946 int btrfs_return_cluster_to_free_space(
1947 struct btrfs_block_group_cache *block_group,
1948 struct btrfs_free_cluster *cluster)
1950 struct btrfs_free_space_ctl *ctl;
1951 int ret;
1953 /* first, get a safe pointer to the block group */
1954 spin_lock(&cluster->lock);
1955 if (!block_group) {
1956 block_group = cluster->block_group;
1957 if (!block_group) {
1958 spin_unlock(&cluster->lock);
1959 return 0;
1961 } else if (cluster->block_group != block_group) {
1962 /* someone else has already freed it, don't redo their work */
1963 spin_unlock(&cluster->lock);
1964 return 0;
1966 atomic_inc(&block_group->count);
1967 spin_unlock(&cluster->lock);
1969 ctl = block_group->free_space_ctl;
1971 /* now return any extents the cluster had on it */
1972 spin_lock(&ctl->tree_lock);
1973 ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
1974 spin_unlock(&ctl->tree_lock);
1976 /* finally drop our ref */
1977 btrfs_put_block_group(block_group);
1978 return ret;
1981 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
1982 struct btrfs_free_cluster *cluster,
1983 struct btrfs_free_space *entry,
1984 u64 bytes, u64 min_start)
1986 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1987 int err;
1988 u64 search_start = cluster->window_start;
1989 u64 search_bytes = bytes;
1990 u64 ret = 0;
1992 search_start = min_start;
1993 search_bytes = bytes;
1995 err = search_bitmap(ctl, entry, &search_start, &search_bytes);
1996 if (err)
1997 return 0;
1999 ret = search_start;
2000 __bitmap_clear_bits(ctl, entry, ret, bytes);
2002 return ret;
2006 * given a cluster, try to allocate 'bytes' from it, returns 0
2007 * if it couldn't find anything suitably large, or a logical disk offset
2008 * if things worked out
2010 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2011 struct btrfs_free_cluster *cluster, u64 bytes,
2012 u64 min_start)
2014 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2015 struct btrfs_free_space *entry = NULL;
2016 struct rb_node *node;
2017 u64 ret = 0;
2019 spin_lock(&cluster->lock);
2020 if (bytes > cluster->max_size)
2021 goto out;
2023 if (cluster->block_group != block_group)
2024 goto out;
2026 node = rb_first(&cluster->root);
2027 if (!node)
2028 goto out;
2030 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2031 while (1) {
2032 if (entry->bytes < bytes ||
2033 (!entry->bitmap && entry->offset < min_start)) {
2034 node = rb_next(&entry->offset_index);
2035 if (!node)
2036 break;
2037 entry = rb_entry(node, struct btrfs_free_space,
2038 offset_index);
2039 continue;
2042 if (entry->bitmap) {
2043 ret = btrfs_alloc_from_bitmap(block_group,
2044 cluster, entry, bytes,
2045 min_start);
2046 if (ret == 0) {
2047 node = rb_next(&entry->offset_index);
2048 if (!node)
2049 break;
2050 entry = rb_entry(node, struct btrfs_free_space,
2051 offset_index);
2052 continue;
2054 } else {
2055 ret = entry->offset;
2057 entry->offset += bytes;
2058 entry->bytes -= bytes;
2061 if (entry->bytes == 0)
2062 rb_erase(&entry->offset_index, &cluster->root);
2063 break;
2065 out:
2066 spin_unlock(&cluster->lock);
2068 if (!ret)
2069 return 0;
2071 spin_lock(&ctl->tree_lock);
2073 ctl->free_space -= bytes;
2074 if (entry->bytes == 0) {
2075 ctl->free_extents--;
2076 if (entry->bitmap) {
2077 kfree(entry->bitmap);
2078 ctl->total_bitmaps--;
2079 ctl->op->recalc_thresholds(ctl);
2081 kmem_cache_free(btrfs_free_space_cachep, entry);
2084 spin_unlock(&ctl->tree_lock);
2086 return ret;
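/*
 * Try to build a cluster out of a single bitmap entry: scan for runs of
 * set bits at least 'bytes' long, accumulating until 'min_bytes' worth has
 * been found in a reasonably dense window, then move the bitmap entry from
 * the free space tree into the cluster's rbtree and record window_start.
 */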
2089 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
2090 struct btrfs_free_space *entry,
2091 struct btrfs_free_cluster *cluster,
2092 u64 offset, u64 bytes, u64 min_bytes)
2094 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2095 unsigned long next_zero;
2096 unsigned long i;
2097 unsigned long search_bits;
2098 unsigned long total_bits;
2099 unsigned long found_bits;
2100 unsigned long start = 0;
2101 unsigned long total_found = 0;
2102 int ret;
2103 bool found = false;
2105 i = offset_to_bit(entry->offset, block_group->sectorsize,
2106 max_t(u64, offset, entry->offset));
2107 search_bits = bytes_to_bits(bytes, block_group->sectorsize);
2108 total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
2110 again:
2111 found_bits = 0;
2112 for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
2113 i < BITS_PER_BITMAP;
2114 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
2115 next_zero = find_next_zero_bit(entry->bitmap,
2116 BITS_PER_BITMAP, i);
2117 if (next_zero - i >= search_bits) {
2118 found_bits = next_zero - i;
2119 break;
2121 i = next_zero;
2124 if (!found_bits)
2125 return -ENOSPC;
2127 if (!found) {
2128 start = i;
2129 found = true;
2132 total_found += found_bits;
2134 if (cluster->max_size < found_bits * block_group->sectorsize)
2135 cluster->max_size = found_bits * block_group->sectorsize;
2137 if (total_found < total_bits) {
2138 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
2139 if (i - start > total_bits * 2) {
2140 total_found = 0;
2141 cluster->max_size = 0;
2142 found = false;
2144 goto again;
2147 cluster->window_start = start * block_group->sectorsize +
2148 entry->offset;
2149 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2150 ret = tree_insert_offset(&cluster->root, entry->offset,
2151 &entry->offset_index, 1);
2152 BUG_ON(ret);
2154 return 0;
2158 * This searches the block group for just extents to fill the cluster with.
2160 static noinline int
2161 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2162 struct btrfs_free_cluster *cluster,
2163 struct list_head *bitmaps, u64 offset, u64 bytes,
2164 u64 min_bytes)
2166 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2167 struct btrfs_free_space *first = NULL;
2168 struct btrfs_free_space *entry = NULL;
2169 struct btrfs_free_space *prev = NULL;
2170 struct btrfs_free_space *last;
2171 struct rb_node *node;
2172 u64 window_start;
2173 u64 window_free;
2174 u64 max_extent;
2175 u64 max_gap = 128 * 1024;
2177 entry = tree_search_offset(ctl, offset, 0, 1);
2178 if (!entry)
2179 return -ENOSPC;
2182 * We don't want bitmaps, so just move along until we find a normal
2183 * extent entry.
2185 while (entry->bitmap) {
2186 if (list_empty(&entry->list))
2187 list_add_tail(&entry->list, bitmaps);
2188 node = rb_next(&entry->offset_index);
2189 if (!node)
2190 return -ENOSPC;
2191 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2194 window_start = entry->offset;
2195 window_free = entry->bytes;
2196 max_extent = entry->bytes;
2197 first = entry;
2198 last = entry;
2199 prev = entry;
2201 while (window_free <= min_bytes) {
2202 node = rb_next(&entry->offset_index);
2203 if (!node)
2204 return -ENOSPC;
2205 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2207 if (entry->bitmap) {
2208 if (list_empty(&entry->list))
2209 list_add_tail(&entry->list, bitmaps);
2210 continue;
2214 * we haven't filled the empty size and the window is
2215 * very large; reset and try again.

2217 if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
2218 entry->offset - window_start > (min_bytes * 2)) {
2219 first = entry;
2220 window_start = entry->offset;
2221 window_free = entry->bytes;
2222 last = entry;
2223 max_extent = entry->bytes;
2224 } else {
2225 last = entry;
2226 window_free += entry->bytes;
2227 if (entry->bytes > max_extent)
2228 max_extent = entry->bytes;
2230 prev = entry;
2233 cluster->window_start = first->offset;
2235 node = &first->offset_index;
2238 * now we've found our entries, pull them out of the free space
2239 * cache and put them into the cluster rbtree
2241 do {
2242 int ret;
2244 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2245 node = rb_next(&entry->offset_index);
2246 if (entry->bitmap)
2247 continue;
2249 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2250 ret = tree_insert_offset(&cluster->root, entry->offset,
2251 &entry->offset_index, 0);
2252 BUG_ON(ret);
2253 } while (node && entry != last);
2255 cluster->max_size = max_extent;
2257 return 0;
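/*
 * Editor's note: illustrative sketch, not part of the original file.  It
 * restates the windowing logic of setup_cluster_no_bitmap() above over a
 * plain array of free extents sorted by offset: grow a window of extents
 * until it holds more than min_bytes, and restart the window whenever the
 * gap to the previous extent exceeds max_gap or the window has stretched
 * past twice min_bytes.  The struct and names are hypothetical.
 */
struct sketch_free_extent {
	unsigned long long offset;
	unsigned long long bytes;
};

/* returns the index of the first extent of a suitable window, or -1 */
static int sketch_pick_window(const struct sketch_free_extent *ext, int nr,
			      unsigned long long min_bytes,
			      unsigned long long max_gap)
{
	unsigned long long window_start, window_free;
	int first, i;

	if (nr == 0)
		return -1;

	first = 0;
	window_start = ext[0].offset;
	window_free = ext[0].bytes;

	for (i = 1; window_free <= min_bytes; i++) {
		unsigned long long prev_end;

		if (i >= nr)
			return -1;		/* ran out of extents: -ENOSPC */

		prev_end = ext[i - 1].offset + ext[i - 1].bytes;
		if (ext[i].offset - prev_end > max_gap ||
		    ext[i].offset - window_start > min_bytes * 2) {
			first = i;		/* window too sparse: restart */
			window_start = ext[i].offset;
			window_free = ext[i].bytes;
		} else {
			window_free += ext[i].bytes;
		}
	}
	return first;				/* caller pulls ext[first..] into
						   the cluster, as above */
}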
2261 * This specifically looks for bitmaps that may work in the cluster; we assume
2262 * that we have already failed to find extents that will work.
2264 static noinline int
2265 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2266 struct btrfs_free_cluster *cluster,
2267 struct list_head *bitmaps, u64 offset, u64 bytes,
2268 u64 min_bytes)
2270 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2271 struct btrfs_free_space *entry;
2272 struct rb_node *node;
2273 int ret = -ENOSPC;
2275 if (ctl->total_bitmaps == 0)
2276 return -ENOSPC;
2279 * First check our cached list of bitmaps and see if there is an entry
2280 * here that will work.
2282 list_for_each_entry(entry, bitmaps, list) {
2283 if (entry->bytes < min_bytes)
2284 continue;
2285 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2286 bytes, min_bytes);
2287 if (!ret)
2288 return 0;
2292 * If we do have entries on our list and we are here, then none of them
2293 * worked, so go ahead and get the next entry after the last entry in
2294 * this list and start the search from there.
2296 if (!list_empty(bitmaps)) {
2297 entry = list_entry(bitmaps->prev, struct btrfs_free_space,
2298 list);
2299 node = rb_next(&entry->offset_index);
2300 if (!node)
2301 return -ENOSPC;
2302 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2303 goto search;
2306 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
2307 if (!entry)
2308 return -ENOSPC;
2310 search:
2311 node = &entry->offset_index;
2312 do {
2313 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2314 node = rb_next(&entry->offset_index);
2315 if (!entry->bitmap)
2316 continue;
2317 if (entry->bytes < min_bytes)
2318 continue;
2319 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2320 bytes, min_bytes);
2321 } while (ret && node);
2323 return ret;
2327 * here we try to find a cluster of blocks in a block group. The goal
2328 * is to find at least bytes free and up to empty_size + bytes free.
2329 * We might not find them all in one contiguous area.
2331 * returns zero and sets up cluster if things worked out, otherwise
2332 * it returns -ENOSPC
2334 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2335 struct btrfs_root *root,
2336 struct btrfs_block_group_cache *block_group,
2337 struct btrfs_free_cluster *cluster,
2338 u64 offset, u64 bytes, u64 empty_size)
2340 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2341 struct list_head bitmaps;
2342 struct btrfs_free_space *entry, *tmp;
2343 u64 min_bytes;
2344 int ret;
2346 /* for metadata, allow allocations with more holes */
2347 if (btrfs_test_opt(root, SSD_SPREAD)) {
2348 min_bytes = bytes + empty_size;
2349 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
2351 * we want to do larger allocations when we are
2352 * flushing out the delayed refs; it helps prevent
2353 * making more work as we go along.
2355 if (trans->transaction->delayed_refs.flushing)
2356 min_bytes = max(bytes, (bytes + empty_size) >> 1);
2357 else
2358 min_bytes = max(bytes, (bytes + empty_size) >> 4);
2359 } else
2360 min_bytes = max(bytes, (bytes + empty_size) >> 2);
2362 spin_lock(&ctl->tree_lock);
2365 * If we know we don't have enough space to make a cluster, don't even
2366 * bother doing all the work to try and find one.
2368 if (ctl->free_space < min_bytes) {
2369 spin_unlock(&ctl->tree_lock);
2370 return -ENOSPC;
2373 spin_lock(&cluster->lock);
2375 /* someone already found a cluster, hooray */
2376 if (cluster->block_group) {
2377 ret = 0;
2378 goto out;
2381 INIT_LIST_HEAD(&bitmaps);
2382 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2383 bytes, min_bytes);
2384 if (ret)
2385 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2386 offset, bytes, min_bytes);
2388 /* Clear our temporary list */
2389 list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2390 list_del_init(&entry->list);
2392 if (!ret) {
2393 atomic_inc(&block_group->count);
2394 list_add_tail(&cluster->block_group_list,
2395 &block_group->cluster_list);
2396 cluster->block_group = block_group;
2398 out:
2399 spin_unlock(&cluster->lock);
2400 spin_unlock(&ctl->tree_lock);
2402 return ret;
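/*
 * Editor's note: illustrative sketch, not part of the original file.  It
 * spells out the min_bytes policy chosen at the top of
 * btrfs_find_space_cluster() above: with ssd_spread the cluster must cover
 * the full request, metadata clusters may be sparser (much sparser when the
 * delayed refs are not being flushed), and data clusters fall in between.
 * The flag parameters and helper names are hypothetical.
 */
static unsigned long long sketch_max(unsigned long long a, unsigned long long b)
{
	return a > b ? a : b;
}

static unsigned long long
sketch_cluster_min_bytes(unsigned long long bytes, unsigned long long empty_size,
			 int ssd_spread, int is_metadata, int flushing_refs)
{
	if (ssd_spread)				/* must be fully contiguous */
		return bytes + empty_size;
	if (is_metadata)			/* sparser clusters allowed */
		return sketch_max(bytes, (bytes + empty_size) >>
					 (flushing_refs ? 1 : 4));
	return sketch_max(bytes, (bytes + empty_size) >> 2);	/* data */
}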
2406 * simple code to zero out a cluster
2408 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2410 spin_lock_init(&cluster->lock);
2411 spin_lock_init(&cluster->refill_lock);
2412 cluster->root = RB_ROOT;
2413 cluster->max_size = 0;
2414 INIT_LIST_HEAD(&cluster->block_group_list);
2415 cluster->block_group = NULL;
2418 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2419 u64 *trimmed, u64 start, u64 end, u64 minlen)
2421 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2422 struct btrfs_free_space *entry = NULL;
2423 struct btrfs_fs_info *fs_info = block_group->fs_info;
2424 u64 bytes = 0;
2425 u64 actually_trimmed;
2426 int ret = 0;
2428 *trimmed = 0;
2430 while (start < end) {
2431 spin_lock(&ctl->tree_lock);
2433 if (ctl->free_space < minlen) {
2434 spin_unlock(&ctl->tree_lock);
2435 break;
2438 entry = tree_search_offset(ctl, start, 0, 1);
2439 if (!entry)
2440 entry = tree_search_offset(ctl,
2441 offset_to_bitmap(ctl, start),
2442 1, 1);
2444 if (!entry || entry->offset >= end) {
2445 spin_unlock(&ctl->tree_lock);
2446 break;
2449 if (entry->bitmap) {
2450 ret = search_bitmap(ctl, entry, &start, &bytes);
2451 if (!ret) {
2452 if (start >= end) {
2453 spin_unlock(&ctl->tree_lock);
2454 break;
2456 bytes = min(bytes, end - start);
2457 bitmap_clear_bits(ctl, entry, start, bytes);
2458 if (entry->bytes == 0)
2459 free_bitmap(ctl, entry);
2460 } else {
2461 start = entry->offset + BITS_PER_BITMAP *
2462 block_group->sectorsize;
2463 spin_unlock(&ctl->tree_lock);
2464 ret = 0;
2465 continue;
2467 } else {
2468 start = entry->offset;
2469 bytes = min(entry->bytes, end - start);
2470 unlink_free_space(ctl, entry);
2471 kmem_cache_free(btrfs_free_space_cachep, entry);
2474 spin_unlock(&ctl->tree_lock);
2476 if (bytes >= minlen) {
2477 struct btrfs_space_info *space_info;
2478 int update = 0;
2480 space_info = block_group->space_info;
2481 spin_lock(&space_info->lock);
2482 spin_lock(&block_group->lock);
2483 if (!block_group->ro) {
2484 block_group->reserved += bytes;
2485 space_info->bytes_reserved += bytes;
2486 update = 1;
2488 spin_unlock(&block_group->lock);
2489 spin_unlock(&space_info->lock);
2491 ret = btrfs_error_discard_extent(fs_info->extent_root,
2492 start,
2493 bytes,
2494 &actually_trimmed);
2496 btrfs_add_free_space(block_group, start, bytes);
2497 if (update) {
2498 spin_lock(&space_info->lock);
2499 spin_lock(&block_group->lock);
2500 if (block_group->ro)
2501 space_info->bytes_readonly += bytes;
2502 block_group->reserved -= bytes;
2503 space_info->bytes_reserved -= bytes;
2504 spin_unlock(&space_info->lock);
2505 spin_unlock(&block_group->lock);
2508 if (ret)
2509 break;
2510 *trimmed += actually_trimmed;
2512 start += bytes;
2513 bytes = 0;
2515 if (fatal_signal_pending(current)) {
2516 ret = -ERESTARTSYS;
2517 break;
2520 cond_resched();
2523 return ret;
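/*
 * Editor's note: illustrative sketch, not part of the original file.  A
 * caller of btrfs_trim_block_group() above is expected to clamp the
 * requested [start, end) range to each block group and accumulate the
 * per-group *trimmed counts; in btrfs this is driven by the FITRIM ioctl
 * path.  The helper below only shows the clamping arithmetic; the struct
 * and names are hypothetical.
 */
struct sketch_range {
	unsigned long long start;
	unsigned long long end;		/* exclusive */
};

/* returns 0 and writes the clamped range, or -1 if there is no overlap */
static int sketch_clamp_range(unsigned long long bg_start,
			      unsigned long long bg_len,
			      const struct sketch_range *req,
			      struct sketch_range *out)
{
	unsigned long long bg_end = bg_start + bg_len;

	if (bg_end <= req->start || bg_start >= req->end)
		return -1;			/* block group not in range */
	out->start = req->start > bg_start ? req->start : bg_start;
	out->end = req->end < bg_end ? req->end : bg_end;
	return 0;
}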
2527 * Find the left-most item in the cache tree, and then return the
2528 * smallest inode number in the item.
2530 * Note: the returned inode number may not be the smallest one in
2531 * the tree, if the left-most item is a bitmap.
2533 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2535 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2536 struct btrfs_free_space *entry = NULL;
2537 u64 ino = 0;
2539 spin_lock(&ctl->tree_lock);
2541 if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2542 goto out;
2544 entry = rb_entry(rb_first(&ctl->free_space_offset),
2545 struct btrfs_free_space, offset_index);
2547 if (!entry->bitmap) {
2548 ino = entry->offset;
2550 unlink_free_space(ctl, entry);
2551 entry->offset++;
2552 entry->bytes--;
2553 if (!entry->bytes)
2554 kmem_cache_free(btrfs_free_space_cachep, entry);
2555 else
2556 link_free_space(ctl, entry);
2557 } else {
2558 u64 offset = 0;
2559 u64 count = 1;
2560 int ret;
2562 ret = search_bitmap(ctl, entry, &offset, &count);
2563 BUG_ON(ret);
2565 ino = offset;
2566 bitmap_clear_bits(ctl, entry, offset, 1);
2567 if (entry->bytes == 0)
2568 free_bitmap(ctl, entry);
2570 out:
2571 spin_unlock(&ctl->tree_lock);
2573 return ino;
2576 struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2577 struct btrfs_path *path)
2579 struct inode *inode = NULL;
2581 spin_lock(&root->cache_lock);
2582 if (root->cache_inode)
2583 inode = igrab(root->cache_inode);
2584 spin_unlock(&root->cache_lock);
2585 if (inode)
2586 return inode;
2588 inode = __lookup_free_space_inode(root, path, 0);
2589 if (IS_ERR(inode))
2590 return inode;
2592 spin_lock(&root->cache_lock);
2593 if (!btrfs_fs_closing(root->fs_info))
2594 root->cache_inode = igrab(inode);
2595 spin_unlock(&root->cache_lock);
2597 return inode;
2600 int create_free_ino_inode(struct btrfs_root *root,
2601 struct btrfs_trans_handle *trans,
2602 struct btrfs_path *path)
2604 return __create_free_space_inode(root, trans, path,
2605 BTRFS_FREE_INO_OBJECTID, 0);
2608 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2610 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2611 struct btrfs_path *path;
2612 struct inode *inode;
2613 int ret = 0;
2614 u64 root_gen = btrfs_root_generation(&root->root_item);
2616 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2617 return 0;
2620 * If we're unmounting then just return, since this does a search on the
2621 * normal root and not the commit root, and we could deadlock.
2623 if (btrfs_fs_closing(fs_info))
2624 return 0;
2626 path = btrfs_alloc_path();
2627 if (!path)
2628 return 0;
2630 inode = lookup_free_ino_inode(root, path);
2631 if (IS_ERR(inode))
2632 goto out;
2634 if (root_gen != BTRFS_I(inode)->generation)
2635 goto out_put;
2637 ret = __load_free_space_cache(root, inode, ctl, path, 0);
2639 if (ret < 0)
2640 printk(KERN_ERR "btrfs: failed to load free ino cache for "
2641 "root %llu\n", root->root_key.objectid);
2642 out_put:
2643 iput(inode);
2644 out:
2645 btrfs_free_path(path);
2646 return ret;
2649 int btrfs_write_out_ino_cache(struct btrfs_root *root,
2650 struct btrfs_trans_handle *trans,
2651 struct btrfs_path *path)
2653 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2654 struct inode *inode;
2655 int ret;
2657 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2658 return 0;
2660 inode = lookup_free_ino_inode(root, path);
2661 if (IS_ERR(inode))
2662 return 0;
2664 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
2665 if (ret < 0)
2666 printk(KERN_ERR "btrfs: failed to write free ino cache "
2667 "for root %llu\n", root->root_key.objectid);
2669 iput(inode);
2670 return ret;