/* fs/btrfs/free-space-cache.c */
/*
 * Copyright (C) 2008 Red Hat.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "inode-map.h"

#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)
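
/*
 * A bitmap entry is backed by one page, so with 4K pages (an assumption;
 * PAGE_CACHE_SIZE is arch dependent) BITS_PER_BITMAP is 32768 bits, one per
 * ctl->unit of space.  MAX_CACHE_BYTES_PER_GIG caps the in-memory tracking
 * footprint at 32K per GiB of block group space; see
 * recalculate_thresholds() below.
 */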

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info);

static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
					       struct btrfs_path *path,
					       u64 offset)
{
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		btrfs_release_path(path);
		return ERR_PTR(-ENOENT);
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(path);

	inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
	if (!inode)
		return ERR_PTR(-ENOENT);
	if (IS_ERR(inode))
		return inode;
	if (is_bad_inode(inode)) {
		iput(inode);
		return ERR_PTR(-ENOENT);
	}

	inode->i_mapping->flags &= ~__GFP_FS;

	return inode;
}

struct inode *lookup_free_space_inode(struct btrfs_root *root,
				      struct btrfs_block_group_cache
				      *block_group, struct btrfs_path *path)
{
	struct inode *inode = NULL;
	u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path,
					  block_group->key.objectid);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&block_group->lock);
	if (!((BTRFS_I(inode)->flags & flags) == flags)) {
		printk(KERN_INFO "Old style space inode found, converting.\n");
		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
			BTRFS_INODE_NODATACOW;
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
	}

	if (!block_group->iref) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}

int __create_free_space_inode(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path, u64 ino, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
	int ret;

	ret = btrfs_insert_empty_inode(trans, root, path, ino);
	if (ret)
		return ret;

	/* We inline crc's for the free disk space cache */
	if (ino != BTRFS_FREE_INO_OBJECTID)
		flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
			     sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, flags);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item, offset);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(path);
		return ret;
	}
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;
}

int create_free_space_inode(struct btrfs_root *root,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_find_free_objectid(root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(root, trans, path, ino,
					 block_group->key.objectid);
}

int btrfs_truncate_free_space_cache(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_path *path,
				    struct inode *inode)
{
	struct btrfs_block_rsv *rsv;
	u64 needed_bytes;
	loff_t oldsize;
	int ret = 0;

	rsv = trans->block_rsv;
	trans->block_rsv = &root->fs_info->global_block_rsv;

	/* 1 for slack space, 1 for updating the inode */
	needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
		btrfs_calc_trans_metadata_size(root, 1);

	spin_lock(&trans->block_rsv->lock);
	if (trans->block_rsv->reserved < needed_bytes) {
		spin_unlock(&trans->block_rsv->lock);
		trans->block_rsv = rsv;
		return -ENOSPC;
	}
	spin_unlock(&trans->block_rsv->lock);

	oldsize = i_size_read(inode);
	btrfs_i_size_write(inode, 0);
	truncate_pagecache(inode, oldsize, 0);

	/*
	 * We don't need an orphan item because truncating the free space cache
	 * will never be split across transactions.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);

	if (ret) {
		trans->block_rsv = rsv;
		btrfs_abort_transaction(trans, root, ret);
		return ret;
	}

	ret = btrfs_update_inode(trans, root, inode);
	if (ret)
		btrfs_abort_transaction(trans, root, ret);
	trans->block_rsv = rsv;

	return ret;
}

static int readahead_cache(struct inode *inode)
{
	struct file_ra_state *ra;
	unsigned long last_index;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
	if (!ra)
		return -ENOMEM;

	file_ra_state_init(ra, inode->i_mapping);
	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;

	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);

	kfree(ra);

	return 0;
}
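
/*
 * io_ctl tracks our position while streaming the cache inode's pages in or
 * out.  The layout implemented by the helpers below: when check_crcs is set
 * (every cache except the free-ino cache, whose data is checksummed through
 * the normal csum tree), page 0 begins with an array of num_pages u32
 * crc32c checksums, one per page, followed by a u64 generation; free space
 * entries and page-sized bitmaps follow.  Without check_crcs only the u64
 * generation sits at the front of the first page.
 */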
struct io_ctl {
	void *cur, *orig;
	struct page *page;
	struct page **pages;
	struct btrfs_root *root;
	unsigned long size;
	int index;
	int num_pages;
	unsigned check_crcs:1;
};

static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
		       struct btrfs_root *root)
{
	memset(io_ctl, 0, sizeof(struct io_ctl));
	io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
		PAGE_CACHE_SHIFT;
	io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
				GFP_NOFS);
	if (!io_ctl->pages)
		return -ENOMEM;
	io_ctl->root = root;
	if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
		io_ctl->check_crcs = 1;
	return 0;
}

static void io_ctl_free(struct io_ctl *io_ctl)
{
	kfree(io_ctl->pages);
}

static void io_ctl_unmap_page(struct io_ctl *io_ctl)
{
	if (io_ctl->cur) {
		kunmap(io_ctl->page);
		io_ctl->cur = NULL;
		io_ctl->orig = NULL;
	}
}

static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
{
	WARN_ON(io_ctl->cur);
	BUG_ON(io_ctl->index >= io_ctl->num_pages);
	io_ctl->page = io_ctl->pages[io_ctl->index++];
	io_ctl->cur = kmap(io_ctl->page);
	io_ctl->orig = io_ctl->cur;
	io_ctl->size = PAGE_CACHE_SIZE;
	if (clear)
		memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
}

static void io_ctl_drop_pages(struct io_ctl *io_ctl)
{
	int i;

	io_ctl_unmap_page(io_ctl);

	for (i = 0; i < io_ctl->num_pages; i++) {
		if (io_ctl->pages[i]) {
			ClearPageChecked(io_ctl->pages[i]);
			unlock_page(io_ctl->pages[i]);
			page_cache_release(io_ctl->pages[i]);
		}
	}
}

static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
				int uptodate)
{
	struct page *page;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int i;

	for (i = 0; i < io_ctl->num_pages; i++) {
		page = find_or_create_page(inode->i_mapping, i, mask);
		if (!page) {
			io_ctl_drop_pages(io_ctl);
			return -ENOMEM;
		}
		io_ctl->pages[i] = page;
		if (uptodate && !PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				printk(KERN_ERR "btrfs: error reading free "
				       "space cache\n");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
		}
	}

	for (i = 0; i < io_ctl->num_pages; i++) {
		clear_page_dirty_for_io(io_ctl->pages[i]);
		set_page_extent_mapped(io_ctl->pages[i]);
	}

	return 0;
}

static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
{
	u64 *val;

	io_ctl_map_page(io_ctl, 1);

	/*
	 * Skip the csum areas.  If we don't check crcs then we just have a
	 * 64bit chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
		io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	val = io_ctl->cur;
	*val = cpu_to_le64(generation);
	io_ctl->cur += sizeof(u64);
}

static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
{
	u64 *gen;

	/*
	 * Skip the crc area.  If we don't check crcs then we just have a 64bit
	 * chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
		io_ctl->size -= sizeof(u64) +
			(sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	gen = io_ctl->cur;
	if (le64_to_cpu(*gen) != generation) {
		printk_ratelimited(KERN_ERR "btrfs: space cache generation "
				   "(%Lu) does not match inode (%Lu)\n", *gen,
				   generation);
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}
	io_ctl->cur += sizeof(u64);
	return 0;
}
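
/*
 * Checksum bookkeeping: the crc for page 'index' lives in the u32 array at
 * the start of page 0, so io_ctl_set_crc() and io_ctl_check_crc() kmap page
 * 0 to reach that slot.  For index 0 the checksummed region starts past the
 * crc array itself (the 'offset' below), since the array cannot cover
 * itself.
 */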
static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
{
	u32 *tmp;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_unmap_page(io_ctl);
		return;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
			      PAGE_CACHE_SIZE - offset);
	btrfs_csum_final(crc, (char *)&crc);
	io_ctl_unmap_page(io_ctl);
	tmp = kmap(io_ctl->pages[0]);
	tmp += index;
	*tmp = crc;
	kunmap(io_ctl->pages[0]);
}

static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
{
	u32 *tmp, val;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_map_page(io_ctl, 0);
		return 0;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	tmp = kmap(io_ctl->pages[0]);
	tmp += index;
	val = *tmp;
	kunmap(io_ctl->pages[0]);

	io_ctl_map_page(io_ctl, 0);
	crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
			      PAGE_CACHE_SIZE - offset);
	btrfs_csum_final(crc, (char *)&crc);
	if (val != crc) {
		printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
				   "space cache\n");
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}

	return 0;
}

static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
			    void *bitmap)
{
	struct btrfs_free_space_entry *entry;

	if (!io_ctl->cur)
		return -ENOSPC;

	entry = io_ctl->cur;
	entry->offset = cpu_to_le64(offset);
	entry->bytes = cpu_to_le64(bytes);
	entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
		BTRFS_FREE_SPACE_EXTENT;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_set_crc(io_ctl, io_ctl->index - 1);

	/* No more pages to map */
	if (io_ctl->index >= io_ctl->num_pages)
		return 0;

	/* map the next page */
	io_ctl_map_page(io_ctl, 1);
	return 0;
}

static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
{
	if (!io_ctl->cur)
		return -ENOSPC;

	/*
	 * If we aren't at the start of the current page, unmap this one and
	 * map the next one if there is any left.
	 */
	if (io_ctl->cur != io_ctl->orig) {
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
		if (io_ctl->index >= io_ctl->num_pages)
			return -ENOSPC;
		io_ctl_map_page(io_ctl, 0);
	}

	memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	if (io_ctl->index < io_ctl->num_pages)
		io_ctl_map_page(io_ctl, 0);
	return 0;
}

static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
{
	/*
	 * If we're not on the boundary we know we've modified the page and we
	 * need to crc the page.
	 */
	if (io_ctl->cur != io_ctl->orig)
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	else
		io_ctl_unmap_page(io_ctl);

	while (io_ctl->index < io_ctl->num_pages) {
		io_ctl_map_page(io_ctl, 1);
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	}
}

static int io_ctl_read_entry(struct io_ctl *io_ctl,
			     struct btrfs_free_space *entry, u8 *type)
{
	struct btrfs_free_space_entry *e;
	int ret;

	if (!io_ctl->cur) {
		ret = io_ctl_check_crc(io_ctl, io_ctl->index);
		if (ret)
			return ret;
	}

	e = io_ctl->cur;
	entry->offset = le64_to_cpu(e->offset);
	entry->bytes = le64_to_cpu(e->bytes);
	*type = e->type;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_unmap_page(io_ctl);

	return 0;
}

static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
			      struct btrfs_free_space *entry)
{
	int ret;

	ret = io_ctl_check_crc(io_ctl, io_ctl->index);
	if (ret)
		return ret;

	memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
	io_ctl_unmap_page(io_ctl);

	return 0;
}
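
/*
 * Return convention for __load_free_space_cache() below: 1 means the cache
 * was loaded into 'ctl'; 0 means the on-disk cache should simply be ignored
 * (empty file, missing header, stale generation); negative is a hard error
 * such as an allocation failure.  load_free_space_cache() treats anything
 * <= 0 as an unusable cache.
 */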
int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
			    struct btrfs_free_space_ctl *ctl,
			    struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct io_ctl io_ctl;
	struct btrfs_key key;
	struct btrfs_free_space *e, *n;
	struct list_head bitmaps;
	u64 num_entries;
	u64 num_bitmaps;
	u64 generation;
	u8 type;
	int ret = 0;

	INIT_LIST_HEAD(&bitmaps);

	/* Nothing in the space cache, goodbye */
	if (!i_size_read(inode))
		return 0;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return 0;
	else if (ret > 0) {
		btrfs_release_path(path);
		return 0;
	}

	ret = -1;

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	num_entries = btrfs_free_space_entries(leaf, header);
	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
	generation = btrfs_free_space_generation(leaf, header);
	btrfs_release_path(path);

	if (BTRFS_I(inode)->generation != generation) {
		printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
		       " not match free space cache generation (%llu)\n",
		       (unsigned long long)BTRFS_I(inode)->generation,
		       (unsigned long long)generation);
		return 0;
	}

	if (!num_entries)
		return 0;

	ret = io_ctl_init(&io_ctl, inode, root);
	if (ret)
		return ret;

	ret = readahead_cache(inode);
	if (ret)
		goto out;

	ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
	if (ret)
		goto out;

	ret = io_ctl_check_crc(&io_ctl, 0);
	if (ret)
		goto free_cache;

	ret = io_ctl_check_generation(&io_ctl, generation);
	if (ret)
		goto free_cache;

	while (num_entries) {
		e = kmem_cache_zalloc(btrfs_free_space_cachep,
				      GFP_NOFS);
		if (!e)
			goto free_cache;

		ret = io_ctl_read_entry(&io_ctl, e, &type);
		if (ret) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (!e->bytes) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (type == BTRFS_FREE_SPACE_EXTENT) {
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				printk(KERN_ERR "Duplicate entries in "
				       "free space cache, dumping\n");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
		} else {
			BUG_ON(!num_bitmaps);
			num_bitmaps--;
			e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
			if (!e->bitmap) {
				kmem_cache_free(
					btrfs_free_space_cachep, e);
				goto free_cache;
			}
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			ctl->total_bitmaps++;
			ctl->op->recalc_thresholds(ctl);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				printk(KERN_ERR "Duplicate entries in "
				       "free space cache, dumping\n");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
			list_add_tail(&e->list, &bitmaps);
		}

		num_entries--;
	}

	io_ctl_unmap_page(&io_ctl);

	/*
	 * We add the bitmaps at the end of the entries in order that
	 * the bitmap entries are added to the cache.
	 */
	list_for_each_entry_safe(e, n, &bitmaps, list) {
		list_del_init(&e->list);
		ret = io_ctl_read_bitmap(&io_ctl, e);
		if (ret)
			goto free_cache;
	}

	io_ctl_drop_pages(&io_ctl);
	ret = 1;
out:
	io_ctl_free(&io_ctl);
	return ret;
free_cache:
	io_ctl_drop_pages(&io_ctl);
	__btrfs_remove_free_space_cache(ctl);
	goto out;
}

int load_free_space_cache(struct btrfs_fs_info *fs_info,
			  struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_root *root = fs_info->tree_root;
	struct inode *inode;
	struct btrfs_path *path;
	int ret = 0;
	bool matched;
	u64 used = btrfs_block_group_used(&block_group->item);

	/*
	 * If we're unmounting then just return, since this does a search on the
	 * normal root and not the commit root and we could deadlock.
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode)) {
		btrfs_free_path(path);
		return 0;
	}

	/* We may have converted the inode and made the cache invalid. */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
		goto out;
	}
	spin_unlock(&block_group->lock);

	ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
				      path, block_group->key.objectid);
	btrfs_free_path(path);
	if (ret <= 0)
		goto out;

	spin_lock(&ctl->tree_lock);
	matched = (ctl->free_space == (block_group->key.offset - used -
				       block_group->bytes_super));
	spin_unlock(&ctl->tree_lock);

	if (!matched) {
		__btrfs_remove_free_space_cache(ctl);
		printk(KERN_ERR "block group %llu has a wrong amount of free "
		       "space\n", block_group->key.objectid);
		ret = -1;
	}
out:
	if (ret < 0) {
		/* This cache is bogus, make sure it gets cleared */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		ret = 0;

		printk(KERN_ERR "btrfs: failed to load free space cache "
		       "for block group %llu\n", block_group->key.objectid);
	}

	iput(inode);
	return ret;
}

/**
 * __btrfs_write_out_cache - write out cached info to an inode
 * @root - the root the inode belongs to
 * @ctl - the free space cache we are going to write out
 * @block_group - the block_group for this cache if it belongs to a block_group
 * @trans - the trans handle
 * @path - the path to use
 * @offset - the offset for the key we'll insert
 *
 * This function writes out a free space cache struct to disk for quick recovery
 * on mount.  This will return 0 if it was successful in writing the cache out,
 * and -1 if it was not.
 */
int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
			    struct btrfs_free_space_ctl *ctl,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct rb_node *node;
	struct list_head *pos, *n;
	struct extent_state *cached_state = NULL;
	struct btrfs_free_cluster *cluster = NULL;
	struct extent_io_tree *unpin = NULL;
	struct io_ctl io_ctl;
	struct list_head bitmap_list;
	struct btrfs_key key;
	u64 start, extent_start, extent_end, len;
	int entries = 0;
	int bitmaps = 0;
	int ret;
	int err = -1;

	INIT_LIST_HEAD(&bitmap_list);

	if (!i_size_read(inode))
		return -1;

	ret = io_ctl_init(&io_ctl, inode, root);
	if (ret)
		return -1;

	/* Get the cluster for this block_group if it exists */
	if (block_group && !list_empty(&block_group->cluster_list))
		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);

	/* Lock all pages first so we can lock the extent safely. */
	io_ctl_prepare_pages(&io_ctl, inode, 0);

	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			 0, &cached_state);

	node = rb_first(&ctl->free_space_offset);
	if (!node && cluster) {
		node = rb_first(&cluster->root);
		cluster = NULL;
	}

	/* Make sure we can fit our crcs into the first page */
	if (io_ctl.check_crcs &&
	    (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
		WARN_ON(1);
		goto out_nospc;
	}

	io_ctl_set_generation(&io_ctl, trans->transid);

	/* Write out the extent entries */
	while (node) {
		struct btrfs_free_space *e;

		e = rb_entry(node, struct btrfs_free_space, offset_index);
		entries++;

		ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
				       e->bitmap);
		if (ret)
			goto out_nospc;

		if (e->bitmap) {
			list_add_tail(&e->list, &bitmap_list);
			bitmaps++;
		}
		node = rb_next(node);
		if (!node && cluster) {
			node = rb_first(&cluster->root);
			cluster = NULL;
		}
	}

	/*
	 * We want to add any pinned extents to our free space cache
	 * so we don't leak the space
	 */

	/*
	 * We shouldn't have switched the pinned extents yet so this is the
	 * right one
	 */
	unpin = root->fs_info->pinned_extents;

	if (block_group)
		start = block_group->key.objectid;

	while (block_group && (start < block_group->key.objectid +
			       block_group->key.offset)) {
		ret = find_first_extent_bit(unpin, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY);
		if (ret) {
			ret = 0;
			break;
		}

		/* This pinned extent is out of our range */
		if (extent_start >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		extent_start = max(extent_start, start);
		extent_end = min(block_group->key.objectid +
				 block_group->key.offset, extent_end + 1);
		len = extent_end - extent_start;

		entries++;
		ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL);
		if (ret)
			goto out_nospc;

		start = extent_end;
	}

	/* Write out the bitmaps */
	list_for_each_safe(pos, n, &bitmap_list) {
		struct btrfs_free_space *entry =
			list_entry(pos, struct btrfs_free_space, list);

		ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
		if (ret)
			goto out_nospc;
		list_del_init(&entry->list);
	}

	/* Zero out the rest of the pages just to make sure */
	io_ctl_zero_remaining_pages(&io_ctl);

	ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
				0, i_size_read(inode), &cached_state);
	io_ctl_drop_pages(&io_ctl);
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);

	if (ret)
		goto out;

	ret = filemap_write_and_wait(inode->i_mapping);
	if (ret)
		goto out;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
				 GFP_NOFS);
		goto out;
	}
	leaf = path->nodes[0];
	if (ret > 0) {
		struct btrfs_key found_key;
		BUG_ON(!path->slots[0]);
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
		    found_key.offset != offset) {
			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
					 inode->i_size - 1,
					 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
					 NULL, GFP_NOFS);
			btrfs_release_path(path);
			goto out;
		}
	}

	BTRFS_I(inode)->generation = trans->transid;
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_set_free_space_entries(leaf, header, entries);
	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
	btrfs_set_free_space_generation(leaf, header, trans->transid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	err = 0;
out:
	io_ctl_free(&io_ctl);
	if (err) {
		invalidate_inode_pages2(inode->i_mapping);
		BTRFS_I(inode)->generation = 0;
	}
	btrfs_update_inode(trans, root, inode);
	return err;

out_nospc:
	list_for_each_safe(pos, n, &bitmap_list) {
		struct btrfs_free_space *entry =
			list_entry(pos, struct btrfs_free_space, list);
		list_del_init(&entry->list);
	}
	io_ctl_drop_pages(&io_ctl);
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);
	goto out;
}

int btrfs_write_out_cache(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	int ret = 0;

	root = root->fs_info->tree_root;

	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
				      path, block_group->key.objectid);
	if (ret) {
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);
		ret = 0;
#ifdef DEBUG
		printk(KERN_ERR "btrfs: failed to write free space cache "
		       "for block group %llu\n", block_group->key.objectid);
#endif
	}

	iput(inode);
	return ret;
}
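
/*
 * Conversion helpers between byte ranges and bitmap bits.  'unit' is
 * ctl->unit, which btrfs_init_free_space_ctl() sets to the block group's
 * sectorsize, so one bit in a bitmap entry represents one sector.
 */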
static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div_u64(offset, unit));
}

static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
{
	return (unsigned long)(div_u64(bytes, unit));
}
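
/*
 * offset_to_bitmap() rounds an offset down to the start of the bitmap that
 * would cover it.  For example, with 4K pages and a 4K sectorsize (an
 * assumption; both vary), one bitmap covers 32768 * 4K = 128MiB, so offsets
 * round down to a 128MiB boundary relative to ctl->start.
 */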
static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
	bitmap_start = offset - ctl->start;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += ctl->start;

	return bitmap_start;
}

static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset.  If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap.  So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically.  If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				if (info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_right;
			} else {
				if (!info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_free_space_ctl *ctl,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = ctl->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    ctl->unit > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

static inline void
__unlink_free_space(struct btrfs_free_space_ctl *ctl,
		    struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &ctl->free_space_offset);
	ctl->free_extents--;
}

static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	__unlink_free_space(ctl, info);
	ctl->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	ctl->free_space += info->bytes;
	ctl->free_extents++;
	return ret;
}
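
/*
 * Worked example for the math below, assuming 4K pages: a 1GiB block group
 * gets max_bytes = 32K.  With one bitmap already in use, bitmap_bytes is
 * (1 + 1) * 4K = 8K, leaving extent_bytes = min(32K - 8K, 32K / 2) = 16K,
 * so extents_thresh becomes 16K / sizeof(struct btrfs_free_space) entries
 * before use_bitmap() starts steering new free space into bitmaps.
 */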
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_block_group_cache *block_group = ctl->private;
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;
	u64 size = block_group->key.offset;
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);

	BUG_ON(ctl->total_bitmaps > max_bitmaps);

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	if (size < 1024 * 1024 * 1024)
		max_bytes = MAX_CACHE_BYTES_PER_GIG;
	else
		max_bytes = MAX_CACHE_BYTES_PER_GIG *
			div64_u64(size, 1024 * 1024 * 1024);

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	if (bitmap_bytes >= max_bytes) {
		ctl->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	ctl->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}

static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *info,
				       u64 offset, u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	BUG_ON(start + count > BITS_PER_BITMAP);

	bitmap_clear(info->bitmap, start, count);

	info->bytes -= bytes;
}

static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	__bitmap_clear_bits(ctl, info, offset, bytes);
	ctl->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	BUG_ON(start + count > BITS_PER_BITMAP);

	bitmap_set(info->bitmap, start, count);

	info->bytes += bytes;
	ctl->free_space += bytes;
}
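
/*
 * search_bitmap() scans 'bitmap_info' for the first run of set bits that is
 * at least *bytes long, starting no earlier than *offset.  On success it
 * returns 0 and rewrites *offset and *bytes to describe the run it found;
 * on failure it returns -1 and leaves them untouched.
 */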
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, ctl->unit,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, ctl->unit);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
		*bytes = (u64)(found_bits) * ctl->unit;
		return 0;
	}

	return -1;
}

static struct btrfs_free_space *
find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!ctl->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(ctl, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}

static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info, u64 offset)
{
	info->offset = offset_to_bitmap(ctl, offset);
	info->bytes = 0;
	INIT_LIST_HEAD(&info->list);
	link_free_space(ctl, info);
	ctl->total_bitmaps++;

	ctl->op->recalc_thresholds(ctl);
}

static void free_bitmap(struct btrfs_free_space_ctl *ctl,
			struct btrfs_free_space *bitmap_info)
{
	unlink_free_space(ctl, bitmap_info);
	kfree(bitmap_info->bitmap);
	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
	ctl->total_bitmaps--;
	ctl->op->recalc_thresholds(ctl);
}

static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *bitmap_info,
			       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case something's not
	 * working.  Search the bitmap for the space we are trying to use to
	 * make sure it's actually there.  If it's not there then we need to
	 * stop because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}
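
/*
 * add_bytes_to_bitmap() sets bits for [offset, offset + bytes) but clamps
 * at the end of the bitmap's range and returns how many bytes it actually
 * set, so callers such as insert_into_bitmap() loop, advancing offset by
 * the return value until everything has landed in some bitmap.
 */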
static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes)
{
	u64 bytes_to_set = 0;
	u64 end;

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	return bytes_to_set;
}

static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	struct btrfs_block_group_cache *block_group = ctl->private;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them to larger extents, however if we have plenty
		 * of cache left then go ahead and add them, no sense in adding
		 * the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= block_group->sectorsize * 4) {
			if (ctl->free_extents * 2 <= ctl->extents_thresh)
				return false;
		} else {
			return false;
		}
	}

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return false;

	return true;
}

static struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds	= recalculate_thresholds,
	.use_bitmap		= use_bitmap,
};
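
/*
 * Locking note for insert_into_bitmap(): it is entered with ctl->tree_lock
 * held.  When a new bitmap has to be allocated, the lock is dropped for the
 * GFP_NOFS allocations and re-taken, so the tree may have changed in the
 * meantime; the 'again' label redoes the cluster and bitmap lookups from
 * scratch after every such round trip.
 */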
static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	struct btrfs_block_group_cache *block_group = NULL;
	int added = 0;
	u64 bytes, offset, bytes_added;
	int ret;

	bytes = info->bytes;
	offset = info->offset;

	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

	if (ctl->op == &free_space_op)
		block_group = ctl->private;
again:
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
	 */
	if (block_group && !list_empty(&block_group->cluster_list)) {
		struct btrfs_free_cluster *cluster;
		struct rb_node *node;
		struct btrfs_free_space *entry;

		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);
		spin_lock(&cluster->lock);
		node = rb_first(&cluster->root);
		if (!node) {
			spin_unlock(&cluster->lock);
			goto no_cluster_bitmap;
		}

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (!entry->bitmap) {
			spin_unlock(&cluster->lock);
			goto no_cluster_bitmap;
		}

		if (entry->offset == offset_to_bitmap(ctl, offset)) {
			bytes_added = add_bytes_to_bitmap(ctl, entry,
							  offset, bytes);
			bytes -= bytes_added;
			offset += bytes_added;
		}
		spin_unlock(&cluster->lock);
		if (!bytes) {
			ret = 1;
			goto out;
		}
	}

no_cluster_bitmap:
	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
	bytes -= bytes_added;
	offset += bytes_added;
	added = 0;

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		add_new_bitmap(ctl, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		spin_unlock(&ctl->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kmem_cache_zalloc(btrfs_free_space_cachep,
						 GFP_NOFS);
			if (!info) {
				spin_lock(&ctl->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&ctl->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kmem_cache_free(btrfs_free_space_cachep, info);
	}

	return ret;
}

static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
			  struct btrfs_free_space *info, bool update_stat)
{
	struct btrfs_free_space *left_info;
	struct btrfs_free_space *right_info;
	bool merged = false;
	u64 offset = info->offset;
	u64 bytes = info->bytes;

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(ctl, offset - 1, 0, 0);

	if (right_info && !right_info->bitmap) {
		if (update_stat)
			unlink_free_space(ctl, right_info);
		else
			__unlink_free_space(ctl, right_info);
		info->bytes += right_info->bytes;
		kmem_cache_free(btrfs_free_space_cachep, right_info);
		merged = true;
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		if (update_stat)
			unlink_free_space(ctl, left_info);
		else
			__unlink_free_space(ctl, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kmem_cache_free(btrfs_free_space_cachep, left_info);
		merged = true;
	}

	return merged;
}

int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
			   u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	int ret = 0;

	info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&ctl->tree_lock);

	if (try_merge_free_space(ctl, info, true))
		goto link;

	/*
	 * There was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	ret = insert_into_bitmap(ctl, info);
	if (ret < 0) {
		goto out;
	} else if (ret) {
		ret = 0;
		goto out;
	}
link:
	ret = link_free_space(ctl, info);
	if (ret)
		kmem_cache_free(btrfs_free_space_cachep, info);
out:
	spin_unlock(&ctl->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}

int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&ctl->tree_lock);

again:
	info = tree_search_offset(ctl, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					  1, 0);
		if (!info) {
			/* the tree logging code might be calling us before we
			 * have fully loaded the free space rbtree for this
			 * block group.  So it is possible the entry won't
			 * be in the rbtree yet at all.  The caching code
			 * will make sure not to put it in the rbtree if
			 * the logging code has pinned it.
			 */
			goto out_lock;
		}
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
					     struct btrfs_free_space,
					     offset_index);

		if (next_info->bitmap)
			end = next_info->offset +
			      BITS_PER_BITMAP * ctl->unit - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			      " trying to use %llu\n",
			      (unsigned long long)info->offset,
			      (unsigned long long)info->bytes,
			      (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	if (info->bytes == bytes) {
		unlink_free_space(ctl, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			ctl->total_bitmaps--;
		}
		kmem_cache_free(btrfs_free_space_cachep, info);
		ret = 0;
		goto out_lock;
	}

	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(ctl, info);
		info->offset += bytes;
		info->bytes -= bytes;
		ret = link_free_space(ctl, info);
		WARN_ON(ret);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(ctl, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(ctl, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kmem_cache_free(btrfs_free_space_cachep, info);
		}
		spin_unlock(&ctl->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret); /* -ENOMEM */
		goto out;
	}

	ret = remove_from_bitmap(ctl, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret); /* logic error */
out_lock:
	spin_unlock(&ctl->tree_lock);
out:
	return ret;
}

void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
	       "\n", count);
}

void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;

	spin_lock_init(&ctl->tree_lock);
	ctl->unit = block_group->sectorsize;
	ctl->start = block_group->key.objectid;
	ctl->private = block_group;
	ctl->op = &free_space_op;

	/*
	 * we only want to have 32k of ram per block group for keeping
	 * track of free space, and if we pass 1/2 of that we want to
	 * start converting things over to using bitmaps
	 */
	ctl->extents_thresh = ((1024 * 32) / 2) /
				sizeof(struct btrfs_free_space);
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	struct rb_node *node;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);

	node = rb_first(&cluster->root);
	while (node) {
		bool bitmap;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);

		bitmap = (entry->bitmap != NULL);
		if (!bitmap)
			try_merge_free_space(ctl, entry, false);
		tree_insert_offset(&ctl->free_space_offset,
				   entry->offset, &entry->offset_index, bitmap);
	}
	cluster->root = RB_ROOT;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}

void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *info;
	struct rb_node *node;

	while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		if (!info->bitmap) {
			unlink_free_space(ctl, info);
			kmem_cache_free(btrfs_free_space_cachep, info);
		} else {
			free_bitmap(ctl, info);
		}
		if (need_resched()) {
			spin_unlock(&ctl->tree_lock);
			cond_resched();
			spin_lock(&ctl->tree_lock);
		}
	}
}

void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
{
	spin_lock(&ctl->tree_lock);
	__btrfs_remove_free_space_cache_locked(ctl);
	spin_unlock(&ctl->tree_lock);
}

void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&ctl->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&ctl->tree_lock);
			cond_resched();
			spin_lock(&ctl->tree_lock);
		}
	}
	__btrfs_remove_free_space_cache_locked(ctl);
	spin_unlock(&ctl->tree_lock);
}
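
/*
 * btrfs_find_space_for_alloc() returns the start offset of a free range of
 * at least bytes + empty_size, or 0 if nothing suitable was found, and
 * removes what it hands out from the free space tree (trimming or freeing
 * the entry it came from).
 */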
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&ctl->tree_lock);
	entry = find_free_space(ctl, &offset, &bytes_search);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(ctl, entry, offset, bytes);
		if (!entry->bytes)
			free_bitmap(ctl, entry);
	} else {
		unlink_free_space(ctl, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kmem_cache_free(btrfs_free_space_cachep, entry);
		else
			link_free_space(ctl, entry);
	}

out:
	spin_unlock(&ctl->tree_lock);

	return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space_ctl *ctl;
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	ctl = block_group->free_space_ctl;

	/* now return any extents the cluster had on it */
	spin_lock(&ctl->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&ctl->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}

static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
                                   struct btrfs_free_cluster *cluster,
                                   struct btrfs_free_space *entry,
                                   u64 bytes, u64 min_start)
{
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        int err;
        u64 search_start = min_start;
        u64 search_bytes = bytes;
        u64 ret = 0;

        err = search_bitmap(ctl, entry, &search_start, &search_bytes);
        if (err)
                return 0;

        ret = search_start;
        __bitmap_clear_bits(ctl, entry, ret, bytes);

        return ret;
}
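
/*
 * Illustrative note (not from the original source): search_bitmap() may
 * move search_start forward past min_start to the first run of set bits
 * large enough for the request.  A return of 0 here simply means this
 * bitmap has no suitable run, and btrfs_alloc_from_cluster() below falls
 * through to the next cluster entry.
 */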
/*
 * given a cluster, try to allocate 'bytes' from it.  Returns 0 if it
 * couldn't find anything suitably large, or a logical disk offset if
 * things worked out.
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
                             struct btrfs_free_cluster *cluster, u64 bytes,
                             u64 min_start)
{
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        struct btrfs_free_space *entry = NULL;
        struct rb_node *node;
        u64 ret = 0;

        spin_lock(&cluster->lock);
        if (bytes > cluster->max_size)
                goto out;

        if (cluster->block_group != block_group)
                goto out;

        node = rb_first(&cluster->root);
        if (!node)
                goto out;

        entry = rb_entry(node, struct btrfs_free_space, offset_index);
        while (1) {
                if (entry->bytes < bytes ||
                    (!entry->bitmap && entry->offset < min_start)) {
                        node = rb_next(&entry->offset_index);
                        if (!node)
                                break;
                        entry = rb_entry(node, struct btrfs_free_space,
                                         offset_index);
                        continue;
                }

                if (entry->bitmap) {
                        ret = btrfs_alloc_from_bitmap(block_group,
                                                      cluster, entry, bytes,
                                                      cluster->window_start);
                        if (ret == 0) {
                                node = rb_next(&entry->offset_index);
                                if (!node)
                                        break;
                                entry = rb_entry(node, struct btrfs_free_space,
                                                 offset_index);
                                continue;
                        }
                        cluster->window_start += bytes;
                } else {
                        ret = entry->offset;

                        entry->offset += bytes;
                        entry->bytes -= bytes;
                }

                if (entry->bytes == 0)
                        rb_erase(&entry->offset_index, &cluster->root);
                break;
        }
out:
        spin_unlock(&cluster->lock);

        if (!ret)
                return 0;

        spin_lock(&ctl->tree_lock);

        ctl->free_space -= bytes;
        if (entry->bytes == 0) {
                ctl->free_extents--;
                if (entry->bitmap) {
                        kfree(entry->bitmap);
                        ctl->total_bitmaps--;
                        ctl->op->recalc_thresholds(ctl);
                }
                kmem_cache_free(btrfs_free_space_cachep, entry);
        }

        spin_unlock(&ctl->tree_lock);

        return ret;
}
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
                                struct btrfs_free_space *entry,
                                struct btrfs_free_cluster *cluster,
                                u64 offset, u64 bytes,
                                u64 cont1_bytes, u64 min_bytes)
{
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        unsigned long next_zero;
        unsigned long i;
        unsigned long want_bits;
        unsigned long min_bits;
        unsigned long found_bits;
        unsigned long start = 0;
        unsigned long total_found = 0;
        int ret;

        i = offset_to_bit(entry->offset, block_group->sectorsize,
                          max_t(u64, offset, entry->offset));
        want_bits = bytes_to_bits(bytes, block_group->sectorsize);
        min_bits = bytes_to_bits(min_bytes, block_group->sectorsize);

again:
        found_bits = 0;
        for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
             i < BITS_PER_BITMAP;
             i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
                next_zero = find_next_zero_bit(entry->bitmap,
                                               BITS_PER_BITMAP, i);
                if (next_zero - i >= min_bits) {
                        found_bits = next_zero - i;
                        break;
                }
                i = next_zero;
        }

        if (!found_bits)
                return -ENOSPC;

        if (!total_found) {
                start = i;
                cluster->max_size = 0;
        }

        total_found += found_bits;

        if (cluster->max_size < found_bits * block_group->sectorsize)
                cluster->max_size = found_bits * block_group->sectorsize;

        if (total_found < want_bits || cluster->max_size < cont1_bytes) {
                i = next_zero + 1;
                goto again;
        }

        cluster->window_start = start * block_group->sectorsize +
                entry->offset;
        rb_erase(&entry->offset_index, &ctl->free_space_offset);
        ret = tree_insert_offset(&cluster->root, entry->offset,
                                 &entry->offset_index, 1);
        BUG_ON(ret); /* -EEXIST; Logic error */

        trace_btrfs_setup_cluster(block_group, cluster,
                                  total_found * block_group->sectorsize, 1);
        return 0;
}
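
/*
 * Worked example (illustrative; assumes 4KiB pages and a 4KiB sectorsize,
 * not from the original source): BITS_PER_BITMAP = PAGE_CACHE_SIZE * 8 =
 * 32768, so one bitmap entry covers 32768 * 4KiB = 128MiB.  A request for
 * bytes = 1MiB therefore needs want_bits = 256, and the loop above keeps
 * widening the window until it has seen 256 set bits in total and one
 * contiguous run of at least cont1_bytes.
 */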
/*
 * This searches the block group for just extents to fill the cluster with.
 * Try to find a cluster with at least bytes total bytes, at least one
 * extent of cont1_bytes, and other extents of at least min_bytes.
 */
static noinline int
setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
                        struct btrfs_free_cluster *cluster,
                        struct list_head *bitmaps, u64 offset, u64 bytes,
                        u64 cont1_bytes, u64 min_bytes)
{
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        struct btrfs_free_space *first = NULL;
        struct btrfs_free_space *entry = NULL;
        struct btrfs_free_space *last;
        struct rb_node *node;
        u64 window_start;
        u64 window_free;
        u64 max_extent;
        u64 total_size = 0;

        entry = tree_search_offset(ctl, offset, 0, 1);
        if (!entry)
                return -ENOSPC;

        /*
         * We don't want bitmaps, so just move along until we find a normal
         * extent entry.
         */
        while (entry->bitmap || entry->bytes < min_bytes) {
                if (entry->bitmap && list_empty(&entry->list))
                        list_add_tail(&entry->list, bitmaps);
                node = rb_next(&entry->offset_index);
                if (!node)
                        return -ENOSPC;
                entry = rb_entry(node, struct btrfs_free_space, offset_index);
        }

        window_start = entry->offset;
        window_free = entry->bytes;
        max_extent = entry->bytes;
        first = entry;
        last = entry;

        for (node = rb_next(&entry->offset_index); node;
             node = rb_next(&entry->offset_index)) {
                entry = rb_entry(node, struct btrfs_free_space, offset_index);

                if (entry->bitmap) {
                        if (list_empty(&entry->list))
                                list_add_tail(&entry->list, bitmaps);
                        continue;
                }

                if (entry->bytes < min_bytes)
                        continue;

                last = entry;
                window_free += entry->bytes;
                if (entry->bytes > max_extent)
                        max_extent = entry->bytes;
        }

        if (window_free < bytes || max_extent < cont1_bytes)
                return -ENOSPC;

        cluster->window_start = first->offset;

        node = &first->offset_index;

        /*
         * now we've found our entries, pull them out of the free space
         * cache and put them into the cluster rbtree
         */
        do {
                int ret;

                entry = rb_entry(node, struct btrfs_free_space, offset_index);
                node = rb_next(&entry->offset_index);
                if (entry->bitmap || entry->bytes < min_bytes)
                        continue;

                rb_erase(&entry->offset_index, &ctl->free_space_offset);
                ret = tree_insert_offset(&cluster->root, entry->offset,
                                         &entry->offset_index, 0);
                total_size += entry->bytes;
                BUG_ON(ret); /* -EEXIST; Logic error */
        } while (node && entry != last);

        cluster->max_size = max_extent;
        trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
        return 0;
}
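
/*
 * Example (illustrative, not from the original source): with bytes = 4MiB,
 * cont1_bytes = 1MiB and min_bytes = 4KiB, a window holding extents of
 * 2MiB, 1.5MiB and 512KiB qualifies: window_free = 4MiB >= bytes and
 * max_extent = 2MiB >= cont1_bytes, so all three extents are moved into
 * the cluster rbtree.
 */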
/*
 * This specifically looks for bitmaps that may work in the cluster; we
 * assume that we have already failed to find extents that will work.
 */
static noinline int
setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
                     struct btrfs_free_cluster *cluster,
                     struct list_head *bitmaps, u64 offset, u64 bytes,
                     u64 cont1_bytes, u64 min_bytes)
{
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        struct btrfs_free_space *entry;
        int ret = -ENOSPC;
        u64 bitmap_offset = offset_to_bitmap(ctl, offset);

        if (ctl->total_bitmaps == 0)
                return -ENOSPC;

        /*
         * The bitmap that covers offset won't be in the list unless offset
         * is just its start offset.
         */
        entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
        if (entry->offset != bitmap_offset) {
                entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
                if (entry && list_empty(&entry->list))
                        list_add(&entry->list, bitmaps);
        }

        list_for_each_entry(entry, bitmaps, list) {
                if (entry->bytes < bytes)
                        continue;
                ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
                                           bytes, cont1_bytes, min_bytes);
                if (!ret)
                        return 0;
        }

        /*
         * The bitmaps list has all the bitmaps that record free space
         * starting after offset, so no more search is required.
         */
        return -ENOSPC;
}
/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes + empty_size.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             struct btrfs_block_group_cache *block_group,
                             struct btrfs_free_cluster *cluster,
                             u64 offset, u64 bytes, u64 empty_size)
{
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        struct btrfs_free_space *entry, *tmp;
        LIST_HEAD(bitmaps);
        u64 min_bytes;
        u64 cont1_bytes;
        int ret;

        /*
         * Choose the minimum extent size we'll require for this
         * cluster.  For SSD_SPREAD, don't allow any fragmentation.
         * For metadata, allow allocations with smaller extents.  For
         * data, keep it dense.
         */
        if (btrfs_test_opt(root, SSD_SPREAD)) {
                cont1_bytes = min_bytes = bytes + empty_size;
        } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
                cont1_bytes = bytes;
                min_bytes = block_group->sectorsize;
        } else {
                cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
                min_bytes = block_group->sectorsize;
        }

        spin_lock(&ctl->tree_lock);

        /*
         * If we know we don't have enough space to make a cluster don't even
         * bother doing all the work to try and find one.
         */
        if (ctl->free_space < bytes) {
                spin_unlock(&ctl->tree_lock);
                return -ENOSPC;
        }

        spin_lock(&cluster->lock);

        /* someone already found a cluster, hooray */
        if (cluster->block_group) {
                ret = 0;
                goto out;
        }

        trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
                                 min_bytes);

        INIT_LIST_HEAD(&bitmaps);
        ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
                                      bytes + empty_size,
                                      cont1_bytes, min_bytes);
        if (ret)
                ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
                                           offset, bytes + empty_size,
                                           cont1_bytes, min_bytes);

        /* Clear our temporary list */
        list_for_each_entry_safe(entry, tmp, &bitmaps, list)
                list_del_init(&entry->list);

        if (!ret) {
                atomic_inc(&block_group->count);
                list_add_tail(&cluster->block_group_list,
                              &block_group->cluster_list);
                cluster->block_group = block_group;
        } else {
                trace_btrfs_failed_cluster_setup(block_group);
        }
out:
        spin_unlock(&cluster->lock);
        spin_unlock(&ctl->tree_lock);

        return ret;
}
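
/*
 * Worked example of the sizing heuristic above (illustrative; assumes a
 * 4KiB sectorsize, not from the original source): for a data block group
 * with bytes = 1MiB and empty_size = 3MiB,
 *
 *      cont1_bytes = max(1MiB, (4MiB) >> 2) = 1MiB
 *      min_bytes   = 4KiB
 *
 * so the cluster must contain one contiguous 1MiB extent but may pad the
 * rest of the 4MiB window with pieces as small as 4KiB.  For metadata the
 * contiguity requirement drops to bytes itself.
 */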
/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
        spin_lock_init(&cluster->lock);
        spin_lock_init(&cluster->refill_lock);
        cluster->root = RB_ROOT;
        cluster->max_size = 0;
        INIT_LIST_HEAD(&cluster->block_group_list);
        cluster->block_group = NULL;
}
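
/*
 * Typical lifecycle (illustrative sketch, not from the original source;
 * all functions referenced are defined in this file):
 *
 *      btrfs_init_free_cluster(cluster);
 *      ret = btrfs_find_space_cluster(trans, root, block_group, cluster,
 *                                     offset, bytes, empty_size);
 *      if (!ret)
 *              start = btrfs_alloc_from_cluster(block_group, cluster,
 *                                               bytes, min_start);
 *      ...
 *      btrfs_return_cluster_to_free_space(NULL, cluster);
 */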
static int do_trimming(struct btrfs_block_group_cache *block_group,
                       u64 *total_trimmed, u64 start, u64 bytes,
                       u64 reserved_start, u64 reserved_bytes)
{
        struct btrfs_space_info *space_info = block_group->space_info;
        struct btrfs_fs_info *fs_info = block_group->fs_info;
        int ret;
        int update = 0;
        u64 trimmed = 0;

        spin_lock(&space_info->lock);
        spin_lock(&block_group->lock);
        if (!block_group->ro) {
                block_group->reserved += reserved_bytes;
                space_info->bytes_reserved += reserved_bytes;
                update = 1;
        }
        spin_unlock(&block_group->lock);
        spin_unlock(&space_info->lock);

        ret = btrfs_error_discard_extent(fs_info->extent_root,
                                         start, bytes, &trimmed);
        if (!ret)
                *total_trimmed += trimmed;

        btrfs_add_free_space(block_group, reserved_start, reserved_bytes);

        if (update) {
                spin_lock(&space_info->lock);
                spin_lock(&block_group->lock);
                if (block_group->ro)
                        space_info->bytes_readonly += reserved_bytes;
                block_group->reserved -= reserved_bytes;
                space_info->bytes_reserved -= reserved_bytes;
                spin_unlock(&space_info->lock);
                spin_unlock(&block_group->lock);
        }

        return ret;
}
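
/*
 * Illustrative note (summarized from the code above, not from the
 * original source): the region being discarded is temporarily accounted
 * as reserved so the allocator cannot hand it out while the discard is in
 * flight; once btrfs_error_discard_extent() returns, the reserved range
 * is put back into the free space cache and the reservation is dropped.
 */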
static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
                          u64 *total_trimmed, u64 start, u64 end, u64 minlen)
{
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        struct btrfs_free_space *entry;
        struct rb_node *node;
        int ret = 0;
        u64 extent_start;
        u64 extent_bytes;
        u64 bytes;

        while (start < end) {
                spin_lock(&ctl->tree_lock);

                if (ctl->free_space < minlen) {
                        spin_unlock(&ctl->tree_lock);
                        break;
                }

                entry = tree_search_offset(ctl, start, 0, 1);
                if (!entry) {
                        spin_unlock(&ctl->tree_lock);
                        break;
                }

                /* skip bitmaps */
                while (entry->bitmap) {
                        node = rb_next(&entry->offset_index);
                        if (!node) {
                                spin_unlock(&ctl->tree_lock);
                                goto out;
                        }
                        entry = rb_entry(node, struct btrfs_free_space,
                                         offset_index);
                }

                if (entry->offset >= end) {
                        spin_unlock(&ctl->tree_lock);
                        break;
                }

                extent_start = entry->offset;
                extent_bytes = entry->bytes;
                start = max(start, extent_start);
                bytes = min(extent_start + extent_bytes, end) - start;
                if (bytes < minlen) {
                        spin_unlock(&ctl->tree_lock);
                        goto next;
                }

                unlink_free_space(ctl, entry);
                kmem_cache_free(btrfs_free_space_cachep, entry);

                spin_unlock(&ctl->tree_lock);

                ret = do_trimming(block_group, total_trimmed, start, bytes,
                                  extent_start, extent_bytes);
                if (ret)
                        break;
next:
                start += bytes;

                if (fatal_signal_pending(current)) {
                        ret = -ERESTARTSYS;
                        break;
                }

                cond_resched();
        }
out:
        return ret;
}
static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
                        u64 *total_trimmed, u64 start, u64 end, u64 minlen)
{
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        struct btrfs_free_space *entry;
        int ret = 0;
        int ret2;
        u64 bytes;
        u64 offset = offset_to_bitmap(ctl, start);

        while (offset < end) {
                bool next_bitmap = false;

                spin_lock(&ctl->tree_lock);

                if (ctl->free_space < minlen) {
                        spin_unlock(&ctl->tree_lock);
                        break;
                }

                entry = tree_search_offset(ctl, offset, 1, 0);
                if (!entry) {
                        spin_unlock(&ctl->tree_lock);
                        next_bitmap = true;
                        goto next;
                }

                bytes = minlen;
                ret2 = search_bitmap(ctl, entry, &start, &bytes);
                if (ret2 || start >= end) {
                        spin_unlock(&ctl->tree_lock);
                        next_bitmap = true;
                        goto next;
                }

                bytes = min(bytes, end - start);
                if (bytes < minlen) {
                        spin_unlock(&ctl->tree_lock);
                        goto next;
                }

                bitmap_clear_bits(ctl, entry, start, bytes);
                if (entry->bytes == 0)
                        free_bitmap(ctl, entry);

                spin_unlock(&ctl->tree_lock);

                ret = do_trimming(block_group, total_trimmed, start, bytes,
                                  start, bytes);
                if (ret)
                        break;
next:
                if (next_bitmap) {
                        offset += BITS_PER_BITMAP * ctl->unit;
                } else {
                        start += bytes;
                        if (start >= offset + BITS_PER_BITMAP * ctl->unit)
                                offset += BITS_PER_BITMAP * ctl->unit;
                }

                if (fatal_signal_pending(current)) {
                        ret = -ERESTARTSYS;
                        break;
                }

                cond_resched();
        }

        return ret;
}
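
/*
 * Illustrative arithmetic (assumes 4KiB pages and ctl->unit == 4KiB, not
 * from the original source): BITS_PER_BITMAP * ctl->unit is
 * 32768 * 4KiB = 128MiB, so each pass of the loop above either advances
 * within the 128MiB range one bitmap covers or jumps to the start of the
 * next bitmap.
 */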
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
                           u64 *trimmed, u64 start, u64 end, u64 minlen)
{
        int ret;

        *trimmed = 0;

        ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
        if (ret)
                return ret;

        ret = trim_bitmaps(block_group, trimmed, start, end, minlen);

        return ret;
}
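
/*
 * Typical use (illustrative sketch; the caller shown is assumed, e.g. the
 * fstrim/FITRIM path, and is not part of this file):
 *
 *      u64 trimmed;
 *      int ret = btrfs_trim_block_group(block_group, &trimmed,
 *                                       range_start, range_end, minlen);
 *
 * On return, trimmed holds the number of bytes actually discarded, free
 * extents first and then free space recorded in bitmaps.
 */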
/*
 * Find the left-most item in the cache tree, and then return the
 * smallest inode number in the item.
 *
 * Note: the returned inode number may not be the smallest one in
 * the tree, if the left-most item is a bitmap.
 */
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
{
        struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
        struct btrfs_free_space *entry = NULL;
        u64 ino = 0;

        spin_lock(&ctl->tree_lock);

        if (RB_EMPTY_ROOT(&ctl->free_space_offset))
                goto out;

        entry = rb_entry(rb_first(&ctl->free_space_offset),
                         struct btrfs_free_space, offset_index);

        if (!entry->bitmap) {
                ino = entry->offset;

                unlink_free_space(ctl, entry);
                entry->offset++;
                entry->bytes--;
                if (!entry->bytes)
                        kmem_cache_free(btrfs_free_space_cachep, entry);
                else
                        link_free_space(ctl, entry);
        } else {
                u64 offset = 0;
                u64 count = 1;
                int ret;

                ret = search_bitmap(ctl, entry, &offset, &count);
                /* Logic error; should be empty if it can't find anything */
                BUG_ON(ret);

                ino = offset;
                bitmap_clear_bits(ctl, entry, offset, 1);
                if (entry->bytes == 0)
                        free_bitmap(ctl, entry);
        }
out:
        spin_unlock(&ctl->tree_lock);

        return ino;
}
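
/*
 * Example (illustrative, not from the original source): if the left-most
 * extent entry covers inode numbers [256, 256 + 100), this returns 256
 * and shrinks the entry to [257, 256 + 100).  In the free_ino ctl,
 * entry->offset is an inode number and entry->bytes is a count of free
 * inodes, not a byte length.
 */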
struct inode *lookup_free_ino_inode(struct btrfs_root *root,
                                    struct btrfs_path *path)
{
        struct inode *inode = NULL;

        spin_lock(&root->cache_lock);
        if (root->cache_inode)
                inode = igrab(root->cache_inode);
        spin_unlock(&root->cache_lock);
        if (inode)
                return inode;

        inode = __lookup_free_space_inode(root, path, 0);
        if (IS_ERR(inode))
                return inode;

        spin_lock(&root->cache_lock);
        if (!btrfs_fs_closing(root->fs_info))
                root->cache_inode = igrab(inode);
        spin_unlock(&root->cache_lock);

        return inode;
}
int create_free_ino_inode(struct btrfs_root *root,
                          struct btrfs_trans_handle *trans,
                          struct btrfs_path *path)
{
        return __create_free_space_inode(root, trans, path,
                                         BTRFS_FREE_INO_OBJECTID, 0);
}
int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct btrfs_path *path;
        struct inode *inode;
        int ret = 0;
        u64 root_gen = btrfs_root_generation(&root->root_item);

        if (!btrfs_test_opt(root, INODE_MAP_CACHE))
                return 0;

        /*
         * If we're unmounting then just return, since this does a search on
         * the normal root and not the commit root and we could deadlock.
         */
        if (btrfs_fs_closing(fs_info))
                return 0;

        path = btrfs_alloc_path();
        if (!path)
                return 0;

        inode = lookup_free_ino_inode(root, path);
        if (IS_ERR(inode))
                goto out;

        if (root_gen != BTRFS_I(inode)->generation)
                goto out_put;

        ret = __load_free_space_cache(root, inode, ctl, path, 0);

        if (ret < 0)
                printk(KERN_ERR "btrfs: failed to load free ino cache for "
                       "root %llu\n", root->root_key.objectid);
out_put:
        iput(inode);
out:
        btrfs_free_path(path);
        return ret;
}
int btrfs_write_out_ino_cache(struct btrfs_root *root,
                              struct btrfs_trans_handle *trans,
                              struct btrfs_path *path)
{
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct inode *inode;
        int ret;

        if (!btrfs_test_opt(root, INODE_MAP_CACHE))
                return 0;

        inode = lookup_free_ino_inode(root, path);
        if (IS_ERR(inode))
                return 0;

        ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
        if (ret) {
                btrfs_delalloc_release_metadata(inode, inode->i_size);
#ifdef DEBUG
                printk(KERN_ERR "btrfs: failed to write free ino cache "
                       "for root %llu\n", root->root_key.objectid);
#endif
        }

        iput(inode);
        return ret;
}