/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "locking.h"
#include "ref-cache.h"
#include "tree-log.h"

#define BTRFS_ROOT_TRANS_TAG 0
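
/*
 * drop a reference on a transaction; once the last reference is gone the
 * transaction is removed from the fs_info trans_list and freed
 */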
static noinline void put_transaction(struct btrfs_transaction *transaction)
{
        WARN_ON(transaction->use_count == 0);
        transaction->use_count--;
        if (transaction->use_count == 0) {
                list_del_init(&transaction->list);
                memset(transaction, 0, sizeof(*transaction));
                kmem_cache_free(btrfs_transaction_cachep, transaction);
        }
}

/*
 * either allocate a new transaction or hop into the existing one
 */
static noinline int join_transaction(struct btrfs_root *root)
{
        struct btrfs_transaction *cur_trans;

        cur_trans = root->fs_info->running_transaction;
        if (!cur_trans) {
                cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
                                             GFP_NOFS);
                BUG_ON(!cur_trans);
                root->fs_info->generation++;
                cur_trans->num_writers = 1;
                cur_trans->num_joined = 0;
                cur_trans->transid = root->fs_info->generation;
                init_waitqueue_head(&cur_trans->writer_wait);
                init_waitqueue_head(&cur_trans->commit_wait);
                cur_trans->in_commit = 0;
                cur_trans->blocked = 0;
                cur_trans->use_count = 1;
                cur_trans->commit_done = 0;
                cur_trans->start_time = get_seconds();

                cur_trans->delayed_refs.root.rb_node = NULL;
                cur_trans->delayed_refs.num_entries = 0;
                cur_trans->delayed_refs.num_heads_ready = 0;
                cur_trans->delayed_refs.num_heads = 0;
                cur_trans->delayed_refs.flushing = 0;
                cur_trans->delayed_refs.run_delayed_start = 0;
                spin_lock_init(&cur_trans->delayed_refs.lock);

                INIT_LIST_HEAD(&cur_trans->pending_snapshots);
                list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
                extent_io_tree_init(&cur_trans->dirty_pages,
                                    root->fs_info->btree_inode->i_mapping,
                                    GFP_NOFS);
                spin_lock(&root->fs_info->new_trans_lock);
                root->fs_info->running_transaction = cur_trans;
                spin_unlock(&root->fs_info->new_trans_lock);
        } else {
                cur_trans->num_writers++;
                cur_trans->num_joined++;
        }

        return 0;
}

/*
 * this does all the record keeping required to make sure that a reference
 * counted root is properly recorded in a given transaction.  This is required
 * to make sure the old root from before we joined the transaction is deleted
 * when the transaction commits
 */
noinline int btrfs_record_root_in_trans(struct btrfs_root *root)
{
        struct btrfs_dirty_root *dirty;
        u64 running_trans_id = root->fs_info->running_transaction->transid;

        if (root->ref_cows && root->last_trans < running_trans_id) {
                WARN_ON(root == root->fs_info->extent_root);
                if (root->root_item.refs != 0) {
                        radix_tree_tag_set(&root->fs_info->fs_roots_radix,
                                   (unsigned long)root->root_key.objectid,
                                   BTRFS_ROOT_TRANS_TAG);

                        dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
                        BUG_ON(!dirty);
                        dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS);
                        BUG_ON(!dirty->root);
                        dirty->latest_root = root;
                        INIT_LIST_HEAD(&dirty->list);

                        root->commit_root = btrfs_root_node(root);

                        memcpy(dirty->root, root, sizeof(*root));
                        spin_lock_init(&dirty->root->node_lock);
                        spin_lock_init(&dirty->root->list_lock);
                        mutex_init(&dirty->root->objectid_mutex);
                        mutex_init(&dirty->root->log_mutex);
                        INIT_LIST_HEAD(&dirty->root->dead_list);
                        dirty->root->node = root->commit_root;
                        dirty->root->commit_root = NULL;

                        spin_lock(&root->list_lock);
                        list_add(&dirty->root->dead_list, &root->dead_list);
                        spin_unlock(&root->list_lock);

                        root->dirty_root = dirty;
                } else {
                        WARN_ON(1);
                }
                root->last_trans = running_trans_id;
        }
        return 0;
}

/* wait for commit against the current transaction to become unblocked
 * when this is done, it is safe to start a new transaction, but the current
 * transaction might not be fully on disk.
 */
static void wait_current_trans(struct btrfs_root *root)
{
        struct btrfs_transaction *cur_trans;

        cur_trans = root->fs_info->running_transaction;
        if (cur_trans && cur_trans->blocked) {
                DEFINE_WAIT(wait);
                cur_trans->use_count++;
                while (1) {
                        prepare_to_wait(&root->fs_info->transaction_wait, &wait,
                                        TASK_UNINTERRUPTIBLE);
                        if (cur_trans->blocked) {
                                mutex_unlock(&root->fs_info->trans_mutex);
                                schedule();
                                mutex_lock(&root->fs_info->trans_mutex);
                                finish_wait(&root->fs_info->transaction_wait,
                                            &wait);
                        } else {
                                finish_wait(&root->fs_info->transaction_wait,
                                            &wait);
                                break;
                        }
                }
                put_transaction(cur_trans);
        }
}
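
/*
 * start a handle against the running transaction.  wait == 1 waits for a
 * blocked commit to finish first (unless a userland ioctl holds the
 * transaction open), wait == 2 always waits, and wait == 0 joins immediately
 */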
static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
                                             int num_blocks, int wait)
{
        struct btrfs_trans_handle *h =
                kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
        int ret;

        mutex_lock(&root->fs_info->trans_mutex);
        if (!root->fs_info->log_root_recovering &&
            ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2))
                wait_current_trans(root);
        ret = join_transaction(root);
        BUG_ON(ret);

        btrfs_record_root_in_trans(root);
        h->transid = root->fs_info->running_transaction->transid;
        h->transaction = root->fs_info->running_transaction;
        h->blocks_reserved = num_blocks;
        h->blocks_used = 0;
        h->block_group = 0;
        h->alloc_exclude_nr = 0;
        h->alloc_exclude_start = 0;
        h->delayed_ref_updates = 0;

        root->fs_info->running_transaction->use_count++;
        mutex_unlock(&root->fs_info->trans_mutex);
        return h;
}
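
/*
 * the three public ways of getting a handle: waiting on a blocked commit,
 * joining unconditionally, or starting on behalf of a userland ioctl
 */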
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
                                                   int num_blocks)
{
        return start_transaction(root, num_blocks, 1);
}

struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
                                                  int num_blocks)
{
        return start_transaction(root, num_blocks, 0);
}

struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
                                                         int num_blocks)
{
        return start_transaction(r, num_blocks, 2);
}

/* wait for a transaction commit to be fully complete */
static noinline int wait_for_commit(struct btrfs_root *root,
                                    struct btrfs_transaction *commit)
{
        DEFINE_WAIT(wait);
        mutex_lock(&root->fs_info->trans_mutex);
        while (!commit->commit_done) {
                prepare_to_wait(&commit->commit_wait, &wait,
                                TASK_UNINTERRUPTIBLE);
                if (commit->commit_done)
                        break;
                mutex_unlock(&root->fs_info->trans_mutex);
                schedule();
                mutex_lock(&root->fs_info->trans_mutex);
        }
        mutex_unlock(&root->fs_info->trans_mutex);
        finish_wait(&commit->commit_wait, &wait);
        return 0;
}

/*
 * rate limit against the drop_snapshot code.  This helps to slow down new
 * operations if the drop_snapshot code isn't able to keep up.
 */
static void throttle_on_drops(struct btrfs_root *root)
{
        struct btrfs_fs_info *info = root->fs_info;
        int harder_count = 0;

harder:
        if (atomic_read(&info->throttles)) {
                DEFINE_WAIT(wait);
                int thr;
                thr = atomic_read(&info->throttle_gen);

                do {
                        prepare_to_wait(&info->transaction_throttle,
                                        &wait, TASK_UNINTERRUPTIBLE);
                        if (!atomic_read(&info->throttles)) {
                                finish_wait(&info->transaction_throttle, &wait);
                                break;
                        }
                        schedule();
                        finish_wait(&info->transaction_throttle, &wait);
                } while (thr == atomic_read(&info->throttle_gen));
                harder_count++;

                if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 &&
                    harder_count < 2)
                        goto harder;

                if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 &&
                    harder_count < 10)
                        goto harder;

                if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 &&
                    harder_count < 20)
                        goto harder;
        }
}

void btrfs_throttle(struct btrfs_root *root)
{
        mutex_lock(&root->fs_info->trans_mutex);
        if (!root->fs_info->open_ioctl_trans)
                wait_current_trans(root);
        mutex_unlock(&root->fs_info->trans_mutex);
        throttle_on_drops(root);
}
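
/*
 * end a handle: flush a batch of delayed refs if enough have queued up,
 * drop this handle's writer count, wake anyone waiting on the writers, and
 * free the handle.  throttles against snapshot deletion when asked to
 */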
static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, int throttle)
{
        struct btrfs_transaction *cur_trans;
        struct btrfs_fs_info *info = root->fs_info;
        int count = 0;

        while (count < 4) {
                unsigned long cur = trans->delayed_ref_updates;
                trans->delayed_ref_updates = 0;
                if (cur &&
                    trans->transaction->delayed_refs.num_heads_ready > 64) {
                        trans->delayed_ref_updates = 0;

                        /*
                         * do a full flush if the transaction is trying
                         * to close
                         */
                        if (trans->transaction->delayed_refs.flushing)
                                cur = 0;
                        btrfs_run_delayed_refs(trans, root, cur);
                } else {
                        break;
                }
                count++;
        }

        mutex_lock(&info->trans_mutex);
        cur_trans = info->running_transaction;
        WARN_ON(cur_trans != trans->transaction);
        WARN_ON(cur_trans->num_writers < 1);
        cur_trans->num_writers--;

        if (waitqueue_active(&cur_trans->writer_wait))
                wake_up(&cur_trans->writer_wait);
        put_transaction(cur_trans);
        mutex_unlock(&info->trans_mutex);
        memset(trans, 0, sizeof(*trans));
        kmem_cache_free(btrfs_trans_handle_cachep, trans);

        if (throttle)
                throttle_on_drops(root);

        return 0;
}

int btrfs_end_transaction(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root)
{
        return __btrfs_end_transaction(trans, root, 0);
}

int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root)
{
        return __btrfs_end_transaction(trans, root, 1);
}

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are on disk for transaction or log commit
 */
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
                                        struct extent_io_tree *dirty_pages)
{
        int ret;
        int err = 0;
        int werr = 0;
        struct page *page;
        struct inode *btree_inode = root->fs_info->btree_inode;
        u64 start = 0;
        u64 end;
        unsigned long index;

        while (1) {
                ret = find_first_extent_bit(dirty_pages, start, &start, &end,
                                            EXTENT_DIRTY);
                if (ret)
                        break;
                while (start <= end) {
                        cond_resched();

                        index = start >> PAGE_CACHE_SHIFT;
                        start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
                        page = find_get_page(btree_inode->i_mapping, index);
                        if (!page)
                                continue;

                        btree_lock_page_hook(page);
                        if (!page->mapping) {
                                unlock_page(page);
                                page_cache_release(page);
                                continue;
                        }

                        if (PageWriteback(page)) {
                                if (PageDirty(page))
                                        wait_on_page_writeback(page);
                                else {
                                        unlock_page(page);
                                        page_cache_release(page);
                                        continue;
                                }
                        }
                        err = write_one_page(page, 0);
                        if (err)
                                werr = err;
                        page_cache_release(page);
                }
        }
        while (1) {
                ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
                                            EXTENT_DIRTY);
                if (ret)
                        break;

                clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
                while (start <= end) {
                        index = start >> PAGE_CACHE_SHIFT;
                        start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
                        page = find_get_page(btree_inode->i_mapping, index);
                        if (!page)
                                continue;
                        if (PageDirty(page)) {
                                btree_lock_page_hook(page);
                                wait_on_page_writeback(page);
                                err = write_one_page(page, 0);
                                if (err)
                                        werr = err;
                        }
                        wait_on_page_writeback(page);
                        page_cache_release(page);
                        cond_resched();
                }
        }
        if (err)
                werr = err;
        return werr;
}
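
/*
 * write and wait on all the dirty tree blocks tracked by a transaction.
 * with no transaction handle, fall back to flushing the entire btree inode
 */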
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root)
{
        if (!trans || !trans->transaction) {
                struct inode *btree_inode;
                btree_inode = root->fs_info->btree_inode;
                return filemap_write_and_wait(btree_inode->i_mapping);
        }
        return btrfs_write_and_wait_marked_extents(root,
                                           &trans->transaction->dirty_pages);
}

/*
 * this is used to update the root pointer in the tree of tree roots.
 *
 * But, in the case of the extent allocation tree, updating the root
 * pointer may allocate blocks which may change the root of the extent
 * allocation tree.
 *
 * So, this loops and repeats and makes sure the cowonly root didn't
 * change while the root pointer was being updated in the metadata.
 */
static int update_cowonly_root(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root)
{
        int ret;
        u64 old_root_bytenr;
        struct btrfs_root *tree_root = root->fs_info->tree_root;

        btrfs_write_dirty_block_groups(trans, root);

        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        BUG_ON(ret);

        while (1) {
                old_root_bytenr = btrfs_root_bytenr(&root->root_item);
                if (old_root_bytenr == root->node->start)
                        break;
                btrfs_set_root_bytenr(&root->root_item,
                                      root->node->start);
                btrfs_set_root_level(&root->root_item,
                                     btrfs_header_level(root->node));
                btrfs_set_root_generation(&root->root_item, trans->transid);

                ret = btrfs_update_root(trans, tree_root,
                                        &root->root_key,
                                        &root->root_item);
                BUG_ON(ret);
                btrfs_write_dirty_block_groups(trans, root);

                ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
                BUG_ON(ret);
        }
        return 0;
}

/*
 * update all the cowonly tree roots on disk
 */
int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct list_head *next;
        struct extent_buffer *eb;
        int ret;

        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        BUG_ON(ret);

        eb = btrfs_lock_root_node(fs_info->tree_root);
        btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb);
        btrfs_tree_unlock(eb);
        free_extent_buffer(eb);

        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        BUG_ON(ret);

        while (!list_empty(&fs_info->dirty_cowonly_roots)) {
                next = fs_info->dirty_cowonly_roots.next;
                list_del_init(next);
                root = list_entry(next, struct btrfs_root, dirty_list);

                update_cowonly_root(trans, root);

                ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
                BUG_ON(ret);
        }
        return 0;
}

/*
 * dead roots are old snapshots that need to be deleted.  This allocates
 * a dirty root struct and adds it into the list of dead roots that need to
 * be deleted
 */
int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest)
{
        struct btrfs_dirty_root *dirty;

        dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
        if (!dirty)
                return -ENOMEM;
        dirty->root = root;
        dirty->latest_root = latest;

        mutex_lock(&root->fs_info->trans_mutex);
        list_add(&dirty->list, &latest->fs_info->dead_roots);
        mutex_unlock(&root->fs_info->trans_mutex);
        return 0;
}

/*
 * at transaction commit time we need to schedule the old roots for
 * deletion via btrfs_drop_snapshot.  This runs through all the
 * reference counted roots that were modified in the current
 * transaction and puts them into the drop list
 */
static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
                                    struct radix_tree_root *radix,
                                    struct list_head *list)
{
        struct btrfs_dirty_root *dirty;
        struct btrfs_root *gang[8];
        struct btrfs_root *root;
        int i;
        int ret;
        int err = 0;
        u32 refs;

        while (1) {
                ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0,
                                                 ARRAY_SIZE(gang),
                                                 BTRFS_ROOT_TRANS_TAG);
                if (ret == 0)
                        break;
                for (i = 0; i < ret; i++) {
                        root = gang[i];
                        radix_tree_tag_clear(radix,
                                     (unsigned long)root->root_key.objectid,
                                     BTRFS_ROOT_TRANS_TAG);

                        BUG_ON(!root->ref_tree);
                        dirty = root->dirty_root;

                        btrfs_free_log(trans, root);
                        btrfs_free_reloc_root(trans, root);

                        if (root->commit_root == root->node) {
                                WARN_ON(root->node->start !=
                                        btrfs_root_bytenr(&root->root_item));

                                free_extent_buffer(root->commit_root);
                                root->commit_root = NULL;
                                root->dirty_root = NULL;

                                spin_lock(&root->list_lock);
                                list_del_init(&dirty->root->dead_list);
                                spin_unlock(&root->list_lock);

                                kfree(dirty->root);
                                kfree(dirty);

                                /* make sure to update the root on disk
                                 * so we get any updates to the block used
                                 * counts
                                 */
                                err = btrfs_update_root(trans,
                                                root->fs_info->tree_root,
                                                &root->root_key,
                                                &root->root_item);
                                continue;
                        }

                        memset(&root->root_item.drop_progress, 0,
                               sizeof(struct btrfs_disk_key));
                        root->root_item.drop_level = 0;
                        root->commit_root = NULL;
                        root->dirty_root = NULL;
                        root->root_key.offset = root->fs_info->generation;
                        btrfs_set_root_bytenr(&root->root_item,
                                              root->node->start);
                        btrfs_set_root_level(&root->root_item,
                                             btrfs_header_level(root->node));
                        btrfs_set_root_generation(&root->root_item,
                                                  root->root_key.offset);

                        err = btrfs_insert_root(trans, root->fs_info->tree_root,
                                                &root->root_key,
                                                &root->root_item);
                        if (err)
                                break;

                        refs = btrfs_root_refs(&dirty->root->root_item);
                        btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
                        err = btrfs_update_root(trans, root->fs_info->tree_root,
                                                &dirty->root->root_key,
                                                &dirty->root->root_item);

                        BUG_ON(err);
                        if (refs == 1) {
                                list_add(&dirty->list, list);
                        } else {
                                WARN_ON(1);
                                free_extent_buffer(dirty->root->node);
                                kfree(dirty->root);
                                kfree(dirty);
                        }
                }
        }
        return err;
}

/*
 * defrag a given btree.  If cacheonly == 1, this won't read from the disk,
 * otherwise every leaf in the btree is read and defragged.
 */
int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
{
        struct btrfs_fs_info *info = root->fs_info;
        int ret;
        struct btrfs_trans_handle *trans;
        unsigned long nr;

        smp_mb();
        if (root->defrag_running)
                return 0;
        trans = btrfs_start_transaction(root, 1);
        while (1) {
                root->defrag_running = 1;
                ret = btrfs_defrag_leaves(trans, root, cacheonly);
                nr = trans->blocks_used;
                btrfs_end_transaction(trans, root);
                btrfs_btree_balance_dirty(info->tree_root, nr);
                cond_resched();

                trans = btrfs_start_transaction(root, 1);
                if (root->fs_info->closing || ret != -EAGAIN)
                        break;
        }
        root->defrag_running = 0;
        smp_mb();
        btrfs_end_transaction(trans, root);
        return 0;
}

/*
 * when dropping snapshots, we generate a ton of delayed refs, and it makes
 * sense not to join the transaction while it is trying to flush the current
 * queue of delayed refs out.
 *
 * This is used by the drop snapshot code only
 */
static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
{
        DEFINE_WAIT(wait);

        mutex_lock(&info->trans_mutex);
        while (info->running_transaction &&
               info->running_transaction->delayed_refs.flushing) {
                prepare_to_wait(&info->transaction_wait, &wait,
                                TASK_UNINTERRUPTIBLE);
                mutex_unlock(&info->trans_mutex);
                schedule();
                mutex_lock(&info->trans_mutex);
                finish_wait(&info->transaction_wait, &wait);
        }
        mutex_unlock(&info->trans_mutex);
        return 0;
}

/*
 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
 * all of them
 */
static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
                                     struct list_head *list)
{
        struct btrfs_dirty_root *dirty;
        struct btrfs_trans_handle *trans;
        unsigned long nr;
        u64 num_bytes;
        u64 bytes_used;
        u64 max_useless;
        int ret = 0;
        int err;

        while (!list_empty(list)) {
                struct btrfs_root *root;

                dirty = list_entry(list->prev, struct btrfs_dirty_root, list);
                list_del_init(&dirty->list);

                num_bytes = btrfs_root_used(&dirty->root->root_item);
                root = dirty->latest_root;
                atomic_inc(&root->fs_info->throttles);

                while (1) {
                        /*
                         * we don't want to jump in and create a bunch of
                         * delayed refs if the transaction is starting to close
                         */
                        wait_transaction_pre_flush(tree_root->fs_info);
                        trans = btrfs_start_transaction(tree_root, 1);

                        /*
                         * we've joined a transaction, make sure it isn't
                         * closing right now
                         */
                        if (trans->transaction->delayed_refs.flushing) {
                                btrfs_end_transaction(trans, tree_root);
                                continue;
                        }

                        mutex_lock(&root->fs_info->drop_mutex);
                        ret = btrfs_drop_snapshot(trans, dirty->root);
                        if (ret != -EAGAIN)
                                break;
                        mutex_unlock(&root->fs_info->drop_mutex);

                        err = btrfs_update_root(trans,
                                        tree_root,
                                        &dirty->root->root_key,
                                        &dirty->root->root_item);
                        if (err)
                                ret = err;
                        nr = trans->blocks_used;
                        ret = btrfs_end_transaction(trans, tree_root);
                        BUG_ON(ret);

                        btrfs_btree_balance_dirty(tree_root, nr);
                        cond_resched();
                }
                BUG_ON(ret);
                atomic_dec(&root->fs_info->throttles);
                wake_up(&root->fs_info->transaction_throttle);

                num_bytes -= btrfs_root_used(&dirty->root->root_item);
                bytes_used = btrfs_root_used(&root->root_item);
                if (num_bytes) {
                        mutex_lock(&root->fs_info->trans_mutex);
                        btrfs_record_root_in_trans(root);
                        mutex_unlock(&root->fs_info->trans_mutex);
                        btrfs_set_root_used(&root->root_item,
                                            bytes_used - num_bytes);
                }

                ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key);
                if (ret) {
                        BUG();
                        break;
                }
                mutex_unlock(&root->fs_info->drop_mutex);

                spin_lock(&root->list_lock);
                list_del_init(&dirty->root->dead_list);
                if (!list_empty(&root->dead_list)) {
                        struct btrfs_root *oldest;
                        oldest = list_entry(root->dead_list.prev,
                                            struct btrfs_root, dead_list);
                        max_useless = oldest->root_key.offset - 1;
                } else {
                        max_useless = root->root_key.offset - 1;
                }
                spin_unlock(&root->list_lock);

                nr = trans->blocks_used;
                ret = btrfs_end_transaction(trans, tree_root);
                BUG_ON(ret);

                ret = btrfs_remove_leaf_refs(root, max_useless, 0);
                BUG_ON(ret);

                free_extent_buffer(dirty->root->node);
                kfree(dirty->root);
                kfree(dirty);

                btrfs_btree_balance_dirty(tree_root, nr);
                cond_resched();
        }
        return ret;
}

/*
 * new snapshots need to be created at a very specific time in the
 * transaction commit.  This does the actual creation
 */
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
                                   struct btrfs_fs_info *fs_info,
                                   struct btrfs_pending_snapshot *pending)
{
        struct btrfs_key key;
        struct btrfs_root_item *new_root_item;
        struct btrfs_root *tree_root = fs_info->tree_root;
        struct btrfs_root *root = pending->root;
        struct extent_buffer *tmp;
        struct extent_buffer *old;
        int ret;
        u64 objectid;

        new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
        if (!new_root_item) {
                ret = -ENOMEM;
                goto fail;
        }
        ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid);
        if (ret)
                goto fail;

        btrfs_record_root_in_trans(root);
        btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
        memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));

        key.objectid = objectid;
        key.offset = trans->transid;
        btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);

        old = btrfs_lock_root_node(root);
        btrfs_cow_block(trans, root, old, NULL, 0, &old);

        btrfs_copy_root(trans, root, old, &tmp, objectid);
        btrfs_tree_unlock(old);
        free_extent_buffer(old);

        btrfs_set_root_bytenr(new_root_item, tmp->start);
        btrfs_set_root_level(new_root_item, btrfs_header_level(tmp));
        btrfs_set_root_generation(new_root_item, trans->transid);
        ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
                                new_root_item);
        btrfs_tree_unlock(tmp);
        free_extent_buffer(tmp);
        if (ret)
                goto fail;

        key.offset = (u64)-1;
        memcpy(&pending->root_key, &key, sizeof(key));
fail:
        kfree(new_root_item);
        return ret;
}
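
/*
 * the root item for the snapshot already exists; link the new snapshot into
 * its parent directory with a dir item plus forward and backward root refs
 */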
static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
                                   struct btrfs_pending_snapshot *pending)
{
        int ret;
        int namelen;
        u64 index = 0;
        struct btrfs_trans_handle *trans;
        struct inode *parent_inode;
        struct inode *inode;
        struct btrfs_root *parent_root;

        parent_inode = pending->dentry->d_parent->d_inode;
        parent_root = BTRFS_I(parent_inode)->root;
        trans = btrfs_join_transaction(parent_root, 1);

        /*
         * insert the directory item
         */
        namelen = strlen(pending->name);
        ret = btrfs_set_inode_index(parent_inode, &index);
        ret = btrfs_insert_dir_item(trans, parent_root,
                            pending->name, namelen,
                            parent_inode->i_ino,
                            &pending->root_key, BTRFS_FT_DIR, index);

        if (ret)
                goto fail;

        btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2);
        ret = btrfs_update_inode(trans, parent_root, parent_inode);
        BUG_ON(ret);

        /* add the backref first */
        ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
                                 pending->root_key.objectid,
                                 BTRFS_ROOT_BACKREF_KEY,
                                 parent_root->root_key.objectid,
                                 parent_inode->i_ino, index, pending->name,
                                 namelen);

        BUG_ON(ret);

        /* now add the forward ref */
        ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
                                 parent_root->root_key.objectid,
                                 BTRFS_ROOT_REF_KEY,
                                 pending->root_key.objectid,
                                 parent_inode->i_ino, index, pending->name,
                                 namelen);

        inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
        d_instantiate(pending->dentry, inode);
fail:
        btrfs_end_transaction(trans, fs_info->fs_root);
        return ret;
}

/*
 * create all the snapshots we've scheduled for creation
 */
static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
                                             struct btrfs_fs_info *fs_info)
{
        struct btrfs_pending_snapshot *pending;
        struct list_head *head = &trans->transaction->pending_snapshots;
        int ret;

        list_for_each_entry(pending, head, list) {
                ret = create_pending_snapshot(trans, fs_info, pending);
                BUG_ON(ret);
        }
        return 0;
}
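
/*
 * do the directory inserts for every scheduled snapshot and free the list.
 * this runs after the commit is safely on disk
 */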
static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans,
                                             struct btrfs_fs_info *fs_info)
{
        struct btrfs_pending_snapshot *pending;
        struct list_head *head = &trans->transaction->pending_snapshots;
        int ret;

        while (!list_empty(head)) {
                pending = list_entry(head->next,
                                     struct btrfs_pending_snapshot, list);
                ret = finish_pending_snapshot(fs_info, pending);
                BUG_ON(ret);
                list_del(&pending->list);
                kfree(pending->name);
                kfree(pending);
        }
        return 0;
}
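
/*
 * commit the running transaction: flush the delayed refs, wait for (or join)
 * any commit already in progress, give concurrent writers a chance to pile
 * in, write out all the dirty tree blocks, and finally write the super block
 */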
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root)
{
        unsigned long joined = 0;
        unsigned long timeout = 1;
        struct btrfs_transaction *cur_trans;
        struct btrfs_transaction *prev_trans = NULL;
        struct btrfs_root *chunk_root = root->fs_info->chunk_root;
        struct list_head dirty_fs_roots;
        struct extent_io_tree *pinned_copy;
        DEFINE_WAIT(wait);
        int ret;
        int should_grow = 0;
        unsigned long now = get_seconds();
        int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);

        btrfs_run_ordered_operations(root, 0);

        /* make a pass through all the delayed refs we have so far
         * any running procs may add more while we are here
         */
        ret = btrfs_run_delayed_refs(trans, root, 0);
        BUG_ON(ret);

        cur_trans = trans->transaction;
        /*
         * set the flushing flag so procs in this transaction have to
         * start sending their work down.
         */
        cur_trans->delayed_refs.flushing = 1;

        ret = btrfs_run_delayed_refs(trans, root, 0);
        BUG_ON(ret);

        mutex_lock(&root->fs_info->trans_mutex);
        INIT_LIST_HEAD(&dirty_fs_roots);
        if (cur_trans->in_commit) {
                cur_trans->use_count++;
                mutex_unlock(&root->fs_info->trans_mutex);
                btrfs_end_transaction(trans, root);

                ret = wait_for_commit(root, cur_trans);
                BUG_ON(ret);

                mutex_lock(&root->fs_info->trans_mutex);
                put_transaction(cur_trans);
                mutex_unlock(&root->fs_info->trans_mutex);

                return 0;
        }

        pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS);
        if (!pinned_copy)
                return -ENOMEM;

        extent_io_tree_init(pinned_copy,
                            root->fs_info->btree_inode->i_mapping, GFP_NOFS);

        trans->transaction->in_commit = 1;
        trans->transaction->blocked = 1;
        if (cur_trans->list.prev != &root->fs_info->trans_list) {
                prev_trans = list_entry(cur_trans->list.prev,
                                        struct btrfs_transaction, list);
                if (!prev_trans->commit_done) {
                        prev_trans->use_count++;
                        mutex_unlock(&root->fs_info->trans_mutex);

                        wait_for_commit(root, prev_trans);

                        mutex_lock(&root->fs_info->trans_mutex);
                        put_transaction(prev_trans);
                }
        }

        if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
                should_grow = 1;

        do {
                int snap_pending = 0;
                joined = cur_trans->num_joined;
                if (!list_empty(&trans->transaction->pending_snapshots))
                        snap_pending = 1;

                WARN_ON(cur_trans != trans->transaction);
                prepare_to_wait(&cur_trans->writer_wait, &wait,
                                TASK_UNINTERRUPTIBLE);

                if (cur_trans->num_writers > 1)
                        timeout = MAX_SCHEDULE_TIMEOUT;
                else if (should_grow)
                        timeout = 1;

                mutex_unlock(&root->fs_info->trans_mutex);

                if (flush_on_commit || snap_pending) {
                        if (flush_on_commit)
                                btrfs_start_delalloc_inodes(root);
                        ret = btrfs_wait_ordered_extents(root, 1);
                        BUG_ON(ret);
                }

                /*
                 * rename doesn't use btrfs_join_transaction, so, once we
                 * set the transaction to blocked above, we aren't going
                 * to get any new ordered operations.  We can safely run
                 * it here and know for sure that nothing new will be added
                 * to the list
                 */
                btrfs_run_ordered_operations(root, 1);

                smp_mb();
                if (cur_trans->num_writers > 1 || should_grow)
                        schedule_timeout(timeout);

                mutex_lock(&root->fs_info->trans_mutex);
                finish_wait(&cur_trans->writer_wait, &wait);
        } while (cur_trans->num_writers > 1 ||
                 (should_grow && cur_trans->num_joined != joined));

        ret = create_pending_snapshots(trans, root->fs_info);
        BUG_ON(ret);

        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        BUG_ON(ret);

        WARN_ON(cur_trans != trans->transaction);

        /* btrfs_commit_tree_roots is responsible for getting the
         * various roots consistent with each other.  Every pointer
         * in the tree of tree roots has to point to the most up to date
         * root for every subvolume and other tree.  So, we have to keep
         * the tree logging code from jumping in and changing any
         * of the trees.
         *
         * At this point in the commit, there can't be any tree-log
         * writers, but a little lower down we drop the trans mutex
         * and let new people in.  By holding the tree_log_mutex
         * from now until after the super is written, we avoid races
         * with the tree-log code.
         */
        mutex_lock(&root->fs_info->tree_log_mutex);
        /*
         * keep tree reloc code from adding new reloc trees
         */
        mutex_lock(&root->fs_info->tree_reloc_mutex);

        ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
                              &dirty_fs_roots);
        BUG_ON(ret);

        /* add_dirty_roots gets rid of all the tree log roots, it is now
         * safe to free the root of tree log roots
         */
        btrfs_free_log_root_tree(trans, root->fs_info);

        ret = btrfs_commit_tree_roots(trans, root);
        BUG_ON(ret);

        cur_trans = root->fs_info->running_transaction;
        spin_lock(&root->fs_info->new_trans_lock);
        root->fs_info->running_transaction = NULL;
        spin_unlock(&root->fs_info->new_trans_lock);
        btrfs_set_super_generation(&root->fs_info->super_copy,
                                   cur_trans->transid);
        btrfs_set_super_root(&root->fs_info->super_copy,
                             root->fs_info->tree_root->node->start);
        btrfs_set_super_root_level(&root->fs_info->super_copy,
                           btrfs_header_level(root->fs_info->tree_root->node));

        btrfs_set_super_chunk_root(&root->fs_info->super_copy,
                                   chunk_root->node->start);
        btrfs_set_super_chunk_root_level(&root->fs_info->super_copy,
                                         btrfs_header_level(chunk_root->node));
        btrfs_set_super_chunk_root_generation(&root->fs_info->super_copy,
                                btrfs_header_generation(chunk_root->node));

        if (!root->fs_info->log_root_recovering) {
                btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
                btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0);
        }

        memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
               sizeof(root->fs_info->super_copy));

        btrfs_copy_pinned(root, pinned_copy);

        trans->transaction->blocked = 0;

        wake_up(&root->fs_info->transaction_throttle);
        wake_up(&root->fs_info->transaction_wait);

        mutex_unlock(&root->fs_info->trans_mutex);
        ret = btrfs_write_and_wait_transaction(trans, root);
        BUG_ON(ret);
        write_ctree_super(trans, root, 0);

        /*
         * the super is written, we can safely allow the tree-loggers
         * to go about their business
         */
        mutex_unlock(&root->fs_info->tree_log_mutex);

        btrfs_finish_extent_commit(trans, root, pinned_copy);
        kfree(pinned_copy);

        btrfs_drop_dead_reloc_roots(root);
        mutex_unlock(&root->fs_info->tree_reloc_mutex);

        /* do the directory inserts of any pending snapshot creations */
        finish_pending_snapshots(trans, root->fs_info);

        mutex_lock(&root->fs_info->trans_mutex);

        cur_trans->commit_done = 1;

        root->fs_info->last_trans_committed = cur_trans->transid;
        wake_up(&cur_trans->commit_wait);

        put_transaction(cur_trans);
        put_transaction(cur_trans);

        list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
        if (root->fs_info->closing)
                list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);

        mutex_unlock(&root->fs_info->trans_mutex);

        kmem_cache_free(btrfs_trans_handle_cachep, trans);

        if (root->fs_info->closing)
                drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
        return ret;
}

/*
 * interface function to delete all the snapshots we have scheduled for
 * deletion
 */
int btrfs_clean_old_snapshots(struct btrfs_root *root)
{
        struct list_head dirty_roots;
        INIT_LIST_HEAD(&dirty_roots);
again:
        mutex_lock(&root->fs_info->trans_mutex);
        list_splice_init(&root->fs_info->dead_roots, &dirty_roots);
        mutex_unlock(&root->fs_info->trans_mutex);

        if (!list_empty(&dirty_roots)) {
                drop_dirty_roots(root, &dirty_roots);
                goto again;
        }
        return 0;
}