2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
20 #include <linux/sched.h>
21 #include <linux/writeback.h>
22 #include <linux/pagemap.h>
23 #include <linux/blkdev.h>
26 #include "transaction.h"
28 #include "ref-cache.h"
31 #define BTRFS_ROOT_TRANS_TAG 0
33 static noinline
void put_transaction(struct btrfs_transaction
*transaction
)
35 WARN_ON(transaction
->use_count
== 0);
36 transaction
->use_count
--;
37 if (transaction
->use_count
== 0) {
38 list_del_init(&transaction
->list
);
39 memset(transaction
, 0, sizeof(*transaction
));
40 kmem_cache_free(btrfs_transaction_cachep
, transaction
);
45 * either allocate a new transaction or hop into the existing one
47 static noinline
int join_transaction(struct btrfs_root
*root
)
49 struct btrfs_transaction
*cur_trans
;
50 cur_trans
= root
->fs_info
->running_transaction
;
52 cur_trans
= kmem_cache_alloc(btrfs_transaction_cachep
,
55 root
->fs_info
->generation
++;
56 root
->fs_info
->last_alloc
= 0;
57 root
->fs_info
->last_data_alloc
= 0;
58 cur_trans
->num_writers
= 1;
59 cur_trans
->num_joined
= 0;
60 cur_trans
->transid
= root
->fs_info
->generation
;
61 init_waitqueue_head(&cur_trans
->writer_wait
);
62 init_waitqueue_head(&cur_trans
->commit_wait
);
63 cur_trans
->in_commit
= 0;
64 cur_trans
->blocked
= 0;
65 cur_trans
->use_count
= 1;
66 cur_trans
->commit_done
= 0;
67 cur_trans
->start_time
= get_seconds();
68 INIT_LIST_HEAD(&cur_trans
->pending_snapshots
);
69 list_add_tail(&cur_trans
->list
, &root
->fs_info
->trans_list
);
70 extent_io_tree_init(&cur_trans
->dirty_pages
,
71 root
->fs_info
->btree_inode
->i_mapping
,
73 spin_lock(&root
->fs_info
->new_trans_lock
);
74 root
->fs_info
->running_transaction
= cur_trans
;
75 spin_unlock(&root
->fs_info
->new_trans_lock
);
77 cur_trans
->num_writers
++;
78 cur_trans
->num_joined
++;
85 * this does all the record keeping required to make sure that a reference
86 * counted root is properly recorded in a given transaction. This is required
87 * to make sure the old root from before we joined the transaction is deleted
88 * when the transaction commits
90 noinline
int btrfs_record_root_in_trans(struct btrfs_root
*root
)
92 struct btrfs_dirty_root
*dirty
;
93 u64 running_trans_id
= root
->fs_info
->running_transaction
->transid
;
94 if (root
->ref_cows
&& root
->last_trans
< running_trans_id
) {
95 WARN_ON(root
== root
->fs_info
->extent_root
);
96 if (root
->root_item
.refs
!= 0) {
97 radix_tree_tag_set(&root
->fs_info
->fs_roots_radix
,
98 (unsigned long)root
->root_key
.objectid
,
99 BTRFS_ROOT_TRANS_TAG
);
101 dirty
= kmalloc(sizeof(*dirty
), GFP_NOFS
);
103 dirty
->root
= kmalloc(sizeof(*dirty
->root
), GFP_NOFS
);
104 BUG_ON(!dirty
->root
);
105 dirty
->latest_root
= root
;
106 INIT_LIST_HEAD(&dirty
->list
);
108 root
->commit_root
= btrfs_root_node(root
);
110 memcpy(dirty
->root
, root
, sizeof(*root
));
111 spin_lock_init(&dirty
->root
->node_lock
);
112 spin_lock_init(&dirty
->root
->list_lock
);
113 mutex_init(&dirty
->root
->objectid_mutex
);
114 mutex_init(&dirty
->root
->log_mutex
);
115 INIT_LIST_HEAD(&dirty
->root
->dead_list
);
116 dirty
->root
->node
= root
->commit_root
;
117 dirty
->root
->commit_root
= NULL
;
119 spin_lock(&root
->list_lock
);
120 list_add(&dirty
->root
->dead_list
, &root
->dead_list
);
121 spin_unlock(&root
->list_lock
);
123 root
->dirty_root
= dirty
;
127 root
->last_trans
= running_trans_id
;
132 /* wait for commit against the current transaction to become unblocked
133 * when this is done, it is safe to start a new transaction, but the current
134 * transaction might not be fully on disk.
136 static void wait_current_trans(struct btrfs_root
*root
)
138 struct btrfs_transaction
*cur_trans
;
140 cur_trans
= root
->fs_info
->running_transaction
;
141 if (cur_trans
&& cur_trans
->blocked
) {
143 cur_trans
->use_count
++;
145 prepare_to_wait(&root
->fs_info
->transaction_wait
, &wait
,
146 TASK_UNINTERRUPTIBLE
);
147 if (cur_trans
->blocked
) {
148 mutex_unlock(&root
->fs_info
->trans_mutex
);
150 mutex_lock(&root
->fs_info
->trans_mutex
);
151 finish_wait(&root
->fs_info
->transaction_wait
,
154 finish_wait(&root
->fs_info
->transaction_wait
,
159 put_transaction(cur_trans
);
163 static struct btrfs_trans_handle
*start_transaction(struct btrfs_root
*root
,
164 int num_blocks
, int wait
)
166 struct btrfs_trans_handle
*h
=
167 kmem_cache_alloc(btrfs_trans_handle_cachep
, GFP_NOFS
);
170 mutex_lock(&root
->fs_info
->trans_mutex
);
171 if (!root
->fs_info
->log_root_recovering
&&
172 ((wait
== 1 && !root
->fs_info
->open_ioctl_trans
) || wait
== 2))
173 wait_current_trans(root
);
174 ret
= join_transaction(root
);
177 btrfs_record_root_in_trans(root
);
178 h
->transid
= root
->fs_info
->running_transaction
->transid
;
179 h
->transaction
= root
->fs_info
->running_transaction
;
180 h
->blocks_reserved
= num_blocks
;
183 h
->alloc_exclude_nr
= 0;
184 h
->alloc_exclude_start
= 0;
185 root
->fs_info
->running_transaction
->use_count
++;
186 mutex_unlock(&root
->fs_info
->trans_mutex
);
/*
 * Start a transaction on @root, waiting for a blocked commit first unless
 * an ioctl transaction is open (wait mode 1 of start_transaction()).
 */
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
						   int num_blocks)
{
	return start_transaction(root, num_blocks, 1);
}
/*
 * Join the running transaction on @root without ever waiting for a blocked
 * commit (wait mode 0 of start_transaction()).
 */
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
						  int num_blocks)
{
	return start_transaction(root, num_blocks, 0);
}
/*
 * Start a transaction on @r, always waiting for a blocked commit first,
 * even when an ioctl transaction is open (wait mode 2 of
 * start_transaction()).
 */
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
							 int num_blocks)
{
	return start_transaction(r, num_blocks, 2);
}
207 /* wait for a transaction commit to be fully complete */
208 static noinline
int wait_for_commit(struct btrfs_root
*root
,
209 struct btrfs_transaction
*commit
)
212 mutex_lock(&root
->fs_info
->trans_mutex
);
213 while (!commit
->commit_done
) {
214 prepare_to_wait(&commit
->commit_wait
, &wait
,
215 TASK_UNINTERRUPTIBLE
);
216 if (commit
->commit_done
)
218 mutex_unlock(&root
->fs_info
->trans_mutex
);
220 mutex_lock(&root
->fs_info
->trans_mutex
);
222 mutex_unlock(&root
->fs_info
->trans_mutex
);
223 finish_wait(&commit
->commit_wait
, &wait
);
228 * rate limit against the drop_snapshot code. This helps to slow down new
229 * operations if the drop_snapshot code isn't able to keep up.
231 static void throttle_on_drops(struct btrfs_root
*root
)
233 struct btrfs_fs_info
*info
= root
->fs_info
;
234 int harder_count
= 0;
237 if (atomic_read(&info
->throttles
)) {
240 thr
= atomic_read(&info
->throttle_gen
);
243 prepare_to_wait(&info
->transaction_throttle
,
244 &wait
, TASK_UNINTERRUPTIBLE
);
245 if (!atomic_read(&info
->throttles
)) {
246 finish_wait(&info
->transaction_throttle
, &wait
);
250 finish_wait(&info
->transaction_throttle
, &wait
);
251 } while (thr
== atomic_read(&info
->throttle_gen
));
254 if (root
->fs_info
->total_ref_cache_size
> 1 * 1024 * 1024 &&
258 if (root
->fs_info
->total_ref_cache_size
> 5 * 1024 * 1024 &&
262 if (root
->fs_info
->total_ref_cache_size
> 10 * 1024 * 1024 &&
268 void btrfs_throttle(struct btrfs_root
*root
)
270 mutex_lock(&root
->fs_info
->trans_mutex
);
271 if (!root
->fs_info
->open_ioctl_trans
)
272 wait_current_trans(root
);
273 mutex_unlock(&root
->fs_info
->trans_mutex
);
275 throttle_on_drops(root
);
278 static int __btrfs_end_transaction(struct btrfs_trans_handle
*trans
,
279 struct btrfs_root
*root
, int throttle
)
281 struct btrfs_transaction
*cur_trans
;
282 struct btrfs_fs_info
*info
= root
->fs_info
;
284 mutex_lock(&info
->trans_mutex
);
285 cur_trans
= info
->running_transaction
;
286 WARN_ON(cur_trans
!= trans
->transaction
);
287 WARN_ON(cur_trans
->num_writers
< 1);
288 cur_trans
->num_writers
--;
290 if (waitqueue_active(&cur_trans
->writer_wait
))
291 wake_up(&cur_trans
->writer_wait
);
292 put_transaction(cur_trans
);
293 mutex_unlock(&info
->trans_mutex
);
294 memset(trans
, 0, sizeof(*trans
));
295 kmem_cache_free(btrfs_trans_handle_cachep
, trans
);
298 throttle_on_drops(root
);
/*
 * End a transaction handle without throttling the caller.
 */
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 0);
}
/*
 * End a transaction handle and then throttle the caller against snapshot
 * dropping.
 */
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 1);
}
316 * when btree blocks are allocated, they have some corresponding bits set for
317 * them in one of two extent_io trees. This is used to make sure all of
318 * those extents are on disk for transaction or log commit
320 int btrfs_write_and_wait_marked_extents(struct btrfs_root
*root
,
321 struct extent_io_tree
*dirty_pages
)
327 struct inode
*btree_inode
= root
->fs_info
->btree_inode
;
333 ret
= find_first_extent_bit(dirty_pages
, start
, &start
, &end
,
337 while (start
<= end
) {
340 index
= start
>> PAGE_CACHE_SHIFT
;
341 start
= (u64
)(index
+ 1) << PAGE_CACHE_SHIFT
;
342 page
= find_get_page(btree_inode
->i_mapping
, index
);
346 btree_lock_page_hook(page
);
347 if (!page
->mapping
) {
349 page_cache_release(page
);
353 if (PageWriteback(page
)) {
355 wait_on_page_writeback(page
);
358 page_cache_release(page
);
362 err
= write_one_page(page
, 0);
365 page_cache_release(page
);
369 ret
= find_first_extent_bit(dirty_pages
, 0, &start
, &end
,
374 clear_extent_dirty(dirty_pages
, start
, end
, GFP_NOFS
);
375 while (start
<= end
) {
376 index
= start
>> PAGE_CACHE_SHIFT
;
377 start
= (u64
)(index
+ 1) << PAGE_CACHE_SHIFT
;
378 page
= find_get_page(btree_inode
->i_mapping
, index
);
381 if (PageDirty(page
)) {
382 btree_lock_page_hook(page
);
383 wait_on_page_writeback(page
);
384 err
= write_one_page(page
, 0);
388 wait_on_page_writeback(page
);
389 page_cache_release(page
);
398 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle
*trans
,
399 struct btrfs_root
*root
)
401 if (!trans
|| !trans
->transaction
) {
402 struct inode
*btree_inode
;
403 btree_inode
= root
->fs_info
->btree_inode
;
404 return filemap_write_and_wait(btree_inode
->i_mapping
);
406 return btrfs_write_and_wait_marked_extents(root
,
407 &trans
->transaction
->dirty_pages
);
411 * this is used to update the root pointer in the tree of tree roots.
413 * But, in the case of the extent allocation tree, updating the root
414 * pointer may allocate blocks which may change the root of the extent
417 * So, this loops and repeats and makes sure the cowonly root didn't
418 * change while the root pointer was being updated in the metadata.
420 static int update_cowonly_root(struct btrfs_trans_handle
*trans
,
421 struct btrfs_root
*root
)
425 struct btrfs_root
*tree_root
= root
->fs_info
->tree_root
;
427 btrfs_extent_post_op(trans
, root
);
428 btrfs_write_dirty_block_groups(trans
, root
);
429 btrfs_extent_post_op(trans
, root
);
432 old_root_bytenr
= btrfs_root_bytenr(&root
->root_item
);
433 if (old_root_bytenr
== root
->node
->start
)
435 btrfs_set_root_bytenr(&root
->root_item
,
437 btrfs_set_root_level(&root
->root_item
,
438 btrfs_header_level(root
->node
));
439 btrfs_set_root_generation(&root
->root_item
, trans
->transid
);
441 btrfs_extent_post_op(trans
, root
);
443 ret
= btrfs_update_root(trans
, tree_root
,
447 btrfs_write_dirty_block_groups(trans
, root
);
448 btrfs_extent_post_op(trans
, root
);
454 * update all the cowonly tree roots on disk
456 int btrfs_commit_tree_roots(struct btrfs_trans_handle
*trans
,
457 struct btrfs_root
*root
)
459 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
460 struct list_head
*next
;
461 struct extent_buffer
*eb
;
463 btrfs_extent_post_op(trans
, fs_info
->tree_root
);
465 eb
= btrfs_lock_root_node(fs_info
->tree_root
);
466 btrfs_cow_block(trans
, fs_info
->tree_root
, eb
, NULL
, 0, &eb
, 0);
467 btrfs_tree_unlock(eb
);
468 free_extent_buffer(eb
);
470 btrfs_extent_post_op(trans
, fs_info
->tree_root
);
472 while (!list_empty(&fs_info
->dirty_cowonly_roots
)) {
473 next
= fs_info
->dirty_cowonly_roots
.next
;
475 root
= list_entry(next
, struct btrfs_root
, dirty_list
);
477 update_cowonly_root(trans
, root
);
483 * dead roots are old snapshots that need to be deleted. This allocates
484 * a dirty root struct and adds it into the list of dead roots that need to
487 int btrfs_add_dead_root(struct btrfs_root
*root
, struct btrfs_root
*latest
)
489 struct btrfs_dirty_root
*dirty
;
491 dirty
= kmalloc(sizeof(*dirty
), GFP_NOFS
);
495 dirty
->latest_root
= latest
;
497 mutex_lock(&root
->fs_info
->trans_mutex
);
498 list_add(&dirty
->list
, &latest
->fs_info
->dead_roots
);
499 mutex_unlock(&root
->fs_info
->trans_mutex
);
504 * at transaction commit time we need to schedule the old roots for
505 * deletion via btrfs_drop_snapshot. This runs through all the
506 * reference counted roots that were modified in the current
507 * transaction and puts them into the drop list
509 static noinline
int add_dirty_roots(struct btrfs_trans_handle
*trans
,
510 struct radix_tree_root
*radix
,
511 struct list_head
*list
)
513 struct btrfs_dirty_root
*dirty
;
514 struct btrfs_root
*gang
[8];
515 struct btrfs_root
*root
;
522 ret
= radix_tree_gang_lookup_tag(radix
, (void **)gang
, 0,
524 BTRFS_ROOT_TRANS_TAG
);
527 for (i
= 0; i
< ret
; i
++) {
529 radix_tree_tag_clear(radix
,
530 (unsigned long)root
->root_key
.objectid
,
531 BTRFS_ROOT_TRANS_TAG
);
533 BUG_ON(!root
->ref_tree
);
534 dirty
= root
->dirty_root
;
536 btrfs_free_log(trans
, root
);
537 btrfs_free_reloc_root(trans
, root
);
539 if (root
->commit_root
== root
->node
) {
540 WARN_ON(root
->node
->start
!=
541 btrfs_root_bytenr(&root
->root_item
));
543 free_extent_buffer(root
->commit_root
);
544 root
->commit_root
= NULL
;
545 root
->dirty_root
= NULL
;
547 spin_lock(&root
->list_lock
);
548 list_del_init(&dirty
->root
->dead_list
);
549 spin_unlock(&root
->list_lock
);
554 /* make sure to update the root on disk
555 * so we get any updates to the block used
558 err
= btrfs_update_root(trans
,
559 root
->fs_info
->tree_root
,
565 memset(&root
->root_item
.drop_progress
, 0,
566 sizeof(struct btrfs_disk_key
));
567 root
->root_item
.drop_level
= 0;
568 root
->commit_root
= NULL
;
569 root
->dirty_root
= NULL
;
570 root
->root_key
.offset
= root
->fs_info
->generation
;
571 btrfs_set_root_bytenr(&root
->root_item
,
573 btrfs_set_root_level(&root
->root_item
,
574 btrfs_header_level(root
->node
));
575 btrfs_set_root_generation(&root
->root_item
,
576 root
->root_key
.offset
);
578 err
= btrfs_insert_root(trans
, root
->fs_info
->tree_root
,
584 refs
= btrfs_root_refs(&dirty
->root
->root_item
);
585 btrfs_set_root_refs(&dirty
->root
->root_item
, refs
- 1);
586 err
= btrfs_update_root(trans
, root
->fs_info
->tree_root
,
587 &dirty
->root
->root_key
,
588 &dirty
->root
->root_item
);
592 list_add(&dirty
->list
, list
);
595 free_extent_buffer(dirty
->root
->node
);
605 * defrag a given btree. If cacheonly == 1, this won't read from the disk,
606 * otherwise every leaf in the btree is read and defragged.
608 int btrfs_defrag_root(struct btrfs_root
*root
, int cacheonly
)
610 struct btrfs_fs_info
*info
= root
->fs_info
;
612 struct btrfs_trans_handle
*trans
;
616 if (root
->defrag_running
)
618 trans
= btrfs_start_transaction(root
, 1);
620 root
->defrag_running
= 1;
621 ret
= btrfs_defrag_leaves(trans
, root
, cacheonly
);
622 nr
= trans
->blocks_used
;
623 btrfs_end_transaction(trans
, root
);
624 btrfs_btree_balance_dirty(info
->tree_root
, nr
);
627 trans
= btrfs_start_transaction(root
, 1);
628 if (root
->fs_info
->closing
|| ret
!= -EAGAIN
)
631 root
->defrag_running
= 0;
633 btrfs_end_transaction(trans
, root
);
638 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
641 static noinline
int drop_dirty_roots(struct btrfs_root
*tree_root
,
642 struct list_head
*list
)
644 struct btrfs_dirty_root
*dirty
;
645 struct btrfs_trans_handle
*trans
;
653 while (!list_empty(list
)) {
654 struct btrfs_root
*root
;
656 dirty
= list_entry(list
->prev
, struct btrfs_dirty_root
, list
);
657 list_del_init(&dirty
->list
);
659 num_bytes
= btrfs_root_used(&dirty
->root
->root_item
);
660 root
= dirty
->latest_root
;
661 atomic_inc(&root
->fs_info
->throttles
);
664 trans
= btrfs_start_transaction(tree_root
, 1);
665 mutex_lock(&root
->fs_info
->drop_mutex
);
666 ret
= btrfs_drop_snapshot(trans
, dirty
->root
);
669 mutex_unlock(&root
->fs_info
->drop_mutex
);
671 err
= btrfs_update_root(trans
,
673 &dirty
->root
->root_key
,
674 &dirty
->root
->root_item
);
677 nr
= trans
->blocks_used
;
678 ret
= btrfs_end_transaction(trans
, tree_root
);
681 btrfs_btree_balance_dirty(tree_root
, nr
);
685 atomic_dec(&root
->fs_info
->throttles
);
686 wake_up(&root
->fs_info
->transaction_throttle
);
688 num_bytes
-= btrfs_root_used(&dirty
->root
->root_item
);
689 bytes_used
= btrfs_root_used(&root
->root_item
);
691 mutex_lock(&root
->fs_info
->trans_mutex
);
692 btrfs_record_root_in_trans(root
);
693 mutex_unlock(&root
->fs_info
->trans_mutex
);
694 btrfs_set_root_used(&root
->root_item
,
695 bytes_used
- num_bytes
);
698 ret
= btrfs_del_root(trans
, tree_root
, &dirty
->root
->root_key
);
703 mutex_unlock(&root
->fs_info
->drop_mutex
);
705 spin_lock(&root
->list_lock
);
706 list_del_init(&dirty
->root
->dead_list
);
707 if (!list_empty(&root
->dead_list
)) {
708 struct btrfs_root
*oldest
;
709 oldest
= list_entry(root
->dead_list
.prev
,
710 struct btrfs_root
, dead_list
);
711 max_useless
= oldest
->root_key
.offset
- 1;
713 max_useless
= root
->root_key
.offset
- 1;
715 spin_unlock(&root
->list_lock
);
717 nr
= trans
->blocks_used
;
718 ret
= btrfs_end_transaction(trans
, tree_root
);
721 ret
= btrfs_remove_leaf_refs(root
, max_useless
, 0);
724 free_extent_buffer(dirty
->root
->node
);
728 btrfs_btree_balance_dirty(tree_root
, nr
);
735 * new snapshots need to be created at a very specific time in the
736 * transaction commit. This does the actual creation
738 static noinline
int create_pending_snapshot(struct btrfs_trans_handle
*trans
,
739 struct btrfs_fs_info
*fs_info
,
740 struct btrfs_pending_snapshot
*pending
)
742 struct btrfs_key key
;
743 struct btrfs_root_item
*new_root_item
;
744 struct btrfs_root
*tree_root
= fs_info
->tree_root
;
745 struct btrfs_root
*root
= pending
->root
;
746 struct extent_buffer
*tmp
;
747 struct extent_buffer
*old
;
751 new_root_item
= kmalloc(sizeof(*new_root_item
), GFP_NOFS
);
752 if (!new_root_item
) {
756 ret
= btrfs_find_free_objectid(trans
, tree_root
, 0, &objectid
);
760 btrfs_record_root_in_trans(root
);
761 btrfs_set_root_last_snapshot(&root
->root_item
, trans
->transid
);
762 memcpy(new_root_item
, &root
->root_item
, sizeof(*new_root_item
));
764 key
.objectid
= objectid
;
765 key
.offset
= trans
->transid
;
766 btrfs_set_key_type(&key
, BTRFS_ROOT_ITEM_KEY
);
768 old
= btrfs_lock_root_node(root
);
769 btrfs_cow_block(trans
, root
, old
, NULL
, 0, &old
, 0);
771 btrfs_copy_root(trans
, root
, old
, &tmp
, objectid
);
772 btrfs_tree_unlock(old
);
773 free_extent_buffer(old
);
775 btrfs_set_root_bytenr(new_root_item
, tmp
->start
);
776 btrfs_set_root_level(new_root_item
, btrfs_header_level(tmp
));
777 btrfs_set_root_generation(new_root_item
, trans
->transid
);
778 ret
= btrfs_insert_root(trans
, root
->fs_info
->tree_root
, &key
,
780 btrfs_tree_unlock(tmp
);
781 free_extent_buffer(tmp
);
785 key
.offset
= (u64
)-1;
786 memcpy(&pending
->root_key
, &key
, sizeof(key
));
788 kfree(new_root_item
);
792 static noinline
int finish_pending_snapshot(struct btrfs_fs_info
*fs_info
,
793 struct btrfs_pending_snapshot
*pending
)
798 struct btrfs_trans_handle
*trans
;
799 struct inode
*parent_inode
;
801 struct btrfs_root
*parent_root
;
803 parent_inode
= pending
->dentry
->d_parent
->d_inode
;
804 parent_root
= BTRFS_I(parent_inode
)->root
;
805 trans
= btrfs_join_transaction(parent_root
, 1);
808 * insert the directory item
810 namelen
= strlen(pending
->name
);
811 ret
= btrfs_set_inode_index(parent_inode
, &index
);
812 ret
= btrfs_insert_dir_item(trans
, parent_root
,
813 pending
->name
, namelen
,
815 &pending
->root_key
, BTRFS_FT_DIR
, index
);
820 btrfs_i_size_write(parent_inode
, parent_inode
->i_size
+ namelen
* 2);
821 ret
= btrfs_update_inode(trans
, parent_root
, parent_inode
);
824 /* add the backref first */
825 ret
= btrfs_add_root_ref(trans
, parent_root
->fs_info
->tree_root
,
826 pending
->root_key
.objectid
,
827 BTRFS_ROOT_BACKREF_KEY
,
828 parent_root
->root_key
.objectid
,
829 parent_inode
->i_ino
, index
, pending
->name
,
834 /* now add the forward ref */
835 ret
= btrfs_add_root_ref(trans
, parent_root
->fs_info
->tree_root
,
836 parent_root
->root_key
.objectid
,
838 pending
->root_key
.objectid
,
839 parent_inode
->i_ino
, index
, pending
->name
,
842 inode
= btrfs_lookup_dentry(parent_inode
, pending
->dentry
);
843 d_instantiate(pending
->dentry
, inode
);
845 btrfs_end_transaction(trans
, fs_info
->fs_root
);
850 * create all the snapshots we've scheduled for creation
852 static noinline
int create_pending_snapshots(struct btrfs_trans_handle
*trans
,
853 struct btrfs_fs_info
*fs_info
)
855 struct btrfs_pending_snapshot
*pending
;
856 struct list_head
*head
= &trans
->transaction
->pending_snapshots
;
859 list_for_each_entry(pending
, head
, list
) {
860 ret
= create_pending_snapshot(trans
, fs_info
, pending
);
866 static noinline
int finish_pending_snapshots(struct btrfs_trans_handle
*trans
,
867 struct btrfs_fs_info
*fs_info
)
869 struct btrfs_pending_snapshot
*pending
;
870 struct list_head
*head
= &trans
->transaction
->pending_snapshots
;
873 while (!list_empty(head
)) {
874 pending
= list_entry(head
->next
,
875 struct btrfs_pending_snapshot
, list
);
876 ret
= finish_pending_snapshot(fs_info
, pending
);
878 list_del(&pending
->list
);
879 kfree(pending
->name
);
885 int btrfs_commit_transaction(struct btrfs_trans_handle
*trans
,
886 struct btrfs_root
*root
)
888 unsigned long joined
= 0;
889 unsigned long timeout
= 1;
890 struct btrfs_transaction
*cur_trans
;
891 struct btrfs_transaction
*prev_trans
= NULL
;
892 struct btrfs_root
*chunk_root
= root
->fs_info
->chunk_root
;
893 struct list_head dirty_fs_roots
;
894 struct extent_io_tree
*pinned_copy
;
898 INIT_LIST_HEAD(&dirty_fs_roots
);
899 mutex_lock(&root
->fs_info
->trans_mutex
);
900 if (trans
->transaction
->in_commit
) {
901 cur_trans
= trans
->transaction
;
902 trans
->transaction
->use_count
++;
903 mutex_unlock(&root
->fs_info
->trans_mutex
);
904 btrfs_end_transaction(trans
, root
);
906 ret
= wait_for_commit(root
, cur_trans
);
909 mutex_lock(&root
->fs_info
->trans_mutex
);
910 put_transaction(cur_trans
);
911 mutex_unlock(&root
->fs_info
->trans_mutex
);
916 pinned_copy
= kmalloc(sizeof(*pinned_copy
), GFP_NOFS
);
920 extent_io_tree_init(pinned_copy
,
921 root
->fs_info
->btree_inode
->i_mapping
, GFP_NOFS
);
923 trans
->transaction
->in_commit
= 1;
924 trans
->transaction
->blocked
= 1;
925 cur_trans
= trans
->transaction
;
926 if (cur_trans
->list
.prev
!= &root
->fs_info
->trans_list
) {
927 prev_trans
= list_entry(cur_trans
->list
.prev
,
928 struct btrfs_transaction
, list
);
929 if (!prev_trans
->commit_done
) {
930 prev_trans
->use_count
++;
931 mutex_unlock(&root
->fs_info
->trans_mutex
);
933 wait_for_commit(root
, prev_trans
);
935 mutex_lock(&root
->fs_info
->trans_mutex
);
936 put_transaction(prev_trans
);
941 int snap_pending
= 0;
942 joined
= cur_trans
->num_joined
;
943 if (!list_empty(&trans
->transaction
->pending_snapshots
))
946 WARN_ON(cur_trans
!= trans
->transaction
);
947 prepare_to_wait(&cur_trans
->writer_wait
, &wait
,
948 TASK_UNINTERRUPTIBLE
);
950 if (cur_trans
->num_writers
> 1)
951 timeout
= MAX_SCHEDULE_TIMEOUT
;
955 mutex_unlock(&root
->fs_info
->trans_mutex
);
958 ret
= btrfs_wait_ordered_extents(root
, 1);
962 schedule_timeout(timeout
);
964 mutex_lock(&root
->fs_info
->trans_mutex
);
965 finish_wait(&cur_trans
->writer_wait
, &wait
);
966 } while (cur_trans
->num_writers
> 1 ||
967 (cur_trans
->num_joined
!= joined
));
969 ret
= create_pending_snapshots(trans
, root
->fs_info
);
972 WARN_ON(cur_trans
!= trans
->transaction
);
974 /* btrfs_commit_tree_roots is responsible for getting the
975 * various roots consistent with each other. Every pointer
976 * in the tree of tree roots has to point to the most up to date
977 * root for every subvolume and other tree. So, we have to keep
978 * the tree logging code from jumping in and changing any
981 * At this point in the commit, there can't be any tree-log
982 * writers, but a little lower down we drop the trans mutex
983 * and let new people in. By holding the tree_log_mutex
984 * from now until after the super is written, we avoid races
985 * with the tree-log code.
987 mutex_lock(&root
->fs_info
->tree_log_mutex
);
989 * keep tree reloc code from adding new reloc trees
991 mutex_lock(&root
->fs_info
->tree_reloc_mutex
);
994 ret
= add_dirty_roots(trans
, &root
->fs_info
->fs_roots_radix
,
998 /* add_dirty_roots gets rid of all the tree log roots, it is now
999 * safe to free the root of tree log roots
1001 btrfs_free_log_root_tree(trans
, root
->fs_info
);
1003 ret
= btrfs_commit_tree_roots(trans
, root
);
1006 cur_trans
= root
->fs_info
->running_transaction
;
1007 spin_lock(&root
->fs_info
->new_trans_lock
);
1008 root
->fs_info
->running_transaction
= NULL
;
1009 spin_unlock(&root
->fs_info
->new_trans_lock
);
1010 btrfs_set_super_generation(&root
->fs_info
->super_copy
,
1011 cur_trans
->transid
);
1012 btrfs_set_super_root(&root
->fs_info
->super_copy
,
1013 root
->fs_info
->tree_root
->node
->start
);
1014 btrfs_set_super_root_level(&root
->fs_info
->super_copy
,
1015 btrfs_header_level(root
->fs_info
->tree_root
->node
));
1017 btrfs_set_super_chunk_root(&root
->fs_info
->super_copy
,
1018 chunk_root
->node
->start
);
1019 btrfs_set_super_chunk_root_level(&root
->fs_info
->super_copy
,
1020 btrfs_header_level(chunk_root
->node
));
1021 btrfs_set_super_chunk_root_generation(&root
->fs_info
->super_copy
,
1022 btrfs_header_generation(chunk_root
->node
));
1024 if (!root
->fs_info
->log_root_recovering
) {
1025 btrfs_set_super_log_root(&root
->fs_info
->super_copy
, 0);
1026 btrfs_set_super_log_root_level(&root
->fs_info
->super_copy
, 0);
1029 memcpy(&root
->fs_info
->super_for_commit
, &root
->fs_info
->super_copy
,
1030 sizeof(root
->fs_info
->super_copy
));
1032 btrfs_copy_pinned(root
, pinned_copy
);
1034 trans
->transaction
->blocked
= 0;
1035 wake_up(&root
->fs_info
->transaction_throttle
);
1036 wake_up(&root
->fs_info
->transaction_wait
);
1038 mutex_unlock(&root
->fs_info
->trans_mutex
);
1039 ret
= btrfs_write_and_wait_transaction(trans
, root
);
1041 write_ctree_super(trans
, root
, 0);
1044 * the super is written, we can safely allow the tree-loggers
1045 * to go about their business
1047 mutex_unlock(&root
->fs_info
->tree_log_mutex
);
1049 btrfs_finish_extent_commit(trans
, root
, pinned_copy
);
1052 btrfs_drop_dead_reloc_roots(root
);
1053 mutex_unlock(&root
->fs_info
->tree_reloc_mutex
);
1055 /* do the directory inserts of any pending snapshot creations */
1056 finish_pending_snapshots(trans
, root
->fs_info
);
1058 mutex_lock(&root
->fs_info
->trans_mutex
);
1060 cur_trans
->commit_done
= 1;
1061 root
->fs_info
->last_trans_committed
= cur_trans
->transid
;
1062 wake_up(&cur_trans
->commit_wait
);
1064 put_transaction(cur_trans
);
1065 put_transaction(cur_trans
);
1067 list_splice_init(&dirty_fs_roots
, &root
->fs_info
->dead_roots
);
1068 if (root
->fs_info
->closing
)
1069 list_splice_init(&root
->fs_info
->dead_roots
, &dirty_fs_roots
);
1071 mutex_unlock(&root
->fs_info
->trans_mutex
);
1073 kmem_cache_free(btrfs_trans_handle_cachep
, trans
);
1075 if (root
->fs_info
->closing
)
1076 drop_dirty_roots(root
->fs_info
->tree_root
, &dirty_fs_roots
);
1081 * interface function to delete all the snapshots we have scheduled for deletion
1083 int btrfs_clean_old_snapshots(struct btrfs_root
*root
)
1085 struct list_head dirty_roots
;
1086 INIT_LIST_HEAD(&dirty_roots
);
1088 mutex_lock(&root
->fs_info
->trans_mutex
);
1089 list_splice_init(&root
->fs_info
->dead_roots
, &dirty_roots
);
1090 mutex_unlock(&root
->fs_info
->trans_mutex
);
1092 if (!list_empty(&dirty_roots
)) {
1093 drop_dirty_roots(root
, &dirty_roots
);