/*
 * linux/fs/jbd/commit.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
 *
 * Copyright 1998 Red Hat corp --- All Rights Reserved
 *
 * This file is part of the Linux kernel and is made available under
 * the terms of the GNU General Public License, version 2, or at your
 * option, any later version, incorporated herein by reference.
 *
 * Journal commit routines for the generic filesystem journaling code;
 * part of the ext2fs journaling system.
 */
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
/*
 * Default IO end handler for temporary BJ_IO buffer_heads.
 */
static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	BUFFER_TRACE(bh, "");
	if (uptodate)
		set_buffer_uptodate(bh);
	else
		clear_buffer_uptodate(bh);
	unlock_buffer(bh);
}
/*
 * When an ext3-ordered file is truncated, it is possible that many pages are
 * not successfully freed, because they are attached to a committing
 * transaction.  After the transaction commits, these pages are left on the
 * LRU, with no ->mapping, and with attached buffers.  These pages are
 * trivially reclaimable by the VM, but their apparent absence upsets the VM
 * accounting, and it makes the numbers in /proc/meminfo look odd.
 *
 * So here, we have a buffer which has just come off the forget list.  Look to
 * see if we can strip all buffers from the backing page.
 *
 * Called under lock_journal(), and possibly under journal_datalist_lock.  The
 * caller provided us with a ref against the buffer, and we drop that here.
 */
static void release_buffer_page(struct buffer_head *bh)
{
	struct page *page;

	if (buffer_dirty(bh))
		goto nope;
	if (atomic_read(&bh->b_count) != 1)
		goto nope;
	page = bh->b_page;
	if (!page)
		goto nope;
	if (page->mapping)
		goto nope;

	/* OK, it's a truncated page */
	if (TestSetPageLocked(page))
		goto nope;

	page_cache_get(page);
	__brelse(bh);
	try_to_free_buffers(page);
	unlock_page(page);
	page_cache_release(page);
	return;

nope:
	__brelse(bh);
}
/*
 * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
 * held.  For ranking reasons we must trylock.  If we lose, schedule away and
 * return 0.  j_list_lock is dropped in this case.
 */
static int inverted_lock(journal_t *journal, struct buffer_head *bh)
{
	if (!jbd_trylock_bh_state(bh)) {
		spin_unlock(&journal->j_list_lock);
		schedule();
		return 0;
	}
	return 1;
}
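
/*
 * Example (a sketch of the caller pattern used below, not extra behaviour):
 * because inverted_lock() drops j_list_lock when the trylock fails, the
 * caller must retake both locks in the correct order and then revalidate
 * anything it learned from the list before proceeding:
 *
 *	if (!inverted_lock(journal, bh)) {
 *		jbd_lock_bh_state(bh);
 *		spin_lock(&journal->j_list_lock);
 *		// recheck jh->b_transaction, jh->b_jlist etc. here
 *	}
 */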
/* Done it all: now write the commit record.  We should have
 * cleaned up our previous buffers by now, so if we are in abort
 * mode we can now just skip the rest of the journal write
 * entirely.
 *
 * Returns 1 if the journal needs to be aborted or 0 on success
 */
static int journal_write_commit_record(journal_t *journal,
					transaction_t *commit_transaction)
{
	struct journal_head *descriptor;
	struct buffer_head *bh;
	int i, ret;
	int barrier_done = 0;

	if (is_journal_aborted(journal))
		return 0;

	descriptor = journal_get_descriptor_buffer(journal);
	if (!descriptor)
		return 1;

	bh = jh2bh(descriptor);

	/* Stamp the commit header into each 512-byte sector of the block. */
	for (i = 0; i < bh->b_size; i += 512) {
		journal_header_t *tmp = (journal_header_t *)(bh->b_data + i);
		tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
		tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
		tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
	}

	JBUFFER_TRACE(descriptor, "write commit block");
	set_buffer_dirty(bh);
	if (journal->j_flags & JFS_BARRIER) {
		set_buffer_ordered(bh);
		barrier_done = 1;
	}
	ret = sync_dirty_buffer(bh);
	/* is it possible for another commit to fail at roughly
	 * the same time as this one?  If so, we don't want to
	 * trust the barrier flag in the super, but instead want
	 * to remember if we sent a barrier request
	 */
	if (ret == -EOPNOTSUPP && barrier_done) {
		char b[BDEVNAME_SIZE];

		printk(KERN_WARNING
			"JBD: barrier-based sync failed on %s - "
			"disabling barriers\n",
			bdevname(journal->j_dev, b));
		spin_lock(&journal->j_state_lock);
		journal->j_flags &= ~JFS_BARRIER;
		spin_unlock(&journal->j_state_lock);

		/* And try again, without the barrier */
		clear_buffer_ordered(bh);
		set_buffer_uptodate(bh);
		set_buffer_dirty(bh);
		ret = sync_dirty_buffer(bh);
	}
	put_bh(bh);		/* One for getblk() */
	journal_put_journal_head(descriptor);

	return (ret == -EIO);
}
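
/*
 * For reference (a sketch, not normative): each 512-byte sector of the
 * commit block written above carries a bare journal_header_t:
 *
 *	h_magic     = JFS_MAGIC_NUMBER
 *	h_blocktype = JFS_COMMIT_BLOCK
 *	h_sequence  = t_tid of the committing transaction
 *
 * Recovery treats a transaction as committed only if a commit block with
 * the matching sequence number follows its descriptor and data blocks.
 */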
static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
{
	int i;

	for (i = 0; i < bufs; i++) {
		wbuf[i]->b_end_io = end_buffer_write_sync;
		/* We use-up our safety reference in submit_bh() */
		submit_bh(WRITE, wbuf[i]);
	}
}
/*
 *  Submit all the data buffers to disk
 */
static void journal_submit_data_buffers(journal_t *journal,
				transaction_t *commit_transaction)
{
	struct journal_head *jh;
	struct buffer_head *bh;
	int locked;
	int bufs = 0;
	struct buffer_head **wbuf = journal->j_wbuf;

	/*
	 * Whenever we unlock the journal and sleep, things can get added
	 * onto ->t_sync_datalist, so we have to keep looping back to
	 * write_out_data until we *know* that the list is empty.
	 *
	 * Cleanup any flushed data buffers from the data list.  Even in
	 * abort mode, we want to flush this out as soon as possible.
	 */
write_out_data:
	cond_resched();
	spin_lock(&journal->j_list_lock);

	while (commit_transaction->t_sync_datalist) {
		jh = commit_transaction->t_sync_datalist;
		bh = jh2bh(jh);
		locked = 0;

		/* Get reference just to make sure buffer does not disappear
		 * when we are forced to drop various locks */
		get_bh(bh);
		/* If the buffer is dirty, we need to submit IO and hence
		 * we need the buffer lock. We try to lock the buffer without
		 * blocking. If we fail, we need to drop j_list_lock and do
		 * blocking lock_buffer().
		 */
		if (buffer_dirty(bh)) {
			if (test_set_buffer_locked(bh)) {
				BUFFER_TRACE(bh, "needs blocking lock");
				spin_unlock(&journal->j_list_lock);
				/* Write out all data to prevent deadlocks */
				journal_do_submit_data(wbuf, bufs);
				bufs = 0;
				lock_buffer(bh);
				spin_lock(&journal->j_list_lock);
			}
			locked = 1;
		}
		/* We have to get bh_state lock. Again out of order, sigh. */
		if (!inverted_lock(journal, bh)) {
			jbd_lock_bh_state(bh);
			spin_lock(&journal->j_list_lock);
		}
		/* Someone already cleaned up the buffer? */
		if (!buffer_jbd(bh)
			|| jh->b_transaction != commit_transaction
			|| jh->b_jlist != BJ_SyncData) {
			jbd_unlock_bh_state(bh);
			if (locked)
				unlock_buffer(bh);
			BUFFER_TRACE(bh, "already cleaned up");
			put_bh(bh);
			continue;
		}
		if (locked && test_clear_buffer_dirty(bh)) {
			BUFFER_TRACE(bh, "needs writeout, adding to array");
			wbuf[bufs++] = bh;
			__journal_file_buffer(jh, commit_transaction,
						BJ_Locked);
			jbd_unlock_bh_state(bh);
			if (bufs == journal->j_wbufsize) {
				spin_unlock(&journal->j_list_lock);
				journal_do_submit_data(wbuf, bufs);
				bufs = 0;
				goto write_out_data;
			}
		} else {
			BUFFER_TRACE(bh, "writeout complete: unfile");
			__journal_unfile_buffer(jh);
			jbd_unlock_bh_state(bh);
			if (locked)
				unlock_buffer(bh);
			journal_remove_journal_head(bh);
			/* Once for our safety reference, once for
			 * journal_remove_journal_head() */
			put_bh(bh);
			put_bh(bh);
		}

		if (lock_need_resched(&journal->j_list_lock)) {
			spin_unlock(&journal->j_list_lock);
			goto write_out_data;
		}
	}
	spin_unlock(&journal->j_list_lock);
	journal_do_submit_data(wbuf, bufs);
}
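
/*
 * Note the batching pattern used above; whenever we may sleep, the pending
 * array is flushed first so that a buffer we have queued but not yet
 * submitted cannot deadlock us (a condensed sketch of the logic above):
 *
 *	if (test_set_buffer_locked(bh)) {
 *		spin_unlock(&journal->j_list_lock);
 *		journal_do_submit_data(wbuf, bufs);
 *		bufs = 0;
 *		lock_buffer(bh);
 *		spin_lock(&journal->j_list_lock);
 *	}
 */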
/*
 * journal_commit_transaction
 *
 * The primary function for committing a transaction to the log.  This
 * function is called by the journal thread to begin a complete commit.
 */
void journal_commit_transaction(journal_t *journal)
{
	transaction_t *commit_transaction;
	struct journal_head *jh, *new_jh, *descriptor;
	struct buffer_head **wbuf = journal->j_wbuf;
	int bufs;
	int flags;
	int err;
	unsigned long blocknr;
	char *tagp = NULL;
	journal_header_t *header;
	journal_block_tag_t *tag = NULL;
	int space_left = 0;
	int first_tag = 0;
	int tag_flag;
	int i;
	/*
	 * First job: lock down the current transaction and wait for
	 * all outstanding updates to complete.
	 */

#ifdef COMMIT_STATS
	spin_lock(&journal->j_list_lock);
	summarise_journal_usage(journal);
	spin_unlock(&journal->j_list_lock);
#endif
	/* Do we need to erase the effects of a prior journal_flush? */
	if (journal->j_flags & JFS_FLUSHED) {
		jbd_debug(3, "super block updated\n");
		journal_update_superblock(journal, 1);
	} else {
		jbd_debug(3, "superblock not updated\n");
	}
	J_ASSERT(journal->j_running_transaction != NULL);
	J_ASSERT(journal->j_committing_transaction == NULL);

	commit_transaction = journal->j_running_transaction;
	J_ASSERT(commit_transaction->t_state == T_RUNNING);

	jbd_debug(1, "JBD: starting commit of transaction %d\n",
			commit_transaction->t_tid);
	spin_lock(&journal->j_state_lock);
	commit_transaction->t_state = T_LOCKED;

	spin_lock(&commit_transaction->t_handle_lock);
	while (commit_transaction->t_updates) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&journal->j_wait_updates, &wait,
					TASK_UNINTERRUPTIBLE);
		if (commit_transaction->t_updates) {
			spin_unlock(&commit_transaction->t_handle_lock);
			spin_unlock(&journal->j_state_lock);
			schedule();
			spin_lock(&journal->j_state_lock);
			spin_lock(&commit_transaction->t_handle_lock);
		}
		finish_wait(&journal->j_wait_updates, &wait);
	}
	spin_unlock(&commit_transaction->t_handle_lock);

	J_ASSERT(commit_transaction->t_outstanding_credits <=
			journal->j_max_transaction_buffers);
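
	/*
	 * The wait loop above is the standard prepare_to_wait()/finish_wait()
	 * idiom: register on the waitqueue first, recheck the condition, and
	 * only schedule() while it still holds, so a wakeup arriving between
	 * the check and the sleep cannot be lost.  A minimal sketch of the
	 * same pattern:
	 *
	 *	DEFINE_WAIT(wait);
	 *
	 *	prepare_to_wait(&wq, &wait, TASK_UNINTERRUPTIBLE);
	 *	if (condition_still_true)
	 *		schedule();
	 *	finish_wait(&wq, &wait);
	 */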
	/*
	 * First thing we are allowed to do is to discard any remaining
	 * BJ_Reserved buffers.  Note, it is _not_ permissible to assume
	 * that there are no such buffers: if a large filesystem
	 * operation like a truncate needs to split itself over multiple
	 * transactions, then it may try to do a journal_restart() while
	 * there are still BJ_Reserved buffers outstanding.  These must
	 * be released cleanly from the current transaction.
	 *
	 * In this case, the filesystem must still reserve write access
	 * again before modifying the buffer in the new transaction, but
	 * we do not require it to remember exactly which old buffers it
	 * has reserved.  This is consistent with the existing behaviour
	 * that multiple journal_get_write_access() calls to the same
	 * buffer are perfectly permissible.
	 */
	while (commit_transaction->t_reserved_list) {
		jh = commit_transaction->t_reserved_list;
		JBUFFER_TRACE(jh, "reserved, unused: refile");
		/*
		 * A journal_get_undo_access()+journal_release_buffer() may
		 * leave undo-committed data.
		 */
		if (jh->b_committed_data) {
			struct buffer_head *bh = jh2bh(jh);

			jbd_lock_bh_state(bh);
			jbd_slab_free(jh->b_committed_data, bh->b_size);
			jh->b_committed_data = NULL;
			jbd_unlock_bh_state(bh);
		}
		journal_refile_buffer(journal, jh);
	}
	/*
	 * Now try to drop any written-back buffers from the journal's
	 * checkpoint lists.  We do this *before* commit because it potentially
	 * frees some memory.
	 */
	spin_lock(&journal->j_list_lock);
	__journal_clean_checkpoint_list(journal);
	spin_unlock(&journal->j_list_lock);

	jbd_debug(3, "JBD: commit phase 1\n");
	/*
	 * Switch to a new revoke table.
	 */
	journal_switch_revoke_table(journal);

	commit_transaction->t_state = T_FLUSH;
	journal->j_committing_transaction = commit_transaction;
	journal->j_running_transaction = NULL;
	commit_transaction->t_log_start = journal->j_head;
	wake_up(&journal->j_wait_transaction_locked);
	spin_unlock(&journal->j_state_lock);
	jbd_debug(3, "JBD: commit phase 2\n");

	/*
	 * First, drop modified flag: all accesses to the buffers
	 * will be tracked for a new transaction only -bzzz
	 */
	spin_lock(&journal->j_list_lock);
	if (commit_transaction->t_buffers) {
		new_jh = jh = commit_transaction->t_buffers->b_tnext;
		do {
			J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
					new_jh->b_modified == 0);
			new_jh->b_modified = 0;
			new_jh = new_jh->b_tnext;
		} while (new_jh != jh);
	}
	spin_unlock(&journal->j_list_lock);
	/*
	 * Now start flushing things to disk, in the order they appear
	 * on the transaction lists.  Data blocks go first.
	 */
	err = 0;
	journal_submit_data_buffers(journal, commit_transaction);
	/*
	 * Wait for all previously submitted IO to complete.
	 */
	spin_lock(&journal->j_list_lock);
	while (commit_transaction->t_locked_list) {
		struct buffer_head *bh;

		jh = commit_transaction->t_locked_list->b_tprev;
		bh = jh2bh(jh);
		get_bh(bh);
		if (buffer_locked(bh)) {
			spin_unlock(&journal->j_list_lock);
			wait_on_buffer(bh);
			if (unlikely(!buffer_uptodate(bh)))
				err = -EIO;
			spin_lock(&journal->j_list_lock);
		}
		if (!inverted_lock(journal, bh)) {
			put_bh(bh);
			spin_lock(&journal->j_list_lock);
			continue;
		}
		if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
			__journal_unfile_buffer(jh);
			jbd_unlock_bh_state(bh);
			journal_remove_journal_head(bh);
			put_bh(bh);
		} else {
			jbd_unlock_bh_state(bh);
		}
		put_bh(bh);
		cond_resched_lock(&journal->j_list_lock);
	}
	spin_unlock(&journal->j_list_lock);

	if (err)
		__journal_abort_hard(journal);

	journal_write_revoke_records(journal, commit_transaction);
	jbd_debug(3, "JBD: commit phase 2\n");

	/*
	 * If we found any dirty or locked buffers, then we should have
	 * looped back up to the write_out_data label.  If there weren't
	 * any then journal_clean_data_list should have wiped the list
	 * clean by now, so check that it is in fact empty.
	 */
	J_ASSERT(commit_transaction->t_sync_datalist == NULL);

	jbd_debug(3, "JBD: commit phase 3\n");
	/*
	 * Way to go: we have now written out all of the data for a
	 * transaction!  Now comes the tricky part: we need to write out
	 * metadata.  Loop over the transaction's entire buffer list:
	 */
	commit_transaction->t_state = T_COMMIT;

	descriptor = NULL;
	bufs = 0;
	while (commit_transaction->t_buffers) {
		/* Find the next buffer to be journaled... */

		jh = commit_transaction->t_buffers;

		/* If we're in abort mode, we just un-journal the buffer and
		   release it for background writing. */

		if (is_journal_aborted(journal)) {
			JBUFFER_TRACE(jh, "journal is aborting: refile");
			journal_refile_buffer(journal, jh);
			/* If that was the last one, we need to clean up
			 * any descriptor buffers which may have been
			 * already allocated, even if we are now
			 * aborting. */
			if (!commit_transaction->t_buffers)
				goto start_journal_io;
			continue;
		}
		/* Make sure we have a descriptor block in which to
		   record the metadata buffer. */

		if (!descriptor) {
			struct buffer_head *bh;

			J_ASSERT(bufs == 0);

			jbd_debug(4, "JBD: get descriptor\n");

			descriptor = journal_get_descriptor_buffer(journal);
			if (!descriptor) {
				__journal_abort_hard(journal);
				continue;
			}

			bh = jh2bh(descriptor);
			jbd_debug(4, "JBD: got buffer %llu (%p)\n",
				(unsigned long long)bh->b_blocknr, bh->b_data);
			header = (journal_header_t *)&bh->b_data[0];
			header->h_magic     = cpu_to_be32(JFS_MAGIC_NUMBER);
			header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK);
			header->h_sequence  = cpu_to_be32(commit_transaction->t_tid);

			tagp = &bh->b_data[sizeof(journal_header_t)];
			space_left = bh->b_size - sizeof(journal_header_t);
			first_tag = 1;
			set_buffer_jwrite(bh);
			set_buffer_dirty(bh);
			wbuf[bufs++] = bh;

			/* Record it so that we can wait for IO
			   completion later */
			BUFFER_TRACE(bh, "ph3: file as descriptor");
			journal_file_buffer(descriptor, commit_transaction,
					BJ_LogCtl);
		}
		/* Where is the buffer to be written? */

		err = journal_next_log_block(journal, &blocknr);
		/* If the block mapping failed, just abandon the buffer
		   and repeat this loop: we'll fall into the
		   refile-on-abort condition above. */

		if (err) {
			__journal_abort_hard(journal);
			continue;
		}

		/*
		 * start_this_handle() uses t_outstanding_credits to determine
		 * the free space in the log, but this counter is changed
		 * by journal_next_log_block() also.
		 */
		commit_transaction->t_outstanding_credits--;

		/* Bump b_count to prevent truncate from stumbling over
		   the shadowed buffer!  @@@ This can go if we ever get
		   rid of the BJ_IO/BJ_Shadow pairing of buffers. */
		atomic_inc(&jh2bh(jh)->b_count);
		/* Make a temporary IO buffer with which to write it out
		   (this will requeue both the metadata buffer and the
		   temporary IO buffer). new_bh goes on BJ_IO */

		set_bit(BH_JWrite, &jh2bh(jh)->b_state);
		/*
		 * akpm: journal_write_metadata_buffer() sets
		 * new_bh->b_transaction to commit_transaction.
		 * We need to clean this up before we release new_bh
		 * (which is of type BJ_IO)
		 */
		JBUFFER_TRACE(jh, "ph3: write metadata");
		flags = journal_write_metadata_buffer(commit_transaction,
						      jh, &new_jh, blocknr);
		set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
		wbuf[bufs++] = jh2bh(new_jh);
		/* Record the new block's tag in the current descriptor
		   buffer */

		tag_flag = 0;
		if (flags & 1)
			tag_flag |= JFS_FLAG_ESCAPE;
		if (!first_tag)
			tag_flag |= JFS_FLAG_SAME_UUID;

		tag = (journal_block_tag_t *) tagp;
		tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr);
		tag->t_flags = cpu_to_be32(tag_flag);
		tagp += sizeof(journal_block_tag_t);
		space_left -= sizeof(journal_block_tag_t);

		if (first_tag) {
			memcpy(tagp, journal->j_uuid, 16);
			tagp += 16;
			space_left -= 16;
			first_tag = 0;
		}
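
		/*
		 * Resulting descriptor block layout (a sketch based on the
		 * code above):
		 *
		 *	journal_header_t     magic / JFS_DESCRIPTOR_BLOCK / tid
		 *	journal_block_tag_t  first tag: on-disk blocknr + flags
		 *	char uuid[16]        journal UUID, after first tag only
		 *	journal_block_tag_t  further tags, JFS_FLAG_SAME_UUID
		 *	...
		 *
		 * JFS_FLAG_LAST_TAG is OR-ed into the final tag just before
		 * submission, and JFS_FLAG_ESCAPE marks data blocks whose
		 * first word collided with JFS_MAGIC_NUMBER and was escaped.
		 */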
		/* If there's no more to do, or if the descriptor is full,
		   let the IO rip! */

		if (bufs == journal->j_wbufsize ||
		    commit_transaction->t_buffers == NULL ||
		    space_left < sizeof(journal_block_tag_t) + 16) {

			jbd_debug(4, "JBD: Submit %d IOs\n", bufs);

			/* Write an end-of-descriptor marker before
			   submitting the IOs.  "tag" still points to
			   the last tag we set up. */

			tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG);

start_journal_io:
			for (i = 0; i < bufs; i++) {
				struct buffer_head *bh = wbuf[i];

				lock_buffer(bh);
				clear_buffer_dirty(bh);
				set_buffer_uptodate(bh);
				bh->b_end_io = journal_end_buffer_io_sync;
				submit_bh(WRITE, bh);
			}
			cond_resched();

			/* Force a new descriptor to be generated next
			   time round the loop. */
			descriptor = NULL;
			bufs = 0;
		}
	}
	/* Lo and behold: we have just managed to send a transaction to
	   the log.  Before we can commit it, wait for the IO so far to
	   complete.  Control buffers being written are on the
	   transaction's t_log_list queue, and metadata buffers are on
	   the t_iobuf_list queue.

	   Wait for the buffers in reverse order.  That way we are
	   less likely to be woken up until all IOs have completed, and
	   so we incur less scheduling load.
	 */

	jbd_debug(3, "JBD: commit phase 4\n");

	/*
	 * akpm: these are BJ_IO, and j_list_lock is not needed.
	 * See __journal_try_to_free_buffer.
	 */
wait_for_iobuf:
	while (commit_transaction->t_iobuf_list != NULL) {
		struct buffer_head *bh;

		jh = commit_transaction->t_iobuf_list->b_tprev;
		bh = jh2bh(jh);
		if (buffer_locked(bh)) {
			wait_on_buffer(bh);
			goto wait_for_iobuf;
		}
		if (cond_resched())
			goto wait_for_iobuf;

		if (unlikely(!buffer_uptodate(bh)))
			err = -EIO;

		clear_buffer_jwrite(bh);

		JBUFFER_TRACE(jh, "ph4: unfile after journal write");
		journal_unfile_buffer(journal, jh);

		/*
		 * ->t_iobuf_list should contain only dummy buffer_heads
		 * which were created by journal_write_metadata_buffer().
		 */
		BUFFER_TRACE(bh, "dumping temporary bh");
		journal_put_journal_head(jh);
		__brelse(bh);
		J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
		free_buffer_head(bh);

		/* We also have to unlock and free the corresponding
		   shadowed buffer */
		jh = commit_transaction->t_shadow_list->b_tprev;
		bh = jh2bh(jh);
		clear_bit(BH_JWrite, &bh->b_state);
		J_ASSERT_BH(bh, buffer_jbddirty(bh));

		/* The metadata is now released for reuse, but we need
		   to remember it against this transaction so that when
		   we finally commit, we can do any checkpointing
		   required. */
		JBUFFER_TRACE(jh, "file as BJ_Forget");
		journal_file_buffer(jh, commit_transaction, BJ_Forget);
		/* Wake up any transactions which were waiting for this
		   IO to complete */
		wake_up_bit(&bh->b_state, BH_Unshadow);
		JBUFFER_TRACE(jh, "brelse shadowed buffer");
		__brelse(bh);
	}

	J_ASSERT(commit_transaction->t_shadow_list == NULL);

	jbd_debug(3, "JBD: commit phase 5\n");
	/* Here we wait for the revoke record and descriptor record buffers */
 wait_for_ctlbuf:
	while (commit_transaction->t_log_list != NULL) {
		struct buffer_head *bh;

		jh = commit_transaction->t_log_list->b_tprev;
		bh = jh2bh(jh);
		if (buffer_locked(bh)) {
			wait_on_buffer(bh);
			goto wait_for_ctlbuf;
		}
		if (cond_resched())
			goto wait_for_ctlbuf;

		if (unlikely(!buffer_uptodate(bh)))
			err = -EIO;

		BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
		clear_buffer_jwrite(bh);
		journal_unfile_buffer(journal, jh);
		journal_put_journal_head(jh);
		__brelse(bh);		/* One for getblk */
		/* AKPM: bforget here */
	}
	jbd_debug(3, "JBD: commit phase 6\n");

	if (journal_write_commit_record(journal, commit_transaction))
		err = -EIO;

	if (err)
		__journal_abort_hard(journal);
	/* End of a transaction!  Finally, we can do checkpoint
	   processing: any buffer committed as a result of this
	   transaction can be removed from the checkpoint list it was
	   previously on. */

	jbd_debug(3, "JBD: commit phase 7\n");

	J_ASSERT(commit_transaction->t_sync_datalist == NULL);
	J_ASSERT(commit_transaction->t_buffers == NULL);
	J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
	J_ASSERT(commit_transaction->t_iobuf_list == NULL);
	J_ASSERT(commit_transaction->t_shadow_list == NULL);
	J_ASSERT(commit_transaction->t_log_list == NULL);
restart_loop:
	/*
	 * As there are other places (journal_unmap_buffer()) adding buffers
	 * to this list we have to be careful and hold the j_list_lock.
	 */
	spin_lock(&journal->j_list_lock);
	while (commit_transaction->t_forget) {
		transaction_t *cp_transaction;
		struct buffer_head *bh;

		jh = commit_transaction->t_forget;
		spin_unlock(&journal->j_list_lock);
		bh = jh2bh(jh);
		jbd_lock_bh_state(bh);
		J_ASSERT_JH(jh,	jh->b_transaction == commit_transaction ||
			jh->b_transaction == journal->j_running_transaction);

		/*
		 * If there is undo-protected committed data against
		 * this buffer, then we can remove it now.  If it is a
		 * buffer needing such protection, the old frozen_data
		 * field now points to a committed version of the
		 * buffer, so rotate that field to the new committed
		 * data.
		 *
		 * Otherwise, we can just throw away the frozen data now.
		 */
		if (jh->b_committed_data) {
			jbd_slab_free(jh->b_committed_data, bh->b_size);
			jh->b_committed_data = NULL;
			if (jh->b_frozen_data) {
				jh->b_committed_data = jh->b_frozen_data;
				jh->b_frozen_data = NULL;
			}
		} else if (jh->b_frozen_data) {
			jbd_slab_free(jh->b_frozen_data, bh->b_size);
			jh->b_frozen_data = NULL;
		}
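
		/*
		 * In other words, the undo state rotates like this at commit
		 * time (a summary of the three cases handled above):
		 *
		 *	committed && frozen:	b_committed_data = b_frozen_data;
		 *				b_frozen_data = NULL;
		 *	committed only:		b_committed_data = NULL;
		 *	frozen only:		b_frozen_data = NULL;  (discarded)
		 */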
		spin_lock(&journal->j_list_lock);
		cp_transaction = jh->b_cp_transaction;
		if (cp_transaction) {
			JBUFFER_TRACE(jh, "remove from old cp transaction");
			__journal_remove_checkpoint(jh);
		}

		/* Only re-checkpoint the buffer_head if it is marked
		 * dirty.  If the buffer was added to the BJ_Forget list
		 * by journal_forget, it may no longer be dirty and
		 * there's no point in keeping a checkpoint record for
		 * it. */

		/* A buffer which has been freed while still being
		 * journaled by a previous transaction may end up still
		 * being dirty here, but we want to avoid writing back
		 * that buffer in the future now that the last use has
		 * been committed.  That's not only a performance gain,
		 * it also stops aliasing problems if the buffer is left
		 * behind for writeback and gets reallocated for another
		 * use in a different page. */
		if (buffer_freed(bh)) {
			clear_buffer_freed(bh);
			clear_buffer_jbddirty(bh);
		}

		if (buffer_jbddirty(bh)) {
			JBUFFER_TRACE(jh, "add to new checkpointing trans");
			__journal_insert_checkpoint(jh, commit_transaction);
			JBUFFER_TRACE(jh, "refile for checkpoint writeback");
			__journal_refile_buffer(jh);
			jbd_unlock_bh_state(bh);
		} else {
			J_ASSERT_BH(bh, !buffer_dirty(bh));
			/* The buffer on BJ_Forget list and not jbddirty means
			 * it has been freed by this transaction and hence it
			 * could not have been reallocated until this
			 * transaction has committed. *BUT* it could be
			 * reallocated once we have written all the data to
			 * disk and before we process the buffer on BJ_Forget
			 * list. */
			JBUFFER_TRACE(jh, "refile or unfile freed buffer");
			__journal_refile_buffer(jh);
			if (!jh->b_transaction) {
				jbd_unlock_bh_state(bh);
				/* needs a brelse */
				journal_remove_journal_head(bh);
				release_buffer_page(bh);
			} else
				jbd_unlock_bh_state(bh);
		}
		cond_resched_lock(&journal->j_list_lock);
	}
	spin_unlock(&journal->j_list_lock);
	/*
	 * This is a bit sleazy.  We borrow j_list_lock to protect
	 * journal->j_committing_transaction in __journal_remove_checkpoint.
	 * Really, __journal_remove_checkpoint should be using j_state_lock but
	 * it's a bit hassle to hold that across __journal_remove_checkpoint
	 */
	spin_lock(&journal->j_state_lock);
	spin_lock(&journal->j_list_lock);
	/*
	 * Now recheck if some buffers did not get attached to the transaction
	 * while the lock was dropped...
	 */
	if (commit_transaction->t_forget) {
		spin_unlock(&journal->j_list_lock);
		spin_unlock(&journal->j_state_lock);
		goto restart_loop;
	}
	/* Done with this transaction! */

	jbd_debug(3, "JBD: commit phase 8\n");

	J_ASSERT(commit_transaction->t_state == T_COMMIT);

	commit_transaction->t_state = T_FINISHED;
	J_ASSERT(commit_transaction == journal->j_committing_transaction);
	journal->j_commit_sequence = commit_transaction->t_tid;
	journal->j_committing_transaction = NULL;
	spin_unlock(&journal->j_state_lock);
	if (commit_transaction->t_checkpoint_list == NULL) {
		__journal_drop_transaction(journal, commit_transaction);
	} else {
		if (journal->j_checkpoint_transactions == NULL) {
			journal->j_checkpoint_transactions = commit_transaction;
			commit_transaction->t_cpnext = commit_transaction;
			commit_transaction->t_cpprev = commit_transaction;
		} else {
			commit_transaction->t_cpnext =
				journal->j_checkpoint_transactions;
			commit_transaction->t_cpprev =
				commit_transaction->t_cpnext->t_cpprev;
			commit_transaction->t_cpnext->t_cpprev =
				commit_transaction;
			commit_transaction->t_cpprev->t_cpnext =
				commit_transaction;
		}
	}
	spin_unlock(&journal->j_list_lock);
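
	/*
	 * The linking above keeps j_checkpoint_transactions circular and
	 * doubly linked, with the newly committed transaction spliced in at
	 * the tail (i.e. at head->t_cpprev).  Generic form of the splice:
	 *
	 *	new->t_cpnext = head;
	 *	new->t_cpprev = head->t_cpprev;
	 *	new->t_cpnext->t_cpprev = new;
	 *	new->t_cpprev->t_cpnext = new;
	 */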
	jbd_debug(1, "JBD: commit %d complete, head %d\n",
		  journal->j_commit_sequence, journal->j_tail_sequence);

	wake_up(&journal->j_wait_done_commit);
}