/*
 * linux/fs/jbd/commit.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
 *
 * Copyright 1998 Red Hat corp --- All Rights Reserved
 *
 * This file is part of the Linux kernel and is made available under
 * the terms of the GNU General Public License, version 2, or at your
 * option, any later version, incorporated herein by reference.
 *
 * Journal commit routines for the generic filesystem journaling code;
 * part of the ext2fs journaling system.
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
/*
 * Default IO end handler for temporary BJ_IO buffer_heads.
 */
static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
        BUFFER_TRACE(bh, "");
        if (uptodate)
                set_buffer_uptodate(bh);
        else
                clear_buffer_uptodate(bh);
        unlock_buffer(bh);
}
/*
 * When an ext3-ordered file is truncated, it is possible that many pages are
 * not successfully freed, because they are attached to a committing
 * transaction.  After the transaction commits, these pages are left on the
 * LRU, with no ->mapping, and with attached buffers.  These pages are
 * trivially reclaimable by the VM, but their apparent absence upsets the VM
 * accounting, and it makes the numbers in /proc/meminfo look odd.
 *
 * So here, we have a buffer which has just come off the forget list.  Look to
 * see if we can strip all buffers from the backing page.
 *
 * Called under lock_journal(), and possibly under journal_datalist_lock.  The
 * caller provided us with a ref against the buffer, and we drop that here.
 */
static void release_buffer_page(struct buffer_head *bh)
{
        struct page *page;

        if (buffer_dirty(bh))
                goto nope;
        if (atomic_read(&bh->b_count) != 1)
                goto nope;
        page = bh->b_page;
        if (!page)
                goto nope;
        if (page->mapping)
                goto nope;

        /* OK, it's a truncated page */
        if (TestSetPageLocked(page))
                goto nope;

        page_cache_get(page);
        __brelse(bh);
        try_to_free_buffers(page);
        unlock_page(page);
        page_cache_release(page);
        return;

nope:
        __brelse(bh);
}
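/*
 * release_buffer_page() is invoked from the BJ_Forget processing loop at
 * the end of journal_commit_transaction(), once the journal head has been
 * detached from the buffer.
 */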
/*
 * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
 * held.  For ranking reasons we must trylock.  If we lose, schedule away and
 * return 0.  j_list_lock is dropped in this case.
 */
static int inverted_lock(journal_t *journal, struct buffer_head *bh)
{
        if (!jbd_trylock_bh_state(bh)) {
                spin_unlock(&journal->j_list_lock);
                schedule();
                return 0;
        }
        return 1;
}
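/*
 * The lock ranking is jbd_lock_bh_state() before j_list_lock, so a caller
 * that already holds j_list_lock cannot block on the bh_state lock without
 * inviting an AB-BA deadlock: it must trylock and, on failure, drop
 * j_list_lock, reschedule and restart its scan.
 */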
/* Done it all: now write the commit record.  We should have
 * cleaned up our previous buffers by now, so if we are in abort
 * mode we can now just skip the rest of the journal write
 * entirely.
 *
 * Returns 1 if the journal needs to be aborted or 0 on success
 */
static int journal_write_commit_record(journal_t *journal,
                                        transaction_t *commit_transaction)
{
        struct journal_head *descriptor;
        struct buffer_head *bh;
        int i, ret;
        int barrier_done = 0;

        if (is_journal_aborted(journal))
                return 0;

        descriptor = journal_get_descriptor_buffer(journal);
        if (!descriptor)
                return 1;

        bh = jh2bh(descriptor);

        /* AKPM: buglet - add `i' to tmp! */
        for (i = 0; i < bh->b_size; i += 512) {
                journal_header_t *tmp = (journal_header_t *)bh->b_data;
                tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
                tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
                tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
        }
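        /*
         * As the AKPM note above says, `i' is never added to b_data, so
         * only the first 512-byte sector of the block actually receives
         * the commit header.  Recovery reads the journal_header_t at the
         * start of the block, so the omission is benign in practice.
         */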
        JBUFFER_TRACE(descriptor, "write commit block");
        set_buffer_dirty(bh);
        if (journal->j_flags & JFS_BARRIER) {
                set_buffer_ordered(bh);
                barrier_done = 1;
        }
        ret = sync_dirty_buffer(bh);
        /* is it possible for another commit to fail at roughly
         * the same time as this one?  If so, we don't want to
         * trust the barrier flag in the super, but instead want
         * to remember if we sent a barrier request
         */
        if (ret == -EOPNOTSUPP && barrier_done) {
                char b[BDEVNAME_SIZE];

                printk(KERN_WARNING
                        "JBD: barrier-based sync failed on %s - "
                        "disabling barriers\n",
                        bdevname(journal->j_dev, b));
                spin_lock(&journal->j_state_lock);
                journal->j_flags &= ~JFS_BARRIER;
                spin_unlock(&journal->j_state_lock);

                /* And try again, without the barrier */
                clear_buffer_ordered(bh);
                set_buffer_uptodate(bh);
                set_buffer_dirty(bh);
                ret = sync_dirty_buffer(bh);
        }
        put_bh(bh);             /* One for getblk() */
        journal_put_journal_head(descriptor);

        return (ret == -EIO);
}
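/*
 * Note the fallback above: a barrier write that fails with -EOPNOTSUPP
 * leaves the commit block no longer uptodate, so before the plain-write
 * retry the buffer must be marked uptodate and redirtied by hand.
 */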
static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
{
        int i;

        for (i = 0; i < bufs; i++) {
                wbuf[i]->b_end_io = end_buffer_write_sync;
                /* We use-up our safety reference in submit_bh() */
                submit_bh(WRITE, wbuf[i]);
        }
}

/*
 * Submit all the data buffers to disk
 */
static void journal_submit_data_buffers(journal_t *journal,
                                transaction_t *commit_transaction)
{
        struct journal_head *jh;
        struct buffer_head *bh;
        int locked;
        int bufs = 0;
        struct buffer_head **wbuf = journal->j_wbuf;

        /*
         * Whenever we unlock the journal and sleep, things can get added
         * onto ->t_sync_datalist, so we have to keep looping back to
         * write_out_data until we *know* that the list is empty.
         *
         * Clean up any flushed data buffers from the data list.  Even in
         * abort mode, we want to flush this out as soon as possible.
         */
write_out_data:
        cond_resched();
        spin_lock(&journal->j_list_lock);

        while (commit_transaction->t_sync_datalist) {
                jh = commit_transaction->t_sync_datalist;
                bh = jh2bh(jh);
                locked = 0;

                /* Get reference just to make sure buffer does not disappear
                 * when we are forced to drop various locks */
                get_bh(bh);
                /* If the buffer is dirty, we need to submit IO and hence
                 * we need the buffer lock.  We try to lock the buffer without
                 * blocking.  If we fail, we need to drop j_list_lock and do
                 * blocking lock_buffer().
                 */
                if (buffer_dirty(bh)) {
                        if (test_set_buffer_locked(bh)) {
                                BUFFER_TRACE(bh, "needs blocking lock");
                                spin_unlock(&journal->j_list_lock);
                                /* Write out all data to prevent deadlocks */
                                journal_do_submit_data(wbuf, bufs);
                                bufs = 0;
                                lock_buffer(bh);
                                spin_lock(&journal->j_list_lock);
                        }
                        locked = 1;
                }
                /* We have to get bh_state lock. Again out of order, sigh. */
                if (!inverted_lock(journal, bh)) {
                        jbd_lock_bh_state(bh);
                        spin_lock(&journal->j_list_lock);
                }
                /* Someone already cleaned up the buffer? */
                if (!buffer_jbd(bh)
                        || jh->b_transaction != commit_transaction
                        || jh->b_jlist != BJ_SyncData) {
                        jbd_unlock_bh_state(bh);
                        if (locked)
                                unlock_buffer(bh);
                        BUFFER_TRACE(bh, "already cleaned up");
                        put_bh(bh);
                        continue;
                }
                if (locked && test_clear_buffer_dirty(bh)) {
                        BUFFER_TRACE(bh, "needs writeout, adding to array");
                        wbuf[bufs++] = bh;
                        __journal_file_buffer(jh, commit_transaction,
                                                BJ_Locked);
                        jbd_unlock_bh_state(bh);
                        if (bufs == journal->j_wbufsize) {
                                spin_unlock(&journal->j_list_lock);
                                journal_do_submit_data(wbuf, bufs);
                                bufs = 0;
                                goto write_out_data;
                        }
                } else {
                        BUFFER_TRACE(bh, "writeout complete: unfile");
                        __journal_unfile_buffer(jh);
                        jbd_unlock_bh_state(bh);
                        if (locked)
                                unlock_buffer(bh);
                        journal_remove_journal_head(bh);
                        /* Once for our safety reference, once for
                         * journal_remove_journal_head() */
                        put_bh(bh);
                        put_bh(bh);
                }

                if (lock_need_resched(&journal->j_list_lock)) {
                        spin_unlock(&journal->j_list_lock);
                        goto write_out_data;
                }
        }
        spin_unlock(&journal->j_list_lock);
        journal_do_submit_data(wbuf, bufs);
}
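/*
 * In summary: a dirty buffer is locked (first flushing the batched wbuf[]
 * array if the trylock fails), cleaned and queued on wbuf[] for submission
 * while being refiled onto BJ_Locked; a buffer found clean has already
 * been written out by someone else, so it is simply unfiled and released.
 */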
/*
 * journal_commit_transaction
 *
 * The primary function for committing a transaction to the log.  This
 * function is called by the journal thread to begin a complete commit.
 */
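/*
 * Outline, matching the jbd_debug() phase markers below: drain updates and
 * lock down the transaction; discard unused BJ_Reserved buffers; submit and
 * wait for the ordered-mode data buffers; write revoke records; write the
 * metadata through temporary BJ_IO buffers, preceded by descriptor blocks
 * full of tags; wait for that IO; write the commit record; then move
 * buffers to the checkpoint lists and clean up.
 */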
void journal_commit_transaction(journal_t *journal)
{
        transaction_t *commit_transaction;
        struct journal_head *jh, *new_jh, *descriptor;
        struct buffer_head **wbuf = journal->j_wbuf;
        int bufs;
        int flags;
        int err;
        unsigned long blocknr;
        char *tagp = NULL;
        journal_header_t *header;
        journal_block_tag_t *tag = NULL;
        int space_left = 0;
        int first_tag = 0;
        int tag_flag;
        int i;

        /*
         * First job: lock down the current transaction and wait for
         * all outstanding updates to complete.
         */

#ifdef COMMIT_STATS
        spin_lock(&journal->j_list_lock);
        summarise_journal_usage(journal);
        spin_unlock(&journal->j_list_lock);
#endif

        /* Do we need to erase the effects of a prior journal_flush? */
        if (journal->j_flags & JFS_FLUSHED) {
                jbd_debug(3, "super block updated\n");
                journal_update_superblock(journal, 1);
        } else {
                jbd_debug(3, "superblock not updated\n");
        }

        J_ASSERT(journal->j_running_transaction != NULL);
        J_ASSERT(journal->j_committing_transaction == NULL);

        commit_transaction = journal->j_running_transaction;
        J_ASSERT(commit_transaction->t_state == T_RUNNING);

        jbd_debug(1, "JBD: starting commit of transaction %d\n",
                        commit_transaction->t_tid);

        spin_lock(&journal->j_state_lock);
        commit_transaction->t_state = T_LOCKED;

        spin_lock(&commit_transaction->t_handle_lock);
        while (commit_transaction->t_updates) {
                DEFINE_WAIT(wait);

                prepare_to_wait(&journal->j_wait_updates, &wait,
                                        TASK_UNINTERRUPTIBLE);
                if (commit_transaction->t_updates) {
                        spin_unlock(&commit_transaction->t_handle_lock);
                        spin_unlock(&journal->j_state_lock);
                        schedule();
                        spin_lock(&journal->j_state_lock);
                        spin_lock(&commit_transaction->t_handle_lock);
                }
                finish_wait(&journal->j_wait_updates, &wait);
        }
        spin_unlock(&commit_transaction->t_handle_lock);

        J_ASSERT (commit_transaction->t_outstanding_credits <=
                        journal->j_max_transaction_buffers);

        /*
         * First thing we are allowed to do is to discard any remaining
         * BJ_Reserved buffers.  Note, it is _not_ permissible to assume
         * that there are no such buffers: if a large filesystem
         * operation like a truncate needs to split itself over multiple
         * transactions, then it may try to do a journal_restart() while
         * there are still BJ_Reserved buffers outstanding.  These must
         * be released cleanly from the current transaction.
         *
         * In this case, the filesystem must still reserve write access
         * again before modifying the buffer in the new transaction, but
         * we do not require it to remember exactly which old buffers it
         * has reserved.  This is consistent with the existing behaviour
         * that multiple journal_get_write_access() calls to the same
         * buffer are perfectly permissible.
         */
        while (commit_transaction->t_reserved_list) {
                jh = commit_transaction->t_reserved_list;
                JBUFFER_TRACE(jh, "reserved, unused: refile");
                /*
                 * A journal_get_undo_access()+journal_release_buffer() may
                 * leave undo-committed data.
                 */
                if (jh->b_committed_data) {
                        struct buffer_head *bh = jh2bh(jh);

                        jbd_lock_bh_state(bh);
                        jbd_slab_free(jh->b_committed_data, bh->b_size);
                        jh->b_committed_data = NULL;
                        jbd_unlock_bh_state(bh);
                }
                journal_refile_buffer(journal, jh);
        }

        /*
         * Now try to drop any written-back buffers from the journal's
         * checkpoint lists.  We do this *before* commit because it
         * potentially frees some memory.
         */
        spin_lock(&journal->j_list_lock);
        __journal_clean_checkpoint_list(journal);
        spin_unlock(&journal->j_list_lock);

        jbd_debug(3, "JBD: commit phase 1\n");

        /*
         * Switch to a new revoke table.
         */
        journal_switch_revoke_table(journal);

        commit_transaction->t_state = T_FLUSH;
        journal->j_committing_transaction = commit_transaction;
        journal->j_running_transaction = NULL;
        commit_transaction->t_log_start = journal->j_head;
        wake_up(&journal->j_wait_transaction_locked);
        spin_unlock(&journal->j_state_lock);
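        /*
         * The transaction is now T_FLUSH and j_running_transaction is
         * NULL: anyone woken from j_wait_transaction_locked will start a
         * handle against a brand new transaction, not this one.
         */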
        jbd_debug(3, "JBD: commit phase 2\n");

        /*
         * First, drop modified flag: all accesses to the buffers
         * will be tracked for a new transaction only -bzzz
         */
        spin_lock(&journal->j_list_lock);
        if (commit_transaction->t_buffers) {
                new_jh = jh = commit_transaction->t_buffers->b_tnext;
                do {
                        J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
                                        new_jh->b_modified == 0);
                        new_jh->b_modified = 0;
                        new_jh = new_jh->b_tnext;
                } while (new_jh != jh);
        }
        spin_unlock(&journal->j_list_lock);

        /*
         * Now start flushing things to disk, in the order they appear
         * on the transaction lists.  Data blocks go first.
         */
        err = 0;
        journal_submit_data_buffers(journal, commit_transaction);

        /*
         * Wait for all previously submitted IO to complete.
         */
        spin_lock(&journal->j_list_lock);
        while (commit_transaction->t_locked_list) {
                struct buffer_head *bh;

                jh = commit_transaction->t_locked_list->b_tprev;
                bh = jh2bh(jh);
                get_bh(bh);
                if (buffer_locked(bh)) {
                        spin_unlock(&journal->j_list_lock);
                        wait_on_buffer(bh);
                        if (unlikely(!buffer_uptodate(bh)))
                                err = -EIO;
                        spin_lock(&journal->j_list_lock);
                }
                if (!inverted_lock(journal, bh)) {
                        put_bh(bh);
                        spin_lock(&journal->j_list_lock);
                        continue;
                }
                if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
                        __journal_unfile_buffer(jh);
                        jbd_unlock_bh_state(bh);
                        journal_remove_journal_head(bh);
                        put_bh(bh);
                } else {
                        jbd_unlock_bh_state(bh);
                }
                put_bh(bh);
                cond_resched_lock(&journal->j_list_lock);
        }
        spin_unlock(&journal->j_list_lock);

        if (err)
                __journal_abort_hard(journal);

        journal_write_revoke_records(journal, commit_transaction);

        jbd_debug(3, "JBD: commit phase 2\n");

        /*
         * If we found any dirty or locked buffers, then we should have
         * looped back up to the write_out_data label.  If there weren't
         * any then journal_clean_data_list should have wiped the list
         * clean by now, so check that it is in fact empty.
         */
        J_ASSERT (commit_transaction->t_sync_datalist == NULL);

        jbd_debug(3, "JBD: commit phase 3\n");

        /*
         * Way to go: we have now written out all of the data for a
         * transaction!  Now comes the tricky part: we need to write out
         * metadata.  Loop over the transaction's entire buffer list:
         */
        commit_transaction->t_state = T_COMMIT;

        descriptor = NULL;
        bufs = 0;
        while (commit_transaction->t_buffers) {

                /* Find the next buffer to be journaled... */

                jh = commit_transaction->t_buffers;

                /* If we're in abort mode, we just un-journal the buffer and
                   release it for background writing. */

                if (is_journal_aborted(journal)) {
                        JBUFFER_TRACE(jh, "journal is aborting: refile");
                        journal_refile_buffer(journal, jh);
                        /* If that was the last one, we need to clean up
                         * any descriptor buffers which may have been
                         * already allocated, even if we are now
                         * aborting. */
                        if (!commit_transaction->t_buffers)
                                goto start_journal_io;
                        continue;
                }

                /* Make sure we have a descriptor block in which to
                   record the metadata buffer. */

                if (!descriptor) {
                        struct buffer_head *bh;

                        J_ASSERT (bufs == 0);

                        jbd_debug(4, "JBD: get descriptor\n");

                        descriptor = journal_get_descriptor_buffer(journal);
                        if (!descriptor) {
                                __journal_abort_hard(journal);
                                continue;
                        }

                        bh = jh2bh(descriptor);
                        jbd_debug(4, "JBD: got buffer %llu (%p)\n",
                                (unsigned long long)bh->b_blocknr, bh->b_data);
                        header = (journal_header_t *)&bh->b_data[0];
                        header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
                        header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK);
                        header->h_sequence = cpu_to_be32(commit_transaction->t_tid);

                        tagp = &bh->b_data[sizeof(journal_header_t)];
                        space_left = bh->b_size - sizeof(journal_header_t);
                        first_tag = 1;
                        set_buffer_jwrite(bh);
                        set_buffer_dirty(bh);
                        wbuf[bufs++] = bh;

                        /* Record it so that we can wait for IO
                           completion later */
                        BUFFER_TRACE(bh, "ph3: file as descriptor");
                        journal_file_buffer(descriptor, commit_transaction,
                                        BJ_LogCtl);
                }

                /* Where is the buffer to be written? */

                err = journal_next_log_block(journal, &blocknr);
                /* If the block mapping failed, just abandon the buffer
                   and repeat this loop: we'll fall into the
                   refile-on-abort condition above. */
                if (err) {
                        __journal_abort_hard(journal);
                        continue;
                }

                /*
                 * start_this_handle() uses t_outstanding_credits to determine
                 * the free space in the log, but this counter is changed
                 * by journal_next_log_block() also.
                 */
                commit_transaction->t_outstanding_credits--;

                /* Bump b_count to prevent truncate from stumbling over
                   the shadowed buffer!  @@@ This can go if we ever get
                   rid of the BJ_IO/BJ_Shadow pairing of buffers. */
                atomic_inc(&jh2bh(jh)->b_count);

                /* Make a temporary IO buffer with which to write it out
                   (this will requeue both the metadata buffer and the
                   temporary IO buffer). new_bh goes on BJ_IO */

                set_bit(BH_JWrite, &jh2bh(jh)->b_state);
                /*
                 * akpm: journal_write_metadata_buffer() sets
                 * new_bh->b_transaction to commit_transaction.
                 * We need to clean this up before we release new_bh
                 * (which is of type BJ_IO)
                 */
                JBUFFER_TRACE(jh, "ph3: write metadata");
                flags = journal_write_metadata_buffer(commit_transaction,
                                                      jh, &new_jh, blocknr);
                set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
                wbuf[bufs++] = jh2bh(new_jh);

                /* Record the new block's tag in the current descriptor
                   buffer */

                tag_flag = 0;
                if (flags & 1)
                        tag_flag |= JFS_FLAG_ESCAPE;
                if (!first_tag)
                        tag_flag |= JFS_FLAG_SAME_UUID;

                tag = (journal_block_tag_t *) tagp;
                tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr);
                tag->t_flags = cpu_to_be32(tag_flag);
                tagp += sizeof(journal_block_tag_t);
                space_left -= sizeof(journal_block_tag_t);

                if (first_tag) {
                        memcpy (tagp, journal->j_uuid, 16);
                        tagp += 16;
                        space_left -= 16;
                        first_tag = 0;
                }

                /* If there's no more to do, or if the descriptor is full,
                   let the IO rip! */

                if (bufs == journal->j_wbufsize ||
                    commit_transaction->t_buffers == NULL ||
                    space_left < sizeof(journal_block_tag_t) + 16) {

                        jbd_debug(4, "JBD: Submit %d IOs\n", bufs);

                        /* Write an end-of-descriptor marker before
                           submitting the IOs.  "tag" still points to
                           the last tag we set up. */

                        tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG);
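                        /*
                         * The descriptor block now holds a journal_header_t
                         * followed by packed journal_block_tag_t entries
                         * (plus 16 bytes of journal UUID after the first
                         * tag), which is why the space_left test above
                         * reserves room for one more tag and a UUID.
                         */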
start_journal_io:
                        for (i = 0; i < bufs; i++) {
                                struct buffer_head *bh = wbuf[i];
                                lock_buffer(bh);
                                clear_buffer_dirty(bh);
                                set_buffer_uptodate(bh);
                                bh->b_end_io = journal_end_buffer_io_sync;
                                submit_bh(WRITE, bh);
                        }
                        cond_resched();

                        /* Force a new descriptor to be generated next
                           time round the loop. */
                        descriptor = NULL;
                        bufs = 0;
                }
        }

        /* Lo and behold: we have just managed to send a transaction to
           the log.  Before we can commit it, wait for the IO so far to
           complete.  Control buffers being written are on the
           transaction's t_log_list queue, and metadata buffers are on
           the t_iobuf_list queue.

           Wait for the buffers in reverse order.  That way we are
           less likely to be woken up until all IOs have completed, and
           so we incur less scheduling load.
         */

        jbd_debug(3, "JBD: commit phase 4\n");
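        /*
         * Each pass of the loop below retires one temporary BJ_IO
         * buffer_head created by journal_write_metadata_buffer(), then its
         * shadowed counterpart: the dummy bh is freed outright while the
         * real metadata buffer is refiled onto BJ_Forget for
         * checkpointing, and waiters on BH_Unshadow are woken.
         */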
        /*
         * akpm: these are BJ_IO, and j_list_lock is not needed.
         * See __journal_try_to_free_buffer.
         */
wait_for_iobuf:
        while (commit_transaction->t_iobuf_list != NULL) {
                struct buffer_head *bh;

                jh = commit_transaction->t_iobuf_list->b_tprev;
                bh = jh2bh(jh);
                if (buffer_locked(bh)) {
                        wait_on_buffer(bh);
                        goto wait_for_iobuf;
                }
                if (cond_resched())
                        goto wait_for_iobuf;

                if (unlikely(!buffer_uptodate(bh)))
                        err = -EIO;

                clear_buffer_jwrite(bh);

                JBUFFER_TRACE(jh, "ph4: unfile after journal write");
                journal_unfile_buffer(journal, jh);

                /*
                 * ->t_iobuf_list should contain only dummy buffer_heads
                 * which were created by journal_write_metadata_buffer().
                 */
                BUFFER_TRACE(bh, "dumping temporary bh");
                journal_put_journal_head(jh);
                __brelse(bh);
                J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
                free_buffer_head(bh);

                /* We also have to unlock and free the corresponding
                   shadowed buffer */
                jh = commit_transaction->t_shadow_list->b_tprev;
                bh = jh2bh(jh);
                clear_bit(BH_JWrite, &bh->b_state);
                J_ASSERT_BH(bh, buffer_jbddirty(bh));

                /* The metadata is now released for reuse, but we need
                   to remember it against this transaction so that when
                   we finally commit, we can do any checkpointing
                   required. */
                JBUFFER_TRACE(jh, "file as BJ_Forget");
                journal_file_buffer(jh, commit_transaction, BJ_Forget);
                /* Wake up any transactions which were waiting for this
                   IO to complete */
                wake_up_bit(&bh->b_state, BH_Unshadow);
                JBUFFER_TRACE(jh, "brelse shadowed buffer");
                __brelse(bh);
        }

        J_ASSERT (commit_transaction->t_shadow_list == NULL);

        jbd_debug(3, "JBD: commit phase 5\n");

        /* Here we wait for the revoke record and descriptor record buffers */
wait_for_ctlbuf:
        while (commit_transaction->t_log_list != NULL) {
                struct buffer_head *bh;

                jh = commit_transaction->t_log_list->b_tprev;
                bh = jh2bh(jh);
                if (buffer_locked(bh)) {
                        wait_on_buffer(bh);
                        goto wait_for_ctlbuf;
                }
                if (cond_resched())
                        goto wait_for_ctlbuf;

                if (unlikely(!buffer_uptodate(bh)))
                        err = -EIO;

                BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
                clear_buffer_jwrite(bh);
                journal_unfile_buffer(journal, jh);
                journal_put_journal_head(jh);
                __brelse(bh);           /* One for getblk */
                /* AKPM: bforget here */
        }

        jbd_debug(3, "JBD: commit phase 6\n");

        if (journal_write_commit_record(journal, commit_transaction))
                err = -EIO;

        if (err)
                __journal_abort_hard(journal);

        /* End of a transaction!  Finally, we can do checkpoint
           processing: any buffers committed as a result of this
           transaction can be removed from any checkpoint list it was on
           before. */

        jbd_debug(3, "JBD: commit phase 7\n");

        J_ASSERT(commit_transaction->t_sync_datalist == NULL);
        J_ASSERT(commit_transaction->t_buffers == NULL);
        J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
        J_ASSERT(commit_transaction->t_iobuf_list == NULL);
        J_ASSERT(commit_transaction->t_shadow_list == NULL);
        J_ASSERT(commit_transaction->t_log_list == NULL);

restart_loop:
        /*
         * As there are other places (journal_unmap_buffer()) adding buffers
         * to this list we have to be careful and hold the j_list_lock.
         */
        spin_lock(&journal->j_list_lock);
        while (commit_transaction->t_forget) {
                transaction_t *cp_transaction;
                struct buffer_head *bh;

                jh = commit_transaction->t_forget;
                spin_unlock(&journal->j_list_lock);
                bh = jh2bh(jh);
                jbd_lock_bh_state(bh);
                J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
                        jh->b_transaction == journal->j_running_transaction);

                /*
                 * If there is undo-protected committed data against
                 * this buffer, then we can remove it now.  If it is a
                 * buffer needing such protection, the old frozen_data
                 * field now points to a committed version of the
                 * buffer, so rotate that field to the new committed
                 * data.
                 *
                 * Otherwise, we can just throw away the frozen data now.
                 */
                if (jh->b_committed_data) {
                        jbd_slab_free(jh->b_committed_data, bh->b_size);
                        jh->b_committed_data = NULL;
                        if (jh->b_frozen_data) {
                                jh->b_committed_data = jh->b_frozen_data;
                                jh->b_frozen_data = NULL;
                        }
                } else if (jh->b_frozen_data) {
                        jbd_slab_free(jh->b_frozen_data, bh->b_size);
                        jh->b_frozen_data = NULL;
                }

                spin_lock(&journal->j_list_lock);
                cp_transaction = jh->b_cp_transaction;
                if (cp_transaction) {
                        JBUFFER_TRACE(jh, "remove from old cp transaction");
                        __journal_remove_checkpoint(jh);
                }

                /* Only re-checkpoint the buffer_head if it is marked
                 * dirty.  If the buffer was added to the BJ_Forget list
                 * by journal_forget, it may no longer be dirty and
                 * there's no point in keeping a checkpoint record for
                 * it. */

                /* A buffer which has been freed while still being
                 * journaled by a previous transaction may end up still
                 * being dirty here, but we want to avoid writing back
                 * that buffer in the future now that the last use has
                 * been committed.  That's not only a performance gain,
                 * it also stops aliasing problems if the buffer is left
                 * behind for writeback and gets reallocated for another
                 * use in a different page. */
                if (buffer_freed(bh)) {
                        clear_buffer_freed(bh);
                        clear_buffer_jbddirty(bh);
                }

                if (buffer_jbddirty(bh)) {
                        JBUFFER_TRACE(jh, "add to new checkpointing trans");
                        __journal_insert_checkpoint(jh, commit_transaction);
                        JBUFFER_TRACE(jh, "refile for checkpoint writeback");
                        __journal_refile_buffer(jh);
                        jbd_unlock_bh_state(bh);
                } else {
                        J_ASSERT_BH(bh, !buffer_dirty(bh));
                        /* The buffer on BJ_Forget list and not jbddirty means
                         * it has been freed by this transaction and hence it
                         * could not have been reallocated until this
                         * transaction has committed. *BUT* it could be
                         * reallocated once we have written all the data to
                         * disk and before we process the buffer on BJ_Forget
                         * list. */
                        JBUFFER_TRACE(jh, "refile or unfile freed buffer");
                        __journal_refile_buffer(jh);
                        if (!jh->b_transaction) {
                                jbd_unlock_bh_state(bh);
                                /* needs a brelse */
                                journal_remove_journal_head(bh);
                                release_buffer_page(bh);
                        } else
                                jbd_unlock_bh_state(bh);
                }
                cond_resched_lock(&journal->j_list_lock);
        }
        spin_unlock(&journal->j_list_lock);
        /*
         * This is a bit sleazy.  We borrow j_list_lock to protect
         * journal->j_committing_transaction in __journal_remove_checkpoint.
         * Really, __journal_remove_checkpoint should be using j_state_lock,
         * but it's a bit of a hassle to hold that across
         * __journal_remove_checkpoint.
         */
        spin_lock(&journal->j_state_lock);
        spin_lock(&journal->j_list_lock);
        /*
         * Now recheck if some buffers did not get attached to the transaction
         * while the lock was dropped...
         */
        if (commit_transaction->t_forget) {
                spin_unlock(&journal->j_list_lock);
                spin_unlock(&journal->j_state_lock);
                goto restart_loop;
        }

        /* Done with this transaction! */

        jbd_debug(3, "JBD: commit phase 8\n");

        J_ASSERT(commit_transaction->t_state == T_COMMIT);

        commit_transaction->t_state = T_FINISHED;
        J_ASSERT(commit_transaction == journal->j_committing_transaction);
        journal->j_commit_sequence = commit_transaction->t_tid;
        journal->j_committing_transaction = NULL;
        spin_unlock(&journal->j_state_lock);
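        /*
         * Finally, splice the committed transaction onto the tail of the
         * journal's circular checkpoint list (j_checkpoint_transactions
         * keeps pointing at the oldest entry), or drop it immediately if
         * nothing was left to checkpoint.
         */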
        if (commit_transaction->t_checkpoint_list == NULL) {
                __journal_drop_transaction(journal, commit_transaction);
        } else {
                if (journal->j_checkpoint_transactions == NULL) {
                        journal->j_checkpoint_transactions = commit_transaction;
                        commit_transaction->t_cpnext = commit_transaction;
                        commit_transaction->t_cpprev = commit_transaction;
                } else {
                        commit_transaction->t_cpnext =
                                journal->j_checkpoint_transactions;
                        commit_transaction->t_cpprev =
                                commit_transaction->t_cpnext->t_cpprev;
                        commit_transaction->t_cpnext->t_cpprev =
                                commit_transaction;
                        commit_transaction->t_cpprev->t_cpnext =
                                commit_transaction;
                }
        }
        spin_unlock(&journal->j_list_lock);

        jbd_debug(1, "JBD: commit %d complete, head %d\n",
                  journal->j_commit_sequence, journal->j_tail_sequence);

        wake_up(&journal->j_wait_done_commit);
}