2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
10 #include <linux/sched.h>
11 #include <linux/slab.h>
12 #include <linux/spinlock.h>
13 #include <linux/completion.h>
14 #include <linux/buffer_head.h>
15 #include <linux/gfs2_ondisk.h>
16 #include <linux/crc32.h>
17 #include <linux/lm_interface.h>
18 #include <linux/delay.h>
33 * gfs2_struct2blk - compute the number of log blocks needed for a set of structures
34 * @sdp: the filesystem
35 * @nstruct: the number of structures
36 * @ssize: the size of the structures
38 * Compute the number of log descriptor blocks needed to hold a certain number
39 * of structures of a certain size.
41 * Returns: the number of blocks needed (minimum is always 1)
44 unsigned int gfs2_struct2blk(struct gfs2_sbd
*sdp
, unsigned int nstruct
,
48 unsigned int first
, second
;
51 first
= (sdp
->sd_sb
.sb_bsize
- sizeof(struct gfs2_log_descriptor
)) / ssize
;
53 if (nstruct
> first
) {
54 second
= (sdp
->sd_sb
.sb_bsize
-
55 sizeof(struct gfs2_meta_header
)) / ssize
;
56 blks
+= DIV_ROUND_UP(nstruct
- first
, second
);
63 * gfs2_ail1_start_one - Start I/O on a part of the AIL
64 * @sdp: the filesystem
65 * @ai: the part of the AIL
69 static void gfs2_ail1_start_one(struct gfs2_sbd
*sdp
, struct gfs2_ail
*ai
)
71 struct gfs2_bufdata
*bd
, *s
;
72 struct buffer_head
*bh
;
75 BUG_ON(!spin_is_locked(&sdp
->sd_log_lock
));
80 list_for_each_entry_safe_reverse(bd
, s
, &ai
->ai_ail1_list
,
84 gfs2_assert(sdp
, bd
->bd_ail
== ai
);
86 if (!buffer_busy(bh
)) {
87 if (!buffer_uptodate(bh
)) {
89 gfs2_io_error_bh(sdp
, bh
);
92 list_move(&bd
->bd_ail_st_list
, &ai
->ai_ail2_list
);
96 if (!buffer_dirty(bh
))
99 list_move(&bd
->bd_ail_st_list
, &ai
->ai_ail1_list
);
101 gfs2_log_unlock(sdp
);
103 ll_rw_block(WRITE
, 1, &bh
);
113 * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
114 * @sdp: the filesystem
119 static int gfs2_ail1_empty_one(struct gfs2_sbd
*sdp
, struct gfs2_ail
*ai
, int flags
)
121 struct gfs2_bufdata
*bd
, *s
;
122 struct buffer_head
*bh
;
124 list_for_each_entry_safe_reverse(bd
, s
, &ai
->ai_ail1_list
,
128 gfs2_assert(sdp
, bd
->bd_ail
== ai
);
130 if (buffer_busy(bh
)) {
137 if (!buffer_uptodate(bh
))
138 gfs2_io_error_bh(sdp
, bh
);
140 list_move(&bd
->bd_ail_st_list
, &ai
->ai_ail2_list
);
143 return list_empty(&ai
->ai_ail1_list
);
146 static void gfs2_ail1_start(struct gfs2_sbd
*sdp
, int flags
)
148 struct list_head
*head
= &sdp
->sd_ail1_list
;
150 struct list_head
*first
;
151 struct gfs2_ail
*first_ai
, *ai
, *tmp
;
155 if (list_empty(head
)) {
156 gfs2_log_unlock(sdp
);
159 sync_gen
= sdp
->sd_ail_sync_gen
++;
162 first_ai
= list_entry(first
, struct gfs2_ail
, ai_list
);
163 first_ai
->ai_sync_gen
= sync_gen
;
164 gfs2_ail1_start_one(sdp
, first_ai
); /* This may drop log lock */
170 if (first
&& (head
->prev
!= first
||
171 gfs2_ail1_empty_one(sdp
, first_ai
, 0)))
175 list_for_each_entry_safe_reverse(ai
, tmp
, head
, ai_list
) {
176 if (ai
->ai_sync_gen
>= sync_gen
)
178 ai
->ai_sync_gen
= sync_gen
;
179 gfs2_ail1_start_one(sdp
, ai
); /* This may drop log lock */
185 gfs2_log_unlock(sdp
);
188 int gfs2_ail1_empty(struct gfs2_sbd
*sdp
, int flags
)
190 struct gfs2_ail
*ai
, *s
;
195 list_for_each_entry_safe_reverse(ai
, s
, &sdp
->sd_ail1_list
, ai_list
) {
196 if (gfs2_ail1_empty_one(sdp
, ai
, flags
))
197 list_move(&ai
->ai_list
, &sdp
->sd_ail2_list
);
198 else if (!(flags
& DIO_ALL
))
202 ret
= list_empty(&sdp
->sd_ail1_list
);
204 gfs2_log_unlock(sdp
);
211 * gfs2_ail2_empty_one - Remove every buffer from the AIL2 list of one gfs2_ail
212 * @sdp: the filesystem
217 static void gfs2_ail2_empty_one(struct gfs2_sbd
*sdp
, struct gfs2_ail
*ai
)
219 struct list_head
*head
= &ai
->ai_ail2_list
;
220 struct gfs2_bufdata
*bd
;
222 while (!list_empty(head
)) {
223 bd
= list_entry(head
->prev
, struct gfs2_bufdata
,
225 gfs2_assert(sdp
, bd
->bd_ail
== ai
);
227 list_del(&bd
->bd_ail_st_list
);
228 list_del(&bd
->bd_ail_gl_list
);
229 atomic_dec(&bd
->bd_gl
->gl_ail_count
);
234 static void ail2_empty(struct gfs2_sbd
*sdp
, unsigned int new_tail
)
236 struct gfs2_ail
*ai
, *safe
;
237 unsigned int old_tail
= sdp
->sd_log_tail
;
238 int wrap
= (new_tail
< old_tail
);
243 list_for_each_entry_safe(ai
, safe
, &sdp
->sd_ail2_list
, ai_list
) {
244 a
= (old_tail
<= ai
->ai_first
);
245 b
= (ai
->ai_first
< new_tail
);
246 rm
= (wrap
) ? (a
|| b
) : (a
&& b
);
250 gfs2_ail2_empty_one(sdp
, ai
);
251 list_del(&ai
->ai_list
);
252 gfs2_assert_warn(sdp
, list_empty(&ai
->ai_ail1_list
));
253 gfs2_assert_warn(sdp
, list_empty(&ai
->ai_ail2_list
));
257 gfs2_log_unlock(sdp
);
261 * gfs2_log_reserve - Make a log reservation
262 * @sdp: The GFS2 superblock
263 * @blks: The number of blocks to reserve
265 * Note that we never give out the last 6 blocks of the journal. That's
266 * due to the fact that there are a small number of header blocks
267 * associated with each log flush. The exact number can't be known until
268 * flush time, so we ensure that we have just enough free blocks at all
269 * times to avoid running out during a log flush.
274 int gfs2_log_reserve(struct gfs2_sbd
*sdp
, unsigned int blks
)
276 unsigned int try = 0;
278 if (gfs2_assert_warn(sdp
, blks
) ||
279 gfs2_assert_warn(sdp
, blks
<= sdp
->sd_jdesc
->jd_blocks
))
282 mutex_lock(&sdp
->sd_log_reserve_mutex
);
284 while(sdp
->sd_log_blks_free
<= (blks
+ 6)) {
285 gfs2_log_unlock(sdp
);
286 gfs2_ail1_empty(sdp
, 0);
287 gfs2_log_flush(sdp
, NULL
);
290 gfs2_ail1_start(sdp
, 0);
293 sdp
->sd_log_blks_free
-= blks
;
294 gfs2_log_unlock(sdp
);
295 mutex_unlock(&sdp
->sd_log_reserve_mutex
);
297 down_read(&sdp
->sd_log_flush_lock
);
303 * gfs2_log_release - Release a given number of log blocks
304 * @sdp: The GFS2 superblock
305 * @blks: The number of blocks
309 void gfs2_log_release(struct gfs2_sbd
*sdp
, unsigned int blks
)
313 sdp
->sd_log_blks_free
+= blks
;
314 gfs2_assert_withdraw(sdp
,
315 sdp
->sd_log_blks_free
<= sdp
->sd_jdesc
->jd_blocks
);
316 gfs2_log_unlock(sdp
);
317 up_read(&sdp
->sd_log_flush_lock
);
320 static u64
log_bmap(struct gfs2_sbd
*sdp
, unsigned int lbn
)
322 struct inode
*inode
= sdp
->sd_jdesc
->jd_inode
;
324 struct buffer_head bh_map
= { .b_state
= 0, .b_blocknr
= 0 };
326 bh_map
.b_size
= 1 << inode
->i_blkbits
;
327 error
= gfs2_block_map(inode
, lbn
, 0, &bh_map
);
328 if (error
|| !bh_map
.b_blocknr
)
329 printk(KERN_INFO
"error=%d, dbn=%llu lbn=%u", error
,
330 (unsigned long long)bh_map
.b_blocknr
, lbn
);
331 gfs2_assert_withdraw(sdp
, !error
&& bh_map
.b_blocknr
);
333 return bh_map
.b_blocknr
;
337 * log_distance - Compute distance between two journal blocks
338 * @sdp: The GFS2 superblock
339 * @newer: The most recent journal block of the pair
340 * @older: The older journal block of the pair
342 * Compute the distance (in the journal direction) between two
343 * blocks in the journal
345 * Returns: the distance in blocks
348 static inline unsigned int log_distance(struct gfs2_sbd
*sdp
, unsigned int newer
,
353 dist
= newer
- older
;
355 dist
+= sdp
->sd_jdesc
->jd_blocks
;
360 static unsigned int current_tail(struct gfs2_sbd
*sdp
)
367 if (list_empty(&sdp
->sd_ail1_list
)) {
368 tail
= sdp
->sd_log_head
;
370 ai
= list_entry(sdp
->sd_ail1_list
.prev
, struct gfs2_ail
, ai_list
);
374 gfs2_log_unlock(sdp
);
379 static inline void log_incr_head(struct gfs2_sbd
*sdp
)
381 if (sdp
->sd_log_flush_head
== sdp
->sd_log_tail
)
382 gfs2_assert_withdraw(sdp
, sdp
->sd_log_flush_head
== sdp
->sd_log_head
);
384 if (++sdp
->sd_log_flush_head
== sdp
->sd_jdesc
->jd_blocks
) {
385 sdp
->sd_log_flush_head
= 0;
386 sdp
->sd_log_flush_wrapped
= 1;
391 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
392 * @sdp: The GFS2 superblock
394 * Returns: the buffer_head
397 struct buffer_head
*gfs2_log_get_buf(struct gfs2_sbd
*sdp
)
399 u64 blkno
= log_bmap(sdp
, sdp
->sd_log_flush_head
);
400 struct gfs2_log_buf
*lb
;
401 struct buffer_head
*bh
;
403 lb
= kzalloc(sizeof(struct gfs2_log_buf
), GFP_NOFS
| __GFP_NOFAIL
);
404 list_add(&lb
->lb_list
, &sdp
->sd_log_flush_list
);
406 bh
= lb
->lb_bh
= sb_getblk(sdp
->sd_vfs
, blkno
);
408 memset(bh
->b_data
, 0, bh
->b_size
);
409 set_buffer_uptodate(bh
);
410 clear_buffer_dirty(bh
);
419 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
420 * @sdp: the filesystem
421 * @real: the real buffer head whose page and offset the fake buffer_head points at
423 * Returns: the fake buffer_head
426 struct buffer_head
*gfs2_log_fake_buf(struct gfs2_sbd
*sdp
,
427 struct buffer_head
*real
)
429 u64 blkno
= log_bmap(sdp
, sdp
->sd_log_flush_head
);
430 struct gfs2_log_buf
*lb
;
431 struct buffer_head
*bh
;
433 lb
= kzalloc(sizeof(struct gfs2_log_buf
), GFP_NOFS
| __GFP_NOFAIL
);
434 list_add(&lb
->lb_list
, &sdp
->sd_log_flush_list
);
437 bh
= lb
->lb_bh
= alloc_buffer_head(GFP_NOFS
| __GFP_NOFAIL
);
438 atomic_set(&bh
->b_count
, 1);
439 bh
->b_state
= (1 << BH_Mapped
) | (1 << BH_Uptodate
);
440 set_bh_page(bh
, real
->b_page
, bh_offset(real
));
441 bh
->b_blocknr
= blkno
;
442 bh
->b_size
= sdp
->sd_sb
.sb_bsize
;
443 bh
->b_bdev
= sdp
->sd_vfs
->s_bdev
;
450 static void log_pull_tail(struct gfs2_sbd
*sdp
, unsigned int new_tail
, int pull
)
452 unsigned int dist
= log_distance(sdp
, new_tail
, sdp
->sd_log_tail
);
454 ail2_empty(sdp
, new_tail
);
457 sdp
->sd_log_blks_free
+= dist
- (pull
? 1 : 0);
458 gfs2_assert_withdraw(sdp
, sdp
->sd_log_blks_free
<= sdp
->sd_jdesc
->jd_blocks
);
459 gfs2_log_unlock(sdp
);
461 sdp
->sd_log_tail
= new_tail
;
465 * log_write_header - Build a journal header block and write it out synchronously
466 * @sdp: The GFS2 superblock
468 * No return value; a failed write is reported through gfs2_io_error_bh()
471 static void log_write_header(struct gfs2_sbd
*sdp
, u32 flags
, int pull
)
473 u64 blkno
= log_bmap(sdp
, sdp
->sd_log_flush_head
);
474 struct buffer_head
*bh
;
475 struct gfs2_log_header
*lh
;
479 bh
= sb_getblk(sdp
->sd_vfs
, blkno
);
481 memset(bh
->b_data
, 0, bh
->b_size
);
482 set_buffer_uptodate(bh
);
483 clear_buffer_dirty(bh
);
486 gfs2_ail1_empty(sdp
, 0);
487 tail
= current_tail(sdp
);
489 lh
= (struct gfs2_log_header
*)bh
->b_data
;
490 memset(lh
, 0, sizeof(struct gfs2_log_header
));
491 lh
->lh_header
.mh_magic
= cpu_to_be32(GFS2_MAGIC
);
492 lh
->lh_header
.mh_type
= cpu_to_be32(GFS2_METATYPE_LH
);
493 lh
->lh_header
.mh_format
= cpu_to_be32(GFS2_FORMAT_LH
);
494 lh
->lh_sequence
= cpu_to_be64(sdp
->sd_log_sequence
++);
495 lh
->lh_flags
= cpu_to_be32(flags
);
496 lh
->lh_tail
= cpu_to_be32(tail
);
497 lh
->lh_blkno
= cpu_to_be32(sdp
->sd_log_flush_head
);
498 hash
= gfs2_disk_hash(bh
->b_data
, sizeof(struct gfs2_log_header
));
499 lh
->lh_hash
= cpu_to_be32(hash
);
501 set_buffer_dirty(bh
);
502 if (sync_dirty_buffer(bh
))
503 gfs2_io_error_bh(sdp
, bh
);
506 if (sdp
->sd_log_tail
!= tail
)
507 log_pull_tail(sdp
, tail
, pull
);
509 gfs2_assert_withdraw(sdp
, !pull
);
511 sdp
->sd_log_idle
= (tail
== sdp
->sd_log_flush_head
);
515 static void log_flush_commit(struct gfs2_sbd
*sdp
)
517 struct list_head
*head
= &sdp
->sd_log_flush_list
;
518 struct gfs2_log_buf
*lb
;
519 struct buffer_head
*bh
;
521 while (!list_empty(head
)) {
522 lb
= list_entry(head
->next
, struct gfs2_log_buf
, lb_list
);
523 list_del(&lb
->lb_list
);
527 if (!buffer_uptodate(bh
))
528 gfs2_io_error_bh(sdp
, bh
);
530 while (atomic_read(&bh
->b_count
) != 1) /* Grrrr... */
532 free_buffer_head(bh
);
538 log_write_header(sdp
, 0, 0);
542 * gfs2_log_flush - flush incore transaction(s)
543 * @sdp: the filesystem
544 * @gl: The glock structure to flush. If NULL, flush the whole incore log
548 void gfs2_log_flush(struct gfs2_sbd
*sdp
, struct gfs2_glock
*gl
)
552 down_write(&sdp
->sd_log_flush_lock
);
556 if (list_empty(&gl
->gl_le
.le_list
)) {
557 gfs2_log_unlock(sdp
);
558 up_write(&sdp
->sd_log_flush_lock
);
561 gfs2_log_unlock(sdp
);
564 ai
= kzalloc(sizeof(struct gfs2_ail
), GFP_NOFS
| __GFP_NOFAIL
);
565 INIT_LIST_HEAD(&ai
->ai_ail1_list
);
566 INIT_LIST_HEAD(&ai
->ai_ail2_list
);
568 gfs2_assert_withdraw(sdp
, sdp
->sd_log_num_buf
== sdp
->sd_log_commited_buf
);
569 gfs2_assert_withdraw(sdp
,
570 sdp
->sd_log_num_revoke
== sdp
->sd_log_commited_revoke
);
572 sdp
->sd_log_flush_head
= sdp
->sd_log_head
;
573 sdp
->sd_log_flush_wrapped
= 0;
574 ai
->ai_first
= sdp
->sd_log_flush_head
;
576 lops_before_commit(sdp
);
577 if (!list_empty(&sdp
->sd_log_flush_list
))
578 log_flush_commit(sdp
);
579 else if (sdp
->sd_log_tail
!= current_tail(sdp
) && !sdp
->sd_log_idle
)
580 log_write_header(sdp
, 0, PULL
);
581 lops_after_commit(sdp
, ai
);
584 sdp
->sd_log_head
= sdp
->sd_log_flush_head
;
585 sdp
->sd_log_blks_free
-= sdp
->sd_log_num_hdrs
;
586 sdp
->sd_log_blks_reserved
= 0;
587 sdp
->sd_log_commited_buf
= 0;
588 sdp
->sd_log_num_hdrs
= 0;
589 sdp
->sd_log_commited_revoke
= 0;
591 if (!list_empty(&ai
->ai_ail1_list
)) {
592 list_add(&ai
->ai_list
, &sdp
->sd_ail1_list
);
595 gfs2_log_unlock(sdp
);
597 sdp
->sd_vfs
->s_dirt
= 0;
598 up_write(&sdp
->sd_log_flush_lock
);
603 static void log_refund(struct gfs2_sbd
*sdp
, struct gfs2_trans
*tr
)
605 unsigned int reserved
= 0;
610 sdp
->sd_log_commited_buf
+= tr
->tr_num_buf_new
- tr
->tr_num_buf_rm
;
611 gfs2_assert_withdraw(sdp
, ((int)sdp
->sd_log_commited_buf
) >= 0);
612 sdp
->sd_log_commited_revoke
+= tr
->tr_num_revoke
- tr
->tr_num_revoke_rm
;
613 gfs2_assert_withdraw(sdp
, ((int)sdp
->sd_log_commited_revoke
) >= 0);
615 if (sdp
->sd_log_commited_buf
)
616 reserved
+= sdp
->sd_log_commited_buf
;
617 if (sdp
->sd_log_commited_revoke
)
618 reserved
+= gfs2_struct2blk(sdp
, sdp
->sd_log_commited_revoke
,
623 old
= sdp
->sd_log_blks_free
;
624 sdp
->sd_log_blks_free
+= tr
->tr_reserved
-
625 (reserved
- sdp
->sd_log_blks_reserved
);
627 gfs2_assert_withdraw(sdp
, sdp
->sd_log_blks_free
>= old
);
628 gfs2_assert_withdraw(sdp
,
629 sdp
->sd_log_blks_free
<= sdp
->sd_jdesc
->jd_blocks
+
630 sdp
->sd_log_num_hdrs
);
632 sdp
->sd_log_blks_reserved
= reserved
;
634 gfs2_log_unlock(sdp
);
638 * gfs2_log_commit - Commit a transaction to the log
639 * @sdp: the filesystem
640 * @tr: the transaction
645 void gfs2_log_commit(struct gfs2_sbd
*sdp
, struct gfs2_trans
*tr
)
648 lops_incore_commit(sdp
, tr
);
650 sdp
->sd_vfs
->s_dirt
= 1;
651 up_read(&sdp
->sd_log_flush_lock
);
654 if (sdp
->sd_log_num_buf
> gfs2_tune_get(sdp
, gt_incore_log_blocks
))
655 wake_up_process(sdp
->sd_logd_process
);
656 gfs2_log_unlock(sdp
);
660 * gfs2_log_shutdown - write a shutdown header into a journal
661 * @sdp: the filesystem
665 void gfs2_log_shutdown(struct gfs2_sbd
*sdp
)
667 down_write(&sdp
->sd_log_flush_lock
);
669 gfs2_assert_withdraw(sdp
, !sdp
->sd_log_blks_reserved
);
670 gfs2_assert_withdraw(sdp
, !sdp
->sd_log_num_gl
);
671 gfs2_assert_withdraw(sdp
, !sdp
->sd_log_num_buf
);
672 gfs2_assert_withdraw(sdp
, !sdp
->sd_log_num_jdata
);
673 gfs2_assert_withdraw(sdp
, !sdp
->sd_log_num_revoke
);
674 gfs2_assert_withdraw(sdp
, !sdp
->sd_log_num_rg
);
675 gfs2_assert_withdraw(sdp
, !sdp
->sd_log_num_databuf
);
676 gfs2_assert_withdraw(sdp
, !sdp
->sd_log_num_hdrs
);
677 gfs2_assert_withdraw(sdp
, list_empty(&sdp
->sd_ail1_list
));
679 sdp
->sd_log_flush_head
= sdp
->sd_log_head
;
680 sdp
->sd_log_flush_wrapped
= 0;
682 log_write_header(sdp
, GFS2_LOG_HEAD_UNMOUNT
, 0);
684 gfs2_assert_warn(sdp
, sdp
->sd_log_blks_free
== sdp
->sd_jdesc
->jd_blocks
);
685 gfs2_assert_warn(sdp
, sdp
->sd_log_head
== sdp
->sd_log_tail
);
686 gfs2_assert_warn(sdp
, list_empty(&sdp
->sd_ail2_list
));
688 sdp
->sd_log_head
= sdp
->sd_log_flush_head
;
689 sdp
->sd_log_tail
= sdp
->sd_log_head
;
691 up_write(&sdp
->sd_log_flush_lock
);
696 * gfs2_meta_syncfs - sync all the buffers in a filesystem
697 * @sdp: the filesystem
701 void gfs2_meta_syncfs(struct gfs2_sbd
*sdp
)
703 gfs2_log_flush(sdp
, NULL
);
705 gfs2_ail1_start(sdp
, DIO_ALL
);
706 if (gfs2_ail1_empty(sdp
, DIO_ALL
))