memcg: fix refcnt going negative
[pohmelfs.git] / fs / gfs2 / meta_io.c
blobcb8d7a93d5ec257204c93aac033c95146fdd1a07
1 /*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
10 #include <linux/sched.h>
11 #include <linux/slab.h>
12 #include <linux/spinlock.h>
13 #include <linux/completion.h>
14 #include <linux/buffer_head.h>
15 #include <linux/mm.h>
16 #include <linux/pagemap.h>
17 #include <linux/writeback.h>
18 #include <linux/swap.h>
19 #include <linux/delay.h>
20 #include <linux/bio.h>
21 #include <linux/gfs2_ondisk.h>
23 #include "gfs2.h"
24 #include "incore.h"
25 #include "glock.h"
26 #include "glops.h"
27 #include "inode.h"
28 #include "log.h"
29 #include "lops.h"
30 #include "meta_io.h"
31 #include "rgrp.h"
32 #include "trans.h"
33 #include "util.h"
35 static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
37 int err;
38 struct buffer_head *bh, *head;
39 int nr_underway = 0;
40 int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ?
41 WRITE_SYNC_PLUG : WRITE));
43 BUG_ON(!PageLocked(page));
44 BUG_ON(!page_has_buffers(page));
46 head = page_buffers(page);
47 bh = head;
49 do {
50 if (!buffer_mapped(bh))
51 continue;
53 * If it's a fully non-blocking write attempt and we cannot
54 * lock the buffer then redirty the page. Note that this can
55 * potentially cause a busy-wait loop from pdflush and kswapd
56 * activity, but those code paths have their own higher-level
57 * throttling.
59 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
60 lock_buffer(bh);
61 } else if (!trylock_buffer(bh)) {
62 redirty_page_for_writepage(wbc, page);
63 continue;
65 if (test_clear_buffer_dirty(bh)) {
66 mark_buffer_async_write(bh);
67 } else {
68 unlock_buffer(bh);
70 } while ((bh = bh->b_this_page) != head);
73 * The page and its buffers are protected by PageWriteback(), so we can
74 * drop the bh refcounts early.
76 BUG_ON(PageWriteback(page));
77 set_page_writeback(page);
79 do {
80 struct buffer_head *next = bh->b_this_page;
81 if (buffer_async_write(bh)) {
82 submit_bh(write_op, bh);
83 nr_underway++;
85 bh = next;
86 } while (bh != head);
87 unlock_page(page);
89 err = 0;
90 if (nr_underway == 0)
91 end_page_writeback(page);
93 return err;
96 static const struct address_space_operations aspace_aops = {
97 .writepage = gfs2_aspace_writepage,
98 .releasepage = gfs2_releasepage,
99 .sync_page = block_sync_page,
103 * gfs2_aspace_get - Create and initialize a struct inode structure
104 * @sdp: the filesystem the aspace is in
106 * Right now a struct inode is just a struct inode. Maybe Linux
107 * will supply a more lightweight address space construct (that works)
108 * in the future.
110 * Make sure pages/buffers in this aspace aren't in high memory.
112 * Returns: the aspace
115 struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp)
117 struct inode *aspace;
118 struct gfs2_inode *ip;
120 aspace = new_inode(sdp->sd_vfs);
121 if (aspace) {
122 mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS);
123 aspace->i_mapping->a_ops = &aspace_aops;
124 aspace->i_size = ~0ULL;
125 ip = GFS2_I(aspace);
126 clear_bit(GIF_USER, &ip->i_flags);
127 insert_inode_hash(aspace);
129 return aspace;
/**
 * gfs2_aspace_put - Release an aspace inode
 * @aspace: the aspace inode to release
 *
 * Takes the inode out of the inode hash and then drops a reference on it.
 */
void gfs2_aspace_put(struct inode *aspace)
{
	remove_inode_hash(aspace);
	iput(aspace);
}
139 * gfs2_meta_sync - Sync all buffers associated with a glock
140 * @gl: The glock
144 void gfs2_meta_sync(struct gfs2_glock *gl)
146 struct address_space *mapping = gl->gl_aspace->i_mapping;
147 int error;
149 filemap_fdatawrite(mapping);
150 error = filemap_fdatawait(mapping);
152 if (error)
153 gfs2_io_error(gl->gl_sbd);
157 * gfs2_getbuf - Get a buffer with a given address space
158 * @gl: the glock
159 * @blkno: the block number (filesystem scope)
160 * @create: 1 if the buffer should be created
162 * Returns: the buffer
165 struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
167 struct address_space *mapping = gl->gl_aspace->i_mapping;
168 struct gfs2_sbd *sdp = gl->gl_sbd;
169 struct page *page;
170 struct buffer_head *bh;
171 unsigned int shift;
172 unsigned long index;
173 unsigned int bufnum;
175 shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
176 index = blkno >> shift; /* convert block to page */
177 bufnum = blkno - (index << shift); /* block buf index within page */
179 if (create) {
180 for (;;) {
181 page = grab_cache_page(mapping, index);
182 if (page)
183 break;
184 yield();
186 } else {
187 page = find_lock_page(mapping, index);
188 if (!page)
189 return NULL;
192 if (!page_has_buffers(page))
193 create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);
195 /* Locate header for our buffer within our page */
196 for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
197 /* Do nothing */;
198 get_bh(bh);
200 if (!buffer_mapped(bh))
201 map_bh(bh, sdp->sd_vfs, blkno);
203 unlock_page(page);
204 mark_page_accessed(page);
205 page_cache_release(page);
207 return bh;
210 static void meta_prep_new(struct buffer_head *bh)
212 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
214 lock_buffer(bh);
215 clear_buffer_dirty(bh);
216 set_buffer_uptodate(bh);
217 unlock_buffer(bh);
219 mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
223 * gfs2_meta_new - Get a block
224 * @gl: The glock associated with this block
225 * @blkno: The block number
227 * Returns: The buffer
230 struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
232 struct buffer_head *bh;
233 bh = gfs2_getbuf(gl, blkno, CREATE);
234 meta_prep_new(bh);
235 return bh;
239 * gfs2_meta_read - Read a block from disk
240 * @gl: The glock covering the block
241 * @blkno: The block number
242 * @flags: flags
243 * @bhp: the place where the buffer is returned (NULL on failure)
245 * Returns: errno
248 int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
249 struct buffer_head **bhp)
251 struct gfs2_sbd *sdp = gl->gl_sbd;
252 struct buffer_head *bh;
254 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
255 return -EIO;
257 *bhp = bh = gfs2_getbuf(gl, blkno, CREATE);
259 lock_buffer(bh);
260 if (buffer_uptodate(bh)) {
261 unlock_buffer(bh);
262 return 0;
264 bh->b_end_io = end_buffer_read_sync;
265 get_bh(bh);
266 submit_bh(READ_SYNC | (1 << BIO_RW_META), bh);
267 if (!(flags & DIO_WAIT))
268 return 0;
270 wait_on_buffer(bh);
271 if (unlikely(!buffer_uptodate(bh))) {
272 struct gfs2_trans *tr = current->journal_info;
273 if (tr && tr->tr_touched)
274 gfs2_io_error_bh(sdp, bh);
275 brelse(bh);
276 return -EIO;
279 return 0;
283 * gfs2_meta_wait - Reread a block from disk
284 * @sdp: the filesystem
285 * @bh: The block to wait for
287 * Returns: errno
290 int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
292 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
293 return -EIO;
295 wait_on_buffer(bh);
297 if (!buffer_uptodate(bh)) {
298 struct gfs2_trans *tr = current->journal_info;
299 if (tr && tr->tr_touched)
300 gfs2_io_error_bh(sdp, bh);
301 return -EIO;
303 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
304 return -EIO;
306 return 0;
310 * gfs2_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer
311 * @gl: the glock the buffer belongs to
312 * @bh: The buffer to be attached to
313 * @meta: Flag to indicate whether its metadata or not
316 void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
317 int meta)
319 struct gfs2_bufdata *bd;
321 if (meta)
322 lock_page(bh->b_page);
324 if (bh->b_private) {
325 if (meta)
326 unlock_page(bh->b_page);
327 return;
330 bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
331 bd->bd_bh = bh;
332 bd->bd_gl = gl;
334 INIT_LIST_HEAD(&bd->bd_list_tr);
335 if (meta)
336 lops_init_le(&bd->bd_le, &gfs2_buf_lops);
337 else
338 lops_init_le(&bd->bd_le, &gfs2_databuf_lops);
339 bh->b_private = bd;
341 if (meta)
342 unlock_page(bh->b_page);
345 void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta)
347 struct gfs2_sbd *sdp = GFS2_SB(bh->b_page->mapping->host);
348 struct gfs2_bufdata *bd = bh->b_private;
349 if (test_clear_buffer_pinned(bh)) {
350 list_del_init(&bd->bd_le.le_list);
351 if (meta) {
352 gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
353 sdp->sd_log_num_buf--;
354 tr->tr_num_buf_rm++;
355 } else {
356 gfs2_assert_warn(sdp, sdp->sd_log_num_databuf);
357 sdp->sd_log_num_databuf--;
358 tr->tr_num_databuf_rm++;
360 tr->tr_touched = 1;
361 brelse(bh);
363 if (bd) {
364 if (bd->bd_ail) {
365 gfs2_remove_from_ail(bd);
366 bh->b_private = NULL;
367 bd->bd_bh = NULL;
368 bd->bd_blkno = bh->b_blocknr;
369 gfs2_trans_add_revoke(sdp, bd);
372 clear_buffer_dirty(bh);
373 clear_buffer_uptodate(bh);
377 * gfs2_meta_wipe - make inode's buffers so they aren't dirty/pinned anymore
378 * @ip: the inode who owns the buffers
379 * @bstart: the first buffer in the run
380 * @blen: the number of buffers in the run
384 void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
386 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
387 struct buffer_head *bh;
389 while (blen) {
390 bh = gfs2_getbuf(ip->i_gl, bstart, NO_CREATE);
391 if (bh) {
392 lock_buffer(bh);
393 gfs2_log_lock(sdp);
394 gfs2_remove_from_journal(bh, current->journal_info, 1);
395 gfs2_log_unlock(sdp);
396 unlock_buffer(bh);
397 brelse(bh);
400 bstart++;
401 blen--;
406 * gfs2_meta_indirect_buffer - Get a metadata buffer
407 * @ip: The GFS2 inode
408 * @height: The level of this buf in the metadata (indir addr) tree (if any)
409 * @num: The block number (device relative) of the buffer
410 * @new: Non-zero if we may create a new buffer
411 * @bhp: the buffer is returned here
413 * Returns: errno
416 int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
417 int new, struct buffer_head **bhp)
419 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
420 struct gfs2_glock *gl = ip->i_gl;
421 struct buffer_head *bh;
422 int ret = 0;
424 if (new) {
425 BUG_ON(height == 0);
426 bh = gfs2_meta_new(gl, num);
427 gfs2_trans_add_bh(ip->i_gl, bh, 1);
428 gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
429 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
430 } else {
431 u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI;
432 ret = gfs2_meta_read(gl, num, DIO_WAIT, &bh);
433 if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) {
434 brelse(bh);
435 ret = -EIO;
438 *bhp = bh;
439 return ret;
443 * gfs2_meta_ra - start readahead on an extent of a file
444 * @gl: the glock the blocks belong to
445 * @dblock: the starting disk block
446 * @extlen: the number of blocks in the extent
448 * returns: the first buffer in the extent
451 struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
453 struct gfs2_sbd *sdp = gl->gl_sbd;
454 struct buffer_head *first_bh, *bh;
455 u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >>
456 sdp->sd_sb.sb_bsize_shift;
458 BUG_ON(!extlen);
460 if (max_ra < 1)
461 max_ra = 1;
462 if (extlen > max_ra)
463 extlen = max_ra;
465 first_bh = gfs2_getbuf(gl, dblock, CREATE);
467 if (buffer_uptodate(first_bh))
468 goto out;
469 if (!buffer_locked(first_bh))
470 ll_rw_block(READ_SYNC | (1 << BIO_RW_META), 1, &first_bh);
472 dblock++;
473 extlen--;
475 while (extlen) {
476 bh = gfs2_getbuf(gl, dblock, CREATE);
478 if (!buffer_uptodate(bh) && !buffer_locked(bh))
479 ll_rw_block(READA, 1, &bh);
480 brelse(bh);
481 dblock++;
482 extlen--;
483 if (!buffer_locked(first_bh) && buffer_uptodate(first_bh))
484 goto out;
487 wait_on_buffer(first_bh);
488 out:
489 return first_bh;