ext2/ext3/ext4: add block bitmap validation
[linux-2.6/kvm.git] / fs / ext2 / balloc.c
blobffaa6d8454425e1885e9e36dde2987dc00d6a278
1 /*
2 * linux/fs/ext2/balloc.c
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
9 * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
10 * Big-endian to little-endian byte-swapping/bitmaps by
11 * David S. Miller (davem@caip.rutgers.edu), 1995
14 #include "ext2.h"
15 #include <linux/quotaops.h>
16 #include <linux/sched.h>
17 #include <linux/buffer_head.h>
18 #include <linux/capability.h>
21 * balloc.c contains the blocks allocation and deallocation routines
25 * The free blocks are managed by bitmaps. A file system contains several
26 * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
27 * block for inodes, N blocks for the inode table and data blocks.
29 * The file system contains group descriptors which are located after the
30 * super block. Each descriptor contains the numbers of the bitmap blocks and
31 * the free blocks count of its group. The descriptors are loaded in memory
32 * when a file system is mounted (see ext2_fill_super).
/*
 * in_range - test whether @b lies in the inclusive interval
 * [@first, @first + @len - 1].
 *
 * This is a plain macro: @first is evaluated twice, so callers must not
 * pass expressions with side effects.
 */
#define in_range(b, first, len) \
	((b) >= (first) && (b) <= (first) + (len) - 1)
38 struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
39 unsigned int block_group,
40 struct buffer_head ** bh)
42 unsigned long group_desc;
43 unsigned long offset;
44 struct ext2_group_desc * desc;
45 struct ext2_sb_info *sbi = EXT2_SB(sb);
47 if (block_group >= sbi->s_groups_count) {
48 ext2_error (sb, "ext2_get_group_desc",
49 "block_group >= groups_count - "
50 "block_group = %d, groups_count = %lu",
51 block_group, sbi->s_groups_count);
53 return NULL;
56 group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb);
57 offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1);
58 if (!sbi->s_group_desc[group_desc]) {
59 ext2_error (sb, "ext2_get_group_desc",
60 "Group descriptor not loaded - "
61 "block_group = %d, group_desc = %lu, desc = %lu",
62 block_group, group_desc, offset);
63 return NULL;
66 desc = (struct ext2_group_desc *) sbi->s_group_desc[group_desc]->b_data;
67 if (bh)
68 *bh = sbi->s_group_desc[group_desc];
69 return desc + offset;
72 static inline int
73 block_in_use(unsigned long block, struct super_block *sb, unsigned char *map)
75 return ext2_test_bit ((block -
76 le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block)) %
77 EXT2_BLOCKS_PER_GROUP(sb), map);
81 * Read the bitmap for a given block_group, reading into the specified
82 * slot in the superblock's bitmap cache.
84 * Return buffer_head on success or NULL in case of failure.
86 static struct buffer_head *
87 read_block_bitmap(struct super_block *sb, unsigned int block_group)
89 int i;
90 struct ext2_group_desc * desc;
91 struct buffer_head * bh = NULL;
92 unsigned int bitmap_blk;
94 desc = ext2_get_group_desc (sb, block_group, NULL);
95 if (!desc)
96 return NULL;
97 bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
98 bh = sb_bread(sb, bitmap_blk);
99 if (!bh)
100 ext2_error (sb, __FUNCTION__,
101 "Cannot read block bitmap - "
102 "block_group = %d, block_bitmap = %u",
103 block_group, le32_to_cpu(desc->bg_block_bitmap));
105 /* check whether block bitmap block number is set */
106 if (!block_in_use(bitmap_blk, sb, bh->b_data)) {
107 /* bad block bitmap */
108 goto error_out;
110 /* check whether the inode bitmap block number is set */
111 bitmap_blk = le32_to_cpu(desc->bg_inode_bitmap);
112 if (!block_in_use(bitmap_blk, sb, bh->b_data)) {
113 /* bad block bitmap */
114 goto error_out;
116 /* check whether the inode table block number is set */
117 bitmap_blk = le32_to_cpu(desc->bg_inode_table);
118 for (i = 0; i < EXT2_SB(sb)->s_itb_per_group; i++, bitmap_blk++) {
119 if (!block_in_use(bitmap_blk, sb, bh->b_data)) {
120 /* bad block bitmap */
121 goto error_out;
125 return bh;
127 error_out:
128 brelse(bh);
129 ext2_error(sb, __FUNCTION__,
130 "Invalid block bitmap - "
131 "block_group = %d, block = %u",
132 block_group, bitmap_blk);
133 return NULL;
137 * Set sb->s_dirt here because the superblock was "logically" altered. We
138 * need to recalculate its free blocks count and flush it out.
140 static int reserve_blocks(struct super_block *sb, int count)
142 struct ext2_sb_info *sbi = EXT2_SB(sb);
143 struct ext2_super_block *es = sbi->s_es;
144 unsigned free_blocks;
145 unsigned root_blocks;
147 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
148 root_blocks = le32_to_cpu(es->s_r_blocks_count);
150 if (free_blocks < count)
151 count = free_blocks;
153 if (free_blocks < root_blocks + count && !capable(CAP_SYS_RESOURCE) &&
154 sbi->s_resuid != current->fsuid &&
155 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
157 * We are too close to reserve and we are not privileged.
158 * Can we allocate anything at all?
160 if (free_blocks > root_blocks)
161 count = free_blocks - root_blocks;
162 else
163 return 0;
166 percpu_counter_sub(&sbi->s_freeblocks_counter, count);
167 sb->s_dirt = 1;
168 return count;
171 static void release_blocks(struct super_block *sb, int count)
173 if (count) {
174 struct ext2_sb_info *sbi = EXT2_SB(sb);
176 percpu_counter_add(&sbi->s_freeblocks_counter, count);
177 sb->s_dirt = 1;
181 static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no,
182 struct ext2_group_desc *desc, struct buffer_head *bh, int count)
184 unsigned free_blocks;
186 if (!desc->bg_free_blocks_count)
187 return 0;
189 spin_lock(sb_bgl_lock(sbi, group_no));
190 free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
191 if (free_blocks < count)
192 count = free_blocks;
193 desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count);
194 spin_unlock(sb_bgl_lock(sbi, group_no));
195 mark_buffer_dirty(bh);
196 return count;
199 static void group_release_blocks(struct super_block *sb, int group_no,
200 struct ext2_group_desc *desc, struct buffer_head *bh, int count)
202 if (count) {
203 struct ext2_sb_info *sbi = EXT2_SB(sb);
204 unsigned free_blocks;
206 spin_lock(sb_bgl_lock(sbi, group_no));
207 free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
208 desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count);
209 spin_unlock(sb_bgl_lock(sbi, group_no));
210 sb->s_dirt = 1;
211 mark_buffer_dirty(bh);
215 /* Free given blocks, update quota and i_blocks field */
216 void ext2_free_blocks (struct inode * inode, unsigned long block,
217 unsigned long count)
219 struct buffer_head *bitmap_bh = NULL;
220 struct buffer_head * bh2;
221 unsigned long block_group;
222 unsigned long bit;
223 unsigned long i;
224 unsigned long overflow;
225 struct super_block * sb = inode->i_sb;
226 struct ext2_sb_info * sbi = EXT2_SB(sb);
227 struct ext2_group_desc * desc;
228 struct ext2_super_block * es = sbi->s_es;
229 unsigned freed = 0, group_freed;
231 if (block < le32_to_cpu(es->s_first_data_block) ||
232 block + count < block ||
233 block + count > le32_to_cpu(es->s_blocks_count)) {
234 ext2_error (sb, "ext2_free_blocks",
235 "Freeing blocks not in datazone - "
236 "block = %lu, count = %lu", block, count);
237 goto error_return;
240 ext2_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
242 do_more:
243 overflow = 0;
244 block_group = (block - le32_to_cpu(es->s_first_data_block)) /
245 EXT2_BLOCKS_PER_GROUP(sb);
246 bit = (block - le32_to_cpu(es->s_first_data_block)) %
247 EXT2_BLOCKS_PER_GROUP(sb);
249 * Check to see if we are freeing blocks across a group
250 * boundary.
252 if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) {
253 overflow = bit + count - EXT2_BLOCKS_PER_GROUP(sb);
254 count -= overflow;
256 brelse(bitmap_bh);
257 bitmap_bh = read_block_bitmap(sb, block_group);
258 if (!bitmap_bh)
259 goto error_return;
261 desc = ext2_get_group_desc (sb, block_group, &bh2);
262 if (!desc)
263 goto error_return;
265 if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
266 in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
267 in_range (block, le32_to_cpu(desc->bg_inode_table),
268 sbi->s_itb_per_group) ||
269 in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
270 sbi->s_itb_per_group))
271 ext2_error (sb, "ext2_free_blocks",
272 "Freeing blocks in system zones - "
273 "Block = %lu, count = %lu",
274 block, count);
276 for (i = 0, group_freed = 0; i < count; i++) {
277 if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
278 bit + i, bitmap_bh->b_data)) {
279 ext2_error(sb, __FUNCTION__,
280 "bit already cleared for block %lu", block + i);
281 } else {
282 group_freed++;
286 mark_buffer_dirty(bitmap_bh);
287 if (sb->s_flags & MS_SYNCHRONOUS)
288 sync_dirty_buffer(bitmap_bh);
290 group_release_blocks(sb, block_group, desc, bh2, group_freed);
291 freed += group_freed;
293 if (overflow) {
294 block += count;
295 count = overflow;
296 goto do_more;
298 error_return:
299 brelse(bitmap_bh);
300 release_blocks(sb, freed);
301 DQUOT_FREE_BLOCK(inode, freed);
304 static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal)
306 int k;
307 char *p, *r;
309 if (!ext2_test_bit(goal, map))
310 goto got_it;
312 repeat:
313 if (goal) {
315 * The goal was occupied; search forward for a free
316 * block within the next XX blocks.
318 * end_goal is more or less random, but it has to be
319 * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the
320 * next 64-bit boundary is simple..
322 k = (goal + 63) & ~63;
323 goal = ext2_find_next_zero_bit(map, k, goal);
324 if (goal < k)
325 goto got_it;
327 * Search in the remainder of the current group.
331 p = map + (goal >> 3);
332 r = memscan(p, 0, (size - goal + 7) >> 3);
333 k = (r - map) << 3;
334 if (k < size) {
336 * We have succeeded in finding a free byte in the block
337 * bitmap. Now search backwards to find the start of this
338 * group of free blocks - won't take more than 7 iterations.
340 for (goal = k; goal && !ext2_test_bit (goal - 1, map); goal--)
342 goto got_it;
345 k = ext2_find_next_zero_bit ((u32 *)map, size, goal);
346 if (k < size) {
347 goal = k;
348 goto got_it;
350 return -1;
351 got_it:
352 if (ext2_set_bit_atomic(lock, goal, (void *) map))
353 goto repeat;
354 return goal;
358 * ext2_new_block uses a goal block to assist allocation. If the goal is
359 * free, or there is a free block within 32 blocks of the goal, that block
360 * is allocated. Otherwise a forward search is made for a free block; within
361 * each block group the search first looks for an entire free byte in the block
362 * bitmap, and then for any free bit if that fails.
363 * This function also updates quota and i_blocks field.
365 int ext2_new_block(struct inode *inode, unsigned long goal,
366 u32 *prealloc_count, u32 *prealloc_block, int *err)
368 struct buffer_head *bitmap_bh = NULL;
369 struct buffer_head *gdp_bh; /* bh2 */
370 struct ext2_group_desc *desc;
371 int group_no; /* i */
372 int ret_block; /* j */
373 int group_idx; /* k */
374 int target_block; /* tmp */
375 int block = 0;
376 struct super_block *sb = inode->i_sb;
377 struct ext2_sb_info *sbi = EXT2_SB(sb);
378 struct ext2_super_block *es = sbi->s_es;
379 unsigned group_size = EXT2_BLOCKS_PER_GROUP(sb);
380 unsigned prealloc_goal = es->s_prealloc_blocks;
381 unsigned group_alloc = 0, es_alloc, dq_alloc;
382 int nr_scanned_groups;
384 if (!prealloc_goal--)
385 prealloc_goal = EXT2_DEFAULT_PREALLOC_BLOCKS - 1;
386 if (!prealloc_count || *prealloc_count)
387 prealloc_goal = 0;
389 if (DQUOT_ALLOC_BLOCK(inode, 1)) {
390 *err = -EDQUOT;
391 goto out;
394 while (prealloc_goal && DQUOT_PREALLOC_BLOCK(inode, prealloc_goal))
395 prealloc_goal--;
397 dq_alloc = prealloc_goal + 1;
398 es_alloc = reserve_blocks(sb, dq_alloc);
399 if (!es_alloc) {
400 *err = -ENOSPC;
401 goto out_dquot;
404 ext2_debug ("goal=%lu.\n", goal);
406 if (goal < le32_to_cpu(es->s_first_data_block) ||
407 goal >= le32_to_cpu(es->s_blocks_count))
408 goal = le32_to_cpu(es->s_first_data_block);
409 group_no = (goal - le32_to_cpu(es->s_first_data_block)) / group_size;
410 desc = ext2_get_group_desc (sb, group_no, &gdp_bh);
411 if (!desc) {
413 * gdp_bh may still be uninitialised. But group_release_blocks
414 * will not touch it because group_alloc is zero.
416 goto io_error;
419 group_alloc = group_reserve_blocks(sbi, group_no, desc,
420 gdp_bh, es_alloc);
421 if (group_alloc) {
422 ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
423 group_size);
424 brelse(bitmap_bh);
425 bitmap_bh = read_block_bitmap(sb, group_no);
426 if (!bitmap_bh)
427 goto io_error;
429 ext2_debug("goal is at %d:%d.\n", group_no, ret_block);
431 ret_block = grab_block(sb_bgl_lock(sbi, group_no),
432 bitmap_bh->b_data, group_size, ret_block);
433 if (ret_block >= 0)
434 goto got_block;
435 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
436 group_alloc = 0;
439 ext2_debug ("Bit not found in block group %d.\n", group_no);
442 * Now search the rest of the groups. We assume that
443 * i and desc correctly point to the last group visited.
445 nr_scanned_groups = 0;
446 retry:
447 for (group_idx = 0; !group_alloc &&
448 group_idx < sbi->s_groups_count; group_idx++) {
449 group_no++;
450 if (group_no >= sbi->s_groups_count)
451 group_no = 0;
452 desc = ext2_get_group_desc(sb, group_no, &gdp_bh);
453 if (!desc)
454 goto io_error;
455 group_alloc = group_reserve_blocks(sbi, group_no, desc,
456 gdp_bh, es_alloc);
458 if (!group_alloc) {
459 *err = -ENOSPC;
460 goto out_release;
462 brelse(bitmap_bh);
463 bitmap_bh = read_block_bitmap(sb, group_no);
464 if (!bitmap_bh)
465 goto io_error;
467 ret_block = grab_block(sb_bgl_lock(sbi, group_no), bitmap_bh->b_data,
468 group_size, 0);
469 if (ret_block < 0) {
471 * If a free block counter is corrupted we can loop inifintely.
472 * Detect that here.
474 nr_scanned_groups++;
475 if (nr_scanned_groups > 2 * sbi->s_groups_count) {
476 ext2_error(sb, "ext2_new_block",
477 "corrupted free blocks counters");
478 goto io_error;
481 * Someone else grabbed the last free block in this blockgroup
482 * before us. Retry the scan.
484 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
485 group_alloc = 0;
486 goto retry;
489 got_block:
490 ext2_debug("using block group %d(%d)\n",
491 group_no, desc->bg_free_blocks_count);
493 target_block = ret_block + group_no * group_size +
494 le32_to_cpu(es->s_first_data_block);
496 if (target_block == le32_to_cpu(desc->bg_block_bitmap) ||
497 target_block == le32_to_cpu(desc->bg_inode_bitmap) ||
498 in_range(target_block, le32_to_cpu(desc->bg_inode_table),
499 sbi->s_itb_per_group))
500 ext2_error (sb, "ext2_new_block",
501 "Allocating block in system zone - "
502 "block = %u", target_block);
504 if (target_block >= le32_to_cpu(es->s_blocks_count)) {
505 ext2_error (sb, "ext2_new_block",
506 "block(%d) >= blocks count(%d) - "
507 "block_group = %d, es == %p ", ret_block,
508 le32_to_cpu(es->s_blocks_count), group_no, es);
509 goto io_error;
511 block = target_block;
513 /* OK, we _had_ allocated something */
514 ext2_debug("found bit %d\n", ret_block);
516 dq_alloc--;
517 es_alloc--;
518 group_alloc--;
521 * Do block preallocation now if required.
523 write_lock(&EXT2_I(inode)->i_meta_lock);
524 if (group_alloc && !*prealloc_count) {
525 unsigned n;
527 for (n = 0; n < group_alloc && ++ret_block < group_size; n++) {
528 if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group_no),
529 ret_block,
530 (void*) bitmap_bh->b_data))
531 break;
533 *prealloc_block = block + 1;
534 *prealloc_count = n;
535 es_alloc -= n;
536 dq_alloc -= n;
537 group_alloc -= n;
539 write_unlock(&EXT2_I(inode)->i_meta_lock);
541 mark_buffer_dirty(bitmap_bh);
542 if (sb->s_flags & MS_SYNCHRONOUS)
543 sync_dirty_buffer(bitmap_bh);
545 ext2_debug ("allocating block %d. ", block);
547 *err = 0;
548 out_release:
549 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
550 release_blocks(sb, es_alloc);
551 out_dquot:
552 DQUOT_FREE_BLOCK(inode, dq_alloc);
553 out:
554 brelse(bitmap_bh);
555 return block;
557 io_error:
558 *err = -EIO;
559 goto out_release;
#ifdef EXT2FS_DEBUG

/* Number of zero bits in each possible 4-bit value. */
static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};

/*
 * ext2_count_free - count the clear bits in the first @numchars bytes of
 * a bitmap buffer (debug builds only)
 * @map:      buffer holding the bitmap; NULL yields 0
 * @numchars: number of bytes to examine
 */
unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
{
	unsigned long zero_bits = 0;
	unsigned int idx;

	if (!map)
		return (0);

	for (idx = 0; idx < numchars; idx++) {
		const char byte = map->b_data[idx];

		zero_bits += nibblemap[byte & 0xf];
		zero_bits += nibblemap[(byte >> 4) & 0xf];
	}
	return (zero_bits);
}

#endif  /*  EXT2FS_DEBUG  */
581 unsigned long ext2_count_free_blocks (struct super_block * sb)
583 struct ext2_group_desc * desc;
584 unsigned long desc_count = 0;
585 int i;
586 #ifdef EXT2FS_DEBUG
587 unsigned long bitmap_count, x;
588 struct ext2_super_block *es;
590 es = EXT2_SB(sb)->s_es;
591 desc_count = 0;
592 bitmap_count = 0;
593 desc = NULL;
594 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
595 struct buffer_head *bitmap_bh;
596 desc = ext2_get_group_desc (sb, i, NULL);
597 if (!desc)
598 continue;
599 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
600 bitmap_bh = read_block_bitmap(sb, i);
601 if (!bitmap_bh)
602 continue;
604 x = ext2_count_free(bitmap_bh, sb->s_blocksize);
605 printk ("group %d: stored = %d, counted = %lu\n",
606 i, le16_to_cpu(desc->bg_free_blocks_count), x);
607 bitmap_count += x;
608 brelse(bitmap_bh);
610 printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
611 (long)le32_to_cpu(es->s_free_blocks_count),
612 desc_count, bitmap_count);
613 return bitmap_count;
614 #else
615 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
616 desc = ext2_get_group_desc (sb, i, NULL);
617 if (!desc)
618 continue;
619 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
621 return desc_count;
622 #endif
/*
 * test_root - is @a an exact power (exponent >= 1) of @b?
 * @a: value to test
 * @b: base; callers in this file pass 3, 5 or 7
 *
 * Returns 1 if a == b^k for some k >= 1, else 0.
 *
 * The next power is checked against a / b before multiplying, so the
 * running product can never overflow int.  For positive values
 * num > a / b is equivalent to num * b > a, i.e. the next power would
 * already exceed @a.
 */
static inline int test_root(int a, int b)
{
	int num = b;

	/* Bases below 2 never grow; only the trivial comparison is defined. */
	if (b < 2)
		return a == b;

	while (a > num) {
		if (num > a / b)
			return 0;
		num *= b;
	}
	return num == a;
}
/*
 * ext2_group_sparse - does @group satisfy the sparse-group rule?
 *
 * Returns 1 for groups 0 and 1 (and any value <= 1) and for groups whose
 * number is a power of 3, 5 or 7; returns 0 otherwise.
 */
static int ext2_group_sparse(int group)
{
	static const int bases[] = { 3, 5, 7 };
	unsigned int idx;

	if (group <= 1)
		return 1;

	for (idx = 0; idx < sizeof(bases) / sizeof(bases[0]); idx++) {
		if (test_root(group, bases[idx]))
			return 1;
	}
	return 0;
}
644 * ext2_bg_has_super - number of blocks used by the superblock in group
645 * @sb: superblock for filesystem
646 * @group: group number to check
648 * Return the number of blocks used by the superblock (primary or backup)
649 * in this group. Currently this will be only 0 or 1.
651 int ext2_bg_has_super(struct super_block *sb, int group)
653 if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
654 !ext2_group_sparse(group))
655 return 0;
656 return 1;
660 * ext2_bg_num_gdb - number of blocks used by the group table in group
661 * @sb: superblock for filesystem
662 * @group: group number to check
664 * Return the number of blocks used by the group descriptor table
665 * (primary or backup) in this group. In the future there may be a
666 * different number of descriptor blocks in each group.
668 unsigned long ext2_bg_num_gdb(struct super_block *sb, int group)
670 if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
671 !ext2_group_sparse(group))
672 return 0;
673 return EXT2_SB(sb)->s_gdb_count;