Fix random whitespace issues and other checkpatch flameage. Also made
[ext4-patch-queue.git] / ext4_ialloc-flexbg.patch
blob2734dac38537f67a04ec5fc7d647793e6e57d5e3
1 ext4: New inode allocation for FLEX_BG meta-data groups.
3 From: Jose R. Santos <jrs@us.ibm.com>
5 This patch mostly controls the way inodes are allocated in order to
6 make ialloc aware of flex_bg block group grouping. It achieves this
7 by bypassing the Orlov allocator when block group meta-data are packed
8 together through mke2fs. Since the impact on the block allocator is
9 minimal, this patch should have little or no effect on other block
10 allocation algorithms. By controlling the inode allocation, it can
11 basically control where the initial search for new blocks begins and
12 thus indirectly manipulate the block allocator.
14 This allocator favors data and meta-data locality so the disk will
15 gradually be filled from block group zero upward. This helps improve
16 performance by reducing seek time. Since the group of inode tables
17 within one flex_bg are treated as one giant inode table, uninitialized
18 block groups would not need to partially initialize as many inode
19 tables as with Orlov, which would help fsck time as the filesystem usage
20 goes up.
22 Signed-off-by: Jose R. Santos <jrs@us.ibm.com>
23 Signed-off-by: Valerie Clement <valerie.clement@bull.net>
24 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
25 ---
27 fs/ext4/balloc.c | 14 +++++++
28 fs/ext4/ext4.h | 25 +++++++++++++-
29 fs/ext4/ext4_sb.h | 3 +
30 fs/ext4/ialloc.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
31 fs/ext4/mballoc.c | 15 ++++++++
32 fs/ext4/super.c | 57 ++++++++++++++++++++++++++++++++
33 6 files changed, 209 insertions(+), 1 deletion(-)
35 Index: linux-2.6.26-rc6/fs/ext4/balloc.c
36 ===================================================================
37 --- linux-2.6.26-rc6.orig/fs/ext4/balloc.c 2008-06-17 10:43:26.000000000 -0700
38 +++ linux-2.6.26-rc6/fs/ext4/balloc.c 2008-06-17 10:43:27.000000000 -0700
39 @@ -809,6 +809,13 @@ do_more:
40 spin_unlock(sb_bgl_lock(sbi, block_group));
41 percpu_counter_add(&sbi->s_freeblocks_counter, count);
43 + if (sbi->s_log_groups_per_flex) {
44 + ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
45 + spin_lock(sb_bgl_lock(sbi, flex_group));
46 + sbi->s_flex_groups[flex_group].free_blocks += count;
47 + spin_unlock(sb_bgl_lock(sbi, flex_group));
48 + }
50 /* We dirtied the bitmap block */
51 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
52 err = ext4_journal_dirty_metadata(handle, bitmap_bh);
53 @@ -1883,6 +1890,13 @@ allocated:
54 spin_unlock(sb_bgl_lock(sbi, group_no));
55 percpu_counter_sub(&sbi->s_freeblocks_counter, num);
57 + if (sbi->s_log_groups_per_flex) {
58 + ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
59 + spin_lock(sb_bgl_lock(sbi, flex_group));
60 + sbi->s_flex_groups[flex_group].free_blocks -= num;
61 + spin_unlock(sb_bgl_lock(sbi, flex_group));
62 + }
64 BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
65 err = ext4_journal_dirty_metadata(handle, gdp_bh);
66 if (!fatal)
67 Index: linux-2.6.26-rc6/fs/ext4/ext4.h
68 ===================================================================
69 --- linux-2.6.26-rc6.orig/fs/ext4/ext4.h 2008-06-17 10:43:22.000000000 -0700
70 +++ linux-2.6.26-rc6/fs/ext4/ext4.h 2008-06-17 10:43:27.000000000 -0700
71 @@ -170,6 +170,15 @@ struct ext4_group_desc
72 __u32 bg_reserved2[3];
75 +/*
76 + * Structure of a flex block group info
77 + */
79 +struct flex_groups {
80 + __u32 free_inodes;
81 + __u32 free_blocks;
82 +};
84 #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
85 #define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
86 #define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
87 @@ -647,7 +656,10 @@ struct ext4_super_block {
88 __le16 s_mmp_interval; /* # seconds to wait in MMP checking */
89 __le64 s_mmp_block; /* Block for multi-mount protection */
90 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
91 - __u32 s_reserved[163]; /* Padding to the end of the block */
92 + __u8 s_log_groups_per_flex; /* FLEX_BG group size */
93 + __u8 s_reserved_char_pad2;
94 + __le16 s_reserved_pad;
95 + __u32 s_reserved[162]; /* Padding to the end of the block */
98 #ifdef __KERNEL__
99 @@ -1159,6 +1171,17 @@ struct ext4_group_info *ext4_get_group_i
103 +static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi,
104 + ext4_group_t block_group)
106 + return block_group >> sbi->s_log_groups_per_flex;
109 +static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi)
111 + return 1 << sbi->s_log_groups_per_flex;
114 #define ext4_std_error(sb, errno) \
115 do { \
116 if ((errno)) \
117 Index: linux-2.6.26-rc6/fs/ext4/ext4_sb.h
118 ===================================================================
119 --- linux-2.6.26-rc6.orig/fs/ext4/ext4_sb.h 2008-06-17 10:43:22.000000000 -0700
120 +++ linux-2.6.26-rc6/fs/ext4/ext4_sb.h 2008-06-17 10:43:27.000000000 -0700
121 @@ -143,6 +143,9 @@ struct ext4_sb_info {
123 /* locality groups */
124 struct ext4_locality_group *s_locality_groups;
126 + unsigned int s_log_groups_per_flex;
127 + struct flex_groups *s_flex_groups;
130 #endif /* _EXT4_SB */
131 Index: linux-2.6.26-rc6/fs/ext4/ialloc.c
132 ===================================================================
133 --- linux-2.6.26-rc6.orig/fs/ext4/ialloc.c 2008-06-17 10:43:26.000000000 -0700
134 +++ linux-2.6.26-rc6/fs/ext4/ialloc.c 2008-06-17 10:43:27.000000000 -0700
135 @@ -157,6 +157,7 @@ void ext4_free_inode (handle_t *handle,
136 struct ext4_super_block * es;
137 struct ext4_sb_info *sbi;
138 int fatal = 0, err;
139 + ext4_group_t flex_group;
141 if (atomic_read(&inode->i_count) > 1) {
142 printk ("ext4_free_inode: inode has count=%d\n",
143 @@ -232,6 +233,12 @@ void ext4_free_inode (handle_t *handle,
144 if (is_directory)
145 percpu_counter_dec(&sbi->s_dirs_counter);
147 + if (sbi->s_log_groups_per_flex) {
148 + flex_group = ext4_flex_group(sbi, block_group);
149 + spin_lock(sb_bgl_lock(sbi, flex_group));
150 + sbi->s_flex_groups[flex_group].free_inodes++;
151 + spin_unlock(sb_bgl_lock(sbi, flex_group));
154 BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
155 err = ext4_journal_dirty_metadata(handle, bh2);
156 @@ -286,6 +293,80 @@ static int find_group_dir(struct super_b
157 return ret;
160 +#define free_block_ratio 10
162 +static int find_group_flex(struct super_block *sb, struct inode *parent,
163 + ext4_group_t *best_group)
165 + struct ext4_sb_info *sbi = EXT4_SB(sb);
166 + struct ext4_group_desc *desc;
167 + struct buffer_head *bh;
168 + struct flex_groups *flex_group = sbi->s_flex_groups;
169 + ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
170 + ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
171 + ext4_group_t ngroups = sbi->s_groups_count;
172 + int flex_size = ext4_flex_bg_size(sbi);
173 + ext4_group_t best_flex = parent_fbg_group;
174 + int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
175 + int flexbg_free_blocks;
176 + int flex_freeb_ratio;
177 + ext4_group_t n_fbg_groups;
178 + ext4_group_t i;
180 + n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >>
181 + sbi->s_log_groups_per_flex;
183 +find_close_to_parent:
184 + flexbg_free_blocks = flex_group[best_flex].free_blocks;
185 + flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
186 + if (flex_group[best_flex].free_inodes &&
187 + flex_freeb_ratio > free_block_ratio)
188 + goto found_flexbg;
190 + if (best_flex && best_flex == parent_fbg_group) {
191 + best_flex--;
192 + goto find_close_to_parent;
195 + for (i = 0; i < n_fbg_groups; i++) {
196 + if (i == parent_fbg_group || i == parent_fbg_group - 1)
197 + continue;
199 + flexbg_free_blocks = flex_group[i].free_blocks;
200 + flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
202 + if (flex_freeb_ratio > free_block_ratio &&
203 + flex_group[i].free_inodes) {
204 + best_flex = i;
205 + goto found_flexbg;
208 + if (best_flex < 0 ||
209 + (flex_group[i].free_blocks >
210 + flex_group[best_flex].free_blocks &&
211 + flex_group[i].free_inodes))
212 + best_flex = i;
215 + if (!flex_group[best_flex].free_inodes ||
216 + !flex_group[best_flex].free_blocks)
217 + return -1;
219 +found_flexbg:
220 + for (i = best_flex * flex_size; i < ngroups &&
221 + i < (best_flex + 1) * flex_size; i++) {
222 + desc = ext4_get_group_desc(sb, i, &bh);
223 + if (le16_to_cpu(desc->bg_free_inodes_count)) {
224 + *best_group = i;
225 + goto out;
229 + return -1;
230 +out:
231 + return 0;
235 * Orlov's allocator for directories.
237 @@ -501,6 +582,7 @@ struct inode *ext4_new_inode(handle_t *h
238 struct inode *ret;
239 ext4_group_t i;
240 int free = 0;
241 + ext4_group_t flex_group;
243 /* Cannot create files in a deleted directory */
244 if (!dir || !dir->i_nlink)
245 @@ -514,6 +596,12 @@ struct inode *ext4_new_inode(handle_t *h
247 sbi = EXT4_SB(sb);
248 es = sbi->s_es;
250 + if (sbi->s_log_groups_per_flex) {
251 + ret2 = find_group_flex(sb, dir, &group);
252 + goto got_group;
255 if (S_ISDIR(mode)) {
256 if (test_opt (sb, OLDALLOC))
257 ret2 = find_group_dir(sb, dir, &group);
258 @@ -522,6 +610,7 @@ struct inode *ext4_new_inode(handle_t *h
259 } else
260 ret2 = find_group_other(sb, dir, &group);
262 +got_group:
263 err = -ENOSPC;
264 if (ret2 == -1)
265 goto out;
266 @@ -676,6 +765,13 @@ got:
267 percpu_counter_inc(&sbi->s_dirs_counter);
268 sb->s_dirt = 1;
270 + if (sbi->s_log_groups_per_flex) {
271 + flex_group = ext4_flex_group(sbi, group);
272 + spin_lock(sb_bgl_lock(sbi, flex_group));
273 + sbi->s_flex_groups[flex_group].free_inodes--;
274 + spin_unlock(sb_bgl_lock(sbi, flex_group));
277 inode->i_uid = current->fsuid;
278 if (test_opt (sb, GRPID))
279 inode->i_gid = dir->i_gid;
280 Index: linux-2.6.26-rc6/fs/ext4/mballoc.c
281 ===================================================================
282 --- linux-2.6.26-rc6.orig/fs/ext4/mballoc.c 2008-06-17 10:43:26.000000000 -0700
283 +++ linux-2.6.26-rc6/fs/ext4/mballoc.c 2008-06-17 10:43:27.000000000 -0700
284 @@ -2834,6 +2834,14 @@ ext4_mb_mark_diskspace_used(struct ext4_
285 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
286 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
288 + if (sbi->s_log_groups_per_flex) {
289 + ext4_group_t flex_group = ext4_flex_group(sbi,
290 + ac->ac_b_ex.fe_group);
291 + spin_lock(sb_bgl_lock(sbi, flex_group));
292 + sbi->s_flex_groups[flex_group].free_blocks -= ac->ac_b_ex.fe_len;
293 + spin_unlock(sb_bgl_lock(sbi, flex_group));
296 err = ext4_journal_dirty_metadata(handle, bitmap_bh);
297 if (err)
298 goto out_err;
299 @@ -4337,6 +4345,13 @@ do_more:
300 spin_unlock(sb_bgl_lock(sbi, block_group));
301 percpu_counter_add(&sbi->s_freeblocks_counter, count);
303 + if (sbi->s_log_groups_per_flex) {
304 + ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
305 + spin_lock(sb_bgl_lock(sbi, flex_group));
306 + sbi->s_flex_groups[flex_group].free_blocks += count;
307 + spin_unlock(sb_bgl_lock(sbi, flex_group));
310 ext4_mb_release_desc(&e4b);
312 *freed += count;
313 Index: linux-2.6.26-rc6/fs/ext4/super.c
314 ===================================================================
315 --- linux-2.6.26-rc6.orig/fs/ext4/super.c 2008-06-17 10:43:26.000000000 -0700
316 +++ linux-2.6.26-rc6/fs/ext4/super.c 2008-06-17 10:43:27.000000000 -0700
317 @@ -517,6 +517,7 @@ static void ext4_put_super (struct super
318 for (i = 0; i < sbi->s_gdb_count; i++)
319 brelse(sbi->s_group_desc[i]);
320 kfree(sbi->s_group_desc);
321 + kfree(sbi->s_flex_groups);
322 percpu_counter_destroy(&sbi->s_freeblocks_counter);
323 percpu_counter_destroy(&sbi->s_freeinodes_counter);
324 percpu_counter_destroy(&sbi->s_dirs_counter);
325 @@ -1442,6 +1443,54 @@ static int ext4_setup_super(struct super
326 return res;
329 +static int ext4_fill_flex_info(struct super_block *sb)
331 + struct ext4_sb_info *sbi = EXT4_SB(sb);
332 + struct ext4_group_desc *gdp = NULL;
333 + struct buffer_head *bh;
334 + ext4_group_t flex_group_count;
335 + ext4_group_t flex_group;
336 + int groups_per_flex = 0;
337 + __u64 block_bitmap = 0;
338 + int i;
340 + if (!sbi->s_es->s_log_groups_per_flex) {
341 + sbi->s_log_groups_per_flex = 0;
342 + return 1;
345 + sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
346 + groups_per_flex = 1 << sbi->s_log_groups_per_flex;
348 + flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) /
349 + groups_per_flex;
350 + sbi->s_flex_groups = kmalloc(flex_group_count *
351 + sizeof(struct flex_groups), GFP_KERNEL);
352 + if (sbi->s_flex_groups == NULL) {
353 + printk(KERN_ERR "EXT4-fs: not enough memory\n");
354 + goto failed;
356 + memset(sbi->s_flex_groups, 0, flex_group_count *
357 + sizeof(struct flex_groups));
359 + gdp = ext4_get_group_desc(sb, 1, &bh);
360 + block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
362 + for (i = 0; i < sbi->s_groups_count; i++) {
363 + gdp = ext4_get_group_desc(sb, i, &bh);
365 + flex_group = ext4_flex_group(sbi, i);
366 + sbi->s_flex_groups[flex_group].free_inodes +=
367 + le16_to_cpu(gdp->bg_free_inodes_count);
368 + sbi->s_flex_groups[flex_group].free_blocks +=
369 + le16_to_cpu(gdp->bg_free_blocks_count);
372 + return 1;
373 +failed:
374 + return 0;
377 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
378 struct ext4_group_desc *gdp)
380 @@ -2137,6 +2186,14 @@ static int ext4_fill_super (struct super
381 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
382 goto failed_mount2;
384 + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
385 + if (!ext4_fill_flex_info(sb)) {
386 + printk(KERN_ERR
387 + "EXT4-fs: unable to initialize "
388 + "flex_bg meta info!\n");
389 + goto failed_mount2;
392 sbi->s_gdb_count = db_count;
393 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
394 spin_lock_init(&sbi->s_next_gen_lock);