Fix ext3-4-migrate.patch and mballoc-core.patches
[ext4-patch-queue.git] / ext4-block-reservation.patch
blobc807320f17946768e62d5dfdd5ea21d084ec5050
1 ext4: [RFC] free space management for delayed allocation
3 From: Alex Tomas <alex@clusterfs.com>
5 ext4-block-reservation.patch
7 This is scalable free space management. Every time we
8 delay allocation of a page, space for it (including metadata)
9 must be reserved.
11 Signed-off-by: Alex Tomas <alex@clusterfs.com>
13 ---
14 ---
15 fs/ext4/balloc.c | 176 ++++++++++++++++++++++++++++++++++++++++++++-
16 fs/ext4/super.c | 2
17 include/linux/ext4_fs.h | 5 +
18 include/linux/ext4_fs_sb.h | 5 +
19 4 files changed, 185 insertions(+), 3 deletions(-)
21 Index: linux-2.6.23-rc2/fs/ext4/balloc.c
22 ===================================================================
23 --- linux-2.6.23-rc2.orig/fs/ext4/balloc.c 2007-08-06 22:18:09.000000000 -0700
24 +++ linux-2.6.23-rc2/fs/ext4/balloc.c 2007-08-06 22:18:59.000000000 -0700
25 @@ -630,8 +630,10 @@ void ext4_free_blocks(handle_t *handle,
26 return;
28 ext4_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
29 - if (dquot_freed_blocks)
30 + if (dquot_freed_blocks) {
31 + ext4_release_blocks(sb, dquot_freed_blocks);
32 DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
33 + }
34 return;
37 @@ -1440,7 +1442,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *h
38 struct ext4_sb_info *sbi;
39 struct ext4_reserve_window_node *my_rsv = NULL;
40 struct ext4_block_alloc_info *block_i;
41 - unsigned short windowsz = 0;
42 + unsigned short windowsz = 0, reserved = 0;
43 #ifdef EXT4FS_DEBUG
44 static int goal_hits, goal_attempts;
45 #endif
46 @@ -1462,6 +1464,13 @@ ext4_fsblk_t ext4_new_blocks(handle_t *h
47 return 0;
50 + if (!(EXT4_I(inode)->i_state & EXT4_STATE_BLOCKS_RESERVED)) {
51 + *errp = ext4_reserve_blocks(sb, num);
52 + if (*errp)
53 + return 0;
54 + reserved = num;
55 + }
57 sbi = EXT4_SB(sb);
58 es = EXT4_SB(sb)->s_es;
59 ext4_debug("goal=%lu.\n", goal);
60 @@ -1674,8 +1683,11 @@ out:
62 * Undo the block allocation
64 - if (!performed_allocation)
65 + if (!performed_allocation) {
66 DQUOT_FREE_BLOCK(inode, *count);
67 + if (reserved)
68 + ext4_release_blocks(sb, reserved);
69 + }
70 brelse(bitmap_bh);
71 return 0;
73 @@ -1834,3 +1846,161 @@ unsigned long ext4_bg_num_gdb(struct sup
74 return ext4_bg_num_gdb_meta(sb,group);
78 +/*
79 + * The routines below reserve blocks.
80 + * We need this for delayed allocation; otherwise we
81 + * could hit -ENOSPC at flush time.
82 + */
84 +/*
85 + * ->commit_write(), where we are going to reserve
86 + * not-yet-allocated blocks, is a well-known hot path,
87 + * so we have to make it scalable and avoid global
88 + * data as much as possible.
89 + *
90 + * There is a per-sb array with one slot per CPU.
91 + */
93 +struct ext4_reservation_slot {
94 + __u64 rs_reserved;
95 + spinlock_t rs_lock;
96 +} ____cacheline_aligned;
99 +int ext4_reserve_local(struct super_block *sb, int blocks)
101 + struct ext4_sb_info *sbi = EXT4_SB(sb);
102 + struct ext4_reservation_slot *rs;
103 + int rc = -ENOSPC;
105 + preempt_disable();
106 + rs = sbi->s_reservation_slots + smp_processor_id();
108 + spin_lock(&rs->rs_lock);
109 + if (likely(rs->rs_reserved >= blocks)) {
110 + rs->rs_reserved -= blocks;
111 + rc = 0;
113 + spin_unlock(&rs->rs_lock);
115 + preempt_enable();
116 + return rc;
120 +void ext4_rebalance_reservation(struct ext4_reservation_slot *rs, __u64 free)
122 + int i, used_slots = 0;
123 + __u64 chunk;
125 + /* count the slots in use: nonzero reservation, or the current CPU's */
126 + for (i = 0; i < NR_CPUS; i++)
127 + if (rs[i].rs_reserved || i == smp_processor_id())
128 + used_slots++;
130 + /* chunk is the number of blocks every used slot will
131 + * get, rounded up so it is nonzero whenever free is */
132 + chunk = free + used_slots - 1;
133 + do_div(chunk, used_slots);
135 + for (i = 0; i < NR_CPUS; i++) {
136 + if (free < chunk)
137 + chunk = free;
138 + if (rs[i].rs_reserved || i == smp_processor_id()) {
139 + rs[i].rs_reserved = chunk;
140 + free -= chunk;
141 + BUG_ON(free < 0);
144 + BUG_ON(free);
147 +int ext4_reserve_global(struct super_block *sb, int blocks)
149 + struct ext4_sb_info *sbi = EXT4_SB(sb);
150 + struct ext4_reservation_slot *rs;
151 + int i, rc = -ENOENT;
152 + __u64 free = 0;
154 + rs = sbi->s_reservation_slots;
156 + /* lock all slots */
157 + for (i = 0; i < NR_CPUS; i++) {
158 + spin_lock(&rs[i].rs_lock);
159 + free += rs[i].rs_reserved;
162 + if (free >= blocks) {
163 + free -= blocks;
164 + ext4_rebalance_reservation(rs, free);
165 + rc = 0;
168 + for (i = 0; i < NR_CPUS; i++)
169 + spin_unlock(&rs[i].rs_lock);
171 + return rc;
174 +int ext4_reserve_blocks(struct super_block *sb, int blocks)
176 + int ret;
178 + BUG_ON(blocks <= 0);
180 + ret = ext4_reserve_local(sb, blocks);
181 + if (likely(ret == 0))
182 + return 0;
184 + return ext4_reserve_global(sb, blocks);
187 +void ext4_release_blocks(struct super_block *sb, int blocks)
189 + struct ext4_sb_info *sbi = EXT4_SB(sb);
190 + struct ext4_reservation_slot *rs;
192 + BUG_ON(blocks <= 0);
194 + preempt_disable();
195 + rs = sbi->s_reservation_slots + smp_processor_id();
197 + spin_lock(&rs->rs_lock);
198 + rs->rs_reserved += blocks;
199 + spin_unlock(&rs->rs_lock);
201 + preempt_enable();
204 +int ext4_reserve_init(struct super_block *sb)
206 + struct ext4_sb_info *sbi = EXT4_SB(sb);
207 + struct ext4_reservation_slot *rs;
208 + int i;
210 + rs = kmalloc(sizeof(struct ext4_reservation_slot)*NR_CPUS, GFP_KERNEL);
211 + if (rs == NULL)
212 + return -ENOMEM;
213 + sbi->s_reservation_slots = rs;
215 + for (i = 0; i < NR_CPUS; i++) {
216 + spin_lock_init(&rs[i].rs_lock);
217 + rs[i].rs_reserved = 0;
219 + rs[0].rs_reserved = percpu_counter_sum(&sbi->s_freeblocks_counter);
221 + return 0;
224 +void ext4_reserve_release(struct super_block *sb)
226 + struct ext4_sb_info *sbi = EXT4_SB(sb);
227 + struct ext4_reservation_slot *rs;
229 + rs = sbi->s_reservation_slots;
230 + BUG_ON(sbi->s_reservation_slots == NULL);
231 + kfree(sbi->s_reservation_slots);
232 + sbi->s_reservation_slots = NULL;
235 Index: linux-2.6.23-rc2/fs/ext4/super.c
236 ===================================================================
237 --- linux-2.6.23-rc2.orig/fs/ext4/super.c 2007-08-06 22:18:49.000000000 -0700
238 +++ linux-2.6.23-rc2/fs/ext4/super.c 2007-08-06 22:18:59.000000000 -0700
239 @@ -441,6 +441,7 @@ static void ext4_put_super (struct super
240 struct ext4_super_block *es = sbi->s_es;
241 int i;
243 + ext4_reserve_release(sb);
244 ext4_ext_release(sb);
245 ext4_xattr_put_super(sb);
246 jbd2_journal_destroy(sbi->s_journal);
247 @@ -1948,6 +1949,7 @@ static int ext4_fill_super (struct super
248 "writeback");
250 ext4_ext_init(sb);
251 + ext4_reserve_init(sb);
253 lock_kernel();
254 return 0;
255 Index: linux-2.6.23-rc2/include/linux/ext4_fs.h
256 ===================================================================
257 --- linux-2.6.23-rc2.orig/include/linux/ext4_fs.h 2007-08-06 22:18:46.000000000 -0700
258 +++ linux-2.6.23-rc2/include/linux/ext4_fs.h 2007-08-06 22:18:59.000000000 -0700
259 @@ -203,6 +203,7 @@ struct ext4_group_desc
260 #define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
261 #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
262 #define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
263 +#define EXT4_STATE_BLOCKS_RESERVED 0x00000010 /* blocks reserved */
265 /* Used to pass group descriptor data when online resize is done */
266 struct ext4_new_group_input {
267 @@ -912,6 +913,10 @@ extern struct ext4_group_desc * ext4_get
268 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
269 extern void ext4_init_block_alloc_info(struct inode *);
270 extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
271 +int ext4_reserve_init(struct super_block *sb);
272 +void ext4_reserve_release(struct super_block *sb);
273 +void ext4_release_blocks(struct super_block *sb, int blocks);
274 +int ext4_reserve_blocks(struct super_block *sb, int blocks);
276 /* dir.c */
277 extern int ext4_check_dir_entry(const char *, struct inode *,
278 Index: linux-2.6.23-rc2/include/linux/ext4_fs_sb.h
279 ===================================================================
280 --- linux-2.6.23-rc2.orig/include/linux/ext4_fs_sb.h 2007-08-06 22:18:09.000000000 -0700
281 +++ linux-2.6.23-rc2/include/linux/ext4_fs_sb.h 2007-08-06 22:18:59.000000000 -0700
282 @@ -24,6 +24,8 @@
283 #endif
284 #include <linux/rbtree.h>
286 +struct ext4_reservation_slot;
289 * third extended-fs super-block data in memory
291 @@ -67,6 +69,9 @@ struct ext4_sb_info {
292 struct rb_root s_rsv_window_root;
293 struct ext4_reserve_window_node s_rsv_window_head;
295 + /* global reservation structures */
296 + struct ext4_reservation_slot *s_reservation_slots;
298 /* Journaling */
299 struct inode * s_journal_inode;
300 struct journal_s * s_journal;