Update delalloc ENOSPC patch comments for clarity
[ext4-patch-queue.git] / ext4-Make-sure-all-the-block-allocation-paths-reser.patch
blob74d2082d5f419a5458588240773ef0cad731cf4d
1 ext4: Make sure all the block allocation paths reserve blocks
3 With delayed allocation we need to make sure blocks are reserved before
4 we attempt to allocate them. Otherwise we get block allocation failure
5 (ENOSPC) during writepages which cannot be handled. This would mean
6 silent data loss (We do a printk stating data will be lost). This patch
7 updates the DIO and fallocate code path to do block reservation before
8 block allocation. This is needed to make sure parallel DIO and fallocate
9 requests don't take blocks out of the delayed reserve space.
11 When the free blocks count goes below a threshold we switch to a slow path
12 which looks at other CPUs' accumulated percpu counter values.
14 Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
15 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
16 ---
17 fs/ext4/balloc.c | 58 ++++++++++++++++++++++++++++++++++++++--------------
18 fs/ext4/ext4.h | 13 +++++++++++
19 fs/ext4/inode.c | 5 +---
20 fs/ext4/mballoc.c | 23 +++++++++++---------
21 4 files changed, 69 insertions(+), 30 deletions(-)
23 diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
24 index cfed283..dc10bfd 100644
25 --- a/fs/ext4/balloc.c
26 +++ b/fs/ext4/balloc.c
27 @@ -1602,6 +1602,32 @@ ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
28 return ret;
31 +int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
32 + ext4_fsblk_t nblocks)
34 + s64 free_blocks;
35 + ext4_fsblk_t root_blocks = 0;
36 + struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
38 + free_blocks = percpu_counter_read(fbc);
40 + if (!capable(CAP_SYS_RESOURCE) &&
41 + sbi->s_resuid != current->fsuid &&
42 + (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
43 + root_blocks = ext4_r_blocks_count(sbi->s_es);
45 + if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
46 + free_blocks = percpu_counter_sum(&sbi->s_freeblocks_counter);
48 + if (free_blocks < (root_blocks + nblocks))
49 + /* we don't have free space */
50 + return -ENOSPC;
52 + /* reduce fs free blocks counter */
53 + percpu_counter_sub(fbc, nblocks);
54 + return 0;
57 /**
58 * ext4_has_free_blocks()
59 * @sbi: in-core super block structure.
60 @@ -1623,18 +1649,17 @@ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
61 sbi->s_resuid != current->fsuid &&
62 (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
63 root_blocks = ext4_r_blocks_count(sbi->s_es);
64 -#ifdef CONFIG_SMP
65 - if (free_blocks - root_blocks < FBC_BATCH)
66 - free_blocks =
67 - percpu_counter_sum(&sbi->s_freeblocks_counter);
68 -#endif
70 + if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
71 + free_blocks = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
73 if (free_blocks <= root_blocks)
74 /* we don't have free space */
75 return 0;
76 if (free_blocks - root_blocks < nblocks)
77 return free_blocks - root_blocks;
78 return nblocks;
79 - }
83 /**
84 @@ -1713,14 +1738,11 @@ ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
86 * With delalloc we already reserved the blocks
88 - *count = ext4_has_free_blocks(sbi, *count);
89 - }
90 - if (*count == 0) {
91 - *errp = -ENOSPC;
92 - return 0; /*return with ENOSPC error */
93 + if (ext4_claim_free_blocks(sbi, *count)) {
94 + *errp = -ENOSPC;
95 + return 0; /*return with ENOSPC error */
96 + }
98 - num = *count;
101 * Check quota for allocation of this block.
103 @@ -1915,9 +1937,13 @@ ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
104 le16_add_cpu(&gdp->bg_free_blocks_count, -num);
105 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
106 spin_unlock(sb_bgl_lock(sbi, group_no));
107 - if (!EXT4_I(inode)->i_delalloc_reserved_flag)
108 - percpu_counter_sub(&sbi->s_freeblocks_counter, num);
110 + if (!EXT4_I(inode)->i_delalloc_reserved_flag && (*count != num)) {
111 + /*
112 + * we allocated less blocks than we
113 + * claimed. Add the difference back.
114 + */
115 + percpu_counter_add(&sbi->s_freeblocks_counter, *count - num);
117 if (sbi->s_log_groups_per_flex) {
118 ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
119 spin_lock(sb_bgl_lock(sbi, flex_group));
120 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
121 index 7f11b25..71a4fde 100644
122 --- a/fs/ext4/ext4.h
123 +++ b/fs/ext4/ext4.h
124 @@ -1047,6 +1047,8 @@ extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
125 unsigned long *count, int *errp);
126 extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
127 ext4_fsblk_t goal, unsigned long *count, int *errp);
128 +extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
129 + ext4_fsblk_t nblocks);
130 extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
131 ext4_fsblk_t nblocks);
132 extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
133 @@ -1295,6 +1297,17 @@ do { \
134 __ext4_std_error((sb), __func__, (errno)); \
135 } while (0)
137 +#ifdef CONFIG_SMP
138 +/* Each CPU can accumulate FBC_BATCH blocks in their local
139 + * counters. So we need to make sure we have free blocks more
140 + * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times.
141 + */
142 +#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
143 +#else
144 +#define EXT4_FREEBLOCKS_WATERMARK 0
145 +#endif
149 * Inodes and files operations
151 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
152 index 1c289c1..d965a05 100644
153 --- a/fs/ext4/inode.c
154 +++ b/fs/ext4/inode.c
155 @@ -1537,13 +1537,10 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
156 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
157 total = md_needed + nrblocks;
159 - if (ext4_has_free_blocks(sbi, total) < total) {
160 + if (ext4_claim_free_blocks(sbi, total)) {
161 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
162 return -ENOSPC;
164 - /* reduce fs free blocks counter */
165 - percpu_counter_sub(&sbi->s_freeblocks_counter, total);
167 EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
168 EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
170 diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
171 index 82dd0e4..4404b46 100644
172 --- a/fs/ext4/mballoc.c
173 +++ b/fs/ext4/mballoc.c
174 @@ -2977,9 +2977,15 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
175 * at write_begin() time for delayed allocation
176 * do not double accounting
178 - if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
179 - percpu_counter_sub(&sbi->s_freeblocks_counter,
180 - ac->ac_b_ex.fe_len);
181 + if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED) &&
182 + ac->ac_o_ex.fe_len != ac->ac_b_ex.fe_len) {
183 + /*
184 + * we allocated less blocks than we calimed
185 + * Add the difference back
186 + */
187 + percpu_counter_add(&sbi->s_freeblocks_counter,
188 + ac->ac_o_ex.fe_len - ac->ac_b_ex.fe_len);
191 if (sbi->s_log_groups_per_flex) {
192 ext4_group_t flex_group = ext4_flex_group(sbi,
193 @@ -4391,14 +4397,11 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
195 * With delalloc we already reserved the blocks
197 - ar->len = ext4_has_free_blocks(sbi, ar->len);
200 - if (ar->len == 0) {
201 - *errp = -ENOSPC;
202 - return 0;
203 + if (ext4_claim_free_blocks(sbi, ar->len)) {
204 + *errp = -ENOSPC;
205 + return 0;
209 while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
210 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
211 ar->len--;
213 1.6.0.1.90.g27a6e