remove V2 from subject line
[ext4-patch-queue.git] / fix-block-reservation-for-bigalloc-filesystems
blob5530597a1f99eb80378152d4bfcc0d1b51665d61
1 ext4: fix block reservation for bigalloc filesystems
3 From: Jan Kara <jack@suse.cz>
5 For bigalloc filesystems we have to check whether newly requested inode
6 block isn't already part of a cluster for which we already have delayed
7 allocation reservation. This check happens in ext4_ext_map_blocks() and
8 that function sets EXT4_MAP_FROM_CLUSTER if that's the case. However if
9 ext4_da_map_blocks() finds in extent cache information about the block,
10 we don't call into ext4_ext_map_blocks() and thus we always end up
11 getting new reservation even if the space for cluster is already
12 reserved. This results in overreservation and premature ENOSPC reports.
14 Fix the problem by checking for existing cluster reservation already in
15 ext4_da_map_blocks(). That simplifies the logic and actually allows us
16 to get rid of the EXT4_MAP_FROM_CLUSTER flag completely.
18 Signed-off-by: Jan Kara <jack@suse.cz>
19 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
20 ---
21  fs/ext4/ext4.h              | 21 +--------------------
22  fs/ext4/extents.c           | 12 ++++--------
23  fs/ext4/inode.c             | 27 ++++-----------------------
24  include/trace/events/ext4.h |  3 +--
25  4 files changed, 10 insertions(+), 53 deletions(-)
27 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
28 index c55a1faaed58..b28c916340b9 100644
29 --- a/fs/ext4/ext4.h
30 +++ b/fs/ext4/ext4.h
31 @@ -158,17 +158,8 @@ struct ext4_allocation_request {
32  #define EXT4_MAP_MAPPED                (1 << BH_Mapped)
33  #define EXT4_MAP_UNWRITTEN     (1 << BH_Unwritten)
34  #define EXT4_MAP_BOUNDARY      (1 << BH_Boundary)
35 -/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
36 - * ext4_map_blocks wants to know whether or not the underlying cluster has
37 - * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
38 - * the requested mapping was from previously mapped (or delayed allocated)
39 - * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
40 - * should never appear on buffer_head's state flags.
41 - */
42 -#define EXT4_MAP_FROM_CLUSTER  (1 << BH_AllocFromCluster)
43  #define EXT4_MAP_FLAGS         (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
44 -                                EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
45 -                                EXT4_MAP_FROM_CLUSTER)
46 +                                EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY)
48  struct ext4_map_blocks {
49         ext4_fsblk_t m_pblk;
50 @@ -2791,16 +2782,6 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
51  extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
53  /*
54 - * Note that these flags will never ever appear in a buffer_head's state flag.
55 - * See EXT4_MAP_... to see where this is used.
56 - */
57 -enum ext4_state_bits {
58 -       BH_AllocFromCluster     /* allocated blocks were part of already
59 -                                * allocated cluster. */
60 -       = BH_JBDPrivateStart
61 -};
63 -/*
64   * Add new method to test whether block and inode bitmaps are properly
65   * initialized. With uninit_bg reading the block from disk is not enough
66   * to mark the bitmap uptodate. We need to also zero-out the bitmap
67 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
68 index 37043d0b2be8..d8cd229b5c2b 100644
69 --- a/fs/ext4/extents.c
70 +++ b/fs/ext4/extents.c
71 @@ -4268,6 +4268,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
72         ext4_io_end_t *io = ext4_inode_aio(inode);
73         ext4_lblk_t cluster_offset;
74         int set_unwritten = 0;
75 +       bool map_from_cluster = false;
77         ext_debug("blocks %u/%u requested for inode %lu\n",
78                   map->m_lblk, map->m_len, inode->i_ino);
79 @@ -4344,10 +4345,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
80                 }
81         }
83 -       if ((sbi->s_cluster_ratio > 1) &&
84 -           ext4_find_delalloc_cluster(inode, map->m_lblk))
85 -               map->m_flags |= EXT4_MAP_FROM_CLUSTER;
87         /*
88          * requested block isn't allocated yet;
89          * we couldn't try to create block if create flag is zero
90 @@ -4365,7 +4362,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
91         /*
92          * Okay, we need to do block allocation.
93          */
94 -       map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
95         newex.ee_block = cpu_to_le32(map->m_lblk);
96         cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
98 @@ -4377,7 +4373,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
99             get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
100                 ar.len = allocated = map->m_len;
101                 newblock = map->m_pblk;
102 -               map->m_flags |= EXT4_MAP_FROM_CLUSTER;
103 +               map_from_cluster = true;
104                 goto got_allocated_blocks;
105         }
107 @@ -4398,7 +4394,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
108             get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) {
109                 ar.len = allocated = map->m_len;
110                 newblock = map->m_pblk;
111 -               map->m_flags |= EXT4_MAP_FROM_CLUSTER;
112 +               map_from_cluster = true;
113                 goto got_allocated_blocks;
114         }
116 @@ -4524,7 +4520,7 @@ got_allocated_blocks:
117                  */
118                 reserved_clusters = get_reserved_cluster_alloc(inode,
119                                                 map->m_lblk, allocated);
120 -               if (map->m_flags & EXT4_MAP_FROM_CLUSTER) {
121 +               if (map_from_cluster) {
122                         if (reserved_clusters) {
123                                 /*
124                                  * We have clusters reserved for this range.
125 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
126 index e9777f93cf05..08ccb344a46f 100644
127 --- a/fs/ext4/inode.c
128 +++ b/fs/ext4/inode.c
129 @@ -416,11 +416,6 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
130         }
131         if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
132                 up_read((&EXT4_I(inode)->i_data_sem));
133 -       /*
134 -        * Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag
135 -        * because it shouldn't be marked in es_map->m_flags.
136 -        */
137 -       map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY);
139         /*
140          * We don't check m_len because extent will be collpased in status
141 @@ -1434,19 +1429,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
142          * file system block.
143          */
144         down_read(&EXT4_I(inode)->i_data_sem);
145 -       if (ext4_has_inline_data(inode)) {
146 -               /*
147 -                * We will soon create blocks for this page, and let
148 -                * us pretend as if the blocks aren't allocated yet.
149 -                * In case of clusters, we have to handle the work
150 -                * of mapping from cluster so that the reserved space
151 -                * is calculated properly.
152 -                */
153 -               if ((EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) &&
154 -                   ext4_find_delalloc_cluster(inode, map->m_lblk))
155 -                       map->m_flags |= EXT4_MAP_FROM_CLUSTER;
156 +       if (ext4_has_inline_data(inode))
157                 retval = 0;
158 -       } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
159 +       else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
160                 retval = ext4_ext_map_blocks(NULL, inode, map,
161                                              EXT4_GET_BLOCKS_NO_PUT_HOLE);
162         else
163 @@ -1465,7 +1450,8 @@ add_delayed:
164                  * then we don't need to reserve it again. However we still need
165                  * to reserve metadata for every block we're going to write.
166                  */
167 -               if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
168 +               if (EXT4_SB(inode->i_sb)->s_cluster_ratio <= 1 ||
169 +                   !ext4_find_delalloc_cluster(inode, map->m_lblk)) {
170                         ret = ext4_da_reserve_space(inode, iblock);
171                         if (ret) {
172                                 /* not enough space to reserve */
173 @@ -1481,11 +1467,6 @@ add_delayed:
174                         goto out_unlock;
175                 }
177 -               /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
178 -                * and it should not appear on the bh->b_state.
179 -                */
180 -               map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
182                 map_bh(bh, inode->i_sb, invalid_block);
183                 set_buffer_new(bh);
184                 set_buffer_delay(bh);
185 diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
186 index ff4bd1b35246..bb7dcbe99652 100644
187 --- a/include/trace/events/ext4.h
188 +++ b/include/trace/events/ext4.h
189 @@ -50,8 +50,7 @@ struct extent_status;
190         { EXT4_MAP_NEW,         "N" },                  \
191         { EXT4_MAP_MAPPED,      "M" },                  \
192         { EXT4_MAP_UNWRITTEN,   "U" },                  \
193 -       { EXT4_MAP_BOUNDARY,    "B" },                  \
194 -       { EXT4_MAP_FROM_CLUSTER, "C" })
195 +       { EXT4_MAP_BOUNDARY,    "B" })
197  #define show_free_flags(flags) __print_flags(flags, "|",       \
198         { EXT4_FREE_BLOCKS_METADATA,            "METADATA" },   \
199 -- 
200 1.8.1.4
203 To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
204 the body of a message to majordomo@vger.kernel.org
205 More majordomo info at  http://vger.kernel.org/majordomo-info.html