1 ext4: fix reserved cluster accounting at page invalidation time
3 From: Eric Whitney <enwlinux@gmail.com>
5 Add new code to count canceled pending cluster reservations on bigalloc
6 file systems and to reduce the cluster reservation count on all file
7 systems using delayed allocation. This replaces old code in
8 ext4_da_page_release_reservation that was incorrect.
10 Signed-off-by: Eric Whitney <enwlinux@gmail.com>
11 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
14 fs/ext4/extents_status.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++
15 fs/ext4/inode.c | 23 +++----------
16 3 files changed, 95 insertions(+), 19 deletions(-)
18 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
19 index 0bdbbd151d2c..57cbc98d730f 100644
22 @@ -2491,6 +2491,7 @@ extern int ext4_page_mkwrite(struct vm_fault *vmf);
23 extern int ext4_filemap_fault(struct vm_fault *vmf);
24 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
25 extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
26 +extern void ext4_da_release_space(struct inode *inode, int to_free);
27 extern void ext4_da_update_reserve_space(struct inode *inode,
28 int used, int quota_claim);
29 extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
30 diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
31 index c92fbf444d08..2b439afafe13 100644
32 --- a/fs/ext4/extents_status.c
33 +++ b/fs/ext4/extents_status.c
34 @@ -1780,3 +1780,93 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
35 __remove_pending(inode, last);
40 + * ext4_es_remove_blks - remove block range from extents status tree and
41 + * reduce reservation count or cancel pending
42 + * reservation as needed
44 + * @inode - file containing range
45 + * @lblk - first block in range
46 + * @len - number of blocks to remove
49 +void ext4_es_remove_blks(struct inode *inode, ext4_lblk_t lblk,
52 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
53 + unsigned int clu_size, reserved = 0;
54 + ext4_lblk_t last_lclu, first, length, remainder, last;
57 + struct pending_reservation *pr;
58 + struct ext4_pending_tree *tree;
61 + * Process cluster by cluster for bigalloc - there may be up to
62 + * two clusters in a 4k page with a 1k block size and two blocks
63 + * per cluster. Also necessary for systems with larger page sizes
64 + * and potentially larger block sizes.
66 + clu_size = sbi->s_cluster_ratio;
67 + last_lclu = EXT4_B2C(sbi, lblk + len - 1);
69 + write_lock(&EXT4_I(inode)->i_es_lock);
71 + for (first = lblk, remainder = len;
73 + first += length, remainder -= length) {
75 + if (EXT4_B2C(sbi, first) == last_lclu)
78 + length = clu_size - EXT4_LBLK_COFF(sbi, first);
81 + * The BH_Delay flag, which triggers calls to this function,
82 + * and the contents of the extents status tree can be
83 + * inconsistent due to writepages activity. So, note whether
84 + * the blocks to be removed actually belong to an extent with
85 + * delayed only status.
87 + delonly = __es_scan_clu(inode, &ext4_es_is_delonly, first);
90 + * because of the writepages effect, written and unwritten
91 + * blocks could be removed here
93 + last = first + length - 1;
94 + err = __es_remove_extent(inode, first, last);
96 + ext4_warning(inode->i_sb,
97 + "%s: couldn't remove page (err = %d)",
100 + /* non-bigalloc case: simply count the cluster for release */
101 + if (sbi->s_cluster_ratio == 1 && delonly) {
107 + * bigalloc case: if all delayed allocated only blocks have
108 + * just been removed from a cluster, either cancel a pending
109 + * reservation if it exists or count a cluster for release
112 + !__es_scan_clu(inode, &ext4_es_is_delonly, first)) {
113 + pr = __get_pending(inode, EXT4_B2C(sbi, first));
115 + tree = &EXT4_I(inode)->i_pending_tree;
116 + rb_erase(&pr->rb_node, &tree->root);
117 + kmem_cache_free(ext4_pending_cachep, pr);
124 + write_unlock(&EXT4_I(inode)->i_es_lock);
126 + ext4_da_release_space(inode, reserved);
128 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
129 index 57c6dd38f071..9b69f88bdacc 100644
130 --- a/fs/ext4/inode.c
131 +++ b/fs/ext4/inode.c
132 @@ -1595,7 +1595,7 @@ static int ext4_da_reserve_space(struct inode *inode)
133 return 0; /* success */
136 -static void ext4_da_release_space(struct inode *inode, int to_free)
137 +void ext4_da_release_space(struct inode *inode, int to_free)
139 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
140 struct ext4_inode_info *ei = EXT4_I(inode);
141 @@ -1634,13 +1634,11 @@ static void ext4_da_page_release_reservation(struct page *page,
145 - int to_release = 0, contiguous_blks = 0;
146 + int contiguous_blks = 0;
147 struct buffer_head *head, *bh;
148 unsigned int curr_off = 0;
149 struct inode *inode = page->mapping->host;
150 - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
151 unsigned int stop = offset + length;
155 BUG_ON(stop > PAGE_SIZE || stop < length);
156 @@ -1654,7 +1652,6 @@ static void ext4_da_page_release_reservation(struct page *page,
159 if ((offset <= curr_off) && (buffer_delay(bh))) {
162 clear_buffer_delay(bh);
163 } else if (contiguous_blks) {
164 @@ -1662,7 +1659,7 @@ static void ext4_da_page_release_reservation(struct page *page,
165 (PAGE_SHIFT - inode->i_blkbits);
166 lblk += (curr_off >> inode->i_blkbits) -
168 - ext4_es_remove_extent(inode, lblk, contiguous_blks);
169 + ext4_es_remove_blks(inode, lblk, contiguous_blks);
173 @@ -1671,21 +1668,9 @@ static void ext4_da_page_release_reservation(struct page *page,
174 if (contiguous_blks) {
175 lblk = page->index << (PAGE_SHIFT - inode->i_blkbits);
176 lblk += (curr_off >> inode->i_blkbits) - contiguous_blks;
177 - ext4_es_remove_extent(inode, lblk, contiguous_blks);
178 + ext4_es_remove_blks(inode, lblk, contiguous_blks);
181 - /* If we have released all the blocks belonging to a cluster, then we
182 - * need to release the reserved space for that cluster. */
183 - num_clusters = EXT4_NUM_B2C(sbi, to_release);
184 - while (num_clusters > 0) {
185 - lblk = (page->index << (PAGE_SHIFT - inode->i_blkbits)) +
186 - ((num_clusters - 1) << sbi->s_cluster_bits);
187 - if (sbi->s_cluster_ratio == 1 ||
188 - !ext4_es_scan_clu(inode, &ext4_es_is_delayed, lblk))
189 - ext4_da_release_space(inode, 1);