add patch initialize-retries-variable-in-ext4_da_write_inline_begin
[ext4-patch-queue.git] / fix-reserved-cluster-accounting-at-page-invalidation-time
blobecd4e9d0435d801de391bcd0680a6e5c22eb3d9d
1 ext4: fix reserved cluster accounting at page invalidation time
3 From: Eric Whitney <enwlinux@gmail.com>
5 Add new code to count canceled pending cluster reservations on bigalloc
6 file systems and to reduce the cluster reservation count on all file
7 systems using delayed allocation.  This replaces old code in
8 ext4_da_page_release_reservation that was incorrect.
10 Signed-off-by: Eric Whitney <enwlinux@gmail.com>
11 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
12 ---
13  fs/ext4/ext4.h           |  1 +
14  fs/ext4/extents_status.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++
15  fs/ext4/inode.c          | 23 +++----------
16  3 files changed, 95 insertions(+), 19 deletions(-)
18 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
19 index 0bdbbd151d2c..57cbc98d730f 100644
20 --- a/fs/ext4/ext4.h
21 +++ b/fs/ext4/ext4.h
22 @@ -2491,6 +2491,7 @@ extern int ext4_page_mkwrite(struct vm_fault *vmf);
23  extern int ext4_filemap_fault(struct vm_fault *vmf);
24  extern qsize_t *ext4_get_reserved_space(struct inode *inode);
25  extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
26 +extern void ext4_da_release_space(struct inode *inode, int to_free);
27  extern void ext4_da_update_reserve_space(struct inode *inode,
28                                         int used, int quota_claim);
29  extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
30 diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
31 index c92fbf444d08..2b439afafe13 100644
32 --- a/fs/ext4/extents_status.c
33 +++ b/fs/ext4/extents_status.c
34 @@ -1780,3 +1780,93 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
35                         __remove_pending(inode, last);
36         }
37  }
39 +/*
40 + * ext4_es_remove_blks - remove block range from extents status tree and
41 + *                       reduce reservation count or cancel pending
42 + *                       reservation as needed
43 + *
44 + * @inode - file containing range
45 + * @lblk - first block in range
46 + * @len - number of blocks to remove
47 + *
48 + */
49 +void ext4_es_remove_blks(struct inode *inode, ext4_lblk_t lblk,
50 +                        ext4_lblk_t len)
52 +       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
53 +       unsigned int clu_size, reserved = 0;
54 +       ext4_lblk_t last_lclu, first, length, remainder, last;
55 +       bool delonly;
56 +       int err = 0;
57 +       struct pending_reservation *pr;
58 +       struct ext4_pending_tree *tree;
60 +       /*
61 +        * Process cluster by cluster for bigalloc - there may be up to
62 +        * two clusters in a 4k page with a 1k block size and two blocks
63 +        * per cluster.  Also necessary for systems with larger page sizes
64 +        * and potentially larger block sizes.
65 +        */
66 +       clu_size = sbi->s_cluster_ratio;
67 +       last_lclu = EXT4_B2C(sbi, lblk + len - 1);
69 +       write_lock(&EXT4_I(inode)->i_es_lock);
71 +       for (first = lblk, remainder = len;
72 +            remainder > 0;
73 +            first += length, remainder -= length) {
75 +               if (EXT4_B2C(sbi, first) == last_lclu)
76 +                       length = remainder;
77 +               else
78 +                       length = clu_size - EXT4_LBLK_COFF(sbi, first);
80 +               /*
81 +                * The BH_Delay flag, which triggers calls to this function,
82 +                * and the contents of the extents status tree can be
83 +                * inconsistent due to writepages activity. So, note whether
84 +                * the blocks to be removed actually belong to an extent with
85 +                * delayed only status.
86 +                */
87 +               delonly = __es_scan_clu(inode, &ext4_es_is_delonly, first);
89 +               /*
90 +                * because of the writepages effect, written and unwritten
91 +                * blocks could be removed here
92 +                */
93 +               last = first + length - 1;
94 +               err = __es_remove_extent(inode, first, last);
95 +               if (err)
96 +                       ext4_warning(inode->i_sb,
97 +                                    "%s: couldn't remove page (err = %d)",
98 +                                    __func__, err);
100 +               /* non-bigalloc case: simply count the cluster for release */
101 +               if (sbi->s_cluster_ratio == 1 && delonly) {
102 +                       reserved++;
103 +                       continue;
104 +               }
106 +               /*
107 +                * bigalloc case: if all delayed allocated only blocks have
108 +                * just been removed from a cluster, either cancel a pending
109 +                * reservation if it exists or count a cluster for release
110 +                */
111 +               if (delonly &&
112 +                   !__es_scan_clu(inode, &ext4_es_is_delonly, first)) {
113 +                       pr = __get_pending(inode, EXT4_B2C(sbi, first));
114 +                       if (pr != NULL) {
115 +                               tree = &EXT4_I(inode)->i_pending_tree;
116 +                               rb_erase(&pr->rb_node, &tree->root);
117 +                               kmem_cache_free(ext4_pending_cachep, pr);
118 +                       } else {
119 +                               reserved++;
120 +                       }
121 +               }
122 +       }
124 +       write_unlock(&EXT4_I(inode)->i_es_lock);
126 +       ext4_da_release_space(inode, reserved);
128 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
129 index 57c6dd38f071..9b69f88bdacc 100644
130 --- a/fs/ext4/inode.c
131 +++ b/fs/ext4/inode.c
132 @@ -1595,7 +1595,7 @@ static int ext4_da_reserve_space(struct inode *inode)
133         return 0;       /* success */
136 -static void ext4_da_release_space(struct inode *inode, int to_free)
137 +void ext4_da_release_space(struct inode *inode, int to_free)
139         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
140         struct ext4_inode_info *ei = EXT4_I(inode);
141 @@ -1634,13 +1634,11 @@ static void ext4_da_page_release_reservation(struct page *page,
142                                              unsigned int offset,
143                                              unsigned int length)
145 -       int to_release = 0, contiguous_blks = 0;
146 +       int contiguous_blks = 0;
147         struct buffer_head *head, *bh;
148         unsigned int curr_off = 0;
149         struct inode *inode = page->mapping->host;
150 -       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
151         unsigned int stop = offset + length;
152 -       int num_clusters;
153         ext4_fsblk_t lblk;
155         BUG_ON(stop > PAGE_SIZE || stop < length);
156 @@ -1654,7 +1652,6 @@ static void ext4_da_page_release_reservation(struct page *page,
157                         break;
159                 if ((offset <= curr_off) && (buffer_delay(bh))) {
160 -                       to_release++;
161                         contiguous_blks++;
162                         clear_buffer_delay(bh);
163                 } else if (contiguous_blks) {
164 @@ -1662,7 +1659,7 @@ static void ext4_da_page_release_reservation(struct page *page,
165                                (PAGE_SHIFT - inode->i_blkbits);
166                         lblk += (curr_off >> inode->i_blkbits) -
167                                 contiguous_blks;
168 -                       ext4_es_remove_extent(inode, lblk, contiguous_blks);
169 +                       ext4_es_remove_blks(inode, lblk, contiguous_blks);
170                         contiguous_blks = 0;
171                 }
172                 curr_off = next_off;
173 @@ -1671,21 +1668,9 @@ static void ext4_da_page_release_reservation(struct page *page,
174         if (contiguous_blks) {
175                 lblk = page->index << (PAGE_SHIFT - inode->i_blkbits);
176                 lblk += (curr_off >> inode->i_blkbits) - contiguous_blks;
177 -               ext4_es_remove_extent(inode, lblk, contiguous_blks);
178 +               ext4_es_remove_blks(inode, lblk, contiguous_blks);
179         }
181 -       /* If we have released all the blocks belonging to a cluster, then we
182 -        * need to release the reserved space for that cluster. */
183 -       num_clusters = EXT4_NUM_B2C(sbi, to_release);
184 -       while (num_clusters > 0) {
185 -               lblk = (page->index << (PAGE_SHIFT - inode->i_blkbits)) +
186 -                       ((num_clusters - 1) << sbi->s_cluster_bits);
187 -               if (sbi->s_cluster_ratio == 1 ||
188 -                   !ext4_es_scan_clu(inode, &ext4_es_is_delayed, lblk))
189 -                       ext4_da_release_space(inode, 1);
191 -               num_clusters--;
192 -       }
195  /*
196 -- 
197 2.11.0