add patch backwards-compatibility-support-for-luster-ea_inode
[ext4-patch-queue.git] / backwards-compatibility-support-for-luster-ea_inode
blob924a5d1a5dd113fe53efbed93510cbbfebe66d85
1 ext4: backward compatibility support for Lustre ea_inode implementation
3 From: Tahsin Erdogan <tahsin@google.com>
5 The original Lustre ea_inode feature did not have ref counts on xattr inodes
6 because each one was always referenced by exactly one parent. The new
7 implementation expects the ref count to be initialized, which is not true in
8 the Lustre case. Handle this by detecting a Lustre-created xattr inode and
9 setting its ref count to 1.
11 The quota handling of xattr inodes has also changed with deduplication
12 support. The new implementation manually manages quotas to support sharing
13 across multiple users. A consequence is that a referencing inode
14 incorporates the blocks of the xattr inode into its own i_block field.
16 We need to know how an xattr inode was created so that we can reverse the
17 block charges during reference removal. This is handled by introducing an
18 EXT4_STATE_LUSTRE_EA_INODE flag. The flag is set on an xattr inode if the
19 inode appears to have been created by Lustre. During xattr inode reference
20 removal, the manual quota uncharge is skipped if the flag is set.
22 Signed-off-by: Tahsin Erdogan <tahsin@google.com>
23 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
24 ---
25  fs/ext4/ext4.h  |   1 +
26  fs/ext4/inode.c |   8 ----
27  fs/ext4/xattr.c | 141 +++++++++++++++++++++++++++++++++++++-------------------
28  3 files changed, 94 insertions(+), 56 deletions(-)
30 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
31 index e9440ed605c0..21e8b1dea958 100644
32 --- a/fs/ext4/ext4.h
33 +++ b/fs/ext4/ext4.h
34 @@ -1569,6 +1569,7 @@ enum {
35                                            nolocking */
36         EXT4_STATE_MAY_INLINE_DATA,     /* may have in-inode data */
37         EXT4_STATE_EXT_PRECACHED,       /* extents have been precached */
38 +       EXT4_STATE_LUSTRE_EA_INODE,     /* Lustre-style ea_inode */
39  };
41  #define EXT4_INODE_BIT_FNS(name, field, offset)                                \
42 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
43 index 70699940e20d..cebb6e60a8af 100644
44 --- a/fs/ext4/inode.c
45 +++ b/fs/ext4/inode.c
46 @@ -4897,14 +4897,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
47         brelse(iloc.bh);
48         ext4_set_inode_flags(inode);
50 -       if (ei->i_flags & EXT4_EA_INODE_FL) {
51 -               ext4_xattr_inode_set_class(inode);
53 -               inode_lock(inode);
54 -               inode->i_flags |= S_NOQUOTA;
55 -               inode_unlock(inode);
56 -       }
58         unlock_new_inode(inode);
59         return inode;
61 diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
62 index 949b4ea3ff58..415be4a88cc3 100644
63 --- a/fs/ext4/xattr.c
64 +++ b/fs/ext4/xattr.c
65 @@ -354,8 +354,10 @@ static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size)
66         return ret;
67  }
69 +#define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec)
71  static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
72 -                                struct inode **ea_inode)
73 +                                u32 ea_inode_hash, struct inode **ea_inode)
74  {
75         struct inode *inode;
76         int err;
77 @@ -385,6 +387,24 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
78                 goto error;
79         }
81 +       ext4_xattr_inode_set_class(inode);
83 +       /*
84 +        * Check whether this is an old Lustre-style xattr inode. Lustre
85 +        * implementation does not have hash validation, rather it has a
86 +        * backpointer from ea_inode to the parent inode.
87 +        */
88 +       if (ea_inode_hash != ext4_xattr_inode_get_hash(inode) &&
89 +           EXT4_XATTR_INODE_GET_PARENT(inode) == parent->i_ino &&
90 +           inode->i_generation == parent->i_generation) {
91 +               ext4_set_inode_state(inode, EXT4_STATE_LUSTRE_EA_INODE);
92 +               ext4_xattr_inode_set_ref(inode, 1);
93 +       } else {
94 +               inode_lock(inode);
95 +               inode->i_flags |= S_NOQUOTA;
96 +               inode_unlock(inode);
97 +       }
99         *ea_inode = inode;
100         return 0;
101  error:
102 @@ -417,8 +437,6 @@ ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
103         return 0;
106 -#define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec)
108  /*
109   * Read xattr value from the EA inode.
110   */
111 @@ -431,7 +449,7 @@ ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry,
112         int err;
114         err = ext4_xattr_inode_iget(inode, le32_to_cpu(entry->e_value_inum),
115 -                                   &ea_inode);
116 +                                   le32_to_cpu(entry->e_hash), &ea_inode);
117         if (err) {
118                 ea_inode = NULL;
119                 goto out;
120 @@ -449,29 +467,20 @@ ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry,
121         if (err)
122                 goto out;
124 -       err = ext4_xattr_inode_verify_hashes(ea_inode, entry, buffer, size);
125 -       /*
126 -        * Compatibility check for old Lustre ea_inode implementation. Old
127 -        * version does not have hash validation, but it has a backpointer
128 -        * from ea_inode to the parent inode.
129 -        */
130 -       if (err == -EFSCORRUPTED) {
131 -               if (EXT4_XATTR_INODE_GET_PARENT(ea_inode) != inode->i_ino ||
132 -                   ea_inode->i_generation != inode->i_generation) {
133 +       if (!ext4_test_inode_state(ea_inode, EXT4_STATE_LUSTRE_EA_INODE)) {
134 +               err = ext4_xattr_inode_verify_hashes(ea_inode, entry, buffer,
135 +                                                    size);
136 +               if (err) {
137                         ext4_warning_inode(ea_inode,
138                                            "EA inode hash validation failed");
139                         goto out;
140                 }
141 -               /* Do not add ea_inode to the cache. */
142 -               ea_inode_cache = NULL;
143 -               err = 0;
144 -       } else if (err)
145 -               goto out;
147 -       if (ea_inode_cache)
148 -               mb_cache_entry_create(ea_inode_cache, GFP_NOFS,
149 -                                     ext4_xattr_inode_get_hash(ea_inode),
150 -                                     ea_inode->i_ino, true /* reusable */);
151 +               if (ea_inode_cache)
152 +                       mb_cache_entry_create(ea_inode_cache, GFP_NOFS,
153 +                                       ext4_xattr_inode_get_hash(ea_inode),
154 +                                       ea_inode->i_ino, true /* reusable */);
155 +       }
156  out:
157         iput(ea_inode);
158         return err;
159 @@ -838,10 +847,15 @@ static int ext4_xattr_inode_alloc_quota(struct inode *inode, size_t len)
160         return err;
163 -static void ext4_xattr_inode_free_quota(struct inode *inode, size_t len)
164 +static void ext4_xattr_inode_free_quota(struct inode *parent,
165 +                                       struct inode *ea_inode,
166 +                                       size_t len)
168 -       dquot_free_space_nodirty(inode, round_up_cluster(inode, len));
169 -       dquot_free_inode(inode);
170 +       if (ea_inode &&
171 +           ext4_test_inode_state(ea_inode, EXT4_STATE_LUSTRE_EA_INODE))
172 +               return;
173 +       dquot_free_space_nodirty(parent, round_up_cluster(parent, len));
174 +       dquot_free_inode(parent);
177  int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
178 @@ -1071,7 +1085,9 @@ static int ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent,
179                 if (!entry->e_value_inum)
180                         continue;
181                 ea_ino = le32_to_cpu(entry->e_value_inum);
182 -               err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
183 +               err = ext4_xattr_inode_iget(parent, ea_ino,
184 +                                           le32_to_cpu(entry->e_hash),
185 +                                           &ea_inode);
186                 if (err)
187                         goto cleanup;
188                 err = ext4_xattr_inode_inc_ref(handle, ea_inode);
189 @@ -1093,7 +1109,9 @@ static int ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent,
190                 if (!entry->e_value_inum)
191                         continue;
192                 ea_ino = le32_to_cpu(entry->e_value_inum);
193 -               err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
194 +               err = ext4_xattr_inode_iget(parent, ea_ino,
195 +                                           le32_to_cpu(entry->e_hash),
196 +                                           &ea_inode);
197                 if (err) {
198                         ext4_warning(parent->i_sb,
199                                      "cleanup ea_ino %u iget error %d", ea_ino,
200 @@ -1131,7 +1149,9 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
201                 if (!entry->e_value_inum)
202                         continue;
203                 ea_ino = le32_to_cpu(entry->e_value_inum);
204 -               err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
205 +               err = ext4_xattr_inode_iget(parent, ea_ino,
206 +                                           le32_to_cpu(entry->e_hash),
207 +                                           &ea_inode);
208                 if (err)
209                         continue;
211 @@ -1159,7 +1179,7 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
212                 }
214                 if (!skip_quota)
215 -                       ext4_xattr_inode_free_quota(parent,
216 +                       ext4_xattr_inode_free_quota(parent, ea_inode,
217                                               le32_to_cpu(entry->e_value_size));
219                 /*
220 @@ -1591,6 +1611,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
221         if (!s->not_found && here->e_value_inum) {
222                 ret = ext4_xattr_inode_iget(inode,
223                                             le32_to_cpu(here->e_value_inum),
224 +                                           le32_to_cpu(here->e_hash),
225                                             &old_ea_inode);
226                 if (ret) {
227                         old_ea_inode = NULL;
228 @@ -1609,7 +1630,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
229                                                      &new_ea_inode);
230                 if (ret) {
231                         new_ea_inode = NULL;
232 -                       ext4_xattr_inode_free_quota(inode, i->value_len);
233 +                       ext4_xattr_inode_free_quota(inode, NULL, i->value_len);
234                         goto out;
235                 }
236         }
237 @@ -1628,13 +1649,13 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
238                                         ext4_warning_inode(new_ea_inode,
239                                                   "dec ref new_ea_inode err=%d",
240                                                   err);
241 -                               ext4_xattr_inode_free_quota(inode,
242 +                               ext4_xattr_inode_free_quota(inode, new_ea_inode,
243                                                             i->value_len);
244                         }
245                         goto out;
246                 }
248 -               ext4_xattr_inode_free_quota(inode,
249 +               ext4_xattr_inode_free_quota(inode, old_ea_inode,
250                                             le32_to_cpu(here->e_value_size));
251         }
253 @@ -1803,8 +1824,10 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
254         struct mb_cache_entry *ce = NULL;
255         int error = 0;
256         struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
257 -       struct inode *ea_inode = NULL;
258 -       size_t old_ea_inode_size = 0;
259 +       struct inode *ea_inode = NULL, *tmp_inode;
260 +       size_t old_ea_inode_quota = 0;
261 +       unsigned int ea_ino;
264  #define header(x) ((struct ext4_xattr_header *)(x))
266 @@ -1866,12 +1889,24 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
267                          * like it has an empty value.
268                          */
269                         if (!s->not_found && s->here->e_value_inum) {
270 -                               /*
271 -                                * Defer quota free call for previous inode
272 -                                * until success is guaranteed.
273 -                                */
274 -                               old_ea_inode_size = le32_to_cpu(
275 +                               ea_ino = le32_to_cpu(s->here->e_value_inum);
276 +                               error = ext4_xattr_inode_iget(inode, ea_ino,
277 +                                             le32_to_cpu(s->here->e_hash),
278 +                                             &tmp_inode);
279 +                               if (error)
280 +                                       goto cleanup;
282 +                               if (!ext4_test_inode_state(tmp_inode,
283 +                                               EXT4_STATE_LUSTRE_EA_INODE)) {
284 +                                       /*
285 +                                        * Defer quota free call for previous
286 +                                        * inode until success is guaranteed.
287 +                                        */
288 +                                       old_ea_inode_quota = le32_to_cpu(
289                                                         s->here->e_value_size);
290 +                               }
291 +                               iput(tmp_inode);
293                                 s->here->e_value_inum = 0;
294                                 s->here->e_value_size = 0;
295                         }
296 @@ -1898,8 +1933,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
297                 goto cleanup;
299         if (i->value && s->here->e_value_inum) {
300 -               unsigned int ea_ino;
302                 /*
303                  * A ref count on ea_inode has been taken as part of the call to
304                  * ext4_xattr_set_entry() above. We would like to drop this
305 @@ -1907,7 +1940,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
306                  * initialized and has its own ref count on the ea_inode.
307                  */
308                 ea_ino = le32_to_cpu(s->here->e_value_inum);
309 -               error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode);
310 +               error = ext4_xattr_inode_iget(inode, ea_ino,
311 +                                             le32_to_cpu(s->here->e_hash),
312 +                                             &ea_inode);
313                 if (error) {
314                         ea_inode = NULL;
315                         goto cleanup;
316 @@ -2056,8 +2091,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
317                 }
318         }
320 -       if (old_ea_inode_size)
321 -               ext4_xattr_inode_free_quota(inode, old_ea_inode_size);
322 +       if (old_ea_inode_quota)
323 +               ext4_xattr_inode_free_quota(inode, NULL, old_ea_inode_quota);
325         /* Update the inode. */
326         EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
327 @@ -2084,7 +2119,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
329                 /* If there was an error, revert the quota charge. */
330                 if (error)
331 -                       ext4_xattr_inode_free_quota(inode,
332 +                       ext4_xattr_inode_free_quota(inode, ea_inode,
333                                                     i_size_read(ea_inode));
334                 iput(ea_inode);
335         }
336 @@ -2807,6 +2842,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
337         struct ext4_xattr_ibody_header *header;
338         struct ext4_iloc iloc = { .bh = NULL };
339         struct ext4_xattr_entry *entry;
340 +       struct inode *ea_inode;
341         int error;
343         error = ext4_xattr_ensure_credits(handle, inode, extra_credits,
344 @@ -2861,10 +2897,19 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
346                 if (ext4_has_feature_ea_inode(inode->i_sb)) {
347                         for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
348 -                            entry = EXT4_XATTR_NEXT(entry))
349 -                               if (entry->e_value_inum)
350 -                                       ext4_xattr_inode_free_quota(inode,
351 +                            entry = EXT4_XATTR_NEXT(entry)) {
352 +                               if (!entry->e_value_inum)
353 +                                       continue;
354 +                               error = ext4_xattr_inode_iget(inode,
355 +                                             le32_to_cpu(entry->e_value_inum),
356 +                                             le32_to_cpu(entry->e_hash),
357 +                                             &ea_inode);
358 +                               if (error)
359 +                                       continue;
360 +                               ext4_xattr_inode_free_quota(inode, ea_inode,
361                                               le32_to_cpu(entry->e_value_size));
362 +                               iput(ea_inode);
363 +                       }
365                 }
367 -- 
368 2.14.0.rc0.284.gd933b75aa4-goog