Update delalloc ENOSPC patch comments for clarity
[ext4-patch-queue.git] / ext4_i_disksize_lock_race_fix.patch
blob8f68abd0c0c50d2f1e68edc42e4f7c84e311db8a
1 ext4: Properly update i_disksize.
3 From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
5 With delayed allocation we use i_data_sem to update i_disksize. We need
6 to update i_disksize only if the new size specified is greater than the
7 current value and we need to make sure we don't race with other
8 i_disksize updates. With delayed allocation we will switch to the
9 write_begin function for non-delayed allocation if we are low on free
10 blocks. That means the write_begin function for non-delayed allocation
11 also needs to use the same locking.
13 We also need to check and update i_disksize even if the new size is less
14 than inode.i_size because of delayed allocation.
16 Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
17 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
18 ---
19 fs/ext4/ext4.h | 11 ++++++++++
20 fs/ext4/extents.c | 9 ++++----
21 fs/ext4/inode.c | 61 ++++++++++++++++++++++++++++++++---------------------
22 3 files changed, 53 insertions(+), 28 deletions(-)
24 Index: linux-2.6.27-rc5/fs/ext4/ext4.h
25 ===================================================================
26 --- linux-2.6.27-rc5.orig/fs/ext4/ext4.h 2008-08-28 23:25:05.000000000 -0700
27 +++ linux-2.6.27-rc5/fs/ext4/ext4.h 2008-08-28 23:28:51.000000000 -0700
28 @@ -1219,6 +1219,17 @@ do { \
29 #define EXT4_FREEBLOCKS_WATERMARK 0
30 #endif
32 +static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
34 + /*
35 + * XXX: replace with spinlock if seen contended -bzzz
36 + */
37 + down_write(&EXT4_I(inode)->i_data_sem);
38 + if (newsize > EXT4_I(inode)->i_disksize)
39 + EXT4_I(inode)->i_disksize = newsize;
40 + up_write(&EXT4_I(inode)->i_data_sem);
41 + return ;
45 * Inodes and files operations
46 Index: linux-2.6.27-rc5/fs/ext4/extents.c
47 ===================================================================
48 --- linux-2.6.27-rc5.orig/fs/ext4/extents.c 2008-08-28 23:25:09.000000000 -0700
49 +++ linux-2.6.27-rc5/fs/ext4/extents.c 2008-08-28 23:28:51.000000000 -0700
50 @@ -2877,10 +2877,11 @@ static void ext4_falloc_update_inode(str
51 * Update only when preallocation was requested beyond
52 * the file size.
54 - if (!(mode & FALLOC_FL_KEEP_SIZE) &&
55 - new_size > i_size_read(inode)) {
56 - i_size_write(inode, new_size);
57 - EXT4_I(inode)->i_disksize = new_size;
58 + if (!(mode & FALLOC_FL_KEEP_SIZE)) {
59 + if (new_size > i_size_read(inode))
60 + i_size_write(inode, new_size);
61 + if (new_size > EXT4_I(inode)->i_disksize)
62 + ext4_update_i_disksize(inode, new_size);
66 Index: linux-2.6.27-rc5/fs/ext4/inode.c
67 ===================================================================
68 --- linux-2.6.27-rc5.orig/fs/ext4/inode.c 2008-08-28 23:28:42.000000000 -0700
69 +++ linux-2.6.27-rc5/fs/ext4/inode.c 2008-08-28 23:28:51.000000000 -0700
70 @@ -1434,16 +1434,18 @@ static int ext4_ordered_write_end(struct
71 ret = ext4_jbd2_file_inode(handle, inode);
73 if (ret == 0) {
74 - /*
75 - * generic_write_end() will run mark_inode_dirty() if i_size
76 - * changes. So let's piggyback the i_disksize mark_inode_dirty
77 - * into that.
78 - */
79 loff_t new_i_size;
81 new_i_size = pos + copied;
82 - if (new_i_size > EXT4_I(inode)->i_disksize)
83 - EXT4_I(inode)->i_disksize = new_i_size;
84 + if (new_i_size > EXT4_I(inode)->i_disksize) {
85 + ext4_update_i_disksize(inode, new_i_size);
86 + /* We need to mark inode dirty even if
87 + * new_i_size is less than inode->i_size
88 + * but greater than i_disksize (hint: delalloc).
89 + */
90 + ext4_mark_inode_dirty(handle, inode);
91 + }
93 ret2 = generic_write_end(file, mapping, pos, len, copied,
94 page, fsdata);
95 copied = ret2;
96 @@ -1468,8 +1470,14 @@ static int ext4_writeback_write_end(stru
97 loff_t new_i_size;
99 new_i_size = pos + copied;
100 - if (new_i_size > EXT4_I(inode)->i_disksize)
101 - EXT4_I(inode)->i_disksize = new_i_size;
102 + if (new_i_size > EXT4_I(inode)->i_disksize) {
103 + ext4_update_i_disksize(inode, new_i_size);
104 + /* We need to mark inode dirty even if
105 + * new_i_size is less than inode->i_size
106 + * but greater than i_disksize (hint: delalloc).
107 + */
108 + ext4_mark_inode_dirty(handle, inode);
111 ret2 = generic_write_end(file, mapping, pos, len, copied,
112 page, fsdata);
113 @@ -1494,6 +1502,7 @@ static int ext4_journalled_write_end(str
114 int ret = 0, ret2;
115 int partial = 0;
116 unsigned from, to;
117 + loff_t new_i_size;
119 from = pos & (PAGE_CACHE_SIZE - 1);
120 to = from + len;
121 @@ -1508,11 +1517,12 @@ static int ext4_journalled_write_end(str
122 to, &partial, write_end_fn);
123 if (!partial)
124 SetPageUptodate(page);
125 - if (pos+copied > inode->i_size)
126 + new_i_size = pos + copied;
127 + if (new_i_size > inode->i_size)
128 i_size_write(inode, pos+copied);
129 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
130 - if (inode->i_size > EXT4_I(inode)->i_disksize) {
131 - EXT4_I(inode)->i_disksize = inode->i_size;
132 + if (new_i_size > EXT4_I(inode)->i_disksize) {
133 + ext4_update_i_disksize(inode, new_i_size);
134 ret2 = ext4_mark_inode_dirty(handle, inode);
135 if (!ret)
136 ret = ret2;
137 @@ -2227,18 +2237,9 @@ static int ext4_da_get_block_write(struc
138 if (disksize > i_size_read(inode))
139 disksize = i_size_read(inode);
140 if (disksize > EXT4_I(inode)->i_disksize) {
141 - /*
142 - * XXX: replace with spinlock if seen contended -bzzz
143 - */
144 - down_write(&EXT4_I(inode)->i_data_sem);
145 - if (disksize > EXT4_I(inode)->i_disksize)
146 - EXT4_I(inode)->i_disksize = disksize;
147 - up_write(&EXT4_I(inode)->i_data_sem);
149 - if (EXT4_I(inode)->i_disksize == disksize) {
150 - ret = ext4_mark_inode_dirty(handle, inode);
151 - return ret;
153 + ext4_update_i_disksize(inode, disksize);
154 + ret = ext4_mark_inode_dirty(handle, inode);
155 + return ret;
157 ret = 0;
159 @@ -2567,6 +2568,13 @@ retry:
160 unlock_page(page);
161 ext4_journal_stop(handle);
162 page_cache_release(page);
163 + /*
164 + * block_write_begin may have instantiated a few blocks
165 + * outside i_size. Trim these off again. Don't need
166 + * i_size_read because we hold i_mutex.
167 + */
168 + if (pos + len > inode->i_size)
169 + vmtruncate(inode, inode->i_size);
172 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
173 @@ -2647,6 +2655,11 @@ static int ext4_da_write_end(struct file
174 EXT4_I(inode)->i_disksize = new_i_size;
176 up_write(&EXT4_I(inode)->i_data_sem);
177 + /* We need to mark inode dirty even if
178 + * new_i_size is less than inode->i_size
179 + * but greater than i_disksize (hint: delalloc).
180 + */
181 + ext4_mark_inode_dirty(handle, inode);
184 ret2 = generic_write_end(file, mapping, pos, len, copied,