pohmelfs: Trivial correction of invalidate_mapping_pages
[pohmelfs.git] / fs / pohmelfs / file.c
blobce83ca42f1dcf1451a0b44a40019771675947066
1 /*
2 * Copyright (C) 2011+ Evgeniy Polyakov <zbr@ioremap.net>
3 */
#include <linux/falloc.h>
#include <linux/fs.h>
#include <linux/kernel.h>

#include "pohmelfs.h"
/* Name of the script requested by pohmelfs_read_latest_group(). */
#define POHMELFS_READ_LATEST_GROUPS_SCRIPT	"pohmelfs_read_latest_groups.py"
11 static int pohmelfs_write_init(struct pohmelfs_trans *t)
13 struct pohmelfs_wait *wait = t->priv;
15 pohmelfs_wait_get(wait);
16 return 0;
19 static void pohmelfs_write_destroy(struct pohmelfs_trans *t)
21 struct pohmelfs_wait *wait = t->priv;
23 wake_up(&wait->wq);
24 pohmelfs_wait_put(wait);
27 static int pohmelfs_write_complete(struct pohmelfs_trans *t, struct pohmelfs_state *recv)
29 struct pohmelfs_wait *wait = t->priv;
30 struct pohmelfs_inode *pi = pohmelfs_inode(t->inode);
31 struct dnet_cmd *cmd = &recv->cmd;
32 unsigned long long trans = cmd->trans & ~DNET_TRANS_REPLY;
34 pr_debug("pohmelfs: %s: write complete: %llu, flags: %x, status: %d\n",
35 pohmelfs_dump_id(pi->id.id), trans, cmd->flags, cmd->status);
37 if (cmd->flags & DNET_FLAGS_MORE)
38 return 0;
40 wait->condition = cmd->status;
41 if (!wait->condition)
42 wait->condition = 1;
44 return 0;
47 static int pohmelfs_send_write_metadata(struct pohmelfs_inode *pi, struct pohmelfs_io *pio, struct pohmelfs_wait *wait)
49 struct pohmelfs_sb *psb = pohmelfs_sb(pi->vfs_inode.i_sb);
50 struct timespec ts = CURRENT_TIME;
51 struct dnet_meta_update *mu;
52 struct dnet_meta *m;
53 int err, size;
54 void *data;
56 size = sizeof(struct dnet_meta) * 4 +
57 sizeof(struct dnet_meta_check_status) +
58 sizeof(struct dnet_meta_update) +
59 psb->fsid_len +
60 psb->group_num * sizeof(int);
62 data = kzalloc(size, GFP_NOIO);
63 if (!data) {
64 err = -ENOMEM;
65 goto err_out_exit;
68 m = data;
69 m->type = DNET_META_GROUPS;
70 m->size = psb->group_num * sizeof(int);
71 memcpy(m->data, psb->groups, m->size);
72 dnet_convert_meta(m);
74 m = (struct dnet_meta *)(m->data + le32_to_cpu(m->size));
75 m->type = DNET_META_NAMESPACE;
76 m->size = psb->fsid_len;
77 memcpy(m->data, psb->fsid, psb->fsid_len);
78 dnet_convert_meta(m);
80 m = (struct dnet_meta *)(m->data + le32_to_cpu(m->size));
81 m->type = DNET_META_UPDATE;
82 m->size = sizeof(struct dnet_meta_update);
83 mu = (struct dnet_meta_update *)m->data;
84 mu->tm.tsec = ts.tv_sec;
85 mu->tm.tnsec = ts.tv_nsec;
86 dnet_convert_meta_update(mu);
87 dnet_convert_meta(m);
89 m = (struct dnet_meta *)(m->data + le32_to_cpu(m->size));
90 m->type = DNET_META_CHECK_STATUS;
91 m->size = sizeof(struct dnet_meta_check_status);
92 /* do not fill, it will be updated on server */
93 dnet_convert_meta(m);
95 pio->pi = pi;
96 pio->id = &pi->id;
97 pio->cmd = DNET_CMD_WRITE;
98 pio->ioflags = DNET_IO_FLAGS_OVERWRITE | DNET_IO_FLAGS_META;
99 pio->cflags = DNET_FLAGS_NEED_ACK;
100 pio->type = 1;
101 pio->cb.init = pohmelfs_write_init;
102 pio->cb.destroy = pohmelfs_write_destroy;
103 pio->cb.complete = pohmelfs_write_complete;
104 pio->priv = wait;
105 pio->data = data;
106 pio->size = size;
108 err = pohmelfs_send_io(pio);
109 if (err)
110 goto err_out_free;
112 err_out_free:
113 kfree(data);
114 err_out_exit:
115 return err;
118 static int pohmelfs_write_command_complete(struct pohmelfs_trans *t, struct pohmelfs_state *recv)
120 struct dnet_cmd *cmd = &recv->cmd;
121 struct pohmelfs_write_ctl *ctl = t->wctl;
123 if (cmd->flags & DNET_FLAGS_MORE)
124 return 0;
126 if (cmd->status == 0)
127 atomic_inc(&ctl->good_writes);
128 else {
129 struct inode *inode = t->inode;
130 struct pohmelfs_inode *pi = pohmelfs_inode(inode);
131 unsigned long long size = le64_to_cpu(t->cmd.p.io.size);
132 unsigned long long offset = le64_to_cpu(t->cmd.p.io.offset);
134 pr_debug("pohmelfs: %s: write failed: ino: %lu, isize: %llu, offset: %llu, size: %llu: %d\n",
135 pohmelfs_dump_id(pi->id.id), inode->i_ino, inode->i_size, offset, size, cmd->status);
138 return 0;
141 static int pohmelfs_write_command_init(struct pohmelfs_trans *t)
143 struct pohmelfs_write_ctl *ctl = t->wctl;
145 kref_get(&ctl->refcnt);
146 return 0;
149 static void pohmelfs_write_command_destroy(struct pohmelfs_trans *t)
151 struct pohmelfs_write_ctl *ctl = t->wctl;
153 kref_put(&ctl->refcnt, pohmelfs_write_ctl_release);
156 int pohmelfs_write_command(struct pohmelfs_inode *pi, struct pohmelfs_write_ctl *ctl, loff_t offset, size_t len)
158 int err;
159 struct inode *inode = &pi->vfs_inode;
160 struct pohmelfs_io *pio;
161 uint64_t prepare_size = i_size_read(&pi->vfs_inode);
163 pio = kmem_cache_zalloc(pohmelfs_io_cache, GFP_NOIO);
164 if (!pio) {
165 err = -ENOMEM;
166 goto err_out_exit;
169 pio->pi = pi;
170 pio->id = &pi->id;
171 pio->cmd = DNET_CMD_WRITE;
172 pio->offset = offset;
173 pio->size = len;
174 pio->cflags = DNET_FLAGS_NEED_ACK;
177 * We always set prepare bit, since elliptics/eblob reuses existing (previously prepared/reserved) area
178 * But it also allows to 'miss' prepare message (for example if we sent prepare bit when node was offline)
180 pio->ioflags = DNET_IO_FLAGS_OVERWRITE | DNET_IO_FLAGS_PLAIN_WRITE | DNET_IO_FLAGS_PREPARE;
182 pio->num = prepare_size;
184 /* commit when whole inode is written */
185 if (offset + len == prepare_size) {
186 pio->ioflags |= DNET_IO_FLAGS_COMMIT;
189 pio->wctl = ctl;
190 pio->priv = ctl;
191 pio->cb.complete = pohmelfs_write_command_complete;
192 pio->cb.init = pohmelfs_write_command_init;
193 pio->cb.destroy = pohmelfs_write_command_destroy;
195 pr_debug("pohmelfs_write_prepare_commit: %s: ino: %lu, offset: %llu, len: %zu, total size: %llu\n",
196 pohmelfs_dump_id(pi->id.id), inode->i_ino, (unsigned long long)offset, len, inode->i_size);
198 err = pohmelfs_send_io(pio);
199 if (err)
200 goto err_out_free;
202 err_out_free:
203 kmem_cache_free(pohmelfs_io_cache, pio);
204 err_out_exit:
205 return err;
208 int pohmelfs_metadata_inode(struct pohmelfs_inode *pi, int sync)
210 struct inode *inode = &pi->vfs_inode;
211 struct pohmelfs_sb *psb = pohmelfs_sb(inode->i_sb);
212 struct pohmelfs_io *pio;
213 struct pohmelfs_wait *wait;
214 long ret;
215 int err;
217 wait = pohmelfs_wait_alloc(pi);
218 if (!wait) {
219 err = -ENOMEM;
220 goto err_out_exit;
223 pio = kmem_cache_zalloc(pohmelfs_io_cache, GFP_NOIO);
224 if (!pio) {
225 err = -ENOMEM;
226 goto err_out_put;
229 err = pohmelfs_send_write_metadata(pi, pio, wait);
230 if (err)
231 goto err_out_free;
233 if (sync) {
234 ret = wait_event_interruptible_timeout(wait->wq,
235 wait->condition != 0 && atomic_read(&wait->refcnt.refcount) <= 2,
236 msecs_to_jiffies(psb->write_wait_timeout));
237 if (ret <= 0) {
238 err = ret;
239 if (ret == 0)
240 err = -ETIMEDOUT;
241 goto err_out_free;
244 if (wait->condition < 0) {
245 err = wait->condition;
246 goto err_out_free;
250 err_out_free:
251 kmem_cache_free(pohmelfs_io_cache, pio);
252 err_out_put:
253 pohmelfs_wait_put(wait);
254 err_out_exit:
255 return err;
258 static long pohmelfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
260 struct inode *inode = file->f_path.dentry->d_inode;
261 struct pohmelfs_inode *pi = pohmelfs_inode(inode);
262 struct pohmelfs_io *pio;
263 int err;
265 if (offset + len < i_size_read(inode)) {
266 err = 0;
267 goto err_out_exit;
270 pio = kmem_cache_zalloc(pohmelfs_io_cache, GFP_NOIO);
271 if (!pio) {
272 err = -ENOMEM;
273 goto err_out_exit;
276 pio->pi = pi;
277 pio->id = &pi->id;
278 pio->cmd = DNET_CMD_WRITE;
279 pio->cflags = DNET_FLAGS_NEED_ACK;
280 pio->ioflags = DNET_IO_FLAGS_PREPARE;
281 pio->num = i_size_read(inode);
283 pr_debug("pohmelfs_fallocate: %s: ino: %lu, offset: %llu, len: %llu, total size: %llu\n",
284 pohmelfs_dump_id(pi->id.id), inode->i_ino,
285 (unsigned long long)offset, (unsigned long long)len, inode->i_size);
287 err = pohmelfs_send_io(pio);
288 if (err)
289 goto err_out_free;
291 err_out_free:
292 kmem_cache_free(pohmelfs_io_cache, pio);
293 err_out_exit:
294 return err;
297 struct pohmelfs_latest_ctl {
298 struct dnet_id id;
299 uint64_t offset;
300 uint64_t size;
303 static int pohmelfs_read_latest_complete(struct pohmelfs_trans *t, struct pohmelfs_state *recv)
305 struct pohmelfs_inode *pi = pohmelfs_inode(t->inode);
306 struct pohmelfs_wait *wait = t->priv;
307 struct dnet_cmd *cmd = &recv->cmd;
308 int err = cmd->status;
310 if (cmd->status)
311 goto err_out_exit;
313 if (cmd->flags & DNET_FLAGS_MORE) {
314 pr_debug("pohmelfs: %s: read-latest: complete: group: %d, attr size: %lld\n",
315 pohmelfs_dump_id(cmd->id.id), cmd->id.group_id, cmd->size - sizeof(struct dnet_attr));
316 if (cmd->size < sizeof(struct dnet_attr) + 4) {
317 err = -ENOENT;
318 goto err_out_exit;
321 mutex_lock(&pi->lock);
322 if (!pi->groups) {
323 pi->groups = kmalloc(cmd->size - sizeof(struct dnet_attr), GFP_NOIO);
324 if (!pi->groups) {
325 err = -ENOMEM;
326 mutex_unlock(&pi->lock);
327 goto err_out_exit;
330 pi->group_num = (cmd->size - sizeof(struct dnet_attr)) / sizeof(int);
331 memcpy(pi->groups, t->recv_data + sizeof(struct dnet_attr), pi->group_num * sizeof(int));
333 pr_debug("pohmelfs: %s: read-latest: complete: group: %d, received: %d groups\n",
334 pohmelfs_dump_id(cmd->id.id), cmd->id.group_id, pi->group_num);
336 mutex_unlock(&pi->lock);
339 err_out_exit:
340 if (err)
341 wait->condition = err;
342 else
343 wait->condition = 1;
344 return 0;
347 static int pohmelfs_read_latest_group(struct pohmelfs_inode *pi, struct pohmelfs_latest_ctl *r, int group_id)
349 struct pohmelfs_script_req req;
351 memset(&req, 0, sizeof(struct pohmelfs_script_req));
353 req.script_name = POHMELFS_READ_LATEST_GROUPS_SCRIPT;
354 req.script_namelen = sizeof(POHMELFS_READ_LATEST_GROUPS_SCRIPT) - 1;
356 req.obj_name = "noname";
357 req.obj_len = 5;
359 req.binary = r;
360 req.binary_size = sizeof(struct pohmelfs_latest_ctl);
362 req.id = &pi->id;
363 req.group_id = group_id;
364 req.sync = 1;
365 req.cflags = 0;
366 req.complete = pohmelfs_read_latest_complete;
368 return pohmelfs_send_script_request(pi, &req);
371 static int pohmelfs_read_latest(struct pohmelfs_inode *pi)
373 struct pohmelfs_latest_ctl *r;
374 struct pohmelfs_sb *psb = pohmelfs_sb(pi->vfs_inode.i_sb);
375 int i, err = -ENOENT;
377 r = kzalloc(sizeof(struct pohmelfs_latest_ctl), GFP_NOIO);
378 if (!r) {
379 err = -ENOMEM;
380 goto err_out_exit;
383 dnet_setup_id(&r->id, 0, pi->id.id);
385 for (i = 0; i < psb->group_num; ++i) {
386 r->id.group_id = psb->groups[i];
388 err = pohmelfs_read_latest_group(pi, r, psb->groups[i]);
389 if (err)
390 continue;
392 break;
395 kfree(r);
397 pr_debug("pohmelfs: %s: read-latest: %d groups\n", pohmelfs_dump_id(pi->id.id), pi->group_num);
399 err_out_exit:
400 return err;
403 static int pohmelfs_file_open(struct inode *inode, struct file *filp)
405 struct pohmelfs_inode *pi = pohmelfs_inode(inode);
407 if (!pi->group_num && !pi->local)
408 pohmelfs_read_latest(pi);
410 if (pohmelfs_need_resync(pi))
411 invalidate_mapping_pages(&inode->i_data, 0, -1);
413 return generic_file_open(inode, filp);
417 * We want fsync() to work on POHMELFS.
419 static int pohmelfs_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
421 struct inode *inode = filp->f_mapping->host;
422 int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
423 if (!err) {
424 mutex_lock(&inode->i_mutex);
425 err = sync_inode_metadata(inode, 1);
426 mutex_unlock(&inode->i_mutex);
428 pr_debug("pohmelfs: fsync: %s: start: %lld, end: %lld, nrpages: %ld, dirty: %d: %d\n",
429 pohmelfs_dump_id(pohmelfs_inode(inode)->id.id),
430 (unsigned long long)start, (unsigned long long)end,
431 inode->i_mapping->nrpages, mapping_cap_writeback_dirty(inode->i_mapping), err);
432 return err;
435 static int pohmelfs_flush(struct file *filp, fl_owner_t id)
437 struct inode *inode = filp->f_mapping->host;
438 struct pohmelfs_sb *psb = pohmelfs_sb(inode->i_sb);
439 int err = 0;
441 if (psb->sync_on_close)
442 err = pohmelfs_fsync(filp, 0, ~0ULL, 1);
444 if (!err && test_bit(AS_EIO, &inode->i_mapping->flags))
445 err = -EIO;
447 pr_debug("pohmelfs: flush: %s: %d\n", pohmelfs_dump_id(pohmelfs_inode(inode)->id.id), err);
448 return err;
451 const struct file_operations pohmelfs_file_ops = {
452 .open = pohmelfs_file_open,
454 .llseek = generic_file_llseek,
456 .read = do_sync_read,
457 .aio_read = generic_file_aio_read,
459 .mmap = generic_file_mmap,
461 .splice_read = generic_file_splice_read,
462 .splice_write = generic_file_splice_write,
464 .write = do_sync_write,
465 .aio_write = generic_file_aio_write,
467 .fallocate = pohmelfs_fallocate,
469 .fsync = pohmelfs_fsync,
470 .flush = pohmelfs_flush,
473 const struct inode_operations pohmelfs_file_inode_operations = {