Never call d_instantiate() a second time for the same dentry. Misc cleanups.
[pohmelfs.git] / fs / pohmelfs / net.c
blob3cc8be53090d4afb6b356f6f627c39a6d4e02366
1 /*
2 * Copyright (C) 2011+ Evgeniy Polyakov <zbr@ioremap.net>
3 */
5 #include <linux/in.h>
6 #include <linux/in6.h>
7 #include <linux/net.h>
9 #include <net/sock.h>
10 #include <net/tcp.h>
12 #include "pohmelfs.h"
/*
 * Scratch buffer that pohmelfs_suck_scratch() reads unwanted reply payloads
 * into. It is only declared here; its allocation is not visible in this file
 * section.
 */
void *pohmelfs_scratch_buf;
/* Upper bound, in bytes, of a single read into pohmelfs_scratch_buf. */
int pohmelfs_scratch_buf_size = 4096;
17 void pohmelfs_print_addr(struct sockaddr_storage *addr, const char *fmt, ...)
19 struct sockaddr *sa = (struct sockaddr *)addr;
20 va_list args;
21 char *ptr;
23 va_start(args, fmt);
24 ptr = kvasprintf(GFP_NOIO, fmt, args);
25 if (!ptr)
26 goto err_out_exit;
28 if (sa->sa_family == AF_INET) {
29 struct sockaddr_in *sin = (struct sockaddr_in *)addr;
30 pr_info("pohmelfs: %pI4:%d: %s", &sin->sin_addr.s_addr, ntohs(sin->sin_port), ptr);
31 } else if (sa->sa_family == AF_INET6) {
32 struct sockaddr_in6 *sin = (struct sockaddr_in6 *)addr;
33 pr_info("pohmelfs: %pI6:%d: %s", &sin->sin6_addr, ntohs(sin->sin6_port), ptr);
36 kfree(ptr);
37 err_out_exit:
38 va_end(args);
/*
 * Basic network sending/receiving functions.
 * Blocking mode is used.
 */
45 int pohmelfs_data_recv(struct pohmelfs_state *st, void *buf, u64 size, unsigned int flags)
47 struct msghdr msg;
48 struct kvec iov;
49 int err;
51 BUG_ON(!size);
53 iov.iov_base = buf;
54 iov.iov_len = size;
56 msg.msg_iov = (struct iovec *)&iov;
57 msg.msg_iovlen = 1;
58 msg.msg_name = NULL;
59 msg.msg_namelen = 0;
60 msg.msg_control = NULL;
61 msg.msg_controllen = 0;
62 msg.msg_flags = flags;
64 err = kernel_recvmsg(st->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags);
65 if (err <= 0) {
66 if (err == 0)
67 err = -ECONNRESET;
68 goto err_out_exit;
71 err_out_exit:
72 return err;
75 int pohmelfs_recv(struct pohmelfs_trans *t, struct pohmelfs_state *recv, void *data, int size)
77 int err;
79 err = pohmelfs_data_recv(recv, data, size, MSG_DONTWAIT);
80 if (err < 0)
81 return err;
83 t->recv_offset += err;
84 return err;
87 static int pohmelfs_data_send(struct pohmelfs_trans *t)
89 struct msghdr msg;
90 struct iovec io[2];
91 int err, ionum = 1;
93 io[0].iov_base = &t->cmd;
94 io[0].iov_len = t->header_size;
96 if (t->data) {
97 io[1].iov_base = t->data;
98 io[1].iov_len = t->data_size;
99 ionum = 2;
102 msg.msg_name = NULL;
103 msg.msg_namelen = 0;
104 msg.msg_control = NULL;
105 msg.msg_controllen = 0;
106 msg.msg_flags = MSG_WAITALL;
108 msg.msg_iov = io;
109 msg.msg_iovlen = ionum;
111 err = kernel_sendmsg(t->st->sock, &msg, (struct kvec *)msg.msg_iov, ionum, t->data_size + t->header_size);
112 if (err <= 0) {
113 if (err == 0)
114 err = -ECONNRESET;
115 goto err_out_exit;
118 err = 0;
120 err_out_exit:
121 return err;
124 static int pohmelfs_page_send(struct pohmelfs_trans *t)
126 struct pohmelfs_write_ctl *ctl = t->wctl;
127 size_t size = le64_to_cpu(t->cmd.p.io.size);
128 pgoff_t offset = le64_to_cpu(t->cmd.p.io.offset);
129 struct msghdr msg;
130 struct iovec io;
131 unsigned i;
132 int err;
134 io.iov_base = &t->cmd;
135 io.iov_len = t->header_size;
137 msg.msg_name = NULL;
138 msg.msg_namelen = 0;
139 msg.msg_control = NULL;
140 msg.msg_controllen = 0;
141 msg.msg_flags = MSG_WAITALL;
143 msg.msg_iov = &io;
144 msg.msg_iovlen = 1;
146 err = kernel_sendmsg(t->st->sock, &msg, (struct kvec *)msg.msg_iov, 1, t->header_size);
147 if (err <= 0) {
148 if (err == 0)
149 err = -ECONNRESET;
150 goto err_out_exit;
153 for (i = 0; i< pagevec_count(&ctl->pvec); ++i) {
154 struct page *page = ctl->pvec.pages[i];
155 pgoff_t off = offset & (PAGE_CACHE_SIZE - 1);
156 size_t sz = PAGE_CACHE_SIZE - off;
158 if (sz > size)
159 sz = size;
161 err = kernel_sendpage(t->st->sock, page, off, sz, msg.msg_flags);
162 if (err <= 0) {
163 if (err == 0)
164 err = -ECONNRESET;
166 goto err_out_reset;
169 size -= err;
170 offset += err;
174 return 0;
176 err_out_reset:
177 err_out_exit:
178 return err;
/*
 * Polling machinery.
 */
/*
 * Wraps a poll_table together with the state it belongs to, so that
 * pohmelfs_queue_func() can recover the state from the poll_table pointer
 * via container_of().
 */
struct pohmelfs_poll_helper {
	poll_table pt;			/* table passed to the socket's ->poll() */
	struct pohmelfs_state *st;	/* state to attach to the socket wait queue */
};
190 static int pohmelfs_queue_wake(wait_queue_t *wait, unsigned mode, int sync, void *key)
192 struct pohmelfs_state *st = container_of(wait, struct pohmelfs_state, wait);
194 if (!st->conn->need_exit)
195 queue_work(st->conn->wq, &st->recv_work);
196 return 1;
199 static void pohmelfs_queue_func(struct file *file, wait_queue_head_t *whead, poll_table *pt)
201 struct pohmelfs_state *st = container_of(pt, struct pohmelfs_poll_helper, pt)->st;
203 st->whead = whead;
205 init_waitqueue_func_entry(&st->wait, pohmelfs_queue_wake);
206 add_wait_queue(whead, &st->wait);
209 static void pohmelfs_poll_exit(struct pohmelfs_state *st)
211 if (st->whead) {
212 remove_wait_queue(st->whead, &st->wait);
213 st->whead = NULL;
217 static int pohmelfs_poll_init(struct pohmelfs_state *st)
219 struct pohmelfs_poll_helper ph;
221 ph.st = st;
222 init_poll_funcptr(&ph.pt, &pohmelfs_queue_func);
224 st->sock->ops->poll(NULL, st->sock, &ph.pt);
225 return 0;
228 static void pohmelfs_state_send_work(struct work_struct *work)
230 struct pohmelfs_state *st = container_of(work, struct pohmelfs_state, send_work);
231 struct pohmelfs_trans *t;
232 int trans_put;
233 int err;
235 while (!st->conn->need_exit) {
236 t = NULL;
237 trans_put = 0;
239 mutex_lock(&st->trans_lock);
240 if (!list_empty(&st->trans_list)) {
241 t = list_first_entry(&st->trans_list, struct pohmelfs_trans, trans_entry);
242 list_del_init(&t->trans_entry);
243 err = pohmelfs_trans_insert_tree(st, t);
244 if (err)
245 trans_put = 1;
247 mutex_unlock(&st->trans_lock);
249 if (!t)
250 break;
252 if (t->wctl)
253 err = pohmelfs_page_send(t);
254 else
255 err = pohmelfs_data_send(t);
257 if (trans_put)
258 pohmelfs_trans_put(t);
260 if (err) {
261 pohmelfs_print_addr(&st->sa, "send error: %d\n", err);
263 pohmelfs_state_add_reconnect(st);
264 break;
269 static void pohmelfs_suck_scratch(struct pohmelfs_state *st)
271 struct dnet_cmd *cmd = &st->cmd;
272 int err = 0;
274 pr_debug("pohmelfs_suck_scratch: %llu\n", (unsigned long long)cmd->size);
276 while (cmd->size) {
277 int sz = pohmelfs_scratch_buf_size;
279 if (cmd->size < sz)
280 sz = cmd->size;
282 err = pohmelfs_data_recv(st, pohmelfs_scratch_buf, sz, MSG_WAITALL);
283 if (err < 0) {
284 pohmelfs_print_addr(&st->sa, "recv-scratch err: %d\n", err);
285 goto err_out_exit;
288 cmd->size -= err;
291 err_out_exit:
292 st->cmd_read = 1;
295 static void pohmelfs_state_recv_work(struct work_struct *work)
297 struct pohmelfs_state *st = container_of(work, struct pohmelfs_state, recv_work);
298 struct dnet_cmd *cmd = &st->cmd;
299 struct pohmelfs_trans *t;
300 unsigned long long trans;
301 unsigned int revents;
302 int err = 0;
304 while (!st->conn->need_exit) {
305 revents = st->sock->ops->poll(NULL, st->sock, NULL);
306 if (!(revents & POLLIN))
307 break;
309 if (st->cmd_read) {
310 err = pohmelfs_data_recv(st, cmd, sizeof(struct dnet_cmd), MSG_WAITALL);
311 if (err < 0) {
312 pohmelfs_print_addr(&st->sa, "recv error: %d\n", err);
313 goto err_out_exit;
316 dnet_convert_cmd(cmd);
318 trans = cmd->trans & ~DNET_TRANS_REPLY;
319 st->cmd_read = 0;
322 t = pohmelfs_trans_lookup(st, cmd);
323 if (!t) {
324 pohmelfs_suck_scratch(st);
326 err = 0;
327 goto err_out_continue;
329 if (cmd->size && (t->recv_offset != cmd->size)) {
330 err = t->cb.recv_reply(t, st);
331 if (err && (err != -EAGAIN)) {
332 pohmelfs_print_addr(&st->sa, "recv-reply error: %d\n", err);
333 goto err_out_remove;
336 if (t->recv_offset != cmd->size)
337 goto err_out_continue_put;
340 err = t->cb.complete(t, st);
341 if (err) {
342 pohmelfs_print_addr(&st->sa, "recv-complete err: %d\n", err);
345 kfree(t->recv_data);
346 t->recv_data = NULL;
347 t->recv_offset = 0;
349 err_out_remove:
350 /* only remove and free transaction if there is error or there will be no more replies */
351 if (!(cmd->flags & DNET_FLAGS_MORE) || err) {
352 pohmelfs_trans_remove(t);
355 * refcnt was grabbed twice:
356 * in pohmelfs_trans_lookup()
357 * and at transaction creation
359 pohmelfs_trans_put(t);
361 st->cmd_read = 1;
362 if (err) {
363 cmd->size -= t->recv_offset;
364 t->recv_offset = 0;
366 err_out_continue_put:
367 pohmelfs_trans_put(t);
368 err_out_continue:
369 if (err && (err != -EAGAIN)) {
370 //pohmelfs_suck_scratch(st);
371 goto err_out_exit;
374 continue;
377 err_out_exit:
378 if (err && err != -EAGAIN)
379 pohmelfs_state_add_reconnect(st);
380 return;
383 struct pohmelfs_state *pohmelfs_addr_exist(struct pohmelfs_connection *conn, struct sockaddr_storage *sa, int addrlen)
385 struct pohmelfs_state *st;
387 list_for_each_entry(st, &conn->state_list, state_entry) {
388 if (st->addrlen != addrlen)
389 continue;
391 if (!memcmp(&st->sa, sa, addrlen)) {
392 return st;
396 return 0;
399 struct pohmelfs_state *pohmelfs_state_create(struct pohmelfs_connection *conn, struct sockaddr_storage *sa, int addrlen,
400 int ask_route, int group_id)
402 int err = 0;
403 struct pohmelfs_state *st;
404 struct sockaddr *addr = (struct sockaddr *)sa;
406 /* early check - this state can be inserted into route table, no need to create state and check again */
407 spin_lock(&conn->state_lock);
408 if (pohmelfs_addr_exist(conn, sa, addrlen))
409 err = -EEXIST;
410 spin_unlock(&conn->state_lock);
412 if (err)
413 goto err_out_exit;
415 st = kzalloc(sizeof(struct pohmelfs_state), GFP_KERNEL);
416 if (!st) {
417 err = -ENOMEM;
418 goto err_out_exit;
421 st->conn = conn;
422 mutex_init(&st->trans_lock);
423 INIT_LIST_HEAD(&st->trans_list);
424 st->trans_root = RB_ROOT;
426 st->group_id = group_id;
428 kref_init(&st->refcnt);
430 INIT_WORK(&st->send_work, pohmelfs_state_send_work);
431 INIT_WORK(&st->recv_work, pohmelfs_state_recv_work);
433 st->cmd_read = 1;
435 err = sock_create_kern(addr->sa_family, SOCK_STREAM, IPPROTO_TCP, &st->sock);
436 if (err) {
437 pohmelfs_print_addr(sa, "sock_create: failed family: %d, err: %d\n", addr->sa_family, err);
438 goto err_out_free;
441 st->sock->sk->sk_allocation = GFP_NOIO;
442 st->sock->sk->sk_sndtimeo = st->sock->sk->sk_rcvtimeo = msecs_to_jiffies(60000);
444 err = 1;
445 sock_setsockopt(st->sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&err, 4);
447 tcp_setsockopt(st->sock->sk, SOL_TCP, TCP_KEEPIDLE, (char *)&conn->psb->keepalive_idle, 4);
448 tcp_setsockopt(st->sock->sk, SOL_TCP, TCP_KEEPINTVL, (char *)&conn->psb->keepalive_interval, 4);
449 tcp_setsockopt(st->sock->sk, SOL_TCP, TCP_KEEPCNT, (char *)&conn->psb->keepalive_cnt, 4);
451 err = kernel_connect(st->sock, (struct sockaddr *)addr, addrlen, 0);
452 if (err) {
453 pohmelfs_print_addr(sa, "kernel_connect: failed family: %d, err: %d\n", addr->sa_family, err);
454 goto err_out_release;
456 st->sock->sk->sk_sndtimeo = st->sock->sk->sk_rcvtimeo = msecs_to_jiffies(60000);
458 memcpy(&st->sa, sa, sizeof(struct sockaddr_storage));
459 st->addrlen = addrlen;
461 err = pohmelfs_poll_init(st);
462 if (err)
463 goto err_out_shutdown;
466 spin_lock(&conn->state_lock);
467 err = -EEXIST;
468 if (!pohmelfs_addr_exist(conn, sa, addrlen)) {
469 list_add_tail(&st->state_entry, &conn->state_list);
470 err = 0;
472 spin_unlock(&conn->state_lock);
474 if (err)
475 goto err_out_poll_exit;
477 if (ask_route) {
478 err = pohmelfs_route_request(st);
479 if (err)
480 goto err_out_poll_exit;
483 pohmelfs_print_addr(sa, "%d: connected\n", st->conn->idx);
485 return st;
487 err_out_poll_exit:
488 pohmelfs_poll_exit(st);
489 err_out_shutdown:
490 st->sock->ops->shutdown(st->sock, 2);
491 err_out_release:
492 sock_release(st->sock);
493 err_out_free:
494 kfree(st);
495 err_out_exit:
496 if (err != -EEXIST) {
497 pohmelfs_print_addr(sa, "state creation failed: %d\n", err);
499 return ERR_PTR(err);
502 static void pohmelfs_state_exit(struct pohmelfs_state *st)
504 if (!st->sock)
505 return;
507 pohmelfs_poll_exit(st);
508 st->sock->ops->shutdown(st->sock, 2);
510 pohmelfs_print_addr(&st->sa, "disconnected\n");
511 sock_release(st->sock);
514 static void pohmelfs_state_release(struct kref *kref)
516 struct pohmelfs_state *st = container_of(kref, struct pohmelfs_state, refcnt);
517 pohmelfs_state_exit(st);
520 void pohmelfs_state_put(struct pohmelfs_state *st)
522 kref_put(&st->refcnt, pohmelfs_state_release);
525 static void pohmelfs_state_clean(struct pohmelfs_state *st)
527 struct pohmelfs_trans *t, *tmp;
529 pohmelfs_route_remove_all(st);
531 mutex_lock(&st->trans_lock);
532 list_for_each_entry_safe(t, tmp, &st->trans_list, trans_entry) {
533 list_del(&t->trans_entry);
534 pohmelfs_trans_put(t);
537 while (1) {
538 struct rb_node *n = rb_first(&st->trans_root);
539 if (!n)
540 break;
542 t = rb_entry(n, struct pohmelfs_trans, trans_node);
543 pohmelfs_trans_put(t);
545 mutex_unlock(&st->trans_lock);
547 cancel_work_sync(&st->send_work);
548 cancel_work_sync(&st->recv_work);
551 void pohmelfs_state_kill(struct pohmelfs_state *st)
553 BUG_ON(!list_empty(&st->state_entry));
555 pohmelfs_state_clean(st);
556 pohmelfs_state_put(st);
559 void pohmelfs_state_schedule(struct pohmelfs_state *st)
561 if (!st->conn->need_exit)
562 queue_work(st->conn->wq, &st->send_work);
565 int pohmelfs_state_add_reconnect(struct pohmelfs_state *st)
567 struct pohmelfs_connection *conn = st->conn;
568 struct pohmelfs_reconnect *r, *tmp;
569 int err = 0;
571 pohmelfs_route_remove_all(st);
574 * Remove state from route table
576 spin_lock(&conn->state_lock);
577 list_move(&st->state_entry, &conn->kill_state_list);
578 spin_unlock(&conn->state_lock);
580 r = kzalloc(sizeof(struct pohmelfs_reconnect), GFP_NOIO);
581 if (!r) {
582 err = -ENOMEM;
583 goto err_out_exit;
586 memcpy(&r->sa, &st->sa, sizeof(struct sockaddr_storage));
587 r->addrlen = st->addrlen;
588 r->group_id = st->group_id;
590 mutex_lock(&conn->reconnect_lock);
591 list_for_each_entry(tmp, &conn->reconnect_list, reconnect_entry) {
592 if (tmp->addrlen != r->addrlen)
593 continue;
595 if (memcmp(&tmp->sa, &r->sa, r->addrlen))
596 continue;
598 err = -EEXIST;
599 break;
602 if (!err) {
603 list_add_tail(&r->reconnect_entry, &conn->reconnect_list);
605 mutex_unlock(&conn->reconnect_lock);
607 if (err)
608 goto err_out_free;
610 /* we do not really care if this work will not be processed immediately */
611 queue_delayed_work(conn->wq, &conn->reconnect_work, 0);
613 pohmelfs_print_addr(&st->sa, "reconnection added\n");
614 err = 0;
615 goto err_out_exit;
617 err_out_free:
618 kfree(r);
619 err_out_exit:
620 return err;