2 * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
16 #include <linux/blkdev.h>
17 #include <linux/bio.h>
18 #include <linux/dst.h>
20 #include <linux/in6.h>
21 #include <linux/poll.h>
22 #include <linux/slab.h>
23 #include <linux/socket.h>
/*
 * Export bioset is used for server block IO requests.
 * It is created in dst_export_init() and released in dst_export_exit();
 * dst_bio_destructor() hands finished bios back to it.
 */
static struct bio_set *dst_bio_set;
/*
 * dst_export_init() - set up the bio set used by the export (server) side.
 *
 * Assigns dst_bio_set from bioset_create(32, sizeof(struct dst_export_priv)).
 * dst_process_io() later steps back sizeof(struct dst_export_priv) bytes
 * from each bio it allocates to reach its private data, so the second
 * argument presumably reserves that room in front of every bio - confirm
 * against the bioset_create() documentation.
 *
 * NOTE(review): the braces, the failure check and the return statement of
 * this function are not visible in this listing.
 */
32 int __init
dst_export_init(void)
36 dst_bio_set
= bioset_create(32, sizeof(struct dst_export_priv
));
46 void dst_export_exit(void)
48 bioset_free(dst_bio_set
);
52 * When a client connects and autonegotiates with the server node,
53 * its permissions are checked against the security attributes and sent
/*
 * dst_check_permissions() - look up the permissions granted to a peer.
 *
 * Walks n->security_list (n being main->node) with n->security_lock held
 * and compares each entry against 'sa', which points at st->ctl.addr.
 * The visible comparisons cover sa_family, sa_data_len and the sa_data
 * bytes past the first two; 'perm' starts at 0 and is assigned
 * s->permissions.  dst_accept_client() treats a result of 0 as
 * "Client is not allowed to connect".
 *
 * NOTE(review): this listing is partial - the declaration of 'st', the
 * assignment of 's', the statements guarded by each 'if', the length
 * argument of memcmp() and the return statement are not visible here.
 */
56 static unsigned int dst_check_permissions(struct dst_state
*main
,
59 struct dst_node
*n
= main
->node
;
60 struct dst_secure
*sentry
;
61 struct dst_secure_user
*s
;
62 struct saddr
*sa
= &st
->ctl
.addr
;
63 unsigned int perm
= 0;
/* The security list is only walked with the node's security_lock held. */
65 mutex_lock(&n
->security_lock
);
66 list_for_each_entry(sentry
, &n
->security_list
, sec_entry
) {
/* The entry's address family is compared with the peer's. */
69 if (s
->addr
.sa_family
!= sa
->sa_family
)
/* The stored address length is compared as well. */
72 if (s
->addr
.sa_data_len
!= sa
->sa_data_len
)
76 * This '2' below is a port field. This may be very wrong to do
77 * in atalk for example though. If there will be any need
78 * to extent protocol to something else, I can create
79 * per-family helpers and use them instead of this memcmp.
/* Both addresses are compared starting two bytes into sa_data. */
81 if (memcmp(s
->addr
.sa_data
+ 2, sa
->sa_data
+ 2,
85 perm
= s
->permissions
;
87 mutex_unlock(&n
->security_lock
);
93 * Accept new client: allocate appropriate network state and check permissions.
/*
 * dst_accept_client() - wait for one incoming connection and set it up.
 *
 * @st: listening state; st->node supplies trans_scan_timeout.
 *
 * Visible flow:
 *  - loop while neither 'err' nor 'sock' is set, polling the state with
 *    dst_state_poll();
 *  - when none of POLLIN/POLLERR/POLLHUP/POLLRDHUP is reported, register
 *    on st->thread_wait in TASK_INTERRUPTIBLE, re-check the exit
 *    conditions, poll again, check for pending signals and sleep with
 *    schedule_timeout(HZ);
 *  - with a listening socket and POLLIN set, call kernel_accept();
 *  - allocate a new state with dst_state_alloc(), fetch the peer address
 *    with kernel_getpeername(), look up its permissions with
 *    dst_check_permissions() (0 is reported as "Client is not allowed to
 *    connect") and initialise polling with dst_poll_init().
 *
 * NOTE(review): this listing is partial - the declarations of 'err' and
 * 'wait', the bodies of most 'if' statements, the matching lock for the
 * dst_state_unlock() call, the error labels and every return statement
 * are not visible here.
 */
95 static struct dst_state
*dst_accept_client(struct dst_state
*st
)
97 unsigned int revents
= 0;
98 unsigned int err_mask
= POLLERR
| POLLHUP
| POLLRDHUP
;
/* Events of interest: any error condition, or readable (a pending connection). */
99 unsigned int mask
= err_mask
| POLLIN
;
100 struct dst_node
*n
= st
->node
;
102 struct socket
*sock
= NULL
;
103 struct dst_state
*new;
/* Keep going until an error occurs or a socket has been accepted. */
105 while (!err
&& !sock
) {
106 revents
= dst_state_poll(st
);
/* Nothing of interest reported yet: prepare to sleep on the state's wait queue. */
108 if (!(revents
& mask
)) {
112 prepare_to_wait(&st
->thread_wait
,
113 &wait
, TASK_INTERRUPTIBLE
);
/* Exit conditions: scan timeout cleared on the node, or the state asked to exit. */
114 if (!n
->trans_scan_timeout
|| st
->need_exit
)
/* Poll again after queueing on the wait queue. */
117 revents
= dst_state_poll(st
);
122 if (signal_pending(current
))
126 * Magic HZ? Polling check above is not safe in
127 * all cases (like socket reset in BH context),
128 * so it is simpler just to postpone it to the
129 * process context instead of implementing
130 * special locking there.
132 schedule_timeout(HZ
);
134 finish_wait(&st
->thread_wait
, &wait
);
140 dprintk("%s: st: %p, revents: %x [err: %d, in: %d].\n",
141 __func__
, st
, revents
, revents
& err_mask
,
/* Error events on the listening socket are logged. */
144 if (revents
& err_mask
) {
145 dprintk("%s: revents: %x, socket: %p, err: %d.\n",
146 __func__
, revents
, st
->socket
, err
);
150 if (!n
->trans_scan_timeout
|| st
->need_exit
)
/* Accept only when the listening socket still exists and is readable. */
153 if (st
->socket
&& (revents
& POLLIN
))
154 err
= kernel_accept(st
->socket
, &sock
, 0);
156 dst_state_unlock(st
);
/* A fresh state on the same node represents the accepted client. */
162 new = dst_state_alloc(st
->node
);
165 goto err_out_release
;
/* Record the peer address; sa_data_len doubles as the in/out length argument. */
169 new->ctl
.addr
.sa_data_len
= sizeof(struct sockaddr
);
170 err
= kernel_getpeername(sock
, (struct sockaddr
*)&new->ctl
.addr
,
171 (int *)&new->ctl
.addr
.sa_data_len
);
/* Zero permissions are reported as a client that may not connect. */
175 new->permissions
= dst_check_permissions(st
, new);
176 if (new->permissions
== 0) {
178 dst_dump_addr(sock
, (struct sockaddr
*)&new->ctl
.addr
,
179 "Client is not allowed to connect");
183 err
= dst_poll_init(new);
187 dst_dump_addr(sock
, (struct sockaddr
*)&new->ctl
.addr
,
201 * Every block request handled by the server eventually finishes.
202 * Usually that happens in hard irq context of the appropriate controller,
203 * so to play well with all cases we just queue the BIO
204 * and wake up the processing thread, which takes the completed request and
205 * sends it back to the client (encrypting if needed) if it was a read
206 * request, or sends back a reply that the write completed successfully.
/*
 * dst_export_process_request_queue() - drain completed block requests.
 *
 * @st: state whose request_list holds finished requests.
 *
 * While st->request_list is non-empty, one dst_export_priv is unlinked
 * from its head under st->request_lock.  The irq-saving lock variant is
 * used; dst_bio_end_io() appends to the same list.  The visible tail of
 * the loop calls dst_export_crypto() when the node needs crypto and the
 * bio is a READ, and dst_export_send_bio() to answer the client.
 *
 * NOTE(review): this listing is partial - the declarations of 'flags',
 * 'bio' and 'err', how 'bio' is obtained from 'p', the branch structure
 * around the two calls and the return value are not visible here.
 */
208 static int dst_export_process_request_queue(struct dst_state
*st
)
211 struct dst_export_priv
*p
= NULL
;
215 while (!list_empty(&st
->request_list
)) {
216 spin_lock_irqsave(&st
->request_lock
, flags
);
/* Re-check under the lock: the unlocked test in the loop condition can race. */
217 if (!list_empty(&st
->request_list
)) {
218 p
= list_first_entry(&st
->request_list
,
219 struct dst_export_priv
, request_entry
);
220 list_del(&p
->request_entry
);
222 spin_unlock_irqrestore(&st
->request_lock
, flags
);
/* Crypto is only applied here to READ requests on nodes that need it. */
229 if (dst_need_crypto(st
->node
) && (bio_data_dir(bio
) == READ
))
230 err
= dst_export_crypto(st
->node
, bio
);
232 err
= dst_export_send_bio(bio
);
242 * Cleanup export state.
243 * It has to wait until all requests are finished,
244 * and then free them all.
/*
 * dst_state_cleanup_export() - wait for in-flight requests and unlink them.
 *
 * @st: export state being torn down.
 *
 * Loops while st->refcnt is above 1.  dst_process_io() takes a reference
 * per request with dst_state_get() and dst_bio_destructor() drops it with
 * dst_state_put(), so the counter tracks outstanding bios.  Each pass logs
 * the counter, sleeps on st->thread_wait until the count reaches 1 or
 * request_list becomes non-empty, then unlinks every queued
 * dst_export_priv from request_list under request_lock.
 *
 * NOTE(review): this listing is partial - the declaration of 'flags', the
 * wait timeout value, what is done with each unlinked entry and the code
 * after the loop are not visible here.
 */
246 static void dst_state_cleanup_export(struct dst_state
*st
)
248 struct dst_export_priv
*p
;
252 * This loop waits for all pending bios to be completed and freed.
254 while (atomic_read(&st
->refcnt
) > 1) {
255 dprintk("%s: st: %p, refcnt: %d, list_empty: %d.\n",
256 __func__
, st
, atomic_read(&st
->refcnt
),
257 list_empty(&st
->request_list
));
/* Sleep until only one reference is left or a completed request shows up. */
258 wait_event_timeout(st
->thread_wait
,
259 (atomic_read(&st
->refcnt
) == 1) ||
260 !list_empty(&st
->request_list
),
/* Unlink everything that completed in the meantime. */
263 while (!list_empty(&st
->request_list
)) {
265 spin_lock_irqsave(&st
->request_lock
, flags
);
/* Re-check under the lock before touching the list head. */
266 if (!list_empty(&st
->request_list
)) {
267 p
= list_first_entry(&st
->request_list
,
268 struct dst_export_priv
, request_entry
);
269 list_del(&p
->request_entry
);
271 spin_unlock_irqrestore(&st
->request_lock
, flags
);
276 dprintk("%s: st: %p, refcnt: %d, list_empty: %d, p: "
277 "%p.\n", __func__
, st
, atomic_read(&st
->refcnt
),
278 list_empty(&st
->request_list
), p
);
286 * Client accepting thread.
287 * Not only accepts new connection, but also schedules receiving thread
288 * and performs request completion described above.
/*
 * dst_accept() - body of the client accepting thread.
 *
 * @init_data:     the struct dst_node being exported (used as 'n').
 * @schedule_data: the listening struct dst_state (used as 'main_st').
 *
 * While the node's trans_scan_timeout is set and the listening state is
 * not asked to exit, the visible code:
 *  - accepts a client with dst_accept_client();
 *  - starts its receiver with dst_state_schedule_receiver();
 *  - serves it in an inner loop that sleeps on st->thread_wait and calls
 *    dst_export_process_request_queue() for queued completions;
 *  - then wakes st->thread_wait and calls dst_state_cleanup_export().
 * After the outer loop the listening state is shut down: dst_poll_exit()
 * and dst_state_socket_release() under the state lock, then
 * dst_state_put().
 *
 * NOTE(review): this listing is partial - the declaration of 'err', the
 * handling of a failed accept or receiver start, the rest of the wait
 * condition and its timeout, the statements guarded by the inner 'if's
 * and the return value are not visible here.
 */
290 static int dst_accept(void *init_data
, void *schedule_data
)
292 struct dst_state
*main_st
= schedule_data
;
293 struct dst_node
*n
= init_data
;
294 struct dst_state
*st
;
297 while (n
->trans_scan_timeout
&& !main_st
->need_exit
) {
298 dprintk("%s: main_st: %p, n: %p.\n", __func__
, main_st
, n
);
299 st
= dst_accept_client(main_st
);
303 err
= dst_state_schedule_receiver(st
);
/* Per-client service loop: runs while the node's scan timeout stays set. */
305 while (n
->trans_scan_timeout
) {
306 err
= wait_event_interruptible_timeout(st
->thread_wait
,
307 !list_empty(&st
->request_list
) ||
308 !n
->trans_scan_timeout
||
312 if (!n
->trans_scan_timeout
|| st
->need_exit
)
315 if (list_empty(&st
->request_list
))
318 err
= dst_export_process_request_queue(st
);
/* Client is done: wake any waiter and wait for its in-flight requests. */
324 wake_up(&st
->thread_wait
);
327 dst_state_cleanup_export(st
);
330 dprintk("%s: freeing listening socket st: %p.\n", __func__
, main_st
);
/* Polling and the socket of the listening state are torn down under its lock. */
332 dst_state_lock(main_st
);
333 dst_poll_exit(main_st
);
334 dst_state_socket_release(main_st
);
335 dst_state_unlock(main_st
);
336 dst_state_put(main_st
);
337 dprintk("%s: freed listening socket st: %p.\n", __func__
, main_st
);
/*
 * dst_start_export() - start exporting a node.
 *
 * @n: node to export.
 *
 * An empty n->security_list is reported with a KERN_ERR message, since no
 * client could pass the permission check.  Otherwise the result of
 * dst_node_trans_init(n, sizeof(struct dst_export_priv)) is returned.
 *
 * NOTE(review): the statement that leaves the function after the error
 * message (and its return value) is not visible in this listing.
 */
342 int dst_start_export(struct dst_node
*n
)
344 if (list_empty(&n
->security_list
)) {
345 printk(KERN_ERR
"You are trying to export node '%s' "
346 "without security attributes.\nNo clients will "
347 "be allowed to connect. Exiting.\n", n
->name
);
350 return dst_node_trans_init(n
, sizeof(struct dst_export_priv
));
354 * Initialize listening state and schedule accepting thread.
/*
 * dst_node_init_listened() - create the listening state for an export.
 *
 * @n:  node being exported.
 * @le: export control block; only le->ctl (network settings) is used in
 *      the visible code.
 *
 * Visible steps, in order: copy the network control into n->info->net,
 * allocate a state with dst_state_alloc(), copy the control into st->ctl,
 * create the socket, set sk_reuse, bind to ctl->addr, listen with a
 * backlog of 1024, initialise polling with dst_poll_init() and schedule
 * dst_accept() on the node's thread pool with 'st' as its schedule data.
 * Failures jump to err_out_socket_release or err_out_poll_exit.
 *
 * NOTE(review): this listing is partial - the declaration of 'err', the
 * checks that guard each 'goto', the err_out_poll_exit label, any further
 * cleanup and the return statements are not visible here.
 */
356 int dst_node_init_listened(struct dst_node
*n
, struct dst_export_ctl
*le
)
358 struct dst_state
*st
;
360 struct dst_network_ctl
*ctl
= &le
->ctl
;
362 memcpy(&n
->info
->net
, ctl
, sizeof(struct dst_network_ctl
));
364 st
= dst_state_alloc(n
);
369 memcpy(&st
->ctl
, ctl
, sizeof(struct dst_network_ctl
));
371 err
= dst_state_socket_create(st
);
/* Set sk_reuse on the listening socket before binding it. */
375 st
->socket
->sk
->sk_reuse
= 1;
377 err
= kernel_bind(st
->socket
, (struct sockaddr
*)&ctl
->addr
,
378 ctl
->addr
.sa_data_len
);
380 goto err_out_socket_release
;
382 err
= kernel_listen(st
->socket
, 1024);
384 goto err_out_socket_release
;
387 err
= dst_poll_init(st
);
389 goto err_out_socket_release
;
/* Hand the listening state to dst_accept() running on the node's thread pool. */
393 err
= thread_pool_schedule(n
->pool
, dst_thread_setup
,
394 dst_accept
, st
, MAX_SCHEDULE_TIMEOUT
);
396 goto err_out_poll_exit
;
402 err_out_socket_release
:
403 dst_state_socket_release(st
);
412 * Free the bio and its related private data.
413 * Also drop the reference on the appropriate state,
414 * which waits until there are no more block IOs in flight.
/*
 * dst_bio_destructor() - bio destructor installed by dst_process_io().
 *
 * @bio: bio being destroyed; bi_private is its struct dst_export_priv.
 *
 * Visible steps: walk the bio's segments and release pages with
 * __free_page(), drop the state reference held through priv->state with
 * dst_state_put(), and return the bio to dst_bio_set with bio_free().
 *
 * NOTE(review): this listing is partial - the declarations of 'bv' and
 * 'i', any condition inside the segment loop and any guard around the
 * dst_state_put() call are not visible here.
 */
416 static void dst_bio_destructor(struct bio
*bio
)
419 struct dst_export_priv
*priv
= bio
->bi_private
;
422 bio_for_each_segment(bv
, bio
, i
) {
426 __free_page(bv
->bv_page
);
/* Drops the reference that dst_process_io() took with dst_state_get(). */
430 dst_state_put(priv
->state
);
431 bio_free(bio
, dst_bio_set
);
435 * Block IO completion. Queue request to be sent back to
436 * the client (or just confirmation).
438 static void dst_bio_end_io(struct bio
*bio
, int err
)
440 struct dst_export_priv
*p
= bio
->bi_private
;
441 struct dst_state
*st
= p
->state
;
444 spin_lock_irqsave(&st
->request_lock
, flags
);
445 list_add_tail(&p
->request_entry
, &st
->request_list
);
446 spin_unlock_irqrestore(&st
->request_lock
, flags
);
448 wake_up(&st
->thread_wait
);
452 * Allocate read request for the server.
/*
 * dst_export_read_request() - attach pages to a bio for a server read.
 *
 * @bio:        bio to populate.
 * @total_size: number of bytes requested.
 *
 * Visible steps: allocate a page with GFP_KERNEL, clamp the chunk to
 * min(PAGE_SIZE, total_size), add it to the bio at offset 0 with
 * bio_add_page() and log the result; a failure jumps to
 * err_out_free_page.
 *
 * NOTE(review): this listing is partial - the local declarations, the
 * loop over total_size (if any), the failure checks, the error labels and
 * the return value are not visible here.
 */
454 static int dst_export_read_request(struct bio
*bio
, unsigned int total_size
)
462 page
= alloc_page(GFP_KERNEL
);
/* Never add more than one page's worth per call to bio_add_page(). */
466 size
= min_t(unsigned int, PAGE_SIZE
, total_size
);
468 err
= bio_add_page(bio
, page
, size
, 0);
469 dprintk("%s: bio: %llu/%u, size: %u, err: %d.\n",
470 __func__
, (u64
)bio
->bi_sector
, bio
->bi_size
,
473 goto err_out_free_page
;
487 * Allocate write request for the server.
488 * Should not only get pages, but also read data from the network.
/*
 * dst_export_write_request() - attach pages to a bio for a server write
 * and fill them with data received from the client.
 *
 * @st:         state used to receive the data.
 * @bio:        bio to populate.
 * @total_size: number of bytes to be written.
 *
 * Visible steps: allocate a page with GFP_KERNEL, clamp the chunk to
 * min(PAGE_SIZE, total_size), read that many bytes into 'data' with
 * dst_data_recv(), then add the page to the bio at offset 0.  Failures
 * jump to err_out_free_page or err_out_unmap_page.
 *
 * NOTE(review): this listing is partial - the local declarations, how
 * 'data' is mapped from 'page', the loop over total_size (if any), the
 * failure checks, the error labels and the return value are not visible.
 */
490 static int dst_export_write_request(struct dst_state
*st
,
491 struct bio
*bio
, unsigned int total_size
)
500 page
= alloc_page(GFP_KERNEL
);
506 goto err_out_free_page
;
508 size
= min_t(unsigned int, PAGE_SIZE
, total_size
);
/* Payload is read from the network before the page is attached to the bio. */
510 err
= dst_data_recv(st
, data
, size
);
512 goto err_out_unmap_page
;
514 err
= bio_add_page(bio
, page
, size
, 0);
516 goto err_out_unmap_page
;
534 * Groovy, we've gotten an IO request from the client.
535 * Allocate BIO from the bioset, private data from the mempool
536 * and lots of pages for IO.
/*
 * dst_process_io() - turn a client IO command into a block request.
 *
 * @st: client state; st->data holds the received struct dst_cmd and
 *      st->node the exported node.
 *
 * Visible steps:
 *  - check that the node has a block device (n->bdev);
 *  - allocate a bio with GFP_KERNEL sized for PAGE_ALIGN(cmd->size) pages;
 *  - locate the private data sizeof(struct dst_export_priv) bytes in
 *    front of the bio and take a state reference into priv->state;
 *  - fill in the bio (private data, end_io, destructor, device, flags,
 *    rw, sector) and prepare the response command in priv->cmd;
 *  - for WRITE: receive crypto data and the payload, and return through
 *    dst_export_crypto() when the node needs crypto; the other visible
 *    call allocates pages with dst_export_read_request();
 *  - submit the bio with generic_make_request().
 *
 * NOTE(review): this listing is partial - the declarations of 'bio' and
 * 'err', the bio set passed to bio_alloc_bioset(), every failure check,
 * the else branch structure, the error paths and the return values are
 * not visible here.
 */
538 int dst_process_io(struct dst_state
*st
)
540 struct dst_node
*n
= st
->node
;
541 struct dst_cmd
*cmd
= st
->data
;
543 struct dst_export_priv
*priv
;
546 if (unlikely(!n
->bdev
)) {
/* One bio vector per page needed to cover cmd->size bytes. */
551 bio
= bio_alloc_bioset(GFP_KERNEL
,
552 PAGE_ALIGN(cmd
->size
) >> PAGE_SHIFT
,
/* Private data sits immediately in front of the bio. */
557 priv
= (struct dst_export_priv
*)(((void *)bio
) -
558 sizeof (struct dst_export_priv
));
/* Reference is dropped again in dst_bio_destructor(). */
560 priv
->state
= dst_state_get(st
);
563 bio
->bi_private
= priv
;
564 bio
->bi_end_io
= dst_bio_end_io
;
565 bio
->bi_destructor
= dst_bio_destructor
;
566 bio
->bi_bdev
= n
->bdev
;
569 * Server side is only interested in two low bits:
570 * uptodate (set by itself actually) and rw block
572 bio
->bi_flags
|= cmd
->flags
& 3;
574 bio
->bi_rw
= cmd
->rw
;
576 bio
->bi_sector
= cmd
->sector
;
/* Build the response command from the bio, keeping the client's request id. */
578 dst_bio_to_cmd(bio
, &priv
->cmd
, DST_IO_RESPONSE
, cmd
->id
);
581 priv
->cmd
.size
= cmd
->size
;
583 if (bio_data_dir(bio
) == WRITE
) {
584 err
= dst_recv_cdata(st
, priv
->cmd
.hash
);
588 err
= dst_export_write_request(st
, bio
, cmd
->size
);
592 if (dst_need_crypto(n
))
593 return dst_export_crypto(n
, bio
);
595 err
= dst_export_read_request(bio
, cmd
->size
);
600 dprintk("%s: bio: %llu/%u, rw: %lu, dir: %lu, flags: %lx, phys: %d.\n",
601 __func__
, (u64
)bio
->bi_sector
, bio
->bi_size
,
602 bio
->bi_rw
, bio_data_dir(bio
),
603 bio
->bi_flags
, bio
->bi_phys_segments
);
605 generic_make_request(bio
);
616 * Ok, block IO is ready, let's send it back to the client...
/*
 * dst_export_send_bio() - send a completed request back to the client.
 *
 * @bio: completed bio; bi_private is its struct dst_export_priv, which
 *       supplies the owning state and the response command.
 *
 * Visible steps: log the command, convert it with dst_convert_cmd(), then
 * for a WRITE send only the command header with size and csize zeroed;
 * the other visible call, dst_send_bio(), sends the command together with
 * the bio.  Two dst_state_unlock() calls are visible.
 *
 * NOTE(review): this listing is partial - the declaration of 'err', the
 * matching dst_state_lock(), the branch structure between the two send
 * calls, the error handling and the return statements are not visible,
 * and the function may continue past the end of this listing.
 */
618 int dst_export_send_bio(struct bio
*bio
)
620 struct dst_export_priv
*p
= bio
->bi_private
;
621 struct dst_state
*st
= p
->state
;
622 struct dst_cmd
*cmd
= &p
->cmd
;
625 dprintk("%s: id: %llu, bio: %llu/%u, csize: %u, flags: %lu, rw: %lu.\n",
626 __func__
, cmd
->id
, (u64
)bio
->bi_sector
, bio
->bi_size
,
627 cmd
->csize
, bio
->bi_flags
, bio
->bi_rw
);
629 dst_convert_cmd(cmd
);
637 if (bio_data_dir(bio
) == WRITE
) {
638 /* ... or just confirmation that writing has completed. */
639 cmd
->size
= cmd
->csize
= 0;
640 err
= dst_data_send_header(st
->socket
, cmd
,
641 sizeof(struct dst_cmd
), 0);
645 err
= dst_send_bio(st
, cmd
, bio
);
650 dst_state_unlock(st
);
656 dst_state_unlock(st
);