More Makefile cleanups, otherwise mainly noticeable are the netfilter fix
[davej-history.git] / drivers / block / nbd.c
blob7dfb101ab43226091df19c3418de4e0b45e136be
1 /*
2 * Network block device - make block devices work over TCP
4 * Note that you can not swap over this thing, yet. Seems to work but
5 * deadlocks sometimes - you can not swap over TCP in general.
6 *
7 * Copyright 1997-2000 Pavel Machek <pavel@ucw.cz>
8 *
9 * (part of code stolen from loop.c)
11 * 97-3-25 compiled 0-th version, not yet tested it
12 * (it did not work, BTW) (later that day) HEY! it works!
13 * (bit later) hmm, not that much... 2:00am next day:
14 * yes, it works, but it gives something like 50kB/sec
15 * 97-4-01 complete rewrite to make it possible for many requests at
16 * once to be processed
17 * 97-4-11 Making protocol independent of endianness etc.
18 * 97-9-13 Cosmetic changes
19 * 98-5-13 Attempt to make 64-bit-clean on 64-bit machines
20 * 99-1-11 Attempt to make 64-bit-clean on 32-bit machines <ankry@mif.pg.gda.pl>
22 * possible FIXME: make set_sock / set_blksize / set_size / do_it one syscall
23 * why not: would need verify_area and friends, would share yet another
24 * structure with userland
27 #undef NBD_PLUGGABLE
28 #define PARANOIA
29 #include <linux/major.h>
31 #include <linux/module.h>
33 #include <linux/sched.h>
34 #include <linux/fs.h>
35 #include <linux/stat.h>
36 #include <linux/errno.h>
37 #include <linux/file.h>
38 #include <linux/ioctl.h>
39 #include <net/sock.h>
41 #include <linux/devfs_fs_kernel.h>
43 #include <asm/segment.h>
44 #include <asm/uaccess.h>
45 #include <asm/types.h>
47 #define MAJOR_NR NBD_MAJOR
48 #include <linux/nbd.h>
50 #define LO_MAGIC 0x68797548
52 static int nbd_blksizes[MAX_NBD];
53 static int nbd_blksize_bits[MAX_NBD];
54 static int nbd_sizes[MAX_NBD];
55 static u64 nbd_bytesizes[MAX_NBD];
57 static struct nbd_device nbd_dev[MAX_NBD];
58 static devfs_handle_t devfs_handle;
60 #define DEBUG( s )
61 /* #define DEBUG( s ) printk( s )
64 #ifdef PARANOIA
65 static int requests_in;
66 static int requests_out;
67 #endif
69 static void nbd_plug_device(request_queue_t *q, kdev_t dev) { }
71 static int nbd_open(struct inode *inode, struct file *file)
73 int dev;
75 if (!inode)
76 return -EINVAL;
77 dev = MINOR(inode->i_rdev);
78 if (dev >= MAX_NBD)
79 return -ENODEV;
81 nbd_dev[dev].refcnt++;
82 MOD_INC_USE_COUNT;
83 return 0;
87 * Send or receive packet.
89 static int nbd_xmit(int send, struct socket *sock, char *buf, int size)
91 mm_segment_t oldfs;
92 int result;
93 struct msghdr msg;
94 struct iovec iov;
95 unsigned long flags;
96 sigset_t oldset;
98 oldfs = get_fs();
99 set_fs(get_ds());
101 spin_lock_irqsave(&current->sigmask_lock, flags);
102 oldset = current->blocked;
103 sigfillset(&current->blocked);
104 recalc_sigpending(current);
105 spin_unlock_irqrestore(&current->sigmask_lock, flags);
108 do {
109 sock->sk->allocation = GFP_BUFFER;
110 iov.iov_base = buf;
111 iov.iov_len = size;
112 msg.msg_name = NULL;
113 msg.msg_namelen = 0;
114 msg.msg_iov = &iov;
115 msg.msg_iovlen = 1;
116 msg.msg_control = NULL;
117 msg.msg_controllen = 0;
118 msg.msg_namelen = 0;
119 msg.msg_flags = 0;
121 if (send)
122 result = sock_sendmsg(sock, &msg, size);
123 else
124 result = sock_recvmsg(sock, &msg, size, 0);
126 if (result <= 0) {
127 #ifdef PARANOIA
128 printk(KERN_ERR "NBD: %s - sock=%ld at buf=%ld, size=%d returned %d.\n",
129 send ? "send" : "receive", (long) sock, (long) buf, size, result);
130 #endif
131 break;
133 size -= result;
134 buf += result;
135 } while (size > 0);
137 spin_lock_irqsave(&current->sigmask_lock, flags);
138 current->blocked = oldset;
139 recalc_sigpending(current);
140 spin_unlock_irqrestore(&current->sigmask_lock, flags);
142 set_fs(oldfs);
143 return result;
/*
 * Log a failure together with the caller's local 'result', then jump to the
 * caller's error_out label.  Wrapped in do { } while (0) so the expansion is
 * a single statement and stays safe inside an unbraced if/else.
 */
#define FAIL( s ) do { printk( KERN_ERR "NBD: " s "(result %d)\n", result ); goto error_out; } while (0)
148 void nbd_send_req(struct socket *sock, struct request *req)
150 int result;
151 struct nbd_request request;
153 DEBUG("NBD: sending control, ");
154 request.magic = htonl(NBD_REQUEST_MAGIC);
155 request.type = htonl(req->cmd);
156 request.from = cpu_to_be64( (u64) req->sector << 9);
157 request.len = htonl(req->current_nr_sectors << 9);
158 memcpy(request.handle, &req, sizeof(req));
160 result = nbd_xmit(1, sock, (char *) &request, sizeof(request));
161 if (result <= 0)
162 FAIL("Sendmsg failed for control.");
164 if (req->cmd == WRITE) {
165 DEBUG("data, ");
166 result = nbd_xmit(1, sock, req->buffer, req->current_nr_sectors << 9);
167 if (result <= 0)
168 FAIL("Send data failed.");
170 return;
172 error_out:
173 req->errors++;
/*
 * Log a failure together with the caller's local 'result', store that value
 * in lo->harderror and make the caller return NULL.  Wrapped in
 * do { } while (0) so the expansion is a single statement and stays safe
 * inside an unbraced if/else.
 */
#define HARDFAIL( s ) do { printk( KERN_ERR "NBD: " s "(result %d)\n", result ); lo->harderror = result; return NULL; } while (0)
177 struct request *nbd_read_stat(struct nbd_device *lo)
178 /* NULL returned = something went wrong, inform userspace */
180 int result;
181 struct nbd_reply reply;
182 struct request *xreq, *req;
184 DEBUG("reading control, ");
185 reply.magic = 0;
186 result = nbd_xmit(0, lo->sock, (char *) &reply, sizeof(reply));
187 if (result <= 0)
188 HARDFAIL("Recv control failed.");
189 memcpy(&xreq, reply.handle, sizeof(xreq));
190 req = blkdev_entry_prev_request(&lo->queue_head);
192 if (xreq != req)
193 FAIL("Unexpected handle received.\n");
195 DEBUG("ok, ");
196 if (ntohl(reply.magic) != NBD_REPLY_MAGIC)
197 HARDFAIL("Not enough magic.");
198 if (ntohl(reply.error))
199 FAIL("Other side returned error.");
200 if (req->cmd == READ) {
201 DEBUG("data, ");
202 result = nbd_xmit(0, lo->sock, req->buffer, req->current_nr_sectors << 9);
203 if (result <= 0)
204 HARDFAIL("Recv data failed.");
206 DEBUG("done.\n");
207 return req;
209 /* Can we get here? Yes, if other side returns error */
210 error_out:
211 req->errors++;
212 return req;
215 void nbd_do_it(struct nbd_device *lo)
217 struct request *req;
218 int dequeued;
220 down (&lo->queue_lock);
221 while (1) {
222 up (&lo->queue_lock);
223 req = nbd_read_stat(lo);
224 down (&lo->queue_lock);
226 if (!req) {
227 printk(KERN_ALERT "req should never be null\n" );
228 goto out;
230 #ifdef PARANOIA
231 if (req != blkdev_entry_prev_request(&lo->queue_head)) {
232 printk(KERN_ALERT "NBD: I have problem...\n");
234 if (lo != &nbd_dev[MINOR(req->rq_dev)]) {
235 printk(KERN_ALERT "NBD: request corrupted!\n");
236 continue;
238 if (lo->magic != LO_MAGIC) {
239 printk(KERN_ALERT "NBD: nbd_dev[] corrupted: Not enough magic\n");
240 goto out;
242 #endif
243 list_del(&req->queue);
244 up (&lo->queue_lock);
246 dequeued = nbd_end_request(req);
248 down (&lo->queue_lock);
249 if (!dequeued)
250 list_add(&req->queue, &lo->queue_head);
252 out:
253 up (&lo->queue_lock);
256 void nbd_clear_que(struct nbd_device *lo)
258 struct request *req;
259 int dequeued;
261 #ifdef PARANOIA
262 if (lo->magic != LO_MAGIC) {
263 printk(KERN_ERR "NBD: nbd_dev[] corrupted: Not enough magic when clearing!\n");
264 return;
266 #endif
268 while (!list_empty(&lo->queue_head)) {
269 req = blkdev_entry_prev_request(&lo->queue_head);
270 #ifdef PARANOIA
271 if (!req) {
272 printk( KERN_ALERT "NBD: panic, panic, panic\n" );
273 break;
275 if (lo != &nbd_dev[MINOR(req->rq_dev)]) {
276 printk(KERN_ALERT "NBD: request corrupted when clearing!\n");
277 continue;
279 #endif
280 req->errors++;
281 list_del(&req->queue);
282 up(&lo->queue_lock);
284 dequeued = nbd_end_request(req);
286 down(&lo->queue_lock);
287 if (!dequeued)
288 list_add(&req->queue, &lo->queue_head);
293 * We always wait for result of write, for now. It would be nice to make it optional
294 * in future
295 * if ((req->cmd == WRITE) && (lo->flags & NBD_WRITE_NOCHK))
296 * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
/*
 * From here on FAIL logs the message together with the caller's local 'dev'
 * (the minor number) and jumps to the caller's error_out label.  Wrapped in
 * do { } while (0) so the expansion is a single statement and stays safe
 * inside an unbraced if/else.
 */
#undef FAIL
#define FAIL( s ) do { printk( KERN_ERR "NBD, minor %d: " s "\n", dev ); goto error_out; } while (0)
302 static void do_nbd_request(request_queue_t * q)
304 struct request *req;
305 int dev = 0;
306 struct nbd_device *lo;
308 while (!QUEUE_EMPTY) {
309 req = CURRENT;
310 #ifdef PARANOIA
311 if (!req)
312 FAIL("que not empty but no request?");
313 #endif
314 dev = MINOR(req->rq_dev);
315 #ifdef PARANOIA
316 if (dev >= MAX_NBD)
317 FAIL("Minor too big."); /* Probably can not happen */
318 #endif
319 lo = &nbd_dev[dev];
320 if (!lo->file)
321 FAIL("Request when not-ready.");
322 if ((req->cmd == WRITE) && (lo->flags & NBD_READ_ONLY))
323 FAIL("Write on read-only");
324 #ifdef PARANOIA
325 if (lo->magic != LO_MAGIC)
326 FAIL("nbd[] is not magical!");
327 requests_in++;
328 #endif
329 req->errors = 0;
330 blkdev_dequeue_request(req);
331 spin_unlock_irq(&io_request_lock);
333 down (&lo->queue_lock);
334 list_add(&req->queue, &lo->queue_head);
335 nbd_send_req(lo->sock, req); /* Why does this block? */
336 up (&lo->queue_lock);
338 spin_lock_irq(&io_request_lock);
339 continue;
341 error_out:
342 req->errors++;
343 blkdev_dequeue_request(req);
344 spin_unlock(&io_request_lock);
345 nbd_end_request(req);
346 spin_lock(&io_request_lock);
348 return;
351 static int nbd_ioctl(struct inode *inode, struct file *file,
352 unsigned int cmd, unsigned long arg)
354 struct nbd_device *lo;
355 int dev, error, temp;
356 struct request sreq ;
358 /* Anyone capable of this syscall can do *real bad* things */
360 if (!capable(CAP_SYS_ADMIN))
361 return -EPERM;
362 if (!inode)
363 return -EINVAL;
364 dev = MINOR(inode->i_rdev);
365 if (dev >= MAX_NBD)
366 return -ENODEV;
368 lo = &nbd_dev[dev];
369 switch (cmd) {
370 case NBD_DISCONNECT:
371 printk("NBD_DISCONNECT\n") ;
372 sreq.cmd=2 ; /* shutdown command */
373 if (!lo->sock) return -EINVAL ;
374 nbd_send_req(lo->sock,&sreq) ;
375 return 0 ;
377 case NBD_CLEAR_SOCK:
378 down(&lo->queue_lock);
379 nbd_clear_que(lo);
380 if (!list_empty(&lo->queue_head)) {
381 up(&lo->queue_lock);
382 printk(KERN_ERR "nbd: Some requests are in progress -> can not turn off.\n");
383 return -EBUSY;
385 up(&lo->queue_lock);
386 file = lo->file;
387 if (!file)
388 return -EINVAL;
389 lo->file = NULL;
390 lo->sock = NULL;
391 fput(file);
392 return 0;
393 case NBD_SET_SOCK:
394 if (lo->file)
395 return -EBUSY;
396 error = -EINVAL;
397 file = fget(arg);
398 if (file) {
399 inode = file->f_dentry->d_inode;
400 /* N.B. Should verify that it's a socket */
401 lo->file = file;
402 lo->sock = &inode->u.socket_i;
403 error = 0;
405 return error;
406 case NBD_SET_BLKSIZE:
407 if ((arg & (arg-1)) || (arg < 512) || (arg > PAGE_SIZE))
408 return -EINVAL;
409 nbd_blksizes[dev] = arg;
410 temp = arg >> 9;
411 nbd_blksize_bits[dev] = 9;
412 while (temp > 1) {
413 nbd_blksize_bits[dev]++;
414 temp >>= 1;
416 nbd_sizes[dev] = nbd_bytesizes[dev] >> nbd_blksize_bits[dev];
417 nbd_bytesizes[dev] = nbd_sizes[dev] << nbd_blksize_bits[dev];
418 return 0;
419 case NBD_SET_SIZE:
420 nbd_sizes[dev] = arg >> nbd_blksize_bits[dev];
421 nbd_bytesizes[dev] = nbd_sizes[dev] << nbd_blksize_bits[dev];
422 return 0;
423 case NBD_SET_SIZE_BLOCKS:
424 nbd_sizes[dev] = arg;
425 nbd_bytesizes[dev] = ((u64) arg) << nbd_blksize_bits[dev];
426 return 0;
427 case NBD_DO_IT:
428 if (!lo->file)
429 return -EINVAL;
430 nbd_do_it(lo);
431 return lo->harderror;
432 case NBD_CLEAR_QUE:
433 nbd_clear_que(lo);
434 return 0;
435 #ifdef PARANOIA
436 case NBD_PRINT_DEBUG:
437 printk(KERN_INFO "NBD device %d: next = %p, prev = %p. Global: in %d, out %d\n",
438 dev, lo->queue_head.next, lo->queue_head.prev, requests_in, requests_out);
439 return 0;
440 #endif
441 case BLKGETSIZE:
442 return put_user(nbd_bytesizes[dev] >> 9, (long *) arg);
444 return -EINVAL;
447 static int nbd_release(struct inode *inode, struct file *file)
449 struct nbd_device *lo;
450 int dev;
452 if (!inode)
453 return -ENODEV;
454 dev = MINOR(inode->i_rdev);
455 if (dev >= MAX_NBD)
456 return -ENODEV;
457 lo = &nbd_dev[dev];
458 if (lo->refcnt <= 0)
459 printk(KERN_ALERT "nbd_release: refcount(%d) <= 0\n", lo->refcnt);
460 lo->refcnt--;
461 /* N.B. Doesn't lo->file need an fput?? */
462 MOD_DEC_USE_COUNT;
463 return 0;
466 static struct block_device_operations nbd_fops =
468 open: nbd_open,
469 release: nbd_release,
470 ioctl: nbd_ioctl,
474 * And here should be modules and kernel interface
475 * (Just smiley confuses emacs :-)
478 #ifdef MODULE
479 #define nbd_init init_module
480 #endif
482 int nbd_init(void)
484 int i;
486 if (sizeof(struct nbd_request) != 28) {
487 printk(KERN_CRIT "Sizeof nbd_request needs to be 28 in order to work!\n" );
488 return -EIO;
491 if (register_blkdev(MAJOR_NR, "nbd", &nbd_fops)) {
492 printk("Unable to get major number %d for NBD\n",
493 MAJOR_NR);
494 return -EIO;
496 #ifdef MODULE
497 printk("nbd: registered device at major %d\n", MAJOR_NR);
498 #endif
499 blksize_size[MAJOR_NR] = nbd_blksizes;
500 blk_size[MAJOR_NR] = nbd_sizes;
501 blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_nbd_request);
502 #ifndef NBD_PLUGGABLE
503 blk_queue_pluggable(BLK_DEFAULT_QUEUE(MAJOR_NR), nbd_plug_device);
504 #endif
505 blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0);
506 for (i = 0; i < MAX_NBD; i++) {
507 nbd_dev[i].refcnt = 0;
508 nbd_dev[i].file = NULL;
509 nbd_dev[i].magic = LO_MAGIC;
510 nbd_dev[i].flags = 0;
511 INIT_LIST_HEAD(&nbd_dev[i].queue_head);
512 init_MUTEX(&nbd_dev[i].queue_lock);
513 nbd_blksizes[i] = 1024;
514 nbd_blksize_bits[i] = 10;
515 nbd_bytesizes[i] = 0x7ffffc00; /* 2GB */
516 nbd_sizes[i] = nbd_bytesizes[i] >> nbd_blksize_bits[i];
517 register_disk(NULL, MKDEV(MAJOR_NR,i), 1, &nbd_fops,
518 nbd_bytesizes[i]>>9);
520 devfs_handle = devfs_mk_dir (NULL, "nbd", NULL);
521 devfs_register_series (devfs_handle, "%u", MAX_NBD,
522 DEVFS_FL_DEFAULT, MAJOR_NR, 0,
523 S_IFBLK | S_IRUSR | S_IWUSR,
524 &nbd_fops, NULL);
526 return 0;
#ifdef MODULE
/*
 * Module unload: removes the devfs entries, tears down the request queue
 * and gives the block major back, logging whether that last step worked.
 */
void cleanup_module(void)
{
	devfs_unregister (devfs_handle);
	blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));

	if (unregister_blkdev(MAJOR_NR, "nbd") == 0)
		printk("nbd: module cleaned up.\n");
	else
		printk("nbd: cleanup_module failed\n");
}
#endif