Import 2.3.50pre1
[davej-history.git] / drivers / block / nbd.c
blob65e7bfdc48e2c0351d7980e5a28928c1976ce0d3
1 /*
2 * Network block device - make block devices work over TCP
4 * Note that you can not swap over this thing, yet. Seems to work but
5 * deadlocks sometimes - you can not swap over TCP in general.
6 *
7 * Copyright 1997 Pavel Machek <pavel@atrey.karlin.mff.cuni.cz>
8 *
9 * (part of code stolen from loop.c)
11 * 97-3-25 compiled 0-th version, not yet tested it
12 * (it did not work, BTW) (later that day) HEY! it works!
13 * (bit later) hmm, not that much... 2:00am next day:
14 * yes, it works, but it gives something like 50kB/sec
15 * 97-4-01 complete rewrite to make it possible for many requests at
16 * once to be processed
17 * 97-4-11 Making protocol independent of endianness etc.
18 * 97-9-13 Cosmetic changes
19 * 98-5-13 Attempt to make 64-bit-clean on 64-bit machines
20 * 99-1-11 Attempt to make 64-bit-clean on 32-bit machines <ankry@mif.pg.gda.pl>
22 * possible FIXME: make set_sock / set_blksize / set_size / do_it one syscall
23 * why not: would need verify_area and friends, would share yet another
24 * structure with userland
27 #define PARANOIA
28 #include <linux/major.h>
30 #include <linux/module.h>
32 #include <linux/sched.h>
33 #include <linux/fs.h>
34 #include <linux/stat.h>
35 #include <linux/errno.h>
36 #include <linux/file.h>
37 #include <linux/ioctl.h>
38 #include <net/sock.h>
40 #include <asm/segment.h>
41 #include <asm/uaccess.h>
42 #include <asm/types.h>
44 #define MAJOR_NR NBD_MAJOR
45 #include <linux/nbd.h>
/*
 * Value stored in nbd_dev[i].magic by nbd_init(); the PARANOIA checks
 * compare against it to detect corruption of nbd_dev[].
 */
#define LO_MAGIC 0x68797548

/* Per-device tables, indexed by minor number (0 .. MAX_NBD - 1). */
static int nbd_blksizes[MAX_NBD];	/* block size in bytes; installed as blksize_size[MAJOR_NR] */
static int nbd_blksize_bits[MAX_NBD];	/* log2 of the block size */
static int nbd_sizes[MAX_NBD];		/* size in blocks; installed as blk_size[MAJOR_NR] */
static u64 nbd_bytesizes[MAX_NBD];	/* size in bytes */

/* Per-device driver state, indexed by minor number. */
static struct nbd_device nbd_dev[MAX_NBD];

/* Debug tracing is compiled out; the printk variant is kept below for reference. */
#define DEBUG( s )
/* #define DEBUG( s ) printk( s )
 */

#ifdef PARANOIA
/* Global counters printed by the NBD_PRINT_DEBUG ioctl. */
static int requests_in;		/* incremented by do_nbd_request() for each accepted request */
static int requests_out;	/* not updated in this file; presumably by nbd_end_request() - verify */
#endif
65 static int nbd_open(struct inode *inode, struct file *file)
67 int dev;
68 struct nbd_device *nbdev;
70 if (!inode)
71 return -EINVAL;
72 dev = MINOR(inode->i_rdev);
73 if (dev >= MAX_NBD)
74 return -ENODEV;
76 nbdev = &nbd_dev[dev];
77 nbd_dev[dev].refcnt++;
78 if (!(nbdev->flags & NBD_INITIALISED)) {
79 init_MUTEX(&nbdev->queue_lock);
80 INIT_LIST_HEAD(&nbdev->queue_head);
81 nbdev->flags |= NBD_INITIALISED;
83 MOD_INC_USE_COUNT;
84 return 0;
88 * Send or receive packet.
90 static int nbd_xmit(int send, struct socket *sock, char *buf, int size)
92 mm_segment_t oldfs;
93 int result;
94 struct msghdr msg;
95 struct iovec iov;
96 unsigned long flags;
97 sigset_t oldset;
99 oldfs = get_fs();
100 set_fs(get_ds());
102 spin_lock_irqsave(&current->sigmask_lock, flags);
103 oldset = current->blocked;
104 sigfillset(&current->blocked);
105 recalc_sigpending(current);
106 spin_unlock_irqrestore(&current->sigmask_lock, flags);
109 do {
110 sock->sk->allocation = GFP_ATOMIC;
111 iov.iov_base = buf;
112 iov.iov_len = size;
113 msg.msg_name = NULL;
114 msg.msg_namelen = 0;
115 msg.msg_iov = &iov;
116 msg.msg_iovlen = 1;
117 msg.msg_control = NULL;
118 msg.msg_controllen = 0;
119 msg.msg_namelen = 0;
120 msg.msg_flags = 0;
122 if (send)
123 result = sock_sendmsg(sock, &msg, size);
124 else
125 result = sock_recvmsg(sock, &msg, size, 0);
127 if (result <= 0) {
128 #ifdef PARANOIA
129 printk(KERN_ERR "NBD: %s - sock=%ld at buf=%ld, size=%d returned %d.\n",
130 send ? "send" : "receive", (long) sock, (long) buf, size, result);
131 #endif
132 break;
134 size -= result;
135 buf += result;
136 } while (size > 0);
138 spin_lock_irqsave(&current->sigmask_lock, flags);
139 current->blocked = oldset;
140 recalc_sigpending(current);
141 spin_unlock_irqrestore(&current->sigmask_lock, flags);
143 set_fs(oldfs);
144 return result;
147 #define FAIL( s ) { printk( KERN_ERR "NBD: " s "(result %d)\n", result ); goto error_out; }
149 void nbd_send_req(struct socket *sock, struct request *req)
151 int result;
152 struct nbd_request request;
154 DEBUG("NBD: sending control, ");
155 request.magic = htonl(NBD_REQUEST_MAGIC);
156 request.type = htonl(req->cmd);
157 request.from = cpu_to_be64( (u64) req->sector << 9);
158 request.len = htonl(req->current_nr_sectors << 9);
159 memcpy(request.handle, &req, sizeof(req));
161 result = nbd_xmit(1, sock, (char *) &request, sizeof(request));
162 if (result <= 0)
163 FAIL("Sendmsg failed for control.");
165 if (req->cmd == WRITE) {
166 DEBUG("data, ");
167 result = nbd_xmit(1, sock, req->buffer, req->current_nr_sectors << 9);
168 if (result <= 0)
169 FAIL("Send data failed.");
171 return;
173 error_out:
174 req->errors++;
177 #define HARDFAIL( s ) { printk( KERN_ERR "NBD: " s "(result %d)\n", result ); lo->harderror = result; return NULL; }
178 struct request *nbd_read_stat(struct nbd_device *lo)
179 /* NULL returned = something went wrong, inform userspace */
181 int result;
182 struct nbd_reply reply;
183 struct request *xreq, *req;
185 DEBUG("reading control, ");
186 reply.magic = 0;
187 result = nbd_xmit(0, lo->sock, (char *) &reply, sizeof(reply));
188 if (result <= 0)
189 HARDFAIL("Recv control failed.");
190 memcpy(&xreq, reply.handle, sizeof(xreq));
191 req = blkdev_entry_prev_request(&lo->queue_head);
193 if (xreq != req)
194 FAIL("Unexpected handle received.\n");
196 DEBUG("ok, ");
197 if (ntohl(reply.magic) != NBD_REPLY_MAGIC)
198 HARDFAIL("Not enough magic.");
199 if (ntohl(reply.error))
200 FAIL("Other side returned error.");
201 if (req->cmd == READ) {
202 DEBUG("data, ");
203 result = nbd_xmit(0, lo->sock, req->buffer, req->current_nr_sectors << 9);
204 if (result <= 0)
205 HARDFAIL("Recv data failed.");
207 DEBUG("done.\n");
208 return req;
210 /* Can we get here? Yes, if other side returns error */
211 error_out:
212 req->errors++;
213 return req;
216 void nbd_do_it(struct nbd_device *lo)
218 struct request *req;
220 down (&lo->queue_lock);
221 while (!list_empty(&lo->queue_head)) {
222 req = nbd_read_stat(lo);
223 if (!req)
224 goto out;
225 #ifdef PARANOIA
226 if (req != blkdev_entry_prev_request(&lo->queue_head)) {
227 printk(KERN_ALERT "NBD: I have problem...\n");
229 if (lo != &nbd_dev[MINOR(req->rq_dev)]) {
230 printk(KERN_ALERT "NBD: request corrupted!\n");
231 continue;
233 if (lo->magic != LO_MAGIC) {
234 printk(KERN_ALERT "NBD: nbd_dev[] corrupted: Not enough magic\n");
235 goto out;
237 #endif
238 list_del(&req->queue);
239 up (&lo->queue_lock);
241 nbd_end_request(req);
243 down (&lo->queue_lock);
245 out:
246 up (&lo->queue_lock);
249 void nbd_clear_que(struct nbd_device *lo)
251 struct request *req;
253 while (!list_empty(&lo->queue_head)) {
254 req = blkdev_entry_prev_request(&lo->queue_head);
255 #ifdef PARANOIA
256 if (lo != &nbd_dev[MINOR(req->rq_dev)]) {
257 printk(KERN_ALERT "NBD: request corrupted when clearing!\n");
258 continue;
260 if (lo->magic != LO_MAGIC) {
261 printk(KERN_ERR "NBD: nbd_dev[] corrupted: Not enough magic when clearing!\n");
262 return;
264 #endif
265 req->errors++;
266 list_del(&req->queue);
267 up(&lo->queue_lock);
269 nbd_end_request(req);
271 down(&lo->queue_lock);
276 * We always wait for result of write, for now. It would be nice to make it optional
277 * in future
278 * if ((req->cmd == WRITE) && (lo->flags & NBD_WRITE_NOCHK))
279 * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
282 #undef FAIL
283 #define FAIL( s ) { printk( KERN_ERR "NBD, minor %d: " s "\n", dev ); goto error_out; }
285 static void do_nbd_request(request_queue_t * q)
287 struct request *req;
288 int dev;
289 struct nbd_device *lo;
291 while (!QUEUE_EMPTY) {
292 req = CURRENT;
293 dev = MINOR(req->rq_dev);
294 #ifdef PARANOIA
295 if (dev >= MAX_NBD)
296 FAIL("Minor too big."); /* Probably can not happen */
297 #endif
298 lo = &nbd_dev[dev];
299 if (!lo->file)
300 FAIL("Request when not-ready.");
301 if ((req->cmd == WRITE) && (lo->flags & NBD_READ_ONLY))
302 FAIL("Write on read-only");
303 #ifdef PARANOIA
304 if (lo->magic != LO_MAGIC)
305 FAIL("nbd[] is not magical!");
306 requests_in++;
307 #endif
308 req->errors = 0;
309 blkdev_dequeue_request(req);
310 spin_unlock_irq(&io_request_lock);
312 down (&lo->queue_lock);
313 list_add(&req->queue, &lo->queue_head);
314 nbd_send_req(lo->sock, req); /* Why does this block? */
315 up (&lo->queue_lock);
317 spin_lock_irq(&io_request_lock);
318 continue;
320 error_out:
321 req->errors++;
322 blkdev_dequeue_request(req);
323 spin_unlock(&io_request_lock);
324 nbd_end_request(req);
325 spin_lock(&io_request_lock);
327 return;
330 static int nbd_ioctl(struct inode *inode, struct file *file,
331 unsigned int cmd, unsigned long arg)
333 struct nbd_device *lo;
334 int dev, error, temp;
336 /* Anyone capable of this syscall can do *real bad* things */
338 if (!capable(CAP_SYS_ADMIN))
339 return -EPERM;
340 if (!inode)
341 return -EINVAL;
342 dev = MINOR(inode->i_rdev);
343 if (dev >= MAX_NBD)
344 return -ENODEV;
346 lo = &nbd_dev[dev];
347 switch (cmd) {
348 case NBD_CLEAR_SOCK:
349 down(&lo->queue_lock);
350 nbd_clear_que(lo);
351 if (!list_empty(&lo->queue_head)) {
352 up(&lo->queue_lock);
353 printk(KERN_ERR "nbd: Some requests are in progress -> can not turn off.\n");
354 return -EBUSY;
356 up(&lo->queue_lock);
357 file = lo->file;
358 if (!file)
359 return -EINVAL;
360 lo->file = NULL;
361 lo->sock = NULL;
362 fput(file);
363 return 0;
364 case NBD_SET_SOCK:
365 if (lo->file)
366 return -EBUSY;
367 error = -EINVAL;
368 file = fget(arg);
369 if (file) {
370 inode = file->f_dentry->d_inode;
371 /* N.B. Should verify that it's a socket */
372 lo->file = file;
373 lo->sock = &inode->u.socket_i;
374 error = 0;
376 return error;
377 case NBD_SET_BLKSIZE:
378 if ((arg & (arg-1)) || (arg < 512) || (arg > PAGE_SIZE))
379 return -EINVAL;
380 nbd_blksizes[dev] = arg;
381 temp = arg >> 9;
382 nbd_blksize_bits[dev] = 9;
383 while (temp > 1) {
384 nbd_blksize_bits[dev]++;
385 temp >>= 1;
387 nbd_sizes[dev] = nbd_bytesizes[dev] >> nbd_blksize_bits[dev];
388 nbd_bytesizes[dev] = nbd_sizes[dev] << nbd_blksize_bits[dev];
389 return 0;
390 case NBD_SET_SIZE:
391 nbd_sizes[dev] = arg >> nbd_blksize_bits[dev];
392 nbd_bytesizes[dev] = nbd_sizes[dev] << nbd_blksize_bits[dev];
393 return 0;
394 case NBD_SET_SIZE_BLOCKS:
395 nbd_sizes[dev] = arg;
396 nbd_bytesizes[dev] = ((u64) arg) << nbd_blksize_bits[dev];
397 return 0;
398 case NBD_DO_IT:
399 if (!lo->file)
400 return -EINVAL;
401 nbd_do_it(lo);
402 return lo->harderror;
403 case NBD_CLEAR_QUE:
404 nbd_clear_que(lo);
405 return 0;
406 #ifdef PARANOIA
407 case NBD_PRINT_DEBUG:
408 printk(KERN_INFO "NBD device %d: next = %p, prev = %p. Global: in %d, out %d\n",
409 dev, lo->queue_head.next, lo->queue_head.prev, requests_in, requests_out);
410 return 0;
411 #endif
412 case BLKGETSIZE:
413 return put_user(nbd_bytesizes[dev] >> 9, (long *) arg);
415 return -EINVAL;
418 static int nbd_release(struct inode *inode, struct file *file)
420 struct nbd_device *lo;
421 int dev;
423 if (!inode)
424 return -ENODEV;
425 dev = MINOR(inode->i_rdev);
426 if (dev >= MAX_NBD)
427 return -ENODEV;
428 lo = &nbd_dev[dev];
429 if (lo->refcnt <= 0)
430 printk(KERN_ALERT "nbd_release: refcount(%d) <= 0\n", lo->refcnt);
431 lo->refcnt--;
432 /* N.B. Doesn't lo->file need an fput?? */
433 MOD_DEC_USE_COUNT;
434 return 0;
437 static struct block_device_operations nbd_fops =
439 open: nbd_open,
440 release: nbd_release,
441 ioctl: nbd_ioctl,
445 * And here should be modules and kernel interface
446 * (Just smiley confuses emacs :-)
449 #ifdef MODULE
450 #define nbd_init init_module
451 #endif
453 int nbd_init(void)
455 int i;
457 if (sizeof(struct nbd_request) != 28) {
458 printk(KERN_CRIT "Sizeof nbd_request needs to be 28 in order to work!\n" );
459 return -EIO;
462 if (register_blkdev(MAJOR_NR, "nbd", &nbd_fops)) {
463 printk("Unable to get major number %d for NBD\n",
464 MAJOR_NR);
465 return -EIO;
467 #ifdef MODULE
468 printk("nbd: registered device at major %d\n", MAJOR_NR);
469 #endif
470 blksize_size[MAJOR_NR] = nbd_blksizes;
471 blk_size[MAJOR_NR] = nbd_sizes;
472 blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_nbd_request);
473 blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0);
474 for (i = 0; i < MAX_NBD; i++) {
475 nbd_dev[i].refcnt = 0;
476 nbd_dev[i].file = NULL;
477 nbd_dev[i].magic = LO_MAGIC;
478 nbd_dev[i].flags = 0;
479 nbd_blksizes[i] = 1024;
480 nbd_blksize_bits[i] = 10;
481 nbd_bytesizes[i] = 0x7ffffc00; /* 2GB */
482 nbd_sizes[i] = nbd_bytesizes[i] >> nbd_blksize_bits[i];
483 register_disk(NULL, MKDEV(MAJOR_NR,i), 1, &nbd_fops,
484 nbd_bytesizes[i]>>9);
486 return 0;
#ifdef MODULE
/*
 * Module unload: undo nbd_init().
 *
 * Tears down the request queue and clears the blksize_size[] / blk_size[]
 * slots, which would otherwise keep pointing at this module's arrays after
 * it is unloaded, then unregisters the block major and logs the outcome.
 */
void cleanup_module(void)
{
	blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
	blksize_size[MAJOR_NR] = NULL;
	blk_size[MAJOR_NR] = NULL;

	if (unregister_blkdev(MAJOR_NR, "nbd") != 0)
		printk("nbd: cleanup_module failed\n");
	else
		printk("nbd: module cleaned up.\n");
}
#endif