Import 2.1.118
[davej-history.git] / drivers / block / nbd.c
blob94464709615c4139ae70da46d304b39767a853a3
1 /*
2 * Network block device - make block devices work over TCP
4 * Note that you can not swap over this thing, yet. Seems to work but
5 * deadlocks sometimes - you can not swap over TCP in general.
6 *
7 * Copyright 1997 Pavel Machek <pavel@atrey.karlin.mff.cuni.cz>
8 *
9 * (part of code stolen from loop.c)
11 * 97-3-25 compiled 0-th version, not yet tested it
12 * (it did not work, BTW) (later that day) HEY! it works!
13 * (bit later) hmm, not that much... 2:00am next day:
14 * yes, it works, but it gives something like 50kB/sec
15 * 97-4-01 complete rewrite to make it possible for many requests at
16 * once to be processed
17 * 97-4-11 Making protocol independent of endianity etc.
18 * 97-9-13 Cosmetic changes
19 * 98-5-13 Attempt to make 64-bit-clean on 64-bit machines
21 * possible FIXME: make set_sock / set_blksize / set_size / do_it one syscall
22 * why not: would need verify_area and friends, would share yet another
23 * structure with userland
26 #define PARANOIA
27 #include <linux/major.h>
29 #include <linux/module.h>
31 #include <linux/sched.h>
32 #include <linux/fs.h>
33 #include <linux/stat.h>
34 #include <linux/errno.h>
35 #include <linux/file.h>
36 #include <linux/ioctl.h>
38 #include <asm/segment.h>
39 #include <asm/uaccess.h>
40 #include <asm/types.h>
42 #define MAJOR_NR NBD_MAJOR
43 #include <linux/nbd.h>
45 #define LO_MAGIC 0x68797548
47 static int nbd_blksizes[MAX_NBD] = {1024, 1024,};
48 static int nbd_sizes[MAX_NBD] = {0x7fffffff, 0x7fffffff,};
50 static struct nbd_device nbd_dev[MAX_NBD];
52 #define DEBUG( s )
53 /* #define DEBUG( s ) printk( s )
56 #ifdef PARANOIA
57 static int requests_in;
58 static int requests_out;
59 #endif
61 static int nbd_open(struct inode *inode, struct file *file)
63 int dev;
65 if (!inode)
66 return -EINVAL;
67 dev = MINOR(inode->i_rdev);
68 if (dev >= MAX_NBD)
69 return -ENODEV;
70 nbd_dev[dev].refcnt++;
71 MOD_INC_USE_COUNT;
72 return 0;
76 * Send or receive packet.
78 static int nbd_xmit(int send, struct socket *sock, char *buf, int size)
80 mm_segment_t oldfs;
81 int result;
82 struct msghdr msg;
83 struct iovec iov;
85 oldfs = get_fs();
86 set_fs(get_ds());
87 do {
88 sigset_t oldset;
90 iov.iov_base = buf;
91 iov.iov_len = size;
92 msg.msg_name = NULL;
93 msg.msg_namelen = 0;
94 msg.msg_iov = &iov;
95 msg.msg_iovlen = 1;
96 msg.msg_control = NULL;
97 msg.msg_controllen = 0;
98 msg.msg_namelen = 0;
99 msg.msg_flags = 0;
101 spin_lock_irq(&current->sigmask_lock);
102 oldset = current->blocked;
103 sigfillset(&current->blocked);
104 recalc_sigpending(current);
105 spin_unlock_irq(&current->sigmask_lock);
107 if (send)
108 result = sock_sendmsg(sock, &msg, size);
109 else
110 result = sock_recvmsg(sock, &msg, size, 0);
112 spin_lock_irq(&current->sigmask_lock);
113 current->blocked = oldset;
114 recalc_sigpending(current);
115 spin_unlock_irq(&current->sigmask_lock);
117 if (result <= 0) {
118 #ifdef PARANOIA
119 printk(KERN_ERR "NBD: %s - sock=%ld at buf=%ld, size=%d returned %d.\n",
120 send ? "send" : "receive", (long) sock, (long) buf, size, result);
121 #endif
122 break;
124 size -= result;
125 buf += result;
126 } while (size > 0);
127 set_fs(oldfs);
128 return result;
131 #define FAIL( s ) { printk( KERN_ERR "NBD: " s "(result %d)\n", result ); goto error_out; }
133 void nbd_send_req(struct socket *sock, struct request *req)
135 int result;
136 struct nbd_request request;
138 DEBUG("NBD: sending control, ");
139 request.magic = htonl(NBD_REQUEST_MAGIC);
140 request.type = htonl(req->cmd);
141 request.from = cpu_to_be64( (u64) req->sector * (u64) 512);
142 request.len = htonl(req->current_nr_sectors << 9);
143 memcpy(request.handle, &req, sizeof(req));
145 result = nbd_xmit(1, sock, (char *) &request, sizeof(request));
146 if (result <= 0)
147 FAIL("Sendmsg failed for control.");
149 if (req->cmd == WRITE) {
150 DEBUG("data, ");
151 result = nbd_xmit(1, sock, req->buffer, req->current_nr_sectors << 9);
152 if (result <= 0)
153 FAIL("Send data failed.");
155 return;
157 error_out:
158 req->errors++;
161 #define HARDFAIL( s ) { printk( KERN_ERR "NBD: " s "(result %d)\n", result ); lo->harderror = result; return NULL; }
162 struct request *nbd_read_stat(struct nbd_device *lo)
163 /* NULL returned = something went wrong, inform userspace */
165 int result;
166 struct nbd_reply reply;
167 struct request *xreq, *req;
169 DEBUG("reading control, ");
170 reply.magic = 0;
171 result = nbd_xmit(0, lo->sock, (char *) &reply, sizeof(reply));
172 req = lo->tail;
173 if (result <= 0)
174 HARDFAIL("Recv control failed.");
175 memcpy(&xreq, reply.handle, sizeof(xreq));
177 if (xreq != req)
178 FAIL("Unexpected handle received.\n");
180 DEBUG("ok, ");
181 if (ntohl(reply.magic) != NBD_REPLY_MAGIC)
182 HARDFAIL("Not enough magic.");
183 if (ntohl(reply.error))
184 FAIL("Other side returned error.");
185 if (req->cmd == READ) {
186 DEBUG("data, ");
187 result = nbd_xmit(0, lo->sock, req->buffer, req->current_nr_sectors << 9);
188 if (result <= 0)
189 HARDFAIL("Recv data failed.");
191 DEBUG("done.\n");
192 return req;
194 /* Can we get here? Yes, if other side returns error */
195 error_out:
196 req->errors++;
197 return req;
200 void nbd_do_it(struct nbd_device *lo)
202 struct request *req;
204 while (1) {
205 req = nbd_read_stat(lo);
206 if (!req)
207 return;
208 #ifdef PARANOIA
209 if (req != lo->tail) {
210 printk(KERN_ALERT "NBD: I have problem...\n");
212 if (lo != &nbd_dev[MINOR(req->rq_dev)]) {
213 printk(KERN_ALERT "NBD: request corrupted!\n");
214 continue;
216 if (lo->magic != LO_MAGIC) {
217 printk(KERN_ALERT "NBD: nbd_dev[] corrupted: Not enough magic\n");
218 return;
220 #endif
221 nbd_end_request(req);
222 if (lo->tail == lo->head) {
223 #ifdef PARANOIA
224 if (lo->tail->next)
225 printk(KERN_ERR "NBD: I did not expect this\n");
226 #endif
227 lo->head = NULL;
229 lo->tail = lo->tail->next;
233 void nbd_clear_que(struct nbd_device *lo)
235 struct request *req;
237 while (1) {
238 req = lo->tail;
239 if (!req)
240 return;
241 #ifdef PARANOIA
242 if (lo != &nbd_dev[MINOR(req->rq_dev)]) {
243 printk(KERN_ALERT "NBD: request corrupted when clearing!\n");
244 continue;
246 if (lo->magic != LO_MAGIC) {
247 printk(KERN_ERR "NBD: nbd_dev[] corrupted: Not enough magic when clearing!\n");
248 return;
250 #endif
251 req->errors++;
252 nbd_end_request(req);
253 if (lo->tail == lo->head) {
254 #ifdef PARANOIA
255 if (lo->tail->next)
256 printk(KERN_ERR "NBD: I did not assume this\n");
257 #endif
258 lo->head = NULL;
260 lo->tail = lo->tail->next;
265 * We always wait for result of write, for now. It would be nice to make it optional
266 * in future
267 * if ((req->cmd == WRITE) && (lo->flags & NBD_WRITE_NOCHK))
268 * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
271 #undef FAIL
272 #define FAIL( s ) { printk( KERN_ERR "NBD, minor %d: " s "\n", dev ); goto error_out; }
274 static void do_nbd_request(void)
276 struct request *req;
277 int dev;
278 struct nbd_device *lo;
280 while (CURRENT) {
281 req = CURRENT;
282 dev = MINOR(req->rq_dev);
283 #ifdef PARANOIA
284 if (dev >= MAX_NBD)
285 FAIL("Minor too big."); /* Probably can not happen */
286 #endif
287 lo = &nbd_dev[dev];
288 if (!lo->file)
289 FAIL("Request when not-ready.");
290 if ((req->cmd == WRITE) && (lo->flags && NBD_READ_ONLY))
291 FAIL("Write on read-only");
292 #ifdef PARANOIA
293 if (lo->magic != LO_MAGIC)
294 FAIL("nbd[] is not magical!");
295 requests_in++;
296 #endif
297 req->errors = 0;
299 nbd_send_req(lo->sock, req); /* Why does this block? */
300 CURRENT = CURRENT->next;
301 req->next = NULL;
302 if (lo->head == NULL) {
303 lo->head = req;
304 lo->tail = req;
305 } else {
306 lo->head->next = req;
307 lo->head = req;
309 continue;
311 error_out:
312 req->errors++;
313 nbd_end_request(req);
314 CURRENT = CURRENT->next;
316 return;
319 static int nbd_ioctl(struct inode *inode, struct file *file,
320 unsigned int cmd, unsigned long arg)
322 struct nbd_device *lo;
323 int dev, error;
325 /* Anyone capable of this syscall can do *real bad* things */
327 if (!capable(CAP_SYS_ADMIN))
328 return -EPERM;
329 if (!inode)
330 return -EINVAL;
331 dev = MINOR(inode->i_rdev);
332 if (dev >= MAX_NBD)
333 return -ENODEV;
335 lo = &nbd_dev[dev];
336 switch (cmd) {
337 case NBD_CLEAR_SOCK:
338 if (lo->head || lo->tail) {
339 printk(KERN_ERR "nbd: Some requests are in progress -> can not turn off.\n");
340 return -EBUSY;
342 file = lo->file;
343 if (!file)
344 return -EINVAL;
345 lo->file = NULL;
346 lo->sock = NULL;
347 fput(file);
348 return 0;
349 case NBD_SET_SOCK:
350 if (lo->file)
351 return -EBUSY;
352 error = -EINVAL;
353 file = fget(arg);
354 if (file) {
355 inode = file->f_dentry->d_inode;
356 /* N.B. Should verify that it's a socket */
357 lo->file = file;
358 lo->sock = &inode->u.socket_i;
359 error = 0;
361 return error;
362 case NBD_SET_BLKSIZE:
363 if ((arg & 511) || (arg > PAGE_SIZE))
364 return -EINVAL;
365 nbd_blksizes[dev] = arg;
366 return 0;
367 case NBD_SET_SIZE:
368 nbd_sizes[dev] = arg;
369 return 0;
370 case NBD_DO_IT:
371 if (!lo->file)
372 return -EINVAL;
373 nbd_do_it(lo);
374 return lo->harderror;
375 case NBD_CLEAR_QUE:
376 nbd_clear_que(lo);
377 return 0;
378 #ifdef PARANOIA
379 case NBD_PRINT_DEBUG:
380 printk(KERN_INFO "NBD device %d: head = %lx, tail = %lx. Global: in %d, out %d\n",
381 dev, (long) lo->head, (long) lo->tail, requests_in, requests_out);
382 return 0;
383 #endif
385 return -EINVAL;
388 static int nbd_release(struct inode *inode, struct file *file)
390 struct nbd_device *lo;
391 int dev;
393 if (!inode)
394 return -ENODEV;
395 dev = MINOR(inode->i_rdev);
396 if (dev >= MAX_NBD)
397 return -ENODEV;
398 fsync_dev(inode->i_rdev);
399 lo = &nbd_dev[dev];
400 if (lo->refcnt <= 0)
401 printk(KERN_ALERT "nbd_release: refcount(%d) <= 0\n", lo->refcnt);
402 lo->refcnt--;
403 /* N.B. Doesn't lo->file need an fput?? */
404 MOD_DEC_USE_COUNT;
405 return 0;
408 static struct file_operations nbd_fops =
410 NULL, /* lseek - default */
411 block_read, /* read - general block-dev read */
412 block_write, /* write - general block-dev write */
413 NULL, /* readdir - bad */
414 NULL, /* select */
415 nbd_ioctl, /* ioctl */
416 NULL, /* mmap */
417 nbd_open, /* open */
418 NULL, /* flush */
419 nbd_release /* release */
423 * And here should be modules and kernel interface
424 * (Just smiley confuses emacs :-)
427 #ifdef MODULE
428 #define nbd_init init_module
429 #endif
431 int nbd_init(void)
433 int i;
435 if (sizeof(struct nbd_request) != 28) {
436 printk(KERN_CRIT "Sizeof nbd_request needs to be 28 in order to work!\n" );
437 return -EIO;
440 if (register_blkdev(MAJOR_NR, "nbd", &nbd_fops)) {
441 printk("Unable to get major number %d for NBD\n",
442 MAJOR_NR);
443 return -EIO;
445 #ifdef MODULE
446 printk("nbd: registered device at major %d\n", MAJOR_NR);
447 #endif
448 blksize_size[MAJOR_NR] = nbd_blksizes;
449 blk_size[MAJOR_NR] = nbd_sizes;
450 blk_dev[MAJOR_NR].request_fn = do_nbd_request;
451 for (i = 0; i < MAX_NBD; i++) {
452 nbd_dev[i].refcnt = 0;
453 nbd_dev[i].file = NULL;
454 nbd_dev[i].magic = LO_MAGIC;
455 nbd_dev[i].flags = 0;
457 return 0;
#ifdef MODULE
/*
 * Module unload hook: give the block major back and log whether
 * unregister_blkdev() succeeded.
 */
void cleanup_module(void)
{
	int status = unregister_blkdev(MAJOR_NR, "nbd");

	if (status == 0)
		printk("nbd: module cleaned up.\n");
	else
		printk("nbd: cleanup_module failed\n");
}
#endif