2 * Network block device - make block devices work over TCP
4 * Note that you can not swap over this thing, yet. Seems to work but
5 * deadlocks sometimes - you can not swap over TCP in general.
7 * Copyright 1997 Pavel Machek <pavel@atrey.karlin.mff.cuni.cz>
9 * (part of code stolen from loop.c)
11 * 97-3-25 compiled 0-th version, not yet tested it
12 * (it did not work, BTW) (later that day) HEY! it works!
13 * (bit later) hmm, not that much... 2:00am next day:
14 * yes, it works, but it gives something like 50kB/sec
15 * 97-4-01 complete rewrite to make it possible for many requests at
16 * once to be processed
17 * 97-4-11 Making protocol independent of endianness etc.
18 * 97-9-13 Cosmetic changes
19 * 98-5-13 Attempt to make 64-bit-clean on 64-bit machines
21 * possible FIXME: make set_sock / set_blksize / set_size / do_it one syscall
22 * why not: would need verify_area and friends, would share yet another
23 * structure with userland
27 #include <linux/major.h>
29 #include <linux/module.h>
31 #include <linux/sched.h>
33 #include <linux/stat.h>
34 #include <linux/errno.h>
35 #include <linux/file.h>
36 #include <linux/ioctl.h>
38 #include <asm/segment.h>
39 #include <asm/uaccess.h>
40 #include <asm/types.h>
42 #define MAJOR_NR NBD_MAJOR
43 #include <linux/nbd.h>
/*
 * Magic value written into every nbd_dev[].magic at init time; other code
 * in this file compares against it and logs a corruption message on
 * mismatch.
 */
45 #define LO_MAGIC 0x68797548
/*
 * Block-size table indexed by minor number. It is installed as
 * blksize_size[MAJOR_NR] at init and written by the NBD_SET_BLKSIZE ioctl.
 * Only the first two entries have explicit initializers.
 */
47 static int nbd_blksizes
[MAX_NBD
] = {1024, 1024,};
/*
 * Size table indexed by minor number. It is installed as
 * blk_size[MAJOR_NR] at init and written from the ioctl handler.
 */
48 static int nbd_sizes
[MAX_NBD
] = {0x7fffffff, 0x7fffffff,};
/* Per-device state, indexed by minor number. */
50 static struct nbd_device nbd_dev
[MAX_NBD
];
53 /* #define DEBUG( s ) printk( s )
/*
 * Global request counters. Their values are printed by the
 * NBD_PRINT_DEBUG ioctl as "in" and "out".
 */
57 static int requests_in
;
58 static int requests_out
;
/*
 * nbd_open - take a reference on an NBD device.
 * The device index is the minor number of inode->i_rdev; the matching
 * nbd_dev[] entry has its refcnt incremented.
 */
61 static int nbd_open(struct inode
*inode
, struct file
*file
)
67 dev
= MINOR(inode
->i_rdev
);
70 nbd_dev
[dev
].refcnt
++;
/*
 * nbd_xmit - send or receive a packet on a socket.
 * @send: non-zero selects sock_sendmsg(), zero selects sock_recvmsg()
 * @sock: socket to transfer on
 * @buf:  buffer address (also reported in the failure message)
 * @size: byte count passed to the socket call
 *
 * Every signal is blocked for the duration of the transfer: the old mask
 * is saved in oldset and current->blocked is filled while holding
 * current->sigmask_lock, and the saved mask is put back the same way
 * after the transfer.
 */
78 static int nbd_xmit(int send
, struct socket
*sock
, char *buf
, int size
)
96 msg
.msg_control
= NULL
;
97 msg
.msg_controllen
= 0;
/* Block all signals; the lock and the mask both belong to `current`. */
101 spin_lock_irq(&current
->sigmask_lock
);
102 oldset
= current
->blocked
;
103 sigfillset(&current
->blocked
);
104 recalc_sigpending(current
);
105 spin_unlock_irq(&current
->sigmask_lock
);
108 result
= sock_sendmsg(sock
, &msg
, size
);
110 result
= sock_recvmsg(sock
, &msg
, size
, 0);
/* Restore the signal mask saved above. */
112 spin_lock_irq(&current
->sigmask_lock
);
113 current
->blocked
= oldset
;
114 recalc_sigpending(current
);
115 spin_unlock_irq(&current
->sigmask_lock
);
119 printk(KERN_ERR
"NBD: %s - sock=%ld at buf=%ld, size=%d returned %d.\n",
120 send
? "send" : "receive", (long) sock
, (long) buf
, size
, result
);
/*
 * FAIL(s): log s together with the local `result`, then jump to the
 * enclosing function's error_out label.
 */
131 #define FAIL( s ) { printk( KERN_ERR "NBD: " s "(result %d)\n", result ); goto error_out; }
/*
 * nbd_send_req - transmit one block request over a socket.
 * @sock: socket handed to nbd_xmit()
 * @req:  block-layer request to encode
 *
 * Fills a struct nbd_request: magic, type (req->cmd) and len
 * (current_nr_sectors << 9, i.e. sectors * 512) are converted with
 * htonl(); the byte offset (sector * 512, computed in 64 bits) is
 * converted with cpu_to_be64(). For a WRITE the data in req->buffer is
 * sent after the header.
 */
133 void nbd_send_req(struct socket
*sock
, struct request
*req
)
136 struct nbd_request request
;
138 DEBUG("NBD: sending control, ");
139 request
.magic
= htonl(NBD_REQUEST_MAGIC
);
140 request
.type
= htonl(req
->cmd
);
141 request
.from
= cpu_to_be64( (u64
) req
->sector
* (u64
) 512);
142 request
.len
= htonl(req
->current_nr_sectors
<< 9);
/* The handle carries the raw bytes of the req pointer itself, not of *req. */
143 memcpy(request
.handle
, &req
, sizeof(req
));
145 result
= nbd_xmit(1, sock
, (char *) &request
, sizeof(request
));
147 FAIL("Sendmsg failed for control.");
/* Only writes carry a payload after the header. */
149 if (req
->cmd
== WRITE
) {
151 result
= nbd_xmit(1, sock
, req
->buffer
, req
->current_nr_sectors
<< 9);
153 FAIL("Send data failed.");
/*
 * HARDFAIL(s): log s with the local `result`, record result in
 * lo->harderror and return NULL from the enclosing function.
 */
161 #define HARDFAIL( s ) { printk( KERN_ERR "NBD: " s "(result %d)\n", result ); lo->harderror = result; return NULL; }
/*
 * nbd_read_stat - receive one reply on the device's socket.
 * @lo: device whose socket (lo->sock) is read
 *
 * Reads a struct nbd_reply with nbd_xmit(), copies a request pointer out
 * of reply.handle into xreq, checks reply.magic against NBD_REPLY_MAGIC
 * and tests reply.error (both converted with ntohl()), and for a READ
 * request receives current_nr_sectors << 9 bytes into req->buffer.
 */
162 struct request
*nbd_read_stat(struct nbd_device
*lo
)
163 /* NULL returned = something went wrong, inform userspace */
166 struct nbd_reply reply
;
167 struct request
*xreq
, *req
;
169 DEBUG("reading control, ");
171 result
= nbd_xmit(0, lo
->sock
, (char *) &reply
, sizeof(reply
));
174 HARDFAIL("Recv control failed.");
/* The handle holds pointer bytes; copy them back into a request pointer. */
175 memcpy(&xreq
, reply
.handle
, sizeof(xreq
));
/* NOTE(review): this message already ends in a newline and FAIL() appends "(result %d)" plus another one after it. */
178 FAIL("Unexpected handle received.\n");
181 if (ntohl(reply
.magic
) != NBD_REPLY_MAGIC
)
182 HARDFAIL("Not enough magic.");
183 if (ntohl(reply
.error
))
184 FAIL("Other side returned error.");
/* Only reads are followed by a data payload. */
185 if (req
->cmd
== READ
) {
187 result
= nbd_xmit(0, lo
->sock
, req
->buffer
, req
->current_nr_sectors
<< 9);
189 HARDFAIL("Recv data failed.");
194 /* Can we get here? Yes, if other side returns error */
/*
 * nbd_do_it - handle a reply for device lo.
 * @lo: device being serviced
 *
 * Fetches a request with nbd_read_stat(), then logs an alert if it is not
 * lo->tail, if its minor does not select lo in nbd_dev[], or if lo->magic
 * is not LO_MAGIC. The request is completed with nbd_end_request() and
 * lo->tail moves on to lo->tail->next.
 */
200 void nbd_do_it(struct nbd_device
*lo
)
205 req
= nbd_read_stat(lo
);
/* The returned request is compared against the list tail. */
209 if (req
!= lo
->tail
) {
210 printk(KERN_ALERT
"NBD: I have problem...\n");
/* The request's minor must map back to this very device. */
212 if (lo
!= &nbd_dev
[MINOR(req
->rq_dev
)]) {
213 printk(KERN_ALERT
"NBD: request corrupted!\n");
216 if (lo
->magic
!= LO_MAGIC
) {
217 printk(KERN_ALERT
"NBD: nbd_dev[] corrupted: Not enough magic\n");
221 nbd_end_request(req
);
222 if (lo
->tail
== lo
->head
) {
225 printk(KERN_ERR
"NBD: I did not expect this\n");
229 lo
->tail
= lo
->tail
->next
;
/*
 * nbd_clear_que - end requests still queued on device lo.
 * @lo: device whose queue is cleared
 *
 * Logs if the request's minor does not select lo in nbd_dev[] or if
 * lo->magic is not LO_MAGIC, completes the request with
 * nbd_end_request() and moves lo->tail on to lo->tail->next.
 */
233 void nbd_clear_que(struct nbd_device
*lo
)
/* The request's minor must map back to this very device. */
242 if (lo
!= &nbd_dev
[MINOR(req
->rq_dev
)]) {
243 printk(KERN_ALERT
"NBD: request corrupted when clearing!\n");
246 if (lo
->magic
!= LO_MAGIC
) {
247 printk(KERN_ERR
"NBD: nbd_dev[] corrupted: Not enough magic when clearing!\n");
252 nbd_end_request(req
);
253 if (lo
->tail
== lo
->head
) {
256 printk(KERN_ERR
"NBD: I did not assume this\n");
260 lo
->tail
= lo
->tail
->next
;
265 * We always wait for result of write, for now. It would be nice to make it optional
267 * if ((req->cmd == WRITE) && (lo->flags & NBD_WRITE_NOCHK))
268 * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
/*
 * FAIL(s) for the request path: log s with the minor number held in
 * `dev`, then jump to the enclosing function's error_out label.
 */
272 #define FAIL( s ) { printk( KERN_ERR "NBD, minor %d: " s "\n", dev ); goto error_out; }
/*
 * do_nbd_request - request function for the NBD major (installed as
 * blk_dev[MAJOR_NR].request_fn at init).
 *
 * Takes the device index from the minor of req->rq_dev and rejects the
 * request through FAIL() when one of the checks below trips. Otherwise
 * the request is handed to nbd_send_req() on lo->sock, CURRENT is
 * advanced, and the request is linked into the device's head/tail list.
 * The trailing nbd_end_request()/CURRENT advance is presumably the
 * target of FAIL() -- verify against the error_out label.
 */
274 static void do_nbd_request(void)
278 struct nbd_device
*lo
;
282 dev
= MINOR(req
->rq_dev
);
285 FAIL("Minor too big."); /* Probably can not happen */
289 FAIL("Request when not-ready.");
/*
 * lo->flags is a bit mask, so the read-only bit has to be tested with a
 * bitwise AND; a logical && would refuse writes whenever any flag at all
 * is set.
 */
290 if ((req
->cmd
== WRITE
) && (lo
->flags
& NBD_READ_ONLY
))
291 FAIL("Write on read-only");
293 if (lo
->magic
!= LO_MAGIC
)
294 FAIL("nbd[] is not magical!");
299 nbd_send_req(lo
->sock
, req
); /* Why does this block? */
300 CURRENT
= CURRENT
->next
;
302 if (lo
->head
== NULL
) {
306 lo
->head
->next
= req
;
313 nbd_end_request(req
);
314 CURRENT
= CURRENT
->next
;
/*
 * nbd_ioctl - control interface of an NBD device.
 * @inode: device inode; its i_rdev minor selects the device
 * @file:  open file the ioctl was issued on
 * @cmd:   ioctl command
 * @arg:   command argument
 *
 * Guarded by a capable(CAP_SYS_ADMIN) check. Among the operations handled
 * here: a socket is attached by pointing lo->sock at the socket embedded
 * in an inode, NBD_SET_BLKSIZE validates and stores a block size, a size
 * value is stored into nbd_sizes[], lo->harderror is returned to the
 * caller, and NBD_PRINT_DEBUG logs the list pointers and global counters.
 */
319 static int nbd_ioctl(struct inode
*inode
, struct file
*file
,
320 unsigned int cmd
, unsigned long arg
)
322 struct nbd_device
*lo
;
325 /* Anyone capable of this syscall can do *real bad* things */
327 if (!capable(CAP_SYS_ADMIN
))
331 dev
= MINOR(inode
->i_rdev
);
/* Requests still linked on the device block this operation. */
338 if (lo
->head
|| lo
->tail
) {
339 printk(KERN_ERR
"nbd: Some requests are in progress -> can not turn off.\n");
/* From here on `inode` refers to the inode behind `file`, not the device inode. */
355 inode
= file
->f_dentry
->d_inode
;
356 /* N.B. Should verify that it's a socket */
358 lo
->sock
= &inode
->u
.socket_i
;
362 case NBD_SET_BLKSIZE
:
/* Block size must be a multiple of 512 and no larger than a page. */
363 if ((arg
& 511) || (arg
> PAGE_SIZE
))
365 nbd_blksizes
[dev
] = arg
;
368 nbd_sizes
[dev
] = arg
;
374 return lo
->harderror
;
379 case NBD_PRINT_DEBUG
:
380 printk(KERN_INFO
"NBD device %d: head = %lx, tail = %lx. Global: in %d, out %d\n",
381 dev
, (long) lo
->head
, (long) lo
->tail
, requests_in
, requests_out
);
/*
 * nbd_release - release handler for an NBD device.
 * @inode: device inode; its i_rdev minor selects the device
 * @file:  file being released
 *
 * Flushes the device with fsync_dev() and logs an alert reporting a
 * reference count that is zero or negative.
 */
388 static int nbd_release(struct inode
*inode
, struct file
*file
)
390 struct nbd_device
*lo
;
395 dev
= MINOR(inode
->i_rdev
);
398 fsync_dev(inode
->i_rdev
);
401 printk(KERN_ALERT
"nbd_release: refcount(%d) <= 0\n", lo
->refcnt
);
403 /* N.B. Doesn't lo->file need an fput?? */
/*
 * File operations table registered for the NBD major. The initializer is
 * positional; the trailing comment on each entry names the slot it fills.
 */
408 static struct file_operations nbd_fops
=
410 NULL
, /* lseek - default */
411 block_read
, /* read - general block-dev read */
412 block_write
, /* write - general block-dev write */
413 NULL
, /* readdir - bad */
415 nbd_ioctl
, /* ioctl */
419 nbd_release
/* release */
423 * And here should be modules and kernel interface
424 * (Just smiley confuses emacs :-)
/*
 * Driver initialisation. The #define below makes nbd_init the module's
 * init_module entry point.
 *
 * Verifies that struct nbd_request is exactly 28 bytes, registers the
 * block major with nbd_fops, installs the block-size and size tables and
 * the request function for MAJOR_NR, and resets every nbd_dev[] entry
 * (refcnt 0, no file, magic = LO_MAGIC, flags 0).
 */
428 #define nbd_init init_module
/* The request header size is checked against a hard-coded 28 bytes. */
435 if (sizeof(struct nbd_request
) != 28) {
436 printk(KERN_CRIT
"Sizeof nbd_request needs to be 28 in order to work!\n" );
440 if (register_blkdev(MAJOR_NR
, "nbd", &nbd_fops
)) {
441 printk("Unable to get major number %d for NBD\n",
446 printk("nbd: registered device at major %d\n", MAJOR_NR
);
/* Hook the per-minor tables and the request function into the block layer. */
448 blksize_size
[MAJOR_NR
] = nbd_blksizes
;
449 blk_size
[MAJOR_NR
] = nbd_sizes
;
450 blk_dev
[MAJOR_NR
].request_fn
= do_nbd_request
;
451 for (i
= 0; i
< MAX_NBD
; i
++) {
452 nbd_dev
[i
].refcnt
= 0;
453 nbd_dev
[i
].file
= NULL
;
454 nbd_dev
[i
].magic
= LO_MAGIC
;
455 nbd_dev
[i
].flags
= 0;
/*
 * cleanup_module - module unload hook.
 * Unregisters the "nbd" block major and logs whether that succeeded.
 */
461 void cleanup_module(void)
463 if (unregister_blkdev(MAJOR_NR
, "nbd") != 0)
464 printk("nbd: cleanup_module failed\n");
466 printk("nbd: module cleaned up.\n");