/*
 * Network block device - make block devices work over TCP
 *
 * Note that you can not swap over this thing, yet. Seems to work but
 * deadlocks sometimes - you can not swap over TCP in general.
 *
 * Copyright 1997-2000 Pavel Machek <pavel@ucw.cz>
 *
 * (part of code stolen from loop.c)
 *
 * 97-3-25 compiled 0-th version, not yet tested it
 *   (it did not work, BTW) (later that day) HEY! it works!
 *   (bit later) hmm, not that much... 2:00am next day:
 *   yes, it works, but it gives something like 50kB/sec
 * 97-4-01 complete rewrite to make it possible for many requests at
 *   once to be processed
 * 97-4-11 Making protocol independent of endianity etc.
 * 97-9-13 Cosmetic changes
 * 98-5-13 Attempt to make 64-bit-clean on 64-bit machines
 * 99-1-11 Attempt to make 64-bit-clean on 32-bit machines <ankry@mif.pg.gda.pl>
 *
 * possible FIXME: make set_sock / set_blksize / set_size / do_it one syscall
 * why not: would need verify_area and friends, would share yet another
 *          structure with userland
 */
29 #include <linux/major.h>
31 #include <linux/module.h>
33 #include <linux/sched.h>
35 #include <linux/stat.h>
36 #include <linux/errno.h>
37 #include <linux/file.h>
38 #include <linux/ioctl.h>
41 #include <linux/devfs_fs_kernel.h>
43 #include <asm/segment.h>
44 #include <asm/uaccess.h>
45 #include <asm/types.h>
47 #define MAJOR_NR NBD_MAJOR
48 #include <linux/nbd.h>
50 #define LO_MAGIC 0x68797548
52 static int nbd_blksizes
[MAX_NBD
];
53 static int nbd_blksize_bits
[MAX_NBD
];
54 static int nbd_sizes
[MAX_NBD
];
55 static u64 nbd_bytesizes
[MAX_NBD
];
57 static struct nbd_device nbd_dev
[MAX_NBD
];
58 static devfs_handle_t devfs_handle
;
/* #define DEBUG( s ) printk( s ) */
/* Global request counters; their values are printed by the NBD_PRINT_DEBUG ioctl. */
static int requests_in;
static int requests_out;
69 static void nbd_plug_device(request_queue_t
*q
, kdev_t dev
) { }
71 static int nbd_open(struct inode
*inode
, struct file
*file
)
77 dev
= MINOR(inode
->i_rdev
);
81 nbd_dev
[dev
].refcnt
++;
/*
 * Send or receive a packet.
 */
89 static int nbd_xmit(int send
, struct socket
*sock
, char *buf
, int size
)
101 spin_lock_irqsave(¤t
->sigmask_lock
, flags
);
102 oldset
= current
->blocked
;
103 sigfillset(¤t
->blocked
);
104 recalc_sigpending(current
);
105 spin_unlock_irqrestore(¤t
->sigmask_lock
, flags
);
109 sock
->sk
->allocation
= GFP_BUFFER
;
116 msg
.msg_control
= NULL
;
117 msg
.msg_controllen
= 0;
122 result
= sock_sendmsg(sock
, &msg
, size
);
124 result
= sock_recvmsg(sock
, &msg
, size
, 0);
128 printk(KERN_ERR
"NBD: %s - sock=%ld at buf=%ld, size=%d returned %d.\n",
129 send
? "send" : "receive", (long) sock
, (long) buf
, size
, result
);
137 spin_lock_irqsave(¤t
->sigmask_lock
, flags
);
138 current
->blocked
= oldset
;
139 recalc_sigpending(current
);
140 spin_unlock_irqrestore(¤t
->sigmask_lock
, flags
);
/*
 * FAIL(s): log "NBD: <s>(result N)" at KERN_ERR and jump to the
 * enclosing function's error_out label.
 *
 * Expects an integer `result` and a label `error_out` to be in scope at
 * the point of use. The body is wrapped in do { } while (0) so that
 * `FAIL("...");` is a single statement and stays correct inside an
 * unbraced if/else.
 */
#define FAIL( s ) \
	do { \
		printk( KERN_ERR "NBD: " s "(result %d)\n", result ); \
		goto error_out; \
	} while (0)
148 void nbd_send_req(struct socket
*sock
, struct request
*req
)
151 struct nbd_request request
;
153 DEBUG("NBD: sending control, ");
154 request
.magic
= htonl(NBD_REQUEST_MAGIC
);
155 request
.type
= htonl(req
->cmd
);
156 request
.from
= cpu_to_be64( (u64
) req
->sector
<< 9);
157 request
.len
= htonl(req
->current_nr_sectors
<< 9);
158 memcpy(request
.handle
, &req
, sizeof(req
));
160 result
= nbd_xmit(1, sock
, (char *) &request
, sizeof(request
));
162 FAIL("Sendmsg failed for control.");
164 if (req
->cmd
== WRITE
) {
166 result
= nbd_xmit(1, sock
, req
->buffer
, req
->current_nr_sectors
<< 9);
168 FAIL("Send data failed.");
/*
 * HARDFAIL(s): log "NBD: <s>(result N)" at KERN_ERR, record the failure
 * code in lo->harderror and return NULL from the enclosing function.
 *
 * Expects an integer `result` and a device pointer `lo` to be in scope,
 * and must be used in a function that returns a pointer. The body is
 * wrapped in do { } while (0) so that `HARDFAIL("...");` is a single
 * statement and stays correct inside an unbraced if/else.
 */
#define HARDFAIL( s ) \
	do { \
		printk( KERN_ERR "NBD: " s "(result %d)\n", result ); \
		lo->harderror = result; \
		return NULL; \
	} while (0)
177 struct request
*nbd_read_stat(struct nbd_device
*lo
)
178 /* NULL returned = something went wrong, inform userspace */
181 struct nbd_reply reply
;
182 struct request
*xreq
, *req
;
184 DEBUG("reading control, ");
186 result
= nbd_xmit(0, lo
->sock
, (char *) &reply
, sizeof(reply
));
188 HARDFAIL("Recv control failed.");
189 memcpy(&xreq
, reply
.handle
, sizeof(xreq
));
190 req
= blkdev_entry_prev_request(&lo
->queue_head
);
193 FAIL("Unexpected handle received.\n");
196 if (ntohl(reply
.magic
) != NBD_REPLY_MAGIC
)
197 HARDFAIL("Not enough magic.");
198 if (ntohl(reply
.error
))
199 FAIL("Other side returned error.");
200 if (req
->cmd
== READ
) {
202 result
= nbd_xmit(0, lo
->sock
, req
->buffer
, req
->current_nr_sectors
<< 9);
204 HARDFAIL("Recv data failed.");
209 /* Can we get here? Yes, if other side returns error */
215 void nbd_do_it(struct nbd_device
*lo
)
220 down (&lo
->queue_lock
);
222 up (&lo
->queue_lock
);
223 req
= nbd_read_stat(lo
);
224 down (&lo
->queue_lock
);
227 printk(KERN_ALERT
"req should never be null\n" );
231 if (req
!= blkdev_entry_prev_request(&lo
->queue_head
)) {
232 printk(KERN_ALERT
"NBD: I have problem...\n");
234 if (lo
!= &nbd_dev
[MINOR(req
->rq_dev
)]) {
235 printk(KERN_ALERT
"NBD: request corrupted!\n");
238 if (lo
->magic
!= LO_MAGIC
) {
239 printk(KERN_ALERT
"NBD: nbd_dev[] corrupted: Not enough magic\n");
243 list_del(&req
->queue
);
244 up (&lo
->queue_lock
);
246 dequeued
= nbd_end_request(req
);
248 down (&lo
->queue_lock
);
250 list_add(&req
->queue
, &lo
->queue_head
);
253 up (&lo
->queue_lock
);
256 void nbd_clear_que(struct nbd_device
*lo
)
262 if (lo
->magic
!= LO_MAGIC
) {
263 printk(KERN_ERR
"NBD: nbd_dev[] corrupted: Not enough magic when clearing!\n");
268 while (!list_empty(&lo
->queue_head
)) {
269 req
= blkdev_entry_prev_request(&lo
->queue_head
);
272 printk( KERN_ALERT
"NBD: panic, panic, panic\n" );
275 if (lo
!= &nbd_dev
[MINOR(req
->rq_dev
)]) {
276 printk(KERN_ALERT
"NBD: request corrupted when clearing!\n");
281 list_del(&req
->queue
);
284 dequeued
= nbd_end_request(req
);
286 down(&lo
->queue_lock
);
288 list_add(&req
->queue
, &lo
->queue_head
);
/*
 * We always wait for result of write, for now. It would be nice to make it
 * optional, e.g.:
 * if ((req->cmd == WRITE) && (lo->flags & NBD_WRITE_NOCHK))
 *   { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
 */
/*
 * FAIL(s), request-path variant: log "NBD, minor N: <s>" at KERN_ERR and
 * jump to the enclosing function's error_out label.
 *
 * Expects an integer `dev` (the minor number) and a label `error_out` to
 * be in scope at the point of use. Any earlier definition of FAIL is
 * dropped first so the redefinition is legal. The body is wrapped in
 * do { } while (0) so that `FAIL("...");` is a single statement and
 * stays correct inside an unbraced if/else.
 */
#undef FAIL
#define FAIL( s ) \
	do { \
		printk( KERN_ERR "NBD, minor %d: " s "\n", dev ); \
		goto error_out; \
	} while (0)
302 static void do_nbd_request(request_queue_t
* q
)
306 struct nbd_device
*lo
;
308 while (!QUEUE_EMPTY
) {
312 FAIL("que not empty but no request?");
314 dev
= MINOR(req
->rq_dev
);
317 FAIL("Minor too big."); /* Probably can not happen */
321 FAIL("Request when not-ready.");
322 if ((req
->cmd
== WRITE
) && (lo
->flags
& NBD_READ_ONLY
))
323 FAIL("Write on read-only");
325 if (lo
->magic
!= LO_MAGIC
)
326 FAIL("nbd[] is not magical!");
330 blkdev_dequeue_request(req
);
331 spin_unlock_irq(&io_request_lock
);
333 down (&lo
->queue_lock
);
334 list_add(&req
->queue
, &lo
->queue_head
);
335 nbd_send_req(lo
->sock
, req
); /* Why does this block? */
336 up (&lo
->queue_lock
);
338 spin_lock_irq(&io_request_lock
);
343 blkdev_dequeue_request(req
);
344 spin_unlock(&io_request_lock
);
345 nbd_end_request(req
);
346 spin_lock(&io_request_lock
);
351 static int nbd_ioctl(struct inode
*inode
, struct file
*file
,
352 unsigned int cmd
, unsigned long arg
)
354 struct nbd_device
*lo
;
355 int dev
, error
, temp
;
356 struct request sreq
;
358 /* Anyone capable of this syscall can do *real bad* things */
360 if (!capable(CAP_SYS_ADMIN
))
364 dev
= MINOR(inode
->i_rdev
);
371 printk("NBD_DISCONNECT\n") ;
372 sreq
.cmd
=2 ; /* shutdown command */
373 if (!lo
->sock
) return -EINVAL
;
374 nbd_send_req(lo
->sock
,&sreq
) ;
378 down(&lo
->queue_lock
);
380 if (!list_empty(&lo
->queue_head
)) {
382 printk(KERN_ERR
"nbd: Some requests are in progress -> can not turn off.\n");
399 inode
= file
->f_dentry
->d_inode
;
400 /* N.B. Should verify that it's a socket */
402 lo
->sock
= &inode
->u
.socket_i
;
406 case NBD_SET_BLKSIZE
:
407 if ((arg
& (arg
-1)) || (arg
< 512) || (arg
> PAGE_SIZE
))
409 nbd_blksizes
[dev
] = arg
;
411 nbd_blksize_bits
[dev
] = 9;
413 nbd_blksize_bits
[dev
]++;
416 nbd_sizes
[dev
] = nbd_bytesizes
[dev
] >> nbd_blksize_bits
[dev
];
417 nbd_bytesizes
[dev
] = nbd_sizes
[dev
] << nbd_blksize_bits
[dev
];
420 nbd_sizes
[dev
] = arg
>> nbd_blksize_bits
[dev
];
421 nbd_bytesizes
[dev
] = nbd_sizes
[dev
] << nbd_blksize_bits
[dev
];
423 case NBD_SET_SIZE_BLOCKS
:
424 nbd_sizes
[dev
] = arg
;
425 nbd_bytesizes
[dev
] = ((u64
) arg
) << nbd_blksize_bits
[dev
];
431 return lo
->harderror
;
436 case NBD_PRINT_DEBUG
:
437 printk(KERN_INFO
"NBD device %d: next = %p, prev = %p. Global: in %d, out %d\n",
438 dev
, lo
->queue_head
.next
, lo
->queue_head
.prev
, requests_in
, requests_out
);
442 return put_user(nbd_bytesizes
[dev
] >> 9, (long *) arg
);
447 static int nbd_release(struct inode
*inode
, struct file
*file
)
449 struct nbd_device
*lo
;
454 dev
= MINOR(inode
->i_rdev
);
459 printk(KERN_ALERT
"nbd_release: refcount(%d) <= 0\n", lo
->refcnt
);
461 /* N.B. Doesn't lo->file need an fput?? */
466 static struct block_device_operations nbd_fops
=
469 release
: nbd_release
,
/*
 * The module and kernel interface follows.
 * (Just smiley confuses emacs :-)
 */
/* Make the name nbd_init expand to init_module. */
#define nbd_init init_module
486 if (sizeof(struct nbd_request
) != 28) {
487 printk(KERN_CRIT
"Sizeof nbd_request needs to be 28 in order to work!\n" );
491 if (register_blkdev(MAJOR_NR
, "nbd", &nbd_fops
)) {
492 printk("Unable to get major number %d for NBD\n",
497 printk("nbd: registered device at major %d\n", MAJOR_NR
);
499 blksize_size
[MAJOR_NR
] = nbd_blksizes
;
500 blk_size
[MAJOR_NR
] = nbd_sizes
;
501 blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR
), do_nbd_request
);
502 #ifndef NBD_PLUGGABLE
503 blk_queue_pluggable(BLK_DEFAULT_QUEUE(MAJOR_NR
), nbd_plug_device
);
505 blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR
), 0);
506 for (i
= 0; i
< MAX_NBD
; i
++) {
507 nbd_dev
[i
].refcnt
= 0;
508 nbd_dev
[i
].file
= NULL
;
509 nbd_dev
[i
].magic
= LO_MAGIC
;
510 nbd_dev
[i
].flags
= 0;
511 INIT_LIST_HEAD(&nbd_dev
[i
].queue_head
);
512 init_MUTEX(&nbd_dev
[i
].queue_lock
);
513 nbd_blksizes
[i
] = 1024;
514 nbd_blksize_bits
[i
] = 10;
515 nbd_bytesizes
[i
] = 0x7ffffc00; /* 2GB */
516 nbd_sizes
[i
] = nbd_bytesizes
[i
] >> nbd_blksize_bits
[i
];
517 register_disk(NULL
, MKDEV(MAJOR_NR
,i
), 1, &nbd_fops
,
518 nbd_bytesizes
[i
]>>9);
520 devfs_handle
= devfs_mk_dir (NULL
, "nbd", NULL
);
521 devfs_register_series (devfs_handle
, "%u", MAX_NBD
,
522 DEVFS_FL_DEFAULT
, MAJOR_NR
, 0,
523 S_IFBLK
| S_IRUSR
| S_IWUSR
,
530 void cleanup_module(void)
532 devfs_unregister (devfs_handle
);
533 blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR
));
535 if (unregister_blkdev(MAJOR_NR
, "nbd") != 0)
536 printk("nbd: cleanup_module failed\n");
538 printk("nbd: module cleaned up.\n");