Merge branch 'vendor/OPENSSL'
[dragonfly.git] / sys / kern / subr_diskiocom.c
blob1101392933e994400c116fc2d4f08191dfe55730
1 /*
2 * Copyright (c) 2012 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/proc.h>
38 #include <sys/sysctl.h>
39 #include <sys/buf.h>
40 #include <sys/conf.h>
41 #include <sys/disklabel.h>
42 #include <sys/disklabel32.h>
43 #include <sys/disklabel64.h>
44 #include <sys/diskslice.h>
45 #include <sys/diskmbr.h>
46 #include <sys/disk.h>
47 #include <sys/malloc.h>
48 #include <sys/device.h>
49 #include <sys/devfs.h>
50 #include <sys/thread.h>
51 #include <sys/queue.h>
52 #include <sys/lock.h>
53 #include <sys/stat.h>
54 #include <sys/uuid.h>
56 #include <sys/dmsg.h>
58 #include <sys/buf2.h>
59 #include <sys/mplock2.h>
60 #include <sys/msgport2.h>
61 #include <sys/thread2.h>
/*
 * Per-transaction state for BLK_OPEN / BLK_CLOSE.
 *
 * Counts how many read-mode and write-mode opens this transaction has
 * successfully performed on the raw device, so they can be closed again
 * when the transaction is deleted.
 */
struct dios_open {
	int	openrd;		/* outstanding FREAD opens */
	int	openwr;		/* outstanding FWRITE opens */
};
/*
 * Per-transaction state for BLK_READ / WRITE / FLUSH / FREEBLKS.
 *
 * count is bumped before each bio is dispatched and dropped again in
 * diskiodone(); eof is set once a message carrying DMSGF_DELETE has
 * been seen on the transaction.
 */
struct dios_io {
	int	count;		/* bios dispatched and not yet completed */
	int	eof;		/* non-zero after DMSGF_DELETE was received */
};
73 static MALLOC_DEFINE(M_DMSG_DISK, "dmsg_disk", "disk dmsg");
75 static int disk_iocom_reconnect(struct disk *dp, struct file *fp);
76 static int disk_rcvdmsg(kdmsg_msg_t *msg);
78 static void disk_blk_open(struct disk *dp, kdmsg_msg_t *msg);
79 static void disk_blk_read(struct disk *dp, kdmsg_msg_t *msg);
80 static void disk_blk_write(struct disk *dp, kdmsg_msg_t *msg);
81 static void disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg);
82 static void disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg);
83 static void diskiodone(struct bio *bio);
85 void
86 disk_iocom_init(struct disk *dp)
88 kdmsg_iocom_init(&dp->d_iocom, dp,
89 KDMSG_IOCOMF_AUTOCONN |
90 KDMSG_IOCOMF_AUTOSPAN |
91 KDMSG_IOCOMF_AUTOCIRC,
92 M_DMSG_DISK, disk_rcvdmsg);
/*
 * Update hook for the disk iocom.  Currently a no-op.
 */
void
disk_iocom_update(struct disk *dp)
{
	/* nothing to do */
}
100 void
101 disk_iocom_uninit(struct disk *dp)
103 kdmsg_iocom_uninit(&dp->d_iocom);
107 disk_iocom_ioctl(struct disk *dp, int cmd, void *data)
109 struct file *fp;
110 struct disk_ioc_recluster *recl;
111 int error;
113 switch(cmd) {
114 case DIOCRECLUSTER:
115 recl = data;
116 fp = holdfp(curproc->p_fd, recl->fd, -1);
117 if (fp) {
118 error = disk_iocom_reconnect(dp, fp);
119 } else {
120 error = EINVAL;
122 break;
123 default:
124 error = EOPNOTSUPP;
125 break;
127 return error;
130 static
132 disk_iocom_reconnect(struct disk *dp, struct file *fp)
134 char devname[64];
136 ksnprintf(devname, sizeof(devname), "%s%d",
137 dev_dname(dp->d_rawdev), dkunit(dp->d_rawdev));
139 kdmsg_iocom_reconnect(&dp->d_iocom, fp, devname);
141 dp->d_iocom.auto_lnk_conn.pfs_type = DMSG_PFSTYPE_SERVER;
142 dp->d_iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1;
143 dp->d_iocom.auto_lnk_conn.peer_type = DMSG_PEER_BLOCK;
144 dp->d_iocom.auto_lnk_conn.peer_mask = 1LLU << DMSG_PEER_BLOCK;
145 dp->d_iocom.auto_lnk_conn.pfs_mask = (uint64_t)-1;
146 ksnprintf(dp->d_iocom.auto_lnk_conn.cl_label,
147 sizeof(dp->d_iocom.auto_lnk_conn.cl_label),
148 "%s/%s", hostname, devname);
149 if (dp->d_info.d_serialno) {
150 ksnprintf(dp->d_iocom.auto_lnk_conn.fs_label,
151 sizeof(dp->d_iocom.auto_lnk_conn.fs_label),
152 "%s", dp->d_info.d_serialno);
155 dp->d_iocom.auto_lnk_span.pfs_type = DMSG_PFSTYPE_SERVER;
156 dp->d_iocom.auto_lnk_span.proto_version = DMSG_SPAN_PROTO_1;
157 dp->d_iocom.auto_lnk_span.peer_type = DMSG_PEER_BLOCK;
158 dp->d_iocom.auto_lnk_span.media.block.bytes =
159 dp->d_info.d_media_size;
160 dp->d_iocom.auto_lnk_span.media.block.blksize =
161 dp->d_info.d_media_blksize;
162 ksnprintf(dp->d_iocom.auto_lnk_span.cl_label,
163 sizeof(dp->d_iocom.auto_lnk_span.cl_label),
164 "%s/%s", hostname, devname);
165 if (dp->d_info.d_serialno) {
166 ksnprintf(dp->d_iocom.auto_lnk_span.fs_label,
167 sizeof(dp->d_iocom.auto_lnk_span.fs_label),
168 "%s", dp->d_info.d_serialno);
171 kdmsg_iocom_autoinitiate(&dp->d_iocom, NULL);
173 return (0);
177 disk_rcvdmsg(kdmsg_msg_t *msg)
179 struct disk *dp = msg->iocom->handle;
182 * Handle debug messages (these might not be in transactions)
184 switch(msg->any.head.cmd & DMSGF_CMDSWMASK) {
185 case DMSG_DBG_SHELL:
187 * Execute shell command (not supported atm)
189 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
190 return(0);
191 case DMSG_DBG_SHELL | DMSGF_REPLY:
192 if (msg->aux_data) {
193 msg->aux_data[msg->aux_size - 1] = 0;
194 kprintf("diskiocom: DEBUGMSG: %s\n", msg->aux_data);
196 return(0);
200 * All remaining messages must be in a transaction
202 * NOTE! We are switching on the first message's command. The
203 * actual message command within the transaction may be
204 * different (if streaming within a transaction).
206 if (msg->state == NULL) {
207 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
208 return(0);
211 switch(msg->state->rxcmd & DMSGF_CMDSWMASK) {
212 case DMSG_BLK_OPEN:
213 case DMSG_BLK_CLOSE:
214 disk_blk_open(dp, msg);
215 break;
216 case DMSG_BLK_READ:
217 disk_blk_read(dp, msg);
218 break;
219 case DMSG_BLK_WRITE:
220 disk_blk_write(dp, msg);
221 break;
222 case DMSG_BLK_FLUSH:
223 disk_blk_flush(dp, msg);
224 break;
225 case DMSG_BLK_FREEBLKS:
226 disk_blk_freeblks(dp, msg);
227 break;
228 default:
229 if ((msg->any.head.cmd & DMSGF_REPLY) == 0) {
230 if (msg->any.head.cmd & DMSGF_DELETE)
231 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
232 else
233 kdmsg_msg_result(msg, DMSG_ERR_NOSUPP);
235 break;
237 return (0);
240 static
241 void
242 disk_blk_open(struct disk *dp, kdmsg_msg_t *msg)
244 struct dios_open *openst;
245 int error = DMSG_ERR_NOSUPP;
246 int fflags;
248 openst = msg->state->any.any;
249 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_OPEN) {
250 if (openst == NULL) {
251 openst = kmalloc(sizeof(*openst), M_DEVBUF,
252 M_WAITOK | M_ZERO);
253 msg->state->any.any = openst;
255 fflags = 0;
256 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
257 fflags = FREAD;
258 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
259 fflags |= FWRITE;
260 error = dev_dopen(dp->d_rawdev, fflags, S_IFCHR, proc0.p_ucred);
261 if (error) {
262 error = DMSG_ERR_IO;
263 } else {
264 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
265 ++openst->openrd;
266 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
267 ++openst->openwr;
270 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_CLOSE &&
271 openst) {
272 fflags = 0;
273 if ((msg->any.blk_open.modes & DMSG_BLKOPEN_RD) &&
274 openst->openrd) {
275 fflags = FREAD;
277 if ((msg->any.blk_open.modes & DMSG_BLKOPEN_WR) &&
278 openst->openwr) {
279 fflags |= FWRITE;
281 error = dev_dclose(dp->d_rawdev, fflags, S_IFCHR);
282 if (error) {
283 error = DMSG_ERR_IO;
284 } else {
285 if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
286 --openst->openrd;
287 if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
288 --openst->openwr;
291 if (msg->any.head.cmd & DMSGF_DELETE) {
292 if (openst) {
293 while (openst->openrd && openst->openwr) {
294 --openst->openrd;
295 --openst->openwr;
296 dev_dclose(dp->d_rawdev, FREAD|FWRITE, S_IFCHR);
298 while (openst->openrd) {
299 --openst->openrd;
300 dev_dclose(dp->d_rawdev, FREAD, S_IFCHR);
302 while (openst->openwr) {
303 --openst->openwr;
304 dev_dclose(dp->d_rawdev, FWRITE, S_IFCHR);
306 kfree(openst, M_DEVBUF);
307 msg->state->any.any = NULL;
309 kdmsg_msg_reply(msg, error);
310 } else {
311 kdmsg_msg_result(msg, error);
315 static
316 void
317 disk_blk_read(struct disk *dp, kdmsg_msg_t *msg)
319 struct dios_io *iost;
320 struct buf *bp;
321 struct bio *bio;
322 int error = DMSG_ERR_NOSUPP;
323 int reterr = 1;
326 * Only DMSG_BLK_READ commands imply read ops.
328 iost = msg->state->any.any;
329 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_READ) {
330 if (msg->any.blk_read.bytes < DEV_BSIZE ||
331 msg->any.blk_read.bytes > MAXPHYS) {
332 error = DMSG_ERR_PARAM;
333 goto done;
335 if (iost == NULL) {
336 iost = kmalloc(sizeof(*iost), M_DEVBUF,
337 M_WAITOK | M_ZERO);
338 msg->state->any.any = iost;
340 reterr = 0;
341 bp = geteblk(msg->any.blk_read.bytes);
342 bio = &bp->b_bio1;
343 bp->b_cmd = BUF_CMD_READ;
344 bp->b_bcount = msg->any.blk_read.bytes;
345 bp->b_resid = bp->b_bcount;
346 bio->bio_offset = msg->any.blk_read.offset;
347 bio->bio_caller_info1.ptr = msg->state;
348 bio->bio_done = diskiodone;
349 /* kdmsg_state_hold(msg->state); */
351 atomic_add_int(&iost->count, 1);
352 if (msg->any.head.cmd & DMSGF_DELETE)
353 iost->eof = 1;
354 BUF_KERNPROC(bp);
355 dev_dstrategy(dp->d_rawdev, bio);
357 done:
358 if (reterr) {
359 if (msg->any.head.cmd & DMSGF_DELETE) {
360 if (iost && iost->count == 0) {
361 kfree(iost, M_DEVBUF);
362 msg->state->any.any = NULL;
364 kdmsg_msg_reply(msg, error);
365 } else {
366 kdmsg_msg_result(msg, error);
371 static
372 void
373 disk_blk_write(struct disk *dp, kdmsg_msg_t *msg)
375 struct dios_io *iost;
376 struct buf *bp;
377 struct bio *bio;
378 int error = DMSG_ERR_NOSUPP;
379 int reterr = 1;
382 * Only DMSG_BLK_WRITE commands imply read ops.
384 iost = msg->state->any.any;
385 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_WRITE) {
386 if (msg->any.blk_write.bytes < DEV_BSIZE ||
387 msg->any.blk_write.bytes > MAXPHYS) {
388 error = DMSG_ERR_PARAM;
389 goto done;
391 if (iost == NULL) {
392 iost = kmalloc(sizeof(*iost), M_DEVBUF,
393 M_WAITOK | M_ZERO);
394 msg->state->any.any = iost;
398 * Issue WRITE. Short data implies zeros. Try to optimize
399 * the buffer cache buffer for the case where we can just
400 * use the message's data pointer.
402 reterr = 0;
403 if (msg->aux_size >= msg->any.blk_write.bytes)
404 bp = getpbuf(NULL);
405 else
406 bp = geteblk(msg->any.blk_write.bytes);
407 bio = &bp->b_bio1;
408 bp->b_cmd = BUF_CMD_WRITE;
409 bp->b_bcount = msg->any.blk_write.bytes;
410 bp->b_resid = bp->b_bcount;
411 if (msg->aux_size >= msg->any.blk_write.bytes) {
412 bp->b_data = msg->aux_data;
413 } else {
414 bcopy(msg->aux_data, bp->b_data, msg->aux_size);
415 bzero(bp->b_data + msg->aux_size,
416 msg->any.blk_write.bytes - msg->aux_size);
418 bio->bio_offset = msg->any.blk_write.offset;
419 bio->bio_caller_info1.ptr = msg->state;
420 bio->bio_done = diskiodone;
421 /* kdmsg_state_hold(msg->state); */
423 atomic_add_int(&iost->count, 1);
424 if (msg->any.head.cmd & DMSGF_DELETE)
425 iost->eof = 1;
426 BUF_KERNPROC(bp);
427 dev_dstrategy(dp->d_rawdev, bio);
429 done:
430 if (reterr) {
431 if (msg->any.head.cmd & DMSGF_DELETE) {
432 if (iost && iost->count == 0) {
433 kfree(iost, M_DEVBUF);
434 msg->state->any.any = NULL;
436 kdmsg_msg_reply(msg, error);
437 } else {
438 kdmsg_msg_result(msg, error);
443 static
444 void
445 disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg)
447 struct dios_io *iost;
448 struct buf *bp;
449 struct bio *bio;
450 int error = DMSG_ERR_NOSUPP;
451 int reterr = 1;
454 * Only DMSG_BLK_FLUSH commands imply read ops.
456 iost = msg->state->any.any;
457 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FLUSH) {
458 if (iost == NULL) {
459 iost = kmalloc(sizeof(*iost), M_DEVBUF,
460 M_WAITOK | M_ZERO);
461 msg->state->any.any = iost;
463 reterr = 0;
464 bp = getpbuf(NULL);
465 bio = &bp->b_bio1;
466 bp->b_cmd = BUF_CMD_FLUSH;
467 bp->b_bcount = msg->any.blk_flush.bytes;
468 bp->b_resid = 0;
469 bio->bio_offset = msg->any.blk_flush.offset;
470 bio->bio_caller_info1.ptr = msg->state;
471 bio->bio_done = diskiodone;
472 /* kdmsg_state_hold(msg->state); */
474 atomic_add_int(&iost->count, 1);
475 if (msg->any.head.cmd & DMSGF_DELETE)
476 iost->eof = 1;
477 BUF_KERNPROC(bp);
478 dev_dstrategy(dp->d_rawdev, bio);
480 if (reterr) {
481 if (msg->any.head.cmd & DMSGF_DELETE) {
482 if (iost && iost->count == 0) {
483 kfree(iost, M_DEVBUF);
484 msg->state->any.any = NULL;
486 kdmsg_msg_reply(msg, error);
487 } else {
488 kdmsg_msg_result(msg, error);
493 static
494 void
495 disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg)
497 struct dios_io *iost;
498 struct buf *bp;
499 struct bio *bio;
500 int error = DMSG_ERR_NOSUPP;
501 int reterr = 1;
504 * Only DMSG_BLK_FREEBLKS commands imply read ops.
506 iost = msg->state->any.any;
507 if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FREEBLKS) {
508 if (iost == NULL) {
509 iost = kmalloc(sizeof(*iost), M_DEVBUF,
510 M_WAITOK | M_ZERO);
511 msg->state->any.any = iost;
513 reterr = 0;
514 bp = getpbuf(NULL);
515 bio = &bp->b_bio1;
516 bp->b_cmd = BUF_CMD_FREEBLKS;
517 bp->b_bcount = msg->any.blk_freeblks.bytes;
518 bp->b_resid = 0;
519 bio->bio_offset = msg->any.blk_freeblks.offset;
520 bio->bio_caller_info1.ptr = msg->state;
521 bio->bio_done = diskiodone;
522 /* kdmsg_state_hold(msg->state); */
524 atomic_add_int(&iost->count, 1);
525 if (msg->any.head.cmd & DMSGF_DELETE)
526 iost->eof = 1;
527 BUF_KERNPROC(bp);
528 dev_dstrategy(dp->d_rawdev, bio);
530 if (reterr) {
531 if (msg->any.head.cmd & DMSGF_DELETE) {
532 if (iost && iost->count == 0) {
533 kfree(iost, M_DEVBUF);
534 msg->state->any.any = NULL;
536 kdmsg_msg_reply(msg, error);
537 } else {
538 kdmsg_msg_result(msg, error);
543 static
544 void
545 diskiodone(struct bio *bio)
547 struct buf *bp = bio->bio_buf;
548 kdmsg_state_t *state = bio->bio_caller_info1.ptr;
549 kdmsg_msg_t *rmsg;
550 struct dios_io *iost = state->any.any;
551 int error;
552 int resid = 0;
553 int bytes;
554 uint32_t cmd;
555 void *data;
557 cmd = DMSG_LNK_ERROR;
558 data = NULL;
559 bytes = 0;
561 switch(bp->b_cmd) {
562 case BUF_CMD_READ:
563 cmd = DMSG_LNK_ERROR;
564 data = bp->b_data;
565 bytes = bp->b_bcount;
566 /* fall through */
567 case BUF_CMD_WRITE:
568 if (bp->b_flags & B_ERROR) {
569 error = bp->b_error;
570 } else {
571 error = 0;
572 resid = bp->b_resid;
574 break;
575 case BUF_CMD_FLUSH:
576 case BUF_CMD_FREEBLKS:
577 if (bp->b_flags & B_ERROR)
578 error = bp->b_error;
579 else
580 error = 0;
581 break;
582 default:
583 panic("diskiodone: Unknown bio cmd = %d\n",
584 bio->bio_buf->b_cmd);
585 error = 0; /* avoid compiler warning */
586 break; /* NOT REACHED */
590 * Convert error to DMSG_ERR_* code.
592 if (error)
593 error = DMSG_ERR_IO;
596 * Convert LNK_ERROR or BLK_ERROR if non-zero resid. READS will
597 * have already converted cmd to BLK_ERROR and set up data to return.
599 if (resid && cmd == DMSG_LNK_ERROR)
600 cmd = DMSG_BLK_ERROR;
601 /* XXX txcmd is delayed so this won't work for streaming */
602 if ((state->txcmd & DMSGF_CREATE) == 0) /* assume serialized */
603 cmd |= DMSGF_CREATE;
604 if (iost->eof) {
605 if (atomic_fetchadd_int(&iost->count, -1) == 1)
606 cmd |= DMSGF_DELETE;
607 } else {
608 atomic_add_int(&iost->count, -1);
610 cmd |= DMSGF_REPLY;
613 * Allocate a basic or extended reply. Be careful not to populate
614 * extended header fields unless we allocated an extended reply.
616 rmsg = kdmsg_msg_alloc_state(state, cmd, NULL, 0);
617 if (data) {
618 rmsg->aux_data = kmalloc(bytes, state->iocom->mmsg, M_INTWAIT);
619 rmsg->aux_size = bytes;
620 rmsg->flags |= KDMSG_FLAG_AUXALLOC;
621 bcopy(data, rmsg->aux_data, bytes);
623 rmsg->any.blk_error.head.error = error;
624 if ((cmd & DMSGF_BASECMDMASK) == DMSG_BLK_ERROR)
625 rmsg->any.blk_error.resid = resid;
626 bio->bio_caller_info1.ptr = NULL;
627 /* kdmsg_state_drop(state); */
628 kdmsg_msg_write(rmsg);
629 if (bp->b_flags & B_PAGING) {
630 relpbuf(bio->bio_buf, NULL);
631 } else {
632 bp->b_flags |= B_INVAL | B_AGE;
633 brelse(bp);