Merge commit 'ea01a15a654b9e1c7b37d958f4d1911882ed7781'
[unleashed.git] / kernel / os / move.c
blob7ed8e4e3d3972b540aab7c0d73cfc8c1900de96a
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 * The Regents of the University of California
32 * All Rights Reserved
34 * University Acknowledgment- Portions of this document are derived from
35 * software developed by the University of California, Berkeley, and its
36 * contributors.
39 #include <sys/types.h>
40 #include <sys/sysmacros.h>
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/uio.h>
44 #include <sys/errno.h>
45 #include <sys/vmsystm.h>
46 #include <sys/cmn_err.h>
47 #include <vm/as.h>
48 #include <vm/page.h>
50 #include <sys/dcopy.h>
/*
 * Polling policy applied by uioafini() while it waits for the last
 * posted dcopy command:
 *	< 0	never block, keep polling
 *	  0	always block
 *	> 0	poll without blocking until the attempt counter reaches
 *		this value, then block
 */
int64_t uioa_maxpoll = -1;

/* Slots in a uioa_t's uioa_hwst[] array used by the dcopy code below. */
#define	UIO_DCOPY_CHANNEL	0
#define	UIO_DCOPY_CMD		1
57 * Move "n" bytes at byte address "p"; "rw" indicates the direction
58 * of the move, and the I/O parameters are provided in "uio", which is
59 * update to reflect the data which was moved. Returns 0 on success or
60 * a non-zero errno on failure.
62 int
63 uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
65 struct iovec *iov;
66 ulong_t cnt;
67 int error;
69 while (n && uio->uio_resid) {
70 iov = uio->uio_iov;
71 cnt = MIN(iov->iov_len, n);
72 if (cnt == 0l) {
73 uio->uio_iov++;
74 uio->uio_iovcnt--;
75 continue;
77 switch (uio->uio_segflg) {
79 case UIO_USERSPACE:
80 case UIO_USERISPACE:
81 if (rw == UIO_READ) {
82 error = xcopyout_nta(p, iov->iov_base, cnt,
83 (uio->uio_extflg & UIO_COPY_CACHED));
84 } else {
85 error = xcopyin_nta(iov->iov_base, p, cnt,
86 (uio->uio_extflg & UIO_COPY_CACHED));
89 if (error)
90 return (error);
91 break;
93 case UIO_SYSSPACE:
94 if (rw == UIO_READ)
95 error = kcopy_nta(p, iov->iov_base, cnt,
96 (uio->uio_extflg & UIO_COPY_CACHED));
97 else
98 error = kcopy_nta(iov->iov_base, p, cnt,
99 (uio->uio_extflg & UIO_COPY_CACHED));
100 if (error)
101 return (error);
102 break;
104 iov->iov_base += cnt;
105 iov->iov_len -= cnt;
106 uio->uio_resid -= cnt;
107 uio->uio_loffset += cnt;
108 p = (caddr_t)p + cnt;
109 n -= cnt;
111 return (0);
115 * Fault in the pages of the first n bytes specified by the uio structure.
116 * 1 byte in each page is touched and the uio struct is unmodified. Any
117 * error will terminate the process as this is only a best attempt to get
118 * the pages resident.
120 void
121 uio_prefaultpages(ssize_t n, struct uio *uio)
123 struct iovec *iov;
124 ulong_t cnt, incr;
125 caddr_t p;
126 uint8_t tmp;
127 int iovcnt;
129 iov = uio->uio_iov;
130 iovcnt = uio->uio_iovcnt;
132 while ((n > 0) && (iovcnt > 0)) {
133 cnt = MIN(iov->iov_len, n);
134 if (cnt == 0) {
135 /* empty iov entry */
136 iov++;
137 iovcnt--;
138 continue;
140 n -= cnt;
142 * touch each page in this segment.
144 p = iov->iov_base;
145 while (cnt) {
146 switch (uio->uio_segflg) {
147 case UIO_USERSPACE:
148 case UIO_USERISPACE:
149 if (fuword8(p, &tmp))
150 return;
151 break;
152 case UIO_SYSSPACE:
153 if (kcopy(p, &tmp, 1))
154 return;
155 break;
157 incr = MIN(cnt, PAGESIZE);
158 p += incr;
159 cnt -= incr;
162 * touch the last byte in case it straddles a page.
164 p--;
165 switch (uio->uio_segflg) {
166 case UIO_USERSPACE:
167 case UIO_USERISPACE:
168 if (fuword8(p, &tmp))
169 return;
170 break;
171 case UIO_SYSSPACE:
172 if (kcopy(p, &tmp, 1))
173 return;
174 break;
176 iov++;
177 iovcnt--;
182 * same as uiomove() but doesn't modify uio structure.
183 * return in cbytes how many bytes were copied.
186 uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes)
188 struct iovec *iov;
189 ulong_t cnt;
190 int error;
191 int iovcnt;
193 iovcnt = uio->uio_iovcnt;
194 *cbytes = 0;
196 for (iov = uio->uio_iov; n && iovcnt; iov++, iovcnt--) {
197 cnt = MIN(iov->iov_len, n);
198 if (cnt == 0)
199 continue;
201 switch (uio->uio_segflg) {
203 case UIO_USERSPACE:
204 case UIO_USERISPACE:
205 if (rw == UIO_READ) {
206 error = xcopyout_nta(p, iov->iov_base, cnt,
207 (uio->uio_extflg & UIO_COPY_CACHED));
208 } else {
209 error = xcopyin_nta(iov->iov_base, p, cnt,
210 (uio->uio_extflg & UIO_COPY_CACHED));
213 if (error)
214 return (error);
215 break;
217 case UIO_SYSSPACE:
218 if (rw == UIO_READ)
219 error = kcopy_nta(p, iov->iov_base, cnt,
220 (uio->uio_extflg & UIO_COPY_CACHED));
221 else
222 error = kcopy_nta(iov->iov_base, p, cnt,
223 (uio->uio_extflg & UIO_COPY_CACHED));
224 if (error)
225 return (error);
226 break;
228 p = (caddr_t)p + cnt;
229 n -= cnt;
230 *cbytes += cnt;
232 return (0);
236 * transfer a character value into the address space
237 * delineated by a uio and update fields within the
238 * uio for next character. Return 0 for success, EFAULT
239 * for error.
242 ureadc(int val, struct uio *uiop)
244 struct iovec *iovp;
245 unsigned char c;
248 * first determine if uio is valid. uiop should be
249 * non-NULL and the resid count > 0.
251 if (!(uiop && uiop->uio_resid > 0))
252 return (EFAULT);
255 * scan through iovecs until one is found that is non-empty.
256 * Return EFAULT if none found.
258 while (uiop->uio_iovcnt > 0) {
259 iovp = uiop->uio_iov;
260 if (iovp->iov_len <= 0) {
261 uiop->uio_iovcnt--;
262 uiop->uio_iov++;
263 } else
264 break;
267 if (uiop->uio_iovcnt <= 0)
268 return (EFAULT);
271 * Transfer character to uio space.
274 c = (unsigned char) (val & 0xFF);
276 switch (uiop->uio_segflg) {
278 case UIO_USERISPACE:
279 case UIO_USERSPACE:
280 if (copyout(&c, iovp->iov_base, sizeof (unsigned char)))
281 return (EFAULT);
282 break;
284 case UIO_SYSSPACE: /* can do direct copy since kernel-kernel */
285 *iovp->iov_base = c;
286 break;
288 default:
289 return (EFAULT); /* invalid segflg value */
293 * bump up/down iovec and uio members to reflect transfer.
295 iovp->iov_base++;
296 iovp->iov_len--;
297 uiop->uio_resid--;
298 uiop->uio_loffset++;
299 return (0); /* success */
303 * return a character value from the address space
304 * delineated by a uio and update fields within the
305 * uio for next character. Return the character for success,
306 * -1 for error.
309 uwritec(struct uio *uiop)
311 struct iovec *iovp;
312 unsigned char c;
315 * verify we were passed a valid uio structure.
316 * (1) non-NULL uiop, (2) positive resid count
317 * (3) there is an iovec with positive length
320 if (!(uiop && uiop->uio_resid > 0))
321 return (-1);
323 while (uiop->uio_iovcnt > 0) {
324 iovp = uiop->uio_iov;
325 if (iovp->iov_len <= 0) {
326 uiop->uio_iovcnt--;
327 uiop->uio_iov++;
328 } else
329 break;
332 if (uiop->uio_iovcnt <= 0)
333 return (-1);
336 * Get the character from the uio address space.
338 switch (uiop->uio_segflg) {
340 case UIO_USERISPACE:
341 case UIO_USERSPACE:
342 if (copyin(iovp->iov_base, &c, sizeof (unsigned char)))
343 return (-1);
344 break;
346 case UIO_SYSSPACE:
347 c = *iovp->iov_base;
348 break;
350 default:
351 return (-1); /* invalid segflg */
355 * Adjust fields of iovec and uio appropriately.
357 iovp->iov_base++;
358 iovp->iov_len--;
359 uiop->uio_resid--;
360 uiop->uio_loffset++;
361 return ((int)c & 0xFF); /* success */
365 * Drop the next n chars out of *uiop.
367 void
368 uioskip(uio_t *uiop, size_t n)
370 if (n > uiop->uio_resid)
371 return;
372 while (n != 0) {
373 register iovec_t *iovp = uiop->uio_iov;
374 register size_t niovb = MIN(iovp->iov_len, n);
376 if (niovb == 0) {
377 uiop->uio_iov++;
378 uiop->uio_iovcnt--;
379 continue;
381 iovp->iov_base += niovb;
382 uiop->uio_loffset += niovb;
383 iovp->iov_len -= niovb;
384 uiop->uio_resid -= niovb;
385 n -= niovb;
390 * Dup the suio into the duio and diovec of size diov_cnt. If diov
391 * is too small to dup suio then an error will be returned, else 0.
394 uiodup(uio_t *suio, uio_t *duio, iovec_t *diov, int diov_cnt)
396 int ix;
397 iovec_t *siov = suio->uio_iov;
399 *duio = *suio;
400 for (ix = 0; ix < suio->uio_iovcnt; ix++) {
401 diov[ix] = siov[ix];
402 if (ix >= diov_cnt)
403 return (1);
405 duio->uio_iov = diov;
406 return (0);
410 * Shadow state for checking if a platform has hardware asynchronous
411 * copy capability and minimum copy size, e.g. Intel's I/OAT dma engine,
413 * Dcopy does a call-back to uioa_dcopy_enable() when a dma device calls
414 * into dcopy to register and uioa_dcopy_disable() when the device calls
415 * into dcopy to unregister.
417 uioasync_t uioasync = {B_FALSE, 1024};
419 void
420 uioa_dcopy_enable()
422 uioasync.enabled = B_TRUE;
425 void
426 uioa_dcopy_disable()
428 uioasync.enabled = B_FALSE;
432 * Schedule an asynchronous move of "n" bytes at byte address "p",
433 * "rw" indicates the direction of the move, I/O parameters and
434 * async state are provided in "uioa" which is update to reflect
435 * the data which is to be moved.
437 * Returns 0 on success or a non-zero errno on failure.
439 * Note, while the uioasync APIs are general purpose in design
440 * the current implementation is Intel I/OAT specific.
443 uioamove(void *p, size_t n, enum uio_rw rw, uioa_t *uioa)
445 int soff, doff;
446 uint64_t pa;
447 int cnt;
448 iovec_t *iov;
449 dcopy_handle_t channel;
450 dcopy_cmd_t cmd;
451 int ret = 0;
452 int dcopy_flags;
454 if (!(uioa->uioa_state & UIOA_ENABLED)) {
455 /* The uioa_t isn't enabled */
456 return (ENXIO);
459 if (uioa->uio_segflg != UIO_USERSPACE || rw != UIO_READ) {
460 /* Only support to user-land from kernel */
461 return (ENOTSUP);
465 channel = uioa->uioa_hwst[UIO_DCOPY_CHANNEL];
466 cmd = uioa->uioa_hwst[UIO_DCOPY_CMD];
467 dcopy_flags = DCOPY_NOSLEEP;
470 * While source bytes and destination bytes.
472 while (n > 0 && uioa->uio_resid > 0) {
473 iov = uioa->uio_iov;
474 if (iov->iov_len == 0l) {
475 uioa->uio_iov++;
476 uioa->uio_iovcnt--;
477 uioa->uioa_lcur++;
478 uioa->uioa_lppp = uioa->uioa_lcur->uioa_ppp;
479 continue;
482 * While source bytes schedule an async
483 * dma for destination page by page.
485 while (n > 0) {
486 /* Addr offset in page src/dst */
487 soff = (uintptr_t)p & PAGEOFFSET;
488 doff = (uintptr_t)iov->iov_base & PAGEOFFSET;
489 /* Min copy count src and dst and page sized */
490 cnt = MIN(n, iov->iov_len);
491 cnt = MIN(cnt, PAGESIZE - soff);
492 cnt = MIN(cnt, PAGESIZE - doff);
493 /* XXX if next page(s) contiguous could use multipage */
496 * if we have an old command, we want to link all
497 * other commands to the next command we alloced so
498 * we only need to track the last command but can
499 * still free them all.
501 if (cmd != NULL) {
502 dcopy_flags |= DCOPY_ALLOC_LINK;
504 ret = dcopy_cmd_alloc(channel, dcopy_flags, &cmd);
505 if (ret != DCOPY_SUCCESS) {
506 /* Error of some sort */
507 return (EIO);
509 uioa->uioa_hwst[UIO_DCOPY_CMD] = cmd;
511 ASSERT(cmd->dp_version == DCOPY_CMD_V0);
512 if (uioa_maxpoll >= 0) {
513 /* Blocking (>0 may be) used in uioafini() */
514 cmd->dp_flags = DCOPY_CMD_INTR;
515 } else {
516 /* Non blocking uioafini() so no intr */
517 cmd->dp_flags = DCOPY_CMD_NOFLAGS;
519 cmd->dp_cmd = DCOPY_CMD_COPY;
520 pa = ptob((uint64_t)hat_getpfnum(kas.a_hat, p));
521 cmd->dp.copy.cc_source = pa + soff;
522 if (uioa->uioa_lcur->uioa_pfncnt == 0) {
523 /* Have a (page_t **) */
524 pa = ptob((uint64_t)(
525 *(page_t **)uioa->uioa_lppp)->p_pagenum);
526 } else {
527 /* Have a (pfn_t *) */
528 pa = ptob((uint64_t)(
529 *(pfn_t *)uioa->uioa_lppp));
531 cmd->dp.copy.cc_dest = pa + doff;
532 cmd->dp.copy.cc_size = cnt;
533 ret = dcopy_cmd_post(cmd);
534 if (ret != DCOPY_SUCCESS) {
535 /* Error of some sort */
536 return (EIO);
538 ret = 0;
540 /* If UIOA_POLL not set, set it */
541 if (!(uioa->uioa_state & UIOA_POLL))
542 uioa->uioa_state |= UIOA_POLL;
544 /* Update iov, uio, and local pointers/counters */
545 iov->iov_base += cnt;
546 iov->iov_len -= cnt;
547 uioa->uio_resid -= cnt;
548 uioa->uioa_mbytes += cnt;
549 uioa->uio_loffset += cnt;
550 p = (caddr_t)p + cnt;
551 n -= cnt;
553 /* End of iovec? */
554 if (iov->iov_len == 0) {
555 /* Yup, next iovec */
556 break;
559 /* Next dst addr page? */
560 if (doff + cnt == PAGESIZE) {
561 /* Yup, next page_t */
562 uioa->uioa_lppp++;
567 return (ret);
571 * Initialize a uioa_t for a given uio_t for the current user context,
572 * copy the common uio_t to the uioa_t, walk the shared iovec_t and
573 * lock down the user-land page(s) containing iovec_t data, then mapin
574 * user-land pages using segkpm.
577 uioainit(uio_t *uiop, uioa_t *uioap)
579 caddr_t addr;
580 page_t **pages;
581 int off;
582 int len;
583 proc_t *procp = ttoproc(curthread);
584 struct as *as = procp->p_as;
585 iovec_t *iov = uiop->uio_iov;
586 int32_t iovcnt = uiop->uio_iovcnt;
587 uioa_page_t *locked = uioap->uioa_locked;
588 dcopy_handle_t channel;
589 int error;
591 if (! (uioap->uioa_state & UIOA_ALLOC)) {
592 /* Can only init() a freshly allocated uioa_t */
593 return (EINVAL);
596 error = dcopy_alloc(DCOPY_NOSLEEP, &channel);
597 if (error == DCOPY_NORESOURCES) {
598 /* Turn off uioa */
599 uioasync.enabled = B_FALSE;
600 return (ENODEV);
602 if (error != DCOPY_SUCCESS) {
603 /* Alloc failed */
604 return (EIO);
607 uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = channel;
608 uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL;
610 /* Indicate uioa_t (will be) initialized */
611 uioap->uioa_state = UIOA_INIT;
613 uioap->uioa_mbytes = 0;
615 /* uio_t/uioa_t uio_t common struct copy */
616 *((uio_t *)uioap) = *uiop;
618 /* initialize *uiop->uio_iov */
619 if (iovcnt > UIOA_IOV_MAX) {
620 /* Too big? */
621 return (E2BIG);
623 uioap->uio_iov = iov;
624 uioap->uio_iovcnt = iovcnt;
626 /* Mark the uioap as such */
627 uioap->uio_extflg |= UIO_ASYNC;
630 * For each iovec_t, lock-down the page(s) backing the iovec_t
631 * and save the page_t list for phys addr use in uioamove().
633 iov = uiop->uio_iov;
634 iovcnt = uiop->uio_iovcnt;
635 while (iovcnt > 0) {
636 addr = iov->iov_base;
637 off = (uintptr_t)addr & PAGEOFFSET;
638 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
639 len = iov->iov_len + off;
641 /* Lock down page(s) for the iov span */
642 if ((error = as_pagelock(as, &pages,
643 iov->iov_base, iov->iov_len, S_WRITE)) != 0) {
644 /* Error */
645 goto cleanup;
648 if (pages == NULL) {
650 * Need page_t list, really only need
651 * a pfn list so build one.
653 pfn_t *pfnp;
654 int pcnt = len >> PAGESHIFT;
656 if (off)
657 pcnt++;
658 if ((pfnp = kmem_alloc(pcnt * sizeof (pfnp),
659 KM_NOSLEEP)) == NULL) {
660 error = ENOMEM;
661 goto cleanup;
663 locked->uioa_ppp = (void **)pfnp;
664 locked->uioa_pfncnt = pcnt;
665 AS_LOCK_ENTER(as, RW_READER);
666 while (pcnt-- > 0) {
667 *pfnp++ = hat_getpfnum(as->a_hat, addr);
668 addr += PAGESIZE;
670 AS_LOCK_EXIT(as);
671 } else {
672 /* Have a page_t list, save it */
673 locked->uioa_ppp = (void **)pages;
674 locked->uioa_pfncnt = 0;
676 /* Save for as_pageunlock() in uioafini() */
677 locked->uioa_base = iov->iov_base;
678 locked->uioa_len = iov->iov_len;
679 locked++;
681 /* Next iovec_t */
682 iov++;
683 iovcnt--;
685 /* Initialize curret pointer into uioa_locked[] and it's uioa_ppp */
686 uioap->uioa_lcur = uioap->uioa_locked;
687 uioap->uioa_lppp = uioap->uioa_lcur->uioa_ppp;
688 return (0);
690 cleanup:
691 /* Unlock any previously locked page_t(s) */
692 while (locked > uioap->uioa_locked) {
693 locked--;
694 as_pageunlock(as, (page_t **)locked->uioa_ppp,
695 locked->uioa_base, locked->uioa_len, S_WRITE);
698 /* Last indicate uioa_t still in alloc state */
699 uioap->uioa_state = UIOA_ALLOC;
700 uioap->uioa_mbytes = 0;
702 return (error);
706 * Finish processing of a uioa_t by cleanup any pending "uioap" actions.
709 uioafini(uio_t *uiop, uioa_t *uioap)
711 int32_t iovcnt = uiop->uio_iovcnt;
712 uioa_page_t *locked = uioap->uioa_locked;
713 struct as *as = ttoproc(curthread)->p_as;
714 dcopy_handle_t channel;
715 dcopy_cmd_t cmd;
716 int ret = 0;
718 ASSERT(uioap->uio_extflg & UIO_ASYNC);
720 if (!(uioap->uioa_state & (UIOA_ENABLED|UIOA_FINI))) {
721 /* Must be an active uioa_t */
722 return (EINVAL);
725 channel = uioap->uioa_hwst[UIO_DCOPY_CHANNEL];
726 cmd = uioap->uioa_hwst[UIO_DCOPY_CMD];
728 /* XXX - why do we get cmd == NULL sometimes? */
729 if (cmd != NULL) {
730 if (uioap->uioa_state & UIOA_POLL) {
731 /* Wait for last dcopy() to finish */
732 int64_t poll = 1;
733 int poll_flag = DCOPY_POLL_NOFLAGS;
735 do {
736 if (uioa_maxpoll == 0 ||
737 (uioa_maxpoll > 0 &&
738 poll >= uioa_maxpoll)) {
739 /* Always block or after maxpoll */
740 poll_flag = DCOPY_POLL_BLOCK;
741 } else {
742 /* No block, poll */
743 poll++;
745 ret = dcopy_cmd_poll(cmd, poll_flag);
746 } while (ret == DCOPY_PENDING);
748 if (ret == DCOPY_COMPLETED) {
749 /* Poll/block succeeded */
750 ret = 0;
751 } else {
752 /* Poll/block failed */
753 ret = EIO;
756 dcopy_cmd_free(&cmd);
759 dcopy_free(&channel);
761 /* Unlock all page(s) iovec_t by iovec_t */
762 while (iovcnt-- > 0) {
763 page_t **pages;
765 if (locked->uioa_pfncnt == 0) {
766 /* A as_pagelock() returned (page_t **) */
767 pages = (page_t **)locked->uioa_ppp;
768 } else {
769 /* Our pfn_t array */
770 pages = NULL;
771 kmem_free(locked->uioa_ppp, locked->uioa_pfncnt *
772 sizeof (pfn_t *));
774 as_pageunlock(as, pages, locked->uioa_base, locked->uioa_len,
775 S_WRITE);
777 locked++;
779 /* uioa_t->uio_t common struct copy */
780 *uiop = *((uio_t *)uioap);
783 * Last, reset uioa state to alloc.
785 * Note, we only initialize the state here, all other members
786 * will be initialized in a subsequent uioainit().
788 uioap->uioa_state = UIOA_ALLOC;
789 uioap->uioa_mbytes = 0;
791 uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL;
792 uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = NULL;
794 return (ret);