 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"
/*
 * PMEM - Direct mapping physical memory pages to userland process
 *
 * Provide functions used for directly (w/o occupying kernel virtual address
 * space) allocating and exporting physical memory pages to userland.
 */

#include <sys/types.h>
#include <sys/mutex.h>
#include <sys/sunddi.h>
#include <sys/ddidevmap.h>
#include <sys/vnode.h>
#include <sys/sysmacros.h>
#include <vm/seg_dev.h>
#include <vm/hat_i86.h>
/*
 * The routines in this file allocate memory which will be accessed through
 * the AGP GART hardware.  The GART is programmed with the PFNs for this
 * memory, and the only mechanism for removing these entries is by an
 * explicit process operation (ioctl/close of the driver, or process exit).
 * As such, the pages need to remain locked to ensure that they won't be
 * relocated or paged out.
 *
 * To prevent these locked pages from getting in the way of page
 * coalescing, we try to allocate large pages from the system, and carve
 * them up to satisfy pmem allocation requests.  This will keep the locked
 * pages within a constrained area of physical memory, limiting the number
 * of large pages that would be pinned by our locked pages.  This is, of
 * course, another take on the infamous kernel cage, and it has many of the
 * downsides of the original cage.  It also interferes with system-wide
 * resource management decisions, as it maintains its own pool of unused
 * pages which can't be easily reclaimed and used during low-memory
 * situations.
 *
 * The right solution is for pmem to register a callback that the VM system
 * could call, which would temporarily remove any GART entries for pages
 * that were being relocated.  This would let us leave the pages unlocked,
 * which would remove the need for using large pages, which would simplify
 * this code a great deal.  Unfortunately, the support for these callbacks
 * only exists on some SPARC platforms right now.
 *
 * Note that this is the *only* reason that large pages are used here.  The
 * GART can't perform large-page translations, and the code appropriately
 * falls back to using small pages if page_create_va_large() fails.
 */
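
/*
 * Illustrative sketch (not part of this file): roughly how a driver's
 * devmap(9E) entry point might consume this interface.  Everything prefixed
 * with xx_ is a hypothetical driver name; only devmap_pmem_alloc(),
 * devmap_pmem_setup(), devmap_pmem_free(), PMEM_SLEEP and the IOMEM_DATA_*
 * cache-attribute flags come from this file and the DDI headers it includes.
 *
 *	static int
 *	xx_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len,
 *	    size_t *maplen, uint_t model)
 *	{
 *		devmap_pmem_cookie_t cookie;
 *		size_t length = ptob(btopr(len));
 *
 *		// Allocate 'length' bytes of locked physical memory.
 *		if (devmap_pmem_alloc(length, PMEM_SLEEP, &cookie) !=
 *		    DDI_SUCCESS)
 *			return (ENOMEM);
 *
 *		// Export the pages; pmem only permits uncached or
 *		// write-combining mappings, so say so in the flags.
 *		if (devmap_pmem_setup(dhc, xx_dip, NULL, cookie, 0, length,
 *		    PROT_ALL, DEVMAP_DEFAULTS | IOMEM_DATA_UC_WR_COMBINE,
 *		    &xx_acc_attr) != DDI_SUCCESS) {
 *			devmap_pmem_free(cookie);
 *			return (EIO);
 *		}
 *		*maplen = length;
 *		return (0);
 *	}
 */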
#define	HOLD_DHP_LOCK(dhp)	if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
			{ mutex_enter(&dhp->dh_lock); }

#define	RELE_DHP_LOCK(dhp)	if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
			{ mutex_exit(&dhp->dh_lock); }

#define	FROM_LPG(pp)	(pp->p_szc != 0)
#define	PFIND(pp)	(page_pptonum(pp) & (pmem_pgcnt - 1))
/*
 * Structs and static variables used for pmem only.
 */
typedef struct pmem_lpg {
	page_t	*pl_pp;		/* start pp */
	ulong_t	*pl_bitmap;	/* allocation status for each page */
	ushort_t pl_pfree;	/* this large page might be fully freed */
	struct pmem_lpg *pl_next;
	struct pmem_lpg *pl_prev;
} pmem_lpg_t;
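
/*
 * How pl_bitmap is meant to be read (an inference from the BT_SET/BT_CLEAR
 * usage later in this file, not a separate interface): bit PFIND(pp) is 1
 * while the small page pp sits free in pmem_mpool, and 0 once it has been
 * handed out to a pmem cookie.  A hypothetical check, under the pmem mutex,
 * could look like:
 *
 *	pmem_lpg_t *plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl);
 *
 *	if (plp != NULL && BT_TEST(plp->pl_bitmap, PFIND(pp)))
 *		cmn_err(CE_CONT, "page is free, sitting in pmem_mpool\n");
 *	else
 *		cmn_err(CE_CONT, "page is allocated to a cookie\n");
 */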
static size_t	pmem_lpgsize;	/* the size of one large page */
static pgcnt_t	pmem_pgcnt;	/* the number of small pages in a large page */
static uint_t	pmem_lszc;	/* page size code of the large page */
/* The segment to be associated with all the allocated pages. */
static struct seg	pmem_seg;
/* Fully occupied large pages allocated for pmem. */
static pmem_lpg_t	*pmem_occ_lpgs;
/* Memory pool to store residual small pages from large pages. */
static page_t	*pmem_mpool = NULL;
/* Number of small pages currently residing in pmem_mpool. */
static pgcnt_t	pmem_nmpages = 0;
/* To protect pmem_nmpages, pmem_mpool and pmem_occ_lpgs. */
static kmutex_t	pmem_mutex;
static int lpg_isfree(pmem_lpg_t *);
static void pmem_lpg_sub(pmem_lpg_t **, pmem_lpg_t *);
static void pmem_lpg_concat(pmem_lpg_t **, pmem_lpg_t **);
static pmem_lpg_t *pmem_lpg_get(pmem_lpg_t *, page_t *, pmem_lpg_t **);
static pmem_lpg_t *pmem_lpg_alloc(uint_t);
static void pmem_lpg_free(pmem_lpg_t **, pmem_lpg_t *);
static void lpg_free(page_t *spp);
static pgcnt_t mpool_break(page_t **, pgcnt_t);
static void mpool_append(page_t **, pgcnt_t);
static void lpp_break(page_t **, pgcnt_t, pgcnt_t, pmem_lpg_t *);
static void lpp_free(page_t *, pgcnt_t, pmem_lpg_t **);
static int lpp_create(page_t **, pgcnt_t, pgcnt_t *, pmem_lpg_t **,
    vnode_t *, u_offset_t *, uint_t);
static void tlist_in(page_t *, pgcnt_t, vnode_t *, u_offset_t *);
static void tlist_out(page_t *, pgcnt_t);
static int pmem_cookie_alloc(struct devmap_pmem_cookie **, pgcnt_t, uint_t);
static int pmem_lock(pgcnt_t, proc_t *p);
/*
 * Called by a driver devmap routine to pass physical memory mapping info to
 * the seg_dev framework; used only for physical memory allocated from
 * devmap_pmem_alloc().
 */
int
devmap_pmem_setup(devmap_cookie_t dhc, dev_info_t *dip,
    struct devmap_callback_ctl *callbackops, devmap_pmem_cookie_t cookie,
    offset_t off, size_t len, uint_t maxprot, uint_t flags,
    ddi_device_acc_attr_t *accattrp)
{
	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);

	if (pcp == NULL || (off + len) > ptob(pcp->dp_npages))
		return (DDI_FAILURE);

	/*
	 * First, check whether this function has already been called for
	 * this dhp.
	 */
	if (dhp->dh_flags & DEVMAP_SETUP_DONE)
		return (DDI_FAILURE);

	if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
		return (DDI_FAILURE);

	/*
	 * Check if the cache attributes are supported.  Note that only
	 * uncacheable or write-combining mappings are permitted for pmem.
	 */
	if (i_ddi_check_cache_attr(flags) == B_FALSE ||
	    (cache_attr & (IOMEM_DATA_UNCACHED|IOMEM_DATA_UC_WR_COMBINE)) == 0)
		return (DDI_FAILURE);

	if (flags & DEVMAP_MAPPING_INVALID) {
		/*
		 * If DEVMAP_MAPPING_INVALID is specified, we have to grant
		 * the remap permission as well.
		 */
		if (!(flags & DEVMAP_ALLOW_REMAP))
			return (DDI_FAILURE);
	} else {
		dhp->dh_pcookie = (devmap_pmem_cookie_t)pcp;
		/* dh_roff is the offset inside the dh_pcookie. */
		dhp->dh_roff = ptob(btop(off));
		/* Set the cache attributes correctly */
		i_ddi_cacheattr_to_hatacc(cache_attr, &dhp->dh_hat_attr);
	}

	dhp->dh_cookie = DEVMAP_PMEM_COOKIE;
	dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
	dhp->dh_len = ptob(btopr(len));

	dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
	ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);

	if (callbackops != NULL) {
		bcopy(callbackops, &dhp->dh_callbackops,
		    sizeof (struct devmap_callback_ctl));
	}

	/*
	 * Initialize dh_lock if we want to do remap.
	 */
	if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
		mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
		dhp->dh_flags |= DEVMAP_LOCK_INITED;
	}

	dhp->dh_flags |= DEVMAP_SETUP_DONE;

	return (DDI_SUCCESS);
}
/*
 * Replace the existing mapping using a new cookie; mainly called during
 * fork().  Should be called from the associated devmap_dup(9E).
 */
int
devmap_pmem_remap(devmap_cookie_t dhc, dev_info_t *dip,
    devmap_pmem_cookie_t cookie, offset_t off, size_t len, uint_t maxprot,
    uint_t flags, ddi_device_acc_attr_t *accattrp)
{
	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);

	/*
	 * Return failure if setup has not been done or no remap permission
	 * has been granted during the setup.
	 */
	if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
	    (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
		return (DDI_FAILURE);

	/* No flags supported for remap yet. */
	if (flags & DEVMAP_MAPPING_INVALID)
		return (DDI_FAILURE);

	if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
		return (DDI_FAILURE);

	if (pcp == NULL || (off + len) > ptob(pcp->dp_npages))
		return (DDI_FAILURE);

	/*
	 * Check if the cache attributes are supported.  Note that only
	 * uncacheable or write-combining mappings are permitted for pmem.
	 */
	if (i_ddi_check_cache_attr(flags) == B_FALSE ||
	    (cache_attr & (IOMEM_DATA_UNCACHED|IOMEM_DATA_UC_WR_COMBINE)) == 0)
		return (DDI_FAILURE);

	HOLD_DHP_LOCK(dhp);
	/*
	 * Unload the old mapping of pages related to this dhp, so the next
	 * fault will set up the new mappings.  It is segdev_faultpage that
	 * calls hat_devload() to establish the mapping.  Do this while holding
	 * the dhp lock so other faults don't reestablish the mappings.
	 */
	hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
	    dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);

	/* Set the cache attributes correctly */
	i_ddi_cacheattr_to_hatacc(cache_attr, &dhp->dh_hat_attr);

	dhp->dh_pcookie = cookie;
	dhp->dh_roff = ptob(btop(off));
	dhp->dh_len = ptob(btopr(len));

	/* Clear the large page size flag. */
	dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;

	dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
	ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
	RELE_DHP_LOCK(dhp);
	return (DDI_SUCCESS);
}
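
/*
 * Illustrative sketch (hypothetical driver code): a devmap_dup(9E) callback
 * that points the child's handle at a freshly allocated pmem cookie, so that
 * parent and child stop sharing the same physical pages after fork().  The
 * xx_state structure and its members are assumptions made for this example;
 * only the devmap_pmem_* calls and flags come from this file.
 *
 *	static int
 *	xx_devmap_dup(devmap_cookie_t dhp, void *pvtp, devmap_cookie_t new_dhp,
 *	    void **new_pvtp)
 *	{
 *		struct xx_state *sp = pvtp;
 *		devmap_pmem_cookie_t ncookie;
 *
 *		if (devmap_pmem_alloc(sp->size, PMEM_SLEEP, &ncookie) !=
 *		    DDI_SUCCESS)
 *			return (ENOMEM);
 *		if (devmap_pmem_remap(new_dhp, sp->dip, ncookie, 0, sp->size,
 *		    PROT_ALL, IOMEM_DATA_UNCACHED, &sp->acc_attr) !=
 *		    DDI_SUCCESS) {
 *			devmap_pmem_free(ncookie);
 *			return (EIO);
 *		}
 *		*new_pvtp = sp;
 *		return (0);
 *	}
 */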
/*
 * Directly (i.e., without occupying kernel virtual address space) allocate
 * 'npages' physical memory pages for exporting to user land.  The allocated
 * page_t pointers will be recorded in the cookie.
 */
int
devmap_pmem_alloc(size_t size, uint_t flags, devmap_pmem_cookie_t *cookiep)
{
	u_offset_t	pmem_off = 0;
	page_t		*pp = NULL;
	page_t		*lpp = NULL;
	page_t		*tlist = NULL;
	pgcnt_t		i = 0;
	pgcnt_t		rpages = 0;
	pgcnt_t		lpages = 0;
	pgcnt_t		tpages = 0;
	pgcnt_t		npages = btopr(size);
	pmem_lpg_t	*plp = NULL;
	struct devmap_pmem_cookie	*pcp;
	uint_t		reserved = 0;
	uint_t		locked = 0;
	uint_t		pflags, kflags;

	/*
	 * A number larger than this will cause page_create_va() to loop
	 * infinitely.
	 */
	if (npages == 0 || npages >= total_pages / 2)
		return (DDI_FAILURE);
	if ((flags & (PMEM_SLEEP | PMEM_NOSLEEP)) == 0)
		return (DDI_FAILURE);
	pflags = flags & PMEM_NOSLEEP ? PG_EXCL : PG_WAIT;
	kflags = flags & PMEM_NOSLEEP ? KM_NOSLEEP : KM_SLEEP;

	/* Allocate pmem cookie. */
	if (pmem_cookie_alloc(&pcp, npages, kflags) == DDI_FAILURE)
		return (DDI_FAILURE);
	pcp->dp_npages = npages;

	/*
	 * See if the requested memory can be locked.
	 */
	pcp->dp_proc = curproc;
	if (pmem_lock(npages, curproc) == DDI_FAILURE)
		goto alloc_fail;
	locked = 1;

	/*
	 * First, grab as many as possible from pmem_mpool.  If the pages in
	 * pmem_mpool are enough for this request, we are done.
	 */
	mutex_enter(&pmem_mutex);
	tpages = mpool_break(&tlist, npages);
	/* IOlock and hashin them into the new offset. */
	tlist_in(tlist, tpages, pcp->dp_vnp, &pmem_off);
	mutex_exit(&pmem_mutex);

	if (tpages == npages)
		goto done;

	rpages = npages - tpages;
	/* Quit now if memory cannot be reserved. */
	if (!page_resv(rpages, kflags))
		goto alloc_fail;
	reserved = 1;

	/* If we have large pages */
	if (pmem_lpgsize > PAGESIZE) {
		/* Try to alloc large pages first to decrease fragmentation. */
		i = (rpages + (pmem_pgcnt - 1)) / pmem_pgcnt;
		if (lpp_create(&lpp, i, &lpages, &plp, pcp->dp_vnp, &pmem_off,
		    kflags) == DDI_FAILURE)
			goto alloc_fail;
		ASSERT(lpages == 0 ? lpp == NULL : 1);
	}

	/*
	 * If the large pages hold more pages than the request, put the
	 * residual pages into pmem_mpool.
	 */
	if (lpages >= rpages) {
		lpp_break(&lpp, lpages, lpages - rpages, plp);
		goto done;
	}

	/* Allocate small pages if lpp+tlist cannot satisfy the request. */
	i = rpages - lpages;
	if ((pp = page_create_va(pcp->dp_vnp, pmem_off, ptob(i),
	    pflags, &pmem_seg, (caddr_t)(uintptr_t)pmem_off)) == NULL)
		goto alloc_fail;

done:
	page_list_concat(&tlist, &lpp);
	page_list_concat(&tlist, &pp);
	/* Set those small pages from large pages as allocated. */
	mutex_enter(&pmem_mutex);
	pmem_lpg_concat(&pmem_occ_lpgs, &plp);
	mutex_exit(&pmem_mutex);

	/*
	 * Now tlist holds all the pages for this cookie.  Record these pages
	 * in the pmem cookie.
	 */
	for (pp = tlist, i = 0; i < npages; i++) {
		pcp->dp_pparray[i] = pp;
		page_io_unlock(pp);
		pp = pp->p_next;
		page_sub(&tlist, pp->p_prev);
	}
	ASSERT(tlist == NULL);
	*cookiep = (devmap_pmem_cookie_t)pcp;

	return (DDI_SUCCESS);

alloc_fail:
	DTRACE_PROBE(pmem__alloc__fail);
	/* Free large pages and the associated allocation records. */
	if (lpp != NULL)
		lpp_free(lpp, lpages / pmem_pgcnt, &plp);
	if (reserved == 1)
		page_unresv(rpages);
	/* Put those pages in tlist back into pmem_mpool. */
	if (tpages != 0) {
		mutex_enter(&pmem_mutex);
		/* IOunlock, hashout and update the allocation records. */
		tlist_out(tlist, tpages);
		mpool_append(&tlist, tpages);
		mutex_exit(&pmem_mutex);
	}
	if (locked == 1)
		i_ddi_decr_locked_memory(pcp->dp_proc, ptob(pcp->dp_npages));
	/* Freeing pmem_cookie. */
	kmem_free(pcp->dp_vnp, sizeof (vnode_t));
	kmem_free(pcp->dp_pparray, npages * sizeof (page_t *));
	kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
	return (DDI_FAILURE);
}
/*
 * Free all small pages inside the cookie, and return pages from large pages
 * into mpool; if all the pages from one large page are in mpool, free it as
 * a whole.
 */
void
devmap_pmem_free(devmap_pmem_cookie_t cookie)
{
	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	pgcnt_t i;
	pgcnt_t tpages = 0;
	page_t *pp;
	pmem_lpg_t *pl1, *plp;
	pmem_lpg_t *pf_lpgs = NULL;
	uint_t npls = 0;
	pmem_lpg_t *last_pl = NULL;
	pmem_lpg_t *plast_pl = NULL;

	mutex_enter(&pmem_mutex);
	/* Free small pages and return them to the memory pool. */
	for (i = pcp->dp_npages; i > 0; i--) {
		pp = pcp->dp_pparray[i - 1];
		page_hashout(pp, NULL);
		/*
		 * Remove the mapping of this single page; this mapping was
		 * created by hat_devload() in segdev_faultpage().
		 */
		(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
		if (!FROM_LPG(pp)) {
			/* Normal small page. */
			page_free(pp, 1);
			page_unresv(1);
		} else {
			/* Small page from large pages. */
			plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl);
			if (plp && !(plp->pl_pfree)) {
				/*
				 * Move this record to the pf_lpgs list; this
				 * large page may be able to be freed as a
				 * whole.
				 */
				pmem_lpg_sub(&pmem_occ_lpgs, plp);
				pmem_lpg_concat(&pf_lpgs, &plp);
				plp->pl_pfree = 1;
				npls++;
				last_pl = NULL;
			} else {
				/* Search in the pf_lpgs list. */
				plp = pmem_lpg_get(pf_lpgs, pp, &plast_pl);
			}
			/* Mark this page as free. */
			BT_SET(plp->pl_bitmap, PFIND(pp));
			/* Record this page in pmem_mpool. */
			mpool_append(&pp, 1);
			tpages++;
		}
	}

	/*
	 * Find the large pages whose pages have all been freed, remove them
	 * from the pf_lpgs list, and free them along with the associated
	 * pmem_lpg structs.
	 */
	for (plp = pf_lpgs; npls != 0; npls--) {
		pl1 = plp;
		plp = plp->pl_next;
		if (lpg_isfree(pl1)) {
			/*
			 * Got one free large page.  Find all pages in this
			 * large page and remove them from pmem_mpool.
			 */
			lpg_free(pl1->pl_pp);
			/* Remove associated allocation records. */
			pmem_lpg_sub(&pf_lpgs, pl1);
			pmem_lpg_free(&pf_lpgs, pl1);
			tpages -= pmem_pgcnt;
		} else {
			pl1->pl_pfree = 0;
		}
	}
	/* Update allocation records accordingly. */
	pmem_lpg_concat(&pmem_occ_lpgs, &pf_lpgs);
	mutex_exit(&pmem_mutex);

	if (curproc == pcp->dp_proc)
		i_ddi_decr_locked_memory(curproc, ptob(pcp->dp_npages));
	kmem_free(pcp->dp_vnp, sizeof (vnode_t));
	kmem_free(pcp->dp_pparray, pcp->dp_npages * sizeof (page_t *));
	kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
}
/*
 * Extract the page frame numbers from a specified range in a cookie.
 */
int
devmap_pmem_getpfns(devmap_pmem_cookie_t cookie, uint_t start, pgcnt_t npages,
    pfn_t *pfnarray)
{
	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	pgcnt_t i;

	if (pcp == NULL || start + npages > pcp->dp_npages)
		return (DDI_FAILURE);

	for (i = start; i < start + npages; i++)
		pfnarray[i - start] = pfn_to_mfn(pcp->dp_pparray[i]->p_pagenum);

	return (DDI_SUCCESS);
}
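
/*
 * Illustrative sketch (hypothetical driver code): after a successful
 * devmap_pmem_alloc(), the frame numbers can be pulled out of the cookie in
 * chunks and handed to whatever routine programs the GART.  xx_gart_map()
 * and the chunk size are assumptions made only for this example.
 *
 *	pfn_t pfns[16];
 *	pgcnt_t chunk;
 *	uint_t done;
 *
 *	for (done = 0; done < npages; done += chunk) {
 *		chunk = MIN(16, npages - done);
 *		if (devmap_pmem_getpfns(cookie, done, chunk, pfns) !=
 *		    DDI_SUCCESS)
 *			return (DDI_FAILURE);
 *		xx_gart_map(gart, done, pfns, chunk);
 *	}
 */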
	mutex_init(&pmem_mutex, NULL, MUTEX_DEFAULT, NULL);
	pmem_lszc = MIN(1, page_num_pagesizes() - 1);
	pmem_lpgsize = page_get_pagesize(pmem_lszc);
	pmem_pgcnt = pmem_lpgsize >> PAGESHIFT;
	bzero(&pmem_seg, sizeof (struct seg));
	pmem_seg.s_as = &kas;
/* Allocate kernel memory for one pmem cookie with n pages. */
static int
pmem_cookie_alloc(struct devmap_pmem_cookie **pcpp, pgcnt_t n, uint_t kflags)
{
	struct devmap_pmem_cookie *pcp;

	if ((*pcpp = kmem_zalloc(sizeof (struct devmap_pmem_cookie),
	    kflags)) == NULL)
		return (DDI_FAILURE);
	pcp = *pcpp;
	if ((pcp->dp_vnp =
	    kmem_zalloc(sizeof (vnode_t), kflags)) == NULL) {
		kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
		return (DDI_FAILURE);
	}
	if ((pcp->dp_pparray =
	    kmem_zalloc(n * sizeof (page_t *), kflags)) == NULL) {
		kmem_free(pcp->dp_vnp, sizeof (vnode_t));
		kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}
/* Try to lock down the n-page memory resource for process p. */
static int
pmem_lock(pgcnt_t n, proc_t *p)
{
	if (i_ddi_incr_locked_memory(p, ptob(n)) != 0) {
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}
/* To check if all the pages in a large page are freed. */
static int
lpg_isfree(pmem_lpg_t *plp)
{
	uint_t i;

	for (i = 0; i < BT_BITOUL(pmem_pgcnt); i++)
		if (plp->pl_bitmap[i] != BT_ULMAXMASK)
			return (0);
	/* All 1 means all pages are freed. */
	return (1);
}
/*
 * Using pp to get the associated large page allocation record, searching in
 * the splp linked list with *last as the heuristic pointer.  Return NULL if
 * not found.
 */
static pmem_lpg_t *
pmem_lpg_get(pmem_lpg_t *splp, page_t *pp, pmem_lpg_t **last)
{
	pmem_lpg_t *plp;
	pgcnt_t root_pfn;

	if (splp == NULL)
		return (NULL);
	root_pfn = page_pptonum(pp) & ~(pmem_pgcnt - 1);

	/* Try last winner first. */
	if (*last && root_pfn == page_pptonum((*last)->pl_pp))
		return (*last);

	/* Else search the whole pmem_lpg list. */
	for (plp = splp; root_pfn != page_pptonum(plp->pl_pp); ) {
		plp = plp->pl_next;
		if (plp == splp) {
			plp = NULL;
			break;
		}
	}
	*last = plp;
	return (plp);
}
/*
 * Remove one pmem_lpg plp from the oplpp list.
 */
static void
pmem_lpg_sub(pmem_lpg_t **oplpp, pmem_lpg_t *plp)
{
	if (*oplpp == plp)
		*oplpp = plp->pl_next;		/* go to next pmem_lpg */

	if (*oplpp == plp)
		*oplpp = NULL;			/* pmem_lpg list is gone */
	else {
		plp->pl_prev->pl_next = plp->pl_next;
		plp->pl_next->pl_prev = plp->pl_prev;
	}
	plp->pl_prev = plp->pl_next = plp;	/* make plp a list of one */
}
/*
 * Concatenate the pmem_lpg list nplpp onto the end of list plpp.
 */
static void
pmem_lpg_concat(pmem_lpg_t **plpp, pmem_lpg_t **nplpp)
{
	pmem_lpg_t *s1p, *s2p, *e1p, *e2p;

	if (*nplpp == NULL) {
		return;
	}
	if (*plpp == NULL) {
		*plpp = *nplpp;
		*nplpp = NULL;
		return;
	}
	s1p = *plpp;
	e1p = s1p->pl_prev;
	s2p = *nplpp;
	e2p = s2p->pl_prev;
	s1p->pl_prev = e2p;
	e2p->pl_next = s1p;
	s2p->pl_prev = e1p;
	e1p->pl_next = s2p;
	*nplpp = NULL;
}
/*
 * Allocate and initialize the allocation record of one large page; the init
 * value is 'allocated'.
 */
static pmem_lpg_t *
pmem_lpg_alloc(uint_t kflags)
{
	pmem_lpg_t *plp;

	ASSERT(pmem_pgcnt % BT_NBIPUL == 0);
	plp = kmem_zalloc(sizeof (pmem_lpg_t), kflags);
	if (plp == NULL)
		return (NULL);
	plp->pl_bitmap = kmem_zalloc(BT_SIZEOFMAP(pmem_pgcnt), kflags);
	if (plp->pl_bitmap == NULL) {
		kmem_free(plp, sizeof (*plp));
		return (NULL);
	}
	plp->pl_next = plp->pl_prev = plp;
	return (plp);
}
/* Free one allocation record pointed to by plp. */
static void
pmem_lpg_free(pmem_lpg_t **headp, pmem_lpg_t *plp)
{
	if (*headp == plp)
		*headp = plp->pl_next;		/* go to next pmem_lpg_t */

	if (*headp == plp)
		*headp = NULL;			/* this list is gone */
	else {
		plp->pl_prev->pl_next = plp->pl_next;
		plp->pl_next->pl_prev = plp->pl_prev;
	}
	kmem_free(plp->pl_bitmap, BT_SIZEOFMAP(pmem_pgcnt));
	kmem_free(plp, sizeof (*plp));
}
/* Free one large page headed by spp from pmem_mpool. */
static void
lpg_free(page_t *spp)
{
	page_t *pp1 = spp;
	uint_t i;

	ASSERT(MUTEX_HELD(&pmem_mutex));
	for (i = 0; i < pmem_pgcnt; i++) {
		/* Break pp1 from pmem_mpool. */
		page_sub(&pmem_mpool, pp1);
		pp1++;
	}
	/* Free pages in this large page. */
	page_free_pages(spp);
	page_unresv(pmem_pgcnt);
	pmem_nmpages -= pmem_pgcnt;
	ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool));
}
/* Put n pages in the *ppp list back into pmem_mpool. */
static void
mpool_append(page_t **ppp, pgcnt_t n)
{
	ASSERT(MUTEX_HELD(&pmem_mutex));
	/* Put back pages. */
	page_list_concat(&pmem_mpool, ppp);
	pmem_nmpages += n;
	ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool));
}
/*
 * Try to grab MIN(pmem_nmpages, n) pages from pmem_mpool, put them into the
 * *ppp list, and return the number of grabbed pages.
 */
static pgcnt_t
mpool_break(page_t **ppp, pgcnt_t n)
{
	pgcnt_t i;

	ASSERT(MUTEX_HELD(&pmem_mutex));
	/* Grab the pages. */
	i = MIN(pmem_nmpages, n);
	*ppp = pmem_mpool;
	page_list_break(ppp, &pmem_mpool, i);
	pmem_nmpages -= i;
	ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool));
	return (i);
}
/*
 * Create n large pages; lpages and plpp return the number of small pages
 * created and the list of their allocation records, respectively.
 */
static int
lpp_create(page_t **lppp, pgcnt_t n, pgcnt_t *lpages, pmem_lpg_t **plpp,
    vnode_t *vnp, u_offset_t *offp, uint_t kflags)
{
	page_t *pp;
	pgcnt_t i;
	pmem_lpg_t *plp;

	for (i = 0, *lpages = 0; i < n; i++) {
		/* Allocate one large page each time. */
		pp = page_create_va_large(vnp, *offp, pmem_lpgsize,
		    PG_EXCL, &pmem_seg, (caddr_t)(uintptr_t)*offp, NULL);
		if (pp == NULL)
			break;
		*offp += pmem_lpgsize;
		page_list_concat(lppp, &pp);
		*lpages += pmem_pgcnt;
		/* Add one allocation record for this large page. */
		if ((plp = pmem_lpg_alloc(kflags)) == NULL)
			return (DDI_FAILURE);
		plp->pl_pp = pp;
		pmem_lpg_concat(plpp, &plp);
	}
	return (DDI_SUCCESS);
}
/*
 * Break the last r small pages from the large page list *lppp (n small pages
 * in total) and put them into pmem_mpool.
 */
static void
lpp_break(page_t **lppp, pgcnt_t n, pgcnt_t r, pmem_lpg_t *oplp)
{
	page_t *pp, *pp1;
	pmem_lpg_t *plp;
	pgcnt_t i;

	if (r == 0)
		return;
	ASSERT(*lppp != NULL && r < pmem_pgcnt);
	page_list_break(lppp, &pp, n - r);

	/* The residual pages should reside in the last large page. */
	plp = oplp->pl_prev;
	/* IOunlock and hashout the residual pages. */
	for (pp1 = pp, i = 0; i < r; i++) {
		page_io_unlock(pp1);
		page_hashout(pp1, NULL);
		/* Mark this page as free. */
		BT_SET(plp->pl_bitmap, PFIND(pp1));
		pp1 = pp1->p_next;
	}

	/* Put these residual pages into the memory pool. */
	mutex_enter(&pmem_mutex);
	mpool_append(&pp, r);
	mutex_exit(&pmem_mutex);
}
/* Free the large pages in lpp and the associated allocation records in plp. */
static void
lpp_free(page_t *lpp, pgcnt_t lpgs, pmem_lpg_t **plpp)
{
	pgcnt_t i, j;
	page_t *pp = lpp, *pp1;
	pmem_lpg_t *plp1, *plp2;

	for (i = 0; i < lpgs; i++) {
		for (j = 0; j < pmem_pgcnt; j++) {
			/* IO unlock and hashout this small page. */
			page_io_unlock(pp);
			page_hashout(pp, NULL);
			pp1 = pp->p_next;
			pp->p_prev = pp->p_next = pp;
			pp = pp1;
		}
		/* Free one large page at one time. */
		page_free_pages(lpp);
		lpp = pp;
	}
	/* Free the associated pmem large page allocation records. */
	for (plp1 = *plpp; *plpp; plp1 = plp2) {
		plp2 = plp1->pl_next;
		pmem_lpg_free(plpp, plp1);
	}
}
/*
 * IOlock and hashin all pages in tlist, associating them with vnode *pvnp
 * and offsets starting at *poffp.  Update the allocation records accordingly
 * at the same time.
 */
static void
tlist_in(page_t *tlist, pgcnt_t tpages, vnode_t *pvnp, u_offset_t *poffp)
{
	page_t *pp;
	pgcnt_t i = 0;
	pmem_lpg_t *plp, *last_pl = NULL;

	ASSERT(MUTEX_HELD(&pmem_mutex));
	for (pp = tlist; i < tpages; i++) {
		ASSERT(FROM_LPG(pp));
		page_io_lock(pp);
		(void) page_hashin(pp, pvnp, *poffp, NULL);
		plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl);
		/* Mark this page as allocated. */
		BT_CLEAR(plp->pl_bitmap, PFIND(pp));
		*poffp += PAGESIZE;
		pp = pp->p_next;
	}
}
/*
 * IOunlock and hashout all pages in tlist, updating the allocation records
 * accordingly at the same time.
 */
static void
tlist_out(page_t *tlist, pgcnt_t tpages)
{
	page_t *pp;
	pgcnt_t i = 0;
	pmem_lpg_t *plp, *last_pl = NULL;

	ASSERT(MUTEX_HELD(&pmem_mutex));
	for (pp = tlist; i < tpages; i++) {
		ASSERT(FROM_LPG(pp));
		page_io_unlock(pp);
		page_hashout(pp, NULL);
		plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl);
		/* Mark this page as free. */
		BT_SET(plp->pl_bitmap, PFIND(pp));
		pp = pp->p_next;
	}
}