/*
 * Copyright (C) 2001-2006 Jakub Jermar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup genericmm
 * @{
 */

/**
 * @file
 * @brief Address space related functions.
 *
 * This file contains address space manipulation functions.
 * Roughly speaking, this is a higher-level client of
 * Virtual Address Translation (VAT) subsystem.
 *
 * Functionality provided by this file allows one to
 * create address spaces and create, resize and share
 * address space areas.
 */

#include <mm/as.h>
#include <arch/mm/as.h>
#include <mm/page.h>
#include <mm/frame.h>
#include <mm/slab.h>
#include <mm/tlb.h>
#include <arch/mm/page.h>
#include <genarch/mm/page_pt.h>
#include <genarch/mm/page_ht.h>
#include <mm/asid.h>
#include <arch/mm/asid.h>
#include <synch/spinlock.h>
#include <synch/mutex.h>
#include <adt/list.h>
#include <adt/btree.h>
#include <proc/task.h>
#include <proc/thread.h>
#include <arch/asm.h>
#include <panic.h>
#include <debug.h>
#include <print.h>
#include <memstr.h>
#include <macros.h>
#include <arch.h>
#include <errno.h>
#include <config.h>
#include <align.h>
#include <arch/types.h>
#include <typedefs.h>
#include <syscall/copy.h>
#include <arch/interrupt.h>

/**
 * Each architecture decides what functions will be used to carry out
 * address space operations such as creating or locking page tables.
 */
as_operations_t *as_operations = NULL;

/** Slab for as_t objects. */
static slab_cache_t *as_slab;

/**
 * This lock protects inactive_as_with_asid_head list. It must be acquired
 * before as_t mutex.
 */
SPINLOCK_INITIALIZE(inactive_as_with_asid_lock);

/**
 * This list contains address spaces that are not active on any
 * processor and that have valid ASID.
 */
LIST_INITIALIZE(inactive_as_with_asid_head);

/** Kernel address space. */
as_t *AS_KERNEL = NULL;

static int area_flags_to_page_flags(int aflags);
static as_area_t *find_area_and_lock(as_t *as, uintptr_t va);
static bool check_area_conflicts(as_t *as, uintptr_t va, size_t size,
    as_area_t *avoid_area);
static void sh_info_remove_reference(share_info_t *sh_info);

static int as_constructor(void *obj, int flags)
{
	as_t *as = (as_t *) obj;
	int rc;

	link_initialize(&as->inactive_as_with_asid_link);
	mutex_initialize(&as->lock);

	rc = as_constructor_arch(as, flags);

	return rc;
}

static int as_destructor(void *obj)
{
	as_t *as = (as_t *) obj;

	return as_destructor_arch(as);
}

/** Initialize address space subsystem. */
void as_init(void)
{
	as_arch_init();

	as_slab = slab_cache_create("as_slab", sizeof(as_t), 0,
	    as_constructor, as_destructor, SLAB_CACHE_MAGDEFERRED);

	AS_KERNEL = as_create(FLAG_AS_KERNEL);
	if (!AS_KERNEL)
		panic("can't create kernel address space\n");
}

/** Create address space.
 *
 * @param flags Flags that influence the way in which the address space is
 *     created.
 */
as_t *as_create(int flags)
{
	as_t *as;

	as = (as_t *) slab_alloc(as_slab, 0);
	(void) as_create_arch(as, 0);

	btree_create(&as->as_area_btree);

	if (flags & FLAG_AS_KERNEL)
		as->asid = ASID_KERNEL;
	else
		as->asid = ASID_INVALID;

	as->refcount = 0;
	as->cpu_refcount = 0;
	as->page_table = page_table_create(flags);

	return as;
}
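
/*
 * Illustrative sketch (not part of the original file): a typical caller
 * creates one address space per task and relies on the slab constructor and
 * page_table_create() for initialization. With flags == 0, the ASID stays
 * unassigned until the first as_switch() to this address space:
 *
 *	as_t *as = as_create(0);
 *	ASSERT(as->asid == ASID_INVALID);
 */
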
/** Destroy address space.
 *
 * When there are no tasks referencing this address space (i.e. its refcount
 * is zero), the address space can be destroyed.
 */
void as_destroy(as_t *as)
{
	ipl_t ipl;
	bool cond;

	ASSERT(as->refcount == 0);

	/*
	 * Since there is no reference to this area,
	 * it is safe not to lock its mutex.
	 */
	ipl = interrupts_disable();
	spinlock_lock(&inactive_as_with_asid_lock);
	if (as->asid != ASID_INVALID && as != AS_KERNEL) {
		if (as != AS && as->cpu_refcount == 0)
			list_remove(&as->inactive_as_with_asid_link);
		asid_put(as->asid);
	}
	spinlock_unlock(&inactive_as_with_asid_lock);

	/*
	 * Destroy address space areas of the address space.
	 * The B+tree must be walked carefully because it is
	 * also being destroyed.
	 */
	for (cond = true; cond; ) {
		btree_node_t *node;

		ASSERT(!list_empty(&as->as_area_btree.leaf_head));
		node = list_get_instance(as->as_area_btree.leaf_head.next,
		    btree_node_t, leaf_link);

		if ((cond = node->keys))
			as_area_destroy(as, node->key[0]);
	}

	btree_destroy(&as->as_area_btree);
	page_table_destroy(as->page_table);

	interrupts_restore(ipl);

	slab_free(as_slab, as);
}

/** Create address space area of common attributes.
 *
 * The created address space area is added to the target address space.
 *
 * @param as Target address space.
 * @param flags Flags of the area memory.
 * @param size Size of area.
 * @param base Base address of area.
 * @param attrs Attributes of the area.
 * @param backend Address space area backend. NULL if no backend is used.
 * @param backend_data NULL or a pointer to an array holding two void *.
 *
 * @return Address space area on success or NULL on failure.
 */
as_area_t *as_area_create(as_t *as, int flags, size_t size, uintptr_t base,
    int attrs, mem_backend_t *backend, mem_backend_data_t *backend_data)
{
	ipl_t ipl;
	as_area_t *a;

	if (base % PAGE_SIZE)
		return NULL;

	if (!size)
		return NULL;

	/* Writeable executable areas are not supported. */
	if ((flags & AS_AREA_EXEC) && (flags & AS_AREA_WRITE))
		return NULL;

	ipl = interrupts_disable();
	mutex_lock(&as->lock);

	if (!check_area_conflicts(as, base, size, NULL)) {
		mutex_unlock(&as->lock);
		interrupts_restore(ipl);
		return NULL;
	}

	a = (as_area_t *) malloc(sizeof(as_area_t), 0);

	mutex_initialize(&a->lock);

	a->as = as;
	a->flags = flags;
	a->attributes = attrs;
	a->pages = SIZE2FRAMES(size);
	a->base = base;
	a->sh_info = NULL;
	a->backend = backend;
	if (backend_data)
		a->backend_data = *backend_data;
	else
		memsetb((uintptr_t) &a->backend_data, sizeof(a->backend_data),
		    0);

	btree_create(&a->used_space);

	btree_insert(&as->as_area_btree, base, (void *) a, NULL);

	mutex_unlock(&as->lock);
	interrupts_restore(ipl);

	return a;
}
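
/*
 * Illustrative sketch (the base address below is a hypothetical, page-aligned
 * value): creating an anonymous, cacheable, read/write area the same way
 * sys_as_area_create() at the bottom of this file does:
 *
 *	as_area_t *a = as_area_create(as,
 *	    AS_AREA_READ | AS_AREA_WRITE | AS_AREA_CACHEABLE,
 *	    4 * PAGE_SIZE, base, AS_AREA_ATTR_NONE, &anon_backend, NULL);
 *	if (!a) {
 *		// base was not page-aligned, the flags were invalid,
 *		// or the area conflicted with an existing one
 *	}
 */
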
/** Find address space area and change it.
 *
 * @param as Address space.
 * @param address Virtual address belonging to the area to be changed. Must be
 *     page-aligned.
 * @param size New size of the virtual memory block starting at address.
 * @param flags Flags influencing the remap operation. Currently unused.
 *
 * @return Zero on success or a value from @ref errno.h otherwise.
 */
int as_area_resize(as_t *as, uintptr_t address, size_t size, int flags)
{
	as_area_t *area;
	ipl_t ipl;
	size_t pages;

	ipl = interrupts_disable();
	mutex_lock(&as->lock);

	/*
	 * Locate the area.
	 */
	area = find_area_and_lock(as, address);
	if (!area) {
		mutex_unlock(&as->lock);
		interrupts_restore(ipl);
		return ENOENT;
	}

	if (area->backend == &phys_backend) {
		/*
		 * Remapping of address space areas associated
		 * with memory mapped devices is not supported.
		 */
		mutex_unlock(&area->lock);
		mutex_unlock(&as->lock);
		interrupts_restore(ipl);
		return ENOTSUP;
	}
	if (area->sh_info) {
		/*
		 * Remapping of shared address space areas
		 * is not supported.
		 */
		mutex_unlock(&area->lock);
		mutex_unlock(&as->lock);
		interrupts_restore(ipl);
		return ENOTSUP;
	}

	pages = SIZE2FRAMES((address - area->base) + size);
	if (!pages) {
		/*
		 * Zero size address space areas are not allowed.
		 */
		mutex_unlock(&area->lock);
		mutex_unlock(&as->lock);
		interrupts_restore(ipl);
		return EPERM;
	}

	if (pages < area->pages) {
		bool cond;
		uintptr_t start_free = area->base + pages * PAGE_SIZE;

		/*
		 * Shrinking the area.
		 * No need to check for overlaps.
		 */

		/*
		 * Start TLB shootdown sequence.
		 */
		tlb_shootdown_start(TLB_INVL_PAGES, AS->asid,
		    area->base + pages * PAGE_SIZE, area->pages - pages);

		/*
		 * Remove frames belonging to used space starting from
		 * the highest addresses downwards until an overlap with
		 * the resized address space area is found. Note that this
		 * is also the right way to remove part of the used_space
		 * B+tree leaf list.
		 */
		for (cond = true; cond;) {
			btree_node_t *node;

			ASSERT(!list_empty(&area->used_space.leaf_head));
			node = list_get_instance(
			    area->used_space.leaf_head.prev, btree_node_t,
			    leaf_link);
			if ((cond = (bool) node->keys)) {
				uintptr_t b = node->key[node->keys - 1];
				count_t c =
				    (count_t) node->value[node->keys - 1];
				int i = 0;

				if (overlaps(b, c * PAGE_SIZE, area->base,
				    pages * PAGE_SIZE)) {

					if (b + c * PAGE_SIZE <= start_free) {
						/*
						 * The whole interval fits
						 * completely in the resized
						 * address space area.
						 */
						break;
					}

					/*
					 * Part of the interval corresponding
					 * to b and c overlaps with the resized
					 * address space area.
					 */

					cond = false;	/* we are almost done */
					i = (start_free - b) >> PAGE_WIDTH;
					if (!used_space_remove(area,
					    start_free, c - i))
						panic("Could not remove used space.\n");
				} else {
					/*
					 * The interval of used space can be
					 * completely removed.
					 */
					if (!used_space_remove(area, b, c))
						panic("Could not remove used space.\n");
				}

				for (; i < c; i++) {
					pte_t *pte;

					page_table_lock(as, false);
					pte = page_mapping_find(as,
					    b + i * PAGE_SIZE);
					ASSERT(pte && PTE_VALID(pte) &&
					    PTE_PRESENT(pte));
					if (area->backend &&
					    area->backend->frame_free) {
						area->backend->frame_free(area,
						    b + i * PAGE_SIZE,
						    PTE_GET_FRAME(pte));
					}
					page_mapping_remove(as,
					    b + i * PAGE_SIZE);
					page_table_unlock(as, false);
				}
			}
		}

		/*
		 * Finish TLB shootdown sequence.
		 */
		tlb_invalidate_pages(as->asid, area->base + pages * PAGE_SIZE,
		    area->pages - pages);
		tlb_shootdown_finalize();

		/*
		 * Invalidate software translation caches (e.g. TSB on
		 * sparc64).
		 */
		as_invalidate_translation_cache(as,
		    area->base + pages * PAGE_SIZE, area->pages - pages);
	} else {
		/*
		 * Growing the area.
		 * Check for overlaps with other address space areas.
		 */
		if (!check_area_conflicts(as, address, pages * PAGE_SIZE,
		    area)) {
			mutex_unlock(&area->lock);
			mutex_unlock(&as->lock);
			interrupts_restore(ipl);
			return EADDRNOTAVAIL;
		}
	}

	area->pages = pages;

	mutex_unlock(&area->lock);
	mutex_unlock(&as->lock);
	interrupts_restore(ipl);

	return 0;
}
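
/*
 * Illustrative sketch: the new page count is computed as
 * SIZE2FRAMES((address - area->base) + size), i.e. the area will span from
 * its base to address + size. Passing the area base itself therefore shrinks
 * (or grows) the area to exactly that size:
 *
 *	int rc = as_area_resize(as, area_base, 2 * PAGE_SIZE, 0);
 *	if (rc != 0) {
 *		// ENOENT, ENOTSUP, EPERM or EADDRNOTAVAIL, see above
 *	}
 */
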
/** Destroy address space area.
 *
 * @param as Address space.
 * @param address Address within the area to be deleted.
 *
 * @return Zero on success or a value from @ref errno.h on failure.
 */
int as_area_destroy(as_t *as, uintptr_t address)
{
	as_area_t *area;
	uintptr_t base;
	link_t *cur;
	ipl_t ipl;

	ipl = interrupts_disable();
	mutex_lock(&as->lock);

	area = find_area_and_lock(as, address);
	if (!area) {
		mutex_unlock(&as->lock);
		interrupts_restore(ipl);
		return ENOENT;
	}

	base = area->base;

	/*
	 * Start TLB shootdown sequence.
	 */
	tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base, area->pages);

	/*
	 * Visit only the pages mapped by used_space B+tree.
	 */
	for (cur = area->used_space.leaf_head.next;
	    cur != &area->used_space.leaf_head; cur = cur->next) {
		btree_node_t *node;
		int i;

		node = list_get_instance(cur, btree_node_t, leaf_link);
		for (i = 0; i < node->keys; i++) {
			uintptr_t b = node->key[i];
			count_t j;
			pte_t *pte;

			for (j = 0; j < (count_t) node->value[i]; j++) {
				page_table_lock(as, false);
				pte = page_mapping_find(as, b + j * PAGE_SIZE);
				ASSERT(pte && PTE_VALID(pte) &&
				    PTE_PRESENT(pte));
				if (area->backend &&
				    area->backend->frame_free) {
					area->backend->frame_free(area,
					    b + j * PAGE_SIZE,
					    PTE_GET_FRAME(pte));
				}
				page_mapping_remove(as, b + j * PAGE_SIZE);
				page_table_unlock(as, false);
			}
		}
	}

	/*
	 * Finish TLB shootdown sequence.
	 */
	tlb_invalidate_pages(as->asid, area->base, area->pages);
	tlb_shootdown_finalize();

	/*
	 * Invalidate potential software translation caches (e.g. TSB on
	 * sparc64).
	 */
	as_invalidate_translation_cache(as, area->base, area->pages);

	btree_destroy(&area->used_space);

	area->attributes |= AS_AREA_ATTR_PARTIAL;

	if (area->sh_info)
		sh_info_remove_reference(area->sh_info);

	mutex_unlock(&area->lock);

	/*
	 * Remove the empty area from address space.
	 */
	btree_remove(&as->as_area_btree, base, NULL);

	free(area);

	mutex_unlock(&as->lock);
	interrupts_restore(ipl);
	return 0;
}

/** Share address space area with another or the same address space.
 *
 * Address space area mapping is shared with a new address space area.
 * If the source address space area has not been shared so far,
 * a new sh_info is created. The new address space area simply gets the
 * sh_info of the source area. The process of duplicating the
 * mapping is done through the backend share function.
 *
 * @param src_as Pointer to source address space.
 * @param src_base Base address of the source address space area.
 * @param acc_size Expected size of the source area.
 * @param dst_as Pointer to destination address space.
 * @param dst_base Target base address.
 * @param dst_flags_mask Destination address space area flags mask.
 *
 * @return Zero on success or ENOENT if there is no such task or if there is
 * no such address space area, EPERM if there was a problem in accepting the
 * area or ENOMEM if there was a problem in allocating destination address
 * space area. ENOTSUP is returned if the address space area backend does not
 * support sharing. It can also be returned if the architecture uses virtually
 * indexed caches and the source and destination areas start at pages with
 * different colors.
 */
int as_area_share(as_t *src_as, uintptr_t src_base, size_t acc_size,
    as_t *dst_as, uintptr_t dst_base, int dst_flags_mask)
{
	ipl_t ipl;
	int src_flags;
	size_t src_size;
	as_area_t *src_area, *dst_area;
	share_info_t *sh_info;
	mem_backend_t *src_backend;
	mem_backend_data_t src_backend_data;

	ipl = interrupts_disable();
	mutex_lock(&src_as->lock);
	src_area = find_area_and_lock(src_as, src_base);
	if (!src_area) {
		/*
		 * Could not find the source address space area.
		 */
		mutex_unlock(&src_as->lock);
		interrupts_restore(ipl);
		return ENOENT;
	}

#if 0	/* disable the check for now */
#ifdef CONFIG_VIRT_IDX_CACHE
	if (PAGE_COLOR(src_area->base) != PAGE_COLOR(dst_base)) {
		/*
		 * Refuse to create illegal address alias.
		 */
		mutex_unlock(&src_area->lock);
		mutex_unlock(&src_as->lock);
		interrupts_restore(ipl);
		return EPERM;
	}
#endif /* CONFIG_VIRT_IDX_CACHE */
#endif

	if (!src_area->backend || !src_area->backend->share) {
		/*
		 * There is no backend or the backend does not
		 * know how to share the area.
		 */
		mutex_unlock(&src_area->lock);
		mutex_unlock(&src_as->lock);
		interrupts_restore(ipl);
		return ENOTSUP;
	}

	src_size = src_area->pages * PAGE_SIZE;
	src_flags = src_area->flags;
	src_backend = src_area->backend;
	src_backend_data = src_area->backend_data;

	/* Share the cacheable flag from the original mapping */
	if (src_flags & AS_AREA_CACHEABLE)
		dst_flags_mask |= AS_AREA_CACHEABLE;

	if (src_size != acc_size ||
	    (src_flags & dst_flags_mask) != dst_flags_mask) {
		mutex_unlock(&src_area->lock);
		mutex_unlock(&src_as->lock);
		interrupts_restore(ipl);
		return EPERM;
	}

	/*
	 * Now we are committed to sharing the area.
	 * First, prepare the area for sharing.
	 * Then it will be safe to unlock it.
	 */
	sh_info = src_area->sh_info;
	if (!sh_info) {
		sh_info = (share_info_t *) malloc(sizeof(share_info_t), 0);
		mutex_initialize(&sh_info->lock);
		sh_info->refcount = 2;
		btree_create(&sh_info->pagemap);
		src_area->sh_info = sh_info;
	} else {
		mutex_lock(&sh_info->lock);
		sh_info->refcount++;
		mutex_unlock(&sh_info->lock);
	}

	src_area->backend->share(src_area);

	mutex_unlock(&src_area->lock);
	mutex_unlock(&src_as->lock);

	/*
	 * Create copy of the source address space area.
	 * The destination area is created with AS_AREA_ATTR_PARTIAL
	 * attribute set which prevents race condition with
	 * preliminary as_page_fault() calls.
	 * The flags of the source area are masked against dst_flags_mask
	 * to support sharing in less privileged mode.
	 */
	dst_area = as_area_create(dst_as, dst_flags_mask, src_size, dst_base,
	    AS_AREA_ATTR_PARTIAL, src_backend, &src_backend_data);
	if (!dst_area) {
		/*
		 * Destination address space area could not be created.
		 */
		sh_info_remove_reference(sh_info);

		interrupts_restore(ipl);
		return ENOMEM;
	}

	/*
	 * Now the destination address space area has been
	 * fully initialized. Clear the AS_AREA_ATTR_PARTIAL
	 * attribute and set the sh_info.
	 */
	mutex_lock(&dst_area->lock);
	dst_area->attributes &= ~AS_AREA_ATTR_PARTIAL;
	dst_area->sh_info = sh_info;
	mutex_unlock(&dst_area->lock);

	interrupts_restore(ipl);

	return 0;
}
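
/*
 * Illustrative sketch (both address spaces and the source area are assumed to
 * exist): exporting an existing area read-only into another address space.
 * The cacheable flag is propagated from the source automatically:
 *
 *	int rc = as_area_share(src_as, src_base, src_size,
 *	    dst_as, dst_base, AS_AREA_READ);
 *	if (rc == 0) {
 *		// dst_as now maps the same frames through the shared sh_info
 *	}
 */
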
/** Check access mode for address space area.
 *
 * The address space area must be locked prior to this call.
 *
 * @param area Address space area.
 * @param access Access mode.
 *
 * @return False if access violates area's permissions, true otherwise.
 */
bool as_area_check_access(as_area_t *area, pf_access_t access)
{
	int flagmap[] = {
		[PF_ACCESS_READ] = AS_AREA_READ,
		[PF_ACCESS_WRITE] = AS_AREA_WRITE,
		[PF_ACCESS_EXEC] = AS_AREA_EXEC
	};

	if (!(area->flags & flagmap[access]))
		return false;

	return true;
}

/** Handle page fault within the current address space.
 *
 * This is the high-level page fault handler. It decides
 * whether the page fault can be resolved by any backend
 * and if so, it invokes the backend to resolve the page
 * fault.
 *
 * Interrupts are assumed disabled.
 *
 * @param page Faulting page.
 * @param access Access mode that caused the fault (i.e. read/write/exec).
 * @param istate Pointer to interrupted state.
 *
 * @return AS_PF_FAULT on page fault, AS_PF_OK on success or AS_PF_DEFER if
 *     the fault was caused by copy_to_uspace() or copy_from_uspace().
 */
int as_page_fault(uintptr_t page, pf_access_t access, istate_t *istate)
{
	pte_t *pte;
	as_area_t *area;

	if (!THREAD)
		return AS_PF_FAULT;

	ASSERT(AS);

	mutex_lock(&AS->lock);
	area = find_area_and_lock(AS, page);
	if (!area) {
		/*
		 * No area contained mapping for 'page'.
		 * Signal page fault to low-level handler.
		 */
		mutex_unlock(&AS->lock);
		goto page_fault;
	}

	if (area->attributes & AS_AREA_ATTR_PARTIAL) {
		/*
		 * The address space area is not fully initialized.
		 * Avoid possible race by returning error.
		 */
		mutex_unlock(&area->lock);
		mutex_unlock(&AS->lock);
		goto page_fault;
	}

	if (!area->backend || !area->backend->page_fault) {
		/*
		 * The address space area is not backed by any backend
		 * or the backend cannot handle page faults.
		 */
		mutex_unlock(&area->lock);
		mutex_unlock(&AS->lock);
		goto page_fault;
	}

	page_table_lock(AS, false);

	/*
	 * To avoid race condition between two page faults
	 * on the same address, we need to make sure
	 * the mapping has not been already inserted.
	 */
	if ((pte = page_mapping_find(AS, page))) {
		if (PTE_PRESENT(pte)) {
			if (((access == PF_ACCESS_READ) && PTE_READABLE(pte)) ||
			    (access == PF_ACCESS_WRITE && PTE_WRITABLE(pte)) ||
			    (access == PF_ACCESS_EXEC && PTE_EXECUTABLE(pte))) {
				page_table_unlock(AS, false);
				mutex_unlock(&area->lock);
				mutex_unlock(&AS->lock);
				return AS_PF_OK;
			}
		}
	}

	/*
	 * Resort to the backend page fault handler.
	 */
	if (area->backend->page_fault(area, page, access) != AS_PF_OK) {
		page_table_unlock(AS, false);
		mutex_unlock(&area->lock);
		mutex_unlock(&AS->lock);
		goto page_fault;
	}

	page_table_unlock(AS, false);
	mutex_unlock(&area->lock);
	mutex_unlock(&AS->lock);

	return AS_PF_OK;

page_fault:
	if (THREAD->in_copy_from_uspace) {
		THREAD->in_copy_from_uspace = false;
		istate_set_retaddr(istate,
		    (uintptr_t) &memcpy_from_uspace_failover_address);
	} else if (THREAD->in_copy_to_uspace) {
		THREAD->in_copy_to_uspace = false;
		istate_set_retaddr(istate,
		    (uintptr_t) &memcpy_to_uspace_failover_address);
	} else {
		return AS_PF_FAULT;
	}

	return AS_PF_DEFER;
}
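
/*
 * Illustrative sketch of an architecture-level caller (variable names are
 * hypothetical): the low-level fault handler is expected to align the
 * faulting address down to a page boundary and forward the access type:
 *
 *	if (as_page_fault(ALIGN_DOWN(badvaddr, PAGE_SIZE), PF_ACCESS_READ,
 *	    istate) == AS_PF_FAULT) {
 *		// a genuine fault: panic or kill the offending task
 *	}
 */
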
/** Switch address spaces.
 *
 * Note that this function cannot sleep as it is essentially a part of
 * scheduling. Sleeping here would lead to deadlock on wakeup.
 *
 * @param old Old address space or NULL.
 * @param new New address space.
 */
void as_switch(as_t *old, as_t *new)
{
	ipl_t ipl;
	bool needs_asid = false;

	ipl = interrupts_disable();
	spinlock_lock(&inactive_as_with_asid_lock);

	/*
	 * First, take care of the old address space.
	 */
	if (old) {
		mutex_lock_active(&old->lock);
		ASSERT(old->cpu_refcount);
		if ((--old->cpu_refcount == 0) && (old != AS_KERNEL)) {
			/*
			 * The old address space is no longer active on
			 * any processor. It can be appended to the
			 * list of inactive address spaces with assigned
			 * ASID.
			 */
			ASSERT(old->asid != ASID_INVALID);
			list_append(&old->inactive_as_with_asid_link,
			    &inactive_as_with_asid_head);
		}
		mutex_unlock(&old->lock);

		/*
		 * Perform architecture-specific tasks when the address space
		 * is being removed from the CPU.
		 */
		as_deinstall_arch(old);
	}

	/*
	 * Second, prepare the new address space.
	 */
	mutex_lock_active(&new->lock);
	if ((new->cpu_refcount++ == 0) && (new != AS_KERNEL)) {
		if (new->asid != ASID_INVALID)
			list_remove(&new->inactive_as_with_asid_link);
		else
			needs_asid = true;	/* defer call to asid_get() until new->lock is released */
	}
	SET_PTL0_ADDRESS(new->page_table);
	mutex_unlock(&new->lock);

	if (needs_asid) {
		/*
		 * Allocation of new ASID was deferred
		 * until now in order to avoid deadlock.
		 */
		asid_t asid;

		asid = asid_get();
		mutex_lock_active(&new->lock);
		new->asid = asid;
		mutex_unlock(&new->lock);
	}
	spinlock_unlock(&inactive_as_with_asid_lock);
	interrupts_restore(ipl);

	/*
	 * Perform architecture-specific steps.
	 * (e.g. write ASID to hardware register etc.)
	 */
	as_install_arch(new);

	AS = new;
}
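
/*
 * Illustrative sketch (new_task is a hypothetical name): the scheduler calls
 * as_switch() when the thread being scheduled belongs to a task with a
 * different address space than the one currently installed:
 *
 *	if (TASK->as != new_task->as)
 *		as_switch(TASK->as, new_task->as);
 */
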
/** Convert address space area flags to page flags.
 *
 * @param aflags Flags of some address space area.
 *
 * @return Flags to be passed to page_mapping_insert().
 */
int area_flags_to_page_flags(int aflags)
{
	int flags;

	flags = PAGE_USER | PAGE_PRESENT;

	if (aflags & AS_AREA_READ)
		flags |= PAGE_READ;

	if (aflags & AS_AREA_WRITE)
		flags |= PAGE_WRITE;

	if (aflags & AS_AREA_EXEC)
		flags |= PAGE_EXEC;

	if (aflags & AS_AREA_CACHEABLE)
		flags |= PAGE_CACHEABLE;

	return flags;
}

/** Compute flags for virtual address translation subsystem.
 *
 * The address space area must be locked.
 * Interrupts must be disabled.
 *
 * @param a Address space area.
 *
 * @return Flags to be used in page_mapping_insert().
 */
int as_area_get_flags(as_area_t *a)
{
	return area_flags_to_page_flags(a->flags);
}

/** Create page table.
 *
 * Depending on architecture, create either address space
 * private or global page table.
 *
 * @param flags Flags saying whether the page table is for kernel address
 *     space.
 *
 * @return First entry of the page table.
 */
pte_t *page_table_create(int flags)
{
	ASSERT(as_operations);
	ASSERT(as_operations->page_table_create);

	return as_operations->page_table_create(flags);
}

/** Destroy page table.
 *
 * Destroy page table in architecture specific way.
 *
 * @param page_table Physical address of PTL0.
 */
void page_table_destroy(pte_t *page_table)
{
	ASSERT(as_operations);
	ASSERT(as_operations->page_table_destroy);

	as_operations->page_table_destroy(page_table);
}

/** Lock page table.
 *
 * This function should be called before any page_mapping_insert(),
 * page_mapping_remove() and page_mapping_find().
 *
 * Locking order is such that address space areas must be locked
 * prior to this call. Address space can be locked prior to this
 * call in which case the lock argument is false.
 *
 * @param as Address space.
 * @param lock If false, do not attempt to lock as->lock.
 */
void page_table_lock(as_t *as, bool lock)
{
	ASSERT(as_operations);
	ASSERT(as_operations->page_table_lock);

	as_operations->page_table_lock(as, lock);
}

/** Unlock page table.
 *
 * @param as Address space.
 * @param unlock If false, do not attempt to unlock as->lock.
 */
void page_table_unlock(as_t *as, bool unlock)
{
	ASSERT(as_operations);
	ASSERT(as_operations->page_table_unlock);

	as_operations->page_table_unlock(as, unlock);
}

/** Find address space area and lock it.
 *
 * The address space must be locked and interrupts must be disabled.
 *
 * @param as Address space.
 * @param va Virtual address.
 *
 * @return Locked address space area containing va on success or NULL on
 *     failure.
 */
as_area_t *find_area_and_lock(as_t *as, uintptr_t va)
{
	as_area_t *a;
	btree_node_t *leaf, *lnode;
	int i;

	a = (as_area_t *) btree_search(&as->as_area_btree, va, &leaf);
	if (a) {
		/* va is the base address of an address space area */
		mutex_lock(&a->lock);
		return a;
	}

	/*
	 * Search the leaf node and the rightmost record of its left neighbour
	 * to find out whether this is a miss or va belongs to an address
	 * space area found there.
	 */

	/* First, search the leaf node itself. */
	for (i = 0; i < leaf->keys; i++) {
		a = (as_area_t *) leaf->value[i];
		mutex_lock(&a->lock);
		if ((a->base <= va) && (va < a->base + a->pages * PAGE_SIZE))
			return a;
		mutex_unlock(&a->lock);
	}

	/*
	 * Second, locate the left neighbour and test its last record.
	 * Because of its position in the B+tree, it must have base < va.
	 */
	if ((lnode = btree_leaf_node_left_neighbour(&as->as_area_btree,
	    leaf))) {
		a = (as_area_t *) lnode->value[lnode->keys - 1];
		mutex_lock(&a->lock);
		if (va < a->base + a->pages * PAGE_SIZE)
			return a;
		mutex_unlock(&a->lock);
	}

	return NULL;
}

/** Check area conflicts with other areas.
 *
 * The address space must be locked and interrupts must be disabled.
 *
 * @param as Address space.
 * @param va Starting virtual address of the area being tested.
 * @param size Size of the area being tested.
 * @param avoid_area Do not touch this area.
 *
 * @return True if there is no conflict, false otherwise.
 */
bool check_area_conflicts(as_t *as, uintptr_t va, size_t size,
    as_area_t *avoid_area)
{
	as_area_t *a;
	btree_node_t *leaf, *node;
	int i;

	/*
	 * We don't want any area to have conflicts with NULL page.
	 */
	if (overlaps(va, size, NULL, PAGE_SIZE))
		return false;

	/*
	 * The leaf node is found in O(log n), where n is proportional to
	 * the number of address space areas belonging to as.
	 * The check for conflicts is then attempted on the rightmost
	 * record in the left neighbour, the leftmost record in the right
	 * neighbour and all records in the leaf node itself.
	 */

	if ((a = (as_area_t *) btree_search(&as->as_area_btree, va, &leaf))) {
		if (a != avoid_area)
			return false;
	}

	/* First, check the two border cases. */
	if ((node = btree_leaf_node_left_neighbour(&as->as_area_btree,
	    leaf))) {
		a = (as_area_t *) node->value[node->keys - 1];
		mutex_lock(&a->lock);
		if (overlaps(va, size, a->base, a->pages * PAGE_SIZE)) {
			mutex_unlock(&a->lock);
			return false;
		}
		mutex_unlock(&a->lock);
	}
	if ((node = btree_leaf_node_right_neighbour(&as->as_area_btree,
	    leaf))) {
		a = (as_area_t *) node->value[0];
		mutex_lock(&a->lock);
		if (overlaps(va, size, a->base, a->pages * PAGE_SIZE)) {
			mutex_unlock(&a->lock);
			return false;
		}
		mutex_unlock(&a->lock);
	}

	/* Second, check the leaf node. */
	for (i = 0; i < leaf->keys; i++) {
		a = (as_area_t *) leaf->value[i];

		if (a == avoid_area)
			continue;

		mutex_lock(&a->lock);
		if (overlaps(va, size, a->base, a->pages * PAGE_SIZE)) {
			mutex_unlock(&a->lock);
			return false;
		}
		mutex_unlock(&a->lock);
	}

	/*
	 * So far, the area does not conflict with other areas.
	 * Check if it doesn't conflict with kernel address space.
	 */
	if (!KERNEL_ADDRESS_SPACE_SHADOWED) {
		return !overlaps(va, size,
		    KERNEL_ADDRESS_SPACE_START,
		    KERNEL_ADDRESS_SPACE_END - KERNEL_ADDRESS_SPACE_START);
	}

	return true;
}

/** Return size of the address space area with given base. */
size_t as_get_size(uintptr_t base)
{
	ipl_t ipl;
	as_area_t *src_area;
	size_t size;

	ipl = interrupts_disable();
	src_area = find_area_and_lock(AS, base);
	if (src_area) {
		size = src_area->pages * PAGE_SIZE;
		mutex_unlock(&src_area->lock);
	} else {
		size = 0;
	}
	interrupts_restore(ipl);
	return size;
}

/** Mark portion of address space area as used.
 *
 * The address space area must be already locked.
 *
 * @param a Address space area.
 * @param page First page to be marked.
 * @param count Number of pages to be marked.
 *
 * @return 0 on failure and 1 on success.
 */
int used_space_insert(as_area_t *a, uintptr_t page, count_t count)
{
	btree_node_t *leaf, *node;
	count_t pages;
	int i;

	ASSERT(page == ALIGN_DOWN(page, PAGE_SIZE));
	ASSERT(count);

	pages = (count_t) btree_search(&a->used_space, page, &leaf);
	if (pages) {
		/*
		 * We hit the beginning of some used space.
		 */
		return 0;
	}

	if (!leaf->keys) {
		btree_insert(&a->used_space, page, (void *) count, leaf);
		return 1;
	}

	node = btree_leaf_node_left_neighbour(&a->used_space, leaf);
	if (node) {
		uintptr_t left_pg = node->key[node->keys - 1];
		uintptr_t right_pg = leaf->key[0];
		count_t left_cnt = (count_t) node->value[node->keys - 1];
		count_t right_cnt = (count_t) leaf->value[0];

		/*
		 * Examine the possibility that the interval fits
		 * somewhere between the rightmost interval of
		 * the left neighbour and the first interval of the leaf.
		 */

		if (page >= right_pg) {
			/* Do nothing. */
		} else if (overlaps(page, count * PAGE_SIZE, left_pg,
		    left_cnt * PAGE_SIZE)) {
			/* The interval intersects with the left interval. */
			return 0;
		} else if (overlaps(page, count * PAGE_SIZE, right_pg,
		    right_cnt * PAGE_SIZE)) {
			/* The interval intersects with the right interval. */
			return 0;
		} else if ((page == left_pg + left_cnt * PAGE_SIZE) &&
		    (page + count * PAGE_SIZE == right_pg)) {
			/*
			 * The interval can be added by merging the two already
			 * present intervals.
			 */
			node->value[node->keys - 1] += count + right_cnt;
			btree_remove(&a->used_space, right_pg, leaf);
			return 1;
		} else if (page == left_pg + left_cnt * PAGE_SIZE) {
			/*
			 * The interval can be added by simply growing the left
			 * interval.
			 */
			node->value[node->keys - 1] += count;
			return 1;
		} else if (page + count * PAGE_SIZE == right_pg) {
			/*
			 * The interval can be added by simply moving base of
			 * the right interval down and increasing its size
			 * accordingly.
			 */
			leaf->value[0] += count;
			leaf->key[0] = page;
			return 1;
		} else {
			/*
			 * The interval is between both neighbouring intervals,
			 * but cannot be merged with any of them.
			 */
			btree_insert(&a->used_space, page, (void *) count,
			    leaf);
			return 1;
		}
	} else if (page < leaf->key[0]) {
		uintptr_t right_pg = leaf->key[0];
		count_t right_cnt = (count_t) leaf->value[0];

		/*
		 * Investigate the border case in which the left neighbour does
		 * not exist but the interval fits from the left.
		 */

		if (overlaps(page, count * PAGE_SIZE, right_pg,
		    right_cnt * PAGE_SIZE)) {
			/* The interval intersects with the right interval. */
			return 0;
		} else if (page + count * PAGE_SIZE == right_pg) {
			/*
			 * The interval can be added by moving the base of the
			 * right interval down and increasing its size
			 * accordingly.
			 */
			leaf->key[0] = page;
			leaf->value[0] += count;
			return 1;
		} else {
			/*
			 * The interval doesn't adjoin with the right interval.
			 * It must be added individually.
			 */
			btree_insert(&a->used_space, page, (void *) count,
			    leaf);
			return 1;
		}
	}

	node = btree_leaf_node_right_neighbour(&a->used_space, leaf);
	if (node) {
		uintptr_t left_pg = leaf->key[leaf->keys - 1];
		uintptr_t right_pg = node->key[0];
		count_t left_cnt = (count_t) leaf->value[leaf->keys - 1];
		count_t right_cnt = (count_t) node->value[0];

		/*
		 * Examine the possibility that the interval fits
		 * somewhere between the leftmost interval of
		 * the right neighbour and the last interval of the leaf.
		 */

		if (page < left_pg) {
			/* Do nothing. */
		} else if (overlaps(page, count * PAGE_SIZE, left_pg,
		    left_cnt * PAGE_SIZE)) {
			/* The interval intersects with the left interval. */
			return 0;
		} else if (overlaps(page, count * PAGE_SIZE, right_pg,
		    right_cnt * PAGE_SIZE)) {
			/* The interval intersects with the right interval. */
			return 0;
		} else if ((page == left_pg + left_cnt * PAGE_SIZE) &&
		    (page + count * PAGE_SIZE == right_pg)) {
			/*
			 * The interval can be added by merging the two already
			 * present intervals.
			 */
			leaf->value[leaf->keys - 1] += count + right_cnt;
			btree_remove(&a->used_space, right_pg, node);
			return 1;
		} else if (page == left_pg + left_cnt * PAGE_SIZE) {
			/*
			 * The interval can be added by simply growing the left
			 * interval.
			 */
			leaf->value[leaf->keys - 1] += count;
			return 1;
		} else if (page + count * PAGE_SIZE == right_pg) {
			/*
			 * The interval can be added by simply moving base of
			 * the right interval down and increasing its size
			 * accordingly.
			 */
			node->value[0] += count;
			node->key[0] = page;
			return 1;
		} else {
			/*
			 * The interval is between both neighbouring intervals,
			 * but cannot be merged with any of them.
			 */
			btree_insert(&a->used_space, page, (void *) count,
			    leaf);
			return 1;
		}
	} else if (page >= leaf->key[leaf->keys - 1]) {
		uintptr_t left_pg = leaf->key[leaf->keys - 1];
		count_t left_cnt = (count_t) leaf->value[leaf->keys - 1];

		/*
		 * Investigate the border case in which the right neighbour
		 * does not exist but the interval fits from the right.
		 */

		if (overlaps(page, count * PAGE_SIZE, left_pg,
		    left_cnt * PAGE_SIZE)) {
			/* The interval intersects with the left interval. */
			return 0;
		} else if (left_pg + left_cnt * PAGE_SIZE == page) {
			/*
			 * The interval can be added by growing the left
			 * interval.
			 */
			leaf->value[leaf->keys - 1] += count;
			return 1;
		} else {
			/*
			 * The interval doesn't adjoin with the left interval.
			 * It must be added individually.
			 */
			btree_insert(&a->used_space, page, (void *) count,
			    leaf);
			return 1;
		}
	}

	/*
	 * Note that if the algorithm made it thus far, the interval can fit
	 * only between two other intervals of the leaf. The two border cases
	 * were already resolved.
	 */
	for (i = 1; i < leaf->keys; i++) {
		if (page < leaf->key[i]) {
			uintptr_t left_pg = leaf->key[i - 1];
			uintptr_t right_pg = leaf->key[i];
			count_t left_cnt = (count_t) leaf->value[i - 1];
			count_t right_cnt = (count_t) leaf->value[i];

			/*
			 * The interval fits between left_pg and right_pg.
			 */

			if (overlaps(page, count * PAGE_SIZE, left_pg,
			    left_cnt * PAGE_SIZE)) {
				/*
				 * The interval intersects with the left
				 * interval.
				 */
				return 0;
			} else if (overlaps(page, count * PAGE_SIZE, right_pg,
			    right_cnt * PAGE_SIZE)) {
				/*
				 * The interval intersects with the right
				 * interval.
				 */
				return 0;
			} else if ((page == left_pg + left_cnt * PAGE_SIZE) &&
			    (page + count * PAGE_SIZE == right_pg)) {
				/*
				 * The interval can be added by merging the two
				 * already present intervals.
				 */
				leaf->value[i - 1] += count + right_cnt;
				btree_remove(&a->used_space, right_pg, leaf);
				return 1;
			} else if (page == left_pg + left_cnt * PAGE_SIZE) {
				/*
				 * The interval can be added by simply growing
				 * the left interval.
				 */
				leaf->value[i - 1] += count;
				return 1;
			} else if (page + count * PAGE_SIZE == right_pg) {
				/*
				 * The interval can be added by simply moving
				 * base of the right interval down and
				 * increasing its size accordingly.
				 */
				leaf->value[i] += count;
				leaf->key[i] = page;
				return 1;
			} else {
				/*
				 * The interval is between both neighbouring
				 * intervals, but cannot be merged with any of
				 * them.
				 */
				btree_insert(&a->used_space, page,
				    (void *) count, leaf);
				return 1;
			}
		}
	}

	panic("Inconsistency detected while adding %d pages of used space at "
	    "%p.\n", count, page);
}
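
/*
 * Worked example (illustrative, assuming 4 KiB pages): if used_space already
 * tracks the intervals (0x1000, 2 pages) and (0x4000, 1 page), then
 * used_space_insert(a, 0x3000, 1) takes the merge branch above, because
 * 0x3000 == 0x1000 + 2 * PAGE_SIZE and 0x3000 + PAGE_SIZE == 0x4000; both
 * neighbours collapse into a single interval (0x1000, 4 pages).
 */
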
/** Mark portion of address space area as unused.
 *
 * The address space area must be already locked.
 *
 * @param a Address space area.
 * @param page First page to be marked.
 * @param count Number of pages to be marked.
 *
 * @return 0 on failure and 1 on success.
 */
int used_space_remove(as_area_t *a, uintptr_t page, count_t count)
{
	btree_node_t *leaf, *node;
	count_t pages;
	int i;

	ASSERT(page == ALIGN_DOWN(page, PAGE_SIZE));
	ASSERT(count);

	pages = (count_t) btree_search(&a->used_space, page, &leaf);
	if (pages) {
		/*
		 * We are lucky, page is the beginning of some interval.
		 */
		if (count > pages) {
			return 0;
		} else if (count == pages) {
			btree_remove(&a->used_space, page, leaf);
			return 1;
		} else {
			/*
			 * Find the respective interval.
			 * Decrease its size and relocate its start address.
			 */
			for (i = 0; i < leaf->keys; i++) {
				if (leaf->key[i] == page) {
					leaf->key[i] += count * PAGE_SIZE;
					leaf->value[i] -= count;
					return 1;
				}
			}
			goto error;
		}
	}

	node = btree_leaf_node_left_neighbour(&a->used_space, leaf);
	if (node && page < leaf->key[0]) {
		uintptr_t left_pg = node->key[node->keys - 1];
		count_t left_cnt = (count_t) node->value[node->keys - 1];

		if (overlaps(left_pg, left_cnt * PAGE_SIZE, page,
		    count * PAGE_SIZE)) {
			if (page + count * PAGE_SIZE ==
			    left_pg + left_cnt * PAGE_SIZE) {
				/*
				 * The interval is contained in the rightmost
				 * interval of the left neighbour and can be
				 * removed by updating the size of the bigger
				 * interval.
				 */
				node->value[node->keys - 1] -= count;
				return 1;
			} else if (page + count * PAGE_SIZE <
			    left_pg + left_cnt * PAGE_SIZE) {
				count_t new_cnt;

				/*
				 * The interval is contained in the rightmost
				 * interval of the left neighbour but its
				 * removal requires both updating the size of
				 * the original interval and also inserting a
				 * new interval.
				 */
				new_cnt = ((left_pg + left_cnt * PAGE_SIZE) -
				    (page + count * PAGE_SIZE)) >> PAGE_WIDTH;
				node->value[node->keys - 1] -= count + new_cnt;
				btree_insert(&a->used_space,
				    page + count * PAGE_SIZE,
				    (void *) new_cnt, leaf);
				return 1;
			}
		}
		return 0;
	} else if (page < leaf->key[0]) {
		return 0;
	}

	if (page > leaf->key[leaf->keys - 1]) {
		uintptr_t left_pg = leaf->key[leaf->keys - 1];
		count_t left_cnt = (count_t) leaf->value[leaf->keys - 1];

		if (overlaps(left_pg, left_cnt * PAGE_SIZE, page,
		    count * PAGE_SIZE)) {
			if (page + count * PAGE_SIZE ==
			    left_pg + left_cnt * PAGE_SIZE) {
				/*
				 * The interval is contained in the rightmost
				 * interval of the leaf and can be removed by
				 * updating the size of the bigger interval.
				 */
				leaf->value[leaf->keys - 1] -= count;
				return 1;
			} else if (page + count * PAGE_SIZE <
			    left_pg + left_cnt * PAGE_SIZE) {
				count_t new_cnt;

				/*
				 * The interval is contained in the rightmost
				 * interval of the leaf but its removal
				 * requires both updating the size of the
				 * original interval and also inserting a new
				 * interval.
				 */
				new_cnt = ((left_pg + left_cnt * PAGE_SIZE) -
				    (page + count * PAGE_SIZE)) >> PAGE_WIDTH;
				leaf->value[leaf->keys - 1] -= count + new_cnt;
				btree_insert(&a->used_space,
				    page + count * PAGE_SIZE,
				    (void *) new_cnt, leaf);
				return 1;
			}
		}
		return 0;
	}

	/*
	 * The border cases have been already resolved.
	 * Now the interval can be only between intervals of the leaf.
	 */
	for (i = 1; i < leaf->keys - 1; i++) {
		if (page < leaf->key[i]) {
			uintptr_t left_pg = leaf->key[i - 1];
			count_t left_cnt = (count_t) leaf->value[i - 1];

			/*
			 * Now the interval is between intervals corresponding
			 * to (i - 1) and i.
			 */
			if (overlaps(left_pg, left_cnt * PAGE_SIZE, page,
			    count * PAGE_SIZE)) {
				if (page + count * PAGE_SIZE ==
				    left_pg + left_cnt * PAGE_SIZE) {
					/*
					 * The interval is contained in the
					 * interval (i - 1) of the leaf and can
					 * be removed by updating the size of
					 * the bigger interval.
					 */
					leaf->value[i - 1] -= count;
					return 1;
				} else if (page + count * PAGE_SIZE <
				    left_pg + left_cnt * PAGE_SIZE) {
					count_t new_cnt;

					/*
					 * The interval is contained in the
					 * interval (i - 1) of the leaf but its
					 * removal requires both updating the
					 * size of the original interval and
					 * also inserting a new interval.
					 */
					new_cnt = ((left_pg +
					    left_cnt * PAGE_SIZE) -
					    (page + count * PAGE_SIZE)) >>
					    PAGE_WIDTH;
					leaf->value[i - 1] -= count + new_cnt;
					btree_insert(&a->used_space,
					    page + count * PAGE_SIZE,
					    (void *) new_cnt, leaf);
					return 1;
				}
			}
			return 0;
		}
	}

error:
	panic("Inconsistency detected while removing %d pages of used space "
	    "from %p.\n", count, page);
}
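
/*
 * Worked example (illustrative, assuming 4 KiB pages): removing one page from
 * the middle of the interval (0x1000, 4 pages) via
 * used_space_remove(a, 0x2000, 1) splits it. In the branch above,
 * new_cnt = ((0x1000 + 0x4000) - (0x2000 + 0x1000)) >> PAGE_WIDTH = 2, the
 * original interval shrinks to (0x1000, 1 page) and a new interval
 * (0x3000, 2 pages) is inserted.
 */
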
/** Remove reference to address space area share info.
 *
 * If the reference count drops to 0, the sh_info is deallocated.
 *
 * @param sh_info Pointer to address space area share info.
 */
void sh_info_remove_reference(share_info_t *sh_info)
{
	bool dealloc = false;

	mutex_lock(&sh_info->lock);
	ASSERT(sh_info->refcount);
	if (--sh_info->refcount == 0) {
		dealloc = true;
		link_t *cur;

		/*
		 * Now walk carefully the pagemap B+tree and free/remove
		 * reference from all frames found there.
		 */
		for (cur = sh_info->pagemap.leaf_head.next;
		    cur != &sh_info->pagemap.leaf_head; cur = cur->next) {
			btree_node_t *node;
			int i;

			node = list_get_instance(cur, btree_node_t, leaf_link);
			for (i = 0; i < node->keys; i++)
				frame_free((uintptr_t) node->value[i]);
		}
	}
	mutex_unlock(&sh_info->lock);

	if (dealloc) {
		btree_destroy(&sh_info->pagemap);
		free(sh_info);
	}
}

/*
 * Address space related syscalls.
 */

/** Wrapper for as_area_create(). */
unative_t sys_as_area_create(uintptr_t address, size_t size, int flags)
{
	if (as_area_create(AS, flags | AS_AREA_CACHEABLE, size, address,
	    AS_AREA_ATTR_NONE, &anon_backend, NULL))
		return (unative_t) address;
	else
		return (unative_t) -1;
}

/** Wrapper for as_area_resize(). */
unative_t sys_as_area_resize(uintptr_t address, size_t size, int flags)
{
	return (unative_t) as_area_resize(AS, address, size, 0);
}

/** Wrapper for as_area_destroy(). */
unative_t sys_as_area_destroy(uintptr_t address)
{
	return (unative_t) as_area_destroy(AS, address);
}

/** Print out information about address space.
 *
 * @param as Address space.
 */
void as_print(as_t *as)
{
	ipl_t ipl;
	link_t *cur;

	ipl = interrupts_disable();
	mutex_lock(&as->lock);

	/* print out info about address space areas */
	for (cur = as->as_area_btree.leaf_head.next;
	    cur != &as->as_area_btree.leaf_head; cur = cur->next) {
		btree_node_t *node = list_get_instance(cur, btree_node_t,
		    leaf_link);
		int i;

		for (i = 0; i < node->keys; i++) {
			as_area_t *area = node->value[i];

			mutex_lock(&area->lock);
			printf("as_area: %p, base=%p, pages=%d (%p - %p)\n",
			    area, area->base, area->pages, area->base,
			    area->base + area->pages * PAGE_SIZE);
			mutex_unlock(&area->lock);
		}
	}

	mutex_unlock(&as->lock);
	interrupts_restore(ipl);
}

/** @}
 */