Use ordered dictionary for pagemap instead of B+tree
[helenos.git] / kernel / generic / src / mm / backend_elf.c
blobcced77c57b30f1587e8a1cf67bddc2c8d350dc67
1 /*
2 * Copyright (c) 2006 Jakub Jermar
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 /** @addtogroup kernel_generic_mm
30 * @{
33 /**
34 * @file
35 * @brief Backend for address space areas backed by an ELF image.
38 #include <lib/elf.h>
39 #include <assert.h>
40 #include <typedefs.h>
41 #include <mm/as.h>
42 #include <mm/frame.h>
43 #include <mm/slab.h>
44 #include <mm/page.h>
45 #include <mm/reserve.h>
46 #include <mm/km.h>
47 #include <genarch/mm/page_pt.h>
48 #include <genarch/mm/page_ht.h>
49 #include <align.h>
50 #include <mem.h>
51 #include <macros.h>
52 #include <arch.h>
53 #include <barrier.h>
55 static bool elf_create(as_area_t *);
56 static bool elf_resize(as_area_t *, size_t);
57 static void elf_share(as_area_t *);
58 static void elf_destroy(as_area_t *);
60 static bool elf_is_resizable(as_area_t *);
61 static bool elf_is_shareable(as_area_t *);
63 static int elf_page_fault(as_area_t *, uintptr_t, pf_access_t);
64 static void elf_frame_free(as_area_t *, uintptr_t, uintptr_t);
66 mem_backend_t elf_backend = {
67 .create = elf_create,
68 .resize = elf_resize,
69 .share = elf_share,
70 .destroy = elf_destroy,
72 .is_resizable = elf_is_resizable,
73 .is_shareable = elf_is_shareable,
75 .page_fault = elf_page_fault,
76 .frame_free = elf_frame_free,
78 .create_shared_data = NULL,
79 .destroy_shared_data = NULL
82 static size_t elf_nonanon_pages_get(as_area_t *area)
84 elf_segment_header_t *entry = area->backend_data.segment;
85 uintptr_t first = ALIGN_UP(entry->p_vaddr, PAGE_SIZE);
86 uintptr_t last = ALIGN_DOWN(entry->p_vaddr + entry->p_filesz,
87 PAGE_SIZE);
89 if (entry->p_flags & PF_W)
90 return 0;
92 if (last < first)
93 return 0;
95 return last - first;
98 /** Get page number in the task where the ELF page originates from.
100 * The ELF page can be shared to a different address than it originated from,
101 * but we need the originating address since that corresponds to the ELF's
102 * virtual addesses.
104 * @param area Area in which the page resides
105 * @param page Virtual address of the page in @a area
106 * @return Virtual address of the page in the origin address space
108 static uintptr_t elf_orig_page(as_area_t *area, uintptr_t page)
110 return page - area->base + area->backend_data.elf_base;
113 bool elf_create(as_area_t *area)
115 size_t nonanon_pages = elf_nonanon_pages_get(area);
117 if (area->pages <= nonanon_pages)
118 return true;
120 return reserve_try_alloc(area->pages - nonanon_pages);
123 bool elf_resize(as_area_t *area, size_t new_pages)
125 size_t nonanon_pages = elf_nonanon_pages_get(area);
127 if (new_pages > area->pages) {
128 /* The area is growing. */
129 if (area->pages >= nonanon_pages)
130 return reserve_try_alloc(new_pages - area->pages);
131 else if (new_pages > nonanon_pages)
132 return reserve_try_alloc(new_pages - nonanon_pages);
133 } else if (new_pages < area->pages) {
134 /* The area is shrinking. */
135 if (new_pages >= nonanon_pages)
136 reserve_free(area->pages - new_pages);
137 else if (area->pages > nonanon_pages)
138 reserve_free(nonanon_pages - new_pages);
141 return true;
144 /** Share ELF image backed address space area.
146 * If the area is writable, then all mapped pages are duplicated in the pagemap.
147 * Otherwise only portions of the area that are not backed by the ELF image
148 * are put into the pagemap.
150 * @param area Address space area.
152 void elf_share(as_area_t *area)
154 elf_segment_header_t *entry = area->backend_data.segment;
155 link_t *cur;
156 btree_node_t *leaf, *node;
157 uintptr_t start_anon = entry->p_vaddr + entry->p_filesz;
159 assert(mutex_locked(&area->as->lock));
160 assert(mutex_locked(&area->lock));
163 * Find the node in which to start linear search.
165 if (area->flags & AS_AREA_WRITE) {
166 node = list_get_instance(list_first(&area->used_space.leaf_list),
167 btree_node_t, leaf_link);
168 } else {
169 (void) btree_search(&area->used_space, start_anon, &leaf);
170 node = btree_leaf_node_left_neighbour(&area->used_space, leaf);
171 if (!node)
172 node = leaf;
176 * Copy used anonymous portions of the area to sh_info's page map.
178 mutex_lock(&area->sh_info->lock);
179 for (cur = &node->leaf_link; cur != &area->used_space.leaf_list.head;
180 cur = cur->next) {
181 unsigned int i;
183 node = list_get_instance(cur, btree_node_t, leaf_link);
185 for (i = 0; i < node->keys; i++) {
186 uintptr_t base = node->key[i];
187 size_t count = (size_t) node->value[i];
188 unsigned int j;
191 * Skip read-only areas of used space that are backed
192 * by the ELF image.
194 if (!(area->flags & AS_AREA_WRITE))
195 if (base >= entry->p_vaddr &&
196 base + P2SZ(count) <= start_anon)
197 continue;
199 for (j = 0; j < count; j++) {
200 pte_t pte;
201 bool found;
204 * Skip read-only pages that are backed by the
205 * ELF image.
207 if (!(area->flags & AS_AREA_WRITE))
208 if (base >= entry->p_vaddr &&
209 base + P2SZ(j + 1) <= start_anon)
210 continue;
212 page_table_lock(area->as, false);
213 found = page_mapping_find(area->as,
214 base + P2SZ(j), false, &pte);
216 (void) found;
217 assert(found);
218 assert(PTE_VALID(&pte));
219 assert(PTE_PRESENT(&pte));
221 as_pagemap_insert(&area->sh_info->pagemap,
222 (base + P2SZ(j)) - area->base,
223 PTE_GET_FRAME(&pte));
224 page_table_unlock(area->as, false);
226 pfn_t pfn = ADDR2PFN(PTE_GET_FRAME(&pte));
227 frame_reference_add(pfn);
232 mutex_unlock(&area->sh_info->lock);
235 void elf_destroy(as_area_t *area)
237 size_t nonanon_pages = elf_nonanon_pages_get(area);
239 if (area->pages > nonanon_pages)
240 reserve_free(area->pages - nonanon_pages);
243 bool elf_is_resizable(as_area_t *area)
245 return true;
248 bool elf_is_shareable(as_area_t *area)
250 return true;
253 /** Service a page fault in the ELF backend address space area.
255 * The address space area and page tables must be already locked.
257 * @param area Pointer to the address space area.
258 * @param upage Faulting virtual page.
259 * @param access Access mode that caused the fault (i.e.
260 * read/write/exec).
262 * @return AS_PF_FAULT on failure (i.e. page fault) or AS_PF_OK
263 * on success (i.e. serviced).
265 int elf_page_fault(as_area_t *area, uintptr_t upage, pf_access_t access)
267 elf_header_t *elf = area->backend_data.elf;
268 elf_segment_header_t *entry = area->backend_data.segment;
269 uintptr_t base;
270 uintptr_t frame;
271 uintptr_t kpage;
272 uintptr_t start_anon;
273 uintptr_t elfpage;
274 size_t i;
275 bool dirty = false;
277 assert(page_table_locked(AS));
278 assert(mutex_locked(&area->lock));
279 assert(IS_ALIGNED(upage, PAGE_SIZE));
281 elfpage = elf_orig_page(area, upage);
283 if (!as_area_check_access(area, access))
284 return AS_PF_FAULT;
286 if (elfpage < ALIGN_DOWN(entry->p_vaddr, PAGE_SIZE))
287 return AS_PF_FAULT;
289 if (elfpage >= entry->p_vaddr + entry->p_memsz)
290 return AS_PF_FAULT;
292 i = (elfpage - ALIGN_DOWN(entry->p_vaddr, PAGE_SIZE)) >>
293 PAGE_WIDTH;
294 base = (uintptr_t)
295 (((void *) elf) + ALIGN_DOWN(entry->p_offset, PAGE_SIZE));
297 /* Virtual address of the end of initialized part of segment */
298 start_anon = entry->p_vaddr + entry->p_filesz;
300 mutex_lock(&area->sh_info->lock);
301 if (area->sh_info->shared) {
303 * The address space area is shared.
306 errno_t rc = as_pagemap_find(&area->sh_info->pagemap,
307 upage - area->base, &frame);
308 if (rc == EOK) {
309 frame_reference_add(ADDR2PFN(frame));
310 page_mapping_insert(AS, upage, frame,
311 as_area_get_flags(area));
312 if (!used_space_insert(area, upage, 1))
313 panic("Cannot insert used space.");
314 mutex_unlock(&area->sh_info->lock);
315 return AS_PF_OK;
320 * The area is either not shared or the pagemap does not contain the
321 * mapping.
323 if (elfpage >= entry->p_vaddr && elfpage + PAGE_SIZE <= start_anon) {
325 * Initialized portion of the segment. The memory is backed
326 * directly by the content of the ELF image. Pages are
327 * only copied if the segment is writable so that there
328 * can be more instances of the same memory ELF image
329 * used at a time. Note that this could be later done
330 * as COW.
332 if (entry->p_flags & PF_W) {
333 kpage = km_temporary_page_get(&frame, FRAME_NO_RESERVE);
334 memcpy((void *) kpage, (void *) (base + i * PAGE_SIZE),
335 PAGE_SIZE);
336 if (entry->p_flags & PF_X) {
337 smc_coherence((void *) kpage, PAGE_SIZE);
339 km_temporary_page_put(kpage);
340 dirty = true;
341 } else {
342 pte_t pte;
343 bool found;
345 found = page_mapping_find(AS_KERNEL,
346 base + i * FRAME_SIZE, true, &pte);
348 (void) found;
349 assert(found);
350 assert(PTE_PRESENT(&pte));
352 frame = PTE_GET_FRAME(&pte);
354 } else if (elfpage >= start_anon) {
356 * This is the uninitialized portion of the segment.
357 * It is not physically present in the ELF image.
358 * To resolve the situation, a frame must be allocated
359 * and cleared.
361 kpage = km_temporary_page_get(&frame, FRAME_NO_RESERVE);
362 memsetb((void *) kpage, PAGE_SIZE, 0);
363 km_temporary_page_put(kpage);
364 dirty = true;
365 } else {
366 size_t pad_lo, pad_hi;
368 * The mixed case.
370 * The middle part is backed by the ELF image and
371 * the lower and upper parts are anonymous memory.
372 * (The segment can be and often is shorter than 1 page).
374 if (upage < entry->p_vaddr)
375 pad_lo = entry->p_vaddr - upage;
376 else
377 pad_lo = 0;
379 if (start_anon < upage + PAGE_SIZE)
380 pad_hi = upage + PAGE_SIZE - start_anon;
381 else
382 pad_hi = 0;
384 kpage = km_temporary_page_get(&frame, FRAME_NO_RESERVE);
385 memcpy((void *) (kpage + pad_lo),
386 (void *) (base + i * PAGE_SIZE + pad_lo),
387 PAGE_SIZE - pad_lo - pad_hi);
388 if (entry->p_flags & PF_X) {
389 smc_coherence((void *) (kpage + pad_lo),
390 PAGE_SIZE - pad_lo - pad_hi);
392 memsetb((void *) kpage, pad_lo, 0);
393 memsetb((void *) (kpage + PAGE_SIZE - pad_hi), pad_hi, 0);
394 km_temporary_page_put(kpage);
395 dirty = true;
398 if (dirty && area->sh_info->shared) {
399 frame_reference_add(ADDR2PFN(frame));
400 as_pagemap_insert(&area->sh_info->pagemap, upage - area->base,
401 frame);
404 mutex_unlock(&area->sh_info->lock);
406 page_mapping_insert(AS, upage, frame, as_area_get_flags(area));
407 if (!used_space_insert(area, upage, 1))
408 panic("Cannot insert used space.");
410 return AS_PF_OK;
413 /** Free a frame that is backed by the ELF backend.
415 * The address space area and page tables must be already locked.
417 * @param area Pointer to the address space area.
418 * @param page Page that is mapped to frame. Must be aligned to
419 * PAGE_SIZE.
420 * @param frame Frame to be released.
423 void elf_frame_free(as_area_t *area, uintptr_t page, uintptr_t frame)
425 elf_segment_header_t *entry = area->backend_data.segment;
426 uintptr_t start_anon;
427 uintptr_t elfpage;
429 assert(page_table_locked(area->as));
430 assert(mutex_locked(&area->lock));
432 elfpage = elf_orig_page(area, page);
434 assert(elfpage >= ALIGN_DOWN(entry->p_vaddr, PAGE_SIZE));
435 assert(elfpage < entry->p_vaddr + entry->p_memsz);
437 start_anon = entry->p_vaddr + entry->p_filesz;
439 if (elfpage >= entry->p_vaddr && elfpage + PAGE_SIZE <= start_anon) {
440 if (entry->p_flags & PF_W) {
442 * Free the frame with the copy of writable segment
443 * data.
445 frame_free_noreserve(frame, 1);
447 } else {
449 * The frame is either anonymous memory or the mixed case (i.e.
450 * lower part is backed by the ELF image and the upper is
451 * anonymous). In any case, a frame needs to be freed.
453 frame_free_noreserve(frame, 1);
457 /** @}