/*
 *	(C) Copyright 1996 Linus Torvalds
 *
 *	Address space accounting code	<alan@redhat.com>
 *	(C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/rmap-locking.h>
#include <linux/security.h>

#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

static pte_t *get_one_pte_map_nested(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte = NULL;

	pgd = pgd_offset(mm, addr);
	if (pgd_none(*pgd))
		goto end;
	if (pgd_bad(*pgd)) {
		pgd_ERROR(*pgd);
		pgd_clear(pgd);
		goto end;
	}

	pmd = pmd_offset(pgd, addr);
	if (pmd_none(*pmd))
		goto end;
	if (pmd_bad(*pmd)) {
		pmd_ERROR(*pmd);
		pmd_clear(pmd);
		goto end;
	}

	pte = pte_offset_map_nested(pmd, addr);
	if (pte_none(*pte)) {
		pte_unmap_nested(pte);
		pte = NULL;
	}
end:
	return pte;
}

#ifdef CONFIG_HIGHPTE	/* Save a few cycles on the sane machines */
static inline int page_table_present(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	if (pgd_none(*pgd))
		return 0;
	pmd = pmd_offset(pgd, addr);
	return pmd_present(*pmd);
}
#else
#define page_table_present(mm, addr)	(1)
#endif

static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr)
{
	pmd_t *pmd;
	pte_t *pte = NULL;

	pmd = pmd_alloc(mm, pgd_offset(mm, addr), addr);
	if (pmd)
		pte = pte_alloc_map(mm, pmd, addr);
	return pte;
}

static int
copy_one_pte(struct mm_struct *mm, pte_t *src, pte_t *dst,
		struct pte_chain **pte_chainp)
{
	int error = 0;
	pte_t pte;
	struct page *page = NULL;

	if (pte_present(*src))
		page = pte_page(*src);

	if (!pte_none(*src)) {
		if (page)
			page_remove_rmap(page, src);
		pte = ptep_get_and_clear(src);
		if (!dst) {
			/* No dest?  We must put it back. */
			dst = src;
			error++;
		}
		set_pte(dst, pte);
		if (page)
			*pte_chainp = page_add_rmap(page, dst, *pte_chainp);
	}
	return error;
}

static int
move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
		unsigned long new_addr)
{
	struct mm_struct *mm = vma->vm_mm;
	int error = 0;
	pte_t *src, *dst;
	struct pte_chain *pte_chain;

	pte_chain = pte_chain_alloc(GFP_KERNEL);
	if (!pte_chain) {
		error = -ENOMEM;
		goto out;
	}
	spin_lock(&mm->page_table_lock);
	src = get_one_pte_map_nested(mm, old_addr);
	if (src) {
		/*
		 * Look to see whether alloc_one_pte_map needs to perform a
		 * memory allocation.  If it does then we need to drop the
		 * atomic kmap
		 */
		if (!page_table_present(mm, new_addr)) {
			pte_unmap_nested(src);
			src = NULL;
		}
		dst = alloc_one_pte_map(mm, new_addr);
		if (src == NULL)
			src = get_one_pte_map_nested(mm, old_addr);
		error = copy_one_pte(mm, src, dst, &pte_chain);
		pte_unmap_nested(src);
		pte_unmap(dst);
	}
	flush_tlb_page(vma, old_addr);
	spin_unlock(&mm->page_table_lock);
	pte_chain_free(pte_chain);
out:
	return error;
}

static int move_page_tables(struct vm_area_struct *vma,
	unsigned long new_addr, unsigned long old_addr, unsigned long len)
{
	unsigned long offset = len;

	flush_cache_range(vma, old_addr, old_addr + len);

	/*
	 * This is not the clever way to do this, but we're taking the
	 * easy way out on the assumption that most remappings will be
	 * only a few pages.. This also makes error recovery easier.
	 */
	while (offset) {
		offset -= PAGE_SIZE;
		if (move_one_page(vma, old_addr + offset, new_addr + offset))
			goto oops_we_failed;
	}
	return 0;

	/*
	 * Ok, the move failed because we didn't have enough pages for
	 * the new page table tree. This is unlikely, but we have to
	 * take the possibility into account. In that case we just move
	 * all the pages back (this will work, because we still have
	 * the old page tables)
	 */
oops_we_failed:
	flush_cache_range(vma, new_addr, new_addr + len);
	while ((offset += PAGE_SIZE) < len)
		move_one_page(vma, new_addr + offset, old_addr + offset);
	zap_page_range(vma, new_addr, len);
	return -1;
}

static unsigned long move_vma(struct vm_area_struct *vma,
	unsigned long addr, unsigned long old_len, unsigned long new_len,
	unsigned long new_addr)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *new_vma, *next, *prev;
	int allocated_vma;
	int split = 0;

	new_vma = NULL;
	next = find_vma_prev(mm, new_addr, &prev);
	if (next) {
		if (prev && prev->vm_end == new_addr &&
		    can_vma_merge(prev, vma->vm_flags) && !vma->vm_file &&
		    !(vma->vm_flags & VM_SHARED)) {
			spin_lock(&mm->page_table_lock);
			prev->vm_end = new_addr + new_len;
			spin_unlock(&mm->page_table_lock);
			new_vma = prev;
			if (next != prev->vm_next)
				BUG();
			if (prev->vm_end == next->vm_start &&
					can_vma_merge(next, prev->vm_flags)) {
				spin_lock(&mm->page_table_lock);
				prev->vm_end = next->vm_end;
				__vma_unlink(mm, next, prev);
				spin_unlock(&mm->page_table_lock);
				if (vma == next)
					vma = prev;
				mm->map_count--;
				kmem_cache_free(vm_area_cachep, next);
			}
		} else if (next->vm_start == new_addr + new_len &&
				can_vma_merge(next, vma->vm_flags) &&
				!vma->vm_file && !(vma->vm_flags & VM_SHARED)) {
			spin_lock(&mm->page_table_lock);
			next->vm_start = new_addr;
			spin_unlock(&mm->page_table_lock);
			new_vma = next;
		}
	} else {
		prev = find_vma(mm, new_addr-1);
		if (prev && prev->vm_end == new_addr &&
		    can_vma_merge(prev, vma->vm_flags) && !vma->vm_file &&
		    !(vma->vm_flags & VM_SHARED)) {
			spin_lock(&mm->page_table_lock);
			prev->vm_end = new_addr + new_len;
			spin_unlock(&mm->page_table_lock);
			new_vma = prev;
		}
	}

	allocated_vma = 0;
	if (!new_vma) {
		new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
		if (!new_vma)
			goto out;
		allocated_vma = 1;
	}

	if (!move_page_tables(vma, new_addr, addr, old_len)) {
		unsigned long vm_locked = vma->vm_flags & VM_LOCKED;

		if (allocated_vma) {
			*new_vma = *vma;
			INIT_LIST_HEAD(&new_vma->shared);
			new_vma->vm_start = new_addr;
			new_vma->vm_end = new_addr+new_len;
			new_vma->vm_pgoff += (addr-vma->vm_start) >> PAGE_SHIFT;
			if (new_vma->vm_file)
				get_file(new_vma->vm_file);
			if (new_vma->vm_ops && new_vma->vm_ops->open)
				new_vma->vm_ops->open(new_vma);
			insert_vm_struct(current->mm, new_vma);
		}

		/* Conceal VM_ACCOUNT so old reservation is not undone */
		if (vma->vm_flags & VM_ACCOUNT) {
			vma->vm_flags &= ~VM_ACCOUNT;
			if (addr > vma->vm_start) {
				if (addr + old_len < vma->vm_end)
					split = 1;
			} else if (addr + old_len == vma->vm_end)
				vma = NULL;	/* it will be removed */
		} else
			vma = NULL;		/* nothing more to do */

		do_munmap(current->mm, addr, old_len);

		/* Restore VM_ACCOUNT if one or two pieces of vma left */
		if (vma) {
			vma->vm_flags |= VM_ACCOUNT;
			if (split)
				vma->vm_next->vm_flags |= VM_ACCOUNT;
		}

		current->mm->total_vm += new_len >> PAGE_SHIFT;
		if (vm_locked) {
			current->mm->locked_vm += new_len >> PAGE_SHIFT;
			if (new_len > old_len)
				make_pages_present(new_addr + old_len,
						   new_addr + new_len);
		}
		return new_addr;
	}
	if (allocated_vma)
		kmem_cache_free(vm_area_cachep, new_vma);
out:
	return -ENOMEM;
}

/*
 * Expand (or shrink) an existing mapping, potentially moving it at the
 * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
 *
 * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
 * This option implies MREMAP_MAYMOVE.
 */
unsigned long do_mremap(unsigned long addr,
	unsigned long old_len, unsigned long new_len,
	unsigned long flags, unsigned long new_addr)
{
	struct vm_area_struct *vma;
	unsigned long ret = -EINVAL;
	unsigned long charged = 0;

	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
		goto out;

	if (addr & ~PAGE_MASK)
		goto out;

	old_len = PAGE_ALIGN(old_len);
	new_len = PAGE_ALIGN(new_len);

	/* new_addr is only valid if MREMAP_FIXED is specified */
	if (flags & MREMAP_FIXED) {
		if (new_addr & ~PAGE_MASK)
			goto out;
		if (!(flags & MREMAP_MAYMOVE))
			goto out;

		if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
			goto out;

		/* Check if the location we're moving into overlaps the
		 * old location at all, and fail if it does.
		 */
		if ((new_addr <= addr) && (new_addr+new_len) > addr)
			goto out;

		if ((addr <= new_addr) && (addr+old_len) > new_addr)
			goto out;

		do_munmap(current->mm, new_addr, new_len);
	}

	/*
	 * Always allow a shrinking remap: that just unmaps
	 * the unnecessary pages..
	 * do_munmap does all the needed commit accounting
	 */
	ret = addr;
	if (old_len >= new_len) {
		do_munmap(current->mm, addr+new_len, old_len - new_len);
		if (!(flags & MREMAP_FIXED) || (new_addr == addr))
			goto out;
	}

	/*
	 * Ok, we need to grow..  or relocate.
	 */
	ret = -EFAULT;
	vma = find_vma(current->mm, addr);
	if (!vma || vma->vm_start > addr)
		goto out;
	if (is_vm_hugetlb_page(vma)) {
		ret = -EINVAL;
		goto out;
	}
	/* We can't remap across vm area boundaries */
	if (old_len > vma->vm_end - addr)
		goto out;
	if (vma->vm_flags & VM_DONTEXPAND) {
		if (new_len > old_len)
			goto out;
	}
	if (vma->vm_flags & VM_LOCKED) {
		unsigned long locked = current->mm->locked_vm << PAGE_SHIFT;
		locked += new_len - old_len;
		ret = -EAGAIN;
		if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
			goto out;
	}
	ret = -ENOMEM;
	if ((current->mm->total_vm << PAGE_SHIFT) + (new_len - old_len)
	    > current->rlim[RLIMIT_AS].rlim_cur)
		goto out;

	if (vma->vm_flags & VM_ACCOUNT) {
		charged = (new_len - old_len) >> PAGE_SHIFT;
		if (security_vm_enough_memory(charged))
			goto out_nc;
	}

	/* old_len exactly to the end of the area..
	 * And we're not relocating the area.
	 */
	if (old_len == vma->vm_end - addr &&
	    !((flags & MREMAP_FIXED) && (addr != new_addr)) &&
	    (old_len != new_len || !(flags & MREMAP_MAYMOVE))) {
		unsigned long max_addr = TASK_SIZE;
		if (vma->vm_next)
			max_addr = vma->vm_next->vm_start;
		/* can we just expand the current mapping? */
		if (max_addr - addr >= new_len) {
			int pages = (new_len - old_len) >> PAGE_SHIFT;
			spin_lock(&vma->vm_mm->page_table_lock);
			vma->vm_end = addr + new_len;
			spin_unlock(&vma->vm_mm->page_table_lock);
			current->mm->total_vm += pages;
			if (vma->vm_flags & VM_LOCKED) {
				current->mm->locked_vm += pages;
				make_pages_present(addr + old_len,
						   addr + new_len);
			}
			ret = addr;
			goto out;
		}
	}

	/*
	 * We weren't able to just expand or shrink the area,
	 * we need to create a new one and move it..
	 */
	ret = -ENOMEM;
	if (flags & MREMAP_MAYMOVE) {
		if (!(flags & MREMAP_FIXED)) {
			unsigned long map_flags = 0;
			if (vma->vm_flags & VM_SHARED)
				map_flags |= MAP_SHARED;

			new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
						vma->vm_pgoff, map_flags);
			ret = new_addr;
			if (new_addr & ~PAGE_MASK)
				goto out;
		}
		ret = move_vma(vma, addr, old_len, new_len, new_addr);
	}
out:
	if (ret & ~PAGE_MASK)
		vm_unacct_memory(charged);
out_nc:
	return ret;
}

asmlinkage unsigned long sys_mremap(unsigned long addr,
	unsigned long old_len, unsigned long new_len,
	unsigned long flags, unsigned long new_addr)
{
	unsigned long ret;

	down_write(&current->mm->mmap_sem);
	ret = do_mremap(addr, old_len, new_len, flags, new_addr);
	up_write(&current->mm->mmap_sem);
	return ret;
}