/*
 *	linux/mm/mremap.c
 *
 *	(C) Copyright 1996 Linus Torvalds
 *
 *	Address space accounting code	<alan@redhat.com>
 *	(C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/rmap-locking.h>
#include <linux/security.h>

#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
static pte_t *get_one_pte_map_nested(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte = NULL;

	pgd = pgd_offset(mm, addr);
	if (pgd_none(*pgd))
		goto end;
	if (pgd_bad(*pgd)) {
		pgd_ERROR(*pgd);
		pgd_clear(pgd);
		goto end;
	}

	pmd = pmd_offset(pgd, addr);
	if (pmd_none(*pmd))
		goto end;
	if (pmd_bad(*pmd)) {
		pmd_ERROR(*pmd);
		pmd_clear(pmd);
		goto end;
	}

	pte = pte_offset_map_nested(pmd, addr);
	if (pte_none(*pte)) {
		pte_unmap_nested(pte);
		pte = NULL;
	}
end:
	return pte;
}
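/*
 * Note: the _nested kmap variants are what allow move_one_page() below
 * to keep the source and destination pte pages mapped at the same time
 * on CONFIG_HIGHPTE configurations, where each pte_offset_map() takes
 * an atomic kmap slot.
 */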
#ifdef CONFIG_HIGHPTE	/* Save a few cycles on the sane machines */
static inline int page_table_present(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	if (pgd_none(*pgd))
		return 0;
	pmd = pmd_offset(pgd, addr);
	return pmd_present(*pmd);
}
#else
#define page_table_present(mm, addr)	(1)
#endif
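/*
 * page_table_present() reports whether a pte page already exists for
 * an address, so move_one_page() can tell in advance whether
 * alloc_one_pte_map() will need to allocate one.
 */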
static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr)
{
	pmd_t *pmd;
	pte_t *pte = NULL;

	pmd = pmd_alloc(mm, pgd_offset(mm, addr), addr);
	if (pmd)
		pte = pte_alloc_map(mm, pmd, addr);
	return pte;
}
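/*
 * alloc_one_pte_map() may have to allocate a pmd page and a pte page,
 * dropping mm->page_table_lock to do so; that is why the caller must
 * not hold an atomic kmap across it.  It returns NULL on failure.
 */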
static int
copy_one_pte(struct mm_struct *mm, pte_t *src, pte_t *dst,
		struct pte_chain **pte_chainp)
{
	int error = 0;
	pte_t pte;
	struct page *page = NULL;

	if (pte_present(*src))
		page = pte_page(*src);

	if (!pte_none(*src)) {
		if (page)
			page_remove_rmap(page, src);
		pte = ptep_get_and_clear(src);
		if (!dst) {
			/* No dest?  We must put it back. */
			dst = src;
			error++;
		}
		set_pte(dst, pte);
		if (page)
			*pte_chainp = page_add_rmap(page, dst, *pte_chainp);
	}
	return error;
}
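/*
 * copy_one_pte() moves the pte from *src to *dst, keeping the reverse
 * mapping up to date: the rmap entry for the old pte location is
 * removed and one for the new location added.  If dst is NULL the pte
 * is put back where it was and the error count is bumped.
 */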
static int
move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
		unsigned long new_addr)
{
	struct mm_struct *mm = vma->vm_mm;
	int error = 0;
	pte_t *src, *dst;
	struct pte_chain *pte_chain;

	pte_chain = pte_chain_alloc(GFP_KERNEL);
	if (!pte_chain) {
		error = -ENOMEM;
		goto out;
	}
	spin_lock(&mm->page_table_lock);
	src = get_one_pte_map_nested(mm, old_addr);
	if (src) {
		/*
		 * Look to see whether alloc_one_pte_map needs to perform a
		 * memory allocation.  If it does then we need to drop the
		 * atomic kmap
		 */
		if (!page_table_present(mm, new_addr)) {
			pte_unmap_nested(src);
			src = NULL;
		}
		dst = alloc_one_pte_map(mm, new_addr);
		if (src == NULL)
			src = get_one_pte_map_nested(mm, old_addr);
		error = copy_one_pte(mm, src, dst, &pte_chain);
		pte_unmap_nested(src);
		pte_unmap(dst);
	}
	flush_tlb_page(vma, old_addr);
	spin_unlock(&mm->page_table_lock);
	pte_chain_free(pte_chain);
out:
	return error;
}
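/*
 * The pte_chain is preallocated with GFP_KERNEL before taking
 * mm->page_table_lock, because pte_chain_alloc() may sleep whereas
 * page_add_rmap() is called with the lock held.
 */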
static int move_page_tables(struct vm_area_struct *vma,
	unsigned long new_addr, unsigned long old_addr, unsigned long len)
{
	unsigned long offset = len;

	flush_cache_range(vma, old_addr, old_addr + len);

	/*
	 * This is not the clever way to do this, but we're taking the
	 * easy way out on the assumption that most remappings will be
	 * only a few pages.. This also makes error recovery easier.
	 */
	while (offset) {
		offset -= PAGE_SIZE;
		if (move_one_page(vma, old_addr + offset, new_addr + offset))
			goto oops_we_failed;
	}
	return 0;

	/*
	 * Ok, the move failed because we didn't have enough pages for
	 * the new page table tree. This is unlikely, but we have to
	 * take the possibility into account. In that case we just move
	 * all the pages back (this will work, because we still have
	 * the old page tables)
	 */
oops_we_failed:
	flush_cache_range(vma, new_addr, new_addr + len);
	while ((offset += PAGE_SIZE) < len)
		move_one_page(vma, new_addr + offset, old_addr + offset);
	zap_page_range(vma, new_addr, len);
	return -1;
}
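/*
 * A non-zero return from move_page_tables() means the move was undone:
 * everything already copied has been moved back and the destination
 * range zapped, so move_vma() can simply fail with -ENOMEM.
 */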
static unsigned long move_vma(struct vm_area_struct *vma,
	unsigned long addr, unsigned long old_len, unsigned long new_len,
	unsigned long new_addr)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *new_vma, *next, *prev;
	int allocated_vma;
	int split = 0;

	new_vma = NULL;
	next = find_vma_prev(mm, new_addr, &prev);
	if (next) {
		if (prev && prev->vm_end == new_addr &&
		    can_vma_merge(prev, vma->vm_flags) && !vma->vm_file &&
		    !(vma->vm_flags & VM_SHARED)) {
			spin_lock(&mm->page_table_lock);
			prev->vm_end = new_addr + new_len;
			spin_unlock(&mm->page_table_lock);
			new_vma = prev;
			if (next != prev->vm_next)
				BUG();
			if (prev->vm_end == next->vm_start &&
			    can_vma_merge(next, prev->vm_flags)) {
				spin_lock(&mm->page_table_lock);
				prev->vm_end = next->vm_end;
				__vma_unlink(mm, next, prev);
				spin_unlock(&mm->page_table_lock);
				if (vma == next)
					vma = prev;
				mm->map_count--;
				kmem_cache_free(vm_area_cachep, next);
			}
		} else if (next->vm_start == new_addr + new_len &&
			   can_vma_merge(next, vma->vm_flags) &&
			   !vma->vm_file && !(vma->vm_flags & VM_SHARED)) {
			spin_lock(&mm->page_table_lock);
			next->vm_start = new_addr;
			spin_unlock(&mm->page_table_lock);
			new_vma = next;
		}
	} else {
		prev = find_vma(mm, new_addr-1);
		if (prev && prev->vm_end == new_addr &&
		    can_vma_merge(prev, vma->vm_flags) && !vma->vm_file &&
		    !(vma->vm_flags & VM_SHARED)) {
			spin_lock(&mm->page_table_lock);
			prev->vm_end = new_addr + new_len;
			spin_unlock(&mm->page_table_lock);
			new_vma = prev;
		}
	}

	allocated_vma = 0;
	if (!new_vma) {
		new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
		if (!new_vma)
			goto out;
		allocated_vma = 1;
	}

	if (!move_page_tables(vma, new_addr, addr, old_len)) {
		unsigned long vm_locked = vma->vm_flags & VM_LOCKED;

		if (allocated_vma) {
			*new_vma = *vma;
			INIT_LIST_HEAD(&new_vma->shared);
			new_vma->vm_start = new_addr;
			new_vma->vm_end = new_addr+new_len;
			new_vma->vm_pgoff += (addr-vma->vm_start) >> PAGE_SHIFT;
			if (new_vma->vm_file)
				get_file(new_vma->vm_file);
			if (new_vma->vm_ops && new_vma->vm_ops->open)
				new_vma->vm_ops->open(new_vma);
			insert_vm_struct(current->mm, new_vma);
		}

		/* Conceal VM_ACCOUNT so old reservation is not undone */
		if (vma->vm_flags & VM_ACCOUNT) {
			vma->vm_flags &= ~VM_ACCOUNT;
			if (addr > vma->vm_start) {
				if (addr + old_len < vma->vm_end)
					split = 1;
			} else if (addr + old_len == vma->vm_end)
				vma = NULL;	/* it will be removed */
		} else
			vma = NULL;		/* nothing more to do */

		do_munmap(current->mm, addr, old_len);

		/* Restore VM_ACCOUNT if one or two pieces of vma left */
		if (vma) {
			vma->vm_flags |= VM_ACCOUNT;
			if (split)
				vma->vm_next->vm_flags |= VM_ACCOUNT;
		}

		current->mm->total_vm += new_len >> PAGE_SHIFT;
		if (vm_locked) {
			current->mm->locked_vm += new_len >> PAGE_SHIFT;
			if (new_len > old_len)
				make_pages_present(new_addr + old_len,
						   new_addr + new_len);
		}
		return new_addr;
	}
	if (allocated_vma)
		kmem_cache_free(vm_area_cachep, new_vma);
out:
	return -ENOMEM;
}
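/*
 * The VM_ACCOUNT games above exist because do_munmap() un-accounts any
 * accountable mapping it removes.  Clearing the flag across the unmap
 * of the old range preserves the commitment that the moved mapping
 * still needs; the flag is then restored on whatever piece(s) of the
 * old vma survive a partial unmap.
 */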
/*
 * Expand (or shrink) an existing mapping, potentially moving it at the
 * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
 *
 * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
 * This option implies MREMAP_MAYMOVE.
 */
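/*
 * For illustration, a typical userspace call that exercises the move
 * path (a sketch only, not part of this file):
 *
 *	void *p = mmap(NULL, 4096, PROT_READ|PROT_WRITE,
 *		       MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
 *	p = mremap(p, 4096, 65536, MREMAP_MAYMOVE);
 *
 * If the 4k mapping cannot grow in place, MREMAP_MAYMOVE lets the
 * kernel relocate it, and move_vma() carries the page tables across.
 */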
unsigned long do_mremap(unsigned long addr,
	unsigned long old_len, unsigned long new_len,
	unsigned long flags, unsigned long new_addr)
{
	struct vm_area_struct *vma;
	unsigned long ret = -EINVAL;
	unsigned long charged = 0;

	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
		goto out;

	if (addr & ~PAGE_MASK)
		goto out;

	old_len = PAGE_ALIGN(old_len);
	new_len = PAGE_ALIGN(new_len);

	/* new_addr is only valid if MREMAP_FIXED is specified */
	if (flags & MREMAP_FIXED) {
		if (new_addr & ~PAGE_MASK)
			goto out;
		if (!(flags & MREMAP_MAYMOVE))
			goto out;

		if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
			goto out;

		/* Check if the location we're moving into overlaps the
		 * old location at all, and fail if it does.
		 */
		if ((new_addr <= addr) && (new_addr+new_len) > addr)
			goto out;

		if ((addr <= new_addr) && (addr+old_len) > new_addr)
			goto out;

		do_munmap(current->mm, new_addr, new_len);
	}

	/*
	 * Always allow a shrinking remap: that just unmaps
	 * the unnecessary pages..
	 * do_munmap does all the needed commit accounting
	 */
	if (old_len >= new_len) {
		do_munmap(current->mm, addr+new_len, old_len - new_len);
		ret = addr;
		if (!(flags & MREMAP_FIXED) || (new_addr == addr))
			goto out;
		old_len = new_len;
	}
	/*
	 * Ok, we need to grow..  or relocate.
	 */
	ret = -EFAULT;
	vma = find_vma(current->mm, addr);
	if (!vma || vma->vm_start > addr)
		goto out;
	if (is_vm_hugetlb_page(vma)) {
		ret = -EINVAL;
		goto out;
	}
	/* We can't remap across vm area boundaries */
	if (old_len > vma->vm_end - addr)
		goto out;
	if (vma->vm_flags & VM_DONTEXPAND) {
		if (new_len > old_len)
			goto out;
	}
	if (vma->vm_flags & VM_LOCKED) {
		unsigned long locked = current->mm->locked_vm << PAGE_SHIFT;
		locked += new_len - old_len;
		ret = -EAGAIN;
		if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
			goto out;
	}
	ret = -ENOMEM;
	if ((current->mm->total_vm << PAGE_SHIFT) + (new_len - old_len)
	    > current->rlim[RLIMIT_AS].rlim_cur)
		goto out;

	if (vma->vm_flags & VM_ACCOUNT) {
		charged = (new_len - old_len) >> PAGE_SHIFT;
		if (security_vm_enough_memory(charged))
			goto out_nc;
	}

	/* old_len exactly to the end of the area..
	 * And we're not relocating the area.
	 */
	if (old_len == vma->vm_end - addr &&
	    !((flags & MREMAP_FIXED) && (addr != new_addr)) &&
	    (old_len != new_len || !(flags & MREMAP_MAYMOVE))) {
		unsigned long max_addr = TASK_SIZE;
		if (vma->vm_next)
			max_addr = vma->vm_next->vm_start;
		/* can we just expand the current mapping? */
		if (max_addr - addr >= new_len) {
			int pages = (new_len - old_len) >> PAGE_SHIFT;
			spin_lock(&vma->vm_mm->page_table_lock);
			vma->vm_end = addr + new_len;
			spin_unlock(&vma->vm_mm->page_table_lock);
			current->mm->total_vm += pages;
			if (vma->vm_flags & VM_LOCKED) {
				current->mm->locked_vm += pages;
				make_pages_present(addr + old_len,
						   addr + new_len);
			}
			ret = addr;
			goto out;
		}
	}

	/*
	 * We weren't able to just expand or shrink the area,
	 * we need to create a new one and move it..
	 */
	ret = -ENOMEM;
	if (flags & MREMAP_MAYMOVE) {
		if (!(flags & MREMAP_FIXED)) {
			unsigned long map_flags = 0;
			if (vma->vm_flags & VM_MAYSHARE)
				map_flags |= MAP_SHARED;

			new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
						vma->vm_pgoff, map_flags);
			ret = new_addr;
			if (new_addr & ~PAGE_MASK)
				goto out;
		}
		ret = move_vma(vma, addr, old_len, new_len, new_addr);
	}
out:
	if (ret & ~PAGE_MASK)
		vm_unacct_memory(charged);
out_nc:
	return ret;
}
asmlinkage unsigned long sys_mremap(unsigned long addr,
	unsigned long old_len, unsigned long new_len,
	unsigned long flags, unsigned long new_addr)
{
	unsigned long ret;

	down_write(&current->mm->mmap_sem);
	ret = do_mremap(addr, old_len, new_len, flags, new_addr);
	up_write(&current->mm->mmap_sem);
	return ret;
}