/*
 *	mm/mremap.c
 *
 *	(C) Copyright 1996 Linus Torvalds
 *
 *	Address space accounting code	<alan@redhat.com>
 *	(C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/rmap-locking.h>
#include <linux/security.h>

#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

/*
 * Return the mapped pte for addr, or NULL if no pte is present.  The
 * "nested" atomic kmap slot is used so that the destination pte can be
 * mapped at the same time.
 */
static pte_t *get_one_pte_map_nested(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte = NULL;

	pgd = pgd_offset(mm, addr);
	if (pgd_none(*pgd))
		goto end;
	if (pgd_bad(*pgd)) {
		pgd_ERROR(*pgd);
		pgd_clear(pgd);
		goto end;
	}

	pmd = pmd_offset(pgd, addr);
	if (pmd_none(*pmd))
		goto end;
	if (pmd_bad(*pmd)) {
		pmd_ERROR(*pmd);
		pmd_clear(pmd);
		goto end;
	}

	pte = pte_offset_map_nested(pmd, addr);
	if (pte_none(*pte)) {
		pte_unmap_nested(pte);
		pte = NULL;
	}
end:
	return pte;
}

/*
 * Return whether a page table already exists for addr, so the caller
 * can tell whether alloc_one_pte_map() will need to allocate one.
 */
static inline int page_table_present(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	if (pgd_none(*pgd))
		return 0;
	pmd = pmd_offset(pgd, addr);
	return pmd_present(*pmd);
}

/*
 * Allocate intermediate page tables for addr if necessary and return
 * the mapped pte.  The allocation may block, hence the atomic-kmap
 * juggling in move_one_page().
 */
static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr)
{
	pmd_t *pmd;
	pte_t *pte = NULL;

	pmd = pmd_alloc(mm, pgd_offset(mm, addr), addr);
	if (pmd)
		pte = pte_alloc_map(mm, pmd, addr);
	return pte;
}

/*
 * Move a single pte from src to dst, keeping the reverse-map chain up
 * to date.  Returns non-zero if there was no destination pte and the
 * entry had to be put back at src.
 */
static int
copy_one_pte(struct mm_struct *mm, pte_t *src, pte_t *dst,
		struct pte_chain **pte_chainp)
{
	int error = 0;
	pte_t pte;
	struct page *page = NULL;

	if (pte_present(*src))
		page = pte_page(*src);

	if (!pte_none(*src)) {
		if (page)
			page_remove_rmap(page, src);
		pte = ptep_get_and_clear(src);
		if (!dst) {
			/* No dest?  We must put it back. */
			dst = src;
			error++;
		}
		set_pte(dst, pte);
		if (page)
			*pte_chainp = page_add_rmap(page, dst, *pte_chainp);
	}
	return error;
}

/*
 * Move the pte for one page from old_addr to new_addr, allocating the
 * destination page table if necessary.
 */
static int
move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
		unsigned long new_addr)
{
	struct mm_struct *mm = vma->vm_mm;
	int error = 0;
	pte_t *src, *dst;
	struct pte_chain *pte_chain;

	pte_chain = pte_chain_alloc(GFP_KERNEL);
	if (!pte_chain) {
		error = -ENOMEM;
		goto out;
	}
	spin_lock(&mm->page_table_lock);
	src = get_one_pte_map_nested(mm, old_addr);
	if (src) {
		/*
		 * Look to see whether alloc_one_pte_map needs to perform a
		 * memory allocation.  If it does then we need to drop the
		 * atomic kmap
		 */
		if (!page_table_present(mm, new_addr)) {
			pte_unmap_nested(src);
			src = NULL;
		}
		dst = alloc_one_pte_map(mm, new_addr);
		if (src == NULL)
			src = get_one_pte_map_nested(mm, old_addr);
		error = copy_one_pte(mm, src, dst, &pte_chain);
		pte_unmap_nested(src);
		pte_unmap(dst);
	}
	flush_tlb_page(vma, old_addr);
	spin_unlock(&mm->page_table_lock);
	pte_chain_free(pte_chain);
out:
	return error;
}

static int move_page_tables(struct vm_area_struct *vma,
	unsigned long new_addr, unsigned long old_addr, unsigned long len)
{
	unsigned long offset = len;

	flush_cache_range(vma, old_addr, old_addr + len);

	/*
	 * This is not the clever way to do this, but we're taking the
	 * easy way out on the assumption that most remappings will be
	 * only a few pages.. This also makes error recovery easier.
	 */
	while (offset) {
		offset -= PAGE_SIZE;
		if (move_one_page(vma, old_addr + offset, new_addr + offset))
			goto oops_we_failed;
	}
	return 0;

	/*
	 * Ok, the move failed because we didn't have enough pages for
	 * the new page table tree. This is unlikely, but we have to
	 * take the possibility into account. In that case we just move
	 * all the pages back (this will work, because we still have
	 * the old page tables)
	 */
oops_we_failed:
	flush_cache_range(vma, new_addr, new_addr + len);
	while ((offset += PAGE_SIZE) < len)
		move_one_page(vma, new_addr + offset, old_addr + offset);
	zap_page_range(vma, new_addr, len);
	return -1;
}

/*
 * Move a vma's pages to new_addr: reuse or merge with an adjacent vma
 * when possible, otherwise allocate a fresh vm_area_struct, then move
 * the page tables and unmap the old range.
 */
static unsigned long move_vma(struct vm_area_struct *vma,
	unsigned long addr, unsigned long old_len, unsigned long new_len,
	unsigned long new_addr)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *new_vma, *next, *prev;
	int allocated_vma;
	int split = 0;

	new_vma = NULL;
	next = find_vma_prev(mm, new_addr, &prev);
	if (next) {
		if (prev && prev->vm_end == new_addr &&
		    can_vma_merge(prev, vma->vm_flags) && !vma->vm_file &&
		    !(vma->vm_flags & VM_SHARED)) {
			spin_lock(&mm->page_table_lock);
			prev->vm_end = new_addr + new_len;
			spin_unlock(&mm->page_table_lock);
			new_vma = prev;
			if (next != prev->vm_next)
				BUG();
			if (prev->vm_end == next->vm_start &&
			    can_vma_merge(next, prev->vm_flags)) {
				spin_lock(&mm->page_table_lock);
				prev->vm_end = next->vm_end;
				__vma_unlink(mm, next, prev);
				spin_unlock(&mm->page_table_lock);
				if (vma == next)
					vma = prev;
				mm->map_count--;
				kmem_cache_free(vm_area_cachep, next);
			}
		} else if (next->vm_start == new_addr + new_len &&
			   can_vma_merge(next, vma->vm_flags) &&
			   !vma->vm_file && !(vma->vm_flags & VM_SHARED)) {
			spin_lock(&mm->page_table_lock);
			next->vm_start = new_addr;
			spin_unlock(&mm->page_table_lock);
			new_vma = next;
		}
	} else {
		prev = find_vma(mm, new_addr-1);
		if (prev && prev->vm_end == new_addr &&
		    can_vma_merge(prev, vma->vm_flags) && !vma->vm_file &&
		    !(vma->vm_flags & VM_SHARED)) {
			spin_lock(&mm->page_table_lock);
			prev->vm_end = new_addr + new_len;
			spin_unlock(&mm->page_table_lock);
			new_vma = prev;
		}
	}

	allocated_vma = 0;
	if (!new_vma) {
		new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
		if (!new_vma)
			goto out;
		allocated_vma = 1;
	}

	if (!move_page_tables(vma, new_addr, addr, old_len)) {
		unsigned long vm_locked = vma->vm_flags & VM_LOCKED;

		if (allocated_vma) {
			*new_vma = *vma;
			INIT_LIST_HEAD(&new_vma->shared);
			new_vma->vm_start = new_addr;
			new_vma->vm_end = new_addr+new_len;
			new_vma->vm_pgoff += (addr-vma->vm_start) >> PAGE_SHIFT;
			if (new_vma->vm_file)
				get_file(new_vma->vm_file);
			if (new_vma->vm_ops && new_vma->vm_ops->open)
				new_vma->vm_ops->open(new_vma);
			insert_vm_struct(current->mm, new_vma);
		}

		/* Conceal VM_ACCOUNT so old reservation is not undone */
		if (vma->vm_flags & VM_ACCOUNT) {
			vma->vm_flags &= ~VM_ACCOUNT;
			if (addr > vma->vm_start) {
				if (addr + old_len < vma->vm_end)
					split = 1;
			} else if (addr + old_len == vma->vm_end)
				vma = NULL;	/* it will be removed */
		} else
			vma = NULL;		/* nothing more to do */

		do_munmap(current->mm, addr, old_len);

		/* Restore VM_ACCOUNT if one or two pieces of vma left */
		if (vma) {
			vma->vm_flags |= VM_ACCOUNT;
			if (split)
				vma->vm_next->vm_flags |= VM_ACCOUNT;
		}

		current->mm->total_vm += new_len >> PAGE_SHIFT;
		if (vm_locked) {
			current->mm->locked_vm += new_len >> PAGE_SHIFT;
			if (new_len > old_len)
				make_pages_present(new_addr + old_len,
						   new_addr + new_len);
		}
		return new_addr;
	}
	if (allocated_vma)
		kmem_cache_free(vm_area_cachep, new_vma);
out:
	return -ENOMEM;
}

/*
 * Expand (or shrink) an existing mapping, potentially moving it at the
 * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
 *
 * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
 * This option implies MREMAP_MAYMOVE.
 */
unsigned long do_mremap(unsigned long addr,
	unsigned long old_len, unsigned long new_len,
	unsigned long flags, unsigned long new_addr)
{
	struct vm_area_struct *vma;
	unsigned long ret = -EINVAL;
	unsigned long charged = 0;

	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
		goto out;

	if (addr & ~PAGE_MASK)
		goto out;

	old_len = PAGE_ALIGN(old_len);
	new_len = PAGE_ALIGN(new_len);

	/* new_addr is only valid if MREMAP_FIXED is specified */
	if (flags & MREMAP_FIXED) {
		if (new_addr & ~PAGE_MASK)
			goto out;
		if (!(flags & MREMAP_MAYMOVE))
			goto out;

		if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
			goto out;

		/* Check if the location we're moving into overlaps the
		 * old location at all, and fail if it does.
		 */
		if ((new_addr <= addr) && (new_addr+new_len) > addr)
			goto out;

		if ((addr <= new_addr) && (addr+old_len) > new_addr)
			goto out;

		do_munmap(current->mm, new_addr, new_len);
	}

	/*
	 * Always allow a shrinking remap: that just unmaps
	 * the unnecessary pages..
	 * do_munmap does all the needed commit accounting
	 */
	ret = addr;
	if (old_len >= new_len) {
		do_munmap(current->mm, addr+new_len, old_len - new_len);
		if (!(flags & MREMAP_FIXED) || (new_addr == addr))
			goto out;
		old_len = new_len;
	}

	/*
	 * Ok, we need to grow..  or relocate.
	 */
	ret = -EFAULT;
	vma = find_vma(current->mm, addr);
	if (!vma || vma->vm_start > addr)
		goto out;
	if (is_vm_hugetlb_page(vma)) {
		ret = -EINVAL;
		goto out;
	}
	/* We can't remap across vm area boundaries */
	if (old_len > vma->vm_end - addr)
		goto out;
	if (vma->vm_flags & VM_DONTEXPAND) {
		if (new_len > old_len)
			goto out;
	}
	if (vma->vm_flags & VM_LOCKED) {
		unsigned long locked = current->mm->locked_vm << PAGE_SHIFT;
		locked += new_len - old_len;
		ret = -EAGAIN;
		if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
			goto out;
	}
	ret = -ENOMEM;
	if ((current->mm->total_vm << PAGE_SHIFT) + (new_len - old_len)
	    > current->rlim[RLIMIT_AS].rlim_cur)
		goto out;

	if (vma->vm_flags & VM_ACCOUNT) {
		charged = (new_len - old_len) >> PAGE_SHIFT;
		if (security_vm_enough_memory(charged))
			goto out_nc;
	}

	/* old_len exactly to the end of the area..
	 * And we're not relocating the area.
	 */
	if (old_len == vma->vm_end - addr &&
	    !((flags & MREMAP_FIXED) && (addr != new_addr)) &&
	    (old_len != new_len || !(flags & MREMAP_MAYMOVE))) {
		unsigned long max_addr = TASK_SIZE;
		if (vma->vm_next)
			max_addr = vma->vm_next->vm_start;
		/* can we just expand the current mapping? */
		if (max_addr - addr >= new_len) {
			int pages = (new_len - old_len) >> PAGE_SHIFT;
			spin_lock(&vma->vm_mm->page_table_lock);
			vma->vm_end = addr + new_len;
			spin_unlock(&vma->vm_mm->page_table_lock);
			current->mm->total_vm += pages;
			if (vma->vm_flags & VM_LOCKED) {
				current->mm->locked_vm += pages;
				make_pages_present(addr + old_len,
						   addr + new_len);
			}
			ret = addr;
			goto out;
		}
	}

	/*
	 * We weren't able to just expand or shrink the area,
	 * we need to create a new one and move it..
	 */
	ret = -ENOMEM;
	if (flags & MREMAP_MAYMOVE) {
		if (!(flags & MREMAP_FIXED)) {
			unsigned long map_flags = 0;
			if (vma->vm_flags & VM_MAYSHARE)
				map_flags |= MAP_SHARED;

			new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
						vma->vm_pgoff, map_flags);
			ret = new_addr;
			if (new_addr & ~PAGE_MASK)
				goto out;
		}
		ret = move_vma(vma, addr, old_len, new_len, new_addr);
	}
out:
	if (ret & ~PAGE_MASK)
		vm_unacct_memory(charged);
out_nc:
	return ret;
}

asmlinkage unsigned long sys_mremap(unsigned long addr,
	unsigned long old_len, unsigned long new_len,
	unsigned long flags, unsigned long new_addr)
{
	unsigned long ret;

	down_write(&current->mm->mmap_sem);
	ret = do_mremap(addr, old_len, new_len, flags, new_addr);
	up_write(&current->mm->mmap_sem);
	return ret;
}
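
/*
 * Illustration only -- not part of mm/mremap.c.  A minimal userspace
 * sketch of the call path implemented above: mmap(2) creates an
 * anonymous mapping and mremap(2) with MREMAP_MAYMOVE grows it, entering
 * the kernel through sys_mremap()/do_mremap().  The sizes below are
 * arbitrary example values; build it as a standalone program on Linux.
 */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	size_t old_len = 4 * 4096;	/* example: four pages */
	size_t new_len = 16 * 4096;	/* example: grow to sixteen pages */
	void *old, *new;

	old = mmap(NULL, old_len, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (old == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}

	/*
	 * MREMAP_MAYMOVE lets do_mremap() fall back to move_vma() when
	 * the mapping cannot simply be extended in place.
	 */
	new = mremap(old, old_len, new_len, MREMAP_MAYMOVE);
	if (new == MAP_FAILED) {
		perror("mremap");
		return EXIT_FAILURE;
	}

	printf("remapped %zu -> %zu bytes, %p -> %p\n",
	       old_len, new_len, old, new);
	munmap(new, new_len);
	return EXIT_SUCCESS;
}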