#include <linux/slab.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/file.h>

#include <asm/uaccess.h>
#include <asm/pgalloc.h>
/* description of effects of mapping type and prot in current implementation.
 * this is due to the limited x86 page protection hardware.  The expected
 * behavior is in parens:
 *
 *		PROT_NONE	PROT_READ	PROT_WRITE	PROT_EXEC
 * MAP_SHARED	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (yes) yes	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 *
 * MAP_PRIVATE	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 */
pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};
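/*
 * Illustrative sketch (added for exposition, not part of the original
 * source): the low four vm_flags bits select an entry in the table above.
 * This assumes the usual bit values VM_READ = 0x1, VM_WRITE = 0x2,
 * VM_EXEC = 0x4 and VM_SHARED = 0x8 from <linux/mm.h>.
 */
#if 0
	/* A MAP_PRIVATE, PROT_READ|PROT_WRITE mapping indexes entry 0x3,
	 * i.e. __P011: readable but not hardware-writable, so the first
	 * write faults and is satisfied by copy-on-write. */
	pgprot_t private_rw = protection_map[VM_READ | VM_WRITE];

	/* The same protections with MAP_SHARED set index the second half
	 * of the table (__S011) and stay genuinely writable. */
	pgprot_t shared_rw = protection_map[VM_SHARED | VM_READ | VM_WRITE];
#endif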
int sysctl_overcommit_memory;
/* Check that a process has enough memory to allocate a
 * new virtual mapping.
 */
int vm_enough_memory(long pages)
{
	/* Stupid algorithm to decide if we have enough memory: while
	 * simple, it hopefully works in most obvious cases.. Easy to
	 * fool it, but this should catch most mistakes.
	 */
	/* 23/11/98 NJC: Somewhat less stupid version of algorithm,
	 * which tries to do "TheRightThing".  Instead of using half of
	 * (buffers+cache), use the minimum values.  Allow an extra 2%
	 * of num_physpages for safety margin.
	 */
	unsigned long free;

	/* Sometimes we want to use more memory than we have. */
	if (sysctl_overcommit_memory)
		return 1;

	free = atomic_read(&buffermem_pages);
	free += atomic_read(&page_cache_size);
	free += nr_free_pages();
	free += nr_swap_pages;
	return free > pages;
}
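/*
 * Rough restatement of the heuristic above as a standalone sketch (added
 * for exposition; the function and parameter names are invented): pages
 * sitting in the buffer cache, the page cache, the free lists and free
 * swap are all treated as available, and the request succeeds when their
 * sum exceeds the number of pages being asked for.
 */
#if 0
static int enough_memory_sketch(long wanted_pages, long buffer_pages,
				long cache_pages, long free_pages,
				long swap_pages)
{
	long available = buffer_pages + cache_pages + free_pages + swap_pages;

	return available > wanted_pages;
}
#endif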
/* Remove one vm structure from the inode's i_mapping address space. */
static inline void __remove_shared_vm_struct(struct vm_area_struct *vma)
{
	struct file * file = vma->vm_file;

	if (file) {
		struct inode *inode = file->f_dentry->d_inode;
		if (vma->vm_flags & VM_DENYWRITE)
			atomic_inc(&inode->i_writecount);
		if (vma->vm_next_share)
			vma->vm_next_share->vm_pprev_share = vma->vm_pprev_share;
		*vma->vm_pprev_share = vma->vm_next_share;
	}
}
static inline void remove_shared_vm_struct(struct vm_area_struct *vma)
{
	lock_vma_mappings(vma);
	__remove_shared_vm_struct(vma);
	unlock_vma_mappings(vma);
}
void lock_vma_mappings(struct vm_area_struct *vma)
{
	struct address_space *mapping;

	mapping = NULL;
	if (vma->vm_file)
		mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
	if (mapping)
		spin_lock(&mapping->i_shared_lock);
}
void unlock_vma_mappings(struct vm_area_struct *vma)
{
	struct address_space *mapping;

	mapping = NULL;
	if (vma->vm_file)
		mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
	if (mapping)
		spin_unlock(&mapping->i_shared_lock);
}
/*
 *  sys_brk() for the most part doesn't need the global kernel
 *  lock, except when an application is doing something nasty
 *  like trying to un-brk an area that has already been mapped
 *  to a regular file.  in this case, the unmapping will need
 *  to invoke file system routines that need the global lock.
 */
asmlinkage unsigned long sys_brk(unsigned long brk)
{
	unsigned long rlim, retval;
	unsigned long newbrk, oldbrk;
	struct mm_struct *mm = current->mm;

	down(&mm->mmap_sem);

	if (brk < mm->end_code)
		goto out;
	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk)
		goto set_brk;

	/* Always allow shrinking brk. */
	if (brk <= mm->brk) {
		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
			goto set_brk;
		goto out;
	}

	/* Check against rlimit.. */
	rlim = current->rlim[RLIMIT_DATA].rlim_cur;
	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
		goto out;

	/* Check against existing mmap mappings. */
	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
		goto out;

	/* Check if we have enough memory.. */
	if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT))
		goto out;

	/* Ok, looks good - let it rip. */
	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
		goto out;
set_brk:
	mm->brk = brk;
out:
	retval = mm->brk;
	up(&mm->mmap_sem);
	return retval;
}
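/*
 * Userspace view of what this system call services (illustrative example,
 * not part of the original source): the C library's brk()/sbrk() wrappers
 * end up here.  Growing the break is subject to RLIMIT_DATA, collisions
 * with existing mappings and the vm_enough_memory() check; shrinking is
 * always allowed.
 */
#if 0
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	void *old_break = sbrk(0);		/* current program break */

	if (sbrk(4096) == (void *) -1)		/* grow the heap by one page */
		perror("sbrk grow");
	if (brk(old_break) == -1)		/* shrinking back always succeeds */
		perror("brk shrink");
	printf("break restored to %p\n", sbrk(0));
	return 0;
}
#endif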
/* Combine the mmap "prot" and "flags" argument into one "vm_flags" used
 * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits
 * into "VM_xxx".
 */
static inline unsigned long vm_flags(unsigned long prot, unsigned long flags)
{
#define _trans(x,bit1,bit2) \
((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)

	unsigned long prot_bits, flag_bits;
	prot_bits =
		_trans(prot, PROT_READ, VM_READ) |
		_trans(prot, PROT_WRITE, VM_WRITE) |
		_trans(prot, PROT_EXEC, VM_EXEC);
	flag_bits =
		_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
		_trans(flags, MAP_DENYWRITE, VM_DENYWRITE) |
		_trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE);
	return prot_bits | flag_bits;
#undef _trans
}
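/*
 * Worked example of the _trans() translation above (illustrative, not part
 * of the original source).  It assumes the conventional definitions in
 * which PROT_READ/VM_READ and PROT_WRITE/VM_WRITE share the same bit
 * values, so those bits are simply masked through, while the MAP_* bits
 * are tested and re-emitted as their VM_* counterparts.
 */
#if 0
	unsigned long f = vm_flags(PROT_READ | PROT_WRITE, MAP_GROWSDOWN);

	/* f now equals VM_READ | VM_WRITE | VM_GROWSDOWN: the protection
	 * bits came through the first _trans() group, the mapping flag
	 * through the second. */
#endif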
unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned long len,
	unsigned long prot, unsigned long flags, unsigned long pgoff)
{
	struct mm_struct * mm = current->mm;
	struct vm_area_struct * vma;
	int correct_wcount = 0;
	int error;

	if (file && (!file->f_op || !file->f_op->mmap))
		return -ENODEV;

	if ((len = PAGE_ALIGN(len)) == 0)
		return addr;

	if (len > TASK_SIZE || addr > TASK_SIZE-len)
		return -EINVAL;

	/* offset overflow? */
	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
		return -EINVAL;

	/* Too many mappings? */
	if (mm->map_count > MAX_MAP_COUNT)
		return -ENOMEM;

	/* mlock MCL_FUTURE? */
	if (mm->def_flags & VM_LOCKED) {
		unsigned long locked = mm->locked_vm << PAGE_SHIFT;
		locked += len;
		if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
			return -EAGAIN;
	}
	/* Do simple checking here so the lower-level routines won't have
	 * to. we assume access permissions have been handled by the open
	 * of the memory object, so we don't do any here.
	 */
	if (file != NULL) {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE))
				return -EACCES;

			/* Make sure we don't allow writing to an append-only file.. */
			if (IS_APPEND(file->f_dentry->d_inode) && (file->f_mode & FMODE_WRITE))
				return -EACCES;

			/* make sure there are no mandatory locks on the file. */
			if (locks_verify_locked(file->f_dentry->d_inode))
				return -EAGAIN;

			/* fall through */
		case MAP_PRIVATE:
			if (!(file->f_mode & FMODE_READ))
				return -EACCES;
			break;

		default:
			return -EINVAL;
		}
	}
	/* Obtain the address to map to. we verify (or select) it and ensure
	 * that it represents a valid section of the address space.
	 */
	if (flags & MAP_FIXED) {
		if (addr & ~PAGE_MASK)
			return -EINVAL;
	} else {
		addr = get_unmapped_area(addr, len);
		if (!addr)
			return -ENOMEM;
	}

	/* Determine the object being mapped and call the appropriate
	 * specific mapper. the address has already been validated, but
	 * not unmapped, but the maps are removed from the list.
	 */
	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!vma)
		return -ENOMEM;
	vma->vm_mm = mm;
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_flags = vm_flags(prot,flags) | mm->def_flags;

	if (file) {
		VM_ClearReadHint(vma);
		vma->vm_raend = 0;

		if (file->f_mode & FMODE_READ)
			vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
		if (flags & MAP_SHARED) {
			vma->vm_flags |= VM_SHARED | VM_MAYSHARE;

			/* This looks strange, but when we don't have the file open
			 * for writing, we can demote the shared mapping to a simpler
			 * private mapping. That also takes care of a security hole
			 * with ptrace() writing to a shared mapping without write
			 * permissions.
			 *
			 * We leave the VM_MAYSHARE bit on, just to get correct output
			 * from /proc/xxx/maps..
			 */
			if (!(file->f_mode & FMODE_WRITE))
				vma->vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
		}
	} else {
		vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
		if (flags & MAP_SHARED)
			vma->vm_flags |= VM_SHARED | VM_MAYSHARE;
	}
	vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f];
	vma->vm_ops = NULL;
	vma->vm_pgoff = pgoff;
	vma->vm_file = NULL;
	vma->vm_private_data = NULL;
	/* Clear old maps */
	error = -ENOMEM;
	if (do_munmap(mm, addr, len))
		goto free_vma;

	/* Check against address space limit. */
	if ((mm->total_vm << PAGE_SHIFT) + len
	    > current->rlim[RLIMIT_AS].rlim_cur)
		goto free_vma;

	/* Private writable mapping? Check memory availability.. */
	if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE &&
	    !(flags & MAP_NORESERVE) &&
	    !vm_enough_memory(len >> PAGE_SHIFT))
		goto free_vma;

	if (file) {
		if (vma->vm_flags & VM_DENYWRITE) {
			error = deny_write_access(file);
			if (error)
				goto free_vma;
			correct_wcount = 1;
		}
		vma->vm_file = file;
		get_file(file);
		error = file->f_op->mmap(file, vma);
		if (error)
			goto unmap_and_free_vma;
	} else if (flags & MAP_SHARED) {
		error = shmem_zero_setup(vma);
		if (error)
			goto free_vma;
	}

	/* Can addr have changed??
	 *
	 * Answer: Yes, several device drivers can do it in their
	 *         f_op->mmap method. -DaveM
	 */
	flags = vma->vm_flags;
	addr = vma->vm_start;

	insert_vm_struct(mm, vma);
	if (correct_wcount)
		atomic_inc(&file->f_dentry->d_inode->i_writecount);

	mm->total_vm += len >> PAGE_SHIFT;
	if (flags & VM_LOCKED) {
		mm->locked_vm += len >> PAGE_SHIFT;
		make_pages_present(addr, addr + len);
	}
	return addr;

unmap_and_free_vma:
	if (correct_wcount)
		atomic_inc(&file->f_dentry->d_inode->i_writecount);
	vma->vm_file = NULL;
	fput(file);
	/* Undo any partial mapping done by a device driver. */
	flush_cache_range(mm, vma->vm_start, vma->vm_end);
	zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start);
	flush_tlb_range(mm, vma->vm_start, vma->vm_end);
free_vma:
	kmem_cache_free(vm_area_cachep, vma);
	return error;
}
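/*
 * Userspace illustration of the MAP_SHARED demotion handled above (added
 * for exposition; the file name is arbitrary): a shared mapping of a file
 * opened read-only can never be written through, so the kernel drops
 * VM_SHARED internally and keeps only VM_MAYSHARE for /proc/<pid>/maps.
 */
#if 0
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/etc/hostname", O_RDONLY);
	void *p;

	if (fd < 0)
		return 1;
	/* Succeeds, but behaves like a private read-only mapping. */
	p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
	if (p != MAP_FAILED)
		munmap(p, 4096);
	close(fd);
	return 0;
}
#endif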
/* Get an address range which is currently unmapped.
 * For mmap() without MAP_FIXED and shmat() with addr=0.
 * Return value 0 means ENOMEM.
 */
#ifndef HAVE_ARCH_UNMAPPED_AREA
unsigned long get_unmapped_area(unsigned long addr, unsigned long len)
{
	struct vm_area_struct * vmm;

	if (len > TASK_SIZE)
		return 0;
	if (!addr)
		addr = TASK_UNMAPPED_BASE;
	addr = PAGE_ALIGN(addr);

	for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
		/* At this point:  (!vmm || addr < vmm->vm_end). */
		if (TASK_SIZE - len < addr)
			return 0;
		if (!vmm || addr + len <= vmm->vm_start)
			return addr;
		addr = vmm->vm_end;
	}
}
#endif
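/*
 * The walk above is a plain first-fit search over the sorted VMA list.
 * Standalone restatement for exposition (invented types and names, not
 * part of the original source): given non-overlapping intervals sorted by
 * address, return the lowest address >= hint where a gap of len bytes fits
 * below limit, or 0 if there is none.
 */
#if 0
struct interval { unsigned long start, end; struct interval *next; };

static unsigned long first_fit(struct interval *sorted, unsigned long hint,
			       unsigned long len, unsigned long limit)
{
	unsigned long addr = hint;
	struct interval *i;

	for (i = sorted; ; i = i->next) {
		if (limit - len < addr)
			return 0;			/* address space exhausted */
		if (!i || addr + len <= i->start)
			return addr;			/* gap found before i */
		if (i->end > addr)
			addr = i->end;			/* skip past this interval */
	}
}
#endif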
#define vm_avl_empty	(struct vm_area_struct *) NULL

#include "mmap_avl.c"
/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
{
	struct vm_area_struct *vma = NULL;

	if (mm) {
		/* Check the cache first. */
		/* (Cache hit rate is typically around 35%.) */
		vma = mm->mmap_cache;
		if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
			if (!mm->mmap_avl) {
				/* Go through the linear list. */
				vma = mm->mmap;
				while (vma && vma->vm_end <= addr)
					vma = vma->vm_next;
			} else {
				/* Then go through the AVL tree quickly. */
				struct vm_area_struct * tree = mm->mmap_avl;
				vma = NULL;
				for (;;) {
					if (tree == vm_avl_empty)
						break;
					if (tree->vm_end > addr) {
						vma = tree;
						if (tree->vm_start <= addr)
							break;
						tree = tree->vm_avl_left;
					} else
						tree = tree->vm_avl_right;
				}
			}
			if (vma)
				mm->mmap_cache = vma;
		}
	}
	return vma;
}
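/*
 * Caller's-eye view of the contract above (illustrative snippet, not part
 * of the original source): find_vma() returns the first VMA that ends
 * above addr, which is not necessarily the one containing addr, so callers
 * needing containment must also test vm_start.
 */
#if 0
	struct vm_area_struct *vma = find_vma(mm, addr);

	if (!vma) {
		/* addr lies above every existing mapping */
	} else if (vma->vm_start <= addr) {
		/* addr falls inside this vma */
	} else {
		/* addr sits in the hole just below vma */
	}
#endif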
/* Same as find_vma, but also return a pointer to the previous VMA in *pprev. */
struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
				      struct vm_area_struct **pprev)
{
	if (mm) {
		if (!mm->mmap_avl) {
			/* Go through the linear list. */
			struct vm_area_struct * prev = NULL;
			struct vm_area_struct * vma = mm->mmap;
			while (vma && vma->vm_end <= addr) {
				prev = vma;
				vma = vma->vm_next;
			}
			*pprev = prev;
			return vma;
		} else {
			/* Go through the AVL tree quickly. */
			struct vm_area_struct * vma = NULL;
			struct vm_area_struct * last_turn_right = NULL;
			struct vm_area_struct * prev = NULL;
			struct vm_area_struct * tree = mm->mmap_avl;
			for (;;) {
				if (tree == vm_avl_empty)
					break;
				if (tree->vm_end > addr) {
					vma = tree;
					prev = last_turn_right;
					if (tree->vm_start <= addr)
						break;
					tree = tree->vm_avl_left;
				} else {
					last_turn_right = tree;
					tree = tree->vm_avl_right;
				}
			}
			if (vma) {
				if (vma->vm_avl_left != vm_avl_empty) {
					prev = vma->vm_avl_left;
					while (prev->vm_avl_right != vm_avl_empty)
						prev = prev->vm_avl_right;
				}
				if ((prev ? prev->vm_next : mm->mmap) != vma)
					printk("find_vma_prev: tree inconsistent with list\n");
				*pprev = prev;
				return vma;
			}
		}
	}
	*pprev = NULL;
	return NULL;
}
struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long addr)
{
	struct vm_area_struct * vma;
	unsigned long start;

	addr &= PAGE_MASK;
	vma = find_vma(mm,addr);
	if (!vma)
		return NULL;
	if (vma->vm_start <= addr)
		return vma;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		return NULL;
	start = vma->vm_start;
	if (expand_stack(vma, addr))
		return NULL;
	if (vma->vm_flags & VM_LOCKED) {
		make_pages_present(addr, start);
	}
	return vma;
}
/* Normal function to fix up a mapping
 * This function is the default for when an area has no specific
 * function.  This may be used as part of a more specific routine.
 * This function works out what part of an area is affected and
 * adjusts the mapping information.  Since the actual page
 * manipulation is done in do_mmap(), none need be done here,
 * though it would probably be more appropriate.
 *
 * By the time this function is called, the area struct has been
 * removed from the process mapping list, so it needs to be
 * reinserted if necessary.
 *
 * The 4 main cases are:
 *    Unmapping the whole area
 *    Unmapping from the start of the segment to a point in it
 *    Unmapping from an intermediate point to the end
 *    Unmapping between two intermediate points, making a hole.
 *
 * Case 4 involves the creation of 2 new areas, for each side of
 * the hole.  If possible, we reuse the existing area rather than
 * allocate a new one, and the return indicates whether the old
 * area was reused.
 */
static struct vm_area_struct * unmap_fixup(struct mm_struct *mm,
	struct vm_area_struct *area, unsigned long addr, size_t len,
	struct vm_area_struct *extra)
{
	struct vm_area_struct *mpnt;
	unsigned long end = addr + len;

	area->vm_mm->total_vm -= len >> PAGE_SHIFT;
	if (area->vm_flags & VM_LOCKED)
		area->vm_mm->locked_vm -= len >> PAGE_SHIFT;

	/* Unmapping the whole area. */
	if (addr == area->vm_start && end == area->vm_end) {
		if (area->vm_ops && area->vm_ops->close)
			area->vm_ops->close(area);
		if (area->vm_file)
			fput(area->vm_file);
		kmem_cache_free(vm_area_cachep, area);
		return extra;
	}

	/* Work out to one of the ends. */
	if (end == area->vm_end) {
		area->vm_end = addr;
		lock_vma_mappings(area);
		spin_lock(&mm->page_table_lock);
	} else if (addr == area->vm_start) {
		area->vm_pgoff += (end - area->vm_start) >> PAGE_SHIFT;
		area->vm_start = end;
		lock_vma_mappings(area);
		spin_lock(&mm->page_table_lock);
	} else {
	/* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
		/* Add end mapping -- leave beginning for below */
		mpnt = extra;
		extra = NULL;

		mpnt->vm_mm = area->vm_mm;
		mpnt->vm_start = end;
		mpnt->vm_end = area->vm_end;
		mpnt->vm_page_prot = area->vm_page_prot;
		mpnt->vm_flags = area->vm_flags;
		mpnt->vm_raend = 0;
		mpnt->vm_ops = area->vm_ops;
		mpnt->vm_pgoff = area->vm_pgoff + ((end - area->vm_start) >> PAGE_SHIFT);
		mpnt->vm_file = area->vm_file;
		mpnt->vm_private_data = area->vm_private_data;
		if (mpnt->vm_file)
			get_file(mpnt->vm_file);
		if (mpnt->vm_ops && mpnt->vm_ops->open)
			mpnt->vm_ops->open(mpnt);
		area->vm_end = addr;	/* Truncate area */

		/* Because mpnt->vm_file == area->vm_file this locks
		 * things correctly.
		 */
		lock_vma_mappings(area);
		spin_lock(&mm->page_table_lock);
		__insert_vm_struct(mm, mpnt);
	}

	__insert_vm_struct(mm, area);
	spin_unlock(&mm->page_table_lock);
	unlock_vma_mappings(area);
	return extra;
}
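/*
 * Case 4 seen from userspace (illustrative example, not part of the
 * original source): unmapping the middle of a mapping leaves two smaller
 * mappings behind, which is exactly why do_munmap() passes a spare
 * vm_area_struct ("extra") into unmap_fixup().
 */
#if 0
#include <sys/mman.h>

int main(void)
{
	const long page = 4096;
	char *p = mmap(NULL, 3 * page, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	/* Punch a hole in the middle page: one VMA becomes two. */
	munmap(p + page, page);
	return 0;
}
#endif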
/*
 * Try to free as many page directory entries as we can,
 * without having to work very hard at actually scanning
 * the page tables themselves.
 *
 * Right now we try to free page tables if we have a nice
 * PGDIR-aligned area that got free'd up. We could be more
 * granular if we want to, but this is fast and simple,
 * and covers the bad cases.
 *
 * "prev", if it exists, points to a vma before the one
 * we just free'd - but there's no telling how much before.
 */
static void free_pgtables(struct mm_struct * mm, struct vm_area_struct *prev,
	unsigned long start, unsigned long end)
{
	unsigned long first = start & PGDIR_MASK;
	unsigned long last = end + PGDIR_SIZE - 1;
	unsigned long start_index, end_index;

	if (!prev) {
		prev = mm->mmap;
		if (!prev)
			goto no_mmaps;
		if (prev->vm_end > start) {
			if (last > prev->vm_start)
				last = prev->vm_start;
			goto no_mmaps;
		}
	}
	for (;;) {
		struct vm_area_struct *next = prev->vm_next;

		if (next) {
			if (next->vm_start < start) {
				prev = next;
				continue;
			}
			if (last > next->vm_start)
				last = next->vm_start;
		}
		if (prev->vm_end > first)
			first = prev->vm_end + PGDIR_SIZE - 1;
		break;
	}
no_mmaps:
	/*
	 * If the PGD bits are not consecutive in the virtual address, the
	 * old method of shifting the VA >> by PGDIR_SHIFT doesn't work.
	 */
	start_index = pgd_index(first);
	end_index = pgd_index(last);
	if (end_index > start_index) {
		clear_page_tables(mm, start_index, end_index - start_index);
		flush_tlb_pgtables(mm, first & PGDIR_MASK, last & PGDIR_MASK);
	}
}
/* Munmap is split into 2 main parts -- this part which finds
 * what needs doing, and the areas themselves, which do the
 * work.  This now handles partial unmappings.
 * Jeremy Fitzhardine <jeremy@sw.oz.au>
 */
int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
{
	struct vm_area_struct *mpnt, *prev, **npp, *free, *extra;

	if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
		return -EINVAL;

	if ((len = PAGE_ALIGN(len)) == 0)
		return -EINVAL;

	/* Check if this memory area is ok - put it on the temporary
	 * list if so..  The checks here are pretty simple --
	 * every area affected in some way (by any overlap) is put
	 * on the list.  If nothing is put on, nothing is affected.
	 */
	mpnt = find_vma_prev(mm, addr, &prev);
	if (!mpnt)
		return 0;
	/* we have  addr < mpnt->vm_end  */

	if (mpnt->vm_start >= addr+len)
		return 0;

	/* If we'll make "hole", check the vm areas limit */
	if ((mpnt->vm_start < addr && mpnt->vm_end > addr+len)
	    && mm->map_count >= MAX_MAP_COUNT)
		return -ENOMEM;

	/*
	 * We may need one additional vma to fix up the mappings ...
	 * and this is the last chance for an easy error exit.
	 */
	extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!extra)
		return -ENOMEM;

	npp = (prev ? &prev->vm_next : &mm->mmap);
	free = NULL;
	spin_lock(&mm->page_table_lock);
	for ( ; mpnt && mpnt->vm_start < addr+len; mpnt = *npp) {
		*npp = mpnt->vm_next;
		mpnt->vm_next = free;
		free = mpnt;
		if (mm->mmap_avl)
			avl_remove(mpnt, &mm->mmap_avl);
	}
	mm->mmap_cache = NULL;	/* Kill the cache. */
	spin_unlock(&mm->page_table_lock);
	/* Ok - we have the memory areas we should free on the 'free' list,
	 * so release them, and unmap the page range..
	 * If the one of the segments is only being partially unmapped,
	 * it will put new vm_area_struct(s) into the address space.
	 * In that case we have to be careful with VM_DENYWRITE.
	 */
	while ((mpnt = free) != NULL) {
		unsigned long st, end, size;
		struct file *file = NULL;

		free = free->vm_next;

		st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
		end = addr+len;
		end = end > mpnt->vm_end ? mpnt->vm_end : end;
		size = end - st;

		if (mpnt->vm_ops && mpnt->vm_ops->unmap)
			mpnt->vm_ops->unmap(mpnt, st, size);

		if (mpnt->vm_flags & VM_DENYWRITE &&
		    (st != mpnt->vm_start || end != mpnt->vm_end) &&
		    (file = mpnt->vm_file) != NULL) {
			atomic_dec(&file->f_dentry->d_inode->i_writecount);
		}
		remove_shared_vm_struct(mpnt);
		mm->map_count--;

		flush_cache_range(mm, st, end);
		zap_page_range(mm, st, size);
		flush_tlb_range(mm, st, end);

		/*
		 * Fix the mapping, and free the old area if it wasn't reused.
		 */
		extra = unmap_fixup(mm, mpnt, st, size, extra);
		if (file)
			atomic_inc(&file->f_dentry->d_inode->i_writecount);
	}

	/* Release the extra vma struct if it wasn't used */
	if (extra)
		kmem_cache_free(vm_area_cachep, extra);

	free_pgtables(mm, prev, addr, addr+len);

	return 0;
}
asmlinkage long sys_munmap(unsigned long addr, size_t len)
{
	int ret;
	struct mm_struct *mm = current->mm;

	down(&mm->mmap_sem);
	ret = do_munmap(mm, addr, len);
	up(&mm->mmap_sem);
	return ret;
}
/*
 *  this is really a simplified "do_mmap".  it only handles
 *  anonymous maps.  eventually we may be able to do some
 *  brk-specific accounting here.
 */
unsigned long do_brk(unsigned long addr, unsigned long len)
{
	struct mm_struct * mm = current->mm;
	struct vm_area_struct * vma;
	unsigned long flags, retval;

	len = PAGE_ALIGN(len);
	if (!len)
		return addr;

	/* mlock MCL_FUTURE? */
	if (mm->def_flags & VM_LOCKED) {
		unsigned long locked = mm->locked_vm << PAGE_SHIFT;
		locked += len;
		if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
			return -EAGAIN;
	}

	/*
	 * Clear old maps.  this also does some error checking for us
	 */
	retval = do_munmap(mm, addr, len);
	if (retval != 0)
		return retval;

	/* Check against address space limits *after* clearing old maps... */
	if ((mm->total_vm << PAGE_SHIFT) + len
	    > current->rlim[RLIMIT_AS].rlim_cur)
		return -ENOMEM;

	if (mm->map_count > MAX_MAP_COUNT)
		return -ENOMEM;

	if (!vm_enough_memory(len >> PAGE_SHIFT))
		return -ENOMEM;

	flags = vm_flags(PROT_READ|PROT_WRITE|PROT_EXEC,
			MAP_FIXED|MAP_PRIVATE) | mm->def_flags;

	flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

	/* Can we just expand an old anonymous mapping? */
	if (addr) {
		struct vm_area_struct * vma = find_vma(mm, addr-1);
		if (vma && vma->vm_end == addr && !vma->vm_file &&
		    vma->vm_flags == flags) {
			vma->vm_end = addr + len;
			goto out;
		}
	}

	/*
	 * create a vma struct for an anonymous mapping
	 */
	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!vma)
		return -ENOMEM;

	vma->vm_mm = mm;
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_flags = flags;
	vma->vm_page_prot = protection_map[flags & 0x0f];
	vma->vm_ops = NULL;
	vma->vm_pgoff = 0;
	vma->vm_file = NULL;
	vma->vm_private_data = NULL;

	insert_vm_struct(mm, vma);

out:
	mm->total_vm += len >> PAGE_SHIFT;
	if (flags & VM_LOCKED) {
		mm->locked_vm += len >> PAGE_SHIFT;
		make_pages_present(addr, addr + len);
	}
	return addr;
}
/* Build the AVL tree corresponding to the VMA list. */
void build_mmap_avl(struct mm_struct * mm)
{
	struct vm_area_struct * vma;

	mm->mmap_avl = NULL;
	for (vma = mm->mmap; vma; vma = vma->vm_next)
		avl_insert(vma, &mm->mmap_avl);
}
/* Release all mmaps. */
void exit_mmap(struct mm_struct * mm)
{
	struct vm_area_struct * mpnt;

	spin_lock(&mm->page_table_lock);
	mpnt = mm->mmap;
	mm->mmap = mm->mmap_avl = mm->mmap_cache = NULL;
	spin_unlock(&mm->page_table_lock);
	mm->rss = 0;
	mm->total_vm = 0;
	mm->locked_vm = 0;
	while (mpnt) {
		struct vm_area_struct * next = mpnt->vm_next;
		unsigned long start = mpnt->vm_start;
		unsigned long end = mpnt->vm_end;
		unsigned long size = end - start;

		if (mpnt->vm_ops) {
			if (mpnt->vm_ops->unmap)
				mpnt->vm_ops->unmap(mpnt, start, size);
			if (mpnt->vm_ops->close)
				mpnt->vm_ops->close(mpnt);
		}
		mm->map_count--;
		remove_shared_vm_struct(mpnt);
		flush_cache_range(mm, start, end);
		zap_page_range(mm, start, size);
		if (mpnt->vm_file)
			fput(mpnt->vm_file);
		kmem_cache_free(vm_area_cachep, mpnt);
		mpnt = next;
	}

	/* This is just debugging */
	if (mm->map_count)
		printk("exit_mmap: map count is %d\n", mm->map_count);

	clear_page_tables(mm, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
}
/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap ring.  If vm_file is non-NULL
 * then the i_shared_lock must be held here.
 */
void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vmp)
{
	struct vm_area_struct **pprev;
	struct file * file;

	if (!mm->mmap_avl) {
		pprev = &mm->mmap;
		while (*pprev && (*pprev)->vm_start <= vmp->vm_start)
			pprev = &(*pprev)->vm_next;
	} else {
		struct vm_area_struct *prev, *next;
		avl_insert_neighbours(vmp, &mm->mmap_avl, &prev, &next);
		pprev = (prev ? &prev->vm_next : &mm->mmap);
		if (*pprev != next)
			printk("insert_vm_struct: tree inconsistent with list\n");
	}
	vmp->vm_next = *pprev;
	*pprev = vmp;

	mm->map_count++;
	if (mm->map_count >= AVL_MIN_MAP_COUNT && !mm->mmap_avl)
		build_mmap_avl(mm);

	file = vmp->vm_file;
	if (file) {
		struct inode * inode = file->f_dentry->d_inode;
		struct address_space *mapping = inode->i_mapping;
		struct vm_area_struct **head;

		if (vmp->vm_flags & VM_DENYWRITE)
			atomic_dec(&inode->i_writecount);

		head = &mapping->i_mmap;
		if (vmp->vm_flags & VM_SHARED)
			head = &mapping->i_mmap_shared;

		/* insert vmp into inode's share list */
		if ((vmp->vm_next_share = *head) != NULL)
			(*head)->vm_pprev_share = &vmp->vm_next_share;
		*head = vmp;
		vmp->vm_pprev_share = head;
	}
}
void insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vmp)
{
	lock_vma_mappings(vmp);
	spin_lock(&current->mm->page_table_lock);
	__insert_vm_struct(mm, vmp);
	spin_unlock(&current->mm->page_table_lock);
	unlock_vma_mappings(vmp);
}