6 #include <linux/slab.h>
8 #include <linux/mman.h>
9 #include <linux/pagemap.h>
10 #include <linux/swap.h>
11 #include <linux/swapctl.h>
12 #include <linux/smp_lock.h>
13 #include <linux/init.h>
14 #include <linux/file.h>
16 #include <asm/uaccess.h>
17 #include <asm/pgtable.h>
19 /* description of effects of mapping type and prot in current implementation.
20 * this is due to the limited x86 page protection hardware. The expected
21 * behavior is in parens:
24 * PROT_NONE PROT_READ PROT_WRITE PROT_EXEC
25 * MAP_SHARED r: (no) no r: (yes) yes r: (no) yes r: (no) yes
26 * w: (no) no w: (no) no w: (yes) yes w: (no) no
27 * x: (no) no x: (no) yes x: (no) yes x: (yes) yes
29 * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes
30 * w: (no) no w: (no) no w: (copy) copy w: (no) no
31 * x: (no) no x: (no) yes x: (no) yes x: (yes) yes
34 pgprot_t protection_map
[16] = {
35 __P000
, __P001
, __P010
, __P011
, __P100
, __P101
, __P110
, __P111
,
36 __S000
, __S001
, __S010
, __S011
, __S100
, __S101
, __S110
, __S111
39 /* SLAB cache for vm_area_struct's. */
40 kmem_cache_t
*vm_area_cachep
;
42 int sysctl_overcommit_memory
;
44 /* Check that a process has enough memory to allocate a
45 * new virtual mapping.
47 int vm_enough_memory(long pages
)
49 /* Stupid algorithm to decide if we have enough memory: while
50 * simple, it hopefully works in most obvious cases.. Easy to
51 * fool it, but this should catch most mistakes.
53 /* 23/11/98 NJC: Somewhat less stupid version of algorithm,
54 * which tries to do "TheRightThing". Instead of using half of
55 * (buffers+cache), use the minimum values. Allow an extra 2%
56 * of num_physpages for safety margin.
61 /* Sometimes we want to use more memory than we have. */
62 if (sysctl_overcommit_memory
)
65 free
= atomic_read(&buffermem
) >> PAGE_SHIFT
;
66 free
+= atomic_read(&page_cache_size
);
67 free
+= nr_free_pages
;
68 free
+= nr_swap_pages
;
69 free
-= (page_cache
.min_percent
+ buffer_mem
.min_percent
+ 2)*num_physpages
/100;
73 /* Remove one vm structure from the inode's i_mmap ring. */
74 static inline void remove_shared_vm_struct(struct vm_area_struct
*vma
)
76 struct file
* file
= vma
->vm_file
;
79 if (vma
->vm_flags
& VM_DENYWRITE
)
80 atomic_inc(&file
->f_dentry
->d_inode
->i_writecount
);
81 spin_lock(&file
->f_dentry
->d_inode
->i_shared_lock
);
82 if(vma
->vm_next_share
)
83 vma
->vm_next_share
->vm_pprev_share
= vma
->vm_pprev_share
;
84 *vma
->vm_pprev_share
= vma
->vm_next_share
;
85 spin_unlock(&file
->f_dentry
->d_inode
->i_shared_lock
);
90 * sys_brk() for the most part doesn't need the global kernel
91 * lock, except when an application is doing something nasty
92 * like trying to un-brk an area that has already been mapped
93 * to a regular file. in this case, the unmapping will need
94 * to invoke file system routines that need the global lock.
96 asmlinkage
unsigned long sys_brk(unsigned long brk
)
98 unsigned long rlim
, retval
;
99 unsigned long newbrk
, oldbrk
;
100 struct mm_struct
*mm
= current
->mm
;
104 if (brk
< mm
->end_code
)
106 newbrk
= PAGE_ALIGN(brk
);
107 oldbrk
= PAGE_ALIGN(mm
->brk
);
108 if (oldbrk
== newbrk
)
111 /* Always allow shrinking brk. */
112 if (brk
<= mm
->brk
) {
113 if (!do_munmap(newbrk
, oldbrk
-newbrk
))
118 /* Check against rlimit and stack.. */
119 rlim
= current
->rlim
[RLIMIT_DATA
].rlim_cur
;
120 if (rlim
< RLIM_INFINITY
&& brk
- mm
->end_code
> rlim
)
123 /* Check against existing mmap mappings. */
124 if (find_vma_intersection(mm
, oldbrk
, newbrk
+PAGE_SIZE
))
127 /* Check if we have enough memory.. */
128 if (!vm_enough_memory((newbrk
-oldbrk
) >> PAGE_SHIFT
))
131 /* Ok, looks good - let it rip. */
132 if (do_brk(oldbrk
, newbrk
-oldbrk
) != oldbrk
)
142 /* Combine the mmap "prot" and "flags" argument into one "vm_flags" used
143 * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits
146 static inline unsigned long vm_flags(unsigned long prot
, unsigned long flags
)
148 #define _trans(x,bit1,bit2) \
149 ((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)
151 unsigned long prot_bits
, flag_bits
;
153 _trans(prot
, PROT_READ
, VM_READ
) |
154 _trans(prot
, PROT_WRITE
, VM_WRITE
) |
155 _trans(prot
, PROT_EXEC
, VM_EXEC
);
157 _trans(flags
, MAP_GROWSDOWN
, VM_GROWSDOWN
) |
158 _trans(flags
, MAP_DENYWRITE
, VM_DENYWRITE
) |
159 _trans(flags
, MAP_EXECUTABLE
, VM_EXECUTABLE
);
160 return prot_bits
| flag_bits
;
164 unsigned long do_mmap(struct file
* file
, unsigned long addr
, unsigned long len
,
165 unsigned long prot
, unsigned long flags
, unsigned long off
)
167 struct mm_struct
* mm
= current
->mm
;
168 struct vm_area_struct
* vma
;
171 if (file
&& (!file
->f_op
|| !file
->f_op
->mmap
))
174 if ((len
= PAGE_ALIGN(len
)) == 0)
177 if (len
> TASK_SIZE
|| addr
> TASK_SIZE
-len
)
180 if (off
& ~PAGE_MASK
)
183 /* offset overflow? */
187 /* Too many mappings? */
188 if (mm
->map_count
> MAX_MAP_COUNT
)
191 /* mlock MCL_FUTURE? */
192 if (mm
->def_flags
& VM_LOCKED
) {
193 unsigned long locked
= mm
->locked_vm
<< PAGE_SHIFT
;
195 if (locked
> current
->rlim
[RLIMIT_MEMLOCK
].rlim_cur
)
199 /* Do simple checking here so the lower-level routines won't have
200 * to. we assume access permissions have been handled by the open
201 * of the memory object, so we don't do any here.
204 switch (flags
& MAP_TYPE
) {
206 if ((prot
& PROT_WRITE
) && !(file
->f_mode
& 2))
209 /* Make sure we don't allow writing to an append-only file.. */
210 if (IS_APPEND(file
->f_dentry
->d_inode
) && (file
->f_mode
& 2))
213 /* make sure there are no mandatory locks on the file. */
214 if (locks_verify_locked(file
->f_dentry
->d_inode
))
219 if (!(file
->f_mode
& 1))
226 } else if ((flags
& MAP_TYPE
) != MAP_PRIVATE
)
229 /* Obtain the address to map to. we verify (or select) it and ensure
230 * that it represents a valid section of the address space.
232 if (flags
& MAP_FIXED
) {
233 if (addr
& ~PAGE_MASK
)
236 addr
= get_unmapped_area(addr
, len
);
241 /* Determine the object being mapped and call the appropriate
242 * specific mapper. the address has already been validated, but
243 * not unmapped, but the maps are removed from the list.
245 vma
= kmem_cache_alloc(vm_area_cachep
, SLAB_KERNEL
);
250 vma
->vm_start
= addr
;
251 vma
->vm_end
= addr
+ len
;
252 vma
->vm_flags
= vm_flags(prot
,flags
) | mm
->def_flags
;
255 if (file
->f_mode
& 1)
256 vma
->vm_flags
|= VM_MAYREAD
| VM_MAYWRITE
| VM_MAYEXEC
;
257 if (flags
& MAP_SHARED
) {
258 vma
->vm_flags
|= VM_SHARED
| VM_MAYSHARE
;
260 /* This looks strange, but when we don't have the file open
261 * for writing, we can demote the shared mapping to a simpler
262 * private mapping. That also takes care of a security hole
263 * with ptrace() writing to a shared mapping without write
266 * We leave the VM_MAYSHARE bit on, just to get correct output
267 * from /proc/xxx/maps..
269 if (!(file
->f_mode
& 2))
270 vma
->vm_flags
&= ~(VM_MAYWRITE
| VM_SHARED
);
273 vma
->vm_flags
|= VM_MAYREAD
| VM_MAYWRITE
| VM_MAYEXEC
;
274 vma
->vm_page_prot
= protection_map
[vma
->vm_flags
& 0x0f];
276 vma
->vm_offset
= off
;
282 if (do_munmap(addr
, len
))
285 /* Check against address space limit. */
286 if ((mm
->total_vm
<< PAGE_SHIFT
) + len
287 > current
->rlim
[RLIMIT_AS
].rlim_cur
)
290 /* Private writable mapping? Check memory availability.. */
291 if ((vma
->vm_flags
& (VM_SHARED
| VM_WRITE
)) == VM_WRITE
&&
292 !(flags
& MAP_NORESERVE
) &&
293 !vm_enough_memory(len
>> PAGE_SHIFT
))
297 int correct_wcount
= 0;
298 if (vma
->vm_flags
& VM_DENYWRITE
) {
299 if (atomic_read(&file
->f_dentry
->d_inode
->i_writecount
) > 0) {
303 /* f_op->mmap might possibly sleep
304 * (generic_file_mmap doesn't, but other code
305 * might). In any case, this takes care of any
306 * race that this might cause.
308 atomic_dec(&file
->f_dentry
->d_inode
->i_writecount
);
311 error
= file
->f_op
->mmap(file
, vma
);
312 /* Fix up the count if necessary, then check for an error */
314 atomic_inc(&file
->f_dentry
->d_inode
->i_writecount
);
316 goto unmap_and_free_vma
;
322 * merge_segments may merge our vma, so we can't refer to it
323 * after the call. Save the values we need now ...
325 flags
= vma
->vm_flags
;
326 addr
= vma
->vm_start
; /* can addr have changed?? */
327 insert_vm_struct(mm
, vma
);
328 merge_segments(mm
, vma
->vm_start
, vma
->vm_end
);
330 mm
->total_vm
+= len
>> PAGE_SHIFT
;
331 if (flags
& VM_LOCKED
) {
332 mm
->locked_vm
+= len
>> PAGE_SHIFT
;
333 make_pages_present(addr
, addr
+ len
);
338 /* Undo any partial mapping done by a device driver. */
339 flush_cache_range(mm
, vma
->vm_start
, vma
->vm_end
);
340 zap_page_range(mm
, vma
->vm_start
, vma
->vm_end
- vma
->vm_start
);
341 flush_tlb_range(mm
, vma
->vm_start
, vma
->vm_end
);
343 kmem_cache_free(vm_area_cachep
, vma
);
347 /* Get an address range which is currently unmapped.
348 * For mmap() without MAP_FIXED and shmat() with addr=0.
349 * Return value 0 means ENOMEM.
351 unsigned long get_unmapped_area(unsigned long addr
, unsigned long len
)
353 struct vm_area_struct
* vmm
;
358 addr
= TASK_UNMAPPED_BASE
;
359 addr
= PAGE_ALIGN(addr
);
361 for (vmm
= find_vma(current
->mm
, addr
); ; vmm
= vmm
->vm_next
) {
362 /* At this point: (!vmm || addr < vmm->vm_end). */
363 if (TASK_SIZE
- len
< addr
)
365 if (!vmm
|| addr
+ len
<= vmm
->vm_start
)
371 #define vm_avl_empty (struct vm_area_struct *) NULL
373 #include "mmap_avl.c"
375 /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
376 struct vm_area_struct
* find_vma(struct mm_struct
* mm
, unsigned long addr
)
378 struct vm_area_struct
*vma
= NULL
;
381 /* Check the cache first. */
382 /* (Cache hit rate is typically around 35%.) */
383 vma
= mm
->mmap_cache
;
384 if (!(vma
&& vma
->vm_end
> addr
&& vma
->vm_start
<= addr
)) {
386 /* Go through the linear list. */
388 while (vma
&& vma
->vm_end
<= addr
)
391 /* Then go through the AVL tree quickly. */
392 struct vm_area_struct
* tree
= mm
->mmap_avl
;
395 if (tree
== vm_avl_empty
)
397 if (tree
->vm_end
> addr
) {
399 if (tree
->vm_start
<= addr
)
401 tree
= tree
->vm_avl_left
;
403 tree
= tree
->vm_avl_right
;
407 mm
->mmap_cache
= vma
;
413 /* Same as find_vma, but also return a pointer to the previous VMA in *pprev. */
414 struct vm_area_struct
* find_vma_prev(struct mm_struct
* mm
, unsigned long addr
,
415 struct vm_area_struct
**pprev
)
419 /* Go through the linear list. */
420 struct vm_area_struct
* prev
= NULL
;
421 struct vm_area_struct
* vma
= mm
->mmap
;
422 while (vma
&& vma
->vm_end
<= addr
) {
429 /* Go through the AVL tree quickly. */
430 struct vm_area_struct
* vma
= NULL
;
431 struct vm_area_struct
* last_turn_right
= NULL
;
432 struct vm_area_struct
* prev
= NULL
;
433 struct vm_area_struct
* tree
= mm
->mmap_avl
;
435 if (tree
== vm_avl_empty
)
437 if (tree
->vm_end
> addr
) {
439 prev
= last_turn_right
;
440 if (tree
->vm_start
<= addr
)
442 tree
= tree
->vm_avl_left
;
444 last_turn_right
= tree
;
445 tree
= tree
->vm_avl_right
;
449 if (vma
->vm_avl_left
!= vm_avl_empty
) {
450 prev
= vma
->vm_avl_left
;
451 while (prev
->vm_avl_right
!= vm_avl_empty
)
452 prev
= prev
->vm_avl_right
;
454 if ((prev
? prev
->vm_next
: mm
->mmap
) != vma
)
455 printk("find_vma_prev: tree inconsistent with list\n");
465 struct vm_area_struct
* find_extend_vma(struct task_struct
* tsk
, unsigned long addr
)
467 struct vm_area_struct
* vma
;
471 vma
= find_vma(tsk
->mm
,addr
);
474 if (vma
->vm_start
<= addr
)
476 if (!(vma
->vm_flags
& VM_GROWSDOWN
))
478 start
= vma
->vm_start
;
479 if (expand_stack(vma
, addr
))
481 if (vma
->vm_flags
& VM_LOCKED
) {
482 make_pages_present(addr
, start
);
487 /* Normal function to fix up a mapping
488 * This function is the default for when an area has no specific
489 * function. This may be used as part of a more specific routine.
490 * This function works out what part of an area is affected and
491 * adjusts the mapping information. Since the actual page
492 * manipulation is done in do_mmap(), none need be done here,
493 * though it would probably be more appropriate.
495 * By the time this function is called, the area struct has been
496 * removed from the process mapping list, so it needs to be
497 * reinserted if necessary.
499 * The 4 main cases are:
500 * Unmapping the whole area
501 * Unmapping from the start of the segment to a point in it
502 * Unmapping from an intermediate point to the end
503 * Unmapping between to intermediate points, making a hole.
505 * Case 4 involves the creation of 2 new areas, for each side of
506 * the hole. If possible, we reuse the existing area rather than
507 * allocate a new one, and the return indicates whether the old
510 static struct vm_area_struct
* unmap_fixup(struct vm_area_struct
*area
,
511 unsigned long addr
, size_t len
, struct vm_area_struct
*extra
)
513 struct vm_area_struct
*mpnt
;
514 unsigned long end
= addr
+ len
;
516 area
->vm_mm
->total_vm
-= len
>> PAGE_SHIFT
;
517 if (area
->vm_flags
& VM_LOCKED
)
518 area
->vm_mm
->locked_vm
-= len
>> PAGE_SHIFT
;
520 /* Unmapping the whole area. */
521 if (addr
== area
->vm_start
&& end
== area
->vm_end
) {
522 if (area
->vm_ops
&& area
->vm_ops
->close
)
523 area
->vm_ops
->close(area
);
526 kmem_cache_free(vm_area_cachep
, area
);
530 /* Work out to one of the ends. */
531 if (end
== area
->vm_end
)
533 else if (addr
== area
->vm_start
) {
534 area
->vm_offset
+= (end
- area
->vm_start
);
535 area
->vm_start
= end
;
537 /* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
538 /* Add end mapping -- leave beginning for below */
542 mpnt
->vm_mm
= area
->vm_mm
;
543 mpnt
->vm_start
= end
;
544 mpnt
->vm_end
= area
->vm_end
;
545 mpnt
->vm_page_prot
= area
->vm_page_prot
;
546 mpnt
->vm_flags
= area
->vm_flags
;
547 mpnt
->vm_ops
= area
->vm_ops
;
548 mpnt
->vm_offset
= area
->vm_offset
+ (end
- area
->vm_start
);
549 mpnt
->vm_file
= area
->vm_file
;
550 mpnt
->vm_pte
= area
->vm_pte
;
552 get_file(mpnt
->vm_file
);
553 if (mpnt
->vm_ops
&& mpnt
->vm_ops
->open
)
554 mpnt
->vm_ops
->open(mpnt
);
555 area
->vm_end
= addr
; /* Truncate area */
556 insert_vm_struct(current
->mm
, mpnt
);
559 insert_vm_struct(current
->mm
, area
);
564 * Try to free as many page directory entries as we can,
565 * without having to work very hard at actually scanning
566 * the page tables themselves.
568 * Right now we try to free page tables if we have a nice
569 * PGDIR-aligned area that got free'd up. We could be more
570 * granular if we want to, but this is fast and simple,
571 * and covers the bad cases.
573 * "prev", if it exists, points to a vma before the one
574 * we just free'd - but there's no telling how much before.
576 static void free_pgtables(struct mm_struct
* mm
, struct vm_area_struct
*prev
,
577 unsigned long start
, unsigned long end
)
579 unsigned long first
= start
& PGDIR_MASK
;
580 unsigned long last
= (end
+ PGDIR_SIZE
- 1) & PGDIR_MASK
;
586 if (prev
->vm_end
> start
) {
587 if (last
> prev
->vm_start
)
588 last
= prev
->vm_start
;
593 struct vm_area_struct
*next
= prev
->vm_next
;
596 if (next
->vm_start
< start
) {
600 if (last
> next
->vm_start
)
601 last
= next
->vm_start
;
603 if (prev
->vm_end
> first
)
604 first
= prev
->vm_end
+ PGDIR_SIZE
- 1;
608 first
= first
>> PGDIR_SHIFT
;
609 last
= last
>> PGDIR_SHIFT
;
611 clear_page_tables(mm
, first
, last
-first
);
614 /* Munmap is split into 2 main parts -- this part which finds
615 * what needs doing, and the areas themselves, which do the
616 * work. This now handles partial unmappings.
617 * Jeremy Fitzhardine <jeremy@sw.oz.au>
619 int do_munmap(unsigned long addr
, size_t len
)
621 struct mm_struct
* mm
;
622 struct vm_area_struct
*mpnt
, *prev
, **npp
, *free
, *extra
;
624 if ((addr
& ~PAGE_MASK
) || addr
> TASK_SIZE
|| len
> TASK_SIZE
-addr
)
627 if ((len
= PAGE_ALIGN(len
)) == 0)
630 /* Check if this memory area is ok - put it on the temporary
631 * list if so.. The checks here are pretty simple --
632 * every area affected in some way (by any overlap) is put
633 * on the list. If nothing is put on, nothing is affected.
636 mpnt
= find_vma_prev(mm
, addr
, &prev
);
639 /* we have addr < mpnt->vm_end */
641 if (mpnt
->vm_start
>= addr
+len
)
644 /* If we'll make "hole", check the vm areas limit */
645 if ((mpnt
->vm_start
< addr
&& mpnt
->vm_end
> addr
+len
)
646 && mm
->map_count
>= MAX_MAP_COUNT
)
650 * We may need one additional vma to fix up the mappings ...
651 * and this is the last chance for an easy error exit.
653 extra
= kmem_cache_alloc(vm_area_cachep
, SLAB_KERNEL
);
657 npp
= (prev
? &prev
->vm_next
: &mm
->mmap
);
659 for ( ; mpnt
&& mpnt
->vm_start
< addr
+len
; mpnt
= *npp
) {
660 *npp
= mpnt
->vm_next
;
661 mpnt
->vm_next
= free
;
664 avl_remove(mpnt
, &mm
->mmap_avl
);
667 /* Ok - we have the memory areas we should free on the 'free' list,
668 * so release them, and unmap the page range..
669 * If the one of the segments is only being partially unmapped,
670 * it will put new vm_area_struct(s) into the address space.
672 while ((mpnt
= free
) != NULL
) {
673 unsigned long st
, end
, size
;
675 free
= free
->vm_next
;
677 st
= addr
< mpnt
->vm_start
? mpnt
->vm_start
: addr
;
679 end
= end
> mpnt
->vm_end
? mpnt
->vm_end
: end
;
683 if (mpnt
->vm_ops
&& mpnt
->vm_ops
->unmap
)
684 mpnt
->vm_ops
->unmap(mpnt
, st
, size
);
687 remove_shared_vm_struct(mpnt
);
690 flush_cache_range(mm
, st
, end
);
691 zap_page_range(mm
, st
, size
);
692 flush_tlb_range(mm
, st
, end
);
695 * Fix the mapping, and free the old area if it wasn't reused.
697 extra
= unmap_fixup(mpnt
, st
, size
, extra
);
700 /* Release the extra vma struct if it wasn't used */
702 kmem_cache_free(vm_area_cachep
, extra
);
704 free_pgtables(mm
, prev
, addr
, addr
+len
);
706 mm
->mmap_cache
= NULL
; /* Kill the cache. */
710 asmlinkage
int sys_munmap(unsigned long addr
, size_t len
)
714 down(¤t
->mm
->mmap_sem
);
715 ret
= do_munmap(addr
, len
);
716 up(¤t
->mm
->mmap_sem
);
721 * this is really a simplified "do_mmap". it only handles
722 * anonymous maps. eventually we may be able to do some
723 * brk-specific accounting here.
725 unsigned long do_brk(unsigned long addr
, unsigned long len
)
727 struct mm_struct
* mm
= current
->mm
;
728 struct vm_area_struct
* vma
;
729 unsigned long flags
, retval
;
731 len
= PAGE_ALIGN(len
);
738 if (mm
->def_flags
& VM_LOCKED
) {
739 unsigned long locked
= mm
->locked_vm
<< PAGE_SHIFT
;
741 if (locked
> current
->rlim
[RLIMIT_MEMLOCK
].rlim_cur
)
746 * Clear old maps. this also does some error checking for us
748 retval
= do_munmap(addr
, len
);
752 /* Check against address space limits *after* clearing old maps... */
753 if ((mm
->total_vm
<< PAGE_SHIFT
) + len
754 > current
->rlim
[RLIMIT_AS
].rlim_cur
)
757 if (mm
->map_count
> MAX_MAP_COUNT
)
760 if (!vm_enough_memory(len
>> PAGE_SHIFT
))
764 * create a vma struct for an anonymous mapping
766 vma
= kmem_cache_alloc(vm_area_cachep
, SLAB_KERNEL
);
771 vma
->vm_start
= addr
;
772 vma
->vm_end
= addr
+ len
;
773 vma
->vm_flags
= vm_flags(PROT_READ
|PROT_WRITE
|PROT_EXEC
,
774 MAP_FIXED
|MAP_PRIVATE
) | mm
->def_flags
;
776 vma
->vm_flags
|= VM_MAYREAD
| VM_MAYWRITE
| VM_MAYEXEC
;
777 vma
->vm_page_prot
= protection_map
[vma
->vm_flags
& 0x0f];
784 * merge_segments may merge our vma, so we can't refer to it
785 * after the call. Save the values we need now ...
787 flags
= vma
->vm_flags
;
788 addr
= vma
->vm_start
;
790 insert_vm_struct(mm
, vma
);
791 merge_segments(mm
, vma
->vm_start
, vma
->vm_end
);
793 mm
->total_vm
+= len
>> PAGE_SHIFT
;
794 if (flags
& VM_LOCKED
) {
795 mm
->locked_vm
+= len
>> PAGE_SHIFT
;
796 make_pages_present(addr
, addr
+ len
);
801 /* Build the AVL tree corresponding to the VMA list. */
802 void build_mmap_avl(struct mm_struct
* mm
)
804 struct vm_area_struct
* vma
;
807 for (vma
= mm
->mmap
; vma
; vma
= vma
->vm_next
)
808 avl_insert(vma
, &mm
->mmap_avl
);
811 /* Release all mmaps. */
812 void exit_mmap(struct mm_struct
* mm
)
814 struct vm_area_struct
* mpnt
;
816 release_segments(mm
);
818 mm
->mmap
= mm
->mmap_avl
= mm
->mmap_cache
= NULL
;
823 struct vm_area_struct
* next
= mpnt
->vm_next
;
824 unsigned long start
= mpnt
->vm_start
;
825 unsigned long end
= mpnt
->vm_end
;
826 unsigned long size
= end
- start
;
829 if (mpnt
->vm_ops
->unmap
)
830 mpnt
->vm_ops
->unmap(mpnt
, start
, size
);
831 if (mpnt
->vm_ops
->close
)
832 mpnt
->vm_ops
->close(mpnt
);
835 remove_shared_vm_struct(mpnt
);
836 zap_page_range(mm
, start
, size
);
839 kmem_cache_free(vm_area_cachep
, mpnt
);
843 /* This is just debugging */
845 printk("exit_mmap: map count is %d\n", mm
->map_count
);
847 clear_page_tables(mm
, 0, USER_PTRS_PER_PGD
);
850 /* Insert vm structure into process list sorted by address
851 * and into the inode's i_mmap ring.
853 void insert_vm_struct(struct mm_struct
*mm
, struct vm_area_struct
*vmp
)
855 struct vm_area_struct
**pprev
;
860 while (*pprev
&& (*pprev
)->vm_start
<= vmp
->vm_start
)
861 pprev
= &(*pprev
)->vm_next
;
863 struct vm_area_struct
*prev
, *next
;
864 avl_insert_neighbours(vmp
, &mm
->mmap_avl
, &prev
, &next
);
865 pprev
= (prev
? &prev
->vm_next
: &mm
->mmap
);
867 printk("insert_vm_struct: tree inconsistent with list\n");
869 vmp
->vm_next
= *pprev
;
873 if (mm
->map_count
>= AVL_MIN_MAP_COUNT
&& !mm
->mmap_avl
)
878 struct inode
* inode
= file
->f_dentry
->d_inode
;
879 if (vmp
->vm_flags
& VM_DENYWRITE
)
880 atomic_dec(&inode
->i_writecount
);
882 /* insert vmp into inode's share list */
883 spin_lock(&inode
->i_shared_lock
);
884 if((vmp
->vm_next_share
= inode
->i_mmap
) != NULL
)
885 inode
->i_mmap
->vm_pprev_share
= &vmp
->vm_next_share
;
887 vmp
->vm_pprev_share
= &inode
->i_mmap
;
888 spin_unlock(&inode
->i_shared_lock
);
892 /* Merge the list of memory segments if possible.
893 * Redundant vm_area_structs are freed.
894 * This assumes that the list is ordered by address.
895 * We don't need to traverse the entire list, only those segments
896 * which intersect or are adjacent to a given interval.
898 * We must already hold the mm semaphore when we get here..
900 void merge_segments (struct mm_struct
* mm
, unsigned long start_addr
, unsigned long end_addr
)
902 struct vm_area_struct
*prev
, *mpnt
, *next
, *prev1
;
904 mpnt
= find_vma_prev(mm
, start_addr
, &prev1
);
912 mpnt
= mpnt
->vm_next
;
915 /* prev and mpnt cycle through the list, as long as
916 * start_addr < mpnt->vm_end && prev->vm_start < end_addr
918 for ( ; mpnt
&& prev
->vm_start
< end_addr
; prev
= mpnt
, mpnt
= next
) {
919 next
= mpnt
->vm_next
;
921 /* To share, we must have the same file, operations.. */
922 if ((mpnt
->vm_file
!= prev
->vm_file
)||
923 (mpnt
->vm_pte
!= prev
->vm_pte
) ||
924 (mpnt
->vm_ops
!= prev
->vm_ops
) ||
925 (mpnt
->vm_flags
!= prev
->vm_flags
) ||
926 (prev
->vm_end
!= mpnt
->vm_start
))
930 * If we have a file or it's a shared memory area
931 * the offsets must be contiguous..
933 if ((mpnt
->vm_file
!= NULL
) || (mpnt
->vm_flags
& VM_SHM
)) {
934 unsigned long off
= prev
->vm_offset
+prev
->vm_end
-prev
->vm_start
;
935 if (off
!= mpnt
->vm_offset
)
939 /* merge prev with mpnt and set up pointers so the new
940 * big segment can possibly merge with the next one.
941 * The old unused mpnt is freed.
944 avl_remove(mpnt
, &mm
->mmap_avl
);
945 prev
->vm_end
= mpnt
->vm_end
;
946 prev
->vm_next
= mpnt
->vm_next
;
947 if (mpnt
->vm_ops
&& mpnt
->vm_ops
->close
) {
948 mpnt
->vm_offset
+= mpnt
->vm_end
- mpnt
->vm_start
;
949 mpnt
->vm_start
= mpnt
->vm_end
;
950 mpnt
->vm_ops
->close(mpnt
);
953 remove_shared_vm_struct(mpnt
);
956 kmem_cache_free(vm_area_cachep
, mpnt
);
959 mm
->mmap_cache
= NULL
; /* Kill the cache. */
962 void __init
vma_init(void)
964 vm_area_cachep
= kmem_cache_create("vm_area_struct",
965 sizeof(struct vm_area_struct
),
966 0, SLAB_HWCACHE_ALIGN
,
969 panic("vma_init: Cannot alloc vm_area_struct cache.");
971 mm_cachep
= kmem_cache_create("mm_struct",
972 sizeof(struct mm_struct
),
973 0, SLAB_HWCACHE_ALIGN
,
976 panic("vma_init: Cannot alloc mm_struct cache.");