/*
 * include/linux/mm.h - Linux 2.1.36 (davej-history.git)
 */
#ifndef _LINUX_MM_H
#define _LINUX_MM_H

#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/kernel.h>

#ifdef __KERNEL__

#include <linux/string.h>

extern unsigned long max_mapnr;
extern unsigned long num_physpages;
extern void * high_memory;

#include <asm/page.h>
#include <asm/atomic.h>

/*
 * Linux kernel virtual memory manager primitives.
 * The idea being to have a "virtual" mm in the same way
 * we have a virtual fs - giving a cleaner interface to the
 * mm details, and allowing different kinds of memory mappings
 * (from shared memory to executable loading to arbitrary
 * mmap() functions).
 */

/*
 * This struct defines a virtual memory area. There is one of these
 * per VM-area/task.  A VM area is any part of the process virtual memory
 * space that has a special rule for the page-fault handlers (ie a shared
 * library, the executable area etc).
 */
struct vm_area_struct {
	struct mm_struct * vm_mm;	/* VM area parameters */
	unsigned long vm_start;
	unsigned long vm_end;
	pgprot_t vm_page_prot;
	unsigned short vm_flags;
/* AVL tree of VM areas per task, sorted by address */
	short vm_avl_height;
	struct vm_area_struct * vm_avl_left;
	struct vm_area_struct * vm_avl_right;
/* linked list of VM areas per task, sorted by address */
	struct vm_area_struct * vm_next;
/* for areas with an inode, the circular list inode->i_mmap */
/* for shm areas, the circular list of attaches */
/* otherwise unused */
	struct vm_area_struct * vm_next_share;
	struct vm_area_struct * vm_prev_share;
/* more */
	struct vm_operations_struct * vm_ops;
	unsigned long vm_offset;
	struct inode * vm_inode;
	unsigned long vm_pte;			/* shared mem */
};
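
/*
 * Illustrative sketch, not part of the original header: per-task vmas can
 * be walked through the address-sorted vm_next list hanging off mm->mmap
 * (mm_struct is defined in <linux/sched.h>).  The helper name below is
 * hypothetical and is only meant to make the list linkage concrete.
 */
static inline int example_count_vmas(struct mm_struct * mm)
{
	struct vm_area_struct * vma;
	int count = 0;

	for (vma = mm->mmap; vma; vma = vma->vm_next)
		count++;
	return count;
}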

/*
 * vm_flags..
 */
#define VM_READ		0x0001	/* currently active flags */
#define VM_WRITE	0x0002
#define VM_EXEC		0x0004
#define VM_SHARED	0x0008

#define VM_MAYREAD	0x0010	/* limits for mprotect() etc */
#define VM_MAYWRITE	0x0020
#define VM_MAYEXEC	0x0040
#define VM_MAYSHARE	0x0080

#define VM_GROWSDOWN	0x0100	/* general info on the segment */
#define VM_GROWSUP	0x0200
#define VM_SHM		0x0400	/* shared memory area, don't swap out */
#define VM_DENYWRITE	0x0800	/* ETXTBSY on write attempts.. */

#define VM_EXECUTABLE	0x1000
#define VM_LOCKED	0x2000
#define VM_IO		0x4000	/* Memory mapped I/O or similar */

/* VM_READ | VM_WRITE | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_GROWSDOWN */
#define VM_STACK_FLAGS	0x0177

/*
 * mapping from the currently active vm_flags protection bits (the
 * low four bits) to a page protection mask..
 */
extern pgprot_t protection_map[16];
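
/*
 * Illustrative sketch, not part of the original header: protection_map
 * is indexed with the low four vm_flags bits (VM_READ, VM_WRITE,
 * VM_EXEC, VM_SHARED) to obtain the pgprot_t used for a vma's pages.
 * The helper name is hypothetical.
 */
static inline pgprot_t example_vm_page_prot(unsigned short vm_flags)
{
	return protection_map[vm_flags & 0x0f];
}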

/*
 * These are the virtual MM functions - opening of an area, closing and
 * unmapping it (needed to keep files on disk up-to-date etc), pointer
 * to the functions called when a no-page or a wp-page exception occurs.
 */
struct vm_operations_struct {
	void (*open)(struct vm_area_struct * area);
	void (*close)(struct vm_area_struct * area);
	void (*unmap)(struct vm_area_struct *area, unsigned long, size_t);
	void (*protect)(struct vm_area_struct *area, unsigned long, size_t, unsigned int newprot);
	int (*sync)(struct vm_area_struct *area, unsigned long, size_t, unsigned int flags);
	void (*advise)(struct vm_area_struct *area, unsigned long, size_t, unsigned int advise);
	unsigned long (*nopage)(struct vm_area_struct * area, unsigned long address, int write_access);
	unsigned long (*wppage)(struct vm_area_struct * area, unsigned long address,
		unsigned long page);
	int (*swapout)(struct vm_area_struct *, unsigned long, pte_t *);
	pte_t (*swapin)(struct vm_area_struct *, unsigned long, unsigned long);
};
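
/*
 * Illustrative sketch, not part of the original header: a mapping that
 * only needs to resolve missing pages can leave every operation NULL
 * except nopage.  All names below are hypothetical, and the block is
 * kept under "#if 0" so it is never compiled.
 */
#if 0
static unsigned long example_nopage(struct vm_area_struct * area,
	unsigned long address, int write_access)
{
	/* return the address of a page backing the faulting address, or 0 on failure */
	return 0;
}

static struct vm_operations_struct example_vm_ops = {
	NULL,			/* open */
	NULL,			/* close */
	NULL,			/* unmap */
	NULL,			/* protect */
	NULL,			/* sync */
	NULL,			/* advise */
	example_nopage,		/* nopage */
	NULL,			/* wppage */
	NULL,			/* swapout */
	NULL,			/* swapin */
};
#endif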

/*
 * Try to keep the most commonly accessed fields in single cache lines
 * here (16 bytes or greater).  This ordering should be particularly
 * beneficial on 32-bit processors.
 *
 * The first line is data used in page cache lookup, the second line
 * is used for linear searches (eg. clock algorithm scans).
 */
typedef struct page {
	/* these must be first (free area handling) */
	struct page *next;
	struct page *prev;
	struct inode *inode;
	unsigned long offset;
	struct page *next_hash;
	atomic_t count;
	unsigned flags;		/* atomic flags, some possibly updated asynchronously */
	unsigned dirty:16,
		 age:8;
	struct wait_queue *wait;
	struct page **pprev_hash;
	struct buffer_head * buffers;
	unsigned long swap_unlock_entry;
	unsigned long map_nr;	/* page->map_nr == page - mem_map */
} mem_map_t;

/* Page flag bit values */
#define PG_locked		 0
#define PG_error		 1
#define PG_referenced		 2
#define PG_uptodate		 3
#define PG_free_after		 4
#define PG_decr_after		 5
#define PG_swap_unlock_after	 6
#define PG_DMA			 7
#define PG_reserved		31

/* Make it prettier to test the above... */
#define PageLocked(page)	(test_bit(PG_locked, &(page)->flags))
#define PageError(page)		(test_bit(PG_error, &(page)->flags))
#define PageReferenced(page)	(test_bit(PG_referenced, &(page)->flags))
#define PageDirty(page)		(test_bit(PG_dirty, &(page)->flags))
#define PageUptodate(page)	(test_bit(PG_uptodate, &(page)->flags))
#define PageFreeAfter(page)	(test_bit(PG_free_after, &(page)->flags))
#define PageDecrAfter(page)	(test_bit(PG_decr_after, &(page)->flags))
#define PageSwapUnlockAfter(page) (test_bit(PG_swap_unlock_after, &(page)->flags))
#define PageDMA(page)		(test_bit(PG_DMA, &(page)->flags))
#define PageReserved(page)	(test_bit(PG_reserved, &(page)->flags))

/*
 * page->reserved denotes a page which must never be accessed (which
 * may not even be present).
 *
 * page->dma is set for those pages which lie in the range of
 * physical addresses capable of carrying DMA transfers.
 *
 * Multiple processes may "see" the same page. E.g. for untouched
 * mappings of /dev/null, all processes see the same page full of
 * zeroes, and text pages of executables and shared libraries have
 * only one copy in memory, at most, normally.
 *
 * For the non-reserved pages, page->count denotes a reference count.
 *   page->count == 0 means the page is free.
 *   page->count == 1 means the page is used for exactly one purpose
 *   (e.g. a private data page of one process).
 *
 * A page may be used for kmalloc() or anyone else who does a
 * get_free_page(). In this case the page->count is at least 1, and
 * all other fields are unused but should be 0 or NULL. The
 * management of this page is the responsibility of the one who uses
 * it.
 *
 * The other pages (we may call them "process pages") are completely
 * managed by the Linux memory manager: I/O, buffers, swapping etc.
 * The following discussion applies only to them.
 *
 * A page may belong to an inode's memory mapping. In this case,
 * page->inode is the inode, and page->offset is the file offset
 * of the page (not necessarily a multiple of PAGE_SIZE).
 *
 * A page may have buffers allocated to it. In this case,
 * page->buffers is a circular list of these buffer heads. Else,
 * page->buffers == NULL.
 *
 * For pages belonging to inodes, the page->count is the number of
 * attaches, plus 1 if buffers are allocated to the page.
 *
 * All pages belonging to an inode make up a doubly linked list
 * inode->i_pages, using the fields page->next and page->prev. (These
 * fields are also used for freelist management when page->count == 0.)
 * There is also a hash table mapping (inode,offset) to the page
 * in memory if present. The lists for this hash table use the fields
 * page->next_hash and page->pprev_hash.
 *
 * All process pages can do I/O:
 * - inode pages may need to be read from disk,
 * - inode pages which have been modified and are MAP_SHARED may need
 *   to be written to disk,
 * - private pages which have been modified may need to be swapped out
 *   to swap space and (later) to be read back into memory.
 * During disk I/O, page->locked is true. This bit is set before I/O
 * and reset when I/O completes. page->wait is a wait queue of all
 * tasks waiting for the I/O on this page to complete.
 * page->uptodate tells whether the page's contents are valid.
 * When a read completes, the page becomes uptodate, unless a disk I/O
 * error happened.
 * When a write completes, and page->free_after is true, the page is
 * freed without any further delay.
 *
 * For choosing which pages to swap out, inode pages carry a
 * page->referenced bit, which is set any time the system accesses
 * that page through the (inode,offset) hash table.
 * There is also the page->age counter, which implements a linear
 * decay (why not an exponential decay?), see swapctl.h.
 */
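
/*
 * Illustrative sketch, not part of the original header: reading the
 * fields described above for a page-cache page.  Helper names are
 * hypothetical.
 */
static inline int example_page_in_page_cache(struct page * page)
{
	/* a page-cache page has page->inode set; reserved pages never qualify */
	return !PageReserved(page) && page->inode != NULL;
}

static inline int example_page_matches(struct page * page,
	struct inode * inode, unsigned long offset)
{
	/* the (inode, offset) pair is what the page hash table is keyed on */
	return page->inode == inode && page->offset == offset;
}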

extern mem_map_t * mem_map;

/*
 * This is timing-critical - most of the time in getting a new page
 * goes to clearing the page. If you want a page without the clearing
 * overhead, just use __get_free_page() directly..
 */
#define __get_free_page(priority) __get_free_pages((priority),0,0)
#define __get_dma_pages(priority, order) __get_free_pages((priority),(order),1)
extern unsigned long __get_free_pages(int priority, unsigned long gfporder, int dma);

extern inline unsigned long get_free_page(int priority)
{
	unsigned long page;

	page = __get_free_page(priority);
	if (page)
		clear_page(page);
	return page;
}

/* memory.c & swap.c */

#define free_page(addr) free_pages((addr),0)
extern void free_pages(unsigned long addr, unsigned long order);
extern void __free_page(struct page *);

extern void show_free_areas(void);
extern unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page,
	unsigned long address);

extern void free_page_tables(struct mm_struct * mm);
extern void clear_page_tables(struct task_struct * tsk);
extern int new_page_tables(struct task_struct * tsk);
extern int copy_page_tables(struct task_struct * to);

extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size);
extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma);
extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot);
extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);

extern void vmtruncate(struct inode * inode, unsigned long offset);
extern void handle_mm_fault(struct vm_area_struct *vma, unsigned long address, int write_access);
extern void do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long address, int write_access);
extern void do_no_page(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long address, int write_access);

extern unsigned long paging_init(unsigned long start_mem, unsigned long end_mem);
extern void mem_init(unsigned long start_mem, unsigned long end_mem);
extern void show_mem(void);
extern void oom(struct task_struct * tsk);
extern void si_meminfo(struct sysinfo * val);

/* mmap.c */
extern void vma_init(void);
extern unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
	unsigned long prot, unsigned long flags, unsigned long off);
extern void merge_segments(struct mm_struct *, unsigned long, unsigned long);
extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
extern void remove_shared_vm_struct(struct vm_area_struct *);
extern void build_mmap_avl(struct mm_struct *);
extern void exit_mmap(struct mm_struct *);
extern int do_munmap(unsigned long, size_t);
extern unsigned long get_unmapped_area(unsigned long, unsigned long);

/* filemap.c */
extern unsigned long page_unuse(unsigned long);
extern int shrink_mmap(int, int);
extern void truncate_inode_pages(struct inode *, unsigned long);

#define GFP_BUFFER	0x00
#define GFP_ATOMIC	0x01
#define GFP_USER	0x02
#define GFP_KERNEL	0x03
#define GFP_NOBUFFER	0x04
#define GFP_NFS		0x05

/* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
   platforms, used as appropriate on others */

#define GFP_DMA		0x80

#define GFP_LEVEL_MASK	0xf
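
/*
 * Illustrative sketch, not part of the original header: choosing an
 * allocation priority.  GFP_KERNEL allocations may sleep, so they are
 * only safe in process context; GFP_ATOMIC is intended for code that
 * must not sleep (interrupts, bottom halves).  The helper name is
 * hypothetical.
 */
static inline unsigned long example_grab_page(int may_sleep)
{
	unsigned long page = get_free_page(may_sleep ? GFP_KERNEL : GFP_ATOMIC);

	/* get_free_page() returns a cleared page, or 0 on failure;
	   the caller eventually releases it with free_page(page) */
	return page;
}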

/* vma is the first one with address < vma->vm_end,
 * and even  address < vma->vm_start. Have to extend vma. */
static inline int expand_stack(struct vm_area_struct * vma, unsigned long address)
{
	unsigned long grow;

	address &= PAGE_MASK;
	grow = vma->vm_start - address;
	/* refuse to grow beyond the stack-size or address-space rlimits */
	if (vma->vm_end - address
	    > (unsigned long) current->rlim[RLIMIT_STACK].rlim_cur ||
	    (vma->vm_mm->total_vm << PAGE_SHIFT) + grow
	    > (unsigned long) current->rlim[RLIMIT_AS].rlim_cur)
		return -ENOMEM;
	/* move vm_start down and keep vm_offset consistent with it */
	vma->vm_start = address;
	vma->vm_offset -= grow;
	vma->vm_mm->total_vm += grow >> PAGE_SHIFT;
	if (vma->vm_flags & VM_LOCKED)
		vma->vm_mm->locked_vm += grow >> PAGE_SHIFT;
	return 0;
}

#define avl_empty	(struct vm_area_struct *) NULL

/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
static inline struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
{
	struct vm_area_struct * result = NULL;

	if (mm) {
		/* walk the per-mm AVL tree, remembering the lowest vma that ends above addr */
		struct vm_area_struct ** next = &mm->mmap_avl;
		for (;;) {
			struct vm_area_struct *tree = *next;
			if (tree == avl_empty)
				break;
			next = &tree->vm_avl_right;
			if (tree->vm_end <= addr)
				continue;	/* ends too low: look in the right subtree */
			next = &tree->vm_avl_left;
			result = tree;
			if (tree->vm_start <= addr)
				break;		/* addr lies inside this vma */
		}
	}
	return result;
}
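
/*
 * Illustrative sketch, not part of the original header: the pattern the
 * architecture page-fault handlers use to combine find_vma() with
 * expand_stack() when a fault lands in the gap just below a grows-down
 * stack vma.  The helper name is hypothetical; error reporting is elided.
 */
static inline struct vm_area_struct * example_find_or_grow_vma(struct mm_struct * mm,
	unsigned long address)
{
	struct vm_area_struct * vma = find_vma(mm, address);

	if (!vma)
		return NULL;			/* nothing mapped at or above address */
	if (vma->vm_start <= address)
		return vma;			/* address is inside this vma */
	if (!(vma->vm_flags & VM_GROWSDOWN))
		return NULL;			/* a real hole, not a stack */
	if (expand_stack(vma, address))
		return NULL;			/* would exceed an rlimit */
	return vma;
}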

/* Look up the first VMA which intersects the interval start_addr..end_addr-1,
   NULL if none.  Assume start_addr < end_addr. */
static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
{
	struct vm_area_struct * vma;

	vma = find_vma(mm, start_addr);
	if (vma && end_addr <= vma->vm_start)
		vma = NULL;
	return vma;
}
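
/*
 * Illustrative sketch, not part of the original header: using
 * find_vma_intersection() to ask whether anything is already mapped in
 * a prospective range.  The helper name is hypothetical.
 */
static inline int example_range_is_free(struct mm_struct * mm,
	unsigned long addr, unsigned long len)
{
	/* a non-NULL result means some existing vma overlaps [addr, addr+len) */
	return find_vma_intersection(mm, addr, addr + len) == NULL;
}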

#endif /* __KERNEL__ */

#endif