include/linux/mm.h

   1 #ifndef _LINUX_MM_H
   2 #define _LINUX_MM_H
   3
   4 #include <linux/sched.h>
   5 #include <linux/errno.h>
   6
   7 #ifdef __KERNEL__
   8
   9 #include <linux/string.h>
  10
  11 extern unsigned long max_mapnr;
  12 extern unsigned long num_physpages;
  13 extern void * high_memory;
  14 extern int page_cluster;
  15
  16 #include <asm/page.h>
  17 #include <asm/atomic.h>
  18
  19 /*
  20  * Linux kernel virtual memory manager primitives.
  21  * The idea being to have a "virtual" mm in the same way
  22  * we have a virtual fs - giving a cleaner interface to the
  23  * mm details, and allowing different kinds of memory mappings
  24  * (from shared memory to executable loading to arbitrary
  25  * mmap() functions).
  26  */
  27
  28 /*
  29  * This struct defines a memory VMM memory area. There is one of these
  30  * per VM-area/task.  A VM area is any part of the process virtual memory
  31  * space that has a special rule for the page-fault handlers (ie a shared
  32  * library, the executable area etc).
  33  */
  34 struct vm_area_struct {
  35         struct mm_struct * vm_mm;       /* VM area parameters */
  36         unsigned long vm_start;
  37         unsigned long vm_end;
  38
  39         /* linked list of VM areas per task, sorted by address */
  40         struct vm_area_struct *vm_next;
  41
  42         pgprot_t vm_page_prot;
  43         unsigned short vm_flags;
  44
  45         /* AVL tree of VM areas per task, sorted by address */
  46         short vm_avl_height;
  47         struct vm_area_struct * vm_avl_left;
  48         struct vm_area_struct * vm_avl_right;
  49
  50         /* For areas with inode, the list inode->i_mmap, for shm areas,
  51          * the list of attaches, otherwise unused.
  52          */
  53         struct vm_area_struct *vm_next_share;
  54         struct vm_area_struct **vm_pprev_share;
  55
  56         struct vm_operations_struct * vm_ops;
  57         unsigned long vm_offset;
  58         struct file * vm_file;
  59         unsigned long vm_pte;                   /* shared mem */
  60 };
  61
  62 /*
  63  * vm_flags..
  64  */
  65 #define VM_READ         0x0001  /* currently active flags */
  66 #define VM_WRITE        0x0002
  67 #define VM_EXEC         0x0004
  68 #define VM_SHARED       0x0008
  69
  70 #define VM_MAYREAD      0x0010  /* limits for mprotect() etc */
  71 #define VM_MAYWRITE     0x0020
  72 #define VM_MAYEXEC      0x0040
  73 #define VM_MAYSHARE     0x0080
  74
  75 #define VM_GROWSDOWN    0x0100  /* general info on the segment */
  76 #define VM_GROWSUP      0x0200
  77 #define VM_SHM          0x0400  /* shared memory area, don't swap out */
  78 #define VM_DENYWRITE    0x0800  /* ETXTBSY on write attempts.. */
  79
  80 #define VM_EXECUTABLE   0x1000
  81 #define VM_LOCKED       0x2000
  82 #define VM_IO           0x4000  /* Memory mapped I/O or similar */
  83
  84 #define VM_STACK_FLAGS  0x0177
  85
  86 /*
  87  * mapping from the currently active vm_flags protection bits (the
  88  * low four bits) to a page protection mask..
  89  */
  90 extern pgprot_t protection_map[16];
  91
  92
  93 /*
  94  * These are the virtual MM functions - opening of an area, closing and
  95  * unmapping it (needed to keep files on disk up-to-date etc), pointer
  96  * to the functions called when a no-page or a wp-page exception occurs.
  97  */
  98 struct vm_operations_struct {
  99         void (*open)(struct vm_area_struct * area);
 100         void (*close)(struct vm_area_struct * area);
 101         void (*unmap)(struct vm_area_struct *area, unsigned long, size_t);
 102         void (*protect)(struct vm_area_struct *area, unsigned long, size_t, unsigned int newprot);
 103         int (*sync)(struct vm_area_struct *area, unsigned long, size_t, unsigned int flags);
 104         void (*advise)(struct vm_area_struct *area, unsigned long, size_t, unsigned int advise);
 105         unsigned long (*nopage)(struct vm_area_struct * area, unsigned long address, int write_access);
 106         unsigned long (*wppage)(struct vm_area_struct * area, unsigned long address,
 107                 unsigned long page);
 108         int (*swapout)(struct vm_area_struct *, struct page *);
 109 };
 110
 111 /*
 112  * Try to keep the most commonly accessed fields in single cache lines
 113  * here (16 bytes or greater).  This ordering should be particularly
 114  * beneficial on 32-bit processors.
 115  *
 116  * The first line is data used in page cache lookup, the second line
 117  * is used for linear searches (eg. clock algorithm scans).
 118  */
 119 typedef struct page {
 120         /* these must be first (free area handling) */
 121         struct page *next;
 122         struct page *prev;
 123         struct inode *inode;
 124         unsigned long offset;
 125         struct page *next_hash;
 126         atomic_t count;
 127         unsigned long flags;    /* atomic flags, some possibly updated asynchronously */
 128         wait_queue_head_t wait;
 129         struct page **pprev_hash;
 130         struct buffer_head * buffers;
 131         void *owner; /* temporary debugging check */
 132 } mem_map_t;
 133
 134 #define get_page(p) do { atomic_inc(&(p)->count); \
 135                                                 } while (0)
 136 #define put_page(p) __free_page(p)
 137 #define put_page_testzero(p) ({ int __ret = atomic_dec_and_test(&(p)->count);\
 138                                 __ret; })
 139 #define page_count(p) atomic_read(&(p)->count)
 140 #define set_page_count(p,v) do { atomic_set(&(p)->count, v); \
 141                                 } while (0)
 142
 143 /* Page flag bit values */
 144 #define PG_locked                0
 145 #define PG_error                 1
 146 #define PG_referenced            2
 147 #define PG_uptodate              3
 148 #define PG_free_after            4
 149 #define PG_decr_after            5
 150 #define PG_free_swap_after       6
 151 #define PG_DMA                   7
 152 #define PG_Slab                  8
 153 #define PG_swap_cache            9
 154 #define PG_skip                 10
 155                                 /* bits 21-30 unused */
 156 #define PG_reserved             31
 157
 158
 159 /* Make it prettier to test the above... */
 160 #define Page_Uptodate(page)     (test_bit(PG_uptodate, &(page)->flags))
 161 #define SetPageUptodate(page)   do { set_bit(PG_uptodate, &(page)->flags); \
 162                                         } while (0)
 163 #define ClearPageUptodate(page) do { clear_bit(PG_uptodate, &(page)->flags); \
 164                                         } while (0)
 165 #define PageLocked(page)        (test_bit(PG_locked, &(page)->flags))
 166 #define LockPage(page)          \
 167         do { int _ret = test_and_set_bit(PG_locked, &(page)->flags); \
 168         if (_ret) PAGE_BUG(page); \
 169         if (page->owner) PAGE_BUG(page); \
 170         page->owner = current; } while (0)
 171 #define TryLockPage(page)       ({ int _ret = test_and_set_bit(PG_locked, &(page)->flags); \
 172                                 if (!_ret) page->owner = current; _ret; })
 173 #define UnlockPage(page)        do { \
 174                                         if (page->owner != current) { \
 175 BUG(); } page->owner = 0; \
 176 if (!test_and_clear_bit(PG_locked, &(page)->flags)) { \
 177                         PAGE_BUG(page); } wake_up(&page->wait); } while (0)
 178 #define PageError(page)         (test_bit(PG_error, &(page)->flags))
 179 #define SetPageError(page)      ({ int _ret = test_and_set_bit(PG_error, &(page)->flags); _ret; })
 180 #define ClearPageError(page)    do { if (!test_and_clear_bit(PG_error, &(page)->flags)) BUG(); } while (0)
 181 #define PageReferenced(page)    (test_bit(PG_referenced, &(page)->flags))
 182 #define PageFreeAfter(page)     (test_bit(PG_free_after, &(page)->flags))
 183 #define PageDecrAfter(page)     (test_bit(PG_decr_after, &(page)->flags))
 184 #define PageSwapUnlockAfter(page) (test_bit(PG_free_swap_after, &(page)->flags))
 185 #define PageDMA(page)           (test_bit(PG_DMA, &(page)->flags))
 186 #define PageSlab(page)          (test_bit(PG_Slab, &(page)->flags))
 187 #define PageSwapCache(page)     (test_bit(PG_swap_cache, &(page)->flags))
 188 #define PageReserved(page)      (test_bit(PG_reserved, &(page)->flags))
 189
 190 #define PageSetSlab(page)       (set_bit(PG_Slab, &(page)->flags))
 191 #define PageSetSwapCache(page)  (set_bit(PG_swap_cache, &(page)->flags))
 192
 193 #define PageTestandSetSwapCache(page)   \
 194                         (test_and_set_bit(PG_swap_cache, &(page)->flags))
 195
 196 #define PageClearSlab(page)     (clear_bit(PG_Slab, &(page)->flags))
 197 #define PageClearSwapCache(page)(clear_bit(PG_swap_cache, &(page)->flags))
 198
 199 #define PageTestandClearSwapCache(page) \
 200                         (test_and_clear_bit(PG_swap_cache, &(page)->flags))
 201
 202 /*
 203  * Various page->flags bits:
 204  *
 205  * PG_reserved is set for a page which must never be accessed (which
 206  * may not even be present).
 207  *
 208  * PG_DMA is set for those pages which lie in the range of
 209  * physical addresses capable of carrying DMA transfers.
 210  *
 211  * Multiple processes may "see" the same page. E.g. for untouched
 212  * mappings of /dev/zero, all processes see the same page full of
 213  * zeroes, and text pages of executables and shared libraries have
 214  * only one copy in memory, at most, normally.
 215  *
 216  * For the non-reserved pages, page->count denotes a reference count.
 217  *   page->count == 0 means the page is free.
 218  *   page->count == 1 means the page is used for exactly one purpose
 219  *   (e.g. a private data page of one process).
 220  *
 221  * A page may be used for kmalloc() or anyone else who does a
 222  * get_free_page(). In this case the page->count is at least 1, and
 223  * all other fields are unused but should be 0 or NULL. The
 224  * management of this page is the responsibility of the one who uses
 225  * it.
 226  *
 227  * The other pages (we may call them "process pages") are completely
 228  * managed by the Linux memory manager: I/O, buffers, swapping etc.
 229  * The following discussion applies only to them.
 230  *
 231  * A page may belong to an inode's memory mapping. In this case,
 232  * page->inode is the pointer to the inode, and page->offset is the
 233  * file offset of the page (not necessarily a multiple of PAGE_SIZE).
 234  *
 235  * A page may have buffers allocated to it. In this case,
 236  * page->buffers is a circular list of these buffer heads. Else,
 237  * page->buffers == NULL.
 238  *
 239  * For pages belonging to inodes, the page->count is the number of
 240  * attaches, plus 1 if buffers are allocated to the page.
 241  *
 242  * All pages belonging to an inode make up a doubly linked list
 243  * inode->i_pages, using the fields page->next and page->prev. (These
 244  * fields are also used for freelist management when page->count==0.)
 245  * There is also a hash table mapping (inode,offset) to the page
 246  * in memory if present. The lists for this hash table use the fields
 247  * page->next_hash and page->pprev_hash.
 248  *
 249  * All process pages can do I/O:
 250  * - inode pages may need to be read from disk,
 251  * - inode pages which have been modified and are MAP_SHARED may need
 252  *   to be written to disk,
 253  * - private pages which have been modified may need to be swapped out
 254  *   to swap space and (later) to be read back into memory.
 255  * During disk I/O, PG_locked is used. This bit is set before I/O
 256  * and reset when I/O completes. page->wait is a wait queue of all
 257  * tasks waiting for the I/O on this page to complete.
 258  * PG_uptodate tells whether the page's contents are valid.
 259  * When a read completes, the page becomes uptodate, unless a disk I/O
 260  * error happened.
 261  * When a write completes, and PG_free_after is set, the page is
 262  * freed without any further delay.
 263  *
 264  * For choosing which pages to swap out, inode pages carry a
 265  * PG_referenced bit, which is set any time the system accesses
 266  * that page through the (inode,offset) hash table.
 267  *
 268  * PG_skip is used on sparc/sparc64 architectures to "skip" certain
 269  * parts of the address space.
 270  *
 271  * PG_error is set to indicate that an I/O error occurred on this page.
 272  */
 273
 274 extern mem_map_t * mem_map;
 275
 276 /*
 277  * This is timing-critical - most of the time in getting a new page
 278  * goes to clearing the page. If you want a page without the clearing
 279  * overhead, just use __get_free_page() directly..
 280  */
 281 #define __get_free_page(gfp_mask) __get_free_pages((gfp_mask),0)
 282 #define __get_dma_pages(gfp_mask, order) __get_free_pages((gfp_mask) | GFP_DMA,(order))
 283 extern unsigned long FASTCALL(__get_free_pages(int gfp_mask, unsigned long gfp_order));
 284
 285 extern inline unsigned long get_free_page(int gfp_mask)
 286 {
 287         unsigned long page;
 288
 289         page = __get_free_page(gfp_mask);
 290         if (page)
 291                 clear_page(page);
 292         return page;
 293 }
 294
 295 extern int low_on_memory;
 296
 297 /* memory.c & swap.c*/
 298
 299 #define free_page(addr) free_pages((addr),0)
 300 extern int FASTCALL(free_pages(unsigned long addr, unsigned long order));
 301 extern int FASTCALL(__free_page(struct page *));
 302
 303 extern void show_free_areas(void);
 304 extern unsigned long put_dirty_page(struct task_struct * tsk,unsigned long page,
 305         unsigned long address);
 306
 307 extern void free_page_tables(struct mm_struct * mm);
 308 extern void clear_page_tables(struct mm_struct *, unsigned long, int);
 309 extern int new_page_tables(struct task_struct * tsk);
 310
 311 extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size);
 312 extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma);
 313 extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot);
 314 extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);
 315
 316 extern void vmtruncate(struct inode * inode, unsigned long offset);
 317 extern int handle_mm_fault(struct task_struct *tsk,struct vm_area_struct *vma, unsigned long address, int write_access);
 318 extern int make_pages_present(unsigned long addr, unsigned long end);
 319 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
 320 extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len);
 321 extern int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len);
 322
 323 extern int pgt_cache_water[2];
 324 extern int check_pgt_cache(void);
 325
 326 extern unsigned long paging_init(unsigned long start_mem, unsigned long end_mem);
 327 extern void mem_init(unsigned long start_mem, unsigned long end_mem);
 328 extern void show_mem(void);
 329 extern void oom(struct task_struct * tsk);
 330 extern void si_meminfo(struct sysinfo * val);
 331 extern void swapin_readahead(unsigned long);
 332
 333 /* mmap.c */
 334 extern void vma_init(void);
 335 extern void merge_segments(struct mm_struct *, unsigned long, unsigned long);
 336 extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
 337 extern void build_mmap_avl(struct mm_struct *);
 338 extern void exit_mmap(struct mm_struct *);
 339 extern unsigned long get_unmapped_area(unsigned long, unsigned long);
 340
 341 extern unsigned long do_mmap(struct file *, unsigned long, unsigned long,
 342         unsigned long, unsigned long, unsigned long);
 343 extern int do_munmap(unsigned long, size_t);
 344 extern unsigned long do_brk(unsigned long, unsigned long);
 345
 346 /* filemap.c */
 347 extern void remove_inode_page(struct page *);
 348 extern unsigned long page_unuse(struct page *);
 349 extern int shrink_mmap(int, int);
 350 extern void truncate_inode_pages(struct inode *, unsigned long);
 351 extern unsigned long get_cached_page(struct inode *, unsigned long, int);
 352 extern void put_cached_page(unsigned long);
 353
 354 /*
 355  * GFP bitmasks..
 356  */
 357 #define __GFP_WAIT      0x01
 358 #define __GFP_LOW       0x02
 359 #define __GFP_MED       0x04
 360 #define __GFP_HIGH      0x08
 361 #define __GFP_IO        0x10
 362 #define __GFP_SWAP      0x20
 363
 364 #define __GFP_DMA       0x80
 365
 366 #define GFP_BUFFER      (__GFP_LOW | __GFP_WAIT)
 367 #define GFP_ATOMIC      (__GFP_HIGH)
 368 #define GFP_USER        (__GFP_LOW | __GFP_WAIT | __GFP_IO)
 369 #define GFP_KERNEL      (__GFP_MED | __GFP_WAIT | __GFP_IO)
 370 #define GFP_NFS         (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
 371 #define GFP_KSWAPD      (__GFP_IO | __GFP_SWAP)
 372
 373 /* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
 374    platforms, used as appropriate on others */
 375
 376 #define GFP_DMA         __GFP_DMA
 377
 378 /* vma is the first one with  address < vma->vm_end,
 379  * and even  address < vma->vm_start. Have to extend vma. */
 380 static inline int expand_stack(struct vm_area_struct * vma, unsigned long address)
 381 {
 382         unsigned long grow;
 383
 384         address &= PAGE_MASK;
 385         grow = vma->vm_start - address;
 386         if (vma->vm_end - address
 387             > (unsigned long) current->rlim[RLIMIT_STACK].rlim_cur ||
 388             (vma->vm_mm->total_vm << PAGE_SHIFT) + grow
 389             > (unsigned long) current->rlim[RLIMIT_AS].rlim_cur)
 390                 return -ENOMEM;
 391         vma->vm_start = address;
 392         vma->vm_offset -= grow;
 393         vma->vm_mm->total_vm += grow >> PAGE_SHIFT;
 394         if (vma->vm_flags & VM_LOCKED)
 395                 vma->vm_mm->locked_vm += grow >> PAGE_SHIFT;
 396         return 0;
 397 }
 398
 399 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 400 extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
 401
 402 /* Look up the first VMA which intersects the interval start_addr..end_addr-1,
 403    NULL if none.  Assume start_addr < end_addr. */
 404 static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
 405 {
 406         struct vm_area_struct * vma = find_vma(mm,start_addr);
 407
 408         if (vma && end_addr <= vma->vm_start)
 409                 vma = NULL;
 410         return vma;
 411 }
 412
 413 extern struct vm_area_struct *find_extend_vma(struct task_struct *tsk, unsigned long addr);
 414
 415 #define buffer_under_min()      ((atomic_read(&buffermem) >> PAGE_SHIFT) * 100 < \
 416                                 buffer_mem.min_percent * num_physpages)
 417 #define pgcache_under_min()     (atomic_read(&page_cache_size) * 100 < \
 418                                 page_cache.min_percent * num_physpages)
 419
 420 #endif /* __KERNEL__ */
 421
 422 #endif