/*
 * linux/ipc/shm.c
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 *	Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
 *
 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
 * avoid vmalloc and make shmmax, shmall, shmmni sysctl'able,
 *	Christoph Rohland <hans-christoph.rohland@sap.com>
 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
 * make it a file system, Christoph Rohland <hans-christoph.rohland@sap.com>
 *
 * The filesystem has the following restrictions/bugs:
 * 1) It can only handle one directory.
 * 2) Private writeable mappings are not supported
 * 3) Read and write are not implemented (should they be?)
 * 4) No special nodes are supported
 *
 * There are the following mount options:
 * - nr_blocks (^= shmall) is the number of blocks of size PAGE_SIZE
 *   we are allowed to allocate
 * - nr_inodes (^= shmmni) is the number of files we are allowed to
 *   allocate
 * - mode is the mode for the root directory (default S_IRWXUGO | S_ISVTX)
 */
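
/*
 * Purely illustrative mount line (not taken from this file); the option
 * names are the ones parsed by shm_parse_options() below:
 *
 *	mount -t shm -o nr_blocks=1024,nr_inodes=128,mode=1777 shm /shm
 *
 * would cap the fs at 1024 PAGE_SIZE blocks and 128 segments and give
 * the root directory a world-writable, sticky mode.
 */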
#include <linux/config.h>
#include <linux/module.h>
#include <linux/malloc.h>
#include <linux/shm.h>
#include <linux/swap.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/locks.h>
#include <linux/file.h>
#include <linux/mman.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/highmem.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>

#include "util.h"
static struct super_block *shm_read_super(struct super_block *,void *, int);
static void shm_put_super (struct super_block *);
static int shm_remount_fs (struct super_block *, int *, char *);
static void shm_read_inode (struct inode *);
static int shm_statfs (struct super_block *, struct statfs *);
static int shm_create (struct inode *,struct dentry *,int);
static struct dentry *shm_lookup (struct inode *,struct dentry *);
static int shm_unlink (struct inode *,struct dentry *);
static int shm_setattr (struct dentry *dent, struct iattr *attr);
static void shm_delete (struct inode *);
static int shm_mmap (struct file *, struct vm_area_struct *);
static int shm_readdir (struct file *, void *, filldir_t);

#define SHM_NAME_LEN NAME_MAX
#define SHM_FMT ".IPC_%08x"
#define SHM_FMT_LEN 13
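/* SHM_FMT_LEN is strlen(".IPC_") + 8 hex digits = 5 + 8 = 13, so e.g.
 * the SYSV segment with id 0x1234 appears as the file ".IPC_00001234". */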

/* shm_mode upper byte flags */
/* SHM_DEST and SHM_LOCKED are used in ipcs(8) */
#define PRV_DEST	0010000	/* segment will be destroyed on last detach */
#define PRV_LOCKED	0020000	/* segment will not be swapped */
#define SHM_UNLK	0040000	/* filename is unlinked */
#define SHM_SYSV	0100000	/* It is a SYSV shm segment */

struct shmid_kernel /* private to the kernel */
{
	struct kern_ipc_perm	shm_perm;
	size_t			shm_segsz;
	unsigned long		shm_nattch;
	unsigned long		shm_npages;	/* size of segment (pages) */
	pte_t			**shm_dir;	/* ptr to arr of ptrs to frames */
	int			id;
	union permap {
		struct shmem {
			time_t	atime;
			time_t	dtime;
			time_t	ctime;
			pid_t	cpid;
			pid_t	lpid;
			int	nlen;
			char	nm[0];
		} shmem;
		struct zero {
			struct semaphore	sema;
			struct list_head	list;
		} zero;
	} permap;
};

#define shm_atim	permap.shmem.atime
#define shm_dtim	permap.shmem.dtime
#define shm_ctim	permap.shmem.ctime
#define shm_cprid	permap.shmem.cpid
#define shm_lprid	permap.shmem.lpid
#define shm_namelen	permap.shmem.nlen
#define shm_name	permap.shmem.nm
#define shm_flags	shm_perm.mode
#define zsem		permap.zero.sema
#define zero_list	permap.zero.list

static struct ipc_ids shm_ids;

#define shm_lock(id)	((struct shmid_kernel*)ipc_lock(&shm_ids,id))
#define shm_unlock(id)	ipc_unlock(&shm_ids,id)
#define shm_lockall()	ipc_lockall(&shm_ids)
#define shm_unlockall()	ipc_unlockall(&shm_ids)
#define shm_get(id)	((struct shmid_kernel*)ipc_get(&shm_ids,id))
#define shm_buildid(id, seq) \
	ipc_buildid(&shm_ids, id, seq)

static int newseg (key_t key, const char *name, int namelen, int shmflg, size_t size);
static void seg_free(struct shmid_kernel *shp, int doacc);
static void shm_open (struct vm_area_struct *shmd);
static void shm_close (struct vm_area_struct *shmd);
static int shm_remove_name(int id);
static struct page * shm_nopage(struct vm_area_struct *, unsigned long, int);
static int shm_swapout(struct page *, struct file *);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
#endif

static void zshm_swap (int prio, int gfp_mask);
static void zmap_unuse(swp_entry_t entry, struct page *page);
static void shmzero_open(struct vm_area_struct *shmd);
static void shmzero_close(struct vm_area_struct *shmd);
static struct page *shmzero_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share);
static int zero_id;
static struct shmid_kernel zshmid_kernel;
static struct dentry *zdent;

#define SHM_FS_MAGIC 0x02011994

static struct super_block * shm_sb;

static DECLARE_FSTYPE(shm_fs_type, "shm", shm_read_super, FS_SINGLE);

static struct super_operations shm_sops = {
	read_inode:	shm_read_inode,
	delete_inode:	shm_delete,
	put_super:	shm_put_super,
	statfs:		shm_statfs,
	remount_fs:	shm_remount_fs,
};

static struct file_operations shm_root_operations = {
	readdir:	shm_readdir,
};

static struct inode_operations shm_root_inode_operations = {
	create:		shm_create,
	lookup:		shm_lookup,
	unlink:		shm_unlink,
};

static struct file_operations shm_file_operations = {
	mmap:		shm_mmap,
};

static struct inode_operations shm_inode_operations = {
	setattr:	shm_setattr,
};

static struct vm_operations_struct shm_vm_ops = {
	open:	shm_open,	/* callback for a new vm-area open */
	close:	shm_close,	/* callback for when the vm-area is released */
	nopage:	shm_nopage,
	swapout:shm_swapout,
};

size_t shm_ctlmax = SHMMAX;

/* These parameters should be part of the superblock */
static int shm_ctlall;
static int shm_ctlmni;
static int shm_mode;

static int shm_tot; /* total number of shared memory pages */
static int shm_rss; /* number of shared memory pages that are in memory */
static int shm_swp; /* number of shared memory pages that are in swap */

/* locks order:
	pagecache_lock
	shm_lock()/shm_lockall()
	kernel lock
	inode->i_sem
	sem_ids.sem
	mmap_sem

   SMP assumptions:
   - swap_free() never sleeps
   - add_to_swap_cache() never sleeps
   - add_to_swap_cache() doesn't acquire the big kernel lock.
   - shm_unuse() is called with the kernel lock acquired.
 */

/* some statistics */
static ulong swap_attempts;
static ulong swap_successes;
static ulong used_segs;

void __init shm_init (void)
{
	struct vfsmount *res;
	ipc_init_ids(&shm_ids, 1);

	register_filesystem (&shm_fs_type);
	res = kern_mount(&shm_fs_type);
	if (IS_ERR(res)) {
		unregister_filesystem(&shm_fs_type);
		return;
	}
#ifdef CONFIG_PROC_FS
	create_proc_read_entry("sysvipc/shm", 0, 0, sysvipc_shm_read_proc, NULL);
#endif
	zero_id = ipc_addid(&shm_ids, &zshmid_kernel.shm_perm, 1);
	shm_unlock(zero_id);
	INIT_LIST_HEAD(&zshmid_kernel.zero_list);
	zdent = d_alloc_root(get_empty_inode());
	return;
}

static int shm_parse_options(char *options)
{
	int blocks = shm_ctlall;
	int inodes = shm_ctlmni;
	umode_t mode = shm_mode;
	char *this_char, *value;

	this_char = NULL;
	if ( options )
		this_char = strtok(options,",");
	for ( ; this_char; this_char = strtok(NULL,",")) {
		if ((value = strchr(this_char,'=')) != NULL)
			*value++ = 0;
		if (!strcmp(this_char,"nr_blocks")) {
			if (!value || !*value)
				return 1;
			blocks = simple_strtoul(value,&value,0);
			if (*value)
				return 1;
		}
		else if (!strcmp(this_char,"nr_inodes")) {
			if (!value || !*value)
				return 1;
			inodes = simple_strtoul(value,&value,0);
			if (*value)
				return 1;
		}
		else if (!strcmp(this_char,"mode")) {
			if (!value || !*value)
				return 1;
			mode = simple_strtoul(value,&value,8);
			if (*value)
				return 1;
		}
		else
			return 1;
	}
	shm_ctlmni = inodes;
	shm_ctlall = blocks;
	shm_mode   = mode;

	return 0;
}

static struct super_block *shm_read_super(struct super_block *s,void *data,
					  int silent)
{
	struct inode * root_inode;

	shm_ctlall = SHMALL;
	shm_ctlmni = SHMMNI;
	shm_mode   = S_IRWXUGO | S_ISVTX;
	if (shm_parse_options (data)) {
		printk(KERN_ERR "shm fs invalid option\n");
		goto out_unlock;
	}

	s->s_blocksize = PAGE_SIZE;
	s->s_blocksize_bits = PAGE_SHIFT;
	s->s_magic = SHM_FS_MAGIC;
	s->s_op = &shm_sops;
	root_inode = iget (s, SEQ_MULTIPLIER);
	if (!root_inode)
		goto out_no_root;
	root_inode->i_op = &shm_root_inode_operations;
	root_inode->i_sb = s;
	root_inode->i_nlink = 2;
	root_inode->i_mode = S_IFDIR | shm_mode;
	s->s_root = d_alloc_root(root_inode);
	if (!s->s_root)
		goto out_no_root;
	shm_sb = s;
	return s;

out_no_root:
	printk(KERN_ERR "shm_read_super: get root inode failed\n");
	iput(root_inode);
out_unlock:
	return NULL;
}

static int shm_remount_fs (struct super_block *sb, int *flags, char *data)
{
	if (shm_parse_options (data))
		return -EINVAL;
	return 0;
}

static inline int shm_checkid(struct shmid_kernel *s, int id)
{
	if (!(s->shm_flags & SHM_SYSV))
		return -EINVAL;
	if (ipc_checkid(&shm_ids,&s->shm_perm,id))
		return -EIDRM;
	return 0;
}

static inline struct shmid_kernel *shm_rmid(int id)
{
	return (struct shmid_kernel *)ipc_rmid(&shm_ids,id);
}

static inline int shm_addid(struct shmid_kernel *shp)
{
	return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni+1);
}

static void shm_put_super(struct super_block *sb)
{
	int i;
	struct shmid_kernel *shp;

	down(&shm_ids.sem);
	for(i = 0; i <= shm_ids.max_id; i++) {
		if (i == zero_id)
			continue;
		if (!(shp = shm_lock (i)))
			continue;
		if (shp->shm_nattch)
			printk(KERN_DEBUG "shm_nattch = %ld\n", shp->shm_nattch);
		shp = shm_rmid(i);
		shm_unlock(i);
		seg_free(shp, 1);
	}
	dput (sb->s_root);
	up(&shm_ids.sem);
}

static int shm_statfs(struct super_block *sb, struct statfs *buf)
{
	buf->f_type = SHM_FS_MAGIC;
	buf->f_bsize = PAGE_SIZE;
	buf->f_blocks = shm_ctlall;
	buf->f_bavail = buf->f_bfree = shm_ctlall - shm_tot;
	buf->f_files = shm_ctlmni;
	buf->f_ffree = shm_ctlmni - used_segs;
	buf->f_namelen = SHM_NAME_LEN;
	return 0;
}

static void shm_read_inode(struct inode * inode)
{
	int id;
	struct shmid_kernel *shp;

	id = inode->i_ino;
	inode->i_op = NULL;
	inode->i_mode = 0;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;

	if (id < SEQ_MULTIPLIER) {
		if (!(shp = shm_lock (id)))
			return;
		inode->i_mode = (shp->shm_flags & S_IALLUGO) | S_IFREG;
		inode->i_uid  = shp->shm_perm.uid;
		inode->i_gid  = shp->shm_perm.gid;
		inode->i_size = shp->shm_segsz;
		shm_unlock (id);
		inode->i_op  = &shm_inode_operations;
		inode->i_fop = &shm_file_operations;
		return;
	}

	inode->i_op    = &shm_root_inode_operations;
	inode->i_fop   = &shm_root_operations;
	inode->i_sb    = shm_sb;
	inode->i_nlink = 2;
	inode->i_mode  = S_IFDIR | shm_mode;
	inode->i_uid   = inode->i_gid = 0;
}

static int shm_create (struct inode *dir, struct dentry *dent, int mode)
{
	int id, err;
	struct inode * inode;

	down(&shm_ids.sem);
	err = id = newseg (IPC_PRIVATE, dent->d_name.name, dent->d_name.len, mode, 0);
	if (err < 0)
		goto out;

	err = -ENOMEM;
	inode = iget (shm_sb, id % SEQ_MULTIPLIER);
	if (!inode)
		goto out;

	err = 0;
	down (&inode->i_sem);
	inode->i_mode = mode | S_IFREG;
	inode->i_op = &shm_inode_operations;
	d_instantiate(dent, inode);
	up (&inode->i_sem);

out:
	up(&shm_ids.sem);
	return err;
}

static int shm_readdir (struct file *filp, void *dirent, filldir_t filldir)
{
	struct inode * inode = filp->f_dentry->d_inode;
	struct shmid_kernel *shp;
	off_t nr;

	nr = filp->f_pos;

	switch(nr)
	{
	case 0:
		if (filldir(dirent, ".", 1, nr, inode->i_ino, DT_DIR) < 0)
			return 0;
		filp->f_pos = ++nr;
		/* fall through */
	case 1:
		if (filldir(dirent, "..", 2, nr, inode->i_ino, DT_DIR) < 0)
			return 0;
		filp->f_pos = ++nr;
		/* fall through */
	default:
		down(&shm_ids.sem);
		for (; nr-2 <= shm_ids.max_id; nr++ ) {
			if (nr-2 == zero_id)
				continue;
			if (!(shp = shm_get (nr-2)))
				continue;
			if (shp->shm_flags & SHM_UNLK)
				continue;
			if (filldir(dirent, shp->shm_name, shp->shm_namelen, nr, nr, DT_REG) < 0 )
				break;
		}
		filp->f_pos = nr;
		up(&shm_ids.sem);
		break;
	}

	UPDATE_ATIME(inode);
	return 0;
}

static struct dentry *shm_lookup (struct inode *dir, struct dentry *dent)
{
	int i, err = 0;
	struct shmid_kernel* shp;
	struct inode *inode = NULL;

	if (dent->d_name.len > SHM_NAME_LEN)
		return ERR_PTR(-ENAMETOOLONG);

	down(&shm_ids.sem);
	for(i = 0; i <= shm_ids.max_id; i++) {
		if (i == zero_id)
			continue;
		if (!(shp = shm_lock(i)))
			continue;
		if (!(shp->shm_flags & SHM_UNLK) &&
		    dent->d_name.len == shp->shm_namelen &&
		    strncmp(dent->d_name.name, shp->shm_name, shp->shm_namelen) == 0)
			goto found;
		shm_unlock(i);
	}

	/*
	 * Prevent reserved names from becoming negative dentries.
	 * This also prevents object creation through the filesystem.
	 */
	if (dent->d_name.len == SHM_FMT_LEN &&
	    memcmp (SHM_FMT, dent->d_name.name, SHM_FMT_LEN - 8) == 0)
		err = -EINVAL;	/* EINVAL to give IPC_RMID the right error */

	goto out;

found:
	shm_unlock(i);
	inode = iget(dir->i_sb, i);

	if (!inode)
		err = -EACCES;
out:
	if (err == 0)
		d_add (dent, inode);
	up (&shm_ids.sem);
	return ERR_PTR(err);
}

static int shm_unlink (struct inode *dir, struct dentry *dent)
{
	struct inode * inode = dent->d_inode;
	struct shmid_kernel *shp;

	down (&shm_ids.sem);
	if (!(shp = shm_lock (inode->i_ino)))
		BUG();
	shp->shm_flags |= SHM_UNLK | PRV_DEST;
	shp->shm_perm.key = IPC_PRIVATE; /* Do not find it any more */
	shm_unlock (inode->i_ino);
	up (&shm_ids.sem);
	inode->i_nlink -= 1;
	/*
	 * If it's a reserved name we have to drop the dentry instead
	 * of creating a negative dentry
	 */
	if (dent->d_name.len == SHM_FMT_LEN &&
	    memcmp (SHM_FMT, dent->d_name.name, SHM_FMT_LEN - 8) == 0)
		d_drop (dent);
	return 0;
}

/*
 * We cannot use kmalloc for shm_alloc since this restricts the
 * maximum size of the segments.
 *
 * We also cannot use vmalloc, since this uses too much of the vmalloc
 * space and we run out of this on highend machines.
 *
 * So we have to use this complicated indirect scheme to alloc the shm
 * page tables.
 */

#ifdef PTE_INIT
static inline void init_ptes (pte_t *pte, int number) {
	while (number--)
		PTE_INIT (pte++);
}
#else
static inline void init_ptes (pte_t *pte, int number) {
	memset (pte, 0, number*sizeof(*pte));
}
#endif

#define PTES_PER_PAGE (PAGE_SIZE/sizeof(pte_t))
#define SHM_ENTRY(shp, index) (shp)->shm_dir[(index)/PTES_PER_PAGE][(index)%PTES_PER_PAGE]
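/*
 * Worked example (assuming 4 kB pages and a 4-byte pte_t, so
 * PTES_PER_PAGE == 1024): page index 2500 of a segment lives in
 * shm_dir[2][452], since 2500/1024 == 2 and 2500%1024 == 452.
 */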

static pte_t **shm_alloc(unsigned long pages, int doacc)
{
	unsigned short dir  = pages / PTES_PER_PAGE;
	unsigned short last = pages % PTES_PER_PAGE;
	pte_t **ret, **ptr;

	if (pages == 0)
		return NULL;

	ret = kmalloc ((dir+1) * sizeof(pte_t *), GFP_KERNEL);
	if (!ret)
		goto nomem;

	for (ptr = ret; ptr < ret+dir ; ptr++)
	{
		*ptr = (pte_t *)__get_free_page (GFP_KERNEL);
		if (!*ptr)
			goto free;
		init_ptes (*ptr, PTES_PER_PAGE);
	}

	/* The last one is probably not of PAGE_SIZE: we use kmalloc */
	if (last) {
		*ptr = kmalloc (last*sizeof(pte_t), GFP_KERNEL);
		if (!*ptr)
			goto free;
		init_ptes (*ptr, last);
	}
	if (doacc) {
		shm_lockall();
		shm_tot += pages;
		used_segs++;
		shm_unlockall();
	}

	return ret;

free:
	/* The last failed: we decrement first */
	while (--ptr >= ret)
		free_page ((unsigned long)*ptr);

	kfree (ret);
nomem:
	return ERR_PTR(-ENOMEM);
}

static void shm_free(pte_t** dir, unsigned long pages, int doacc)
{
	int i, rss, swp;
	pte_t **ptr = dir+pages/PTES_PER_PAGE;

	if (!dir)
		return;

	for (i = 0, rss = 0, swp = 0; i < pages ; i++) {
		pte_t pte;
		pte = dir[i/PTES_PER_PAGE][i%PTES_PER_PAGE];
		if (pte_none(pte))
			continue;
		if (pte_present(pte)) {
			__free_page (pte_page(pte));
			rss++;
		} else {
			swap_free(pte_to_swp_entry(pte));
			swp++;
		}
	}

	/* first the last page */
	if (pages%PTES_PER_PAGE)
		kfree (*ptr);
	/* now the whole pages */
	while (--ptr >= dir)
		if (*ptr)
			free_page ((unsigned long)*ptr);

	/* Now the indirect block */
	kfree (dir);

	if (doacc) {
		shm_lockall();
		shm_rss -= rss;
		shm_swp -= swp;
		shm_tot -= pages;
		used_segs--;
		shm_unlockall();
	}
}

static int shm_setattr (struct dentry *dentry, struct iattr *attr)
{
	int error;
	struct inode *inode = dentry->d_inode;
	struct shmid_kernel *shp;
	unsigned long new_pages, old_pages;
	pte_t **new_dir, **old_dir;

	error = inode_change_ok(inode, attr);
	if (error)
		return error;
	if (!(attr->ia_valid & ATTR_SIZE))
		goto set_attr;
	if (attr->ia_size > shm_ctlmax)
		return -EFBIG;

	/* We set old_pages and old_dir for easier cleanup */
	old_pages = new_pages = (attr->ia_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	old_dir = new_dir = shm_alloc(new_pages, 1);
	if (IS_ERR(new_dir))
		return PTR_ERR(new_dir);

	if (!(shp = shm_lock(inode->i_ino)))
		BUG();
	error = -ENOSPC;
	if (shm_tot - shp->shm_npages >= shm_ctlall)
		goto size_out;
	error = 0;
	if (shp->shm_segsz == attr->ia_size)
		goto size_out;
	/* Now we set them to the real values */
	old_dir = shp->shm_dir;
	old_pages = shp->shm_npages;
	if (old_dir){
		pte_t *swap;
		int i,j;
		i = old_pages < new_pages ? old_pages : new_pages;
		j = i % PTES_PER_PAGE;
		i /= PTES_PER_PAGE;
		if (j)
			memcpy (new_dir[i], old_dir[i], j * sizeof (pte_t));
		while (i--) {
			swap = new_dir[i];
			new_dir[i] = old_dir[i];
			old_dir[i] = swap;
		}
	}
	shp->shm_dir = new_dir;
	shp->shm_npages = new_pages;
	shp->shm_segsz = attr->ia_size;
size_out:
	shm_unlock(inode->i_ino);
	shm_free (old_dir, old_pages, 1);

set_attr:
	if (!(shp = shm_lock(inode->i_ino)))
		BUG();
	if (attr->ia_valid & ATTR_MODE)
		shp->shm_perm.mode = attr->ia_mode;
	if (attr->ia_valid & ATTR_UID)
		shp->shm_perm.uid = attr->ia_uid;
	if (attr->ia_valid & ATTR_GID)
		shp->shm_perm.gid = attr->ia_gid;
	shm_unlock (inode->i_ino);

	inode_setattr(inode, attr);
	return error;
}

static struct shmid_kernel *seg_alloc(int numpages, size_t namelen)
{
	struct shmid_kernel *shp;
	pte_t **dir;

	shp = (struct shmid_kernel *) kmalloc (sizeof (*shp) + namelen, GFP_KERNEL);
	if (!shp)
		return ERR_PTR(-ENOMEM);

	dir = shm_alloc (numpages, namelen);
	if (IS_ERR(dir)) {
		kfree(shp);
		return ERR_PTR(PTR_ERR(dir));
	}
	shp->shm_dir = dir;
	shp->shm_npages = numpages;
	shp->shm_nattch = 0;
	shp->shm_namelen = namelen;
	return(shp);
}

static void seg_free(struct shmid_kernel *shp, int doacc)
{
	shm_free (shp->shm_dir, shp->shm_npages, doacc);
	kfree(shp);
}

static int newseg (key_t key, const char *name, int namelen,
		   int shmflg, size_t size)
{
	struct shmid_kernel *shp;
	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
	int id;

	if (namelen > SHM_NAME_LEN)
		return -ENAMETOOLONG;

	if (size > shm_ctlmax)
		return -EINVAL;

	if (shm_tot + numpages >= shm_ctlall)
		return -ENOSPC;

	shp = seg_alloc(numpages, namelen ? namelen : SHM_FMT_LEN + 1);
	if (IS_ERR(shp))
		return PTR_ERR(shp);
	id = shm_addid(shp);
	if(id == -1) {
		seg_free(shp, 1);
		return -ENOSPC;
	}
	shp->shm_perm.key = key;
	shp->shm_flags = (shmflg & S_IRWXUGO);
	shp->shm_segsz = size;
	shp->shm_cprid = current->pid;
	shp->shm_lprid = 0;
	shp->shm_atim = shp->shm_dtim = 0;
	shp->shm_ctim = CURRENT_TIME;
	shp->id = shm_buildid(id,shp->shm_perm.seq);
	if (namelen != 0) {
		shp->shm_namelen = namelen;
		memcpy (shp->shm_name, name, namelen);
	} else {
		shp->shm_flags |= SHM_SYSV;
		shp->shm_namelen = sprintf (shp->shm_name, SHM_FMT, shp->id);
	}
	shm_unlock(id);

	return shp->id;
}

asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
{
	struct shmid_kernel *shp;
	int err, id = 0;

	if (size < SHMMIN)
		return -EINVAL;

	down(&shm_ids.sem);
	if (key == IPC_PRIVATE) {
		err = newseg(key, NULL, 0, shmflg, size);
	} else if ((id = ipc_findkey(&shm_ids,key)) == -1) {
		if (!(shmflg & IPC_CREAT))
			err = -ENOENT;
		else
			err = newseg(key, NULL, 0, shmflg, size);
	} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
		err = -EEXIST;
	} else {
		shp = shm_lock(id);
		if(shp==NULL)
			BUG();
		if (shp->shm_segsz < size)
			err = -EINVAL;
		else if (ipcperms(&shp->shm_perm, shmflg))
			err = -EACCES;
		else
			err = shm_buildid(id, shp->shm_perm.seq);
		shm_unlock(id);
	}
	up(&shm_ids.sem);
	return err;
}
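
/*
 * For reference, a typical user-space call ending up here is e.g.
 *
 *	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *
 * which creates a fresh one-page segment via newseg(); looking up an
 * existing key only succeeds if the requested size fits and
 * ipcperms() agrees.
 */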

/* FIXME: maybe we need lock_kernel() here */
static void shm_delete (struct inode *ino)
{
	int shmid = ino->i_ino;
	struct shmid_kernel *shp;

	down(&shm_ids.sem);
	shp = shm_lock(shmid);
	if(shp==NULL) {
		BUG();
	}
	shp = shm_rmid(shmid);
	shm_unlock(shmid);
	up(&shm_ids.sem);
	seg_free(shp, 1);
	clear_inode(ino);
}

static inline unsigned long copy_shmid_to_user(void *buf, struct shmid64_ds *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shmid_ds out;

		ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
		out.shm_segsz	= in->shm_segsz;
		out.shm_atime	= in->shm_atime;
		out.shm_dtime	= in->shm_dtime;
		out.shm_ctime	= in->shm_ctime;
		out.shm_cpid	= in->shm_cpid;
		out.shm_lpid	= in->shm_lpid;
		out.shm_nattch	= in->shm_nattch;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}

struct shm_setbuf {
	uid_t	uid;
	gid_t	gid;
	mode_t	mode;
};

static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void *buf, int version)
{
	switch(version) {
	case IPC_64:
	    {
		struct shmid64_ds tbuf;

		if (copy_from_user(&tbuf, buf, sizeof(tbuf)))
			return -EFAULT;

		out->uid	= tbuf.shm_perm.uid;
		out->gid	= tbuf.shm_perm.gid;
		out->mode	= tbuf.shm_flags;

		return 0;
	    }
	case IPC_OLD:
	    {
		struct shmid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->uid	= tbuf_old.shm_perm.uid;
		out->gid	= tbuf_old.shm_perm.gid;
		out->mode	= tbuf_old.shm_flags;

		return 0;
	    }
	default:
		return -EINVAL;
	}
}

static inline unsigned long copy_shminfo_to_user(void *buf, struct shminfo64 *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shminfo out;

		if(in->shmmax > INT_MAX)
			out.shmmax = INT_MAX;
		else
			out.shmmax = (int)in->shmmax;

		out.shmmin	= in->shmmin;
		out.shmmni	= in->shmmni;
		out.shmseg	= in->shmseg;
		out.shmall	= in->shmall;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}

asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
{
	struct shm_setbuf setbuf;
	struct shmid_kernel *shp;
	int err, version;

	if (cmd < 0 || shmid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);

	switch (cmd) { /* replace with proc interface ? */
	case IPC_INFO:
	{
		struct shminfo64 shminfo;

		memset(&shminfo,0,sizeof(shminfo));
		shminfo.shmmni = shminfo.shmseg = shm_ctlmni;
		shminfo.shmmax = shm_ctlmax;
		shminfo.shmall = shm_ctlall;

		shminfo.shmmin = SHMMIN;
		if(copy_shminfo_to_user (buf, &shminfo, version))
			return -EFAULT;
		/* reading an integer is always atomic */
		err = shm_ids.max_id;
		if(err<0)
			err = 0;
		return err;
	}
	case SHM_INFO:
	{
		struct shm_info shm_info;

		memset(&shm_info,0,sizeof(shm_info));
		shm_lockall();
		shm_info.used_ids = shm_ids.in_use;
		shm_info.shm_rss = shm_rss;
		shm_info.shm_tot = shm_tot;
		shm_info.shm_swp = shm_swp;
		shm_info.swap_attempts = swap_attempts;
		shm_info.swap_successes = swap_successes;
		err = shm_ids.max_id;
		shm_unlockall();
		if(copy_to_user (buf, &shm_info, sizeof(shm_info)))
			return -EFAULT;

		return err < 0 ? 0 : err;
	}
	case SHM_STAT:
	case IPC_STAT:
	{
		struct shmid64_ds tbuf;
		int result;
		if ((shmid % SEQ_MULTIPLIER) == zero_id)
			return -EINVAL;
		memset(&tbuf, 0, sizeof(tbuf));
		shp = shm_lock(shmid);
		if(shp==NULL)
			return -EINVAL;
		if(cmd==SHM_STAT) {
			err = -EINVAL;
			if (!(shp->shm_flags & SHM_SYSV) ||
			    shmid > shm_ids.max_id)
				goto out_unlock;
			result = shm_buildid(shmid, shp->shm_perm.seq);
		} else {
			err = shm_checkid(shp,shmid);
			if(err)
				goto out_unlock;
			result = 0;
		}
		err=-EACCES;
		if (ipcperms (&shp->shm_perm, S_IRUGO))
			goto out_unlock;
		kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
		/* ugly hack to keep binary compatibility for ipcs */
		tbuf.shm_flags &= PRV_DEST | PRV_LOCKED | S_IRWXUGO;
		if (tbuf.shm_flags & PRV_DEST)
			tbuf.shm_flags |= SHM_DEST;
		if (tbuf.shm_flags & PRV_LOCKED)
			tbuf.shm_flags |= SHM_LOCKED;
		tbuf.shm_flags &= SHM_DEST | SHM_LOCKED | S_IRWXUGO;
		tbuf.shm_segsz	= shp->shm_segsz;
		tbuf.shm_atime	= shp->shm_atim;
		tbuf.shm_dtime	= shp->shm_dtim;
		tbuf.shm_ctime	= shp->shm_ctim;
		tbuf.shm_cpid	= shp->shm_cprid;
		tbuf.shm_lpid	= shp->shm_lprid;
		tbuf.shm_nattch	= shp->shm_nattch;
		shm_unlock(shmid);
		if(copy_shmid_to_user (buf, &tbuf, version))
			return -EFAULT;
		return result;
	}
	case SHM_LOCK:
	case SHM_UNLOCK:
	{
		/* Allow superuser to lock segment in memory */
		/* Should the pages be faulted in here or leave it to user? */
		/* need to determine interaction with current->swappable */
		if ((shmid % SEQ_MULTIPLIER)== zero_id)
			return -EINVAL;
		if (!capable(CAP_IPC_LOCK))
			return -EPERM;

		shp = shm_lock(shmid);
		if(shp==NULL)
			return -EINVAL;
		err = shm_checkid(shp,shmid);
		if(err)
			goto out_unlock;
		if(cmd==SHM_LOCK)
			shp->shm_flags |= PRV_LOCKED;
		else
			shp->shm_flags &= ~PRV_LOCKED;
		shm_unlock(shmid);
		return err;
	}
	case IPC_RMID:
	{
		/*
		 * We cannot simply remove the file. The SVID states
		 * that the block remains until the last person
		 * detaches from it, then is deleted. A shmat() on
		 * an RMID segment is legal in older Linux and if
		 * we change it apps break...
		 *
		 * Instead we set a destroyed flag, and then blow
		 * the name away when the usage hits zero.
		 */
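		/*
		 * E.g. shmctl(id, IPC_RMID, NULL) on a still attached
		 * segment only sets PRV_DEST below; the pages are
		 * finally released from shm_close() when shm_nattch
		 * drops to zero.
		 */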
		if ((shmid % SEQ_MULTIPLIER) == zero_id)
			return -EINVAL;
		down(&shm_ids.sem);
		shp = shm_lock(shmid);
		if (shp == NULL) {
			up(&shm_ids.sem);
			return -EINVAL;
		}
		err = shm_checkid(shp, shmid);
		if (err == 0) {
			if (shp->shm_nattch == 0 &&
			    !(shp->shm_flags & SHM_UNLK)) {
				int id=shp->id;
				shm_unlock(shmid);
				up(&shm_ids.sem);
				/*
				 * We can't hold shm_lock here else we
				 * will deadlock in shm_lookup when we
				 * try to recursively grab it.
				 */
				return shm_remove_name(id);
			}
			shp->shm_flags |= PRV_DEST;
			/* Do not find it any more */
			shp->shm_perm.key = IPC_PRIVATE;
		}
		/* Unlock */
		shm_unlock(shmid);
		up(&shm_ids.sem);
		return err;
	}
	case IPC_SET:
	{
		struct dentry * dentry;
		char name[SHM_FMT_LEN+1];

		if ((shmid % SEQ_MULTIPLIER)== zero_id)
			return -EINVAL;

		if(copy_shmid_from_user (&setbuf, buf, version))
			return -EFAULT;
		down(&shm_ids.sem);
		shp = shm_lock(shmid);
		err=-EINVAL;
		if(shp==NULL)
			goto out_up;
		err = shm_checkid(shp,shmid);
		if(err)
			goto out_unlock_up;
		err=-EPERM;
		if (current->euid != shp->shm_perm.uid &&
		    current->euid != shp->shm_perm.cuid &&
		    !capable(CAP_SYS_ADMIN)) {
			goto out_unlock_up;
		}

		shp->shm_perm.uid = setbuf.uid;
		shp->shm_perm.gid = setbuf.gid;
		shp->shm_flags = (shp->shm_flags & ~S_IRWXUGO)
			| (setbuf.mode & S_IRWXUGO);
		shp->shm_ctim = CURRENT_TIME;
		shm_unlock(shmid);
		up(&shm_ids.sem);

		sprintf (name, SHM_FMT, shmid);
		dentry = lookup_one(name, lock_parent(shm_sb->s_root));
		unlock_dir(shm_sb->s_root);
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto bad_dentry;
		err = -ENOENT;
		if (dentry->d_inode) {
			struct inode *ino = dentry->d_inode;
			ino->i_uid = setbuf.uid;
			ino->i_gid = setbuf.gid;
			ino->i_mode = (setbuf.mode & S_IRWXUGO) | (ino->i_mode & ~S_IALLUGO);
			ino->i_atime = ino->i_mtime = ino->i_ctime = CURRENT_TIME;
			err = 0;
		}
		dput (dentry);
	bad_dentry:
		return err;
	}
	default:
		return -EINVAL;
	}

	err = 0;
out_unlock_up:
	shm_unlock(shmid);
out_up:
	up(&shm_ids.sem);
	return err;
out_unlock:
	shm_unlock(shmid);
	return err;
}

static inline void shm_inc (int id) {
	struct shmid_kernel *shp;

	if(!(shp = shm_lock(id)))
		BUG();
	shp->shm_atim = CURRENT_TIME;
	shp->shm_lprid = current->pid;
	shp->shm_nattch++;
	shm_unlock(id);
}

static int shm_mmap(struct file * file, struct vm_area_struct * vma)
{
	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
		return -EINVAL;	/* we cannot do private writable mappings */
	UPDATE_ATIME(file->f_dentry->d_inode);
	vma->vm_ops = &shm_vm_ops;
	shm_inc(file->f_dentry->d_inode->i_ino);
	return 0;
}

/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 */
asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
{
	struct shmid_kernel *shp;
	unsigned long addr;
	struct file * file;
	int    err;
	unsigned long flags;
	unsigned long prot;
	unsigned long o_flags;
	int acc_mode;
	struct dentry *dentry;
	char   name[SHM_FMT_LEN+1];

	if (!shm_sb || (shmid % SEQ_MULTIPLIER) == zero_id)
		return -EINVAL;

	if ((addr = (ulong)shmaddr)) {
		if (addr & (SHMLBA-1)) {
			if (shmflg & SHM_RND)
				addr &= ~(SHMLBA-1);	/* round down */
			else
				return -EINVAL;
		}
		flags = MAP_SHARED | MAP_FIXED;
	} else
		flags = MAP_SHARED;

	if (shmflg & SHM_RDONLY) {
		prot = PROT_READ;
		o_flags = O_RDONLY;
		acc_mode = S_IRUGO;
	} else {
		prot = PROT_READ | PROT_WRITE;
		o_flags = O_RDWR;
		acc_mode = S_IRUGO | S_IWUGO;
	}

	/*
	 * We cannot rely on the fs check since SYSV IPC does have an
	 * additional creator id...
	 */
	shp = shm_lock(shmid);
	if(shp==NULL)
		return -EINVAL;
	err = ipcperms(&shp->shm_perm, acc_mode);
	shm_unlock(shmid);
	if (err)
		return -EACCES;

	sprintf (name, SHM_FMT, shmid);

	mntget(shm_fs_type.kern_mnt);
	dentry = lookup_one(name, lock_parent(shm_sb->s_root));
	unlock_dir(shm_sb->s_root);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		goto bad_file;
	err = -ENOENT;
	if (!dentry->d_inode)
		goto bad_file;
	file = dentry_open(dentry, shm_fs_type.kern_mnt, o_flags);
	err = PTR_ERR(file);
	if (IS_ERR (file))
		goto bad_file1;
	down(&current->mm->mmap_sem);
	*raddr = do_mmap (file, addr, file->f_dentry->d_inode->i_size,
			  prot, flags, 0);
	up(&current->mm->mmap_sem);
	if (IS_ERR(*raddr))
		err = PTR_ERR(*raddr);
	else
		err = 0;
	fput (file);
	return err;

bad_file1:
	dput(dentry);
bad_file:
	mntput(shm_fs_type.kern_mnt);
	if (err == -ENOENT)
		return -EINVAL;
	return err;
}
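
/*
 * For reference, user space reaches sys_shmat() through the shmat()
 * wrapper, e.g.
 *
 *	char *p = shmat(id, NULL, 0);
 *
 * note that this entry point returns the mapped address through *raddr
 * rather than as the return value.
 */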

/* This is called by fork, once for every shm attach. */
static void shm_open (struct vm_area_struct *shmd)
{
	shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino);
}

/*
 * Remove a name.
 */
static int shm_remove_name(int id)
{
	struct dentry *dir;
	struct dentry *dentry;
	int error;
	char name[SHM_FMT_LEN+1];

	sprintf (name, SHM_FMT, id);
	dir = lock_parent(shm_sb->s_root);
	dentry = lookup_one(name, dir);
	error = PTR_ERR(dentry);
	if (!IS_ERR(dentry)) {
		/*
		 * We have to do our own unlink to prevent the vfs
		 * permission check. The SYSV IPC layer has already
		 * checked the permissions, which do not comply with
		 * the vfs rules.
		 */
		struct inode *inode = dir->d_inode;
		down(&inode->i_zombie);
		error = shm_unlink(inode, dentry);
		if (!error)
			d_delete(dentry);
		up(&inode->i_zombie);
		dput(dentry);
	}
	unlock_dir(dir);
	return error;
}

/*
 * remove the attach descriptor shmd.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close (struct vm_area_struct *shmd)
{
	int id = shmd->vm_file->f_dentry->d_inode->i_ino;
	struct shmid_kernel *shp;

	/* remove from the list of attaches of the shm segment */
	if(!(shp = shm_lock(id)))
		BUG();
	shp->shm_lprid = current->pid;
	shp->shm_dtim = CURRENT_TIME;
	shp->shm_nattch--;
	if(shp->shm_nattch == 0 &&
	   shp->shm_flags & PRV_DEST &&
	   !(shp->shm_flags & SHM_UNLK)) {
		int pid=shp->id;
		int err;
		shm_unlock(id);

		/* The kernel lock prevents new attaches from
		 * happening. We can't hold shm_lock here
		 * else we will deadlock in shm_lookup when we
		 * try to recursively grab it.
		 */
		err = shm_remove_name(pid);
		if(err && err != -EINVAL && err != -ENOENT)
			printk(KERN_ERR "Unlink of SHM id %d failed (%d).\n", pid, err);
	} else {
		shm_unlock(id);
	}
}

/*
 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
 */
asmlinkage long sys_shmdt (char *shmaddr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *shmd, *shmdnext;

	down(&mm->mmap_sem);
	for (shmd = mm->mmap; shmd; shmd = shmdnext) {
		shmdnext = shmd->vm_next;
		if (shmd->vm_ops == &shm_vm_ops
		    && shmd->vm_start - (shmd->vm_pgoff << PAGE_SHIFT) == (ulong) shmaddr)
			do_munmap(mm, shmd->vm_start, shmd->vm_end - shmd->vm_start);
	}
	up(&mm->mmap_sem);
	return 0;
}

/*
 * Enter the shm page into the SHM data structures.
 *
 * The way "nopage" is done, we don't actually have to
 * do anything here: nopage will have filled in the shm
 * data structures already, and shm_swap_out() will just
 * work off them..
 */
static int shm_swapout(struct page * page, struct file *file)
{
	return 0;
}

/*
 * page not present ... go through shm_dir
 */
static struct page * shm_nopage_core(struct shmid_kernel *shp, unsigned int idx, int *swp, int *rss, unsigned long address)
{
	pte_t pte;
	struct page * page;

	if (idx >= shp->shm_npages)
		return NOPAGE_SIGBUS;

	pte = SHM_ENTRY(shp,idx);
	if (!pte_present(pte)) {
		/* page not present so shm_swap can't race with us
		   and the semaphore protects us from other tasks that
		   could potentially fault on our pte under us */
		if (pte_none(pte)) {
			shm_unlock(shp->id);
			page = page_cache_alloc();
			if (!page)
				goto oom;
			clear_user_highpage(page, address);
			if ((shp != shm_lock(shp->id)) && (shp->id != zero_id))
				BUG();
		} else {
			swp_entry_t entry = pte_to_swp_entry(pte);

			shm_unlock(shp->id);
			page = lookup_swap_cache(entry);
			if (!page) {
				lock_kernel();
				swapin_readahead(entry);
				page = read_swap_cache(entry);
				unlock_kernel();
				if (!page)
					goto oom;
			}
			delete_from_swap_cache(page);
			page = replace_with_highmem(page);
			swap_free(entry);
			if ((shp != shm_lock(shp->id)) && (shp->id != zero_id))
				BUG();
			(*swp)--;
		}
		(*rss)++;
		pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
		SHM_ENTRY(shp, idx) = pte;
	}

	/* pte_val(pte) == SHM_ENTRY (shp, idx) */
	page_cache_get(pte_page(pte));
	return pte_page(pte);

oom:
	shm_lock(shp->id);
	return NOPAGE_OOM;
}

static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
{
	struct page * page;
	struct shmid_kernel *shp;
	unsigned int idx;
	struct inode * inode = shmd->vm_file->f_dentry->d_inode;

	idx = (address - shmd->vm_start) >> PAGE_SHIFT;
	idx += shmd->vm_pgoff;

	down(&inode->i_sem);
	if(!(shp = shm_lock(inode->i_ino)))
		BUG();
	page = shm_nopage_core(shp, idx, &shm_swp, &shm_rss, address);
	shm_unlock(inode->i_ino);
	up(&inode->i_sem);
	return(page);
}

#define OKAY	0
#define RETRY	1
#define FAILED	2

static int shm_swap_core(struct shmid_kernel *shp, unsigned long idx, swp_entry_t swap_entry, int *counter, struct page **outpage)
{
	pte_t page;
	struct page *page_map;

	page = SHM_ENTRY(shp, idx);
	if (!pte_present(page))
		return RETRY;
	page_map = pte_page(page);
	if (page_map->zone->free_pages > page_map->zone->pages_high)
		return RETRY;
	if (shp->id != zero_id) swap_attempts++;

	if (--*counter < 0) /* failed */
		return FAILED;
	if (page_count(page_map) != 1)
		return RETRY;

	lock_page(page_map);
	if (!(page_map = prepare_highmem_swapout(page_map)))
		return FAILED;
	SHM_ENTRY (shp, idx) = swp_entry_to_pte(swap_entry);

	/* add the locked page to the swap cache before allowing
	   the swapin path to run lookup_swap_cache(). This avoids
	   reading a not yet uptodate block from disk.
	   NOTE: we just accounted the swap space reference for this
	   swap cache page at __get_swap_page() time. */
	add_to_swap_cache(*outpage = page_map, swap_entry);
	return OKAY;
}

static void shm_swap_postop(struct page *page)
{
	lock_kernel();
	rw_swap_page(WRITE, page, 0);
	unlock_kernel();
	page_cache_release(page);
}

static int shm_swap_preop(swp_entry_t *swap_entry)
{
	lock_kernel();
	/* subtle: preload the swap count for the swap cache. We can't
	   increase the count inside the critical section as we can't release
	   the shm_lock there. And we can't acquire the big lock with the
	   shm_lock held (otherwise we would deadlock too easily). */
	*swap_entry = __get_swap_page(2);
	if (!(*swap_entry).val) {
		unlock_kernel();
		return 1;
	}
	unlock_kernel();
	return 0;
}

/*
 * Goes through counter = (shm_rss / (prio + 1)) present shm pages.
 */
static unsigned long swap_id;  /* currently being swapped */
static unsigned long swap_idx; /* next to swap */

int shm_swap (int prio, int gfp_mask)
{
	struct shmid_kernel *shp;
	swp_entry_t swap_entry;
	unsigned long id, idx;
	int loop = 0;
	int counter;
	struct page * page_map;

	zshm_swap(prio, gfp_mask);
	counter = shm_rss / (prio + 1);
	if (!counter)
		return 0;
	if (shm_swap_preop(&swap_entry))
		return 0;

	shm_lockall();
check_id:
	shp = shm_get(swap_id);
	if(shp==NULL || shp->shm_flags & PRV_LOCKED) {
next_id:
		swap_idx = 0;
		if (++swap_id > shm_ids.max_id) {
			swap_id = 0;
			if (loop) {
failed:
				shm_unlockall();
				__swap_free(swap_entry, 2);
				return 0;
			}
			loop = 1;
		}
		goto check_id;
	}
	id = swap_id;

check_table:
	idx = swap_idx++;
	if (idx >= shp->shm_npages)
		goto next_id;

	switch (shm_swap_core(shp, idx, swap_entry, &counter, &page_map)) {
		case RETRY: goto check_table;
		case FAILED: goto failed;
	}
	swap_successes++;
	shm_swp++;
	shm_rss--;
	shm_unlockall();

	shm_swap_postop(page_map);
	return 1;
}

/*
 * Free the swap entry and set the new pte for the shm page.
 */
static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
			   swp_entry_t entry, struct page *page)
{
	pte_t pte;

	pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
	SHM_ENTRY(shp, idx) = pte;
	page_cache_get(page);
	shm_rss++;

	shm_swp--;

	swap_free(entry);
}

static int shm_unuse_core(struct shmid_kernel *shp, swp_entry_t entry, struct page *page)
{
	int n;

	for (n = 0; n < shp->shm_npages; n++) {
		if (pte_none(SHM_ENTRY(shp,n)))
			continue;
		if (pte_present(SHM_ENTRY(shp,n)))
			continue;
		if (pte_to_swp_entry(SHM_ENTRY(shp,n)).val == entry.val) {
			shm_unuse_page(shp, n, entry, page);
			return 1;
		}
	}
	return 0;
}

/*
 * shm_unuse() searches for a possibly swapped-out shm page.
 */
void shm_unuse(swp_entry_t entry, struct page *page)
{
	int i;

	shm_lockall();
	for (i = 0; i <= shm_ids.max_id; i++) {
		struct shmid_kernel *shp = shm_get(i);
		if(shp==NULL)
			continue;
		if (shm_unuse_core(shp, entry, page))
			goto out;
	}
out:
	shm_unlockall();
	zmap_unuse(entry, page);
}

#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
{
	off_t pos = 0;
	off_t begin = 0;
	int i, len = 0;

	down(&shm_ids.sem);
	len += sprintf(buffer, "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime name\n");

	for(i = 0; i <= shm_ids.max_id; i++) {
		struct shmid_kernel* shp;

		if (i == zero_id)
			continue;
		shp = shm_lock(i);
		if(shp!=NULL) {
#define SMALL_STRING "%10d %10d  %4o %10u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu %.*s%s\n"
#define BIG_STRING   "%10d %10d  %4o %21u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu %.*s%s\n"
			char *format;

			if (sizeof(size_t) <= sizeof(int))
				format = SMALL_STRING;
			else
				format = BIG_STRING;
			len += sprintf(buffer + len, format,
				shp->shm_perm.key,
				shm_buildid(i, shp->shm_perm.seq),
				shp->shm_flags,
				shp->shm_segsz,
				shp->shm_cprid,
				shp->shm_lprid,
				shp->shm_nattch,
				shp->shm_perm.uid,
				shp->shm_perm.gid,
				shp->shm_perm.cuid,
				shp->shm_perm.cgid,
				shp->shm_atim,
				shp->shm_dtim,
				shp->shm_ctim,
				shp->shm_namelen,
				shp->shm_name,
				shp->shm_flags & SHM_UNLK ? " (deleted)" : "");
			shm_unlock(i);
		}
		pos += len;
		if(pos < offset) {
			len = 0;
			begin = pos;
		}
		if(pos > offset + length)
			goto done;
	}
	*eof = 1;
done:
	up(&shm_ids.sem);
	*start = buffer + (offset - begin);
	len -= (offset - begin);
	if(len > length)
		len = length;
	if(len < 0)
		len = 0;
	return len;
}
#endif

#define VMA_TO_SHP(vma)	((vma)->vm_file->private_data)

static spinlock_t zmap_list_lock = SPIN_LOCK_UNLOCKED;
static unsigned long zswap_idx; /* next to swap */
static struct shmid_kernel *zswap_shp = &zshmid_kernel;
static int zshm_rss;

static struct vm_operations_struct shmzero_vm_ops = {
	open:		shmzero_open,
	close:		shmzero_close,
	nopage:		shmzero_nopage,
	swapout:	shm_swapout,
};

/*
 * In this implementation, the "unuse" and "swapout" interfaces are
 * interlocked via the kernel_lock, as well as shm_lock(zero_id).
 * "unuse" and "nopage/swapin", as well as "swapout" and "nopage/swapin"
 * interlock via shm_lock(zero_id). All these interlocks could be based
 * on a per-mapping lock instead of being a global lock.
 *
 * Reference (existence) counting on the file/dentry/inode is done
 * by generic vm_file code. The zero code does not hold any reference
 * on the pseudo-file. This is possible because the open/close calls
 * are bracketed by the file count update calls.
 */
static struct file *file_setup(struct file *fzero, struct shmid_kernel *shp)
{
	struct file *filp;
	struct inode *inp;

	if ((filp = get_empty_filp()) == 0)
		return(filp);
	if ((inp = get_empty_inode()) == 0) {
		put_filp(filp);
		return(0);
	}
	if ((filp->f_dentry = d_alloc(zdent, &(const struct qstr) { "dev/zero",
						8, 0 })) == 0) {
		iput(inp);
		put_filp(filp);
		return(0);
	}
	filp->f_vfsmnt = mntget(shm_fs_type.kern_mnt);
	d_instantiate(filp->f_dentry, inp);

	/*
	 * Copy over dev/ino for benefit of procfs. Use
	 * ino to indicate separate mappings.
	 */
	filp->f_dentry->d_inode->i_dev = shm_fs_type.kern_mnt->mnt_sb->s_dev;
	filp->f_dentry->d_inode->i_ino = (unsigned long)shp;
	if (fzero)
		fput(fzero);	/* release /dev/zero file */
	return(filp);
}

int map_zero_setup(struct vm_area_struct *vma)
{
	extern int vm_enough_memory(long pages);
	struct shmid_kernel *shp;
	struct file *filp;

	if (!vm_enough_memory((vma->vm_end - vma->vm_start) >> PAGE_SHIFT))
		return -ENOMEM;
	if (IS_ERR(shp = seg_alloc((vma->vm_end - vma->vm_start) / PAGE_SIZE, 0)))
		return PTR_ERR(shp);
	if ((filp = file_setup(vma->vm_file, shp)) == 0) {
		seg_free(shp, 0);
		return -ENOMEM;
	}
	vma->vm_file = filp;
	VMA_TO_SHP(vma) = (void *)shp;
	shp->id = zero_id;
	init_MUTEX(&shp->zsem);
	vma->vm_ops = &shmzero_vm_ops;
	shmzero_open(vma);
	spin_lock(&zmap_list_lock);
	list_add(&shp->zero_list, &zshmid_kernel.zero_list);
	spin_unlock(&zmap_list_lock);
	return 0;
}
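
/*
 * map_zero_setup() is reached from the /dev/zero driver's mmap when
 * user space creates a shared mapping of /dev/zero, e.g.
 *
 *	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 * with fd open on /dev/zero; each such mapping gets its own unaccounted
 * segment and pseudo-file as set up above.
 */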

static void shmzero_open(struct vm_area_struct *shmd)
{
	struct shmid_kernel *shp;

	shp = VMA_TO_SHP(shmd);
	down(&shp->zsem);
	shp->shm_nattch++;
	up(&shp->zsem);
}

static void shmzero_close(struct vm_area_struct *shmd)
{
	int done = 0;
	struct shmid_kernel *shp;

	shp = VMA_TO_SHP(shmd);
	down(&shp->zsem);
	if (--shp->shm_nattch == 0)
		done = 1;
	up(&shp->zsem);
	if (done) {
		spin_lock(&zmap_list_lock);
		if (shp == zswap_shp)
			zswap_shp = list_entry(zswap_shp->zero_list.next,
						struct shmid_kernel, zero_list);
		list_del(&shp->zero_list);
		spin_unlock(&zmap_list_lock);
		seg_free(shp, 0);
	}
}

static struct page * shmzero_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
{
	struct page *page;
	struct shmid_kernel *shp;
	unsigned int idx;
	int dummy;

	idx = (address - shmd->vm_start) >> PAGE_SHIFT;
	idx += shmd->vm_pgoff;

	shp = VMA_TO_SHP(shmd);
	down(&shp->zsem);
	shm_lock(zero_id);
	page = shm_nopage_core(shp, idx, &dummy, &zshm_rss, address);
	shm_unlock(zero_id);
	up(&shp->zsem);
	return(page);
}

static void zmap_unuse(swp_entry_t entry, struct page *page)
{
	struct shmid_kernel *shp;

	spin_lock(&zmap_list_lock);
	shm_lock(zero_id);
	for (shp = list_entry(zshmid_kernel.zero_list.next, struct shmid_kernel,
			zero_list); shp != &zshmid_kernel;
			shp = list_entry(shp->zero_list.next, struct shmid_kernel,
			zero_list)) {
		if (shm_unuse_core(shp, entry, page))
			break;
	}
	shm_unlock(zero_id);
	spin_unlock(&zmap_list_lock);
}

static void zshm_swap (int prio, int gfp_mask)
{
	struct shmid_kernel *shp;
	swp_entry_t swap_entry;
	unsigned long idx;
	int loop = 0;
	int counter;
	struct page * page_map;

	counter = zshm_rss / (prio + 1);
	if (!counter)
		return;
next:
	if (shm_swap_preop(&swap_entry))
		return;

	spin_lock(&zmap_list_lock);
	shm_lock(zero_id);
	if (zshmid_kernel.zero_list.next == 0)
		goto failed;
next_id:
	if (zswap_shp == &zshmid_kernel) {
		if (loop) {
failed:
			shm_unlock(zero_id);
			spin_unlock(&zmap_list_lock);
			__swap_free(swap_entry, 2);
			return;
		}
		zswap_shp = list_entry(zshmid_kernel.zero_list.next,
					struct shmid_kernel, zero_list);
		zswap_idx = 0;
		loop = 1;
	}
	shp = zswap_shp;

check_table:
	idx = zswap_idx++;
	if (idx >= shp->shm_npages) {
		zswap_shp = list_entry(zswap_shp->zero_list.next,
					struct shmid_kernel, zero_list);
		zswap_idx = 0;
		goto next_id;
	}

	switch (shm_swap_core(shp, idx, swap_entry, &counter, &page_map)) {
		case RETRY: goto check_table;
		case FAILED: goto failed;
	}
	shm_unlock(zero_id);
	spin_unlock(&zmap_list_lock);

	shm_swap_postop(page_map);
	if (counter)
		goto next;
	return;
}