/*
 * linux/ipc/shm.c
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 *	 Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
 *
 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
 * avoid vmalloc and make shmmax, shmall, shmmni sysctl'able,
 *                         Christoph Rohland <hans-christoph.rohland@sap.com>
 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
 * make it a file system, Christoph Rohland <hans-christoph.rohland@sap.com>
 *
 * The filesystem has the following restrictions/bugs:
 * 1) It can only handle one directory.
 * 2) Private writable mappings are not supported.
 * 3) Read and write are not implemented (should they be?)
 * 4) No special nodes are supported.
 *
 * There are the following mount options:
 * - nr_blocks (^= shmall) is the number of blocks of size PAGE_SIZE
 *   we are allowed to allocate
 * - nr_inodes (^= shmmni) is the number of files we are allowed to
 *   allocate
 * - mode is the mode for the root directory (default S_IRWXUGO | S_ISVTX)
 */
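/*
 * Illustrative sketch (editorial note, not part of the original source):
 * with the options above, mounting the filesystem by hand could look
 * like the following; the mount point /dev/shm is an assumption.
 *
 *	mount -t shm -o nr_blocks=4096,nr_inodes=128,mode=1777 shm /dev/shm
 *
 * nr_blocks and nr_inodes bound shm_ctlall and shm_ctlmni below, and
 * mode 1777 matches the default S_IRWXUGO | S_ISVTX.
 */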
#include <linux/config.h>
#include <linux/module.h>
#include <linux/malloc.h>
#include <linux/shm.h>
#include <linux/swap.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/locks.h>
#include <linux/file.h>
#include <linux/mman.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/highmem.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>

#include "util.h"
static struct super_block *shm_read_super(struct super_block *,void *, int);
static void	      shm_put_super  (struct super_block *);
static int	      shm_remount_fs (struct super_block *, int *, char *);
static void	      shm_read_inode (struct inode *);
static void	      shm_write_inode(struct inode *);
static int	      shm_statfs     (struct super_block *, struct statfs *);
static int	      shm_create     (struct inode *,struct dentry *,int);
static struct dentry *shm_lookup    (struct inode *,struct dentry *);
static int	      shm_unlink     (struct inode *,struct dentry *);
static int	      shm_setattr    (struct dentry *dent, struct iattr *attr);
static void	      shm_delete     (struct inode *);
static int	      shm_mmap       (struct file *, struct vm_area_struct *);
static int	      shm_readdir    (struct file *, void *, filldir_t);
#define SHM_NAME_LEN	NAME_MAX
#define SHM_FMT		".IPC_%08x"
#define SHM_FMT_LEN	13
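/*
 * Worked example (editorial note, not in the original): SYSV segments
 * created without a name get one generated from SHM_FMT, so id 0x2a
 * becomes ".IPC_0000002a".  SHM_FMT_LEN is 13 because the fixed
 * ".IPC_" prefix is 5 characters and "%08x" always expands to exactly
 * 8 hex digits: 5 + 8 = 13 (the trailing NUL is not counted; buffers
 * below are declared SHM_FMT_LEN+1).
 */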
/* shm_mode upper byte flags */
/* SHM_DEST and SHM_LOCKED are used in ipcs(8) */
#define PRV_DEST	0010000	/* segment will be destroyed on last detach */
#define PRV_LOCKED	0020000	/* segment will not be swapped */
#define SHM_UNLK	0040000	/* filename is unlinked */
#define SHM_SYSV	0100000	/* It is a SYSV shm segment */
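/*
 * Worked example (editorial note, not in the original): these octal
 * values sit above the nine permission bits in shm_perm.mode, so a
 * named SYSV segment with permissions 0644 carries the value
 * SHM_SYSV | 0644 == 0100644, and one whose name has been unlinked
 * would read SHM_SYSV | SHM_UNLK | PRV_DEST | 0644.
 */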
struct shmid_kernel /* private to the kernel */
{
	struct kern_ipc_perm	shm_perm;
	size_t			shm_segsz;
	unsigned long		shm_nattch;
	unsigned long		shm_npages; /* size of segment (pages) */
	pte_t			**shm_dir;  /* ptr to arr of ptrs to frames */
	int			id;
	union permap {
		struct shmem {
			time_t			atime;
			time_t			dtime;
			time_t			ctime;
			pid_t			cpid;
			pid_t			lpid;
			int			nlen;
			char			nm[0];
		} shmem;
		struct zero {
			struct semaphore	sema;
			struct list_head	list;
		} zero;
	} permap;
};

#define shm_atim	permap.shmem.atime
#define shm_dtim	permap.shmem.dtime
#define shm_ctim	permap.shmem.ctime
#define shm_cprid	permap.shmem.cpid
#define shm_lprid	permap.shmem.lpid
#define shm_namelen	permap.shmem.nlen
#define shm_name	permap.shmem.nm
#define shm_flags	shm_perm.mode
#define zsem		permap.zero.sema
#define zero_list	permap.zero.list
static struct ipc_ids shm_ids;

#define shm_lock(id)	((struct shmid_kernel*)ipc_lock(&shm_ids,id))
#define shm_unlock(id)	ipc_unlock(&shm_ids,id)
#define shm_lockall()	ipc_lockall(&shm_ids)
#define shm_unlockall()	ipc_unlockall(&shm_ids)
#define shm_get(id)	((struct shmid_kernel*)ipc_get(&shm_ids,id))
#define shm_buildid(id, seq) \
	ipc_buildid(&shm_ids, id, seq)
static int newseg (key_t key, const char *name, int namelen, int shmflg, size_t size);
static void seg_free(struct shmid_kernel *shp, int doacc);
static void shm_open (struct vm_area_struct *shmd);
static void shm_close (struct vm_area_struct *shmd);
static int shm_remove_name(int id);
static struct page * shm_nopage(struct vm_area_struct *, unsigned long, int);
static int shm_swapout(struct page *, struct file *);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
#endif

static void zshm_swap (int prio, int gfp_mask);
static void zmap_unuse(swp_entry_t entry, struct page *page);
static void shmzero_open(struct vm_area_struct *shmd);
static void shmzero_close(struct vm_area_struct *shmd);
static struct page *shmzero_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share);
static int zero_id;
static struct shmid_kernel zshmid_kernel;
static struct dentry *zdent;

#define SHM_FS_MAGIC 0x02011994

static struct super_block * shm_sb;

static DECLARE_FSTYPE(shm_fs_type, "shm", shm_read_super, FS_SINGLE);
static struct super_operations shm_sops = {
	read_inode:	shm_read_inode,
	write_inode:	shm_write_inode,
	delete_inode:	shm_delete,
	put_super:	shm_put_super,
	statfs:		shm_statfs,
	remount_fs:	shm_remount_fs,
};

static struct file_operations shm_root_operations = {
	readdir:	shm_readdir,
};

static struct inode_operations shm_root_inode_operations = {
	create:		shm_create,
	lookup:		shm_lookup,
	unlink:		shm_unlink,
};

static struct file_operations shm_file_operations = {
	mmap:		shm_mmap,
};

static struct inode_operations shm_inode_operations = {
	setattr:	shm_setattr,
};

static struct vm_operations_struct shm_vm_ops = {
	open:		shm_open,	/* callback for a new vm-area open */
	close:		shm_close,	/* callback for when the vm-area is released */
	nopage:		shm_nopage,
	swapout:	shm_swapout,
};
size_t shm_ctlmax = SHMMAX;

/* These parameters should be part of the superblock */
static int shm_ctlall;
static int shm_ctlmni;
static int shm_mode;

static int shm_tot; /* total number of shared memory pages */
static int shm_rss; /* number of shared memory pages that are in memory */
static int shm_swp; /* number of shared memory pages that are in swap */
/* locks order:
	pagecache_lock
	shm_lock()/shm_lockall()
	kernel lock
	inode->i_sem
	sem_ids.sem
	mmap_sem

  SMP assumptions:
  - swap_free() never sleeps
  - add_to_swap_cache() never sleeps
  - add_to_swap_cache() doesn't acquire the big kernel lock.
  - shm_unuse() is called with the kernel lock acquired.
 */
/* some statistics */
static ulong swap_attempts;
static ulong swap_successes;
static ulong used_segs;

void __init shm_init (void)
{
	struct vfsmount *res;
	ipc_init_ids(&shm_ids, 1);

	register_filesystem (&shm_fs_type);
	res = kern_mount(&shm_fs_type);
	if (IS_ERR(res)) {
		unregister_filesystem(&shm_fs_type);
		return;
	}
#ifdef CONFIG_PROC_FS
	create_proc_read_entry("sysvipc/shm", 0, 0, sysvipc_shm_read_proc, NULL);
#endif
	zero_id = ipc_addid(&shm_ids, &zshmid_kernel.shm_perm, 1);
	shm_unlock(zero_id);
	INIT_LIST_HEAD(&zshmid_kernel.zero_list);
	zdent = d_alloc_root(get_empty_inode());
	return;
}
static int shm_parse_options(char *options)
{
	int blocks = shm_ctlall;
	int inodes = shm_ctlmni;
	umode_t mode = shm_mode;
	char *this_char, *value;

	this_char = NULL;
	if ( options )
		this_char = strtok(options,",");
	for ( ; this_char; this_char = strtok(NULL,",")) {
		if ((value = strchr(this_char,'=')) != NULL)
			*value++ = 0;
		if (!strcmp(this_char,"nr_blocks")) {
			if (!value || !*value)
				return 1;
			blocks = simple_strtoul(value,&value,0);
			if (*value)
				return 1;
		}
		else if (!strcmp(this_char,"nr_inodes")) {
			if (!value || !*value)
				return 1;
			inodes = simple_strtoul(value,&value,0);
			if (*value)
				return 1;
		}
		else if (!strcmp(this_char,"mode")) {
			if (!value || !*value)
				return 1;
			mode = simple_strtoul(value,&value,8);
			if (*value)
				return 1;
		}
		else
			return 1;
	}
	shm_ctlmni = inodes;
	shm_ctlall = blocks;
	shm_mode   = mode;

	return 0;
}
static struct super_block *shm_read_super(struct super_block *s,void *data,
					  int silent)
{
	struct inode * root_inode;

	shm_ctlall = SHMALL;
	shm_ctlmni = SHMMNI;
	shm_mode   = S_IRWXUGO | S_ISVTX;
	if (shm_parse_options (data)) {
		printk(KERN_ERR "shm fs invalid option\n");
		goto out_unlock;
	}

	s->s_blocksize = PAGE_SIZE;
	s->s_blocksize_bits = PAGE_SHIFT;
	s->s_magic = SHM_FS_MAGIC;
	s->s_op = &shm_sops;
	root_inode = iget (s, SEQ_MULTIPLIER);
	if (!root_inode)
		goto out_no_root;
	root_inode->i_op = &shm_root_inode_operations;
	root_inode->i_sb = s;
	root_inode->i_nlink = 2;
	root_inode->i_mode = S_IFDIR | shm_mode;
	s->s_root = d_alloc_root(root_inode);
	if (!s->s_root)
		goto out_no_root;
	shm_sb = s;
	return s;

out_no_root:
	printk(KERN_ERR "shm_read_super: get root inode failed\n");
	iput(root_inode);
out_unlock:
	return NULL;
}
static int shm_remount_fs (struct super_block *sb, int *flags, char *data)
{
	if (shm_parse_options (data))
		return -EINVAL;
	return 0;
}

static inline int shm_checkid(struct shmid_kernel *s, int id)
{
	if (!(s->shm_flags & SHM_SYSV))
		return -EINVAL;
	if (ipc_checkid(&shm_ids,&s->shm_perm,id))
		return -EIDRM;
	return 0;
}

static inline struct shmid_kernel *shm_rmid(int id)
{
	return (struct shmid_kernel *)ipc_rmid(&shm_ids,id);
}

static inline int shm_addid(struct shmid_kernel *shp)
{
	return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni+1);
}
static void shm_put_super(struct super_block *sb)
{
	int i;
	struct shmid_kernel *shp;

	down(&shm_ids.sem);
	for(i = 0; i <= shm_ids.max_id; i++) {
		if (i == zero_id)
			continue;
		if (!(shp = shm_lock (i)))
			continue;
		if (shp->shm_nattch)
			printk(KERN_DEBUG "shm_nattch = %ld\n", shp->shm_nattch);
		shp = shm_rmid(i);
		shm_unlock(i);
		seg_free(shp, 1);
	}
	dput (sb->s_root);
	up(&shm_ids.sem);
}

static int shm_statfs(struct super_block *sb, struct statfs *buf)
{
	buf->f_type = SHM_FS_MAGIC;
	buf->f_bsize = PAGE_SIZE;
	buf->f_blocks = shm_ctlall;
	buf->f_bavail = buf->f_bfree = shm_ctlall - shm_tot;
	buf->f_files = shm_ctlmni;
	buf->f_ffree = shm_ctlmni - used_segs;
	buf->f_namelen = SHM_NAME_LEN;
	return 0;
}
static void shm_write_inode(struct inode * inode)
{
}

static void shm_read_inode(struct inode * inode)
{
	int id;
	struct shmid_kernel *shp;

	id = inode->i_ino;
	inode->i_op = NULL;
	inode->i_mode = 0;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;

	if (id < SEQ_MULTIPLIER) {
		if (!(shp = shm_lock (id)))
			return;
		inode->i_mode = (shp->shm_flags & S_IALLUGO) | S_IFREG;
		inode->i_uid  = shp->shm_perm.uid;
		inode->i_gid  = shp->shm_perm.gid;
		inode->i_size = shp->shm_segsz;
		shm_unlock (id);
		inode->i_op  = &shm_inode_operations;
		inode->i_fop = &shm_file_operations;
		return;
	}
	inode->i_op    = &shm_root_inode_operations;
	inode->i_fop   = &shm_root_operations;
	inode->i_sb    = shm_sb;
	inode->i_nlink = 2;
	inode->i_mode  = S_IFDIR | shm_mode;
	inode->i_uid   = inode->i_gid = 0;
}
static int shm_create (struct inode *dir, struct dentry *dent, int mode)
{
	int id, err;
	struct inode * inode;

	down(&shm_ids.sem);
	err = id = newseg (IPC_PRIVATE, dent->d_name.name, dent->d_name.len, mode, 0);
	if (err < 0)
		goto out;

	err = -ENOMEM;
	inode = iget (shm_sb, id % SEQ_MULTIPLIER);
	if (!inode)
		goto out;

	err = 0;
	down (&inode->i_sem);
	inode->i_mode = mode | S_IFREG;
	inode->i_op   = &shm_inode_operations;
	d_instantiate(dent, inode);
	up (&inode->i_sem);

out:
	up(&shm_ids.sem);
	return err;
}
static int shm_readdir (struct file *filp, void *dirent, filldir_t filldir)
{
	struct inode * inode = filp->f_dentry->d_inode;
	struct shmid_kernel *shp;
	off_t nr;

	nr = filp->f_pos;

	switch(nr)
	{
	case 0:
		if (filldir(dirent, ".", 1, nr, inode->i_ino) < 0)
			return 0;
		filp->f_pos = ++nr;
		/* fall through */
	case 1:
		if (filldir(dirent, "..", 2, nr, inode->i_ino) < 0)
			return 0;
		filp->f_pos = ++nr;
		/* fall through */
	default:
		down(&shm_ids.sem);
		for (; nr-2 <= shm_ids.max_id; nr++ ) {
			if (nr-2 == zero_id)
				continue;
			if (!(shp = shm_get (nr-2)))
				continue;
			if (shp->shm_flags & SHM_UNLK)
				continue;
			if (filldir(dirent, shp->shm_name, shp->shm_namelen, nr, nr) < 0 )
				break;
		}
		filp->f_pos = nr;
		up(&shm_ids.sem);
		break;
	}

	UPDATE_ATIME(inode);
	return 0;
}
static struct dentry *shm_lookup (struct inode *dir, struct dentry *dent)
{
	int i, err = 0;
	struct shmid_kernel* shp;
	struct inode *inode = NULL;

	if (dent->d_name.len > SHM_NAME_LEN)
		return ERR_PTR(-ENAMETOOLONG);

	down(&shm_ids.sem);
	for(i = 0; i <= shm_ids.max_id; i++) {
		if (i == zero_id)
			continue;
		if (!(shp = shm_lock(i)))
			continue;
		if (!(shp->shm_flags & SHM_UNLK) &&
		    dent->d_name.len == shp->shm_namelen &&
		    strncmp(dent->d_name.name, shp->shm_name, shp->shm_namelen) == 0)
			goto found;
		shm_unlock(i);
	}

	/*
	 * prevent the reserved names from showing up as negative dentries.
	 * This also prevents object creation through the filesystem
	 */
	if (dent->d_name.len == SHM_FMT_LEN &&
	    memcmp (SHM_FMT, dent->d_name.name, SHM_FMT_LEN - 8) == 0)
		err = -EINVAL;	/* EINVAL to give IPC_RMID the right error */

	goto out;

found:
	shm_unlock(i);
	inode = iget(dir->i_sb, i);

	if (!inode)
		err = -EACCES;
out:
	if (err == 0)
		d_add (dent, inode);
	up (&shm_ids.sem);
	return ERR_PTR(err);
}
static int shm_unlink (struct inode *dir, struct dentry *dent)
{
	struct inode * inode = dent->d_inode;
	struct shmid_kernel *shp;

	down (&shm_ids.sem);
	if (!(shp = shm_lock (inode->i_ino)))
		BUG();
	shp->shm_flags |= SHM_UNLK | PRV_DEST;
	shp->shm_perm.key = IPC_PRIVATE; /* Do not find it any more */
	shm_unlock (inode->i_ino);
	up (&shm_ids.sem);
	inode->i_nlink -= 1;
	/*
	 * If it's a reserved name we have to drop the dentry instead
	 * of creating a negative dentry
	 */
	if (dent->d_name.len == SHM_FMT_LEN &&
	    memcmp (SHM_FMT, dent->d_name.name, SHM_FMT_LEN - 8) == 0)
		d_drop (dent);
	return 0;
}
/*
 * We cannot use kmalloc for shm_alloc since this restricts the
 * maximum size of the segments.
 *
 * We also cannot use vmalloc, since this uses too much of the vmalloc
 * space and we run out of this on highend machines.
 *
 * So we have to use this complicated indirect scheme to alloc the shm
 * page tables.
 */

#ifdef PTE_INIT
static inline void init_ptes (pte_t *pte, int number) {
	while (number--)
		PTE_INIT (pte++);
}
#else
static inline void init_ptes (pte_t *pte, int number) {
	memset (pte, 0, number*sizeof(*pte));
}
#endif

#define PTES_PER_PAGE (PAGE_SIZE/sizeof(pte_t))
#define SHM_ENTRY(shp, index) (shp)->shm_dir[(index)/PTES_PER_PAGE][(index)%PTES_PER_PAGE]
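/*
 * Worked example (editorial note, not in the original; the numbers
 * assume an i386-style 4 KB PAGE_SIZE and a 4-byte pte_t, so
 * PTES_PER_PAGE is 4096 / 4 = 1024): a 10 MB segment spans 2560
 * pages.  shm_dir then points at two fully used pte pages (entries
 * 0-1023 and 1024-2047) plus one kmalloc'ed tail of 512 ptes, and
 * SHM_ENTRY(shp, 2500) resolves to shm_dir[2][452], since
 * 2500 / 1024 == 2 and 2500 % 1024 == 452.
 */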
static pte_t **shm_alloc(unsigned long pages, int doacc)
{
	unsigned short dir  = pages / PTES_PER_PAGE;
	unsigned short last = pages % PTES_PER_PAGE;
	pte_t **ret, **ptr;

	if (pages == 0)
		return NULL;

	ret = kmalloc ((dir+1) * sizeof(pte_t *), GFP_KERNEL);
	if (!ret)
		goto nomem;

	for (ptr = ret; ptr < ret+dir ; ptr++)
	{
		*ptr = (pte_t *)__get_free_page (GFP_KERNEL);
		if (!*ptr)
			goto free;
		init_ptes (*ptr, PTES_PER_PAGE);
	}

	/* The last one is probably not of PAGE_SIZE: we use kmalloc */
	if (last) {
		*ptr = kmalloc (last*sizeof(pte_t), GFP_KERNEL);
		if (!*ptr)
			goto free;
		init_ptes (*ptr, last);
	}
	if (doacc) {
		shm_lockall();
		shm_tot += pages;
		used_segs++;
		shm_unlockall();
	}

	return ret;

free:
	/* The last failed: we decrement first */
	while (--ptr >= ret)
		free_page ((unsigned long)*ptr);

	kfree (ret);
nomem:
	return ERR_PTR(-ENOMEM);
}
static void shm_free(pte_t** dir, unsigned long pages, int doacc)
{
	int i, rss, swp;
	pte_t **ptr = dir+pages/PTES_PER_PAGE;

	if (!dir)
		return;

	for (i = 0, rss = 0, swp = 0; i < pages ; i++) {
		pte_t pte;
		pte = dir[i/PTES_PER_PAGE][i%PTES_PER_PAGE];
		if (pte_none(pte))
			continue;
		if (pte_present(pte)) {
			__free_page (pte_page(pte));
			rss++;
		} else {
			swap_free(pte_to_swp_entry(pte));
			swp++;
		}
	}

	/* first the last page */
	if (pages%PTES_PER_PAGE)
		kfree (*ptr);
	/* now the whole pages */
	while (--ptr >= dir)
		if (*ptr)
			free_page ((unsigned long)*ptr);

	/* Now the indirect block */
	kfree (dir);

	if (doacc) {
		shm_lockall();
		shm_rss -= rss;
		shm_swp -= swp;
		shm_tot -= pages;
		used_segs--;
		shm_unlockall();
	}
}
static int shm_setattr (struct dentry *dentry, struct iattr *attr)
{
	int error;
	struct inode *inode = dentry->d_inode;
	struct shmid_kernel *shp;
	unsigned long new_pages, old_pages;
	pte_t **new_dir, **old_dir;

	error = inode_change_ok(inode, attr);
	if (error)
		return error;
	if (!(attr->ia_valid & ATTR_SIZE))
		goto set_attr;
	if (attr->ia_size > shm_ctlmax)
		return -EFBIG;

	/* We set old_pages and old_dir for easier cleanup */
	old_pages = new_pages = (attr->ia_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	old_dir = new_dir = shm_alloc(new_pages, 1);
	if (IS_ERR(new_dir))
		return PTR_ERR(new_dir);

	if (!(shp = shm_lock(inode->i_ino)))
		BUG();
	error = -ENOSPC;
	if (shm_tot - shp->shm_npages >= shm_ctlall)
		goto size_out;
	error = 0;
	if (shp->shm_segsz == attr->ia_size)
		goto size_out;
	/* Now we set them to the real values */
	old_dir = shp->shm_dir;
	old_pages = shp->shm_npages;
	if (old_dir){
		pte_t *swap;
		int i,j;
		i = old_pages < new_pages ? old_pages : new_pages;
		j = i % PTES_PER_PAGE;
		i /= PTES_PER_PAGE;
		if (j)
			memcpy (new_dir[i], old_dir[i], j * sizeof (pte_t));
		while (i--) {
			swap = new_dir[i];
			new_dir[i] = old_dir[i];
			old_dir[i] = swap;
		}
	}
	shp->shm_dir = new_dir;
	shp->shm_npages = new_pages;
	shp->shm_segsz = attr->ia_size;
size_out:
	shm_unlock(inode->i_ino);
	shm_free (old_dir, old_pages, 1);

set_attr:
	if (!(shp = shm_lock(inode->i_ino)))
		BUG();
	if (attr->ia_valid & ATTR_MODE)
		shp->shm_perm.mode = attr->ia_mode;
	if (attr->ia_valid & ATTR_UID)
		shp->shm_perm.uid = attr->ia_uid;
	if (attr->ia_valid & ATTR_GID)
		shp->shm_perm.gid = attr->ia_gid;
	shm_unlock (inode->i_ino);

	inode_setattr(inode, attr);
	return error;
}
static struct shmid_kernel *seg_alloc(int numpages, size_t namelen)
{
	struct shmid_kernel *shp;
	pte_t		    **dir;

	shp = (struct shmid_kernel *) kmalloc (sizeof (*shp) + namelen, GFP_KERNEL);
	if (!shp)
		return ERR_PTR(-ENOMEM);

	dir = shm_alloc (numpages, namelen);
	if (IS_ERR(dir)) {
		kfree(shp);
		return ERR_PTR(PTR_ERR(dir));
	}
	shp->shm_dir    = dir;
	shp->shm_npages = numpages;
	shp->shm_nattch = 0;
	shp->shm_namelen = namelen;
	return(shp);
}

static void seg_free(struct shmid_kernel *shp, int doacc)
{
	shm_free (shp->shm_dir, shp->shm_npages, doacc);
	kfree(shp);
}
static int newseg (key_t key, const char *name, int namelen,
		   int shmflg, size_t size)
{
	struct shmid_kernel *shp;
	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
	int id;

	if (namelen > SHM_NAME_LEN)
		return -ENAMETOOLONG;

	if (size > shm_ctlmax)
		return -EINVAL;

	if (shm_tot + numpages >= shm_ctlall)
		return -ENOSPC;

	shp = seg_alloc(numpages, namelen ? namelen : SHM_FMT_LEN + 1);
	if (IS_ERR(shp))
		return PTR_ERR(shp);
	id = shm_addid(shp);
	if(id == -1) {
		seg_free(shp, 1);
		return -ENOSPC;
	}
	shp->shm_perm.key = key;
	shp->shm_flags = (shmflg & S_IRWXUGO);
	shp->shm_segsz = size;
	shp->shm_cprid = current->pid;
	shp->shm_lprid = 0;
	shp->shm_atim = shp->shm_dtim = 0;
	shp->shm_ctim = CURRENT_TIME;
	shp->id = shm_buildid(id,shp->shm_perm.seq);
	if (namelen != 0) {
		shp->shm_namelen = namelen;
		memcpy (shp->shm_name, name, namelen);
	} else {
		shp->shm_flags |= SHM_SYSV;
		shp->shm_namelen = sprintf (shp->shm_name, SHM_FMT, shp->id);
	}
	shm_unlock(id);

	return shp->id;
}
asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
{
	struct shmid_kernel *shp;
	int err, id = 0;

	if (size < SHMMIN)
		return -EINVAL;

	down(&shm_ids.sem);
	if (key == IPC_PRIVATE) {
		err = newseg(key, NULL, 0, shmflg, size);
	} else if ((id = ipc_findkey(&shm_ids,key)) == -1) {
		if (!(shmflg & IPC_CREAT))
			err = -ENOENT;
		else
			err = newseg(key, NULL, 0, shmflg, size);
	} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
		err = -EEXIST;
	} else {
		shp = shm_lock(id);
		if(shp==NULL)
			BUG();
		if (shp->shm_segsz < size)
			err = -EINVAL;
		else if (ipcperms(&shp->shm_perm, shmflg))
			err = -EACCES;
		else
			err = shm_buildid(id, shp->shm_perm.seq);
		shm_unlock(id);
	}
	up(&shm_ids.sem);
	return err;
}
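/*
 * Userspace sketch (editorial illustration, not in the original): the
 * branches above map onto the usual shmget() idioms.  A hedged
 * create-or-attach example on a fixed key (the key 0x1234 is
 * hypothetical):
 *
 *	#include <sys/ipc.h>
 *	#include <sys/shm.h>
 *	#include <errno.h>
 *
 *	int id = shmget(0x1234, 65536, IPC_CREAT | IPC_EXCL | 0600);
 *	if (id == -1 && errno == EEXIST)
 *		id = shmget(0x1234, 65536, 0600);  // attach to existing
 *
 * IPC_PRIVATE always takes the newseg() path above, and
 * IPC_CREAT|IPC_EXCL on an existing key yields -EEXIST.
 */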
static void shm_delete (struct inode *ino)
{
	int shmid = ino->i_ino;
	struct shmid_kernel *shp;

	down(&shm_ids.sem);
	shp = shm_lock(shmid);
	if(shp==NULL) {
		BUG();
	}
	shp = shm_rmid(shmid);
	shm_unlock(shmid);
	up(&shm_ids.sem);
	seg_free(shp, 1);
	clear_inode(ino);
}
static inline unsigned long copy_shmid_to_user(void *buf, struct shmid64_ds *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shmid_ds out;

		ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
		out.shm_segsz	= in->shm_segsz;
		out.shm_atime	= in->shm_atime;
		out.shm_dtime	= in->shm_dtime;
		out.shm_ctime	= in->shm_ctime;
		out.shm_cpid	= in->shm_cpid;
		out.shm_lpid	= in->shm_lpid;
		out.shm_nattch	= in->shm_nattch;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}
struct shm_setbuf {
	uid_t	uid;
	gid_t	gid;
	mode_t	mode;
};

static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void *buf, int version)
{
	switch(version) {
	case IPC_64:
	    {
		struct shmid64_ds tbuf;

		if (copy_from_user(&tbuf, buf, sizeof(tbuf)))
			return -EFAULT;

		out->uid	= tbuf.shm_perm.uid;
		out->gid	= tbuf.shm_perm.gid;
		out->mode	= tbuf.shm_flags;

		return 0;
	    }
	case IPC_OLD:
	    {
		struct shmid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->uid	= tbuf_old.shm_perm.uid;
		out->gid	= tbuf_old.shm_perm.gid;
		out->mode	= tbuf_old.shm_flags;

		return 0;
	    }
	default:
		return -EINVAL;
	}
}
static inline unsigned long copy_shminfo_to_user(void *buf, struct shminfo64 *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shminfo out;

		if(in->shmmax > INT_MAX)
			out.shmmax = INT_MAX;
		else
			out.shmmax = (int)in->shmmax;

		out.shmmin	= in->shmmin;
		out.shmmni	= in->shmmni;
		out.shmseg	= in->shmseg;
		out.shmall	= in->shmall;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}
asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
{
	struct shm_setbuf setbuf;
	struct shmid_kernel *shp;
	int err, version;

	if (cmd < 0 || shmid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);

	switch (cmd) { /* replace with proc interface ? */
	case IPC_INFO:
	{
		struct shminfo64 shminfo;

		memset(&shminfo,0,sizeof(shminfo));
		shminfo.shmmni = shminfo.shmseg = shm_ctlmni;
		shminfo.shmmax = shm_ctlmax;
		shminfo.shmall = shm_ctlall;

		shminfo.shmmin = SHMMIN;
		if(copy_shminfo_to_user (buf, &shminfo, version))
			return -EFAULT;
		/* reading an integer is always atomic */
		err = shm_ids.max_id;
		if(err<0)
			err = 0;
		return err;
	}
	case SHM_INFO:
	{
		struct shm_info shm_info;

		memset(&shm_info,0,sizeof(shm_info));
		shm_lockall();
		shm_info.used_ids = shm_ids.in_use;
		shm_info.shm_rss = shm_rss;
		shm_info.shm_tot = shm_tot;
		shm_info.shm_swp = shm_swp;
		shm_info.swap_attempts = swap_attempts;
		shm_info.swap_successes = swap_successes;
		err = shm_ids.max_id;
		shm_unlockall();
		if(copy_to_user (buf, &shm_info, sizeof(shm_info)))
			return -EFAULT;

		return err < 0 ? 0 : err;
	}
	case SHM_STAT:
	case IPC_STAT:
	{
		struct shmid64_ds tbuf;
		int result;
		if ((shmid % SEQ_MULTIPLIER) == zero_id)
			return -EINVAL;
		memset(&tbuf, 0, sizeof(tbuf));
		shp = shm_lock(shmid);
		if(shp==NULL)
			return -EINVAL;
		if(cmd==SHM_STAT) {
			err = -EINVAL;
			if (!(shp->shm_flags & SHM_SYSV) ||
			    shmid > shm_ids.max_id)
				goto out_unlock;
			result = shm_buildid(shmid, shp->shm_perm.seq);
		} else {
			err = shm_checkid(shp,shmid);
			if(err)
				goto out_unlock;
			result = 0;
		}
		err=-EACCES;
		if (ipcperms (&shp->shm_perm, S_IRUGO))
			goto out_unlock;
		kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
		/* ugly hack to keep binary compatibility for ipcs */
		tbuf.shm_flags &= PRV_DEST | PRV_LOCKED | S_IRWXUGO;
		if (tbuf.shm_flags & PRV_DEST)
			tbuf.shm_flags |= SHM_DEST;
		if (tbuf.shm_flags & PRV_LOCKED)
			tbuf.shm_flags |= SHM_LOCKED;
		tbuf.shm_flags &= SHM_DEST | SHM_LOCKED | S_IRWXUGO;
		tbuf.shm_segsz	= shp->shm_segsz;
		tbuf.shm_atime	= shp->shm_atim;
		tbuf.shm_dtime	= shp->shm_dtim;
		tbuf.shm_ctime	= shp->shm_ctim;
		tbuf.shm_cpid	= shp->shm_cprid;
		tbuf.shm_lpid	= shp->shm_lprid;
		tbuf.shm_nattch	= shp->shm_nattch;
		shm_unlock(shmid);
		if(copy_shmid_to_user (buf, &tbuf, version))
			return -EFAULT;
		return result;
	}
	case SHM_LOCK:
	case SHM_UNLOCK:
	{
		/* Allow superuser to lock segment in memory */
		/* Should the pages be faulted in here or leave it to user? */
		/* need to determine interaction with current->swappable */
		if ((shmid % SEQ_MULTIPLIER)== zero_id)
			return -EINVAL;
		if (!capable(CAP_IPC_LOCK))
			return -EPERM;

		shp = shm_lock(shmid);
		if(shp==NULL)
			return -EINVAL;
		err = shm_checkid(shp,shmid);
		if(err)
			goto out_unlock;
		if(cmd==SHM_LOCK)
			shp->shm_flags |= PRV_LOCKED;
		else
			shp->shm_flags &= ~PRV_LOCKED;
		shm_unlock(shmid);
		return err;
	}
	case IPC_RMID:
	{
		/*
		 *	We cannot simply remove the file. The SVID states
		 *	that the block remains until the last person
		 *	detaches from it, then is deleted. A shmat() on
		 *	an RMID segment is legal in older Linux and if
		 *	we change it apps break...
		 *
		 *	Instead we set a destroyed flag, and then blow
		 *	the name away when the usage hits zero.
		 */
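		/*
		 * Illustrative userspace sequence (editorial note, not
		 * in the original) showing the deferred destroy:
		 *
		 *	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
		 *	void *p = shmat(id, NULL, 0);
		 *	shmctl(id, IPC_RMID, NULL);	// still mapped, so only
		 *					// PRV_DEST gets set
		 *	memcpy(p, "still usable", 13);	// mapping stays valid
		 *	shmdt(p);			// last detach frees it
		 */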
		if ((shmid % SEQ_MULTIPLIER) == zero_id)
			return -EINVAL;
		down(&shm_ids.sem);
		shp = shm_lock(shmid);
		if (shp == NULL) {
			up(&shm_ids.sem);
			return -EINVAL;
		}
		err = shm_checkid(shp, shmid);
		if (err == 0) {
			if (shp->shm_nattch == 0 &&
			    !(shp->shm_flags & SHM_UNLK)) {
				int id=shp->id;
				shm_unlock(shmid);
				up(&shm_ids.sem);
				/*
				 * We can't hold shm_lock here else we
				 * will deadlock in shm_lookup when we
				 * try to recursively grab it.
				 */
				return shm_remove_name(id);
			}
			shp->shm_flags |= PRV_DEST;
			/* Do not find it any more */
			shp->shm_perm.key = IPC_PRIVATE;
		}
		/* Unlock */
		shm_unlock(shmid);
		up(&shm_ids.sem);
		return err;
	}
	case IPC_SET:
	{
		struct dentry * dentry;
		char name[SHM_FMT_LEN+1];

		if ((shmid % SEQ_MULTIPLIER)== zero_id)
			return -EINVAL;

		if(copy_shmid_from_user (&setbuf, buf, version))
			return -EFAULT;
		down(&shm_ids.sem);
		shp = shm_lock(shmid);
		err=-EINVAL;
		if(shp==NULL)
			goto out_up;
		err = shm_checkid(shp,shmid);
		if(err)
			goto out_unlock_up;
		err=-EPERM;
		if (current->euid != shp->shm_perm.uid &&
		    current->euid != shp->shm_perm.cuid &&
		    !capable(CAP_SYS_ADMIN)) {
			goto out_unlock_up;
		}

		shp->shm_perm.uid = setbuf.uid;
		shp->shm_perm.gid = setbuf.gid;
		shp->shm_flags = (shp->shm_flags & ~S_IRWXUGO)
			| (setbuf.mode & S_IRWXUGO);
		shp->shm_ctim = CURRENT_TIME;
		shm_unlock(shmid);
		up(&shm_ids.sem);

		sprintf (name, SHM_FMT, shmid);
		lock_kernel();
		dentry = lookup_one(name, lock_parent(shm_sb->s_root));
		unlock_dir(shm_sb->s_root);
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto bad_dentry;
		err = -ENOENT;
		if (dentry->d_inode) {
			struct inode *ino = dentry->d_inode;
			ino->i_uid   = setbuf.uid;
			ino->i_gid   = setbuf.gid;
			ino->i_mode  = (setbuf.mode & S_IRWXUGO) | (ino->i_mode & ~S_IALLUGO);
			ino->i_atime = ino->i_mtime = ino->i_ctime = CURRENT_TIME;
			err = 0;
		}
		dput (dentry);
	bad_dentry:
		unlock_kernel();
		return err;
	}
	default:
		return -EINVAL;
	}

	err = 0;
out_unlock_up:
	shm_unlock(shmid);
out_up:
	up(&shm_ids.sem);
	return err;
out_unlock:
	shm_unlock(shmid);
	return err;
}
static inline void shm_inc (int id) {
	struct shmid_kernel *shp;

	if(!(shp = shm_lock(id)))
		BUG();
	shp->shm_atim = CURRENT_TIME;
	shp->shm_lprid = current->pid;
	shp->shm_nattch++;
	shm_unlock(id);
}

static int shm_mmap(struct file * file, struct vm_area_struct * vma)
{
	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
		return -EINVAL;	/* we cannot do private writable mappings */
	UPDATE_ATIME(file->f_dentry->d_inode);
	vma->vm_ops = &shm_vm_ops;
	shm_inc(file->f_dentry->d_inode->i_ino);
	return 0;
}
/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 */
asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
{
	struct shmid_kernel *shp;
	unsigned long addr;
	struct file * file;
	int    err;
	unsigned long flags;
	unsigned long prot;
	unsigned long o_flags;
	int acc_mode;
	struct dentry *dentry;
	char   name[SHM_FMT_LEN+1];

	if (!shm_sb || (shmid % SEQ_MULTIPLIER) == zero_id)
		return -EINVAL;

	if ((addr = (ulong)shmaddr)) {
		if (addr & (SHMLBA-1)) {
			if (shmflg & SHM_RND)
				addr &= ~(SHMLBA-1);	   /* round down */
			else
				return -EINVAL;
		}
		flags = MAP_SHARED | MAP_FIXED;
	} else
		flags = MAP_SHARED;

	if (shmflg & SHM_RDONLY) {
		prot = PROT_READ;
		o_flags = O_RDONLY;
		acc_mode = S_IRUGO;
	} else {
		prot = PROT_READ | PROT_WRITE;
		o_flags = O_RDWR;
		acc_mode = S_IRUGO | S_IWUGO;
	}

	/*
	 * We cannot rely on the fs check since SYSV IPC does have an
	 * additional creator id...
	 */
	shp = shm_lock(shmid);
	if(shp==NULL)
		return -EINVAL;
	err = ipcperms(&shp->shm_perm, acc_mode);
	shm_unlock(shmid);
	if (err)
		return -EACCES;

	sprintf (name, SHM_FMT, shmid);

	lock_kernel();
	mntget(shm_fs_type.kern_mnt);
	dentry = lookup_one(name, lock_parent(shm_sb->s_root));
	unlock_dir(shm_sb->s_root);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		goto bad_file;
	err = -ENOENT;
	if (!dentry->d_inode)
		goto bad_file;
	file = dentry_open(dentry, shm_fs_type.kern_mnt, o_flags);
	err = PTR_ERR(file);
	if (IS_ERR (file))
		goto bad_file1;
	down(&current->mm->mmap_sem);
	*raddr = do_mmap (file, addr, file->f_dentry->d_inode->i_size,
			  prot, flags, 0);
	up(&current->mm->mmap_sem);
	unlock_kernel();
	if (IS_ERR(*raddr))
		err = PTR_ERR(*raddr);
	else
		err = 0;
	fput (file);
	return err;

bad_file1:
	dput(dentry);
bad_file:
	mntput(shm_fs_type.kern_mnt);
	unlock_kernel();
	if (err == -ENOENT)
		return -EINVAL;
	return err;
}
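/*
 * Illustrative sketch (editorial note, not in the original): SHM_RND
 * only matters when a non-NULL, unaligned address is passed in.
 * Assuming SHMLBA == 4096 (it is larger on some architectures), a
 * hedged userspace example:
 *
 *	void *p = shmat(id, (void *)0x40001234, SHM_RND);
 *	// 0x40001234 is rounded down to 0x40001000 above; without
 *	// SHM_RND the same call would fail with EINVAL.
 *	...
 *	shmdt(p);	// sys_shmdt() finds the vma whose start matches p
 */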
/* This is called by fork, once for every shm attach. */
static void shm_open (struct vm_area_struct *shmd)
{
	shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino);
}
/*
 * Remove a name.
 */
static int shm_remove_name(int id)
{
	struct dentry *dir;
	struct dentry *dentry;
	int error;
	char name[SHM_FMT_LEN+1];

	sprintf (name, SHM_FMT, id);
	lock_kernel();
	dir = lock_parent(shm_sb->s_root);
	dentry = lookup_one(name, dir);
	error = PTR_ERR(dentry);
	if (!IS_ERR(dentry)) {
		/*
		 * We have to do our own unlink to prevent the vfs
		 * permission check. The SYSV IPC layer has already
		 * checked the permissions, which do not comply with
		 * the vfs rules.
		 */
		struct inode *inode = dir->d_inode;
		down(&inode->i_zombie);
		error = shm_unlink(inode, dentry);
		if (!error)
			d_delete(dentry);
		up(&inode->i_zombie);
		dput(dentry);
	}
	unlock_dir(dir);
	unlock_kernel();
	return error;
}
/*
 * remove the attach descriptor shmd.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close (struct vm_area_struct *shmd)
{
	int id = shmd->vm_file->f_dentry->d_inode->i_ino;
	struct shmid_kernel *shp;

	/* remove from the list of attaches of the shm segment */
	if(!(shp = shm_lock(id)))
		BUG();
	shp->shm_lprid = current->pid;
	shp->shm_dtim = CURRENT_TIME;
	shp->shm_nattch--;
	if(shp->shm_nattch == 0 &&
	   shp->shm_flags & PRV_DEST &&
	   !(shp->shm_flags & SHM_UNLK)) {
		int pid=shp->id;
		int err;
		shm_unlock(id);

		/* The kernel lock prevents new attaches from
		 * happening. We can't hold shm_lock here
		 * else we will deadlock in shm_lookup when we
		 * try to recursively grab it.
		 */
		err = shm_remove_name(pid);
		if(err && err != -EINVAL && err != -ENOENT)
			printk(KERN_ERR "Unlink of SHM id %d failed (%d).\n", pid, err);

	} else {
		shm_unlock(id);
	}
}
/*
 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
 */
asmlinkage long sys_shmdt (char *shmaddr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *shmd, *shmdnext;

	down(&mm->mmap_sem);
	for (shmd = mm->mmap; shmd; shmd = shmdnext) {
		shmdnext = shmd->vm_next;
		if (shmd->vm_ops == &shm_vm_ops
		    && shmd->vm_start - (shmd->vm_pgoff << PAGE_SHIFT) == (ulong) shmaddr)
			do_munmap(mm, shmd->vm_start, shmd->vm_end - shmd->vm_start);
	}
	up(&mm->mmap_sem);
	return 0;
}
/*
 * Enter the shm page into the SHM data structures.
 *
 * The way "nopage" is done, we don't actually have to
 * do anything here: nopage will have filled in the shm
 * data structures already, and shm_swap() will just
 * work off them..
 */
static int shm_swapout(struct page * page, struct file *file)
{
	return 0;
}
/*
 * page not present ... go through shm_dir
 */
static struct page * shm_nopage_core(struct shmid_kernel *shp, unsigned int idx, int *swp, int *rss, unsigned long address)
{
	pte_t pte;
	struct page * page;

	if (idx >= shp->shm_npages)
		return NOPAGE_SIGBUS;

	pte = SHM_ENTRY(shp,idx);
	if (!pte_present(pte)) {
		/* page not present so shm_swap can't race with us
		   and the semaphore protects us from other tasks that
		   could potentially fault on our pte under us */
		if (pte_none(pte)) {
			shm_unlock(shp->id);
			page = page_cache_alloc();
			if (!page)
				goto oom;
			clear_user_highpage(page, address);
			if ((shp != shm_lock(shp->id)) && (shp->id != zero_id))
				BUG();
		} else {
			swp_entry_t entry = pte_to_swp_entry(pte);

			shm_unlock(shp->id);
			page = lookup_swap_cache(entry);
			if (!page) {
				lock_kernel();
				swapin_readahead(entry);
				page = read_swap_cache(entry);
				unlock_kernel();
				if (!page)
					goto oom;
			}
			delete_from_swap_cache(page);
			page = replace_with_highmem(page);
			swap_free(entry);
			if ((shp != shm_lock(shp->id)) && (shp->id != zero_id))
				BUG();
			(*swp)--;
		}
		(*rss)++;
		pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
		SHM_ENTRY(shp, idx) = pte;
	}

	/* pte_val(pte) == SHM_ENTRY (shp, idx) */
	page_cache_get(pte_page(pte));
	return pte_page(pte);

oom:
	shm_lock(shp->id);
	return NOPAGE_OOM;
}
static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
{
	struct page * page;
	struct shmid_kernel *shp;
	unsigned int idx;
	struct inode * inode = shmd->vm_file->f_dentry->d_inode;

	idx = (address - shmd->vm_start) >> PAGE_SHIFT;
	idx += shmd->vm_pgoff;

	down(&inode->i_sem);
	if(!(shp = shm_lock(inode->i_ino)))
		BUG();
	page = shm_nopage_core(shp, idx, &shm_swp, &shm_rss, address);
	shm_unlock(inode->i_ino);
	up(&inode->i_sem);
	return(page);
}
#define OKAY	0
#define RETRY	1
#define FAILED	2

static int shm_swap_core(struct shmid_kernel *shp, unsigned long idx, swp_entry_t swap_entry, int *counter, struct page **outpage)
{
	pte_t page;
	struct page *page_map;

	page = SHM_ENTRY(shp, idx);
	if (!pte_present(page))
		return RETRY;
	page_map = pte_page(page);
	if (page_map->zone->free_pages > page_map->zone->pages_high)
		return RETRY;
	if (shp->id != zero_id) swap_attempts++;

	if (--(*counter) < 0) /* failed */
		return FAILED;
	if (page_count(page_map) != 1)
		return RETRY;

	lock_page(page_map);
	if (!(page_map = prepare_highmem_swapout(page_map)))
		return FAILED;
	SHM_ENTRY (shp, idx) = swp_entry_to_pte(swap_entry);

	/* add the locked page to the swap cache before allowing
	   the swapin path to run lookup_swap_cache(). This avoids
	   reading a not yet uptodate block from disk.
	   NOTE: we just accounted the swap space reference for this
	   swap cache page at __get_swap_page() time. */
	add_to_swap_cache(*outpage = page_map, swap_entry);
	return OKAY;
}
static void shm_swap_postop(struct page *page)
{
	lock_kernel();
	rw_swap_page(WRITE, page, 0);
	unlock_kernel();
	page_cache_release(page);
}

static int shm_swap_preop(swp_entry_t *swap_entry)
{
	lock_kernel();
	/* subtle: preload the swap count for the swap cache. We can't
	   increase the count inside the critical section as we can't release
	   the shm_lock there. And we can't acquire the big lock with the
	   shm_lock held (otherwise we would deadlock too easily). */
	*swap_entry = __get_swap_page(2);
	if (!(*swap_entry).val) {
		unlock_kernel();
		return 1;
	}
	unlock_kernel();
	return 0;
}
/*
 * Goes through counter = (shm_rss / (prio + 1)) present shm pages.
 */
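/*
 * Worked example (editorial note, not in the original): with
 * shm_rss == 1200 resident shm pages, an aggressive scan at prio 0
 * may visit up to 1200 / 1 == 1200 pages, while a gentle scan at
 * prio 5 stops after 1200 / 6 == 200; lower prio values therefore
 * scan harder.
 */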
static unsigned long swap_id; /* currently being swapped */
static unsigned long swap_idx; /* next to swap */

int shm_swap (int prio, int gfp_mask)
{
	struct shmid_kernel *shp;
	swp_entry_t swap_entry;
	unsigned long id, idx;
	int loop = 0;
	int counter;
	struct page * page_map;

	zshm_swap(prio, gfp_mask);
	counter = shm_rss / (prio + 1);
	if (!counter)
		return 0;
	if (shm_swap_preop(&swap_entry))
		return 0;

	shm_lockall();
check_id:
	shp = shm_get(swap_id);
	if(shp==NULL || shp->shm_flags & PRV_LOCKED) {
next_id:
		swap_idx = 0;
		if (++swap_id > shm_ids.max_id) {
			swap_id = 0;
			if (loop) {
failed:
				shm_unlockall();
				__swap_free(swap_entry, 2);
				return 0;
			}
			loop = 1;
		}
		goto check_id;
	}
	id = swap_id;

check_table:
	idx = swap_idx++;
	if (idx >= shp->shm_npages)
		goto next_id;

	switch (shm_swap_core(shp, idx, swap_entry, &counter, &page_map)) {
		case RETRY: goto check_table;
		case FAILED: goto failed;
	}
	swap_successes++;
	shm_swp++;
	shm_rss--;
	shm_unlockall();

	shm_swap_postop(page_map);
	return 1;
}
/*
 * Free the swap entry and set the new pte for the shm page.
 */
static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
			   swp_entry_t entry, struct page *page)
{
	pte_t pte;

	pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
	SHM_ENTRY(shp, idx) = pte;
	page_cache_get(page);
	shm_rss++;

	shm_swp--;

	swap_free(entry);
}

static int shm_unuse_core(struct shmid_kernel *shp, swp_entry_t entry, struct page *page)
{
	int n;

	for (n = 0; n < shp->shm_npages; n++) {
		if (pte_none(SHM_ENTRY(shp,n)))
			continue;
		if (pte_present(SHM_ENTRY(shp,n)))
			continue;
		if (pte_to_swp_entry(SHM_ENTRY(shp,n)).val == entry.val) {
			shm_unuse_page(shp, n, entry, page);
			return 1;
		}
	}
	return 0;
}
/*
 * shm_unuse() searches for a possibly swapped-out shm page.
 */
void shm_unuse(swp_entry_t entry, struct page *page)
{
	int i;

	shm_lockall();
	for (i = 0; i <= shm_ids.max_id; i++) {
		struct shmid_kernel *shp = shm_get(i);
		if(shp==NULL)
			continue;
		if (shm_unuse_core(shp, entry, page))
			goto out;
	}
out:
	shm_unlockall();
	zmap_unuse(entry, page);
}
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
{
	off_t pos = 0;
	off_t begin = 0;
	int i, len = 0;

	down(&shm_ids.sem);
	len += sprintf(buffer, "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime name\n");

	for(i = 0; i <= shm_ids.max_id; i++) {
		struct shmid_kernel* shp;

		if (i == zero_id)
			continue;
		shp = shm_lock(i);
		if(shp!=NULL) {
#define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu %.*s%s\n"
#define BIG_STRING   "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu %.*s%s\n"
			char *format;

			if (sizeof(size_t) <= sizeof(int))
				format = SMALL_STRING;
			else
				format = BIG_STRING;
			len += sprintf(buffer + len, format,
				shp->shm_perm.key,
				shm_buildid(i, shp->shm_perm.seq),
				shp->shm_flags,
				shp->shm_segsz,
				shp->shm_cprid,
				shp->shm_lprid,
				shp->shm_nattch,
				shp->shm_perm.uid,
				shp->shm_perm.gid,
				shp->shm_perm.cuid,
				shp->shm_perm.cgid,
				shp->shm_atim,
				shp->shm_dtim,
				shp->shm_ctim,
				shp->shm_namelen,
				shp->shm_name,
				shp->shm_flags & SHM_UNLK ? " (deleted)" : "");
			shm_unlock(i);
		}
		pos += len;
		if(pos < offset) {
			len = 0;
			begin = pos;
		}
		if(pos > offset + length)
			goto done;
	}
	*eof = 1;

done:
	up(&shm_ids.sem);
	*start = buffer + (offset - begin);
	len -= (offset - begin);
	if(len > length)
		len = length;
	if(len < 0)
		len = 0;
	return len;
}
#endif
#define VMA_TO_SHP(vma)		((vma)->vm_file->private_data)

static spinlock_t zmap_list_lock = SPIN_LOCK_UNLOCKED;
static unsigned long zswap_idx; /* next to swap */
static struct shmid_kernel *zswap_shp = &zshmid_kernel;
static int zshm_rss;

static struct vm_operations_struct shmzero_vm_ops = {
	open:		shmzero_open,
	close:		shmzero_close,
	nopage:		shmzero_nopage,
	swapout:	shm_swapout,
};
/*
 * In this implementation, the "unuse" and "swapout" interfaces are
 * interlocked via the kernel_lock, as well as shm_lock(zero_id).
 * "unuse" and "nopage/swapin", as well as "swapout" and "nopage/swapin"
 * interlock via shm_lock(zero_id). All these interlocks can be based
 * on a per mapping lock instead of being a global lock.
 */
/*
 * Reference (existence) counting on the file/dentry/inode is done
 * by generic vm_file code. The zero code does not hold any reference
 * on the pseudo-file. This is possible because the open/close calls
 * are bracketed by the file count update calls.
 */
static struct file *file_setup(struct file *fzero, struct shmid_kernel *shp)
{
	struct file *filp;
	struct inode *inp;

	if ((filp = get_empty_filp()) == 0)
		return(filp);
	if ((inp = get_empty_inode()) == 0) {
		put_filp(filp);
		return(0);
	}
	if ((filp->f_dentry = d_alloc(zdent, &(const struct qstr) { "dev/zero",
					8, 0 })) == 0) {
		iput(inp);
		put_filp(filp);
		return(0);
	}
	filp->f_vfsmnt = mntget(shm_fs_type.kern_mnt);
	d_instantiate(filp->f_dentry, inp);

	/*
	 * Copy over dev/ino for benefit of procfs. Use
	 * ino to indicate separate mappings.
	 */
	filp->f_dentry->d_inode->i_dev = shm_fs_type.kern_mnt->mnt_sb->s_dev;
	filp->f_dentry->d_inode->i_ino = (unsigned long)shp;
	if (fzero)
		fput(fzero);	/* release /dev/zero file */
	return(filp);
}
int map_zero_setup(struct vm_area_struct *vma)
{
	extern int vm_enough_memory(long pages);
	struct shmid_kernel *shp;
	struct file *filp;

	if (!vm_enough_memory((vma->vm_end - vma->vm_start) >> PAGE_SHIFT))
		return -ENOMEM;
	if (IS_ERR(shp = seg_alloc((vma->vm_end - vma->vm_start) / PAGE_SIZE, 0)))
		return PTR_ERR(shp);
	if ((filp = file_setup(vma->vm_file, shp)) == 0) {
		seg_free(shp, 0);
		return -ENOMEM;
	}
	vma->vm_file = filp;
	VMA_TO_SHP(vma) = (void *)shp;
	shp->id = zero_id;
	init_MUTEX(&shp->zsem);
	vma->vm_ops = &shmzero_vm_ops;
	shmzero_open(vma);
	spin_lock(&zmap_list_lock);
	list_add(&shp->zero_list, &zshmid_kernel.zero_list);
	spin_unlock(&zmap_list_lock);
	return 0;
}
static void shmzero_open(struct vm_area_struct *shmd)
{
	struct shmid_kernel *shp;

	shp = VMA_TO_SHP(shmd);
	down(&shp->zsem);
	shp->shm_nattch++;
	up(&shp->zsem);
}

static void shmzero_close(struct vm_area_struct *shmd)
{
	int done = 0;
	struct shmid_kernel *shp;

	shp = VMA_TO_SHP(shmd);
	down(&shp->zsem);
	if (--shp->shm_nattch == 0)
		done = 1;
	up(&shp->zsem);
	if (done) {
		spin_lock(&zmap_list_lock);
		if (shp == zswap_shp)
			zswap_shp = list_entry(zswap_shp->zero_list.next,
						struct shmid_kernel, zero_list);
		list_del(&shp->zero_list);
		spin_unlock(&zmap_list_lock);
		seg_free(shp, 0);
	}
}
static struct page * shmzero_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
{
	struct page *page;
	struct shmid_kernel *shp;
	unsigned int idx;
	int dummy;

	idx = (address - shmd->vm_start) >> PAGE_SHIFT;
	idx += shmd->vm_pgoff;

	shp = VMA_TO_SHP(shmd);
	down(&shp->zsem);
	shm_lock(zero_id);
	page = shm_nopage_core(shp, idx, &dummy, &zshm_rss, address);
	shm_unlock(zero_id);
	up(&shp->zsem);
	return(page);
}
static void zmap_unuse(swp_entry_t entry, struct page *page)
{
	struct shmid_kernel *shp;

	spin_lock(&zmap_list_lock);
	shm_lock(zero_id);
	for (shp = list_entry(zshmid_kernel.zero_list.next, struct shmid_kernel,
			zero_list); shp != &zshmid_kernel;
			shp = list_entry(shp->zero_list.next, struct shmid_kernel,
								zero_list)) {
		if (shm_unuse_core(shp, entry, page))
			break;
	}
	shm_unlock(zero_id);
	spin_unlock(&zmap_list_lock);
}
static void zshm_swap (int prio, int gfp_mask)
{
	struct shmid_kernel *shp;
	swp_entry_t swap_entry;
	unsigned long idx;
	int loop = 0;
	int counter;
	struct page * page_map;

	counter = zshm_rss / (prio + 1);
	if (!counter)
		return;
next:
	if (shm_swap_preop(&swap_entry))
		return;

	spin_lock(&zmap_list_lock);
	shm_lock(zero_id);
	if (zshmid_kernel.zero_list.next == 0)
		goto failed;
next_id:
	if (zswap_shp == &zshmid_kernel) {
		if (loop) {
failed:
			shm_unlock(zero_id);
			spin_unlock(&zmap_list_lock);
			__swap_free(swap_entry, 2);
			return;
		}
		zswap_shp = list_entry(zshmid_kernel.zero_list.next,
					struct shmid_kernel, zero_list);
		zswap_idx = 0;
		loop = 1;
	}
	shp = zswap_shp;

check_table:
	idx = zswap_idx++;
	if (idx >= shp->shm_npages) {
		zswap_shp = list_entry(zswap_shp->zero_list.next,
					struct shmid_kernel, zero_list);
		zswap_idx = 0;
		goto next_id;
	}

	switch (shm_swap_core(shp, idx, swap_entry, &counter, &page_map)) {
		case RETRY: goto check_table;
		case FAILED: goto failed;
	}
	shm_unlock(zero_id);
	spin_unlock(&zmap_list_lock);

	shm_swap_postop(page_map);
	if (counter)
		goto next;
	return;
}