/*
 * linux/ipc/shm.c
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 *	 Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
 *
 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
 * avoid vmalloc and make shmmax, shmall, shmmni sysctl'able,
 *	Christoph Rohland <hans-christoph.rohland@sap.com>
 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
 * make it a file system, Christoph Rohland <hans-christoph.rohland@sap.com>
 *
 * The filesystem has the following restrictions/bugs:
 * 1) It can only handle one directory.
 * 2) Because the directory is represented by the SYSV shm array it
 *    can only be mounted once.
 * 3) Private writeable mappings are not supported.
 * 4) Read and write are not implemented (should they be?)
 * 5) No special nodes are supported.
 *
 * There are the following mount options:
 * - nr_blocks (^= shmall) is the number of blocks of size PAGE_SIZE
 *   we are allowed to allocate
 * - nr_inodes (^= shmmni) is the number of files we are allowed to
 *   allocate
 * - mode is the mode for the root directory (default S_IRWXUGO | S_ISVTX)
 */
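/*
 * An illustrative mount invocation (not from this file; the mount point
 * and option values are arbitrary):
 *
 *	mount -t shm none /var/shm -o nr_blocks=4096,nr_inodes=128,mode=700
 *
 * This would cap the filesystem at 4096 PAGE_SIZE blocks across at most
 * 128 segments, with a root directory accessible only to its owner.
 */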
#include <linux/config.h>
#include <linux/module.h>
#include <linux/malloc.h>
#include <linux/shm.h>
#include <linux/swap.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/locks.h>
#include <linux/file.h>
#include <linux/mman.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/highmem.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>

#include "util.h"
static struct super_block *shm_read_super(struct super_block *, void *, int);
static void shm_put_super (struct super_block *);
static int shm_remount_fs (struct super_block *, int *, char *);
static void shm_read_inode (struct inode *);
static void shm_write_inode(struct inode *);
static int shm_statfs (struct super_block *, struct statfs *);
static int shm_create (struct inode *, struct dentry *, int);
static struct dentry *shm_lookup (struct inode *, struct dentry *);
static int shm_unlink (struct inode *, struct dentry *);
static int shm_setattr (struct dentry *dent, struct iattr *attr);
static void shm_delete (struct inode *);
static int shm_mmap (struct file *, struct vm_area_struct *);
static int shm_readdir (struct file *, void *, filldir_t);
#define SHM_NAME_LEN NAME_MAX
#define SHM_FMT ".IPC_%08x"
#define SHM_FMT_LEN 13
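/* A SYSV segment with id 0x0000abcd is thus backed by a file named
 * ".IPC_0000abcd": SHM_FMT_LEN is the 5 characters of ".IPC_" plus
 * 8 hex digits. */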
/* shm_mode upper byte flags */
/* SHM_DEST and SHM_LOCKED are used in ipcs(8) */
#define PRV_DEST	0010000	/* segment will be destroyed on last detach */
#define PRV_LOCKED	0020000	/* segment will not be swapped */
#define SHM_UNLK	0040000	/* filename is unlinked */
#define SHM_SYSV	0100000	/* it is a SYSV shm segment */
struct shmid_kernel /* private to the kernel */
{
        struct kern_ipc_perm	shm_perm;
        size_t			shm_segsz;
        unsigned long		shm_nattch;
        unsigned long		shm_npages;	/* size of segment (pages) */
        pte_t			**shm_dir;	/* ptr to arr of ptrs to frames */
        int			id;
        union permap {
                struct shmem {
                        time_t	atime;
                        time_t	dtime;
                        time_t	ctime;
                        pid_t	cpid;
                        pid_t	lpid;
                        int	nlen;
                        char	nm[0];
                } shmem;
                struct zero {
                        struct semaphore	sema;
                        struct list_head	list;
                } zero;
        } permap;
};
#define shm_atim	permap.shmem.atime
#define shm_dtim	permap.shmem.dtime
#define shm_ctim	permap.shmem.ctime
#define shm_cprid	permap.shmem.cpid
#define shm_lprid	permap.shmem.lpid
#define shm_namelen	permap.shmem.nlen
#define shm_name	permap.shmem.nm
#define shm_flags	shm_perm.mode
#define zsem		permap.zero.sema
#define zero_list	permap.zero.list
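/* Note that shm_flags above is a plain object-like macro, not a struct
 * member: any `x.shm_flags' in this file (including on a struct
 * shmid64_ds or shmid_ds, as in copy_shmid_from_user() below) expands
 * to `x.shm_perm.mode'. */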
static struct ipc_ids shm_ids;

#define shm_lock(id)	((struct shmid_kernel *)ipc_lock(&shm_ids, id))
#define shm_unlock(id)	ipc_unlock(&shm_ids, id)
#define shm_lockall()	ipc_lockall(&shm_ids)
#define shm_unlockall()	ipc_unlockall(&shm_ids)
#define shm_get(id)	((struct shmid_kernel *)ipc_get(&shm_ids, id))
#define shm_buildid(id, seq) \
	ipc_buildid(&shm_ids, id, seq)
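/* ipc_buildid() lives in ipc/util.h; at this point in history it combines
 * the array slot with the slot's sequence counter (roughly
 * seq * SEQ_MULTIPLIER + id), so a recycled slot yields a fresh shmid and
 * stale ids are caught by shm_checkid()/ipc_checkid(). */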
static int newseg (key_t key, const char *name, int namelen, int shmflg, size_t size);
static void seg_free(struct shmid_kernel *shp, int doacc);
static void shm_open (struct vm_area_struct *shmd);
static void shm_close (struct vm_area_struct *shmd);
static int shm_remove_name(int id);
static struct page * shm_nopage(struct vm_area_struct *, unsigned long, int);
static int shm_swapout(struct page *, struct file *);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
#endif

static void zshm_swap (int prio, int gfp_mask);
static void zmap_unuse(swp_entry_t entry, struct page *page);
static void shmzero_open(struct vm_area_struct *shmd);
static void shmzero_close(struct vm_area_struct *shmd);
static struct page *shmzero_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share);
static int zero_id;
static struct shmid_kernel zshmid_kernel;
static struct dentry *zdent;
#define SHM_FS_MAGIC 0x02011994

static struct super_block * shm_sb;

static DECLARE_FSTYPE(shm_fs_type, "shm", shm_read_super, FS_SINGLE);
static struct super_operations shm_sops = {
	read_inode:	shm_read_inode,
	write_inode:	shm_write_inode,
	delete_inode:	shm_delete,
	put_super:	shm_put_super,
	statfs:		shm_statfs,
	remount_fs:	shm_remount_fs,
};

static struct file_operations shm_root_operations = {
	readdir:	shm_readdir,
};

static struct inode_operations shm_root_inode_operations = {
	create:		shm_create,
	lookup:		shm_lookup,
	unlink:		shm_unlink,
};

static struct file_operations shm_file_operations = {
	mmap:		shm_mmap,
};

static struct inode_operations shm_inode_operations = {
	setattr:	shm_setattr,
};

static struct vm_operations_struct shm_vm_ops = {
	open:	shm_open,	/* callback for a new vm-area open */
	close:	shm_close,	/* callback for when the vm-area is released */
	nopage:	shm_nopage,
	swapout: shm_swapout,
};
size_t shm_ctlmax = SHMMAX;

/* These parameters should be part of the superblock */
static int shm_ctlall;
static int shm_ctlmni;
static int shm_mode;

static int shm_tot = 0; /* total number of shared memory pages */
static int shm_rss = 0; /* number of shared memory pages that are in memory */
static int shm_swp = 0; /* number of shared memory pages that are in swap */

/* locks order:
	pagecache_lock
	shm_lock()/shm_lockall()
	kernel lock
	inode->i_sem
	sem_ids.sem
	mmap_sem

   SMP assumptions:
   - swap_free() never sleeps
   - add_to_swap_cache() never sleeps
   - add_to_swap_cache() doesn't acquire the big kernel lock.
   - shm_unuse() is called with the kernel lock acquired.
 */

/* some statistics */
static ulong swap_attempts = 0;
static ulong swap_successes = 0;
static ulong used_segs = 0;
void __init shm_init (void)
{
	struct vfsmount *res;
	ipc_init_ids(&shm_ids, 1);

	register_filesystem (&shm_fs_type);
	res = kern_mount(&shm_fs_type);
	if (IS_ERR(res)) {
		unregister_filesystem(&shm_fs_type);
		return;
	}
#ifdef CONFIG_PROC_FS
	create_proc_read_entry("sysvipc/shm", 0, 0, sysvipc_shm_read_proc, NULL);
#endif
	zero_id = ipc_addid(&shm_ids, &zshmid_kernel.shm_perm, 1);
	shm_unlock(zero_id);
	INIT_LIST_HEAD(&zshmid_kernel.zero_list);
	zdent = d_alloc_root(get_empty_inode());
	return;
}
static int shm_parse_options(char *options)
{
	int blocks = shm_ctlall;
	int inodes = shm_ctlmni;
	umode_t mode = shm_mode;
	char *this_char, *value;

	this_char = NULL;
	if (options)
		this_char = strtok(options, ",");
	for ( ; this_char; this_char = strtok(NULL, ",")) {
		if ((value = strchr(this_char, '=')) != NULL)
			*value++ = 0;
		if (!strcmp(this_char, "nr_blocks")) {
			if (!value || !*value)
				return 1;
			blocks = simple_strtoul(value, &value, 0);
			if (*value)
				return 1;
		}
		else if (!strcmp(this_char, "nr_inodes")) {
			if (!value || !*value)
				return 1;
			inodes = simple_strtoul(value, &value, 0);
			if (*value)
				return 1;
		}
		else if (!strcmp(this_char, "mode")) {
			if (!value || !*value)
				return 1;
			mode = simple_strtoul(value, &value, 8);
			if (*value)
				return 1;
		}
		else
			return 1;
	}
	shm_ctlmni = inodes;
	shm_ctlall = blocks;
	shm_mode = mode;

	return 0;
}
static struct super_block *shm_read_super(struct super_block *s, void *data,
					  int silent)
{
	struct inode * root_inode;

	shm_ctlall = SHMALL;
	shm_ctlmni = SHMMNI;
	shm_mode   = S_IRWXUGO | S_ISVTX;
	if (shm_parse_options (data)) {
		printk(KERN_ERR "shm fs invalid option\n");
		goto out_unlock;
	}

	s->s_blocksize = PAGE_SIZE;
	s->s_blocksize_bits = PAGE_SHIFT;
	s->s_magic = SHM_FS_MAGIC;
	s->s_op = &shm_sops;
	root_inode = iget (s, SEQ_MULTIPLIER);
	if (!root_inode)
		goto out_no_root;
	root_inode->i_op = &shm_root_inode_operations;
	root_inode->i_sb = s;
	root_inode->i_nlink = 2;
	root_inode->i_mode = S_IFDIR | shm_mode;
	s->s_root = d_alloc_root(root_inode);
	if (!s->s_root)
		goto out_no_root;
	shm_sb = s;
	return s;

out_no_root:
	printk(KERN_ERR "shm_read_super: get root inode failed\n");
	iput(root_inode);
out_unlock:
	return NULL;
}
static int shm_remount_fs (struct super_block *sb, int *flags, char *data)
{
	if (shm_parse_options (data))
		return -EINVAL;
	return 0;
}

static inline int shm_checkid(struct shmid_kernel *s, int id)
{
	if (!(s->shm_flags & SHM_SYSV))
		return -EINVAL;
	if (ipc_checkid(&shm_ids, &s->shm_perm, id))
		return -EIDRM;
	return 0;
}

static inline struct shmid_kernel *shm_rmid(int id)
{
	return (struct shmid_kernel *)ipc_rmid(&shm_ids, id);
}

static __inline__ int shm_addid(struct shmid_kernel *shp)
{
	return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni + 1);
}
static void shm_put_super(struct super_block *sb)
{
	int i;
	struct shmid_kernel *shp;

	down(&shm_ids.sem);
	for (i = 0; i <= shm_ids.max_id; i++) {
		if (i == zero_id)
			continue;
		if (!(shp = shm_lock (i)))
			continue;
		if (shp->shm_nattch)
			printk(KERN_DEBUG "shm_nattch = %ld\n", shp->shm_nattch);
		shp = shm_rmid(i);
		shm_unlock(i);
		seg_free(shp, 1);
	}
	dput (sb->s_root);
	up(&shm_ids.sem);
}

static int shm_statfs(struct super_block *sb, struct statfs *buf)
{
	buf->f_type = SHM_FS_MAGIC;
	buf->f_bsize = PAGE_SIZE;
	buf->f_blocks = shm_ctlall;
	buf->f_bavail = buf->f_bfree = shm_ctlall - shm_tot;
	buf->f_files = shm_ctlmni;
	buf->f_ffree = shm_ctlmni - used_segs;
	buf->f_namelen = SHM_NAME_LEN;
	return 0;
}
static void shm_write_inode(struct inode * inode)
{
}

static void shm_read_inode(struct inode * inode)
{
	int id;
	struct shmid_kernel *shp;

	id = inode->i_ino;
	inode->i_op = NULL;
	inode->i_mode = 0;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;

	if (id < SEQ_MULTIPLIER) {
		if (!(shp = shm_lock (id)))
			return;
		inode->i_mode = (shp->shm_flags & S_IALLUGO) | S_IFREG;
		inode->i_uid  = shp->shm_perm.uid;
		inode->i_gid  = shp->shm_perm.gid;
		inode->i_size = shp->shm_segsz;
		shm_unlock (id);
		inode->i_op  = &shm_inode_operations;
		inode->i_fop = &shm_file_operations;
		return;
	}
	inode->i_op    = &shm_root_inode_operations;
	inode->i_fop   = &shm_root_operations;
	inode->i_sb    = shm_sb;
	inode->i_nlink = 2;
	inode->i_mode  = S_IFDIR | shm_mode;
	inode->i_uid   = inode->i_gid = 0;
}
static int shm_create (struct inode *dir, struct dentry *dent, int mode)
{
	int id, err;
	struct inode * inode;

	down(&shm_ids.sem);
	err = id = newseg (IPC_PRIVATE, dent->d_name.name, dent->d_name.len, mode, 0);
	if (err < 0)
		goto out;

	err = -ENOMEM;
	inode = iget (shm_sb, id % SEQ_MULTIPLIER);
	if (!inode)
		goto out;

	err = 0;
	down (&inode->i_sem);
	inode->i_mode = mode | S_IFREG;
	inode->i_op   = &shm_inode_operations;
	d_instantiate(dent, inode);
	up (&inode->i_sem);

out:
	up(&shm_ids.sem);
	return err;
}
static int shm_readdir (struct file *filp, void *dirent, filldir_t filldir)
{
	struct inode * inode = filp->f_dentry->d_inode;
	struct shmid_kernel *shp;
	off_t nr;

	nr = filp->f_pos;

	switch(nr)
	{
	case 0:
		if (filldir(dirent, ".", 1, nr, inode->i_ino) < 0)
			return 0;
		filp->f_pos = ++nr;
		/* fall through */
	case 1:
		if (filldir(dirent, "..", 2, nr, inode->i_ino) < 0)
			return 0;
		filp->f_pos = ++nr;
		/* fall through */
	default:
		down(&shm_ids.sem);
		for (; nr-2 <= shm_ids.max_id; nr++) {
			if (nr-2 == zero_id)
				continue;
			if (!(shp = shm_get (nr-2)))
				continue;
			if (shp->shm_flags & SHM_UNLK)
				continue;
			if (filldir(dirent, shp->shm_name, shp->shm_namelen, nr, nr) < 0)
				break;
		}
		filp->f_pos = nr;
		up(&shm_ids.sem);
		break;
	}

	UPDATE_ATIME(inode);
	return 0;
}
static struct dentry *shm_lookup (struct inode *dir, struct dentry *dent)
{
	int i, err = 0;
	struct shmid_kernel* shp;
	struct inode *inode = NULL;

	if (dent->d_name.len > SHM_NAME_LEN)
		return ERR_PTR(-ENAMETOOLONG);

	down(&shm_ids.sem);
	for (i = 0; i <= shm_ids.max_id; i++) {
		if (i == zero_id)
			continue;
		if (!(shp = shm_lock(i)))
			continue;
		if (!(shp->shm_flags & SHM_UNLK) &&
		    dent->d_name.len == shp->shm_namelen &&
		    strncmp(dent->d_name.name, shp->shm_name, shp->shm_namelen) == 0)
			goto found;
		shm_unlock(i);
	}

	/*
	 * Prevent the reserved names from becoming negative dentries.
	 * This also prevents object creation through the filesystem.
	 */
	if (dent->d_name.len == SHM_FMT_LEN &&
	    memcmp (SHM_FMT, dent->d_name.name, SHM_FMT_LEN - 8) == 0)
		err = -EINVAL;	/* EINVAL to give IPC_RMID the right error */

	goto out;

found:
	shm_unlock(i);
	inode = iget(dir->i_sb, i);

	if (!inode)
		err = -EACCES;
out:
	if (err == 0)
		d_add (dent, inode);
	up (&shm_ids.sem);
	return ERR_PTR(err);
}
static int shm_unlink (struct inode *dir, struct dentry *dent)
{
	struct inode * inode = dent->d_inode;
	struct shmid_kernel *shp;

	down (&shm_ids.sem);
	if (!(shp = shm_lock (inode->i_ino)))
		BUG();
	shp->shm_flags |= SHM_UNLK | PRV_DEST;
	shp->shm_perm.key = IPC_PRIVATE; /* Do not find it any more */
	shm_unlock (inode->i_ino);
	up (&shm_ids.sem);
	inode->i_nlink -= 1;
	/*
	 * If it's a reserved name we have to drop the dentry instead
	 * of creating a negative dentry.
	 */
	if (dent->d_name.len == SHM_FMT_LEN &&
	    memcmp (SHM_FMT, dent->d_name.name, SHM_FMT_LEN - 8) == 0)
		d_drop (dent);
	else
		d_delete (dent);
	return 0;
}
#define SHM_ENTRY(shp, index) (shp)->shm_dir[(index)/PTRS_PER_PTE][(index)%PTRS_PER_PTE]
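/* shm_dir is a two-level table modelled on the page tables. For example,
 * with PTRS_PER_PTE == 1024 (as on i386), page 3000 of a segment lives
 * in directory slot 3000/1024 = 2, entry 3000%1024 = 952. */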
static pte_t **shm_alloc(unsigned long pages, int doacc)
{
	unsigned short dir  = pages / PTRS_PER_PTE;
	unsigned short last = pages % PTRS_PER_PTE;
	pte_t **ret, **ptr, *pte;

	if (pages == 0)
		return NULL;

	ret = kmalloc ((dir+1) * sizeof(pte_t *), GFP_KERNEL);
	if (!ret)
		goto nomem;

	for (ptr = ret; ptr < ret+dir; ptr++)
	{
		*ptr = (pte_t *)__get_free_page (GFP_KERNEL);
		if (!*ptr)
			goto free;
		for (pte = *ptr; pte < *ptr + PTRS_PER_PTE; pte++)
			pte_clear (pte);
	}

	/* The last one is probably not of PAGE_SIZE: we use kmalloc */
	if (last) {
		*ptr = kmalloc (last*sizeof(pte_t), GFP_KERNEL);
		if (!*ptr)
			goto free;
		for (pte = *ptr; pte < *ptr + last; pte++)
			pte_clear (pte);
	}
	if (doacc) {
		shm_lockall();
		shm_tot += pages;
		used_segs++;
		shm_unlockall();
	}
	return ret;

free:
	/* The last one failed: we decrement first */
	while (--ptr >= ret)
		free_page ((unsigned long)*ptr);

	kfree (ret);
nomem:
	return ERR_PTR(-ENOMEM);
}
static void shm_free(pte_t** dir, unsigned long pages, int doacc)
{
	int i, rss, swp;
	pte_t **ptr = dir + pages/PTRS_PER_PTE;

	if (!dir)
		return;

	for (i = 0, rss = 0, swp = 0; i < pages; i++) {
		pte_t pte;
		pte = dir[i/PTRS_PER_PTE][i%PTRS_PER_PTE];
		if (pte_none(pte))
			continue;
		if (pte_present(pte)) {
			__free_page (pte_page(pte));
			rss++;
		} else {
			swap_free(pte_to_swp_entry(pte));
			swp++;
		}
	}

	/* first the last page */
	if (pages%PTRS_PER_PTE)
		kfree (*ptr);
	/* now the whole pages */
	while (--ptr >= dir)
		if (*ptr)
			free_page ((unsigned long)*ptr);

	/* Now the indirect block */
	kfree (dir);

	if (doacc) {
		shm_lockall();
		shm_rss -= rss;
		shm_swp -= swp;
		shm_tot -= pages;
		used_segs--;
		shm_unlockall();
	}
}
static int shm_setattr (struct dentry *dentry, struct iattr *attr)
{
	int error;
	struct inode *inode = dentry->d_inode;
	struct shmid_kernel *shp;
	unsigned long new_pages, old_pages;
	pte_t **new_dir, **old_dir;

	error = inode_change_ok(inode, attr);
	if (error)
		return error;
	if (!(attr->ia_valid & ATTR_SIZE))
		goto set_attr;
	if (attr->ia_size > shm_ctlmax)
		return -EFBIG;

	/* We set old_pages and old_dir for easier cleanup */
	old_pages = new_pages = (attr->ia_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	old_dir = new_dir = shm_alloc(new_pages, 1);
	if (IS_ERR(new_dir))
		return PTR_ERR(new_dir);

	if (!(shp = shm_lock(inode->i_ino)))
		BUG();
	error = -ENOSPC;
	if (shm_tot - shp->shm_npages >= shm_ctlall)
		goto out;
	error = 0;
	if (shp->shm_segsz == attr->ia_size)
		goto out;
	/* Now we set them to the real values */
	old_dir = shp->shm_dir;
	old_pages = shp->shm_npages;
	if (old_dir) {
		pte_t *swap;
		int i, j;
		i = old_pages < new_pages ? old_pages : new_pages;
		j = i % PTRS_PER_PTE;
		i /= PTRS_PER_PTE;
		if (j)
			memcpy (new_dir[i], old_dir[i], j * sizeof (pte_t));
		while (i--) {
			swap = new_dir[i];
			new_dir[i] = old_dir[i];
			old_dir[i] = swap;
		}
	}
	shp->shm_dir = new_dir;
	shp->shm_npages = new_pages;
	shp->shm_segsz = attr->ia_size;
out:
	shm_unlock(inode->i_ino);
	shm_free (old_dir, old_pages, 1);
set_attr:
	inode_setattr(inode, attr);
	return error;
}
static struct shmid_kernel *seg_alloc(int numpages, size_t namelen)
{
	struct shmid_kernel *shp;
	pte_t **dir;

	shp = (struct shmid_kernel *) kmalloc (sizeof (*shp) + namelen, GFP_KERNEL);
	if (!shp)
		return ERR_PTR(-ENOMEM);

	/* namelen doubles as shm_alloc()'s accounting flag: it is non-zero
	 * for SYSV and filesystem segments, and zero for the unaccounted
	 * /dev/zero mappings, which pair this with seg_free(shp, 0). */
	dir = shm_alloc (numpages, namelen);
	if (IS_ERR(dir)) {
		kfree(shp);
		return ERR_PTR(PTR_ERR(dir));
	}
	shp->shm_dir	 = dir;
	shp->shm_npages	 = numpages;
	shp->shm_nattch	 = 0;
	shp->shm_namelen = namelen;
	return(shp);
}

static void seg_free(struct shmid_kernel *shp, int doacc)
{
	shm_free (shp->shm_dir, shp->shm_npages, doacc);
	kfree(shp);
}
static int newseg (key_t key, const char *name, int namelen,
		   int shmflg, size_t size)
{
	struct shmid_kernel *shp;
	int numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	int id;

	if (namelen > SHM_NAME_LEN)
		return -ENAMETOOLONG;

	if (size > shm_ctlmax)
		return -EINVAL;

	if (shm_tot + numpages >= shm_ctlall)
		return -ENOSPC;

	/* seg_alloc() reports failure with ERR_PTR(), never NULL */
	shp = seg_alloc(numpages, namelen ? namelen : SHM_FMT_LEN + 1);
	if (IS_ERR(shp))
		return PTR_ERR(shp);
	id = shm_addid(shp);
	if (id == -1) {
		seg_free(shp, 1);
		return -ENOSPC;
	}
	shp->shm_perm.key = key;
	shp->shm_flags = (shmflg & S_IRWXUGO);
	shp->shm_segsz = size;
	shp->shm_cprid = current->pid;
	shp->shm_lprid = 0;
	shp->shm_atim = shp->shm_dtim = 0;
	shp->shm_ctim = CURRENT_TIME;
	shp->id = shm_buildid(id, shp->shm_perm.seq);
	if (namelen != 0) {
		shp->shm_namelen = namelen;
		memcpy (shp->shm_name, name, namelen);
	} else {
		shp->shm_flags |= SHM_SYSV;
		shp->shm_namelen = sprintf (shp->shm_name, SHM_FMT, shp->id);
	}
	shm_unlock(id);

	return shp->id;
}
asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
{
	struct shmid_kernel *shp;
	int err, id = 0;

	if (size < SHMMIN)
		return -EINVAL;

	down(&shm_ids.sem);
	if (key == IPC_PRIVATE) {
		err = newseg(key, NULL, 0, shmflg, size);
	} else if ((id = ipc_findkey(&shm_ids, key)) == -1) {
		if (!(shmflg & IPC_CREAT))
			err = -ENOENT;
		else
			err = newseg(key, NULL, 0, shmflg, size);
	} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
		err = -EEXIST;
	} else {
		shp = shm_lock(id);
		if (shp == NULL)
			BUG();
		if (shp->shm_segsz < size)
			err = -EINVAL;
		else if (ipcperms(&shp->shm_perm, shmflg))
			err = -EACCES;
		else
			err = shm_buildid(id, shp->shm_perm.seq);
		shm_unlock(id);
	}
	up(&shm_ids.sem);
	return err;
}
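/*
 * The user-space view of the syscalls implemented in this file, as an
 * illustrative sketch (not part of the kernel build; error handling
 * omitted):
 *
 *	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *	char *p = shmat(id, NULL, 0);	- attaches via sys_shmat()
 *	p[0] = 42;			- faults a page in via shm_nopage()
 *	shmdt(p);			- detaches via sys_shmdt()
 *	shmctl(id, IPC_RMID, NULL);	- marks the segment for destruction
 */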
static void shm_delete (struct inode *ino)
{
	int shmid = ino->i_ino;
	struct shmid_kernel *shp;

	down(&shm_ids.sem);
	shp = shm_lock(shmid);
	if (shp == NULL)
		BUG();
	shp = shm_rmid(shmid);
	shm_unlock(shmid);
	up(&shm_ids.sem);
	seg_free(shp, 1);
	clear_inode(ino);
}
static inline unsigned long copy_shmid_to_user(void *buf, struct shmid64_ds *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shmid_ds out;

		ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
		out.shm_segsz	= in->shm_segsz;
		out.shm_atime	= in->shm_atime;
		out.shm_dtime	= in->shm_dtime;
		out.shm_ctime	= in->shm_ctime;
		out.shm_cpid	= in->shm_cpid;
		out.shm_lpid	= in->shm_lpid;
		out.shm_nattch	= in->shm_nattch;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}
struct shm_setbuf {
	uid_t	uid;
	gid_t	gid;
	mode_t	mode;
};

static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void *buf, int version)
{
	switch(version) {
	case IPC_64:
	    {
		struct shmid64_ds tbuf;

		if (copy_from_user(&tbuf, buf, sizeof(tbuf)))
			return -EFAULT;

		out->uid	= tbuf.shm_perm.uid;
		out->gid	= tbuf.shm_perm.gid;
		out->mode	= tbuf.shm_flags;

		return 0;
	    }
	case IPC_OLD:
	    {
		struct shmid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->uid	= tbuf_old.shm_perm.uid;
		out->gid	= tbuf_old.shm_perm.gid;
		out->mode	= tbuf_old.shm_flags;

		return 0;
	    }
	default:
		return -EINVAL;
	}
}
static inline unsigned long copy_shminfo_to_user(void *buf, struct shminfo64 *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shminfo out;

		if (in->shmmax > INT_MAX)
			out.shmmax = INT_MAX;
		else
			out.shmmax = (int)in->shmmax;

		out.shmmin	= in->shmmin;
		out.shmmni	= in->shmmni;
		out.shmseg	= in->shmseg;
		out.shmall	= in->shmall;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}
asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
{
	struct shm_setbuf setbuf;
	struct shmid_kernel *shp;
	int err, version;

	if (cmd < 0 || shmid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);

	switch (cmd) { /* replace with proc interface ? */
	case IPC_INFO:
	{
		struct shminfo64 shminfo;

		memset(&shminfo, 0, sizeof(shminfo));
		shminfo.shmmni = shminfo.shmseg = shm_ctlmni;
		shminfo.shmmax = shm_ctlmax;
		shminfo.shmall = shm_ctlall;

		shminfo.shmmin = SHMMIN;
		if (copy_shminfo_to_user (buf, &shminfo, version))
			return -EFAULT;
		/* reading an integer is always atomic */
		err = shm_ids.max_id;
		if (err < 0)
			err = 0;
		return err;
	}
	case SHM_INFO:
	{
		struct shm_info shm_info;

		memset(&shm_info, 0, sizeof(shm_info));
		shm_lockall();
		shm_info.used_ids = shm_ids.in_use;
		shm_info.shm_rss = shm_rss;
		shm_info.shm_tot = shm_tot;
		shm_info.shm_swp = shm_swp;
		shm_info.swap_attempts = swap_attempts;
		shm_info.swap_successes = swap_successes;
		err = shm_ids.max_id;
		shm_unlockall();
		if (copy_to_user (buf, &shm_info, sizeof(shm_info)))
			return -EFAULT;

		return err < 0 ? 0 : err;
	}
	case SHM_STAT:
	case IPC_STAT:
	{
		struct shmid64_ds tbuf;
		int result;
		if ((shmid % SEQ_MULTIPLIER) == zero_id)
			return -EINVAL;
		memset(&tbuf, 0, sizeof(tbuf));
		shp = shm_lock(shmid);
		if (shp == NULL)
			return -EINVAL;
		if (cmd == SHM_STAT) {
			err = -EINVAL;
			if (!(shp->shm_flags & SHM_SYSV) ||
			    shmid > shm_ids.max_id)
				goto out_unlock;
			result = shm_buildid(shmid, shp->shm_perm.seq);
		} else {
			err = shm_checkid(shp, shmid);
			if (err)
				goto out_unlock;
			result = 0;
		}
		err = -EACCES;
		if (ipcperms (&shp->shm_perm, S_IRUGO))
			goto out_unlock;
		kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
		/* ugly hack to keep binary compatibility for ipcs */
		tbuf.shm_flags &= PRV_DEST | PRV_LOCKED | S_IRWXUGO;
		if (tbuf.shm_flags & PRV_DEST)
			tbuf.shm_flags |= SHM_DEST;
		if (tbuf.shm_flags & PRV_LOCKED)
			tbuf.shm_flags |= SHM_LOCKED;
		tbuf.shm_flags &= SHM_DEST | SHM_LOCKED | S_IRWXUGO;
		tbuf.shm_segsz	= shp->shm_segsz;
		tbuf.shm_atime	= shp->shm_atim;
		tbuf.shm_dtime	= shp->shm_dtim;
		tbuf.shm_ctime	= shp->shm_ctim;
		tbuf.shm_cpid	= shp->shm_cprid;
		tbuf.shm_lpid	= shp->shm_lprid;
		tbuf.shm_nattch	= shp->shm_nattch;
		shm_unlock(shmid);
		if (copy_shmid_to_user (buf, &tbuf, version))
			return -EFAULT;
		return result;
	}
	case SHM_LOCK:
	case SHM_UNLOCK:
	{
		/* Allow superuser to lock segment in memory */
		/* Should the pages be faulted in here or leave it to user? */
		/* need to determine interaction with current->swappable */
		if ((shmid % SEQ_MULTIPLIER) == zero_id)
			return -EINVAL;
		if (!capable(CAP_IPC_LOCK))
			return -EPERM;

		shp = shm_lock(shmid);
		if (shp == NULL)
			return -EINVAL;
		err = shm_checkid(shp, shmid);
		if (err)
			goto out_unlock;
		if (cmd == SHM_LOCK)
			shp->shm_flags |= PRV_LOCKED;
		else
			shp->shm_flags &= ~PRV_LOCKED;
		shm_unlock(shmid);
		return err;
	}
	case IPC_RMID:
	{
		/*
		 * We cannot simply remove the file. The SVID states
		 * that the block remains until the last person
		 * detaches from it, then is deleted. A shmat() on
		 * an RMID segment is legal in older Linux and if
		 * we change it apps break...
		 *
		 * Instead we set a destroyed flag, and then blow
		 * the name away when the usage hits zero.
		 */
		if ((shmid % SEQ_MULTIPLIER) == zero_id)
			return -EINVAL;
		down(&shm_ids.sem);
		shp = shm_lock(shmid);
		if (shp == NULL) {
			up(&shm_ids.sem);
			return -EINVAL;
		}
		err = shm_checkid(shp, shmid);
		if (err == 0) {
			if (shp->shm_nattch == 0 &&
			    !(shp->shm_flags & SHM_UNLK)) {
				int id = shp->id;
				shm_unlock(shmid);
				up(&shm_ids.sem);
				/*
				 * We can't hold shm_lock here else we
				 * will deadlock in shm_lookup when we
				 * try to recursively grab it.
				 */
				return shm_remove_name(id);
			}
			shp->shm_flags |= PRV_DEST;
			/* Do not find it any more */
			shp->shm_perm.key = IPC_PRIVATE;
		}
		/* Unlock */
		shm_unlock(shmid);
		up(&shm_ids.sem);
		return err;
	}

	case IPC_SET:
	{
		if ((shmid % SEQ_MULTIPLIER) == zero_id)
			return -EINVAL;

		if (copy_shmid_from_user (&setbuf, buf, version))
			return -EFAULT;
		down(&shm_ids.sem);
		shp = shm_lock(shmid);
		err = -EINVAL;
		if (shp == NULL)
			goto out_up;
		err = shm_checkid(shp, shmid);
		if (err)
			goto out_unlock_up;
		err = -EPERM;
		if (current->euid != shp->shm_perm.uid &&
		    current->euid != shp->shm_perm.cuid &&
		    !capable(CAP_SYS_ADMIN)) {
			goto out_unlock_up;
		}

		shp->shm_perm.uid = setbuf.uid;
		shp->shm_perm.gid = setbuf.gid;
		shp->shm_flags = (shp->shm_flags & ~S_IRWXUGO)
			| (setbuf.mode & S_IRWXUGO);
		shp->shm_ctim = CURRENT_TIME;
		break;
	}

	default:
		return -EINVAL;
	}

	err = 0;
out_unlock_up:
	shm_unlock(shmid);
out_up:
	up(&shm_ids.sem);
	return err;
out_unlock:
	shm_unlock(shmid);
	return err;
}
static inline void shm_inc (int id) {
	struct shmid_kernel *shp;

	if (!(shp = shm_lock(id)))
		BUG();
	shp->shm_atim = CURRENT_TIME;
	shp->shm_lprid = current->pid;
	shp->shm_nattch++;
	shm_unlock(id);
}

static int shm_mmap(struct file * file, struct vm_area_struct * vma)
{
	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
		return -EINVAL;	/* we cannot do private writable mappings */
	UPDATE_ATIME(file->f_dentry->d_inode);
	vma->vm_ops = &shm_vm_ops;
	shm_inc(file->f_dentry->d_inode->i_ino);
	return 0;
}
/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 */
asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
{
	unsigned long addr;
	struct file * file;
	int err;
	unsigned long flags;
	unsigned long prot;
	unsigned long o_flags;
	int acc_mode;
	struct dentry *dentry;
	char name[SHM_FMT_LEN+1];

	if (!shm_sb || (shmid % SEQ_MULTIPLIER) == zero_id)
		return -EINVAL;

	if ((addr = (ulong)shmaddr)) {
		if (addr & (SHMLBA-1)) {
			if (shmflg & SHM_RND)
				addr &= ~(SHMLBA-1);	/* round down */
			else
				return -EINVAL;
		}
		flags = MAP_SHARED | MAP_FIXED;
	} else
		flags = MAP_SHARED;

	if (shmflg & SHM_RDONLY) {
		prot = PROT_READ;
		o_flags = O_RDONLY;
		acc_mode = MAY_READ;
	} else {
		prot = PROT_READ | PROT_WRITE;
		o_flags = O_RDWR;
		acc_mode = MAY_READ | MAY_WRITE;
	}

	sprintf (name, SHM_FMT, shmid);

	lock_kernel();
	mntget(shm_fs_type.kern_mnt);
	dentry = lookup_one(name, lock_parent(shm_sb->s_root));
	unlock_dir(shm_sb->s_root);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		goto bad_file;
	err = -ENOENT;
	if (!dentry->d_inode)
		goto bad_file;
	err = permission(dentry->d_inode, acc_mode);
	if (err)
		goto bad_file1;
	file = dentry_open(dentry, shm_fs_type.kern_mnt, o_flags);
	err = PTR_ERR(file);
	if (IS_ERR (file))
		goto bad_file1;
	down(&current->mm->mmap_sem);
	*raddr = do_mmap (file, addr, file->f_dentry->d_inode->i_size,
			  prot, flags, 0);
	up(&current->mm->mmap_sem);
	unlock_kernel();
	if (IS_ERR(*raddr))
		err = PTR_ERR(*raddr);
	else
		err = 0;
	fput (file);
	return err;

bad_file1:
	dput(dentry);
bad_file:
	mntput(shm_fs_type.kern_mnt);
	unlock_kernel();
	if (err == -ENOENT)
		return -EINVAL;
	return err;
}
/* This is called by fork, once for every shm attach. */
static void shm_open (struct vm_area_struct *shmd)
{
	shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino);
}

/*
 * Remove a name.
 */
static int shm_remove_name(int id)
{
	struct dentry *dir;
	struct dentry *dentry;
	int error;
	char name[SHM_FMT_LEN+1];

	sprintf (name, SHM_FMT, id);
	lock_kernel();
	dir = lock_parent(shm_sb->s_root);
	dentry = lookup_one(name, dir);
	error = PTR_ERR(dentry);
	if (!IS_ERR(dentry)) {
		/*
		 * We have to do our own unlink to prevent the vfs
		 * permission check. The SYSV IPC layer has already
		 * checked the permissions, which do not comply with
		 * the vfs rules.
		 */
		struct inode *inode = dir->d_inode;
		down(&inode->i_zombie);
		error = shm_unlink(inode, dentry);
		up(&inode->i_zombie);
		dput(dentry);
	}
	unlock_dir(dir);
	unlock_kernel();
	return error;
}
/*
 * Remove the attach descriptor shmd.
 * Free memory for the segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close (struct vm_area_struct *shmd)
{
	int id = shmd->vm_file->f_dentry->d_inode->i_ino;
	struct shmid_kernel *shp;

	/* remove from the list of attaches of the shm segment */
	if (!(shp = shm_lock(id)))
		BUG();
	shp->shm_lprid = current->pid;
	shp->shm_dtim = CURRENT_TIME;
	shp->shm_nattch--;
	if (shp->shm_nattch == 0 &&
	    shp->shm_flags & PRV_DEST &&
	    !(shp->shm_flags & SHM_UNLK)) {
		int pid = shp->id;
		int err;
		shm_unlock(id);

		/* The kernel lock prevents new attaches from
		 * happening. We can't hold shm_lock here
		 * else we will deadlock in shm_lookup when we
		 * try to recursively grab it.
		 */
		err = shm_remove_name(pid);
		if (err && err != -EINVAL && err != -ENOENT)
			printk(KERN_ERR "Unlink of SHM id %d failed (%d).\n", pid, err);
	} else {
		shm_unlock(id);
	}
}
/*
 * Detach and kill the segment if marked destroyed.
 * The work is done in shm_close.
 */
asmlinkage long sys_shmdt (char *shmaddr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *shmd, *shmdnext;

	down(&mm->mmap_sem);
	for (shmd = mm->mmap; shmd; shmd = shmdnext) {
		shmdnext = shmd->vm_next;
		if (shmd->vm_ops == &shm_vm_ops
		    && shmd->vm_start - (shmd->vm_pgoff << PAGE_SHIFT) == (ulong) shmaddr)
			do_munmap(mm, shmd->vm_start, shmd->vm_end - shmd->vm_start);
	}
	up(&mm->mmap_sem);
	return 0;
}
/*
 * Enter the shm page into the SHM data structures.
 *
 * The way "nopage" is done, we don't actually have to
 * do anything here: nopage will have filled in the shm
 * data structures already, and shm_swap_out() will just
 * work off them...
 */
static int shm_swapout(struct page * page, struct file *file)
{
	return 0;
}
/*
 * page not present ... go through shm_dir
 */
static struct page * shm_nopage_core(struct shmid_kernel *shp, unsigned int idx, int *swp, int *rss, unsigned long address)
{
	pte_t pte;
	struct page * page;

	if (idx >= shp->shm_npages)
		return NOPAGE_SIGBUS;

	pte = SHM_ENTRY(shp, idx);
	if (!pte_present(pte)) {
		/* page not present, so shm_swap can't race with us,
		   and the semaphore protects us from other tasks that
		   could potentially fault on our pte under us */
		if (pte_none(pte)) {
			shm_unlock(shp->id);
			page = alloc_page(GFP_HIGHUSER);
			if (!page)
				goto oom;
			clear_user_highpage(page, address);
			if ((shp != shm_lock(shp->id)) && (shp->id != zero_id))
				BUG();
		} else {
			swp_entry_t entry = pte_to_swp_entry(pte);

			shm_unlock(shp->id);
			page = lookup_swap_cache(entry);
			if (!page) {
				lock_kernel();
				swapin_readahead(entry);
				page = read_swap_cache(entry);
				unlock_kernel();
				if (!page)
					goto oom;
			}
			delete_from_swap_cache(page);
			page = replace_with_highmem(page);
			swap_free(entry);
			if ((shp != shm_lock(shp->id)) && (shp->id != zero_id))
				BUG();
			(*swp)--;
		}
		(*rss)++;
		pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
		SHM_ENTRY(shp, idx) = pte;
	}

	/* pte_val(pte) == SHM_ENTRY (shp, idx) */
	get_page(pte_page(pte));
	return pte_page(pte);

oom:
	shm_lock(shp->id);
	return NOPAGE_OOM;
}
static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
{
	struct page * page;
	struct shmid_kernel *shp;
	unsigned int idx;
	struct inode * inode = shmd->vm_file->f_dentry->d_inode;

	idx = (address - shmd->vm_start) >> PAGE_SHIFT;
	idx += shmd->vm_pgoff;

	down(&inode->i_sem);
	if (!(shp = shm_lock(inode->i_ino)))
		BUG();
	page = shm_nopage_core(shp, idx, &shm_swp, &shm_rss, address);
	shm_unlock(inode->i_ino);
	up(&inode->i_sem);
	return(page);
}
#define OKAY	0
#define RETRY	1
#define FAILED	2
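/* Return codes of shm_swap_core(), as used by the callers below:
 * OKAY means the page was moved into the swap cache and the caller must
 * write it out and update the statistics; RETRY means this page cannot
 * be swapped right now, try the next candidate; FAILED means give up the
 * whole pass (the caller frees the preloaded swap entry). */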
static int shm_swap_core(struct shmid_kernel *shp, unsigned long idx, swp_entry_t swap_entry, int *counter, struct page **outpage)
{
	pte_t page;
	struct page *page_map;

	page = SHM_ENTRY(shp, idx);
	if (!pte_present(page))
		return RETRY;
	page_map = pte_page(page);
	if (page_map->zone->free_pages > page_map->zone->pages_high)
		return RETRY;
	if (shp->id != zero_id) swap_attempts++;

	if (--*counter < 0) /* failed */
		return FAILED;
	if (page_count(page_map) != 1)
		return RETRY;

	if (!(page_map = prepare_highmem_swapout(page_map)))
		return FAILED;
	SHM_ENTRY (shp, idx) = swp_entry_to_pte(swap_entry);

	/* add the locked page to the swap cache before allowing
	   the swapin path to run lookup_swap_cache(). This avoids
	   reading a not yet uptodate block from disk.
	   NOTE: we just accounted the swap space reference for this
	   swap cache page at __get_swap_page() time. */
	lock_page(page_map);
	add_to_swap_cache(*outpage = page_map, swap_entry);
	return OKAY;
}
static void shm_swap_postop(struct page *page)
{
	lock_kernel();
	rw_swap_page(WRITE, page, 0);
	unlock_kernel();
	__free_page(page);
}

static int shm_swap_preop(swp_entry_t *swap_entry)
{
	lock_kernel();
	/* subtle: preload the swap count for the swap cache. We can't
	   increase the count inside the critical section as we can't release
	   the shm_lock there. And we can't acquire the big lock with the
	   shm_lock held (otherwise we would deadlock too easily). */
	*swap_entry = __get_swap_page(2);
	if (!(*swap_entry).val) {
		unlock_kernel();
		return 1;
	}
	unlock_kernel();
	return 0;
}
/*
 * Goes through counter = (shm_rss >> prio) present shm pages.
 */
static unsigned long swap_id = 0; /* currently being swapped */
static unsigned long swap_idx = 0; /* next to swap */

int shm_swap (int prio, int gfp_mask)
{
	struct shmid_kernel *shp;
	swp_entry_t swap_entry;
	unsigned long id, idx;
	int loop = 0;
	int counter;
	struct page * page_map;

	zshm_swap(prio, gfp_mask);
	counter = shm_rss >> prio;
	if (!counter)
		return 0;
	if (shm_swap_preop(&swap_entry))
		return 0;

	shm_lockall();
check_id:
	shp = shm_get(swap_id);
	if (shp == NULL || shp->shm_flags & SHM_LOCKED) {
next_id:
		swap_idx = 0;
		if (++swap_id > shm_ids.max_id) {
			swap_id = 0;
			if (loop) {
failed:
				shm_unlockall();
				__swap_free(swap_entry, 2);
				return 0;
			}
			loop = 1;
		}
		goto check_id;
	}
	id = swap_id;

check_table:
	idx = swap_idx++;
	if (idx >= shp->shm_npages)
		goto next_id;

	switch (shm_swap_core(shp, idx, swap_entry, &counter, &page_map)) {
		case RETRY: goto check_table;
		case FAILED: goto failed;
	}
	swap_successes++;
	shm_swp++;
	shm_rss--;
	shm_unlockall();

	shm_swap_postop(page_map);
	return 1;
}
/*
 * Free the swap entry and set the new pte for the shm page.
 */
static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
			   swp_entry_t entry, struct page *page)
{
	pte_t pte;

	pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
	SHM_ENTRY(shp, idx) = pte;
	get_page(page);
	shm_rss++;

	shm_swp--;

	swap_free(entry);
}

static int shm_unuse_core(struct shmid_kernel *shp, swp_entry_t entry, struct page *page)
{
	int n;

	for (n = 0; n < shp->shm_npages; n++) {
		if (pte_none(SHM_ENTRY(shp, n)))
			continue;
		if (pte_present(SHM_ENTRY(shp, n)))
			continue;
		if (pte_to_swp_entry(SHM_ENTRY(shp, n)).val == entry.val) {
			shm_unuse_page(shp, n, entry, page);
			return 1;
		}
	}
	return 0;
}
/*
 * shm_unuse() searches for a shm page that may have been swapped out.
 */
void shm_unuse(swp_entry_t entry, struct page *page)
{
	int i;

	shm_lockall();
	for (i = 0; i <= shm_ids.max_id; i++) {
		struct shmid_kernel *shp = shm_get(i);
		if (shp == NULL)
			continue;
		if (shm_unuse_core(shp, entry, page))
			goto out;
	}
out:
	shm_unlockall();
	zmap_unuse(entry, page);
}
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
{
	off_t pos = 0;
	off_t begin = 0;
	int i, len = 0;

	down(&shm_ids.sem);
	len += sprintf(buffer, "key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime name\n");

	for (i = 0; i <= shm_ids.max_id; i++) {
		struct shmid_kernel* shp;

		if (i == zero_id)
			continue;
		shp = shm_lock(i);
		if (shp != NULL) {
#define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu %.*s%s\n"
#define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu %.*s%s\n"
			char *format;

			if (sizeof(size_t) <= sizeof(int))
				format = SMALL_STRING;
			else
				format = BIG_STRING;
			len += sprintf(buffer + len, format,
				shp->shm_perm.key,
				shm_buildid(i, shp->shm_perm.seq),
				shp->shm_flags,
				shp->shm_segsz,
				shp->shm_cprid,
				shp->shm_lprid,
				shp->shm_nattch,
				shp->shm_perm.uid,
				shp->shm_perm.gid,
				shp->shm_perm.cuid,
				shp->shm_perm.cgid,
				shp->shm_atim,
				shp->shm_dtim,
				shp->shm_ctim,
				shp->shm_namelen,
				shp->shm_name,
				shp->shm_flags & SHM_UNLK ? " (deleted)" : "");
			shm_unlock(i);
		}
		pos += len;
		if (pos < offset) {
			len = 0;
			begin = pos;
		}
		if (pos > offset + length)
			goto done;
	}
	*eof = 1;
done:
	up(&shm_ids.sem);
	*start = buffer + (offset - begin);
	len -= (offset - begin);
	if (len > length)
		len = length;
	if (len < 0)
		len = 0;
	return len;
}
#endif
#define VMA_TO_SHP(vma)	((vma)->vm_file->private_data)

static spinlock_t zmap_list_lock = SPIN_LOCK_UNLOCKED;
static unsigned long zswap_idx = 0; /* next to swap */
static struct shmid_kernel *zswap_shp = &zshmid_kernel;
static int zshm_rss;

static struct vm_operations_struct shmzero_vm_ops = {
	open:		shmzero_open,
	close:		shmzero_close,
	nopage:		shmzero_nopage,
	swapout:	shm_swapout,
};
/*
 * In this implementation, the "unuse" and "swapout" interfaces are
 * interlocked via the kernel_lock, as well as shm_lock(zero_id).
 * "unuse" and "nopage/swapin", as well as "swapout" and "nopage/swapin",
 * interlock via shm_lock(zero_id). All these interlocks could be based
 * on a per-mapping lock instead of being a global lock.
 */
/*
 * Reference (existence) counting on the file/dentry/inode is done
 * by generic vm_file code. The zero code does not hold any reference
 * on the pseudo-file. This is possible because the open/close calls
 * are bracketed by the file count update calls.
 */
static struct file *file_setup(struct file *fzero, struct shmid_kernel *shp)
{
	struct file *filp;
	struct inode *inp;

	if ((filp = get_empty_filp()) == 0)
		return(filp);
	if ((inp = get_empty_inode()) == 0) {
		put_filp(filp);
		return(0);
	}
	if ((filp->f_dentry = d_alloc(zdent, &(const struct qstr) { "dev/zero",
							8, 0 })) == 0) {
		iput(inp);
		put_filp(filp);
		return(0);
	}
	filp->f_vfsmnt = mntget(shm_fs_type.kern_mnt);
	d_instantiate(filp->f_dentry, inp);

	/*
	 * Copy over dev/ino for the benefit of procfs. Use
	 * ino to indicate separate mappings.
	 */
	filp->f_dentry->d_inode->i_dev = shm_fs_type.kern_mnt->mnt_sb->s_dev;
	filp->f_dentry->d_inode->i_ino = (unsigned long)shp;
	if (fzero)
		fput(fzero);	/* release /dev/zero file */
	return(filp);
}
int map_zero_setup(struct vm_area_struct *vma)
{
	extern int vm_enough_memory(long pages);
	struct shmid_kernel *shp;
	struct file *filp;

	if (!vm_enough_memory((vma->vm_end - vma->vm_start) >> PAGE_SHIFT))
		return -ENOMEM;
	if (IS_ERR(shp = seg_alloc((vma->vm_end - vma->vm_start) / PAGE_SIZE, 0)))
		return PTR_ERR(shp);
	if ((filp = file_setup(vma->vm_file, shp)) == 0) {
		seg_free(shp, 0);
		return -ENOMEM;
	}
	vma->vm_file = filp;
	VMA_TO_SHP(vma) = (void *)shp;
	shp->id = zero_id;
	init_MUTEX(&shp->zsem);
	vma->vm_ops = &shmzero_vm_ops;
	shmzero_open(vma);
	spin_lock(&zmap_list_lock);
	list_add(&shp->zero_list, &zshmid_kernel.zero_list);
	spin_unlock(&zmap_list_lock);
	return 0;
}
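/* map_zero_setup() is the hook used for MAP_SHARED mappings of /dev/zero
 * (the caller lives in the memory-device driver, outside this file):
 * shared anonymous memory gets a private, unaccounted shm segment of its
 * own, managed through the zero_list below. */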
static void shmzero_open(struct vm_area_struct *shmd)
{
	struct shmid_kernel *shp;

	shp = VMA_TO_SHP(shmd);
	down(&shp->zsem);
	shp->shm_nattch++;
	up(&shp->zsem);
}

static void shmzero_close(struct vm_area_struct *shmd)
{
	int done = 0;
	struct shmid_kernel *shp;

	shp = VMA_TO_SHP(shmd);
	down(&shp->zsem);
	if (--shp->shm_nattch == 0)
		done = 1;
	up(&shp->zsem);
	if (done) {
		spin_lock(&zmap_list_lock);
		if (shp == zswap_shp)
			zswap_shp = list_entry(zswap_shp->zero_list.next,
						struct shmid_kernel, zero_list);
		list_del(&shp->zero_list);
		spin_unlock(&zmap_list_lock);
		seg_free(shp, 0);
	}
}
static struct page * shmzero_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
{
	struct page *page;
	struct shmid_kernel *shp;
	unsigned int idx;
	int dummy;

	idx = (address - shmd->vm_start) >> PAGE_SHIFT;
	idx += shmd->vm_pgoff;

	shp = VMA_TO_SHP(shmd);
	down(&shp->zsem);
	shm_lock(zero_id);
	page = shm_nopage_core(shp, idx, &dummy, &zshm_rss, address);
	shm_unlock(zero_id);
	up(&shp->zsem);
	return(page);
}

static void zmap_unuse(swp_entry_t entry, struct page *page)
{
	struct shmid_kernel *shp;

	spin_lock(&zmap_list_lock);
	shm_lock(zero_id);
	for (shp = list_entry(zshmid_kernel.zero_list.next, struct shmid_kernel,
			zero_list); shp != &zshmid_kernel;
			shp = list_entry(shp->zero_list.next, struct shmid_kernel,
					zero_list)) {
		if (shm_unuse_core(shp, entry, page))
			break;
	}
	shm_unlock(zero_id);
	spin_unlock(&zmap_list_lock);
}
static void zshm_swap (int prio, int gfp_mask)
{
	struct shmid_kernel *shp;
	swp_entry_t swap_entry;
	unsigned long idx;
	int loop = 0;
	int counter;
	struct page * page_map;

	counter = zshm_rss >> prio;
	if (!counter)
		return;
next:
	if (shm_swap_preop(&swap_entry))
		return;

	spin_lock(&zmap_list_lock);
	shm_lock(zero_id);
	if (zshmid_kernel.zero_list.next == 0)
		goto failed;
next_id:
	if (zswap_shp == &zshmid_kernel) {
		if (loop) {
failed:
			shm_unlock(zero_id);
			spin_unlock(&zmap_list_lock);
			__swap_free(swap_entry, 2);
			return;
		}
		zswap_shp = list_entry(zshmid_kernel.zero_list.next,
					struct shmid_kernel, zero_list);
		zswap_idx = 0;
		loop = 1;
	}
	shp = zswap_shp;

check_table:
	idx = zswap_idx++;
	if (idx >= shp->shm_npages) {
		zswap_shp = list_entry(zswap_shp->zero_list.next,
					struct shmid_kernel, zero_list);
		zswap_idx = 0;
		goto next_id;
	}

	switch (shm_swap_core(shp, idx, swap_entry, &counter, &page_map)) {
		case RETRY: goto check_table;
		case FAILED: goto failed;
	}
	shm_unlock(zero_id);
	spin_unlock(&zmap_list_lock);

	shm_swap_postop(page_map);
	if (counter)
		goto next;
	return;
}