/*
 * linux/ipc/shm.c
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 *	 Many improvements/fixes by Bruno Haible.
 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
 *
 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
 * avoid vmalloc and make shmmax, shmall, shmmni sysctl'able,
 *			Christoph Rohland <hans-christoph.rohland@sap.com>
 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
 * make it a file system, Christoph Rohland <hans-christoph.rohland@sap.com>
 *
 * The filesystem has the following restrictions/bugs:
 * 1) It can only handle one directory.
 * 2) Because the directory is represented by the SYSV shm array it
 *    can only be mounted one time.
 * 3) This again leads to SYSV shm not working properly in a chrooted
 *    environment.
 * 4) Read and write are not implemented (should they be?)
 * 5) No special nodes are supported.
 */
#include <linux/config.h>
#include <linux/malloc.h>
#include <linux/shm.h>
#include <linux/swap.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/locks.h>
#include <linux/file.h>
#include <linux/mman.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/highmem.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>

#include "util.h"
static struct super_block *shm_read_super(struct super_block *,void *, int);
static void shm_put_super  (struct super_block *);
static int shm_remount_fs  (struct super_block *, int *, char *);
static void shm_read_inode (struct inode *);
static void shm_write_inode(struct inode *);
static int shm_statfs (struct super_block *, struct statfs *);
static int shm_create   (struct inode *,struct dentry *,int);
static struct dentry *shm_lookup (struct inode *,struct dentry *);
static int shm_unlink   (struct inode *,struct dentry *);
static int shm_setattr  (struct dentry *dent, struct iattr *attr);
static void shm_delete  (struct inode *);
static int shm_mmap     (struct file *, struct vm_area_struct *);
static int shm_readdir  (struct file *, void *, filldir_t);

char shm_path[256] = "/var/shm";

#define SHM_NAME_LEN NAME_MAX
#define SHM_FMT ".IPC_%08x"
#define SHM_FMT_LEN 13
struct shmid_kernel /* private to the kernel */
{	
	struct kern_ipc_perm	shm_perm;
	size_t			shm_segsz;
	unsigned long		shm_nattch;
	unsigned long		shm_npages; /* size of segment (pages) */
	pte_t			**shm_dir;  /* ptr to arr of ptrs to frames */ 
	int			id;
	int			destroyed;  /* set if the final detach kills */
	union permap {
		struct shmem {
			time_t	atime;
			time_t	dtime;
			time_t	ctime;
			pid_t	cpid;
			pid_t	lpid;
			int	nlen;
			char	nm[0];
		} shmem;
		struct zero {
			struct semaphore sema;
			struct list_head list;
		} zero;
	} permap;
};

#define shm_atim	permap.shmem.atime
#define shm_dtim	permap.shmem.dtime
#define shm_ctim	permap.shmem.ctime
#define shm_cprid	permap.shmem.cpid
#define shm_lprid	permap.shmem.lpid
#define shm_namelen	permap.shmem.nlen
#define shm_name	permap.shmem.nm
#define zsem		permap.zero.sema
#define zero_list	permap.zero.list
static struct ipc_ids shm_ids;

#define shm_lock(id)	((struct shmid_kernel*)ipc_lock(&shm_ids,id))
#define shm_unlock(id)	ipc_unlock(&shm_ids,id)
#define shm_lockall()	ipc_lockall(&shm_ids)
#define shm_unlockall()	ipc_unlockall(&shm_ids)
#define shm_get(id)	((struct shmid_kernel*)ipc_get(&shm_ids,id))
#define shm_rmid(id)	((struct shmid_kernel*)ipc_rmid(&shm_ids,id))
#define shm_checkid(s, id)	\
	ipc_checkid(&shm_ids,&s->shm_perm,id)
#define shm_buildid(id, seq) \
	ipc_buildid(&shm_ids, id, seq)
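/*
 * Editor's note: with the ipc_buildid()/ipc_checkid() helpers from
 * ipc/util.h, a user-visible shm id packs the array slot together with
 * that slot's sequence counter, roughly (a sketch, not the authoritative
 * definition):
 *
 *	id   = seq * SEQ_MULTIPLIER + slot;	// shm_buildid()
 *	slot = id % SEQ_MULTIPLIER;		// recover the array index
 *
 * so a stale id whose slot has since been reused fails shm_checkid(),
 * because the sequence number stored in the slot no longer matches.
 */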
static int newseg (key_t key, const char *name, int namelen, int shmflg, size_t size);
static void killseg_core(struct shmid_kernel *shp, int doacc);
static void shm_open (struct vm_area_struct *shmd);
static void shm_close (struct vm_area_struct *shmd);
static void shm_remove_name(int id);
static struct page * shm_nopage(struct vm_area_struct *, unsigned long, int);
static int shm_swapout(struct page *, struct file *);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
#endif

static void zshm_swap (int prio, int gfp_mask, zone_t *zone);
static void zmap_unuse(swp_entry_t entry, struct page *page);
static void shmzero_open(struct vm_area_struct *shmd);
static void shmzero_close(struct vm_area_struct *shmd);
static struct page *shmzero_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share);
static int zero_id;
static struct shmid_kernel zshmid_kernel;
static struct dentry *zdent;

#define SHM_FS_MAGIC 0x02011994

static struct super_block * shm_sb;

static DECLARE_FSTYPE(shm_fs_type, "shm", shm_read_super, 0);
static struct super_operations shm_sops = {
	read_inode:	shm_read_inode,
	write_inode:	shm_write_inode,
	delete_inode:	shm_delete,
	put_super:	shm_put_super,
	statfs:		shm_statfs,
	remount_fs:	shm_remount_fs,
};

static struct file_operations shm_root_operations = {
	readdir:	shm_readdir,
};

static struct inode_operations shm_root_inode_operations = {
	create:		shm_create,
	lookup:		shm_lookup,
	unlink:		shm_unlink,
};

static struct file_operations shm_file_operations = {
	mmap:		shm_mmap,
};

static struct inode_operations shm_inode_operations = {
	setattr:	shm_setattr,
};

static struct vm_operations_struct shm_vm_ops = {
	open:	shm_open,	/* callback for a new vm-area open */
	close:	shm_close,	/* callback for when the vm-area is released */
	nopage:	shm_nopage,
	swapout:shm_swapout,
};
size_t shm_ctlmax = SHMMAX;

/* These parameters should be part of the superblock */
static int shm_ctlall;
static int shm_ctlmni;
static int shm_mode;

static int shm_tot = 0; /* total number of shared memory pages */
static int shm_rss = 0; /* number of shared memory pages that are in memory */
static int shm_swp = 0; /* number of shared memory pages that are in swap */
/* locks order:
	pagecache_lock
	shm_lock()/shm_lockall()
	kernel lock
	inode->i_sem
	sem_ids.sem
	mmap_sem

  SMP assumptions:
  - swap_free() never sleeps
  - add_to_swap_cache() never sleeps
  - add_to_swap_cache() doesn't acquire the big kernel lock.
  - shm_unuse() is called with the kernel lock acquired.
 */
/* some statistics */
static ulong swap_attempts = 0;
static ulong swap_successes = 0;
static ulong used_segs = 0;

void __init shm_init (void)
{
	ipc_init_ids(&shm_ids, 1);

	register_filesystem (&shm_fs_type);
#ifdef CONFIG_PROC_FS
	create_proc_read_entry("sysvipc/shm", 0, 0, sysvipc_shm_read_proc, NULL);
#endif
	zero_id = ipc_addid(&shm_ids, &zshmid_kernel.shm_perm, 1);
	shm_unlock(zero_id);
	INIT_LIST_HEAD(&zshmid_kernel.zero_list);
	zdent = d_alloc_root(get_empty_inode());
	return;
}
static int shm_parse_options(char *options)
{
	int blocks = shm_ctlall;
	int inodes = shm_ctlmni;
	umode_t mode = shm_mode;
	char *this_char, *value;

	this_char = NULL;
	if ( options )
		this_char = strtok(options,",");
	for ( ; this_char; this_char = strtok(NULL,",")) {
		if ((value = strchr(this_char,'=')) != NULL)
			*value++ = 0;
		if (!strcmp(this_char,"nr_blocks")) {
			if (!value || !*value)
				return 1;
			blocks = simple_strtoul(value,&value,0);
			if (*value)
				return 1;
		}
		else if (!strcmp(this_char,"nr_inodes")) {
			if (!value || !*value)
				return 1;
			inodes = simple_strtoul(value,&value,0);
			if (*value)
				return 1;
		}
		else if (!strcmp(this_char,"mode")) {
			if (!value || !*value)
				return 1;
			mode = simple_strtoul(value,&value,8);
			if (*value)
				return 1;
		}
		else
			return 1;
	}
	shm_ctlmni = inodes;
	shm_ctlall = blocks;
	shm_mode   = mode;

	return 0;
}
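/*
 * Editor's note: a sketch of how these mount options are meant to be
 * used (the exact mount invocation is an assumption, not taken from
 * this file):
 *
 *	mount -t shm shmfs /var/shm -o nr_blocks=4096,nr_inodes=128,mode=1777
 *
 * nr_blocks and nr_inodes are parsed with base 0 (so 0x.. hex and 0..
 * octal spellings also work), mode is parsed as octal, and any unknown
 * option or trailing junk makes shm_parse_options() fail the mount.
 */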
static struct super_block *shm_read_super(struct super_block *s,void *data, 
				      int silent)
{
	struct inode * root_inode;

	if (shm_sb) {
		printk ("shm fs already mounted\n");
		return NULL;
	}

	shm_ctlall = SHMALL;
	shm_ctlmni = SHMMNI;
	shm_mode   = S_IRWXUGO | S_ISVTX;
	if (shm_parse_options (data)) {
		printk ("shm fs invalid option\n");
		goto out_unlock;
	}

	s->s_blocksize = PAGE_SIZE;
	s->s_blocksize_bits = PAGE_SHIFT;
	s->s_magic = SHM_FS_MAGIC;
	s->s_op = &shm_sops;
	root_inode = iget (s, SEQ_MULTIPLIER);
	if (!root_inode)
		goto out_no_root;
	root_inode->i_op = &shm_root_inode_operations;
	root_inode->i_sb = s;
	root_inode->i_nlink = 2;
	root_inode->i_mode = S_IFDIR | shm_mode;
	s->s_root = d_alloc_root(root_inode);
	if (!s->s_root)
		goto out_no_root;
	s->u.generic_sbp = (void*) shm_sb;
	shm_sb = s;
	return s;

out_no_root:
	printk("shm_read_super: get root inode failed\n");
	iput(root_inode);
out_unlock:
	return NULL;
}
static int shm_remount_fs (struct super_block *sb, int *flags, char *data)
{
	if (shm_parse_options (data))
		return -EINVAL;
	return 0;
}

static struct fs_struct *shm_push_root(void)
{
	struct fs_struct *old,*new;
	new=init_task_union.task.fs;
	old=current->fs;
	current->fs=new;
	return old;
}

static void shm_pop_root(struct fs_struct *saved)
{
	current->fs=saved;
}
static void shm_put_super(struct super_block *sb)
{
	struct super_block **p = &shm_sb;
	int i;
	struct shmid_kernel *shp;

	while (*p != sb) {
		if (!*p)	/* should never happen */
			return;
		p = (struct super_block **)&(*p)->u.generic_sbp;
	}
	*p = (struct super_block *)(*p)->u.generic_sbp;
	down(&shm_ids.sem);
	for(i = 0; i <= shm_ids.max_id; i++) {
		if (i == zero_id)
			continue;
		if (!(shp = shm_lock (i)))
			continue;
		if (shp->shm_nattch)
			printk ("shm_nattch = %ld\n", shp->shm_nattch);
		shp = shm_rmid(i);
		shm_unlock(i);
		killseg_core(shp, 1);
	}
	dput (sb->s_root);
	up(&shm_ids.sem);
}

static int shm_statfs(struct super_block *sb, struct statfs *buf)
{
	buf->f_type = 0;
	buf->f_bsize = PAGE_SIZE;
	buf->f_blocks = shm_ctlall;
	buf->f_bavail = buf->f_bfree = shm_ctlall - shm_tot;
	buf->f_files = shm_ctlmni;
	buf->f_ffree = shm_ctlmni - used_segs;
	buf->f_namelen = SHM_NAME_LEN;
	return 0;
}
static void shm_write_inode(struct inode * inode)
{
}

static void shm_read_inode(struct inode * inode)
{
	int id;
	struct shmid_kernel *shp;

	id = inode->i_ino;
	inode->i_op = NULL;
	inode->i_mode = 0;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;

	if (id < SEQ_MULTIPLIER) {
		if (!(shp = shm_lock (id)))
			return;
		inode->i_mode = shp->shm_perm.mode | S_IFREG;
		inode->i_uid  = shp->shm_perm.uid;
		inode->i_gid  = shp->shm_perm.gid;
		inode->i_size = shp->shm_segsz;
		shm_unlock (id);
		inode->i_op  = &shm_inode_operations;
		inode->i_fop = &shm_file_operations;
		return;
	}
	inode->i_op = &shm_root_inode_operations;
	inode->i_fop = &shm_root_operations;
	inode->i_sb = shm_sb;
	inode->i_nlink = 2;
	inode->i_mode = S_IFDIR | shm_mode;
	inode->i_uid = inode->i_gid = 0;
}
static int shm_create (struct inode *dir, struct dentry *dent, int mode)
{
	int id, err;
	struct inode * inode;

	down(&shm_ids.sem);
	err = id = newseg (IPC_PRIVATE, dent->d_name.name, dent->d_name.len, mode, 0);
	if (err < 0)
		goto out;

	err = -ENOMEM;
	inode = iget (shm_sb, id % SEQ_MULTIPLIER);
	if (!inode)
		goto out;

	err = 0;
	down (&inode->i_sem);
	inode->i_mode = mode | S_IFREG;
	inode->i_op = &shm_inode_operations;
	d_instantiate(dent, inode);
	up (&inode->i_sem);

out:
	up(&shm_ids.sem);
	return err;
}
static int shm_readdir (struct file *filp, void *dirent, filldir_t filldir)
{
	struct inode * inode = filp->f_dentry->d_inode;
	struct shmid_kernel *shp;
	off_t nr;

	nr = filp->f_pos;

	switch(nr)
	{
	case 0:
		if (filldir(dirent, ".", 1, nr, inode->i_ino) < 0)
			return 0;
		filp->f_pos = ++nr;
		/* fall through */
	case 1:
		if (filldir(dirent, "..", 2, nr, inode->i_ino) < 0)
			return 0;
		filp->f_pos = ++nr;
		/* fall through */
	default:
		down(&shm_ids.sem);
		for (; nr-2 <= shm_ids.max_id; nr++ ) {
			if (nr-2 == zero_id)
				continue;
			if (!(shp = shm_get (nr-2))) 
				continue;
			if (shp->shm_perm.mode & SHM_DEST)
				continue;
			if (filldir(dirent, shp->shm_name, shp->shm_namelen, nr, nr) < 0 )
				break;
		}
		filp->f_pos = nr;
		up(&shm_ids.sem);
		break;
	}

	UPDATE_ATIME(inode);
	return 0;
}
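/*
 * Editor's note on the readdir offset scheme above: f_pos 0 and 1 are
 * "." and "..", and entry n+2 is the segment in slot n of the SYSV id
 * array, i.e. (sketch):
 *
 *	f_pos:	 0	1	2	3	...
 *	entry:	"."	".."	slot 0	slot 1	...
 *
 * which is why the loop tests nr-2 against shm_ids.max_id and skips
 * zero_id (the /dev/zero placeholder slot) as well as segments already
 * marked SHM_DEST.
 */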
static struct dentry *shm_lookup (struct inode *dir, struct dentry *dent)
{
	int i, err = 0;
	struct shmid_kernel* shp;
	struct inode *inode = NULL;

	if (dent->d_name.len > SHM_NAME_LEN)
		return ERR_PTR(-ENAMETOOLONG);

	down(&shm_ids.sem);
	for(i = 0; i <= shm_ids.max_id; i++) {
		if (i == zero_id)
			continue;
		if (!(shp = shm_lock(i)))
			continue;
		if (!(shp->shm_perm.mode & SHM_DEST) &&
		    dent->d_name.len == shp->shm_namelen &&
		    strncmp(dent->d_name.name, shp->shm_name, shp->shm_namelen) == 0)
			goto found;
		shm_unlock(i);
	}

	/*
	 * Prevent the reserved names from becoming negative dentries.
	 * This also prevents object creation through the filesystem.
	 */
	if (dent->d_name.len == SHM_FMT_LEN &&
	    memcmp (SHM_FMT, dent->d_name.name, SHM_FMT_LEN - 8) == 0)
		err = -EINVAL;	/* EINVAL to give IPC_RMID the right error */

	goto out;

found:
	shm_unlock(i);
	inode = iget(dir->i_sb, i);

	if (!inode)
		err = -EACCES;
out:
	if (err == 0)
		d_add (dent, inode);
	up (&shm_ids.sem);
	return ERR_PTR(err);
}
static int shm_unlink (struct inode *dir, struct dentry *dent)
{
	struct inode * inode = dent->d_inode;
	struct shmid_kernel *shp;

	down (&shm_ids.sem);
	if (!(shp = shm_lock (inode->i_ino)))
		BUG();
	shp->shm_perm.mode |= SHM_DEST;
	shp->shm_perm.key = IPC_PRIVATE; /* Do not find it any more */
	shm_unlock (inode->i_ino);
	up (&shm_ids.sem);
	inode->i_nlink -= 1;
	d_delete (dent);
	return 0;
}
#define SHM_ENTRY(shp, index) (shp)->shm_dir[(index)/PTRS_PER_PTE][(index)%PTRS_PER_PTE]

static pte_t **shm_alloc(unsigned long pages)
{
	unsigned short dir  = pages / PTRS_PER_PTE;
	unsigned short last = pages % PTRS_PER_PTE;
	pte_t **ret, **ptr, *pte;

	if (pages == 0)
		return NULL;

	ret = kmalloc ((dir+1) * sizeof(pte_t *), GFP_KERNEL);
	if (!ret)
		goto nomem;

	for (ptr = ret; ptr < ret+dir ; ptr++)
	{
		*ptr = (pte_t *)__get_free_page (GFP_KERNEL);
		if (!*ptr)
			goto free;
		for (pte = *ptr; pte < *ptr + PTRS_PER_PTE; pte++)
			pte_clear (pte);
	}

	/* The last one is probably not of PAGE_SIZE: we use kmalloc */
	if (last) {
		*ptr = kmalloc (last*sizeof(pte_t), GFP_KERNEL);
		if (!*ptr)
			goto free;
		for (pte = *ptr; pte < *ptr + last; pte++)
			pte_clear (pte);
	}
	return ret;

free:
	/* The last one failed: we decrement first */
	while (--ptr >= ret)
		free_page ((unsigned long)*ptr);

	kfree (ret);
nomem:
	return ERR_PTR(-ENOMEM);
}
static void shm_free(pte_t** dir, unsigned long pages)
{
	pte_t **ptr = dir+pages/PTRS_PER_PTE;

	if (!dir)
		return;

	/* first the last page */
	if (pages%PTRS_PER_PTE)
		kfree (*ptr);
	/* now the whole pages */
	while (--ptr >= dir)
		if (*ptr)
			free_page ((unsigned long)*ptr);

	/* Now the indirect block */
	kfree (dir);
}
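/*
 * Editor's note: shm_dir is a two-level table of software ptes, indexed
 * by SHM_ENTRY() above. A worked example, assuming PAGE_SIZE 4096 and
 * PTRS_PER_PTE 1024 (i386 values; both are arch-dependent):
 *
 *	pages = 2500
 *	dir   = 2500 / 1024 = 2    full pages of 1024 pte_t each
 *	last  = 2500 % 1024 = 452  pte_t in a trailing kmalloc'ed block
 *
 * so shm_alloc() kmallocs a 3-slot indirect block, grabs two full pages
 * plus one 452-entry block, and shm_free() releases them in reverse
 * order.
 */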
static int shm_setattr (struct dentry *dentry, struct iattr *attr)
{
	int error;
	struct inode *inode = dentry->d_inode;
	struct shmid_kernel *shp;
	unsigned long new_pages, old_pages;
	pte_t **new_dir, **old_dir;

	if ((error = inode_change_ok(inode, attr)))
		return error;
	if (!(attr->ia_valid & ATTR_SIZE))
		goto set_attr;
	if (attr->ia_size > shm_ctlmax)
		return -EFBIG;

	/* We set old_pages and old_dir for easier cleanup */
	old_pages = new_pages = (attr->ia_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (shm_tot + new_pages >= shm_ctlall)
		return -ENOSPC;
	if (IS_ERR(old_dir = new_dir = shm_alloc(new_pages)))
		return PTR_ERR(new_dir);

	if (!(shp = shm_lock(inode->i_ino)))
		BUG();
	if (shp->shm_segsz == attr->ia_size)
		goto out;
	old_dir = shp->shm_dir;
	old_pages = shp->shm_npages;
	if (old_dir){
		pte_t *swap;
		int i,j;
		i = old_pages < new_pages ? old_pages : new_pages;
		j = i % PTRS_PER_PTE;
		i /= PTRS_PER_PTE;
		if (j)
			memcpy (new_dir[i], old_dir[i], j * sizeof (pte_t));
		while (i--) {
			swap = new_dir[i];
			new_dir[i] = old_dir[i];
			old_dir[i] = swap;
		}
	}
	shp->shm_dir = new_dir;
	shp->shm_npages = new_pages;
	shp->shm_segsz = attr->ia_size;
out:
	shm_unlock(inode->i_ino);
	shm_lockall();
	shm_tot += new_pages - old_pages;
	shm_unlockall();
	shm_free (old_dir, old_pages);
set_attr:
	inode_setattr(inode, attr);
	return 0;
}
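/*
 * Editor's note on the resize above: a new directory sized for the new
 * length is allocated up front; the populated slots of the common
 * prefix are then swapped into it (plus a memcpy for the partial last
 * block), so the old directory ends up holding exactly the pages that
 * must go away and the single shm_free(old_dir, old_pages) covers both
 * the shrink and the grow case. Initializing old_pages/old_dir to the
 * new values makes the no-op path (size unchanged) simply free the
 * just-allocated directory.
 */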
static inline struct shmid_kernel *newseg_alloc(int numpages, size_t namelen)
{
	struct shmid_kernel *shp;

	shp = (struct shmid_kernel *) kmalloc (sizeof (*shp) + namelen, GFP_KERNEL);
	if (!shp)
		return NULL;

	/* a zero-size segment legitimately gets a NULL directory, so only
	   an ERR_PTR from shm_alloc() is a failure here */
	shp->shm_dir = shm_alloc (numpages);
	if (IS_ERR(shp->shm_dir)) {
		kfree(shp);
		return NULL;
	}
	shp->shm_npages = numpages;
	shp->shm_nattch = 0;
	shp->shm_namelen = namelen;
	return shp;
}
static int newseg (key_t key, const char *name, int namelen,
		   int shmflg, size_t size)
{
	struct shmid_kernel *shp;
	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
	int id;

	if (namelen > SHM_NAME_LEN)
		return -ENAMETOOLONG;

	if (size > shm_ctlmax)
		return -EINVAL;
	if (shm_tot + numpages >= shm_ctlall)
		return -ENOSPC;

	if (!(shp = newseg_alloc(numpages, namelen ? namelen : SHM_FMT_LEN + 1)))
		return -ENOMEM;
	id = ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni+1);
	if(id == -1) {
		shm_free(shp->shm_dir,numpages);
		kfree(shp);
		return -ENOSPC;
	}
	shp->shm_perm.key = key;
	shp->shm_perm.mode = (shmflg & S_IRWXUGO);
	shp->shm_segsz = size;
	shp->shm_cprid = current->pid;
	shp->shm_lprid = 0;
	shp->shm_atim = shp->shm_dtim = 0;
	shp->shm_ctim = CURRENT_TIME;
	shp->id = shm_buildid(id,shp->shm_perm.seq);
	if (namelen != 0) {
		shp->shm_namelen = namelen;
		memcpy (shp->shm_name, name, namelen);
	} else {
		shp->shm_namelen = sprintf (shp->shm_name, SHM_FMT, shp->id);
	}

	shm_tot += numpages;
	used_segs++;
	shm_unlock(id);

	return shp->id;
}
asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
{
	struct shmid_kernel *shp;
	int err, id = 0;
	static int count=0;

	if (!shm_sb) {
		if(count++<5)
			printk(KERN_WARNING "shmget: shm filesystem not mounted\n");
		return -EINVAL;
	}

	if (size < SHMMIN)
		return -EINVAL;

	down(&shm_ids.sem);
	if (key == IPC_PRIVATE) {
		err = newseg(key, NULL, 0, shmflg, size);
	} else if ((id = ipc_findkey(&shm_ids,key)) == -1) {
		if (!(shmflg & IPC_CREAT))
			err = -ENOENT;
		else
			err = newseg(key, NULL, 0, shmflg, size);
	} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
		err = -EEXIST;
	} else {
		shp = shm_lock(id);
		if(shp==NULL)
			BUG();
		if (shp->shm_segsz < size)
			err = -EINVAL;
		else if (ipcperms(&shp->shm_perm, shmflg))
			err = -EACCES;
		else
			err = shm_buildid(id, shp->shm_perm.seq);
		shm_unlock(id);
	}
	up(&shm_ids.sem);
	return err;
}
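/*
 * Editor's note: a minimal userspace sketch of the syscalls implemented
 * in this file (illustrative only):
 *
 *	int id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | 0600);
 *	char *p = shmat(id, NULL, 0);	// attach, kernel picks the address
 *	p[0] = 42;			// fault -> shm_nopage()
 *	shmdt(p);			// detach -> shm_close()
 *	shmctl(id, IPC_RMID, NULL);	// destroy once nattch reaches 0
 *
 * With this filesystem-backed implementation all of these fail with
 * -EINVAL until the shm filesystem is mounted (note the !shm_sb checks
 * here and in sys_shmctl()/sys_shmat()).
 */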
static void killseg_core(struct shmid_kernel *shp, int doacc)
{
	int i, numpages, rss, swp;

	numpages = shp->shm_npages;
	for (i = 0, rss = 0, swp = 0; i < numpages ; i++) {
		pte_t pte;
		pte = SHM_ENTRY (shp,i);
		if (pte_none(pte))
			continue;
		if (pte_present(pte)) {
			__free_page (pte_page(pte));
			rss++;
		} else {
			swap_free(pte_to_swp_entry(pte));
			swp++;
		}
	}
	shm_free (shp->shm_dir, numpages);
	kfree(shp);
	if (doacc) {
		shm_lockall();
		shm_rss -= rss;
		shm_swp -= swp;
		shm_tot -= numpages;
		used_segs--;
		shm_unlockall();
	}
}
static void shm_delete (struct inode *ino)
{
	int shmid = ino->i_ino;
	struct shmid_kernel *shp;

	down(&shm_ids.sem);
	shp = shm_lock(shmid);
	if(shp==NULL) {
		BUG();
	}
	shp = shm_rmid(shmid);
	shm_unlock(shmid);
	up(&shm_ids.sem);
	killseg_core(shp, 1);
	clear_inode(ino);
}
static inline unsigned long copy_shmid_to_user(void *buf, struct shmid64_ds *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shmid_ds out;

		ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
		out.shm_segsz	= in->shm_segsz;
		out.shm_atime	= in->shm_atime;
		out.shm_dtime	= in->shm_dtime;
		out.shm_ctime	= in->shm_ctime;
		out.shm_cpid	= in->shm_cpid;
		out.shm_lpid	= in->shm_lpid;
		out.shm_nattch	= in->shm_nattch;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}
struct shm_setbuf {
	uid_t	uid;
	gid_t	gid;
	mode_t	mode;
};

static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void *buf, int version)
{
	switch(version) {
	case IPC_64:
	    {
		struct shmid64_ds tbuf;

		if (copy_from_user(&tbuf, buf, sizeof(tbuf)))
			return -EFAULT;

		out->uid	= tbuf.shm_perm.uid;
		out->gid	= tbuf.shm_perm.gid;
		out->mode	= tbuf.shm_perm.mode;

		return 0;
	    }
	case IPC_OLD:
	    {
		struct shmid_ds tbuf_old;

		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->uid	= tbuf_old.shm_perm.uid;
		out->gid	= tbuf_old.shm_perm.gid;
		out->mode	= tbuf_old.shm_perm.mode;

		return 0;
	    }
	default:
		return -EINVAL;
	}
}
static inline unsigned long copy_shminfo_to_user(void *buf, struct shminfo64 *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct shminfo out;

		if(in->shmmax > INT_MAX)
			out.shmmax = INT_MAX;
		else
			out.shmmax = (int)in->shmmax;

		out.shmmin	= in->shmmin;
		out.shmmni	= in->shmmni;
		out.shmseg	= in->shmseg;
		out.shmall	= in->shmall;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}

char * shm_getname(int id)
{
	char *result;

	if (!(result = __getname ()))
		return ERR_PTR(-ENOMEM);

	sprintf (result, "%s/" SHM_FMT, shm_path, id); 
	return result;
}
asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf)
{
	struct shm_setbuf setbuf;
	struct shmid_kernel *shp;
	int err, version;
	static int count;

	if (!shm_sb) {
		if(count++<5)
			printk (KERN_WARNING "shmctl: shm filesystem not mounted\n");
		return -EINVAL;
	}

	if (cmd < 0 || shmid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);

	switch (cmd) { /* replace with proc interface ? */
	case IPC_INFO:
	{
		struct shminfo64 shminfo;

		memset(&shminfo,0,sizeof(shminfo));
		shminfo.shmmni = shminfo.shmseg = shm_ctlmni;
		shminfo.shmmax = shm_ctlmax;
		shminfo.shmall = shm_ctlall;

		shminfo.shmmin = SHMMIN;
		if(copy_shminfo_to_user (buf, &shminfo, version))
			return -EFAULT;
		/* reading an integer is always atomic */
		err= shm_ids.max_id;
		if(err<0)
			err = 0;
		return err;
	}
	case SHM_INFO:
	{
		struct shm_info shm_info;

		memset(&shm_info,0,sizeof(shm_info));
		shm_lockall();
		shm_info.used_ids = shm_ids.in_use;
		shm_info.shm_rss = shm_rss;
		shm_info.shm_tot = shm_tot;
		shm_info.shm_swp = shm_swp;
		shm_info.swap_attempts = swap_attempts;
		shm_info.swap_successes = swap_successes;
		err = shm_ids.max_id;
		shm_unlockall();
		if(copy_to_user (buf, &shm_info, sizeof(shm_info)))
			return -EFAULT;

		return err < 0 ? 0 : err;
	}
	case SHM_STAT:
	case IPC_STAT:
	{
		struct shmid64_ds tbuf;
		int result;
		if ((shmid % SEQ_MULTIPLIER) == zero_id)
			return -EINVAL;
		memset(&tbuf, 0, sizeof(tbuf));
		shp = shm_lock(shmid);
		if(shp==NULL)
			return -EINVAL;
		if(cmd==SHM_STAT) {
			err = -EINVAL;
			if (shmid > shm_ids.max_id)
				goto out_unlock;
			result = shm_buildid(shmid, shp->shm_perm.seq);
		} else {
			err = -EIDRM;
			if(shm_checkid(shp,shmid))
				goto out_unlock;
			result = 0;
		}
		err=-EACCES;
		if (ipcperms (&shp->shm_perm, S_IRUGO))
			goto out_unlock;
		kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
		tbuf.shm_segsz	= shp->shm_segsz;
		tbuf.shm_atime	= shp->shm_atim;
		tbuf.shm_dtime	= shp->shm_dtim;
		tbuf.shm_ctime	= shp->shm_ctim;
		tbuf.shm_cpid	= shp->shm_cprid;
		tbuf.shm_lpid	= shp->shm_lprid;
		tbuf.shm_nattch	= shp->shm_nattch;
		shm_unlock(shmid);
		if(copy_shmid_to_user (buf, &tbuf, version))
			return -EFAULT;
		return result;
	}
	case SHM_LOCK:
	case SHM_UNLOCK:
	{
		/* Allow superuser to lock segment in memory */
		/* Should the pages be faulted in here or leave it to user? */
		/* need to determine interaction with current->swappable */
		struct kern_ipc_perm *ipcp;
		if ((shmid % SEQ_MULTIPLIER)== zero_id)
			return -EINVAL;
		if (!capable(CAP_IPC_LOCK))
			return -EPERM;

		shp = shm_lock(shmid);
		if(shp==NULL)
			return -EINVAL;
		err=-EIDRM;
		if(shm_checkid(shp,shmid))
			goto out_unlock;
		ipcp = &shp->shm_perm;
		if(cmd==SHM_LOCK) {
			if (!(ipcp->mode & SHM_LOCKED)) {
				ipcp->mode |= SHM_LOCKED;
				err = 0;
			}
		} else {
			if (ipcp->mode & SHM_LOCKED) {
				ipcp->mode &= ~SHM_LOCKED;
				err = 0;
			}
		}
		shm_unlock(shmid);
		return err;
	}
	case IPC_RMID:
	{
		/*
		 *	We cannot simply remove the file. The SVID states
		 *	that the block remains until the last person
		 *	detaches from it, then is deleted. A shmat() on
		 *	an RMID segment is legal in older Linux and if 
		 *	we change it apps break...
		 *
		 *	Instead we set a destroyed flag, and then blow
		 *	the name away when the usage hits zero.
		 */
		if ((shmid % SEQ_MULTIPLIER)== zero_id)
			return -EINVAL;
		lock_kernel();
		shp = shm_lock(shmid);
		if(shp==NULL)
		{
			unlock_kernel();
			return -EINVAL;
		}
		err=-EIDRM;
		if(shm_checkid(shp,shmid)==0)
		{
			if(shp->shm_nattch==0)
				shm_remove_name(shmid);
			else
				shp->destroyed=1;
			err=0;
		}
		shm_unlock(shmid);
		unlock_kernel();
		return err;
	}

	case IPC_SET:
	{
		if ((shmid % SEQ_MULTIPLIER)== zero_id)
			return -EINVAL;

		if(copy_shmid_from_user (&setbuf, buf, version))
			return -EFAULT;
		down(&shm_ids.sem);
		shp = shm_lock(shmid);
		err=-EINVAL;
		if(shp==NULL)
			goto out_up;
		err=-EIDRM;
		if(shm_checkid(shp,shmid))
			goto out_unlock_up;
		err=-EPERM;
		if (current->euid != shp->shm_perm.uid &&
		    current->euid != shp->shm_perm.cuid && 
		    !capable(CAP_SYS_ADMIN)) {
			goto out_unlock_up;
		}

		shp->shm_perm.uid = setbuf.uid;
		shp->shm_perm.gid = setbuf.gid;
		shp->shm_perm.mode = (shp->shm_perm.mode & ~S_IRWXUGO)
			| (setbuf.mode & S_IRWXUGO);
		shp->shm_ctim = CURRENT_TIME;
		break;
	}

	default:
		return -EINVAL;
	}

	err = 0;
out_unlock_up:
	shm_unlock(shmid);
out_up:
	up(&shm_ids.sem);
	return err;
out_unlock:
	shm_unlock(shmid);
	return err;
}
static inline void shm_inc (int id) {
	struct shmid_kernel *shp;

	if(!(shp = shm_lock(id)))
		BUG();
	shp->shm_atim = CURRENT_TIME;
	shp->shm_lprid = current->pid;
	shp->shm_nattch++;
	shm_unlock(id);
}

static int shm_mmap(struct file * file, struct vm_area_struct * vma)
{
	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;	/* we cannot do private mappings */
	UPDATE_ATIME(file->f_dentry->d_inode);
	vma->vm_ops = &shm_vm_ops;
	shm_inc(file->f_dentry->d_inode->i_ino);
	return 0;
}
/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 */
asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
{
	unsigned long addr;
	struct file * file;
	int    err;
	int    flags;
	char   *name;
	struct fs_struct *saved;

	if (!shm_sb || (shmid % SEQ_MULTIPLIER) == zero_id)
		return -EINVAL;

	if ((addr = (ulong)shmaddr))
	{
		if(addr & (SHMLBA-1)) {
			if (shmflg & SHM_RND)
				addr &= ~(SHMLBA-1);	   /* round down */
			else
				return -EINVAL;
		}
		flags = MAP_SHARED | MAP_FIXED;
	} else
		flags = MAP_SHARED;

	name = shm_getname(shmid);
	if (IS_ERR (name))
		return PTR_ERR (name);

	lock_kernel();
	saved=shm_push_root();
	file = filp_open (name, O_RDWR, 0);
	shm_pop_root(saved);

	putname (name);
	if (IS_ERR (file))
		goto bad_file;

	*raddr = do_mmap (file, addr, file->f_dentry->d_inode->i_size,
			  (shmflg & SHM_RDONLY ? PROT_READ :
			   PROT_READ | PROT_WRITE), flags, 0);
	unlock_kernel();
	if (IS_ERR(*raddr))
		err = PTR_ERR(*raddr);
	else
		err = 0;
	fput (file);
	return err;

bad_file:
	unlock_kernel();
	if ((err = PTR_ERR(file)) == -ENOENT)
		return -EINVAL;
	return err;
}
/* This is called by fork, once for every shm attach. */
static void shm_open (struct vm_area_struct *shmd)
{
	shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino);
}

/*
 * Remove a name. Must be called with lock_kernel.
 */
static void shm_remove_name(int id)
{
	char *name = shm_getname(id);
	if (!IS_ERR(name))
	{
		struct fs_struct *saved;
		saved=shm_push_root();
		do_unlink (name);
		shm_pop_root(saved);
		putname (name);
	}
}
/*
 * remove the attach descriptor shmd.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close (struct vm_area_struct *shmd)
{
	int id = shmd->vm_file->f_dentry->d_inode->i_ino;
	struct shmid_kernel *shp;

	/* remove from the list of attaches of the shm segment */
	if(!(shp = shm_lock(id)))
		BUG();
	shp->shm_lprid = current->pid;
	shp->shm_dtim = CURRENT_TIME;
	shp->shm_nattch--;
	if(shp->shm_nattch==0 && shp->destroyed)
	{
		shp->destroyed=0;
		shm_remove_name(id);
		shm_unlock(id);
	}
	else
		shm_unlock(id);
}
/*
 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
 */
asmlinkage long sys_shmdt (char *shmaddr)
{
	struct vm_area_struct *shmd, *shmdnext;

	down(&current->mm->mmap_sem);
	for (shmd = current->mm->mmap; shmd; shmd = shmdnext) {
		shmdnext = shmd->vm_next;
		if (shmd->vm_ops == &shm_vm_ops
		    && shmd->vm_start - (shmd->vm_pgoff << PAGE_SHIFT) == (ulong) shmaddr)
			do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);
	}
	up(&current->mm->mmap_sem);
	return 0;
}
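/*
 * Editor's note: the match above reconstructs the address the segment
 * was attached at. Right after shmat(), vm_pgoff is 0 and the
 * expression is just vm_start; if the front of the mapping was later
 * split off by a partial munmap(), vm_pgoff of the surviving vma grows
 * by the removed pages, so vm_start - (vm_pgoff << PAGE_SHIFT) still
 * yields the original shmaddr and the remainder is detached as well.
 */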
/*
 * Enter the shm page into the SHM data structures.
 *
 * The way "nopage" is done, we don't actually have to
 * do anything here: nopage will have filled in the shm
 * data structures already, and shm_swap_out() will just
 * work off them..
 */
static int shm_swapout(struct page * page, struct file *file)
{
	return 0;
}
/*
 * page not present ... go through shm_dir
 */
static struct page * shm_nopage_core(struct shmid_kernel *shp, unsigned int idx, int *swp, int *rss, unsigned long address)
{
	pte_t pte;
	struct page * page;

	if (idx >= shp->shm_npages)
		return NOPAGE_SIGBUS;

	pte = SHM_ENTRY(shp,idx);
	if (!pte_present(pte)) {
		/* page not present so shm_swap can't race with us
		   and the semaphore protects us from other tasks that
		   could potentially fault on our pte under us */
		if (pte_none(pte)) {
			shm_unlock(shp->id);
			page = alloc_page(GFP_HIGHUSER);
			if (!page)
				goto oom;
			clear_user_highpage(page, address);
			if ((shp != shm_lock(shp->id)) && (shp->id != zero_id))
				BUG();
		} else {
			swp_entry_t entry = pte_to_swp_entry(pte);

			shm_unlock(shp->id);
			page = lookup_swap_cache(entry);
			if (!page) {
				lock_kernel();
				swapin_readahead(entry);
				page = read_swap_cache(entry);
				unlock_kernel();
				if (!page)
					goto oom;
			}
			delete_from_swap_cache(page);
			page = replace_with_highmem(page);
			swap_free(entry);
			if ((shp != shm_lock(shp->id)) && (shp->id != zero_id))
				BUG();
			(*swp)--;
		}
		(*rss)++;
		pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
		SHM_ENTRY(shp, idx) = pte;
	} else
		--current->maj_flt;  /* was incremented in do_no_page */

	/* pte_val(pte) == SHM_ENTRY (shp, idx) */
	get_page(pte_page(pte));
	current->min_flt++;
	return pte_page(pte);

oom:
	shm_lock(shp->id);
	return NOPAGE_OOM;
}
static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
{
	struct page * page;
	struct shmid_kernel *shp;
	unsigned int idx;
	struct inode * inode = shmd->vm_file->f_dentry->d_inode;

	idx = (address - shmd->vm_start) >> PAGE_SHIFT;
	idx += shmd->vm_pgoff;

	down(&inode->i_sem);
	if(!(shp = shm_lock(inode->i_ino)))
		BUG();
	page = shm_nopage_core(shp, idx, &shm_swp, &shm_rss, address);
	shm_unlock(inode->i_ino);
	up(&inode->i_sem);
	return(page);
}
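/*
 * Editor's note: shm_nopage() takes inode->i_sem around the core so a
 * concurrent shm_setattr() (truncate on the shm file, which runs under
 * i_sem) cannot swap shm_dir out from under a faulting task, and so two
 * faulters on the same segment are serialized. shm_nopage_core() itself
 * may drop and retake the shm spinlock around the blocking page
 * allocations, which is only safe because of this outer semaphore (see
 * the comment inside the core).
 */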
#define OKAY	0
#define RETRY	1
#define FAILED	2

static int shm_swap_core(struct shmid_kernel *shp, unsigned long idx, swp_entry_t swap_entry, zone_t *zone, int *counter, struct page **outpage)
{
	pte_t page;
	struct page *page_map;

	page = SHM_ENTRY(shp, idx);
	if (!pte_present(page))
		return RETRY;
	page_map = pte_page(page);
	if (zone && (!memclass(page_map->zone, zone)))
		return RETRY;
	if (shp->id != zero_id) swap_attempts++;

	if (--*counter < 0) /* failed */
		return FAILED;
	if (page_count(page_map) != 1)
		return RETRY;

	if (!(page_map = prepare_highmem_swapout(page_map)))
		return FAILED;
	SHM_ENTRY (shp, idx) = swp_entry_to_pte(swap_entry);

	/* add the locked page to the swap cache before allowing
	   the swapin path to run lookup_swap_cache(). This avoids
	   reading a not yet uptodate block from disk.
	   NOTE: we just accounted the swap space reference for this
	   swap cache page at __get_swap_page() time. */
	add_to_swap_cache(*outpage = page_map, swap_entry);
	return OKAY;
}
static void shm_swap_postop(struct page *page)
{
	lock_kernel();
	rw_swap_page(WRITE, page, 0);
	unlock_kernel();
	__free_page(page);
}

static int shm_swap_preop(swp_entry_t *swap_entry)
{
	lock_kernel();
	/* subtle: preload the swap count for the swap cache. We can't
	   increase the count inside the critical section as we can't release
	   the shm_lock there. And we can't acquire the big lock with the
	   shm_lock held (otherwise we would deadlock too easily). */
	*swap_entry = __get_swap_page(2);
	if (!(*swap_entry).val) {
		unlock_kernel();
		return 1;
	}
	unlock_kernel();
	return 0;
}
/*
 * Goes through counter = (shm_rss >> prio) present shm pages.
 */
static unsigned long swap_id = 0; /* currently being swapped */
static unsigned long swap_idx = 0; /* next to swap */

int shm_swap (int prio, int gfp_mask, zone_t *zone)
{
	struct shmid_kernel *shp;
	swp_entry_t swap_entry;
	unsigned long id, idx;
	int loop = 0;
	int counter;
	struct page * page_map;

	zshm_swap(prio, gfp_mask, zone);
	counter = shm_rss >> prio;
	if (!counter)
		return 0;
	if (shm_swap_preop(&swap_entry))
		return 0;

	shm_lockall();
check_id:
	shp = shm_get(swap_id);
	if(shp==NULL || shp->shm_perm.mode & SHM_LOCKED) {
next_id:
		swap_idx = 0;
		if (++swap_id > shm_ids.max_id) {
			swap_id = 0;
			if (loop) {
failed:
				shm_unlockall();
				__swap_free(swap_entry, 2);
				return 0;
			}
			loop = 1;
		}
		goto check_id;
	}
	id = swap_id;

check_table:
	idx = swap_idx++;
	if (idx >= shp->shm_npages)
		goto next_id;

	switch (shm_swap_core(shp, idx, swap_entry, zone, &counter, &page_map)) {
		case RETRY: goto check_table;
		case FAILED: goto failed;
	}
	swap_successes++;
	shm_swp++;
	shm_rss--;
	shm_unlockall();

	shm_swap_postop(page_map);
	return 1;
}
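/*
 * Editor's note: swap_id/swap_idx form a persistent clock-style cursor
 * over (segment, page) pairs, so successive shm_swap() calls resume
 * scanning where the previous one stopped; `loop' guards against
 * walking the whole id space more than once per call. The swap entry is
 * preallocated by shm_swap_preop() because __get_swap_page() needs the
 * big kernel lock, which must not be acquired while holding the shm
 * lock (see shm_swap_preop()'s comment above).
 */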
/*
 * Free the swap entry and set the new pte for the shm page.
 */
static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
			   swp_entry_t entry, struct page *page)
{
	pte_t pte;

	pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
	SHM_ENTRY(shp, idx) = pte;
	get_page(page);
	shm_rss++;

	shm_swp--;

	swap_free(entry);
}

static int shm_unuse_core(struct shmid_kernel *shp, swp_entry_t entry, struct page *page)
{
	int n;

	for (n = 0; n < shp->shm_npages; n++) {
		if (pte_none(SHM_ENTRY(shp,n)))
			continue;
		if (pte_present(SHM_ENTRY(shp,n)))
			continue;
		if (pte_to_swp_entry(SHM_ENTRY(shp,n)).val == entry.val) {
			shm_unuse_page(shp, n, entry, page);
			return 1;
		}
	}
	return 0;
}
/*
 * shm_unuse() searches for a shm page that may have been swapped out to
 * the given swap entry.
 */
void shm_unuse(swp_entry_t entry, struct page *page)
{
	int i;

	shm_lockall();
	for (i = 0; i <= shm_ids.max_id; i++) {
		struct shmid_kernel *shp = shm_get(i);
		if(shp==NULL)
			continue;
		if (shm_unuse_core(shp, entry, page))
			goto out;
	}
out:
	shm_unlockall();
	zmap_unuse(entry, page);
}
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
{
	off_t pos = 0;
	off_t begin = 0;
	int i, len = 0;

	down(&shm_ids.sem);
	len += sprintf(buffer, "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime name\n");

	for(i = 0; i <= shm_ids.max_id; i++) {
		struct shmid_kernel* shp;

		if (i == zero_id)
			continue;
		shp = shm_lock(i);
		if(shp!=NULL) {
#define SMALL_STRING "%10d %10d  %4o %10u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu %.*s\n"
#define BIG_STRING   "%10d %10d  %4o %21u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu %.*s\n"
			char *format;

			if (sizeof(size_t) <= sizeof(int))
				format = SMALL_STRING;
			else
				format = BIG_STRING;
			len += sprintf(buffer + len, format,
				shp->shm_perm.key,
				shm_buildid(i, shp->shm_perm.seq),
				shp->shm_perm.mode,
				shp->shm_segsz,
				shp->shm_cprid,
				shp->shm_lprid,
				shp->shm_nattch,
				shp->shm_perm.uid,
				shp->shm_perm.gid,
				shp->shm_perm.cuid,
				shp->shm_perm.cgid,
				shp->shm_atim,
				shp->shm_dtim,
				shp->shm_ctim,
				shp->shm_namelen,
				shp->shm_name);
			shm_unlock(i);
		}
		pos += len;
		if(pos < offset) {
			len = 0;
			begin = pos;
		}
		if(pos > offset + length)
			goto done;
	}
	*eof = 1;
done:
	up(&shm_ids.sem);
	*start = buffer + (offset - begin);
	len -= (offset - begin);
	if(len > length)
		len = length;
	if(len < 0)
		len = 0;
	return len;
}
#endif
#define VMA_TO_SHP(vma)		((vma)->vm_file->private_data)

static spinlock_t zmap_list_lock = SPIN_LOCK_UNLOCKED;
static unsigned long zswap_idx = 0; /* next to swap */
static struct shmid_kernel *zswap_shp = &zshmid_kernel;
static int zshm_rss;

static struct vm_operations_struct shmzero_vm_ops = {
	open:		shmzero_open,
	close:		shmzero_close,
	nopage:		shmzero_nopage,
	swapout:	shm_swapout,
};

/*
 * In this implementation, the "unuse" and "swapout" interfaces are
 * interlocked via the kernel_lock, as well as shm_lock(zero_id).
 * "unuse" and "nopage/swapin", as well as "swapout" and "nopage/swapin"
 * interlock via shm_lock(zero_id). All these interlocks could be based
 * on a per-mapping lock instead of being a global lock.
 */
/*
 * Reference (existence) counting on the file/dentry/inode is done
 * by generic vm_file code. The zero code does not hold any reference
 * on the pseudo-file. This is possible because the open/close calls
 * are bracketed by the file count update calls.
 */
static struct file *file_setup(struct file *fzero, struct shmid_kernel *shp)
{
	struct file *filp;
	struct inode *inp;

	if ((filp = get_empty_filp()) == 0)
		return(filp);
	if ((inp = get_empty_inode()) == 0) {
		put_filp(filp);
		return(0);
	}
	if ((filp->f_dentry = d_alloc(zdent, &(const struct qstr) { "dev/zero",
						8, 0 })) == 0) {
		iput(inp);
		put_filp(filp);
		return(0);
	}
	d_instantiate(filp->f_dentry, inp);

	/*
	 * Copy over /dev/zero dev/ino for benefit of procfs. Use
	 * ino to indicate separate mappings.
	 */
	filp->f_dentry->d_inode->i_dev = fzero->f_dentry->d_inode->i_dev;
	filp->f_dentry->d_inode->i_ino = (unsigned long)shp;
	fput(fzero);	/* release /dev/zero file */
	return(filp);
}
int map_zero_setup(struct vm_area_struct *vma)
{
	extern int vm_enough_memory(long pages);
	struct shmid_kernel *shp;
	struct file *filp;

	if (!vm_enough_memory((vma->vm_end - vma->vm_start) >> PAGE_SHIFT))
		return -ENOMEM;
	if (!(shp = newseg_alloc((vma->vm_end - vma->vm_start) / PAGE_SIZE, 0)))
		return -ENOMEM;
	if ((filp = file_setup(vma->vm_file, shp)) == 0) {
		killseg_core(shp, 0);
		return -ENOMEM;
	}
	vma->vm_file = filp;
	VMA_TO_SHP(vma) = (void *)shp;
	shp->id = zero_id;
	init_MUTEX(&shp->zsem);
	vma->vm_ops = &shmzero_vm_ops;
	shmzero_open(vma);
	spin_lock(&zmap_list_lock);
	list_add(&shp->zero_list, &zshmid_kernel.zero_list);
	spin_unlock(&zmap_list_lock);
	return 0;
}
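/*
 * Editor's note: this is the MAP_SHARED /dev/zero path. A sketch of the
 * userspace view (illustrative only):
 *
 *	int fd = open("/dev/zero", O_RDWR);
 *	char *p = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
 *	if (fork() == 0)
 *		p[0] = 1;	// parent and child share these pages
 *
 * Each such mapping gets an anonymous shmid_kernel (no name, sharing
 * the zero_id slot) plus a private pseudo-file from file_setup(), and
 * is chained on zshmid_kernel.zero_list so zshm_swap()/zmap_unuse()
 * can find its pages.
 */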
static void shmzero_open(struct vm_area_struct *shmd)
{
	struct shmid_kernel *shp;

	shp = VMA_TO_SHP(shmd);
	down(&shp->zsem);
	shp->shm_nattch++;
	up(&shp->zsem);
}

static void shmzero_close(struct vm_area_struct *shmd)
{
	int done = 0;
	struct shmid_kernel *shp;

	shp = VMA_TO_SHP(shmd);
	down(&shp->zsem);
	if (--shp->shm_nattch == 0)
		done = 1;
	up(&shp->zsem);
	if (done) {
		spin_lock(&zmap_list_lock);
		if (shp == zswap_shp)
			zswap_shp = list_entry(zswap_shp->zero_list.next, 
						struct shmid_kernel, zero_list);
		list_del(&shp->zero_list);
		spin_unlock(&zmap_list_lock);
		killseg_core(shp, 0);
	}
}
static struct page * shmzero_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
{
	struct page *page;
	struct shmid_kernel *shp;
	unsigned int idx;
	int dummy;

	idx = (address - shmd->vm_start) >> PAGE_SHIFT;
	idx += shmd->vm_pgoff;

	shp = VMA_TO_SHP(shmd);
	down(&shp->zsem);
	shm_lock(zero_id);
	page = shm_nopage_core(shp, idx, &dummy, &zshm_rss, address);
	shm_unlock(zero_id);
	up(&shp->zsem);
	return(page);
}

static void zmap_unuse(swp_entry_t entry, struct page *page)
{
	struct shmid_kernel *shp;

	spin_lock(&zmap_list_lock);
	shm_lock(zero_id);
	for (shp = list_entry(zshmid_kernel.zero_list.next, struct shmid_kernel, 
			zero_list); shp != &zshmid_kernel;
			shp = list_entry(shp->zero_list.next, struct shmid_kernel, 
						zero_list)) {
		if (shm_unuse_core(shp, entry, page))
			break;
	}
	shm_unlock(zero_id);
	spin_unlock(&zmap_list_lock);
}
static void zshm_swap (int prio, int gfp_mask, zone_t *zone)
{
	struct shmid_kernel *shp;
	swp_entry_t swap_entry;
	unsigned long idx;
	int loop = 0;
	int counter;
	struct page * page_map;

	counter = zshm_rss >> prio;
	if (!counter)
		return;
next:
	if (shm_swap_preop(&swap_entry))
		return;

	spin_lock(&zmap_list_lock);
	shm_lock(zero_id);
	if (zshmid_kernel.zero_list.next == 0)
		goto failed;
next_id:
	if (zswap_shp == &zshmid_kernel) {
		if (loop) {
failed:
			shm_unlock(zero_id);
			spin_unlock(&zmap_list_lock);
			__swap_free(swap_entry, 2);
			return;
		}
		zswap_shp = list_entry(zshmid_kernel.zero_list.next, 
					struct shmid_kernel, zero_list);
		zswap_idx = 0;
		loop = 1;
	}
	shp = zswap_shp;

check_table:
	idx = zswap_idx++;
	if (idx >= shp->shm_npages) {
		zswap_shp = list_entry(zswap_shp->zero_list.next, 
					struct shmid_kernel, zero_list);
		zswap_idx = 0;
		goto next_id;
	}

	switch (shm_swap_core(shp, idx, swap_entry, zone, &counter, &page_map)) {
		case RETRY: goto check_table;
		case FAILED: goto failed;
	}
	shm_unlock(zero_id);
	spin_unlock(&zmap_list_lock);

	shm_swap_postop(page_map);
	if (counter)
		goto next;
	return;
}