/*
 * Copyright (C) 1992, 1993 Krishna Balasubramanian
 *	Many improvements/fixes by Bruno Haible.
 *	Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
 *	Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
 *
 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
 * avoid vmalloc and make shmmax, shmall, shmmni sysctl'able,
 *	Christoph Rohland <hans-christoph.rohland@sap.com>
 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
 * make it a file system, Christoph Rohland <hans-christoph.rohland@sap.com>
 *
 * The filesystem has the following restrictions/bugs:
 * 1) It only can handle one directory.
 * 2) Because the directory is represented by the SYSV shm array it
 *    can only be mounted one time.
 * 3) This again leads to SYSV shm not working properly in a chrooted
 *    environment.
 * 4) Read and write are not implemented (should they?)
 * 5) No special nodes are supported
 */
27 #include <linux/config.h>
28 #include <linux/malloc.h>
29 #include <linux/shm.h>
30 #include <linux/swap.h>
31 #include <linux/smp_lock.h>
32 #include <linux/init.h>
33 #include <linux/locks.h>
34 #include <linux/file.h>
35 #include <linux/mman.h>
36 #include <linux/vmalloc.h>
37 #include <linux/pagemap.h>
38 #include <linux/proc_fs.h>
39 #include <linux/highmem.h>
41 #include <asm/uaccess.h>
42 #include <asm/pgtable.h>
/*
 * Forward declarations for the shm filesystem: superblock, inode,
 * directory and mmap operations implemented further down in this file.
 */
static struct super_block *shm_read_super(struct super_block *,void *, int);
static void shm_put_super (struct super_block *);
static int shm_remount_fs (struct super_block *, int *, char *);
static void shm_read_inode (struct inode *);
static void shm_write_inode(struct inode *);
static int shm_statfs (struct super_block *, struct statfs *);
static int shm_create (struct inode *,struct dentry *,int);
static struct dentry *shm_lookup (struct inode *,struct dentry *);
static int shm_unlink (struct inode *,struct dentry *);
static int shm_setattr (struct dentry *dent, struct iattr *attr);
static void shm_delete (struct inode *);
static int shm_mmap (struct file *, struct vm_area_struct *);
static int shm_readdir (struct file *, void *, filldir_t);
/*
 * Where the shm filesystem is expected to be mounted; shm_getname()
 * builds "<shm_path>/.IPC_xxxxxxxx" pathnames under it.
 */
char shm_path[256] = "/var/shm";

#define SHM_NAME_LEN NAME_MAX	/* longest segment name the fs accepts */
#define SHM_FMT ".IPC_%08x"	/* name format for key-created segments */
#define SHM_FMT_LEN 13		/* strlen of expanded SHM_FMT: ".IPC_" (5) + 8 hex digits */
66 struct shmid_kernel
/* private to the kernel */
68 struct kern_ipc_perm shm_perm
;
70 unsigned long shm_nattch
;
71 unsigned long shm_npages
; /* size of segment (pages) */
72 pte_t
**shm_dir
; /* ptr to arr of ptrs to frames */
74 int destroyed
; /* set if the final detach kills */
86 struct semaphore sema
;
87 struct list_head list
;
/*
 * Accessors for per-segment metadata.  These resolve into a `permap'
 * union inside struct shmid_kernel (the union itself is not fully
 * visible in this copy): the `shmem' arm carries SYSV segment state,
 * the `zero' arm carries shared-/dev/zero mapping state.
 */
#define shm_atim	permap.shmem.atime
#define shm_dtim	permap.shmem.dtime
#define shm_ctim	permap.shmem.ctime
#define shm_cprid	permap.shmem.cpid
#define shm_lprid	permap.shmem.lpid
#define shm_namelen	permap.shmem.nlen
#define shm_name	permap.shmem.nm
#define zsem		permap.zero.sema
#define zero_list	permap.zero.list

/* id array for all SYSV shm segments; wraps the generic ipc id layer. */
static struct ipc_ids shm_ids;

/* Convenience wrappers around the generic ipc_* helpers for shm_ids. */
#define shm_lock(id)	((struct shmid_kernel*)ipc_lock(&shm_ids,id))
#define shm_unlock(id)	ipc_unlock(&shm_ids,id)
#define shm_lockall()	ipc_lockall(&shm_ids)
#define shm_unlockall()	ipc_unlockall(&shm_ids)
#define shm_get(id)	((struct shmid_kernel*)ipc_get(&shm_ids,id))
#define shm_rmid(id)	((struct shmid_kernel*)ipc_rmid(&shm_ids,id))
#define shm_checkid(s, id) \
	ipc_checkid(&shm_ids,&s->shm_perm,id)
#define shm_buildid(id, seq) \
	ipc_buildid(&shm_ids, id, seq)
/* Segment life-cycle, vma callbacks and swap-out helpers. */
static int newseg (key_t key, const char *name, int namelen, int shmflg, size_t size);
static void killseg_core(struct shmid_kernel *shp, int doacc);
static void shm_open (struct vm_area_struct *shmd);
static void shm_close (struct vm_area_struct *shmd);
static void shm_remove_name(int id);
static struct page * shm_nopage(struct vm_area_struct *, unsigned long, int);
static int shm_swapout(struct page *, struct file *);
122 #ifdef CONFIG_PROC_FS
123 static int sysvipc_shm_read_proc(char *buffer
, char **start
, off_t offset
, int length
, int *eof
, void *data
);
/* Shared-/dev/zero mapping support (segments live outside shm_ids). */
static void zshm_swap (int prio, int gfp_mask, zone_t *zone);
static void zmap_unuse(swp_entry_t entry, struct page *page);
static void shmzero_open(struct vm_area_struct *shmd);
static void shmzero_close(struct vm_area_struct *shmd);
static struct page *shmzero_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share);

/* Anchor of the list of all /dev/zero shm segments. */
static struct shmid_kernel zshmid_kernel;
/* Root dentry that the per-mapping pseudo dentries hang off. */
static struct dentry *zdent;
/* Magic number identifying the shm filesystem (statfs/superblock). */
#define SHM_FS_MAGIC 0x02011994

/* The mounted shm superblock; NULL until the fs is mounted. */
static struct super_block * shm_sb;

static DECLARE_FSTYPE(shm_fs_type, "shm", shm_read_super, 0);
141 static struct super_operations shm_sops
= {
142 read_inode
: shm_read_inode
,
143 write_inode
: shm_write_inode
,
144 delete_inode
: shm_delete
,
145 put_super
: shm_put_super
,
147 remount_fs
: shm_remount_fs
,
150 static struct file_operations shm_root_operations
= {
151 readdir
: shm_readdir
,
154 static struct inode_operations shm_root_inode_operations
= {
160 static struct file_operations shm_file_operations
= {
164 static struct inode_operations shm_inode_operations
= {
165 setattr
: shm_setattr
,
168 static struct vm_operations_struct shm_vm_ops
= {
169 open
: shm_open
, /* callback for a new vm-area open */
170 close
: shm_close
, /* callback for when the vm-area is released */
/* Largest single segment allowed (SHMMAX), sysctl-adjustable. */
size_t shm_ctlmax = SHMMAX;

/* These parameters should be part of the superblock */
static int shm_ctlall;	/* system-wide page limit (statfs f_blocks) */
static int shm_ctlmni;	/* max number of segments (statfs f_files) */

static int shm_tot = 0; /* total number of shared memory pages */
static int shm_rss = 0; /* number of shared memory pages that are in memory */
static int shm_swp = 0; /* number of shared memory pages that are in swap */
/*
 * Locking notes (fragmentary in this copy) — invariants relied on while
 * holding shm_lock()/shm_lockall():
 * - swap_free() never sleeps
 * - add_to_swap_cache() never sleeps
 * - add_to_swap_cache() doesn't acquire the big kernel lock.
 * - shm_unuse() is called with the kernel lock acquired.
 */
/* some statistics */
static ulong swap_attempts = 0;		/* swap-out attempts (presumably bumped in shm_swap_core — verify) */
static ulong swap_successes = 0;	/* successful swap-outs */
static ulong used_segs = 0;		/* live segments; feeds statfs f_ffree */
206 void __init
shm_init (void)
208 ipc_init_ids(&shm_ids
, 1);
210 register_filesystem (&shm_fs_type
);
211 #ifdef CONFIG_PROC_FS
212 create_proc_read_entry("sysvipc/shm", 0, 0, sysvipc_shm_read_proc
, NULL
);
214 zero_id
= ipc_addid(&shm_ids
, &zshmid_kernel
.shm_perm
, 1);
216 INIT_LIST_HEAD(&zshmid_kernel
.zero_list
);
217 zdent
= d_alloc_root(get_empty_inode());
221 static int shm_parse_options(char *options
)
223 int blocks
= shm_ctlall
;
224 int inodes
= shm_ctlmni
;
225 umode_t mode
= shm_mode
;
226 char *this_char
, *value
;
230 this_char
= strtok(options
,",");
231 for ( ; this_char
; this_char
= strtok(NULL
,",")) {
232 if ((value
= strchr(this_char
,'=')) != NULL
)
234 if (!strcmp(this_char
,"nr_blocks")) {
235 if (!value
|| !*value
)
237 blocks
= simple_strtoul(value
,&value
,0);
241 else if (!strcmp(this_char
,"nr_inodes")) {
242 if (!value
|| !*value
)
244 inodes
= simple_strtoul(value
,&value
,0);
248 else if (!strcmp(this_char
,"mode")) {
249 if (!value
|| !*value
)
251 mode
= simple_strtoul(value
,&value
,8);
265 static struct super_block
*shm_read_super(struct super_block
*s
,void *data
,
268 struct inode
* root_inode
;
271 printk ("shm fs already mounted\n");
277 shm_mode
= S_IRWXUGO
| S_ISVTX
;
278 if (shm_parse_options (data
)) {
279 printk ("shm fs invalid option\n");
283 s
->s_blocksize
= PAGE_SIZE
;
284 s
->s_blocksize_bits
= PAGE_SHIFT
;
285 s
->s_magic
= SHM_FS_MAGIC
;
287 root_inode
= iget (s
, SEQ_MULTIPLIER
);
290 root_inode
->i_op
= &shm_root_inode_operations
;
291 root_inode
->i_sb
= s
;
292 root_inode
->i_nlink
= 2;
293 root_inode
->i_mode
= S_IFDIR
| shm_mode
;
294 s
->s_root
= d_alloc_root(root_inode
);
297 s
->u
.generic_sbp
= (void*) shm_sb
;
302 printk("proc_read_super: get root inode failed\n");
308 static int shm_remount_fs (struct super_block
*sb
, int *flags
, char *data
)
310 if (shm_parse_options (data
))
315 static struct fs_struct
*shm_push_root(void)
317 struct fs_struct
*old
,*new;
318 new=init_task_union
.task
.fs
;
324 static void shm_pop_root(struct fs_struct
*saved
)
329 static void shm_put_super(struct super_block
*sb
)
331 struct super_block
**p
= &shm_sb
;
333 struct shmid_kernel
*shp
;
336 if (!*p
) /* should never happen */
338 p
= (struct super_block
**)&(*p
)->u
.generic_sbp
;
340 *p
= (struct super_block
*)(*p
)->u
.generic_sbp
;
342 for(i
= 0; i
<= shm_ids
.max_id
; i
++) {
345 if (!(shp
= shm_lock (i
)))
348 printk ("shm_nattch = %ld\n", shp
->shm_nattch
);
351 killseg_core(shp
, 1);
357 static int shm_statfs(struct super_block
*sb
, struct statfs
*buf
)
360 buf
->f_bsize
= PAGE_SIZE
;
361 buf
->f_blocks
= shm_ctlall
;
362 buf
->f_bavail
= buf
->f_bfree
= shm_ctlall
- shm_tot
;
363 buf
->f_files
= shm_ctlmni
;
364 buf
->f_ffree
= shm_ctlmni
- used_segs
;
365 buf
->f_namelen
= SHM_NAME_LEN
;
/*
 * Writing an shm inode back is a no-op: all persistent segment state
 * lives in the shmid_kernel, not on any backing store.
 * NOTE(review): body dropped in this copy; restored as the empty stub
 * this function historically is — verify against the original source.
 */
static void shm_write_inode(struct inode * inode)
{
}
373 static void shm_read_inode(struct inode
* inode
)
376 struct shmid_kernel
*shp
;
381 inode
->i_mtime
= inode
->i_atime
= inode
->i_ctime
= CURRENT_TIME
;
383 if (id
< SEQ_MULTIPLIER
) {
384 if (!(shp
= shm_lock (id
)))
386 inode
->i_mode
= shp
->shm_perm
.mode
| S_IFREG
;
387 inode
->i_uid
= shp
->shm_perm
.uid
;
388 inode
->i_gid
= shp
->shm_perm
.gid
;
389 inode
->i_size
= shp
->shm_segsz
;
391 inode
->i_op
= &shm_inode_operations
;
392 inode
->i_fop
= &shm_file_operations
;
395 inode
->i_op
= &shm_root_inode_operations
;
396 inode
->i_fop
= &shm_root_operations
;
397 inode
->i_sb
= shm_sb
;
399 inode
->i_mode
= S_IFDIR
| shm_mode
;
400 inode
->i_uid
= inode
->i_gid
= 0;
404 static int shm_create (struct inode
*dir
, struct dentry
*dent
, int mode
)
407 struct inode
* inode
;
410 err
= id
= newseg (IPC_PRIVATE
, dent
->d_name
.name
, dent
->d_name
.len
, mode
, 0);
415 inode
= iget (shm_sb
, id
% SEQ_MULTIPLIER
);
420 down (&inode
->i_sem
);
421 inode
->i_mode
= mode
| S_IFREG
;
422 inode
->i_op
= &shm_inode_operations
;
423 d_instantiate(dent
, inode
);
431 static int shm_readdir (struct file
*filp
, void *dirent
, filldir_t filldir
)
433 struct inode
* inode
= filp
->f_dentry
->d_inode
;
434 struct shmid_kernel
*shp
;
442 if (filldir(dirent
, ".", 1, nr
, inode
->i_ino
) < 0)
447 if (filldir(dirent
, "..", 2, nr
, inode
->i_ino
) < 0)
453 for (; nr
-2 <= shm_ids
.max_id
; nr
++ ) {
456 if (!(shp
= shm_get (nr
-2)))
458 if (shp
->shm_perm
.mode
& SHM_DEST
)
460 if (filldir(dirent
, shp
->shm_name
, shp
->shm_namelen
, nr
, nr
) < 0 )
472 static struct dentry
*shm_lookup (struct inode
*dir
, struct dentry
*dent
)
475 struct shmid_kernel
* shp
;
476 struct inode
*inode
= NULL
;
478 if (dent
->d_name
.len
> SHM_NAME_LEN
)
479 return ERR_PTR(-ENAMETOOLONG
);
482 for(i
= 0; i
<= shm_ids
.max_id
; i
++) {
485 if (!(shp
= shm_lock(i
)))
487 if (!(shp
->shm_perm
.mode
& SHM_DEST
) &&
488 dent
->d_name
.len
== shp
->shm_namelen
&&
489 strncmp(dent
->d_name
.name
, shp
->shm_name
, shp
->shm_namelen
) == 0)
495 * prevent the reserved names as negative dentries.
496 * This also prevents object creation through the filesystem
498 if (dent
->d_name
.len
== SHM_FMT_LEN
&&
499 memcmp (SHM_FMT
, dent
->d_name
.name
, SHM_FMT_LEN
- 8) == 0)
500 err
= -EINVAL
; /* EINVAL to give IPC_RMID the right error */
506 inode
= iget(dir
->i_sb
, i
);
517 static int shm_unlink (struct inode
*dir
, struct dentry
*dent
)
519 struct inode
* inode
= dent
->d_inode
;
520 struct shmid_kernel
*shp
;
523 if (!(shp
= shm_lock (inode
->i_ino
)))
525 shp
->shm_perm
.mode
|= SHM_DEST
;
526 shp
->shm_perm
.key
= IPC_PRIVATE
; /* Do not find it any more */
527 shm_unlock (inode
->i_ino
);
/*
 * The pte slot for page `index' of segment `shp': shm_dir is a
 * two-level table with PTRS_PER_PTE pte_t entries per leaf page
 * (see shm_alloc()/shm_free()).
 */
#define SHM_ENTRY(shp, index) (shp)->shm_dir[(index)/PTRS_PER_PTE][(index)%PTRS_PER_PTE]
536 static pte_t
**shm_alloc(unsigned long pages
)
538 unsigned short dir
= pages
/ PTRS_PER_PTE
;
539 unsigned short last
= pages
% PTRS_PER_PTE
;
540 pte_t
**ret
, **ptr
, *pte
;
545 ret
= kmalloc ((dir
+1) * sizeof(pte_t
*), GFP_KERNEL
);
549 for (ptr
= ret
; ptr
< ret
+dir
; ptr
++)
551 *ptr
= (pte_t
*)__get_free_page (GFP_KERNEL
);
554 for (pte
= *ptr
; pte
< *ptr
+ PTRS_PER_PTE
; pte
++)
558 /* The last one is probably not of PAGE_SIZE: we use kmalloc */
560 *ptr
= kmalloc (last
*sizeof(pte_t
), GFP_KERNEL
);
563 for (pte
= *ptr
; pte
< *ptr
+ last
; pte
++)
569 /* The last failed: we decrement first */
571 free_page ((unsigned long)*ptr
);
575 return ERR_PTR(-ENOMEM
);
578 static void shm_free(pte_t
** dir
, unsigned long pages
)
580 pte_t
**ptr
= dir
+pages
/PTRS_PER_PTE
;
585 /* first the last page */
586 if (pages
%PTRS_PER_PTE
)
588 /* now the whole pages */
591 free_page ((unsigned long)*ptr
);
593 /* Now the indirect block */
597 static int shm_setattr (struct dentry
*dentry
, struct iattr
*attr
)
600 struct inode
*inode
= dentry
->d_inode
;
601 struct shmid_kernel
*shp
;
602 unsigned long new_pages
, old_pages
;
603 pte_t
**new_dir
, **old_dir
;
605 if ((error
= inode_change_ok(inode
, attr
)))
607 if (!(attr
->ia_valid
& ATTR_SIZE
))
609 if (attr
->ia_size
> shm_ctlmax
)
612 /* We set old_pages and old_dir for easier cleanup */
613 old_pages
= new_pages
= (attr
->ia_size
+ PAGE_SIZE
- 1) >> PAGE_SHIFT
;
614 if (shm_tot
+ new_pages
>= shm_ctlall
)
616 if (IS_ERR(old_dir
= new_dir
= shm_alloc(new_pages
)))
617 return PTR_ERR(new_dir
);
619 if (!(shp
= shm_lock(inode
->i_ino
)))
621 if (shp
->shm_segsz
== attr
->ia_size
)
623 old_dir
= shp
->shm_dir
;
624 old_pages
= shp
->shm_npages
;
628 i
= old_pages
< new_pages
? old_pages
: new_pages
;
629 j
= i
% PTRS_PER_PTE
;
632 memcpy (new_dir
[i
], old_dir
[i
], j
* sizeof (pte_t
));
635 new_dir
[i
] = old_dir
[i
];
639 shp
->shm_dir
= new_dir
;
640 shp
->shm_npages
= new_pages
;
641 shp
->shm_segsz
= attr
->ia_size
;
643 shm_unlock(inode
->i_ino
);
645 shm_tot
+= new_pages
- old_pages
;
647 shm_free (old_dir
, old_pages
);
649 inode_setattr(inode
, attr
);
653 static inline struct shmid_kernel
*newseg_alloc(int numpages
, size_t namelen
)
655 struct shmid_kernel
*shp
;
657 shp
= (struct shmid_kernel
*) kmalloc (sizeof (*shp
) + namelen
, GFP_KERNEL
);
661 shp
->shm_dir
= shm_alloc (numpages
);
666 shp
->shm_npages
= numpages
;
668 shp
->shm_namelen
= namelen
;
672 static int newseg (key_t key
, const char *name
, int namelen
,
673 int shmflg
, size_t size
)
675 struct shmid_kernel
*shp
;
676 int numpages
= (size
+ PAGE_SIZE
-1) >> PAGE_SHIFT
;
679 if (namelen
> SHM_NAME_LEN
)
680 return -ENAMETOOLONG
;
682 if (size
> shm_ctlmax
)
684 if (shm_tot
+ numpages
>= shm_ctlall
)
687 if (!(shp
= newseg_alloc(numpages
, namelen
? namelen
: SHM_FMT_LEN
+ 1)))
689 id
= ipc_addid(&shm_ids
, &shp
->shm_perm
, shm_ctlmni
+1);
691 shm_free(shp
->shm_dir
,numpages
);
695 shp
->shm_perm
.key
= key
;
696 shp
->shm_perm
.mode
= (shmflg
& S_IRWXUGO
);
697 shp
->shm_segsz
= size
;
698 shp
->shm_cprid
= current
->pid
;
700 shp
->shm_atim
= shp
->shm_dtim
= 0;
701 shp
->shm_ctim
= CURRENT_TIME
;
702 shp
->id
= shm_buildid(id
,shp
->shm_perm
.seq
);
704 shp
->shm_namelen
= namelen
;
705 memcpy (shp
->shm_name
, name
, namelen
);
707 shp
->shm_namelen
= sprintf (shp
->shm_name
, SHM_FMT
, shp
->id
);
717 asmlinkage
long sys_shmget (key_t key
, size_t size
, int shmflg
)
719 struct shmid_kernel
*shp
;
725 printk(KERN_WARNING
"shmget: shm filesystem not mounted\n");
733 if (key
== IPC_PRIVATE
) {
734 err
= newseg(key
, NULL
, 0, shmflg
, size
);
735 } else if ((id
= ipc_findkey(&shm_ids
,key
)) == -1) {
736 if (!(shmflg
& IPC_CREAT
))
739 err
= newseg(key
, NULL
, 0, shmflg
, size
);
740 } else if ((shmflg
& IPC_CREAT
) && (shmflg
& IPC_EXCL
)) {
746 if (shp
->shm_segsz
< size
)
748 else if (ipcperms(&shp
->shm_perm
, shmflg
))
751 err
= shm_buildid(id
, shp
->shm_perm
.seq
);
758 static void killseg_core(struct shmid_kernel
*shp
, int doacc
)
760 int i
, numpages
, rss
, swp
;
762 numpages
= shp
->shm_npages
;
763 for (i
= 0, rss
= 0, swp
= 0; i
< numpages
; i
++) {
765 pte
= SHM_ENTRY (shp
,i
);
768 if (pte_present(pte
)) {
769 __free_page (pte_page(pte
));
772 swap_free(pte_to_swp_entry(pte
));
776 shm_free (shp
->shm_dir
, numpages
);
788 static void shm_delete (struct inode
*ino
)
790 int shmid
= ino
->i_ino
;
791 struct shmid_kernel
*shp
;
794 shp
= shm_lock(shmid
);
798 shp
= shm_rmid(shmid
);
801 killseg_core(shp
, 1);
805 static inline unsigned long copy_shmid_to_user(void *buf
, struct shmid64_ds
*in
, int version
)
809 return copy_to_user(buf
, in
, sizeof(*in
));
814 ipc64_perm_to_ipc_perm(&in
->shm_perm
, &out
.shm_perm
);
815 out
.shm_segsz
= in
->shm_segsz
;
816 out
.shm_atime
= in
->shm_atime
;
817 out
.shm_dtime
= in
->shm_dtime
;
818 out
.shm_ctime
= in
->shm_ctime
;
819 out
.shm_cpid
= in
->shm_cpid
;
820 out
.shm_lpid
= in
->shm_lpid
;
821 out
.shm_nattch
= in
->shm_nattch
;
823 return copy_to_user(buf
, &out
, sizeof(out
));
836 static inline unsigned long copy_shmid_from_user(struct shm_setbuf
*out
, void *buf
, int version
)
841 struct shmid64_ds tbuf
;
843 if (copy_from_user(&tbuf
, buf
, sizeof(tbuf
)))
846 out
->uid
= tbuf
.shm_perm
.uid
;
847 out
->gid
= tbuf
.shm_perm
.gid
;
848 out
->mode
= tbuf
.shm_perm
.mode
;
854 struct shmid_ds tbuf_old
;
856 if (copy_from_user(&tbuf_old
, buf
, sizeof(tbuf_old
)))
859 out
->uid
= tbuf_old
.shm_perm
.uid
;
860 out
->gid
= tbuf_old
.shm_perm
.gid
;
861 out
->mode
= tbuf_old
.shm_perm
.mode
;
870 static inline unsigned long copy_shminfo_to_user(void *buf
, struct shminfo64
*in
, int version
)
874 return copy_to_user(buf
, in
, sizeof(*in
));
879 if(in
->shmmax
> INT_MAX
)
880 out
.shmmax
= INT_MAX
;
882 out
.shmmax
= (int)in
->shmmax
;
884 out
.shmmin
= in
->shmmin
;
885 out
.shmmni
= in
->shmmni
;
886 out
.shmseg
= in
->shmseg
;
887 out
.shmall
= in
->shmall
;
889 return copy_to_user(buf
, &out
, sizeof(out
));
896 char * shm_getname(int id
)
900 if (!(result
= __getname ()))
901 return ERR_PTR(-ENOMEM
);
903 sprintf (result
, "%s/" SHM_FMT
, shm_path
, id
);
907 asmlinkage
long sys_shmctl (int shmid
, int cmd
, struct shmid_ds
*buf
)
909 struct shm_setbuf setbuf
;
910 struct shmid_kernel
*shp
;
916 printk (KERN_WARNING
"shmctl: shm filesystem not mounted\n");
920 if (cmd
< 0 || shmid
< 0)
923 version
= ipc_parse_version(&cmd
);
925 switch (cmd
) { /* replace with proc interface ? */
928 struct shminfo64 shminfo
;
930 memset(&shminfo
,0,sizeof(shminfo
));
931 shminfo
.shmmni
= shminfo
.shmseg
= shm_ctlmni
;
932 shminfo
.shmmax
= shm_ctlmax
;
933 shminfo
.shmall
= shm_ctlall
;
935 shminfo
.shmmin
= SHMMIN
;
936 if(copy_shminfo_to_user (buf
, &shminfo
, version
))
938 /* reading a integer is always atomic */
946 struct shm_info shm_info
;
948 memset(&shm_info
,0,sizeof(shm_info
));
950 shm_info
.used_ids
= shm_ids
.in_use
;
951 shm_info
.shm_rss
= shm_rss
;
952 shm_info
.shm_tot
= shm_tot
;
953 shm_info
.shm_swp
= shm_swp
;
954 shm_info
.swap_attempts
= swap_attempts
;
955 shm_info
.swap_successes
= swap_successes
;
956 err
= shm_ids
.max_id
;
958 if(copy_to_user (buf
, &shm_info
, sizeof(shm_info
)))
961 return err
< 0 ? 0 : err
;
966 struct shmid64_ds tbuf
;
968 if ((shmid
% SEQ_MULTIPLIER
) == zero_id
)
970 memset(&tbuf
, 0, sizeof(tbuf
));
971 shp
= shm_lock(shmid
);
976 if (shmid
> shm_ids
.max_id
)
978 result
= shm_buildid(shmid
, shp
->shm_perm
.seq
);
981 if(shm_checkid(shp
,shmid
))
986 if (ipcperms (&shp
->shm_perm
, S_IRUGO
))
988 kernel_to_ipc64_perm(&shp
->shm_perm
, &tbuf
.shm_perm
);
989 tbuf
.shm_segsz
= shp
->shm_segsz
;
990 tbuf
.shm_atime
= shp
->shm_atim
;
991 tbuf
.shm_dtime
= shp
->shm_dtim
;
992 tbuf
.shm_ctime
= shp
->shm_ctim
;
993 tbuf
.shm_cpid
= shp
->shm_cprid
;
994 tbuf
.shm_lpid
= shp
->shm_lprid
;
995 tbuf
.shm_nattch
= shp
->shm_nattch
;
997 if(copy_shmid_to_user (buf
, &tbuf
, version
))
1004 /* Allow superuser to lock segment in memory */
1005 /* Should the pages be faulted in here or leave it to user? */
1006 /* need to determine interaction with current->swappable */
1007 struct kern_ipc_perm
*ipcp
;
1008 if ((shmid
% SEQ_MULTIPLIER
)== zero_id
)
1010 if (!capable(CAP_IPC_LOCK
))
1013 shp
= shm_lock(shmid
);
1017 if(shm_checkid(shp
,shmid
))
1019 ipcp
= &shp
->shm_perm
;
1021 if (!(ipcp
->mode
& SHM_LOCKED
)) {
1022 ipcp
->mode
|= SHM_LOCKED
;
1026 if (ipcp
->mode
& SHM_LOCKED
) {
1027 ipcp
->mode
&= ~SHM_LOCKED
;
1037 * We cannot simply remove the file. The SVID states
1038 * that the block remains until the last person
1039 * detaches from it, then is deleted. A shmat() on
1040 * an RMID segment is legal in older Linux and if
1041 * we change it apps break...
1043 * Instead we set a destroyed flag, and then blow
1044 * the name away when the usage hits zero.
1046 if ((shmid
% SEQ_MULTIPLIER
)== zero_id
)
1049 shp
= shm_lock(shmid
);
1056 if(shm_checkid(shp
,shmid
)==0)
1058 if(shp
->shm_nattch
==0)
1059 shm_remove_name(shmid
);
1071 if ((shmid
% SEQ_MULTIPLIER
)== zero_id
)
1074 if(copy_shmid_from_user (&setbuf
, buf
, version
))
1077 shp
= shm_lock(shmid
);
1082 if(shm_checkid(shp
,shmid
))
1085 if (current
->euid
!= shp
->shm_perm
.uid
&&
1086 current
->euid
!= shp
->shm_perm
.cuid
&&
1087 !capable(CAP_SYS_ADMIN
)) {
1091 shp
->shm_perm
.uid
= setbuf
.uid
;
1092 shp
->shm_perm
.gid
= setbuf
.gid
;
1093 shp
->shm_perm
.mode
= (shp
->shm_perm
.mode
& ~S_IRWXUGO
)
1094 | (setbuf
.mode
& S_IRWXUGO
);
1095 shp
->shm_ctim
= CURRENT_TIME
;
1114 static inline void shm_inc (int id
) {
1115 struct shmid_kernel
*shp
;
1117 if(!(shp
= shm_lock(id
)))
1119 shp
->shm_atim
= CURRENT_TIME
;
1120 shp
->shm_lprid
= current
->pid
;
1125 static int shm_mmap(struct file
* file
, struct vm_area_struct
* vma
)
1127 if (!(vma
->vm_flags
& VM_SHARED
))
1128 return -EINVAL
; /* we cannot do private mappings */
1129 UPDATE_ATIME(file
->f_dentry
->d_inode
);
1130 vma
->vm_ops
= &shm_vm_ops
;
1131 shm_inc(file
->f_dentry
->d_inode
->i_ino
);
1136 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
1138 asmlinkage
long sys_shmat (int shmid
, char *shmaddr
, int shmflg
, ulong
*raddr
)
1145 struct fs_struct
*saved
;
1147 if (!shm_sb
|| (shmid
% SEQ_MULTIPLIER
) == zero_id
)
1150 if ((addr
= (ulong
)shmaddr
))
1152 if(addr
& (SHMLBA
-1)) {
1153 if (shmflg
& SHM_RND
)
1154 addr
&= ~(SHMLBA
-1); /* round down */
1158 flags
= MAP_SHARED
| MAP_FIXED
;
1162 name
= shm_getname(shmid
);
1164 return PTR_ERR (name
);
1167 saved
=shm_push_root();
1168 file
= filp_open (name
, O_RDWR
, 0);
1169 shm_pop_root(saved
);
1175 *raddr
= do_mmap (file
, addr
, file
->f_dentry
->d_inode
->i_size
,
1176 (shmflg
& SHM_RDONLY
? PROT_READ
:
1177 PROT_READ
| PROT_WRITE
), flags
, 0);
1180 err
= PTR_ERR(*raddr
);
1188 if ((err
= PTR_ERR(file
)) == -ENOENT
)
1193 /* This is called by fork, once for every shm attach. */
1194 static void shm_open (struct vm_area_struct
*shmd
)
1196 shm_inc (shmd
->vm_file
->f_dentry
->d_inode
->i_ino
);
1200 * Remove a name. Must be called with lock_kernel
1203 static void shm_remove_name(int id
)
1205 char *name
= shm_getname(id
);
1208 struct fs_struct
*saved
;
1209 saved
=shm_push_root();
1211 shm_pop_root(saved
);
1217 * remove the attach descriptor shmd.
1218 * free memory for segment if it is marked destroyed.
1219 * The descriptor has already been removed from the current->mm->mmap list
1220 * and will later be kfree()d.
1222 static void shm_close (struct vm_area_struct
*shmd
)
1224 int id
= shmd
->vm_file
->f_dentry
->d_inode
->i_ino
;
1225 struct shmid_kernel
*shp
;
1227 /* remove from the list of attaches of the shm segment */
1228 if(!(shp
= shm_lock(id
)))
1230 shp
->shm_lprid
= current
->pid
;
1231 shp
->shm_dtim
= CURRENT_TIME
;
1233 if(shp
->shm_nattch
==0 && shp
->destroyed
)
1236 shm_remove_name(id
);
1244 * detach and kill segment if marked destroyed.
1245 * The work is done in shm_close.
1247 asmlinkage
long sys_shmdt (char *shmaddr
)
1249 struct vm_area_struct
*shmd
, *shmdnext
;
1251 down(¤t
->mm
->mmap_sem
);
1252 for (shmd
= current
->mm
->mmap
; shmd
; shmd
= shmdnext
) {
1253 shmdnext
= shmd
->vm_next
;
1254 if (shmd
->vm_ops
== &shm_vm_ops
1255 && shmd
->vm_start
- (shmd
->vm_pgoff
<< PAGE_SHIFT
) == (ulong
) shmaddr
)
1256 do_munmap(shmd
->vm_start
, shmd
->vm_end
- shmd
->vm_start
);
1258 up(¤t
->mm
->mmap_sem
);
1263 * Enter the shm page into the SHM data structures.
1265 * The way "nopage" is done, we don't actually have to
1266 * do anything here: nopage will have filled in the shm
1267 * data structures already, and shm_swap_out() will just
/*
 * vma swapout callback.  The way "nopage" is done, the shm data
 * structures are already filled in, so there is nothing to do here;
 * returning 0 tells the VM the page may be dropped.
 * NOTE(review): body (`return 0;') restored — dropped in this copy.
 */
static int shm_swapout(struct page * page, struct file *file)
{
	return 0;
}
1276 * page not present ... go through shm_dir
1278 static struct page
* shm_nopage_core(struct shmid_kernel
*shp
, unsigned int idx
, int *swp
, int *rss
, unsigned long address
)
1283 if (idx
>= shp
->shm_npages
)
1284 return NOPAGE_SIGBUS
;
1286 pte
= SHM_ENTRY(shp
,idx
);
1287 if (!pte_present(pte
)) {
1288 /* page not present so shm_swap can't race with us
1289 and the semaphore protects us by other tasks that
1290 could potentially fault on our pte under us */
1291 if (pte_none(pte
)) {
1292 shm_unlock(shp
->id
);
1293 page
= alloc_page(GFP_HIGHUSER
);
1296 clear_user_highpage(page
, address
);
1297 if ((shp
!= shm_lock(shp
->id
)) && (shp
->id
!= zero_id
))
1300 swp_entry_t entry
= pte_to_swp_entry(pte
);
1302 shm_unlock(shp
->id
);
1303 page
= lookup_swap_cache(entry
);
1306 swapin_readahead(entry
);
1307 page
= read_swap_cache(entry
);
1312 delete_from_swap_cache(page
);
1313 page
= replace_with_highmem(page
);
1315 if ((shp
!= shm_lock(shp
->id
)) && (shp
->id
!= zero_id
))
1320 pte
= pte_mkdirty(mk_pte(page
, PAGE_SHARED
));
1321 SHM_ENTRY(shp
, idx
) = pte
;
1323 --current
->maj_flt
; /* was incremented in do_no_page */
1325 /* pte_val(pte) == SHM_ENTRY (shp, idx) */
1326 get_page(pte_page(pte
));
1328 return pte_page(pte
);
1335 static struct page
* shm_nopage(struct vm_area_struct
* shmd
, unsigned long address
, int no_share
)
1338 struct shmid_kernel
*shp
;
1340 struct inode
* inode
= shmd
->vm_file
->f_dentry
->d_inode
;
1342 idx
= (address
- shmd
->vm_start
) >> PAGE_SHIFT
;
1343 idx
+= shmd
->vm_pgoff
;
1345 down(&inode
->i_sem
);
1346 if(!(shp
= shm_lock(inode
->i_ino
)))
1348 page
= shm_nopage_core(shp
, idx
, &shm_swp
, &shm_rss
, address
);
1349 shm_unlock(inode
->i_ino
);
1358 static int shm_swap_core(struct shmid_kernel
*shp
, unsigned long idx
, swp_entry_t swap_entry
, zone_t
*zone
, int *counter
, struct page
**outpage
)
1361 struct page
*page_map
;
1363 page
= SHM_ENTRY(shp
, idx
);
1364 if (!pte_present(page
))
1366 page_map
= pte_page(page
);
1367 if (zone
&& (!memclass(page_map
->zone
, zone
)))
1369 if (shp
->id
!= zero_id
) swap_attempts
++;
1371 if (--counter
< 0) /* failed */
1373 if (page_count(page_map
) != 1)
1376 if (!(page_map
= prepare_highmem_swapout(page_map
)))
1378 SHM_ENTRY (shp
, idx
) = swp_entry_to_pte(swap_entry
);
1380 /* add the locked page to the swap cache before allowing
1381 the swapin path to run lookup_swap_cache(). This avoids
1382 reading a not yet uptodate block from disk.
1383 NOTE: we just accounted the swap space reference for this
1384 swap cache page at __get_swap_page() time. */
1385 add_to_swap_cache(*outpage
= page_map
, swap_entry
);
1389 static void shm_swap_postop(struct page
*page
)
1392 rw_swap_page(WRITE
, page
, 0);
1397 static int shm_swap_preop(swp_entry_t
*swap_entry
)
1400 /* subtle: preload the swap count for the swap cache. We can't
1401 increase the count inside the critical section as we can't release
1402 the shm_lock there. And we can't acquire the big lock with the
1403 shm_lock held (otherwise we would deadlock too easily). */
1404 *swap_entry
= __get_swap_page(2);
1405 if (!(*swap_entry
).val
) {
1414 * Goes through counter = (shm_rss >> prio) present shm pages.
1416 static unsigned long swap_id
= 0; /* currently being swapped */
1417 static unsigned long swap_idx
= 0; /* next to swap */
1419 int shm_swap (int prio
, int gfp_mask
, zone_t
*zone
)
1421 struct shmid_kernel
*shp
;
1422 swp_entry_t swap_entry
;
1423 unsigned long id
, idx
;
1426 struct page
* page_map
;
1428 zshm_swap(prio
, gfp_mask
, zone
);
1429 counter
= shm_rss
>> prio
;
1432 if (shm_swap_preop(&swap_entry
))
1437 shp
= shm_get(swap_id
);
1438 if(shp
==NULL
|| shp
->shm_perm
.mode
& SHM_LOCKED
) {
1441 if (++swap_id
> shm_ids
.max_id
) {
1446 __swap_free(swap_entry
, 2);
1457 if (idx
>= shp
->shm_npages
)
1460 switch (shm_swap_core(shp
, idx
, swap_entry
, zone
, &counter
, &page_map
)) {
1461 case RETRY
: goto check_table
;
1462 case FAILED
: goto failed
;
1469 shm_swap_postop(page_map
);
1474 * Free the swap entry and set the new pte for the shm page.
1476 static void shm_unuse_page(struct shmid_kernel
*shp
, unsigned long idx
,
1477 swp_entry_t entry
, struct page
*page
)
1481 pte
= pte_mkdirty(mk_pte(page
, PAGE_SHARED
));
1482 SHM_ENTRY(shp
, idx
) = pte
;
1491 static int shm_unuse_core(struct shmid_kernel
*shp
, swp_entry_t entry
, struct page
*page
)
1495 for (n
= 0; n
< shp
->shm_npages
; n
++) {
1496 if (pte_none(SHM_ENTRY(shp
,n
)))
1498 if (pte_present(SHM_ENTRY(shp
,n
)))
1500 if (pte_to_swp_entry(SHM_ENTRY(shp
,n
)).val
== entry
.val
) {
1501 shm_unuse_page(shp
, n
, entry
, page
);
1509 * unuse_shm() search for an eventually swapped out shm page.
1511 void shm_unuse(swp_entry_t entry
, struct page
*page
)
1516 for (i
= 0; i
<= shm_ids
.max_id
; i
++) {
1517 struct shmid_kernel
*shp
= shm_get(i
);
1520 if (shm_unuse_core(shp
, entry
, page
))
1525 zmap_unuse(entry
, page
);
1528 #ifdef CONFIG_PROC_FS
1529 static int sysvipc_shm_read_proc(char *buffer
, char **start
, off_t offset
, int length
, int *eof
, void *data
)
1536 len
+= sprintf(buffer
, " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime name\n");
1538 for(i
= 0; i
<= shm_ids
.max_id
; i
++) {
1539 struct shmid_kernel
* shp
;
1545 #define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu %.*s\n"
1546 #define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu %.*s\n"
1549 if (sizeof(size_t) <= sizeof(int))
1550 format
= SMALL_STRING
;
1552 format
= BIG_STRING
;
1553 len
+= sprintf(buffer
+ len
, format
,
1555 shm_buildid(i
, shp
->shm_perm
.seq
),
1577 if(pos
> offset
+ length
)
1584 *start
= buffer
+ (offset
- begin
);
1585 len
-= (offset
- begin
);
/* The shmid_kernel backing a shared /dev/zero vma is stashed in the
 * pseudo-file's private_data. */
#define VMA_TO_SHP(vma) ((vma)->vm_file->private_data)

static spinlock_t zmap_list_lock = SPIN_LOCK_UNLOCKED;	/* guards zshmid_kernel.zero_list */
static unsigned long zswap_idx = 0; /* next to swap */
static struct shmid_kernel *zswap_shp = &zshmid_kernel;	/* segment the zero-swap scan is positioned on */
static int zshm_rss;	/* resident pages across all zero mappings (see zshm_swap) */
1601 static struct vm_operations_struct shmzero_vm_ops
= {
1603 close
: shmzero_close
,
1604 nopage
: shmzero_nopage
,
1605 swapout
: shm_swapout
,
/*
 * In this implementation, the "unuse" and "swapout" interfaces are
 * interlocked out via the kernel_lock, as well as shm_lock(zero_id).
 * "unuse" and "nopage/swapin", as well as "swapout" and "nopage/swapin"
 * interlock via shm_lock(zero_id). All these interlocks can be based
 * on a per mapping lock instead of being a global lock.
 */
/*
 * Reference (existence) counting on the file/dentry/inode is done
 * by generic vm_file code. The zero code does not hold any reference
 * on the pseudo-file. This is possible because the open/close calls
 * are bracketed by the file count update calls.
 */
1621 static struct file
*file_setup(struct file
*fzero
, struct shmid_kernel
*shp
)
1626 if ((filp
= get_empty_filp()) == 0)
1628 if ((inp
= get_empty_inode()) == 0) {
1632 if ((filp
->f_dentry
= d_alloc(zdent
, &(const struct qstr
) { "dev/zero",
1638 d_instantiate(filp
->f_dentry
, inp
);
1641 * Copy over /dev/zero dev/ino for benefit of procfs. Use
1642 * ino to indicate seperate mappings.
1644 filp
->f_dentry
->d_inode
->i_dev
= fzero
->f_dentry
->d_inode
->i_dev
;
1645 filp
->f_dentry
->d_inode
->i_ino
= (unsigned long)shp
;
1646 fput(fzero
); /* release /dev/zero file */
1650 int map_zero_setup(struct vm_area_struct
*vma
)
1652 extern int vm_enough_memory(long pages
);
1653 struct shmid_kernel
*shp
;
1656 if (!vm_enough_memory((vma
->vm_end
- vma
->vm_start
) >> PAGE_SHIFT
))
1658 if (!(shp
= newseg_alloc((vma
->vm_end
- vma
->vm_start
) / PAGE_SIZE
, 0)))
1660 if ((filp
= file_setup(vma
->vm_file
, shp
)) == 0) {
1661 killseg_core(shp
, 0);
1664 vma
->vm_file
= filp
;
1665 VMA_TO_SHP(vma
) = (void *)shp
;
1667 init_MUTEX(&shp
->zsem
);
1668 vma
->vm_ops
= &shmzero_vm_ops
;
1670 spin_lock(&zmap_list_lock
);
1671 list_add(&shp
->zero_list
, &zshmid_kernel
.zero_list
);
1672 spin_unlock(&zmap_list_lock
);
1676 static void shmzero_open(struct vm_area_struct
*shmd
)
1678 struct shmid_kernel
*shp
;
1680 shp
= VMA_TO_SHP(shmd
);
1686 static void shmzero_close(struct vm_area_struct
*shmd
)
1689 struct shmid_kernel
*shp
;
1691 shp
= VMA_TO_SHP(shmd
);
1693 if (--shp
->shm_nattch
== 0)
1697 spin_lock(&zmap_list_lock
);
1698 if (shp
== zswap_shp
)
1699 zswap_shp
= list_entry(zswap_shp
->zero_list
.next
,
1700 struct shmid_kernel
, zero_list
);
1701 list_del(&shp
->zero_list
);
1702 spin_unlock(&zmap_list_lock
);
1703 killseg_core(shp
, 0);
1707 static struct page
* shmzero_nopage(struct vm_area_struct
* shmd
, unsigned long address
, int no_share
)
1710 struct shmid_kernel
*shp
;
1714 idx
= (address
- shmd
->vm_start
) >> PAGE_SHIFT
;
1715 idx
+= shmd
->vm_pgoff
;
1717 shp
= VMA_TO_SHP(shmd
);
1720 page
= shm_nopage_core(shp
, idx
, &dummy
, &zshm_rss
, address
);
1721 shm_unlock(zero_id
);
1726 static void zmap_unuse(swp_entry_t entry
, struct page
*page
)
1728 struct shmid_kernel
*shp
;
1730 spin_lock(&zmap_list_lock
);
1732 for (shp
= list_entry(zshmid_kernel
.zero_list
.next
, struct shmid_kernel
,
1733 zero_list
); shp
!= &zshmid_kernel
;
1734 shp
= list_entry(shp
->zero_list
.next
, struct shmid_kernel
,
1736 if (shm_unuse_core(shp
, entry
, page
))
1739 shm_unlock(zero_id
);
1740 spin_unlock(&zmap_list_lock
);
1743 static void zshm_swap (int prio
, int gfp_mask
, zone_t
*zone
)
1745 struct shmid_kernel
*shp
;
1746 swp_entry_t swap_entry
;
1750 struct page
* page_map
;
1752 counter
= zshm_rss
>> prio
;
1756 if (shm_swap_preop(&swap_entry
))
1759 spin_lock(&zmap_list_lock
);
1761 if (zshmid_kernel
.zero_list
.next
== 0)
1764 if (zswap_shp
== &zshmid_kernel
) {
1767 shm_unlock(zero_id
);
1768 spin_unlock(&zmap_list_lock
);
1769 __swap_free(swap_entry
, 2);
1772 zswap_shp
= list_entry(zshmid_kernel
.zero_list
.next
,
1773 struct shmid_kernel
, zero_list
);
1781 if (idx
>= shp
->shm_npages
) {
1782 zswap_shp
= list_entry(zswap_shp
->zero_list
.next
,
1783 struct shmid_kernel
, zero_list
);
1788 switch (shm_swap_core(shp
, idx
, swap_entry
, zone
, &counter
, &page_map
)) {
1789 case RETRY
: goto check_table
;
1790 case FAILED
: goto failed
;
1792 shm_unlock(zero_id
);
1793 spin_unlock(&zmap_list_lock
);
1795 shm_swap_postop(page_map
);