4 #include <linux/init.h>
5 #include <linux/sysctl.h>
6 #include <linux/poll.h>
7 #include <linux/proc_fs.h>
8 #include <linux/security.h>
9 #include <linux/namei.h>
/*
 * Forward declarations of the operation tables that are defined further
 * down in this file. Functions placed above those definitions (for example
 * proc_sys_make_inode() and proc_sys_lookup()) take their addresses.
 */
12 static const struct dentry_operations proc_sys_dentry_operations
;
13 static const struct file_operations proc_sys_file_operations
;
14 static const struct inode_operations proc_sys_inode_operations
;
15 static const struct file_operations proc_sys_dir_file_operations
;
16 static const struct inode_operations proc_sys_dir_operations
;
/*
 * proc_sys_poll_notify - signal a change to pollers of a sysctl entry.
 * @poll: poll state holding the event counter and the wait queue.
 *
 * Increments poll->event and wakes interruptible sleepers on poll->wait.
 * proc_sys_poll() compares the event value it saved in the open file
 * against this counter to detect that a change happened.
 *
 * NOTE(review): some lines of the body are missing from this listing, so
 * any guard placed before these two calls is not visible here.
 */
18 void proc_sys_poll_notify(struct ctl_table_poll
*poll
)
23 atomic_inc(&poll
->event
);
24 wake_up_interruptible(&poll
->wait
);
/*
 * proc_sys_make_inode - build an inode for one sysctl table entry.
 * @sb:    superblock the new inode is allocated on (via new_inode()).
 * @head:  table header; a reference is taken with sysctl_head_get().
 * @table: entry recorded in the proc inode's sysctl_entry field and used
 *         as the source of the permission bits (table->mode).
 *
 * The inode gets a fresh number from get_next_ino() and all three
 * timestamps set to CURRENT_TIME. Two set-ups are visible below: a regular
 * file (S_IFREG with the file inode/file operations) and a directory
 * (S_IFDIR with the directory inode/file operations).
 *
 * NOTE(review): the condition selecting between the two set-ups, the
 * allocation-failure handling and the return statement are not part of
 * this listing.
 */
27 static struct inode
*proc_sys_make_inode(struct super_block
*sb
,
28 struct ctl_table_header
*head
, struct ctl_table
*table
)
31 struct proc_inode
*ei
;
33 inode
= new_inode(sb
);
37 inode
->i_ino
= get_next_ino();
/* Take a reference on the header for the inode being set up. */
39 sysctl_head_get(head
);
42 ei
->sysctl_entry
= table
;
44 inode
->i_mtime
= inode
->i_atime
= inode
->i_ctime
= CURRENT_TIME
;
/* Permission bits come from the table entry; the type bit is OR-ed in below. */
45 inode
->i_mode
= table
->mode
;
/* Regular-file set-up. */
47 inode
->i_mode
|= S_IFREG
;
48 inode
->i_op
= &proc_sys_inode_operations
;
49 inode
->i_fop
= &proc_sys_file_operations
;
/* Directory set-up. */
51 inode
->i_mode
|= S_IFDIR
;
53 inode
->i_op
= &proc_sys_dir_operations
;
54 inode
->i_fop
= &proc_sys_dir_file_operations
;
/*
 * find_in_table - search a ctl_table array for an entry matching @name.
 * @p:    first entry of the array; iteration stops at the first entry
 *        whose procname is NULL.
 * @name: name to look for.
 *
 * For each entry the length of p->procname is taken with strlen() and
 * that many bytes are compared against name->name with memcmp().
 *
 * NOTE(review): the statements that act on the comparison result and the
 * return statements are missing from this listing; callers treat the
 * result as a ctl_table pointer.
 */
60 static struct ctl_table
*find_in_table(struct ctl_table
*p
, struct qstr
*name
)
63 for ( ; p
->procname
; p
++) {
68 len
= strlen(p
->procname
);
72 if (memcmp(p
->procname
, name
->name
, len
) != 0)
/*
 * grab_header - obtain the sysctl table header to use for @inode.
 *
 * If the proc inode has a header attached (PROC_I(inode)->sysctl), the
 * result of sysctl_head_grab() on it is returned; otherwise the result of
 * sysctl_head_next(NULL) is returned.
 *
 * Callers in this file convert the result with PTR_ERR()/ERR_CAST() on
 * their error paths and call sysctl_head_finish() on it when done.
 */
81 static struct ctl_table_header
*grab_header(struct inode
*inode
)
83 if (PROC_I(inode
)->sysctl
)
84 return sysctl_head_grab(PROC_I(inode
)->sysctl
);
86 return sysctl_head_next(NULL
);
/*
 * proc_sys_lookup - inode_operations.lookup for sysctl directories.
 * @dir:    directory inode being searched.
 * @dentry: dentry carrying the name to resolve (dentry->d_name).
 *
 * Visible flow:
 *  - grab the header for @dir and read the directory's table entry;
 *  - start with err = -ENOENT;
 *  - pick the table to search: the entry's child table, or the header's
 *    ctl_table when the directory has no entry of its own;
 *  - search it with find_in_table(); additionally walk all headers with
 *    sysctl_head_next() and search h->attached_by for headers whose
 *    attached_to equals the table;
 *  - switch err to -ENOMEM and create the inode with proc_sys_make_inode(),
 *    using h when set and head otherwise;
 *  - install proc_sys_dentry_operations and d_add() the dentry;
 *  - release head with sysctl_head_finish().
 *
 * NOTE(review): several lines (remaining parameters, some locals, the
 * conditions around each step, gotos and the final return) are missing
 * from this listing.
 */
89 static struct dentry
*proc_sys_lookup(struct inode
*dir
, struct dentry
*dentry
,
92 struct ctl_table_header
*head
= grab_header(dir
);
93 struct ctl_table
*table
= PROC_I(dir
)->sysctl_entry
;
94 struct ctl_table_header
*h
= NULL
;
95 struct qstr
*name
= &dentry
->d_name
;
98 struct dentry
*err
= ERR_PTR(-ENOENT
);
/* Error path for the header obtained from grab_header(). */
101 return ERR_CAST(head
);
/* The directory has a table entry but that entry has no child table. */
103 if (table
&& !table
->child
) {
/* No entry of its own: fall back to the header's top-level table. */
108 table
= table
? table
->child
: head
->ctl_table
;
110 p
= find_in_table(table
, name
);
/* Also search tables of other headers that are attached to this table. */
112 for (h
= sysctl_head_next(NULL
); h
; h
= sysctl_head_next(h
)) {
113 if (h
->attached_to
!= table
)
115 p
= find_in_table(h
->attached_by
, name
);
124 err
= ERR_PTR(-ENOMEM
);
125 inode
= proc_sys_make_inode(dir
->i_sb
, h
? h
: head
, p
);
127 sysctl_head_finish(h
);
133 d_set_d_op(dentry
, &proc_sys_dentry_operations
);
134 d_add(dentry
, inode
);
137 sysctl_head_finish(head
);
/*
 * proc_sys_call_handler - common implementation behind read and write.
 * @filp:  open sysctl file.
 * @buf:   user buffer.
 * @count: size of @buf.
 * @ppos:  file position, passed through to the handler.
 * @write: non-zero for a write, zero for a read (see proc_sys_read() and
 *         proc_sys_write()).
 *
 * Grabs the table header for the file's inode, checks access with
 * sysctl_perm() using MAY_WRITE or MAY_READ depending on @write, checks
 * that the entry has a proc_handler, calls it, and releases the header
 * with sysctl_head_finish().
 *
 * NOTE(review): the local declarations (error, res), the error codes
 * chosen on each failed check and the final return are missing from this
 * listing.
 */
141 static ssize_t
proc_sys_call_handler(struct file
*filp
, void __user
*buf
,
142 size_t count
, loff_t
*ppos
, int write
)
144 struct inode
*inode
= filp
->f_path
.dentry
->d_inode
;
145 struct ctl_table_header
*head
= grab_header(inode
);
146 struct ctl_table
*table
= PROC_I(inode
)->sysctl_entry
;
151 return PTR_ERR(head
);
154 * At this point we know that the sysctl was not unregistered
155 * and won't be until we finish.
158 if (sysctl_perm(head
->root
, table
, write
? MAY_WRITE
: MAY_READ
))
161 /* if that can happen at all, it should be -EINVAL, not -EISDIR */
163 if (!table
->proc_handler
)
166 /* careful: calling conventions are nasty here */
168 error
= table
->proc_handler(table
, write
, buf
, &res
, ppos
);
172 sysctl_head_finish(head
);
/*
 * proc_sys_read - file_operations.read for sysctl files.
 *
 * Thin wrapper: forwards to proc_sys_call_handler() with write == 0.
 */
177 static ssize_t
proc_sys_read(struct file
*filp
, char __user
*buf
,
178 size_t count
, loff_t
*ppos
)
180 return proc_sys_call_handler(filp
, (void __user
*)buf
, count
, ppos
, 0);
/*
 * proc_sys_write - file_operations.write for sysctl files.
 *
 * Thin wrapper: forwards to proc_sys_call_handler() with write == 1. The
 * const qualifier on @buf is cast away because the shared handler takes a
 * plain void __user pointer for both directions.
 */
183 static ssize_t
proc_sys_write(struct file
*filp
, const char __user
*buf
,
184 size_t count
, loff_t
*ppos
)
186 return proc_sys_call_handler(filp
, (void __user
*)buf
, count
, ppos
, 1);
/*
 * proc_sys_open - file_operations.open for sysctl files.
 *
 * Stores the value of proc_sys_poll_event(table->poll) in
 * filp->private_data; proc_sys_poll() later reads it back as the last
 * event value seen by this open file.
 *
 * NOTE(review): any guard around the assignment and the return statement
 * are missing from this listing.
 */
189 static int proc_sys_open(struct inode
*inode
, struct file
*filp
)
191 struct ctl_table
*table
= PROC_I(inode
)->sysctl_entry
;
194 filp
->private_data
= proc_sys_poll_event(table
->poll
);
/*
 * proc_sys_poll - file_operations.poll for sysctl files.
 *
 * The result starts out as DEFAULT_POLLMASK. The function registers on
 * table->poll->wait with poll_wait() and then compares the event value
 * saved in filp->private_data with the current table->poll->event. When
 * they differ, the saved value is refreshed and the result becomes
 * POLLIN | POLLRDNORM | POLLERR | POLLPRI.
 *
 * NOTE(review): the early-exit checks (only the !table->proc_handler test
 * is visible) and the return statement are missing from this listing.
 */
199 static unsigned int proc_sys_poll(struct file
*filp
, poll_table
*wait
)
201 struct inode
*inode
= filp
->f_path
.dentry
->d_inode
;
202 struct ctl_table
*table
= PROC_I(inode
)->sysctl_entry
;
/* Event value remembered by this open file (set in open or a prior poll). */
203 unsigned long event
= (unsigned long)filp
->private_data
;
204 unsigned int ret
= DEFAULT_POLLMASK
;
206 if (!table
->proc_handler
)
212 poll_wait(filp
, &table
->poll
->wait
, wait
);
/* The counter moved since the value was saved: report and resynchronise. */
214 if (event
!= atomic_read(&table
->poll
->event
)) {
215 filp
->private_data
= proc_sys_poll_event(table
->poll
);
216 ret
= POLLIN
| POLLRDNORM
| POLLERR
| POLLPRI
;
/*
 * proc_sys_fill_cache - emit one directory entry for @table through the
 * filldir callback, going through the dentry cache.
 *
 * Builds a qstr from table->procname (name, length, full_name_hash()),
 * looks it up under the directory dentry with d_lookup(), and on the
 * allocation path creates a dentry with d_alloc() and an inode with
 * proc_sys_make_inode(), installing proc_sys_dentry_operations. The entry
 * type handed to filldir is inode->i_mode >> 12; it starts as DT_UNKNOWN.
 *
 * Returns !!filldir(...), i.e. 0 when filldir returned 0 and 1 otherwise.
 *
 * NOTE(review): one parameter line (the filldir callback), the locals
 * qname/inode/ino and the conditions between the lookup and allocation
 * steps are missing from this listing.
 */
223 static int proc_sys_fill_cache(struct file
*filp
, void *dirent
,
225 struct ctl_table_header
*head
,
226 struct ctl_table
*table
)
228 struct dentry
*child
, *dir
= filp
->f_path
.dentry
;
232 unsigned type
= DT_UNKNOWN
;
234 qname
.name
= table
->procname
;
235 qname
.len
= strlen(table
->procname
);
236 qname
.hash
= full_name_hash(qname
.name
, qname
.len
);
238 child
= d_lookup(dir
, &qname
);
240 child
= d_alloc(dir
, &qname
);
242 inode
= proc_sys_make_inode(dir
->d_sb
, head
, table
);
247 d_set_d_op(child
, &proc_sys_dentry_operations
);
254 inode
= child
->d_inode
;
/* Shift the file-type bits of i_mode down to form the entry type. */
256 type
= inode
->i_mode
>> 12;
258 return !!filldir(dirent
, qname
.name
, qname
.len
, filp
->f_pos
, ino
, type
);
/*
 * scan - walk one ctl_table array and emit its entries for readdir.
 * @head:    header the table belongs to; passed to proc_sys_fill_cache().
 * @table:   first entry; iteration stops at an entry with NULL procname.
 * @pos:     running entry index shared across calls; incremented once per
 *           table entry.
 * @file:    directory file; file->f_pos is compared against *pos and is
 *           set to *pos + 1 below.
 * @dirent:  opaque cookie for @filldir.
 * @filldir: callback that receives each entry.
 *
 * NOTE(review): the statements taken when *pos < file->f_pos, the handling
 * of the proc_sys_fill_cache() result and the return statement are missing
 * from this listing.
 */
261 static int scan(struct ctl_table_header
*head
, ctl_table
*table
,
262 unsigned long *pos
, struct file
*file
,
263 void *dirent
, filldir_t filldir
)
266 for (; table
->procname
; table
++, (*pos
)++) {
269 /* Can't do anything without a proc name */
270 if (!table
->procname
)
273 if (*pos
< file
->f_pos
)
276 res
= proc_sys_fill_cache(file
, dirent
, filldir
, head
, table
);
280 file
->f_pos
= *pos
+ 1;
/*
 * proc_sys_readdir - file_operations.readdir for sysctl directories.
 *
 * Visible flow:
 *  - grab the header for the directory inode and read its table entry;
 *  - pick the table to list: the entry's child table, or the header's
 *    ctl_table when the directory has no entry of its own;
 *  - at f_pos 0 emit "." with the directory's inode number, at f_pos 1
 *    emit ".." with parent_ino(dentry), both as DT_DIR;
 *  - list the chosen table with scan();
 *  - walk all headers with sysctl_head_next() and scan h->attached_by for
 *    headers whose attached_to equals the table;
 *  - release head with sysctl_head_finish().
 *
 * NOTE(review): the locals pos/ret, the f_pos updates after "." and "..",
 * the handling of scan() results (including when sysctl_head_finish(h) is
 * reached) and the return statement are missing from this listing.
 */
285 static int proc_sys_readdir(struct file
*filp
, void *dirent
, filldir_t filldir
)
287 struct dentry
*dentry
= filp
->f_path
.dentry
;
288 struct inode
*inode
= dentry
->d_inode
;
289 struct ctl_table_header
*head
= grab_header(inode
);
290 struct ctl_table
*table
= PROC_I(inode
)->sysctl_entry
;
291 struct ctl_table_header
*h
= NULL
;
296 return PTR_ERR(head
);
/* The directory has a table entry but that entry has no child table. */
298 if (table
&& !table
->child
) {
303 table
= table
? table
->child
: head
->ctl_table
;
306 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */
307 if (filp
->f_pos
== 0) {
308 if (filldir(dirent
, ".", 1, filp
->f_pos
,
309 inode
->i_ino
, DT_DIR
) < 0)
313 if (filp
->f_pos
== 1) {
314 if (filldir(dirent
, "..", 2, filp
->f_pos
,
315 parent_ino(dentry
), DT_DIR
) < 0)
321 ret
= scan(head
, table
, &pos
, filp
, dirent
, filldir
);
/* Also list tables of other headers that are attached to this table. */
325 for (h
= sysctl_head_next(NULL
); h
; h
= sysctl_head_next(h
)) {
326 if (h
->attached_to
!= table
)
328 ret
= scan(h
, h
->attached_by
, &pos
, filp
, dirent
, filldir
);
330 sysctl_head_finish(h
);
336 sysctl_head_finish(head
);
/*
 * proc_sys_permission - inode_operations.permission for /proc/sys.
 * @inode: inode being accessed.
 * @mask:  requested access (MAY_* bits).
 *
 * A request for MAY_EXEC on a regular file is singled out first. After
 * grabbing the header, an inode without a table entry is treated as the
 * global root: MAY_WRITE yields -EACCES, anything else 0. Otherwise the
 * decision is delegated to sysctl_perm() with MAY_NOT_BLOCK masked out of
 * @mask. The header is released with sysctl_head_finish().
 *
 * NOTE(review): the value returned for the MAY_EXEC case, the declaration
 * of error and the final return are missing from this listing.
 */
340 static int proc_sys_permission(struct inode
*inode
, int mask
)
343 * sysctl entries that are not writeable,
344 * are _NOT_ writeable, capabilities or not.
346 struct ctl_table_header
*head
;
347 struct ctl_table
*table
;
350 /* Executable files are not allowed under /proc/sys/ */
351 if ((mask
& MAY_EXEC
) && S_ISREG(inode
->i_mode
))
354 head
= grab_header(inode
);
356 return PTR_ERR(head
);
358 table
= PROC_I(inode
)->sysctl_entry
;
359 if (!table
) /* global root - r-xr-xr-x */
360 error
= mask
& MAY_WRITE
? -EACCES
: 0;
361 else /* Use the permissions on the sysctl table entry */
362 error
= sysctl_perm(head
->root
, table
, mask
& ~MAY_NOT_BLOCK
);
364 sysctl_head_finish(head
);
/*
 * proc_sys_setattr - inode_operations.setattr for /proc/sys inodes.
 *
 * Visible steps: a test for ATTR_MODE, ATTR_UID or ATTR_GID in
 * attr->ia_valid; validation with inode_change_ok(); a vmtruncate() call
 * when ATTR_SIZE is set and the requested size differs from the current
 * i_size; then setattr_copy() and mark_inode_dirty().
 *
 * NOTE(review): the action taken when a mode/uid/gid change is requested,
 * the error checks after inode_change_ok()/vmtruncate() and the return
 * statement are missing from this listing.
 */
368 static int proc_sys_setattr(struct dentry
*dentry
, struct iattr
*attr
)
370 struct inode
*inode
= dentry
->d_inode
;
373 if (attr
->ia_valid
& (ATTR_MODE
| ATTR_UID
| ATTR_GID
))
376 error
= inode_change_ok(inode
, attr
);
380 if ((attr
->ia_valid
& ATTR_SIZE
) &&
381 attr
->ia_size
!= i_size_read(inode
)) {
382 error
= vmtruncate(inode
, attr
->ia_size
);
387 setattr_copy(inode
, attr
);
388 mark_inode_dirty(inode
);
/*
 * proc_sys_getattr - inode_operations.getattr for /proc/sys inodes.
 *
 * Grabs the header for the inode, fills @stat with generic_fillattr(),
 * then rebuilds stat->mode from the file-type bits already in stat->mode
 * (masked with S_IFMT) combined with table->mode, and releases the header
 * with sysctl_head_finish().
 *
 * NOTE(review): any guard around the stat->mode assignment (table may be
 * NULL for the root, as proc_sys_permission() shows) and the return
 * statement are missing from this listing.
 */
392 static int proc_sys_getattr(struct vfsmount
*mnt
, struct dentry
*dentry
, struct kstat
*stat
)
394 struct inode
*inode
= dentry
->d_inode
;
395 struct ctl_table_header
*head
= grab_header(inode
);
396 struct ctl_table
*table
= PROC_I(inode
)->sysctl_entry
;
399 return PTR_ERR(head
);
401 generic_fillattr(inode
, stat
);
403 stat
->mode
= (stat
->mode
& S_IFMT
) | table
->mode
;
405 sysctl_head_finish(head
);
/*
 * File operations installed on regular sysctl files by
 * proc_sys_make_inode(): open/poll/read/write are implemented in this
 * file, seeking uses default_llseek.
 */
409 static const struct file_operations proc_sys_file_operations
= {
410 .open
= proc_sys_open
,
411 .poll
= proc_sys_poll
,
412 .read
= proc_sys_read
,
413 .write
= proc_sys_write
,
414 .llseek
= default_llseek
,
/*
 * File operations installed on sysctl directories: directory listing is
 * done by proc_sys_readdir(); read and llseek use the generic helpers
 * generic_read_dir and generic_file_llseek.
 */
417 static const struct file_operations proc_sys_dir_file_operations
= {
418 .read
= generic_read_dir
,
419 .readdir
= proc_sys_readdir
,
420 .llseek
= generic_file_llseek
,
/*
 * Inode operations installed on regular sysctl files by
 * proc_sys_make_inode(): permission, setattr and getattr.
 */
423 static const struct inode_operations proc_sys_inode_operations
= {
424 .permission
= proc_sys_permission
,
425 .setattr
= proc_sys_setattr
,
426 .getattr
= proc_sys_getattr
,
/*
 * Inode operations installed on sysctl directories: the same permission,
 * setattr and getattr callbacks as regular files, plus name lookup via
 * proc_sys_lookup().
 */
429 static const struct inode_operations proc_sys_dir_operations
= {
430 .lookup
= proc_sys_lookup
,
431 .permission
= proc_sys_permission
,
432 .setattr
= proc_sys_setattr
,
433 .getattr
= proc_sys_getattr
,
/*
 * proc_sys_revalidate - dentry_operations.d_revalidate for sysctl dentries.
 *
 * Returns non-zero (dentry still valid) as long as the header attached to
 * the dentry's inode is not marked as unregistering.
 *
 * NOTE(review): the statement executed when LOOKUP_RCU is set in
 * nd->flags is missing from this listing.
 */
436 static int proc_sys_revalidate(struct dentry
*dentry
, struct nameidata
*nd
)
438 if (nd
->flags
& LOOKUP_RCU
)
440 return !PROC_I(dentry
->d_inode
)->sysctl
->unregistering
;
/*
 * proc_sys_delete - dentry_operations.d_delete for sysctl dentries.
 *
 * Returns 1 when the header attached to the dentry's inode is marked as
 * unregistering and 0 otherwise (the !! normalises the field to 0/1).
 */
443 static int proc_sys_delete(const struct dentry
*dentry
)
445 return !!PROC_I(dentry
->d_inode
)->sysctl
->unregistering
;
/*
 * proc_sys_compare - dentry_operations.d_compare for sysctl dentries.
 *
 * Compares the candidate name (@str, @len) with @name by length and then
 * by memcmp(). When both checks pass, the header attached to @inode is
 * read with rcu_dereference() and the result is
 * !head || !sysctl_is_seen(head): 0 when a header exists and
 * sysctl_is_seen() reports non-zero for it, 1 otherwise.
 *
 * NOTE(review): the values returned on a length or content mismatch and
 * any check of @inode itself are missing from this listing.
 */
448 static int proc_sys_compare(const struct dentry
*parent
,
449 const struct inode
*pinode
,
450 const struct dentry
*dentry
, const struct inode
*inode
,
451 unsigned int len
, const char *str
, const struct qstr
*name
)
453 struct ctl_table_header
*head
;
454 /* Although proc doesn't have negative dentries, rcu-walk means
455 * that inode here can be NULL */
456 /* AV: can it, indeed? */
459 if (name
->len
!= len
)
461 if (memcmp(name
->name
, str
, len
))
463 head
= rcu_dereference(PROC_I(inode
)->sysctl
);
464 return !head
|| !sysctl_is_seen(head
);
/*
 * Dentry operations installed with d_set_d_op() by proc_sys_lookup() and
 * proc_sys_fill_cache(): revalidation, delete decision and name comparison
 * for sysctl dentries.
 */
467 static const struct dentry_operations proc_sys_dentry_operations
= {
468 .d_revalidate
= proc_sys_revalidate
,
469 .d_delete
= proc_sys_delete
,
470 .d_compare
= proc_sys_compare
,
473 int __init
proc_sys_init(void)
475 struct proc_dir_entry
*proc_sys_root
;
477 proc_sys_root
= proc_mkdir("sys", NULL
);
478 proc_sys_root
->proc_iops
= &proc_sys_dir_operations
;
479 proc_sys_root
->proc_fops
= &proc_sys_dir_file_operations
;
480 proc_sys_root
->nlink
= 0;