Merge illumos-gate
[unleashed.git] / kernel / syscall / acctctl.c
bloba7900ad90c8dbada69bfe2e9a7f6211a52695c2e
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #include <sys/proc.h>
27 #include <sys/systm.h>
28 #include <sys/param.h>
29 #include <sys/kmem.h>
30 #include <sys/sysmacros.h>
31 #include <sys/types.h>
32 #include <sys/cmn_err.h>
33 #include <sys/user.h>
34 #include <sys/cred.h>
35 #include <sys/vnode.h>
36 #include <sys/file.h>
37 #include <sys/pathname.h>
38 #include <sys/modctl.h>
39 #include <sys/acctctl.h>
40 #include <sys/bitmap.h>
41 #include <sys/exacct.h>
42 #include <sys/policy.h>
45 * acctctl(2)
47 * acctctl() provides the administrative interface to the extended accounting
48 * subsystem. The process and task accounting facilities are configurable:
49 * resources can be individually specified for recording in the appropriate
50 * accounting file.
52 * The current implementation of acctctl() requires that the process, task,
53 * flow and net accounting files be distinct across all zones.
55 * Locking
56 * Each accounting species has an ac_info_t which contains a mutex,
57 * used to protect the ac_info_t's contents, and to serialize access to the
58 * appropriate file.
/*
 * List of the per-zone exacct_globals structures.  Entries are appended by
 * exacct_zone_init() and removed by exacct_zone_fini(); ac_file_in_use()
 * walks the list to look at every zone's accounting vnodes.
 */
61 static list_t exacct_globals_list;
/* Held around every insertion, removal and traversal of the list above. */
62 static kmutex_t exacct_globals_list_lock;
64 static int
65 ac_state_set(ac_info_t *info, void *buf, size_t bufsz)
67 int state;
69 if (buf == NULL || (bufsz != sizeof (int)))
70 return (EINVAL);
72 if (copyin(buf, &state, bufsz) != 0)
73 return (EFAULT);
75 if (state != AC_ON && state != AC_OFF)
76 return (EINVAL);
78 mutex_enter(&info->ac_lock);
79 info->ac_state = state;
80 mutex_exit(&info->ac_lock);
81 return (0);
84 static int
85 ac_state_get(ac_info_t *info, void *buf, size_t bufsz)
87 if (buf == NULL || (bufsz != sizeof (int)))
88 return (EINVAL);
90 mutex_enter(&info->ac_lock);
91 if (copyout(&info->ac_state, buf, bufsz) != 0) {
92 mutex_exit(&info->ac_lock);
93 return (EFAULT);
95 mutex_exit(&info->ac_lock);
96 return (0);
99 static boolean_t
100 ac_file_in_use(vnode_t *vp)
102 boolean_t in_use = B_FALSE;
103 struct exacct_globals *acg;
105 if (vp == NULL)
106 return (B_FALSE);
107 mutex_enter(&exacct_globals_list_lock);
109 * Start off by grabbing all locks.
111 for (acg = list_head(&exacct_globals_list); acg != NULL;
112 acg = list_next(&exacct_globals_list, acg)) {
113 mutex_enter(&acg->ac_proc.ac_lock);
114 mutex_enter(&acg->ac_task.ac_lock);
115 mutex_enter(&acg->ac_flow.ac_lock);
116 mutex_enter(&acg->ac_net.ac_lock);
119 for (acg = list_head(&exacct_globals_list); !in_use && acg != NULL;
120 acg = list_next(&exacct_globals_list, acg)) {
122 * We need to verify that we aren't already using this file for
123 * accounting in any zone.
125 if (vn_compare(acg->ac_proc.ac_vnode, vp) ||
126 vn_compare(acg->ac_task.ac_vnode, vp) ||
127 vn_compare(acg->ac_flow.ac_vnode, vp) ||
128 vn_compare(acg->ac_net.ac_vnode, vp))
129 in_use = B_TRUE;
133 * Drop all locks.
135 for (acg = list_head(&exacct_globals_list); acg != NULL;
136 acg = list_next(&exacct_globals_list, acg)) {
137 mutex_exit(&acg->ac_proc.ac_lock);
138 mutex_exit(&acg->ac_task.ac_lock);
139 mutex_exit(&acg->ac_flow.ac_lock);
140 mutex_exit(&acg->ac_net.ac_lock);
142 mutex_exit(&exacct_globals_list_lock);
143 return (in_use);
146 static int
147 ac_file_set(ac_info_t *info, void *ubuf, size_t bufsz)
149 int error = 0;
150 void *kbuf;
151 void *namebuf;
152 int namelen;
153 vnode_t *vp;
154 void *hdr;
155 size_t hdrsize;
156 vattr_t va;
158 if (ubuf == NULL) {
159 mutex_enter(&info->ac_lock);
162 * Closing accounting file
164 if (info->ac_vnode != NULL) {
165 error = fop_close(info->ac_vnode, FWRITE, 1, 0,
166 CRED(), NULL);
167 if (error) {
168 mutex_exit(&info->ac_lock);
169 return (error);
171 VN_RELE(info->ac_vnode);
172 info->ac_vnode = NULL;
174 if (info->ac_file != NULL) {
175 kmem_free(info->ac_file, strlen(info->ac_file) + 1);
176 info->ac_file = NULL;
179 mutex_exit(&info->ac_lock);
180 return (error);
183 if (bufsz < 2 || bufsz > MAXPATHLEN)
184 return (EINVAL);
187 * We have to copy in the whole buffer since we can't tell the length
188 * of the string in user's address space.
190 kbuf = kmem_zalloc(bufsz, KM_SLEEP);
191 if ((error = copyinstr((char *)ubuf, (char *)kbuf, bufsz, NULL)) != 0) {
192 kmem_free(kbuf, bufsz);
193 return (error);
195 if (*((char *)kbuf) != '/') {
196 kmem_free(kbuf, bufsz);
197 return (EINVAL);
201 * Now, allocate the space where we are going to save the
202 * name of the accounting file and kmem_free kbuf. We have to do this
203 * now because it is not good to sleep in kmem_alloc() while
204 * holding ac_info's lock.
206 namelen = strlen(kbuf) + 1;
207 namebuf = kmem_alloc(namelen, KM_SLEEP);
208 (void) strcpy(namebuf, kbuf);
209 kmem_free(kbuf, bufsz);
212 * Check if this file already exists.
214 error = lookupname(namebuf, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
217 * Check if the file is already in use.
219 if (!error) {
220 if (ac_file_in_use(vp)) {
222 * If we're already using it then return EBUSY
224 kmem_free(namebuf, namelen);
225 VN_RELE(vp);
226 return (EBUSY);
228 VN_RELE(vp);
232 * Create an exacct header here because exacct_create_header() may
233 * sleep so we should not be holding ac_lock. At this point we cannot
234 * reliably know if we need the header or not, so we may end up not
235 * using the header.
237 hdr = exacct_create_header(&hdrsize);
240 * Now, grab info's ac_lock and try to set up everything.
242 mutex_enter(&info->ac_lock);
244 if ((error = vn_open(namebuf, UIO_SYSSPACE,
245 FCREAT | FWRITE, 0600, &vp, CRCREAT, 0)) != 0) {
246 mutex_exit(&info->ac_lock);
247 kmem_free(namebuf, namelen);
248 kmem_free(hdr, hdrsize);
249 return (error);
252 if (vp->v_type != VREG) {
253 VN_RELE(vp);
254 mutex_exit(&info->ac_lock);
255 kmem_free(namebuf, namelen);
256 kmem_free(hdr, hdrsize);
257 return (EACCES);
260 if (info->ac_vnode != NULL) {
262 * Switch from an old file to a new file by swapping
263 * their vnode pointers.
265 vnode_t *oldvp;
266 oldvp = info->ac_vnode;
267 info->ac_vnode = vp;
268 vp = oldvp;
269 } else {
271 * Start writing accounting records to a new file.
273 info->ac_vnode = vp;
274 vp = NULL;
276 if (vp) {
278 * We still need to close the old file.
280 if ((error = fop_close(vp, FWRITE, 1, 0, CRED(), NULL)) != 0) {
281 VN_RELE(vp);
282 mutex_exit(&info->ac_lock);
283 kmem_free(namebuf, namelen);
284 kmem_free(hdr, hdrsize);
285 return (error);
287 VN_RELE(vp);
288 if (info->ac_file != NULL) {
289 kmem_free(info->ac_file,
290 strlen(info->ac_file) + 1);
291 info->ac_file = NULL;
294 info->ac_file = namebuf;
297 * Write the exacct header only if the file is empty.
299 error = fop_getattr(info->ac_vnode, &va, VATTR_SIZE, CRED(), NULL);
300 if (error == 0 && va.va_size == 0)
301 error = exacct_write_header(info, hdr, hdrsize);
303 mutex_exit(&info->ac_lock);
304 kmem_free(hdr, hdrsize);
305 return (error);
308 static int
309 ac_file_get(ac_info_t *info, void *buf, size_t bufsz)
311 int error = 0;
312 vnode_t *vnode;
313 char *file;
315 mutex_enter(&info->ac_lock);
316 file = info->ac_file;
317 vnode = info->ac_vnode;
319 if (file == NULL || vnode == NULL) {
320 mutex_exit(&info->ac_lock);
321 return (ENOTACTIVE);
324 if (strlen(file) >= bufsz)
325 error = ENOMEM;
326 else
327 error = copyoutstr(file, buf, MAXPATHLEN, NULL);
329 mutex_exit(&info->ac_lock);
330 return (error);
333 static int
334 ac_res_set(ac_info_t *info, void *buf, size_t bufsz, int maxres)
336 ac_res_t *res;
337 ac_res_t *tmp;
338 ulong_t *maskp;
339 int id;
340 uint_t counter = 0;
343 * Validate that a non-zero buffer, sized within limits and to an
344 * integral number of ac_res_t's has been specified.
346 if (bufsz == 0 ||
347 bufsz > sizeof (ac_res_t) * (AC_MAX_RES + 1) ||
348 (bufsz / sizeof (ac_res_t)) * sizeof (ac_res_t) != bufsz)
349 return (EINVAL);
351 tmp = res = kmem_alloc(bufsz, KM_SLEEP);
352 if (copyin(buf, res, bufsz) != 0) {
353 kmem_free(res, bufsz);
354 return (EFAULT);
357 maskp = (ulong_t *)&info->ac_mask;
359 mutex_enter(&info->ac_lock);
360 while ((id = tmp->ar_id) != AC_NONE && counter < maxres + 1) {
361 if (id > maxres || id < 0) {
362 mutex_exit(&info->ac_lock);
363 kmem_free(res, bufsz);
364 return (EINVAL);
366 if (tmp->ar_state == AC_ON) {
367 BT_SET(maskp, id);
368 } else if (tmp->ar_state == AC_OFF) {
369 BT_CLEAR(maskp, id);
370 } else {
371 mutex_exit(&info->ac_lock);
372 kmem_free(res, bufsz);
373 return (EINVAL);
375 tmp++;
376 counter++;
378 mutex_exit(&info->ac_lock);
379 kmem_free(res, bufsz);
380 return (0);
383 static int
384 ac_res_get(ac_info_t *info, void *buf, size_t bufsz, int maxres)
386 int error = 0;
387 ac_res_t *res;
388 ac_res_t *tmp;
389 size_t ressz = sizeof (ac_res_t) * (maxres + 1);
390 ulong_t *maskp;
391 int id;
393 if (bufsz < ressz)
394 return (EINVAL);
395 tmp = res = kmem_alloc(ressz, KM_SLEEP);
397 mutex_enter(&info->ac_lock);
398 maskp = (ulong_t *)&info->ac_mask;
399 for (id = 1; id <= maxres; id++) {
400 tmp->ar_id = id;
401 tmp->ar_state = BT_TEST(maskp, id);
402 tmp++;
404 tmp->ar_id = AC_NONE;
405 tmp->ar_state = AC_OFF;
406 mutex_exit(&info->ac_lock);
407 error = copyout(res, buf, ressz);
408 kmem_free(res, ressz);
409 return (error);
413 * acctctl()
415 * Overview
416 * acctctl() is the entry point for the acctctl(2) system call.
418 * Return values
419 * On successful completion, return 0; otherwise -1 is returned and errno is
420 * set appropriately.
422 * Caller's context
423 * Called from the system call path.
426 acctctl(int cmd, void *buf, size_t bufsz)
428 int error = 0;
429 int mode = AC_MODE(cmd);
430 int option = AC_OPTION(cmd);
431 int maxres;
432 ac_info_t *info;
433 zone_t *zone = curproc->p_zone;
434 struct exacct_globals *acg;
436 acg = zone_getspecific(exacct_zone_key, zone);
438 * exacct_zone_key and associated per-zone state were initialized when
439 * the module was loaded.
441 ASSERT(exacct_zone_key != ZONE_KEY_UNINITIALIZED);
442 ASSERT(acg != NULL);
444 switch (mode) { /* sanity check */
445 case AC_TASK:
446 info = &acg->ac_task;
447 maxres = AC_TASK_MAX_RES;
448 break;
449 case AC_PROC:
450 info = &acg->ac_proc;
451 maxres = AC_PROC_MAX_RES;
452 break;
454 * Flow/net accounting isn't configurable in non-global
455 * zones, but we have this field on a per-zone basis for future
456 * expansion as well as the ability to return default "unset"
457 * values for the various AC_*_GET queries. AC_*_SET commands
458 * fail with EPERM for AC_FLOW and AC_NET in non-global zones.
460 case AC_FLOW:
461 info = &acg->ac_flow;
462 maxres = AC_FLOW_MAX_RES;
463 break;
464 case AC_NET:
465 info = &acg->ac_net;
466 maxres = AC_NET_MAX_RES;
467 break;
468 default:
469 return (set_errno(EINVAL));
472 switch (option) {
473 case AC_STATE_SET:
474 if ((error = secpolicy_acct(CRED())) != 0)
475 break;
476 if ((mode == AC_FLOW || mode == AC_NET) &&
477 getzoneid() != GLOBAL_ZONEID) {
478 error = EPERM;
479 break;
481 error = ac_state_set(info, buf, bufsz);
482 break;
483 case AC_STATE_GET:
484 error = ac_state_get(info, buf, bufsz);
485 break;
486 case AC_FILE_SET:
487 if ((error = secpolicy_acct(CRED())) != 0)
488 break;
489 if ((mode == AC_FLOW || mode == AC_NET) &&
490 getzoneid() != GLOBAL_ZONEID) {
491 error = EPERM;
492 break;
494 error = ac_file_set(info, buf, bufsz);
495 break;
496 case AC_FILE_GET:
497 error = ac_file_get(info, buf, bufsz);
498 break;
499 case AC_RES_SET:
500 if ((error = secpolicy_acct(CRED())) != 0)
501 break;
502 if ((mode == AC_FLOW || mode == AC_NET) &&
503 getzoneid() != GLOBAL_ZONEID) {
504 error = EPERM;
505 break;
507 error = ac_res_set(info, buf, bufsz, maxres);
508 break;
509 case AC_RES_GET:
510 error = ac_res_get(info, buf, bufsz, maxres);
511 break;
512 default:
513 return (set_errno(EINVAL));
515 if (error)
516 return (set_errno(error));
517 return (0);
/*
 * System call entry naming acctctl() as the handler; referenced by the
 * modlsys and modlsys32 linkage elements.
 *
 * NOTE(review): the embedded listing numbers jump from 520 to 522 and the
 * closing brace is absent, so this listing appears to have lost the first
 * initializer and the terminator -- confirm against the pristine file.
 */
520 static struct sysent ac_sysent = {
522 SE_NOUNLOAD | SE_ARGC | SE_32RVAL1,
523 acctctl
/*
 * Linkage element that registers ac_sysent through mod_syscallops under
 * the description "acctctl system call".
 */
526 static struct modlsys modlsys = {
527 &mod_syscallops,
528 "acctctl system call",
529 &ac_sysent
/*
 * Second linkage element for the same ac_sysent, registered through
 * mod_syscallops32; only compiled when _SYSCALL32_IMPL is defined.
 */
532 #ifdef _SYSCALL32_IMPL
533 static struct modlsys modlsys32 = {
534 &mod_syscallops32,
535 "32-bit acctctl system call",
536 &ac_sysent
538 #endif
/*
 * NULL-terminated list of the linkage elements above; this is the
 * structure passed to mod_install() in _init() and mod_info() in _info().
 */
540 static struct modlinkage modlinkage = {
541 MODREV_1,
542 &modlsys,
543 #ifdef _SYSCALL32_IMPL
544 &modlsys32,
545 #endif
546 NULL
549 /* ARGSUSED */
550 static void *
551 exacct_zone_init(zoneid_t zoneid)
553 struct exacct_globals *acg;
555 acg = kmem_zalloc(sizeof (*acg), KM_SLEEP);
556 mutex_enter(&exacct_globals_list_lock);
557 list_insert_tail(&exacct_globals_list, acg);
558 mutex_exit(&exacct_globals_list_lock);
559 return (acg);
562 static void
563 exacct_free_info(ac_info_t *info)
565 mutex_enter(&info->ac_lock);
566 if (info->ac_vnode) {
567 (void) fop_close(info->ac_vnode, FWRITE, 1, 0, kcred, NULL);
568 VN_RELE(info->ac_vnode);
569 kmem_free(info->ac_file, strlen(info->ac_file) + 1);
571 info->ac_state = AC_OFF;
572 info->ac_vnode = NULL;
573 info->ac_file = NULL;
574 mutex_exit(&info->ac_lock);
577 /* ARGSUSED */
578 static void
579 exacct_zone_shutdown(zoneid_t zoneid, void *data)
581 struct exacct_globals *acg = data;
584 * The accounting files need to be closed during shutdown rather than
585 * destroy, since otherwise the filesystem they reside on may fail to
586 * unmount, thus causing the entire zone halt/reboot to fail.
588 exacct_free_info(&acg->ac_proc);
589 exacct_free_info(&acg->ac_task);
590 exacct_free_info(&acg->ac_flow);
591 exacct_free_info(&acg->ac_net);
594 /* ARGSUSED */
595 static void
596 exacct_zone_fini(zoneid_t zoneid, void *data)
598 struct exacct_globals *acg = data;
600 mutex_enter(&exacct_globals_list_lock);
601 list_remove(&exacct_globals_list, acg);
602 mutex_exit(&exacct_globals_list_lock);
604 mutex_destroy(&acg->ac_proc.ac_lock);
605 mutex_destroy(&acg->ac_task.ac_lock);
606 mutex_destroy(&acg->ac_flow.ac_lock);
607 mutex_destroy(&acg->ac_net.ac_lock);
608 kmem_free(acg, sizeof (*acg));
612 _init()
614 int error;
616 mutex_init(&exacct_globals_list_lock, NULL, MUTEX_DEFAULT, NULL);
617 list_create(&exacct_globals_list, sizeof (struct exacct_globals),
618 offsetof(struct exacct_globals, ac_link));
619 zone_key_create(&exacct_zone_key, exacct_zone_init,
620 exacct_zone_shutdown, exacct_zone_fini);
622 if ((error = mod_install(&modlinkage)) != 0) {
623 (void) zone_key_delete(exacct_zone_key);
624 exacct_zone_key = ZONE_KEY_UNINITIALIZED;
625 mutex_destroy(&exacct_globals_list_lock);
626 list_destroy(&exacct_globals_list);
628 return (error);
632 _info(struct modinfo *modinfop)
634 return (mod_info(&modlinkage, modinfop));
/*
 * _fini()
 *
 * Module unload entry point.  Always refuses with EBUSY; nothing is torn
 * down here.
 */
int
_fini()
{
	return (EBUSY);
}