4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
25 #include <sys/exacct.h>
26 #include <sys/exacct_catalog.h>
30 #include <sys/cmn_err.h>
32 #include <sys/project.h>
33 #include <sys/systm.h>
34 #include <sys/vnode.h>
36 #include <sys/acctctl.h>
38 #include <sys/utsname.h>
39 #include <sys/session.h>
40 #include <sys/sysmacros.h>
41 #include <sys/bitmap.h>
42 #include <sys/msacct.h>
45 * exacct usage and recording routines
47 * wracct(2), getacct(2), and the records written at process or task
48 * termination are constructed using the exacct_assemble_[task,proc]_usage()
49 * functions, which take a callback that takes the appropriate action on
50 * the packed exacct record for the task or process. For the process-related
51 * actions, we partition the routines such that the data collecting component
52 * can be performed while holding p_lock, and all sleeping or blocking
53 * operations can be performed without acquiring p_lock.
55 * putacct(2), which allows an application to construct a customized record
56 * associated with an existing process or task, has its own entry points:
57 * exacct_tag_task() and exacct_tag_proc().
60 taskq_t
*exacct_queue
;
61 kmem_cache_t
*exacct_object_cache
;
63 zone_key_t exacct_zone_key
= ZONE_KEY_UNINITIALIZED
;
65 static const uint32_t exacct_version
= EXACCT_VERSION
;
66 static const char exacct_header
[] = "exacct";
67 static const char exacct_creator
[] = "SunOS";
70 ea_alloc_item(ea_catalog_t catalog
, void *buf
, size_t bufsz
)
74 item
= kmem_cache_alloc(exacct_object_cache
, KM_SLEEP
);
75 bzero(item
, sizeof (ea_object_t
));
76 (void) ea_set_item(item
, catalog
, buf
, bufsz
);
81 ea_alloc_group(ea_catalog_t catalog
)
85 group
= kmem_cache_alloc(exacct_object_cache
, KM_SLEEP
);
86 bzero(group
, sizeof (ea_object_t
));
87 (void) ea_set_group(group
, catalog
);
92 ea_attach_item(ea_object_t
*grp
, void *buf
, size_t bufsz
, ea_catalog_t catalog
)
96 item
= ea_alloc_item(catalog
, buf
, bufsz
);
97 (void) ea_attach_to_group(grp
, item
);
102 * exacct_add_task_mstate() and exacct_sub_task_mstate() add and subtract
103 * microstate accounting data and resource usage counters from one task_usage_t
104 * from those supplied in another. These functions do not operate on *all*
105 * members of a task_usage_t: for some (e.g. tu_anctaskid) it would not make
109 exacct_add_task_mstate(task_usage_t
*tu
, task_usage_t
*delta
)
111 tu
->tu_utime
+= delta
->tu_utime
;
112 tu
->tu_stime
+= delta
->tu_stime
;
113 tu
->tu_minflt
+= delta
->tu_minflt
;
114 tu
->tu_majflt
+= delta
->tu_majflt
;
115 tu
->tu_sndmsg
+= delta
->tu_sndmsg
;
116 tu
->tu_rcvmsg
+= delta
->tu_rcvmsg
;
117 tu
->tu_ioch
+= delta
->tu_ioch
;
118 tu
->tu_iblk
+= delta
->tu_iblk
;
119 tu
->tu_oblk
+= delta
->tu_oblk
;
120 tu
->tu_vcsw
+= delta
->tu_vcsw
;
121 tu
->tu_icsw
+= delta
->tu_icsw
;
122 tu
->tu_nsig
+= delta
->tu_nsig
;
123 tu
->tu_nswp
+= delta
->tu_nswp
;
124 tu
->tu_nscl
+= delta
->tu_nscl
;
128 * See the comments for exacct_add_task_mstate(), above.
131 exacct_sub_task_mstate(task_usage_t
*tu
, task_usage_t
*delta
)
133 tu
->tu_utime
-= delta
->tu_utime
;
134 tu
->tu_stime
-= delta
->tu_stime
;
135 tu
->tu_minflt
-= delta
->tu_minflt
;
136 tu
->tu_majflt
-= delta
->tu_majflt
;
137 tu
->tu_sndmsg
-= delta
->tu_sndmsg
;
138 tu
->tu_rcvmsg
-= delta
->tu_rcvmsg
;
139 tu
->tu_ioch
-= delta
->tu_ioch
;
140 tu
->tu_iblk
-= delta
->tu_iblk
;
141 tu
->tu_oblk
-= delta
->tu_oblk
;
142 tu
->tu_vcsw
-= delta
->tu_vcsw
;
143 tu
->tu_icsw
-= delta
->tu_icsw
;
144 tu
->tu_nsig
-= delta
->tu_nsig
;
145 tu
->tu_nswp
-= delta
->tu_nswp
;
146 tu
->tu_nscl
-= delta
->tu_nscl
;
150 * Wrapper for vn_rdwr() used by exacct_vn_write() and exacct_write_header()
151 * to write to the accounting file without corrupting it in case of an I/O or
155 exacct_vn_write_impl(ac_info_t
*info
, void *buf
, ssize_t bufsize
)
161 ASSERT(info
!= NULL
);
162 ASSERT(info
->ac_vnode
!= NULL
);
163 ASSERT(MUTEX_HELD(&info
->ac_lock
));
166 * Save the size. If vn_rdwr fails, reset the size to avoid corrupting
167 * the present accounting file.
169 va
.va_mask
= AT_SIZE
;
170 error
= fop_getattr(info
->ac_vnode
, &va
, 0, kcred
, NULL
);
172 error
= vn_rdwr(UIO_WRITE
, info
->ac_vnode
, (caddr_t
)buf
,
173 bufsize
, 0LL, UIO_SYSSPACE
, FAPPEND
, (rlim64_t
)MAXOFFSET_T
,
176 (void) fop_setattr(info
->ac_vnode
, &va
, 0, kcred
, NULL
);
177 } else if (resid
!= 0) {
178 (void) fop_setattr(info
->ac_vnode
, &va
, 0, kcred
, NULL
);
186 * exacct_vn_write() safely writes to an accounting file. acctctl() prevents
187 * the two accounting vnodes from being equal, and the appropriate ac_lock is
188 * held across the call, so we're single threaded through this code for each
192 exacct_vn_write(ac_info_t
*info
, void *buf
, ssize_t bufsize
)
199 mutex_enter(&info
->ac_lock
);
202 * Don't do anything unless accounting file is set.
204 if (info
->ac_vnode
== NULL
) {
205 mutex_exit(&info
->ac_lock
);
208 error
= exacct_vn_write_impl(info
, buf
, bufsize
);
209 mutex_exit(&info
->ac_lock
);
215 * void *exacct_create_header(size_t *)
218 * exacct_create_header() constructs an exacct file header identifying the
219 * accounting file as the output of the kernel. exacct_create_header() and
220 * the static write_header() and verify_header() routines in libexacct must
221 * remain synchronized.
224 * A pointer to a packed exacct buffer containing the appropriate header is
225 * returned; the size of the buffer is placed in the location indicated by
229 * Suitable for KM_SLEEP allocations.
232 exacct_create_header(size_t *sizep
)
234 ea_object_t
*hdr_grp
;
239 hdr_grp
= ea_alloc_group(EXT_GROUP
| EXC_DEFAULT
| EXD_GROUP_HEADER
);
240 (void) ea_attach_item(hdr_grp
, (void *)&exacct_version
, 0,
241 EXT_UINT32
| EXC_DEFAULT
| EXD_VERSION
);
242 (void) ea_attach_item(hdr_grp
, (void *)exacct_header
, 0,
243 EXT_STRING
| EXC_DEFAULT
| EXD_FILETYPE
);
244 (void) ea_attach_item(hdr_grp
, (void *)exacct_creator
, 0,
245 EXT_STRING
| EXC_DEFAULT
| EXD_CREATOR
);
246 (void) ea_attach_item(hdr_grp
, uts_nodename(), 0,
247 EXT_STRING
| EXC_DEFAULT
| EXD_HOSTNAME
);
249 bufsize
= ea_pack_object(hdr_grp
, NULL
, 0);
250 buf
= kmem_alloc(bufsize
, KM_SLEEP
);
251 (void) ea_pack_object(hdr_grp
, buf
, bufsize
);
252 ea_free_object(hdr_grp
, EUP_ALLOC
);
255 * To prevent reading the header when reading the file backwards,
256 * set the large backskip of the header group to 0 (last 4 bytes).
259 exacct_order32(&bskip
);
260 bcopy(&bskip
, (char *)buf
+ bufsize
- sizeof (bskip
),
268 * int exacct_write_header(ac_info_t *, void *, size_t)
271 * exacct_write_header() writes the given header buffer to the indicated
275 * The result of the write operation is returned.
278 * Caller must hold the ac_lock of the appropriate accounting file
279 * information block (ac_info_t).
282 exacct_write_header(ac_info_t
*info
, void *hdr
, size_t hdrsize
)
284 if (info
!= NULL
&& info
->ac_vnode
!= NULL
)
285 return (exacct_vn_write_impl(info
, hdr
, hdrsize
));
291 exacct_get_interval_task_usage(task_t
*tk
, task_usage_t
*tu
,
292 task_usage_t
**tu_buf
)
294 task_usage_t
*oldtu
, *newtu
;
295 task_usage_t
**prevusage
;
297 ASSERT(MUTEX_HELD(&tk
->tk_usage_lock
));
298 if (getzoneid() != GLOBAL_ZONEID
) {
299 prevusage
= &tk
->tk_zoneusage
;
301 prevusage
= &tk
->tk_prevusage
;
303 if ((oldtu
= *prevusage
) != NULL
) {
305 * In case we have any accounting information
306 * saved from the previous interval record.
309 bcopy(tu
, newtu
, sizeof (task_usage_t
));
310 tu
->tu_minflt
-= oldtu
->tu_minflt
;
311 tu
->tu_majflt
-= oldtu
->tu_majflt
;
312 tu
->tu_sndmsg
-= oldtu
->tu_sndmsg
;
313 tu
->tu_rcvmsg
-= oldtu
->tu_rcvmsg
;
314 tu
->tu_ioch
-= oldtu
->tu_ioch
;
315 tu
->tu_iblk
-= oldtu
->tu_iblk
;
316 tu
->tu_oblk
-= oldtu
->tu_oblk
;
317 tu
->tu_vcsw
-= oldtu
->tu_vcsw
;
318 tu
->tu_icsw
-= oldtu
->tu_icsw
;
319 tu
->tu_nsig
-= oldtu
->tu_nsig
;
320 tu
->tu_nswp
-= oldtu
->tu_nswp
;
321 tu
->tu_nscl
-= oldtu
->tu_nscl
;
322 tu
->tu_utime
-= oldtu
->tu_utime
;
323 tu
->tu_stime
-= oldtu
->tu_stime
;
325 tu
->tu_startsec
= oldtu
->tu_finishsec
;
326 tu
->tu_startnsec
= oldtu
->tu_finishnsec
;
328 * Copy the data from our temporary storage to the task's
329 * previous interval usage structure for future reference.
331 bcopy(newtu
, oldtu
, sizeof (task_usage_t
));
334 * Store current statistics in the task's previous interval
335 * usage structure for future references.
337 *prevusage
= *tu_buf
;
338 bcopy(tu
, *prevusage
, sizeof (task_usage_t
));
344 exacct_snapshot_task_usage(task_t
*tk
, task_usage_t
*tu
)
349 ASSERT(MUTEX_HELD(&pidlock
));
351 if ((p
= tk
->tk_memb_list
) == NULL
)
355 * exacct_snapshot_task_usage() provides an approximate snapshot of the
356 * usage of the potentially many members of the task. Since we don't
357 * guarantee exactness, we don't acquire the p_lock of any of the member
361 mutex_enter(&p
->p_lock
);
362 tu
->tu_utime
+= mstate_aggr_state(p
, LMS_USER
);
363 tu
->tu_stime
+= mstate_aggr_state(p
, LMS_SYSTEM
);
364 mutex_exit(&p
->p_lock
);
365 tu
->tu_minflt
+= p
->p_ru
.minflt
;
366 tu
->tu_majflt
+= p
->p_ru
.majflt
;
367 tu
->tu_sndmsg
+= p
->p_ru
.msgsnd
;
368 tu
->tu_rcvmsg
+= p
->p_ru
.msgrcv
;
369 tu
->tu_ioch
+= p
->p_ru
.ioch
;
370 tu
->tu_iblk
+= p
->p_ru
.inblock
;
371 tu
->tu_oblk
+= p
->p_ru
.oublock
;
372 tu
->tu_vcsw
+= p
->p_ru
.nvcsw
;
373 tu
->tu_icsw
+= p
->p_ru
.nivcsw
;
374 tu
->tu_nsig
+= p
->p_ru
.nsignals
;
375 tu
->tu_nswp
+= p
->p_ru
.nswap
;
376 tu
->tu_nscl
+= p
->p_ru
.sysc
;
377 } while ((p
= p
->p_tasknext
) != tk
->tk_memb_list
);
380 * The resource usage accounted for so far will include that
381 * contributed by the task's first process. If this process
382 * came from another task, then its accumulated resource usage
383 * will include a contribution from work performed there.
384 * We must therefore subtract any resource usage that was
385 * inherited with the first process.
387 exacct_sub_task_mstate(tu
, tk
->tk_inherited
);
390 tu
->tu_finishsec
= (uint64_t)(ulong_t
)ts
.tv_sec
;
391 tu
->tu_finishnsec
= (uint64_t)(ulong_t
)ts
.tv_nsec
;
395 * void exacct_update_task_mstate(proc_t *)
398 * exacct_update_task_mstate() updates the task usage; it is intended
399 * to be called from proc_exit().
405 * p_lock must be held at entry.
408 exacct_update_task_mstate(proc_t
*p
)
412 mutex_enter(&p
->p_task
->tk_usage_lock
);
413 tu
= p
->p_task
->tk_usage
;
414 tu
->tu_utime
+= mstate_aggr_state(p
, LMS_USER
);
415 tu
->tu_stime
+= mstate_aggr_state(p
, LMS_SYSTEM
);
416 tu
->tu_minflt
+= p
->p_ru
.minflt
;
417 tu
->tu_majflt
+= p
->p_ru
.majflt
;
418 tu
->tu_sndmsg
+= p
->p_ru
.msgsnd
;
419 tu
->tu_rcvmsg
+= p
->p_ru
.msgrcv
;
420 tu
->tu_ioch
+= p
->p_ru
.ioch
;
421 tu
->tu_iblk
+= p
->p_ru
.inblock
;
422 tu
->tu_oblk
+= p
->p_ru
.oublock
;
423 tu
->tu_vcsw
+= p
->p_ru
.nvcsw
;
424 tu
->tu_icsw
+= p
->p_ru
.nivcsw
;
425 tu
->tu_nsig
+= p
->p_ru
.nsignals
;
426 tu
->tu_nswp
+= p
->p_ru
.nswap
;
427 tu
->tu_nscl
+= p
->p_ru
.sysc
;
428 mutex_exit(&p
->p_task
->tk_usage_lock
);
432 exacct_calculate_task_usage(task_t
*tk
, task_usage_t
*tu
, int flag
)
435 task_usage_t
*tu_buf
;
440 * For partial records we must report the sum of current
441 * accounting statistics with previously accumulated
444 mutex_enter(&pidlock
);
445 mutex_enter(&tk
->tk_usage_lock
);
447 (void) bcopy(tk
->tk_usage
, tu
, sizeof (task_usage_t
));
448 exacct_snapshot_task_usage(tk
, tu
);
450 mutex_exit(&tk
->tk_usage_lock
);
451 mutex_exit(&pidlock
);
455 * We need to allocate spare task_usage_t buffer before
456 * grabbing pidlock because we might need it later in
457 * exacct_get_interval_task_usage().
459 tu_buf
= kmem_zalloc(sizeof (task_usage_t
), KM_SLEEP
);
460 mutex_enter(&pidlock
);
461 mutex_enter(&tk
->tk_usage_lock
);
464 * For interval records, we deduct the previous microstate
465 * accounting data and cpu usage times from previously saved
466 * results and update the previous task usage structure.
468 (void) bcopy(tk
->tk_usage
, tu
, sizeof (task_usage_t
));
469 exacct_snapshot_task_usage(tk
, tu
);
470 exacct_get_interval_task_usage(tk
, tu
, &tu_buf
);
472 mutex_exit(&tk
->tk_usage_lock
);
473 mutex_exit(&pidlock
);
476 kmem_free(tu_buf
, sizeof (task_usage_t
));
480 * For final records, we deduct, from the task's current
481 * usage, any usage that was inherited with the arrival
482 * of a process from a previous task. We then record
483 * the task's finish time.
485 mutex_enter(&tk
->tk_usage_lock
);
486 (void) bcopy(tk
->tk_usage
, tu
, sizeof (task_usage_t
));
487 exacct_sub_task_mstate(tu
, tk
->tk_inherited
);
488 mutex_exit(&tk
->tk_usage_lock
);
491 tu
->tu_finishsec
= (uint64_t)(ulong_t
)ts
.tv_sec
;
492 tu
->tu_finishnsec
= (uint64_t)(ulong_t
)ts
.tv_nsec
;
499 exacct_attach_task_item(task_t
*tk
, task_usage_t
*tu
, ea_object_t
*record
,
506 (void) ea_attach_item(record
, &tk
->tk_tkid
,
507 sizeof (uint32_t), EXT_UINT32
| EXD_TASK_TASKID
);
510 (void) ea_attach_item(record
, &tk
->tk_proj
->kpj_id
,
511 sizeof (uint32_t), EXT_UINT32
| EXD_TASK_PROJID
);
517 hrt2ts(tu
->tu_stime
, &ts
);
519 (void) ea_attach_item(record
, &ui
, sizeof (uint64_t),
520 EXT_UINT64
| EXD_TASK_CPU_SYS_SEC
);
522 (void) ea_attach_item(record
, &ui
, sizeof (uint64_t),
523 EXT_UINT64
| EXD_TASK_CPU_SYS_NSEC
);
525 hrt2ts(tu
->tu_utime
, &ts
);
527 (void) ea_attach_item(record
, &ui
, sizeof (uint64_t),
528 EXT_UINT64
| EXD_TASK_CPU_USER_SEC
);
530 (void) ea_attach_item(record
, &ui
, sizeof (uint64_t),
531 EXT_UINT64
| EXD_TASK_CPU_USER_NSEC
);
535 (void) ea_attach_item(record
, &tu
->tu_startsec
,
536 sizeof (uint64_t), EXT_UINT64
| EXD_TASK_START_SEC
);
537 (void) ea_attach_item(record
, &tu
->tu_startnsec
,
538 sizeof (uint64_t), EXT_UINT64
| EXD_TASK_START_NSEC
);
539 (void) ea_attach_item(record
, &tu
->tu_finishsec
,
540 sizeof (uint64_t), EXT_UINT64
| EXD_TASK_FINISH_SEC
);
541 (void) ea_attach_item(record
, &tu
->tu_finishnsec
,
542 sizeof (uint64_t), EXT_UINT64
| EXD_TASK_FINISH_NSEC
);
544 case AC_TASK_HOSTNAME
:
545 (void) ea_attach_item(record
, tk
->tk_zone
->zone_nodename
,
546 strlen(tk
->tk_zone
->zone_nodename
) + 1,
547 EXT_STRING
| EXD_TASK_HOSTNAME
);
549 case AC_TASK_MICROSTATE
:
550 (void) ea_attach_item(record
, &tu
->tu_majflt
,
551 sizeof (uint64_t), EXT_UINT64
| EXD_TASK_FAULTS_MAJOR
);
552 (void) ea_attach_item(record
, &tu
->tu_minflt
,
553 sizeof (uint64_t), EXT_UINT64
| EXD_TASK_FAULTS_MINOR
);
554 (void) ea_attach_item(record
, &tu
->tu_sndmsg
,
555 sizeof (uint64_t), EXT_UINT64
| EXD_TASK_MESSAGES_SND
);
556 (void) ea_attach_item(record
, &tu
->tu_rcvmsg
,
557 sizeof (uint64_t), EXT_UINT64
| EXD_TASK_MESSAGES_RCV
);
558 (void) ea_attach_item(record
, &tu
->tu_iblk
,
559 sizeof (uint64_t), EXT_UINT64
| EXD_TASK_BLOCKS_IN
);
560 (void) ea_attach_item(record
, &tu
->tu_oblk
,
561 sizeof (uint64_t), EXT_UINT64
| EXD_TASK_BLOCKS_OUT
);
562 (void) ea_attach_item(record
, &tu
->tu_ioch
,
563 sizeof (uint64_t), EXT_UINT64
| EXD_TASK_CHARS_RDWR
);
564 (void) ea_attach_item(record
, &tu
->tu_vcsw
,
565 sizeof (uint64_t), EXT_UINT64
| EXD_TASK_CONTEXT_VOL
);
566 (void) ea_attach_item(record
, &tu
->tu_icsw
,
567 sizeof (uint64_t), EXT_UINT64
| EXD_TASK_CONTEXT_INV
);
568 (void) ea_attach_item(record
, &tu
->tu_nsig
,
569 sizeof (uint64_t), EXT_UINT64
| EXD_TASK_SIGNALS
);
570 (void) ea_attach_item(record
, &tu
->tu_nswp
,
571 sizeof (uint64_t), EXT_UINT64
| EXD_TASK_SWAPS
);
572 (void) ea_attach_item(record
, &tu
->tu_nscl
,
573 sizeof (uint64_t), EXT_UINT64
| EXD_TASK_SYSCALLS
);
575 case AC_TASK_ANCTASKID
:
576 (void) ea_attach_item(record
, &tu
->tu_anctaskid
,
577 sizeof (uint32_t), EXT_UINT32
| EXD_TASK_ANCTASKID
);
579 case AC_TASK_ZONENAME
:
580 (void) ea_attach_item(record
, tk
->tk_zone
->zone_name
,
581 strlen(tk
->tk_zone
->zone_name
) + 1,
582 EXT_STRING
| EXD_TASK_ZONENAME
);
591 exacct_assemble_task_record(task_t
*tk
, task_usage_t
*tu
, ulong_t
*mask
,
592 ea_catalog_t record_type
)
598 * Assemble usage values into group.
600 record
= ea_alloc_group(EXT_GROUP
| EXC_DEFAULT
| record_type
);
601 for (res
= 1, count
= 0; res
<= AC_TASK_MAX_RES
; res
++)
602 if (BT_TEST(mask
, res
))
603 count
+= exacct_attach_task_item(tk
, tu
, record
, res
);
605 ea_free_object(record
, EUP_ALLOC
);
612 * int exacct_assemble_task_usage(task_t *, int (*)(void *, size_t, void *,
613 * size_t, size_t *), void *, size_t, size_t *, int)
616 * exacct_assemble_task_usage() builds the packed exacct buffer for the
617 * indicated task, executes the given callback function, and free the packed
621 * Returns 0 on success; otherwise the appropriate error code is returned.
624 * Suitable for KM_SLEEP allocations.
627 exacct_assemble_task_usage(ac_info_t
*ac_task
, task_t
*tk
,
628 int (*callback
)(ac_info_t
*, void *, size_t, void *, size_t, size_t *),
629 void *ubuf
, size_t ubufsize
, size_t *actual
, int flag
)
631 ulong_t mask
[AC_MASK_SZ
];
632 ea_object_t
*task_record
;
633 ea_catalog_t record_type
;
639 ASSERT(flag
== EW_FINAL
|| flag
== EW_PARTIAL
|| flag
== EW_INTERVAL
);
641 mutex_enter(&ac_task
->ac_lock
);
642 if (ac_task
->ac_state
== AC_OFF
) {
643 mutex_exit(&ac_task
->ac_lock
);
646 bt_copy(ac_task
->ac_mask
, mask
, AC_MASK_SZ
);
647 mutex_exit(&ac_task
->ac_lock
);
651 record_type
= EXD_GROUP_TASK
;
654 record_type
= EXD_GROUP_TASK_PARTIAL
;
657 record_type
= EXD_GROUP_TASK_INTERVAL
;
662 * Calculate task usage and assemble it into the task record.
664 tu
= kmem_zalloc(sizeof (task_usage_t
), KM_SLEEP
);
665 exacct_calculate_task_usage(tk
, tu
, flag
);
666 task_record
= exacct_assemble_task_record(tk
, tu
, mask
, record_type
);
667 if (task_record
== NULL
) {
669 * The current configuration of the accounting system has
670 * resulted in records with no data; accordingly, we don't write
671 * these, but we return success.
673 kmem_free(tu
, sizeof (task_usage_t
));
678 * Pack object into buffer and run callback on it.
680 bufsize
= ea_pack_object(task_record
, NULL
, 0);
681 buf
= kmem_alloc(bufsize
, KM_SLEEP
);
682 (void) ea_pack_object(task_record
, buf
, bufsize
);
683 ret
= callback(ac_task
, ubuf
, ubufsize
, buf
, bufsize
, actual
);
686 * Free all previously allocated structures.
688 kmem_free(buf
, bufsize
);
689 ea_free_object(task_record
, EUP_ALLOC
);
690 kmem_free(tu
, sizeof (task_usage_t
));
695 * void exacct_commit_task(void *)
698 * exacct_commit_task() calculates the final usage for a task, updating the
699 * task usage if task accounting is active, and writing a task record if task
700 * accounting is active. exacct_commit_task() is intended for being called
701 * from a task queue (taskq_t).
707 * Suitable for KM_SLEEP allocations.
711 exacct_commit_task(void *arg
)
713 task_t
*tk
= (task_t
*)arg
;
715 zone_t
*zone
= tk
->tk_zone
;
716 struct exacct_globals
*acg
;
718 ASSERT(tk
!= task0p
);
719 ASSERT(tk
->tk_memb_list
== NULL
);
722 * Don't do any extra work if the acctctl module isn't loaded.
723 * If acctctl module is loaded when zone is in down state then
724 * zone_getspecific can return NULL for that zone.
726 if (exacct_zone_key
!= ZONE_KEY_UNINITIALIZED
) {
727 acg
= zone_getspecific(exacct_zone_key
, zone
);
730 (void) exacct_assemble_task_usage(&acg
->ac_task
, tk
,
731 exacct_commit_callback
, NULL
, 0, &size
, EW_FINAL
);
732 if (tk
->tk_zone
!= global_zone
) {
733 acg
= zone_getspecific(exacct_zone_key
, global_zone
);
734 (void) exacct_assemble_task_usage(&acg
->ac_task
, tk
,
735 exacct_commit_callback
, NULL
, 0, &size
, EW_FINAL
);
739 * Release associated project and finalize task.
746 exacct_attach_proc_item(proc_usage_t
*pu
, ea_object_t
*record
, int res
)
752 (void) ea_attach_item(record
, &pu
->pu_pid
,
753 sizeof (uint32_t), EXT_UINT32
| EXD_PROC_PID
);
756 (void) ea_attach_item(record
, &pu
->pu_ruid
,
757 sizeof (uint32_t), EXT_UINT32
| EXD_PROC_UID
);
760 (void) ea_attach_item(record
, &pu
->pu_acflag
,
761 sizeof (uint32_t), EXT_UINT32
| EXD_PROC_ACCT_FLAGS
);
764 (void) ea_attach_item(record
, &pu
->pu_rgid
,
765 sizeof (uint32_t), EXT_UINT32
| EXD_PROC_GID
);
768 (void) ea_attach_item(record
, &pu
->pu_projid
,
769 sizeof (uint32_t), EXT_UINT32
| EXD_PROC_PROJID
);
772 (void) ea_attach_item(record
, &pu
->pu_taskid
,
773 sizeof (uint32_t), EXT_UINT32
| EXD_PROC_TASKID
);
776 (void) ea_attach_item(record
, &pu
->pu_utimesec
,
777 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_CPU_USER_SEC
);
778 (void) ea_attach_item(record
, &pu
->pu_utimensec
,
779 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_CPU_USER_NSEC
);
780 (void) ea_attach_item(record
, &pu
->pu_stimesec
,
781 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_CPU_SYS_SEC
);
782 (void) ea_attach_item(record
, &pu
->pu_stimensec
,
783 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_CPU_SYS_NSEC
);
786 (void) ea_attach_item(record
, &pu
->pu_startsec
,
787 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_START_SEC
);
788 (void) ea_attach_item(record
, &pu
->pu_startnsec
,
789 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_START_NSEC
);
790 (void) ea_attach_item(record
, &pu
->pu_finishsec
,
791 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_FINISH_SEC
);
792 (void) ea_attach_item(record
, &pu
->pu_finishnsec
,
793 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_FINISH_NSEC
);
795 case AC_PROC_COMMAND
:
796 (void) ea_attach_item(record
, pu
->pu_command
,
797 strlen(pu
->pu_command
) + 1, EXT_STRING
| EXD_PROC_COMMAND
);
799 case AC_PROC_HOSTNAME
:
800 (void) ea_attach_item(record
, pu
->pu_nodename
,
801 strlen(pu
->pu_nodename
) + 1,
802 EXT_STRING
| EXD_PROC_HOSTNAME
);
805 (void) ea_attach_item(record
, &pu
->pu_major
,
806 sizeof (uint32_t), EXT_UINT32
| EXD_PROC_TTY_MAJOR
);
807 (void) ea_attach_item(record
, &pu
->pu_minor
,
808 sizeof (uint32_t), EXT_UINT32
| EXD_PROC_TTY_MINOR
);
810 case AC_PROC_MICROSTATE
:
811 (void) ea_attach_item(record
, &pu
->pu_majflt
,
812 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_FAULTS_MAJOR
);
813 (void) ea_attach_item(record
, &pu
->pu_minflt
,
814 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_FAULTS_MINOR
);
815 (void) ea_attach_item(record
, &pu
->pu_sndmsg
,
816 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_MESSAGES_SND
);
817 (void) ea_attach_item(record
, &pu
->pu_rcvmsg
,
818 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_MESSAGES_RCV
);
819 (void) ea_attach_item(record
, &pu
->pu_iblk
,
820 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_BLOCKS_IN
);
821 (void) ea_attach_item(record
, &pu
->pu_oblk
,
822 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_BLOCKS_OUT
);
823 (void) ea_attach_item(record
, &pu
->pu_ioch
,
824 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_CHARS_RDWR
);
825 (void) ea_attach_item(record
, &pu
->pu_vcsw
,
826 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_CONTEXT_VOL
);
827 (void) ea_attach_item(record
, &pu
->pu_icsw
,
828 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_CONTEXT_INV
);
829 (void) ea_attach_item(record
, &pu
->pu_nsig
,
830 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_SIGNALS
);
831 (void) ea_attach_item(record
, &pu
->pu_nswp
,
832 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_SWAPS
);
833 (void) ea_attach_item(record
, &pu
->pu_nscl
,
834 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_SYSCALLS
);
837 (void) ea_attach_item(record
, &pu
->pu_ancpid
,
838 sizeof (uint32_t), EXT_UINT32
| EXD_PROC_ANCPID
);
840 case AC_PROC_WAIT_STATUS
:
841 (void) ea_attach_item(record
, &pu
->pu_wstat
,
842 sizeof (uint32_t), EXT_UINT32
| EXD_PROC_WAIT_STATUS
);
844 case AC_PROC_ZONENAME
:
845 (void) ea_attach_item(record
, pu
->pu_zonename
,
846 strlen(pu
->pu_zonename
) + 1,
847 EXT_STRING
| EXD_PROC_ZONENAME
);
850 (void) ea_attach_item(record
, &pu
->pu_mem_rss_avg
,
851 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_MEM_RSS_AVG_K
);
852 (void) ea_attach_item(record
, &pu
->pu_mem_rss_max
,
853 sizeof (uint64_t), EXT_UINT64
| EXD_PROC_MEM_RSS_MAX_K
);
862 exacct_assemble_proc_record(proc_usage_t
*pu
, ulong_t
*mask
,
863 ea_catalog_t record_type
)
869 * Assemble usage values into group.
871 record
= ea_alloc_group(EXT_GROUP
| EXC_DEFAULT
| record_type
);
872 for (res
= 1, count
= 0; res
<= AC_PROC_MAX_RES
; res
++)
873 if (BT_TEST(mask
, res
))
874 count
+= exacct_attach_proc_item(pu
, record
, res
);
876 ea_free_object(record
, EUP_ALLOC
);
883 * The following two routines assume that process's p_lock is held or
884 * exacct_commit_proc has been called from exit() when all lwps are stopped.
887 exacct_calculate_proc_mstate(proc_t
*p
, proc_usage_t
*pu
)
891 ASSERT(MUTEX_HELD(&p
->p_lock
));
892 if ((t
= p
->p_tlist
) == NULL
)
896 pu
->pu_minflt
+= t
->t_lwp
->lwp_ru
.minflt
;
897 pu
->pu_majflt
+= t
->t_lwp
->lwp_ru
.majflt
;
898 pu
->pu_sndmsg
+= t
->t_lwp
->lwp_ru
.msgsnd
;
899 pu
->pu_rcvmsg
+= t
->t_lwp
->lwp_ru
.msgrcv
;
900 pu
->pu_ioch
+= t
->t_lwp
->lwp_ru
.ioch
;
901 pu
->pu_iblk
+= t
->t_lwp
->lwp_ru
.inblock
;
902 pu
->pu_oblk
+= t
->t_lwp
->lwp_ru
.oublock
;
903 pu
->pu_vcsw
+= t
->t_lwp
->lwp_ru
.nvcsw
;
904 pu
->pu_icsw
+= t
->t_lwp
->lwp_ru
.nivcsw
;
905 pu
->pu_nsig
+= t
->t_lwp
->lwp_ru
.nsignals
;
906 pu
->pu_nswp
+= t
->t_lwp
->lwp_ru
.nswap
;
907 pu
->pu_nscl
+= t
->t_lwp
->lwp_ru
.sysc
;
908 } while ((t
= t
->t_forw
) != p
->p_tlist
);
912 exacct_copy_proc_mstate(proc_t
*p
, proc_usage_t
*pu
)
914 pu
->pu_minflt
= p
->p_ru
.minflt
;
915 pu
->pu_majflt
= p
->p_ru
.majflt
;
916 pu
->pu_sndmsg
= p
->p_ru
.msgsnd
;
917 pu
->pu_rcvmsg
= p
->p_ru
.msgrcv
;
918 pu
->pu_ioch
= p
->p_ru
.ioch
;
919 pu
->pu_iblk
= p
->p_ru
.inblock
;
920 pu
->pu_oblk
= p
->p_ru
.oublock
;
921 pu
->pu_vcsw
= p
->p_ru
.nvcsw
;
922 pu
->pu_icsw
= p
->p_ru
.nivcsw
;
923 pu
->pu_nsig
= p
->p_ru
.nsignals
;
924 pu
->pu_nswp
= p
->p_ru
.nswap
;
925 pu
->pu_nscl
= p
->p_ru
.sysc
;
929 exacct_calculate_proc_usage(proc_t
*p
, proc_usage_t
*pu
, ulong_t
*mask
,
932 timestruc_t ts
, ts_run
;
934 ASSERT(MUTEX_HELD(&p
->p_lock
));
937 * Convert CPU and execution times to sec/nsec format.
939 if (BT_TEST(mask
, AC_PROC_CPU
)) {
940 hrt2ts(mstate_aggr_state(p
, LMS_USER
), &ts
);
941 pu
->pu_utimesec
= (uint64_t)(ulong_t
)ts
.tv_sec
;
942 pu
->pu_utimensec
= (uint64_t)(ulong_t
)ts
.tv_nsec
;
943 hrt2ts(mstate_aggr_state(p
, LMS_SYSTEM
), &ts
);
944 pu
->pu_stimesec
= (uint64_t)(ulong_t
)ts
.tv_sec
;
945 pu
->pu_stimensec
= (uint64_t)(ulong_t
)ts
.tv_nsec
;
947 if (BT_TEST(mask
, AC_PROC_TIME
)) {
949 pu
->pu_finishsec
= (uint64_t)(ulong_t
)ts
.tv_sec
;
950 pu
->pu_finishnsec
= (uint64_t)(ulong_t
)ts
.tv_nsec
;
951 hrt2ts(gethrtime() - p
->p_mstart
, &ts_run
);
952 ts
.tv_sec
-= ts_run
.tv_sec
;
953 ts
.tv_nsec
-= ts_run
.tv_nsec
;
954 if (ts
.tv_nsec
< 0) {
956 if ((ts
.tv_nsec
= ts
.tv_nsec
+ NANOSEC
) >= NANOSEC
) {
958 ts
.tv_nsec
-= NANOSEC
;
961 pu
->pu_startsec
= (uint64_t)(ulong_t
)ts
.tv_sec
;
962 pu
->pu_startnsec
= (uint64_t)(ulong_t
)ts
.tv_nsec
;
965 pu
->pu_pid
= p
->p_pidp
->pid_id
;
966 pu
->pu_acflag
= p
->p_user
.u_acflag
;
967 pu
->pu_projid
= p
->p_task
->tk_proj
->kpj_id
;
968 pu
->pu_taskid
= p
->p_task
->tk_tkid
;
969 pu
->pu_major
= getmajor(p
->p_sessp
->s_dev
);
970 pu
->pu_minor
= getminor(p
->p_sessp
->s_dev
);
971 pu
->pu_ancpid
= p
->p_ancpid
;
972 pu
->pu_wstat
= wstat
;
974 * Compute average RSS in K. The denominator is the number of
975 * samples: the number of clock ticks plus the initial value.
977 pu
->pu_mem_rss_avg
= (PTOU(p
)->u_mem
/ (p
->p_stime
+ p
->p_utime
+ 1)) *
979 pu
->pu_mem_rss_max
= PTOU(p
)->u_mem_max
* (PAGESIZE
/ 1024);
981 mutex_enter(&p
->p_crlock
);
982 pu
->pu_ruid
= crgetruid(p
->p_cred
);
983 pu
->pu_rgid
= crgetrgid(p
->p_cred
);
984 mutex_exit(&p
->p_crlock
);
986 bcopy(p
->p_user
.u_comm
, pu
->pu_command
, strlen(p
->p_user
.u_comm
) + 1);
987 bcopy(p
->p_zone
->zone_name
, pu
->pu_zonename
,
988 strlen(p
->p_zone
->zone_name
) + 1);
989 bcopy(p
->p_zone
->zone_nodename
, pu
->pu_nodename
,
990 strlen(p
->p_zone
->zone_nodename
) + 1);
993 * Calculate microstate accounting data for a process that is still
994 * running. Presently, we explicitly collect all of the LWP usage into
995 * the proc usage structure here.
997 if (flag
& EW_PARTIAL
)
998 exacct_calculate_proc_mstate(p
, pu
);
1000 exacct_copy_proc_mstate(p
, pu
);
1004 * int exacct_assemble_proc_usage(proc_usage_t *, int (*)(void *, size_t, void
1005 * *, size_t, size_t *), void *, size_t, size_t *)
1008 * Assemble record with miscellaneous accounting information about the process
1009 * and execute the callback on it. It is the callback's job to set "actual" to
1010 * the size of record.
1013 * The result of the callback function, unless the extended process accounting
1014 * feature is not active, in which case ENOTACTIVE is returned.
1017 * Suitable for KM_SLEEP allocations.
1020 exacct_assemble_proc_usage(ac_info_t
*ac_proc
, proc_usage_t
*pu
,
1021 int (*callback
)(ac_info_t
*, void *, size_t, void *, size_t, size_t *),
1022 void *ubuf
, size_t ubufsize
, size_t *actual
, int flag
)
1024 ulong_t mask
[AC_MASK_SZ
];
1025 ea_object_t
*proc_record
;
1026 ea_catalog_t record_type
;
1031 ASSERT(flag
== EW_FINAL
|| flag
== EW_PARTIAL
);
1033 mutex_enter(&ac_proc
->ac_lock
);
1034 if (ac_proc
->ac_state
== AC_OFF
) {
1035 mutex_exit(&ac_proc
->ac_lock
);
1036 return (ENOTACTIVE
);
1038 bt_copy(&ac_proc
->ac_mask
[0], mask
, AC_MASK_SZ
);
1039 mutex_exit(&ac_proc
->ac_lock
);
1043 record_type
= EXD_GROUP_PROC
;
1046 record_type
= EXD_GROUP_PROC_PARTIAL
;
1050 proc_record
= exacct_assemble_proc_record(pu
, mask
, record_type
);
1051 if (proc_record
== NULL
)
1055 * Pack object into buffer and pass to callback.
1057 bufsize
= ea_pack_object(proc_record
, NULL
, 0);
1058 buf
= kmem_alloc(bufsize
, KM_SLEEP
);
1059 (void) ea_pack_object(proc_record
, buf
, bufsize
);
1061 ret
= callback(ac_proc
, ubuf
, ubufsize
, buf
, bufsize
, actual
);
1064 * Free all previously allocations.
1066 kmem_free(buf
, bufsize
);
1067 ea_free_object(proc_record
, EUP_ALLOC
);
1072 * int exacct_commit_callback(ac_info_t *, void *, size_t, void *, size_t,
1076 * exacct_commit_callback() writes the indicated buffer to the indicated
1077 * extended accounting file.
1080 * The result of the write operation is returned. "actual" is updated to
1081 * contain the number of bytes actually written.
1084 * Suitable for a vn_rdwr() operation.
1088 exacct_commit_callback(ac_info_t
*info
, void *ubuf
, size_t ubufsize
,
1089 void *buf
, size_t bufsize
, size_t *actual
)
1094 if ((error
= exacct_vn_write(info
, buf
, bufsize
)) == 0)
1100 exacct_do_commit_proc(ac_info_t
*ac_proc
, proc_t
*p
, int wstat
)
1104 ulong_t mask
[AC_MASK_SZ
];
1106 mutex_enter(&ac_proc
->ac_lock
);
1107 if (ac_proc
->ac_state
== AC_ON
) {
1108 bt_copy(&ac_proc
->ac_mask
[0], mask
, AC_MASK_SZ
);
1109 mutex_exit(&ac_proc
->ac_lock
);
1111 mutex_exit(&ac_proc
->ac_lock
);
1115 mutex_enter(&p
->p_lock
);
1116 size
= strlen(p
->p_user
.u_comm
) + 1;
1117 mutex_exit(&p
->p_lock
);
1119 pu
= kmem_alloc(sizeof (proc_usage_t
), KM_SLEEP
);
1120 pu
->pu_command
= kmem_alloc(size
, KM_SLEEP
);
1121 mutex_enter(&p
->p_lock
);
1122 exacct_calculate_proc_usage(p
, pu
, mask
, EW_FINAL
, wstat
);
1123 mutex_exit(&p
->p_lock
);
1125 (void) exacct_assemble_proc_usage(ac_proc
, pu
,
1126 exacct_commit_callback
, NULL
, 0, &size
, EW_FINAL
);
1128 kmem_free(pu
->pu_command
, strlen(pu
->pu_command
) + 1);
1129 kmem_free(pu
, sizeof (proc_usage_t
));
1133 * void exacct_commit_proc(proc_t *, int)
1136 * exacct_commit_proc() calculates the final usage for a process, updating the
1137 * task usage if task accounting is active, and writing a process record if
1138 * process accounting is active. exacct_commit_proc() is intended for being
1139 * called from proc_exit().
1145 * Suitable for KM_SLEEP allocations. p_lock must not be held at entry.
1148 exacct_commit_proc(proc_t
*p
, int wstat
)
1150 zone_t
*zone
= p
->p_zone
;
1151 struct exacct_globals
*acg
, *gacg
= NULL
;
1153 if (exacct_zone_key
== ZONE_KEY_UNINITIALIZED
) {
1155 * acctctl module not loaded. Nothing to do.
1161 * If acctctl module is loaded when zone is in down state then
1162 * zone_getspecific can return NULL for that zone.
1164 acg
= zone_getspecific(exacct_zone_key
, zone
);
1167 exacct_do_commit_proc(&acg
->ac_proc
, p
, wstat
);
1168 if (zone
!= global_zone
) {
1169 gacg
= zone_getspecific(exacct_zone_key
, global_zone
);
1170 exacct_do_commit_proc(&gacg
->ac_proc
, p
, wstat
);
/*
 * exacct_attach_netstat_item() attaches to "record" the net_stat_t value
 * selected by the resource id "res".  The calls visible here attach ns_name
 * as an EXT_STRING and, as EXT_UINT64 items, the current time in seconds
 * (ts.tv_sec), ns_ibytes, ns_obytes, ns_ipackets, ns_opackets, ns_ierrors
 * and ns_oerrors.  exacct_assemble_net_record() adds this function's return
 * value to its count of attached items.
 *
 * NOTE(review): this extract is missing lines (the return type, the switch
 * header, most case labels, the breaks and the return statement).  Restore
 * them from a pristine copy of the file before changing this function.
 */
1175 exacct_attach_netstat_item(net_stat_t
*ns
, ea_object_t
*record
, int res
)
1181 (void) ea_attach_item(record
, ns
->ns_name
,
1182 strlen(ns
->ns_name
) + 1, EXT_STRING
| EXD_NET_STATS_NAME
);
1184 case AC_NET_CURTIME
:
1190 now
= (uint64_t)(ulong_t
)ts
.tv_sec
;
1191 (void) ea_attach_item(record
, &now
, sizeof (uint64_t),
1192 EXT_UINT64
| EXD_NET_STATS_CURTIME
);
1196 (void) ea_attach_item(record
, &ns
->ns_ibytes
,
1197 sizeof (uint64_t), EXT_UINT64
| EXD_NET_STATS_IBYTES
);
1200 (void) ea_attach_item(record
, &ns
->ns_obytes
,
1201 sizeof (uint64_t), EXT_UINT64
| EXD_NET_STATS_OBYTES
);
1204 (void) ea_attach_item(record
, &ns
->ns_ipackets
,
1205 sizeof (uint64_t), EXT_UINT64
| EXD_NET_STATS_IPKTS
);
1208 (void) ea_attach_item(record
, &ns
->ns_opackets
,
1209 sizeof (uint64_t), EXT_UINT64
| EXD_NET_STATS_OPKTS
);
1211 case AC_NET_IERRPKTS
:
1212 (void) ea_attach_item(record
, &ns
->ns_ierrors
,
1213 sizeof (uint64_t), EXT_UINT64
| EXD_NET_STATS_IERRPKTS
);
1215 case AC_NET_OERRPKTS
:
1216 (void) ea_attach_item(record
, &ns
->ns_oerrors
,
1217 sizeof (uint64_t), EXT_UINT64
| EXD_NET_STATS_OERRPKTS
);
/*
 * exacct_attach_netdesc_item() attaches to "record" the net_desc_t value
 * selected by the resource id "res".  The calls visible here attach:
 *   - nd_name and nd_devname as EXT_STRING;
 *   - nd_ehost and nd_edest as EXT_RAW, sized by the field itself;
 *   - nd_vlan_tpid, nd_vlan_tci, nd_sap and nd_priority as EXT_UINT16;
 *   - nd_bw_limit as EXT_UINT64;
 *   - the source and destination addresses either as the single word
 *     nd_saddr[3] / nd_daddr[3] (EXT_UINT32, V4 catalog ids) or as the whole
 *     array (EXT_RAW, V6 catalog ids);
 *   - nd_sport and nd_dport as EXT_UINT16;
 *   - nd_protocol and nd_dsfield as EXT_UINT8.
 * exacct_assemble_net_record() adds this function's return value to its
 * count of attached items.
 *
 * NOTE(review): this extract is missing lines (the return type, the switch
 * header, several case labels, the condition that selects the V4 or V6
 * address form, the breaks and the return statement).  Restore them from a
 * pristine copy of the file before changing this function.
 */
1226 exacct_attach_netdesc_item(net_desc_t
*nd
, ea_object_t
*record
, int res
)
1232 (void) ea_attach_item(record
, nd
->nd_name
,
1233 strlen(nd
->nd_name
) + 1, EXT_STRING
| EXD_NET_DESC_NAME
);
1235 case AC_NET_DEVNAME
:
1236 (void) ea_attach_item(record
, nd
->nd_devname
,
1237 strlen(nd
->nd_devname
) + 1, EXT_STRING
|
1238 EXD_NET_DESC_DEVNAME
);
1241 (void) ea_attach_item(record
, &nd
->nd_ehost
,
1242 sizeof (nd
->nd_ehost
), EXT_RAW
| EXD_NET_DESC_EHOST
);
1245 (void) ea_attach_item(record
, &nd
->nd_edest
,
1246 sizeof (nd
->nd_edest
), EXT_RAW
| EXD_NET_DESC_EDEST
);
1248 case AC_NET_VLAN_TPID
:
1249 (void) ea_attach_item(record
, &nd
->nd_vlan_tpid
,
1250 sizeof (ushort_t
), EXT_UINT16
| EXD_NET_DESC_VLAN_TPID
);
1252 case AC_NET_VLAN_TCI
:
1253 (void) ea_attach_item(record
, &nd
->nd_vlan_tci
,
1254 sizeof (ushort_t
), EXT_UINT16
| EXD_NET_DESC_VLAN_TCI
);
1257 (void) ea_attach_item(record
, &nd
->nd_sap
,
1258 sizeof (ushort_t
), EXT_UINT16
| EXD_NET_DESC_SAP
);
1260 case AC_NET_PRIORITY
:
1261 (void) ea_attach_item(record
, &nd
->nd_priority
,
1262 sizeof (ushort_t
), EXT_UINT16
| EXD_NET_DESC_PRIORITY
);
1264 case AC_NET_BWLIMIT
:
1265 (void) ea_attach_item(record
, &nd
->nd_bw_limit
,
1266 sizeof (uint64_t), EXT_UINT64
| EXD_NET_DESC_BWLIMIT
);
1270 (void) ea_attach_item(record
, &nd
->nd_saddr
[3],
1271 sizeof (uint32_t), EXT_UINT32
|
1272 EXD_NET_DESC_V4SADDR
);
1274 (void) ea_attach_item(record
, &nd
->nd_saddr
,
1275 sizeof (nd
->nd_saddr
), EXT_RAW
|
1276 EXD_NET_DESC_V6SADDR
);
1281 (void) ea_attach_item(record
, &nd
->nd_daddr
[3],
1282 sizeof (uint32_t), EXT_UINT32
|
1283 EXD_NET_DESC_V4DADDR
);
1285 (void) ea_attach_item(record
, &nd
->nd_daddr
,
1286 sizeof (nd
->nd_daddr
), EXT_RAW
|
1287 EXD_NET_DESC_V6DADDR
);
1291 (void) ea_attach_item(record
, &nd
->nd_sport
,
1292 sizeof (uint16_t), EXT_UINT16
| EXD_NET_DESC_SPORT
);
1295 (void) ea_attach_item(record
, &nd
->nd_dport
,
1296 sizeof (uint16_t), EXT_UINT16
| EXD_NET_DESC_DPORT
);
1298 case AC_NET_PROTOCOL
:
1299 (void) ea_attach_item(record
, &nd
->nd_protocol
,
1300 sizeof (uint8_t), EXT_UINT8
| EXD_NET_DESC_PROTOCOL
);
1302 case AC_NET_DSFIELD
:
1303 (void) ea_attach_item(record
, &nd
->nd_dsfield
,
1304 sizeof (uint8_t), EXT_UINT8
| EXD_NET_DESC_DSFIELD
);
1312 static ea_object_t
*
1313 exacct_assemble_net_record(void *ninfo
, ulong_t
*mask
, ea_catalog_t record_type
,
1318 ea_object_t
*record
;
1321 * Assemble usage values into group.
1323 record
= ea_alloc_group(EXT_GROUP
| EXC_DEFAULT
| record_type
);
1324 for (res
= 1, count
= 0; res
<= AC_NET_MAX_RES
; res
++)
1325 if (BT_TEST(mask
, res
)) {
1326 if (what
== EX_NET_LNDESC_REC
||
1327 what
== EX_NET_FLDESC_REC
) {
1328 count
+= exacct_attach_netdesc_item(
1329 (net_desc_t
*)ninfo
, record
, res
);
1331 count
+= exacct_attach_netstat_item(
1332 (net_stat_t
*)ninfo
, record
, res
);
1336 ea_free_object(record
, EUP_ALLOC
);
1343 exacct_assemble_net_usage(ac_info_t
*ac_net
, void *ninfo
,
1344 int (*callback
)(ac_info_t
*, void *, size_t, void *, size_t, size_t *),
1345 void *ubuf
, size_t ubufsize
, size_t *actual
, int what
)
1347 ulong_t mask
[AC_MASK_SZ
];
1348 ea_object_t
*net_desc
;
1349 ea_catalog_t record_type
;
1354 mutex_enter(&ac_net
->ac_lock
);
1355 if (ac_net
->ac_state
== AC_OFF
) {
1356 mutex_exit(&ac_net
->ac_lock
);
1357 return (ENOTACTIVE
);
1359 bt_copy(&ac_net
->ac_mask
[0], mask
, AC_MASK_SZ
);
1360 mutex_exit(&ac_net
->ac_lock
);
1363 case EX_NET_LNDESC_REC
:
1364 record_type
= EXD_GROUP_NET_LINK_DESC
;
1366 case EX_NET_LNSTAT_REC
:
1367 record_type
= EXD_GROUP_NET_LINK_STATS
;
1369 case EX_NET_FLDESC_REC
:
1370 record_type
= EXD_GROUP_NET_FLOW_DESC
;
1372 case EX_NET_FLSTAT_REC
:
1373 record_type
= EXD_GROUP_NET_FLOW_STATS
;
1377 net_desc
= exacct_assemble_net_record(ninfo
, mask
, record_type
, what
);
1378 if (net_desc
== NULL
)
1382 * Pack object into buffer and pass to callback.
1384 bufsize
= ea_pack_object(net_desc
, NULL
, 0);
1385 buf
= kmem_alloc(bufsize
, KM_NOSLEEP
);
1389 (void) ea_pack_object(net_desc
, buf
, bufsize
);
1391 ret
= callback(ac_net
, ubuf
, ubufsize
, buf
, bufsize
, actual
);
1394 * Free all previously allocations.
1396 kmem_free(buf
, bufsize
);
1397 ea_free_object(net_desc
, EUP_ALLOC
);
1402 exacct_commit_netinfo(void *arg
, int what
)
1405 ulong_t mask
[AC_MASK_SZ
];
1406 struct exacct_globals
*acg
;
1409 if (exacct_zone_key
== ZONE_KEY_UNINITIALIZED
) {
1411 * acctctl module not loaded. Nothing to do.
1413 return (ENOTACTIVE
);
1417 * Even though each zone nominally has its own flow accounting settings
1418 * (ac_flow), these are only maintained by and for the global zone.
1420 * If this were to change in the future, this function should grow a
1421 * second zoneid (or zone) argument, and use the corresponding zone's
1422 * settings rather than always using those of the global zone.
1424 acg
= zone_getspecific(exacct_zone_key
, global_zone
);
1425 ac_net
= &acg
->ac_net
;
1427 mutex_enter(&ac_net
->ac_lock
);
1428 if (ac_net
->ac_state
== AC_OFF
) {
1429 mutex_exit(&ac_net
->ac_lock
);
1430 return (ENOTACTIVE
);
1432 bt_copy(&ac_net
->ac_mask
[0], mask
, AC_MASK_SZ
);
1433 mutex_exit(&ac_net
->ac_lock
);
1435 return (exacct_assemble_net_usage(ac_net
, arg
, exacct_commit_callback
,
1436 NULL
, 0, &size
, what
));
/*
 * exacct_attach_flow_item() attaches to "record" the flow_usage_t value
 * selected by the resource id "res".  The calls visible here attach:
 *   - the source and destination addresses either as the single word
 *     fu_saddr[3] / fu_daddr[3] (EXT_UINT32, V4 catalog ids) or as the whole
 *     array (EXT_RAW);
 *   - fu_sport and fu_dport as EXT_UINT16;
 *   - fu_protocol and fu_dsfield as EXT_UINT8;
 *   - fu_ctime and fu_lseen as EXT_UINT64;
 *   - fu_nbytes and fu_npackets as EXT_UINT32;
 *   - fu_projid and fu_userid as EXT_UINT32, each only when it is >= 0;
 *   - fu_aname as EXT_STRING.
 * exacct_assemble_flow_record() adds this function's return value to its
 * count of attached items.
 *
 * NOTE(review): fu_nbytes and fu_npackets are passed with
 * sizeof (uint64_t) but tagged EXT_UINT32, unlike fu_ctime/fu_lseen which
 * use EXT_UINT64 for the same size.  If the fields are 64-bit this records
 * only part of the counter -- confirm against flow_usage_t and the
 * consumers of the record format before changing the catalog type.
 *
 * NOTE(review): this extract is missing lines (the return type, the switch
 * header, several case labels, the condition that selects the V4 or V6
 * address form, the V6 catalog ids, the breaks and the return statement).
 * Restore them from a pristine copy of the file before changing this
 * function.
 */
1440 exacct_attach_flow_item(flow_usage_t
*fu
, ea_object_t
*record
, int res
)
1447 (void) ea_attach_item(record
, &fu
->fu_saddr
[3],
1448 sizeof (uint32_t), EXT_UINT32
| EXD_FLOW_V4SADDR
);
1450 (void) ea_attach_item(record
, &fu
->fu_saddr
,
1451 sizeof (fu
->fu_saddr
), EXT_RAW
|
1457 (void) ea_attach_item(record
, &fu
->fu_daddr
[3],
1458 sizeof (uint32_t), EXT_UINT32
| EXD_FLOW_V4DADDR
);
1460 (void) ea_attach_item(record
, &fu
->fu_daddr
,
1461 sizeof (fu
->fu_daddr
), EXT_RAW
|
1466 (void) ea_attach_item(record
, &fu
->fu_sport
,
1467 sizeof (uint16_t), EXT_UINT16
| EXD_FLOW_SPORT
);
1470 (void) ea_attach_item(record
, &fu
->fu_dport
,
1471 sizeof (uint16_t), EXT_UINT16
| EXD_FLOW_DPORT
);
1473 case AC_FLOW_PROTOCOL
:
1474 (void) ea_attach_item(record
, &fu
->fu_protocol
,
1475 sizeof (uint8_t), EXT_UINT8
| EXD_FLOW_PROTOCOL
);
1477 case AC_FLOW_DSFIELD
:
1478 (void) ea_attach_item(record
, &fu
->fu_dsfield
,
1479 sizeof (uint8_t), EXT_UINT8
| EXD_FLOW_DSFIELD
);
1482 (void) ea_attach_item(record
, &fu
->fu_ctime
,
1483 sizeof (uint64_t), EXT_UINT64
| EXD_FLOW_CTIME
);
1486 (void) ea_attach_item(record
, &fu
->fu_lseen
,
1487 sizeof (uint64_t), EXT_UINT64
| EXD_FLOW_LSEEN
);
1489 case AC_FLOW_NBYTES
:
1490 (void) ea_attach_item(record
, &fu
->fu_nbytes
,
1491 sizeof (uint64_t), EXT_UINT32
| EXD_FLOW_NBYTES
);
1494 (void) ea_attach_item(record
, &fu
->fu_npackets
,
1495 sizeof (uint64_t), EXT_UINT32
| EXD_FLOW_NPKTS
);
1497 case AC_FLOW_PROJID
:
1498 if (fu
->fu_projid
>= 0) {
1499 (void) ea_attach_item(record
, &fu
->fu_projid
,
1500 sizeof (uint32_t), EXT_UINT32
| EXD_FLOW_PROJID
);
1504 if (fu
->fu_userid
>= 0) {
1505 (void) ea_attach_item(record
, &fu
->fu_userid
,
1506 sizeof (uint32_t), EXT_UINT32
| EXD_FLOW_UID
);
1510 (void) ea_attach_item(record
, fu
->fu_aname
,
1511 strlen(fu
->fu_aname
) + 1, EXT_STRING
| EXD_FLOW_ANAME
);
1519 static ea_object_t
*
1520 exacct_assemble_flow_record(flow_usage_t
*fu
, ulong_t
*mask
,
1521 ea_catalog_t record_type
)
1524 ea_object_t
*record
;
1527 * Assemble usage values into group.
1529 record
= ea_alloc_group(EXT_GROUP
| EXC_DEFAULT
| record_type
);
1530 for (res
= 1, count
= 0; res
<= AC_FLOW_MAX_RES
; res
++)
1531 if (BT_TEST(mask
, res
))
1532 count
+= exacct_attach_flow_item(fu
, record
, res
);
1534 ea_free_object(record
, EUP_ALLOC
);
1541 exacct_assemble_flow_usage(ac_info_t
*ac_flow
, flow_usage_t
*fu
,
1542 int (*callback
)(ac_info_t
*, void *, size_t, void *, size_t, size_t *),
1543 void *ubuf
, size_t ubufsize
, size_t *actual
)
1545 ulong_t mask
[AC_MASK_SZ
];
1546 ea_object_t
*flow_usage
;
1547 ea_catalog_t record_type
;
1552 mutex_enter(&ac_flow
->ac_lock
);
1553 if (ac_flow
->ac_state
== AC_OFF
) {
1554 mutex_exit(&ac_flow
->ac_lock
);
1555 return (ENOTACTIVE
);
1557 bt_copy(&ac_flow
->ac_mask
[0], mask
, AC_MASK_SZ
);
1558 mutex_exit(&ac_flow
->ac_lock
);
1560 record_type
= EXD_GROUP_FLOW
;
1562 flow_usage
= exacct_assemble_flow_record(fu
, mask
, record_type
);
1563 if (flow_usage
== NULL
) {
1568 * Pack object into buffer and pass to callback.
1570 bufsize
= ea_pack_object(flow_usage
, NULL
, 0);
1571 buf
= kmem_alloc(bufsize
, KM_NOSLEEP
);
1576 (void) ea_pack_object(flow_usage
, buf
, bufsize
);
1578 ret
= callback(ac_flow
, ubuf
, ubufsize
, buf
, bufsize
, actual
);
1581 * Free all previously allocations.
1583 kmem_free(buf
, bufsize
);
1584 ea_free_object(flow_usage
, EUP_ALLOC
);
1589 exacct_commit_flow(void *arg
)
1591 flow_usage_t
*f
= (flow_usage_t
*)arg
;
1593 ulong_t mask
[AC_MASK_SZ
];
1594 struct exacct_globals
*acg
;
1597 if (exacct_zone_key
== ZONE_KEY_UNINITIALIZED
) {
1599 * acctctl module not loaded. Nothing to do.
1605 * Even though each zone nominally has its own flow accounting settings
1606 * (ac_flow), these are only maintained by and for the global zone.
1608 * If this were to change in the future, this function should grow a
1609 * second zoneid (or zone) argument, and use the corresponding zone's
1610 * settings rather than always using those of the global zone.
1612 acg
= zone_getspecific(exacct_zone_key
, global_zone
);
1613 ac_flow
= &acg
->ac_flow
;
1615 mutex_enter(&ac_flow
->ac_lock
);
1616 if (ac_flow
->ac_state
== AC_OFF
) {
1617 mutex_exit(&ac_flow
->ac_lock
);
1620 bt_copy(&ac_flow
->ac_mask
[0], mask
, AC_MASK_SZ
);
1621 mutex_exit(&ac_flow
->ac_lock
);
1623 (void) exacct_assemble_flow_usage(ac_flow
, f
, exacct_commit_callback
,
1628 * int exacct_tag_task(task_t *, void *, size_t, int)
1631 * exacct_tag_task() provides the exacct record construction and writing
1632 * support required by putacct(2) for task entities.
1635 * The result of the write operation is returned, unless the extended
1636 * accounting facility is not active, in which case ENOTACTIVE is returned.
1639 * Suitable for KM_SLEEP allocations.
1642 exacct_tag_task(ac_info_t
*ac_task
, task_t
*tk
, void *ubuf
, size_t ubufsz
,
1651 mutex_enter(&ac_task
->ac_lock
);
1652 if (ac_task
->ac_state
== AC_OFF
|| ac_task
->ac_vnode
== NULL
) {
1653 mutex_exit(&ac_task
->ac_lock
);
1654 return (ENOTACTIVE
);
1656 mutex_exit(&ac_task
->ac_lock
);
1658 tag
= ea_alloc_group(EXT_GROUP
| EXC_DEFAULT
| EXD_GROUP_TASK_TAG
);
1659 (void) ea_attach_item(tag
, &tk
->tk_tkid
, 0,
1660 EXT_UINT32
| EXC_DEFAULT
| EXD_TASK_TASKID
);
1661 (void) ea_attach_item(tag
, tk
->tk_zone
->zone_nodename
, 0,
1662 EXT_STRING
| EXC_DEFAULT
| EXD_TASK_HOSTNAME
);
1663 if (flags
== EP_RAW
)
1664 cat
= EXT_RAW
| EXC_DEFAULT
| EXD_TASK_TAG
;
1666 cat
= EXT_EXACCT_OBJECT
| EXC_DEFAULT
| EXD_TASK_TAG
;
1667 (void) ea_attach_item(tag
, ubuf
, ubufsz
, cat
);
1669 bufsize
= ea_pack_object(tag
, NULL
, 0);
1670 buf
= kmem_alloc(bufsize
, KM_SLEEP
);
1671 (void) ea_pack_object(tag
, buf
, bufsize
);
1672 error
= exacct_vn_write(ac_task
, buf
, bufsize
);
1673 kmem_free(buf
, bufsize
);
1674 ea_free_object(tag
, EUP_ALLOC
);
1679 * exacct_tag_proc(pid_t, taskid_t, void *, size_t, int, char *)
1682 * exacct_tag_proc() provides the exacct record construction and writing
1683 * support required by putacct(2) for processes.
1686 * The result of the write operation is returned, unless the extended
1687 * accounting facility is not active, in which case ENOTACTIVE is returned.
1690 * Suitable for KM_SLEEP allocations.
1693 exacct_tag_proc(ac_info_t
*ac_proc
, pid_t pid
, taskid_t tkid
, void *ubuf
,
1694 size_t ubufsz
, int flags
, const char *hostname
)
1702 mutex_enter(&ac_proc
->ac_lock
);
1703 if (ac_proc
->ac_state
== AC_OFF
|| ac_proc
->ac_vnode
== NULL
) {
1704 mutex_exit(&ac_proc
->ac_lock
);
1705 return (ENOTACTIVE
);
1707 mutex_exit(&ac_proc
->ac_lock
);
1709 tag
= ea_alloc_group(EXT_GROUP
| EXC_DEFAULT
| EXD_GROUP_PROC_TAG
);
1710 (void) ea_attach_item(tag
, &pid
, sizeof (uint32_t),
1711 EXT_UINT32
| EXC_DEFAULT
| EXD_PROC_PID
);
1712 (void) ea_attach_item(tag
, &tkid
, 0,
1713 EXT_UINT32
| EXC_DEFAULT
| EXD_TASK_TASKID
);
1714 (void) ea_attach_item(tag
, (void *)hostname
, 0,
1715 EXT_STRING
| EXC_DEFAULT
| EXD_TASK_HOSTNAME
);
1716 if (flags
== EP_RAW
)
1717 cat
= EXT_RAW
| EXC_DEFAULT
| EXD_PROC_TAG
;
1719 cat
= EXT_EXACCT_OBJECT
| EXC_DEFAULT
| EXD_PROC_TAG
;
1720 (void) ea_attach_item(tag
, ubuf
, ubufsz
, cat
);
1722 bufsize
= ea_pack_object(tag
, NULL
, 0);
1723 buf
= kmem_alloc(bufsize
, KM_SLEEP
);
1724 (void) ea_pack_object(tag
, buf
, bufsize
);
1725 error
= exacct_vn_write(ac_proc
, buf
, bufsize
);
1726 kmem_free(buf
, bufsize
);
1727 ea_free_object(tag
, EUP_ALLOC
);
1732 * void exacct_init(void)
1735 * Initialized the extended accounting subsystem.
1741 * Suitable for KM_SLEEP allocations.
1746 exacct_queue
= system_taskq
;
1747 exacct_object_cache
= kmem_cache_create("exacct_object_cache",
1748 sizeof (ea_object_t
), 0, NULL
, NULL
, NULL
, NULL
, NULL
, 0);
1749 task_commit_thread_init();
1753 * exacct_snapshot_proc_mstate() copies a process's microstate accounting data
1754 * and resource usage counters into a given task_usage_t. It differs from
1755 * exacct_copy_proc_mstate() in that here a) we are copying to a task_usage_t,
1756 * b) p_lock will have been acquired earlier in the call path and c) we
1757 * are here including the process's user and system times.
1760 exacct_snapshot_proc_mstate(proc_t
*p
, task_usage_t
*tu
)
1762 tu
->tu_utime
= mstate_aggr_state(p
, LMS_USER
);
1763 tu
->tu_stime
= mstate_aggr_state(p
, LMS_SYSTEM
);
1764 tu
->tu_minflt
= p
->p_ru
.minflt
;
1765 tu
->tu_majflt
= p
->p_ru
.majflt
;
1766 tu
->tu_sndmsg
= p
->p_ru
.msgsnd
;
1767 tu
->tu_rcvmsg
= p
->p_ru
.msgrcv
;
1768 tu
->tu_ioch
= p
->p_ru
.ioch
;
1769 tu
->tu_iblk
= p
->p_ru
.inblock
;
1770 tu
->tu_oblk
= p
->p_ru
.oublock
;
1771 tu
->tu_vcsw
= p
->p_ru
.nvcsw
;
1772 tu
->tu_icsw
= p
->p_ru
.nivcsw
;
1773 tu
->tu_nsig
= p
->p_ru
.nsignals
;
1774 tu
->tu_nswp
= p
->p_ru
.nswap
;
1775 tu
->tu_nscl
= p
->p_ru
.sysc
;
1779 * void exacct_move_mstate(proc_t *, task_t *, task_t *)
1782 * exacct_move_mstate() is called by task_change() and accounts for
1783 * a process's resource usage when it is moved from one task to another.
1785 * The process's usage at this point is recorded in the new task so
1786 * that it can be excluded from the calculation of resources consumed
1789 * The resource usage inherited by the new task is also added to the
1790 * aggregate maintained by the old task for processes that have exited.
1796 * pidlock and p_lock held across exacct_move_mstate().
1799 exacct_move_mstate(proc_t
*p
, task_t
*oldtk
, task_t
*newtk
)
1803 /* Take a snapshot of this process's mstate and RU counters */
1804 exacct_snapshot_proc_mstate(p
, &tu
);
1807 * Use the snapshot to increment the aggregate usage of the old
1808 * task, and the inherited usage of the new one.
1810 mutex_enter(&oldtk
->tk_usage_lock
);
1811 exacct_add_task_mstate(oldtk
->tk_usage
, &tu
);
1812 mutex_exit(&oldtk
->tk_usage_lock
);
1813 mutex_enter(&newtk
->tk_usage_lock
);
1814 exacct_add_task_mstate(newtk
->tk_inherited
, &tu
);
1815 mutex_exit(&newtk
->tk_usage_lock
);