kernel NFS - Fix another deadlock in the readdirplus code
[dragonfly.git] / lib / libevtr / evtr.c
blob2d0f50fabd226018116e2bf37e827ee2b35283d9
1 /*
2 * Copyright (c) 2009, 2010 Aggelos Economopoulos. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in
12 * the documentation and/or other materials provided with the
13 * distribution.
14 * 3. Neither the name of The DragonFly Project nor the names of its
15 * contributors may be used to endorse or promote products derived
16 * from this software without specific, prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
32 #include <assert.h>
33 #include <ctype.h>
34 #include <errno.h>
35 #include <limits.h>
36 #include <stdarg.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <sys/queue.h>
41 #include <sys/stat.h>
42 #include <sys/tree.h>
45 #include "evtr.h"
/*
 * File-local constants: the event header size limit, the string namespace
 * ids, the hash table size, the record alignment/boundary sizes and the
 * internal flag bits.
 */
47 enum {
48 MAX_EVHDR_SIZE = PATH_MAX + 200,
49 /* string namespaces */
/* namespace ids start at 1; tables indexed by namespace use (ns - 1) */
50 EVTR_NS_PATH = 0x1,
51 EVTR_NS_FUNC,
52 EVTR_NS_DSTR,
53 EVTR_NS_MAX,
54 NR_BUCKETS = 1023, /* XXX */
/* records are padded to REC_ALIGN bytes and never straddle REC_BOUNDARY */
55 REC_ALIGN = 8,
56 REC_BOUNDARY = 1 << 14,
/* filter flag: the filter's fmtid has been resolved */
57 FILTF_ID = 0x10,
58 EVTRF_WR = 0x1, /* open for writing */
/* Numeric ids used in records to refer to previously dumped strings/formats. */
61 typedef uint16_t fileid_t;
62 typedef uint16_t funcid_t;
63 typedef uint16_t fmtid_t;
/* Header common to every record; the other record headers embed it first. */
65 struct trace_event_header {
66 uint8_t type;
67 uint64_t ts; /* XXX: this should only be part of probe */
68 } __attribute__((packed));
/* Header of a probe record; datalen bytes of format data follow it. */
70 struct probe_event_header {
71 struct trace_event_header eh;
73 * For these fields, 0 implies "not available"
75 fileid_t file;
76 funcid_t caller1;
77 funcid_t caller2;
78 funcid_t func;
79 uint16_t line;
80 fmtid_t fmt;
81 uint16_t datalen;
82 uint8_t cpu; /* -1 if n/a */
83 } __attribute__((packed));
/* Header of a string definition record; len bytes of string data follow it. */
85 struct string_event_header {
86 struct trace_event_header eh;
87 uint16_t ns;
88 uint32_t id;
89 uint16_t len;
90 } __attribute__((packed));
/* Header of a format definition record; subsys_len + fmt_len bytes follow it. */
92 struct fmt_event_header {
93 struct trace_event_header eh;
94 uint16_t id;
95 uint8_t subsys_len;
96 uint8_t fmt_len;
97 } __attribute__((packed));
/* One chained entry of a string hash table (string -> id). */
99 struct hashentry {
100 const char *str;
101 uint16_t id;
102 struct hashentry *next;
/* String hash table; id holds the most recently assigned id. */
105 struct hashtab {
106 struct hashentry *buckets[NR_BUCKETS];
107 uint16_t id;
/* A (subsystem, format string) pair. */
110 struct event_fmt {
111 const char *subsys;
112 const char *fmt;
/* Queue node for a filter whose format has not been mapped to an id yet. */
115 struct event_filter_unresolved {
116 TAILQ_ENTRY(event_filter_unresolved) link;
117 evtr_filter_t filt;
/* Red-black tree node mapping an integer id to opaque data. */
120 struct id_map {
121 RB_ENTRY(id_map) rb_node;
122 int id;
123 const void *data;
126 RB_HEAD(id_tree, id_map);
127 struct string_map {
128 struct id_tree root;
131 struct fmt_map {
132 struct id_tree root;
135 RB_HEAD(thread_tree, evtr_thread);
137 struct thread_map {
138 struct thread_tree root;
/* A registered per-event callback and the argument passed to it. */
141 struct event_callback {
142 void (*cb)(evtr_event_t, void *data);
143 void *data; /* this field must be malloc()ed */
/* Per-cpu state tracked while reading. */
146 struct cpu {
147 struct evtr_thread *td; /* currently executing thread */
/*
 * State of one open event stream. The two unions hold hash tables when the
 * stream is open for writing and id maps when it is open for reading.
 */
150 struct evtr {
151 FILE *f;
152 int err;
153 int flags;
154 char *errmsg;
/* our own count of bytes read/written; checked against ftello() */
155 off_t bytes;
156 union {
158 * When writing, we keep track of the strings we've
159 * already dumped so we only dump them once.
160 * Paths, function names etc belong to different
161 * namespaces.
163 struct hashtab *strings[EVTR_NS_MAX - 1];
165 * When reading, we build a map from id to string.
166 * Every id must be defined at the point of use.
168 struct string_map maps[EVTR_NS_MAX - 1];
170 union {
171 /* same as above, but for subsys+fmt pairs */
172 struct fmt_map fmtmap;
173 struct hashtab *fmts;
176 * Filters that have a format specified and we
177 * need to resolve that to an fmtid
179 TAILQ_HEAD(, event_filter_unresolved) unresolved_filtq;
/* array of ncbs callback pointers run for every probe event read */
180 struct event_callback **cbs;
181 int ncbs;
182 struct thread_map threads;
/* array of ncpus entries, allocated when a cpuinfo record is read */
183 struct cpu *cpus;
184 int ncpus;
/* State of one query over a stream: its filters, counters and data buffer. */
187 struct evtr_query {
188 evtr_t evtr;
189 off_t off;
/* array of nfilt filters; nfilt == 0 means every event matches */
190 evtr_filter_t filt;
191 int nfilt;
192 int nmatched;
193 int ntried;
/* buffer of bufsize bytes holding the format data of the current event */
194 void *buf;
195 int bufsize;
/* Debug level; any non-zero value enables printd() and validate_string(). */
static int evtr_debug = 0;

/*
 * Set the debug level used by this file.
 */
void
evtr_set_debug(int lvl)
{
	evtr_debug = lvl;
}
/* Generate the red-black tree code for id maps (keyed on the id field). */
207 static int id_map_cmp(struct id_map *, struct id_map *);
208 RB_PROTOTYPE2(id_tree, id_map, rb_node, id_map_cmp, int);
209 RB_GENERATE2(id_tree, id_map, rb_node, id_map_cmp, int, id);
/* Generate the red-black tree code for threads (keyed on the id field). */
211 static int thread_cmp(struct evtr_thread *, struct evtr_thread *);
212 RB_PROTOTYPE2(thread_tree, evtr_thread, rb_node, thread_cmp, void *);
213 RB_GENERATE2(thread_tree, evtr_thread, rb_node, thread_cmp, void *, id);
/* printf-style debug output to stderr, emitted only when evtr_debug is set. */
215 #define printd(...) \
216 do { \
217 if (evtr_debug) \
218 fprintf(stderr, __VA_ARGS__); \
219 } while (0)
221 static inline
222 void
223 validate_string(const char *str)
225 if (!evtr_debug)
226 return;
227 for (; *str; ++str)
228 assert(isprint(*str));
231 static
232 void
233 id_tree_free(struct id_tree *root)
235 struct id_map *v, *n;
237 for (v = RB_MIN(id_tree, root); v; v = n) {
238 n = RB_NEXT(id_tree, root, v);
239 RB_REMOVE(id_tree, root, v);
243 static
245 evtr_register_callback(evtr_t evtr, void (*fn)(evtr_event_t, void *), void *d)
247 struct event_callback *cb;
248 void *cbs;
250 if (!(cb = malloc(sizeof(*cb)))) {
251 evtr->err = ENOMEM;
252 return !0;
254 cb->cb = fn;
255 cb->data = d;
256 if (!(cbs = realloc(evtr->cbs, (++evtr->ncbs) * sizeof(cb)))) {
257 --evtr->ncbs;
258 free(cb);
259 evtr->err = ENOMEM;
260 return !0;
262 evtr->cbs = cbs;
263 evtr->cbs[evtr->ncbs - 1] = cb;
264 return 0;
267 static
268 void
269 evtr_deregister_callbacks(evtr_t evtr)
271 int i;
273 for (i = 0; i < evtr->ncbs; ++i) {
274 free(evtr->cbs[i]);
276 free(evtr->cbs);
277 evtr->cbs = NULL;
280 static
281 void
282 evtr_run_callbacks(evtr_event_t ev, evtr_t evtr)
284 struct event_callback *cb;
285 int i;
287 for (i = 0; i < evtr->ncbs; ++i) {
288 cb = evtr->cbs[i];
289 cb->cb(ev, cb->data);
293 static
294 struct cpu *
295 evtr_cpu(evtr_t evtr, int c)
297 if ((c < 0) || (c >= evtr->ncpus))
298 return NULL;
299 return &evtr->cpus[c];
302 static
304 parse_format_data(evtr_event_t ev, const char *fmt, ...) __attribute__((format (scanf, 2, 3)));
305 static
307 parse_format_data(evtr_event_t ev, const char *fmt, ...)
309 va_list ap;
310 char buf[2048];
312 if (strcmp(fmt, ev->fmt))
313 return 0;
314 vsnprintf(buf, sizeof(buf), fmt, ev->fmtdata);
315 printd("string is: %s\n", buf);
316 va_start(ap, fmt);
317 return vsscanf(buf, fmt, ap);
320 static
321 void
322 evtr_deregister_filters(evtr_t evtr, evtr_filter_t filt, int nfilt)
324 struct event_filter_unresolved *u, *tmp;
325 int i;
326 TAILQ_FOREACH_MUTABLE(u, &evtr->unresolved_filtq, link, tmp) {
327 for (i = 0; i < nfilt; ++i) {
328 if (u->filt == &filt[i]) {
329 TAILQ_REMOVE(&evtr->unresolved_filtq, u, link);
335 static
336 void
337 evtr_resolve_filters(evtr_t evtr, const char *fmt, int id)
339 struct event_filter_unresolved *u, *tmp;
340 TAILQ_FOREACH_MUTABLE(u, &evtr->unresolved_filtq, link, tmp) {
341 if ((u->filt->fmt != NULL) && !strcmp(fmt, u->filt->fmt)) {
342 u->filt->fmtid = id;
343 u->filt->flags |= FILTF_ID;
344 TAILQ_REMOVE(&evtr->unresolved_filtq, u, link);
349 static
351 evtr_filter_register(evtr_t evtr, evtr_filter_t filt)
353 struct event_filter_unresolved *res;
355 if (!(res = malloc(sizeof(*res)))) {
356 evtr->err = ENOMEM;
357 return !0;
359 res->filt = filt;
360 TAILQ_INSERT_TAIL(&evtr->unresolved_filtq, res, link);
361 return 0;
364 void
365 evtr_event_data(evtr_event_t ev, char *buf, size_t len)
368 * XXX: we implicitly trust the format string.
369 * We shouldn't.
371 if (ev->fmtdatalen) {
372 vsnprintf(buf, len, ev->fmt, ev->fmtdata);
373 } else {
374 strlcpy(buf, ev->fmt, len);
380 evtr_error(evtr_t evtr)
382 return evtr->err || (evtr->errmsg == NULL);
385 const char *
386 evtr_errmsg(evtr_t evtr)
388 return evtr->errmsg ? evtr->errmsg : strerror(evtr->err);
391 static
393 id_map_cmp(struct id_map *a, struct id_map *b)
395 return a->id - b->id;
398 static
400 thread_cmp(struct evtr_thread *a, struct evtr_thread *b)
402 return (int)a->id - (int)b->id;
405 #define DEFINE_MAP_FIND(prefix, type) \
406 static \
407 type \
408 prefix ## _map_find(struct id_tree *tree, int id)\
410 struct id_map *sid; \
412 sid = id_tree_RB_LOOKUP(tree, id); \
413 return sid ? sid->data : NULL; \
416 DEFINE_MAP_FIND(string, const char *)
417 DEFINE_MAP_FIND(fmt, const struct event_fmt *)
419 static
420 struct evtr_thread *
421 thread_map_find(struct thread_map *map, void *id)
423 return thread_tree_RB_LOOKUP(&map->root, id);
/*
 * Define <prefix>_map_insert(): map id to a private copy of data.
 *
 * _cmp compares two values (0 means equal) and _dup duplicates one.
 * The generated function returns 0 on success, including when id is
 * already mapped to an equal value; EEXIST when id is already mapped to
 * a different value; ENOMEM when an allocation fails, in which case the
 * tree is left unchanged.
 */
#define DEFINE_MAP_INSERT(prefix, type, _cmp, _dup)		\
	static							\
	int							\
	prefix ## _map_insert(struct id_tree *tree, type data, int id) \
	{							\
		struct id_map *sid, *osid;			\
								\
		sid = malloc(sizeof(*sid));			\
		if (!sid) {					\
			return ENOMEM;				\
		}						\
		sid->id = id;					\
		sid->data = data;				\
		if ((osid = id_tree_RB_INSERT(tree, sid))) {	\
			free(sid);				\
			if (_cmp((type)osid->data, data)) {	\
				return EEXIST;			\
			}					\
			printd("mapping already exists, skipping\n"); \
			/* we're OK with redefinitions of an id to the same string */ \
			return 0;				\
		}						\
		/* only do the strdup if we're inserting a new string */ \
		sid->data = _dup(data);				\
		if (!sid->data) {				\
			/* don't leave a node without data behind */ \
			RB_REMOVE(id_tree, tree, sid);		\
			free(sid);				\
			return ENOMEM;				\
		}						\
		return 0;					\
	}
453 static
454 void
455 thread_map_insert(struct thread_map *map, struct evtr_thread *td)
457 struct evtr_thread *otd;
459 if ((otd = thread_tree_RB_INSERT(&map->root, td))) {
461 * Thread addresses might be reused, we're
462 * ok with that.
463 * DANGER, Will Robinson: this means the user
464 * of the API needs to copy event->td if they
465 * want it to remain stable.
467 free((void *)otd->comm);
468 otd->comm = td->comm;
469 free(td);
473 static
475 event_fmt_cmp(const struct event_fmt *a, const struct event_fmt *b)
477 int ret = 0;
479 if (a->subsys) {
480 if (b->subsys) {
481 ret = strcmp(a->subsys, b->subsys);
482 } else {
483 ret = strcmp(a->subsys, "");
485 } else if (b->subsys) {
486 ret = strcmp("", b->subsys);
488 if (ret)
489 return ret;
490 return strcmp(a->fmt, b->fmt);
493 static
494 struct event_fmt *
495 event_fmt_dup(const struct event_fmt *o)
497 struct event_fmt *n;
499 if (!(n = malloc(sizeof(*n)))) {
500 return n;
502 memcpy(n, o, sizeof(*n));
503 return n;
/* string_map_insert(): values compared with strcmp, copied with strdup */
506 DEFINE_MAP_INSERT(string, const char *, strcmp, strdup)
/* fmt_map_insert(): values compared with event_fmt_cmp, copied with event_fmt_dup */
507 DEFINE_MAP_INSERT(fmt, const struct event_fmt *, event_fmt_cmp, event_fmt_dup)
509 static
511 hashfunc(const char *str)
513 unsigned long hash = 5381;
514 int c;
516 while ((c = *str++))
517 hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
518 return hash % NR_BUCKETS;
521 static
522 struct hashentry *
523 hash_find(struct hashtab *tab, const char *str)
525 struct hashentry *ent;
527 for(ent = tab->buckets[hashfunc(str)]; ent && strcmp(ent->str, str);
528 ent = ent->next);
530 return ent;
533 static
534 struct hashentry *
535 hash_insert(struct hashtab *tab, const char *str)
537 struct hashentry *ent;
538 int hsh;
540 if (!(ent = malloc(sizeof(*ent)))) {
541 fprintf(stderr, "out of memory\n");
542 return NULL;
544 hsh = hashfunc(str);
545 ent->next = tab->buckets[hsh];
546 ent->str = strdup(str);
547 ent->id = ++tab->id;
548 if (tab->id == 0) {
549 fprintf(stderr, "too many strings\n");
550 free(ent);
551 return NULL;
553 tab->buckets[hsh] = ent;
554 return ent;
557 static
558 void
559 thread_creation_callback(evtr_event_t ev, void *d)
561 evtr_t evtr = (evtr_t)d;
562 struct evtr_thread *td;
563 void *ktd;
564 char buf[20];
566 //printd("thread_creation_callback\n");
567 if (parse_format_data(ev, "new_td %p %s", &ktd, buf) != 2) {
568 return;
570 buf[19] = '\0';
572 if (!(td = malloc(sizeof(*td)))) {
573 evtr->err = ENOMEM;
574 return;
576 td->id = ktd;
577 td->userdata = NULL;
578 if (!(td->comm = strdup(buf))) {
579 free(td);
580 evtr->err = ENOMEM;
581 return;
583 printd("inserting new thread %p: %s\n", td->id, td->comm);
584 thread_map_insert(&evtr->threads, td);
587 static
588 void
589 thread_switch_callback(evtr_event_t ev, void *d)
591 evtr_t evtr = (evtr_t)d;
592 struct evtr_thread *tdp, *tdn;
593 void *ktdp, *ktdn;
594 struct cpu *cpu;
595 static struct evtr_event tdcr;
596 static char *fmt = "new_td %p %s";
597 char tidstr[40];
598 char fmtdata[sizeof(void *) + sizeof(char *)];
600 //printd("thread_switch_callback\n");
601 cpu = evtr_cpu(evtr, ev->cpu);
602 if (!cpu) {
603 printd("invalid cpu %d\n", ev->cpu);
604 return;
606 if (parse_format_data(ev, "sw %p > %p", &ktdp, &ktdn) != 2) {
607 return;
609 tdp = thread_map_find(&evtr->threads, ktdp);
610 if (!tdp) {
611 printd("switching from unknown thread %p\n", ktdp);
613 tdn = thread_map_find(&evtr->threads, ktdn);
614 if (!tdn) {
616 * Fake a thread creation event for threads we
617 * haven't seen before.
619 tdcr.type = EVTR_TYPE_PROBE;
620 tdcr.ts = ev->ts;
621 tdcr.file = NULL;
622 tdcr.func = NULL;
623 tdcr.line = 0;
624 tdcr.fmt = fmt;
625 tdcr.fmtdata = &fmtdata;
626 tdcr.fmtdatalen = sizeof(fmtdata);
627 tdcr.cpu = ev->cpu;
628 tdcr.td = NULL;
629 snprintf(tidstr, sizeof(tidstr), "%p", ktdn);
630 ((void **)fmtdata)[0] = ktdn;
631 ((char **)fmtdata)[1] = &tidstr[0];
632 thread_creation_callback(&tdcr, evtr);
634 tdn = thread_map_find(&evtr->threads, ktdn);
635 assert(tdn != NULL);
636 printd("switching to unknown thread %p\n", ktdn);
637 cpu->td = tdn;
638 return;
640 printd("cpu %d: switching to thread %p\n", ev->cpu, ktdn);
641 cpu->td = tdn;
644 static
645 void
646 assert_foff_in_sync(evtr_t evtr)
648 off_t off;
651 * We keep our own offset because we
652 * might want to support mmap()
654 off = ftello(evtr->f);
655 if (evtr->bytes != off) {
656 fprintf(stderr, "bytes %jd, off %jd\n", evtr->bytes, off);
657 abort();
661 static
663 evtr_write(evtr_t evtr, const void *buf, size_t bytes)
665 assert_foff_in_sync(evtr);
666 if (fwrite(buf, bytes, 1, evtr->f) != 1) {
667 evtr->err = errno;
668 evtr->errmsg = strerror(errno);
669 return !0;
671 evtr->bytes += bytes;
672 assert_foff_in_sync(evtr);
673 return 0;
677 * Called after dumping a record to make sure the next
678 * record is REC_ALIGN aligned. This does not make much sense,
679 * as we shouldn't be using packed structs anyway.
681 static
683 evtr_dump_pad(evtr_t evtr)
685 size_t pad;
686 static char buf[REC_ALIGN];
688 pad = REC_ALIGN - (evtr->bytes % REC_ALIGN);
689 if (pad > 0) {
690 return evtr_write(evtr, buf, pad);
692 return 0;
696 * We make sure that there is a new record every REC_BOUNDARY
697 * bytes, this costs next to nothing in space and allows for
698 * fast seeking.
700 static
702 evtr_dump_avoid_boundary(evtr_t evtr, size_t bytes)
704 unsigned pad, i;
705 static char buf[256];
707 pad = REC_BOUNDARY - (evtr->bytes % REC_BOUNDARY);
708 /* if adding @bytes would cause us to cross a boundary... */
709 if (bytes > pad) {
710 /* then pad to the boundary */
711 for (i = 0; i < (pad / sizeof(buf)); ++i) {
712 if (evtr_write(evtr, buf, sizeof(buf))) {
713 return !0;
716 i = pad % sizeof(buf);
717 if (i) {
718 if (evtr_write(evtr, buf, i)) {
719 return !0;
723 return 0;
726 static
728 evtr_dump_fmt(evtr_t evtr, uint64_t ts, const evtr_event_t ev)
730 struct fmt_event_header fmt;
731 struct hashentry *ent;
732 char *subsys = "", buf[1024];
734 if (strlcpy(buf, subsys, sizeof(buf)) >= sizeof(buf)) {
735 evtr->errmsg = "name of subsystem is too large";
736 evtr->err = ERANGE;
737 return 0;
739 if (strlcat(buf, ev->fmt, sizeof(buf)) >= sizeof(buf)) {
740 evtr->errmsg = "fmt + name of subsystem is too large";
741 evtr->err = ERANGE;
742 return 0;
745 if ((ent = hash_find(evtr->fmts, buf))) {
746 return ent->id;
748 if (!(ent = hash_insert(evtr->fmts, buf))) {
749 evtr->err = evtr->fmts->id ? ENOMEM : ERANGE;
750 return 0;
753 fmt.eh.type = EVTR_TYPE_FMT;
754 fmt.eh.ts = ts;
755 fmt.subsys_len = strlen(subsys);
756 fmt.fmt_len = strlen(ev->fmt);
757 fmt.id = ent->id;
758 if (evtr_dump_avoid_boundary(evtr, sizeof(fmt) + fmt.subsys_len +
759 fmt.fmt_len))
760 return 0;
761 if (evtr_write(evtr, &fmt, sizeof(fmt)))
762 return 0;
763 if (evtr_write(evtr, subsys, fmt.subsys_len))
764 return 0;
765 if (evtr_write(evtr, ev->fmt, fmt.fmt_len))
766 return 0;
767 if (evtr_dump_pad(evtr))
768 return 0;
769 return fmt.id;
/*
 * Replace string pointers or string ids in fmtdata
 *
 * Walks the printf-style format fmt and, in step with it, the packed
 * argument area fmtdata. Every pointer-sized slot that corresponds to a
 * %s conversion is passed to replace(ctx, slot) and overwritten with the
 * result. A literal "%%" consumes no argument.
 *
 * Returns the number of %s slots replaced, or -1 (after printing a
 * message to stderr) on an unknown conversion specifier.
 */
static
int
mangle_string_ptrs(const char *fmt, uint8_t *fmtdata,
		   const char *(*replace)(void *, const char *), void *ctx)
{
	const char *f, *p;
	const char *str;
	size_t skipsize, intsz;
	int ret = 0;

	for (f = fmt; f[0] != '\0'; ++f) {
		if (f[0] != '%')
			continue;
		++f;
		skipsize = 0;
		for (p = f; p[0]; ++p) {
			int again = 0;
			/*
			 * Eat flags. Notice this will accept duplicate
			 * flags.
			 */
			switch (p[0]) {
			case '#':
			case '0':
			case '-':
			case ' ':
			case '+':
			case '\'':
				again = !0;
				break;
			}
			if (!again)
				break;
		}
		/* Eat minimum field width, if any */
		for (; isdigit((unsigned char)p[0]); ++p)
			;
		if (p[0] == '.')
			++p;
		/* Eat precision, if any */
		for (; isdigit((unsigned char)p[0]); ++p)
			;
		/* Length modifier: decides the size of integer arguments */
		intsz = 0;
		switch (p[0]) {
		case 'l':
			if (p[1] == 'l') {
				++p;
				intsz = sizeof(long long);
			} else {
				intsz = sizeof(long);
			}
			break;
		case 'j':
			intsz = sizeof(intmax_t);
			break;
		case 't':
			intsz = sizeof(ptrdiff_t);
			break;
		case 'z':
			intsz = sizeof(size_t);
			break;
		default:
			break;
		}
		if (intsz != 0)
			++p;
		else
			intsz = sizeof(int);

		switch (p[0]) {
		case '%':
			/* literal percent sign: no argument to skip */
			skipsize = 0;
			break;
		case 'd':
		case 'i':
		case 'o':
		case 'u':
		case 'x':
		case 'X':
		case 'c':
			skipsize = intsz;
			break;
		case 'p':
			skipsize = sizeof(void *);
			break;
		case 'f':
			if (p[-1] == 'l')
				skipsize = sizeof(double);
			else
				skipsize = sizeof(float);
			break;
		case 's':
			/* the slot may be unaligned, so copy it in and out */
			memcpy(&str, fmtdata, sizeof(str));
			str = replace(ctx, str);
			memcpy(fmtdata, &str, sizeof(str));
			skipsize = sizeof(char *);
			++ret;
			break;
		default:
			fprintf(stderr, "Unknown conversion specifier %c "
				"in fmt starting with %s", p[0], f - 1);
			return -1;
		}
		fmtdata += skipsize;
		/* resume scanning after the conversion just handled */
		f = p;
	}
	return ret;
}
878 /* XXX: do we really want the timestamp? */
879 static
881 evtr_dump_string(evtr_t evtr, uint64_t ts, const char *str, int ns)
883 struct string_event_header s;
884 struct hashentry *ent;
886 assert((0 <= ns) && (ns < EVTR_NS_MAX));
887 if ((ent = hash_find(evtr->strings[ns], str))) {
888 return ent->id;
890 if (!(ent = hash_insert(evtr->strings[ns], str))) {
891 evtr->err = evtr->strings[ns]->id ? ENOMEM : ERANGE;
892 return 0;
895 printd("hash_insert %s ns %d id %d\n", str, ns, ent->id);
896 s.eh.type = EVTR_TYPE_STR;
897 s.eh.ts = ts;
898 s.ns = ns;
899 s.id = ent->id;
900 s.len = strnlen(str, PATH_MAX);
902 if (evtr_dump_avoid_boundary(evtr, sizeof(s) + s.len))
903 return 0;
904 if (evtr_write(evtr, &s, sizeof(s)))
905 return 0;
906 if (evtr_write(evtr, str, s.len))
907 return 0;
908 if (evtr_dump_pad(evtr))
909 return 0;
910 return s.id;
/* Context handed to the replace_strptr()/replace_strid() callbacks. */
913 struct replace_ctx {
914 evtr_t evtr;
/* timestamp passed on to evtr_dump_string() when a string is dumped */
915 uint64_t ts;
918 static
919 const char *
920 replace_strptr(void *_ctx, const char *s)
922 struct replace_ctx *ctx = _ctx;
923 return (const char *)evtr_dump_string(ctx->evtr, ctx->ts, s, EVTR_NS_DSTR);
926 static
927 const char *
928 replace_strid(void *_ctx, const char *s)
930 struct replace_ctx *ctx = _ctx;
931 const char *ret;
933 ret = string_map_find(&ctx->evtr->maps[EVTR_NS_DSTR - 1].root,
934 (uint32_t)s);
935 if (!ret) {
936 fprintf(stderr, "Unknown id for data string\n");
937 ctx->evtr->errmsg = "unknown id for data string";
938 ctx->evtr->err = !0;
940 validate_string(ret);
941 printd("replacing strid %d (ns %d) with string '%s' (or int %#x)\n", (int)s,
942 EVTR_NS_DSTR, ret ? ret : "NULL", (int)ret);
943 return ret;
946 static
948 evtr_dump_probe(evtr_t evtr, evtr_event_t ev)
950 struct probe_event_header kev;
951 char buf[1024];
953 memset(&kev, '\0', sizeof(kev));
954 kev.eh.type = ev->type;
955 kev.eh.ts = ev->ts;
956 kev.line = ev->line;
957 kev.cpu = ev->cpu;
958 if (ev->file) {
959 kev.file = evtr_dump_string(evtr, kev.eh.ts, ev->file,
960 EVTR_NS_PATH);
962 if (ev->func) {
963 kev.func = evtr_dump_string(evtr, kev.eh.ts, ev->func,
964 EVTR_NS_FUNC);
966 if (ev->fmt) {
967 kev.fmt = evtr_dump_fmt(evtr, kev.eh.ts, ev);
969 if (ev->fmtdata) {
970 struct replace_ctx replctx = {
971 .evtr = evtr,
972 .ts = ev->ts,
974 assert(ev->fmtdatalen <= sizeof(buf));
975 kev.datalen = ev->fmtdatalen;
977 * Replace all string pointers with string ids before dumping
978 * the data.
980 memcpy(buf, ev->fmtdata, ev->fmtdatalen);
981 if (mangle_string_ptrs(ev->fmt, buf,
982 replace_strptr, &replctx) < 0)
983 return !0;
984 if (evtr->err)
985 return evtr->err;
987 if (evtr_dump_avoid_boundary(evtr, sizeof(kev) + ev->fmtdatalen))
988 return !0;
989 if (evtr_write(evtr, &kev, sizeof(kev)))
990 return !0;
991 if (evtr_write(evtr, buf, ev->fmtdatalen))
992 return !0;
993 if (evtr_dump_pad(evtr))
994 return !0;
995 return 0;
998 static
1000 evtr_dump_cpuinfo(evtr_t evtr, evtr_event_t ev)
1002 uint8_t type = EVTR_TYPE_CPUINFO;
1003 uint16_t ncpus = ev->ncpus;
1005 if (ncpus <= 0) {
1006 evtr->errmsg = "invalid number of cpus";
1007 return !0;
1009 if (evtr_dump_avoid_boundary(evtr, sizeof(type) + sizeof(ncpus)))
1010 return !0;
1011 if (evtr_write(evtr, &type, sizeof(type))) {
1012 return !0;
1014 if (evtr_write(evtr, &ncpus, sizeof(ncpus))) {
1015 return !0;
1017 if (evtr_dump_pad(evtr))
1018 return !0;
1019 return 0;
1023 evtr_rewind(evtr_t evtr)
1025 assert((evtr->flags & EVTRF_WR) == 0);
1026 evtr->bytes = 0;
1027 if (fseek(evtr->f, 0, SEEK_SET)) {
1028 evtr->err = errno;
1029 return !0;
1031 return 0;
1035 evtr_dump_event(evtr_t evtr, evtr_event_t ev)
1037 switch (ev->type) {
1038 case EVTR_TYPE_PROBE:
1039 return evtr_dump_probe(evtr, ev);
1040 case EVTR_TYPE_CPUINFO:
1041 return evtr_dump_cpuinfo(evtr, ev);
1043 evtr->errmsg = "unknown event type";
1044 return !0;
1047 static
1048 evtr_t
1049 evtr_alloc(FILE *f)
1051 evtr_t evtr;
1052 if (!(evtr = malloc(sizeof(*evtr)))) {
1053 return NULL;
1056 evtr->f = f;
1057 evtr->err = 0;
1058 evtr->errmsg = NULL;
1059 evtr->bytes = 0;
1060 TAILQ_INIT(&evtr->unresolved_filtq);
1061 return evtr;
1064 evtr_t
1065 evtr_open_read(FILE *f)
1067 evtr_t evtr;
1068 struct evtr_event ev;
1069 int i;
1071 if (!(evtr = evtr_alloc(f))) {
1072 return NULL;
1074 evtr->flags = 0;
1075 for (i = 0; i < (EVTR_NS_MAX - 1); ++i) {
1076 RB_INIT(&evtr->maps[i].root);
1078 RB_INIT(&evtr->fmtmap.root);
1079 TAILQ_INIT(&evtr->unresolved_filtq);
1080 evtr->cbs = 0;
1081 evtr->ncbs = 0;
1082 RB_INIT(&evtr->threads.root);
1083 evtr->cpus = NULL;
1084 evtr->ncpus = 0;
1085 if (evtr_register_callback(evtr, &thread_creation_callback, evtr)) {
1086 goto free_evtr;
1088 if (evtr_register_callback(evtr, &thread_switch_callback, evtr)) {
1089 goto free_cbs;
1092 * Load the first event so we can pick up any
1093 * cpuinfo entries.
1095 if (evtr_next_event(evtr, &ev)) {
1096 goto free_cbs;
1098 if (evtr_rewind(evtr))
1099 goto free_cbs;
1100 return evtr;
1101 free_cbs:
1102 evtr_deregister_callbacks(evtr);
1103 free_evtr:
1104 free(evtr);
1105 return NULL;
1108 evtr_t
1109 evtr_open_write(FILE *f)
1111 evtr_t evtr;
1112 int i, j;
1114 if (!(evtr = evtr_alloc(f))) {
1115 return NULL;
1118 evtr->flags = EVTRF_WR;
1119 if (!(evtr->fmts = calloc(sizeof(struct hashtab), 1)))
1120 goto free_evtr;
1122 for (i = 0; i < EVTR_NS_MAX; ++i) {
1123 evtr->strings[i] = calloc(sizeof(struct hashtab), 1);
1124 if (!evtr->strings[i]) {
1125 for (j = 0; j < i; ++j) {
1126 free(evtr->strings[j]);
1128 goto free_fmts;
1132 return evtr;
1133 free_fmts:
1134 free(evtr->fmts);
1135 free_evtr:
1136 free(evtr);
1137 return NULL;
1140 static
1141 void
1142 hashtab_destroy(struct hashtab *h)
1144 struct hashentry *ent, *next;
1145 int i;
1146 for (i = 0; i < NR_BUCKETS; ++i) {
1147 for (ent = h->buckets[i]; ent; ent = next) {
1148 next = ent->next;
1149 free(ent);
1152 free(h);
1155 void
1156 evtr_close(evtr_t evtr)
1158 int i;
1160 if (evtr->flags & EVTRF_WR) {
1161 hashtab_destroy(evtr->fmts);
1162 for (i = 0; i < EVTR_NS_MAX; ++i)
1163 hashtab_destroy(evtr->strings[i]);
1164 } else {
1165 id_tree_free(&evtr->fmtmap.root);
1166 for (i = 0; i < EVTR_NS_MAX - 1; ++i) {
1167 id_tree_free(&evtr->maps[i].root);
1170 free(evtr);
1173 static
1175 evtr_read(evtr_t evtr, void *buf, size_t size)
1177 assert(size > 0);
1178 assert_foff_in_sync(evtr);
1179 // printd("evtr_read at %#jx, %zd bytes\n", evtr->bytes, size);
1180 if (fread(buf, size, 1, evtr->f) != 1) {
1181 if (feof(evtr->f)) {
1182 evtr->errmsg = "incomplete record";
1183 } else {
1184 evtr->errmsg = strerror(errno);
1186 return !0;
1188 evtr->bytes += size;
1189 assert_foff_in_sync(evtr);
1190 return 0;
1193 static
1195 evtr_load_fmt(evtr_t evtr, char *buf)
1197 struct fmt_event_header *evh = (struct fmt_event_header *)buf;
1198 struct event_fmt *fmt;
1199 char *subsys = NULL, *fmtstr;
1201 if (!(fmt = malloc(sizeof(*fmt)))) {
1202 evtr->err = errno;
1203 return !0;
1205 if (evtr_read(evtr, buf + sizeof(struct trace_event_header),
1206 sizeof(*evh) - sizeof(evh->eh))) {
1207 goto free_fmt;
1209 assert(!evh->subsys_len);
1210 if (evh->subsys_len) {
1211 if (!(subsys = malloc(evh->subsys_len))) {
1212 evtr->err = errno;
1213 goto free_fmt;
1215 if (evtr_read(evtr, subsys, evh->subsys_len)) {
1216 goto free_subsys;
1218 fmt->subsys = subsys;
1219 } else {
1220 fmt->subsys = "";
1222 if (!(fmtstr = malloc(evh->fmt_len + 1))) {
1223 evtr->err = errno;
1224 goto free_subsys;
1226 if (evtr_read(evtr, fmtstr, evh->fmt_len)) {
1227 goto free_fmtstr;
1229 fmtstr[evh->fmt_len] = '\0';
1230 fmt->fmt = fmtstr;
1232 printd("fmt_map_insert (%d, %s)\n", evh->id, fmt->fmt);
1233 evtr->err = fmt_map_insert(&evtr->fmtmap.root, fmt, evh->id);
1234 switch (evtr->err) {
1235 case ENOMEM:
1236 evtr->errmsg = "out of memory";
1237 break;
1238 case EEXIST:
1239 evtr->errmsg = "redefinition of an id to a "
1240 "different format (corrupt input)";
1241 break;
1242 default:
1243 evtr_resolve_filters(evtr, fmt->fmt, evh->id);
1245 return 0;
1247 free_fmtstr:
1248 free(fmtstr);
1249 free_subsys:
1250 if (subsys)
1251 free(subsys);
1252 free_fmt:
1253 free(fmt);
1254 return !0;
1257 static
1259 evtr_load_string(evtr_t evtr, char *buf)
1261 char sbuf[PATH_MAX + 1];
1262 struct string_event_header *evh = (struct string_event_header *)buf;
1264 if (evtr_read(evtr, buf + sizeof(struct trace_event_header),
1265 sizeof(*evh) - sizeof(evh->eh))) {
1266 return !0;
1268 if (evh->len > PATH_MAX) {
1269 evtr->errmsg = "string too large (corrupt input)";
1270 return !0;
1271 } else if (evh->len < 0) {
1272 evtr->errmsg = "negative string size (corrupt input)";
1273 return !0;
1275 if (evh->len && evtr_read(evtr, sbuf, evh->len)) {
1276 return !0;
1278 sbuf[evh->len] = 0;
1279 if (evh->ns >= EVTR_NS_MAX) {
1280 evtr->errmsg = "invalid namespace (corrupt input)";
1281 return !0;
1283 validate_string(sbuf);
1284 printd("evtr_load_string:ns %d id %d : \"%s\"\n", evh->ns, evh->id,
1285 sbuf);
1286 evtr->err = string_map_insert(&evtr->maps[evh->ns - 1].root, sbuf, evh->id);
1287 switch (evtr->err) {
1288 case ENOMEM:
1289 evtr->errmsg = "out of memory";
1290 break;
1291 case EEXIST:
1292 evtr->errmsg = "redefinition of an id to a "
1293 "different string (corrupt input)";
1294 break;
1295 default:
1298 return 0;
1301 static
1303 evtr_filter_match(evtr_filter_t f, struct probe_event_header *pev)
1305 if ((f->cpu != -1) && (f->cpu != pev->cpu))
1306 return 0;
1307 if (!f->fmtid)
1308 return !0;
1310 * If we don't have an id for the required format
1311 * string, the format string won't match anyway
1312 * (we require that id <-> fmt mappings appear
1313 * before the first appearance of the fmt string),
1314 * so don't bother comparing.
1316 if (!(f->flags & FILTF_ID))
1317 return 0;
1318 if(pev->fmt == f->fmtid)
1319 return !0;
1320 return 0;
1323 static
1325 evtr_match_filters(struct evtr_query *q, struct probe_event_header *pev)
1327 int i;
1329 /* no filters means we're interested in all events */
1330 if (!q->nfilt)
1331 return !0;
1332 ++q->ntried;
1333 for (i = 0; i < q->nfilt; ++i) {
1334 if (evtr_filter_match(&q->filt[i], pev)) {
1335 ++q->nmatched;
1336 return !0;
1339 return 0;
1342 static
1344 evtr_skip(evtr_t evtr, off_t bytes)
1346 if (fseek(evtr->f, bytes, SEEK_CUR)) {
1347 evtr->err = errno;
1348 evtr->errmsg = strerror(errno);
1349 return !0;
1351 evtr->bytes += bytes;
1352 return 0;
1356 * Make sure q->buf is at least len bytes
1358 static
1360 evtr_query_reserve_buf(struct evtr_query *q, int len)
1362 void *tmp;
1364 if (q->bufsize >= len)
1365 return 0;
1366 if (!(tmp = realloc(q->buf, len)))
1367 return !0;
1368 q->buf = tmp;
1369 q->bufsize = len;
1370 return 0;
1373 static
1375 evtr_load_probe(evtr_t evtr, evtr_event_t ev, char *buf, struct evtr_query *q)
1377 struct probe_event_header *evh = (struct probe_event_header *)buf;
1378 struct cpu *cpu;
1380 if (evtr_read(evtr, buf + sizeof(struct trace_event_header),
1381 sizeof(*evh) - sizeof(evh->eh)))
1382 return !0;
1383 memset(ev, '\0', sizeof(*ev));
1384 ev->ts = evh->eh.ts;
1385 ev->type = EVTR_TYPE_PROBE;
1386 ev->line = evh->line;
1387 ev->cpu = evh->cpu;
1388 if ((cpu = evtr_cpu(evtr, evh->cpu))) {
1389 ev->td = cpu->td;
1390 } else {
1391 ev->td = NULL;
1393 if (evh->file) {
1394 ev->file = string_map_find(
1395 &evtr->maps[EVTR_NS_PATH - 1].root,
1396 evh->file);
1397 if (!ev->file) {
1398 evtr->errmsg = "unknown id for file path";
1399 evtr->err = !0;
1400 ev->file = "<unknown>";
1401 } else {
1402 validate_string(ev->file);
1404 } else {
1405 ev->file = "<unknown>";
1407 if (evh->fmt) {
1408 const struct event_fmt *fmt;
1409 if (!(fmt = fmt_map_find(&evtr->fmtmap.root, evh->fmt))) {
1410 evtr->errmsg = "unknown id for event fmt";
1411 evtr->err = !0;
1412 ev->fmt = NULL;
1413 } else {
1414 ev->fmt = fmt->fmt;
1415 validate_string(fmt->fmt);
1418 if (evh->datalen) {
1419 if (evtr_query_reserve_buf(q, evh->datalen + 1)) {
1420 evtr->err = ENOMEM;
1421 } else if (!evtr_read(evtr, q->buf, evh->datalen)) {
1422 struct replace_ctx replctx = {
1423 .evtr = evtr,
1424 .ts = ev->ts,
1426 assert(ev->fmt);
1428 ev->fmtdata = q->buf;
1430 * If the format specifies any string pointers, there
1431 * is a string id stored in the fmtdata. Look it up
1432 * and replace it with a string pointer before
1433 * returning it to the user.
1435 if (mangle_string_ptrs(ev->fmt, __DECONST(uint8_t *,
1436 ev->fmtdata),
1437 replace_strid, &replctx) < 0)
1438 return evtr->err;
1439 if (evtr->err)
1440 return evtr->err;
1441 ((char *)ev->fmtdata)[evh->datalen] = '\0';
1442 ev->fmtdatalen = evh->datalen;
1445 evtr_run_callbacks(ev, evtr);
1446 /* we can't filter before running the callbacks */
1447 if (!evtr_match_filters(q, evh)) {
1448 return -1; /* no match */
1451 return evtr->err;
1454 static
1456 evtr_skip_to_record(evtr_t evtr)
1458 int skip;
1460 skip = REC_ALIGN - (evtr->bytes % REC_ALIGN);
1461 if (skip > 0) {
1462 if (fseek(evtr->f, skip, SEEK_CUR)) {
1463 evtr->err = errno;
1464 evtr->errmsg = strerror(errno);
1465 return !0;
1467 evtr->bytes += skip;
1469 return 0;
1472 static
1474 evtr_load_cpuinfo(evtr_t evtr)
1476 uint16_t ncpus;
1477 int i;
1479 if (evtr_read(evtr, &ncpus, sizeof(ncpus))) {
1480 return !0;
1482 if (evtr->cpus)
1483 return 0;
1484 evtr->cpus = malloc(ncpus * sizeof(struct cpu));
1485 if (!evtr->cpus) {
1486 evtr->err = ENOMEM;
1487 return !0;
1489 evtr->ncpus = ncpus;
1490 for (i = 0; i < ncpus; ++i) {
1491 evtr->cpus[i].td = NULL;
1493 return 0;
1496 static
1498 _evtr_next_event(evtr_t evtr, evtr_event_t ev, struct evtr_query *q)
1500 char buf[MAX_EVHDR_SIZE];
1501 int ret, err, ntried, nmatched;
1502 struct trace_event_header *evhdr = (struct trace_event_header *)buf;
1504 for (ret = 0; !ret;) {
1506 * skip pad records -- this will only happen if there's a
1507 * variable sized record close to the boundary
1509 if (evtr_read(evtr, &evhdr->type, 1))
1510 return feof(evtr->f) ? -1 : !0;
1511 if (evhdr->type == EVTR_TYPE_PAD) {
1512 evtr_skip_to_record(evtr);
1513 continue;
1515 if (evhdr->type == EVTR_TYPE_CPUINFO) {
1516 evtr_load_cpuinfo(evtr);
1517 continue;
1519 if (evtr_read(evtr, buf + 1, sizeof(*evhdr) - 1))
1520 return feof(evtr->f) ? -1 : !0;
1521 switch (evhdr->type) {
1522 case EVTR_TYPE_PROBE:
1523 ntried = q->ntried;
1524 nmatched = q->nmatched;
1525 if ((err = evtr_load_probe(evtr, ev, buf, q))) {
1526 if (err == -1) {
1527 /* no match */
1528 ret = 0;
1529 } else {
1530 return !0;
1532 } else {
1533 ret = !0;
1535 break;
1536 case EVTR_TYPE_STR:
1537 if (evtr_load_string(evtr, buf)) {
1538 return !0;
1540 break;
1541 case EVTR_TYPE_FMT:
1542 if (evtr_load_fmt(evtr, buf)) {
1543 return !0;
1545 break;
1546 default:
1547 evtr->err = !0;
1548 evtr->errmsg = "unknown event type (corrupt input?)";
1549 return !0;
1551 evtr_skip_to_record(evtr);
1552 if (ret) {
1553 q->off = evtr->bytes;
1554 return 0;
1557 /* can't get here */
1558 return !0;
1562 evtr_next_event(evtr_t evtr, evtr_event_t ev)
1564 struct evtr_query *q;
1565 int ret;
1567 if (!(q = evtr_query_init(evtr, NULL, 0))) {
1568 evtr->err = ENOMEM;
1569 return !0;
1571 ret = _evtr_next_event(evtr, ev, q);
1572 evtr_query_destroy(q);
1573 return ret;
1577 evtr_last_event(evtr_t evtr, evtr_event_t ev)
1579 struct stat st;
1580 int fd;
1581 off_t last_boundary;
1583 fd = fileno(evtr->f);
1584 if (fstat(fd, &st))
1585 return !0;
1587 * This skips pseudo records, so we can't provide
1588 * an event with all fields filled in this way.
1589 * It's doable, just needs some care. TBD.
1591 if (0 && (st.st_mode & S_IFREG)) {
1593 * Skip to last boundary, that's the closest to the EOF
1594 * location that we are sure contains a header so we can
1595 * pick up the stream.
1597 last_boundary = (st.st_size / REC_BOUNDARY) * REC_BOUNDARY;
1598 /* XXX: ->bytes should be in query */
1599 assert(evtr->bytes == 0);
1600 evtr_skip(evtr, last_boundary);
1605 * If we can't seek, we need to go through the whole file.
1606 * Since you can't seek back, this is pretty useless unless
1607 * you really are interested only in the last event.
1609 while (!evtr_next_event(evtr, ev))
1611 if (evtr_error(evtr))
1612 return !0;
1613 evtr_rewind(evtr);
1614 return 0;
1617 struct evtr_query *
1618 evtr_query_init(evtr_t evtr, evtr_filter_t filt, int nfilt)
1620 struct evtr_query *q;
1621 int i;
1623 if (!(q = malloc(sizeof(*q)))) {
1624 return q;
1626 q->bufsize = 2;
1627 if (!(q->buf = malloc(q->bufsize))) {
1628 goto free_q;
1630 q->evtr = evtr;
1631 q->off = 0;
1632 q->filt = filt;
1633 q->nfilt = nfilt;
1634 q->nmatched = 0;
1635 for (i = 0; i < nfilt; ++i) {
1636 filt[i].flags = 0;
1637 if (filt[i].fmt == NULL)
1638 continue;
1639 if (evtr_filter_register(evtr, &filt[i])) {
1640 evtr_deregister_filters(evtr, filt, i);
1641 goto free_buf;
1645 return q;
1646 free_buf:
1647 free(q->buf);
1648 free_q:
1649 free(q);
1650 return NULL;
1653 void
1654 evtr_query_destroy(struct evtr_query *q)
1656 evtr_deregister_filters(q->evtr, q->filt, q->nfilt);
1657 free(q->buf);
1658 free(q);
1662 evtr_query_next(struct evtr_query *q, evtr_event_t ev)
1664 /* we may support that in the future */
1665 if (q->off != q->evtr->bytes)
1666 return !0;
1667 return _evtr_next_event(q->evtr, ev, q);
1671 evtr_ncpus(evtr_t evtr)
1673 return evtr->ncpus;