/*
 * kernel/trace/trace_syscalls.c — ftrace/perf syscall entry/exit tracing.
 */
1 #include <trace/syscall.h>
2 #include <trace/events/syscalls.h>
3 #include <linux/syscalls.h>
4 #include <linux/slab.h>
5 #include <linux/kernel.h>
6 #include <linux/module.h> /* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
7 #include <linux/ftrace.h>
8 #include <linux/perf_event.h>
9 #include <asm/syscall.h>
11 #include "trace_output.h"
12 #include "trace.h"
/* Serializes all refcount/bitmap updates below. */
static DEFINE_MUTEX(syscall_trace_lock);
/* Number of events currently using the sys_enter / sys_exit tracepoints. */
static int sys_refcount_enter;
static int sys_refcount_exit;
/* Per-syscall enable bits, tested in the tracepoint probes. */
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);

static int syscall_enter_register(struct ftrace_event_call *event,
				  enum trace_reg type, void *data);
static int syscall_exit_register(struct ftrace_event_call *event,
				 enum trace_reg type, void *data);
25 static struct list_head *
26 syscall_get_enter_fields(struct ftrace_event_call *call)
28 struct syscall_metadata *entry = call->data;
30 return &entry->enter_fields;
/* Linker-provided bounds of the syscall metadata section. */
extern struct syscall_metadata *__start_syscalls_metadata[];
extern struct syscall_metadata *__stop_syscalls_metadata[];

/* Indexed by syscall number; populated by init_ftrace_syscalls(). */
static struct syscall_metadata **syscalls_metadata;
38 #ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
39 static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
42 * Only compare after the "sys" prefix. Archs that use
43 * syscall wrappers may have syscalls symbols aliases prefixed
44 * with "SyS" instead of "sys", leading to an unwanted
45 * mismatch.
47 return !strcmp(sym + 3, name + 3);
49 #endif
#ifdef ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
/*
 * Some architectures that allow for 32bit applications
 * to run on a 64bit kernel, do not map the syscalls for
 * the 32bit tasks the same as they do for 64bit tasks.
 *
 *     *cough*x86*cough*
 *
 * In such a case, instead of reporting the wrong syscalls,
 * simply ignore them.
 *
 * For an arch to ignore the compat syscalls it needs to
 * define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS as well as
 * define the function arch_trace_is_compat_syscall() to let
 * the tracing system know that it should ignore it.
 */
static int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	if (unlikely(arch_trace_is_compat_syscall(regs)))
		return -1;

	return syscall_get_nr(task, regs);
}
#else
static inline int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	return syscall_get_nr(task, regs);
}
#endif /* ARCH_TRACE_IGNORE_COMPAT_SYSCALLS */
83 static __init struct syscall_metadata *
84 find_syscall_meta(unsigned long syscall)
86 struct syscall_metadata **start;
87 struct syscall_metadata **stop;
88 char str[KSYM_SYMBOL_LEN];
91 start = __start_syscalls_metadata;
92 stop = __stop_syscalls_metadata;
93 kallsyms_lookup(syscall, NULL, NULL, NULL, str);
95 if (arch_syscall_match_sym_name(str, "sys_ni_syscall"))
96 return NULL;
98 for ( ; start < stop; start++) {
99 if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name))
100 return *start;
102 return NULL;
105 static struct syscall_metadata *syscall_nr_to_meta(int nr)
107 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
108 return NULL;
110 return syscalls_metadata[nr];
113 static enum print_line_t
114 print_syscall_enter(struct trace_iterator *iter, int flags,
115 struct trace_event *event)
117 struct trace_seq *s = &iter->seq;
118 struct trace_entry *ent = iter->ent;
119 struct syscall_trace_enter *trace;
120 struct syscall_metadata *entry;
121 int i, ret, syscall;
123 trace = (typeof(trace))ent;
124 syscall = trace->nr;
125 entry = syscall_nr_to_meta(syscall);
127 if (!entry)
128 goto end;
130 if (entry->enter_event->event.type != ent->type) {
131 WARN_ON_ONCE(1);
132 goto end;
135 ret = trace_seq_printf(s, "%s(", entry->name);
136 if (!ret)
137 return TRACE_TYPE_PARTIAL_LINE;
139 for (i = 0; i < entry->nb_args; i++) {
140 /* parameter types */
141 if (trace_flags & TRACE_ITER_VERBOSE) {
142 ret = trace_seq_printf(s, "%s ", entry->types[i]);
143 if (!ret)
144 return TRACE_TYPE_PARTIAL_LINE;
146 /* parameter values */
147 ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
148 trace->args[i],
149 i == entry->nb_args - 1 ? "" : ", ");
150 if (!ret)
151 return TRACE_TYPE_PARTIAL_LINE;
154 ret = trace_seq_putc(s, ')');
155 if (!ret)
156 return TRACE_TYPE_PARTIAL_LINE;
158 end:
159 ret = trace_seq_putc(s, '\n');
160 if (!ret)
161 return TRACE_TYPE_PARTIAL_LINE;
163 return TRACE_TYPE_HANDLED;
166 static enum print_line_t
167 print_syscall_exit(struct trace_iterator *iter, int flags,
168 struct trace_event *event)
170 struct trace_seq *s = &iter->seq;
171 struct trace_entry *ent = iter->ent;
172 struct syscall_trace_exit *trace;
173 int syscall;
174 struct syscall_metadata *entry;
175 int ret;
177 trace = (typeof(trace))ent;
178 syscall = trace->nr;
179 entry = syscall_nr_to_meta(syscall);
181 if (!entry) {
182 trace_seq_printf(s, "\n");
183 return TRACE_TYPE_HANDLED;
186 if (entry->exit_event->event.type != ent->type) {
187 WARN_ON_ONCE(1);
188 return TRACE_TYPE_UNHANDLED;
191 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
192 trace->ret);
193 if (!ret)
194 return TRACE_TYPE_PARTIAL_LINE;
196 return TRACE_TYPE_HANDLED;
/*
 * Never defined: referencing it forces a link error when SYSCALL_FIELD()
 * is used with a type whose size does not match the struct member.
 */
extern char *__bad_type_size(void);

/* Expand to the (type, name, offset, size, signedness) tuple that
 * trace_define_field() expects, with a compile/link-time size check. */
#define SYSCALL_FIELD(type, name)					\
	sizeof(type) != sizeof(trace.name) ?				\
		__bad_type_size() :					\
		#type, #name, offsetof(typeof(trace), name),		\
		sizeof(trace.name), is_signed_type(type)
207 static
208 int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
210 int i;
211 int pos = 0;
213 /* When len=0, we just calculate the needed length */
214 #define LEN_OR_ZERO (len ? len - pos : 0)
216 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
217 for (i = 0; i < entry->nb_args; i++) {
218 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
219 entry->args[i], sizeof(unsigned long),
220 i == entry->nb_args - 1 ? "" : ", ");
222 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
224 for (i = 0; i < entry->nb_args; i++) {
225 pos += snprintf(buf + pos, LEN_OR_ZERO,
226 ", ((unsigned long)(REC->%s))", entry->args[i]);
229 #undef LEN_OR_ZERO
231 /* return the length of print_fmt */
232 return pos;
235 static int set_syscall_print_fmt(struct ftrace_event_call *call)
237 char *print_fmt;
238 int len;
239 struct syscall_metadata *entry = call->data;
241 if (entry->enter_event != call) {
242 call->print_fmt = "\"0x%lx\", REC->ret";
243 return 0;
246 /* First: called with 0 length to calculate the needed length */
247 len = __set_enter_print_fmt(entry, NULL, 0);
249 print_fmt = kmalloc(len + 1, GFP_KERNEL);
250 if (!print_fmt)
251 return -ENOMEM;
253 /* Second: actually write the @print_fmt */
254 __set_enter_print_fmt(entry, print_fmt, len + 1);
255 call->print_fmt = print_fmt;
257 return 0;
260 static void free_syscall_print_fmt(struct ftrace_event_call *call)
262 struct syscall_metadata *entry = call->data;
264 if (entry->enter_event == call)
265 kfree(call->print_fmt);
268 static int syscall_enter_define_fields(struct ftrace_event_call *call)
270 struct syscall_trace_enter trace;
271 struct syscall_metadata *meta = call->data;
272 int ret;
273 int i;
274 int offset = offsetof(typeof(trace), args);
276 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
277 if (ret)
278 return ret;
280 for (i = 0; i < meta->nb_args; i++) {
281 ret = trace_define_field(call, meta->types[i],
282 meta->args[i], offset,
283 sizeof(unsigned long), 0,
284 FILTER_OTHER);
285 offset += sizeof(unsigned long);
288 return ret;
291 static int syscall_exit_define_fields(struct ftrace_event_call *call)
293 struct syscall_trace_exit trace;
294 int ret;
296 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
297 if (ret)
298 return ret;
300 ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
301 FILTER_OTHER);
303 return ret;
306 static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
308 struct syscall_trace_enter *entry;
309 struct syscall_metadata *sys_data;
310 struct ring_buffer_event *event;
311 struct ring_buffer *buffer;
312 int syscall_nr;
313 int size;
315 syscall_nr = trace_get_syscall_nr(current, regs);
316 if (syscall_nr < 0)
317 return;
318 if (!test_bit(syscall_nr, enabled_enter_syscalls))
319 return;
321 sys_data = syscall_nr_to_meta(syscall_nr);
322 if (!sys_data)
323 return;
325 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
327 event = trace_current_buffer_lock_reserve(&buffer,
328 sys_data->enter_event->event.type, size, 0, 0);
329 if (!event)
330 return;
332 entry = ring_buffer_event_data(event);
333 entry->nr = syscall_nr;
334 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
336 if (!filter_current_check_discard(buffer, sys_data->enter_event,
337 entry, event))
338 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
341 static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
343 struct syscall_trace_exit *entry;
344 struct syscall_metadata *sys_data;
345 struct ring_buffer_event *event;
346 struct ring_buffer *buffer;
347 int syscall_nr;
349 syscall_nr = trace_get_syscall_nr(current, regs);
350 if (syscall_nr < 0)
351 return;
352 if (!test_bit(syscall_nr, enabled_exit_syscalls))
353 return;
355 sys_data = syscall_nr_to_meta(syscall_nr);
356 if (!sys_data)
357 return;
359 event = trace_current_buffer_lock_reserve(&buffer,
360 sys_data->exit_event->event.type, sizeof(*entry), 0, 0);
361 if (!event)
362 return;
364 entry = ring_buffer_event_data(event);
365 entry->nr = syscall_nr;
366 entry->ret = syscall_get_return_value(current, regs);
368 if (!filter_current_check_discard(buffer, sys_data->exit_event,
369 entry, event))
370 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
373 static int reg_event_syscall_enter(struct ftrace_event_call *call)
375 int ret = 0;
376 int num;
378 num = ((struct syscall_metadata *)call->data)->syscall_nr;
379 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
380 return -ENOSYS;
381 mutex_lock(&syscall_trace_lock);
382 if (!sys_refcount_enter)
383 ret = register_trace_sys_enter(ftrace_syscall_enter, NULL);
384 if (!ret) {
385 set_bit(num, enabled_enter_syscalls);
386 sys_refcount_enter++;
388 mutex_unlock(&syscall_trace_lock);
389 return ret;
392 static void unreg_event_syscall_enter(struct ftrace_event_call *call)
394 int num;
396 num = ((struct syscall_metadata *)call->data)->syscall_nr;
397 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
398 return;
399 mutex_lock(&syscall_trace_lock);
400 sys_refcount_enter--;
401 clear_bit(num, enabled_enter_syscalls);
402 if (!sys_refcount_enter)
403 unregister_trace_sys_enter(ftrace_syscall_enter, NULL);
404 mutex_unlock(&syscall_trace_lock);
407 static int reg_event_syscall_exit(struct ftrace_event_call *call)
409 int ret = 0;
410 int num;
412 num = ((struct syscall_metadata *)call->data)->syscall_nr;
413 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
414 return -ENOSYS;
415 mutex_lock(&syscall_trace_lock);
416 if (!sys_refcount_exit)
417 ret = register_trace_sys_exit(ftrace_syscall_exit, NULL);
418 if (!ret) {
419 set_bit(num, enabled_exit_syscalls);
420 sys_refcount_exit++;
422 mutex_unlock(&syscall_trace_lock);
423 return ret;
426 static void unreg_event_syscall_exit(struct ftrace_event_call *call)
428 int num;
430 num = ((struct syscall_metadata *)call->data)->syscall_nr;
431 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
432 return;
433 mutex_lock(&syscall_trace_lock);
434 sys_refcount_exit--;
435 clear_bit(num, enabled_exit_syscalls);
436 if (!sys_refcount_exit)
437 unregister_trace_sys_exit(ftrace_syscall_exit, NULL);
438 mutex_unlock(&syscall_trace_lock);
441 static int init_syscall_trace(struct ftrace_event_call *call)
443 int id;
444 int num;
446 num = ((struct syscall_metadata *)call->data)->syscall_nr;
447 if (num < 0 || num >= NR_syscalls) {
448 pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
449 ((struct syscall_metadata *)call->data)->name);
450 return -ENOSYS;
453 if (set_syscall_print_fmt(call) < 0)
454 return -ENOMEM;
456 id = trace_event_raw_init(call);
458 if (id < 0) {
459 free_syscall_print_fmt(call);
460 return id;
463 return id;
466 struct trace_event_functions enter_syscall_print_funcs = {
467 .trace = print_syscall_enter,
470 struct trace_event_functions exit_syscall_print_funcs = {
471 .trace = print_syscall_exit,
474 struct ftrace_event_class event_class_syscall_enter = {
475 .system = "syscalls",
476 .reg = syscall_enter_register,
477 .define_fields = syscall_enter_define_fields,
478 .get_fields = syscall_get_enter_fields,
479 .raw_init = init_syscall_trace,
482 struct ftrace_event_class event_class_syscall_exit = {
483 .system = "syscalls",
484 .reg = syscall_exit_register,
485 .define_fields = syscall_exit_define_fields,
486 .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields),
487 .raw_init = init_syscall_trace,
490 unsigned long __init __weak arch_syscall_addr(int nr)
492 return (unsigned long)sys_call_table[nr];
495 static int __init init_ftrace_syscalls(void)
497 struct syscall_metadata *meta;
498 unsigned long addr;
499 int i;
501 syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata),
502 GFP_KERNEL);
503 if (!syscalls_metadata) {
504 WARN_ON(1);
505 return -ENOMEM;
508 for (i = 0; i < NR_syscalls; i++) {
509 addr = arch_syscall_addr(i);
510 meta = find_syscall_meta(addr);
511 if (!meta)
512 continue;
514 meta->syscall_nr = i;
515 syscalls_metadata[i] = meta;
518 return 0;
520 early_initcall(init_ftrace_syscalls);
522 #ifdef CONFIG_PERF_EVENTS
524 static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
525 static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
526 static int sys_perf_refcount_enter;
527 static int sys_perf_refcount_exit;
529 static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
531 struct syscall_metadata *sys_data;
532 struct syscall_trace_enter *rec;
533 struct hlist_head *head;
534 int syscall_nr;
535 int rctx;
536 int size;
538 syscall_nr = trace_get_syscall_nr(current, regs);
539 if (syscall_nr < 0)
540 return;
541 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
542 return;
544 sys_data = syscall_nr_to_meta(syscall_nr);
545 if (!sys_data)
546 return;
548 /* get the size after alignment with the u32 buffer size field */
549 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
550 size = ALIGN(size + sizeof(u32), sizeof(u64));
551 size -= sizeof(u32);
553 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
554 "perf buffer not large enough"))
555 return;
557 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
558 sys_data->enter_event->event.type, regs, &rctx);
559 if (!rec)
560 return;
562 rec->nr = syscall_nr;
563 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
564 (unsigned long *)&rec->args);
566 head = this_cpu_ptr(sys_data->enter_event->perf_events);
567 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
570 static int perf_sysenter_enable(struct ftrace_event_call *call)
572 int ret = 0;
573 int num;
575 num = ((struct syscall_metadata *)call->data)->syscall_nr;
577 mutex_lock(&syscall_trace_lock);
578 if (!sys_perf_refcount_enter)
579 ret = register_trace_sys_enter(perf_syscall_enter, NULL);
580 if (ret) {
581 pr_info("event trace: Could not activate"
582 "syscall entry trace point");
583 } else {
584 set_bit(num, enabled_perf_enter_syscalls);
585 sys_perf_refcount_enter++;
587 mutex_unlock(&syscall_trace_lock);
588 return ret;
591 static void perf_sysenter_disable(struct ftrace_event_call *call)
593 int num;
595 num = ((struct syscall_metadata *)call->data)->syscall_nr;
597 mutex_lock(&syscall_trace_lock);
598 sys_perf_refcount_enter--;
599 clear_bit(num, enabled_perf_enter_syscalls);
600 if (!sys_perf_refcount_enter)
601 unregister_trace_sys_enter(perf_syscall_enter, NULL);
602 mutex_unlock(&syscall_trace_lock);
605 static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
607 struct syscall_metadata *sys_data;
608 struct syscall_trace_exit *rec;
609 struct hlist_head *head;
610 int syscall_nr;
611 int rctx;
612 int size;
614 syscall_nr = trace_get_syscall_nr(current, regs);
615 if (syscall_nr < 0)
616 return;
617 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
618 return;
620 sys_data = syscall_nr_to_meta(syscall_nr);
621 if (!sys_data)
622 return;
624 /* We can probably do that at build time */
625 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
626 size -= sizeof(u32);
629 * Impossible, but be paranoid with the future
630 * How to put this check outside runtime?
632 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
633 "exit event has grown above perf buffer size"))
634 return;
636 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
637 sys_data->exit_event->event.type, regs, &rctx);
638 if (!rec)
639 return;
641 rec->nr = syscall_nr;
642 rec->ret = syscall_get_return_value(current, regs);
644 head = this_cpu_ptr(sys_data->exit_event->perf_events);
645 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
648 static int perf_sysexit_enable(struct ftrace_event_call *call)
650 int ret = 0;
651 int num;
653 num = ((struct syscall_metadata *)call->data)->syscall_nr;
655 mutex_lock(&syscall_trace_lock);
656 if (!sys_perf_refcount_exit)
657 ret = register_trace_sys_exit(perf_syscall_exit, NULL);
658 if (ret) {
659 pr_info("event trace: Could not activate"
660 "syscall exit trace point");
661 } else {
662 set_bit(num, enabled_perf_exit_syscalls);
663 sys_perf_refcount_exit++;
665 mutex_unlock(&syscall_trace_lock);
666 return ret;
669 static void perf_sysexit_disable(struct ftrace_event_call *call)
671 int num;
673 num = ((struct syscall_metadata *)call->data)->syscall_nr;
675 mutex_lock(&syscall_trace_lock);
676 sys_perf_refcount_exit--;
677 clear_bit(num, enabled_perf_exit_syscalls);
678 if (!sys_perf_refcount_exit)
679 unregister_trace_sys_exit(perf_syscall_exit, NULL);
680 mutex_unlock(&syscall_trace_lock);
683 #endif /* CONFIG_PERF_EVENTS */
685 static int syscall_enter_register(struct ftrace_event_call *event,
686 enum trace_reg type, void *data)
688 switch (type) {
689 case TRACE_REG_REGISTER:
690 return reg_event_syscall_enter(event);
691 case TRACE_REG_UNREGISTER:
692 unreg_event_syscall_enter(event);
693 return 0;
695 #ifdef CONFIG_PERF_EVENTS
696 case TRACE_REG_PERF_REGISTER:
697 return perf_sysenter_enable(event);
698 case TRACE_REG_PERF_UNREGISTER:
699 perf_sysenter_disable(event);
700 return 0;
701 case TRACE_REG_PERF_OPEN:
702 case TRACE_REG_PERF_CLOSE:
703 case TRACE_REG_PERF_ADD:
704 case TRACE_REG_PERF_DEL:
705 return 0;
706 #endif
708 return 0;
711 static int syscall_exit_register(struct ftrace_event_call *event,
712 enum trace_reg type, void *data)
714 switch (type) {
715 case TRACE_REG_REGISTER:
716 return reg_event_syscall_exit(event);
717 case TRACE_REG_UNREGISTER:
718 unreg_event_syscall_exit(event);
719 return 0;
721 #ifdef CONFIG_PERF_EVENTS
722 case TRACE_REG_PERF_REGISTER:
723 return perf_sysexit_enable(event);
724 case TRACE_REG_PERF_UNREGISTER:
725 perf_sysexit_disable(event);
726 return 0;
727 case TRACE_REG_PERF_OPEN:
728 case TRACE_REG_PERF_CLOSE:
729 case TRACE_REG_PERF_ADD:
730 case TRACE_REG_PERF_DEL:
731 return 0;
732 #endif
734 return 0;