kernel/trace/trace_kprobe.c

/*
 * Kprobes-based tracing events
 *
 * Created by Masami Hiramatsu <mhiramat@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <linux/module.h>
#include <linux/uaccess.h>

#include "trace_probe.h"

#define KPROBE_EVENT_SYSTEM "kprobes"
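
/*
 * Editorial note: "kprobes" is the default group name, so an event
 * defined without an explicit GRP shows up as events/kprobes/EVENT
 * under the tracing debugfs directory.
 */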

/**
 * Kprobe event core functions
 */
struct trace_probe {
	struct list_head	list;
	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
	unsigned long		nhit;
	unsigned int		flags;	/* For TP_FLAG_* */
	const char		*symbol;	/* symbol name */
	struct ftrace_event_class	class;
	struct ftrace_event_call	call;
	struct list_head	files;
	ssize_t			size;		/* trace entry size */
	unsigned int		nr_args;
	struct probe_arg	args[];
};

struct event_file_link {
	struct ftrace_event_file	*file;
	struct list_head		list;
};

#define SIZEOF_TRACE_PROBE(n)			\
	(offsetof(struct trace_probe, args) +	\
	(sizeof(struct probe_arg) * (n)))
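
/*
 * Illustration (editorial, not in the original source): for a probe
 * with two arguments, SIZEOF_TRACE_PROBE(2) is
 * offsetof(struct trace_probe, args) + 2 * sizeof(struct probe_arg),
 * so a single kzalloc() covers the header and the flexible args[] array.
 */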

static __kprobes bool trace_probe_is_return(struct trace_probe *tp)
{
	return tp->rp.handler != NULL;
}

static __kprobes const char *trace_probe_symbol(struct trace_probe *tp)
{
	return tp->symbol ? tp->symbol : "unknown";
}

static __kprobes unsigned long trace_probe_offset(struct trace_probe *tp)
{
	return tp->rp.kp.offset;
}

static __kprobes bool trace_probe_is_enabled(struct trace_probe *tp)
{
	return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE));
}

static __kprobes bool trace_probe_is_registered(struct trace_probe *tp)
{
	return !!(tp->flags & TP_FLAG_REGISTERED);
}

static __kprobes bool trace_probe_has_gone(struct trace_probe *tp)
{
	return !!(kprobe_gone(&tp->rp.kp));
}

static __kprobes bool trace_probe_within_module(struct trace_probe *tp,
						struct module *mod)
{
	int len = strlen(mod->name);
	const char *name = trace_probe_symbol(tp);

	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
}
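
/*
 * Example (editorial): a probe on "ext4:ext4_sync_file" is within the
 * module "ext4", because the symbol string begins with the module name
 * followed by ':'.
 */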

static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp)
{
	return !!strchr(trace_probe_symbol(tp), ':');
}

static int register_probe_event(struct trace_probe *tp);
static void unregister_probe_event(struct trace_probe *tp);

static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);

static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);

/*
 * Allocate new trace_probe and initialize it (including kprobes).
 */
static struct trace_probe *alloc_trace_probe(const char *group,
					     const char *event,
					     void *addr,
					     const char *symbol,
					     unsigned long offs,
					     int nargs, bool is_return)
{
	struct trace_probe *tp;
	int ret = -ENOMEM;

	tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(ret);

	if (symbol) {
		tp->symbol = kstrdup(symbol, GFP_KERNEL);
		if (!tp->symbol)
			goto error;
		tp->rp.kp.symbol_name = tp->symbol;
		tp->rp.kp.offset = offs;
	} else
		tp->rp.kp.addr = addr;

	if (is_return)
		tp->rp.handler = kretprobe_dispatcher;
	else
		tp->rp.kp.pre_handler = kprobe_dispatcher;

	if (!event || !is_good_name(event)) {
		ret = -EINVAL;
		goto error;
	}

	tp->call.class = &tp->class;
	tp->call.name = kstrdup(event, GFP_KERNEL);
	if (!tp->call.name)
		goto error;

	if (!group || !is_good_name(group)) {
		ret = -EINVAL;
		goto error;
	}

	tp->class.system = kstrdup(group, GFP_KERNEL);
	if (!tp->class.system)
		goto error;

	INIT_LIST_HEAD(&tp->list);
	INIT_LIST_HEAD(&tp->files);
	return tp;

error:
	kfree(tp->call.name);
	kfree(tp->symbol);
	kfree(tp);
	return ERR_PTR(ret);
}

static void free_trace_probe(struct trace_probe *tp)
{
	int i;

	for (i = 0; i < tp->nr_args; i++)
		traceprobe_free_probe_arg(&tp->args[i]);

	kfree(tp->call.class->system);
	kfree(tp->call.name);
	kfree(tp->symbol);
	kfree(tp);
}

static struct trace_probe *find_trace_probe(const char *event,
					    const char *group)
{
	struct trace_probe *tp;

	list_for_each_entry(tp, &probe_list, list)
		if (strcmp(tp->call.name, event) == 0 &&
		    strcmp(tp->call.class->system, group) == 0)
			return tp;
	return NULL;
}

/*
 * Enable trace_probe.
 * If the file is NULL, enable the "perf" handler; otherwise enable
 * the "trace" handler.
 */
static int
enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
{
	int ret = 0;

	if (file) {
		struct event_file_link *link;

		link = kmalloc(sizeof(*link), GFP_KERNEL);
		if (!link) {
			ret = -ENOMEM;
			goto out;
		}

		link->file = file;
		list_add_tail_rcu(&link->list, &tp->files);

		tp->flags |= TP_FLAG_TRACE;
	} else
		tp->flags |= TP_FLAG_PROFILE;

	if (trace_probe_is_registered(tp) && !trace_probe_has_gone(tp)) {
		if (trace_probe_is_return(tp))
			ret = enable_kretprobe(&tp->rp);
		else
			ret = enable_kprobe(&tp->rp.kp);
	}
out:
	return ret;
}

static struct event_file_link *
find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file)
{
	struct event_file_link *link;

	list_for_each_entry(link, &tp->files, list)
		if (link->file == file)
			return link;

	return NULL;
}

/*
 * Disable trace_probe.
 * If the file is NULL, disable the "perf" handler; otherwise disable
 * the "trace" handler.
 */
static int
disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
{
	int ret = 0;

	if (file) {
		struct event_file_link *link;

		link = find_event_file_link(tp, file);
		if (!link) {
			ret = -EINVAL;
			goto out;
		}

		list_del_rcu(&link->list);
		/* synchronize with kprobe_trace_func/kretprobe_trace_func */
		synchronize_sched();
		kfree(link);

		if (!list_empty(&tp->files))
			goto out;

		tp->flags &= ~TP_FLAG_TRACE;
	} else
		tp->flags &= ~TP_FLAG_PROFILE;

	if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) {
		if (trace_probe_is_return(tp))
			disable_kretprobe(&tp->rp);
		else
			disable_kprobe(&tp->rp.kp);
	}
out:
	return ret;
}

/* Internal register function - just handle k*probes and flags */
static int __register_trace_probe(struct trace_probe *tp)
{
	int i, ret;

	if (trace_probe_is_registered(tp))
		return -EINVAL;

	for (i = 0; i < tp->nr_args; i++)
		traceprobe_update_arg(&tp->args[i]);

	/* Set/clear disabled flag according to tp->flags */
	if (trace_probe_is_enabled(tp))
		tp->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
	else
		tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;

	if (trace_probe_is_return(tp))
		ret = register_kretprobe(&tp->rp);
	else
		ret = register_kprobe(&tp->rp.kp);

	if (ret == 0)
		tp->flags |= TP_FLAG_REGISTERED;
	else {
		pr_warning("Could not insert probe at %s+%lu: %d\n",
			   trace_probe_symbol(tp), trace_probe_offset(tp), ret);
		if (ret == -ENOENT && trace_probe_is_on_module(tp)) {
			pr_warning("This probe might be able to register after "
				   "target module is loaded. Continue.\n");
			ret = 0;
		} else if (ret == -EILSEQ) {
			pr_warning("Probing address(0x%p) is not an "
				   "instruction boundary.\n",
				   tp->rp.kp.addr);
			ret = -EINVAL;
		}
	}

	return ret;
}

/* Internal unregister function - just handle k*probes and flags */
static void __unregister_trace_probe(struct trace_probe *tp)
{
	if (trace_probe_is_registered(tp)) {
		if (trace_probe_is_return(tp))
			unregister_kretprobe(&tp->rp);
		else
			unregister_kprobe(&tp->rp.kp);
		tp->flags &= ~TP_FLAG_REGISTERED;
		/* Cleanup kprobe for reuse */
		if (tp->rp.kp.symbol_name)
			tp->rp.kp.addr = NULL;
	}
}

/* Unregister a trace_probe and probe_event: must be called with probe_lock held */
static int unregister_trace_probe(struct trace_probe *tp)
{
	/* An enabled event cannot be unregistered */
	if (trace_probe_is_enabled(tp))
		return -EBUSY;

	__unregister_trace_probe(tp);
	list_del(&tp->list);
	unregister_probe_event(tp);

	return 0;
}

/* Register a trace_probe and probe_event */
static int register_trace_probe(struct trace_probe *tp)
{
	struct trace_probe *old_tp;
	int ret;

	mutex_lock(&probe_lock);

	/* Delete the old (same name) event if it exists */
	old_tp = find_trace_probe(tp->call.name, tp->call.class->system);
	if (old_tp) {
		ret = unregister_trace_probe(old_tp);
		if (ret < 0)
			goto end;
		free_trace_probe(old_tp);
	}

	/* Register new event */
	ret = register_probe_event(tp);
	if (ret) {
		pr_warning("Failed to register probe event(%d)\n", ret);
		goto end;
	}

	/* Register k*probe */
	ret = __register_trace_probe(tp);
	if (ret < 0)
		unregister_probe_event(tp);
	else
		list_add_tail(&tp->list, &probe_list);

end:
	mutex_unlock(&probe_lock);
	return ret;
}

/* Module notifier callback: re-register probes on a coming module */
static int trace_probe_module_callback(struct notifier_block *nb,
				       unsigned long val, void *data)
{
	struct module *mod = data;
	struct trace_probe *tp;
	int ret;

	if (val != MODULE_STATE_COMING)
		return NOTIFY_DONE;

	/* Update probes on the coming module */
	mutex_lock(&probe_lock);
	list_for_each_entry(tp, &probe_list, list) {
		if (trace_probe_within_module(tp, mod)) {
			/* Don't need to check busy - this should have gone. */
			__unregister_trace_probe(tp);
			ret = __register_trace_probe(tp);
			if (ret)
				pr_warning("Failed to re-register probe %s on "
					   "%s: %d\n",
					   tp->call.name, mod->name, ret);
		}
	}
	mutex_unlock(&probe_lock);

	return NOTIFY_DONE;
}

static struct notifier_block trace_probe_module_nb = {
	.notifier_call = trace_probe_module_callback,
	.priority = 1	/* Invoked after kprobe module callback */
};

static int create_trace_probe(int argc, char **argv)
{
	/*
	 * Argument syntax:
	 *  - Add kprobe: p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
	 *  - Add kretprobe: r[:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
	 * Fetch args:
	 *  $retval	: fetch return value
	 *  $stack	: fetch stack address
	 *  $stackN	: fetch Nth of stack (N:0-)
	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
	 *  %REG	: fetch register REG
	 * Dereferencing memory fetch:
	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
	 * Alias name of args:
	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
	 * Type of args:
	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
	 */
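	/*
	 * Illustrative commands (editorial; register names are
	 * arch-specific, see Documentation/trace/kprobetrace.txt):
	 *   p:myprobe do_sys_open dfd=%ax filename=+0(%si):string
	 *   r:myretprobe do_sys_open $retval
	 * The first traces entry to do_sys_open(); the second fetches
	 * its return value.
	 */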
	struct trace_probe *tp;
	int i, ret = 0;
	bool is_return = false, is_delete = false;
	char *symbol = NULL, *event = NULL, *group = NULL;
	char *arg;
	unsigned long offset = 0;
	void *addr = NULL;
	char buf[MAX_EVENT_NAME_LEN];

	/* argc must be >= 1 */
	if (argv[0][0] == 'p')
		is_return = false;
	else if (argv[0][0] == 'r')
		is_return = true;
	else if (argv[0][0] == '-')
		is_delete = true;
	else {
		pr_info("Probe definition must start with 'p', 'r' or '-'.\n");
		return -EINVAL;
	}

	if (argv[0][1] == ':') {
		event = &argv[0][2];
		if (strchr(event, '/')) {
			group = event;
			event = strchr(group, '/') + 1;
			event[-1] = '\0';
			if (strlen(group) == 0) {
				pr_info("Group name is not specified\n");
				return -EINVAL;
			}
		}
		if (strlen(event) == 0) {
			pr_info("Event name is not specified\n");
			return -EINVAL;
		}
	}
	if (!group)
		group = KPROBE_EVENT_SYSTEM;

	if (is_delete) {
		if (!event) {
			pr_info("Delete command needs an event name.\n");
			return -EINVAL;
		}
		mutex_lock(&probe_lock);
		tp = find_trace_probe(event, group);
		if (!tp) {
			mutex_unlock(&probe_lock);
			pr_info("Event %s/%s doesn't exist.\n", group, event);
			return -ENOENT;
		}
		/* delete an event */
		ret = unregister_trace_probe(tp);
		if (ret == 0)
			free_trace_probe(tp);
		mutex_unlock(&probe_lock);
		return ret;
	}

	if (argc < 2) {
		pr_info("Probe point is not specified.\n");
		return -EINVAL;
	}
	if (isdigit(argv[1][0])) {
		if (is_return) {
			pr_info("Return probe point must be a symbol.\n");
			return -EINVAL;
		}
		/* an address specified */
		ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr);
		if (ret) {
			pr_info("Failed to parse address.\n");
			return ret;
		}
	} else {
		/* a symbol specified */
		symbol = argv[1];
		/* TODO: support .init module functions */
		ret = traceprobe_split_symbol_offset(symbol, &offset);
		if (ret) {
			pr_info("Failed to parse symbol.\n");
			return ret;
		}
		if (offset && is_return) {
			pr_info("Return probe must be used without offset.\n");
			return -EINVAL;
		}
	}
	argc -= 2; argv += 2;

	/* setup a probe */
	if (!event) {
		/* Make a new event name */
		if (symbol)
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
				 is_return ? 'r' : 'p', symbol, offset);
		else
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
				 is_return ? 'r' : 'p', addr);
		event = buf;
	}
	tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
			       is_return);
	if (IS_ERR(tp)) {
		pr_info("Failed to allocate trace_probe.(%d)\n",
			(int)PTR_ERR(tp));
		return PTR_ERR(tp);
	}

	/* parse arguments */
	ret = 0;
	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
		/* Increment count for freeing args in error case */
		tp->nr_args++;

		/* Parse argument name */
		arg = strchr(argv[i], '=');
		if (arg) {
			*arg++ = '\0';
			tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
		} else {
			arg = argv[i];
			/* If argument name is omitted, set "argN" */
			snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
			tp->args[i].name = kstrdup(buf, GFP_KERNEL);
		}

		if (!tp->args[i].name) {
			pr_info("Failed to allocate argument[%d] name.\n", i);
			ret = -ENOMEM;
			goto error;
		}

		if (!is_good_name(tp->args[i].name)) {
			pr_info("Invalid argument[%d] name: %s\n",
				i, tp->args[i].name);
			ret = -EINVAL;
			goto error;
		}

		if (traceprobe_conflict_field_name(tp->args[i].name,
						   tp->args, i)) {
			pr_info("Argument[%d] name '%s' conflicts with "
				"another field.\n", i, argv[i]);
			ret = -EINVAL;
			goto error;
		}

		/* Parse fetch argument */
		ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i],
						 is_return, true);
		if (ret) {
			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
			goto error;
		}
	}

	ret = register_trace_probe(tp);
	if (ret)
		goto error;
	return 0;

error:
	free_trace_probe(tp);
	return ret;
}

static int release_all_trace_probes(void)
{
	struct trace_probe *tp;
	int ret = 0;

	mutex_lock(&probe_lock);
	/* Ensure no probe is in use. */
	list_for_each_entry(tp, &probe_list, list)
		if (trace_probe_is_enabled(tp)) {
			ret = -EBUSY;
			goto end;
		}
	/* TODO: Use batch unregistration */
	while (!list_empty(&probe_list)) {
		tp = list_entry(probe_list.next, struct trace_probe, list);
		unregister_trace_probe(tp);
		free_trace_probe(tp);
	}

end:
	mutex_unlock(&probe_lock);

	return ret;
}

/* Probes listing interfaces */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&probe_lock);
	return seq_list_start(&probe_list, *pos);
}

static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return seq_list_next(v, &probe_list, pos);
}

static void probes_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&probe_lock);
}

static int probes_seq_show(struct seq_file *m, void *v)
{
	struct trace_probe *tp = v;
	int i;

	seq_printf(m, "%c", trace_probe_is_return(tp) ? 'r' : 'p');
	seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);

	if (!tp->symbol)
		seq_printf(m, " 0x%p", tp->rp.kp.addr);
	else if (tp->rp.kp.offset)
		seq_printf(m, " %s+%u", trace_probe_symbol(tp),
			   tp->rp.kp.offset);
	else
		seq_printf(m, " %s", trace_probe_symbol(tp));

	for (i = 0; i < tp->nr_args; i++)
		seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
	seq_printf(m, "\n");

	return 0;
}
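
/*
 * Illustrative kprobe_events line produced by the above (hypothetical
 * probe):
 *   p:kprobes/myprobe do_sys_open dfd=%ax filename=+0(%si):string
 */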

static const struct seq_operations probes_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_seq_show
};

static int probes_open(struct inode *inode, struct file *file)
{
	int ret;

	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		ret = release_all_trace_probes();
		if (ret < 0)
			return ret;
	}

	return seq_open(file, &probes_seq_op);
}

static ssize_t probes_write(struct file *file, const char __user *buffer,
			    size_t count, loff_t *ppos)
{
	return traceprobe_probes_write(file, buffer, count, ppos,
				       create_trace_probe);
}

static const struct file_operations kprobe_events_ops = {
	.owner          = THIS_MODULE,
	.open           = probes_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
	.write		= probes_write,
};
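
/*
 * Usage sketch (editorial): probes are added and removed by writing to
 * the debugfs file registered below, e.g.
 *   echo 'p:myprobe do_sys_open' >> /sys/kernel/debug/tracing/kprobe_events
 *   echo '-:myprobe' >> /sys/kernel/debug/tracing/kprobe_events
 * Opening the file for writing with O_TRUNC ('>' rather than '>>')
 * releases all probes via release_all_trace_probes().
 */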

/* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
	struct trace_probe *tp = v;

	seq_printf(m, "  %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
		   tp->rp.kp.nmissed);

	return 0;
}
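
/*
 * Illustrative kprobe_profile line (hypothetical event name):
 *   myprobe                                         12               0
 * Columns: event name, hit count (nhit), missed count (rp.kp.nmissed).
 */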

static const struct seq_operations profile_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_profile_seq_show
};

static int profile_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &profile_seq_op);
}

static const struct file_operations kprobe_profile_ops = {
	.owner          = THIS_MODULE,
	.open           = profile_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
};

/* Sum up total data length for dynamic arrays (strings) */
static __kprobes int __get_data_size(struct trace_probe *tp,
				     struct pt_regs *regs)
{
	int i, ret = 0;
	u32 len;

	for (i = 0; i < tp->nr_args; i++)
		if (unlikely(tp->args[i].fetch_size.fn)) {
			call_fetch(&tp->args[i].fetch_size, regs, &len);
			ret += len;
		}

	return ret;
}

/* Store the value of each argument */
static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
				       struct pt_regs *regs,
				       u8 *data, int maxlen)
{
	int i;
	u32 end = tp->size;
	u32 *dl;	/* Data (relative) location */

	for (i = 0; i < tp->nr_args; i++) {
		if (unlikely(tp->args[i].fetch_size.fn)) {
			/*
			 * First, we set the relative location and
			 * maximum data length to *dl
			 */
			dl = (u32 *)(data + tp->args[i].offset);
			*dl = make_data_rloc(maxlen, end - tp->args[i].offset);
			/* Then try to fetch string or dynamic array data */
			call_fetch(&tp->args[i].fetch, regs, dl);
			/* Reduce maximum length */
			end += get_rloc_len(*dl);
			maxlen -= get_rloc_len(*dl);
			/* Trick here, convert data_rloc to data_loc */
			*dl = convert_rloc_to_loc(*dl,
				      ent_size + tp->args[i].offset);
		} else
			/* Just fetching data normally */
			call_fetch(&tp->args[i].fetch, regs,
				   data + tp->args[i].offset);
	}
}
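
/*
 * Sketch of the resulting record layout (editorial):
 *
 *   [entry header][fixed-size arg slots at args[i].offset][dynamic data]
 *
 * For a dynamic argument (e.g. a string), the fixed slot receives a u32
 * "data location" packing the payload's length and offset, so consumers
 * can locate the variable-length data appended after the fixed area.
 */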

/* Kprobe handler */
static __kprobes void
__kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs,
		    struct ftrace_event_file *ftrace_file)
{
	struct kprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, dsize, pc;
	unsigned long irq_flags;
	struct ftrace_event_call *call = &tp->call;

	WARN_ON(call != ftrace_file->event_call);

	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(tp, regs);
	size = sizeof(*entry) + tp->size + dsize;

	event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
						call->event.type,
						size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->ip = (unsigned long)tp->rp.kp.addr;
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	if (!filter_current_check_discard(buffer, call, entry, event))
		trace_buffer_unlock_commit_regs(buffer, event,
						irq_flags, pc, regs);
}

static __kprobes void
kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs)
{
	struct event_file_link *link;

	list_for_each_entry_rcu(link, &tp->files, list)
		__kprobe_trace_func(tp, regs, link->file);
}

/* Kretprobe handler */
static __kprobes void
__kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
		       struct pt_regs *regs,
		       struct ftrace_event_file *ftrace_file)
{
	struct kretprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, pc, dsize;
	unsigned long irq_flags;
	struct ftrace_event_call *call = &tp->call;

	WARN_ON(call != ftrace_file->event_call);

	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(tp, regs);
	size = sizeof(*entry) + tp->size + dsize;

	event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
						call->event.type,
						size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->func = (unsigned long)tp->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	if (!filter_current_check_discard(buffer, call, entry, event))
		trace_buffer_unlock_commit_regs(buffer, event,
						irq_flags, pc, regs);
}

static __kprobes void
kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
		     struct pt_regs *regs)
{
	struct event_file_link *link;

	list_for_each_entry_rcu(link, &tp->files, list)
		__kretprobe_trace_func(tp, ri, regs, link->file);
}

/* Event entry printers */
static enum print_line_t
print_kprobe_event(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
{
	struct kprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	if (!trace_seq_printf(s, "%s: (", tp->call.name))
		goto partial;

	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
		goto partial;

	if (!trace_seq_puts(s, ")"))
		goto partial;

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto partial;

	if (!trace_seq_puts(s, "\n"))
		goto partial;

	return TRACE_TYPE_HANDLED;
partial:
	return TRACE_TYPE_PARTIAL_LINE;
}

static enum print_line_t
print_kretprobe_event(struct trace_iterator *iter, int flags,
		      struct trace_event *event)
{
	struct kretprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kretprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	if (!trace_seq_printf(s, "%s: (", tp->call.name))
		goto partial;

	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
		goto partial;

	if (!trace_seq_puts(s, " <- "))
		goto partial;

	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
		goto partial;

	if (!trace_seq_puts(s, ")"))
		goto partial;

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto partial;

	if (!trace_seq_puts(s, "\n"))
		goto partial;

	return TRACE_TYPE_HANDLED;
partial:
	return TRACE_TYPE_PARTIAL_LINE;
}

static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
{
	int ret, i;
	struct kprobe_trace_entry_head field;
	struct trace_probe *tp = (struct trace_probe *)event_call->data;

	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tp->nr_args; i++) {
		ret = trace_define_field(event_call, tp->args[i].type->fmttype,
					 tp->args[i].name,
					 sizeof(field) + tp->args[i].offset,
					 tp->args[i].type->size,
					 tp->args[i].type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
{
	int ret, i;
	struct kretprobe_trace_entry_head field;
	struct trace_probe *tp = (struct trace_probe *)event_call->data;

	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tp->nr_args; i++) {
		ret = trace_define_field(event_call, tp->args[i].type->fmttype,
					 tp->args[i].name,
					 sizeof(field) + tp->args[i].offset,
					 tp->args[i].type->size,
					 tp->args[i].type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
{
	int i;
	int pos = 0;

	const char *fmt, *arg;

	if (!trace_probe_is_return(tp)) {
		fmt = "(%lx)";
		arg = "REC->" FIELD_STRING_IP;
	} else {
		fmt = "(%lx <- %lx)";
		arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
	}

	/* When len=0, we just calculate the needed length */
#define LEN_OR_ZERO (len ? len - pos : 0)

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);

	for (i = 0; i < tp->nr_args; i++) {
		pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
				tp->args[i].name, tp->args[i].type->fmt);
	}

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);

	for (i = 0; i < tp->nr_args; i++) {
		if (strcmp(tp->args[i].type->name, "string") == 0)
			pos += snprintf(buf + pos, LEN_OR_ZERO,
					", __get_str(%s)",
					tp->args[i].name);
		else
			pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
					tp->args[i].name);
	}

#undef LEN_OR_ZERO

	/* return the length of print_fmt */
	return pos;
}
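
/*
 * Illustrative result (editorial): for a kprobe with one argument named
 * arg1, __set_print_fmt() builds something like
 *   "(%lx) arg1=%lx", REC->__probe_ip, REC->arg1
 * assuming FIELD_STRING_IP expands to "__probe_ip" as in trace_probe.h.
 */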

static int set_print_fmt(struct trace_probe *tp)
{
	int len;
	char *print_fmt;

	/* First: called with 0 length to calculate the needed length */
	len = __set_print_fmt(tp, NULL, 0);
	print_fmt = kmalloc(len + 1, GFP_KERNEL);
	if (!print_fmt)
		return -ENOMEM;

	/* Second: actually write the @print_fmt */
	__set_print_fmt(tp, print_fmt, len + 1);
	tp->call.print_fmt = print_fmt;

	return 0;
}

#ifdef CONFIG_PERF_EVENTS

/* Kprobe profile handler */
static __kprobes void
kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
{
	struct ftrace_event_call *call = &tp->call;
	struct kprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		return;

	dsize = __get_data_size(tp, regs);
	__size = sizeof(*entry) + tp->size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);
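	/*
	 * Editorial note: the ALIGN above rounds the record up so that
	 * the entry plus perf's u32 size header lands on a u64 boundary;
	 * the header is then subtracted again because perf accounts for
	 * it separately when the buffer is prepared.
	 */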
	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
		      "profile buffer not large enough"))
		return;

	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (!entry)
		return;

	entry->ip = (unsigned long)tp->rp.kp.addr;
	memset(&entry[1], 0, dsize);
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
}

/* Kretprobe profile handler */
static __kprobes void
kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
		    struct pt_regs *regs)
{
	struct ftrace_event_call *call = &tp->call;
	struct kretprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		return;

	dsize = __get_data_size(tp, regs);
	__size = sizeof(*entry) + tp->size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);
	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
		      "profile buffer not large enough"))
		return;

	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (!entry)
		return;

	entry->func = (unsigned long)tp->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
}
#endif	/* CONFIG_PERF_EVENTS */

/*
 * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
 *
 * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
 * lockless, but we can't race with this __init function.
 */
static __kprobes
int kprobe_register(struct ftrace_event_call *event,
		    enum trace_reg type, void *data)
{
	struct trace_probe *tp = (struct trace_probe *)event->data;
	struct ftrace_event_file *file = data;

	switch (type) {
	case TRACE_REG_REGISTER:
		return enable_trace_probe(tp, file);
	case TRACE_REG_UNREGISTER:
		return disable_trace_probe(tp, file);

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return enable_trace_probe(tp, NULL);
	case TRACE_REG_PERF_UNREGISTER:
		return disable_trace_probe(tp, NULL);
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		return 0;
#endif
	}
	return 0;
}

static __kprobes
int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);

	tp->nhit++;

	if (tp->flags & TP_FLAG_TRACE)
		kprobe_trace_func(tp, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tp->flags & TP_FLAG_PROFILE)
		kprobe_perf_func(tp, regs);
#endif
	return 0;	/* We don't tweak the kernel, so just return 0 */
}

static __kprobes
int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);

	tp->nhit++;

	if (tp->flags & TP_FLAG_TRACE)
		kretprobe_trace_func(tp, ri, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tp->flags & TP_FLAG_PROFILE)
		kretprobe_perf_func(tp, ri, regs);
#endif
	return 0;	/* We don't tweak the kernel, so just return 0 */
}

static struct trace_event_functions kretprobe_funcs = {
	.trace		= print_kretprobe_event
};

static struct trace_event_functions kprobe_funcs = {
	.trace		= print_kprobe_event
};

static int register_probe_event(struct trace_probe *tp)
{
	struct ftrace_event_call *call = &tp->call;
	int ret;

	/* Initialize ftrace_event_call */
	INIT_LIST_HEAD(&call->class->fields);
	if (trace_probe_is_return(tp)) {
		call->event.funcs = &kretprobe_funcs;
		call->class->define_fields = kretprobe_event_define_fields;
	} else {
		call->event.funcs = &kprobe_funcs;
		call->class->define_fields = kprobe_event_define_fields;
	}
	if (set_print_fmt(tp) < 0)
		return -ENOMEM;
	ret = register_ftrace_event(&call->event);
	if (!ret) {
		kfree(call->print_fmt);
		return -ENODEV;
	}
	call->flags = 0;
	call->class->reg = kprobe_register;
	call->data = tp;
	ret = trace_add_event_call(call);
	if (ret) {
		pr_info("Failed to register kprobe event: %s\n", call->name);
		kfree(call->print_fmt);
		unregister_ftrace_event(&call->event);
	}
	return ret;
}

static void unregister_probe_event(struct trace_probe *tp)
{
	/* tp->event is unregistered in trace_remove_event_call() */
	trace_remove_event_call(&tp->call);
	kfree(tp->call.print_fmt);
}

/* Make a debugfs interface for controlling probe points */
static __init int init_kprobe_trace(void)
{
	struct dentry *d_tracer;
	struct dentry *entry;

	if (register_module_notifier(&trace_probe_module_nb))
		return -EINVAL;

	d_tracer = tracing_init_dentry();
	if (!d_tracer)
		return 0;

	entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
				    NULL, &kprobe_events_ops);

	/* Event list interface */
	if (!entry)
		pr_warning("Could not create debugfs "
			   "'kprobe_events' entry\n");

	/* Profile interface */
	entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
				    NULL, &kprobe_profile_ops);

	if (!entry)
		pr_warning("Could not create debugfs "
			   "'kprobe_profile' entry\n");
	return 0;
}
fs_initcall(init_kprobe_trace);

#ifdef CONFIG_FTRACE_STARTUP_TEST

/*
 * The "__used" keeps gcc from removing the function symbol
 * from the kallsyms table.
 */
static __used int kprobe_trace_selftest_target(int a1, int a2, int a3,
					       int a4, int a5, int a6)
{
	return a1 + a2 + a3 + a4 + a5 + a6;
}

static struct ftrace_event_file *
find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr)
{
	struct ftrace_event_file *file;

	list_for_each_entry(file, &tr->events, list)
		if (file->event_call == &tp->call)
			return file;

	return NULL;
}

/*
 * Nobody but us can call enable_trace_probe/disable_trace_probe at this
 * stage, so we can do this lockless.
 */
static __init int kprobe_trace_self_tests_init(void)
{
	int ret, warn = 0;
	int (*target)(int, int, int, int, int, int);
	struct trace_probe *tp;
	struct ftrace_event_file *file;

	target = kprobe_trace_selftest_target;

	pr_info("Testing kprobe tracing: ");

	ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target "
				 "$stack $stack0 +0($stack)",
				 create_trace_probe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function entry.\n");
		warn++;
	} else {
		/* Enable trace point */
		tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tp == NULL)) {
			pr_warn("error on getting new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tp, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else
				enable_trace_probe(tp, file);
		}
	}

	ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target "
				 "$retval", create_trace_probe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function return.\n");
		warn++;
	} else {
		/* Enable trace point */
		tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tp == NULL)) {
			pr_warn("error on getting 2nd new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tp, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else
				enable_trace_probe(tp, file);
		}
	}

	if (warn)
		goto end;

	ret = target(1, 2, 3, 4, 5, 6);

	/* Disable trace points before removing them */
	tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tp == NULL)) {
		pr_warn("error on getting test probe.\n");
		warn++;
	} else {
		file = find_trace_probe_file(tp, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else
			disable_trace_probe(tp, file);
	}

	tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tp == NULL)) {
		pr_warn("error on getting 2nd test probe.\n");
		warn++;
	} else {
		file = find_trace_probe_file(tp, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else
			disable_trace_probe(tp, file);
	}

	ret = traceprobe_command("-:testprobe", create_trace_probe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

	ret = traceprobe_command("-:testprobe2", create_trace_probe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

end:
	release_all_trace_probes();
	if (warn)
		pr_cont("NG: Some tests failed. Please check them.\n");
	else
		pr_cont("OK\n");
	return 0;
}

late_initcall(kprobe_trace_self_tests_init);

#endif