arch/x86/kernel/platform/i86pc/os/dtrace_subr.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26
  27 /*
  28  * Copyright (c) 2011, Joyent, Inc. All rights reserved.
  29  */
  30
  31 #include <sys/dtrace.h>
  32 #include <sys/fasttrap.h>
  33 #include <sys/x_call.h>
  34 #include <sys/cmn_err.h>
  35 #include <sys/trap.h>
  36 #include <sys/psw.h>
  37 #include <sys/privregs.h>
  38 #include <sys/machsystm.h>
  39 #include <vm/seg_kmem.h>
  40
  41 typedef struct dtrace_invop_hdlr {
  42         int (*dtih_func)(uintptr_t, uintptr_t *, uintptr_t);
  43         struct dtrace_invop_hdlr *dtih_next;
  44 } dtrace_invop_hdlr_t;
  45
  46 dtrace_invop_hdlr_t *dtrace_invop_hdlr;
  47
  48 int
  49 dtrace_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax)
  50 {
  51         dtrace_invop_hdlr_t *hdlr;
  52         int rval;
  53
  54         for (hdlr = dtrace_invop_hdlr; hdlr != NULL; hdlr = hdlr->dtih_next) {
  55                 if ((rval = hdlr->dtih_func(addr, stack, eax)) != 0)
  56                         return (rval);
  57         }
  58
  59         return (0);
  60 }
  61
  62 void
  63 dtrace_invop_add(int (*func)(uintptr_t, uintptr_t *, uintptr_t))
  64 {
  65         dtrace_invop_hdlr_t *hdlr;
  66
  67         hdlr = kmem_alloc(sizeof (dtrace_invop_hdlr_t), KM_SLEEP);
  68         hdlr->dtih_func = func;
  69         hdlr->dtih_next = dtrace_invop_hdlr;
  70         dtrace_invop_hdlr = hdlr;
  71 }
  72
  73 void
  74 dtrace_invop_remove(int (*func)(uintptr_t, uintptr_t *, uintptr_t))
  75 {
  76         dtrace_invop_hdlr_t *hdlr = dtrace_invop_hdlr, *prev = NULL;
  77
  78         for (;;) {
  79                 if (hdlr == NULL)
  80                         panic("attempt to remove non-existent invop handler");
  81
  82                 if (hdlr->dtih_func == func)
  83                         break;
  84
  85                 prev = hdlr;
  86                 hdlr = hdlr->dtih_next;
  87         }
  88
  89         if (prev == NULL) {
  90                 ASSERT(dtrace_invop_hdlr == hdlr);
  91                 dtrace_invop_hdlr = hdlr->dtih_next;
  92         } else {
  93                 ASSERT(dtrace_invop_hdlr != hdlr);
  94                 prev->dtih_next = hdlr->dtih_next;
  95         }
  96
  97         kmem_free(hdlr, sizeof (dtrace_invop_hdlr_t));
  98 }
  99
 100 int
 101 dtrace_getipl(void)
 102 {
 103         return (CPU->cpu_pri);
 104 }
 105
 106 /*ARGSUSED*/
 107 void
 108 dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
 109 {
 110 #ifdef __amd64
 111         extern uintptr_t toxic_addr;
 112         extern size_t toxic_size;
 113
 114         (*func)(0, _userlimit);
 115
 116         if (hole_end > hole_start)
 117                 (*func)(hole_start, hole_end);
 118         (*func)(toxic_addr, toxic_addr + toxic_size);
 119 #else
 120         extern void *device_arena_contains(void *, size_t, size_t *);
 121         caddr_t vaddr;
 122         size_t  len;
 123
 124         for (vaddr = (caddr_t)kernelbase; vaddr < (caddr_t)KERNEL_TEXT;
 125             vaddr += len) {
 126                 len = (caddr_t)KERNEL_TEXT - vaddr;
 127                 vaddr = device_arena_contains(vaddr, len, &len);
 128                 if (vaddr == NULL)
 129                         break;
 130                 (*func)((uintptr_t)vaddr, (uintptr_t)vaddr + len);
 131         }
 132 #endif
 133         (*func)(0, _userlimit);
 134 }
 135
 136 static int
 137 dtrace_xcall_func(dtrace_xcall_t func, void *arg)
 138 {
 139         (*func)(arg);
 140
 141         return (0);
 142 }
 143
 144 /*ARGSUSED*/
 145 void
 146 dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg)
 147 {
 148         cpuset_t set;
 149
 150         CPUSET_ZERO(set);
 151
 152         if (cpu == DTRACE_CPUALL) {
 153                 CPUSET_ALL(set);
 154         } else {
 155                 CPUSET_ADD(set, cpu);
 156         }
 157
 158         kpreempt_disable();
 159         xc_sync((xc_arg_t)func, (xc_arg_t)arg, 0, CPUSET2BV(set),
 160             (xc_func_t)dtrace_xcall_func);
 161         kpreempt_enable();
 162 }
 163
 164 void
 165 dtrace_sync_func(void)
 166 {}
 167
 168 void
 169 dtrace_sync(void)
 170 {
 171         dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL);
 172 }
 173
 174 int (*dtrace_pid_probe_ptr)(struct regs *);
 175 int (*dtrace_return_probe_ptr)(struct regs *);
 176
 177 void
 178 dtrace_user_probe(struct regs *rp, caddr_t addr, processorid_t cpuid)
 179 {
 180         krwlock_t *rwp;
 181         proc_t *p = curproc;
 182         extern void trap(struct regs *, caddr_t, processorid_t);
 183
 184         if (USERMODE(rp->r_cs) || (rp->r_ps & PS_VM)) {
 185                 if (curthread->t_cred != p->p_cred) {
 186                         cred_t *oldcred = curthread->t_cred;
 187                         /*
 188                          * DTrace accesses t_cred in probe context.  t_cred
 189                          * must always be either NULL, or point to a valid,
 190                          * allocated cred structure.
 191                          */
 192                         curthread->t_cred = crgetcred();
 193                         crfree(oldcred);
 194                 }
 195         }
 196
 197         if (rp->r_trapno == T_DTRACE_RET) {
 198                 uint8_t step = curthread->t_dtrace_step;
 199                 uint8_t ret = curthread->t_dtrace_ret;
 200                 uintptr_t npc = curthread->t_dtrace_npc;
 201
 202                 if (curthread->t_dtrace_ast) {
 203                         aston(curthread);
 204                         curthread->t_sig_check = 1;
 205                 }
 206
 207                 /*
 208                  * Clear all user tracing flags.
 209                  */
 210                 curthread->t_dtrace_ft = 0;
 211
 212                 /*
 213                  * If we weren't expecting to take a return probe trap, kill
 214                  * the process as though it had just executed an unassigned
 215                  * trap instruction.
 216                  */
 217                 if (step == 0) {
 218                         tsignal(curthread, SIGILL);
 219                         return;
 220                 }
 221
 222                 /*
 223                  * If we hit this trap unrelated to a return probe, we're
 224                  * just here to reset the AST flag since we deferred a signal
 225                  * until after we logically single-stepped the instruction we
 226                  * copied out.
 227                  */
 228                 if (ret == 0) {
 229                         rp->r_pc = npc;
 230                         return;
 231                 }
 232
 233                 /*
 234                  * We need to wait until after we've called the
 235                  * dtrace_return_probe_ptr function pointer to set %pc.
 236                  */
 237                 rwp = &CPU->cpu_ft_lock;
 238                 rw_enter(rwp, RW_READER);
 239                 if (dtrace_return_probe_ptr != NULL)
 240                         (void) (*dtrace_return_probe_ptr)(rp);
 241                 rw_exit(rwp);
 242                 rp->r_pc = npc;
 243
 244         } else if (rp->r_trapno == T_BPTFLT) {
 245                 uint8_t instr, instr2;
 246                 caddr_t linearpc;
 247                 rwp = &CPU->cpu_ft_lock;
 248
 249                 /*
 250                  * The DTrace fasttrap provider uses the breakpoint trap
 251                  * (int 3). We let DTrace take the first crack at handling
 252                  * this trap; if it's not a probe that DTrace knowns about,
 253                  * we call into the trap() routine to handle it like a
 254                  * breakpoint placed by a conventional debugger.
 255                  */
 256                 rw_enter(rwp, RW_READER);
 257                 if (dtrace_pid_probe_ptr != NULL &&
 258                     (*dtrace_pid_probe_ptr)(rp) == 0) {
 259                         rw_exit(rwp);
 260                         return;
 261                 }
 262                 rw_exit(rwp);
 263
 264                 if (dtrace_linear_pc(rp, p, &linearpc) != 0) {
 265                         trap(rp, addr, cpuid);
 266                         return;
 267                 }
 268
 269                 /*
 270                  * If the instruction that caused the breakpoint trap doesn't
 271                  * look like an int 3 anymore, it may be that this tracepoint
 272                  * was removed just after the user thread executed it. In
 273                  * that case, return to user land to retry the instuction.
 274                  * Note that we assume the length of the instruction to retry
 275                  * is 1 byte because that's the length of FASTTRAP_INSTR.
 276                  * We check for r_pc > 0 and > 2 so that we don't have to
 277                  * deal with segment wraparound.
 278                  */
 279                 if (rp->r_pc > 0 && fuword8(linearpc - 1, &instr) == 0 &&
 280                     instr != FASTTRAP_INSTR &&
 281                     (instr != 3 || (rp->r_pc >= 2 &&
 282                     (fuword8(linearpc - 2, &instr2) != 0 || instr2 != 0xCD)))) {
 283                         rp->r_pc--;
 284                         return;
 285                 }
 286
 287                 trap(rp, addr, cpuid);
 288
 289         } else {
 290                 trap(rp, addr, cpuid);
 291         }
 292 }
 293
 294 void
 295 dtrace_safe_synchronous_signal(void)
 296 {
 297         kthread_t *t = curthread;
 298         struct regs *rp = lwptoregs(ttolwp(t));
 299         size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;
 300
 301         ASSERT(t->t_dtrace_on);
 302
 303         /*
 304          * If we're not in the range of scratch addresses, we're not actually
 305          * tracing user instructions so turn off the flags. If the instruction
 306          * we copied out caused a synchonous trap, reset the pc back to its
 307          * original value and turn off the flags.
 308          */
 309         if (rp->r_pc < t->t_dtrace_scrpc ||
 310             rp->r_pc > t->t_dtrace_astpc + isz) {
 311                 t->t_dtrace_ft = 0;
 312         } else if (rp->r_pc == t->t_dtrace_scrpc ||
 313             rp->r_pc == t->t_dtrace_astpc) {
 314                 rp->r_pc = t->t_dtrace_pc;
 315                 t->t_dtrace_ft = 0;
 316         }
 317 }
 318
 319 int
 320 dtrace_safe_defer_signal(void)
 321 {
 322         kthread_t *t = curthread;
 323         struct regs *rp = lwptoregs(ttolwp(t));
 324         size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;
 325
 326         ASSERT(t->t_dtrace_on);
 327
 328         /*
 329          * If we're not in the range of scratch addresses, we're not actually
 330          * tracing user instructions so turn off the flags.
 331          */
 332         if (rp->r_pc < t->t_dtrace_scrpc ||
 333             rp->r_pc > t->t_dtrace_astpc + isz) {
 334                 t->t_dtrace_ft = 0;
 335                 return (0);
 336         }
 337
 338         /*
 339          * If we have executed the original instruction, but we have performed
 340          * neither the jmp back to t->t_dtrace_npc nor the clean up of any
 341          * registers used to emulate %rip-relative instructions in 64-bit mode,
 342          * we'll save ourselves some effort by doing that here and taking the
 343          * signal right away.  We detect this condition by seeing if the program
 344          * counter is the range [scrpc + isz, astpc).
 345          */
 346         if (rp->r_pc >= t->t_dtrace_scrpc + isz &&
 347             rp->r_pc < t->t_dtrace_astpc) {
 348 #ifdef __amd64
 349                 /*
 350                  * If there is a scratch register and we're on the
 351                  * instruction immediately after the modified instruction,
 352                  * restore the value of that scratch register.
 353                  */
 354                 if (t->t_dtrace_reg != 0 &&
 355                     rp->r_pc == t->t_dtrace_scrpc + isz) {
 356                         switch (t->t_dtrace_reg) {
 357                         case REG_RAX:
 358                                 rp->r_rax = t->t_dtrace_regv;
 359                                 break;
 360                         case REG_RCX:
 361                                 rp->r_rcx = t->t_dtrace_regv;
 362                                 break;
 363                         case REG_R8:
 364                                 rp->r_r8 = t->t_dtrace_regv;
 365                                 break;
 366                         case REG_R9:
 367                                 rp->r_r9 = t->t_dtrace_regv;
 368                                 break;
 369                         }
 370                 }
 371 #endif
 372                 rp->r_pc = t->t_dtrace_npc;
 373                 t->t_dtrace_ft = 0;
 374                 return (0);
 375         }
 376
 377         /*
 378          * Otherwise, make sure we'll return to the kernel after executing
 379          * the copied out instruction and defer the signal.
 380          */
 381         if (!t->t_dtrace_step) {
 382                 ASSERT(rp->r_pc < t->t_dtrace_astpc);
 383                 rp->r_pc += t->t_dtrace_astpc - t->t_dtrace_scrpc;
 384                 t->t_dtrace_step = 1;
 385         }
 386
 387         t->t_dtrace_ast = 1;
 388
 389         return (1);
 390 }
 391
 392 /*
 393  * Additional artificial frames for the machine type. For i86pc, we're already
 394  * accounted for, so return 0. On the hypervisor, we have an additional frame
 395  * (xen_callback_handler).
 396  */
 397 int
 398 dtrace_mach_aframes(void)
 399 {
 400         return (0);
 401 }