/*
 * kmemcheck - a heavyweight memory checker for the linux kernel
 * Copyright (C) 2007, 2008  Vegard Nossum <vegardno@ifi.uio.no>
 * (With a lot of help from Ingo Molnar and Pekka Enberg.)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2) as
 * published by the Free Software Foundation.
 */

#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/types.h>

#include <asm/cacheflush.h>
#include <asm/kmemcheck.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "error.h"
#include "opcode.h"
#include "pte.h"
#include "shadow.h"

#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
#  define KMEMCHECK_ENABLED 0
#endif

#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
#  define KMEMCHECK_ENABLED 1
#endif

#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
#  define KMEMCHECK_ENABLED 2
#endif

int kmemcheck_enabled = KMEMCHECK_ENABLED;
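
/*
 * Note: kmemcheck_enabled selects one of three runtime modes, which can
 * also be set with the kmemcheck= boot parameter below:
 *   0 - disabled (tracked pages are left alone, nothing is reported)
 *   1 - enabled (every access to a tracked page is checked)
 *   2 - one-shot (disable automatically after the first report)
 */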

int __init kmemcheck_init(void)
{
	printk(KERN_INFO "kmemcheck: Initialized\n");

#ifdef CONFIG_SMP
	/*
	 * Limit SMP to use a single CPU. We rely on the fact that this code
	 * runs before SMP is set up.
	 */
	if (setup_max_cpus > 1) {
		printk(KERN_INFO
			"kmemcheck: Limiting number of CPUs to 1.\n");
		setup_max_cpus = 1;
	}
#endif

	return 0;
}

early_initcall(kmemcheck_init);

/*
 * We need to parse the kmemcheck= option before any memory is allocated.
 */
static int __init param_kmemcheck(char *str)
{
	if (!str)
		return -EINVAL;

	sscanf(str, "%d", &kmemcheck_enabled);
	return 0;
}

early_param("kmemcheck", param_kmemcheck);
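
/*
 * Example (kernel command line): booting with
 *
 *	kmemcheck=1
 *
 * enables checking from startup, while "kmemcheck=2" disables it again
 * after the first report.
 */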

int kmemcheck_show_addr(unsigned long address)
{
	pte_t *pte;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return 0;

	set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
	__flush_tlb_one(address);
	return 1;
}

int kmemcheck_hide_addr(unsigned long address)
{
	pte_t *pte;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return 0;

	set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
	__flush_tlb_one(address);
	return 1;
}
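
/*
 * The two helpers above work as a pair: tracked pages are normally kept
 * non-present so that every access faults into kmemcheck_fault(). "Showing"
 * a page temporarily sets _PAGE_PRESENT so the faulting instruction can
 * execute, and "hiding" clears the bit again afterwards.
 */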

struct kmemcheck_context {
	bool busy;
	int balance;

	/*
	 * There can be at most two memory operands to an instruction, but
	 * each address can cross a page boundary -- so we may need up to
	 * four addresses that must be hidden/revealed for each fault.
	 */
	unsigned long addr[4];
	unsigned long n_addrs;
	unsigned long flags;

	/* Data size of the instruction that caused a fault. */
	unsigned int size;
};

static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);
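
/*
 * Note: the context is per-CPU state. kmemcheck restricts the machine to a
 * single CPU (see kmemcheck_init() above), and the state only needs to
 * survive from the page fault to the following debug trap, a window in
 * which interrupts stay disabled.
 */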

bool kmemcheck_active(struct pt_regs *regs)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	return data->balance > 0;
}

/* Save an address that needs to be shown/hidden */
static void kmemcheck_save_addr(unsigned long addr)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
	data->addr[data->n_addrs++] = addr;
}

static unsigned int kmemcheck_show_all(void)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
	unsigned int i;
	unsigned int n;

	n = 0;
	for (i = 0; i < data->n_addrs; ++i)
		n += kmemcheck_show_addr(data->addr[i]);

	return n;
}

static unsigned int kmemcheck_hide_all(void)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
	unsigned int i;
	unsigned int n;

	n = 0;
	for (i = 0; i < data->n_addrs; ++i)
		n += kmemcheck_hide_addr(data->addr[i]);

	return n;
}

/*
 * Called from the #PF handler.
 */
void kmemcheck_show(struct pt_regs *regs)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	BUG_ON(!irqs_disabled());

	if (unlikely(data->balance != 0)) {
		kmemcheck_show_all();
		kmemcheck_error_save_bug(regs);
		data->balance = 0;
		return;
	}

	/*
	 * None of the addresses actually belonged to kmemcheck. Note that
	 * this is not an error.
	 */
	if (kmemcheck_show_all() == 0)
		return;

	++data->balance;

	/*
	 * The IF needs to be cleared as well, so that the faulting
	 * instruction can run "uninterrupted". Otherwise, we might take
	 * an interrupt and start executing that before we've had a chance
	 * to hide the page again.
	 *
	 * NOTE: In the rare case of multiple faults, we must not override
	 * the original flags:
	 */
	if (!(regs->flags & X86_EFLAGS_TF))
		data->flags = regs->flags;

	regs->flags |= X86_EFLAGS_TF;
	regs->flags &= ~X86_EFLAGS_IF;
}

/*
 * Called from the #DB handler.
 */
void kmemcheck_hide(struct pt_regs *regs)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
	int n;

	BUG_ON(!irqs_disabled());

	if (data->balance == 0)
		return;

	if (unlikely(data->balance != 1)) {
		kmemcheck_show_all();
		kmemcheck_error_save_bug(regs);
		data->n_addrs = 0;
		data->balance = 0;

		if (!(data->flags & X86_EFLAGS_TF))
			regs->flags &= ~X86_EFLAGS_TF;
		if (data->flags & X86_EFLAGS_IF)
			regs->flags |= X86_EFLAGS_IF;
		return;
	}

	if (kmemcheck_enabled)
		n = kmemcheck_hide_all();
	else
		n = kmemcheck_show_all();

	if (n == 0)
		return;

	--data->balance;

	data->n_addrs = 0;

	if (!(data->flags & X86_EFLAGS_TF))
		regs->flags &= ~X86_EFLAGS_TF;
	if (data->flags & X86_EFLAGS_IF)
		regs->flags |= X86_EFLAGS_IF;
}

void kmemcheck_show_pages(struct page *p, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i) {
		unsigned long address;
		pte_t *pte;
		unsigned int level;

		address = (unsigned long) page_address(&p[i]);
		pte = lookup_address(address, &level);
		BUG_ON(!pte);
		BUG_ON(level != PG_LEVEL_4K);

		set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
		__flush_tlb_one(address);
	}
}

bool kmemcheck_page_is_tracked(struct page *p)
{
	/* This will also check the "hidden" flag of the PTE. */
	return kmemcheck_pte_lookup((unsigned long) page_address(p));
}

void kmemcheck_hide_pages(struct page *p, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i) {
		unsigned long address;
		pte_t *pte;
		unsigned int level;

		address = (unsigned long) page_address(&p[i]);
		pte = lookup_address(address, &level);
		BUG_ON(!pte);
		BUG_ON(level != PG_LEVEL_4K);

		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
		set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
		__flush_tlb_one(address);
	}
}
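
/*
 * Note: _PAGE_HIDDEN is a kmemcheck-specific software bit in the PTE. It
 * marks the page as tracked even while the page is temporarily present,
 * and is what pte_hidden() tests in kmemcheck_fault() below.
 */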

/* Access may NOT cross page boundary */
static void kmemcheck_read_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow;
	enum kmemcheck_shadow status;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return;

	kmemcheck_save_addr(addr);
	status = kmemcheck_shadow_test(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		return;

	if (kmemcheck_enabled)
		kmemcheck_error_save(status, addr, size, regs);

	if (kmemcheck_enabled == 2)
		kmemcheck_enabled = 0;

	/* Don't warn about it again. */
	kmemcheck_shadow_set(shadow, size);
}
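
/*
 * Background: every byte of a tracked allocation has a shadow byte that
 * records its state (uninitialized, initialized, etc.). A read is checked
 * against the shadow; anything other than "initialized" produces a report,
 * and the bytes are then marked initialized so the same spot is only
 * reported once.
 */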

/* Access may cross page boundary */
static void kmemcheck_read(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		kmemcheck_read_strict(regs, addr, size);
		return;
	}

	/*
	 * What we do is basically to split the access across the
	 * two pages and handle each part separately. Yes, this means
	 * that we may now see reads that are 3 + 5 bytes, for
	 * example (and if both are uninitialized, there will be two
	 * reports), but it makes the code a lot simpler.
	 */
	kmemcheck_read_strict(regs, addr, next_page - addr);
	kmemcheck_read_strict(regs, next_page, next_addr - next_page + 1);
}

static void kmemcheck_write_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return;

	kmemcheck_save_addr(addr);
	kmemcheck_shadow_set(shadow, size);
}

static void kmemcheck_write(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		kmemcheck_write_strict(regs, addr, size);
		return;
	}

	/* See comment in kmemcheck_read(). */
	kmemcheck_write_strict(regs, addr, next_page - addr);
	kmemcheck_write_strict(regs, next_page, next_addr - next_page + 1);
}

/*
 * Copying is hard. We have two addresses, each of which may be split across
 * a page (and each page will have different shadow addresses).
 */
static void kmemcheck_copy(struct pt_regs *regs,
	unsigned long src_addr, unsigned long dst_addr, unsigned int size)
{
	uint8_t shadow[8];
	enum kmemcheck_shadow status;

	unsigned long page;
	unsigned long next_addr;
	unsigned long next_page;

	uint8_t *x;
	unsigned int i;
	unsigned int n;

	BUG_ON(size > sizeof(shadow));

	page = src_addr & PAGE_MASK;
	next_addr = src_addr + size - 1;
	next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		/* Same page */
		x = kmemcheck_shadow_lookup(src_addr);
		if (x) {
			kmemcheck_save_addr(src_addr);
			for (i = 0; i < size; ++i)
				shadow[i] = x[i];
		} else {
			for (i = 0; i < size; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}
	} else {
		n = next_page - src_addr;
		BUG_ON(n > sizeof(shadow));

		/* First page */
		x = kmemcheck_shadow_lookup(src_addr);
		if (x) {
			kmemcheck_save_addr(src_addr);
			for (i = 0; i < n; ++i)
				shadow[i] = x[i];
		} else {
			/* Not tracked */
			for (i = 0; i < n; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}

		/* Second page */
		x = kmemcheck_shadow_lookup(next_page);
		if (x) {
			kmemcheck_save_addr(next_page);
			for (i = n; i < size; ++i)
				shadow[i] = x[i - n];
		} else {
			/* Not tracked */
			for (i = n; i < size; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}
	}

	page = dst_addr & PAGE_MASK;
	next_addr = dst_addr + size - 1;
	next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		/* Same page */
		x = kmemcheck_shadow_lookup(dst_addr);
		if (x) {
			kmemcheck_save_addr(dst_addr);
			for (i = 0; i < size; ++i) {
				x[i] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}
	} else {
		n = next_page - dst_addr;
		BUG_ON(n > sizeof(shadow));

		/* First page */
		x = kmemcheck_shadow_lookup(dst_addr);
		if (x) {
			kmemcheck_save_addr(dst_addr);
			for (i = 0; i < n; ++i) {
				x[i] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}

		/* Second page */
		x = kmemcheck_shadow_lookup(next_page);
		if (x) {
			kmemcheck_save_addr(next_page);
			for (i = n; i < size; ++i) {
				x[i - n] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}
	}

	status = kmemcheck_shadow_test(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		return;

	if (kmemcheck_enabled)
		kmemcheck_error_save(status, src_addr, size, regs);

	if (kmemcheck_enabled == 2)
		kmemcheck_enabled = 0;
}
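
/*
 * Note: when the destination is itself tracked, the local shadow copy is
 * overwritten with "initialized" above, so uninitialized source bytes
 * propagate silently and any report is deferred until the data is actually
 * used; only copies into untracked memory are reported here.
 */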

enum kmemcheck_method {
	KMEMCHECK_READ,
	KMEMCHECK_WRITE,
};

static void kmemcheck_access(struct pt_regs *regs,
	unsigned long fallback_address, enum kmemcheck_method fallback_method)
{
	const uint8_t *insn;
	const uint8_t *insn_primary;
	unsigned int size;

	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	/* Recursive fault -- ouch. */
	if (data->busy) {
		kmemcheck_show_addr(fallback_address);
		kmemcheck_error_save_bug(regs);
		return;
	}

	data->busy = true;

	insn = (const uint8_t *) regs->ip;
	insn_primary = kmemcheck_opcode_get_primary(insn);

	kmemcheck_opcode_decode(insn, &size);

	switch (insn_primary[0]) {
#ifdef CONFIG_KMEMCHECK_BITOPS_OK
		/* AND, OR, XOR */
		/*
		 * Unfortunately, these instructions have to be excluded from
		 * our regular checking since they access only some (and not
		 * all) bits. This clears out "bogus" bitfield-access warnings.
		 */
	case 0x80:
	case 0x81:
	case 0x82:
	case 0x83:
		switch ((insn_primary[1] >> 3) & 7) {
			/* OR */
		case 1:
			/* AND */
		case 4:
			/* XOR */
		case 6:
			kmemcheck_write(regs, fallback_address, size);
			goto out;

			/* ADD */
		case 0:
			/* ADC */
		case 2:
			/* SBB */
		case 3:
			/* SUB */
		case 5:
			/* CMP */
		case 7:
			break;
		}
		break;
#endif

		/* MOVS, MOVSB, MOVSW, MOVSD */
	case 0xa4:
	case 0xa5:
		/*
		 * These instructions are special because they take two
		 * addresses, but we only get one page fault.
		 */
		kmemcheck_copy(regs, regs->si, regs->di, size);
		goto out;

		/* CMPS, CMPSB, CMPSW, CMPSD */
	case 0xa6:
	case 0xa7:
		kmemcheck_read(regs, regs->si, size);
		kmemcheck_read(regs, regs->di, size);
		goto out;
	}

	/*
	 * If the opcode isn't special in any way, we use the data from the
	 * page fault handler to determine the address and type of memory
	 * access.
	 */
	switch (fallback_method) {
	case KMEMCHECK_READ:
		kmemcheck_read(regs, fallback_address, size);
		goto out;
	case KMEMCHECK_WRITE:
		kmemcheck_write(regs, fallback_address, size);
		goto out;
	}

out:
	data->busy = false;
}

bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
	unsigned long error_code)
{
	pte_t *pte;
	unsigned int level;

	/*
	 * XXX: Is it safe to assume that memory accesses from virtual 86
	 * mode or non-kernel code segments will _never_ access kernel
	 * memory (e.g. tracked pages)? For now, we need this to avoid
	 * invoking kmemcheck for PnP BIOS calls.
	 */
	if (regs->flags & X86_VM_MASK)
		return false;
	if (regs->cs != __KERNEL_CS)
		return false;

	pte = lookup_address(address, &level);
	if (!pte)
		return false;
	if (level != PG_LEVEL_4K)
		return false;
	if (!pte_hidden(*pte))
		return false;

	if (error_code & 2)
		kmemcheck_access(regs, address, KMEMCHECK_WRITE);
	else
		kmemcheck_access(regs, address, KMEMCHECK_READ);

	kmemcheck_show(regs);
	return true;
}
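
/*
 * Bit 1 of the x86 page fault error code is set for writes, which is what
 * the "error_code & 2" test above uses to pick the fallback access type.
 */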

bool kmemcheck_trap(struct pt_regs *regs)
{
	if (!kmemcheck_active(regs))
		return false;

	/* We're done. */
	kmemcheck_hide(regs);
	return true;
}
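
/*
 * Summary of the entry points: kmemcheck_fault() is called from the page
 * fault handler when a fault hits a hidden page, and kmemcheck_trap() is
 * called from the debug exception handler once the single-stepped
 * instruction has completed.
 */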