Initial asan cleanups
[official-gcc.git] / gcc / asan.c
blob9655b118bff1644a39ccbe44f785da890a95c65d
1 /* AddressSanitizer, a fast memory error detector.
2 Copyright (C) 2012 Free Software Foundation, Inc.
3 Contributed by Kostya Serebryany <kcc@google.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "tree.h"
27 #include "tm_p.h"
28 #include "basic-block.h"
29 #include "flags.h"
30 #include "function.h"
31 #include "tree-inline.h"
32 #include "gimple.h"
33 #include "tree-iterator.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
36 #include "tree-pass.h"
37 #include "diagnostic.h"
38 #include "demangle.h"
39 #include "langhooks.h"
40 #include "ggc.h"
41 #include "cgraph.h"
42 #include "gimple.h"
43 #include "asan.h"
44 #include "gimple-pretty-print.h"
45 #include "target.h"
48 AddressSanitizer finds out-of-bounds and use-after-free bugs
49 with <2x slowdown on average.
51 The tool consists of two parts:
52 instrumentation module (this file) and a run-time library.
53 The instrumentation module adds a run-time check before every memory insn.
54 For a 8- or 16- byte load accessing address X:
55 ShadowAddr = (X >> 3) + Offset
56 ShadowValue = *(char*)ShadowAddr; // *(short*) for 16-byte access.
57 if (ShadowValue)
58 __asan_report_load8(X);
59 For a load of N bytes (N=1, 2 or 4) from address X:
60 ShadowAddr = (X >> 3) + Offset
61 ShadowValue = *(char*)ShadowAddr;
62 if (ShadowValue)
63 if ((X & 7) + N - 1 > ShadowValue)
64 __asan_report_loadN(X);
65 Stores are instrumented similarly, but using __asan_report_storeN functions.
66 A call too __asan_init() is inserted to the list of module CTORs.
68 The run-time library redefines malloc (so that redzone are inserted around
69 the allocated memory) and free (so that reuse of free-ed memory is delayed),
70 provides __asan_report* and __asan_init functions.
72 Read more:
73 http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm
75 Future work:
76 The current implementation supports only detection of out-of-bounds and
77 use-after-free bugs in heap.
78 In order to support out-of-bounds for stack and globals we will need
79 to create redzones for stack and global object and poison them.
82 /* Construct a function tree for __asan_report_{load,store}{1,2,4,8,16}.
83 IS_STORE is either 1 (for a store) or 0 (for a load).
84 SIZE_IN_BYTES is one of 1, 2, 4, 8, 16. */
86 static tree
87 report_error_func (int is_store, int size_in_bytes)
89 tree fn_type;
90 tree def;
91 char name[100];
93 sprintf (name, "__asan_report_%s%d\n",
94 is_store ? "store" : "load", size_in_bytes);
95 fn_type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
96 def = build_fn_decl (name, fn_type);
97 TREE_NOTHROW (def) = 1;
98 TREE_THIS_VOLATILE (def) = 1; /* Attribute noreturn. Surprise! */
99 DECL_ATTRIBUTES (def) = tree_cons (get_identifier ("leaf"),
100 NULL, DECL_ATTRIBUTES (def));
101 DECL_ASSEMBLER_NAME (def);
102 return def;
105 /* Construct a function tree for __asan_init(). */
107 static tree
108 asan_init_func (void)
110 tree fn_type;
111 tree def;
113 fn_type = build_function_type_list (void_type_node, NULL_TREE);
114 def = build_fn_decl ("__asan_init", fn_type);
115 TREE_NOTHROW (def) = 1;
116 DECL_ASSEMBLER_NAME (def);
117 return def;
121 /* Instrument the memory access instruction BASE.
122 Insert new statements before ITER.
123 LOCATION is source code location.
124 IS_STORE is either 1 (for a store) or 0 (for a load).
125 SIZE_IN_BYTES is one of 1, 2, 4, 8, 16. */
127 static void
128 build_check_stmt (tree base,
129 gimple_stmt_iterator *iter,
130 location_t location, int is_store, int size_in_bytes)
132 gimple_stmt_iterator gsi;
133 basic_block cond_bb, then_bb, join_bb;
134 edge e;
135 tree cond, t, u;
136 tree base_addr;
137 tree shadow_value;
138 gimple g;
139 gimple_seq seq, stmts;
140 tree shadow_type = size_in_bytes == 16 ?
141 short_integer_type_node : char_type_node;
142 tree shadow_ptr_type = build_pointer_type (shadow_type);
143 tree uintptr_type = lang_hooks.types.type_for_mode (ptr_mode,
144 /*unsignedp=*/true);
146 /* We first need to split the current basic block, and start altering
147 the CFG. This allows us to insert the statements we're about to
148 construct into the right basic blocks. */
150 cond_bb = gimple_bb (gsi_stmt (*iter));
151 gsi = *iter;
152 gsi_prev (&gsi);
153 if (!gsi_end_p (gsi))
154 e = split_block (cond_bb, gsi_stmt (gsi));
155 else
156 e = split_block_after_labels (cond_bb);
157 cond_bb = e->src;
158 join_bb = e->dest;
160 /* A recap at this point: join_bb is the basic block at whose head
161 is the gimple statement for which this check expression is being
162 built. cond_bb is the (possibly new, synthetic) basic block the
163 end of which will contain the cache-lookup code, and a
164 conditional that jumps to the cache-miss code or, much more
165 likely, over to join_bb. */
167 /* Create the bb that contains the crash block. */
168 then_bb = create_empty_bb (cond_bb);
169 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
170 make_single_succ_edge (then_bb, join_bb, EDGE_FALLTHRU);
172 /* Mark the pseudo-fallthrough edge from cond_bb to join_bb. */
173 e = find_edge (cond_bb, join_bb);
174 e->flags = EDGE_FALSE_VALUE;
175 e->count = cond_bb->count;
176 e->probability = REG_BR_PROB_BASE;
178 /* Update dominance info. Note that bb_join's data was
179 updated by split_block. */
180 if (dom_info_available_p (CDI_DOMINATORS))
182 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
183 set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb);
186 base_addr = create_tmp_reg (uintptr_type, "__asan_base_addr");
188 seq = NULL;
189 t = fold_convert_loc (location, uintptr_type,
190 unshare_expr (base));
191 t = force_gimple_operand (t, &stmts, false, NULL_TREE);
192 gimple_seq_add_seq (&seq, stmts);
193 g = gimple_build_assign (base_addr, t);
194 gimple_set_location (g, location);
195 gimple_seq_add_stmt (&seq, g);
197 /* Build
198 (base_addr >> ASAN_SHADOW_SHIFT) | targetm.asan_shadow_offset (). */
200 t = build2 (RSHIFT_EXPR, uintptr_type, base_addr,
201 build_int_cst (uintptr_type, ASAN_SHADOW_SHIFT));
202 t = build2 (PLUS_EXPR, uintptr_type, t,
203 build_int_cst (uintptr_type, targetm.asan_shadow_offset ()));
204 t = build1 (INDIRECT_REF, shadow_type,
205 build1 (VIEW_CONVERT_EXPR, shadow_ptr_type, t));
206 t = force_gimple_operand (t, &stmts, false, NULL_TREE);
207 gimple_seq_add_seq (&seq, stmts);
208 shadow_value = create_tmp_reg (shadow_type, "__asan_shadow");
209 g = gimple_build_assign (shadow_value, t);
210 gimple_set_location (g, location);
211 gimple_seq_add_stmt (&seq, g);
212 t = build2 (NE_EXPR, boolean_type_node, shadow_value,
213 build_int_cst (shadow_type, 0));
214 if (size_in_bytes < 8)
217 /* Slow path for 1-, 2- and 4- byte accesses.
218 Build ((base_addr & 7) + (size_in_bytes - 1)) >= shadow_value. */
220 u = build2 (BIT_AND_EXPR, uintptr_type,
221 base_addr,
222 build_int_cst (uintptr_type, 7));
223 u = build1 (CONVERT_EXPR, shadow_type, u);
224 u = build2 (PLUS_EXPR, shadow_type, u,
225 build_int_cst (shadow_type, size_in_bytes - 1));
226 u = build2 (GE_EXPR, uintptr_type, u, shadow_value);
228 else
229 u = build_int_cst (boolean_type_node, 1);
230 t = build2 (TRUTH_AND_EXPR, boolean_type_node, t, u);
231 t = force_gimple_operand (t, &stmts, false, NULL_TREE);
232 gimple_seq_add_seq (&seq, stmts);
233 cond = create_tmp_reg (boolean_type_node, "__asan_crash_cond");
234 g = gimple_build_assign (cond, t);
235 gimple_set_location (g, location);
236 gimple_seq_add_stmt (&seq, g);
237 g = gimple_build_cond (NE_EXPR, cond, boolean_false_node, NULL_TREE,
238 NULL_TREE);
239 gimple_set_location (g, location);
240 gimple_seq_add_stmt (&seq, g);
242 /* Generate call to the run-time library (e.g. __asan_report_load8). */
244 gsi = gsi_last_bb (cond_bb);
245 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
246 seq = NULL;
247 g = gimple_build_call (report_error_func (is_store, size_in_bytes),
248 1, base_addr);
249 gimple_seq_add_stmt (&seq, g);
251 /* Insert the check code in the THEN block. */
253 gsi = gsi_start_bb (then_bb);
254 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
256 *iter = gsi_start_bb (join_bb);
259 /* If T represents a memory access, add instrumentation code before ITER.
260 LOCATION is source code location.
261 IS_STORE is either 1 (for a store) or 0 (for a load). */
263 static void
264 instrument_derefs (gimple_stmt_iterator *iter, tree t,
265 location_t location, int is_store)
267 tree type, base;
268 int size_in_bytes;
270 type = TREE_TYPE (t);
271 if (type == error_mark_node)
272 return;
273 switch (TREE_CODE (t))
275 case ARRAY_REF:
276 case COMPONENT_REF:
277 case INDIRECT_REF:
278 case MEM_REF:
279 break;
280 default:
281 return;
283 size_in_bytes = tree_low_cst (TYPE_SIZE (type), 0) / BITS_PER_UNIT;
284 if (size_in_bytes != 1 && size_in_bytes != 2 &&
285 size_in_bytes != 4 && size_in_bytes != 8 && size_in_bytes != 16)
286 return;
288 /* For now just avoid instrumenting bit field acceses.
289 Fixing it is doable, but expected to be messy. */
291 HOST_WIDE_INT bitsize, bitpos;
292 tree offset;
293 enum machine_mode mode;
294 int volatilep = 0, unsignedp = 0;
295 get_inner_reference (t, &bitsize, &bitpos, &offset,
296 &mode, &unsignedp, &volatilep, false);
297 if (bitpos != 0 || bitsize != size_in_bytes * BITS_PER_UNIT)
298 return;
301 base = build_addr (t, current_function_decl);
302 build_check_stmt (base, iter, location, is_store, size_in_bytes);
305 /* asan: this looks too complex. Can this be done simpler? */
306 /* Transform
307 1) Memory references.
308 2) BUILTIN_ALLOCA calls.
311 static void
312 transform_statements (void)
314 basic_block bb;
315 gimple_stmt_iterator i;
316 int saved_last_basic_block = last_basic_block;
317 enum gimple_rhs_class grhs_class;
319 FOR_EACH_BB (bb)
321 if (bb->index >= saved_last_basic_block) continue;
322 for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
324 gimple s = gsi_stmt (i);
325 if (gimple_code (s) != GIMPLE_ASSIGN)
326 continue;
327 instrument_derefs (&i, gimple_assign_lhs (s),
328 gimple_location (s), 1);
329 instrument_derefs (&i, gimple_assign_rhs1 (s),
330 gimple_location (s), 0);
331 grhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (s));
332 if (grhs_class == GIMPLE_BINARY_RHS)
333 instrument_derefs (&i, gimple_assign_rhs2 (s),
334 gimple_location (s), 0);
339 /* Module-level instrumentation.
340 - Insert __asan_init() into the list of CTORs.
341 - TODO: insert redzones around globals.
344 void
345 asan_finish_file (void)
347 tree ctor_statements = NULL_TREE;
348 append_to_statement_list (build_call_expr (asan_init_func (), 0),
349 &ctor_statements);
350 cgraph_build_static_cdtor ('I', ctor_statements,
351 MAX_RESERVED_INIT_PRIORITY - 1);
354 /* Instrument the current function. */
356 static unsigned int
357 asan_instrument (void)
359 struct gimplify_ctx gctx;
360 push_gimplify_context (&gctx);
361 transform_statements ();
362 pop_gimplify_context (NULL);
363 return 0;
366 static bool
367 gate_asan (void)
369 return flag_asan != 0;
372 struct gimple_opt_pass pass_asan =
375 GIMPLE_PASS,
376 "asan", /* name */
377 OPTGROUP_NONE, /* optinfo_flags */
378 gate_asan, /* gate */
379 asan_instrument, /* execute */
380 NULL, /* sub */
381 NULL, /* next */
382 0, /* static_pass_number */
383 TV_NONE, /* tv_id */
384 PROP_ssa | PROP_cfg | PROP_gimple_leh,/* properties_required */
385 0, /* properties_provided */
386 0, /* properties_destroyed */
387 0, /* todo_flags_start */
388 TODO_verify_flow | TODO_verify_stmts
389 | TODO_update_ssa /* todo_flags_finish */