/* UndefinedBehaviorSanitizer, undefined behavior detector.
   Copyright (C) 2013 Free Software Foundation, Inc.
   Contributed by Marek Polacek <polacek@redhat.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
23 #include "coretypes.h"
25 #include "stor-layout.h"
26 #include "stringpool.h"
28 #include "tree-pass.h"
30 #include "gimple-iterator.h"
31 #include "gimple-ssa.h"
32 #include "gimple-walk.h"
34 #include "pointer-set.h"
40 #include "c-family/c-common.h"
42 /* From trans-mem.c. */
43 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 2000 - 1)
45 /* Map from a tree to a VAR_DECL tree. */
47 struct GTY(()) tree_type_map
{
48 struct tree_map_base type
;
52 #define tree_type_map_eq tree_map_base_eq
53 #define tree_type_map_marked_p tree_map_base_marked_p
55 /* Hash from a tree in a tree_type_map. */
58 tree_type_map_hash (const void *item
)
60 return TYPE_UID (((const struct tree_type_map
*)item
)->type
.from
);
63 static GTY ((if_marked ("tree_type_map_marked_p"), param_is (struct tree_type_map
)))
64 htab_t decl_tree_for_type
;
66 /* Lookup a VAR_DECL for TYPE, and return it if we find one. */
69 decl_for_type_lookup (tree type
)
71 /* If the hash table is not initialized yet, create it now. */
72 if (decl_tree_for_type
== NULL
)
74 decl_tree_for_type
= htab_create_ggc (10, tree_type_map_hash
,
76 /* That also means we don't have to bother with the lookup. */
80 struct tree_type_map
*h
, in
;
83 h
= (struct tree_type_map
*)
84 htab_find_with_hash (decl_tree_for_type
, &in
, TYPE_UID (type
));
85 return h
? h
->decl
: NULL_TREE
;
88 /* Insert a mapping TYPE->DECL in the VAR_DECL for type hashtable. */
91 decl_for_type_insert (tree type
, tree decl
)
93 struct tree_type_map
*h
;
96 h
= ggc_alloc_tree_type_map ();
99 slot
= htab_find_slot_with_hash (decl_tree_for_type
, h
, TYPE_UID (type
),
101 *(struct tree_type_map
**) slot
= h
;
104 /* Helper routine, which encodes a value in the pointer_sized_int_node.
105 Arguments with precision <= POINTER_SIZE are passed directly,
106 the rest is passed by reference. T is a value we are to encode. */
109 ubsan_encode_value (tree t
)
111 tree type
= TREE_TYPE (t
);
112 switch (TREE_CODE (type
))
115 if (TYPE_PRECISION (type
) <= POINTER_SIZE
)
116 return fold_build1 (NOP_EXPR
, pointer_sized_int_node
, t
);
118 return build_fold_addr_expr (t
);
121 unsigned int bitsize
= GET_MODE_BITSIZE (TYPE_MODE (type
));
122 if (bitsize
<= POINTER_SIZE
)
124 tree itype
= build_nonstandard_integer_type (bitsize
, true);
125 t
= fold_build1 (VIEW_CONVERT_EXPR
, itype
, t
);
126 return fold_convert (pointer_sized_int_node
, t
);
130 if (!TREE_ADDRESSABLE (t
))
132 /* The reason for this is that we don't want to pessimize
133 code by making vars unnecessarily addressable. */
134 tree var
= create_tmp_var (TREE_TYPE (t
), NULL
);
135 tree tem
= build2 (MODIFY_EXPR
, void_type_node
, var
, t
);
136 t
= build_fold_addr_expr (var
);
137 return build2 (COMPOUND_EXPR
, TREE_TYPE (t
), tem
, t
);
140 return build_fold_addr_expr (t
);
149 struct __ubsan_type_descriptor
151 unsigned short __typekind;
152 unsigned short __typeinfo;
158 ubsan_type_descriptor_type (void)
160 static const char *field_names
[3]
161 = { "__typekind", "__typeinfo", "__typename" };
163 tree itype
= build_range_type (sizetype
, size_zero_node
, NULL_TREE
);
164 tree flex_arr_type
= build_array_type (char_type_node
, itype
);
166 ret
= make_node (RECORD_TYPE
);
167 for (int i
= 0; i
< 3; i
++)
169 fields
[i
] = build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
170 get_identifier (field_names
[i
]),
171 (i
== 2) ? flex_arr_type
172 : short_unsigned_type_node
);
173 DECL_CONTEXT (fields
[i
]) = ret
;
175 DECL_CHAIN (fields
[i
- 1]) = fields
[i
];
177 TYPE_FIELDS (ret
) = fields
[0];
178 TYPE_NAME (ret
) = get_identifier ("__ubsan_type_descriptor");
184 struct __ubsan_source_location
186 const char *__filename;
188 unsigned int __column;
193 ubsan_source_location_type (void)
195 static const char *field_names
[3]
196 = { "__filename", "__line", "__column" };
198 tree const_char_type
= build_qualified_type (char_type_node
,
201 ret
= make_node (RECORD_TYPE
);
202 for (int i
= 0; i
< 3; i
++)
204 fields
[i
] = build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
205 get_identifier (field_names
[i
]),
206 (i
== 0) ? build_pointer_type (const_char_type
)
207 : unsigned_type_node
);
208 DECL_CONTEXT (fields
[i
]) = ret
;
210 DECL_CHAIN (fields
[i
- 1]) = fields
[i
];
212 TYPE_FIELDS (ret
) = fields
[0];
213 TYPE_NAME (ret
) = get_identifier ("__ubsan_source_location");
218 /* Helper routine that returns a CONSTRUCTOR of __ubsan_source_location
219 type with its fields filled from a location_t LOC. */
222 ubsan_source_location (location_t loc
)
224 expanded_location xloc
;
225 tree type
= ubsan_source_location_type ();
227 xloc
= expand_location (loc
);
229 /* Fill in the values from LOC. */
230 size_t len
= strlen (xloc
.file
);
231 tree str
= build_string (len
+ 1, xloc
.file
);
232 TREE_TYPE (str
) = build_array_type (char_type_node
,
233 build_index_type (size_int (len
)));
234 TREE_READONLY (str
) = 1;
235 TREE_STATIC (str
) = 1;
236 str
= build_fold_addr_expr_loc (loc
, str
);
237 tree ctor
= build_constructor_va (type
, 3, NULL_TREE
, str
, NULL_TREE
,
238 build_int_cst (unsigned_type_node
,
239 xloc
.line
), NULL_TREE
,
240 build_int_cst (unsigned_type_node
,
242 TREE_CONSTANT (ctor
) = 1;
243 TREE_STATIC (ctor
) = 1;
248 /* This routine returns a magic number for TYPE. */
250 static unsigned short
251 get_ubsan_type_info_for_type (tree type
)
253 gcc_assert (TYPE_SIZE (type
) && tree_fits_uhwi_p (TYPE_SIZE (type
)));
254 int prec
= exact_log2 (tree_to_uhwi (TYPE_SIZE (type
)));
255 gcc_assert (prec
!= -1);
256 return (prec
<< 1) | !TYPE_UNSIGNED (type
);
259 /* Helper routine that returns ADDR_EXPR of a VAR_DECL of a type
260 descriptor. It first looks into the hash table; if not found,
261 create the VAR_DECL, put it into the hash table and return the
262 ADDR_EXPR of it. TYPE describes a particular type. WANT_POINTER_TYPE_P
263 means whether we are interested in the pointer type and not the pointer
267 ubsan_type_descriptor (tree type
, bool want_pointer_type_p
)
269 /* See through any typedefs. */
270 type
= TYPE_MAIN_VARIANT (type
);
272 tree decl
= decl_for_type_lookup (type
);
273 if (decl
!= NULL_TREE
)
276 tree dtype
= ubsan_type_descriptor_type ();
278 const char *tname
= NULL
;
280 unsigned char deref_depth
= 0;
281 unsigned short tkind
, tinfo
;
283 /* Get the name of the type, or the name of the pointer type. */
284 if (want_pointer_type_p
)
286 gcc_assert (POINTER_TYPE_P (type
));
287 type2
= TREE_TYPE (type
);
289 /* Remove any '*' operators from TYPE. */
290 while (POINTER_TYPE_P (type2
))
291 deref_depth
++, type2
= TREE_TYPE (type2
);
293 if (TREE_CODE (type2
) == METHOD_TYPE
)
294 type2
= TYPE_METHOD_BASETYPE (type2
);
297 if (TYPE_NAME (type2
) != NULL
)
299 if (TREE_CODE (TYPE_NAME (type2
)) == IDENTIFIER_NODE
)
300 tname
= IDENTIFIER_POINTER (TYPE_NAME (type2
));
302 tname
= IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type2
)));
306 /* We weren't able to determine the type name. */
309 /* Decorate the type name with '', '*', "struct", or "union". */
310 pretty_name
= (char *) alloca (strlen (tname
) + 16 + deref_depth
);
311 if (want_pointer_type_p
)
313 int pos
= sprintf (pretty_name
, "'%s%s%s%s%s%s%s",
314 TYPE_VOLATILE (type2
) ? "volatile " : "",
315 TYPE_READONLY (type2
) ? "const " : "",
316 TYPE_RESTRICT (type2
) ? "restrict " : "",
317 TYPE_ATOMIC (type2
) ? "_Atomic " : "",
318 TREE_CODE (type2
) == RECORD_TYPE
320 : TREE_CODE (type2
) == UNION_TYPE
321 ? "union " : "", tname
,
322 deref_depth
== 0 ? "" : " ");
323 while (deref_depth
-- > 0)
324 pretty_name
[pos
++] = '*';
325 pretty_name
[pos
++] = '\'';
326 pretty_name
[pos
] = '\0';
329 sprintf (pretty_name
, "'%s'", tname
);
331 switch (TREE_CODE (type
))
343 tinfo
= get_ubsan_type_info_for_type (type
);
345 /* Create a new VAR_DECL of type descriptor. */
347 static unsigned int type_var_id_num
;
348 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "Lubsan_type", type_var_id_num
++);
349 decl
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
, get_identifier (tmp_name
),
351 TREE_STATIC (decl
) = 1;
352 TREE_PUBLIC (decl
) = 0;
353 DECL_ARTIFICIAL (decl
) = 1;
354 DECL_IGNORED_P (decl
) = 1;
355 DECL_EXTERNAL (decl
) = 0;
357 size_t len
= strlen (pretty_name
);
358 tree str
= build_string (len
+ 1, pretty_name
);
359 TREE_TYPE (str
) = build_array_type (char_type_node
,
360 build_index_type (size_int (len
)));
361 TREE_READONLY (str
) = 1;
362 TREE_STATIC (str
) = 1;
363 tree ctor
= build_constructor_va (dtype
, 3, NULL_TREE
,
364 build_int_cst (short_unsigned_type_node
,
366 build_int_cst (short_unsigned_type_node
,
367 tinfo
), NULL_TREE
, str
);
368 TREE_CONSTANT (ctor
) = 1;
369 TREE_STATIC (ctor
) = 1;
370 DECL_INITIAL (decl
) = ctor
;
371 rest_of_decl_compilation (decl
, 1, 0);
373 /* Save the address of the VAR_DECL into the hash table. */
374 decl
= build_fold_addr_expr (decl
);
375 decl_for_type_insert (type
, decl
);
380 /* Create a structure for the ubsan library. NAME is a name of the new
381 structure. The arguments in ... are of __ubsan_type_descriptor type
382 and there are at most two of them. MISMATCH are data used by ubsan
386 ubsan_create_data (const char *name
, location_t loc
,
387 const struct ubsan_mismatch_data
*mismatch
, ...)
392 vec
<tree
, va_gc
> *saved_args
= NULL
;
395 /* Firstly, create a pointer to type descriptor type. */
396 tree td_type
= ubsan_type_descriptor_type ();
397 TYPE_READONLY (td_type
) = 1;
398 td_type
= build_pointer_type (td_type
);
400 /* Create the structure type. */
401 ret
= make_node (RECORD_TYPE
);
402 if (loc
!= UNKNOWN_LOCATION
)
404 fields
[i
] = build_decl (UNKNOWN_LOCATION
, FIELD_DECL
, NULL_TREE
,
405 ubsan_source_location_type ());
406 DECL_CONTEXT (fields
[i
]) = ret
;
410 va_start (args
, mismatch
);
411 for (t
= va_arg (args
, tree
); t
!= NULL_TREE
;
412 i
++, t
= va_arg (args
, tree
))
414 gcc_checking_assert (i
< 3);
415 /* Save the tree arguments for later use. */
416 vec_safe_push (saved_args
, t
);
417 fields
[i
] = build_decl (UNKNOWN_LOCATION
, FIELD_DECL
, NULL_TREE
,
419 DECL_CONTEXT (fields
[i
]) = ret
;
421 DECL_CHAIN (fields
[i
- 1]) = fields
[i
];
425 if (mismatch
!= NULL
)
427 /* We have to add two more decls. */
428 fields
[i
] = build_decl (UNKNOWN_LOCATION
, FIELD_DECL
, NULL_TREE
,
429 pointer_sized_int_node
);
430 DECL_CONTEXT (fields
[i
]) = ret
;
431 DECL_CHAIN (fields
[i
- 1]) = fields
[i
];
434 fields
[i
] = build_decl (UNKNOWN_LOCATION
, FIELD_DECL
, NULL_TREE
,
435 unsigned_char_type_node
);
436 DECL_CONTEXT (fields
[i
]) = ret
;
437 DECL_CHAIN (fields
[i
- 1]) = fields
[i
];
441 TYPE_FIELDS (ret
) = fields
[0];
442 TYPE_NAME (ret
) = get_identifier (name
);
445 /* Now, fill in the type. */
447 static unsigned int ubsan_var_id_num
;
448 ASM_GENERATE_INTERNAL_LABEL (tmp_name
, "Lubsan_data", ubsan_var_id_num
++);
449 tree var
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
, get_identifier (tmp_name
),
451 TREE_STATIC (var
) = 1;
452 TREE_PUBLIC (var
) = 0;
453 DECL_ARTIFICIAL (var
) = 1;
454 DECL_IGNORED_P (var
) = 1;
455 DECL_EXTERNAL (var
) = 0;
457 vec
<constructor_elt
, va_gc
> *v
;
459 tree ctor
= build_constructor (ret
, v
);
461 /* If desirable, set the __ubsan_source_location element. */
462 if (loc
!= UNKNOWN_LOCATION
)
463 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, ubsan_source_location (loc
));
465 size_t nelts
= vec_safe_length (saved_args
);
466 for (i
= 0; i
< nelts
; i
++)
468 t
= (*saved_args
)[i
];
469 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, t
);
472 if (mismatch
!= NULL
)
474 /* Append the pointer data. */
475 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, mismatch
->align
);
476 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, mismatch
->ckind
);
479 TREE_CONSTANT (ctor
) = 1;
480 TREE_STATIC (ctor
) = 1;
481 DECL_INITIAL (var
) = ctor
;
482 rest_of_decl_compilation (var
, 1, 0);
487 /* Instrument the __builtin_unreachable call. We just call the libubsan
491 ubsan_instrument_unreachable (location_t loc
)
493 tree data
= ubsan_create_data ("__ubsan_unreachable_data", loc
, NULL
,
495 tree t
= builtin_decl_explicit (BUILT_IN_UBSAN_HANDLE_BUILTIN_UNREACHABLE
);
496 return build_call_expr_loc (loc
, t
, 1, build_fold_addr_expr_loc (loc
, data
));
499 /* Return true if T is a call to a libubsan routine. */
502 is_ubsan_builtin_p (tree t
)
504 gcc_checking_assert (TREE_CODE (t
) == FUNCTION_DECL
);
505 return strncmp (IDENTIFIER_POINTER (DECL_NAME (t
)),
506 "__builtin___ubsan_", 18) == 0;
509 /* Expand UBSAN_NULL internal call. */
512 ubsan_expand_null_ifn (gimple_stmt_iterator gsi
)
514 gimple stmt
= gsi_stmt (gsi
);
515 location_t loc
= gimple_location (stmt
);
516 gcc_assert (gimple_call_num_args (stmt
) == 2);
517 tree ptr
= gimple_call_arg (stmt
, 0);
518 tree ckind
= gimple_call_arg (stmt
, 1);
520 basic_block cur_bb
= gsi_bb (gsi
);
522 /* Split the original block holding the pointer dereference. */
523 edge e
= split_block (cur_bb
, stmt
);
525 /* Get a hold on the 'condition block', the 'then block' and the
527 basic_block cond_bb
= e
->src
;
528 basic_block fallthru_bb
= e
->dest
;
529 basic_block then_bb
= create_empty_bb (cond_bb
);
532 add_bb_to_loop (then_bb
, cond_bb
->loop_father
);
533 loops_state_set (LOOPS_NEED_FIXUP
);
536 /* Make an edge coming from the 'cond block' into the 'then block';
537 this edge is unlikely taken, so set up the probability accordingly. */
538 e
= make_edge (cond_bb
, then_bb
, EDGE_TRUE_VALUE
);
539 e
->probability
= PROB_VERY_UNLIKELY
;
541 /* Connect 'then block' with the 'else block'. This is needed
542 as the ubsan routines we call in the 'then block' are not noreturn.
543 The 'then block' only has one outcoming edge. */
544 make_single_succ_edge (then_bb
, fallthru_bb
, EDGE_FALLTHRU
);
546 /* Set up the fallthrough basic block. */
547 e
= find_edge (cond_bb
, fallthru_bb
);
548 e
->flags
= EDGE_FALSE_VALUE
;
549 e
->count
= cond_bb
->count
;
550 e
->probability
= REG_BR_PROB_BASE
- PROB_VERY_UNLIKELY
;
552 /* Update dominance info for the newly created then_bb; note that
553 fallthru_bb's dominance info has already been updated by
555 if (dom_info_available_p (CDI_DOMINATORS
))
556 set_immediate_dominator (CDI_DOMINATORS
, then_bb
, cond_bb
);
558 /* Put the ubsan builtin call into the newly created BB. */
559 tree fn
= builtin_decl_implicit (BUILT_IN_UBSAN_HANDLE_TYPE_MISMATCH
);
560 const struct ubsan_mismatch_data m
561 = { build_zero_cst (pointer_sized_int_node
), ckind
};
562 tree data
= ubsan_create_data ("__ubsan_null_data",
564 ubsan_type_descriptor (TREE_TYPE (ptr
), true),
566 data
= build_fold_addr_expr_loc (loc
, data
);
567 gimple g
= gimple_build_call (fn
, 2, data
,
568 build_zero_cst (pointer_sized_int_node
));
569 gimple_set_location (g
, loc
);
570 gimple_stmt_iterator gsi2
= gsi_start_bb (then_bb
);
571 gsi_insert_after (&gsi2
, g
, GSI_NEW_STMT
);
573 /* Unlink the UBSAN_NULLs vops before replacing it. */
574 unlink_stmt_vdef (stmt
);
576 g
= gimple_build_cond (EQ_EXPR
, ptr
, build_int_cst (TREE_TYPE (ptr
), 0),
577 NULL_TREE
, NULL_TREE
);
578 gimple_set_location (g
, loc
);
580 /* Replace the UBSAN_NULL with a GIMPLE_COND stmt. */
581 gsi_replace (&gsi
, g
, false);
584 /* Instrument a member call. We check whether 'this' is NULL. */
587 instrument_member_call (gimple_stmt_iterator
*iter
)
589 tree this_parm
= gimple_call_arg (gsi_stmt (*iter
), 0);
590 tree kind
= build_int_cst (unsigned_char_type_node
, UBSAN_MEMBER_CALL
);
591 gimple g
= gimple_build_call_internal (IFN_UBSAN_NULL
, 2, this_parm
, kind
);
592 gimple_set_location (g
, gimple_location (gsi_stmt (*iter
)));
593 gsi_insert_before (iter
, g
, GSI_SAME_STMT
);
596 /* Instrument a memory reference. T is the pointer, IS_LHS says
597 whether the pointer is on the left hand side of the assignment. */
600 instrument_mem_ref (tree t
, gimple_stmt_iterator
*iter
, bool is_lhs
)
602 enum ubsan_null_ckind ikind
= is_lhs
? UBSAN_STORE_OF
: UBSAN_LOAD_OF
;
603 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (TREE_TYPE (t
))))
604 ikind
= UBSAN_MEMBER_ACCESS
;
605 tree kind
= build_int_cst (unsigned_char_type_node
, ikind
);
606 gimple g
= gimple_build_call_internal (IFN_UBSAN_NULL
, 2, t
, kind
);
607 gimple_set_location (g
, gimple_location (gsi_stmt (*iter
)));
608 gsi_insert_before (iter
, g
, GSI_SAME_STMT
);
611 /* Callback function for the pointer instrumentation. */
614 instrument_null (tree
*tp
, int * /*walk_subtree*/, void *data
)
617 const enum tree_code code
= TREE_CODE (t
);
618 struct walk_stmt_info
*wi
= (struct walk_stmt_info
*) data
;
621 && TREE_CODE (TREE_OPERAND (t
, 0)) == SSA_NAME
)
622 instrument_mem_ref (TREE_OPERAND (t
, 0), &wi
->gsi
, wi
->is_lhs
);
623 else if (code
== ADDR_EXPR
624 && POINTER_TYPE_P (TREE_TYPE (t
))
625 && TREE_CODE (TREE_TYPE (TREE_TYPE (t
))) == METHOD_TYPE
)
626 instrument_member_call (&wi
->gsi
);
631 /* Gate and execute functions for ubsan pass. */
637 gimple_stmt_iterator gsi
;
641 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);)
643 struct walk_stmt_info wi
;
644 gimple stmt
= gsi_stmt (gsi
);
645 if (is_gimple_debug (stmt
))
651 memset (&wi
, 0, sizeof (wi
));
653 walk_gimple_op (stmt
, instrument_null
, &wi
);
663 return flag_sanitize
& SANITIZE_NULL
;
668 const pass_data pass_data_ubsan
=
670 GIMPLE_PASS
, /* type */
672 OPTGROUP_NONE
, /* optinfo_flags */
674 true, /* has_execute */
675 TV_TREE_UBSAN
, /* tv_id */
676 ( PROP_cfg
| PROP_ssa
), /* properties_required */
677 0, /* properties_provided */
678 0, /* properties_destroyed */
679 0, /* todo_flags_start */
680 TODO_update_ssa
, /* todo_flags_finish */
683 class pass_ubsan
: public gimple_opt_pass
686 pass_ubsan (gcc::context
*ctxt
)
687 : gimple_opt_pass (pass_data_ubsan
, ctxt
)
690 /* opt_pass methods: */
691 bool gate () { return gate_ubsan (); }
692 unsigned int execute () { return ubsan_pass (); }
694 }; // class pass_ubsan
699 make_pass_ubsan (gcc::context
*ctxt
)
701 return new pass_ubsan (ctxt
);
704 #include "gt-ubsan.h"