Merge from trunk: 215733-215743
[official-gcc.git] / gcc-4_6_3-mobile / gcc / mversn-dispatch.c
blobdc9c1a2cf306795a8dd58f6489d2031420ae1832
1 /* Multiversion Dispatch Pass.
2 Copyright (C) 2011 Free Software Foundation, Inc.
3 Contributed by Sriraman Tallam (tmsriram@google.com)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 /* This pass processes __builtin_dispatch calls to call multi-versioned
23 functions. Only two versions are supported now. Example use :
25 int popcnt_sse4(unsigned int x) __attribute__((__target__("popcnt")));
26 int popcnt_sse4(unsigned int x)
27 {
28 int count = __builtin_popcount(x);
29 return count;
30 }
32 int popcnt(unsigned int x) __attribute__((__target__("no-popcnt")));
33 int popcnt(unsigned int x)
34 {
35 int count = __builtin_popcount(x);
36 return count;
37 }
39 int testsse() __attribute__((version_selector));
40 int main ()
41 {
42 ...
43 ret = __builtin_dispatch (testsse, (void*)popcnt_sse4, (void*)popcnt, 25);
44 ...
45 }
47 There are two passes that are run to achieve multi-versioning.
48 "pass_ipa_multiversion_dispatch" is an ipa pass that decides which functions
49 have to be cloned and hoists the feature-test calls appropriately. This
50 pass can be enabled with the flag "-fclone-hot-version-paths" and disabled
51 with "-fno-clone-hot-version-paths".
53 "pass_tree_convert_builtin_dispatch" does the lowering. It is a
54 function-level pass. Functions marked with attribute "version_selector" are
55 also handled by this pass. This pass is always on.
57 How to use __builtin_dispatch ?
58 -----------------------------
60 __builtin_dispatch takes 3 mandatory arguments :
62 __builtin_dispatch (arg1, arg2, arg3, <arg4>, <arg5>, ...);
64 arg1 is the pointer to the feature-test function.
65 arg2 is the ( void *) cast pointer to the versioned function that is
66 executed when the feature test returns 1.
67 arg3 is the ( void *) cast pointer to the versioned function that is
68 executed when the feature test returns 0.
69 arg4, arg5, ... are optional. They are the arguments to the versioned
70 functions. Both versions must accept the same number of arguments.
71 The __builtin_dispatch function returns the value returned by the
72 versioned function that gets executed. The versioned function arg2
73 is executed when the feature_test function arg1 returns 1 and arg3
74 is executed when the feature_test function arg1 returns 0. arg1
75 could be marked as a "version_selector" function if it is a pure
76 function with no side-effects, returns a constant at run-time and
77 can be evaluated at any point in the execution.
79 When to use the "version_selector" attribute ?
80 -----------------------------------------------
82 Functions are marked with attribute "version_selector" only if
83 they are run-time constants. Example of such functions would
84 be those that test if a specific feature is available on a
85 particular architecture. Such functions must return a positive
86 integer. For two-way functions, those that test if a feature
87 is present or not must return 1 or 0 respectively.
90 The code is organized into five parts. The first part has the functionality
91 to detect and handle functions marked with attribute "version_selector". The
92 second part is the analysis phase where we find calls to __builtin_dispatch
93 and mark all functions that are hot and have a call-graph path to a
94 __builtin_dispatch call. The third part decides which functions
95 to clone. This is based on the number of clones that have to be created for
96 the functions marked in the analysis phase. Only two clones are allowed for
97 a function currently. The fourth part is where the actual cloning happens.
98 The fifth part contains the implementation to lower the __builtin_dispatch
99 calls.
101 Flags : -fclone-hot-version-paths does function unswitching via cloning.
102 --param=num-mversn-clones=<num> allows to specify the number of
103 functions that should be cloned.
104 --param=mversn-clone-depth=<num> allows to specify the length of
105 the call graph path that should be cloned. num = 0 implies only
106 leaf node that contains the __builtin_dispatch statement must be
107 cloned. */
109 #include "config.h"
110 #include "system.h"
111 #include "coretypes.h"
112 #include "tm.h"
113 #include "tree.h"
114 #include "tree-inline.h"
115 #include "langhooks.h"
116 #include "flags.h"
117 #include "cgraph.h"
118 #include "diagnostic.h"
119 #include "toplev.h"
120 #include "timevar.h"
121 #include "params.h"
122 #include "fibheap.h"
123 #include "intl.h"
124 #include "tree-pass.h"
125 #include "hashtab.h"
126 #include "coverage.h"
127 #include "ggc.h"
128 #include "tree-flow.h"
129 #include "rtl.h"
130 #include "ipa-prop.h"
131 #include "basic-block.h"
132 #include "toplev.h"
133 #include "dbgcnt.h"
134 #include "tree-dump.h"
135 #include "output.h"
136 #include "vecprim.h"
137 #include "gimple-pretty-print.h"
138 #include "target.h"
139 #include "cfgloop.h"
141 typedef struct cgraph_node* NODEPTR;
142 DEF_VEC_P (NODEPTR);
143 DEF_VEC_ALLOC_P (NODEPTR, heap);
145 /* Store the decl of __builtin_dispatch */
146 static tree builtin_function_decl = NULL;
148 /* Hash to map name to a decl. Used for variables and functions. */
149 static htab_t name_decl_htab = NULL;
151 /* Hashtable helpers for name_decl_htab. */
153 static hashval_t
154 name_decl_htab_hash_descriptor (const void *p)
156 const_tree t = (const_tree) p;
157 const char *name
158 = (IDENTIFIER_POINTER (DECL_NAME (t)));
159 return htab_hash_string(name);
162 /* Hashtable helper for name_decl_htab. */
164 static int
165 name_decl_htab_eq_descriptor (const void *p1, const void *p2)
167 const_tree t1 = (const_tree) p1;
168 const char *c1 = IDENTIFIER_POINTER (DECL_NAME (t1));
169 const char *c2 = (const char *)p2;
171 return (strcmp (c1, c2) == 0);
174 /* Return true if NODE is a hot function. It is a hot function
175 if its execution frequency is determined to be hot or
176 if any of its incoming or outgoing call-graph edges is hot. */
178 static bool
179 hot_function_p (struct cgraph_node *node)
181 struct cgraph_edge *edge;
183 if (node->frequency == NODE_FREQUENCY_HOT)
184 return true;
186 for (edge = node->callees; edge; edge = edge->next_callee)
187 if (cgraph_maybe_hot_edge_p (edge))
188 return true;
190 for (edge = node->callers; edge; edge = edge->next_caller)
191 if (cgraph_maybe_hot_edge_p (edge))
192 return true;
194 return false;
197 /* Return the number of arguments that a function has. */
199 static int
200 function_args_count (tree fntype)
202 function_args_iterator args_iter;
203 tree t;
204 int num = 0;
206 if (fntype)
208 FOREACH_FUNCTION_ARGS(fntype, t, args_iter)
210 num++;
214 return num;
217 /* Return a new name by appending SUFFIX to the DECL name. */
219 static char *
220 make_name (tree decl, const char *suffix)
222 char *global_var_name;
223 int name_len;
224 const char *name;
226 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
228 name_len = strlen (name) + strlen (suffix) + 2;
229 global_var_name = (char *) xmalloc (name_len);
230 /* Use '.' to concatenate names as it is demangler friendly. */
231 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
232 return global_var_name;
235 /* Code for handling version_selector attribute functions. Such functions are
236 run-time constants and need to be executed only once. They are hoisted
237 to a static constructor and their result is stored in a global. */
241 /* This function returns the global variable / constructor name created
242 for feature-test functions marked with attribute "version_selector".
243 The name returned is the DECL name appended with
244 "version_selector_global" for the variable and
245 "version_selector_constructor" for the constructor. */
247 static char*
248 make_feature_test_global_name (tree decl, bool is_constructor)
250 if (is_constructor)
251 return make_name (decl, "version.selector.constructor");
253 return make_name (decl, "version.selector.global");
256 /* This function creates a new VAR_DECL with attributes set
257 using the parameters. PUBLIK corresponds to TREE_PUBLIC,
258 EXTERNAL corresponds to DECL_EXTERNAL and comdat is
259 for DECL_ONE_ONLY. The global variable will have the
260 same status as the version_selector function.*/
262 static tree
263 allocate_new_var (const char *name, int publik,
264 int external, int comdat)
266 tree new_global_var;
267 struct varpool_node *vnode;
269 new_global_var = build_decl (UNKNOWN_LOCATION,
270 VAR_DECL,
271 get_identifier (name),
272 integer_type_node);
274 DECL_EXTERNAL (new_global_var) = external;
275 TREE_STATIC (new_global_var) = 1;
276 TREE_PUBLIC (new_global_var) = publik;
277 DECL_INITIAL (new_global_var) = 0;
278 DECL_ARTIFICIAL (new_global_var) = 1;
279 DECL_PRESERVE_P (new_global_var) = 1;
281 if (comdat)
282 make_decl_one_only (new_global_var, DECL_ASSEMBLER_NAME (new_global_var));
283 assemble_variable (new_global_var, 0, 0, 0);
285 vnode = varpool_node (new_global_var);
286 gcc_assert (vnode != NULL);
287 /* Set finalized to 1, otherwise it asserts in function "write_symbol" in
288 lto-streamer-out.c. */
289 vnode->finalized = 1;
291 return new_global_var;
294 /* Make a new constructor function here to call a feature-test function
295 and set its body to CONSTRUCTOR_BODY. Its public and comdat
296 attributes are set from the parameters, PUBLIK, and COMDAT.
297 VERSION_SELECTOR_VAR is the global decl that saves the result of the
298 feature-test function in the constructor. */
300 static tree
301 make_constructor_function (char *name, gimple constructor_body, int publik,
302 int comdat, tree version_selector_var)
304 tree decl, type, t;
305 gimple_seq seq;
306 basic_block new_bb;
307 tree old_current_function_decl;
309 type = build_function_type_list (void_type_node, NULL_TREE);
311 if (dump_file)
312 fprintf (dump_file, "Name of new constructor function = %s\n", name);
314 decl = build_fn_decl (name, type);
316 DECL_NAME (decl) = get_identifier (name);
317 SET_DECL_ASSEMBLER_NAME (decl, DECL_NAME (decl));
318 gcc_assert (cgraph_node (decl) != NULL);
320 TREE_USED (decl) = 1;
321 DECL_ARTIFICIAL (decl) = 1;
322 DECL_IGNORED_P (decl) = 0;
323 TREE_PUBLIC (decl) = publik;
324 DECL_UNINLINABLE (decl) = 1;
325 DECL_EXTERNAL (decl) = 0;
326 DECL_CONTEXT (decl) = NULL_TREE;
327 DECL_INITIAL (decl) = make_node (BLOCK);
328 DECL_STATIC_CONSTRUCTOR (decl) = 1;
329 TREE_READONLY (decl) = 0;
330 DECL_PURE_P (decl) = 0;
332 if (comdat)
333 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
335 /* Build result decl and add to function_decl. */
336 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, void_type_node);
337 DECL_ARTIFICIAL (t) = 1;
338 DECL_IGNORED_P (t) = 1;
339 DECL_RESULT (decl) = t;
341 gimplify_function_tree (decl);
343 /* Build CFG for this function. */
345 old_current_function_decl = current_function_decl;
346 push_cfun (DECL_STRUCT_FUNCTION (decl));
347 current_function_decl = decl;
348 init_empty_tree_cfg_for_function (DECL_STRUCT_FUNCTION (decl));
349 cfun->curr_properties |=
350 (PROP_gimple_lcf | PROP_gimple_leh | PROP_cfg | PROP_referenced_vars |
351 PROP_ssa);
352 new_bb = create_empty_bb (ENTRY_BLOCK_PTR);
353 make_edge (ENTRY_BLOCK_PTR, new_bb, EDGE_FALLTHRU);
355 /* XXX: Not sure if the edge commented below is necessary. If I add this
356 edge, it fails in gimple_verify_flow_info in tree-cfg.c in condition :
357 " if (e->flags & EDGE_FALLTHRU)"
358 during -fprofile-generate.
359 Otherwise, it is fine. Deleting this edge does not break anything.
360 Commenting this so that it is clear I am intentionally not doing this.*/
361 /* make_edge (new_bb, EXIT_BLOCK_PTR, EDGE_FALLTHRU); */
363 seq = gimple_seq_alloc_with_stmt (constructor_body);
365 set_bb_seq (new_bb, seq);
366 gimple_set_bb (constructor_body, new_bb);
368 /* Set the lexical block of the constructor body. Fails the inliner
369 other wise. */
370 gimple_set_block (constructor_body, DECL_INITIAL (decl));
372 /* This call is very important if this pass runs when the IR is in
373 SSA form. It breaks things in strange ways otherwise. */
374 init_tree_ssa (DECL_STRUCT_FUNCTION (decl));
375 add_referenced_var (version_selector_var);
377 cgraph_add_new_function (decl, true);
378 cgraph_call_function_insertion_hooks (cgraph_node (decl));
379 cgraph_mark_needed_node (cgraph_node (decl));
381 if (dump_file)
382 dump_function_to_file (decl, dump_file, TDF_BLOCKS);
384 pop_cfun ();
385 current_function_decl = old_current_function_decl;
386 return decl;
389 /* If the current function is marked with attribute
390 "version_selector" then it is the predicate (feature-test) function
391 for multi-versioning. Call this function in a constructor and assign
392 the return value to a global variable.
393 The constructor's name is the decl name suffixed
394 "version_selector_constructor" and the global variable's name is the
395 decl name suffixed with "version_selector_global"
397 For example, feature-test function isSSE4 marked with attribute
398 version_selector is converted to
400 void isSSE4_version_selector_constructor ()
402 isSSE4_version_selector_global = isSSE4 ();
405 This function returns the decl of the global variable.
407 THIS_DECL is the function decl of the "version_selector" function.
410 static tree
411 handle_version_selector_attr_function (tree this_decl)
413 char *global_var_name;
414 tree version_selector_var = NULL;
415 void **slot;
417 gcc_assert (!flag_lto);
419 if (dump_file)
420 fprintf (dump_file, "Creating constructor/global for function %s\n",
421 IDENTIFIER_POINTER (DECL_NAME (this_decl)));
423 global_var_name = make_feature_test_global_name (this_decl,
424 false);
426 slot = htab_find_slot_with_hash (name_decl_htab, global_var_name,
427 htab_hash_string (global_var_name),
428 INSERT);
429 if (*slot == NULL)
431 if (dump_file)
432 fprintf (dump_file, "Creating global variable %s\n",
433 global_var_name);
434 *slot = allocate_new_var (global_var_name,
435 TREE_PUBLIC (this_decl),
436 DECL_EXTERNAL (this_decl),
437 DECL_ONE_ONLY (this_decl));
439 else
441 free (global_var_name);
442 return (tree) *slot;
445 version_selector_var = (tree) *slot;
447 /* If the feature-test function is not external, create a constructor and
448 call this function in the constructor. */
450 if (!DECL_EXTERNAL (this_decl))
452 char *constructor_name;
453 gimple constructor_body;
454 tree constructor_decl;
456 constructor_name
457 = make_feature_test_global_name (this_decl, true);
459 constructor_body = gimple_build_call (this_decl, 0);
461 gimple_call_set_lhs (constructor_body, version_selector_var);
463 if (dump_file)
464 print_gimple_stmt (dump_file, constructor_body, 0, TDF_VOPS);
466 constructor_decl =
467 make_constructor_function (constructor_name, constructor_body,
468 TREE_PUBLIC (this_decl),
469 DECL_ONE_ONLY (this_decl),
470 version_selector_var);
472 gcc_assert (constructor_decl != NULL_TREE);
473 free (constructor_name);
476 free (global_var_name);
477 return version_selector_var;
480 /* Start Analysis phase. Mark all functions that are hot and have a call-graph
481 path to a __builtin_dispatch call. */
483 /* This function returns the address of the feature test function.
484 If the address of the function is saved to a temporary,
485 this function traverses the gimple statements before BUILTIN_STMT
486 and finds an assignment whose rhs is the feature test function.
487 If the feature test function is specified as a function pointer
488 whose function value is unknown, this funcition returns NULL. */
490 static tree
491 find_version_selector_func_addr (gimple builtin_stmt)
493 tree cond_func_addr = NULL;
494 gimple def_stmt = NULL;
496 cond_func_addr = gimple_call_arg (builtin_stmt, 0);
498 gcc_assert (TREE_CODE (cond_func_addr) == ADDR_EXPR
499 || TREE_CODE (cond_func_addr) == SSA_NAME);
501 if (TREE_CODE (cond_func_addr) == ADDR_EXPR)
502 return cond_func_addr;
504 /* TREE_CODE (cond_func_addr) == SSA_NAME
505 This means a new function pointer variable is created and assigned the
506 address of the feature-test function. Traverse the statements backwards
507 and find the assignment to get the RHS. */
509 def_stmt = SSA_NAME_DEF_STMT (cond_func_addr);
511 gcc_assert (def_stmt
512 && gimple_assign_lhs (def_stmt) == cond_func_addr);
514 cond_func_addr = gimple_assign_rhs1 (def_stmt);
516 /* If the cond_func_addr is still not an ADDR_EXPR, it means that the
517 feature-test function is specified as a pointer. In this case, we
518 return NULL, since the feature-test function decl is not known. */
520 if (cond_func_addr == NULL
521 || TREE_CODE (cond_func_addr) != ADDR_EXPR)
522 return NULL;
524 /* If the operand of the ADDR_EXPR is not a function_decl, return NULL
525 as this still means the feature-test function is specified as a
526 function pointer. */
528 if (TREE_CODE (TREE_OPERAND (cond_func_addr, 0)) != FUNCTION_DECL)
529 return NULL;
531 return cond_func_addr;
534 /* Finds the gimple calls to __builtin_dispatch in function pointed
535 to by the call graph NODE and populates the vector VEC. Returns
536 true if at least one statement was found where the feature test
537 function is marked as "version_selector". Otherwise, there is no
538 question of hoisting it. */
540 static bool
541 is_builtin_dispatch_stmt_present (struct cgraph_node *node,
542 VEC (tree,heap) **vec)
544 struct cgraph_edge *edge;
545 bool present = false;
547 gcc_assert (!flag_lto);
549 for (edge = node->callees; edge; edge = edge->next_callee)
551 if (edge->callee->decl == builtin_function_decl)
553 tree cond_func_decl;
554 tree cond_func_addr;
555 gcc_assert (*vec != NULL);
556 cond_func_addr = find_version_selector_func_addr (edge->call_stmt);
558 if (cond_func_addr == NULL)
559 continue;
561 cond_func_decl = TREE_OPERAND (cond_func_addr, 0);
563 /* Do not consider for hoisting if "version_selector" attribute is
564 not set. */
565 if (lookup_attribute ("version_selector",
566 DECL_ATTRIBUTES (cond_func_decl)) == NULL)
568 if (dump_file)
570 fprintf (dump_file, "Not hoisting builtin_dispatch as "
571 "feature_test function not version_selector :\n");
572 print_gimple_stmt (dump_file, edge->call_stmt, 0, TDF_VOPS);
574 continue;
577 present = true;
578 VEC_safe_push (tree, heap, *vec, cond_func_decl);
581 return present;
584 /* Updates the list of feature-test function decls reaching the cgraph
585 function NODE. */
587 static void
588 update_reachable_decls_list (struct cgraph_node *node,
589 VEC (tree, heap) *predicate_decls)
591 VEC (tree, heap) **decl_list = NULL;
592 tree cond_func_decl;
593 int ix;
595 if (node->aux == NULL)
597 decl_list = (VEC (tree, heap) **) xmalloc (sizeof (VEC (tree, heap) *));
598 *decl_list = VEC_alloc (tree, heap, 1);
599 node->aux = decl_list;
601 else
602 decl_list = (VEC (tree, heap) **) node->aux;
604 for (ix = 0; VEC_iterate (tree, predicate_decls, ix, cond_func_decl); ++ix)
605 VEC_safe_push (tree, heap, *decl_list, cond_func_decl);
608 /* Propagate the __builtin_dispatch stmt (s) called from node to its
609 callers, PREDICATE_DECLS is the decls list of the predicate functions. */
611 static unsigned int
612 mark_reachable_functions (struct cgraph_node *this_node,
613 VEC (tree, heap) *predicate_decls)
615 VEC (NODEPTR, heap) *work_list;
616 VEC (int, heap) *depth_list;
617 struct cgraph_edge *e;
618 htab_t node_htab = NULL;
619 void **slot = NULL;
621 /* Use a work-list style algorithm to mark functions in any call-graph
622 path to the current function. */
624 work_list = VEC_alloc (NODEPTR, heap, 8);
625 depth_list = VEC_alloc (int, heap, 8);
627 VEC_safe_push (NODEPTR, heap, work_list, this_node);
628 VEC_safe_push (int, heap, depth_list, 0);
630 node_htab = htab_create (10, htab_hash_pointer,
631 htab_eq_pointer, NULL);
633 slot = htab_find_slot (node_htab, this_node, INSERT);
635 gcc_assert (*slot == NULL);
636 *slot = this_node;
638 while (!VEC_empty (NODEPTR, work_list))
640 struct cgraph_node *node = VEC_pop (NODEPTR, work_list);
641 int depth = VEC_pop (int, depth_list);
643 if (dump_file)
644 fprintf (dump_file, "%s has a depth = %d callgraph path to %s\n",
645 cgraph_node_name (node), depth,
646 cgraph_node_name (this_node));
648 update_reachable_decls_list (node, predicate_decls);
650 gcc_assert (node->aux != NULL);
652 if (depth >= PARAM_VALUE (PARAM_MVERSN_CLONE_CGRAPH_DEPTH))
654 if (dump_file)
655 fprintf (dump_file, "Not propogating __builtin_dispatch... "
656 "maximum cloning depth = %d reached\n",
657 PARAM_VALUE (PARAM_MVERSN_CLONE_CGRAPH_DEPTH));
658 continue;
661 for (e = node->callers; e; e = e->next_caller)
663 slot = htab_find_slot (node_htab, e->caller, INSERT);
664 if (*slot != NULL)
665 continue;
666 *slot = e->caller;
667 if (!hot_function_p (e->caller))
668 continue;
670 VEC_safe_push (NODEPTR, heap, work_list, e->caller);
671 VEC_safe_push (int, heap, depth_list, (depth + 1));
675 htab_delete (node_htab);
676 VEC_free (NODEPTR, heap, work_list);
677 VEC_free (int, heap, depth_list);
678 return 0;
681 /* Scan the call graph and detect hot functions that have __builtin_dispatch
682 calls. Then, propogate this information to its callers. Returns true if
683 a suitable __builtin_dispatch was found. */
685 static bool
686 perform_analysis_phase (void)
688 struct cgraph_node *node;
689 VEC(tree, heap) *builtin_predicates_vec = NULL;
690 bool flag = false;
692 builtin_predicates_vec = VEC_alloc (tree, heap, 1);
694 for (node = cgraph_nodes; node; node = node->next)
696 /* if the body of this decl is from outside, do nothing. */
697 if (DECL_EXTERNAL (node->decl))
698 continue;
700 if (!hot_function_p (node))
701 continue;
703 if (!is_builtin_dispatch_stmt_present (node, &builtin_predicates_vec))
704 continue;
706 if (dump_file)
708 fprintf (dump_file, "%s calls __builtin_dispatch atleast once.\n",
709 cgraph_node_name (node));
711 fprintf (dump_file, "%s is a hot function, consider cloning ...\n",
712 cgraph_node_name (node));
715 flag = true;
716 mark_reachable_functions (node, builtin_predicates_vec);
717 VEC_truncate (tree, builtin_predicates_vec, 0);
720 VEC_free (tree, heap, builtin_predicates_vec);
721 return flag;
724 /* End Analysis phase. */
726 /* Decide Cloning Phase.
728 In this phase, we go through each function and decide if it should be
729 cloned or not. */
731 /* This function counts the number of unique decls in the DECL_LIST.*/
733 static int
734 count_predicate_functions (VEC (tree,heap) *decl_list)
736 int ix;
737 int count = 0;
738 tree cond_func_decl = NULL;
739 htab_t dup_decl_htab = NULL;
741 if (VEC_length (tree, decl_list) == 1)
742 return 1;
744 dup_decl_htab = htab_create (2, htab_hash_pointer, htab_eq_pointer, NULL);
746 for (ix = 0; VEC_iterate (tree, decl_list, ix, cond_func_decl); ++ix)
748 void **slot = NULL;
749 slot = htab_find_slot (dup_decl_htab, cond_func_decl, INSERT);
751 if (*slot != NULL)
752 continue;
753 count++;
754 *slot = cond_func_decl;
757 htab_delete (dup_decl_htab);
758 return count;
761 /* This function decides which functions to clone based on the number of
762 feature_test decls reaching it. Currently, only one feature_test decl
763 is allowed. */
765 static bool
766 decide_cloning_phase (void)
768 struct cgraph_node *node;
769 int count;
770 bool run_cloning_phase = false;
771 int num_funcs_cloned = 0;
773 for (node = cgraph_nodes; node; node = node->next)
775 tree cond_func_decl = NULL;
776 VEC (tree, heap) *vec;
777 if (node->aux == NULL)
778 continue;
780 if (num_funcs_cloned >= PARAM_VALUE (PARAM_NUMBER_OF_MVERSN_CLONES))
782 if (dump_file)
783 fprintf (dump_file, "Reached cloning limit specified "
784 "by \"num-mversn-clones\" for %s\n",
785 cgraph_node_name (node));
787 free (node->aux);
788 node->aux = NULL;
789 continue;
792 vec = *(VEC (tree,heap) **) node->aux;
793 count = count_predicate_functions (vec);
794 gcc_assert (count >= 1);
795 cond_func_decl = VEC_index (tree, vec, 0);
796 gcc_assert (cond_func_decl != NULL);
797 VEC_free (tree, heap, vec);
798 free (node->aux);
799 node->aux = NULL;
801 if (count > 1)
803 if (dump_file)
804 fprintf (dump_file, "%s has %d predicates, Not cloning for > 1\n",
805 cgraph_node_name (node), count);
806 continue;
808 /* Set the node's aux value to be that of the predicate decl. */
809 node->aux = cond_func_decl;
810 run_cloning_phase = true;
811 num_funcs_cloned++;
813 return run_cloning_phase;
816 /* End Decide Cloning Phase. */
818 /* Cloning Phase. */
820 /* Deletes all basic-blocks and leaves function with :
821 ENTRY_BLOCK ---> (new empty basic block) ---> EXIT_BLOCK
824 static basic_block
825 empty_function_body (tree fndecl)
827 basic_block bb, new_bb;
828 edge e;
829 tree old_current_function_decl;
831 old_current_function_decl = current_function_decl;
832 push_cfun (DECL_STRUCT_FUNCTION (fndecl));
833 current_function_decl = fndecl;
835 clear_edges ();
836 for (bb = ENTRY_BLOCK_PTR; bb != NULL;)
838 basic_block bb_next;
839 bb_next = bb->next_bb;
840 if (bb != EXIT_BLOCK_PTR
841 && bb != ENTRY_BLOCK_PTR)
843 if (bb_seq (bb) != NULL)
845 gimple_stmt_iterator i;
846 for (i = gsi_start_bb (bb); !gsi_end_p (i);)
848 gimple stmt = gsi_stmt (i);
849 unlink_stmt_vdef (stmt);
850 gsi_remove (&i, true);
851 release_defs (stmt);
854 bb->il.gimple = NULL;
855 bb->prev_bb = NULL;
856 bb->next_bb = NULL;
857 SET_BASIC_BLOCK (bb->index, NULL);
858 n_basic_blocks--;
860 bb = bb_next;
862 ENTRY_BLOCK_PTR->next_bb = EXIT_BLOCK_PTR;
863 new_bb = create_empty_bb (ENTRY_BLOCK_PTR);
864 e = make_edge (ENTRY_BLOCK_PTR, new_bb, EDGE_FALLTHRU);
865 gcc_assert (e != NULL);
866 /* XXX:Is this edge necessary ? */
867 e = make_edge (new_bb, EXIT_BLOCK_PTR, 0);
868 gcc_assert (e != NULL);
870 if (dump_file)
871 dump_function_to_file (current_function_decl, dump_file, TDF_BLOCKS);
873 current_function_decl = old_current_function_decl;
874 pop_cfun ();
875 return new_bb;
878 /* Takes function with decl ORIG_FNDECL and clones it. The
879 name of the clone is the original name suffixed with
880 NAME_SUFFIX. Code is adapted from cgraph_function_versioning
881 in cgraphunit.c */
883 static tree
884 clone_function (tree orig_fndecl, const char *name_suffix)
886 tree new_decl;
887 char *new_name;
888 struct cgraph_node *new_version;
889 struct cgraph_node *old_version;
890 void **slot;
891 tree old_current_function_decl;
893 new_name = make_name (orig_fndecl, name_suffix);
894 new_decl = copy_node (orig_fndecl);
897 slot = htab_find_slot_with_hash (name_decl_htab, new_name,
898 htab_hash_string (new_name), INSERT);
900 gcc_assert (*slot == NULL);
901 *slot = new_decl;
903 /* Code adapted from cgraph_function_versioning in cgraphuinit.c */
905 new_version = cgraph_node (new_decl);
906 old_version = cgraph_node (orig_fndecl);
908 new_version->local = old_version->local;
909 new_version->global = old_version->global;
910 new_version->rtl = old_version->rtl;
911 new_version->reachable = true;
912 new_version->count = old_version->count;
914 /* Set the name of the new function. */
915 DECL_NAME (new_decl) = get_identifier (new_name);
916 SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
917 SET_DECL_RTL (new_decl, NULL);
919 tree_function_versioning (orig_fndecl, new_decl, NULL /*tree_map*/,
920 false, NULL /*args_to_skip*/,
921 NULL /* blocks_to_copy */ ,
922 NULL /* new_entry */);
925 old_current_function_decl = current_function_decl;
926 push_cfun (DECL_STRUCT_FUNCTION (new_decl));
927 current_function_decl = new_decl;
929 TREE_READONLY (new_decl) = TREE_READONLY (orig_fndecl);
930 TREE_STATIC (new_decl) = TREE_STATIC (orig_fndecl);
931 TREE_USED (new_decl) = TREE_USED (orig_fndecl);
932 DECL_ARTIFICIAL (new_decl) = 1;
933 DECL_IGNORED_P (new_decl) = 0;
934 TREE_PUBLIC (new_decl) = TREE_PUBLIC (orig_fndecl);
935 DECL_CONTEXT (new_decl) = DECL_CONTEXT (orig_fndecl);
937 DECL_EXTERNAL (new_decl) = DECL_EXTERNAL (orig_fndecl);
938 DECL_COMDAT (new_decl) = DECL_COMDAT (orig_fndecl);
939 DECL_COMDAT_GROUP (new_decl) = DECL_COMDAT_GROUP (orig_fndecl);
940 DECL_VIRTUAL_P (new_decl) = DECL_VIRTUAL_P (orig_fndecl);
941 DECL_WEAK (new_decl) = DECL_WEAK (orig_fndecl);
943 /* Always inline the clones. Why are we cloning otherwise? */
944 DECL_DECLARED_INLINE_P (new_decl) = 1;
945 DECL_UNINLINABLE (new_decl) = 0;
946 new_version->local.externally_visible
947 = old_version->local.externally_visible;
948 new_version->local.local
949 = old_version->local.local;
951 new_version->analyzed = true;
952 new_version->lowered = true;
954 if (dump_file)
955 dump_function_to_file (new_decl, dump_file, TDF_BLOCKS);
957 cgraph_add_new_function (new_decl, true);
959 cgraph_call_function_insertion_hooks (new_version);
960 cgraph_mark_needed_node (new_version);
963 free_dominance_info (CDI_DOMINATORS);
964 free_dominance_info (CDI_POST_DOMINATORS);
965 calculate_dominance_info (CDI_DOMINATORS);
966 calculate_dominance_info (CDI_POST_DOMINATORS);
968 pop_cfun ();
969 current_function_decl = old_current_function_decl;
971 return new_decl;
974 /* This function populates the vector *VEC with the args in the gimple
975 call statement STMT. SKIP_ARGS is the number of args to skip.*/
977 static void
978 get_function_args (gimple stmt, int num_args, VEC (tree, heap) **vec,
979 int skip_args)
981 int i;
983 if (num_args == 0) return;
985 *vec = VEC_alloc (tree, heap, num_args);
986 /* The number of args in a function is 1 plus the actual number of
987 args. Also, there are 3 special args reserved, so the first arg
988 starts from 3. */
989 for (i = 0; i <= num_args - 2; ++i)
990 VEC_quick_push (tree, *vec, gimple_call_arg (stmt, (skip_args + i)));
993 /* Given ret = __builtin_dispatch (pred, fn1, fn2, arg1, ....)
994 get ret = fn1 (arg1, ...) or ret = fn2 (arg1, ....)
995 depending on the value of SIDE == 0 or 1. */
997 static gimple
998 make_specialized_call_from_builtin (gimple builtin_stmt, int side)
1000 tree func_addr;
1001 int num_func_args = 0;
1002 VEC (tree, heap) *nargs = NULL;
1003 tree lhs_stmt;
1004 gimple specialized_call_stmt;
1006 if (side == 0)
1007 func_addr = gimple_call_arg (builtin_stmt, 1);
1008 else
1009 func_addr = gimple_call_arg (builtin_stmt, 2);
1011 num_func_args
1012 = function_args_count (TREE_TYPE (TREE_OPERAND (func_addr, 0)));
1014 get_function_args (builtin_stmt, num_func_args, &nargs, 3);
1016 specialized_call_stmt = gimple_build_call_vec (func_addr, nargs);
1018 lhs_stmt = gimple_call_lhs (builtin_stmt);
1020 if (lhs_stmt != NULL_TREE)
1021 gimple_call_set_lhs (specialized_call_stmt, lhs_stmt);
1023 if (nargs != NULL)
1024 VEC_free (tree, heap, nargs);
1026 return specialized_call_stmt;
1029 /* Given a call (GENERIC_STMT) to a function that is cloned, substitute
1030 with a call to the correct clone. */
1032 static gimple
1033 make_specialized_call_to_clone (gimple generic_stmt, int side)
1035 tree new_decl;
1036 char *new_name;
1037 tree generic_fndecl;
1038 gimple specialized_call_stmt;
1039 void **slot;
1040 int num_func_args;
1041 tree lhs_stmt;
1042 VEC (tree, heap) *nargs= NULL;
1044 generic_fndecl = gimple_call_fndecl (generic_stmt);
1045 gcc_assert (generic_fndecl != NULL);
1047 if (side == 0)
1048 new_name = make_name (generic_fndecl, "clone.0");
1049 else
1050 new_name = make_name (generic_fndecl, "clone.1");
1052 slot = htab_find_slot_with_hash (name_decl_htab, new_name,
1053 htab_hash_string (new_name), NO_INSERT);
1054 gcc_assert (slot != NULL);
1055 new_decl = (tree) *slot;
1056 gcc_assert (new_decl);
1058 num_func_args = function_args_count (TREE_TYPE (generic_fndecl));
1059 get_function_args (generic_stmt, num_func_args, &nargs, 0);
1060 specialized_call_stmt = gimple_build_call_vec (new_decl, nargs);
1062 lhs_stmt = gimple_call_lhs (generic_stmt);
1064 if (lhs_stmt != NULL_TREE)
1065 gimple_call_set_lhs (specialized_call_stmt, lhs_stmt);
1067 if (nargs != NULL)
1068 VEC_free (tree, heap, nargs);
1070 return specialized_call_stmt;
1073 /* Returns true if STMT is a call to __builtin_dispatch and its
1074 predicate feature-test function is marked with attribute
1075 "version_selector". */
1077 static bool
1078 is_builtin_with_predicate_version_selector (gimple stmt)
1080 tree cond_func_addr, cond_func_decl;
1082 gcc_assert (!flag_lto);
1084 if (gimple_call_fndecl (stmt) != builtin_function_decl)
1085 return false;
1087 cond_func_addr = find_version_selector_func_addr (stmt);
1089 if (cond_func_addr == NULL)
1090 return false;
1092 cond_func_decl = TREE_OPERAND (cond_func_addr, 0);
1093 if (lookup_attribute ("version_selector",
1094 DECL_ATTRIBUTES (cond_func_decl)) != NULL)
1095 return true;
1097 return false;
1100 /* Find calls to __builtin_dispatch or to functions that are versioned
1101 in CLONE_DECL and substitute the call with the correct version based
1102 on the value of SIDE. */
1104 static void
1105 specialize_call (tree clone_decl, int side)
1107 basic_block bb;
1108 tree old_current_function_decl;
1110 old_current_function_decl = current_function_decl;
1111 push_cfun (DECL_STRUCT_FUNCTION (clone_decl));
1112 current_function_decl = clone_decl;
1114 /* Iterate over call edges and find out if there is
1115 a call to __builtin_dispatch or a cloned function.
1116 We cannot iterate over call graph edges as there are
1117 no edges for the clones yet. */
1119 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (clone_decl))
1121 gimple_stmt_iterator gsi;
1122 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1124 tree callee_decl;
1125 struct cgraph_node *callee_node;
1126 gimple specialized_call_stmt;
1127 gimple stmt = gsi_stmt (gsi);
1129 if (!is_gimple_call (stmt))
1130 continue;
1132 callee_decl = gimple_call_fndecl (stmt);
1134 if (callee_decl == NULL)
1135 continue;
1137 callee_node = cgraph_node (callee_decl);
1139 /* For a __builtin_dispatch stmt, only specialize if
1140 version_selector attribute is set. Otherwise, it is
1141 not hoisted, so no specialization. */
1143 if (is_builtin_with_predicate_version_selector (stmt))
1145 specialized_call_stmt =
1146 make_specialized_call_from_builtin (stmt, side);
1148 else if (callee_node->aux != NULL)
1150 specialized_call_stmt =
1151 make_specialized_call_to_clone (stmt, side);
1153 else
1154 continue;
1156 if (dump_file)
1158 fprintf (dump_file, "Specialize stmt : \n");
1159 print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS);
1160 fprintf (dump_file, "Specialized stmt : \n");
1161 print_gimple_stmt (dump_file, specialized_call_stmt,
1162 0, TDF_VOPS);
1165 gimple_set_block (specialized_call_stmt, gimple_block (stmt));
1166 gsi_insert_before_without_update (&gsi, specialized_call_stmt,
1167 GSI_SAME_STMT);
1170 unlink_stmt_vdef (stmt);
1171 gsi_remove (&gsi, true);
1172 mark_symbols_for_renaming (specialized_call_stmt);
1174 /* After removing make sure gsi is set correctly to not skip
1175 a statememt. */
1176 gsi = gsi_for_stmt (specialized_call_stmt);
1179 current_function_decl = old_current_function_decl;
1180 pop_cfun ();
1183 /* When a function is version cloned, its body is replaced to call one
1184 of the versions with the feature-test function acting as a predicate.
1185 This is done with __builtin_dispatch which is later expanded. */
1187 static gimple
1188 make_builtin_call_to_clones (tree orig_fndecl, tree clone_0_addr,
1189 tree clone_1_addr, tree cond_func_addr)
1191 gimple new_builtin_call;
1192 VEC(tree, heap) *vargs = VEC_alloc (tree, heap, 4);
1193 tree arg;
1195 VEC_quick_push (tree, vargs, cond_func_addr);
1196 VEC_quick_push (tree, vargs, clone_0_addr);
1197 VEC_quick_push (tree, vargs, clone_1_addr);
1199 for (arg = DECL_ARGUMENTS (orig_fndecl); arg; arg = TREE_CHAIN (arg))
1201 VEC_safe_push (tree, heap, vargs, arg);
1202 /* Again, this add_referenced_var is very very important. It broke
1203 a build where a cloned function's arguments where never
1204 referenced. Missing this statement in places asserts at
1205 tree-dfa.c:589, in function referenced_var_lookup at
1206 "gcc_assert (h || uid == 0);" and is very difficult to triage. */
1207 add_referenced_var (arg);
1210 new_builtin_call = gimple_build_call_vec (builtin_function_decl, vargs);
1211 mark_symbols_for_renaming (new_builtin_call);
1214 if (dump_file)
1215 print_gimple_stmt (dump_file, new_builtin_call, 0, TDF_VOPS);
1217 VEC_free (tree, heap, vargs);
1219 return new_builtin_call;
1222 /* This clones a dispatch function whose callee-graph path has a function
1223 which calls __builtin_dispatch. This function is cloned and the
1224 original function branches to the right clone. */
1226 static int
1227 clone_and_dispatch_function (struct cgraph_node *orig_node, tree *clone_0,
1228 tree *clone_1)
1230 tree clone_0_decl, clone_1_decl;
1231 gimple new_builtin_call = NULL;
1232 gimple new_return_stmt = NULL;
1233 gimple_seq seq = NULL;
1234 basic_block new_bb;
1235 tree orig_fndecl;
1236 tree return_var = NULL;
1237 tree return_type;
1238 tree old_current_function_decl;
1240 old_current_function_decl = current_function_decl;
1241 orig_fndecl = orig_node->decl;
1242 push_cfun (DECL_STRUCT_FUNCTION (orig_fndecl));
1243 current_function_decl = orig_fndecl;
1245 /* Make 2 clones for true and false function. */
1246 clone_0_decl = clone_function (orig_fndecl, "clone.0");
1247 clone_1_decl = clone_function (orig_fndecl, "clone.1");
1248 *clone_0 = clone_0_decl;
1249 *clone_1 = clone_1_decl;
1251 new_bb = empty_function_body (orig_fndecl);
1253 new_builtin_call = make_builtin_call_to_clones (
1254 orig_fndecl,
1255 build_fold_addr_expr (clone_0_decl),
1256 build_fold_addr_expr (clone_1_decl),
1257 build_fold_addr_expr ((tree)orig_node->aux));
1259 return_type = TREE_TYPE (TREE_TYPE (orig_fndecl));
1261 if (!TREE_ADDRESSABLE (return_type) && COMPLETE_TYPE_P (return_type))
1263 tree tmp_var;
1264 tmp_var = create_tmp_var (return_type, NULL);
1265 add_referenced_var (tmp_var);
1266 return_var = make_ssa_name (tmp_var, new_builtin_call);
1267 gimple_call_set_lhs (new_builtin_call, return_var);
1270 mark_symbols_for_renaming (new_builtin_call);
1271 new_return_stmt = gimple_build_return (return_var);
1272 mark_symbols_for_renaming (new_return_stmt);
1273 gimple_seq_add_stmt (&seq, new_builtin_call);
1274 gimple_seq_add_stmt (&seq, new_return_stmt);
1275 set_bb_seq (new_bb, seq);
1276 gimple_set_bb (new_builtin_call, new_bb);
1277 gimple_set_bb (new_return_stmt, new_bb);
1279 gimple_set_block (new_builtin_call, DECL_INITIAL (orig_fndecl));
1280 gimple_set_block (new_return_stmt, DECL_INITIAL (orig_fndecl));
1282 if (dump_file)
1283 dump_function_to_file (orig_fndecl, dump_file, TDF_BLOCKS);
1285 /* This update_ssa is necessary here for the following reason. SSA uses
1286 a global syms_to_rename bitmap that stores syms that must be renamed.
1287 So, if we accumulate the syms from one function in IPA but move to
1288 a different function without updating SSA, then we could be
1289 accumulating syms from many functions. This would assert in
1290 referenced_var_lookup because the hashtab storing the syms is
1291 function local. This is horrible. gcc-4.6 makes this bitmap a
1292 global. */
1293 update_ssa (TODO_update_ssa);
1295 compute_inline_parameters (cgraph_node (orig_fndecl));
1296 DECL_DECLARED_INLINE_P (orig_fndecl) = 1;
1297 DECL_UNINLINABLE (orig_fndecl) = 0;
1298 current_function_decl = old_current_function_decl;
1299 pop_cfun ();
1300 return 0;
1303 /* Clone all functions marked for cloning by the earlier phase. */
1305 static void
1306 perform_cloning_phase (void)
1308 struct cgraph_node *node;
1309 int ix;
1310 VEC (tree, heap) *cloned_decl_list = NULL;
1311 tree cloned_decl = NULL;
1313 cloned_decl_list = VEC_alloc (tree, heap, 2);
1315 /* First clone, then specialize the clones. */
1316 for (node = cgraph_nodes; node; node = node->next)
1318 tree clone_0_decl, clone_1_decl;
1319 if (node->aux == NULL)
1320 continue;
1321 if (dump_file)
1323 fprintf (dump_file, "%s will be cloned\n", cgraph_node_name (node));
1324 dump_function_to_file (node->decl, dump_file, TDF_BLOCKS);
1326 clone_and_dispatch_function (node, &clone_0_decl, &clone_1_decl);
1327 VEC_safe_push (tree, heap, cloned_decl_list, clone_0_decl);
1328 VEC_safe_push (tree, heap, cloned_decl_list, clone_1_decl);
1329 continue;
1332 /* Specialize the clones now. */
1333 for (ix = 0; VEC_iterate (tree, cloned_decl_list, ix, cloned_decl); ++ix)
1335 int which_clone = ix % 2;
1336 specialize_call (cloned_decl, which_clone);
1339 VEC_free (tree, heap, cloned_decl_list);
1342 /* End Cloning phase. */
1344 /* Checks if there is atleast one call to __builtin_dispatch. */
1346 static bool
1347 find_builtin_decl (void)
1349 struct cgraph_node *node;
1350 for (node = cgraph_nodes; node; node = node->next)
1352 if (strstr (cgraph_node_name (node), "__builtin_dispatch") != NULL)
1354 builtin_function_decl = node->decl;
1355 return true;
1358 return false;
1361 /* Set the aux fields of all nodes and edges in the call graph to be NULL. */
1363 static void
1364 cleanup_aux_field (void)
1366 struct cgraph_node *node;
1367 struct cgraph_edge *edge;
1369 for (node = cgraph_nodes; node; node = node->next)
1371 node->aux = NULL;
1372 for (edge = node->callees; edge; edge = edge->next_callee)
1373 edge->aux = NULL;
1377 /* Main driver function. It scans the __builtin_dispatch calls and
1378 figures out which functions to clone. It then clones the functions. */
1380 static unsigned int
1381 builtin_dispatch_ipa_clone (void)
1383 cleanup_aux_field ();
1385 /* Allocate hashtab mapping name to decl. */
1386 name_decl_htab = htab_create (10, name_decl_htab_hash_descriptor,
1387 name_decl_htab_eq_descriptor, NULL);
1389 /* Turn it on for O1 and above. At -O0, there is a SSA alias bug
1390 with create_tmp_var. Cloning and hoisting is not necessary at
1391 -O0 anyways. Also, guard it with the flag
1392 "-fclone-hot-version-paths".
1393 Disabled for LTO as it needs more work. */
1394 if (optimize == 0
1395 || profile_arc_flag
1396 || !flag_clone_hot_version_paths
1397 || flag_lto)
1398 return 0;
1400 if (!find_builtin_decl ())
1401 return 0;
1403 gcc_assert (builtin_function_decl != NULL);
1405 if (!perform_analysis_phase ())
1407 cleanup_aux_field ();
1408 return 0;
1411 if (decide_cloning_phase ())
1412 perform_cloning_phase ();
1414 cleanup_aux_field ();
1416 return 0;
/* Gate of the multiversion dispatch IPA pass.  The pass is never gated
   off here.  */

static bool
gate_handle_builtin_dispatch (void)
{
  return true;
}
1425 struct simple_ipa_opt_pass pass_ipa_multiversion_dispatch =
1428 SIMPLE_IPA_PASS,
1429 "multiversion_dispatch", /* name */
1430 gate_handle_builtin_dispatch, /* gate */
1431 builtin_dispatch_ipa_clone, /* execute */
1432 NULL, /* sub */
1433 NULL, /* next */
1434 0, /* static_pass_number */
1435 TV_MVERSN_DISPATCH, /* tv_id */
1436 0, /* properties_required */
1437 PROP_cfg, /* properties_provided */
1438 0, /* properties_destroyed */
1439 0, /* todo_flags_start */
1440 TODO_dump_func | /* todo_flags_finish */
1441 TODO_update_ssa
1445 /* Lowering of the __builtin_dispatch calls. */
1448 /* This function converts STMT which is a __builtin_dispatch
1449 call of the form :
1450 ret = __builtin_dispatch (predicate, foo, bar, arg1, ...)
1451 into :
1452 var_1 = predicate
1453 if (var_1)
1454 var_2 = foo (arg1, ...);
1455 else
1456 var_3 = bar (arg1, ...);
1457 var_4 = phi (var_2, var_3)
1458 ret = var_4
1460 var_? are ssa names for variable var.
1463 static unsigned int
1464 convert_builtin_dispatch (gimple stmt)
1466 tree cond_func_addr, if_func_addr, else_func_addr;
1467 tree cond_func_decl = NULL;
1468 gimple if_part, else_part, if_else_stmt;
1469 basic_block bb1, bb2, bb3, bb4;
1470 gimple bb1end, bb2end, bb3end;
1471 edge e12, e13, e23, e24, e34;
1472 VEC(tree, heap) *nargs = NULL;
1473 int num_func_args = 0, i;
1474 tree version_selector_var;
1475 tree lhs_result;
1476 gimple_stmt_iterator gsi;
1477 basic_block bb;
1478 gimple feature_test_call = NULL;
1479 tree tmp_var = NULL;
1480 gimple init_stmt = NULL;
1481 tree ssa_if_name, ssa_else_name;
1482 gimple phinode = NULL;
1483 tree tmp_result_var, ssa_result_var;
1485 gsi = gsi_for_stmt (stmt);
1486 bb = gsi_bb (gsi);
1488 cond_func_addr = find_version_selector_func_addr (stmt);
1489 if (cond_func_addr != NULL)
1491 cond_func_decl = TREE_OPERAND (cond_func_addr, 0);
1492 gcc_assert (cond_func_decl);
1495 if (dump_file)
1497 fprintf (dump_file, "Before Converting __builtin_dispatch :\n");
1498 dump_function_to_file (current_function_decl, dump_file, TDF_BLOCKS);
1501 if_func_addr = gimple_call_arg (stmt, 1);
1502 else_func_addr = gimple_call_arg (stmt, 2);
1504 tmp_result_var = create_tmp_var (integer_type_node, NULL);
1505 add_referenced_var (tmp_result_var);
1507 if (flag_lto
1508 || cond_func_decl == NULL
1509 || lookup_attribute ("version_selector",
1510 DECL_ATTRIBUTES (cond_func_decl)) == NULL)
1512 tree arg = gimple_call_arg (stmt, 0);
1513 /* This means the feature-test function is not set with attribute
1514 version_selector or it is a function pointer or in LTO. So,
1515 explicitly call it. */
1516 feature_test_call = gimple_build_call (arg, 0);
1517 ssa_result_var = make_ssa_name (tmp_result_var, feature_test_call);
1518 gimple_call_set_lhs (feature_test_call, ssa_result_var);
1519 mark_symbols_for_renaming (feature_test_call);
1520 version_selector_var = ssa_result_var;
1522 else
1524 /* Get the global corresponding to the "version_selector" function. */
1525 version_selector_var
1526 = handle_version_selector_attr_function (cond_func_decl);
1527 gcc_assert (version_selector_var);
1528 add_referenced_var (version_selector_var);
1529 feature_test_call = gimple_build_assign (tmp_result_var,
1530 version_selector_var);
1531 ssa_result_var = make_ssa_name (tmp_result_var, feature_test_call);
1532 gimple_assign_set_lhs (feature_test_call, ssa_result_var);
1533 mark_symbols_for_renaming (feature_test_call);
1534 version_selector_var = ssa_result_var;
1537 if_else_stmt = gimple_build_cond (GT_EXPR,
1538 version_selector_var,
1539 integer_zero_node,
1540 NULL_TREE, NULL_TREE);
1542 mark_symbols_for_renaming (if_else_stmt);
1544 num_func_args = function_args_count (
1545 TREE_TYPE (TREE_OPERAND (if_func_addr, 0)));
1547 nargs = VEC_alloc (tree, heap, num_func_args);
1549 /* The arguments to the feature test function start from the 4th argument
1550 in __builtin_dispatch. The first 3 arguments are mandatory. */
1552 for (i = 0; i <= num_func_args - 2; ++i)
1553 VEC_quick_push (tree, nargs,
1554 gimple_call_arg (stmt, (3 + i)));
1556 if_part = gimple_build_call_vec (if_func_addr, nargs);
1557 else_part = gimple_build_call_vec (else_func_addr, nargs);
1559 lhs_result = gimple_call_lhs (stmt);
1561 if (lhs_result != NULL_TREE)
1563 tree ssa_var;
1564 tree return_type;
1565 return_type = TREE_TYPE (lhs_result);
1566 tmp_var = create_tmp_var (return_type, NULL);
1567 add_referenced_var (tmp_var);
1569 init_stmt = gimple_build_assign (tmp_var, build_zero_cst (return_type));
1570 ssa_var = make_ssa_name (tmp_var, init_stmt);
1571 gimple_assign_set_lhs (init_stmt, ssa_var);
1572 mark_symbols_for_renaming (init_stmt);
1574 ssa_if_name = make_ssa_name (tmp_var, init_stmt);
1575 ssa_else_name = make_ssa_name (tmp_var, init_stmt);
1576 gimple_call_set_lhs (if_part, ssa_if_name);
1577 gimple_call_set_lhs (else_part, ssa_else_name);
1579 mark_symbols_for_renaming (if_part);
1580 mark_symbols_for_renaming (else_part);
1582 /* Set the lexical block to be the same as the dispatch call. */
1583 gcc_assert (feature_test_call);
1584 gimple_set_block (feature_test_call, gimple_block (stmt));
1586 if (init_stmt)
1587 gimple_set_block (init_stmt, gimple_block (stmt));
1589 gimple_set_block (if_else_stmt, gimple_block (stmt));
1590 gimple_set_block (if_part, gimple_block (stmt));
1591 gimple_set_block (else_part, gimple_block (stmt));
1593 gsi_insert_before_without_update (&gsi, feature_test_call, GSI_SAME_STMT);
1594 gimple_set_bb (feature_test_call, bb);
1596 if (init_stmt)
1598 gsi_insert_before_without_update (&gsi, init_stmt,
1599 GSI_SAME_STMT);
1600 gimple_set_bb (init_stmt, bb);
1603 gsi_insert_before_without_update (&gsi, if_else_stmt, GSI_SAME_STMT);
1604 gsi_insert_before_without_update (&gsi, if_part, GSI_SAME_STMT);
1605 gsi_insert_before_without_update (&gsi, else_part, GSI_SAME_STMT);
1607 /* Remove the builtin_dispatch call after the expansion. */
1608 unlink_stmt_vdef (stmt);
1609 gsi_remove (&gsi, true);
1611 bb1end = if_else_stmt;
1612 bb2end = if_part;
1613 bb3end = else_part;
1614 bb1 = bb;
1615 e12 = split_block (bb1, bb1end);
1616 bb2 = e12->dest;
1617 e23 = split_block (bb2, bb2end);
1618 bb3 = e23->dest;
1619 e34 = split_block (bb3, bb3end);
1620 bb4 = e34->dest;
1622 e12->flags &= ~EDGE_FALLTHRU;
1623 e12->flags |= EDGE_TRUE_VALUE;
1624 e13 = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
1625 gcc_assert (e13);
1626 e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
1627 gcc_assert (e24);
1628 remove_edge (e23);
1630 if (tmp_var)
1632 gimple assign_stmt;
1633 phinode = create_phi_node (tmp_var, bb4);
1634 add_phi_arg (phinode, ssa_if_name, e24, UNKNOWN_LOCATION);
1635 add_phi_arg (phinode, ssa_else_name, e34, UNKNOWN_LOCATION);
1636 mark_symbols_for_renaming (phinode);
1637 gcc_assert (lhs_result);
1638 assign_stmt
1639 = gimple_build_assign (lhs_result, gimple_phi_result (phinode));
1640 mark_symbols_for_renaming (assign_stmt);
1641 gsi = gsi_start_bb (bb4);
1642 gsi_insert_before_without_update (&gsi, assign_stmt, GSI_SAME_STMT);
1643 gimple_set_bb (assign_stmt, bb4);
1646 if (dump_file)
1648 fprintf (dump_file, "Converted __builtin_dispatch :\n");
1649 dump_function_to_file (current_function_decl, dump_file, TDF_BLOCKS);
1652 return 0;
1655 /* This function does two things.
1657 1) For a feature-test function marked with attribute "version_selector",
1658 it creates a constructor that calls the feature-test function and a
1659 global that holds the result. The global's result will be used
1660 to lower any __builtin_dispatch statement that refers to this feature
1661 test function. The __builtin_dispatch statement and the feature test
1662 function can be in different modules.
1664 2) It lowers __builtin_dispatch statements. */
1666 static unsigned int
1667 do_convert_builtin_dispatch (void)
1669 basic_block bb;
1670 gimple_stmt_iterator gsi;
1671 VEC (gimple, heap) *builtin_stmt_list = NULL;
1672 int ix;
1673 gimple builtin_stmt;
1675 /* Allocate hashtab mapping name to decl. */
1676 if (name_decl_htab == NULL)
1677 name_decl_htab = htab_create (10, name_decl_htab_hash_descriptor,
1678 name_decl_htab_eq_descriptor, NULL);
1680 /* Look for functions with attribute "version_selector" and make a
1681 constructor which calls the function and saves the result in a
1682 global. Disabled for LTO as it needs more work. */
1684 if (!flag_lto
1685 && lookup_attribute ("version_selector",
1686 DECL_ATTRIBUTES (current_function_decl)) != NULL)
1688 if (dump_file)
1689 fprintf (dump_file, "Function with version_selector attribute found :"
1690 " %s. Making constructor for it.\n",
1691 current_function_name ());
1693 handle_version_selector_attr_function (current_function_decl);
1694 /* Assume there are no __builtin_dispatch calls in feature test
1695 functions. So it is safe to return. */
1696 return 0;
1699 /* Find and lower __builtin_dispatch calls. */
1701 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (current_function_decl))
1703 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1705 gimple stmt = gsi_stmt (gsi);
1706 tree call_decl;
1708 if (!is_gimple_call (stmt))
1709 continue;
1711 call_decl = gimple_call_fndecl (stmt);
1713 if (call_decl == NULL)
1714 continue;
1716 if (DECL_NAME (call_decl) == NULL_TREE)
1717 continue;
1719 if (strstr (IDENTIFIER_POINTER (DECL_NAME (call_decl)),
1720 "__builtin_dispatch") == NULL)
1721 continue;
1723 if (dump_file)
1725 fprintf (dump_file, "Converting __builtin_dispatch stmt in:%s\n",
1726 current_function_name ());
1727 print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS);
1730 if (builtin_stmt_list == NULL)
1731 builtin_stmt_list = VEC_alloc (gimple, heap, 2);
1733 gcc_assert (builtin_stmt_list != NULL);
1734 VEC_safe_push (gimple, heap, builtin_stmt_list, stmt);
1738 if (!builtin_stmt_list)
1739 return 0;
1741 for (ix = 0; VEC_iterate (gimple, builtin_stmt_list, ix, builtin_stmt);
1742 ++ix)
1743 convert_builtin_dispatch (builtin_stmt);
1745 compute_inline_parameters (cgraph_node (current_function_decl));
1747 VEC_free (gimple, heap, builtin_stmt_list);
1749 return 0;
/* Gate of the __builtin_dispatch lowering pass.  The pass is never
   gated off here.  */

static bool
gate_convert_builtin_dispatch (void)
{
  return true;
}
1758 struct gimple_opt_pass pass_tree_convert_builtin_dispatch =
1761 GIMPLE_PASS,
1762 "convert_builtin_dispatch", /* name */
1763 gate_convert_builtin_dispatch, /* gate */
1764 do_convert_builtin_dispatch, /* execute */
1765 NULL, /* sub */
1766 NULL, /* next */
1767 0, /* static_pass_number */
1768 TV_MVERSN_DISPATCH, /* tv_id */
1769 PROP_cfg, /* properties_required */
1770 PROP_cfg, /* properties_provided */
1771 0, /* properties_destroyed */
1772 0, /* todo_flags_start */
1773 TODO_dump_func | /* todo_flags_finish */
1774 TODO_cleanup_cfg | TODO_dump_cgraph |
1775 TODO_update_ssa | TODO_verify_ssa
1779 /* This function generates gimple code in NEW_BB to check if COND_VAR
1780 is equal to WHICH_VERSION and return FN_VER pointer if it is equal.
1781 The basic block returned is the block where the control flows if
1782 the equality is false. */
1784 static basic_block
1785 make_bb_flow (basic_block new_bb, tree cond_var, tree fn_ver,
1786 int which_version, tree bindings)
1788 tree result_var;
1789 tree convert_expr;
1791 basic_block bb1, bb2, bb3;
1792 edge e12, e23;
1794 gimple if_else_stmt;
1795 gimple if_stmt;
1796 gimple return_stmt;
1797 gimple_seq gseq = bb_seq (new_bb);
1799 /* Check if the value of cond_var is equal to which_version. */
1800 if_else_stmt = gimple_build_cond (EQ_EXPR, cond_var,
1801 build_int_cst (NULL, which_version),
1802 NULL_TREE, NULL_TREE);
1804 mark_symbols_for_renaming (if_else_stmt);
1805 gimple_seq_add_stmt (&gseq, if_else_stmt);
1806 gimple_set_block (if_else_stmt, bindings);
1807 gimple_set_bb (if_else_stmt, new_bb);
1809 result_var = create_tmp_var (ptr_type_node, NULL);
1810 add_referenced_var (result_var);
1812 convert_expr = build1 (CONVERT_EXPR, ptr_type_node, fn_ver);
1813 if_stmt = gimple_build_assign (result_var, convert_expr);
1814 mark_symbols_for_renaming (if_stmt);
1815 gimple_seq_add_stmt (&gseq, if_stmt);
1816 gimple_set_block (if_stmt, bindings);
1818 return_stmt = gimple_build_return (result_var);
1819 mark_symbols_for_renaming (return_stmt);
1820 gimple_seq_add_stmt (&gseq, return_stmt);
1822 set_bb_seq (new_bb, gseq);
1824 bb1 = new_bb;
1825 e12 = split_block (bb1, if_else_stmt);
1826 bb2 = e12->dest;
1827 e12->flags &= ~EDGE_FALLTHRU;
1828 e12->flags |= EDGE_TRUE_VALUE;
1830 e23 = split_block (bb2, return_stmt);
1831 gimple_set_bb (if_stmt, bb2);
1832 gimple_set_bb (return_stmt, bb2);
1833 bb3 = e23->dest;
1834 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
1836 remove_edge (e23);
1837 make_edge (bb2, EXIT_BLOCK_PTR, 0);
1839 return bb3;
1842 /* Given the pointer to the condition function COND_FUNC_ARG, whose return
1843 value decides the version that gets executed, and the pointers to the
1844 function versions, FN_VER_LIST, this function generates control-flow to
1845 return the appropriate function version pointer based on the return value
1846 of the conditional function. The condition function is assumed to return
1847 values 0, 1, 2, ... */
1849 static gimple_seq
1850 get_selector_gimple_seq (tree cond_func_arg, tree fn_ver_list, tree default_ver,
1851 basic_block new_bb, tree bindings)
1853 basic_block final_bb;
1855 gimple return_stmt, default_stmt;
1856 gimple_seq gseq = NULL;
1857 gimple_seq gseq_final = NULL;
1858 gimple call_cond_stmt;
1860 tree result_var;
1861 tree convert_expr;
1862 tree p;
1863 tree cond_var;
1865 int which_version;
1867 /* Call the condition function once and store the outcome in cond_var. */
1868 cond_var = create_tmp_var (integer_type_node, NULL);
1869 call_cond_stmt = gimple_build_call (cond_func_arg, 0);
1870 gimple_call_set_lhs (call_cond_stmt, cond_var);
1871 add_referenced_var (cond_var);
1872 mark_symbols_for_renaming (call_cond_stmt);
1874 gimple_seq_add_stmt (&gseq, call_cond_stmt);
1875 gimple_set_block (call_cond_stmt, bindings);
1876 gimple_set_bb (call_cond_stmt, new_bb);
1878 set_bb_seq (new_bb, gseq);
1880 final_bb = new_bb;
1882 which_version = 0;
1883 for (p = fn_ver_list; p != NULL_TREE; p = TREE_CHAIN (p))
1885 tree ver = TREE_PURPOSE (p);
1886 /* Return this version's pointer, VER, if the value returned by the
1887 condition funciton is equal to WHICH_VERSION. */
1888 final_bb = make_bb_flow (final_bb, cond_var, ver, which_version,
1889 bindings);
1890 which_version++;
1893 result_var = create_tmp_var (ptr_type_node, NULL);
1894 add_referenced_var (result_var);
1896 /* Return the default version function pointer as the default. */
1897 convert_expr = build1 (CONVERT_EXPR, ptr_type_node, default_ver);
1898 default_stmt = gimple_build_assign (result_var, convert_expr);
1899 mark_symbols_for_renaming (default_stmt);
1900 gimple_seq_add_stmt (&gseq_final, default_stmt);
1901 gimple_set_block (default_stmt, bindings);
1902 gimple_set_bb (default_stmt, final_bb);
1904 return_stmt = gimple_build_return (result_var);
1905 mark_symbols_for_renaming (return_stmt);
1906 gimple_seq_add_stmt (&gseq_final, return_stmt);
1907 gimple_set_bb (return_stmt, final_bb);
1909 set_bb_seq (final_bb, gseq_final);
1911 return gseq;
1914 /* Make the ifunc selector function which calls function pointed to by
1915 COND_FUNC_ARG and checks the value to return the appropriate function
1916 version pointer. */
1918 static tree
1919 make_selector_function (const char *name, tree cond_func_arg,
1920 tree fn_ver_list, tree default_ver)
1922 tree decl, type, t;
1923 basic_block new_bb;
1924 tree old_current_function_decl;
1925 tree decl_name;
1927 /* The selector function should return a (void *). */
1928 type = build_function_type_list (ptr_type_node, NULL_TREE);
1930 decl = build_fn_decl (name, type);
1932 decl_name = get_identifier (name);
1933 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
1934 DECL_NAME (decl) = decl_name;
1935 gcc_assert (cgraph_node (decl) != NULL);
1937 TREE_USED (decl) = 1;
1938 DECL_ARTIFICIAL (decl) = 1;
1939 DECL_IGNORED_P (decl) = 0;
1940 TREE_PUBLIC (decl) = 0;
1941 DECL_UNINLINABLE (decl) = 1;
1942 DECL_EXTERNAL (decl) = 0;
1943 DECL_CONTEXT (decl) = NULL_TREE;
1944 DECL_INITIAL (decl) = make_node (BLOCK);
1945 DECL_STATIC_CONSTRUCTOR (decl) = 0;
1946 TREE_READONLY (decl) = 0;
1947 DECL_PURE_P (decl) = 0;
1949 /* Build result decl and add to function_decl. */
1950 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
1951 DECL_ARTIFICIAL (t) = 1;
1952 DECL_IGNORED_P (t) = 1;
1953 DECL_RESULT (decl) = t;
1955 gimplify_function_tree (decl);
1957 old_current_function_decl = current_function_decl;
1958 push_cfun (DECL_STRUCT_FUNCTION (decl));
1959 current_function_decl = decl;
1960 init_empty_tree_cfg_for_function (DECL_STRUCT_FUNCTION (decl));
1962 cfun->curr_properties |=
1963 (PROP_gimple_lcf | PROP_gimple_leh | PROP_cfg | PROP_referenced_vars |
1964 PROP_ssa);
1966 new_bb = create_empty_bb (ENTRY_BLOCK_PTR);
1967 make_edge (ENTRY_BLOCK_PTR, new_bb, EDGE_FALLTHRU);
1968 make_edge (new_bb, EXIT_BLOCK_PTR, 0);
1970 /* This call is very important if this pass runs when the IR is in
1971 SSA form. It breaks things in strange ways otherwise. */
1972 init_tree_ssa (DECL_STRUCT_FUNCTION (decl));
1973 init_ssa_operands ();
1975 /* Make the body of thr selector function. */
1976 get_selector_gimple_seq (cond_func_arg, fn_ver_list, default_ver, new_bb,
1977 DECL_INITIAL (decl));
1979 cgraph_add_new_function (decl, true);
1980 cgraph_call_function_insertion_hooks (cgraph_node (decl));
1981 cgraph_mark_needed_node (cgraph_node (decl));
1983 if (dump_file)
1984 dump_function_to_file (decl, dump_file, TDF_BLOCKS);
1986 pop_cfun ();
1987 current_function_decl = old_current_function_decl;
1988 return decl;
1991 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
1992 it to CHAIN. */
1994 static tree
1995 make_attribute (const char *name, const char *arg_name, tree chain)
1997 tree attr_name;
1998 tree attr_arg_name;
1999 tree attr_args;
2000 tree attr;
2002 attr_name = get_identifier (name);
2003 attr_arg_name = build_string (strlen (arg_name), arg_name);
2004 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
2005 attr = tree_cons (attr_name, attr_args, chain);
2006 return attr;
2009 /* This creates the ifunc function IFUNC_NAME whose selector function is
2010 SELECTOR_NAME. */
2012 static tree
2013 make_ifunc_function (const char* ifunc_name, const char *selector_name,
2014 tree fn_type)
2016 tree type;
2017 tree decl;
2019 /* The signature of the ifunc function is set to the
2020 type of any version. */
2021 type = build_function_type (TREE_TYPE (fn_type), TYPE_ARG_TYPES (fn_type));
2022 decl = build_fn_decl (ifunc_name, type);
2024 DECL_CONTEXT (decl) = NULL_TREE;
2025 DECL_INITIAL (decl) = error_mark_node;
2027 /* Set ifunc attribute */
2028 DECL_ATTRIBUTES (decl)
2029 = make_attribute ("ifunc", selector_name, DECL_ATTRIBUTES (decl));
2031 assemble_alias (decl, get_identifier (selector_name));
2033 return decl;
2036 /* Copy the decl attributes from from_decl to to_decl, except
2037 DECL_ARTIFICIAL and TREE_PUBLIC. */
2039 static void
2040 copy_decl_attributes (tree to_decl, tree from_decl)
2042 TREE_READONLY (to_decl) = TREE_READONLY (from_decl);
2043 TREE_USED (to_decl) = TREE_USED (from_decl);
2044 DECL_ARTIFICIAL (to_decl) = 1;
2045 DECL_IGNORED_P (to_decl) = DECL_IGNORED_P (from_decl);
2046 TREE_PUBLIC (to_decl) = 0;
2047 DECL_CONTEXT (to_decl) = DECL_CONTEXT (from_decl);
2048 DECL_EXTERNAL (to_decl) = DECL_EXTERNAL (from_decl);
2049 DECL_COMDAT (to_decl) = DECL_COMDAT (from_decl);
2050 DECL_COMDAT_GROUP (to_decl) = DECL_COMDAT_GROUP (from_decl);
2051 DECL_VIRTUAL_P (to_decl) = DECL_VIRTUAL_P (from_decl);
2052 DECL_WEAK (to_decl) = DECL_WEAK (from_decl);
2055 /* This function does the mult-version run-time dispatch using IFUNC. Given
2056 NUM_VERSIONS versions of a function with the decls in FN_VER_LIST along
2057 with a default version in DEFAULT_VER. Also given is a condition function,
2058 COND_FUNC_ADDR, whose return value decides the version that gets executed.
2059 This function generates the necessary code to dispatch the right function
2060 version and returns this a GIMPLE_SEQ. The decls of the ifunc function and
2061 the selector function that are created are stored in IFUNC_DECL and
2062 SELECTOR_DECL. */
2064 static gimple_seq
2065 dispatch_using_ifunc (int num_versions, tree orig_func_decl,
2066 tree cond_func_addr, tree fn_ver_list,
2067 tree default_ver, tree *selector_decl,
2068 tree *ifunc_decl)
2070 char *selector_name;
2071 char *ifunc_name;
2072 tree ifunc_function;
2073 tree selector_function;
2074 tree return_type;
2075 VEC (tree, heap) *nargs = NULL;
2076 tree arg;
2077 gimple ifunc_call_stmt;
2078 gimple return_stmt;
2079 gimple_seq gseq = NULL;
2081 gcc_assert (cond_func_addr != NULL
2082 && num_versions > 0
2083 && orig_func_decl != NULL
2084 && fn_ver_list != NULL);
2086 /* The return type of any function version. */
2087 return_type = TREE_TYPE (TREE_TYPE (orig_func_decl));
2089 nargs = VEC_alloc (tree, heap, 4);
2091 for (arg = DECL_ARGUMENTS (orig_func_decl);
2092 arg; arg = TREE_CHAIN (arg))
2094 VEC_safe_push (tree, heap, nargs, arg);
2095 add_referenced_var (arg);
2098 /* Assign names to ifunc and ifunc_selector functions. */
2099 selector_name = make_name (orig_func_decl, "ifunc.selector");
2100 ifunc_name = make_name (orig_func_decl, "ifunc");
2102 /* Make a selector function which returns the appropriate function
2103 version pointer based on the outcome of the condition function
2104 execution. */
2105 selector_function = make_selector_function (selector_name, cond_func_addr,
2106 fn_ver_list, default_ver);
2107 *selector_decl = selector_function;
2109 /* Make a new ifunc function. */
2110 ifunc_function = make_ifunc_function (ifunc_name, selector_name,
2111 TREE_TYPE (orig_func_decl));
2112 *ifunc_decl = ifunc_function;
2114 /* Make selector and ifunc shadow the attributes of the original function. */
2115 copy_decl_attributes (ifunc_function, orig_func_decl);
2116 copy_decl_attributes (selector_function, orig_func_decl);
2118 ifunc_call_stmt = gimple_build_call_vec (ifunc_function, nargs);
2119 gimple_seq_add_stmt (&gseq, ifunc_call_stmt);
2121 /* Make function return the value of it is a non-void type. */
2122 if (TREE_CODE (return_type) != VOID_TYPE)
2124 tree lhs_var;
2125 tree lhs_var_ssa_name;
2126 tree result_decl;
2128 result_decl = DECL_RESULT (orig_func_decl);
2130 if (result_decl
2131 && aggregate_value_p (result_decl, orig_func_decl)
2132 && !TREE_ADDRESSABLE (result_decl))
2134 /* Build a RESULT_DECL rather than a VAR_DECL for this case.
2135 See tree-nrv.c: tree_nrv. It checks if the DECL_RESULT and the
2136 return value are the same. */
2137 lhs_var = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL,
2138 return_type);
2139 DECL_ARTIFICIAL (lhs_var) = 1;
2140 DECL_IGNORED_P (lhs_var) = 1;
2141 TREE_READONLY (lhs_var) = 0;
2142 DECL_EXTERNAL (lhs_var) = 0;
2143 TREE_STATIC (lhs_var) = 0;
2144 TREE_USED (lhs_var) = 1;
2146 add_referenced_var (lhs_var);
2147 DECL_RESULT (orig_func_decl) = lhs_var;
2149 else if (!TREE_ADDRESSABLE (return_type)
2150 && COMPLETE_TYPE_P (return_type))
2152 lhs_var = create_tmp_var (return_type, NULL);
2153 add_referenced_var (lhs_var);
2155 else
2157 lhs_var = create_tmp_var_raw (return_type, NULL);
2158 TREE_ADDRESSABLE (lhs_var) = 1;
2159 gimple_add_tmp_var (lhs_var);
2160 add_referenced_var (lhs_var);
2163 if (AGGREGATE_TYPE_P (return_type)
2164 || TREE_CODE (return_type) == COMPLEX_TYPE)
2166 gimple_call_set_lhs (ifunc_call_stmt, lhs_var);
2167 return_stmt = gimple_build_return (lhs_var);
2169 else
2171 lhs_var_ssa_name = make_ssa_name (lhs_var, ifunc_call_stmt);
2172 gimple_call_set_lhs (ifunc_call_stmt, lhs_var_ssa_name);
2173 return_stmt = gimple_build_return (lhs_var_ssa_name);
2176 else
2178 return_stmt = gimple_build_return (NULL_TREE);
2181 mark_symbols_for_renaming (ifunc_call_stmt);
2182 mark_symbols_for_renaming (return_stmt);
2183 gimple_seq_add_stmt (&gseq, return_stmt);
2185 VEC_free (tree, heap, nargs);
2186 return gseq;
2189 /* Empty the function body of function fndecl. Retain just one basic block
2190 along with the ENTRY and EXIT block. Return the retained basic block. */
2192 static basic_block
2193 purge_function_body (tree fndecl)
2195 basic_block bb, new_bb;
2196 edge first_edge, last_edge;
2197 tree old_current_function_decl;
2199 old_current_function_decl = current_function_decl;
2200 push_cfun (DECL_STRUCT_FUNCTION (fndecl));
2201 current_function_decl = fndecl;
2203 /* Set new_bb to be the first block after ENTRY_BLOCK_PTR. */
2205 first_edge = VEC_index (edge, ENTRY_BLOCK_PTR->succs, 0);
2206 new_bb = first_edge->dest;
2207 gcc_assert (new_bb != NULL);
2209 for (bb = ENTRY_BLOCK_PTR; bb != NULL;)
2211 edge_iterator ei;
2212 edge e;
2213 basic_block bb_next;
2214 bb_next = bb->next_bb;
2215 if (bb == EXIT_BLOCK_PTR)
2216 VEC_truncate (edge, EXIT_BLOCK_PTR->preds, 0);
2217 else if (bb == ENTRY_BLOCK_PTR)
2218 VEC_truncate (edge, ENTRY_BLOCK_PTR->succs, 0);
2219 else
2221 remove_phi_nodes (bb);
2222 if (bb_seq (bb) != NULL)
2224 gimple_stmt_iterator i;
2225 for (i = gsi_start_bb (bb); !gsi_end_p (i);)
2227 gimple stmt = gsi_stmt (i);
2228 unlink_stmt_vdef (stmt);
2229 reset_debug_uses (stmt);
2230 gsi_remove (&i, true);
2231 release_defs (stmt);
2234 FOR_EACH_EDGE (e, ei, bb->succs)
2236 n_edges--;
2237 ggc_free (e);
2239 VEC_truncate (edge, bb->succs, 0);
2240 VEC_truncate (edge, bb->preds, 0);
2241 bb->prev_bb = NULL;
2242 bb->next_bb = NULL;
2243 if (bb == new_bb)
2245 bb = bb_next;
2246 continue;
2248 bb->il.gimple = NULL;
2249 SET_BASIC_BLOCK (bb->index, NULL);
2250 n_basic_blocks--;
2252 bb = bb_next;
2256 /* This is to allow iterating over the basic blocks. */
2257 new_bb->next_bb = EXIT_BLOCK_PTR;
2258 EXIT_BLOCK_PTR->prev_bb = new_bb;
2260 new_bb->prev_bb = ENTRY_BLOCK_PTR;
2261 ENTRY_BLOCK_PTR->next_bb = new_bb;
2263 gcc_assert (find_edge (new_bb, EXIT_BLOCK_PTR) == NULL);
2264 last_edge = make_edge (new_bb, EXIT_BLOCK_PTR, 0);
2265 gcc_assert (last_edge);
2267 gcc_assert (find_edge (ENTRY_BLOCK_PTR, new_bb) == NULL);
2268 last_edge = make_edge (ENTRY_BLOCK_PTR, new_bb, EDGE_FALLTHRU);
2269 gcc_assert (last_edge);
2271 free_dominance_info (CDI_DOMINATORS);
2272 free_dominance_info (CDI_POST_DOMINATORS);
2273 calculate_dominance_info (CDI_DOMINATORS);
2274 calculate_dominance_info (CDI_POST_DOMINATORS);
2276 current_function_decl = old_current_function_decl;
2277 pop_cfun ();
2279 return new_bb;
2282 /* Returns true if function FUNC_DECL contains abnormal goto statements. */
2284 static bool
2285 function_can_make_abnormal_goto (tree func_decl)
2287 basic_block bb;
2288 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (func_decl))
2290 gimple_stmt_iterator gsi;
2291 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2293 gimple stmt = gsi_stmt (gsi);
2294 if (stmt_can_make_abnormal_goto (stmt))
2295 return true;
2298 return false;
2301 /* Has an entry for every cloned function and auxiliaries that have been
2302 generated by auto cloning. These cannot be further cloned. */
2304 htab_t cloned_function_decls_htab = NULL;
2306 /* Adds function FUNC_DECL to the cloned_function_decls_htab. */
2308 static void
2309 mark_function_not_cloneable (tree func_decl)
2311 void **slot;
2313 slot = htab_find_slot_with_hash (cloned_function_decls_htab, func_decl,
2314 htab_hash_pointer (func_decl), INSERT);
2315 gcc_assert (*slot == NULL);
2316 *slot = func_decl;
2319 /* Entry point for the auto clone pass. Calls the target hook to determine if
2320 this function must be cloned. */
2322 static unsigned int
2323 do_auto_clone (void)
2325 tree opt_node = NULL_TREE;
2326 int num_versions = 0;
2327 int i = 0;
2328 tree fn_ver_addr_chain = NULL_TREE;
2329 tree default_ver = NULL_TREE;
2330 tree cond_func_decl = NULL_TREE;
2331 tree cond_func_addr;
2332 tree default_decl;
2333 basic_block empty_bb;
2334 gimple_seq gseq = NULL;
2335 gimple_stmt_iterator gsi;
2336 tree selector_decl;
2337 tree ifunc_decl;
2338 void **slot;
2339 struct cgraph_node *node;
2341 node = cgraph_node (current_function_decl);
2343 if (lookup_attribute ("noclone", DECL_ATTRIBUTES (current_function_decl))
2344 != NULL)
2346 if (dump_file)
2347 fprintf (dump_file, "Not cloning, noclone attribute set\n");
2348 return 0;
2351 if (lookup_attribute ("target", DECL_ATTRIBUTES (current_function_decl))
2352 != NULL)
2354 if (dump_file)
2355 fprintf (dump_file, "Not cloning, target attribute set\n");
2356 return 0;
2359 /* No cloning of constructors and destructors. */
2360 if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
2361 || DECL_STATIC_DESTRUCTOR (current_function_decl))
2362 return 0;
2364 /* Check if function size is within permissible limits for cloning. */
2365 if (node->global.size
2366 > PARAM_VALUE (PARAM_MAX_FUNCTION_SIZE_FOR_AUTO_CLONING))
2368 if (dump_file)
2369 fprintf (dump_file, "Function size exceeds auto cloning threshold.\n");
2370 return 0;
2373 if (cloned_function_decls_htab == NULL)
2374 cloned_function_decls_htab = htab_create (10, htab_hash_pointer,
2375 htab_eq_pointer, NULL);
2378 /* If this function is a clone or other, like the selector function, pass. */
2379 slot = htab_find_slot_with_hash (cloned_function_decls_htab,
2380 current_function_decl,
2381 htab_hash_pointer (current_function_decl),
2382 INSERT);
2384 if (*slot != NULL)
2385 return 0;
2387 if (profile_status == PROFILE_READ
2388 && !hot_function_p (cgraph_node (current_function_decl)))
2389 return 0;
2391 /* Ignore functions with abnormal gotos, not correct to clone them. */
2392 if (function_can_make_abnormal_goto (current_function_decl))
2393 return 0;
2395 if (!targetm.mversion_function)
2396 return 0;
2398 /* Call the target hook to see if this function needs to be versioned. */
2399 num_versions = targetm.mversion_function (current_function_decl, &opt_node,
2400 &cond_func_decl);
2402 /* Nothing more to do if versions are not to be created. */
2403 if (num_versions == 0)
2404 return 0;
2406 mark_function_not_cloneable (cond_func_decl);
2407 copy_decl_attributes (cond_func_decl, current_function_decl);
2409 /* Make as many clones as requested. */
2410 for (i = 0; i < num_versions; ++i)
2412 tree cloned_decl;
2413 char clone_name[100];
2415 sprintf (clone_name, "autoclone.%d", i);
2416 cloned_decl = clone_function (current_function_decl, clone_name);
2417 fn_ver_addr_chain = tree_cons (build_fold_addr_expr (cloned_decl),
2418 NULL, fn_ver_addr_chain);
2419 gcc_assert (cloned_decl != NULL);
2420 mark_function_not_cloneable (cloned_decl);
2421 DECL_FUNCTION_SPECIFIC_TARGET (cloned_decl)
2422 = TREE_PURPOSE (opt_node);
2423 opt_node = TREE_CHAIN (opt_node);
2426 /* The current function is replaced by an ifunc call to the right version.
2427 Make another clone for the default. */
2428 default_decl = clone_function (current_function_decl, "autoclone.original");
2429 mark_function_not_cloneable (default_decl);
2430 /* Empty the body of the current function. */
2431 empty_bb = purge_function_body (current_function_decl);
2432 default_ver = build_fold_addr_expr (default_decl);
2433 cond_func_addr = build_fold_addr_expr (cond_func_decl);
2435 /* Get the gimple sequence to replace the current function's body with a
2436 ifunc dispatch call to the right version. */
2437 gseq = dispatch_using_ifunc (num_versions, current_function_decl,
2438 cond_func_addr, fn_ver_addr_chain,
2439 default_ver, &selector_decl, &ifunc_decl);
2441 mark_function_not_cloneable (selector_decl);
2442 mark_function_not_cloneable (ifunc_decl);
2444 for (gsi = gsi_start (gseq); !gsi_end_p (gsi); gsi_next (&gsi))
2445 gimple_set_bb (gsi_stmt (gsi), empty_bb);
2447 set_bb_seq (empty_bb, gseq);
2449 if (dump_file)
2450 dump_function_to_file (current_function_decl, dump_file, TDF_BLOCKS);
2452 update_ssa (TODO_update_ssa_no_phi);
2454 return 0;
2457 static bool
2458 gate_auto_clone (void)
2460 /* Turned on at -O2 and above. */
2461 return optimize >= 2;
2464 struct gimple_opt_pass pass_auto_clone =
2467 GIMPLE_PASS,
2468 "auto_clone", /* name */
2469 gate_auto_clone, /* gate */
2470 do_auto_clone, /* execute */
2471 NULL, /* sub */
2472 NULL, /* next */
2473 0, /* static_pass_number */
2474 TV_MVERSN_DISPATCH, /* tv_id */
2475 PROP_cfg, /* properties_required */
2476 PROP_cfg, /* properties_provided */
2477 0, /* properties_destroyed */
2478 0, /* todo_flags_start */
2479 TODO_dump_func | /* todo_flags_finish */
2480 TODO_cleanup_cfg | TODO_dump_cgraph |
2481 TODO_update_ssa | TODO_verify_ssa