Merge from trunk: 215733-215743
[official-gcc.git] / gcc-4_9 / gcc / mversn-dispatch.c
blob17b2ae8db82fef8f49a07b84989d55b18d07d09d
1 /* Multiversion Dispatch Pass.
2 Copyright (C) 2011 Free Software Foundation, Inc.
3 Contributed by Sriraman Tallam (tmsriram@google.com)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 /* This pass processes __builtin_dispatch calls to call multi-versioned
23 functions. Only two versions are supported now. Example use :
25 int popcnt_sse4(unsigned int x) __attribute__((__target__("popcnt")));
26 int popcnt_sse4(unsigned int x)
28 int count = __builtin_popcount(x);
29 return count;
32 int popcnt(unsigned int x) __attribute__((__target__("no-popcnt")));
33 int popcnt(unsigned int x)
35 int count = __builtin_popcount(x);
36 return count;
39 int testsse() __attribute__((version_selector));
40 int main ()
42 ...
43 ret = __builtin_dispatch (testsse, (void*)popcnt_sse4, (void*)popcnt, 25);
44 ...
47 There are two passes that are run to achieve multi-versioning.
48 "pass_ipa_multiversion_dispatch" is an ipa pass that decides which functions
49 have to be cloned and hoists the feature-test calls appropriately. This
50 pass can be enabled with the flag "-fclone-hot-version-paths" and disabled
51 with "-fno-clone-hot-version-paths".
53 "pass_tree_convert_builtin_dispatch" does the lowering. It is a
54 function-level pass. Functions marked with attribute "version_selector" are
55 also handled by this pass. This pass is always on.
57 How to use __builtin_dispatch ?
58 -----------------------------
60 __builtin_dispatch takes 3 mandatory arguments :
62 __builtin_dispatch (arg1, arg2, arg3, <arg4>, <arg5>, ...);
64 arg1 is the pointer to the feature-test function.
65 arg2 is the ( void *) cast pointer to the versioned function that is
66 executed when the feature test returns 1.
67 arg3 is the ( void *) cast pointer to the versioned function that is
68 executed when the feature test returns 0.
69 arg4, arg5, ... are optional. They are the arguments to the versioned
70 functions. Both versions must accept the same number of arguments.
71 The __builtin_dispatch function returns the value returned by the
72 versioned function that gets executed. The versioned function arg2
73 is executed when the feature_test function arg1 returns 1 and arg3
74 is executed when the feature_test function arg1 returns 0. arg1
75 could be marked as a "version_selector" function if it is a pure
76 function with no side-effects, returns a constant at run-time and
77 can be evaluated at any point in the execution.
79 When to use the "version_selector" attribute ?
80 -----------------------------------------------
82 Functions are marked with attribute "version_selector" only if
83 they are run-time constants. Example of such functions would
84 be those that test if a specific feature is available on a
85 particular architecture. Such functions must return a positive
86 integer. For two-way functions, those that test if a feature
87 is present or not must return 1 or 0 respectively.
90 The code is organized into five parts. The first part has the functionality
91 to detect and handle functions marked with attribute "version_selector". The
92 second part is the analysis phase where we find calls to __builtin_dispatch
93 and mark all functions that are hot and have a call-graph path to a
94 __builtin_dispatch call. The third part decides which functions
95 to clone. This is based on the number of clones that have to be created for
96 the functions marked in the analysis phase. Only two clones are allowed for
97 a function currently. The fourth part is where the actual cloning happens.
98 The fifth part contains the implementation to lower the __builtin_dispatch
99 calls.
101 Flags : -fclone-hot-version-paths does function unswitching via cloning.
102 --param=num-mversn-clones=<num> allows to specify the number of
103 functions that should be cloned.
104 --param=mversn-clone-depth=<num> allows to specify the length of
105 the call graph path that should be cloned. num = 0 implies only
106 leaf node that contains the __builtin_dispatch statement must be
107 cloned. */
109 #include "config.h"
110 #include "system.h"
111 #include "coretypes.h"
112 #include "tm.h"
113 #include "tree.h"
114 #include "tree-inline.h"
115 #include "langhooks.h"
116 #include "flags.h"
117 #include "cgraph.h"
118 #include "diagnostic.h"
119 #include "toplev.h"
120 #include "timevar.h"
121 #include "params.h"
122 #include "fibheap.h"
123 #include "intl.h"
124 #include "tree-pass.h"
125 #include "hashtab.h"
126 #include "coverage.h"
127 #include "ggc.h"
128 #include "tree-flow.h"
129 #include "rtl.h"
130 #include "ipa-prop.h"
131 #include "basic-block.h"
132 #include "toplev.h"
133 #include "dbgcnt.h"
134 #include "tree-dump.h"
135 #include "output.h"
136 #include "vecprim.h"
137 #include "gimple-pretty-print.h"
138 #include "ipa-inline.h"
140 typedef struct cgraph_node* NODEPTR;
141 DEF_VEC_P (NODEPTR);
142 DEF_VEC_ALLOC_P (NODEPTR, heap);
144 /* Store the decl of __builtin_dispatch */
145 static tree builtin_function_decl = NULL;
147 /* Hash to map name to a decl. Used for variables and functions. */
148 static htab_t name_decl_htab = NULL;
150 /* Hashtable helpers for name_decl_htab. */
152 static hashval_t
153 name_decl_htab_hash_descriptor (const void *p)
155 const_tree t = (const_tree) p;
156 const char *name
157 = (IDENTIFIER_POINTER (DECL_NAME (t)));
158 return htab_hash_string(name);
161 /* Hashtable helper for name_decl_htab. */
163 static int
164 name_decl_htab_eq_descriptor (const void *p1, const void *p2)
166 const_tree t1 = (const_tree) p1;
167 const char *c1 = IDENTIFIER_POINTER (DECL_NAME (t1));
168 const char *c2 = (const char *)p2;
170 return (strcmp (c1, c2) == 0);
173 /* Return true if NODE is a hot function. It is a hot function
174 if its execution frequency is determined to be hot or
175 if any of its incoming or outgoing call-graph edges is hot. */
177 static bool
178 hot_function_p (struct cgraph_node *node)
180 struct cgraph_edge *edge;
182 if (node->frequency == NODE_FREQUENCY_HOT)
183 return true;
185 for (edge = node->callees; edge; edge = edge->next_callee)
186 if (cgraph_maybe_hot_edge_p (edge))
187 return true;
189 for (edge = node->callers; edge; edge = edge->next_caller)
190 if (cgraph_maybe_hot_edge_p (edge))
191 return true;
193 return false;
196 /* Return the number of arguments that a function has. */
198 static int
199 function_args_count (tree fntype)
201 function_args_iterator args_iter;
202 tree t;
203 int num = 0;
205 if (fntype)
207 FOREACH_FUNCTION_ARGS(fntype, t, args_iter)
209 num++;
213 return num;
216 /* Return the variable name (global/constructor) to use for the
217 version_selector function with name of DECL by appending SUFFIX. */
219 static char *
220 make_name (tree decl, const char *suffix)
222 char *global_var_name;
223 int name_len;
224 const char *name;
226 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
228 name_len = strlen (name) + strlen (suffix) + 2;
229 global_var_name = (char *) xmalloc (name_len);
230 snprintf (global_var_name, name_len, "%s_%s", name, suffix);
231 return global_var_name;
234 /* Code for handling version_selector attribute functions. Such functions are
235 run-time constants and need to be executed only once. They are hoisted
236 to a static constructor and their result is stored in a global. */
240 /* This function returns the global variable / constructor name created
241 for feature-test functions marked with attribute "version_selector".
242 The name returned is the DECL name appended with
243 "version_selector_global" for the variable and
244 "version_selector_constructor" for the constructor. */
246 static char*
247 make_feature_test_global_name (tree decl, bool is_constructor)
249 if (is_constructor)
250 return make_name (decl, "version_selector_constructor");
252 return make_name (decl, "version_selector_global");
255 /* This function creates a new VAR_DECL with attributes set
256 using the parameters. PUBLIK corresponds to TREE_PUBLIC,
257 EXTERNAL corresponds to DECL_EXTERNAL and comdat is
258 for DECL_ONE_ONLY. The global variable will have the
259 same status as the version_selector function.*/
261 static tree
262 allocate_new_var (const char *name, int publik,
263 int external, int comdat)
265 tree new_global_var;
266 struct varpool_node *vnode;
268 new_global_var = build_decl (UNKNOWN_LOCATION,
269 VAR_DECL,
270 get_identifier (name),
271 integer_type_node);
273 DECL_EXTERNAL (new_global_var) = external;
274 TREE_STATIC (new_global_var) = 1;
275 TREE_PUBLIC (new_global_var) = publik;
276 DECL_INITIAL (new_global_var) = 0;
277 DECL_ARTIFICIAL (new_global_var) = 1;
278 DECL_PRESERVE_P (new_global_var) = 1;
280 if (comdat)
281 make_decl_one_only (new_global_var, DECL_ASSEMBLER_NAME (new_global_var));
282 assemble_variable (new_global_var, 0, 0, 0);
284 vnode = varpool_node (new_global_var);
285 gcc_assert (vnode != NULL);
286 /* Set finalized to 1, otherwise it asserts in function "write_symbol" in
287 lto-streamer-out.c. */
288 vnode->finalized = 1;
290 return new_global_var;
293 /* Make a new constructor function here to call a feature-test function
294 and set its body to CONSTRUCTOR_BODY. Its public and comdat
295 attributes are set from the parameters, PUBLIK, and COMDAT.
296 VERSION_SELECTOR_VAR is the global decl that saves the result of the
297 feature-test function in the constructor. */
299 static tree
300 make_constructor_function (char *name, gimple constructor_body, int publik,
301 int comdat, tree version_selector_var)
303 tree decl, type, t;
304 gimple_seq seq;
305 basic_block new_bb;
306 tree old_current_function_decl;
308 type = build_function_type_list (void_type_node, NULL_TREE);
310 if (dump_file)
311 fprintf (dump_file, "Name of new constructor function = %s\n", name);
313 decl = build_fn_decl (name, type);
315 DECL_NAME (decl) = get_identifier (name);
316 SET_DECL_ASSEMBLER_NAME (decl, DECL_NAME (decl));
317 gcc_assert (cgraph_get_create_node (decl) != NULL);
319 TREE_USED (decl) = 1;
320 DECL_ARTIFICIAL (decl) = 1;
321 DECL_IGNORED_P (decl) = 0;
322 TREE_PUBLIC (decl) = publik;
323 DECL_UNINLINABLE (decl) = 1;
324 DECL_EXTERNAL (decl) = 0;
325 DECL_CONTEXT (decl) = NULL_TREE;
326 DECL_INITIAL (decl) = make_node (BLOCK);
327 DECL_STATIC_CONSTRUCTOR (decl) = 1;
328 TREE_READONLY (decl) = 0;
329 DECL_PURE_P (decl) = 0;
331 if (comdat)
332 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
334 /* Build result decl and add to function_decl. */
335 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, void_type_node);
336 DECL_ARTIFICIAL (t) = 1;
337 DECL_IGNORED_P (t) = 1;
338 DECL_RESULT (decl) = t;
340 gimplify_function_tree (decl);
342 /* Build CFG for this function. */
344 old_current_function_decl = current_function_decl;
345 push_cfun (DECL_STRUCT_FUNCTION (decl));
346 current_function_decl = decl;
347 init_empty_tree_cfg_for_function (DECL_STRUCT_FUNCTION (decl));
348 cfun->curr_properties |=
349 (PROP_gimple_lcf | PROP_gimple_leh | PROP_cfg | PROP_referenced_vars |
350 PROP_ssa);
351 new_bb = create_empty_bb (ENTRY_BLOCK_PTR);
352 make_edge (ENTRY_BLOCK_PTR, new_bb, EDGE_FALLTHRU);
354 /* XXX: Not sure if the edge commented below is necessary. If I add this
355 edge, it fails in gimple_verify_flow_info in tree-cfg.c in condition :
356 " if (e->flags & EDGE_FALLTHRU)"
357 during -fprofile-generate.
358 Otherwise, it is fine. Deleting this edge does not break anything.
359 Commenting this so that it is clear I am intentionally not doing this.*/
360 /* make_edge (new_bb, EXIT_BLOCK_PTR, EDGE_FALLTHRU); */
362 seq = gimple_seq_alloc_with_stmt (constructor_body);
364 set_bb_seq (new_bb, seq);
365 gimple_set_bb (constructor_body, new_bb);
367 /* Set the lexical block of the constructor body. Fails the inliner
368 other wise. */
369 gimple_set_block (constructor_body, DECL_INITIAL (decl));
371 /* This call is very important if this pass runs when the IR is in
372 SSA form. It breaks things in strange ways otherwise. */
373 init_tree_ssa (DECL_STRUCT_FUNCTION (decl));
374 add_referenced_var (version_selector_var);
376 cgraph_add_new_function (decl, true);
377 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
378 cgraph_mark_needed_node (cgraph_get_create_node (decl));
380 if (dump_file)
381 dump_function_to_file (decl, dump_file, TDF_BLOCKS);
383 pop_cfun ();
384 current_function_decl = old_current_function_decl;
385 return decl;
388 /* If the current function is marked with attribute
389 "version_selector" then it is the predicate (feature-test) function
390 for multi-versioning. Call this function in a constructor and assign
391 the return value to a global variable.
392 The constructor's name is the decl name suffixed
393 "version_selector_constructor" and the global variable's name is the
394 decl name suffixed with "version_selector_global"
396 For example, feature-test function isSSE4 marked with attribute
397 version_selector is converted to
399 void isSSE4_version_selector_constructor ()
401 isSSE4_version_selector_global = isSSE4 ();
404 This function returns the decl of the global variable.
406 THIS_DECL is the function decl of the "version_selector" function.
409 static tree
410 handle_version_selector_attr_function (tree this_decl)
412 char *global_var_name;
413 tree version_selector_var = NULL;
414 void **slot;
416 gcc_assert (!flag_lto);
418 if (dump_file)
419 fprintf (dump_file, "Creating constructor/global for function %s\n",
420 IDENTIFIER_POINTER (DECL_NAME (this_decl)));
422 global_var_name = make_feature_test_global_name (this_decl,
423 false);
425 slot = htab_find_slot_with_hash (name_decl_htab, global_var_name,
426 htab_hash_string (global_var_name),
427 INSERT);
428 if (*slot == NULL)
430 if (dump_file)
431 fprintf (dump_file, "Creating global variable %s\n",
432 global_var_name);
433 *slot = allocate_new_var (global_var_name,
434 TREE_PUBLIC (this_decl),
435 DECL_EXTERNAL (this_decl),
436 DECL_ONE_ONLY (this_decl));
438 else
440 free (global_var_name);
441 return (tree) *slot;
444 version_selector_var = (tree) *slot;
446 /* If the feature-test function is not external, create a constructor and
447 call this function in the constructor. */
449 if (!DECL_EXTERNAL (this_decl))
451 char *constructor_name;
452 gimple constructor_body;
453 tree constructor_decl;
455 constructor_name
456 = make_feature_test_global_name (this_decl, true);
458 constructor_body = gimple_build_call (this_decl, 0);
460 gimple_call_set_lhs (constructor_body, version_selector_var);
462 if (dump_file)
463 print_gimple_stmt (dump_file, constructor_body, 0, TDF_VOPS);
465 constructor_decl =
466 make_constructor_function (constructor_name, constructor_body,
467 TREE_PUBLIC (this_decl),
468 DECL_ONE_ONLY (this_decl),
469 version_selector_var);
471 gcc_assert (constructor_decl != NULL_TREE);
472 free (constructor_name);
475 free (global_var_name);
476 return version_selector_var;
479 /* Start Analysis phase. Mark all functions that are hot and have a call-graph
480 path to a __builtin_dispatch call. */
482 /* This function returns the address of the feature test function.
483 If the address of the function is saved to a temporary,
484 this function traverses the gimple statements before BUILTIN_STMT
485 and finds an assignment whose rhs is the feature test function.
486 If the feature test function is specified as a function pointer
487 whose function value is unknown, this funcition returns NULL. */
489 static tree
490 find_version_selector_func_addr (gimple builtin_stmt)
492 tree cond_func_addr = NULL;
493 gimple def_stmt = NULL;
495 cond_func_addr = gimple_call_arg (builtin_stmt, 0);
497 gcc_assert (TREE_CODE (cond_func_addr) == ADDR_EXPR
498 || TREE_CODE (cond_func_addr) == SSA_NAME);
500 if (TREE_CODE (cond_func_addr) == ADDR_EXPR)
501 return cond_func_addr;
503 /* TREE_CODE (cond_func_addr) == SSA_NAME
504 This means a new function pointer variable is created and assigned the
505 address of the feature-test function. Traverse the statements backwards
506 and find the assignment to get the RHS. */
508 def_stmt = SSA_NAME_DEF_STMT (cond_func_addr);
510 gcc_assert (def_stmt
511 && gimple_assign_lhs (def_stmt) == cond_func_addr);
513 cond_func_addr = gimple_assign_rhs1 (def_stmt);
515 /* If the cond_func_addr is still not an ADDR_EXPR, it means that the
516 feature-test function is specified as a pointer. In this case, we
517 return NULL, since the feature-test function decl is not known. */
519 if (cond_func_addr == NULL
520 || TREE_CODE (cond_func_addr) != ADDR_EXPR)
521 return NULL;
523 /* If the operand of the ADDR_EXPR is not a function_decl, return NULL
524 as this still means the feature-test function is specified as a
525 function pointer. */
527 if (TREE_CODE (TREE_OPERAND (cond_func_addr, 0)) != FUNCTION_DECL)
528 return NULL;
530 return cond_func_addr;
533 /* Finds the gimple calls to __builtin_dispatch in function pointed
534 to by the call graph NODE and populates the vector VEC. Returns
535 true if at least one statement was found where the feature test
536 function is marked as "version_selector". Otherwise, there is no
537 question of hoisting it. */
539 static bool
540 is_builtin_dispatch_stmt_present (struct cgraph_node *node,
541 VEC (tree,heap) **vec)
543 struct cgraph_edge *edge;
544 bool present = false;
546 gcc_assert (!flag_lto);
548 for (edge = node->callees; edge; edge = edge->next_callee)
550 if (edge->callee->decl == builtin_function_decl)
552 tree cond_func_decl;
553 tree cond_func_addr;
554 gcc_assert (*vec != NULL);
555 cond_func_addr = find_version_selector_func_addr (edge->call_stmt);
557 if (cond_func_addr == NULL)
558 continue;
560 cond_func_decl = TREE_OPERAND (cond_func_addr, 0);
562 /* Do not consider for hoisting if "version_selector" attribute is
563 not set. */
564 if (lookup_attribute ("version_selector",
565 DECL_ATTRIBUTES (cond_func_decl)) == NULL)
567 if (dump_file)
569 fprintf (dump_file, "Not hoisting builtin_dispatch as "
570 "feature_test function not version_selector :\n");
571 print_gimple_stmt (dump_file, edge->call_stmt, 0, TDF_VOPS);
573 continue;
576 present = true;
577 VEC_safe_push (tree, heap, *vec, cond_func_decl);
580 return present;
583 /* Updates the list of feature-test function decls reaching the cgraph
584 function NODE. */
586 static void
587 update_reachable_decls_list (struct cgraph_node *node,
588 VEC (tree, heap) *predicate_decls)
590 VEC (tree, heap) **decl_list = NULL;
591 tree cond_func_decl;
592 int ix;
594 if (node->aux == NULL)
596 decl_list = (VEC (tree, heap) **) xmalloc (sizeof (VEC (tree, heap) *));
597 *decl_list = VEC_alloc (tree, heap, 1);
598 node->aux = decl_list;
600 else
601 decl_list = (VEC (tree, heap) **) node->aux;
603 for (ix = 0; VEC_iterate (tree, predicate_decls, ix, cond_func_decl); ++ix)
604 VEC_safe_push (tree, heap, *decl_list, cond_func_decl);
607 /* Propagate the __builtin_dispatch stmt (s) called from node to its
608 callers, PREDICATE_DECLS is the decls list of the predicate functions. */
610 static unsigned int
611 mark_reachable_functions (struct cgraph_node *this_node,
612 VEC (tree, heap) *predicate_decls)
614 VEC (NODEPTR, heap) *work_list;
615 VEC (int, heap) *depth_list;
616 struct cgraph_edge *e;
617 htab_t node_htab = NULL;
618 void **slot = NULL;
620 /* Use a work-list style algorithm to mark functions in any call-graph
621 path to the current function. */
623 work_list = VEC_alloc (NODEPTR, heap, 8);
624 depth_list = VEC_alloc (int, heap, 8);
626 VEC_safe_push (NODEPTR, heap, work_list, this_node);
627 VEC_safe_push (int, heap, depth_list, 0);
629 node_htab = htab_create (10, htab_hash_pointer,
630 htab_eq_pointer, NULL);
632 slot = htab_find_slot (node_htab, this_node, INSERT);
634 gcc_assert (*slot == NULL);
635 *slot = this_node;
637 while (!VEC_empty (NODEPTR, work_list))
639 struct cgraph_node *node = VEC_pop (NODEPTR, work_list);
640 int depth = VEC_pop (int, depth_list);
642 if (dump_file)
643 fprintf (dump_file, "%s has a depth = %d callgraph path to %s\n",
644 cgraph_node_name (node), depth,
645 cgraph_node_name (this_node));
647 update_reachable_decls_list (node, predicate_decls);
649 gcc_assert (node->aux != NULL);
651 if (depth >= PARAM_VALUE (PARAM_MVERSN_CLONE_CGRAPH_DEPTH))
653 if (dump_file)
654 fprintf (dump_file, "Not propogating __builtin_dispatch... "
655 "maximum cloning depth = %d reached\n",
656 PARAM_VALUE (PARAM_MVERSN_CLONE_CGRAPH_DEPTH));
657 continue;
660 for (e = node->callers; e; e = e->next_caller)
662 slot = htab_find_slot (node_htab, e->caller, INSERT);
663 if (*slot != NULL)
664 continue;
665 *slot = e->caller;
666 if (!hot_function_p (e->caller))
667 continue;
669 VEC_safe_push (NODEPTR, heap, work_list, e->caller);
670 VEC_safe_push (int, heap, depth_list, (depth + 1));
674 htab_delete (node_htab);
675 VEC_free (NODEPTR, heap, work_list);
676 VEC_free (int, heap, depth_list);
677 return 0;
680 /* Scan the call graph and detect hot functions that have __builtin_dispatch
681 calls. Then, propogate this information to its callers. Returns true if
682 a suitable __builtin_dispatch was found. */
684 static bool
685 perform_analysis_phase (void)
687 struct cgraph_node *node;
688 VEC(tree, heap) *builtin_predicates_vec = NULL;
689 bool flag = false;
691 builtin_predicates_vec = VEC_alloc (tree, heap, 1);
693 for (node = cgraph_nodes; node; node = node->next)
695 /* if the body of this decl is from outside, do nothing. */
696 if (DECL_EXTERNAL (node->decl))
697 continue;
699 if (!hot_function_p (node))
700 continue;
702 if (!is_builtin_dispatch_stmt_present (node, &builtin_predicates_vec))
703 continue;
705 if (dump_file)
707 fprintf (dump_file, "%s calls __builtin_dispatch atleast once.\n",
708 cgraph_node_name (node));
710 fprintf (dump_file, "%s is a hot function, consider cloning ...\n",
711 cgraph_node_name (node));
714 flag = true;
715 mark_reachable_functions (node, builtin_predicates_vec);
716 VEC_truncate (tree, builtin_predicates_vec, 0);
719 VEC_free (tree, heap, builtin_predicates_vec);
720 return flag;
723 /* End Analysis phase. */
725 /* Decide Cloning Phase.
727 In this phase, we go through each function and decide if it should be
728 cloned or not. */
730 /* This function counts the number of unique decls in the DECL_LIST.*/
732 static int
733 count_predicate_functions (VEC (tree,heap) *decl_list)
735 int ix;
736 int count = 0;
737 tree cond_func_decl = NULL;
738 htab_t dup_decl_htab = NULL;
740 if (VEC_length (tree, decl_list) == 1)
741 return 1;
743 dup_decl_htab = htab_create (2, htab_hash_pointer, htab_eq_pointer, NULL);
745 for (ix = 0; VEC_iterate (tree, decl_list, ix, cond_func_decl); ++ix)
747 void **slot = NULL;
748 slot = htab_find_slot (dup_decl_htab, cond_func_decl, INSERT);
750 if (*slot != NULL)
751 continue;
752 count++;
753 *slot = cond_func_decl;
756 htab_delete (dup_decl_htab);
757 return count;
760 /* This function decides which functions to clone based on the number of
761 feature_test decls reaching it. Currently, only one feature_test decl
762 is allowed. */
764 static bool
765 decide_cloning_phase (void)
767 struct cgraph_node *node;
768 int count;
769 bool run_cloning_phase = false;
770 int num_funcs_cloned = 0;
772 for (node = cgraph_nodes; node; node = node->next)
774 tree cond_func_decl = NULL;
775 VEC (tree, heap) *vec;
776 if (node->aux == NULL)
777 continue;
779 if (num_funcs_cloned >= PARAM_VALUE (PARAM_NUMBER_OF_MVERSN_CLONES))
781 if (dump_file)
782 fprintf (dump_file, "Reached cloning limit specified "
783 "by \"num-mversn-clones\" for %s\n",
784 cgraph_node_name (node));
786 free (node->aux);
787 node->aux = NULL;
788 continue;
791 vec = *(VEC (tree,heap) **) node->aux;
792 count = count_predicate_functions (vec);
793 gcc_assert (count >= 1);
794 cond_func_decl = VEC_index (tree, vec, 0);
795 gcc_assert (cond_func_decl != NULL);
796 VEC_free (tree, heap, vec);
797 free (node->aux);
798 node->aux = NULL;
800 if (count > 1)
802 if (dump_file)
803 fprintf (dump_file, "%s has %d predicates, Not cloning for > 1\n",
804 cgraph_node_name (node), count);
805 continue;
807 /* Set the node's aux value to be that of the predicate decl. */
808 node->aux = cond_func_decl;
809 run_cloning_phase = true;
810 num_funcs_cloned++;
812 return run_cloning_phase;
815 /* End Decide Cloning Phase. */
817 /* Cloning Phase. */
819 /* Deletes all basic-blocks and leaves function with :
820 ENTRY_BLOCK ---> (new empty basic block) ---> EXIT_BLOCK
823 static basic_block
824 empty_function_body (tree fndecl)
826 basic_block bb, new_bb;
827 edge e;
828 tree old_current_function_decl;
830 old_current_function_decl = current_function_decl;
831 push_cfun (DECL_STRUCT_FUNCTION (fndecl));
832 current_function_decl = fndecl;
834 clear_edges ();
835 for (bb = ENTRY_BLOCK_PTR; bb != NULL;)
837 basic_block bb_next;
838 bb_next = bb->next_bb;
839 if (bb != EXIT_BLOCK_PTR
840 && bb != ENTRY_BLOCK_PTR)
842 if (bb_seq (bb) != NULL)
844 gimple_stmt_iterator i;
845 for (i = gsi_start_bb (bb); !gsi_end_p (i);)
847 gimple stmt = gsi_stmt (i);
848 unlink_stmt_vdef (stmt);
849 gsi_remove (&i, true);
850 release_defs (stmt);
853 bb->il.gimple = NULL;
854 bb->prev_bb = NULL;
855 bb->next_bb = NULL;
856 SET_BASIC_BLOCK (bb->index, NULL);
857 n_basic_blocks--;
859 bb = bb_next;
861 ENTRY_BLOCK_PTR->next_bb = EXIT_BLOCK_PTR;
862 new_bb = create_empty_bb (ENTRY_BLOCK_PTR);
863 e = make_edge (ENTRY_BLOCK_PTR, new_bb, EDGE_FALLTHRU);
864 gcc_assert (e != NULL);
865 /* XXX:Is this edge necessary ? */
866 e = make_edge (new_bb, EXIT_BLOCK_PTR, 0);
867 gcc_assert (e != NULL);
869 current_function_decl = old_current_function_decl;
870 pop_cfun ();
871 return new_bb;
874 /* Takes function with decl ORIG_FNDECL and clones it. The
875 name of the clone is the original name suffixed with
876 NAME_SUFFIX. Code is adapted from cgraph_function_versioning
877 in cgraphunit.c */
879 static tree
880 clone_function (tree orig_fndecl, const char *name_suffix)
882 tree new_decl;
883 char *new_name;
884 struct cgraph_node *new_version;
885 struct cgraph_node *old_version;
886 void **slot;
887 tree old_current_function_decl;
889 new_name = make_name (orig_fndecl, name_suffix);
890 new_decl = copy_node (orig_fndecl);
893 slot = htab_find_slot_with_hash (name_decl_htab, new_name,
894 htab_hash_string (new_name), INSERT);
896 gcc_assert (*slot == NULL);
897 *slot = new_decl;
899 /* Code adapted from cgraph_function_versioning in cgraphuinit.c */
901 new_version = cgraph_get_create_node (new_decl);
902 old_version = cgraph_get_create_node (orig_fndecl);
904 new_version->local = old_version->local;
905 new_version->global = old_version->global;
906 new_version->rtl = old_version->rtl;
907 new_version->reachable = true;
908 new_version->count = old_version->count;
910 /* Set the name of the new function. */
911 DECL_NAME (new_decl) = get_identifier (new_name);
912 SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
913 SET_DECL_RTL (new_decl, NULL);
915 tree_function_versioning (orig_fndecl, new_decl, NULL /*tree_map*/,
916 false, NULL /*args_to_skip*/,
917 false, /* skip return */
918 NULL /* blocks_to_copy */ ,
919 NULL /* new_entry */);
922 old_current_function_decl = current_function_decl;
923 push_cfun (DECL_STRUCT_FUNCTION (new_decl));
924 current_function_decl = new_decl;
926 TREE_READONLY (new_decl) = TREE_READONLY (orig_fndecl);
927 TREE_STATIC (new_decl) = TREE_STATIC (orig_fndecl);
928 TREE_USED (new_decl) = TREE_USED (orig_fndecl);
929 DECL_ARTIFICIAL (new_decl) = 1;
930 DECL_IGNORED_P (new_decl) = 0;
931 TREE_PUBLIC (new_decl) = TREE_PUBLIC (orig_fndecl);
932 DECL_CONTEXT (new_decl) = DECL_CONTEXT (orig_fndecl);
934 DECL_EXTERNAL (new_decl) = DECL_EXTERNAL (orig_fndecl);
935 DECL_COMDAT (new_decl) = DECL_COMDAT (orig_fndecl);
936 DECL_COMDAT_GROUP (new_decl) = DECL_COMDAT_GROUP (orig_fndecl);
937 DECL_VIRTUAL_P (new_decl) = DECL_VIRTUAL_P (orig_fndecl);
938 DECL_WEAK (new_decl) = DECL_WEAK (orig_fndecl);
940 /* Always inline the clones. Why are we cloning otherwise? */
941 DECL_DECLARED_INLINE_P (new_decl) = 1;
942 DECL_UNINLINABLE (new_decl) = 0;
943 new_version->local.externally_visible
944 = old_version->local.externally_visible;
945 new_version->local.local
946 = old_version->local.local;
948 new_version->analyzed = true;
949 new_version->lowered = true;
951 if (dump_file)
952 dump_function_to_file (new_decl, dump_file, TDF_BLOCKS);
954 cgraph_add_new_function (new_decl, true);
956 cgraph_call_function_insertion_hooks (new_version);
957 cgraph_mark_needed_node (new_version);
959 pop_cfun ();
960 current_function_decl = old_current_function_decl;
962 return new_decl;
965 /* This function populates the vector *VEC with the args in the gimple
966 call statement STMT. SKIP_ARGS is the number of args to skip.*/
968 static void
969 get_function_args (gimple stmt, int num_args, VEC (tree, heap) **vec,
970 int skip_args)
972 int i;
974 if (num_args == 0) return;
976 *vec = VEC_alloc (tree, heap, num_args);
977 /* The number of args in a function is 1 plus the actual number of
978 args. Also, there are 3 special args reserved, so the first arg
979 starts from 3. */
980 for (i = 0; i <= num_args - 2; ++i)
981 VEC_quick_push (tree, *vec, gimple_call_arg (stmt, (skip_args + i)));
984 /* Given ret = __builtin_dispatch (pred, fn1, fn2, arg1, ....)
985 get ret = fn1 (arg1, ...) or ret = fn2 (arg1, ....)
986 depending on the value of SIDE == 0 or 1. */
988 static gimple
989 make_specialized_call_from_builtin (gimple builtin_stmt, int side)
991 tree func_addr;
992 int num_func_args = 0;
993 VEC (tree, heap) *nargs = NULL;
994 tree lhs_stmt;
995 gimple specialized_call_stmt;
997 if (side == 0)
998 func_addr = gimple_call_arg (builtin_stmt, 1);
999 else
1000 func_addr = gimple_call_arg (builtin_stmt, 2);
1002 num_func_args
1003 = function_args_count (TREE_TYPE (TREE_OPERAND (func_addr, 0)));
1005 get_function_args (builtin_stmt, num_func_args, &nargs, 3);
1007 specialized_call_stmt = gimple_build_call_vec (func_addr, nargs);
1009 lhs_stmt = gimple_call_lhs (builtin_stmt);
1011 if (lhs_stmt != NULL_TREE)
1012 gimple_call_set_lhs (specialized_call_stmt, lhs_stmt);
1014 if (nargs != NULL)
1015 VEC_free (tree, heap, nargs);
1017 return specialized_call_stmt;
1020 /* Given a call (GENERIC_STMT) to a function that is cloned, substitute
1021 with a call to the correct clone. */
1023 static gimple
1024 make_specialized_call_to_clone (gimple generic_stmt, int side)
1026 tree new_decl;
1027 char *new_name;
1028 tree generic_fndecl;
1029 gimple specialized_call_stmt;
1030 void **slot;
1031 int num_func_args;
1032 tree lhs_stmt;
1033 VEC (tree, heap) *nargs= NULL;
1035 generic_fndecl = gimple_call_fndecl (generic_stmt);
1036 gcc_assert (generic_fndecl != NULL);
1038 if (side == 0)
1039 new_name = make_name (generic_fndecl, "clone_0");
1040 else
1041 new_name = make_name (generic_fndecl, "clone_1");
1043 slot = htab_find_slot_with_hash (name_decl_htab, new_name,
1044 htab_hash_string (new_name), NO_INSERT);
1045 gcc_assert (slot != NULL);
1046 new_decl = (tree) *slot;
1047 gcc_assert (new_decl);
1049 num_func_args = function_args_count (TREE_TYPE (generic_fndecl));
1050 get_function_args (generic_stmt, num_func_args, &nargs, 0);
1051 specialized_call_stmt = gimple_build_call_vec (new_decl, nargs);
1053 lhs_stmt = gimple_call_lhs (generic_stmt);
1055 if (lhs_stmt != NULL_TREE)
1056 gimple_call_set_lhs (specialized_call_stmt, lhs_stmt);
1058 if (nargs != NULL)
1059 VEC_free (tree, heap, nargs);
1061 return specialized_call_stmt;
1064 /* Returns true if STMT is a call to __builtin_dispatch and its
1065 predicate feature-test function is marked with attribute
1066 "version_selector". */
1068 static bool
1069 is_builtin_with_predicate_version_selector (gimple stmt)
1071 tree cond_func_addr, cond_func_decl;
1073 gcc_assert (!flag_lto);
1075 if (gimple_call_fndecl (stmt) != builtin_function_decl)
1076 return false;
1078 cond_func_addr = find_version_selector_func_addr (stmt);
1080 if (cond_func_addr == NULL)
1081 return false;
1083 cond_func_decl = TREE_OPERAND (cond_func_addr, 0);
1084 if (lookup_attribute ("version_selector",
1085 DECL_ATTRIBUTES (cond_func_decl)) != NULL)
1086 return true;
1088 return false;
1091 /* Find calls to __builtin_dispatch or to functions that are versioned
1092 in CLONE_DECL and substitute the call with the correct version based
1093 on the value of SIDE. */
1095 static void
1096 specialize_call (tree clone_decl, int side)
1098 basic_block bb;
1099 tree old_current_function_decl;
1101 old_current_function_decl = current_function_decl;
1102 push_cfun (DECL_STRUCT_FUNCTION (clone_decl));
1103 current_function_decl = clone_decl;
1105 /* Iterate over call edges and find out if there is
1106 a call to __builtin_dispatch or a cloned function.
1107 We cannot iterate over call graph edges as there are
1108 no edges for the clones yet. */
1110 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (clone_decl))
1112 gimple_stmt_iterator gsi;
1113 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1115 tree callee_decl;
1116 struct cgraph_node *callee_node;
1117 gimple specialized_call_stmt;
1118 gimple stmt = gsi_stmt (gsi);
1120 if (!is_gimple_call (stmt))
1121 continue;
1123 callee_decl = gimple_call_fndecl (stmt);
1125 if (callee_decl == NULL)
1126 continue;
1128 callee_node = cgraph_get_create_node (callee_decl);
1130 /* For a __builtin_dispatch stmt, only specialize if
1131 version_selector attribute is set. Otherwise, it is
1132 not hoisted, so no specialization. */
1134 if (is_builtin_with_predicate_version_selector (stmt))
1136 specialized_call_stmt =
1137 make_specialized_call_from_builtin (stmt, side);
1139 else if (callee_node->aux != NULL)
1141 specialized_call_stmt =
1142 make_specialized_call_to_clone (stmt, side);
1144 else
1145 continue;
1147 if (dump_file)
1149 fprintf (dump_file, "Specialize stmt : \n");
1150 print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS);
1151 fprintf (dump_file, "Specialized stmt : \n");
1152 print_gimple_stmt (dump_file, specialized_call_stmt,
1153 0, TDF_VOPS);
1156 gimple_set_block (specialized_call_stmt, gimple_block (stmt));
1157 gsi_insert_before_without_update (&gsi, specialized_call_stmt,
1158 GSI_SAME_STMT);
1161 unlink_stmt_vdef (stmt);
1162 gsi_remove (&gsi, true);
1163 mark_symbols_for_renaming (specialized_call_stmt);
1165 /* After removing make sure gsi is set correctly to not skip
1166 a statememt. */
1167 gsi = gsi_for_stmt (specialized_call_stmt);
1170 current_function_decl = old_current_function_decl;
1171 pop_cfun ();
1174 /* When a function is version cloned, its body is replaced to call one
1175 of the versions with the feature-test function acting as a predicate.
1176 This is done with __builtin_dispatch which is later expanded. */
1178 static gimple
1179 make_builtin_call_to_clones (tree orig_fndecl, tree clone_0_addr,
1180 tree clone_1_addr, tree cond_func_addr)
1182 gimple new_builtin_call;
1183 VEC(tree, heap) *vargs = VEC_alloc (tree, heap, 4);
1184 tree arg;
1186 VEC_quick_push (tree, vargs, cond_func_addr);
1187 VEC_quick_push (tree, vargs, clone_0_addr);
1188 VEC_quick_push (tree, vargs, clone_1_addr);
1190 for (arg = DECL_ARGUMENTS (orig_fndecl); arg; arg = TREE_CHAIN (arg))
1192 VEC_safe_push (tree, heap, vargs, arg);
1193 /* Again, this add_referenced_var is very very important. It broke
1194 a build where a cloned function's arguments where never
1195 referenced. Missing this statement in places asserts at
1196 tree-dfa.c:589, in function referenced_var_lookup at
1197 "gcc_assert (h || uid == 0);" and is very difficult to triage. */
1198 add_referenced_var (arg);
1201 new_builtin_call = gimple_build_call_vec (builtin_function_decl, vargs);
1202 mark_symbols_for_renaming (new_builtin_call);
1205 if (dump_file)
1206 print_gimple_stmt (dump_file, new_builtin_call, 0, TDF_VOPS);
1208 VEC_free (tree, heap, vargs);
1210 return new_builtin_call;
1213 /* This clones a dispatch function whose callee-graph path has a function
1214 which calls __builtin_dispatch. This function is cloned and the
1215 original function branches to the right clone. */
1217 static int
1218 clone_and_dispatch_function (struct cgraph_node *orig_node, tree *clone_0,
1219 tree *clone_1)
1221 tree clone_0_decl, clone_1_decl;
1222 gimple new_builtin_call = NULL;
1223 gimple new_return_stmt = NULL;
1224 gimple_seq seq = NULL;
1225 basic_block new_bb;
1226 tree orig_fndecl;
1227 tree return_var = NULL;
1228 tree return_type;
1229 tree old_current_function_decl;
1231 old_current_function_decl = current_function_decl;
1232 orig_fndecl = orig_node->decl;
1233 push_cfun (DECL_STRUCT_FUNCTION (orig_fndecl));
1234 current_function_decl = orig_fndecl;
1236 /* Make 2 clones for true and false function. */
1237 clone_0_decl = clone_function (orig_fndecl, "clone_0");
1238 clone_1_decl = clone_function (orig_fndecl, "clone_1");
1239 *clone_0 = clone_0_decl;
1240 *clone_1 = clone_1_decl;
1242 new_bb = empty_function_body (orig_fndecl);
1244 new_builtin_call = make_builtin_call_to_clones (
1245 orig_fndecl,
1246 build_fold_addr_expr (clone_0_decl),
1247 build_fold_addr_expr (clone_1_decl),
1248 build_fold_addr_expr ((tree)orig_node->aux));
1250 return_type = TREE_TYPE (TREE_TYPE (orig_fndecl));
1252 if (!TREE_ADDRESSABLE (return_type) && COMPLETE_TYPE_P (return_type))
1254 tree tmp_var;
1255 tmp_var = create_tmp_var (return_type, NULL);
1256 add_referenced_var (tmp_var);
1257 return_var = make_ssa_name (tmp_var, new_builtin_call);
1258 gimple_call_set_lhs (new_builtin_call, return_var);
1261 mark_symbols_for_renaming (new_builtin_call);
1262 new_return_stmt = gimple_build_return (return_var);
1263 mark_symbols_for_renaming (new_return_stmt);
1264 gimple_seq_add_stmt (&seq, new_builtin_call);
1265 gimple_seq_add_stmt (&seq, new_return_stmt);
1266 set_bb_seq (new_bb, seq);
1267 gimple_set_bb (new_builtin_call, new_bb);
1268 gimple_set_bb (new_return_stmt, new_bb);
1270 gimple_set_block (new_builtin_call, DECL_INITIAL (orig_fndecl));
1271 gimple_set_block (new_return_stmt, DECL_INITIAL (orig_fndecl));
1273 if (dump_file)
1274 dump_function_to_file (orig_fndecl, dump_file, TDF_BLOCKS);
1276 /* This update_ssa is necessary here for the following reason. SSA uses
1277 a global syms_to_rename bitmap that stores syms that must be renamed.
1278 So, if we accumulate the syms from one function in IPA but move to
1279 a different function without updating SSA, then we could be
1280 accumulating syms from many functions. This would assert in
1281 referenced_var_lookup because the hashtab storing the syms is
1282 function local. This is horrible. gcc-4.6 makes this bitmap a
1283 global. */
1284 update_ssa (TODO_update_ssa);
1286 compute_inline_parameters (cgraph_get_create_node (orig_fndecl), false);
1287 DECL_DECLARED_INLINE_P (orig_fndecl) = 1;
1288 DECL_UNINLINABLE (orig_fndecl) = 0;
1289 current_function_decl = old_current_function_decl;
1290 pop_cfun ();
1291 return 0;
1294 /* Clone all functions marked for cloning by the earlier phase. */
1296 static void
1297 perform_cloning_phase (void)
1299 struct cgraph_node *node;
1300 int ix;
1301 VEC (tree, heap) *cloned_decl_list = NULL;
1302 tree cloned_decl = NULL;
1304 cloned_decl_list = VEC_alloc (tree, heap, 2);
1306 /* First clone, then specialize the clones. */
1307 for (node = cgraph_nodes; node; node = node->next)
1309 tree clone_0_decl, clone_1_decl;
1310 if (node->aux == NULL)
1311 continue;
1312 if (dump_file)
1314 fprintf (dump_file, "%s will be cloned\n", cgraph_node_name (node));
1315 dump_function_to_file (node->decl, dump_file, TDF_BLOCKS);
1317 clone_and_dispatch_function (node, &clone_0_decl, &clone_1_decl);
1318 VEC_safe_push (tree, heap, cloned_decl_list, clone_0_decl);
1319 VEC_safe_push (tree, heap, cloned_decl_list, clone_1_decl);
1320 continue;
1323 /* Specialize the clones now. */
1324 for (ix = 0; VEC_iterate (tree, cloned_decl_list, ix, cloned_decl); ++ix)
1326 int which_clone = ix % 2;
1327 specialize_call (cloned_decl, which_clone);
1330 VEC_free (tree, heap, cloned_decl_list);
1333 /* End Cloning phase. */
1335 /* Checks if there is atleast one call to __builtin_dispatch. */
1337 static bool
1338 find_builtin_decl (void)
1340 struct cgraph_node *node;
1341 for (node = cgraph_nodes; node; node = node->next)
1343 if (strstr (cgraph_node_name (node), "__builtin_dispatch") != NULL)
1345 builtin_function_decl = node->decl;
1346 return true;
1349 return false;
1352 /* Set the aux fields of all nodes and edges in the call graph to be NULL. */
1354 static void
1355 cleanup_aux_field (void)
1357 struct cgraph_node *node;
1358 struct cgraph_edge *edge;
1360 for (node = cgraph_nodes; node; node = node->next)
1362 node->aux = NULL;
1363 for (edge = node->callees; edge; edge = edge->next_callee)
1364 edge->aux = NULL;
1368 /* Main driver function. It scans the __builtin_dispatch calls and
1369 figures out which functions to clone. It then clones the functions. */
1371 static unsigned int
1372 builtin_dispatch_ipa_clone (void)
1374 cleanup_aux_field ();
1376 /* Allocate hashtab mapping name to decl. */
1377 name_decl_htab = htab_create (10, name_decl_htab_hash_descriptor,
1378 name_decl_htab_eq_descriptor, NULL);
1380 /* Turn it on for O1 and above. At -O0, there is a SSA alias bug
1381 with create_tmp_var. Cloning and hoisting is not necessary at
1382 -O0 anyways. Also, guard it with the flag
1383 "-fclone-hot-version-paths".
1384 Disabled for LTO as it needs more work. */
1385 if (optimize == 0
1386 || profile_arc_flag
1387 || !flag_clone_hot_version_paths
1388 || flag_lto)
1389 return 0;
1391 if (!find_builtin_decl ())
1392 return 0;
1394 gcc_assert (builtin_function_decl != NULL);
1396 if (!perform_analysis_phase ())
1398 cleanup_aux_field ();
1399 return 0;
1402 if (decide_cloning_phase ())
1403 perform_cloning_phase ();
1405 cleanup_aux_field ();
1407 return 0;
/* Gate of the IPA multiversion dispatch pass: the pass is always
   entered.  */

static bool
gate_handle_builtin_dispatch (void)
{
  return true;
}
1416 struct simple_ipa_opt_pass pass_ipa_multiversion_dispatch =
1419 SIMPLE_IPA_PASS,
1420 "multiversion_dispatch", /* name */
1421 gate_handle_builtin_dispatch, /* gate */
1422 builtin_dispatch_ipa_clone, /* execute */
1423 NULL, /* sub */
1424 NULL, /* next */
1425 0, /* static_pass_number */
1426 TV_MVERSN_DISPATCH, /* tv_id */
1427 0, /* properties_required */
1428 PROP_cfg, /* properties_provided */
1429 0, /* properties_destroyed */
1430 0, /* todo_flags_start */
1431 TODO_dump_func | /* todo_flags_finish */
1432 TODO_update_ssa
1436 /* Lowering of the __builtin_dispatch calls. */
1439 /* This function converts STMT which is a __builtin_dispatch
1440 call of the form :
1441 ret = __builtin_dispatch (predicate, foo, bar, arg1, ...)
1442 into :
1443 var_1 = predicate
1444 if (var_1)
1445 var_2 = foo (arg1, ...);
1446 else
1447 var_3 = bar (arg1, ...);
1448 var_4 = phi (var_2, var_3)
1449 ret = var_4
1451 var_? are ssa names for variable var.
1454 static unsigned int
1455 convert_builtin_dispatch (gimple stmt)
1457 tree cond_func_addr, if_func_addr, else_func_addr;
1458 tree cond_func_decl = NULL;
1459 gimple if_part, else_part, if_else_stmt;
1460 basic_block bb1, bb2, bb3, bb4;
1461 gimple bb1end, bb2end, bb3end;
1462 edge e12, e13, e23, e24, e34;
1463 VEC(tree, heap) *nargs = NULL;
1464 int num_func_args = 0, i;
1465 tree version_selector_var;
1466 tree lhs_result;
1467 gimple_stmt_iterator gsi;
1468 basic_block bb;
1469 gimple feature_test_call = NULL;
1470 tree tmp_var = NULL;
1471 gimple init_stmt = NULL;
1472 tree ssa_if_name, ssa_else_name;
1473 gimple phinode = NULL;
1474 tree tmp_result_var, ssa_result_var;
1476 gsi = gsi_for_stmt (stmt);
1477 bb = gsi_bb (gsi);
1479 cond_func_addr = find_version_selector_func_addr (stmt);
1480 if (cond_func_addr != NULL)
1482 cond_func_decl = TREE_OPERAND (cond_func_addr, 0);
1483 gcc_assert (cond_func_decl);
1486 if (dump_file)
1488 fprintf (dump_file, "Before Converting __builtin_dispatch :\n");
1489 dump_function_to_file (current_function_decl, dump_file, TDF_BLOCKS);
1492 if_func_addr = gimple_call_arg (stmt, 1);
1493 else_func_addr = gimple_call_arg (stmt, 2);
1495 tmp_result_var = create_tmp_var (integer_type_node, NULL);
1496 add_referenced_var (tmp_result_var);
1498 if (flag_lto
1499 || cond_func_decl == NULL
1500 || lookup_attribute ("version_selector",
1501 DECL_ATTRIBUTES (cond_func_decl)) == NULL)
1503 tree arg = gimple_call_arg (stmt, 0);
1504 /* This means the feature-test function is not set with attribute
1505 version_selector or it is a function pointer or in LTO. So,
1506 explicitly call it. */
1507 feature_test_call = gimple_build_call (arg, 0);
1508 ssa_result_var = make_ssa_name (tmp_result_var, feature_test_call);
1509 gimple_call_set_lhs (feature_test_call, ssa_result_var);
1510 mark_symbols_for_renaming (feature_test_call);
1511 version_selector_var = ssa_result_var;
1513 else
1515 /* Get the global corresponding to the "version_selector" function. */
1516 version_selector_var
1517 = handle_version_selector_attr_function (cond_func_decl);
1518 gcc_assert (version_selector_var);
1519 add_referenced_var (version_selector_var);
1520 feature_test_call = gimple_build_assign (tmp_result_var,
1521 version_selector_var);
1522 ssa_result_var = make_ssa_name (tmp_result_var, feature_test_call);
1523 gimple_assign_set_lhs (feature_test_call, ssa_result_var);
1524 mark_symbols_for_renaming (feature_test_call);
1525 version_selector_var = ssa_result_var;
1528 if_else_stmt = gimple_build_cond (GT_EXPR,
1529 version_selector_var,
1530 integer_zero_node,
1531 NULL_TREE, NULL_TREE);
1533 mark_symbols_for_renaming (if_else_stmt);
1535 num_func_args = function_args_count (
1536 TREE_TYPE (TREE_OPERAND (if_func_addr, 0)));
1538 nargs = VEC_alloc (tree, heap, num_func_args);
1540 /* The arguments to the feature test function start from the 4th argument
1541 in __builtin_dispatch. The first 3 arguments are mandatory. */
1543 for (i = 0; i <= num_func_args - 2; ++i)
1544 VEC_quick_push (tree, nargs,
1545 gimple_call_arg (stmt, (3 + i)));
1547 if_part = gimple_build_call_vec (if_func_addr, nargs);
1548 else_part = gimple_build_call_vec (else_func_addr, nargs);
1550 lhs_result = gimple_call_lhs (stmt);
1552 if (lhs_result != NULL_TREE)
1554 tree ssa_var;
1555 tree return_type;
1556 return_type = TREE_TYPE (lhs_result);
1557 tmp_var = create_tmp_var (return_type, NULL);
1558 add_referenced_var (tmp_var);
1560 init_stmt = gimple_build_assign (tmp_var, build_zero_cst (return_type));
1561 ssa_var = make_ssa_name (tmp_var, init_stmt);
1562 gimple_assign_set_lhs (init_stmt, ssa_var);
1563 mark_symbols_for_renaming (init_stmt);
1565 ssa_if_name = make_ssa_name (tmp_var, init_stmt);
1566 ssa_else_name = make_ssa_name (tmp_var, init_stmt);
1567 gimple_call_set_lhs (if_part, ssa_if_name);
1568 gimple_call_set_lhs (else_part, ssa_else_name);
1570 mark_symbols_for_renaming (if_part);
1571 mark_symbols_for_renaming (else_part);
1573 /* Set the lexical block to be the same as the dispatch call. */
1574 gcc_assert (feature_test_call);
1575 gimple_set_block (feature_test_call, gimple_block (stmt));
1577 if (init_stmt)
1578 gimple_set_block (init_stmt, gimple_block (stmt));
1580 gimple_set_block (if_else_stmt, gimple_block (stmt));
1581 gimple_set_block (if_part, gimple_block (stmt));
1582 gimple_set_block (else_part, gimple_block (stmt));
1584 gsi_insert_before_without_update (&gsi, feature_test_call, GSI_SAME_STMT);
1585 gimple_set_bb (feature_test_call, bb);
1587 if (init_stmt)
1589 gsi_insert_before_without_update (&gsi, init_stmt,
1590 GSI_SAME_STMT);
1591 gimple_set_bb (init_stmt, bb);
1594 gsi_insert_before_without_update (&gsi, if_else_stmt, GSI_SAME_STMT);
1595 gsi_insert_before_without_update (&gsi, if_part, GSI_SAME_STMT);
1596 gsi_insert_before_without_update (&gsi, else_part, GSI_SAME_STMT);
1598 /* Remove the builtin_dispatch call after the expansion. */
1599 unlink_stmt_vdef (stmt);
1600 gsi_remove (&gsi, true);
1602 bb1end = if_else_stmt;
1603 bb2end = if_part;
1604 bb3end = else_part;
1605 bb1 = bb;
1606 e12 = split_block (bb1, bb1end);
1607 bb2 = e12->dest;
1608 e23 = split_block (bb2, bb2end);
1609 bb3 = e23->dest;
1610 e34 = split_block (bb3, bb3end);
1611 bb4 = e34->dest;
1613 e12->flags &= ~EDGE_FALLTHRU;
1614 e12->flags |= EDGE_TRUE_VALUE;
1615 e13 = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
1616 gcc_assert (e13);
1617 e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
1618 gcc_assert (e24);
1619 remove_edge (e23);
1621 if (tmp_var)
1623 gimple assign_stmt;
1624 phinode = create_phi_node (tmp_var, bb4);
1625 add_phi_arg (phinode, ssa_if_name, e24, UNKNOWN_LOCATION);
1626 add_phi_arg (phinode, ssa_else_name, e34, UNKNOWN_LOCATION);
1627 mark_symbols_for_renaming (phinode);
1628 gcc_assert (lhs_result);
1629 assign_stmt
1630 = gimple_build_assign (lhs_result, gimple_phi_result (phinode));
1631 mark_symbols_for_renaming (assign_stmt);
1632 gsi = gsi_start_bb (bb4);
1633 gsi_insert_before_without_update (&gsi, assign_stmt, GSI_SAME_STMT);
1634 gimple_set_bb (assign_stmt, bb4);
1637 if (dump_file)
1639 fprintf (dump_file, "Converted __builtin_dispatch :\n");
1640 dump_function_to_file (current_function_decl, dump_file, TDF_BLOCKS);
1643 return 0;
1646 /* This function does two things.
1648 1) For a feature-test function marked with attribute "version_selector",
1649 it creates a constructor that calls the feature-test function and a
1650 global that holds the result. The global's result will be used
1651 to lower any __builtin_dispatch statement that refers to this feature
1652 test function. The __builtin_dispatch statement and the feature test
1653 function can be in different modules.
1655 2) It lowers __builtin_dispatch statements. */
1657 static unsigned int
1658 do_convert_builtin_dispatch (void)
1660 basic_block bb;
1661 gimple_stmt_iterator gsi;
1662 VEC (gimple, heap) *builtin_stmt_list = NULL;
1663 int ix;
1664 gimple builtin_stmt;
1666 /* Allocate hashtab mapping name to decl. */
1667 if (name_decl_htab == NULL)
1668 name_decl_htab = htab_create (10, name_decl_htab_hash_descriptor,
1669 name_decl_htab_eq_descriptor, NULL);
1671 /* Look for functions with attribute "version_selector" and make a
1672 constructor which calls the function and saves the result in a
1673 global. Disabled for LTO as it needs more work. */
1675 if (!flag_lto
1676 && lookup_attribute ("version_selector",
1677 DECL_ATTRIBUTES (current_function_decl)) != NULL)
1679 if (dump_file)
1680 fprintf (dump_file, "Function with version_selector attribute found :"
1681 " %s. Making constructor for it.\n",
1682 current_function_name ());
1684 handle_version_selector_attr_function (current_function_decl);
1685 /* Assume there are no __builtin_dispatch calls in feature test
1686 functions. So it is safe to return. */
1687 return 0;
1690 /* Find and lower __builtin_dispatch calls. */
1692 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (current_function_decl))
1694 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1696 gimple stmt = gsi_stmt (gsi);
1697 tree call_decl;
1699 if (!is_gimple_call (stmt))
1700 continue;
1702 call_decl = gimple_call_fndecl (stmt);
1704 if (call_decl == NULL)
1705 continue;
1707 if (DECL_NAME (call_decl) == NULL_TREE)
1708 continue;
1710 if (strstr (IDENTIFIER_POINTER (DECL_NAME (call_decl)),
1711 "__builtin_dispatch") == NULL)
1712 continue;
1714 if (dump_file)
1716 fprintf (dump_file, "Converting __builtin_dispatch stmt in:%s\n",
1717 current_function_name ());
1718 print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS);
1721 if (builtin_stmt_list == NULL)
1722 builtin_stmt_list = VEC_alloc (gimple, heap, 2);
1724 gcc_assert (builtin_stmt_list != NULL);
1725 VEC_safe_push (gimple, heap, builtin_stmt_list, stmt);
1729 if (!builtin_stmt_list)
1730 return 0;
1732 for (ix = 0; VEC_iterate (gimple, builtin_stmt_list, ix, builtin_stmt);
1733 ++ix)
1734 convert_builtin_dispatch (builtin_stmt);
1736 compute_inline_parameters (cgraph_get_create_node (current_function_decl),
1737 false);
1739 VEC_free (gimple, heap, builtin_stmt_list);
1741 return 0;
/* Gate of the __builtin_dispatch lowering pass: the pass is always
   entered.  */

static bool
gate_convert_builtin_dispatch (void)
{
  return true;
}
1750 struct gimple_opt_pass pass_tree_convert_builtin_dispatch =
1753 GIMPLE_PASS,
1754 "convert_builtin_dispatch", /* name */
1755 gate_convert_builtin_dispatch, /* gate */
1756 do_convert_builtin_dispatch, /* execute */
1757 NULL, /* sub */
1758 NULL, /* next */
1759 0, /* static_pass_number */
1760 TV_MVERSN_DISPATCH, /* tv_id */
1761 PROP_cfg, /* properties_required */
1762 PROP_cfg, /* properties_provided */
1763 0, /* properties_destroyed */
1764 0, /* todo_flags_start */
1765 TODO_dump_func | /* todo_flags_finish */
1766 TODO_cleanup_cfg | TODO_dump_cgraph |
1767 TODO_update_ssa | TODO_verify_ssa