gcc/trans-mem.c
1 /* Passes for transactional memory support.
2 Copyright (C) 2008-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "hash-table.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "gimple-ssa.h"
27 #include "cgraph.h"
28 #include "tree-cfg.h"
29 #include "tree-ssanames.h"
30 #include "tree-into-ssa.h"
31 #include "tree-pass.h"
32 #include "tree-inline.h"
33 #include "diagnostic-core.h"
34 #include "demangle.h"
35 #include "output.h"
36 #include "trans-mem.h"
37 #include "params.h"
38 #include "target.h"
39 #include "langhooks.h"
40 #include "gimple-pretty-print.h"
41 #include "cfgloop.h"
42 #include "tree-ssa-address.h"
45 #define PROB_VERY_UNLIKELY (REG_BR_PROB_BASE / 2000 - 1)
46 #define PROB_VERY_LIKELY (PROB_ALWAYS - PROB_VERY_UNLIKELY)
47 #define PROB_UNLIKELY (REG_BR_PROB_BASE / 5 - 1)
48 #define PROB_LIKELY (PROB_ALWAYS - PROB_UNLIKELY)
49 #define PROB_ALWAYS (REG_BR_PROB_BASE)
51 #define A_RUNINSTRUMENTEDCODE 0x0001
52 #define A_RUNUNINSTRUMENTEDCODE 0x0002
53 #define A_SAVELIVEVARIABLES 0x0004
54 #define A_RESTORELIVEVARIABLES 0x0008
55 #define A_ABORTTRANSACTION 0x0010
57 #define AR_USERABORT 0x0001
58 #define AR_USERRETRY 0x0002
59 #define AR_TMCONFLICT 0x0004
60 #define AR_EXCEPTIONBLOCKABORT 0x0008
61 #define AR_OUTERABORT 0x0010
63 #define MODE_SERIALIRREVOCABLE 0x0000
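/* Illustrative sketch: the A_* bits above correspond to the action bits
   the TM runtime hands back from _ITM_beginTransaction, and the code
   generated by these passes dispatches on them roughly as follows
   (schematic only; the precise shape is shown further below):

     status = _ITM_beginTransaction (...);
     if (status & A_SAVELIVEVARIABLES)
       ...store live-in variables into their save slots...
     if (status & A_RESTORELIVEVARIABLES)
       ...reload live-in variables from their save slots...
     if (status & A_ABORTTRANSACTION)
       goto over;
     if (status & A_RUNUNINSTRUMENTEDCODE)
       ...run the uninstrumented code path...
     else
       ...run the instrumented (TM barrier) code path...

   The AR_* codes are the matching abort reasons passed to the abort
   builtin (e.g. AR_USERABORT for a user-level cancel), and
   MODE_SERIALIRREVOCABLE is the mode requested when a transaction
   must go irrevocable.  */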
66 /* The representation of a transaction changes several times during the
67 lowering process. In the beginning, in the front-end we have the
68 GENERIC tree TRANSACTION_EXPR. For example,
70 __transaction {
71 local++;
72 if (++global == 10)
73 __tm_abort;
76 During initial gimplification (gimplify.c) the TRANSACTION_EXPR node is
77 trivially replaced with a GIMPLE_TRANSACTION node.
79 During pass_lower_tm, we examine the body of transactions looking
80 for aborts. Transactions that do not contain an abort may be
81 merged into an outer transaction. We also add a TRY-FINALLY node
82 to arrange for the transaction to be committed on any exit.
84 [??? Think about how this arrangement affects throw-with-commit
85 and throw-with-abort operations. In this case we want the TRY to
86 handle gotos, but not to catch any exceptions because the transaction
87 will already be closed.]
89 GIMPLE_TRANSACTION [label=NULL] {
90 try {
91 local = local + 1;
92 t0 = global;
93 t1 = t0 + 1;
94 global = t1;
95 if (t1 == 10)
96 __builtin___tm_abort ();
97 } finally {
98 __builtin___tm_commit ();
102 During pass_lower_eh, we create EH regions for the transactions,
103 intermixed with the regular EH stuff. This gives us a nice persistent
104 mapping (all the way through rtl) from transactional memory operation
105 back to the transaction, which allows us to get the abnormal edges
106 correct to model transaction aborts and restarts:
108 GIMPLE_TRANSACTION [label=over]
109 local = local + 1;
110 t0 = global;
111 t1 = t0 + 1;
112 global = t1;
113 if (t1 == 10)
114 __builtin___tm_abort ();
115 __builtin___tm_commit ();
116 over:
118 This is the end of all_lowering_passes, and so is what is present
119 during the IPA passes, and through all of the optimization passes.
121 During pass_ipa_tm, we examine all GIMPLE_TRANSACTION blocks in all
122 functions and mark functions for cloning.
124 At the end of gimple optimization, before exiting SSA form,
125 pass_tm_edges replaces statements that perform transactional
126 memory operations with the appropriate TM builtins, and swaps
127 out function calls with their transactional clones. At this
128 point we introduce the abnormal transaction restart edges and
129 complete lowering of the GIMPLE_TRANSACTION node.
131 x = __builtin___tm_start (MAY_ABORT);
132 eh_label:
133 if (x & abort_transaction)
134 goto over;
135 local = local + 1;
136 t0 = __builtin___tm_load (global);
137 t1 = t0 + 1;
138 __builtin___tm_store (&global, t1);
139 if (t1 == 10)
140 __builtin___tm_abort ();
141 __builtin___tm_commit ();
142 over:
145 static void *expand_regions (struct tm_region *,
146 void *(*callback)(struct tm_region *, void *),
147 void *, bool);
150 /* Return the attributes we want to examine for X, or NULL if it's not
151 something we examine. We look at function types, but also accept
152 function decls and pointers to function types, peeking through them. */
154 static tree
155 get_attrs_for (const_tree x)
157 switch (TREE_CODE (x))
159 case FUNCTION_DECL:
160 return TYPE_ATTRIBUTES (TREE_TYPE (x));
161 break;
163 default:
164 if (TYPE_P (x))
165 return NULL;
166 x = TREE_TYPE (x);
167 if (TREE_CODE (x) != POINTER_TYPE)
168 return NULL;
169 /* FALLTHRU */
171 case POINTER_TYPE:
172 x = TREE_TYPE (x);
173 if (TREE_CODE (x) != FUNCTION_TYPE && TREE_CODE (x) != METHOD_TYPE)
174 return NULL;
175 /* FALLTHRU */
177 case FUNCTION_TYPE:
178 case METHOD_TYPE:
179 return TYPE_ATTRIBUTES (x);
183 /* Return true if X has been marked TM_PURE. */
185 bool
186 is_tm_pure (const_tree x)
188 unsigned flags;
190 switch (TREE_CODE (x))
192 case FUNCTION_DECL:
193 case FUNCTION_TYPE:
194 case METHOD_TYPE:
195 break;
197 default:
198 if (TYPE_P (x))
199 return false;
200 x = TREE_TYPE (x);
201 if (TREE_CODE (x) != POINTER_TYPE)
202 return false;
203 /* FALLTHRU */
205 case POINTER_TYPE:
206 x = TREE_TYPE (x);
207 if (TREE_CODE (x) != FUNCTION_TYPE && TREE_CODE (x) != METHOD_TYPE)
208 return false;
209 break;
212 flags = flags_from_decl_or_type (x);
213 return (flags & ECF_TM_PURE) != 0;
216 /* Return true if X has been marked TM_IRREVOCABLE. */
218 static bool
219 is_tm_irrevocable (tree x)
221 tree attrs = get_attrs_for (x);
223 if (attrs && lookup_attribute ("transaction_unsafe", attrs))
224 return true;
226 /* A call to the irrevocable builtin is, by definition,
227 irrevocable. */
228 if (TREE_CODE (x) == ADDR_EXPR)
229 x = TREE_OPERAND (x, 0);
230 if (TREE_CODE (x) == FUNCTION_DECL
231 && DECL_BUILT_IN_CLASS (x) == BUILT_IN_NORMAL
232 && DECL_FUNCTION_CODE (x) == BUILT_IN_TM_IRREVOCABLE)
233 return true;
235 return false;
238 /* Return true if X has been marked TM_SAFE. */
240 bool
241 is_tm_safe (const_tree x)
243 if (flag_tm)
245 tree attrs = get_attrs_for (x);
246 if (attrs)
248 if (lookup_attribute ("transaction_safe", attrs))
249 return true;
250 if (lookup_attribute ("transaction_may_cancel_outer", attrs))
251 return true;
254 return false;
257 /* Return true if CALL is const, or tm_pure. */
259 static bool
260 is_tm_pure_call (gimple call)
262 tree fn = gimple_call_fn (call);
264 if (TREE_CODE (fn) == ADDR_EXPR)
266 fn = TREE_OPERAND (fn, 0);
267 gcc_assert (TREE_CODE (fn) == FUNCTION_DECL);
269 else
270 fn = TREE_TYPE (fn);
272 return is_tm_pure (fn);
275 /* Return true if X has been marked TM_CALLABLE. */
277 static bool
278 is_tm_callable (tree x)
280 tree attrs = get_attrs_for (x);
281 if (attrs)
283 if (lookup_attribute ("transaction_callable", attrs))
284 return true;
285 if (lookup_attribute ("transaction_safe", attrs))
286 return true;
287 if (lookup_attribute ("transaction_may_cancel_outer", attrs))
288 return true;
290 return false;
293 /* Return true if X has been marked TRANSACTION_MAY_CANCEL_OUTER. */
295 bool
296 is_tm_may_cancel_outer (tree x)
298 tree attrs = get_attrs_for (x);
299 if (attrs)
300 return lookup_attribute ("transaction_may_cancel_outer", attrs) != NULL;
301 return false;
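/* Illustrative sketch: the predicates above key off the user-visible TM
   function attributes.  With -fgnu-tm, hypothetical declarations such as

     int  pure_fn (int)       __attribute__ ((transaction_pure));
     void safe_fn (void)      __attribute__ ((transaction_safe));
     void callable_fn (void)  __attribute__ ((transaction_callable));
     void unsafe_fn (void)    __attribute__ ((transaction_unsafe));
     void cancel_fn (void)    __attribute__ ((transaction_may_cancel_outer));

   make is_tm_pure, is_tm_safe, is_tm_callable, is_tm_irrevocable and
   is_tm_may_cancel_outer, respectively, return true for the marked decl
   (or for a pointer to a function type carrying the attribute).  */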
304 /* Return true for built in functions that "end" a transaction. */
306 bool
307 is_tm_ending_fndecl (tree fndecl)
309 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
310 switch (DECL_FUNCTION_CODE (fndecl))
312 case BUILT_IN_TM_COMMIT:
313 case BUILT_IN_TM_COMMIT_EH:
314 case BUILT_IN_TM_ABORT:
315 case BUILT_IN_TM_IRREVOCABLE:
316 return true;
317 default:
318 break;
321 return false;
324 /* Return true if STMT is a TM load. */
326 static bool
327 is_tm_load (gimple stmt)
329 tree fndecl;
331 if (gimple_code (stmt) != GIMPLE_CALL)
332 return false;
334 fndecl = gimple_call_fndecl (stmt);
335 return (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
336 && BUILTIN_TM_LOAD_P (DECL_FUNCTION_CODE (fndecl)));
339 /* Same as above, but for simple TM loads, that is, not the
340 after-write, after-read, etc optimized variants. */
342 static bool
343 is_tm_simple_load (gimple stmt)
345 tree fndecl;
347 if (gimple_code (stmt) != GIMPLE_CALL)
348 return false;
350 fndecl = gimple_call_fndecl (stmt);
351 if (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
353 enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl);
354 return (fcode == BUILT_IN_TM_LOAD_1
355 || fcode == BUILT_IN_TM_LOAD_2
356 || fcode == BUILT_IN_TM_LOAD_4
357 || fcode == BUILT_IN_TM_LOAD_8
358 || fcode == BUILT_IN_TM_LOAD_FLOAT
359 || fcode == BUILT_IN_TM_LOAD_DOUBLE
360 || fcode == BUILT_IN_TM_LOAD_LDOUBLE
361 || fcode == BUILT_IN_TM_LOAD_M64
362 || fcode == BUILT_IN_TM_LOAD_M128
363 || fcode == BUILT_IN_TM_LOAD_M256);
365 return false;
368 /* Return true if STMT is a TM store. */
370 static bool
371 is_tm_store (gimple stmt)
373 tree fndecl;
375 if (gimple_code (stmt) != GIMPLE_CALL)
376 return false;
378 fndecl = gimple_call_fndecl (stmt);
379 return (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
380 && BUILTIN_TM_STORE_P (DECL_FUNCTION_CODE (fndecl)));
383 /* Same as above, but for simple TM stores, that is, not the
384 after-write, after-read, etc optimized variants. */
386 static bool
387 is_tm_simple_store (gimple stmt)
389 tree fndecl;
391 if (gimple_code (stmt) != GIMPLE_CALL)
392 return false;
394 fndecl = gimple_call_fndecl (stmt);
395 if (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
397 enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl);
398 return (fcode == BUILT_IN_TM_STORE_1
399 || fcode == BUILT_IN_TM_STORE_2
400 || fcode == BUILT_IN_TM_STORE_4
401 || fcode == BUILT_IN_TM_STORE_8
402 || fcode == BUILT_IN_TM_STORE_FLOAT
403 || fcode == BUILT_IN_TM_STORE_DOUBLE
404 || fcode == BUILT_IN_TM_STORE_LDOUBLE
405 || fcode == BUILT_IN_TM_STORE_M64
406 || fcode == BUILT_IN_TM_STORE_M128
407 || fcode == BUILT_IN_TM_STORE_M256);
409 return false;
412 /* Return true if FNDECL is BUILT_IN_TM_ABORT. */
414 static bool
415 is_tm_abort (tree fndecl)
417 return (fndecl
418 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
419 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_TM_ABORT);
422 /* Build a GENERIC tree for a user abort. This is called by front ends
423 while transforming the __tm_abort statement. */
425 tree
426 build_tm_abort_call (location_t loc, bool is_outer)
428 return build_call_expr_loc (loc, builtin_decl_explicit (BUILT_IN_TM_ABORT), 1,
429 build_int_cst (integer_type_node,
430 AR_USERABORT
431 | (is_outer ? AR_OUTERABORT : 0)));
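/* Illustrative sketch: for a user-level abort (__tm_abort /
   __transaction_cancel), the call built above is roughly

     __builtin__ITM_abortTransaction (AR_USERABORT | AR_OUTERABORT);

   i.e. a single integer argument encoding the abort reason, with
   AR_OUTERABORT present only when IS_OUTER is true.  */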
434 /* Common gating function for several of the TM passes. */
436 static bool
437 gate_tm (void)
439 return flag_tm;
442 /* Map for arbitrary function replacement under TM, as created
443 by the tm_wrap attribute. */
445 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
446 htab_t tm_wrap_map;
448 void
449 record_tm_replacement (tree from, tree to)
451 struct tree_map **slot, *h;
453 /* Do not inline wrapper functions that will get replaced in the TM
454 pass.
456 Suppose you have foo() that will get replaced into tmfoo(). Make
457 sure the inliner doesn't try to outsmart us and inline foo()
458 before we get a chance to do the TM replacement. */
459 DECL_UNINLINABLE (from) = 1;
461 if (tm_wrap_map == NULL)
462 tm_wrap_map = htab_create_ggc (32, tree_map_hash, tree_map_eq, 0);
464 h = ggc_alloc_tree_map ();
465 h->hash = htab_hash_pointer (from);
466 h->base.from = from;
467 h->to = to;
469 slot = (struct tree_map **)
470 htab_find_slot_with_hash (tm_wrap_map, h, h->hash, INSERT);
471 *slot = h;
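/* Illustrative sketch: this map is filled in when the front end handles
   the transaction_wrap attribute.  Given hypothetical declarations like

     extern void work (void);
     extern void work_tm (void) __attribute__ ((transaction_wrap (work)));

   the attribute handler effectively does record_tm_replacement (work,
   work_tm), so that calls to work() inside a transaction are later
   redirected to work_tm() via find_tm_replacement_function.  */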
474 /* Return a TM-aware replacement function for DECL. */
476 static tree
477 find_tm_replacement_function (tree fndecl)
479 if (tm_wrap_map)
481 struct tree_map *h, in;
483 in.base.from = fndecl;
484 in.hash = htab_hash_pointer (fndecl);
485 h = (struct tree_map *) htab_find_with_hash (tm_wrap_map, &in, in.hash);
486 if (h)
487 return h->to;
490 /* ??? We may well want TM versions of most of the common <string.h>
491 functions. For now, we already have the ones handled below defined. */
492 /* Adjust expand_call_tm() attributes as necessary for the cases
493 handled here: */
494 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
495 switch (DECL_FUNCTION_CODE (fndecl))
497 case BUILT_IN_MEMCPY:
498 return builtin_decl_explicit (BUILT_IN_TM_MEMCPY);
499 case BUILT_IN_MEMMOVE:
500 return builtin_decl_explicit (BUILT_IN_TM_MEMMOVE);
501 case BUILT_IN_MEMSET:
502 return builtin_decl_explicit (BUILT_IN_TM_MEMSET);
503 default:
504 return NULL;
507 return NULL;
510 /* When appropriate, record TM replacement for memory allocation functions.
512 FROM is the FNDECL to wrap. */
513 void
514 tm_malloc_replacement (tree from)
516 const char *str;
517 tree to;
519 if (TREE_CODE (from) != FUNCTION_DECL)
520 return;
522 /* If we have a previous replacement, the user must be explicitly
523 wrapping malloc/calloc/free. They better know what they're
524 doing... */
525 if (find_tm_replacement_function (from))
526 return;
528 str = IDENTIFIER_POINTER (DECL_NAME (from));
530 if (!strcmp (str, "malloc"))
531 to = builtin_decl_explicit (BUILT_IN_TM_MALLOC);
532 else if (!strcmp (str, "calloc"))
533 to = builtin_decl_explicit (BUILT_IN_TM_CALLOC);
534 else if (!strcmp (str, "free"))
535 to = builtin_decl_explicit (BUILT_IN_TM_FREE);
536 else
537 return;
539 TREE_NOTHROW (to) = 0;
541 record_tm_replacement (from, to);
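/* Illustrative sketch: with the replacements recorded above, an allocation
   inside a transaction, e.g. (hypothetical user code)

     __transaction_relaxed { p = malloc (n); ... ; free (p); }

   is redirected to the transactional allocator builtins
   (BUILT_IN_TM_MALLOC, BUILT_IN_TM_FREE), which lets the runtime undo
   allocations made by a transaction that subsequently aborts.  */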
544 /* Diagnostics for tm_safe functions/regions. Called by the front end
545 once we've lowered the function to high-gimple. */
547 /* Subroutine of diagnose_tm_safe_errors, called through walk_gimple_seq.
548 Process exactly one statement. WI->INFO is set to non-null when in
549 the context of a tm_safe function, and null for a __transaction block. */
551 #define DIAG_TM_OUTER 1
552 #define DIAG_TM_SAFE 2
553 #define DIAG_TM_RELAXED 4
555 struct diagnose_tm
557 unsigned int summary_flags : 8;
558 unsigned int block_flags : 8;
559 unsigned int func_flags : 8;
560 unsigned int saw_volatile : 1;
561 gimple stmt;
564 /* Return true if T is a volatile variable of some kind. */
566 static bool
567 volatile_var_p (tree t)
569 return (SSA_VAR_P (t)
570 && TREE_THIS_VOLATILE (TREE_TYPE (t)));
573 /* Tree callback function for diagnose_tm pass. */
575 static tree
576 diagnose_tm_1_op (tree *tp, int *walk_subtrees ATTRIBUTE_UNUSED,
577 void *data)
579 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
580 struct diagnose_tm *d = (struct diagnose_tm *) wi->info;
582 if (volatile_var_p (*tp)
583 && d->block_flags & DIAG_TM_SAFE
584 && !d->saw_volatile)
586 d->saw_volatile = 1;
587 error_at (gimple_location (d->stmt),
588 "invalid volatile use of %qD inside transaction",
589 *tp);
592 return NULL_TREE;
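/* Illustrative sketch: the volatile check above fires for atomic
   transactions only, e.g. (hypothetical user code, -fgnu-tm)

     volatile int v;
     void f (void)
     {
       __transaction_atomic { v = 1; }   // error: invalid volatile use
                                         // of 'v' inside transaction
     }

   A relaxed transaction (__transaction_relaxed) is not diagnosed here,
   because only blocks carrying DIAG_TM_SAFE are checked.  */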
595 static tree
596 diagnose_tm_1 (gimple_stmt_iterator *gsi, bool *handled_ops_p,
597 struct walk_stmt_info *wi)
599 gimple stmt = gsi_stmt (*gsi);
600 struct diagnose_tm *d = (struct diagnose_tm *) wi->info;
602 /* Save stmt for use in leaf analysis. */
603 d->stmt = stmt;
605 switch (gimple_code (stmt))
607 case GIMPLE_CALL:
609 tree fn = gimple_call_fn (stmt);
611 if ((d->summary_flags & DIAG_TM_OUTER) == 0
612 && is_tm_may_cancel_outer (fn))
613 error_at (gimple_location (stmt),
614 "%<transaction_may_cancel_outer%> function call not within"
615 " outer transaction or %<transaction_may_cancel_outer%>");
617 if (d->summary_flags & DIAG_TM_SAFE)
619 bool is_safe, direct_call_p;
620 tree replacement;
622 if (TREE_CODE (fn) == ADDR_EXPR
623 && TREE_CODE (TREE_OPERAND (fn, 0)) == FUNCTION_DECL)
625 direct_call_p = true;
626 replacement = TREE_OPERAND (fn, 0);
627 replacement = find_tm_replacement_function (replacement);
628 if (replacement)
629 fn = replacement;
631 else
633 direct_call_p = false;
634 replacement = NULL_TREE;
637 if (is_tm_safe_or_pure (fn))
638 is_safe = true;
639 else if (is_tm_callable (fn) || is_tm_irrevocable (fn))
641 /* A function explicitly marked transaction_callable as
642 opposed to transaction_safe is being defined to be
643 unsafe as part of its ABI, regardless of its contents. */
644 is_safe = false;
646 else if (direct_call_p)
648 if (flags_from_decl_or_type (fn) & ECF_TM_BUILTIN)
649 is_safe = true;
650 else if (replacement)
652 /* ??? At present we've been considering replacements
653 merely transaction_callable, and therefore might
654 enter irrevocable. The tm_wrap attribute has not
655 yet made it into the new language spec. */
656 is_safe = false;
658 else
660 /* ??? Diagnostics for unmarked direct calls moved into
661 the IPA pass. Section 3.2 of the spec details how
662 functions not marked should be considered "implicitly
663 safe" based on having examined the function body. */
664 is_safe = true;
667 else
669 /* An unmarked indirect call. Consider it unsafe even
670 though optimization may yet figure out how to inline. */
671 is_safe = false;
674 if (!is_safe)
676 if (TREE_CODE (fn) == ADDR_EXPR)
677 fn = TREE_OPERAND (fn, 0);
678 if (d->block_flags & DIAG_TM_SAFE)
680 if (direct_call_p)
681 error_at (gimple_location (stmt),
682 "unsafe function call %qD within "
683 "atomic transaction", fn);
684 else
686 if (!DECL_P (fn) || DECL_NAME (fn))
687 error_at (gimple_location (stmt),
688 "unsafe function call %qE within "
689 "atomic transaction", fn);
690 else
691 error_at (gimple_location (stmt),
692 "unsafe indirect function call within "
693 "atomic transaction");
696 else
698 if (direct_call_p)
699 error_at (gimple_location (stmt),
700 "unsafe function call %qD within "
701 "%<transaction_safe%> function", fn);
702 else
704 if (!DECL_P (fn) || DECL_NAME (fn))
705 error_at (gimple_location (stmt),
706 "unsafe function call %qE within "
707 "%<transaction_safe%> function", fn);
708 else
709 error_at (gimple_location (stmt),
710 "unsafe indirect function call within "
711 "%<transaction_safe%> function");
717 break;
719 case GIMPLE_ASM:
720 /* ??? We ought to come up with a way to add attributes to
721 asm statements, and then add "transaction_safe" to them.
722 Either that or get the language spec to resurrect __tm_waiver. */
723 if (d->block_flags & DIAG_TM_SAFE)
724 error_at (gimple_location (stmt),
725 "asm not allowed in atomic transaction");
726 else if (d->func_flags & DIAG_TM_SAFE)
727 error_at (gimple_location (stmt),
728 "asm not allowed in %<transaction_safe%> function");
729 break;
731 case GIMPLE_TRANSACTION:
733 unsigned char inner_flags = DIAG_TM_SAFE;
735 if (gimple_transaction_subcode (stmt) & GTMA_IS_RELAXED)
737 if (d->block_flags & DIAG_TM_SAFE)
738 error_at (gimple_location (stmt),
739 "relaxed transaction in atomic transaction");
740 else if (d->func_flags & DIAG_TM_SAFE)
741 error_at (gimple_location (stmt),
742 "relaxed transaction in %<transaction_safe%> function");
743 inner_flags = DIAG_TM_RELAXED;
745 else if (gimple_transaction_subcode (stmt) & GTMA_IS_OUTER)
747 if (d->block_flags)
748 error_at (gimple_location (stmt),
749 "outer transaction in transaction");
750 else if (d->func_flags & DIAG_TM_OUTER)
751 error_at (gimple_location (stmt),
752 "outer transaction in "
753 "%<transaction_may_cancel_outer%> function");
754 else if (d->func_flags & DIAG_TM_SAFE)
755 error_at (gimple_location (stmt),
756 "outer transaction in %<transaction_safe%> function");
757 inner_flags |= DIAG_TM_OUTER;
760 *handled_ops_p = true;
761 if (gimple_transaction_body (stmt))
763 struct walk_stmt_info wi_inner;
764 struct diagnose_tm d_inner;
766 memset (&d_inner, 0, sizeof (d_inner));
767 d_inner.func_flags = d->func_flags;
768 d_inner.block_flags = d->block_flags | inner_flags;
769 d_inner.summary_flags = d_inner.func_flags | d_inner.block_flags;
771 memset (&wi_inner, 0, sizeof (wi_inner));
772 wi_inner.info = &d_inner;
774 walk_gimple_seq (gimple_transaction_body (stmt),
775 diagnose_tm_1, diagnose_tm_1_op, &wi_inner);
778 break;
780 default:
781 break;
784 return NULL_TREE;
787 static unsigned int
788 diagnose_tm_blocks (void)
790 struct walk_stmt_info wi;
791 struct diagnose_tm d;
793 memset (&d, 0, sizeof (d));
794 if (is_tm_may_cancel_outer (current_function_decl))
795 d.func_flags = DIAG_TM_OUTER | DIAG_TM_SAFE;
796 else if (is_tm_safe (current_function_decl))
797 d.func_flags = DIAG_TM_SAFE;
798 d.summary_flags = d.func_flags;
800 memset (&wi, 0, sizeof (wi));
801 wi.info = &d;
803 walk_gimple_seq (gimple_body (current_function_decl),
804 diagnose_tm_1, diagnose_tm_1_op, &wi);
806 return 0;
809 namespace {
811 const pass_data pass_data_diagnose_tm_blocks =
813 GIMPLE_PASS, /* type */
814 "*diagnose_tm_blocks", /* name */
815 OPTGROUP_NONE, /* optinfo_flags */
816 true, /* has_gate */
817 true, /* has_execute */
818 TV_TRANS_MEM, /* tv_id */
819 PROP_gimple_any, /* properties_required */
820 0, /* properties_provided */
821 0, /* properties_destroyed */
822 0, /* todo_flags_start */
823 0, /* todo_flags_finish */
826 class pass_diagnose_tm_blocks : public gimple_opt_pass
828 public:
829 pass_diagnose_tm_blocks (gcc::context *ctxt)
830 : gimple_opt_pass (pass_data_diagnose_tm_blocks, ctxt)
833 /* opt_pass methods: */
834 bool gate () { return gate_tm (); }
835 unsigned int execute () { return diagnose_tm_blocks (); }
837 }; // class pass_diagnose_tm_blocks
839 } // anon namespace
841 gimple_opt_pass *
842 make_pass_diagnose_tm_blocks (gcc::context *ctxt)
844 return new pass_diagnose_tm_blocks (ctxt);
847 /* Instead of instrumenting thread private memory, we save the
848 addresses in a log which we later use to save/restore the addresses
849 upon transaction start/restart.
851 The log is keyed by address, where each element contains individual
852 statements among different code paths that perform the store.
854 This log is later used to generate either plain save/restore of the
855 addresses upon transaction start/restart, or calls to the ITM_L*
856 logging functions.
858 So for something like:
860 struct large { int x[1000]; };
861 struct large lala = { 0 };
862 __transaction {
863 lala.x[i] = 123;
867 We can either save/restore:
869 lala = { 0 };
870 trxn = _ITM_startTransaction ();
871 if (trxn & a_saveLiveVariables)
872 tmp_lala1 = lala.x[i];
873 else if (trxn & a_restoreLiveVariables)
874 lala.x[i] = tmp_lala1;
876 or use the logging functions:
878 lala = { 0 };
879 trxn = _ITM_startTransaction ();
880 _ITM_LU4 (&lala.x[i]);
882 Obviously, if we use _ITM_L* to log, we prefer to place the _ITM_L*
883 call as far up the dominator tree as possible, so that it shadows all
884 of the writes to a given location (thus reducing the total number of
885 logging calls), but not so high that it is called on a path that does
886 not perform a write. */
888 /* One individual log entry. We may have multiple statements for the
889 same location if neither dominates the other (on different
890 execution paths). */
891 typedef struct tm_log_entry
893 /* Address to save. */
894 tree addr;
895 /* Entry block for the transaction this address occurs in. */
896 basic_block entry_block;
897 /* Dominating statements the store occurs in. */
898 gimple_vec stmts;
899 /* Initially, while we are building the log, we place a nonzero
900 value here to mean that this address *will* be saved with a
901 save/restore sequence. Later, when generating the save sequence
902 we place the SSA temp generated here. */
903 tree save_var;
904 } *tm_log_entry_t;
907 /* Log entry hashtable helpers. */
909 struct log_entry_hasher
911 typedef tm_log_entry value_type;
912 typedef tm_log_entry compare_type;
913 static inline hashval_t hash (const value_type *);
914 static inline bool equal (const value_type *, const compare_type *);
915 static inline void remove (value_type *);
918 /* Htab support. Return hash value for a `tm_log_entry'. */
919 inline hashval_t
920 log_entry_hasher::hash (const value_type *log)
922 return iterative_hash_expr (log->addr, 0);
925 /* Htab support. Return true if two log entries are the same. */
926 inline bool
927 log_entry_hasher::equal (const value_type *log1, const compare_type *log2)
929 /* FIXME:
931 rth: I suggest that we get rid of the component refs etc.
932 I.e. resolve the reference to base + offset.
934 We may need to actually finish a merge with mainline for this,
935 since we'd like to be presented with Richi's MEM_REF_EXPRs more
936 often than not. But in the meantime your tm_log_entry could save
937 the results of get_inner_reference.
939 See: g++.dg/tm/pr46653.C
942 /* Special case plain equality because operand_equal_p() below will
943 return FALSE if the addresses are equal but they have
944 side-effects (e.g. a volatile address). */
945 if (log1->addr == log2->addr)
946 return true;
948 return operand_equal_p (log1->addr, log2->addr, 0);
951 /* Htab support. Free one tm_log_entry. */
952 inline void
953 log_entry_hasher::remove (value_type *lp)
955 lp->stmts.release ();
956 free (lp);
960 /* The actual log. */
961 static hash_table <log_entry_hasher> tm_log;
963 /* Addresses to log with a save/restore sequence. These should be in
964 dominator order. */
965 static vec<tree> tm_log_save_addresses;
967 enum thread_memory_type
969 mem_non_local = 0,
970 mem_thread_local,
971 mem_transaction_local,
972 mem_max
975 typedef struct tm_new_mem_map
977 /* SSA_NAME being dereferenced. */
978 tree val;
979 enum thread_memory_type local_new_memory;
980 } tm_new_mem_map_t;
982 /* Hashtable helpers. */
984 struct tm_mem_map_hasher : typed_free_remove <tm_new_mem_map_t>
986 typedef tm_new_mem_map_t value_type;
987 typedef tm_new_mem_map_t compare_type;
988 static inline hashval_t hash (const value_type *);
989 static inline bool equal (const value_type *, const compare_type *);
992 inline hashval_t
993 tm_mem_map_hasher::hash (const value_type *v)
995 return (intptr_t)v->val >> 4;
998 inline bool
999 tm_mem_map_hasher::equal (const value_type *v, const compare_type *c)
1001 return v->val == c->val;
1004 /* Map for an SSA_NAME originally pointing to a non-aliased new piece
1005 of memory (malloc, alloca, etc). */
1006 static hash_table <tm_mem_map_hasher> tm_new_mem_hash;
1008 /* Initialize logging data structures. */
1009 static void
1010 tm_log_init (void)
1012 tm_log.create (10);
1013 tm_new_mem_hash.create (5);
1014 tm_log_save_addresses.create (5);
1017 /* Free logging data structures. */
1018 static void
1019 tm_log_delete (void)
1021 tm_log.dispose ();
1022 tm_new_mem_hash.dispose ();
1023 tm_log_save_addresses.release ();
1026 /* Return true if MEM is a transaction invariant memory for the TM
1027 region starting at REGION_ENTRY_BLOCK. */
1028 static bool
1029 transaction_invariant_address_p (const_tree mem, basic_block region_entry_block)
1031 if ((TREE_CODE (mem) == INDIRECT_REF || TREE_CODE (mem) == MEM_REF)
1032 && TREE_CODE (TREE_OPERAND (mem, 0)) == SSA_NAME)
1034 basic_block def_bb;
1036 def_bb = gimple_bb (SSA_NAME_DEF_STMT (TREE_OPERAND (mem, 0)));
1037 return def_bb != region_entry_block
1038 && dominated_by_p (CDI_DOMINATORS, region_entry_block, def_bb);
1041 mem = strip_invariant_refs (mem);
1042 return mem && (CONSTANT_CLASS_P (mem) || decl_address_invariant_p (mem));
1045 /* Given an address ADDR in STMT, find it in the memory log or add it,
1046 making sure to keep only the addresses highest in the dominator
1047 tree.
1049 ENTRY_BLOCK is the entry_block for the transaction.
1051 If we find the address in the log, make sure it's either the same
1052 address, or an equivalent one that dominates ADDR.
1054 If we find the address, but neither ADDR dominates the found
1055 address, nor the found one dominates ADDR, we're on different
1056 execution paths. Add it.
1058 If known, ENTRY_BLOCK is the entry block for the region, otherwise
1059 NULL. */
1060 static void
1061 tm_log_add (basic_block entry_block, tree addr, gimple stmt)
1063 tm_log_entry **slot;
1064 struct tm_log_entry l, *lp;
1066 l.addr = addr;
1067 slot = tm_log.find_slot (&l, INSERT);
1068 if (!*slot)
1070 tree type = TREE_TYPE (addr);
1072 lp = XNEW (struct tm_log_entry);
1073 lp->addr = addr;
1074 *slot = lp;
1076 /* Small invariant addresses can be handled as save/restores. */
1077 if (entry_block
1078 && transaction_invariant_address_p (lp->addr, entry_block)
1079 && TYPE_SIZE_UNIT (type) != NULL
1080 && host_integerp (TYPE_SIZE_UNIT (type), 1)
1081 && (tree_low_cst (TYPE_SIZE_UNIT (type), 1)
1082 < PARAM_VALUE (PARAM_TM_MAX_AGGREGATE_SIZE))
1083 /* We must be able to copy this type normally. I.e., no
1084 special constructors and the like. */
1085 && !TREE_ADDRESSABLE (type))
1087 lp->save_var = create_tmp_reg (TREE_TYPE (lp->addr), "tm_save");
1088 lp->stmts.create (0);
1089 lp->entry_block = entry_block;
1090 /* Save addresses separately in dominator order so we don't
1091 get confused by overlapping addresses in the save/restore
1092 sequence. */
1093 tm_log_save_addresses.safe_push (lp->addr);
1095 else
1097 /* Use the logging functions. */
1098 lp->stmts.create (5);
1099 lp->stmts.quick_push (stmt);
1100 lp->save_var = NULL;
1103 else
1105 size_t i;
1106 gimple oldstmt;
1108 lp = *slot;
1110 /* If we're generating a save/restore sequence, we don't care
1111 about statements. */
1112 if (lp->save_var)
1113 return;
1115 for (i = 0; lp->stmts.iterate (i, &oldstmt); ++i)
1117 if (stmt == oldstmt)
1118 return;
1119 /* We already have a store to the same address, higher up the
1120 dominator tree. Nothing to do. */
1121 if (dominated_by_p (CDI_DOMINATORS,
1122 gimple_bb (stmt), gimple_bb (oldstmt)))
1123 return;
1124 /* We should be processing blocks in dominator tree order. */
1125 gcc_assert (!dominated_by_p (CDI_DOMINATORS,
1126 gimple_bb (oldstmt), gimple_bb (stmt)));
1128 /* Store is on a different code path. */
1129 lp->stmts.safe_push (stmt);
1133 /* Gimplify the address of a TARGET_MEM_REF. Return the SSA_NAME
1134 result, insert the new statements before GSI. */
1136 static tree
1137 gimplify_addr (gimple_stmt_iterator *gsi, tree x)
1139 if (TREE_CODE (x) == TARGET_MEM_REF)
1140 x = tree_mem_ref_addr (build_pointer_type (TREE_TYPE (x)), x);
1141 else
1142 x = build_fold_addr_expr (x);
1143 return force_gimple_operand_gsi (gsi, x, true, NULL, true, GSI_SAME_STMT);
1146 /* Instrument one address with the logging functions.
1147 ADDR is the address to save.
1148 STMT is the statement before which to place it. */
1149 static void
1150 tm_log_emit_stmt (tree addr, gimple stmt)
1152 tree type = TREE_TYPE (addr);
1153 tree size = TYPE_SIZE_UNIT (type);
1154 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1155 gimple log;
1156 enum built_in_function code = BUILT_IN_TM_LOG;
1158 if (type == float_type_node)
1159 code = BUILT_IN_TM_LOG_FLOAT;
1160 else if (type == double_type_node)
1161 code = BUILT_IN_TM_LOG_DOUBLE;
1162 else if (type == long_double_type_node)
1163 code = BUILT_IN_TM_LOG_LDOUBLE;
1164 else if (host_integerp (size, 1))
1166 unsigned int n = tree_low_cst (size, 1);
1167 switch (n)
1169 case 1:
1170 code = BUILT_IN_TM_LOG_1;
1171 break;
1172 case 2:
1173 code = BUILT_IN_TM_LOG_2;
1174 break;
1175 case 4:
1176 code = BUILT_IN_TM_LOG_4;
1177 break;
1178 case 8:
1179 code = BUILT_IN_TM_LOG_8;
1180 break;
1181 default:
1182 code = BUILT_IN_TM_LOG;
1183 if (TREE_CODE (type) == VECTOR_TYPE)
1185 if (n == 8 && builtin_decl_explicit (BUILT_IN_TM_LOG_M64))
1186 code = BUILT_IN_TM_LOG_M64;
1187 else if (n == 16 && builtin_decl_explicit (BUILT_IN_TM_LOG_M128))
1188 code = BUILT_IN_TM_LOG_M128;
1189 else if (n == 32 && builtin_decl_explicit (BUILT_IN_TM_LOG_M256))
1190 code = BUILT_IN_TM_LOG_M256;
1192 break;
1196 addr = gimplify_addr (&gsi, addr);
1197 if (code == BUILT_IN_TM_LOG)
1198 log = gimple_build_call (builtin_decl_explicit (code), 2, addr, size);
1199 else
1200 log = gimple_build_call (builtin_decl_explicit (code), 1, addr);
1201 gsi_insert_before (&gsi, log, GSI_SAME_STMT);
1204 /* Go through the log and instrument the addresses that must be instrumented
1205 with the logging functions. Leave the save/restore addresses for
1206 later. */
1207 static void
1208 tm_log_emit (void)
1210 hash_table <log_entry_hasher>::iterator hi;
1211 struct tm_log_entry *lp;
1213 FOR_EACH_HASH_TABLE_ELEMENT (tm_log, lp, tm_log_entry_t, hi)
1215 size_t i;
1216 gimple stmt;
1218 if (dump_file)
1220 fprintf (dump_file, "TM thread private mem logging: ");
1221 print_generic_expr (dump_file, lp->addr, 0);
1222 fprintf (dump_file, "\n");
1225 if (lp->save_var)
1227 if (dump_file)
1228 fprintf (dump_file, "DUMPING to variable\n");
1229 continue;
1231 else
1233 if (dump_file)
1234 fprintf (dump_file, "DUMPING with logging functions\n");
1235 for (i = 0; lp->stmts.iterate (i, &stmt); ++i)
1236 tm_log_emit_stmt (lp->addr, stmt);
1241 /* Emit the save sequence for the corresponding addresses in the log.
1242 ENTRY_BLOCK is the entry block for the transaction.
1243 BB is the basic block to insert the code in. */
1244 static void
1245 tm_log_emit_saves (basic_block entry_block, basic_block bb)
1247 size_t i;
1248 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1249 gimple stmt;
1250 struct tm_log_entry l, *lp;
1252 for (i = 0; i < tm_log_save_addresses.length (); ++i)
1254 l.addr = tm_log_save_addresses[i];
1255 lp = *(tm_log.find_slot (&l, NO_INSERT));
1256 gcc_assert (lp->save_var != NULL);
1258 /* We only care about variables in the current transaction. */
1259 if (lp->entry_block != entry_block)
1260 continue;
1262 stmt = gimple_build_assign (lp->save_var, unshare_expr (lp->addr));
1264 /* Make sure we can create an SSA_NAME for this type. For
1265 instance, aggregates aren't allowed, in which case the system
1266 will create a VOP for us and everything will just work. */
1267 if (is_gimple_reg_type (TREE_TYPE (lp->save_var)))
1269 lp->save_var = make_ssa_name (lp->save_var, stmt);
1270 gimple_assign_set_lhs (stmt, lp->save_var);
1273 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1277 /* Emit the restore sequence for the corresponding addresses in the log.
1278 ENTRY_BLOCK is the entry block for the transaction.
1279 BB is the basic block to insert the code in. */
1280 static void
1281 tm_log_emit_restores (basic_block entry_block, basic_block bb)
1283 int i;
1284 struct tm_log_entry l, *lp;
1285 gimple_stmt_iterator gsi;
1286 gimple stmt;
1288 for (i = tm_log_save_addresses.length () - 1; i >= 0; i--)
1290 l.addr = tm_log_save_addresses[i];
1291 lp = *(tm_log.find_slot (&l, NO_INSERT));
1292 gcc_assert (lp->save_var != NULL);
1294 /* We only care about variables in the current transaction. */
1295 if (lp->entry_block != entry_block)
1296 continue;
1298 /* Restores are in LIFO order from the saves in case we have
1299 overlaps. */
1300 gsi = gsi_start_bb (bb);
1302 stmt = gimple_build_assign (unshare_expr (lp->addr), lp->save_var);
1303 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1308 static tree lower_sequence_tm (gimple_stmt_iterator *, bool *,
1309 struct walk_stmt_info *);
1310 static tree lower_sequence_no_tm (gimple_stmt_iterator *, bool *,
1311 struct walk_stmt_info *);
1313 /* Evaluate an address X being dereferenced and determine if it
1314 originally points to a non aliased new chunk of memory (malloc,
1315 alloca, etc).
1317 Return MEM_THREAD_LOCAL if it points to a thread-local address.
1318 Return MEM_TRANSACTION_LOCAL if it points to a transaction-local address.
1319 Return MEM_NON_LOCAL otherwise.
1321 ENTRY_BLOCK is the entry block to the transaction containing the
1322 dereference of X. */
1323 static enum thread_memory_type
1324 thread_private_new_memory (basic_block entry_block, tree x)
1326 gimple stmt = NULL;
1327 enum tree_code code;
1328 tm_new_mem_map_t **slot;
1329 tm_new_mem_map_t elt, *elt_p;
1330 tree val = x;
1331 enum thread_memory_type retval = mem_transaction_local;
1333 if (!entry_block
1334 || TREE_CODE (x) != SSA_NAME
1335 /* Possible uninitialized use, or a function argument. In
1336 either case, we don't care. */
1337 || SSA_NAME_IS_DEFAULT_DEF (x))
1338 return mem_non_local;
1340 /* Look in cache first. */
1341 elt.val = x;
1342 slot = tm_new_mem_hash.find_slot (&elt, INSERT);
1343 elt_p = *slot;
1344 if (elt_p)
1345 return elt_p->local_new_memory;
1347 /* Optimistically assume the memory is transaction local during
1348 processing. This catches recursion into this variable. */
1349 *slot = elt_p = XNEW (tm_new_mem_map_t);
1350 elt_p->val = val;
1351 elt_p->local_new_memory = mem_transaction_local;
1353 /* Search DEF chain to find the original definition of this address. */
1356 if (ptr_deref_may_alias_global_p (x))
1358 /* Address escapes. This is not thread-private. */
1359 retval = mem_non_local;
1360 goto new_memory_ret;
1363 stmt = SSA_NAME_DEF_STMT (x);
1365 /* If the malloc call is outside the transaction, this is
1366 thread-local. */
1367 if (retval != mem_thread_local
1368 && !dominated_by_p (CDI_DOMINATORS, gimple_bb (stmt), entry_block))
1369 retval = mem_thread_local;
1371 if (is_gimple_assign (stmt))
1373 code = gimple_assign_rhs_code (stmt);
1374 /* x = foo ==> foo */
1375 if (code == SSA_NAME)
1376 x = gimple_assign_rhs1 (stmt);
1377 /* x = foo + n ==> foo */
1378 else if (code == POINTER_PLUS_EXPR)
1379 x = gimple_assign_rhs1 (stmt);
1380 /* x = (cast*) foo ==> foo */
1381 else if (code == VIEW_CONVERT_EXPR || code == NOP_EXPR)
1382 x = gimple_assign_rhs1 (stmt);
1383 /* x = c ? op1 : op2 == > op1 or op2 just like a PHI */
1384 else if (code == COND_EXPR)
1386 tree op1 = gimple_assign_rhs2 (stmt);
1387 tree op2 = gimple_assign_rhs3 (stmt);
1388 enum thread_memory_type mem;
1389 retval = thread_private_new_memory (entry_block, op1);
1390 if (retval == mem_non_local)
1391 goto new_memory_ret;
1392 mem = thread_private_new_memory (entry_block, op2);
1393 retval = MIN (retval, mem);
1394 goto new_memory_ret;
1396 else
1398 retval = mem_non_local;
1399 goto new_memory_ret;
1402 else
1404 if (gimple_code (stmt) == GIMPLE_PHI)
1406 unsigned int i;
1407 enum thread_memory_type mem;
1408 tree phi_result = gimple_phi_result (stmt);
1410 /* If any of the ancestors are non-local, we are sure to
1411 be non-local. Otherwise we can avoid doing anything
1412 and inherit what has already been generated. */
1413 retval = mem_max;
1414 for (i = 0; i < gimple_phi_num_args (stmt); ++i)
1416 tree op = PHI_ARG_DEF (stmt, i);
1418 /* Exclude self-assignment. */
1419 if (phi_result == op)
1420 continue;
1422 mem = thread_private_new_memory (entry_block, op);
1423 if (mem == mem_non_local)
1425 retval = mem;
1426 goto new_memory_ret;
1428 retval = MIN (retval, mem);
1430 goto new_memory_ret;
1432 break;
1435 while (TREE_CODE (x) == SSA_NAME);
1437 if (stmt && is_gimple_call (stmt) && gimple_call_flags (stmt) & ECF_MALLOC)
1438 /* Thread-local or transaction-local. */
1439 ;
1440 else
1441 retval = mem_non_local;
1443 new_memory_ret:
1444 elt_p->local_new_memory = retval;
1445 return retval;
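/* Illustrative sketch: for hypothetical user code like

     #include <stdlib.h>
     extern int global;
     void f (void)
     {
       int *p = malloc (sizeof *p);      // thread-local w.r.t. the txn
       __transaction_atomic
         {
           int *q = malloc (sizeof *q);  // transaction-local
           *p = 1;                       // thread-private: logged instead
                                         //   of getting a write barrier
           *q = 2;                       // nothing needed at all
           global = 3;                   // non-local: needs a write barrier
         }
     }

   thread_private_new_memory classifies *p's base as mem_thread_local
   (its defining malloc is not dominated by the transaction entry) and
   *q's base as mem_transaction_local (allocated inside the region);
   requires_barrier below uses this to decide between logging, doing
   nothing, and emitting a full TM barrier.  */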
1448 /* Determine whether X has to be instrumented using a read
1449 or write barrier.
1451 ENTRY_BLOCK is the entry block for the region where STMT resides,
1452 or NULL if unknown.
1454 STMT is the statement in which X occurs. It is used for thread
1455 private memory instrumentation. If no TPM instrumentation is
1456 desired, STMT should be null. */
1457 static bool
1458 requires_barrier (basic_block entry_block, tree x, gimple stmt)
1460 tree orig = x;
1461 while (handled_component_p (x))
1462 x = TREE_OPERAND (x, 0);
1464 switch (TREE_CODE (x))
1466 case INDIRECT_REF:
1467 case MEM_REF:
1469 enum thread_memory_type ret;
1471 ret = thread_private_new_memory (entry_block, TREE_OPERAND (x, 0));
1472 if (ret == mem_non_local)
1473 return true;
1474 if (stmt && ret == mem_thread_local)
1475 /* ?? Should we pass `orig', or the INDIRECT_REF X. ?? */
1476 tm_log_add (entry_block, orig, stmt);
1478 /* Transaction-locals require nothing at all. For malloc, a
1479 transaction restart frees the memory and we reallocate.
1480 For alloca, the stack pointer gets reset by the retry and
1481 we reallocate. */
1482 return false;
1485 case TARGET_MEM_REF:
1486 if (TREE_CODE (TMR_BASE (x)) != ADDR_EXPR)
1487 return true;
1488 x = TREE_OPERAND (TMR_BASE (x), 0);
1489 if (TREE_CODE (x) == PARM_DECL)
1490 return false;
1491 gcc_assert (TREE_CODE (x) == VAR_DECL);
1492 /* FALLTHRU */
1494 case PARM_DECL:
1495 case RESULT_DECL:
1496 case VAR_DECL:
1497 if (DECL_BY_REFERENCE (x))
1499 /* ??? This value is a pointer, but aggregate_value_p has been
1500 jigged to return true, which confuses needs_to_live_in_memory.
1501 This ought to be cleaned up generically.
1503 FIXME: Verify this still happens after the next mainline
1504 merge. Testcase: g++.dg/tm/pr47554.C. */
1506 return false;
1509 if (is_global_var (x))
1510 return !TREE_READONLY (x);
1511 if (/* FIXME: This condition should actually go below in the
1512 tm_log_add() call, however is_call_clobbered() depends on
1513 aliasing info which is not available during
1514 gimplification. Since requires_barrier() gets called
1515 during lower_sequence_tm/gimplification, leave the call
1516 to needs_to_live_in_memory until we eliminate
1517 lower_sequence_tm altogether. */
1518 needs_to_live_in_memory (x))
1519 return true;
1520 else
1522 /* For local memory that doesn't escape (aka thread private
1523 memory), we can either save the value at the beginning of
1524 the transaction and restore on restart, or call a tm
1525 function to dynamically save and restore on restart
1526 (ITM_L*). */
1527 if (stmt)
1528 tm_log_add (entry_block, orig, stmt);
1529 return false;
1532 default:
1533 return false;
1537 /* Mark the GIMPLE_ASSIGN statement as appropriate for being inside
1538 a transaction region. */
1540 static void
1541 examine_assign_tm (unsigned *state, gimple_stmt_iterator *gsi)
1543 gimple stmt = gsi_stmt (*gsi);
1545 if (requires_barrier (/*entry_block=*/NULL, gimple_assign_rhs1 (stmt), NULL))
1546 *state |= GTMA_HAVE_LOAD;
1547 if (requires_barrier (/*entry_block=*/NULL, gimple_assign_lhs (stmt), NULL))
1548 *state |= GTMA_HAVE_STORE;
1551 /* Mark a GIMPLE_CALL as appropriate for being inside a transaction. */
1553 static void
1554 examine_call_tm (unsigned *state, gimple_stmt_iterator *gsi)
1556 gimple stmt = gsi_stmt (*gsi);
1557 tree fn;
1559 if (is_tm_pure_call (stmt))
1560 return;
1562 /* Check if this call is a transaction abort. */
1563 fn = gimple_call_fndecl (stmt);
1564 if (is_tm_abort (fn))
1565 *state |= GTMA_HAVE_ABORT;
1567 /* Note that something may happen. */
1568 *state |= GTMA_HAVE_LOAD | GTMA_HAVE_STORE;
1571 /* Lower a GIMPLE_TRANSACTION statement. */
1573 static void
1574 lower_transaction (gimple_stmt_iterator *gsi, struct walk_stmt_info *wi)
1576 gimple g, stmt = gsi_stmt (*gsi);
1577 unsigned int *outer_state = (unsigned int *) wi->info;
1578 unsigned int this_state = 0;
1579 struct walk_stmt_info this_wi;
1581 /* First, lower the body. The scanning that we do inside gives
1582 us some idea of what we're dealing with. */
1583 memset (&this_wi, 0, sizeof (this_wi));
1584 this_wi.info = (void *) &this_state;
1585 walk_gimple_seq_mod (gimple_transaction_body_ptr (stmt),
1586 lower_sequence_tm, NULL, &this_wi);
1588 /* If there was absolutely nothing transaction related inside the
1589 transaction, we may elide it. Likewise if this is a nested
1590 transaction and does not contain an abort. */
1591 if (this_state == 0
1592 || (!(this_state & GTMA_HAVE_ABORT) && outer_state != NULL))
1594 if (outer_state)
1595 *outer_state |= this_state;
1597 gsi_insert_seq_before (gsi, gimple_transaction_body (stmt),
1598 GSI_SAME_STMT);
1599 gimple_transaction_set_body (stmt, NULL);
1601 gsi_remove (gsi, true);
1602 wi->removed_stmt = true;
1603 return;
1606 /* Wrap the body of the transaction in a try-finally node so that
1607 the commit call is always properly called. */
1608 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_TM_COMMIT), 0);
1609 if (flag_exceptions)
1611 tree ptr;
1612 gimple_seq n_seq, e_seq;
1614 n_seq = gimple_seq_alloc_with_stmt (g);
1615 e_seq = NULL;
1617 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_EH_POINTER),
1618 1, integer_zero_node);
1619 ptr = create_tmp_var (ptr_type_node, NULL);
1620 gimple_call_set_lhs (g, ptr);
1621 gimple_seq_add_stmt (&e_seq, g);
1623 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_TM_COMMIT_EH),
1624 1, ptr);
1625 gimple_seq_add_stmt (&e_seq, g);
1627 g = gimple_build_eh_else (n_seq, e_seq);
1630 g = gimple_build_try (gimple_transaction_body (stmt),
1631 gimple_seq_alloc_with_stmt (g), GIMPLE_TRY_FINALLY);
1632 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
1634 gimple_transaction_set_body (stmt, NULL);
1636 /* If the transaction calls abort or if this is an outer transaction,
1637 add an "over" label afterwards. */
1638 if ((this_state & (GTMA_HAVE_ABORT))
1639 || (gimple_transaction_subcode (stmt) & GTMA_IS_OUTER))
1641 tree label = create_artificial_label (UNKNOWN_LOCATION);
1642 gimple_transaction_set_label (stmt, label);
1643 gsi_insert_after (gsi, gimple_build_label (label), GSI_CONTINUE_LINKING);
1646 /* Record the set of operations found for use later. */
1647 this_state |= gimple_transaction_subcode (stmt) & GTMA_DECLARATION_MASK;
1648 gimple_transaction_set_subcode (stmt, this_state);
1651 /* Iterate through the statements in the sequence, lowering them all
1652 as appropriate for being in a transaction. */
1654 static tree
1655 lower_sequence_tm (gimple_stmt_iterator *gsi, bool *handled_ops_p,
1656 struct walk_stmt_info *wi)
1658 unsigned int *state = (unsigned int *) wi->info;
1659 gimple stmt = gsi_stmt (*gsi);
1661 *handled_ops_p = true;
1662 switch (gimple_code (stmt))
1664 case GIMPLE_ASSIGN:
1665 /* Only memory reads/writes need to be instrumented. */
1666 if (gimple_assign_single_p (stmt))
1667 examine_assign_tm (state, gsi);
1668 break;
1670 case GIMPLE_CALL:
1671 examine_call_tm (state, gsi);
1672 break;
1674 case GIMPLE_ASM:
1675 *state |= GTMA_MAY_ENTER_IRREVOCABLE;
1676 break;
1678 case GIMPLE_TRANSACTION:
1679 lower_transaction (gsi, wi);
1680 break;
1682 default:
1683 *handled_ops_p = !gimple_has_substatements (stmt);
1684 break;
1687 return NULL_TREE;
1690 /* Iterate through the statements in the sequence, lowering them all
1691 as appropriate for being outside of a transaction. */
1693 static tree
1694 lower_sequence_no_tm (gimple_stmt_iterator *gsi, bool *handled_ops_p,
1695 struct walk_stmt_info * wi)
1697 gimple stmt = gsi_stmt (*gsi);
1699 if (gimple_code (stmt) == GIMPLE_TRANSACTION)
1701 *handled_ops_p = true;
1702 lower_transaction (gsi, wi);
1704 else
1705 *handled_ops_p = !gimple_has_substatements (stmt);
1707 return NULL_TREE;
1710 /* Main entry point for flattening GIMPLE_TRANSACTION constructs. After
1711 this, GIMPLE_TRANSACTION nodes still exist, but the nested body has
1712 been moved out, and all the data required for constructing a proper
1713 CFG has been recorded. */
1715 static unsigned int
1716 execute_lower_tm (void)
1718 struct walk_stmt_info wi;
1719 gimple_seq body;
1721 /* Transactional clones aren't created until a later pass. */
1722 gcc_assert (!decl_is_tm_clone (current_function_decl));
1724 body = gimple_body (current_function_decl);
1725 memset (&wi, 0, sizeof (wi));
1726 walk_gimple_seq_mod (&body, lower_sequence_no_tm, NULL, &wi);
1727 gimple_set_body (current_function_decl, body);
1729 return 0;
1732 namespace {
1734 const pass_data pass_data_lower_tm =
1736 GIMPLE_PASS, /* type */
1737 "tmlower", /* name */
1738 OPTGROUP_NONE, /* optinfo_flags */
1739 true, /* has_gate */
1740 true, /* has_execute */
1741 TV_TRANS_MEM, /* tv_id */
1742 PROP_gimple_lcf, /* properties_required */
1743 0, /* properties_provided */
1744 0, /* properties_destroyed */
1745 0, /* todo_flags_start */
1746 0, /* todo_flags_finish */
1749 class pass_lower_tm : public gimple_opt_pass
1751 public:
1752 pass_lower_tm (gcc::context *ctxt)
1753 : gimple_opt_pass (pass_data_lower_tm, ctxt)
1756 /* opt_pass methods: */
1757 bool gate () { return gate_tm (); }
1758 unsigned int execute () { return execute_lower_tm (); }
1760 }; // class pass_lower_tm
1762 } // anon namespace
1764 gimple_opt_pass *
1765 make_pass_lower_tm (gcc::context *ctxt)
1767 return new pass_lower_tm (ctxt);
1770 /* Collect region information for each transaction. */
1772 struct tm_region
1774 /* Link to the next unnested transaction. */
1775 struct tm_region *next;
1777 /* Link to the next inner transaction. */
1778 struct tm_region *inner;
1780 /* Link to the next outer transaction. */
1781 struct tm_region *outer;
1783 /* The GIMPLE_TRANSACTION statement beginning this transaction.
1784 After TM_MARK, this gets replaced by a call to
1785 BUILT_IN_TM_START. */
1786 gimple transaction_stmt;
1788 /* After TM_MARK expands the GIMPLE_TRANSACTION into a call to
1789 BUILT_IN_TM_START, this field is true if the transaction is an
1790 outer transaction. */
1791 bool original_transaction_was_outer;
1793 /* Return value from BUILT_IN_TM_START. */
1794 tree tm_state;
1796 /* The entry block to this region. This will always be the first
1797 block of the body of the transaction. */
1798 basic_block entry_block;
1800 /* The first block after an expanded call to _ITM_beginTransaction. */
1801 basic_block restart_block;
1803 /* The set of all blocks that end the region; NULL if only EXIT_BLOCK.
1804 These blocks are still a part of the region (i.e., the border is
1805 inclusive). Note that this set is only complete for paths in the CFG
1806 starting at ENTRY_BLOCK, and that there is no exit block recorded for
1807 the edge to the "over" label. */
1808 bitmap exit_blocks;
1810 /* The set of all blocks that have a TM_IRREVOCABLE call. */
1811 bitmap irr_blocks;
1814 typedef struct tm_region *tm_region_p;
1816 /* True if there are pending edge statements to be committed for the
1817 current function being scanned in the tmmark pass. */
1818 bool pending_edge_inserts_p;
1820 static struct tm_region *all_tm_regions;
1821 static bitmap_obstack tm_obstack;
1824 /* A subroutine of tm_region_init. Record the existence of the
1825 GIMPLE_TRANSACTION statement in a tree of tm_region elements. */
1827 static struct tm_region *
1828 tm_region_init_0 (struct tm_region *outer, basic_block bb, gimple stmt)
1830 struct tm_region *region;
1832 region = (struct tm_region *)
1833 obstack_alloc (&tm_obstack.obstack, sizeof (struct tm_region));
1835 if (outer)
1837 region->next = outer->inner;
1838 outer->inner = region;
1840 else
1842 region->next = all_tm_regions;
1843 all_tm_regions = region;
1845 region->inner = NULL;
1846 region->outer = outer;
1848 region->transaction_stmt = stmt;
1849 region->original_transaction_was_outer = false;
1850 region->tm_state = NULL;
1852 /* There are either one or two edges out of the block containing
1853 the GIMPLE_TRANSACTION, one to the actual region and one to the
1854 "over" label if the region contains an abort. The former will
1855 always be the one marked FALLTHRU. */
1856 region->entry_block = FALLTHRU_EDGE (bb)->dest;
1858 region->exit_blocks = BITMAP_ALLOC (&tm_obstack);
1859 region->irr_blocks = BITMAP_ALLOC (&tm_obstack);
1861 return region;
1864 /* A subroutine of tm_region_init. Record all the exit and
1865 irrevocable blocks in BB into the region's exit_blocks and
1866 irr_blocks bitmaps. Returns the new region being scanned. */
1868 static struct tm_region *
1869 tm_region_init_1 (struct tm_region *region, basic_block bb)
1871 gimple_stmt_iterator gsi;
1872 gimple g;
1874 if (!region
1875 || (!region->irr_blocks && !region->exit_blocks))
1876 return region;
1878 /* Check to see if this is the end of a region by seeing if it
1879 contains a call to __builtin_tm_commit{,_eh}. Note that the
1880 outermost region for DECL_IS_TM_CLONE need not collect this. */
1881 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
1883 g = gsi_stmt (gsi);
1884 if (gimple_code (g) == GIMPLE_CALL)
1886 tree fn = gimple_call_fndecl (g);
1887 if (fn && DECL_BUILT_IN_CLASS (fn) == BUILT_IN_NORMAL)
1889 if ((DECL_FUNCTION_CODE (fn) == BUILT_IN_TM_COMMIT
1890 || DECL_FUNCTION_CODE (fn) == BUILT_IN_TM_COMMIT_EH)
1891 && region->exit_blocks)
1893 bitmap_set_bit (region->exit_blocks, bb->index);
1894 region = region->outer;
1895 break;
1897 if (DECL_FUNCTION_CODE (fn) == BUILT_IN_TM_IRREVOCABLE)
1898 bitmap_set_bit (region->irr_blocks, bb->index);
1902 return region;
1905 /* Collect all of the transaction regions within the current function
1906 and record them in ALL_TM_REGIONS. The REGION parameter may specify
1907 an "outermost" region for use by tm clones. */
1909 static void
1910 tm_region_init (struct tm_region *region)
1912 gimple g;
1913 edge_iterator ei;
1914 edge e;
1915 basic_block bb;
1916 vec<basic_block> queue = vNULL;
1917 bitmap visited_blocks = BITMAP_ALLOC (NULL);
1918 struct tm_region *old_region;
1919 vec<tm_region_p> bb_regions = vNULL;
1921 all_tm_regions = region;
1922 bb = single_succ (ENTRY_BLOCK_PTR);
1924 /* We could store this information in bb->aux, but we may get called
1925 through get_all_tm_blocks() from another pass that may already be
1926 using bb->aux. */
1927 bb_regions.safe_grow_cleared (last_basic_block);
1929 queue.safe_push (bb);
1930 bb_regions[bb->index] = region;
1933 bb = queue.pop ();
1934 region = bb_regions[bb->index];
1935 bb_regions[bb->index] = NULL;
1937 /* Record exit and irrevocable blocks. */
1938 region = tm_region_init_1 (region, bb);
1940 /* Check whether the last statement in the block begins a new region. */
1941 g = last_stmt (bb);
1942 old_region = region;
1943 if (g && gimple_code (g) == GIMPLE_TRANSACTION)
1944 region = tm_region_init_0 (region, bb, g);
1946 /* Process subsequent blocks. */
1947 FOR_EACH_EDGE (e, ei, bb->succs)
1948 if (!bitmap_bit_p (visited_blocks, e->dest->index))
1950 bitmap_set_bit (visited_blocks, e->dest->index);
1951 queue.safe_push (e->dest);
1953 /* If the current block started a new region, make sure that only
1954 the entry block of the new region is associated with this region.
1955 Other successors are still part of the old region. */
1956 if (old_region != region && e->dest != region->entry_block)
1957 bb_regions[e->dest->index] = old_region;
1958 else
1959 bb_regions[e->dest->index] = region;
1962 while (!queue.is_empty ());
1963 queue.release ();
1964 BITMAP_FREE (visited_blocks);
1965 bb_regions.release ();
1968 /* The "gate" function for all transactional memory expansion and optimization
1969 passes. We collect region information for each top-level transaction, and
1970 if we don't find any, we skip all of the TM passes. Each region will have
1971 all of the exit blocks recorded, and the originating statement. */
1973 static bool
1974 gate_tm_init (void)
1976 if (!flag_tm)
1977 return false;
1979 calculate_dominance_info (CDI_DOMINATORS);
1980 bitmap_obstack_initialize (&tm_obstack);
1982 /* If the function is a TM_CLONE, then the entire function is the region. */
1983 if (decl_is_tm_clone (current_function_decl))
1985 struct tm_region *region = (struct tm_region *)
1986 obstack_alloc (&tm_obstack.obstack, sizeof (struct tm_region));
1987 memset (region, 0, sizeof (*region));
1988 region->entry_block = single_succ (ENTRY_BLOCK_PTR);
1989 /* For a clone, the entire function is the region. But even if
1990 we don't need to record any exit blocks, we may need to
1991 record irrevocable blocks. */
1992 region->irr_blocks = BITMAP_ALLOC (&tm_obstack);
1994 tm_region_init (region);
1996 else
1998 tm_region_init (NULL);
2000 /* If we didn't find any regions, cleanup and skip the whole tree
2001 of tm-related optimizations. */
2002 if (all_tm_regions == NULL)
2004 bitmap_obstack_release (&tm_obstack);
2005 return false;
2009 return true;
2012 namespace {
2014 const pass_data pass_data_tm_init =
2016 GIMPLE_PASS, /* type */
2017 "*tminit", /* name */
2018 OPTGROUP_NONE, /* optinfo_flags */
2019 true, /* has_gate */
2020 false, /* has_execute */
2021 TV_TRANS_MEM, /* tv_id */
2022 ( PROP_ssa | PROP_cfg ), /* properties_required */
2023 0, /* properties_provided */
2024 0, /* properties_destroyed */
2025 0, /* todo_flags_start */
2026 0, /* todo_flags_finish */
2029 class pass_tm_init : public gimple_opt_pass
2031 public:
2032 pass_tm_init (gcc::context *ctxt)
2033 : gimple_opt_pass (pass_data_tm_init, ctxt)
2036 /* opt_pass methods: */
2037 bool gate () { return gate_tm_init (); }
2039 }; // class pass_tm_init
2041 } // anon namespace
2043 gimple_opt_pass *
2044 make_pass_tm_init (gcc::context *ctxt)
2046 return new pass_tm_init (ctxt);
2049 /* Add FLAGS to the GIMPLE_TRANSACTION subcode for the transaction region
2050 represented by REGION. */
2052 static inline void
2053 transaction_subcode_ior (struct tm_region *region, unsigned flags)
2055 if (region && region->transaction_stmt)
2057 flags |= gimple_transaction_subcode (region->transaction_stmt);
2058 gimple_transaction_set_subcode (region->transaction_stmt, flags);
2062 /* Construct a memory load in a transactional context. Return the
2063 gimple statement performing the load, or NULL if there is no
2064 TM_LOAD builtin of the appropriate size to do the load.
2066 LOC is the location to use for the new statement(s). */
2068 static gimple
2069 build_tm_load (location_t loc, tree lhs, tree rhs, gimple_stmt_iterator *gsi)
2071 enum built_in_function code = END_BUILTINS;
2072 tree t, type = TREE_TYPE (rhs), decl;
2073 gimple gcall;
2075 if (type == float_type_node)
2076 code = BUILT_IN_TM_LOAD_FLOAT;
2077 else if (type == double_type_node)
2078 code = BUILT_IN_TM_LOAD_DOUBLE;
2079 else if (type == long_double_type_node)
2080 code = BUILT_IN_TM_LOAD_LDOUBLE;
2081 else if (TYPE_SIZE_UNIT (type) != NULL
2082 && host_integerp (TYPE_SIZE_UNIT (type), 1))
2084 switch (tree_low_cst (TYPE_SIZE_UNIT (type), 1))
2086 case 1:
2087 code = BUILT_IN_TM_LOAD_1;
2088 break;
2089 case 2:
2090 code = BUILT_IN_TM_LOAD_2;
2091 break;
2092 case 4:
2093 code = BUILT_IN_TM_LOAD_4;
2094 break;
2095 case 8:
2096 code = BUILT_IN_TM_LOAD_8;
2097 break;
2101 if (code == END_BUILTINS)
2103 decl = targetm.vectorize.builtin_tm_load (type);
2104 if (!decl)
2105 return NULL;
2107 else
2108 decl = builtin_decl_explicit (code);
2110 t = gimplify_addr (gsi, rhs);
2111 gcall = gimple_build_call (decl, 1, t);
2112 gimple_set_location (gcall, loc);
2114 t = TREE_TYPE (TREE_TYPE (decl));
2115 if (useless_type_conversion_p (type, t))
2117 gimple_call_set_lhs (gcall, lhs);
2118 gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
2120 else
2122 gimple g;
2123 tree temp;
2125 temp = create_tmp_reg (t, NULL);
2126 gimple_call_set_lhs (gcall, temp);
2127 gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
2129 t = fold_build1 (VIEW_CONVERT_EXPR, type, temp);
2130 g = gimple_build_assign (lhs, t);
2131 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2134 return gcall;
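/* Editorial sketch, not part of the pass: for a 4-byte transactional
   read such as

       x = *p;

   the statement is rewritten into a call to the matching runtime
   builtin, roughly

       x = _ITM_RU4 (p);          /* BUILT_IN_TM_LOAD_4 */

   going through a temporary plus a VIEW_CONVERT_EXPR when the builtin's
   return type does not match the type of the LHS.  The _ITM_* name is
   the one gtm-builtins.def is assumed to map this builtin to.  */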
2138 /* Similarly for storing TYPE in a transactional context. */
2140 static gimple
2141 build_tm_store (location_t loc, tree lhs, tree rhs, gimple_stmt_iterator *gsi)
2143 enum built_in_function code = END_BUILTINS;
2144 tree t, fn, type = TREE_TYPE (rhs), simple_type;
2145 gimple gcall;
2147 if (type == float_type_node)
2148 code = BUILT_IN_TM_STORE_FLOAT;
2149 else if (type == double_type_node)
2150 code = BUILT_IN_TM_STORE_DOUBLE;
2151 else if (type == long_double_type_node)
2152 code = BUILT_IN_TM_STORE_LDOUBLE;
2153 else if (TYPE_SIZE_UNIT (type) != NULL
2154 && host_integerp (TYPE_SIZE_UNIT (type), 1))
2156 switch (tree_low_cst (TYPE_SIZE_UNIT (type), 1))
2158 case 1:
2159 code = BUILT_IN_TM_STORE_1;
2160 break;
2161 case 2:
2162 code = BUILT_IN_TM_STORE_2;
2163 break;
2164 case 4:
2165 code = BUILT_IN_TM_STORE_4;
2166 break;
2167 case 8:
2168 code = BUILT_IN_TM_STORE_8;
2169 break;
2173 if (code == END_BUILTINS)
2175 fn = targetm.vectorize.builtin_tm_store (type);
2176 if (!fn)
2177 return NULL;
2179 else
2180 fn = builtin_decl_explicit (code);
2182 simple_type = TREE_VALUE (TREE_CHAIN (TYPE_ARG_TYPES (TREE_TYPE (fn))));
2184 if (TREE_CODE (rhs) == CONSTRUCTOR)
2186 /* Handle the easy initialization to zero. */
2187 if (!CONSTRUCTOR_ELTS (rhs))
2188 rhs = build_int_cst (simple_type, 0);
2189 else
2191 /* ...otherwise punt to the caller and probably use
2192 BUILT_IN_TM_MEMMOVE, because we can't wrap a
2193 VIEW_CONVERT_EXPR around a CONSTRUCTOR (below) and produce
2194 valid gimple. */
2195 return NULL;
2198 else if (!useless_type_conversion_p (simple_type, type))
2200 gimple g;
2201 tree temp;
2203 temp = create_tmp_reg (simple_type, NULL);
2204 t = fold_build1 (VIEW_CONVERT_EXPR, simple_type, rhs);
2205 g = gimple_build_assign (temp, t);
2206 gimple_set_location (g, loc);
2207 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2209 rhs = temp;
2212 t = gimplify_addr (gsi, lhs);
2213 gcall = gimple_build_call (fn, 2, t, rhs);
2214 gimple_set_location (gcall, loc);
2215 gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
2217 return gcall;
2221 /* Expand an assignment statement into transactional builtins. */
2223 static void
2224 expand_assign_tm (struct tm_region *region, gimple_stmt_iterator *gsi)
2226 gimple stmt = gsi_stmt (*gsi);
2227 location_t loc = gimple_location (stmt);
2228 tree lhs = gimple_assign_lhs (stmt);
2229 tree rhs = gimple_assign_rhs1 (stmt);
2230 bool store_p = requires_barrier (region->entry_block, lhs, NULL);
2231 bool load_p = requires_barrier (region->entry_block, rhs, NULL);
2232 gimple gcall = NULL;
2234 if (!load_p && !store_p)
2236 /* Add thread private addresses to log if applicable. */
2237 requires_barrier (region->entry_block, lhs, stmt);
2238 gsi_next (gsi);
2239 return;
2242 // Remove original load/store statement.
2243 gsi_remove (gsi, true);
2245 if (load_p && !store_p)
2247 transaction_subcode_ior (region, GTMA_HAVE_LOAD);
2248 gcall = build_tm_load (loc, lhs, rhs, gsi);
2250 else if (store_p && !load_p)
2252 transaction_subcode_ior (region, GTMA_HAVE_STORE);
2253 gcall = build_tm_store (loc, lhs, rhs, gsi);
2255 if (!gcall)
2257 tree lhs_addr, rhs_addr, tmp;
2259 if (load_p)
2260 transaction_subcode_ior (region, GTMA_HAVE_LOAD);
2261 if (store_p)
2262 transaction_subcode_ior (region, GTMA_HAVE_STORE);
2264 /* ??? Figure out if there's any possible overlap between the LHS
2265 and the RHS and if not, use MEMCPY. */
2267 if (load_p && is_gimple_reg (lhs))
2269 tmp = create_tmp_var (TREE_TYPE (lhs), NULL);
2270 lhs_addr = build_fold_addr_expr (tmp);
2272 else
2274 tmp = NULL_TREE;
2275 lhs_addr = gimplify_addr (gsi, lhs);
2277 rhs_addr = gimplify_addr (gsi, rhs);
2278 gcall = gimple_build_call (builtin_decl_explicit (BUILT_IN_TM_MEMMOVE),
2279 3, lhs_addr, rhs_addr,
2280 TYPE_SIZE_UNIT (TREE_TYPE (lhs)));
2281 gimple_set_location (gcall, loc);
2282 gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
2284 if (tmp)
2286 gcall = gimple_build_assign (lhs, tmp);
2287 gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
2291 /* Now that we have the load/store in its instrumented form, add
2292 thread private addresses to the log if applicable. */
2293 if (!store_p)
2294 requires_barrier (region->entry_block, lhs, gcall);
2296 // The calls to build_tm_{store,load} above inserted the instrumented
2297 // call into the stream.
2298 // gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
2302 /* Expand a call statement as appropriate for a transaction. That is,
2303 either verify that the call does not affect the transaction, or
2304 redirect the call to a clone that handles transactions, or change
2305 the transaction state to IRREVOCABLE. Return true if the call is
2306 one of the builtins that end a transaction. */
2308 static bool
2309 expand_call_tm (struct tm_region *region,
2310 gimple_stmt_iterator *gsi)
2312 gimple stmt = gsi_stmt (*gsi);
2313 tree lhs = gimple_call_lhs (stmt);
2314 tree fn_decl;
2315 struct cgraph_node *node;
2316 bool retval = false;
2318 fn_decl = gimple_call_fndecl (stmt);
2320 if (fn_decl == builtin_decl_explicit (BUILT_IN_TM_MEMCPY)
2321 || fn_decl == builtin_decl_explicit (BUILT_IN_TM_MEMMOVE))
2322 transaction_subcode_ior (region, GTMA_HAVE_STORE | GTMA_HAVE_LOAD);
2323 if (fn_decl == builtin_decl_explicit (BUILT_IN_TM_MEMSET))
2324 transaction_subcode_ior (region, GTMA_HAVE_STORE);
2326 if (is_tm_pure_call (stmt))
2327 return false;
2329 if (fn_decl)
2330 retval = is_tm_ending_fndecl (fn_decl);
2331 if (!retval)
2333 /* Assume all non-const/pure calls write to memory, except
2334 transaction ending builtins. */
2335 transaction_subcode_ior (region, GTMA_HAVE_STORE);
2338 /* For indirect calls, we already generated a call into the runtime. */
2339 if (!fn_decl)
2341 tree fn = gimple_call_fn (stmt);
2343 /* We are guaranteed never to go irrevocable on a safe or pure
2344 call, and the pure call was handled above. */
2345 if (is_tm_safe (fn))
2346 return false;
2347 else
2348 transaction_subcode_ior (region, GTMA_MAY_ENTER_IRREVOCABLE);
2350 return false;
2353 node = cgraph_get_node (fn_decl);
2354 /* All calls should have cgraph here. */
2355 if (!node)
2357 /* We can have a nodeless call here if some pass after IPA-tm
2358 added uninstrumented calls. For example, loop distribution
2359 can transform certain loop constructs into __builtin_mem*
2360 calls. In this case, see if we have a suitable TM
2361 replacement and fill in the gaps. */
2362 gcc_assert (DECL_BUILT_IN_CLASS (fn_decl) == BUILT_IN_NORMAL);
2363 enum built_in_function code = DECL_FUNCTION_CODE (fn_decl);
2364 gcc_assert (code == BUILT_IN_MEMCPY
2365 || code == BUILT_IN_MEMMOVE
2366 || code == BUILT_IN_MEMSET);
2368 tree repl = find_tm_replacement_function (fn_decl);
2369 if (repl)
2371 gimple_call_set_fndecl (stmt, repl);
2372 update_stmt (stmt);
2373 node = cgraph_create_node (repl);
2374 node->local.tm_may_enter_irr = false;
2375 return expand_call_tm (region, gsi);
2377 gcc_unreachable ();
2379 if (node->local.tm_may_enter_irr)
2380 transaction_subcode_ior (region, GTMA_MAY_ENTER_IRREVOCABLE);
2382 if (is_tm_abort (fn_decl))
2384 transaction_subcode_ior (region, GTMA_HAVE_ABORT);
2385 return true;
2388 /* Instrument the store if needed.
2390 If the assignment happens inside the function call (return slot
2391 optimization), there is no instrumentation to be done, since
2392 the callee should have done the right thing. */
2393 if (lhs && requires_barrier (region->entry_block, lhs, stmt)
2394 && !gimple_call_return_slot_opt_p (stmt))
2396 tree tmp = create_tmp_reg (TREE_TYPE (lhs), NULL);
2397 location_t loc = gimple_location (stmt);
2398 edge fallthru_edge = NULL;
2400 /* Remember if the call was going to throw. */
2401 if (stmt_can_throw_internal (stmt))
2403 edge_iterator ei;
2404 edge e;
2405 basic_block bb = gimple_bb (stmt);
2407 FOR_EACH_EDGE (e, ei, bb->succs)
2408 if (e->flags & EDGE_FALLTHRU)
2410 fallthru_edge = e;
2411 break;
2415 gimple_call_set_lhs (stmt, tmp);
2416 update_stmt (stmt);
2417 stmt = gimple_build_assign (lhs, tmp);
2418 gimple_set_location (stmt, loc);
2420 /* We cannot throw in the middle of a BB. If the call was going
2421 to throw, place the instrumentation on the fallthru edge, so
2422 the call remains the last statement in the block. */
2423 if (fallthru_edge)
2425 gimple_seq fallthru_seq = gimple_seq_alloc_with_stmt (stmt);
2426 gimple_stmt_iterator fallthru_gsi = gsi_start (fallthru_seq);
2427 expand_assign_tm (region, &fallthru_gsi);
2428 gsi_insert_seq_on_edge (fallthru_edge, fallthru_seq);
2429 pending_edge_inserts_p = true;
2431 else
2433 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2434 expand_assign_tm (region, gsi);
2437 transaction_subcode_ior (region, GTMA_HAVE_STORE);
2440 return retval;
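/* Editorial sketch, assuming a 4-byte store builtin: for a call inside
   a transaction whose result is written to shared memory, e.g.

       q->f = foo ();

   the call's LHS is retargeted to a temporary and the copy-out is then
   expanded as an ordinary transactional store,

       tmp = foo ();
       _ITM_WU4 (&q->f, tmp);     /* via expand_assign_tm */

   and when the call may throw internally, the copy-out is queued on the
   fallthru edge instead, so the call stays last in its block.  */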
2444 /* Expand all statements in BB as appropriate for being inside
2445 a transaction. */
2447 static void
2448 expand_block_tm (struct tm_region *region, basic_block bb)
2450 gimple_stmt_iterator gsi;
2452 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2454 gimple stmt = gsi_stmt (gsi);
2455 switch (gimple_code (stmt))
2457 case GIMPLE_ASSIGN:
2458 /* Only memory reads/writes need to be instrumented. */
2459 if (gimple_assign_single_p (stmt)
2460 && !gimple_clobber_p (stmt))
2462 expand_assign_tm (region, &gsi);
2463 continue;
2465 break;
2467 case GIMPLE_CALL:
2468 if (expand_call_tm (region, &gsi))
2469 return;
2470 break;
2472 case GIMPLE_ASM:
2473 gcc_unreachable ();
2475 default:
2476 break;
2478 if (!gsi_end_p (gsi))
2479 gsi_next (&gsi);
2483 /* Return the list of basic-blocks in REGION.
2485 STOP_AT_IRREVOCABLE_P is true if caller is uninterested in blocks
2486 following a TM_IRREVOCABLE call.
2488 INCLUDE_UNINSTRUMENTED_P is TRUE if we should include the
2489 uninstrumented code path blocks in the list of basic blocks
2490 returned, false otherwise. */
2492 static vec<basic_block>
2493 get_tm_region_blocks (basic_block entry_block,
2494 bitmap exit_blocks,
2495 bitmap irr_blocks,
2496 bitmap all_region_blocks,
2497 bool stop_at_irrevocable_p,
2498 bool include_uninstrumented_p = true)
2500 vec<basic_block> bbs = vNULL;
2501 unsigned i;
2502 edge e;
2503 edge_iterator ei;
2504 bitmap visited_blocks = BITMAP_ALLOC (NULL);
2506 i = 0;
2507 bbs.safe_push (entry_block);
2508 bitmap_set_bit (visited_blocks, entry_block->index);
2512 basic_block bb = bbs[i++];
2514 if (exit_blocks &&
2515 bitmap_bit_p (exit_blocks, bb->index))
2516 continue;
2518 if (stop_at_irrevocable_p
2519 && irr_blocks
2520 && bitmap_bit_p (irr_blocks, bb->index))
2521 continue;
2523 FOR_EACH_EDGE (e, ei, bb->succs)
2524 if ((include_uninstrumented_p
2525 || !(e->flags & EDGE_TM_UNINSTRUMENTED))
2526 && !bitmap_bit_p (visited_blocks, e->dest->index))
2528 bitmap_set_bit (visited_blocks, e->dest->index);
2529 bbs.safe_push (e->dest);
2532 while (i < bbs.length ());
2534 if (all_region_blocks)
2535 bitmap_ior_into (all_region_blocks, visited_blocks);
2537 BITMAP_FREE (visited_blocks);
2538 return bbs;
2541 // Callback data for collect_bb2reg.
2542 struct bb2reg_stuff
2544 vec<tm_region_p> *bb2reg;
2545 bool include_uninstrumented_p;
2548 // Callback for expand_regions, collect innermost region data for each bb.
2549 static void *
2550 collect_bb2reg (struct tm_region *region, void *data)
2552 struct bb2reg_stuff *stuff = (struct bb2reg_stuff *)data;
2553 vec<tm_region_p> *bb2reg = stuff->bb2reg;
2554 vec<basic_block> queue;
2555 unsigned int i;
2556 basic_block bb;
2558 queue = get_tm_region_blocks (region->entry_block,
2559 region->exit_blocks,
2560 region->irr_blocks,
2561 NULL,
2562 /*stop_at_irr_p=*/true,
2563 stuff->include_uninstrumented_p);
2565 // We expect expand_region to perform a post-order traversal of the region
2566 // tree. Therefore the last region seen for any bb is the innermost.
2567 FOR_EACH_VEC_ELT (queue, i, bb)
2568 (*bb2reg)[bb->index] = region;
2570 queue.release ();
2571 return NULL;
2574 // Returns a vector, indexed by BB->INDEX, of the innermost tm_region to
2575 // which a basic block belongs. Note that we only consider the instrumented
2576 // code paths for the region; the uninstrumented code paths are ignored if
2577 // INCLUDE_UNINSTRUMENTED_P is false.
2579 // ??? This data is very similar to the bb_regions array that is collected
2580 // during tm_region_init. Or, rather, this data is similar to what could
2581 // be used within tm_region_init. The actual computation in tm_region_init
2582 // begins and ends with bb_regions entirely full of NULL pointers, due to
2583 // the way in which pointers are swapped in and out of the array.
2585 // ??? Our callers expect that blocks are not shared between transactions.
2586 // When the optimizers get too smart, and blocks are shared, then during
2587 // the tm_mark phase we'll add log entries to only one of the two transactions,
2588 // and in the tm_edge phase we'll add edges to the CFG that create invalid
2589 // cycles. The symptom being SSA defs that do not dominate their uses.
2590 // Note that the optimizers were locally correct with their transformation,
2591 // as we have no info within the program that suggests that the blocks cannot
2592 // be shared.
2594 // ??? There is currently a hack inside tree-ssa-pre.c to work around the
2595 // only known instance of this block sharing.
2597 static vec<tm_region_p>
2598 get_bb_regions_instrumented (bool traverse_clones,
2599 bool include_uninstrumented_p)
2601 unsigned n = last_basic_block;
2602 struct bb2reg_stuff stuff;
2603 vec<tm_region_p> ret;
2605 ret.create (n);
2606 ret.safe_grow_cleared (n);
2607 stuff.bb2reg = &ret;
2608 stuff.include_uninstrumented_p = include_uninstrumented_p;
2609 expand_regions (all_tm_regions, collect_bb2reg, &stuff, traverse_clones);
2611 return ret;
2614 /* Set the IN_TRANSACTION for all gimple statements that appear in a
2615 transaction. */
2617 void
2618 compute_transaction_bits (void)
2620 struct tm_region *region;
2621 vec<basic_block> queue;
2622 unsigned int i;
2623 basic_block bb;
2625 /* ?? Perhaps we need to abstract gate_tm_init further, because we
2626 certainly don't need it to calculate CDI_DOMINATOR info. */
2627 gate_tm_init ();
2629 FOR_EACH_BB (bb)
2630 bb->flags &= ~BB_IN_TRANSACTION;
2632 for (region = all_tm_regions; region; region = region->next)
2634 queue = get_tm_region_blocks (region->entry_block,
2635 region->exit_blocks,
2636 region->irr_blocks,
2637 NULL,
2638 /*stop_at_irr_p=*/true);
2639 for (i = 0; queue.iterate (i, &bb); ++i)
2640 bb->flags |= BB_IN_TRANSACTION;
2641 queue.release ();
2644 if (all_tm_regions)
2645 bitmap_obstack_release (&tm_obstack);
2648 /* Replace the GIMPLE_TRANSACTION in this region with the corresponding
2649 call to BUILT_IN_TM_START. */
2651 static void *
2652 expand_transaction (struct tm_region *region, void *data ATTRIBUTE_UNUSED)
2654 tree tm_start = builtin_decl_explicit (BUILT_IN_TM_START);
2655 basic_block transaction_bb = gimple_bb (region->transaction_stmt);
2656 tree tm_state = region->tm_state;
2657 tree tm_state_type = TREE_TYPE (tm_state);
2658 edge abort_edge = NULL;
2659 edge inst_edge = NULL;
2660 edge uninst_edge = NULL;
2661 edge fallthru_edge = NULL;
2663 // Identify the various successors of the transaction start.
2665 edge_iterator i;
2666 edge e;
2667 FOR_EACH_EDGE (e, i, transaction_bb->succs)
2669 if (e->flags & EDGE_TM_ABORT)
2670 abort_edge = e;
2671 else if (e->flags & EDGE_TM_UNINSTRUMENTED)
2672 uninst_edge = e;
2673 else
2674 inst_edge = e;
2675 if (e->flags & EDGE_FALLTHRU)
2676 fallthru_edge = e;
2680 /* ??? There are plenty of bits here we're not computing. */
2682 int subcode = gimple_transaction_subcode (region->transaction_stmt);
2683 int flags = 0;
2684 if (subcode & GTMA_DOES_GO_IRREVOCABLE)
2685 flags |= PR_DOESGOIRREVOCABLE;
2686 if ((subcode & GTMA_MAY_ENTER_IRREVOCABLE) == 0)
2687 flags |= PR_HASNOIRREVOCABLE;
2688 /* If the transaction does not have an abort in lexical scope and is not
2689 marked as an outer transaction, then it will never abort. */
2690 if ((subcode & GTMA_HAVE_ABORT) == 0 && (subcode & GTMA_IS_OUTER) == 0)
2691 flags |= PR_HASNOABORT;
2692 if ((subcode & GTMA_HAVE_STORE) == 0)
2693 flags |= PR_READONLY;
2694 if (inst_edge && !(subcode & GTMA_HAS_NO_INSTRUMENTATION))
2695 flags |= PR_INSTRUMENTEDCODE;
2696 if (uninst_edge)
2697 flags |= PR_UNINSTRUMENTEDCODE;
2698 if (subcode & GTMA_IS_OUTER)
2699 region->original_transaction_was_outer = true;
2700 tree t = build_int_cst (tm_state_type, flags);
2701 gimple call = gimple_build_call (tm_start, 1, t);
2702 gimple_call_set_lhs (call, tm_state);
2703 gimple_set_location (call, gimple_location (region->transaction_stmt));
2705 // Replace the GIMPLE_TRANSACTION with the call to BUILT_IN_TM_START.
2706 gimple_stmt_iterator gsi = gsi_last_bb (transaction_bb);
2707 gcc_assert (gsi_stmt (gsi) == region->transaction_stmt);
2708 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
2709 gsi_remove (&gsi, true);
2710 region->transaction_stmt = call;
2713 // Generate log saves.
2714 if (!tm_log_save_addresses.is_empty ())
2715 tm_log_emit_saves (region->entry_block, transaction_bb);
2717 // In the beginning, we've no tests to perform on transaction restart.
2718 // Note that after this point, transaction_bb becomes the "most recent
2719 // block containing tests for the transaction".
2720 region->restart_block = region->entry_block;
2722 // Generate log restores.
2723 if (!tm_log_save_addresses.is_empty ())
2725 basic_block test_bb = create_empty_bb (transaction_bb);
2726 basic_block code_bb = create_empty_bb (test_bb);
2727 basic_block join_bb = create_empty_bb (code_bb);
2728 if (current_loops && transaction_bb->loop_father)
2730 add_bb_to_loop (test_bb, transaction_bb->loop_father);
2731 add_bb_to_loop (code_bb, transaction_bb->loop_father);
2732 add_bb_to_loop (join_bb, transaction_bb->loop_father);
2734 if (region->restart_block == region->entry_block)
2735 region->restart_block = test_bb;
2737 tree t1 = create_tmp_reg (tm_state_type, NULL);
2738 tree t2 = build_int_cst (tm_state_type, A_RESTORELIVEVARIABLES);
2739 gimple stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, t1,
2740 tm_state, t2);
2741 gimple_stmt_iterator gsi = gsi_last_bb (test_bb);
2742 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2744 t2 = build_int_cst (tm_state_type, 0);
2745 stmt = gimple_build_cond (NE_EXPR, t1, t2, NULL, NULL);
2746 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2748 tm_log_emit_restores (region->entry_block, code_bb);
2750 edge ei = make_edge (transaction_bb, test_bb, EDGE_FALLTHRU);
2751 edge et = make_edge (test_bb, code_bb, EDGE_TRUE_VALUE);
2752 edge ef = make_edge (test_bb, join_bb, EDGE_FALSE_VALUE);
2753 redirect_edge_pred (fallthru_edge, join_bb);
2755 join_bb->frequency = test_bb->frequency = transaction_bb->frequency;
2756 join_bb->count = test_bb->count = transaction_bb->count;
2758 ei->probability = PROB_ALWAYS;
2759 et->probability = PROB_LIKELY;
2760 ef->probability = PROB_UNLIKELY;
2761 et->count = apply_probability (test_bb->count, et->probability);
2762 ef->count = apply_probability (test_bb->count, ef->probability);
2764 code_bb->count = et->count;
2765 code_bb->frequency = EDGE_FREQUENCY (et);
2767 transaction_bb = join_bb;
2770 // If we have an ABORT edge, create a test to perform the abort.
2771 if (abort_edge)
2773 basic_block test_bb = create_empty_bb (transaction_bb);
2774 if (current_loops && transaction_bb->loop_father)
2775 add_bb_to_loop (test_bb, transaction_bb->loop_father);
2776 if (region->restart_block == region->entry_block)
2777 region->restart_block = test_bb;
2779 tree t1 = create_tmp_reg (tm_state_type, NULL);
2780 tree t2 = build_int_cst (tm_state_type, A_ABORTTRANSACTION);
2781 gimple stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, t1,
2782 tm_state, t2);
2783 gimple_stmt_iterator gsi = gsi_last_bb (test_bb);
2784 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2786 t2 = build_int_cst (tm_state_type, 0);
2787 stmt = gimple_build_cond (NE_EXPR, t1, t2, NULL, NULL);
2788 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2790 edge ei = make_edge (transaction_bb, test_bb, EDGE_FALLTHRU);
2791 test_bb->frequency = transaction_bb->frequency;
2792 test_bb->count = transaction_bb->count;
2793 ei->probability = PROB_ALWAYS;
2795 // Not abort edge. If both are live, choose one at random; we'll
2796 // be fixing that up below.
2797 redirect_edge_pred (fallthru_edge, test_bb);
2798 fallthru_edge->flags = EDGE_FALSE_VALUE;
2799 fallthru_edge->probability = PROB_VERY_LIKELY;
2800 fallthru_edge->count
2801 = apply_probability (test_bb->count, fallthru_edge->probability);
2803 // Abort/over edge.
2804 redirect_edge_pred (abort_edge, test_bb);
2805 abort_edge->flags = EDGE_TRUE_VALUE;
2806 abort_edge->probability = PROB_VERY_UNLIKELY;
2807 abort_edge->count
2808 = apply_probability (test_bb->count, abort_edge->probability);
2810 transaction_bb = test_bb;
2813 // If we have both instrumented and uninstrumented code paths, select one.
2814 if (inst_edge && uninst_edge)
2816 basic_block test_bb = create_empty_bb (transaction_bb);
2817 if (current_loops && transaction_bb->loop_father)
2818 add_bb_to_loop (test_bb, transaction_bb->loop_father);
2819 if (region->restart_block == region->entry_block)
2820 region->restart_block = test_bb;
2822 tree t1 = create_tmp_reg (tm_state_type, NULL);
2823 tree t2 = build_int_cst (tm_state_type, A_RUNUNINSTRUMENTEDCODE);
2825 gimple stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, t1,
2826 tm_state, t2);
2827 gimple_stmt_iterator gsi = gsi_last_bb (test_bb);
2828 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2830 t2 = build_int_cst (tm_state_type, 0);
2831 stmt = gimple_build_cond (NE_EXPR, t1, t2, NULL, NULL);
2832 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2834 // Create the edge into test_bb first, as we want to copy values
2835 // out of the fallthru edge.
2836 edge e = make_edge (transaction_bb, test_bb, fallthru_edge->flags);
2837 e->probability = fallthru_edge->probability;
2838 test_bb->count = e->count = fallthru_edge->count;
2839 test_bb->frequency = EDGE_FREQUENCY (e);
2841 // Now update the edges to the inst/uninst implementations.
2842 // For now assume that the paths are equally likely. When using HTM,
2843 // we'll try the uninst path first and fall back to the inst path if HTM
2844 // buffers are exceeded. Without HTM we start with the inst path and
2845 // use the uninst path when falling back to serial mode.
2846 redirect_edge_pred (inst_edge, test_bb);
2847 inst_edge->flags = EDGE_FALSE_VALUE;
2848 inst_edge->probability = REG_BR_PROB_BASE / 2;
2849 inst_edge->count
2850 = apply_probability (test_bb->count, inst_edge->probability);
2852 redirect_edge_pred (uninst_edge, test_bb);
2853 uninst_edge->flags = EDGE_TRUE_VALUE;
2854 uninst_edge->probability = REG_BR_PROB_BASE / 2;
2855 uninst_edge->count
2856 = apply_probability (test_bb->count, uninst_edge->probability);
2859 // If we have no previous special cases, and we have PHIs at the beginning
2860 // of the atomic region, this means we have a loop at the beginning of the
2861 // atomic region that shares the first block. This can cause problems with
2862 // the transaction restart abnormal edges to be added in the tm_edges pass.
2863 // Solve this by adding a new empty block to receive the abnormal edges.
2864 if (region->restart_block == region->entry_block
2865 && phi_nodes (region->entry_block))
2867 basic_block empty_bb = create_empty_bb (transaction_bb);
2868 region->restart_block = empty_bb;
2869 if (current_loops && transaction_bb->loop_father)
2870 add_bb_to_loop (empty_bb, transaction_bb->loop_father);
2872 redirect_edge_pred (fallthru_edge, empty_bb);
2873 make_edge (transaction_bb, empty_bb, EDGE_FALLTHRU);
2876 return NULL;
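/* Editorial sketch: for a small transaction with no lexically visible
   abort, no stores, and only an instrumented code path, the properties
   handed to the runtime would combine roughly as

       flags = PR_INSTRUMENTEDCODE | PR_HASNOIRREVOCABLE
	       | PR_HASNOABORT | PR_READONLY;
       tm_state = _ITM_beginTransaction (flags);   /* BUILT_IN_TM_START */

   and none of the restore/abort/uninstrumented test blocks above are
   emitted, so region->restart_block stays region->entry_block unless
   the entry block has PHIs.  */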
2879 /* Generate the temporary to be used for the return value of
2880 BUILT_IN_TM_START. */
2882 static void *
2883 generate_tm_state (struct tm_region *region, void *data ATTRIBUTE_UNUSED)
2885 tree tm_start = builtin_decl_explicit (BUILT_IN_TM_START);
2886 region->tm_state =
2887 create_tmp_reg (TREE_TYPE (TREE_TYPE (tm_start)), "tm_state");
2889 // Reset the subcode, post optimizations. We'll fill this in
2890 // again as we process blocks.
2891 if (region->exit_blocks)
2893 unsigned int subcode
2894 = gimple_transaction_subcode (region->transaction_stmt);
2896 if (subcode & GTMA_DOES_GO_IRREVOCABLE)
2897 subcode &= (GTMA_DECLARATION_MASK | GTMA_DOES_GO_IRREVOCABLE
2898 | GTMA_MAY_ENTER_IRREVOCABLE
2899 | GTMA_HAS_NO_INSTRUMENTATION);
2900 else
2901 subcode &= GTMA_DECLARATION_MASK;
2902 gimple_transaction_set_subcode (region->transaction_stmt, subcode);
2905 return NULL;
2908 // Propagate flags from inner transactions outwards.
2909 static void
2910 propagate_tm_flags_out (struct tm_region *region)
2912 if (region == NULL)
2913 return;
2914 propagate_tm_flags_out (region->inner);
2916 if (region->outer && region->outer->transaction_stmt)
2918 unsigned s = gimple_transaction_subcode (region->transaction_stmt);
2919 s &= (GTMA_HAVE_ABORT | GTMA_HAVE_LOAD | GTMA_HAVE_STORE
2920 | GTMA_MAY_ENTER_IRREVOCABLE);
2921 s |= gimple_transaction_subcode (region->outer->transaction_stmt);
2922 gimple_transaction_set_subcode (region->outer->transaction_stmt, s);
2925 propagate_tm_flags_out (region->next);
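/* Editorial sketch: if a nested transaction was found to contain a
   store and a reachable abort, its enclosing GIMPLE_TRANSACTION picks
   those bits up here, effectively

       outer_subcode |= GTMA_HAVE_ABORT | GTMA_HAVE_STORE;

   so the later expansion of the outer region conservatively reflects
   the work done inside the inner one.  */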
2928 /* Entry point to the MARK phase of TM expansion. Here we replace
2929 transactional memory statements with calls to builtins, and function
2930 calls with their transactional clones (if available). But we don't
2931 yet lower GIMPLE_TRANSACTION or add the transaction restart back-edges. */
2933 static unsigned int
2934 execute_tm_mark (void)
2936 pending_edge_inserts_p = false;
2938 expand_regions (all_tm_regions, generate_tm_state, NULL,
2939 /*traverse_clones=*/true);
2941 tm_log_init ();
2943 vec<tm_region_p> bb_regions
2944 = get_bb_regions_instrumented (/*traverse_clones=*/true,
2945 /*include_uninstrumented_p=*/false);
2946 struct tm_region *r;
2947 unsigned i;
2949 // Expand memory operations into calls into the runtime.
2950 // This collects log entries as well.
2951 FOR_EACH_VEC_ELT (bb_regions, i, r)
2953 if (r != NULL)
2955 if (r->transaction_stmt)
2957 unsigned sub = gimple_transaction_subcode (r->transaction_stmt);
2959 /* If we're sure to go irrevocable, there won't be
2960 anything to expand, since the run-time will go
2961 irrevocable right away. */
2962 if (sub & GTMA_DOES_GO_IRREVOCABLE
2963 && sub & GTMA_MAY_ENTER_IRREVOCABLE)
2964 continue;
2966 expand_block_tm (r, BASIC_BLOCK (i));
2970 bb_regions.release ();
2972 // Propagate flags from inner transactions outwards.
2973 propagate_tm_flags_out (all_tm_regions);
2975 // Expand GIMPLE_TRANSACTIONs into calls into the runtime.
2976 expand_regions (all_tm_regions, expand_transaction, NULL,
2977 /*traverse_clones=*/false);
2979 tm_log_emit ();
2980 tm_log_delete ();
2982 if (pending_edge_inserts_p)
2983 gsi_commit_edge_inserts ();
2984 free_dominance_info (CDI_DOMINATORS);
2985 return 0;
2988 namespace {
2990 const pass_data pass_data_tm_mark =
2992 GIMPLE_PASS, /* type */
2993 "tmmark", /* name */
2994 OPTGROUP_NONE, /* optinfo_flags */
2995 false, /* has_gate */
2996 true, /* has_execute */
2997 TV_TRANS_MEM, /* tv_id */
2998 ( PROP_ssa | PROP_cfg ), /* properties_required */
2999 0, /* properties_provided */
3000 0, /* properties_destroyed */
3001 0, /* todo_flags_start */
3002 ( TODO_update_ssa | TODO_verify_ssa ), /* todo_flags_finish */
3005 class pass_tm_mark : public gimple_opt_pass
3007 public:
3008 pass_tm_mark (gcc::context *ctxt)
3009 : gimple_opt_pass (pass_data_tm_mark, ctxt)
3012 /* opt_pass methods: */
3013 unsigned int execute () { return execute_tm_mark (); }
3015 }; // class pass_tm_mark
3017 } // anon namespace
3019 gimple_opt_pass *
3020 make_pass_tm_mark (gcc::context *ctxt)
3022 return new pass_tm_mark (ctxt);
3026 /* Create an abnormal edge from STMT at ITER to DEST_BB, splitting the
3027 block as necessary. Adjust *PNEXT as needed for the split block. */
3029 static inline void
3030 split_bb_make_tm_edge (gimple stmt, basic_block dest_bb,
3031 gimple_stmt_iterator iter, gimple_stmt_iterator *pnext)
3033 basic_block bb = gimple_bb (stmt);
3034 if (!gsi_one_before_end_p (iter))
3036 edge e = split_block (bb, stmt);
3037 *pnext = gsi_start_bb (e->dest);
3039 make_edge (bb, dest_bb, EDGE_ABNORMAL);
3041 // Record the need for the edge for the benefit of the rtl passes.
3042 if (cfun->gimple_df->tm_restart == NULL)
3043 cfun->gimple_df->tm_restart = htab_create_ggc (31, struct_ptr_hash,
3044 struct_ptr_eq, ggc_free);
3046 struct tm_restart_node dummy;
3047 dummy.stmt = stmt;
3048 dummy.label_or_list = gimple_block_label (dest_bb);
3050 void **slot = htab_find_slot (cfun->gimple_df->tm_restart, &dummy, INSERT);
3051 struct tm_restart_node *n = (struct tm_restart_node *) *slot;
3052 if (n == NULL)
3054 n = ggc_alloc_tm_restart_node ();
3055 *n = dummy;
3057 else
3059 tree old = n->label_or_list;
3060 if (TREE_CODE (old) == LABEL_DECL)
3061 old = tree_cons (NULL, old, NULL);
3062 n->label_or_list = tree_cons (NULL, dummy.label_or_list, old);
3066 /* Split block BB as necessary for every builtin function we added, and
3067 wire up the abnormal back edges implied by the transaction restart. */
3069 static void
3070 expand_block_edges (struct tm_region *const region, basic_block bb)
3072 gimple_stmt_iterator gsi, next_gsi;
3074 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi = next_gsi)
3076 gimple stmt = gsi_stmt (gsi);
3078 next_gsi = gsi;
3079 gsi_next (&next_gsi);
3081 // ??? Shouldn't we split for any non-pure, non-irrevocable function?
3082 if (gimple_code (stmt) != GIMPLE_CALL
3083 || (gimple_call_flags (stmt) & ECF_TM_BUILTIN) == 0)
3084 continue;
3086 if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)) == BUILT_IN_TM_ABORT)
3088 // If we have a ``__transaction_cancel [[outer]]'', there is only
3089 // one abnormal edge: to the transaction marked OUTER.
3090 // All compiler-generated instances of BUILT_IN_TM_ABORT have a
3091 // constant argument, which we can examine here. Users invoking
3092 // TM_ABORT directly get what they deserve.
3093 tree arg = gimple_call_arg (stmt, 0);
3094 if (TREE_CODE (arg) == INTEGER_CST
3095 && (TREE_INT_CST_LOW (arg) & AR_OUTERABORT) != 0
3096 && !decl_is_tm_clone (current_function_decl))
3098 // Find the GTMA_IS_OUTER transaction.
3099 for (struct tm_region *o = region; o; o = o->outer)
3100 if (o->original_transaction_was_outer)
3102 split_bb_make_tm_edge (stmt, o->restart_block,
3103 gsi, &next_gsi);
3104 break;
3107 // Otherwise, the front-end should have semantically checked
3108 // outer aborts, but in either case the target region is not
3109 // within this function.
3110 continue;
3113 // Non-outer TM aborts have an abnormal edge to the inner-most
3114 // transaction, the one being aborted.
3115 split_bb_make_tm_edge (stmt, region->restart_block, gsi, &next_gsi);
3118 // All TM builtins have an abnormal edge to the outer-most transaction.
3119 // We never restart inner transactions. For tm clones, we know a-priori
3120 // that the outer-most transaction is outside the function.
3121 if (decl_is_tm_clone (current_function_decl))
3122 continue;
3124 if (cfun->gimple_df->tm_restart == NULL)
3125 cfun->gimple_df->tm_restart
3126 = htab_create_ggc (31, struct_ptr_hash, struct_ptr_eq, ggc_free);
3128 // All TM builtins have an abnormal edge to the outer-most transaction.
3129 // We never restart inner transactions.
3130 for (struct tm_region *o = region; o; o = o->outer)
3131 if (!o->outer)
3133 split_bb_make_tm_edge (stmt, o->restart_block, gsi, &next_gsi);
3134 break;
3137 // Delete any tail-call annotation that may have been added.
3138 // The tail-call pass may have mis-identified the commit as being
3139 // a candidate because we had not yet added this restart edge.
3140 gimple_call_set_tail (stmt, false);
3144 /* Entry point to the final expansion of transactional nodes. */
3146 static unsigned int
3147 execute_tm_edges (void)
3149 vec<tm_region_p> bb_regions
3150 = get_bb_regions_instrumented (/*traverse_clones=*/false,
3151 /*include_uninstrumented_p=*/true);
3152 struct tm_region *r;
3153 unsigned i;
3155 FOR_EACH_VEC_ELT (bb_regions, i, r)
3156 if (r != NULL)
3157 expand_block_edges (r, BASIC_BLOCK (i));
3159 bb_regions.release ();
3161 /* We've got to release the dominance info now, to indicate that it
3162 must be rebuilt completely. Otherwise we'll crash trying to update
3163 the SSA web in the TODO section following this pass. */
3164 free_dominance_info (CDI_DOMINATORS);
3165 bitmap_obstack_release (&tm_obstack);
3166 all_tm_regions = NULL;
3168 return 0;
3171 namespace {
3173 const pass_data pass_data_tm_edges =
3175 GIMPLE_PASS, /* type */
3176 "tmedge", /* name */
3177 OPTGROUP_NONE, /* optinfo_flags */
3178 false, /* has_gate */
3179 true, /* has_execute */
3180 TV_TRANS_MEM, /* tv_id */
3181 ( PROP_ssa | PROP_cfg ), /* properties_required */
3182 0, /* properties_provided */
3183 0, /* properties_destroyed */
3184 0, /* todo_flags_start */
3185 ( TODO_update_ssa | TODO_verify_ssa ), /* todo_flags_finish */
3188 class pass_tm_edges : public gimple_opt_pass
3190 public:
3191 pass_tm_edges (gcc::context *ctxt)
3192 : gimple_opt_pass (pass_data_tm_edges, ctxt)
3195 /* opt_pass methods: */
3196 unsigned int execute () { return execute_tm_edges (); }
3198 }; // class pass_tm_edges
3200 } // anon namespace
3202 gimple_opt_pass *
3203 make_pass_tm_edges (gcc::context *ctxt)
3205 return new pass_tm_edges (ctxt);
3208 /* Helper function for expand_regions. Expand REGION and recurse to
3209 the inner region. Call CALLBACK on each region. CALLBACK returns
3210 NULL to continue the traversal, otherwise a non-null value which
3211 this function will return as well. TRAVERSE_CLONES is true if we
3212 should traverse transactional clones. */
3214 static void *
3215 expand_regions_1 (struct tm_region *region,
3216 void *(*callback)(struct tm_region *, void *),
3217 void *data,
3218 bool traverse_clones)
3220 void *retval = NULL;
3221 if (region->exit_blocks
3222 || (traverse_clones && decl_is_tm_clone (current_function_decl)))
3224 retval = callback (region, data);
3225 if (retval)
3226 return retval;
3228 if (region->inner)
3230 retval = expand_regions (region->inner, callback, data, traverse_clones);
3231 if (retval)
3232 return retval;
3234 return retval;
3237 /* Traverse the regions enclosed and including REGION. Execute
3238 CALLBACK for each region, passing DATA. CALLBACK returns NULL to
3239 continue the traversal, otherwise a non-null value which this
3240 function will return as well. TRAVERSE_CLONES is true if we should
3241 traverse transactional clones. */
3243 static void *
3244 expand_regions (struct tm_region *region,
3245 void *(*callback)(struct tm_region *, void *),
3246 void *data,
3247 bool traverse_clones)
3249 void *retval = NULL;
3250 while (region)
3252 retval = expand_regions_1 (region, callback, data, traverse_clones);
3253 if (retval)
3254 return retval;
3255 region = region->next;
3257 return retval;
3261 /* A unique TM memory operation. */
3262 typedef struct tm_memop
3264 /* Unique ID that all memory operations to the same location have. */
3265 unsigned int value_id;
3266 /* Address of load/store. */
3267 tree addr;
3268 } *tm_memop_t;
3270 /* TM memory operation hashtable helpers. */
3272 struct tm_memop_hasher : typed_free_remove <tm_memop>
3274 typedef tm_memop value_type;
3275 typedef tm_memop compare_type;
3276 static inline hashval_t hash (const value_type *);
3277 static inline bool equal (const value_type *, const compare_type *);
3280 /* Htab support. Return a hash value for a `tm_memop'. */
3281 inline hashval_t
3282 tm_memop_hasher::hash (const value_type *mem)
3284 tree addr = mem->addr;
3285 /* We drill down to the SSA_NAME/DECL for the hash, but equality is
3286 actually done with operand_equal_p (see tm_memop_hasher::equal). */
3287 if (TREE_CODE (addr) == ADDR_EXPR)
3288 addr = TREE_OPERAND (addr, 0);
3289 return iterative_hash_expr (addr, 0);
3292 /* Htab support. Return true if two tm_memop's are the same. */
3293 inline bool
3294 tm_memop_hasher::equal (const value_type *mem1, const compare_type *mem2)
3296 return operand_equal_p (mem1->addr, mem2->addr, 0);
3299 /* Sets for solving data flow equations in the memory optimization pass. */
3300 struct tm_memopt_bitmaps
3302 /* Stores available to this BB upon entry. Basically, stores that
3303 dominate this BB. */
3304 bitmap store_avail_in;
3305 /* Stores available at the end of this BB. */
3306 bitmap store_avail_out;
3307 bitmap store_antic_in;
3308 bitmap store_antic_out;
3309 /* Reads available to this BB upon entry. Basically, reads that
3310 dominate this BB. */
3311 bitmap read_avail_in;
3312 /* Reads available at the end of this BB. */
3313 bitmap read_avail_out;
3314 /* Reads performed in this BB. */
3315 bitmap read_local;
3316 /* Writes performed in this BB. */
3317 bitmap store_local;
3319 /* Temporary storage for pass. */
3320 /* Is the current BB in the worklist? */
3321 bool avail_in_worklist_p;
3322 /* Have we visited this BB? */
3323 bool visited_p;
3326 static bitmap_obstack tm_memopt_obstack;
3328 /* Unique counter for TM loads and stores. Loads and stores of the
3329 same address get the same ID. */
3330 static unsigned int tm_memopt_value_id;
3331 static hash_table <tm_memop_hasher> tm_memopt_value_numbers;
3333 #define STORE_AVAIL_IN(BB) \
3334 ((struct tm_memopt_bitmaps *) ((BB)->aux))->store_avail_in
3335 #define STORE_AVAIL_OUT(BB) \
3336 ((struct tm_memopt_bitmaps *) ((BB)->aux))->store_avail_out
3337 #define STORE_ANTIC_IN(BB) \
3338 ((struct tm_memopt_bitmaps *) ((BB)->aux))->store_antic_in
3339 #define STORE_ANTIC_OUT(BB) \
3340 ((struct tm_memopt_bitmaps *) ((BB)->aux))->store_antic_out
3341 #define READ_AVAIL_IN(BB) \
3342 ((struct tm_memopt_bitmaps *) ((BB)->aux))->read_avail_in
3343 #define READ_AVAIL_OUT(BB) \
3344 ((struct tm_memopt_bitmaps *) ((BB)->aux))->read_avail_out
3345 #define READ_LOCAL(BB) \
3346 ((struct tm_memopt_bitmaps *) ((BB)->aux))->read_local
3347 #define STORE_LOCAL(BB) \
3348 ((struct tm_memopt_bitmaps *) ((BB)->aux))->store_local
3349 #define AVAIL_IN_WORKLIST_P(BB) \
3350 ((struct tm_memopt_bitmaps *) ((BB)->aux))->avail_in_worklist_p
3351 #define BB_VISITED_P(BB) \
3352 ((struct tm_memopt_bitmaps *) ((BB)->aux))->visited_p
3354 /* Given a TM load/store in STMT, return the value number for the address
3355 it accesses. */
3357 static unsigned int
3358 tm_memopt_value_number (gimple stmt, enum insert_option op)
3360 struct tm_memop tmpmem, *mem;
3361 tm_memop **slot;
3363 gcc_assert (is_tm_load (stmt) || is_tm_store (stmt));
3364 tmpmem.addr = gimple_call_arg (stmt, 0);
3365 slot = tm_memopt_value_numbers.find_slot (&tmpmem, op);
3366 if (*slot)
3367 mem = *slot;
3368 else if (op == INSERT)
3370 mem = XNEW (struct tm_memop);
3371 *slot = mem;
3372 mem->value_id = tm_memopt_value_id++;
3373 mem->addr = tmpmem.addr;
3375 else
3376 gcc_unreachable ();
3377 return mem->value_id;
3380 /* Accumulate TM memory operations in BB into STORE_LOCAL and READ_LOCAL. */
3382 static void
3383 tm_memopt_accumulate_memops (basic_block bb)
3385 gimple_stmt_iterator gsi;
3387 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3389 gimple stmt = gsi_stmt (gsi);
3390 bitmap bits;
3391 unsigned int loc;
3393 if (is_tm_store (stmt))
3394 bits = STORE_LOCAL (bb);
3395 else if (is_tm_load (stmt))
3396 bits = READ_LOCAL (bb);
3397 else
3398 continue;
3400 loc = tm_memopt_value_number (stmt, INSERT);
3401 bitmap_set_bit (bits, loc);
3402 if (dump_file)
3404 fprintf (dump_file, "TM memopt (%s): value num=%d, BB=%d, addr=",
3405 is_tm_load (stmt) ? "LOAD" : "STORE", loc,
3406 gimple_bb (stmt)->index);
3407 print_generic_expr (dump_file, gimple_call_arg (stmt, 0), 0);
3408 fprintf (dump_file, "\n");
3413 /* Prettily dump one of the memopt sets. BITS is the bitmap to dump. */
3415 static void
3416 dump_tm_memopt_set (const char *set_name, bitmap bits)
3418 unsigned i;
3419 bitmap_iterator bi;
3420 const char *comma = "";
3422 fprintf (dump_file, "TM memopt: %s: [", set_name);
3423 EXECUTE_IF_SET_IN_BITMAP (bits, 0, i, bi)
3425 hash_table <tm_memop_hasher>::iterator hi;
3426 struct tm_memop *mem = NULL;
3428 /* Yeah, yeah, yeah. Whatever. This is just for debugging. */
3429 FOR_EACH_HASH_TABLE_ELEMENT (tm_memopt_value_numbers, mem, tm_memop_t, hi)
3430 if (mem->value_id == i)
3431 break;
3432 gcc_assert (mem->value_id == i);
3433 fprintf (dump_file, "%s", comma);
3434 comma = ", ";
3435 print_generic_expr (dump_file, mem->addr, 0);
3437 fprintf (dump_file, "]\n");
3440 /* Prettily dump all of the memopt sets in BLOCKS. */
3442 static void
3443 dump_tm_memopt_sets (vec<basic_block> blocks)
3445 size_t i;
3446 basic_block bb;
3448 for (i = 0; blocks.iterate (i, &bb); ++i)
3450 fprintf (dump_file, "------------BB %d---------\n", bb->index);
3451 dump_tm_memopt_set ("STORE_LOCAL", STORE_LOCAL (bb));
3452 dump_tm_memopt_set ("READ_LOCAL", READ_LOCAL (bb));
3453 dump_tm_memopt_set ("STORE_AVAIL_IN", STORE_AVAIL_IN (bb));
3454 dump_tm_memopt_set ("STORE_AVAIL_OUT", STORE_AVAIL_OUT (bb));
3455 dump_tm_memopt_set ("READ_AVAIL_IN", READ_AVAIL_IN (bb));
3456 dump_tm_memopt_set ("READ_AVAIL_OUT", READ_AVAIL_OUT (bb));
3460 /* Compute {STORE,READ}_AVAIL_IN for the basic block BB. */
3462 static void
3463 tm_memopt_compute_avin (basic_block bb)
3465 edge e;
3466 unsigned ix;
3468 /* Seed with the AVOUT of any predecessor. */
3469 for (ix = 0; ix < EDGE_COUNT (bb->preds); ix++)
3471 e = EDGE_PRED (bb, ix);
3472 /* Make sure we have already visited this BB, and it is thus
3473 initialized.
3475 If e->src->aux is NULL, this predecessor is actually on an
3476 enclosing transaction. We only care about the current
3477 transaction, so ignore it. */
3478 if (e->src->aux && BB_VISITED_P (e->src))
3480 bitmap_copy (STORE_AVAIL_IN (bb), STORE_AVAIL_OUT (e->src));
3481 bitmap_copy (READ_AVAIL_IN (bb), READ_AVAIL_OUT (e->src));
3482 break;
3486 for (; ix < EDGE_COUNT (bb->preds); ix++)
3488 e = EDGE_PRED (bb, ix);
3489 if (e->src->aux && BB_VISITED_P (e->src))
3491 bitmap_and_into (STORE_AVAIL_IN (bb), STORE_AVAIL_OUT (e->src));
3492 bitmap_and_into (READ_AVAIL_IN (bb), READ_AVAIL_OUT (e->src));
3496 BB_VISITED_P (bb) = true;
3499 /* Compute the STORE_ANTIC_IN for the basic block BB. */
3501 static void
3502 tm_memopt_compute_antin (basic_block bb)
3504 edge e;
3505 unsigned ix;
3507 /* Seed with the ANTIC_OUT of any successor. */
3508 for (ix = 0; ix < EDGE_COUNT (bb->succs); ix++)
3510 e = EDGE_SUCC (bb, ix);
3511 /* Make sure we have already visited this BB, and it is thus
3512 initialized. */
3513 if (BB_VISITED_P (e->dest))
3515 bitmap_copy (STORE_ANTIC_IN (bb), STORE_ANTIC_OUT (e->dest));
3516 break;
3520 for (; ix < EDGE_COUNT (bb->succs); ix++)
3522 e = EDGE_SUCC (bb, ix);
3523 if (BB_VISITED_P (e->dest))
3524 bitmap_and_into (STORE_ANTIC_IN (bb), STORE_ANTIC_OUT (e->dest));
3527 BB_VISITED_P (bb) = true;
3530 /* Compute the AVAIL sets for every basic block in BLOCKS.
3532 We compute {STORE,READ}_AVAIL_{OUT,IN} as follows:
3534 AVAIL_OUT[bb] = union (AVAIL_IN[bb], LOCAL[bb])
3535 AVAIL_IN[bb] = intersect (AVAIL_OUT[predecessors])
3537 This is basically what we do in lcm's compute_available(), but here
3538 we calculate two sets of sets (one for STOREs and one for READs),
3539 and we work on a region instead of the entire CFG.
3541 REGION is the TM region.
3542 BLOCKS are the basic blocks in the region. */
3544 static void
3545 tm_memopt_compute_available (struct tm_region *region,
3546 vec<basic_block> blocks)
3548 edge e;
3549 basic_block *worklist, *qin, *qout, *qend, bb;
3550 unsigned int qlen, i;
3551 edge_iterator ei;
3552 bool changed;
3554 /* Allocate a worklist array/queue. Entries are only added to the
3555 list if they were not already on the list. So the size is
3556 bounded by the number of basic blocks in the region. */
3557 qlen = blocks.length () - 1;
3558 qin = qout = worklist =
3559 XNEWVEC (basic_block, qlen);
3561 /* Put every block in the region on the worklist. */
3562 for (i = 0; blocks.iterate (i, &bb); ++i)
3564 /* Seed AVAIL_OUT with the LOCAL set. */
3565 bitmap_ior_into (STORE_AVAIL_OUT (bb), STORE_LOCAL (bb));
3566 bitmap_ior_into (READ_AVAIL_OUT (bb), READ_LOCAL (bb));
3568 AVAIL_IN_WORKLIST_P (bb) = true;
3569 /* No need to insert the entry block, since it has an AVIN of
3570 null, and an AVOUT that has already been seeded in. */
3571 if (bb != region->entry_block)
3572 *qin++ = bb;
3575 /* The entry block has been initialized with the local sets. */
3576 BB_VISITED_P (region->entry_block) = true;
3578 qin = worklist;
3579 qend = &worklist[qlen];
3581 /* Iterate until the worklist is empty. */
3582 while (qlen)
3584 /* Take the first entry off the worklist. */
3585 bb = *qout++;
3586 qlen--;
3588 if (qout >= qend)
3589 qout = worklist;
3591 /* This block can be added to the worklist again if necessary. */
3592 AVAIL_IN_WORKLIST_P (bb) = false;
3593 tm_memopt_compute_avin (bb);
3595 /* Note: We do not add the LOCAL sets here because we already
3596 seeded the AVAIL_OUT sets with them. */
3597 changed = bitmap_ior_into (STORE_AVAIL_OUT (bb), STORE_AVAIL_IN (bb));
3598 changed |= bitmap_ior_into (READ_AVAIL_OUT (bb), READ_AVAIL_IN (bb));
3599 if (changed
3600 && (region->exit_blocks == NULL
3601 || !bitmap_bit_p (region->exit_blocks, bb->index)))
3602 /* If the out state of this block changed, then we need to add
3603 its successors to the worklist if they are not already in. */
3604 FOR_EACH_EDGE (e, ei, bb->succs)
3605 if (!AVAIL_IN_WORKLIST_P (e->dest) && e->dest != EXIT_BLOCK_PTR)
3607 *qin++ = e->dest;
3608 AVAIL_IN_WORKLIST_P (e->dest) = true;
3609 qlen++;
3611 if (qin >= qend)
3612 qin = worklist;
3616 free (worklist);
3618 if (dump_file)
3619 dump_tm_memopt_sets (blocks);
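/* Editorial worked example of the equations above: with two region
   blocks B1 -> B2 where B1 does a TM store to *p and B2 one to *q,

       STORE_AVAIL_OUT[B1] = { p }        (seeded from STORE_LOCAL[B1])
       STORE_AVAIL_IN [B2] = { p }        (intersection over predecessors)
       STORE_AVAIL_OUT[B2] = { p, q }     (AVAIL_IN union LOCAL)

   so a later load of *p in B2 is known to follow a transactional store
   and can be downgraded to a read-after-write variant.  */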
3622 /* Compute ANTIC sets for every basic block in BLOCKS.
3624 We compute STORE_ANTIC_OUT as follows:
3626 STORE_ANTIC_OUT[bb] = union(STORE_ANTIC_IN[bb], STORE_LOCAL[bb])
3627 STORE_ANTIC_IN[bb] = intersect(STORE_ANTIC_OUT[successors])
3629 REGION is the TM region.
3630 BLOCKS are the basic blocks in the region. */
3632 static void
3633 tm_memopt_compute_antic (struct tm_region *region,
3634 vec<basic_block> blocks)
3636 edge e;
3637 basic_block *worklist, *qin, *qout, *qend, bb;
3638 unsigned int qlen;
3639 int i;
3640 edge_iterator ei;
3642 /* Allocate a worklist array/queue. Entries are only added to the
3643 list if they were not already on the list. So the size is
3644 bounded by the number of basic blocks in the region. */
3645 qin = qout = worklist = XNEWVEC (basic_block, blocks.length ());
3647 for (qlen = 0, i = blocks.length () - 1; i >= 0; --i)
3649 bb = blocks[i];
3651 /* Seed ANTIC_OUT with the LOCAL set. */
3652 bitmap_ior_into (STORE_ANTIC_OUT (bb), STORE_LOCAL (bb));
3654 /* Put every block in the region on the worklist. */
3655 AVAIL_IN_WORKLIST_P (bb) = true;
3656 /* No need to insert exit blocks, since their ANTIC_IN is NULL,
3657 and their ANTIC_OUT has already been seeded in. */
3658 if (region->exit_blocks
3659 && !bitmap_bit_p (region->exit_blocks, bb->index))
3661 qlen++;
3662 *qin++ = bb;
3666 /* The exit blocks have been initialized with the local sets. */
3667 if (region->exit_blocks)
3669 unsigned int i;
3670 bitmap_iterator bi;
3671 EXECUTE_IF_SET_IN_BITMAP (region->exit_blocks, 0, i, bi)
3672 BB_VISITED_P (BASIC_BLOCK (i)) = true;
3675 qin = worklist;
3676 qend = &worklist[qlen];
3678 /* Iterate until the worklist is empty. */
3679 while (qlen)
3681 /* Take the first entry off the worklist. */
3682 bb = *qout++;
3683 qlen--;
3685 if (qout >= qend)
3686 qout = worklist;
3688 /* This block can be added to the worklist again if necessary. */
3689 AVAIL_IN_WORKLIST_P (bb) = false;
3690 tm_memopt_compute_antin (bb);
3692 /* Note: We do not add the LOCAL sets here because we already
3693 seeded the ANTIC_OUT sets with them. */
3694 if (bitmap_ior_into (STORE_ANTIC_OUT (bb), STORE_ANTIC_IN (bb))
3695 && bb != region->entry_block)
3696 /* If the out state of this block changed, then we need to add
3697 its predecessors to the worklist if they are not already in. */
3698 FOR_EACH_EDGE (e, ei, bb->preds)
3699 if (!AVAIL_IN_WORKLIST_P (e->src))
3701 *qin++ = e->src;
3702 AVAIL_IN_WORKLIST_P (e->src) = true;
3703 qlen++;
3705 if (qin >= qend)
3706 qin = worklist;
3710 free (worklist);
3712 if (dump_file)
3713 dump_tm_memopt_sets (blocks);
3716 /* Offsets of load variants from TM_LOAD. For example,
3717 BUILT_IN_TM_LOAD_RAR* is an offset of 1 from BUILT_IN_TM_LOAD*.
3718 See gtm-builtins.def. */
3719 #define TRANSFORM_RAR 1
3720 #define TRANSFORM_RAW 2
3721 #define TRANSFORM_RFW 3
3722 /* Offsets of store variants from TM_STORE. */
3723 #define TRANSFORM_WAR 1
3724 #define TRANSFORM_WAW 2
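/* Editorial sketch: tm_memopt_transform_stmt simply adds one of these
   offsets to the function code of the builtin being called, so a plain
   4-byte load whose address already has a transactional store available
   is retargeted two entries down in gtm-builtins.def:

       BUILT_IN_TM_LOAD_4 + TRANSFORM_RAW  ->  read-after-write variant

   This relies on the RAR/RAW/RFW variants being laid out immediately
   after each plain load/store entry.  */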
3726 /* Inform about a load/store optimization. */
3728 static void
3729 dump_tm_memopt_transform (gimple stmt)
3731 if (dump_file)
3733 fprintf (dump_file, "TM memopt: transforming: ");
3734 print_gimple_stmt (dump_file, stmt, 0, 0);
3735 fprintf (dump_file, "\n");
3739 /* Perform a read/write optimization. Replaces the TM builtin in STMT
3740 by a builtin that is OFFSET entries down in the builtins table in
3741 gtm-builtins.def. */
3743 static void
3744 tm_memopt_transform_stmt (unsigned int offset,
3745 gimple stmt,
3746 gimple_stmt_iterator *gsi)
3748 tree fn = gimple_call_fn (stmt);
3749 gcc_assert (TREE_CODE (fn) == ADDR_EXPR);
3750 TREE_OPERAND (fn, 0)
3751 = builtin_decl_explicit ((enum built_in_function)
3752 (DECL_FUNCTION_CODE (TREE_OPERAND (fn, 0))
3753 + offset));
3754 gimple_call_set_fn (stmt, fn);
3755 gsi_replace (gsi, stmt, true);
3756 dump_tm_memopt_transform (stmt);
3759 /* Perform the actual TM memory optimization transformations in the
3760 basic blocks in BLOCKS. */
3762 static void
3763 tm_memopt_transform_blocks (vec<basic_block> blocks)
3765 size_t i;
3766 basic_block bb;
3767 gimple_stmt_iterator gsi;
3769 for (i = 0; blocks.iterate (i, &bb); ++i)
3771 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3773 gimple stmt = gsi_stmt (gsi);
3774 bitmap read_avail = READ_AVAIL_IN (bb);
3775 bitmap store_avail = STORE_AVAIL_IN (bb);
3776 bitmap store_antic = STORE_ANTIC_OUT (bb);
3777 unsigned int loc;
3779 if (is_tm_simple_load (stmt))
3781 loc = tm_memopt_value_number (stmt, NO_INSERT);
3782 if (store_avail && bitmap_bit_p (store_avail, loc))
3783 tm_memopt_transform_stmt (TRANSFORM_RAW, stmt, &gsi);
3784 else if (store_antic && bitmap_bit_p (store_antic, loc))
3786 tm_memopt_transform_stmt (TRANSFORM_RFW, stmt, &gsi);
3787 bitmap_set_bit (store_avail, loc);
3789 else if (read_avail && bitmap_bit_p (read_avail, loc))
3790 tm_memopt_transform_stmt (TRANSFORM_RAR, stmt, &gsi);
3791 else
3792 bitmap_set_bit (read_avail, loc);
3794 else if (is_tm_simple_store (stmt))
3796 loc = tm_memopt_value_number (stmt, NO_INSERT);
3797 if (store_avail && bitmap_bit_p (store_avail, loc))
3798 tm_memopt_transform_stmt (TRANSFORM_WAW, stmt, &gsi);
3799 else
3801 if (read_avail && bitmap_bit_p (read_avail, loc))
3802 tm_memopt_transform_stmt (TRANSFORM_WAR, stmt, &gsi);
3803 bitmap_set_bit (store_avail, loc);
3810 /* Return a new set of bitmaps for a BB. */
3812 static struct tm_memopt_bitmaps *
3813 tm_memopt_init_sets (void)
3815 struct tm_memopt_bitmaps *b
3816 = XOBNEW (&tm_memopt_obstack.obstack, struct tm_memopt_bitmaps);
3817 b->store_avail_in = BITMAP_ALLOC (&tm_memopt_obstack);
3818 b->store_avail_out = BITMAP_ALLOC (&tm_memopt_obstack);
3819 b->store_antic_in = BITMAP_ALLOC (&tm_memopt_obstack);
3820 b->store_antic_out = BITMAP_ALLOC (&tm_memopt_obstack);
3822 b->read_avail_in = BITMAP_ALLOC (&tm_memopt_obstack);
3823 b->read_avail_out = BITMAP_ALLOC (&tm_memopt_obstack);
3824 b->read_local = BITMAP_ALLOC (&tm_memopt_obstack);
3825 b->store_local = BITMAP_ALLOC (&tm_memopt_obstack);
3826 return b;
3829 /* Free sets computed for each BB. */
3831 static void
3832 tm_memopt_free_sets (vec<basic_block> blocks)
3834 size_t i;
3835 basic_block bb;
3837 for (i = 0; blocks.iterate (i, &bb); ++i)
3838 bb->aux = NULL;
3841 /* Clear the visited bit for every basic block in BLOCKS. */
3843 static void
3844 tm_memopt_clear_visited (vec<basic_block> blocks)
3846 size_t i;
3847 basic_block bb;
3849 for (i = 0; blocks.iterate (i, &bb); ++i)
3850 BB_VISITED_P (bb) = false;
3853 /* Replace TM load/stores with hints for the runtime. We handle
3854 things like read-after-write, write-after-read, read-after-read,
3855 read-for-write, etc. */
3857 static unsigned int
3858 execute_tm_memopt (void)
3860 struct tm_region *region;
3861 vec<basic_block> bbs;
3863 tm_memopt_value_id = 0;
3864 tm_memopt_value_numbers.create (10);
3866 for (region = all_tm_regions; region; region = region->next)
3868 /* All the TM stores/loads in the current region. */
3869 size_t i;
3870 basic_block bb;
3872 bitmap_obstack_initialize (&tm_memopt_obstack);
3874 /* Save all BBs for the current region. */
3875 bbs = get_tm_region_blocks (region->entry_block,
3876 region->exit_blocks,
3877 region->irr_blocks,
3878 NULL,
3879 false);
3881 /* Collect all the memory operations. */
3882 for (i = 0; bbs.iterate (i, &bb); ++i)
3884 bb->aux = tm_memopt_init_sets ();
3885 tm_memopt_accumulate_memops (bb);
3888 /* Solve data flow equations and transform each block accordingly. */
3889 tm_memopt_clear_visited (bbs);
3890 tm_memopt_compute_available (region, bbs);
3891 tm_memopt_clear_visited (bbs);
3892 tm_memopt_compute_antic (region, bbs);
3893 tm_memopt_transform_blocks (bbs);
3895 tm_memopt_free_sets (bbs);
3896 bbs.release ();
3897 bitmap_obstack_release (&tm_memopt_obstack);
3898 tm_memopt_value_numbers.empty ();
3901 tm_memopt_value_numbers.dispose ();
3902 return 0;
3905 static bool
3906 gate_tm_memopt (void)
3908 return flag_tm && optimize > 0;
3911 namespace {
3913 const pass_data pass_data_tm_memopt =
3915 GIMPLE_PASS, /* type */
3916 "tmmemopt", /* name */
3917 OPTGROUP_NONE, /* optinfo_flags */
3918 true, /* has_gate */
3919 true, /* has_execute */
3920 TV_TRANS_MEM, /* tv_id */
3921 ( PROP_ssa | PROP_cfg ), /* properties_required */
3922 0, /* properties_provided */
3923 0, /* properties_destroyed */
3924 0, /* todo_flags_start */
3925 0, /* todo_flags_finish */
3928 class pass_tm_memopt : public gimple_opt_pass
3930 public:
3931 pass_tm_memopt (gcc::context *ctxt)
3932 : gimple_opt_pass (pass_data_tm_memopt, ctxt)
3935 /* opt_pass methods: */
3936 bool gate () { return gate_tm_memopt (); }
3937 unsigned int execute () { return execute_tm_memopt (); }
3939 }; // class pass_tm_memopt
3941 } // anon namespace
3943 gimple_opt_pass *
3944 make_pass_tm_memopt (gcc::context *ctxt)
3946 return new pass_tm_memopt (ctxt);
3950 /* Interprocedural analysis for the creation of transactional clones.
3951 The aim of this pass is to find which functions are referenced in
3952 a non-irrevocable transaction context, and for those over which
3953 we have control (or user directive), create a version of the
3954 function which uses only the transactional interface to reference
3955 protected memories. This analysis proceeds in several steps:
3957 (1) Collect the set of all possible transactional clones:
3959 (a) For each local public function marked tm_callable, push
3960 it onto the tm_callee queue.
3962 (b) For all local functions, scan for calls in transaction blocks.
3963 Push the caller and callee onto the tm_caller and tm_callee
3964 queues. Count the number of callers for each callee.
3966 (c) For each local function on the callee list, assume we will
3967 create a transactional clone. Push *all* calls onto the
3968 callee queues; count the number of clone callers separately
3969 from the number of original callers.
3971 (2) Propagate irrevocable status up the dominator tree:
3973 (a) Any external function on the callee list that is not marked
3974 tm_callable is irrevocable. Push all callers of such onto
3975 a worklist.
3977 (b) For each function on the worklist, mark each block that
3978 contains an irrevocable call. Use the AND operator to
3979 propagate that mark up the dominator tree.
3981 (c) If we reach the entry block for a possible transactional
3982 clone, then the transactional clone is irrevocable, and
3983 we should not create the clone after all. Push all
3984 callers onto the worklist.
3986 (d) Place tm_irrevocable calls at the beginning of the relevant
3987 blocks. Special case here is the entry block for the entire
3988 transaction region; there we mark it GTMA_DOES_GO_IRREVOCABLE for
3989 the library to begin the region in serial mode. Decrement
3990 the call count for all callees in the irrevocable region.
3992 (3) Create the transactional clones:
3994 Any tm_callee that still has a non-zero call count is cloned.
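/* A hedged, user-level sketch of the three steps above (function and
   variable names are hypothetical):

     int counter;
     void counter_inc (void) __attribute__((transaction_callable));
     void counter_inc (void) { ++counter; }       // (1a) queued on tm_callee

     void work (void)
     {
       __transaction_atomic { counter_inc (); }   // (1b) caller/callee noted
     }

   Nothing above is irrevocable, so counter_inc still has a non-zero call
   count after step (2), and step (3) creates its transactional clone
   (mangled "_ZGTt11counter_inc" by tm_mangle below) which references
   memory only through the TM builtins.  */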
3997 /* This structure is stored in the AUX field of each cgraph_node. */
3998 struct tm_ipa_cg_data
4000 /* The transactional clone created for this function. */
4001 struct cgraph_node *clone;
4003 /* The tm regions in the normal function. */
4004 struct tm_region *all_tm_regions;
4006 /* The blocks of the normal/clone functions that contain irrevocable
4007 calls, or blocks that are post-dominated by irrevocable calls. */
4008 bitmap irrevocable_blocks_normal;
4009 bitmap irrevocable_blocks_clone;
4011 /* The blocks of the normal function that are involved in transactions. */
4012 bitmap transaction_blocks_normal;
4014 /* The number of callers to the transactional clone of this function
4015 from normal and transactional clones respectively. */
4016 unsigned tm_callers_normal;
4017 unsigned tm_callers_clone;
4019 /* True if all calls to this function's transactional clone
4020 are irrevocable. Also automatically true if the function
4021 has no transactional clone. */
4022 bool is_irrevocable;
4024 /* Flags indicating the presence of this function in various queues. */
4025 bool in_callee_queue;
4026 bool in_worklist;
4028 /* Flags indicating the kind of scan desired while in the worklist. */
4029 bool want_irr_scan_normal;
4032 typedef vec<cgraph_node_ptr> cgraph_node_queue;
4034 /* Return the ipa data associated with NODE, allocating zeroed memory
4035 if necessary. TRAVERSE_ALIASES is true if we must traverse aliases
4036 and set *NODE accordingly. */
4038 static struct tm_ipa_cg_data *
4039 get_cg_data (struct cgraph_node **node, bool traverse_aliases)
4041 struct tm_ipa_cg_data *d;
4043 if (traverse_aliases && (*node)->alias)
4044 *node = cgraph_alias_target (*node);
4046 d = (struct tm_ipa_cg_data *) (*node)->aux;
4048 if (d == NULL)
4050 d = (struct tm_ipa_cg_data *)
4051 obstack_alloc (&tm_obstack.obstack, sizeof (*d));
4052 (*node)->aux = (void *) d;
4053 memset (d, 0, sizeof (*d));
4056 return d;
4059 /* Add NODE to the end of QUEUE, unless IN_QUEUE_P indicates that
4060 it is already present. */
4062 static void
4063 maybe_push_queue (struct cgraph_node *node,
4064 cgraph_node_queue *queue_p, bool *in_queue_p)
4066 if (!*in_queue_p)
4068 *in_queue_p = true;
4069 queue_p->safe_push (node);
4073 /* Duplicate the basic blocks in QUEUE for use in the uninstrumented
4074 code path. QUEUE are the basic blocks inside the transaction
4075 represented in REGION.
4077 Later in split_code_paths() we will add the conditional to choose
4078 between the two alternatives. */
4080 static void
4081 ipa_uninstrument_transaction (struct tm_region *region,
4082 vec<basic_block> queue)
4084 gimple transaction = region->transaction_stmt;
4085 basic_block transaction_bb = gimple_bb (transaction);
4086 int n = queue.length ();
4087 basic_block *new_bbs = XNEWVEC (basic_block, n);
4089 copy_bbs (queue.address (), n, new_bbs, NULL, 0, NULL, NULL, transaction_bb,
4090 true);
4091 edge e = make_edge (transaction_bb, new_bbs[0], EDGE_TM_UNINSTRUMENTED);
4092 add_phi_args_after_copy (new_bbs, n, e);
4094 // Now we will have a GIMPLE_TRANSACTION with 3 possible edges out of it.
4095 // a) EDGE_FALLTHRU into the transaction
4096 // b) EDGE_TM_ABORT out of the transaction
4097 // c) EDGE_TM_UNINSTRUMENTED into the uninstrumented blocks.
4099 free (new_bbs);
4102 /* A subroutine of ipa_tm_scan_calls_transaction and ipa_tm_scan_calls_clone.
4103 Queue all callees within block BB. */
4105 static void
4106 ipa_tm_scan_calls_block (cgraph_node_queue *callees_p,
4107 basic_block bb, bool for_clone)
4109 gimple_stmt_iterator gsi;
4111 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4113 gimple stmt = gsi_stmt (gsi);
4114 if (is_gimple_call (stmt) && !is_tm_pure_call (stmt))
4116 tree fndecl = gimple_call_fndecl (stmt);
4117 if (fndecl)
4119 struct tm_ipa_cg_data *d;
4120 unsigned *pcallers;
4121 struct cgraph_node *node;
4123 if (is_tm_ending_fndecl (fndecl))
4124 continue;
4125 if (find_tm_replacement_function (fndecl))
4126 continue;
4128 node = cgraph_get_node (fndecl);
4129 gcc_assert (node != NULL);
4130 d = get_cg_data (&node, true);
4132 pcallers = (for_clone ? &d->tm_callers_clone
4133 : &d->tm_callers_normal);
4134 *pcallers += 1;
4136 maybe_push_queue (node, callees_p, &d->in_callee_queue);
4142 /* Scan all calls in NODE that are within a transaction region,
4143 and push the resulting nodes into the callee queue. */
4145 static void
4146 ipa_tm_scan_calls_transaction (struct tm_ipa_cg_data *d,
4147 cgraph_node_queue *callees_p)
4149 struct tm_region *r;
4151 d->transaction_blocks_normal = BITMAP_ALLOC (&tm_obstack);
4152 d->all_tm_regions = all_tm_regions;
4154 for (r = all_tm_regions; r; r = r->next)
4156 vec<basic_block> bbs;
4157 basic_block bb;
4158 unsigned i;
4160 bbs = get_tm_region_blocks (r->entry_block, r->exit_blocks, NULL,
4161 d->transaction_blocks_normal, false);
4163 // Generate the uninstrumented code path for this transaction.
4164 ipa_uninstrument_transaction (r, bbs);
4166 FOR_EACH_VEC_ELT (bbs, i, bb)
4167 ipa_tm_scan_calls_block (callees_p, bb, false);
4169 bbs.release ();
4172 // ??? copy_bbs should maintain cgraph edges for the blocks as it is
4173 // copying them, rather than forcing us to do this externally.
4174 rebuild_cgraph_edges ();
4176 // ??? In ipa_uninstrument_transaction we don't try to update dominators
4177 // because copy_bbs doesn't return a VEC like iterate_fix_dominators expects.
4178 // Instead, just release dominators here so update_ssa recomputes them.
4179 free_dominance_info (CDI_DOMINATORS);
4181 // When building the uninstrumented code path, copy_bbs will have invoked
4182 // create_new_def_for starting an "ssa update context". There is only one
4183 // instance of this context, so resolve ssa updates before moving on to
4184 // the next function.
4185 update_ssa (TODO_update_ssa);
4188 /* Scan all calls in NODE as if this is the transactional clone,
4189 and push the destinations into the callee queue. */
4191 static void
4192 ipa_tm_scan_calls_clone (struct cgraph_node *node,
4193 cgraph_node_queue *callees_p)
4195 struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
4196 basic_block bb;
4198 FOR_EACH_BB_FN (bb, fn)
4199 ipa_tm_scan_calls_block (callees_p, bb, true);
4202 /* The function NODE has been detected to be irrevocable. Push all
4203 of its callers onto WORKLIST for the purpose of re-scanning them. */
4205 static void
4206 ipa_tm_note_irrevocable (struct cgraph_node *node,
4207 cgraph_node_queue *worklist_p)
4209 struct tm_ipa_cg_data *d = get_cg_data (&node, true);
4210 struct cgraph_edge *e;
4212 d->is_irrevocable = true;
4214 for (e = node->callers; e ; e = e->next_caller)
4216 basic_block bb;
4217 struct cgraph_node *caller;
4219 /* Don't examine recursive calls. */
4220 if (e->caller == node)
4221 continue;
4222 /* Even if we think we can go irrevocable, believe the user
4223 above all. */
4224 if (is_tm_safe_or_pure (e->caller->decl))
4225 continue;
4227 caller = e->caller;
4228 d = get_cg_data (&caller, true);
4230 /* Check if the call site lies within a transactional region of
4231 the caller. If so, schedule the caller for normal re-scan too. */
4232 bb = gimple_bb (e->call_stmt);
4233 gcc_assert (bb != NULL);
4234 if (d->transaction_blocks_normal
4235 && bitmap_bit_p (d->transaction_blocks_normal, bb->index))
4236 d->want_irr_scan_normal = true;
4238 maybe_push_queue (caller, worklist_p, &d->in_worklist);
4242 /* A subroutine of ipa_tm_scan_irr_blocks; return true iff any statement
4243 within the block is irrevocable. */
4245 static bool
4246 ipa_tm_scan_irr_block (basic_block bb)
4248 gimple_stmt_iterator gsi;
4249 tree fn;
4251 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4253 gimple stmt = gsi_stmt (gsi);
4254 switch (gimple_code (stmt))
4256 case GIMPLE_ASSIGN:
4257 if (gimple_assign_single_p (stmt))
4259 tree lhs = gimple_assign_lhs (stmt);
4260 tree rhs = gimple_assign_rhs1 (stmt);
4261 if (volatile_var_p (lhs) || volatile_var_p (rhs))
4262 return true;
4264 break;
4266 case GIMPLE_CALL:
4268 tree lhs = gimple_call_lhs (stmt);
4269 if (lhs && volatile_var_p (lhs))
4270 return true;
4272 if (is_tm_pure_call (stmt))
4273 break;
4275 fn = gimple_call_fn (stmt);
4277 /* Functions with the attribute are by definition irrevocable. */
4278 if (is_tm_irrevocable (fn))
4279 return true;
4281 /* For direct function calls, go ahead and check for replacement
4282 functions, or transitive irrevocable functions. For indirect
4283 functions, we'll ask the runtime. */
4284 if (TREE_CODE (fn) == ADDR_EXPR)
4286 struct tm_ipa_cg_data *d;
4287 struct cgraph_node *node;
4289 fn = TREE_OPERAND (fn, 0);
4290 if (is_tm_ending_fndecl (fn))
4291 break;
4292 if (find_tm_replacement_function (fn))
4293 break;
4295 node = cgraph_get_node (fn);
4296 d = get_cg_data (&node, true);
4298 /* Return true if irrevocable, but above all, believe
4299 the user. */
4300 if (d->is_irrevocable
4301 && !is_tm_safe_or_pure (fn))
4302 return true;
4304 break;
4307 case GIMPLE_ASM:
4308 /* ??? The Approved Method of indicating that an inline
4309 assembly statement is not relevant to the transaction
4310 is to wrap it in a __tm_waiver block. This is not
4311 yet implemented, so we can't check for it. */
4312 if (is_tm_safe (current_function_decl))
4314 tree t = build1 (NOP_EXPR, void_type_node, size_zero_node);
4315 SET_EXPR_LOCATION (t, gimple_location (stmt));
4316 error ("%Kasm not allowed in %<transaction_safe%> function", t);
4318 return true;
4320 default:
4321 break;
4325 return false;
4328 /* For each of the blocks seeded within PQUEUE, walk the CFG looking
4329 for new irrevocable blocks, marking them in NEW_IRR. Don't bother
4330 scanning past OLD_IRR or EXIT_BLOCKS. */
4332 static bool
4333 ipa_tm_scan_irr_blocks (vec<basic_block> *pqueue, bitmap new_irr,
4334 bitmap old_irr, bitmap exit_blocks)
4336 bool any_new_irr = false;
4337 edge e;
4338 edge_iterator ei;
4339 bitmap visited_blocks = BITMAP_ALLOC (NULL);
4343 basic_block bb = pqueue->pop ();
4345 /* Don't re-scan blocks we know already are irrevocable. */
4346 if (old_irr && bitmap_bit_p (old_irr, bb->index))
4347 continue;
4349 if (ipa_tm_scan_irr_block (bb))
4351 bitmap_set_bit (new_irr, bb->index);
4352 any_new_irr = true;
4354 else if (exit_blocks == NULL || !bitmap_bit_p (exit_blocks, bb->index))
4356 FOR_EACH_EDGE (e, ei, bb->succs)
4357 if (!bitmap_bit_p (visited_blocks, e->dest->index))
4359 bitmap_set_bit (visited_blocks, e->dest->index);
4360 pqueue->safe_push (e->dest);
4364 while (!pqueue->is_empty ());
4366 BITMAP_FREE (visited_blocks);
4368 return any_new_irr;
4371 /* Propagate the irrevocable property both up and down the dominator tree.
4372 ENTRY_BLOCK is the entry block of the region being scanned; EXIT_BLOCKS
4373 are the exit blocks of the TM regions; OLD_IRR holds the fully propagated
4374 results of a previous scan of the dominator tree; NEW_IRR is the set of
4375 new blocks gaining the irrevocable property during the current scan. */
4377 static void
4378 ipa_tm_propagate_irr (basic_block entry_block, bitmap new_irr,
4379 bitmap old_irr, bitmap exit_blocks)
4381 vec<basic_block> bbs;
4382 bitmap all_region_blocks;
4384 /* If this block is in the old set, no need to rescan. */
4385 if (old_irr && bitmap_bit_p (old_irr, entry_block->index))
4386 return;
4388 all_region_blocks = BITMAP_ALLOC (&tm_obstack);
4389 bbs = get_tm_region_blocks (entry_block, exit_blocks, NULL,
4390 all_region_blocks, false);
4393 basic_block bb = bbs.pop ();
4394 bool this_irr = bitmap_bit_p (new_irr, bb->index);
4395 bool all_son_irr = false;
4396 edge_iterator ei;
4397 edge e;
4399 /* Propagate up: if all of my successors are irrevocable, so am I,
4400 provided there is at least one successor. */
4401 if (!this_irr)
4403 FOR_EACH_EDGE (e, ei, bb->succs)
4405 if (!bitmap_bit_p (new_irr, e->dest->index))
4407 all_son_irr = false;
4408 break;
4410 else
4411 all_son_irr = true;
4413 if (all_son_irr)
4415 /* Add block to new_irr if it hasn't already been processed. */
4416 if (!old_irr || !bitmap_bit_p (old_irr, bb->index))
4418 bitmap_set_bit (new_irr, bb->index);
4419 this_irr = true;
4424 /* Propagate down to everyone we immediately dominate. */
4425 if (this_irr)
4427 basic_block son;
4428 for (son = first_dom_son (CDI_DOMINATORS, bb);
4429 son;
4430 son = next_dom_son (CDI_DOMINATORS, son))
4432 /* Make sure block is actually in a TM region, and it
4433 isn't already in old_irr. */
4434 if ((!old_irr || !bitmap_bit_p (old_irr, son->index))
4435 && bitmap_bit_p (all_region_blocks, son->index))
4436 bitmap_set_bit (new_irr, son->index);
4440 while (!bbs.is_empty ());
4442 BITMAP_FREE (all_region_blocks);
4443 bbs.release ();
4446 static void
4447 ipa_tm_decrement_clone_counts (basic_block bb, bool for_clone)
4449 gimple_stmt_iterator gsi;
4451 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4453 gimple stmt = gsi_stmt (gsi);
4454 if (is_gimple_call (stmt) && !is_tm_pure_call (stmt))
4456 tree fndecl = gimple_call_fndecl (stmt);
4457 if (fndecl)
4459 struct tm_ipa_cg_data *d;
4460 unsigned *pcallers;
4461 struct cgraph_node *tnode;
4463 if (is_tm_ending_fndecl (fndecl))
4464 continue;
4465 if (find_tm_replacement_function (fndecl))
4466 continue;
4468 tnode = cgraph_get_node (fndecl);
4469 d = get_cg_data (&tnode, true);
4471 pcallers = (for_clone ? &d->tm_callers_clone
4472 : &d->tm_callers_normal);
4474 gcc_assert (*pcallers > 0);
4475 *pcallers -= 1;
4481 /* (Re-)Scan the transaction blocks in NODE for calls to irrevocable functions,
4482 as well as other irrevocable actions such as inline assembly. Mark all
4483 such blocks as irrevocable and decrement the number of calls to
4484 transactional clones. Return true if, for the transactional clone, the
4485 entire function is irrevocable. */
4487 static bool
4488 ipa_tm_scan_irr_function (struct cgraph_node *node, bool for_clone)
4490 struct tm_ipa_cg_data *d;
4491 bitmap new_irr, old_irr;
4492 vec<basic_block> queue;
4493 bool ret = false;
4495 /* Builtin operators (operator new, and such). */
4496 if (DECL_STRUCT_FUNCTION (node->decl) == NULL
4497 || DECL_STRUCT_FUNCTION (node->decl)->cfg == NULL)
4498 return false;
4500 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
4501 calculate_dominance_info (CDI_DOMINATORS);
4503 d = get_cg_data (&node, true);
4504 queue.create (10);
4505 new_irr = BITMAP_ALLOC (&tm_obstack);
4507 /* Scan each tm region, propagating irrevocable status through the tree. */
4508 if (for_clone)
4510 old_irr = d->irrevocable_blocks_clone;
4511 queue.quick_push (single_succ (ENTRY_BLOCK_PTR));
4512 if (ipa_tm_scan_irr_blocks (&queue, new_irr, old_irr, NULL))
4514 ipa_tm_propagate_irr (single_succ (ENTRY_BLOCK_PTR), new_irr,
4515 old_irr, NULL);
4516 ret = bitmap_bit_p (new_irr, single_succ (ENTRY_BLOCK_PTR)->index);
4519 else
4521 struct tm_region *region;
4523 old_irr = d->irrevocable_blocks_normal;
4524 for (region = d->all_tm_regions; region; region = region->next)
4526 queue.quick_push (region->entry_block);
4527 if (ipa_tm_scan_irr_blocks (&queue, new_irr, old_irr,
4528 region->exit_blocks))
4529 ipa_tm_propagate_irr (region->entry_block, new_irr, old_irr,
4530 region->exit_blocks);
4534 /* If we found any new irrevocable blocks, reduce the call count for
4535 transactional clones within the irrevocable blocks. Save the new
4536 set of irrevocable blocks for next time. */
4537 if (!bitmap_empty_p (new_irr))
4539 bitmap_iterator bmi;
4540 unsigned i;
4542 EXECUTE_IF_SET_IN_BITMAP (new_irr, 0, i, bmi)
4543 ipa_tm_decrement_clone_counts (BASIC_BLOCK (i), for_clone);
4545 if (old_irr)
4547 bitmap_ior_into (old_irr, new_irr);
4548 BITMAP_FREE (new_irr);
4550 else if (for_clone)
4551 d->irrevocable_blocks_clone = new_irr;
4552 else
4553 d->irrevocable_blocks_normal = new_irr;
4555 if (dump_file && new_irr)
4557 const char *dname;
4558 bitmap_iterator bmi;
4559 unsigned i;
4561 dname = lang_hooks.decl_printable_name (current_function_decl, 2);
4562 EXECUTE_IF_SET_IN_BITMAP (new_irr, 0, i, bmi)
4563 fprintf (dump_file, "%s: bb %d goes irrevocable\n", dname, i);
4566 else
4567 BITMAP_FREE (new_irr);
4569 queue.release ();
4570 pop_cfun ();
4572 return ret;
4575 /* Return true if, for the transactional clone of NODE, any call
4576 may enter irrevocable mode. */
4578 static bool
4579 ipa_tm_mayenterirr_function (struct cgraph_node *node)
4581 struct tm_ipa_cg_data *d;
4582 tree decl;
4583 unsigned flags;
4585 d = get_cg_data (&node, true);
4586 decl = node->decl;
4587 flags = flags_from_decl_or_type (decl);
4589 /* Handle some TM builtins. Ordinarily these aren't actually generated
4590 at this point, but handling these functions when written in by the
4591 user makes it easier to build unit tests. */
4592 if (flags & ECF_TM_BUILTIN)
4593 return false;
4595 /* Functions explicitly marked with a TM attribute are decided by the marking alone. */
4596 if (flags & ECF_TM_PURE)
4597 return false;
4598 if (is_tm_safe (decl))
4599 return false;
4600 if (is_tm_irrevocable (decl))
4601 return true;
4602 if (is_tm_callable (decl))
4603 return true;
4604 if (find_tm_replacement_function (decl))
4605 return true;
4607 /* If we aren't seeing the final version of the function we don't
4608 know what it will contain at runtime. */
4609 if (cgraph_function_body_availability (node) < AVAIL_AVAILABLE)
4610 return true;
4612 /* If the function must go irrevocable, then of course true. */
4613 if (d->is_irrevocable)
4614 return true;
4616 /* If there are any blocks marked irrevocable, then the function
4617 as a whole may enter irrevocable. */
4618 if (d->irrevocable_blocks_clone)
4619 return true;
4621 /* We may have previously marked this function as tm_may_enter_irr;
4622 see pass_diagnose_tm_blocks. */
4623 if (node->local.tm_may_enter_irr)
4624 return true;
4626 /* Recurse on the main body for aliases. In general, this will
4627 result in one of the bits above being set so that we will not
4628 have to recurse next time. */
4629 if (node->alias)
4630 return ipa_tm_mayenterirr_function (cgraph_get_node (node->thunk.alias));
4632 /* What remains are unmarked local functions without anything that
4633 forces the function to go irrevocable. */
4634 return false;
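/* Hedged examples of how the checks above classify user markings,
   assuming the documented GNU TM attribute spellings (declarations are
   illustrative):

     __attribute__((transaction_pure))     int p (int);  // false: ECF_TM_PURE
     __attribute__((transaction_safe))     int s (int);  // false: is_tm_safe
     __attribute__((transaction_unsafe))   int u (int);  // true: is_tm_irrevocable
     __attribute__((transaction_callable)) int c (int);  // true: is_tm_callable
     extern int ext (int);          // true: body not available to this unit
*/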
4637 /* Diagnose calls from transaction_safe functions to unmarked
4638 functions that are determined to not be safe. */
4640 static void
4641 ipa_tm_diagnose_tm_safe (struct cgraph_node *node)
4643 struct cgraph_edge *e;
4645 for (e = node->callees; e ; e = e->next_callee)
4646 if (!is_tm_callable (e->callee->decl)
4647 && e->callee->local.tm_may_enter_irr)
4648 error_at (gimple_location (e->call_stmt),
4649 "unsafe function call %qD within "
4650 "%<transaction_safe%> function", e->callee->decl);
4653 /* Diagnose call from atomic transactions to unmarked functions
4654 that are determined to not be safe. */
4656 static void
4657 ipa_tm_diagnose_transaction (struct cgraph_node *node,
4658 struct tm_region *all_tm_regions)
4660 struct tm_region *r;
4662 for (r = all_tm_regions; r ; r = r->next)
4663 if (gimple_transaction_subcode (r->transaction_stmt) & GTMA_IS_RELAXED)
4665 /* Atomic transactions can be nested inside relaxed. */
4666 if (r->inner)
4667 ipa_tm_diagnose_transaction (node, r->inner);
4669 else
4671 vec<basic_block> bbs;
4672 gimple_stmt_iterator gsi;
4673 basic_block bb;
4674 size_t i;
4676 bbs = get_tm_region_blocks (r->entry_block, r->exit_blocks,
4677 r->irr_blocks, NULL, false);
4679 for (i = 0; bbs.iterate (i, &bb); ++i)
4680 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4682 gimple stmt = gsi_stmt (gsi);
4683 tree fndecl;
4685 if (gimple_code (stmt) == GIMPLE_ASM)
4687 error_at (gimple_location (stmt),
4688 "asm not allowed in atomic transaction");
4689 continue;
4692 if (!is_gimple_call (stmt))
4693 continue;
4694 fndecl = gimple_call_fndecl (stmt);
4696 /* Indirect function calls have been diagnosed already. */
4697 if (!fndecl)
4698 continue;
4700 /* Stop at the end of the transaction. */
4701 if (is_tm_ending_fndecl (fndecl))
4703 if (bitmap_bit_p (r->exit_blocks, bb->index))
4704 break;
4705 continue;
4708 /* Marked functions have been diagnosed already. */
4709 if (is_tm_pure_call (stmt))
4710 continue;
4711 if (is_tm_callable (fndecl))
4712 continue;
4714 if (cgraph_local_info (fndecl)->tm_may_enter_irr)
4715 error_at (gimple_location (stmt),
4716 "unsafe function call %qD within "
4717 "atomic transaction", fndecl);
4720 bbs.release ();
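/* A hedged, user-level sketch of the kind of code the two diagnostics
   above reject (function names are hypothetical):

     extern void log_msg (const char *);   // unmarked, may enter irrevocable mode

     __attribute__((transaction_safe)) void f (void)
     {
       log_msg ("x");     // "unsafe function call ... within transaction_safe function"
     }

     void g (void)
     {
       __transaction_atomic {
         __asm__ ("");    // "asm not allowed in atomic transaction"
         log_msg ("x");   // "unsafe function call ... within atomic transaction"
       }
     }
*/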
4724 /* Return a transactional mangled name for the assembler name OLD_ASM_ID.
4725 The returned value is an IDENTIFIER_NODE; the scratch buffers used to
4726 build it are freed before returning. */
4728 static tree
4729 tm_mangle (tree old_asm_id)
4731 const char *old_asm_name;
4732 char *tm_name;
4733 void *alloc = NULL;
4734 struct demangle_component *dc;
4735 tree new_asm_id;
4737 /* Determine if the symbol is already a valid C++ mangled name. Do this
4738 even for C, which might be interfacing with C++ code via appropriately
4739 ugly identifiers. */
4740 /* ??? We could probably do just as well checking for "_Z" and be done. */
4741 old_asm_name = IDENTIFIER_POINTER (old_asm_id);
4742 dc = cplus_demangle_v3_components (old_asm_name, DMGL_NO_OPTS, &alloc);
4744 if (dc == NULL)
4746 char length[8];
4748 do_unencoded:
4749 sprintf (length, "%u", IDENTIFIER_LENGTH (old_asm_id));
4750 tm_name = concat ("_ZGTt", length, old_asm_name, NULL);
4752 else
4754 old_asm_name += 2; /* Skip _Z */
4756 switch (dc->type)
4758 case DEMANGLE_COMPONENT_TRANSACTION_CLONE:
4759 case DEMANGLE_COMPONENT_NONTRANSACTION_CLONE:
4760 /* Don't play silly games, you! */
4761 goto do_unencoded;
4763 case DEMANGLE_COMPONENT_HIDDEN_ALIAS:
4764 /* I'd really like to know if we can ever be passed one of
4765 these from the C++ front end. The Logical Thing would
4766 seem that hidden-alias should be outer-most, so that we
4767 get hidden-alias of a transaction-clone and not vice-versa. */
4768 old_asm_name += 2;
4769 break;
4771 default:
4772 break;
4775 tm_name = concat ("_ZGTt", old_asm_name, NULL);
4777 free (alloc);
4779 new_asm_id = get_identifier (tm_name);
4780 free (tm_name);
4782 return new_asm_id;
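/* Hedged examples of the mangling above (symbol names are illustrative):
     plain C symbol "bar" (length 3)  ->  "_ZGTt3bar"   (the do_unencoded path)
     C++ symbol "_Z3foov"             ->  "_ZGTt3foov"  (skip "_Z", prepend "_ZGTt")  */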
4785 static inline void
4786 ipa_tm_mark_force_output_node (struct cgraph_node *node)
4788 cgraph_mark_force_output_node (node);
4789 node->analyzed = true;
4792 static inline void
4793 ipa_tm_mark_forced_by_abi_node (struct cgraph_node *node)
4795 node->forced_by_abi = true;
4796 node->analyzed = true;
4799 /* Callback data for ipa_tm_create_version_alias. */
4800 struct create_version_alias_info
4802 struct cgraph_node *old_node;
4803 tree new_decl;
4806 /* A subroutine of ipa_tm_create_version, called via
4807 cgraph_for_node_and_aliases. Create new tm clones for each of
4808 the existing aliases. */
4809 static bool
4810 ipa_tm_create_version_alias (struct cgraph_node *node, void *data)
4812 struct create_version_alias_info *info
4813 = (struct create_version_alias_info *)data;
4814 tree old_decl, new_decl, tm_name;
4815 struct cgraph_node *new_node;
4817 if (!node->cpp_implicit_alias)
4818 return false;
4820 old_decl = node->decl;
4821 tm_name = tm_mangle (DECL_ASSEMBLER_NAME (old_decl));
4822 new_decl = build_decl (DECL_SOURCE_LOCATION (old_decl),
4823 TREE_CODE (old_decl), tm_name,
4824 TREE_TYPE (old_decl));
4826 SET_DECL_ASSEMBLER_NAME (new_decl, tm_name);
4827 SET_DECL_RTL (new_decl, NULL);
4829 /* Based loosely on C++'s make_alias_for(). */
4830 TREE_PUBLIC (new_decl) = TREE_PUBLIC (old_decl);
4831 DECL_CONTEXT (new_decl) = DECL_CONTEXT (old_decl);
4832 DECL_LANG_SPECIFIC (new_decl) = DECL_LANG_SPECIFIC (old_decl);
4833 TREE_READONLY (new_decl) = TREE_READONLY (old_decl);
4834 DECL_EXTERNAL (new_decl) = 0;
4835 DECL_ARTIFICIAL (new_decl) = 1;
4836 TREE_ADDRESSABLE (new_decl) = 1;
4837 TREE_USED (new_decl) = 1;
4838 TREE_SYMBOL_REFERENCED (tm_name) = 1;
4840 /* Perform the same remapping to the comdat group. */
4841 if (DECL_ONE_ONLY (new_decl))
4842 DECL_COMDAT_GROUP (new_decl) = tm_mangle (DECL_COMDAT_GROUP (old_decl));
4844 new_node = cgraph_same_body_alias (NULL, new_decl, info->new_decl);
4845 new_node->tm_clone = true;
4846 new_node->externally_visible = info->old_node->externally_visible;
4847 /* ?? Do not traverse aliases here. */
4848 get_cg_data (&node, false)->clone = new_node;
4850 record_tm_clone_pair (old_decl, new_decl);
4852 if (info->old_node->force_output
4853 || ipa_ref_list_first_referring (&info->old_node->ref_list))
4854 ipa_tm_mark_force_output_node (new_node);
4855 if (info->old_node->forced_by_abi)
4856 ipa_tm_mark_forced_by_abi_node (new_node);
4857 return false;
4860 /* Create a copy of the function (possibly declaration only) of OLD_NODE,
4861 appropriate for the transactional clone. */
4863 static void
4864 ipa_tm_create_version (struct cgraph_node *old_node)
4866 tree new_decl, old_decl, tm_name;
4867 struct cgraph_node *new_node;
4869 old_decl = old_node->decl;
4870 new_decl = copy_node (old_decl);
4872 /* DECL_ASSEMBLER_NAME needs to be set before we call
4873 cgraph_copy_node_for_versioning below, because cgraph_node will
4874 fill the assembler_name_hash. */
4875 tm_name = tm_mangle (DECL_ASSEMBLER_NAME (old_decl));
4876 SET_DECL_ASSEMBLER_NAME (new_decl, tm_name);
4877 SET_DECL_RTL (new_decl, NULL);
4878 TREE_SYMBOL_REFERENCED (tm_name) = 1;
4880 /* Perform the same remapping to the comdat group. */
4881 if (DECL_ONE_ONLY (new_decl))
4882 DECL_COMDAT_GROUP (new_decl) = tm_mangle (DECL_COMDAT_GROUP (old_decl));
4884 new_node = cgraph_copy_node_for_versioning (old_node, new_decl, vNULL, NULL);
4885 new_node->local.local = false;
4886 new_node->externally_visible = old_node->externally_visible;
4887 new_node->lowered = true;
4888 new_node->tm_clone = 1;
4889 get_cg_data (&old_node, true)->clone = new_node;
4891 if (cgraph_function_body_availability (old_node) >= AVAIL_OVERWRITABLE)
4893 /* Remap extern inline to static inline. */
4894 /* ??? Is it worth trying to use make_decl_one_only? */
4895 if (DECL_DECLARED_INLINE_P (new_decl) && DECL_EXTERNAL (new_decl))
4897 DECL_EXTERNAL (new_decl) = 0;
4898 TREE_PUBLIC (new_decl) = 0;
4899 DECL_WEAK (new_decl) = 0;
4902 tree_function_versioning (old_decl, new_decl,
4903 NULL, false, NULL,
4904 false, NULL, NULL);
4907 record_tm_clone_pair (old_decl, new_decl);
4909 cgraph_call_function_insertion_hooks (new_node);
4910 if (old_node->force_output
4911 || ipa_ref_list_first_referring (&old_node->ref_list))
4912 ipa_tm_mark_force_output_node (new_node);
4913 if (old_node->forced_by_abi)
4914 ipa_tm_mark_forced_by_abi_node (new_node);
4916 /* Do the same thing, but for any aliases of the original node. */
4918 struct create_version_alias_info data;
4919 data.old_node = old_node;
4920 data.new_decl = new_decl;
4921 cgraph_for_node_and_aliases (old_node, ipa_tm_create_version_alias,
4922 &data, true);
4926 /* Construct a call to TM_IRREVOCABLE and insert it at the beginning of BB. */
4928 static void
4929 ipa_tm_insert_irr_call (struct cgraph_node *node, struct tm_region *region,
4930 basic_block bb)
4932 gimple_stmt_iterator gsi;
4933 gimple g;
4935 transaction_subcode_ior (region, GTMA_MAY_ENTER_IRREVOCABLE);
4937 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_TM_IRREVOCABLE),
4938 1, build_int_cst (NULL_TREE, MODE_SERIALIRREVOCABLE));
4940 split_block_after_labels (bb);
4941 gsi = gsi_after_labels (bb);
4942 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4944 cgraph_create_edge (node,
4945 cgraph_get_create_node
4946 (builtin_decl_explicit (BUILT_IN_TM_IRREVOCABLE)),
4947 g, 0,
4948 compute_call_stmt_bb_frequency (node->decl,
4949 gimple_bb (g)));
4952 /* Construct a call to TM_GETTMCLONE and insert it before GSI. */
4954 static bool
4955 ipa_tm_insert_gettmclone_call (struct cgraph_node *node,
4956 struct tm_region *region,
4957 gimple_stmt_iterator *gsi, gimple stmt)
4959 tree gettm_fn, ret, old_fn, callfn;
4960 gimple g, g2;
4961 bool safe;
4963 old_fn = gimple_call_fn (stmt);
4965 if (TREE_CODE (old_fn) == ADDR_EXPR)
4967 tree fndecl = TREE_OPERAND (old_fn, 0);
4968 tree clone = get_tm_clone_pair (fndecl);
4970 /* By transforming the call into a TM_GETTMCLONE, we are
4971 technically taking the address of the original function and
4972 its clone. Explain this so inlining will know this function
4973 is needed. */
4974 cgraph_mark_address_taken_node (cgraph_get_node (fndecl));
4975 if (clone)
4976 cgraph_mark_address_taken_node (cgraph_get_node (clone));
4979 safe = is_tm_safe (TREE_TYPE (old_fn));
4980 gettm_fn = builtin_decl_explicit (safe ? BUILT_IN_TM_GETTMCLONE_SAFE
4981 : BUILT_IN_TM_GETTMCLONE_IRR);
4982 ret = create_tmp_var (ptr_type_node, NULL);
4984 if (!safe)
4985 transaction_subcode_ior (region, GTMA_MAY_ENTER_IRREVOCABLE);
4987 /* Discard OBJ_TYPE_REF, since we weren't able to fold it. */
4988 if (TREE_CODE (old_fn) == OBJ_TYPE_REF)
4989 old_fn = OBJ_TYPE_REF_EXPR (old_fn);
4991 g = gimple_build_call (gettm_fn, 1, old_fn);
4992 ret = make_ssa_name (ret, g);
4993 gimple_call_set_lhs (g, ret);
4995 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4997 cgraph_create_edge (node, cgraph_get_create_node (gettm_fn), g, 0,
4998 compute_call_stmt_bb_frequency (node->decl,
4999 gimple_bb (g)));
5001 /* Cast return value from tm_gettmclone* into appropriate function
5002 pointer. */
5003 callfn = create_tmp_var (TREE_TYPE (old_fn), NULL);
5004 g2 = gimple_build_assign (callfn,
5005 fold_build1 (NOP_EXPR, TREE_TYPE (callfn), ret));
5006 callfn = make_ssa_name (callfn, g2);
5007 gimple_assign_set_lhs (g2, callfn);
5008 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
5010 /* ??? This is a hack to preserve the NOTHROW bit on the call,
5011 which we would have derived from the decl. Failure to save
5012 this bit means we might have to split the basic block. */
5013 if (gimple_call_nothrow_p (stmt))
5014 gimple_call_set_nothrow (stmt, true);
5016 gimple_call_set_fn (stmt, callfn);
5018 /* Discarding OBJ_TYPE_REF above may produce incompatible LHS and RHS
5019 for a call statement. Fix it. */
5021 tree lhs = gimple_call_lhs (stmt);
5022 tree rettype = TREE_TYPE (gimple_call_fntype (stmt));
5023 if (lhs
5024 && !useless_type_conversion_p (TREE_TYPE (lhs), rettype))
5026 tree temp;
5028 temp = create_tmp_reg (rettype, 0);
5029 gimple_call_set_lhs (stmt, temp);
5031 g2 = gimple_build_assign (lhs,
5032 fold_build1 (VIEW_CONVERT_EXPR,
5033 TREE_TYPE (lhs), temp));
5034 gsi_insert_after (gsi, g2, GSI_SAME_STMT);
5038 update_stmt (stmt);
5040 return true;
5043 /* Helper function for ipa_tm_transform_calls*. Given a call
5044 statement in GSI which resides inside transaction REGION, redirect
5045 the call to either its wrapper function, or its clone. */
5047 static void
5048 ipa_tm_transform_calls_redirect (struct cgraph_node *node,
5049 struct tm_region *region,
5050 gimple_stmt_iterator *gsi,
5051 bool *need_ssa_rename_p)
5053 gimple stmt = gsi_stmt (*gsi);
5054 struct cgraph_node *new_node;
5055 struct cgraph_edge *e = cgraph_edge (node, stmt);
5056 tree fndecl = gimple_call_fndecl (stmt);
5058 /* For indirect calls, pass the address through the runtime. */
5059 if (fndecl == NULL)
5061 *need_ssa_rename_p |=
5062 ipa_tm_insert_gettmclone_call (node, region, gsi, stmt);
5063 return;
5066 /* Handle some TM builtins. Ordinarily these aren't actually generated
5067 at this point, but handling these functions when written in by the
5068 user makes it easier to build unit tests. */
5069 if (flags_from_decl_or_type (fndecl) & ECF_TM_BUILTIN)
5070 return;
5072 /* Fixup recursive calls inside clones. */
5073 /* ??? Why did cgraph_copy_node_for_versioning update the call edges
5074 for recursion but not update the call statements themselves? */
5075 if (e->caller == e->callee && decl_is_tm_clone (current_function_decl))
5077 gimple_call_set_fndecl (stmt, current_function_decl);
5078 return;
5081 /* If there is a replacement, use it. */
5082 fndecl = find_tm_replacement_function (fndecl);
5083 if (fndecl)
5085 new_node = cgraph_get_create_node (fndecl);
5087 /* ??? Mark all transaction_wrap functions tm_may_enter_irr.
5089 We can't do this earlier in record_tm_replacement because
5090 cgraph_remove_unreachable_nodes is called before we inject
5091 references to the node. Further, we can't do this in some
5092 nice central place in ipa_tm_execute because we don't have
5093 the exact list of wrapper functions that would be used.
5094 Marking more wrappers than necessary results in the creation
5095 of unnecessary cgraph_nodes, which can cause some of the
5096 other IPA passes to crash.
5098 We do need to mark these nodes so that we get the proper
5099 result in expand_call_tm. */
5100 /* ??? This seems broken. How is it that we're marking the
5101 CALLEE as may_enter_irr? Surely we should be marking the
5102 CALLER. Also note that find_tm_replacement_function also
5103 contains mappings into the TM runtime, e.g. memcpy. These
5104 we know won't go irrevocable. */
5105 new_node->local.tm_may_enter_irr = 1;
5107 else
5109 struct tm_ipa_cg_data *d;
5110 struct cgraph_node *tnode = e->callee;
5112 d = get_cg_data (&tnode, true);
5113 new_node = d->clone;
5115 /* As we've already skipped pure calls and appropriate builtins,
5116 and we've already marked irrevocable blocks, if we can't come
5117 up with a static replacement, then ask the runtime. */
5118 if (new_node == NULL)
5120 *need_ssa_rename_p |=
5121 ipa_tm_insert_gettmclone_call (node, region, gsi, stmt);
5122 return;
5125 fndecl = new_node->decl;
5128 cgraph_redirect_edge_callee (e, new_node);
5129 gimple_call_set_fndecl (stmt, fndecl);
5132 /* Helper function for ipa_tm_transform_calls. For a given BB,
5133 install calls to tm_irrevocable when IRR_BLOCKS are reached, and
5134 redirect other calls to the generated transactional clone. */
5136 static bool
5137 ipa_tm_transform_calls_1 (struct cgraph_node *node, struct tm_region *region,
5138 basic_block bb, bitmap irr_blocks)
5140 gimple_stmt_iterator gsi;
5141 bool need_ssa_rename = false;
5143 if (irr_blocks && bitmap_bit_p (irr_blocks, bb->index))
5145 ipa_tm_insert_irr_call (node, region, bb);
5146 return true;
5149 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5151 gimple stmt = gsi_stmt (gsi);
5153 if (!is_gimple_call (stmt))
5154 continue;
5155 if (is_tm_pure_call (stmt))
5156 continue;
5158 /* Redirect edges to the appropriate replacement or clone. */
5159 ipa_tm_transform_calls_redirect (node, region, &gsi, &need_ssa_rename);
5162 return need_ssa_rename;
5165 /* Walk the CFG for REGION, beginning at BB. Install calls to
5166 tm_irrevocable when IRR_BLOCKS are reached, and redirect other calls to
5167 the generated transactional clone. */
5169 static bool
5170 ipa_tm_transform_calls (struct cgraph_node *node, struct tm_region *region,
5171 basic_block bb, bitmap irr_blocks)
5173 bool need_ssa_rename = false;
5174 edge e;
5175 edge_iterator ei;
5176 vec<basic_block> queue = vNULL;
5177 bitmap visited_blocks = BITMAP_ALLOC (NULL);
5179 queue.safe_push (bb);
5182 bb = queue.pop ();
5184 need_ssa_rename |=
5185 ipa_tm_transform_calls_1 (node, region, bb, irr_blocks);
5187 if (irr_blocks && bitmap_bit_p (irr_blocks, bb->index))
5188 continue;
5190 if (region && bitmap_bit_p (region->exit_blocks, bb->index))
5191 continue;
5193 FOR_EACH_EDGE (e, ei, bb->succs)
5194 if (!bitmap_bit_p (visited_blocks, e->dest->index))
5196 bitmap_set_bit (visited_blocks, e->dest->index);
5197 queue.safe_push (e->dest);
5200 while (!queue.is_empty ());
5202 queue.release ();
5203 BITMAP_FREE (visited_blocks);
5205 return need_ssa_rename;
5208 /* Transform the calls within the TM regions within NODE. */
5210 static void
5211 ipa_tm_transform_transaction (struct cgraph_node *node)
5213 struct tm_ipa_cg_data *d;
5214 struct tm_region *region;
5215 bool need_ssa_rename = false;
5217 d = get_cg_data (&node, true);
5219 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
5220 calculate_dominance_info (CDI_DOMINATORS);
5222 for (region = d->all_tm_regions; region; region = region->next)
5224 /* If we're sure to go irrevocable, don't transform anything. */
5225 if (d->irrevocable_blocks_normal
5226 && bitmap_bit_p (d->irrevocable_blocks_normal,
5227 region->entry_block->index))
5229 transaction_subcode_ior (region, GTMA_DOES_GO_IRREVOCABLE
5230 | GTMA_MAY_ENTER_IRREVOCABLE
5231 | GTMA_HAS_NO_INSTRUMENTATION);
5232 continue;
5235 need_ssa_rename |=
5236 ipa_tm_transform_calls (node, region, region->entry_block,
5237 d->irrevocable_blocks_normal);
5240 if (need_ssa_rename)
5241 update_ssa (TODO_update_ssa_only_virtuals);
5243 pop_cfun ();
5246 /* Transform the calls within the transactional clone of NODE. */
5248 static void
5249 ipa_tm_transform_clone (struct cgraph_node *node)
5251 struct tm_ipa_cg_data *d;
5252 bool need_ssa_rename;
5254 d = get_cg_data (&node, true);
5256 /* If this function makes no calls and has no irrevocable blocks,
5257 then there's nothing to do. */
5258 /* ??? Remove non-aborting top-level transactions. */
5259 if (!node->callees && !node->indirect_calls && !d->irrevocable_blocks_clone)
5260 return;
5262 push_cfun (DECL_STRUCT_FUNCTION (d->clone->decl));
5263 calculate_dominance_info (CDI_DOMINATORS);
5265 need_ssa_rename =
5266 ipa_tm_transform_calls (d->clone, NULL, single_succ (ENTRY_BLOCK_PTR),
5267 d->irrevocable_blocks_clone);
5269 if (need_ssa_rename)
5270 update_ssa (TODO_update_ssa_only_virtuals);
5272 pop_cfun ();
5275 /* Main entry point for the transactional memory IPA pass. */
5277 static unsigned int
5278 ipa_tm_execute (void)
5280 cgraph_node_queue tm_callees = cgraph_node_queue ();
5281 /* List of functions that will go irrevocable. */
5282 cgraph_node_queue irr_worklist = cgraph_node_queue ();
5284 struct cgraph_node *node;
5285 struct tm_ipa_cg_data *d;
5286 enum availability a;
5287 unsigned int i;
5289 #ifdef ENABLE_CHECKING
5290 verify_cgraph ();
5291 #endif
5293 bitmap_obstack_initialize (&tm_obstack);
5294 initialize_original_copy_tables ();
5296 /* For all local functions marked tm_callable, queue them. */
5297 FOR_EACH_DEFINED_FUNCTION (node)
5298 if (is_tm_callable (node->decl)
5299 && cgraph_function_body_availability (node) >= AVAIL_OVERWRITABLE)
5301 d = get_cg_data (&node, true);
5302 maybe_push_queue (node, &tm_callees, &d->in_callee_queue);
5305 /* For all local reachable functions... */
5306 FOR_EACH_DEFINED_FUNCTION (node)
5307 if (node->lowered
5308 && cgraph_function_body_availability (node) >= AVAIL_OVERWRITABLE)
5310 /* ... marked tm_pure, record that fact for the runtime by
5311 indicating that the pure function is its own tm_callable.
5312 No need to do this if the function's address can't be taken. */
5313 if (is_tm_pure (node->decl))
5315 if (!node->local.local)
5316 record_tm_clone_pair (node->decl, node->decl);
5317 continue;
5320 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
5321 calculate_dominance_info (CDI_DOMINATORS);
5323 tm_region_init (NULL);
5324 if (all_tm_regions)
5326 d = get_cg_data (&node, true);
5328 /* Scan for calls that are in each transaction, and
5329 generate the uninstrumented code path. */
5330 ipa_tm_scan_calls_transaction (d, &tm_callees);
5332 /* Put it in the worklist so we can scan the function
5333 later (ipa_tm_scan_irr_function) and mark the
5334 irrevocable blocks. */
5335 maybe_push_queue (node, &irr_worklist, &d->in_worklist);
5336 d->want_irr_scan_normal = true;
5339 pop_cfun ();
5342 /* For every local function on the callee list, scan as if we will be
5343 creating a transactional clone, queueing all new functions we find
5344 along the way. */
5345 for (i = 0; i < tm_callees.length (); ++i)
5347 node = tm_callees[i];
5348 a = cgraph_function_body_availability (node);
5349 d = get_cg_data (&node, true);
5351 /* Put it in the worklist so we can scan the function later
5352 (ipa_tm_scan_irr_function) and mark the irrevocable
5353 blocks. */
5354 maybe_push_queue (node, &irr_worklist, &d->in_worklist);
5356 /* Some callees cannot be arbitrarily cloned. These will always be
5357 irrevocable. Mark these now, so that we need not scan them. */
5358 if (is_tm_irrevocable (node->decl))
5359 ipa_tm_note_irrevocable (node, &irr_worklist);
5360 else if (a <= AVAIL_NOT_AVAILABLE
5361 && !is_tm_safe_or_pure (node->decl))
5362 ipa_tm_note_irrevocable (node, &irr_worklist);
5363 else if (a >= AVAIL_OVERWRITABLE)
5365 if (!tree_versionable_function_p (node->decl))
5366 ipa_tm_note_irrevocable (node, &irr_worklist);
5367 else if (!d->is_irrevocable)
5369 /* If this is an alias, make sure its base is queued as well.
5370 we need not scan the callees now, as the base will do. */
5371 if (node->alias)
5373 node = cgraph_get_node (node->thunk.alias);
5374 d = get_cg_data (&node, true);
5375 maybe_push_queue (node, &tm_callees, &d->in_callee_queue);
5376 continue;
5379 /* Add all nodes called by this function into
5380 tm_callees as well. */
5381 ipa_tm_scan_calls_clone (node, &tm_callees);
5386 /* Iterate scans until there is no more work to be done. Prefer not to
5387 use vec::pop because the worklist tends to follow a breadth-first
5388 search of the callgraph, which should allow convergence with a
5389 minimum number of scans. But we also don't want the worklist
5390 array to grow without bound, so we shift the array up periodically.
5391 for (i = 0; i < irr_worklist.length (); ++i)
5393 if (i > 256 && i == irr_worklist.length () / 8)
5395 irr_worklist.block_remove (0, i);
5396 i = 0;
5399 node = irr_worklist[i];
5400 d = get_cg_data (&node, true);
5401 d->in_worklist = false;
5403 if (d->want_irr_scan_normal)
5405 d->want_irr_scan_normal = false;
5406 ipa_tm_scan_irr_function (node, false);
5408 if (d->in_callee_queue && ipa_tm_scan_irr_function (node, true))
5409 ipa_tm_note_irrevocable (node, &irr_worklist);
5412 /* For every function on the callee list, collect the tm_may_enter_irr
5413 bit on the node. */
5414 irr_worklist.truncate (0);
5415 for (i = 0; i < tm_callees.length (); ++i)
5417 node = tm_callees[i];
5418 if (ipa_tm_mayenterirr_function (node))
5420 d = get_cg_data (&node, true);
5421 gcc_assert (d->in_worklist == false);
5422 maybe_push_queue (node, &irr_worklist, &d->in_worklist);
5426 /* Propagate the tm_may_enter_irr bit to callers until stable. */
5427 for (i = 0; i < irr_worklist.length (); ++i)
5429 struct cgraph_node *caller;
5430 struct cgraph_edge *e;
5431 struct ipa_ref *ref;
5432 unsigned j;
5434 if (i > 256 && i == irr_worklist.length () / 8)
5436 irr_worklist.block_remove (0, i);
5437 i = 0;
5440 node = irr_worklist[i];
5441 d = get_cg_data (&node, true);
5442 d->in_worklist = false;
5443 node->local.tm_may_enter_irr = true;
5445 /* Propagate back to normal callers. */
5446 for (e = node->callers; e ; e = e->next_caller)
5448 caller = e->caller;
5449 if (!is_tm_safe_or_pure (caller->decl)
5450 && !caller->local.tm_may_enter_irr)
5452 d = get_cg_data (&caller, true);
5453 maybe_push_queue (caller, &irr_worklist, &d->in_worklist);
5457 /* Propagate back to referring aliases as well. */
5458 for (j = 0; ipa_ref_list_referring_iterate (&node->ref_list, j, ref); j++)
5460 caller = cgraph (ref->referring);
5461 if (ref->use == IPA_REF_ALIAS
5462 && !caller->local.tm_may_enter_irr)
5464 /* ?? Do not traverse aliases here. */
5465 d = get_cg_data (&caller, false);
5466 maybe_push_queue (caller, &irr_worklist, &d->in_worklist);
5471 /* Now validate all tm_safe functions, and all atomic regions in
5472 other functions. */
5473 FOR_EACH_DEFINED_FUNCTION (node)
5474 if (node->lowered
5475 && cgraph_function_body_availability (node) >= AVAIL_OVERWRITABLE)
5477 d = get_cg_data (&node, true);
5478 if (is_tm_safe (node->decl))
5479 ipa_tm_diagnose_tm_safe (node);
5480 else if (d->all_tm_regions)
5481 ipa_tm_diagnose_transaction (node, d->all_tm_regions);
5484 /* Create clones. Do those that are not irrevocable and have a
5485 positive call count. Do those publicly visible functions that
5486 the user directed us to clone. */
5487 for (i = 0; i < tm_callees.length (); ++i)
5489 bool doit = false;
5491 node = tm_callees[i];
5492 if (node->cpp_implicit_alias)
5493 continue;
5495 a = cgraph_function_body_availability (node);
5496 d = get_cg_data (&node, true);
5498 if (a <= AVAIL_NOT_AVAILABLE)
5499 doit = is_tm_callable (node->decl);
5500 else if (a <= AVAIL_AVAILABLE && is_tm_callable (node->decl))
5501 doit = true;
5502 else if (!d->is_irrevocable
5503 && d->tm_callers_normal + d->tm_callers_clone > 0)
5504 doit = true;
5506 if (doit)
5507 ipa_tm_create_version (node);
5510 /* Redirect calls to the new clones, and insert irrevocable marks. */
5511 for (i = 0; i < tm_callees.length (); ++i)
5513 node = tm_callees[i];
5514 if (node->analyzed)
5516 d = get_cg_data (&node, true);
5517 if (d->clone)
5518 ipa_tm_transform_clone (node);
5521 FOR_EACH_DEFINED_FUNCTION (node)
5522 if (node->lowered
5523 && cgraph_function_body_availability (node) >= AVAIL_OVERWRITABLE)
5525 d = get_cg_data (&node, true);
5526 if (d->all_tm_regions)
5527 ipa_tm_transform_transaction (node);
5530 /* Free and clear all data structures. */
5531 tm_callees.release ();
5532 irr_worklist.release ();
5533 bitmap_obstack_release (&tm_obstack);
5534 free_original_copy_tables ();
5536 FOR_EACH_FUNCTION (node)
5537 node->aux = NULL;
5539 #ifdef ENABLE_CHECKING
5540 verify_cgraph ();
5541 #endif
5543 return 0;
5546 namespace {
5548 const pass_data pass_data_ipa_tm =
5550 SIMPLE_IPA_PASS, /* type */
5551 "tmipa", /* name */
5552 OPTGROUP_NONE, /* optinfo_flags */
5553 true, /* has_gate */
5554 true, /* has_execute */
5555 TV_TRANS_MEM, /* tv_id */
5556 ( PROP_ssa | PROP_cfg ), /* properties_required */
5557 0, /* properties_provided */
5558 0, /* properties_destroyed */
5559 0, /* todo_flags_start */
5560 0, /* todo_flags_finish */
5563 class pass_ipa_tm : public simple_ipa_opt_pass
5565 public:
5566 pass_ipa_tm (gcc::context *ctxt)
5567 : simple_ipa_opt_pass (pass_data_ipa_tm, ctxt)
5570 /* opt_pass methods: */
5571 bool gate () { return gate_tm (); }
5572 unsigned int execute () { return ipa_tm_execute (); }
5574 }; // class pass_ipa_tm
5576 } // anon namespace
5578 simple_ipa_opt_pass *
5579 make_pass_ipa_tm (gcc::context *ctxt)
5581 return new pass_ipa_tm (ctxt);
5584 #include "gt-trans-mem.h"