* rw.po: Remove.
[official-gcc.git] / gcc / omp-low.c
blobc1c6d424260e690d005995d35188f4913ae3bbb7
1 /* Lowering pass for OpenMP directives. Converts OpenMP directives
2 into explicit calls to the runtime library (libgomp) and data
3 marshalling to implement data sharing and copying clauses.
4 Contributed by Diego Novillo <dnovillo@redhat.com>
6 Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "tree.h"
29 #include "rtl.h"
30 #include "tree-gimple.h"
31 #include "tree-inline.h"
32 #include "langhooks.h"
33 #include "diagnostic.h"
34 #include "tree-flow.h"
35 #include "timevar.h"
36 #include "flags.h"
37 #include "function.h"
38 #include "expr.h"
39 #include "toplev.h"
40 #include "tree-pass.h"
41 #include "ggc.h"
42 #include "except.h"
45 /* Lowering of OpenMP parallel and workshare constructs proceeds in two
46 phases. The first phase scans the function looking for OMP statements
47 and then for variables that must be replaced to satisfy data sharing
48 clauses. The second phase expands code for the constructs, as well as
49 re-gimplifying things when variables have been replaced with complex
50 expressions.
52 Final code generation is done by pass_expand_omp. The flowgraph is
53 scanned for parallel regions which are then moved to a new
54 function, to be invoked by the thread library. */
56 /* Context structure. Used to store information about each parallel
57 directive in the code. */
59 typedef struct omp_context
61 /* This field must be at the beginning, as we do "inheritance": Some
62 callback functions for tree-inline.c (e.g., omp_copy_decl)
63 receive a copy_body_data pointer that is up-casted to an
64 omp_context pointer. */
65 copy_body_data cb;
67 /* The tree of contexts corresponding to the encountered constructs. */
68 struct omp_context *outer;
69 tree stmt;
71 /* Map variables to fields in a structure that allows communication
72 between sending and receiving threads. */
73 splay_tree field_map;
74 tree record_type;
75 tree sender_decl;
76 tree receiver_decl;
78 /* A chain of variables to add to the top-level block surrounding the
79 construct. In the case of a parallel, this is in the child function. */
80 tree block_vars;
82 /* What to do with variables with implicitly determined sharing
83 attributes. */
84 enum omp_clause_default_kind default_kind;
86 /* Nesting depth of this context. Used to beautify error messages re
87 invalid gotos. The outermost ctx is depth 1, with depth 0 being
88 reserved for the main body of the function. */
89 int depth;
91 /* True if this parallel directive is nested within another. */
92 bool is_nested;
93 } omp_context;
96 /* A structure describing the main elements of a parallel loop. */
98 struct omp_for_data
100 tree v, n1, n2, step, chunk_size, for_stmt;
101 enum tree_code cond_code;
102 tree pre;
103 bool have_nowait, have_ordered;
104 enum omp_clause_schedule_kind sched_kind;
/* Maps each scanned OpenMP statement to the omp_context that
   new_omp_context created for it.  */
108 static splay_tree all_contexts;
/* Compared against 1 in scan_omp_parallel to decide whether a new
   parallel context is flagged as nested.  */
109 static int parallel_nesting_level;
/* Head of the list of toplevel regions.  new_omp_region pushes onto it
   and free_omp_regions resets it to NULL.  */
110 struct omp_region *root_omp_region;
/* Forward declarations of functions that are used in this file before
   their definitions.  */
112 static void scan_omp (tree *, omp_context *);
113 static void lower_omp (tree *, omp_context *);
114 static tree lookup_decl_in_outer_ctx (tree, omp_context *);
115 static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *);
117 /* Find an OpenMP clause of type KIND within CLAUSES. */
119 static tree
120 find_omp_clause (tree clauses, enum tree_code kind)
122 for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses))
123 if (OMP_CLAUSE_CODE (clauses) == kind)
124 return clauses;
126 return NULL_TREE;
129 /* Return true if CTX is for an omp parallel. */
131 static inline bool
132 is_parallel_ctx (omp_context *ctx)
134 return TREE_CODE (ctx->stmt) == OMP_PARALLEL;
138 /* Return true if REGION is a combined parallel+workshare region. */
140 static inline bool
141 is_combined_parallel (struct omp_region *region)
143 return region->is_combined_parallel;
147 /* Extract the header elements of parallel loop FOR_STMT and store
148 them into *FD. */
150 static void
151 extract_omp_for_data (tree for_stmt, struct omp_for_data *fd)
153 tree t;
155 fd->for_stmt = for_stmt;
156 fd->pre = NULL;
158 t = OMP_FOR_INIT (for_stmt);
159 gcc_assert (TREE_CODE (t) == MODIFY_EXPR);
160 fd->v = TREE_OPERAND (t, 0);
161 gcc_assert (DECL_P (fd->v));
162 gcc_assert (TREE_CODE (TREE_TYPE (fd->v)) == INTEGER_TYPE);
163 fd->n1 = TREE_OPERAND (t, 1);
165 t = OMP_FOR_COND (for_stmt);
166 fd->cond_code = TREE_CODE (t);
167 gcc_assert (TREE_OPERAND (t, 0) == fd->v);
168 fd->n2 = TREE_OPERAND (t, 1);
169 switch (fd->cond_code)
171 case LT_EXPR:
172 case GT_EXPR:
173 break;
174 case LE_EXPR:
175 fd->n2 = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->n2), fd->n2,
176 build_int_cst (TREE_TYPE (fd->n2), 1));
177 fd->cond_code = LT_EXPR;
178 break;
179 case GE_EXPR:
180 fd->n2 = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->n2), fd->n2,
181 build_int_cst (TREE_TYPE (fd->n2), 1));
182 fd->cond_code = GT_EXPR;
183 break;
184 default:
185 gcc_unreachable ();
188 t = OMP_FOR_INCR (fd->for_stmt);
189 gcc_assert (TREE_CODE (t) == MODIFY_EXPR);
190 gcc_assert (TREE_OPERAND (t, 0) == fd->v);
191 t = TREE_OPERAND (t, 1);
192 gcc_assert (TREE_OPERAND (t, 0) == fd->v);
193 switch (TREE_CODE (t))
195 case PLUS_EXPR:
196 fd->step = TREE_OPERAND (t, 1);
197 break;
198 case MINUS_EXPR:
199 fd->step = TREE_OPERAND (t, 1);
200 fd->step = fold_build1 (NEGATE_EXPR, TREE_TYPE (fd->step), fd->step);
201 break;
202 default:
203 gcc_unreachable ();
206 fd->have_nowait = fd->have_ordered = false;
207 fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
208 fd->chunk_size = NULL_TREE;
210 for (t = OMP_FOR_CLAUSES (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t))
211 switch (OMP_CLAUSE_CODE (t))
213 case OMP_CLAUSE_NOWAIT:
214 fd->have_nowait = true;
215 break;
216 case OMP_CLAUSE_ORDERED:
217 fd->have_ordered = true;
218 break;
219 case OMP_CLAUSE_SCHEDULE:
220 fd->sched_kind = OMP_CLAUSE_SCHEDULE_KIND (t);
221 fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t);
222 break;
223 default:
224 break;
227 if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME)
228 gcc_assert (fd->chunk_size == NULL);
229 else if (fd->chunk_size == NULL)
231 /* We only need to compute a default chunk size for ordered
232 static loops and dynamic loops. */
233 if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC || fd->have_ordered)
234 fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
235 ? integer_zero_node : integer_one_node;
240 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
241 is the immediate dominator of PAR_ENTRY_BB, return true if there
242 are no data dependencies that would prevent expanding the parallel
243 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
245 When expanding a combined parallel+workshare region, the call to
246 the child function may need additional arguments in the case of
247 OMP_FOR regions. In some cases, these arguments are computed out
248 of variables passed in from the parent to the child via 'struct
249 .omp_data_s'. For instance:
251 #pragma omp parallel for schedule (guided, i * 4)
252 for (j ...)
254 Is lowered into:
256 # BLOCK 2 (PAR_ENTRY_BB)
257 .omp_data_o.i = i;
258 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
260 # BLOCK 3 (WS_ENTRY_BB)
261 .omp_data_i = &.omp_data_o;
262 D.1667 = .omp_data_i->i;
263 D.1598 = D.1667 * 4;
264 #pragma omp for schedule (guided, D.1598)
266 When we outline the parallel region, the call to the child function
267 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
268 that value is computed *after* the call site. So, in principle we
269 cannot do the transformation.
271 To see whether the code in WS_ENTRY_BB blocks the combined
272 parallel+workshare call, we collect all the variables used in the
273 OMP_FOR header and check whether they appear on the LHS of any
274 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
275 call.
277 FIXME. If we had the SSA form built at this point, we could merely
278 hoist the code in block 3 into block 2 and be done with it. But at
279 this point we don't have dataflow information and though we could
280 hack something up here, it is really not worth the aggravation. */
282 static bool
283 workshare_safe_to_combine_p (basic_block par_entry_bb, basic_block ws_entry_bb)
285 struct omp_for_data fd;
286 tree par_stmt, ws_stmt;
288 par_stmt = last_stmt (par_entry_bb);
289 ws_stmt = last_stmt (ws_entry_bb);
291 if (TREE_CODE (ws_stmt) == OMP_SECTIONS)
292 return true;
294 gcc_assert (TREE_CODE (ws_stmt) == OMP_FOR);
296 extract_omp_for_data (ws_stmt, &fd);
298 /* FIXME. We give up too easily here. If any of these arguments
299 are not constants, they will likely involve variables that have
300 been mapped into fields of .omp_data_s for sharing with the child
301 function. With appropriate data flow, it would be possible to
302 see through this. */
303 if (!is_gimple_min_invariant (fd.n1)
304 || !is_gimple_min_invariant (fd.n2)
305 || !is_gimple_min_invariant (fd.step)
306 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
307 return false;
309 return true;
313 /* Collect additional arguments needed to emit a combined
314 parallel+workshare call. WS_STMT is the workshare directive being
315 expanded. */
317 static tree
318 get_ws_args_for (tree ws_stmt)
320 tree t;
322 if (TREE_CODE (ws_stmt) == OMP_FOR)
324 struct omp_for_data fd;
325 tree ws_args;
327 extract_omp_for_data (ws_stmt, &fd);
329 ws_args = NULL_TREE;
330 if (fd.chunk_size)
332 t = fold_convert (long_integer_type_node, fd.chunk_size);
333 ws_args = tree_cons (NULL, t, ws_args);
336 t = fold_convert (long_integer_type_node, fd.step);
337 ws_args = tree_cons (NULL, t, ws_args);
339 t = fold_convert (long_integer_type_node, fd.n2);
340 ws_args = tree_cons (NULL, t, ws_args);
342 t = fold_convert (long_integer_type_node, fd.n1);
343 ws_args = tree_cons (NULL, t, ws_args);
345 return ws_args;
347 else if (TREE_CODE (ws_stmt) == OMP_SECTIONS)
349 basic_block bb = bb_for_stmt (ws_stmt);
350 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs));
351 t = tree_cons (NULL, t, NULL);
352 return t;
355 gcc_unreachable ();
359 /* Discover whether REGION is a combined parallel+workshare region. */
361 static void
362 determine_parallel_type (struct omp_region *region)
364 basic_block par_entry_bb, par_exit_bb;
365 basic_block ws_entry_bb, ws_exit_bb;
367 if (region == NULL || region->inner == NULL
368 || region->exit == NULL || region->inner->exit == NULL)
369 return;
371 /* We only support parallel+for and parallel+sections. */
372 if (region->type != OMP_PARALLEL
373 || (region->inner->type != OMP_FOR
374 && region->inner->type != OMP_SECTIONS))
375 return;
377 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
378 WS_EXIT_BB -> PAR_EXIT_BB. */
379 par_entry_bb = region->entry;
380 par_exit_bb = region->exit;
381 ws_entry_bb = region->inner->entry;
382 ws_exit_bb = region->inner->exit;
384 if (single_succ (par_entry_bb) == ws_entry_bb
385 && single_succ (ws_exit_bb) == par_exit_bb
386 && workshare_safe_to_combine_p (par_entry_bb, ws_entry_bb)
387 && (OMP_PARALLEL_COMBINED (last_stmt (par_entry_bb))
388 || (last_and_only_stmt (ws_entry_bb)
389 && last_and_only_stmt (par_exit_bb))))
391 tree ws_stmt = last_stmt (ws_entry_bb);
393 if (region->inner->type == OMP_FOR)
395 /* If this is a combined parallel loop, we need to determine
396 whether or not to use the combined library calls. There
397 are two cases where we do not apply the transformation:
398 static loops and any kind of ordered loop. In the first
399 case, we already open code the loop so there is no need
400 to do anything else. In the latter case, the combined
401 parallel loop call would still need extra synchronization
402 to implement ordered semantics, so there would not be any
403 gain in using the combined call. */
404 tree clauses = OMP_FOR_CLAUSES (ws_stmt);
405 tree c = find_omp_clause (clauses, OMP_CLAUSE_SCHEDULE);
406 if (c == NULL
407 || OMP_CLAUSE_SCHEDULE_KIND (c) == OMP_CLAUSE_SCHEDULE_STATIC
408 || find_omp_clause (clauses, OMP_CLAUSE_ORDERED))
410 region->is_combined_parallel = false;
411 region->inner->is_combined_parallel = false;
412 return;
416 region->is_combined_parallel = true;
417 region->inner->is_combined_parallel = true;
418 region->ws_args = get_ws_args_for (ws_stmt);
423 /* Return true if EXPR is variable sized. */
425 static inline bool
426 is_variable_sized (tree expr)
428 return !TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
431 /* Return true if DECL is a reference type. */
433 static inline bool
434 is_reference (tree decl)
436 return lang_hooks.decls.omp_privatize_by_reference (decl);
439 /* Lookup variables in the decl or field splay trees. The "maybe" form
440 allows for the variable form to not have been entered, otherwise we
441 assert that the variable must have been entered. */
443 static inline tree
444 lookup_decl (tree var, omp_context *ctx)
446 splay_tree_node n;
447 n = splay_tree_lookup (ctx->cb.decl_map, (splay_tree_key) var);
448 return (tree) n->value;
451 static inline tree
452 maybe_lookup_decl (tree var, omp_context *ctx)
454 splay_tree_node n;
455 n = splay_tree_lookup (ctx->cb.decl_map, (splay_tree_key) var);
456 return n ? (tree) n->value : NULL_TREE;
459 static inline tree
460 lookup_field (tree var, omp_context *ctx)
462 splay_tree_node n;
463 n = splay_tree_lookup (ctx->field_map, (splay_tree_key) var);
464 return (tree) n->value;
467 static inline tree
468 maybe_lookup_field (tree var, omp_context *ctx)
470 splay_tree_node n;
471 n = splay_tree_lookup (ctx->field_map, (splay_tree_key) var);
472 return n ? (tree) n->value : NULL_TREE;
475 /* Return true if DECL should be copied by pointer. SHARED_P is true
476 if DECL is to be shared. */
478 static bool
479 use_pointer_for_field (tree decl, bool shared_p)
481 if (AGGREGATE_TYPE_P (TREE_TYPE (decl)))
482 return true;
484 /* We can only use copy-in/copy-out semantics for shared variables
485 when we know the value is not accessible from an outer scope. */
486 if (shared_p)
488 /* ??? Trivially accessible from anywhere. But why would we even
489 be passing an address in this case? Should we simply assert
490 this to be false, or should we have a cleanup pass that removes
491 these from the list of mappings? */
492 if (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
493 return true;
495 /* For variables with DECL_HAS_VALUE_EXPR_P set, we cannot tell
496 without analyzing the expression whether or not its location
497 is accessible to anyone else. In the case of nested parallel
498 regions it certainly may be. */
499 if (TREE_CODE (decl) != RESULT_DECL && DECL_HAS_VALUE_EXPR_P (decl))
500 return true;
502 /* Do not use copy-in/copy-out for variables that have their
503 address taken. */
504 if (TREE_ADDRESSABLE (decl))
505 return true;
508 return false;
511 /* Construct a new automatic decl similar to VAR. */
513 static tree
514 omp_copy_decl_2 (tree var, tree name, tree type, omp_context *ctx)
516 tree copy = build_decl (VAR_DECL, name, type);
518 TREE_ADDRESSABLE (copy) = TREE_ADDRESSABLE (var);
519 DECL_COMPLEX_GIMPLE_REG_P (copy) = DECL_COMPLEX_GIMPLE_REG_P (var);
520 DECL_ARTIFICIAL (copy) = DECL_ARTIFICIAL (var);
521 DECL_IGNORED_P (copy) = DECL_IGNORED_P (var);
522 TREE_USED (copy) = 1;
523 DECL_CONTEXT (copy) = current_function_decl;
524 DECL_SEEN_IN_BIND_EXPR_P (copy) = 1;
526 TREE_CHAIN (copy) = ctx->block_vars;
527 ctx->block_vars = copy;
529 return copy;
532 static tree
533 omp_copy_decl_1 (tree var, omp_context *ctx)
535 return omp_copy_decl_2 (var, DECL_NAME (var), TREE_TYPE (var), ctx);
538 /* Build tree nodes to access the field for VAR on the receiver side. */
540 static tree
541 build_receiver_ref (tree var, bool by_ref, omp_context *ctx)
543 tree x, field = lookup_field (var, ctx);
545 /* If the receiver record type was remapped in the child function,
546 remap the field into the new record type. */
547 x = maybe_lookup_field (field, ctx);
548 if (x != NULL)
549 field = x;
551 x = build_fold_indirect_ref (ctx->receiver_decl);
552 x = build3 (COMPONENT_REF, TREE_TYPE (field), x, field, NULL);
553 if (by_ref)
554 x = build_fold_indirect_ref (x);
556 return x;
559 /* Build tree nodes to access VAR in the scope outer to CTX. In the case
560 of a parallel, this is a component reference; for workshare constructs
561 this is some variable. */
563 static tree
564 build_outer_var_ref (tree var, omp_context *ctx)
566 tree x;
568 if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx)))
569 x = var;
570 else if (is_variable_sized (var))
572 x = TREE_OPERAND (DECL_VALUE_EXPR (var), 0);
573 x = build_outer_var_ref (x, ctx);
574 x = build_fold_indirect_ref (x);
576 else if (is_parallel_ctx (ctx))
578 bool by_ref = use_pointer_for_field (var, false);
579 x = build_receiver_ref (var, by_ref, ctx);
581 else if (ctx->outer)
582 x = lookup_decl (var, ctx->outer);
583 else if (is_reference (var))
584 /* This can happen with orphaned constructs. If var is reference, it is
585 possible it is shared and as such valid. */
586 x = var;
587 else
588 gcc_unreachable ();
590 if (is_reference (var))
591 x = build_fold_indirect_ref (x);
593 return x;
596 /* Build tree nodes to access the field for VAR on the sender side. */
598 static tree
599 build_sender_ref (tree var, omp_context *ctx)
601 tree field = lookup_field (var, ctx);
602 return build3 (COMPONENT_REF, TREE_TYPE (field),
603 ctx->sender_decl, field, NULL);
606 /* Add a new field for VAR inside the structure CTX->SENDER_DECL. */
608 static void
609 install_var_field (tree var, bool by_ref, omp_context *ctx)
611 tree field, type;
613 gcc_assert (!splay_tree_lookup (ctx->field_map, (splay_tree_key) var));
615 type = TREE_TYPE (var);
616 if (by_ref)
617 type = build_pointer_type (type);
619 field = build_decl (FIELD_DECL, DECL_NAME (var), type);
621 /* Remember what variable this field was created for. This does have a
622 side effect of making dwarf2out ignore this member, so for helpful
623 debugging we clear it later in delete_omp_context. */
624 DECL_ABSTRACT_ORIGIN (field) = var;
626 insert_field_into_struct (ctx->record_type, field);
628 splay_tree_insert (ctx->field_map, (splay_tree_key) var,
629 (splay_tree_value) field);
632 static tree
633 install_var_local (tree var, omp_context *ctx)
635 tree new_var = omp_copy_decl_1 (var, ctx);
636 insert_decl_map (&ctx->cb, var, new_var);
637 return new_var;
640 /* Adjust the replacement for DECL in CTX for the new context. This means
641 copying the DECL_VALUE_EXPR, and fixing up the type. */
643 static void
644 fixup_remapped_decl (tree decl, omp_context *ctx, bool private_debug)
646 tree new_decl, size;
648 new_decl = lookup_decl (decl, ctx);
650 TREE_TYPE (new_decl) = remap_type (TREE_TYPE (decl), &ctx->cb);
652 if ((!TREE_CONSTANT (DECL_SIZE (new_decl)) || private_debug)
653 && DECL_HAS_VALUE_EXPR_P (decl))
655 tree ve = DECL_VALUE_EXPR (decl);
656 walk_tree (&ve, copy_body_r, &ctx->cb, NULL);
657 SET_DECL_VALUE_EXPR (new_decl, ve);
658 DECL_HAS_VALUE_EXPR_P (new_decl) = 1;
661 if (!TREE_CONSTANT (DECL_SIZE (new_decl)))
663 size = remap_decl (DECL_SIZE (decl), &ctx->cb);
664 if (size == error_mark_node)
665 size = TYPE_SIZE (TREE_TYPE (new_decl));
666 DECL_SIZE (new_decl) = size;
668 size = remap_decl (DECL_SIZE_UNIT (decl), &ctx->cb);
669 if (size == error_mark_node)
670 size = TYPE_SIZE_UNIT (TREE_TYPE (new_decl));
671 DECL_SIZE_UNIT (new_decl) = size;
675 /* The callback for remap_decl. Search all containing contexts for a
676 mapping of the variable; this avoids having to duplicate the splay
677 tree ahead of time. We know a mapping doesn't already exist in the
678 given context. Create new mappings to implement default semantics. */
680 static tree
681 omp_copy_decl (tree var, copy_body_data *cb)
683 omp_context *ctx = (omp_context *) cb;
684 tree new_var;
686 if (TREE_CODE (var) == LABEL_DECL)
688 new_var = create_artificial_label ();
689 DECL_CONTEXT (new_var) = current_function_decl;
690 insert_decl_map (&ctx->cb, var, new_var);
691 return new_var;
694 while (!is_parallel_ctx (ctx))
696 ctx = ctx->outer;
697 if (ctx == NULL)
698 return var;
699 new_var = maybe_lookup_decl (var, ctx);
700 if (new_var)
701 return new_var;
704 if (is_global_var (var) || decl_function_context (var) != ctx->cb.src_fn)
705 return var;
707 return error_mark_node;
711 /* Return the parallel region associated with STMT. */
713 /* Debugging dumps for parallel regions.  These are prototypes for the
    externally visible dump helpers defined below: dump_omp_region
    writes to the FILE it is given, while the two debug_* variants
    write to stderr.  */
714 void dump_omp_region (FILE *, struct omp_region *, int);
715 void debug_omp_region (struct omp_region *);
716 void debug_all_omp_regions (void);
718 /* Dump the parallel region tree rooted at REGION. */
720 void
721 dump_omp_region (FILE *file, struct omp_region *region, int indent)
723 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
724 tree_code_name[region->type]);
726 if (region->inner)
727 dump_omp_region (file, region->inner, indent + 4);
729 if (region->cont)
731 fprintf (file, "%*sbb %d: OMP_CONTINUE\n", indent, "",
732 region->cont->index);
735 if (region->exit)
736 fprintf (file, "%*sbb %d: OMP_RETURN\n", indent, "",
737 region->exit->index);
738 else
739 fprintf (file, "%*s[no exit marker]\n", indent, "");
741 if (region->next)
742 dump_omp_region (file, region->next, indent);
/* Dump REGION, together with everything dump_omp_region reaches from
   it, to stderr with no indentation.  */

void
debug_omp_region (struct omp_region *region)
{
  FILE *out = stderr;

  dump_omp_region (out, region, 0);
}
751 void
752 debug_all_omp_regions (void)
754 dump_omp_region (stderr, root_omp_region, 0);
758 /* Create a new parallel region starting at STMT inside region PARENT. */
760 struct omp_region *
761 new_omp_region (basic_block bb, enum tree_code type, struct omp_region *parent)
763 struct omp_region *region = xcalloc (1, sizeof (*region));
765 region->outer = parent;
766 region->entry = bb;
767 region->type = type;
769 if (parent)
771 /* This is a nested region. Add it to the list of inner
772 regions in PARENT. */
773 region->next = parent->inner;
774 parent->inner = region;
776 else
778 /* This is a toplevel region. Add it to the list of toplevel
779 regions in ROOT_OMP_REGION. */
780 region->next = root_omp_region;
781 root_omp_region = region;
784 return region;
787 /* Release the memory associated with the region tree rooted at REGION. */
789 static void
790 free_omp_region_1 (struct omp_region *region)
792 struct omp_region *i, *n;
794 for (i = region->inner; i ; i = n)
796 n = i->next;
797 free_omp_region_1 (i);
800 free (region);
803 /* Release the memory for the entire omp region tree. */
805 void
806 free_omp_regions (void)
808 struct omp_region *r, *n;
809 for (r = root_omp_region; r ; r = n)
811 n = r->next;
812 free_omp_region_1 (r);
814 root_omp_region = NULL;
818 /* Create a new context, with OUTER_CTX being the surrounding context. */
820 static omp_context *
821 new_omp_context (tree stmt, omp_context *outer_ctx)
823 omp_context *ctx = XCNEW (omp_context);
825 splay_tree_insert (all_contexts, (splay_tree_key) stmt,
826 (splay_tree_value) ctx);
827 ctx->stmt = stmt;
829 if (outer_ctx)
831 ctx->outer = outer_ctx;
832 ctx->cb = outer_ctx->cb;
833 ctx->cb.block = NULL;
834 ctx->depth = outer_ctx->depth + 1;
836 else
838 ctx->cb.src_fn = current_function_decl;
839 ctx->cb.dst_fn = current_function_decl;
840 ctx->cb.src_node = cgraph_node (current_function_decl);
841 ctx->cb.dst_node = ctx->cb.src_node;
842 ctx->cb.src_cfun = cfun;
843 ctx->cb.copy_decl = omp_copy_decl;
844 ctx->cb.eh_region = -1;
845 ctx->cb.transform_call_graph_edges = CB_CGE_MOVE;
846 ctx->depth = 1;
849 ctx->cb.decl_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
851 return ctx;
854 /* Destroy a omp_context data structures. Called through the splay tree
855 value delete callback. */
857 static void
858 delete_omp_context (splay_tree_value value)
860 omp_context *ctx = (omp_context *) value;
862 splay_tree_delete (ctx->cb.decl_map);
864 if (ctx->field_map)
865 splay_tree_delete (ctx->field_map);
867 /* We hijacked DECL_ABSTRACT_ORIGIN earlier. We need to clear it before
868 it produces corrupt debug information. */
869 if (ctx->record_type)
871 tree t;
872 for (t = TYPE_FIELDS (ctx->record_type); t ; t = TREE_CHAIN (t))
873 DECL_ABSTRACT_ORIGIN (t) = NULL;
876 XDELETE (ctx);
879 /* Fix up RECEIVER_DECL with a type that has been remapped to the child
880 context. */
882 static void
883 fixup_child_record_type (omp_context *ctx)
885 tree f, type = ctx->record_type;
887 /* ??? It isn't sufficient to just call remap_type here, because
888 variably_modified_type_p doesn't work the way we expect for
889 record types. Testing each field for whether it needs remapping
890 and creating a new record by hand works, however. */
891 for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
892 if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn))
893 break;
894 if (f)
896 tree name, new_fields = NULL;
898 type = lang_hooks.types.make_type (RECORD_TYPE);
899 name = DECL_NAME (TYPE_NAME (ctx->record_type));
900 name = build_decl (TYPE_DECL, name, type);
901 TYPE_NAME (type) = name;
903 for (f = TYPE_FIELDS (ctx->record_type); f ; f = TREE_CHAIN (f))
905 tree new_f = copy_node (f);
906 DECL_CONTEXT (new_f) = type;
907 TREE_TYPE (new_f) = remap_type (TREE_TYPE (f), &ctx->cb);
908 TREE_CHAIN (new_f) = new_fields;
909 new_fields = new_f;
911 /* Arrange to be able to look up the receiver field
912 given the sender field. */
913 splay_tree_insert (ctx->field_map, (splay_tree_key) f,
914 (splay_tree_value) new_f);
916 TYPE_FIELDS (type) = nreverse (new_fields);
917 layout_type (type);
920 TREE_TYPE (ctx->receiver_decl) = build_pointer_type (type);
923 /* Instantiate decls as necessary in CTX to satisfy the data sharing
924 specified by CLAUSES. */
926 static void
927 scan_sharing_clauses (tree clauses, omp_context *ctx)
929 tree c, decl;
930 bool scan_array_reductions = false;
932 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
934 bool by_ref;
936 switch (OMP_CLAUSE_CODE (c))
938 case OMP_CLAUSE_PRIVATE:
939 decl = OMP_CLAUSE_DECL (c);
940 if (!is_variable_sized (decl))
941 install_var_local (decl, ctx);
942 break;
944 case OMP_CLAUSE_SHARED:
945 gcc_assert (is_parallel_ctx (ctx));
946 decl = OMP_CLAUSE_DECL (c);
947 gcc_assert (!is_variable_sized (decl));
948 by_ref = use_pointer_for_field (decl, true);
949 /* Global variables don't need to be copied,
950 the receiver side will use them directly. */
951 if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
952 break;
953 if (! TREE_READONLY (decl)
954 || TREE_ADDRESSABLE (decl)
955 || by_ref
956 || is_reference (decl))
958 install_var_field (decl, by_ref, ctx);
959 install_var_local (decl, ctx);
960 break;
962 /* We don't need to copy const scalar vars back. */
963 OMP_CLAUSE_SET_CODE (c, OMP_CLAUSE_FIRSTPRIVATE);
964 goto do_private;
966 case OMP_CLAUSE_LASTPRIVATE:
967 /* Let the corresponding firstprivate clause create
968 the variable. */
969 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
970 break;
971 /* FALLTHRU */
973 case OMP_CLAUSE_FIRSTPRIVATE:
974 case OMP_CLAUSE_REDUCTION:
975 decl = OMP_CLAUSE_DECL (c);
976 do_private:
977 if (is_variable_sized (decl))
978 break;
979 else if (is_parallel_ctx (ctx)
980 && ! is_global_var (maybe_lookup_decl_in_outer_ctx (decl,
981 ctx)))
983 by_ref = use_pointer_for_field (decl, false);
984 install_var_field (decl, by_ref, ctx);
986 install_var_local (decl, ctx);
987 break;
989 case OMP_CLAUSE_COPYPRIVATE:
990 if (ctx->outer)
991 scan_omp (&OMP_CLAUSE_DECL (c), ctx->outer);
992 /* FALLTHRU */
994 case OMP_CLAUSE_COPYIN:
995 decl = OMP_CLAUSE_DECL (c);
996 by_ref = use_pointer_for_field (decl, false);
997 install_var_field (decl, by_ref, ctx);
998 break;
1000 case OMP_CLAUSE_DEFAULT:
1001 ctx->default_kind = OMP_CLAUSE_DEFAULT_KIND (c);
1002 break;
1004 case OMP_CLAUSE_IF:
1005 case OMP_CLAUSE_NUM_THREADS:
1006 case OMP_CLAUSE_SCHEDULE:
1007 if (ctx->outer)
1008 scan_omp (&OMP_CLAUSE_OPERAND (c, 0), ctx->outer);
1009 break;
1011 case OMP_CLAUSE_NOWAIT:
1012 case OMP_CLAUSE_ORDERED:
1013 break;
1015 default:
1016 gcc_unreachable ();
1020 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
1022 switch (OMP_CLAUSE_CODE (c))
1024 case OMP_CLAUSE_LASTPRIVATE:
1025 /* Let the corresponding firstprivate clause create
1026 the variable. */
1027 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
1028 break;
1029 /* FALLTHRU */
1031 case OMP_CLAUSE_PRIVATE:
1032 case OMP_CLAUSE_FIRSTPRIVATE:
1033 case OMP_CLAUSE_REDUCTION:
1034 decl = OMP_CLAUSE_DECL (c);
1035 if (is_variable_sized (decl))
1036 install_var_local (decl, ctx);
1037 fixup_remapped_decl (decl, ctx,
1038 OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE
1039 && OMP_CLAUSE_PRIVATE_DEBUG (c));
1040 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
1041 && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1042 scan_array_reductions = true;
1043 break;
1045 case OMP_CLAUSE_SHARED:
1046 decl = OMP_CLAUSE_DECL (c);
1047 if (! is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
1048 fixup_remapped_decl (decl, ctx, false);
1049 break;
1051 case OMP_CLAUSE_COPYPRIVATE:
1052 case OMP_CLAUSE_COPYIN:
1053 case OMP_CLAUSE_DEFAULT:
1054 case OMP_CLAUSE_IF:
1055 case OMP_CLAUSE_NUM_THREADS:
1056 case OMP_CLAUSE_SCHEDULE:
1057 case OMP_CLAUSE_NOWAIT:
1058 case OMP_CLAUSE_ORDERED:
1059 break;
1061 default:
1062 gcc_unreachable ();
1066 if (scan_array_reductions)
1067 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
1068 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
1069 && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1071 scan_omp (&OMP_CLAUSE_REDUCTION_INIT (c), ctx);
1072 scan_omp (&OMP_CLAUSE_REDUCTION_MERGE (c), ctx);
1076 /* Create a new name for omp child function. Returns an identifier. */
1078 static GTY(()) unsigned int tmp_ompfn_id_num;
1080 static tree
1081 create_omp_child_function_name (void)
1083 tree name = DECL_ASSEMBLER_NAME (current_function_decl);
1084 size_t len = IDENTIFIER_LENGTH (name);
1085 char *tmp_name, *prefix;
1087 prefix = alloca (len + sizeof ("_omp_fn"));
1088 memcpy (prefix, IDENTIFIER_POINTER (name), len);
1089 strcpy (prefix + len, "_omp_fn");
1090 #ifndef NO_DOT_IN_LABEL
1091 prefix[len] = '.';
1092 #elif !defined NO_DOLLAR_IN_LABEL
1093 prefix[len] = '$';
1094 #endif
1095 ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, tmp_ompfn_id_num++);
1096 return get_identifier (tmp_name);
1099 /* Build a decl for the omp child function. It'll not contain a body
1100 yet, just the bare decl. */
1102 static void
1103 create_omp_child_function (omp_context *ctx)
1105 tree decl, type, name, t;
1107 name = create_omp_child_function_name ();
1108 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1110 decl = build_decl (FUNCTION_DECL, name, type);
1111 decl = lang_hooks.decls.pushdecl (decl);
1113 ctx->cb.dst_fn = decl;
1115 TREE_STATIC (decl) = 1;
1116 TREE_USED (decl) = 1;
1117 DECL_ARTIFICIAL (decl) = 1;
1118 DECL_IGNORED_P (decl) = 0;
1119 TREE_PUBLIC (decl) = 0;
1120 DECL_UNINLINABLE (decl) = 1;
1121 DECL_EXTERNAL (decl) = 0;
1122 DECL_CONTEXT (decl) = NULL_TREE;
1123 DECL_INITIAL (decl) = make_node (BLOCK);
1125 t = build_decl (RESULT_DECL, NULL_TREE, void_type_node);
1126 DECL_ARTIFICIAL (t) = 1;
1127 DECL_IGNORED_P (t) = 1;
1128 DECL_RESULT (decl) = t;
1130 t = build_decl (PARM_DECL, get_identifier (".omp_data_i"), ptr_type_node);
1131 DECL_ARTIFICIAL (t) = 1;
1132 DECL_ARG_TYPE (t) = ptr_type_node;
1133 DECL_CONTEXT (t) = current_function_decl;
1134 TREE_USED (t) = 1;
1135 DECL_ARGUMENTS (decl) = t;
1136 ctx->receiver_decl = t;
1138 /* Allocate memory for the function structure. The call to
1139 allocate_struct_function clobbers CFUN, so we need to restore
1140 it afterward. */
1141 allocate_struct_function (decl);
1142 DECL_SOURCE_LOCATION (decl) = EXPR_LOCATION (ctx->stmt);
1143 cfun->function_end_locus = EXPR_LOCATION (ctx->stmt);
1144 cfun = ctx->cb.src_cfun;
1148 /* Scan an OpenMP parallel directive. */
1150 static void
1151 scan_omp_parallel (tree *stmt_p, omp_context *outer_ctx)
1153 omp_context *ctx;
1154 tree name;
1156 /* Ignore parallel directives with empty bodies, unless there
1157 are copyin clauses. */
1158 if (optimize > 0
1159 && empty_body_p (OMP_PARALLEL_BODY (*stmt_p))
1160 && find_omp_clause (OMP_CLAUSES (*stmt_p), OMP_CLAUSE_COPYIN) == NULL)
1162 *stmt_p = build_empty_stmt ();
1163 return;
1166 ctx = new_omp_context (*stmt_p, outer_ctx);
1167 if (parallel_nesting_level > 1)
1168 ctx->is_nested = true;
1169 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
1170 ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
1171 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
1172 name = create_tmp_var_name (".omp_data_s");
1173 name = build_decl (TYPE_DECL, name, ctx->record_type);
1174 TYPE_NAME (ctx->record_type) = name;
1175 create_omp_child_function (ctx);
1176 OMP_PARALLEL_FN (*stmt_p) = ctx->cb.dst_fn;
1178 scan_sharing_clauses (OMP_PARALLEL_CLAUSES (*stmt_p), ctx);
1179 scan_omp (&OMP_PARALLEL_BODY (*stmt_p), ctx);
1181 if (TYPE_FIELDS (ctx->record_type) == NULL)
1182 ctx->record_type = ctx->receiver_decl = NULL;
1183 else
1185 layout_type (ctx->record_type);
1186 fixup_child_record_type (ctx);
1191 /* Scan an OpenMP loop directive. */
1193 static void
1194 scan_omp_for (tree *stmt_p, omp_context *outer_ctx)
1196 omp_context *ctx;
1197 tree stmt;
1199 stmt = *stmt_p;
1200 ctx = new_omp_context (stmt, outer_ctx);
1202 scan_sharing_clauses (OMP_FOR_CLAUSES (stmt), ctx);
1204 scan_omp (&OMP_FOR_PRE_BODY (stmt), ctx);
1205 scan_omp (&OMP_FOR_INIT (stmt), ctx);
1206 scan_omp (&OMP_FOR_COND (stmt), ctx);
1207 scan_omp (&OMP_FOR_INCR (stmt), ctx);
1208 scan_omp (&OMP_FOR_BODY (stmt), ctx);
1211 /* Scan an OpenMP sections directive. */
1213 static void
1214 scan_omp_sections (tree *stmt_p, omp_context *outer_ctx)
1216 tree stmt;
1217 omp_context *ctx;
1219 stmt = *stmt_p;
1220 ctx = new_omp_context (stmt, outer_ctx);
1221 scan_sharing_clauses (OMP_SECTIONS_CLAUSES (stmt), ctx);
1222 scan_omp (&OMP_SECTIONS_BODY (stmt), ctx);
1225 /* Scan an OpenMP single directive. */
1227 static void
1228 scan_omp_single (tree *stmt_p, omp_context *outer_ctx)
1230 tree stmt = *stmt_p;
1231 omp_context *ctx;
1232 tree name;
1234 ctx = new_omp_context (stmt, outer_ctx);
1235 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
1236 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
1237 name = create_tmp_var_name (".omp_copy_s");
1238 name = build_decl (TYPE_DECL, name, ctx->record_type);
1239 TYPE_NAME (ctx->record_type) = name;
1241 scan_sharing_clauses (OMP_SINGLE_CLAUSES (stmt), ctx);
1242 scan_omp (&OMP_SINGLE_BODY (stmt), ctx);
1244 if (TYPE_FIELDS (ctx->record_type) == NULL)
1245 ctx->record_type = NULL;
1246 else
1247 layout_type (ctx->record_type);
1251 /* Check OpenMP nesting restrictions. */
1252 static void
1253 check_omp_nesting_restrictions (tree t, omp_context *ctx)
1255 switch (TREE_CODE (t))
1257 case OMP_FOR:
1258 case OMP_SECTIONS:
1259 case OMP_SINGLE:
1260 for (; ctx != NULL; ctx = ctx->outer)
1261 switch (TREE_CODE (ctx->stmt))
1263 case OMP_FOR:
1264 case OMP_SECTIONS:
1265 case OMP_SINGLE:
1266 case OMP_ORDERED:
1267 case OMP_MASTER:
1268 warning (0, "work-sharing region may not be closely nested inside "
1269 "of work-sharing, critical, ordered or master region");
1270 return;
1271 case OMP_PARALLEL:
1272 return;
1273 default:
1274 break;
1276 break;
1277 case OMP_MASTER:
1278 for (; ctx != NULL; ctx = ctx->outer)
1279 switch (TREE_CODE (ctx->stmt))
1281 case OMP_FOR:
1282 case OMP_SECTIONS:
1283 case OMP_SINGLE:
1284 warning (0, "master region may not be closely nested inside "
1285 "of work-sharing region");
1286 return;
1287 case OMP_PARALLEL:
1288 return;
1289 default:
1290 break;
1292 break;
1293 case OMP_ORDERED:
1294 for (; ctx != NULL; ctx = ctx->outer)
1295 switch (TREE_CODE (ctx->stmt))
1297 case OMP_CRITICAL:
1298 warning (0, "ordered region may not be closely nested inside "
1299 "of critical region");
1300 return;
1301 case OMP_FOR:
1302 if (find_omp_clause (OMP_CLAUSES (ctx->stmt),
1303 OMP_CLAUSE_ORDERED) == NULL)
1304 warning (0, "ordered region must be closely nested inside "
1305 "a loop region with an ordered clause");
1306 return;
1307 case OMP_PARALLEL:
1308 return;
1309 default:
1310 break;
1312 break;
1313 case OMP_CRITICAL:
1314 for (; ctx != NULL; ctx = ctx->outer)
1315 if (TREE_CODE (ctx->stmt) == OMP_CRITICAL
1316 && OMP_CRITICAL_NAME (t) == OMP_CRITICAL_NAME (ctx->stmt))
1318 warning (0, "critical region may not be nested inside a critical "
1319 "region with the same name");
1320 return;
1322 break;
1323 default:
1324 break;
1329 /* Callback for walk_stmts used to scan for OpenMP directives at TP. */
1331 static tree
1332 scan_omp_1 (tree *tp, int *walk_subtrees, void *data)
1334 struct walk_stmt_info *wi = data;
1335 omp_context *ctx = wi->info;
1336 tree t = *tp;
1338 if (EXPR_HAS_LOCATION (t))
1339 input_location = EXPR_LOCATION (t);
1341 /* Check the OpenMP nesting restrictions. */
1342 if (OMP_DIRECTIVE_P (t) && ctx != NULL)
1343 check_omp_nesting_restrictions (t, ctx);
1345 *walk_subtrees = 0;
1346 switch (TREE_CODE (t))
1348 case OMP_PARALLEL:
1349 parallel_nesting_level++;
1350 scan_omp_parallel (tp, ctx);
1351 parallel_nesting_level--;
1352 break;
1354 case OMP_FOR:
1355 scan_omp_for (tp, ctx);
1356 break;
1358 case OMP_SECTIONS:
1359 scan_omp_sections (tp, ctx);
1360 break;
1362 case OMP_SINGLE:
1363 scan_omp_single (tp, ctx);
1364 break;
1366 case OMP_SECTION:
1367 case OMP_MASTER:
1368 case OMP_ORDERED:
1369 case OMP_CRITICAL:
1370 ctx = new_omp_context (*tp, ctx);
1371 scan_omp (&OMP_BODY (*tp), ctx);
1372 break;
1374 case BIND_EXPR:
1376 tree var;
1377 *walk_subtrees = 1;
1379 for (var = BIND_EXPR_VARS (t); var ; var = TREE_CHAIN (var))
1380 insert_decl_map (&ctx->cb, var, var);
1382 break;
1384 case VAR_DECL:
1385 case PARM_DECL:
1386 case LABEL_DECL:
1387 case RESULT_DECL:
1388 if (ctx)
1389 *tp = remap_decl (t, &ctx->cb);
1390 break;
1392 default:
1393 if (ctx && TYPE_P (t))
1394 *tp = remap_type (t, &ctx->cb);
1395 else if (!DECL_P (t))
1396 *walk_subtrees = 1;
1397 break;
1400 return NULL_TREE;
1404 /* Scan all the statements starting at STMT_P. CTX contains context
1405 information about the OpenMP directives and clauses found during
1406 the scan. */
1408 static void
1409 scan_omp (tree *stmt_p, omp_context *ctx)
1411 location_t saved_location;
1412 struct walk_stmt_info wi;
1414 memset (&wi, 0, sizeof (wi));
1415 wi.callback = scan_omp_1;
1416 wi.info = ctx;
1417 wi.want_bind_expr = (ctx != NULL);
1418 wi.want_locations = true;
1420 saved_location = input_location;
1421 walk_stmts (&wi, stmt_p);
1422 input_location = saved_location;
1425 /* Re-gimplification and code generation routines. */
1427 /* Build a call to GOMP_barrier. */
1429 static void
1430 build_omp_barrier (tree *stmt_list)
1432 tree t;
1434 t = built_in_decls[BUILT_IN_GOMP_BARRIER];
1435 t = build_function_call_expr (t, NULL);
1436 gimplify_and_add (t, stmt_list);
1439 /* If a context was created for STMT when it was scanned, return it. */
1441 static omp_context *
1442 maybe_lookup_ctx (tree stmt)
1444 splay_tree_node n;
1445 n = splay_tree_lookup (all_contexts, (splay_tree_key) stmt);
1446 return n ? (omp_context *) n->value : NULL;
1450 /* Find the mapping for DECL in CTX or the immediately enclosing
1451 context that has a mapping for DECL.
1453 If CTX is a nested parallel directive, we may have to use the decl
1454 mappings created in CTX's parent context. Suppose that we have the
1455 following parallel nesting (variable UIDs shown for clarity):
1457 iD.1562 = 0;
1458 #omp parallel shared(iD.1562) -> outer parallel
1459 iD.1562 = iD.1562 + 1;
1461 #omp parallel shared (iD.1562) -> inner parallel
1462 iD.1562 = iD.1562 - 1;
1464 Each parallel structure will create a distinct .omp_data_s structure
1465 for copying iD.1562 in/out of the directive:
1467 outer parallel .omp_data_s.1.i -> iD.1562
1468 inner parallel .omp_data_s.2.i -> iD.1562
1470 A shared variable mapping will produce a copy-out operation before
1471 the parallel directive and a copy-in operation after it. So, in
1472 this case we would have:
1474 iD.1562 = 0;
1475 .omp_data_o.1.i = iD.1562;
1476 #omp parallel shared(iD.1562) -> outer parallel
1477 .omp_data_i.1 = &.omp_data_o.1
1478 .omp_data_i.1->i = .omp_data_i.1->i + 1;
1480 .omp_data_o.2.i = iD.1562; -> **
1481 #omp parallel shared(iD.1562) -> inner parallel
1482 .omp_data_i.2 = &.omp_data_o.2
1483 .omp_data_i.2->i = .omp_data_i.2->i - 1;
1486 ** This is a problem. The symbol iD.1562 cannot be referenced
1487 inside the body of the outer parallel region. But since we are
1488 emitting this copy operation while expanding the inner parallel
1489 directive, we need to access the CTX structure of the outer
1490 parallel directive to get the correct mapping:
1492 .omp_data_o.2.i = .omp_data_i.1->i
1494 Since there may be other workshare or parallel directives enclosing
1495 the parallel directive, it may be necessary to walk up the context
1496 parent chain. This is not a problem in general because nested
1497 parallelism happens only rarely. */
1499 static tree
1500 lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
1502 tree t;
1503 omp_context *up;
1505 gcc_assert (ctx->is_nested);
1507 for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
1508 t = maybe_lookup_decl (decl, up);
1510 gcc_assert (t || is_global_var (decl));
1512 return t ? t : decl;
1516 /* Similar to lookup_decl_in_outer_ctx, but return DECL if not found
1517 in outer contexts. */
1519 static tree
1520 maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
1522 tree t = NULL;
1523 omp_context *up;
1525 if (ctx->is_nested)
1526 for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
1527 t = maybe_lookup_decl (decl, up);
1529 return t ? t : decl;
1533 /* Construct the initialization value for reduction CLAUSE. */
1535 tree
1536 omp_reduction_init (tree clause, tree type)
1538 switch (OMP_CLAUSE_REDUCTION_CODE (clause))
1540 case PLUS_EXPR:
1541 case MINUS_EXPR:
1542 case BIT_IOR_EXPR:
1543 case BIT_XOR_EXPR:
1544 case TRUTH_OR_EXPR:
1545 case TRUTH_ORIF_EXPR:
1546 case TRUTH_XOR_EXPR:
1547 case NE_EXPR:
1548 return fold_convert (type, integer_zero_node);
1550 case MULT_EXPR:
1551 case TRUTH_AND_EXPR:
1552 case TRUTH_ANDIF_EXPR:
1553 case EQ_EXPR:
1554 return fold_convert (type, integer_one_node);
1556 case BIT_AND_EXPR:
1557 return fold_convert (type, integer_minus_one_node);
1559 case MAX_EXPR:
1560 if (SCALAR_FLOAT_TYPE_P (type))
1562 REAL_VALUE_TYPE max, min;
1563 if (HONOR_INFINITIES (TYPE_MODE (type)))
1565 real_inf (&max);
1566 real_arithmetic (&min, NEGATE_EXPR, &max, NULL);
1568 else
1569 real_maxval (&min, 1, TYPE_MODE (type));
1570 return build_real (type, min);
1572 else
1574 gcc_assert (INTEGRAL_TYPE_P (type));
1575 return TYPE_MIN_VALUE (type);
1578 case MIN_EXPR:
1579 if (SCALAR_FLOAT_TYPE_P (type))
1581 REAL_VALUE_TYPE max;
1582 if (HONOR_INFINITIES (TYPE_MODE (type)))
1583 real_inf (&max);
1584 else
1585 real_maxval (&max, 0, TYPE_MODE (type));
1586 return build_real (type, max);
1588 else
1590 gcc_assert (INTEGRAL_TYPE_P (type));
1591 return TYPE_MAX_VALUE (type);
1594 default:
1595 gcc_unreachable ();
1599 /* Generate code to implement the input clauses, FIRSTPRIVATE and COPYIN,
1600 from the receiver (aka child) side and initializers for REFERENCE_TYPE
1601 private variables. Initialization statements go in ILIST, while calls
1602 to destructors go in DLIST. */
/* CLAUSES is the clause chain to process and CTX the context used for
   decl lookups.  *DLIST is set to a freshly allocated statement list.
   A GOMP_barrier call is appended to ILIST at the end when a copyin
   variable is passed by reference or when some clause is both
   lastprivate and firstprivate.  */
1604 static void
1605 lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist,
1606 omp_context *ctx)
1608 tree_stmt_iterator diter;
1609 tree c, dtor, copyin_seq, x, args, ptr;
1610 bool copyin_by_ref = false;
1611 bool lastprivate_firstprivate = false;
1612 int pass;
1614 *dlist = alloc_stmt_list ();
1615 diter = tsi_start (*dlist);
1616 copyin_seq = NULL;
1618 /* Do all the fixed sized types in the first pass, and the variable sized
1619 types in the second pass. This makes sure that the scalar arguments to
1620 the variable sized types are processed before we use them in the
1621 variable sized operations. */
1622 for (pass = 0; pass < 2; ++pass)
1624 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
1626 enum omp_clause_code c_kind = OMP_CLAUSE_CODE (c);
1627 tree var, new_var;
1628 bool by_ref;
/* First filter: skip clause kinds that need nothing emitted here.  */
1630 switch (c_kind)
1632 case OMP_CLAUSE_PRIVATE:
1633 if (OMP_CLAUSE_PRIVATE_DEBUG (c))
1634 continue;
1635 break;
1636 case OMP_CLAUSE_SHARED:
/* A shared decl with no mapping in CTX must be a global; nothing to do
   for it.  */
1637 if (maybe_lookup_decl (OMP_CLAUSE_DECL (c), ctx) == NULL)
1639 gcc_assert (is_global_var (OMP_CLAUSE_DECL (c)));
1640 continue;
/* FALLTHRU */
1642 case OMP_CLAUSE_FIRSTPRIVATE:
1643 case OMP_CLAUSE_COPYIN:
1644 case OMP_CLAUSE_REDUCTION:
1645 break;
1646 case OMP_CLAUSE_LASTPRIVATE:
/* Remember that a barrier is needed; such clauses are only looked at
   in the first pass.  */
1647 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
1649 lastprivate_firstprivate = true;
1650 if (pass != 0)
1651 continue;
1653 break;
1654 default:
1655 continue;
/* COPYIN works on the clause decl itself; every other kind uses the
   decl that CTX maps it to.  */
1658 new_var = var = OMP_CLAUSE_DECL (c);
1659 if (c_kind != OMP_CLAUSE_COPYIN)
1660 new_var = lookup_decl (var, ctx);
/* Second filter: decide which pass handles this clause.  SHARED and
   COPYIN are handled in the first pass only.  */
1662 if (c_kind == OMP_CLAUSE_SHARED || c_kind == OMP_CLAUSE_COPYIN)
1664 if (pass != 0)
1665 continue;
1667 else if (is_variable_sized (var))
1669 /* For variable sized types, we need to allocate the
1670 actual storage here. Call alloca and store the
1671 result in the pointer decl that we created elsewhere. */
1672 if (pass == 0)
1673 continue;
/* The pointer decl is the operand of the INDIRECT_REF stored as
   NEW_VAR's value expression.  */
1675 ptr = DECL_VALUE_EXPR (new_var);
1676 gcc_assert (TREE_CODE (ptr) == INDIRECT_REF);
1677 ptr = TREE_OPERAND (ptr, 0);
1678 gcc_assert (DECL_P (ptr));
1680 x = TYPE_SIZE_UNIT (TREE_TYPE (new_var));
1681 args = tree_cons (NULL, x, NULL);
1682 x = built_in_decls[BUILT_IN_ALLOCA];
1683 x = build_function_call_expr (x, args);
1684 x = fold_convert (TREE_TYPE (ptr), x);
1685 x = build2 (MODIFY_EXPR, void_type_node, ptr, x);
1686 gimplify_and_add (x, ilist);
1688 else if (is_reference (var))
1690 /* For references that are being privatized for Fortran,
1691 allocate new backing storage for the new pointer
1692 variable. This allows us to avoid changing all the
1693 code that expects a pointer to something that expects
1694 a direct variable. Note that this doesn't apply to
1695 C++, since reference types are disallowed in data
1696 sharing clauses there, except for NRV optimized
1697 return values. */
1698 if (pass == 0)
1699 continue;
/* Constant-size referents get a named temporary; others are allocated
   with alloca.  */
1701 x = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_var)));
1702 if (TREE_CONSTANT (x))
1704 const char *name = NULL;
/* NOTE(review): the test is on VAR but the name is read from NEW_VAR;
   presumably both carry the same DECL_NAME -- confirm.  */
1705 if (DECL_NAME (var))
1706 name = IDENTIFIER_POINTER (DECL_NAME (new_var));
1708 x = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (new_var)),
1709 name);
1710 gimple_add_tmp_var (x);
1711 x = build_fold_addr_expr_with_type (x, TREE_TYPE (new_var));
1713 else
1715 args = tree_cons (NULL, x, NULL);
1716 x = built_in_decls[BUILT_IN_ALLOCA];
1717 x = build_function_call_expr (x, args);
1718 x = fold_convert (TREE_TYPE (new_var), x);
1721 x = build2 (MODIFY_EXPR, void_type_node, new_var, x);
1722 gimplify_and_add (x, ilist);
/* From here on NEW_VAR denotes the referenced object.  */
1724 new_var = build_fold_indirect_ref (new_var);
/* Reductions that carry a placeholder are deferred to the second
   pass.  */
1726 else if (c_kind == OMP_CLAUSE_REDUCTION
1727 && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1729 if (pass == 0)
1730 continue;
/* Everything else is handled in the first pass.  */
1732 else if (pass != 0)
1733 continue;
/* Emit the per-clause initialization (and destructor, if any).  */
1735 switch (OMP_CLAUSE_CODE (c))
1737 case OMP_CLAUSE_SHARED:
1738 /* Shared global vars are just accessed directly. */
1739 if (is_global_var (new_var))
1740 break;
1741 /* Set up the DECL_VALUE_EXPR for shared variables now. This
1742 needs to be delayed until after fixup_child_record_type so
1743 that we get the correct type during the dereference. */
1744 by_ref = use_pointer_for_field (var, true);
1745 x = build_receiver_ref (var, by_ref, ctx);
1746 SET_DECL_VALUE_EXPR (new_var, x);
1747 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
1749 /* ??? If VAR is not passed by reference, and the variable
1750 hasn't been initialized yet, then we'll get a warning for
1751 the store into the omp_data_s structure. Ideally, we'd be
1752 able to notice this and not store anything at all, but
1753 we're generating code too early. Suppress the warning. */
1754 if (!by_ref)
1755 TREE_NO_WARNING (var) = 1;
1756 break;
1758 case OMP_CLAUSE_LASTPRIVATE:
/* A lastprivate that is also firstprivate gets no default
   construction or destructor from this clause.  */
1759 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
1760 break;
1761 /* FALLTHRU */
1763 case OMP_CLAUSE_PRIVATE:
1764 x = lang_hooks.decls.omp_clause_default_ctor (c, new_var);
1765 if (x)
1766 gimplify_and_add (x, ilist);
1767 /* FALLTHRU */
/* Shared tail, also reached by goto from FIRSTPRIVATE: gimplify the
   destructor, if the language provides one, and link it into *DLIST
   before DITER.  */
1769 do_dtor:
1770 x = lang_hooks.decls.omp_clause_dtor (c, new_var);
1771 if (x)
1773 dtor = x;
1774 gimplify_stmt (&dtor);
1775 tsi_link_before (&diter, dtor, TSI_SAME_STMT);
1777 break;
1779 case OMP_CLAUSE_FIRSTPRIVATE:
/* Copy-construct the private copy from the outer variable.  */
1780 x = build_outer_var_ref (var, ctx);
1781 x = lang_hooks.decls.omp_clause_copy_ctor (c, new_var, x);
1782 gimplify_and_add (x, ilist);
1783 goto do_dtor;
1784 break;
1786 case OMP_CLAUSE_COPYIN:
/* Only collect the assignment here; the whole sequence is emitted
   after the loops under a thread-number guard.  */
1787 by_ref = use_pointer_for_field (var, false);
1788 x = build_receiver_ref (var, by_ref, ctx);
1789 x = lang_hooks.decls.omp_clause_assign_op (c, new_var, x);
1790 append_to_statement_list (x, &copyin_seq);
1791 copyin_by_ref |= by_ref;
1792 break;
1794 case OMP_CLAUSE_REDUCTION:
/* With a placeholder, the clause supplies its own init statement,
   which is consumed here; otherwise assign the operator's initial
   value from omp_reduction_init.  */
1795 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1797 gimplify_and_add (OMP_CLAUSE_REDUCTION_INIT (c), ilist);
1798 OMP_CLAUSE_REDUCTION_INIT (c) = NULL;
1800 else
1802 x = omp_reduction_init (c, TREE_TYPE (new_var));
1803 gcc_assert (TREE_CODE (TREE_TYPE (new_var)) != ARRAY_TYPE);
1804 x = build2 (MODIFY_EXPR, void_type_node, new_var, x);
1805 gimplify_and_add (x, ilist);
1807 break;
1809 default:
1810 gcc_unreachable ();
1815 /* The copyin sequence is not to be executed by the main thread, since
1816 that would result in self-copies. Perhaps not visible to scalars,
1817 but it certainly is to C++ operator=. */
1818 if (copyin_seq)
/* Build: if (omp_get_thread_num () != 0) COPYIN_SEQ.  */
1820 x = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
1821 x = build_function_call_expr (x, NULL);
1822 x = build2 (NE_EXPR, boolean_type_node, x,
1823 build_int_cst (TREE_TYPE (x), 0));
1824 x = build3 (COND_EXPR, void_type_node, x, copyin_seq, NULL);
1825 gimplify_and_add (x, ilist);
1828 /* If any copyin variable is passed by reference, we must ensure the
1829 master thread doesn't modify it before it is copied over in all
1830 threads. Similarly for variables in both firstprivate and
1831 lastprivate clauses we need to ensure the lastprivate copying
1832 happens after firstprivate copying in all threads. */
1833 if (copyin_by_ref || lastprivate_firstprivate)
1834 build_omp_barrier (ilist);
1838 /* Generate code to implement the LASTPRIVATE clauses. This is used for
1839 both parallel and workshare constructs. PREDICATE may be NULL if it's
1840 always true. */
1842 static void
1843 lower_lastprivate_clauses (tree clauses, tree predicate, tree *stmt_list,
1844 omp_context *ctx)
1846 tree sub_list, x, c;
1848 /* Early exit if there are no lastprivate clauses. */
1849 clauses = find_omp_clause (clauses, OMP_CLAUSE_LASTPRIVATE);
1850 if (clauses == NULL)
1852 /* If this was a workshare clause, see if it had been combined
1853 with its parallel. In that case, look for the clauses on the
1854 parallel statement itself. */
1855 if (is_parallel_ctx (ctx))
1856 return;
1858 ctx = ctx->outer;
1859 if (ctx == NULL || !is_parallel_ctx (ctx))
1860 return;
1862 clauses = find_omp_clause (OMP_PARALLEL_CLAUSES (ctx->stmt),
1863 OMP_CLAUSE_LASTPRIVATE);
1864 if (clauses == NULL)
1865 return;
1868 sub_list = alloc_stmt_list ();
1870 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
1872 tree var, new_var;
1874 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_LASTPRIVATE)
1875 continue;
1877 var = OMP_CLAUSE_DECL (c);
1878 new_var = lookup_decl (var, ctx);
1880 x = build_outer_var_ref (var, ctx);
1881 if (is_reference (var))
1882 new_var = build_fold_indirect_ref (new_var);
1883 x = lang_hooks.decls.omp_clause_assign_op (c, x, new_var);
1884 append_to_statement_list (x, &sub_list);
1887 if (predicate)
1888 x = build3 (COND_EXPR, void_type_node, predicate, sub_list, NULL);
1889 else
1890 x = sub_list;
1892 gimplify_and_add (x, stmt_list);
1896 /* Generate code to implement the REDUCTION clauses. */
1898 static void
1899 lower_reduction_clauses (tree clauses, tree *stmt_list, omp_context *ctx)
1901 tree sub_list = NULL, x, c;
1902 int count = 0;
1904 /* First see if there is exactly one reduction clause. Use OMP_ATOMIC
1905 update in that case, otherwise use a lock. */
1906 for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c))
1907 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
1909 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1911 /* Never use OMP_ATOMIC for array reductions. */
1912 count = -1;
1913 break;
1915 count++;
1918 if (count == 0)
1919 return;
1921 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
1923 tree var, ref, new_var;
1924 enum tree_code code;
1926 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION)
1927 continue;
1929 var = OMP_CLAUSE_DECL (c);
1930 new_var = lookup_decl (var, ctx);
1931 if (is_reference (var))
1932 new_var = build_fold_indirect_ref (new_var);
1933 ref = build_outer_var_ref (var, ctx);
1934 code = OMP_CLAUSE_REDUCTION_CODE (c);
1936 /* reduction(-:var) sums up the partial results, so it acts
1937 identically to reduction(+:var). */
1938 if (code == MINUS_EXPR)
1939 code = PLUS_EXPR;
1941 if (count == 1)
1943 tree addr = build_fold_addr_expr (ref);
1945 addr = save_expr (addr);
1946 ref = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (addr)), addr);
1947 x = fold_build2 (code, TREE_TYPE (ref), ref, new_var);
1948 x = build2 (OMP_ATOMIC, void_type_node, addr, x);
1949 gimplify_and_add (x, stmt_list);
1950 return;
1953 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1955 tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
1957 if (is_reference (var))
1958 ref = build_fold_addr_expr (ref);
1959 SET_DECL_VALUE_EXPR (placeholder, ref);
1960 DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
1961 gimplify_and_add (OMP_CLAUSE_REDUCTION_MERGE (c), &sub_list);
1962 OMP_CLAUSE_REDUCTION_MERGE (c) = NULL;
1963 OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL;
1965 else
1967 x = build2 (code, TREE_TYPE (ref), ref, new_var);
1968 ref = build_outer_var_ref (var, ctx);
1969 x = build2 (MODIFY_EXPR, void_type_node, ref, x);
1970 append_to_statement_list (x, &sub_list);
1974 x = built_in_decls[BUILT_IN_GOMP_ATOMIC_START];
1975 x = build_function_call_expr (x, NULL);
1976 gimplify_and_add (x, stmt_list);
1978 gimplify_and_add (sub_list, stmt_list);
1980 x = built_in_decls[BUILT_IN_GOMP_ATOMIC_END];
1981 x = build_function_call_expr (x, NULL);
1982 gimplify_and_add (x, stmt_list);
1986 /* Generate code to implement the COPYPRIVATE clauses. */
1988 static void
1989 lower_copyprivate_clauses (tree clauses, tree *slist, tree *rlist,
1990 omp_context *ctx)
1992 tree c;
1994 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
1996 tree var, ref, x;
1997 bool by_ref;
1999 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYPRIVATE)
2000 continue;
2002 var = OMP_CLAUSE_DECL (c);
2003 by_ref = use_pointer_for_field (var, false);
2005 ref = build_sender_ref (var, ctx);
2006 x = (ctx->is_nested) ? lookup_decl_in_outer_ctx (var, ctx) : var;
2007 x = by_ref ? build_fold_addr_expr (x) : x;
2008 x = build2 (MODIFY_EXPR, void_type_node, ref, x);
2009 gimplify_and_add (x, slist);
2011 ref = build_receiver_ref (var, by_ref, ctx);
2012 if (is_reference (var))
2014 ref = build_fold_indirect_ref (ref);
2015 var = build_fold_indirect_ref (var);
2017 x = lang_hooks.decls.omp_clause_assign_op (c, var, ref);
2018 gimplify_and_add (x, rlist);
2023 /* Generate code to implement the clauses, FIRSTPRIVATE, COPYIN, LASTPRIVATE,
2024 and REDUCTION from the sender (aka parent) side. */
2026 static void
2027 lower_send_clauses (tree clauses, tree *ilist, tree *olist, omp_context *ctx)
2029 tree c;
2031 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
2033 tree val, ref, x, var;
2034 bool by_ref, do_in = false, do_out = false;
2036 switch (OMP_CLAUSE_CODE (c))
2038 case OMP_CLAUSE_FIRSTPRIVATE:
2039 case OMP_CLAUSE_COPYIN:
2040 case OMP_CLAUSE_LASTPRIVATE:
2041 case OMP_CLAUSE_REDUCTION:
2042 break;
2043 default:
2044 continue;
2047 var = val = OMP_CLAUSE_DECL (c);
2048 if (ctx->is_nested)
2049 var = lookup_decl_in_outer_ctx (val, ctx);
2051 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYIN
2052 && is_global_var (var))
2053 continue;
2054 if (is_variable_sized (val))
2055 continue;
2056 by_ref = use_pointer_for_field (val, false);
2058 switch (OMP_CLAUSE_CODE (c))
2060 case OMP_CLAUSE_FIRSTPRIVATE:
2061 case OMP_CLAUSE_COPYIN:
2062 do_in = true;
2063 break;
2065 case OMP_CLAUSE_LASTPRIVATE:
2066 if (by_ref || is_reference (val))
2068 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
2069 continue;
2070 do_in = true;
2072 else
2073 do_out = true;
2074 break;
2076 case OMP_CLAUSE_REDUCTION:
2077 do_in = true;
2078 do_out = !(by_ref || is_reference (val));
2079 break;
2081 default:
2082 gcc_unreachable ();
2085 if (do_in)
2087 ref = build_sender_ref (val, ctx);
2088 x = by_ref ? build_fold_addr_expr (var) : var;
2089 x = build2 (MODIFY_EXPR, void_type_node, ref, x);
2090 gimplify_and_add (x, ilist);
2093 if (do_out)
2095 ref = build_sender_ref (val, ctx);
2096 x = build2 (MODIFY_EXPR, void_type_node, var, ref);
2097 gimplify_and_add (x, olist);
2102 /* Generate code to implement SHARED from the sender (aka parent) side.
2103 This is trickier, since OMP_PARALLEL_CLAUSES doesn't list things that
2104 got automatically shared. */
2106 static void
2107 lower_send_shared_vars (tree *ilist, tree *olist, omp_context *ctx)
2109 tree var, ovar, nvar, f, x;
2111 if (ctx->record_type == NULL)
2112 return;
2114 for (f = TYPE_FIELDS (ctx->record_type); f ; f = TREE_CHAIN (f))
2116 ovar = DECL_ABSTRACT_ORIGIN (f);
2117 nvar = maybe_lookup_decl (ovar, ctx);
2118 if (!nvar || !DECL_HAS_VALUE_EXPR_P (nvar))
2119 continue;
2121 var = ovar;
2123 /* If CTX is a nested parallel directive. Find the immediately
2124 enclosing parallel or workshare construct that contains a
2125 mapping for OVAR. */
2126 if (ctx->is_nested)
2127 var = lookup_decl_in_outer_ctx (ovar, ctx);
2129 if (use_pointer_for_field (ovar, true))
2131 x = build_sender_ref (ovar, ctx);
2132 var = build_fold_addr_expr (var);
2133 x = build2 (MODIFY_EXPR, void_type_node, x, var);
2134 gimplify_and_add (x, ilist);
2136 else
2138 x = build_sender_ref (ovar, ctx);
2139 x = build2 (MODIFY_EXPR, void_type_node, x, var);
2140 gimplify_and_add (x, ilist);
2142 x = build_sender_ref (ovar, ctx);
2143 x = build2 (MODIFY_EXPR, void_type_node, var, x);
2144 gimplify_and_add (x, olist);
2149 /* Build the function calls to GOMP_parallel_start etc to actually
2150 generate the parallel operation. REGION is the parallel region
2151 being expanded. BB is the block where to insert the code. WS_ARGS
2152 will be set if this is a call to a combined parallel+workshare
2153 construct, it contains the list of additional arguments needed by
2154 the workshare construct. */
/* ENTRY_STMT is the OMP_PARALLEL statement; its clauses, child function
   and data argument are read from it.  Three calls are emitted at the
   end of BB, in order: the selected *_START built-in, a direct call to
   the child function, and GOMP_parallel_end.  */
2156 static void
2157 expand_parallel_call (struct omp_region *region, basic_block bb,
2158 tree entry_stmt, tree ws_args)
2160 tree t, args, val, cond, c, list, clauses;
2161 block_stmt_iterator si;
2162 int start_ix;
2164 clauses = OMP_PARALLEL_CLAUSES (entry_stmt);
2165 push_gimplify_context ();
2167 /* Determine what flavor of GOMP_parallel_start we will be
2168 emitting. */
2169 start_ix = BUILT_IN_GOMP_PARALLEL_START;
2170 if (is_combined_parallel (region))
2172 switch (region->inner->type)
2174 case OMP_FOR:
/* NOTE(review): this assumes the parallel-loop start built-ins are
   numbered consecutively in schedule-kind order -- confirm against the
   built-in definitions.  */
2175 start_ix = BUILT_IN_GOMP_PARALLEL_LOOP_STATIC_START
2176 + region->inner->sched_kind;
2177 break;
2178 case OMP_SECTIONS:
2179 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS_START;
2180 break;
2181 default:
2182 gcc_unreachable ();
2186 /* By default, the value of NUM_THREADS is zero (selected at run time)
2187 and there is no conditional. */
2188 cond = NULL_TREE;
2189 val = build_int_cst (unsigned_type_node, 0);
2191 c = find_omp_clause (clauses, OMP_CLAUSE_IF);
2192 if (c)
2193 cond = OMP_CLAUSE_IF_EXPR (c);
2195 c = find_omp_clause (clauses, OMP_CLAUSE_NUM_THREADS);
2196 if (c)
2197 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
2199 /* Ensure 'val' is of the correct type. */
2200 val = fold_convert (unsigned_type_node, val);
2202 /* If we found the clause 'if (cond)', build either
2203 (cond == 0) or (cond ? val : 1u). */
2204 if (cond)
2206 block_stmt_iterator si;
2208 cond = gimple_boolify (cond);
/* With a thread count of zero, (cond == 0) yields 0 when the condition
   holds and 1 when it does not.  */
2210 if (integer_zerop (val))
2211 val = build2 (EQ_EXPR, unsigned_type_node, cond,
2212 build_int_cst (TREE_TYPE (cond), 0));
2213 else
2215 basic_block cond_bb, then_bb, else_bb;
2216 edge e;
2217 tree t, then_lab, else_lab, tmp;
/* Split BB and build a diamond in front of the new BB: TMP = VAL on
   the true arm, TMP = 1 on the false arm.  */
2219 tmp = create_tmp_var (TREE_TYPE (val), NULL);
2220 e = split_block (bb, NULL);
2221 cond_bb = e->src;
2222 bb = e->dest;
2223 remove_edge (e);
2225 then_bb = create_empty_bb (cond_bb);
2226 else_bb = create_empty_bb (then_bb);
2227 then_lab = create_artificial_label ();
2228 else_lab = create_artificial_label ();
2230 t = build3 (COND_EXPR, void_type_node,
2231 cond,
2232 build_and_jump (&then_lab),
2233 build_and_jump (&else_lab));
2235 si = bsi_start (cond_bb);
2236 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2238 si = bsi_start (then_bb);
2239 t = build1 (LABEL_EXPR, void_type_node, then_lab);
2240 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2241 t = build2 (MODIFY_EXPR, void_type_node, tmp, val);
2242 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2244 si = bsi_start (else_bb);
2245 t = build1 (LABEL_EXPR, void_type_node, else_lab);
2246 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2247 t = build2 (MODIFY_EXPR, void_type_node, tmp,
2248 build_int_cst (unsigned_type_node, 1));
2249 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
/* Wire the diamond: COND_BB -> {THEN_BB, ELSE_BB} -> BB.  */
2251 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
2252 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
2253 make_edge (then_bb, bb, EDGE_FALLTHRU);
2254 make_edge (else_bb, bb, EDGE_FALLTHRU);
2256 val = tmp;
/* Reduce VAL to a formal temporary; the statements needed for that go
   at the start of BB.  */
2259 list = NULL_TREE;
2260 val = get_formal_tmp_var (val, &list);
2261 si = bsi_start (bb);
2262 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
/* Arguments are consed in reverse, giving: address of the child
   function, data pointer (or null), thread count, followed by WS_ARGS
   if any.  */
2265 list = NULL_TREE;
2266 args = tree_cons (NULL, val, NULL);
2267 t = OMP_PARALLEL_DATA_ARG (entry_stmt);
2268 if (t == NULL)
2269 t = null_pointer_node;
2270 else
2271 t = build_fold_addr_expr (t);
2272 args = tree_cons (NULL, t, args);
2273 t = build_fold_addr_expr (OMP_PARALLEL_FN (entry_stmt));
2274 args = tree_cons (NULL, t, args);
2276 if (ws_args)
2277 args = chainon (args, ws_args);
2279 t = built_in_decls[start_ix];
2280 t = build_function_call_expr (t, args);
2281 gimplify_and_add (t, &list);
/* Emit a direct call to the child function with the same data
   pointer.  */
2283 t = OMP_PARALLEL_DATA_ARG (entry_stmt);
2284 if (t == NULL)
2285 t = null_pointer_node;
2286 else
2287 t = build_fold_addr_expr (t);
2288 args = tree_cons (NULL, t, NULL);
2289 t = build_function_call_expr (OMP_PARALLEL_FN (entry_stmt), args);
2290 gimplify_and_add (t, &list);
2292 t = built_in_decls[BUILT_IN_GOMP_PARALLEL_END];
2293 t = build_function_call_expr (t, NULL);
2294 gimplify_and_add (t, &list);
/* Append the three calls after the last statement of BB.  */
2296 si = bsi_last (bb);
2297 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2299 pop_gimplify_context (NULL_TREE);
2303 /* If exceptions are enabled, wrap *STMT_P in a MUST_NOT_THROW catch
2304 handler. This prevents programs from violating the structured
2305 block semantics with throws. */
2307 static void
2308 maybe_catch_exception (tree *stmt_p)
2310 tree f, t;
2312 if (!flag_exceptions)
2313 return;
2315 if (lang_protect_cleanup_actions)
2316 t = lang_protect_cleanup_actions ();
2317 else
2319 t = built_in_decls[BUILT_IN_TRAP];
2320 t = build_function_call_expr (t, NULL);
2322 f = build2 (EH_FILTER_EXPR, void_type_node, NULL, NULL);
2323 EH_FILTER_MUST_NOT_THROW (f) = 1;
2324 gimplify_and_add (t, &EH_FILTER_FAILURE (f));
2326 t = build2 (TRY_CATCH_EXPR, void_type_node, *stmt_p, NULL);
2327 append_to_statement_list (f, &TREE_OPERAND (t, 1));
2329 *stmt_p = NULL;
2330 append_to_statement_list (t, stmt_p);
2333 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
2335 static tree
2336 list2chain (tree list)
2338 tree t;
2340 for (t = list; t; t = TREE_CHAIN (t))
2342 tree var = TREE_VALUE (t);
2343 if (TREE_CHAIN (t))
2344 TREE_CHAIN (var) = TREE_VALUE (TREE_CHAIN (t));
2345 else
2346 TREE_CHAIN (var) = NULL_TREE;
2349 return list ? TREE_VALUE (list) : NULL_TREE;
2353 /* Remove barriers in REGION->EXIT's block. Note that this is only
2354 valid for OMP_PARALLEL regions. Since the end of a parallel region
2355 is an implicit barrier, any workshare inside the OMP_PARALLEL that
2356 left a barrier at the end of the OMP_PARALLEL region can now be
2357 removed. */
2359 static void
2360 remove_exit_barrier (struct omp_region *region)
2362 block_stmt_iterator si;
2363 basic_block exit_bb;
2364 edge_iterator ei;
2365 edge e;
2366 tree t;
2368 exit_bb = region->exit;
2370 /* If the parallel region doesn't return, we don't have REGION->EXIT
2371 block at all. */
2372 if (! exit_bb)
2373 return;
2375 /* The last insn in the block will be the parallel's OMP_RETURN. The
2376 workshare's OMP_RETURN will be in a preceding block. The kinds of
2377 statements that can appear in between are extremely limited -- no
2378 memory operations at all. Here, we allow nothing at all, so the
2379 only thing we allow to precede this OMP_RETURN is a label. */
2380 si = bsi_last (exit_bb);
2381 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
2382 bsi_prev (&si);
2383 if (!bsi_end_p (si) && TREE_CODE (bsi_stmt (si)) != LABEL_EXPR)
2384 return;
2386 FOR_EACH_EDGE (e, ei, exit_bb->preds)
2388 si = bsi_last (e->src);
2389 if (bsi_end_p (si))
2390 continue;
2391 t = bsi_stmt (si);
2392 if (TREE_CODE (t) == OMP_RETURN)
2393 OMP_RETURN_NOWAIT (t) = 1;
2397 static void
2398 remove_exit_barriers (struct omp_region *region)
2400 if (region->type == OMP_PARALLEL)
2401 remove_exit_barrier (region);
2403 if (region->inner)
2405 region = region->inner;
2406 remove_exit_barriers (region);
2407 while (region->next)
2409 region = region->next;
2410 remove_exit_barriers (region);
2415 /* Expand the OpenMP parallel directive starting at REGION.
   The OMP_PARALLEL statement that ends REGION->ENTRY names the child
   function (OMP_PARALLEL_FN).  If that function already owns a CFG,
   the entry block is merely disconnected from the region body;
   otherwise the blocks from REGION->ENTRY's successor through
   REGION->EXIT are moved into the child function.  In both cases
   expand_parallel_call is then invoked on the resulting block to emit
   the runtime call.  */
2417 static void
2418 expand_omp_parallel (struct omp_region *region)
2420 basic_block entry_bb, exit_bb, new_bb;
2421 struct function *child_cfun, *saved_cfun;
2422 tree child_fn, block, t, ws_args;
2423 block_stmt_iterator si;
2424 tree entry_stmt;
2425 edge e;
2426 bool do_cleanup_cfg = false;
/* The directive is the last statement of the region's entry block.  */
2428 entry_stmt = last_stmt (region->entry);
2429 child_fn = OMP_PARALLEL_FN (entry_stmt);
2430 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
2431 saved_cfun = cfun;
2433 entry_bb = region->entry;
2434 exit_bb = region->exit;
/* Only combined parallel+workshare regions carry extra arguments for
   the launch call.  */
2436 if (is_combined_parallel (region))
2437 ws_args = region->ws_args;
2438 else
2439 ws_args = NULL_TREE;
2441 if (child_cfun->cfg)
2443 /* Due to inlining, it may happen that we have already outlined
2444 the region, in which case all we need to do is make the
2445 sub-graph unreachable and emit the parallel call. */
2446 edge entry_succ_e, exit_succ_e;
2447 block_stmt_iterator si;
2449 entry_succ_e = single_succ_edge (entry_bb);
2451 si = bsi_last (entry_bb);
2452 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_PARALLEL);
2453 bsi_remove (&si, true);
/* The launch call will be emitted in ENTRY_BB itself.  */
2455 new_bb = entry_bb;
2456 remove_edge (entry_succ_e);
2457 if (exit_bb)
/* Bypass the region body: link ENTRY_BB to whatever followed EXIT_BB.  */
2459 exit_succ_e = single_succ_edge (exit_bb);
2460 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
2462 do_cleanup_cfg = true;
2464 else
2466 /* If the parallel region needs data sent from the parent
2467 function, then the very first statement (except possible
2468 tree profile counter updates) of the parallel body
2469 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
2470 &.OMP_DATA_O is passed as an argument to the child function,
2471 we need to replace it with the argument as seen by the child
2472 function.
2474 In most cases, this will end up being the identity assignment
2475 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
2476 a function call that has been inlined, the original PARM_DECL
2477 .OMP_DATA_I may have been converted into a different local
2478 variable. In which case, we need to keep the assignment. */
2479 if (OMP_PARALLEL_DATA_ARG (entry_stmt))
2481 basic_block entry_succ_bb = single_succ (entry_bb);
2482 block_stmt_iterator si;
/* Scan the first body block for the assignment whose right-hand side
   is the address of the data argument.  The loop has no exit
   condition of its own: it stops at the match, and the assert below
   fires if the block is exhausted first.  */
2484 for (si = bsi_start (entry_succ_bb); ; bsi_next (&si))
2486 tree stmt, arg;
2488 gcc_assert (!bsi_end_p (si));
2489 stmt = bsi_stmt (si);
2490 if (TREE_CODE (stmt) != MODIFY_EXPR)
2491 continue;
2493 arg = TREE_OPERAND (stmt, 1);
2494 STRIP_NOPS (arg);
2495 if (TREE_CODE (arg) == ADDR_EXPR
2496 && TREE_OPERAND (arg, 0)
2497 == OMP_PARALLEL_DATA_ARG (entry_stmt))
/* Identity assignment to the child's first argument: drop it.
   Otherwise keep the statement but read from that argument.  */
2499 if (TREE_OPERAND (stmt, 0) == DECL_ARGUMENTS (child_fn))
2500 bsi_remove (&si, true);
2501 else
2502 TREE_OPERAND (stmt, 1) = DECL_ARGUMENTS (child_fn);
2503 break;
2508 /* Declare local variables needed in CHILD_CFUN. */
2509 block = DECL_INITIAL (child_fn);
2510 BLOCK_VARS (block) = list2chain (child_cfun->unexpanded_var_list);
2511 DECL_SAVED_TREE (child_fn) = single_succ (entry_bb)->stmt_list;
2513 /* Reset DECL_CONTEXT on locals and function arguments. */
2514 for (t = BLOCK_VARS (block); t; t = TREE_CHAIN (t))
2515 DECL_CONTEXT (t) = child_fn;
2517 for (t = DECL_ARGUMENTS (child_fn); t; t = TREE_CHAIN (t))
2518 DECL_CONTEXT (t) = child_fn;
2520 /* Split ENTRY_BB at OMP_PARALLEL so that it can be moved to the
2521 child function. */
2522 si = bsi_last (entry_bb);
2523 t = bsi_stmt (si);
2524 gcc_assert (t && TREE_CODE (t) == OMP_PARALLEL);
2525 bsi_remove (&si, true);
2526 e = split_block (entry_bb, t);
/* From here on ENTRY_BB denotes the new block created by the split.  */
2527 entry_bb = e->dest;
2528 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
2530 /* Move the parallel region into CHILD_CFUN. We need to reset
2531 dominance information because the expansion of the inner
2532 regions has invalidated it. */
2533 free_dominance_info (CDI_DOMINATORS);
2534 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb);
2535 if (exit_bb)
2536 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
2537 cgraph_add_new_function (child_fn);
2539 /* Convert OMP_RETURN into a RETURN_EXPR. */
2540 if (exit_bb)
2542 si = bsi_last (exit_bb);
2543 gcc_assert (!bsi_end_p (si)
2544 && TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
2545 t = build1 (RETURN_EXPR, void_type_node, NULL);
2546 bsi_insert_after (&si, t, BSI_SAME_STMT);
2547 bsi_remove (&si, true);
2551 /* Emit a library call to launch the children threads. */
2552 expand_parallel_call (region, new_bb, entry_stmt, ws_args);
2554 if (do_cleanup_cfg)
2556 /* Clean up the unreachable sub-graph we created above. */
2557 free_dominance_info (CDI_DOMINATORS);
2558 free_dominance_info (CDI_POST_DOMINATORS);
2559 cleanup_tree_cfg ();
2564 /* A subroutine of expand_omp_for. Generate code for a parallel
2565 loop with any schedule. Given parameters:
2567 for (V = N1; V cond N2; V += STEP) BODY;
2569 where COND is "<" or ">", we generate pseudocode
2571 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2572 if (more) goto L0; else goto L3;
2573 L0:
2574 V = istart0;
2575 iend = iend0;
2576 L1:
2577 BODY;
2578 V += STEP;
2579 if (V cond iend) goto L1; else goto L2;
2580 L2:
2581 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2582 L3:
2584 If this is a combined omp parallel loop, instead of the call to
2585 GOMP_loop_foo_start, we emit 'goto L2'. */
2587 static void
2588 expand_omp_for_generic (struct omp_region *region,
2589 struct omp_for_data *fd,
2590 enum built_in_function start_fn,
2591 enum built_in_function next_fn)
/* REGION is the OMP_FOR region to expand and FD its extracted loop
   description.  START_FN and NEXT_FN index built_in_decls and select
   the runtime routines called to obtain the first and each following
   iteration range.  */
2593 tree l0, l1, l2 = NULL, l3 = NULL;
2594 tree type, istart0, iend0, iend;
2595 tree t, args, list;
2596 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb;
2597 basic_block l2_bb = NULL, l3_bb = NULL;
2598 block_stmt_iterator si;
2599 bool in_combined_parallel = is_combined_parallel (region);
2601 type = TREE_TYPE (fd->v);
/* ISTART0 and IEND0 are passed to the runtime by address below, so
   they must be addressable.  */
2603 istart0 = create_tmp_var (long_integer_type_node, ".istart0");
2604 iend0 = create_tmp_var (long_integer_type_node, ".iend0");
2605 iend = create_tmp_var (type, NULL);
2606 TREE_ADDRESSABLE (istart0) = 1;
2607 TREE_ADDRESSABLE (iend0) = 1;
/* Either both the continue block and the exit block exist, or
   neither does.  */
2609 gcc_assert ((region->cont != NULL) ^ (region->exit == NULL));
2611 entry_bb = region->entry;
2612 l0_bb = create_empty_bb (entry_bb);
2613 l1_bb = single_succ (entry_bb);
2615 l0 = tree_block_label (l0_bb);
2616 l1 = tree_block_label (l1_bb);
2618 cont_bb = region->cont;
2619 exit_bb = region->exit;
2620 if (cont_bb)
2622 l2_bb = create_empty_bb (cont_bb);
2623 l3_bb = single_succ (cont_bb);
2625 l2 = tree_block_label (l2_bb);
2626 l3 = tree_block_label (l3_bb);
2629 si = bsi_last (entry_bb);
2630 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
2631 if (!in_combined_parallel)
2633 /* If this is not a combined parallel loop, emit a call to
2634 GOMP_loop_foo_start in ENTRY_BB. */
2635 list = alloc_stmt_list ();
/* tree_cons prepends, so the arguments are pushed last-to-first; the
   resulting order is N1, N2, STEP, [CHUNK,] &istart0, &iend0.  */
2636 t = build_fold_addr_expr (iend0);
2637 args = tree_cons (NULL, t, NULL);
2638 t = build_fold_addr_expr (istart0);
2639 args = tree_cons (NULL, t, args);
2640 if (fd->chunk_size)
2642 t = fold_convert (long_integer_type_node, fd->chunk_size);
2643 args = tree_cons (NULL, t, args);
2645 t = fold_convert (long_integer_type_node, fd->step);
2646 args = tree_cons (NULL, t, args);
2647 t = fold_convert (long_integer_type_node, fd->n2);
2648 args = tree_cons (NULL, t, args);
2649 t = fold_convert (long_integer_type_node, fd->n1);
2650 args = tree_cons (NULL, t, args);
2651 t = build_function_call_expr (built_in_decls[start_fn], args);
2652 t = get_formal_tmp_var (t, &list);
/* The branch on the call's result is emitted only when the loop body
   can reach the continue block; otherwise L3 does not exist.  */
2653 if (cont_bb)
2655 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l0),
2656 build_and_jump (&l3));
2657 append_to_statement_list (t, &list);
2659 bsi_insert_after (&si, list, BSI_SAME_STMT);
/* Drop the OMP_FOR statement itself.  */
2661 bsi_remove (&si, true);
2663 /* Iteration setup for sequential loop goes in L0_BB. */
2664 list = alloc_stmt_list ();
2665 t = fold_convert (type, istart0);
2666 t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
2667 gimplify_and_add (t, &list);
2669 t = fold_convert (type, iend0);
2670 t = build2 (MODIFY_EXPR, void_type_node, iend, t);
2671 gimplify_and_add (t, &list);
2673 si = bsi_start (l0_bb);
2674 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2676 /* Handle the rare case where BODY doesn't ever return. */
2677 if (cont_bb == NULL)
2679 remove_edge (single_succ_edge (entry_bb));
2680 make_edge (entry_bb, l0_bb, EDGE_FALLTHRU);
2681 make_edge (l0_bb, l1_bb, EDGE_FALLTHRU);
2682 return;
2685 /* Code to control the increment and predicate for the sequential
2686 loop goes in CONT_BB, where it replaces the OMP_CONTINUE
2687 statement. */
2689 list = alloc_stmt_list ();
2691 t = build2 (PLUS_EXPR, type, fd->v, fd->step);
2692 t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
2693 gimplify_and_add (t, &list);
2695 t = build2 (fd->cond_code, boolean_type_node, fd->v, iend);
2696 t = get_formal_tmp_var (t, &list);
2697 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1),
2698 build_and_jump (&l2));
2699 append_to_statement_list (t, &list);
2701 si = bsi_last (cont_bb);
2702 bsi_insert_after (&si, list, BSI_SAME_STMT);
2703 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
2704 bsi_remove (&si, true);
2706 /* Emit code to get the next parallel iteration in L2_BB. */
2707 list = alloc_stmt_list ();
2709 t = build_fold_addr_expr (iend0);
2710 args = tree_cons (NULL, t, NULL);
2711 t = build_fold_addr_expr (istart0);
2712 args = tree_cons (NULL, t, args);
2713 t = build_function_call_expr (built_in_decls[next_fn], args);
2714 t = get_formal_tmp_var (t, &list);
2715 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l0),
2716 build_and_jump (&l3));
2717 append_to_statement_list (t, &list);
2719 si = bsi_start (l2_bb);
2720 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2722 /* Add the loop cleanup function. */
2723 si = bsi_last (exit_bb);
2724 if (OMP_RETURN_NOWAIT (bsi_stmt (si)))
2725 t = built_in_decls[BUILT_IN_GOMP_LOOP_END_NOWAIT];
2726 else
2727 t = built_in_decls[BUILT_IN_GOMP_LOOP_END];
2728 t = build_function_call_expr (t, NULL);
2729 bsi_insert_after (&si, t, BSI_SAME_STMT);
2730 bsi_remove (&si, true);
2732 /* Connect the new blocks. */
2733 remove_edge (single_succ_edge (entry_bb));
/* A combined parallel loop emitted no start call in ENTRY_BB, so it
   falls through to L2_BB, which holds the call to the NEXT routine.  */
2734 if (in_combined_parallel)
2735 make_edge (entry_bb, l2_bb, EDGE_FALLTHRU);
2736 else
2738 make_edge (entry_bb, l0_bb, EDGE_TRUE_VALUE);
2739 make_edge (entry_bb, l3_bb, EDGE_FALSE_VALUE);
2742 make_edge (l0_bb, l1_bb, EDGE_FALLTHRU);
2744 remove_edge (single_succ_edge (cont_bb));
2745 make_edge (cont_bb, l1_bb, EDGE_TRUE_VALUE);
2746 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
2748 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
2749 make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
2753 /* A subroutine of expand_omp_for. Generate code for a parallel
2754 loop with static schedule and no specified chunk size. Given
2755 parameters:
2757 for (V = N1; V cond N2; V += STEP) BODY;
2759 where COND is "<" or ">", we generate pseudocode
2761 if (cond is <)
2762 adj = STEP - 1;
2763 else
2764 adj = STEP + 1;
2765 n = (adj + N2 - N1) / STEP;
2766 q = n / nthreads;
2767 q += (q * nthreads != n);
2768 s0 = q * threadid;
2769 e0 = min(s0 + q, n);
2770 if (s0 >= e0) goto L2; else goto L0;
2771 L0:
2772 V = s0 * STEP + N1;
2773 e = e0 * STEP + N1;
2774 L1:
2775 BODY;
2776 V += STEP;
2777 if (V cond e) goto L1;
2778 L2:
2779 */
2781 static void
2782 expand_omp_for_static_nochunk (struct omp_region *region,
2783 struct omp_for_data *fd)
/* REGION is the OMP_FOR region and FD its extracted loop description.
   FD->n1, FD->n2 and FD->step are overwritten below with values
   converted to the type of the iteration variable.  */
2785 tree l0, l1, l2, n, q, s0, e0, e, t, nthreads, threadid;
2786 tree type, list;
2787 basic_block entry_bb, exit_bb, seq_start_bb, body_bb, cont_bb;
2788 basic_block fin_bb;
2789 block_stmt_iterator si;
2791 type = TREE_TYPE (fd->v);
2793 entry_bb = region->entry;
2794 seq_start_bb = create_empty_bb (entry_bb);
2795 body_bb = single_succ (entry_bb);
2796 cont_bb = region->cont;
2797 fin_bb = single_succ (cont_bb);
2798 exit_bb = region->exit;
2800 l0 = tree_block_label (seq_start_bb);
2801 l1 = tree_block_label (body_bb);
2802 l2 = tree_block_label (fin_bb);
2804 /* Iteration space partitioning goes in ENTRY_BB. */
2805 list = alloc_stmt_list ();
/* Results of the omp_get_num_threads and omp_get_thread_num builtins,
   converted to the iteration variable's type.  */
2807 t = built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS];
2808 t = build_function_call_expr (t, NULL);
2809 t = fold_convert (type, t);
2810 nthreads = get_formal_tmp_var (t, &list);
2812 t = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
2813 t = build_function_call_expr (t, NULL);
2814 t = fold_convert (type, t);
2815 threadid = get_formal_tmp_var (t, &list);
/* Convert the loop bounds and step to TYPE, forcing each into a
   temporary unless it is already a GIMPLE value.  */
2817 fd->n1 = fold_convert (type, fd->n1);
2818 if (!is_gimple_val (fd->n1))
2819 fd->n1 = get_formal_tmp_var (fd->n1, &list);
2821 fd->n2 = fold_convert (type, fd->n2);
2822 if (!is_gimple_val (fd->n2))
2823 fd->n2 = get_formal_tmp_var (fd->n2, &list);
2825 fd->step = fold_convert (type, fd->step);
2826 if (!is_gimple_val (fd->step))
2827 fd->step = get_formal_tmp_var (fd->step, &list);
/* n = (STEP + adj + N2 - N1) / STEP, where adj is -1 for "<" loops
   and +1 otherwise.  */
2829 t = build_int_cst (type, (fd->cond_code == LT_EXPR ? -1 : 1));
2830 t = fold_build2 (PLUS_EXPR, type, fd->step, t);
2831 t = fold_build2 (PLUS_EXPR, type, t, fd->n2);
2832 t = fold_build2 (MINUS_EXPR, type, t, fd->n1);
2833 t = fold_build2 (TRUNC_DIV_EXPR, type, t, fd->step);
2834 t = fold_convert (type, t);
2835 if (is_gimple_val (t))
2836 n = t;
2837 else
2838 n = get_formal_tmp_var (t, &list);
/* q = n / nthreads, rounded up by the NE_EXPR term when NTHREADS does
   not divide N exactly.  */
2840 t = build2 (TRUNC_DIV_EXPR, type, n, nthreads);
2841 q = get_formal_tmp_var (t, &list);
2843 t = build2 (MULT_EXPR, type, q, nthreads);
2844 t = build2 (NE_EXPR, type, t, n);
2845 t = build2 (PLUS_EXPR, type, q, t);
2846 q = get_formal_tmp_var (t, &list);
/* This thread's slice of the iteration space is [s0, e0), with e0
   clamped to N.  */
2848 t = build2 (MULT_EXPR, type, q, threadid);
2849 s0 = get_formal_tmp_var (t, &list);
2851 t = build2 (PLUS_EXPR, type, s0, q);
2852 t = build2 (MIN_EXPR, type, t, n);
2853 e0 = get_formal_tmp_var (t, &list);
/* An empty slice skips the loop entirely.  */
2855 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
2856 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l2),
2857 build_and_jump (&l0));
2858 append_to_statement_list (t, &list);
2860 si = bsi_last (entry_bb);
2861 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
2862 bsi_insert_after (&si, list, BSI_SAME_STMT);
2863 bsi_remove (&si, true);
2865 /* Setup code for sequential iteration goes in SEQ_START_BB. */
2866 list = alloc_stmt_list ();
2868 t = fold_convert (type, s0);
2869 t = build2 (MULT_EXPR, type, t, fd->step);
2870 t = build2 (PLUS_EXPR, type, t, fd->n1);
2871 t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
2872 gimplify_and_add (t, &list);
2874 t = fold_convert (type, e0);
2875 t = build2 (MULT_EXPR, type, t, fd->step);
2876 t = build2 (PLUS_EXPR, type, t, fd->n1);
2877 e = get_formal_tmp_var (t, &list);
2879 si = bsi_start (seq_start_bb);
2880 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2882 /* The code controlling the sequential loop replaces the OMP_CONTINUE. */
2883 list = alloc_stmt_list ();
2885 t = build2 (PLUS_EXPR, type, fd->v, fd->step);
2886 t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
2887 gimplify_and_add (t, &list);
2889 t = build2 (fd->cond_code, boolean_type_node, fd->v, e);
2890 t = get_formal_tmp_var (t, &list);
2891 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1),
2892 build_and_jump (&l2));
2893 append_to_statement_list (t, &list);
2895 si = bsi_last (cont_bb);
2896 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
2897 bsi_insert_after (&si, list, BSI_SAME_STMT);
2898 bsi_remove (&si, true);
2900 /* Replace the OMP_RETURN with a barrier, or nothing. */
2901 si = bsi_last (exit_bb);
2902 if (!OMP_RETURN_NOWAIT (bsi_stmt (si)))
2904 list = alloc_stmt_list ();
2905 build_omp_barrier (&list);
2906 bsi_insert_after (&si, list, BSI_SAME_STMT);
2908 bsi_remove (&si, true);
2910 /* Connect all the blocks. */
2911 make_edge (seq_start_bb, body_bb, EDGE_FALLTHRU);
2913 remove_edge (single_succ_edge (entry_bb));
2914 make_edge (entry_bb, fin_bb, EDGE_TRUE_VALUE);
2915 make_edge (entry_bb, seq_start_bb, EDGE_FALSE_VALUE);
/* The pre-existing CONT_BB -> FIN_BB edge is kept and becomes the
   false (loop exit) edge of the new conditional.  */
2917 make_edge (cont_bb, body_bb, EDGE_TRUE_VALUE);
2918 find_edge (cont_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
2922 /* A subroutine of expand_omp_for. Generate code for a parallel
2923 loop with static schedule and a specified chunk size. Given
2924 parameters:
2926 for (V = N1; V cond N2; V += STEP) BODY;
2928 where COND is "<" or ">", we generate pseudocode
2930 if (cond is <)
2931 adj = STEP - 1;
2932 else
2933 adj = STEP + 1;
2934 n = (adj + N2 - N1) / STEP;
2935 trip = 0;
2936 L0:
2937 s0 = (trip * nthreads + threadid) * CHUNK;
2938 e0 = min(s0 + CHUNK, n);
2939 if (s0 < n) goto L1; else goto L4;
2940 L1:
2941 V = s0 * STEP + N1;
2942 e = e0 * STEP + N1;
2943 L2:
2944 BODY;
2945 V += STEP;
2946 if (V cond e) goto L2; else goto L3;
2947 L3:
2948 trip += 1;
2949 goto L0;
2950 L4:
2951 */
2953 static void
2954 expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd)
/* REGION is the OMP_FOR region and FD its extracted loop description.
   FD->n1, FD->n2, FD->step and FD->chunk_size are overwritten below
   with values converted to the type of the iteration variable.  */
2956 tree l0, l1, l2, l3, l4, n, s0, e0, e, t;
2957 tree trip, nthreads, threadid;
2958 tree type;
2959 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
2960 basic_block trip_update_bb, cont_bb, fin_bb;
2961 tree list;
2962 block_stmt_iterator si;
2964 type = TREE_TYPE (fd->v);
2966 entry_bb = region->entry;
2967 iter_part_bb = create_empty_bb (entry_bb);
2968 seq_start_bb = create_empty_bb (iter_part_bb);
2969 body_bb = single_succ (entry_bb);
2970 cont_bb = region->cont;
2971 trip_update_bb = create_empty_bb (cont_bb);
2972 fin_bb = single_succ (cont_bb);
2973 exit_bb = region->exit;
2975 l0 = tree_block_label (iter_part_bb);
2976 l1 = tree_block_label (seq_start_bb);
2977 l2 = tree_block_label (body_bb);
2978 l3 = tree_block_label (trip_update_bb);
2979 l4 = tree_block_label (fin_bb);
2981 /* Trip and adjustment setup goes in ENTRY_BB. */
2982 list = alloc_stmt_list ();
/* Results of the omp_get_num_threads and omp_get_thread_num builtins,
   converted to the iteration variable's type.  */
2984 t = built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS];
2985 t = build_function_call_expr (t, NULL);
2986 t = fold_convert (type, t);
2987 nthreads = get_formal_tmp_var (t, &list);
2989 t = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
2990 t = build_function_call_expr (t, NULL);
2991 t = fold_convert (type, t);
2992 threadid = get_formal_tmp_var (t, &list);
/* Convert bounds, step and chunk size to TYPE, forcing each into a
   temporary unless it is already a GIMPLE value.  */
2994 fd->n1 = fold_convert (type, fd->n1);
2995 if (!is_gimple_val (fd->n1))
2996 fd->n1 = get_formal_tmp_var (fd->n1, &list);
2998 fd->n2 = fold_convert (type, fd->n2);
2999 if (!is_gimple_val (fd->n2))
3000 fd->n2 = get_formal_tmp_var (fd->n2, &list);
3002 fd->step = fold_convert (type, fd->step);
3003 if (!is_gimple_val (fd->step))
3004 fd->step = get_formal_tmp_var (fd->step, &list);
3006 fd->chunk_size = fold_convert (type, fd->chunk_size);
3007 if (!is_gimple_val (fd->chunk_size))
3008 fd->chunk_size = get_formal_tmp_var (fd->chunk_size, &list);
/* n = (STEP + adj + N2 - N1) / STEP, where adj is -1 for "<" loops
   and +1 otherwise.  */
3010 t = build_int_cst (type, (fd->cond_code == LT_EXPR ? -1 : 1));
3011 t = fold_build2 (PLUS_EXPR, type, fd->step, t);
3012 t = fold_build2 (PLUS_EXPR, type, t, fd->n2);
3013 t = fold_build2 (MINUS_EXPR, type, t, fd->n1);
3014 t = fold_build2 (TRUNC_DIV_EXPR, type, t, fd->step);
3015 t = fold_convert (type, t);
3016 if (is_gimple_val (t))
3017 n = t;
3018 else
3019 n = get_formal_tmp_var (t, &list);
/* TRIP starts at zero and is incremented in TRIP_UPDATE_BB each time
   this thread finishes a chunk.  */
3021 t = build_int_cst (type, 0);
3022 trip = get_initialized_tmp_var (t, &list, NULL);
3024 si = bsi_last (entry_bb);
3025 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
3026 bsi_insert_after (&si, list, BSI_SAME_STMT);
3027 bsi_remove (&si, true);
3029 /* Iteration space partitioning goes in ITER_PART_BB. */
3030 list = alloc_stmt_list ();
/* s0 = (trip * nthreads + threadid) * CHUNK is the start of this
   thread's next chunk; e0 is its end, clamped to N.  */
3032 t = build2 (MULT_EXPR, type, trip, nthreads);
3033 t = build2 (PLUS_EXPR, type, t, threadid);
3034 t = build2 (MULT_EXPR, type, t, fd->chunk_size);
3035 s0 = get_formal_tmp_var (t, &list);
3037 t = build2 (PLUS_EXPR, type, s0, fd->chunk_size);
3038 t = build2 (MIN_EXPR, type, t, n);
3039 e0 = get_formal_tmp_var (t, &list);
/* Once s0 reaches N there is no work left for this thread.  */
3041 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3042 t = build3 (COND_EXPR, void_type_node, t,
3043 build_and_jump (&l1), build_and_jump (&l4));
3044 append_to_statement_list (t, &list);
3046 si = bsi_start (iter_part_bb);
3047 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
3049 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3050 list = alloc_stmt_list ();
3052 t = fold_convert (type, s0);
3053 t = build2 (MULT_EXPR, type, t, fd->step);
3054 t = build2 (PLUS_EXPR, type, t, fd->n1);
3055 t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
3056 gimplify_and_add (t, &list);
3058 t = fold_convert (type, e0);
3059 t = build2 (MULT_EXPR, type, t, fd->step);
3060 t = build2 (PLUS_EXPR, type, t, fd->n1);
3061 e = get_formal_tmp_var (t, &list);
3063 si = bsi_start (seq_start_bb);
3064 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
3066 /* The code controlling the sequential loop goes in CONT_BB,
3067 replacing the OMP_CONTINUE. */
3068 list = alloc_stmt_list ();
3070 t = build2 (PLUS_EXPR, type, fd->v, fd->step);
3071 t = build2 (MODIFY_EXPR, void_type_node, fd->v, t);
3072 gimplify_and_add (t, &list);
3074 t = build2 (fd->cond_code, boolean_type_node, fd->v, e);
3075 t = get_formal_tmp_var (t, &list);
3076 t = build3 (COND_EXPR, void_type_node, t,
3077 build_and_jump (&l2), build_and_jump (&l3));
3078 append_to_statement_list (t, &list);
3080 si = bsi_last (cont_bb);
3081 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
3082 bsi_insert_after (&si, list, BSI_SAME_STMT);
3083 bsi_remove (&si, true);
3085 /* Trip update code goes into TRIP_UPDATE_BB. */
3086 list = alloc_stmt_list ();
3088 t = build_int_cst (type, 1);
3089 t = build2 (PLUS_EXPR, type, trip, t);
3090 t = build2 (MODIFY_EXPR, void_type_node, trip, t);
3091 gimplify_and_add (t, &list);
3093 si = bsi_start (trip_update_bb);
3094 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
3096 /* Replace the OMP_RETURN with a barrier, or nothing. */
3097 si = bsi_last (exit_bb);
3098 if (!OMP_RETURN_NOWAIT (bsi_stmt (si)))
3100 list = alloc_stmt_list ();
3101 build_omp_barrier (&list);
3102 bsi_insert_after (&si, list, BSI_SAME_STMT);
3104 bsi_remove (&si, true);
3106 /* Connect the new blocks. */
3107 remove_edge (single_succ_edge (entry_bb));
3108 make_edge (entry_bb, iter_part_bb, EDGE_FALLTHRU);
3110 make_edge (iter_part_bb, seq_start_bb, EDGE_TRUE_VALUE);
3111 make_edge (iter_part_bb, fin_bb, EDGE_FALSE_VALUE);
3113 make_edge (seq_start_bb, body_bb, EDGE_FALLTHRU);
/* The old CONT_BB -> FIN_BB edge is removed; FIN_BB is now reached
   only from ITER_PART_BB.  */
3115 remove_edge (single_succ_edge (cont_bb));
3116 make_edge (cont_bb, body_bb, EDGE_TRUE_VALUE);
3117 make_edge (cont_bb, trip_update_bb, EDGE_FALSE_VALUE);
3119 make_edge (trip_update_bb, iter_part_bb, EDGE_FALLTHRU);
3123 /* Expand the OpenMP loop defined by REGION. */
3125 static void
3126 expand_omp_for (struct omp_region *region)
3128 struct omp_for_data fd;
3130 push_gimplify_context ();
3132 extract_omp_for_data (last_stmt (region->entry), &fd);
3133 region->sched_kind = fd.sched_kind;
3135 if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
3136 && !fd.have_ordered
3137 && region->cont
3138 && region->exit)
3140 if (fd.chunk_size == NULL)
3141 expand_omp_for_static_nochunk (region, &fd);
3142 else
3143 expand_omp_for_static_chunk (region, &fd);
3145 else
3147 int fn_index = fd.sched_kind + fd.have_ordered * 4;
3148 int start_ix = BUILT_IN_GOMP_LOOP_STATIC_START + fn_index;
3149 int next_ix = BUILT_IN_GOMP_LOOP_STATIC_NEXT + fn_index;
3150 expand_omp_for_generic (region, &fd, start_ix, next_ix);
3153 pop_gimplify_context (NULL);
3157 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
3159 v = GOMP_sections_start (n);
3160 L0:
3161 switch (v)
3162 {
3163 case 0:
3164 goto L2;
3165 case 1:
3166 section 1;
3167 goto L1;
3168 case 2:
3169 ...
3170 case n:
3171 ...
3172 default:
3173 abort ();
3174 }
3175 L1:
3176 v = GOMP_sections_next ();
3177 goto L0;
3178 L2:
3179 reduction;
3181 If this is a combined parallel sections, replace the call to
3182 GOMP_sections_start with 'goto L1'. */
3184 static void
3185 expand_omp_sections (struct omp_region *region)
/* REGION is the OMP_SECTIONS region; its inner regions are the
   individual OMP_SECTION bodies, which are converted here as well.  */
3187 tree label_vec, l0, l1, l2, t, u, v, sections_stmt;
3188 unsigned i, len;
3189 basic_block entry_bb, exit_bb, l0_bb, l1_bb, l2_bb, default_bb;
3190 block_stmt_iterator si;
3191 struct omp_region *inner;
3192 edge e;
3194 entry_bb = region->entry;
3195 l0_bb = create_empty_bb (entry_bb);
3196 l0 = tree_block_label (l0_bb);
/* Either both the continue and the exit block exist, or neither.  */
3198 gcc_assert ((region->cont != NULL) ^ (region->exit == NULL));
3199 l1_bb = region->cont;
3200 if (l1_bb)
3202 l2_bb = single_succ (l1_bb);
3203 default_bb = create_empty_bb (l1_bb->prev_bb);
3205 l1 = tree_block_label (l1_bb);
3207 else
/* Without a continue block, one fresh block serves both as the target
   of case 0 and as the default (trap) block.  */
3209 l2_bb = create_empty_bb (l0_bb);
3210 default_bb = l2_bb;
3212 l1 = NULL;
3214 l2 = tree_block_label (l2_bb);
3216 exit_bb = region->exit;
3218 v = create_tmp_var (unsigned_type_node, ".section");
3220 /* We will build a switch() with enough cases for all the
3221 OMP_SECTION regions, a '0' case to handle the end of more work
3222 and a default case to abort if something goes wrong. */
/* Slot 0 holds the '0' case, slots 1..LEN the sections and slot
   LEN + 1 the default case.  */
3223 len = EDGE_COUNT (entry_bb->succs);
3224 label_vec = make_tree_vec (len + 2);
3226 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
3227 OMP_SECTIONS statement. */
3228 si = bsi_last (entry_bb);
3229 sections_stmt = bsi_stmt (si);
3230 gcc_assert (TREE_CODE (sections_stmt) == OMP_SECTIONS);
3231 if (!is_combined_parallel (region))
3233 /* If we are not inside a combined parallel+sections region,
3234 call GOMP_sections_start. */
3235 t = build_int_cst (unsigned_type_node, len);
3236 t = tree_cons (NULL, t, NULL);
3237 u = built_in_decls[BUILT_IN_GOMP_SECTIONS_START];
3238 t = build_function_call_expr (u, t);
3239 t = build2 (MODIFY_EXPR, void_type_node, v, t);
3240 bsi_insert_after (&si, t, BSI_SAME_STMT);
3242 bsi_remove (&si, true);
3244 /* The switch() statement replacing OMP_SECTIONS goes in L0_BB. */
3245 si = bsi_start (l0_bb);
3247 t = build3 (SWITCH_EXPR, void_type_node, v, NULL, label_vec);
3248 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
3250 t = build3 (CASE_LABEL_EXPR, void_type_node,
3251 build_int_cst (unsigned_type_node, 0), NULL, l2);
3252 TREE_VEC_ELT (label_vec, 0) = t;
3253 make_edge (l0_bb, l2_bb, 0);
3255 /* Convert each OMP_SECTION into a CASE_LABEL_EXPR. */
3256 for (inner = region->inner, i = 1; inner; inner = inner->next, ++i)
3258 basic_block s_entry_bb, s_exit_bb;
3260 s_entry_bb = inner->entry;
3261 s_exit_bb = inner->exit;
3263 t = tree_block_label (s_entry_bb);
3264 u = build_int_cst (unsigned_type_node, i);
3265 u = build3 (CASE_LABEL_EXPR, void_type_node, u, NULL, t);
3266 TREE_VEC_ELT (label_vec, i) = u;
3268 si = bsi_last (s_entry_bb);
3269 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SECTION);
3270 gcc_assert (i < len || OMP_SECTION_LAST (bsi_stmt (si)));
3271 bsi_remove (&si, true);
/* The section is now entered from the switch block, so its incoming
   edge is re-sourced from L0_BB.  */
3273 e = single_pred_edge (s_entry_bb);
3274 e->flags = 0;
3275 redirect_edge_pred (e, l0_bb);
3277 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
/* A section whose body never returns has no exit marker to remove.  */
3279 if (s_exit_bb == NULL)
3280 continue;
3282 si = bsi_last (s_exit_bb);
3283 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
3284 bsi_remove (&si, true);
3286 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
3289 /* Error handling code goes in DEFAULT_BB. */
3290 t = tree_block_label (default_bb);
3291 u = build3 (CASE_LABEL_EXPR, void_type_node, NULL, NULL, t);
3292 TREE_VEC_ELT (label_vec, len + 1) = u;
3293 make_edge (l0_bb, default_bb, 0);
/* The default case executes the trap builtin.  */
3295 si = bsi_start (default_bb);
3296 t = built_in_decls[BUILT_IN_TRAP];
3297 t = build_function_call_expr (t, NULL);
3298 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
3300 /* Code to get the next section goes in L1_BB. */
3301 if (l1_bb)
3303 si = bsi_last (l1_bb);
3304 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
3306 t = built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT];
3307 t = build_function_call_expr (t, NULL);
3308 t = build2 (MODIFY_EXPR, void_type_node, v, t);
3309 bsi_insert_after (&si, t, BSI_SAME_STMT);
3310 bsi_remove (&si, true);
3313 /* Cleanup function replaces OMP_RETURN in EXIT_BB. */
3314 if (exit_bb)
3316 si = bsi_last (exit_bb);
3317 if (OMP_RETURN_NOWAIT (bsi_stmt (si)))
3318 t = built_in_decls[BUILT_IN_GOMP_SECTIONS_END_NOWAIT];
3319 else
3320 t = built_in_decls[BUILT_IN_GOMP_SECTIONS_END];
3321 t = build_function_call_expr (t, NULL);
3322 bsi_insert_after (&si, t, BSI_SAME_STMT);
3323 bsi_remove (&si, true);
3326 /* Connect the new blocks. */
/* NOTE(review): in the combined-parallel case L1_BB is used as an edge
   destination without a NULL check; presumably a combined region
   always has a continue block -- confirm.  */
3327 if (is_combined_parallel (region))
3329 /* If this was a combined parallel+sections region, we did not
3330 emit a GOMP_sections_start in the entry block, so we just
3331 need to jump to L1_BB to get the next section. */
3332 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
3334 else
3335 make_edge (entry_bb, l0_bb, EDGE_FALLTHRU);
/* After fetching the next section number, loop back to the switch.  */
3337 if (l1_bb)
3339 e = single_succ_edge (l1_bb);
3340 redirect_edge_succ (e, l0_bb);
3341 e->flags = EDGE_FALLTHRU;
3346 /* Expand code for an OpenMP single directive. We've already expanded
3347 much of the code, here we simply place the GOMP_barrier call. */
3349 static void
3350 expand_omp_single (struct omp_region *region)
3352 basic_block entry_bb, exit_bb;
3353 block_stmt_iterator si;
3354 bool need_barrier = false;
3356 entry_bb = region->entry;
3357 exit_bb = region->exit;
3359 si = bsi_last (entry_bb);
3360 /* The terminal barrier at the end of a GOMP_single_copy sequence cannot
3361 be removed. We need to ensure that the thread that entered the single
3362 does not exit before the data is copied out by the other threads. */
3363 if (find_omp_clause (OMP_SINGLE_CLAUSES (bsi_stmt (si)),
3364 OMP_CLAUSE_COPYPRIVATE))
3365 need_barrier = true;
3366 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SINGLE);
3367 bsi_remove (&si, true);
3368 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
3370 si = bsi_last (exit_bb);
3371 if (!OMP_RETURN_NOWAIT (bsi_stmt (si)) || need_barrier)
3373 tree t = alloc_stmt_list ();
3374 build_omp_barrier (&t);
3375 bsi_insert_after (&si, t, BSI_SAME_STMT);
3377 bsi_remove (&si, true);
3378 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
3382 /* Generic expansion for OpenMP synchronization directives: master,
3383 ordered and critical. All we need to do here is remove the entry
3384 and exit markers for REGION. */
3386 static void
3387 expand_omp_synch (struct omp_region *region)
3389 basic_block entry_bb, exit_bb;
3390 block_stmt_iterator si;
3392 entry_bb = region->entry;
3393 exit_bb = region->exit;
3395 si = bsi_last (entry_bb);
3396 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SINGLE
3397 || TREE_CODE (bsi_stmt (si)) == OMP_MASTER
3398 || TREE_CODE (bsi_stmt (si)) == OMP_ORDERED
3399 || TREE_CODE (bsi_stmt (si)) == OMP_CRITICAL);
3400 bsi_remove (&si, true);
3401 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
3403 if (exit_bb)
3405 si = bsi_last (exit_bb);
3406 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
3407 bsi_remove (&si, true);
3408 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
3413 /* Expand the parallel region tree rooted at REGION. Expansion
3414 proceeds in depth-first order. Innermost regions are expanded
3415 first. This way, parallel regions that require a new function to
3416 be created (e.g., OMP_PARALLEL) can be expanded without having any
3417 internal dependencies in their body. */
3419 static void
3420 expand_omp (struct omp_region *region)
3422 while (region)
3424 if (region->inner)
3425 expand_omp (region->inner);
3427 switch (region->type)
3429 case OMP_PARALLEL:
3430 expand_omp_parallel (region);
3431 break;
3433 case OMP_FOR:
3434 expand_omp_for (region);
3435 break;
3437 case OMP_SECTIONS:
3438 expand_omp_sections (region);
3439 break;
3441 case OMP_SECTION:
3442 /* Individual omp sections are handled together with their
3443 parent OMP_SECTIONS region. */
3444 break;
3446 case OMP_SINGLE:
3447 expand_omp_single (region);
3448 break;
3450 case OMP_MASTER:
3451 case OMP_ORDERED:
3452 case OMP_CRITICAL:
3453 expand_omp_synch (region);
3454 break;
3456 default:
3457 gcc_unreachable ();
3460 region = region->next;
3465 /* Helper for build_omp_regions. Scan the dominator tree starting at
3466 block BB. PARENT is the region that contains BB. */
3468 static void
3469 build_omp_regions_1 (basic_block bb, struct omp_region *parent)
3471 block_stmt_iterator si;
3472 tree stmt;
3473 basic_block son;
3475 si = bsi_last (bb);
3476 if (!bsi_end_p (si) && OMP_DIRECTIVE_P (bsi_stmt (si)))
3478 struct omp_region *region;
3479 enum tree_code code;
3481 stmt = bsi_stmt (si);
3482 code = TREE_CODE (stmt);
3484 if (code == OMP_RETURN)
3486 /* STMT is the return point out of region PARENT. Mark it
3487 as the exit point and make PARENT the immediately
3488 enclosing region. */
3489 gcc_assert (parent);
3490 region = parent;
3491 region->exit = bb;
3492 parent = parent->outer;
3494 /* If REGION is a parallel region, determine whether it is
3495 a combined parallel+workshare region. */
3496 if (region->type == OMP_PARALLEL)
3497 determine_parallel_type (region);
3499 else if (code == OMP_CONTINUE)
3501 gcc_assert (parent);
3502 parent->cont = bb;
3504 else
3506 /* Otherwise, this directive becomes the parent for a new
3507 region. */
3508 region = new_omp_region (bb, code, parent);
3509 parent = region;
3513 for (son = first_dom_son (CDI_DOMINATORS, bb);
3514 son;
3515 son = next_dom_son (CDI_DOMINATORS, son))
3516 build_omp_regions_1 (son, parent);
3520 /* Scan the CFG and build a tree of OMP regions. Return the root of
3521 the OMP region tree. */
3523 static void
3524 build_omp_regions (void)
3526 gcc_assert (root_omp_region == NULL);
3527 calculate_dominance_info (CDI_DOMINATORS);
3528 build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL);
3532 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
3534 static unsigned int
3535 execute_expand_omp (void)
3537 build_omp_regions ();
3539 if (!root_omp_region)
3540 return 0;
3542 if (dump_file)
3544 fprintf (dump_file, "\nOMP region tree\n\n");
3545 dump_omp_region (dump_file, root_omp_region, 0);
3546 fprintf (dump_file, "\n");
3549 remove_exit_barriers (root_omp_region);
3551 expand_omp (root_omp_region);
3553 free_dominance_info (CDI_DOMINATORS);
3554 free_dominance_info (CDI_POST_DOMINATORS);
3555 cleanup_tree_cfg ();
3557 free_omp_regions ();
3559 return 0;
3562 static bool
3563 gate_expand_omp (void)
3565 return flag_openmp != 0 && errorcount == 0;
3568 struct tree_opt_pass pass_expand_omp =
3570 "ompexp", /* name */
3571 gate_expand_omp, /* gate */
3572 execute_expand_omp, /* execute */
3573 NULL, /* sub */
3574 NULL, /* next */
3575 0, /* static_pass_number */
3576 0, /* tv_id */
3577 PROP_gimple_any, /* properties_required */
3578 PROP_gimple_lomp, /* properties_provided */
3579 0, /* properties_destroyed */
3580 0, /* todo_flags_start */
3581 TODO_dump_func, /* todo_flags_finish */
3582 0 /* letter */
3585 /* Routines to lower OpenMP directives into OMP-GIMPLE. */
3587 /* Lower the OpenMP sections directive in *STMT_P. */
3589 static void
3590 lower_omp_sections (tree *stmt_p, omp_context *ctx)
3592 tree new_stmt, stmt, body, bind, block, ilist, olist, new_body;
3593 tree t, dlist;
3594 tree_stmt_iterator tsi;
3595 unsigned i, len;
3597 stmt = *stmt_p;
3599 push_gimplify_context ();
3601 dlist = NULL;
3602 ilist = NULL;
3603 lower_rec_input_clauses (OMP_SECTIONS_CLAUSES (stmt), &ilist, &dlist, ctx);
3605 tsi = tsi_start (OMP_SECTIONS_BODY (stmt));
3606 for (len = 0; !tsi_end_p (tsi); len++, tsi_next (&tsi))
3607 continue;
3609 tsi = tsi_start (OMP_SECTIONS_BODY (stmt));
3610 body = alloc_stmt_list ();
3611 for (i = 0; i < len; i++, tsi_next (&tsi))
3613 omp_context *sctx;
3614 tree sec_start, sec_end;
3616 sec_start = tsi_stmt (tsi);
3617 sctx = maybe_lookup_ctx (sec_start);
3618 gcc_assert (sctx);
3620 append_to_statement_list (sec_start, &body);
3622 lower_omp (&OMP_SECTION_BODY (sec_start), sctx);
3623 append_to_statement_list (OMP_SECTION_BODY (sec_start), &body);
3624 OMP_SECTION_BODY (sec_start) = NULL;
3626 if (i == len - 1)
3628 tree l = alloc_stmt_list ();
3629 lower_lastprivate_clauses (OMP_SECTIONS_CLAUSES (stmt), NULL,
3630 &l, ctx);
3631 append_to_statement_list (l, &body);
3632 OMP_SECTION_LAST (sec_start) = 1;
3635 sec_end = make_node (OMP_RETURN);
3636 append_to_statement_list (sec_end, &body);
3639 block = make_node (BLOCK);
3640 bind = build3 (BIND_EXPR, void_type_node, NULL, body, block);
3642 olist = NULL_TREE;
3643 lower_reduction_clauses (OMP_SECTIONS_CLAUSES (stmt), &olist, ctx);
3645 pop_gimplify_context (NULL_TREE);
3646 record_vars_into (ctx->block_vars, ctx->cb.dst_fn);
3648 new_stmt = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
3649 TREE_SIDE_EFFECTS (new_stmt) = 1;
3651 new_body = alloc_stmt_list ();
3652 append_to_statement_list (ilist, &new_body);
3653 append_to_statement_list (stmt, &new_body);
3654 append_to_statement_list (bind, &new_body);
3656 t = make_node (OMP_CONTINUE);
3657 append_to_statement_list (t, &new_body);
3659 append_to_statement_list (olist, &new_body);
3660 append_to_statement_list (dlist, &new_body);
3662 maybe_catch_exception (&new_body);
3664 t = make_node (OMP_RETURN);
3665 OMP_RETURN_NOWAIT (t) = !!find_omp_clause (OMP_SECTIONS_CLAUSES (stmt),
3666 OMP_CLAUSE_NOWAIT);
3667 append_to_statement_list (t, &new_body);
3669 BIND_EXPR_BODY (new_stmt) = new_body;
3670 OMP_SECTIONS_BODY (stmt) = NULL;
3672 *stmt_p = new_stmt;
3676 /* A subroutine of lower_omp_single. Expand the simple form of
3677 an OMP_SINGLE, without a copyprivate clause:
3679 if (GOMP_single_start ())
3680 BODY;
3681 [ GOMP_barrier (); ] -> unless 'nowait' is present.
3683 FIXME. It may be better to delay expanding the logic of this until
3684 pass_expand_omp. The expanded logic may make the job more difficult
3685 to a synchronization analysis pass. */
3687 static void
3688 lower_omp_single_simple (tree single_stmt, tree *pre_p)
3690 tree t;
3692 t = built_in_decls[BUILT_IN_GOMP_SINGLE_START];
3693 t = build_function_call_expr (t, NULL);
3694 t = build3 (COND_EXPR, void_type_node, t,
3695 OMP_SINGLE_BODY (single_stmt), NULL);
3696 gimplify_and_add (t, pre_p);
3700 /* A subroutine of lower_omp_single. Expand the simple form of
3701 an OMP_SINGLE, with a copyprivate clause:
3703 #pragma omp single copyprivate (a, b, c)
3705 Create a new structure to hold copies of 'a', 'b' and 'c' and emit:
3708 if ((copyout_p = GOMP_single_copy_start ()) == NULL)
3710 BODY;
3711 copyout.a = a;
3712 copyout.b = b;
3713 copyout.c = c;
3714 GOMP_single_copy_end (&copyout);
3716 else
3718 a = copyout_p->a;
3719 b = copyout_p->b;
3720 c = copyout_p->c;
3722 GOMP_barrier ();
3725 FIXME. It may be better to delay expanding the logic of this until
3726 pass_expand_omp. The expanded logic may make the job more difficult
3727 to a synchronization analysis pass. */
3729 static void
3730 lower_omp_single_copy (tree single_stmt, tree *pre_p, omp_context *ctx)
3732 tree ptr_type, t, args, l0, l1, l2, copyin_seq;
3734 ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_copy_o");
3736 ptr_type = build_pointer_type (ctx->record_type);
3737 ctx->receiver_decl = create_tmp_var (ptr_type, ".omp_copy_i");
3739 l0 = create_artificial_label ();
3740 l1 = create_artificial_label ();
3741 l2 = create_artificial_label ();
3743 t = built_in_decls[BUILT_IN_GOMP_SINGLE_COPY_START];
3744 t = build_function_call_expr (t, NULL);
3745 t = fold_convert (ptr_type, t);
3746 t = build2 (MODIFY_EXPR, void_type_node, ctx->receiver_decl, t);
3747 gimplify_and_add (t, pre_p);
3749 t = build2 (EQ_EXPR, boolean_type_node, ctx->receiver_decl,
3750 build_int_cst (ptr_type, 0));
3751 t = build3 (COND_EXPR, void_type_node, t,
3752 build_and_jump (&l0), build_and_jump (&l1));
3753 gimplify_and_add (t, pre_p);
3755 t = build1 (LABEL_EXPR, void_type_node, l0);
3756 gimplify_and_add (t, pre_p);
3758 append_to_statement_list (OMP_SINGLE_BODY (single_stmt), pre_p);
3760 copyin_seq = NULL;
3761 lower_copyprivate_clauses (OMP_SINGLE_CLAUSES (single_stmt), pre_p,
3762 &copyin_seq, ctx);
3764 t = build_fold_addr_expr (ctx->sender_decl);
3765 args = tree_cons (NULL, t, NULL);
3766 t = built_in_decls[BUILT_IN_GOMP_SINGLE_COPY_END];
3767 t = build_function_call_expr (t, args);
3768 gimplify_and_add (t, pre_p);
3770 t = build_and_jump (&l2);
3771 gimplify_and_add (t, pre_p);
3773 t = build1 (LABEL_EXPR, void_type_node, l1);
3774 gimplify_and_add (t, pre_p);
3776 append_to_statement_list (copyin_seq, pre_p);
3778 t = build1 (LABEL_EXPR, void_type_node, l2);
3779 gimplify_and_add (t, pre_p);
3783 /* Expand code for an OpenMP single directive. */
3785 static void
3786 lower_omp_single (tree *stmt_p, omp_context *ctx)
3788 tree t, bind, block, single_stmt = *stmt_p, dlist;
3790 push_gimplify_context ();
3792 block = make_node (BLOCK);
3793 *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
3794 TREE_SIDE_EFFECTS (bind) = 1;
3796 lower_rec_input_clauses (OMP_SINGLE_CLAUSES (single_stmt),
3797 &BIND_EXPR_BODY (bind), &dlist, ctx);
3798 lower_omp (&OMP_SINGLE_BODY (single_stmt), ctx);
3800 append_to_statement_list (single_stmt, &BIND_EXPR_BODY (bind));
3802 if (ctx->record_type)
3803 lower_omp_single_copy (single_stmt, &BIND_EXPR_BODY (bind), ctx);
3804 else
3805 lower_omp_single_simple (single_stmt, &BIND_EXPR_BODY (bind));
3807 OMP_SINGLE_BODY (single_stmt) = NULL;
3809 append_to_statement_list (dlist, &BIND_EXPR_BODY (bind));
3811 maybe_catch_exception (&BIND_EXPR_BODY (bind));
3813 t = make_node (OMP_RETURN);
3814 OMP_RETURN_NOWAIT (t) = !!find_omp_clause (OMP_SINGLE_CLAUSES (single_stmt),
3815 OMP_CLAUSE_NOWAIT);
3816 append_to_statement_list (t, &BIND_EXPR_BODY (bind));
3818 pop_gimplify_context (bind);
3820 BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
3821 BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
3825 /* Expand code for an OpenMP master directive. */
3827 static void
3828 lower_omp_master (tree *stmt_p, omp_context *ctx)
3830 tree bind, block, stmt = *stmt_p, lab = NULL, x;
3832 push_gimplify_context ();
3834 block = make_node (BLOCK);
3835 *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
3836 TREE_SIDE_EFFECTS (bind) = 1;
3838 append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
3840 x = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
3841 x = build_function_call_expr (x, NULL);
3842 x = build2 (EQ_EXPR, boolean_type_node, x, integer_zero_node);
3843 x = build3 (COND_EXPR, void_type_node, x, NULL, build_and_jump (&lab));
3844 gimplify_and_add (x, &BIND_EXPR_BODY (bind));
3846 lower_omp (&OMP_MASTER_BODY (stmt), ctx);
3847 maybe_catch_exception (&OMP_MASTER_BODY (stmt));
3848 append_to_statement_list (OMP_MASTER_BODY (stmt), &BIND_EXPR_BODY (bind));
3849 OMP_MASTER_BODY (stmt) = NULL;
3851 x = build1 (LABEL_EXPR, void_type_node, lab);
3852 gimplify_and_add (x, &BIND_EXPR_BODY (bind));
3854 x = make_node (OMP_RETURN);
3855 OMP_RETURN_NOWAIT (x) = 1;
3856 append_to_statement_list (x, &BIND_EXPR_BODY (bind));
3858 pop_gimplify_context (bind);
3860 BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
3861 BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
3865 /* Expand code for an OpenMP ordered directive. */
3867 static void
3868 lower_omp_ordered (tree *stmt_p, omp_context *ctx)
3870 tree bind, block, stmt = *stmt_p, x;
3872 push_gimplify_context ();
3874 block = make_node (BLOCK);
3875 *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
3876 TREE_SIDE_EFFECTS (bind) = 1;
3878 append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
3880 x = built_in_decls[BUILT_IN_GOMP_ORDERED_START];
3881 x = build_function_call_expr (x, NULL);
3882 gimplify_and_add (x, &BIND_EXPR_BODY (bind));
3884 lower_omp (&OMP_ORDERED_BODY (stmt), ctx);
3885 maybe_catch_exception (&OMP_ORDERED_BODY (stmt));
3886 append_to_statement_list (OMP_ORDERED_BODY (stmt), &BIND_EXPR_BODY (bind));
3887 OMP_ORDERED_BODY (stmt) = NULL;
3889 x = built_in_decls[BUILT_IN_GOMP_ORDERED_END];
3890 x = build_function_call_expr (x, NULL);
3891 gimplify_and_add (x, &BIND_EXPR_BODY (bind));
3893 x = make_node (OMP_RETURN);
3894 OMP_RETURN_NOWAIT (x) = 1;
3895 append_to_statement_list (x, &BIND_EXPR_BODY (bind));
3897 pop_gimplify_context (bind);
3899 BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
3900 BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
3904 /* Gimplify an OMP_CRITICAL statement. This is a relatively simple
3905 substitution of a couple of function calls. But in the NAMED case,
3906 requires that languages coordinate a symbol name. It is therefore
3907 best put here in common code. */
3909 static GTY((param1_is (tree), param2_is (tree)))
3910 splay_tree critical_name_mutexes;
3912 static void
3913 lower_omp_critical (tree *stmt_p, omp_context *ctx)
3915 tree bind, block, stmt = *stmt_p;
3916 tree t, lock, unlock, name;
3918 name = OMP_CRITICAL_NAME (stmt);
3919 if (name)
3921 tree decl, args;
3922 splay_tree_node n;
3924 if (!critical_name_mutexes)
3925 critical_name_mutexes
3926 = splay_tree_new_ggc (splay_tree_compare_pointers);
3928 n = splay_tree_lookup (critical_name_mutexes, (splay_tree_key) name);
3929 if (n == NULL)
3931 char *new_str;
3933 decl = create_tmp_var_raw (ptr_type_node, NULL);
3935 new_str = ACONCAT ((".gomp_critical_user_",
3936 IDENTIFIER_POINTER (name), NULL));
3937 DECL_NAME (decl) = get_identifier (new_str);
3938 TREE_PUBLIC (decl) = 1;
3939 TREE_STATIC (decl) = 1;
3940 DECL_COMMON (decl) = 1;
3941 DECL_ARTIFICIAL (decl) = 1;
3942 DECL_IGNORED_P (decl) = 1;
3943 cgraph_varpool_finalize_decl (decl);
3945 splay_tree_insert (critical_name_mutexes, (splay_tree_key) name,
3946 (splay_tree_value) decl);
3948 else
3949 decl = (tree) n->value;
3951 args = tree_cons (NULL, build_fold_addr_expr (decl), NULL);
3952 lock = built_in_decls[BUILT_IN_GOMP_CRITICAL_NAME_START];
3953 lock = build_function_call_expr (lock, args);
3955 args = tree_cons (NULL, build_fold_addr_expr (decl), NULL);
3956 unlock = built_in_decls[BUILT_IN_GOMP_CRITICAL_NAME_END];
3957 unlock = build_function_call_expr (unlock, args);
3959 else
3961 lock = built_in_decls[BUILT_IN_GOMP_CRITICAL_START];
3962 lock = build_function_call_expr (lock, NULL);
3964 unlock = built_in_decls[BUILT_IN_GOMP_CRITICAL_END];
3965 unlock = build_function_call_expr (unlock, NULL);
3968 push_gimplify_context ();
3970 block = make_node (BLOCK);
3971 *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
3972 TREE_SIDE_EFFECTS (bind) = 1;
3974 append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
3976 gimplify_and_add (lock, &BIND_EXPR_BODY (bind));
3978 lower_omp (&OMP_CRITICAL_BODY (stmt), ctx);
3979 maybe_catch_exception (&OMP_CRITICAL_BODY (stmt));
3980 append_to_statement_list (OMP_CRITICAL_BODY (stmt), &BIND_EXPR_BODY (bind));
3981 OMP_CRITICAL_BODY (stmt) = NULL;
3983 gimplify_and_add (unlock, &BIND_EXPR_BODY (bind));
3985 t = make_node (OMP_RETURN);
3986 OMP_RETURN_NOWAIT (t) = 1;
3987 append_to_statement_list (t, &BIND_EXPR_BODY (bind));
3989 pop_gimplify_context (bind);
3990 BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
3991 BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
3995 /* A subroutine of lower_omp_for. Generate code to emit the predicate
3996 for a lastprivate clause. Given a loop control predicate of (V
3997 cond N2), we gate the clause on (!(V cond N2)). The lowered form
3998 is appended to *DLIST, iterator initialization is appended to
3999 *BODY_P. */
4001 static void
4002 lower_omp_for_lastprivate (struct omp_for_data *fd, tree *body_p,
4003 tree *dlist, struct omp_context *ctx)
4005 tree clauses, cond, stmts, vinit, t;
4006 enum tree_code cond_code;
4008 cond_code = fd->cond_code;
4009 cond_code = cond_code == LT_EXPR ? GE_EXPR : LE_EXPR;
4011 /* When possible, use a strict equality expression. This can let VRP
4012 type optimizations deduce the value and remove a copy. */
4013 if (host_integerp (fd->step, 0))
4015 HOST_WIDE_INT step = TREE_INT_CST_LOW (fd->step);
4016 if (step == 1 || step == -1)
4017 cond_code = EQ_EXPR;
4020 cond = build2 (cond_code, boolean_type_node, fd->v, fd->n2);
4022 clauses = OMP_FOR_CLAUSES (fd->for_stmt);
4023 stmts = NULL;
4024 lower_lastprivate_clauses (clauses, cond, &stmts, ctx);
4025 if (stmts != NULL)
4027 append_to_statement_list (stmts, dlist);
4029 /* Optimize: v = 0; is usually cheaper than v = some_other_constant. */
4030 vinit = fd->n1;
4031 if (cond_code == EQ_EXPR
4032 && host_integerp (fd->n2, 0)
4033 && ! integer_zerop (fd->n2))
4034 vinit = build_int_cst (TREE_TYPE (fd->v), 0);
4036 /* Initialize the iterator variable, so that threads that don't execute
4037 any iterations don't execute the lastprivate clauses by accident. */
4038 t = build2 (MODIFY_EXPR, void_type_node, fd->v, vinit);
4039 gimplify_and_add (t, body_p);
4044 /* Lower code for an OpenMP loop directive. */
4046 static void
4047 lower_omp_for (tree *stmt_p, omp_context *ctx)
4049 tree t, stmt, ilist, dlist, new_stmt, *body_p, *rhs_p;
4050 struct omp_for_data fd;
4052 stmt = *stmt_p;
4054 push_gimplify_context ();
4056 lower_omp (&OMP_FOR_PRE_BODY (stmt), ctx);
4057 lower_omp (&OMP_FOR_BODY (stmt), ctx);
4059 /* Move declaration of temporaries in the loop body before we make
4060 it go away. */
4061 if (TREE_CODE (OMP_FOR_BODY (stmt)) == BIND_EXPR)
4062 record_vars_into (BIND_EXPR_VARS (OMP_FOR_BODY (stmt)), ctx->cb.dst_fn);
4064 new_stmt = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
4065 TREE_SIDE_EFFECTS (new_stmt) = 1;
4066 body_p = &BIND_EXPR_BODY (new_stmt);
4068 /* The pre-body and input clauses go before the lowered OMP_FOR. */
4069 ilist = NULL;
4070 dlist = NULL;
4071 append_to_statement_list (OMP_FOR_PRE_BODY (stmt), body_p);
4072 lower_rec_input_clauses (OMP_FOR_CLAUSES (stmt), body_p, &dlist, ctx);
4074 /* Lower the header expressions. At this point, we can assume that
4075 the header is of the form:
4077 #pragma omp for (V = VAL1; V {<|>|<=|>=} VAL2; V = V [+-] VAL3)
4079 We just need to make sure that VAL1, VAL2 and VAL3 are lowered
4080 using the .omp_data_s mapping, if needed. */
4081 rhs_p = &TREE_OPERAND (OMP_FOR_INIT (stmt), 1);
4082 if (!is_gimple_min_invariant (*rhs_p))
4083 *rhs_p = get_formal_tmp_var (*rhs_p, body_p);
4085 rhs_p = &TREE_OPERAND (OMP_FOR_COND (stmt), 1);
4086 if (!is_gimple_min_invariant (*rhs_p))
4087 *rhs_p = get_formal_tmp_var (*rhs_p, body_p);
4089 rhs_p = &TREE_OPERAND (TREE_OPERAND (OMP_FOR_INCR (stmt), 1), 1);
4090 if (!is_gimple_min_invariant (*rhs_p))
4091 *rhs_p = get_formal_tmp_var (*rhs_p, body_p);
4093 /* Once lowered, extract the bounds and clauses. */
4094 extract_omp_for_data (stmt, &fd);
4096 lower_omp_for_lastprivate (&fd, body_p, &dlist, ctx);
4098 append_to_statement_list (stmt, body_p);
4100 append_to_statement_list (OMP_FOR_BODY (stmt), body_p);
4102 t = make_node (OMP_CONTINUE);
4103 append_to_statement_list (t, body_p);
4105 /* After the loop, add exit clauses. */
4106 lower_reduction_clauses (OMP_FOR_CLAUSES (stmt), body_p, ctx);
4107 append_to_statement_list (dlist, body_p);
4109 maybe_catch_exception (body_p);
4111 /* Region exit marker goes at the end of the loop body. */
4112 t = make_node (OMP_RETURN);
4113 OMP_RETURN_NOWAIT (t) = fd.have_nowait;
4114 append_to_statement_list (t, body_p);
4116 pop_gimplify_context (NULL_TREE);
4117 record_vars_into (ctx->block_vars, ctx->cb.dst_fn);
4119 OMP_FOR_BODY (stmt) = NULL_TREE;
4120 OMP_FOR_PRE_BODY (stmt) = NULL_TREE;
4121 *stmt_p = new_stmt;
4124 /* Callback for walk_stmts. Check if *TP only contains OMP_FOR
4125 or OMP_PARALLEL. */
4127 static tree
4128 check_combined_parallel (tree *tp, int *walk_subtrees, void *data)
4130 struct walk_stmt_info *wi = data;
4131 int *info = wi->info;
4133 *walk_subtrees = 0;
4134 switch (TREE_CODE (*tp))
4136 case OMP_FOR:
4137 case OMP_SECTIONS:
4138 *info = *info == 0 ? 1 : -1;
4139 break;
4140 default:
4141 *info = -1;
4142 break;
4144 return NULL;
4147 /* Lower the OpenMP parallel directive in *STMT_P. CTX holds context
4148 information for the directive. */
4150 static void
4151 lower_omp_parallel (tree *stmt_p, omp_context *ctx)
4153 tree clauses, par_bind, par_body, new_body, bind;
4154 tree olist, ilist, par_olist, par_ilist;
4155 tree stmt, child_fn, t;
4157 stmt = *stmt_p;
4159 clauses = OMP_PARALLEL_CLAUSES (stmt);
4160 par_bind = OMP_PARALLEL_BODY (stmt);
4161 par_body = BIND_EXPR_BODY (par_bind);
4162 child_fn = ctx->cb.dst_fn;
4163 if (!OMP_PARALLEL_COMBINED (stmt))
4165 struct walk_stmt_info wi;
4166 int ws_num = 0;
4168 memset (&wi, 0, sizeof (wi));
4169 wi.callback = check_combined_parallel;
4170 wi.info = &ws_num;
4171 wi.val_only = true;
4172 walk_stmts (&wi, &par_bind);
4173 if (ws_num == 1)
4174 OMP_PARALLEL_COMBINED (stmt) = 1;
4177 push_gimplify_context ();
4179 par_olist = NULL_TREE;
4180 par_ilist = NULL_TREE;
4181 lower_rec_input_clauses (clauses, &par_ilist, &par_olist, ctx);
4182 lower_omp (&par_body, ctx);
4183 lower_reduction_clauses (clauses, &par_olist, ctx);
4185 /* Declare all the variables created by mapping and the variables
4186 declared in the scope of the parallel body. */
4187 record_vars_into (ctx->block_vars, child_fn);
4188 record_vars_into (BIND_EXPR_VARS (par_bind), child_fn);
4190 if (ctx->record_type)
4192 ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_data_o");
4193 OMP_PARALLEL_DATA_ARG (stmt) = ctx->sender_decl;
4196 olist = NULL_TREE;
4197 ilist = NULL_TREE;
4198 lower_send_clauses (clauses, &ilist, &olist, ctx);
4199 lower_send_shared_vars (&ilist, &olist, ctx);
4201 /* Once all the expansions are done, sequence all the different
4202 fragments inside OMP_PARALLEL_BODY. */
4203 bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
4204 append_to_statement_list (ilist, &BIND_EXPR_BODY (bind));
4206 new_body = alloc_stmt_list ();
4208 if (ctx->record_type)
4210 t = build_fold_addr_expr (ctx->sender_decl);
4211 /* fixup_child_record_type might have changed receiver_decl's type. */
4212 t = fold_convert (TREE_TYPE (ctx->receiver_decl), t);
4213 t = build2 (MODIFY_EXPR, void_type_node, ctx->receiver_decl, t);
4214 append_to_statement_list (t, &new_body);
4217 append_to_statement_list (par_ilist, &new_body);
4218 append_to_statement_list (par_body, &new_body);
4219 append_to_statement_list (par_olist, &new_body);
4220 maybe_catch_exception (&new_body);
4221 t = make_node (OMP_RETURN);
4222 append_to_statement_list (t, &new_body);
4223 OMP_PARALLEL_BODY (stmt) = new_body;
4225 append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
4226 append_to_statement_list (olist, &BIND_EXPR_BODY (bind));
4228 *stmt_p = bind;
4230 pop_gimplify_context (NULL_TREE);
4234 /* Pass *TP back through the gimplifier within the context determined by WI.
4235 This handles replacement of DECL_VALUE_EXPR, as well as adjusting the
4236 flags on ADDR_EXPR. */
4238 static void
4239 lower_regimplify (tree *tp, struct walk_stmt_info *wi)
4241 enum gimplify_status gs;
4242 tree pre = NULL;
4244 if (wi->is_lhs)
4245 gs = gimplify_expr (tp, &pre, NULL, is_gimple_lvalue, fb_lvalue);
4246 else if (wi->val_only)
4247 gs = gimplify_expr (tp, &pre, NULL, is_gimple_val, fb_rvalue);
4248 else
4249 gs = gimplify_expr (tp, &pre, NULL, is_gimple_formal_tmp_var, fb_rvalue);
4250 gcc_assert (gs == GS_ALL_DONE);
4252 if (pre)
4253 tsi_link_before (&wi->tsi, pre, TSI_SAME_STMT);
4256 /* Copy EXP into a temporary. Insert the initialization statement before TSI. */
4258 static tree
4259 init_tmp_var (tree exp, tree_stmt_iterator *tsi)
4261 tree t, stmt;
4263 t = create_tmp_var (TREE_TYPE (exp), NULL);
4264 if (TREE_CODE (TREE_TYPE (t)) == COMPLEX_TYPE)
4265 DECL_COMPLEX_GIMPLE_REG_P (t) = 1;
4266 stmt = build2 (MODIFY_EXPR, TREE_TYPE (t), t, exp);
4267 SET_EXPR_LOCUS (stmt, EXPR_LOCUS (tsi_stmt (*tsi)));
4268 tsi_link_before (tsi, stmt, TSI_SAME_STMT);
4270 return t;
4273 /* Similarly, but copy from the temporary and insert the statement
4274 after the iterator. */
4276 static tree
4277 save_tmp_var (tree exp, tree_stmt_iterator *tsi)
4279 tree t, stmt;
4281 t = create_tmp_var (TREE_TYPE (exp), NULL);
4282 if (TREE_CODE (TREE_TYPE (t)) == COMPLEX_TYPE)
4283 DECL_COMPLEX_GIMPLE_REG_P (t) = 1;
4284 stmt = build2 (MODIFY_EXPR, TREE_TYPE (t), exp, t);
4285 SET_EXPR_LOCUS (stmt, EXPR_LOCUS (tsi_stmt (*tsi)));
4286 tsi_link_after (tsi, stmt, TSI_SAME_STMT);
4288 return t;
4291 /* Callback for walk_stmts. Lower the OpenMP directive pointed by TP. */
4293 static tree
4294 lower_omp_1 (tree *tp, int *walk_subtrees, void *data)
4296 struct walk_stmt_info *wi = data;
4297 omp_context *ctx = wi->info;
4298 tree t = *tp;
4300 /* If we have issued syntax errors, avoid doing any heavy lifting.
4301 Just replace the OpenMP directives with a NOP to avoid
4302 confusing RTL expansion. */
4303 if (errorcount && OMP_DIRECTIVE_P (*tp))
4305 *tp = build_empty_stmt ();
4306 return NULL_TREE;
4309 *walk_subtrees = 0;
4310 switch (TREE_CODE (*tp))
4312 case OMP_PARALLEL:
4313 ctx = maybe_lookup_ctx (t);
4314 lower_omp_parallel (tp, ctx);
4315 break;
4317 case OMP_FOR:
4318 ctx = maybe_lookup_ctx (t);
4319 gcc_assert (ctx);
4320 lower_omp_for (tp, ctx);
4321 break;
4323 case OMP_SECTIONS:
4324 ctx = maybe_lookup_ctx (t);
4325 gcc_assert (ctx);
4326 lower_omp_sections (tp, ctx);
4327 break;
4329 case OMP_SINGLE:
4330 ctx = maybe_lookup_ctx (t);
4331 gcc_assert (ctx);
4332 lower_omp_single (tp, ctx);
4333 break;
4335 case OMP_MASTER:
4336 ctx = maybe_lookup_ctx (t);
4337 gcc_assert (ctx);
4338 lower_omp_master (tp, ctx);
4339 break;
4341 case OMP_ORDERED:
4342 ctx = maybe_lookup_ctx (t);
4343 gcc_assert (ctx);
4344 lower_omp_ordered (tp, ctx);
4345 break;
4347 case OMP_CRITICAL:
4348 ctx = maybe_lookup_ctx (t);
4349 gcc_assert (ctx);
4350 lower_omp_critical (tp, ctx);
4351 break;
4353 case VAR_DECL:
4354 if (ctx && DECL_HAS_VALUE_EXPR_P (t))
4356 lower_regimplify (&t, wi);
4357 if (wi->val_only)
4359 if (wi->is_lhs)
4360 t = save_tmp_var (t, &wi->tsi);
4361 else
4362 t = init_tmp_var (t, &wi->tsi);
4364 *tp = t;
4366 break;
4368 case ADDR_EXPR:
4369 if (ctx)
4370 lower_regimplify (tp, wi);
4371 break;
4373 case ARRAY_REF:
4374 case ARRAY_RANGE_REF:
4375 case REALPART_EXPR:
4376 case IMAGPART_EXPR:
4377 case COMPONENT_REF:
4378 case VIEW_CONVERT_EXPR:
4379 if (ctx)
4380 lower_regimplify (tp, wi);
4381 break;
4383 case INDIRECT_REF:
4384 if (ctx)
4386 wi->is_lhs = false;
4387 wi->val_only = true;
4388 lower_regimplify (&TREE_OPERAND (t, 0), wi);
4390 break;
4392 default:
4393 if (!TYPE_P (t) && !DECL_P (t))
4394 *walk_subtrees = 1;
4395 break;
4398 return NULL_TREE;
4401 static void
4402 lower_omp (tree *stmt_p, omp_context *ctx)
4404 struct walk_stmt_info wi;
4406 memset (&wi, 0, sizeof (wi));
4407 wi.callback = lower_omp_1;
4408 wi.info = ctx;
4409 wi.val_only = true;
4410 wi.want_locations = true;
4412 walk_stmts (&wi, stmt_p);
4415 /* Main entry point. */
4417 static unsigned int
4418 execute_lower_omp (void)
4420 all_contexts = splay_tree_new (splay_tree_compare_pointers, 0,
4421 delete_omp_context);
4423 scan_omp (&DECL_SAVED_TREE (current_function_decl), NULL);
4424 gcc_assert (parallel_nesting_level == 0);
4426 if (all_contexts->root)
4427 lower_omp (&DECL_SAVED_TREE (current_function_decl), NULL);
4429 if (all_contexts)
4431 splay_tree_delete (all_contexts);
4432 all_contexts = NULL;
4434 return 0;
4437 static bool
4438 gate_lower_omp (void)
4440 return flag_openmp != 0;
4443 struct tree_opt_pass pass_lower_omp =
4445 "omplower", /* name */
4446 gate_lower_omp, /* gate */
4447 execute_lower_omp, /* execute */
4448 NULL, /* sub */
4449 NULL, /* next */
4450 0, /* static_pass_number */
4451 0, /* tv_id */
4452 PROP_gimple_any, /* properties_required */
4453 PROP_gimple_lomp, /* properties_provided */
4454 0, /* properties_destroyed */
4455 0, /* todo_flags_start */
4456 TODO_dump_func, /* todo_flags_finish */
4457 0 /* letter */
4460 /* The following is a utility to diagnose OpenMP structured block violations.
4461 It is not part of the "omplower" pass, as that's invoked too late. It
4462 should be invoked by the respective front ends after gimplification. */
4464 static splay_tree all_labels;
4466 /* Check for mismatched contexts and generate an error if needed. Return
4467 true if an error is detected. */
4469 static bool
4470 diagnose_sb_0 (tree *stmt_p, tree branch_ctx, tree label_ctx)
4472 bool exit_p = true;
4474 if ((label_ctx ? TREE_VALUE (label_ctx) : NULL) == branch_ctx)
4475 return false;
4477 /* Try to avoid confusing the user by producing and error message
4478 with correct "exit" or "enter" verbage. We prefer "exit"
4479 unless we can show that LABEL_CTX is nested within BRANCH_CTX. */
4480 if (branch_ctx == NULL)
4481 exit_p = false;
4482 else
4484 while (label_ctx)
4486 if (TREE_VALUE (label_ctx) == branch_ctx)
4488 exit_p = false;
4489 break;
4491 label_ctx = TREE_CHAIN (label_ctx);
4495 if (exit_p)
4496 error ("invalid exit from OpenMP structured block");
4497 else
4498 error ("invalid entry to OpenMP structured block");
4500 *stmt_p = build_empty_stmt ();
4501 return true;
4504 /* Pass 1: Create a minimal tree of OpenMP structured blocks, and record
4505 where in the tree each label is found. */
4507 static tree
4508 diagnose_sb_1 (tree *tp, int *walk_subtrees, void *data)
4510 struct walk_stmt_info *wi = data;
4511 tree context = (tree) wi->info;
4512 tree inner_context;
4513 tree t = *tp;
4515 *walk_subtrees = 0;
4516 switch (TREE_CODE (t))
4518 case OMP_PARALLEL:
4519 case OMP_SECTIONS:
4520 case OMP_SINGLE:
4521 walk_tree (&OMP_CLAUSES (t), diagnose_sb_1, wi, NULL);
4522 /* FALLTHRU */
4523 case OMP_SECTION:
4524 case OMP_MASTER:
4525 case OMP_ORDERED:
4526 case OMP_CRITICAL:
4527 /* The minimal context here is just a tree of statements. */
4528 inner_context = tree_cons (NULL, t, context);
4529 wi->info = inner_context;
4530 walk_stmts (wi, &OMP_BODY (t));
4531 wi->info = context;
4532 break;
4534 case OMP_FOR:
4535 walk_tree (&OMP_FOR_CLAUSES (t), diagnose_sb_1, wi, NULL);
4536 inner_context = tree_cons (NULL, t, context);
4537 wi->info = inner_context;
4538 walk_tree (&OMP_FOR_INIT (t), diagnose_sb_1, wi, NULL);
4539 walk_tree (&OMP_FOR_COND (t), diagnose_sb_1, wi, NULL);
4540 walk_tree (&OMP_FOR_INCR (t), diagnose_sb_1, wi, NULL);
4541 walk_stmts (wi, &OMP_FOR_PRE_BODY (t));
4542 walk_stmts (wi, &OMP_FOR_BODY (t));
4543 wi->info = context;
4544 break;
4546 case LABEL_EXPR:
4547 splay_tree_insert (all_labels, (splay_tree_key) LABEL_EXPR_LABEL (t),
4548 (splay_tree_value) context);
4549 break;
4551 default:
4552 break;
4555 return NULL_TREE;
4558 /* Pass 2: Check each branch and see if its context differs from that of
4559 the destination label's context. */
4561 static tree
4562 diagnose_sb_2 (tree *tp, int *walk_subtrees, void *data)
4564 struct walk_stmt_info *wi = data;
4565 tree context = (tree) wi->info;
4566 splay_tree_node n;
4567 tree t = *tp;
4569 *walk_subtrees = 0;
4570 switch (TREE_CODE (t))
4572 case OMP_PARALLEL:
4573 case OMP_SECTIONS:
4574 case OMP_SINGLE:
4575 walk_tree (&OMP_CLAUSES (t), diagnose_sb_2, wi, NULL);
4576 /* FALLTHRU */
4577 case OMP_SECTION:
4578 case OMP_MASTER:
4579 case OMP_ORDERED:
4580 case OMP_CRITICAL:
4581 wi->info = t;
4582 walk_stmts (wi, &OMP_BODY (t));
4583 wi->info = context;
4584 break;
4586 case OMP_FOR:
4587 walk_tree (&OMP_FOR_CLAUSES (t), diagnose_sb_2, wi, NULL);
4588 wi->info = t;
4589 walk_tree (&OMP_FOR_INIT (t), diagnose_sb_2, wi, NULL);
4590 walk_tree (&OMP_FOR_COND (t), diagnose_sb_2, wi, NULL);
4591 walk_tree (&OMP_FOR_INCR (t), diagnose_sb_2, wi, NULL);
4592 walk_stmts (wi, &OMP_FOR_PRE_BODY (t));
4593 walk_stmts (wi, &OMP_FOR_BODY (t));
4594 wi->info = context;
4595 break;
4597 case GOTO_EXPR:
4599 tree lab = GOTO_DESTINATION (t);
4600 if (TREE_CODE (lab) != LABEL_DECL)
4601 break;
4603 n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
4604 diagnose_sb_0 (tp, context, n ? (tree) n->value : NULL_TREE);
4606 break;
4608 case SWITCH_EXPR:
4610 tree vec = SWITCH_LABELS (t);
4611 int i, len = TREE_VEC_LENGTH (vec);
4612 for (i = 0; i < len; ++i)
4614 tree lab = CASE_LABEL (TREE_VEC_ELT (vec, i));
4615 n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
4616 if (diagnose_sb_0 (tp, context, (tree) n->value))
4617 break;
4620 break;
4622 case RETURN_EXPR:
4623 diagnose_sb_0 (tp, context, NULL_TREE);
4624 break;
4626 default:
4627 break;
4630 return NULL_TREE;
4633 void
4634 diagnose_omp_structured_block_errors (tree fndecl)
4636 tree save_current = current_function_decl;
4637 struct walk_stmt_info wi;
4639 current_function_decl = fndecl;
4641 all_labels = splay_tree_new (splay_tree_compare_pointers, 0, 0);
4643 memset (&wi, 0, sizeof (wi));
4644 wi.callback = diagnose_sb_1;
4645 walk_stmts (&wi, &DECL_SAVED_TREE (fndecl));
4647 memset (&wi, 0, sizeof (wi));
4648 wi.callback = diagnose_sb_2;
4649 wi.want_locations = true;
4650 wi.want_return_expr = true;
4651 walk_stmts (&wi, &DECL_SAVED_TREE (fndecl));
4653 splay_tree_delete (all_labels);
4654 all_labels = NULL;
4656 current_function_decl = save_current;
4659 #include "gt-omp-low.h"