/* Lowering pass for OpenMP directives.  Converts OpenMP directives
   into explicit calls to the runtime library (libgomp) and data
   marshalling to implement data sharing and copying clauses.
   Contributed by Diego Novillo <dnovillo@redhat.com>

   Copyright (C) 2005, 2006 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "tree.h"
30 #include "rtl.h"
31 #include "tree-gimple.h"
32 #include "tree-inline.h"
33 #include "langhooks.h"
34 #include "diagnostic.h"
35 #include "tree-flow.h"
36 #include "timevar.h"
37 #include "flags.h"
38 #include "function.h"
39 #include "expr.h"
40 #include "toplev.h"
41 #include "tree-pass.h"
42 #include "ggc.h"
43 #include "except.h"
/* Lowering of OpenMP parallel and workshare constructs proceeds in two
   phases.  The first phase scans the function looking for OMP statements
   and then for variables that must be replaced to satisfy data sharing
   clauses.  The second phase expands code for the constructs, as well as
   re-gimplifying things when variables have been replaced with complex
   expressions.

   Final code generation is done by pass_expand_omp.  The flowgraph is
   scanned for parallel regions which are then moved to a new
   function, to be invoked by the thread library.  */
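/* For instance (an illustrative sketch, not the exact output), a
   construct such as

	#pragma omp parallel shared (n)
	  body;

   is outlined into a child function invoked through libgomp:

	.omp_data_o.n = n;
	GOMP_parallel_start (foo.omp_fn.0, &.omp_data_o, 0);
	foo.omp_fn.0 (&.omp_data_o);
	GOMP_parallel_end ();

   The name foo.omp_fn.0 is hypothetical here; actual names are
   produced by create_omp_child_function_name below.  */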
/* Context structure.  Used to store information about each parallel
   directive in the code.  */

typedef struct omp_context
{
  /* This field must be at the beginning, as we do "inheritance": Some
     callback functions for tree-inline.c (e.g., omp_copy_decl)
     receive a copy_body_data pointer that is up-casted to an
     omp_context pointer.  */
  copy_body_data cb;

  /* The tree of contexts corresponding to the encountered constructs.  */
  struct omp_context *outer;
  tree stmt;

  /* Map variables to fields in a structure that allows communication
     between sending and receiving threads.  */
  splay_tree field_map;
  tree record_type;
  tree sender_decl;
  tree receiver_decl;

  /* A chain of variables to add to the top-level block surrounding the
     construct.  In the case of a parallel, this is in the child function.  */
  tree block_vars;

  /* What to do with variables with implicitly determined sharing
     attributes.  */
  enum omp_clause_default_kind default_kind;

  /* Nesting depth of this context.  Used to beautify error messages
     regarding invalid gotos.  The outermost ctx is depth 1, with depth 0
     being reserved for the main body of the function.  */
  int depth;

  /* True if this parallel directive is nested within another.  */
  bool is_nested;
} omp_context;
/* A structure describing the main elements of a parallel loop.  */

struct omp_for_data
{
  tree v, n1, n2, step, chunk_size, for_stmt;
  enum tree_code cond_code;
  tree pre;
  bool have_nowait, have_ordered;
  enum omp_clause_schedule_kind sched_kind;
};
static splay_tree all_contexts;
static int parallel_nesting_level;
struct omp_region *root_omp_region;

static void scan_omp (tree *, omp_context *);
static void lower_omp (tree *, omp_context *);
static tree lookup_decl_in_outer_ctx (tree, omp_context *);
static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *);
/* Find an OpenMP clause of type KIND within CLAUSES.  */

static tree
find_omp_clause (tree clauses, enum tree_code kind)
{
  for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses))
    if (OMP_CLAUSE_CODE (clauses) == kind)
      return clauses;

  return NULL_TREE;
}
/* Return true if CTX is for an omp parallel.  */

static inline bool
is_parallel_ctx (omp_context *ctx)
{
  return TREE_CODE (ctx->stmt) == OMP_PARALLEL;
}

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}
/* Extract the header elements of parallel loop FOR_STMT and store
   them into *FD.  */

static void
extract_omp_for_data (tree for_stmt, struct omp_for_data *fd)
{
  tree t;

  fd->for_stmt = for_stmt;
  fd->pre = NULL;

  t = OMP_FOR_INIT (for_stmt);
  gcc_assert (TREE_CODE (t) == GIMPLE_MODIFY_STMT);
  fd->v = GIMPLE_STMT_OPERAND (t, 0);
  gcc_assert (DECL_P (fd->v));
  gcc_assert (TREE_CODE (TREE_TYPE (fd->v)) == INTEGER_TYPE);
  fd->n1 = GIMPLE_STMT_OPERAND (t, 1);

  t = OMP_FOR_COND (for_stmt);
  fd->cond_code = TREE_CODE (t);
  gcc_assert (TREE_OPERAND (t, 0) == fd->v);
  fd->n2 = TREE_OPERAND (t, 1);
  switch (fd->cond_code)
    {
    case LT_EXPR:
    case GT_EXPR:
      break;
    case LE_EXPR:
      fd->n2 = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->n2), fd->n2,
			    build_int_cst (TREE_TYPE (fd->n2), 1));
      fd->cond_code = LT_EXPR;
      break;
    case GE_EXPR:
      fd->n2 = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->n2), fd->n2,
			    build_int_cst (TREE_TYPE (fd->n2), 1));
      fd->cond_code = GT_EXPR;
      break;
    default:
      gcc_unreachable ();
    }

  t = OMP_FOR_INCR (fd->for_stmt);
  gcc_assert (TREE_CODE (t) == GIMPLE_MODIFY_STMT);
  gcc_assert (GIMPLE_STMT_OPERAND (t, 0) == fd->v);
  t = GIMPLE_STMT_OPERAND (t, 1);
  gcc_assert (TREE_OPERAND (t, 0) == fd->v);
  switch (TREE_CODE (t))
    {
    case PLUS_EXPR:
      fd->step = TREE_OPERAND (t, 1);
      break;
    case MINUS_EXPR:
      fd->step = TREE_OPERAND (t, 1);
      fd->step = fold_build1 (NEGATE_EXPR, TREE_TYPE (fd->step), fd->step);
      break;
    default:
      gcc_unreachable ();
    }

  fd->have_nowait = fd->have_ordered = false;
  fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
  fd->chunk_size = NULL_TREE;

  for (t = OMP_FOR_CLAUSES (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t))
    switch (OMP_CLAUSE_CODE (t))
      {
      case OMP_CLAUSE_NOWAIT:
	fd->have_nowait = true;
	break;
      case OMP_CLAUSE_ORDERED:
	fd->have_ordered = true;
	break;
      case OMP_CLAUSE_SCHEDULE:
	fd->sched_kind = OMP_CLAUSE_SCHEDULE_KIND (t);
	fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t);
	break;
      default:
	break;
      }

  if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME)
    gcc_assert (fd->chunk_size == NULL);
  else if (fd->chunk_size == NULL)
    {
      /* We only need to compute a default chunk size for ordered
	 static loops and dynamic loops.  */
      if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC || fd->have_ordered)
	fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
			 ? integer_zero_node : integer_one_node;
    }
}
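/* For instance, given an input loop (illustrative only)

	#pragma omp for
	for (i = 0; i <= N; i++) ...

   the LE_EXPR condition is rewritten above so that fd->cond_code is
   LT_EXPR and fd->n2 is N + 1; later phases therefore only ever have
   to deal with "i < n2" and "i > n2" loops.  */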
/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   OMP_FOR regions.  In some cases, these arguments are computed out
   of variables passed in from the parent to the child via 'struct
   .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   OMP_FOR header and check whether they appear on the LHS of any
   statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
   call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block par_entry_bb, basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  tree par_stmt, ws_stmt;

  par_stmt = last_stmt (par_entry_bb);
  ws_stmt = last_stmt (ws_entry_bb);

  if (TREE_CODE (ws_stmt) == OMP_SECTIONS)
    return true;

  gcc_assert (TREE_CODE (ws_stmt) == OMP_FOR);

  extract_omp_for_data (ws_stmt, &fd);

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.n1)
      || !is_gimple_min_invariant (fd.n2)
      || !is_gimple_min_invariant (fd.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}
/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static tree
get_ws_args_for (tree ws_stmt)
{
  tree t;

  if (TREE_CODE (ws_stmt) == OMP_FOR)
    {
      struct omp_for_data fd;
      tree ws_args;

      extract_omp_for_data (ws_stmt, &fd);

      ws_args = NULL_TREE;
      if (fd.chunk_size)
	{
	  t = fold_convert (long_integer_type_node, fd.chunk_size);
	  ws_args = tree_cons (NULL, t, ws_args);
	}

      t = fold_convert (long_integer_type_node, fd.step);
      ws_args = tree_cons (NULL, t, ws_args);

      t = fold_convert (long_integer_type_node, fd.n2);
      ws_args = tree_cons (NULL, t, ws_args);

      t = fold_convert (long_integer_type_node, fd.n1);
      ws_args = tree_cons (NULL, t, ws_args);

      return ws_args;
    }
  else if (TREE_CODE (ws_stmt) == OMP_SECTIONS)
    {
      basic_block bb = bb_for_stmt (ws_stmt);
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs));
      t = tree_cons (NULL, t, NULL);
      return t;
    }

  gcc_unreachable ();
}
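/* Note that because tree_cons prepends, the list built above for an
   OMP_FOR ends up ordered as (n1, n2, step[, chunk_size]), i.e. the
   lower bound first, which is the order libgomp's combined
   GOMP_parallel_loop_*_start entry points expect after the standard
   fn/data/num_threads arguments.  */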
/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != OMP_PARALLEL
      || (region->inner->type != OMP_FOR
	  && region->inner->type != OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (par_entry_bb, ws_entry_bb))
    {
      tree ws_stmt = last_stmt (region->inner->entry);

      if (region->inner->type == OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = OMP_FOR_CLAUSES (ws_stmt);
	  tree c = find_omp_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || OMP_CLAUSE_SCHEDULE_KIND (c) == OMP_CLAUSE_SCHEDULE_STATIC
	      || find_omp_clause (clauses, OMP_CLAUSE_ORDERED))
	    {
	      region->is_combined_parallel = false;
	      region->inner->is_combined_parallel = false;
	      return;
	    }
	}

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (ws_stmt);
    }
}
/* Return true if EXPR is variable sized.  */

static inline bool
is_variable_sized (tree expr)
{
  return !TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
}

/* Return true if DECL is a reference type.  */

static inline bool
is_reference (tree decl)
{
  return lang_hooks.decls.omp_privatize_by_reference (decl);
}
/* Look up variables in the decl or field splay trees.  The "maybe" form
   allows the variable not to have been entered; the plain form asserts
   that it has been.  */

static inline tree
lookup_decl (tree var, omp_context *ctx)
{
  splay_tree_node n;
  n = splay_tree_lookup (ctx->cb.decl_map, (splay_tree_key) var);
  return (tree) n->value;
}

static inline tree
maybe_lookup_decl (tree var, omp_context *ctx)
{
  splay_tree_node n;
  n = splay_tree_lookup (ctx->cb.decl_map, (splay_tree_key) var);
  return n ? (tree) n->value : NULL_TREE;
}

static inline tree
lookup_field (tree var, omp_context *ctx)
{
  splay_tree_node n;
  n = splay_tree_lookup (ctx->field_map, (splay_tree_key) var);
  return (tree) n->value;
}

static inline tree
maybe_lookup_field (tree var, omp_context *ctx)
{
  splay_tree_node n;
  n = splay_tree_lookup (ctx->field_map, (splay_tree_key) var);
  return n ? (tree) n->value : NULL_TREE;
}
/* Return true if DECL should be copied by pointer.  SHARED_P is true
   if DECL is to be shared.  */

static bool
use_pointer_for_field (tree decl, bool shared_p)
{
  if (AGGREGATE_TYPE_P (TREE_TYPE (decl)))
    return true;

  /* We can only use copy-in/copy-out semantics for shared variables
     when we know the value is not accessible from an outer scope.  */
  if (shared_p)
    {
      /* ??? Trivially accessible from anywhere.  But why would we even
	 be passing an address in this case?  Should we simply assert
	 this to be false, or should we have a cleanup pass that removes
	 these from the list of mappings?  */
      if (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
	return true;

      /* For variables with DECL_HAS_VALUE_EXPR_P set, we cannot tell
	 without analyzing the expression whether or not its location
	 is accessible to anyone else.  In the case of nested parallel
	 regions it certainly may be.  */
      if (TREE_CODE (decl) != RESULT_DECL && DECL_HAS_VALUE_EXPR_P (decl))
	return true;

      /* Do not use copy-in/copy-out for variables that have their
	 address taken.  */
      if (TREE_ADDRESSABLE (decl))
	return true;
    }

  return false;
}
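/* For example (illustrative): a scalar 'int i' that is shared, not
   static and not addressable is copied in and out by value, whereas
   an array, an aggregate, or any variable whose address is taken gets
   a pointer field in .omp_data_s instead.  */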
/* Construct a new automatic decl similar to VAR.  */

static tree
omp_copy_decl_2 (tree var, tree name, tree type, omp_context *ctx)
{
  tree copy = build_decl (VAR_DECL, name, type);

  TREE_ADDRESSABLE (copy) = TREE_ADDRESSABLE (var);
  DECL_GIMPLE_REG_P (copy) = DECL_GIMPLE_REG_P (var);
  DECL_ARTIFICIAL (copy) = DECL_ARTIFICIAL (var);
  DECL_IGNORED_P (copy) = DECL_IGNORED_P (var);
  TREE_USED (copy) = 1;
  DECL_CONTEXT (copy) = current_function_decl;
  DECL_SEEN_IN_BIND_EXPR_P (copy) = 1;

  TREE_CHAIN (copy) = ctx->block_vars;
  ctx->block_vars = copy;

  return copy;
}

static tree
omp_copy_decl_1 (tree var, omp_context *ctx)
{
  return omp_copy_decl_2 (var, DECL_NAME (var), TREE_TYPE (var), ctx);
}
/* Build tree nodes to access the field for VAR on the receiver side.  */

static tree
build_receiver_ref (tree var, bool by_ref, omp_context *ctx)
{
  tree x, field = lookup_field (var, ctx);

  /* If the receiver record type was remapped in the child function,
     remap the field into the new record type.  */
  x = maybe_lookup_field (field, ctx);
  if (x != NULL)
    field = x;

  x = build_fold_indirect_ref (ctx->receiver_decl);
  x = build3 (COMPONENT_REF, TREE_TYPE (field), x, field, NULL);
  if (by_ref)
    x = build_fold_indirect_ref (x);

  return x;
}
/* Build tree nodes to access VAR in the scope outer to CTX.  In the case
   of a parallel, this is a component reference; for workshare constructs
   this is some variable.  */

static tree
build_outer_var_ref (tree var, omp_context *ctx)
{
  tree x;

  if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx)))
    x = var;
  else if (is_variable_sized (var))
    {
      x = TREE_OPERAND (DECL_VALUE_EXPR (var), 0);
      x = build_outer_var_ref (x, ctx);
      x = build_fold_indirect_ref (x);
    }
  else if (is_parallel_ctx (ctx))
    {
      bool by_ref = use_pointer_for_field (var, false);
      x = build_receiver_ref (var, by_ref, ctx);
    }
  else if (ctx->outer)
    x = lookup_decl (var, ctx->outer);
  else if (is_reference (var))
    /* This can happen with orphaned constructs.  If var is a reference,
       it is possible it is shared and as such valid.  */
    x = var;
  else
    gcc_unreachable ();

  if (is_reference (var))
    x = build_fold_indirect_ref (x);

  return x;
}
/* Build tree nodes to access the field for VAR on the sender side.  */

static tree
build_sender_ref (tree var, omp_context *ctx)
{
  tree field = lookup_field (var, ctx);
  return build3 (COMPONENT_REF, TREE_TYPE (field),
		 ctx->sender_decl, field, NULL);
}
/* Add a new field for VAR inside the structure CTX->SENDER_DECL.  */

static void
install_var_field (tree var, bool by_ref, omp_context *ctx)
{
  tree field, type;

  gcc_assert (!splay_tree_lookup (ctx->field_map, (splay_tree_key) var));

  type = TREE_TYPE (var);
  if (by_ref)
    type = build_pointer_type (type);

  field = build_decl (FIELD_DECL, DECL_NAME (var), type);

  /* Remember what variable this field was created for.  This does have a
     side effect of making dwarf2out ignore this member, so for helpful
     debugging we clear it later in delete_omp_context.  */
  DECL_ABSTRACT_ORIGIN (field) = var;

  insert_field_into_struct (ctx->record_type, field);

  splay_tree_insert (ctx->field_map, (splay_tree_key) var,
		     (splay_tree_value) field);
}

static tree
install_var_local (tree var, omp_context *ctx)
{
  tree new_var = omp_copy_decl_1 (var, ctx);
  insert_decl_map (&ctx->cb, var, new_var);
  return new_var;
}
/* Adjust the replacement for DECL in CTX for the new context.  This means
   copying the DECL_VALUE_EXPR, and fixing up the type.  */

static void
fixup_remapped_decl (tree decl, omp_context *ctx, bool private_debug)
{
  tree new_decl, size;

  new_decl = lookup_decl (decl, ctx);

  TREE_TYPE (new_decl) = remap_type (TREE_TYPE (decl), &ctx->cb);

  if ((!TREE_CONSTANT (DECL_SIZE (new_decl)) || private_debug)
      && DECL_HAS_VALUE_EXPR_P (decl))
    {
      tree ve = DECL_VALUE_EXPR (decl);
      walk_tree (&ve, copy_body_r, &ctx->cb, NULL);
      SET_DECL_VALUE_EXPR (new_decl, ve);
      DECL_HAS_VALUE_EXPR_P (new_decl) = 1;
    }

  if (!TREE_CONSTANT (DECL_SIZE (new_decl)))
    {
      size = remap_decl (DECL_SIZE (decl), &ctx->cb);
      if (size == error_mark_node)
	size = TYPE_SIZE (TREE_TYPE (new_decl));
      DECL_SIZE (new_decl) = size;

      size = remap_decl (DECL_SIZE_UNIT (decl), &ctx->cb);
      if (size == error_mark_node)
	size = TYPE_SIZE_UNIT (TREE_TYPE (new_decl));
      DECL_SIZE_UNIT (new_decl) = size;
    }
}
/* The callback for remap_decl.  Search all containing contexts for a
   mapping of the variable; this avoids having to duplicate the splay
   tree ahead of time.  We know a mapping doesn't already exist in the
   given context.  Create new mappings to implement default semantics.  */

static tree
omp_copy_decl (tree var, copy_body_data *cb)
{
  omp_context *ctx = (omp_context *) cb;
  tree new_var;

  if (TREE_CODE (var) == LABEL_DECL)
    {
      new_var = create_artificial_label ();
      DECL_CONTEXT (new_var) = current_function_decl;
      insert_decl_map (&ctx->cb, var, new_var);
      return new_var;
    }

  while (!is_parallel_ctx (ctx))
    {
      ctx = ctx->outer;
      if (ctx == NULL)
	return var;
      new_var = maybe_lookup_decl (var, ctx);
      if (new_var)
	return new_var;
    }

  if (is_global_var (var) || decl_function_context (var) != ctx->cb.src_fn)
    return var;

  return error_mark_node;
}
/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   tree_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}
/* Create a new parallel region starting at BB inside region PARENT.  */

struct omp_region *
new_omp_region (basic_block bb, enum tree_code type, struct omp_region *parent)
{
  struct omp_region *region = xcalloc (1, sizeof (*region));

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}
/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
free_omp_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}
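/* As an illustration, for

	#pragma omp parallel
	  #pragma omp for
	    ...

   the tree contains a root OMP_PARALLEL region whose 'inner' field
   points to the OMP_FOR region, which dump_omp_region would print
   roughly as (block numbers hypothetical):

	bb 2: omp_parallel
	    bb 3: omp_for
	    bb 5: OMP_RETURN
	bb 6: OMP_RETURN

   Any further top-level constructs are chained through 'next' off
   root_omp_region.  */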
/* Create a new context, with OUTER_CTX being the surrounding context.  */

static omp_context *
new_omp_context (tree stmt, omp_context *outer_ctx)
{
  omp_context *ctx = XCNEW (omp_context);

  splay_tree_insert (all_contexts, (splay_tree_key) stmt,
		     (splay_tree_value) ctx);
  ctx->stmt = stmt;

  if (outer_ctx)
    {
      ctx->outer = outer_ctx;
      ctx->cb = outer_ctx->cb;
      ctx->cb.block = NULL;
      ctx->depth = outer_ctx->depth + 1;
    }
  else
    {
      ctx->cb.src_fn = current_function_decl;
      ctx->cb.dst_fn = current_function_decl;
      ctx->cb.src_node = cgraph_node (current_function_decl);
      ctx->cb.dst_node = ctx->cb.src_node;
      ctx->cb.src_cfun = cfun;
      ctx->cb.copy_decl = omp_copy_decl;
      ctx->cb.eh_region = -1;
      ctx->cb.transform_call_graph_edges = CB_CGE_MOVE;
      ctx->depth = 1;
    }

  ctx->cb.decl_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);

  return ctx;
}
/* Destroy an omp_context data structure.  Called through the splay tree
   value delete callback.  */

static void
delete_omp_context (splay_tree_value value)
{
  omp_context *ctx = (omp_context *) value;

  splay_tree_delete (ctx->cb.decl_map);

  if (ctx->field_map)
    splay_tree_delete (ctx->field_map);

  /* We hijacked DECL_ABSTRACT_ORIGIN earlier.  We need to clear it before
     it produces corrupt debug information.  */
  if (ctx->record_type)
    {
      tree t;
      for (t = TYPE_FIELDS (ctx->record_type); t ; t = TREE_CHAIN (t))
	DECL_ABSTRACT_ORIGIN (t) = NULL;
    }

  XDELETE (ctx);
}
/* Fix up RECEIVER_DECL with a type that has been remapped to the child
   context.  */

static void
fixup_child_record_type (omp_context *ctx)
{
  tree f, type = ctx->record_type;

  /* ??? It isn't sufficient to just call remap_type here, because
     variably_modified_type_p doesn't work the way we expect for
     record types.  Testing each field for whether it needs remapping
     and creating a new record by hand works, however.  */
  for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
    if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn))
      break;
  if (f)
    {
      tree name, new_fields = NULL;

      type = lang_hooks.types.make_type (RECORD_TYPE);
      name = DECL_NAME (TYPE_NAME (ctx->record_type));
      name = build_decl (TYPE_DECL, name, type);
      TYPE_NAME (type) = name;

      for (f = TYPE_FIELDS (ctx->record_type); f ; f = TREE_CHAIN (f))
	{
	  tree new_f = copy_node (f);
	  DECL_CONTEXT (new_f) = type;
	  TREE_TYPE (new_f) = remap_type (TREE_TYPE (f), &ctx->cb);
	  TREE_CHAIN (new_f) = new_fields;
	  new_fields = new_f;

	  /* Arrange to be able to look up the receiver field
	     given the sender field.  */
	  splay_tree_insert (ctx->field_map, (splay_tree_key) f,
			     (splay_tree_value) new_f);
	}
      TYPE_FIELDS (type) = nreverse (new_fields);
      layout_type (type);
    }

  TREE_TYPE (ctx->receiver_decl) = build_pointer_type (type);
}
/* Instantiate decls as necessary in CTX to satisfy the data sharing
   specified by CLAUSES.  */

static void
scan_sharing_clauses (tree clauses, omp_context *ctx)
{
  tree c, decl;
  bool scan_array_reductions = false;

  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
    {
      bool by_ref;

      switch (OMP_CLAUSE_CODE (c))
	{
	case OMP_CLAUSE_PRIVATE:
	  decl = OMP_CLAUSE_DECL (c);
	  if (!is_variable_sized (decl))
	    install_var_local (decl, ctx);
	  break;

	case OMP_CLAUSE_SHARED:
	  gcc_assert (is_parallel_ctx (ctx));
	  decl = OMP_CLAUSE_DECL (c);
	  gcc_assert (!is_variable_sized (decl));
	  by_ref = use_pointer_for_field (decl, true);
	  /* Global variables don't need to be copied,
	     the receiver side will use them directly.  */
	  if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
	    break;
	  if (! TREE_READONLY (decl)
	      || TREE_ADDRESSABLE (decl)
	      || by_ref
	      || is_reference (decl))
	    {
	      install_var_field (decl, by_ref, ctx);
	      install_var_local (decl, ctx);
	      break;
	    }
	  /* We don't need to copy const scalar vars back.  */
	  OMP_CLAUSE_SET_CODE (c, OMP_CLAUSE_FIRSTPRIVATE);
	  goto do_private;

	case OMP_CLAUSE_LASTPRIVATE:
	  /* Let the corresponding firstprivate clause create
	     the variable.  */
	  if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
	    break;
	  /* FALLTHRU */

	case OMP_CLAUSE_FIRSTPRIVATE:
	case OMP_CLAUSE_REDUCTION:
	  decl = OMP_CLAUSE_DECL (c);
	do_private:
	  if (is_variable_sized (decl))
	    break;
	  else if (is_parallel_ctx (ctx)
		   && ! is_global_var (maybe_lookup_decl_in_outer_ctx (decl,
								       ctx)))
	    {
	      by_ref = use_pointer_for_field (decl, false);
	      install_var_field (decl, by_ref, ctx);
	    }
	  install_var_local (decl, ctx);
	  break;

	case OMP_CLAUSE_COPYPRIVATE:
	  if (ctx->outer)
	    scan_omp (&OMP_CLAUSE_DECL (c), ctx->outer);
	  /* FALLTHRU */

	case OMP_CLAUSE_COPYIN:
	  decl = OMP_CLAUSE_DECL (c);
	  by_ref = use_pointer_for_field (decl, false);
	  install_var_field (decl, by_ref, ctx);
	  break;

	case OMP_CLAUSE_DEFAULT:
	  ctx->default_kind = OMP_CLAUSE_DEFAULT_KIND (c);
	  break;

	case OMP_CLAUSE_IF:
	case OMP_CLAUSE_NUM_THREADS:
	case OMP_CLAUSE_SCHEDULE:
	  if (ctx->outer)
	    scan_omp (&OMP_CLAUSE_OPERAND (c, 0), ctx->outer);
	  break;

	case OMP_CLAUSE_NOWAIT:
	case OMP_CLAUSE_ORDERED:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
    {
      switch (OMP_CLAUSE_CODE (c))
	{
	case OMP_CLAUSE_LASTPRIVATE:
	  /* Let the corresponding firstprivate clause create
	     the variable.  */
	  if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
	    break;
	  /* FALLTHRU */

	case OMP_CLAUSE_PRIVATE:
	case OMP_CLAUSE_FIRSTPRIVATE:
	case OMP_CLAUSE_REDUCTION:
	  decl = OMP_CLAUSE_DECL (c);
	  if (is_variable_sized (decl))
	    install_var_local (decl, ctx);
	  fixup_remapped_decl (decl, ctx,
			       OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE
			       && OMP_CLAUSE_PRIVATE_DEBUG (c));
	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
	      && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
	    scan_array_reductions = true;
	  break;

	case OMP_CLAUSE_SHARED:
	  decl = OMP_CLAUSE_DECL (c);
	  if (! is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
	    fixup_remapped_decl (decl, ctx, false);
	  break;

	case OMP_CLAUSE_COPYPRIVATE:
	case OMP_CLAUSE_COPYIN:
	case OMP_CLAUSE_DEFAULT:
	case OMP_CLAUSE_IF:
	case OMP_CLAUSE_NUM_THREADS:
	case OMP_CLAUSE_SCHEDULE:
	case OMP_CLAUSE_NOWAIT:
	case OMP_CLAUSE_ORDERED:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  if (scan_array_reductions)
    for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
	  && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
	{
	  scan_omp (&OMP_CLAUSE_REDUCTION_INIT (c), ctx);
	  scan_omp (&OMP_CLAUSE_REDUCTION_MERGE (c), ctx);
	}
}
/* Create a new name for the omp child function.  Returns an identifier.  */

static GTY(()) unsigned int tmp_ompfn_id_num;

static tree
create_omp_child_function_name (void)
{
  tree name = DECL_ASSEMBLER_NAME (current_function_decl);
  size_t len = IDENTIFIER_LENGTH (name);
  char *tmp_name, *prefix;

  prefix = alloca (len + sizeof ("_omp_fn"));
  memcpy (prefix, IDENTIFIER_POINTER (name), len);
  strcpy (prefix + len, "_omp_fn");
#ifndef NO_DOT_IN_LABEL
  prefix[len] = '.';
#elif !defined NO_DOLLAR_IN_LABEL
  prefix[len] = '$';
#endif
  ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, tmp_ompfn_id_num++);
  return get_identifier (tmp_name);
}
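/* For instance, inside a function 'foo' the first child function comes
   out roughly as 'foo.omp_fn.0': the '.' (or '$', or the original '_'
   if neither is allowed in labels) overwrites the leading '_' of
   "_omp_fn" above, and ASM_FORMAT_PRIVATE_NAME appends the counter.
   The exact spelling is target dependent.  */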
/* Build a decl for the omp child function.  It won't contain a body
   yet, just the bare decl.  */

static void
create_omp_child_function (omp_context *ctx)
{
  tree decl, type, name, t;

  name = create_omp_child_function_name ();
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (FUNCTION_DECL, name, type);
  decl = lang_hooks.decls.pushdecl (decl);

  ctx->cb.dst_fn = decl;

  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  t = build_decl (RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  t = build_decl (PARM_DECL, get_identifier (".omp_data_i"), ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = current_function_decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;
  ctx->receiver_decl = t;

  /* Allocate memory for the function structure.  The call to
     allocate_struct_function clobbers CFUN, so we need to restore
     it afterward.  */
  allocate_struct_function (decl);
  DECL_SOURCE_LOCATION (decl) = EXPR_LOCATION (ctx->stmt);
  cfun->function_end_locus = EXPR_LOCATION (ctx->stmt);
  cfun = ctx->cb.src_cfun;
}
/* Scan an OpenMP parallel directive.  */

static void
scan_omp_parallel (tree *stmt_p, omp_context *outer_ctx)
{
  omp_context *ctx;
  tree name;

  /* Ignore parallel directives with empty bodies, unless there
     are copyin clauses.  */
  if (optimize > 0
      && empty_body_p (OMP_PARALLEL_BODY (*stmt_p))
      && find_omp_clause (OMP_CLAUSES (*stmt_p), OMP_CLAUSE_COPYIN) == NULL)
    {
      *stmt_p = build_empty_stmt ();
      return;
    }

  ctx = new_omp_context (*stmt_p, outer_ctx);
  if (parallel_nesting_level > 1)
    ctx->is_nested = true;
  ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
  ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
  ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
  name = create_tmp_var_name (".omp_data_s");
  name = build_decl (TYPE_DECL, name, ctx->record_type);
  TYPE_NAME (ctx->record_type) = name;
  create_omp_child_function (ctx);
  OMP_PARALLEL_FN (*stmt_p) = ctx->cb.dst_fn;

  scan_sharing_clauses (OMP_PARALLEL_CLAUSES (*stmt_p), ctx);
  scan_omp (&OMP_PARALLEL_BODY (*stmt_p), ctx);

  if (TYPE_FIELDS (ctx->record_type) == NULL)
    ctx->record_type = ctx->receiver_decl = NULL;
  else
    {
      layout_type (ctx->record_type);
      fixup_child_record_type (ctx);
    }
}
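/* For illustration, scanning

	#pragma omp parallel shared (a) firstprivate (b)

   (with 'a' addressable) builds a record type roughly like

	struct .omp_data_s { int *a; int b; };

   whose layout is finalized above once all clauses and the body have
   been scanned.  The field names and types here are hypothetical.  */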
/* Scan an OpenMP loop directive.  */

static void
scan_omp_for (tree *stmt_p, omp_context *outer_ctx)
{
  omp_context *ctx;
  tree stmt;

  stmt = *stmt_p;
  ctx = new_omp_context (stmt, outer_ctx);

  scan_sharing_clauses (OMP_FOR_CLAUSES (stmt), ctx);

  scan_omp (&OMP_FOR_PRE_BODY (stmt), ctx);
  scan_omp (&OMP_FOR_INIT (stmt), ctx);
  scan_omp (&OMP_FOR_COND (stmt), ctx);
  scan_omp (&OMP_FOR_INCR (stmt), ctx);
  scan_omp (&OMP_FOR_BODY (stmt), ctx);
}
/* Scan an OpenMP sections directive.  */

static void
scan_omp_sections (tree *stmt_p, omp_context *outer_ctx)
{
  tree stmt;
  omp_context *ctx;

  stmt = *stmt_p;
  ctx = new_omp_context (stmt, outer_ctx);
  scan_sharing_clauses (OMP_SECTIONS_CLAUSES (stmt), ctx);
  scan_omp (&OMP_SECTIONS_BODY (stmt), ctx);
}
/* Scan an OpenMP single directive.  */

static void
scan_omp_single (tree *stmt_p, omp_context *outer_ctx)
{
  tree stmt = *stmt_p;
  omp_context *ctx;
  tree name;

  ctx = new_omp_context (stmt, outer_ctx);
  ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
  ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
  name = create_tmp_var_name (".omp_copy_s");
  name = build_decl (TYPE_DECL, name, ctx->record_type);
  TYPE_NAME (ctx->record_type) = name;

  scan_sharing_clauses (OMP_SINGLE_CLAUSES (stmt), ctx);
  scan_omp (&OMP_SINGLE_BODY (stmt), ctx);

  if (TYPE_FIELDS (ctx->record_type) == NULL)
    ctx->record_type = NULL;
  else
    layout_type (ctx->record_type);
}
/* Check OpenMP nesting restrictions.  */
static void
check_omp_nesting_restrictions (tree t, omp_context *ctx)
{
  switch (TREE_CODE (t))
    {
    case OMP_FOR:
    case OMP_SECTIONS:
    case OMP_SINGLE:
      for (; ctx != NULL; ctx = ctx->outer)
	switch (TREE_CODE (ctx->stmt))
	  {
	  case OMP_FOR:
	  case OMP_SECTIONS:
	  case OMP_SINGLE:
	  case OMP_ORDERED:
	  case OMP_MASTER:
	  case OMP_CRITICAL:
	    warning (0, "work-sharing region may not be closely nested inside "
		     "of work-sharing, critical, ordered or master region");
	    return;
	  case OMP_PARALLEL:
	    return;
	  default:
	    break;
	  }
      break;
    case OMP_MASTER:
      for (; ctx != NULL; ctx = ctx->outer)
	switch (TREE_CODE (ctx->stmt))
	  {
	  case OMP_FOR:
	  case OMP_SECTIONS:
	  case OMP_SINGLE:
	    warning (0, "master region may not be closely nested inside "
		     "of work-sharing region");
	    return;
	  case OMP_PARALLEL:
	    return;
	  default:
	    break;
	  }
      break;
    case OMP_ORDERED:
      for (; ctx != NULL; ctx = ctx->outer)
	switch (TREE_CODE (ctx->stmt))
	  {
	  case OMP_CRITICAL:
	    warning (0, "ordered region may not be closely nested inside "
		     "of critical region");
	    return;
	  case OMP_FOR:
	    if (find_omp_clause (OMP_CLAUSES (ctx->stmt),
				 OMP_CLAUSE_ORDERED) == NULL)
	      warning (0, "ordered region must be closely nested inside "
		       "a loop region with an ordered clause");
	    return;
	  case OMP_PARALLEL:
	    return;
	  default:
	    break;
	  }
      break;
    case OMP_CRITICAL:
      for (; ctx != NULL; ctx = ctx->outer)
	if (TREE_CODE (ctx->stmt) == OMP_CRITICAL
	    && OMP_CRITICAL_NAME (t) == OMP_CRITICAL_NAME (ctx->stmt))
	  {
	    warning (0, "critical region may not be nested inside a critical "
		     "region with the same name");
	    return;
	  }
      break;
    default:
      break;
    }
}
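/* For example (illustrative), the following triggers the first warning
   above, because the inner work-sharing construct binds to the same
   team as the enclosing one:

	#pragma omp parallel
	{
	  #pragma omp single
	  {
	    #pragma omp for	   <-- work-sharing nested in single
	    for (i = 0; i < n; i++) ...
	  }
	}
*/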
/* Callback for walk_stmts used to scan for OpenMP directives at TP.  */

static tree
scan_omp_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = data;
  omp_context *ctx = wi->info;
  tree t = *tp;

  if (EXPR_HAS_LOCATION (t))
    input_location = EXPR_LOCATION (t);

  /* Check the OpenMP nesting restrictions.  */
  if (OMP_DIRECTIVE_P (t) && ctx != NULL)
    check_omp_nesting_restrictions (t, ctx);

  *walk_subtrees = 0;
  switch (TREE_CODE (t))
    {
    case OMP_PARALLEL:
      parallel_nesting_level++;
      scan_omp_parallel (tp, ctx);
      parallel_nesting_level--;
      break;

    case OMP_FOR:
      scan_omp_for (tp, ctx);
      break;

    case OMP_SECTIONS:
      scan_omp_sections (tp, ctx);
      break;

    case OMP_SINGLE:
      scan_omp_single (tp, ctx);
      break;

    case OMP_SECTION:
    case OMP_MASTER:
    case OMP_ORDERED:
    case OMP_CRITICAL:
      ctx = new_omp_context (*tp, ctx);
      scan_omp (&OMP_BODY (*tp), ctx);
      break;

    case BIND_EXPR:
      {
	tree var;
	*walk_subtrees = 1;

	for (var = BIND_EXPR_VARS (t); var ; var = TREE_CHAIN (var))
	  insert_decl_map (&ctx->cb, var, var);
      }
      break;

    case VAR_DECL:
    case PARM_DECL:
    case LABEL_DECL:
    case RESULT_DECL:
      if (ctx)
	*tp = remap_decl (t, &ctx->cb);
      break;

    default:
      if (ctx && TYPE_P (t))
	*tp = remap_type (t, &ctx->cb);
      else if (!DECL_P (t))
	*walk_subtrees = 1;
      break;
    }

  return NULL_TREE;
}
/* Scan all the statements starting at STMT_P.  CTX contains context
   information about the OpenMP directives and clauses found during
   the scan.  */

static void
scan_omp (tree *stmt_p, omp_context *ctx)
{
  location_t saved_location;
  struct walk_stmt_info wi;

  memset (&wi, 0, sizeof (wi));
  wi.callback = scan_omp_1;
  wi.info = ctx;
  wi.want_bind_expr = (ctx != NULL);
  wi.want_locations = true;

  saved_location = input_location;
  walk_stmts (&wi, stmt_p);
  input_location = saved_location;
}
/* Re-gimplification and code generation routines.  */

/* Build a call to GOMP_barrier.  */

static void
build_omp_barrier (tree *stmt_list)
{
  tree t;

  t = built_in_decls[BUILT_IN_GOMP_BARRIER];
  t = build_function_call_expr (t, NULL);
  gimplify_and_add (t, stmt_list);
}
/* If a context was created for STMT when it was scanned, return it.  */

static omp_context *
maybe_lookup_ctx (tree stmt)
{
  splay_tree_node n;
  n = splay_tree_lookup (all_contexts, (splay_tree_key) stmt);
  return n ? (omp_context *) n->value : NULL;
}
/* Find the mapping for DECL in CTX or the immediately enclosing
   context that has a mapping for DECL.

   If CTX is a nested parallel directive, we may have to use the decl
   mappings created in CTX's parent context.  Suppose that we have the
   following parallel nesting (variable UIDs shown for clarity):

	iD.1562 = 0;
	#omp parallel shared(iD.1562)		-> outer parallel
	  iD.1562 = iD.1562 + 1;

	  #omp parallel shared (iD.1562)	-> inner parallel
	    iD.1562 = iD.1562 - 1;

   Each parallel structure will create a distinct .omp_data_s structure
   for copying iD.1562 in/out of the directive:

	outer parallel	.omp_data_s.1.i -> iD.1562
	inner parallel	.omp_data_s.2.i -> iD.1562

   A shared variable mapping will produce a copy-out operation before
   the parallel directive and a copy-in operation after it.  So, in
   this case we would have:

	iD.1562 = 0;
	.omp_data_o.1.i = iD.1562;
	#omp parallel shared(iD.1562)		-> outer parallel
	  .omp_data_i.1 = &.omp_data_o.1
	  .omp_data_i.1->i = .omp_data_i.1->i + 1;

	  .omp_data_o.2.i = iD.1562;		-> **
	  #omp parallel shared(iD.1562)		-> inner parallel
	    .omp_data_i.2 = &.omp_data_o.2
	    .omp_data_i.2->i = .omp_data_i.2->i - 1;

   ** This is a problem.  The symbol iD.1562 cannot be referenced
   inside the body of the outer parallel region.  But since we are
   emitting this copy operation while expanding the inner parallel
   directive, we need to access the CTX structure of the outer
   parallel directive to get the correct mapping:

	.omp_data_o.2.i = .omp_data_i.1->i

   Since there may be other workshare or parallel directives enclosing
   the parallel directive, it may be necessary to walk up the context
   parent chain.  This is not a problem in general because nested
   parallelism happens only rarely.  */

static tree
lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
{
  tree t;
  omp_context *up;

  gcc_assert (ctx->is_nested);

  for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
    t = maybe_lookup_decl (decl, up);

  gcc_assert (t);

  return t;
}
/* Similar to lookup_decl_in_outer_ctx, but return DECL if not found
   in outer contexts.  */

static tree
maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
{
  tree t = NULL;
  omp_context *up;

  if (ctx->is_nested)
    for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
      t = maybe_lookup_decl (decl, up);

  return t ? t : decl;
}
/* Construct the initialization value for reduction CLAUSE.  */

tree
omp_reduction_init (tree clause, tree type)
{
  switch (OMP_CLAUSE_REDUCTION_CODE (clause))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case TRUTH_OR_EXPR:
    case TRUTH_ORIF_EXPR:
    case TRUTH_XOR_EXPR:
    case NE_EXPR:
      return fold_convert (type, integer_zero_node);

    case MULT_EXPR:
    case TRUTH_AND_EXPR:
    case TRUTH_ANDIF_EXPR:
    case EQ_EXPR:
      return fold_convert (type, integer_one_node);

    case BIT_AND_EXPR:
      return fold_convert (type, integer_minus_one_node);

    case MAX_EXPR:
      if (SCALAR_FLOAT_TYPE_P (type))
	{
	  REAL_VALUE_TYPE max, min;
	  if (HONOR_INFINITIES (TYPE_MODE (type)))
	    {
	      real_inf (&max);
	      real_arithmetic (&min, NEGATE_EXPR, &max, NULL);
	    }
	  else
	    real_maxval (&min, 1, TYPE_MODE (type));
	  return build_real (type, min);
	}
      else
	{
	  gcc_assert (INTEGRAL_TYPE_P (type));
	  return TYPE_MIN_VALUE (type);
	}

    case MIN_EXPR:
      if (SCALAR_FLOAT_TYPE_P (type))
	{
	  REAL_VALUE_TYPE max;
	  if (HONOR_INFINITIES (TYPE_MODE (type)))
	    real_inf (&max);
	  else
	    real_maxval (&max, 0, TYPE_MODE (type));
	  return build_real (type, max);
	}
      else
	{
	  gcc_assert (INTEGRAL_TYPE_P (type));
	  return TYPE_MAX_VALUE (type);
	}

    default:
      gcc_unreachable ();
    }
}
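/* Thus, for example (illustrative):

	reduction(+:x)   -> x initialized to 0
	reduction(*:x)   -> x initialized to 1
	reduction(&:x)   -> x initialized to ~0 (all bits set)
	reduction(max:x) -> x initialized to the type's minimum value
			    (INT_MIN for int; -Inf for a float type
			    that honors infinities)

   so that combining a thread's partial result with the identity value
   leaves the result unchanged.  */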
/* Generate code to implement the input clauses, FIRSTPRIVATE and COPYIN,
   from the receiver (aka child) side and initializers for REFERENCE_TYPE
   private variables.  Initialization statements go in ILIST, while calls
   to destructors go in DLIST.  */

static void
lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist,
			 omp_context *ctx)
{
  tree_stmt_iterator diter;
  tree c, dtor, copyin_seq, x, args, ptr;
  bool copyin_by_ref = false;
  bool lastprivate_firstprivate = false;
  int pass;

  *dlist = alloc_stmt_list ();
  diter = tsi_start (*dlist);
  copyin_seq = NULL;

  /* Do all the fixed sized types in the first pass, and the variable sized
     types in the second pass.  This makes sure that the scalar arguments to
     the variable sized types are processed before we use them in the
     variable sized operations.  */
  for (pass = 0; pass < 2; ++pass)
    {
      for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
	{
	  enum omp_clause_code c_kind = OMP_CLAUSE_CODE (c);
	  tree var, new_var;
	  bool by_ref;

	  switch (c_kind)
	    {
	    case OMP_CLAUSE_PRIVATE:
	      if (OMP_CLAUSE_PRIVATE_DEBUG (c))
		continue;
	      break;
	    case OMP_CLAUSE_SHARED:
	      if (maybe_lookup_decl (OMP_CLAUSE_DECL (c), ctx) == NULL)
		{
		  gcc_assert (is_global_var (OMP_CLAUSE_DECL (c)));
		  continue;
		}
	      /* FALLTHRU */
	    case OMP_CLAUSE_FIRSTPRIVATE:
	    case OMP_CLAUSE_COPYIN:
	    case OMP_CLAUSE_REDUCTION:
	      break;
	    case OMP_CLAUSE_LASTPRIVATE:
	      if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
		{
		  lastprivate_firstprivate = true;
		  if (pass != 0)
		    continue;
		}
	      break;
	    default:
	      continue;
	    }

	  new_var = var = OMP_CLAUSE_DECL (c);
	  if (c_kind != OMP_CLAUSE_COPYIN)
	    new_var = lookup_decl (var, ctx);

	  if (c_kind == OMP_CLAUSE_SHARED || c_kind == OMP_CLAUSE_COPYIN)
	    {
	      if (pass != 0)
		continue;
	    }
	  else if (is_variable_sized (var))
	    {
	      /* For variable sized types, we need to allocate the
		 actual storage here.  Call alloca and store the
		 result in the pointer decl that we created elsewhere.  */
	      if (pass == 0)
		continue;

	      ptr = DECL_VALUE_EXPR (new_var);
	      gcc_assert (TREE_CODE (ptr) == INDIRECT_REF);
	      ptr = TREE_OPERAND (ptr, 0);
	      gcc_assert (DECL_P (ptr));

	      x = TYPE_SIZE_UNIT (TREE_TYPE (new_var));
	      args = tree_cons (NULL, x, NULL);
	      x = built_in_decls[BUILT_IN_ALLOCA];
	      x = build_function_call_expr (x, args);
	      x = fold_convert (TREE_TYPE (ptr), x);
	      x = build2 (GIMPLE_MODIFY_STMT, void_type_node, ptr, x);
	      gimplify_and_add (x, ilist);
	    }
	  else if (is_reference (var))
	    {
	      /* For references that are being privatized for Fortran,
		 allocate new backing storage for the new pointer
		 variable.  This allows us to avoid changing all the
		 code that expects a pointer to something that expects
		 a direct variable.  Note that this doesn't apply to
		 C++, since reference types are disallowed in data
		 sharing clauses there, except for NRV optimized
		 return values.  */
	      if (pass == 0)
		continue;

	      x = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_var)));
	      if (TREE_CONSTANT (x))
		{
		  const char *name = NULL;
		  if (DECL_NAME (var))
		    name = IDENTIFIER_POINTER (DECL_NAME (new_var));

		  x = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (new_var)),
					  name);
		  gimple_add_tmp_var (x);
		  x = build_fold_addr_expr_with_type (x, TREE_TYPE (new_var));
		}
	      else
		{
		  args = tree_cons (NULL, x, NULL);
		  x = built_in_decls[BUILT_IN_ALLOCA];
		  x = build_function_call_expr (x, args);
		  x = fold_convert (TREE_TYPE (new_var), x);
		}

	      x = build2 (GIMPLE_MODIFY_STMT, void_type_node, new_var, x);
	      gimplify_and_add (x, ilist);

	      new_var = build_fold_indirect_ref (new_var);
	    }
	  else if (c_kind == OMP_CLAUSE_REDUCTION
		   && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
	    {
	      if (pass == 0)
		continue;
	    }
	  else if (pass != 0)
	    continue;

	  switch (OMP_CLAUSE_CODE (c))
	    {
	    case OMP_CLAUSE_SHARED:
	      /* Shared global vars are just accessed directly.  */
	      if (is_global_var (new_var))
		break;
	      /* Set up the DECL_VALUE_EXPR for shared variables now.  This
		 needs to be delayed until after fixup_child_record_type so
		 that we get the correct type during the dereference.  */
	      by_ref = use_pointer_for_field (var, true);
	      x = build_receiver_ref (var, by_ref, ctx);
	      SET_DECL_VALUE_EXPR (new_var, x);
	      DECL_HAS_VALUE_EXPR_P (new_var) = 1;

	      /* ??? If VAR is not passed by reference, and the variable
		 hasn't been initialized yet, then we'll get a warning for
		 the store into the omp_data_s structure.  Ideally, we'd be
		 able to notice this and not store anything at all, but
		 we're generating code too early.  Suppress the warning.  */
	      if (!by_ref)
		TREE_NO_WARNING (var) = 1;
	      break;

	    case OMP_CLAUSE_LASTPRIVATE:
	      if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
		break;
	      /* FALLTHRU */

	    case OMP_CLAUSE_PRIVATE:
	      x = lang_hooks.decls.omp_clause_default_ctor (c, new_var);
	      if (x)
		gimplify_and_add (x, ilist);
	      /* FALLTHRU */

	    do_dtor:
	      x = lang_hooks.decls.omp_clause_dtor (c, new_var);
	      if (x)
		{
		  dtor = x;
		  gimplify_stmt (&dtor);
		  tsi_link_before (&diter, dtor, TSI_SAME_STMT);
		}
	      break;

	    case OMP_CLAUSE_FIRSTPRIVATE:
	      x = build_outer_var_ref (var, ctx);
	      x = lang_hooks.decls.omp_clause_copy_ctor (c, new_var, x);
	      gimplify_and_add (x, ilist);
	      goto do_dtor;
	      break;

	    case OMP_CLAUSE_COPYIN:
	      by_ref = use_pointer_for_field (var, false);
	      x = build_receiver_ref (var, by_ref, ctx);
	      x = lang_hooks.decls.omp_clause_assign_op (c, new_var, x);
	      append_to_statement_list (x, &copyin_seq);
	      copyin_by_ref |= by_ref;
	      break;

	    case OMP_CLAUSE_REDUCTION:
	      if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
		{
		  gimplify_and_add (OMP_CLAUSE_REDUCTION_INIT (c), ilist);
		  OMP_CLAUSE_REDUCTION_INIT (c) = NULL;
		}
	      else
		{
		  x = omp_reduction_init (c, TREE_TYPE (new_var));
		  gcc_assert (TREE_CODE (TREE_TYPE (new_var)) != ARRAY_TYPE);
		  x = build2 (GIMPLE_MODIFY_STMT, void_type_node, new_var, x);
		  gimplify_and_add (x, ilist);
		}
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
    }

  /* The copyin sequence is not to be executed by the main thread, since
     that would result in self-copies.  Perhaps not visible to scalars,
     but it certainly is to C++ operator=.  */
  if (copyin_seq)
    {
      x = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
      x = build_function_call_expr (x, NULL);
      x = build2 (NE_EXPR, boolean_type_node, x,
		  build_int_cst (TREE_TYPE (x), 0));
      x = build3 (COND_EXPR, void_type_node, x, copyin_seq, NULL);
      gimplify_and_add (x, ilist);
    }

  /* If any copyin variable is passed by reference, we must ensure the
     master thread doesn't modify it before it is copied over in all
     threads.  Similarly for variables in both firstprivate and
     lastprivate clauses we need to ensure the lastprivate copying
     happens after firstprivate copying in all threads.  */
  if (copyin_by_ref || lastprivate_firstprivate)
    build_omp_barrier (ilist);
}
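/* As an illustrative sketch, for

	#pragma omp parallel firstprivate (b)

   the receiver-side sequence built here is roughly

	b.1 = .omp_data_i->b;	<-- copy-in from the parent's record

   while private (p) simply invokes the language's default-constructor
   hook for p, if any.  The field and temporary names above are
   hypothetical.  */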
/* Generate code to implement the LASTPRIVATE clauses.  This is used for
   both parallel and workshare constructs.  PREDICATE may be NULL if it's
   always true.  */

static void
lower_lastprivate_clauses (tree clauses, tree predicate, tree *stmt_list,
			   omp_context *ctx)
{
  tree sub_list, x, c;

  /* Early exit if there are no lastprivate clauses.  */
  clauses = find_omp_clause (clauses, OMP_CLAUSE_LASTPRIVATE);
  if (clauses == NULL)
    {
      /* If this was a workshare clause, see if it had been combined
	 with its parallel.  In that case, look for the clauses on the
	 parallel statement itself.  */
      if (is_parallel_ctx (ctx))
	return;

      ctx = ctx->outer;
      if (ctx == NULL || !is_parallel_ctx (ctx))
	return;

      clauses = find_omp_clause (OMP_PARALLEL_CLAUSES (ctx->stmt),
				 OMP_CLAUSE_LASTPRIVATE);
      if (clauses == NULL)
	return;
    }

  sub_list = alloc_stmt_list ();

  for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
    {
      tree var, new_var;

      if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_LASTPRIVATE)
	continue;

      var = OMP_CLAUSE_DECL (c);
      new_var = lookup_decl (var, ctx);

      x = build_outer_var_ref (var, ctx);
      if (is_reference (var))
	new_var = build_fold_indirect_ref (new_var);
      x = lang_hooks.decls.omp_clause_assign_op (c, x, new_var);
      append_to_statement_list (x, &sub_list);
    }

  if (predicate)
    x = build3 (COND_EXPR, void_type_node, predicate, sub_list, NULL);
  else
    x = sub_list;

  gimplify_and_add (x, stmt_list);
}
/* Generate code to implement the REDUCTION clauses.  */

static void
lower_reduction_clauses (tree clauses, tree *stmt_list, omp_context *ctx)
{
  tree sub_list = NULL, x, c;
  int count = 0;

  /* First see if there is exactly one reduction clause.  Use OMP_ATOMIC
     update in that case, otherwise use a lock.  */
  for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c))
    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
      {
	if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
	  {
	    /* Never use OMP_ATOMIC for array reductions.  */
	    count = -1;
	    break;
	  }
	count++;
      }

  if (count == 0)
    return;

  for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
    {
      tree var, ref, new_var;
      enum tree_code code;

      if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION)
	continue;

      var = OMP_CLAUSE_DECL (c);
      new_var = lookup_decl (var, ctx);
      if (is_reference (var))
	new_var = build_fold_indirect_ref (new_var);
      ref = build_outer_var_ref (var, ctx);
      code = OMP_CLAUSE_REDUCTION_CODE (c);

      /* reduction(-:var) sums up the partial results, so it acts
	 identically to reduction(+:var).  */
      if (code == MINUS_EXPR)
	code = PLUS_EXPR;

      if (count == 1)
	{
	  tree addr = build_fold_addr_expr (ref);

	  addr = save_expr (addr);
	  ref = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (addr)), addr);
	  x = fold_build2 (code, TREE_TYPE (ref), ref, new_var);
	  x = build2 (OMP_ATOMIC, void_type_node, addr, x);
	  gimplify_and_add (x, stmt_list);
	  return;
	}

      if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
	{
	  tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);

	  if (is_reference (var))
	    ref = build_fold_addr_expr (ref);
	  SET_DECL_VALUE_EXPR (placeholder, ref);
	  DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
	  gimplify_and_add (OMP_CLAUSE_REDUCTION_MERGE (c), &sub_list);
	  OMP_CLAUSE_REDUCTION_MERGE (c) = NULL;
	  OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL;
	}
      else
	{
	  x = build2 (code, TREE_TYPE (ref), ref, new_var);
	  ref = build_outer_var_ref (var, ctx);
	  x = build2 (GIMPLE_MODIFY_STMT, void_type_node, ref, x);
	  append_to_statement_list (x, &sub_list);
	}
    }

  x = built_in_decls[BUILT_IN_GOMP_ATOMIC_START];
  x = build_function_call_expr (x, NULL);
  gimplify_and_add (x, stmt_list);

  gimplify_and_add (sub_list, stmt_list);

  x = built_in_decls[BUILT_IN_GOMP_ATOMIC_END];
  x = build_function_call_expr (x, NULL);
  gimplify_and_add (x, stmt_list);
}
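/* For a single scalar clause such as reduction(+:s), the code above
   emits, roughly (illustrative only),

	#pragma omp atomic
	  s = s + s.N;		<-- s.N is the thread-local copy

   whereas two or more reductions are merged under one lock:

	GOMP_atomic_start ();
	s1 = s1 + s1.N;
	s2 = s2 * s2.N;
	GOMP_atomic_end ();

   The names s.N, s1.N and s2.N stand in for the hypothetical
   thread-local copies created by lower_rec_input_clauses.  */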
/* Generate code to implement the COPYPRIVATE clauses.  */

static void
lower_copyprivate_clauses (tree clauses, tree *slist, tree *rlist,
			   omp_context *ctx)
{
  tree c;

  for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
    {
      tree var, ref, x;
      bool by_ref;

      if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYPRIVATE)
	continue;

      var = OMP_CLAUSE_DECL (c);
      by_ref = use_pointer_for_field (var, false);

      ref = build_sender_ref (var, ctx);
      x = (ctx->is_nested) ? lookup_decl_in_outer_ctx (var, ctx) : var;
      x = by_ref ? build_fold_addr_expr (x) : x;
      x = build2 (GIMPLE_MODIFY_STMT, void_type_node, ref, x);
      gimplify_and_add (x, slist);

      ref = build_receiver_ref (var, by_ref, ctx);
      if (is_reference (var))
	{
	  ref = build_fold_indirect_ref (ref);
	  var = build_fold_indirect_ref (var);
	}
      x = lang_hooks.decls.omp_clause_assign_op (c, var, ref);
      gimplify_and_add (x, rlist);
    }
}
/* Generate code to implement the clauses, FIRSTPRIVATE, COPYIN, LASTPRIVATE,
   and REDUCTION from the sender (aka parent) side.  */

static void
lower_send_clauses (tree clauses, tree *ilist, tree *olist, omp_context *ctx)
{
  tree c;

  for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
    {
      tree val, ref, x, var;
      bool by_ref, do_in = false, do_out = false;

      switch (OMP_CLAUSE_CODE (c))
	{
	case OMP_CLAUSE_FIRSTPRIVATE:
	case OMP_CLAUSE_COPYIN:
	case OMP_CLAUSE_LASTPRIVATE:
	case OMP_CLAUSE_REDUCTION:
	  break;
	default:
	  continue;
	}

      var = val = OMP_CLAUSE_DECL (c);
      if (ctx->is_nested)
	var = lookup_decl_in_outer_ctx (val, ctx);

      if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYIN
	  && is_global_var (var))
	continue;
      if (is_variable_sized (val))
	continue;
      by_ref = use_pointer_for_field (val, false);

      switch (OMP_CLAUSE_CODE (c))
	{
	case OMP_CLAUSE_FIRSTPRIVATE:
	case OMP_CLAUSE_COPYIN:
	  do_in = true;
	  break;

	case OMP_CLAUSE_LASTPRIVATE:
	  if (by_ref || is_reference (val))
	    {
	      if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
		continue;
	      do_in = true;
	    }
	  else
	    do_out = true;
	  break;

	case OMP_CLAUSE_REDUCTION:
	  do_in = true;
	  do_out = !(by_ref || is_reference (val));
	  break;

	default:
	  gcc_unreachable ();
	}

      if (do_in)
	{
	  ref = build_sender_ref (val, ctx);
	  x = by_ref ? build_fold_addr_expr (var) : var;
	  x = build2 (GIMPLE_MODIFY_STMT, void_type_node, ref, x);
	  gimplify_and_add (x, ilist);
	}

      if (do_out)
	{
	  ref = build_sender_ref (val, ctx);
	  x = build2 (GIMPLE_MODIFY_STMT, void_type_node, var, ref);
	  gimplify_and_add (x, olist);
	}
    }
}
2100 /* Generate code to implement SHARED from the sender (aka parent) side.
2101 This is trickier, since OMP_PARALLEL_CLAUSES doesn't list things that
2102 got automatically shared. */
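/* A sketch: for a shared variable X remapped in the child, we emit
   either
     .OMP_DATA_O.x = &x;
   when the field is a pointer, or
     .OMP_DATA_O.x = x;   ...   x = .OMP_DATA_O.x;
   to copy the value in before the call and back out after it.  */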
2104 static void
2105 lower_send_shared_vars (tree *ilist, tree *olist, omp_context *ctx)
2107 tree var, ovar, nvar, f, x;
2109 if (ctx->record_type == NULL)
2110 return;
2112 for (f = TYPE_FIELDS (ctx->record_type); f ; f = TREE_CHAIN (f))
2114 ovar = DECL_ABSTRACT_ORIGIN (f);
2115 nvar = maybe_lookup_decl (ovar, ctx);
2116 if (!nvar || !DECL_HAS_VALUE_EXPR_P (nvar))
2117 continue;
2119 var = ovar;
2121 /* If CTX is a nested parallel directive, find the immediately
2122 enclosing parallel or workshare construct that contains a
2123 mapping for OVAR. */
2124 if (ctx->is_nested)
2125 var = lookup_decl_in_outer_ctx (ovar, ctx);
2127 if (use_pointer_for_field (ovar, true))
2129 x = build_sender_ref (ovar, ctx);
2130 var = build_fold_addr_expr (var);
2131 x = build2 (GIMPLE_MODIFY_STMT, void_type_node, x, var);
2132 gimplify_and_add (x, ilist);
2134 else
2136 x = build_sender_ref (ovar, ctx);
2137 x = build2 (GIMPLE_MODIFY_STMT, void_type_node, x, var);
2138 gimplify_and_add (x, ilist);
2140 x = build_sender_ref (ovar, ctx);
2141 x = build2 (GIMPLE_MODIFY_STMT, void_type_node, var, x);
2142 gimplify_and_add (x, olist);
2147 /* Build the function calls to GOMP_parallel_start etc to actually
2148 generate the parallel operation. REGION is the parallel region
2149 being expanded. BB is the block into which to insert the code.
2150 WS_ARGS will be set if this is a call to a combined
2151 parallel+workshare construct; it contains the list of additional
2152 arguments needed by the workshare construct. */
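/* For instance, '#pragma omp parallel num_threads (4)' with a data
   block lowers to roughly (a sketch, where __omp_fn.0 stands for
   OMP_PARALLEL_FN (ENTRY_STMT)):
     GOMP_parallel_start (__omp_fn.0, &.OMP_DATA_O, 4);
     __omp_fn.0 (&.OMP_DATA_O);
     GOMP_parallel_end ();  */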
2154 static void
2155 expand_parallel_call (struct omp_region *region, basic_block bb,
2156 tree entry_stmt, tree ws_args)
2158 tree t, args, val, cond, c, list, clauses;
2159 block_stmt_iterator si;
2160 int start_ix;
2162 clauses = OMP_PARALLEL_CLAUSES (entry_stmt);
2163 push_gimplify_context ();
2165 /* Determine what flavor of GOMP_parallel_start we will be
2166 emitting. */
2167 start_ix = BUILT_IN_GOMP_PARALLEL_START;
2168 if (is_combined_parallel (region))
2170 switch (region->inner->type)
2172 case OMP_FOR:
2173 start_ix = BUILT_IN_GOMP_PARALLEL_LOOP_STATIC_START
2174 + region->inner->sched_kind;
2175 break;
2176 case OMP_SECTIONS:
2177 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS_START;
2178 break;
2179 default:
2180 gcc_unreachable ();
2184 /* By default, the value of NUM_THREADS is zero (selected at run time)
2185 and there is no conditional. */
2186 cond = NULL_TREE;
2187 val = build_int_cst (unsigned_type_node, 0);
2189 c = find_omp_clause (clauses, OMP_CLAUSE_IF);
2190 if (c)
2191 cond = OMP_CLAUSE_IF_EXPR (c);
2193 c = find_omp_clause (clauses, OMP_CLAUSE_NUM_THREADS);
2194 if (c)
2195 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
2197 /* Ensure 'val' is of the correct type. */
2198 val = fold_convert (unsigned_type_node, val);
2200 /* If we found the clause 'if (cond)', build either
2201 (cond == 0) or (cond ? val : 1u). */
2202 if (cond)
2204 block_stmt_iterator si;
2206 cond = gimple_boolify (cond);
2208 if (integer_zerop (val))
2209 val = build2 (EQ_EXPR, unsigned_type_node, cond,
2210 build_int_cst (TREE_TYPE (cond), 0));
2211 else
2213 basic_block cond_bb, then_bb, else_bb;
2214 edge e;
2215 tree t, then_lab, else_lab, tmp;
2217 tmp = create_tmp_var (TREE_TYPE (val), NULL);
2218 e = split_block (bb, NULL);
2219 cond_bb = e->src;
2220 bb = e->dest;
2221 remove_edge (e);
2223 then_bb = create_empty_bb (cond_bb);
2224 else_bb = create_empty_bb (then_bb);
2225 then_lab = create_artificial_label ();
2226 else_lab = create_artificial_label ();
2228 t = build3 (COND_EXPR, void_type_node,
2229 cond,
2230 build_and_jump (&then_lab),
2231 build_and_jump (&else_lab));
2233 si = bsi_start (cond_bb);
2234 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2236 si = bsi_start (then_bb);
2237 t = build1 (LABEL_EXPR, void_type_node, then_lab);
2238 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2239 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, tmp, val);
2240 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2242 si = bsi_start (else_bb);
2243 t = build1 (LABEL_EXPR, void_type_node, else_lab);
2244 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2245 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, tmp,
2246 build_int_cst (unsigned_type_node, 1));
2247 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2249 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
2250 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
2251 make_edge (then_bb, bb, EDGE_FALLTHRU);
2252 make_edge (else_bb, bb, EDGE_FALLTHRU);
2254 val = tmp;
2257 list = NULL_TREE;
2258 val = get_formal_tmp_var (val, &list);
2259 si = bsi_start (bb);
2260 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2263 list = NULL_TREE;
2264 args = tree_cons (NULL, val, NULL);
2265 t = OMP_PARALLEL_DATA_ARG (entry_stmt);
2266 if (t == NULL)
2267 t = null_pointer_node;
2268 else
2269 t = build_fold_addr_expr (t);
2270 args = tree_cons (NULL, t, args);
2271 t = build_fold_addr_expr (OMP_PARALLEL_FN (entry_stmt));
2272 args = tree_cons (NULL, t, args);
2274 if (ws_args)
2275 args = chainon (args, ws_args);
2277 t = built_in_decls[start_ix];
2278 t = build_function_call_expr (t, args);
2279 gimplify_and_add (t, &list);
2281 t = OMP_PARALLEL_DATA_ARG (entry_stmt);
2282 if (t == NULL)
2283 t = null_pointer_node;
2284 else
2285 t = build_fold_addr_expr (t);
2286 args = tree_cons (NULL, t, NULL);
2287 t = build_function_call_expr (OMP_PARALLEL_FN (entry_stmt), args);
2288 gimplify_and_add (t, &list);
2290 t = built_in_decls[BUILT_IN_GOMP_PARALLEL_END];
2291 t = build_function_call_expr (t, NULL);
2292 gimplify_and_add (t, &list);
2294 si = bsi_last (bb);
2295 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2297 pop_gimplify_context (NULL_TREE);
2301 /* If exceptions are enabled, wrap *STMT_P in a MUST_NOT_THROW catch
2302 handler. This prevents programs from violating the structured
2303 block semantics with throws. */
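/* Schematically, the result is (a sketch):
     try { *STMT_P }
     catch <must-not-throw filter>:
       lang_protect_cleanup_actions () if the front end provides it,
       otherwise __builtin_trap ();  */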
2305 static void
2306 maybe_catch_exception (tree *stmt_p)
2308 tree f, t;
2310 if (!flag_exceptions)
2311 return;
2313 if (lang_protect_cleanup_actions)
2314 t = lang_protect_cleanup_actions ();
2315 else
2317 t = built_in_decls[BUILT_IN_TRAP];
2318 t = build_function_call_expr (t, NULL);
2320 f = build2 (EH_FILTER_EXPR, void_type_node, NULL, NULL);
2321 EH_FILTER_MUST_NOT_THROW (f) = 1;
2322 gimplify_and_add (t, &EH_FILTER_FAILURE (f));
2324 t = build2 (TRY_CATCH_EXPR, void_type_node, *stmt_p, NULL);
2325 append_to_statement_list (f, &TREE_OPERAND (t, 1));
2327 *stmt_p = NULL;
2328 append_to_statement_list (t, stmt_p);
2331 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
2333 static tree
2334 list2chain (tree list)
2336 tree t;
2338 for (t = list; t; t = TREE_CHAIN (t))
2340 tree var = TREE_VALUE (t);
2341 if (TREE_CHAIN (t))
2342 TREE_CHAIN (var) = TREE_VALUE (TREE_CHAIN (t));
2343 else
2344 TREE_CHAIN (var) = NULL_TREE;
2347 return list ? TREE_VALUE (list) : NULL_TREE;
2351 /* Remove barriers in REGION->EXIT's block. Note that this is only
2352 valid for OMP_PARALLEL regions. Since the end of a parallel region
2353 is an implicit barrier, any barrier left at the end of the
2354 OMP_PARALLEL region by a workshare nested inside it is redundant
2355 and can now be removed. */
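/* For example, in
     #pragma omp parallel
     #pragma omp for
       ...
   the barrier implied by the worksharing loop is immediately followed
   by the implicit barrier that ends the parallel region, so the
   loop's OMP_RETURN can safely be marked nowait.  */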
2357 static void
2358 remove_exit_barrier (struct omp_region *region)
2360 block_stmt_iterator si;
2361 basic_block exit_bb;
2362 edge_iterator ei;
2363 edge e;
2364 tree t;
2366 exit_bb = region->exit;
2368 /* If the parallel region doesn't return, we don't have a
2369 REGION->EXIT block at all. */
2370 if (! exit_bb)
2371 return;
2373 /* The last insn in the block will be the parallel's OMP_RETURN. The
2374 workshare's OMP_RETURN will be in a preceding block. The kinds of
2375 statements that can appear in between are extremely limited -- no
2376 memory operations at all. Here we are conservative and allow only
2377 a label to precede this OMP_RETURN. */
2378 si = bsi_last (exit_bb);
2379 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
2380 bsi_prev (&si);
2381 if (!bsi_end_p (si) && TREE_CODE (bsi_stmt (si)) != LABEL_EXPR)
2382 return;
2384 FOR_EACH_EDGE (e, ei, exit_bb->preds)
2386 si = bsi_last (e->src);
2387 if (bsi_end_p (si))
2388 continue;
2389 t = bsi_stmt (si);
2390 if (TREE_CODE (t) == OMP_RETURN)
2391 OMP_RETURN_NOWAIT (t) = 1;
2395 static void
2396 remove_exit_barriers (struct omp_region *region)
2398 if (region->type == OMP_PARALLEL)
2399 remove_exit_barrier (region);
2401 if (region->inner)
2403 region = region->inner;
2404 remove_exit_barriers (region);
2405 while (region->next)
2407 region = region->next;
2408 remove_exit_barriers (region);
2413 /* Expand the OpenMP parallel directive starting at REGION. */
2415 static void
2416 expand_omp_parallel (struct omp_region *region)
2418 basic_block entry_bb, exit_bb, new_bb;
2419 struct function *child_cfun, *saved_cfun;
2420 tree child_fn, block, t, ws_args;
2421 block_stmt_iterator si;
2422 tree entry_stmt;
2423 edge e;
2425 entry_stmt = last_stmt (region->entry);
2426 child_fn = OMP_PARALLEL_FN (entry_stmt);
2427 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
2428 saved_cfun = cfun;
2430 entry_bb = region->entry;
2431 exit_bb = region->exit;
2433 if (is_combined_parallel (region))
2434 ws_args = region->ws_args;
2435 else
2436 ws_args = NULL_TREE;
2438 if (child_cfun->cfg)
2440 /* Due to inlining, it may happen that we have already outlined
2441 the region, in which case all we need to do is make the
2442 sub-graph unreachable and emit the parallel call. */
2443 edge entry_succ_e, exit_succ_e;
2444 block_stmt_iterator si;
2446 entry_succ_e = single_succ_edge (entry_bb);
2448 si = bsi_last (entry_bb);
2449 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_PARALLEL);
2450 bsi_remove (&si, true);
2452 new_bb = entry_bb;
2453 remove_edge (entry_succ_e);
2454 if (exit_bb)
2456 exit_succ_e = single_succ_edge (exit_bb);
2457 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
2460 else
2462 /* If the parallel region needs data sent from the parent
2463 function, then the very first statement (except possible
2464 tree profile counter updates) of the parallel body
2465 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
2466 &.OMP_DATA_O is passed as an argument to the child function,
2467 we need to replace it with the argument as seen by the child
2468 function.
2470 In most cases, this will end up being the identity assignment
2471 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
2472 a function call that has been inlined, the original PARM_DECL
2473 .OMP_DATA_I may have been converted into a different local
2474 variable, in which case we need to keep the assignment. */
2475 if (OMP_PARALLEL_DATA_ARG (entry_stmt))
2477 basic_block entry_succ_bb = single_succ (entry_bb);
2478 block_stmt_iterator si;
2480 for (si = bsi_start (entry_succ_bb); ; bsi_next (&si))
2482 tree stmt, arg;
2484 gcc_assert (!bsi_end_p (si));
2485 stmt = bsi_stmt (si);
2486 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2487 continue;
2489 arg = GIMPLE_STMT_OPERAND (stmt, 1);
2490 STRIP_NOPS (arg);
2491 if (TREE_CODE (arg) == ADDR_EXPR
2492 && TREE_OPERAND (arg, 0)
2493 == OMP_PARALLEL_DATA_ARG (entry_stmt))
2495 if (GIMPLE_STMT_OPERAND (stmt, 0)
2496 == DECL_ARGUMENTS (child_fn))
2497 bsi_remove (&si, true);
2498 else
2499 GIMPLE_STMT_OPERAND (stmt, 1) = DECL_ARGUMENTS (child_fn);
2500 break;
2505 /* Declare local variables needed in CHILD_CFUN. */
2506 block = DECL_INITIAL (child_fn);
2507 BLOCK_VARS (block) = list2chain (child_cfun->unexpanded_var_list);
2508 DECL_SAVED_TREE (child_fn) = single_succ (entry_bb)->stmt_list;
2510 /* Reset DECL_CONTEXT on locals and function arguments. */
2511 for (t = BLOCK_VARS (block); t; t = TREE_CHAIN (t))
2512 DECL_CONTEXT (t) = child_fn;
2514 for (t = DECL_ARGUMENTS (child_fn); t; t = TREE_CHAIN (t))
2515 DECL_CONTEXT (t) = child_fn;
2517 /* Split ENTRY_BB at OMP_PARALLEL so that it can be moved to the
2518 child function. */
2519 si = bsi_last (entry_bb);
2520 t = bsi_stmt (si);
2521 gcc_assert (t && TREE_CODE (t) == OMP_PARALLEL);
2522 bsi_remove (&si, true);
2523 e = split_block (entry_bb, t);
2524 entry_bb = e->dest;
2525 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
2527 /* Move the parallel region into CHILD_CFUN. We need to reset
2528 dominance information because the expansion of the inner
2529 regions has invalidated it. */
2530 free_dominance_info (CDI_DOMINATORS);
2531 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb);
2532 if (exit_bb)
2533 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
2534 DECL_STRUCT_FUNCTION (child_fn)->curr_properties
2535 = cfun->curr_properties;
2536 cgraph_add_new_function (child_fn, true);
2538 /* Convert OMP_RETURN into a RETURN_EXPR. */
2539 if (exit_bb)
2541 si = bsi_last (exit_bb);
2542 gcc_assert (!bsi_end_p (si)
2543 && TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
2544 t = build1 (RETURN_EXPR, void_type_node, NULL);
2545 bsi_insert_after (&si, t, BSI_SAME_STMT);
2546 bsi_remove (&si, true);
2550 /* Emit a library call to launch the child threads. */
2551 expand_parallel_call (region, new_bb, entry_stmt, ws_args);
2555 /* A subroutine of expand_omp_for. Generate code for a parallel
2556 loop with any schedule. Given parameters:
2558 for (V = N1; V cond N2; V += STEP) BODY;
2560 where COND is "<" or ">", we generate pseudocode
2562 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2563 if (more) goto L0; else goto L3;
2564 L0:
2565 V = istart0;
2566 iend = iend0;
2567 L1:
2568 BODY;
2569 V += STEP;
2570 if (V cond iend) goto L1; else goto L2;
2571 L2:
2572 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2573 L3:
2575 If this is a combined omp parallel loop, instead of the call to
2576 GOMP_loop_foo_start, we emit 'goto L2' and fetch each chunk with GOMP_loop_foo_next. */
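/* A concrete instance (a sketch): for schedule(dynamic, 4), START_FN
   is BUILT_IN_GOMP_LOOP_DYNAMIC_START and NEXT_FN is
   BUILT_IN_GOMP_LOOP_DYNAMIC_NEXT, giving
     if (GOMP_loop_dynamic_start (N1, N2, STEP, 4, &istart0, &iend0))
       do
         ... run iterations [istart0, iend0) ...
       while (GOMP_loop_dynamic_next (&istart0, &iend0));  */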
2578 static void
2579 expand_omp_for_generic (struct omp_region *region,
2580 struct omp_for_data *fd,
2581 enum built_in_function start_fn,
2582 enum built_in_function next_fn)
2584 tree l0, l1, l2 = NULL, l3 = NULL;
2585 tree type, istart0, iend0, iend;
2586 tree t, args, list;
2587 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb;
2588 basic_block l2_bb = NULL, l3_bb = NULL;
2589 block_stmt_iterator si;
2590 bool in_combined_parallel = is_combined_parallel (region);
2592 type = TREE_TYPE (fd->v);
2594 istart0 = create_tmp_var (long_integer_type_node, ".istart0");
2595 iend0 = create_tmp_var (long_integer_type_node, ".iend0");
2596 iend = create_tmp_var (type, NULL);
2597 TREE_ADDRESSABLE (istart0) = 1;
2598 TREE_ADDRESSABLE (iend0) = 1;
2600 gcc_assert ((region->cont != NULL) ^ (region->exit == NULL));
2602 entry_bb = region->entry;
2603 l0_bb = create_empty_bb (entry_bb);
2604 l1_bb = single_succ (entry_bb);
2606 l0 = tree_block_label (l0_bb);
2607 l1 = tree_block_label (l1_bb);
2609 cont_bb = region->cont;
2610 exit_bb = region->exit;
2611 if (cont_bb)
2613 l2_bb = create_empty_bb (cont_bb);
2614 l3_bb = single_succ (cont_bb);
2616 l2 = tree_block_label (l2_bb);
2617 l3 = tree_block_label (l3_bb);
2620 si = bsi_last (entry_bb);
2621 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
2622 if (!in_combined_parallel)
2624 /* If this is not a combined parallel loop, emit a call to
2625 GOMP_loop_foo_start in ENTRY_BB. */
2626 list = alloc_stmt_list ();
2627 t = build_fold_addr_expr (iend0);
2628 args = tree_cons (NULL, t, NULL);
2629 t = build_fold_addr_expr (istart0);
2630 args = tree_cons (NULL, t, args);
2631 if (fd->chunk_size)
2633 t = fold_convert (long_integer_type_node, fd->chunk_size);
2634 args = tree_cons (NULL, t, args);
2636 t = fold_convert (long_integer_type_node, fd->step);
2637 args = tree_cons (NULL, t, args);
2638 t = fold_convert (long_integer_type_node, fd->n2);
2639 args = tree_cons (NULL, t, args);
2640 t = fold_convert (long_integer_type_node, fd->n1);
2641 args = tree_cons (NULL, t, args);
2642 t = build_function_call_expr (built_in_decls[start_fn], args);
2643 t = get_formal_tmp_var (t, &list);
2644 if (cont_bb)
2646 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l0),
2647 build_and_jump (&l3));
2648 append_to_statement_list (t, &list);
2650 bsi_insert_after (&si, list, BSI_SAME_STMT);
2652 bsi_remove (&si, true);
2654 /* Iteration setup for sequential loop goes in L0_BB. */
2655 list = alloc_stmt_list ();
2656 t = fold_convert (type, istart0);
2657 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, fd->v, t);
2658 gimplify_and_add (t, &list);
2660 t = fold_convert (type, iend0);
2661 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, iend, t);
2662 gimplify_and_add (t, &list);
2664 si = bsi_start (l0_bb);
2665 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2667 /* Handle the rare case where BODY doesn't ever return. */
2668 if (cont_bb == NULL)
2670 remove_edge (single_succ_edge (entry_bb));
2671 make_edge (entry_bb, l0_bb, EDGE_FALLTHRU);
2672 make_edge (l0_bb, l1_bb, EDGE_FALLTHRU);
2673 return;
2676 /* Code to control the increment and predicate for the sequential
2677 loop goes in CONT_BB, replacing the OMP_CONTINUE. */
2680 list = alloc_stmt_list ();
2682 t = build2 (PLUS_EXPR, type, fd->v, fd->step);
2683 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, fd->v, t);
2684 gimplify_and_add (t, &list);
2686 t = build2 (fd->cond_code, boolean_type_node, fd->v, iend);
2687 t = get_formal_tmp_var (t, &list);
2688 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1),
2689 build_and_jump (&l2));
2690 append_to_statement_list (t, &list);
2692 si = bsi_last (cont_bb);
2693 bsi_insert_after (&si, list, BSI_SAME_STMT);
2694 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
2695 bsi_remove (&si, true);
2697 /* Emit code to get the next parallel iteration in L2_BB. */
2698 list = alloc_stmt_list ();
2700 t = build_fold_addr_expr (iend0);
2701 args = tree_cons (NULL, t, NULL);
2702 t = build_fold_addr_expr (istart0);
2703 args = tree_cons (NULL, t, args);
2704 t = build_function_call_expr (built_in_decls[next_fn], args);
2705 t = get_formal_tmp_var (t, &list);
2706 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l0),
2707 build_and_jump (&l3));
2708 append_to_statement_list (t, &list);
2710 si = bsi_start (l2_bb);
2711 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2713 /* Add the loop cleanup function. */
2714 si = bsi_last (exit_bb);
2715 if (OMP_RETURN_NOWAIT (bsi_stmt (si)))
2716 t = built_in_decls[BUILT_IN_GOMP_LOOP_END_NOWAIT];
2717 else
2718 t = built_in_decls[BUILT_IN_GOMP_LOOP_END];
2719 t = build_function_call_expr (t, NULL);
2720 bsi_insert_after (&si, t, BSI_SAME_STMT);
2721 bsi_remove (&si, true);
2723 /* Connect the new blocks. */
2724 remove_edge (single_succ_edge (entry_bb));
2725 if (in_combined_parallel)
2726 make_edge (entry_bb, l2_bb, EDGE_FALLTHRU);
2727 else
2729 make_edge (entry_bb, l0_bb, EDGE_TRUE_VALUE);
2730 make_edge (entry_bb, l3_bb, EDGE_FALSE_VALUE);
2733 make_edge (l0_bb, l1_bb, EDGE_FALLTHRU);
2735 remove_edge (single_succ_edge (cont_bb));
2736 make_edge (cont_bb, l1_bb, EDGE_TRUE_VALUE);
2737 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
2739 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
2740 make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
2744 /* A subroutine of expand_omp_for. Generate code for a parallel
2745 loop with static schedule and no specified chunk size. Given
2746 parameters:
2748 for (V = N1; V cond N2; V += STEP) BODY;
2750 where COND is "<" or ">", we generate pseudocode
2752 if (cond is <)
2753 adj = STEP - 1;
2754 else
2755 adj = STEP + 1;
2756 n = (adj + N2 - N1) / STEP;
2757 q = n / nthreads;
2758 q += (q * nthreads != n);
2759 s0 = q * threadid;
2760 e0 = min(s0 + q, n);
2761 if (s0 >= e0) goto L2; else goto L0;
2762 L0:
2763 V = s0 * STEP + N1;
2764 e = e0 * STEP + N1;
2765 L1:
2766 BODY;
2767 V += STEP;
2768 if (V cond e) goto L1;
2769 L2:
2770 */
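/* A worked example: N1 = 0, N2 = 10, STEP = 1 and nthreads = 4 give
   n = 10 and q = 3, so threads 0, 1 and 2 run iterations [0,3),
   [3,6) and [6,9) while thread 3 runs only [9,10).  */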
2772 static void
2773 expand_omp_for_static_nochunk (struct omp_region *region,
2774 struct omp_for_data *fd)
2776 tree l0, l1, l2, n, q, s0, e0, e, t, nthreads, threadid;
2777 tree type, list;
2778 basic_block entry_bb, exit_bb, seq_start_bb, body_bb, cont_bb;
2779 basic_block fin_bb;
2780 block_stmt_iterator si;
2782 type = TREE_TYPE (fd->v);
2784 entry_bb = region->entry;
2785 seq_start_bb = create_empty_bb (entry_bb);
2786 body_bb = single_succ (entry_bb);
2787 cont_bb = region->cont;
2788 fin_bb = single_succ (cont_bb);
2789 exit_bb = region->exit;
2791 l0 = tree_block_label (seq_start_bb);
2792 l1 = tree_block_label (body_bb);
2793 l2 = tree_block_label (fin_bb);
2795 /* Iteration space partitioning goes in ENTRY_BB. */
2796 list = alloc_stmt_list ();
2798 t = built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS];
2799 t = build_function_call_expr (t, NULL);
2800 t = fold_convert (type, t);
2801 nthreads = get_formal_tmp_var (t, &list);
2803 t = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
2804 t = build_function_call_expr (t, NULL);
2805 t = fold_convert (type, t);
2806 threadid = get_formal_tmp_var (t, &list);
2808 fd->n1 = fold_convert (type, fd->n1);
2809 if (!is_gimple_val (fd->n1))
2810 fd->n1 = get_formal_tmp_var (fd->n1, &list);
2812 fd->n2 = fold_convert (type, fd->n2);
2813 if (!is_gimple_val (fd->n2))
2814 fd->n2 = get_formal_tmp_var (fd->n2, &list);
2816 fd->step = fold_convert (type, fd->step);
2817 if (!is_gimple_val (fd->step))
2818 fd->step = get_formal_tmp_var (fd->step, &list);
2820 t = build_int_cst (type, (fd->cond_code == LT_EXPR ? -1 : 1));
2821 t = fold_build2 (PLUS_EXPR, type, fd->step, t);
2822 t = fold_build2 (PLUS_EXPR, type, t, fd->n2);
2823 t = fold_build2 (MINUS_EXPR, type, t, fd->n1);
2824 t = fold_build2 (TRUNC_DIV_EXPR, type, t, fd->step);
2825 t = fold_convert (type, t);
2826 if (is_gimple_val (t))
2827 n = t;
2828 else
2829 n = get_formal_tmp_var (t, &list);
2831 t = build2 (TRUNC_DIV_EXPR, type, n, nthreads);
2832 q = get_formal_tmp_var (t, &list);
2834 t = build2 (MULT_EXPR, type, q, nthreads);
2835 t = build2 (NE_EXPR, type, t, n);
2836 t = build2 (PLUS_EXPR, type, q, t);
2837 q = get_formal_tmp_var (t, &list);
2839 t = build2 (MULT_EXPR, type, q, threadid);
2840 s0 = get_formal_tmp_var (t, &list);
2842 t = build2 (PLUS_EXPR, type, s0, q);
2843 t = build2 (MIN_EXPR, type, t, n);
2844 e0 = get_formal_tmp_var (t, &list);
2846 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
2847 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l2),
2848 build_and_jump (&l0));
2849 append_to_statement_list (t, &list);
2851 si = bsi_last (entry_bb);
2852 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
2853 bsi_insert_after (&si, list, BSI_SAME_STMT);
2854 bsi_remove (&si, true);
2856 /* Setup code for sequential iteration goes in SEQ_START_BB. */
2857 list = alloc_stmt_list ();
2859 t = fold_convert (type, s0);
2860 t = build2 (MULT_EXPR, type, t, fd->step);
2861 t = build2 (PLUS_EXPR, type, t, fd->n1);
2862 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, fd->v, t);
2863 gimplify_and_add (t, &list);
2865 t = fold_convert (type, e0);
2866 t = build2 (MULT_EXPR, type, t, fd->step);
2867 t = build2 (PLUS_EXPR, type, t, fd->n1);
2868 e = get_formal_tmp_var (t, &list);
2870 si = bsi_start (seq_start_bb);
2871 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
2873 /* The code controlling the sequential loop replaces the OMP_CONTINUE. */
2874 list = alloc_stmt_list ();
2876 t = build2 (PLUS_EXPR, type, fd->v, fd->step);
2877 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, fd->v, t);
2878 gimplify_and_add (t, &list);
2880 t = build2 (fd->cond_code, boolean_type_node, fd->v, e);
2881 t = get_formal_tmp_var (t, &list);
2882 t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1),
2883 build_and_jump (&l2));
2884 append_to_statement_list (t, &list);
2886 si = bsi_last (cont_bb);
2887 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
2888 bsi_insert_after (&si, list, BSI_SAME_STMT);
2889 bsi_remove (&si, true);
2891 /* Replace the OMP_RETURN with a barrier, or nothing. */
2892 si = bsi_last (exit_bb);
2893 if (!OMP_RETURN_NOWAIT (bsi_stmt (si)))
2895 list = alloc_stmt_list ();
2896 build_omp_barrier (&list);
2897 bsi_insert_after (&si, list, BSI_SAME_STMT);
2899 bsi_remove (&si, true);
2901 /* Connect all the blocks. */
2902 make_edge (seq_start_bb, body_bb, EDGE_FALLTHRU);
2904 remove_edge (single_succ_edge (entry_bb));
2905 make_edge (entry_bb, fin_bb, EDGE_TRUE_VALUE);
2906 make_edge (entry_bb, seq_start_bb, EDGE_FALSE_VALUE);
2908 make_edge (cont_bb, body_bb, EDGE_TRUE_VALUE);
2909 find_edge (cont_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
2913 /* A subroutine of expand_omp_for. Generate code for a parallel
2914 loop with static schedule and a specified chunk size. Given
2915 parameters:
2917 for (V = N1; V cond N2; V += STEP) BODY;
2919 where COND is "<" or ">", we generate pseudocode
2921 if (cond is <)
2922 adj = STEP - 1;
2923 else
2924 adj = STEP + 1;
2925 n = (adj + N2 - N1) / STEP;
2926 trip = 0;
2927 L0:
2928 s0 = (trip * nthreads + threadid) * CHUNK;
2929 e0 = min(s0 + CHUNK, n);
2930 if (s0 < n) goto L1; else goto L4;
2931 L1:
2932 V = s0 * STEP + N1;
2933 e = e0 * STEP + N1;
2934 L2:
2935 BODY;
2936 V += STEP;
2937 if (V cond e) goto L2; else goto L3;
2938 L3:
2939 trip += 1;
2940 goto L0;
2941 L4:
2942 */
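/* A worked example: n = 10, CHUNK = 2 and nthreads = 3. On trip 0
   the threads run [0,2), [2,4) and [4,6); on trip 1 threads 0 and 1
   run [6,8) and [8,10) while thread 2 computes s0 = 10, fails the
   s0 < n test and branches to L4.  */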
2944 static void
2945 expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd)
2947 tree l0, l1, l2, l3, l4, n, s0, e0, e, t;
2948 tree trip, nthreads, threadid;
2949 tree type;
2950 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
2951 basic_block trip_update_bb, cont_bb, fin_bb;
2952 tree list;
2953 block_stmt_iterator si;
2955 type = TREE_TYPE (fd->v);
2957 entry_bb = region->entry;
2958 iter_part_bb = create_empty_bb (entry_bb);
2959 seq_start_bb = create_empty_bb (iter_part_bb);
2960 body_bb = single_succ (entry_bb);
2961 cont_bb = region->cont;
2962 trip_update_bb = create_empty_bb (cont_bb);
2963 fin_bb = single_succ (cont_bb);
2964 exit_bb = region->exit;
2966 l0 = tree_block_label (iter_part_bb);
2967 l1 = tree_block_label (seq_start_bb);
2968 l2 = tree_block_label (body_bb);
2969 l3 = tree_block_label (trip_update_bb);
2970 l4 = tree_block_label (fin_bb);
2972 /* Trip and adjustment setup goes in ENTRY_BB. */
2973 list = alloc_stmt_list ();
2975 t = built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS];
2976 t = build_function_call_expr (t, NULL);
2977 t = fold_convert (type, t);
2978 nthreads = get_formal_tmp_var (t, &list);
2980 t = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
2981 t = build_function_call_expr (t, NULL);
2982 t = fold_convert (type, t);
2983 threadid = get_formal_tmp_var (t, &list);
2985 fd->n1 = fold_convert (type, fd->n1);
2986 if (!is_gimple_val (fd->n1))
2987 fd->n1 = get_formal_tmp_var (fd->n1, &list);
2989 fd->n2 = fold_convert (type, fd->n2);
2990 if (!is_gimple_val (fd->n2))
2991 fd->n2 = get_formal_tmp_var (fd->n2, &list);
2993 fd->step = fold_convert (type, fd->step);
2994 if (!is_gimple_val (fd->step))
2995 fd->step = get_formal_tmp_var (fd->step, &list);
2997 fd->chunk_size = fold_convert (type, fd->chunk_size);
2998 if (!is_gimple_val (fd->chunk_size))
2999 fd->chunk_size = get_formal_tmp_var (fd->chunk_size, &list);
3001 t = build_int_cst (type, (fd->cond_code == LT_EXPR ? -1 : 1));
3002 t = fold_build2 (PLUS_EXPR, type, fd->step, t);
3003 t = fold_build2 (PLUS_EXPR, type, t, fd->n2);
3004 t = fold_build2 (MINUS_EXPR, type, t, fd->n1);
3005 t = fold_build2 (TRUNC_DIV_EXPR, type, t, fd->step);
3006 t = fold_convert (type, t);
3007 if (is_gimple_val (t))
3008 n = t;
3009 else
3010 n = get_formal_tmp_var (t, &list);
3012 t = build_int_cst (type, 0);
3013 trip = get_initialized_tmp_var (t, &list, NULL);
3015 si = bsi_last (entry_bb);
3016 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
3017 bsi_insert_after (&si, list, BSI_SAME_STMT);
3018 bsi_remove (&si, true);
3020 /* Iteration space partitioning goes in ITER_PART_BB. */
3021 list = alloc_stmt_list ();
3023 t = build2 (MULT_EXPR, type, trip, nthreads);
3024 t = build2 (PLUS_EXPR, type, t, threadid);
3025 t = build2 (MULT_EXPR, type, t, fd->chunk_size);
3026 s0 = get_formal_tmp_var (t, &list);
3028 t = build2 (PLUS_EXPR, type, s0, fd->chunk_size);
3029 t = build2 (MIN_EXPR, type, t, n);
3030 e0 = get_formal_tmp_var (t, &list);
3032 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3033 t = build3 (COND_EXPR, void_type_node, t,
3034 build_and_jump (&l1), build_and_jump (&l4));
3035 append_to_statement_list (t, &list);
3037 si = bsi_start (iter_part_bb);
3038 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
3040 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3041 list = alloc_stmt_list ();
3043 t = fold_convert (type, s0);
3044 t = build2 (MULT_EXPR, type, t, fd->step);
3045 t = build2 (PLUS_EXPR, type, t, fd->n1);
3046 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, fd->v, t);
3047 gimplify_and_add (t, &list);
3049 t = fold_convert (type, e0);
3050 t = build2 (MULT_EXPR, type, t, fd->step);
3051 t = build2 (PLUS_EXPR, type, t, fd->n1);
3052 e = get_formal_tmp_var (t, &list);
3054 si = bsi_start (seq_start_bb);
3055 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
3057 /* The code controlling the sequential loop goes in CONT_BB,
3058 replacing the OMP_CONTINUE. */
3059 list = alloc_stmt_list ();
3061 t = build2 (PLUS_EXPR, type, fd->v, fd->step);
3062 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, fd->v, t);
3063 gimplify_and_add (t, &list);
3065 t = build2 (fd->cond_code, boolean_type_node, fd->v, e);
3066 t = get_formal_tmp_var (t, &list);
3067 t = build3 (COND_EXPR, void_type_node, t,
3068 build_and_jump (&l2), build_and_jump (&l3));
3069 append_to_statement_list (t, &list);
3071 si = bsi_last (cont_bb);
3072 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
3073 bsi_insert_after (&si, list, BSI_SAME_STMT);
3074 bsi_remove (&si, true);
3076 /* Trip update code goes into TRIP_UPDATE_BB. */
3077 list = alloc_stmt_list ();
3079 t = build_int_cst (type, 1);
3080 t = build2 (PLUS_EXPR, type, trip, t);
3081 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, trip, t);
3082 gimplify_and_add (t, &list);
3084 si = bsi_start (trip_update_bb);
3085 bsi_insert_after (&si, list, BSI_CONTINUE_LINKING);
3087 /* Replace the OMP_RETURN with a barrier, or nothing. */
3088 si = bsi_last (exit_bb);
3089 if (!OMP_RETURN_NOWAIT (bsi_stmt (si)))
3091 list = alloc_stmt_list ();
3092 build_omp_barrier (&list);
3093 bsi_insert_after (&si, list, BSI_SAME_STMT);
3095 bsi_remove (&si, true);
3097 /* Connect the new blocks. */
3098 remove_edge (single_succ_edge (entry_bb));
3099 make_edge (entry_bb, iter_part_bb, EDGE_FALLTHRU);
3101 make_edge (iter_part_bb, seq_start_bb, EDGE_TRUE_VALUE);
3102 make_edge (iter_part_bb, fin_bb, EDGE_FALSE_VALUE);
3104 make_edge (seq_start_bb, body_bb, EDGE_FALLTHRU);
3106 remove_edge (single_succ_edge (cont_bb));
3107 make_edge (cont_bb, body_bb, EDGE_TRUE_VALUE);
3108 make_edge (cont_bb, trip_update_bb, EDGE_FALSE_VALUE);
3110 make_edge (trip_update_bb, iter_part_bb, EDGE_FALLTHRU);
3114 /* Expand the OpenMP loop defined by REGION. */
3116 static void
3117 expand_omp_for (struct omp_region *region)
3119 struct omp_for_data fd;
3121 push_gimplify_context ();
3123 extract_omp_for_data (last_stmt (region->entry), &fd);
3124 region->sched_kind = fd.sched_kind;
3126 if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
3127 && !fd.have_ordered
3128 && region->cont
3129 && region->exit)
3131 if (fd.chunk_size == NULL)
3132 expand_omp_for_static_nochunk (region, &fd);
3133 else
3134 expand_omp_for_static_chunk (region, &fd);
3136 else
3138 int fn_index = fd.sched_kind + fd.have_ordered * 4;
3139 int start_ix = BUILT_IN_GOMP_LOOP_STATIC_START + fn_index;
3140 int next_ix = BUILT_IN_GOMP_LOOP_STATIC_NEXT + fn_index;
3141 expand_omp_for_generic (region, &fd, start_ix, next_ix);
3144 pop_gimplify_context (NULL);
3148 /* Expand code for an OpenMP sections directive. In pseudocode, we generate
3150 v = GOMP_sections_start (n);
3151 L0:
3152 switch (v)
3153 {
3154 case 0:
3155 goto L2;
3156 case 1:
3157 section 1;
3158 goto L1;
3159 case 2:
3160 ...
3161 case n:
3162 ...
3163 default:
3164 abort ();
3165 }
3166 L1:
3167 v = GOMP_sections_next ();
3168 goto L0;
3169 L2:
3170 reduction;
3172 If this is a combined parallel sections, replace the call to
3173 GOMP_sections_start with 'goto L1'. */
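/* For example (a sketch), a sections construct with two section
   bodies starts with
     v = GOMP_sections_start (2);
   the switch runs case 1 or 2, case 0 exits, and each finished body
   loops back through
     v = GOMP_sections_next ();
   to fetch more work.  */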
3175 static void
3176 expand_omp_sections (struct omp_region *region)
3178 tree label_vec, l0, l1, l2, t, u, v, sections_stmt;
3179 unsigned i, len;
3180 basic_block entry_bb, exit_bb, l0_bb, l1_bb, l2_bb, default_bb;
3181 block_stmt_iterator si;
3182 struct omp_region *inner;
3183 edge e;
3185 entry_bb = region->entry;
3186 l0_bb = create_empty_bb (entry_bb);
3187 l0 = tree_block_label (l0_bb);
3189 gcc_assert ((region->cont != NULL) ^ (region->exit == NULL));
3190 l1_bb = region->cont;
3191 if (l1_bb)
3193 l2_bb = single_succ (l1_bb);
3194 default_bb = create_empty_bb (l1_bb->prev_bb);
3196 l1 = tree_block_label (l1_bb);
3198 else
3200 l2_bb = create_empty_bb (l0_bb);
3201 default_bb = l2_bb;
3203 l1 = NULL;
3205 l2 = tree_block_label (l2_bb);
3207 exit_bb = region->exit;
3209 v = create_tmp_var (unsigned_type_node, ".section");
3211 /* We will build a switch() with enough cases for all the
3212 OMP_SECTION regions, a '0' case to signal that no more work is
3213 available, and a default case to abort if something goes wrong. */
3214 len = EDGE_COUNT (entry_bb->succs);
3215 label_vec = make_tree_vec (len + 2);
3217 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
3218 OMP_SECTIONS statement. */
3219 si = bsi_last (entry_bb);
3220 sections_stmt = bsi_stmt (si);
3221 gcc_assert (TREE_CODE (sections_stmt) == OMP_SECTIONS);
3222 if (!is_combined_parallel (region))
3224 /* If we are not inside a combined parallel+sections region,
3225 call GOMP_sections_start. */
3226 t = build_int_cst (unsigned_type_node, len);
3227 t = tree_cons (NULL, t, NULL);
3228 u = built_in_decls[BUILT_IN_GOMP_SECTIONS_START];
3229 t = build_function_call_expr (u, t);
3230 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, v, t);
3231 bsi_insert_after (&si, t, BSI_SAME_STMT);
3233 bsi_remove (&si, true);
3235 /* The switch() statement replacing OMP_SECTIONS goes in L0_BB. */
3236 si = bsi_start (l0_bb);
3238 t = build3 (SWITCH_EXPR, void_type_node, v, NULL, label_vec);
3239 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
3241 t = build3 (CASE_LABEL_EXPR, void_type_node,
3242 build_int_cst (unsigned_type_node, 0), NULL, l2);
3243 TREE_VEC_ELT (label_vec, 0) = t;
3244 make_edge (l0_bb, l2_bb, 0);
3246 /* Convert each OMP_SECTION into a CASE_LABEL_EXPR. */
3247 for (inner = region->inner, i = 1; inner; inner = inner->next, ++i)
3249 basic_block s_entry_bb, s_exit_bb;
3251 s_entry_bb = inner->entry;
3252 s_exit_bb = inner->exit;
3254 t = tree_block_label (s_entry_bb);
3255 u = build_int_cst (unsigned_type_node, i);
3256 u = build3 (CASE_LABEL_EXPR, void_type_node, u, NULL, t);
3257 TREE_VEC_ELT (label_vec, i) = u;
3259 si = bsi_last (s_entry_bb);
3260 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SECTION);
3261 gcc_assert (i < len || OMP_SECTION_LAST (bsi_stmt (si)));
3262 bsi_remove (&si, true);
3264 e = single_pred_edge (s_entry_bb);
3265 e->flags = 0;
3266 redirect_edge_pred (e, l0_bb);
3268 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
3270 if (s_exit_bb == NULL)
3271 continue;
3273 si = bsi_last (s_exit_bb);
3274 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
3275 bsi_remove (&si, true);
3277 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
3280 /* Error handling code goes in DEFAULT_BB. */
3281 t = tree_block_label (default_bb);
3282 u = build3 (CASE_LABEL_EXPR, void_type_node, NULL, NULL, t);
3283 TREE_VEC_ELT (label_vec, len + 1) = u;
3284 make_edge (l0_bb, default_bb, 0);
3286 si = bsi_start (default_bb);
3287 t = built_in_decls[BUILT_IN_TRAP];
3288 t = build_function_call_expr (t, NULL);
3289 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
3291 /* Code to get the next section goes in L1_BB. */
3292 if (l1_bb)
3294 si = bsi_last (l1_bb);
3295 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
3297 t = built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT];
3298 t = build_function_call_expr (t, NULL);
3299 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, v, t);
3300 bsi_insert_after (&si, t, BSI_SAME_STMT);
3301 bsi_remove (&si, true);
3304 /* Cleanup function replaces OMP_RETURN in EXIT_BB. */
3305 if (exit_bb)
3307 si = bsi_last (exit_bb);
3308 if (OMP_RETURN_NOWAIT (bsi_stmt (si)))
3309 t = built_in_decls[BUILT_IN_GOMP_SECTIONS_END_NOWAIT];
3310 else
3311 t = built_in_decls[BUILT_IN_GOMP_SECTIONS_END];
3312 t = build_function_call_expr (t, NULL);
3313 bsi_insert_after (&si, t, BSI_SAME_STMT);
3314 bsi_remove (&si, true);
3317 /* Connect the new blocks. */
3318 if (is_combined_parallel (region))
3320 /* If this was a combined parallel+sections region, we did not
3321 emit a GOMP_sections_start in the entry block, so we just
3322 need to jump to L1_BB to get the next section. */
3323 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
3325 else
3326 make_edge (entry_bb, l0_bb, EDGE_FALLTHRU);
3328 if (l1_bb)
3330 e = single_succ_edge (l1_bb);
3331 redirect_edge_succ (e, l0_bb);
3332 e->flags = EDGE_FALLTHRU;
3337 /* Expand code for an OpenMP single directive. We've already expanded
3338 much of the code; here we simply place the GOMP_barrier call. */
3340 static void
3341 expand_omp_single (struct omp_region *region)
3343 basic_block entry_bb, exit_bb;
3344 block_stmt_iterator si;
3345 bool need_barrier = false;
3347 entry_bb = region->entry;
3348 exit_bb = region->exit;
3350 si = bsi_last (entry_bb);
3351 /* The terminal barrier at the end of a GOMP_single_copy sequence cannot
3352 be removed. We need to ensure that the thread that entered the single
3353 does not exit before the data is copied out by the other threads. */
3354 if (find_omp_clause (OMP_SINGLE_CLAUSES (bsi_stmt (si)),
3355 OMP_CLAUSE_COPYPRIVATE))
3356 need_barrier = true;
3357 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SINGLE);
3358 bsi_remove (&si, true);
3359 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
3361 si = bsi_last (exit_bb);
3362 if (!OMP_RETURN_NOWAIT (bsi_stmt (si)) || need_barrier)
3364 tree t = alloc_stmt_list ();
3365 build_omp_barrier (&t);
3366 bsi_insert_after (&si, t, BSI_SAME_STMT);
3368 bsi_remove (&si, true);
3369 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
3373 /* Generic expansion for OpenMP synchronization directives: master,
3374 ordered and critical. All we need to do here is remove the entry
3375 and exit markers for REGION. */
3377 static void
3378 expand_omp_synch (struct omp_region *region)
3380 basic_block entry_bb, exit_bb;
3381 block_stmt_iterator si;
3383 entry_bb = region->entry;
3384 exit_bb = region->exit;
3386 si = bsi_last (entry_bb);
3387 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SINGLE
3388 || TREE_CODE (bsi_stmt (si)) == OMP_MASTER
3389 || TREE_CODE (bsi_stmt (si)) == OMP_ORDERED
3390 || TREE_CODE (bsi_stmt (si)) == OMP_CRITICAL);
3391 bsi_remove (&si, true);
3392 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
3394 if (exit_bb)
3396 si = bsi_last (exit_bb);
3397 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
3398 bsi_remove (&si, true);
3399 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
3404 /* Expand the parallel region tree rooted at REGION. Expansion
3405 proceeds in depth-first order. Innermost regions are expanded
3406 first. This way, parallel regions that require a new function to
3407 be created (e.g., OMP_PARALLEL) can be expanded without having any
3408 internal dependencies in their body. */
3410 static void
3411 expand_omp (struct omp_region *region)
3413 while (region)
3415 if (region->inner)
3416 expand_omp (region->inner);
3418 switch (region->type)
3420 case OMP_PARALLEL:
3421 expand_omp_parallel (region);
3422 break;
3424 case OMP_FOR:
3425 expand_omp_for (region);
3426 break;
3428 case OMP_SECTIONS:
3429 expand_omp_sections (region);
3430 break;
3432 case OMP_SECTION:
3433 /* Individual omp sections are handled together with their
3434 parent OMP_SECTIONS region. */
3435 break;
3437 case OMP_SINGLE:
3438 expand_omp_single (region);
3439 break;
3441 case OMP_MASTER:
3442 case OMP_ORDERED:
3443 case OMP_CRITICAL:
3444 expand_omp_synch (region);
3445 break;
3447 default:
3448 gcc_unreachable ();
3451 region = region->next;
3456 /* Helper for build_omp_regions. Scan the dominator tree starting at
3457 block BB. PARENT is the region that contains BB. */
3459 static void
3460 build_omp_regions_1 (basic_block bb, struct omp_region *parent)
3462 block_stmt_iterator si;
3463 tree stmt;
3464 basic_block son;
3466 si = bsi_last (bb);
3467 if (!bsi_end_p (si) && OMP_DIRECTIVE_P (bsi_stmt (si)))
3469 struct omp_region *region;
3470 enum tree_code code;
3472 stmt = bsi_stmt (si);
3473 code = TREE_CODE (stmt);
3475 if (code == OMP_RETURN)
3477 /* STMT is the return point out of region PARENT. Mark it
3478 as the exit point and make PARENT the immediately
3479 enclosing region. */
3480 gcc_assert (parent);
3481 region = parent;
3482 region->exit = bb;
3483 parent = parent->outer;
3485 /* If REGION is a parallel region, determine whether it is
3486 a combined parallel+workshare region. */
3487 if (region->type == OMP_PARALLEL)
3488 determine_parallel_type (region);
3490 else if (code == OMP_CONTINUE)
3492 gcc_assert (parent);
3493 parent->cont = bb;
3495 else
3497 /* Otherwise, this directive becomes the parent for a new
3498 region. */
3499 region = new_omp_region (bb, code, parent);
3500 parent = region;
3504 for (son = first_dom_son (CDI_DOMINATORS, bb);
3505 son;
3506 son = next_dom_son (CDI_DOMINATORS, son))
3507 build_omp_regions_1 (son, parent);
3511 /* Scan the CFG and build a tree of OMP regions. The root of the
3512 tree is left in ROOT_OMP_REGION. */
3514 static void
3515 build_omp_regions (void)
3517 gcc_assert (root_omp_region == NULL);
3518 calculate_dominance_info (CDI_DOMINATORS);
3519 build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL);
3523 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
3525 static unsigned int
3526 execute_expand_omp (void)
3528 build_omp_regions ();
3530 if (!root_omp_region)
3531 return 0;
3533 if (dump_file)
3535 fprintf (dump_file, "\nOMP region tree\n\n");
3536 dump_omp_region (dump_file, root_omp_region, 0);
3537 fprintf (dump_file, "\n");
3540 remove_exit_barriers (root_omp_region);
3542 expand_omp (root_omp_region);
3544 free_dominance_info (CDI_DOMINATORS);
3545 free_dominance_info (CDI_POST_DOMINATORS);
3546 cleanup_tree_cfg ();
3548 free_omp_regions ();
3550 return 0;
3553 static bool
3554 gate_expand_omp (void)
3556 return flag_openmp != 0 && errorcount == 0;
3559 struct tree_opt_pass pass_expand_omp =
3561 "ompexp", /* name */
3562 gate_expand_omp, /* gate */
3563 execute_expand_omp, /* execute */
3564 NULL, /* sub */
3565 NULL, /* next */
3566 0, /* static_pass_number */
3567 0, /* tv_id */
3568 PROP_gimple_any, /* properties_required */
3569 PROP_gimple_lomp, /* properties_provided */
3570 0, /* properties_destroyed */
3571 0, /* todo_flags_start */
3572 TODO_dump_func, /* todo_flags_finish */
3573 0 /* letter */
3576 /* Routines to lower OpenMP directives into OMP-GIMPLE. */
3578 /* Lower the OpenMP sections directive in *STMT_P. */
3580 static void
3581 lower_omp_sections (tree *stmt_p, omp_context *ctx)
3583 tree new_stmt, stmt, body, bind, block, ilist, olist, new_body;
3584 tree t, dlist;
3585 tree_stmt_iterator tsi;
3586 unsigned i, len;
3588 stmt = *stmt_p;
3590 push_gimplify_context ();
3592 dlist = NULL;
3593 ilist = NULL;
3594 lower_rec_input_clauses (OMP_SECTIONS_CLAUSES (stmt), &ilist, &dlist, ctx);
3596 tsi = tsi_start (OMP_SECTIONS_BODY (stmt));
3597 for (len = 0; !tsi_end_p (tsi); len++, tsi_next (&tsi))
3598 continue;
3600 tsi = tsi_start (OMP_SECTIONS_BODY (stmt));
3601 body = alloc_stmt_list ();
3602 for (i = 0; i < len; i++, tsi_next (&tsi))
3604 omp_context *sctx;
3605 tree sec_start, sec_end;
3607 sec_start = tsi_stmt (tsi);
3608 sctx = maybe_lookup_ctx (sec_start);
3609 gcc_assert (sctx);
3611 append_to_statement_list (sec_start, &body);
3613 lower_omp (&OMP_SECTION_BODY (sec_start), sctx);
3614 append_to_statement_list (OMP_SECTION_BODY (sec_start), &body);
3615 OMP_SECTION_BODY (sec_start) = NULL;
3617 if (i == len - 1)
3619 tree l = alloc_stmt_list ();
3620 lower_lastprivate_clauses (OMP_SECTIONS_CLAUSES (stmt), NULL,
3621 &l, ctx);
3622 append_to_statement_list (l, &body);
3623 OMP_SECTION_LAST (sec_start) = 1;
3626 sec_end = make_node (OMP_RETURN);
3627 append_to_statement_list (sec_end, &body);
3630 block = make_node (BLOCK);
3631 bind = build3 (BIND_EXPR, void_type_node, NULL, body, block);
3633 olist = NULL_TREE;
3634 lower_reduction_clauses (OMP_SECTIONS_CLAUSES (stmt), &olist, ctx);
3636 pop_gimplify_context (NULL_TREE);
3637 record_vars_into (ctx->block_vars, ctx->cb.dst_fn);
3639 new_stmt = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
3640 TREE_SIDE_EFFECTS (new_stmt) = 1;
3642 new_body = alloc_stmt_list ();
3643 append_to_statement_list (ilist, &new_body);
3644 append_to_statement_list (stmt, &new_body);
3645 append_to_statement_list (bind, &new_body);
3647 t = make_node (OMP_CONTINUE);
3648 append_to_statement_list (t, &new_body);
3650 append_to_statement_list (olist, &new_body);
3651 append_to_statement_list (dlist, &new_body);
3653 maybe_catch_exception (&new_body);
3655 t = make_node (OMP_RETURN);
3656 OMP_RETURN_NOWAIT (t) = !!find_omp_clause (OMP_SECTIONS_CLAUSES (stmt),
3657 OMP_CLAUSE_NOWAIT);
3658 append_to_statement_list (t, &new_body);
3660 BIND_EXPR_BODY (new_stmt) = new_body;
3661 OMP_SECTIONS_BODY (stmt) = NULL;
3663 *stmt_p = new_stmt;
3667 /* A subroutine of lower_omp_single. Expand the simple form of
3668 an OMP_SINGLE, without a copyprivate clause:
3670 if (GOMP_single_start ())
3671 BODY;
3672 [ GOMP_barrier (); ] -> unless 'nowait' is present.
3674 FIXME. It may be better to delay expanding the logic of this until
3675 pass_expand_omp. The expanded logic may make the job of a
3676 synchronization analysis pass more difficult. */
3678 static void
3679 lower_omp_single_simple (tree single_stmt, tree *pre_p)
3681 tree t;
3683 t = built_in_decls[BUILT_IN_GOMP_SINGLE_START];
3684 t = build_function_call_expr (t, NULL);
3685 t = build3 (COND_EXPR, void_type_node, t,
3686 OMP_SINGLE_BODY (single_stmt), NULL);
3687 gimplify_and_add (t, pre_p);
3691 /* A subroutine of lower_omp_single. Expand the form of an
3692 OMP_SINGLE that has a copyprivate clause:
3694 #pragma omp single copyprivate (a, b, c)
3696 Create a new structure to hold copies of 'a', 'b' and 'c' and emit:
3698 {
3699 if ((copyout_p = GOMP_single_copy_start ()) == NULL)
3700 {
3701 BODY;
3702 copyout.a = a;
3703 copyout.b = b;
3704 copyout.c = c;
3705 GOMP_single_copy_end (&copyout);
3706 }
3707 else
3708 {
3709 a = copyout_p->a;
3710 b = copyout_p->b;
3711 c = copyout_p->c;
3712 }
3713 GOMP_barrier ();
3714 }
3716 FIXME. It may be better to delay expanding the logic of this until
3717 pass_expand_omp. The expanded logic may make the job of a
3718 synchronization analysis pass more difficult. */
3720 static void
3721 lower_omp_single_copy (tree single_stmt, tree *pre_p, omp_context *ctx)
3723 tree ptr_type, t, args, l0, l1, l2, copyin_seq;
3725 ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_copy_o");
3727 ptr_type = build_pointer_type (ctx->record_type);
3728 ctx->receiver_decl = create_tmp_var (ptr_type, ".omp_copy_i");
3730 l0 = create_artificial_label ();
3731 l1 = create_artificial_label ();
3732 l2 = create_artificial_label ();
3734 t = built_in_decls[BUILT_IN_GOMP_SINGLE_COPY_START];
3735 t = build_function_call_expr (t, NULL);
3736 t = fold_convert (ptr_type, t);
3737 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, ctx->receiver_decl, t);
3738 gimplify_and_add (t, pre_p);
3740 t = build2 (EQ_EXPR, boolean_type_node, ctx->receiver_decl,
3741 build_int_cst (ptr_type, 0));
3742 t = build3 (COND_EXPR, void_type_node, t,
3743 build_and_jump (&l0), build_and_jump (&l1));
3744 gimplify_and_add (t, pre_p);
3746 t = build1 (LABEL_EXPR, void_type_node, l0);
3747 gimplify_and_add (t, pre_p);
3749 append_to_statement_list (OMP_SINGLE_BODY (single_stmt), pre_p);
3751 copyin_seq = NULL;
3752 lower_copyprivate_clauses (OMP_SINGLE_CLAUSES (single_stmt), pre_p,
3753 &copyin_seq, ctx);
3755 t = build_fold_addr_expr (ctx->sender_decl);
3756 args = tree_cons (NULL, t, NULL);
3757 t = built_in_decls[BUILT_IN_GOMP_SINGLE_COPY_END];
3758 t = build_function_call_expr (t, args);
3759 gimplify_and_add (t, pre_p);
3761 t = build_and_jump (&l2);
3762 gimplify_and_add (t, pre_p);
3764 t = build1 (LABEL_EXPR, void_type_node, l1);
3765 gimplify_and_add (t, pre_p);
3767 append_to_statement_list (copyin_seq, pre_p);
3769 t = build1 (LABEL_EXPR, void_type_node, l2);
3770 gimplify_and_add (t, pre_p);
3774 /* Lower code for an OpenMP single directive. */
3776 static void
3777 lower_omp_single (tree *stmt_p, omp_context *ctx)
3779 tree t, bind, block, single_stmt = *stmt_p, dlist;
3781 push_gimplify_context ();
3783 block = make_node (BLOCK);
3784 *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
3785 TREE_SIDE_EFFECTS (bind) = 1;
3787 lower_rec_input_clauses (OMP_SINGLE_CLAUSES (single_stmt),
3788 &BIND_EXPR_BODY (bind), &dlist, ctx);
3789 lower_omp (&OMP_SINGLE_BODY (single_stmt), ctx);
3791 append_to_statement_list (single_stmt, &BIND_EXPR_BODY (bind));
3793 if (ctx->record_type)
3794 lower_omp_single_copy (single_stmt, &BIND_EXPR_BODY (bind), ctx);
3795 else
3796 lower_omp_single_simple (single_stmt, &BIND_EXPR_BODY (bind));
3798 OMP_SINGLE_BODY (single_stmt) = NULL;
3800 append_to_statement_list (dlist, &BIND_EXPR_BODY (bind));
3802 maybe_catch_exception (&BIND_EXPR_BODY (bind));
3804 t = make_node (OMP_RETURN);
3805 OMP_RETURN_NOWAIT (t) = !!find_omp_clause (OMP_SINGLE_CLAUSES (single_stmt),
3806 OMP_CLAUSE_NOWAIT);
3807 append_to_statement_list (t, &BIND_EXPR_BODY (bind));
3809 pop_gimplify_context (bind);
3811 BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
3812 BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
3816 /* Lower code for an OpenMP master directive. */
3818 static void
3819 lower_omp_master (tree *stmt_p, omp_context *ctx)
3821 tree bind, block, stmt = *stmt_p, lab = NULL, x;
3823 push_gimplify_context ();
3825 block = make_node (BLOCK);
3826 *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
3827 TREE_SIDE_EFFECTS (bind) = 1;
3829 append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
3831 x = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
3832 x = build_function_call_expr (x, NULL);
3833 x = build2 (EQ_EXPR, boolean_type_node, x, integer_zero_node);
3834 x = build3 (COND_EXPR, void_type_node, x, NULL, build_and_jump (&lab));
3835 gimplify_and_add (x, &BIND_EXPR_BODY (bind));
3837 lower_omp (&OMP_MASTER_BODY (stmt), ctx);
3838 maybe_catch_exception (&OMP_MASTER_BODY (stmt));
3839 append_to_statement_list (OMP_MASTER_BODY (stmt), &BIND_EXPR_BODY (bind));
3840 OMP_MASTER_BODY (stmt) = NULL;
3842 x = build1 (LABEL_EXPR, void_type_node, lab);
3843 gimplify_and_add (x, &BIND_EXPR_BODY (bind));
3845 x = make_node (OMP_RETURN);
3846 OMP_RETURN_NOWAIT (x) = 1;
3847 append_to_statement_list (x, &BIND_EXPR_BODY (bind));
3849 pop_gimplify_context (bind);
3851 BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
3852 BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
3856 /* Lower code for an OpenMP ordered directive. */
3858 static void
3859 lower_omp_ordered (tree *stmt_p, omp_context *ctx)
3861 tree bind, block, stmt = *stmt_p, x;
3863 push_gimplify_context ();
3865 block = make_node (BLOCK);
3866 *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
3867 TREE_SIDE_EFFECTS (bind) = 1;
3869 append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
3871 x = built_in_decls[BUILT_IN_GOMP_ORDERED_START];
3872 x = build_function_call_expr (x, NULL);
3873 gimplify_and_add (x, &BIND_EXPR_BODY (bind));
3875 lower_omp (&OMP_ORDERED_BODY (stmt), ctx);
3876 maybe_catch_exception (&OMP_ORDERED_BODY (stmt));
3877 append_to_statement_list (OMP_ORDERED_BODY (stmt), &BIND_EXPR_BODY (bind));
3878 OMP_ORDERED_BODY (stmt) = NULL;
3880 x = built_in_decls[BUILT_IN_GOMP_ORDERED_END];
3881 x = build_function_call_expr (x, NULL);
3882 gimplify_and_add (x, &BIND_EXPR_BODY (bind));
3884 x = make_node (OMP_RETURN);
3885 OMP_RETURN_NOWAIT (x) = 1;
3886 append_to_statement_list (x, &BIND_EXPR_BODY (bind));
3888 pop_gimplify_context (bind);
3890 BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
3891 BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
3895 /* Gimplify an OMP_CRITICAL statement. This is a relatively simple
3896 substitution of a couple of function calls. But in the NAMED case,
3897 it requires that the languages coordinate a symbol name, so it is
3898 best put here in common code. */
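/* The net effect (a sketch): '#pragma omp critical (foo)' lowers to
     GOMP_critical_name_start (&.gomp_critical_user_foo);
     BODY;
     GOMP_critical_name_end (&.gomp_critical_user_foo);
   while the unnamed form calls GOMP_critical_start and
   GOMP_critical_end with no argument.  */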
3900 static GTY((param1_is (tree), param2_is (tree)))
3901 splay_tree critical_name_mutexes;
3903 static void
3904 lower_omp_critical (tree *stmt_p, omp_context *ctx)
3906 tree bind, block, stmt = *stmt_p;
3907 tree t, lock, unlock, name;
3909 name = OMP_CRITICAL_NAME (stmt);
3910 if (name)
3912 tree decl, args;
3913 splay_tree_node n;
3915 if (!critical_name_mutexes)
3916 critical_name_mutexes
3917 = splay_tree_new_ggc (splay_tree_compare_pointers);
3919 n = splay_tree_lookup (critical_name_mutexes, (splay_tree_key) name);
3920 if (n == NULL)
3922 char *new_str;
3924 decl = create_tmp_var_raw (ptr_type_node, NULL);
3926 new_str = ACONCAT ((".gomp_critical_user_",
3927 IDENTIFIER_POINTER (name), NULL));
3928 DECL_NAME (decl) = get_identifier (new_str);
3929 TREE_PUBLIC (decl) = 1;
3930 TREE_STATIC (decl) = 1;
3931 DECL_COMMON (decl) = 1;
3932 DECL_ARTIFICIAL (decl) = 1;
3933 DECL_IGNORED_P (decl) = 1;
3934 varpool_finalize_decl (decl);
3936 splay_tree_insert (critical_name_mutexes, (splay_tree_key) name,
3937 (splay_tree_value) decl);
3939 else
3940 decl = (tree) n->value;
3942 args = tree_cons (NULL, build_fold_addr_expr (decl), NULL);
3943 lock = built_in_decls[BUILT_IN_GOMP_CRITICAL_NAME_START];
3944 lock = build_function_call_expr (lock, args);
3946 args = tree_cons (NULL, build_fold_addr_expr (decl), NULL);
3947 unlock = built_in_decls[BUILT_IN_GOMP_CRITICAL_NAME_END];
3948 unlock = build_function_call_expr (unlock, args);
3950 else
3952 lock = built_in_decls[BUILT_IN_GOMP_CRITICAL_START];
3953 lock = build_function_call_expr (lock, NULL);
3955 unlock = built_in_decls[BUILT_IN_GOMP_CRITICAL_END];
3956 unlock = build_function_call_expr (unlock, NULL);
3959 push_gimplify_context ();
3961 block = make_node (BLOCK);
3962 *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
3963 TREE_SIDE_EFFECTS (bind) = 1;
3965 append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
3967 gimplify_and_add (lock, &BIND_EXPR_BODY (bind));
3969 lower_omp (&OMP_CRITICAL_BODY (stmt), ctx);
3970 maybe_catch_exception (&OMP_CRITICAL_BODY (stmt));
3971 append_to_statement_list (OMP_CRITICAL_BODY (stmt), &BIND_EXPR_BODY (bind));
3972 OMP_CRITICAL_BODY (stmt) = NULL;
3974 gimplify_and_add (unlock, &BIND_EXPR_BODY (bind));
3976 t = make_node (OMP_RETURN);
3977 OMP_RETURN_NOWAIT (t) = 1;
3978 append_to_statement_list (t, &BIND_EXPR_BODY (bind));
3980 pop_gimplify_context (bind);
3981 BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
3982 BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
3986 /* A subroutine of lower_omp_for. Generate code to emit the predicate
3987 for a lastprivate clause. Given a loop control predicate of (V
3988 cond N2), we gate the clause on (!(V cond N2)). The lowered form
3989 is appended to *DLIST; iterator initialization is appended to
3990 *BODY_P. */
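/* For instance, for 'for (i = 0; i < N; i++)' the unit step lets the
   gate below become (i == N): only the thread that executed the
   final iteration runs the lastprivate copy-out, and V is
   pre-initialized so threads assigned no iterations cannot satisfy
   the predicate by accident.  */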
3992 static void
3993 lower_omp_for_lastprivate (struct omp_for_data *fd, tree *body_p,
3994 tree *dlist, struct omp_context *ctx)
3996 tree clauses, cond, stmts, vinit, t;
3997 enum tree_code cond_code;
3999 cond_code = fd->cond_code;
4000 cond_code = cond_code == LT_EXPR ? GE_EXPR : LE_EXPR;
4002 /* When possible, use a strict equality expression. This can let
4003 VRP-style optimizations deduce the value and remove a copy. */
4004 if (host_integerp (fd->step, 0))
4006 HOST_WIDE_INT step = TREE_INT_CST_LOW (fd->step);
4007 if (step == 1 || step == -1)
4008 cond_code = EQ_EXPR;
4011 cond = build2 (cond_code, boolean_type_node, fd->v, fd->n2);
4013 clauses = OMP_FOR_CLAUSES (fd->for_stmt);
4014 stmts = NULL;
4015 lower_lastprivate_clauses (clauses, cond, &stmts, ctx);
4016 if (stmts != NULL)
4018 append_to_statement_list (stmts, dlist);
4020 /* Optimize: v = 0; is usually cheaper than v = some_other_constant. */
4021 vinit = fd->n1;
4022 if (cond_code == EQ_EXPR
4023 && host_integerp (fd->n2, 0)
4024 && ! integer_zerop (fd->n2))
4025 vinit = build_int_cst (TREE_TYPE (fd->v), 0);
4027 /* Initialize the iterator variable, so that threads that don't execute
4028 any iterations don't execute the lastprivate clauses by accident. */
4029 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, fd->v, vinit);
4030 gimplify_and_add (t, body_p);
4035 /* Lower code for an OpenMP loop directive. */
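/* A sketch of the result (illustrative, not generated text):

     <pre-body statements>
     <private/firstprivate/reduction setup>   (lower_rec_input_clauses)
     <header temporaries, lastprivate V init>
     OMP_FOR
       <loop body>
     OMP_CONTINUE
     <reduction merges>                       (lower_reduction_clauses)
     <lastprivate copy-out, destructors>      (dlist)
     OMP_RETURN [nowait]

   all wrapped in a new BIND_EXPR that replaces the original statement.  */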
static void
lower_omp_for (tree *stmt_p, omp_context *ctx)
{
  tree t, stmt, ilist, dlist, new_stmt, *body_p, *rhs_p;
  struct omp_for_data fd;

  stmt = *stmt_p;

  push_gimplify_context ();

  lower_omp (&OMP_FOR_PRE_BODY (stmt), ctx);
  lower_omp (&OMP_FOR_BODY (stmt), ctx);

  /* Move declaration of temporaries in the loop body before we make
     it go away.  */
  if (TREE_CODE (OMP_FOR_BODY (stmt)) == BIND_EXPR)
    record_vars_into (BIND_EXPR_VARS (OMP_FOR_BODY (stmt)), ctx->cb.dst_fn);

  new_stmt = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
  TREE_SIDE_EFFECTS (new_stmt) = 1;
  body_p = &BIND_EXPR_BODY (new_stmt);

  /* The pre-body and input clauses go before the lowered OMP_FOR.  */
  ilist = NULL;
  dlist = NULL;
  append_to_statement_list (OMP_FOR_PRE_BODY (stmt), body_p);
  lower_rec_input_clauses (OMP_FOR_CLAUSES (stmt), body_p, &dlist, ctx);

  /* Lower the header expressions.  At this point, we can assume that
     the header is of the form:

	#pragma omp for (V = VAL1; V {<|>|<=|>=} VAL2; V = V [+-] VAL3)

     We just need to make sure that VAL1, VAL2 and VAL3 are lowered
     using the .omp_data_s mapping, if needed.  */
  rhs_p = &GIMPLE_STMT_OPERAND (OMP_FOR_INIT (stmt), 1);
  if (!is_gimple_min_invariant (*rhs_p))
    *rhs_p = get_formal_tmp_var (*rhs_p, body_p);

  rhs_p = &TREE_OPERAND (OMP_FOR_COND (stmt), 1);
  if (!is_gimple_min_invariant (*rhs_p))
    *rhs_p = get_formal_tmp_var (*rhs_p, body_p);

  rhs_p = &TREE_OPERAND (GIMPLE_STMT_OPERAND (OMP_FOR_INCR (stmt), 1), 1);
  if (!is_gimple_min_invariant (*rhs_p))
    *rhs_p = get_formal_tmp_var (*rhs_p, body_p);

  /* Once lowered, extract the bounds and clauses.  */
  extract_omp_for_data (stmt, &fd);

  lower_omp_for_lastprivate (&fd, body_p, &dlist, ctx);

  append_to_statement_list (stmt, body_p);

  append_to_statement_list (OMP_FOR_BODY (stmt), body_p);

  t = make_node (OMP_CONTINUE);
  append_to_statement_list (t, body_p);

  /* After the loop, add exit clauses.  */
  lower_reduction_clauses (OMP_FOR_CLAUSES (stmt), body_p, ctx);
  append_to_statement_list (dlist, body_p);

  maybe_catch_exception (body_p);

  /* Region exit marker goes at the end of the loop body.  */
  t = make_node (OMP_RETURN);
  OMP_RETURN_NOWAIT (t) = fd.have_nowait;
  append_to_statement_list (t, body_p);

  pop_gimplify_context (NULL_TREE);
  record_vars_into (ctx->block_vars, ctx->cb.dst_fn);

  OMP_FOR_BODY (stmt) = NULL_TREE;
  OMP_FOR_PRE_BODY (stmt) = NULL_TREE;
  *stmt_p = new_stmt;
}


/* Lower the OpenMP parallel directive in *STMT_P.  CTX holds context
   information for the directive.  */
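/* A sketch of the data marshalling (illustrative): shared and copied
   variables are channelled through a local record .omp_data_o on the
   sending side and read back through the child's receiver decl:

     .omp_data_o.x = x;                 <- ilist, in the parent
     OMP_PARALLEL [DATA_ARG = .omp_data_o]
       .omp_data_i = &.omp_data_o;      <- head of the new body
       ... body referencing .omp_data_i->x ...
     x = .omp_data_o.x;                 <- olist, copy-back if needed

   The actual call into libgomp is emitted later, by pass_expand_omp.  */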
static void
lower_omp_parallel (tree *stmt_p, omp_context *ctx)
{
  tree clauses, par_bind, par_body, new_body, bind;
  tree olist, ilist, par_olist, par_ilist;
  tree stmt, child_fn, t;

  stmt = *stmt_p;

  clauses = OMP_PARALLEL_CLAUSES (stmt);
  par_bind = OMP_PARALLEL_BODY (stmt);
  par_body = BIND_EXPR_BODY (par_bind);
  child_fn = ctx->cb.dst_fn;

  push_gimplify_context ();

  par_olist = NULL_TREE;
  par_ilist = NULL_TREE;
  lower_rec_input_clauses (clauses, &par_ilist, &par_olist, ctx);
  lower_omp (&par_body, ctx);
  lower_reduction_clauses (clauses, &par_olist, ctx);

  /* Declare all the variables created by mapping and the variables
     declared in the scope of the parallel body.  */
  record_vars_into (ctx->block_vars, child_fn);
  record_vars_into (BIND_EXPR_VARS (par_bind), child_fn);

  if (ctx->record_type)
    {
      ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_data_o");
      OMP_PARALLEL_DATA_ARG (stmt) = ctx->sender_decl;
    }

  olist = NULL_TREE;
  ilist = NULL_TREE;
  lower_send_clauses (clauses, &ilist, &olist, ctx);
  lower_send_shared_vars (&ilist, &olist, ctx);

  /* Once all the expansions are done, sequence all the different
     fragments inside OMP_PARALLEL_BODY.  */
  bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
  append_to_statement_list (ilist, &BIND_EXPR_BODY (bind));

  new_body = alloc_stmt_list ();

  if (ctx->record_type)
    {
      t = build_fold_addr_expr (ctx->sender_decl);
      /* fixup_child_record_type might have changed receiver_decl's type.  */
      t = fold_convert (TREE_TYPE (ctx->receiver_decl), t);
      t = build2 (GIMPLE_MODIFY_STMT, void_type_node, ctx->receiver_decl, t);
      append_to_statement_list (t, &new_body);
    }

  append_to_statement_list (par_ilist, &new_body);
  append_to_statement_list (par_body, &new_body);
  append_to_statement_list (par_olist, &new_body);
  maybe_catch_exception (&new_body);
  t = make_node (OMP_RETURN);
  append_to_statement_list (t, &new_body);
  OMP_PARALLEL_BODY (stmt) = new_body;

  append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
  append_to_statement_list (olist, &BIND_EXPR_BODY (bind));

  *stmt_p = bind;

  pop_gimplify_context (NULL_TREE);
}
/* Pass *TP back through the gimplifier within the context determined by WI.
   This handles replacement of DECL_VALUE_EXPR, as well as adjusting the
   flags on ADDR_EXPR.  */

static void
lower_regimplify (tree *tp, struct walk_stmt_info *wi)
{
  enum gimplify_status gs;
  tree pre = NULL;

  if (wi->is_lhs)
    gs = gimplify_expr (tp, &pre, NULL, is_gimple_lvalue, fb_lvalue);
  else if (wi->val_only)
    gs = gimplify_expr (tp, &pre, NULL, is_gimple_val, fb_rvalue);
  else
    gs = gimplify_expr (tp, &pre, NULL, is_gimple_formal_tmp_var, fb_rvalue);
  gcc_assert (gs == GS_ALL_DONE);

  if (pre)
    tsi_link_before (&wi->tsi, pre, TSI_SAME_STMT);
}

/* Copy EXP into a temporary.  Insert the initialization statement
   before TSI.  */

static tree
init_tmp_var (tree exp, tree_stmt_iterator *tsi)
{
  tree t, stmt;

  t = create_tmp_var (TREE_TYPE (exp), NULL);
  DECL_GIMPLE_REG_P (t) = 1;
  stmt = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (t), t, exp);
  SET_EXPR_LOCUS (stmt, EXPR_LOCUS (tsi_stmt (*tsi)));
  tsi_link_before (tsi, stmt, TSI_SAME_STMT);

  return t;
}

/* Similarly, but copy from the temporary and insert the statement
   after the iterator.  */

static tree
save_tmp_var (tree exp, tree_stmt_iterator *tsi)
{
  tree t, stmt;

  t = create_tmp_var (TREE_TYPE (exp), NULL);
  DECL_GIMPLE_REG_P (t) = 1;
  stmt = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (t), exp, t);
  SET_EXPR_LOCUS (stmt, EXPR_LOCUS (tsi_stmt (*tsi)));
  tsi_link_after (tsi, stmt, TSI_SAME_STMT);

  return t;
}
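/* Together these two helpers let a rewritten VAR_DECL stand where a
   gimple value is required: init_tmp_var loads the (possibly complex)
   replacement into a fresh temporary before the statement, while
   save_tmp_var stores the temporary back after it when the original
   appeared on the left-hand side.  See the VAR_DECL case below.  */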
/* Callback for walk_stmts.  Lower the OpenMP directive pointed to by TP.  */
static tree
lower_omp_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = data;
  omp_context *ctx = wi->info;
  tree t = *tp;

  /* If we have issued syntax errors, avoid doing any heavy lifting.
     Just replace the OpenMP directives with a NOP to avoid
     confusing RTL expansion.  */
  if (errorcount && OMP_DIRECTIVE_P (*tp))
    {
      *tp = build_empty_stmt ();
      return NULL_TREE;
    }

  *walk_subtrees = 0;
  switch (TREE_CODE (*tp))
    {
    case OMP_PARALLEL:
      ctx = maybe_lookup_ctx (t);
      lower_omp_parallel (tp, ctx);
      break;

    case OMP_FOR:
      ctx = maybe_lookup_ctx (t);
      gcc_assert (ctx);
      lower_omp_for (tp, ctx);
      break;

    case OMP_SECTIONS:
      ctx = maybe_lookup_ctx (t);
      gcc_assert (ctx);
      lower_omp_sections (tp, ctx);
      break;

    case OMP_SINGLE:
      ctx = maybe_lookup_ctx (t);
      gcc_assert (ctx);
      lower_omp_single (tp, ctx);
      break;

    case OMP_MASTER:
      ctx = maybe_lookup_ctx (t);
      gcc_assert (ctx);
      lower_omp_master (tp, ctx);
      break;

    case OMP_ORDERED:
      ctx = maybe_lookup_ctx (t);
      gcc_assert (ctx);
      lower_omp_ordered (tp, ctx);
      break;

    case OMP_CRITICAL:
      ctx = maybe_lookup_ctx (t);
      gcc_assert (ctx);
      lower_omp_critical (tp, ctx);
      break;
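    /* Variables given a DECL_VALUE_EXPR (e.g. those remapped into the
       .omp_data_* record) must be regimplified: substituting the value
       expression can yield something that is no longer a gimple value,
       so it is loaded into, or stored from, a temporary as needed.  */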
    case VAR_DECL:
      if (ctx && DECL_HAS_VALUE_EXPR_P (t))
	{
	  lower_regimplify (&t, wi);
	  if (wi->val_only)
	    {
	      if (wi->is_lhs)
		t = save_tmp_var (t, &wi->tsi);
	      else
		t = init_tmp_var (t, &wi->tsi);
	    }
	  *tp = t;
	}
      break;

    case ADDR_EXPR:
      if (ctx)
	lower_regimplify (tp, wi);
      break;

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
    case REALPART_EXPR:
    case IMAGPART_EXPR:
    case COMPONENT_REF:
    case VIEW_CONVERT_EXPR:
      if (ctx)
	lower_regimplify (tp, wi);
      break;

    case INDIRECT_REF:
      if (ctx)
	{
	  wi->is_lhs = false;
	  wi->val_only = true;
	  lower_regimplify (&TREE_OPERAND (t, 0), wi);
	}
      break;

    default:
      if (!TYPE_P (t) && !DECL_P (t))
	*walk_subtrees = 1;
      break;
    }

  return NULL_TREE;
}
static void
lower_omp (tree *stmt_p, omp_context *ctx)
{
  struct walk_stmt_info wi;

  memset (&wi, 0, sizeof (wi));
  wi.callback = lower_omp_1;
  wi.info = ctx;
  wi.val_only = true;
  wi.want_locations = true;

  walk_stmts (&wi, stmt_p);
}

/* Main entry point.  */
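/* scan_omp first builds the tree of omp_context structures and decides
   how each variable is to be shared; lower_omp then rewrites the
   function body using those decisions, but only if any OpenMP
   contexts were actually found.  */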
static unsigned int
execute_lower_omp (void)
{
  all_contexts = splay_tree_new (splay_tree_compare_pointers, 0,
				 delete_omp_context);

  scan_omp (&DECL_SAVED_TREE (current_function_decl), NULL);
  gcc_assert (parallel_nesting_level == 0);

  if (all_contexts->root)
    lower_omp (&DECL_SAVED_TREE (current_function_decl), NULL);

  if (all_contexts)
    {
      splay_tree_delete (all_contexts);
      all_contexts = NULL;
    }
  return 0;
}

static bool
gate_lower_omp (void)
{
  return flag_openmp != 0;
}

struct tree_opt_pass pass_lower_omp =
{
  "omplower",				/* name */
  gate_lower_omp,			/* gate */
  execute_lower_omp,			/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  0,					/* tv_id */
  PROP_gimple_any,			/* properties_required */
  PROP_gimple_lomp,			/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func,			/* todo_flags_finish */
  0					/* letter */
};
/* The following is a utility to diagnose OpenMP structured block violations.
   It is not part of the "omplower" pass, as that's invoked too late.  It
   should be invoked by the respective front ends after gimplification.  */
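/* For example (illustrative), both of these are rejected:

     #pragma omp parallel
       { goto out; }	-> "invalid exit from OpenMP structured block"
     out:;

     goto in;
     #pragma omp parallel
       { in:; }		-> "invalid entry to OpenMP structured block"  */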
static splay_tree all_labels;

/* Check for mismatched contexts and generate an error if needed.  Return
   true if an error is detected.  */
static bool
diagnose_sb_0 (tree *stmt_p, tree branch_ctx, tree label_ctx)
{
  bool exit_p = true;

  if ((label_ctx ? TREE_VALUE (label_ctx) : NULL) == branch_ctx)
    return false;

  /* Try to avoid confusing the user by producing an error message
     with the correct "exit" or "enter" verbiage.  We prefer "exit"
     unless we can show that LABEL_CTX is nested within BRANCH_CTX.  */
  if (branch_ctx == NULL)
    exit_p = false;
  else
    {
      while (label_ctx)
	{
	  if (TREE_VALUE (label_ctx) == branch_ctx)
	    {
	      exit_p = false;
	      break;
	    }
	  label_ctx = TREE_CHAIN (label_ctx);
	}
    }

  if (exit_p)
    error ("invalid exit from OpenMP structured block");
  else
    error ("invalid entry to OpenMP structured block");

  *stmt_p = build_empty_stmt ();
  return true;
}
/* Pass 1: Create a minimal tree of OpenMP structured blocks, and record
   where in the tree each label is found.  */

static tree
diagnose_sb_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = data;
  tree context = (tree) wi->info;
  tree inner_context;
  tree t = *tp;

  *walk_subtrees = 0;
  switch (TREE_CODE (t))
    {
    case OMP_PARALLEL:
    case OMP_SECTIONS:
    case OMP_SINGLE:
      walk_tree (&OMP_CLAUSES (t), diagnose_sb_1, wi, NULL);
      /* FALLTHRU */
    case OMP_SECTION:
    case OMP_MASTER:
    case OMP_ORDERED:
    case OMP_CRITICAL:
      /* The minimal context here is just a tree of statements.  */
      inner_context = tree_cons (NULL, t, context);
      wi->info = inner_context;
      walk_stmts (wi, &OMP_BODY (t));
      wi->info = context;
      break;

    case OMP_FOR:
      walk_tree (&OMP_FOR_CLAUSES (t), diagnose_sb_1, wi, NULL);
      inner_context = tree_cons (NULL, t, context);
      wi->info = inner_context;
      walk_tree (&OMP_FOR_INIT (t), diagnose_sb_1, wi, NULL);
      walk_tree (&OMP_FOR_COND (t), diagnose_sb_1, wi, NULL);
      walk_tree (&OMP_FOR_INCR (t), diagnose_sb_1, wi, NULL);
      walk_stmts (wi, &OMP_FOR_PRE_BODY (t));
      walk_stmts (wi, &OMP_FOR_BODY (t));
      wi->info = context;
      break;

    case LABEL_EXPR:
      splay_tree_insert (all_labels, (splay_tree_key) LABEL_EXPR_LABEL (t),
			 (splay_tree_value) context);
      break;

    default:
      break;
    }

  return NULL_TREE;
}
/* Pass 2: Check each branch and see if its context differs from that
   of its destination label.  */
static tree
diagnose_sb_2 (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = data;
  tree context = (tree) wi->info;
  splay_tree_node n;
  tree t = *tp;

  *walk_subtrees = 0;
  switch (TREE_CODE (t))
    {
    case OMP_PARALLEL:
    case OMP_SECTIONS:
    case OMP_SINGLE:
      walk_tree (&OMP_CLAUSES (t), diagnose_sb_2, wi, NULL);
      /* FALLTHRU */
    case OMP_SECTION:
    case OMP_MASTER:
    case OMP_ORDERED:
    case OMP_CRITICAL:
      wi->info = t;
      walk_stmts (wi, &OMP_BODY (t));
      wi->info = context;
      break;

    case OMP_FOR:
      walk_tree (&OMP_FOR_CLAUSES (t), diagnose_sb_2, wi, NULL);
      wi->info = t;
      walk_tree (&OMP_FOR_INIT (t), diagnose_sb_2, wi, NULL);
      walk_tree (&OMP_FOR_COND (t), diagnose_sb_2, wi, NULL);
      walk_tree (&OMP_FOR_INCR (t), diagnose_sb_2, wi, NULL);
      walk_stmts (wi, &OMP_FOR_PRE_BODY (t));
      walk_stmts (wi, &OMP_FOR_BODY (t));
      wi->info = context;
      break;

    case GOTO_EXPR:
      {
	tree lab = GOTO_DESTINATION (t);
	if (TREE_CODE (lab) != LABEL_DECL)
	  break;

	n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
	diagnose_sb_0 (tp, context, n ? (tree) n->value : NULL_TREE);
      }
      break;

    case SWITCH_EXPR:
      {
	tree vec = SWITCH_LABELS (t);
	int i, len = TREE_VEC_LENGTH (vec);
	for (i = 0; i < len; ++i)
	  {
	    tree lab = CASE_LABEL (TREE_VEC_ELT (vec, i));
	    n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
	    if (diagnose_sb_0 (tp, context, (tree) n->value))
	      break;
	  }
      }
      break;

    case RETURN_EXPR:
      diagnose_sb_0 (tp, context, NULL_TREE);
      break;

    default:
      break;
    }

  return NULL_TREE;
}
void
diagnose_omp_structured_block_errors (tree fndecl)
{
  tree save_current = current_function_decl;
  struct walk_stmt_info wi;

  current_function_decl = fndecl;

  all_labels = splay_tree_new (splay_tree_compare_pointers, 0, 0);

  memset (&wi, 0, sizeof (wi));
  wi.callback = diagnose_sb_1;
  walk_stmts (&wi, &DECL_SAVED_TREE (fndecl));

  memset (&wi, 0, sizeof (wi));
  wi.callback = diagnose_sb_2;
  wi.want_locations = true;
  wi.want_return_expr = true;
  walk_stmts (&wi, &DECL_SAVED_TREE (fndecl));

  splay_tree_delete (all_labels);
  all_labels = NULL;

  current_function_decl = save_current;
}
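/* Include the garbage-collection root tables that gengtype generates
   for the GTY'd critical_name_mutexes above.  */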
#include "gt-omp-low.h"