/* General types and functions that are useful for processing of OpenMP,
   OpenACC and similar directives at various stages of compilation.

   Copyright (C) 2005-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "langhooks.h"
#include "omp-general.h"
#include "stringpool.h"

/* Find an OMP clause of type KIND within CLAUSES.  */

tree
omp_find_clause (tree clauses, enum omp_clause_code kind)
{
  for (; clauses; clauses = OMP_CLAUSE_CHAIN (clauses))
    if (OMP_CLAUSE_CODE (clauses) == kind)
      return clauses;

  return NULL_TREE;
}
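
/* Illustrative usage sketch (hypothetical caller, not part of this file):
   look up the schedule clause of a gomp_for and fall back to the default
   when it is absent.

     tree clauses = gimple_omp_for_clauses (for_stmt);
     tree sched = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
     enum omp_clause_schedule_kind kind
       = sched ? OMP_CLAUSE_SCHEDULE_KIND (sched) : OMP_CLAUSE_SCHEDULE_STATIC;
*/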
/* Return true if DECL is a reference type.  */

bool
omp_is_reference (tree decl)
{
  return lang_hooks.decls.omp_privatize_by_reference (decl);
}
/* Adjust *COND_CODE and *N2 so that the former is either LT_EXPR or
   GT_EXPR.  */

void
omp_adjust_for_condition (location_t loc, enum tree_code *cond_code, tree *n2)
{
  switch (*cond_code)
    {
    case LT_EXPR:
    case GT_EXPR:
      break;
    case LE_EXPR:
      if (POINTER_TYPE_P (TREE_TYPE (*n2)))
        *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, 1);
      else
        *n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (*n2), *n2,
                               build_int_cst (TREE_TYPE (*n2), 1));
      *cond_code = LT_EXPR;
      break;
    case GE_EXPR:
      if (POINTER_TYPE_P (TREE_TYPE (*n2)))
        *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, -1);
      else
        *n2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (*n2), *n2,
                               build_int_cst (TREE_TYPE (*n2), 1));
      *cond_code = GT_EXPR;
      break;
    default:
      gcc_unreachable ();
    }
}
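
/* Worked example (illustrative, not part of this file): a source loop
   "for (i = 0; i <= n; i++)" reaches here with *cond_code == LE_EXPR and
   *n2 == n; the LE_EXPR case rewrites that to LT_EXPR with *n2 == n + 1.

     enum tree_code cc = LE_EXPR;
     tree bound = n;                  // hypothetical bound tree
     omp_adjust_for_condition (loc, &cc, &bound);
     // now cc == LT_EXPR and bound is the tree for n + 1

   A GE_EXPR bound is likewise turned into GT_EXPR with *n2 decremented.  */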
/* Return the looping step from INCR, extracted from the step of a gimple omp
   for statement.  */

tree
omp_get_for_step_from_incr (location_t loc, tree incr)
{
  tree step;
  switch (TREE_CODE (incr))
    {
    case PLUS_EXPR:
      step = TREE_OPERAND (incr, 1);
      break;
    case POINTER_PLUS_EXPR:
      step = fold_convert (ssizetype, TREE_OPERAND (incr, 1));
      break;
    case MINUS_EXPR:
      step = TREE_OPERAND (incr, 1);
      step = fold_build1_loc (loc, NEGATE_EXPR, TREE_TYPE (step), step);
      break;
    default:
      gcc_unreachable ();
    }
  return step;
}
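
/* Illustrative sketch (hypothetical trees, not part of this file): for the
   increment "i = i + 4" the gimple increment is a PLUS_EXPR and the step is
   operand 1, i.e. 4; for "i = i - 2" it is a MINUS_EXPR and the negated
   operand, -2, is returned; a pointer induction variable arrives as
   POINTER_PLUS_EXPR and its offset is first converted to ssizetype.

     tree incr = gimple_omp_for_incr (for_stmt, i);
     tree step = omp_get_for_step_from_incr (gimple_location (for_stmt), incr);
*/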
/* Extract the header elements of parallel loop FOR_STMT and store
   them into *FD.  */

void
omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
                      struct omp_for_data_loop *loops)
{
  tree t, var, *collapse_iter, *collapse_count;
  tree count = NULL_TREE, iter_type = long_integer_type_node;
  struct omp_for_data_loop *loop;
  int i;
  struct omp_for_data_loop dummy_loop;
  location_t loc = gimple_location (for_stmt);
  bool simd = gimple_omp_for_kind (for_stmt) & GF_OMP_FOR_SIMD;
  bool distribute = gimple_omp_for_kind (for_stmt)
                    == GF_OMP_FOR_KIND_DISTRIBUTE;
  bool taskloop = gimple_omp_for_kind (for_stmt)
                  == GF_OMP_FOR_KIND_TASKLOOP;
  tree iterv, countv;

  fd->for_stmt = for_stmt;
  fd->pre = NULL;
  fd->have_nowait = distribute || simd;
  fd->have_ordered = false;
  fd->tiling = NULL_TREE;
  fd->collapse = 1;
  fd->ordered = 0;
  fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
  fd->sched_modifiers = 0;
  fd->chunk_size = NULL_TREE;
  fd->simd_schedule = false;
  collapse_iter = NULL;
  collapse_count = NULL;
  for (t = gimple_omp_for_clauses (for_stmt); t; t = OMP_CLAUSE_CHAIN (t))
    switch (OMP_CLAUSE_CODE (t))
      {
      case OMP_CLAUSE_NOWAIT:
        fd->have_nowait = true;
        break;
      case OMP_CLAUSE_ORDERED:
        fd->have_ordered = true;
        if (OMP_CLAUSE_ORDERED_EXPR (t))
          fd->ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (t));
        break;
      case OMP_CLAUSE_SCHEDULE:
        gcc_assert (!distribute && !taskloop);
        fd->sched_kind
          = (enum omp_clause_schedule_kind)
            (OMP_CLAUSE_SCHEDULE_KIND (t) & OMP_CLAUSE_SCHEDULE_MASK);
        fd->sched_modifiers = (OMP_CLAUSE_SCHEDULE_KIND (t)
                               & ~OMP_CLAUSE_SCHEDULE_MASK);
        fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t);
        fd->simd_schedule = OMP_CLAUSE_SCHEDULE_SIMD (t);
        break;
      case OMP_CLAUSE_DIST_SCHEDULE:
        gcc_assert (distribute);
        fd->chunk_size = OMP_CLAUSE_DIST_SCHEDULE_CHUNK_EXPR (t);
        break;
      case OMP_CLAUSE_COLLAPSE:
        fd->collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (t));
        if (fd->collapse > 1)
          {
            collapse_iter = &OMP_CLAUSE_COLLAPSE_ITERVAR (t);
            collapse_count = &OMP_CLAUSE_COLLAPSE_COUNT (t);
          }
        break;
      case OMP_CLAUSE_TILE:
        fd->tiling = OMP_CLAUSE_TILE_LIST (t);
        fd->collapse = list_length (fd->tiling);
        gcc_assert (fd->collapse);
        collapse_iter = &OMP_CLAUSE_TILE_ITERVAR (t);
        collapse_count = &OMP_CLAUSE_TILE_COUNT (t);
        break;
      default:
        break;
      }
  if (fd->collapse > 1 || fd->tiling)
    fd->loops = loops;
  else
    fd->loops = &fd->loop;

  if (fd->ordered && fd->collapse == 1 && loops != NULL)
    {
      fd->loops = loops;
      iterv = NULL_TREE;
      countv = NULL_TREE;
      collapse_iter = &iterv;
      collapse_count = &countv;
    }

  /* FIXME: for now map schedule(auto) to schedule(static).
     There should be analysis to determine whether all iterations
     are approximately the same amount of work (then schedule(static)
     is best) or if it varies (then schedule(dynamic,N) is better).  */
  if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_AUTO)
    {
      fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
      gcc_assert (fd->chunk_size == NULL);
    }
  gcc_assert ((fd->collapse == 1 && !fd->tiling) || collapse_iter != NULL);
  if (taskloop)
    fd->sched_kind = OMP_CLAUSE_SCHEDULE_RUNTIME;
  if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME)
    gcc_assert (fd->chunk_size == NULL);
  else if (fd->chunk_size == NULL)
    {
      /* We only need to compute a default chunk size for ordered
         static loops and dynamic loops.  */
      if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC
          || fd->have_ordered)
        fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
                         ? integer_zero_node : integer_one_node;
    }
  int cnt = fd->ordered ? fd->ordered : fd->collapse;
  for (i = 0; i < cnt; i++)
    {
      if (i == 0
          && fd->collapse == 1
          && !fd->tiling
          && (fd->ordered == 0 || loops == NULL))
        loop = &fd->loop;
      else if (loops != NULL)
        loop = loops + i;
      else
        loop = &dummy_loop;

      loop->v = gimple_omp_for_index (for_stmt, i);
      gcc_assert (SSA_VAR_P (loop->v));
      gcc_assert (TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
                  || TREE_CODE (TREE_TYPE (loop->v)) == POINTER_TYPE);
      var = TREE_CODE (loop->v) == SSA_NAME ? SSA_NAME_VAR (loop->v) : loop->v;
      loop->n1 = gimple_omp_for_initial (for_stmt, i);

      loop->cond_code = gimple_omp_for_cond (for_stmt, i);
      loop->n2 = gimple_omp_for_final (for_stmt, i);
      gcc_assert (loop->cond_code != NE_EXPR);
      omp_adjust_for_condition (loc, &loop->cond_code, &loop->n2);

      t = gimple_omp_for_incr (for_stmt, i);
      gcc_assert (TREE_OPERAND (t, 0) == var);
      loop->step = omp_get_for_step_from_incr (loc, t);

      if (simd
          || (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
              && !fd->have_ordered))
        {
          if (fd->collapse == 1 && !fd->tiling)
            iter_type = TREE_TYPE (loop->v);
          else if (i == 0
                   || TYPE_PRECISION (iter_type)
                      < TYPE_PRECISION (TREE_TYPE (loop->v)))
            iter_type
              = build_nonstandard_integer_type
                  (TYPE_PRECISION (TREE_TYPE (loop->v)), 1);
        }
      else if (iter_type != long_long_unsigned_type_node)
        {
          if (POINTER_TYPE_P (TREE_TYPE (loop->v)))
            iter_type = long_long_unsigned_type_node;
          else if (TYPE_UNSIGNED (TREE_TYPE (loop->v))
                   && TYPE_PRECISION (TREE_TYPE (loop->v))
                      >= TYPE_PRECISION (iter_type))
            {
              tree n;

              if (loop->cond_code == LT_EXPR)
                n = fold_build2_loc (loc,
                                     PLUS_EXPR, TREE_TYPE (loop->v),
                                     loop->n2, loop->step);
              else
                n = loop->n1;
              if (TREE_CODE (n) != INTEGER_CST
                  || tree_int_cst_lt (TYPE_MAX_VALUE (iter_type), n))
                iter_type = long_long_unsigned_type_node;
            }
          else if (TYPE_PRECISION (TREE_TYPE (loop->v))
                   > TYPE_PRECISION (iter_type))
            {
              tree n1, n2;

              if (loop->cond_code == LT_EXPR)
                {
                  n1 = loop->n1;
                  n2 = fold_build2_loc (loc,
                                        PLUS_EXPR, TREE_TYPE (loop->v),
                                        loop->n2, loop->step);
                }
              else
                {
                  n1 = fold_build2_loc (loc,
                                        MINUS_EXPR, TREE_TYPE (loop->v),
                                        loop->n2, loop->step);
                  n2 = loop->n1;
                }
              if (TREE_CODE (n1) != INTEGER_CST
                  || TREE_CODE (n2) != INTEGER_CST
                  || !tree_int_cst_lt (TYPE_MIN_VALUE (iter_type), n1)
                  || !tree_int_cst_lt (n2, TYPE_MAX_VALUE (iter_type)))
                iter_type = long_long_unsigned_type_node;
            }
        }
      if (i >= fd->collapse)
        continue;

      if (collapse_count && *collapse_count == NULL)
        {
          t = fold_binary (loop->cond_code, boolean_type_node,
                           fold_convert (TREE_TYPE (loop->v), loop->n1),
                           fold_convert (TREE_TYPE (loop->v), loop->n2));
          if (t && integer_zerop (t))
            count = build_zero_cst (long_long_unsigned_type_node);
          else if ((i == 0 || count != NULL_TREE)
                   && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
                   && TREE_CONSTANT (loop->n1)
                   && TREE_CONSTANT (loop->n2)
                   && TREE_CODE (loop->step) == INTEGER_CST)
            {
              tree itype = TREE_TYPE (loop->v);

              if (POINTER_TYPE_P (itype))
                itype = signed_type_for (itype);
              t = build_int_cst (itype, (loop->cond_code == LT_EXPR ? -1 : 1));
              t = fold_build2_loc (loc, PLUS_EXPR, itype,
                                   fold_convert_loc (loc, itype, loop->step),
                                   t);
              t = fold_build2_loc (loc, PLUS_EXPR, itype, t,
                                   fold_convert_loc (loc, itype, loop->n2));
              t = fold_build2_loc (loc, MINUS_EXPR, itype, t,
                                   fold_convert_loc (loc, itype, loop->n1));
              if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR)
                t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype,
                                     fold_build1_loc (loc, NEGATE_EXPR,
                                                      itype, t),
                                     fold_build1_loc (loc, NEGATE_EXPR, itype,
                                                      fold_convert_loc
                                                        (loc, itype,
                                                         loop->step)));
              else
                t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, t,
                                     fold_convert_loc (loc, itype,
                                                       loop->step));
              t = fold_convert_loc (loc, long_long_unsigned_type_node, t);
              if (count != NULL_TREE)
                count = fold_build2_loc (loc, MULT_EXPR,
                                         long_long_unsigned_type_node,
                                         count, t);
              else
                count = t;
              if (TREE_CODE (count) != INTEGER_CST)
                count = NULL_TREE;
            }
          else if (count && !integer_zerop (count))
            count = NULL_TREE;
        }
    }
  if (count
      && !simd
      && (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC
          || fd->have_ordered))
    {
      if (!tree_int_cst_lt (count, TYPE_MAX_VALUE (long_integer_type_node)))
        iter_type = long_long_unsigned_type_node;
      else
        iter_type = long_integer_type_node;
    }
  else if (collapse_iter && *collapse_iter != NULL)
    iter_type = TREE_TYPE (*collapse_iter);
  fd->iter_type = iter_type;
  if (collapse_iter && *collapse_iter == NULL)
    *collapse_iter = create_tmp_var (iter_type, ".iter");
  if (collapse_count && *collapse_count == NULL)
    {
      if (count)
        *collapse_count = fold_convert_loc (loc, iter_type, count);
      else
        *collapse_count = create_tmp_var (iter_type, ".count");
    }

  if (fd->collapse > 1 || fd->tiling || (fd->ordered && loops))
    {
      fd->loop.v = *collapse_iter;
      fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0);
      fd->loop.n2 = *collapse_count;
      fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1);
      fd->loop.cond_code = LT_EXPR;
    }
}
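
/* Illustrative sketch (hypothetical caller, not part of this file): a caller
   typically extracts the header once and then reads the canonicalized fields
   instead of re-walking the clauses.

     struct omp_for_data fd;
     omp_extract_for_data (for_stmt, &fd, NULL);
     if (fd.chunk_size == NULL_TREE)
       ...    // static schedule without a chunk, or a runtime schedule

   For "#pragma omp for collapse(2) schedule(dynamic)" this yields
   fd.collapse == 2, fd.sched_kind == OMP_CLAUSE_SCHEDULE_DYNAMIC and, because
   no chunk was written, fd.chunk_size == integer_one_node.  */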
/* Build a call to GOMP_barrier.  */

gimple *
omp_build_barrier (tree lhs)
{
  tree fndecl = builtin_decl_explicit (lhs ? BUILT_IN_GOMP_BARRIER_CANCEL
                                           : BUILT_IN_GOMP_BARRIER);
  gcall *g = gimple_build_call (fndecl, 0);
  if (lhs)
    gimple_call_set_lhs (g, lhs);
  return g;
}
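
/* Illustrative usage sketch (hypothetical caller, not part of this file):

     gimple *g = omp_build_barrier (NULL_TREE);   // plain GOMP_barrier
     gimple_seq_add_stmt (&seq, g);

   Passing an lhs selects GOMP_barrier_cancel instead and stores its result:

     gimple *g2 = omp_build_barrier (cancel_tmp); // hypothetical boolean tmp
*/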
/* Return maximum possible vectorization factor for the target.  */

int
omp_max_vf (void)
{
  if (!optimize
      || optimize_debug
      || !flag_tree_loop_optimize
      || (!flag_tree_loop_vectorize
          && global_options_set.x_flag_tree_loop_vectorize))
    return 1;

  int vf = 1;
  int vs = targetm.vectorize.autovectorize_vector_sizes ();
  if (vs)
    vf = 1 << floor_log2 (vs);
  else
    {
      machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode);
      if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT)
        vf = GET_MODE_NUNITS (vqimode);
    }
  return vf;
}
/* Return maximum SIMT width if offloading may target SIMT hardware.  */

int
omp_max_simt_vf (void)
{
  if (!optimize)
    return 0;
  if (ENABLE_OFFLOADING)
    for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;)
      {
        if (!strncmp (c, "nvptx", strlen ("nvptx")))
          return 32;
        else if ((c = strchr (c, ',')))
          c++;
      }
  return 0;
}
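
/* Illustrative note (an assumption about a typical configuration, not part of
   this file): with an nvptx offload compiler configured, the driver exports
   OFFLOAD_TARGET_NAMES with an entry such as "nvptx-none", so the scan above
   matches the "nvptx" prefix and reports a SIMT width of 32 (the warp size).
   Without offloading, or when not optimizing, the function returns 0 and
   callers treat the loop as having no SIMT execution.  */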
/* Encode an oacc launch argument.  This matches the GOMP_LAUNCH_PACK
   macro in gomp-constants.h.  We do not check for overflow.  */

tree
oacc_launch_pack (unsigned code, tree device, unsigned op)
{
  tree res;

  res = build_int_cst (unsigned_type_node, GOMP_LAUNCH_PACK (code, 0, op));
  if (device)
    {
      device = fold_build2 (LSHIFT_EXPR, unsigned_type_node,
                            device, build_int_cst (unsigned_type_node,
                                                   GOMP_LAUNCH_DEVICE_SHIFT));
      res = fold_build2 (BIT_IOR_EXPR, unsigned_type_node, res, device);
    }
  return res;
}
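
/* Worked example (a sketch; the exact shift and mask values live in
   gomp-constants.h and are an assumption here): GOMP_LAUNCH_PACK combines the
   launch CODE, a device-type field and a small operand into one word, so

     tree arg = oacc_launch_pack (GOMP_LAUNCH_DIM, NULL_TREE, mask);

   yields an INTEGER_CST with GOMP_LAUNCH_DIM in the code field and MASK in
   the operand field, while a non-NULL DEVICE tree is shifted to
   GOMP_LAUNCH_DEVICE_SHIFT and OR-ed in at run time by the trees built
   above.  */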
/* Look for compute grid dimension clauses and convert to an attribute
   attached to FN.  This permits the target-side code to (a) massage
   the dimensions, (b) emit that data and (c) optimize.  Non-constant
   dimensions are pushed onto ARGS.

   The attribute value is a TREE_LIST.  A set of dimensions is
   represented as a list of INTEGER_CST.  Those that are runtime
   exprs are represented as an INTEGER_CST of zero.

   TODO: Normally the attribute will just contain a single such list.  If
   however it contains a list of lists, this will represent the use of
   device_type.  Each member of the outer list is an assoc list of
   dimensions, keyed by the device type.  The first entry will be the
   default.  Well, that's the plan.  */
/* Replace any existing oacc fn attribute with updated dimensions.  */

void
oacc_replace_fn_attrib (tree fn, tree dims)
{
  tree ident = get_identifier (OACC_FN_ATTRIB);
  tree attribs = DECL_ATTRIBUTES (fn);

  /* If we happen to be present as the first attrib, drop it.  */
  if (attribs && TREE_PURPOSE (attribs) == ident)
    attribs = TREE_CHAIN (attribs);
  DECL_ATTRIBUTES (fn) = tree_cons (ident, dims, attribs);
}
/* Scan CLAUSES for launch dimensions and attach them to the oacc
   function attribute.  Push any that are non-constant onto the ARGS
   list, along with an appropriate GOMP_LAUNCH_DIM tag.  */

void
oacc_set_fn_attrib (tree fn, tree clauses, vec<tree> *args)
{
  /* Must match GOMP_DIM ordering.  */
  static const omp_clause_code ids[]
    = { OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS,
        OMP_CLAUSE_VECTOR_LENGTH };
  unsigned ix;
  tree dims[GOMP_DIM_MAX];

  tree attr = NULL_TREE;
  unsigned non_const = 0;

  for (ix = GOMP_DIM_MAX; ix--;)
    {
      tree clause = omp_find_clause (clauses, ids[ix]);
      tree dim = NULL_TREE;

      if (clause)
        dim = OMP_CLAUSE_EXPR (clause, ids[ix]);
      dims[ix] = dim;
      if (dim && TREE_CODE (dim) != INTEGER_CST)
        {
          dim = integer_zero_node;
          non_const |= GOMP_DIM_MASK (ix);
        }
      attr = tree_cons (NULL_TREE, dim, attr);
    }

  oacc_replace_fn_attrib (fn, attr);

  if (non_const)
    {
      /* Push a dynamic argument set.  */
      args->safe_push (oacc_launch_pack (GOMP_LAUNCH_DIM,
                                         NULL_TREE, non_const));
      for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
        if (non_const & GOMP_DIM_MASK (ix))
          args->safe_push (dims[ix]);
    }
}
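
/* Illustrative sketch (hypothetical clause list, not part of this file): for
   "#pragma acc parallel num_gangs(32) vector_length(n)" the loop above builds
   the attribute value { 32, NULL, 0 } in GOMP_DIM order (gang, worker,
   vector): the constant 32 is recorded as-is, the absent num_workers stays
   NULL_TREE, and the runtime expression N is stored as integer_zero_node
   while its bit is set in NON_CONST, so N itself is pushed onto ARGS behind a
   GOMP_LAUNCH_DIM launch argument.  */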
/* Process the routine's dimension clauses to generate an attribute
   value.  Issue diagnostics as appropriate.  We default to SEQ
   (OpenACC 2.5 clarifies this).  All dimensions have a size of zero
   (dynamic).  TREE_PURPOSE is set to indicate whether that dimension
   can have a loop partitioned on it.  Non-zero indicates
   yes, zero indicates no.  By construction once a non-zero has been
   reached, further inner dimensions must also be non-zero.  We set
   TREE_VALUE to zero for the dimensions that may be partitioned and
   1 for the other ones -- if a loop is (erroneously) spawned at
   an outer level, we don't want to try and partition it.  */

tree
oacc_build_routine_dims (tree clauses)
{
  /* Must match GOMP_DIM ordering.  */
  static const omp_clause_code ids[]
    = {OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_SEQ};
  int ix;
  int level = -1;

  for (; clauses; clauses = OMP_CLAUSE_CHAIN (clauses))
    for (ix = GOMP_DIM_MAX + 1; ix--;)
      if (OMP_CLAUSE_CODE (clauses) == ids[ix])
        {
          if (level >= 0)
            error_at (OMP_CLAUSE_LOCATION (clauses),
                      "multiple loop axes specified for routine");
          level = ix;
          break;
        }

  /* Default to SEQ.  */
  if (level < 0)
    level = GOMP_DIM_MAX;

  tree dims = NULL_TREE;

  for (ix = GOMP_DIM_MAX; ix--;)
    dims = tree_cons (build_int_cst (boolean_type_node, ix >= level),
                      build_int_cst (integer_type_node, ix < level), dims);

  return dims;
}
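
/* Worked example (illustrative): for "#pragma acc routine worker", LEVEL ends
   up as GOMP_DIM_WORKER, and the list built above is, in gang/worker/vector
   order:

     TREE_PURPOSE:  0, 1, 1   -- gang loops may not be partitioned here,
                                 worker and vector loops may
     TREE_VALUE:    1, 0, 0   -- 1 only for the unpartitionable outer level

   which matches the comment before the function.  */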
/* Retrieve the oacc function attrib and return it.  Non-oacc
   functions will return NULL.  */

tree
oacc_get_fn_attrib (tree fn)
{
  return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
}
/* Extract an oacc execution dimension from FN.  FN must be an
   offloaded function or routine that has already had its execution
   dimensions lowered to the target-specific values.  */

int
oacc_get_fn_dim_size (tree fn, int axis)
{
  tree attrs = oacc_get_fn_attrib (fn);

  gcc_assert (axis < GOMP_DIM_MAX);

  tree dims = TREE_VALUE (attrs);
  while (axis--)
    dims = TREE_CHAIN (dims);

  int size = TREE_INT_CST_LOW (TREE_VALUE (dims));

  return size;
}
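
/* Illustrative usage sketch (hypothetical caller, not part of this file):
   once the offload target has fixed the launch geometry, a later pass can ask
   for a single axis, e.g.

     int vec = oacc_get_fn_dim_size (current_function_decl, GOMP_DIM_VECTOR);

   which walks the attribute's TREE_LIST to that position and returns the
   recorded INTEGER_CST (zero still meaning "determined at run time").  */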
/* Extract the dimension axis from an IFN_GOACC_DIM_POS or
   IFN_GOACC_DIM_SIZE call.  */

int
oacc_get_ifn_dim_arg (const gimple *stmt)
{
  gcc_checking_assert (gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_SIZE
                       || gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_POS);
  tree arg = gimple_call_arg (stmt, 0);
  HOST_WIDE_INT axis = TREE_INT_CST_LOW (arg);

  gcc_checking_assert (axis >= 0 && axis < GOMP_DIM_MAX);
  return (int) axis;
}