/* General types and functions that are useful for processing of OpenMP,
   OpenACC and similar directives at various stages of compilation.

   Copyright (C) 2005-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Find an OMP clause of type KIND within CLAUSES.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "langhooks.h"
#include "omp-general.h"

39 omp_find_clause (tree clauses
, enum omp_clause_code kind
)
41 for (; clauses
; clauses
= OMP_CLAUSE_CHAIN (clauses
))
42 if (OMP_CLAUSE_CODE (clauses
) == kind
)
48 /* Return true if DECL is a reference type. */
51 omp_is_reference (tree decl
)
53 return lang_hooks
.decls
.omp_privatize_by_reference (decl
);
56 /* Adjust *COND_CODE and *N2 so that the former is either LT_EXPR or
60 omp_adjust_for_condition (location_t loc
, enum tree_code
*cond_code
, tree
*n2
)
69 if (POINTER_TYPE_P (TREE_TYPE (*n2
)))
70 *n2
= fold_build_pointer_plus_hwi_loc (loc
, *n2
, 1);
72 *n2
= fold_build2_loc (loc
, PLUS_EXPR
, TREE_TYPE (*n2
), *n2
,
73 build_int_cst (TREE_TYPE (*n2
), 1));
77 if (POINTER_TYPE_P (TREE_TYPE (*n2
)))
78 *n2
= fold_build_pointer_plus_hwi_loc (loc
, *n2
, -1);
80 *n2
= fold_build2_loc (loc
, MINUS_EXPR
, TREE_TYPE (*n2
), *n2
,
81 build_int_cst (TREE_TYPE (*n2
), 1));
89 /* Return the looping step from INCR, extracted from the step of a gimple omp
93 omp_get_for_step_from_incr (location_t loc
, tree incr
)
96 switch (TREE_CODE (incr
))
99 step
= TREE_OPERAND (incr
, 1);
101 case POINTER_PLUS_EXPR
:
102 step
= fold_convert (ssizetype
, TREE_OPERAND (incr
, 1));
105 step
= TREE_OPERAND (incr
, 1);
106 step
= fold_build1_loc (loc
, NEGATE_EXPR
, TREE_TYPE (step
), step
);
114 /* Extract the header elements of parallel loop FOR_STMT and store
118 omp_extract_for_data (gomp_for
*for_stmt
, struct omp_for_data
*fd
,
119 struct omp_for_data_loop
*loops
)
121 tree t
, var
, *collapse_iter
, *collapse_count
;
122 tree count
= NULL_TREE
, iter_type
= long_integer_type_node
;
123 struct omp_for_data_loop
*loop
;
125 struct omp_for_data_loop dummy_loop
;
126 location_t loc
= gimple_location (for_stmt
);
127 bool simd
= gimple_omp_for_kind (for_stmt
) & GF_OMP_FOR_SIMD
;
128 bool distribute
= gimple_omp_for_kind (for_stmt
)
129 == GF_OMP_FOR_KIND_DISTRIBUTE
;
130 bool taskloop
= gimple_omp_for_kind (for_stmt
)
131 == GF_OMP_FOR_KIND_TASKLOOP
;
134 fd
->for_stmt
= for_stmt
;
136 fd
->have_nowait
= distribute
|| simd
;
137 fd
->have_ordered
= false;
138 fd
->tiling
= NULL_TREE
;
141 fd
->sched_kind
= OMP_CLAUSE_SCHEDULE_STATIC
;
142 fd
->sched_modifiers
= 0;
143 fd
->chunk_size
= NULL_TREE
;
144 fd
->simd_schedule
= false;
145 if (gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_CILKFOR
)
146 fd
->sched_kind
= OMP_CLAUSE_SCHEDULE_CILKFOR
;
147 collapse_iter
= NULL
;
148 collapse_count
= NULL
;
150 for (t
= gimple_omp_for_clauses (for_stmt
); t
; t
= OMP_CLAUSE_CHAIN (t
))
151 switch (OMP_CLAUSE_CODE (t
))
153 case OMP_CLAUSE_NOWAIT
:
154 fd
->have_nowait
= true;
156 case OMP_CLAUSE_ORDERED
:
157 fd
->have_ordered
= true;
158 if (OMP_CLAUSE_ORDERED_EXPR (t
))
159 fd
->ordered
= tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (t
));
161 case OMP_CLAUSE_SCHEDULE
:
162 gcc_assert (!distribute
&& !taskloop
);
164 = (enum omp_clause_schedule_kind
)
165 (OMP_CLAUSE_SCHEDULE_KIND (t
) & OMP_CLAUSE_SCHEDULE_MASK
);
166 fd
->sched_modifiers
= (OMP_CLAUSE_SCHEDULE_KIND (t
)
167 & ~OMP_CLAUSE_SCHEDULE_MASK
);
168 fd
->chunk_size
= OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t
);
169 fd
->simd_schedule
= OMP_CLAUSE_SCHEDULE_SIMD (t
);
171 case OMP_CLAUSE_DIST_SCHEDULE
:
172 gcc_assert (distribute
);
173 fd
->chunk_size
= OMP_CLAUSE_DIST_SCHEDULE_CHUNK_EXPR (t
);
175 case OMP_CLAUSE_COLLAPSE
:
176 fd
->collapse
= tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (t
));
177 if (fd
->collapse
> 1)
179 collapse_iter
= &OMP_CLAUSE_COLLAPSE_ITERVAR (t
);
180 collapse_count
= &OMP_CLAUSE_COLLAPSE_COUNT (t
);
183 case OMP_CLAUSE_TILE
:
184 fd
->tiling
= OMP_CLAUSE_TILE_LIST (t
);
185 fd
->collapse
= list_length (fd
->tiling
);
186 gcc_assert (fd
->collapse
);
187 collapse_iter
= &OMP_CLAUSE_TILE_ITERVAR (t
);
188 collapse_count
= &OMP_CLAUSE_TILE_COUNT (t
);
194 if (fd
->collapse
> 1 || fd
->tiling
)
197 fd
->loops
= &fd
->loop
;
199 if (fd
->ordered
&& fd
->collapse
== 1 && loops
!= NULL
)
204 collapse_iter
= &iterv
;
205 collapse_count
= &countv
;
208 /* FIXME: for now map schedule(auto) to schedule(static).
209 There should be analysis to determine whether all iterations
210 are approximately the same amount of work (then schedule(static)
211 is best) or if it varies (then schedule(dynamic,N) is better). */
212 if (fd
->sched_kind
== OMP_CLAUSE_SCHEDULE_AUTO
)
214 fd
->sched_kind
= OMP_CLAUSE_SCHEDULE_STATIC
;
215 gcc_assert (fd
->chunk_size
== NULL
);
217 gcc_assert ((fd
->collapse
== 1 && !fd
->tiling
) || collapse_iter
!= NULL
);
219 fd
->sched_kind
= OMP_CLAUSE_SCHEDULE_RUNTIME
;
220 if (fd
->sched_kind
== OMP_CLAUSE_SCHEDULE_RUNTIME
)
221 gcc_assert (fd
->chunk_size
== NULL
);
222 else if (fd
->chunk_size
== NULL
)
224 /* We only need to compute a default chunk size for ordered
225 static loops and dynamic loops. */
226 if (fd
->sched_kind
!= OMP_CLAUSE_SCHEDULE_STATIC
228 fd
->chunk_size
= (fd
->sched_kind
== OMP_CLAUSE_SCHEDULE_STATIC
)
229 ? integer_zero_node
: integer_one_node
;
232 int cnt
= fd
->ordered
? fd
->ordered
: fd
->collapse
;
233 for (i
= 0; i
< cnt
; i
++)
238 && (fd
->ordered
== 0 || loops
== NULL
))
240 else if (loops
!= NULL
)
245 loop
->v
= gimple_omp_for_index (for_stmt
, i
);
246 gcc_assert (SSA_VAR_P (loop
->v
));
247 gcc_assert (TREE_CODE (TREE_TYPE (loop
->v
)) == INTEGER_TYPE
248 || TREE_CODE (TREE_TYPE (loop
->v
)) == POINTER_TYPE
);
249 var
= TREE_CODE (loop
->v
) == SSA_NAME
? SSA_NAME_VAR (loop
->v
) : loop
->v
;
250 loop
->n1
= gimple_omp_for_initial (for_stmt
, i
);
252 loop
->cond_code
= gimple_omp_for_cond (for_stmt
, i
);
253 loop
->n2
= gimple_omp_for_final (for_stmt
, i
);
254 gcc_assert (loop
->cond_code
!= NE_EXPR
255 || gimple_omp_for_kind (for_stmt
) == GF_OMP_FOR_KIND_CILKSIMD
256 || gimple_omp_for_kind (for_stmt
) == GF_OMP_FOR_KIND_CILKFOR
);
257 omp_adjust_for_condition (loc
, &loop
->cond_code
, &loop
->n2
);
259 t
= gimple_omp_for_incr (for_stmt
, i
);
260 gcc_assert (TREE_OPERAND (t
, 0) == var
);
261 loop
->step
= omp_get_for_step_from_incr (loc
, t
);
264 || (fd
->sched_kind
== OMP_CLAUSE_SCHEDULE_STATIC
265 && !fd
->have_ordered
))
267 if (fd
->collapse
== 1 && !fd
->tiling
)
268 iter_type
= TREE_TYPE (loop
->v
);
270 || TYPE_PRECISION (iter_type
)
271 < TYPE_PRECISION (TREE_TYPE (loop
->v
)))
273 = build_nonstandard_integer_type
274 (TYPE_PRECISION (TREE_TYPE (loop
->v
)), 1);
276 else if (iter_type
!= long_long_unsigned_type_node
)
278 if (POINTER_TYPE_P (TREE_TYPE (loop
->v
)))
279 iter_type
= long_long_unsigned_type_node
;
280 else if (TYPE_UNSIGNED (TREE_TYPE (loop
->v
))
281 && TYPE_PRECISION (TREE_TYPE (loop
->v
))
282 >= TYPE_PRECISION (iter_type
))
286 if (loop
->cond_code
== LT_EXPR
)
287 n
= fold_build2_loc (loc
,
288 PLUS_EXPR
, TREE_TYPE (loop
->v
),
289 loop
->n2
, loop
->step
);
292 if (TREE_CODE (n
) != INTEGER_CST
293 || tree_int_cst_lt (TYPE_MAX_VALUE (iter_type
), n
))
294 iter_type
= long_long_unsigned_type_node
;
296 else if (TYPE_PRECISION (TREE_TYPE (loop
->v
))
297 > TYPE_PRECISION (iter_type
))
301 if (loop
->cond_code
== LT_EXPR
)
304 n2
= fold_build2_loc (loc
,
305 PLUS_EXPR
, TREE_TYPE (loop
->v
),
306 loop
->n2
, loop
->step
);
310 n1
= fold_build2_loc (loc
,
311 MINUS_EXPR
, TREE_TYPE (loop
->v
),
312 loop
->n2
, loop
->step
);
315 if (TREE_CODE (n1
) != INTEGER_CST
316 || TREE_CODE (n2
) != INTEGER_CST
317 || !tree_int_cst_lt (TYPE_MIN_VALUE (iter_type
), n1
)
318 || !tree_int_cst_lt (n2
, TYPE_MAX_VALUE (iter_type
)))
319 iter_type
= long_long_unsigned_type_node
;
323 if (i
>= fd
->collapse
)
326 if (collapse_count
&& *collapse_count
== NULL
)
328 t
= fold_binary (loop
->cond_code
, boolean_type_node
,
329 fold_convert (TREE_TYPE (loop
->v
), loop
->n1
),
330 fold_convert (TREE_TYPE (loop
->v
), loop
->n2
));
331 if (t
&& integer_zerop (t
))
332 count
= build_zero_cst (long_long_unsigned_type_node
);
333 else if ((i
== 0 || count
!= NULL_TREE
)
334 && TREE_CODE (TREE_TYPE (loop
->v
)) == INTEGER_TYPE
335 && TREE_CONSTANT (loop
->n1
)
336 && TREE_CONSTANT (loop
->n2
)
337 && TREE_CODE (loop
->step
) == INTEGER_CST
)
339 tree itype
= TREE_TYPE (loop
->v
);
341 if (POINTER_TYPE_P (itype
))
342 itype
= signed_type_for (itype
);
343 t
= build_int_cst (itype
, (loop
->cond_code
== LT_EXPR
? -1 : 1));
344 t
= fold_build2_loc (loc
,
346 fold_convert_loc (loc
, itype
, loop
->step
), t
);
347 t
= fold_build2_loc (loc
, PLUS_EXPR
, itype
, t
,
348 fold_convert_loc (loc
, itype
, loop
->n2
));
349 t
= fold_build2_loc (loc
, MINUS_EXPR
, itype
, t
,
350 fold_convert_loc (loc
, itype
, loop
->n1
));
351 if (TYPE_UNSIGNED (itype
) && loop
->cond_code
== GT_EXPR
)
352 t
= fold_build2_loc (loc
, TRUNC_DIV_EXPR
, itype
,
353 fold_build1_loc (loc
, NEGATE_EXPR
, itype
, t
),
354 fold_build1_loc (loc
, NEGATE_EXPR
, itype
,
355 fold_convert_loc (loc
, itype
,
358 t
= fold_build2_loc (loc
, TRUNC_DIV_EXPR
, itype
, t
,
359 fold_convert_loc (loc
, itype
, loop
->step
));
360 t
= fold_convert_loc (loc
, long_long_unsigned_type_node
, t
);
361 if (count
!= NULL_TREE
)
362 count
= fold_build2_loc (loc
,
363 MULT_EXPR
, long_long_unsigned_type_node
,
367 if (TREE_CODE (count
) != INTEGER_CST
)
370 else if (count
&& !integer_zerop (count
))
377 && (fd
->sched_kind
!= OMP_CLAUSE_SCHEDULE_STATIC
378 || fd
->have_ordered
))
380 if (!tree_int_cst_lt (count
, TYPE_MAX_VALUE (long_integer_type_node
)))
381 iter_type
= long_long_unsigned_type_node
;
383 iter_type
= long_integer_type_node
;
385 else if (collapse_iter
&& *collapse_iter
!= NULL
)
386 iter_type
= TREE_TYPE (*collapse_iter
);
387 fd
->iter_type
= iter_type
;
388 if (collapse_iter
&& *collapse_iter
== NULL
)
389 *collapse_iter
= create_tmp_var (iter_type
, ".iter");
390 if (collapse_count
&& *collapse_count
== NULL
)
393 *collapse_count
= fold_convert_loc (loc
, iter_type
, count
);
395 *collapse_count
= create_tmp_var (iter_type
, ".count");
398 if (fd
->collapse
> 1 || fd
->tiling
|| (fd
->ordered
&& loops
))
400 fd
->loop
.v
= *collapse_iter
;
401 fd
->loop
.n1
= build_int_cst (TREE_TYPE (fd
->loop
.v
), 0);
402 fd
->loop
.n2
= *collapse_count
;
403 fd
->loop
.step
= build_int_cst (TREE_TYPE (fd
->loop
.v
), 1);
404 fd
->loop
.cond_code
= LT_EXPR
;
410 /* Build a call to GOMP_barrier. */
413 omp_build_barrier (tree lhs
)
415 tree fndecl
= builtin_decl_explicit (lhs
? BUILT_IN_GOMP_BARRIER_CANCEL
416 : BUILT_IN_GOMP_BARRIER
);
417 gcall
*g
= gimple_build_call (fndecl
, 0);
419 gimple_call_set_lhs (g
, lhs
);
423 /* Return maximum possible vectorization factor for the target. */
430 || !flag_tree_loop_optimize
431 || (!flag_tree_loop_vectorize
432 && (global_options_set
.x_flag_tree_loop_vectorize
433 || global_options_set
.x_flag_tree_vectorize
)))
437 int vs
= targetm
.vectorize
.autovectorize_vector_sizes ();
439 vf
= 1 << floor_log2 (vs
);
442 machine_mode vqimode
= targetm
.vectorize
.preferred_simd_mode (QImode
);
443 if (GET_MODE_CLASS (vqimode
) == MODE_VECTOR_INT
)
444 vf
= GET_MODE_NUNITS (vqimode
);
449 /* Return maximum SIMT width if offloading may target SIMT hardware. */
452 omp_max_simt_vf (void)
456 if (ENABLE_OFFLOADING
)
457 for (const char *c
= getenv ("OFFLOAD_TARGET_NAMES"); c
;)
459 if (!strncmp (c
, "nvptx", strlen ("nvptx")))
461 else if ((c
= strchr (c
, ',')))
467 /* Encode an oacc launch argument. This matches the GOMP_LAUNCH_PACK
468 macro on gomp-constants.h. We do not check for overflow. */
471 oacc_launch_pack (unsigned code
, tree device
, unsigned op
)
475 res
= build_int_cst (unsigned_type_node
, GOMP_LAUNCH_PACK (code
, 0, op
));
478 device
= fold_build2 (LSHIFT_EXPR
, unsigned_type_node
,
479 device
, build_int_cst (unsigned_type_node
,
480 GOMP_LAUNCH_DEVICE_SHIFT
));
481 res
= fold_build2 (BIT_IOR_EXPR
, unsigned_type_node
, res
, device
);
/* FIXME: What is the following comment for? */
/* Look for compute grid dimension clauses and convert to an attribute
   attached to FN.  This permits the target-side code to (a) massage
   the dimensions, (b) emit that data and (c) optimize.  Non-constant
   dimensions are pushed onto ARGS.

   The attribute value is a TREE_LIST.  A set of dimensions is
   represented as a list of INTEGER_CST.  Those that are runtime
   exprs are represented as an INTEGER_CST of zero.

   TODO: Normally the attribute will just contain a single such list.  If
   however it contains a list of lists, this will represent the use of
   device_type.  Each member of the outer list is an assoc list of
   dimensions, keyed by the device type.  The first entry will be the
   default.  Well, that's the plan.  */

502 /* Replace any existing oacc fn attribute with updated dimensions. */
505 oacc_replace_fn_attrib (tree fn
, tree dims
)
507 tree ident
= get_identifier (OACC_FN_ATTRIB
);
508 tree attribs
= DECL_ATTRIBUTES (fn
);
510 /* If we happen to be present as the first attrib, drop it. */
511 if (attribs
&& TREE_PURPOSE (attribs
) == ident
)
512 attribs
= TREE_CHAIN (attribs
);
513 DECL_ATTRIBUTES (fn
) = tree_cons (ident
, dims
, attribs
);
516 /* Scan CLAUSES for launch dimensions and attach them to the oacc
517 function attribute. Push any that are non-constant onto the ARGS
518 list, along with an appropriate GOMP_LAUNCH_DIM tag. IS_KERNEL is
519 true, if these are for a kernels region offload function. */
522 oacc_set_fn_attrib (tree fn
, tree clauses
, bool is_kernel
, vec
<tree
> *args
)
524 /* Must match GOMP_DIM ordering. */
525 static const omp_clause_code ids
[]
526 = { OMP_CLAUSE_NUM_GANGS
, OMP_CLAUSE_NUM_WORKERS
,
527 OMP_CLAUSE_VECTOR_LENGTH
};
529 tree dims
[GOMP_DIM_MAX
];
531 tree attr
= NULL_TREE
;
532 unsigned non_const
= 0;
534 for (ix
= GOMP_DIM_MAX
; ix
--;)
536 tree clause
= omp_find_clause (clauses
, ids
[ix
]);
537 tree dim
= NULL_TREE
;
540 dim
= OMP_CLAUSE_EXPR (clause
, ids
[ix
]);
542 if (dim
&& TREE_CODE (dim
) != INTEGER_CST
)
544 dim
= integer_zero_node
;
545 non_const
|= GOMP_DIM_MASK (ix
);
547 attr
= tree_cons (NULL_TREE
, dim
, attr
);
548 /* Note kernelness with TREE_PUBLIC. */
550 TREE_PUBLIC (attr
) = 1;
553 oacc_replace_fn_attrib (fn
, attr
);
557 /* Push a dynamic argument set. */
558 args
->safe_push (oacc_launch_pack (GOMP_LAUNCH_DIM
,
559 NULL_TREE
, non_const
));
560 for (unsigned ix
= 0; ix
!= GOMP_DIM_MAX
; ix
++)
561 if (non_const
& GOMP_DIM_MASK (ix
))
562 args
->safe_push (dims
[ix
]);
566 /* Process the routine's dimension clauess to generate an attribute
567 value. Issue diagnostics as appropriate. We default to SEQ
568 (OpenACC 2.5 clarifies this). All dimensions have a size of zero
569 (dynamic). TREE_PURPOSE is set to indicate whether that dimension
570 can have a loop partitioned on it. non-zero indicates
571 yes, zero indicates no. By construction once a non-zero has been
572 reached, further inner dimensions must also be non-zero. We set
573 TREE_VALUE to zero for the dimensions that may be partitioned and
574 1 for the other ones -- if a loop is (erroneously) spawned at
575 an outer level, we don't want to try and partition it. */
578 oacc_build_routine_dims (tree clauses
)
580 /* Must match GOMP_DIM ordering. */
581 static const omp_clause_code ids
[]
582 = {OMP_CLAUSE_GANG
, OMP_CLAUSE_WORKER
, OMP_CLAUSE_VECTOR
, OMP_CLAUSE_SEQ
};
586 for (; clauses
; clauses
= OMP_CLAUSE_CHAIN (clauses
))
587 for (ix
= GOMP_DIM_MAX
+ 1; ix
--;)
588 if (OMP_CLAUSE_CODE (clauses
) == ids
[ix
])
591 error_at (OMP_CLAUSE_LOCATION (clauses
),
592 "multiple loop axes specified for routine");
597 /* Default to SEQ. */
599 level
= GOMP_DIM_MAX
;
601 tree dims
= NULL_TREE
;
603 for (ix
= GOMP_DIM_MAX
; ix
--;)
604 dims
= tree_cons (build_int_cst (boolean_type_node
, ix
>= level
),
605 build_int_cst (integer_type_node
, ix
< level
), dims
);
610 /* Retrieve the oacc function attrib and return it. Non-oacc
611 functions will return NULL. */
614 oacc_get_fn_attrib (tree fn
)
616 return lookup_attribute (OACC_FN_ATTRIB
, DECL_ATTRIBUTES (fn
));
619 /* Return true if this oacc fn attrib is for a kernels offload
620 region. We use the TREE_PUBLIC flag of each dimension -- only
621 need to check the first one. */
624 oacc_fn_attrib_kernels_p (tree attr
)
626 return TREE_PUBLIC (TREE_VALUE (attr
));
629 /* Extract an oacc execution dimension from FN. FN must be an
630 offloaded function or routine that has already had its execution
631 dimensions lowered to the target-specific values. */
634 oacc_get_fn_dim_size (tree fn
, int axis
)
636 tree attrs
= oacc_get_fn_attrib (fn
);
638 gcc_assert (axis
< GOMP_DIM_MAX
);
640 tree dims
= TREE_VALUE (attrs
);
642 dims
= TREE_CHAIN (dims
);
644 int size
= TREE_INT_CST_LOW (TREE_VALUE (dims
));
649 /* Extract the dimension axis from an IFN_GOACC_DIM_POS or
650 IFN_GOACC_DIM_SIZE call. */
653 oacc_get_ifn_dim_arg (const gimple
*stmt
)
655 gcc_checking_assert (gimple_call_internal_fn (stmt
) == IFN_GOACC_DIM_SIZE
656 || gimple_call_internal_fn (stmt
) == IFN_GOACC_DIM_POS
);
657 tree arg
= gimple_call_arg (stmt
, 0);
658 HOST_WIDE_INT axis
= TREE_INT_CST_LOW (arg
);
660 gcc_checking_assert (axis
>= 0 && axis
< GOMP_DIM_MAX
);