/* General types and functions that are useful for processing of OpenMP,
   OpenACC and similar directives at various stages of compilation.

Copyright (C) 2005-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "langhooks.h"
#include "omp-general.h"

/* Find an OMP clause of type KIND within CLAUSES.  */

tree
omp_find_clause (tree clauses, enum omp_clause_code kind)
{
  for (; clauses; clauses = OMP_CLAUSE_CHAIN (clauses))
    if (OMP_CLAUSE_CODE (clauses) == kind)
      return clauses;

  return NULL_TREE;
}

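/* Usage sketch (illustrative, not from the original source; CLAUSES here
   stands for any directive's clause chain):

     tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);

   yields the OMP_CLAUSE_SCHEDULE node if the directive carries one,
   and NULL_TREE otherwise.  */
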
/* Return true if DECL is a reference type.  */

bool
omp_is_reference (tree decl)
{
  return lang_hooks.decls.omp_privatize_by_reference (decl);
}

/* Adjust *COND_CODE and *N2 so that the former is either LT_EXPR or
   GT_EXPR.  */

void
omp_adjust_for_condition (location_t loc, enum tree_code *cond_code, tree *n2)
{
  switch (*cond_code)
    {
    case LT_EXPR:
    case GT_EXPR:
    case NE_EXPR:
      break;
    case LE_EXPR:
      if (POINTER_TYPE_P (TREE_TYPE (*n2)))
        *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, 1);
      else
        *n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (*n2), *n2,
                               build_int_cst (TREE_TYPE (*n2), 1));
      *cond_code = LT_EXPR;
      break;
    case GE_EXPR:
      if (POINTER_TYPE_P (TREE_TYPE (*n2)))
        *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, -1);
      else
        *n2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (*n2), *n2,
                               build_int_cst (TREE_TYPE (*n2), 1));
      *cond_code = GT_EXPR;
      break;
    default:
      gcc_unreachable ();
    }
}

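/* For instance (illustrative): a loop written "for (i = 0; i <= N; i++)"
   reaches here with *COND_CODE == LE_EXPR; the LE_EXPR arm above rewrites
   the bound to N + 1 and the code to LT_EXPR, so downstream expansion
   only ever sees the LT_EXPR/GT_EXPR (or Cilk NE_EXPR) forms.  */
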
/* Return the looping step from INCR, extracted from the step of a gimple omp
   for statement.  */

tree
omp_get_for_step_from_incr (location_t loc, tree incr)
{
  tree step;
  switch (TREE_CODE (incr))
    {
    case PLUS_EXPR:
      step = TREE_OPERAND (incr, 1);
      break;
    case POINTER_PLUS_EXPR:
      step = fold_convert (ssizetype, TREE_OPERAND (incr, 1));
      break;
    case MINUS_EXPR:
      step = TREE_OPERAND (incr, 1);
      step = fold_build1_loc (loc, NEGATE_EXPR, TREE_TYPE (step), step);
      break;
    default:
      gcc_unreachable ();
    }
  return step;
}

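/* For instance (illustrative): the increment "i = i + 4" is a PLUS_EXPR
   and yields step 4 unchanged; "i = i - 1" is a MINUS_EXPR whose second
   operand is negated to give step -1; a pointer increment is a
   POINTER_PLUS_EXPR whose unsigned sizetype offset is first converted
   to ssizetype so that negative steps stay representable.  */
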
/* Extract the header elements of parallel loop FOR_STMT and store
   them into *FD.  */

void
omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
                      struct omp_for_data_loop *loops)
{
  tree t, var, *collapse_iter, *collapse_count;
  tree count = NULL_TREE, iter_type = long_integer_type_node;
  struct omp_for_data_loop *loop;
  int i;
  struct omp_for_data_loop dummy_loop;
  location_t loc = gimple_location (for_stmt);
  bool simd = gimple_omp_for_kind (for_stmt) & GF_OMP_FOR_SIMD;
  bool distribute = gimple_omp_for_kind (for_stmt)
                    == GF_OMP_FOR_KIND_DISTRIBUTE;
  bool taskloop = gimple_omp_for_kind (for_stmt)
                  == GF_OMP_FOR_KIND_TASKLOOP;
  tree iterv, countv;

  fd->for_stmt = for_stmt;
  fd->pre = NULL;
  if (gimple_omp_for_collapse (for_stmt) > 1)
    fd->loops = loops;
  else
    fd->loops = &fd->loop;

  fd->have_nowait = distribute || simd;
  fd->have_ordered = false;
  fd->collapse = 1;
  fd->ordered = 0;
  fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
  fd->sched_modifiers = 0;
  fd->chunk_size = NULL_TREE;
  fd->simd_schedule = false;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
    fd->sched_kind = OMP_CLAUSE_SCHEDULE_CILKFOR;
  collapse_iter = NULL;
  collapse_count = NULL;

  for (t = gimple_omp_for_clauses (for_stmt); t; t = OMP_CLAUSE_CHAIN (t))
    switch (OMP_CLAUSE_CODE (t))
      {
      case OMP_CLAUSE_NOWAIT:
        fd->have_nowait = true;
        break;
      case OMP_CLAUSE_ORDERED:
        fd->have_ordered = true;
        if (OMP_CLAUSE_ORDERED_EXPR (t))
          fd->ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (t));
        break;
      case OMP_CLAUSE_SCHEDULE:
        gcc_assert (!distribute && !taskloop);
        fd->sched_kind
          = (enum omp_clause_schedule_kind)
            (OMP_CLAUSE_SCHEDULE_KIND (t) & OMP_CLAUSE_SCHEDULE_MASK);
        fd->sched_modifiers = (OMP_CLAUSE_SCHEDULE_KIND (t)
                               & ~OMP_CLAUSE_SCHEDULE_MASK);
        fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t);
        fd->simd_schedule = OMP_CLAUSE_SCHEDULE_SIMD (t);
        break;
      case OMP_CLAUSE_DIST_SCHEDULE:
        gcc_assert (distribute);
        fd->chunk_size = OMP_CLAUSE_DIST_SCHEDULE_CHUNK_EXPR (t);
        break;
      case OMP_CLAUSE_COLLAPSE:
        fd->collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (t));
        if (fd->collapse > 1)
          {
            collapse_iter = &OMP_CLAUSE_COLLAPSE_ITERVAR (t);
            collapse_count = &OMP_CLAUSE_COLLAPSE_COUNT (t);
          }
        break;
      default:
        break;
      }

  if (fd->ordered && fd->collapse == 1 && loops != NULL)
    {
      fd->loops = loops;
      iterv = NULL_TREE;
      countv = NULL_TREE;
      collapse_iter = &iterv;
      collapse_count = &countv;
    }

  /* FIXME: for now map schedule(auto) to schedule(static).
     There should be analysis to determine whether all iterations
     are approximately the same amount of work (then schedule(static)
     is best) or if it varies (then schedule(dynamic,N) is better).  */
  if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_AUTO)
    {
      fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
      gcc_assert (fd->chunk_size == NULL);
    }
  gcc_assert (fd->collapse == 1 || collapse_iter != NULL);
  if (taskloop)
    fd->sched_kind = OMP_CLAUSE_SCHEDULE_RUNTIME;
  if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME)
    gcc_assert (fd->chunk_size == NULL);
  else if (fd->chunk_size == NULL)
    {
      /* We only need to compute a default chunk size for ordered
         static loops and dynamic loops.  */
      if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC
          || fd->have_ordered)
        fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
                         ? integer_zero_node : integer_one_node;
    }

  int cnt = fd->ordered ? fd->ordered : fd->collapse;
  for (i = 0; i < cnt; i++)
    {
      if (i == 0 && fd->collapse == 1 && (fd->ordered == 0 || loops == NULL))
        loop = &fd->loop;
      else if (loops != NULL)
        loop = loops + i;
      else
        loop = &dummy_loop;

      loop->v = gimple_omp_for_index (for_stmt, i);
      gcc_assert (SSA_VAR_P (loop->v));
      gcc_assert (TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
                  || TREE_CODE (TREE_TYPE (loop->v)) == POINTER_TYPE);
      var = TREE_CODE (loop->v) == SSA_NAME ? SSA_NAME_VAR (loop->v) : loop->v;
      loop->n1 = gimple_omp_for_initial (for_stmt, i);

      loop->cond_code = gimple_omp_for_cond (for_stmt, i);
      loop->n2 = gimple_omp_for_final (for_stmt, i);
      gcc_assert (loop->cond_code != NE_EXPR
                  || gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_CILKSIMD
                  || gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_CILKFOR);
      omp_adjust_for_condition (loc, &loop->cond_code, &loop->n2);

      t = gimple_omp_for_incr (for_stmt, i);
      gcc_assert (TREE_OPERAND (t, 0) == var);
      loop->step = omp_get_for_step_from_incr (loc, t);

      if (simd
          || (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
              && !fd->have_ordered))
        {
          if (fd->collapse == 1)
            iter_type = TREE_TYPE (loop->v);
          else if (i == 0
                   || TYPE_PRECISION (iter_type)
                      < TYPE_PRECISION (TREE_TYPE (loop->v)))
            iter_type
              = build_nonstandard_integer_type
                  (TYPE_PRECISION (TREE_TYPE (loop->v)), 1);
        }
      else if (iter_type != long_long_unsigned_type_node)
        {
          if (POINTER_TYPE_P (TREE_TYPE (loop->v)))
            iter_type = long_long_unsigned_type_node;
          else if (TYPE_UNSIGNED (TREE_TYPE (loop->v))
                   && TYPE_PRECISION (TREE_TYPE (loop->v))
                      >= TYPE_PRECISION (iter_type))
            {
              tree n;

              if (loop->cond_code == LT_EXPR)
                n = fold_build2_loc (loc,
                                     PLUS_EXPR, TREE_TYPE (loop->v),
                                     loop->n2, loop->step);
              else
                n = loop->n1;
              if (TREE_CODE (n) != INTEGER_CST
                  || tree_int_cst_lt (TYPE_MAX_VALUE (iter_type), n))
                iter_type = long_long_unsigned_type_node;
            }
          else if (TYPE_PRECISION (TREE_TYPE (loop->v))
                   > TYPE_PRECISION (iter_type))
            {
              tree n1, n2;

              if (loop->cond_code == LT_EXPR)
                {
                  n1 = loop->n1;
                  n2 = fold_build2_loc (loc,
                                        PLUS_EXPR, TREE_TYPE (loop->v),
                                        loop->n2, loop->step);
                }
              else
                {
                  n1 = fold_build2_loc (loc,
                                        MINUS_EXPR, TREE_TYPE (loop->v),
                                        loop->n2, loop->step);
                  n2 = loop->n1;
                }
              if (TREE_CODE (n1) != INTEGER_CST
                  || TREE_CODE (n2) != INTEGER_CST
                  || !tree_int_cst_lt (TYPE_MIN_VALUE (iter_type), n1)
                  || !tree_int_cst_lt (n2, TYPE_MAX_VALUE (iter_type)))
                iter_type = long_long_unsigned_type_node;
            }
        }

      if (i >= fd->collapse)
        continue;

      if (collapse_count && *collapse_count == NULL)
        {
          t = fold_binary (loop->cond_code, boolean_type_node,
                           fold_convert (TREE_TYPE (loop->v), loop->n1),
                           fold_convert (TREE_TYPE (loop->v), loop->n2));
          if (t && integer_zerop (t))
            count = build_zero_cst (long_long_unsigned_type_node);
          else if ((i == 0 || count != NULL_TREE)
                   && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
                   && TREE_CONSTANT (loop->n1)
                   && TREE_CONSTANT (loop->n2)
                   && TREE_CODE (loop->step) == INTEGER_CST)
            {
              tree itype = TREE_TYPE (loop->v);

              if (POINTER_TYPE_P (itype))
                itype = signed_type_for (itype);
              t = build_int_cst (itype, (loop->cond_code == LT_EXPR ? -1 : 1));
              t = fold_build2_loc (loc,
                                   PLUS_EXPR, itype,
                                   fold_convert_loc (loc, itype,
                                                     loop->step), t);
              t = fold_build2_loc (loc, PLUS_EXPR, itype, t,
                                   fold_convert_loc (loc, itype, loop->n2));
              t = fold_build2_loc (loc, MINUS_EXPR, itype, t,
                                   fold_convert_loc (loc, itype, loop->n1));
              if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR)
                t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype,
                                     fold_build1_loc (loc, NEGATE_EXPR,
                                                      itype, t),
                                     fold_build1_loc (loc, NEGATE_EXPR, itype,
                                                      fold_convert_loc
                                                        (loc, itype,
                                                         loop->step)));
              else
                t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, t,
                                     fold_convert_loc (loc, itype,
                                                       loop->step));
              t = fold_convert_loc (loc, long_long_unsigned_type_node, t);
              if (count != NULL_TREE)
                count = fold_build2_loc (loc,
                                         MULT_EXPR,
                                         long_long_unsigned_type_node,
                                         count, t);
              else
                count = t;
              if (TREE_CODE (count) != INTEGER_CST)
                count = NULL_TREE;
            }
          else if (count && !integer_zerop (count))
            count = NULL_TREE;
        }
    }

  if (count
      && !simd
      && (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC
          || fd->have_ordered))
    {
      if (!tree_int_cst_lt (count, TYPE_MAX_VALUE (long_integer_type_node)))
        iter_type = long_long_unsigned_type_node;
      else
        iter_type = long_integer_type_node;
    }
  else if (collapse_iter && *collapse_iter != NULL)
    iter_type = TREE_TYPE (*collapse_iter);
  fd->iter_type = iter_type;
  if (collapse_iter && *collapse_iter == NULL)
    *collapse_iter = create_tmp_var (iter_type, ".iter");
  if (collapse_count && *collapse_count == NULL)
    {
      if (count)
        *collapse_count = fold_convert_loc (loc, iter_type, count);
      else
        *collapse_count = create_tmp_var (iter_type, ".count");
    }

  if (fd->collapse > 1 || (fd->ordered && loops))
    {
      fd->loop.v = *collapse_iter;
      fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0);
      fd->loop.n2 = *collapse_count;
      fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1);
      fd->loop.cond_code = LT_EXPR;
    }
}

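/* Worked example (illustrative): for
     #pragma omp for collapse(2)
     for (i = 0; i < 4; i++)
       for (j = 0; j < 5; j++) ...
   each loop has constant bounds and step, so the per-loop trip count
   folds via (step + n2 - n1 - 1) / step to 4 and 5, COUNT becomes 20,
   and the logical iteration space is .iter in [0, .count) with step 1
   and LT_EXPR as the condition, as set up just above.  */
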
/* Build a call to GOMP_barrier.  */

gimple *
omp_build_barrier (tree lhs)
{
  tree fndecl = builtin_decl_explicit (lhs ? BUILT_IN_GOMP_BARRIER_CANCEL
                                           : BUILT_IN_GOMP_BARRIER);
  gcall *g = gimple_build_call (fndecl, 0);
  if (lhs)
    gimple_call_set_lhs (g, lhs);
  return g;
}

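/* Usage sketch (illustrative): passing NULL_TREE emits a plain
   GOMP_barrier call; passing an SSA name as LHS selects
   GOMP_barrier_cancel, whose boolean result tells the caller whether
   cancellation is pending:

     gimple *g = omp_build_barrier (NULL_TREE);  */
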
/* Return maximum possible vectorization factor for the target.  */

int
omp_max_vf (void)
{
  if (!optimize
      || optimize_debug
      || !flag_tree_loop_optimize
      || (!flag_tree_loop_vectorize
          && (global_options_set.x_flag_tree_loop_vectorize
              || global_options_set.x_flag_tree_vectorize)))
    return 1;

  int vf = 1;
  int vs = targetm.vectorize.autovectorize_vector_sizes ();
  if (vs)
    vf = 1 << floor_log2 (vs);
  else
    {
      machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode);
      if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT)
        vf = GET_MODE_NUNITS (vqimode);
    }
  return vf;
}

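/* For instance (illustrative): on a target whose preferred QImode SIMD
   mode is a 16-lane integer vector such as V16QI, this returns 16;
   with vectorization disabled (or at -O0) it conservatively
   returns 1.  */
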
/* Return maximum SIMT width if offloading may target SIMT hardware.  */

int
omp_max_simt_vf (void)
{
  if (!optimize)
    return 0;
  if (ENABLE_OFFLOADING)
    for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;)
      {
        if (!strncmp (c, "nvptx", strlen ("nvptx")))
          return 32;
        else if ((c = strchr (c, ',')))
          c++;
      }
  return 0;
}

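/* For instance (illustrative): with offloading configured and
   OFFLOAD_TARGET_NAMES containing "nvptx-none", the loop above matches
   the "nvptx" prefix and returns 32 (the warp width); with no SIMT
   offload target it returns 0.  */
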
/* Encode an oacc launch argument.  This matches the GOMP_LAUNCH_PACK
   macro in gomp-constants.h.  We do not check for overflow.  */

tree
oacc_launch_pack (unsigned code, tree device, unsigned op)
{
  tree res;

  res = build_int_cst (unsigned_type_node, GOMP_LAUNCH_PACK (code, 0, op));
  if (device)
    {
      device = fold_build2 (LSHIFT_EXPR, unsigned_type_node,
                            device, build_int_cst (unsigned_type_node,
                                                   GOMP_LAUNCH_DEVICE_SHIFT));
      res = fold_build2 (BIT_IOR_EXPR, unsigned_type_node, res, device);
    }
  return res;
}

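/* For instance (illustrative):

     oacc_launch_pack (GOMP_LAUNCH_DIM, NULL_TREE, mask)

   folds to the INTEGER_CST GOMP_LAUNCH_PACK (GOMP_LAUNCH_DIM, 0, mask),
   while a non-NULL DEVICE is shifted by GOMP_LAUNCH_DEVICE_SHIFT and
   IOR'd in, possibly leaving a runtime expression.  */
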
/* FIXME: What is the following comment for? */
/* Look for compute grid dimension clauses and convert to an attribute
   attached to FN.  This permits the target-side code to (a) massage
   the dimensions, (b) emit that data and (c) optimize.  Non-constant
   dimensions are pushed onto ARGS.

   The attribute value is a TREE_LIST.  A set of dimensions is
   represented as a list of INTEGER_CST.  Those that are runtime
   exprs are represented as an INTEGER_CST of zero.

   TODO: Normally the attribute will just contain a single such list.  If
   however it contains a list of lists, this will represent the use of
   device_type.  Each member of the outer list is an assoc list of
   dimensions, keyed by the device type.  The first entry will be the
   default.  Well, that's the plan.  */

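/* For instance (illustrative): "#pragma acc parallel num_gangs(32)
   vector_length(n)" would produce the list 32, NULL, 0 for gang,
   worker and vector: the unspecified num_workers stays NULL_TREE, and
   the runtime-valued vector_length is recorded as INTEGER_CST zero
   with its expression pushed onto ARGS instead.  */
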
/* Replace any existing oacc fn attribute with updated dimensions.  */

void
oacc_replace_fn_attrib (tree fn, tree dims)
{
  tree ident = get_identifier (OACC_FN_ATTRIB);
  tree attribs = DECL_ATTRIBUTES (fn);

  /* If we happen to be present as the first attrib, drop it.  */
  if (attribs && TREE_PURPOSE (attribs) == ident)
    attribs = TREE_CHAIN (attribs);
  DECL_ATTRIBUTES (fn) = tree_cons (ident, dims, attribs);
}

/* Scan CLAUSES for launch dimensions and attach them to the oacc
   function attribute.  Push any that are non-constant onto the ARGS
   list, along with an appropriate GOMP_LAUNCH_DIM tag.  IS_KERNEL is
   true if these are for a kernels region offload function.  */

void
oacc_set_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
{
  /* Must match GOMP_DIM ordering.  */
  static const omp_clause_code ids[]
    = { OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS,
        OMP_CLAUSE_VECTOR_LENGTH };
  unsigned ix;
  tree dims[GOMP_DIM_MAX];

  tree attr = NULL_TREE;
  unsigned non_const = 0;

  for (ix = GOMP_DIM_MAX; ix--;)
    {
      tree clause = omp_find_clause (clauses, ids[ix]);
      tree dim = NULL_TREE;

      if (clause)
        dim = OMP_CLAUSE_EXPR (clause, ids[ix]);
      dims[ix] = dim;
      if (dim && TREE_CODE (dim) != INTEGER_CST)
        {
          dim = integer_zero_node;
          non_const |= GOMP_DIM_MASK (ix);
        }
      attr = tree_cons (NULL_TREE, dim, attr);
      /* Note kernelness with TREE_PUBLIC.  */
      if (is_kernel)
        TREE_PUBLIC (attr) = 1;
    }

  oacc_replace_fn_attrib (fn, attr);

  if (non_const)
    {
      /* Push a dynamic argument set.  */
      args->safe_push (oacc_launch_pack (GOMP_LAUNCH_DIM,
                                         NULL_TREE, non_const));
      for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
        if (non_const & GOMP_DIM_MASK (ix))
          args->safe_push (dims[ix]);
    }
}

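/* Launch-argument sketch (illustrative): if only the vector dimension
   was non-constant above, ARGS receives

     oacc_launch_pack (GOMP_LAUNCH_DIM, NULL_TREE,
                       GOMP_DIM_MASK (GOMP_DIM_VECTOR))

   followed by the saved vector_length expression, letting the runtime
   substitute the zero placeholder left in the attribute.  */
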
/* Process the routine's dimension clauses to generate an attribute
   value.  Issue diagnostics as appropriate.  We default to SEQ
   (OpenACC 2.5 clarifies this).  All dimensions have a size of zero
   (dynamic).  TREE_PURPOSE is set to indicate whether that dimension
   can have a loop partitioned on it.  Non-zero indicates
   yes, zero indicates no.  By construction once a non-zero has been
   reached, further inner dimensions must also be non-zero.  We set
   TREE_VALUE to zero for the dimensions that may be partitioned and
   1 for the other ones -- if a loop is (erroneously) spawned at
   an outer level, we don't want to try and partition it.  */

tree
oacc_build_routine_dims (tree clauses)
{
  /* Must match GOMP_DIM ordering.  */
  static const omp_clause_code ids[]
    = {OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_SEQ};
  int ix;
  int level = -1;

  for (; clauses; clauses = OMP_CLAUSE_CHAIN (clauses))
    for (ix = GOMP_DIM_MAX + 1; ix--;)
      if (OMP_CLAUSE_CODE (clauses) == ids[ix])
        {
          if (level >= 0)
            error_at (OMP_CLAUSE_LOCATION (clauses),
                      "multiple loop axes specified for routine");
          level = ix;
          break;
        }

  /* Default to SEQ.  */
  if (level < 0)
    level = GOMP_DIM_MAX;

  tree dims = NULL_TREE;

  for (ix = GOMP_DIM_MAX; ix--;)
    dims = tree_cons (build_int_cst (boolean_type_node, ix >= level),
                      build_int_cst (integer_type_node, ix < level), dims);

  return dims;
}

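/* For instance (illustrative): "#pragma acc routine worker" supplies
   OMP_CLAUSE_WORKER, so LEVEL is the worker axis; the list built above
   is then (0, 1) for gang and (1, 0) for worker and vector, i.e. loops
   may be partitioned across worker and vector but not gang, and a
   stray gang-level loop is pinned to extent 1.  */
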
/* Retrieve the oacc function attrib and return it.  Non-oacc
   functions will return NULL.  */

tree
oacc_get_fn_attrib (tree fn)
{
  return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
}

/* Return true if this oacc fn attrib is for a kernels offload
   region.  We use the TREE_PUBLIC flag of each dimension -- only
   need to check the first one.  */

bool
oacc_fn_attrib_kernels_p (tree attr)
{
  return TREE_PUBLIC (TREE_VALUE (attr));
}

/* Extract an oacc execution dimension from FN.  FN must be an
   offloaded function or routine that has already had its execution
   dimensions lowered to the target-specific values.  */

int
oacc_get_fn_dim_size (tree fn, int axis)
{
  tree attrs = oacc_get_fn_attrib (fn);

  gcc_assert (axis < GOMP_DIM_MAX);

  tree dims = TREE_VALUE (attrs);
  while (axis--)
    dims = TREE_CHAIN (dims);

  int size = TREE_INT_CST_LOW (TREE_VALUE (dims));

  return size;
}

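/* Usage sketch (illustrative): once the device lowering pass has
   replaced the placeholder dimensions with target-specific constants,
   oacc_get_fn_dim_size (fn, GOMP_DIM_VECTOR) might return 32 for an
   nvptx offload function, whose vector size is the warp width.  */
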
/* Extract the dimension axis from an IFN_GOACC_DIM_POS or
   IFN_GOACC_DIM_SIZE call.  */

int
oacc_get_ifn_dim_arg (const gimple *stmt)
{
  gcc_checking_assert (gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_SIZE
                       || gimple_call_internal_fn (stmt)
                          == IFN_GOACC_DIM_POS);
  tree arg = gimple_call_arg (stmt, 0);
  HOST_WIDE_INT axis = TREE_INT_CST_LOW (arg);

  gcc_checking_assert (axis >= 0 && axis < GOMP_DIM_MAX);
  return (int) axis;
}
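
/* Usage sketch (illustrative): for the internal call
     _1 = GOACC_DIM_POS (1);
   this returns 1, i.e. the GOMP_DIM_WORKER axis whose position the
   call queries.  */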