/* Bits of OpenMP and OpenACC handling that is specific to device offloading
   and a lowering pass for OpenACC device directives.

   Copyright (C) 2005-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
29 #include "tree-pass.h"
32 #include "pretty-print.h"
33 #include "diagnostic-core.h"
34 #include "fold-const.h"
35 #include "internal-fn.h"
37 #include "gimple-iterator.h"
38 #include "gimplify-me.h"
39 #include "gimple-walk.h"
41 #include "tree-into-ssa.h"
42 #include "common/common-target.h"
43 #include "omp-general.h"
44 #include "omp-offload.h"
45 #include "lto-section-names.h"
46 #include "gomp-constants.h"
47 #include "gimple-pretty-print.h"
49 /* Describe the OpenACC looping structure of a function. The entire
50 function is held in a 'NULL' loop. */
54 oacc_loop
*parent
; /* Containing loop. */
56 oacc_loop
*child
; /* First inner loop. */
58 oacc_loop
*sibling
; /* Next loop within same parent. */
60 location_t loc
; /* Location of the loop start. */
62 gcall
*marker
; /* Initial head marker. */
64 gcall
*heads
[GOMP_DIM_MAX
]; /* Head marker functions. */
65 gcall
*tails
[GOMP_DIM_MAX
]; /* Tail marker functions. */
67 tree routine
; /* Pseudo-loop enclosing a routine. */
69 unsigned mask
; /* Partitioning mask. */
70 unsigned inner
; /* Partitioning of inner loops. */
71 unsigned flags
; /* Partitioning flags. */
72 unsigned ifns
; /* Contained loop abstraction functions. */
73 tree chunk_size
; /* Chunk size. */
74 gcall
*head_end
; /* Final marker of head sequence. */
77 /* Holds offload tables with decls. */
78 vec
<tree
, va_gc
> *offload_funcs
, *offload_vars
;
80 /* Return level at which oacc routine may spawn a partitioned loop, or
81 -1 if it is not a routine (i.e. is an offload fn). */
84 oacc_fn_attrib_level (tree attr
)
86 tree pos
= TREE_VALUE (attr
);
88 if (!TREE_PURPOSE (pos
))
92 for (ix
= 0; ix
!= GOMP_DIM_MAX
;
93 ix
++, pos
= TREE_CHAIN (pos
))
94 if (!integer_zerop (TREE_PURPOSE (pos
)))
100 /* Helper function for omp_finish_file routine. Takes decls from V_DECLS and
101 adds their addresses and sizes to constructor-vector V_CTOR. */
104 add_decls_addresses_to_decl_constructor (vec
<tree
, va_gc
> *v_decls
,
105 vec
<constructor_elt
, va_gc
> *v_ctor
)
107 unsigned len
= vec_safe_length (v_decls
);
108 for (unsigned i
= 0; i
< len
; i
++)
110 tree it
= (*v_decls
)[i
];
111 bool is_var
= VAR_P (it
);
114 #ifdef ACCEL_COMPILER
115 && DECL_HAS_VALUE_EXPR_P (it
)
117 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it
));
119 tree size
= NULL_TREE
;
121 size
= fold_convert (const_ptr_type_node
, DECL_SIZE_UNIT (it
));
125 addr
= build_fold_addr_expr (it
);
128 #ifdef ACCEL_COMPILER
129 /* For "omp declare target link" vars add address of the pointer to
130 the target table, instead of address of the var. */
131 tree value_expr
= DECL_VALUE_EXPR (it
);
132 tree link_ptr_decl
= TREE_OPERAND (value_expr
, 0);
133 varpool_node::finalize_decl (link_ptr_decl
);
134 addr
= build_fold_addr_expr (link_ptr_decl
);
136 addr
= build_fold_addr_expr (it
);
139 /* Most significant bit of the size marks "omp declare target link"
140 vars in host and target tables. */
141 unsigned HOST_WIDE_INT isize
= tree_to_uhwi (size
);
142 isize
|= 1ULL << (int_size_in_bytes (const_ptr_type_node
)
143 * BITS_PER_UNIT
- 1);
144 size
= wide_int_to_tree (const_ptr_type_node
, isize
);
147 CONSTRUCTOR_APPEND_ELT (v_ctor
, NULL_TREE
, addr
);
149 CONSTRUCTOR_APPEND_ELT (v_ctor
, NULL_TREE
, size
);
153 /* Create new symbols containing (address, size) pairs for global variables,
154 marked with "omp declare target" attribute, as well as addresses for the
155 functions, which are outlined offloading regions. */
157 omp_finish_file (void)
159 unsigned num_funcs
= vec_safe_length (offload_funcs
);
160 unsigned num_vars
= vec_safe_length (offload_vars
);
162 if (num_funcs
== 0 && num_vars
== 0)
165 if (targetm_common
.have_named_sections
)
167 vec
<constructor_elt
, va_gc
> *v_f
, *v_v
;
168 vec_alloc (v_f
, num_funcs
);
169 vec_alloc (v_v
, num_vars
* 2);
171 add_decls_addresses_to_decl_constructor (offload_funcs
, v_f
);
172 add_decls_addresses_to_decl_constructor (offload_vars
, v_v
);
174 tree vars_decl_type
= build_array_type_nelts (pointer_sized_int_node
,
176 tree funcs_decl_type
= build_array_type_nelts (pointer_sized_int_node
,
178 SET_TYPE_ALIGN (vars_decl_type
, TYPE_ALIGN (pointer_sized_int_node
));
179 SET_TYPE_ALIGN (funcs_decl_type
, TYPE_ALIGN (pointer_sized_int_node
));
180 tree ctor_v
= build_constructor (vars_decl_type
, v_v
);
181 tree ctor_f
= build_constructor (funcs_decl_type
, v_f
);
182 TREE_CONSTANT (ctor_v
) = TREE_CONSTANT (ctor_f
) = 1;
183 TREE_STATIC (ctor_v
) = TREE_STATIC (ctor_f
) = 1;
184 tree funcs_decl
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
185 get_identifier (".offload_func_table"),
187 tree vars_decl
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
188 get_identifier (".offload_var_table"),
190 TREE_STATIC (funcs_decl
) = TREE_STATIC (vars_decl
) = 1;
191 /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
192 otherwise a joint table in a binary will contain padding between
193 tables from multiple object files. */
194 DECL_USER_ALIGN (funcs_decl
) = DECL_USER_ALIGN (vars_decl
) = 1;
195 SET_DECL_ALIGN (funcs_decl
, TYPE_ALIGN (funcs_decl_type
));
196 SET_DECL_ALIGN (vars_decl
, TYPE_ALIGN (vars_decl_type
));
197 DECL_INITIAL (funcs_decl
) = ctor_f
;
198 DECL_INITIAL (vars_decl
) = ctor_v
;
199 set_decl_section_name (funcs_decl
, OFFLOAD_FUNC_TABLE_SECTION_NAME
);
200 set_decl_section_name (vars_decl
, OFFLOAD_VAR_TABLE_SECTION_NAME
);
202 varpool_node::finalize_decl (vars_decl
);
203 varpool_node::finalize_decl (funcs_decl
);
207 for (unsigned i
= 0; i
< num_funcs
; i
++)
209 tree it
= (*offload_funcs
)[i
];
210 targetm
.record_offload_symbol (it
);
212 for (unsigned i
= 0; i
< num_vars
; i
++)
214 tree it
= (*offload_vars
)[i
];
215 targetm
.record_offload_symbol (it
);
220 /* Find the number of threads (POS = false), or thread number (POS =
221 true) for an OpenACC region partitioned as MASK. Setup code
222 required for the calculation is added to SEQ. */
225 oacc_thread_numbers (bool pos
, int mask
, gimple_seq
*seq
)
227 tree res
= pos
? NULL_TREE
: build_int_cst (unsigned_type_node
, 1);
230 /* Start at gang level, and examine relevant dimension indices. */
231 for (ix
= GOMP_DIM_GANG
; ix
!= GOMP_DIM_MAX
; ix
++)
232 if (GOMP_DIM_MASK (ix
) & mask
)
234 tree arg
= build_int_cst (unsigned_type_node
, ix
);
238 /* We had an outer index, so scale that by the size of
240 tree n
= create_tmp_var (integer_type_node
);
242 = gimple_build_call_internal (IFN_GOACC_DIM_SIZE
, 1, arg
);
244 gimple_call_set_lhs (call
, n
);
245 gimple_seq_add_stmt (seq
, call
);
246 res
= fold_build2 (MULT_EXPR
, integer_type_node
, res
, n
);
250 /* Determine index in this dimension. */
251 tree id
= create_tmp_var (integer_type_node
);
252 gimple
*call
= gimple_build_call_internal
253 (IFN_GOACC_DIM_POS
, 1, arg
);
255 gimple_call_set_lhs (call
, id
);
256 gimple_seq_add_stmt (seq
, call
);
258 res
= fold_build2 (PLUS_EXPR
, integer_type_node
, res
, id
);
264 if (res
== NULL_TREE
)
265 res
= integer_zero_node
;
270 /* Transform IFN_GOACC_LOOP calls to actual code. See
271 expand_oacc_for for where these are generated. At the vector
272 level, we stride loops, such that each member of a warp will
273 operate on adjacent iterations. At the worker and gang level,
274 each gang/warp executes a set of contiguous iterations. Chunking
275 can override this such that each iteration engine executes a
276 contiguous chunk, and then moves on to stride to the next chunk. */
279 oacc_xform_loop (gcall
*call
)
281 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
282 enum ifn_goacc_loop_kind code
283 = (enum ifn_goacc_loop_kind
) TREE_INT_CST_LOW (gimple_call_arg (call
, 0));
284 tree dir
= gimple_call_arg (call
, 1);
285 tree range
= gimple_call_arg (call
, 2);
286 tree step
= gimple_call_arg (call
, 3);
287 tree chunk_size
= NULL_TREE
;
288 unsigned mask
= (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call
, 5));
289 tree lhs
= gimple_call_lhs (call
);
290 tree type
= TREE_TYPE (lhs
);
291 tree diff_type
= TREE_TYPE (range
);
293 gimple_seq seq
= NULL
;
294 bool chunking
= false, striding
= true;
295 unsigned outer_mask
= mask
& (~mask
+ 1); // Outermost partitioning
296 unsigned inner_mask
= mask
& ~outer_mask
; // Inner partitioning (if any)
298 #ifdef ACCEL_COMPILER
299 chunk_size
= gimple_call_arg (call
, 4);
300 if (integer_minus_onep (chunk_size
) /* Force static allocation. */
301 || integer_zerop (chunk_size
)) /* Default (also static). */
303 /* If we're at the gang level, we want each to execute a
304 contiguous run of iterations. Otherwise we want each element
306 striding
= !(outer_mask
& GOMP_DIM_MASK (GOMP_DIM_GANG
));
311 /* Chunk of size 1 is striding. */
312 striding
= integer_onep (chunk_size
);
313 chunking
= !striding
;
317 /* striding=true, chunking=true
319 striding=true, chunking=false
321 striding=false,chunking=true
322 -> chunks=ceil (range/(chunksize*threads*step))
323 striding=false,chunking=false
324 -> chunk_size=ceil(range/(threads*step)),chunks=1 */
325 push_gimplify_context (true);
329 default: gcc_unreachable ();
331 case IFN_GOACC_LOOP_CHUNKS
:
333 r
= build_int_cst (type
, 1);
337 = (range - dir) / (chunks * step * num_threads) + dir */
338 tree per
= oacc_thread_numbers (false, mask
, &seq
);
339 per
= fold_convert (type
, per
);
340 chunk_size
= fold_convert (type
, chunk_size
);
341 per
= fold_build2 (MULT_EXPR
, type
, per
, chunk_size
);
342 per
= fold_build2 (MULT_EXPR
, type
, per
, step
);
343 r
= build2 (MINUS_EXPR
, type
, range
, dir
);
344 r
= build2 (PLUS_EXPR
, type
, r
, per
);
345 r
= build2 (TRUNC_DIV_EXPR
, type
, r
, per
);
349 case IFN_GOACC_LOOP_STEP
:
351 /* If striding, step by the entire compute volume, otherwise
352 step by the inner volume. */
353 unsigned volume
= striding
? mask
: inner_mask
;
355 r
= oacc_thread_numbers (false, volume
, &seq
);
356 r
= build2 (MULT_EXPR
, type
, fold_convert (type
, r
), step
);
360 case IFN_GOACC_LOOP_OFFSET
:
363 r
= oacc_thread_numbers (true, mask
, &seq
);
364 r
= fold_convert (diff_type
, r
);
368 tree inner_size
= oacc_thread_numbers (false, inner_mask
, &seq
);
369 tree outer_size
= oacc_thread_numbers (false, outer_mask
, &seq
);
370 tree volume
= fold_build2 (MULT_EXPR
, TREE_TYPE (inner_size
),
371 inner_size
, outer_size
);
373 volume
= fold_convert (diff_type
, volume
);
375 chunk_size
= fold_convert (diff_type
, chunk_size
);
378 tree per
= fold_build2 (MULT_EXPR
, diff_type
, volume
, step
);
380 chunk_size
= build2 (MINUS_EXPR
, diff_type
, range
, dir
);
381 chunk_size
= build2 (PLUS_EXPR
, diff_type
, chunk_size
, per
);
382 chunk_size
= build2 (TRUNC_DIV_EXPR
, diff_type
, chunk_size
, per
);
385 tree span
= build2 (MULT_EXPR
, diff_type
, chunk_size
,
386 fold_convert (diff_type
, inner_size
));
387 r
= oacc_thread_numbers (true, outer_mask
, &seq
);
388 r
= fold_convert (diff_type
, r
);
389 r
= build2 (MULT_EXPR
, diff_type
, r
, span
);
391 tree inner
= oacc_thread_numbers (true, inner_mask
, &seq
);
392 inner
= fold_convert (diff_type
, inner
);
393 r
= fold_build2 (PLUS_EXPR
, diff_type
, r
, inner
);
397 tree chunk
= fold_convert (diff_type
, gimple_call_arg (call
, 6));
399 = fold_build2 (MULT_EXPR
, diff_type
, volume
, chunk_size
);
400 per
= build2 (MULT_EXPR
, diff_type
, per
, chunk
);
402 r
= build2 (PLUS_EXPR
, diff_type
, r
, per
);
405 r
= fold_build2 (MULT_EXPR
, diff_type
, r
, step
);
406 if (type
!= diff_type
)
407 r
= fold_convert (type
, r
);
410 case IFN_GOACC_LOOP_BOUND
:
415 tree inner_size
= oacc_thread_numbers (false, inner_mask
, &seq
);
416 tree outer_size
= oacc_thread_numbers (false, outer_mask
, &seq
);
417 tree volume
= fold_build2 (MULT_EXPR
, TREE_TYPE (inner_size
),
418 inner_size
, outer_size
);
420 volume
= fold_convert (diff_type
, volume
);
422 chunk_size
= fold_convert (diff_type
, chunk_size
);
425 tree per
= fold_build2 (MULT_EXPR
, diff_type
, volume
, step
);
427 chunk_size
= build2 (MINUS_EXPR
, diff_type
, range
, dir
);
428 chunk_size
= build2 (PLUS_EXPR
, diff_type
, chunk_size
, per
);
429 chunk_size
= build2 (TRUNC_DIV_EXPR
, diff_type
, chunk_size
, per
);
432 tree span
= build2 (MULT_EXPR
, diff_type
, chunk_size
,
433 fold_convert (diff_type
, inner_size
));
435 r
= fold_build2 (MULT_EXPR
, diff_type
, span
, step
);
437 tree offset
= gimple_call_arg (call
, 6);
438 r
= build2 (PLUS_EXPR
, diff_type
, r
,
439 fold_convert (diff_type
, offset
));
440 r
= build2 (integer_onep (dir
) ? MIN_EXPR
: MAX_EXPR
,
441 diff_type
, r
, range
);
443 if (diff_type
!= type
)
444 r
= fold_convert (type
, r
);
448 gimplify_assign (lhs
, r
, &seq
);
450 pop_gimplify_context (NULL
);
452 gsi_replace_with_seq (&gsi
, seq
, true);
455 /* Default partitioned and minimum partitioned dimensions. */
457 static int oacc_default_dims
[GOMP_DIM_MAX
];
458 static int oacc_min_dims
[GOMP_DIM_MAX
];
460 /* Parse the default dimension parameter. This is a set of
461 :-separated optional compute dimensions. Each specified dimension
462 is a positive integer. When device type support is added, it is
463 planned to be a comma separated list of such compute dimensions,
464 with all but the first prefixed by the colon-terminated device
468 oacc_parse_default_dims (const char *dims
)
472 for (ix
= GOMP_DIM_MAX
; ix
--;)
474 oacc_default_dims
[ix
] = -1;
475 oacc_min_dims
[ix
] = 1;
478 #ifndef ACCEL_COMPILER
479 /* Cannot be overridden on the host. */
484 const char *pos
= dims
;
486 for (ix
= 0; *pos
&& ix
!= GOMP_DIM_MAX
; ix
++)
501 val
= strtol (pos
, CONST_CAST (char **, &eptr
), 10);
502 if (errno
|| val
<= 0 || (int) val
!= val
)
505 oacc_default_dims
[ix
] = (int) val
;
511 error_at (UNKNOWN_LOCATION
,
512 "-fopenacc-dim operand is malformed at '%s'", pos
);
516 /* Allow the backend to validate the dimensions. */
517 targetm
.goacc
.validate_dims (NULL_TREE
, oacc_default_dims
, -1);
518 targetm
.goacc
.validate_dims (NULL_TREE
, oacc_min_dims
, -2);
521 /* Validate and update the dimensions for offloaded FN. ATTRS is the
522 raw attribute. DIMS is an array of dimensions, which is filled in.
523 LEVEL is the partitioning level of a routine, or -1 for an offload
524 region itself. USED is the mask of partitioned execution in the
528 oacc_validate_dims (tree fn
, tree attrs
, int *dims
, int level
, unsigned used
)
530 tree purpose
[GOMP_DIM_MAX
];
532 tree pos
= TREE_VALUE (attrs
);
533 bool is_kernel
= oacc_fn_attrib_kernels_p (attrs
);
535 /* Make sure the attribute creator attached the dimension
539 for (ix
= 0; ix
!= GOMP_DIM_MAX
; ix
++)
541 purpose
[ix
] = TREE_PURPOSE (pos
);
542 tree val
= TREE_VALUE (pos
);
543 dims
[ix
] = val
? TREE_INT_CST_LOW (val
) : -1;
544 pos
= TREE_CHAIN (pos
);
547 bool changed
= targetm
.goacc
.validate_dims (fn
, dims
, level
);
549 /* Default anything left to 1 or a partitioned default. */
550 for (ix
= 0; ix
!= GOMP_DIM_MAX
; ix
++)
553 /* The OpenACC spec says 'If the [num_gangs] clause is not
554 specified, an implementation-defined default will be used;
555 the default may depend on the code within the construct.'
556 (2.5.6). Thus an implementation is free to choose
557 non-unity default for a parallel region that doesn't have
558 any gang-partitioned loops. However, it appears that there
559 is a sufficient body of user code that expects non-gang
560 partitioned regions to not execute in gang-redundant mode.
561 So we (a) don't warn about the non-portability and (b) pick
562 the minimum permissible dimension size when there is no
563 partitioned execution. Otherwise we pick the global
564 default for the dimension, which the user can control. The
565 same wording and logic applies to num_workers and
566 vector_length, however the worker- or vector- single
567 execution doesn't have the same impact as gang-redundant
568 execution. (If the minimum gang-level partioning is not 1,
569 the target is probably too confusing.) */
570 dims
[ix
] = (used
& GOMP_DIM_MASK (ix
)
571 ? oacc_default_dims
[ix
] : oacc_min_dims
[ix
]);
577 /* Replace the attribute with new values. */
579 for (ix
= GOMP_DIM_MAX
; ix
--;)
581 pos
= tree_cons (purpose
[ix
],
582 build_int_cst (integer_type_node
, dims
[ix
]),
585 TREE_PUBLIC (pos
) = 1;
587 oacc_replace_fn_attrib (fn
, pos
);
591 /* Create an empty OpenACC loop structure at LOC. */
594 new_oacc_loop_raw (oacc_loop
*parent
, location_t loc
)
596 oacc_loop
*loop
= XCNEW (oacc_loop
);
598 loop
->parent
= parent
;
599 loop
->child
= loop
->sibling
= NULL
;
603 loop
->sibling
= parent
->child
;
604 parent
->child
= loop
;
609 memset (loop
->heads
, 0, sizeof (loop
->heads
));
610 memset (loop
->tails
, 0, sizeof (loop
->tails
));
611 loop
->routine
= NULL_TREE
;
613 loop
->mask
= loop
->flags
= loop
->inner
= 0;
615 loop
->chunk_size
= 0;
616 loop
->head_end
= NULL
;
621 /* Create an outermost, dummy OpenACC loop for offloaded function
625 new_oacc_loop_outer (tree decl
)
627 return new_oacc_loop_raw (NULL
, DECL_SOURCE_LOCATION (decl
));
630 /* Start a new OpenACC loop structure beginning at head marker HEAD.
631 Link into PARENT loop. Return the new loop. */
634 new_oacc_loop (oacc_loop
*parent
, gcall
*marker
)
636 oacc_loop
*loop
= new_oacc_loop_raw (parent
, gimple_location (marker
));
638 loop
->marker
= marker
;
640 /* TODO: This is where device_type flattening would occur for the loop
643 loop
->flags
= TREE_INT_CST_LOW (gimple_call_arg (marker
, 3));
645 tree chunk_size
= integer_zero_node
;
646 if (loop
->flags
& OLF_GANG_STATIC
)
647 chunk_size
= gimple_call_arg (marker
, 4);
648 loop
->chunk_size
= chunk_size
;
653 /* Create a dummy loop encompassing a call to a openACC routine.
654 Extract the routine's partitioning requirements. */
657 new_oacc_loop_routine (oacc_loop
*parent
, gcall
*call
, tree decl
, tree attrs
)
659 oacc_loop
*loop
= new_oacc_loop_raw (parent
, gimple_location (call
));
660 int level
= oacc_fn_attrib_level (attrs
);
662 gcc_assert (level
>= 0);
665 loop
->routine
= decl
;
666 loop
->mask
= ((GOMP_DIM_MASK (GOMP_DIM_MAX
) - 1)
667 ^ (GOMP_DIM_MASK (level
) - 1));
670 /* Finish off the current OpenACC loop ending at tail marker TAIL.
671 Return the parent loop. */
674 finish_oacc_loop (oacc_loop
*loop
)
676 /* If the loop has been collapsed, don't partition it. */
678 loop
->mask
= loop
->flags
= 0;
682 /* Free all OpenACC loop structures within LOOP (inclusive). */
685 free_oacc_loop (oacc_loop
*loop
)
688 free_oacc_loop (loop
->sibling
);
690 free_oacc_loop (loop
->child
);
695 /* Dump out the OpenACC loop head or tail beginning at FROM. */
698 dump_oacc_loop_part (FILE *file
, gcall
*from
, int depth
,
699 const char *title
, int level
)
701 enum ifn_unique_kind kind
702 = (enum ifn_unique_kind
) TREE_INT_CST_LOW (gimple_call_arg (from
, 0));
704 fprintf (file
, "%*s%s-%d:\n", depth
* 2, "", title
, level
);
705 for (gimple_stmt_iterator gsi
= gsi_for_stmt (from
);;)
707 gimple
*stmt
= gsi_stmt (gsi
);
709 if (gimple_call_internal_p (stmt
, IFN_UNIQUE
))
711 enum ifn_unique_kind k
712 = ((enum ifn_unique_kind
) TREE_INT_CST_LOW
713 (gimple_call_arg (stmt
, 0)));
715 if (k
== kind
&& stmt
!= from
)
718 print_gimple_stmt (file
, stmt
, depth
* 2 + 2, 0);
721 while (gsi_end_p (gsi
))
722 gsi
= gsi_start_bb (single_succ (gsi_bb (gsi
)));
726 /* Dump OpenACC loops LOOP, its siblings and its children. */
729 dump_oacc_loop (FILE *file
, oacc_loop
*loop
, int depth
)
733 fprintf (file
, "%*sLoop %x(%x) %s:%u\n", depth
* 2, "",
734 loop
->flags
, loop
->mask
,
735 LOCATION_FILE (loop
->loc
), LOCATION_LINE (loop
->loc
));
738 print_gimple_stmt (file
, loop
->marker
, depth
* 2, 0);
741 fprintf (file
, "%*sRoutine %s:%u:%s\n",
742 depth
* 2, "", DECL_SOURCE_FILE (loop
->routine
),
743 DECL_SOURCE_LINE (loop
->routine
),
744 IDENTIFIER_POINTER (DECL_NAME (loop
->routine
)));
746 for (ix
= GOMP_DIM_GANG
; ix
!= GOMP_DIM_MAX
; ix
++)
748 dump_oacc_loop_part (file
, loop
->heads
[ix
], depth
, "Head", ix
);
749 for (ix
= GOMP_DIM_MAX
; ix
--;)
751 dump_oacc_loop_part (file
, loop
->tails
[ix
], depth
, "Tail", ix
);
754 dump_oacc_loop (file
, loop
->child
, depth
+ 1);
756 dump_oacc_loop (file
, loop
->sibling
, depth
);
759 void debug_oacc_loop (oacc_loop
*);
761 /* Dump loops to stderr. */
764 debug_oacc_loop (oacc_loop
*loop
)
766 dump_oacc_loop (stderr
, loop
, 0);
769 /* DFS walk of basic blocks BB onwards, creating OpenACC loop
770 structures as we go. By construction these loops are properly
774 oacc_loop_discover_walk (oacc_loop
*loop
, basic_block bb
)
779 if (bb
->flags
& BB_VISITED
)
783 bb
->flags
|= BB_VISITED
;
785 /* Scan for loop markers. */
786 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
789 gimple
*stmt
= gsi_stmt (gsi
);
791 if (!is_gimple_call (stmt
))
794 gcall
*call
= as_a
<gcall
*> (stmt
);
796 /* If this is a routine, make a dummy loop for it. */
797 if (tree decl
= gimple_call_fndecl (call
))
798 if (tree attrs
= oacc_get_fn_attrib (decl
))
800 gcc_assert (!marker
);
801 new_oacc_loop_routine (loop
, call
, decl
, attrs
);
804 if (!gimple_call_internal_p (call
))
807 switch (gimple_call_internal_fn (call
))
813 /* Count the goacc loop abstraction fns, to determine if the
814 loop was collapsed already. */
819 enum ifn_unique_kind kind
820 = (enum ifn_unique_kind
) (TREE_INT_CST_LOW
821 (gimple_call_arg (call
, 0)));
822 if (kind
== IFN_UNIQUE_OACC_HEAD_MARK
823 || kind
== IFN_UNIQUE_OACC_TAIL_MARK
)
825 if (gimple_call_num_args (call
) == 2)
827 gcc_assert (marker
&& !remaining
);
829 if (kind
== IFN_UNIQUE_OACC_TAIL_MARK
)
830 loop
= finish_oacc_loop (loop
);
832 loop
->head_end
= call
;
836 int count
= TREE_INT_CST_LOW (gimple_call_arg (call
, 2));
840 if (kind
== IFN_UNIQUE_OACC_HEAD_MARK
)
841 loop
= new_oacc_loop (loop
, call
);
844 gcc_assert (count
== remaining
);
848 if (kind
== IFN_UNIQUE_OACC_HEAD_MARK
)
849 loop
->heads
[marker
] = call
;
851 loop
->tails
[remaining
] = call
;
858 if (remaining
|| marker
)
860 bb
= single_succ (bb
);
861 gcc_assert (single_pred_p (bb
) && !(bb
->flags
& BB_VISITED
));
865 /* Walk successor blocks. */
869 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
870 oacc_loop_discover_walk (loop
, e
->dest
);
873 /* LOOP is the first sibling. Reverse the order in place and return
874 the new first sibling. Recurse to child loops. */
877 oacc_loop_sibling_nreverse (oacc_loop
*loop
)
879 oacc_loop
*last
= NULL
;
883 loop
->child
= oacc_loop_sibling_nreverse (loop
->child
);
885 oacc_loop
*next
= loop
->sibling
;
886 loop
->sibling
= last
;
895 /* Discover the OpenACC loops marked up by HEAD and TAIL markers for
896 the current function. */
899 oacc_loop_discovery ()
901 /* Clear basic block flags, in particular BB_VISITED which we're going to use
905 oacc_loop
*top
= new_oacc_loop_outer (current_function_decl
);
906 oacc_loop_discover_walk (top
, ENTRY_BLOCK_PTR_FOR_FN (cfun
));
908 /* The siblings were constructed in reverse order, reverse them so
909 that diagnostics come out in an unsurprising order. */
910 top
= oacc_loop_sibling_nreverse (top
);
915 /* Transform the abstract internal function markers starting at FROM
916 to be for partitioning level LEVEL. Stop when we meet another HEAD
920 oacc_loop_xform_head_tail (gcall
*from
, int level
)
922 enum ifn_unique_kind kind
923 = (enum ifn_unique_kind
) TREE_INT_CST_LOW (gimple_call_arg (from
, 0));
924 tree replacement
= build_int_cst (unsigned_type_node
, level
);
926 for (gimple_stmt_iterator gsi
= gsi_for_stmt (from
);;)
928 gimple
*stmt
= gsi_stmt (gsi
);
930 if (gimple_call_internal_p (stmt
, IFN_UNIQUE
))
932 enum ifn_unique_kind k
933 = ((enum ifn_unique_kind
)
934 TREE_INT_CST_LOW (gimple_call_arg (stmt
, 0)));
936 if (k
== IFN_UNIQUE_OACC_FORK
|| k
== IFN_UNIQUE_OACC_JOIN
)
937 *gimple_call_arg_ptr (stmt
, 2) = replacement
;
938 else if (k
== kind
&& stmt
!= from
)
941 else if (gimple_call_internal_p (stmt
, IFN_GOACC_REDUCTION
))
942 *gimple_call_arg_ptr (stmt
, 3) = replacement
;
945 while (gsi_end_p (gsi
))
946 gsi
= gsi_start_bb (single_succ (gsi_bb (gsi
)));
950 /* Transform the IFN_GOACC_LOOP internal functions by providing the
951 determined partitioning mask and chunking argument. END_MARKER
952 points at the end IFN_HEAD_TAIL call intgroducing the loop. IFNS
953 is the number of IFN_GOACC_LOOP calls for the loop. MASK_ARG is
954 the replacement partitioning mask and CHUNK_ARG is the replacement
958 oacc_loop_xform_loop (gcall
*end_marker
, unsigned ifns
,
959 tree mask_arg
, tree chunk_arg
)
961 gimple_stmt_iterator gsi
= gsi_for_stmt (end_marker
);
963 gcc_checking_assert (ifns
);
966 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
968 gimple
*stmt
= gsi_stmt (gsi
);
970 if (!is_gimple_call (stmt
))
973 gcall
*call
= as_a
<gcall
*> (stmt
);
975 if (!gimple_call_internal_p (call
))
978 if (gimple_call_internal_fn (call
) != IFN_GOACC_LOOP
)
981 *gimple_call_arg_ptr (call
, 5) = mask_arg
;
982 *gimple_call_arg_ptr (call
, 4) = chunk_arg
;
988 /* The LOOP_BOUND ifn could be in the single successor
990 basic_block bb
= single_succ (gsi_bb (gsi
));
991 gsi
= gsi_start_bb (bb
);
995 /* Process the discovered OpenACC loops, setting the correct
996 partitioning level etc. */
999 oacc_loop_process (oacc_loop
*loop
)
1002 oacc_loop_process (loop
->child
);
1004 if (loop
->mask
&& !loop
->routine
)
1007 unsigned mask
= loop
->mask
;
1008 unsigned dim
= GOMP_DIM_GANG
;
1009 tree mask_arg
= build_int_cst (unsigned_type_node
, mask
);
1010 tree chunk_arg
= loop
->chunk_size
;
1012 oacc_loop_xform_loop (loop
->head_end
, loop
->ifns
, mask_arg
, chunk_arg
);
1014 for (ix
= 0; ix
!= GOMP_DIM_MAX
&& mask
; ix
++)
1016 while (!(GOMP_DIM_MASK (dim
) & mask
))
1019 oacc_loop_xform_head_tail (loop
->heads
[ix
], dim
);
1020 oacc_loop_xform_head_tail (loop
->tails
[ix
], dim
);
1022 mask
^= GOMP_DIM_MASK (dim
);
1027 oacc_loop_process (loop
->sibling
);
1030 /* Walk the OpenACC loop heirarchy checking and assigning the
1031 programmer-specified partitionings. OUTER_MASK is the partitioning
1032 this loop is contained within. Return mask of partitioning
1033 encountered. If any auto loops are discovered, set GOMP_DIM_MAX
1037 oacc_loop_fixed_partitions (oacc_loop
*loop
, unsigned outer_mask
)
1039 unsigned this_mask
= loop
->mask
;
1040 unsigned mask_all
= 0;
1043 #ifdef ACCEL_COMPILER
1044 /* When device_type is supported, we want the device compiler to be
1045 noisy, if the loop parameters are device_type-specific. */
1051 bool auto_par
= (loop
->flags
& OLF_AUTO
) != 0;
1052 bool seq_par
= (loop
->flags
& OLF_SEQ
) != 0;
1054 this_mask
= ((loop
->flags
>> OLF_DIM_BASE
)
1055 & (GOMP_DIM_MASK (GOMP_DIM_MAX
) - 1));
1057 if ((this_mask
!= 0) + auto_par
+ seq_par
> 1)
1060 error_at (loop
->loc
,
1062 ? "%<seq%> overrides other OpenACC loop specifiers"
1063 : "%<auto%> conflicts with other OpenACC loop "
1066 loop
->flags
&= ~OLF_AUTO
;
1070 &= ~((GOMP_DIM_MASK (GOMP_DIM_MAX
) - 1) << OLF_DIM_BASE
);
1074 if (auto_par
&& (loop
->flags
& OLF_INDEPENDENT
))
1075 mask_all
|= GOMP_DIM_MASK (GOMP_DIM_MAX
);
1078 if (this_mask
& outer_mask
)
1080 const oacc_loop
*outer
;
1081 for (outer
= loop
->parent
; outer
; outer
= outer
->parent
)
1082 if (outer
->mask
& this_mask
)
1089 error_at (loop
->loc
,
1090 "%s uses same OpenACC parallelism as containing loop",
1091 loop
->routine
? "routine call" : "inner loop");
1092 inform (outer
->loc
, "containing loop here");
1095 error_at (loop
->loc
,
1096 "%s uses OpenACC parallelism disallowed by containing "
1097 "routine", loop
->routine
? "routine call" : "loop");
1100 inform (DECL_SOURCE_LOCATION (loop
->routine
),
1101 "routine %qD declared here", loop
->routine
);
1103 this_mask
&= ~outer_mask
;
1107 unsigned outermost
= least_bit_hwi (this_mask
);
1109 if (outermost
&& outermost
<= outer_mask
)
1113 error_at (loop
->loc
,
1114 "incorrectly nested OpenACC loop parallelism");
1116 const oacc_loop
*outer
;
1117 for (outer
= loop
->parent
;
1118 outer
->flags
&& outer
->flags
< outermost
;
1119 outer
= outer
->parent
)
1121 inform (outer
->loc
, "containing loop here");
1124 this_mask
&= ~outermost
;
1128 loop
->mask
= this_mask
;
1129 mask_all
|= this_mask
;
1133 loop
->inner
= oacc_loop_fixed_partitions (loop
->child
,
1134 outer_mask
| this_mask
);
1135 mask_all
|= loop
->inner
;
1139 mask_all
|= oacc_loop_fixed_partitions (loop
->sibling
, outer_mask
);
1144 /* Walk the OpenACC loop heirarchy to assign auto-partitioned loops.
1145 OUTER_MASK is the partitioning this loop is contained within.
1146 Return the cumulative partitioning used by this loop, siblings and
1150 oacc_loop_auto_partitions (oacc_loop
*loop
, unsigned outer_mask
)
1152 bool assign
= (loop
->flags
& OLF_AUTO
) && (loop
->flags
& OLF_INDEPENDENT
);
1155 #ifdef ACCEL_COMPILER
1156 /* When device_type is supported, we want the device compiler to be
1157 noisy, if the loop parameters are device_type-specific. */
1161 if (assign
&& outer_mask
< GOMP_DIM_MASK (GOMP_DIM_MAX
- 1))
1163 /* Allocate the outermost loop at the outermost available
1165 unsigned this_mask
= outer_mask
+ 1;
1167 if (!(this_mask
& loop
->inner
))
1168 loop
->mask
= this_mask
;
1173 unsigned child_mask
= outer_mask
| loop
->mask
;
1175 if (loop
->mask
|| assign
)
1176 child_mask
|= GOMP_DIM_MASK (GOMP_DIM_MAX
);
1178 loop
->inner
= oacc_loop_auto_partitions (loop
->child
, child_mask
);
1181 if (assign
&& !loop
->mask
)
1183 /* Allocate the loop at the innermost available level. */
1184 unsigned this_mask
= 0;
1186 /* Determine the outermost partitioning used within this loop. */
1187 this_mask
= loop
->inner
| GOMP_DIM_MASK (GOMP_DIM_MAX
);
1188 this_mask
= least_bit_hwi (this_mask
);
1190 /* Pick the partitioning just inside that one. */
1193 /* And avoid picking one use by an outer loop. */
1194 this_mask
&= ~outer_mask
;
1196 if (!this_mask
&& noisy
)
1197 warning_at (loop
->loc
, 0,
1198 "insufficient partitioning available to parallelize loop");
1200 loop
->mask
= this_mask
;
1203 if (assign
&& dump_file
)
1204 fprintf (dump_file
, "Auto loop %s:%d assigned %d\n",
1205 LOCATION_FILE (loop
->loc
), LOCATION_LINE (loop
->loc
),
1208 unsigned inner_mask
= 0;
1211 inner_mask
|= oacc_loop_auto_partitions (loop
->sibling
, outer_mask
);
1213 inner_mask
|= loop
->inner
| loop
->mask
;
1218 /* Walk the OpenACC loop heirarchy to check and assign partitioning
1219 axes. Return mask of partitioning. */
1222 oacc_loop_partition (oacc_loop
*loop
, unsigned outer_mask
)
1224 unsigned mask_all
= oacc_loop_fixed_partitions (loop
, outer_mask
);
1226 if (mask_all
& GOMP_DIM_MASK (GOMP_DIM_MAX
))
1228 mask_all
^= GOMP_DIM_MASK (GOMP_DIM_MAX
);
1229 mask_all
|= oacc_loop_auto_partitions (loop
, outer_mask
);
1234 /* Default fork/join early expander. Delete the function calls if
1235 there is no RTL expander. */
1238 default_goacc_fork_join (gcall
*ARG_UNUSED (call
),
1239 const int *ARG_UNUSED (dims
), bool is_fork
)
1242 return targetm
.have_oacc_fork ();
1244 return targetm
.have_oacc_join ();
1247 /* Default goacc.reduction early expander.
1249 LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
1250 If RES_PTR is not integer-zerop:
1251 SETUP - emit 'LHS = *RES_PTR', LHS = NULL
1252 TEARDOWN - emit '*RES_PTR = VAR'
1257 default_goacc_reduction (gcall
*call
)
1259 unsigned code
= (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call
, 0));
1260 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
1261 tree lhs
= gimple_call_lhs (call
);
1262 tree var
= gimple_call_arg (call
, 2);
1263 gimple_seq seq
= NULL
;
1265 if (code
== IFN_GOACC_REDUCTION_SETUP
1266 || code
== IFN_GOACC_REDUCTION_TEARDOWN
)
1268 /* Setup and Teardown need to copy from/to the receiver object,
1270 tree ref_to_res
= gimple_call_arg (call
, 1);
1272 if (!integer_zerop (ref_to_res
))
1274 tree dst
= build_simple_mem_ref (ref_to_res
);
1277 if (code
== IFN_GOACC_REDUCTION_SETUP
)
1283 gimple_seq_add_stmt (&seq
, gimple_build_assign (dst
, src
));
1287 /* Copy VAR to LHS, if there is an LHS. */
1289 gimple_seq_add_stmt (&seq
, gimple_build_assign (lhs
, var
));
1291 gsi_replace_with_seq (&gsi
, seq
, true);
1294 /* Main entry point for oacc transformations which run on the device
1295 compiler after LTO, so we know what the target device is at this
1296 point (including the host fallback). */
/* Steps visible below: fetch the OpenACC attribute of
   current_function_decl, parse the default dimensions once, discover
   and partition the loops, validate the launch dimensions, process the
   loops, and finally scan the statements of every basic block to
   handle IFN_GOACC_LOOP, IFN_GOACC_REDUCTION and the IFN_UNIQUE_OACC_*
   fork, join and marker calls.  The loop structure is released with
   free_oacc_loop at the end.  */
1299 execute_oacc_device_lower ()
1301 tree attrs
= oacc_get_fn_attrib (current_function_decl
);
1304 /* Not an offloaded function. */
1307 /* Parse the default dim argument exactly once. */
/* After parsing, flag_openacc_dims is overwritten with its own address;
   that self-reference is the already-parsed marker tested here.  */
1308 if ((const void *)flag_openacc_dims
!= &flag_openacc_dims
)
1310 oacc_parse_default_dims (flag_openacc_dims
);
1311 flag_openacc_dims
= (char *)&flag_openacc_dims
;
1314 /* Discover, partition and process the loops. */
1315 oacc_loop
*loops
= oacc_loop_discovery ();
1316 int fn_level
= oacc_fn_attrib_level (attrs
);
/* The dump line distinguishes kernels offload, parallel offload
   (negative fn_level) and routines (fn_level printed).  */
1319 fprintf (dump_file
, oacc_fn_attrib_kernels_p (attrs
)
1320 ? "Function is kernels offload\n"
1321 : fn_level
< 0 ? "Function is parallel offload\n"
1322 : "Function is routine level %d\n", fn_level
);
/* For a routine (fn_level >= 0) the outer mask is
   GOMP_DIM_MASK (fn_level) - 1, presumably the axes outside the level
   of the routine -- TODO confirm against the GOMP_DIM_MASK definition.
   Otherwise the outer mask is empty.  */
1324 unsigned outer_mask
= fn_level
>= 0 ? GOMP_DIM_MASK (fn_level
) - 1 : 0;
1325 unsigned used_mask
= oacc_loop_partition (loops
, outer_mask
);
1326 int dims
[GOMP_DIM_MAX
];
/* DIMS is handed to oacc_validate_dims together with the mask of
   partitioning returned above; it is later passed on to the fork_join
   target hook.  */
1328 oacc_validate_dims (current_function_decl
, attrs
, dims
, fn_level
, used_mask
);
/* Dump DIMS as a bracketed list.  COMMA holds the opening text for the
   first element and the separator for every later one.  */
1332 const char *comma
= "Compute dimensions [";
1333 for (int ix
= 0; ix
!= GOMP_DIM_MAX
; ix
++, comma
= ", ")
1334 fprintf (dump_file
, "%s%d", comma
, dims
[ix
]);
1335 fprintf (dump_file
, "]\n");
1338 oacc_loop_process (loops
);
1341 fprintf (dump_file
, "OpenACC loops\n");
1342 dump_oacc_loop (dump_file
, loops
, 0);
1343 fprintf (dump_file
, "\n");
1346 /* Offloaded targets may introduce new basic blocks, which require
1347 dominance information to update SSA. */
1348 calculate_dominance_info (CDI_DOMINATORS
);
1350 /* Now lower internal loop functions to target-specific code
1353 FOR_ALL_BB_FN (bb
, cfun
)
1354 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);)
1356 gimple
*stmt
= gsi_stmt (gsi
);
1357 if (!is_gimple_call (stmt
))
1363 gcall
*call
= as_a
<gcall
*> (stmt
);
1364 if (!gimple_call_internal_p (call
))
1370 /* Rewind to allow rescan. */
/* RESCAN and REMOVE both start out false; they are consulted once the
   call has been classified by its internal function code.  */
1372 bool rescan
= false, remove
= false;
1373 enum internal_fn ifn_code
= gimple_call_internal_fn (call
);
1379 case IFN_GOACC_LOOP
:
1380 oacc_xform_loop (call
);
1384 case IFN_GOACC_REDUCTION
:
1385 /* Mark the function for SSA renaming. */
1386 mark_virtual_operands_for_renaming (cfun
);
1388 /* If the level is -1, this ended up being an unused
1389 axis. Handle as a default. */
1390 if (integer_minus_onep (gimple_call_arg (call
, 3)))
1391 default_goacc_reduction (call
);
1393 targetm
.goacc
.reduction (call
);
/* Argument 0 of the call is decoded as the ifn_unique_kind.  */
1399 enum ifn_unique_kind kind
1400 = ((enum ifn_unique_kind
)
1401 TREE_INT_CST_LOW (gimple_call_arg (call
, 0)));
/* NOTE(review): argument 2 being -1 is tested before the fork_join
   target hook is consulted; presumably -1 marks an unused axis as for
   reductions above -- confirm.  */
1408 case IFN_UNIQUE_OACC_FORK
:
1409 case IFN_UNIQUE_OACC_JOIN
:
1410 if (integer_minus_onep (gimple_call_arg (call
, 2)))
1412 else if (!targetm
.goacc
.fork_join
1413 (call
, dims
, kind
== IFN_UNIQUE_OACC_FORK
))
1417 case IFN_UNIQUE_OACC_HEAD_MARK
:
1418 case IFN_UNIQUE_OACC_TAIL_MARK
:
1426 if (gsi_end_p (gsi
))
1427 /* We rewound past the beginning of the BB. */
1428 gsi
= gsi_start_bb (bb
);
1430 /* Undo the rewind. */
/* Users of the virtual definition of the call are redirected to its
   virtual use.  A call with an LHS is then replaced by an assignment
   of argument 1 to that LHS; the gsi_remove below deletes the call.  */
1435 if (gimple_vdef (call
))
1436 replace_uses_by (gimple_vdef (call
), gimple_vuse (call
));
1437 if (gimple_call_lhs (call
))
1439 /* Propagate the data dependency var. */
1440 gimple
*ass
= gimple_build_assign (gimple_call_lhs (call
),
1441 gimple_call_arg (call
, 1));
1442 gsi_replace (&gsi
, ass
, false);
1445 gsi_remove (&gsi
, true);
1448 /* If not rescanning, advance over the call. */
1452 free_oacc_loop (loops
);
1457 /* Default launch dimension validator. Force everything to 1. A
1458 backend that wants to provide larger dimensions must override this
1462 default_goacc_validate_dims (tree
ARG_UNUSED (decl
), int *dims
,
1463 int ARG_UNUSED (fn_level
))
1465 bool changed
= false;
1467 for (unsigned ix
= 0; ix
!= GOMP_DIM_MAX
; ix
++)
1479 /* Default dimension bound is unknown on accelerator and 1 on host. */
1482 default_goacc_dim_limit (int ARG_UNUSED (axis
))
1484 #ifdef ACCEL_COMPILER
1493 const pass_data pass_data_oacc_device_lower
=
1495 GIMPLE_PASS
, /* type */
1496 "oaccdevlow", /* name */
1497 OPTGROUP_OPENMP
, /* optinfo_flags */
1498 TV_NONE
, /* tv_id */
1499 PROP_cfg
, /* properties_required */
1500 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */
1501 0, /* properties_destroyed */
1502 0, /* todo_flags_start */
1503 TODO_update_ssa
| TODO_cleanup_cfg
, /* todo_flags_finish */
1506 class pass_oacc_device_lower
: public gimple_opt_pass
1509 pass_oacc_device_lower (gcc::context
*ctxt
)
1510 : gimple_opt_pass (pass_data_oacc_device_lower
, ctxt
)
1513 /* opt_pass methods: */
1514 virtual bool gate (function
*) { return flag_openacc
; };
1516 virtual unsigned int execute (function
*)
1518 return execute_oacc_device_lower ();
1521 }; // class pass_oacc_device_lower
1526 make_pass_oacc_device_lower (gcc::context
*ctxt
)
1528 return new pass_oacc_device_lower (ctxt
);
1531 /* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
1532 VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
1533 LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT
1534 internal functions on non-SIMT targets, and likewise some SIMD internal
1535 functions on SIMT targets. */
1538 execute_omp_device_lower ()
/* The simt.vf target hook is optional; when it is absent VF is taken
   to be 1.  */
1540 int vf
= targetm
.simt
.vf
? targetm
.simt
.vf () : 1;
1542 gimple_stmt_iterator gsi
;
1543 FOR_EACH_BB_FN (bb
, cfun
)
1544 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1546 gimple
*stmt
= gsi_stmt (gsi
);
/* Only calls to internal functions are candidates.  RHS starts out as
   NULL_TREE and is filled in by the matching case below.  */
1547 if (!is_gimple_call (stmt
) || !gimple_call_internal_p (stmt
))
1549 tree lhs
= gimple_call_lhs (stmt
), rhs
= NULL_TREE
;
/* Replacement constants are built in the type of the LHS, or in
   integer_type_node when the call has no LHS.  */
1550 tree type
= lhs
? TREE_TYPE (lhs
) : integer_type_node
;
/* In each case RHS becomes the value that stands in for the call, or
   NULL_TREE when the conditional picks no replacement for this VF.  */
1551 switch (gimple_call_internal_fn (stmt
))
1553 case IFN_GOMP_USE_SIMT
:
1554 rhs
= vf
== 1 ? integer_zero_node
: integer_one_node
;
1556 case IFN_GOMP_SIMT_LANE
:
1557 case IFN_GOMP_SIMT_LAST_LANE
:
1558 rhs
= vf
== 1 ? build_zero_cst (type
) : NULL_TREE
;
1560 case IFN_GOMP_SIMT_VF
:
1561 rhs
= build_int_cst (type
, vf
);
1563 case IFN_GOMP_SIMT_ORDERED_PRED
:
1564 rhs
= vf
== 1 ? integer_zero_node
: NULL_TREE
;
1566 unlink_stmt_vdef (stmt
);
/* These SIMT functions are replaced by their own first argument when
   VF is 1.  */
1568 case IFN_GOMP_SIMT_VOTE_ANY
:
1569 case IFN_GOMP_SIMT_XCHG_BFLY
:
1570 case IFN_GOMP_SIMT_XCHG_IDX
:
1571 rhs
= vf
== 1 ? gimple_call_arg (stmt
, 0) : NULL_TREE
;
/* The SIMD placeholders are the mirror image: they receive a constant
   only when VF is not 1.  */
1573 case IFN_GOMP_SIMD_LANE
:
1574 case IFN_GOMP_SIMD_LAST_LANE
:
1575 rhs
= vf
!= 1 ? build_zero_cst (type
) : NULL_TREE
;
1577 case IFN_GOMP_SIMD_VF
:
1578 rhs
= vf
!= 1 ? build_one_cst (type
) : NULL_TREE
;
/* With an LHS the call is replaced by an assignment of RHS to it;
   without one it is replaced by a nop.  */
1585 stmt
= lhs
? gimple_build_assign (lhs
, rhs
) : gimple_build_nop ();
1586 gsi_replace (&gsi
, stmt
, false);
1589 cfun
->has_force_vectorize_loops
= false;
1595 const pass_data pass_data_omp_device_lower
=
1597 GIMPLE_PASS
, /* type */
1598 "ompdevlow", /* name */
1599 OPTGROUP_OPENMP
, /* optinfo_flags */
1600 TV_NONE
, /* tv_id */
1601 PROP_cfg
, /* properties_required */
1602 PROP_gimple_lomp_dev
, /* properties_provided */
1603 0, /* properties_destroyed */
1604 0, /* todo_flags_start */
1605 TODO_update_ssa
, /* todo_flags_finish */
1608 class pass_omp_device_lower
: public gimple_opt_pass
1611 pass_omp_device_lower (gcc::context
*ctxt
)
1612 : gimple_opt_pass (pass_data_omp_device_lower
, ctxt
)
1615 /* opt_pass methods: */
1616 virtual bool gate (function
*fun
)
1618 return !(fun
->curr_properties
& PROP_gimple_lomp_dev
);
1620 virtual unsigned int execute (function
*)
1622 return execute_omp_device_lower ();
1625 }; // class pass_expand_omp_ssa
1630 make_pass_omp_device_lower (gcc::context
*ctxt
)
1632 return new pass_omp_device_lower (ctxt
);
1635 /* "omp declare target link" handling pass. */
1639 const pass_data pass_data_omp_target_link
=
1641 GIMPLE_PASS
, /* type */
1642 "omptargetlink", /* name */
1643 OPTGROUP_OPENMP
, /* optinfo_flags */
1644 TV_NONE
, /* tv_id */
1645 PROP_ssa
, /* properties_required */
1646 0, /* properties_provided */
1647 0, /* properties_destroyed */
1648 0, /* todo_flags_start */
1649 TODO_update_ssa
, /* todo_flags_finish */
1652 class pass_omp_target_link
: public gimple_opt_pass
1655 pass_omp_target_link (gcc::context
*ctxt
)
1656 : gimple_opt_pass (pass_data_omp_target_link
, ctxt
)
1659 /* opt_pass methods: */
1660 virtual bool gate (function
*fun
)
1662 #ifdef ACCEL_COMPILER
1663 tree attrs
= DECL_ATTRIBUTES (fun
->decl
);
1664 return lookup_attribute ("omp declare target", attrs
)
1665 || lookup_attribute ("omp target entrypoint", attrs
);
1672 virtual unsigned execute (function
*);
1675 /* Callback for walk_gimple_stmt used to scan for link var operands. */
1678 find_link_var_op (tree
*tp
, int *walk_subtrees
, void *)
1682 if (VAR_P (t
) && DECL_HAS_VALUE_EXPR_P (t
)
1683 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t
)))
1693 pass_omp_target_link::execute (function
*fun
)
1696 FOR_EACH_BB_FN (bb
, fun
)
1698 gimple_stmt_iterator gsi
;
1699 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1700 if (walk_gimple_stmt (&gsi
, NULL
, find_link_var_op
, NULL
))
1701 gimple_regimplify_operands (gsi_stmt (gsi
), &gsi
);
1710 make_pass_omp_target_link (gcc::context
*ctxt
)
1712 return new pass_omp_target_link (ctxt
);