1 /* OMP constructs' SIMD clone supporting code.
3 Copyright (C) 2005-2023 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
29 #include "alloc-pool.h"
30 #include "tree-pass.h"
33 #include "pretty-print.h"
34 #include "diagnostic-core.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "gimple-walk.h"
42 #include "langhooks.h"
44 #include "tree-into-ssa.h"
47 #include "symbol-summary.h"
48 #include "ipa-param-manipulation.h"
51 #include "stringpool.h"
53 #include "omp-simd-clone.h"
55 #include "omp-general.h"
57 /* Print debug info for ok_for_auto_simd_clone to the dump file, logging
58 failure reason EXCUSE for function DECL. Always returns false. */
60 auto_simd_fail (tree decl
, const char *excuse
)
/* Only log when a dump file is open and detailed dumping (TDF_DETAILS)
   was requested.  */
62 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
/* The function is identified in the dump by its assembler name.  */
63 fprintf (dump_file
, "\nNot auto-cloning %s because %s\n",
64 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
)),
69 /* Helper function for ok_for_auto_simd_clone; return false if the statement
70 violates restrictions for an "omp declare simd" function. Specifically,
72 - throw or call setjmp/longjmp
73 - write memory that could alias parallel calls
74 - read volatile memory
75 - include openmp directives or calls
76 - call functions that might do those things */
79 auto_simd_check_stmt (gimple
*stmt
, tree outer
)
83 switch (gimple_code (stmt
))
87 /* Calls to functions that are CONST or PURE are ok, even if they
88 are internal functions without a decl. Reject other internal
90 if (gimple_call_flags (stmt
) & (ECF_CONST
| ECF_PURE
))
92 if (gimple_call_internal_p (stmt
))
93 return auto_simd_fail (outer
,
94 "body contains internal function call");
96 decl
= gimple_call_fndecl (stmt
);
98 /* We can't know whether indirect calls are safe. */
99 if (decl
== NULL_TREE
)
100 return auto_simd_fail (outer
, "body contains indirect call");
102 /* Calls to functions that are already marked "omp declare simd" are
104 if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (decl
)))
107 /* Let recursive calls to the current function through. */
111 /* Other function calls are not permitted. This covers all calls to
112 the libgomp API and setjmp/longjmp, too, as well as things like
113 __cxa_throw_ related to exception handling. */
114 return auto_simd_fail (outer
, "body contains unsafe function call");
116 /* Reject EH-related constructs. Most of the EH gimple codes are
117 already lowered by the time this pass runs during IPA.
118 GIMPLE_EH_DISPATCH and GIMPLE_RESX remain and are lowered by
119 pass_lower_eh_dispatch and pass_lower_resx, respectively; those
121 case GIMPLE_EH_DISPATCH
:
123 return auto_simd_fail (outer
, "body contains EH constructs");
125 /* Asms are not permitted since we don't know what they do. */
127 return auto_simd_fail (outer
, "body contains inline asm");
133 /* Memory writes are not permitted.
134 FIXME: this could be relaxed a little to permit writes to
135 function-local variables that could not alias other instances
136 of the function running in parallel. */
137 if (gimple_store_p (stmt
))
138 return auto_simd_fail (outer
, "body includes memory write");
140 /* Volatile reads are not permitted. */
141 if (gimple_has_volatile_ops (stmt
))
142 return auto_simd_fail (outer
, "body includes volatile op");
148 /* Helper function for ok_for_auto_simd_clone: return true if type T is
149 plausible for a cloneable function argument or return type. */
151 plausible_type_for_simd_clone (tree t
)
153 if (TREE_CODE (t
) == VOID_TYPE
)
155 else if (RECORD_OR_UNION_TYPE_P (t
) || !is_a
<scalar_mode
> (TYPE_MODE (t
)))
156 /* Small record/union types may fit into a scalar mode, but are
157 still not suitable. */
159 else if (TYPE_ATOMIC (t
))
160 /* Atomic types trigger warnings in simd_clone_clauses_extract. */
166 /* Check if the function NODE appears suitable for auto-annotation
167 with "declare simd". */
170 ok_for_auto_simd_clone (struct cgraph_node
*node
)
172 tree decl
= node
->decl
;
176 /* Nothing to do if the function isn't a definition or doesn't
178 if (!node
->definition
|| !node
->has_gimple_body_p ())
179 return auto_simd_fail (decl
, "no definition or body");
181 /* No point in trying to generate implicit clones if the function
182 isn't used in the compilation unit. */
184 return auto_simd_fail (decl
, "function is not used");
186 /* Nothing to do if the function already has the "omp declare simd"
187 attribute, is marked noclone, or is not "omp declare target". */
188 if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (decl
))
189 || lookup_attribute ("noclone", DECL_ATTRIBUTES (decl
))
190 || !lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl
)))
191 return auto_simd_fail (decl
, "incompatible attributes");
193 /* Check whether the function is restricted host/nohost via the
194 "omp declare target device_type" clause, and that doesn't match
195 what we're compiling for. Internally, these translate into
196 "omp declare target [no]host" attributes on the decl; "any"
197 translates into both attributes, but the default (which is supposed
198 to be equivalent to "any") is neither. */
199 tree host
= lookup_attribute ("omp declare target host",
200 DECL_ATTRIBUTES (decl
));
201 tree nohost
= lookup_attribute ("omp declare target nohost",
202 DECL_ATTRIBUTES (decl
));
203 #ifdef ACCEL_COMPILER
205 return auto_simd_fail (decl
, "device doesn't match for accel compiler");
208 return auto_simd_fail (decl
, "device doesn't match for host compiler");
211 /* Backends will check for vectorizable arguments/return types in a
212 target-specific way, but we can immediately filter out functions
213 that have implausible argument/return types. */
214 t
= TREE_TYPE (TREE_TYPE (decl
));
215 if (!plausible_type_for_simd_clone (t
))
216 return auto_simd_fail (decl
, "return type fails sniff test");
218 if (TYPE_ARG_TYPES (TREE_TYPE (decl
)))
220 for (tree temp
= TYPE_ARG_TYPES (TREE_TYPE (decl
));
221 temp
; temp
= TREE_CHAIN (temp
))
223 t
= TREE_VALUE (temp
);
224 if (!plausible_type_for_simd_clone (t
))
225 return auto_simd_fail (decl
, "argument type fails sniff test");
228 else if (DECL_ARGUMENTS (decl
))
230 for (tree temp
= DECL_ARGUMENTS (decl
); temp
; temp
= DECL_CHAIN (temp
))
232 t
= TREE_TYPE (temp
);
233 if (!plausible_type_for_simd_clone (t
))
234 return auto_simd_fail (decl
, "argument type fails sniff test");
238 return auto_simd_fail (decl
, "function has no arguments");
240 /* Scan the function body to see if it is suitable for SIMD-ization. */
243 FOR_EACH_BB_FN (bb
, DECL_STRUCT_FUNCTION (decl
))
245 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
247 if (!auto_simd_check_stmt (gsi_stmt (gsi
), decl
))
253 fprintf (dump_file
, "\nMarking %s for auto-cloning\n",
254 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
)));
259 /* Return the number of elements in vector type VECTYPE, which is associated
260 with a SIMD clone. At present these always have a constant length. */
262 static unsigned HOST_WIDE_INT
263 simd_clone_subparts (tree vectype
)
/* Read the subparts count of VECTYPE and convert it with to_constant ();
   the result is handed back as an unsigned HOST_WIDE_INT.  */
265 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
268 /* Allocate a fresh `simd_clone' and return it. NARGS is the number
269 of arguments to reserve space for. */
271 static struct cgraph_simd_clone
*
272 simd_clone_struct_alloc (int nargs
)
274 struct cgraph_simd_clone
*clone_info
;
/* LEN is the size of the fixed structure plus NARGS trailing
   cgraph_simd_clone_arg entries.  */
275 size_t len
= (sizeof (struct cgraph_simd_clone
)
276 + nargs
* sizeof (struct cgraph_simd_clone_arg
));
/* NOTE(review): ggc_internal_cleared_alloc presumably returns zero-filled
   garbage-collected memory -- confirm against ggc.h.  */
277 clone_info
= (struct cgraph_simd_clone
*)
278 ggc_internal_cleared_alloc (len
);
282 /* Make a copy of the `struct cgraph_simd_clone' in FROM to TO. */
285 simd_clone_struct_copy (struct cgraph_simd_clone
*to
,
286 struct cgraph_simd_clone
*from
)
/* Copy the fixed structure plus (nargs - inbranch) argument entries from
   FROM to TO in one memcpy.
   NOTE(review): presumably INBRANCH is subtracted so that the mask
   argument entry of an in-branch clone is not copied -- confirm against
   the callers.  TO must have room for that many entries.  */
288 memcpy (to
, from
, (sizeof (struct cgraph_simd_clone
)
289 + ((from
->nargs
- from
->inbranch
)
290 * sizeof (struct cgraph_simd_clone_arg
))));
293 /* Fill an empty vector ARGS with parameter types of function FNDECL. This
294 uses TYPE_ARG_TYPES if available, otherwise falls back to the types of
295 the DECL_ARGUMENTS. */
298 simd_clone_vector_of_formal_parm_types (vec
<tree
> *args
, tree fndecl
)
/* When the function type carries TYPE_ARG_TYPES, collect the argument
   types from the function type.  */
300 if (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)))
302 push_function_arg_types (args
, TREE_TYPE (fndecl
));
/* Collect the argument declarations of FNDECL instead.  */
305 push_function_arg_decls (args
, fndecl
);
/* Replace every element of ARGS by its TREE_TYPE, so the vector holds
   types rather than declarations.  */
308 FOR_EACH_VEC_ELT (*args
, i
, arg
)
309 (*args
)[i
] = TREE_TYPE ((*args
)[i
]);
312 /* Given a simd function in NODE, extract the simd specific
313 information from the OMP clauses passed in CLAUSES, and return
314 the struct cgraph_simd_clone * if it should be cloned. *INBRANCH_SPECIFIED
315 is set to TRUE if the `inbranch' or `notinbranch' clause is specified,
316 otherwise set to FALSE. */
318 static struct cgraph_simd_clone
*
319 simd_clone_clauses_extract (struct cgraph_node
*node
, tree clauses
,
320 bool *inbranch_specified
)
323 simd_clone_vector_of_formal_parm_types (&args
, node
->decl
);
326 *inbranch_specified
= false;
329 if (n
> 0 && args
.last () == void_type_node
)
332 /* Allocate one more than needed just in case this is an in-branch
333 clone which will require a mask argument. */
334 struct cgraph_simd_clone
*clone_info
= simd_clone_struct_alloc (n
+ 1);
335 clone_info
->nargs
= n
;
340 clauses
= TREE_VALUE (clauses
);
341 if (!clauses
|| TREE_CODE (clauses
) != OMP_CLAUSE
)
344 for (t
= clauses
; t
; t
= OMP_CLAUSE_CHAIN (t
))
346 switch (OMP_CLAUSE_CODE (t
))
348 case OMP_CLAUSE_INBRANCH
:
349 clone_info
->inbranch
= 1;
350 *inbranch_specified
= true;
352 case OMP_CLAUSE_NOTINBRANCH
:
353 clone_info
->inbranch
= 0;
354 *inbranch_specified
= true;
356 case OMP_CLAUSE_SIMDLEN
:
358 = TREE_INT_CST_LOW (OMP_CLAUSE_SIMDLEN_EXPR (t
));
360 case OMP_CLAUSE_LINEAR
:
362 tree decl
= OMP_CLAUSE_DECL (t
);
363 tree step
= OMP_CLAUSE_LINEAR_STEP (t
);
364 int argno
= TREE_INT_CST_LOW (decl
);
365 if (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (t
))
367 enum cgraph_simd_clone_arg_type arg_type
;
368 if (TREE_CODE (args
[argno
]) == REFERENCE_TYPE
)
369 switch (OMP_CLAUSE_LINEAR_KIND (t
))
371 case OMP_CLAUSE_LINEAR_REF
:
373 = SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
;
375 case OMP_CLAUSE_LINEAR_UVAL
:
377 = SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
;
379 case OMP_CLAUSE_LINEAR_VAL
:
380 case OMP_CLAUSE_LINEAR_DEFAULT
:
382 = SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
;
388 arg_type
= SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
;
389 clone_info
->args
[argno
].arg_type
= arg_type
;
390 clone_info
->args
[argno
].linear_step
= tree_to_shwi (step
);
391 gcc_assert (clone_info
->args
[argno
].linear_step
>= 0
392 && clone_info
->args
[argno
].linear_step
< n
);
396 if (POINTER_TYPE_P (args
[argno
]))
397 step
= fold_convert (ssizetype
, step
);
398 if (!tree_fits_shwi_p (step
))
400 warning_at (OMP_CLAUSE_LOCATION (t
), 0,
401 "ignoring large linear step");
404 else if (integer_zerop (step
))
406 warning_at (OMP_CLAUSE_LOCATION (t
), 0,
407 "ignoring zero linear step");
412 enum cgraph_simd_clone_arg_type arg_type
;
413 if (TREE_CODE (args
[argno
]) == REFERENCE_TYPE
)
414 switch (OMP_CLAUSE_LINEAR_KIND (t
))
416 case OMP_CLAUSE_LINEAR_REF
:
418 = SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
;
420 case OMP_CLAUSE_LINEAR_UVAL
:
422 = SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
;
424 case OMP_CLAUSE_LINEAR_VAL
:
425 case OMP_CLAUSE_LINEAR_DEFAULT
:
427 = SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
;
433 arg_type
= SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
;
434 clone_info
->args
[argno
].arg_type
= arg_type
;
435 clone_info
->args
[argno
].linear_step
= tree_to_shwi (step
);
440 case OMP_CLAUSE_UNIFORM
:
442 tree decl
= OMP_CLAUSE_DECL (t
);
443 int argno
= tree_to_uhwi (decl
);
444 clone_info
->args
[argno
].arg_type
445 = SIMD_CLONE_ARG_TYPE_UNIFORM
;
448 case OMP_CLAUSE_ALIGNED
:
450 /* Ignore aligned (x) for declare simd, for the ABI we really
451 need an alignment specified. */
452 if (OMP_CLAUSE_ALIGNED_ALIGNMENT (t
) == NULL_TREE
)
454 tree decl
= OMP_CLAUSE_DECL (t
);
455 int argno
= tree_to_uhwi (decl
);
456 clone_info
->args
[argno
].alignment
457 = TREE_INT_CST_LOW (OMP_CLAUSE_ALIGNED_ALIGNMENT (t
));
466 if (TYPE_ATOMIC (TREE_TYPE (TREE_TYPE (node
->decl
))))
468 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
469 "ignoring %<#pragma omp declare simd%> on function "
470 "with %<_Atomic%> qualified return type");
474 for (unsigned int argno
= 0; argno
< clone_info
->nargs
; argno
++)
475 if (TYPE_ATOMIC (args
[argno
])
476 && clone_info
->args
[argno
].arg_type
!= SIMD_CLONE_ARG_TYPE_UNIFORM
)
478 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
479 "ignoring %<#pragma omp declare simd%> on function "
480 "with %<_Atomic%> qualified non-%<uniform%> argument");
488 /* Given a SIMD clone in NODE, calculate the characteristic data
489 type and return the corresponding type. The characteristic data
490 type is computed as described in the Intel Vector ABI. */
493 simd_clone_compute_base_data_type (struct cgraph_node
*node
,
494 struct cgraph_simd_clone
*clone_info
)
496 tree type
= integer_type_node
;
497 tree fndecl
= node
->decl
;
499 /* a) For non-void function, the characteristic data type is the
501 if (TREE_CODE (TREE_TYPE (TREE_TYPE (fndecl
))) != VOID_TYPE
)
502 type
= TREE_TYPE (TREE_TYPE (fndecl
));
504 /* b) If the function has any non-uniform, non-linear parameters,
505 then the characteristic data type is the type of the first
510 simd_clone_vector_of_formal_parm_types (&map
, fndecl
);
511 for (unsigned int i
= 0; i
< clone_info
->nargs
; ++i
)
512 if (clone_info
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
519 /* c) If the characteristic data type determined by a) or b) above
520 is struct, union, or class type which is pass-by-value (except
521 for the type that maps to the built-in complex data type), the
522 characteristic data type is int. */
523 if (RECORD_OR_UNION_TYPE_P (type
)
524 && !aggregate_value_p (type
, NULL
)
525 && TREE_CODE (type
) != COMPLEX_TYPE
)
526 return integer_type_node
;
528 /* d) If none of the above three classes is applicable, the
529 characteristic data type is int. */
533 /* e) For Intel Xeon Phi native and offload compilation, if the
534 resulting characteristic data type is 8-bit or 16-bit integer
535 data type, the characteristic data type is int. */
536 /* Well, we don't handle Xeon Phi yet. */
540 simd_clone_mangle (struct cgraph_node
*node
,
541 struct cgraph_simd_clone
*clone_info
)
543 char vecsize_mangle
= clone_info
->vecsize_mangle
;
544 char mask
= clone_info
->inbranch
? 'M' : 'N';
545 poly_uint64 simdlen
= clone_info
->simdlen
;
549 gcc_assert (vecsize_mangle
&& maybe_ne (simdlen
, 0U));
551 pp_string (&pp
, "_ZGV");
552 pp_character (&pp
, vecsize_mangle
);
553 pp_character (&pp
, mask
);
554 /* For now, simdlen is always constant, while variable simdlen pp 'n'. */
555 unsigned int len
= simdlen
.to_constant ();
556 pp_decimal_int (&pp
, (len
));
558 for (n
= 0; n
< clone_info
->nargs
; ++n
)
560 struct cgraph_simd_clone_arg arg
= clone_info
->args
[n
];
562 switch (arg
.arg_type
)
564 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
565 pp_character (&pp
, 'u');
567 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
568 pp_character (&pp
, 'l');
570 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
571 pp_character (&pp
, 'R');
573 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
574 pp_character (&pp
, 'L');
576 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
577 pp_character (&pp
, 'U');
580 gcc_assert (arg
.linear_step
!= 0);
581 if (arg
.linear_step
> 1)
582 pp_unsigned_wide_integer (&pp
, arg
.linear_step
);
583 else if (arg
.linear_step
< 0)
585 pp_character (&pp
, 'n');
586 pp_unsigned_wide_integer (&pp
, (-(unsigned HOST_WIDE_INT
)
590 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
591 pp_string (&pp
, "ls");
592 pp_unsigned_wide_integer (&pp
, arg
.linear_step
);
594 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
595 pp_string (&pp
, "Rs");
596 pp_unsigned_wide_integer (&pp
, arg
.linear_step
);
598 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
599 pp_string (&pp
, "Ls");
600 pp_unsigned_wide_integer (&pp
, arg
.linear_step
);
602 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
603 pp_string (&pp
, "Us");
604 pp_unsigned_wide_integer (&pp
, arg
.linear_step
);
607 pp_character (&pp
, 'v');
611 pp_character (&pp
, 'a');
612 pp_decimal_int (&pp
, arg
.alignment
);
617 const char *str
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node
->decl
));
620 pp_string (&pp
, str
);
621 str
= pp_formatted_text (&pp
);
623 /* If there already is a SIMD clone with the same mangled name, don't
624 add another one. This can happen e.g. for
625 #pragma omp declare simd
626 #pragma omp declare simd simdlen(8)
628 if the simdlen is assumed to be 8 for the first one, etc. */
629 for (struct cgraph_node
*clone
= node
->simd_clones
; clone
;
630 clone
= clone
->simdclone
->next_clone
)
631 if (id_equal (DECL_ASSEMBLER_NAME (clone
->decl
), str
))
634 return get_identifier (str
);
637 /* Create a simd clone of OLD_NODE and return it. If FORCE_LOCAL is true,
638 create it as a local symbol, otherwise copy the symbol linkage and
639 visibility attributes from OLD_NODE. */
641 static struct cgraph_node
*
642 simd_clone_create (struct cgraph_node
*old_node
, bool force_local
)
644 struct cgraph_node
*new_node
;
645 if (old_node
->definition
)
647 if (!old_node
->has_gimple_body_p ())
649 old_node
->get_body ();
650 new_node
= old_node
->create_version_clone_with_body (vNULL
, NULL
, NULL
,
656 tree old_decl
= old_node
->decl
;
657 tree new_decl
= copy_node (old_node
->decl
);
658 DECL_NAME (new_decl
) = clone_function_name_numbered (old_decl
,
660 SET_DECL_ASSEMBLER_NAME (new_decl
, DECL_NAME (new_decl
));
661 SET_DECL_RTL (new_decl
, NULL
);
662 DECL_STATIC_CONSTRUCTOR (new_decl
) = 0;
663 DECL_STATIC_DESTRUCTOR (new_decl
) = 0;
664 new_node
= old_node
->create_version_clone (new_decl
, vNULL
, NULL
);
665 if (old_node
->in_other_partition
)
666 new_node
->in_other_partition
= 1;
668 if (new_node
== NULL
)
671 set_decl_built_in_function (new_node
->decl
, NOT_BUILT_IN
, 0);
674 TREE_PUBLIC (new_node
->decl
) = 0;
675 DECL_COMDAT (new_node
->decl
) = 0;
676 DECL_WEAK (new_node
->decl
) = 0;
677 DECL_EXTERNAL (new_node
->decl
) = 0;
678 DECL_VISIBILITY_SPECIFIED (new_node
->decl
) = 0;
679 DECL_VISIBILITY (new_node
->decl
) = VISIBILITY_DEFAULT
;
680 DECL_DLLIMPORT_P (new_node
->decl
) = 0;
684 TREE_PUBLIC (new_node
->decl
) = TREE_PUBLIC (old_node
->decl
);
685 DECL_COMDAT (new_node
->decl
) = DECL_COMDAT (old_node
->decl
);
686 DECL_WEAK (new_node
->decl
) = DECL_WEAK (old_node
->decl
);
687 DECL_EXTERNAL (new_node
->decl
) = DECL_EXTERNAL (old_node
->decl
);
688 DECL_VISIBILITY_SPECIFIED (new_node
->decl
)
689 = DECL_VISIBILITY_SPECIFIED (old_node
->decl
);
690 DECL_VISIBILITY (new_node
->decl
) = DECL_VISIBILITY (old_node
->decl
);
691 DECL_DLLIMPORT_P (new_node
->decl
) = DECL_DLLIMPORT_P (old_node
->decl
);
692 if (DECL_ONE_ONLY (old_node
->decl
))
693 make_decl_one_only (new_node
->decl
,
694 DECL_ASSEMBLER_NAME (new_node
->decl
));
696 /* The method cgraph_version_clone_with_body () will force the new
697 symbol local. Undo this, and inherit external visibility from
699 new_node
->local
= old_node
->local
;
700 new_node
->externally_visible
= old_node
->externally_visible
;
701 new_node
->calls_declare_variant_alt
702 = old_node
->calls_declare_variant_alt
;
705 /* Mark clones with internal linkage as gc'able, so they will not be
706 emitted unless the vectorizer can actually use them. */
707 if (!TREE_PUBLIC (new_node
->decl
))
708 new_node
->gc_candidate
= true;
713 /* Adjust the return type of the given function to its appropriate
714 vector counterpart. Returns a simd array to be used throughout the
715 function as a return value. */
718 simd_clone_adjust_return_type (struct cgraph_node
*node
)
720 tree fndecl
= node
->decl
;
721 tree orig_rettype
= TREE_TYPE (TREE_TYPE (fndecl
));
725 /* Adjust the function return type. */
726 if (orig_rettype
== void_type_node
)
728 t
= TREE_TYPE (TREE_TYPE (fndecl
));
729 if (INTEGRAL_TYPE_P (t
) || POINTER_TYPE_P (t
))
730 veclen
= node
->simdclone
->vecsize_int
;
732 veclen
= node
->simdclone
->vecsize_float
;
733 if (known_eq (veclen
, 0U))
734 veclen
= node
->simdclone
->simdlen
;
736 veclen
= exact_div (veclen
, GET_MODE_BITSIZE (SCALAR_TYPE_MODE (t
)));
737 if (multiple_p (veclen
, node
->simdclone
->simdlen
))
738 veclen
= node
->simdclone
->simdlen
;
739 if (POINTER_TYPE_P (t
))
740 t
= pointer_sized_int_node
;
741 if (known_eq (veclen
, node
->simdclone
->simdlen
))
742 t
= build_vector_type (t
, node
->simdclone
->simdlen
);
745 t
= build_vector_type (t
, veclen
);
746 t
= build_array_type_nelts (t
, exact_div (node
->simdclone
->simdlen
,
749 TREE_TYPE (TREE_TYPE (fndecl
)) = t
;
750 if (!node
->definition
)
753 t
= DECL_RESULT (fndecl
);
754 /* Adjust the DECL_RESULT. */
755 gcc_assert (TREE_TYPE (t
) != void_type_node
);
756 TREE_TYPE (t
) = TREE_TYPE (TREE_TYPE (fndecl
));
759 tree atype
= build_array_type_nelts (orig_rettype
,
760 node
->simdclone
->simdlen
);
761 if (maybe_ne (veclen
, node
->simdclone
->simdlen
))
762 return build1 (VIEW_CONVERT_EXPR
, atype
, t
);
764 /* Set up a SIMD array to use as the return value. */
765 tree retval
= create_tmp_var_raw (atype
, "retval");
766 gimple_add_tmp_var (retval
);
770 /* Each vector argument has a corresponding array to be used locally
771 as part of the eventual loop. Create such temporary array and
774 PREFIX is the prefix to be used for the temporary.
776 TYPE is the inner element type.
778 SIMDLEN is the number of elements. */
781 create_tmp_simd_array (const char *prefix
, tree type
, poly_uint64 simdlen
)
/* Array type holding SIMDLEN elements of TYPE.  */
783 tree atype
= build_array_type_nelts (type
, simdlen
);
/* Create the temporary with PREFIX as its name prefix, then register it
   through gimple_add_tmp_var.  */
784 tree avar
= create_tmp_var_raw (atype
, prefix
);
785 gimple_add_tmp_var (avar
);
789 /* Modify the function argument types to their corresponding vector
790 counterparts if appropriate. Also, create one array for each simd
791 argument to be used locally when using the function arguments as
794 NODE is the function whose arguments are to be adjusted.
796 If NODE does not represent function definition, returns NULL. Otherwise
797 returns an adjustment class that will be filled describing how the argument
798 declarations will be remapped. New arguments which are not to be remapped
799 are marked with USER_FLAG. */
801 static ipa_param_body_adjustments
*
802 simd_clone_adjust_argument_types (struct cgraph_node
*node
)
806 if (node
->definition
)
807 push_function_arg_decls (&args
, node
->decl
);
809 simd_clone_vector_of_formal_parm_types (&args
, node
->decl
);
810 struct cgraph_simd_clone
*sc
= node
->simdclone
;
811 vec
<ipa_adjusted_param
, va_gc
> *new_params
= NULL
;
812 vec_safe_reserve (new_params
, sc
->nargs
);
816 for (i
= 0; i
< sc
->nargs
; ++i
)
818 ipa_adjusted_param adj
;
819 memset (&adj
, 0, sizeof (adj
));
821 tree parm_type
= node
->definition
? TREE_TYPE (parm
) : parm
;
823 adj
.prev_clone_index
= i
;
825 sc
->args
[i
].orig_arg
= node
->definition
? parm
: NULL_TREE
;
826 sc
->args
[i
].orig_type
= parm_type
;
828 switch (sc
->args
[i
].arg_type
)
831 /* No adjustment necessary for scalar arguments. */
832 adj
.op
= IPA_PARAM_OP_COPY
;
834 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
835 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
836 if (node
->definition
)
837 sc
->args
[i
].simd_array
838 = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm
)),
839 TREE_TYPE (parm_type
),
841 adj
.op
= IPA_PARAM_OP_COPY
;
843 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
844 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
845 case SIMD_CLONE_ARG_TYPE_VECTOR
:
846 if (INTEGRAL_TYPE_P (parm_type
) || POINTER_TYPE_P (parm_type
))
847 veclen
= sc
->vecsize_int
;
849 veclen
= sc
->vecsize_float
;
850 if (known_eq (veclen
, 0U))
851 veclen
= sc
->simdlen
;
855 GET_MODE_BITSIZE (SCALAR_TYPE_MODE (parm_type
)));
856 if (multiple_p (veclen
, sc
->simdlen
))
857 veclen
= sc
->simdlen
;
858 adj
.op
= IPA_PARAM_OP_NEW
;
859 adj
.param_prefix_index
= IPA_PARAM_PREFIX_SIMD
;
860 if (POINTER_TYPE_P (parm_type
))
861 adj
.type
= build_vector_type (pointer_sized_int_node
, veclen
);
863 adj
.type
= build_vector_type (parm_type
, veclen
);
864 sc
->args
[i
].vector_type
= adj
.type
;
865 k
= vector_unroll_factor (sc
->simdlen
, veclen
);
866 for (j
= 1; j
< k
; j
++)
868 vec_safe_push (new_params
, adj
);
871 memset (&adj
, 0, sizeof (adj
));
872 adj
.op
= IPA_PARAM_OP_NEW
;
874 adj
.param_prefix_index
= IPA_PARAM_PREFIX_SIMD
;
876 adj
.prev_clone_index
= i
;
877 adj
.type
= sc
->args
[i
].vector_type
;
881 if (node
->definition
)
882 sc
->args
[i
].simd_array
883 = create_tmp_simd_array (DECL_NAME (parm
)
884 ? IDENTIFIER_POINTER (DECL_NAME (parm
))
885 : NULL
, parm_type
, sc
->simdlen
);
887 vec_safe_push (new_params
, adj
);
892 tree base_type
= simd_clone_compute_base_data_type (sc
->origin
, sc
);
893 ipa_adjusted_param adj
;
894 memset (&adj
, 0, sizeof (adj
));
895 adj
.op
= IPA_PARAM_OP_NEW
;
897 adj
.param_prefix_index
= IPA_PARAM_PREFIX_MASK
;
900 adj
.prev_clone_index
= i
;
901 if (INTEGRAL_TYPE_P (base_type
) || POINTER_TYPE_P (base_type
))
902 veclen
= sc
->vecsize_int
;
904 veclen
= sc
->vecsize_float
;
905 if (known_eq (veclen
, 0U))
906 veclen
= sc
->simdlen
;
908 veclen
= exact_div (veclen
,
909 GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type
)));
910 if (multiple_p (veclen
, sc
->simdlen
))
911 veclen
= sc
->simdlen
;
912 if (sc
->mask_mode
!= VOIDmode
)
914 = lang_hooks
.types
.type_for_mode (sc
->mask_mode
, 1);
915 else if (POINTER_TYPE_P (base_type
))
916 adj
.type
= build_vector_type (pointer_sized_int_node
, veclen
);
918 adj
.type
= build_vector_type (base_type
, veclen
);
919 vec_safe_push (new_params
, adj
);
921 k
= vector_unroll_factor (sc
->simdlen
, veclen
);
922 for (j
= 1; j
< k
; j
++)
923 vec_safe_push (new_params
, adj
);
925 /* We have previously allocated one extra entry for the mask. Use
928 if (sc
->mask_mode
!= VOIDmode
)
929 base_type
= boolean_type_node
;
930 if (node
->definition
)
933 = build_decl (UNKNOWN_LOCATION
, PARM_DECL
, NULL
, base_type
);
934 if (sc
->mask_mode
== VOIDmode
)
935 sc
->args
[i
].simd_array
936 = create_tmp_simd_array ("mask", base_type
, sc
->simdlen
);
938 sc
->args
[i
].simd_array
939 = create_tmp_simd_array ("mask", adj
.type
, k
);
941 sc
->args
[i
].simd_array
= NULL_TREE
;
943 sc
->args
[i
].orig_type
= base_type
;
944 sc
->args
[i
].arg_type
= SIMD_CLONE_ARG_TYPE_MASK
;
945 sc
->args
[i
].vector_type
= adj
.type
;
948 if (node
->definition
)
950 ipa_param_body_adjustments
*adjustments
951 = new ipa_param_body_adjustments (new_params
, node
->decl
);
953 adjustments
->modify_formal_parameters ();
958 tree new_arg_types
= NULL_TREE
, new_reversed
;
959 bool last_parm_void
= false;
960 if (args
.length () > 0 && args
.last () == void_type_node
)
961 last_parm_void
= true;
963 gcc_assert (TYPE_ARG_TYPES (TREE_TYPE (node
->decl
)));
964 j
= vec_safe_length (new_params
);
965 for (i
= 0; i
< j
; i
++)
967 struct ipa_adjusted_param
*adj
= &(*new_params
)[i
];
969 if (adj
->op
== IPA_PARAM_OP_COPY
)
970 ptype
= args
[adj
->base_index
];
973 new_arg_types
= tree_cons (NULL_TREE
, ptype
, new_arg_types
);
975 new_reversed
= nreverse (new_arg_types
);
979 TREE_CHAIN (new_arg_types
) = void_list_node
;
981 new_reversed
= void_list_node
;
983 TYPE_ARG_TYPES (TREE_TYPE (node
->decl
)) = new_reversed
;
988 /* Initialize and copy the function arguments in NODE to their
989 corresponding local simd arrays. Returns a fresh gimple_seq with
990 the instruction sequence generated. */
993 simd_clone_init_simd_arrays (struct cgraph_node
*node
,
994 ipa_param_body_adjustments
*adjustments
)
996 gimple_seq seq
= NULL
;
997 unsigned i
= 0, j
= 0, k
;
999 for (tree arg
= DECL_ARGUMENTS (node
->decl
);
1001 arg
= DECL_CHAIN (arg
), i
++, j
++)
1003 if ((*adjustments
->m_adj_params
)[j
].op
== IPA_PARAM_OP_COPY
1004 || POINTER_TYPE_P (TREE_TYPE (arg
)))
1007 node
->simdclone
->args
[i
].vector_arg
= arg
;
1009 tree array
= node
->simdclone
->args
[i
].simd_array
;
1010 if (node
->simdclone
->mask_mode
!= VOIDmode
1011 && node
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_MASK
)
1013 if (array
== NULL_TREE
)
1016 = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (array
))));
1017 for (k
= 0; k
<= l
; k
++)
1021 arg
= DECL_CHAIN (arg
);
1024 tree t
= build4 (ARRAY_REF
, TREE_TYPE (TREE_TYPE (array
)),
1025 array
, size_int (k
), NULL
, NULL
);
1026 t
= build2 (MODIFY_EXPR
, TREE_TYPE (t
), t
, arg
);
1027 gimplify_and_add (t
, &seq
);
1031 if (known_eq (simd_clone_subparts (TREE_TYPE (arg
)),
1032 node
->simdclone
->simdlen
))
1034 tree ptype
= build_pointer_type (TREE_TYPE (TREE_TYPE (array
)));
1035 tree ptr
= build_fold_addr_expr (array
);
1036 tree t
= build2 (MEM_REF
, TREE_TYPE (arg
), ptr
,
1037 build_int_cst (ptype
, 0));
1038 t
= build2 (MODIFY_EXPR
, TREE_TYPE (t
), t
, arg
);
1039 gimplify_and_add (t
, &seq
);
1043 unsigned int simdlen
= simd_clone_subparts (TREE_TYPE (arg
));
1044 unsigned int times
= vector_unroll_factor (node
->simdclone
->simdlen
,
1046 tree ptype
= build_pointer_type (TREE_TYPE (TREE_TYPE (array
)));
1047 for (k
= 0; k
< times
; k
++)
1049 tree ptr
= build_fold_addr_expr (array
);
1053 arg
= DECL_CHAIN (arg
);
1056 tree elemtype
= TREE_TYPE (TREE_TYPE (arg
));
1057 elemsize
= GET_MODE_SIZE (SCALAR_TYPE_MODE (elemtype
));
1058 tree t
= build2 (MEM_REF
, TREE_TYPE (arg
), ptr
,
1059 build_int_cst (ptype
, k
* elemsize
* simdlen
));
1060 t
= build2 (MODIFY_EXPR
, TREE_TYPE (t
), t
, arg
);
1061 gimplify_and_add (t
, &seq
);
1068 /* Callback info for ipa_simd_modify_stmt_ops below. */
1070 struct modify_stmt_info
{
1071 ipa_param_body_adjustments
*adjustments
;
1074 /* True if the parent statement was modified by
1075 ipa_simd_modify_stmt_ops. */
1079 /* Callback for walk_gimple_op.
1081 Adjust operands from a given statement as specified in the
1082 adjustments vector in the callback data. */
/* TP points at the operand being visited.  DATA is the walk_stmt_info of
   the walk; its info field carries the modify_stmt_info that holds the
   adjustments, the statement being rewritten and the modified flag.  */
1085 ipa_simd_modify_stmt_ops (tree
*tp
, int *walk_subtrees
, void *data
)
1087 struct walk_stmt_info
*wi
= (struct walk_stmt_info
*) data
;
1088 struct modify_stmt_info
*info
= (struct modify_stmt_info
*) wi
->info
;
/* Look through an ADDR_EXPR to the object whose address is taken.  */
1090 if (TREE_CODE (*tp
) == ADDR_EXPR
)
1091 tp
= &TREE_OPERAND (*tp
, 0);
/* Step inside BIT_FIELD_REF, IMAGPART_EXPR and REALPART_EXPR to their
   first operand.  */
1093 if (TREE_CODE (*tp
) == BIT_FIELD_REF
1094 || TREE_CODE (*tp
) == IMAGPART_EXPR
1095 || TREE_CODE (*tp
) == REALPART_EXPR
)
1096 tp
= &TREE_OPERAND (*tp
, 0);
1098 tree repl
= NULL_TREE
;
1099 ipa_param_body_replacement
*pbr
= NULL
;
/* For a PARM_DECL, ask the adjustments for its expression replacement.  */
1101 if (TREE_CODE (*tp
) == PARM_DECL
)
1103 pbr
= info
->adjustments
->get_expr_replacement (*tp
, true);
1107 else if (TYPE_P (*tp
))
1111 repl
= unshare_expr (repl
);
/* Save the incoming modified flag and clear it so that the nested
   walk_tree over *TP reports only its own changes; the saved value is
   written back afterwards.  */
1117 bool modified
= info
->modified
;
1118 info
->modified
= false;
1119 walk_tree (tp
, ipa_simd_modify_stmt_ops
, wi
, wi
->pset
);
1120 if (!info
->modified
)
1122 info
->modified
= modified
;
1125 info
->modified
= modified
;
/* A GIMPLE_PHI argument that is the address of a PARM_DECL can reuse the
   SSA_NAME stored in PBR->dummy instead of building a new statement.  */
1134 if (gimple_code (info
->stmt
) == GIMPLE_PHI
1136 && TREE_CODE (*orig_tp
) == ADDR_EXPR
1137 && TREE_CODE (TREE_OPERAND (*orig_tp
, 0)) == PARM_DECL
1140 gcc_assert (TREE_CODE (pbr
->dummy
) == SSA_NAME
);
1141 *orig_tp
= pbr
->dummy
;
1142 info
->modified
= true;
/* Form the address of the replacement.  */
1146 repl
= build_fold_addr_expr (repl
);
1148 if (is_gimple_debug (info
->stmt
))
/* Bind REPL to a new debug expression decl with a debug source bind.  */
1150 tree vexpr
= build_debug_expr_decl (TREE_TYPE (repl
));
1151 stmt
= gimple_build_debug_source_bind (vexpr
, repl
, NULL
);
/* Assign REPL to a fresh SSA name and continue with that name as REPL.  */
1156 stmt
= gimple_build_assign (make_ssa_name (TREE_TYPE (repl
)), repl
);
1157 repl
= gimple_assign_lhs (stmt
);
/* Choose the insertion point for STMT.  For a PHI the candidates are the
   previously inserted statement (info->after_stmt) and the position after
   the labels of the block that follows the entry block.  */
1159 gimple_stmt_iterator gsi
;
1160 if (gimple_code (info
->stmt
) == GIMPLE_PHI
)
1162 if (info
->after_stmt
)
1163 gsi
= gsi_for_stmt (info
->after_stmt
);
1165 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1166 /* Cache SSA_NAME for next time. */
1168 && TREE_CODE (*orig_tp
) == ADDR_EXPR
1169 && TREE_CODE (TREE_OPERAND (*orig_tp
, 0)) == PARM_DECL
)
1171 gcc_assert (!pbr
->dummy
);
/* Position on the statement being rewritten and insert STMT next to
   it.  */
1176 gsi
= gsi_for_stmt (info
->stmt
);
1177 if (info
->after_stmt
)
1178 gsi_insert_after (&gsi
, stmt
, GSI_SAME_STMT
);
1180 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1181 if (gimple_code (info
->stmt
) == GIMPLE_PHI
)
1182 info
->after_stmt
= stmt
;
/* The replacement's type is not trivially convertible to the operand's
   type: view-convert REPL to the operand's type.  */
1185 else if (!useless_type_conversion_p (TREE_TYPE (*tp
), TREE_TYPE (repl
)))
1187 tree vce
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (*tp
), repl
);
1193 info
->modified
= true;
1197 /* Traverse the function body and perform all modifications as
1198 described in ADJUSTMENTS. At function return, ADJUSTMENTS will be
1199 modified such that the replacement/reduction value will now be an
1200 offset into the corresponding simd_array.
1202 This function will replace all function argument uses with their
1203 corresponding simd array elements, and adjust the return values
1207 ipa_simd_modify_function_body (struct cgraph_node
*node
,
1208 ipa_param_body_adjustments
*adjustments
,
1209 tree retval_array
, tree iter
)
1215 /* Register replacements for every function argument use to an offset into
1216 the corresponding simd_array. */
1217 for (i
= 0, j
= 0; i
< node
->simdclone
->nargs
; ++i
, ++j
)
1219 if (!node
->simdclone
->args
[i
].vector_arg
1220 || (*adjustments
->m_adj_params
)[j
].user_flag
)
/* R is simd_array[ITER] typed as the original argument; it is registered
   as the replacement for adjustment J.  */
1223 tree basetype
= TREE_TYPE (node
->simdclone
->args
[i
].orig_arg
);
1224 tree vectype
= TREE_TYPE (node
->simdclone
->args
[i
].vector_arg
);
1225 tree r
= build4 (ARRAY_REF
, basetype
, node
->simdclone
->args
[i
].simd_array
,
1226 iter
, NULL_TREE
, NULL_TREE
);
1227 adjustments
->register_replacement (&(*adjustments
->m_adj_params
)[j
], r
);
/* When simdlen is a multiple of the vector's subparts, J is advanced by
   vector_unroll_factor - 1 in addition to the loop's own ++j.  */
1229 if (multiple_p (node
->simdclone
->simdlen
, simd_clone_subparts (vectype
)))
1230 j
+= vector_unroll_factor (node
->simdclone
->simdlen
,
1231 simd_clone_subparts (vectype
)) - 1;
1233 adjustments
->sort_replacements ();
/* Visit every SSA name of the function; names whose variable is a
   PARM_DECL with a replacement SSA base are moved onto that base.  */
1236 FOR_EACH_SSA_NAME (i
, name
, cfun
)
1239 if (SSA_NAME_VAR (name
)
1240 && TREE_CODE (SSA_NAME_VAR (name
)) == PARM_DECL
1242 = adjustments
->get_replacement_ssa_base (SSA_NAME_VAR (name
))))
/* A default definition becomes an ordinary definition: NAME is assigned
   the unshared replacement after the labels of the block following the
   entry block, and the parameter loses its default def.  */
1244 if (SSA_NAME_IS_DEFAULT_DEF (name
))
1246 tree old_decl
= SSA_NAME_VAR (name
);
1247 bb
= single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
1248 gimple_stmt_iterator gsi
= gsi_after_labels (bb
);
1249 tree repl
= adjustments
->lookup_replacement (old_decl
, 0);
1250 gcc_checking_assert (repl
);
1251 repl
= unshare_expr (repl
);
1252 set_ssa_default_def (cfun
, old_decl
, NULL_TREE
);
1253 SET_SSA_NAME_VAR_OR_IDENTIFIER (name
, base_var
);
1254 SSA_NAME_IS_DEFAULT_DEF (name
) = 0;
1255 gimple
*stmt
= gimple_build_assign (name
, repl
);
1256 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1259 SET_SSA_NAME_VAR_OR_IDENTIFIER (name
, base_var
);
1263 struct modify_stmt_info info
;
1264 info
.adjustments
= adjustments
;
/* Rewrite operands block by block: PHI arguments first, then the
   ordinary statements.  */
1266 FOR_EACH_BB_FN (bb
, DECL_STRUCT_FUNCTION (node
->decl
))
1268 gimple_stmt_iterator gsi
;
1270 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1272 gphi
*phi
= as_a
<gphi
*> (gsi_stmt (gsi
));
1273 int i
, n
= gimple_phi_num_args (phi
);
1275 info
.after_stmt
= NULL
;
1276 struct walk_stmt_info wi
;
1277 memset (&wi
, 0, sizeof (wi
));
1278 info
.modified
= false;
1280 for (i
= 0; i
< n
; ++i
)
/* Run the callback directly on this PHI argument and store the result
   back into the PHI.  */
1282 int walk_subtrees
= 1;
1283 tree arg
= gimple_phi_arg_def (phi
, i
);
1285 ipa_simd_modify_stmt_ops (&op
, &walk_subtrees
, &wi
);
1288 SET_PHI_ARG_DEF (phi
, i
, op
);
1289 gcc_assert (TREE_CODE (op
) == SSA_NAME
);
1290 if (gimple_phi_arg_edge (phi
, i
)->flags
& EDGE_ABNORMAL
)
1291 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op
) = 1;
1296 gsi
= gsi_start_bb (bb
);
1297 while (!gsi_end_p (gsi
))
1299 gimple
*stmt
= gsi_stmt (gsi
);
1301 info
.after_stmt
= NULL
;
1302 struct walk_stmt_info wi
;
1304 memset (&wi
, 0, sizeof (wi
));
1305 info
.modified
= false;
1307 walk_gimple_op (stmt
, ipa_simd_modify_stmt_ops
, &wi
);
/* For a return statement the edge to the exit block becomes a fallthru
   edge.  */
1309 if (greturn
*return_stmt
= dyn_cast
<greturn
*> (stmt
))
1311 tree retval
= gimple_return_retval (return_stmt
);
1312 edge e
= find_edge (bb
, EXIT_BLOCK_PTR_FOR_FN (cfun
));
1313 e
->flags
|= EDGE_FALLTHRU
;
1316 gsi_remove (&gsi
, true);
1320 /* Replace `return foo' with `retval_array[iter] = foo'. */
1321 tree ref
= build4 (ARRAY_REF
, TREE_TYPE (retval
),
1322 retval_array
, iter
, NULL
, NULL
);
1323 stmt
= gimple_build_assign (ref
, retval
);
1324 gsi_replace (&gsi
, stmt
, true);
1325 info
.modified
= true;
1331 /* If the above changed the var of a debug bind into something
1332 different, remove the debug stmt. We could also for all the
1333 replaced parameters add VAR_DECLs for debug info purposes,
1334 add debug stmts for those to be the simd array accesses and
1335 replace debug stmt var operand with that var. Debugging of
1336 vectorized loops doesn't work too well, so don't bother for
1338 if ((gimple_debug_bind_p (stmt
)
1339 && !DECL_P (gimple_debug_bind_get_var (stmt
)))
1340 || (gimple_debug_source_bind_p (stmt
)
1341 && !DECL_P (gimple_debug_source_bind_get_var (stmt
))))
1343 gsi_remove (&gsi
, true);
1346 if (maybe_clean_eh_stmt (stmt
))
1347 gimple_purge_dead_eh_edges (gimple_bb (stmt
));
/* Compute the addend for linear argument I of NODE, expressed in type
   ADDTYPE.  The constant-step kinds yield build_int_cst of the recorded
   linear_step.  For the variable-step kinds linear_step is used as the
   index of another argument: that argument is brought into an SSA name
   (and dereferenced when it has REFERENCE_TYPE), converted to ADDTYPE if
   the types differ, and, when PTYPE is a pointer whose pointee has an
   INTEGER_CST size, multiplied by that size.  Statements created here
   are inserted after the labels of ENTRY_BB.  */
1354 /* Helper function of simd_clone_adjust, return linear step addend
1358 simd_clone_linear_addend (struct cgraph_node
*node
, unsigned int i
,
1359 tree addtype
, basic_block entry_bb
)
1361 tree ptype
= NULL_TREE
;
1362 switch (node
->simdclone
->args
[i
].arg_type
)
1364 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
1365 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
1366 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
1367 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
1368 return build_int_cst (addtype
, node
->simdclone
->args
[i
].linear_step
);
1369 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
1370 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
1371 ptype
= TREE_TYPE (node
->simdclone
->args
[i
].orig_arg
);
1373 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
1374 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
1375 ptype
= TREE_TYPE (TREE_TYPE (node
->simdclone
->args
[i
].orig_arg
));
1381 unsigned int idx
= node
->simdclone
->args
[i
].linear_step
;
1382 tree arg
= node
->simdclone
->args
[idx
].orig_arg
;
1383 gcc_assert (is_gimple_reg_type (TREE_TYPE (arg
)));
1384 gimple_stmt_iterator gsi
= gsi_after_labels (entry_bb
);
1387 if (is_gimple_reg (arg
))
1388 ret
= get_or_create_ssa_default_def (cfun
, arg
);
1391 g
= gimple_build_assign (make_ssa_name (TREE_TYPE (arg
)), arg
);
1392 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
1393 ret
= gimple_assign_lhs (g
);
1395 if (TREE_CODE (TREE_TYPE (arg
)) == REFERENCE_TYPE
)
1397 g
= gimple_build_assign (make_ssa_name (TREE_TYPE (TREE_TYPE (arg
))),
1398 build_simple_mem_ref (ret
));
1399 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
1400 ret
= gimple_assign_lhs (g
);
1402 if (!useless_type_conversion_p (addtype
, TREE_TYPE (ret
)))
1404 g
= gimple_build_assign (make_ssa_name (addtype
), NOP_EXPR
, ret
);
1405 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
1406 ret
= gimple_assign_lhs (g
);
1408 if (POINTER_TYPE_P (ptype
))
1410 tree size
= TYPE_SIZE_UNIT (TREE_TYPE (ptype
));
1411 if (size
&& TREE_CODE (size
) == INTEGER_CST
)
1413 g
= gimple_build_assign (make_ssa_name (addtype
), MULT_EXPR
,
1414 ret
, fold_convert (addtype
, size
));
1415 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
1416 ret
= gimple_assign_lhs (g
);
/* Turn the clone NODE into its SIMD form.  The visible steps are: switch
   to NODE's function, give the decl a distinct type copy and call the
   target adjust hook, adjust the return and argument types, initialize
   the simd arrays, rewrite the body so that it indexes them by an
   iteration variable, and build the loop (increment block, optional mask
   test, exit condition, PHIs and new return) around the original body.
   Finally the uniform, aligned and linear arguments are handled one by
   one.  */
1422 /* Adjust the argument types in NODE to their appropriate vector
1426 simd_clone_adjust (struct cgraph_node
*node
)
1428 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
1430 TREE_TYPE (node
->decl
) = build_distinct_type_copy (TREE_TYPE (node
->decl
));
1431 targetm
.simd_clone
.adjust (node
);
1433 tree retval
= simd_clone_adjust_return_type (node
);
1434 ipa_param_body_adjustments
*adjustments
1435 = simd_clone_adjust_argument_types (node
);
1436 gcc_assert (adjustments
);
1438 push_gimplify_context ();
1440 gimple_seq seq
= simd_clone_init_simd_arrays (node
, adjustments
);
1442 /* Adjust all uses of vector arguments accordingly. Adjust all
1443 return values accordingly. */
/* ITER is an unsigned temporary; its SSA name ITER1 is the index handed
   to ipa_simd_modify_function_body.  */
1444 tree iter
= create_tmp_var (unsigned_type_node
, "iter");
1445 tree iter1
= make_ssa_name (iter
);
1446 tree iter2
= NULL_TREE
;
1447 ipa_simd_modify_function_body (node
, adjustments
, retval
, iter1
);
1450 /* Initialize the iteration variable. */
1451 basic_block entry_bb
= single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
1452 basic_block body_bb
= split_block_after_labels (entry_bb
)->dest
;
1453 gimple_stmt_iterator gsi
= gsi_after_labels (entry_bb
);
1454 /* Insert the SIMD array and iv initialization at function
1456 gsi_insert_seq_before (&gsi
, seq
, GSI_NEW_STMT
);
1458 pop_gimplify_context (NULL
);
1461 basic_block incr_bb
= NULL
;
1462 class loop
*loop
= NULL
;
1464 /* Create a new BB right before the original exit BB, to hold the
1465 iteration increment and the condition/branch. */
1466 if (EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
))
1468 basic_block orig_exit
= EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun
), 0)->src
;
1469 incr_bb
= create_empty_bb (orig_exit
);
1470 incr_bb
->count
= profile_count::zero ();
1471 add_bb_to_loop (incr_bb
, body_bb
->loop_father
);
/* Every edge into the exit block is redirected to INCR_BB, whose count
   accumulates the counts of those edges.  */
1472 while (EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
))
1474 edge e
= EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun
), 0);
1475 redirect_edge_succ (e
, incr_bb
);
1476 incr_bb
->count
+= e
->count ();
1479 else if (node
->simdclone
->inbranch
)
1481 incr_bb
= create_empty_bb (entry_bb
);
1482 incr_bb
->count
= profile_count::zero ();
1483 add_bb_to_loop (incr_bb
, body_bb
->loop_father
);
1488 make_single_succ_edge (incr_bb
, EXIT_BLOCK_PTR_FOR_FN (cfun
), 0);
/* Emit ITER2 = ITER1 + 1 at the end of INCR_BB.  */
1489 gsi
= gsi_last_bb (incr_bb
);
1490 iter2
= make_ssa_name (iter
);
1491 g
= gimple_build_assign (iter2
, PLUS_EXPR
, iter1
,
1492 build_int_cst (unsigned_type_node
, 1));
1493 gsi_insert_after (&gsi
, g
, GSI_CONTINUE_LINKING
);
1495 /* Mostly annotate the loop for the vectorizer (the rest is done
1497 loop
= alloc_loop ();
1498 cfun
->has_force_vectorize_loops
= true;
1499 /* For now, simdlen is always constant. */
1500 loop
->safelen
= node
->simdclone
->simdlen
.to_constant ();
1501 loop
->force_vectorize
= true;
1502 loop
->header
= body_bb
;
1505 /* Branch around the body if the mask applies. */
1506 if (node
->simdclone
->inbranch
)
1508 gsi
= gsi_last_bb (loop
->header
);
1510 = node
->simdclone
->args
[node
->simdclone
->nargs
- 1].simd_array
;
1512 if (node
->simdclone
->mask_mode
!= VOIDmode
)
1515 if (mask_array
== NULL_TREE
)
1517 tree arg
= node
->simdclone
->args
[node
->simdclone
->nargs
1519 mask
= get_or_create_ssa_default_def (cfun
, arg
);
/* C becomes simdlen divided by the number of MASK_ARRAY elements (its
   maximum index plus one) and S is the exact log2 of C.  */
1524 tree maskt
= TREE_TYPE (mask_array
);
1525 int c
= tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (maskt
)));
1526 /* For now, c must be constant here. */
1527 c
= exact_div (node
->simdclone
->simdlen
, c
+ 1).to_constant ();
1528 int s
= exact_log2 (c
);
1531 tree idx
= make_ssa_name (TREE_TYPE (iter1
));
1532 g
= gimple_build_assign (idx
, RSHIFT_EXPR
, iter1
,
1533 build_int_cst (NULL_TREE
, s
));
1534 gsi_insert_after (&gsi
, g
, GSI_CONTINUE_LINKING
);
1535 mask
= make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array
)));
1536 tree aref
= build4 (ARRAY_REF
,
1537 TREE_TYPE (TREE_TYPE (mask_array
)),
1538 mask_array
, idx
, NULL
, NULL
);
1539 g
= gimple_build_assign (mask
, aref
);
1540 gsi_insert_after (&gsi
, g
, GSI_CONTINUE_LINKING
);
1541 shift_cnt
= make_ssa_name (TREE_TYPE (iter1
));
1542 g
= gimple_build_assign (shift_cnt
, BIT_AND_EXPR
, iter1
,
1543 build_int_cst (TREE_TYPE (iter1
), c
));
1544 gsi_insert_after (&gsi
, g
, GSI_CONTINUE_LINKING
);
1546 tree shift_cnt_conv
= shift_cnt
;
1547 if (!useless_type_conversion_p (TREE_TYPE (mask
),
1548 TREE_TYPE (shift_cnt
)))
1550 shift_cnt_conv
= make_ssa_name (TREE_TYPE (mask
));
1551 g
= gimple_build_assign (shift_cnt_conv
, NOP_EXPR
, shift_cnt
);
1552 gsi_insert_after (&gsi
, g
, GSI_CONTINUE_LINKING
);
1554 g
= gimple_build_assign (make_ssa_name (TREE_TYPE (mask
)),
1555 RSHIFT_EXPR
, mask
, shift_cnt_conv
);
1556 gsi_insert_after (&gsi
, g
, GSI_CONTINUE_LINKING
);
1557 mask
= gimple_assign_lhs (g
);
1558 g
= gimple_build_assign (make_ssa_name (TREE_TYPE (mask
)),
1560 build_int_cst (TREE_TYPE (mask
), 1));
1561 gsi_insert_after (&gsi
, g
, GSI_CONTINUE_LINKING
);
1562 mask
= gimple_assign_lhs (g
);
1566 mask
= make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array
)));
1567 tree aref
= build4 (ARRAY_REF
,
1568 TREE_TYPE (TREE_TYPE (mask_array
)),
1569 mask_array
, iter1
, NULL
, NULL
);
1570 g
= gimple_build_assign (mask
, aref
);
1571 gsi_insert_after (&gsi
, g
, GSI_CONTINUE_LINKING
);
1572 int bitsize
= GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (aref
)));
1573 if (!INTEGRAL_TYPE_P (TREE_TYPE (aref
)))
1575 aref
= build1 (VIEW_CONVERT_EXPR
,
1576 build_nonstandard_integer_type (bitsize
, 0),
1578 mask
= make_ssa_name (TREE_TYPE (aref
));
1579 g
= gimple_build_assign (mask
, aref
);
1580 gsi_insert_after (&gsi
, g
, GSI_CONTINUE_LINKING
);
/* Branch to INCR_BB when MASK is zero; the former fallthru edge becomes
   the false edge into the body.  */
1584 g
= gimple_build_cond (EQ_EXPR
, mask
, build_zero_cst (TREE_TYPE (mask
)),
1586 gsi_insert_after (&gsi
, g
, GSI_CONTINUE_LINKING
);
1587 edge e
= make_edge (loop
->header
, incr_bb
, EDGE_TRUE_VALUE
);
1588 e
->probability
= profile_probability::unlikely ().guessed ();
1589 incr_bb
->count
+= e
->count ();
1590 edge fallthru
= FALLTHRU_EDGE (loop
->header
);
1591 fallthru
->flags
= EDGE_FALSE_VALUE
;
1592 fallthru
->probability
= profile_probability::likely ().guessed ();
1595 basic_block latch_bb
= NULL
;
1596 basic_block new_exit_bb
= NULL
;
1598 /* Generate the condition. */
1601 gsi
= gsi_last_bb (incr_bb
);
1602 g
= gimple_build_cond (LT_EXPR
, iter2
,
1603 build_int_cst (unsigned_type_node
,
1604 node
->simdclone
->simdlen
),
1606 gsi_insert_after (&gsi
, g
, GSI_CONTINUE_LINKING
);
1607 edge e
= split_block (incr_bb
, gsi_stmt (gsi
));
1609 new_exit_bb
= split_block_after_labels (latch_bb
)->dest
;
1610 loop
->latch
= latch_bb
;
1612 redirect_edge_succ (FALLTHRU_EDGE (latch_bb
), body_bb
);
1614 edge new_e
= make_edge (incr_bb
, new_exit_bb
, EDGE_FALSE_VALUE
);
1616 /* FIXME: Do we need to distribute probabilities for the conditional? */
1617 new_e
->probability
= profile_probability::guessed_never ();
1618 /* The successor of incr_bb is already pointing to latch_bb; just
1620 make_edge (incr_bb, latch_bb, EDGE_TRUE_VALUE); */
1621 FALLTHRU_EDGE (incr_bb
)->flags
= EDGE_TRUE_VALUE
;
/* ITER1 is defined by a PHI in BODY_BB: zero on the preheader edge and
   ITER2 on the latch edge.  */
1624 gphi
*phi
= create_phi_node (iter1
, body_bb
);
1625 edge preheader_edge
= find_edge (entry_bb
, body_bb
);
1626 edge latch_edge
= NULL
;
1627 add_phi_arg (phi
, build_zero_cst (unsigned_type_node
), preheader_edge
,
1631 latch_edge
= single_succ_edge (latch_bb
);
1632 add_phi_arg (phi
, iter2
, latch_edge
, UNKNOWN_LOCATION
);
1634 /* Generate the new return. */
1635 gsi
= gsi_last_bb (new_exit_bb
);
1637 && TREE_CODE (retval
) == VIEW_CONVERT_EXPR
1638 && TREE_CODE (TREE_OPERAND (retval
, 0)) == RESULT_DECL
)
1639 retval
= TREE_OPERAND (retval
, 0);
1642 retval
= build1 (VIEW_CONVERT_EXPR
,
1643 TREE_TYPE (TREE_TYPE (node
->decl
)),
1645 retval
= force_gimple_operand_gsi (&gsi
, retval
, true, NULL
,
1646 false, GSI_CONTINUE_LINKING
);
1648 g
= gimple_build_return (retval
);
1649 gsi_insert_after (&gsi
, g
, GSI_CONTINUE_LINKING
);
1652 /* Handle aligned clauses by replacing default defs of the aligned
1653 uniform args with __builtin_assume_aligned (arg_N(D), alignment)
1654 lhs. Handle linear by adding PHIs. */
1655 for (unsigned i
= 0; i
< node
->simdclone
->nargs
; i
++)
1656 if (node
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_UNIFORM
1657 && (TREE_ADDRESSABLE (node
->simdclone
->args
[i
].orig_arg
)
1658 || !is_gimple_reg_type
1659 (TREE_TYPE (node
->simdclone
->args
[i
].orig_arg
))))
1661 tree orig_arg
= node
->simdclone
->args
[i
].orig_arg
;
/* ITER1 is reused here as a copy of ORIG_ARG: it is set from ORIG_ARG in
   ENTRY_BB and ORIG_ARG is reloaded from it at the start of BODY_BB.  */
1662 if (is_gimple_reg_type (TREE_TYPE (orig_arg
)))
1663 iter1
= make_ssa_name (TREE_TYPE (orig_arg
));
1666 iter1
= create_tmp_var_raw (TREE_TYPE (orig_arg
));
1667 gimple_add_tmp_var (iter1
);
1669 gsi
= gsi_after_labels (entry_bb
);
1670 g
= gimple_build_assign (iter1
, orig_arg
);
1671 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
1672 gsi
= gsi_after_labels (body_bb
);
1673 g
= gimple_build_assign (orig_arg
, iter1
);
1674 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
1676 else if (node
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_UNIFORM
1677 && DECL_BY_REFERENCE (node
->simdclone
->args
[i
].orig_arg
)
1678 && TREE_CODE (TREE_TYPE (node
->simdclone
->args
[i
].orig_arg
))
1681 (TREE_TYPE (TREE_TYPE (node
->simdclone
->args
[i
].orig_arg
))))
1683 tree orig_arg
= node
->simdclone
->args
[i
].orig_arg
;
1684 tree def
= ssa_default_def (cfun
, orig_arg
);
1685 if (def
&& !has_zero_uses (def
))
1687 iter1
= create_tmp_var_raw (TREE_TYPE (TREE_TYPE (orig_arg
)));
1688 gimple_add_tmp_var (iter1
);
1689 gsi
= gsi_after_labels (entry_bb
);
1690 g
= gimple_build_assign (iter1
, build_simple_mem_ref (def
));
1691 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
1692 gsi
= gsi_after_labels (body_bb
);
1693 g
= gimple_build_assign (build_simple_mem_ref (def
), iter1
);
1694 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
1697 else if (node
->simdclone
->args
[i
].alignment
1698 && node
->simdclone
->args
[i
].arg_type
1699 == SIMD_CLONE_ARG_TYPE_UNIFORM
1700 && (node
->simdclone
->args
[i
].alignment
1701 & (node
->simdclone
->args
[i
].alignment
- 1)) == 0
1702 && TREE_CODE (TREE_TYPE (node
->simdclone
->args
[i
].orig_arg
))
1705 unsigned int alignment
= node
->simdclone
->args
[i
].alignment
;
1706 tree orig_arg
= node
->simdclone
->args
[i
].orig_arg
;
1707 tree def
= ssa_default_def (cfun
, orig_arg
);
1708 if (def
&& !has_zero_uses (def
))
1710 tree fn
= builtin_decl_explicit (BUILT_IN_ASSUME_ALIGNED
);
1711 gimple_seq seq
= NULL
;
1712 bool need_cvt
= false;
1714 = gimple_build_call (fn
, 2, def
, size_int (alignment
));
1716 if (!useless_type_conversion_p (TREE_TYPE (orig_arg
),
1719 tree t
= make_ssa_name (need_cvt
? ptr_type_node
: orig_arg
);
1720 gimple_call_set_lhs (g
, t
);
1721 gimple_seq_add_stmt_without_update (&seq
, g
);
1724 t
= make_ssa_name (orig_arg
);
1725 g
= gimple_build_assign (t
, NOP_EXPR
, gimple_call_lhs (g
));
1726 gimple_seq_add_stmt_without_update (&seq
, g
);
1728 gsi_insert_seq_on_edge_immediate
1729 (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), seq
);
1731 entry_bb
= single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
1732 node
->create_edge (cgraph_node::get_create (fn
),
1733 call
, entry_bb
->count
);
1735 imm_use_iterator iter
;
1736 use_operand_p use_p
;
1738 tree repl
= gimple_get_lhs (g
);
1739 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, def
)
1740 if (is_gimple_debug (use_stmt
) || use_stmt
== call
)
1743 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
1744 SET_USE (use_p
, repl
);
1747 else if ((node
->simdclone
->args
[i
].arg_type
1748 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
1749 || (node
->simdclone
->args
[i
].arg_type
1750 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
)
1751 || (node
->simdclone
->args
[i
].arg_type
1752 == SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
)
1753 || (node
->simdclone
->args
[i
].arg_type
1754 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
))
1756 tree orig_arg
= node
->simdclone
->args
[i
].orig_arg
;
1757 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (orig_arg
))
1758 || POINTER_TYPE_P (TREE_TYPE (orig_arg
)));
1759 tree def
= NULL_TREE
;
1760 if (TREE_ADDRESSABLE (orig_arg
))
1762 def
= make_ssa_name (TREE_TYPE (orig_arg
));
1763 iter1
= make_ssa_name (TREE_TYPE (orig_arg
));
1765 iter2
= make_ssa_name (TREE_TYPE (orig_arg
));
1766 gsi
= gsi_after_labels (entry_bb
);
1767 g
= gimple_build_assign (def
, orig_arg
);
1768 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
1772 def
= ssa_default_def (cfun
, orig_arg
);
1773 if (!def
|| has_zero_uses (def
))
1777 iter1
= make_ssa_name (orig_arg
);
1779 iter2
= make_ssa_name (orig_arg
);
/* ITER1 is the PHI result in BODY_BB with DEF on the preheader edge and
   ITER2 on the latch edge; ITER2 = ITER1 + ADDCST is emitted in INCR_BB
   using PLUS_EXPR for integral types and POINTER_PLUS_EXPR otherwise.  */
1784 phi
= create_phi_node (iter1
, body_bb
);
1785 add_phi_arg (phi
, def
, preheader_edge
, UNKNOWN_LOCATION
);
1788 add_phi_arg (phi
, iter2
, latch_edge
, UNKNOWN_LOCATION
);
1789 enum tree_code code
= INTEGRAL_TYPE_P (TREE_TYPE (orig_arg
))
1790 ? PLUS_EXPR
: POINTER_PLUS_EXPR
;
1791 tree addtype
= INTEGRAL_TYPE_P (TREE_TYPE (orig_arg
))
1792 ? TREE_TYPE (orig_arg
) : sizetype
;
1793 tree addcst
= simd_clone_linear_addend (node
, i
, addtype
,
1795 gsi
= gsi_last_bb (incr_bb
);
1796 g
= gimple_build_assign (iter2
, code
, iter1
, addcst
);
1797 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
1800 imm_use_iterator iter
;
1801 use_operand_p use_p
;
1803 if (TREE_ADDRESSABLE (orig_arg
))
1805 gsi
= gsi_after_labels (body_bb
);
1806 g
= gimple_build_assign (orig_arg
, iter1
);
1807 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
1810 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, def
)
1811 if (use_stmt
== phi
)
1814 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
1815 SET_USE (use_p
, iter1
);
1818 else if (node
->simdclone
->args
[i
].arg_type
1819 == SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
1820 || (node
->simdclone
->args
[i
].arg_type
1821 == SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
))
1823 tree orig_arg
= node
->simdclone
->args
[i
].orig_arg
;
1824 tree def
= ssa_default_def (cfun
, orig_arg
);
1825 gcc_assert (!TREE_ADDRESSABLE (orig_arg
)
1826 && TREE_CODE (TREE_TYPE (orig_arg
)) == REFERENCE_TYPE
);
1827 if (def
&& !has_zero_uses (def
))
1829 tree rtype
= TREE_TYPE (TREE_TYPE (orig_arg
));
1830 iter1
= make_ssa_name (orig_arg
);
1832 iter2
= make_ssa_name (orig_arg
);
1833 tree iter3
= make_ssa_name (rtype
);
1834 tree iter4
= make_ssa_name (rtype
);
1835 tree iter5
= incr_bb
? make_ssa_name (rtype
) : NULL_TREE
;
1836 gsi
= gsi_after_labels (entry_bb
);
1838 = gimple_build_assign (iter3
, build_simple_mem_ref (def
));
1839 gsi_insert_before (&gsi
, load
, GSI_NEW_STMT
);
1841 tree array
= node
->simdclone
->args
[i
].simd_array
;
1842 TREE_ADDRESSABLE (array
) = 1;
1843 tree ptr
= build_fold_addr_expr (array
);
1844 phi
= create_phi_node (iter1
, body_bb
);
1845 add_phi_arg (phi
, ptr
, preheader_edge
, UNKNOWN_LOCATION
);
1848 add_phi_arg (phi
, iter2
, latch_edge
, UNKNOWN_LOCATION
);
1849 g
= gimple_build_assign (iter2
, POINTER_PLUS_EXPR
, iter1
,
1850 TYPE_SIZE_UNIT (TREE_TYPE (iter3
)));
1851 gsi
= gsi_last_bb (incr_bb
);
1852 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
1855 phi
= create_phi_node (iter4
, body_bb
);
1856 add_phi_arg (phi
, iter3
, preheader_edge
, UNKNOWN_LOCATION
);
1859 add_phi_arg (phi
, iter5
, latch_edge
, UNKNOWN_LOCATION
);
1860 enum tree_code code
= INTEGRAL_TYPE_P (TREE_TYPE (iter3
))
1861 ? PLUS_EXPR
: POINTER_PLUS_EXPR
;
1862 tree addtype
= INTEGRAL_TYPE_P (TREE_TYPE (iter3
))
1863 ? TREE_TYPE (iter3
) : sizetype
;
1864 tree addcst
= simd_clone_linear_addend (node
, i
, addtype
,
1866 g
= gimple_build_assign (iter5
, code
, iter4
, addcst
);
1867 gsi
= gsi_last_bb (incr_bb
);
1868 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
1871 g
= gimple_build_assign (build_simple_mem_ref (iter1
), iter4
);
1872 gsi
= gsi_after_labels (body_bb
);
1873 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
1875 imm_use_iterator iter
;
1876 use_operand_p use_p
;
1878 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, def
)
1879 if (use_stmt
== load
)
1882 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
1883 SET_USE (use_p
, iter1
);
/* Unless the referenced type is read-only (and only when INCR_BB exists),
   copy element zero of the simd array back through DEF in NEW_EXIT_BB.  */
1885 if (!TYPE_READONLY (rtype
) && incr_bb
)
1887 tree v
= make_ssa_name (rtype
);
1888 tree aref
= build4 (ARRAY_REF
, rtype
, array
,
1889 size_zero_node
, NULL_TREE
,
1891 gsi
= gsi_after_labels (new_exit_bb
);
1892 g
= gimple_build_assign (v
, aref
);
1893 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
1894 g
= gimple_build_assign (build_simple_mem_ref (def
), v
);
1895 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
/* Recompute dominators, add the new loop under its header's loop father
   and bring SSA form up to date.  */
1900 calculate_dominance_info (CDI_DOMINATORS
);
1902 add_loop (loop
, loop
->header
->loop_father
);
1903 update_ssa (TODO_update_ssa
);
1908 /* If the function in NODE is tagged as an elemental SIMD function,
1909 create the appropriate SIMD clones. */
1912 expand_simd_clones (struct cgraph_node
*node
)
1915 bool explicit_p
= true;
/* Test for nodes that were inlined into another function or that carry
   the "noclone" attribute.  */
1917 if (node
->inlined_to
1918 || lookup_attribute ("noclone", DECL_ATTRIBUTES (node
->decl
)))
1921 attr
= lookup_attribute ("omp declare simd",
1922 DECL_ATTRIBUTES (node
->decl
));
1924 /* See if we can add an "omp declare simd" directive implicitly
1925 before giving up. */
1926 /* FIXME: OpenACC "#pragma acc routine" translates into
1927 "omp declare target", but appears also to have some other effects
1928 that conflict with generating SIMD clones, causing ICEs. So don't
1929 do this if we've got OpenACC instead of OpenMP. */
1930 if (attr
== NULL_TREE
1931 #ifdef ACCEL_COMPILER
1932 && (flag_openmp_target_simd_clone
== OMP_TARGET_SIMD_CLONE_ANY
1933 || flag_openmp_target_simd_clone
== OMP_TARGET_SIMD_CLONE_NOHOST
)
1935 && (flag_openmp_target_simd_clone
== OMP_TARGET_SIMD_CLONE_ANY
1936 || flag_openmp_target_simd_clone
== OMP_TARGET_SIMD_CLONE_HOST
)
1938 && !oacc_get_fn_attrib (node
->decl
)
1939 && ok_for_auto_simd_clone (node
))
/* Prepend a synthetic "omp declare simd" attribute whose value is NULL to
   the declaration's attribute list.  */
1941 attr
= tree_cons (get_identifier ("omp declare simd"), NULL
,
1942 DECL_ATTRIBUTES (node
->decl
));
1943 DECL_ATTRIBUTES (node
->decl
) = attr
;
1947 if (attr
== NULL_TREE
)
1951 #pragma omp declare simd
1953 in C, there we don't know the argument types at all. */
1954 if (!node
->definition
1955 && TYPE_ARG_TYPES (TREE_TYPE (node
->decl
)) == NULL_TREE
)
1958 /* Call this before creating clone_info, as it might ggc_collect. */
1959 if (node
->definition
&& node
->has_gimple_body_p ())
1964 /* Start with parsing the "omp declare simd" attribute(s). */
1965 bool inbranch_clause_specified
;
1966 struct cgraph_simd_clone
*clone_info
1967 = simd_clone_clauses_extract (node
, TREE_VALUE (attr
),
1968 &inbranch_clause_specified
);
1969 if (clone_info
== NULL
)
/* Remember the simdlen extracted from the clauses; it is written back
   into each fresh copy of CLONE_INFO made in the loop below.  */
1972 poly_uint64 orig_simdlen
= clone_info
->simdlen
;
1973 tree base_type
= simd_clone_compute_base_data_type (node
, clone_info
);
1975 /* The target can return 0 (no simd clones should be created),
1976 1 (just one ISA of simd clones should be created) or higher
1977 count of ISA variants. In that case, clone_info is initialized
1978 for the first ISA variant. */
1980 = targetm
.simd_clone
.compute_vecsize_and_simdlen (node
, clone_info
,
1986 /* Loop over all COUNT ISA variants, and if !INBRANCH_CLAUSE_SPECIFIED,
1987 also create one inbranch and one !inbranch clone of it. */
1988 for (int i
= 0; i
< count
* 2; i
++)
1990 struct cgraph_simd_clone
*clone
= clone_info
;
1991 if (inbranch_clause_specified
&& (i
& 1) != 0)
1996 clone
= simd_clone_struct_alloc (clone_info
->nargs
1998 simd_clone_struct_copy (clone
, clone_info
);
1999 /* Undo changes targetm.simd_clone.compute_vecsize_and_simdlen
2000 and simd_clone_adjust_argument_types did to the first
2002 clone
->nargs
-= clone_info
->inbranch
;
2003 clone
->simdlen
= orig_simdlen
;
2004 /* And call the target hook again to get the right ISA. */
2005 targetm
.simd_clone
.compute_vecsize_and_simdlen (node
, clone
,
2010 clone
->inbranch
= 1;
2013 /* simd_clone_mangle might fail if such a clone has been created
2015 tree id
= simd_clone_mangle (node
, clone
);
2016 if (id
== NULL_TREE
)
2019 clone
->nargs
+= clone
->inbranch
;
2023 /* Only when we are sure we want to create the clone actually
2024 clone the function (or definitions) or create another
2025 extern FUNCTION_DECL (for prototypes without definitions). */
2026 struct cgraph_node
*n
= simd_clone_create (node
, !explicit_p
);
2030 clone
->nargs
+= clone
->inbranch
;
2034 n
->simdclone
= clone
;
2035 clone
->origin
= node
;
2036 clone
->next_clone
= NULL
;
/* Append N to NODE's list of simd clones.  The first clone's prev_clone
   always designates the last clone, so a single clone points at itself
   and later clones are linked in after the current last one.  */
2037 if (node
->simd_clones
== NULL
)
2039 clone
->prev_clone
= n
;
2040 node
->simd_clones
= n
;
2044 clone
->prev_clone
= node
->simd_clones
->simdclone
->prev_clone
;
2045 clone
->prev_clone
->simdclone
->next_clone
= n
;
2046 node
->simd_clones
->simdclone
->prev_clone
= n
;
2048 symtab
->change_decl_assembler_name (n
->decl
, id
);
2049 /* And finally adjust the return type, parameters and for
2050 definitions also function body. */
2051 if (node
->definition
)
2052 simd_clone_adjust (n
);
2056 = build_distinct_type_copy (TREE_TYPE (n
->decl
));
2057 targetm
.simd_clone
.adjust (n
);
2058 simd_clone_adjust_return_type (n
);
2059 simd_clone_adjust_argument_types (n
);
2062 fprintf (dump_file
, "\nGenerated %s clone %s\n",
2063 (TREE_PUBLIC (n
->decl
) ? "global" : "local"),
2064 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n
->decl
)));
/* Repeat for the next "omp declare simd" attribute further along the
   attribute chain, if there is one.  */
2067 while ((attr
= lookup_attribute ("omp declare simd", TREE_CHAIN (attr
))));
2070 /* Entry point for IPA simd clone creation pass. */
2073 ipa_omp_simd_clone (void)
2075 struct cgraph_node
*node
;
/* Run expand_simd_clones on every node visited by FOR_EACH_FUNCTION.  */
2076 FOR_EACH_FUNCTION (node
)
2077 expand_simd_clones (node
);
/* Static description of the "simdclone" pass: a simple IPA pass in the
   OMP opt-info group that requires the SSA and CFG properties and
   provides, destroys and schedules nothing else.  */
2083 const pass_data pass_data_omp_simd_clone
=
2085 SIMPLE_IPA_PASS
, /* type */
2086 "simdclone", /* name */
2087 OPTGROUP_OMP
, /* optinfo_flags */
2088 TV_NONE
, /* tv_id */
2089 ( PROP_ssa
| PROP_cfg
), /* properties_required */
2090 0, /* properties_provided */
2091 0, /* properties_destroyed */
2092 0, /* todo_flags_start */
2093 0, /* todo_flags_finish */
/* Pass object for SIMD clone creation.  It is constructed from
   pass_data_omp_simd_clone and its execute method simply returns the
   result of ipa_omp_simd_clone.  */
2096 class pass_omp_simd_clone
: public simple_ipa_opt_pass
2099 pass_omp_simd_clone(gcc::context
*ctxt
)
2100 : simple_ipa_opt_pass(pass_data_omp_simd_clone
, ctxt
)
2103 /* opt_pass methods: */
2104 bool gate (function
*) final override
;
2105 unsigned int execute (function
*) final override
2107 return ipa_omp_simd_clone ();
/* Gate: the pass is enabled only when the target supplies the
   simd_clone.compute_vecsize_and_simdlen hook.  */
2112 pass_omp_simd_clone::gate (function
*)
2114 return targetm
.simd_clone
.compute_vecsize_and_simdlen
!= NULL
;
/* Factory: allocate a new pass_omp_simd_clone for the context CTXT and
   return it.  */
2119 simple_ipa_opt_pass
*
2120 make_pass_omp_simd_clone (gcc::context
*ctxt
)
2122 return new pass_omp_simd_clone (ctxt
);