Add assembler CFI directives to millicode division and remainder routines.
[official-gcc.git] / gcc / omp-simd-clone.cc
blob03ff86e7d188cd8eb30d5d971c5c9bbc7d64543f
1 /* OMP constructs' SIMD clone supporting code.
3 Copyright (C) 2005-2023 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "alloc-pool.h"
30 #include "tree-pass.h"
31 #include "ssa.h"
32 #include "cgraph.h"
33 #include "pretty-print.h"
34 #include "diagnostic-core.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "cfganal.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "gimple-walk.h"
42 #include "langhooks.h"
43 #include "tree-cfg.h"
44 #include "tree-into-ssa.h"
45 #include "tree-dfa.h"
46 #include "cfgloop.h"
47 #include "symbol-summary.h"
48 #include "ipa-param-manipulation.h"
49 #include "tree-eh.h"
50 #include "varasm.h"
51 #include "stringpool.h"
52 #include "attribs.h"
53 #include "omp-simd-clone.h"
54 #include "omp-low.h"
55 #include "omp-general.h"
57 /* Print debug info for ok_for_auto_simd_clone to the dump file, logging
58 failure reason EXCUSE for function DECL. Always returns false. */
59 static bool
60 auto_simd_fail (tree decl, const char *excuse)
62 if (dump_file && (dump_flags & TDF_DETAILS))
63 fprintf (dump_file, "\nNot auto-cloning %s because %s\n",
64 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)),
65 excuse);
66 return false;
69 /* Helper function for ok_for_auto_simd_clone; return false if the statement
70 violates restrictions for an "omp declare simd" function. Specifically,
71 the function must not
72 - throw or call setjmp/longjmp
73 - write memory that could alias parallel calls
74 - read volatile memory
75 - include openmp directives or calls
76 - call functions that might do those things */
78 static bool
79 auto_simd_check_stmt (gimple *stmt, tree outer)
81 tree decl;
83 switch (gimple_code (stmt))
85 case GIMPLE_CALL:
87 /* Calls to functions that are CONST or PURE are ok, even if they
88 are internal functions without a decl. Reject other internal
89 functions. */
90 if (gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE))
91 break;
92 if (gimple_call_internal_p (stmt))
93 return auto_simd_fail (outer,
94 "body contains internal function call");
96 decl = gimple_call_fndecl (stmt);
98 /* We can't know whether indirect calls are safe. */
99 if (decl == NULL_TREE)
100 return auto_simd_fail (outer, "body contains indirect call");
102 /* Calls to functions that are already marked "omp declare simd" are
103 OK. */
104 if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (decl)))
105 break;
107 /* Let recursive calls to the current function through. */
108 if (decl == outer)
109 break;
111 /* Other function calls are not permitted. This covers all calls to
112 the libgomp API and setjmp/longjmp, too, as well as things like
113 __cxa_throw_ related to exception handling. */
114 return auto_simd_fail (outer, "body contains unsafe function call");
116 /* Reject EH-related constructs. Most of the EH gimple codes are
117 already lowered by the time this pass runs during IPA.
118 GIMPLE_EH_DISPATCH and GIMPLE_RESX remain and are lowered by
119 pass_lower_eh_dispatch and pass_lower_resx, respectively; those
120 passes run later. */
121 case GIMPLE_EH_DISPATCH:
122 case GIMPLE_RESX:
123 return auto_simd_fail (outer, "body contains EH constructs");
125 /* Asms are not permitted since we don't know what they do. */
126 case GIMPLE_ASM:
127 return auto_simd_fail (outer, "body contains inline asm");
129 default:
130 break;
133 /* Memory writes are not permitted.
134 FIXME: this could be relaxed a little to permit writes to
135 function-local variables that could not alias other instances
136 of the function running in parallel. */
137 if (gimple_store_p (stmt))
138 return auto_simd_fail (outer, "body includes memory write");
140 /* Volatile reads are not permitted. */
141 if (gimple_has_volatile_ops (stmt))
142 return auto_simd_fail (outer, "body includes volatile op");
144 /* Otherwise OK. */
145 return true;
148 /* Helper function for ok_for_auto_simd_clone: return true if type T is
149 plausible for a cloneable function argument or return type. */
150 static bool
151 plausible_type_for_simd_clone (tree t)
153 if (TREE_CODE (t) == VOID_TYPE)
154 return true;
155 else if (RECORD_OR_UNION_TYPE_P (t) || !is_a <scalar_mode> (TYPE_MODE (t)))
156 /* Small record/union types may fit into a scalar mode, but are
157 still not suitable. */
158 return false;
159 else if (TYPE_ATOMIC (t))
160 /* Atomic types trigger warnings in simd_clone_clauses_extract. */
161 return false;
162 else
163 return true;
166 /* Check if the function NODE appears suitable for auto-annotation
167 with "declare simd". */
169 static bool
170 ok_for_auto_simd_clone (struct cgraph_node *node)
172 tree decl = node->decl;
173 tree t;
174 basic_block bb;
176 /* Nothing to do if the function isn't a definition or doesn't
177 have a body. */
178 if (!node->definition || !node->has_gimple_body_p ())
179 return auto_simd_fail (decl, "no definition or body");
181 /* No point in trying to generate implicit clones if the function
182 isn't used in the compilation unit. */
183 if (!node->callers)
184 return auto_simd_fail (decl, "function is not used");
186 /* Nothing to do if the function already has the "omp declare simd"
187 attribute, is marked noclone, or is not "omp declare target". */
188 if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (decl))
189 || lookup_attribute ("noclone", DECL_ATTRIBUTES (decl))
190 || !lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl)))
191 return auto_simd_fail (decl, "incompatible attributes");
193 /* Check whether the function is restricted host/nohost via the
194 "omp declare target device_type" clause, and that doesn't match
195 what we're compiling for. Internally, these translate into
196 "omp declare target [no]host" attributes on the decl; "any"
197 translates into both attributes, but the default (which is supposed
198 to be equivalent to "any") is neither. */
199 tree host = lookup_attribute ("omp declare target host",
200 DECL_ATTRIBUTES (decl));
201 tree nohost = lookup_attribute ("omp declare target nohost",
202 DECL_ATTRIBUTES (decl));
203 #ifdef ACCEL_COMPILER
204 if (host && !nohost)
205 return auto_simd_fail (decl, "device doesn't match for accel compiler");
206 #else
207 if (nohost && !host)
208 return auto_simd_fail (decl, "device doesn't match for host compiler");
209 #endif
211 /* Backends will check for vectorizable arguments/return types in a
212 target-specific way, but we can immediately filter out functions
213 that have implausible argument/return types. */
214 t = TREE_TYPE (TREE_TYPE (decl));
215 if (!plausible_type_for_simd_clone (t))
216 return auto_simd_fail (decl, "return type fails sniff test");
218 if (TYPE_ARG_TYPES (TREE_TYPE (decl)))
220 for (tree temp = TYPE_ARG_TYPES (TREE_TYPE (decl));
221 temp; temp = TREE_CHAIN (temp))
223 t = TREE_VALUE (temp);
224 if (!plausible_type_for_simd_clone (t))
225 return auto_simd_fail (decl, "argument type fails sniff test");
228 else if (DECL_ARGUMENTS (decl))
230 for (tree temp = DECL_ARGUMENTS (decl); temp; temp = DECL_CHAIN (temp))
232 t = TREE_TYPE (temp);
233 if (!plausible_type_for_simd_clone (t))
234 return auto_simd_fail (decl, "argument type fails sniff test");
237 else
238 return auto_simd_fail (decl, "function has no arguments");
240 /* Scan the function body to see if it is suitable for SIMD-ization. */
241 node->get_body ();
243 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (decl))
245 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
246 gsi_next (&gsi))
247 if (!auto_simd_check_stmt (gsi_stmt (gsi), decl))
248 return false;
251 /* All is good. */
252 if (dump_file)
253 fprintf (dump_file, "\nMarking %s for auto-cloning\n",
254 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
255 return true;
259 /* Return the number of elements in vector type VECTYPE, which is associated
260 with a SIMD clone. At present these always have a constant length. */
262 static unsigned HOST_WIDE_INT
263 simd_clone_subparts (tree vectype)
265 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
268 /* Allocate a fresh `simd_clone' and return it. NARGS is the number
269 of arguments to reserve space for. */
271 static struct cgraph_simd_clone *
272 simd_clone_struct_alloc (int nargs)
274 struct cgraph_simd_clone *clone_info;
275 size_t len = (sizeof (struct cgraph_simd_clone)
276 + nargs * sizeof (struct cgraph_simd_clone_arg));
277 clone_info = (struct cgraph_simd_clone *)
278 ggc_internal_cleared_alloc (len);
279 return clone_info;
282 /* Make a copy of the `struct cgraph_simd_clone' in FROM to TO. */
284 static inline void
285 simd_clone_struct_copy (struct cgraph_simd_clone *to,
286 struct cgraph_simd_clone *from)
288 memcpy (to, from, (sizeof (struct cgraph_simd_clone)
289 + ((from->nargs - from->inbranch)
290 * sizeof (struct cgraph_simd_clone_arg))));
293 /* Fill an empty vector ARGS with parameter types of function FNDECL. This
294 uses TYPE_ARG_TYPES if available, otherwise falls back to types of
295 DECL_ARGUMENTS types. */
297 static void
298 simd_clone_vector_of_formal_parm_types (vec<tree> *args, tree fndecl)
300 if (TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
302 push_function_arg_types (args, TREE_TYPE (fndecl));
303 return;
305 push_function_arg_decls (args, fndecl);
306 unsigned int i;
307 tree arg;
308 FOR_EACH_VEC_ELT (*args, i, arg)
309 (*args)[i] = TREE_TYPE ((*args)[i]);
312 /* Given a simd function in NODE, extract the simd specific
313 information from the OMP clauses passed in CLAUSES, and return
314 the struct cgraph_simd_clone * if it should be cloned. *INBRANCH_SPECIFIED
315 is set to TRUE if the `inbranch' or `notinbranch' clause is specified,
316 otherwise set to FALSE. NULL is returned, after a warning, for an
   unusable linear step or for _Atomic return/non-uniform argument types. */
318 static struct cgraph_simd_clone *
319 simd_clone_clauses_extract (struct cgraph_node *node, tree clauses,
320 bool *inbranch_specified)
322 auto_vec<tree> args;
323 simd_clone_vector_of_formal_parm_types (&args, node->decl);
324 tree t;
325 int n;
326 *inbranch_specified = false;
/* N is the number of formal parameters, not counting a trailing
   void_type_node entry.  */
328 n = args.length ();
329 if (n > 0 && args.last () == void_type_node)
330 n--;
332 /* Allocate one more than needed just in case this is an in-branch
333 clone which will require a mask argument. */
334 struct cgraph_simd_clone *clone_info = simd_clone_struct_alloc (n + 1);
335 clone_info->nargs = n;
/* With no clauses at all, only the _Atomic checks at OUT remain.  */
337 if (!clauses)
338 goto out;
/* CLAUSES is unwrapped with TREE_VALUE; anything that is not an
   OMP_CLAUSE chain is treated the same as having no clauses.  */
340 clauses = TREE_VALUE (clauses);
341 if (!clauses || TREE_CODE (clauses) != OMP_CLAUSE)
342 goto out;
344 for (t = clauses; t; t = OMP_CLAUSE_CHAIN (t))
346 switch (OMP_CLAUSE_CODE (t))
348 case OMP_CLAUSE_INBRANCH:
349 clone_info->inbranch = 1;
350 *inbranch_specified = true;
351 break;
352 case OMP_CLAUSE_NOTINBRANCH:
353 clone_info->inbranch = 0;
354 *inbranch_specified = true;
355 break;
356 case OMP_CLAUSE_SIMDLEN:
357 clone_info->simdlen
358 = TREE_INT_CST_LOW (OMP_CLAUSE_SIMDLEN_EXPR (t));
359 break;
360 case OMP_CLAUSE_LINEAR:
/* The clause decl is read as an integer constant and used as the
   index of the affected argument.  */
362 tree decl = OMP_CLAUSE_DECL (t);
363 tree step = OMP_CLAUSE_LINEAR_STEP (t);
364 int argno = TREE_INT_CST_LOW (decl);
365 if (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (t))
367 enum cgraph_simd_clone_arg_type arg_type;
/* Reference-typed arguments distinguish the ref/uval/val linear kinds;
   everything else uses the plain linear kind.  */
368 if (TREE_CODE (args[argno]) == REFERENCE_TYPE)
369 switch (OMP_CLAUSE_LINEAR_KIND (t))
371 case OMP_CLAUSE_LINEAR_REF:
372 arg_type
373 = SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP;
374 break;
375 case OMP_CLAUSE_LINEAR_UVAL:
376 arg_type
377 = SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP;
378 break;
379 case OMP_CLAUSE_LINEAR_VAL:
380 case OMP_CLAUSE_LINEAR_DEFAULT:
381 arg_type
382 = SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP;
383 break;
384 default:
385 gcc_unreachable ();
387 else
388 arg_type = SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP;
389 clone_info->args[argno].arg_type = arg_type;
390 clone_info->args[argno].linear_step = tree_to_shwi (step);
/* For a variable stride the stored step is asserted to lie in [0, N);
   presumably it is the index of the argument that supplies the step --
   TODO confirm.  */
391 gcc_assert (clone_info->args[argno].linear_step >= 0
392 && clone_info->args[argno].linear_step < n);
394 else
/* Constant stride: for pointer arguments the step is first converted to
   ssizetype; steps that do not fit a signed HOST_WIDE_INT or that are
   zero make the whole directive be ignored.  */
396 if (POINTER_TYPE_P (args[argno]))
397 step = fold_convert (ssizetype, step);
398 if (!tree_fits_shwi_p (step))
400 warning_at (OMP_CLAUSE_LOCATION (t), 0,
401 "ignoring large linear step");
402 return NULL;
404 else if (integer_zerop (step))
406 warning_at (OMP_CLAUSE_LOCATION (t), 0,
407 "ignoring zero linear step");
408 return NULL;
410 else
412 enum cgraph_simd_clone_arg_type arg_type;
413 if (TREE_CODE (args[argno]) == REFERENCE_TYPE)
414 switch (OMP_CLAUSE_LINEAR_KIND (t))
416 case OMP_CLAUSE_LINEAR_REF:
417 arg_type
418 = SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP;
419 break;
420 case OMP_CLAUSE_LINEAR_UVAL:
421 arg_type
422 = SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP;
423 break;
424 case OMP_CLAUSE_LINEAR_VAL:
425 case OMP_CLAUSE_LINEAR_DEFAULT:
426 arg_type
427 = SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP;
428 break;
429 default:
430 gcc_unreachable ();
432 else
433 arg_type = SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP;
434 clone_info->args[argno].arg_type = arg_type;
435 clone_info->args[argno].linear_step = tree_to_shwi (step);
438 break;
440 case OMP_CLAUSE_UNIFORM:
442 tree decl = OMP_CLAUSE_DECL (t);
443 int argno = tree_to_uhwi (decl);
444 clone_info->args[argno].arg_type
445 = SIMD_CLONE_ARG_TYPE_UNIFORM;
446 break;
448 case OMP_CLAUSE_ALIGNED:
450 /* Ignore aligned (x) for declare simd, for the ABI we really
451 need an alignment specified. */
452 if (OMP_CLAUSE_ALIGNED_ALIGNMENT (t) == NULL_TREE)
453 break;
454 tree decl = OMP_CLAUSE_DECL (t);
455 int argno = tree_to_uhwi (decl);
456 clone_info->args[argno].alignment
457 = TREE_INT_CST_LOW (OMP_CLAUSE_ALIGNED_ALIGNMENT (t));
458 break;
/* All other clause kinds are ignored here.  */
460 default:
461 break;
/* Common exit: refuse functions with an _Atomic return type or with an
   _Atomic argument that was not marked uniform.  */
465 out:
466 if (TYPE_ATOMIC (TREE_TYPE (TREE_TYPE (node->decl))))
468 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
469 "ignoring %<#pragma omp declare simd%> on function "
470 "with %<_Atomic%> qualified return type");
471 return NULL;
474 for (unsigned int argno = 0; argno < clone_info->nargs; argno++)
475 if (TYPE_ATOMIC (args[argno])
476 && clone_info->args[argno].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM)
478 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
479 "ignoring %<#pragma omp declare simd%> on function "
480 "with %<_Atomic%> qualified non-%<uniform%> argument");
/* NOTE(review): this is the only early return that releases ARGS
   explicitly; the others above do not -- confirm whether it is needed.  */
481 args.release ();
482 return NULL;
485 return clone_info;
488 /* Given a SIMD clone in NODE, calculate the characteristic data
489 type and return the coresponding type. The characteristic data
490 type is computed as described in the Intel Vector ABI. */
492 static tree
493 simd_clone_compute_base_data_type (struct cgraph_node *node,
494 struct cgraph_simd_clone *clone_info)
496 tree type = integer_type_node;
497 tree fndecl = node->decl;
499 /* a) For non-void function, the characteristic data type is the
500 return type. */
501 if (TREE_CODE (TREE_TYPE (TREE_TYPE (fndecl))) != VOID_TYPE)
502 type = TREE_TYPE (TREE_TYPE (fndecl));
504 /* b) If the function has any non-uniform, non-linear parameters,
505 then the characteristic data type is the type of the first
506 such parameter. */
507 else
509 auto_vec<tree> map;
510 simd_clone_vector_of_formal_parm_types (&map, fndecl);
511 for (unsigned int i = 0; i < clone_info->nargs; ++i)
512 if (clone_info->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
514 type = map[i];
515 break;
519 /* c) If the characteristic data type determined by a) or b) above
520 is struct, union, or class type which is pass-by-value (except
521 for the type that maps to the built-in complex data type), the
522 characteristic data type is int. */
523 if (RECORD_OR_UNION_TYPE_P (type)
524 && !aggregate_value_p (type, NULL)
525 && TREE_CODE (type) != COMPLEX_TYPE)
526 return integer_type_node;
528 /* d) If none of the above three classes is applicable, the
529 characteristic data type is int. */
531 return type;
533 /* e) For Intel Xeon Phi native and offload compilation, if the
534 resulting characteristic data type is 8-bit or 16-bit integer
535 data type, the characteristic data type is int. */
536 /* Well, we don't handle Xeon Phi yet. */
/* Build the mangled assembler name for a SIMD clone of NODE described by
   CLONE_INFO.  The name consists of "_ZGV", the vecsize_mangle character,
   'M' (inbranch) or 'N' (not inbranch), the decimal simdlen, one code per
   argument (optionally followed by 'a' and the alignment), an underscore
   and NODE's assembler name with a leading '*' skipped.  Returns the
   resulting identifier, or NULL_TREE if NODE already has a SIMD clone
   with the same mangled name.  */
539 static tree
540 simd_clone_mangle (struct cgraph_node *node,
541 struct cgraph_simd_clone *clone_info)
543 char vecsize_mangle = clone_info->vecsize_mangle;
544 char mask = clone_info->inbranch ? 'M' : 'N';
545 poly_uint64 simdlen = clone_info->simdlen;
546 unsigned int n;
547 pretty_printer pp;
/* Both the ISA letter and a non-zero simdlen must have been chosen
   before mangling.  */
549 gcc_assert (vecsize_mangle && maybe_ne (simdlen, 0U));
551 pp_string (&pp, "_ZGV");
552 pp_character (&pp, vecsize_mangle);
553 pp_character (&pp, mask);
554 /* For now, simdlen is always constant, while variable simdlen pp 'n'. */
555 unsigned int len = simdlen.to_constant ();
556 pp_decimal_int (&pp, (len));
/* Emit one code per argument.  */
558 for (n = 0; n < clone_info->nargs; ++n)
560 struct cgraph_simd_clone_arg arg = clone_info->args[n];
562 switch (arg.arg_type)
564 case SIMD_CLONE_ARG_TYPE_UNIFORM:
565 pp_character (&pp, 'u');
566 break;
/* The four constant-step linear kinds differ only in their letter and
   share the step printing at MANGLE_LINEAR.  */
567 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
568 pp_character (&pp, 'l');
569 goto mangle_linear;
570 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
571 pp_character (&pp, 'R');
572 goto mangle_linear;
573 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
574 pp_character (&pp, 'L');
575 goto mangle_linear;
576 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
577 pp_character (&pp, 'U');
578 goto mangle_linear;
/* A step of 1 is left implicit; a negative step is printed as 'n'
   followed by its magnitude.  */
579 mangle_linear:
580 gcc_assert (arg.linear_step != 0);
581 if (arg.linear_step > 1)
582 pp_unsigned_wide_integer (&pp, arg.linear_step);
583 else if (arg.linear_step < 0)
585 pp_character (&pp, 'n');
586 pp_unsigned_wide_integer (&pp, (-(unsigned HOST_WIDE_INT)
587 arg.linear_step));
589 break;
/* Variable-step kinds print their letter, 's', and the stored
   linear_step value.  */
590 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
591 pp_string (&pp, "ls");
592 pp_unsigned_wide_integer (&pp, arg.linear_step);
593 break;
594 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
595 pp_string (&pp, "Rs");
596 pp_unsigned_wide_integer (&pp, arg.linear_step);
597 break;
598 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
599 pp_string (&pp, "Ls");
600 pp_unsigned_wide_integer (&pp, arg.linear_step);
601 break;
602 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
603 pp_string (&pp, "Us");
604 pp_unsigned_wide_integer (&pp, arg.linear_step);
605 break;
/* Any other argument kind is mangled as 'v'.  */
606 default:
607 pp_character (&pp, 'v');
/* A non-zero alignment is appended as 'a' plus the value.  */
609 if (arg.alignment)
611 pp_character (&pp, 'a');
612 pp_decimal_int (&pp, arg.alignment);
/* Finish with '_' and the original assembler name, skipping a leading
   '*' if there is one.  */
616 pp_underscore (&pp);
617 const char *str = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl));
618 if (*str == '*')
619 ++str;
620 pp_string (&pp, str);
621 str = pp_formatted_text (&pp);
623 /* If there already is a SIMD clone with the same mangled name, don't
624 add another one. This can happen e.g. for
625 #pragma omp declare simd
626 #pragma omp declare simd simdlen(8)
627 int foo (int, int);
628 if the simdlen is assumed to be 8 for the first one, etc. */
629 for (struct cgraph_node *clone = node->simd_clones; clone;
630 clone = clone->simdclone->next_clone)
631 if (id_equal (DECL_ASSEMBLER_NAME (clone->decl), str))
632 return NULL_TREE;
634 return get_identifier (str);
637 /* Create a simd clone of OLD_NODE and return it. If FORCE_LOCAL is true,
638 create it as a local symbol, otherwise copy the symbol linkage and
639 visibility attributes from OLD_NODE. Returns NULL if OLD_NODE is a
   definition without a gimple body or if the clone could not be created. */
641 static struct cgraph_node *
642 simd_clone_create (struct cgraph_node *old_node, bool force_local)
644 struct cgraph_node *new_node;
/* A definition is cloned together with its body.  */
645 if (old_node->definition)
647 if (!old_node->has_gimple_body_p ())
648 return NULL;
649 old_node->get_body ();
650 new_node = old_node->create_version_clone_with_body (vNULL, NULL, NULL,
651 NULL, NULL,
652 "simdclone");
654 else
/* A mere declaration is cloned by copying its decl, giving the copy a
   numbered "simdclone" name, and clearing its RTL and static
   constructor/destructor flags.  */
656 tree old_decl = old_node->decl;
657 tree new_decl = copy_node (old_node->decl);
658 DECL_NAME (new_decl) = clone_function_name_numbered (old_decl,
659 "simdclone");
660 SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
661 SET_DECL_RTL (new_decl, NULL);
662 DECL_STATIC_CONSTRUCTOR (new_decl) = 0;
663 DECL_STATIC_DESTRUCTOR (new_decl) = 0;
664 new_node = old_node->create_version_clone (new_decl, vNULL, NULL);
665 if (old_node->in_other_partition)
666 new_node->in_other_partition = 1;
668 if (new_node == NULL)
669 return new_node;
/* The clone is never treated as a built-in function.  */
671 set_decl_built_in_function (new_node->decl, NOT_BUILT_IN, 0);
672 if (force_local)
/* Strip every linkage and visibility property that would make the
   symbol visible outside this translation unit.  */
674 TREE_PUBLIC (new_node->decl) = 0;
675 DECL_COMDAT (new_node->decl) = 0;
676 DECL_WEAK (new_node->decl) = 0;
677 DECL_EXTERNAL (new_node->decl) = 0;
678 DECL_VISIBILITY_SPECIFIED (new_node->decl) = 0;
679 DECL_VISIBILITY (new_node->decl) = VISIBILITY_DEFAULT;
680 DECL_DLLIMPORT_P (new_node->decl) = 0;
682 else
/* Mirror OLD_NODE's linkage and visibility on the clone.  */
684 TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
685 DECL_COMDAT (new_node->decl) = DECL_COMDAT (old_node->decl);
686 DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl);
687 DECL_EXTERNAL (new_node->decl) = DECL_EXTERNAL (old_node->decl);
688 DECL_VISIBILITY_SPECIFIED (new_node->decl)
689 = DECL_VISIBILITY_SPECIFIED (old_node->decl);
690 DECL_VISIBILITY (new_node->decl) = DECL_VISIBILITY (old_node->decl);
691 DECL_DLLIMPORT_P (new_node->decl) = DECL_DLLIMPORT_P (old_node->decl);
692 if (DECL_ONE_ONLY (old_node->decl))
693 make_decl_one_only (new_node->decl,
694 DECL_ASSEMBLER_NAME (new_node->decl));
696 /* The method cgraph_version_clone_with_body () will force the new
697 symbol local. Undo this, and inherit external visibility from
698 the old node. */
699 new_node->local = old_node->local;
700 new_node->externally_visible = old_node->externally_visible;
701 new_node->calls_declare_variant_alt
702 = old_node->calls_declare_variant_alt;
705 /* Mark clones with internal linkage as gc'able, so they will not be
706 emitted unless the vectorizer can actually use them. */
707 if (!TREE_PUBLIC (new_node->decl))
708 new_node->gc_candidate = true;
710 return new_node;
713 /* Adjust the return type of the given function to its appropriate
714 vector counterpart. Returns a simd array to be used throughout the
715 function as a return value. Returns NULL_TREE for void functions and
   when NODE is not a definition. */
717 static tree
718 simd_clone_adjust_return_type (struct cgraph_node *node)
720 tree fndecl = node->decl;
721 tree orig_rettype = TREE_TYPE (TREE_TYPE (fndecl));
722 poly_uint64 veclen;
723 tree t;
725 /* Adjust the function return type. */
726 if (orig_rettype == void_type_node)
727 return NULL_TREE;
728 t = TREE_TYPE (TREE_TYPE (fndecl));
/* VECLEN becomes the number of elements per vector: the clone's
   vecsize_int or vecsize_float divided by the element bit size, or
   simdlen when that vecsize is 0.  It is capped at simdlen when it is a
   multiple of simdlen.  */
729 if (INTEGRAL_TYPE_P (t) || POINTER_TYPE_P (t))
730 veclen = node->simdclone->vecsize_int;
731 else
732 veclen = node->simdclone->vecsize_float;
733 if (known_eq (veclen, 0U))
734 veclen = node->simdclone->simdlen;
735 else
736 veclen = exact_div (veclen, GET_MODE_BITSIZE (SCALAR_TYPE_MODE (t)));
737 if (multiple_p (veclen, node->simdclone->simdlen))
738 veclen = node->simdclone->simdlen;
/* Pointer elements are represented by pointer_sized_int_node.  */
739 if (POINTER_TYPE_P (t))
740 t = pointer_sized_int_node;
/* Either a single vector of simdlen elements, or an array of
   simdlen / veclen vectors of veclen elements each.  */
741 if (known_eq (veclen, node->simdclone->simdlen))
742 t = build_vector_type (t, node->simdclone->simdlen);
743 else
745 t = build_vector_type (t, veclen);
746 t = build_array_type_nelts (t, exact_div (node->simdclone->simdlen,
747 veclen));
749 TREE_TYPE (TREE_TYPE (fndecl)) = t;
/* Nothing more to do when NODE is not a definition.  */
750 if (!node->definition)
751 return NULL_TREE;
753 t = DECL_RESULT (fndecl);
754 /* Adjust the DECL_RESULT. */
755 gcc_assert (TREE_TYPE (t) != void_type_node);
756 TREE_TYPE (t) = TREE_TYPE (TREE_TYPE (fndecl));
757 relayout_decl (t);
759 tree atype = build_array_type_nelts (orig_rettype,
760 node->simdclone->simdlen);
/* When the result is an array of vectors, view DECL_RESULT itself as an
   array of simdlen scalars instead of creating a temporary.  */
761 if (maybe_ne (veclen, node->simdclone->simdlen))
762 return build1 (VIEW_CONVERT_EXPR, atype, t);
764 /* Set up a SIMD array to use as the return value. */
765 tree retval = create_tmp_var_raw (atype, "retval");
766 gimple_add_tmp_var (retval);
767 return retval;
770 /* Each vector argument has a corresponding array to be used locally
771 as part of the eventual loop. Create such temporary array and
772 return it.
774 PREFIX is the prefix to be used for the temporary.
776 TYPE is the inner element type.
778 SIMDLEN is the number of elements. */
780 static tree
781 create_tmp_simd_array (const char *prefix, tree type, poly_uint64 simdlen)
783 tree atype = build_array_type_nelts (type, simdlen);
784 tree avar = create_tmp_var_raw (atype, prefix);
785 gimple_add_tmp_var (avar);
786 return avar;
789 /* Modify the function argument types to their corresponding vector
790 counterparts if appropriate. Also, create one array for each simd
791 argument to be used locally when using the function arguments as
792 part of the loop.
794 NODE is the function whose arguments are to be adjusted.
796 If NODE does not represent function definition, returns NULL. Otherwise
797 returns an adjustment class that will be filled describing how the argument
798 declarations will be remapped. New arguments which are not to be remapped
799 are marked with USER_FLAG. */
801 static ipa_param_body_adjustments *
802 simd_clone_adjust_argument_types (struct cgraph_node *node)
804 auto_vec<tree> args;
/* ARGS holds the PARM_DECLs for a definition and the parameter types
   otherwise.  */
806 if (node->definition)
807 push_function_arg_decls (&args, node->decl);
808 else
809 simd_clone_vector_of_formal_parm_types (&args, node->decl);
810 struct cgraph_simd_clone *sc = node->simdclone;
811 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
812 vec_safe_reserve (new_params, sc->nargs);
813 unsigned i, j, k;
814 poly_uint64 veclen;
/* Build one or more adjusted parameters for each original argument.  */
816 for (i = 0; i < sc->nargs; ++i)
818 ipa_adjusted_param adj;
819 memset (&adj, 0, sizeof (adj));
820 tree parm = args[i];
821 tree parm_type = node->definition ? TREE_TYPE (parm) : parm;
822 adj.base_index = i;
823 adj.prev_clone_index = i;
825 sc->args[i].orig_arg = node->definition ? parm : NULL_TREE;
826 sc->args[i].orig_type = parm_type;
828 switch (sc->args[i].arg_type)
830 default:
831 /* No adjustment necessary for scalar arguments. */
832 adj.op = IPA_PARAM_OP_COPY;
833 break;
/* linear(uval) arguments are copied unchanged, but a definition also
   gets a local simd array of the pointed-to/referenced type.  */
834 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
835 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
836 if (node->definition)
837 sc->args[i].simd_array
838 = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)),
839 TREE_TYPE (parm_type),
840 sc->simdlen);
841 adj.op = IPA_PARAM_OP_COPY;
842 break;
/* These arguments are replaced by new vector parameters.  VECLEN is the
   number of elements per vector, computed from vecsize_int/vecsize_float
   and the element bit size (or simdlen when the vecsize is 0).  */
843 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
844 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
845 case SIMD_CLONE_ARG_TYPE_VECTOR:
846 if (INTEGRAL_TYPE_P (parm_type) || POINTER_TYPE_P (parm_type))
847 veclen = sc->vecsize_int;
848 else
849 veclen = sc->vecsize_float;
850 if (known_eq (veclen, 0U))
851 veclen = sc->simdlen;
852 else
853 veclen
854 = exact_div (veclen,
855 GET_MODE_BITSIZE (SCALAR_TYPE_MODE (parm_type)));
856 if (multiple_p (veclen, sc->simdlen))
857 veclen = sc->simdlen;
858 adj.op = IPA_PARAM_OP_NEW;
859 adj.param_prefix_index = IPA_PARAM_PREFIX_SIMD;
860 if (POINTER_TYPE_P (parm_type))
861 adj.type = build_vector_type (pointer_sized_int_node, veclen);
862 else
863 adj.type = build_vector_type (parm_type, veclen);
864 sc->args[i].vector_type = adj.type;
/* The argument is split into K vector parameters.  The first one uses
   ADJ as set up above; after it has been pushed, ADJ is rebuilt with
   user_flag set for the remaining ones.  The last parameter is pushed
   after the switch.  */
865 k = vector_unroll_factor (sc->simdlen, veclen);
866 for (j = 1; j < k; j++)
868 vec_safe_push (new_params, adj);
869 if (j == 1)
871 memset (&adj, 0, sizeof (adj));
872 adj.op = IPA_PARAM_OP_NEW;
873 adj.user_flag = 1;
874 adj.param_prefix_index = IPA_PARAM_PREFIX_SIMD;
875 adj.base_index = i;
876 adj.prev_clone_index = i;
877 adj.type = sc->args[i].vector_type;
881 if (node->definition)
882 sc->args[i].simd_array
883 = create_tmp_simd_array (DECL_NAME (parm)
884 ? IDENTIFIER_POINTER (DECL_NAME (parm))
885 : NULL, parm_type, sc->simdlen);
887 vec_safe_push (new_params, adj);
/* In-branch clones get additional mask parameter(s) after the regular
   arguments; I now equals the original argument count.  */
890 if (sc->inbranch)
892 tree base_type = simd_clone_compute_base_data_type (sc->origin, sc);
893 ipa_adjusted_param adj;
894 memset (&adj, 0, sizeof (adj));
895 adj.op = IPA_PARAM_OP_NEW;
896 adj.user_flag = 1;
897 adj.param_prefix_index = IPA_PARAM_PREFIX_MASK;
899 adj.base_index = i;
900 adj.prev_clone_index = i;
901 if (INTEGRAL_TYPE_P (base_type) || POINTER_TYPE_P (base_type))
902 veclen = sc->vecsize_int;
903 else
904 veclen = sc->vecsize_float;
905 if (known_eq (veclen, 0U))
906 veclen = sc->simdlen;
907 else
908 veclen = exact_div (veclen,
909 GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type)));
910 if (multiple_p (veclen, sc->simdlen))
911 veclen = sc->simdlen;
/* When the clone has a mask mode, the mask is passed as an unsigned
   integer type of that mode; otherwise as a vector of the characteristic
   type (or of pointer-sized integers).  */
912 if (sc->mask_mode != VOIDmode)
913 adj.type
914 = lang_hooks.types.type_for_mode (sc->mask_mode, 1);
915 else if (POINTER_TYPE_P (base_type))
916 adj.type = build_vector_type (pointer_sized_int_node, veclen);
917 else
918 adj.type = build_vector_type (base_type, veclen);
919 vec_safe_push (new_params, adj);
921 k = vector_unroll_factor (sc->simdlen, veclen);
922 for (j = 1; j < k; j++)
923 vec_safe_push (new_params, adj);
925 /* We have previously allocated one extra entry for the mask. Use
926 it and fill it. */
927 sc->nargs++;
928 if (sc->mask_mode != VOIDmode)
929 base_type = boolean_type_node;
930 if (node->definition)
932 sc->args[i].orig_arg
933 = build_decl (UNKNOWN_LOCATION, PARM_DECL, NULL, base_type);
934 if (sc->mask_mode == VOIDmode)
935 sc->args[i].simd_array
936 = create_tmp_simd_array ("mask", base_type, sc->simdlen);
937 else if (k > 1)
938 sc->args[i].simd_array
939 = create_tmp_simd_array ("mask", adj.type, k);
940 else
941 sc->args[i].simd_array = NULL_TREE;
943 sc->args[i].orig_type = base_type;
944 sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK;
945 sc->args[i].vector_type = adj.type;
/* For a definition, hand the new parameter list to
   ipa_param_body_adjustments and let it rewrite the formal parameters.  */
948 if (node->definition)
950 ipa_param_body_adjustments *adjustments
951 = new ipa_param_body_adjustments (new_params, node->decl);
953 adjustments->modify_formal_parameters ();
954 return adjustments;
956 else
/* For a declaration, rebuild TYPE_ARG_TYPES from the adjusted parameter
   list, keeping a trailing void entry if the original list had one.  */
958 tree new_arg_types = NULL_TREE, new_reversed;
959 bool last_parm_void = false;
960 if (args.length () > 0 && args.last () == void_type_node)
961 last_parm_void = true;
963 gcc_assert (TYPE_ARG_TYPES (TREE_TYPE (node->decl)));
964 j = vec_safe_length (new_params);
965 for (i = 0; i < j; i++)
967 struct ipa_adjusted_param *adj = &(*new_params)[i];
968 tree ptype;
969 if (adj->op == IPA_PARAM_OP_COPY)
970 ptype = args[adj->base_index];
971 else
972 ptype = adj->type;
973 new_arg_types = tree_cons (NULL_TREE, ptype, new_arg_types);
/* The list was built back to front; after nreverse, NEW_ARG_TYPES still
   points at the first cons cell created, which is now the last element.  */
975 new_reversed = nreverse (new_arg_types);
976 if (last_parm_void)
978 if (new_reversed)
979 TREE_CHAIN (new_arg_types) = void_list_node;
980 else
981 new_reversed = void_list_node;
983 TYPE_ARG_TYPES (TREE_TYPE (node->decl)) = new_reversed;
984 return NULL;
988 /* Initialize and copy the function arguments in NODE to their
989 corresponding local simd arrays. Returns a fresh gimple_seq with
990 the instruction sequence generated. ADJUSTMENTS describes the adjusted
   parameters of NODE. */
992 static gimple_seq
993 simd_clone_init_simd_arrays (struct cgraph_node *node,
994 ipa_param_body_adjustments *adjustments)
996 gimple_seq seq = NULL;
/* I indexes node->simdclone->args, J indexes adjustments->m_adj_params.
   They diverge because the inner loops below advance ARG and J (but not
   I) over the extra vector parameters of a split argument.  */
997 unsigned i = 0, j = 0, k;
999 for (tree arg = DECL_ARGUMENTS (node->decl);
1000 arg;
1001 arg = DECL_CHAIN (arg), i++, j++)
/* Parameters that were copied unchanged or that have pointer type are
   skipped.  */
1003 if ((*adjustments->m_adj_params)[j].op == IPA_PARAM_OP_COPY
1004 || POINTER_TYPE_P (TREE_TYPE (arg)))
1005 continue;
1007 node->simdclone->args[i].vector_arg = arg;
1009 tree array = node->simdclone->args[i].simd_array;
/* Mask argument of a clone with a mask mode: if it has an array, store
   each of the consecutive mask parameters into successive array
   elements.  */
1010 if (node->simdclone->mask_mode != VOIDmode
1011 && node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
1013 if (array == NULL_TREE)
1014 continue;
1015 unsigned int l
1016 = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (array))));
1017 for (k = 0; k <= l; k++)
1019 if (k)
1021 arg = DECL_CHAIN (arg);
1022 j++;
1024 tree t = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (array)),
1025 array, size_int (k), NULL, NULL);
1026 t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
1027 gimplify_and_add (t, &seq);
1029 continue;
/* The vector parameter has simdlen elements: store it into the array
   with a single vector-typed MEM_REF at offset 0.  */
1031 if (known_eq (simd_clone_subparts (TREE_TYPE (arg)),
1032 node->simdclone->simdlen))
1034 tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
1035 tree ptr = build_fold_addr_expr (array);
1036 tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
1037 build_int_cst (ptype, 0));
1038 t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
1039 gimplify_and_add (t, &seq);
1041 else
/* Otherwise the argument was split into TIMES vector parameters; store
   the K-th one at offset K * ELEMSIZE * SIMDLEN, where the local SIMDLEN
   is the element count of one such parameter.  */
1043 unsigned int simdlen = simd_clone_subparts (TREE_TYPE (arg));
1044 unsigned int times = vector_unroll_factor (node->simdclone->simdlen,
1045 simdlen);
1046 tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
1047 for (k = 0; k < times; k++)
1049 tree ptr = build_fold_addr_expr (array);
1050 int elemsize;
1051 if (k)
1053 arg = DECL_CHAIN (arg);
1054 j++;
1056 tree elemtype = TREE_TYPE (TREE_TYPE (arg));
1057 elemsize = GET_MODE_SIZE (SCALAR_TYPE_MODE (elemtype));
1058 tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
1059 build_int_cst (ptype, k * elemsize * simdlen));
1060 t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
1061 gimplify_and_add (t, &seq);
1065 return seq;
1068 /* Callback info for ipa_simd_modify_stmt_ops below. */
1070 struct modify_stmt_info {
/* Parameter adjustments queried for PARM_DECL replacements.  */
1071 ipa_param_body_adjustments *adjustments;
/* The statement whose operands are currently being walked.  */
1072 gimple *stmt;
/* If non-NULL, newly built helper statements are inserted after this
   statement instead of before STMT.  The callback sets it to the last
   inserted statement when STMT is a PHI.  */
1073 gimple *after_stmt;
1074 /* True if the parent statement was modified by
1075 ipa_simd_modify_stmt_ops. */
1076 bool modified;
1079 /* Callback for walk_gimple_op.
1081 Adjust operands from a given statement as specified in the
1082 adjustments vector in the callback data. */
/* DATA is a walk_stmt_info whose info field points to a modify_stmt_info.
   Every return path in this function returns NULL_TREE; changes are
   reported through info->modified and by rewriting *TP or the enclosing
   operand in place.  */
1084 static tree
1085 ipa_simd_modify_stmt_ops (tree *tp, int *walk_subtrees, void *data)
1087 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
1088 struct modify_stmt_info *info = (struct modify_stmt_info *) wi->info;
/* Remember the outer operand, then look through one ADDR_EXPR and one
   BIT_FIELD_REF / IMAGPART_EXPR / REALPART_EXPR wrapper to reach the
   operand underneath.  */
1089 tree *orig_tp = tp;
1090 if (TREE_CODE (*tp) == ADDR_EXPR)
1091 tp = &TREE_OPERAND (*tp, 0);
1093 if (TREE_CODE (*tp) == BIT_FIELD_REF
1094 || TREE_CODE (*tp) == IMAGPART_EXPR
1095 || TREE_CODE (*tp) == REALPART_EXPR)
1096 tp = &TREE_OPERAND (*tp, 0);
1098 tree repl = NULL_TREE;
1099 ipa_param_body_replacement *pbr = NULL;
/* Only PARM_DECLs are looked up for a replacement; types are never
   walked into.  */
1101 if (TREE_CODE (*tp) == PARM_DECL)
1103 pbr = info->adjustments->get_expr_replacement (*tp, true);
1104 if (pbr)
1105 repl = pbr->repl;
1107 else if (TYPE_P (*tp))
1108 *walk_subtrees = 0;
1110 if (repl)
1111 repl = unshare_expr (repl);
1112 else
/* No direct replacement.  If a wrapper was peeled, walk the inner operand
   here with a temporarily cleared modified flag; when nothing inside
   changed, restore the flag and leave the operand untouched.  Otherwise
   continue below with the rewritten inner operand as REPL.  */
1114 if (tp != orig_tp)
1116 *walk_subtrees = 0;
1117 bool modified = info->modified;
1118 info->modified = false;
1119 walk_tree (tp, ipa_simd_modify_stmt_ops, wi, wi->pset);
1120 if (!info->modified)
1122 info->modified = modified;
1123 return NULL_TREE;
1125 info->modified = modified;
1126 repl = *tp;
1128 else
1129 return NULL_TREE;
/* The operand was reached through a peeled wrapper: the address of REPL
   is computed by a separate statement and its result replaces the whole
   original operand *ORIG_TP.  */
1132 if (tp != orig_tp)
/* For a PHI argument that is the address of a PARM_DECL, reuse the SSA
   name cached in pbr->dummy by an earlier visit, if there is one.  */
1134 if (gimple_code (info->stmt) == GIMPLE_PHI
1135 && pbr
1136 && TREE_CODE (*orig_tp) == ADDR_EXPR
1137 && TREE_CODE (TREE_OPERAND (*orig_tp, 0)) == PARM_DECL
1138 && pbr->dummy)
1140 gcc_assert (TREE_CODE (pbr->dummy) == SSA_NAME);
1141 *orig_tp = pbr->dummy;
1142 info->modified = true;
1143 return NULL_TREE;
1146 repl = build_fold_addr_expr (repl);
1147 gimple *stmt;
/* Debug statements get a debug expression bound with a debug source
   bind; all other statements get a fresh SSA name assigned the address.  */
1148 if (is_gimple_debug (info->stmt))
1150 tree vexpr = build_debug_expr_decl (TREE_TYPE (repl));
1151 stmt = gimple_build_debug_source_bind (vexpr, repl, NULL);
1152 repl = vexpr;
1154 else
1156 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (repl)), repl);
1157 repl = gimple_assign_lhs (stmt);
/* Choose the insertion point.  For a PHI the new statement goes after the
   previously inserted one, or else after the labels of the single
   successor of the entry block; for other statements it is placed
   relative to the statement being walked.  */
1159 gimple_stmt_iterator gsi;
1160 if (gimple_code (info->stmt) == GIMPLE_PHI)
1162 if (info->after_stmt)
1163 gsi = gsi_for_stmt (info->after_stmt);
1164 else
1165 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1166 /* Cache SSA_NAME for next time. */
1167 if (pbr
1168 && TREE_CODE (*orig_tp) == ADDR_EXPR
1169 && TREE_CODE (TREE_OPERAND (*orig_tp, 0)) == PARM_DECL)
1171 gcc_assert (!pbr->dummy);
1172 pbr->dummy = repl;
1175 else
1176 gsi = gsi_for_stmt (info->stmt);
1177 if (info->after_stmt)
1178 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1179 else
1180 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1181 if (gimple_code (info->stmt) == GIMPLE_PHI)
1182 info->after_stmt = stmt;
1183 *orig_tp = repl;
/* Direct replacement of the operand itself: wrap REPL in a
   VIEW_CONVERT_EXPR to the operand's type when the two types are not
   trivially convertible.  */
1185 else if (!useless_type_conversion_p (TREE_TYPE (*tp), TREE_TYPE (repl)))
1187 tree vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (*tp), repl);
1188 *tp = vce;
1190 else
1191 *tp = repl;
1193 info->modified = true;
1194 return NULL_TREE;
1197 /* Traverse the function body and perform all modifications as
1198 described in ADJUSTMENTS. At function return, ADJUSTMENTS will be
1199 modified such that the replacement/reduction value will now be an
1200 offset into the corresponding simd_array.
1202 This function will replace all function argument uses with their
1203 corresponding simd array elements, and adjust the return values
1204 accordingly. */
/* RETVAL_ARRAY is the array that `return foo' statements are turned into
   stores to, and ITER is the index used for every array reference built
   here.  The work is done in three phases: register replacements, re-base
   SSA names of replaced parameters, then walk all PHIs and statements.  */
1206 static void
1207 ipa_simd_modify_function_body (struct cgraph_node *node,
1208 ipa_param_body_adjustments *adjustments,
1209 tree retval_array, tree iter)
1211 basic_block bb;
1212 unsigned int i, j;
1215 /* Register replacements for every function argument use to an offset into
1216 the corresponding simd_array. */
1217 for (i = 0, j = 0; i < node->simdclone->nargs; ++i, ++j)
/* Arguments that have no vector_arg, or whose adjusted parameter has
   user_flag set, get no replacement.  */
1219 if (!node->simdclone->args[i].vector_arg
1220 || (*adjustments->m_adj_params)[j].user_flag)
1221 continue;
1223 tree basetype = TREE_TYPE (node->simdclone->args[i].orig_arg);
1224 tree vectype = TREE_TYPE (node->simdclone->args[i].vector_arg);
1225 tree r = build4 (ARRAY_REF, basetype, node->simdclone->args[i].simd_array,
1226 iter, NULL_TREE, NULL_TREE);
1227 adjustments->register_replacement (&(*adjustments->m_adj_params)[j], r);
/* An argument split over several vector parameters occupies several
   m_adj_params entries; advance J past the extra ones.  */
1229 if (multiple_p (node->simdclone->simdlen, simd_clone_subparts (vectype)))
1230 j += vector_unroll_factor (node->simdclone->simdlen,
1231 simd_clone_subparts (vectype)) - 1;
1233 adjustments->sort_replacements ();
/* Re-base every SSA name whose variable is a replaced PARM_DECL onto the
   base variable returned by get_replacement_ssa_base.  */
1235 tree name;
1236 FOR_EACH_SSA_NAME (i, name, cfun)
1238 tree base_var;
1239 if (SSA_NAME_VAR (name)
1240 && TREE_CODE (SSA_NAME_VAR (name)) == PARM_DECL
1241 && (base_var
1242 = adjustments->get_replacement_ssa_base (SSA_NAME_VAR (name))))
/* A default definition stops being one: it is instead defined by an
   explicit assignment from the unshared replacement expression, inserted
   after the labels of the single successor of the entry block.  */
1244 if (SSA_NAME_IS_DEFAULT_DEF (name))
1246 tree old_decl = SSA_NAME_VAR (name);
1247 bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1248 gimple_stmt_iterator gsi = gsi_after_labels (bb);
1249 tree repl = adjustments->lookup_replacement (old_decl, 0);
1250 gcc_checking_assert (repl);
1251 repl = unshare_expr (repl);
1252 set_ssa_default_def (cfun, old_decl, NULL_TREE);
1253 SET_SSA_NAME_VAR_OR_IDENTIFIER (name, base_var);
1254 SSA_NAME_IS_DEFAULT_DEF (name) = 0;
1255 gimple *stmt = gimple_build_assign (name, repl);
1256 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1258 else
1259 SET_SSA_NAME_VAR_OR_IDENTIFIER (name, base_var);
1263 struct modify_stmt_info info;
1264 info.adjustments = adjustments;
1266 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
1268 gimple_stmt_iterator gsi;
/* PHI arguments are rewritten one at a time by calling the callback
   directly on a copy of the argument; a changed argument must be an
   SSA name.  */
1270 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1272 gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
1273 int i, n = gimple_phi_num_args (phi);
1274 info.stmt = phi;
1275 info.after_stmt = NULL;
1276 struct walk_stmt_info wi;
1277 memset (&wi, 0, sizeof (wi));
1278 info.modified = false;
1279 wi.info = &info;
1280 for (i = 0; i < n; ++i)
1282 int walk_subtrees = 1;
1283 tree arg = gimple_phi_arg_def (phi, i);
1284 tree op = arg;
1285 ipa_simd_modify_stmt_ops (&op, &walk_subtrees, &wi);
1286 if (op != arg)
1288 SET_PHI_ARG_DEF (phi, i, op);
1289 gcc_assert (TREE_CODE (op) == SSA_NAME);
1290 if (gimple_phi_arg_edge (phi, i)->flags & EDGE_ABNORMAL)
1291 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op) = 1;
/* Ordinary statements.  GSI is advanced only at the bottom of the loop;
   the `continue' paths follow a gsi_remove, which presumably leaves GSI
   at the next statement.  */
1296 gsi = gsi_start_bb (bb);
1297 while (!gsi_end_p (gsi))
1299 gimple *stmt = gsi_stmt (gsi);
1300 info.stmt = stmt;
1301 info.after_stmt = NULL;
1302 struct walk_stmt_info wi;
1304 memset (&wi, 0, sizeof (wi));
1305 info.modified = false;
1306 wi.info = &info;
1307 walk_gimple_op (stmt, ipa_simd_modify_stmt_ops, &wi);
/* Returns disappear: the edge to the exit block becomes a fallthru edge,
   a value-less return is simply removed, and a return with a value
   becomes a store into RETVAL_ARRAY.  */
1309 if (greturn *return_stmt = dyn_cast <greturn *> (stmt))
1311 tree retval = gimple_return_retval (return_stmt);
1312 edge e = find_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun));
1313 e->flags |= EDGE_FALLTHRU;
1314 if (!retval)
1316 gsi_remove (&gsi, true);
1317 continue;
1320 /* Replace `return foo' with `retval_array[iter] = foo'. */
1321 tree ref = build4 (ARRAY_REF, TREE_TYPE (retval),
1322 retval_array, iter, NULL, NULL);
1323 stmt = gimple_build_assign (ref, retval);
1324 gsi_replace (&gsi, stmt, true);
1325 info.modified = true;
1328 if (info.modified)
1330 update_stmt (stmt);
1331 /* If the above changed the var of a debug bind into something
1332 different, remove the debug stmt. We could also for all the
1333 replaced parameters add VAR_DECLs for debug info purposes,
1334 add debug stmts for those to be the simd array accesses and
1335 replace debug stmt var operand with that var. Debugging of
1336 vectorized loops doesn't work too well, so don't bother for
1337 now. */
1338 if ((gimple_debug_bind_p (stmt)
1339 && !DECL_P (gimple_debug_bind_get_var (stmt)))
1340 || (gimple_debug_source_bind_p (stmt)
1341 && !DECL_P (gimple_debug_source_bind_get_var (stmt))))
1343 gsi_remove (&gsi, true);
1344 continue;
1346 if (maybe_clean_eh_stmt (stmt))
1347 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1349 gsi_next (&gsi);
1354 /* Helper function of simd_clone_adjust, return linear step addend
1355 of Ith argument. */
/* ADDTYPE is the type the addend is returned in.  For the constant-step
   kinds the result is an integer constant built from linear_step.  For
   the variable-step kinds, linear_step is used as the index of the
   argument that holds the step; the statements needed to load and
   convert that argument are inserted after the labels of ENTRY_BB.  */
1357 static tree
1358 simd_clone_linear_addend (struct cgraph_node *node, unsigned int i,
1359 tree addtype, basic_block entry_bb)
1361 tree ptype = NULL_TREE;
1362 switch (node->simdclone->args[i].arg_type)
1364 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
1365 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
1366 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
1367 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
1368 return build_int_cst (addtype, node->simdclone->args[i].linear_step);
/* PTYPE is the type whose pointer-ness decides the scaling at the end:
   the argument's own type for the plain and REF kinds, the referenced
   type for the VAL and UVAL kinds.  */
1369 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
1370 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
1371 ptype = TREE_TYPE (node->simdclone->args[i].orig_arg);
1372 break;
1373 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
1374 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
1375 ptype = TREE_TYPE (TREE_TYPE (node->simdclone->args[i].orig_arg));
1376 break;
1377 default:
1378 gcc_unreachable ();
1381 unsigned int idx = node->simdclone->args[i].linear_step;
1382 tree arg = node->simdclone->args[idx].orig_arg;
1383 gcc_assert (is_gimple_reg_type (TREE_TYPE (arg)));
1384 gimple_stmt_iterator gsi = gsi_after_labels (entry_bb);
1385 gimple *g;
1386 tree ret;
/* Get the step argument's value as an SSA name: its default definition
   if it is a gimple register, otherwise an explicit load.  */
1387 if (is_gimple_reg (arg))
1388 ret = get_or_create_ssa_default_def (cfun, arg);
1389 else
1391 g = gimple_build_assign (make_ssa_name (TREE_TYPE (arg)), arg);
1392 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
1393 ret = gimple_assign_lhs (g);
/* A step passed by reference is dereferenced.  */
1395 if (TREE_CODE (TREE_TYPE (arg)) == REFERENCE_TYPE)
1397 g = gimple_build_assign (make_ssa_name (TREE_TYPE (TREE_TYPE (arg))),
1398 build_simple_mem_ref (ret));
1399 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
1400 ret = gimple_assign_lhs (g);
/* Convert to ADDTYPE when the conversion is not a no-op.  */
1402 if (!useless_type_conversion_p (addtype, TREE_TYPE (ret)))
1404 g = gimple_build_assign (make_ssa_name (addtype), NOP_EXPR, ret);
1405 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
1406 ret = gimple_assign_lhs (g);
/* For pointers, scale the step by the pointed-to type's size in units,
   but only when that size is an INTEGER_CST.  */
1408 if (POINTER_TYPE_P (ptype))
1410 tree size = TYPE_SIZE_UNIT (TREE_TYPE (ptype));
1411 if (size && TREE_CODE (size) == INTEGER_CST)
1413 g = gimple_build_assign (make_ssa_name (addtype), MULT_EXPR,
1414 ret, fold_convert (addtype, size));
1415 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
1416 ret = gimple_assign_lhs (g);
1419 return ret;
1422 /* Adjust the argument types in NODE to their appropriate vector
1423 counterparts. */
/* Besides adjusting the types, this rewrites NODE's body: the original
   body becomes the body of a loop over an SSA iteration variable that is
   annotated for forced vectorization, vector arguments are spilled to
   simd arrays at function entry, and a new return of the return value
   is emitted after the loop.  Runs with NODE's function pushed as cfun.  */
1425 static void
1426 simd_clone_adjust (struct cgraph_node *node)
1428 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
/* Give the decl its own copy of the function type before the target hook
   and the return/argument adjustments below are applied.  */
1430 TREE_TYPE (node->decl) = build_distinct_type_copy (TREE_TYPE (node->decl));
1431 targetm.simd_clone.adjust (node);
1433 tree retval = simd_clone_adjust_return_type (node);
1434 ipa_param_body_adjustments *adjustments
1435 = simd_clone_adjust_argument_types (node);
1436 gcc_assert (adjustments);
1438 push_gimplify_context ();
1440 gimple_seq seq = simd_clone_init_simd_arrays (node, adjustments);
1442 /* Adjust all uses of vector arguments accordingly. Adjust all
1443 return values accordingly. */
/* ITER1 is the SSA name used as the array index throughout the body;
   its PHI node is created further below.  */
1444 tree iter = create_tmp_var (unsigned_type_node, "iter");
1445 tree iter1 = make_ssa_name (iter);
1446 tree iter2 = NULL_TREE;
1447 ipa_simd_modify_function_body (node, adjustments, retval, iter1);
1448 delete adjustments;
1450 /* Initialize the iteration variable. */
/* Split the first block after its labels: ENTRY_BB keeps the labels and
   receives the array initialization, BODY_BB holds the original body and
   is later used as the loop header.  */
1451 basic_block entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1452 basic_block body_bb = split_block_after_labels (entry_bb)->dest;
1453 gimple_stmt_iterator gsi = gsi_after_labels (entry_bb);
1454 /* Insert the SIMD array and iv initialization at function
1455 entry. */
1456 gsi_insert_seq_before (&gsi, seq, GSI_NEW_STMT);
1458 pop_gimplify_context (NULL);
1460 gimple *g;
1461 basic_block incr_bb = NULL;
1462 class loop *loop = NULL;
1464 /* Create a new BB right before the original exit BB, to hold the
1465 iteration increment and the condition/branch. */
1466 if (EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds))
1468 basic_block orig_exit = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0)->src;
1469 incr_bb = create_empty_bb (orig_exit);
1470 incr_bb->count = profile_count::zero ();
1471 add_bb_to_loop (incr_bb, body_bb->loop_father);
/* Redirect every edge that entered the exit block to INCR_BB, summing
   their counts into INCR_BB's count.  */
1472 while (EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds))
1474 edge e = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
1475 redirect_edge_succ (e, incr_bb);
1476 incr_bb->count += e->count ();
/* Nothing reaches the exit block, but an inbranch clone still gets an
   INCR_BB; the mask test below branches to it.  */
1479 else if (node->simdclone->inbranch)
1481 incr_bb = create_empty_bb (entry_bb);
1482 incr_bb->count = profile_count::zero ();
1483 add_bb_to_loop (incr_bb, body_bb->loop_father);
/* Without an INCR_BB no loop is built at all: LOOP, ITER2, LATCH_BB and
   NEW_EXIT_BB all stay NULL below.  */
1486 if (incr_bb)
1488 make_single_succ_edge (incr_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
1489 gsi = gsi_last_bb (incr_bb);
1490 iter2 = make_ssa_name (iter);
1491 g = gimple_build_assign (iter2, PLUS_EXPR, iter1,
1492 build_int_cst (unsigned_type_node, 1));
1493 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1495 /* Mostly annotate the loop for the vectorizer (the rest is done
1496 below). */
1497 loop = alloc_loop ();
1498 cfun->has_force_vectorize_loops = true;
1499 /* For now, simdlen is always constant. */
1500 loop->safelen = node->simdclone->simdlen.to_constant ();
1501 loop->force_vectorize = true;
1502 loop->header = body_bb;
1505 /* Branch around the body if the mask applies. */
1506 if (node->simdclone->inbranch)
1508 gsi = gsi_last_bb (loop->header);
/* The mask is the last simdclone argument.  */
1509 tree mask_array
1510 = node->simdclone->args[node->simdclone->nargs - 1].simd_array;
1511 tree mask;
/* With a non-VOIDmode mask_mode the mask is tested one bit per iteration:
   MASK is shifted right by SHIFT_CNT and ANDed with 1.  */
1512 if (node->simdclone->mask_mode != VOIDmode)
1514 tree shift_cnt;
/* No array: the mask is the argument itself and the bit number is the
   iteration index.  */
1515 if (mask_array == NULL_TREE)
1517 tree arg = node->simdclone->args[node->simdclone->nargs
1518 - 1].vector_arg;
1519 mask = get_or_create_ssa_default_def (cfun, arg);
1520 shift_cnt = iter1;
/* Array of mask words, each covering C = simdlen / (number of elements)
   iterations: the element index is ITER1 >> log2 (C) and the bit number
   is ITER1 & (C - 1).  */
1522 else
1524 tree maskt = TREE_TYPE (mask_array);
1525 int c = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (maskt)));
1526 /* For now, c must be constant here. */
1527 c = exact_div (node->simdclone->simdlen, c + 1).to_constant ();
1528 int s = exact_log2 (c);
1529 gcc_assert (s > 0);
1530 c--;
1531 tree idx = make_ssa_name (TREE_TYPE (iter1));
1532 g = gimple_build_assign (idx, RSHIFT_EXPR, iter1,
1533 build_int_cst (NULL_TREE, s));
1534 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1535 mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)));
1536 tree aref = build4 (ARRAY_REF,
1537 TREE_TYPE (TREE_TYPE (mask_array)),
1538 mask_array, idx, NULL, NULL);
1539 g = gimple_build_assign (mask, aref);
1540 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1541 shift_cnt = make_ssa_name (TREE_TYPE (iter1));
1542 g = gimple_build_assign (shift_cnt, BIT_AND_EXPR, iter1,
1543 build_int_cst (TREE_TYPE (iter1), c));
1544 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
/* Convert the shift count to the mask's type if needed, then extract the
   bit.  */
1546 tree shift_cnt_conv = shift_cnt;
1547 if (!useless_type_conversion_p (TREE_TYPE (mask),
1548 TREE_TYPE (shift_cnt)))
1550 shift_cnt_conv = make_ssa_name (TREE_TYPE (mask));
1551 g = gimple_build_assign (shift_cnt_conv, NOP_EXPR, shift_cnt);
1552 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1554 g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)),
1555 RSHIFT_EXPR, mask, shift_cnt_conv);
1556 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1557 mask = gimple_assign_lhs (g);
1558 g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)),
1559 BIT_AND_EXPR, mask,
1560 build_int_cst (TREE_TYPE (mask), 1));
1561 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1562 mask = gimple_assign_lhs (g);
/* VOIDmode mask_mode: the mask array has one element per iteration; load
   element ITER1 and, if it is not of integral type, view-convert it to an
   unsigned integer of the same bit size so it can be compared to zero.  */
1564 else
1566 mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)));
1567 tree aref = build4 (ARRAY_REF,
1568 TREE_TYPE (TREE_TYPE (mask_array)),
1569 mask_array, iter1, NULL, NULL);
1570 g = gimple_build_assign (mask, aref);
1571 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1572 int bitsize = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (aref)));
1573 if (!INTEGRAL_TYPE_P (TREE_TYPE (aref)))
1575 aref = build1 (VIEW_CONVERT_EXPR,
1576 build_nonstandard_integer_type (bitsize, 0),
1577 mask);
1578 mask = make_ssa_name (TREE_TYPE (aref));
1579 g = gimple_build_assign (mask, aref);
1580 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
/* When the mask is zero, branch straight to INCR_BB (true edge, marked
   unlikely); the existing fallthru edge becomes the false edge.  */
1584 g = gimple_build_cond (EQ_EXPR, mask, build_zero_cst (TREE_TYPE (mask)),
1585 NULL, NULL);
1586 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1587 edge e = make_edge (loop->header, incr_bb, EDGE_TRUE_VALUE);
1588 e->probability = profile_probability::unlikely ().guessed ();
1589 incr_bb->count += e->count ();
1590 edge fallthru = FALLTHRU_EDGE (loop->header);
1591 fallthru->flags = EDGE_FALSE_VALUE;
1592 fallthru->probability = profile_probability::likely ().guessed ();
1595 basic_block latch_bb = NULL;
1596 basic_block new_exit_bb = NULL;
1598 /* Generate the condition. */
1599 if (incr_bb)
/* INCR_BB ends in `if (iter2 < simdlen)'.  Splitting after it yields
   LATCH_BB, and splitting that again yields NEW_EXIT_BB; the latch is
   then redirected back to BODY_BB and a false edge from INCR_BB to
   NEW_EXIT_BB is added.  */
1601 gsi = gsi_last_bb (incr_bb);
1602 g = gimple_build_cond (LT_EXPR, iter2,
1603 build_int_cst (unsigned_type_node,
1604 node->simdclone->simdlen),
1605 NULL, NULL);
1606 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1607 edge e = split_block (incr_bb, gsi_stmt (gsi));
1608 latch_bb = e->dest;
1609 new_exit_bb = split_block_after_labels (latch_bb)->dest;
1610 loop->latch = latch_bb;
1612 redirect_edge_succ (FALLTHRU_EDGE (latch_bb), body_bb);
1614 edge new_e = make_edge (incr_bb, new_exit_bb, EDGE_FALSE_VALUE);
1616 /* FIXME: Do we need to distribute probabilities for the conditional? */
1617 new_e->probability = profile_probability::guessed_never ();
1618 /* The successor of incr_bb is already pointing to latch_bb; just
1619 change the flags.
1620 make_edge (incr_bb, latch_bb, EDGE_TRUE_VALUE); */
1621 FALLTHRU_EDGE (incr_bb)->flags = EDGE_TRUE_VALUE;
/* PHI for the iteration variable in BODY_BB: zero on the edge from
   ENTRY_BB and, when a loop was built, ITER2 on the latch edge.  */
1624 gphi *phi = create_phi_node (iter1, body_bb);
1625 edge preheader_edge = find_edge (entry_bb, body_bb);
1626 edge latch_edge = NULL;
1627 add_phi_arg (phi, build_zero_cst (unsigned_type_node), preheader_edge,
1628 UNKNOWN_LOCATION);
1629 if (incr_bb)
1631 latch_edge = single_succ_edge (latch_bb);
1632 add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
1634 /* Generate the new return. */
/* A RETVAL that is a VIEW_CONVERT_EXPR of the RESULT_DECL is returned as
   the RESULT_DECL itself; any other RETVAL is view-converted to the
   function's return type and gimplified first.  */
1635 gsi = gsi_last_bb (new_exit_bb);
1636 if (retval
1637 && TREE_CODE (retval) == VIEW_CONVERT_EXPR
1638 && TREE_CODE (TREE_OPERAND (retval, 0)) == RESULT_DECL)
1639 retval = TREE_OPERAND (retval, 0);
1640 else if (retval)
1642 retval = build1 (VIEW_CONVERT_EXPR,
1643 TREE_TYPE (TREE_TYPE (node->decl)),
1644 retval);
1645 retval = force_gimple_operand_gsi (&gsi, retval, true, NULL,
1646 false, GSI_CONTINUE_LINKING);
1648 g = gimple_build_return (retval);
1649 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1652 /* Handle aligned clauses by replacing default defs of the aligned
1653 uniform args with __builtin_assume_aligned (arg_N(D), alignment)
1654 lhs. Handle linear by adding PHIs. */
/* From here on ITER1 and ITER2 are reused as scratch variables for the
   argument currently being handled.  */
1655 for (unsigned i = 0; i < node->simdclone->nargs; i++)
/* Uniform argument that is addressable or not of gimple register type:
   save its value in ENTRY_BB and copy the saved value back into the
   argument at the start of BODY_BB.  */
1656 if (node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM
1657 && (TREE_ADDRESSABLE (node->simdclone->args[i].orig_arg)
1658 || !is_gimple_reg_type
1659 (TREE_TYPE (node->simdclone->args[i].orig_arg))))
1661 tree orig_arg = node->simdclone->args[i].orig_arg;
1662 if (is_gimple_reg_type (TREE_TYPE (orig_arg)))
1663 iter1 = make_ssa_name (TREE_TYPE (orig_arg));
1664 else
1666 iter1 = create_tmp_var_raw (TREE_TYPE (orig_arg));
1667 gimple_add_tmp_var (iter1);
1669 gsi = gsi_after_labels (entry_bb);
1670 g = gimple_build_assign (iter1, orig_arg);
1671 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
1672 gsi = gsi_after_labels (body_bb);
1673 g = gimple_build_assign (orig_arg, iter1);
1674 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
/* Uniform argument passed by reference to an addressable type: the same
   save/restore, applied to the referenced object, and only when the
   argument's default definition has uses.  */
1676 else if (node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM
1677 && DECL_BY_REFERENCE (node->simdclone->args[i].orig_arg)
1678 && TREE_CODE (TREE_TYPE (node->simdclone->args[i].orig_arg))
1679 == REFERENCE_TYPE
1680 && TREE_ADDRESSABLE
1681 (TREE_TYPE (TREE_TYPE (node->simdclone->args[i].orig_arg))))
1683 tree orig_arg = node->simdclone->args[i].orig_arg;
1684 tree def = ssa_default_def (cfun, orig_arg);
1685 if (def && !has_zero_uses (def))
1687 iter1 = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (orig_arg)));
1688 gimple_add_tmp_var (iter1);
1689 gsi = gsi_after_labels (entry_bb);
1690 g = gimple_build_assign (iter1, build_simple_mem_ref (def));
1691 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
1692 gsi = gsi_after_labels (body_bb);
1693 g = gimple_build_assign (build_simple_mem_ref (def), iter1);
1694 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
/* Uniform pointer argument with a non-zero, power-of-two alignment: all
   non-debug uses of its default definition are replaced by the result of
   a __builtin_assume_aligned call inserted on the edge out of the entry
   block.  */
1697 else if (node->simdclone->args[i].alignment
1698 && node->simdclone->args[i].arg_type
1699 == SIMD_CLONE_ARG_TYPE_UNIFORM
1700 && (node->simdclone->args[i].alignment
1701 & (node->simdclone->args[i].alignment - 1)) == 0
1702 && TREE_CODE (TREE_TYPE (node->simdclone->args[i].orig_arg))
1703 == POINTER_TYPE)
1705 unsigned int alignment = node->simdclone->args[i].alignment;
1706 tree orig_arg = node->simdclone->args[i].orig_arg;
1707 tree def = ssa_default_def (cfun, orig_arg);
1708 if (def && !has_zero_uses (def))
1710 tree fn = builtin_decl_explicit (BUILT_IN_ASSUME_ALIGNED);
1711 gimple_seq seq = NULL;
1712 bool need_cvt = false;
1713 gcall *call
1714 = gimple_build_call (fn, 2, def, size_int (alignment));
1715 g = call;
/* The call's result is given type ptr_type_node and converted back to
   the argument's type when the two are not trivially convertible.  */
1716 if (!useless_type_conversion_p (TREE_TYPE (orig_arg),
1717 ptr_type_node))
1718 need_cvt = true;
1719 tree t = make_ssa_name (need_cvt ? ptr_type_node : orig_arg);
1720 gimple_call_set_lhs (g, t);
1721 gimple_seq_add_stmt_without_update (&seq, g);
1722 if (need_cvt)
1724 t = make_ssa_name (orig_arg);
1725 g = gimple_build_assign (t, NOP_EXPR, gimple_call_lhs (g));
1726 gimple_seq_add_stmt_without_update (&seq, g);
1728 gsi_insert_seq_on_edge_immediate
1729 (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)), seq);
/* ENTRY_BB is re-read after the edge insertion, and the new call is
   recorded as a call graph edge.  */
1731 entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1732 node->create_edge (cgraph_node::get_create (fn),
1733 call, entry_bb->count);
1735 imm_use_iterator iter;
1736 use_operand_p use_p;
1737 gimple *use_stmt;
1738 tree repl = gimple_get_lhs (g);
1739 FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
1740 if (is_gimple_debug (use_stmt) || use_stmt == call)
1741 continue;
1742 else
1743 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
1744 SET_USE (use_p, repl);
/* Linear arguments (plain and REF kinds, constant or variable step): the
   argument becomes a PHI in BODY_BB that starts at the incoming value and
   is advanced in INCR_BB by the addend from simd_clone_linear_addend.  */
1747 else if ((node->simdclone->args[i].arg_type
1748 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
1749 || (node->simdclone->args[i].arg_type
1750 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP)
1751 || (node->simdclone->args[i].arg_type
1752 == SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP)
1753 || (node->simdclone->args[i].arg_type
1754 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP))
1756 tree orig_arg = node->simdclone->args[i].orig_arg;
1757 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
1758 || POINTER_TYPE_P (TREE_TYPE (orig_arg)));
1759 tree def = NULL_TREE;
/* An addressable argument is loaded explicitly in ENTRY_BB; otherwise its
   default definition is used, and nothing is done when that has no
   uses.  */
1760 if (TREE_ADDRESSABLE (orig_arg))
1762 def = make_ssa_name (TREE_TYPE (orig_arg));
1763 iter1 = make_ssa_name (TREE_TYPE (orig_arg));
1764 if (incr_bb)
1765 iter2 = make_ssa_name (TREE_TYPE (orig_arg));
1766 gsi = gsi_after_labels (entry_bb);
1767 g = gimple_build_assign (def, orig_arg);
1768 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
1770 else
1772 def = ssa_default_def (cfun, orig_arg);
1773 if (!def || has_zero_uses (def))
1774 def = NULL_TREE;
1775 else
1777 iter1 = make_ssa_name (orig_arg);
1778 if (incr_bb)
1779 iter2 = make_ssa_name (orig_arg);
1782 if (def)
1784 phi = create_phi_node (iter1, body_bb);
1785 add_phi_arg (phi, def, preheader_edge, UNKNOWN_LOCATION);
1786 if (incr_bb)
1788 add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
1789 enum tree_code code = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
1790 ? PLUS_EXPR : POINTER_PLUS_EXPR;
1791 tree addtype = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
1792 ? TREE_TYPE (orig_arg) : sizetype;
1793 tree addcst = simd_clone_linear_addend (node, i, addtype,
1794 entry_bb);
1795 gsi = gsi_last_bb (incr_bb);
1796 g = gimple_build_assign (iter2, code, iter1, addcst);
1797 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
/* Make the body see the per-iteration value: store it back into an
   addressable argument at the start of BODY_BB, or else replace every
   use of DEF outside the PHI with ITER1.  */
1800 imm_use_iterator iter;
1801 use_operand_p use_p;
1802 gimple *use_stmt;
1803 if (TREE_ADDRESSABLE (orig_arg))
1805 gsi = gsi_after_labels (body_bb);
1806 g = gimple_build_assign (orig_arg, iter1);
1807 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
1809 else
1810 FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
1811 if (use_stmt == phi)
1812 continue;
1813 else
1814 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
1815 SET_USE (use_p, iter1);
/* Linear UVAL arguments (references).  Two PHIs are built: ITER1 is a
   pointer stepping through the argument's simd array, and ITER4 is the
   linearly advancing value, which is stored through ITER1 at the start of
   each iteration.  Uses of the reference are redirected to ITER1.  */
1818 else if (node->simdclone->args[i].arg_type
1819 == SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
1820 || (node->simdclone->args[i].arg_type
1821 == SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP))
1823 tree orig_arg = node->simdclone->args[i].orig_arg;
1824 tree def = ssa_default_def (cfun, orig_arg);
1825 gcc_assert (!TREE_ADDRESSABLE (orig_arg)
1826 && TREE_CODE (TREE_TYPE (orig_arg)) == REFERENCE_TYPE);
1827 if (def && !has_zero_uses (def))
1829 tree rtype = TREE_TYPE (TREE_TYPE (orig_arg));
1830 iter1 = make_ssa_name (orig_arg);
1831 if (incr_bb)
1832 iter2 = make_ssa_name (orig_arg);
1833 tree iter3 = make_ssa_name (rtype);
1834 tree iter4 = make_ssa_name (rtype);
1835 tree iter5 = incr_bb ? make_ssa_name (rtype) : NULL_TREE;
/* ITER3 is the referenced value as loaded on entry.  */
1836 gsi = gsi_after_labels (entry_bb);
1837 gimple *load
1838 = gimple_build_assign (iter3, build_simple_mem_ref (def));
1839 gsi_insert_before (&gsi, load, GSI_NEW_STMT);
/* Pointer PHI: starts at the address of the array and advances by the
   size of one element per iteration.  */
1841 tree array = node->simdclone->args[i].simd_array;
1842 TREE_ADDRESSABLE (array) = 1;
1843 tree ptr = build_fold_addr_expr (array);
1844 phi = create_phi_node (iter1, body_bb);
1845 add_phi_arg (phi, ptr, preheader_edge, UNKNOWN_LOCATION);
1846 if (incr_bb)
1848 add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
1849 g = gimple_build_assign (iter2, POINTER_PLUS_EXPR, iter1,
1850 TYPE_SIZE_UNIT (TREE_TYPE (iter3)));
1851 gsi = gsi_last_bb (incr_bb);
1852 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
/* Value PHI: starts at ITER3 and advances by the linear addend.  */
1855 phi = create_phi_node (iter4, body_bb);
1856 add_phi_arg (phi, iter3, preheader_edge, UNKNOWN_LOCATION);
1857 if (incr_bb)
1859 add_phi_arg (phi, iter5, latch_edge, UNKNOWN_LOCATION);
1860 enum tree_code code = INTEGRAL_TYPE_P (TREE_TYPE (iter3))
1861 ? PLUS_EXPR : POINTER_PLUS_EXPR;
1862 tree addtype = INTEGRAL_TYPE_P (TREE_TYPE (iter3))
1863 ? TREE_TYPE (iter3) : sizetype;
1864 tree addcst = simd_clone_linear_addend (node, i, addtype,
1865 entry_bb);
1866 g = gimple_build_assign (iter5, code, iter4, addcst);
1867 gsi = gsi_last_bb (incr_bb);
1868 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
1871 g = gimple_build_assign (build_simple_mem_ref (iter1), iter4);
1872 gsi = gsi_after_labels (body_bb);
1873 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
1875 imm_use_iterator iter;
1876 use_operand_p use_p;
1877 gimple *use_stmt;
1878 FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
1879 if (use_stmt == load)
1880 continue;
1881 else
1882 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
1883 SET_USE (use_p, iter1);
/* If the referenced type is not read-only and a loop was built, copy
   element 0 of the array back through the original reference in
   NEW_EXIT_BB.  */
1885 if (!TYPE_READONLY (rtype) && incr_bb)
1887 tree v = make_ssa_name (rtype);
1888 tree aref = build4 (ARRAY_REF, rtype, array,
1889 size_zero_node, NULL_TREE,
1890 NULL_TREE);
1891 gsi = gsi_after_labels (new_exit_bb);
1892 g = gimple_build_assign (v, aref);
1893 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
1894 g = gimple_build_assign (build_simple_mem_ref (def), v);
1895 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
/* Finish up: compute dominators, register the loop (if one was built)
   under the loop containing its header, update SSA form and restore the
   previous cfun.  */
1900 calculate_dominance_info (CDI_DOMINATORS);
1901 if (loop)
1902 add_loop (loop, loop->header->loop_father);
1903 update_ssa (TODO_update_ssa);
1905 pop_cfun ();
1908 /* If the function in NODE is tagged as an elemental SIMD function,
1909 create the appropriate SIMD clones. */
/* A function without an explicit "omp declare simd" attribute may get one
   added implicitly here, in which case EXPLICIT_P is false for all the
   calls below.  Every created clone is linked into NODE's simd_clones
   list.  */
1911 void
1912 expand_simd_clones (struct cgraph_node *node)
1914 tree attr;
1915 bool explicit_p = true;
/* Nothing to do for nodes that were inlined or are marked "noclone".  */
1917 if (node->inlined_to
1918 || lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl)))
1919 return;
1921 attr = lookup_attribute ("omp declare simd",
1922 DECL_ATTRIBUTES (node->decl));
1924 /* See if we can add an "omp declare simd" directive implicitly
1925 before giving up. */
1926 /* FIXME: OpenACC "#pragma acc routine" translates into
1927 "omp declare target", but appears also to have some other effects
1928 that conflict with generating SIMD clones, causing ICEs. So don't
1929 do this if we've got OpenACC instead of OpenMP. */
1930 if (attr == NULL_TREE
1931 #ifdef ACCEL_COMPILER
1932 && (flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_ANY
1933 || flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_NOHOST)
1934 #else
1935 && (flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_ANY
1936 || flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_HOST)
1937 #endif
1938 && !oacc_get_fn_attrib (node->decl)
1939 && ok_for_auto_simd_clone (node))
/* Prepend an argument-less "omp declare simd" attribute to the decl.  */
1941 attr = tree_cons (get_identifier ("omp declare simd"), NULL,
1942 DECL_ATTRIBUTES (node->decl));
1943 DECL_ATTRIBUTES (node->decl) = attr;
1944 explicit_p = false;
1947 if (attr == NULL_TREE)
1948 return;
1950 /* Ignore
1951 #pragma omp declare simd
1952 extern int foo ();
1953 in C, there we don't know the argument types at all. */
1954 if (!node->definition
1955 && TYPE_ARG_TYPES (TREE_TYPE (node->decl)) == NULL_TREE)
1956 return;
1958 /* Call this before creating clone_info, as it might ggc_collect. */
1959 if (node->definition && node->has_gimple_body_p ())
1960 node->get_body ();
/* What follows is executed once per "omp declare simd" attribute on the
   decl; the loop condition at the bottom advances ATTR to the next such
   attribute, and each `continue' below jumps to it.  */
1964 /* Start with parsing the "omp declare simd" attribute(s). */
1965 bool inbranch_clause_specified;
1966 struct cgraph_simd_clone *clone_info
1967 = simd_clone_clauses_extract (node, TREE_VALUE (attr),
1968 &inbranch_clause_specified);
1969 if (clone_info == NULL)
1970 continue;
/* Remember the requested simdlen so it can be restored in the copies made
   for the later variants.  */
1972 poly_uint64 orig_simdlen = clone_info->simdlen;
1973 tree base_type = simd_clone_compute_base_data_type (node, clone_info);
1975 /* The target can return 0 (no simd clones should be created),
1976 1 (just one ISA of simd clones should be created) or higher
1977 count of ISA variants. In that case, clone_info is initialized
1978 for the first ISA variant. */
1979 int count
1980 = targetm.simd_clone.compute_vecsize_and_simdlen (node, clone_info,
1981 base_type, 0,
1982 explicit_p);
1983 if (count == 0)
1984 continue;
1986 /* Loop over all COUNT ISA variants, and if !INBRANCH_CLAUSE_SPECIFIED,
1987 also create one inbranch and one !inbranch clone of it. */
/* I / 2 is the ISA variant number passed to the target hook; odd values
   of I are the extra inbranch variants, which are skipped when the
   attribute specified the inbranch clause itself.  */
1988 for (int i = 0; i < count * 2; i++)
1990 struct cgraph_simd_clone *clone = clone_info;
1991 if (inbranch_clause_specified && (i & 1) != 0)
1992 continue;
/* Iteration 0 uses CLONE_INFO directly; every later iteration works on a
   fresh copy, allocated with one extra argument slot for odd I.  */
1994 if (i != 0)
1996 clone = simd_clone_struct_alloc (clone_info->nargs
1997 + ((i & 1) != 0));
1998 simd_clone_struct_copy (clone, clone_info);
1999 /* Undo changes targetm.simd_clone.compute_vecsize_and_simdlen
2000 and simd_clone_adjust_argument_types did to the first
2001 clone's info. */
2002 clone->nargs -= clone_info->inbranch;
2003 clone->simdlen = orig_simdlen;
2004 /* And call the target hook again to get the right ISA. */
2005 targetm.simd_clone.compute_vecsize_and_simdlen (node, clone,
2006 base_type,
2007 i / 2,
2008 explicit_p);
2009 if ((i & 1) != 0)
2010 clone->inbranch = 1;
2013 /* simd_clone_mangle might fail if such a clone has been created
2014 already. */
/* On failure in iteration 0, nargs of the shared CLONE_INFO is bumped by
   its inbranch flag before moving on; the copies made in later iterations
   subtract that amount again (see above).  */
2015 tree id = simd_clone_mangle (node, clone);
2016 if (id == NULL_TREE)
2018 if (i == 0)
2019 clone->nargs += clone->inbranch;
2020 continue;
2023 /* Only when we are sure we want to create the clone actually
2024 clone the function (or definitions) or create another
2025 extern FUNCTION_DECL (for prototypes without definitions). */
2026 struct cgraph_node *n = simd_clone_create (node, !explicit_p);
2027 if (n == NULL)
2029 if (i == 0)
2030 clone->nargs += clone->inbranch;
2031 continue;
/* Append N to NODE's list of simd clones.  next_clone links run forward
   and end in NULL, while the head's prev_clone always points at the last
   clone appended.  */
2034 n->simdclone = clone;
2035 clone->origin = node;
2036 clone->next_clone = NULL;
2037 if (node->simd_clones == NULL)
2039 clone->prev_clone = n;
2040 node->simd_clones = n;
2042 else
2044 clone->prev_clone = node->simd_clones->simdclone->prev_clone;
2045 clone->prev_clone->simdclone->next_clone = n;
2046 node->simd_clones->simdclone->prev_clone = n;
2048 symtab->change_decl_assembler_name (n->decl, id);
2049 /* And finally adjust the return type, parameters and for
2050 definitions also function body. */
2051 if (node->definition)
2052 simd_clone_adjust (n);
2053 else
2055 TREE_TYPE (n->decl)
2056 = build_distinct_type_copy (TREE_TYPE (n->decl));
2057 targetm.simd_clone.adjust (n);
2058 simd_clone_adjust_return_type (n);
2059 simd_clone_adjust_argument_types (n);
2061 if (dump_file)
2062 fprintf (dump_file, "\nGenerated %s clone %s\n",
2063 (TREE_PUBLIC (n->decl) ? "global" : "local"),
2064 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl)));
2067 while ((attr = lookup_attribute ("omp declare simd", TREE_CHAIN (attr))));
2070 /* Entry point for IPA simd clone creation pass. */
2072 static unsigned int
2073 ipa_omp_simd_clone (void)
2075 struct cgraph_node *node;
2076 FOR_EACH_FUNCTION (node)
2077 expand_simd_clones (node);
2078 return 0;
2081 namespace {
2083 const pass_data pass_data_omp_simd_clone =
2085 SIMPLE_IPA_PASS, /* type */
2086 "simdclone", /* name */
2087 OPTGROUP_OMP, /* optinfo_flags */
2088 TV_NONE, /* tv_id */
2089 ( PROP_ssa | PROP_cfg ), /* properties_required */
2090 0, /* properties_provided */
2091 0, /* properties_destroyed */
2092 0, /* todo_flags_start */
2093 0, /* todo_flags_finish */
2096 class pass_omp_simd_clone : public simple_ipa_opt_pass
2098 public:
2099 pass_omp_simd_clone(gcc::context *ctxt)
2100 : simple_ipa_opt_pass(pass_data_omp_simd_clone, ctxt)
2103 /* opt_pass methods: */
2104 bool gate (function *) final override;
2105 unsigned int execute (function *) final override
2107 return ipa_omp_simd_clone ();
2111 bool
2112 pass_omp_simd_clone::gate (function *)
2114 return targetm.simd_clone.compute_vecsize_and_simdlen != NULL;
2117 } // anon namespace
2119 simple_ipa_opt_pass *
2120 make_pass_omp_simd_clone (gcc::context *ctxt)
2122 return new pass_omp_simd_clone (ctxt);