Suppress -fstack-protector warning on hppa.
[official-gcc.git] / gcc / omp-simd-clone.cc
blob21d69aa874702600e0f92ad53528a7277840df4a
1 /* OMP constructs' SIMD clone supporting code.
3 Copyright (C) 2005-2022 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "alloc-pool.h"
30 #include "tree-pass.h"
31 #include "ssa.h"
32 #include "cgraph.h"
33 #include "pretty-print.h"
34 #include "diagnostic-core.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "cfganal.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "gimple-walk.h"
42 #include "langhooks.h"
43 #include "tree-cfg.h"
44 #include "tree-into-ssa.h"
45 #include "tree-dfa.h"
46 #include "cfgloop.h"
47 #include "symbol-summary.h"
48 #include "ipa-param-manipulation.h"
49 #include "tree-eh.h"
50 #include "varasm.h"
51 #include "stringpool.h"
52 #include "attribs.h"
53 #include "omp-simd-clone.h"
54 #include "omp-low.h"
55 #include "omp-general.h"
57 /* Print debug info for ok_for_auto_simd_clone to the dump file, logging
58 failure reason EXCUSE for function DECL. Always returns false. */
59 static bool
60 auto_simd_fail (tree decl, const char *excuse)
62 if (dump_file && (dump_flags & TDF_DETAILS))
63 fprintf (dump_file, "\nNot auto-cloning %s because %s\n",
64 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)),
65 excuse);
66 return false;
69 /* Helper function for ok_for_auto_simd_clone; return false if the statement
70 violates restrictions for an "omp declare simd" function. Specifically,
71 the function must not
72 - throw or call setjmp/longjmp
73 - write memory that could alias parallel calls
74 - read volatile memory
75 - include openmp directives or calls
76 - call functions that might do those things */
78 static bool
79 auto_simd_check_stmt (gimple *stmt, tree outer)
81 tree decl;
83 switch (gimple_code (stmt))
85 case GIMPLE_CALL:
87 /* Calls to functions that are CONST or PURE are ok, even if they
88 are internal functions without a decl. Reject other internal
89 functions. */
90 if (gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE))
91 break;
92 if (gimple_call_internal_p (stmt))
93 return auto_simd_fail (outer,
94 "body contains internal function call");
96 decl = gimple_call_fndecl (stmt);
98 /* We can't know whether indirect calls are safe. */
99 if (decl == NULL_TREE)
100 return auto_simd_fail (outer, "body contains indirect call");
102 /* Calls to functions that are already marked "omp declare simd" are
103 OK. */
104 if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (decl)))
105 break;
107 /* Let recursive calls to the current function through. */
108 if (decl == outer)
109 break;
111 /* Other function calls are not permitted. This covers all calls to
112 the libgomp API and setjmp/longjmp, too, as well as things like
113 __cxa_throw_ related to exception handling. */
114 return auto_simd_fail (outer, "body contains unsafe function call");
116 /* Reject EH-related constructs. Most of the EH gimple codes are
117 already lowered by the time this pass runs during IPA.
118 GIMPLE_EH_DISPATCH and GIMPLE_RESX remain and are lowered by
119 pass_lower_eh_dispatch and pass_lower_resx, respectively; those
120 passes run later. */
121 case GIMPLE_EH_DISPATCH:
122 case GIMPLE_RESX:
123 return auto_simd_fail (outer, "body contains EH constructs");
125 /* Asms are not permitted since we don't know what they do. */
126 case GIMPLE_ASM:
127 return auto_simd_fail (outer, "body contains inline asm");
129 default:
130 break;
133 /* Memory writes are not permitted.
134 FIXME: this could be relaxed a little to permit writes to
135 function-local variables that could not alias other instances
136 of the function running in parallel. */
137 if (gimple_store_p (stmt))
138 return auto_simd_fail (outer, "body includes memory write");
140 /* Volatile reads are not permitted. */
141 if (gimple_has_volatile_ops (stmt))
142 return auto_simd_fail (outer, "body includes volatile op");
144 /* Otherwise OK. */
145 return true;
148 /* Helper function for ok_for_auto_simd_clone: return true if type T is
149 plausible for a cloneable function argument or return type. */
150 static bool
151 plausible_type_for_simd_clone (tree t)
153 if (TREE_CODE (t) == VOID_TYPE)
154 return true;
155 else if (RECORD_OR_UNION_TYPE_P (t) || !is_a <scalar_mode> (TYPE_MODE (t)))
156 /* Small record/union types may fit into a scalar mode, but are
157 still not suitable. */
158 return false;
159 else if (TYPE_ATOMIC (t))
160 /* Atomic types trigger warnings in simd_clone_clauses_extract. */
161 return false;
162 else
163 return true;
166 /* Check if the function NODE appears suitable for auto-annotation
167 with "declare simd". */
169 static bool
170 ok_for_auto_simd_clone (struct cgraph_node *node)
172 tree decl = node->decl;
173 tree t;
174 basic_block bb;
176 /* Nothing to do if the function isn't a definition or doesn't
177 have a body. */
178 if (!node->definition || !node->has_gimple_body_p ())
179 return auto_simd_fail (decl, "no definition or body");
181 /* No point in trying to generate implicit clones if the function
182 isn't used in the compilation unit. */
183 if (!node->callers)
184 return auto_simd_fail (decl, "function is not used");
186 /* Nothing to do if the function already has the "omp declare simd"
187 attribute, is marked noclone, or is not "omp declare target". */
188 if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (decl))
189 || lookup_attribute ("noclone", DECL_ATTRIBUTES (decl))
190 || !lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl)))
191 return auto_simd_fail (decl, "incompatible attributes");
193 /* Check whether the function is restricted host/nohost via the
194 "omp declare target device_type" clause, and that doesn't match
195 what we're compiling for. Internally, these translate into
196 "omp declare target [no]host" attributes on the decl; "any"
197 translates into both attributes, but the default (which is supposed
198 to be equivalent to "any") is neither. */
199 tree host = lookup_attribute ("omp declare target host",
200 DECL_ATTRIBUTES (decl));
201 tree nohost = lookup_attribute ("omp declare target nohost",
202 DECL_ATTRIBUTES (decl));
203 #ifdef ACCEL_COMPILER
204 if (host && !nohost)
205 return auto_simd_fail (decl, "device doesn't match for accel compiler");
206 #else
207 if (nohost && !host)
208 return auto_simd_fail (decl, "device doesn't match for host compiler");
209 #endif
211 /* Backends will check for vectorizable arguments/return types in a
212 target-specific way, but we can immediately filter out functions
213 that have implausible argument/return types. */
214 t = TREE_TYPE (TREE_TYPE (decl));
215 if (!plausible_type_for_simd_clone (t))
216 return auto_simd_fail (decl, "return type fails sniff test");
218 if (TYPE_ARG_TYPES (TREE_TYPE (decl)))
220 for (tree temp = TYPE_ARG_TYPES (TREE_TYPE (decl));
221 temp; temp = TREE_CHAIN (temp))
223 t = TREE_VALUE (temp);
224 if (!plausible_type_for_simd_clone (t))
225 return auto_simd_fail (decl, "argument type fails sniff test");
228 else if (DECL_ARGUMENTS (decl))
230 for (tree temp = DECL_ARGUMENTS (decl); temp; temp = DECL_CHAIN (temp))
232 t = TREE_TYPE (temp);
233 if (!plausible_type_for_simd_clone (t))
234 return auto_simd_fail (decl, "argument type fails sniff test");
237 else
238 return auto_simd_fail (decl, "function has no arguments");
240 /* Scan the function body to see if it is suitable for SIMD-ization. */
241 node->get_body ();
243 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (decl))
245 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
246 gsi_next (&gsi))
247 if (!auto_simd_check_stmt (gsi_stmt (gsi), decl))
248 return false;
251 /* All is good. */
252 if (dump_file)
253 fprintf (dump_file, "\nMarking %s for auto-cloning\n",
254 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
255 return true;
259 /* Return the number of elements in vector type VECTYPE, which is associated
260 with a SIMD clone. At present these always have a constant length. */
262 static unsigned HOST_WIDE_INT
263 simd_clone_subparts (tree vectype)
265 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
268 /* Allocate a fresh `simd_clone' and return it. NARGS is the number
269 of arguments to reserve space for. */
271 static struct cgraph_simd_clone *
272 simd_clone_struct_alloc (int nargs)
274 struct cgraph_simd_clone *clone_info;
275 size_t len = (sizeof (struct cgraph_simd_clone)
276 + nargs * sizeof (struct cgraph_simd_clone_arg));
277 clone_info = (struct cgraph_simd_clone *)
278 ggc_internal_cleared_alloc (len);
279 return clone_info;
282 /* Make a copy of the `struct cgraph_simd_clone' in FROM to TO. */
284 static inline void
285 simd_clone_struct_copy (struct cgraph_simd_clone *to,
286 struct cgraph_simd_clone *from)
288 memcpy (to, from, (sizeof (struct cgraph_simd_clone)
289 + ((from->nargs - from->inbranch)
290 * sizeof (struct cgraph_simd_clone_arg))));
293 /* Fill an empty vector ARGS with parameter types of function FNDECL. This
294 uses TYPE_ARG_TYPES if available, otherwise falls back to types of
295 DECL_ARGUMENTS types. */
297 static void
298 simd_clone_vector_of_formal_parm_types (vec<tree> *args, tree fndecl)
300 if (TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
302 push_function_arg_types (args, TREE_TYPE (fndecl));
303 return;
305 push_function_arg_decls (args, fndecl);
306 unsigned int i;
307 tree arg;
308 FOR_EACH_VEC_ELT (*args, i, arg)
309 (*args)[i] = TREE_TYPE ((*args)[i]);
312 /* Given a simd function in NODE, extract the simd specific
313 information from the OMP clauses passed in CLAUSES, and return
314 the struct cgraph_simd_clone * if it should be cloned. *INBRANCH_SPECIFIED
315 is set to TRUE if the `inbranch' or `notinbranch' clause specified,
316 otherwise set to FALSE. */
318 static struct cgraph_simd_clone *
319 simd_clone_clauses_extract (struct cgraph_node *node, tree clauses,
320 bool *inbranch_specified)
322 auto_vec<tree> args;
323 simd_clone_vector_of_formal_parm_types (&args, node->decl);
324 tree t;
325 int n;
326 *inbranch_specified = false;
328 n = args.length ();
329 if (n > 0 && args.last () == void_type_node)
330 n--;
332 /* Allocate one more than needed just in case this is an in-branch
333 clone which will require a mask argument. */
334 struct cgraph_simd_clone *clone_info = simd_clone_struct_alloc (n + 1);
335 clone_info->nargs = n;
337 if (!clauses)
338 goto out;
340 clauses = TREE_VALUE (clauses);
341 if (!clauses || TREE_CODE (clauses) != OMP_CLAUSE)
342 goto out;
344 for (t = clauses; t; t = OMP_CLAUSE_CHAIN (t))
346 switch (OMP_CLAUSE_CODE (t))
348 case OMP_CLAUSE_INBRANCH:
349 clone_info->inbranch = 1;
350 *inbranch_specified = true;
351 break;
352 case OMP_CLAUSE_NOTINBRANCH:
353 clone_info->inbranch = 0;
354 *inbranch_specified = true;
355 break;
356 case OMP_CLAUSE_SIMDLEN:
357 clone_info->simdlen
358 = TREE_INT_CST_LOW (OMP_CLAUSE_SIMDLEN_EXPR (t));
359 break;
360 case OMP_CLAUSE_LINEAR:
362 tree decl = OMP_CLAUSE_DECL (t);
363 tree step = OMP_CLAUSE_LINEAR_STEP (t);
364 int argno = TREE_INT_CST_LOW (decl);
365 if (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (t))
367 enum cgraph_simd_clone_arg_type arg_type;
368 if (TREE_CODE (args[argno]) == REFERENCE_TYPE)
369 switch (OMP_CLAUSE_LINEAR_KIND (t))
371 case OMP_CLAUSE_LINEAR_REF:
372 arg_type
373 = SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP;
374 break;
375 case OMP_CLAUSE_LINEAR_UVAL:
376 arg_type
377 = SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP;
378 break;
379 case OMP_CLAUSE_LINEAR_VAL:
380 case OMP_CLAUSE_LINEAR_DEFAULT:
381 arg_type
382 = SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP;
383 break;
384 default:
385 gcc_unreachable ();
387 else
388 arg_type = SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP;
389 clone_info->args[argno].arg_type = arg_type;
390 clone_info->args[argno].linear_step = tree_to_shwi (step);
391 gcc_assert (clone_info->args[argno].linear_step >= 0
392 && clone_info->args[argno].linear_step < n);
394 else
396 if (POINTER_TYPE_P (args[argno]))
397 step = fold_convert (ssizetype, step);
398 if (!tree_fits_shwi_p (step))
400 warning_at (OMP_CLAUSE_LOCATION (t), 0,
401 "ignoring large linear step");
402 return NULL;
404 else if (integer_zerop (step))
406 warning_at (OMP_CLAUSE_LOCATION (t), 0,
407 "ignoring zero linear step");
408 return NULL;
410 else
412 enum cgraph_simd_clone_arg_type arg_type;
413 if (TREE_CODE (args[argno]) == REFERENCE_TYPE)
414 switch (OMP_CLAUSE_LINEAR_KIND (t))
416 case OMP_CLAUSE_LINEAR_REF:
417 arg_type
418 = SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP;
419 break;
420 case OMP_CLAUSE_LINEAR_UVAL:
421 arg_type
422 = SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP;
423 break;
424 case OMP_CLAUSE_LINEAR_VAL:
425 case OMP_CLAUSE_LINEAR_DEFAULT:
426 arg_type
427 = SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP;
428 break;
429 default:
430 gcc_unreachable ();
432 else
433 arg_type = SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP;
434 clone_info->args[argno].arg_type = arg_type;
435 clone_info->args[argno].linear_step = tree_to_shwi (step);
438 break;
440 case OMP_CLAUSE_UNIFORM:
442 tree decl = OMP_CLAUSE_DECL (t);
443 int argno = tree_to_uhwi (decl);
444 clone_info->args[argno].arg_type
445 = SIMD_CLONE_ARG_TYPE_UNIFORM;
446 break;
448 case OMP_CLAUSE_ALIGNED:
450 /* Ignore aligned (x) for declare simd, for the ABI we really
451 need an alignment specified. */
452 if (OMP_CLAUSE_ALIGNED_ALIGNMENT (t) == NULL_TREE)
453 break;
454 tree decl = OMP_CLAUSE_DECL (t);
455 int argno = tree_to_uhwi (decl);
456 clone_info->args[argno].alignment
457 = TREE_INT_CST_LOW (OMP_CLAUSE_ALIGNED_ALIGNMENT (t));
458 break;
460 default:
461 break;
465 out:
466 if (TYPE_ATOMIC (TREE_TYPE (TREE_TYPE (node->decl))))
468 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
469 "ignoring %<#pragma omp declare simd%> on function "
470 "with %<_Atomic%> qualified return type");
471 return NULL;
474 for (unsigned int argno = 0; argno < clone_info->nargs; argno++)
475 if (TYPE_ATOMIC (args[argno])
476 && clone_info->args[argno].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM)
478 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
479 "ignoring %<#pragma omp declare simd%> on function "
480 "with %<_Atomic%> qualified non-%<uniform%> argument");
481 args.release ();
482 return NULL;
485 return clone_info;
488 /* Given a SIMD clone in NODE, calculate the characteristic data
489 type and return the coresponding type. The characteristic data
490 type is computed as described in the Intel Vector ABI. */
492 static tree
493 simd_clone_compute_base_data_type (struct cgraph_node *node,
494 struct cgraph_simd_clone *clone_info)
496 tree type = integer_type_node;
497 tree fndecl = node->decl;
499 /* a) For non-void function, the characteristic data type is the
500 return type. */
501 if (TREE_CODE (TREE_TYPE (TREE_TYPE (fndecl))) != VOID_TYPE)
502 type = TREE_TYPE (TREE_TYPE (fndecl));
504 /* b) If the function has any non-uniform, non-linear parameters,
505 then the characteristic data type is the type of the first
506 such parameter. */
507 else
509 auto_vec<tree> map;
510 simd_clone_vector_of_formal_parm_types (&map, fndecl);
511 for (unsigned int i = 0; i < clone_info->nargs; ++i)
512 if (clone_info->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
514 type = map[i];
515 break;
519 /* c) If the characteristic data type determined by a) or b) above
520 is struct, union, or class type which is pass-by-value (except
521 for the type that maps to the built-in complex data type), the
522 characteristic data type is int. */
523 if (RECORD_OR_UNION_TYPE_P (type)
524 && !aggregate_value_p (type, NULL)
525 && TREE_CODE (type) != COMPLEX_TYPE)
526 return integer_type_node;
528 /* d) If none of the above three classes is applicable, the
529 characteristic data type is int. */
531 return type;
533 /* e) For Intel Xeon Phi native and offload compilation, if the
534 resulting characteristic data type is 8-bit or 16-bit integer
535 data type, the characteristic data type is int. */
536 /* Well, we don't handle Xeon Phi yet. */
539 static tree
540 simd_clone_mangle (struct cgraph_node *node,
541 struct cgraph_simd_clone *clone_info)
543 char vecsize_mangle = clone_info->vecsize_mangle;
544 char mask = clone_info->inbranch ? 'M' : 'N';
545 poly_uint64 simdlen = clone_info->simdlen;
546 unsigned int n;
547 pretty_printer pp;
549 gcc_assert (vecsize_mangle && maybe_ne (simdlen, 0U));
551 pp_string (&pp, "_ZGV");
552 pp_character (&pp, vecsize_mangle);
553 pp_character (&pp, mask);
554 /* For now, simdlen is always constant, while variable simdlen pp 'n'. */
555 unsigned int len = simdlen.to_constant ();
556 pp_decimal_int (&pp, (len));
558 for (n = 0; n < clone_info->nargs; ++n)
560 struct cgraph_simd_clone_arg arg = clone_info->args[n];
562 switch (arg.arg_type)
564 case SIMD_CLONE_ARG_TYPE_UNIFORM:
565 pp_character (&pp, 'u');
566 break;
567 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
568 pp_character (&pp, 'l');
569 goto mangle_linear;
570 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
571 pp_character (&pp, 'R');
572 goto mangle_linear;
573 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
574 pp_character (&pp, 'L');
575 goto mangle_linear;
576 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
577 pp_character (&pp, 'U');
578 goto mangle_linear;
579 mangle_linear:
580 gcc_assert (arg.linear_step != 0);
581 if (arg.linear_step > 1)
582 pp_unsigned_wide_integer (&pp, arg.linear_step);
583 else if (arg.linear_step < 0)
585 pp_character (&pp, 'n');
586 pp_unsigned_wide_integer (&pp, (-(unsigned HOST_WIDE_INT)
587 arg.linear_step));
589 break;
590 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
591 pp_string (&pp, "ls");
592 pp_unsigned_wide_integer (&pp, arg.linear_step);
593 break;
594 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
595 pp_string (&pp, "Rs");
596 pp_unsigned_wide_integer (&pp, arg.linear_step);
597 break;
598 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
599 pp_string (&pp, "Ls");
600 pp_unsigned_wide_integer (&pp, arg.linear_step);
601 break;
602 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
603 pp_string (&pp, "Us");
604 pp_unsigned_wide_integer (&pp, arg.linear_step);
605 break;
606 default:
607 pp_character (&pp, 'v');
609 if (arg.alignment)
611 pp_character (&pp, 'a');
612 pp_decimal_int (&pp, arg.alignment);
616 pp_underscore (&pp);
617 const char *str = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl));
618 if (*str == '*')
619 ++str;
620 pp_string (&pp, str);
621 str = pp_formatted_text (&pp);
623 /* If there already is a SIMD clone with the same mangled name, don't
624 add another one. This can happen e.g. for
625 #pragma omp declare simd
626 #pragma omp declare simd simdlen(8)
627 int foo (int, int);
628 if the simdlen is assumed to be 8 for the first one, etc. */
629 for (struct cgraph_node *clone = node->simd_clones; clone;
630 clone = clone->simdclone->next_clone)
631 if (id_equal (DECL_ASSEMBLER_NAME (clone->decl), str))
632 return NULL_TREE;
634 return get_identifier (str);
637 /* Create a simd clone of OLD_NODE and return it. If FORCE_LOCAL is true,
638 create it as a local symbol, otherwise copy the symbol linkage and
639 visibility attributes from OLD_NODE. */
641 static struct cgraph_node *
642 simd_clone_create (struct cgraph_node *old_node, bool force_local)
644 struct cgraph_node *new_node;
645 if (old_node->definition)
647 if (!old_node->has_gimple_body_p ())
648 return NULL;
649 old_node->get_body ();
650 new_node = old_node->create_version_clone_with_body (vNULL, NULL, NULL,
651 NULL, NULL,
652 "simdclone");
654 else
656 tree old_decl = old_node->decl;
657 tree new_decl = copy_node (old_node->decl);
658 DECL_NAME (new_decl) = clone_function_name_numbered (old_decl,
659 "simdclone");
660 SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
661 SET_DECL_RTL (new_decl, NULL);
662 DECL_STATIC_CONSTRUCTOR (new_decl) = 0;
663 DECL_STATIC_DESTRUCTOR (new_decl) = 0;
664 new_node = old_node->create_version_clone (new_decl, vNULL, NULL);
665 if (old_node->in_other_partition)
666 new_node->in_other_partition = 1;
668 if (new_node == NULL)
669 return new_node;
671 set_decl_built_in_function (new_node->decl, NOT_BUILT_IN, 0);
672 if (force_local)
674 TREE_PUBLIC (new_node->decl) = 0;
675 DECL_COMDAT (new_node->decl) = 0;
676 DECL_WEAK (new_node->decl) = 0;
677 DECL_EXTERNAL (new_node->decl) = 0;
678 DECL_VISIBILITY_SPECIFIED (new_node->decl) = 0;
679 DECL_VISIBILITY (new_node->decl) = VISIBILITY_DEFAULT;
680 DECL_DLLIMPORT_P (new_node->decl) = 0;
682 else
684 TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
685 DECL_COMDAT (new_node->decl) = DECL_COMDAT (old_node->decl);
686 DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl);
687 DECL_EXTERNAL (new_node->decl) = DECL_EXTERNAL (old_node->decl);
688 DECL_VISIBILITY_SPECIFIED (new_node->decl)
689 = DECL_VISIBILITY_SPECIFIED (old_node->decl);
690 DECL_VISIBILITY (new_node->decl) = DECL_VISIBILITY (old_node->decl);
691 DECL_DLLIMPORT_P (new_node->decl) = DECL_DLLIMPORT_P (old_node->decl);
692 if (DECL_ONE_ONLY (old_node->decl))
693 make_decl_one_only (new_node->decl,
694 DECL_ASSEMBLER_NAME (new_node->decl));
696 /* The method cgraph_version_clone_with_body () will force the new
697 symbol local. Undo this, and inherit external visibility from
698 the old node. */
699 new_node->local = old_node->local;
700 new_node->externally_visible = old_node->externally_visible;
701 new_node->calls_declare_variant_alt
702 = old_node->calls_declare_variant_alt;
705 return new_node;
708 /* Adjust the return type of the given function to its appropriate
709 vector counterpart. Returns a simd array to be used throughout the
710 function as a return value. */
712 static tree
713 simd_clone_adjust_return_type (struct cgraph_node *node)
715 tree fndecl = node->decl;
716 tree orig_rettype = TREE_TYPE (TREE_TYPE (fndecl));
717 poly_uint64 veclen;
718 tree t;
720 /* Adjust the function return type. */
721 if (orig_rettype == void_type_node)
722 return NULL_TREE;
723 t = TREE_TYPE (TREE_TYPE (fndecl));
724 if (INTEGRAL_TYPE_P (t) || POINTER_TYPE_P (t))
725 veclen = node->simdclone->vecsize_int;
726 else
727 veclen = node->simdclone->vecsize_float;
728 if (known_eq (veclen, 0U))
729 veclen = node->simdclone->simdlen;
730 else
731 veclen = exact_div (veclen, GET_MODE_BITSIZE (SCALAR_TYPE_MODE (t)));
732 if (multiple_p (veclen, node->simdclone->simdlen))
733 veclen = node->simdclone->simdlen;
734 if (POINTER_TYPE_P (t))
735 t = pointer_sized_int_node;
736 if (known_eq (veclen, node->simdclone->simdlen))
737 t = build_vector_type (t, node->simdclone->simdlen);
738 else
740 t = build_vector_type (t, veclen);
741 t = build_array_type_nelts (t, exact_div (node->simdclone->simdlen,
742 veclen));
744 TREE_TYPE (TREE_TYPE (fndecl)) = t;
745 if (!node->definition)
746 return NULL_TREE;
748 t = DECL_RESULT (fndecl);
749 /* Adjust the DECL_RESULT. */
750 gcc_assert (TREE_TYPE (t) != void_type_node);
751 TREE_TYPE (t) = TREE_TYPE (TREE_TYPE (fndecl));
752 relayout_decl (t);
754 tree atype = build_array_type_nelts (orig_rettype,
755 node->simdclone->simdlen);
756 if (maybe_ne (veclen, node->simdclone->simdlen))
757 return build1 (VIEW_CONVERT_EXPR, atype, t);
759 /* Set up a SIMD array to use as the return value. */
760 tree retval = create_tmp_var_raw (atype, "retval");
761 gimple_add_tmp_var (retval);
762 return retval;
765 /* Each vector argument has a corresponding array to be used locally
766 as part of the eventual loop. Create such temporary array and
767 return it.
769 PREFIX is the prefix to be used for the temporary.
771 TYPE is the inner element type.
773 SIMDLEN is the number of elements. */
775 static tree
776 create_tmp_simd_array (const char *prefix, tree type, poly_uint64 simdlen)
778 tree atype = build_array_type_nelts (type, simdlen);
779 tree avar = create_tmp_var_raw (atype, prefix);
780 gimple_add_tmp_var (avar);
781 return avar;
784 /* Modify the function argument types to their corresponding vector
785 counterparts if appropriate. Also, create one array for each simd
786 argument to be used locally when using the function arguments as
787 part of the loop.
789 NODE is the function whose arguments are to be adjusted.
791 If NODE does not represent function definition, returns NULL. Otherwise
792 returns an adjustment class that will be filled describing how the argument
793 declarations will be remapped. New arguments which are not to be remapped
794 are marked with USER_FLAG. */
796 static ipa_param_body_adjustments *
797 simd_clone_adjust_argument_types (struct cgraph_node *node)
799 auto_vec<tree> args;
801 if (node->definition)
802 push_function_arg_decls (&args, node->decl);
803 else
804 simd_clone_vector_of_formal_parm_types (&args, node->decl);
805 struct cgraph_simd_clone *sc = node->simdclone;
806 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
807 vec_safe_reserve (new_params, sc->nargs);
808 unsigned i, j, k;
809 poly_uint64 veclen;
811 for (i = 0; i < sc->nargs; ++i)
813 ipa_adjusted_param adj;
814 memset (&adj, 0, sizeof (adj));
815 tree parm = args[i];
816 tree parm_type = node->definition ? TREE_TYPE (parm) : parm;
817 adj.base_index = i;
818 adj.prev_clone_index = i;
820 sc->args[i].orig_arg = node->definition ? parm : NULL_TREE;
821 sc->args[i].orig_type = parm_type;
823 switch (sc->args[i].arg_type)
825 default:
826 /* No adjustment necessary for scalar arguments. */
827 adj.op = IPA_PARAM_OP_COPY;
828 break;
829 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
830 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
831 if (node->definition)
832 sc->args[i].simd_array
833 = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)),
834 TREE_TYPE (parm_type),
835 sc->simdlen);
836 adj.op = IPA_PARAM_OP_COPY;
837 break;
838 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
839 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
840 case SIMD_CLONE_ARG_TYPE_VECTOR:
841 if (INTEGRAL_TYPE_P (parm_type) || POINTER_TYPE_P (parm_type))
842 veclen = sc->vecsize_int;
843 else
844 veclen = sc->vecsize_float;
845 if (known_eq (veclen, 0U))
846 veclen = sc->simdlen;
847 else
848 veclen
849 = exact_div (veclen,
850 GET_MODE_BITSIZE (SCALAR_TYPE_MODE (parm_type)));
851 if (multiple_p (veclen, sc->simdlen))
852 veclen = sc->simdlen;
853 adj.op = IPA_PARAM_OP_NEW;
854 adj.param_prefix_index = IPA_PARAM_PREFIX_SIMD;
855 if (POINTER_TYPE_P (parm_type))
856 adj.type = build_vector_type (pointer_sized_int_node, veclen);
857 else
858 adj.type = build_vector_type (parm_type, veclen);
859 sc->args[i].vector_type = adj.type;
860 k = vector_unroll_factor (sc->simdlen, veclen);
861 for (j = 1; j < k; j++)
863 vec_safe_push (new_params, adj);
864 if (j == 1)
866 memset (&adj, 0, sizeof (adj));
867 adj.op = IPA_PARAM_OP_NEW;
868 adj.user_flag = 1;
869 adj.param_prefix_index = IPA_PARAM_PREFIX_SIMD;
870 adj.base_index = i;
871 adj.prev_clone_index = i;
872 adj.type = sc->args[i].vector_type;
876 if (node->definition)
877 sc->args[i].simd_array
878 = create_tmp_simd_array (DECL_NAME (parm)
879 ? IDENTIFIER_POINTER (DECL_NAME (parm))
880 : NULL, parm_type, sc->simdlen);
882 vec_safe_push (new_params, adj);
885 if (sc->inbranch)
887 tree base_type = simd_clone_compute_base_data_type (sc->origin, sc);
888 ipa_adjusted_param adj;
889 memset (&adj, 0, sizeof (adj));
890 adj.op = IPA_PARAM_OP_NEW;
891 adj.user_flag = 1;
892 adj.param_prefix_index = IPA_PARAM_PREFIX_MASK;
894 adj.base_index = i;
895 adj.prev_clone_index = i;
896 if (INTEGRAL_TYPE_P (base_type) || POINTER_TYPE_P (base_type))
897 veclen = sc->vecsize_int;
898 else
899 veclen = sc->vecsize_float;
900 if (known_eq (veclen, 0U))
901 veclen = sc->simdlen;
902 else
903 veclen = exact_div (veclen,
904 GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type)));
905 if (multiple_p (veclen, sc->simdlen))
906 veclen = sc->simdlen;
907 if (sc->mask_mode != VOIDmode)
908 adj.type
909 = lang_hooks.types.type_for_mode (sc->mask_mode, 1);
910 else if (POINTER_TYPE_P (base_type))
911 adj.type = build_vector_type (pointer_sized_int_node, veclen);
912 else
913 adj.type = build_vector_type (base_type, veclen);
914 vec_safe_push (new_params, adj);
916 k = vector_unroll_factor (sc->simdlen, veclen);
917 for (j = 1; j < k; j++)
918 vec_safe_push (new_params, adj);
920 /* We have previously allocated one extra entry for the mask. Use
921 it and fill it. */
922 sc->nargs++;
923 if (sc->mask_mode != VOIDmode)
924 base_type = boolean_type_node;
925 if (node->definition)
927 sc->args[i].orig_arg
928 = build_decl (UNKNOWN_LOCATION, PARM_DECL, NULL, base_type);
929 if (sc->mask_mode == VOIDmode)
930 sc->args[i].simd_array
931 = create_tmp_simd_array ("mask", base_type, sc->simdlen);
932 else if (k > 1)
933 sc->args[i].simd_array
934 = create_tmp_simd_array ("mask", adj.type, k);
935 else
936 sc->args[i].simd_array = NULL_TREE;
938 sc->args[i].orig_type = base_type;
939 sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK;
942 if (node->definition)
944 ipa_param_body_adjustments *adjustments
945 = new ipa_param_body_adjustments (new_params, node->decl);
947 adjustments->modify_formal_parameters ();
948 return adjustments;
950 else
952 tree new_arg_types = NULL_TREE, new_reversed;
953 bool last_parm_void = false;
954 if (args.length () > 0 && args.last () == void_type_node)
955 last_parm_void = true;
957 gcc_assert (TYPE_ARG_TYPES (TREE_TYPE (node->decl)));
958 j = vec_safe_length (new_params);
959 for (i = 0; i < j; i++)
961 struct ipa_adjusted_param *adj = &(*new_params)[i];
962 tree ptype;
963 if (adj->op == IPA_PARAM_OP_COPY)
964 ptype = args[adj->base_index];
965 else
966 ptype = adj->type;
967 new_arg_types = tree_cons (NULL_TREE, ptype, new_arg_types);
969 new_reversed = nreverse (new_arg_types);
970 if (last_parm_void)
972 if (new_reversed)
973 TREE_CHAIN (new_arg_types) = void_list_node;
974 else
975 new_reversed = void_list_node;
977 TYPE_ARG_TYPES (TREE_TYPE (node->decl)) = new_reversed;
978 return NULL;
982 /* Initialize and copy the function arguments in NODE to their
983 corresponding local simd arrays. Returns a fresh gimple_seq with
984 the instruction sequence generated. */
986 static gimple_seq
987 simd_clone_init_simd_arrays (struct cgraph_node *node,
988 ipa_param_body_adjustments *adjustments)
990 gimple_seq seq = NULL;
991 unsigned i = 0, j = 0, k;
993 for (tree arg = DECL_ARGUMENTS (node->decl);
994 arg;
995 arg = DECL_CHAIN (arg), i++, j++)
997 if ((*adjustments->m_adj_params)[j].op == IPA_PARAM_OP_COPY
998 || POINTER_TYPE_P (TREE_TYPE (arg)))
999 continue;
1001 node->simdclone->args[i].vector_arg = arg;
1003 tree array = node->simdclone->args[i].simd_array;
1004 if (node->simdclone->mask_mode != VOIDmode
1005 && node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
1007 if (array == NULL_TREE)
1008 continue;
1009 unsigned int l
1010 = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (array))));
1011 for (k = 0; k <= l; k++)
1013 if (k)
1015 arg = DECL_CHAIN (arg);
1016 j++;
1018 tree t = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (array)),
1019 array, size_int (k), NULL, NULL);
1020 t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
1021 gimplify_and_add (t, &seq);
1023 continue;
1025 if (known_eq (simd_clone_subparts (TREE_TYPE (arg)),
1026 node->simdclone->simdlen))
1028 tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
1029 tree ptr = build_fold_addr_expr (array);
1030 tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
1031 build_int_cst (ptype, 0));
1032 t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
1033 gimplify_and_add (t, &seq);
1035 else
1037 unsigned int simdlen = simd_clone_subparts (TREE_TYPE (arg));
1038 unsigned int times = vector_unroll_factor (node->simdclone->simdlen,
1039 simdlen);
1040 tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
1041 for (k = 0; k < times; k++)
1043 tree ptr = build_fold_addr_expr (array);
1044 int elemsize;
1045 if (k)
1047 arg = DECL_CHAIN (arg);
1048 j++;
1050 tree elemtype = TREE_TYPE (TREE_TYPE (arg));
1051 elemsize = GET_MODE_SIZE (SCALAR_TYPE_MODE (elemtype));
1052 tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
1053 build_int_cst (ptype, k * elemsize * simdlen));
1054 t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
1055 gimplify_and_add (t, &seq);
1059 return seq;
1062 /* Callback info for ipa_simd_modify_stmt_ops below. */
1064 struct modify_stmt_info {
/* Parameter adjustments that ipa_simd_modify_stmt_ops queries for the
   replacement of a PARM_DECL.  */
1065 ipa_param_body_adjustments *adjustments;
/* The statement whose operands are being walked.  */
1066 gimple *stmt;
/* When non-NULL, ipa_simd_modify_stmt_ops inserts the statements it
   creates after this one; while a GIMPLE_PHI is processed it is updated
   to the most recently inserted statement.  */
1067 gimple *after_stmt;
1068 /* True if the parent statement was modified by
1069 ipa_simd_modify_stmt_ops. */
1070 bool modified;
1073 /* Callback for walk_gimple_op.
1075 Adjust operands from a given statement as specified in the
1076 adjustments vector in the callback data.
   TP points at the operand being visited.  *WALK_SUBTREES is cleared when
   the generic walk must not descend into the operand.  DATA is the
   walk_stmt_info whose INFO field points at a modify_stmt_info;
   INFO->modified is set whenever an operand is rewritten.  Always returns
   NULL_TREE.  */
1078 static tree
1079 ipa_simd_modify_stmt_ops (tree *tp, int *walk_subtrees, void *data)
1081 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
1082 struct modify_stmt_info *info = (struct modify_stmt_info *) wi->info;
/* Remember the operand slot as passed in; TP may be advanced past
   wrapper expressions below.  */
1083 tree *orig_tp = tp;
/* Look through an address-of expression to its operand.  */
1084 if (TREE_CODE (*tp) == ADDR_EXPR)
1085 tp = &TREE_OPERAND (*tp, 0);
/* Likewise look through a bit-field or complex-part reference.  */
1087 if (TREE_CODE (*tp) == BIT_FIELD_REF
1088 || TREE_CODE (*tp) == IMAGPART_EXPR
1089 || TREE_CODE (*tp) == REALPART_EXPR)
1090 tp = &TREE_OPERAND (*tp, 0);
1092 tree repl = NULL_TREE;
1093 ipa_param_body_replacement *pbr = NULL;
/* A PARM_DECL may have a registered replacement expression; types are
   never walked into.  */
1095 if (TREE_CODE (*tp) == PARM_DECL)
1097 pbr = info->adjustments->get_expr_replacement (*tp, true);
1098 if (pbr)
1099 repl = pbr->repl;
1101 else if (TYPE_P (*tp))
1102 *walk_subtrees = 0;
1104 if (repl)
1105 repl = unshare_expr (repl);
1106 else
/* No direct replacement.  If a wrapper was stripped, walk the inner
   expression here with a fresh "modified" flag so that we can tell whether
   anything inside changed; the caller's flag is restored afterwards.  */
1108 if (tp != orig_tp)
1110 *walk_subtrees = 0;
1111 bool modified = info->modified;
1112 info->modified = false;
1113 walk_tree (tp, ipa_simd_modify_stmt_ops, wi, wi->pset);
1114 if (!info->modified)
1116 info->modified = modified;
1117 return NULL_TREE;
1119 info->modified = modified;
1120 repl = *tp;
1122 else
1123 return NULL_TREE;
/* A wrapper was stripped: the replacement's address is computed by a
   separate statement and the original operand slot is replaced by that
   statement's result.  */
1126 if (tp != orig_tp)
/* For a PHI argument that is the address of a parameter, reuse the SSA
   name cached in PBR->dummy by an earlier visit.  */
1128 if (gimple_code (info->stmt) == GIMPLE_PHI
1129 && pbr
1130 && TREE_CODE (*orig_tp) == ADDR_EXPR
1131 && TREE_CODE (TREE_OPERAND (*orig_tp, 0)) == PARM_DECL
1132 && pbr->dummy)
1134 gcc_assert (TREE_CODE (pbr->dummy) == SSA_NAME);
1135 *orig_tp = pbr->dummy;
1136 info->modified = true;
1137 return NULL_TREE;
1140 repl = build_fold_addr_expr (repl);
1141 gimple *stmt;
/* Debug statements get a debug expression decl bound to the address;
   everything else gets a new SSA name assigned from it.  */
1142 if (is_gimple_debug (info->stmt))
1144 tree vexpr = build_debug_expr_decl (TREE_TYPE (repl));
1145 stmt = gimple_build_debug_source_bind (vexpr, repl, NULL);
1146 repl = vexpr;
1148 else
1150 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (repl)), repl);
1151 repl = gimple_assign_lhs (stmt);
/* Pick the insertion point: for a PHI, after the previously inserted
   statement or else at the start of the block following the entry block;
   otherwise at the statement being processed.  */
1153 gimple_stmt_iterator gsi;
1154 if (gimple_code (info->stmt) == GIMPLE_PHI)
1156 if (info->after_stmt)
1157 gsi = gsi_for_stmt (info->after_stmt);
1158 else
1159 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1160 /* Cache SSA_NAME for next time. */
1161 if (pbr
1162 && TREE_CODE (*orig_tp) == ADDR_EXPR
1163 && TREE_CODE (TREE_OPERAND (*orig_tp, 0)) == PARM_DECL)
1165 gcc_assert (!pbr->dummy);
1166 pbr->dummy = repl;
1169 else
1170 gsi = gsi_for_stmt (info->stmt);
1171 if (info->after_stmt)
1172 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1173 else
1174 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
/* Chain later insertions for this PHI after the statement just added.  */
1175 if (gimple_code (info->stmt) == GIMPLE_PHI)
1176 info->after_stmt = stmt;
1177 *orig_tp = repl;
/* No wrapper was stripped: substitute in place, adding a
   VIEW_CONVERT_EXPR when the replacement's type is not trivially
   convertible to the operand's type.  */
1179 else if (!useless_type_conversion_p (TREE_TYPE (*tp), TREE_TYPE (repl)))
1181 tree vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (*tp), repl);
1182 *tp = vce;
1184 else
1185 *tp = repl;
1187 info->modified = true;
1188 return NULL_TREE;
1191 /* Traverse the function body and perform all modifications as
1192 described in ADJUSTMENTS. At function return, ADJUSTMENTS will be
1193 modified such that the replacement/reduction value will now be an
1194 offset into the corresponding simd_array.
1196 This function will replace all function argument uses with their
1197 corresponding simd array elements, and adjust the return values
1198 accordingly.
   NODE is the SIMD clone being rewritten.  ITER is the index used for
   every simd array reference built here, and each `return foo' is turned
   into a store of foo to RETVAL_ARRAY[ITER].  */
1200 static void
1201 ipa_simd_modify_function_body (struct cgraph_node *node,
1202 ipa_param_body_adjustments *adjustments,
1203 tree retval_array, tree iter)
1205 basic_block bb;
1206 unsigned int i, j;
1209 /* Register replacements for every function argument use to an offset into
1210 the corresponding simd_array. */
1211 for (i = 0, j = 0; i < node->simdclone->nargs; ++i, ++j)
/* Arguments without a recorded vector_arg, or whose adjusted parameter
   has user_flag set, get no replacement.  */
1213 if (!node->simdclone->args[i].vector_arg
1214 || (*adjustments->m_adj_params)[j].user_flag)
1215 continue;
1217 tree basetype = TREE_TYPE (node->simdclone->args[i].orig_arg);
1218 tree vectype = TREE_TYPE (node->simdclone->args[i].vector_arg);
1219 tree r = build4 (ARRAY_REF, basetype, node->simdclone->args[i].simd_array,
1220 iter, NULL_TREE, NULL_TREE);
1221 adjustments->register_replacement (&(*adjustments->m_adj_params)[j], r);
/* Advance J past the additional adjusted parameters when the argument
   spans more than one vector (unroll factor greater than one).  */
1223 if (multiple_p (node->simdclone->simdlen, simd_clone_subparts (vectype)))
1224 j += vector_unroll_factor (node->simdclone->simdlen,
1225 simd_clone_subparts (vectype)) - 1;
/* Re-base every SSA name that is based on a replaced PARM_DECL.  A
   default definition additionally becomes an ordinary name defined by an
   explicit assignment from the replacement, inserted at the start of the
   block following the entry block.  */
1228 tree name;
1229 FOR_EACH_SSA_NAME (i, name, cfun)
1231 tree base_var;
1232 if (SSA_NAME_VAR (name)
1233 && TREE_CODE (SSA_NAME_VAR (name)) == PARM_DECL
1234 && (base_var
1235 = adjustments->get_replacement_ssa_base (SSA_NAME_VAR (name))))
1237 if (SSA_NAME_IS_DEFAULT_DEF (name))
1239 tree old_decl = SSA_NAME_VAR (name);
1240 bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1241 gimple_stmt_iterator gsi = gsi_after_labels (bb);
1242 tree repl = adjustments->lookup_replacement (old_decl, 0);
1243 gcc_checking_assert (repl);
1244 repl = unshare_expr (repl);
1245 set_ssa_default_def (cfun, old_decl, NULL_TREE);
1246 SET_SSA_NAME_VAR_OR_IDENTIFIER (name, base_var);
1247 SSA_NAME_IS_DEFAULT_DEF (name) = 0;
1248 gimple *stmt = gimple_build_assign (name, repl);
1249 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1251 else
1252 SET_SSA_NAME_VAR_OR_IDENTIFIER (name, base_var);
1256 struct modify_stmt_info info;
1257 info.adjustments = adjustments;
/* Rewrite the operands of every statement in every basic block: PHI
   nodes first, then the ordinary statements.  */
1259 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
1261 gimple_stmt_iterator gsi;
1263 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1265 gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
1266 int i, n = gimple_phi_num_args (phi);
1267 info.stmt = phi;
1268 info.after_stmt = NULL;
1269 struct walk_stmt_info wi;
1270 memset (&wi, 0, sizeof (wi));
1271 info.modified = false;
1272 wi.info = &info;
/* PHI arguments are processed one at a time through a local copy; a
   changed argument is written back and must be an SSA name.  */
1273 for (i = 0; i < n; ++i)
1275 int walk_subtrees = 1;
1276 tree arg = gimple_phi_arg_def (phi, i);
1277 tree op = arg;
1278 ipa_simd_modify_stmt_ops (&op, &walk_subtrees, &wi);
1279 if (op != arg)
1281 SET_PHI_ARG_DEF (phi, i, op);
1282 gcc_assert (TREE_CODE (op) == SSA_NAME);
1283 if (gimple_phi_arg_edge (phi, i)->flags & EDGE_ABNORMAL)
1284 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op) = 1;
/* The iterator is advanced explicitly at the bottom of the loop because
   some paths remove the current statement and continue.  */
1289 gsi = gsi_start_bb (bb);
1290 while (!gsi_end_p (gsi))
1292 gimple *stmt = gsi_stmt (gsi);
1293 info.stmt = stmt;
1294 info.after_stmt = NULL;
1295 struct walk_stmt_info wi;
1297 memset (&wi, 0, sizeof (wi));
1298 info.modified = false;
1299 wi.info = &info;
1300 walk_gimple_op (stmt, ipa_simd_modify_stmt_ops, &wi);
/* The edge from a returning block to the exit block becomes a
   fallthru edge; a return without a value is simply deleted.  */
1302 if (greturn *return_stmt = dyn_cast <greturn *> (stmt))
1304 tree retval = gimple_return_retval (return_stmt);
1305 edge e = find_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun));
1306 e->flags |= EDGE_FALLTHRU;
1307 if (!retval)
1309 gsi_remove (&gsi, true);
1310 continue;
1313 /* Replace `return foo' with `retval_array[iter] = foo'. */
1314 tree ref = build4 (ARRAY_REF, TREE_TYPE (retval),
1315 retval_array, iter, NULL, NULL);
1316 stmt = gimple_build_assign (ref, retval);
1317 gsi_replace (&gsi, stmt, true);
1318 info.modified = true;
1321 if (info.modified)
1323 update_stmt (stmt);
1324 /* If the above changed the var of a debug bind into something
1325 different, remove the debug stmt. We could also for all the
1326 replaced parameters add VAR_DECLs for debug info purposes,
1327 add debug stmts for those to be the simd array accesses and
1328 replace debug stmt var operand with that var. Debugging of
1329 vectorized loops doesn't work too well, so don't bother for
1330 now. */
1331 if ((gimple_debug_bind_p (stmt)
1332 && !DECL_P (gimple_debug_bind_get_var (stmt)))
1333 || (gimple_debug_source_bind_p (stmt)
1334 && !DECL_P (gimple_debug_source_bind_get_var (stmt))))
1336 gsi_remove (&gsi, true);
1337 continue;
/* The rewritten statement may no longer be able to throw; drop EH
   edges that became dead.  */
1339 if (maybe_clean_eh_stmt (stmt))
1340 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1342 gsi_next (&gsi);
1347 /* Helper function of simd_clone_adjust, return linear step addend
1348 of Ith argument. */
1350 static tree
1351 simd_clone_linear_addend (struct cgraph_node *node, unsigned int i,
1352 tree addtype, basic_block entry_bb)
1354 tree ptype = NULL_TREE;
1355 switch (node->simdclone->args[i].arg_type)
1357 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
1358 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
1359 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
1360 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
1361 return build_int_cst (addtype, node->simdclone->args[i].linear_step);
1362 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
1363 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
1364 ptype = TREE_TYPE (node->simdclone->args[i].orig_arg);
1365 break;
1366 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
1367 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
1368 ptype = TREE_TYPE (TREE_TYPE (node->simdclone->args[i].orig_arg));
1369 break;
1370 default:
1371 gcc_unreachable ();
1374 unsigned int idx = node->simdclone->args[i].linear_step;
1375 tree arg = node->simdclone->args[idx].orig_arg;
1376 gcc_assert (is_gimple_reg_type (TREE_TYPE (arg)));
1377 gimple_stmt_iterator gsi = gsi_after_labels (entry_bb);
1378 gimple *g;
1379 tree ret;
1380 if (is_gimple_reg (arg))
1381 ret = get_or_create_ssa_default_def (cfun, arg);
1382 else
1384 g = gimple_build_assign (make_ssa_name (TREE_TYPE (arg)), arg);
1385 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
1386 ret = gimple_assign_lhs (g);
1388 if (TREE_CODE (TREE_TYPE (arg)) == REFERENCE_TYPE)
1390 g = gimple_build_assign (make_ssa_name (TREE_TYPE (TREE_TYPE (arg))),
1391 build_simple_mem_ref (ret));
1392 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
1393 ret = gimple_assign_lhs (g);
1395 if (!useless_type_conversion_p (addtype, TREE_TYPE (ret)))
1397 g = gimple_build_assign (make_ssa_name (addtype), NOP_EXPR, ret);
1398 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
1399 ret = gimple_assign_lhs (g);
1401 if (POINTER_TYPE_P (ptype))
1403 tree size = TYPE_SIZE_UNIT (TREE_TYPE (ptype));
1404 if (size && TREE_CODE (size) == INTEGER_CST)
1406 g = gimple_build_assign (make_ssa_name (addtype), MULT_EXPR,
1407 ret, fold_convert (addtype, size));
1408 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
1409 ret = gimple_assign_lhs (g);
1412 return ret;
1415 /* Adjust the argument types in NODE to their appropriate vector
1416 counterparts.
   Beyond the argument types, this also gives NODE->decl a distinct copy of
   its function type, calls the target's simd_clone.adjust hook, adjusts the
   return type, stores the vector arguments into simd arrays, rewrites the
   body to index those arrays with an iteration variable and, when there is
   an increment block, wraps the body in a loop that is marked
   force_vectorize with safelen equal to simdlen.  For inbranch clones the
   body is branched around when the mask element for the current iteration
   is zero.  Uniform, aligned and linear arguments are handled at the end.
   The function runs with NODE's function pushed as cfun and pops it again
   before returning.  */
1418 static void
1419 simd_clone_adjust (struct cgraph_node *node)
1421 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1423 TREE_TYPE (node->decl) = build_distinct_type_copy (TREE_TYPE (node->decl));
1424 targetm.simd_clone.adjust (node);
1426 tree retval = simd_clone_adjust_return_type (node);
1427 ipa_param_body_adjustments *adjustments
1428 = simd_clone_adjust_argument_types (node);
1429 gcc_assert (adjustments);
1431 push_gimplify_context ();
/* SEQ holds the stores of the vector arguments into their simd arrays; it
   is inserted at function entry further below.  */
1433 gimple_seq seq = simd_clone_init_simd_arrays (node, adjustments);
1435 /* Adjust all uses of vector arguments accordingly. Adjust all
1436 return values accordingly. */
/* ITER1 is the SSA name used as the array index inside the body; ITER2,
   when created, is its incremented value.  */
1437 tree iter = create_tmp_var (unsigned_type_node, "iter");
1438 tree iter1 = make_ssa_name (iter);
1439 tree iter2 = NULL_TREE;
1440 ipa_simd_modify_function_body (node, adjustments, retval, iter1);
1441 delete adjustments;
1443 /* Initialize the iteration variable. */
/* Split the first block after its labels: ENTRY_BB keeps the labels and
   BODY_BB is the start of the original body.  */
1444 basic_block entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1445 basic_block body_bb = split_block_after_labels (entry_bb)->dest;
1446 gimple_stmt_iterator gsi = gsi_after_labels (entry_bb);
1447 /* Insert the SIMD array and iv initialization at function
1448 entry. */
1449 gsi_insert_seq_before (&gsi, seq, GSI_NEW_STMT);
1451 pop_gimplify_context (NULL);
1453 gimple *g;
1454 basic_block incr_bb = NULL;
1455 class loop *loop = NULL;
1457 /* Create a new BB right before the original exit BB, to hold the
1458 iteration increment and the condition/branch. */
1459 if (EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds))
1461 basic_block orig_exit = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0)->src;
1462 incr_bb = create_empty_bb (orig_exit);
1463 incr_bb->count = profile_count::zero ();
1464 add_bb_to_loop (incr_bb, body_bb->loop_father);
/* Redirect every edge that entered the exit block to INCR_BB,
   accumulating the edge counts into its profile count.  */
1465 while (EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds))
1467 edge e = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
1468 redirect_edge_succ (e, incr_bb);
1469 incr_bb->count += e->count ();
/* Nothing reaches the exit block, but an inbranch clone still gets an
   increment block: the mask test below branches to it.  */
1472 else if (node->simdclone->inbranch)
1474 incr_bb = create_empty_bb (entry_bb);
1475 incr_bb->count = profile_count::zero ();
1476 add_bb_to_loop (incr_bb, body_bb->loop_father);
1479 if (incr_bb)
1481 make_single_succ_edge (incr_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
1482 gsi = gsi_last_bb (incr_bb);
1483 iter2 = make_ssa_name (iter);
1484 g = gimple_build_assign (iter2, PLUS_EXPR, iter1,
1485 build_int_cst (unsigned_type_node, 1));
1486 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1488 /* Mostly annotate the loop for the vectorizer (the rest is done
1489 below). */
1490 loop = alloc_loop ();
1491 cfun->has_force_vectorize_loops = true;
1492 /* For now, simdlen is always constant. */
1493 loop->safelen = node->simdclone->simdlen.to_constant ();
1494 loop->force_vectorize = true;
1495 loop->header = body_bb;
1498 /* Branch around the body if the mask applies. */
1499 if (node->simdclone->inbranch)
1501 gsi = gsi_last_bb (loop->header);
/* The mask is the last argument of the clone.  */
1502 tree mask_array
1503 = node->simdclone->args[node->simdclone->nargs - 1].simd_array;
1504 tree mask;
/* With a non-VOID mask mode, the bit for the current iteration is
   extracted by shifting right and masking with 1.  */
1505 if (node->simdclone->mask_mode != VOIDmode)
1507 tree shift_cnt;
/* No mask array: use the mask argument's default definition directly and
   shift by the iteration number.  */
1508 if (mask_array == NULL_TREE)
1510 tree arg = node->simdclone->args[node->simdclone->nargs
1511 - 1].vector_arg;
1512 mask = get_or_create_ssa_default_def (cfun, arg);
1513 shift_cnt = iter1;
1515 else
/* Mask spread over an array: C becomes simdlen divided by the number of
   array elements and must be a power of two greater than one.  The array
   index is ITER1 >> log2 (C) and the shift count is ITER1 & (C - 1).  */
1517 tree maskt = TREE_TYPE (mask_array);
1518 int c = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (maskt)));
1519 /* For now, c must be constant here. */
1520 c = exact_div (node->simdclone->simdlen, c + 1).to_constant ();
1521 int s = exact_log2 (c);
1522 gcc_assert (s > 0);
1523 c--;
1524 tree idx = make_ssa_name (TREE_TYPE (iter1));
1525 g = gimple_build_assign (idx, RSHIFT_EXPR, iter1,
1526 build_int_cst (NULL_TREE, s));
1527 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1528 mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)));
1529 tree aref = build4 (ARRAY_REF,
1530 TREE_TYPE (TREE_TYPE (mask_array)),
1531 mask_array, idx, NULL, NULL);
1532 g = gimple_build_assign (mask, aref);
1533 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1534 shift_cnt = make_ssa_name (TREE_TYPE (iter1));
1535 g = gimple_build_assign (shift_cnt, BIT_AND_EXPR, iter1,
1536 build_int_cst (TREE_TYPE (iter1), c));
1537 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
/* Convert the shift count to the mask's type if the types differ.  */
1539 tree shift_cnt_conv = shift_cnt;
1540 if (!useless_type_conversion_p (TREE_TYPE (mask),
1541 TREE_TYPE (shift_cnt)))
1543 shift_cnt_conv = make_ssa_name (TREE_TYPE (mask));
1544 g = gimple_build_assign (shift_cnt_conv, NOP_EXPR, shift_cnt);
1545 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1547 g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)),
1548 RSHIFT_EXPR, mask, shift_cnt_conv);
1549 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1550 mask = gimple_assign_lhs (g);
1551 g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)),
1552 BIT_AND_EXPR, mask,
1553 build_int_cst (TREE_TYPE (mask), 1));
1554 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1555 mask = gimple_assign_lhs (g);
1557 else
/* VOIDmode mask: load mask_array[ITER1]; a non-integral element is
   view-converted to an unsigned integer type of the same bit size so it
   can be compared against zero.  */
1559 mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)));
1560 tree aref = build4 (ARRAY_REF,
1561 TREE_TYPE (TREE_TYPE (mask_array)),
1562 mask_array, iter1, NULL, NULL);
1563 g = gimple_build_assign (mask, aref);
1564 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1565 int bitsize = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (aref)));
1566 if (!INTEGRAL_TYPE_P (TREE_TYPE (aref)))
1568 aref = build1 (VIEW_CONVERT_EXPR,
1569 build_nonstandard_integer_type (bitsize, 0),
1570 mask);
1571 mask = make_ssa_name (TREE_TYPE (aref));
1572 g = gimple_build_assign (mask, aref);
1573 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
/* If the mask is zero, jump straight to INCR_BB (marked unlikely);
   otherwise fall into the body.  */
1577 g = gimple_build_cond (EQ_EXPR, mask, build_zero_cst (TREE_TYPE (mask)),
1578 NULL, NULL);
1579 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1580 edge e = make_edge (loop->header, incr_bb, EDGE_TRUE_VALUE);
1581 e->probability = profile_probability::unlikely ().guessed ();
1582 incr_bb->count += e->count ();
1583 edge fallthru = FALLTHRU_EDGE (loop->header);
1584 fallthru->flags = EDGE_FALSE_VALUE;
1585 fallthru->probability = profile_probability::likely ().guessed ();
1588 basic_block latch_bb = NULL;
1589 basic_block new_exit_bb = NULL;
1591 /* Generate the condition. */
/* INCR_BB ends in `if (iter2 < simdlen)'.  The true edge goes to the
   latch, which is redirected back to BODY_BB; the false edge goes to
   NEW_EXIT_BB.  */
1592 if (incr_bb)
1594 gsi = gsi_last_bb (incr_bb);
1595 g = gimple_build_cond (LT_EXPR, iter2,
1596 build_int_cst (unsigned_type_node,
1597 node->simdclone->simdlen),
1598 NULL, NULL);
1599 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1600 edge e = split_block (incr_bb, gsi_stmt (gsi));
1601 latch_bb = e->dest;
1602 new_exit_bb = split_block_after_labels (latch_bb)->dest;
1603 loop->latch = latch_bb;
1605 redirect_edge_succ (FALLTHRU_EDGE (latch_bb), body_bb);
1607 edge new_e = make_edge (incr_bb, new_exit_bb, EDGE_FALSE_VALUE);
1609 /* FIXME: Do we need to distribute probabilities for the conditional? */
1610 new_e->probability = profile_probability::guessed_never ();
1611 /* The successor of incr_bb is already pointing to latch_bb; just
1612 change the flags.
1613 make_edge (incr_bb, latch_bb, EDGE_TRUE_VALUE); */
1614 FALLTHRU_EDGE (incr_bb)->flags = EDGE_TRUE_VALUE;
/* PHI for the iteration variable: zero on entry, ITER2 from the latch.  */
1617 gphi *phi = create_phi_node (iter1, body_bb);
1618 edge preheader_edge = find_edge (entry_bb, body_bb);
1619 edge latch_edge = NULL;
1620 add_phi_arg (phi, build_zero_cst (unsigned_type_node), preheader_edge,
1621 UNKNOWN_LOCATION);
1622 if (incr_bb)
1624 latch_edge = single_succ_edge (latch_bb);
1625 add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
1627 /* Generate the new return. */
/* A RETVAL that is a VIEW_CONVERT_EXPR of the RESULT_DECL is returned as
   the RESULT_DECL itself; any other RETVAL is view-converted to the
   function's return type and gimplified into an operand.  */
1628 gsi = gsi_last_bb (new_exit_bb);
1629 if (retval
1630 && TREE_CODE (retval) == VIEW_CONVERT_EXPR
1631 && TREE_CODE (TREE_OPERAND (retval, 0)) == RESULT_DECL)
1632 retval = TREE_OPERAND (retval, 0);
1633 else if (retval)
1635 retval = build1 (VIEW_CONVERT_EXPR,
1636 TREE_TYPE (TREE_TYPE (node->decl)),
1637 retval);
1638 retval = force_gimple_operand_gsi (&gsi, retval, true, NULL,
1639 false, GSI_CONTINUE_LINKING);
1641 g = gimple_build_return (retval);
1642 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
1645 /* Handle aligned clauses by replacing default defs of the aligned
1646 uniform args with __builtin_assume_aligned (arg_N(D), alignment)
1647 lhs. Handle linear by adding PHIs. */
/* First case: a uniform argument that is addressable or not of a gimple
   register type.  Its value is saved in ENTRY_BB and copied back into the
   argument at the start of BODY_BB.  ITER1 is reused here, and in the
   cases below, as a scratch variable.  */
1648 for (unsigned i = 0; i < node->simdclone->nargs; i++)
1649 if (node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM
1650 && (TREE_ADDRESSABLE (node->simdclone->args[i].orig_arg)
1651 || !is_gimple_reg_type
1652 (TREE_TYPE (node->simdclone->args[i].orig_arg))))
1654 tree orig_arg = node->simdclone->args[i].orig_arg;
1655 if (is_gimple_reg_type (TREE_TYPE (orig_arg)))
1656 iter1 = make_ssa_name (TREE_TYPE (orig_arg));
1657 else
1659 iter1 = create_tmp_var_raw (TREE_TYPE (orig_arg));
1660 gimple_add_tmp_var (iter1);
1662 gsi = gsi_after_labels (entry_bb);
1663 g = gimple_build_assign (iter1, orig_arg);
1664 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
1665 gsi = gsi_after_labels (body_bb);
1666 g = gimple_build_assign (orig_arg, iter1);
1667 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
/* Second case: a uniform by-reference argument whose referenced type is
   addressable.  The referenced object is saved in ENTRY_BB and stored back
   through the reference at the start of BODY_BB.  */
1669 else if (node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM
1670 && DECL_BY_REFERENCE (node->simdclone->args[i].orig_arg)
1671 && TREE_CODE (TREE_TYPE (node->simdclone->args[i].orig_arg))
1672 == REFERENCE_TYPE
1673 && TREE_ADDRESSABLE
1674 (TREE_TYPE (TREE_TYPE (node->simdclone->args[i].orig_arg))))
1676 tree orig_arg = node->simdclone->args[i].orig_arg;
1677 tree def = ssa_default_def (cfun, orig_arg);
1678 if (def && !has_zero_uses (def))
1680 iter1 = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (orig_arg)));
1681 gimple_add_tmp_var (iter1);
1682 gsi = gsi_after_labels (entry_bb);
1683 g = gimple_build_assign (iter1, build_simple_mem_ref (def));
1684 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
1685 gsi = gsi_after_labels (body_bb);
1686 g = gimple_build_assign (build_simple_mem_ref (def), iter1);
1687 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
/* Third case: a uniform pointer argument with a power-of-two alignment.
   Uses of its default definition are replaced by the result of a
   __builtin_assume_aligned call inserted on the edge leaving the entry
   block.  */
1690 else if (node->simdclone->args[i].alignment
1691 && node->simdclone->args[i].arg_type
1692 == SIMD_CLONE_ARG_TYPE_UNIFORM
1693 && (node->simdclone->args[i].alignment
1694 & (node->simdclone->args[i].alignment - 1)) == 0
1695 && TREE_CODE (TREE_TYPE (node->simdclone->args[i].orig_arg))
1696 == POINTER_TYPE)
1698 unsigned int alignment = node->simdclone->args[i].alignment;
1699 tree orig_arg = node->simdclone->args[i].orig_arg;
1700 tree def = ssa_default_def (cfun, orig_arg);
1701 if (def && !has_zero_uses (def))
1703 tree fn = builtin_decl_explicit (BUILT_IN_ASSUME_ALIGNED);
1704 gimple_seq seq = NULL;
1705 bool need_cvt = false;
1706 gcall *call
1707 = gimple_build_call (fn, 2, def, size_int (alignment));
1708 g = call;
/* The call's result has type ptr_type_node; convert back to the
   argument's type when that is not a useless conversion.  */
1709 if (!useless_type_conversion_p (TREE_TYPE (orig_arg),
1710 ptr_type_node))
1711 need_cvt = true;
1712 tree t = make_ssa_name (need_cvt ? ptr_type_node : orig_arg);
1713 gimple_call_set_lhs (g, t);
1714 gimple_seq_add_stmt_without_update (&seq, g);
1715 if (need_cvt)
1717 t = make_ssa_name (orig_arg);
1718 g = gimple_build_assign (t, NOP_EXPR, gimple_call_lhs (g));
1719 gimple_seq_add_stmt_without_update (&seq, g);
1721 gsi_insert_seq_on_edge_immediate
1722 (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)), seq);
/* ENTRY_BB is re-read after the insertion on the entry edge, and a call
   graph edge is recorded for the new builtin call.  */
1724 entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1725 node->create_edge (cgraph_node::get_create (fn),
1726 call, entry_bb->count);
1728 imm_use_iterator iter;
1729 use_operand_p use_p;
1730 gimple *use_stmt;
1731 tree repl = gimple_get_lhs (g);
/* Redirect all uses of DEF except debug statements and the call itself.  */
1732 FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
1733 if (is_gimple_debug (use_stmt) || use_stmt == call)
1734 continue;
1735 else
1736 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
1737 SET_USE (use_p, repl);
/* Fourth case: linear and linear-ref arguments.  A PHI in BODY_BB carries
   the current value (ITER1) and, when there is an increment block, ITER2 is
   ITER1 plus the addend from simd_clone_linear_addend.  */
1740 else if ((node->simdclone->args[i].arg_type
1741 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
1742 || (node->simdclone->args[i].arg_type
1743 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP)
1744 || (node->simdclone->args[i].arg_type
1745 == SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP)
1746 || (node->simdclone->args[i].arg_type
1747 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP))
1749 tree orig_arg = node->simdclone->args[i].orig_arg;
1750 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
1751 || POINTER_TYPE_P (TREE_TYPE (orig_arg)));
1752 tree def = NULL_TREE;
/* An addressable argument is first loaded into a new SSA name in ENTRY_BB;
   otherwise its default definition is used, if it has any uses.  */
1753 if (TREE_ADDRESSABLE (orig_arg))
1755 def = make_ssa_name (TREE_TYPE (orig_arg));
1756 iter1 = make_ssa_name (TREE_TYPE (orig_arg));
1757 if (incr_bb)
1758 iter2 = make_ssa_name (TREE_TYPE (orig_arg));
1759 gsi = gsi_after_labels (entry_bb);
1760 g = gimple_build_assign (def, orig_arg);
1761 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
1763 else
1765 def = ssa_default_def (cfun, orig_arg);
1766 if (!def || has_zero_uses (def))
1767 def = NULL_TREE;
1768 else
1770 iter1 = make_ssa_name (orig_arg);
1771 if (incr_bb)
1772 iter2 = make_ssa_name (orig_arg);
1775 if (def)
1777 phi = create_phi_node (iter1, body_bb);
1778 add_phi_arg (phi, def, preheader_edge, UNKNOWN_LOCATION);
1779 if (incr_bb)
1781 add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
/* Integers step with PLUS_EXPR in their own type, pointers with
   POINTER_PLUS_EXPR and a sizetype addend.  */
1782 enum tree_code code = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
1783 ? PLUS_EXPR : POINTER_PLUS_EXPR;
1784 tree addtype = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
1785 ? TREE_TYPE (orig_arg) : sizetype;
1786 tree addcst = simd_clone_linear_addend (node, i, addtype,
1787 entry_bb);
1788 gsi = gsi_last_bb (incr_bb);
1789 g = gimple_build_assign (iter2, code, iter1, addcst);
1790 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
1793 imm_use_iterator iter;
1794 use_operand_p use_p;
1795 gimple *use_stmt;
/* An addressable argument is refreshed from ITER1 at the start of the
   body; otherwise every use of DEF other than the PHI is redirected to
   ITER1.  */
1796 if (TREE_ADDRESSABLE (orig_arg))
1798 gsi = gsi_after_labels (body_bb);
1799 g = gimple_build_assign (orig_arg, iter1);
1800 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
1802 else
1803 FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
1804 if (use_stmt == phi)
1805 continue;
1806 else
1807 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
1808 SET_USE (use_p, iter1);
/* Fifth case: linear uval arguments, which must be non-addressable
   references.  ITER1 walks a pointer through the argument's simd array
   while ITER4 carries the linearly stepped value that is stored through
   ITER1 at the start of each iteration.  */
1811 else if (node->simdclone->args[i].arg_type
1812 == SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
1813 || (node->simdclone->args[i].arg_type
1814 == SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP))
1816 tree orig_arg = node->simdclone->args[i].orig_arg;
1817 tree def = ssa_default_def (cfun, orig_arg);
1818 gcc_assert (!TREE_ADDRESSABLE (orig_arg)
1819 && TREE_CODE (TREE_TYPE (orig_arg)) == REFERENCE_TYPE);
1820 if (def && !has_zero_uses (def))
1822 tree rtype = TREE_TYPE (TREE_TYPE (orig_arg));
1823 iter1 = make_ssa_name (orig_arg);
1824 if (incr_bb)
1825 iter2 = make_ssa_name (orig_arg);
/* ITER3 is the initial referenced value loaded in ENTRY_BB, ITER4 its
   per-iteration value and ITER5 the stepped value for the next
   iteration.  */
1826 tree iter3 = make_ssa_name (rtype);
1827 tree iter4 = make_ssa_name (rtype);
1828 tree iter5 = incr_bb ? make_ssa_name (rtype) : NULL_TREE;
1829 gsi = gsi_after_labels (entry_bb);
1830 gimple *load
1831 = gimple_build_assign (iter3, build_simple_mem_ref (def));
1832 gsi_insert_before (&gsi, load, GSI_NEW_STMT);
1834 tree array = node->simdclone->args[i].simd_array;
1835 TREE_ADDRESSABLE (array) = 1;
1836 tree ptr = build_fold_addr_expr (array);
1837 phi = create_phi_node (iter1, body_bb);
1838 add_phi_arg (phi, ptr, preheader_edge, UNKNOWN_LOCATION);
1839 if (incr_bb)
1841 add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
/* Advance the array pointer by one element per iteration.  */
1842 g = gimple_build_assign (iter2, POINTER_PLUS_EXPR, iter1,
1843 TYPE_SIZE_UNIT (TREE_TYPE (iter3)));
1844 gsi = gsi_last_bb (incr_bb);
1845 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
1848 phi = create_phi_node (iter4, body_bb);
1849 add_phi_arg (phi, iter3, preheader_edge, UNKNOWN_LOCATION);
1850 if (incr_bb)
1852 add_phi_arg (phi, iter5, latch_edge, UNKNOWN_LOCATION);
1853 enum tree_code code = INTEGRAL_TYPE_P (TREE_TYPE (iter3))
1854 ? PLUS_EXPR : POINTER_PLUS_EXPR;
1855 tree addtype = INTEGRAL_TYPE_P (TREE_TYPE (iter3))
1856 ? TREE_TYPE (iter3) : sizetype;
1857 tree addcst = simd_clone_linear_addend (node, i, addtype,
1858 entry_bb);
1859 g = gimple_build_assign (iter5, code, iter4, addcst);
1860 gsi = gsi_last_bb (incr_bb);
1861 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
1864 g = gimple_build_assign (build_simple_mem_ref (iter1), iter4);
1865 gsi = gsi_after_labels (body_bb);
1866 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
1868 imm_use_iterator iter;
1869 use_operand_p use_p;
1870 gimple *use_stmt;
/* Every use of the reference other than the initial load now goes
   through the array pointer.  */
1871 FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
1872 if (use_stmt == load)
1873 continue;
1874 else
1875 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
1876 SET_USE (use_p, iter1);
/* For a non-readonly referenced type, copy element 0 of the array back
   through the original reference in NEW_EXIT_BB.  */
1878 if (!TYPE_READONLY (rtype) && incr_bb)
1880 tree v = make_ssa_name (rtype);
1881 tree aref = build4 (ARRAY_REF, rtype, array,
1882 size_zero_node, NULL_TREE,
1883 NULL_TREE);
1884 gsi = gsi_after_labels (new_exit_bb);
1885 g = gimple_build_assign (v, aref);
1886 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
1887 g = gimple_build_assign (build_simple_mem_ref (def), v);
1888 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
/* Recompute dominators, register the new loop (if any) under the loop
   containing its header, and bring SSA form up to date.  */
1893 calculate_dominance_info (CDI_DOMINATORS);
1894 if (loop)
1895 add_loop (loop, loop->header->loop_father);
1896 update_ssa (TODO_update_ssa);
1898 pop_cfun ();
1901 /* If the function in NODE is tagged as an elemental SIMD function,
1902 create the appropriate SIMD clones.
   When NODE carries no "omp declare simd" attribute, one may be added
   implicitly if -fopenmp-target-simd-clone permits it for this compiler,
   NODE is not an OpenACC function and ok_for_auto_simd_clone accepts it.
   Every "omp declare simd" attribute on NODE is processed in turn.  */
1904 void
1905 expand_simd_clones (struct cgraph_node *node)
1907 tree attr;
/* Cleared when the attribute is added implicitly below.  */
1908 bool explicit_p = true;
/* Nothing to do for nodes that were inlined or are marked "noclone".  */
1910 if (node->inlined_to
1911 || lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl)))
1912 return;
1914 attr = lookup_attribute ("omp declare simd",
1915 DECL_ATTRIBUTES (node->decl));
1917 /* See if we can add an "omp declare simd" directive implicitly
1918 before giving up. */
1919 /* FIXME: OpenACC "#pragma acc routine" translates into
1920 "omp declare target", but appears also to have some other effects
1921 that conflict with generating SIMD clones, causing ICEs. So don't
1922 do this if we've got OpenACC instead of OpenMP. */
1923 if (attr == NULL_TREE
1924 #ifdef ACCEL_COMPILER
1925 && (flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_ANY
1926 || flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_NOHOST)
1927 #else
1928 && (flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_ANY
1929 || flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_HOST)
1930 #endif
1931 && !oacc_get_fn_attrib (node->decl)
1932 && ok_for_auto_simd_clone (node))
1934 attr = tree_cons (get_identifier ("omp declare simd"), NULL,
1935 DECL_ATTRIBUTES (node->decl));
1936 DECL_ATTRIBUTES (node->decl) = attr;
1937 explicit_p = false;
1940 if (attr == NULL_TREE)
1941 return;
1943 /* Ignore
1944 #pragma omp declare simd
1945 extern int foo ();
1946 in C, there we don't know the argument types at all. */
1947 if (!node->definition
1948 && TYPE_ARG_TYPES (TREE_TYPE (node->decl)) == NULL_TREE)
1949 return;
1951 /* Call this before creating clone_info, as it might ggc_collect. */
1952 if (node->definition && node->has_gimple_body_p ())
1953 node->get_body ();
/* The statements from here to the closing `while' form the body that is
   run once per "omp declare simd" attribute; the `while' condition
   fetches the next such attribute from the rest of the chain.  */
1957 /* Start with parsing the "omp declare simd" attribute(s). */
1958 bool inbranch_clause_specified;
1959 struct cgraph_simd_clone *clone_info
1960 = simd_clone_clauses_extract (node, TREE_VALUE (attr),
1961 &inbranch_clause_specified);
1962 if (clone_info == NULL)
1963 continue;
/* Remember the requested simdlen, since the target hook may change it
   in CLONE_INFO.  */
1965 poly_uint64 orig_simdlen = clone_info->simdlen;
1966 tree base_type = simd_clone_compute_base_data_type (node, clone_info);
1968 /* The target can return 0 (no simd clones should be created),
1969 1 (just one ISA of simd clones should be created) or higher
1970 count of ISA variants. In that case, clone_info is initialized
1971 for the first ISA variant. */
1972 int count
1973 = targetm.simd_clone.compute_vecsize_and_simdlen (node, clone_info,
1974 base_type, 0,
1975 explicit_p);
1976 if (count == 0)
1977 continue;
1979 /* Loop over all COUNT ISA variants, and if !INBRANCH_CLAUSE_SPECIFIED,
1980 also create one inbranch and one !inbranch clone of it. */
/* I / 2 selects the ISA variant; odd values of I are the extra inbranch
   clones, skipped when the clause was given explicitly.  */
1981 for (int i = 0; i < count * 2; i++)
1983 struct cgraph_simd_clone *clone = clone_info;
1984 if (inbranch_clause_specified && (i & 1) != 0)
1985 continue;
/* The first iteration uses CLONE_INFO itself; later ones work on a copy,
   allocated with one extra argument slot for odd I.  */
1987 if (i != 0)
1989 clone = simd_clone_struct_alloc (clone_info->nargs
1990 + ((i & 1) != 0));
1991 simd_clone_struct_copy (clone, clone_info);
1992 /* Undo changes targetm.simd_clone.compute_vecsize_and_simdlen
1993 and simd_clone_adjust_argument_types did to the first
1994 clone's info. */
1995 clone->nargs -= clone_info->inbranch;
1996 clone->simdlen = orig_simdlen;
1997 /* And call the target hook again to get the right ISA. */
1998 targetm.simd_clone.compute_vecsize_and_simdlen (node, clone,
1999 base_type,
2000 i / 2,
2001 explicit_p);
2002 if ((i & 1) != 0)
2003 clone->inbranch = 1;
2006 /* simd_clone_mangle might fail if such a clone has been created
2007 already. */
2008 tree id = simd_clone_mangle (node, clone);
2009 if (id == NULL_TREE)
/* CLONE_INFO is reused as the template for later copies, which subtract
   its inbranch flag from nargs; add it here so that subtraction
   balances.  */
2011 if (i == 0)
2012 clone->nargs += clone->inbranch;
2013 continue;
2016 /* Only when we are sure we want to create the clone actually
2017 clone the function (or definitions) or create another
2018 extern FUNCTION_DECL (for prototypes without definitions). */
2019 struct cgraph_node *n = simd_clone_create (node, !explicit_p);
2020 if (n == NULL)
2022 if (i == 0)
2023 clone->nargs += clone->inbranch;
2024 continue;
/* Append N to NODE's list of simd clones.  The list is linked forward
   through next_clone, and the first clone's prev_clone points at the
   last clone.  */
2027 n->simdclone = clone;
2028 clone->origin = node;
2029 clone->next_clone = NULL;
2030 if (node->simd_clones == NULL)
2032 clone->prev_clone = n;
2033 node->simd_clones = n;
2035 else
2037 clone->prev_clone = node->simd_clones->simdclone->prev_clone;
2038 clone->prev_clone->simdclone->next_clone = n;
2039 node->simd_clones->simdclone->prev_clone = n;
2041 symtab->change_decl_assembler_name (n->decl, id);
2042 /* And finally adjust the return type, parameters and for
2043 definitions also function body. */
2044 if (node->definition)
2045 simd_clone_adjust (n);
2046 else
2048 TREE_TYPE (n->decl)
2049 = build_distinct_type_copy (TREE_TYPE (n->decl));
2050 targetm.simd_clone.adjust (n);
2051 simd_clone_adjust_return_type (n);
2052 simd_clone_adjust_argument_types (n);
2054 if (dump_file)
2055 fprintf (dump_file, "\nGenerated %s clone %s\n",
2056 (TREE_PUBLIC (n->decl) ? "global" : "local"),
2057 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl)));
2060 while ((attr = lookup_attribute ("omp declare simd", TREE_CHAIN (attr))));
2063 /* Entry point for IPA simd clone creation pass. */
2065 static unsigned int
2066 ipa_omp_simd_clone (void)
2068 struct cgraph_node *node;
2069 FOR_EACH_FUNCTION (node)
2070 expand_simd_clones (node);
2071 return 0;
2074 namespace {
2076 const pass_data pass_data_omp_simd_clone =
2078 SIMPLE_IPA_PASS, /* type */
2079 "simdclone", /* name */
2080 OPTGROUP_OMP, /* optinfo_flags */
2081 TV_NONE, /* tv_id */
2082 ( PROP_ssa | PROP_cfg ), /* properties_required */
2083 0, /* properties_provided */
2084 0, /* properties_destroyed */
2085 0, /* todo_flags_start */
2086 0, /* todo_flags_finish */
2089 class pass_omp_simd_clone : public simple_ipa_opt_pass
2091 public:
2092 pass_omp_simd_clone(gcc::context *ctxt)
2093 : simple_ipa_opt_pass(pass_data_omp_simd_clone, ctxt)
2096 /* opt_pass methods: */
2097 bool gate (function *) final override;
2098 unsigned int execute (function *) final override
2100 return ipa_omp_simd_clone ();
2104 bool
2105 pass_omp_simd_clone::gate (function *)
2107 return targetm.simd_clone.compute_vecsize_and_simdlen != NULL;
2110 } // anon namespace
2112 simple_ipa_opt_pass *
2113 make_pass_omp_simd_clone (gcc::context *ctxt)
2115 return new pass_omp_simd_clone (ctxt);