tree-optimization/115537 - ICE with SLP condition reduction vectorization
[official-gcc.git] / gcc / config / aarch64 / falkor-tag-collision-avoidance.cc
blobc58a3001184baf5bfec9e96d6bf90fa7200a24a1
1 /* Tag Collision Avoidance pass for Falkor.
2 Copyright (C) 2018-2024 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #define IN_TARGET_CODE 1
22 #include "config.h"
23 #define INCLUDE_LIST
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "tree-pass.h"
31 #include "aarch64-protos.h"
32 #include "hash-map.h"
33 #include "cfgloop.h"
34 #include "cfgrtl.h"
35 #include "rtl-iter.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "optabs.h"
39 #include "regs.h"
40 #include "recog.h"
41 #include "function-abi.h"
42 #include "regrename.h"
43 #include "print-rtl.h"
45 /* The Falkor hardware prefetching system uses the encoding of the registers
46 and offsets of loads to decide which of the multiple hardware prefetchers to
47 assign the load to. This has the positive effect of accelerating prefetches
48 when all related loads with uniform strides are assigned to the same
49 prefetcher unit. The down side is that because of the way the assignment
50 works, multiple unrelated loads may end up on the same prefetch unit, thus
51 causing the unit to bounce between different sets of addresses and never
52 train correctly. The point of this pass is to avoid such collisions so that
53 unrelated loads are spread out to different prefetchers. It also makes a
54 rudimentary attempt to ensure that related loads with the same tags don't
55 get moved out unnecessarily.
57 Perhaps a future enhancement would be to make a more concerted attempt to
58 get related loads under the same tag. See the memcpy/memset implementation
59 for falkor in glibc to understand the kind of impact this can have on
60 falkor.
62 The assignment of loads is based on a tag that is computed from the encoding
63 of the first destination register (the only destination in case of LDR), the
64 base register and the offset (either the register or the immediate value, as
65 encoded in the instruction). This is what the 14 bit tag looks like:
67 |<- 6 bits ->|<- 4b ->|<- 4b ->|
68 --------------------------------
69 | OFFSET | SRC | DST |
70 --------------------------------
72 For all cases, the SRC and DST are the 4 LSB of the encoding of the register
73 in the instruction. Offset computation is more involved and is as follows:
75 - For register offset addressing: 4 LSB of the offset register with the MSB
76 of the 6 bits set to 1.
78 - For immediate offset: 4 LSB of the encoded immediate offset. The encoding
79 depends on the width of the load and is expressed as multiples of the
80 width.
82 - For loads with update: 4 LSB of the offset. The encoding here is the
83 exact number by which the base is offset and incremented.
85 Based on the above it is clear that registers 0 and 16 will result in
86 collisions, 1 and 17 and so on. This pass detects such collisions within a
87 def/use chain of the source register in a loop and tries to resolve the
88 collision by renaming one of the destination registers. */
/* Get the destination part of the tag.  */
#define TAG_GET_DEST(__tag) ((__tag) & 0xf)

/* Get the tag with the destination part updated.  The argument is fully
   parenthesized so that expression arguments (e.g. "a | b") evaluate
   correctly regardless of operator precedence.  */
#define TAG_UPDATE_DEST(__tag, __dest) (((__tag) & ~0xf) | ((__dest) & 0xf))

/* Strides larger than this are not worth renaming for; the hardware
   prefetcher is unable to train on them anyway.  */
#define MAX_PREFETCH_STRIDE 2048
98 /* The instruction information structure. This is used to cache information
99 about the INSN that we derive when traversing through all of the insns in
100 loops. */
101 class tag_insn_info
103 public:
104 rtx_insn *insn;
105 rtx dest;
106 rtx base;
107 rtx offset;
108 bool writeback;
109 bool ldp;
111 tag_insn_info (rtx_insn *i, rtx d, rtx b, rtx o, bool w, bool p)
112 : insn (i), dest (d), base (b), offset (o), writeback (w), ldp (p)
115 /* Compute the tag based on BASE, DEST and OFFSET of the load. */
116 unsigned tag ()
118 unsigned int_offset = 0;
119 rtx offset = this->offset;
120 unsigned dest = REGNO (this->dest);
121 unsigned base = REGNO (this->base);
122 machine_mode dest_mode = GET_MODE (this->dest);
124 /* Falkor does not support SVE; GET_LOAD_INFO ensures that the
125 destination mode is constant here. */
126 unsigned dest_mode_size = GET_MODE_SIZE (dest_mode).to_constant ();
128 /* For loads of larger than 16 bytes, the DEST part of the tag is 0. */
129 if ((dest_mode_size << this->ldp) > 16)
130 dest = 0;
132 if (offset && REG_P (offset))
133 int_offset = (1 << 5) | REGNO (offset);
134 else if (offset && CONST_INT_P (offset))
136 int_offset = INTVAL (offset);
137 int_offset /= dest_mode_size;
138 if (!this->writeback)
139 int_offset >>= 2;
141 return ((dest & 0xf)
142 | ((base & 0xf) << 4)
143 | ((int_offset & 0x3f) << 8));
147 /* Hash map to traverse and process instructions with colliding tags. */
148 typedef hash_map <rtx, auto_vec <tag_insn_info *> > tag_map_t;
150 /* Vector of instructions with colliding tags. */
151 typedef auto_vec <tag_insn_info *> insn_info_list_t;
153 /* Pair of instruction information and unavailable register set to pass to
154 CHECK_COLLIDING_TAGS. */
155 typedef std::pair <tag_insn_info *, HARD_REG_SET *> arg_pair_t;
158 /* Callback to free all tag_insn_info objects. */
159 bool
160 free_insn_info (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
161 void *arg ATTRIBUTE_UNUSED)
163 while (v->length () > 0)
164 delete v->pop ();
166 return true;
170 /* Add all aliases of the register to the unavailable register set. REG is the
171 smallest register number that can then be used to reference its aliases.
172 UNAVAILABLE is the hard register set to add the ignored register numbers to
173 and MODE is the mode in which the registers would have been used. */
174 static void
175 ignore_all_aliases (HARD_REG_SET *unavailable, machine_mode mode, unsigned reg)
177 add_to_hard_reg_set (unavailable, mode, reg);
178 add_to_hard_reg_set (unavailable, mode, reg + 16);
179 add_to_hard_reg_set (unavailable, mode, reg + 32);
180 add_to_hard_reg_set (unavailable, mode, reg + 48);
184 /* Callback to check which destination registers are unavailable to us for
185 renaming because of the base and offset colliding. This is a callback that
186 gets called for every name value pair (T, V) in the TAG_MAP. The ARG is an
187 std::pair of the tag_insn_info of the original insn and the hard register
188 set UNAVAILABLE that is used to record hard register numbers that cannot be
189 used for the renaming. This always returns true since we want to traverse
190 through the entire TAG_MAP. */
191 bool
192 check_colliding_tags (const rtx &t, const insn_info_list_t &v, arg_pair_t *arg)
194 HARD_REG_SET *unavailable = arg->second;
195 unsigned orig_tag = arg->first->tag ();
196 unsigned tag = INTVAL (t);
197 machine_mode mode = GET_MODE (arg->first->dest);
199 /* Can't collide with emptiness. */
200 if (v.length () == 0)
201 return true;
203 /* Drop all aliased destination registers that result in the same
204 tag. It is not necessary to drop all of them but we do anyway
205 because it is quicker than checking ranges. */
206 if (TAG_UPDATE_DEST (tag, 0) == TAG_UPDATE_DEST (orig_tag, 0))
207 ignore_all_aliases (unavailable, mode, TAG_GET_DEST (tag));
209 return true;
213 /* Initialize and build a set of hard register numbers UNAVAILABLE to avoid for
214 renaming. INSN_INFO is the original insn, TAG_MAP is the map of the list of
215 insns indexed by their tags, HEAD is the def/use chain head of the
216 destination register of the original insn. The routine returns the super
217 class of register classes that may be used during the renaming. */
218 static enum reg_class
219 init_unavailable (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head,
220 HARD_REG_SET *unavailable)
222 unsigned dest = head->regno;
223 enum reg_class super_class = NO_REGS;
224 machine_mode mode = GET_MODE (insn_info->dest);
226 CLEAR_HARD_REG_SET (*unavailable);
228 for (struct du_chain *tmp = head->first; tmp; tmp = tmp->next_use)
230 if (DEBUG_INSN_P (tmp->insn))
231 continue;
233 *unavailable |= ~reg_class_contents[tmp->cl];
234 super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
237 for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
238 if (fixed_regs[i] || global_regs[i])
239 add_to_hard_reg_set (unavailable, mode, i);
241 arg_pair_t arg = arg_pair_t (insn_info, unavailable);
243 /* Exclude all registers that would lead to collisions with other loads. */
244 tag_map.traverse <arg_pair_t *, check_colliding_tags> (&arg);
246 /* Finally, also ignore all aliases of the current reg. */
247 ignore_all_aliases (unavailable, mode, dest & 0xf);
249 return super_class;
253 /* Find a suitable and available register and rename the chain of occurrences
254 of the register defined in the def/use chain headed by HEAD in which INSN
255 exists. CUR_TAG, TAGS and TAG_MAP are used to determine which registers are
256 unavailable due to a potential collision due to the rename. The routine
257 returns the register number in case of a successful rename or -1 to indicate
258 failure. */
259 static int
260 rename_chain (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head)
262 unsigned dest_regno = head->regno;
264 if (head->cannot_rename || head->renamed)
265 return -1;
267 HARD_REG_SET unavailable;
269 enum reg_class super_class = init_unavailable (insn_info, tag_map, head,
270 &unavailable);
272 unsigned new_regno = find_rename_reg (head, super_class, &unavailable,
273 dest_regno, false);
275 /* Attempt to rename as long as regrename doesn't just throw the same
276 register at us. */
277 if (new_regno != dest_regno && regrename_do_replace (head, new_regno))
279 if (dump_file && (dump_flags & TDF_DETAILS))
280 fprintf (dump_file, "\tInsn %d: Renamed %d to %d\n",
281 INSN_UID (insn_info->insn), dest_regno, new_regno);
283 return new_regno;
286 return -1;
290 /* Return true if REGNO is not safe to rename. */
291 static bool
292 unsafe_rename_p (unsigned regno)
294 /* Avoid renaming registers used for argument passing and return value. In
295 future we could be a little less conservative and walk through the basic
296 blocks to see if there are any call or syscall sites. */
297 if (regno <= R8_REGNUM
298 || (regno >= V0_REGNUM && regno < V8_REGNUM))
299 return true;
301 /* Don't attempt to rename registers that may have specific meanings. */
302 switch (regno)
304 case LR_REGNUM:
305 case HARD_FRAME_POINTER_REGNUM:
306 case FRAME_POINTER_REGNUM:
307 case STACK_POINTER_REGNUM:
308 return true;
311 return false;
315 /* Go through the def/use chains for the register and find the chain for this
316 insn to rename. The function returns the hard register number in case of a
317 successful rename and -1 otherwise. */
318 static int
319 rename_dest (tag_insn_info *insn_info, tag_map_t &tag_map)
321 struct du_chain *chain = NULL;
322 du_head_p head = NULL;
323 int i;
325 unsigned dest_regno = REGNO (insn_info->dest);
327 if (unsafe_rename_p (dest_regno))
328 return -1;
330 /* Search the chain where this instruction is (one of) the root. */
331 rtx_insn *insn = insn_info->insn;
332 operand_rr_info *dest_op_info = insn_rr[INSN_UID (insn)].op_info;
334 for (i = 0; i < dest_op_info->n_chains; i++)
336 /* The register tracked by this chain does not match the
337 destination register of insn. */
338 if (dest_op_info->heads[i]->regno != dest_regno)
339 continue;
341 head = dest_op_info->heads[i];
342 /* The chain was merged in another, find the new head. */
343 if (!head->first)
344 head = regrename_chain_from_id (head->id);
346 for (chain = head->first; chain; chain = chain->next_use)
347 /* Found the insn in the chain, so try renaming the register in this
348 chain. */
349 if (chain->insn == insn)
350 return rename_chain (insn_info, tag_map, head);
353 return -1;
357 /* Flag to track if the map has changed. */
358 static bool map_changed = false;
360 /* The actual reallocation logic. For each vector of collisions V, try to
361 resolve the collision by attempting to rename the destination register of
362 all but one of the loads. This is a callback that is invoked for each
363 name-value pair (T, V) in TAG_MAP. The function returns true whenever it
364 returns unchanged and false otherwise to halt traversal. */
365 bool
366 avoid_collisions_1 (const rtx &t, insn_info_list_t *v, tag_map_t *tag_map)
368 /* We need at least two loads to cause a tag collision, return unchanged. */
369 if (v->length () < 2)
370 return true;
372 tag_insn_info *vec_start = v->pop ();
373 tag_insn_info *insn_info = vec_start;
375 /* Try to rename at least one register to reduce the collision. If we
376 iterate all the way through, we end up dropping one of the loads from the
377 list. This is fine because we want at most one element to ensure that a
378 subsequent rename attempt does not end up worsening the collision. */
381 int new_regno;
383 if ((new_regno = rename_dest (insn_info, *tag_map)) != -1)
385 rtx new_tag = GEN_INT (TAG_UPDATE_DEST (INTVAL (t), new_regno));
387 tag_map->get_or_insert (new_tag).safe_push (insn_info);
388 df_set_regs_ever_live (new_regno, true);
389 map_changed = true;
390 return false;
393 v->safe_insert (0, insn_info);
394 insn_info = v->pop ();
396 while (insn_info != vec_start);
398 if (dump_file)
399 fprintf (dump_file, "\t>> Failed to rename destination in insn %d\n\t>>",
400 INSN_UID (insn_info->insn));
402 /* Drop the last element and move on to the next tag. */
403 delete insn_info;
404 return true;
408 /* For each set of collisions, attempt to rename the registers or insert a move
409 to avoid the collision. We repeatedly traverse through TAG_MAP using
410 AVOID_COLLISIONS_1 trying to rename registers to avoid collisions until a
411 full traversal results in no change in the map. */
412 static void
413 avoid_collisions (tag_map_t &tag_map)
417 map_changed = false;
418 tag_map.traverse <tag_map_t *, avoid_collisions_1> (&tag_map);
420 while (map_changed);
425 /* Find the use def chain in which INSN exists and then see if there is a
426 definition inside the loop and outside it. We use this as a simple
427 approximation to determine whether the base register is an IV. The basic
428 idea is to find INSN in the use-def chains for its base register and find
429 all definitions that reach it. Of all these definitions, there should be at
430 least one definition that is a simple addition of a constant value, either
431 as a binary operation or a pre or post update.
433 The function returns true if the base register is estimated to be an IV. */
434 static bool
435 iv_p (rtx_insn *insn, rtx reg, struct loop *loop)
437 df_ref ause;
438 unsigned regno = REGNO (reg);
440 /* Ignore loads from the stack. */
441 if (regno == SP_REGNUM)
442 return false;
444 for (ause = DF_REG_USE_CHAIN (regno); ause; ause = DF_REF_NEXT_REG (ause))
446 if (!DF_REF_INSN_INFO (ause)
447 || !NONDEBUG_INSN_P (DF_REF_INSN (ause)))
448 continue;
450 if (insn != DF_REF_INSN (ause))
451 continue;
453 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
454 df_ref def_rec;
456 FOR_EACH_INSN_INFO_DEF (def_rec, insn_info)
458 rtx_insn *insn = DF_REF_INSN (def_rec);
459 basic_block bb = BLOCK_FOR_INSN (insn);
461 if (dominated_by_p (CDI_DOMINATORS, bb, loop->header)
462 && bb->loop_father == loop)
464 if (recog_memoized (insn) < 0)
465 continue;
467 rtx pat = PATTERN (insn);
469 /* Prefetch or clobber; unlikely to be a constant stride. The
470 falkor software prefetcher tuning is pretty conservative, so
471 its presence indicates that the access pattern is probably
472 strided but most likely with an unknown stride size or a
473 stride size that is quite large. */
474 if (GET_CODE (pat) != SET)
475 continue;
477 rtx x = SET_SRC (pat);
478 if (GET_CODE (x) == ZERO_EXTRACT
479 || GET_CODE (x) == ZERO_EXTEND
480 || GET_CODE (x) == SIGN_EXTEND)
481 x = XEXP (x, 0);
483 /* Loading the value from memory; unlikely to be a constant
484 stride. */
485 if (MEM_P (x))
486 continue;
488 /* An increment or decrement by a constant MODE_SIZE amount or
489 the result of a binary expression is likely to be an IV. */
490 if (GET_CODE (x) == POST_INC
491 || GET_CODE (x) == POST_DEC
492 || GET_CODE (x) == PRE_INC
493 || GET_CODE (x) == PRE_DEC)
494 return true;
495 else if (BINARY_P (x)
496 && (CONST_INT_P (XEXP (x, 0))
497 || CONST_INT_P (XEXP (x, 1))))
499 rtx stride = (CONST_INT_P (XEXP (x, 0))
500 ? XEXP (x, 0) : XEXP (x, 1));
502 /* Don't bother with very long strides because the prefetcher
503 is unable to train on them anyway. */
504 if (INTVAL (stride) < MAX_PREFETCH_STRIDE)
505 return true;
509 return false;
511 return false;
515 /* Return true if SRC is a strided load in the LOOP, false otherwise.
516 If it is a strided load, set the BASE and OFFSET. Also, if this is
517 a pre/post increment load, set PRE_POST to true. */
518 static bool
519 valid_src_p (rtx src, rtx_insn *insn, struct loop *loop, bool *pre_post,
520 rtx *base, rtx *offset, bool load_pair)
522 subrtx_var_iterator::array_type array;
523 rtx x = NULL_RTX;
525 FOR_EACH_SUBRTX_VAR (iter, array, src, NONCONST)
526 if (MEM_P (*iter))
528 x = *iter;
529 break;
532 if (!x)
533 return false;
535 struct aarch64_address_info addr;
536 machine_mode mode = GET_MODE (x);
538 if (!aarch64_classify_address (&addr, XEXP (x, 0), mode, true))
539 return false;
541 if (addr.type != ADDRESS_REG_IMM
542 && addr.type != ADDRESS_REG_WB
543 && addr.type != ADDRESS_REG_REG
544 && addr.type != ADDRESS_REG_UXTW
545 && addr.type != ADDRESS_REG_SXTW)
546 return false;
548 unsigned regno = REGNO (addr.base);
549 if (global_regs[regno] || fixed_regs[regno])
550 return false;
552 if (addr.type == ADDRESS_REG_WB)
554 unsigned code = GET_CODE (XEXP (x, 0));
556 *pre_post = true;
557 *base = addr.base;
559 if (code == PRE_MODIFY || code == POST_MODIFY)
560 *offset = addr.offset;
561 else
563 /*Writeback is only supported for fixed-width modes. */
564 unsigned int_offset = GET_MODE_SIZE (mode).to_constant ();
566 /* For post-incremented load pairs we would increment the base twice
567 over, so make that adjustment. */
568 if (load_pair && (code == POST_INC || code == POST_DEC))
569 int_offset *= 2;
571 *offset = GEN_INT (int_offset);
573 return true;
575 else if (addr.type == ADDRESS_REG_IMM || addr.type == ADDRESS_REG_REG)
577 /* Check if the load is strided. */
578 if (!iv_p (insn, addr.base, loop))
579 return false;
581 *base = addr.base;
582 *offset = addr.offset;
583 return true;
586 return false;
590 /* Return true if INSN is a strided load in LOOP. If it is a strided load, set
591 the DEST, BASE and OFFSET. Also, if this is a pre/post increment load, set
592 PRE_POST to true.
594 The routine does checks on the destination of the insn and depends on
595 STRIDED_LOAD_P to check the source and fill in the BASE and OFFSET. */
596 static bool
597 get_load_info (rtx_insn *insn, struct loop *loop, rtx *dest, rtx *base,
598 rtx *offset, bool *pre_post, bool *ldp)
600 if (!INSN_P (insn) || recog_memoized (insn) < 0)
601 return false;
603 rtx pat = PATTERN (insn);
604 unsigned code = GET_CODE (pat);
605 bool load_pair = (code == PARALLEL);
607 /* For a load pair we need only the first base and destination
608 registers. We however need to ensure that our pre/post increment
609 offset is doubled; we do that in STRIDED_LOAD_P. */
610 if (load_pair)
612 pat = XVECEXP (pat, 0, 0);
613 code = GET_CODE (pat);
616 if (code != SET)
617 return false;
619 rtx dest_rtx = SET_DEST (pat);
621 if (!REG_P (dest_rtx))
622 return false;
624 unsigned regno = REGNO (dest_rtx);
625 machine_mode mode = GET_MODE (dest_rtx);
626 machine_mode inner_mode = GET_MODE_INNER (mode);
628 /* Falkor does not support SVE vectors. */
629 if (!GET_MODE_SIZE (mode).is_constant ())
630 return false;
632 /* Ignore vector struct or lane loads. */
633 if (GET_MODE_SIZE (mode).to_constant ()
634 != GET_MODE_SIZE (inner_mode).to_constant ())
635 return false;
637 /* The largest width we want to bother with is a load of a pair of
638 quad-words. */
639 if ((GET_MODE_SIZE (mode).to_constant () << load_pair)
640 > GET_MODE_SIZE (OImode))
641 return false;
643 /* Ignore loads into the stack pointer because it is unlikely to be a
644 stream. */
645 if (regno == SP_REGNUM)
646 return false;
648 if (valid_src_p (SET_SRC (pat), insn, loop, pre_post, base, offset,
649 load_pair))
651 *dest = dest_rtx;
652 *ldp = load_pair;
654 return true;
657 return false;
661 /* Return whether INSN and CAND are in the same def/use chain. */
662 static bool
663 in_same_chain (rtx_insn *insn, rtx_insn *cand, unsigned regno)
665 struct du_chain *chain = NULL;
666 du_head_p head = NULL;
667 int i;
669 /* Search the chain where this instruction is (one of) the root. */
670 operand_rr_info *op_info = insn_rr[INSN_UID (insn)].op_info;
672 for (i = 0; i < op_info->n_chains; i++)
674 /* The register tracked by this chain does not match the
675 dest register of insn. */
676 if (op_info->heads[i]->regno != regno)
677 continue;
679 head = op_info->heads[i];
680 /* The chain was merged in another, find the new head. */
681 if (!head->first)
682 head = regrename_chain_from_id (head->id);
684 bool found_insn = false, found_cand = false;
686 for (chain = head->first; chain; chain = chain->next_use)
688 rtx *loc = &SET_DEST (PATTERN (chain->insn));
690 if (chain->loc != loc)
691 continue;
693 if (chain->insn == insn)
694 found_insn = true;
696 if (chain->insn == cand)
697 found_cand = true;
699 if (found_insn && found_cand)
700 return true;
704 return false;
708 /* Callback function to traverse the tag map and drop loads that have the same
709 destination and are in the same chain of occurrence. Routine always returns
710 true to allow traversal through all of TAG_MAP. */
711 bool
712 single_dest_per_chain (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
713 void *arg ATTRIBUTE_UNUSED)
715 for (int i = v->length () - 1; i>= 1; i--)
717 tag_insn_info *insn_info = (*v)[i];
719 for (int j = v->length () - 2; j >= 0; j--)
721 /* Filter out destinations in the same chain. */
722 if (in_same_chain (insn_info->insn, (*v)[j]->insn,
723 REGNO (insn_info->dest)))
725 v->ordered_remove (j);
726 i = v->length ();
727 break;
732 return true;
736 /* Callback invoked for each name-value pair (T, INSN_INFO) to dump the insn
737 list INSN_INFO for tag T. */
738 bool
739 dump_insn_list (const rtx &t, const insn_info_list_t &insn_info,
740 void *unused ATTRIBUTE_UNUSED)
742 gcc_assert (dump_file);
743 fprintf (dump_file, "Tag 0x" HOST_WIDE_INT_PRINT_HEX_PURE " ::\n", INTVAL (t));
745 for (unsigned i = 0; i < insn_info.length (); i++)
746 dump_insn_slim (dump_file, insn_info[i]->insn);
748 fprintf (dump_file, "\n");
750 return true;
754 /* Record all loads in LOOP into TAG_MAP indexed by the falkor hardware
755 prefetcher memory tags. */
756 static void
757 record_loads (tag_map_t &tag_map, struct loop *loop)
759 rtx_insn *insn;
760 basic_block *body, bb;
762 body = get_loop_body (loop);
764 for (unsigned i = 0; i < loop->num_nodes; i++)
766 bb = body[i];
767 FOR_BB_INSNS (bb, insn)
769 rtx base = NULL_RTX;
770 rtx dest = NULL_RTX;
771 rtx offset = NULL_RTX;
772 bool writeback = false;
773 bool ldp = false;
775 if (!INSN_P (insn) || DEBUG_INSN_P (insn))
776 continue;
778 if (get_load_info (insn, loop, &dest, &base, &offset, &writeback,
779 &ldp))
781 tag_insn_info *i = new tag_insn_info (insn, dest, base, offset,
782 writeback, ldp);
783 rtx tag = GEN_INT (i->tag ());
784 tag_map.get_or_insert (tag).safe_push (i);
789 if (dump_file)
791 fprintf (dump_file, "Loop %d: Tag map generated.\n", loop->num);
792 tag_map.traverse <void *, dump_insn_list> (NULL);
795 /* Try to reduce the dataset before launching into the rename attempt. Drop
796 destinations in the same collision chain that appear in the same def/use
797 chain, all as defs. These chains will move together in a rename so
798 there's no point in keeping both in there. */
799 tag_map.traverse <void *, single_dest_per_chain> (NULL);
803 /* Tag collision avoidance pass for Falkor. The pass runs in two phases for
804 each loop; the first phase collects all loads that we consider as
805 interesting for renaming into a tag-indexed map of lists. The second phase
806 renames the destination register of the loads in an attempt to spread out
807 the loads into different tags. */
808 void
809 execute_tag_collision_avoidance ()
811 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
812 df_chain_add_problem (DF_UD_CHAIN);
813 df_compute_regs_ever_live (true);
814 df_note_add_problem ();
815 df_analyze ();
816 df_set_flags (DF_DEFER_INSN_RESCAN);
818 regrename_init (true);
819 regrename_analyze (NULL);
821 compute_bb_for_insn ();
822 calculate_dominance_info (CDI_DOMINATORS);
823 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
825 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
827 tag_map_t tag_map (512);
829 record_loads (tag_map, loop);
830 avoid_collisions (tag_map);
831 if (dump_file)
833 fprintf (dump_file, "Loop %d: Completed rename.\n", loop->num);
834 tag_map.traverse <void *, dump_insn_list> (NULL);
836 tag_map.traverse <void *, free_insn_info> (NULL);
839 loop_optimizer_finalize ();
840 free_dominance_info (CDI_DOMINATORS);
841 regrename_finish ();
845 const pass_data pass_data_tag_collision_avoidance =
847 RTL_PASS, /* type */
848 "tag_collision_avoidance", /* name */
849 OPTGROUP_NONE, /* optinfo_flags */
850 TV_NONE, /* tv_id */
851 0, /* properties_required */
852 0, /* properties_provided */
853 0, /* properties_destroyed */
854 0, /* todo_flags_start */
855 TODO_df_finish, /* todo_flags_finish */
859 class pass_tag_collision_avoidance : public rtl_opt_pass
861 public:
862 pass_tag_collision_avoidance (gcc::context *ctxt)
863 : rtl_opt_pass (pass_data_tag_collision_avoidance, ctxt)
866 /* opt_pass methods: */
867 virtual bool gate (function *)
869 return ((aarch64_tune_params.extra_tuning_flags
870 & AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS)
871 && optimize >= 2);
874 virtual unsigned int execute (function *)
876 execute_tag_collision_avoidance ();
877 return 0;
880 }; // class pass_tag_collision_avoidance
883 /* Create a new pass instance. */
884 rtl_opt_pass *
885 make_pass_tag_collision_avoidance (gcc::context *ctxt)
887 return new pass_tag_collision_avoidance (ctxt);