* config/i386/driver-i386.c (decode_caches_intel): Add missing entries.
[official-gcc.git] / gcc / ira-emit.c
blob196efa025457292e16d0927e276dd5a80b89ca63
1 /* Integrated Register Allocator. Changing code and generating moves.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Vladimir Makarov <vmakarov@redhat.com>.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* When we have more one region, we need to change the original RTL
22 code after coloring. Let us consider two allocnos representing the
23 same pseudo-register outside and inside a region respectively.
24 They can get different hard-registers. The reload pass works on
25 pseudo registers basis and there is no way to say the reload that
26 pseudo could be in different registers and it is even more
27 difficult to say in what places of the code the pseudo should have
28 particular hard-registers. So in this case IRA has to create and
29 use a new pseudo-register inside the region and adds code to move
30 allocno values on the region's borders. This is done by the code
31 in this file.
33 The code makes top-down traversal of the regions and generate new
34 pseudos and the move code on the region borders. In some
35 complicated cases IRA can create a new pseudo used temporarily to
36 move allocno values when a swap of values stored in two
37 hard-registers is needed (e.g. two allocnos representing different
38 pseudos outside region got respectively hard registers 1 and 2 and
39 the corresponding allocnos inside the region got respectively hard
40 registers 2 and 1). At this stage, the new pseudo is marked as
41 spilled.
43 IRA still creates the pseudo-register and the moves on the region
44 borders even when the both corresponding allocnos were assigned to
45 the same hard-register. It is done because, if the reload pass for
46 some reason spills a pseudo-register representing the original
47 pseudo outside or inside the region, the effect will be smaller
48 because another pseudo will still be in the hard-register. In most
49 cases, this is better then spilling the original pseudo in its
50 whole live-range. If reload does not change the allocation for the
51 two pseudo-registers, the trivial move will be removed by
52 post-reload optimizations.
54 IRA does not generate a new pseudo and moves for the allocno values
55 if the both allocnos representing an original pseudo inside and
56 outside region assigned to the same hard register when the register
57 pressure in the region for the corresponding pressure class is less
58 than number of available hard registers for given pressure class.
60 IRA also does some optimizations to remove redundant moves which is
61 transformed into stores by the reload pass on CFG edges
62 representing exits from the region.
64 IRA tries to reduce duplication of code generated on CFG edges
65 which are enters and exits to/from regions by moving some code to
66 the edge sources or destinations when it is possible. */
68 #include "config.h"
69 #include "system.h"
70 #include "coretypes.h"
71 #include "tm.h"
72 #include "regs.h"
73 #include "rtl.h"
74 #include "tm_p.h"
75 #include "target.h"
76 #include "flags.h"
77 #include "obstack.h"
78 #include "bitmap.h"
79 #include "hard-reg-set.h"
80 #include "basic-block.h"
81 #include "expr.h"
82 #include "recog.h"
83 #include "params.h"
84 #include "reload.h"
85 #include "df.h"
86 #include "ira-int.h"
89 /* Data used to emit live range split insns and to flattening IR. */
90 ira_emit_data_t ira_allocno_emit_data;
92 /* Definitions for vectors of pointers. */
93 typedef void *void_p;
95 /* Pointers to data allocated for allocnos being created during
96 emitting. Usually there are quite few such allocnos because they
97 are created only for resolving loop in register shuffling. */
98 static vec<void_p> new_allocno_emit_data_vec;
100 /* Allocate and initiate the emit data. */
101 void
102 ira_initiate_emit_data (void)
104 ira_allocno_t a;
105 ira_allocno_iterator ai;
107 ira_allocno_emit_data
108 = (ira_emit_data_t) ira_allocate (ira_allocnos_num
109 * sizeof (struct ira_emit_data));
110 memset (ira_allocno_emit_data, 0,
111 ira_allocnos_num * sizeof (struct ira_emit_data));
112 FOR_EACH_ALLOCNO (a, ai)
113 ALLOCNO_ADD_DATA (a) = ira_allocno_emit_data + ALLOCNO_NUM (a);
114 new_allocno_emit_data_vec.create (50);
118 /* Free the emit data. */
119 void
120 ira_finish_emit_data (void)
122 void_p p;
123 ira_allocno_t a;
124 ira_allocno_iterator ai;
126 ira_free (ira_allocno_emit_data);
127 FOR_EACH_ALLOCNO (a, ai)
128 ALLOCNO_ADD_DATA (a) = NULL;
129 for (;new_allocno_emit_data_vec.length () != 0;)
131 p = new_allocno_emit_data_vec.pop ();
132 ira_free (p);
134 new_allocno_emit_data_vec.release ();
137 /* Create and return a new allocno with given REGNO and
138 LOOP_TREE_NODE. Allocate emit data for it. */
139 static ira_allocno_t
140 create_new_allocno (int regno, ira_loop_tree_node_t loop_tree_node)
142 ira_allocno_t a;
144 a = ira_create_allocno (regno, false, loop_tree_node);
145 ALLOCNO_ADD_DATA (a) = ira_allocate (sizeof (struct ira_emit_data));
146 memset (ALLOCNO_ADD_DATA (a), 0, sizeof (struct ira_emit_data));
147 new_allocno_emit_data_vec.safe_push (ALLOCNO_ADD_DATA (a));
148 return a;
153 /* See comments below. */
154 typedef struct move *move_t;
156 /* The structure represents an allocno move. Both allocnos have the
157 same original regno but different allocation. */
158 struct move
160 /* The allocnos involved in the move. */
161 ira_allocno_t from, to;
162 /* The next move in the move sequence. */
163 move_t next;
164 /* Used for finding dependencies. */
165 bool visited_p;
166 /* The size of the following array. */
167 int deps_num;
168 /* Moves on which given move depends on. Dependency can be cyclic.
169 It means we need a temporary to generates the moves. Sequence
170 A1->A2, B1->B2 where A1 and B2 are assigned to reg R1 and A2 and
171 B1 are assigned to reg R2 is an example of the cyclic
172 dependencies. */
173 move_t *deps;
174 /* First insn generated for the move. */
175 rtx insn;
178 /* Array of moves (indexed by BB index) which should be put at the
179 start/end of the corresponding basic blocks. */
180 static move_t *at_bb_start, *at_bb_end;
182 /* Max regno before renaming some pseudo-registers. For example, the
183 same pseudo-register can be renamed in a loop if its allocation is
184 different outside the loop. */
185 static int max_regno_before_changing;
187 /* Return new move of allocnos TO and FROM. */
188 static move_t
189 create_move (ira_allocno_t to, ira_allocno_t from)
191 move_t move;
193 move = (move_t) ira_allocate (sizeof (struct move));
194 move->deps = NULL;
195 move->deps_num = 0;
196 move->to = to;
197 move->from = from;
198 move->next = NULL;
199 move->insn = NULL_RTX;
200 move->visited_p = false;
201 return move;
204 /* Free memory for MOVE and its dependencies. */
205 static void
206 free_move (move_t move)
208 if (move->deps != NULL)
209 ira_free (move->deps);
210 ira_free (move);
213 /* Free memory for list of the moves given by its HEAD. */
214 static void
215 free_move_list (move_t head)
217 move_t next;
219 for (; head != NULL; head = next)
221 next = head->next;
222 free_move (head);
226 /* Return TRUE if the move list LIST1 and LIST2 are equal (two
227 moves are equal if they involve the same allocnos). */
228 static bool
229 eq_move_lists_p (move_t list1, move_t list2)
231 for (; list1 != NULL && list2 != NULL;
232 list1 = list1->next, list2 = list2->next)
233 if (list1->from != list2->from || list1->to != list2->to)
234 return false;
235 return list1 == list2;
238 /* Print move list LIST into file F. */
239 static void
240 print_move_list (FILE *f, move_t list)
242 for (; list != NULL; list = list->next)
243 fprintf (f, " a%dr%d->a%dr%d",
244 ALLOCNO_NUM (list->from), ALLOCNO_REGNO (list->from),
245 ALLOCNO_NUM (list->to), ALLOCNO_REGNO (list->to));
246 fprintf (f, "\n");
249 extern void ira_debug_move_list (move_t list);
251 /* Print move list LIST into stderr. */
252 void
253 ira_debug_move_list (move_t list)
255 print_move_list (stderr, list);
258 /* This recursive function changes pseudo-registers in *LOC if it is
259 necessary. The function returns TRUE if a change was done. */
260 static bool
261 change_regs (rtx *loc)
263 int i, regno, result = false;
264 const char *fmt;
265 enum rtx_code code;
266 rtx reg;
268 if (*loc == NULL_RTX)
269 return false;
270 code = GET_CODE (*loc);
271 if (code == REG)
273 regno = REGNO (*loc);
274 if (regno < FIRST_PSEUDO_REGISTER)
275 return false;
276 if (regno >= max_regno_before_changing)
277 /* It is a shared register which was changed already. */
278 return false;
279 if (ira_curr_regno_allocno_map[regno] == NULL)
280 return false;
281 reg = allocno_emit_reg (ira_curr_regno_allocno_map[regno]);
282 if (reg == *loc)
283 return false;
284 *loc = reg;
285 return true;
288 fmt = GET_RTX_FORMAT (code);
289 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
291 if (fmt[i] == 'e')
292 result = change_regs (&XEXP (*loc, i)) || result;
293 else if (fmt[i] == 'E')
295 int j;
297 for (j = XVECLEN (*loc, i) - 1; j >= 0; j--)
298 result = change_regs (&XVECEXP (*loc, i, j)) || result;
301 return result;
304 /* Attach MOVE to the edge E. The move is attached to the head of the
305 list if HEAD_P is TRUE. */
306 static void
307 add_to_edge_list (edge e, move_t move, bool head_p)
309 move_t last;
311 if (head_p || e->aux == NULL)
313 move->next = (move_t) e->aux;
314 e->aux = move;
316 else
318 for (last = (move_t) e->aux; last->next != NULL; last = last->next)
320 last->next = move;
321 move->next = NULL;
325 /* Create and return new pseudo-register with the same attributes as
326 ORIGINAL_REG. */
328 ira_create_new_reg (rtx original_reg)
330 rtx new_reg;
332 new_reg = gen_reg_rtx (GET_MODE (original_reg));
333 ORIGINAL_REGNO (new_reg) = ORIGINAL_REGNO (original_reg);
334 REG_USERVAR_P (new_reg) = REG_USERVAR_P (original_reg);
335 REG_POINTER (new_reg) = REG_POINTER (original_reg);
336 REG_ATTRS (new_reg) = REG_ATTRS (original_reg);
337 if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
338 fprintf (ira_dump_file, " Creating newreg=%i from oldreg=%i\n",
339 REGNO (new_reg), REGNO (original_reg));
340 ira_expand_reg_equiv ();
341 return new_reg;
344 /* Return TRUE if loop given by SUBNODE inside the loop given by
345 NODE. */
346 static bool
347 subloop_tree_node_p (ira_loop_tree_node_t subnode, ira_loop_tree_node_t node)
349 for (; subnode != NULL; subnode = subnode->parent)
350 if (subnode == node)
351 return true;
352 return false;
355 /* Set up member `reg' to REG for allocnos which has the same regno as
356 ALLOCNO and which are inside the loop corresponding to ALLOCNO. */
357 static void
358 set_allocno_reg (ira_allocno_t allocno, rtx reg)
360 int regno;
361 ira_allocno_t a;
362 ira_loop_tree_node_t node;
364 node = ALLOCNO_LOOP_TREE_NODE (allocno);
365 for (a = ira_regno_allocno_map[ALLOCNO_REGNO (allocno)];
366 a != NULL;
367 a = ALLOCNO_NEXT_REGNO_ALLOCNO (a))
368 if (subloop_tree_node_p (ALLOCNO_LOOP_TREE_NODE (a), node))
369 ALLOCNO_EMIT_DATA (a)->reg = reg;
370 for (a = ALLOCNO_CAP (allocno); a != NULL; a = ALLOCNO_CAP (a))
371 ALLOCNO_EMIT_DATA (a)->reg = reg;
372 regno = ALLOCNO_REGNO (allocno);
373 for (a = allocno;;)
375 if (a == NULL || (a = ALLOCNO_CAP (a)) == NULL)
377 node = node->parent;
378 if (node == NULL)
379 break;
380 a = node->regno_allocno_map[regno];
382 if (a == NULL)
383 continue;
384 if (ALLOCNO_EMIT_DATA (a)->child_renamed_p)
385 break;
386 ALLOCNO_EMIT_DATA (a)->child_renamed_p = true;
390 /* Return true if there is an entry to given loop not from its parent
391 (or grandparent) block. For example, it is possible for two
392 adjacent loops inside another loop. */
393 static bool
394 entered_from_non_parent_p (ira_loop_tree_node_t loop_node)
396 ira_loop_tree_node_t bb_node, src_loop_node, parent;
397 edge e;
398 edge_iterator ei;
400 for (bb_node = loop_node->children;
401 bb_node != NULL;
402 bb_node = bb_node->next)
403 if (bb_node->bb != NULL)
405 FOR_EACH_EDGE (e, ei, bb_node->bb->preds)
406 if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
407 && (src_loop_node = IRA_BB_NODE (e->src)->parent) != loop_node)
409 for (parent = src_loop_node->parent;
410 parent != NULL;
411 parent = parent->parent)
412 if (parent == loop_node)
413 break;
414 if (parent != NULL)
415 /* That is an exit from a nested loop -- skip it. */
416 continue;
417 for (parent = loop_node->parent;
418 parent != NULL;
419 parent = parent->parent)
420 if (src_loop_node == parent)
421 break;
422 if (parent == NULL)
423 return true;
426 return false;
429 /* Set up ENTERED_FROM_NON_PARENT_P for each loop region. */
430 static void
431 setup_entered_from_non_parent_p (void)
433 unsigned int i;
434 loop_p loop;
436 ira_assert (current_loops != NULL);
437 FOR_EACH_VEC_SAFE_ELT (get_loops (cfun), i, loop)
438 if (ira_loop_nodes[i].regno_allocno_map != NULL)
439 ira_loop_nodes[i].entered_from_non_parent_p
440 = entered_from_non_parent_p (&ira_loop_nodes[i]);
443 /* Return TRUE if move of SRC_ALLOCNO (assigned to hard register) to
444 DEST_ALLOCNO (assigned to memory) can be removed because it does
445 not change value of the destination. One possible reason for this
446 is the situation when SRC_ALLOCNO is not modified in the
447 corresponding loop. */
448 static bool
449 store_can_be_removed_p (ira_allocno_t src_allocno, ira_allocno_t dest_allocno)
451 int regno, orig_regno;
452 ira_allocno_t a;
453 ira_loop_tree_node_t node;
455 ira_assert (ALLOCNO_CAP_MEMBER (src_allocno) == NULL
456 && ALLOCNO_CAP_MEMBER (dest_allocno) == NULL);
457 orig_regno = ALLOCNO_REGNO (src_allocno);
458 regno = REGNO (allocno_emit_reg (dest_allocno));
459 for (node = ALLOCNO_LOOP_TREE_NODE (src_allocno);
460 node != NULL;
461 node = node->parent)
463 a = node->regno_allocno_map[orig_regno];
464 ira_assert (a != NULL);
465 if (REGNO (allocno_emit_reg (a)) == (unsigned) regno)
466 /* We achieved the destination and everything is ok. */
467 return true;
468 else if (bitmap_bit_p (node->modified_regnos, orig_regno))
469 return false;
470 else if (node->entered_from_non_parent_p)
471 /* If there is a path from a destination loop block to the
472 source loop header containing basic blocks of non-parents
473 (grandparents) of the source loop, we should have checked
474 modifications of the pseudo on this path too to decide
475 about possibility to remove the store. It could be done by
476 solving a data-flow problem. Unfortunately such global
477 solution would complicate IR flattening. Therefore we just
478 prohibit removal of the store in such complicated case. */
479 return false;
481 /* It is actually a loop entry -- do not remove the store. */
482 return false;
485 /* Generate and attach moves to the edge E. This looks at the final
486 regnos of allocnos living on the edge with the same original regno
487 to figure out when moves should be generated. */
488 static void
489 generate_edge_moves (edge e)
491 ira_loop_tree_node_t src_loop_node, dest_loop_node;
492 unsigned int regno;
493 bitmap_iterator bi;
494 ira_allocno_t src_allocno, dest_allocno, *src_map, *dest_map;
495 move_t move;
496 bitmap regs_live_in_dest, regs_live_out_src;
498 src_loop_node = IRA_BB_NODE (e->src)->parent;
499 dest_loop_node = IRA_BB_NODE (e->dest)->parent;
500 e->aux = NULL;
501 if (src_loop_node == dest_loop_node)
502 return;
503 src_map = src_loop_node->regno_allocno_map;
504 dest_map = dest_loop_node->regno_allocno_map;
505 regs_live_in_dest = df_get_live_in (e->dest);
506 regs_live_out_src = df_get_live_out (e->src);
507 EXECUTE_IF_SET_IN_REG_SET (regs_live_in_dest,
508 FIRST_PSEUDO_REGISTER, regno, bi)
509 if (bitmap_bit_p (regs_live_out_src, regno))
511 src_allocno = src_map[regno];
512 dest_allocno = dest_map[regno];
513 if (REGNO (allocno_emit_reg (src_allocno))
514 == REGNO (allocno_emit_reg (dest_allocno)))
515 continue;
516 /* Remove unnecessary stores at the region exit. We should do
517 this for readonly memory for sure and this is guaranteed by
518 that we never generate moves on region borders (see
519 checking in function change_loop). */
520 if (ALLOCNO_HARD_REGNO (dest_allocno) < 0
521 && ALLOCNO_HARD_REGNO (src_allocno) >= 0
522 && store_can_be_removed_p (src_allocno, dest_allocno))
524 ALLOCNO_EMIT_DATA (src_allocno)->mem_optimized_dest = dest_allocno;
525 ALLOCNO_EMIT_DATA (dest_allocno)->mem_optimized_dest_p = true;
526 if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
527 fprintf (ira_dump_file, " Remove r%d:a%d->a%d(mem)\n",
528 regno, ALLOCNO_NUM (src_allocno),
529 ALLOCNO_NUM (dest_allocno));
530 continue;
532 move = create_move (dest_allocno, src_allocno);
533 add_to_edge_list (e, move, true);
537 /* Bitmap of allocnos local for the current loop. */
538 static bitmap local_allocno_bitmap;
540 /* This bitmap is used to find that we need to generate and to use a
541 new pseudo-register when processing allocnos with the same original
542 regno. */
543 static bitmap used_regno_bitmap;
545 /* This bitmap contains regnos of allocnos which were renamed locally
546 because the allocnos correspond to disjoint live ranges in loops
547 with a common parent. */
548 static bitmap renamed_regno_bitmap;
550 /* Change (if necessary) pseudo-registers inside loop given by loop
551 tree node NODE. */
552 static void
553 change_loop (ira_loop_tree_node_t node)
555 bitmap_iterator bi;
556 unsigned int i;
557 int regno;
558 bool used_p;
559 ira_allocno_t allocno, parent_allocno, *map;
560 rtx insn, original_reg;
561 enum reg_class aclass, pclass;
562 ira_loop_tree_node_t parent;
564 if (node != ira_loop_tree_root)
566 ira_assert (current_loops != NULL);
568 if (node->bb != NULL)
570 FOR_BB_INSNS (node->bb, insn)
571 if (INSN_P (insn) && change_regs (&insn))
573 df_insn_rescan (insn);
574 df_notes_rescan (insn);
576 return;
579 if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
580 fprintf (ira_dump_file,
581 " Changing RTL for loop %d (header bb%d)\n",
582 node->loop_num, node->loop->header->index);
584 parent = ira_curr_loop_tree_node->parent;
585 map = parent->regno_allocno_map;
586 EXECUTE_IF_SET_IN_REG_SET (ira_curr_loop_tree_node->border_allocnos,
587 0, i, bi)
589 allocno = ira_allocnos[i];
590 regno = ALLOCNO_REGNO (allocno);
591 aclass = ALLOCNO_CLASS (allocno);
592 pclass = ira_pressure_class_translate[aclass];
593 parent_allocno = map[regno];
594 ira_assert (regno < ira_reg_equiv_len);
595 /* We generate the same hard register move because the
596 reload pass can put an allocno into memory in this case
597 we will have live range splitting. If it does not happen
598 such the same hard register moves will be removed. The
599 worst case when the both allocnos are put into memory by
600 the reload is very rare. */
601 if (parent_allocno != NULL
602 && (ALLOCNO_HARD_REGNO (allocno)
603 == ALLOCNO_HARD_REGNO (parent_allocno))
604 && (ALLOCNO_HARD_REGNO (allocno) < 0
605 || (parent->reg_pressure[pclass] + 1
606 <= ira_class_hard_regs_num[pclass])
607 || TEST_HARD_REG_BIT (ira_prohibited_mode_move_regs
608 [ALLOCNO_MODE (allocno)],
609 ALLOCNO_HARD_REGNO (allocno))
610 /* don't create copies because reload can spill an
611 allocno set by copy although the allocno will not
612 get memory slot. */
613 || ira_equiv_no_lvalue_p (regno)))
614 continue;
615 original_reg = allocno_emit_reg (allocno);
616 if (parent_allocno == NULL
617 || (REGNO (allocno_emit_reg (parent_allocno))
618 == REGNO (original_reg)))
620 if (internal_flag_ira_verbose > 3 && ira_dump_file)
621 fprintf (ira_dump_file, " %i vs parent %i:",
622 ALLOCNO_HARD_REGNO (allocno),
623 ALLOCNO_HARD_REGNO (parent_allocno));
624 set_allocno_reg (allocno, ira_create_new_reg (original_reg));
628 /* Rename locals: Local allocnos with same regno in different loops
629 might get the different hard register. So we need to change
630 ALLOCNO_REG. */
631 bitmap_and_compl (local_allocno_bitmap,
632 ira_curr_loop_tree_node->all_allocnos,
633 ira_curr_loop_tree_node->border_allocnos);
634 EXECUTE_IF_SET_IN_REG_SET (local_allocno_bitmap, 0, i, bi)
636 allocno = ira_allocnos[i];
637 regno = ALLOCNO_REGNO (allocno);
638 if (ALLOCNO_CAP_MEMBER (allocno) != NULL)
639 continue;
640 used_p = !bitmap_set_bit (used_regno_bitmap, regno);
641 ALLOCNO_EMIT_DATA (allocno)->somewhere_renamed_p = true;
642 if (! used_p)
643 continue;
644 bitmap_set_bit (renamed_regno_bitmap, regno);
645 set_allocno_reg (allocno, ira_create_new_reg (allocno_emit_reg (allocno)));
649 /* Process to set up flag somewhere_renamed_p. */
650 static void
651 set_allocno_somewhere_renamed_p (void)
653 unsigned int regno;
654 ira_allocno_t allocno;
655 ira_allocno_iterator ai;
657 FOR_EACH_ALLOCNO (allocno, ai)
659 regno = ALLOCNO_REGNO (allocno);
660 if (bitmap_bit_p (renamed_regno_bitmap, regno)
661 && REGNO (allocno_emit_reg (allocno)) == regno)
662 ALLOCNO_EMIT_DATA (allocno)->somewhere_renamed_p = true;
666 /* Return TRUE if move lists on all edges given in vector VEC are
667 equal. */
668 static bool
669 eq_edge_move_lists_p (vec<edge, va_gc> *vec)
671 move_t list;
672 int i;
674 list = (move_t) EDGE_I (vec, 0)->aux;
675 for (i = EDGE_COUNT (vec) - 1; i > 0; i--)
676 if (! eq_move_lists_p (list, (move_t) EDGE_I (vec, i)->aux))
677 return false;
678 return true;
681 /* Look at all entry edges (if START_P) or exit edges of basic block
682 BB and put move lists at the BB start or end if it is possible. In
683 other words, this decreases code duplication of allocno moves. */
684 static void
685 unify_moves (basic_block bb, bool start_p)
687 int i;
688 edge e;
689 move_t list;
690 vec<edge, va_gc> *vec;
692 vec = (start_p ? bb->preds : bb->succs);
693 if (EDGE_COUNT (vec) == 0 || ! eq_edge_move_lists_p (vec))
694 return;
695 e = EDGE_I (vec, 0);
696 list = (move_t) e->aux;
697 if (! start_p && control_flow_insn_p (BB_END (bb)))
698 return;
699 e->aux = NULL;
700 for (i = EDGE_COUNT (vec) - 1; i > 0; i--)
702 e = EDGE_I (vec, i);
703 free_move_list ((move_t) e->aux);
704 e->aux = NULL;
706 if (start_p)
707 at_bb_start[bb->index] = list;
708 else
709 at_bb_end[bb->index] = list;
712 /* Last move (in move sequence being processed) setting up the
713 corresponding hard register. */
714 static move_t hard_regno_last_set[FIRST_PSEUDO_REGISTER];
716 /* If the element value is equal to CURR_TICK then the corresponding
717 element in `hard_regno_last_set' is defined and correct. */
718 static int hard_regno_last_set_check[FIRST_PSEUDO_REGISTER];
720 /* Last move (in move sequence being processed) setting up the
721 corresponding allocno. */
722 static move_t *allocno_last_set;
724 /* If the element value is equal to CURR_TICK then the corresponding
725 element in . `allocno_last_set' is defined and correct. */
726 static int *allocno_last_set_check;
728 /* Definition of vector of moves. */
730 /* This vec contains moves sorted topologically (depth-first) on their
731 dependency graph. */
732 static vec<move_t> move_vec;
734 /* The variable value is used to check correctness of values of
735 elements of arrays `hard_regno_last_set' and
736 `allocno_last_set_check'. */
737 static int curr_tick;
739 /* This recursive function traverses dependencies of MOVE and produces
740 topological sorting (in depth-first order). */
741 static void
742 traverse_moves (move_t move)
744 int i;
746 if (move->visited_p)
747 return;
748 move->visited_p = true;
749 for (i = move->deps_num - 1; i >= 0; i--)
750 traverse_moves (move->deps[i]);
751 move_vec.safe_push (move);
754 /* Remove unnecessary moves in the LIST, makes topological sorting,
755 and removes cycles on hard reg dependencies by introducing new
756 allocnos assigned to memory and additional moves. It returns the
757 result move list. */
758 static move_t
759 modify_move_list (move_t list)
761 int i, n, nregs, hard_regno;
762 ira_allocno_t to, from;
763 move_t move, new_move, set_move, first, last;
765 if (list == NULL)
766 return NULL;
767 /* Creat move deps. */
768 curr_tick++;
769 for (move = list; move != NULL; move = move->next)
771 to = move->to;
772 if ((hard_regno = ALLOCNO_HARD_REGNO (to)) < 0)
773 continue;
774 nregs = hard_regno_nregs[hard_regno][ALLOCNO_MODE (to)];
775 for (i = 0; i < nregs; i++)
777 hard_regno_last_set[hard_regno + i] = move;
778 hard_regno_last_set_check[hard_regno + i] = curr_tick;
781 for (move = list; move != NULL; move = move->next)
783 from = move->from;
784 to = move->to;
785 if ((hard_regno = ALLOCNO_HARD_REGNO (from)) >= 0)
787 nregs = hard_regno_nregs[hard_regno][ALLOCNO_MODE (from)];
788 for (n = i = 0; i < nregs; i++)
789 if (hard_regno_last_set_check[hard_regno + i] == curr_tick
790 && (ALLOCNO_REGNO (hard_regno_last_set[hard_regno + i]->to)
791 != ALLOCNO_REGNO (from)))
792 n++;
793 move->deps = (move_t *) ira_allocate (n * sizeof (move_t));
794 for (n = i = 0; i < nregs; i++)
795 if (hard_regno_last_set_check[hard_regno + i] == curr_tick
796 && (ALLOCNO_REGNO (hard_regno_last_set[hard_regno + i]->to)
797 != ALLOCNO_REGNO (from)))
798 move->deps[n++] = hard_regno_last_set[hard_regno + i];
799 move->deps_num = n;
802 /* Toplogical sorting: */
803 move_vec.truncate (0);
804 for (move = list; move != NULL; move = move->next)
805 traverse_moves (move);
806 last = NULL;
807 for (i = (int) move_vec.length () - 1; i >= 0; i--)
809 move = move_vec[i];
810 move->next = NULL;
811 if (last != NULL)
812 last->next = move;
813 last = move;
815 first = move_vec.last ();
816 /* Removing cycles: */
817 curr_tick++;
818 move_vec.truncate (0);
819 for (move = first; move != NULL; move = move->next)
821 from = move->from;
822 to = move->to;
823 if ((hard_regno = ALLOCNO_HARD_REGNO (from)) >= 0)
825 nregs = hard_regno_nregs[hard_regno][ALLOCNO_MODE (from)];
826 for (i = 0; i < nregs; i++)
827 if (hard_regno_last_set_check[hard_regno + i] == curr_tick
828 && ALLOCNO_HARD_REGNO
829 (hard_regno_last_set[hard_regno + i]->to) >= 0)
831 int n, j;
832 ira_allocno_t new_allocno;
834 set_move = hard_regno_last_set[hard_regno + i];
835 /* It does not matter what loop_tree_node (of TO or
836 FROM) to use for the new allocno because of
837 subsequent IRA internal representation
838 flattening. */
839 new_allocno
840 = create_new_allocno (ALLOCNO_REGNO (set_move->to),
841 ALLOCNO_LOOP_TREE_NODE (set_move->to));
842 ALLOCNO_MODE (new_allocno) = ALLOCNO_MODE (set_move->to);
843 ira_set_allocno_class (new_allocno,
844 ALLOCNO_CLASS (set_move->to));
845 ira_create_allocno_objects (new_allocno);
846 ALLOCNO_ASSIGNED_P (new_allocno) = true;
847 ALLOCNO_HARD_REGNO (new_allocno) = -1;
848 ALLOCNO_EMIT_DATA (new_allocno)->reg
849 = ira_create_new_reg (allocno_emit_reg (set_move->to));
851 /* Make it possibly conflicting with all earlier
852 created allocnos. Cases where temporary allocnos
853 created to remove the cycles are quite rare. */
854 n = ALLOCNO_NUM_OBJECTS (new_allocno);
855 gcc_assert (n == ALLOCNO_NUM_OBJECTS (set_move->to));
856 for (j = 0; j < n; j++)
858 ira_object_t new_obj = ALLOCNO_OBJECT (new_allocno, j);
860 OBJECT_MIN (new_obj) = 0;
861 OBJECT_MAX (new_obj) = ira_objects_num - 1;
864 new_move = create_move (set_move->to, new_allocno);
865 set_move->to = new_allocno;
866 move_vec.safe_push (new_move);
867 ira_move_loops_num++;
868 if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
869 fprintf (ira_dump_file,
870 " Creating temporary allocno a%dr%d\n",
871 ALLOCNO_NUM (new_allocno),
872 REGNO (allocno_emit_reg (new_allocno)));
875 if ((hard_regno = ALLOCNO_HARD_REGNO (to)) < 0)
876 continue;
877 nregs = hard_regno_nregs[hard_regno][ALLOCNO_MODE (to)];
878 for (i = 0; i < nregs; i++)
880 hard_regno_last_set[hard_regno + i] = move;
881 hard_regno_last_set_check[hard_regno + i] = curr_tick;
884 for (i = (int) move_vec.length () - 1; i >= 0; i--)
886 move = move_vec[i];
887 move->next = NULL;
888 last->next = move;
889 last = move;
891 return first;
894 /* Generate RTX move insns from the move list LIST. This updates
895 allocation cost using move execution frequency FREQ. */
896 static rtx
897 emit_move_list (move_t list, int freq)
899 rtx to, from, dest;
900 int to_regno, from_regno, cost, regno;
901 rtx result, insn, set;
902 enum machine_mode mode;
903 enum reg_class aclass;
905 grow_reg_equivs ();
906 start_sequence ();
907 for (; list != NULL; list = list->next)
909 start_sequence ();
910 to = allocno_emit_reg (list->to);
911 to_regno = REGNO (to);
912 from = allocno_emit_reg (list->from);
913 from_regno = REGNO (from);
914 emit_move_insn (to, from);
915 list->insn = get_insns ();
916 end_sequence ();
917 for (insn = list->insn; insn != NULL_RTX; insn = NEXT_INSN (insn))
919 /* The reload needs to have set up insn codes. If the
920 reload sets up insn codes by itself, it may fail because
921 insns will have hard registers instead of pseudos and
922 there may be no machine insn with given hard
923 registers. */
924 recog_memoized (insn);
925 /* Add insn to equiv init insn list if it is necessary.
926 Otherwise reload will not remove this insn if it decides
927 to use the equivalence. */
928 if ((set = single_set (insn)) != NULL_RTX)
930 dest = SET_DEST (set);
931 if (GET_CODE (dest) == SUBREG)
932 dest = SUBREG_REG (dest);
933 ira_assert (REG_P (dest));
934 regno = REGNO (dest);
935 if (regno >= ira_reg_equiv_len
936 || (ira_reg_equiv[regno].invariant == NULL_RTX
937 && ira_reg_equiv[regno].constant == NULL_RTX))
938 continue; /* regno has no equivalence. */
939 ira_assert ((int) reg_equivs->length () > regno);
940 reg_equiv_init (regno)
941 = gen_rtx_INSN_LIST (VOIDmode, insn, reg_equiv_init (regno));
944 if (ira_use_lra_p)
945 ira_update_equiv_info_by_shuffle_insn (to_regno, from_regno, list->insn);
946 emit_insn (list->insn);
947 mode = ALLOCNO_MODE (list->to);
948 aclass = ALLOCNO_CLASS (list->to);
949 cost = 0;
950 if (ALLOCNO_HARD_REGNO (list->to) < 0)
952 if (ALLOCNO_HARD_REGNO (list->from) >= 0)
954 cost = ira_memory_move_cost[mode][aclass][0] * freq;
955 ira_store_cost += cost;
958 else if (ALLOCNO_HARD_REGNO (list->from) < 0)
960 if (ALLOCNO_HARD_REGNO (list->to) >= 0)
962 cost = ira_memory_move_cost[mode][aclass][0] * freq;
963 ira_load_cost += cost;
966 else
968 ira_init_register_move_cost_if_necessary (mode);
969 cost = ira_register_move_cost[mode][aclass][aclass] * freq;
970 ira_shuffle_cost += cost;
972 ira_overall_cost += cost;
974 result = get_insns ();
975 end_sequence ();
976 return result;
979 /* Generate RTX move insns from move lists attached to basic blocks
980 and edges. */
981 static void
982 emit_moves (void)
984 basic_block bb;
985 edge_iterator ei;
986 edge e;
987 rtx insns, tmp;
989 FOR_EACH_BB_FN (bb, cfun)
991 if (at_bb_start[bb->index] != NULL)
993 at_bb_start[bb->index] = modify_move_list (at_bb_start[bb->index]);
994 insns = emit_move_list (at_bb_start[bb->index],
995 REG_FREQ_FROM_BB (bb));
996 tmp = BB_HEAD (bb);
997 if (LABEL_P (tmp))
998 tmp = NEXT_INSN (tmp);
999 if (NOTE_INSN_BASIC_BLOCK_P (tmp))
1000 tmp = NEXT_INSN (tmp);
1001 if (tmp == BB_HEAD (bb))
1002 emit_insn_before (insns, tmp);
1003 else if (tmp != NULL_RTX)
1004 emit_insn_after (insns, PREV_INSN (tmp));
1005 else
1006 emit_insn_after (insns, get_last_insn ());
1009 if (at_bb_end[bb->index] != NULL)
1011 at_bb_end[bb->index] = modify_move_list (at_bb_end[bb->index]);
1012 insns = emit_move_list (at_bb_end[bb->index], REG_FREQ_FROM_BB (bb));
1013 ira_assert (! control_flow_insn_p (BB_END (bb)));
1014 emit_insn_after (insns, BB_END (bb));
1017 FOR_EACH_EDGE (e, ei, bb->succs)
1019 if (e->aux == NULL)
1020 continue;
1021 ira_assert ((e->flags & EDGE_ABNORMAL) == 0
1022 || ! EDGE_CRITICAL_P (e));
1023 e->aux = modify_move_list ((move_t) e->aux);
1024 insert_insn_on_edge
1025 (emit_move_list ((move_t) e->aux,
1026 REG_FREQ_FROM_EDGE_FREQ (EDGE_FREQUENCY (e))),
1028 if (e->src->next_bb != e->dest)
1029 ira_additional_jumps_num++;
1034 /* Update costs of A and corresponding allocnos on upper levels on the
1035 loop tree from reading (if READ_P) or writing A on an execution
1036 path with FREQ. */
1037 static void
1038 update_costs (ira_allocno_t a, bool read_p, int freq)
1040 ira_loop_tree_node_t parent;
1042 for (;;)
1044 ALLOCNO_NREFS (a)++;
1045 ALLOCNO_FREQ (a) += freq;
1046 ALLOCNO_MEMORY_COST (a)
1047 += (ira_memory_move_cost[ALLOCNO_MODE (a)][ALLOCNO_CLASS (a)]
1048 [read_p ? 1 : 0] * freq);
1049 if (ALLOCNO_CAP (a) != NULL)
1050 a = ALLOCNO_CAP (a);
1051 else if ((parent = ALLOCNO_LOOP_TREE_NODE (a)->parent) == NULL
1052 || (a = parent->regno_allocno_map[ALLOCNO_REGNO (a)]) == NULL)
1053 break;
1057 /* Process moves from LIST with execution FREQ to add ranges, copies,
1058 and modify costs for allocnos involved in the moves. All regnos
1059 living through the list is in LIVE_THROUGH, and the loop tree node
1060 used to find corresponding allocnos is NODE. */
1061 static void
1062 add_range_and_copies_from_move_list (move_t list, ira_loop_tree_node_t node,
1063 bitmap live_through, int freq)
1065 int start, n;
1066 unsigned int regno;
1067 move_t move;
1068 ira_allocno_t a;
1069 ira_copy_t cp;
1070 live_range_t r;
1071 bitmap_iterator bi;
1072 HARD_REG_SET hard_regs_live;
1074 if (list == NULL)
1075 return;
1076 n = 0;
1077 EXECUTE_IF_SET_IN_BITMAP (live_through, FIRST_PSEUDO_REGISTER, regno, bi)
1078 n++;
1079 REG_SET_TO_HARD_REG_SET (hard_regs_live, live_through);
1080 /* This is a trick to guarantee that new ranges is not merged with
1081 the old ones. */
1082 ira_max_point++;
1083 start = ira_max_point;
1084 for (move = list; move != NULL; move = move->next)
1086 ira_allocno_t from = move->from;
1087 ira_allocno_t to = move->to;
1088 int nr, i;
1090 bitmap_clear_bit (live_through, ALLOCNO_REGNO (from));
1091 bitmap_clear_bit (live_through, ALLOCNO_REGNO (to));
1093 nr = ALLOCNO_NUM_OBJECTS (to);
1094 for (i = 0; i < nr; i++)
1096 ira_object_t to_obj = ALLOCNO_OBJECT (to, i);
1097 if (OBJECT_CONFLICT_ARRAY (to_obj) == NULL)
1099 if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
1100 fprintf (ira_dump_file, " Allocate conflicts for a%dr%d\n",
1101 ALLOCNO_NUM (to), REGNO (allocno_emit_reg (to)));
1102 ira_allocate_object_conflicts (to_obj, n);
1105 ior_hard_reg_conflicts (from, &hard_regs_live);
1106 ior_hard_reg_conflicts (to, &hard_regs_live);
1108 update_costs (from, true, freq);
1109 update_costs (to, false, freq);
1110 cp = ira_add_allocno_copy (from, to, freq, false, move->insn, NULL);
1111 if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
1112 fprintf (ira_dump_file, " Adding cp%d:a%dr%d-a%dr%d\n",
1113 cp->num, ALLOCNO_NUM (cp->first),
1114 REGNO (allocno_emit_reg (cp->first)),
1115 ALLOCNO_NUM (cp->second),
1116 REGNO (allocno_emit_reg (cp->second)));
1118 nr = ALLOCNO_NUM_OBJECTS (from);
1119 for (i = 0; i < nr; i++)
1121 ira_object_t from_obj = ALLOCNO_OBJECT (from, i);
1122 r = OBJECT_LIVE_RANGES (from_obj);
1123 if (r == NULL || r->finish >= 0)
1125 ira_add_live_range_to_object (from_obj, start, ira_max_point);
1126 if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
1127 fprintf (ira_dump_file,
1128 " Adding range [%d..%d] to allocno a%dr%d\n",
1129 start, ira_max_point, ALLOCNO_NUM (from),
1130 REGNO (allocno_emit_reg (from)));
1132 else
1134 r->finish = ira_max_point;
1135 if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
1136 fprintf (ira_dump_file,
1137 " Adding range [%d..%d] to allocno a%dr%d\n",
1138 r->start, ira_max_point, ALLOCNO_NUM (from),
1139 REGNO (allocno_emit_reg (from)));
1142 ira_max_point++;
1143 nr = ALLOCNO_NUM_OBJECTS (to);
1144 for (i = 0; i < nr; i++)
1146 ira_object_t to_obj = ALLOCNO_OBJECT (to, i);
1147 ira_add_live_range_to_object (to_obj, ira_max_point, -1);
1149 ira_max_point++;
1151 for (move = list; move != NULL; move = move->next)
1153 int nr, i;
1154 nr = ALLOCNO_NUM_OBJECTS (move->to);
1155 for (i = 0; i < nr; i++)
1157 ira_object_t to_obj = ALLOCNO_OBJECT (move->to, i);
1158 r = OBJECT_LIVE_RANGES (to_obj);
1159 if (r->finish < 0)
1161 r->finish = ira_max_point - 1;
1162 if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
1163 fprintf (ira_dump_file,
1164 " Adding range [%d..%d] to allocno a%dr%d\n",
1165 r->start, r->finish, ALLOCNO_NUM (move->to),
1166 REGNO (allocno_emit_reg (move->to)));
1170 EXECUTE_IF_SET_IN_BITMAP (live_through, FIRST_PSEUDO_REGISTER, regno, bi)
1172 ira_allocno_t to;
1173 int nr, i;
1175 a = node->regno_allocno_map[regno];
1176 if ((to = ALLOCNO_EMIT_DATA (a)->mem_optimized_dest) != NULL)
1177 a = to;
1178 nr = ALLOCNO_NUM_OBJECTS (a);
1179 for (i = 0; i < nr; i++)
1181 ira_object_t obj = ALLOCNO_OBJECT (a, i);
1182 ira_add_live_range_to_object (obj, start, ira_max_point - 1);
1184 if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
1185 fprintf
1186 (ira_dump_file,
1187 " Adding range [%d..%d] to live through %s allocno a%dr%d\n",
1188 start, ira_max_point - 1,
1189 to != NULL ? "upper level" : "",
1190 ALLOCNO_NUM (a), REGNO (allocno_emit_reg (a)));
1194 /* Process all move list to add ranges, conflicts, copies, and modify
1195 costs for allocnos involved in the moves. */
1196 static void
1197 add_ranges_and_copies (void)
1199 basic_block bb;
1200 edge_iterator ei;
1201 edge e;
1202 ira_loop_tree_node_t node;
1203 bitmap live_through;
1205 live_through = ira_allocate_bitmap ();
1206 FOR_EACH_BB_FN (bb, cfun)
1208 /* It does not matter what loop_tree_node (of source or
1209 destination block) to use for searching allocnos by their
1210 regnos because of subsequent IR flattening. */
1211 node = IRA_BB_NODE (bb)->parent;
1212 bitmap_copy (live_through, df_get_live_in (bb));
1213 add_range_and_copies_from_move_list
1214 (at_bb_start[bb->index], node, live_through, REG_FREQ_FROM_BB (bb));
1215 bitmap_copy (live_through, df_get_live_out (bb));
1216 add_range_and_copies_from_move_list
1217 (at_bb_end[bb->index], node, live_through, REG_FREQ_FROM_BB (bb));
1218 FOR_EACH_EDGE (e, ei, bb->succs)
1220 bitmap_and (live_through,
1221 df_get_live_in (e->dest), df_get_live_out (bb));
1222 add_range_and_copies_from_move_list
1223 ((move_t) e->aux, node, live_through,
1224 REG_FREQ_FROM_EDGE_FREQ (EDGE_FREQUENCY (e)));
1227 ira_free_bitmap (live_through);
1230 /* The entry function changes code and generates shuffling allocnos on
1231 region borders for the regional (LOOPS_P is TRUE in this case)
1232 register allocation. */
1233 void
1234 ira_emit (bool loops_p)
1236 basic_block bb;
1237 rtx insn;
1238 edge_iterator ei;
1239 edge e;
1240 ira_allocno_t a;
1241 ira_allocno_iterator ai;
1242 size_t sz;
1244 FOR_EACH_ALLOCNO (a, ai)
1245 ALLOCNO_EMIT_DATA (a)->reg = regno_reg_rtx[ALLOCNO_REGNO (a)];
1246 if (! loops_p)
1247 return;
1248 sz = sizeof (move_t) * last_basic_block_for_fn (cfun);
1249 at_bb_start = (move_t *) ira_allocate (sz);
1250 memset (at_bb_start, 0, sz);
1251 at_bb_end = (move_t *) ira_allocate (sz);
1252 memset (at_bb_end, 0, sz);
1253 local_allocno_bitmap = ira_allocate_bitmap ();
1254 used_regno_bitmap = ira_allocate_bitmap ();
1255 renamed_regno_bitmap = ira_allocate_bitmap ();
1256 max_regno_before_changing = max_reg_num ();
1257 ira_traverse_loop_tree (true, ira_loop_tree_root, change_loop, NULL);
1258 set_allocno_somewhere_renamed_p ();
1259 ira_free_bitmap (used_regno_bitmap);
1260 ira_free_bitmap (renamed_regno_bitmap);
1261 ira_free_bitmap (local_allocno_bitmap);
1262 setup_entered_from_non_parent_p ();
1263 FOR_EACH_BB_FN (bb, cfun)
1265 at_bb_start[bb->index] = NULL;
1266 at_bb_end[bb->index] = NULL;
1267 FOR_EACH_EDGE (e, ei, bb->succs)
1268 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1269 generate_edge_moves (e);
1271 allocno_last_set
1272 = (move_t *) ira_allocate (sizeof (move_t) * max_reg_num ());
1273 allocno_last_set_check
1274 = (int *) ira_allocate (sizeof (int) * max_reg_num ());
1275 memset (allocno_last_set_check, 0, sizeof (int) * max_reg_num ());
1276 memset (hard_regno_last_set_check, 0, sizeof (hard_regno_last_set_check));
1277 curr_tick = 0;
1278 FOR_EACH_BB_FN (bb, cfun)
1279 unify_moves (bb, true);
1280 FOR_EACH_BB_FN (bb, cfun)
1281 unify_moves (bb, false);
1282 move_vec.create (ira_allocnos_num);
1283 emit_moves ();
1284 add_ranges_and_copies ();
1285 /* Clean up: */
1286 FOR_EACH_BB_FN (bb, cfun)
1288 free_move_list (at_bb_start[bb->index]);
1289 free_move_list (at_bb_end[bb->index]);
1290 FOR_EACH_EDGE (e, ei, bb->succs)
1292 free_move_list ((move_t) e->aux);
1293 e->aux = NULL;
1296 move_vec.release ();
1297 ira_free (allocno_last_set_check);
1298 ira_free (allocno_last_set);
1299 commit_edge_insertions ();
1300 /* Fix insn codes. It is necessary to do it before reload because
1301 reload assumes initial insn codes defined. The insn codes can be
1302 invalidated by CFG infrastructure for example in jump
1303 redirection. */
1304 FOR_EACH_BB_FN (bb, cfun)
1305 FOR_BB_INSNS_REVERSE (bb, insn)
1306 if (INSN_P (insn))
1307 recog_memoized (insn);
1308 ira_free (at_bb_end);
1309 ira_free (at_bb_start);