/*---------------------------------------------------------------*/
/*--- begin                                 host_reg_alloc2.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
#include "libvex_basictypes.h"
#include "libvex.h"

#include "main_util.h"
#include "host_generic_regs.h"

/* Set to 1 for lots of debugging output. */
#define DEBUG_REGALLOC 0
/* TODO 27 Oct 04:

   We can possibly do V-V coalescing even when the src is spilled,
   providing we can arrange for the dst to have the same spill slot.

   Note that state[].hreg is the same as the available real regs.

   Generally rationalise data structures.  */
/* Records information on virtual register live ranges.  Computed once
   and remains unchanged after that. */
typedef
   struct {
      /* Becomes live for the first time after this insn ... */
      Short live_after;
      /* Becomes dead for the last time before this insn ... */
      Short dead_before;
      /* The "home" spill slot, if needed.  Never changes. */
      Short spill_offset;
      Short spill_size;
      /* What kind of register this is. */
      HRegClass reg_class;
   }
   VRegLR;
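/* Worked example (editorial addition, not from the original file):
   given the fragment

      insn 0:   v5 = <constant>      (v5 written)
      insn 1:   v6 = v5 + v5         (v5 read, v6 written)
      insn 2:   <use v6>             (v6 read)

   Stage 1 below computes v5 = {live_after 0, dead_before 2} and
   v6 = {live_after 1, dead_before 3}: a vreg becomes live after the
   insn that first writes it, and is dead before the insn following
   its last use. */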
/* Records information on real-register live ranges.  Computed once
   and remains unchanged after that. */
typedef
   struct {
      HReg rreg;
      /* Becomes live after this insn ... */
      Short live_after;
      /* Becomes dead before this insn ... */
      Short dead_before;
   }
   RRegLR;
/* An array of the following structs (rreg_state) comprises the
   running state of the allocator.  It indicates what the current
   disposition of each allocatable real register is.  The array gets
   updated as the allocator processes instructions.  The identity of
   the register is not recorded here, because the index of this
   structure in doRegisterAllocation()'s |rreg_state| is the index
   number of the register, and the register itself can be extracted
   from the RRegUniverse supplied to doRegisterAllocation(). */
typedef
   struct {
      /* ------ FIELDS WHICH DO NOT CHANGE ------ */
      /* Is this involved in any HLRs?  (only an optimisation hint) */
      Bool has_hlrs;
      /* ------ FIELDS WHICH DO CHANGE ------ */
      /* 6 May 07: rearranged fields below so the whole struct fits
         into 16 bytes on both x86 and amd64. */
      /* Used when .disp == Bound and we are looking for vregs to
         spill. */
      Bool is_spill_cand;
      /* Optimisation: used when .disp == Bound.  Indicates when the
         rreg has the same value as the spill slot for the associated
         vreg.  Is safely left at False, and becomes True after a
         spill store or reload for this rreg. */
      Bool eq_spill_slot;
      /* What's its current disposition? */
      enum { Free,     /* available for use */
             Unavail,  /* in a real-reg live range */
             Bound     /* in use (holding value of some vreg) */
           }
           disp;
      /* If .disp == Bound, what vreg is it bound to? */
      HReg vreg;
   }
   RRegState;
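/* Disposition transitions, as effected by the allocator's main loop
   below (editorial summary, not from the original file):

      Free    -> Bound     a vreg is assigned to this rreg
      Bound   -> Free      the bound vreg's live range ends
      Bound   -> Unavail   a hard live range starts here; the bound
                           vreg is spilled first (see Stage 5)
      Free    -> Unavail   a hard live range starts here
      Unavail -> Free      the hard live range ends
*/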
/* The allocator also maintains a redundant array of indexes
   (vreg_state) from vreg numbers back to entries in rreg_state.  It
   is redundant because iff vreg_state[i] == j then
   hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
   point at each other.  The purpose of this is to speed up activities
   which involve looking for a particular vreg: there is no need to
   scan the rreg_state looking for it, just index directly into
   vreg_state.  The FAQ "does this vreg already have an associated
   rreg" is the main beneficiary.

   To indicate, in vreg_state[i], that a given vreg is not currently
   associated with any rreg, that entry can be set to INVALID_RREG_NO.

   Because the vreg_state entries are signed Shorts, the max number
   of vregs that can be handled by regalloc is 32767. */
#define INVALID_RREG_NO ((Short)(-1))

#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
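
/* Illustrative sketch (editorial addition, not from the original
   file): the two-way mapping invariant described above can be
   spot-checked with a helper like this one.  The helper name and its
   parameters are hypothetical; the allocator performs an equivalent
   check inline as "Sanity check 4" below. */
#if 0
static void example_check_maps ( const RRegState* rreg_state, Int n_rregs,
                                 const Short* vreg_state, Int n_vregs )
{
   for (Int j = 0; j < n_rregs; j++) {
      if (rreg_state[j].disp != Bound)
         continue;
      /* a bound rreg must be pointed back at by its vreg's entry */
      vassert(vreg_state[hregIndex(rreg_state[j].vreg)] == j);
   }
   for (Int i = 0; i < n_vregs; i++) {
      if (vreg_state[i] == INVALID_RREG_NO)
         continue;
      /* a mapped vreg's rreg entry must name this vreg */
      vassert(hregIndex(rreg_state[vreg_state[i]].vreg) == i);
   }
}
#endif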
/* Search forward from some given point in the incoming instruction
   sequence.  Point is to select a virtual register to spill, by
   finding the vreg which is mentioned as far ahead as possible, in
   the hope that this will minimise the number of consequent reloads.

   Only do the search for vregs which are Bound in the running state,
   and for which the .is_spill_cand field is set.  This allows the
   caller to arbitrarily restrict the set of spill candidates to be
   considered.

   To do this we don't actually need to see the incoming instruction
   stream.  Rather, what we need is the HRegUsage records for the
   incoming instruction stream.  Hence that is passed in.

   Returns an index into the state array indicating the (v,r) pair to
   spill, or -1 if none was found. */
static
Int findMostDistantlyMentionedVReg (
   HRegUsage*   reg_usages_in,
   Int          search_from_instr,
   Int          num_instrs,
   RRegState*   rreg_state,
   HRegClass    hreg_class,
   const RegAllocControl* con
)
{
   Int m;
   Int furthest_k = -1;
   Int furthest   = -1;
   vassert(search_from_instr >= 0);
   for (UInt k = con->univ->allocable_start[hreg_class];
        k <= con->univ->allocable_end[hreg_class]; k++) {
      if (!rreg_state[k].is_spill_cand)
         continue;
      vassert(rreg_state[k].disp == Bound);
      for (m = search_from_instr; m < num_instrs; m++) {
         if (HRegUsage__contains(&reg_usages_in[m], rreg_state[k].vreg))
            break;
      }
      if (m > furthest) {
         furthest   = m;
         furthest_k = k;
      }
   }
   return furthest_k;
}
/* Check that this vreg has been assigned a sane spill offset. */
inline
static void sanity_check_spill_offset ( VRegLR* vreg )
{
   switch (vreg->reg_class) {
      case HRcVec128: case HRcFlt64:
         vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
      default:
         vassert(0 == ((UShort)vreg->spill_offset % 8)); break;
   }
}
/* Double the size of the real-reg live-range array, if needed. */
__attribute__((noinline))
static void ensureRRLRspace_SLOW ( RRegLR** info, Int* size, Int used )
{
   Int     k;
   RRegLR* arr2;
   if (0)
      vex_printf("ensureRRLRspace: %d -> %d\n", *size, 2 * *size);
   vassert(used == *size);
   arr2 = LibVEX_Alloc_inline(2 * *size * sizeof(RRegLR));
   for (k = 0; k < *size; k++)
      arr2[k] = (*info)[k];
   *size *= 2;
   *info = arr2;
}

inline
static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
{
   if (LIKELY(used < *size)) return;
   ensureRRLRspace_SLOW(info, size, used);
}
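
/* Usage pattern (editorial note, not from the original file): callers
   below grow the array before each append, so the common case costs a
   single comparison:

      ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
      rreg_lrs_la[rreg_lrs_used].rreg = ...;
      rreg_lrs_used++;

   Doubling on overflow keeps the total copying cost amortised O(1)
   per append. */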
/* Sort an array of RRegLR entries by either the .live_after or
   .dead_before fields.  This is performance-critical.  It is a shell
   sort; the gap sequence below follows h' = 3h + 1. */
static void sortRRLRarray ( RRegLR* arr,
                            Int size, Bool by_live_after )
{
   Int    incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
                       9841, 29524, 88573, 265720,
                       797161, 2391484 };
   Int    lo = 0;
   Int    hi = size-1;
   Int    i, j, h, bigN, hp;
   RRegLR v;

   vassert(size >= 0);
   if (size == 0)
      return;

   bigN = hi - lo + 1; if (bigN < 2) return;
   hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;

   if (by_live_after) {

      for ( ; hp >= 0; hp--) {
         h = incs[hp];
         for (i = lo + h; i <= hi; i++) {
            v = arr[i];
            j = i;
            while (arr[j-h].live_after > v.live_after) {
               arr[j] = arr[j-h];
               j = j - h;
               if (j <= (lo + h - 1)) break;
            }
            arr[j] = v;
         }
      }

   } else {

      for ( ; hp >= 0; hp--) {
         h = incs[hp];
         for (i = lo + h; i <= hi; i++) {
            v = arr[i];
            j = i;
            while (arr[j-h].dead_before > v.dead_before) {
               arr[j] = arr[j-h];
               j = j - h;
               if (j <= (lo + h - 1)) break;
            }
            arr[j] = v;
         }
      }

   }
}
/* Compute the index of the highest and lowest 1 in a ULong,
   respectively.  Results are undefined if the argument is zero.
   Don't pass it zero :) */
static inline UInt ULong__maxIndex ( ULong w64 ) {
   return 63 - __builtin_clzll(w64);
}

static inline UInt ULong__minIndex ( ULong w64 ) {
   return __builtin_ctzll(w64);
}
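
/* Illustrative sketch (editorial addition, not from the original
   file): these helpers are used below to bound the scan over the
   64-bit "mentioned registers" mask.  A sparse bitset can be walked
   like this; the function name is hypothetical. */
#if 0
static void example_visit_mentioned ( ULong rMentioned )
{
   if (rMentioned == 0)
      return; /* both helpers are undefined for a zero argument */
   UInt lo = ULong__minIndex(rMentioned);
   UInt hi = ULong__maxIndex(rMentioned);
   for (UInt j = lo; j <= hi; j++) {
      if ((rMentioned & (1ULL << j)) == 0)
         continue;
      /* ... register number j is mentioned; process it here ... */
   }
}
#endif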
/* A target-independent register allocator.  Requires various
   functions which it uses to deal abstractly with instructions and
   registers, since it cannot have any target-specific knowledge.

   Returns a new list of instructions, which, as a result of the
   behaviour of mapRegs, will be in-place modifications of the
   original instructions.

   Requires that the incoming code has been generated using
   vreg numbers 0, 1 .. n_vregs-1.  Appearance of a vreg outside
   that range is a checked run-time error.

   Takes an expandable array of pointers to unallocated insns.
   Returns an expandable array of pointers to allocated insns.
*/
HInstrArray* doRegisterAllocation_v2 (

   /* Incoming virtual-registerised code. */
   HInstrArray* instrs_in,

   /* Register allocator controls to use. */
   const RegAllocControl* con
)
{
#  define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8)

   const Bool eq_spill_opt = True;

   /* Info on vregs and rregs.  Computed once and remains
      unchanged. */
   Int     n_vregs;
   VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */

   /* We keep two copies of the real-reg live range info, one sorted
      by .live_after and the other by .dead_before.  First the
      unsorted info is created in the _la variant, then copied into
      the _db variant.  Once that's done, both of them are sorted.
      We also need two integer cursors which record the next
      location in the two arrays to consider. */
   RRegLR* rreg_lrs_la;
   RRegLR* rreg_lrs_db;
   Int     rreg_lrs_size;
   Int     rreg_lrs_used;
   Int     rreg_lrs_la_next;
   Int     rreg_lrs_db_next;
   /* Info on register usage in the incoming instruction array.
      Computed once and remains unchanged, more or less; updated
      sometimes by the direct-reload optimisation. */
   HRegUsage* reg_usage_arr; /* [0 .. instrs_in->arr_used-1] */

   /* Used when constructing vreg_lrs (for allocating stack
      slots). */
   Short ss_busy_until_before[N_SPILL64S];

   /* Used when constructing rreg_lrs. */
   Int* rreg_live_after;
   Int* rreg_dead_before;

   /* Running state of the core allocation algorithm. */
   RRegState* rreg_state; /* [0 .. n_rregs-1] */
   Int        n_rregs;

   /* .. and the redundant backward map */
   /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
      This implies n_rregs must be <= 32768. */
   Short* vreg_state; /* [0 .. n_vregs-1] */

   /* The vreg -> rreg map constructed and then applied to each
      instr. */
   HRegRemap remap;

   /* The output array of instructions. */
   HInstrArray* instrs_out;

   /* Sanity checks are expensive.  They are only done periodically,
      not at each insn processed. */
   Bool do_sanity_check;

   vassert(0 == (con->guest_sizeB % LibVEX_GUEST_STATE_ALIGN));
   vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN));
   vassert(0 == (N_SPILL64S % 2));

   /* The live range numbers are signed shorts, and so limiting the
      number of insns to 15000 comfortably guards against them
      overflowing 32k. */
   vassert(instrs_in->arr_used <= 15000);
#  define INVALID_INSTRNO (-2)

#  define EMIT_INSTR(_instr)                  \
      do {                                    \
         HInstr* _tmp = (_instr);             \
         if (DEBUG_REGALLOC) {                \
            vex_printf("** ");                \
            con->ppInstr(_tmp, con->mode64);  \
            vex_printf("\n\n");               \
         }                                    \
         addHInstr ( instrs_out, _tmp );      \
      } while (0)

#  define PRINT_STATE                                             \
      do {                                                        \
         Int z, q;                                                \
         for (z = 0; z < n_rregs; z++) {                          \
            vex_printf("  rreg_state[%2d] = ", z);                \
            con->ppReg(con->univ->regs[z]);                       \
            vex_printf("  \t");                                   \
            switch (rreg_state[z].disp) {                         \
               case Free:    vex_printf("Free\n"); break;         \
               case Unavail: vex_printf("Unavail\n"); break;      \
               case Bound:   vex_printf("BoundTo ");              \
                             con->ppReg(rreg_state[z].vreg);      \
                             vex_printf("\n"); break;             \
            }                                                     \
         }                                                        \
         vex_printf("\n  vreg_state[0 .. %d]:\n    ", n_vregs-1); \
         q = 0;                                                   \
         for (z = 0; z < n_vregs; z++) {                          \
            if (vreg_state[z] == INVALID_RREG_NO)                 \
               continue;                                          \
            vex_printf("[%d] -> %d   ", z, vreg_state[z]);        \
            q++;                                                  \
            if (q > 0 && (q % 6) == 0)                            \
               vex_printf("\n    ");                              \
         }                                                        \
         vex_printf("\n");                                        \
      } while (0)
   /* --------- Stage 0: set up output array --------- */
   /* --------- and allocate/initialise running state. --------- */

   instrs_out = newHInstrArray();

   /* ... and initialise running state. */
   /* n_rregs is no more than a short name for n_available_real_regs. */
   n_rregs = con->univ->allocable;
   n_vregs = instrs_in->n_vregs;

   /* If this is not so, vreg_state entries will overflow. */
   vassert(n_vregs < 32767);

   /* If this is not so, the universe we have is nonsensical. */
   vassert(n_rregs > 0);

   rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState));
   vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(Short));

   for (Int j = 0; j < n_rregs; j++) {
      rreg_state[j].has_hlrs      = False;
      rreg_state[j].disp          = Free;
      rreg_state[j].vreg          = INVALID_HREG;
      rreg_state[j].is_spill_cand = False;
      rreg_state[j].eq_spill_slot = False;
   }

   for (Int j = 0; j < n_vregs; j++)
      vreg_state[j] = INVALID_RREG_NO;
   /* --------- Stage 1: compute vreg live ranges. --------- */
   /* --------- Stage 2: compute rreg live ranges. --------- */

   /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */

   /* This is relatively simple, because (1) we only seek the complete
      end-to-end live range of each vreg, and are not interested in
      any holes in it, and (2) the vregs are conveniently numbered 0
      .. n_vregs-1, so we can just dump the results in a
      pre-allocated array. */

   vreg_lrs = NULL;
   if (n_vregs > 0)
      vreg_lrs = LibVEX_Alloc_inline(sizeof(VRegLR) * n_vregs);

   for (Int j = 0; j < n_vregs; j++) {
      vreg_lrs[j].live_after   = INVALID_INSTRNO;
      vreg_lrs[j].dead_before  = INVALID_INSTRNO;
      vreg_lrs[j].spill_offset = 0;
      vreg_lrs[j].spill_size   = 0;
      vreg_lrs[j].reg_class    = HRcINVALID;
   }

   /* An array to hold the reg-usage info for the incoming
      instructions. */
   reg_usage_arr
      = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used);

   /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
   /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */

   /* This is more complex than Stage 1, because we need to compute
      exactly all the live ranges of all the allocatable real regs,
      and we don't know in advance how many there will be. */

   rreg_lrs_used = 0;
   rreg_lrs_size = 4;
   rreg_lrs_la = LibVEX_Alloc_inline(rreg_lrs_size * sizeof(RRegLR));
   rreg_lrs_db = NULL; /* we'll create this later */

   /* We'll need to track live range start/end points separately for
      each rreg.  Sigh. */
   vassert(n_rregs > 0);
   rreg_live_after  = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
   rreg_dead_before = LibVEX_Alloc_inline(n_rregs * sizeof(Int));

   for (Int j = 0; j < n_rregs; j++) {
      rreg_live_after[j] =
      rreg_dead_before[j] = INVALID_INSTRNO;
   }

   /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
   /* ------ start of ITERATE OVER INSNS ------ */

   for (Int ii = 0; ii < instrs_in->arr_used; ii++) {

      con->getRegUsage(&reg_usage_arr[ii], instrs_in->arr[ii], con->mode64);
      reg_usage_arr[ii].isVregVregMove
         = reg_usage_arr[ii].isRegRegMove
           && hregIsVirtual(reg_usage_arr[ii].regMoveSrc)
           && hregIsVirtual(reg_usage_arr[ii].regMoveDst);

      if (0) {
         vex_printf("\n%d  stage1: ", ii);
         con->ppInstr(instrs_in->arr[ii], con->mode64);
         vex_printf("\n");
         ppHRegUsage(con->univ, &reg_usage_arr[ii]);
      }
      /* ------ start of DEAL WITH VREG LIVE RANGES ------ */

      /* for each virtual reg mentioned in the insn ... */
      for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {

         HReg vreg = reg_usage_arr[ii].vRegs[j];
         vassert(hregIsVirtual(vreg));

         Int k = hregIndex(vreg);
         if (k < 0 || k >= n_vregs) {
            vex_printf("\n");
            con->ppInstr(instrs_in->arr[ii], con->mode64);
            vex_printf("\n");
            vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
            vpanic("doRegisterAllocation: out-of-range vreg");
         }

         /* Take the opportunity to note its regclass.  We'll need
            that when allocating spill slots. */
         if (vreg_lrs[k].reg_class == HRcINVALID) {
            /* First mention of this vreg. */
            vreg_lrs[k].reg_class = hregClass(vreg);
         } else {
            /* Seen it before, so check for consistency. */
            vassert(vreg_lrs[k].reg_class == hregClass(vreg));
         }

         /* Now consider live ranges. */
         switch (reg_usage_arr[ii].vMode[j]) {
            case HRmRead:
               if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
                  vex_printf("\n\nOFFENDING VREG = %d\n", k);
                  vpanic("doRegisterAllocation: "
                         "first event for vreg is Read");
               }
               vreg_lrs[k].dead_before = toShort(ii + 1);
               break;
            case HRmWrite:
               if (vreg_lrs[k].live_after == INVALID_INSTRNO)
                  vreg_lrs[k].live_after = toShort(ii);
               vreg_lrs[k].dead_before = toShort(ii + 1);
               break;
            case HRmModify:
               if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
                  vex_printf("\n\nOFFENDING VREG = %d\n", k);
                  vpanic("doRegisterAllocation: "
                         "first event for vreg is Modify");
               }
               vreg_lrs[k].dead_before = toShort(ii + 1);
               break;
            default:
               vpanic("doRegisterAllocation(1)");
         } /* switch */

      } /* iterate over virtual registers */
      /* ------ end of DEAL WITH VREG LIVE RANGES ------ */

      /* ------ start of DEAL WITH RREG LIVE RANGES ------ */

      /* If this doesn't hold, the following iteration over real registers
         will fail miserably. */
      vassert(N_RREGUNIVERSE_REGS == 64);

      const ULong rRead      = reg_usage_arr[ii].rRead;
      const ULong rWritten   = reg_usage_arr[ii].rWritten;
      const ULong rMentioned = rRead | rWritten;

      UInt rReg_minIndex;
      UInt rReg_maxIndex;
      if (rMentioned == 0) {
         /* There are no real register uses in this insn.  Set
            rReg_{min,max}Index so that the following loop doesn't iterate
            at all, so as to avoid wasting time. */
         rReg_minIndex = 1;
         rReg_maxIndex = 0;
      } else {
         rReg_minIndex = ULong__minIndex(rMentioned);
         rReg_maxIndex = ULong__maxIndex(rMentioned);
         /* Don't bother to look at registers which are not available
            to the allocator.  We asserted above that n_rregs > 0, so
            n_rregs-1 is safe. */
         if (rReg_maxIndex >= n_rregs)
            rReg_maxIndex = n_rregs-1;
      }

      /* for each allocator-available real reg mentioned in the insn ... */
      /* Note.  We are allocating only over the real regs available to
         the allocator.  Others, eg the stack or baseblock pointers,
         are unavailable to allocation and so we never visit them.
         Hence the iteration is cut off at n_rregs-1, since n_rregs ==
         univ->allocable. */
      for (Int j = rReg_minIndex; j <= rReg_maxIndex; j++) {

         const ULong jMask = 1ULL << j;
         if (LIKELY((rMentioned & jMask) == 0))
            continue;

         const Bool isR = (rRead    & jMask) != 0;
         const Bool isW = (rWritten & jMask) != 0;

         /* Dummy initialisations of flush_la and flush_db to avoid
            possible bogus uninit-var warnings from gcc. */
         Int  flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
         Bool flush = False;

         if (isW && !isR) {
            flush_la = rreg_live_after[j];
            flush_db = rreg_dead_before[j];
            if (flush_la != INVALID_INSTRNO && flush_db != INVALID_INSTRNO)
               flush = True;
            rreg_live_after[j]  = ii;
            rreg_dead_before[j] = ii+1;
         } else if (!isW && isR) {
            if (rreg_live_after[j] == INVALID_INSTRNO) {
               vex_printf("\nOFFENDING RREG = ");
               con->ppReg(con->univ->regs[j]);
               vex_printf("\n");
               vex_printf("\nOFFENDING instr = ");
               con->ppInstr(instrs_in->arr[ii], con->mode64);
               vex_printf("\n");
               vpanic("doRegisterAllocation: "
                      "first event for rreg is Read");
            }
            rreg_dead_before[j] = ii+1;
         } else {
            vassert(isR && isW);
            if (rreg_live_after[j] == INVALID_INSTRNO) {
               vex_printf("\nOFFENDING RREG = ");
               con->ppReg(con->univ->regs[j]);
               vex_printf("\n");
               vex_printf("\nOFFENDING instr = ");
               con->ppInstr(instrs_in->arr[ii], con->mode64);
               vex_printf("\n");
               vpanic("doRegisterAllocation: "
                      "first event for rreg is Modify");
            }
            rreg_dead_before[j] = ii+1;
         }

         if (flush) {
            vassert(flush_la != INVALID_INSTRNO);
            vassert(flush_db != INVALID_INSTRNO);
            ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
            if (0)
               vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
            rreg_lrs_la[rreg_lrs_used].rreg        = con->univ->regs[j];
            rreg_lrs_la[rreg_lrs_used].live_after  = toShort(flush_la);
            rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
            rreg_lrs_used++;
         }

      } /* iterate over rregs in the instr */

      /* ------ end of DEAL WITH RREG LIVE RANGES ------ */

   } /* iterate over insns */

   /* ------ end of ITERATE OVER INSNS ------ */
   /* ------ start of FINALISE RREG LIVE RANGES ------ */

   /* Now finish up any live ranges left over. */
   for (Int j = 0; j < n_rregs; j++) {

      if (0) {
         vex_printf("residual %d:  %d %d\n", j, rreg_live_after[j],
                                             rreg_dead_before[j]);
      }
      vassert( (rreg_live_after[j] == INVALID_INSTRNO
                && rreg_dead_before[j] == INVALID_INSTRNO)
              ||
               (rreg_live_after[j] != INVALID_INSTRNO
                && rreg_dead_before[j] != INVALID_INSTRNO)
             );

      if (rreg_live_after[j] == INVALID_INSTRNO)
         continue;

      ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
      if (0)
         vex_printf("FLUSH 2 (%d,%d)\n",
                    rreg_live_after[j], rreg_dead_before[j]);
      rreg_lrs_la[rreg_lrs_used].rreg        = con->univ->regs[j];
      rreg_lrs_la[rreg_lrs_used].live_after  = toShort(rreg_live_after[j]);
      rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
      rreg_lrs_used++;
   }
   /* Compute summary hints for choosing real regs.  If a real reg is
      involved in a hard live range, record that fact in the fixed
      part of the running rreg_state.  Later, when offered a choice between
      rregs, it's better to choose one which is not marked as having
      any HLRs, since ones with HLRs may need to be spilled around
      their HLRs.  Correctness of final assignment is unaffected by
      this mechanism -- it is only an optimisation. */

   for (Int j = 0; j < rreg_lrs_used; j++) {
      HReg rreg = rreg_lrs_la[j].rreg;
      vassert(!hregIsVirtual(rreg));
      /* rreg is involved in a HLR.  Record this info in the array, if
         there is space. */
      UInt ix = hregIndex(rreg);
      vassert(ix < n_rregs);
      rreg_state[ix].has_hlrs = True;
   }

   if (0) {
      for (Int j = 0; j < n_rregs; j++) {
         if (!rreg_state[j].has_hlrs)
            continue;
         con->ppReg(con->univ->regs[j]);
         vex_printf(" hinted\n");
      }
   }
   /* Finally, copy the _la variant into the _db variant and
      sort both by their respective fields. */
   rreg_lrs_db = LibVEX_Alloc_inline(rreg_lrs_used * sizeof(RRegLR));
   for (Int j = 0; j < rreg_lrs_used; j++)
      rreg_lrs_db[j] = rreg_lrs_la[j];

   sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True  /* by .live_after */  );
   sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False /* by .dead_before */ );

   /* And set up the cursors. */
   rreg_lrs_la_next = 0;
   rreg_lrs_db_next = 0;

   for (Int j = 1; j < rreg_lrs_used; j++) {
      vassert(rreg_lrs_la[j-1].live_after  <= rreg_lrs_la[j].live_after);
      vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
   }

   /* ------ end of FINALISE RREG LIVE RANGES ------ */
   if (DEBUG_REGALLOC) {
      for (Int j = 0; j < n_vregs; j++) {
         vex_printf("vreg %d:  la = %d,  db = %d\n",
                    j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
      }
   }

   if (DEBUG_REGALLOC) {
      vex_printf("RRegLRs by LA:\n");
      for (Int j = 0; j < rreg_lrs_used; j++) {
         vex_printf("  ");
         con->ppReg(rreg_lrs_la[j].rreg);
         vex_printf("      la = %d,  db = %d\n",
                    rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
      }
      vex_printf("RRegLRs by DB:\n");
      for (Int j = 0; j < rreg_lrs_used; j++) {
         vex_printf("  ");
         con->ppReg(rreg_lrs_db[j].rreg);
         vex_printf("      la = %d,  db = %d\n",
                    rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
      }
   }
   /* --------- Stage 3: allocate spill slots. --------- */

   /* Each spill slot is 8 bytes long.  For vregs which take more than
      64 bits to spill (classes Flt64 and Vec128), we have to allocate
      two consecutive spill slots.  For 256 bit registers (class
      Vec256), we have to allocate four consecutive spill slots.

      For Vec128-class on PowerPC, the spill slot's actual address
      must be 16-byte aligned.  Since the spill slot's address is
      computed as an offset from the guest state pointer, and since
      the user of the generated code must set that pointer to a
      32-byte-aligned value, we have the residual obligation here of
      choosing a 16-aligned spill slot offset for Vec128-class values.
      Since each spill slot is 8 bytes long, that means for
      Vec128-class values we must allocate a spill slot number which
      is zero mod 2.

      Similarly, for Vec256 class on amd64, find a spill slot number
      which is zero mod 4.  This guarantees it will be 32 byte
      aligned, which isn't actually necessary on amd64 (we use movUpd
      etc to spill), but seems like good practice.

      Do a rank-based allocation of vregs to spill slot numbers.  We
      put as few values as possible in spill slots, but nevertheless
      need to have a spill slot available for all vregs, just in case.
   */
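   /* Worked arithmetic (editorial note, not from the original file):
      slot ss_no spills at offset guest_sizeB * 3 + ss_no * 8.  Given
      that guest_sizeB is asserted above to be a multiple of
      LibVEX_GUEST_STATE_ALIGN, and that the guest state pointer is
      32-aligned, the spill area's base is at least 16-aligned; hence
      an even ss_no yields a 16-aligned offset and an ss_no divisible
      by 4 a 32-aligned one -- exactly the "zero mod 2" and "zero
      mod 4" conditions described above. */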
   /* Int max_ss_no = -1; */

   vex_bzero(ss_busy_until_before, sizeof(ss_busy_until_before));
   for (Int j = 0; j < n_vregs; j++) {

      /* True iff this vreg is unused.  In which case we also expect
         that the reg_class field for it has not been set. */
      if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
         vassert(vreg_lrs[j].reg_class == HRcINVALID);
         continue;
      }

      /* The spill slots are 64 bits in size.  As per the comment on
         definition of HRegClass in host_generic_regs.h, that means,
         to spill a vreg of class Flt64 or Vec128, we'll need to find
         two adjacent spill slots to use.  For Vec256, we'll need to
         find four adjacent slots to use.  Note, this logic needs to
         be kept in sync with the size info on the definition of
         HRegClass. */
      Int ss_no = -1;
      switch (vreg_lrs[j].reg_class) {

         case HRcVec128: case HRcFlt64:
            /* Find two adjacent free slots which between them
               provide up to 128 bits in which to spill the vreg.
               Since we are trying to find an even:odd pair, move
               along in steps of 2 (slots). */
            for (ss_no = 0; ss_no < N_SPILL64S-1; ss_no += 2)
               if (ss_busy_until_before[ss_no+0] <= vreg_lrs[j].live_after
                   && ss_busy_until_before[ss_no+1] <= vreg_lrs[j].live_after)
                  break;
            if (ss_no >= N_SPILL64S-1) {
               vpanic("LibVEX_N_SPILL_BYTES is too low.  "
                      "Increase and recompile.");
            }
            ss_busy_until_before[ss_no+0] = vreg_lrs[j].dead_before;
            ss_busy_until_before[ss_no+1] = vreg_lrs[j].dead_before;
            break;

         default:
            /* The ordinary case -- just find a single spill slot. */
            /* Find the lowest-numbered spill slot which is available
               at the start point of this interval, and assign the
               interval to it. */
            for (ss_no = 0; ss_no < N_SPILL64S; ss_no++)
               if (ss_busy_until_before[ss_no] <= vreg_lrs[j].live_after)
                  break;
            if (ss_no == N_SPILL64S) {
               vpanic("LibVEX_N_SPILL_BYTES is too low.  "
                      "Increase and recompile.");
            }
            ss_busy_until_before[ss_no] = vreg_lrs[j].dead_before;
            break;

      } /* switch (vreg_lrs[j].reg_class) */

      /* This reflects LibVEX's hard-wired knowledge of the baseBlock
         layout: the guest state, then two equal sized areas following
         it for two sets of shadow state, and then the spill area. */
      vreg_lrs[j].spill_offset = toShort(con->guest_sizeB * 3 + ss_no * 8);

      /* Independent check that we've made a sane choice of slot */
      sanity_check_spill_offset( &vreg_lrs[j] );
      /* if (j > max_ss_no) */
      /*    max_ss_no = j; */
   }

   if (0) {
      vex_printf("\n\n");
      for (Int j = 0; j < n_vregs; j++)
         vex_printf("vreg %d    --> spill offset %d\n",
                    j, vreg_lrs[j].spill_offset);
   }
   /* --------- Stage 4: establish rreg preferences --------- */

   /* It may be advantageous to allocate certain vregs to specific
      rregs, as a way of avoiding reg-reg moves later.  Here we
      establish which, if any, rreg each vreg would prefer to be in.
      Note that this constrains the allocator -- ideally we end up
      with as few as possible vregs expressing a preference.

      This is an optimisation: if the .preferred_rreg field is never
      set to anything different from INVALID_HREG, the allocator still
      works. */

   /* 30 Dec 04: removed this mechanism as it does not seem to
      help. */
   /* --------- Stage 5: process instructions --------- */

   /* This is the main loop of the allocator.  First, we need to
      correctly set up our running state, which tracks the status of
      each real register. */

   /* ------ BEGIN: Process each insn in turn. ------ */

   for (Int ii = 0; ii < instrs_in->arr_used; ii++) {

      if (DEBUG_REGALLOC) {
         vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
         vex_printf("---- ");
         con->ppInstr(instrs_in->arr[ii], con->mode64);
         vex_printf("\n\nInitial state:\n");
         PRINT_STATE;
         vex_printf("\n");
      }

      /* ------------ Sanity checks ------------ */

      /* Sanity checks are expensive.  So they are done only once
         every 17 instructions, and just before the last
         instruction. */
      do_sanity_check
         = toBool(
              False /* Set to True for sanity checking of all insns. */
              || ii == instrs_in->arr_used-1
              || (ii > 0 && (ii % 17) == 0)
           );
      if (do_sanity_check) {

         /* Sanity check 1: all rregs with a hard live range crossing
            this insn must be marked as unavailable in the running
            state. */
         for (Int j = 0; j < rreg_lrs_used; j++) {
            if (rreg_lrs_la[j].live_after < ii
                && ii < rreg_lrs_la[j].dead_before) {
               /* ii is the middle of a hard live range for some real
                  reg.  Check it's marked as such in the running
                  state. */
               HReg reg = rreg_lrs_la[j].rreg;

               if (0) {
                  vex_printf("considering la %d .. db %d   reg = ",
                             rreg_lrs_la[j].live_after,
                             rreg_lrs_la[j].dead_before);
                  con->ppReg(reg);
                  vex_printf("\n");
               }

               /* assert that this rreg is marked as unavailable */
               vassert(!hregIsVirtual(reg));
               vassert(rreg_state[hregIndex(reg)].disp == Unavail);
            }
         }

         /* Sanity check 2: conversely, all rregs marked as
            unavailable in the running rreg_state must have a
            corresponding hard live range entry in the rreg_lrs
            array. */
         for (Int j = 0; j < n_rregs; j++) {
            vassert(rreg_state[j].disp == Bound
                    || rreg_state[j].disp == Free
                    || rreg_state[j].disp == Unavail);
            if (rreg_state[j].disp != Unavail)
               continue;
            Int k;
            for (k = 0; k < rreg_lrs_used; k++) {
               HReg reg = rreg_lrs_la[k].rreg;
               vassert(!hregIsVirtual(reg));
               if (hregIndex(reg) == j
                   && rreg_lrs_la[k].live_after < ii
                   && ii < rreg_lrs_la[k].dead_before)
                  break;
            }
            /* If this vassertion fails, we couldn't find a
               corresponding HLR. */
            vassert(k < rreg_lrs_used);
         }

         /* Sanity check 3: all vreg-rreg bindings must bind registers
            of the same class. */
         for (Int j = 0; j < n_rregs; j++) {
            if (rreg_state[j].disp != Bound) {
               vassert(rreg_state[j].eq_spill_slot == False);
               continue;
            }
            vassert(hregClass(con->univ->regs[j])
                    == hregClass(rreg_state[j].vreg));
            vassert( hregIsVirtual(rreg_state[j].vreg));
         }

         /* Sanity check 4: the vreg_state and rreg_state
            mutually-redundant mappings are consistent.  If
            rreg_state[j].vreg points at some vreg_state entry then
            that vreg_state entry should point back at
            rreg_state[j]. */
         for (Int j = 0; j < n_rregs; j++) {
            if (rreg_state[j].disp != Bound)
               continue;
            Int k = hregIndex(rreg_state[j].vreg);
            vassert(IS_VALID_VREGNO(k));
            vassert(vreg_state[k] == j);
         }
         for (Int j = 0; j < n_vregs; j++) {
            Int k = vreg_state[j];
            if (k == INVALID_RREG_NO)
               continue;
            vassert(IS_VALID_RREGNO(k));
            vassert(rreg_state[k].disp == Bound);
            vassert(hregIndex(rreg_state[k].vreg) == j);
         }

      } /* if (do_sanity_check) */
      /* ------------ end of Sanity checks ------------ */

      /* Do various optimisations pertaining to register coalescing
         and preferencing:
            MOV  v <-> v coalescing (done here).
            MOV  v <-> r coalescing (not yet, if ever)
      */

      /* If doing a reg-reg move between two vregs, and the src's live
         range ends here and the dst's live range starts here, bind
         the dst to the src's rreg, and that's all. */
      if (reg_usage_arr[ii].isVregVregMove) {
         HReg vregS = reg_usage_arr[ii].regMoveSrc;
         HReg vregD = reg_usage_arr[ii].regMoveDst;
         /* Check that |isVregVregMove| is not telling us a bunch of lies ... */
         vassert(hregClass(vregS) == hregClass(vregD));
         Int k = hregIndex(vregS);
         Int m = hregIndex(vregD);
         vassert(IS_VALID_VREGNO(k));
         vassert(IS_VALID_VREGNO(m));
         if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
         if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
         if (DEBUG_REGALLOC) {
            vex_printf("COALESCE ");
            con->ppReg(vregS);
            vex_printf(" -> ");
            con->ppReg(vregD);
            vex_printf("\n\n");
         }
         /* Find the state entry for vregS. */
         Int n = vreg_state[k]; /* k is the index of vregS */
         if (n == INVALID_RREG_NO) {
            /* vregS is not currently in a real register.  So we can't
               do the coalescing.  Give up. */
            goto cannot_coalesce;
         }
         vassert(IS_VALID_RREGNO(n));

         /* Finally, we can do the coalescing.  It's trivial -- merely
            claim vregS's register for vregD. */
         rreg_state[n].vreg = vregD;
         vassert(IS_VALID_VREGNO(hregIndex(vregD)));
         vassert(IS_VALID_VREGNO(hregIndex(vregS)));
         vreg_state[hregIndex(vregD)] = toShort(n);
         vreg_state[hregIndex(vregS)] = INVALID_RREG_NO;

         /* This rreg has become associated with a different vreg and
            hence with a different spill slot.  Play safe. */
         rreg_state[n].eq_spill_slot = False;

         /* Move on to the next insn.  We skip the post-insn stuff for
            fixed registers, since this move should not interact with
            them in any way. */
         continue;
      }
     cannot_coalesce:
      /* ------ Free up rregs bound to dead vregs ------ */

      /* Look for vregs whose live range has just ended, and
         mark the associated rreg as free. */

      for (Int j = 0; j < n_rregs; j++) {
         if (rreg_state[j].disp != Bound)
            continue;
         UInt vregno = hregIndex(rreg_state[j].vreg);
         vassert(IS_VALID_VREGNO(vregno));
         if (vreg_lrs[vregno].dead_before <= ii) {
            rreg_state[j].disp = Free;
            rreg_state[j].eq_spill_slot = False;
            Int m = hregIndex(rreg_state[j].vreg);
            vassert(IS_VALID_VREGNO(m));
            vreg_state[m] = INVALID_RREG_NO;
            if (DEBUG_REGALLOC) {
               vex_printf("free up ");
               con->ppReg(con->univ->regs[j]);
               vex_printf("\n");
            }
         }
      }
      /* ------ Pre-instruction actions for fixed rreg uses ------ */

      /* Now we have to deal with rregs which are about to be made
         live by this instruction -- in other words, are entering into
         one of their live ranges.  If any such rreg holds a vreg, we
         will have to free up the rreg.  The simplest solution which
         is correct is to spill the rreg.

         Note we could do better:
         * Could move it into some other free rreg, if one is available

         Do this efficiently, by incrementally stepping along an array
         of rreg HLRs that are known to be sorted by start point
         (their .live_after field).
      */
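      /* Editorial note (not from the original file): since the _la
         array is sorted and rreg_lrs_la_next only ever advances, this
         amounts to merging the HLR start points with the instruction
         stream -- each hard live range is handled exactly once, for a
         total cost of O(n_insns + rreg_lrs_used) across the whole
         Stage 5 loop. */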
      while (True) {
         vassert(rreg_lrs_la_next >= 0);
         vassert(rreg_lrs_la_next <= rreg_lrs_used);
         if (rreg_lrs_la_next == rreg_lrs_used)
            break; /* no more real reg live ranges to consider */
         if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
            break; /* next live range does not yet start */
         vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
         /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
            Find the associated rreg_state entry. */
         /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
            Real register live ranges are guaranteed to be well-formed
            in that they start with a write to the register -- Stage 2
            rejects any code not satisfying this.  So the correct
            question to ask is whether
            rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
            whether the reg becomes live after this insn -- rather
            than before it. */
         if (DEBUG_REGALLOC) {
            vex_printf("need to free up rreg: ");
            con->ppReg(rreg_lrs_la[rreg_lrs_la_next].rreg);
            vex_printf("\n\n");
         }
         Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg);

         /* If this fails, we don't have an entry for this rreg.
            Which we should. */
         vassert(IS_VALID_RREGNO(k));
         Int m = hregIndex(rreg_state[k].vreg);
         if (rreg_state[k].disp == Bound) {
            /* Yes, there is an associated vreg.  Spill it if it's
               still live. */
            vassert(IS_VALID_VREGNO(m));
            vreg_state[m] = INVALID_RREG_NO;
            if (vreg_lrs[m].dead_before > ii) {
               vassert(vreg_lrs[m].reg_class != HRcINVALID);
               if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
                  HInstr* spill1 = NULL;
                  HInstr* spill2 = NULL;
                  con->genSpill(&spill1, &spill2, con->univ->regs[k],
                                vreg_lrs[m].spill_offset, con->mode64);
                  vassert(spill1 || spill2); /* can't both be NULL */
                  if (spill1)
                     EMIT_INSTR(spill1);
                  if (spill2)
                     EMIT_INSTR(spill2);
               }
               rreg_state[k].eq_spill_slot = True;
            }
         }
         rreg_state[k].disp = Unavail;
         rreg_state[k].vreg = INVALID_HREG;
         rreg_state[k].eq_spill_slot = False;

         /* check for further rregs entering HLRs at this point */
         rreg_lrs_la_next++;
      }

      if (DEBUG_REGALLOC) {
         vex_printf("After pre-insn actions for fixed regs:\n");
         PRINT_STATE;
         vex_printf("\n");
      }
      /* ------ Deal with the current instruction. ------ */

      /* Finally we can begin the processing of this instruction
         itself.  The aim is to free up enough rregs for this insn.
         This may generate spill stores since we may have to evict
         some vregs currently in rregs.  Also generates spill loads.
         We also build up the final vreg->rreg mapping to be applied
         to the insn. */

      initHRegRemap(&remap);
      /* ------------ BEGIN directReload optimisation ----------- */

      /* If the instruction reads exactly one vreg which is currently
         in a spill slot, and this is the last use of that vreg, see
         if we can convert the instruction into one that reads
         directly from the spill slot.  This is clearly only possible
         for x86 and amd64 targets, since ppc and arm are load-store
         architectures.  If successful, replace instrs_in->arr[ii]
         with this new instruction, and recompute its reg usage, so
         that the change is invisible to the standard-case handling
         that follows. */

      if (con->directReload != NULL && reg_usage_arr[ii].n_vRegs <= 2) {
         Bool  debug_direct_reload = False;
         HReg  cand     = INVALID_HREG;
         Int   nreads   = 0;
         Short spilloff = 0;

         for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {

            HReg vreg = reg_usage_arr[ii].vRegs[j];
            vassert(hregIsVirtual(vreg));

            if (reg_usage_arr[ii].vMode[j] == HRmRead) {
               nreads++;
               Int m = hregIndex(vreg);
               vassert(IS_VALID_VREGNO(m));
               Int k = vreg_state[m];
               if (!IS_VALID_RREGNO(k)) {
                  /* ok, it is spilled.  Now, is this its last use? */
                  vassert(vreg_lrs[m].dead_before >= ii+1);
                  if (vreg_lrs[m].dead_before == ii+1
                      && hregIsInvalid(cand)) {
                     spilloff = vreg_lrs[m].spill_offset;
                     cand = vreg;
                  }
               }
            }
         }

         if (nreads == 1 && ! hregIsInvalid(cand)) {
            HInstr* reloaded;
            if (reg_usage_arr[ii].n_vRegs == 2)
               vassert(! sameHReg(reg_usage_arr[ii].vRegs[0],
                                  reg_usage_arr[ii].vRegs[1]));

            reloaded = con->directReload(instrs_in->arr[ii], cand, spilloff);
            if (debug_direct_reload) {
               vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
               con->ppInstr(instrs_in->arr[ii], con->mode64);
            }
            if (reloaded) {
               /* Update info about the insn, so it looks as if it had
                  been in this form all along. */
               instrs_in->arr[ii] = reloaded;
               con->getRegUsage(&reg_usage_arr[ii], instrs_in->arr[ii],
                                con->mode64);
               if (debug_direct_reload) {
                  vex_printf("  -->  ");
                  con->ppInstr(reloaded, con->mode64);
               }
            }
            if (debug_direct_reload)
               vex_printf("\n");
         }
      }
      /* ------------ END directReload optimisation ------------ */

      /* for each virtual reg mentioned in the insn ... */
      for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {

         HReg vreg = reg_usage_arr[ii].vRegs[j];
         vassert(hregIsVirtual(vreg));

         if (0) {
            vex_printf("considering "); con->ppReg(vreg); vex_printf("\n");
         }

         /* Now we're trying to find a rreg for "vreg".  First of all,
            if it already has an rreg assigned, we don't need to do
            anything more.  Inspect the current state to find out. */
         Int m = hregIndex(vreg);
         vassert(IS_VALID_VREGNO(m));
         Int n = vreg_state[m];
         if (IS_VALID_RREGNO(n)) {
            vassert(rreg_state[n].disp == Bound);
            addToHRegRemap(&remap, vreg, con->univ->regs[n]);
            /* If this rreg is written or modified, mark it as different
               from any spill slot value. */
            if (reg_usage_arr[ii].vMode[j] != HRmRead)
               rreg_state[n].eq_spill_slot = False;
            continue;
         } else {
            vassert(n == INVALID_RREG_NO);
         }

         /* No luck.  The next thing to do is see if there is a
            currently free rreg available, of the correct class.  If
            so, bag it.  NOTE, we could improve this by selecting an
            rreg for which the next live-range event is as far ahead
            as possible. */
         Int k_suboptimal = -1;
         Int k;
         for (k = con->univ->allocable_start[hregClass(vreg)];
              k <= con->univ->allocable_end[hregClass(vreg)]; k++) {
            if (rreg_state[k].disp != Free)
               continue;
            if (rreg_state[k].has_hlrs) {
               /* Well, at least we can use k_suboptimal if we really
                  have to.  Keep on looking for a better candidate. */
               k_suboptimal = k;
            } else {
               /* Found a preferable reg.  Use it. */
               k_suboptimal = -1;
               break;
            }
         }
         if (k_suboptimal >= 0)
            k = k_suboptimal;

         if (k <= con->univ->allocable_end[hregClass(vreg)]) {
            rreg_state[k].disp = Bound;
            rreg_state[k].vreg = vreg;
            Int p = hregIndex(vreg);
            vassert(IS_VALID_VREGNO(p));
            vreg_state[p] = toShort(k);
            addToHRegRemap(&remap, vreg, con->univ->regs[k]);
            /* Generate a reload if needed.  This only creates needed
               reloads because the live range builder for vregs will
               guarantee that the first event for a vreg is a write.
               Hence, if this reference is not a write, it cannot be
               the first reference for this vreg, and so a reload is
               indeed needed. */
            if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
               vassert(vreg_lrs[p].reg_class != HRcINVALID);
               HInstr* reload1 = NULL;
               HInstr* reload2 = NULL;
               con->genReload(&reload1, &reload2, con->univ->regs[k],
                              vreg_lrs[p].spill_offset, con->mode64);
               vassert(reload1 || reload2); /* can't both be NULL */
               if (reload1)
                  EMIT_INSTR(reload1);
               if (reload2)
                  EMIT_INSTR(reload2);
               /* This rreg is read or modified by the instruction.
                  If it's merely read we can claim it now equals the
                  spill slot, but not so if it is modified. */
               if (reg_usage_arr[ii].vMode[j] == HRmRead) {
                  rreg_state[k].eq_spill_slot = True;
               } else {
                  vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
                  rreg_state[k].eq_spill_slot = False;
               }
            } else {
               rreg_state[k].eq_spill_slot = False;
            }

            continue;
         }
         /* Well, now we have no option but to spill a vreg.  It's
            important to make a good choice of vreg to spill, and of
            course we need to be careful not to spill a vreg which is
            needed by this insn. */

         /* First, mark in the rreg_state, those rregs which are not spill
            candidates, due to holding a vreg mentioned by this
            instruction.  Or being of the wrong class. */
         for (k = con->univ->allocable_start[hregClass(vreg)];
              k <= con->univ->allocable_end[hregClass(vreg)]; k++) {
            rreg_state[k].is_spill_cand = False;
            if (rreg_state[k].disp != Bound)
               continue;
            rreg_state[k].is_spill_cand = True;
            /* Note, the following loop visits only the virtual regs
               mentioned by the instruction. */
            for (m = 0; m < reg_usage_arr[ii].n_vRegs; m++) {
               if (sameHReg(rreg_state[k].vreg, reg_usage_arr[ii].vRegs[m])) {
                  rreg_state[k].is_spill_cand = False;
                  break;
               }
            }
         }

         /* We can choose to spill any rreg satisfying
            rreg_state[r].is_spill_cand (so to speak).  Choose r so that
            the next use of its associated vreg is as far ahead as
            possible, in the hope that this will minimise the number
            of consequent reloads required. */
         Int spillee
            = findMostDistantlyMentionedVReg (
                 reg_usage_arr, ii+1, instrs_in->arr_used, rreg_state,
                 hregClass(vreg), con);
         if (spillee == -1) {
            /* Hmmmmm.  There don't appear to be any spill candidates.
               We're hosed. */
            vex_printf("reg_alloc: can't find a register in class: ");
            ppHRegClass(hregClass(vreg));
            vex_printf("\n");
            vpanic("reg_alloc: can't create a free register.");
         }

         /* Right.  So we're going to spill rreg_state[spillee]. */
         vassert(IS_VALID_RREGNO(spillee));
         vassert(rreg_state[spillee].disp == Bound);
         /* check it's the right class */
         vassert(hregClass(con->univ->regs[spillee]) == hregClass(vreg));
         /* check we're not ejecting the vreg for which we are trying
            to free up a register. */
         vassert(! sameHReg(rreg_state[spillee].vreg, vreg));

         m = hregIndex(rreg_state[spillee].vreg);
         vassert(IS_VALID_VREGNO(m));

         /* So here's the spill store.  Assert that we're spilling a
            live vreg. */
         vassert(vreg_lrs[m].dead_before > ii);
         vassert(vreg_lrs[m].reg_class != HRcINVALID);
         if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
            HInstr* spill1 = NULL;
            HInstr* spill2 = NULL;
            con->genSpill(&spill1, &spill2, con->univ->regs[spillee],
                          vreg_lrs[m].spill_offset, con->mode64);
            vassert(spill1 || spill2); /* can't both be NULL */
            if (spill1)
               EMIT_INSTR(spill1);
            if (spill2)
               EMIT_INSTR(spill2);
         }

         /* Update the rreg_state to reflect the new assignment for this
            rreg. */
         rreg_state[spillee].vreg = vreg;
         vreg_state[m] = INVALID_RREG_NO;

         rreg_state[spillee].eq_spill_slot = False; /* be safe */

         m = hregIndex(vreg);
         vassert(IS_VALID_VREGNO(m));
         vreg_state[m] = toShort(spillee);
         /* Now, if this vreg is being read or modified (as opposed to
            written), we have to generate a reload for it. */
         if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
            vassert(vreg_lrs[m].reg_class != HRcINVALID);
            HInstr* reload1 = NULL;
            HInstr* reload2 = NULL;
            con->genReload(&reload1, &reload2, con->univ->regs[spillee],
                           vreg_lrs[m].spill_offset, con->mode64);
            vassert(reload1 || reload2); /* can't both be NULL */
            if (reload1)
               EMIT_INSTR(reload1);
            if (reload2)
               EMIT_INSTR(reload2);
            /* This rreg is read or modified by the instruction.
               If it's merely read we can claim it now equals the
               spill slot, but not so if it is modified. */
            if (reg_usage_arr[ii].vMode[j] == HRmRead) {
               rreg_state[spillee].eq_spill_slot = True;
            } else {
               vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
               rreg_state[spillee].eq_spill_slot = False;
            }
         }

         /* So after much twisting and turning, we have vreg mapped to
            rreg_state[spillee].rreg.  Note that in the map. */
         addToHRegRemap(&remap, vreg, con->univ->regs[spillee]);
      } /* iterate over virtual registers in this instruction. */

      /* We've finished clowning around with registers in this instruction.
         Three results:
         - the running rreg_state[] has been updated
         - a suitable vreg->rreg mapping for this instruction has been
           constructed
         - spill and reload instructions may have been emitted.

         The final step is to apply the mapping to the instruction,
         and emit that.
      */

      /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
      con->mapRegs(&remap, instrs_in->arr[ii], con->mode64);
      EMIT_INSTR( instrs_in->arr[ii] );

      if (DEBUG_REGALLOC) {
         vex_printf("After dealing with current insn:\n");
         PRINT_STATE;
         vex_printf("\n");
      }
      /* ------ Post-instruction actions for fixed rreg uses ------ */

      /* Now we need to check for rregs exiting fixed live ranges
         after this instruction, and if so mark them as free. */
      while (True) {
         vassert(rreg_lrs_db_next >= 0);
         vassert(rreg_lrs_db_next <= rreg_lrs_used);
         if (rreg_lrs_db_next == rreg_lrs_used)
            break; /* no more real reg live ranges to consider */
         if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
            break; /* next live range does not yet end */
         vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
         /* rreg_lrs_db[rreg_lrs_db_next].rreg is exiting a hard live
            range.  Mark it as such in the main rreg_state array. */
         HReg reg = rreg_lrs_db[rreg_lrs_db_next].rreg;
         vassert(!hregIsVirtual(reg));
         Int k = hregIndex(reg);
         vassert(IS_VALID_RREGNO(k));
         vassert(rreg_state[k].disp == Unavail);
         rreg_state[k].disp = Free;
         rreg_state[k].vreg = INVALID_HREG;
         rreg_state[k].eq_spill_slot = False;

         /* check for further rregs leaving HLRs at this point */
         rreg_lrs_db_next++;
      }

      if (DEBUG_REGALLOC) {
         vex_printf("After post-insn actions for fixed regs:\n");
         PRINT_STATE;
         vex_printf("\n");
      }
   } /* iterate over insns */

   /* ------ END: Process each insn in turn. ------ */

   /* free(rreg_state); */
   /* free(rreg_lrs); */
   /* if (vreg_lrs) free(vreg_lrs); */

   /* Paranoia */
   vassert(rreg_lrs_la_next == rreg_lrs_used);
   vassert(rreg_lrs_db_next == rreg_lrs_used);

   return instrs_out;

#  undef INVALID_INSTRNO
#  undef EMIT_INSTR
#  undef PRINT_STATE
}
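
/* Illustrative sketch (editorial addition, not from the original
   file): a backend would drive the allocator roughly as below, wiring
   its target-specific callbacks into a RegAllocControl.  Only the
   fields this file actually uses are shown, and the X86_* callback
   names are hypothetical placeholders. */
#if 0
static HInstrArray* example_allocate ( HInstrArray* vcode,
                                       const RRegUniverse* univ )
{
   RegAllocControl con;
   vex_bzero(&con, sizeof(con));
   con.univ         = univ;
   con.getRegUsage  = X86_getRegUsage;   /* hypothetical */
   con.mapRegs      = X86_mapRegs;       /* hypothetical */
   con.genSpill     = X86_genSpill;      /* hypothetical */
   con.genReload    = X86_genReload;     /* hypothetical */
   con.directReload = X86_directReload;  /* hypothetical; may be NULL */
   con.ppInstr      = X86_ppInstr;       /* hypothetical */
   con.ppReg        = X86_ppReg;         /* hypothetical */
   con.mode64       = False;
   con.guest_sizeB  = sizeof(VexGuestX86State); /* illustrative */
   return doRegisterAllocation_v2(vcode, &con);
}
#endif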
/*---------------------------------------------------------------*/
/*--- end                                   host_reg_alloc2.c ---*/
/*---------------------------------------------------------------*/