1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallSet.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/Statistic.h"
42 STATISTIC(NumLDMGened
, "Number of ldm instructions generated");
43 STATISTIC(NumSTMGened
, "Number of stm instructions generated");
44 STATISTIC(NumVLDMGened
, "Number of vldm instructions generated");
45 STATISTIC(NumVSTMGened
, "Number of vstm instructions generated");
46 STATISTIC(NumLdStMoved
, "Number of load / store instructions moved");
47 STATISTIC(NumLDRDFormed
,"Number of ldrd created before allocation");
48 STATISTIC(NumSTRDFormed
,"Number of strd created before allocation");
49 STATISTIC(NumLDRD2LDM
, "Number of ldrd instructions turned back into ldm");
50 STATISTIC(NumSTRD2STM
, "Number of strd instructions turned back into stm");
51 STATISTIC(NumLDRD2LDR
, "Number of ldrd instructions turned back into ldr's");
52 STATISTIC(NumSTRD2STR
, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
58 struct ARMLoadStoreOpt
: public MachineFunctionPass
{
60 ARMLoadStoreOpt() : MachineFunctionPass(ID
) {}
62 const TargetInstrInfo
*TII
;
63 const TargetRegisterInfo
*TRI
;
68 virtual bool runOnMachineFunction(MachineFunction
&Fn
);
70 virtual const char *getPassName() const {
71 return "ARM load / store optimization pass";
75 struct MemOpQueueEntry
{
80 MachineBasicBlock::iterator MBBI
;
82 MemOpQueueEntry(int o
, unsigned r
, bool k
, unsigned p
,
83 MachineBasicBlock::iterator i
)
84 : Offset(o
), Reg(r
), isKill(k
), Position(p
), MBBI(i
), Merged(false) {}
86 typedef SmallVector
<MemOpQueueEntry
,8> MemOpQueue
;
87 typedef MemOpQueue::iterator MemOpQueueIter
;
89 bool MergeOps(MachineBasicBlock
&MBB
, MachineBasicBlock::iterator MBBI
,
90 int Offset
, unsigned Base
, bool BaseKill
, int Opcode
,
91 ARMCC::CondCodes Pred
, unsigned PredReg
, unsigned Scratch
,
92 DebugLoc dl
, SmallVector
<std::pair
<unsigned, bool>, 8> &Regs
);
93 void MergeOpsUpdate(MachineBasicBlock
&MBB
,
102 ARMCC::CondCodes Pred
,
106 SmallVector
<MachineBasicBlock::iterator
, 4> &Merges
);
107 void MergeLDR_STR(MachineBasicBlock
&MBB
, unsigned SIndex
, unsigned Base
,
108 int Opcode
, unsigned Size
,
109 ARMCC::CondCodes Pred
, unsigned PredReg
,
110 unsigned Scratch
, MemOpQueue
&MemOps
,
111 SmallVector
<MachineBasicBlock::iterator
, 4> &Merges
);
113 void AdvanceRS(MachineBasicBlock
&MBB
, MemOpQueue
&MemOps
);
114 bool FixInvalidRegPairOp(MachineBasicBlock
&MBB
,
115 MachineBasicBlock::iterator
&MBBI
);
116 bool MergeBaseUpdateLoadStore(MachineBasicBlock
&MBB
,
117 MachineBasicBlock::iterator MBBI
,
118 const TargetInstrInfo
*TII
,
120 MachineBasicBlock::iterator
&I
);
121 bool MergeBaseUpdateLSMultiple(MachineBasicBlock
&MBB
,
122 MachineBasicBlock::iterator MBBI
,
124 MachineBasicBlock::iterator
&I
);
125 bool LoadStoreMultipleOpti(MachineBasicBlock
&MBB
);
126 bool MergeReturnIntoLDM(MachineBasicBlock
&MBB
);
128 char ARMLoadStoreOpt::ID
= 0;
131 static int getLoadStoreMultipleOpcode(int Opcode
) {
159 default: llvm_unreachable("Unhandled opcode!");
164 static bool isT2i32Load(unsigned Opc
) {
165 return Opc
== ARM::t2LDRi12
|| Opc
== ARM::t2LDRi8
;
168 static bool isi32Load(unsigned Opc
) {
169 return Opc
== ARM::LDR
|| isT2i32Load(Opc
);
172 static bool isT2i32Store(unsigned Opc
) {
173 return Opc
== ARM::t2STRi12
|| Opc
== ARM::t2STRi8
;
176 static bool isi32Store(unsigned Opc
) {
177 return Opc
== ARM::STR
|| isT2i32Store(Opc
);
180 /// MergeOps - Create and insert a LDM or STM with Base as base register and
181 /// registers in Regs as the register operands that would be loaded / stored.
182 /// It returns true if the transformation is done.
184 ARMLoadStoreOpt::MergeOps(MachineBasicBlock
&MBB
,
185 MachineBasicBlock::iterator MBBI
,
186 int Offset
, unsigned Base
, bool BaseKill
,
187 int Opcode
, ARMCC::CondCodes Pred
,
188 unsigned PredReg
, unsigned Scratch
, DebugLoc dl
,
189 SmallVector
<std::pair
<unsigned, bool>, 8> &Regs
) {
190 // Only a single register to load / store. Don't bother.
191 unsigned NumRegs
= Regs
.size();
195 ARM_AM::AMSubMode Mode
= ARM_AM::ia
;
196 // VFP and Thumb2 do not support IB or DA modes.
197 bool isNotVFP
= isi32Load(Opcode
) || isi32Store(Opcode
);
198 bool haveIBAndDA
= isNotVFP
&& !isThumb2
;
199 if (Offset
== 4 && haveIBAndDA
)
201 else if (Offset
== -4 * (int)NumRegs
+ 4 && haveIBAndDA
)
203 else if (Offset
== -4 * (int)NumRegs
&& isNotVFP
)
204 // VLDM/VSTM do not support DB mode without also updating the base reg.
206 else if (Offset
!= 0) {
207 // If starting offset isn't zero, insert a MI to materialize a new base.
208 // But only do so if it is cost effective, i.e. merging more than two
214 if (isi32Load(Opcode
))
215 // If it is a load, then just use one of the destination register to
216 // use as the new base.
217 NewBase
= Regs
[NumRegs
-1].first
;
219 // Use the scratch register to use as a new base.
224 int BaseOpc
= !isThumb2
226 : ((Base
== ARM::SP
) ? ARM::t2ADDrSPi
: ARM::t2ADDri
);
230 : ((Base
== ARM::SP
) ? ARM::t2SUBrSPi
: ARM::t2SUBri
);
233 int ImmedOffset
= isThumb2
234 ? ARM_AM::getT2SOImmVal(Offset
) : ARM_AM::getSOImmVal(Offset
);
235 if (ImmedOffset
== -1)
236 // FIXME: Try t2ADDri12 or t2SUBri12?
237 return false; // Probably not worth it then.
239 BuildMI(MBB
, MBBI
, dl
, TII
->get(BaseOpc
), NewBase
)
240 .addReg(Base
, getKillRegState(BaseKill
)).addImm(Offset
)
241 .addImm(Pred
).addReg(PredReg
).addReg(0);
243 BaseKill
= true; // New base is always killed right its use.
246 bool isDef
= (isi32Load(Opcode
) || Opcode
== ARM::VLDRS
||
247 Opcode
== ARM::VLDRD
);
248 Opcode
= getLoadStoreMultipleOpcode(Opcode
);
249 MachineInstrBuilder MIB
= BuildMI(MBB
, MBBI
, dl
, TII
->get(Opcode
))
250 .addReg(Base
, getKillRegState(BaseKill
))
251 .addImm(ARM_AM::getAM4ModeImm(Mode
)).addImm(Pred
).addReg(PredReg
);
252 for (unsigned i
= 0; i
!= NumRegs
; ++i
)
253 MIB
= MIB
.addReg(Regs
[i
].first
, getDefRegState(isDef
)
254 | getKillRegState(Regs
[i
].second
));
259 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
261 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock
&MBB
,
263 unsigned memOpsBegin
, unsigned memOpsEnd
,
264 unsigned insertAfter
, int Offset
,
265 unsigned Base
, bool BaseKill
,
267 ARMCC::CondCodes Pred
, unsigned PredReg
,
270 SmallVector
<MachineBasicBlock::iterator
, 4> &Merges
) {
271 // First calculate which of the registers should be killed by the merged
273 const unsigned insertPos
= memOps
[insertAfter
].Position
;
275 SmallSet
<unsigned, 4> UnavailRegs
;
276 SmallSet
<unsigned, 4> KilledRegs
;
277 DenseMap
<unsigned, unsigned> Killer
;
278 for (unsigned i
= 0; i
< memOpsBegin
; ++i
) {
279 if (memOps
[i
].Position
< insertPos
&& memOps
[i
].isKill
) {
280 unsigned Reg
= memOps
[i
].Reg
;
281 if (memOps
[i
].Merged
)
282 UnavailRegs
.insert(Reg
);
284 KilledRegs
.insert(Reg
);
289 for (unsigned i
= memOpsEnd
, e
= memOps
.size(); i
!= e
; ++i
) {
290 if (memOps
[i
].Position
< insertPos
&& memOps
[i
].isKill
) {
291 unsigned Reg
= memOps
[i
].Reg
;
292 KilledRegs
.insert(Reg
);
297 SmallVector
<std::pair
<unsigned, bool>, 8> Regs
;
298 for (unsigned i
= memOpsBegin
; i
< memOpsEnd
; ++i
) {
299 unsigned Reg
= memOps
[i
].Reg
;
300 if (UnavailRegs
.count(Reg
))
301 // Register is killed before and it's not easy / possible to update the
302 // kill marker on already merged instructions. Abort.
305 // If we are inserting the merged operation after an unmerged operation that
306 // uses the same register, make sure to transfer any kill flag.
307 bool isKill
= memOps
[i
].isKill
|| KilledRegs
.count(Reg
);
308 Regs
.push_back(std::make_pair(Reg
, isKill
));
311 // Try to do the merge.
312 MachineBasicBlock::iterator Loc
= memOps
[insertAfter
].MBBI
;
314 if (!MergeOps(MBB
, Loc
, Offset
, Base
, BaseKill
, Opcode
,
315 Pred
, PredReg
, Scratch
, dl
, Regs
))
318 // Merge succeeded, update records.
319 Merges
.push_back(prior(Loc
));
320 for (unsigned i
= memOpsBegin
; i
< memOpsEnd
; ++i
) {
321 // Remove kill flags from any unmerged memops that come before insertPos.
322 if (Regs
[i
-memOpsBegin
].second
) {
323 unsigned Reg
= Regs
[i
-memOpsBegin
].first
;
324 if (KilledRegs
.count(Reg
)) {
325 unsigned j
= Killer
[Reg
];
326 memOps
[j
].MBBI
->getOperand(0).setIsKill(false);
327 memOps
[j
].isKill
= false;
330 MBB
.erase(memOps
[i
].MBBI
);
331 memOps
[i
].Merged
= true;
335 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
336 /// load / store multiple instructions.
338 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock
&MBB
, unsigned SIndex
,
339 unsigned Base
, int Opcode
, unsigned Size
,
340 ARMCC::CondCodes Pred
, unsigned PredReg
,
341 unsigned Scratch
, MemOpQueue
&MemOps
,
342 SmallVector
<MachineBasicBlock::iterator
, 4> &Merges
) {
343 bool isNotVFP
= isi32Load(Opcode
) || isi32Store(Opcode
);
344 int Offset
= MemOps
[SIndex
].Offset
;
345 int SOffset
= Offset
;
346 unsigned insertAfter
= SIndex
;
347 MachineBasicBlock::iterator Loc
= MemOps
[SIndex
].MBBI
;
348 DebugLoc dl
= Loc
->getDebugLoc();
349 const MachineOperand
&PMO
= Loc
->getOperand(0);
350 unsigned PReg
= PMO
.getReg();
351 unsigned PRegNum
= PMO
.isUndef() ? UINT_MAX
352 : ARMRegisterInfo::getRegisterNumbering(PReg
);
355 for (unsigned i
= SIndex
+1, e
= MemOps
.size(); i
!= e
; ++i
) {
356 int NewOffset
= MemOps
[i
].Offset
;
357 const MachineOperand
&MO
= MemOps
[i
].MBBI
->getOperand(0);
358 unsigned Reg
= MO
.getReg();
359 unsigned RegNum
= MO
.isUndef() ? UINT_MAX
360 : ARMRegisterInfo::getRegisterNumbering(Reg
);
361 // Register numbers must be in ascending order. For VFP, the registers
362 // must also be consecutive and there is a limit of 16 double-word
363 // registers per instruction.
364 if (Reg
!= ARM::SP
&&
365 NewOffset
== Offset
+ (int)Size
&&
366 ((isNotVFP
&& RegNum
> PRegNum
)
367 || ((Size
< 8 || Count
< 16) && RegNum
== PRegNum
+1))) {
372 // Can't merge this in. Try merge the earlier ones first.
373 MergeOpsUpdate(MBB
, MemOps
, SIndex
, i
, insertAfter
, SOffset
,
374 Base
, false, Opcode
, Pred
, PredReg
, Scratch
, dl
, Merges
);
375 MergeLDR_STR(MBB
, i
, Base
, Opcode
, Size
, Pred
, PredReg
, Scratch
,
380 if (MemOps
[i
].Position
> MemOps
[insertAfter
].Position
)
384 bool BaseKill
= Loc
->findRegisterUseOperandIdx(Base
, true) != -1;
385 MergeOpsUpdate(MBB
, MemOps
, SIndex
, MemOps
.size(), insertAfter
, SOffset
,
386 Base
, BaseKill
, Opcode
, Pred
, PredReg
, Scratch
, dl
, Merges
);
390 static inline bool isMatchingDecrement(MachineInstr
*MI
, unsigned Base
,
391 unsigned Bytes
, unsigned Limit
,
392 ARMCC::CondCodes Pred
, unsigned PredReg
){
393 unsigned MyPredReg
= 0;
396 if (MI
->getOpcode() != ARM::t2SUBri
&&
397 MI
->getOpcode() != ARM::t2SUBrSPi
&&
398 MI
->getOpcode() != ARM::t2SUBrSPi12
&&
399 MI
->getOpcode() != ARM::tSUBspi
&&
400 MI
->getOpcode() != ARM::SUBri
)
403 // Make sure the offset fits in 8 bits.
404 if (Bytes
== 0 || (Limit
&& Bytes
>= Limit
))
407 unsigned Scale
= (MI
->getOpcode() == ARM::tSUBspi
) ? 4 : 1; // FIXME
408 return (MI
->getOperand(0).getReg() == Base
&&
409 MI
->getOperand(1).getReg() == Base
&&
410 (MI
->getOperand(2).getImm()*Scale
) == Bytes
&&
411 llvm::getInstrPredicate(MI
, MyPredReg
) == Pred
&&
412 MyPredReg
== PredReg
);
415 static inline bool isMatchingIncrement(MachineInstr
*MI
, unsigned Base
,
416 unsigned Bytes
, unsigned Limit
,
417 ARMCC::CondCodes Pred
, unsigned PredReg
){
418 unsigned MyPredReg
= 0;
421 if (MI
->getOpcode() != ARM::t2ADDri
&&
422 MI
->getOpcode() != ARM::t2ADDrSPi
&&
423 MI
->getOpcode() != ARM::t2ADDrSPi12
&&
424 MI
->getOpcode() != ARM::tADDspi
&&
425 MI
->getOpcode() != ARM::ADDri
)
428 if (Bytes
== 0 || (Limit
&& Bytes
>= Limit
))
429 // Make sure the offset fits in 8 bits.
432 unsigned Scale
= (MI
->getOpcode() == ARM::tADDspi
) ? 4 : 1; // FIXME
433 return (MI
->getOperand(0).getReg() == Base
&&
434 MI
->getOperand(1).getReg() == Base
&&
435 (MI
->getOperand(2).getImm()*Scale
) == Bytes
&&
436 llvm::getInstrPredicate(MI
, MyPredReg
) == Pred
&&
437 MyPredReg
== PredReg
);
440 static inline unsigned getLSMultipleTransferSize(MachineInstr
*MI
) {
441 switch (MI
->getOpcode()) {
463 return (MI
->getNumOperands() - 4) * 4;
467 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc
) {
469 case ARM::LDM
: return ARM::LDM_UPD
;
470 case ARM::STM
: return ARM::STM_UPD
;
471 case ARM::t2LDM
: return ARM::t2LDM_UPD
;
472 case ARM::t2STM
: return ARM::t2STM_UPD
;
473 case ARM::VLDMS
: return ARM::VLDMS_UPD
;
474 case ARM::VLDMD
: return ARM::VLDMD_UPD
;
475 case ARM::VSTMS
: return ARM::VSTMS_UPD
;
476 case ARM::VSTMD
: return ARM::VSTMD_UPD
;
477 default: llvm_unreachable("Unhandled opcode!");
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
483 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
485 /// stmia rn, <ra, rb, rc>
486 /// rn := rn + 4 * 3;
488 /// stmia rn!, <ra, rb, rc>
490 /// rn := rn - 4 * 3;
491 /// ldmia rn, <ra, rb, rc>
493 /// ldmdb rn!, <ra, rb, rc>
494 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock
&MBB
,
495 MachineBasicBlock::iterator MBBI
,
497 MachineBasicBlock::iterator
&I
) {
498 MachineInstr
*MI
= MBBI
;
499 unsigned Base
= MI
->getOperand(0).getReg();
500 bool BaseKill
= MI
->getOperand(0).isKill();
501 unsigned Bytes
= getLSMultipleTransferSize(MI
);
502 unsigned PredReg
= 0;
503 ARMCC::CondCodes Pred
= llvm::getInstrPredicate(MI
, PredReg
);
504 int Opcode
= MI
->getOpcode();
505 DebugLoc dl
= MI
->getDebugLoc();
507 bool DoMerge
= false;
508 ARM_AM::AMSubMode Mode
= ARM_AM::ia
;
510 // Can't use an updating ld/st if the base register is also a dest
511 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
512 for (unsigned i
= 3, e
= MI
->getNumOperands(); i
!= e
; ++i
) {
513 if (MI
->getOperand(i
).getReg() == Base
)
516 Mode
= ARM_AM::getAM4SubMode(MI
->getOperand(1).getImm());
518 // Try merging with the previous instruction.
519 MachineBasicBlock::iterator BeginMBBI
= MBB
.begin();
520 if (MBBI
!= BeginMBBI
) {
521 MachineBasicBlock::iterator PrevMBBI
= prior(MBBI
);
522 while (PrevMBBI
!= BeginMBBI
&& PrevMBBI
->isDebugValue())
524 if (Mode
== ARM_AM::ia
&&
525 isMatchingDecrement(PrevMBBI
, Base
, Bytes
, 0, Pred
, PredReg
)) {
528 } else if (Mode
== ARM_AM::ib
&&
529 isMatchingDecrement(PrevMBBI
, Base
, Bytes
, 0, Pred
, PredReg
)) {
537 // Try merging with the next instruction.
538 MachineBasicBlock::iterator EndMBBI
= MBB
.end();
539 if (!DoMerge
&& MBBI
!= EndMBBI
) {
540 MachineBasicBlock::iterator NextMBBI
= llvm::next(MBBI
);
541 while (NextMBBI
!= EndMBBI
&& NextMBBI
->isDebugValue())
543 if ((Mode
== ARM_AM::ia
|| Mode
== ARM_AM::ib
) &&
544 isMatchingIncrement(NextMBBI
, Base
, Bytes
, 0, Pred
, PredReg
)) {
546 } else if ((Mode
== ARM_AM::da
|| Mode
== ARM_AM::db
) &&
547 isMatchingDecrement(NextMBBI
, Base
, Bytes
, 0, Pred
, PredReg
)) {
562 unsigned NewOpc
= getUpdatingLSMultipleOpcode(Opcode
);
563 MachineInstrBuilder MIB
= BuildMI(MBB
, MBBI
, dl
, TII
->get(NewOpc
))
564 .addReg(Base
, getDefRegState(true)) // WB base register
565 .addReg(Base
, getKillRegState(BaseKill
))
566 .addImm(ARM_AM::getAM4ModeImm(Mode
))
567 .addImm(Pred
).addReg(PredReg
);
568 // Transfer the rest of operands.
569 for (unsigned OpNum
= 4, e
= MI
->getNumOperands(); OpNum
!= e
; ++OpNum
)
570 MIB
.addOperand(MI
->getOperand(OpNum
));
571 // Transfer memoperands.
572 (*MIB
).setMemRefs(MI
->memoperands_begin(), MI
->memoperands_end());
578 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc
) {
580 case ARM::LDR
: return ARM::LDR_PRE
;
581 case ARM::STR
: return ARM::STR_PRE
;
582 case ARM::VLDRS
: return ARM::VLDMS_UPD
;
583 case ARM::VLDRD
: return ARM::VLDMD_UPD
;
584 case ARM::VSTRS
: return ARM::VSTMS_UPD
;
585 case ARM::VSTRD
: return ARM::VSTMD_UPD
;
588 return ARM::t2LDR_PRE
;
591 return ARM::t2STR_PRE
;
592 default: llvm_unreachable("Unhandled opcode!");
597 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc
) {
599 case ARM::LDR
: return ARM::LDR_POST
;
600 case ARM::STR
: return ARM::STR_POST
;
601 case ARM::VLDRS
: return ARM::VLDMS_UPD
;
602 case ARM::VLDRD
: return ARM::VLDMD_UPD
;
603 case ARM::VSTRS
: return ARM::VSTMS_UPD
;
604 case ARM::VSTRD
: return ARM::VSTMD_UPD
;
607 return ARM::t2LDR_POST
;
610 return ARM::t2STR_POST
;
611 default: llvm_unreachable("Unhandled opcode!");
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
617 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
618 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock
&MBB
,
619 MachineBasicBlock::iterator MBBI
,
620 const TargetInstrInfo
*TII
,
622 MachineBasicBlock::iterator
&I
) {
623 MachineInstr
*MI
= MBBI
;
624 unsigned Base
= MI
->getOperand(1).getReg();
625 bool BaseKill
= MI
->getOperand(1).isKill();
626 unsigned Bytes
= getLSMultipleTransferSize(MI
);
627 int Opcode
= MI
->getOpcode();
628 DebugLoc dl
= MI
->getDebugLoc();
629 bool isAM5
= (Opcode
== ARM::VLDRD
|| Opcode
== ARM::VLDRS
||
630 Opcode
== ARM::VSTRD
|| Opcode
== ARM::VSTRS
);
631 bool isAM2
= (Opcode
== ARM::LDR
|| Opcode
== ARM::STR
);
632 if (isAM2
&& ARM_AM::getAM2Offset(MI
->getOperand(3).getImm()) != 0)
634 if (isAM5
&& ARM_AM::getAM5Offset(MI
->getOperand(2).getImm()) != 0)
636 if (isT2i32Load(Opcode
) || isT2i32Store(Opcode
))
637 if (MI
->getOperand(2).getImm() != 0)
640 bool isLd
= isi32Load(Opcode
) || Opcode
== ARM::VLDRS
|| Opcode
== ARM::VLDRD
;
641 // Can't do the merge if the destination register is the same as the would-be
642 // writeback register.
643 if (isLd
&& MI
->getOperand(0).getReg() == Base
)
646 unsigned PredReg
= 0;
647 ARMCC::CondCodes Pred
= llvm::getInstrPredicate(MI
, PredReg
);
648 bool DoMerge
= false;
649 ARM_AM::AddrOpc AddSub
= ARM_AM::add
;
651 // AM2 - 12 bits, thumb2 - 8 bits.
652 unsigned Limit
= isAM5
? 0 : (isAM2
? 0x1000 : 0x100);
654 // Try merging with the previous instruction.
655 MachineBasicBlock::iterator BeginMBBI
= MBB
.begin();
656 if (MBBI
!= BeginMBBI
) {
657 MachineBasicBlock::iterator PrevMBBI
= prior(MBBI
);
658 while (PrevMBBI
!= BeginMBBI
&& PrevMBBI
->isDebugValue())
660 if (isMatchingDecrement(PrevMBBI
, Base
, Bytes
, Limit
, Pred
, PredReg
)) {
662 AddSub
= ARM_AM::sub
;
664 isMatchingIncrement(PrevMBBI
, Base
, Bytes
, Limit
,Pred
,PredReg
)) {
668 NewOpc
= getPreIndexedLoadStoreOpcode(Opcode
);
673 // Try merging with the next instruction.
674 MachineBasicBlock::iterator EndMBBI
= MBB
.end();
675 if (!DoMerge
&& MBBI
!= EndMBBI
) {
676 MachineBasicBlock::iterator NextMBBI
= llvm::next(MBBI
);
677 while (NextMBBI
!= EndMBBI
&& NextMBBI
->isDebugValue())
680 isMatchingDecrement(NextMBBI
, Base
, Bytes
, Limit
, Pred
, PredReg
)) {
682 AddSub
= ARM_AM::sub
;
683 } else if (isMatchingIncrement(NextMBBI
, Base
, Bytes
, Limit
,Pred
,PredReg
)) {
687 NewOpc
= getPostIndexedLoadStoreOpcode(Opcode
);
701 Offset
= ARM_AM::getAM4ModeImm(AddSub
== ARM_AM::sub
?
702 ARM_AM::db
: ARM_AM::ia
);
704 Offset
= ARM_AM::getAM2Opc(AddSub
, Bytes
, ARM_AM::no_shift
);
706 Offset
= AddSub
== ARM_AM::sub
? -Bytes
: Bytes
;
// VLDM[SD]_UPD, VSTM[SD]_UPD
710 // (There are no base-updating versions of VLDR/VSTR instructions, but the
711 // updating load/store-multiple instructions can be used with only one
713 MachineOperand
&MO
= MI
->getOperand(0);
714 BuildMI(MBB
, MBBI
, dl
, TII
->get(NewOpc
))
715 .addReg(Base
, getDefRegState(true)) // WB base register
716 .addReg(Base
, getKillRegState(isLd
? BaseKill
: false))
718 .addImm(Pred
).addReg(PredReg
)
719 .addReg(MO
.getReg(), (isLd
? getDefRegState(true) :
720 getKillRegState(MO
.isKill())));
723 // LDR_PRE, LDR_POST,
724 BuildMI(MBB
, MBBI
, dl
, TII
->get(NewOpc
), MI
->getOperand(0).getReg())
725 .addReg(Base
, RegState::Define
)
726 .addReg(Base
).addReg(0).addImm(Offset
).addImm(Pred
).addReg(PredReg
);
728 // t2LDR_PRE, t2LDR_POST
729 BuildMI(MBB
, MBBI
, dl
, TII
->get(NewOpc
), MI
->getOperand(0).getReg())
730 .addReg(Base
, RegState::Define
)
731 .addReg(Base
).addImm(Offset
).addImm(Pred
).addReg(PredReg
);
733 MachineOperand
&MO
= MI
->getOperand(0);
736 BuildMI(MBB
, MBBI
, dl
, TII
->get(NewOpc
), Base
)
737 .addReg(MO
.getReg(), getKillRegState(MO
.isKill()))
738 .addReg(Base
).addReg(0).addImm(Offset
).addImm(Pred
).addReg(PredReg
);
740 // t2STR_PRE, t2STR_POST
741 BuildMI(MBB
, MBBI
, dl
, TII
->get(NewOpc
), Base
)
742 .addReg(MO
.getReg(), getKillRegState(MO
.isKill()))
743 .addReg(Base
).addImm(Offset
).addImm(Pred
).addReg(PredReg
);
750 /// isMemoryOp - Returns true if instruction is a memory operations (that this
751 /// pass is capable of operating on).
752 static bool isMemoryOp(const MachineInstr
*MI
) {
753 // When no memory operands are present, conservatively assume unaligned,
754 // volatile, unfoldable.
755 if (!MI
->hasOneMemOperand())
758 const MachineMemOperand
*MMO
= *MI
->memoperands_begin();
760 // Don't touch volatile memory accesses - we may be changing their order.
761 if (MMO
->isVolatile())
764 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
766 if (MMO
->getAlignment() < 4)
769 // str <undef> could probably be eliminated entirely, but for now we just want
770 // to avoid making a mess of it.
771 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
772 if (MI
->getNumOperands() > 0 && MI
->getOperand(0).isReg() &&
773 MI
->getOperand(0).isUndef())
776 // Likewise don't mess with references to undefined addresses.
777 if (MI
->getNumOperands() > 1 && MI
->getOperand(1).isReg() &&
778 MI
->getOperand(1).isUndef())
781 int Opcode
= MI
->getOpcode();
786 return MI
->getOperand(1).isReg() && MI
->getOperand(2).getReg() == 0;
789 return MI
->getOperand(1).isReg();
792 return MI
->getOperand(1).isReg();
797 return MI
->getOperand(1).isReg();
802 /// AdvanceRS - Advance register scavenger to just before the earliest memory
803 /// op that is being merged.
804 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock
&MBB
, MemOpQueue
&MemOps
) {
805 MachineBasicBlock::iterator Loc
= MemOps
[0].MBBI
;
806 unsigned Position
= MemOps
[0].Position
;
807 for (unsigned i
= 1, e
= MemOps
.size(); i
!= e
; ++i
) {
808 if (MemOps
[i
].Position
< Position
) {
809 Position
= MemOps
[i
].Position
;
810 Loc
= MemOps
[i
].MBBI
;
814 if (Loc
!= MBB
.begin())
815 RS
->forward(prior(Loc
));
818 static int getMemoryOpOffset(const MachineInstr
*MI
) {
819 int Opcode
= MI
->getOpcode();
820 bool isAM2
= Opcode
== ARM::LDR
|| Opcode
== ARM::STR
;
821 bool isAM3
= Opcode
== ARM::LDRD
|| Opcode
== ARM::STRD
;
822 unsigned NumOperands
= MI
->getDesc().getNumOperands();
823 unsigned OffField
= MI
->getOperand(NumOperands
-3).getImm();
825 if (Opcode
== ARM::t2LDRi12
|| Opcode
== ARM::t2LDRi8
||
826 Opcode
== ARM::t2STRi12
|| Opcode
== ARM::t2STRi8
||
827 Opcode
== ARM::t2LDRDi8
|| Opcode
== ARM::t2STRDi8
)
831 ? ARM_AM::getAM2Offset(OffField
)
832 : (isAM3
? ARM_AM::getAM3Offset(OffField
)
833 : ARM_AM::getAM5Offset(OffField
) * 4);
835 if (ARM_AM::getAM2Op(OffField
) == ARM_AM::sub
)
838 if (ARM_AM::getAM3Op(OffField
) == ARM_AM::sub
)
841 if (ARM_AM::getAM5Op(OffField
) == ARM_AM::sub
)
847 static void InsertLDR_STR(MachineBasicBlock
&MBB
,
848 MachineBasicBlock::iterator
&MBBI
,
849 int OffImm
, bool isDef
,
850 DebugLoc dl
, unsigned NewOpc
,
851 unsigned Reg
, bool RegDeadKill
, bool RegUndef
,
852 unsigned BaseReg
, bool BaseKill
, bool BaseUndef
,
853 unsigned OffReg
, bool OffKill
, bool OffUndef
,
854 ARMCC::CondCodes Pred
, unsigned PredReg
,
855 const TargetInstrInfo
*TII
, bool isT2
) {
859 Offset
= ARM_AM::getAM2Opc(ARM_AM::sub
, -OffImm
, ARM_AM::no_shift
);
861 Offset
= ARM_AM::getAM2Opc(ARM_AM::add
, OffImm
, ARM_AM::no_shift
);
864 MachineInstrBuilder MIB
= BuildMI(MBB
, MBBI
, MBBI
->getDebugLoc(),
866 .addReg(Reg
, getDefRegState(true) | getDeadRegState(RegDeadKill
))
867 .addReg(BaseReg
, getKillRegState(BaseKill
)|getUndefRegState(BaseUndef
));
869 MIB
.addReg(OffReg
, getKillRegState(OffKill
)|getUndefRegState(OffUndef
));
870 MIB
.addImm(Offset
).addImm(Pred
).addReg(PredReg
);
872 MachineInstrBuilder MIB
= BuildMI(MBB
, MBBI
, MBBI
->getDebugLoc(),
874 .addReg(Reg
, getKillRegState(RegDeadKill
) | getUndefRegState(RegUndef
))
875 .addReg(BaseReg
, getKillRegState(BaseKill
)|getUndefRegState(BaseUndef
));
877 MIB
.addReg(OffReg
, getKillRegState(OffKill
)|getUndefRegState(OffUndef
));
878 MIB
.addImm(Offset
).addImm(Pred
).addReg(PredReg
);
882 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock
&MBB
,
883 MachineBasicBlock::iterator
&MBBI
) {
884 MachineInstr
*MI
= &*MBBI
;
885 unsigned Opcode
= MI
->getOpcode();
886 if (Opcode
== ARM::LDRD
|| Opcode
== ARM::STRD
||
887 Opcode
== ARM::t2LDRDi8
|| Opcode
== ARM::t2STRDi8
) {
888 unsigned EvenReg
= MI
->getOperand(0).getReg();
889 unsigned OddReg
= MI
->getOperand(1).getReg();
890 unsigned EvenRegNum
= TRI
->getDwarfRegNum(EvenReg
, false);
891 unsigned OddRegNum
= TRI
->getDwarfRegNum(OddReg
, false);
892 if ((EvenRegNum
& 1) == 0 && (EvenRegNum
+ 1) == OddRegNum
)
895 MachineBasicBlock::iterator NewBBI
= MBBI
;
896 bool isT2
= Opcode
== ARM::t2LDRDi8
|| Opcode
== ARM::t2STRDi8
;
897 bool isLd
= Opcode
== ARM::LDRD
|| Opcode
== ARM::t2LDRDi8
;
898 bool EvenDeadKill
= isLd
?
899 MI
->getOperand(0).isDead() : MI
->getOperand(0).isKill();
900 bool EvenUndef
= MI
->getOperand(0).isUndef();
901 bool OddDeadKill
= isLd
?
902 MI
->getOperand(1).isDead() : MI
->getOperand(1).isKill();
903 bool OddUndef
= MI
->getOperand(1).isUndef();
904 const MachineOperand
&BaseOp
= MI
->getOperand(2);
905 unsigned BaseReg
= BaseOp
.getReg();
906 bool BaseKill
= BaseOp
.isKill();
907 bool BaseUndef
= BaseOp
.isUndef();
908 unsigned OffReg
= isT2
? 0 : MI
->getOperand(3).getReg();
909 bool OffKill
= isT2
? false : MI
->getOperand(3).isKill();
910 bool OffUndef
= isT2
? false : MI
->getOperand(3).isUndef();
911 int OffImm
= getMemoryOpOffset(MI
);
912 unsigned PredReg
= 0;
913 ARMCC::CondCodes Pred
= llvm::getInstrPredicate(MI
, PredReg
);
915 if (OddRegNum
> EvenRegNum
&& OffReg
== 0 && OffImm
== 0) {
916 // Ascending register numbers and no offset. It's safe to change it to a
918 unsigned NewOpc
= (isLd
)
919 ? (isT2
? ARM::t2LDM
: ARM::LDM
)
920 : (isT2
? ARM::t2STM
: ARM::STM
);
922 BuildMI(MBB
, MBBI
, MBBI
->getDebugLoc(), TII
->get(NewOpc
))
923 .addReg(BaseReg
, getKillRegState(BaseKill
))
924 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia
))
925 .addImm(Pred
).addReg(PredReg
)
926 .addReg(EvenReg
, getDefRegState(isLd
) | getDeadRegState(EvenDeadKill
))
927 .addReg(OddReg
, getDefRegState(isLd
) | getDeadRegState(OddDeadKill
));
930 BuildMI(MBB
, MBBI
, MBBI
->getDebugLoc(), TII
->get(NewOpc
))
931 .addReg(BaseReg
, getKillRegState(BaseKill
))
932 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia
))
933 .addImm(Pred
).addReg(PredReg
)
935 getKillRegState(EvenDeadKill
) | getUndefRegState(EvenUndef
))
937 getKillRegState(OddDeadKill
) | getUndefRegState(OddUndef
));
940 NewBBI
= llvm::prior(MBBI
);
942 // Split into two instructions.
943 assert((!isT2
|| !OffReg
) &&
944 "Thumb2 ldrd / strd does not encode offset register!");
945 unsigned NewOpc
= (isLd
)
946 ? (isT2
? (OffImm
< 0 ? ARM::t2LDRi8
: ARM::t2LDRi12
) : ARM::LDR
)
947 : (isT2
? (OffImm
< 0 ? ARM::t2STRi8
: ARM::t2STRi12
) : ARM::STR
);
948 DebugLoc dl
= MBBI
->getDebugLoc();
949 // If this is a load and base register is killed, it may have been
950 // re-defed by the load, make sure the first load does not clobber it.
952 (BaseKill
|| OffKill
) &&
953 (TRI
->regsOverlap(EvenReg
, BaseReg
) ||
954 (OffReg
&& TRI
->regsOverlap(EvenReg
, OffReg
)))) {
955 assert(!TRI
->regsOverlap(OddReg
, BaseReg
) &&
956 (!OffReg
|| !TRI
->regsOverlap(OddReg
, OffReg
)));
957 InsertLDR_STR(MBB
, MBBI
, OffImm
+4, isLd
, dl
, NewOpc
,
958 OddReg
, OddDeadKill
, false,
959 BaseReg
, false, BaseUndef
, OffReg
, false, OffUndef
,
960 Pred
, PredReg
, TII
, isT2
);
961 NewBBI
= llvm::prior(MBBI
);
962 InsertLDR_STR(MBB
, MBBI
, OffImm
, isLd
, dl
, NewOpc
,
963 EvenReg
, EvenDeadKill
, false,
964 BaseReg
, BaseKill
, BaseUndef
, OffReg
, OffKill
, OffUndef
,
965 Pred
, PredReg
, TII
, isT2
);
967 if (OddReg
== EvenReg
&& EvenDeadKill
) {
968 // If the two source operands are the same, the kill marker is
969 // probably on the first one. e.g.
970 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
971 EvenDeadKill
= false;
974 InsertLDR_STR(MBB
, MBBI
, OffImm
, isLd
, dl
, NewOpc
,
975 EvenReg
, EvenDeadKill
, EvenUndef
,
976 BaseReg
, false, BaseUndef
, OffReg
, false, OffUndef
,
977 Pred
, PredReg
, TII
, isT2
);
978 NewBBI
= llvm::prior(MBBI
);
979 InsertLDR_STR(MBB
, MBBI
, OffImm
+4, isLd
, dl
, NewOpc
,
980 OddReg
, OddDeadKill
, OddUndef
,
981 BaseReg
, BaseKill
, BaseUndef
, OffReg
, OffKill
, OffUndef
,
982 Pred
, PredReg
, TII
, isT2
);
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
/// Returns true if at least one LDM/STM was formed in \p MBB.
///
/// NOTE(review): this extract is missing a number of original lines (the
/// instruction scan-loop header, several closing braces, and parts of the
/// chain bookkeeping). Elisions are marked inline; the surviving code is
/// annotated as-is and is not self-contained.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;        // number of LDM/STM instructions created
  unsigned NumMemOps = 0;        // ops queued in the chain being built
  // [line elided in this extract]
  unsigned CurrBase = 0;         // base register of the open chain (0 = none)
  // [line elided in this extract]
  unsigned CurrSize = 0;         // per-op transfer size of the open chain
  ARMCC::CondCodes CurrPred = ARMCC::AL; // predicate shared by the chain
  unsigned CurrPredReg = 0;
  unsigned Position = 0;         // running position index for queued ops
  SmallVector<MachineBasicBlock::iterator,4> Merges; // LDM/STMs formed

  RS->enterBasicBlock(&MBB);     // (re)initialize the register scavenger
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  // [scan-loop header elided in this extract]
  if (FixInvalidRegPairOp(MBB, MBBI))
    // [taken-branch body elided in this extract]

  bool Advance = false;          // advance MBBI at the bottom of the loop?
  bool TryMerge = false;         // flush the queued chain now?
  bool Clobber = false;          // current load overwrites its own base?

  bool isMemOp = isMemoryOp(MBBI);
  // [guard elided in this extract]
  int Opcode = MBBI->getOpcode();
  unsigned Size = getLSMultipleTransferSize(MBBI);
  const MachineOperand &MO = MBBI->getOperand(0);
  unsigned Reg = MO.getReg();
  // A def carries no kill flag; only record kill for plain uses.
  bool isKill = MO.isDef() ? false : MO.isKill();
  unsigned Base = MBBI->getOperand(1).getReg();
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
  int Offset = getMemoryOpOffset(MBBI);
  // Watch out for:
  //   r5 := ldr [r5, #4]
  //   r6 := ldr [r5, #8]
  //
  // The second ldr has effectively broken the chain even though it
  // looks like the later ldr(s) use the same base register. Try to
  // merge the ldr's so far, including this one. But don't try to
  // combine the following ldr(s).
  Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
  if (CurrBase == 0 && !Clobber) {
    // Start of a new chain.
    // [initialization of CurrBase/CurrOpc/CurrSize/CurrPred elided]
    CurrPredReg = PredReg;
    MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
    // [lines elided in this extract]
  if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
    // Same opcode, base and condition code as the open chain.
    // No need to match PredReg.
    // Continue adding to the queue.
    if (Offset > MemOps.back().Offset) {
      // Offsets arriving in increasing order: append to the queue.
      MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
      // [remaining arguments and following lines elided]
      // Otherwise locate the sorted insertion point.
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
        // [loop condition/increment elided]
        if (Offset < I->Offset) {
          MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
          // [remaining arguments and following lines elided]
        } else if (Offset == I->Offset) {
          // Collision! This can't be merged!
          // [lines elided in this extract]
  if (MBBI->isDebugValue()) {
    // [body elided — presumably debug values are stepped over without
    //  breaking the chain; TODO confirm against upstream]
    // Reach the end of the block, try merging the memory instructions.
    // [lines elided in this extract]
  } else if (Advance) {
    // [MBBI/Position advance elided]
    // Reach the end of the block, try merging the memory instructions.
    // [lines elided in this extract]
  if (NumMemOps > 1) {
    // Try to find a free register to use as a new base in case it's needed.
    // First advance to the instruction just before the start of the chain.
    AdvanceRS(MBB, MemOps);
    // Find a scratch register.
    unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
    // Process the load / store instructions.
    RS->forward(prior(MBBI));
    // [lines elided in this extract]
    MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                 CurrPred, CurrPredReg, Scratch, MemOps, Merges);

    // Try folding preceding/trailing base inc/dec into the generated
    // LDM/STM ops.
    for (unsigned i = 0, e = Merges.size(); i < e; ++i)
      if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
        // [success bookkeeping elided]
    NumMerges += Merges.size();

    // Try folding preceding/trailing base inc/dec into those load/store
    // that were not merged to form LDM/STM ops.
    for (unsigned i = 0; i != NumMemOps; ++i)
      if (!MemOps[i].Merged)
        if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII, Advance, MBBI))
          // [success bookkeeping elided]

    // RS may be pointing to an instruction that's deleted.
    RS->skipTo(prior(MBBI));
  } else if (NumMemOps == 1) {
    // Try folding preceding/trailing base inc/dec into the single
    // load / store.
    if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
      // [success bookkeeping elided]
      RS->forward(prior(MBBI));
      // [lines elided in this extract]
  // [reset of chain state partially elided]
  CurrPred = ARMCC::AL;
  // [lines elided in this extract]

  // If iterator hasn't been advanced and this is not a memory op, skip it.
  // It can't start a new chain anyway.
  if (!Advance && !isMemOp && MBBI != E) {
    // [body elided in this extract]
  return NumMerges > 0;
  // [closing brace elided in this extract]
1161 struct OffsetCompare
{
1162 bool operator()(const MachineInstr
*LHS
, const MachineInstr
*RHS
) const {
1163 int LOffset
= getMemoryOpOffset(LHS
);
1164 int ROffset
= getMemoryOpOffset(RHS
);
1165 assert(LHS
== RHS
|| LOffset
!= ROffset
);
1166 return LOffset
> ROffset
;
/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restore the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
///
/// NOTE(review): the tail of this function is elided from this extract —
/// presumably it rewrites the popped LR operand to PC, erases the return
/// instruction, and returns true/false; confirm against upstream.
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  // Only act on blocks ending in one of the recognized return forms.
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    // The instruction just before the return must be a writeback pop.
    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
      // The register popped last must be LR for the fold to apply.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        // [early return elided in this extract]
      // Retarget the pop to the return-forming LDM variant.
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      // [function tail elided in this extract]
1205 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction
&Fn
) {
1206 const TargetMachine
&TM
= Fn
.getTarget();
1207 AFI
= Fn
.getInfo
<ARMFunctionInfo
>();
1208 TII
= TM
.getInstrInfo();
1209 TRI
= TM
.getRegisterInfo();
1210 RS
= new RegScavenger();
1211 isThumb2
= AFI
->isThumb2Function();
1213 bool Modified
= false;
1214 for (MachineFunction::iterator MFI
= Fn
.begin(), E
= Fn
.end(); MFI
!= E
;
1216 MachineBasicBlock
&MBB
= *MFI
;
1217 Modified
|= LoadStoreMultipleOpti(MBB
);
1218 Modified
|= MergeReturnIntoLDM(MBB
);
/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
/// load / stores from consecutive locations close to make it more
/// likely they will be combined later.
///
/// NOTE(review): several declarations appear to have been elided from this
/// extract — e.g. the pass's "static char ID;" member, which both the
/// constructor and the out-of-line definition below reference, some closing
/// braces, and trailing parameters of CanFormLdStDWord.
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
  ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

  const TargetData *TD;            // data layout, used for alignment queries
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;         // subtarget feature checks (e.g. v5TE)
  MachineRegisterInfo *MRI;        // used for register allocation hints
  MachineFunction *MF;             // current function being optimized

  virtual bool runOnMachineFunction(MachineFunction &Fn);

  virtual const char *getPassName() const {
    return "ARM pre- register allocation load / store optimization pass";
  // [closing brace of getPassName elided in this extract]

  // CanFormLdStDWord - Check whether Op0/Op1 can be combined into a single
  // LDRD/STRD; on success the by-reference parameters describe the new
  // paired instruction.
  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                        unsigned &NewOpc, unsigned &EvenReg,
                        unsigned &OddReg, unsigned &BaseReg,
                        unsigned &OffReg, int &Offset,
                        unsigned &PredReg, ARMCC::CondCodes &Pred,
  // [trailing parameter(s) of this declaration elided in this extract]

  // RescheduleOps - Move loads/stores of the same base close together.
  bool RescheduleOps(MachineBasicBlock *MBB,
                     SmallVector<MachineInstr*, 4> &Ops,
                     unsigned Base, bool isLd,
                     DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// [closing "};" of the struct elided in this extract]

// Pass identification: the address of ID uniquely identifies the pass.
char ARMPreAllocLoadStoreOpt::ID = 0;
1264 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction
&Fn
) {
1265 TD
= Fn
.getTarget().getTargetData();
1266 TII
= Fn
.getTarget().getInstrInfo();
1267 TRI
= Fn
.getTarget().getRegisterInfo();
1268 STI
= &Fn
.getTarget().getSubtarget
<ARMSubtarget
>();
1269 MRI
= &Fn
.getRegInfo();
1272 bool Modified
= false;
1273 for (MachineFunction::iterator MFI
= Fn
.begin(), E
= Fn
.end(); MFI
!= E
;
1275 Modified
|= RescheduleLoadStoreInstrs(MFI
);
/// IsSafeAndProfitableToMove - Decide whether the memory ops in \p MemOps
/// can be moved next to each other across the instruction range [I, E)
/// without changing behavior, and whether doing so keeps the estimated
/// register pressure acceptable.
/// \p isLd    true when the ops being moved are loads
/// \p Base    common base register of the ops
/// \p MemRegs registers transferred by the ops themselves
///
/// NOTE(review): the scan-loop header, the early "return false"/"continue"
/// statements and some operand guards are elided from this extract.
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // [continuation of this comment elided]
  SmallSet<unsigned, 4> AddedRegPressure; // regs the move keeps live longer
  // [scan-loop header elided in this extract]
  // Instructions already in the group, and debug values, are ignored.
  if (I->isDebugValue() || MemOps.count(&*I))
    // [continue elided]
  const TargetInstrDesc &TID = I->getDesc();
  // Calls, terminators and side-effecting instructions are hard barriers.
  if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
    // [return false elided]
  // An intervening store may alias the loads being moved.
  if (isLd && TID.mayStore())
    // [return false elided]
  // [store-side guard elided; surviving original comment fragments:]
  // It's not safe to move the first 'str' down.
  // e.g. str r4, [r0, #+4]
  for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
    MachineOperand &MO = I->getOperand(j);
    // [frame-index / non-register operand guards elided]
    unsigned Reg = MO.getReg();
    // A redefinition of the base register breaks the group.
    if (MO.isDef() && TRI->regsOverlap(Reg, Base))
      // [return false elided]
    if (Reg != Base && !MemRegs.count(Reg))
      AddedRegPressure.insert(Reg);
  // [closing braces elided in this extract]

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    // [return true elided]
  return AddedRegPressure.size() <= MemRegs.size() * 2;
  // [closing brace elided in this extract]
/// CanFormLdStDWord - Check whether \p Op0 and \p Op1 (two i32 loads or two
/// i32 stores off the same base) can be replaced by a single LDRD/STRD. On
/// success the by-reference parameters are filled in for building the
/// paired instruction.
///
/// NOTE(review): the "bool" return type on the preceding original line, two
/// parameters, and the early "return false" statements / scale computations
/// are elided from this extract; elisions are marked inline.
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          // [DebugLoc &dl parameter elided]
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          // [PredReg parameter elided]
                                          ARMCC::CondCodes &Pred,
                                          // [trailing isT2 parameter elided]
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    // [return false elided]

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    // [NewOpc assignment elided — presumably ARM::LDRD; TODO confirm]
  else if (Opcode == ARM::STR)
    // [NewOpc assignment elided]
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    // [scale / isT2 bookkeeping elided]
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    // [scale / isT2 bookkeeping and fall-through return elided]

  // Make sure the offset registers match.
  // [first half of this condition elided]
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    // [return false elided]

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    // [return false elided]

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    // [return false elided]

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  // [Thumb2 branch header elided]
  // Can't fall back to t2LDRi8 / t2STRi8.
  int Limit = (1 << 8) * Scale;
  if (OffImm >= Limit || (OffImm & (Scale-1)))
    // [return false elided]
  // [ARM-mode branch — negative offsets use the subtract form:]
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  // [negative-offset check elided]
  AddSub = ARM_AM::sub;
  // [lines elided in this extract]
  int Limit = (1 << 8) * Scale;
  if (OffImm >= Limit || (OffImm & (Scale-1)))
    // [return false elided]
  Offset = ARM_AM::getAM3Opc(AddSub, OffImm);

  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  // LDRD/STRD cannot transfer the same register twice.
  if (EvenReg == OddReg)
    // [return false elided]
  BaseReg = Op0->getOperand(1).getReg();
  // [line elided in this extract]
  OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  // [return true and closing brace elided in this extract]
/// RescheduleOps - Move loads / stores of the same base register close to
/// each other and, for a suitable adjacent pair, combine them into one
/// LDRD/STRD. Returns true if anything was changed.
///
/// NOTE(review): multiple lines (break/continue statements, branch headers,
/// several closing braces and the final return) are elided from this
/// extract; elisions are marked inline.
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVector<MachineInstr*, 4> &Ops,
                                            unsigned Base, bool isLd,
                                            DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // [remainder of this checklist elided]
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;     // smallest location index in the group
    unsigned LastLoc = 0;        // largest location index in the group
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    // [line elided in this extract]
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Ops is sorted by decreasing offset, so iterate backwards to visit
    // increasing offsets.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        // [FirstLoc/FirstOp update elided]
      if (Loc >= LastLoc) {
        // [LastLoc/LastOp update elided]

      unsigned Opcode = Op->getOpcode();
      // Only runs of identical opcodes can be grouped.
      if (LastOpcode && Opcode != LastOpcode)
        // [break elided]

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      // Offsets must be consecutive: previous offset plus transfer size.
      if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
        // [break elided]
      LastOffset = Offset;
      // [line elided in this extract]
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        // [break elided]

    // [group-size guard elided]
    SmallPtrSet<MachineInstr*, 4> MemOps;
    SmallSet<unsigned, 4> MemRegs;
    for (int i = NumMove-1; i >= 0; --i) {
      MemOps.insert(Ops[i]);
      MemRegs.insert(Ops[i]->getOperand(0).getReg());
    // [closing brace elided]

    // Be conservative, if the instructions are too far apart, don't
    // move them. We want to limit the increase of register pressure.
    bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
    // [guard elided]
    DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                       MemOps, MemRegs, TRI);
    // [if (!DoMove) branch elided]
    for (unsigned i = 0; i != NumMove; ++i)
      // [removal of non-moved ops elided]
    // [else branch begins:]
      // This is the new location for the loads / stores.
      MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
      while (InsertPos != MBB->end()
             && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
        // [++InsertPos elided]

      // If we are moving a pair of loads / stores, see if it makes sense
      // to try to allocate a pair of registers that can form register pairs.
      MachineInstr *Op0 = Ops.back();
      MachineInstr *Op1 = Ops[Ops.size()-2];
      unsigned EvenReg = 0, OddReg = 0;
      unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
      ARMCC::CondCodes Pred = ARMCC::AL;
      // [isT2/dl/Offset locals elided]
      unsigned NewOpc = 0;
      // [lines elided in this extract]
      if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                           EvenReg, OddReg, BaseReg, OffReg,
                                           Offset, PredReg, Pred, isT2)) {
        // [statistics bookkeeping elided]
        // Form the pair instruction.
        // [load branch:]
        MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                          dl, TII->get(NewOpc))
          .addReg(EvenReg, RegState::Define)
          .addReg(OddReg, RegState::Define)
          // [base/offset-register operands elided]
        MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
        // [store branch:]
        MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                          dl, TII->get(NewOpc))
          // [source/base operands elided]
        MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
        // [erasure of the two original instructions elided]

        // Add register allocation hints to form register pairs.
        MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
        MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
      // [else: splice each op in front of the insertion point:]
        for (unsigned i = 0; i != NumMove; ++i) {
          MachineInstr *Op = Ops.back();
          // [Ops.pop_back() elided]
          MBB->splice(InsertPos, MBB, Op);
        // [closing braces elided]

      NumLdStMoved += NumMove;
      // [RetVal update, loop tail and final return elided]
/// RescheduleLoadStoreInstrs - Bucket the unpredicated i32/VFP loads and
/// stores of \p MBB by base register, then run RescheduleOps on each bucket
/// so ops off the same base end up adjacent. Returns true on any change.
///
/// NOTE(review): the "bool" return type on the preceding original line, the
/// enclosing rescan loop, "continue"/"break" statements and the
/// duplicate-offset (StopHere) handling are elided from this extract.
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;  // instruction -> position
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; // base -> loads
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; // base -> stores
  SmallVector<unsigned, 4> LdBases;  // bases with at least one load
  SmallVector<unsigned, 4> StBases;  // bases with at least one store

  // [Loc counter declaration elided]
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  // [outer rescan-loop header elided]
  for (; MBBI != E; ++MBBI) {
    MachineInstr *MI = MBBI;
    const TargetInstrDesc &TID = MI->getDesc();
    if (TID.isCall() || TID.isTerminator()) {
      // Stop at barriers.
      // [break elided]

    // Debug values get no position so they never gate rescheduling.
    if (!MI->isDebugValue())
      MI2LocMap[MI] = ++Loc;
    // [line elided in this extract]
    if (!isMemoryOp(MI))
      // [continue elided]
    // Only unpredicated ops are considered.
    unsigned PredReg = 0;
    if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
      // [continue elided]

    int Opc = MI->getOpcode();
    bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
    unsigned Base = MI->getOperand(1).getReg();
    int Offset = getMemoryOpOffset(MI);

    bool StopHere = false;
    // [load branch header elided]
    DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
      Base2LdsMap.find(Base);
    if (BI != Base2LdsMap.end()) {
      // Base seen before; check for an offset collision.
      for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
        if (Offset == getMemoryOpOffset(BI->second[i])) {
          // [StopHere = true; break; elided]
      BI->second.push_back(MI);
      // [else: first load seen for this base]
      SmallVector<MachineInstr*, 4> MIs;
      // [MIs.push_back(MI) elided]
      Base2LdsMap[Base] = MIs;
      LdBases.push_back(Base);
    // [store side, mirroring the load side:]
    DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
      Base2StsMap.find(Base);
    if (BI != Base2StsMap.end()) {
      for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
        if (Offset == getMemoryOpOffset(BI->second[i])) {
          // [StopHere = true; break; elided]
      BI->second.push_back(MI);
      // [else: first store seen for this base]
      SmallVector<MachineInstr*, 4> MIs;
      // [MIs.push_back(MI) elided]
      Base2StsMap[Base] = MIs;
      StBases.push_back(Base);

    // Found a duplicate (a base+offset combination that's seen earlier).
    // [StopHere handling elided]

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      // [size guard elided]
      RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    // [closing brace elided]

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      // [size guard elided]
      RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    // [closing brace elided]

    // Reset the per-region buckets before continuing the scan.
    Base2LdsMap.clear();
    Base2StsMap.clear();
    // [LdBases/StBases reset, loop tail and final return elided]
1664 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1665 /// optimization pass.
1666 FunctionPass
*llvm::createARMLoadStoreOptimizationPass(bool PreAlloc
) {
1668 return new ARMPreAllocLoadStoreOpt();
1669 return new ARMLoadStoreOpt();