1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the ARM target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "ARM.h"
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMTargetMachine.h"
16 #include "MCTargetDesc/ARMAddressingModes.h"
17 #include "Utils/ARMBaseInfo.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/CodeGen/TargetLowering.h"
26 #include "llvm/IR/CallingConv.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/LLVMContext.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Target/TargetOptions.h"
36 
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "arm-isel"
40 
// Hidden command-line option "disable-shifter-op" (defaults to false). When it
// is set, SelectImmShifterOperand and SelectRegShifterOperand bail out
// immediately and never match a shifter operand.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
  cl::desc("Disable isel of shifter-op"),
  cl::init(false));
45 
46 //===--------------------------------------------------------------------===//
47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
48 /// instructions for SelectionDAG operations.
49 ///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  /// The constructor does not set it; runOnMachineFunction assigns it for
  /// every function before selection starts.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  /// Cache the ARM subtarget of \p MF, then run the generic selector on it.
  /// The base-class return value is discarded; this always returns true.
  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  /// Same as SelectRegShifterOperand, but with CheckProfitability = false.
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B, SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  /// Same as SelectImmShifterOperand, but with CheckProfitability = false.
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
                                    SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  /// Split a predicate constant into the operand pair (Pred, Reg).
  /// \p N must be a ConstantSDNode (cast<> is used, so anything else is a
  /// programming error). \p Pred receives its zero-extended value as an i32
  /// target constant and \p Reg receives the CPSR register. Always succeeds.
  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                 SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  // Immediate predicates. Each returns true when the corresponding ARM_AM
  // encoder does not return -1 for Imm (or for ~Imm in the *_not variants).
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
  // NOTE(review): the generated code presumably refers to the members declared
  // above by name, so renaming them requires matching .td changes - confirm.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Returns the number of instructions required to materialize the given
  /// constant in a register, or 3 if a literal pool load is needed.
  unsigned ConstantMaterializationCost(unsigned Val) const;

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
} // end anonymous namespace
270 
271 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
272 /// operand. If so Imm will receive the 32-bit value.
273 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
274   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
275     Imm = cast<ConstantSDNode>(N)->getZExtValue();
276     return true;
277   }
278   return false;
279 }
280 
281 // isInt32Immediate - This method tests to see if a constant operand.
282 // If so Imm will receive the 32 bit value.
283 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
284   return isInt32Immediate(N.getNode(), Imm);
285 }
286 
287 // isOpcWithIntImmediate - This method tests to see if the node is a specific
288 // opcode and that it has a immediate integer right operand.
289 // If so Imm will receive the 32 bit value.
290 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
291   return N->getOpcode() == Opc &&
292          isInt32Immediate(N->getOperand(1).getNode(), Imm);
293 }
294 
295 /// Check whether a particular node is a constant value representable as
296 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
297 ///
298 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
299 static bool isScaledConstantInRange(SDValue Node, int Scale,
300                                     int RangeMin, int RangeMax,
301                                     int &ScaledConstant) {
302   assert(Scale > 0 && "Invalid scale!");
303 
304   // Check that this is a constant.
305   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
306   if (!C)
307     return false;
308 
309   ScaledConstant = (int) C->getZExtValue();
310   if ((ScaledConstant % Scale) != 0)
311     return false;
312 
313   ScaledConstant /= Scale;
314   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
315 }
316 
/// Pre-selection DAG rewrite, performed only on subtargets with V6T2 ops.
/// Every ISD::ADD whose one operand is (and (srl X, c1), c2), with c2 a run of
/// set bits followed by exactly 1 or 2 trailing zeros, is rewritten so that
/// the mask is applied after an adjusted shift and the trailing zeros become
/// an explicit SHL feeding the add (see the comment inside the loop).
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  // NOTE(review): named isThumb2 but initialised from isThumb(); given the
  // hasV6T2Ops() guard above this presumably implies Thumb2 - confirm.
  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Advance the iterator first to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    // Canonicalise so that N1 is the AND-with-immediate operand, if any.
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    // And_imm stays 0 when neither operand matched (or the mask itself is 0).
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After dropping the trailing zeros the mask must be of the form 2^k - 1,
    // i.e. a contiguous run of low set bits.
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    // Shift amounts of 2 or less are not transformed.
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    // New SRL shifts by the original amount plus the trailing-zero count.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    // New AND uses the mask with its trailing zeros removed.
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    // The removed trailing zeros are restored by an explicit left shift.
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}
399 
400 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
401 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
402 /// least on current ARM implementations) which should be avoidded.
403 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
404   if (OptLevel == CodeGenOpt::None)
405     return true;
406 
407   if (!Subtarget->hasVMLxHazards())
408     return true;
409 
410   if (!N->hasOneUse())
411     return false;
412 
413   SDNode *Use = *N->use_begin();
414   if (Use->getOpcode() == ISD::CopyToReg)
415     return true;
416   if (Use->isMachineOpcode()) {
417     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
418         CurDAG->getSubtarget().getInstrInfo());
419 
420     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
421     if (MCID.mayStore())
422       return true;
423     unsigned Opcode = MCID.getOpcode();
424     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
425       return true;
426     // vmlx feeding into another vmlx. We actually want to unfold
427     // the use later in the MLxExpansion pass. e.g.
428     // vmla
429     // vmla (stall 8 cycles)
430     //
431     // vmul (5 cycles)
432     // vadd (5 cycles)
433     // vmla
434     // This adds up to about 18 - 19 cycles.
435     //
436     // vmla
437     // vmul (stall 4 cycles)
438     // vadd adds up to about 14 cycles.
439     return TII->isFpMLxInstruction(Opcode);
440   }
441 
442   return false;
443 }
444 
445 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
446                                             ARM_AM::ShiftOpc ShOpcVal,
447                                             unsigned ShAmt) {
448   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
449     return true;
450   if (Shift.hasOneUse())
451     return true;
452   // R << 2 is free.
453   return ShOpcVal == ARM_AM::lsl &&
454          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
455 }
456 
457 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
458   if (Subtarget->isThumb()) {
459     if (Val <= 255) return 1;                               // MOV
460     if (Subtarget->hasV6T2Ops() &&
461         (Val <= 0xffff ||                                   // MOV
462          ARM_AM::getT2SOImmVal(Val) != -1 ||                // MOVW
463          ARM_AM::getT2SOImmVal(~Val) != -1))                // MVN
464       return 1;
465     if (Val <= 510) return 2;                               // MOV + ADDi8
466     if (~Val <= 255) return 2;                              // MOV + MVN
467     if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
468   } else {
469     if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
470     if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
471     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
472     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
473   }
474   if (Subtarget->useMovt()) return 2; // MOVW + MOVT
475   return 3; // Literal pool load
476 }
477 
478 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
479                                              unsigned MaxShift,
480                                              unsigned &PowerOfTwo,
481                                              SDValue &NewMulConst) const {
482   assert(N.getOpcode() == ISD::MUL);
483   assert(MaxShift > 0);
484 
485   // If the multiply is used in more than one place then changing the constant
486   // will make other uses incorrect, so don't.
487   if (!N.hasOneUse()) return false;
488   // Check if the multiply is by a constant
489   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
490   if (!MulConst) return false;
491   // If the constant is used in more than one place then modifying it will mean
492   // we need to materialize two constants instead of one, which is a bad idea.
493   if (!MulConst->hasOneUse()) return false;
494   unsigned MulConstVal = MulConst->getZExtValue();
495   if (MulConstVal == 0) return false;
496 
497   // Find the largest power of 2 that MulConstVal is a multiple of
498   PowerOfTwo = MaxShift;
499   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
500     --PowerOfTwo;
501     if (PowerOfTwo == 0) return false;
502   }
503 
504   // Only optimise if the new cost is better
505   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
506   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
507   unsigned OldCost = ConstantMaterializationCost(MulConstVal);
508   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
509   return NewCost < OldCost;
510 }
511 
512 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
513   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
514   ReplaceUses(N, M);
515 }
516 
517 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
518                                               SDValue &BaseReg,
519                                               SDValue &Opc,
520                                               bool CheckProfitability) {
521   if (DisableShifterOp)
522     return false;
523 
524   // If N is a multiply-by-constant and it's profitable to extract a shift and
525   // use it in a shifted operand do so.
526   if (N.getOpcode() == ISD::MUL) {
527     unsigned PowerOfTwo = 0;
528     SDValue NewMulConst;
529     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
530       HandleSDNode Handle(N);
531       SDLoc Loc(N);
532       replaceDAGValue(N.getOperand(1), NewMulConst);
533       BaseReg = Handle.getValue();
534       Opc = CurDAG->getTargetConstant(
535           ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
536       return true;
537     }
538   }
539 
540   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
541 
542   // Don't match base register only case. That is matched to a separate
543   // lower complexity pattern with explicit register operand.
544   if (ShOpcVal == ARM_AM::no_shift) return false;
545 
546   BaseReg = N.getOperand(0);
547   unsigned ShImmVal = 0;
548   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
549   if (!RHS) return false;
550   ShImmVal = RHS->getZExtValue() & 31;
551   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
552                                   SDLoc(N), MVT::i32);
553   return true;
554 }
555 
556 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
557                                               SDValue &BaseReg,
558                                               SDValue &ShReg,
559                                               SDValue &Opc,
560                                               bool CheckProfitability) {
561   if (DisableShifterOp)
562     return false;
563 
564   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
565 
566   // Don't match base register only case. That is matched to a separate
567   // lower complexity pattern with explicit register operand.
568   if (ShOpcVal == ARM_AM::no_shift) return false;
569 
570   BaseReg = N.getOperand(0);
571   unsigned ShImmVal = 0;
572   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
573   if (RHS) return false;
574 
575   ShReg = N.getOperand(1);
576   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
577     return false;
578   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
579                                   SDLoc(N), MVT::i32);
580   return true;
581 }
582 
583 // Determine whether an ISD::OR's operands are suitable to turn the operation
584 // into an addition, which often has more compact encodings.
585 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
586   assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
587   Out = N;
588   return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
589 }
590 
591 
592 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
593                                           SDValue &Base,
594                                           SDValue &OffImm) {
595   // Match simple R + imm12 operands.
596 
597   // Base only.
598   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
599       !CurDAG->isBaseWithConstantOffset(N)) {
600     if (N.getOpcode() == ISD::FrameIndex) {
601       // Match frame index.
602       int FI = cast<FrameIndexSDNode>(N)->getIndex();
603       Base = CurDAG->getTargetFrameIndex(
604           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
605       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
606       return true;
607     }
608 
609     if (N.getOpcode() == ARMISD::Wrapper &&
610         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
611         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
612         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
613       Base = N.getOperand(0);
614     } else
615       Base = N;
616     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
617     return true;
618   }
619 
620   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
621     int RHSC = (int)RHS->getSExtValue();
622     if (N.getOpcode() == ISD::SUB)
623       RHSC = -RHSC;
624 
625     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
626       Base   = N.getOperand(0);
627       if (Base.getOpcode() == ISD::FrameIndex) {
628         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
629         Base = CurDAG->getTargetFrameIndex(
630             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
631       }
632       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
633       return true;
634     }
635   }
636 
637   // Base only.
638   Base = N;
639   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
640   return true;
641 }
642 
643 
644 
/// Match \p N as "base register +/- (optionally shifted) offset register".
///
/// \param N       The address expression.
/// \param Base    [out] Base register value.
/// \param Offset  [out] Offset register value (before any folded shift).
/// \param Opc     [out] i32 target constant built by ARM_AM::getAM2Opc from
///                the add/sub direction, shift amount and shift kind.
/// \returns false when \p N is not an add/sub-like expression, or when it is
///          an ADD/OR with a constant operand that fits the 12-bit immediate
///          form; true otherwise.
///
/// May rewrite the DAG: when the offset is a multiply by a constant, the
/// constant can be replaced so that a power of two is folded into the shift.
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  // On A9-like cores and Swift this special case is limited to single-use
  // multiplies.
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        // Drop the low bit; what remains must be +/- a power of two.
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          // Both base and offset are X: the address is X +/- (X << ShAmt).
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  // (only ADD and OR are checked here; SUB falls through).
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        // Not profitable: keep the whole shift node as a plain offset register.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  // Only attempted for non-SUB nodes when no shift was folded above, the
  // subtarget is neither A9-like nor Swift, and operand 0 does not have
  // exactly one use.
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          // Swap roles: the shifted value becomes the offset and the other
          // operand becomes the base.
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      // The handle keeps track of Offset while its constant operand is
      // replaced in the DAG.
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
752 
753 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
754                                             SDValue &Offset, SDValue &Opc) {
755   unsigned Opcode = Op->getOpcode();
756   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
757     ? cast<LoadSDNode>(Op)->getAddressingMode()
758     : cast<StoreSDNode>(Op)->getAddressingMode();
759   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
760     ? ARM_AM::add : ARM_AM::sub;
761   int Val;
762   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
763     return false;
764 
765   Offset = N;
766   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
767   unsigned ShAmt = 0;
768   if (ShOpcVal != ARM_AM::no_shift) {
769     // Check to see if the RHS of the shift is a constant, if not, we can't fold
770     // it.
771     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
772       ShAmt = Sh->getZExtValue();
773       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
774         Offset = N.getOperand(0);
775       else {
776         ShAmt = 0;
777         ShOpcVal = ARM_AM::no_shift;
778       }
779     } else {
780       ShOpcVal = ARM_AM::no_shift;
781     }
782   }
783 
784   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
785                                   SDLoc(N), MVT::i32);
786   return true;
787 }
788 
789 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
790                                             SDValue &Offset, SDValue &Opc) {
791   unsigned Opcode = Op->getOpcode();
792   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
793     ? cast<LoadSDNode>(Op)->getAddressingMode()
794     : cast<StoreSDNode>(Op)->getAddressingMode();
795   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
796     ? ARM_AM::add : ARM_AM::sub;
797   int Val;
798   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
799     if (AddSub == ARM_AM::sub) Val *= -1;
800     Offset = CurDAG->getRegister(0, MVT::i32);
801     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
802     return true;
803   }
804 
805   return false;
806 }
807 
808 
809 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
810                                             SDValue &Offset, SDValue &Opc) {
811   unsigned Opcode = Op->getOpcode();
812   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
813     ? cast<LoadSDNode>(Op)->getAddressingMode()
814     : cast<StoreSDNode>(Op)->getAddressingMode();
815   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
816     ? ARM_AM::add : ARM_AM::sub;
817   int Val;
818   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
819     Offset = CurDAG->getRegister(0, MVT::i32);
820     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
821                                                       ARM_AM::no_shift),
822                                     SDLoc(Op), MVT::i32);
823     return true;
824   }
825 
826   return false;
827 }
828 
829 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
830   Base = N;
831   return true;
832 }
833 
834 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
835                                       SDValue &Base, SDValue &Offset,
836                                       SDValue &Opc) {
837   if (N.getOpcode() == ISD::SUB) {
838     // X - C  is canonicalize to X + -C, no need to handle it here.
839     Base = N.getOperand(0);
840     Offset = N.getOperand(1);
841     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
842                                     MVT::i32);
843     return true;
844   }
845 
846   if (!CurDAG->isBaseWithConstantOffset(N)) {
847     Base = N;
848     if (N.getOpcode() == ISD::FrameIndex) {
849       int FI = cast<FrameIndexSDNode>(N)->getIndex();
850       Base = CurDAG->getTargetFrameIndex(
851           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
852     }
853     Offset = CurDAG->getRegister(0, MVT::i32);
854     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
855                                     MVT::i32);
856     return true;
857   }
858 
859   // If the RHS is +/- imm8, fold into addr mode.
860   int RHSC;
861   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
862                               -256 + 1, 256, RHSC)) { // 8 bits.
863     Base = N.getOperand(0);
864     if (Base.getOpcode() == ISD::FrameIndex) {
865       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
866       Base = CurDAG->getTargetFrameIndex(
867           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
868     }
869     Offset = CurDAG->getRegister(0, MVT::i32);
870 
871     ARM_AM::AddrOpc AddSub = ARM_AM::add;
872     if (RHSC < 0) {
873       AddSub = ARM_AM::sub;
874       RHSC = -RHSC;
875     }
876     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
877                                     MVT::i32);
878     return true;
879   }
880 
881   Base = N.getOperand(0);
882   Offset = N.getOperand(1);
883   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
884                                   MVT::i32);
885   return true;
886 }
887 
888 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
889                                             SDValue &Offset, SDValue &Opc) {
890   unsigned Opcode = Op->getOpcode();
891   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
892     ? cast<LoadSDNode>(Op)->getAddressingMode()
893     : cast<StoreSDNode>(Op)->getAddressingMode();
894   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
895     ? ARM_AM::add : ARM_AM::sub;
896   int Val;
897   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
898     Offset = CurDAG->getRegister(0, MVT::i32);
899     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
900                                     MVT::i32);
901     return true;
902   }
903 
904   Offset = N;
905   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
906                                   MVT::i32);
907   return true;
908 }
909 
910 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
911                                         bool FP16) {
912   if (!CurDAG->isBaseWithConstantOffset(N)) {
913     Base = N;
914     if (N.getOpcode() == ISD::FrameIndex) {
915       int FI = cast<FrameIndexSDNode>(N)->getIndex();
916       Base = CurDAG->getTargetFrameIndex(
917           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
918     } else if (N.getOpcode() == ARMISD::Wrapper &&
919                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
920                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
921                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
922       Base = N.getOperand(0);
923     }
924     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
925                                        SDLoc(N), MVT::i32);
926     return true;
927   }
928 
929   // If the RHS is +/- imm8, fold into addr mode.
930   int RHSC;
931   const int Scale = FP16 ? 2 : 4;
932 
933   if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
934     Base = N.getOperand(0);
935     if (Base.getOpcode() == ISD::FrameIndex) {
936       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
937       Base = CurDAG->getTargetFrameIndex(
938           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
939     }
940 
941     ARM_AM::AddrOpc AddSub = ARM_AM::add;
942     if (RHSC < 0) {
943       AddSub = ARM_AM::sub;
944       RHSC = -RHSC;
945     }
946 
947     if (FP16)
948       Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
949                                          SDLoc(N), MVT::i32);
950     else
951       Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
952                                          SDLoc(N), MVT::i32);
953 
954     return true;
955   }
956 
957   Base = N;
958 
959   if (FP16)
960     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
961                                        SDLoc(N), MVT::i32);
962   else
963     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
964                                        SDLoc(N), MVT::i32);
965 
966   return true;
967 }
968 
969 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
970                                       SDValue &Base, SDValue &Offset) {
971   return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
972 }
973 
974 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
975                                           SDValue &Base, SDValue &Offset) {
976   return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
977 }
978 
979 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
980                                       SDValue &Align) {
981   Addr = N;
982 
983   unsigned Alignment = 0;
984 
985   MemSDNode *MemN = cast<MemSDNode>(Parent);
986 
987   if (isa<LSBaseSDNode>(MemN) ||
988       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
989         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
990        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
991     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
992     // The maximum alignment is equal to the memory size being referenced.
993     unsigned MMOAlign = MemN->getAlignment();
994     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
995     if (MMOAlign >= MemSize && MemSize > 1)
996       Alignment = MemSize;
997   } else {
998     // All other uses of addrmode6 are for intrinsics.  For now just record
999     // the raw alignment value; it will be refined later based on the legal
1000     // alignment operands for the intrinsic.
1001     Alignment = MemN->getAlignment();
1002   }
1003 
1004   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1005   return true;
1006 }
1007 
1008 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1009                                             SDValue &Offset) {
1010   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1011   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1012   if (AM != ISD::POST_INC)
1013     return false;
1014   Offset = N;
1015   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1016     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1017       Offset = CurDAG->getRegister(0, MVT::i32);
1018   }
1019   return true;
1020 }
1021 
1022 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1023                                        SDValue &Offset, SDValue &Label) {
1024   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1025     Offset = N.getOperand(0);
1026     SDValue N1 = N.getOperand(1);
1027     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1028                                       SDLoc(N), MVT::i32);
1029     return true;
1030   }
1031 
1032   return false;
1033 }
1034 
1035 
1036 //===----------------------------------------------------------------------===//
1037 //                         Thumb Addressing Modes
1038 //===----------------------------------------------------------------------===//
1039 
1040 static bool shouldUseZeroOffsetLdSt(SDValue N) {
1041   // Negative numbers are difficult to materialise in thumb1. If we are
1042   // selecting the add of a negative, instead try to select ri with a zero
1043   // offset, so create the add node directly which will become a sub.
1044   if (N.getOpcode() != ISD::ADD)
1045     return false;
1046 
1047   // Look for an imm which is not legal for ld/st, but is legal for sub.
1048   if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1049     return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1050 
1051   return false;
1052 }
1053 
1054 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1055                                                 SDValue &Offset) {
1056   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1057     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1058     if (!NC || !NC->isNullValue())
1059       return false;
1060 
1061     Base = Offset = N;
1062     return true;
1063   }
1064 
1065   Base = N.getOperand(0);
1066   Offset = N.getOperand(1);
1067   return true;
1068 }
1069 
1070 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1071                                             SDValue &Offset) {
1072   if (shouldUseZeroOffsetLdSt(N))
1073     return false; // Select ri instead
1074   return SelectThumbAddrModeRRSext(N, Base, Offset);
1075 }
1076 
1077 bool
1078 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1079                                           SDValue &Base, SDValue &OffImm) {
1080   if (shouldUseZeroOffsetLdSt(N)) {
1081     Base = N;
1082     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1083     return true;
1084   }
1085 
1086   if (!CurDAG->isBaseWithConstantOffset(N)) {
1087     if (N.getOpcode() == ISD::ADD) {
1088       return false; // We want to select register offset instead
1089     } else if (N.getOpcode() == ARMISD::Wrapper &&
1090         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1091         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1092         N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1093         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1094       Base = N.getOperand(0);
1095     } else {
1096       Base = N;
1097     }
1098 
1099     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1100     return true;
1101   }
1102 
1103   // If the RHS is + imm5 * scale, fold into addr mode.
1104   int RHSC;
1105   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1106     Base = N.getOperand(0);
1107     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1108     return true;
1109   }
1110 
1111   // Offset is too large, so use register offset instead.
1112   return false;
1113 }
1114 
1115 bool
1116 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1117                                            SDValue &OffImm) {
1118   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1119 }
1120 
1121 bool
1122 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1123                                            SDValue &OffImm) {
1124   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1125 }
1126 
1127 bool
1128 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1129                                            SDValue &OffImm) {
1130   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1131 }
1132 
1133 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1134                                             SDValue &Base, SDValue &OffImm) {
1135   if (N.getOpcode() == ISD::FrameIndex) {
1136     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1137     // Only multiples of 4 are allowed for the offset, so the frame object
1138     // alignment must be at least 4.
1139     MachineFrameInfo &MFI = MF->getFrameInfo();
1140     if (MFI.getObjectAlignment(FI) < 4)
1141       MFI.setObjectAlignment(FI, 4);
1142     Base = CurDAG->getTargetFrameIndex(
1143         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1144     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1145     return true;
1146   }
1147 
1148   if (!CurDAG->isBaseWithConstantOffset(N))
1149     return false;
1150 
1151   if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
1152     // If the RHS is + imm8 * scale, fold into addr mode.
1153     int RHSC;
1154     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1155       Base = N.getOperand(0);
1156       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1157       // Make sure the offset is inside the object, or we might fail to
1158       // allocate an emergency spill slot. (An out-of-range access is UB, but
1159       // it could show up anyway.)
1160       MachineFrameInfo &MFI = MF->getFrameInfo();
1161       if (RHSC * 4 < MFI.getObjectSize(FI)) {
1162         // For LHS+RHS to result in an offset that's a multiple of 4 the object
1163         // indexed by the LHS must be 4-byte aligned.
1164         if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
1165           MFI.setObjectAlignment(FI, 4);
1166         if (MFI.getObjectAlignment(FI) >= 4) {
1167           Base = CurDAG->getTargetFrameIndex(
1168               FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1169           OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1170           return true;
1171         }
1172       }
1173     }
1174   }
1175 
1176   return false;
1177 }
1178 
1179 
1180 //===----------------------------------------------------------------------===//
1181 //                        Thumb 2 Addressing Modes
1182 //===----------------------------------------------------------------------===//
1183 
1184 
1185 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1186                                             SDValue &Base, SDValue &OffImm) {
1187   // Match simple R + imm12 operands.
1188 
1189   // Base only.
1190   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1191       !CurDAG->isBaseWithConstantOffset(N)) {
1192     if (N.getOpcode() == ISD::FrameIndex) {
1193       // Match frame index.
1194       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1195       Base = CurDAG->getTargetFrameIndex(
1196           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1197       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1198       return true;
1199     }
1200 
1201     if (N.getOpcode() == ARMISD::Wrapper &&
1202         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1203         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1204         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1205       Base = N.getOperand(0);
1206       if (Base.getOpcode() == ISD::TargetConstantPool)
1207         return false;  // We want to select t2LDRpci instead.
1208     } else
1209       Base = N;
1210     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1211     return true;
1212   }
1213 
1214   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1215     if (SelectT2AddrModeImm8(N, Base, OffImm))
1216       // Let t2LDRi8 handle (R - imm8).
1217       return false;
1218 
1219     int RHSC = (int)RHS->getZExtValue();
1220     if (N.getOpcode() == ISD::SUB)
1221       RHSC = -RHSC;
1222 
1223     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1224       Base   = N.getOperand(0);
1225       if (Base.getOpcode() == ISD::FrameIndex) {
1226         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1227         Base = CurDAG->getTargetFrameIndex(
1228             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1229       }
1230       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1231       return true;
1232     }
1233   }
1234 
1235   // Base only.
1236   Base = N;
1237   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1238   return true;
1239 }
1240 
1241 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1242                                            SDValue &Base, SDValue &OffImm) {
1243   // Match simple R - imm8 operands.
1244   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1245       !CurDAG->isBaseWithConstantOffset(N))
1246     return false;
1247 
1248   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1249     int RHSC = (int)RHS->getSExtValue();
1250     if (N.getOpcode() == ISD::SUB)
1251       RHSC = -RHSC;
1252 
1253     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1254       Base = N.getOperand(0);
1255       if (Base.getOpcode() == ISD::FrameIndex) {
1256         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1257         Base = CurDAG->getTargetFrameIndex(
1258             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1259       }
1260       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1261       return true;
1262     }
1263   }
1264 
1265   return false;
1266 }
1267 
1268 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1269                                                  SDValue &OffImm){
1270   unsigned Opcode = Op->getOpcode();
1271   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1272     ? cast<LoadSDNode>(Op)->getAddressingMode()
1273     : cast<StoreSDNode>(Op)->getAddressingMode();
1274   int RHSC;
1275   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1276     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1277       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1278       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1279     return true;
1280   }
1281 
1282   return false;
1283 }
1284 
1285 template<unsigned Shift>
1286 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N,
1287                                            SDValue &Base, SDValue &OffImm) {
1288   if (N.getOpcode() == ISD::SUB ||
1289       CurDAG->isBaseWithConstantOffset(N)) {
1290     if (auto RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1291       int RHSC = (int)RHS->getZExtValue();
1292       if (N.getOpcode() == ISD::SUB)
1293         RHSC = -RHSC;
1294 
1295       if (isShiftedInt<7, Shift>(RHSC)) {
1296         Base = N.getOperand(0);
1297         if (Base.getOpcode() == ISD::FrameIndex) {
1298           int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1299           Base = CurDAG->getTargetFrameIndex(
1300             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1301         }
1302         OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1303         return true;
1304       }
1305     }
1306   }
1307 
1308   // Base only.
1309   Base = N;
1310   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1311   return true;
1312 }
1313 
1314 template <unsigned Shift>
1315 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1316                                                  SDValue &OffImm) {
1317   return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
1318 }
1319 
1320 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1321                                                  SDValue &OffImm,
1322                                                  unsigned Shift) {
1323   unsigned Opcode = Op->getOpcode();
1324   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1325                                ? cast<LoadSDNode>(Op)->getAddressingMode()
1326                                : cast<StoreSDNode>(Op)->getAddressingMode();
1327   int RHSC;
1328   if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits.
1329     OffImm =
1330         ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1331             ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
1332             : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
1333                                         MVT::i32);
1334     return true;
1335   }
1336   return false;
1337 }
1338 
1339 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1340                                             SDValue &Base,
1341                                             SDValue &OffReg, SDValue &ShImm) {
1342   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1343   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1344     return false;
1345 
1346   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1347   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1348     int RHSC = (int)RHS->getZExtValue();
1349     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1350       return false;
1351     else if (RHSC < 0 && RHSC >= -255) // 8 bits
1352       return false;
1353   }
1354 
1355   // Look for (R + R) or (R + (R << [1,2,3])).
1356   unsigned ShAmt = 0;
1357   Base   = N.getOperand(0);
1358   OffReg = N.getOperand(1);
1359 
1360   // Swap if it is ((R << c) + R).
1361   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1362   if (ShOpcVal != ARM_AM::lsl) {
1363     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1364     if (ShOpcVal == ARM_AM::lsl)
1365       std::swap(Base, OffReg);
1366   }
1367 
1368   if (ShOpcVal == ARM_AM::lsl) {
1369     // Check to see if the RHS of the shift is a constant, if not, we can't fold
1370     // it.
1371     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1372       ShAmt = Sh->getZExtValue();
1373       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1374         OffReg = OffReg.getOperand(0);
1375       else {
1376         ShAmt = 0;
1377       }
1378     }
1379   }
1380 
1381   // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1382   // and use it in a shifted operand do so.
1383   if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1384     unsigned PowerOfTwo = 0;
1385     SDValue NewMulConst;
1386     if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1387       HandleSDNode Handle(OffReg);
1388       replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1389       OffReg = Handle.getValue();
1390       ShAmt = PowerOfTwo;
1391     }
1392   }
1393 
1394   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1395 
1396   return true;
1397 }
1398 
1399 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1400                                                 SDValue &OffImm) {
1401   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1402   // instructions.
1403   Base = N;
1404   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1405 
1406   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1407     return true;
1408 
1409   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1410   if (!RHS)
1411     return true;
1412 
1413   uint32_t RHSC = (int)RHS->getZExtValue();
1414   if (RHSC > 1020 || RHSC % 4 != 0)
1415     return true;
1416 
1417   Base = N.getOperand(0);
1418   if (Base.getOpcode() == ISD::FrameIndex) {
1419     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1420     Base = CurDAG->getTargetFrameIndex(
1421         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1422   }
1423 
1424   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1425   return true;
1426 }
1427 
1428 //===--------------------------------------------------------------------===//
1429 
1430 /// getAL - Returns a ARMCC::AL immediate node.
1431 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1432   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1433 }
1434 
1435 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1436   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1437   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
1438 }
1439 
1440 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1441   LoadSDNode *LD = cast<LoadSDNode>(N);
1442   ISD::MemIndexedMode AM = LD->getAddressingMode();
1443   if (AM == ISD::UNINDEXED)
1444     return false;
1445 
1446   EVT LoadedVT = LD->getMemoryVT();
1447   SDValue Offset, AMOpc;
1448   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1449   unsigned Opcode = 0;
1450   bool Match = false;
1451   if (LoadedVT == MVT::i32 && isPre &&
1452       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1453     Opcode = ARM::LDR_PRE_IMM;
1454     Match = true;
1455   } else if (LoadedVT == MVT::i32 && !isPre &&
1456       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1457     Opcode = ARM::LDR_POST_IMM;
1458     Match = true;
1459   } else if (LoadedVT == MVT::i32 &&
1460       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1461     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1462     Match = true;
1463 
1464   } else if (LoadedVT == MVT::i16 &&
1465              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1466     Match = true;
1467     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1468       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1469       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1470   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1471     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1472       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1473         Match = true;
1474         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1475       }
1476     } else {
1477       if (isPre &&
1478           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1479         Match = true;
1480         Opcode = ARM::LDRB_PRE_IMM;
1481       } else if (!isPre &&
1482                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1483         Match = true;
1484         Opcode = ARM::LDRB_POST_IMM;
1485       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1486         Match = true;
1487         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1488       }
1489     }
1490   }
1491 
1492   if (Match) {
1493     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1494       SDValue Chain = LD->getChain();
1495       SDValue Base = LD->getBasePtr();
1496       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1497                        CurDAG->getRegister(0, MVT::i32), Chain };
1498       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1499                                            MVT::Other, Ops);
1500       transferMemOperands(N, New);
1501       ReplaceNode(N, New);
1502       return true;
1503     } else {
1504       SDValue Chain = LD->getChain();
1505       SDValue Base = LD->getBasePtr();
1506       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1507                        CurDAG->getRegister(0, MVT::i32), Chain };
1508       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1509                                            MVT::Other, Ops);
1510       transferMemOperands(N, New);
1511       ReplaceNode(N, New);
1512       return true;
1513     }
1514   }
1515 
1516   return false;
1517 }
1518 
1519 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1520   LoadSDNode *LD = cast<LoadSDNode>(N);
1521   EVT LoadedVT = LD->getMemoryVT();
1522   ISD::MemIndexedMode AM = LD->getAddressingMode();
1523   if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1524       LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1525     return false;
1526 
1527   auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1528   if (!COffs || COffs->getZExtValue() != 4)
1529     return false;
1530 
1531   // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1532   // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1533   // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1534   // ISel.
1535   SDValue Chain = LD->getChain();
1536   SDValue Base = LD->getBasePtr();
1537   SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1538                    CurDAG->getRegister(0, MVT::i32), Chain };
1539   SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1540                                        MVT::i32, MVT::Other, Ops);
1541   transferMemOperands(N, New);
1542   ReplaceNode(N, New);
1543   return true;
1544 }
1545 
1546 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1547   LoadSDNode *LD = cast<LoadSDNode>(N);
1548   ISD::MemIndexedMode AM = LD->getAddressingMode();
1549   if (AM == ISD::UNINDEXED)
1550     return false;
1551 
1552   EVT LoadedVT = LD->getMemoryVT();
1553   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1554   SDValue Offset;
1555   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1556   unsigned Opcode = 0;
1557   bool Match = false;
1558   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1559     switch (LoadedVT.getSimpleVT().SimpleTy) {
1560     case MVT::i32:
1561       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1562       break;
1563     case MVT::i16:
1564       if (isSExtLd)
1565         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1566       else
1567         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1568       break;
1569     case MVT::i8:
1570     case MVT::i1:
1571       if (isSExtLd)
1572         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1573       else
1574         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1575       break;
1576     default:
1577       return false;
1578     }
1579     Match = true;
1580   }
1581 
1582   if (Match) {
1583     SDValue Chain = LD->getChain();
1584     SDValue Base = LD->getBasePtr();
1585     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1586                      CurDAG->getRegister(0, MVT::i32), Chain };
1587     SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1588                                          MVT::Other, Ops);
1589     transferMemOperands(N, New);
1590     ReplaceNode(N, New);
1591     return true;
1592   }
1593 
1594   return false;
1595 }
1596 
1597 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1598   LoadSDNode *LD = cast<LoadSDNode>(N);
1599   ISD::MemIndexedMode AM = LD->getAddressingMode();
1600   if (AM == ISD::UNINDEXED)
1601     return false;
1602   EVT LoadedVT = LD->getMemoryVT();
1603   if (!LoadedVT.isVector())
1604     return false;
1605   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1606   SDValue Offset;
1607   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1608   unsigned Opcode = 0;
1609   unsigned Align = LD->getAlignment();
1610   bool IsLE = Subtarget->isLittle();
1611 
1612   if (Align >= 2 && LoadedVT == MVT::v4i16 &&
1613       SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) {
1614     if (isSExtLd)
1615       Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1616     else
1617       Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1618   } else if (LoadedVT == MVT::v8i8 &&
1619              SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
1620     if (isSExtLd)
1621       Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1622     else
1623       Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1624   } else if (LoadedVT == MVT::v4i8 &&
1625              SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
1626     if (isSExtLd)
1627       Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1628     else
1629       Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1630   } else if (Align >= 4 &&
1631              (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) &&
1632              SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2))
1633     Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1634   else if (Align >= 2 &&
1635            (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) &&
1636            SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1))
1637     Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1638   else if ((IsLE || LoadedVT == MVT::v16i8) &&
1639            SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0))
1640     Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1641   else
1642     return false;
1643 
1644   SDValue Chain = LD->getChain();
1645   SDValue Base = LD->getBasePtr();
1646   SDValue Ops[] = {Base, Offset,
1647                    CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32),
1648                    CurDAG->getRegister(0, MVT::i32), Chain};
1649   SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0),
1650                                        MVT::i32, MVT::Other, Ops);
1651   transferMemOperands(N, New);
1652   ReplaceUses(SDValue(N, 0), SDValue(New, 1));
1653   ReplaceUses(SDValue(N, 1), SDValue(New, 0));
1654   ReplaceUses(SDValue(N, 2), SDValue(New, 2));
1655   CurDAG->RemoveDeadNode(N);
1656   return true;
1657 }
1658 
1659 /// Form a GPRPair pseudo register from a pair of GPR regs.
1660 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1661   SDLoc dl(V0.getNode());
1662   SDValue RegClass =
1663     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1664   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1665   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1666   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1667   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1668 }
1669 
1670 /// Form a D register from a pair of S registers.
1671 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1672   SDLoc dl(V0.getNode());
1673   SDValue RegClass =
1674     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1675   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1676   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1677   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1678   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1679 }
1680 
1681 /// Form a quad register from a pair of D registers.
1682 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1683   SDLoc dl(V0.getNode());
1684   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1685                                                MVT::i32);
1686   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1687   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1688   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1689   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1690 }
1691 
1692 /// Form 4 consecutive D registers from a pair of Q registers.
1693 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1694   SDLoc dl(V0.getNode());
1695   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1696                                                MVT::i32);
1697   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1698   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1699   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1700   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1701 }
1702 
1703 /// Form 4 consecutive S registers.
1704 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1705                                    SDValue V2, SDValue V3) {
1706   SDLoc dl(V0.getNode());
1707   SDValue RegClass =
1708     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1709   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1710   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1711   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1712   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1713   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1714                                     V2, SubReg2, V3, SubReg3 };
1715   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1716 }
1717 
1718 /// Form 4 consecutive D registers.
1719 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1720                                    SDValue V2, SDValue V3) {
1721   SDLoc dl(V0.getNode());
1722   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1723                                                MVT::i32);
1724   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1725   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1726   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1727   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1728   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1729                                     V2, SubReg2, V3, SubReg3 };
1730   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1731 }
1732 
1733 /// Form 4 consecutive Q registers.
1734 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1735                                    SDValue V2, SDValue V3) {
1736   SDLoc dl(V0.getNode());
1737   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1738                                                MVT::i32);
1739   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1740   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1741   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1742   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1743   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1744                                     V2, SubReg2, V3, SubReg3 };
1745   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1746 }
1747 
1748 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1749 /// of a NEON VLD or VST instruction.  The supported values depend on the
1750 /// number of registers being loaded.
1751 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1752                                        unsigned NumVecs, bool is64BitVector) {
1753   unsigned NumRegs = NumVecs;
1754   if (!is64BitVector && NumVecs < 3)
1755     NumRegs *= 2;
1756 
1757   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1758   if (Alignment >= 32 && NumRegs == 4)
1759     Alignment = 32;
1760   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1761     Alignment = 16;
1762   else if (Alignment >= 8)
1763     Alignment = 8;
1764   else
1765     Alignment = 0;
1766 
1767   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1768 }
1769 
1770 static bool isVLDfixed(unsigned Opc)
1771 {
1772   switch (Opc) {
1773   default: return false;
1774   case ARM::VLD1d8wb_fixed : return true;
1775   case ARM::VLD1d16wb_fixed : return true;
1776   case ARM::VLD1d64Qwb_fixed : return true;
1777   case ARM::VLD1d32wb_fixed : return true;
1778   case ARM::VLD1d64wb_fixed : return true;
1779   case ARM::VLD1d64TPseudoWB_fixed : return true;
1780   case ARM::VLD1d64QPseudoWB_fixed : return true;
1781   case ARM::VLD1q8wb_fixed : return true;
1782   case ARM::VLD1q16wb_fixed : return true;
1783   case ARM::VLD1q32wb_fixed : return true;
1784   case ARM::VLD1q64wb_fixed : return true;
1785   case ARM::VLD1DUPd8wb_fixed : return true;
1786   case ARM::VLD1DUPd16wb_fixed : return true;
1787   case ARM::VLD1DUPd32wb_fixed : return true;
1788   case ARM::VLD1DUPq8wb_fixed : return true;
1789   case ARM::VLD1DUPq16wb_fixed : return true;
1790   case ARM::VLD1DUPq32wb_fixed : return true;
1791   case ARM::VLD2d8wb_fixed : return true;
1792   case ARM::VLD2d16wb_fixed : return true;
1793   case ARM::VLD2d32wb_fixed : return true;
1794   case ARM::VLD2q8PseudoWB_fixed : return true;
1795   case ARM::VLD2q16PseudoWB_fixed : return true;
1796   case ARM::VLD2q32PseudoWB_fixed : return true;
1797   case ARM::VLD2DUPd8wb_fixed : return true;
1798   case ARM::VLD2DUPd16wb_fixed : return true;
1799   case ARM::VLD2DUPd32wb_fixed : return true;
1800   }
1801 }
1802 
1803 static bool isVSTfixed(unsigned Opc)
1804 {
1805   switch (Opc) {
1806   default: return false;
1807   case ARM::VST1d8wb_fixed : return true;
1808   case ARM::VST1d16wb_fixed : return true;
1809   case ARM::VST1d32wb_fixed : return true;
1810   case ARM::VST1d64wb_fixed : return true;
1811   case ARM::VST1q8wb_fixed : return true;
1812   case ARM::VST1q16wb_fixed : return true;
1813   case ARM::VST1q32wb_fixed : return true;
1814   case ARM::VST1q64wb_fixed : return true;
1815   case ARM::VST1d64TPseudoWB_fixed : return true;
1816   case ARM::VST1d64QPseudoWB_fixed : return true;
1817   case ARM::VST2d8wb_fixed : return true;
1818   case ARM::VST2d16wb_fixed : return true;
1819   case ARM::VST2d32wb_fixed : return true;
1820   case ARM::VST2q8PseudoWB_fixed : return true;
1821   case ARM::VST2q16PseudoWB_fixed : return true;
1822   case ARM::VST2q32PseudoWB_fixed : return true;
1823   }
1824 }
1825 
1826 // Get the register stride update opcode of a VLD/VST instruction that
1827 // is otherwise equivalent to the given fixed stride updating instruction.
1828 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1829   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1830     && "Incorrect fixed stride updating instruction.");
1831   switch (Opc) {
1832   default: break;
1833   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1834   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1835   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1836   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1837   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1838   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1839   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1840   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1841   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1842   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1843   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1844   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1845   case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
1846   case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
1847   case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
1848   case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
1849   case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
1850   case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
1851 
1852   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1853   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1854   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1855   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1856   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1857   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1858   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1859   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1860   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1861   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1862 
1863   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1864   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1865   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1866   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1867   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1868   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1869 
1870   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1871   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1872   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1873   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1874   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1875   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1876 
1877   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1878   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1879   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1880   }
1881   return Opc; // If not one we handle, return it unchanged.
1882 }
1883 
1884 /// Returns true if the given increment is a Constant known to be equal to the
1885 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1886 /// be used.
1887 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1888   auto C = dyn_cast<ConstantSDNode>(Inc);
1889   return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1890 }
1891 
/// Select a NEON VLD1-4 node, optionally with address write-back.
///
/// \param N          The node being selected. Its address is operand 2 for
///                   non-updating nodes and operand 1 for updating nodes.
/// \param isUpdating True if the node also produces an updated address.
/// \param NumVecs    Number of vectors loaded (1-4, asserted).
/// \param DOpcodes   Opcode table used for 64-bit vector types.
/// \param QOpcodes0  Opcode table used for non-64-bit vector types: the only
///                   instruction when NumVecs <= 2, otherwise the first of two.
/// \param QOpcodes1  Opcode table for the second instruction of the
///                   two-instruction sequence (non-64-bit vectors, NumVecs > 2).
///
/// All tables are indexed by element size (see OpcodeIndex below). If
/// SelectAddrMode6 rejects the address the function returns without
/// replacing N.
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  // Replace the raw alignment with the value usable as the instruction's
  // alignment operand.
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Index into the opcode tables: 0/1/2/3 for 8/16/32/64-bit elements.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // A single vector is produced directly as VT. Several vectors are produced
  // as one wide vector of i64 elements (three vectors are rounded up to four,
  // and the element count is doubled for non-64-bit vectors); the individual
  // vectors are extracted from it at the end of this function.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  // Result types of the final load: value, optional i32 write-back, chain.
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      // The increment operand follows the address operand on updating nodes.
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs.  This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    // The second load is chained after the first.
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    // Its address is the first load's updated address (result 1) and its
    // source super-register is the first load's value (result 0).
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  // With one vector the new node's results line up with N's, so it can
  // replace N directly.
  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // N's remaining results (after the NumVecs vectors) map onto results 1 and,
  // for updating loads, 2 of the new node.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
2030 
/// Select a NEON VST1-4 node, optionally with address write-back.
///
/// \param N          The node being selected. Its address is operand 2 for
///                   non-updating nodes and operand 1 for updating nodes; the
///                   vectors to store start at operand 3 in both cases.
/// \param isUpdating True if the node also produces an updated address.
/// \param NumVecs    Number of vectors stored (1-4, asserted).
/// \param DOpcodes   Opcode table used for 64-bit vector types.
/// \param QOpcodes0  Opcode table used for non-64-bit vector types: the only
///                   instruction when NumVecs <= 2, otherwise the first of two.
/// \param QOpcodes1  Opcode table for the second instruction of the
///                   two-instruction sequence (non-64-bit vectors, NumVecs > 2).
///
/// All tables are indexed by element size (see OpcodeIndex below). If
/// SelectAddrMode6 rejects the address the function returns without
/// replacing N.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  // Both cases of the formula below evaluate to 3 (2 + 1 and 1 + 2).
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The vector type is taken from the first stored vector.
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  // Replace the raw alignment with the value usable as the instruction's
  // alignment operand.
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Index into the opcode tables: 0/1/2/3 for 8/16/32/64-bit elements.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // Result types of the final store: optional i32 write-back, then chain.
  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    // Build the single source operand: the vector itself for one vector,
    // otherwise a REG_SEQUENCE combining all of them.
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      // The increment operand follows the address operand on updating nodes.
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  // For a vst3 the fourth slot is filled with an IMPLICIT_DEF.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers.  This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  // The second store is chained after the first.
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  // The address is the first store's updated address (its result 0).
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}
2181 
/// Select a NEON single-lane VLD2-4 or VST2-4 node, optionally with address
/// write-back.
///
/// \param N          The node being selected. Its address is operand 2 for
///                   non-updating nodes and operand 1 for updating nodes; the
///                   vectors start at operand 3 and are followed by the
///                   constant lane number.
/// \param IsLoad     True for a lane load, false for a lane store.
/// \param isUpdating True if the node also produces an updated address.
/// \param NumVecs    Number of vectors involved (2-4, asserted).
/// \param DOpcodes   Opcode table used for 64-bit vector types.
/// \param QOpcodes   Opcode table used for non-64-bit vector types.
///
/// If SelectAddrMode6 rejects the address the function returns without
/// replacing N.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  // Both cases of the formula below evaluate to 3 (2 + 1 and 1 + 2).
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The lane number is the constant operand right after the vectors.
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Compute the alignment operand. It stays 0 for three vectors; otherwise
  // it is capped at the number of bytes accessed (NumVecs elements), dropped
  // to 0 when it is below both 8 and that byte count, reduced to its lowest
  // set bit, and finally an alignment of 1 is treated as 0.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Index into the opcode tables. Note the quad-register tables start at
  // 16-bit elements, so their indices are shifted by one relative to the
  // double-register tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  // Result types: for loads one wide vector of i64 elements (three vectors
  // are rounded up to four, and the element count is doubled for non-64-bit
  // vectors), then the optional i32 write-back, then the chain.
  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    // Only one lane per vector is accessed, so the "perfect" increment is
    // measured in elements rather than whole vectors. Reg0 is passed in
    // place of the increment when it matches.
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  // Combine the input vectors into one super-register operand. For three
  // vectors the fourth slot is filled with an IMPLICIT_DEF.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                  QOpcodes[OpcodeIndex]);
  // Despite its name, VLdLn is the selected node for both loads and stores.
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  // A store has no vector results to split up, so it replaces N directly.
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // N's remaining results (after the NumVecs vectors) map onto results 1 and,
  // for updating loads, 2 of the new node.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
2308 
2309 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2310                                    bool isUpdating, unsigned NumVecs,
2311                                    const uint16_t *DOpcodes,
2312                                    const uint16_t *QOpcodes0,
2313                                    const uint16_t *QOpcodes1) {
2314   assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2315   SDLoc dl(N);
2316 
2317   SDValue MemAddr, Align;
2318   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2319   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2320     return;
2321 
2322   SDValue Chain = N->getOperand(0);
2323   EVT VT = N->getValueType(0);
2324   bool is64BitVector = VT.is64BitVector();
2325 
2326   unsigned Alignment = 0;
2327   if (NumVecs != 3) {
2328     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2329     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2330     if (Alignment > NumBytes)
2331       Alignment = NumBytes;
2332     if (Alignment < 8 && Alignment < NumBytes)
2333       Alignment = 0;
2334     // Alignment must be a power of two; make sure of that.
2335     Alignment = (Alignment & -Alignment);
2336     if (Alignment == 1)
2337       Alignment = 0;
2338   }
2339   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2340 
2341   unsigned OpcodeIndex;
2342   switch (VT.getSimpleVT().SimpleTy) {
2343   default: llvm_unreachable("unhandled vld-dup type");
2344   case MVT::v8i8:
2345   case MVT::v16i8: OpcodeIndex = 0; break;
2346   case MVT::v4i16:
2347   case MVT::v8i16:
2348   case MVT::v4f16:
2349   case MVT::v8f16:
2350                   OpcodeIndex = 1; break;
2351   case MVT::v2f32:
2352   case MVT::v2i32:
2353   case MVT::v4f32:
2354   case MVT::v4i32: OpcodeIndex = 2; break;
2355   case MVT::v1f64:
2356   case MVT::v1i64: OpcodeIndex = 3; break;
2357   }
2358 
2359   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2360   if (!is64BitVector)
2361     ResTyElts *= 2;
2362   EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2363 
2364   std::vector<EVT> ResTys;
2365   ResTys.push_back(ResTy);
2366   if (isUpdating)
2367     ResTys.push_back(MVT::i32);
2368   ResTys.push_back(MVT::Other);
2369 
2370   SDValue Pred = getAL(CurDAG, dl);
2371   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2372 
2373   SDNode *VLdDup;
2374   if (is64BitVector || NumVecs == 1) {
2375     SmallVector<SDValue, 6> Ops;
2376     Ops.push_back(MemAddr);
2377     Ops.push_back(Align);
2378     unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
2379                                    QOpcodes0[OpcodeIndex];
2380     if (isUpdating) {
2381       // fixed-stride update instructions don't have an explicit writeback
2382       // operand. It's implicit in the opcode itself.
2383       SDValue Inc = N->getOperand(2);
2384       bool IsImmUpdate =
2385           isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2386       if (NumVecs <= 2 && !IsImmUpdate)
2387         Opc = getVLDSTRegisterUpdateOpcode(Opc);
2388       if (!IsImmUpdate)
2389         Ops.push_back(Inc);
2390       // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2391       else if (NumVecs > 2)
2392         Ops.push_back(Reg0);
2393     }
2394     Ops.push_back(Pred);
2395     Ops.push_back(Reg0);
2396     Ops.push_back(Chain);
2397     VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2398   } else if (NumVecs == 2) {
2399     const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
2400     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2401                                           dl, ResTys, OpsA);
2402 
2403     Chain = SDValue(VLdA, 1);
2404     const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
2405     VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2406   } else {
2407     SDValue ImplDef =
2408       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2409     const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
2410     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2411                                           dl, ResTys, OpsA);
2412 
2413     SDValue SuperReg = SDValue(VLdA, 0);
2414     Chain = SDValue(VLdA, 1);
2415     const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
2416     VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2417   }
2418 
2419   // Transfer memoperands.
2420   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2421   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
2422 
2423   // Extract the subregisters.
2424   if (NumVecs == 1) {
2425     ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2426   } else {
2427     SDValue SuperReg = SDValue(VLdDup, 0);
2428     static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2429     unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2430     for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
2431       ReplaceUses(SDValue(N, Vec),
2432                   CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2433     }
2434   }
2435   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2436   if (isUpdating)
2437     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2438   CurDAG->RemoveDeadNode(N);
2439 }
2440 
2441 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2442   if (!Subtarget->hasV6T2Ops())
2443     return false;
2444 
2445   unsigned Opc = isSigned
2446     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2447     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2448   SDLoc dl(N);
2449 
2450   // For unsigned extracts, check for a shift right and mask
2451   unsigned And_imm = 0;
2452   if (N->getOpcode() == ISD::AND) {
2453     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2454 
2455       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2456       if (And_imm & (And_imm + 1))
2457         return false;
2458 
2459       unsigned Srl_imm = 0;
2460       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2461                                 Srl_imm)) {
2462         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2463 
2464         // Mask off the unnecessary bits of the AND immediate; normally
2465         // DAGCombine will do this, but that might not happen if
2466         // targetShrinkDemandedConstant chooses a different immediate.
2467         And_imm &= -1U >> Srl_imm;
2468 
2469         // Note: The width operand is encoded as width-1.
2470         unsigned Width = countTrailingOnes(And_imm) - 1;
2471         unsigned LSB = Srl_imm;
2472 
2473         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2474 
2475         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2476           // It's cheaper to use a right shift to extract the top bits.
2477           if (Subtarget->isThumb()) {
2478             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2479             SDValue Ops[] = { N->getOperand(0).getOperand(0),
2480                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2481                               getAL(CurDAG, dl), Reg0, Reg0 };
2482             CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2483             return true;
2484           }
2485 
2486           // ARM models shift instructions as MOVsi with shifter operand.
2487           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2488           SDValue ShOpc =
2489             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2490                                       MVT::i32);
2491           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2492                             getAL(CurDAG, dl), Reg0, Reg0 };
2493           CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2494           return true;
2495         }
2496 
2497         assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2498         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2499                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2500                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
2501                           getAL(CurDAG, dl), Reg0 };
2502         CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2503         return true;
2504       }
2505     }
2506     return false;
2507   }
2508 
2509   // Otherwise, we're looking for a shift of a shift
2510   unsigned Shl_imm = 0;
2511   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2512     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2513     unsigned Srl_imm = 0;
2514     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2515       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2516       // Note: The width operand is encoded as width-1.
2517       unsigned Width = 32 - Srl_imm - 1;
2518       int LSB = Srl_imm - Shl_imm;
2519       if (LSB < 0)
2520         return false;
2521       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2522       assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2523       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2524                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2525                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2526                         getAL(CurDAG, dl), Reg0 };
2527       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2528       return true;
2529     }
2530   }
2531 
2532   // Or we are looking for a shift of an and, with a mask operand
2533   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2534       isShiftedMask_32(And_imm)) {
2535     unsigned Srl_imm = 0;
2536     unsigned LSB = countTrailingZeros(And_imm);
2537     // Shift must be the same as the ands lsb
2538     if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2539       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2540       unsigned MSB = 31 - countLeadingZeros(And_imm);
2541       // Note: The width operand is encoded as width-1.
2542       unsigned Width = MSB - LSB;
2543       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2544       assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
2545       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2546                         CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2547                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2548                         getAL(CurDAG, dl), Reg0 };
2549       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2550       return true;
2551     }
2552   }
2553 
2554   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2555     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2556     unsigned LSB = 0;
2557     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2558         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2559       return false;
2560 
2561     if (LSB + Width > 32)
2562       return false;
2563 
2564     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2565     assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
2566     SDValue Ops[] = { N->getOperand(0).getOperand(0),
2567                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2568                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2569                       getAL(CurDAG, dl), Reg0 };
2570     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2571     return true;
2572   }
2573 
2574   return false;
2575 }
2576 
2577 /// Target-specific DAG combining for ISD::XOR.
2578 /// Target-independent combining lowers SELECT_CC nodes of the form
2579 /// select_cc setg[ge] X,  0,  X, -X
2580 /// select_cc setgt    X, -1,  X, -X
2581 /// select_cc setl[te] X,  0, -X,  X
2582 /// select_cc setlt    X,  1, -X,  X
2583 /// which represent Integer ABS into:
2584 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2585 /// ARM instruction selection detects the latter and matches it to
2586 /// ARM::ABS or ARM::t2ABS machine node.
2587 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2588   SDValue XORSrc0 = N->getOperand(0);
2589   SDValue XORSrc1 = N->getOperand(1);
2590   EVT VT = N->getValueType(0);
2591 
2592   if (Subtarget->isThumb1Only())
2593     return false;
2594 
2595   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2596     return false;
2597 
2598   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2599   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2600   SDValue SRASrc0 = XORSrc1.getOperand(0);
2601   SDValue SRASrc1 = XORSrc1.getOperand(1);
2602   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2603   EVT XType = SRASrc0.getValueType();
2604   unsigned Size = XType.getSizeInBits() - 1;
2605 
2606   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2607       XType.isInteger() && SRAConstant != nullptr &&
2608       Size == SRAConstant->getZExtValue()) {
2609     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2610     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2611     return true;
2612   }
2613 
2614   return false;
2615 }
2616 
2617 /// We've got special pseudo-instructions for these
2618 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2619   unsigned Opcode;
2620   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2621   if (MemTy == MVT::i8)
2622     Opcode = ARM::CMP_SWAP_8;
2623   else if (MemTy == MVT::i16)
2624     Opcode = ARM::CMP_SWAP_16;
2625   else if (MemTy == MVT::i32)
2626     Opcode = ARM::CMP_SWAP_32;
2627   else
2628     llvm_unreachable("Unknown AtomicCmpSwap type");
2629 
2630   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2631                    N->getOperand(0)};
2632   SDNode *CmpSwap = CurDAG->getMachineNode(
2633       Opcode, SDLoc(N),
2634       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2635 
2636   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2637   CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2638 
2639   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2640   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2641   CurDAG->RemoveDeadNode(N);
2642 }
2643 
2644 static Optional<std::pair<unsigned, unsigned>>
2645 getContiguousRangeOfSetBits(const APInt &A) {
2646   unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2647   unsigned LastOne = A.countTrailingZeros();
2648   if (A.countPopulation() != (FirstOne - LastOne + 1))
2649     return Optional<std::pair<unsigned,unsigned>>();
2650   return std::make_pair(FirstOne, LastOne);
2651 }
2652 
2653 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
2654   assert(N->getOpcode() == ARMISD::CMPZ);
2655   SwitchEQNEToPLMI = false;
2656 
2657   if (!Subtarget->isThumb())
2658     // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
2659     // LSR don't exist as standalone instructions - they need the barrel shifter.
2660     return;
2661 
2662   // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
2663   SDValue And = N->getOperand(0);
2664   if (!And->hasOneUse())
2665     return;
2666 
2667   SDValue Zero = N->getOperand(1);
2668   if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
2669       And->getOpcode() != ISD::AND)
2670     return;
2671   SDValue X = And.getOperand(0);
2672   auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
2673 
2674   if (!C)
2675     return;
2676   auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
2677   if (!Range)
2678     return;
2679 
2680   // There are several ways to lower this:
2681   SDNode *NewN;
2682   SDLoc dl(N);
2683 
2684   auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
2685     if (Subtarget->isThumb2()) {
2686       Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
2687       SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2688                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2689                         CurDAG->getRegister(0, MVT::i32) };
2690       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2691     } else {
2692       SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
2693                        CurDAG->getTargetConstant(Imm, dl, MVT::i32),
2694                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
2695       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
2696     }
2697   };
2698 
2699   if (Range->second == 0) {
2700     //  1. Mask includes the LSB -> Simply shift the top N bits off
2701     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2702     ReplaceNode(And.getNode(), NewN);
2703   } else if (Range->first == 31) {
2704     //  2. Mask includes the MSB -> Simply shift the bottom N bits off
2705     NewN = EmitShift(ARM::tLSRri, X, Range->second);
2706     ReplaceNode(And.getNode(), NewN);
2707   } else if (Range->first == Range->second) {
2708     //  3. Only one bit is set. We can shift this into the sign bit and use a
2709     //     PL/MI comparison.
2710     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2711     ReplaceNode(And.getNode(), NewN);
2712 
2713     SwitchEQNEToPLMI = true;
2714   } else if (!Subtarget->hasV6T2Ops()) {
2715     //  4. Do a double shift to clear bottom and top bits, but only in
2716     //     thumb-1 mode as in thumb-2 we can use UBFX.
2717     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
2718     NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
2719                      Range->second + (31 - Range->first));
2720     ReplaceNode(And.getNode(), NewN);
2721   }
2722 
2723 }
2724 
2725 void ARMDAGToDAGISel::Select(SDNode *N) {
2726   SDLoc dl(N);
2727 
2728   if (N->isMachineOpcode()) {
2729     N->setNodeId(-1);
2730     return;   // Already selected.
2731   }
2732 
2733   switch (N->getOpcode()) {
2734   default: break;
2735   case ISD::STORE: {
2736     // For Thumb1, match an sp-relative store in C++. This is a little
2737     // unfortunate, but I don't think I can make the chain check work
2738     // otherwise.  (The chain of the store has to be the same as the chain
2739     // of the CopyFromReg, or else we can't replace the CopyFromReg with
2740     // a direct reference to "SP".)
2741     //
2742     // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
2743     // a different addressing mode from other four-byte stores.
2744     //
2745     // This pattern usually comes up with call arguments.
2746     StoreSDNode *ST = cast<StoreSDNode>(N);
2747     SDValue Ptr = ST->getBasePtr();
2748     if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
2749       int RHSC = 0;
2750       if (Ptr.getOpcode() == ISD::ADD &&
2751           isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
2752         Ptr = Ptr.getOperand(0);
2753 
2754       if (Ptr.getOpcode() == ISD::CopyFromReg &&
2755           cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
2756           Ptr.getOperand(0) == ST->getChain()) {
2757         SDValue Ops[] = {ST->getValue(),
2758                          CurDAG->getRegister(ARM::SP, MVT::i32),
2759                          CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
2760                          getAL(CurDAG, dl),
2761                          CurDAG->getRegister(0, MVT::i32),
2762                          ST->getChain()};
2763         MachineSDNode *ResNode =
2764             CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
2765         MachineMemOperand *MemOp = ST->getMemOperand();
2766         CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2767         ReplaceNode(N, ResNode);
2768         return;
2769       }
2770     }
2771     break;
2772   }
2773   case ISD::WRITE_REGISTER:
2774     if (tryWriteRegister(N))
2775       return;
2776     break;
2777   case ISD::READ_REGISTER:
2778     if (tryReadRegister(N))
2779       return;
2780     break;
2781   case ISD::INLINEASM:
2782   case ISD::INLINEASM_BR:
2783     if (tryInlineAsm(N))
2784       return;
2785     break;
2786   case ISD::XOR:
2787     // Select special operations if XOR node forms integer ABS pattern
2788     if (tryABSOp(N))
2789       return;
2790     // Other cases are autogenerated.
2791     break;
2792   case ISD::Constant: {
2793     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2794     // If we can't materialize the constant we need to use a literal pool
2795     if (ConstantMaterializationCost(Val) > 2) {
2796       SDValue CPIdx = CurDAG->getTargetConstantPool(
2797           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2798           TLI->getPointerTy(CurDAG->getDataLayout()));
2799 
2800       SDNode *ResNode;
2801       if (Subtarget->isThumb()) {
2802         SDValue Ops[] = {
2803           CPIdx,
2804           getAL(CurDAG, dl),
2805           CurDAG->getRegister(0, MVT::i32),
2806           CurDAG->getEntryNode()
2807         };
2808         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2809                                          Ops);
2810       } else {
2811         SDValue Ops[] = {
2812           CPIdx,
2813           CurDAG->getTargetConstant(0, dl, MVT::i32),
2814           getAL(CurDAG, dl),
2815           CurDAG->getRegister(0, MVT::i32),
2816           CurDAG->getEntryNode()
2817         };
2818         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2819                                          Ops);
2820       }
2821       // Annotate the Node with memory operand information so that MachineInstr
2822       // queries work properly. This e.g. gives the register allocation the
2823       // required information for rematerialization.
2824       MachineFunction& MF = CurDAG->getMachineFunction();
2825       MachineMemOperand *MemOp =
2826           MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
2827                                   MachineMemOperand::MOLoad, 4, 4);
2828 
2829       CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2830 
2831       ReplaceNode(N, ResNode);
2832       return;
2833     }
2834 
2835     // Other cases are autogenerated.
2836     break;
2837   }
2838   case ISD::FrameIndex: {
2839     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2840     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2841     SDValue TFI = CurDAG->getTargetFrameIndex(
2842         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2843     if (Subtarget->isThumb1Only()) {
2844       // Set the alignment of the frame object to 4, to avoid having to generate
2845       // more than one ADD
2846       MachineFrameInfo &MFI = MF->getFrameInfo();
2847       if (MFI.getObjectAlignment(FI) < 4)
2848         MFI.setObjectAlignment(FI, 4);
2849       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2850                            CurDAG->getTargetConstant(0, dl, MVT::i32));
2851       return;
2852     } else {
2853       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2854                       ARM::t2ADDri : ARM::ADDri);
2855       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2856                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2857                         CurDAG->getRegister(0, MVT::i32) };
2858       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2859       return;
2860     }
2861   }
2862   case ISD::SRL:
2863     if (tryV6T2BitfieldExtractOp(N, false))
2864       return;
2865     break;
2866   case ISD::SIGN_EXTEND_INREG:
2867   case ISD::SRA:
2868     if (tryV6T2BitfieldExtractOp(N, true))
2869       return;
2870     break;
2871   case ISD::MUL:
2872     if (Subtarget->isThumb1Only())
2873       break;
2874     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2875       unsigned RHSV = C->getZExtValue();
2876       if (!RHSV) break;
2877       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2878         unsigned ShImm = Log2_32(RHSV-1);
2879         if (ShImm >= 32)
2880           break;
2881         SDValue V = N->getOperand(0);
2882         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2883         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2884         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2885         if (Subtarget->isThumb()) {
2886           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2887           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2888           return;
2889         } else {
2890           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2891                             Reg0 };
2892           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2893           return;
2894         }
2895       }
2896       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2897         unsigned ShImm = Log2_32(RHSV+1);
2898         if (ShImm >= 32)
2899           break;
2900         SDValue V = N->getOperand(0);
2901         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2902         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2903         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2904         if (Subtarget->isThumb()) {
2905           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2906           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2907           return;
2908         } else {
2909           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2910                             Reg0 };
2911           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2912           return;
2913         }
2914       }
2915     }
2916     break;
2917   case ISD::AND: {
2918     // Check for unsigned bitfield extract
2919     if (tryV6T2BitfieldExtractOp(N, false))
2920       return;
2921 
2922     // If an immediate is used in an AND node, it is possible that the immediate
2923     // can be more optimally materialized when negated. If this is the case we
2924     // can negate the immediate and use a BIC instead.
2925     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2926     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2927       uint32_t Imm = (uint32_t) N1C->getZExtValue();
2928 
2929       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2930       // immediate can be negated and fit in the immediate operand of
2931       // a t2BIC, don't do any manual transform here as this can be
2932       // handled by the generic ISel machinery.
2933       bool PreferImmediateEncoding =
2934         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2935       if (!PreferImmediateEncoding &&
2936           ConstantMaterializationCost(Imm) >
2937               ConstantMaterializationCost(~Imm)) {
2938         // The current immediate costs more to materialize than a negated
2939         // immediate, so negate the immediate and use a BIC.
2940         SDValue NewImm =
2941           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2942         // If the new constant didn't exist before, reposition it in the topological
2943         // ordering so it is just before N. Otherwise, don't touch its location.
2944         if (NewImm->getNodeId() == -1)
2945           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2946 
2947         if (!Subtarget->hasThumb2()) {
2948           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2949                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
2950                            CurDAG->getRegister(0, MVT::i32)};
2951           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2952           return;
2953         } else {
2954           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2955                            CurDAG->getRegister(0, MVT::i32),
2956                            CurDAG->getRegister(0, MVT::i32)};
2957           ReplaceNode(N,
2958                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2959           return;
2960         }
2961       }
2962     }
2963 
2964     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2965     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2966     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2967     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2968     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2969     EVT VT = N->getValueType(0);
2970     if (VT != MVT::i32)
2971       break;
2972     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2973       ? ARM::t2MOVTi16
2974       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2975     if (!Opc)
2976       break;
2977     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2978     N1C = dyn_cast<ConstantSDNode>(N1);
2979     if (!N1C)
2980       break;
2981     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2982       SDValue N2 = N0.getOperand(1);
2983       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2984       if (!N2C)
2985         break;
2986       unsigned N1CVal = N1C->getZExtValue();
2987       unsigned N2CVal = N2C->getZExtValue();
2988       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2989           (N1CVal & 0xffffU) == 0xffffU &&
2990           (N2CVal & 0xffffU) == 0x0U) {
2991         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2992                                                   dl, MVT::i32);
2993         SDValue Ops[] = { N0.getOperand(0), Imm16,
2994                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2995         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2996         return;
2997       }
2998     }
2999 
3000     break;
3001   }
3002   case ARMISD::UMAAL: {
3003     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3004     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3005                       N->getOperand(2), N->getOperand(3),
3006                       getAL(CurDAG, dl),
3007                       CurDAG->getRegister(0, MVT::i32) };
3008     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3009     return;
3010   }
3011   case ARMISD::UMLAL:{
3012     if (Subtarget->isThumb()) {
3013       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3014                         N->getOperand(3), getAL(CurDAG, dl),
3015                         CurDAG->getRegister(0, MVT::i32)};
3016       ReplaceNode(
3017           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3018       return;
3019     }else{
3020       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3021                         N->getOperand(3), getAL(CurDAG, dl),
3022                         CurDAG->getRegister(0, MVT::i32),
3023                         CurDAG->getRegister(0, MVT::i32) };
3024       ReplaceNode(N, CurDAG->getMachineNode(
3025                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3026                          MVT::i32, MVT::i32, Ops));
3027       return;
3028     }
3029   }
3030   case ARMISD::SMLAL:{
3031     if (Subtarget->isThumb()) {
3032       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3033                         N->getOperand(3), getAL(CurDAG, dl),
3034                         CurDAG->getRegister(0, MVT::i32)};
3035       ReplaceNode(
3036           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3037       return;
3038     }else{
3039       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3040                         N->getOperand(3), getAL(CurDAG, dl),
3041                         CurDAG->getRegister(0, MVT::i32),
3042                         CurDAG->getRegister(0, MVT::i32) };
3043       ReplaceNode(N, CurDAG->getMachineNode(
3044                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3045                          MVT::i32, MVT::i32, Ops));
3046       return;
3047     }
3048   }
3049   case ARMISD::SUBE: {
3050     if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3051       break;
3052     // Look for a pattern to match SMMLS
3053     // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3054     if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3055         N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3056         !SDValue(N, 1).use_empty())
3057       break;
3058 
3059     if (Subtarget->isThumb())
3060       assert(Subtarget->hasThumb2() &&
3061              "This pattern should not be generated for Thumb");
3062 
3063     SDValue SmulLoHi = N->getOperand(1);
3064     SDValue Subc = N->getOperand(2);
3065     auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
3066 
3067     if (!Zero || Zero->getZExtValue() != 0 ||
3068         Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3069         N->getOperand(1) != SmulLoHi.getValue(1) ||
3070         N->getOperand(2) != Subc.getValue(1))
3071       break;
3072 
3073     unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3074     SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3075                       N->getOperand(0), getAL(CurDAG, dl),
3076                       CurDAG->getRegister(0, MVT::i32) };
3077     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3078     return;
3079   }
3080   case ISD::LOAD: {
3081     if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3082       return;
3083     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3084       if (tryT2IndexedLoad(N))
3085         return;
3086     } else if (Subtarget->isThumb()) {
3087       if (tryT1IndexedLoad(N))
3088         return;
3089     } else if (tryARMIndexedLoad(N))
3090       return;
3091     // Other cases are autogenerated.
3092     break;
3093   }
3094   case ARMISD::WLS:
3095   case ARMISD::LE: {
3096     SDValue Ops[] = { N->getOperand(1),
3097                       N->getOperand(2),
3098                       N->getOperand(0) };
3099     unsigned Opc = N->getOpcode() == ARMISD::WLS ?
3100       ARM::t2WhileLoopStart : ARM::t2LoopEnd;
3101     SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
3102     ReplaceUses(N, New);
3103     CurDAG->RemoveDeadNode(N);
3104     return;
3105   }
3106   case ARMISD::LOOP_DEC: {
3107     SDValue Ops[] = { N->getOperand(1),
3108                       N->getOperand(2),
3109                       N->getOperand(0) };
3110     SDNode *Dec =
3111       CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3112                              CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
3113     ReplaceUses(N, Dec);
3114     CurDAG->RemoveDeadNode(N);
3115     return;
3116   }
  case ARMISD::BRCOND: {
    // Hand-written selection of the conditional branch. The node operands are
    // read as: 0 = chain, 1 = destination basic block, 2 = condition-code
    // constant, 3 = a register node, 4 = the incoming flag value.
    //
    // Two special cases are handled before emitting the plain branch:
    //  * a CMPZ fed by the loop_decrement_reg intrinsic is turned into a
    //    t2LoopDec / t2LoopEnd pair;
    //  * any other CMPZ is passed to SelectCMPZ, which may ask for the EQ/NE
    //    condition to be rewritten as PL/MI.

    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    // t2Bcc for Thumb with Thumb2, tBcc for Thumb without it, Bcc otherwise.
    unsigned Opc = Subtarget->isThumb() ?
      ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
    SDValue Chain = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    SDValue N2 = N->getOperand(2);
    SDValue N3 = N->getOperand(3);
    SDValue InFlag = N->getOperand(4);
    assert(N1.getOpcode() == ISD::BasicBlock);
    assert(N2.getOpcode() == ISD::Constant);
    assert(N3.getOpcode() == ISD::Register);

    unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();

    if (InFlag.getOpcode() == ARMISD::CMPZ) {
      if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
        SDValue Int = InFlag.getOperand(0);
        // Operand 1 of the intrinsic node holds the intrinsic ID.
        uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();

        // Handle low-overhead loops.
        if (ID == Intrinsic::loop_decrement_reg) {
          SDValue Elements = Int.getOperand(2);
          // Re-create intrinsic operand 3 as a target constant.
          SDValue Size = CurDAG->getTargetConstant(
            cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
                                 MVT::i32);

          // t2LoopDec takes over the intrinsic's chain (its operand 0) and
          // replaces the intrinsic node.
          SDValue Args[] = { Elements, Size, Int.getOperand(0) };
          SDNode *LoopDec =
            CurDAG->getMachineNode(ARM::t2LoopDec, dl,
                                   CurDAG->getVTList(MVT::i32, MVT::Other),
                                   Args);
          ReplaceUses(Int.getNode(), LoopDec);

          // t2LoopEnd consumes the i32 result of t2LoopDec, the destination
          // block and the branch's chain, and replaces the branch itself.
          SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
          SDNode *LoopEnd =
            CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);

          ReplaceUses(N, LoopEnd);
          // The branch, the CMPZ and the intrinsic are removed explicitly.
          CurDAG->RemoveDeadNode(N);
          CurDAG->RemoveDeadNode(InFlag.getNode());
          CurDAG->RemoveDeadNode(Int.getNode());
          return;
        }
      }

      bool SwitchEQNEToPLMI;
      SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
      // Re-read the flag operand after SelectCMPZ.
      // NOTE(review): presumably because SelectCMPZ can replace the CMPZ node
      // feeding N — confirm against SelectCMPZ.
      InFlag = N->getOperand(4);

      if (SwitchEQNEToPLMI) {
        // Rewrite the condition: NE becomes MI, EQ becomes PL. Any other
        // condition code is treated as unreachable.
        switch ((ARMCC::CondCodes)CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = (unsigned)ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = (unsigned)ARMCC::PL;
          break;
        }
      }
    }

    // Emit the branch: (dest block, condition code, N3, chain, flag), with
    // results (Other, Glue).
    SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
    SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
    SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
                                             MVT::Glue, Ops);
    Chain = SDValue(ResNode, 0);
    // Only forward the glue result when the original node has a second value.
    if (N->getNumValues() == 2) {
      InFlag = SDValue(ResNode, 1);
      ReplaceUses(SDValue(N, 1), InFlag);
    }
    ReplaceUses(SDValue(N, 0),
                SDValue(Chain.getNode(), Chain.getResNo()));
    CurDAG->RemoveDeadNode(N);
    return;
  }
3205 
3206   case ARMISD::CMPZ: {
3207     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3208     //   This allows us to avoid materializing the expensive negative constant.
3209     //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
3210     //   for its glue output.
3211     SDValue X = N->getOperand(0);
3212     auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3213     if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3214       int64_t Addend = -C->getSExtValue();
3215 
3216       SDNode *Add = nullptr;
3217       // ADDS can be better than CMN if the immediate fits in a
3218       // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3219       // Outside that range we can just use a CMN which is 32-bit but has a
3220       // 12-bit immediate range.
3221       if (Addend < 1<<8) {
3222         if (Subtarget->isThumb2()) {
3223           SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3224                             getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3225                             CurDAG->getRegister(0, MVT::i32) };
3226           Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3227         } else {
3228           unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3229           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3230                            CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3231                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3232           Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3233         }
3234       }
3235       if (Add) {
3236         SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3237         CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3238       }
3239     }
3240     // Other cases are autogenerated.
3241     break;
3242   }
3243 
3244   case ARMISD::CMOV: {
3245     SDValue InFlag = N->getOperand(4);
3246 
3247     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3248       bool SwitchEQNEToPLMI;
3249       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3250 
3251       if (SwitchEQNEToPLMI) {
3252         SDValue ARMcc = N->getOperand(2);
3253         ARMCC::CondCodes CC =
3254           (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3255 
3256         switch (CC) {
3257         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3258         case ARMCC::NE:
3259           CC = ARMCC::MI;
3260           break;
3261         case ARMCC::EQ:
3262           CC = ARMCC::PL;
3263           break;
3264         }
3265         SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3266         SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3267                          N->getOperand(3), N->getOperand(4)};
3268         CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3269       }
3270 
3271     }
3272     // Other cases are autogenerated.
3273     break;
3274   }
3275 
3276   case ARMISD::VZIP: {
3277     unsigned Opc = 0;
3278     EVT VT = N->getValueType(0);
3279     switch (VT.getSimpleVT().SimpleTy) {
3280     default: return;
3281     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
3282     case MVT::v4f16:
3283     case MVT::v4i16: Opc = ARM::VZIPd16; break;
3284     case MVT::v2f32:
3285     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3286     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3287     case MVT::v16i8: Opc = ARM::VZIPq8; break;
3288     case MVT::v8f16:
3289     case MVT::v8i16: Opc = ARM::VZIPq16; break;
3290     case MVT::v4f32:
3291     case MVT::v4i32: Opc = ARM::VZIPq32; break;
3292     }
3293     SDValue Pred = getAL(CurDAG, dl);
3294     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3295     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3296     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3297     return;
3298   }
3299   case ARMISD::VUZP: {
3300     unsigned Opc = 0;
3301     EVT VT = N->getValueType(0);
3302     switch (VT.getSimpleVT().SimpleTy) {
3303     default: return;
3304     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3305     case MVT::v4f16:
3306     case MVT::v4i16: Opc = ARM::VUZPd16; break;
3307     case MVT::v2f32:
3308     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3309     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3310     case MVT::v16i8: Opc = ARM::VUZPq8; break;
3311     case MVT::v8f16:
3312     case MVT::v8i16: Opc = ARM::VUZPq16; break;
3313     case MVT::v4f32:
3314     case MVT::v4i32: Opc = ARM::VUZPq32; break;
3315     }
3316     SDValue Pred = getAL(CurDAG, dl);
3317     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3318     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3319     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3320     return;
3321   }
3322   case ARMISD::VTRN: {
3323     unsigned Opc = 0;
3324     EVT VT = N->getValueType(0);
3325     switch (VT.getSimpleVT().SimpleTy) {
3326     default: return;
3327     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3328     case MVT::v4f16:
3329     case MVT::v4i16: Opc = ARM::VTRNd16; break;
3330     case MVT::v2f32:
3331     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3332     case MVT::v16i8: Opc = ARM::VTRNq8; break;
3333     case MVT::v8f16:
3334     case MVT::v8i16: Opc = ARM::VTRNq16; break;
3335     case MVT::v4f32:
3336     case MVT::v4i32: Opc = ARM::VTRNq32; break;
3337     }
3338     SDValue Pred = getAL(CurDAG, dl);
3339     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3340     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3341     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3342     return;
3343   }
3344   case ARMISD::BUILD_VECTOR: {
3345     EVT VecVT = N->getValueType(0);
3346     EVT EltVT = VecVT.getVectorElementType();
3347     unsigned NumElts = VecVT.getVectorNumElements();
3348     if (EltVT == MVT::f64) {
3349       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3350       ReplaceNode(
3351           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3352       return;
3353     }
3354     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3355     if (NumElts == 2) {
3356       ReplaceNode(
3357           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3358       return;
3359     }
3360     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3361     ReplaceNode(N,
3362                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3363                                     N->getOperand(2), N->getOperand(3)));
3364     return;
3365   }
3366 
3367   case ARMISD::VLD1DUP: {
3368     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3369                                          ARM::VLD1DUPd32 };
3370     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3371                                          ARM::VLD1DUPq32 };
3372     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
3373     return;
3374   }
3375 
3376   case ARMISD::VLD2DUP: {
3377     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3378                                         ARM::VLD2DUPd32 };
3379     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
3380     return;
3381   }
3382 
3383   case ARMISD::VLD3DUP: {
3384     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3385                                         ARM::VLD3DUPd16Pseudo,
3386                                         ARM::VLD3DUPd32Pseudo };
3387     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
3388     return;
3389   }
3390 
3391   case ARMISD::VLD4DUP: {
3392     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3393                                         ARM::VLD4DUPd16Pseudo,
3394                                         ARM::VLD4DUPd32Pseudo };
3395     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
3396     return;
3397   }
3398 
3399   case ARMISD::VLD1DUP_UPD: {
3400     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3401                                          ARM::VLD1DUPd16wb_fixed,
3402                                          ARM::VLD1DUPd32wb_fixed };
3403     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3404                                          ARM::VLD1DUPq16wb_fixed,
3405                                          ARM::VLD1DUPq32wb_fixed };
3406     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
3407     return;
3408   }
3409 
3410   case ARMISD::VLD2DUP_UPD: {
3411     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3412                                         ARM::VLD2DUPd16wb_fixed,
3413                                         ARM::VLD2DUPd32wb_fixed };
3414     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
3415     return;
3416   }
3417 
3418   case ARMISD::VLD3DUP_UPD: {
3419     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3420                                         ARM::VLD3DUPd16Pseudo_UPD,
3421                                         ARM::VLD3DUPd32Pseudo_UPD };
3422     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
3423     return;
3424   }
3425 
3426   case ARMISD::VLD4DUP_UPD: {
3427     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3428                                         ARM::VLD4DUPd16Pseudo_UPD,
3429                                         ARM::VLD4DUPd32Pseudo_UPD };
3430     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
3431     return;
3432   }
3433 
3434   case ARMISD::VLD1_UPD: {
3435     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3436                                          ARM::VLD1d16wb_fixed,
3437                                          ARM::VLD1d32wb_fixed,
3438                                          ARM::VLD1d64wb_fixed };
3439     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3440                                          ARM::VLD1q16wb_fixed,
3441                                          ARM::VLD1q32wb_fixed,
3442                                          ARM::VLD1q64wb_fixed };
3443     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3444     return;
3445   }
3446 
3447   case ARMISD::VLD2_UPD: {
3448     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3449                                          ARM::VLD2d16wb_fixed,
3450                                          ARM::VLD2d32wb_fixed,
3451                                          ARM::VLD1q64wb_fixed};
3452     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3453                                          ARM::VLD2q16PseudoWB_fixed,
3454                                          ARM::VLD2q32PseudoWB_fixed };
3455     SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3456     return;
3457   }
3458 
3459   case ARMISD::VLD3_UPD: {
3460     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3461                                          ARM::VLD3d16Pseudo_UPD,
3462                                          ARM::VLD3d32Pseudo_UPD,
3463                                          ARM::VLD1d64TPseudoWB_fixed};
3464     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3465                                           ARM::VLD3q16Pseudo_UPD,
3466                                           ARM::VLD3q32Pseudo_UPD };
3467     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3468                                           ARM::VLD3q16oddPseudo_UPD,
3469                                           ARM::VLD3q32oddPseudo_UPD };
3470     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3471     return;
3472   }
3473 
3474   case ARMISD::VLD4_UPD: {
3475     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3476                                          ARM::VLD4d16Pseudo_UPD,
3477                                          ARM::VLD4d32Pseudo_UPD,
3478                                          ARM::VLD1d64QPseudoWB_fixed};
3479     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3480                                           ARM::VLD4q16Pseudo_UPD,
3481                                           ARM::VLD4q32Pseudo_UPD };
3482     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3483                                           ARM::VLD4q16oddPseudo_UPD,
3484                                           ARM::VLD4q32oddPseudo_UPD };
3485     SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3486     return;
3487   }
3488 
3489   case ARMISD::VLD2LN_UPD: {
3490     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3491                                          ARM::VLD2LNd16Pseudo_UPD,
3492                                          ARM::VLD2LNd32Pseudo_UPD };
3493     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3494                                          ARM::VLD2LNq32Pseudo_UPD };
3495     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3496     return;
3497   }
3498 
3499   case ARMISD::VLD3LN_UPD: {
3500     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3501                                          ARM::VLD3LNd16Pseudo_UPD,
3502                                          ARM::VLD3LNd32Pseudo_UPD };
3503     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3504                                          ARM::VLD3LNq32Pseudo_UPD };
3505     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3506     return;
3507   }
3508 
3509   case ARMISD::VLD4LN_UPD: {
3510     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3511                                          ARM::VLD4LNd16Pseudo_UPD,
3512                                          ARM::VLD4LNd32Pseudo_UPD };
3513     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3514                                          ARM::VLD4LNq32Pseudo_UPD };
3515     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3516     return;
3517   }
3518 
3519   case ARMISD::VST1_UPD: {
3520     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3521                                          ARM::VST1d16wb_fixed,
3522                                          ARM::VST1d32wb_fixed,
3523                                          ARM::VST1d64wb_fixed };
3524     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3525                                          ARM::VST1q16wb_fixed,
3526                                          ARM::VST1q32wb_fixed,
3527                                          ARM::VST1q64wb_fixed };
3528     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3529     return;
3530   }
3531 
3532   case ARMISD::VST2_UPD: {
3533     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3534                                          ARM::VST2d16wb_fixed,
3535                                          ARM::VST2d32wb_fixed,
3536                                          ARM::VST1q64wb_fixed};
3537     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3538                                          ARM::VST2q16PseudoWB_fixed,
3539                                          ARM::VST2q32PseudoWB_fixed };
3540     SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3541     return;
3542   }
3543 
3544   case ARMISD::VST3_UPD: {
3545     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3546                                          ARM::VST3d16Pseudo_UPD,
3547                                          ARM::VST3d32Pseudo_UPD,
3548                                          ARM::VST1d64TPseudoWB_fixed};
3549     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3550                                           ARM::VST3q16Pseudo_UPD,
3551                                           ARM::VST3q32Pseudo_UPD };
3552     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3553                                           ARM::VST3q16oddPseudo_UPD,
3554                                           ARM::VST3q32oddPseudo_UPD };
3555     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3556     return;
3557   }
3558 
3559   case ARMISD::VST4_UPD: {
3560     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3561                                          ARM::VST4d16Pseudo_UPD,
3562                                          ARM::VST4d32Pseudo_UPD,
3563                                          ARM::VST1d64QPseudoWB_fixed};
3564     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3565                                           ARM::VST4q16Pseudo_UPD,
3566                                           ARM::VST4q32Pseudo_UPD };
3567     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3568                                           ARM::VST4q16oddPseudo_UPD,
3569                                           ARM::VST4q32oddPseudo_UPD };
3570     SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3571     return;
3572   }
3573 
3574   case ARMISD::VST2LN_UPD: {
3575     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3576                                          ARM::VST2LNd16Pseudo_UPD,
3577                                          ARM::VST2LNd32Pseudo_UPD };
3578     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3579                                          ARM::VST2LNq32Pseudo_UPD };
3580     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3581     return;
3582   }
3583 
3584   case ARMISD::VST3LN_UPD: {
3585     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3586                                          ARM::VST3LNd16Pseudo_UPD,
3587                                          ARM::VST3LNd32Pseudo_UPD };
3588     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3589                                          ARM::VST3LNq32Pseudo_UPD };
3590     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3591     return;
3592   }
3593 
3594   case ARMISD::VST4LN_UPD: {
3595     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3596                                          ARM::VST4LNd16Pseudo_UPD,
3597                                          ARM::VST4LNd32Pseudo_UPD };
3598     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3599                                          ARM::VST4LNq32Pseudo_UPD };
3600     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3601     return;
3602   }
3603 
3604   case ISD::INTRINSIC_VOID:
3605   case ISD::INTRINSIC_W_CHAIN: {
3606     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3607     switch (IntNo) {
3608     default:
3609       break;
3610 
3611     case Intrinsic::arm_mrrc:
3612     case Intrinsic::arm_mrrc2: {
3613       SDLoc dl(N);
3614       SDValue Chain = N->getOperand(0);
3615       unsigned Opc;
3616 
3617       if (Subtarget->isThumb())
3618         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3619       else
3620         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3621 
3622       SmallVector<SDValue, 5> Ops;
3623       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3624       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3625       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3626 
3627       // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3628       // instruction will always be '1111' but it is possible in assembly language to specify
3629       // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3630       if (Opc != ARM::MRRC2) {
3631         Ops.push_back(getAL(CurDAG, dl));
3632         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3633       }
3634 
3635       Ops.push_back(Chain);
3636 
3637       // Writes to two registers.
3638       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3639 
3640       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3641       return;
3642     }
3643     case Intrinsic::arm_ldaexd:
3644     case Intrinsic::arm_ldrexd: {
3645       SDLoc dl(N);
3646       SDValue Chain = N->getOperand(0);
3647       SDValue MemAddr = N->getOperand(2);
3648       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3649 
3650       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3651       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3652                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3653 
3654       // arm_ldrexd returns a i64 value in {i32, i32}
3655       std::vector<EVT> ResTys;
3656       if (isThumb) {
3657         ResTys.push_back(MVT::i32);
3658         ResTys.push_back(MVT::i32);
3659       } else
3660         ResTys.push_back(MVT::Untyped);
3661       ResTys.push_back(MVT::Other);
3662 
3663       // Place arguments in the right order.
3664       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3665                        CurDAG->getRegister(0, MVT::i32), Chain};
3666       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3667       // Transfer memoperands.
3668       MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3669       CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3670 
3671       // Remap uses.
3672       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3673       if (!SDValue(N, 0).use_empty()) {
3674         SDValue Result;
3675         if (isThumb)
3676           Result = SDValue(Ld, 0);
3677         else {
3678           SDValue SubRegIdx =
3679             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3680           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3681               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3682           Result = SDValue(ResNode,0);
3683         }
3684         ReplaceUses(SDValue(N, 0), Result);
3685       }
3686       if (!SDValue(N, 1).use_empty()) {
3687         SDValue Result;
3688         if (isThumb)
3689           Result = SDValue(Ld, 1);
3690         else {
3691           SDValue SubRegIdx =
3692             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3693           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3694               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3695           Result = SDValue(ResNode,0);
3696         }
3697         ReplaceUses(SDValue(N, 1), Result);
3698       }
3699       ReplaceUses(SDValue(N, 2), OutChain);
3700       CurDAG->RemoveDeadNode(N);
3701       return;
3702     }
3703     case Intrinsic::arm_stlexd:
3704     case Intrinsic::arm_strexd: {
3705       SDLoc dl(N);
3706       SDValue Chain = N->getOperand(0);
3707       SDValue Val0 = N->getOperand(2);
3708       SDValue Val1 = N->getOperand(3);
3709       SDValue MemAddr = N->getOperand(4);
3710 
      // Store exclusive double returns an i32 value which is the return status
      // of the issued store.
3713       const EVT ResTys[] = {MVT::i32, MVT::Other};
3714 
3715       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3716       // Place arguments in the right order.
3717       SmallVector<SDValue, 7> Ops;
3718       if (isThumb) {
3719         Ops.push_back(Val0);
3720         Ops.push_back(Val1);
3721       } else
3722         // arm_strexd uses GPRPair.
3723         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3724       Ops.push_back(MemAddr);
3725       Ops.push_back(getAL(CurDAG, dl));
3726       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3727       Ops.push_back(Chain);
3728 
3729       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3730       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3731                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3732 
3733       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3734       // Transfer memoperands.
3735       MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3736       CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3737 
3738       ReplaceNode(N, St);
3739       return;
3740     }
3741 
3742     case Intrinsic::arm_neon_vld1: {
3743       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3744                                            ARM::VLD1d32, ARM::VLD1d64 };
3745       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3746                                            ARM::VLD1q32, ARM::VLD1q64};
3747       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3748       return;
3749     }
3750 
3751     case Intrinsic::arm_neon_vld1x2: {
3752       static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3753                                            ARM::VLD1q32, ARM::VLD1q64 };
3754       static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
3755                                            ARM::VLD1d16QPseudo,
3756                                            ARM::VLD1d32QPseudo,
3757                                            ARM::VLD1d64QPseudo };
3758       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3759       return;
3760     }
3761 
3762     case Intrinsic::arm_neon_vld1x3: {
3763       static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
3764                                            ARM::VLD1d16TPseudo,
3765                                            ARM::VLD1d32TPseudo,
3766                                            ARM::VLD1d64TPseudo };
3767       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
3768                                             ARM::VLD1q16LowTPseudo_UPD,
3769                                             ARM::VLD1q32LowTPseudo_UPD,
3770                                             ARM::VLD1q64LowTPseudo_UPD };
3771       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
3772                                             ARM::VLD1q16HighTPseudo,
3773                                             ARM::VLD1q32HighTPseudo,
3774                                             ARM::VLD1q64HighTPseudo };
3775       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3776       return;
3777     }
3778 
3779     case Intrinsic::arm_neon_vld1x4: {
3780       static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
3781                                            ARM::VLD1d16QPseudo,
3782                                            ARM::VLD1d32QPseudo,
3783                                            ARM::VLD1d64QPseudo };
3784       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
3785                                             ARM::VLD1q16LowQPseudo_UPD,
3786                                             ARM::VLD1q32LowQPseudo_UPD,
3787                                             ARM::VLD1q64LowQPseudo_UPD };
3788       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
3789                                             ARM::VLD1q16HighQPseudo,
3790                                             ARM::VLD1q32HighQPseudo,
3791                                             ARM::VLD1q64HighQPseudo };
3792       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3793       return;
3794     }
3795 
3796     case Intrinsic::arm_neon_vld2: {
3797       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3798                                            ARM::VLD2d32, ARM::VLD1q64 };
3799       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3800                                            ARM::VLD2q32Pseudo };
3801       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3802       return;
3803     }
3804 
3805     case Intrinsic::arm_neon_vld3: {
3806       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3807                                            ARM::VLD3d16Pseudo,
3808                                            ARM::VLD3d32Pseudo,
3809                                            ARM::VLD1d64TPseudo };
3810       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3811                                             ARM::VLD3q16Pseudo_UPD,
3812                                             ARM::VLD3q32Pseudo_UPD };
3813       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3814                                             ARM::VLD3q16oddPseudo,
3815                                             ARM::VLD3q32oddPseudo };
3816       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3817       return;
3818     }
3819 
3820     case Intrinsic::arm_neon_vld4: {
3821       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3822                                            ARM::VLD4d16Pseudo,
3823                                            ARM::VLD4d32Pseudo,
3824                                            ARM::VLD1d64QPseudo };
3825       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3826                                             ARM::VLD4q16Pseudo_UPD,
3827                                             ARM::VLD4q32Pseudo_UPD };
3828       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3829                                             ARM::VLD4q16oddPseudo,
3830                                             ARM::VLD4q32oddPseudo };
3831       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3832       return;
3833     }
3834 
3835     case Intrinsic::arm_neon_vld2dup: {
3836       static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3837                                            ARM::VLD2DUPd32, ARM::VLD1q64 };
3838       static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
3839                                             ARM::VLD2DUPq16EvenPseudo,
3840                                             ARM::VLD2DUPq32EvenPseudo };
3841       static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
3842                                             ARM::VLD2DUPq16OddPseudo,
3843                                             ARM::VLD2DUPq32OddPseudo };
3844       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
3845                    DOpcodes, QOpcodes0, QOpcodes1);
3846       return;
3847     }
3848 
3849     case Intrinsic::arm_neon_vld3dup: {
3850       static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
3851                                            ARM::VLD3DUPd16Pseudo,
3852                                            ARM::VLD3DUPd32Pseudo,
3853                                            ARM::VLD1d64TPseudo };
3854       static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
3855                                             ARM::VLD3DUPq16EvenPseudo,
3856                                             ARM::VLD3DUPq32EvenPseudo };
3857       static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
3858                                             ARM::VLD3DUPq16OddPseudo,
3859                                             ARM::VLD3DUPq32OddPseudo };
3860       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
3861                    DOpcodes, QOpcodes0, QOpcodes1);
3862       return;
3863     }
3864 
3865     case Intrinsic::arm_neon_vld4dup: {
3866       static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
3867                                            ARM::VLD4DUPd16Pseudo,
3868                                            ARM::VLD4DUPd32Pseudo,
3869                                            ARM::VLD1d64QPseudo };
3870       static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
3871                                             ARM::VLD4DUPq16EvenPseudo,
3872                                             ARM::VLD4DUPq32EvenPseudo };
3873       static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
3874                                             ARM::VLD4DUPq16OddPseudo,
3875                                             ARM::VLD4DUPq32OddPseudo };
3876       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
3877                    DOpcodes, QOpcodes0, QOpcodes1);
3878       return;
3879     }
3880 
3881     case Intrinsic::arm_neon_vld2lane: {
3882       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3883                                            ARM::VLD2LNd16Pseudo,
3884                                            ARM::VLD2LNd32Pseudo };
3885       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3886                                            ARM::VLD2LNq32Pseudo };
3887       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3888       return;
3889     }
3890 
3891     case Intrinsic::arm_neon_vld3lane: {
3892       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3893                                            ARM::VLD3LNd16Pseudo,
3894                                            ARM::VLD3LNd32Pseudo };
3895       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3896                                            ARM::VLD3LNq32Pseudo };
3897       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3898       return;
3899     }
3900 
3901     case Intrinsic::arm_neon_vld4lane: {
3902       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3903                                            ARM::VLD4LNd16Pseudo,
3904                                            ARM::VLD4LNd32Pseudo };
3905       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3906                                            ARM::VLD4LNq32Pseudo };
3907       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3908       return;
3909     }
3910 
3911     case Intrinsic::arm_neon_vst1: {
3912       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3913                                            ARM::VST1d32, ARM::VST1d64 };
3914       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3915                                            ARM::VST1q32, ARM::VST1q64 };
3916       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3917       return;
3918     }
3919 
3920     case Intrinsic::arm_neon_vst1x2: {
3921       static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3922                                            ARM::VST1q32, ARM::VST1q64 };
3923       static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
3924                                            ARM::VST1d16QPseudo,
3925                                            ARM::VST1d32QPseudo,
3926                                            ARM::VST1d64QPseudo };
3927       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3928       return;
3929     }
3930 
3931     case Intrinsic::arm_neon_vst1x3: {
3932       static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
3933                                            ARM::VST1d16TPseudo,
3934                                            ARM::VST1d32TPseudo,
3935                                            ARM::VST1d64TPseudo };
3936       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
3937                                             ARM::VST1q16LowTPseudo_UPD,
3938                                             ARM::VST1q32LowTPseudo_UPD,
3939                                             ARM::VST1q64LowTPseudo_UPD };
3940       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
3941                                             ARM::VST1q16HighTPseudo,
3942                                             ARM::VST1q32HighTPseudo,
3943                                             ARM::VST1q64HighTPseudo };
3944       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3945       return;
3946     }
3947 
3948     case Intrinsic::arm_neon_vst1x4: {
3949       static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
3950                                            ARM::VST1d16QPseudo,
3951                                            ARM::VST1d32QPseudo,
3952                                            ARM::VST1d64QPseudo };
3953       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
3954                                             ARM::VST1q16LowQPseudo_UPD,
3955                                             ARM::VST1q32LowQPseudo_UPD,
3956                                             ARM::VST1q64LowQPseudo_UPD };
3957       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
3958                                             ARM::VST1q16HighQPseudo,
3959                                             ARM::VST1q32HighQPseudo,
3960                                             ARM::VST1q64HighQPseudo };
3961       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3962       return;
3963     }
3964 
3965     case Intrinsic::arm_neon_vst2: {
3966       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3967                                            ARM::VST2d32, ARM::VST1q64 };
3968       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3969                                            ARM::VST2q32Pseudo };
3970       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3971       return;
3972     }
3973 
3974     case Intrinsic::arm_neon_vst3: {
3975       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3976                                            ARM::VST3d16Pseudo,
3977                                            ARM::VST3d32Pseudo,
3978                                            ARM::VST1d64TPseudo };
3979       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3980                                             ARM::VST3q16Pseudo_UPD,
3981                                             ARM::VST3q32Pseudo_UPD };
3982       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3983                                             ARM::VST3q16oddPseudo,
3984                                             ARM::VST3q32oddPseudo };
3985       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3986       return;
3987     }
3988 
3989     case Intrinsic::arm_neon_vst4: {
3990       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3991                                            ARM::VST4d16Pseudo,
3992                                            ARM::VST4d32Pseudo,
3993                                            ARM::VST1d64QPseudo };
3994       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3995                                             ARM::VST4q16Pseudo_UPD,
3996                                             ARM::VST4q32Pseudo_UPD };
3997       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3998                                             ARM::VST4q16oddPseudo,
3999                                             ARM::VST4q32oddPseudo };
4000       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4001       return;
4002     }
4003 
4004     case Intrinsic::arm_neon_vst2lane: {
4005       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
4006                                            ARM::VST2LNd16Pseudo,
4007                                            ARM::VST2LNd32Pseudo };
4008       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
4009                                            ARM::VST2LNq32Pseudo };
4010       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
4011       return;
4012     }
4013 
4014     case Intrinsic::arm_neon_vst3lane: {
4015       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
4016                                            ARM::VST3LNd16Pseudo,
4017                                            ARM::VST3LNd32Pseudo };
4018       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
4019                                            ARM::VST3LNq32Pseudo };
4020       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
4021       return;
4022     }
4023 
4024     case Intrinsic::arm_neon_vst4lane: {
4025       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
4026                                            ARM::VST4LNd16Pseudo,
4027                                            ARM::VST4LNd32Pseudo };
4028       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
4029                                            ARM::VST4LNq32Pseudo };
4030       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
4031       return;
4032     }
4033     }
4034     break;
4035   }
4036 
4037   case ISD::ATOMIC_CMP_SWAP:
4038     SelectCMP_SWAP(N);
4039     return;
4040   }
4041 
4042   SelectCode(N);
4043 }
4044 
4045 // Inspect a register string of the form
4046 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
4047 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
4048 // and obtain the integer operands from them, adding these operands to the
4049 // provided vector.
4050 static void getIntOperandsFromRegisterString(StringRef RegString,
4051                                              SelectionDAG *CurDAG,
4052                                              const SDLoc &DL,
4053                                              std::vector<SDValue> &Ops) {
4054   SmallVector<StringRef, 5> Fields;
4055   RegString.split(Fields, ':');
4056 
4057   if (Fields.size() > 1) {
4058     bool AllIntFields = true;
4059 
4060     for (StringRef Field : Fields) {
4061       // Need to trim out leading 'cp' characters and get the integer field.
4062       unsigned IntField;
4063       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
4064       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
4065     }
4066 
4067     assert(AllIntFields &&
4068             "Unexpected non-integer value in special register string.");
4069   }
4070 }
4071 
4072 // Maps a Banked Register string to its mask value. The mask value returned is
4073 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
4074 // mask operand, which expresses which register is to be used, e.g. r8, and in
4075 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
4076 // was invalid.
4077 static inline int getBankedRegisterMask(StringRef RegString) {
4078   auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
4079   if (!TheReg)
4080      return -1;
4081   return TheReg->Encoding;
4082 }
4083 
4084 // The flags here are common to those allowed for apsr in the A class cores and
4085 // those allowed for the special registers in the M class cores. Returns a
4086 // value representing which flags were present, -1 if invalid.
4087 static inline int getMClassFlagsMask(StringRef Flags) {
4088   return StringSwitch<int>(Flags)
4089           .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
4090                          // correct when flags are not permitted
4091           .Case("g", 0x1)
4092           .Case("nzcvq", 0x2)
4093           .Case("nzcvqg", 0x3)
4094           .Default(-1);
4095 }
4096 
4097 // Maps MClass special registers string to its value for use in the
4098 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
4099 // Returns -1 to signify that the string was invalid.
4100 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
4101   auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
4102   const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
4103   if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
4104     return -1;
4105   return (int)(TheReg->Encoding & 0xFFF); // SYSm value
4106 }
4107 
4108 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
4109   // The mask operand contains the special register (R Bit) in bit 4, whether
4110   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4111   // bits 3-0 contains the fields to be accessed in the special register, set by
4112   // the flags provided with the register.
4113   int Mask = 0;
4114   if (Reg == "apsr") {
4115     // The flags permitted for apsr are the same flags that are allowed in
4116     // M class registers. We get the flag value and then shift the flags into
4117     // the correct place to combine with the mask.
4118     Mask = getMClassFlagsMask(Flags);
4119     if (Mask == -1)
4120       return -1;
4121     return Mask << 2;
4122   }
4123 
4124   if (Reg != "cpsr" && Reg != "spsr") {
4125     return -1;
4126   }
4127 
4128   // This is the same as if the flags were "fc"
4129   if (Flags.empty() || Flags == "all")
4130     return Mask | 0x9;
4131 
4132   // Inspect the supplied flags string and set the bits in the mask for
4133   // the relevant and valid flags allowed for cpsr and spsr.
4134   for (char Flag : Flags) {
4135     int FlagVal;
4136     switch (Flag) {
4137       case 'c':
4138         FlagVal = 0x1;
4139         break;
4140       case 'x':
4141         FlagVal = 0x2;
4142         break;
4143       case 's':
4144         FlagVal = 0x4;
4145         break;
4146       case 'f':
4147         FlagVal = 0x8;
4148         break;
4149       default:
4150         FlagVal = 0;
4151     }
4152 
4153     // This avoids allowing strings where the same flag bit appears twice.
4154     if (!FlagVal || (Mask & FlagVal))
4155       return -1;
4156     Mask |= FlagVal;
4157   }
4158 
4159   // If the register is spsr then we need to set the R bit.
4160   if (Reg == "spsr")
4161     Mask |= 0x10;
4162 
4163   return Mask;
4164 }
4165 
4166 // Lower the read_register intrinsic to ARM specific DAG nodes
4167 // using the supplied metadata string to select the instruction node to use
4168 // and the registers/masks to construct as operands for the node.
4169 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
4170   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4171   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4172   bool IsThumb2 = Subtarget->isThumb2();
4173   SDLoc DL(N);
4174 
4175   std::vector<SDValue> Ops;
4176   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4177 
4178   if (!Ops.empty()) {
4179     // If the special register string was constructed of fields (as defined
4180     // in the ACLE) then need to lower to MRC node (32 bit) or
4181     // MRRC node(64 bit), we can make the distinction based on the number of
4182     // operands we have.
4183     unsigned Opcode;
4184     SmallVector<EVT, 3> ResTypes;
4185     if (Ops.size() == 5){
4186       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
4187       ResTypes.append({ MVT::i32, MVT::Other });
4188     } else {
4189       assert(Ops.size() == 3 &&
4190               "Invalid number of fields in special register string.");
4191       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
4192       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
4193     }
4194 
4195     Ops.push_back(getAL(CurDAG, DL));
4196     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4197     Ops.push_back(N->getOperand(0));
4198     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
4199     return true;
4200   }
4201 
4202   std::string SpecialReg = RegString->getString().lower();
4203 
4204   int BankedReg = getBankedRegisterMask(SpecialReg);
4205   if (BankedReg != -1) {
4206     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
4207             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4208             N->getOperand(0) };
4209     ReplaceNode(
4210         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
4211                                   DL, MVT::i32, MVT::Other, Ops));
4212     return true;
4213   }
4214 
4215   // The VFP registers are read by creating SelectionDAG nodes with opcodes
4216   // corresponding to the register that is being read from. So we switch on the
4217   // string to find which opcode we need to use.
4218   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4219                     .Case("fpscr", ARM::VMRS)
4220                     .Case("fpexc", ARM::VMRS_FPEXC)
4221                     .Case("fpsid", ARM::VMRS_FPSID)
4222                     .Case("mvfr0", ARM::VMRS_MVFR0)
4223                     .Case("mvfr1", ARM::VMRS_MVFR1)
4224                     .Case("mvfr2", ARM::VMRS_MVFR2)
4225                     .Case("fpinst", ARM::VMRS_FPINST)
4226                     .Case("fpinst2", ARM::VMRS_FPINST2)
4227                     .Default(0);
4228 
4229   // If an opcode was found then we can lower the read to a VFP instruction.
4230   if (Opcode) {
4231     if (!Subtarget->hasVFP2Base())
4232       return false;
4233     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
4234       return false;
4235 
4236     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4237             N->getOperand(0) };
4238     ReplaceNode(N,
4239                 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4240     return true;
4241   }
4242 
4243   // If the target is M Class then need to validate that the register string
4244   // is an acceptable value, so check that a mask can be constructed from the
4245   // string.
4246   if (Subtarget->isMClass()) {
4247     int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4248     if (SYSmValue == -1)
4249       return false;
4250 
4251     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4252                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4253                       N->getOperand(0) };
4254     ReplaceNode(
4255         N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4256     return true;
4257   }
4258 
4259   // Here we know the target is not M Class so we need to check if it is one
4260   // of the remaining possible values which are apsr, cpsr or spsr.
4261   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4262     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4263             N->getOperand(0) };
4264     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4265                                           DL, MVT::i32, MVT::Other, Ops));
4266     return true;
4267   }
4268 
4269   if (SpecialReg == "spsr") {
4270     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4271             N->getOperand(0) };
4272     ReplaceNode(
4273         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4274                                   MVT::i32, MVT::Other, Ops));
4275     return true;
4276   }
4277 
4278   return false;
4279 }
4280 
4281 // Lower the write_register intrinsic to ARM specific DAG nodes
4282 // using the supplied metadata string to select the instruction node to use
4283 // and the registers/masks to use in the nodes
4284 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4285   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4286   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4287   bool IsThumb2 = Subtarget->isThumb2();
4288   SDLoc DL(N);
4289 
4290   std::vector<SDValue> Ops;
4291   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4292 
4293   if (!Ops.empty()) {
4294     // If the special register string was constructed of fields (as defined
4295     // in the ACLE) then need to lower to MCR node (32 bit) or
4296     // MCRR node(64 bit), we can make the distinction based on the number of
4297     // operands we have.
4298     unsigned Opcode;
4299     if (Ops.size() == 5) {
4300       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4301       Ops.insert(Ops.begin()+2, N->getOperand(2));
4302     } else {
4303       assert(Ops.size() == 3 &&
4304               "Invalid number of fields in special register string.");
4305       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4306       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4307       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4308     }
4309 
4310     Ops.push_back(getAL(CurDAG, DL));
4311     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4312     Ops.push_back(N->getOperand(0));
4313 
4314     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4315     return true;
4316   }
4317 
4318   std::string SpecialReg = RegString->getString().lower();
4319   int BankedReg = getBankedRegisterMask(SpecialReg);
4320   if (BankedReg != -1) {
4321     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4322             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4323             N->getOperand(0) };
4324     ReplaceNode(
4325         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4326                                   DL, MVT::Other, Ops));
4327     return true;
4328   }
4329 
4330   // The VFP registers are written to by creating SelectionDAG nodes with
4331   // opcodes corresponding to the register that is being written. So we switch
4332   // on the string to find which opcode we need to use.
4333   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4334                     .Case("fpscr", ARM::VMSR)
4335                     .Case("fpexc", ARM::VMSR_FPEXC)
4336                     .Case("fpsid", ARM::VMSR_FPSID)
4337                     .Case("fpinst", ARM::VMSR_FPINST)
4338                     .Case("fpinst2", ARM::VMSR_FPINST2)
4339                     .Default(0);
4340 
4341   if (Opcode) {
4342     if (!Subtarget->hasVFP2Base())
4343       return false;
4344     Ops = { N->getOperand(2), getAL(CurDAG, DL),
4345             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4346     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4347     return true;
4348   }
4349 
4350   std::pair<StringRef, StringRef> Fields;
4351   Fields = StringRef(SpecialReg).rsplit('_');
4352   std::string Reg = Fields.first.str();
4353   StringRef Flags = Fields.second;
4354 
4355   // If the target was M Class then need to validate the special register value
4356   // and retrieve the mask for use in the instruction node.
4357   if (Subtarget->isMClass()) {
4358     int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
4359     if (SYSmValue == -1)
4360       return false;
4361 
4362     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4363                       N->getOperand(2), getAL(CurDAG, DL),
4364                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4365     ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4366     return true;
4367   }
4368 
4369   // We then check to see if a valid mask can be constructed for one of the
4370   // register string values permitted for the A and R class cores. These values
4371   // are apsr, spsr and cpsr; these are also valid on older cores.
4372   int Mask = getARClassRegisterMask(Reg, Flags);
4373   if (Mask != -1) {
4374     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4375             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4376             N->getOperand(0) };
4377     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4378                                           DL, MVT::Other, Ops));
4379     return true;
4380   }
4381 
4382   return false;
4383 }
4384 
/// Rewrite the inline asm node \p N so that every register operand made of
/// two GPRs (either constrained to the GPR register class, or tied to a def
/// that was already rewritten) is passed as one GPRPair virtual register.
///
/// The operand list is rebuilt in AsmNodeOperands; if at least one operand
/// was rewritten, a new node with the same opcode replaces \p N.
///
/// \returns true if \p N was replaced, false if no operand needed rewriting.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  // Remember the incoming glue (always the last operand when present); it is
  // re-appended, possibly replaced by a newer glue value, after the loop.
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  // One entry per operand group that names at least one register, in operand
  // order. An entry is set to true once that group has been turned into a
  // GPRPair; it is looked up through the DefIdx of a tied use.
  SmallVector<bool, 8> OpChanged;
  // The glue operand is excluded from the walk and appended after the loop.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    // Operands before Op_FirstOperand are copied through unchanged.
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    // Only constant operands are decoded as flag words; anything else has
    // already been copied above and is left alone.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    // Record this operand group in OpChanged (initially "not rewritten") so
    // that later tied uses can index it.
    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    // The tied-def lookup is only made once some operand has been rewritten.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand (so
    // it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    // Only register uses, defs and early-clobber defs can be rewritten.
    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    // Rewrite only groups of exactly two registers that are either tied to an
    // already rewritten def or explicitly constrained to the GPR class.
    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    // The two operands following the flag word are the registers of the pair.
    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      // Result 0 of the asm node is used as the chain and result 1 as the
      // glue for reading the pair back.
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      // Its operands are kept except the last one, which is replaced by the
      // glue result of the final copy above.
      // NOTE(review): GU is dereferenced without a null check, so this
      // presumably relies on a def always having a glued user -- confirm.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      // The rebuilt asm node takes its input chain and glue from the copy
      // into the pair register.
      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      // Mark the current operand group (the most recent OpChanged entry) as
      // rewritten so that later tied uses are rewritten as well.
      OpChanged[OpChanged.size() -1 ] = true;
      // Build a flag word describing a single register: tied uses keep their
      // matching-operand encoding, everything else is constrained to GPRPair.
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      // (It is the last element pushed onto AsmNodeOperands in this iteration.)
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  // Re-attach the glue operand (original or updated) as the last operand.
  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  // Build the replacement node with the same opcode, producing a chain and a
  // glue result, and substitute it for N.
  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  // NOTE(review): presumably a node id of -1 marks the new node as not yet
  // selected -- confirm against the SelectionDAGISel conventions.
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}
4548 
4549 
4550 bool ARMDAGToDAGISel::
4551 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4552                              std::vector<SDValue> &OutOps) {
4553   switch(ConstraintID) {
4554   default:
4555     llvm_unreachable("Unexpected asm memory constraint");
4556   case InlineAsm::Constraint_i:
4557     // FIXME: It seems strange that 'i' is needed here since it's supposed to
4558     //        be an immediate and not a memory constraint.
4559     LLVM_FALLTHROUGH;
4560   case InlineAsm::Constraint_m:
4561   case InlineAsm::Constraint_o:
4562   case InlineAsm::Constraint_Q:
4563   case InlineAsm::Constraint_Um:
4564   case InlineAsm::Constraint_Un:
4565   case InlineAsm::Constraint_Uq:
4566   case InlineAsm::Constraint_Us:
4567   case InlineAsm::Constraint_Ut:
4568   case InlineAsm::Constraint_Uv:
4569   case InlineAsm::Constraint_Uy:
4570     // Require the address to be in a register.  That is safe for all ARM
4571     // variants and it is hard to do anything much smarter without knowing
4572     // how the operand is used.
4573     OutOps.push_back(Op);
4574     return false;
4575   }
4576   return true;
4577 }
4578 
4579 /// createARMISelDag - This pass converts a legalized DAG into a
4580 /// ARM-specific DAG, ready for instruction scheduling.
4581 ///
4582 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4583                                      CodeGenOpt::Level OptLevel) {
4584   return new ARMDAGToDAGISel(TM, OptLevel);
4585 }
4586