1f22ef01cSRoman Divacky //===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
2f22ef01cSRoman Divacky //
3f22ef01cSRoman Divacky //                     The LLVM Compiler Infrastructure
4f22ef01cSRoman Divacky //
5f22ef01cSRoman Divacky // This file is distributed under the University of Illinois Open Source
6f22ef01cSRoman Divacky // License. See LICENSE.TXT for details.
7f22ef01cSRoman Divacky //
8f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
9f22ef01cSRoman Divacky //
10f22ef01cSRoman Divacky // This file contains logic for simplifying instructions based on information
11f22ef01cSRoman Divacky // about how they are used.
12f22ef01cSRoman Divacky //
13f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
14f22ef01cSRoman Divacky 
15ff0cc061SDimitry Andric #include "InstCombineInternal.h"
16ff0cc061SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
17139f7f9bSDimitry Andric #include "llvm/IR/IntrinsicInst.h"
1891bc56edSDimitry Andric #include "llvm/IR/PatternMatch.h"
1951690af2SDimitry Andric #include "llvm/Support/KnownBits.h"
20f22ef01cSRoman Divacky 
21f22ef01cSRoman Divacky using namespace llvm;
22139f7f9bSDimitry Andric using namespace llvm::PatternMatch;
23f22ef01cSRoman Divacky 
2491bc56edSDimitry Andric #define DEBUG_TYPE "instcombine"
2591bc56edSDimitry Andric 
264ba319b5SDimitry Andric namespace {
274ba319b5SDimitry Andric 
284ba319b5SDimitry Andric struct AMDGPUImageDMaskIntrinsic {
294ba319b5SDimitry Andric   unsigned Intr;
304ba319b5SDimitry Andric };
314ba319b5SDimitry Andric 
324ba319b5SDimitry Andric #define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
334ba319b5SDimitry Andric #include "InstCombineTables.inc"
344ba319b5SDimitry Andric 
354ba319b5SDimitry Andric } // end anonymous namespace
364ba319b5SDimitry Andric 
373ca95b02SDimitry Andric /// Check to see if the specified operand of the specified instruction is a
383ca95b02SDimitry Andric /// constant integer. If so, check to see if there are any bits set in the
393ca95b02SDimitry Andric /// constant that are not demanded. If so, shrink the constant and return true.
ShrinkDemandedConstant(Instruction * I,unsigned OpNo,const APInt & Demanded)40f22ef01cSRoman Divacky static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
4151690af2SDimitry Andric                                    const APInt &Demanded) {
42f22ef01cSRoman Divacky   assert(I && "No instruction?");
43f22ef01cSRoman Divacky   assert(OpNo < I->getNumOperands() && "Operand index too large");
44f22ef01cSRoman Divacky 
457a7e6055SDimitry Andric   // The operand must be a constant integer or splat integer.
467a7e6055SDimitry Andric   Value *Op = I->getOperand(OpNo);
477a7e6055SDimitry Andric   const APInt *C;
487a7e6055SDimitry Andric   if (!match(Op, m_APInt(C)))
497a7e6055SDimitry Andric     return false;
50f22ef01cSRoman Divacky 
51f22ef01cSRoman Divacky   // If there are no bits set that aren't demanded, nothing to do.
526bc11b14SDimitry Andric   if (C->isSubsetOf(Demanded))
53f22ef01cSRoman Divacky     return false;
54f22ef01cSRoman Divacky 
55f22ef01cSRoman Divacky   // This instruction is producing bits that are not demanded. Shrink the RHS.
5651690af2SDimitry Andric   I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded));
5739d628a0SDimitry Andric 
58f22ef01cSRoman Divacky   return true;
59f22ef01cSRoman Divacky }
60f22ef01cSRoman Divacky 
61f22ef01cSRoman Divacky 
62f22ef01cSRoman Divacky 
633ca95b02SDimitry Andric /// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
643ca95b02SDimitry Andric /// the instruction has any properties that allow us to simplify its operands.
SimplifyDemandedInstructionBits(Instruction & Inst)65f22ef01cSRoman Divacky bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
66f22ef01cSRoman Divacky   unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
6751690af2SDimitry Andric   KnownBits Known(BitWidth);
68f22ef01cSRoman Divacky   APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
69f22ef01cSRoman Divacky 
7051690af2SDimitry Andric   Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
71ff0cc061SDimitry Andric                                      0, &Inst);
7291bc56edSDimitry Andric   if (!V) return false;
73f22ef01cSRoman Divacky   if (V == &Inst) return true;
743ca95b02SDimitry Andric   replaceInstUsesWith(Inst, V);
75f22ef01cSRoman Divacky   return true;
76f22ef01cSRoman Divacky }
77f22ef01cSRoman Divacky 
783ca95b02SDimitry Andric /// This form of SimplifyDemandedBits simplifies the specified instruction
793ca95b02SDimitry Andric /// operand if possible, updating it in place. It returns true if it made any
803ca95b02SDimitry Andric /// change and false otherwise.
SimplifyDemandedBits(Instruction * I,unsigned OpNo,const APInt & DemandedMask,KnownBits & Known,unsigned Depth)817a7e6055SDimitry Andric bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
827a7e6055SDimitry Andric                                         const APInt &DemandedMask,
8351690af2SDimitry Andric                                         KnownBits &Known,
84f22ef01cSRoman Divacky                                         unsigned Depth) {
857a7e6055SDimitry Andric   Use &U = I->getOperandUse(OpNo);
8651690af2SDimitry Andric   Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, Known,
8751690af2SDimitry Andric                                           Depth, I);
8891bc56edSDimitry Andric   if (!NewVal) return false;
89f22ef01cSRoman Divacky   U = NewVal;
90f22ef01cSRoman Divacky   return true;
91f22ef01cSRoman Divacky }
92f22ef01cSRoman Divacky 
93f22ef01cSRoman Divacky 
943ca95b02SDimitry Andric /// This function attempts to replace V with a simpler value based on the
953ca95b02SDimitry Andric /// demanded bits. When this function is called, it is known that only the bits
963ca95b02SDimitry Andric /// set in DemandedMask of the result of V are ever used downstream.
973ca95b02SDimitry Andric /// Consequently, depending on the mask and V, it may be possible to replace V
983ca95b02SDimitry Andric /// with a constant or one of its operands. In such cases, this function does
993ca95b02SDimitry Andric /// the replacement and returns true. In all other cases, it returns false after
1003ca95b02SDimitry Andric /// analyzing the expression and setting KnownOne and known to be one in the
10151690af2SDimitry Andric /// expression. Known.Zero contains all the bits that are known to be zero in
10251690af2SDimitry Andric /// the expression. These are provided to potentially allow the caller (which
10351690af2SDimitry Andric /// might recursively be SimplifyDemandedBits itself) to simplify the
10451690af2SDimitry Andric /// expression.
10551690af2SDimitry Andric /// Known.One and Known.Zero always follow the invariant that:
10651690af2SDimitry Andric ///   Known.One & Known.Zero == 0.
10751690af2SDimitry Andric /// That is, a bit can't be both 1 and 0. Note that the bits in Known.One and
10851690af2SDimitry Andric /// Known.Zero may only be accurate for those bits set in DemandedMask. Note
10951690af2SDimitry Andric /// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all
11051690af2SDimitry Andric /// be the same.
111f22ef01cSRoman Divacky ///
112f22ef01cSRoman Divacky /// This returns null if it did not change anything and it permits no
113f22ef01cSRoman Divacky /// simplification.  This returns V itself if it did some simplification of V's
114f22ef01cSRoman Divacky /// operands based on the information about what bits are demanded. This returns
115f22ef01cSRoman Divacky /// some other non-null value if it found out that V is equal to another value
116f22ef01cSRoman Divacky /// in the context where the specified bits are demanded, but not for all users.
SimplifyDemandedUseBits(Value * V,APInt DemandedMask,KnownBits & Known,unsigned Depth,Instruction * CxtI)117f22ef01cSRoman Divacky Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
11851690af2SDimitry Andric                                              KnownBits &Known, unsigned Depth,
11939d628a0SDimitry Andric                                              Instruction *CxtI) {
12091bc56edSDimitry Andric   assert(V != nullptr && "Null pointer of Value???");
121f22ef01cSRoman Divacky   assert(Depth <= 6 && "Limit Search Depth");
122f22ef01cSRoman Divacky   uint32_t BitWidth = DemandedMask.getBitWidth();
1236122f3e6SDimitry Andric   Type *VTy = V->getType();
124ff0cc061SDimitry Andric   assert(
125ff0cc061SDimitry Andric       (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
12651690af2SDimitry Andric       Known.getBitWidth() == BitWidth &&
12751690af2SDimitry Andric       "Value *V, DemandedMask and Known must have same BitWidth");
1286bc11b14SDimitry Andric 
1296bc11b14SDimitry Andric   if (isa<Constant>(V)) {
13051690af2SDimitry Andric     computeKnownBits(V, Known, Depth, CxtI);
13191bc56edSDimitry Andric     return nullptr;
132f22ef01cSRoman Divacky   }
133f22ef01cSRoman Divacky 
1340f5676f4SDimitry Andric   Known.resetAll();
135db17bf38SDimitry Andric   if (DemandedMask.isNullValue())     // Not demanding any bits from V.
136f22ef01cSRoman Divacky     return UndefValue::get(VTy);
137f22ef01cSRoman Divacky 
138f22ef01cSRoman Divacky   if (Depth == 6)        // Limit search depth.
13991bc56edSDimitry Andric     return nullptr;
140f22ef01cSRoman Divacky 
141f22ef01cSRoman Divacky   Instruction *I = dyn_cast<Instruction>(V);
142f22ef01cSRoman Divacky   if (!I) {
14351690af2SDimitry Andric     computeKnownBits(V, Known, Depth, CxtI);
14491bc56edSDimitry Andric     return nullptr;        // Only analyze instructions.
145f22ef01cSRoman Divacky   }
146f22ef01cSRoman Divacky 
147f22ef01cSRoman Divacky   // If there are multiple uses of this value and we aren't at the root, then
148f22ef01cSRoman Divacky   // we can't do any simplifications of the operands, because DemandedMask
149f22ef01cSRoman Divacky   // only reflects the bits demanded by *one* of the users.
15051690af2SDimitry Andric   if (Depth != 0 && !I->hasOneUse())
15151690af2SDimitry Andric     return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI);
152f22ef01cSRoman Divacky 
15351690af2SDimitry Andric   KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth);
154f22ef01cSRoman Divacky 
155f22ef01cSRoman Divacky   // If this is the root being simplified, allow it to have multiple uses,
156f22ef01cSRoman Divacky   // just set the DemandedMask to all bits so that we can try to simplify the
157f22ef01cSRoman Divacky   // operands.  This allows visitTruncInst (for example) to simplify the
158f22ef01cSRoman Divacky   // operand of a trunc without duplicating all the logic below.
159f22ef01cSRoman Divacky   if (Depth == 0 && !V->hasOneUse())
1607a7e6055SDimitry Andric     DemandedMask.setAllBits();
161f22ef01cSRoman Divacky 
162f22ef01cSRoman Divacky   switch (I->getOpcode()) {
163f22ef01cSRoman Divacky   default:
16451690af2SDimitry Andric     computeKnownBits(I, Known, Depth, CxtI);
165f22ef01cSRoman Divacky     break;
1667a7e6055SDimitry Andric   case Instruction::And: {
167f22ef01cSRoman Divacky     // If either the LHS or the RHS are Zero, the result is zero.
16851690af2SDimitry Andric     if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
16951690af2SDimitry Andric         SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown,
17051690af2SDimitry Andric                              Depth + 1))
171f22ef01cSRoman Divacky       return I;
172302affcbSDimitry Andric     assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
173302affcbSDimitry Andric     assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
174f22ef01cSRoman Divacky 
1757a7e6055SDimitry Andric     // Output known-0 are known to be clear if zero in either the LHS | RHS.
17651690af2SDimitry Andric     APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero;
1777a7e6055SDimitry Andric     // Output known-1 bits are only known if set in both the LHS & RHS.
17851690af2SDimitry Andric     APInt IKnownOne = RHSKnown.One & LHSKnown.One;
1797a7e6055SDimitry Andric 
18039d628a0SDimitry Andric     // If the client is only demanding bits that we know, return the known
18139d628a0SDimitry Andric     // constant.
1826bc11b14SDimitry Andric     if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
1837a7e6055SDimitry Andric       return Constant::getIntegerValue(VTy, IKnownOne);
18439d628a0SDimitry Andric 
185f22ef01cSRoman Divacky     // If all of the demanded bits are known 1 on one side, return the other.
186f22ef01cSRoman Divacky     // These bits cannot contribute to the result of the 'and'.
18751690af2SDimitry Andric     if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
188f22ef01cSRoman Divacky       return I->getOperand(0);
18951690af2SDimitry Andric     if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
190f22ef01cSRoman Divacky       return I->getOperand(1);
191f22ef01cSRoman Divacky 
192f22ef01cSRoman Divacky     // If the RHS is a constant, see if we can simplify it.
19351690af2SDimitry Andric     if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero))
194f22ef01cSRoman Divacky       return I;
195f22ef01cSRoman Divacky 
19651690af2SDimitry Andric     Known.Zero = std::move(IKnownZero);
19751690af2SDimitry Andric     Known.One  = std::move(IKnownOne);
198f22ef01cSRoman Divacky     break;
1997a7e6055SDimitry Andric   }
2007a7e6055SDimitry Andric   case Instruction::Or: {
201f22ef01cSRoman Divacky     // If either the LHS or the RHS are One, the result is One.
20251690af2SDimitry Andric     if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
20351690af2SDimitry Andric         SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown,
20451690af2SDimitry Andric                              Depth + 1))
205f22ef01cSRoman Divacky       return I;
206302affcbSDimitry Andric     assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
207302affcbSDimitry Andric     assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
208f22ef01cSRoman Divacky 
2097a7e6055SDimitry Andric     // Output known-0 bits are only known if clear in both the LHS & RHS.
21051690af2SDimitry Andric     APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero;
21151690af2SDimitry Andric     // Output known-1 are known. to be set if s.et in either the LHS | RHS.
21251690af2SDimitry Andric     APInt IKnownOne = RHSKnown.One | LHSKnown.One;
2137a7e6055SDimitry Andric 
21439d628a0SDimitry Andric     // If the client is only demanding bits that we know, return the known
21539d628a0SDimitry Andric     // constant.
2166bc11b14SDimitry Andric     if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
2177a7e6055SDimitry Andric       return Constant::getIntegerValue(VTy, IKnownOne);
21839d628a0SDimitry Andric 
219f22ef01cSRoman Divacky     // If all of the demanded bits are known zero on one side, return the other.
220f22ef01cSRoman Divacky     // These bits cannot contribute to the result of the 'or'.
22151690af2SDimitry Andric     if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
222f22ef01cSRoman Divacky       return I->getOperand(0);
22351690af2SDimitry Andric     if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
224f22ef01cSRoman Divacky       return I->getOperand(1);
225f22ef01cSRoman Divacky 
226f22ef01cSRoman Divacky     // If the RHS is a constant, see if we can simplify it.
227f22ef01cSRoman Divacky     if (ShrinkDemandedConstant(I, 1, DemandedMask))
228f22ef01cSRoman Divacky       return I;
229f22ef01cSRoman Divacky 
23051690af2SDimitry Andric     Known.Zero = std::move(IKnownZero);
23151690af2SDimitry Andric     Known.One  = std::move(IKnownOne);
232f22ef01cSRoman Divacky     break;
2337a7e6055SDimitry Andric   }
234f22ef01cSRoman Divacky   case Instruction::Xor: {
23551690af2SDimitry Andric     if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
23651690af2SDimitry Andric         SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1))
237f22ef01cSRoman Divacky       return I;
238302affcbSDimitry Andric     assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
239302affcbSDimitry Andric     assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
240f22ef01cSRoman Divacky 
24139d628a0SDimitry Andric     // Output known-0 bits are known if clear or set in both the LHS & RHS.
24251690af2SDimitry Andric     APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) |
24351690af2SDimitry Andric                        (RHSKnown.One & LHSKnown.One);
24439d628a0SDimitry Andric     // Output known-1 are known to be set if set in only one of the LHS, RHS.
24551690af2SDimitry Andric     APInt IKnownOne =  (RHSKnown.Zero & LHSKnown.One) |
24651690af2SDimitry Andric                        (RHSKnown.One & LHSKnown.Zero);
24739d628a0SDimitry Andric 
24839d628a0SDimitry Andric     // If the client is only demanding bits that we know, return the known
24939d628a0SDimitry Andric     // constant.
2506bc11b14SDimitry Andric     if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
25139d628a0SDimitry Andric       return Constant::getIntegerValue(VTy, IKnownOne);
25239d628a0SDimitry Andric 
253f22ef01cSRoman Divacky     // If all of the demanded bits are known zero on one side, return the other.
254f22ef01cSRoman Divacky     // These bits cannot contribute to the result of the 'xor'.
25551690af2SDimitry Andric     if (DemandedMask.isSubsetOf(RHSKnown.Zero))
256f22ef01cSRoman Divacky       return I->getOperand(0);
25751690af2SDimitry Andric     if (DemandedMask.isSubsetOf(LHSKnown.Zero))
258f22ef01cSRoman Divacky       return I->getOperand(1);
259f22ef01cSRoman Divacky 
260f22ef01cSRoman Divacky     // If all of the demanded bits are known to be zero on one side or the
261f22ef01cSRoman Divacky     // other, turn this into an *inclusive* or.
262f22ef01cSRoman Divacky     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
26351690af2SDimitry Andric     if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero)) {
264f22ef01cSRoman Divacky       Instruction *Or =
265f22ef01cSRoman Divacky         BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
266f22ef01cSRoman Divacky                                  I->getName());
267bd5abe19SDimitry Andric       return InsertNewInstWith(Or, *I);
268f22ef01cSRoman Divacky     }
269f22ef01cSRoman Divacky 
270f22ef01cSRoman Divacky     // If all of the demanded bits on one side are known, and all of the set
271f22ef01cSRoman Divacky     // bits on that side are also known to be set on the other side, turn this
272f22ef01cSRoman Divacky     // into an AND, as we know the bits will be cleared.
273f22ef01cSRoman Divacky     //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
27451690af2SDimitry Andric     if (DemandedMask.isSubsetOf(RHSKnown.Zero|RHSKnown.One) &&
27551690af2SDimitry Andric         RHSKnown.One.isSubsetOf(LHSKnown.One)) {
276f22ef01cSRoman Divacky       Constant *AndC = Constant::getIntegerValue(VTy,
27751690af2SDimitry Andric                                                  ~RHSKnown.One & DemandedMask);
2786122f3e6SDimitry Andric       Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
279bd5abe19SDimitry Andric       return InsertNewInstWith(And, *I);
280f22ef01cSRoman Divacky     }
281f22ef01cSRoman Divacky 
282f22ef01cSRoman Divacky     // If the RHS is a constant, see if we can simplify it.
283f22ef01cSRoman Divacky     // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
284f22ef01cSRoman Divacky     if (ShrinkDemandedConstant(I, 1, DemandedMask))
285f22ef01cSRoman Divacky       return I;
286f22ef01cSRoman Divacky 
287f22ef01cSRoman Divacky     // If our LHS is an 'and' and if it has one use, and if any of the bits we
288f22ef01cSRoman Divacky     // are flipping are known to be set, then the xor is just resetting those
289f22ef01cSRoman Divacky     // bits to zero.  We can just knock out bits from the 'and' and the 'xor',
290f22ef01cSRoman Divacky     // simplifying both of them.
291f22ef01cSRoman Divacky     if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
292f22ef01cSRoman Divacky       if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
293f22ef01cSRoman Divacky           isa<ConstantInt>(I->getOperand(1)) &&
294f22ef01cSRoman Divacky           isa<ConstantInt>(LHSInst->getOperand(1)) &&
29551690af2SDimitry Andric           (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) {
296f22ef01cSRoman Divacky         ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
297f22ef01cSRoman Divacky         ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
29851690af2SDimitry Andric         APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask);
299f22ef01cSRoman Divacky 
300f22ef01cSRoman Divacky         Constant *AndC =
301f22ef01cSRoman Divacky           ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
3026122f3e6SDimitry Andric         Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
303bd5abe19SDimitry Andric         InsertNewInstWith(NewAnd, *I);
304f22ef01cSRoman Divacky 
305f22ef01cSRoman Divacky         Constant *XorC =
306f22ef01cSRoman Divacky           ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
3076122f3e6SDimitry Andric         Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
308bd5abe19SDimitry Andric         return InsertNewInstWith(NewXor, *I);
309f22ef01cSRoman Divacky       }
310f22ef01cSRoman Divacky 
311f22ef01cSRoman Divacky     // Output known-0 bits are known if clear or set in both the LHS & RHS.
31251690af2SDimitry Andric     Known.Zero = std::move(IKnownZero);
313f22ef01cSRoman Divacky     // Output known-1 are known to be set if set in only one of the LHS, RHS.
31451690af2SDimitry Andric     Known.One  = std::move(IKnownOne);
315f22ef01cSRoman Divacky     break;
316f22ef01cSRoman Divacky   }
317*b5893f02SDimitry Andric   case Instruction::Select: {
318ff0cc061SDimitry Andric     Value *LHS, *RHS;
319*b5893f02SDimitry Andric     SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
320*b5893f02SDimitry Andric     if (SPF == SPF_UMAX) {
321*b5893f02SDimitry Andric       // UMax(A, C) == A if ...
322*b5893f02SDimitry Andric       // The lowest non-zero bit of DemandMask is higher than the highest
323*b5893f02SDimitry Andric       // non-zero bit of C.
324*b5893f02SDimitry Andric       const APInt *C;
325*b5893f02SDimitry Andric       unsigned CTZ = DemandedMask.countTrailingZeros();
326*b5893f02SDimitry Andric       if (match(RHS, m_APInt(C)) && CTZ >= C->getActiveBits())
327*b5893f02SDimitry Andric         return LHS;
328*b5893f02SDimitry Andric     } else if (SPF == SPF_UMIN) {
329*b5893f02SDimitry Andric       // UMin(A, C) == A if ...
330*b5893f02SDimitry Andric       // The lowest non-zero bit of DemandMask is higher than the highest
331*b5893f02SDimitry Andric       // non-one bit of C.
332*b5893f02SDimitry Andric       // This comes from using DeMorgans on the above umax example.
333*b5893f02SDimitry Andric       const APInt *C;
334*b5893f02SDimitry Andric       unsigned CTZ = DemandedMask.countTrailingZeros();
335*b5893f02SDimitry Andric       if (match(RHS, m_APInt(C)) &&
336*b5893f02SDimitry Andric           CTZ >= C->getBitWidth() - C->countLeadingOnes())
337*b5893f02SDimitry Andric         return LHS;
338*b5893f02SDimitry Andric     }
339*b5893f02SDimitry Andric 
340*b5893f02SDimitry Andric     // If this is a select as part of any other min/max pattern, don't simplify
341*b5893f02SDimitry Andric     // any further in case we break the structure.
342*b5893f02SDimitry Andric     if (SPF != SPF_UNKNOWN)
343ff0cc061SDimitry Andric       return nullptr;
344ff0cc061SDimitry Andric 
34551690af2SDimitry Andric     if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) ||
34651690af2SDimitry Andric         SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1))
347f22ef01cSRoman Divacky       return I;
348302affcbSDimitry Andric     assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
349302affcbSDimitry Andric     assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
350f22ef01cSRoman Divacky 
351f22ef01cSRoman Divacky     // If the operands are constants, see if we can simplify them.
352f22ef01cSRoman Divacky     if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
353f22ef01cSRoman Divacky         ShrinkDemandedConstant(I, 2, DemandedMask))
354f22ef01cSRoman Divacky       return I;
355f22ef01cSRoman Divacky 
356f22ef01cSRoman Divacky     // Only known if known in both the LHS and RHS.
35751690af2SDimitry Andric     Known.One = RHSKnown.One & LHSKnown.One;
35851690af2SDimitry Andric     Known.Zero = RHSKnown.Zero & LHSKnown.Zero;
359f22ef01cSRoman Divacky     break;
360*b5893f02SDimitry Andric   }
361302affcbSDimitry Andric   case Instruction::ZExt:
362f22ef01cSRoman Divacky   case Instruction::Trunc: {
363302affcbSDimitry Andric     unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
364302affcbSDimitry Andric 
365302affcbSDimitry Andric     APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth);
366302affcbSDimitry Andric     KnownBits InputKnown(SrcBitWidth);
367302affcbSDimitry Andric     if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1))
368f22ef01cSRoman Divacky       return I;
3694ba319b5SDimitry Andric     Known = InputKnown.zextOrTrunc(BitWidth);
370302affcbSDimitry Andric     // Any top bits are known to be zero.
371302affcbSDimitry Andric     if (BitWidth > SrcBitWidth)
372302affcbSDimitry Andric       Known.Zero.setBitsFrom(SrcBitWidth);
373302affcbSDimitry Andric     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
374f22ef01cSRoman Divacky     break;
375f22ef01cSRoman Divacky   }
376f22ef01cSRoman Divacky   case Instruction::BitCast:
377f22ef01cSRoman Divacky     if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
37891bc56edSDimitry Andric       return nullptr;  // vector->int or fp->int?
379f22ef01cSRoman Divacky 
3806122f3e6SDimitry Andric     if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
3816122f3e6SDimitry Andric       if (VectorType *SrcVTy =
382f22ef01cSRoman Divacky             dyn_cast<VectorType>(I->getOperand(0)->getType())) {
383f22ef01cSRoman Divacky         if (DstVTy->getNumElements() != SrcVTy->getNumElements())
384f22ef01cSRoman Divacky           // Don't touch a bitcast between vectors of different element counts.
38591bc56edSDimitry Andric           return nullptr;
386f22ef01cSRoman Divacky       } else
387f22ef01cSRoman Divacky         // Don't touch a scalar-to-vector bitcast.
38891bc56edSDimitry Andric         return nullptr;
389f22ef01cSRoman Divacky     } else if (I->getOperand(0)->getType()->isVectorTy())
390f22ef01cSRoman Divacky       // Don't touch a vector-to-scalar bitcast.
39191bc56edSDimitry Andric       return nullptr;
392f22ef01cSRoman Divacky 
39351690af2SDimitry Andric     if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1))
394f22ef01cSRoman Divacky       return I;
395302affcbSDimitry Andric     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
396f22ef01cSRoman Divacky     break;
397f22ef01cSRoman Divacky   case Instruction::SExt: {
398f22ef01cSRoman Divacky     // Compute the bits in the result that are not present in the input.
399f22ef01cSRoman Divacky     unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
400f22ef01cSRoman Divacky 
401302affcbSDimitry Andric     APInt InputDemandedBits = DemandedMask.trunc(SrcBitWidth);
402f22ef01cSRoman Divacky 
403f22ef01cSRoman Divacky     // If any of the sign extended bits are demanded, we know that the sign
404f22ef01cSRoman Divacky     // bit is demanded.
405302affcbSDimitry Andric     if (DemandedMask.getActiveBits() > SrcBitWidth)
4062754fe60SDimitry Andric       InputDemandedBits.setBit(SrcBitWidth-1);
407f22ef01cSRoman Divacky 
408302affcbSDimitry Andric     KnownBits InputKnown(SrcBitWidth);
409302affcbSDimitry Andric     if (SimplifyDemandedBits(I, 0, InputDemandedBits, InputKnown, Depth + 1))
410f22ef01cSRoman Divacky       return I;
411f22ef01cSRoman Divacky 
412f22ef01cSRoman Divacky     // If the input sign bit is known zero, or if the NewBits are not demanded
413f22ef01cSRoman Divacky     // convert this into a zero extension.
414302affcbSDimitry Andric     if (InputKnown.isNonNegative() ||
415302affcbSDimitry Andric         DemandedMask.getActiveBits() <= SrcBitWidth) {
416302affcbSDimitry Andric       // Convert to ZExt cast.
417f22ef01cSRoman Divacky       CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
418bd5abe19SDimitry Andric       return InsertNewInstWith(NewCast, *I);
419f22ef01cSRoman Divacky      }
420302affcbSDimitry Andric 
421302affcbSDimitry Andric     // If the sign bit of the input is known set or clear, then we know the
422302affcbSDimitry Andric     // top bits of the result.
423302affcbSDimitry Andric     Known = InputKnown.sext(BitWidth);
424302affcbSDimitry Andric     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
425f22ef01cSRoman Divacky     break;
426f22ef01cSRoman Divacky   }
427ff0cc061SDimitry Andric   case Instruction::Add:
428ff0cc061SDimitry Andric   case Instruction::Sub: {
429ff0cc061SDimitry Andric     /// If the high-bits of an ADD/SUB are not demanded, then we do not care
430ff0cc061SDimitry Andric     /// about the high bits of the operands.
431f22ef01cSRoman Divacky     unsigned NLZ = DemandedMask.countLeadingZeros();
432ff0cc061SDimitry Andric     // Right fill the mask of bits for this ADD/SUB to demand the most
433f22ef01cSRoman Divacky     // significant bit and all those below it.
434f22ef01cSRoman Divacky     APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
4357a7e6055SDimitry Andric     if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
43651690af2SDimitry Andric         SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) ||
437ff0cc061SDimitry Andric         ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
43851690af2SDimitry Andric         SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) {
4392cab237bSDimitry Andric       if (NLZ > 0) {
440ff0cc061SDimitry Andric         // Disable the nsw and nuw flags here: We can no longer guarantee that
441ff0cc061SDimitry Andric         // we won't wrap after simplification. Removing the nsw/nuw flags is
442ff0cc061SDimitry Andric         // legal here because the top bit is not demanded.
443ff0cc061SDimitry Andric         BinaryOperator &BinOP = *cast<BinaryOperator>(I);
444ff0cc061SDimitry Andric         BinOP.setHasNoSignedWrap(false);
445ff0cc061SDimitry Andric         BinOP.setHasNoUnsignedWrap(false);
4462cab237bSDimitry Andric       }
447f22ef01cSRoman Divacky       return I;
448f22ef01cSRoman Divacky     }
4497a7e6055SDimitry Andric 
4507a7e6055SDimitry Andric     // If we are known to be adding/subtracting zeros to every bit below
4517a7e6055SDimitry Andric     // the highest demanded bit, we just return the other side.
45251690af2SDimitry Andric     if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
4537a7e6055SDimitry Andric       return I->getOperand(0);
454b40b48b8SDimitry Andric     // We can't do this with the LHS for subtraction, unless we are only
455b40b48b8SDimitry Andric     // demanding the LSB.
456b40b48b8SDimitry Andric     if ((I->getOpcode() == Instruction::Add ||
457b40b48b8SDimitry Andric          DemandedFromOps.isOneValue()) &&
45851690af2SDimitry Andric         DemandedFromOps.isSubsetOf(LHSKnown.Zero))
4597a7e6055SDimitry Andric       return I->getOperand(1);
460dff0c46cSDimitry Andric 
4612cab237bSDimitry Andric     // Otherwise just compute the known bits of the result.
4622cab237bSDimitry Andric     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
4632cab237bSDimitry Andric     Known = KnownBits::computeForAddSub(I->getOpcode() == Instruction::Add,
4642cab237bSDimitry Andric                                         NSW, LHSKnown, RHSKnown);
465f22ef01cSRoman Divacky     break;
466ff0cc061SDimitry Andric   }
46751690af2SDimitry Andric   case Instruction::Shl: {
46851690af2SDimitry Andric     const APInt *SA;
46951690af2SDimitry Andric     if (match(I->getOperand(1), m_APInt(SA))) {
47051690af2SDimitry Andric       const APInt *ShrAmt;
4712cab237bSDimitry Andric       if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt))))
4722cab237bSDimitry Andric         if (Instruction *Shr = dyn_cast<Instruction>(I->getOperand(0)))
4732cab237bSDimitry Andric           if (Value *R = simplifyShrShlDemandedBits(Shr, *ShrAmt, I, *SA,
4742cab237bSDimitry Andric                                                     DemandedMask, Known))
475139f7f9bSDimitry Andric             return R;
476139f7f9bSDimitry Andric 
4772754fe60SDimitry Andric       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
478f22ef01cSRoman Divacky       APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
4792754fe60SDimitry Andric 
4802754fe60SDimitry Andric       // If the shift is NUW/NSW, then it does demand the high bits.
4812754fe60SDimitry Andric       ShlOperator *IOp = cast<ShlOperator>(I);
4822754fe60SDimitry Andric       if (IOp->hasNoSignedWrap())
4837a7e6055SDimitry Andric         DemandedMaskIn.setHighBits(ShiftAmt+1);
4842754fe60SDimitry Andric       else if (IOp->hasNoUnsignedWrap())
4857a7e6055SDimitry Andric         DemandedMaskIn.setHighBits(ShiftAmt);
4862754fe60SDimitry Andric 
48751690af2SDimitry Andric       if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
488f22ef01cSRoman Divacky         return I;
489302affcbSDimitry Andric       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
49051690af2SDimitry Andric       Known.Zero <<= ShiftAmt;
49151690af2SDimitry Andric       Known.One  <<= ShiftAmt;
492f22ef01cSRoman Divacky       // low bits known zero.
493f22ef01cSRoman Divacky       if (ShiftAmt)
49451690af2SDimitry Andric         Known.Zero.setLowBits(ShiftAmt);
495f22ef01cSRoman Divacky     }
496f22ef01cSRoman Divacky     break;
49751690af2SDimitry Andric   }
4986bc11b14SDimitry Andric   case Instruction::LShr: {
4996bc11b14SDimitry Andric     const APInt *SA;
5006bc11b14SDimitry Andric     if (match(I->getOperand(1), m_APInt(SA))) {
5012754fe60SDimitry Andric       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
502f22ef01cSRoman Divacky 
503f22ef01cSRoman Divacky       // Unsigned shift right.
504f22ef01cSRoman Divacky       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
5052754fe60SDimitry Andric 
5062754fe60SDimitry Andric       // If the shift is exact, then it does demand the low bits (and knows that
5072754fe60SDimitry Andric       // they are zero).
5082754fe60SDimitry Andric       if (cast<LShrOperator>(I)->isExact())
5097a7e6055SDimitry Andric         DemandedMaskIn.setLowBits(ShiftAmt);
5102754fe60SDimitry Andric 
51151690af2SDimitry Andric       if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
512f22ef01cSRoman Divacky         return I;
513302affcbSDimitry Andric       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
51451690af2SDimitry Andric       Known.Zero.lshrInPlace(ShiftAmt);
51551690af2SDimitry Andric       Known.One.lshrInPlace(ShiftAmt);
5167a7e6055SDimitry Andric       if (ShiftAmt)
51751690af2SDimitry Andric         Known.Zero.setHighBits(ShiftAmt);  // high bits known zero.
518f22ef01cSRoman Divacky     }
519f22ef01cSRoman Divacky     break;
5206bc11b14SDimitry Andric   }
5216bc11b14SDimitry Andric   case Instruction::AShr: {
522f22ef01cSRoman Divacky     // If this is an arithmetic shift right and only the low-bit is set, we can
523f22ef01cSRoman Divacky     // always convert this into a logical shr, even if the shift amount is
524f22ef01cSRoman Divacky     // variable.  The low bit of the shift cannot be an input sign bit unless
525f22ef01cSRoman Divacky     // the shift amount is >= the size of the datatype, which is undefined.
526db17bf38SDimitry Andric     if (DemandedMask.isOneValue()) {
527f22ef01cSRoman Divacky       // Perform the logical shift right.
528f22ef01cSRoman Divacky       Instruction *NewVal = BinaryOperator::CreateLShr(
529f22ef01cSRoman Divacky                         I->getOperand(0), I->getOperand(1), I->getName());
530bd5abe19SDimitry Andric       return InsertNewInstWith(NewVal, *I);
531f22ef01cSRoman Divacky     }
532f22ef01cSRoman Divacky 
533f22ef01cSRoman Divacky     // If the sign bit is the only bit demanded by this ashr, then there is no
534f22ef01cSRoman Divacky     // need to do it, the shift doesn't change the high bit.
5356bc11b14SDimitry Andric     if (DemandedMask.isSignMask())
536f22ef01cSRoman Divacky       return I->getOperand(0);
537f22ef01cSRoman Divacky 
5386bc11b14SDimitry Andric     const APInt *SA;
5396bc11b14SDimitry Andric     if (match(I->getOperand(1), m_APInt(SA))) {
5402754fe60SDimitry Andric       uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
541f22ef01cSRoman Divacky 
542f22ef01cSRoman Divacky       // Signed shift right.
543f22ef01cSRoman Divacky       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
5446bc11b14SDimitry Andric       // If any of the high bits are demanded, we should set the sign bit as
545f22ef01cSRoman Divacky       // demanded.
546f22ef01cSRoman Divacky       if (DemandedMask.countLeadingZeros() <= ShiftAmt)
5477a7e6055SDimitry Andric         DemandedMaskIn.setSignBit();
5482754fe60SDimitry Andric 
5492754fe60SDimitry Andric       // If the shift is exact, then it does demand the low bits (and knows that
5502754fe60SDimitry Andric       // they are zero).
5512754fe60SDimitry Andric       if (cast<AShrOperator>(I)->isExact())
5527a7e6055SDimitry Andric         DemandedMaskIn.setLowBits(ShiftAmt);
5532754fe60SDimitry Andric 
55451690af2SDimitry Andric       if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
555f22ef01cSRoman Divacky         return I;
5566bc11b14SDimitry Andric 
5572cab237bSDimitry Andric       unsigned SignBits = ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI);
5582cab237bSDimitry Andric 
559302affcbSDimitry Andric       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
5602cab237bSDimitry Andric       // Compute the new bits that are at the top now plus sign bits.
5612cab237bSDimitry Andric       APInt HighBits(APInt::getHighBitsSet(
5622cab237bSDimitry Andric           BitWidth, std::min(SignBits + ShiftAmt - 1, BitWidth)));
56351690af2SDimitry Andric       Known.Zero.lshrInPlace(ShiftAmt);
56451690af2SDimitry Andric       Known.One.lshrInPlace(ShiftAmt);
565f22ef01cSRoman Divacky 
566f22ef01cSRoman Divacky       // If the input sign bit is known to be zero, or if none of the top bits
567f22ef01cSRoman Divacky       // are demanded, turn this into an unsigned shift right.
5682cab237bSDimitry Andric       assert(BitWidth > ShiftAmt && "Shift amount not saturated?");
5692cab237bSDimitry Andric       if (Known.Zero[BitWidth-ShiftAmt-1] ||
57051690af2SDimitry Andric           !DemandedMask.intersects(HighBits)) {
5716bc11b14SDimitry Andric         BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0),
5726bc11b14SDimitry Andric                                                           I->getOperand(1));
5736bc11b14SDimitry Andric         LShr->setIsExact(cast<BinaryOperator>(I)->isExact());
5746bc11b14SDimitry Andric         return InsertNewInstWith(LShr, *I);
5752cab237bSDimitry Andric       } else if (Known.One[BitWidth-ShiftAmt-1]) { // New bits are known one.
57651690af2SDimitry Andric         Known.One |= HighBits;
577f22ef01cSRoman Divacky       }
578f22ef01cSRoman Divacky     }
579f22ef01cSRoman Divacky     break;
5806bc11b14SDimitry Andric   }
5814ba319b5SDimitry Andric   case Instruction::UDiv: {
5824ba319b5SDimitry Andric     // UDiv doesn't demand low bits that are zero in the divisor.
5834ba319b5SDimitry Andric     const APInt *SA;
5844ba319b5SDimitry Andric     if (match(I->getOperand(1), m_APInt(SA))) {
5854ba319b5SDimitry Andric       // If the shift is exact, then it does demand the low bits.
5864ba319b5SDimitry Andric       if (cast<UDivOperator>(I)->isExact())
5874ba319b5SDimitry Andric         break;
5884ba319b5SDimitry Andric 
5894ba319b5SDimitry Andric       // FIXME: Take the demanded mask of the result into account.
5904ba319b5SDimitry Andric       unsigned RHSTrailingZeros = SA->countTrailingZeros();
5914ba319b5SDimitry Andric       APInt DemandedMaskIn =
5924ba319b5SDimitry Andric           APInt::getHighBitsSet(BitWidth, BitWidth - RHSTrailingZeros);
5934ba319b5SDimitry Andric       if (SimplifyDemandedBits(I, 0, DemandedMaskIn, LHSKnown, Depth + 1))
5944ba319b5SDimitry Andric         return I;
5954ba319b5SDimitry Andric 
5964ba319b5SDimitry Andric       // Propagate zero bits from the input.
5974ba319b5SDimitry Andric       Known.Zero.setHighBits(std::min(
5984ba319b5SDimitry Andric           BitWidth, LHSKnown.Zero.countLeadingOnes() + RHSTrailingZeros));
5994ba319b5SDimitry Andric     }
6004ba319b5SDimitry Andric     break;
6014ba319b5SDimitry Andric   }
602f22ef01cSRoman Divacky   case Instruction::SRem:
603f22ef01cSRoman Divacky     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
6043b0f4066SDimitry Andric       // X % -1 demands all the bits because we don't want to introduce
6053b0f4066SDimitry Andric       // INT_MIN % -1 (== undef) by accident.
606c4394386SDimitry Andric       if (Rem->isMinusOne())
6073b0f4066SDimitry Andric         break;
608f22ef01cSRoman Divacky       APInt RA = Rem->getValue().abs();
609f22ef01cSRoman Divacky       if (RA.isPowerOf2()) {
610f22ef01cSRoman Divacky         if (DemandedMask.ult(RA))    // srem won't affect demanded bits
611f22ef01cSRoman Divacky           return I->getOperand(0);
612f22ef01cSRoman Divacky 
613f22ef01cSRoman Divacky         APInt LowBits = RA - 1;
6146bc11b14SDimitry Andric         APInt Mask2 = LowBits | APInt::getSignMask(BitWidth);
61551690af2SDimitry Andric         if (SimplifyDemandedBits(I, 0, Mask2, LHSKnown, Depth + 1))
616f22ef01cSRoman Divacky           return I;
617f22ef01cSRoman Divacky 
618f22ef01cSRoman Divacky         // The low bits of LHS are unchanged by the srem.
61951690af2SDimitry Andric         Known.Zero = LHSKnown.Zero & LowBits;
62051690af2SDimitry Andric         Known.One = LHSKnown.One & LowBits;
621f22ef01cSRoman Divacky 
622f22ef01cSRoman Divacky         // If LHS is non-negative or has all low bits zero, then the upper bits
623f22ef01cSRoman Divacky         // are all zero.
624f37b6182SDimitry Andric         if (LHSKnown.isNonNegative() || LowBits.isSubsetOf(LHSKnown.Zero))
62551690af2SDimitry Andric           Known.Zero |= ~LowBits;
626f22ef01cSRoman Divacky 
627f22ef01cSRoman Divacky         // If LHS is negative and not all low bits are zero, then the upper bits
628f22ef01cSRoman Divacky         // are all one.
629f37b6182SDimitry Andric         if (LHSKnown.isNegative() && LowBits.intersects(LHSKnown.One))
63051690af2SDimitry Andric           Known.One |= ~LowBits;
631f22ef01cSRoman Divacky 
632302affcbSDimitry Andric         assert(!Known.hasConflict() && "Bits known to be one AND zero?");
6336bc11b14SDimitry Andric         break;
634f22ef01cSRoman Divacky       }
635f22ef01cSRoman Divacky     }
6363b0f4066SDimitry Andric 
6373b0f4066SDimitry Andric     // The sign bit is the LHS's sign bit, except when the result of the
6383b0f4066SDimitry Andric     // remainder is zero.
6396bc11b14SDimitry Andric     if (DemandedMask.isSignBitSet()) {
64051690af2SDimitry Andric       computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
6413b0f4066SDimitry Andric       // If it's known zero, our sign bit is also zero.
642f37b6182SDimitry Andric       if (LHSKnown.isNonNegative())
643f37b6182SDimitry Andric         Known.makeNonNegative();
6443b0f4066SDimitry Andric     }
645f22ef01cSRoman Divacky     break;
646f22ef01cSRoman Divacky   case Instruction::URem: {
64751690af2SDimitry Andric     KnownBits Known2(BitWidth);
648f22ef01cSRoman Divacky     APInt AllOnes = APInt::getAllOnesValue(BitWidth);
64951690af2SDimitry Andric     if (SimplifyDemandedBits(I, 0, AllOnes, Known2, Depth + 1) ||
65051690af2SDimitry Andric         SimplifyDemandedBits(I, 1, AllOnes, Known2, Depth + 1))
651f22ef01cSRoman Divacky       return I;
652f22ef01cSRoman Divacky 
6535517e702SDimitry Andric     unsigned Leaders = Known2.countMinLeadingZeros();
65451690af2SDimitry Andric     Known.Zero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
655f22ef01cSRoman Divacky     break;
656f22ef01cSRoman Divacky   }
657f22ef01cSRoman Divacky   case Instruction::Call:
658f22ef01cSRoman Divacky     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
659f22ef01cSRoman Divacky       switch (II->getIntrinsicID()) {
660f22ef01cSRoman Divacky       default: break;
661f22ef01cSRoman Divacky       case Intrinsic::bswap: {
662f22ef01cSRoman Divacky         // If the only bits demanded come from one byte of the bswap result,
663f22ef01cSRoman Divacky         // just shift the input byte into position to eliminate the bswap.
664f22ef01cSRoman Divacky         unsigned NLZ = DemandedMask.countLeadingZeros();
665f22ef01cSRoman Divacky         unsigned NTZ = DemandedMask.countTrailingZeros();
666f22ef01cSRoman Divacky 
667f22ef01cSRoman Divacky         // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
668f22ef01cSRoman Divacky         // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
669f22ef01cSRoman Divacky         // have 14 leading zeros, round to 8.
670f22ef01cSRoman Divacky         NLZ &= ~7;
671f22ef01cSRoman Divacky         NTZ &= ~7;
672f22ef01cSRoman Divacky         // If we need exactly one byte, we can do this transformation.
673f22ef01cSRoman Divacky         if (BitWidth-NLZ-NTZ == 8) {
674f22ef01cSRoman Divacky           unsigned ResultBit = NTZ;
675f22ef01cSRoman Divacky           unsigned InputBit = BitWidth-NTZ-8;
676f22ef01cSRoman Divacky 
677f22ef01cSRoman Divacky           // Replace this with either a left or right shift to get the byte into
678f22ef01cSRoman Divacky           // the right place.
679f22ef01cSRoman Divacky           Instruction *NewVal;
680f22ef01cSRoman Divacky           if (InputBit > ResultBit)
681ffd1746dSEd Schouten             NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
682f22ef01cSRoman Divacky                     ConstantInt::get(I->getType(), InputBit-ResultBit));
683f22ef01cSRoman Divacky           else
684ffd1746dSEd Schouten             NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
685f22ef01cSRoman Divacky                     ConstantInt::get(I->getType(), ResultBit-InputBit));
686f22ef01cSRoman Divacky           NewVal->takeName(I);
687bd5abe19SDimitry Andric           return InsertNewInstWith(NewVal, *I);
688f22ef01cSRoman Divacky         }
689f22ef01cSRoman Divacky 
690f22ef01cSRoman Divacky         // TODO: Could compute known zero/one bits based on the input.
691f22ef01cSRoman Divacky         break;
692f22ef01cSRoman Divacky       }
693*b5893f02SDimitry Andric       case Intrinsic::fshr:
694*b5893f02SDimitry Andric       case Intrinsic::fshl: {
695*b5893f02SDimitry Andric         const APInt *SA;
696*b5893f02SDimitry Andric         if (!match(I->getOperand(2), m_APInt(SA)))
697*b5893f02SDimitry Andric           break;
698*b5893f02SDimitry Andric 
699*b5893f02SDimitry Andric         // Normalize to funnel shift left. APInt shifts of BitWidth are well-
700*b5893f02SDimitry Andric         // defined, so no need to special-case zero shifts here.
701*b5893f02SDimitry Andric         uint64_t ShiftAmt = SA->urem(BitWidth);
702*b5893f02SDimitry Andric         if (II->getIntrinsicID() == Intrinsic::fshr)
703*b5893f02SDimitry Andric           ShiftAmt = BitWidth - ShiftAmt;
704*b5893f02SDimitry Andric 
705*b5893f02SDimitry Andric         APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt));
706*b5893f02SDimitry Andric         APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt));
707*b5893f02SDimitry Andric         if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Depth + 1) ||
708*b5893f02SDimitry Andric             SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1))
709*b5893f02SDimitry Andric           return I;
710*b5893f02SDimitry Andric 
711*b5893f02SDimitry Andric         Known.Zero = LHSKnown.Zero.shl(ShiftAmt) |
712*b5893f02SDimitry Andric                      RHSKnown.Zero.lshr(BitWidth - ShiftAmt);
713*b5893f02SDimitry Andric         Known.One = LHSKnown.One.shl(ShiftAmt) |
714*b5893f02SDimitry Andric                     RHSKnown.One.lshr(BitWidth - ShiftAmt);
715*b5893f02SDimitry Andric         break;
716*b5893f02SDimitry Andric       }
7173ca95b02SDimitry Andric       case Intrinsic::x86_mmx_pmovmskb:
7183ca95b02SDimitry Andric       case Intrinsic::x86_sse_movmsk_ps:
7193ca95b02SDimitry Andric       case Intrinsic::x86_sse2_movmsk_pd:
7203ca95b02SDimitry Andric       case Intrinsic::x86_sse2_pmovmskb_128:
7213ca95b02SDimitry Andric       case Intrinsic::x86_avx_movmsk_ps_256:
7223ca95b02SDimitry Andric       case Intrinsic::x86_avx_movmsk_pd_256:
7233ca95b02SDimitry Andric       case Intrinsic::x86_avx2_pmovmskb: {
7243ca95b02SDimitry Andric         // MOVMSK copies the vector elements' sign bits to the low bits
7253ca95b02SDimitry Andric         // and zeros the high bits.
7263ca95b02SDimitry Andric         unsigned ArgWidth;
7273ca95b02SDimitry Andric         if (II->getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
7283ca95b02SDimitry Andric           ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>.
7293ca95b02SDimitry Andric         } else {
7303ca95b02SDimitry Andric           auto Arg = II->getArgOperand(0);
7313ca95b02SDimitry Andric           auto ArgType = cast<VectorType>(Arg->getType());
7323ca95b02SDimitry Andric           ArgWidth = ArgType->getNumElements();
7333ca95b02SDimitry Andric         }
7343ca95b02SDimitry Andric 
7353ca95b02SDimitry Andric         // If we don't need any of low bits then return zero,
7363ca95b02SDimitry Andric         // we know that DemandedMask is non-zero already.
7373ca95b02SDimitry Andric         APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
738db17bf38SDimitry Andric         if (DemandedElts.isNullValue())
7393ca95b02SDimitry Andric           return ConstantInt::getNullValue(VTy);
7403ca95b02SDimitry Andric 
7413ca95b02SDimitry Andric         // We know that the upper bits are set to zero.
74251690af2SDimitry Andric         Known.Zero.setBitsFrom(ArgWidth);
7433ca95b02SDimitry Andric         return nullptr;
7443ca95b02SDimitry Andric       }
745bd5abe19SDimitry Andric       case Intrinsic::x86_sse42_crc32_64_64:
74651690af2SDimitry Andric         Known.Zero.setBitsFrom(32);
74791bc56edSDimitry Andric         return nullptr;
748f22ef01cSRoman Divacky       }
749f22ef01cSRoman Divacky     }
75051690af2SDimitry Andric     computeKnownBits(V, Known, Depth, CxtI);
751f22ef01cSRoman Divacky     break;
752f22ef01cSRoman Divacky   }
753f22ef01cSRoman Divacky 
754f22ef01cSRoman Divacky   // If the client is only demanding bits that we know, return the known
755f22ef01cSRoman Divacky   // constant.
75651690af2SDimitry Andric   if (DemandedMask.isSubsetOf(Known.Zero|Known.One))
75751690af2SDimitry Andric     return Constant::getIntegerValue(VTy, Known.One);
75891bc56edSDimitry Andric   return nullptr;
759f22ef01cSRoman Divacky }
760f22ef01cSRoman Divacky 
76151690af2SDimitry Andric /// Helper routine of SimplifyDemandedUseBits. It computes Known
7627a7e6055SDimitry Andric /// bits. It also tries to handle simplifications that can be done based on
7637a7e6055SDimitry Andric /// DemandedMask, but without modifying the Instruction.
SimplifyMultipleUseDemandedBits(Instruction * I,const APInt & DemandedMask,KnownBits & Known,unsigned Depth,Instruction * CxtI)7647a7e6055SDimitry Andric Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
7657a7e6055SDimitry Andric                                                      const APInt &DemandedMask,
76651690af2SDimitry Andric                                                      KnownBits &Known,
7677a7e6055SDimitry Andric                                                      unsigned Depth,
7687a7e6055SDimitry Andric                                                      Instruction *CxtI) {
7697a7e6055SDimitry Andric   unsigned BitWidth = DemandedMask.getBitWidth();
7707a7e6055SDimitry Andric   Type *ITy = I->getType();
7717a7e6055SDimitry Andric 
77251690af2SDimitry Andric   KnownBits LHSKnown(BitWidth);
77351690af2SDimitry Andric   KnownBits RHSKnown(BitWidth);
7747a7e6055SDimitry Andric 
7757a7e6055SDimitry Andric   // Despite the fact that we can't simplify this instruction in all User's
77651690af2SDimitry Andric   // context, we can at least compute the known bits, and we can
7777a7e6055SDimitry Andric   // do simplifications that apply to *just* the one user if we know that
7787a7e6055SDimitry Andric   // this instruction has a simpler value in that context.
7797a7e6055SDimitry Andric   switch (I->getOpcode()) {
7807a7e6055SDimitry Andric   case Instruction::And: {
7817a7e6055SDimitry Andric     // If either the LHS or the RHS are Zero, the result is zero.
78251690af2SDimitry Andric     computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
78351690af2SDimitry Andric     computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
7847a7e6055SDimitry Andric                      CxtI);
7857a7e6055SDimitry Andric 
7867a7e6055SDimitry Andric     // Output known-0 are known to be clear if zero in either the LHS | RHS.
78751690af2SDimitry Andric     APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero;
7887a7e6055SDimitry Andric     // Output known-1 bits are only known if set in both the LHS & RHS.
78951690af2SDimitry Andric     APInt IKnownOne = RHSKnown.One & LHSKnown.One;
7907a7e6055SDimitry Andric 
7917a7e6055SDimitry Andric     // If the client is only demanding bits that we know, return the known
7927a7e6055SDimitry Andric     // constant.
7936bc11b14SDimitry Andric     if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
7947a7e6055SDimitry Andric       return Constant::getIntegerValue(ITy, IKnownOne);
7957a7e6055SDimitry Andric 
7967a7e6055SDimitry Andric     // If all of the demanded bits are known 1 on one side, return the other.
7977a7e6055SDimitry Andric     // These bits cannot contribute to the result of the 'and' in this
7987a7e6055SDimitry Andric     // context.
79951690af2SDimitry Andric     if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
8007a7e6055SDimitry Andric       return I->getOperand(0);
80151690af2SDimitry Andric     if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
8027a7e6055SDimitry Andric       return I->getOperand(1);
8037a7e6055SDimitry Andric 
80451690af2SDimitry Andric     Known.Zero = std::move(IKnownZero);
80551690af2SDimitry Andric     Known.One  = std::move(IKnownOne);
8067a7e6055SDimitry Andric     break;
8077a7e6055SDimitry Andric   }
8087a7e6055SDimitry Andric   case Instruction::Or: {
8097a7e6055SDimitry Andric     // We can simplify (X|Y) -> X or Y in the user's context if we know that
8107a7e6055SDimitry Andric     // only bits from X or Y are demanded.
8117a7e6055SDimitry Andric 
8127a7e6055SDimitry Andric     // If either the LHS or the RHS are One, the result is One.
81351690af2SDimitry Andric     computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
81451690af2SDimitry Andric     computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
8157a7e6055SDimitry Andric                      CxtI);
8167a7e6055SDimitry Andric 
8177a7e6055SDimitry Andric     // Output known-0 bits are only known if clear in both the LHS & RHS.
81851690af2SDimitry Andric     APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero;
8197a7e6055SDimitry Andric     // Output known-1 are known to be set if set in either the LHS | RHS.
82051690af2SDimitry Andric     APInt IKnownOne = RHSKnown.One | LHSKnown.One;
8217a7e6055SDimitry Andric 
8227a7e6055SDimitry Andric     // If the client is only demanding bits that we know, return the known
8237a7e6055SDimitry Andric     // constant.
8246bc11b14SDimitry Andric     if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
8257a7e6055SDimitry Andric       return Constant::getIntegerValue(ITy, IKnownOne);
8267a7e6055SDimitry Andric 
8277a7e6055SDimitry Andric     // If all of the demanded bits are known zero on one side, return the
8287a7e6055SDimitry Andric     // other.  These bits cannot contribute to the result of the 'or' in this
8297a7e6055SDimitry Andric     // context.
83051690af2SDimitry Andric     if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
8317a7e6055SDimitry Andric       return I->getOperand(0);
83251690af2SDimitry Andric     if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
8337a7e6055SDimitry Andric       return I->getOperand(1);
8347a7e6055SDimitry Andric 
83551690af2SDimitry Andric     Known.Zero = std::move(IKnownZero);
83651690af2SDimitry Andric     Known.One  = std::move(IKnownOne);
8377a7e6055SDimitry Andric     break;
8387a7e6055SDimitry Andric   }
8397a7e6055SDimitry Andric   case Instruction::Xor: {
8407a7e6055SDimitry Andric     // We can simplify (X^Y) -> X or Y in the user's context if we know that
8417a7e6055SDimitry Andric     // only bits from X or Y are demanded.
8427a7e6055SDimitry Andric 
84351690af2SDimitry Andric     computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
84451690af2SDimitry Andric     computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
8457a7e6055SDimitry Andric                      CxtI);
8467a7e6055SDimitry Andric 
8477a7e6055SDimitry Andric     // Output known-0 bits are known if clear or set in both the LHS & RHS.
84851690af2SDimitry Andric     APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) |
84951690af2SDimitry Andric                        (RHSKnown.One & LHSKnown.One);
8507a7e6055SDimitry Andric     // Output known-1 are known to be set if set in only one of the LHS, RHS.
85151690af2SDimitry Andric     APInt IKnownOne =  (RHSKnown.Zero & LHSKnown.One) |
85251690af2SDimitry Andric                        (RHSKnown.One & LHSKnown.Zero);
8537a7e6055SDimitry Andric 
8547a7e6055SDimitry Andric     // If the client is only demanding bits that we know, return the known
8557a7e6055SDimitry Andric     // constant.
8566bc11b14SDimitry Andric     if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
8577a7e6055SDimitry Andric       return Constant::getIntegerValue(ITy, IKnownOne);
8587a7e6055SDimitry Andric 
8597a7e6055SDimitry Andric     // If all of the demanded bits are known zero on one side, return the
8607a7e6055SDimitry Andric     // other.
86151690af2SDimitry Andric     if (DemandedMask.isSubsetOf(RHSKnown.Zero))
8627a7e6055SDimitry Andric       return I->getOperand(0);
86351690af2SDimitry Andric     if (DemandedMask.isSubsetOf(LHSKnown.Zero))
8647a7e6055SDimitry Andric       return I->getOperand(1);
8657a7e6055SDimitry Andric 
8667a7e6055SDimitry Andric     // Output known-0 bits are known if clear or set in both the LHS & RHS.
86751690af2SDimitry Andric     Known.Zero = std::move(IKnownZero);
8687a7e6055SDimitry Andric     // Output known-1 are known to be set if set in only one of the LHS, RHS.
86951690af2SDimitry Andric     Known.One  = std::move(IKnownOne);
8707a7e6055SDimitry Andric     break;
8717a7e6055SDimitry Andric   }
8727a7e6055SDimitry Andric   default:
87351690af2SDimitry Andric     // Compute the Known bits to simplify things downstream.
87451690af2SDimitry Andric     computeKnownBits(I, Known, Depth, CxtI);
8757a7e6055SDimitry Andric 
8767a7e6055SDimitry Andric     // If this user is only demanding bits that we know, return the known
8777a7e6055SDimitry Andric     // constant.
87851690af2SDimitry Andric     if (DemandedMask.isSubsetOf(Known.Zero|Known.One))
87951690af2SDimitry Andric       return Constant::getIntegerValue(ITy, Known.One);
8807a7e6055SDimitry Andric 
8817a7e6055SDimitry Andric     break;
8827a7e6055SDimitry Andric   }
8837a7e6055SDimitry Andric 
8847a7e6055SDimitry Andric   return nullptr;
8857a7e6055SDimitry Andric }
8867a7e6055SDimitry Andric 
8877a7e6055SDimitry Andric 
888139f7f9bSDimitry Andric /// Helper routine of SimplifyDemandedUseBits. It tries to simplify
889139f7f9bSDimitry Andric /// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
890139f7f9bSDimitry Andric /// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
891139f7f9bSDimitry Andric /// of "C2-C1".
892139f7f9bSDimitry Andric ///
893139f7f9bSDimitry Andric /// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
894139f7f9bSDimitry Andric /// ..., bn}, without considering the specific value X is holding.
895139f7f9bSDimitry Andric /// This transformation is legal iff one of following conditions is hold:
896139f7f9bSDimitry Andric ///  1) All the bit in S are 0, in this case E1 == E2.
897139f7f9bSDimitry Andric ///  2) We don't care those bits in S, per the input DemandedMask.
898139f7f9bSDimitry Andric ///  3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
899139f7f9bSDimitry Andric ///     rest bits.
900139f7f9bSDimitry Andric ///
901139f7f9bSDimitry Andric /// Currently we only test condition 2).
902139f7f9bSDimitry Andric ///
903139f7f9bSDimitry Andric /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
904139f7f9bSDimitry Andric /// not successful.
90551690af2SDimitry Andric Value *
simplifyShrShlDemandedBits(Instruction * Shr,const APInt & ShrOp1,Instruction * Shl,const APInt & ShlOp1,const APInt & DemandedMask,KnownBits & Known)90651690af2SDimitry Andric InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
90751690af2SDimitry Andric                                          Instruction *Shl, const APInt &ShlOp1,
9083ca95b02SDimitry Andric                                          const APInt &DemandedMask,
90951690af2SDimitry Andric                                          KnownBits &Known) {
91089a53411SDimitry Andric   if (!ShlOp1 || !ShrOp1)
91151690af2SDimitry Andric     return nullptr; // No-op.
91289a53411SDimitry Andric 
91389a53411SDimitry Andric   Value *VarX = Shr->getOperand(0);
91489a53411SDimitry Andric   Type *Ty = VarX->getType();
91551690af2SDimitry Andric   unsigned BitWidth = Ty->getScalarSizeInBits();
91689a53411SDimitry Andric   if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
91791bc56edSDimitry Andric     return nullptr; // Undef.
91889a53411SDimitry Andric 
91989a53411SDimitry Andric   unsigned ShlAmt = ShlOp1.getZExtValue();
92089a53411SDimitry Andric   unsigned ShrAmt = ShrOp1.getZExtValue();
921139f7f9bSDimitry Andric 
92251690af2SDimitry Andric   Known.One.clearAllBits();
92351690af2SDimitry Andric   Known.Zero.setLowBits(ShlAmt - 1);
92451690af2SDimitry Andric   Known.Zero &= DemandedMask;
925139f7f9bSDimitry Andric 
92689a53411SDimitry Andric   APInt BitMask1(APInt::getAllOnesValue(BitWidth));
92789a53411SDimitry Andric   APInt BitMask2(APInt::getAllOnesValue(BitWidth));
928139f7f9bSDimitry Andric 
929139f7f9bSDimitry Andric   bool isLshr = (Shr->getOpcode() == Instruction::LShr);
930139f7f9bSDimitry Andric   BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
931139f7f9bSDimitry Andric                       (BitMask1.ashr(ShrAmt) << ShlAmt);
932139f7f9bSDimitry Andric 
933139f7f9bSDimitry Andric   if (ShrAmt <= ShlAmt) {
934139f7f9bSDimitry Andric     BitMask2 <<= (ShlAmt - ShrAmt);
935139f7f9bSDimitry Andric   } else {
936139f7f9bSDimitry Andric     BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
937139f7f9bSDimitry Andric                         BitMask2.ashr(ShrAmt - ShlAmt);
938139f7f9bSDimitry Andric   }
939139f7f9bSDimitry Andric 
940139f7f9bSDimitry Andric   // Check if condition-2 (see the comment to this function) is satified.
941139f7f9bSDimitry Andric   if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
942139f7f9bSDimitry Andric     if (ShrAmt == ShlAmt)
943139f7f9bSDimitry Andric       return VarX;
944139f7f9bSDimitry Andric 
945139f7f9bSDimitry Andric     if (!Shr->hasOneUse())
94691bc56edSDimitry Andric       return nullptr;
947139f7f9bSDimitry Andric 
948139f7f9bSDimitry Andric     BinaryOperator *New;
949139f7f9bSDimitry Andric     if (ShrAmt < ShlAmt) {
950139f7f9bSDimitry Andric       Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
951139f7f9bSDimitry Andric       New = BinaryOperator::CreateShl(VarX, Amt);
952139f7f9bSDimitry Andric       BinaryOperator *Orig = cast<BinaryOperator>(Shl);
953139f7f9bSDimitry Andric       New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
954139f7f9bSDimitry Andric       New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
955139f7f9bSDimitry Andric     } else {
956139f7f9bSDimitry Andric       Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
957139f7f9bSDimitry Andric       New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
958139f7f9bSDimitry Andric                      BinaryOperator::CreateAShr(VarX, Amt);
959139f7f9bSDimitry Andric       if (cast<BinaryOperator>(Shr)->isExact())
960139f7f9bSDimitry Andric         New->setIsExact(true);
961139f7f9bSDimitry Andric     }
962139f7f9bSDimitry Andric 
963139f7f9bSDimitry Andric     return InsertNewInstWith(New, *Shl);
964139f7f9bSDimitry Andric   }
965139f7f9bSDimitry Andric 
96691bc56edSDimitry Andric   return nullptr;
967139f7f9bSDimitry Andric }
968f22ef01cSRoman Divacky 
9694ba319b5SDimitry Andric /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst * II,APInt DemandedElts,int DMaskIdx,int TFCIdx)9704ba319b5SDimitry Andric Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
9714ba319b5SDimitry Andric                                                            APInt DemandedElts,
972*b5893f02SDimitry Andric                                                            int DMaskIdx,
973*b5893f02SDimitry Andric                                                            int TFCIdx) {
9744ba319b5SDimitry Andric   unsigned VWidth = II->getType()->getVectorNumElements();
9754ba319b5SDimitry Andric   if (VWidth == 1)
9764ba319b5SDimitry Andric     return nullptr;
9774ba319b5SDimitry Andric 
978*b5893f02SDimitry Andric   // Need to change to new instruction format
979*b5893f02SDimitry Andric   ConstantInt *TFC = nullptr;
980*b5893f02SDimitry Andric   bool TFELWEEnabled = false;
981*b5893f02SDimitry Andric   if (TFCIdx > 0) {
982*b5893f02SDimitry Andric     TFC = dyn_cast<ConstantInt>(II->getArgOperand(TFCIdx));
983*b5893f02SDimitry Andric     TFELWEEnabled =    TFC->getZExtValue() & 0x1  // TFE
984*b5893f02SDimitry Andric                     || TFC->getZExtValue() & 0x2; // LWE
985*b5893f02SDimitry Andric   }
986*b5893f02SDimitry Andric 
987*b5893f02SDimitry Andric   if (TFELWEEnabled)
988*b5893f02SDimitry Andric     return nullptr; // TFE not yet supported
989*b5893f02SDimitry Andric 
9904ba319b5SDimitry Andric   ConstantInt *NewDMask = nullptr;
9914ba319b5SDimitry Andric 
9924ba319b5SDimitry Andric   if (DMaskIdx < 0) {
9934ba319b5SDimitry Andric     // Pretend that a prefix of elements is demanded to simplify the code
9944ba319b5SDimitry Andric     // below.
9954ba319b5SDimitry Andric     DemandedElts = (1 << DemandedElts.getActiveBits()) - 1;
9964ba319b5SDimitry Andric   } else {
9974ba319b5SDimitry Andric     ConstantInt *DMask = dyn_cast<ConstantInt>(II->getArgOperand(DMaskIdx));
9984ba319b5SDimitry Andric     if (!DMask)
9994ba319b5SDimitry Andric       return nullptr; // non-constant dmask is not supported by codegen
10004ba319b5SDimitry Andric 
10014ba319b5SDimitry Andric     unsigned DMaskVal = DMask->getZExtValue() & 0xf;
10024ba319b5SDimitry Andric 
10034ba319b5SDimitry Andric     // Mask off values that are undefined because the dmask doesn't cover them
10044ba319b5SDimitry Andric     DemandedElts &= (1 << countPopulation(DMaskVal)) - 1;
10054ba319b5SDimitry Andric 
10064ba319b5SDimitry Andric     unsigned NewDMaskVal = 0;
10074ba319b5SDimitry Andric     unsigned OrigLoadIdx = 0;
10084ba319b5SDimitry Andric     for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
10094ba319b5SDimitry Andric       const unsigned Bit = 1 << SrcIdx;
10104ba319b5SDimitry Andric       if (!!(DMaskVal & Bit)) {
10114ba319b5SDimitry Andric         if (!!DemandedElts[OrigLoadIdx])
10124ba319b5SDimitry Andric           NewDMaskVal |= Bit;
10134ba319b5SDimitry Andric         OrigLoadIdx++;
10144ba319b5SDimitry Andric       }
10154ba319b5SDimitry Andric     }
10164ba319b5SDimitry Andric 
10174ba319b5SDimitry Andric     if (DMaskVal != NewDMaskVal)
10184ba319b5SDimitry Andric       NewDMask = ConstantInt::get(DMask->getType(), NewDMaskVal);
10194ba319b5SDimitry Andric   }
10204ba319b5SDimitry Andric 
10214ba319b5SDimitry Andric   // TODO: Handle 3 vectors when supported in code gen.
10224ba319b5SDimitry Andric   unsigned NewNumElts = PowerOf2Ceil(DemandedElts.countPopulation());
10234ba319b5SDimitry Andric   if (!NewNumElts)
10244ba319b5SDimitry Andric     return UndefValue::get(II->getType());
10254ba319b5SDimitry Andric 
10264ba319b5SDimitry Andric   if (NewNumElts >= VWidth && DemandedElts.isMask()) {
10274ba319b5SDimitry Andric     if (NewDMask)
10284ba319b5SDimitry Andric       II->setArgOperand(DMaskIdx, NewDMask);
10294ba319b5SDimitry Andric     return nullptr;
10304ba319b5SDimitry Andric   }
10314ba319b5SDimitry Andric 
10324ba319b5SDimitry Andric   // Determine the overload types of the original intrinsic.
10334ba319b5SDimitry Andric   auto IID = II->getIntrinsicID();
10344ba319b5SDimitry Andric   SmallVector<Intrinsic::IITDescriptor, 16> Table;
10354ba319b5SDimitry Andric   getIntrinsicInfoTableEntries(IID, Table);
10364ba319b5SDimitry Andric   ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
10374ba319b5SDimitry Andric 
10384ba319b5SDimitry Andric   FunctionType *FTy = II->getCalledFunction()->getFunctionType();
10394ba319b5SDimitry Andric   SmallVector<Type *, 6> OverloadTys;
10404ba319b5SDimitry Andric   Intrinsic::matchIntrinsicType(FTy->getReturnType(), TableRef, OverloadTys);
10414ba319b5SDimitry Andric   for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
10424ba319b5SDimitry Andric     Intrinsic::matchIntrinsicType(FTy->getParamType(i), TableRef, OverloadTys);
10434ba319b5SDimitry Andric 
10444ba319b5SDimitry Andric   // Get the new return type overload of the intrinsic.
10454ba319b5SDimitry Andric   Module *M = II->getParent()->getParent()->getParent();
10464ba319b5SDimitry Andric   Type *EltTy = II->getType()->getVectorElementType();
10474ba319b5SDimitry Andric   Type *NewTy = (NewNumElts == 1) ? EltTy : VectorType::get(EltTy, NewNumElts);
10484ba319b5SDimitry Andric 
10494ba319b5SDimitry Andric   OverloadTys[0] = NewTy;
10504ba319b5SDimitry Andric   Function *NewIntrin = Intrinsic::getDeclaration(M, IID, OverloadTys);
10514ba319b5SDimitry Andric 
10524ba319b5SDimitry Andric   SmallVector<Value *, 16> Args;
10534ba319b5SDimitry Andric   for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
10544ba319b5SDimitry Andric     Args.push_back(II->getArgOperand(I));
10554ba319b5SDimitry Andric 
10564ba319b5SDimitry Andric   if (NewDMask)
10574ba319b5SDimitry Andric     Args[DMaskIdx] = NewDMask;
10584ba319b5SDimitry Andric 
10594ba319b5SDimitry Andric   IRBuilderBase::InsertPointGuard Guard(Builder);
10604ba319b5SDimitry Andric   Builder.SetInsertPoint(II);
10614ba319b5SDimitry Andric 
10624ba319b5SDimitry Andric   CallInst *NewCall = Builder.CreateCall(NewIntrin, Args);
10634ba319b5SDimitry Andric   NewCall->takeName(II);
10644ba319b5SDimitry Andric   NewCall->copyMetadata(*II);
10654ba319b5SDimitry Andric 
10664ba319b5SDimitry Andric   if (NewNumElts == 1) {
10674ba319b5SDimitry Andric     return Builder.CreateInsertElement(UndefValue::get(II->getType()), NewCall,
10684ba319b5SDimitry Andric                                        DemandedElts.countTrailingZeros());
10694ba319b5SDimitry Andric   }
10704ba319b5SDimitry Andric 
10714ba319b5SDimitry Andric   SmallVector<uint32_t, 8> EltMask;
10724ba319b5SDimitry Andric   unsigned NewLoadIdx = 0;
10734ba319b5SDimitry Andric   for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
10744ba319b5SDimitry Andric     if (!!DemandedElts[OrigLoadIdx])
10754ba319b5SDimitry Andric       EltMask.push_back(NewLoadIdx++);
10764ba319b5SDimitry Andric     else
10774ba319b5SDimitry Andric       EltMask.push_back(NewNumElts);
10784ba319b5SDimitry Andric   }
10794ba319b5SDimitry Andric 
10804ba319b5SDimitry Andric   Value *Shuffle =
10814ba319b5SDimitry Andric       Builder.CreateShuffleVector(NewCall, UndefValue::get(NewTy), EltMask);
10824ba319b5SDimitry Andric 
10834ba319b5SDimitry Andric   return Shuffle;
10844ba319b5SDimitry Andric }
10854ba319b5SDimitry Andric 
10863ca95b02SDimitry Andric /// The specified value produces a vector with any number of elements.
10873ca95b02SDimitry Andric /// DemandedElts contains the set of elements that are actually used by the
10883ca95b02SDimitry Andric /// caller. This method analyzes which elements of the operand are undef and
10893ca95b02SDimitry Andric /// returns that information in UndefElts.
1090f22ef01cSRoman Divacky ///
1091f22ef01cSRoman Divacky /// If the information about demanded elements can be used to simplify the
1092f22ef01cSRoman Divacky /// operation, the operation is simplified, then the resultant value is
1093f22ef01cSRoman Divacky /// returned.  This returns null if no change was made.
SimplifyDemandedVectorElts(Value * V,APInt DemandedElts,APInt & UndefElts,unsigned Depth)1094f22ef01cSRoman Divacky Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
1095f22ef01cSRoman Divacky                                                 APInt &UndefElts,
1096f22ef01cSRoman Divacky                                                 unsigned Depth) {
10973ca95b02SDimitry Andric   unsigned VWidth = V->getType()->getVectorNumElements();
1098f22ef01cSRoman Divacky   APInt EltMask(APInt::getAllOnesValue(VWidth));
1099f22ef01cSRoman Divacky   assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
1100f22ef01cSRoman Divacky 
1101f22ef01cSRoman Divacky   if (isa<UndefValue>(V)) {
1102f22ef01cSRoman Divacky     // If the entire vector is undefined, just return this info.
1103f22ef01cSRoman Divacky     UndefElts = EltMask;
110491bc56edSDimitry Andric     return nullptr;
1105f22ef01cSRoman Divacky   }
1106f22ef01cSRoman Divacky 
1107db17bf38SDimitry Andric   if (DemandedElts.isNullValue()) { // If nothing is demanded, provide undef.
1108f22ef01cSRoman Divacky     UndefElts = EltMask;
1109f22ef01cSRoman Divacky     return UndefValue::get(V->getType());
1110f22ef01cSRoman Divacky   }
1111f22ef01cSRoman Divacky 
1112f22ef01cSRoman Divacky   UndefElts = 0;
1113f22ef01cSRoman Divacky 
1114*b5893f02SDimitry Andric   if (auto *C = dyn_cast<Constant>(V)) {
1115f22ef01cSRoman Divacky     // Check if this is identity. If so, return 0 since we are not simplifying
1116f22ef01cSRoman Divacky     // anything.
1117f22ef01cSRoman Divacky     if (DemandedElts.isAllOnesValue())
111891bc56edSDimitry Andric       return nullptr;
1119f22ef01cSRoman Divacky 
11206122f3e6SDimitry Andric     Type *EltTy = cast<VectorType>(V->getType())->getElementType();
1121f22ef01cSRoman Divacky     Constant *Undef = UndefValue::get(EltTy);
1122dff0c46cSDimitry Andric     SmallVector<Constant*, 16> Elts;
1123f22ef01cSRoman Divacky     for (unsigned i = 0; i != VWidth; ++i) {
1124dff0c46cSDimitry Andric       if (!DemandedElts[i]) {   // If not demanded, set to undef.
1125dff0c46cSDimitry Andric         Elts.push_back(Undef);
1126dff0c46cSDimitry Andric         UndefElts.setBit(i);
1127dff0c46cSDimitry Andric         continue;
1128dff0c46cSDimitry Andric       }
1129dff0c46cSDimitry Andric 
1130dff0c46cSDimitry Andric       Constant *Elt = C->getAggregateElement(i);
113191bc56edSDimitry Andric       if (!Elt) return nullptr;
1132dff0c46cSDimitry Andric 
1133dff0c46cSDimitry Andric       if (isa<UndefValue>(Elt)) {   // Already undef.
1134dff0c46cSDimitry Andric         Elts.push_back(Undef);
1135dff0c46cSDimitry Andric         UndefElts.setBit(i);
1136dff0c46cSDimitry Andric       } else {                               // Otherwise, defined.
1137f22ef01cSRoman Divacky         Elts.push_back(Elt);
1138f22ef01cSRoman Divacky       }
1139dff0c46cSDimitry Andric     }
1140dff0c46cSDimitry Andric 
1141dff0c46cSDimitry Andric     // If we changed the constant, return it.
1142dff0c46cSDimitry Andric     Constant *NewCV = ConstantVector::get(Elts);
114391bc56edSDimitry Andric     return NewCV != C ? NewCV : nullptr;
1144f22ef01cSRoman Divacky   }
1145f22ef01cSRoman Divacky 
1146f22ef01cSRoman Divacky   // Limit search depth.
1147f22ef01cSRoman Divacky   if (Depth == 10)
114891bc56edSDimitry Andric     return nullptr;
1149f22ef01cSRoman Divacky 
1150bd5abe19SDimitry Andric   // If multiple users are using the root value, proceed with
1151f22ef01cSRoman Divacky   // simplification conservatively assuming that all elements
1152f22ef01cSRoman Divacky   // are needed.
1153f22ef01cSRoman Divacky   if (!V->hasOneUse()) {
1154f22ef01cSRoman Divacky     // Quit if we find multiple users of a non-root value though.
1155f22ef01cSRoman Divacky     // They'll be handled when it's their turn to be visited by
1156f22ef01cSRoman Divacky     // the main instcombine process.
1157f22ef01cSRoman Divacky     if (Depth != 0)
1158f22ef01cSRoman Divacky       // TODO: Just compute the UndefElts information recursively.
115991bc56edSDimitry Andric       return nullptr;
1160f22ef01cSRoman Divacky 
1161f22ef01cSRoman Divacky     // Conservatively assume that all elements are needed.
1162f22ef01cSRoman Divacky     DemandedElts = EltMask;
1163f22ef01cSRoman Divacky   }
1164f22ef01cSRoman Divacky 
1165f22ef01cSRoman Divacky   Instruction *I = dyn_cast<Instruction>(V);
116691bc56edSDimitry Andric   if (!I) return nullptr;        // Only analyze instructions.
1167f22ef01cSRoman Divacky 
1168f22ef01cSRoman Divacky   bool MadeChange = false;
1169*b5893f02SDimitry Andric   auto simplifyAndSetOp = [&](Instruction *Inst, unsigned OpNum,
1170*b5893f02SDimitry Andric                               APInt Demanded, APInt &Undef) {
1171*b5893f02SDimitry Andric     auto *II = dyn_cast<IntrinsicInst>(Inst);
1172*b5893f02SDimitry Andric     Value *Op = II ? II->getArgOperand(OpNum) : Inst->getOperand(OpNum);
1173*b5893f02SDimitry Andric     if (Value *V = SimplifyDemandedVectorElts(Op, Demanded, Undef, Depth + 1)) {
1174*b5893f02SDimitry Andric       if (II)
1175*b5893f02SDimitry Andric         II->setArgOperand(OpNum, V);
1176*b5893f02SDimitry Andric       else
1177*b5893f02SDimitry Andric         Inst->setOperand(OpNum, V);
1178*b5893f02SDimitry Andric       MadeChange = true;
1179*b5893f02SDimitry Andric     }
1180*b5893f02SDimitry Andric   };
1181*b5893f02SDimitry Andric 
1182f22ef01cSRoman Divacky   APInt UndefElts2(VWidth, 0);
1183d88c1a5aSDimitry Andric   APInt UndefElts3(VWidth, 0);
1184f22ef01cSRoman Divacky   switch (I->getOpcode()) {
1185f22ef01cSRoman Divacky   default: break;
1186f22ef01cSRoman Divacky 
1187f22ef01cSRoman Divacky   case Instruction::InsertElement: {
1188f22ef01cSRoman Divacky     // If this is a variable index, we don't know which element it overwrites.
1189f22ef01cSRoman Divacky     // demand exactly the same input as we produce.
1190f22ef01cSRoman Divacky     ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
119191bc56edSDimitry Andric     if (!Idx) {
1192f22ef01cSRoman Divacky       // Note that we can't propagate undef elt info, because we don't know
1193f22ef01cSRoman Divacky       // which elt is getting updated.
1194*b5893f02SDimitry Andric       simplifyAndSetOp(I, 0, DemandedElts, UndefElts2);
1195f22ef01cSRoman Divacky       break;
1196f22ef01cSRoman Divacky     }
1197f22ef01cSRoman Divacky 
11982cab237bSDimitry Andric     // The element inserted overwrites whatever was there, so the input demanded
11992cab237bSDimitry Andric     // set is simpler than the output set.
12002cab237bSDimitry Andric     unsigned IdxNo = Idx->getZExtValue();
12012cab237bSDimitry Andric     APInt PreInsertDemandedElts = DemandedElts;
12022cab237bSDimitry Andric     if (IdxNo < VWidth)
12032cab237bSDimitry Andric       PreInsertDemandedElts.clearBit(IdxNo);
1204*b5893f02SDimitry Andric 
1205*b5893f02SDimitry Andric     simplifyAndSetOp(I, 0, PreInsertDemandedElts, UndefElts);
12062cab237bSDimitry Andric 
1207f22ef01cSRoman Divacky     // If this is inserting an element that isn't demanded, remove this
1208f22ef01cSRoman Divacky     // insertelement.
1209f22ef01cSRoman Divacky     if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
1210f22ef01cSRoman Divacky       Worklist.Add(I);
1211f22ef01cSRoman Divacky       return I->getOperand(0);
1212f22ef01cSRoman Divacky     }
1213f22ef01cSRoman Divacky 
1214f22ef01cSRoman Divacky     // The inserted element is defined.
12152754fe60SDimitry Andric     UndefElts.clearBit(IdxNo);
1216f22ef01cSRoman Divacky     break;
1217f22ef01cSRoman Divacky   }
1218f22ef01cSRoman Divacky   case Instruction::ShuffleVector: {
1219f22ef01cSRoman Divacky     ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
1220d88c1a5aSDimitry Andric     unsigned LHSVWidth =
1221d88c1a5aSDimitry Andric       Shuffle->getOperand(0)->getType()->getVectorNumElements();
1222f22ef01cSRoman Divacky     APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
1223f22ef01cSRoman Divacky     for (unsigned i = 0; i < VWidth; i++) {
1224f22ef01cSRoman Divacky       if (DemandedElts[i]) {
1225f22ef01cSRoman Divacky         unsigned MaskVal = Shuffle->getMaskValue(i);
1226f22ef01cSRoman Divacky         if (MaskVal != -1u) {
1227f22ef01cSRoman Divacky           assert(MaskVal < LHSVWidth * 2 &&
1228f22ef01cSRoman Divacky                  "shufflevector mask index out of range!");
1229f22ef01cSRoman Divacky           if (MaskVal < LHSVWidth)
12302754fe60SDimitry Andric             LeftDemanded.setBit(MaskVal);
1231f22ef01cSRoman Divacky           else
12322754fe60SDimitry Andric             RightDemanded.setBit(MaskVal - LHSVWidth);
1233f22ef01cSRoman Divacky         }
1234f22ef01cSRoman Divacky       }
1235f22ef01cSRoman Divacky     }
1236f22ef01cSRoman Divacky 
1237d88c1a5aSDimitry Andric     APInt LHSUndefElts(LHSVWidth, 0);
1238*b5893f02SDimitry Andric     simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts);
1239f22ef01cSRoman Divacky 
1240d88c1a5aSDimitry Andric     APInt RHSUndefElts(LHSVWidth, 0);
1241*b5893f02SDimitry Andric     simplifyAndSetOp(I, 1, RightDemanded, RHSUndefElts);
1242f22ef01cSRoman Divacky 
1243f22ef01cSRoman Divacky     bool NewUndefElts = false;
1244d88c1a5aSDimitry Andric     unsigned LHSIdx = -1u, LHSValIdx = -1u;
1245d88c1a5aSDimitry Andric     unsigned RHSIdx = -1u, RHSValIdx = -1u;
1246d88c1a5aSDimitry Andric     bool LHSUniform = true;
1247d88c1a5aSDimitry Andric     bool RHSUniform = true;
1248f22ef01cSRoman Divacky     for (unsigned i = 0; i < VWidth; i++) {
1249f22ef01cSRoman Divacky       unsigned MaskVal = Shuffle->getMaskValue(i);
1250f22ef01cSRoman Divacky       if (MaskVal == -1u) {
12512754fe60SDimitry Andric         UndefElts.setBit(i);
12526122f3e6SDimitry Andric       } else if (!DemandedElts[i]) {
12536122f3e6SDimitry Andric         NewUndefElts = true;
12546122f3e6SDimitry Andric         UndefElts.setBit(i);
1255f22ef01cSRoman Divacky       } else if (MaskVal < LHSVWidth) {
1256d88c1a5aSDimitry Andric         if (LHSUndefElts[MaskVal]) {
1257f22ef01cSRoman Divacky           NewUndefElts = true;
12582754fe60SDimitry Andric           UndefElts.setBit(i);
1259d88c1a5aSDimitry Andric         } else {
1260d88c1a5aSDimitry Andric           LHSIdx = LHSIdx == -1u ? i : LHSVWidth;
1261d88c1a5aSDimitry Andric           LHSValIdx = LHSValIdx == -1u ? MaskVal : LHSVWidth;
1262d88c1a5aSDimitry Andric           LHSUniform = LHSUniform && (MaskVal == i);
1263f22ef01cSRoman Divacky         }
1264f22ef01cSRoman Divacky       } else {
1265d88c1a5aSDimitry Andric         if (RHSUndefElts[MaskVal - LHSVWidth]) {
1266f22ef01cSRoman Divacky           NewUndefElts = true;
12672754fe60SDimitry Andric           UndefElts.setBit(i);
1268d88c1a5aSDimitry Andric         } else {
1269d88c1a5aSDimitry Andric           RHSIdx = RHSIdx == -1u ? i : LHSVWidth;
1270d88c1a5aSDimitry Andric           RHSValIdx = RHSValIdx == -1u ? MaskVal - LHSVWidth : LHSVWidth;
1271d88c1a5aSDimitry Andric           RHSUniform = RHSUniform && (MaskVal - LHSVWidth == i);
1272f22ef01cSRoman Divacky         }
1273f22ef01cSRoman Divacky       }
1274f22ef01cSRoman Divacky     }
1275f22ef01cSRoman Divacky 
1276d88c1a5aSDimitry Andric     // Try to transform shuffle with constant vector and single element from
1277d88c1a5aSDimitry Andric     // this constant vector to single insertelement instruction.
1278d88c1a5aSDimitry Andric     // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
1279d88c1a5aSDimitry Andric     // insertelement V, C[ci], ci-n
1280d88c1a5aSDimitry Andric     if (LHSVWidth == Shuffle->getType()->getNumElements()) {
1281d88c1a5aSDimitry Andric       Value *Op = nullptr;
1282d88c1a5aSDimitry Andric       Constant *Value = nullptr;
1283d88c1a5aSDimitry Andric       unsigned Idx = -1u;
1284d88c1a5aSDimitry Andric 
1285d88c1a5aSDimitry Andric       // Find constant vector with the single element in shuffle (LHS or RHS).
1286d88c1a5aSDimitry Andric       if (LHSIdx < LHSVWidth && RHSUniform) {
1287d88c1a5aSDimitry Andric         if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(0))) {
1288d88c1a5aSDimitry Andric           Op = Shuffle->getOperand(1);
1289d88c1a5aSDimitry Andric           Value = CV->getOperand(LHSValIdx);
1290d88c1a5aSDimitry Andric           Idx = LHSIdx;
1291d88c1a5aSDimitry Andric         }
1292d88c1a5aSDimitry Andric       }
1293d88c1a5aSDimitry Andric       if (RHSIdx < LHSVWidth && LHSUniform) {
1294d88c1a5aSDimitry Andric         if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(1))) {
1295d88c1a5aSDimitry Andric           Op = Shuffle->getOperand(0);
1296d88c1a5aSDimitry Andric           Value = CV->getOperand(RHSValIdx);
1297d88c1a5aSDimitry Andric           Idx = RHSIdx;
1298d88c1a5aSDimitry Andric         }
1299d88c1a5aSDimitry Andric       }
1300d88c1a5aSDimitry Andric       // Found constant vector with single element - convert to insertelement.
1301d88c1a5aSDimitry Andric       if (Op && Value) {
1302d88c1a5aSDimitry Andric         Instruction *New = InsertElementInst::Create(
1303d88c1a5aSDimitry Andric             Op, Value, ConstantInt::get(Type::getInt32Ty(I->getContext()), Idx),
1304d88c1a5aSDimitry Andric             Shuffle->getName());
1305d88c1a5aSDimitry Andric         InsertNewInstWith(New, *Shuffle);
1306d88c1a5aSDimitry Andric         return New;
1307d88c1a5aSDimitry Andric       }
1308d88c1a5aSDimitry Andric     }
1309f22ef01cSRoman Divacky     if (NewUndefElts) {
1310f22ef01cSRoman Divacky       // Add additional discovered undefs.
1311dff0c46cSDimitry Andric       SmallVector<Constant*, 16> Elts;
1312f22ef01cSRoman Divacky       for (unsigned i = 0; i < VWidth; ++i) {
1313f22ef01cSRoman Divacky         if (UndefElts[i])
1314f22ef01cSRoman Divacky           Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
1315f22ef01cSRoman Divacky         else
1316f22ef01cSRoman Divacky           Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
1317f22ef01cSRoman Divacky                                           Shuffle->getMaskValue(i)));
1318f22ef01cSRoman Divacky       }
1319f22ef01cSRoman Divacky       I->setOperand(2, ConstantVector::get(Elts));
1320f22ef01cSRoman Divacky       MadeChange = true;
1321f22ef01cSRoman Divacky     }
1322f22ef01cSRoman Divacky     break;
1323f22ef01cSRoman Divacky   }
13247ae0e2c9SDimitry Andric   case Instruction::Select: {
1325*b5893f02SDimitry Andric     // If this is a vector select, try to transform the select condition based
1326*b5893f02SDimitry Andric     // on the current demanded elements.
1327*b5893f02SDimitry Andric     SelectInst *Sel = cast<SelectInst>(I);
1328*b5893f02SDimitry Andric     if (Sel->getCondition()->getType()->isVectorTy()) {
1329*b5893f02SDimitry Andric       // TODO: We are not doing anything with UndefElts based on this call.
1330*b5893f02SDimitry Andric       // It is overwritten below based on the other select operands. If an
1331*b5893f02SDimitry Andric       // element of the select condition is known undef, then we are free to
1332*b5893f02SDimitry Andric       // choose the output value from either arm of the select. If we know that
1333*b5893f02SDimitry Andric       // one of those values is undef, then the output can be undef.
1334*b5893f02SDimitry Andric       simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
1335*b5893f02SDimitry Andric     }
1336*b5893f02SDimitry Andric 
1337*b5893f02SDimitry Andric     // Next, see if we can transform the arms of the select.
1338*b5893f02SDimitry Andric     APInt DemandedLHS(DemandedElts), DemandedRHS(DemandedElts);
1339*b5893f02SDimitry Andric     if (auto *CV = dyn_cast<ConstantVector>(Sel->getCondition())) {
13407ae0e2c9SDimitry Andric       for (unsigned i = 0; i < VWidth; i++) {
1341*b5893f02SDimitry Andric         // isNullValue() always returns false when called on a ConstantExpr.
1342*b5893f02SDimitry Andric         // Skip constant expressions to avoid propagating incorrect information.
13437d523365SDimitry Andric         Constant *CElt = CV->getAggregateElement(i);
13447d523365SDimitry Andric         if (isa<ConstantExpr>(CElt))
13457d523365SDimitry Andric           continue;
1346*b5893f02SDimitry Andric         // TODO: If a select condition element is undef, we can demand from
1347*b5893f02SDimitry Andric         // either side. If one side is known undef, choosing that side would
1348*b5893f02SDimitry Andric         // propagate undef.
13497d523365SDimitry Andric         if (CElt->isNullValue())
1350*b5893f02SDimitry Andric           DemandedLHS.clearBit(i);
13517ae0e2c9SDimitry Andric         else
1352*b5893f02SDimitry Andric           DemandedRHS.clearBit(i);
13537ae0e2c9SDimitry Andric       }
13547ae0e2c9SDimitry Andric     }
13557ae0e2c9SDimitry Andric 
1356*b5893f02SDimitry Andric     simplifyAndSetOp(I, 1, DemandedLHS, UndefElts2);
1357*b5893f02SDimitry Andric     simplifyAndSetOp(I, 2, DemandedRHS, UndefElts3);
13587ae0e2c9SDimitry Andric 
1359*b5893f02SDimitry Andric     // Output elements are undefined if the element from each arm is undefined.
1360*b5893f02SDimitry Andric     // TODO: This can be improved. See comment in select condition handling.
1361*b5893f02SDimitry Andric     UndefElts = UndefElts2 & UndefElts3;
13627ae0e2c9SDimitry Andric     break;
13637ae0e2c9SDimitry Andric   }
1364f22ef01cSRoman Divacky   case Instruction::BitCast: {
1365f22ef01cSRoman Divacky     // Vector->vector casts only.
13666122f3e6SDimitry Andric     VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
1367f22ef01cSRoman Divacky     if (!VTy) break;
1368f22ef01cSRoman Divacky     unsigned InVWidth = VTy->getNumElements();
1369f22ef01cSRoman Divacky     APInt InputDemandedElts(InVWidth, 0);
13707d523365SDimitry Andric     UndefElts2 = APInt(InVWidth, 0);
1371f22ef01cSRoman Divacky     unsigned Ratio;
1372f22ef01cSRoman Divacky 
1373f22ef01cSRoman Divacky     if (VWidth == InVWidth) {
1374f22ef01cSRoman Divacky       // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
1375f22ef01cSRoman Divacky       // elements as are demanded of us.
1376f22ef01cSRoman Divacky       Ratio = 1;
1377f22ef01cSRoman Divacky       InputDemandedElts = DemandedElts;
13787d523365SDimitry Andric     } else if ((VWidth % InVWidth) == 0) {
13797d523365SDimitry Andric       // If the number of elements in the output is a multiple of the number of
13807d523365SDimitry Andric       // elements in the input then an input element is live if any of the
13817d523365SDimitry Andric       // corresponding output elements are live.
1382f22ef01cSRoman Divacky       Ratio = VWidth / InVWidth;
13837d523365SDimitry Andric       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
1384f22ef01cSRoman Divacky         if (DemandedElts[OutIdx])
13852754fe60SDimitry Andric           InputDemandedElts.setBit(OutIdx / Ratio);
13867d523365SDimitry Andric     } else if ((InVWidth % VWidth) == 0) {
13877d523365SDimitry Andric       // If the number of elements in the input is a multiple of the number of
13887d523365SDimitry Andric       // elements in the output then an input element is live if the
13897d523365SDimitry Andric       // corresponding output element is live.
1390f22ef01cSRoman Divacky       Ratio = InVWidth / VWidth;
1391f22ef01cSRoman Divacky       for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
1392f22ef01cSRoman Divacky         if (DemandedElts[InIdx / Ratio])
13932754fe60SDimitry Andric           InputDemandedElts.setBit(InIdx);
13947d523365SDimitry Andric     } else {
13957d523365SDimitry Andric       // Unsupported so far.
13967d523365SDimitry Andric       break;
1397f22ef01cSRoman Divacky     }
1398f22ef01cSRoman Divacky 
1399*b5893f02SDimitry Andric     simplifyAndSetOp(I, 0, InputDemandedElts, UndefElts2);
1400f22ef01cSRoman Divacky 
14017d523365SDimitry Andric     if (VWidth == InVWidth) {
1402f22ef01cSRoman Divacky       UndefElts = UndefElts2;
14037d523365SDimitry Andric     } else if ((VWidth % InVWidth) == 0) {
14047d523365SDimitry Andric       // If the number of elements in the output is a multiple of the number of
14057d523365SDimitry Andric       // elements in the input then an output element is undef if the
14067d523365SDimitry Andric       // corresponding input element is undef.
1407f22ef01cSRoman Divacky       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
1408f22ef01cSRoman Divacky         if (UndefElts2[OutIdx / Ratio])
14092754fe60SDimitry Andric           UndefElts.setBit(OutIdx);
14107d523365SDimitry Andric     } else if ((InVWidth % VWidth) == 0) {
14117d523365SDimitry Andric       // If the number of elements in the input is a multiple of the number of
14127d523365SDimitry Andric       // elements in the output then an output element is undef if all of the
14137d523365SDimitry Andric       // corresponding input elements are undef.
14147d523365SDimitry Andric       for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
14157d523365SDimitry Andric         APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
14167d523365SDimitry Andric         if (SubUndef.countPopulation() == Ratio)
14177d523365SDimitry Andric           UndefElts.setBit(OutIdx);
14187d523365SDimitry Andric       }
14197d523365SDimitry Andric     } else {
1420f22ef01cSRoman Divacky       llvm_unreachable("Unimp");
1421f22ef01cSRoman Divacky     }
1422f22ef01cSRoman Divacky     break;
1423f22ef01cSRoman Divacky   }
14247ae0e2c9SDimitry Andric   case Instruction::FPTrunc:
14257ae0e2c9SDimitry Andric   case Instruction::FPExt:
1426*b5893f02SDimitry Andric     simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
14277ae0e2c9SDimitry Andric     break;
1428f22ef01cSRoman Divacky 
1429f22ef01cSRoman Divacky   case Instruction::Call: {
1430f22ef01cSRoman Divacky     IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
1431f22ef01cSRoman Divacky     if (!II) break;
1432f22ef01cSRoman Divacky     switch (II->getIntrinsicID()) {
1433d88c1a5aSDimitry Andric     case Intrinsic::x86_xop_vfrcz_ss:
1434d88c1a5aSDimitry Andric     case Intrinsic::x86_xop_vfrcz_sd:
1435d88c1a5aSDimitry Andric       // The instructions for these intrinsics are speced to zero upper bits not
1436d88c1a5aSDimitry Andric       // pass them through like other scalar intrinsics. So we shouldn't just
1437d88c1a5aSDimitry Andric       // use Arg0 if DemandedElts[0] is clear like we do for other intrinsics.
1438d88c1a5aSDimitry Andric       // Instead we should return a zero vector.
1439d88c1a5aSDimitry Andric       if (!DemandedElts[0]) {
1440d88c1a5aSDimitry Andric         Worklist.Add(II);
1441d88c1a5aSDimitry Andric         return ConstantAggregateZero::get(II->getType());
1442d88c1a5aSDimitry Andric       }
1443d88c1a5aSDimitry Andric 
1444d88c1a5aSDimitry Andric       // Only the lower element is used.
1445d88c1a5aSDimitry Andric       DemandedElts = 1;
1446*b5893f02SDimitry Andric       simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
1447d88c1a5aSDimitry Andric 
1448d88c1a5aSDimitry Andric       // Only the lower element is undefined. The high elements are zero.
1449d88c1a5aSDimitry Andric       UndefElts = UndefElts[0];
1450d88c1a5aSDimitry Andric       break;
1451d88c1a5aSDimitry Andric 
14523ca95b02SDimitry Andric     // Unary scalar-as-vector operations that work column-wise.
14533ca95b02SDimitry Andric     case Intrinsic::x86_sse_rcp_ss:
14543ca95b02SDimitry Andric     case Intrinsic::x86_sse_rsqrt_ss:
1455*b5893f02SDimitry Andric       simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
14563ca95b02SDimitry Andric 
14573ca95b02SDimitry Andric       // If lowest element of a scalar op isn't used then use Arg0.
1458d88c1a5aSDimitry Andric       if (!DemandedElts[0]) {
1459d88c1a5aSDimitry Andric         Worklist.Add(II);
14603ca95b02SDimitry Andric         return II->getArgOperand(0);
1461d88c1a5aSDimitry Andric       }
14623ca95b02SDimitry Andric       // TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions
14633ca95b02SDimitry Andric       // checks).
14643ca95b02SDimitry Andric       break;
14653ca95b02SDimitry Andric 
1466d88c1a5aSDimitry Andric     // Binary scalar-as-vector operations that work column-wise. The high
1467d88c1a5aSDimitry Andric     // elements come from operand 0. The low element is a function of both
1468d88c1a5aSDimitry Andric     // operands.
1469f22ef01cSRoman Divacky     case Intrinsic::x86_sse_min_ss:
1470f22ef01cSRoman Divacky     case Intrinsic::x86_sse_max_ss:
14713ca95b02SDimitry Andric     case Intrinsic::x86_sse_cmp_ss:
1472f22ef01cSRoman Divacky     case Intrinsic::x86_sse2_min_sd:
1473f22ef01cSRoman Divacky     case Intrinsic::x86_sse2_max_sd:
1474d88c1a5aSDimitry Andric     case Intrinsic::x86_sse2_cmp_sd: {
1475*b5893f02SDimitry Andric       simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
1476d88c1a5aSDimitry Andric 
1477d88c1a5aSDimitry Andric       // If lowest element of a scalar op isn't used then use Arg0.
1478d88c1a5aSDimitry Andric       if (!DemandedElts[0]) {
1479d88c1a5aSDimitry Andric         Worklist.Add(II);
1480d88c1a5aSDimitry Andric         return II->getArgOperand(0);
1481d88c1a5aSDimitry Andric       }
1482d88c1a5aSDimitry Andric 
1483d88c1a5aSDimitry Andric       // Only lower element is used for operand 1.
1484d88c1a5aSDimitry Andric       DemandedElts = 1;
1485*b5893f02SDimitry Andric       simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);
1486f22ef01cSRoman Divacky 
1487d88c1a5aSDimitry Andric       // Lower element is undefined if both lower elements are undefined.
1488d88c1a5aSDimitry Andric       // Consider things like undef&0.  The result is known zero, not undef.
1489d88c1a5aSDimitry Andric       if (!UndefElts2[0])
1490d88c1a5aSDimitry Andric         UndefElts.clearBit(0);
1491f22ef01cSRoman Divacky 
14923ca95b02SDimitry Andric       break;
1493f22ef01cSRoman Divacky     }
1494f22ef01cSRoman Divacky 
1495d88c1a5aSDimitry Andric     // Binary scalar-as-vector operations that work column-wise. The high
1496d88c1a5aSDimitry Andric     // elements come from operand 0 and the low element comes from operand 1.
1497d88c1a5aSDimitry Andric     case Intrinsic::x86_sse41_round_ss:
1498d88c1a5aSDimitry Andric     case Intrinsic::x86_sse41_round_sd: {
1499d88c1a5aSDimitry Andric       // Don't use the low element of operand 0.
1500d88c1a5aSDimitry Andric       APInt DemandedElts2 = DemandedElts;
1501d88c1a5aSDimitry Andric       DemandedElts2.clearBit(0);
1502*b5893f02SDimitry Andric       simplifyAndSetOp(II, 0, DemandedElts2, UndefElts);
1503f22ef01cSRoman Divacky 
15043ca95b02SDimitry Andric       // If lowest element of a scalar op isn't used then use Arg0.
1505d88c1a5aSDimitry Andric       if (!DemandedElts[0]) {
1506d88c1a5aSDimitry Andric         Worklist.Add(II);
15073ca95b02SDimitry Andric         return II->getArgOperand(0);
1508d88c1a5aSDimitry Andric       }
15093ca95b02SDimitry Andric 
1510d88c1a5aSDimitry Andric       // Only lower element is used for operand 1.
1511d88c1a5aSDimitry Andric       DemandedElts = 1;
1512*b5893f02SDimitry Andric       simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);
1513d88c1a5aSDimitry Andric 
1514d88c1a5aSDimitry Andric       // Take the high undef elements from operand 0 and take the lower element
1515d88c1a5aSDimitry Andric       // from operand 1.
1516d88c1a5aSDimitry Andric       UndefElts.clearBit(0);
1517d88c1a5aSDimitry Andric       UndefElts |= UndefElts2[0];
1518f22ef01cSRoman Divacky       break;
1519d88c1a5aSDimitry Andric     }
1520d88c1a5aSDimitry Andric 
1521d88c1a5aSDimitry Andric     // Three input scalar-as-vector operations that work column-wise. The high
1522d88c1a5aSDimitry Andric     // elements come from operand 0 and the low element is a function of all
1523d88c1a5aSDimitry Andric     // three inputs.
1524d88c1a5aSDimitry Andric     case Intrinsic::x86_avx512_mask_add_ss_round:
1525d88c1a5aSDimitry Andric     case Intrinsic::x86_avx512_mask_div_ss_round:
1526d88c1a5aSDimitry Andric     case Intrinsic::x86_avx512_mask_mul_ss_round:
1527d88c1a5aSDimitry Andric     case Intrinsic::x86_avx512_mask_sub_ss_round:
1528d88c1a5aSDimitry Andric     case Intrinsic::x86_avx512_mask_max_ss_round:
1529d88c1a5aSDimitry Andric     case Intrinsic::x86_avx512_mask_min_ss_round:
1530d88c1a5aSDimitry Andric     case Intrinsic::x86_avx512_mask_add_sd_round:
1531d88c1a5aSDimitry Andric     case Intrinsic::x86_avx512_mask_div_sd_round:
1532d88c1a5aSDimitry Andric     case Intrinsic::x86_avx512_mask_mul_sd_round:
1533d88c1a5aSDimitry Andric     case Intrinsic::x86_avx512_mask_sub_sd_round:
1534d88c1a5aSDimitry Andric     case Intrinsic::x86_avx512_mask_max_sd_round:
1535d88c1a5aSDimitry Andric     case Intrinsic::x86_avx512_mask_min_sd_round:
1536*b5893f02SDimitry Andric       simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
1537d88c1a5aSDimitry Andric 
1538d88c1a5aSDimitry Andric       // If lowest element of a scalar op isn't used then use Arg0.
1539d88c1a5aSDimitry Andric       if (!DemandedElts[0]) {
1540d88c1a5aSDimitry Andric         Worklist.Add(II);
1541d88c1a5aSDimitry Andric         return II->getArgOperand(0);
1542d88c1a5aSDimitry Andric       }
1543d88c1a5aSDimitry Andric 
1544d88c1a5aSDimitry Andric       // Only lower element is used for operand 1 and 2.
1545d88c1a5aSDimitry Andric       DemandedElts = 1;
1546*b5893f02SDimitry Andric       simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);
1547*b5893f02SDimitry Andric       simplifyAndSetOp(II, 2, DemandedElts, UndefElts3);
1548d88c1a5aSDimitry Andric 
1549d88c1a5aSDimitry Andric       // Lower element is undefined if all three lower elements are undefined.
1550d88c1a5aSDimitry Andric       // Consider things like undef&0.  The result is known zero, not undef.
1551d88c1a5aSDimitry Andric       if (!UndefElts2[0] || !UndefElts3[0])
1552d88c1a5aSDimitry Andric         UndefElts.clearBit(0);
1553d88c1a5aSDimitry Andric 
1554d88c1a5aSDimitry Andric       break;
1555d88c1a5aSDimitry Andric 
15567a7e6055SDimitry Andric     case Intrinsic::x86_sse2_packssdw_128:
15577a7e6055SDimitry Andric     case Intrinsic::x86_sse2_packsswb_128:
15587a7e6055SDimitry Andric     case Intrinsic::x86_sse2_packuswb_128:
15597a7e6055SDimitry Andric     case Intrinsic::x86_sse41_packusdw:
15607a7e6055SDimitry Andric     case Intrinsic::x86_avx2_packssdw:
15617a7e6055SDimitry Andric     case Intrinsic::x86_avx2_packsswb:
15627a7e6055SDimitry Andric     case Intrinsic::x86_avx2_packusdw:
15637a7e6055SDimitry Andric     case Intrinsic::x86_avx2_packuswb:
15647a7e6055SDimitry Andric     case Intrinsic::x86_avx512_packssdw_512:
15657a7e6055SDimitry Andric     case Intrinsic::x86_avx512_packsswb_512:
15667a7e6055SDimitry Andric     case Intrinsic::x86_avx512_packusdw_512:
15677a7e6055SDimitry Andric     case Intrinsic::x86_avx512_packuswb_512: {
15687a7e6055SDimitry Andric       auto *Ty0 = II->getArgOperand(0)->getType();
15697a7e6055SDimitry Andric       unsigned InnerVWidth = Ty0->getVectorNumElements();
15707a7e6055SDimitry Andric       assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");
15717a7e6055SDimitry Andric 
15727a7e6055SDimitry Andric       unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
15737a7e6055SDimitry Andric       unsigned VWidthPerLane = VWidth / NumLanes;
15747a7e6055SDimitry Andric       unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
15757a7e6055SDimitry Andric 
15767a7e6055SDimitry Andric       // Per lane, pack the elements of the first input and then the second.
15777a7e6055SDimitry Andric       // e.g.
15787a7e6055SDimitry Andric       // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3])
15797a7e6055SDimitry Andric       // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
15807a7e6055SDimitry Andric       for (int OpNum = 0; OpNum != 2; ++OpNum) {
15817a7e6055SDimitry Andric         APInt OpDemandedElts(InnerVWidth, 0);
15827a7e6055SDimitry Andric         for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
15837a7e6055SDimitry Andric           unsigned LaneIdx = Lane * VWidthPerLane;
15847a7e6055SDimitry Andric           for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
15857a7e6055SDimitry Andric             unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
15867a7e6055SDimitry Andric             if (DemandedElts[Idx])
15877a7e6055SDimitry Andric               OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
15887a7e6055SDimitry Andric           }
15897a7e6055SDimitry Andric         }
15907a7e6055SDimitry Andric 
15917a7e6055SDimitry Andric         // Demand elements from the operand.
15927a7e6055SDimitry Andric         APInt OpUndefElts(InnerVWidth, 0);
1593*b5893f02SDimitry Andric         simplifyAndSetOp(II, OpNum, OpDemandedElts, OpUndefElts);
15947a7e6055SDimitry Andric 
15957a7e6055SDimitry Andric         // Pack the operand's UNDEF elements, one lane at a time.
15967a7e6055SDimitry Andric         OpUndefElts = OpUndefElts.zext(VWidth);
15977a7e6055SDimitry Andric         for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
15987a7e6055SDimitry Andric           APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
15997a7e6055SDimitry Andric           LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
1600f37b6182SDimitry Andric           LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
16017a7e6055SDimitry Andric           UndefElts |= LaneElts;
16027a7e6055SDimitry Andric         }
16037a7e6055SDimitry Andric       }
16047a7e6055SDimitry Andric       break;
16057a7e6055SDimitry Andric     }
16067a7e6055SDimitry Andric 
16077a7e6055SDimitry Andric     // PSHUFB
16087a7e6055SDimitry Andric     case Intrinsic::x86_ssse3_pshuf_b_128:
16097a7e6055SDimitry Andric     case Intrinsic::x86_avx2_pshuf_b:
16107a7e6055SDimitry Andric     case Intrinsic::x86_avx512_pshuf_b_512:
16117a7e6055SDimitry Andric     // PERMILVAR
16127a7e6055SDimitry Andric     case Intrinsic::x86_avx_vpermilvar_ps:
16137a7e6055SDimitry Andric     case Intrinsic::x86_avx_vpermilvar_ps_256:
16147a7e6055SDimitry Andric     case Intrinsic::x86_avx512_vpermilvar_ps_512:
16157a7e6055SDimitry Andric     case Intrinsic::x86_avx_vpermilvar_pd:
16167a7e6055SDimitry Andric     case Intrinsic::x86_avx_vpermilvar_pd_256:
16177a7e6055SDimitry Andric     case Intrinsic::x86_avx512_vpermilvar_pd_512:
16187a7e6055SDimitry Andric     // PERMV
16197a7e6055SDimitry Andric     case Intrinsic::x86_avx2_permd:
16207a7e6055SDimitry Andric     case Intrinsic::x86_avx2_permps: {
1621*b5893f02SDimitry Andric       simplifyAndSetOp(II, 1, DemandedElts, UndefElts);
16227a7e6055SDimitry Andric       break;
16237a7e6055SDimitry Andric     }
16247a7e6055SDimitry Andric 
16257d523365SDimitry Andric     // SSE4A instructions leave the upper 64-bits of the 128-bit result
16267d523365SDimitry Andric     // in an undefined state.
16277d523365SDimitry Andric     case Intrinsic::x86_sse4a_extrq:
16287d523365SDimitry Andric     case Intrinsic::x86_sse4a_extrqi:
16297d523365SDimitry Andric     case Intrinsic::x86_sse4a_insertq:
16307d523365SDimitry Andric     case Intrinsic::x86_sse4a_insertqi:
16317a7e6055SDimitry Andric       UndefElts.setHighBits(VWidth / 2);
16327d523365SDimitry Andric       break;
16337a7e6055SDimitry Andric     case Intrinsic::amdgcn_buffer_load:
16346bc11b14SDimitry Andric     case Intrinsic::amdgcn_buffer_load_format:
1635*b5893f02SDimitry Andric     case Intrinsic::amdgcn_raw_buffer_load:
1636*b5893f02SDimitry Andric     case Intrinsic::amdgcn_raw_buffer_load_format:
1637*b5893f02SDimitry Andric     case Intrinsic::amdgcn_struct_buffer_load:
1638*b5893f02SDimitry Andric     case Intrinsic::amdgcn_struct_buffer_load_format:
16394ba319b5SDimitry Andric       return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts);
16404ba319b5SDimitry Andric     default: {
16414ba319b5SDimitry Andric       if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID()))
1642*b5893f02SDimitry Andric         return simplifyAMDGCNMemoryIntrinsicDemanded(
1643*b5893f02SDimitry Andric             II, DemandedElts, 0, II->getNumArgOperands() - 2);
16446bc11b14SDimitry Andric 
16456bc11b14SDimitry Andric       break;
16467a7e6055SDimitry Andric     }
1647*b5893f02SDimitry Andric     } // switch on IntrinsicID
1648f22ef01cSRoman Divacky     break;
1649*b5893f02SDimitry Andric   } // case Call
1650*b5893f02SDimitry Andric   } // switch on Opcode
1651*b5893f02SDimitry Andric 
1652*b5893f02SDimitry Andric   // TODO: We bail completely on integer div/rem and shifts because they have
1653*b5893f02SDimitry Andric   // UB/poison potential, but that should be refined.
1654*b5893f02SDimitry Andric   BinaryOperator *BO;
1655*b5893f02SDimitry Andric   if (match(I, m_BinOp(BO)) && !BO->isIntDivRem() && !BO->isShift()) {
1656*b5893f02SDimitry Andric     simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
1657*b5893f02SDimitry Andric     simplifyAndSetOp(I, 1, DemandedElts, UndefElts2);
1658*b5893f02SDimitry Andric 
1659*b5893f02SDimitry Andric     // Any change to an instruction with potential poison must clear those flags
1660*b5893f02SDimitry Andric     // because we can not guarantee those constraints now. Other analysis may
1661*b5893f02SDimitry Andric     // determine that it is safe to re-apply the flags.
1662*b5893f02SDimitry Andric     if (MadeChange)
1663*b5893f02SDimitry Andric       BO->dropPoisonGeneratingFlags();
1664*b5893f02SDimitry Andric 
1665*b5893f02SDimitry Andric     // Output elements are undefined if both are undefined. Consider things
1666*b5893f02SDimitry Andric     // like undef & 0. The result is known zero, not undef.
1667*b5893f02SDimitry Andric     UndefElts &= UndefElts2;
1668f22ef01cSRoman Divacky   }
1669*b5893f02SDimitry Andric 
167091bc56edSDimitry Andric   return MadeChange ? I : nullptr;
1671f22ef01cSRoman Divacky }
1672