1 //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass looks for instructions that can be replaced by a Test Data Class
10 // instruction, and replaces them when profitable.
11 //
12 // Roughly, the following rules are recognized:
13 //
14 // 1: fcmp pred X, 0 -> tdc X, mask
15 // 2: fcmp pred X, +-inf -> tdc X, mask
16 // 3: fcmp pred X, +-minnorm -> tdc X, mask
17 // 4: tdc (fabs X), mask -> tdc X, newmask
18 // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit]
19 // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
20 // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
21 // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
22 // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
23 // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
24 //
25 // The pass works in 4 steps:
26 //
27 // 1. All fcmp and icmp instructions in a function are checked for a match
28 //    with rules 1-3 and 5-7.  Their TDC equivalents are stored in
29 //    the ConvertedInsts mapping.  If the operand of a fcmp instruction is
30 //    a fabs, it's also folded according to rule 4.
31 // 2. All and/or/xor i1 instructions whose both operands have been already
32 //    mapped are mapped according to rules 8-10.  LogicOpsWorklist is used
33 //    as a queue of instructions to check.
34 // 3. All mapped instructions that are considered worthy of conversion (ie.
35 //    replacing them will actually simplify the final code) are replaced
36 //    with a call to the s390.tdc intrinsic.
37 // 4. All intermediate results of replaced instructions are removed if unused.
38 //
39 // Instructions that match rules 1-3 are considered unworthy of conversion
40 // on their own (since a comparison instruction is superior), but are mapped
41 // in the hopes of folding the result using rules 4 and 8-10 (likely removing
42 // the original comparison in the process).
43 //
44 //===----------------------------------------------------------------------===//
45 
46 #include "SystemZ.h"
47 #include "SystemZSubtarget.h"
48 #include "llvm/ADT/MapVector.h"
49 #include "llvm/CodeGen/TargetPassConfig.h"
50 #include "llvm/IR/Constants.h"
51 #include "llvm/IR/IRBuilder.h"
52 #include "llvm/IR/InstIterator.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/IntrinsicsS390.h"
56 #include "llvm/IR/LegacyPassManager.h"
57 #include "llvm/IR/Module.h"
58 #include <deque>
59 #include <set>
60 
61 using namespace llvm;
62 
63 namespace llvm {
64   void initializeSystemZTDCPassPass(PassRegistry&);
65 }
66 
67 namespace {
68 
69 class SystemZTDCPass : public FunctionPass {
70 public:
71   static char ID;
72   SystemZTDCPass() : FunctionPass(ID) {
73     initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry());
74   }
75 
76   bool runOnFunction(Function &F) override;
77 
78   void getAnalysisUsage(AnalysisUsage &AU) const override {
79     AU.addRequired<TargetPassConfig>();
80  }
81 
82 private:
83   // Maps seen instructions that can be mapped to a TDC, values are
84   // (TDC operand, TDC mask, worthy flag) triples.
85   MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts;
86   // The queue of and/or/xor i1 instructions to be potentially folded.
87   std::vector<BinaryOperator *> LogicOpsWorklist;
88   // Instructions matched while folding, to be removed at the end if unused.
89   std::set<Instruction *> PossibleJunk;
90 
91   // Tries to convert a fcmp instruction.
92   void convertFCmp(CmpInst &I);
93 
94   // Tries to convert an icmp instruction.
95   void convertICmp(CmpInst &I);
96 
97   // Tries to convert an i1 and/or/xor instruction, whose both operands
98   // have been already converted.
99   void convertLogicOp(BinaryOperator &I);
100 
101   // Marks an instruction as converted - adds it to ConvertedInsts and adds
102   // any and/or/xor i1 users to the queue.
103   void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
104     ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy);
105     auto &M = *I->getFunction()->getParent();
106     auto &Ctx = M.getContext();
107     for (auto *U : I->users()) {
108       auto *LI = dyn_cast<BinaryOperator>(U);
109       if (LI && LI->getType() == Type::getInt1Ty(Ctx) &&
110           (LI->getOpcode() == Instruction::And ||
111            LI->getOpcode() == Instruction::Or ||
112            LI->getOpcode() == Instruction::Xor)) {
113         LogicOpsWorklist.push_back(LI);
114       }
115     }
116   }
117 };
118 
119 } // end anonymous namespace
120 
121 char SystemZTDCPass::ID = 0;
122 INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
123                 "SystemZ Test Data Class optimization", false, false)
124 
125 FunctionPass *llvm::createSystemZTDCPass() {
126   return new SystemZTDCPass();
127 }
128 
129 void SystemZTDCPass::convertFCmp(CmpInst &I) {
130   Value *Op0 = I.getOperand(0);
131   auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
132   auto Pred = I.getPredicate();
133   // Only comparisons with consts are interesting.
134   if (!Const)
135     return;
136   // Compute the smallest normal number (and its negation).
137   auto &Sem = Op0->getType()->getFltSemantics();
138   APFloat Smallest = APFloat::getSmallestNormalized(Sem);
139   APFloat NegSmallest = Smallest;
140   NegSmallest.changeSign();
141   // Check if Const is one of our recognized consts.
142   int WhichConst;
143   if (Const->isZero()) {
144     // All comparisons with 0 can be converted.
145     WhichConst = 0;
146   } else if (Const->isInfinity()) {
147     // Likewise for infinities.
148     WhichConst = Const->isNegative() ? 2 : 1;
149   } else if (Const->isExactlyValue(Smallest)) {
150     // For Smallest, we cannot do EQ separately from GT.
151     if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
152         (Pred & CmpInst::FCMP_OGE) != 0)
153       return;
154     WhichConst = 3;
155   } else if (Const->isExactlyValue(NegSmallest)) {
156     // Likewise for NegSmallest, we cannot do EQ separately from LT.
157     if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
158         (Pred & CmpInst::FCMP_OLE) != 0)
159       return;
160     WhichConst = 4;
161   } else {
162     // Not one of our special constants.
163     return;
164   }
165   // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
166   static const int Masks[][4] = {
167     { // 0
168       SystemZ::TDCMASK_ZERO,              // eq
169       SystemZ::TDCMASK_POSITIVE,          // gt
170       SystemZ::TDCMASK_NEGATIVE,          // lt
171       SystemZ::TDCMASK_NAN,               // un
172     },
173     { // inf
174       SystemZ::TDCMASK_INFINITY_PLUS,     // eq
175       0,                                  // gt
176       (SystemZ::TDCMASK_ZERO |
177        SystemZ::TDCMASK_NEGATIVE |
178        SystemZ::TDCMASK_NORMAL_PLUS |
179        SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
180       SystemZ::TDCMASK_NAN,               // un
181     },
182     { // -inf
183       SystemZ::TDCMASK_INFINITY_MINUS,    // eq
184       (SystemZ::TDCMASK_ZERO |
185        SystemZ::TDCMASK_POSITIVE |
186        SystemZ::TDCMASK_NORMAL_MINUS |
187        SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
188       0,                                  // lt
189       SystemZ::TDCMASK_NAN,               // un
190     },
191     { // minnorm
192       0,                                  // eq (unsupported)
193       (SystemZ::TDCMASK_NORMAL_PLUS |
194        SystemZ::TDCMASK_INFINITY_PLUS),   // gt (actually ge)
195       (SystemZ::TDCMASK_ZERO |
196        SystemZ::TDCMASK_NEGATIVE |
197        SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
198       SystemZ::TDCMASK_NAN,               // un
199     },
200     { // -minnorm
201       0,                                  // eq (unsupported)
202       (SystemZ::TDCMASK_ZERO |
203        SystemZ::TDCMASK_POSITIVE |
204        SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
205       (SystemZ::TDCMASK_NORMAL_MINUS |
206        SystemZ::TDCMASK_INFINITY_MINUS),  // lt (actually le)
207       SystemZ::TDCMASK_NAN,               // un
208     }
209   };
210   // Construct the mask as a combination of the partial masks.
211   int Mask = 0;
212   if (Pred & CmpInst::FCMP_OEQ)
213     Mask |= Masks[WhichConst][0];
214   if (Pred & CmpInst::FCMP_OGT)
215     Mask |= Masks[WhichConst][1];
216   if (Pred & CmpInst::FCMP_OLT)
217     Mask |= Masks[WhichConst][2];
218   if (Pred & CmpInst::FCMP_UNO)
219     Mask |= Masks[WhichConst][3];
220   // A lone fcmp is unworthy of tdc conversion on its own, but may become
221   // worthy if combined with fabs.
222   bool Worthy = false;
223   if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
224     Function *F = CI->getCalledFunction();
225     if (F && F->getIntrinsicID() == Intrinsic::fabs) {
226       // Fold with fabs - adjust the mask appropriately.
227       Mask &= SystemZ::TDCMASK_PLUS;
228       Mask |= Mask >> 1;
229       Op0 = CI->getArgOperand(0);
230       // A combination of fcmp with fabs is a win, unless the constant
231       // involved is 0 (which is handled by later passes).
232       Worthy = WhichConst != 0;
233       PossibleJunk.insert(CI);
234     }
235   }
236   converted(&I, Op0, Mask, Worthy);
237 }
238 
239 void SystemZTDCPass::convertICmp(CmpInst &I) {
240   Value *Op0 = I.getOperand(0);
241   auto *Const = dyn_cast<ConstantInt>(I.getOperand(1));
242   auto Pred = I.getPredicate();
243   // All our icmp rules involve comparisons with consts.
244   if (!Const)
245     return;
246   if (auto *Cast = dyn_cast<BitCastInst>(Op0)) {
247     // Check for icmp+bitcast used for signbit.
248     if (!Cast->getSrcTy()->isFloatTy() &&
249         !Cast->getSrcTy()->isDoubleTy() &&
250         !Cast->getSrcTy()->isFP128Ty())
251       return;
252     Value *V = Cast->getOperand(0);
253     int Mask;
254     if (Pred == CmpInst::ICMP_SLT && Const->isZero()) {
255       // icmp slt (bitcast X), 0 - set if sign bit true
256       Mask = SystemZ::TDCMASK_MINUS;
257     } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) {
258       // icmp sgt (bitcast X), -1 - set if sign bit false
259       Mask = SystemZ::TDCMASK_PLUS;
260     } else {
261       // Not a sign bit check.
262       return;
263     }
264     PossibleJunk.insert(Cast);
265     converted(&I, V, Mask, true);
266   } else if (auto *CI = dyn_cast<CallInst>(Op0)) {
267     // Check if this is a pre-existing call of our tdc intrinsic.
268     Function *F = CI->getCalledFunction();
269     if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc)
270       return;
271     if (!Const->isZero())
272       return;
273     Value *V = CI->getArgOperand(0);
274     auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
275     // Bail if the mask is not a constant.
276     if (!MaskC)
277       return;
278     int Mask = MaskC->getZExtValue();
279     Mask &= SystemZ::TDCMASK_ALL;
280     if (Pred == CmpInst::ICMP_NE) {
281       // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
282     } else if (Pred == CmpInst::ICMP_EQ) {
283       // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
284       Mask ^= SystemZ::TDCMASK_ALL;
285     } else {
286       // An unknown comparison - ignore.
287       return;
288     }
289     PossibleJunk.insert(CI);
290     converted(&I, V, Mask, false);
291   }
292 }
293 
294 void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
295   Value *Op0, *Op1;
296   int Mask0, Mask1;
297   bool Worthy0, Worthy1;
298   std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))];
299   std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))];
300   if (Op0 != Op1)
301     return;
302   int Mask;
303   switch (I.getOpcode()) {
304     case Instruction::And:
305       Mask = Mask0 & Mask1;
306       break;
307     case Instruction::Or:
308       Mask = Mask0 | Mask1;
309       break;
310     case Instruction::Xor:
311       Mask = Mask0 ^ Mask1;
312       break;
313     default:
314       llvm_unreachable("Unknown op in convertLogicOp");
315   }
316   converted(&I, Op0, Mask, true);
317 }
318 
319 bool SystemZTDCPass::runOnFunction(Function &F) {
320   auto &TPC = getAnalysis<TargetPassConfig>();
321   if (TPC.getTM<TargetMachine>()
322           .getSubtarget<SystemZSubtarget>(F)
323           .hasSoftFloat())
324     return false;
325 
326   ConvertedInsts.clear();
327   LogicOpsWorklist.clear();
328   PossibleJunk.clear();
329 
330   // Look for icmp+fcmp instructions.
331   for (auto &I : instructions(F)) {
332     if (I.getOpcode() == Instruction::FCmp)
333       convertFCmp(cast<CmpInst>(I));
334     else if (I.getOpcode() == Instruction::ICmp)
335       convertICmp(cast<CmpInst>(I));
336   }
337 
338   // If none found, bail already.
339   if (ConvertedInsts.empty())
340     return false;
341 
342   // Process the queue of logic instructions.
343   while (!LogicOpsWorklist.empty()) {
344     BinaryOperator *Op = LogicOpsWorklist.back();
345     LogicOpsWorklist.pop_back();
346     // If both operands mapped, and the instruction itself not yet mapped,
347     // convert it.
348     if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) &&
349         ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) &&
350         !ConvertedInsts.count(Op))
351       convertLogicOp(*Op);
352   }
353 
354   // Time to actually replace the instructions.  Do it in the reverse order
355   // of finding them, since there's a good chance the earlier ones will be
356   // unused (due to being folded into later ones).
357   Module &M = *F.getParent();
358   auto &Ctx = M.getContext();
359   Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
360   bool MadeChange = false;
361   for (auto &It : reverse(ConvertedInsts)) {
362     Instruction *I = It.first;
363     Value *V;
364     int Mask;
365     bool Worthy;
366     std::tie(V, Mask, Worthy) = It.second;
367     if (!I->user_empty()) {
368       // If used and unworthy of conversion, skip it.
369       if (!Worthy)
370         continue;
371       // Call the intrinsic, compare result with 0.
372       Function *TDCFunc =
373           Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, V->getType());
374       IRBuilder<> IRB(I);
375       Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
376       Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal});
377       Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
378       I->replaceAllUsesWith(ICmp);
379     }
380     // If unused, or used and converted, remove it.
381     I->eraseFromParent();
382     MadeChange = true;
383   }
384 
385   if (!MadeChange)
386     return false;
387 
388   // We've actually done something - now clear misc accumulated junk (fabs,
389   // bitcast).
390   for (auto *I : PossibleJunk)
391     if (I->user_empty())
392       I->eraseFromParent();
393 
394   return true;
395 }
396