1 //===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass implements IR expansion for reduction intrinsics, allowing targets 11 // to enable the experimental intrinsics until just before codegen. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/CodeGen/ExpandReductions.h" 16 #include "llvm/Analysis/TargetTransformInfo.h" 17 #include "llvm/CodeGen/Passes.h" 18 #include "llvm/IR/Function.h" 19 #include "llvm/IR/IRBuilder.h" 20 #include "llvm/IR/InstIterator.h" 21 #include "llvm/IR/IntrinsicInst.h" 22 #include "llvm/IR/Intrinsics.h" 23 #include "llvm/IR/Module.h" 24 #include "llvm/Pass.h" 25 #include "llvm/Transforms/Utils/LoopUtils.h" 26 27 using namespace llvm; 28 29 namespace { 30 31 unsigned getOpcode(Intrinsic::ID ID) { 32 switch (ID) { 33 case Intrinsic::experimental_vector_reduce_fadd: 34 return Instruction::FAdd; 35 case Intrinsic::experimental_vector_reduce_fmul: 36 return Instruction::FMul; 37 case Intrinsic::experimental_vector_reduce_add: 38 return Instruction::Add; 39 case Intrinsic::experimental_vector_reduce_mul: 40 return Instruction::Mul; 41 case Intrinsic::experimental_vector_reduce_and: 42 return Instruction::And; 43 case Intrinsic::experimental_vector_reduce_or: 44 return Instruction::Or; 45 case Intrinsic::experimental_vector_reduce_xor: 46 return Instruction::Xor; 47 case Intrinsic::experimental_vector_reduce_smax: 48 case Intrinsic::experimental_vector_reduce_smin: 49 case Intrinsic::experimental_vector_reduce_umax: 50 case Intrinsic::experimental_vector_reduce_umin: 51 return Instruction::ICmp; 52 case Intrinsic::experimental_vector_reduce_fmax: 53 case Intrinsic::experimental_vector_reduce_fmin: 54 return Instruction::FCmp; 55 default: 56 llvm_unreachable("Unexpected ID"); 57 } 58 } 59 60 RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) { 61 switch (ID) { 62 case Intrinsic::experimental_vector_reduce_smax: 63 return RecurrenceDescriptor::MRK_SIntMax; 64 case Intrinsic::experimental_vector_reduce_smin: 65 return RecurrenceDescriptor::MRK_SIntMin; 66 case Intrinsic::experimental_vector_reduce_umax: 67 return RecurrenceDescriptor::MRK_UIntMax; 68 case Intrinsic::experimental_vector_reduce_umin: 69 return RecurrenceDescriptor::MRK_UIntMin; 70 case Intrinsic::experimental_vector_reduce_fmax: 71 return RecurrenceDescriptor::MRK_FloatMax; 72 case Intrinsic::experimental_vector_reduce_fmin: 73 return RecurrenceDescriptor::MRK_FloatMin; 74 default: 75 return RecurrenceDescriptor::MRK_Invalid; 76 } 77 } 78 79 bool expandReductions(Function &F, const TargetTransformInfo *TTI) { 80 bool Changed = false; 81 SmallVector<IntrinsicInst *, 4> Worklist; 82 for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) 83 if (auto II = dyn_cast<IntrinsicInst>(&*I)) 84 Worklist.push_back(II); 85 86 for (auto *II : Worklist) { 87 IRBuilder<> Builder(II); 88 bool IsOrdered = false; 89 Value *Acc = nullptr; 90 Value *Vec = nullptr; 91 auto ID = II->getIntrinsicID(); 92 auto MRK = RecurrenceDescriptor::MRK_Invalid; 93 switch (ID) { 94 case Intrinsic::experimental_vector_reduce_fadd: 95 case Intrinsic::experimental_vector_reduce_fmul: 96 // FMFs must be attached to the call, otherwise it's an ordered reduction 97 // and it can't be handled by generating a shuffle sequence. 98 if (!II->getFastMathFlags().isFast()) 99 IsOrdered = true; 100 Acc = II->getArgOperand(0); 101 Vec = II->getArgOperand(1); 102 break; 103 case Intrinsic::experimental_vector_reduce_add: 104 case Intrinsic::experimental_vector_reduce_mul: 105 case Intrinsic::experimental_vector_reduce_and: 106 case Intrinsic::experimental_vector_reduce_or: 107 case Intrinsic::experimental_vector_reduce_xor: 108 case Intrinsic::experimental_vector_reduce_smax: 109 case Intrinsic::experimental_vector_reduce_smin: 110 case Intrinsic::experimental_vector_reduce_umax: 111 case Intrinsic::experimental_vector_reduce_umin: 112 case Intrinsic::experimental_vector_reduce_fmax: 113 case Intrinsic::experimental_vector_reduce_fmin: 114 Vec = II->getArgOperand(0); 115 MRK = getMRK(ID); 116 break; 117 default: 118 continue; 119 } 120 if (!TTI->shouldExpandReduction(II)) 121 continue; 122 Value *Rdx = 123 IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK) 124 : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); 125 II->replaceAllUsesWith(Rdx); 126 II->eraseFromParent(); 127 Changed = true; 128 } 129 return Changed; 130 } 131 132 class ExpandReductions : public FunctionPass { 133 public: 134 static char ID; 135 ExpandReductions() : FunctionPass(ID) { 136 initializeExpandReductionsPass(*PassRegistry::getPassRegistry()); 137 } 138 139 bool runOnFunction(Function &F) override { 140 const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 141 return expandReductions(F, TTI); 142 } 143 144 void getAnalysisUsage(AnalysisUsage &AU) const override { 145 AU.addRequired<TargetTransformInfoWrapperPass>(); 146 AU.setPreservesCFG(); 147 } 148 }; 149 } 150 151 char ExpandReductions::ID; 152 INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions", 153 "Expand reduction intrinsics", false, false) 154 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 155 INITIALIZE_PASS_END(ExpandReductions, "expand-reductions", 156 "Expand reduction intrinsics", false, false) 157 158 FunctionPass *llvm::createExpandReductionsPass() { 159 return new ExpandReductions(); 160 } 161 162 PreservedAnalyses ExpandReductionsPass::run(Function &F, 163 FunctionAnalysisManager &AM) { 164 const auto &TTI = AM.getResult<TargetIRAnalysis>(F); 165 if (!expandReductions(F, &TTI)) 166 return PreservedAnalyses::all(); 167 PreservedAnalyses PA; 168 PA.preserveSet<CFGAnalyses>(); 169 return PA; 170 } 171