1 //===------ ManualOptimizer.cpp -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Handle pragma/metadata-directed transformations.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "polly/ManualOptimizer.h"
14 #include "polly/DependenceInfo.h"
15 #include "polly/Options.h"
16 #include "polly/ScheduleTreeTransform.h"
17 #include "polly/Support/ScopHelper.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Analysis/LoopInfo.h"
21 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
22 #include "llvm/IR/Metadata.h"
23 #include "llvm/Transforms/Utils/LoopUtils.h"
24 
25 #define DEBUG_TYPE "polly-opt-manual"
26 
27 using namespace polly;
28 using namespace llvm;
29 
30 namespace {
31 
32 static cl::opt<bool> IgnoreDepcheck(
33     "polly-pragma-ignore-depcheck",
34     cl::desc("Skip the dependency check for pragma-based transformations"),
35     cl::cat(PollyCategory));
36 
37 /// Same as llvm::hasUnrollTransformation(), but takes a LoopID as argument
38 /// instead of a Loop.
hasUnrollTransformation(MDNode * LoopID)39 static TransformationMode hasUnrollTransformation(MDNode *LoopID) {
40   if (getBooleanLoopAttribute(LoopID, "llvm.loop.unroll.disable"))
41     return TM_SuppressedByUser;
42 
43   Optional<int> Count =
44       getOptionalIntLoopAttribute(LoopID, "llvm.loop.unroll.count");
45   if (Count)
46     return Count.value() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
47 
48   if (getBooleanLoopAttribute(LoopID, "llvm.loop.unroll.enable"))
49     return TM_ForcedByUser;
50 
51   if (getBooleanLoopAttribute(LoopID, "llvm.loop.unroll.full"))
52     return TM_ForcedByUser;
53 
54   if (hasDisableAllTransformsHint(LoopID))
55     return TM_Disable;
56 
57   return TM_Unspecified;
58 }
59 
60 // Return the first DebugLoc in the list.
findFirstDebugLoc(MDNode * MD)61 static DebugLoc findFirstDebugLoc(MDNode *MD) {
62   if (MD) {
63     for (const MDOperand &X : drop_begin(MD->operands(), 1)) {
64       Metadata *A = X.get();
65       if (!isa<DILocation>(A))
66         continue;
67       return cast<DILocation>(A);
68     }
69   }
70 
71   return {};
72 }
73 
findTransformationDebugLoc(MDNode * LoopMD,StringRef Name)74 static DebugLoc findTransformationDebugLoc(MDNode *LoopMD, StringRef Name) {
75   // First find dedicated transformation location
76   // (such as the location of #pragma clang loop)
77   MDNode *MD = findOptionMDForLoopID(LoopMD, Name);
78   if (DebugLoc K = findFirstDebugLoc(MD))
79     return K;
80 
81   // Otherwise, fall back to the location of the loop itself
82   return findFirstDebugLoc(LoopMD);
83 }
84 
85 /// Apply full or partial unrolling.
applyLoopUnroll(MDNode * LoopMD,isl::schedule_node BandToUnroll)86 static isl::schedule applyLoopUnroll(MDNode *LoopMD,
87                                      isl::schedule_node BandToUnroll) {
88   TransformationMode UnrollMode = ::hasUnrollTransformation(LoopMD);
89   if (UnrollMode & TM_Disable)
90     return {};
91 
92   assert(!BandToUnroll.is_null());
93   // TODO: Isl's codegen also supports unrolling by isl_ast_build via
94   // isl_schedule_node_band_set_ast_build_options({ unroll[x] }) which would be
95   // more efficient because the content duplication is delayed. However, the
96   // unrolled loop could be input of another loop transformation which expects
97   // the explicit schedule nodes. That is, we would need this explicit expansion
98   // anyway and using the ISL codegen option is a compile-time optimization.
99   int64_t Factor =
100       getOptionalIntLoopAttribute(LoopMD, "llvm.loop.unroll.count").value_or(0);
101   bool Full = getBooleanLoopAttribute(LoopMD, "llvm.loop.unroll.full");
102   assert((!Full || !(Factor > 0)) &&
103          "Cannot unroll fully and partially at the same time");
104 
105   if (Full)
106     return applyFullUnroll(BandToUnroll);
107 
108   if (Factor > 0)
109     return applyPartialUnroll(BandToUnroll, Factor);
110 
111   // For heuristic unrolling, fall back to LLVM's LoopUnroll pass.
112   return {};
113 }
114 
applyLoopFission(MDNode * LoopMD,isl::schedule_node BandToFission)115 static isl::schedule applyLoopFission(MDNode *LoopMD,
116                                       isl::schedule_node BandToFission) {
117   // TODO: Make it possible to selectively fission substatements.
118   // TODO: Apply followup loop properties.
119   // TODO: Instead of fission every statement, find the maximum set that does
120   // not cause a dependency violation.
121   return applyMaxFission(BandToFission);
122 }
123 
124 // Return the properties from a LoopID. Scalar properties are ignored.
getLoopMDProps(MDNode * LoopMD)125 static auto getLoopMDProps(MDNode *LoopMD) {
126   return map_range(
127       make_filter_range(
128           drop_begin(LoopMD->operands(), 1),
129           [](const MDOperand &MDOp) { return isa<MDNode>(MDOp.get()); }),
130       [](const MDOperand &MDOp) { return cast<MDNode>(MDOp.get()); });
131 }
132 
133 /// Recursively visit all nodes in a schedule, loop for loop-transformations
134 /// metadata and apply the first encountered.
135 class SearchTransformVisitor final
136     : public RecursiveScheduleTreeVisitor<SearchTransformVisitor> {
137 private:
138   using BaseTy = RecursiveScheduleTreeVisitor<SearchTransformVisitor>;
getBase()139   BaseTy &getBase() { return *this; }
getBase() const140   const BaseTy &getBase() const { return *this; }
141 
142   polly::Scop *S;
143   const Dependences *D;
144   OptimizationRemarkEmitter *ORE;
145 
146   // Set after a transformation is applied. Recursive search must be aborted
147   // once this happens to ensure that any new followup transformation is
148   // transformed in innermost-first order.
149   isl::schedule Result;
150 
151   /// Check wether a schedule after a  transformation is legal. Return the old
152   /// schedule without the transformation.
153   isl::schedule
checkDependencyViolation(llvm::MDNode * LoopMD,llvm::Value * CodeRegion,const isl::schedule_node & OrigBand,StringRef DebugLocAttr,StringRef TransPrefix,StringRef RemarkName,StringRef TransformationName)154   checkDependencyViolation(llvm::MDNode *LoopMD, llvm::Value *CodeRegion,
155                            const isl::schedule_node &OrigBand,
156                            StringRef DebugLocAttr, StringRef TransPrefix,
157                            StringRef RemarkName, StringRef TransformationName) {
158     if (D->isValidSchedule(*S, Result))
159       return Result;
160 
161     LLVMContext &Ctx = LoopMD->getContext();
162     LLVM_DEBUG(dbgs() << "Dependency violation detected\n");
163 
164     DebugLoc TransformLoc = findTransformationDebugLoc(LoopMD, DebugLocAttr);
165 
166     if (IgnoreDepcheck) {
167       LLVM_DEBUG(dbgs() << "Still accepting transformation due to "
168                            "-polly-pragma-ignore-depcheck\n");
169       if (ORE) {
170         ORE->emit(
171             OptimizationRemark(DEBUG_TYPE, RemarkName, TransformLoc, CodeRegion)
172             << (Twine("Could not verify dependencies for ") +
173                 TransformationName +
174                 "; still applying because of -polly-pragma-ignore-depcheck")
175                    .str());
176       }
177       return Result;
178     }
179 
180     LLVM_DEBUG(dbgs() << "Rolling back transformation\n");
181 
182     if (ORE) {
183       ORE->emit(DiagnosticInfoOptimizationFailure(DEBUG_TYPE, RemarkName,
184                                                   TransformLoc, CodeRegion)
185                 << (Twine("not applying ") + TransformationName +
186                     ": cannot ensure semantic equivalence due to possible "
187                     "dependency violations")
188                        .str());
189     }
190 
191     // If illegal, revert and remove the transformation to not risk re-trying
192     // indefintely.
193     MDNode *NewLoopMD =
194         makePostTransformationMetadata(Ctx, LoopMD, {TransPrefix}, {});
195     BandAttr *Attr = getBandAttr(OrigBand);
196     Attr->Metadata = NewLoopMD;
197 
198     // Roll back old schedule.
199     return OrigBand.get_schedule();
200   }
201 
202 public:
SearchTransformVisitor(polly::Scop * S,const Dependences * D,OptimizationRemarkEmitter * ORE)203   SearchTransformVisitor(polly::Scop *S, const Dependences *D,
204                          OptimizationRemarkEmitter *ORE)
205       : S(S), D(D), ORE(ORE) {}
206 
applyOneTransformation(polly::Scop * S,const Dependences * D,OptimizationRemarkEmitter * ORE,const isl::schedule & Sched)207   static isl::schedule applyOneTransformation(polly::Scop *S,
208                                               const Dependences *D,
209                                               OptimizationRemarkEmitter *ORE,
210                                               const isl::schedule &Sched) {
211     SearchTransformVisitor Transformer(S, D, ORE);
212     Transformer.visit(Sched);
213     return Transformer.Result;
214   }
215 
visitBand(isl::schedule_node_band Band)216   void visitBand(isl::schedule_node_band Band) {
217     // Transform inner loops first (depth-first search).
218     getBase().visitBand(Band);
219     if (!Result.is_null())
220       return;
221 
222     // Since it is (currently) not possible to have a BandAttr marker that is
223     // specific to each loop in a band, we only support single-loop bands.
224     if (isl_schedule_node_band_n_member(Band.get()) != 1)
225       return;
226 
227     BandAttr *Attr = getBandAttr(Band);
228     if (!Attr) {
229       // Band has no attribute.
230       return;
231     }
232 
233     // CodeRegion used but ORE to determine code hotness.
234     // TODO: Works only for original loop; for transformed loops, should track
235     // where the loop's body code comes from.
236     Loop *Loop = Attr->OriginalLoop;
237     Value *CodeRegion = nullptr;
238     if (Loop)
239       CodeRegion = Loop->getHeader();
240 
241     MDNode *LoopMD = Attr->Metadata;
242     if (!LoopMD)
243       return;
244 
245     // Iterate over loop properties to find the first transformation.
246     // FIXME: If there are more than one transformation in the LoopMD (making
247     // the order of transformations ambiguous), all others are silently ignored.
248     for (MDNode *MD : getLoopMDProps(LoopMD)) {
249       auto *NameMD = dyn_cast<MDString>(MD->getOperand(0).get());
250       if (!NameMD)
251         continue;
252       StringRef AttrName = NameMD->getString();
253 
254       // Honor transformation order; transform the first transformation in the
255       // list first.
256       if (AttrName == "llvm.loop.unroll.enable" ||
257           AttrName == "llvm.loop.unroll.count" ||
258           AttrName == "llvm.loop.unroll.full") {
259         Result = applyLoopUnroll(LoopMD, Band);
260         if (!Result.is_null())
261           return;
262       } else if (AttrName == "llvm.loop.distribute.enable") {
263         Result = applyLoopFission(LoopMD, Band);
264         if (!Result.is_null())
265           Result = checkDependencyViolation(
266               LoopMD, CodeRegion, Band, "llvm.loop.distribute.loc",
267               "llvm.loop.distribute.", "FailedRequestedFission",
268               "loop fission/distribution");
269         if (!Result.is_null())
270           return;
271       }
272 
273       // not a loop transformation; look for next property
274     }
275   }
276 
visitNode(isl::schedule_node Other)277   void visitNode(isl::schedule_node Other) {
278     if (!Result.is_null())
279       return;
280     getBase().visitNode(Other);
281   }
282 };
283 
284 } // namespace
285 
286 isl::schedule
applyManualTransformations(Scop * S,isl::schedule Sched,const Dependences & D,OptimizationRemarkEmitter * ORE)287 polly::applyManualTransformations(Scop *S, isl::schedule Sched,
288                                   const Dependences &D,
289                                   OptimizationRemarkEmitter *ORE) {
290   // Search the loop nest for transformations until fixpoint.
291   while (true) {
292     isl::schedule Result =
293         SearchTransformVisitor::applyOneTransformation(S, &D, ORE, Sched);
294     if (Result.is_null()) {
295       // No (more) transformation has been found.
296       break;
297     }
298 
299     // Use transformed schedule and look for more transformations.
300     Sched = Result;
301   }
302 
303   return Sched;
304 }
305