//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner  --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using Tensorflow C APIs, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"

#include <limits>
#include <vector>

using namespace llvm;

static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc("Path to SavedModel from the previous training iteration."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));

static cl::opt<std::string> TFDecisionName(
    "ml-inliner-trained-model-decision-name", cl::Hidden,
    cl::init("StatefulPartitionedCall"),
    cl::desc("Name of the graph operation representing the decision."));
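
// Illustrative invocation (the -passes pipeline and the -enable-ml-inliner
// flag are defined elsewhere and assumed here; paths are placeholders):
//   opt -passes=scc-oz-module-inliner -enable-ml-inliner=development \
//       -training-log=/tmp/inlining.log \
//       -ml-inliner-model-under-training=/tmp/saved_model \
//       -S module.ll -o /dev/null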

namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  bool DefaultDecision = false;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  bool AdvisedDecision = false;

  /// What actually happened. This would be 'false' in the case of an inlining
  /// error, even if AdvisedDecision were true; otherwise, it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before.
  int64_t Reward = 0;
};

/// Collect data we may use for training a model, and write it as a textual
/// Tensorflow SequenceExample
/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
/// protobuf (https://developers.google.com/protocol-buffers).
/// Because this is a protobuf, we cannot just stream the events as they come.
/// Internally, TrainingLogger stores data in column-major format, because that
/// lines up with how TF SequenceExample represents it.
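///
/// Schematically, a log with two inlining events looks like the following
/// (the keys are illustrative; the actual names come from FeatureNameMap,
/// DefaultDecisionName, DecisionName and RewardName):
///
///   feature_lists: {
///     feature_list: {
///       key: "callee_basic_block_count" value: {
///         feature: { int64_list: { value: [10] } }
///         feature: { int64_list: { value: [12] } }
///       }
///     }
///     ...
///     feature_list: {
///       key: "inlining_decision" value: {
///         feature: { int64_list: { value: [1] } }
///         feature: { int64_list: { value: [0] } }
///       }
///     }
///   }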
class TrainingLogger final {
public:
  TrainingLogger();

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

  /// Print the stored tensors.
  void print(raw_fd_ostream &OutFile);

private:
  template <typename T>
  void writeTensor(raw_fd_ostream &OutFile, StringRef TensorName,
                   const std::vector<T> &Tensor);

  std::vector<InlineFeatures> Features;
  std::vector<bool> DefaultDecisions;
  std::vector<bool> Decisions;
  std::vector<bool> Effects;
  std::vector<int64_t> Rewards;
  std::vector<bool> Mandatory;
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is only concerned with producing training data
/// ("logs"). This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster by starting from a reasonable
/// policy.
///
/// 2) collect logs for the ML policy, using a model from a previous
/// training. Potentially, that model uses internally some small random
/// perturbation of its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation - the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than taking
/// the release candidate model, compiling it in 'release' mode, validating it,
/// and then potentially discarding it, it's easier to just pass the model to
/// the compiler, accepting slower compilation as a one-off cost. Once the
/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
/// release mode. The expectation is that a well-trained model provides a good
/// policy over a sufficiently diverse codebase, over many changes (i.e.
/// training happens seldom).
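///
/// Schematically, the flags defined above select between these modes (paths
/// are placeholders):
///   1) -training-log=<path>
///   2) -training-log=<path> -ml-inliner-model-under-training=<dir>
///   3) -ml-inliner-model-under-training=<dir>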
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference);

  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();
  void updateNativeSizeEstimate(int64_t Change) { CurrentNativeSize += Change; }
  void resetNativeSize(Function *F) {
    FAM.invalidate<InlineSizeEstimatorAnalysis>(*F);
  }

  std::unique_ptr<MLInlineAdvice>
  getMandatoryAdvice(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  size_t getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !TrainingLog.empty(); }

  std::function<bool(CallBase &)> GetDefaultAdvice;
  TrainingLogger Logger;
  const bool IsDoingInference;

  const int32_t InitialNativeSize;
  int32_t CurrentNativeSize = 0;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger, size_t CallerSizeEstimateBefore,
                        size_t CalleeSizeEstimateBefore, bool DefaultDecision)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    if (!getAdvisor()->isForcedToStop()) {
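      // The callee still exists after a plain (non-deleting) inlining, so add
      // back its pre-inlining estimate when computing the post-inlining size.
      // Worked example (numbers are illustrative): caller estimated at 100,
      // callee at 40, caller grows to 130 after inlining; NativeSizeAfter =
      // 130 + 40 = 170 and Reward = 170 - (100 + 40) = 30, i.e. a net growth
      // of 30 in the native size estimate.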
      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller) +
                            CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (!getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    }
  }

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const size_t CallerSizeEstimateBefore;
  const size_t CalleeSizeEstimateBefore;
  const bool DefaultDecision;
};

/// A pseudo model runner. We use it to store feature values when collecting
/// logs for the default policy, but never ask it to 'run'.
class NoInferenceModelRunner : public MLModelRunner {
public:
  NoInferenceModelRunner(LLVMContext &Ctx)
      : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
  void setFeature(FeatureIndex Index, int64_t Value) override {
    Features[static_cast<int>(Index)] = Value;
  }

  int64_t getFeature(int Index) const override { return Features[Index]; }
  bool run() override {
    llvm_unreachable("We shouldn't call run on this model runner.");
  }

private:
  InlineFeatures Features;
};

/// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
/// to dynamically load and evaluate a TF SavedModel
/// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
/// sacrificed for ease of use while training.
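///
/// The SavedModel is expected to expose one int64 feed per inlining feature,
/// named TFFeedPrefix + <feature name> (plus the training-only feeds listed
/// below), and an int64 fetch named by -ml-inliner-trained-model-decision-name
/// that holds the inlining decision; see the constructor for the exact
/// TensorSpecs.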
class ModelUnderTrainingRunner final : public MLModelRunner {
public:
  ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);

  bool run() override;

  // Disallows copy and assign.
  ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
  ModelUnderTrainingRunner &
  operator=(const ModelUnderTrainingRunner &) = delete;

  void setFeature(FeatureIndex Index, int64_t Value) override;
  int64_t getFeature(int Index) const override;
  bool isValid() const { return !!Evaluator; }

private:
  std::unique_ptr<TFModelEvaluator> Evaluator;

  // The training framework needs some additional features.
  const std::vector<TensorSpec> TrainingOnlyFeatures{
      TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
      TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
};
} // namespace

TrainingLogger::TrainingLogger() {
  for (size_t I = 0; I < NumberOfFeatures; ++I) {
    Features.push_back(InlineFeatures());
  }
}

/// Log one inlining event.
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
                                    const MLModelRunner &ModelRunner) {
  for (size_t I = 0; I < NumberOfFeatures; ++I) {
    Features[I].push_back(ModelRunner.getFeature(I));
  }
  Decisions.push_back(Event.AdvisedDecision);
  Effects.push_back(Event.Effect);
  Rewards.push_back(Event.Reward);
  DefaultDecisions.push_back(Event.DefaultDecision);
}

void TrainingLogger::print(raw_fd_ostream &OutFile) {
  if (DefaultDecisions.empty())
    return;
  OutFile << "feature_lists: {\n";

  for (size_t I = 0; I < Features.size(); I++) {
    writeTensor(OutFile, FeatureNameMap.at(I), Features[I]);
  }
  writeTensor(OutFile, DefaultDecisionName, DefaultDecisions);
  writeTensor(OutFile, DecisionName, Decisions);
  writeTensor(OutFile, RewardName, Rewards);

  OutFile << "}\n";
}

template <typename T>
void TrainingLogger::writeTensor(raw_fd_ostream &OutFile, StringRef TensorName,
                                 const std::vector<T> &Tensor) {
  OutFile << "  feature_list: {\n";
  OutFile << "    key: "
          << "\"" << TensorName << "\" ";
  OutFile << "value: {\n";
  for (const auto &Feature : Tensor) {
    OutFile << "      feature: { int64_list: { value: [" << Feature
            << "] } }\n";
  }
  OutFile << "    }\n";
  OutFile << "  }\n";
}

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  if (TrainingLog.empty())
    return;
  std::error_code ErrorCode;
  raw_fd_ostream OutFile(TrainingLog, ErrorCode);
  if (ErrorCode) {
    M.getContext().emitError("Cannot open training log file: " +
                             ErrorCode.message());
    return;
  }
  Logger.print(OutFile);
}

size_t
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdvice(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (!isLogging())
    return MLInlineAdvisor::getMandatoryAdvice(CB, ORE);
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/true, /*Logger=*/Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    if (isFunctionDeleted(&F))
      continue;
    Ret += getNativeSizeEstimate(F);
  }
  return Ret;
}

ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
                                                   const std::string &ModelPath)
    : MLModelRunner(Ctx) {
  std::vector<TensorSpec> InputSpecs;
  std::vector<TensorSpec> OutputSpecs;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputSpecs.push_back(
        TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
  InputSpecs.insert(InputSpecs.end(), TrainingOnlyFeatures.begin(),
                    TrainingOnlyFeatures.end());
  OutputSpecs.push_back(TensorSpec::createSpec<int64_t>(TFDecisionName, {1}));

  Evaluator =
      std::make_unique<TFModelEvaluator>(ModelPath, InputSpecs, OutputSpecs);
  if (!Evaluator || !Evaluator->isValid()) {
    Ctx.emitError("Failed to create inliner saved model evaluator");
    Evaluator.reset();
    return;
  }
}

bool ModelUnderTrainingRunner::run() {
  auto ER = Evaluator->evaluate();
  if (!ER.hasValue()) {
    Ctx.emitError("Error evaluating model.");
    return false;
  }
  int64_t Decision = *ER->getTensorValue<int64_t>(0);
  return static_cast<bool>(Decision);
}

int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
  return *Evaluator->getInput<int64_t>(Index);
}

void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
  size_t NumericIndex = static_cast<size_t>(Index);
  *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
}

std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  if (TrainingLog.empty() !=
      !InlineSizeEstimatorAnalysis::isEvaluatorRequested()) {
    Ctx.emitError("For development mode, if training logs are requested, then "
                  "a size estimator must be available; either that, or neither "
                  "should be specified.");
    return nullptr;
  }

  std::unique_ptr<MLModelRunner> Runner;

  bool IsDoingInference = false;
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx));
  else {
    auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
        Ctx, TFModelUnderTrainingPath);
    // std::make_unique never returns null; what can fail is loading the
    // SavedModel, so check the runner's validity before using it.
    if (!MUTR->isValid()) {
      Ctx.emitError("Could not load the policy model from the provided path");
      return nullptr;
    }
    IsDoingInference = true;
    Runner = std::move(MUTR);
  }
  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference);
}
#endif // defined(LLVM_HAVE_TF_API)