//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner  --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using TensorFlow C APIs, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"

#include <limits>
#include <vector>

using namespace llvm;
static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc("Path to SavedModel from the previous training iteration."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));

static cl::opt<std::string> TFDecisionName(
    "ml-inliner-trained-model-decision-name", cl::Hidden,
    cl::init("StatefulPartitionedCall"),
    cl::desc("Name of the graph operation representing the decision."));
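
// For illustration only: these options are meant to be used together with the
// flag that selects the development-mode advisor (defined elsewhere, e.g.
// -enable-ml-inliner=development), roughly along the lines of:
//   opt -passes=scc-oz-module-inliner -enable-ml-inliner=development \
//       -ml-inliner-model-under-training=/path/to/saved_model \
//       -training-log=/path/to/inline.log -S input.ll -o output.ll
// Exact pipeline and flag spellings may differ between LLVM versions.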

namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  bool DefaultDecision = false;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  bool AdvisedDecision = false;

  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true, otherwise it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before.
  int64_t Reward = 0;
};

/// Collect data we may use for training a model, and write it as a textual
/// TensorFlow SequenceExample
/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
/// protobuf (https://developers.google.com/protocol-buffers).
/// Because this is a protobuf, we cannot just stream the events as they come.
/// Internally, TrainingLogger stores data in column-major format, because that
/// lines up with how TF SequenceExample represents it.
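///
/// For orientation, the textual output produced by printTensor/writeTensor
/// below looks roughly like this (names and values are placeholders):
///
///   feature_lists: {
///     feature_list: {
///       key: "some_feature" value: {
///         feature: { int64_list: { value: [3] } }
///         feature: { int64_list: { value: [5] } }
///       }
///     }
///     ... one feature_list per feature, plus the default-decision, decision,
///     and reward lists ...
///   }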
class TrainingLogger final {
public:
  TrainingLogger() {
    for (size_t I = 0; I < NumberOfFeatures; ++I) {
      Features.push_back(InlineFeatures());
    }
  }

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner) {
    for (size_t I = 0; I < NumberOfFeatures; ++I) {
      Features[I].push_back(ModelRunner.getFeature(I));
    }
    Decisions.push_back(Event.AdvisedDecision);
    Effects.push_back(Event.Effect);
    Rewards.push_back(Event.Reward);
    DefaultDecisions.push_back(Event.DefaultDecision);
  }

  void printTensor(raw_fd_ostream &OutFile) {
    if (DefaultDecisions.empty())
      return;
    OutFile << "feature_lists: {\n";

    for (size_t I = 0; I < Features.size(); I++) {
      writeTensor(OutFile, FeatureNameMap.at(I), Features[I]);
    }
    writeTensor(OutFile, DefaultDecisionName, DefaultDecisions);
    writeTensor(OutFile, DecisionName, Decisions);
    writeTensor(OutFile, RewardName, Rewards);

    OutFile << "}\n";
  }

private:
  template <typename T>
  void writeTensor(raw_fd_ostream &OutFile, StringRef TensorName,
                   const std::vector<T> &Tensor) {
    OutFile << "  feature_list: {\n";
    OutFile << "    key: "
            << "\"" << TensorName << "\" ";
    OutFile << "value: {\n";
    for (const auto &Feature : Tensor) {
      OutFile << "      feature: { int64_list: { value: [" << Feature
              << "] } }\n";
    }
    OutFile << "    }\n";
    OutFile << "  }\n";
  }

  std::vector<InlineFeatures> Features;
  std::vector<bool> DefaultDecisions;
  std::vector<bool> Decisions;
  std::vector<bool> Effects;
  std::vector<int64_t> Rewards;
  std::vector<bool> Mandatory;
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is only concerned with producing training data
/// ("logs"). This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster by starting from a reasonable
/// policy.
///
/// 2) collect logs for the ML policy, using a model from a previous
/// training. Potentially, that model uses internally some small random
/// perturbation of its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation - the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than taking
/// the release candidate model, compiling it in 'release' mode, validating
/// it, and then potentially discarding it, it's easier to just pass the model
/// to the compiler as a one-off, even though compilation will be slower. Once
/// the model behaves satisfactorily, it can be compiled AOT, for efficiency,
/// in release mode. The expectation is that a well-trained model provides a
/// good policy over a sufficiently diverse codebase, over many changes (i.e.
/// training happens seldom).
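///
/// For illustration, the modes map roughly onto the flags defined above (the
/// flag selecting the development-mode advisor itself is defined elsewhere):
///   1) -training-log=<path>, without -ml-inliner-model-under-training
///   2) -training-log=<path> -ml-inliner-model-under-training=<savedmodel dir>
///   3) -ml-inliner-model-under-training=<savedmodel dir>, without
///      -training-log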
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference);

  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();
  void updateNativeSizeEstimate(int64_t Change) { CurrentNativeSize += Change; }
  void resetNativeSize(Function *F) {
    FAM.invalidate<InlineSizeEstimatorAnalysis>(*F);
  }

  std::unique_ptr<MLInlineAdvice>
  getMandatoryAdvice(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  size_t getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !TrainingLog.empty(); }

  std::function<bool(CallBase &)> GetDefaultAdvice;
  TrainingLogger Logger;
  const bool IsDoingInference;

  const int32_t InitialNativeSize;
  int32_t CurrentNativeSize = 0;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger, size_t CallerSizeEstimateBefore,
                        size_t CalleeSizeEstimateBefore, bool DefaultDecision)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
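    // The reward is the estimated change in native size due to this inlining.
    // The callee still exists after a plain inline, so the "after" size is the
    // re-estimated caller plus the callee's prior estimate; subtracting the
    // "before" sum leaves just the caller's growth. For example (hypothetical
    // size units), a caller growing from 100 to 130 yields a reward of +30; a
    // negative reward means the estimate shrank.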
    int Reward = std::numeric_limits<int>::max();
    if (!getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller) +
                            CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (!getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    }
  }

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const size_t CallerSizeEstimateBefore;
  const size_t CalleeSizeEstimateBefore;
  const bool DefaultDecision;
};

/// A pseudo model runner. We use it to store feature values when collecting
/// logs for the default policy, but never ask it to 'run'.
class NoInferenceModelRunner : public MLModelRunner {
public:
  NoInferenceModelRunner(LLVMContext &Ctx)
      : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
  void setFeature(FeatureIndex Index, int64_t Value) override {
    Features[static_cast<int>(Index)] = Value;
  }

  int64_t getFeature(int Index) const override { return Features[Index]; }
  bool run() override {
    llvm_unreachable("We shouldn't call run on this model runner.");
  }

private:
  InlineFeatures Features;
};

/// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
/// to dynamically load and evaluate a TF SavedModel
/// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
/// sacrificed for ease of use while training.
class ModelUnderTrainingRunner final : public MLModelRunner {
public:
  ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);

  bool run() override;

  // Disallows copy and assign.
  ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
  ModelUnderTrainingRunner &
  operator=(const ModelUnderTrainingRunner &) = delete;

  void setFeature(FeatureIndex Index, int64_t Value) override;
  int64_t getFeature(int Index) const override;
  bool isValid() const { return !!Evaluator; }

private:
  std::unique_ptr<TFModelEvaluator> Evaluator;

  // The training framework needs some additional features.
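  // With the default -ml-inliner-trained-model-feed-prefix ("action_"), these
  // resolve to feeds named action_inlining_default, action_discount,
  // action_reward, and action_step_type. They are declared as model inputs so
  // the SavedModel's signature matches, but nothing in this file writes to
  // them.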
  const std::vector<TensorSpec> TrainingOnlyFeatures{
      TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
      TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
};
} // namespace

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  if (TrainingLog.empty())
    return;
  std::error_code ErrorCode;
  raw_fd_ostream OutFile(TrainingLog, ErrorCode);
  Logger.printTensor(OutFile);
}

size_t
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdvice(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (!isLogging())
    return MLInlineAdvisor::getMandatoryAdvice(CB, ORE);
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/true, /*Logger=*/Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    if (isFunctionDeleted(&F))
      continue;
    Ret += getNativeSizeEstimate(F);
  }
  return Ret;
}

ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
                                                   const std::string &ModelPath)
    : MLModelRunner(Ctx) {
  std::vector<TensorSpec> InputSpecs;
  std::vector<TensorSpec> OutputSpecs;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputSpecs.push_back(
        TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
  InputSpecs.insert(InputSpecs.end(), TrainingOnlyFeatures.begin(),
                    TrainingOnlyFeatures.end());
  OutputSpecs.push_back(TensorSpec::createSpec<int64_t>(TFDecisionName, {1}));
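  // Illustrative note: with the default prefix, each input feed is named
  // "action_" + the corresponding feature name from FeatureNameMap (defined
  // elsewhere), and the single fetched output is the graph operation named by
  // -ml-inliner-trained-model-decision-name ("StatefulPartitionedCall" by
  // default).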

  Evaluator =
      std::make_unique<TFModelEvaluator>(ModelPath, InputSpecs, OutputSpecs);
  if (!Evaluator || !Evaluator->isValid()) {
    Ctx.emitError("Failed to create inliner saved model evaluator");
    Evaluator.reset();
    return;
  }
}

bool ModelUnderTrainingRunner::run() {
  auto ER = Evaluator->evaluate();
  if (!ER.hasValue()) {
    Ctx.emitError("Error evaluating model.");
    return false;
  }
  int64_t Decision = *ER->getTensorValue<int64_t>(0);
  return static_cast<bool>(Decision);
}

int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
  return *Evaluator->getInput<int64_t>(Index);
}

void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
  size_t NumericIndex = static_cast<size_t>(Index);
  *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
}

std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
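  // Logging computes rewards from InlineSizeEstimatorAnalysis, so a training
  // log only makes sense when the size estimator is available; require both
  // or neither to be requested.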
  if (TrainingLog.empty() !=
      !InlineSizeEstimatorAnalysis::isEvaluatorRequested()) {
    Ctx.emitError("For development mode, if training logs are requested, then "
                  "a size estimator must be available; specify both or "
                  "neither.");
    return nullptr;
  }

  std::unique_ptr<MLModelRunner> Runner;

  bool IsDoingInference = false;
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx));
  else {
    auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
        Ctx, TFModelUnderTrainingPath);
    if (!MUTR || !MUTR->isValid()) {
      Ctx.emitError("Could not load the policy model from the provided path");
      return nullptr;
    }
    Runner = std::move(MUTR);
    IsDoingInference = true;
  }
  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference);
}
#endif // defined(LLVM_HAVE_TF_API)