1 //===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner  --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This file implements a model runner using the TensorFlow C API, allowing a
// model to be loaded from a command line option.
12 //
13 //===----------------------------------------------------------------------===//
14 #include "llvm/Config/config.h"
15 #if defined(LLVM_HAVE_TF_API)
16 
17 #include "llvm/Analysis/CallGraph.h"
18 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
19 #include "llvm/Analysis/MLInlineAdvisor.h"
20 #include "llvm/Analysis/Utils/TFUtils.h"
21 #include "llvm/IR/LLVMContext.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/ManagedStatic.h"
24 
25 #include <vector>
26 
27 using namespace llvm;
28 
29 static cl::opt<std::string> TrainingLog(
30     "training-log", cl::Hidden,
31     cl::desc("Path where the development - mode inlining log is saved."));
32 
33 static cl::opt<std::string> TFModelUnderTrainingPath(
34     "ml-inliner-model-under-training", cl::Hidden,
35     cl::desc("Path to SavedModel from the previous training iteration."));
36 
37 static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
38                                          cl::Hidden, cl::init("action_"),
39                                          cl::desc("Prefix for feature names."));
40 
41 static cl::opt<std::string> TFDecisionName(
42     "ml-inliner-trained-model-decision-name", cl::Hidden,
43     cl::init("StatefulPartitionedCall"),
44     cl::desc("Name of the graph operation representing the decision."));
45 
46 namespace {
47 /// An InlineEvent, used by TrainingLogger.
48 struct InlineEvent {
49   /// What the default policy's decision would have been.
50   bool DefaultDecision = false;
51 
  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
54   bool AdvisedDecision = false;
55 
  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true; otherwise it agrees with
  /// AdvisedDecision.
59   bool Effect = false;
60 
61   /// What the change in size was: size_after - size_before
62   int64_t Reward = 0;
63 };
64 
65 /// Collect data we may use for training a model, and write it as a textual
/// TensorFlow SequenceExample
67 /// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
68 /// protobuf (https://developers.google.com/protocol-buffers).
69 /// Because this is a protobuf, we cannot just stream the events as they come.
70 /// Internally, TrainingLogger stores data in column-major format, because that
71 /// lines up with how TF SequenceExample represents it.
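///
/// For illustration only, the emitted text has roughly the following shape
/// (the feature name and values here are hypothetical; one 'feature' entry is
/// produced per logged inlining event):
///
///   feature_lists: {
///     feature_list: {
///       key: "some_feature" value: {
///         feature: { int64_list: { value: [3] } }
///         feature: { int64_list: { value: [1] } }
///       }
///     }
///     ...
///   }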
72 class TrainingLogger final {
73 public:
74   TrainingLogger(StringRef LogFileName);
75 
76   /// Log one inlining event.
77   void logInlineEvent(const InlineEvent &Event,
78                       const MLModelRunner &ModelRunner);
79 
80   /// Print the stored tensors.
81   void print();
82 
83 private:
84   /// Write the values of one tensor as a list.
85   template <typename T>
86   void writeTensorValues(raw_fd_ostream &OutFile, const char *TensorData,
87                          size_t ElemCount) const {
88     OutFile << "[";
89     const T *TypedData = reinterpret_cast<const T *>(TensorData);
90     for (size_t I = 0; I < ElemCount; ++I) {
91       if (I > 0)
92         OutFile << ", ";
93       OutFile << TypedData[I];
94     }
95     OutFile << "]";
96   }
97 
98   /// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs.
99   /// The tensors are assumed to be stored contiguously, in row-major format,
100   /// in the TensorData buffer. Each tensor has the shape given by Spec. The
101   /// feature name in the output is either the provided LoggingName, if
102   /// specified, otherwise it's the name of the tensor (as given by Spec).
103   template <typename T>
104   void
105   writeTensorsAsFeatureLists(raw_fd_ostream &OutFile, const TensorSpec &Spec,
106                              const T *TensorData, size_t TensorCount,
107                              Optional<StringRef> LoggingName = None) const {
108     writeRawTensorsAsFeatureLists(OutFile, Spec,
109                                   reinterpret_cast<const char *>(TensorData),
110                                   TensorCount, LoggingName);
111   }
112 
113   /// Untyped implementation of the API above.
114   void
115   writeRawTensorsAsFeatureLists(raw_fd_ostream &OutFile, const TensorSpec &Spec,
116                                 const char *TensorData, size_t TensorCount,
117                                 Optional<StringRef> LoggingName = None) const {
118     const char *FieldName = "<invalid>";
119     std::function<void(const char *)> ValueWriter;
120     // The 'Feature' protobuf only has 3 possible fields: float_list,
121     // int64_list, or bytes_list, so we capture int32 values as int64. We don't
122     // support any other types.
123     if (Spec.isElementType<int64_t>()) {
124       FieldName = "int64_list";
125       ValueWriter = [&](const char *Data) {
126         writeTensorValues<int64_t>(OutFile, Data, Spec.getElementCount());
127       };
128     } else if (Spec.isElementType<int32_t>()) {
129       FieldName = "int64_list";
130       ValueWriter = [&](const char *Data) {
131         writeTensorValues<int32_t>(OutFile, Data, Spec.getElementCount());
132       };
133 
134     } else if (Spec.isElementType<float>()) {
135       FieldName = "float_list";
136       ValueWriter = [&](const char *Data) {
137         writeTensorValues<float>(OutFile, Data, Spec.getElementCount());
138       };
139 
140     } else
141       llvm_unreachable("Unsupported tensor type.");
142 
143     OutFile << "  feature_list: {\n";
144     OutFile << "    key: "
145             << "\"" << (LoggingName ? *LoggingName : Spec.name()) << "\" ";
146     OutFile << "value: {\n";
147     size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize();
148     for (const char *P = TensorData,
149                     *E = TensorData + TensorByteSize * TensorCount;
150          P < E; P += TensorByteSize) {
151       OutFile << "      feature: { " << FieldName << ": { value: ";
152       ValueWriter(P);
153       OutFile << " } }\n";
154     }
155     OutFile << "    }\n";
156     OutFile << "  }\n";
157   }
158 
159   StringRef LogFileName;
160   std::vector<InlineFeatures> Features;
161   std::vector<int64_t> DefaultDecisions;
162   std::vector<int64_t> Decisions;
163   std::vector<bool> Effects;
164   std::vector<int64_t> Rewards;
165 };
166 
167 /// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is only concerned with producing training data
/// ("logs").
170 /// This InlineAdvisor can operate in the following modes:
171 ///
172 /// 1) collect logs for the default policy. This is useful for bootstrapping
173 /// training, which will be considerably faster by starting from a reasonable
174 /// policy.
175 ///
176 /// 2) collect logs for the ML policy, using a model from a previous
177 /// training. Potentially, that model uses internally some small random
178 /// perturbation of its weights, to induce exploration (setting this up is the
179 /// responsibility of the training algorithm). The logs would then be used to
180 /// retrain and improve on this model.
181 ///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation - the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than
/// compiling the release candidate model in 'release' mode, validating it, and
/// then potentially discarding it, it's easier to pass the model to the
/// compiler directly as a one-off, even though compilation is slower. Once the
/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
/// release mode. The expectation is that a well-trained model provides a good
/// policy over a sufficiently diverse codebase and over many changes (i.e.
/// training happens seldom).
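///
/// In terms of the flags defined in this file, these modes map roughly as
/// follows (a sketch; selecting the development-mode advisor itself is done
/// by the surrounding inliner setup):
///   1) pass only -training-log=<path>;
///   2) pass both -training-log=<path> and
///      -ml-inliner-model-under-training=<saved_model_dir>;
///   3) pass only -ml-inliner-model-under-training=<saved_model_dir>.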
192 class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
193 public:
194   DevelopmentModeMLInlineAdvisor(
195       Module &M, ModuleAnalysisManager &MAM,
196       std::unique_ptr<MLModelRunner> ModelRunner,
197       std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference,
198       std::unique_ptr<TrainingLogger> Logger);
199 
200   size_t getTotalSizeEstimate();
201 
202   virtual ~DevelopmentModeMLInlineAdvisor();
203   void updateNativeSizeEstimate(int64_t Change) { CurrentNativeSize += Change; }
204   void resetNativeSize(Function *F) {
205     FAM.invalidate<InlineSizeEstimatorAnalysis>(*F);
206   }
207 
208   std::unique_ptr<MLInlineAdvice>
209   getMandatoryAdvice(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
210   std::unique_ptr<MLInlineAdvice>
211   getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
212 
213   size_t getNativeSizeEstimate(const Function &F) const;
214 
215 private:
216   bool isLogging() const { return !!Logger; }
217 
218   std::function<bool(CallBase &)> GetDefaultAdvice;
219   const bool IsDoingInference;
220   std::unique_ptr<TrainingLogger> Logger;
221 
222   const int32_t InitialNativeSize;
223   int32_t CurrentNativeSize = 0;
224 };
225 
226 /// A variant of MLInlineAdvice that tracks all non-trivial inlining
227 /// decisions, for training/logging.
228 class LoggingMLInlineAdvice : public MLInlineAdvice {
229 public:
230   LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
231                         OptimizationRemarkEmitter &ORE, bool Recommendation,
232                         TrainingLogger &Logger, size_t CallerSizeEstimateBefore,
233                         size_t CalleeSizeEstimateBefore, bool DefaultDecision,
234                         bool Mandatory = false)
235       : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
236         CallerSizeEstimateBefore(CallerSizeEstimateBefore),
237         CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
238         DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}
239 
240   virtual ~LoggingMLInlineAdvice() = default;
241 
242 private:
243   DevelopmentModeMLInlineAdvisor *getAdvisor() const {
244     return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
245   }
246   void recordInliningImpl() override {
247     MLInlineAdvice::recordInliningImpl();
248     getAdvisor()->resetNativeSize(Caller);
249     int Reward = std::numeric_limits<int>::max();
250     if (!getAdvisor()->isForcedToStop()) {
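      // Unlike recordInliningWithCalleeDeletedImpl below, the callee still
      // exists, so its pre-inlining size estimate is added back into the
      // 'after' size; Reward is then the net change in estimated native size.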
251       int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller) +
252                             CalleeSizeEstimateBefore;
253       Reward = NativeSizeAfter -
254                (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
255       getAdvisor()->updateNativeSizeEstimate(Reward);
256     }
257     log(Reward, /*Success=*/true);
258   }
259 
260   void recordInliningWithCalleeDeletedImpl() override {
261     MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
262     getAdvisor()->resetNativeSize(Caller);
263     if (!getAdvisor()->isForcedToStop()) {
264       int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller);
265       int Reward = NativeSizeAfter -
266                    (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
267       getAdvisor()->updateNativeSizeEstimate(Reward);
268       log(Reward, /*Success=*/true);
269     }
270   }
271 
272   void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
273     MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
274     log(NoReward, /*Success=*/false);
275   }
276 
277   void recordUnattemptedInliningImpl() override {
278     MLInlineAdvice::recordUnattemptedInliningImpl();
279     log(NoReward, /*Success=*/false);
280   }
281 
282   void log(int64_t Reward, bool Success) {
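    // Mandatory inlinings are not policy decisions, so they carry no training
    // signal and are not logged.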
283     if (Mandatory)
284       return;
285     InlineEvent Event;
286     Event.AdvisedDecision = isInliningRecommended();
287     Event.DefaultDecision = DefaultDecision;
288     Event.Effect = Success;
289     Event.Reward = Reward;
290     Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
291   }
292 
293   static const int64_t NoReward = 0;
294   TrainingLogger &Logger;
295   const size_t CallerSizeEstimateBefore;
296   const size_t CalleeSizeEstimateBefore;
297   const bool DefaultDecision;
298   const bool Mandatory;
299 };
300 
301 /// A pseudo model runner. We use it to store feature values when collecting
302 /// logs for the default policy, but never ask it to 'run'.
303 class NoInferenceModelRunner : public MLModelRunner {
304 public:
305   NoInferenceModelRunner(LLVMContext &Ctx)
306       : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
307   void setFeature(FeatureIndex Index, int64_t Value) override {
308     Features[static_cast<int>(Index)] = Value;
309   }
310 
311   int64_t getFeature(int Index) const override { return Features[Index]; }
312   bool run() override {
313     llvm_unreachable("We shouldn't call run on this model runner.");
314   }
315 
316 private:
317   InlineFeatures Features;
318 };
319 
320 /// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
321 /// to dynamically load and evaluate a TF SavedModel
322 /// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
323 /// sacrificed for ease of use while training.
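///
/// The evaluator is fed one tensor per inlining feature, named by prefixing
/// the feature name with TFFeedPrefix, plus the training-only feeds listed
/// below; the advice is read back from the output operation named by
/// TFDecisionName.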
324 class ModelUnderTrainingRunner final : public MLModelRunner {
325 public:
326   ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);
327 
328   bool run() override;
329 
330   // Disallows copy and assign.
331   ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
332   ModelUnderTrainingRunner &
333   operator=(const ModelUnderTrainingRunner &) = delete;
334 
335   void setFeature(FeatureIndex Index, int64_t Value) override;
336   int64_t getFeature(int Index) const override;
337   bool isValid() const { return !!Evaluator; }
338 
339 private:
340   std::unique_ptr<TFModelEvaluator> Evaluator;
341 
342   // The training framework needs some additional features.
343   const std::vector<TensorSpec> TrainingOnlyFeatures{
344       TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
345       TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
346       TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
347       TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
348 };
349 } // namespace
350 
351 TrainingLogger::TrainingLogger(StringRef LogFileName)
352     : LogFileName(LogFileName) {
353   for (size_t I = 0; I < NumberOfFeatures; ++I) {
354     Features.push_back(InlineFeatures());
355   }
356 }
357 
358 /// Log one inlining event.
359 void TrainingLogger::logInlineEvent(const InlineEvent &Event,
360                                     const MLModelRunner &ModelRunner) {
361   for (size_t I = 0; I < NumberOfFeatures; ++I) {
362     Features[I].push_back(ModelRunner.getFeature(I));
363   }
364   Decisions.push_back(Event.AdvisedDecision);
365   Effects.push_back(Event.Effect);
366   Rewards.push_back(Event.Reward);
367   DefaultDecisions.push_back(Event.DefaultDecision);
368 }
369 
370 void TrainingLogger::print() {
371   std::error_code EC;
372   raw_fd_ostream OutFile(LogFileName, EC);
373   size_t NumberOfRecords = Decisions.size();
374   if (NumberOfRecords == 0)
375     return;
376 
377   OutFile << "feature_lists: {\n";
378   for (size_t I = 0; I < Features.size(); ++I)
379     writeTensorsAsFeatureLists(
380         OutFile, TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}),
381         Features[I].data(), NumberOfRecords);
382 
383   writeTensorsAsFeatureLists(
384       OutFile, TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}),
385       DefaultDecisions.data(), NumberOfRecords);
386 
387   writeTensorsAsFeatureLists(OutFile,
388                              TensorSpec::createSpec<int64_t>(DecisionName, {1}),
389                              Decisions.data(), NumberOfRecords);
390   writeTensorsAsFeatureLists(OutFile,
391                              TensorSpec::createSpec<int64_t>(RewardName, {1}),
392                              Rewards.data(), NumberOfRecords);
393 
394   OutFile << "}\n";
395 }
396 
397 DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
398     Module &M, ModuleAnalysisManager &MAM,
399     std::unique_ptr<MLModelRunner> ModelRunner,
400     std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference,
401     std::unique_ptr<TrainingLogger> Logger)
402     : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
403       GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
404       Logger(std::move(Logger)),
405       InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
406       CurrentNativeSize(InitialNativeSize) {
407   // We cannot have the case of neither inference nor logging.
408   assert(IsDoingInference || isLogging());
409 }
410 
411 DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
412   if (isLogging())
413     Logger->print();
414 }
415 
416 size_t
417 DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
418   auto &R =
419       FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
420   if (!R) {
421     F.getParent()->getContext().emitError(
422         "Native size estimator is not present.");
423     return 0;
424   }
425   return *R;
426 }
427 
428 std::unique_ptr<MLInlineAdvice>
429 DevelopmentModeMLInlineAdvisor::getMandatoryAdvice(
430     CallBase &CB, OptimizationRemarkEmitter &ORE) {
431   if (!isLogging())
432     return MLInlineAdvisor::getMandatoryAdvice(CB, ORE);
433   return std::make_unique<LoggingMLInlineAdvice>(
434       /*Advisor=*/this,
435       /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/true, /*Logger=*/*Logger,
436       /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
437       /*CalleeSizeEstimateBefore=*/
438       getNativeSizeEstimate(*CB.getCalledFunction()),
439       /*DefaultDecision=*/true, /*Mandatory*/ true);
440 }
441 
442 std::unique_ptr<MLInlineAdvice>
443 DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
444     CallBase &CB, OptimizationRemarkEmitter &ORE) {
445   if (IsDoingInference && !isLogging())
446     return MLInlineAdvisor::getAdviceFromModel(CB, ORE);
447 
448   bool DefaultAdvice = GetDefaultAdvice(CB);
449   auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
450   return std::make_unique<LoggingMLInlineAdvice>(
451       /*Advisor=*/this,
452       /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
453       /*Logger=*/*Logger,
454       /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
455       /*CalleeSizeEstimateBefore=*/
456       getNativeSizeEstimate(*CB.getCalledFunction()),
457       /*DefaultDecision=*/DefaultAdvice);
458 }
459 
460 size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
461   size_t Ret = 0;
462   for (auto &F : M) {
463     if (F.isDeclaration())
464       continue;
465     if (isFunctionDeleted(&F))
466       continue;
467     Ret += getNativeSizeEstimate(F);
468   }
469   return Ret;
470 }
471 
472 ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
473                                                    const std::string &ModelPath)
474     : MLModelRunner(Ctx) {
475   std::vector<TensorSpec> InputSpecs;
476   std::vector<TensorSpec> OutputSpecs;
477   for (size_t I = 0; I < NumberOfFeatures; ++I)
478     InputSpecs.push_back(
479         TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
480   InputSpecs.insert(InputSpecs.end(), TrainingOnlyFeatures.begin(),
481                     TrainingOnlyFeatures.end());
482   OutputSpecs.push_back(TensorSpec::createSpec<int64_t>(TFDecisionName, {1}));
483 
484   Evaluator =
485       std::make_unique<TFModelEvaluator>(ModelPath, InputSpecs, OutputSpecs);
486   if (!Evaluator || !Evaluator->isValid()) {
487     Ctx.emitError("Failed to create inliner saved model evaluator");
488     Evaluator.reset();
489     return;
490   }
491 }
492 
493 bool ModelUnderTrainingRunner::run() {
494   auto ER = Evaluator->evaluate();
495   if (!ER.hasValue()) {
496     Ctx.emitError("Error evaluating model.");
497     return false;
498   }
499   int64_t Decision = *ER->getTensorValue<int64_t>(0);
500   return static_cast<bool>(Decision);
501 }
502 
503 int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
504   return *Evaluator->getInput<int64_t>(Index);
505 }
506 
507 void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
508   size_t NumericIndex = static_cast<size_t>(Index);
509   *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
510 }
511 
512 std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
513     Module &M, ModuleAnalysisManager &MAM,
514     std::function<bool(CallBase &)> GetDefaultAdvice) {
515   auto &Ctx = M.getContext();
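  // The training log's rewards are derived from native size estimates, so a
  // log can only be produced when the size estimator is available; require
  // that the two options be requested together (or not at all).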
516   if (TrainingLog.empty() !=
517       !InlineSizeEstimatorAnalysis::isEvaluatorRequested()) {
518     Ctx.emitError("For development mode, if training logs are requested, then "
519                   "a size estimator must be available; either that, or neither "
520                   "are specified.");
521     return nullptr;
522   }
523 
524   std::unique_ptr<MLModelRunner> Runner;
525 
526   bool IsDoingInference = false;
  if (TFModelUnderTrainingPath.empty()) {
    Runner.reset(new NoInferenceModelRunner(Ctx));
  } else {
    auto MUTR = std::make_unique<ModelUnderTrainingRunner>(
        Ctx, TFModelUnderTrainingPath);
    if (!MUTR->isValid()) {
      Ctx.emitError("Could not load the policy model from the provided path");
      return nullptr;
    }
    IsDoingInference = true;
    Runner = std::move(MUTR);
  }
538   std::unique_ptr<TrainingLogger> Logger;
539   if (!TrainingLog.empty())
540     Logger = std::make_unique<TrainingLogger>(TrainingLog);
541 
542   return std::make_unique<DevelopmentModeMLInlineAdvisor>(
543       M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference,
544       std::move(Logger));
545 }
546 #endif // defined(LLVM_HAVE_TF_API)
547