1 //===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a model runner using Tensorflow C APIs, allowing the 11 // loading of a model from a command line option. 12 // 13 //===----------------------------------------------------------------------===// 14 #include "llvm/Config/config.h" 15 #if defined(LLVM_HAVE_TF_API) 16 17 #include "llvm/Analysis/CallGraph.h" 18 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" 19 #include "llvm/Analysis/MLInlineAdvisor.h" 20 #include "llvm/Analysis/Utils/TFUtils.h" 21 #include "llvm/IR/LLVMContext.h" 22 #include "llvm/Support/CommandLine.h" 23 #include "llvm/Support/ManagedStatic.h" 24 25 #include <vector> 26 27 using namespace llvm; 28 29 static cl::opt<std::string> TrainingLog( 30 "training-log", cl::Hidden, 31 cl::desc("Path where the development - mode inlining log is saved.")); 32 33 static cl::opt<std::string> TFModelUnderTrainingPath( 34 "ml-inliner-model-under-training", cl::Hidden, 35 cl::desc("Path to SavedModel from the previous training iteration.")); 36 37 static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix", 38 cl::Hidden, cl::init("action_"), 39 cl::desc("Prefix for feature names.")); 40 41 static cl::opt<std::string> TFDecisionName( 42 "ml-inliner-trained-model-decision-name", cl::Hidden, 43 cl::init("StatefulPartitionedCall"), 44 cl::desc("Name of the graph operation representing the decision.")); 45 46 namespace { 47 /// An InlineEvent, used by TrainingLogger. 48 struct InlineEvent { 49 /// What the default policy's decision would have been. 50 bool DefaultDecision = false; 51 52 /// What we advised. When training off the default policy, this is the same as 53 /// DefaultDecision. 54 bool AdvisedDecision = false; 55 56 /// What actually happened. This would be 'false' in the case of an inline 57 /// error, even if AdvisedDecision were true, otherwise it agrees with 58 /// AdvisedDecision. 59 bool Effect = false; 60 61 /// What the change in size was: size_after - size_before 62 int64_t Reward = 0; 63 }; 64 65 /// Collect data we may use for training a model, and write it as a textual 66 /// Tensorflow SequenceExample 67 /// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample) 68 /// protobuf (https://developers.google.com/protocol-buffers). 69 /// Because this is a protobuf, we cannot just stream the events as they come. 70 /// Internally, TrainingLogger stores data in column-major format, because that 71 /// lines up with how TF SequenceExample represents it. 72 class TrainingLogger final { 73 public: 74 TrainingLogger(); 75 76 /// Log one inlining event. 77 void logInlineEvent(const InlineEvent &Event, 78 const MLModelRunner &ModelRunner); 79 80 /// Print the stored tensors. 81 void print(raw_fd_ostream &OutFile); 82 83 private: 84 template <typename T> 85 void writeTensor(raw_fd_ostream &OutFile, StringRef TensorName, 86 const std::vector<T> &Tensor); 87 88 std::vector<InlineFeatures> Features; 89 std::vector<bool> DefaultDecisions; 90 std::vector<bool> Decisions; 91 std::vector<bool> Effects; 92 std::vector<int64_t> Rewards; 93 std::vector<bool> Mandatory; 94 }; 95 96 /// An extension of the MLInlineAdvisor for the 'development' mode, targeting 97 /// the offline training scenario. Note that training happens outside of the 98 /// compiler, this facility is concerned with producing training data ("logs"). 99 /// This InlineAdvisor can operate in the following modes: 100 /// 101 /// 1) collect logs for the default policy. This is useful for bootstrapping 102 /// training, which will be considerably faster by starting from a reasonable 103 /// policy. 104 /// 105 /// 2) collect logs for the ML policy, using a model from a previous 106 /// training. Potentially, that model uses internally some small random 107 /// perturbation of its weights, to induce exploration (setting this up is the 108 /// responsibility of the training algorithm). The logs would then be used to 109 /// retrain and improve on this model. 110 /// 111 /// 3) use the provided model, with no logging. This is useful for end to end 112 /// validation - the model, in this case, is a release candidate and shouldn't 113 /// have random perturbations. It is a convenience feature: rather than needing 114 /// to take the release candidate model and compile it in 'release' mode, 115 /// validate it, then potentially discard it, it's easier to just pass the model 116 /// to the compiler, albeit compilation would be slower, as a one-off. Once the 117 /// model behaves satisfactorily, it can be compiled AOT, for efficiency, in 118 /// release mode. The expectation is that a well-trained model provides a good 119 /// policy over a sufficiently diverse codebase, over many changes (i.e. 120 /// training happens seldom). 121 class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor { 122 public: 123 DevelopmentModeMLInlineAdvisor( 124 Module &M, ModuleAnalysisManager &MAM, 125 std::unique_ptr<MLModelRunner> ModelRunner, 126 std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference); 127 128 size_t getTotalSizeEstimate(); 129 130 virtual ~DevelopmentModeMLInlineAdvisor(); 131 void updateNativeSizeEstimate(int64_t Change) { CurrentNativeSize += Change; } 132 void resetNativeSize(Function *F) { 133 FAM.invalidate<InlineSizeEstimatorAnalysis>(*F); 134 } 135 136 std::unique_ptr<MLInlineAdvice> 137 getMandatoryAdvice(CallBase &CB, OptimizationRemarkEmitter &ORE) override; 138 std::unique_ptr<MLInlineAdvice> 139 getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override; 140 141 size_t getNativeSizeEstimate(const Function &F) const; 142 143 private: 144 bool isLogging() const { return !TrainingLog.empty(); } 145 146 std::function<bool(CallBase &)> GetDefaultAdvice; 147 TrainingLogger Logger; 148 const bool IsDoingInference; 149 150 const int32_t InitialNativeSize; 151 int32_t CurrentNativeSize = 0; 152 }; 153 154 /// A variant of MLInlineAdvice that tracks all non-trivial inlining 155 /// decisions, for training/logging. 156 class LoggingMLInlineAdvice : public MLInlineAdvice { 157 public: 158 LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB, 159 OptimizationRemarkEmitter &ORE, bool Recommendation, 160 TrainingLogger &Logger, size_t CallerSizeEstimateBefore, 161 size_t CalleeSizeEstimateBefore, bool DefaultDecision) 162 : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger), 163 CallerSizeEstimateBefore(CallerSizeEstimateBefore), 164 CalleeSizeEstimateBefore(CalleeSizeEstimateBefore), 165 DefaultDecision(DefaultDecision) {} 166 167 virtual ~LoggingMLInlineAdvice() = default; 168 169 private: 170 DevelopmentModeMLInlineAdvisor *getAdvisor() const { 171 return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor); 172 } 173 void recordInliningImpl() override { 174 MLInlineAdvice::recordInliningImpl(); 175 getAdvisor()->resetNativeSize(Caller); 176 int Reward = std::numeric_limits<int>::max(); 177 if (!getAdvisor()->isForcedToStop()) { 178 int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller) + 179 CalleeSizeEstimateBefore; 180 Reward = NativeSizeAfter - 181 (CallerSizeEstimateBefore + CalleeSizeEstimateBefore); 182 getAdvisor()->updateNativeSizeEstimate(Reward); 183 } 184 log(Reward, /*Success=*/true); 185 } 186 187 void recordInliningWithCalleeDeletedImpl() override { 188 MLInlineAdvice::recordInliningWithCalleeDeletedImpl(); 189 getAdvisor()->resetNativeSize(Caller); 190 if (!getAdvisor()->isForcedToStop()) { 191 int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller); 192 int Reward = NativeSizeAfter - 193 (CallerSizeEstimateBefore + CalleeSizeEstimateBefore); 194 getAdvisor()->updateNativeSizeEstimate(Reward); 195 log(Reward, /*Success=*/true); 196 } 197 } 198 199 void recordUnsuccessfulInliningImpl(const InlineResult &Result) override { 200 MLInlineAdvice::recordUnsuccessfulInliningImpl(Result); 201 log(NoReward, /*Success=*/false); 202 } 203 204 void recordUnattemptedInliningImpl() override { 205 MLInlineAdvice::recordUnattemptedInliningImpl(); 206 log(NoReward, /*Success=*/false); 207 } 208 209 void log(int64_t Reward, bool Success) { 210 InlineEvent Event; 211 Event.AdvisedDecision = isInliningRecommended(); 212 Event.DefaultDecision = DefaultDecision; 213 Event.Effect = Success; 214 Event.Reward = Reward; 215 Logger.logInlineEvent(Event, getAdvisor()->getModelRunner()); 216 } 217 218 static const int64_t NoReward = 0; 219 TrainingLogger &Logger; 220 const size_t CallerSizeEstimateBefore; 221 const size_t CalleeSizeEstimateBefore; 222 const bool DefaultDecision; 223 }; 224 225 /// A pseudo model runner. We use it to store feature values when collecting 226 /// logs for the default policy, but never ask it to 'run'. 227 class NoInferenceModelRunner : public MLModelRunner { 228 public: 229 NoInferenceModelRunner(LLVMContext &Ctx) 230 : MLModelRunner(Ctx), Features(NumberOfFeatures) {} 231 void setFeature(FeatureIndex Index, int64_t Value) override { 232 Features[static_cast<int>(Index)] = Value; 233 } 234 235 int64_t getFeature(int Index) const override { return Features[Index]; } 236 bool run() override { 237 llvm_unreachable("We shouldn't call run on this model runner."); 238 } 239 240 private: 241 InlineFeatures Features; 242 }; 243 244 /// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs 245 /// to dynamically load and evaluate a TF SavedModel 246 /// (https://www.tensorflow.org/guide/saved_model). Runtime performance is 247 /// sacrificed for ease of use while training. 248 class ModelUnderTrainingRunner final : public MLModelRunner { 249 public: 250 ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath); 251 252 bool run() override; 253 254 // Disallows copy and assign. 255 ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete; 256 ModelUnderTrainingRunner & 257 operator=(const ModelUnderTrainingRunner &) = delete; 258 259 void setFeature(FeatureIndex Index, int64_t Value) override; 260 int64_t getFeature(int Index) const override; 261 bool isValid() const { return !!Evaluator; } 262 263 private: 264 std::unique_ptr<TFModelEvaluator> Evaluator; 265 266 // The training framework needs some additional features. 267 const std::vector<TensorSpec> TrainingOnlyFeatures{ 268 TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}), 269 TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}), 270 TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}), 271 TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})}; 272 }; 273 } // namespace 274 275 TrainingLogger::TrainingLogger() { 276 for (size_t I = 0; I < NumberOfFeatures; ++I) { 277 Features.push_back(InlineFeatures()); 278 } 279 } 280 281 /// Log one inlining event. 282 void TrainingLogger::logInlineEvent(const InlineEvent &Event, 283 const MLModelRunner &ModelRunner) { 284 for (size_t I = 0; I < NumberOfFeatures; ++I) { 285 Features[I].push_back(ModelRunner.getFeature(I)); 286 } 287 Decisions.push_back(Event.AdvisedDecision); 288 Effects.push_back(Event.Effect); 289 Rewards.push_back(Event.Reward); 290 DefaultDecisions.push_back(Event.DefaultDecision); 291 } 292 293 void TrainingLogger::print(raw_fd_ostream &OutFile) { 294 if (DefaultDecisions.empty()) 295 return; 296 OutFile << "feature_lists: {\n"; 297 298 for (size_t I = 0; I < Features.size(); I++) { 299 writeTensor(OutFile, FeatureNameMap.at(I), Features[I]); 300 } 301 writeTensor(OutFile, DefaultDecisionName, DefaultDecisions); 302 writeTensor(OutFile, DecisionName, Decisions); 303 writeTensor(OutFile, RewardName, Rewards); 304 305 OutFile << "}\n"; 306 } 307 308 template <typename T> 309 void TrainingLogger::writeTensor(raw_fd_ostream &OutFile, StringRef TensorName, 310 const std::vector<T> &Tensor) { 311 OutFile << " feature_list: {\n"; 312 OutFile << " key: " 313 << "\"" << TensorName << "\" "; 314 OutFile << "value: {\n"; 315 for (const auto &Feature : Tensor) { 316 OutFile << " feature: { int64_list: { value: [" << Feature 317 << "] } }\n"; 318 } 319 OutFile << " }\n"; 320 OutFile << " }\n"; 321 } 322 323 DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor( 324 Module &M, ModuleAnalysisManager &MAM, 325 std::unique_ptr<MLModelRunner> ModelRunner, 326 std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference) 327 : MLInlineAdvisor(M, MAM, std::move(ModelRunner)), 328 GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference), 329 InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0), 330 CurrentNativeSize(InitialNativeSize) { 331 // We cannot have the case of neither inference nor logging. 332 assert(IsDoingInference || isLogging()); 333 } 334 335 DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() { 336 if (TrainingLog.empty()) 337 return; 338 std::error_code ErrorCode; 339 raw_fd_ostream OutFile(TrainingLog, ErrorCode); 340 Logger.print(OutFile); 341 } 342 343 size_t 344 DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const { 345 auto &R = 346 FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F)); 347 if (!R) { 348 F.getParent()->getContext().emitError( 349 "Native size estimator is not present."); 350 return 0; 351 } 352 return *R; 353 } 354 355 std::unique_ptr<MLInlineAdvice> 356 DevelopmentModeMLInlineAdvisor::getMandatoryAdvice( 357 CallBase &CB, OptimizationRemarkEmitter &ORE) { 358 if (!isLogging()) 359 return MLInlineAdvisor::getMandatoryAdvice(CB, ORE); 360 return std::make_unique<LoggingMLInlineAdvice>( 361 /*Advisor=*/this, 362 /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/true, /*Logger=*/Logger, 363 /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), 364 /*CalleeSizeEstimateBefore=*/ 365 getNativeSizeEstimate(*CB.getCalledFunction()), 366 /*DefaultDecision=*/true); 367 } 368 369 std::unique_ptr<MLInlineAdvice> 370 DevelopmentModeMLInlineAdvisor::getAdviceFromModel( 371 CallBase &CB, OptimizationRemarkEmitter &ORE) { 372 if (IsDoingInference && !isLogging()) 373 return MLInlineAdvisor::getAdviceFromModel(CB, ORE); 374 375 bool DefaultAdvice = GetDefaultAdvice(CB); 376 auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice; 377 return std::make_unique<LoggingMLInlineAdvice>( 378 /*Advisor=*/this, 379 /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation, 380 /*Logger=*/Logger, 381 /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), 382 /*CalleeSizeEstimateBefore=*/ 383 getNativeSizeEstimate(*CB.getCalledFunction()), 384 /*DefaultDecision=*/DefaultAdvice); 385 } 386 387 size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() { 388 size_t Ret = 0; 389 for (auto &F : M) { 390 if (F.isDeclaration()) 391 continue; 392 if (isFunctionDeleted(&F)) 393 continue; 394 Ret += getNativeSizeEstimate(F); 395 } 396 return Ret; 397 } 398 399 ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx, 400 const std::string &ModelPath) 401 : MLModelRunner(Ctx) { 402 std::vector<TensorSpec> InputSpecs; 403 std::vector<TensorSpec> OutputSpecs; 404 for (size_t I = 0; I < NumberOfFeatures; ++I) 405 InputSpecs.push_back( 406 TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1})); 407 InputSpecs.insert(InputSpecs.end(), TrainingOnlyFeatures.begin(), 408 TrainingOnlyFeatures.end()); 409 OutputSpecs.push_back(TensorSpec::createSpec<int64_t>(TFDecisionName, {1})); 410 411 Evaluator = 412 std::make_unique<TFModelEvaluator>(ModelPath, InputSpecs, OutputSpecs); 413 if (!Evaluator || !Evaluator->isValid()) { 414 Ctx.emitError("Failed to create inliner saved model evaluator"); 415 Evaluator.reset(); 416 return; 417 } 418 } 419 420 bool ModelUnderTrainingRunner::run() { 421 auto ER = Evaluator->evaluate(); 422 if (!ER.hasValue()) { 423 Ctx.emitError("Error evaluating model."); 424 return false; 425 } 426 int64_t Decision = *ER->getTensorValue<int64_t>(0); 427 return static_cast<bool>(Decision); 428 } 429 430 int64_t ModelUnderTrainingRunner::getFeature(int Index) const { 431 return *Evaluator->getInput<int64_t>(Index); 432 } 433 434 void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) { 435 size_t NumericIndex = static_cast<size_t>(Index); 436 *(Evaluator->getInput<int64_t>(NumericIndex)) = Value; 437 } 438 439 std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor( 440 Module &M, ModuleAnalysisManager &MAM, 441 std::function<bool(CallBase &)> GetDefaultAdvice) { 442 auto &Ctx = M.getContext(); 443 if (TrainingLog.empty() != 444 !InlineSizeEstimatorAnalysis::isEvaluatorRequested()) { 445 Ctx.emitError("For development mode, if training logs are requested, then " 446 "a size estimator must be available; either that, or neither " 447 "are specified."); 448 return nullptr; 449 } 450 451 std::unique_ptr<MLModelRunner> Runner; 452 453 bool IsDoingInference = false; 454 if (TFModelUnderTrainingPath.empty()) 455 Runner.reset(new NoInferenceModelRunner(Ctx)); 456 else { 457 Runner = std::make_unique<ModelUnderTrainingRunner>( 458 Ctx, TFModelUnderTrainingPath); 459 if (!Runner) { 460 Ctx.emitError("Could not load the policy model from the provided path"); 461 return nullptr; 462 } 463 IsDoingInference = true; 464 } 465 return std::make_unique<DevelopmentModeMLInlineAdvisor>( 466 M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference); 467 } 468 #endif // defined(LLVM_HAVE_TF_API) 469