1 //===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a model runner using Tensorflow C APIs, allowing the 11 // loading of a model from a command line option. 12 // 13 //===----------------------------------------------------------------------===// 14 #include "llvm/Config/config.h" 15 #if defined(LLVM_HAVE_TF_API) 16 17 #include "llvm/Analysis/CallGraph.h" 18 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" 19 #include "llvm/Analysis/MLInlineAdvisor.h" 20 #include "llvm/Analysis/Utils/TFUtils.h" 21 #include "llvm/IR/LLVMContext.h" 22 #include "llvm/Support/CommandLine.h" 23 #include "llvm/Support/ManagedStatic.h" 24 #include "llvm/Support/Path.h" 25 26 #include <vector> 27 28 using namespace llvm; 29 30 static cl::opt<std::string> TrainingLog( 31 "training-log", cl::Hidden, 32 cl::desc("Path where the development - mode inlining log is saved.")); 33 34 static cl::opt<std::string> TFModelUnderTrainingPath( 35 "ml-inliner-model-under-training", cl::Hidden, 36 cl::desc(R"(Path to SavedModel from the previous training iteration. 37 The directory is also expected to contain a JSON specification of the 38 outputs expected to be logged, where the first entry must be the 39 inlining decision. The file containing the specification should be 40 called output_spec.json. The expected JSON value is an array of 41 dictionaries. Each dictionary should have 2 keys: 42 43 - "tensor_spec, followed by the TensorSpec description of the 44 output; and 45 - "logging_name", a string indicating the name to use when 46 logging the output values. 47 48 Example: 49 [ 50 { 51 "logging_name" : "some_name", 52 "tensor_spec" : { 53 "name" : "model_name", 54 "port" : 0, 55 "shape" : [2, 3], 56 "type" : "float" 57 } 58 } 59 ] 60 61 The first value must always correspond to the decision.)")); 62 63 static cl::opt<std::string> TFOutputSpecOverride( 64 "ml-inliner-output-spec-override", cl::Hidden, 65 cl::desc("Override the path to the output spec json file. See " 66 "-ml-inliner-model-under-training documentation for the " 67 "specification of that file.")); 68 69 static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix", 70 cl::Hidden, cl::init("action_"), 71 cl::desc("Prefix for feature names.")); 72 73 namespace { 74 /// An InlineEvent, used by TrainingLogger. 75 struct InlineEvent { 76 /// What the default policy's decision would have been. 77 int64_t DefaultDecision = 0; 78 79 /// What we advised. When training off the default policy, this is the same as 80 /// DefaultDecision. 81 int64_t AdvisedDecision = 0; 82 83 /// What actually happened. This would be 'false' in the case of an inline 84 /// error, even if AdvisedDecision were true, otherwise it agrees with 85 /// AdvisedDecision. 86 bool Effect = false; 87 88 /// What the change in size was: size_after - size_before 89 int64_t Reward = 0; 90 }; 91 92 /// Collect data we may use for training a model, and write it as a textual 93 /// Tensorflow SequenceExample 94 /// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample) 95 /// protobuf (https://developers.google.com/protocol-buffers). 96 /// Because this is a protobuf, we cannot just stream the events as they come. 97 /// Internally, TrainingLogger stores data in column-major format, because that 98 /// lines up with how TF SequenceExample represents it. 99 class ModelUnderTrainingRunner; 100 class TrainingLogger final { 101 public: 102 TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR); 103 104 /// Log one inlining event. 105 void logInlineEvent(const InlineEvent &Event, 106 const MLModelRunner &ModelRunner); 107 108 /// Print the stored tensors. 109 void print(); 110 111 private: 112 StringRef LogFileName; 113 const ModelUnderTrainingRunner *const MUTR; 114 std::unique_ptr<Logger> L; 115 std::vector<bool> Effects; 116 /// There's at least one output. We'll set this to a different value if MUTR 117 /// is avaliable. 118 size_t OutputCount = 1; 119 /// Set these 2 clearly OOB, to make sure we set them later. 120 size_t DefaultDecisionPos = std::numeric_limits<size_t>::max(); 121 size_t DecisionPos = std::numeric_limits<size_t>::max(); 122 }; 123 124 /// An extension of the MLInlineAdvisor for the 'development' mode, targeting 125 /// the offline training scenario. Note that training happens outside of the 126 /// compiler, this facility is concerned with producing training data ("logs"). 127 /// This InlineAdvisor can operate in the following modes: 128 /// 129 /// 1) collect logs for the default policy. This is useful for bootstrapping 130 /// training, which will be considerably faster by starting from a reasonable 131 /// policy. 132 /// 133 /// 2) collect logs for the ML policy, using a model from a previous 134 /// training. Potentially, that model uses internally some small random 135 /// perturbation of its weights, to induce exploration (setting this up is the 136 /// responsibility of the training algorithm). The logs would then be used to 137 /// retrain and improve on this model. 138 /// 139 /// 3) use the provided model, with no logging. This is useful for end to end 140 /// validation - the model, in this case, is a release candidate and shouldn't 141 /// have random perturbations. It is a convenience feature: rather than needing 142 /// to take the release candidate model and compile it in 'release' mode, 143 /// validate it, then potentially discard it, it's easier to just pass the model 144 /// to the compiler, albeit compilation would be slower, as a one-off. Once the 145 /// model behaves satisfactorily, it can be compiled AOT, for efficiency, in 146 /// release mode. The expectation is that a well-trained model provides a good 147 /// policy over a sufficiently diverse codebase, over many changes (i.e. 148 /// training happens seldom). 149 class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor { 150 public: 151 DevelopmentModeMLInlineAdvisor( 152 Module &M, ModuleAnalysisManager &MAM, 153 std::unique_ptr<MLModelRunner> ModelRunner, 154 std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference, 155 std::unique_ptr<TrainingLogger> Logger); 156 157 size_t getTotalSizeEstimate(); 158 159 virtual ~DevelopmentModeMLInlineAdvisor(); 160 void updateNativeSizeEstimate(int64_t Change) { 161 *CurrentNativeSize += Change; 162 } 163 void resetNativeSize(Function *F) { 164 FAM.invalidate<InlineSizeEstimatorAnalysis>(*F); 165 } 166 167 std::unique_ptr<MLInlineAdvice> 168 getMandatoryAdvice(CallBase &CB, OptimizationRemarkEmitter &ORE) override; 169 std::unique_ptr<MLInlineAdvice> 170 getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override; 171 172 Optional<size_t> getNativeSizeEstimate(const Function &F) const; 173 174 private: 175 bool isLogging() const { return !!Logger; } 176 177 std::function<bool(CallBase &)> GetDefaultAdvice; 178 const bool IsDoingInference; 179 std::unique_ptr<TrainingLogger> Logger; 180 181 const Optional<int32_t> InitialNativeSize; 182 Optional<int32_t> CurrentNativeSize; 183 }; 184 185 /// A variant of MLInlineAdvice that tracks all non-trivial inlining 186 /// decisions, for training/logging. 187 class LoggingMLInlineAdvice : public MLInlineAdvice { 188 public: 189 LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB, 190 OptimizationRemarkEmitter &ORE, bool Recommendation, 191 TrainingLogger &Logger, 192 Optional<size_t> CallerSizeEstimateBefore, 193 Optional<size_t> CalleeSizeEstimateBefore, 194 bool DefaultDecision, bool Mandatory = false) 195 : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger), 196 CallerSizeEstimateBefore(CallerSizeEstimateBefore), 197 CalleeSizeEstimateBefore(CalleeSizeEstimateBefore), 198 DefaultDecision(DefaultDecision), Mandatory(Mandatory) {} 199 200 virtual ~LoggingMLInlineAdvice() = default; 201 202 private: 203 DevelopmentModeMLInlineAdvisor *getAdvisor() const { 204 return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor); 205 } 206 void recordInliningImpl() override { 207 MLInlineAdvice::recordInliningImpl(); 208 getAdvisor()->resetNativeSize(Caller); 209 int Reward = std::numeric_limits<int>::max(); 210 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() && 211 !getAdvisor()->isForcedToStop()) { 212 int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) + 213 *CalleeSizeEstimateBefore; 214 Reward = NativeSizeAfter - 215 (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore); 216 getAdvisor()->updateNativeSizeEstimate(Reward); 217 } 218 log(Reward, /*Success=*/true); 219 } 220 221 void recordInliningWithCalleeDeletedImpl() override { 222 MLInlineAdvice::recordInliningWithCalleeDeletedImpl(); 223 getAdvisor()->resetNativeSize(Caller); 224 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() && 225 !getAdvisor()->isForcedToStop()) { 226 int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller); 227 int Reward = NativeSizeAfter - 228 (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore); 229 getAdvisor()->updateNativeSizeEstimate(Reward); 230 log(Reward, /*Success=*/true); 231 } 232 } 233 234 void recordUnsuccessfulInliningImpl(const InlineResult &Result) override { 235 MLInlineAdvice::recordUnsuccessfulInliningImpl(Result); 236 log(NoReward, /*Success=*/false); 237 } 238 239 void recordUnattemptedInliningImpl() override { 240 MLInlineAdvice::recordUnattemptedInliningImpl(); 241 log(NoReward, /*Success=*/false); 242 } 243 244 void log(int64_t Reward, bool Success) { 245 if (Mandatory) 246 return; 247 InlineEvent Event; 248 Event.AdvisedDecision = isInliningRecommended(); 249 Event.DefaultDecision = DefaultDecision; 250 Event.Effect = Success; 251 Event.Reward = Reward; 252 Logger.logInlineEvent(Event, getAdvisor()->getModelRunner()); 253 } 254 255 static const int64_t NoReward = 0; 256 TrainingLogger &Logger; 257 const Optional<size_t> CallerSizeEstimateBefore; 258 const Optional<size_t> CalleeSizeEstimateBefore; 259 const int64_t DefaultDecision; 260 const int64_t Mandatory; 261 }; 262 263 /// A pseudo model runner. We use it to store feature values when collecting 264 /// logs for the default policy, but never ask it to 'run'. 265 class NoInferenceModelRunner : public MLModelRunner { 266 public: 267 NoInferenceModelRunner(LLVMContext &Ctx) 268 : MLModelRunner(Ctx), Features(NumberOfFeatures) {} 269 void setFeature(FeatureIndex Index, int64_t Value) override { 270 Features[static_cast<int>(Index)] = Value; 271 } 272 273 int64_t getFeature(int Index) const override { return Features[Index]; } 274 bool run() override { 275 llvm_unreachable("We shouldn't call run on this model runner."); 276 } 277 278 private: 279 InlineFeatures Features; 280 }; 281 282 /// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs 283 /// to dynamically load and evaluate a TF SavedModel 284 /// (https://www.tensorflow.org/guide/saved_model). Runtime performance is 285 /// sacrificed for ease of use while training. 286 class ModelUnderTrainingRunner final : public MLModelRunner { 287 public: 288 ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath); 289 290 bool run() override; 291 292 // Disallows copy and assign. 293 ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete; 294 ModelUnderTrainingRunner & 295 operator=(const ModelUnderTrainingRunner &) = delete; 296 297 void setFeature(FeatureIndex Index, int64_t Value) override; 298 int64_t getFeature(int Index) const override; 299 bool isValid() const { return !!Evaluator; } 300 301 const std::vector<LoggedFeatureSpec> &outputLoggedFeatureSpecs() const { 302 return OutputSpecs; 303 } 304 305 const Optional<TFModelEvaluator::EvaluationResult> & 306 lastEvaluationResult() const { 307 return LastEvaluationResult; 308 } 309 310 private: 311 std::unique_ptr<TFModelEvaluator> Evaluator; 312 std::vector<LoggedFeatureSpec> OutputSpecs; 313 Optional<TFModelEvaluator::EvaluationResult> LastEvaluationResult; 314 315 // The training framework needs some additional features. 316 const std::vector<TensorSpec> TrainingOnlyFeatures{ 317 TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}), 318 TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}), 319 TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}), 320 TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})}; 321 }; 322 } // namespace 323 324 TrainingLogger::TrainingLogger(StringRef LogFileName, 325 const ModelUnderTrainingRunner *MUTR) 326 : LogFileName(LogFileName), MUTR(MUTR) { 327 // The first output is the inlining decision. 328 if (MUTR) 329 OutputCount = MUTR->outputLoggedFeatureSpecs().size(); 330 std::vector<LoggedFeatureSpec> FT; 331 332 for (size_t I = 0; I < NumberOfFeatures; ++I) 333 FT.push_back( 334 {TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None}); 335 if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1) 336 FT.insert(FT.end(), MUTR->outputLoggedFeatureSpecs().begin() + 1, 337 MUTR->outputLoggedFeatureSpecs().end()); 338 339 DefaultDecisionPos = FT.size(); 340 FT.push_back( 341 {TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}), None}); 342 343 DecisionPos = FT.size(); 344 FT.push_back({TensorSpec::createSpec<int64_t>(DecisionName, {1}), None}); 345 346 L = std::make_unique<Logger>( 347 FT, TensorSpec::createSpec<int64_t>(RewardName, {1}), 348 InlineSizeEstimatorAnalysis::isEvaluatorRequested()); 349 } 350 351 /// Log one inlining event. 352 void TrainingLogger::logInlineEvent(const InlineEvent &Event, 353 const MLModelRunner &ModelRunner) { 354 size_t CurrentFeature = 0; 355 for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) { 356 int64_t F = ModelRunner.getFeature(CurrentFeature); 357 L->logTensorValue(CurrentFeature, &F); 358 } 359 360 for (size_t I = 1; I < OutputCount; ++I) { 361 const auto &Result = *MUTR->lastEvaluationResult(); 362 auto &Spec = MUTR->outputLoggedFeatureSpecs()[I].Spec; 363 const char *RawData = 364 reinterpret_cast<const char *>(Result.getUntypedTensorValue(I)); 365 L->logTensorValue(CurrentFeature, RawData, 366 Spec.getElementCount() * Spec.getElementByteSize()); 367 ++CurrentFeature; 368 } 369 370 assert(CurrentFeature == DefaultDecisionPos); 371 L->logTensorValue(DefaultDecisionPos, &Event.DefaultDecision); 372 L->logTensorValue(DecisionPos, &Event.AdvisedDecision); 373 if (InlineSizeEstimatorAnalysis::isEvaluatorRequested()) 374 L->logReward(Event.Reward); 375 376 // For debugging / later use 377 Effects.push_back(Event.Effect); 378 } 379 380 void TrainingLogger::print() { 381 std::error_code EC; 382 raw_fd_ostream OutFile(LogFileName, EC); 383 L->print(OutFile); 384 } 385 386 DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor( 387 Module &M, ModuleAnalysisManager &MAM, 388 std::unique_ptr<MLModelRunner> ModelRunner, 389 std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference, 390 std::unique_ptr<TrainingLogger> Logger) 391 : MLInlineAdvisor(M, MAM, std::move(ModelRunner)), 392 GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference), 393 Logger(std::move(Logger)), 394 InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0), 395 CurrentNativeSize(InitialNativeSize) { 396 // We cannot have the case of neither inference nor logging. 397 assert(IsDoingInference || isLogging()); 398 } 399 400 DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() { 401 if (isLogging()) 402 Logger->print(); 403 } 404 405 Optional<size_t> 406 DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const { 407 if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested()) 408 return None; 409 auto &R = 410 FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F)); 411 if (!R) { 412 F.getParent()->getContext().emitError( 413 "Native size estimator is not present."); 414 return 0; 415 } 416 return *R; 417 } 418 419 std::unique_ptr<MLInlineAdvice> 420 DevelopmentModeMLInlineAdvisor::getMandatoryAdvice( 421 CallBase &CB, OptimizationRemarkEmitter &ORE) { 422 if (!isLogging()) 423 return MLInlineAdvisor::getMandatoryAdvice(CB, ORE); 424 425 return std::make_unique<LoggingMLInlineAdvice>( 426 /*Advisor=*/this, 427 /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/true, /*Logger=*/*Logger, 428 /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), 429 /*CalleeSizeEstimateBefore=*/ 430 getNativeSizeEstimate(*CB.getCalledFunction()), 431 /*DefaultDecision=*/true, /*Mandatory*/ true); 432 } 433 434 std::unique_ptr<MLInlineAdvice> 435 DevelopmentModeMLInlineAdvisor::getAdviceFromModel( 436 CallBase &CB, OptimizationRemarkEmitter &ORE) { 437 if (IsDoingInference && !isLogging()) 438 return MLInlineAdvisor::getAdviceFromModel(CB, ORE); 439 440 bool DefaultAdvice = GetDefaultAdvice(CB); 441 auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice; 442 return std::make_unique<LoggingMLInlineAdvice>( 443 /*Advisor=*/this, 444 /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation, 445 /*Logger=*/*Logger, 446 /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()), 447 /*CalleeSizeEstimateBefore=*/ 448 getNativeSizeEstimate(*CB.getCalledFunction()), 449 /*DefaultDecision=*/DefaultAdvice); 450 } 451 452 size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() { 453 if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested()) 454 return 0; 455 size_t Ret = 0; 456 for (auto &F : M) { 457 if (F.isDeclaration()) 458 continue; 459 if (isFunctionDeleted(&F)) 460 continue; 461 Ret += *getNativeSizeEstimate(F); 462 } 463 return Ret; 464 } 465 466 ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx, 467 const std::string &ModelPath) 468 : MLModelRunner(Ctx) { 469 std::vector<TensorSpec> InputSpecs; 470 for (size_t I = 0; I < NumberOfFeatures; ++I) 471 InputSpecs.push_back( 472 TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1})); 473 InputSpecs.insert(InputSpecs.end(), TrainingOnlyFeatures.begin(), 474 TrainingOnlyFeatures.end()); 475 SmallVector<char, 128> OutputSpecsPath; 476 StringRef OutputSpecPath = TFOutputSpecOverride; 477 if (OutputSpecPath.empty()) { 478 llvm::sys::path::append(OutputSpecsPath, ModelPath, "output_spec.json"); 479 OutputSpecPath = {OutputSpecsPath.data(), OutputSpecsPath.size()}; 480 } 481 482 if (!loadOutputSpecs(Ctx, OutputSpecPath, DecisionName, OutputSpecs)) 483 return; 484 485 Evaluator = std::make_unique<TFModelEvaluator>( 486 ModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I].Spec; }, 487 OutputSpecs.size()); 488 if (!Evaluator || !Evaluator->isValid()) { 489 Ctx.emitError("Failed to create inliner saved model evaluator"); 490 Evaluator.reset(); 491 return; 492 } 493 } 494 495 bool ModelUnderTrainingRunner::run() { 496 LastEvaluationResult = Evaluator->evaluate(); 497 if (!LastEvaluationResult.hasValue()) { 498 Ctx.emitError("Error evaluating model."); 499 return false; 500 } 501 int64_t Decision = *LastEvaluationResult->getTensorValue<int64_t>(0); 502 return static_cast<bool>(Decision); 503 } 504 505 int64_t ModelUnderTrainingRunner::getFeature(int Index) const { 506 return *Evaluator->getInput<int64_t>(Index); 507 } 508 509 void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) { 510 size_t NumericIndex = static_cast<size_t>(Index); 511 *(Evaluator->getInput<int64_t>(NumericIndex)) = Value; 512 } 513 514 std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor( 515 Module &M, ModuleAnalysisManager &MAM, 516 std::function<bool(CallBase &)> GetDefaultAdvice) { 517 auto &Ctx = M.getContext(); 518 std::unique_ptr<MLModelRunner> Runner; 519 ModelUnderTrainingRunner *MUTRPtr = nullptr; 520 bool IsDoingInference = false; 521 if (TFModelUnderTrainingPath.empty()) 522 Runner.reset(new NoInferenceModelRunner(Ctx)); 523 else { 524 auto MUTR = std::make_unique<ModelUnderTrainingRunner>( 525 Ctx, TFModelUnderTrainingPath); 526 if (!MUTR || !MUTR->isValid()) { 527 Ctx.emitError("Could not load the policy model from the provided path"); 528 return nullptr; 529 } 530 IsDoingInference = true; 531 MUTRPtr = MUTR.get(); 532 Runner = std::move(MUTR); 533 } 534 std::unique_ptr<TrainingLogger> Logger; 535 if (!TrainingLog.empty()) 536 Logger = std::make_unique<TrainingLogger>(TrainingLog, MUTRPtr); 537 538 return std::make_unique<DevelopmentModeMLInlineAdvisor>( 539 M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference, 540 std::move(Logger)); 541 } 542 #endif // defined(LLVM_HAVE_TF_API) 543