//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using Tensorflow C APIs, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"

#include <functional>
#include <limits>
#include <vector>

using namespace llvm;

static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc("Path to SavedModel from the previous training iteration."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));

static cl::opt<std::string> TFDecisionName(
    "ml-inliner-trained-model-decision-name", cl::Hidden,
    cl::init("StatefulPartitionedCall"),
    cl::desc("Name of the graph operation representing the decision."));

namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  bool DefaultDecision = false;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  bool AdvisedDecision = false;

  /// What actually happened. This would be 'false' in the case of an inlining
  /// error, even if AdvisedDecision is true; otherwise it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// The change in size: size_after - size_before.
  int64_t Reward = 0;
};

/// Collect data we may use for training a model, and write it as a textual
/// Tensorflow SequenceExample
/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
/// protobuf (https://developers.google.com/protocol-buffers).
/// Because this is a protobuf, we cannot just stream the events as they come.
/// Internally, TrainingLogger stores data in column-major format, because that
/// lines up with how TF SequenceExample represents it.
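///
/// For illustration, a log with a single int64 feature named "x" and two
/// recorded events might look like the sketch below (made-up values; the
/// exact layout is what print() and writeRawTensorsAsFeatureLists() emit
/// further down):
///
///   feature_lists: {
///     feature_list: {
///       key: "x" value: {
///         feature: { int64_list: { value: [3] } }
///         feature: { int64_list: { value: [5] } }
///       }
///     }
///   }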
class TrainingLogger final {
public:
  TrainingLogger(StringRef LogFileName);

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

  /// Print the stored tensors.
  void print();

private:
  /// Write the values of one tensor as a list.
  template <typename T>
  void writeTensorValues(raw_fd_ostream &OutFile, const char *TensorData,
                         size_t ElemCount) const {
    OutFile << "[";
    const T *TypedData = reinterpret_cast<const T *>(TensorData);
    for (size_t I = 0; I < ElemCount; ++I) {
      if (I > 0)
        OutFile << ", ";
      OutFile << TypedData[I];
    }
    OutFile << "]";
  }

  /// Write a list of tensors as a sequence of TensorFlow FeatureList
  /// protobufs. The tensors are assumed to be stored contiguously, in
  /// row-major format, in the TensorData buffer. Each tensor has the shape
  /// given by Spec. The feature name in the output is either the provided
  /// LoggingName, if specified, otherwise it's the name of the tensor (as
  /// given by Spec).
  template <typename T>
  void
  writeTensorsAsFeatureLists(raw_fd_ostream &OutFile, const TensorSpec &Spec,
                             const T *TensorData, size_t TensorCount,
                             Optional<StringRef> LoggingName = None) const {
    writeRawTensorsAsFeatureLists(OutFile, Spec,
                                  reinterpret_cast<const char *>(TensorData),
                                  TensorCount, LoggingName);
  }

  /// Untyped implementation of the API above.
  void
  writeRawTensorsAsFeatureLists(raw_fd_ostream &OutFile,
                                const TensorSpec &Spec, const char *TensorData,
                                size_t TensorCount,
                                Optional<StringRef> LoggingName = None) const {
    const char *FieldName = "<invalid>";
    std::function<void(const char *)> ValueWriter;
    // The 'Feature' protobuf only has 3 possible fields: float_list,
    // int64_list, or bytes_list, so we capture int32 values as int64. We
    // don't support any other types.
    if (Spec.isElementType<int64_t>()) {
      FieldName = "int64_list";
      ValueWriter = [&](const char *Data) {
        writeTensorValues<int64_t>(OutFile, Data, Spec.getElementCount());
      };
    } else if (Spec.isElementType<int32_t>()) {
      FieldName = "int64_list";
      ValueWriter = [&](const char *Data) {
        writeTensorValues<int32_t>(OutFile, Data, Spec.getElementCount());
      };
    } else if (Spec.isElementType<float>()) {
      FieldName = "float_list";
      ValueWriter = [&](const char *Data) {
        writeTensorValues<float>(OutFile, Data, Spec.getElementCount());
      };
    } else
      llvm_unreachable("Unsupported tensor type.");

    OutFile << "  feature_list: {\n";
    OutFile << "    key: "
            << "\"" << (LoggingName ? *LoggingName : Spec.name()) << "\" ";
    OutFile << "value: {\n";
    size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize();
    for (const char *P = TensorData,
                    *E = TensorData + TensorByteSize * TensorCount;
         P < E; P += TensorByteSize) {
      OutFile << "      feature: { " << FieldName << ": { value: ";
      ValueWriter(P);
      OutFile << " } }\n";
    }
    OutFile << "    }\n";
    OutFile << "  }\n";
  }

  StringRef LogFileName;
  std::vector<InlineFeatures> Features;
  std::vector<int64_t> DefaultDecisions;
  std::vector<int64_t> Decisions;
  std::vector<bool> Effects;
  std::vector<int64_t> Rewards;
};
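
// Typical lifecycle, as wired up further down in this file (a sketch, not an
// API contract): the development-mode advisor constructs a TrainingLogger
// with the -training-log path, logInlineEvent() runs once per non-mandatory
// inlining decision's outcome, and the advisor's destructor calls print().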

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is only concerned with producing training data
/// ("logs"). This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster when starting from a
/// reasonable policy.
///
/// 2) collect logs for the ML policy, using a model from a previous training
/// iteration. That model may internally apply some small random perturbation
/// to its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs are then used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation: the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than taking
/// the release candidate model, compiling it in 'release' mode, validating
/// it, and then potentially discarding it, it's easier to just pass the model
/// to the compiler as a one-off, albeit with slower compilation. Once the
/// model behaves satisfactorily, it can be compiled AOT in release mode, for
/// efficiency. The expectation is that a well-trained model provides a good
/// policy over a sufficiently diverse codebase, over many changes (i.e.
/// training happens seldom).
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference,
      std::unique_ptr<TrainingLogger> Logger);

  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();
  void updateNativeSizeEstimate(int64_t Change) { CurrentNativeSize += Change; }
  void resetNativeSize(Function *F) {
    FAM.invalidate<InlineSizeEstimatorAnalysis>(*F);
  }

  std::unique_ptr<MLInlineAdvice>
  getMandatoryAdvice(CallBase &CB, OptimizationRemarkEmitter &ORE) override;
  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  size_t getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !!Logger; }

  std::function<bool(CallBase &)> GetDefaultAdvice;
  const bool IsDoingInference;
  std::unique_ptr<TrainingLogger> Logger;

  const int32_t InitialNativeSize;
  int32_t CurrentNativeSize = 0;
};
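
// Decision flow for the advisor above (paraphrasing getAdviceFromModel and
// getMandatoryAdvice further down): with inference enabled but no logging,
// advice is delegated wholesale to MLInlineAdvisor; otherwise a
// LoggingMLInlineAdvice is produced, recommending the model's output when
// inference is on and the default policy's decision when it is not.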

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger, size_t CallerSizeEstimateBefore,
                        size_t CalleeSizeEstimateBefore, bool DefaultDecision,
                        bool Mandatory = false)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    if (!getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller) +
                            CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (!getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (CallerSizeEstimateBefore + CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    }
  }

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    if (Mandatory)
      return;
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const size_t CallerSizeEstimateBefore;
  const size_t CalleeSizeEstimateBefore;
  const bool DefaultDecision;
  const bool Mandatory;
};
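
// A worked example of the reward computed in recordInliningImpl above
// (made-up sizes, for illustration only): if the caller's native size
// estimate was 100 and the callee's was 40 before inlining, and the caller
// re-estimates to 120 afterwards (with the callee still live), then
// NativeSizeAfter = 120 + 40 = 160 and Reward = 160 - (100 + 40) = 20,
// i.e. the decision grew the code by an estimated 20 units.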

/// A pseudo model runner. We use it to store feature values when collecting
/// logs for the default policy, but never ask it to 'run'.
class NoInferenceModelRunner : public MLModelRunner {
public:
  NoInferenceModelRunner(LLVMContext &Ctx)
      : MLModelRunner(Ctx), Features(NumberOfFeatures) {}
  void setFeature(FeatureIndex Index, int64_t Value) override {
    Features[static_cast<int>(Index)] = Value;
  }

  int64_t getFeature(int Index) const override { return Features[Index]; }
  bool run() override {
    llvm_unreachable("We shouldn't call run on this model runner.");
  }

private:
  InlineFeatures Features;
};

/// ModelUnderTrainingRunner - training mode implementation. It uses TF C APIs
/// to dynamically load and evaluate a TF SavedModel
/// (https://www.tensorflow.org/guide/saved_model). Runtime performance is
/// sacrificed for ease of use while training.
class ModelUnderTrainingRunner final : public MLModelRunner {
public:
  ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath);

  bool run() override;

  // Disallow copy and assign.
  ModelUnderTrainingRunner(const ModelUnderTrainingRunner &) = delete;
  ModelUnderTrainingRunner &
  operator=(const ModelUnderTrainingRunner &) = delete;

  void setFeature(FeatureIndex Index, int64_t Value) override;
  int64_t getFeature(int Index) const override;
  bool isValid() const { return !!Evaluator; }

private:
  std::unique_ptr<TFModelEvaluator> Evaluator;

  // The training framework needs some additional features.
  const std::vector<TensorSpec> TrainingOnlyFeatures{
      TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
      TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
      TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};
};
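
// An illustrative sketch of the SavedModel interface this runner expects,
// derived from the specs above and the constructor below, assuming default
// flag values: one int64 feed per inlining feature, named
// "action_<feature name>"; the four training-only feeds
// (action_inlining_default, action_discount, action_reward,
// action_step_type); and one int64 decision fetched from the
// "StatefulPartitionedCall" op.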
} // namespace

TrainingLogger::TrainingLogger(StringRef LogFileName)
    : LogFileName(LogFileName) {
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    Features.push_back(InlineFeatures());
}

/// Log one inlining event.
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
                                    const MLModelRunner &ModelRunner) {
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    Features[I].push_back(ModelRunner.getFeature(I));

  Decisions.push_back(Event.AdvisedDecision);
  Effects.push_back(Event.Effect);
  Rewards.push_back(Event.Reward);
  DefaultDecisions.push_back(Event.DefaultDecision);
}

void TrainingLogger::print() {
  std::error_code EC;
  raw_fd_ostream OutFile(LogFileName, EC);
  size_t NumberOfRecords = Decisions.size();
  if (NumberOfRecords == 0)
    return;

  OutFile << "feature_lists: {\n";
  for (size_t I = 0; I < Features.size(); ++I)
    writeTensorsAsFeatureLists(
        OutFile, TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}),
        Features[I].data(), NumberOfRecords);

  writeTensorsAsFeatureLists(
      OutFile, TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}),
      DefaultDecisions.data(), NumberOfRecords);

  writeTensorsAsFeatureLists(OutFile,
                             TensorSpec::createSpec<int64_t>(DecisionName, {1}),
                             Decisions.data(), NumberOfRecords);
  writeTensorsAsFeatureLists(OutFile,
                             TensorSpec::createSpec<int64_t>(RewardName, {1}),
                             Rewards.data(), NumberOfRecords);
  // Note: Effects are recorded, but not currently part of the log output.

  OutFile << "}\n";
}

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice, bool IsDoingInference,
    std::unique_ptr<TrainingLogger> Logger)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice), IsDoingInference(IsDoingInference),
      Logger(std::move(Logger)),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  if (isLogging())
    Logger->print();
}

size_t
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdvice(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (!isLogging())
    return MLInlineAdvisor::getMandatoryAdvice(CB, ORE);
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/true, /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true, /*Mandatory=*/true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation = IsDoingInference ? ModelRunner->run() : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    if (isFunctionDeleted(&F))
      continue;
    Ret += getNativeSizeEstimate(F);
  }
  return Ret;
}

ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
                                                   const std::string &ModelPath)
    : MLModelRunner(Ctx) {
  std::vector<TensorSpec> InputSpecs;
  std::vector<TensorSpec> OutputSpecs;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputSpecs.push_back(
        TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
  InputSpecs.insert(InputSpecs.end(), TrainingOnlyFeatures.begin(),
                    TrainingOnlyFeatures.end());
  OutputSpecs.push_back(TensorSpec::createSpec<int64_t>(TFDecisionName, {1}));

  Evaluator =
      std::make_unique<TFModelEvaluator>(ModelPath, InputSpecs, OutputSpecs);
  if (!Evaluator || !Evaluator->isValid()) {
    Ctx.emitError("Failed to create inliner saved model evaluator");
    Evaluator.reset();
    return;
  }
}

bool ModelUnderTrainingRunner::run() {
  auto ER = Evaluator->evaluate();
  if (!ER.hasValue()) {
    Ctx.emitError("Error evaluating model.");
    return false;
  }
  int64_t Decision = *ER->getTensorValue<int64_t>(0);
  return static_cast<bool>(Decision);
}

int64_t ModelUnderTrainingRunner::getFeature(int Index) const {
  return *Evaluator->getInput<int64_t>(Index);
}

void ModelUnderTrainingRunner::setFeature(FeatureIndex Index, int64_t Value) {
  size_t NumericIndex = static_cast<size_t>(Index);
  *(Evaluator->getInput<int64_t>(NumericIndex)) = Value;
}
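
// Illustrative flag combinations for the three advisor modes (a sketch; it
// assumes the development-mode advisor itself is selected elsewhere in LLVM,
// e.g. via -enable-ml-inliner=development):
//   mode 1: -training-log=<path>
//   mode 2: -training-log=<path> -ml-inliner-model-under-training=<dir>
//   mode 3: -ml-inliner-model-under-training=<dir>
// Per the consistency check below, modes 1 and 2 also require the native size
// estimator to be available, and mode 3 requires it to be absent.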
std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  // Requesting a training log and having a size estimator must go together:
  // error out if exactly one of the two is present.
  if (TrainingLog.empty() !=
      !InlineSizeEstimatorAnalysis::isEvaluatorRequested()) {
    Ctx.emitError("For development mode, if training logs are requested, then "
                  "a size estimator must be available; either that, or "
                  "neither is specified.");
    return nullptr;
  }

  std::unique_ptr<MLModelRunner> Runner;

  bool IsDoingInference = false;
  if (TFModelUnderTrainingPath.empty()) {
    Runner.reset(new NoInferenceModelRunner(Ctx));
  } else {
    auto MUTRunner = std::make_unique<ModelUnderTrainingRunner>(
        Ctx, TFModelUnderTrainingPath);
    // std::make_unique never returns null, so check the evaluator (which the
    // constructor resets on failure) instead.
    if (!MUTRunner->isValid()) {
      Ctx.emitError("Could not load the policy model from the provided path");
      return nullptr;
    }
    Runner = std::move(MUTRunner);
    IsDoingInference = true;
  }
  std::unique_ptr<TrainingLogger> Logger;
  if (!TrainingLog.empty())
    Logger = std::make_unique<TrainingLogger>(TrainingLog);

  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, IsDoingInference,
      std::move(Logger));
}
#endif // defined(LLVM_HAVE_TF_API)