//===- TFUtils.cpp - tensorflow evaluation utilities ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities for interfacing with tensorflow C APIs.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"

#include "google/protobuf/text_format.h"
#include "tensorflow/c/c_api.h"
#include "tensorflow/c/c_api_experimental.h"
#include "tensorflow/core/example/example.pb.h"
#include <cassert>
#include <cstring>
#include <numeric>

using namespace llvm;

using google::protobuf::Message;
using google::protobuf::TextFormat;

static cl::opt<bool>
    ProtobufTextMode("tfutils-text-log", cl::init(false), cl::Hidden,
                     cl::desc("Output textual (human-readable) protobuf."));

namespace {

using TFGraphPtr = std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)>;
using TFSessionOptionsPtr =
    std::unique_ptr<TF_SessionOptions, decltype(&TF_DeleteSessionOptions)>;
using TFStatusPtr = std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)>;

struct TFInitializer {
  TFInitializer() {
    assert(!IsInitialized && "TFInitializer should be initialized only once");
    int Argc = 1;
    const char *Name = "";
    const char **NamePtr = &Name;
    TF_InitMain(Name, &Argc, const_cast<char ***>(&NamePtr));
    IsInitialized = true;
  }
  bool IsInitialized = false;
};

llvm::ManagedStatic<TFInitializer> TFLibInitializer;

bool ensureInitTF() { return TFLibInitializer->IsInitialized; }

TFGraphPtr createTFGraph() {
  return TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph);
}

TFStatusPtr createTFStatus() {
  return TFStatusPtr(TF_NewStatus(), &TF_DeleteStatus);
}

TFSessionOptionsPtr createTFSessionOptions() {
  return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions);
}
} // namespace

namespace llvm {
class EvaluationResultImpl {
public:
  EvaluationResultImpl(size_t OutputSize)
      : OutputSize(OutputSize), Output(OutputSize) {}

  ~EvaluationResultImpl() {
    for (auto *P : Output)
      if (P)
        TF_DeleteTensor(P);
  }

  EvaluationResultImpl(const EvaluationResultImpl &) = delete;
  EvaluationResultImpl(EvaluationResultImpl &&Other) = delete;
  std::vector<TF_Tensor *> &getOutput() { return Output; }

private:
  const size_t OutputSize;
  std::vector<TF_Tensor *> Output;
};

size_t TensorSpec::getElementByteSize() const {
  return TF_DataTypeSize(static_cast<TF_DataType>(TypeIndex));
}

TensorSpec::TensorSpec(const std::string &Name, int Port, int TypeIndex,
                       const std::vector<int64_t> &Shape)
    : Name(Name), Port(Port), TypeIndex(TypeIndex), Shape(Shape),
      // Seed the accumulator with an int64_t so the element count is computed
      // (and returned) as int64_t rather than truncated to int.
      ElementCount(std::accumulate(Shape.begin(), Shape.end(), int64_t{1},
                                   std::multiplies<int64_t>())) {}

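// getTensorSpecFromJSON (below) parses a TensorSpec out of a JSON dictionary.
// As a sketch, an accepted value looks like this (the tensor name and shape
// are illustrative, not taken from any real model):
//
//   {"name": "serving_default_input", "port": 0, "type": "int64_t",
//    "shape": [1, 4]}
//
// "type" must spell one of the TFUTILS_SUPPORTED_TYPES element types by its
// C++ name, e.g. "int64_t", "int32_t", or "float".
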
Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
                                           const json::Value &Value) {
  auto EmitError = [&](const llvm::Twine &Message) -> Optional<TensorSpec> {
    std::string S;
    llvm::raw_string_ostream OS(S);
    OS << Value;
    Ctx.emitError("Unable to parse JSON Value as spec (" + Message + "): " + S);
    return None;
  };
  // FIXME: accept a Path as a parameter, and use it for error reporting.
  json::Path::Root Root("tensor_spec");
  json::ObjectMapper Mapper(Value, Root);
  if (!Mapper)
    return EmitError("Value is not a dict");

  std::string TensorName;
  int TensorPort = -1;
  std::string TensorType;
  std::vector<int64_t> TensorShape;

  if (!Mapper.map<std::string>("name", TensorName))
    return EmitError("'name' property not present or not a string");
  if (!Mapper.map<std::string>("type", TensorType))
    return EmitError("'type' property not present or not a string");
  if (!Mapper.map<int>("port", TensorPort))
    return EmitError("'port' property not present or not an int");
  if (!Mapper.map<std::vector<int64_t>>("shape", TensorShape))
    return EmitError("'shape' property not present or not an int array");

#define PARSE_TYPE(T, E)                                                       \
  if (TensorType == #T)                                                        \
    return TensorSpec::createSpec<T>(TensorName, TensorShape, TensorPort);
  TFUTILS_SUPPORTED_TYPES(PARSE_TYPE)
#undef PARSE_TYPE
  return None;
}

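// loadOutputSpecs (below) expects a JSON array of {tensor_spec, logging_name}
// dictionaries, read from SpecFileOverride if given, else from
// <ModelPath>/output_spec.json. A minimal sketch of such a file, with
// illustrative tensor and logging names:
//
//   [
//     {"logging_name": "inlining_decision",
//      "tensor_spec": {"name": "StatefulPartitionedCall", "port": 0,
//                      "type": "int64_t", "shape": [1]}}
//   ]
//
// The first entry must describe the decision tensor, and its logging_name
// must equal the ExpectedDecisionName the caller passes in.
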
Optional<std::vector<LoggedFeatureSpec>>
loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
                StringRef ModelPath, StringRef SpecFileOverride) {
  SmallVector<char, 128> OutputSpecsPath;
  StringRef FileName = SpecFileOverride;
  if (FileName.empty()) {
    llvm::sys::path::append(OutputSpecsPath, ModelPath, "output_spec.json");
    FileName = {OutputSpecsPath.data(), OutputSpecsPath.size()};
  }

  auto BufferOrError = MemoryBuffer::getFileOrSTDIN(FileName);
  if (!BufferOrError) {
    Ctx.emitError("Error opening output specs file: " + FileName + " : " +
                  BufferOrError.getError().message());
    return None;
  }
  auto ParsedJSONValues = json::parse(BufferOrError.get()->getBuffer());
  if (!ParsedJSONValues) {
    Ctx.emitError("Could not parse specs file: " + FileName);
    return None;
  }
  auto ValuesArray = ParsedJSONValues->getAsArray();
  if (!ValuesArray) {
    Ctx.emitError("Expected an array of {tensor_spec:<TensorSpec>, "
                  "logging_name:<name>} dictionaries");
    return None;
  }
  std::vector<LoggedFeatureSpec> Ret;
  for (const auto &Value : *ValuesArray)
    if (const auto *Obj = Value.getAsObject())
      if (const auto *SpecPart = Obj->get("tensor_spec"))
        if (auto TensorSpec = getTensorSpecFromJSON(Ctx, *SpecPart))
          if (auto LoggingName = Obj->getString("logging_name")) {
            if (!TensorSpec->isElementType<int64_t>() &&
                !TensorSpec->isElementType<int32_t>() &&
                !TensorSpec->isElementType<float>()) {
              Ctx.emitError(
                  "Only int64, int32, and float tensors are supported. "
                  "Found unsupported type for tensor named " +
                  TensorSpec->name());
              return None;
            }
            Ret.push_back({*TensorSpec, LoggingName->str()});
          }

  if (ValuesArray->size() != Ret.size()) {
    Ctx.emitError(
        "Unable to parse output spec. It should be a json file containing an "
        "array of dictionaries. Each dictionary must have a 'tensor_spec' key, "
        "with a json object describing a TensorSpec; and a 'logging_name' key, "
        "which is a string to use as name when logging this tensor in the "
        "training log.");
    return None;
  }
  if (Ret.empty() || *Ret[0].LoggingName != ExpectedDecisionName) {
    Ctx.emitError("The first output spec must describe the decision tensor, "
                  "and must have the logging_name " +
                  StringRef(ExpectedDecisionName));
    return None;
  }
  return Ret;
}

class TFModelEvaluatorImpl {
public:
  TFModelEvaluatorImpl(StringRef SavedModelPath,
                       const std::vector<TensorSpec> &InputSpecs,
                       function_ref<TensorSpec(size_t)> GetOutputSpecs,
                       size_t OutputSpecsSize, const char *Tags);

  bool isValid() const { return IsValid; }
  size_t OutputSize() const { return OutputFeed.size(); }

  void evaluate(TF_Tensor **Output, TF_Status *Status) {
    TF_SessionRun(Session, nullptr, InputFeed.data(), Input.data(),
                  Input.size(), OutputFeed.data(), Output, OutputFeed.size(),
                  nullptr, 0, nullptr, Status);
  }

  void initInput(size_t Index, TF_DataType Type,
                 const std::vector<int64_t> &Dimensions);
  const std::vector<TF_Tensor *> &getInput() const { return Input; }

  ~TFModelEvaluatorImpl();

private:
  /// The objects necessary for carrying out an evaluation of the SavedModel.
  /// They are expensive to set up, and we maintain them across all the
  /// evaluations of the model.
  TF_Session *Session = nullptr;
  TFGraphPtr Graph;
  TFSessionOptionsPtr Options;

  /// The specification of the input nodes.
  std::vector<TF_Output> InputFeed;

  /// The input tensors. They must match, by index, the corresponding
  /// InputFeed value. We set up the tensors once and just mutate their
  /// scalars before each evaluation. The input tensors keep their value after
  /// an evaluation.
  std::vector<TF_Tensor *> Input;

  /// The specification of the output nodes. When evaluating, the tensors in
  /// the output tensor vector must match by index the corresponding element
  /// in the OutputFeed.
  std::vector<TF_Output> OutputFeed;

  void invalidate() { IsValid = false; }

  bool IsValid = true;

  /// Reusable utility for ensuring we can bind the requested Name to a node in
  /// the SavedModel Graph.
  bool checkReportAndInvalidate(const TF_Output &Output,
                                const TensorSpec &OutputSpec);
};

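// LoggerDataImpl (below) accumulates the training log as a
// tensorflow::SequenceExample: one FeatureList per logged tensor, keyed by
// its logging name, plus an optional FeatureList for the reward. With
// -tfutils-text-log, print() emits textual protobuf; a sketch of one record,
// with illustrative names and values:
//
//   feature_lists {
//     feature_list {
//       key: "inlining_decision"
//       value { feature { int64_list { value: 1 } } }
//     }
//     feature_list {
//       key: "reward"
//       value { feature { float_list { value: 0.5 } } }
//     }
//   }
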
class LoggerDataImpl {
  const std::vector<LoggedFeatureSpec> LoggedFeatureSpecs;
  const TensorSpec RewardSpec;

  tensorflow::SequenceExample SE;
  std::vector<tensorflow::FeatureList *> FeatureLists;
  tensorflow::FeatureList *Reward = nullptr;

public:
  LoggerDataImpl(const std::vector<LoggedFeatureSpec> &LoggedSpecs,
                 const TensorSpec &RewardSpec, bool IncludeReward)
      : LoggedFeatureSpecs(LoggedSpecs), RewardSpec(RewardSpec) {
    auto *FL = SE.mutable_feature_lists()->mutable_feature_list();
    if (IncludeReward)
      Reward = &(*FL)[RewardSpec.name()];
    // Allocate the map entries first, then capture their addresses. We will
    // not mutate the set of features after this (i.e. the pointers won't
    // dangle).
    for (const auto &LFS : LoggedSpecs) {
      (*FL)[LFS.LoggingName ? *LFS.LoggingName : LFS.Spec.name()] = {};
    }
    for (const auto &LFS : LoggedSpecs)
      FeatureLists.push_back(
          &(*FL)[LFS.LoggingName ? *LFS.LoggingName : LFS.Spec.name()]);
  }

  void print(raw_ostream &OS) {
    std::string OutStr;
    if (ProtobufTextMode)
      google::protobuf::TextFormat::PrintToString(SE, &OutStr);
    else
      OutStr = SE.SerializeAsString();

    OS << OutStr;
  }

  char *addNewTensor(size_t FeatureID) {
    const auto &Spec = LoggedFeatureSpecs[FeatureID].Spec;
    if (Spec.isElementType<float>()) {
      auto *RF = FeatureLists[FeatureID]
                     ->add_feature()
                     ->mutable_float_list()
                     ->mutable_value();
      RF->Resize(Spec.getElementCount(), 0.0);
      return reinterpret_cast<char *>(RF->mutable_data());
    } else if (Spec.isElementType<int32_t>() ||
               Spec.isElementType<int64_t>()) {
      auto *RF = FeatureLists[FeatureID]
                     ->add_feature()
                     ->mutable_int64_list()
                     ->mutable_value();
      RF->Resize(Spec.getElementCount(), 0);
      return reinterpret_cast<char *>(RF->mutable_data());
    }
    llvm_unreachable("Unsupported tensor type.");
  }

  template <typename T> void logReward(T Value) {
    if (RewardSpec.isElementType<float>())
      Reward->add_feature()->mutable_float_list()->add_value(Value);
    else if (RewardSpec.isElementType<int32_t>() ||
             RewardSpec.isElementType<int64_t>())
      Reward->add_feature()->mutable_int64_list()->add_value(Value);
    else
      llvm_unreachable("Unsupported tensor type.");
  }

  size_t getNrRecords() const {
    return FeatureLists.empty() ? 0 : FeatureLists[0]->feature().size();
  }
};
} // namespace llvm

TFModelEvaluatorImpl::TFModelEvaluatorImpl(
    StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs,
    function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize,
    const char *Tags = "serve")
    : Graph(createTFGraph()), Options(createTFSessionOptions()),
      InputFeed(InputSpecs.size()), Input(InputSpecs.size()),
      OutputFeed(OutputSpecsSize) {
  if (!ensureInitTF()) {
    errs() << "Tensorflow should have been initialized";
    return;
  }
  auto Status = createTFStatus();

  Session = TF_LoadSessionFromSavedModel(Options.get(), nullptr,
                                         SavedModelPath.str().c_str(), &Tags, 1,
                                         Graph.get(), nullptr, Status.get());
  if (TF_GetCode(Status.get()) != TF_Code::TF_OK) {
    errs() << TF_Message(Status.get());
    invalidate();
  }
  for (size_t I = 0; I < InputSpecs.size(); ++I) {
    auto &InputSpec = InputSpecs[I];
    InputFeed[I] = {
        TF_GraphOperationByName(Graph.get(), (InputSpec.name()).c_str()),
        InputSpec.port()};
    if (!checkReportAndInvalidate(InputFeed[I], InputSpec))
      return;
    initInput(I, static_cast<TF_DataType>(InputSpec.typeIndex()),
              InputSpec.shape());
  }
  for (size_t I = 0; I < OutputSpecsSize; ++I) {
    auto OutputSpec = GetOutputSpecs(I);
    OutputFeed[I] = {
        TF_GraphOperationByName(Graph.get(), (OutputSpec.name()).c_str()),
        OutputSpec.port()};
    if (!checkReportAndInvalidate(OutputFeed[I], OutputSpec))
      return;
  }
}

TFModelEvaluator::TFModelEvaluator(
    StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs,
    function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize,
    const char *Tags)
    : Impl(new TFModelEvaluatorImpl(SavedModelPath, InputSpecs, GetOutputSpecs,
                                    OutputSpecsSize, Tags)) {
  if (!Impl->isValid())
    Impl.reset();
}

TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath,
                                   const std::vector<TensorSpec> &InputSpecs,
                                   const std::vector<TensorSpec> &OutputSpecs,
                                   const char *Tags)
    : TFModelEvaluator(
          SavedModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I]; },
          OutputSpecs.size(), Tags) {}

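// A minimal end-to-end sketch of evaluator usage. The saved model path and
// tensor names are hypothetical; the typed getTensor/getTensorValue helpers
// are the ones declared in TFUtils.h:
//
//   std::vector<TensorSpec> Inputs{
//       TensorSpec::createSpec<int64_t>("serving_default_x", {1})};
//   std::vector<TensorSpec> Outputs{
//       TensorSpec::createSpec<int64_t>("StatefulPartitionedCall", {1})};
//   TFModelEvaluator Evaluator("/path/to/saved_model", Inputs, Outputs);
//   if (Evaluator.isValid()) {
//     *Evaluator.getTensor<int64_t>(0) = 42;
//     if (auto Result = Evaluator.evaluate()) {
//       int64_t Decision = *Result->getTensorValue<int64_t>(0);
//       (void)Decision;
//     }
//   }
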
TFModelEvaluatorImpl::~TFModelEvaluatorImpl() {
  for (auto *T : Input) {
    TF_DeleteTensor(T);
  }
  if (Session == nullptr)
    return;
  auto Status = createTFStatus();
  TF_DeleteSession(Session, Status.get());
  Session = nullptr;
  if (TF_GetCode(Status.get()) != TF_Code::TF_OK)
    errs() << "Could not delete TF session";
}

bool TFModelEvaluatorImpl::checkReportAndInvalidate(
    const TF_Output &Output, const TensorSpec &OutputSpec) {
  if (Output.oper)
    return true;
  errs() << "Could not find TF_Output named: " + OutputSpec.name();
  IsValid = false;
  return IsValid;
}

Optional<TFModelEvaluator::EvaluationResult> TFModelEvaluator::evaluate() {
  if (!isValid())
    return None;
  std::unique_ptr<EvaluationResultImpl> Ret =
      std::make_unique<EvaluationResultImpl>(Impl->OutputSize());
  auto Status = createTFStatus();
  Impl->evaluate(Ret->getOutput().data(), Status.get());
  if (TF_GetCode(Status.get()) != TF_Code::TF_OK) {
    errs() << TF_Message(Status.get());
    Impl.reset();
    return None;
  }
  return EvaluationResult(std::move(Ret));
}

void TFModelEvaluatorImpl::initInput(size_t Index, TF_DataType Type,
                                     const std::vector<int64_t> &Dimensions) {
  int64_t TotalSize = TF_DataTypeSize(Type);
  for (auto &D : Dimensions)
    TotalSize *= D;

  Input[Index] =
      TF_AllocateTensor(Type, Dimensions.data(), Dimensions.size(), TotalSize);
  std::memset(TF_TensorData(Input[Index]), 0, TotalSize);
}

void *TFModelEvaluator::getUntypedInput(size_t Index) {
  return TF_TensorData(Impl->getInput()[Index]);
}

TFModelEvaluator::EvaluationResult::EvaluationResult(
    std::unique_ptr<EvaluationResultImpl> Impl)
    : Impl(std::move(Impl)) {}

TFModelEvaluator::EvaluationResult::EvaluationResult(EvaluationResult &&Other)
    : Impl(std::move(Other.Impl)) {}

TFModelEvaluator::EvaluationResult &
TFModelEvaluator::EvaluationResult::operator=(EvaluationResult &&Other) {
  Impl = std::move(Other.Impl);
  return *this;
}

void *TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) {
  return TF_TensorData(Impl->getOutput()[Index]);
}

const void *
TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) const {
  return TF_TensorData(Impl->getOutput()[Index]);
}

#define TFUTILS_GETDATATYPE_IMPL(T, E)                                         \
  template <> int TensorSpec::getDataType<T>() { return E; }

TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_IMPL)

#undef TFUTILS_GETDATATYPE_IMPL

TFModelEvaluator::EvaluationResult::~EvaluationResult() {}
TFModelEvaluator::~TFModelEvaluator() {}

Logger::Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs,
               const TensorSpec &RewardSpec, bool IncludeReward)
    : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
      IncludeReward(IncludeReward),
      LoggerData(std::make_unique<LoggerDataImpl>(FeatureSpecs, RewardSpec,
                                                  IncludeReward)) {}

Logger::~Logger() {}

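// A minimal Logger usage sketch (feature and reward specs are illustrative):
//
//   std::vector<LoggedFeatureSpec> Specs{
//       {TensorSpec::createSpec<int64_t>("my_feature", {1}), None}};
//   TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
//   Logger Log(Specs, Reward, /*IncludeReward=*/true);
//   int64_t V = 42;
//   Log.logInt64Value(0, &V);     // appends one record for feature #0
//   Log.logFloatFinalReward(1.0); // 0-pads rewards, then logs the final one
//   Log.print(outs());            // serialized SequenceExample
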
#define LOG_REWARD(NAME, TYPE)                                                 \
  void Logger::log##NAME##Reward(TYPE Value) {                                 \
    assert(IncludeReward);                                                     \
    LoggerData->logReward(Value);                                              \
  }

LOG_REWARD(Float, float)
LOG_REWARD(Int32, int32_t)
LOG_REWARD(Int64, int64_t)
#undef LOG_REWARD

// log<T>FinalReward pads all but the last record with a zero reward, then
// logs Value as the last record's reward.
#define LOG_FINAL_REWARD(NAME, TYPE)                                           \
  void Logger::log##NAME##FinalReward(TYPE Value) {                            \
    assert(RewardSpec.isElementType<TYPE>());                                  \
    for (size_t I = 1; I < LoggerData->getNrRecords(); ++I)                    \
      log##NAME##Reward(0);                                                    \
    log##NAME##Reward(Value);                                                  \
  }

LOG_FINAL_REWARD(Float, float)
LOG_FINAL_REWARD(Int32, int32_t)
LOG_FINAL_REWARD(Int64, int64_t)
#undef LOG_FINAL_REWARD

void Logger::logFloatValue(size_t FeatureID, const float *Value) {
  assert(FeatureSpecs[FeatureID].Spec.isElementType<float>());
  logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
}

void Logger::logInt64Value(size_t FeatureID, const int64_t *Value) {
  assert(FeatureSpecs[FeatureID].Spec.isElementType<int64_t>());
  logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
}

void Logger::logInt32Value(size_t FeatureID, const int32_t *Value) {
  assert(FeatureSpecs[FeatureID].Spec.isElementType<int32_t>());
  logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
}

void Logger::logSpecifiedTensorValue(size_t FeatureID, const char *RawData) {
  const auto &Spec = FeatureSpecs[FeatureID].Spec;
  char *Buff = addEntryAndGetFloatOrInt64Buffer(FeatureID);
  if (Spec.isElementType<int32_t>())
    for (size_t I = 0; I < Spec.getElementCount(); ++I)
      (reinterpret_cast<int64_t *>(Buff))[I] =
          static_cast<int64_t>((reinterpret_cast<const int32_t *>(RawData))[I]);
  else if (Spec.isElementType<int64_t>() || Spec.isElementType<float>())
    std::memcpy(Buff, RawData,
                Spec.getElementCount() * Spec.getElementByteSize());
  else
    llvm_unreachable("Unsupported tensor type");
}

char *Logger::addEntryAndGetFloatOrInt64Buffer(size_t FeatureID) {
  return reinterpret_cast<char *>(LoggerData->addNewTensor(FeatureID));
}

void Logger::print(raw_ostream &OS) { LoggerData->print(OS); }
#endif // defined(LLVM_HAVE_TF_API)