1 //===- TFUtils.cpp - tensorflow evaluation utilities ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements utilities for interfacing with tensorflow C APIs. 10 // 11 //===----------------------------------------------------------------------===// 12 #include "llvm/Config/config.h" 13 #if defined(LLVM_HAVE_TF_API) 14 15 #include "llvm/ADT/Twine.h" 16 #include "llvm/Analysis/Utils/TFUtils.h" 17 #include "llvm/Support/Base64.h" 18 #include "llvm/Support/CommandLine.h" 19 #include "llvm/Support/Debug.h" 20 #include "llvm/Support/JSON.h" 21 #include "llvm/Support/ManagedStatic.h" 22 #include "llvm/Support/MemoryBuffer.h" 23 #include "llvm/Support/Path.h" 24 #include "llvm/Support/raw_ostream.h" 25 26 #include "google/protobuf/struct.pb.h" 27 #include "google/protobuf/text_format.h" 28 #include "tensorflow/c/c_api.h" 29 #include "tensorflow/c/c_api_experimental.h" 30 #include "tensorflow/core/example/example.pb.h" 31 #include <cassert> 32 #include <numeric> 33 34 using namespace llvm; 35 36 using google::protobuf::Message; 37 using google::protobuf::TextFormat; 38 39 static cl::opt<bool> 40 ProtobufTextMode("tfutils-text-log", cl::init(false), cl::Hidden, 41 cl::desc("Output textual (human-readable) protobuf.")); 42 43 namespace { 44 45 using TFGraphPtr = std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)>; 46 using TFSessionOptionsPtr = 47 std::unique_ptr<TF_SessionOptions, decltype(&TF_DeleteSessionOptions)>; 48 using TFStatusPtr = std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)>; 49 50 struct TFInitializer { 51 TFInitializer() { 52 assert(!IsInitialized && "TFInitialized should be called only once"); 53 int Argc = 1; 54 const char *Name = ""; 55 const char **NamePtr = &Name; 56 TF_InitMain(Name, &Argc, const_cast<char ***>(&NamePtr)); 57 IsInitialized = true; 58 } 59 bool IsInitialized = false; 60 }; 61 62 llvm::ManagedStatic<TFInitializer> TFLibInitializer; 63 64 bool ensureInitTF() { return TFLibInitializer->IsInitialized; } 65 66 TFGraphPtr createTFGraph() { 67 return TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph); 68 } 69 70 TFStatusPtr createTFStatus() { 71 return TFStatusPtr(TF_NewStatus(), &TF_DeleteStatus); 72 } 73 74 TFSessionOptionsPtr createTFSessionOptions() { 75 return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions); 76 } 77 78 void serialize(const Message &SE, std::string *OutStr) { 79 if (ProtobufTextMode) { 80 TextFormat::PrintToString(SE, OutStr); 81 } else { 82 *OutStr = SE.SerializeAsString(); 83 } 84 } 85 86 int getTFTypeIndex(TensorType TType) { 87 switch (TType) { 88 case TensorType::Double: 89 return TF_DOUBLE; 90 case TensorType::Float: 91 return TF_FLOAT; 92 case TensorType::Int8: 93 return TF_INT8; 94 case TensorType::UInt8: 95 return TF_UINT8; 96 case TensorType::Int16: 97 return TF_INT16; 98 case TensorType::UInt16: 99 return TF_UINT16; 100 case TensorType::Int32: 101 return TF_INT32; 102 case TensorType::UInt32: 103 return TF_UINT32; 104 case TensorType::Int64: 105 return TF_INT64; 106 case TensorType::UInt64: 107 return TF_UINT64; 108 case TensorType::Invalid: 109 llvm_unreachable("Unknown tensor type"); 110 } 111 } 112 } // namespace 113 114 namespace llvm { 115 class EvaluationResultImpl { 116 public: 117 EvaluationResultImpl(size_t OutputSize) 118 : OutputSize(OutputSize), Output(OutputSize){}; 119 120 ~EvaluationResultImpl() { 121 for (auto *P : Output) 122 if (P) 123 TF_DeleteTensor(P); 124 } 125 126 EvaluationResultImpl(const EvaluationResultImpl &) = delete; 127 EvaluationResultImpl(EvaluationResultImpl &&Other) = delete; 128 std::vector<TF_Tensor *> &getOutput() { return Output; } 129 130 private: 131 const size_t OutputSize; 132 std::vector<TF_Tensor *> Output; 133 }; 134 135 class TFModelEvaluatorImpl { 136 public: 137 TFModelEvaluatorImpl(StringRef SavedModelPath, 138 const std::vector<TensorSpec> &InputSpecs, 139 function_ref<TensorSpec(size_t)> GetOutputSpecs, 140 size_t OutputSpecsSize, const char *Tags); 141 142 bool isValid() const { return IsValid; } 143 size_t OutputSize() const { return OutputFeed.size(); } 144 145 void evaluate(TF_Tensor **Output, TF_Status *Status) { 146 TF_SessionRun(Session, nullptr, InputFeed.data(), Input.data(), 147 Input.size(), OutputFeed.data(), Output, OutputFeed.size(), 148 nullptr, 0, nullptr, Status); 149 } 150 151 void initInput(size_t Index, TF_DataType Type, 152 const std::vector<int64_t> &Dimensions); 153 const std::vector<TF_Tensor *> &getInput() const { return Input; } 154 155 ~TFModelEvaluatorImpl(); 156 157 private: 158 /// The objects necessary for carrying out an evaluation of the SavedModel. 159 /// They are expensive to set up, and we maintain them accross all the 160 /// evaluations of the model. 161 TF_Session *Session = nullptr; 162 TFGraphPtr Graph; 163 TFSessionOptionsPtr Options; 164 165 /// The specification of the input nodes. 166 std::vector<TF_Output> InputFeed; 167 168 /// The input tensors. They must match by index of the corresponding InputFeed 169 /// value. We set up the tensors once and just mutate theirs scalars before 170 /// each evaluation. The input tensors keep their value after an evaluation. 171 std::vector<TF_Tensor *> Input; 172 173 /// The specification of the output nodes. When evaluating, the tensors in the 174 /// output tensor vector must match by index the corresponding element in the 175 /// OutputFeed. 176 std::vector<TF_Output> OutputFeed; 177 178 void invalidate() { IsValid = false; } 179 180 bool IsValid = true; 181 182 /// Reusable utility for ensuring we can bind the requested Name to a node in 183 /// the SavedModel Graph. 184 bool checkReportAndInvalidate(const TF_Output &Output, 185 const TensorSpec &OutputSpec); 186 }; 187 188 class LoggerDataImpl { 189 const std::vector<LoggedFeatureSpec> LoggedFeatureSpecs; 190 const TensorSpec RewardSpec; 191 const bool IncludeReward; 192 193 std::vector<tensorflow::FeatureList> FeatureLists; 194 tensorflow::FeatureList Reward; 195 196 bool isSelfConsistent(const tensorflow::SequenceExample &SE, 197 size_t NrRecords) const { 198 bool Ret = true; 199 for (const auto &TSpecs : LoggedFeatureSpecs) { 200 const auto &Name = TSpecs.getLoggingName(); 201 const auto &FL = SE.feature_lists().feature_list().at(Name).feature(); 202 if (NrRecords != static_cast<size_t>(FL.size())) { 203 dbgs() << "[TF-UTILS]: " << Name << " has missing records. Expected " 204 << NrRecords << " got " << FL.size() << "\n"; 205 Ret = false; 206 } 207 } 208 if (IncludeReward && static_cast<size_t>(SE.feature_lists() 209 .feature_list() 210 .at(RewardSpec.name()) 211 .feature() 212 .size()) != NrRecords) { 213 dbgs() << "[TF-UTILS]: reward is missing records.\n"; 214 Ret = false; 215 } 216 return Ret; 217 } 218 219 void transferLog(tensorflow::SequenceExample &SE) { 220 auto *FL = SE.mutable_feature_lists()->mutable_feature_list(); 221 if (IncludeReward) 222 (*FL)[RewardSpec.name()] = std::move(Reward); 223 assert(FeatureLists.size() == LoggedFeatureSpecs.size()); 224 for (size_t I = 0; I < FeatureLists.size(); ++I) { 225 const auto &LFS = LoggedFeatureSpecs[I]; 226 (*FL)[LFS.getLoggingName()] = std::move(FeatureLists[I]); 227 } 228 } 229 230 public: 231 LoggerDataImpl(const std::vector<LoggedFeatureSpec> &LoggedSpecs, 232 const TensorSpec &RewardSpec, bool IncludeReward) 233 : LoggedFeatureSpecs(LoggedSpecs), RewardSpec(RewardSpec), 234 IncludeReward(IncludeReward), FeatureLists(LoggedFeatureSpecs.size()) {} 235 236 // flush the logged info to a stream and clear the log contents. 237 void flush(std::string *Str) { 238 size_t NrRecords = getNrRecords(); 239 (void)NrRecords; 240 tensorflow::SequenceExample SE; 241 transferLog(SE); 242 assert(isSelfConsistent(SE, NrRecords)); 243 serialize(SE, Str); 244 } 245 246 char *addNewTensor(size_t FeatureID) { 247 const auto &Spec = LoggedFeatureSpecs[FeatureID].Spec; 248 if (Spec.isElementType<float>()) { 249 auto *RF = FeatureLists[FeatureID] 250 .add_feature() 251 ->mutable_float_list() 252 ->mutable_value(); 253 RF->Resize(Spec.getElementCount(), 0.0); 254 return reinterpret_cast<char *>(RF->mutable_data()); 255 } else if (Spec.isElementType<int32_t>() || Spec.isElementType<int64_t>()) { 256 auto *RF = FeatureLists[FeatureID] 257 .add_feature() 258 ->mutable_int64_list() 259 ->mutable_value(); 260 RF->Resize(Spec.getElementCount(), 0); 261 return reinterpret_cast<char *>(RF->mutable_data()); 262 } 263 llvm_unreachable("Unsupported tensor type."); 264 } 265 266 template <typename T> void logReward(T Value) { 267 assert(IncludeReward); 268 if (RewardSpec.isElementType<float>()) 269 Reward.add_feature()->mutable_float_list()->add_value(Value); 270 else if (RewardSpec.isElementType<int32_t>() || 271 RewardSpec.isElementType<int64_t>()) 272 Reward.add_feature()->mutable_int64_list()->add_value(Value); 273 else 274 llvm_unreachable("Unsupported tensor type."); 275 } 276 277 size_t getNrRecords() const { 278 return FeatureLists.empty() ? 0 : FeatureLists[0].feature().size(); 279 } 280 }; 281 } // namespace llvm 282 283 TFModelEvaluatorImpl::TFModelEvaluatorImpl( 284 StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs, 285 function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize, 286 const char *Tags = "serve") 287 : Graph(createTFGraph()), Options(createTFSessionOptions()), 288 InputFeed(InputSpecs.size()), Input(InputSpecs.size()), 289 OutputFeed(OutputSpecsSize) { 290 if (!ensureInitTF()) { 291 errs() << "Tensorflow should have been initialized"; 292 return; 293 } 294 auto Status = createTFStatus(); 295 296 Session = TF_LoadSessionFromSavedModel(Options.get(), nullptr, 297 SavedModelPath.str().c_str(), &Tags, 1, 298 Graph.get(), nullptr, Status.get()); 299 if (TF_GetCode(Status.get()) != TF_Code::TF_OK) { 300 errs() << TF_Message(Status.get()); 301 invalidate(); 302 } 303 size_t NrSupported = 0; 304 for (size_t I = 0; I < InputSpecs.size(); ++I) { 305 auto &InputSpec = InputSpecs[I]; 306 InputFeed[I] = { 307 TF_GraphOperationByName(Graph.get(), (InputSpec.name()).c_str()), 308 InputSpec.port()}; 309 if (!InputFeed[I].oper) { 310 continue; 311 } 312 if (NrSupported++ != I) { 313 errs() 314 << "Unsupported features must be placed at the end of the InputSpecs"; 315 invalidate(); 316 return; 317 } 318 if (!checkReportAndInvalidate(InputFeed[I], InputSpec)) 319 return; 320 initInput(I, static_cast<TF_DataType>(getTFTypeIndex(InputSpec.type())), 321 InputSpec.shape()); 322 } 323 InputFeed.resize(NrSupported); 324 Input.resize(NrSupported); 325 326 for (size_t I = 0; I < OutputSpecsSize; ++I) { 327 auto OutputSpec = GetOutputSpecs(I); 328 OutputFeed[I] = { 329 TF_GraphOperationByName(Graph.get(), (OutputSpec.name()).c_str()), 330 OutputSpec.port()}; 331 if (!checkReportAndInvalidate(OutputFeed[I], OutputSpec)) 332 return; 333 } 334 } 335 336 TFModelEvaluator::TFModelEvaluator( 337 StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs, 338 function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize, 339 const char *Tags) 340 : Impl(new TFModelEvaluatorImpl(SavedModelPath, InputSpecs, GetOutputSpecs, 341 OutputSpecsSize, Tags)) { 342 if (!Impl->isValid()) 343 Impl.reset(); 344 } 345 346 TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath, 347 const std::vector<TensorSpec> &InputSpecs, 348 const std::vector<TensorSpec> &OutputSpecs, 349 const char *Tags) 350 : TFModelEvaluator( 351 SavedModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I]; }, 352 OutputSpecs.size(), Tags) {} 353 354 TFModelEvaluatorImpl::~TFModelEvaluatorImpl() { 355 for (auto *T : Input) { 356 TF_DeleteTensor(T); 357 } 358 if (Session == nullptr) 359 return; 360 auto Status = createTFStatus(); 361 TF_DeleteSession(Session, Status.get()); 362 Session = nullptr; 363 if (TF_GetCode(Status.get()) != TF_Code::TF_OK) 364 errs() << "Could not delete TF session"; 365 } 366 367 bool TFModelEvaluatorImpl::checkReportAndInvalidate( 368 const TF_Output &Output, const TensorSpec &OutputSpec) { 369 if (Output.oper) 370 return true; 371 errs() << "Could not find TF_Output named: " + OutputSpec.name(); 372 IsValid = false; 373 return IsValid; 374 } 375 376 Optional<TFModelEvaluator::EvaluationResult> TFModelEvaluator::evaluate() { 377 if (!isValid()) 378 return None; 379 std::unique_ptr<EvaluationResultImpl> Ret = 380 std::make_unique<EvaluationResultImpl>(Impl->OutputSize()); 381 auto Status = createTFStatus(); 382 Impl->evaluate(Ret->getOutput().data(), Status.get()); 383 if (TF_GetCode(Status.get()) != TF_Code::TF_OK) { 384 errs() << TF_Message(Status.get()); 385 Impl.reset(); 386 return None; 387 } 388 return EvaluationResult(std::move(Ret)); 389 } 390 391 void TFModelEvaluatorImpl::initInput(size_t Index, TF_DataType Type, 392 const std::vector<int64_t> &Dimensions) { 393 int64_t TotalSize = TF_DataTypeSize(Type); 394 for (auto &D : Dimensions) 395 TotalSize *= D; 396 397 Input[Index] = 398 TF_AllocateTensor(Type, Dimensions.data(), Dimensions.size(), TotalSize); 399 std::memset(TF_TensorData(Input[Index]), 0, TotalSize); 400 } 401 402 void *TFModelEvaluator::getUntypedInput(size_t Index) { 403 if (Index < Impl->getInput().size()) 404 return TF_TensorData(Impl->getInput()[Index]); 405 return nullptr; 406 } 407 408 TFModelEvaluator::EvaluationResult::EvaluationResult( 409 std::unique_ptr<EvaluationResultImpl> Impl) 410 : Impl(std::move(Impl)) {} 411 412 TFModelEvaluator::EvaluationResult::EvaluationResult(EvaluationResult &&Other) 413 : Impl(std::move(Other.Impl)) {} 414 415 TFModelEvaluator::EvaluationResult & 416 TFModelEvaluator::EvaluationResult::operator=(EvaluationResult &&Other) { 417 Impl = std::move(Other.Impl); 418 return *this; 419 } 420 421 void *TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) { 422 return TF_TensorData(Impl->getOutput()[Index]); 423 } 424 425 const void * 426 TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) const { 427 return TF_TensorData(Impl->getOutput()[Index]); 428 } 429 430 TFModelEvaluator::EvaluationResult::~EvaluationResult() {} 431 TFModelEvaluator::~TFModelEvaluator() {} 432 433 Logger::Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs, 434 const TensorSpec &RewardSpec, bool IncludeReward) 435 : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec), 436 IncludeReward(IncludeReward), 437 LoggerData(std::make_unique<LoggerDataImpl>(FeatureSpecs, RewardSpec, 438 IncludeReward)) {} 439 440 Logger::~Logger() {} 441 442 #define LOG_REWARD(NAME, TYPE) \ 443 void Logger::log##NAME##Reward(TYPE Value) { \ 444 assert(IncludeReward); \ 445 LoggerData->logReward(Value); \ 446 } 447 448 LOG_REWARD(Float, float) 449 LOG_REWARD(Int32, int32_t) 450 LOG_REWARD(Int64, int64_t) 451 #undef LOG_REWARD 452 453 #define LOG_FINAL_REWARD(NAME, TYPE) \ 454 void Logger::log##NAME##FinalReward(TYPE Value) { \ 455 assert(RewardSpec.isElementType<TYPE>()); \ 456 for (size_t I = 1; I < LoggerData->getNrRecords(); ++I) \ 457 log##NAME##Reward(0); \ 458 log##NAME##Reward(Value); \ 459 } 460 461 LOG_FINAL_REWARD(Float, float) 462 LOG_FINAL_REWARD(Int32, int32_t) 463 LOG_FINAL_REWARD(Int64, int64_t) 464 #undef LOG_FINAL_REWARD 465 466 void Logger::logFloatValue(size_t FeatureID, const float *Value) { 467 assert(FeatureSpecs[FeatureID].Spec.isElementType<float>()); 468 logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value)); 469 } 470 471 void Logger::logInt64Value(size_t FeatureID, const int64_t *Value) { 472 assert(FeatureSpecs[FeatureID].Spec.isElementType<int64_t>()); 473 logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value)); 474 } 475 476 void Logger::logInt32Value(size_t FeatureID, const int32_t *Value) { 477 assert(FeatureSpecs[FeatureID].Spec.isElementType<int32_t>()); 478 logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value)); 479 } 480 481 void Logger::logSpecifiedTensorValue(size_t FeatureID, const char *RawData) { 482 const auto &Spec = FeatureSpecs[FeatureID].Spec; 483 char *Buff = addEntryAndGetFloatOrInt64Buffer(FeatureID); 484 if (Spec.isElementType<int32_t>()) 485 for (size_t I = 0; I < Spec.getElementCount(); ++I) 486 (reinterpret_cast<int64_t *>(Buff))[I] = 487 static_cast<int64_t>((reinterpret_cast<const int32_t *>(RawData))[I]); 488 else if (Spec.isElementType<int64_t>() || Spec.isElementType<float>()) 489 std::memcpy(Buff, RawData, 490 Spec.getElementCount() * Spec.getElementByteSize()); 491 else 492 llvm_unreachable("Unsupported tensor type"); 493 } 494 495 char *Logger::addEntryAndGetFloatOrInt64Buffer(size_t FeatureID) { 496 return reinterpret_cast<char *>(LoggerData->addNewTensor(FeatureID)); 497 } 498 499 void Logger::flush(std::string *Str) { LoggerData->flush(Str); } 500 501 void Logger::flush(raw_ostream &OS) { 502 std::string Buff; 503 LoggerData->flush(&Buff); 504 OS << Buff; 505 } 506 507 void Logger::flushLogs(raw_ostream &OS, 508 const StringMap<std::unique_ptr<Logger>> &Loggers) { 509 google::protobuf::Struct Msg; 510 for (const auto &NamedLogger : Loggers) { 511 tensorflow::SequenceExample SE; 512 const auto &Logger = NamedLogger.second; 513 std::string Unencoded; 514 if (Logger->LoggerData->getNrRecords() > 0) 515 Logger->flush(&Unencoded); 516 517 (*Msg.mutable_fields())[NamedLogger.first().str()] 518 .mutable_string_value() 519 ->append(ProtobufTextMode ? Unencoded : encodeBase64(Unencoded)); 520 } 521 522 std::string OutStr; 523 serialize(Msg, &OutStr); 524 OS << OutStr; 525 } 526 #endif // defined(LLVM_HAVE_TF_API) 527