1 //===- TFUtils.cpp - tensorflow evaluation utilities ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements utilities for interfacing with tensorflow C APIs. 10 // 11 //===----------------------------------------------------------------------===// 12 #include "llvm/Config/config.h" 13 #if defined(LLVM_HAVE_TF_API) 14 15 #include "llvm/ADT/Twine.h" 16 #include "llvm/Analysis/Utils/TFUtils.h" 17 #include "llvm/Support/Base64.h" 18 #include "llvm/Support/CommandLine.h" 19 #include "llvm/Support/Debug.h" 20 #include "llvm/Support/JSON.h" 21 #include "llvm/Support/ManagedStatic.h" 22 #include "llvm/Support/MemoryBuffer.h" 23 #include "llvm/Support/Path.h" 24 #include "llvm/Support/raw_ostream.h" 25 26 #include "google/protobuf/struct.pb.h" 27 #include "google/protobuf/text_format.h" 28 #include "tensorflow/c/c_api.h" 29 #include "tensorflow/c/c_api_experimental.h" 30 #include "tensorflow/core/example/example.pb.h" 31 #include <cassert> 32 #include <numeric> 33 34 using namespace llvm; 35 36 using google::protobuf::Message; 37 using google::protobuf::TextFormat; 38 39 static cl::opt<bool> 40 ProtobufTextMode("tfutils-text-log", cl::init(false), cl::Hidden, 41 cl::desc("Output textual (human-readable) protobuf.")); 42 43 namespace { 44 45 using TFGraphPtr = std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)>; 46 using TFSessionOptionsPtr = 47 std::unique_ptr<TF_SessionOptions, decltype(&TF_DeleteSessionOptions)>; 48 using TFStatusPtr = std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)>; 49 50 struct TFInitializer { 51 TFInitializer() { 52 assert(!IsInitialized && "TFInitialized should be called only once"); 53 int Argc = 1; 54 const char *Name = ""; 55 const char **NamePtr = &Name; 56 TF_InitMain(Name, &Argc, const_cast<char ***>(&NamePtr)); 57 IsInitialized = true; 58 } 59 bool IsInitialized = false; 60 }; 61 62 llvm::ManagedStatic<TFInitializer> TFLibInitializer; 63 64 bool ensureInitTF() { return TFLibInitializer->IsInitialized; } 65 66 TFGraphPtr createTFGraph() { 67 return TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph); 68 } 69 70 TFStatusPtr createTFStatus() { 71 return TFStatusPtr(TF_NewStatus(), &TF_DeleteStatus); 72 } 73 74 TFSessionOptionsPtr createTFSessionOptions() { 75 return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions); 76 } 77 78 void serialize(const Message &SE, std::string *OutStr) { 79 if (ProtobufTextMode) { 80 TextFormat::PrintToString(SE, OutStr); 81 } else { 82 *OutStr = SE.SerializeAsString(); 83 } 84 } 85 86 int getTFTypeIndex(TensorType TType) { 87 switch (TType) { 88 case TensorType::Double: 89 return TF_DOUBLE; 90 case TensorType::Float: 91 return TF_FLOAT; 92 case TensorType::Int8: 93 return TF_INT8; 94 case TensorType::UInt8: 95 return TF_UINT8; 96 case TensorType::Int16: 97 return TF_INT16; 98 case TensorType::UInt16: 99 return TF_UINT16; 100 case TensorType::Int32: 101 return TF_INT32; 102 case TensorType::UInt32: 103 return TF_UINT32; 104 case TensorType::Int64: 105 return TF_INT64; 106 case TensorType::UInt64: 107 return TF_UINT64; 108 case TensorType::Invalid: 109 llvm_unreachable("Unknown tensor type"); 110 } 111 } 112 } // namespace 113 114 namespace llvm { 115 class EvaluationResultImpl { 116 public: 117 EvaluationResultImpl(size_t OutputSize) 118 : OutputSize(OutputSize), Output(OutputSize){}; 119 120 ~EvaluationResultImpl() { 121 for (auto *P : Output) 122 if (P) 123 TF_DeleteTensor(P); 124 } 125 126 EvaluationResultImpl(const EvaluationResultImpl &) = delete; 127 EvaluationResultImpl(EvaluationResultImpl &&Other) = delete; 128 std::vector<TF_Tensor *> &getOutput() { return Output; } 129 130 private: 131 const size_t OutputSize; 132 std::vector<TF_Tensor *> Output; 133 }; 134 135 class TFModelEvaluatorImpl { 136 public: 137 TFModelEvaluatorImpl(StringRef SavedModelPath, 138 const std::vector<TensorSpec> &InputSpecs, 139 function_ref<TensorSpec(size_t)> GetOutputSpecs, 140 size_t OutputSpecsSize, const char *Tags); 141 142 bool isValid() const { return IsValid; } 143 size_t OutputSize() const { return OutputFeed.size(); } 144 145 void evaluate(TF_Tensor **Output, TF_Status *Status) { 146 TF_SessionRun(Session, nullptr, InputFeed.data(), Input.data(), 147 Input.size(), OutputFeed.data(), Output, OutputFeed.size(), 148 nullptr, 0, nullptr, Status); 149 } 150 151 void initInput(size_t Index, TF_DataType Type, 152 const std::vector<int64_t> &Dimensions); 153 const std::vector<TF_Tensor *> &getInput() const { return Input; } 154 155 ~TFModelEvaluatorImpl(); 156 157 private: 158 /// The objects necessary for carrying out an evaluation of the SavedModel. 159 /// They are expensive to set up, and we maintain them accross all the 160 /// evaluations of the model. 161 TF_Session *Session = nullptr; 162 TFGraphPtr Graph; 163 TFSessionOptionsPtr Options; 164 165 /// The specification of the input nodes. 166 std::vector<TF_Output> InputFeed; 167 168 /// The input tensors. They must match by index of the corresponding InputFeed 169 /// value. We set up the tensors once and just mutate theirs scalars before 170 /// each evaluation. The input tensors keep their value after an evaluation. 171 std::vector<TF_Tensor *> Input; 172 173 /// The specification of the output nodes. When evaluating, the tensors in the 174 /// output tensor vector must match by index the corresponding element in the 175 /// OutputFeed. 176 std::vector<TF_Output> OutputFeed; 177 178 void invalidate() { IsValid = false; } 179 180 bool IsValid = true; 181 182 /// Reusable utility for ensuring we can bind the requested Name to a node in 183 /// the SavedModel Graph. 184 bool checkReportAndInvalidate(const TF_Output &Output, 185 const TensorSpec &OutputSpec); 186 }; 187 188 class LoggerDataImpl { 189 const std::vector<LoggedFeatureSpec> LoggedFeatureSpecs; 190 const TensorSpec RewardSpec; 191 const bool IncludeReward; 192 193 std::vector<tensorflow::FeatureList> FeatureLists; 194 tensorflow::FeatureList Reward; 195 196 bool isSelfConsistent(const tensorflow::SequenceExample &SE, 197 size_t NrRecords) const { 198 bool Ret = true; 199 for (const auto &TSpecs : LoggedFeatureSpecs) { 200 const auto &Name = TSpecs.getLoggingName(); 201 const auto &FL = SE.feature_lists().feature_list().at(Name).feature(); 202 if (NrRecords != static_cast<size_t>(FL.size())) { 203 dbgs() << "[TF-UTILS]: " << Name << " has missing records. Expected " 204 << NrRecords << " got " << FL.size() << "\n"; 205 Ret = false; 206 } 207 } 208 if (IncludeReward && static_cast<size_t>(SE.feature_lists() 209 .feature_list() 210 .at(RewardSpec.name()) 211 .feature() 212 .size()) != NrRecords) { 213 dbgs() << "[TF-UTILS]: reward is missing records.\n"; 214 Ret = false; 215 } 216 return Ret; 217 } 218 219 void transferLog(tensorflow::SequenceExample &SE) { 220 auto *FL = SE.mutable_feature_lists()->mutable_feature_list(); 221 if (IncludeReward) 222 (*FL)[RewardSpec.name()] = std::move(Reward); 223 assert(FeatureLists.size() == LoggedFeatureSpecs.size()); 224 for (size_t I = 0; I < FeatureLists.size(); ++I) { 225 const auto &LFS = LoggedFeatureSpecs[I]; 226 (*FL)[LFS.getLoggingName()] = std::move(FeatureLists[I]); 227 } 228 } 229 230 public: 231 LoggerDataImpl(const std::vector<LoggedFeatureSpec> &LoggedSpecs, 232 const TensorSpec &RewardSpec, bool IncludeReward) 233 : LoggedFeatureSpecs(LoggedSpecs), RewardSpec(RewardSpec), 234 IncludeReward(IncludeReward), FeatureLists(LoggedFeatureSpecs.size()) {} 235 236 // flush the logged info to a stream and clear the log contents. 237 void flush(std::string *Str) { 238 size_t NrRecords = getNrRecords(); 239 (void)NrRecords; 240 tensorflow::SequenceExample SE; 241 transferLog(SE); 242 assert(isSelfConsistent(SE, NrRecords)); 243 serialize(SE, Str); 244 } 245 246 char *addNewTensor(size_t FeatureID) { 247 const auto &Spec = LoggedFeatureSpecs[FeatureID].Spec; 248 if (Spec.isElementType<float>()) { 249 auto *RF = FeatureLists[FeatureID] 250 .add_feature() 251 ->mutable_float_list() 252 ->mutable_value(); 253 RF->Resize(Spec.getElementCount(), 0.0); 254 return reinterpret_cast<char *>(RF->mutable_data()); 255 } else if (Spec.isElementType<int32_t>() || Spec.isElementType<int64_t>()) { 256 auto *RF = FeatureLists[FeatureID] 257 .add_feature() 258 ->mutable_int64_list() 259 ->mutable_value(); 260 RF->Resize(Spec.getElementCount(), 0); 261 return reinterpret_cast<char *>(RF->mutable_data()); 262 } 263 llvm_unreachable("Unsupported tensor type."); 264 } 265 266 template <typename T> void logReward(T Value) { 267 assert(IncludeReward); 268 if (RewardSpec.isElementType<float>()) 269 Reward.add_feature()->mutable_float_list()->add_value(Value); 270 else if (RewardSpec.isElementType<int32_t>() || 271 RewardSpec.isElementType<int64_t>()) 272 Reward.add_feature()->mutable_int64_list()->add_value(Value); 273 else 274 llvm_unreachable("Unsupported tensor type."); 275 } 276 277 size_t getNrRecords() const { 278 return FeatureLists.empty() ? 0 : FeatureLists[0].feature().size(); 279 } 280 }; 281 } // namespace llvm 282 283 TFModelEvaluatorImpl::TFModelEvaluatorImpl( 284 StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs, 285 function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize, 286 const char *Tags = "serve") 287 : Graph(createTFGraph()), Options(createTFSessionOptions()), 288 InputFeed(InputSpecs.size()), Input(InputSpecs.size()), 289 OutputFeed(OutputSpecsSize) { 290 if (!ensureInitTF()) { 291 errs() << "Tensorflow should have been initialized"; 292 return; 293 } 294 auto Status = createTFStatus(); 295 296 Session = TF_LoadSessionFromSavedModel(Options.get(), nullptr, 297 SavedModelPath.str().c_str(), &Tags, 1, 298 Graph.get(), nullptr, Status.get()); 299 if (TF_GetCode(Status.get()) != TF_Code::TF_OK) { 300 errs() << TF_Message(Status.get()); 301 invalidate(); 302 } 303 for (size_t I = 0; I < InputSpecs.size(); ++I) { 304 auto &InputSpec = InputSpecs[I]; 305 InputFeed[I] = { 306 TF_GraphOperationByName(Graph.get(), (InputSpec.name()).c_str()), 307 InputSpec.port()}; 308 if (!checkReportAndInvalidate(InputFeed[I], InputSpec)) 309 return; 310 initInput(I, static_cast<TF_DataType>(getTFTypeIndex(InputSpec.type())), 311 InputSpec.shape()); 312 } 313 for (size_t I = 0; I < OutputSpecsSize; ++I) { 314 auto OutputSpec = GetOutputSpecs(I); 315 OutputFeed[I] = { 316 TF_GraphOperationByName(Graph.get(), (OutputSpec.name()).c_str()), 317 OutputSpec.port()}; 318 if (!checkReportAndInvalidate(OutputFeed[I], OutputSpec)) 319 return; 320 } 321 } 322 323 TFModelEvaluator::TFModelEvaluator( 324 StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs, 325 function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize, 326 const char *Tags) 327 : Impl(new TFModelEvaluatorImpl(SavedModelPath, InputSpecs, GetOutputSpecs, 328 OutputSpecsSize, Tags)) { 329 if (!Impl->isValid()) 330 Impl.reset(); 331 } 332 333 TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath, 334 const std::vector<TensorSpec> &InputSpecs, 335 const std::vector<TensorSpec> &OutputSpecs, 336 const char *Tags) 337 : TFModelEvaluator( 338 SavedModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I]; }, 339 OutputSpecs.size(), Tags) {} 340 341 TFModelEvaluatorImpl::~TFModelEvaluatorImpl() { 342 for (auto *T : Input) { 343 TF_DeleteTensor(T); 344 } 345 if (Session == nullptr) 346 return; 347 auto Status = createTFStatus(); 348 TF_DeleteSession(Session, Status.get()); 349 Session = nullptr; 350 if (TF_GetCode(Status.get()) != TF_Code::TF_OK) 351 errs() << "Could not delete TF session"; 352 } 353 354 bool TFModelEvaluatorImpl::checkReportAndInvalidate( 355 const TF_Output &Output, const TensorSpec &OutputSpec) { 356 if (Output.oper) 357 return true; 358 errs() << "Could not find TF_Output named: " + OutputSpec.name(); 359 IsValid = false; 360 return IsValid; 361 } 362 363 Optional<TFModelEvaluator::EvaluationResult> TFModelEvaluator::evaluate() { 364 if (!isValid()) 365 return None; 366 std::unique_ptr<EvaluationResultImpl> Ret = 367 std::make_unique<EvaluationResultImpl>(Impl->OutputSize()); 368 auto Status = createTFStatus(); 369 Impl->evaluate(Ret->getOutput().data(), Status.get()); 370 if (TF_GetCode(Status.get()) != TF_Code::TF_OK) { 371 errs() << TF_Message(Status.get()); 372 Impl.reset(); 373 return None; 374 } 375 return EvaluationResult(std::move(Ret)); 376 } 377 378 void TFModelEvaluatorImpl::initInput(size_t Index, TF_DataType Type, 379 const std::vector<int64_t> &Dimensions) { 380 int64_t TotalSize = TF_DataTypeSize(Type); 381 for (auto &D : Dimensions) 382 TotalSize *= D; 383 384 Input[Index] = 385 TF_AllocateTensor(Type, Dimensions.data(), Dimensions.size(), TotalSize); 386 std::memset(TF_TensorData(Input[Index]), 0, TotalSize); 387 } 388 389 void *TFModelEvaluator::getUntypedInput(size_t Index) { 390 return TF_TensorData(Impl->getInput()[Index]); 391 } 392 393 TFModelEvaluator::EvaluationResult::EvaluationResult( 394 std::unique_ptr<EvaluationResultImpl> Impl) 395 : Impl(std::move(Impl)) {} 396 397 TFModelEvaluator::EvaluationResult::EvaluationResult(EvaluationResult &&Other) 398 : Impl(std::move(Other.Impl)) {} 399 400 TFModelEvaluator::EvaluationResult & 401 TFModelEvaluator::EvaluationResult::operator=(EvaluationResult &&Other) { 402 Impl = std::move(Other.Impl); 403 return *this; 404 } 405 406 void *TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) { 407 return TF_TensorData(Impl->getOutput()[Index]); 408 } 409 410 const void * 411 TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) const { 412 return TF_TensorData(Impl->getOutput()[Index]); 413 } 414 415 TFModelEvaluator::EvaluationResult::~EvaluationResult() {} 416 TFModelEvaluator::~TFModelEvaluator() {} 417 418 Logger::Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs, 419 const TensorSpec &RewardSpec, bool IncludeReward) 420 : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec), 421 IncludeReward(IncludeReward), 422 LoggerData(std::make_unique<LoggerDataImpl>(FeatureSpecs, RewardSpec, 423 IncludeReward)) {} 424 425 Logger::~Logger() {} 426 427 #define LOG_REWARD(NAME, TYPE) \ 428 void Logger::log##NAME##Reward(TYPE Value) { \ 429 assert(IncludeReward); \ 430 LoggerData->logReward(Value); \ 431 } 432 433 LOG_REWARD(Float, float) 434 LOG_REWARD(Int32, int32_t) 435 LOG_REWARD(Int64, int64_t) 436 #undef LOG_REWARD 437 438 #define LOG_FINAL_REWARD(NAME, TYPE) \ 439 void Logger::log##NAME##FinalReward(TYPE Value) { \ 440 assert(RewardSpec.isElementType<TYPE>()); \ 441 for (size_t I = 1; I < LoggerData->getNrRecords(); ++I) \ 442 log##NAME##Reward(0); \ 443 log##NAME##Reward(Value); \ 444 } 445 446 LOG_FINAL_REWARD(Float, float) 447 LOG_FINAL_REWARD(Int32, int32_t) 448 LOG_FINAL_REWARD(Int64, int64_t) 449 #undef LOG_FINAL_REWARD 450 451 void Logger::logFloatValue(size_t FeatureID, const float *Value) { 452 assert(FeatureSpecs[FeatureID].Spec.isElementType<float>()); 453 logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value)); 454 } 455 456 void Logger::logInt64Value(size_t FeatureID, const int64_t *Value) { 457 assert(FeatureSpecs[FeatureID].Spec.isElementType<int64_t>()); 458 logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value)); 459 } 460 461 void Logger::logInt32Value(size_t FeatureID, const int32_t *Value) { 462 assert(FeatureSpecs[FeatureID].Spec.isElementType<int32_t>()); 463 logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value)); 464 } 465 466 void Logger::logSpecifiedTensorValue(size_t FeatureID, const char *RawData) { 467 const auto &Spec = FeatureSpecs[FeatureID].Spec; 468 char *Buff = addEntryAndGetFloatOrInt64Buffer(FeatureID); 469 if (Spec.isElementType<int32_t>()) 470 for (size_t I = 0; I < Spec.getElementCount(); ++I) 471 (reinterpret_cast<int64_t *>(Buff))[I] = 472 static_cast<int64_t>((reinterpret_cast<const int32_t *>(RawData))[I]); 473 else if (Spec.isElementType<int64_t>() || Spec.isElementType<float>()) 474 std::memcpy(Buff, RawData, 475 Spec.getElementCount() * Spec.getElementByteSize()); 476 else 477 llvm_unreachable("Unsupported tensor type"); 478 } 479 480 char *Logger::addEntryAndGetFloatOrInt64Buffer(size_t FeatureID) { 481 return reinterpret_cast<char *>(LoggerData->addNewTensor(FeatureID)); 482 } 483 484 void Logger::flush(std::string *Str) { LoggerData->flush(Str); } 485 486 void Logger::flush(raw_ostream &OS) { 487 std::string Buff; 488 LoggerData->flush(&Buff); 489 OS << Buff; 490 } 491 492 void Logger::flushLogs(raw_ostream &OS, 493 const StringMap<std::unique_ptr<Logger>> &Loggers) { 494 google::protobuf::Struct Msg; 495 for (const auto &NamedLogger : Loggers) { 496 tensorflow::SequenceExample SE; 497 const auto &Logger = NamedLogger.second; 498 std::string Unencoded; 499 if (Logger->LoggerData->getNrRecords() > 0) 500 Logger->flush(&Unencoded); 501 502 (*Msg.mutable_fields())[NamedLogger.first().str()] 503 .mutable_string_value() 504 ->append(ProtobufTextMode ? Unencoded : encodeBase64(Unencoded)); 505 } 506 507 std::string OutStr; 508 serialize(Msg, &OutStr); 509 OS << OutStr; 510 } 511 #endif // defined(LLVM_HAVE_TF_API) 512