//===- TFUtils.cpp - tensorflow evaluation utilities ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities for interfacing with tensorflow C APIs.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"

#include "tensorflow/c/c_api.h"
#include "tensorflow/c/c_api_experimental.h"

#include <cassert>
#include <cstring>
#include <functional>
#include <numeric>

using namespace llvm;

namespace {

using TFGraphPtr = std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)>;
using TFSessionOptionsPtr =
    std::unique_ptr<TF_SessionOptions, decltype(&TF_DeleteSessionOptions)>;
using TFStatusPtr = std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)>;

struct TFInitializer {
  TFInitializer() {
    assert(!IsInitialized && "TFInitializer should only be initialized once");
    int Argc = 1;
    const char *Name = "";
    const char **NamePtr = &Name;
    TF_InitMain(Name, &Argc, const_cast<char ***>(&NamePtr));
    IsInitialized = true;
  }
  bool IsInitialized = false;
};

llvm::ManagedStatic<TFInitializer> TFLibInitializer;

bool ensureInitTF() { return TFLibInitializer->IsInitialized; }

TFGraphPtr createTFGraph() {
  return TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph);
}

TFStatusPtr createTFStatus() {
  return TFStatusPtr(TF_NewStatus(), &TF_DeleteStatus);
}

TFSessionOptionsPtr createTFSessionOptions() {
  return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions);
}

/// Write the values of one tensor as a list.
template <typename T>
void writeTensorValues(raw_ostream &OutFile, const char *TensorData,
                       size_t ElemCount) {
  OutFile << "[";
  const T *TypedData = reinterpret_cast<const T *>(TensorData);
  for (size_t I = 0; I < ElemCount; ++I) {
    if (I > 0)
      OutFile << ", ";
    OutFile << TypedData[I];
  }
  OutFile << "]";
}

/// Untyped implementation of the typed writeTensorsAsFeatureLists API below.
void writeRawTensorsAsFeatureLists(raw_ostream &OutFile,
                                   const Logger::LoggedFeatureSpec &LoggedSpec,
                                   const char *TensorData, size_t TensorCount) {
  const char *FieldName = "<invalid>";
  std::function<void(const char *)> ValueWriter;
  const auto &Spec = LoggedSpec.Spec;
  // The 'Feature' protobuf only has 3 possible fields: float_list,
  // int64_list, or bytes_list, so we capture int32 values as int64. We don't
  // support any other types.
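  // For reference, each call appends one 'feature_list' entry of a
  // SequenceExample textproto, shaped roughly as follows (values below are
  // illustrative, not from any real log):
  //   feature_list: {
  //     key: "feature_name" value: {
  //       feature: { int64_list: { value: [0, 1] } }
  //       feature: { int64_list: { value: [2, 3] } }
  //     }
  //   }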
  if (Spec.isElementType<int64_t>()) {
    FieldName = "int64_list";
    ValueWriter = [&](const char *Data) {
      writeTensorValues<int64_t>(OutFile, Data, Spec.getElementCount());
    };
  } else if (Spec.isElementType<int32_t>()) {
    FieldName = "int64_list";
    ValueWriter = [&](const char *Data) {
      writeTensorValues<int32_t>(OutFile, Data, Spec.getElementCount());
    };
  } else if (Spec.isElementType<float>()) {
    FieldName = "float_list";
    ValueWriter = [&](const char *Data) {
      writeTensorValues<float>(OutFile, Data, Spec.getElementCount());
    };
  } else {
    llvm_unreachable("Unsupported tensor type.");
  }

  OutFile << "  feature_list: {\n";
  OutFile << "    key: "
          << "\""
          << (LoggedSpec.LoggingName ? *LoggedSpec.LoggingName : Spec.name())
          << "\" ";
  OutFile << "value: {\n";
  size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize();
  for (const char *P = TensorData,
                  *E = TensorData + TensorByteSize * TensorCount;
       P < E; P += TensorByteSize) {
    OutFile << "      feature: { " << FieldName << ": { value: ";
    ValueWriter(P);
    OutFile << " } }\n";
  }
  OutFile << "    }\n";
  OutFile << "  }\n";
}

/// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs.
/// The tensors are assumed to be stored contiguously, in row-major format,
/// in the TensorData buffer. Each tensor has the shape given by Spec. The
/// feature name in the output is either the provided LoggingName, if
/// specified, otherwise it's the name of the tensor (as given by Spec).
template <typename T>
void writeTensorsAsFeatureLists(raw_ostream &OutFile,
                                const Logger::LoggedFeatureSpec &Spec,
                                const T *TensorData, size_t TensorCount) {
  writeRawTensorsAsFeatureLists(
      OutFile, Spec, reinterpret_cast<const char *>(TensorData), TensorCount);
}
} // namespace

namespace llvm {
class EvaluationResultImpl {
public:
  EvaluationResultImpl(size_t OutputSize)
      : OutputSize(OutputSize), Output(OutputSize) {}

  ~EvaluationResultImpl() {
    for (auto *P : Output)
      if (P)
        TF_DeleteTensor(P);
  }

  EvaluationResultImpl(const EvaluationResultImpl &) = delete;
  EvaluationResultImpl(EvaluationResultImpl &&Other) = delete;
  std::vector<TF_Tensor *> &getOutput() { return Output; }

private:
  const size_t OutputSize;
  std::vector<TF_Tensor *> Output;
};

size_t TensorSpec::getElementByteSize() const {
  return TF_DataTypeSize(static_cast<TF_DataType>(TypeIndex));
}

TensorSpec::TensorSpec(const std::string &Name, int Port, int TypeIndex,
                       const std::vector<int64_t> &Shape)
    : Name(Name), Port(Port), TypeIndex(TypeIndex), Shape(Shape),
      ElementCount(std::accumulate(Shape.begin(), Shape.end(), 1,
                                   std::multiplies<int64_t>())) {}

Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
                                           const json::Value &Value) {
  auto EmitError = [&](const llvm::Twine &Message) -> Optional<TensorSpec> {
    std::string S;
    llvm::raw_string_ostream OS(S);
    OS << Value;
    Ctx.emitError("Unable to parse JSON Value as spec (" + Message + "): " + S);
    return None;
  };
  // FIXME: accept a Path as a parameter, and use it for error reporting.
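  // The JSON this function accepts looks like the following (field values are
  // illustrative; "type" must be the spelling of one of the
  // TFUTILS_SUPPORTED_TYPES, e.g. "int64_t"):
  //   {"name": "input_tensor", "port": 0, "type": "int64_t", "shape": [1, 4]}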
  json::Path::Root Root("tensor_spec");
  json::ObjectMapper Mapper(Value, Root);
  if (!Mapper)
    return EmitError("Value is not a dict");

  std::string TensorName;
  int TensorPort = -1;
  std::string TensorType;
  std::vector<int64_t> TensorShape;

  if (!Mapper.map<std::string>("name", TensorName))
    return EmitError("'name' property not present or not a string");
  if (!Mapper.map<std::string>("type", TensorType))
    return EmitError("'type' property not present or not a string");
  if (!Mapper.map<int>("port", TensorPort))
    return EmitError("'port' property not present or not an int");
  if (!Mapper.map<std::vector<int64_t>>("shape", TensorShape))
    return EmitError("'shape' property not present or not an int array");

#define PARSE_TYPE(T, E)                                                       \
  if (TensorType == #T)                                                        \
    return TensorSpec::createSpec<T>(TensorName, TensorShape, TensorPort);
  TFUTILS_SUPPORTED_TYPES(PARSE_TYPE)
#undef PARSE_TYPE
  return EmitError("unsupported tensor type");
}

class TFModelEvaluatorImpl {
public:
  TFModelEvaluatorImpl(StringRef SavedModelPath,
                       const std::vector<TensorSpec> &InputSpecs,
                       const std::vector<TensorSpec> &OutputSpecs,
                       const char *Tags);

  bool isValid() const { return IsValid; }
  size_t OutputSize() const { return OutputFeed.size(); }

  void evaluate(TF_Tensor **Output, TF_Status *Status) {
    TF_SessionRun(Session, nullptr, InputFeed.data(), Input.data(),
                  Input.size(), OutputFeed.data(), Output, OutputFeed.size(),
                  nullptr, 0, nullptr, Status);
  }

  void initInput(size_t Index, TF_DataType Type,
                 const std::vector<int64_t> &Dimensions);
  const std::vector<TF_Tensor *> &getInput() const { return Input; }

  ~TFModelEvaluatorImpl();

private:
  /// The objects necessary for carrying out an evaluation of the SavedModel.
  /// They are expensive to set up, and we maintain them across all the
  /// evaluations of the model.
  TF_Session *Session = nullptr;
  TFGraphPtr Graph;
  TFSessionOptionsPtr Options;

  /// The specification of the input nodes.
  std::vector<TF_Output> InputFeed;

  /// The input tensors. They must match, by index, the corresponding
  /// InputFeed value. We set up the tensors once and just mutate their
  /// scalars before each evaluation. The input tensors keep their value
  /// after an evaluation.
  std::vector<TF_Tensor *> Input;

  /// The specification of the output nodes. When evaluating, the tensors in
  /// the output tensor vector must match by index the corresponding element
  /// in the OutputFeed.
  std::vector<TF_Output> OutputFeed;

  void invalidate() { IsValid = false; }

  bool IsValid = true;

  /// Reusable utility for ensuring we can bind the requested Name to a node in
  /// the SavedModel Graph.
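  /// Prints an error to errs() and flips IsValid to false when the requested
  /// name does not resolve to an operation in the graph.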
  bool checkReportAndInvalidate(const TF_Output &Output,
                                const TensorSpec &OutputSpec);
};
} // namespace llvm

TFModelEvaluatorImpl::TFModelEvaluatorImpl(
    StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs,
    const std::vector<TensorSpec> &OutputSpecs, const char *Tags)
    : Graph(createTFGraph()), Options(createTFSessionOptions()),
      InputFeed(InputSpecs.size()), Input(InputSpecs.size()),
      OutputFeed(OutputSpecs.size()) {
  if (!ensureInitTF()) {
    errs() << "Tensorflow should have been initialized";
    return;
  }
  auto Status = createTFStatus();

  Session = TF_LoadSessionFromSavedModel(Options.get(), nullptr,
                                         SavedModelPath.str().c_str(), &Tags, 1,
                                         Graph.get(), nullptr, Status.get());
  if (TF_GetCode(Status.get()) != TF_Code::TF_OK) {
    errs() << TF_Message(Status.get());
    invalidate();
  }
  for (size_t I = 0; I < InputSpecs.size(); ++I) {
    auto &InputSpec = InputSpecs[I];
    InputFeed[I] = {
        TF_GraphOperationByName(Graph.get(), (InputSpec.name()).c_str()),
        InputSpec.port()};
    if (!checkReportAndInvalidate(InputFeed[I], InputSpec))
      return;
    initInput(I, static_cast<TF_DataType>(InputSpec.typeIndex()),
              InputSpec.shape());
  }
  for (size_t I = 0; I < OutputSpecs.size(); ++I) {
    auto &OutputSpec = OutputSpecs[I];
    OutputFeed[I] = {
        TF_GraphOperationByName(Graph.get(), (OutputSpec.name()).c_str()),
        OutputSpec.port()};
    if (!checkReportAndInvalidate(OutputFeed[I], OutputSpec))
      return;
  }
}

TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath,
                                   const std::vector<TensorSpec> &InputSpecs,
                                   const std::vector<TensorSpec> &OutputSpecs,
                                   const char *Tags)
    : Impl(new TFModelEvaluatorImpl(SavedModelPath, InputSpecs, OutputSpecs,
                                    Tags)) {
  if (!Impl->isValid())
    Impl.reset();
}

TFModelEvaluatorImpl::~TFModelEvaluatorImpl() {
  for (auto *T : Input) {
    TF_DeleteTensor(T);
  }
  if (Session == nullptr)
    return;
  auto Status = createTFStatus();
  TF_DeleteSession(Session, Status.get());
  Session = nullptr;
  if (TF_GetCode(Status.get()) != TF_Code::TF_OK)
    errs() << "Could not delete TF session";
}

bool TFModelEvaluatorImpl::checkReportAndInvalidate(
    const TF_Output &Output, const TensorSpec &OutputSpec) {
  if (Output.oper)
    return true;
  errs() << "Could not find TF_Output named: " + OutputSpec.name();
  IsValid = false;
  return IsValid;
}

Optional<TFModelEvaluator::EvaluationResult> TFModelEvaluator::evaluate() {
  if (!isValid())
    return None;
  std::unique_ptr<EvaluationResultImpl> Ret =
      std::make_unique<EvaluationResultImpl>(Impl->OutputSize());
  auto Status = createTFStatus();
  Impl->evaluate(Ret->getOutput().data(), Status.get());
  if (TF_GetCode(Status.get()) != TF_Code::TF_OK) {
    errs() << TF_Message(Status.get());
    Impl.reset();
    return None;
  }
  return EvaluationResult(std::move(Ret));
}

void TFModelEvaluatorImpl::initInput(size_t Index, TF_DataType Type,
                                     const std::vector<int64_t> &Dimensions) {
  int64_t TotalSize = TF_DataTypeSize(Type);
  for (auto &D : Dimensions)
    TotalSize *= D;

  Input[Index] =
      TF_AllocateTensor(Type, Dimensions.data(), Dimensions.size(), TotalSize);
  std::memset(TF_TensorData(Input[Index]), 0, TotalSize);
}

void *TFModelEvaluator::getUntypedInput(size_t Index) {
  return TF_TensorData(Impl->getInput()[Index]);
}
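// A minimal usage sketch, assuming a hypothetical saved model with one int64
// input named "input" and one float output named "output", exported under the
// conventional "serve" tag (the path, tensor names, and shapes below are
// illustrative, not taken from this file):
//   TFModelEvaluator Evaluator(
//       "path/to/saved_model",
//       {TensorSpec::createSpec<int64_t>("input", {1}, 0)},
//       {TensorSpec::createSpec<float>("output", {1}, 0)}, "serve");
//   if (Evaluator.isValid()) {
//     *reinterpret_cast<int64_t *>(Evaluator.getUntypedInput(0)) = 42;
//     if (auto Result = Evaluator.evaluate())
//       float Out = *reinterpret_cast<const float *>(
//           Result->getUntypedTensorValue(0));
//   }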
TFModelEvaluator::EvaluationResult::EvaluationResult(
    std::unique_ptr<EvaluationResultImpl> Impl)
    : Impl(std::move(Impl)) {}

TFModelEvaluator::EvaluationResult::EvaluationResult(EvaluationResult &&Other)
    : Impl(std::move(Other.Impl)) {}

TFModelEvaluator::EvaluationResult &
TFModelEvaluator::EvaluationResult::operator=(EvaluationResult &&Other) {
  Impl = std::move(Other.Impl);
  return *this;
}

void *TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) {
  return TF_TensorData(Impl->getOutput()[Index]);
}

const void *
TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) const {
  return TF_TensorData(Impl->getOutput()[Index]);
}

#define TFUTILS_GETDATATYPE_IMPL(T, E)                                         \
  template <> int TensorSpec::getDataType<T>() { return E; }

TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_IMPL)

#undef TFUTILS_GETDATATYPE_IMPL

TFModelEvaluator::EvaluationResult::~EvaluationResult() {}
TFModelEvaluator::~TFModelEvaluator() {}

void Logger::print(raw_ostream &OS) {
  if (RawLogData.empty())
    return;
  if (RawLogData[0].empty())
    return;
  size_t Tensor0Size = FeatureSpecs[0].Spec.getElementCount() *
                       FeatureSpecs[0].Spec.getElementByteSize();
  size_t NumberOfRecords = RawLogData[0].size() / Tensor0Size;
  if (NumberOfRecords == 0)
    return;

  OS << "feature_lists: {\n";
  for (size_t I = 0; I < FeatureSpecs.size(); ++I)
    writeTensorsAsFeatureLists(OS, FeatureSpecs[I], RawLogData[I].data(),
                               NumberOfRecords);

  if (IncludeReward)
    writeTensorsAsFeatureLists(OS, {RewardSpec, None}, RawLogData.back().data(),
                               NumberOfRecords);

  OS << "}\n";
}
#endif // defined(LLVM_HAVE_TF_API)