//===- TFUtils.cpp - tensorflow evaluation utilities ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities for interfacing with tensorflow C APIs.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/Support/Base64.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"

#include "google/protobuf/struct.pb.h"
#include "google/protobuf/text_format.h"
#include "tensorflow/c/c_api.h"
#include "tensorflow/c/c_api_experimental.h"
#include "tensorflow/core/example/example.pb.h"
#include <cassert>
#include <numeric>

using namespace llvm;

using google::protobuf::Message;
using google::protobuf::TextFormat;

static cl::opt<bool>
    ProtobufTextMode("tfutils-text-log", cl::init(false), cl::Hidden,
                     cl::desc("Output textual (human-readable) protobuf."));

namespace {

using TFGraphPtr = std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)>;
using TFSessionOptionsPtr =
    std::unique_ptr<TF_SessionOptions, decltype(&TF_DeleteSessionOptions)>;
using TFStatusPtr = std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)>;

struct TFInitializer {
  TFInitializer() {
    int Argc = 1;
    const char *Name = "";
    const char **NamePtr = &Name;
    TF_InitMain(Name, &Argc, const_cast<char ***>(&NamePtr));
  }
};

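// Run the one-time TF_InitMain above exactly once; the function-local static
// makes this safe to call from multiple threads (C++11 static initialization).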
bool ensureInitTF() {
  static TFInitializer TFLibInitializer;
  return true;
}

TFGraphPtr createTFGraph() {
  return TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph);
}

TFStatusPtr createTFStatus() {
  return TFStatusPtr(TF_NewStatus(), &TF_DeleteStatus);
}

TFSessionOptionsPtr createTFSessionOptions() {
  return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions);
}

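// Serialize SE as human-readable protobuf text when -tfutils-text-log is set,
// and as the compact binary wire format otherwise.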
void serialize(const Message &SE, std::string *OutStr) {
  if (ProtobufTextMode) {
    TextFormat::PrintToString(SE, OutStr);
  } else {
    *OutStr = SE.SerializeAsString();
  }
}

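// Map our TensorType enum to the corresponding TF_DataType constant expected
// by the TensorFlow C API.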
int getTFTypeIndex(TensorType TType) {
  switch (TType) {
  case TensorType::Double:
    return TF_DOUBLE;
  case TensorType::Float:
    return TF_FLOAT;
  case TensorType::Int8:
    return TF_INT8;
  case TensorType::UInt8:
    return TF_UINT8;
  case TensorType::Int16:
    return TF_INT16;
  case TensorType::UInt16:
    return TF_UINT16;
  case TensorType::Int32:
    return TF_INT32;
  case TensorType::UInt32:
    return TF_UINT32;
  case TensorType::Int64:
    return TF_INT64;
  case TensorType::UInt64:
    return TF_UINT64;
  case TensorType::Invalid:
    llvm_unreachable("Unknown tensor type");
  }
}
} // namespace

namespace llvm {
class EvaluationResultImpl {
public:
  EvaluationResultImpl(size_t OutputSize)
      : OutputSize(OutputSize), Output(OutputSize) {}

  ~EvaluationResultImpl() {
    for (auto *P : Output)
      if (P)
        TF_DeleteTensor(P);
  }

  EvaluationResultImpl(const EvaluationResultImpl &) = delete;
  EvaluationResultImpl(EvaluationResultImpl &&Other) = delete;
  std::vector<TF_Tensor *> &getOutput() { return Output; }

private:
  const size_t OutputSize;
  std::vector<TF_Tensor *> Output;
};

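// Illustrative usage sketch of the public wrapper defined below (the tensor
// names are hypothetical; the accessors are those declared in TFUtils.h):
//
//   std::vector<TensorSpec> Inputs{
//       TensorSpec::createSpec<int64_t>("some_feature", {1})};
//   std::vector<TensorSpec> Outputs{
//       TensorSpec::createSpec<float>("some_score", {1})};
//   TFModelEvaluator Evaluator("path/to/saved_model", Inputs, Outputs);
//   if (Evaluator.isValid()) {
//     *Evaluator.getInput<int64_t>(0) = 42;
//     if (auto Result = Evaluator.evaluate())
//       float Score = *Result->getTensorValue<float>(0);
//   }
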
class TFModelEvaluatorImpl {
public:
  TFModelEvaluatorImpl(StringRef SavedModelPath,
                       const std::vector<TensorSpec> &InputSpecs,
                       function_ref<TensorSpec(size_t)> GetOutputSpecs,
                       size_t OutputSpecsSize, const char *Tags);

  bool isValid() const { return IsValid; }
  size_t OutputSize() const { return OutputFeed.size(); }

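  // Runs the session: Input[I] is fed to the graph node identified by
  // InputFeed[I], and TF_SessionRun writes one freshly allocated TF_Tensor*
  // per OutputFeed entry into Output; the caller takes ownership of those.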
  void evaluate(TF_Tensor **Output, TF_Status *Status) {
    TF_SessionRun(Session, nullptr, InputFeed.data(), Input.data(),
                  Input.size(), OutputFeed.data(), Output, OutputFeed.size(),
                  nullptr, 0, nullptr, Status);
  }

  void initInput(size_t Index, TF_DataType Type,
                 const std::vector<int64_t> &Dimensions);
  const std::vector<TF_Tensor *> &getInput() const { return Input; }

  ~TFModelEvaluatorImpl();

private:
  /// The objects necessary for carrying out an evaluation of the SavedModel.
  /// They are expensive to set up, and we maintain them across all the
  /// evaluations of the model.
  TF_Session *Session = nullptr;
  TFGraphPtr Graph;
  TFSessionOptionsPtr Options;

  /// The specification of the input nodes.
  std::vector<TF_Output> InputFeed;

  /// The input tensors. They must match by index the corresponding InputFeed
  /// value. We set up the tensors once and just mutate their scalars before
  /// each evaluation. The input tensors keep their value after an evaluation.
  std::vector<TF_Tensor *> Input;

  /// The specification of the output nodes. When evaluating, the tensors in
  /// the output tensor vector must match by index the corresponding element
  /// in the OutputFeed.
  std::vector<TF_Output> OutputFeed;

  void invalidate() { IsValid = false; }

  bool IsValid = true;

  /// Reusable utility for ensuring we can bind the requested Name to a node in
  /// the SavedModel Graph.
  bool checkReportAndInvalidate(const TF_Output &Output,
                                const TensorSpec &OutputSpec);
};

class LoggerDataImpl {
  const std::vector<LoggedFeatureSpec> LoggedFeatureSpecs;
  const TensorSpec RewardSpec;
  const bool IncludeReward;

  std::vector<tensorflow::FeatureList> FeatureLists;
  tensorflow::FeatureList Reward;

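  // The log is staged as one tensorflow::FeatureList per logged tensor (plus
  // one for the reward, if included); transferLog() moves these into a
  // SequenceExample, keyed by logging name, when the log is flushed.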
  bool isSelfConsistent(const tensorflow::SequenceExample &SE,
                        size_t NrRecords) const {
    bool Ret = true;
    for (const auto &TSpecs : LoggedFeatureSpecs) {
      const auto &Name = TSpecs.getLoggingName();
      const auto &FL = SE.feature_lists().feature_list().at(Name).feature();
      if (NrRecords != static_cast<size_t>(FL.size())) {
        dbgs() << "[TF-UTILS]: " << Name << " has missing records. Expected "
               << NrRecords << " got " << FL.size() << "\n";
        Ret = false;
      }
    }
    if (IncludeReward && static_cast<size_t>(SE.feature_lists()
                                                 .feature_list()
                                                 .at(RewardSpec.name())
                                                 .feature()
                                                 .size()) != NrRecords) {
      dbgs() << "[TF-UTILS]: reward is missing records.\n";
      Ret = false;
    }
    return Ret;
  }

  void transferLog(tensorflow::SequenceExample &SE) {
    auto *FL = SE.mutable_feature_lists()->mutable_feature_list();
    if (IncludeReward)
      (*FL)[RewardSpec.name()] = std::move(Reward);
    assert(FeatureLists.size() == LoggedFeatureSpecs.size());
    for (size_t I = 0; I < FeatureLists.size(); ++I) {
      const auto &LFS = LoggedFeatureSpecs[I];
      (*FL)[LFS.getLoggingName()] = std::move(FeatureLists[I]);
    }
  }

public:
  LoggerDataImpl(const std::vector<LoggedFeatureSpec> &LoggedSpecs,
                 const TensorSpec &RewardSpec, bool IncludeReward)
      : LoggedFeatureSpecs(LoggedSpecs), RewardSpec(RewardSpec),
        IncludeReward(IncludeReward), FeatureLists(LoggedFeatureSpecs.size()) {}

  // Flush the logged info to a stream and clear the log contents.
  void flush(std::string *Str) {
    size_t NrRecords = getNrRecords();
    (void)NrRecords;
    tensorflow::SequenceExample SE;
    transferLog(SE);
    assert(isSelfConsistent(SE, NrRecords));
    serialize(SE, Str);
  }

  char *addNewTensor(size_t FeatureID) {
    const auto &Spec = LoggedFeatureSpecs[FeatureID].Spec;
    if (Spec.isElementType<float>()) {
      auto *RF = FeatureLists[FeatureID]
                     .add_feature()
                     ->mutable_float_list()
                     ->mutable_value();
      RF->Resize(Spec.getElementCount(), 0.0);
      return reinterpret_cast<char *>(RF->mutable_data());
    } else if (Spec.isElementType<int32_t>() ||
               Spec.isElementType<int64_t>()) {
      auto *RF = FeatureLists[FeatureID]
                     .add_feature()
                     ->mutable_int64_list()
                     ->mutable_value();
      RF->Resize(Spec.getElementCount(), 0);
      return reinterpret_cast<char *>(RF->mutable_data());
    }
    llvm_unreachable("Unsupported tensor type.");
  }

  template <typename T> void logReward(T Value) {
    assert(IncludeReward);
    if (RewardSpec.isElementType<float>())
      Reward.add_feature()->mutable_float_list()->add_value(Value);
    else if (RewardSpec.isElementType<int32_t>() ||
             RewardSpec.isElementType<int64_t>())
      Reward.add_feature()->mutable_int64_list()->add_value(Value);
    else
      llvm_unreachable("Unsupported tensor type.");
  }

  size_t getNrRecords() const {
    return FeatureLists.empty() ? 0 : FeatureLists[0].feature().size();
  }
};
} // namespace llvm

TFModelEvaluatorImpl::TFModelEvaluatorImpl(
    StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs,
    function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize,
    const char *Tags = "serve")
    : Graph(createTFGraph()), Options(createTFSessionOptions()),
      InputFeed(InputSpecs.size()), Input(InputSpecs.size()),
      OutputFeed(OutputSpecsSize) {
  if (!ensureInitTF()) {
    errs() << "Tensorflow should have been initialized";
    return;
  }
  auto Status = createTFStatus();

  Session = TF_LoadSessionFromSavedModel(Options.get(), nullptr,
                                         SavedModelPath.str().c_str(), &Tags, 1,
                                         Graph.get(), nullptr, Status.get());
  if (TF_GetCode(Status.get()) != TF_Code::TF_OK) {
    errs() << TF_Message(Status.get());
    invalidate();
  }
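  // Bind each input spec to a graph node. A spec whose node is missing from
  // this SavedModel is tolerated, but only if all such specs are trailing,
  // so InputFeed/Input can simply be truncated to the supported prefix.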
  size_t NrSupported = 0;
  for (size_t I = 0; I < InputSpecs.size(); ++I) {
    auto &InputSpec = InputSpecs[I];
    InputFeed[I] = {
        TF_GraphOperationByName(Graph.get(), (InputSpec.name()).c_str()),
        InputSpec.port()};
    if (!InputFeed[I].oper) {
      continue;
    }
    if (NrSupported++ != I) {
      errs()
          << "Unsupported features must be placed at the end of the InputSpecs";
      invalidate();
      return;
    }
    if (!checkReportAndInvalidate(InputFeed[I], InputSpec))
      return;
    initInput(I, static_cast<TF_DataType>(getTFTypeIndex(InputSpec.type())),
              InputSpec.shape());
  }
  InputFeed.resize(NrSupported);
  Input.resize(NrSupported);

  for (size_t I = 0; I < OutputSpecsSize; ++I) {
    auto OutputSpec = GetOutputSpecs(I);
    OutputFeed[I] = {
        TF_GraphOperationByName(Graph.get(), (OutputSpec.name()).c_str()),
        OutputSpec.port()};
    if (!checkReportAndInvalidate(OutputFeed[I], OutputSpec))
      return;
  }
}

TFModelEvaluator::TFModelEvaluator(
    StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs,
    function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize,
    const char *Tags)
    : Impl(new TFModelEvaluatorImpl(SavedModelPath, InputSpecs, GetOutputSpecs,
                                    OutputSpecsSize, Tags)) {
  if (!Impl->isValid())
    Impl.reset();
}

TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath,
                                   const std::vector<TensorSpec> &InputSpecs,
                                   const std::vector<TensorSpec> &OutputSpecs,
                                   const char *Tags)
    : TFModelEvaluator(
          SavedModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I]; },
          OutputSpecs.size(), Tags) {}

TFModelEvaluatorImpl::~TFModelEvaluatorImpl() {
  for (auto *T : Input) {
    TF_DeleteTensor(T);
  }
  if (Session == nullptr)
    return;
  auto Status = createTFStatus();
  TF_DeleteSession(Session, Status.get());
  Session = nullptr;
  if (TF_GetCode(Status.get()) != TF_Code::TF_OK)
    errs() << "Could not delete TF session";
}

bool TFModelEvaluatorImpl::checkReportAndInvalidate(
    const TF_Output &Output, const TensorSpec &OutputSpec) {
  if (Output.oper)
    return true;
  errs() << "Could not find TF_Output named: " + OutputSpec.name();
  IsValid = false;
  return IsValid;
}

Optional<TFModelEvaluator::EvaluationResult> TFModelEvaluator::evaluate() {
  if (!isValid())
    return None;
  std::unique_ptr<EvaluationResultImpl> Ret =
      std::make_unique<EvaluationResultImpl>(Impl->OutputSize());
  auto Status = createTFStatus();
  Impl->evaluate(Ret->getOutput().data(), Status.get());
  if (TF_GetCode(Status.get()) != TF_Code::TF_OK) {
    errs() << TF_Message(Status.get());
    Impl.reset();
    return None;
  }
  return EvaluationResult(std::move(Ret));
}

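// Allocate a dense, zero-initialized input tensor: the byte size is the
// element size of Type times the product of all dimensions.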
void TFModelEvaluatorImpl::initInput(size_t Index, TF_DataType Type,
                                     const std::vector<int64_t> &Dimensions) {
  int64_t TotalSize = TF_DataTypeSize(Type);
  for (auto &D : Dimensions)
    TotalSize *= D;

  Input[Index] =
      TF_AllocateTensor(Type, Dimensions.data(), Dimensions.size(), TotalSize);
  std::memset(TF_TensorData(Input[Index]), 0, TotalSize);
}

void *TFModelEvaluator::getUntypedInput(size_t Index) {
  if (Index < Impl->getInput().size())
    return TF_TensorData(Impl->getInput()[Index]);
  return nullptr;
}

TFModelEvaluator::EvaluationResult::EvaluationResult(
    std::unique_ptr<EvaluationResultImpl> Impl)
    : Impl(std::move(Impl)) {}

TFModelEvaluator::EvaluationResult::EvaluationResult(EvaluationResult &&Other)
    : Impl(std::move(Other.Impl)) {}

TFModelEvaluator::EvaluationResult &
TFModelEvaluator::EvaluationResult::operator=(EvaluationResult &&Other) {
  Impl = std::move(Other.Impl);
  return *this;
}

void *TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) {
  return TF_TensorData(Impl->getOutput()[Index]);
}

const void *
TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) const {
  return TF_TensorData(Impl->getOutput()[Index]);
}

TFModelEvaluator::EvaluationResult::~EvaluationResult() {}
TFModelEvaluator::~TFModelEvaluator() {}

Logger::Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs,
               const TensorSpec &RewardSpec, bool IncludeReward)
    : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
      IncludeReward(IncludeReward),
      LoggerData(std::make_unique<LoggerDataImpl>(FeatureSpecs, RewardSpec,
                                                  IncludeReward)) {}

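// Illustrative Logger usage (a sketch with hypothetical tensor names,
// assuming the interface declared in TFUtils.h):
//
//   Logger Log({{TensorSpec::createSpec<int64_t>("some_feature", {1}), None}},
//              TensorSpec::createSpec<float>("reward", {1}),
//              /*IncludeReward=*/true);
//   int64_t V = 42;
//   Log.logInt64Value(0, &V);
//   Log.logFloatReward(0.5f);
//   Log.flush(outs());
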
Logger::~Logger() {}

#define LOG_REWARD(NAME, TYPE)                                                 \
  void Logger::log##NAME##Reward(TYPE Value) {                                 \
    assert(IncludeReward);                                                     \
    LoggerData->logReward(Value);                                              \
  }

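// Each instantiation below defines one member, e.g. LOG_REWARD(Float, float)
// expands to the definition of Logger::logFloatReward(float).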
LOG_REWARD(Float, float)
LOG_REWARD(Int32, int32_t)
LOG_REWARD(Int64, int64_t)
#undef LOG_REWARD

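// A "final" reward pads the reward list with zeroes for all but the last
// record before appending the final value, so the reward FeatureList stays
// aligned one-to-one with getNrRecords().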
#define LOG_FINAL_REWARD(NAME, TYPE)                                           \
  void Logger::log##NAME##FinalReward(TYPE Value) {                            \
    assert(RewardSpec.isElementType<TYPE>());                                  \
    for (size_t I = 1; I < LoggerData->getNrRecords(); ++I)                    \
      log##NAME##Reward(0);                                                    \
    log##NAME##Reward(Value);                                                  \
  }

LOG_FINAL_REWARD(Float, float)
LOG_FINAL_REWARD(Int32, int32_t)
LOG_FINAL_REWARD(Int64, int64_t)
#undef LOG_FINAL_REWARD

void Logger::logFloatValue(size_t FeatureID, const float *Value) {
  assert(FeatureSpecs[FeatureID].Spec.isElementType<float>());
  logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
}

void Logger::logInt64Value(size_t FeatureID, const int64_t *Value) {
  assert(FeatureSpecs[FeatureID].Spec.isElementType<int64_t>());
  logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
}

void Logger::logInt32Value(size_t FeatureID, const int32_t *Value) {
  assert(FeatureSpecs[FeatureID].Spec.isElementType<int32_t>());
  logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
}

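// tensorflow::Feature only offers int64, float, and bytes lists, so 32-bit
// integer tensors are widened element-wise to int64 when logged.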
void Logger::logSpecifiedTensorValue(size_t FeatureID, const char *RawData) {
  const auto &Spec = FeatureSpecs[FeatureID].Spec;
  char *Buff = addEntryAndGetFloatOrInt64Buffer(FeatureID);
  if (Spec.isElementType<int32_t>())
    for (size_t I = 0; I < Spec.getElementCount(); ++I)
      (reinterpret_cast<int64_t *>(Buff))[I] =
          static_cast<int64_t>((reinterpret_cast<const int32_t *>(RawData))[I]);
  else if (Spec.isElementType<int64_t>() || Spec.isElementType<float>())
    std::memcpy(Buff, RawData,
                Spec.getElementCount() * Spec.getElementByteSize());
  else
    llvm_unreachable("Unsupported tensor type");
}

char *Logger::addEntryAndGetFloatOrInt64Buffer(size_t FeatureID) {
  return reinterpret_cast<char *>(LoggerData->addNewTensor(FeatureID));
}

void Logger::flush(std::string *Str) { LoggerData->flush(Str); }

void Logger::flush(raw_ostream &OS) {
  std::string Buff;
  LoggerData->flush(&Buff);
  OS << Buff;
}

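// Emit all loggers as a single google::protobuf::Struct: one field per logger
// name, whose string value is the logger's serialized SequenceExample,
// base64-encoded in binary mode and left as-is in -tfutils-text-log mode.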
void Logger::flushLogs(raw_ostream &OS,
                       const StringMap<std::unique_ptr<Logger>> &Loggers) {
  google::protobuf::Struct Msg;
  for (const auto &NamedLogger : Loggers) {
    tensorflow::SequenceExample SE;
    const auto &Logger = NamedLogger.second;
    std::string Unencoded;
    if (Logger->LoggerData->getNrRecords() > 0)
      Logger->flush(&Unencoded);

    (*Msg.mutable_fields())[NamedLogger.first().str()]
        .mutable_string_value()
        ->append(ProtobufTextMode ? Unencoded : encodeBase64(Unencoded));
  }

  std::string OutStr;
  serialize(Msg, &OutStr);
  OS << OutStr;
}
#endif // defined(LLVM_HAVE_TF_API)