1 //===- TFUtils.cpp - tensorflow evaluation utilities ----------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This file implements utilities for interfacing with the TensorFlow C API.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "llvm/Config/config.h"
14 #if defined(LLVM_HAVE_TF_API)
15 
16 #include "llvm/ADT/Twine.h"
17 #include "llvm/Analysis/Utils/TFUtils.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/JSON.h"
20 #include "llvm/Support/ManagedStatic.h"
21 #include "llvm/Support/raw_ostream.h"
22 
23 #include "tensorflow/c/c_api.h"
24 #include "tensorflow/c/c_api_experimental.h"
25 
26 #include <cassert>
27 #include <numeric>
28 
29 using namespace llvm;
30 
31 namespace {
32 
33 using TFGraphPtr = std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)>;
34 using TFSessionOptionsPtr =
35     std::unique_ptr<TF_SessionOptions, decltype(&TF_DeleteSessionOptions)>;
36 using TFStatusPtr = std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)>;
37 
38 struct TFInitializer {
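  // TF_InitMain is expected to run once per process, before other TensorFlow
  // C API calls; the ManagedStatic wrapper below makes the initialization
  // lazy and thread-safe.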
39   TFInitializer() {
    assert(!IsInitialized && "TFInitializer should only be initialized once");
41     int Argc = 1;
42     const char *Name = "";
43     const char **NamePtr = &Name;
44     TF_InitMain(Name, &Argc, const_cast<char ***>(&NamePtr));
45     IsInitialized = true;
46   }
47   bool IsInitialized = false;
48 };
49 
50 llvm::ManagedStatic<TFInitializer> TFLibInitializer;
51 
52 bool ensureInitTF() { return TFLibInitializer->IsInitialized; }
53 
54 TFGraphPtr createTFGraph() {
55   return TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph);
56 }
57 
58 TFStatusPtr createTFStatus() {
59   return TFStatusPtr(TF_NewStatus(), &TF_DeleteStatus);
60 }
61 
62 TFSessionOptionsPtr createTFSessionOptions() {
63   return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions);
64 }
65 
66 /// Write the values of one tensor as a list.
67 template <typename T>
68 void writeTensorValues(raw_ostream &OutFile, const char *TensorData,
69                        size_t ElemCount) {
70   OutFile << "[";
71   const T *TypedData = reinterpret_cast<const T *>(TensorData);
72   for (size_t I = 0; I < ElemCount; ++I) {
73     if (I > 0)
74       OutFile << ", ";
75     OutFile << TypedData[I];
76   }
77   OutFile << "]";
78 }
79 
/// Untyped implementation of writeTensorsAsFeatureLists (defined below).
81 void writeRawTensorsAsFeatureLists(raw_ostream &OutFile,
82                                    const Logger::LoggedFeatureSpec &LoggedSpec,
83                                    const char *TensorData, size_t TensorCount) {
84   const char *FieldName = "<invalid>";
85   std::function<void(const char *)> ValueWriter;
86   const auto &Spec = LoggedSpec.Spec;
87   // The 'Feature' protobuf only has 3 possible fields: float_list,
88   // int64_list, or bytes_list, so we capture int32 values as int64. We don't
89   // support any other types.
90   if (Spec.isElementType<int64_t>()) {
91     FieldName = "int64_list";
92     ValueWriter = [&](const char *Data) {
93       writeTensorValues<int64_t>(OutFile, Data, Spec.getElementCount());
94     };
  } else if (Spec.isElementType<int32_t>()) {
    FieldName = "int64_list";
    ValueWriter = [&](const char *Data) {
      writeTensorValues<int32_t>(OutFile, Data, Spec.getElementCount());
    };
  } else if (Spec.isElementType<float>()) {
    FieldName = "float_list";
    ValueWriter = [&](const char *Data) {
      writeTensorValues<float>(OutFile, Data, Spec.getElementCount());
    };
  } else {
    llvm_unreachable("Unsupported tensor type.");
  }
110 
111   OutFile << "  feature_list: {\n";
112   OutFile << "    key: "
113           << "\""
114           << (LoggedSpec.LoggingName ? *LoggedSpec.LoggingName : Spec.name())
115           << "\" ";
116   OutFile << "value: {\n";
117   size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize();
118   for (const char *P = TensorData,
119                   *E = TensorData + TensorByteSize * TensorCount;
120        P < E; P += TensorByteSize) {
121     OutFile << "      feature: { " << FieldName << ": { value: ";
122     ValueWriter(P);
123     OutFile << " } }\n";
124   }
125   OutFile << "    }\n";
126   OutFile << "  }\n";
127 }
128 
129 /// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs.
130 /// The tensors are assumed to be stored contiguously, in row-major format,
131 /// in the TensorData buffer. Each tensor has the shape given by Spec. The
132 /// feature name in the output is either the provided LoggingName, if
133 /// specified, otherwise it's the name of the tensor (as given by Spec).
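///
/// For example, two consecutive records of a 2-element int64 tensor named
/// "x" would be rendered as (values hypothetical):
///   feature_list: {
///     key: "x" value: {
///       feature: { int64_list: { value: [1, 2] } }
///       feature: { int64_list: { value: [3, 4] } }
///     }
///   }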
134 template <typename T>
135 void writeTensorsAsFeatureLists(raw_ostream &OutFile,
136                                 const Logger::LoggedFeatureSpec &Spec,
137                                 const T *TensorData, size_t TensorCount) {
138   writeRawTensorsAsFeatureLists(
139       OutFile, Spec, reinterpret_cast<const char *>(TensorData), TensorCount);
140 }
141 } // namespace
142 
143 namespace llvm {
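/// Owns the output tensors produced by one model evaluation and releases
/// them with TF_DeleteTensor on destruction.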
144 class EvaluationResultImpl {
145 public:
  EvaluationResultImpl(size_t OutputSize)
      : OutputSize(OutputSize), Output(OutputSize) {}
148 
149   ~EvaluationResultImpl() {
150     for (auto *P : Output)
151       if (P)
152         TF_DeleteTensor(P);
153   }
154 
155   EvaluationResultImpl(const EvaluationResultImpl &) = delete;
156   EvaluationResultImpl(EvaluationResultImpl &&Other) = delete;
157   std::vector<TF_Tensor *> &getOutput() { return Output; }
158 
159 private:
160   const size_t OutputSize;
161   std::vector<TF_Tensor *> Output;
162 };
163 
164 size_t TensorSpec::getElementByteSize() const {
165   return TF_DataTypeSize(static_cast<TF_DataType>(TypeIndex));
166 }
167 
168 TensorSpec::TensorSpec(const std::string &Name, int Port, int TypeIndex,
169                        const std::vector<int64_t> &Shape)
170     : Name(Name), Port(Port), TypeIndex(TypeIndex), Shape(Shape),
171       ElementCount(std::accumulate(Shape.begin(), Shape.end(), 1,
172                                    std::multiplies<int64_t>())) {}
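// For example, a TensorSpec with Shape {2, 3} describes a row-major tensor
// of 6 elements; an empty Shape (a scalar) gets an ElementCount of 1, the
// initial value passed to std::accumulate above.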
173 
174 Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
175                                            const json::Value &Value) {
176   auto EmitError = [&](const llvm::Twine &Message) -> Optional<TensorSpec> {
177     std::string S;
178     llvm::raw_string_ostream OS(S);
179     OS << Value;
180     Ctx.emitError("Unable to parse JSON Value as spec (" + Message + "): " + S);
181     return None;
182   };
183   // FIXME: accept a Path as a parameter, and use it for error reporting.
184   json::Path::Root Root("tensor_spec");
185   json::ObjectMapper Mapper(Value, Root);
186   if (!Mapper)
187     return EmitError("Value is not a dict");
188 
189   std::string TensorName;
190   int TensorPort = -1;
191   std::string TensorType;
192   std::vector<int64_t> TensorShape;
193 
194   if (!Mapper.map<std::string>("name", TensorName))
195     return EmitError("'name' property not present or not a string");
196   if (!Mapper.map<std::string>("type", TensorType))
197     return EmitError("'type' property not present or not a string");
198   if (!Mapper.map<int>("port", TensorPort))
199     return EmitError("'port' property not present or not an int");
200   if (!Mapper.map<std::vector<int64_t>>("shape", TensorShape))
201     return EmitError("'shape' property not present or not an int array");
202 
203 #define PARSE_TYPE(T, E)                                                       \
204   if (TensorType == #T)                                                        \
205     return TensorSpec::createSpec<T>(TensorName, TensorShape, TensorPort);
206   TFUTILS_SUPPORTED_TYPES(PARSE_TYPE)
207 #undef PARSE_TYPE
208   return None;
209 }
210 
211 class TFModelEvaluatorImpl {
212 public:
213   TFModelEvaluatorImpl(StringRef SavedModelPath,
214                        const std::vector<TensorSpec> &InputSpecs,
215                        const std::vector<TensorSpec> &OutputSpecs,
216                        const char *Tags);
217 
218   bool isValid() const { return IsValid; }
219   size_t OutputSize() const { return OutputFeed.size(); }
220 
221   void evaluate(TF_Tensor **Output, TF_Status *Status) {
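    // Feed the pre-allocated input tensors and fetch OutputFeed.size()
    // freshly-allocated output tensors; the caller takes ownership of them.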
222     TF_SessionRun(Session, nullptr, InputFeed.data(), Input.data(),
223                   Input.size(), OutputFeed.data(), Output, OutputFeed.size(),
224                   nullptr, 0, nullptr, Status);
225   }
226 
227   void initInput(size_t Index, TF_DataType Type,
228                  const std::vector<int64_t> &Dimensions);
229   const std::vector<TF_Tensor *> &getInput() const { return Input; }
230 
231   ~TFModelEvaluatorImpl();
232 
233 private:
  /// The objects necessary for carrying out an evaluation of the SavedModel.
  /// They are expensive to set up, and we maintain them across all the
  /// evaluations of the model.
237   TF_Session *Session = nullptr;
238   TFGraphPtr Graph;
239   TFSessionOptionsPtr Options;
240 
241   /// The specification of the input nodes.
242   std::vector<TF_Output> InputFeed;
243 
  /// The input tensors. Each one must match, by index, the corresponding
  /// InputFeed value. We set up the tensors once and just mutate their
  /// scalars before each evaluation. The input tensors keep their value after
  /// an evaluation.
247   std::vector<TF_Tensor *> Input;
248 
  /// The specification of the output nodes. When evaluating, the tensors in
  /// the output vector must match, by index, the corresponding element in
  /// OutputFeed.
252   std::vector<TF_Output> OutputFeed;
253 
254   void invalidate() { IsValid = false; }
255 
256   bool IsValid = true;
257 
258   /// Reusable utility for ensuring we can bind the requested Name to a node in
259   /// the SavedModel Graph.
260   bool checkReportAndInvalidate(const TF_Output &Output,
261                                 const TensorSpec &OutputSpec);
262 };
263 } // namespace llvm
264 
265 TFModelEvaluatorImpl::TFModelEvaluatorImpl(
266     StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs,
267     const std::vector<TensorSpec> &OutputSpecs, const char *Tags)
268     : Graph(createTFGraph()), Options(createTFSessionOptions()),
269       InputFeed(InputSpecs.size()), Input(InputSpecs.size()),
270       OutputFeed(OutputSpecs.size()) {
271   if (!ensureInitTF()) {
    errs() << "TensorFlow should have been initialized";
273     return;
274   }
275   auto Status = createTFStatus();
276 
277   Session = TF_LoadSessionFromSavedModel(Options.get(), nullptr,
278                                          SavedModelPath.str().c_str(), &Tags, 1,
279                                          Graph.get(), nullptr, Status.get());
280   if (TF_GetCode(Status.get()) != TF_Code::TF_OK) {
281     errs() << TF_Message(Status.get());
282     invalidate();
283   }
284   for (size_t I = 0; I < InputSpecs.size(); ++I) {
285     auto &InputSpec = InputSpecs[I];
286     InputFeed[I] = {
287         TF_GraphOperationByName(Graph.get(), (InputSpec.name()).c_str()),
288         InputSpec.port()};
289     if (!checkReportAndInvalidate(InputFeed[I], InputSpec))
290       return;
291     initInput(I, static_cast<TF_DataType>(InputSpec.typeIndex()),
292               InputSpec.shape());
293   }
294   for (size_t I = 0; I < OutputSpecs.size(); ++I) {
295     auto &OutputSpec = OutputSpecs[I];
296     OutputFeed[I] = {
297         TF_GraphOperationByName(Graph.get(), (OutputSpec.name()).c_str()),
298         OutputSpec.port()};
299     if (!checkReportAndInvalidate(OutputFeed[I], OutputSpec))
300       return;
301   }
302 }
303 
304 TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath,
305                                    const std::vector<TensorSpec> &InputSpecs,
306                                    const std::vector<TensorSpec> &OutputSpecs,
307                                    const char *Tags)
308     : Impl(new TFModelEvaluatorImpl(SavedModelPath, InputSpecs, OutputSpecs,
309                                     Tags)) {
310   if (!Impl->isValid())
311     Impl.reset();
312 }
313 
314 TFModelEvaluatorImpl::~TFModelEvaluatorImpl() {
315   for (auto *T : Input) {
316     TF_DeleteTensor(T);
317   }
318   if (Session == nullptr)
319     return;
320   auto Status = createTFStatus();
321   TF_DeleteSession(Session, Status.get());
322   Session = nullptr;
323   if (TF_GetCode(Status.get()) != TF_Code::TF_OK)
324     errs() << "Could not delete TF session";
325 }
326 
327 bool TFModelEvaluatorImpl::checkReportAndInvalidate(
328     const TF_Output &Output, const TensorSpec &OutputSpec) {
329   if (Output.oper)
330     return true;
331   errs() << "Could not find TF_Output named: " + OutputSpec.name();
332   IsValid = false;
333   return IsValid;
334 }
335 
336 Optional<TFModelEvaluator::EvaluationResult> TFModelEvaluator::evaluate() {
337   if (!isValid())
338     return None;
339   std::unique_ptr<EvaluationResultImpl> Ret =
340       std::make_unique<EvaluationResultImpl>(Impl->OutputSize());
341   auto Status = createTFStatus();
342   Impl->evaluate(Ret->getOutput().data(), Status.get());
343   if (TF_GetCode(Status.get()) != TF_Code::TF_OK) {
344     errs() << TF_Message(Status.get());
345     Impl.reset();
346     return None;
347   }
348   return EvaluationResult(std::move(Ret));
349 }
350 
351 void TFModelEvaluatorImpl::initInput(size_t Index, TF_DataType Type,
352                                      const std::vector<int64_t> &Dimensions) {
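  // Allocate a zero-initialized tensor whose byte size is the element size
  // times the product of all dimensions.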
353   int64_t TotalSize = TF_DataTypeSize(Type);
354   for (auto &D : Dimensions)
355     TotalSize *= D;
356 
357   Input[Index] =
358       TF_AllocateTensor(Type, Dimensions.data(), Dimensions.size(), TotalSize);
359   std::memset(TF_TensorData(Input[Index]), 0, TotalSize);
360 }
361 
362 void *TFModelEvaluator::getUntypedInput(size_t Index) {
363   return TF_TensorData(Impl->getInput()[Index]);
364 }
365 
366 TFModelEvaluator::EvaluationResult::EvaluationResult(
367     std::unique_ptr<EvaluationResultImpl> Impl)
368     : Impl(std::move(Impl)) {}
369 
370 TFModelEvaluator::EvaluationResult::EvaluationResult(EvaluationResult &&Other)
371     : Impl(std::move(Other.Impl)) {}
372 
373 TFModelEvaluator::EvaluationResult &
374 TFModelEvaluator::EvaluationResult::operator=(EvaluationResult &&Other) {
375   Impl = std::move(Other.Impl);
376   return *this;
377 }
378 
379 void *TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) {
380   return TF_TensorData(Impl->getOutput()[Index]);
381 }
382 
383 const void *
384 TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) const {
385   return TF_TensorData(Impl->getOutput()[Index]);
386 }
387 
388 #define TFUTILS_GETDATATYPE_IMPL(T, E)                                         \
389   template <> int TensorSpec::getDataType<T>() { return E; }
390 
391 TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_IMPL)
392 
393 #undef TFUTILS_GETDATATYPE_IMPL
394 
395 TFModelEvaluator::EvaluationResult::~EvaluationResult() {}
396 TFModelEvaluator::~TFModelEvaluator() {}
397 
398 void Logger::print(raw_ostream &OS) {
399   if (RawLogData.empty())
400     return;
401   if (RawLogData[0].empty())
402     return;
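  // All raw log buffers are expected to hold the same number of records, so
  // derive the record count from the first feature's buffer size.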
403   size_t Tensor0Size = FeatureSpecs[0].Spec.getElementCount() *
404                        FeatureSpecs[0].Spec.getElementByteSize();
405   size_t NumberOfRecords = RawLogData[0].size() / Tensor0Size;
406   if (NumberOfRecords == 0)
407     return;
408 
409   OS << "feature_lists: {\n";
410   for (size_t I = 0; I < FeatureSpecs.size(); ++I)
411     writeTensorsAsFeatureLists(OS, FeatureSpecs[I], RawLogData[I].data(),
412                                NumberOfRecords);
413 
414   if (IncludeReward)
415     writeTensorsAsFeatureLists(OS, {RewardSpec, None}, RawLogData.back().data(),
416                                NumberOfRecords);
417 
418   OS << "}\n";
419 }
420 #endif // defined(LLVM_HAVE_TF_API)
421