1 //===-- lib/Semantics/mod-file.cpp ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "mod-file.h"
10 #include "resolve-names.h"
11 #include "flang/Evaluate/tools.h"
12 #include "flang/Parser/message.h"
13 #include "flang/Parser/parsing.h"
14 #include "flang/Semantics/scope.h"
15 #include "flang/Semantics/semantics.h"
16 #include "flang/Semantics/symbol.h"
17 #include "flang/Semantics/tools.h"
18 #include "llvm/Support/FileSystem.h"
19 #include "llvm/Support/MemoryBuffer.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include <algorithm>
22 #include <fstream>
23 #include <set>
24 #include <string_view>
25 #include <vector>
26 
27 namespace Fortran::semantics {
28 
29 using namespace parser::literals;
30 
31 // The first line of a file that identifies it as a .mod file.
32 // The first three bytes are a Unicode byte order mark that ensures
33 // that the module file is decoded as UTF-8 even if source files
34 // are using another encoding.
35 struct ModHeader {
36   static constexpr const char bom[3 + 1]{"\xef\xbb\xbf"};
37   static constexpr int magicLen{13};
38   static constexpr int sumLen{16};
39   static constexpr const char magic[magicLen + 1]{"!mod$ v1 sum:"};
40   static constexpr char terminator{'\n'};
41   static constexpr int len{magicLen + 1 + sumLen};
42 };
43 
44 static std::optional<SourceName> GetSubmoduleParent(const parser::Program &);
45 static SymbolVector CollectSymbols(const Scope &);
46 static void PutEntity(llvm::raw_ostream &, const Symbol &);
47 static void PutObjectEntity(llvm::raw_ostream &, const Symbol &);
48 static void PutProcEntity(llvm::raw_ostream &, const Symbol &);
49 static void PutPassName(llvm::raw_ostream &, const std::optional<SourceName> &);
50 static void PutTypeParam(llvm::raw_ostream &, const Symbol &);
51 static void PutEntity(
52     llvm::raw_ostream &, const Symbol &, std::function<void()>, Attrs);
53 static void PutInit(llvm::raw_ostream &, const Symbol &, const MaybeExpr &);
54 static void PutInit(llvm::raw_ostream &, const MaybeIntExpr &);
55 static void PutBound(llvm::raw_ostream &, const Bound &);
56 static llvm::raw_ostream &PutAttrs(llvm::raw_ostream &, Attrs,
57     const MaybeExpr & = std::nullopt, std::string before = ","s,
58     std::string after = ""s);
59 
60 static llvm::raw_ostream &PutAttr(llvm::raw_ostream &, Attr);
61 static llvm::raw_ostream &PutType(llvm::raw_ostream &, const DeclTypeSpec &);
62 static llvm::raw_ostream &PutLower(llvm::raw_ostream &, const std::string &);
63 static std::error_code WriteFile(
64     const std::string &, const std::string &, bool = true);
65 static bool FileContentsMatch(
66     const std::string &, const std::string &, const std::string &);
67 static std::string CheckSum(const std::string_view &);
68 
69 // Collect symbols needed for a subprogram interface
70 class SubprogramSymbolCollector {
71 public:
72   SubprogramSymbolCollector(const Symbol &symbol, const Scope &scope)
73       : symbol_{symbol}, scope_{scope} {}
74   const SymbolVector &symbols() const { return need_; }
75   const std::set<SourceName> &imports() const { return imports_; }
76   void Collect();
77 
78 private:
79   const Symbol &symbol_;
80   const Scope &scope_;
81   bool isInterface_{false};
82   SymbolVector need_; // symbols that are needed
83   SymbolSet needSet_; // symbols already in need_
84   SymbolSet useSet_; // use-associations that might be needed
85   std::set<SourceName> imports_; // imports from host that are needed
86 
87   void DoSymbol(const Symbol &);
88   void DoSymbol(const SourceName &, const Symbol &);
89   void DoType(const DeclTypeSpec *);
90   void DoBound(const Bound &);
91   void DoParamValue(const ParamValue &);
92   bool NeedImport(const SourceName &, const Symbol &);
93 
94   template <typename T> void DoExpr(evaluate::Expr<T> expr) {
95     for (const Symbol &symbol : evaluate::CollectSymbols(expr)) {
96       DoSymbol(symbol);
97     }
98   }
99 };
100 
101 bool ModFileWriter::WriteAll() {
102   WriteAll(context_.globalScope());
103   return !context_.AnyFatalError();
104 }
105 
106 void ModFileWriter::WriteAll(const Scope &scope) {
107   for (const auto &child : scope.children()) {
108     WriteOne(child);
109   }
110 }
111 
112 void ModFileWriter::WriteOne(const Scope &scope) {
113   if (scope.kind() == Scope::Kind::Module) {
114     auto *symbol{scope.symbol()};
115     if (!symbol->test(Symbol::Flag::ModFile)) {
116       Write(*symbol);
117     }
118     WriteAll(scope); // write out submodules
119   }
120 }
121 
122 // Construct the name of a module file. Non-empty ancestorName means submodule.
123 static std::string ModFileName(const SourceName &name,
124     const std::string &ancestorName, const std::string &suffix) {
125   std::string result{name.ToString() + suffix};
126   return ancestorName.empty() ? result : ancestorName + '-' + result;
127 }
128 
129 // Write the module file for symbol, which must be a module or submodule.
130 void ModFileWriter::Write(const Symbol &symbol) {
131   auto *ancestor{symbol.get<ModuleDetails>().ancestor()};
132   auto ancestorName{ancestor ? ancestor->GetName().value().ToString() : ""s};
133   auto path{context_.moduleDirectory() + '/' +
134       ModFileName(symbol.name(), ancestorName, context_.moduleFileSuffix())};
135   PutSymbols(DEREF(symbol.scope()));
136   if (std::error_code error{
137           WriteFile(path, GetAsString(symbol), context_.debugModuleWriter())}) {
138     context_.Say(
139         symbol.name(), "Error writing %s: %s"_err_en_US, path, error.message());
140   }
141 }
142 
143 // Return the entire body of the module file
144 // and clear saved uses, decls, and contains.
145 std::string ModFileWriter::GetAsString(const Symbol &symbol) {
146   std::string buf;
147   llvm::raw_string_ostream all{buf};
148   auto &details{symbol.get<ModuleDetails>()};
149   if (!details.isSubmodule()) {
150     all << "module " << symbol.name();
151   } else {
152     auto *parent{details.parent()->symbol()};
153     auto *ancestor{details.ancestor()->symbol()};
154     all << "submodule(" << ancestor->name();
155     if (parent != ancestor) {
156       all << ':' << parent->name();
157     }
158     all << ") " << symbol.name();
159   }
160   all << '\n' << uses_.str();
161   uses_.str().clear();
162   all << useExtraAttrs_.str();
163   useExtraAttrs_.str().clear();
164   all << decls_.str();
165   decls_.str().clear();
166   auto str{contains_.str()};
167   contains_.str().clear();
168   if (!str.empty()) {
169     all << "contains\n" << str;
170   }
171   all << "end\n";
172   return all.str();
173 }
174 
175 // Put out the visible symbols from scope.
176 void ModFileWriter::PutSymbols(const Scope &scope) {
177   std::string buf;
178   llvm::raw_string_ostream typeBindings{
179       buf}; // stuff after CONTAINS in derived type
180   for (const Symbol &symbol : CollectSymbols(scope)) {
181     PutSymbol(typeBindings, symbol);
182   }
183   if (auto str{typeBindings.str()}; !str.empty()) {
184     CHECK(scope.IsDerivedType());
185     decls_ << "contains\n" << str;
186   }
187 }
188 
189 // Emit a symbol to decls_, except for bindings in a derived type (type-bound
190 // procedures, type-bound generics, final procedures) which go to typeBindings.
191 void ModFileWriter::PutSymbol(
192     llvm::raw_ostream &typeBindings, const Symbol &symbol) {
193   std::visit(common::visitors{
194                  [&](const ModuleDetails &) { /* should be current module */ },
195                  [&](const DerivedTypeDetails &) { PutDerivedType(symbol); },
196                  [&](const SubprogramDetails &) { PutSubprogram(symbol); },
197                  [&](const GenericDetails &x) {
198                    if (symbol.owner().IsDerivedType()) {
199                      // generic binding
200                      for (const Symbol &proc : x.specificProcs()) {
201                        typeBindings << "generic::" << symbol.name() << "=>"
202                                     << proc.name() << '\n';
203                      }
204                    } else {
205                      PutGeneric(symbol);
206                      if (x.specific()) {
207                        PutSymbol(typeBindings, *x.specific());
208                      }
209                      if (x.derivedType()) {
210                        PutSymbol(typeBindings, *x.derivedType());
211                      }
212                    }
213                  },
214                  [&](const UseDetails &) { PutUse(symbol); },
215                  [](const UseErrorDetails &) {},
216                  [&](const ProcBindingDetails &x) {
217                    bool deferred{symbol.attrs().test(Attr::DEFERRED)};
218                    typeBindings << "procedure";
219                    if (deferred) {
220                      typeBindings << '(' << x.symbol().name() << ')';
221                    }
222                    PutPassName(typeBindings, x.passName());
223                    auto attrs{symbol.attrs()};
224                    if (x.passName()) {
225                      attrs.reset(Attr::PASS);
226                    }
227                    PutAttrs(typeBindings, attrs);
228                    typeBindings << "::" << symbol.name();
229                    if (!deferred && x.symbol().name() != symbol.name()) {
230                      typeBindings << "=>" << x.symbol().name();
231                    }
232                    typeBindings << '\n';
233                  },
234                  [&](const NamelistDetails &x) {
235                    decls_ << "namelist/" << symbol.name();
236                    char sep{'/'};
237                    for (const Symbol &object : x.objects()) {
238                      decls_ << sep << object.name();
239                      sep = ',';
240                    }
241                    decls_ << '\n';
242                  },
243                  [&](const CommonBlockDetails &x) {
244                    decls_ << "common/" << symbol.name();
245                    char sep = '/';
246                    for (const auto &object : x.objects()) {
247                      decls_ << sep << object->name();
248                      sep = ',';
249                    }
250                    decls_ << '\n';
251                    if (symbol.attrs().test(Attr::BIND_C)) {
252                      PutAttrs(decls_, symbol.attrs(), x.bindName(), ""s);
253                      decls_ << "::/" << symbol.name() << "/\n";
254                    }
255                  },
256                  [&](const FinalProcDetails &) {
257                    typeBindings << "final::" << symbol.name() << '\n';
258                  },
259                  [](const HostAssocDetails &) {},
260                  [](const MiscDetails &) {},
261                  [&](const auto &) { PutEntity(decls_, symbol); },
262              },
263       symbol.details());
264 }
265 
266 void ModFileWriter::PutDerivedType(const Symbol &typeSymbol) {
267   auto &details{typeSymbol.get<DerivedTypeDetails>()};
268   PutAttrs(decls_ << "type", typeSymbol.attrs());
269   if (const DerivedTypeSpec * extends{typeSymbol.GetParentTypeSpec()}) {
270     decls_ << ",extends(" << extends->name() << ')';
271   }
272   decls_ << "::" << typeSymbol.name();
273   auto &typeScope{*typeSymbol.scope()};
274   if (!details.paramNames().empty()) {
275     char sep{'('};
276     for (const auto &name : details.paramNames()) {
277       decls_ << sep << name;
278       sep = ',';
279     }
280     decls_ << ')';
281   }
282   decls_ << '\n';
283   if (details.sequence()) {
284     decls_ << "sequence\n";
285   }
286   PutSymbols(typeScope);
287   decls_ << "end type\n";
288 }
289 
290 // Attributes that may be in a subprogram prefix
291 static const Attrs subprogramPrefixAttrs{Attr::ELEMENTAL, Attr::IMPURE,
292     Attr::MODULE, Attr::NON_RECURSIVE, Attr::PURE, Attr::RECURSIVE};
293 
294 void ModFileWriter::PutSubprogram(const Symbol &symbol) {
295   auto attrs{symbol.attrs()};
296   auto &details{symbol.get<SubprogramDetails>()};
297   Attrs bindAttrs{};
298   if (attrs.test(Attr::BIND_C)) {
299     // bind(c) is a suffix, not prefix
300     bindAttrs.set(Attr::BIND_C, true);
301     attrs.set(Attr::BIND_C, false);
302   }
303   Attrs prefixAttrs{subprogramPrefixAttrs & attrs};
304   // emit any non-prefix attributes in an attribute statement
305   attrs &= ~subprogramPrefixAttrs;
306   std::string ssBuf;
307   llvm::raw_string_ostream ss{ssBuf};
308   PutAttrs(ss, attrs);
309   if (!ss.str().empty()) {
310     decls_ << ss.str().substr(1) << "::" << symbol.name() << '\n';
311   }
312   bool isInterface{details.isInterface()};
313   llvm::raw_ostream &os{isInterface ? decls_ : contains_};
314   if (isInterface) {
315     os << "interface\n";
316   }
317   PutAttrs(os, prefixAttrs, std::nullopt, ""s, " "s);
318   os << (details.isFunction() ? "function " : "subroutine ");
319   os << symbol.name() << '(';
320   int n = 0;
321   for (const auto &dummy : details.dummyArgs()) {
322     if (n++ > 0) {
323       os << ',';
324     }
325     if (dummy) {
326       os << dummy->name();
327     } else {
328       os << "*";
329     }
330   }
331   os << ')';
332   PutAttrs(os, bindAttrs, details.bindName(), " "s, ""s);
333   if (details.isFunction()) {
334     const Symbol &result{details.result()};
335     if (result.name() != symbol.name()) {
336       os << " result(" << result.name() << ')';
337     }
338   }
339   os << '\n';
340 
341   // walk symbols, collect ones needed for interface
342   const Scope &scope{
343       details.entryScope() ? *details.entryScope() : DEREF(symbol.scope())};
344   SubprogramSymbolCollector collector{symbol, scope};
345   collector.Collect();
346   std::string typeBindingsBuf;
347   llvm::raw_string_ostream typeBindings{typeBindingsBuf};
348   ModFileWriter writer{context_};
349   for (const Symbol &need : collector.symbols()) {
350     writer.PutSymbol(typeBindings, need);
351   }
352   CHECK(typeBindings.str().empty());
353   os << writer.uses_.str();
354   for (const SourceName &import : collector.imports()) {
355     decls_ << "import::" << import << "\n";
356   }
357   os << writer.decls_.str();
358   os << "end\n";
359   if (isInterface) {
360     os << "end interface\n";
361   }
362 }
363 
364 static bool IsIntrinsicOp(const Symbol &symbol) {
365   if (const auto *details{symbol.GetUltimate().detailsIf<GenericDetails>()}) {
366     return details->kind().IsIntrinsicOperator();
367   } else {
368     return false;
369   }
370 }
371 
372 static llvm::raw_ostream &PutGenericName(
373     llvm::raw_ostream &os, const Symbol &symbol) {
374   if (IsGenericDefinedOp(symbol)) {
375     return os << "operator(" << symbol.name() << ')';
376   } else {
377     return os << symbol.name();
378   }
379 }
380 
381 void ModFileWriter::PutGeneric(const Symbol &symbol) {
382   auto &details{symbol.get<GenericDetails>()};
383   PutGenericName(decls_ << "interface ", symbol) << '\n';
384   for (const Symbol &specific : details.specificProcs()) {
385     decls_ << "procedure::" << specific.name() << '\n';
386   }
387   decls_ << "end interface\n";
388   if (symbol.attrs().test(Attr::PRIVATE)) {
389     PutGenericName(decls_ << "private::", symbol) << '\n';
390   }
391 }
392 
393 void ModFileWriter::PutUse(const Symbol &symbol) {
394   auto &details{symbol.get<UseDetails>()};
395   auto &use{details.symbol()};
396   uses_ << "use " << GetUsedModule(details).name();
397   PutGenericName(uses_ << ",only:", symbol);
398   // Can have intrinsic op with different local-name and use-name
399   // (e.g. `operator(<)` and `operator(.lt.)`) but rename is not allowed
400   if (!IsIntrinsicOp(symbol) && use.name() != symbol.name()) {
401     PutGenericName(uses_ << "=>", use);
402   }
403   uses_ << '\n';
404   PutUseExtraAttr(Attr::VOLATILE, symbol, use);
405   PutUseExtraAttr(Attr::ASYNCHRONOUS, symbol, use);
406 }
407 
408 // We have "USE local => use" in this module. If attr was added locally
409 // (i.e. on local but not on use), also write it out in the mod file.
410 void ModFileWriter::PutUseExtraAttr(
411     Attr attr, const Symbol &local, const Symbol &use) {
412   if (local.attrs().test(attr) && !use.attrs().test(attr)) {
413     PutAttr(useExtraAttrs_, attr) << "::";
414     useExtraAttrs_ << local.name() << '\n';
415   }
416 }
417 
418 // Collect the symbols of this scope sorted by their original order, not name.
419 // Namelists are an exception: they are sorted after other symbols.
420 SymbolVector CollectSymbols(const Scope &scope) {
421   SymbolVector sorted;
422   SymbolVector namelist;
423   std::size_t commonSize{scope.commonBlocks().size()};
424   auto symbols{scope.GetSymbols()};
425   sorted.reserve(symbols.size() + commonSize);
426   for (SymbolRef symbol : symbols) {
427     if (!symbol->test(Symbol::Flag::ParentComp)) {
428       if (symbol->has<NamelistDetails>()) {
429         namelist.push_back(symbol);
430       } else {
431         sorted.push_back(symbol);
432       }
433     }
434   }
435   sorted.insert(sorted.end(), namelist.begin(), namelist.end());
436   for (const auto &pair : scope.commonBlocks()) {
437     sorted.push_back(*pair.second);
438   }
439   std::sort(sorted.end() - commonSize, sorted.end());
440   return sorted;
441 }
442 
443 void PutEntity(llvm::raw_ostream &os, const Symbol &symbol) {
444   std::visit(
445       common::visitors{
446           [&](const ObjectEntityDetails &) { PutObjectEntity(os, symbol); },
447           [&](const ProcEntityDetails &) { PutProcEntity(os, symbol); },
448           [&](const TypeParamDetails &) { PutTypeParam(os, symbol); },
449           [&](const auto &) {
450             common::die("PutEntity: unexpected details: %s",
451                 DetailsToString(symbol.details()).c_str());
452           },
453       },
454       symbol.details());
455 }
456 
457 void PutShapeSpec(llvm::raw_ostream &os, const ShapeSpec &x) {
458   if (x.lbound().isAssumed()) {
459     CHECK(x.ubound().isAssumed());
460     os << "..";
461   } else {
462     if (!x.lbound().isDeferred()) {
463       PutBound(os, x.lbound());
464     }
465     os << ':';
466     if (!x.ubound().isDeferred()) {
467       PutBound(os, x.ubound());
468     }
469   }
470 }
471 void PutShape(
472     llvm::raw_ostream &os, const ArraySpec &shape, char open, char close) {
473   if (!shape.empty()) {
474     os << open;
475     bool first{true};
476     for (const auto &shapeSpec : shape) {
477       if (first) {
478         first = false;
479       } else {
480         os << ',';
481       }
482       PutShapeSpec(os, shapeSpec);
483     }
484     os << close;
485   }
486 }
487 
488 void PutObjectEntity(llvm::raw_ostream &os, const Symbol &symbol) {
489   auto &details{symbol.get<ObjectEntityDetails>()};
490   PutEntity(
491       os, symbol, [&]() { PutType(os, DEREF(symbol.GetType())); },
492       symbol.attrs());
493   PutShape(os, details.shape(), '(', ')');
494   PutShape(os, details.coshape(), '[', ']');
495   PutInit(os, symbol, details.init());
496   os << '\n';
497 }
498 
499 void PutProcEntity(llvm::raw_ostream &os, const Symbol &symbol) {
500   if (symbol.attrs().test(Attr::INTRINSIC)) {
501     os << "intrinsic::" << symbol.name() << '\n';
502     return;
503   }
504   const auto &details{symbol.get<ProcEntityDetails>()};
505   const ProcInterface &interface{details.interface()};
506   Attrs attrs{symbol.attrs()};
507   if (details.passName()) {
508     attrs.reset(Attr::PASS);
509   }
510   PutEntity(
511       os, symbol,
512       [&]() {
513         os << "procedure(";
514         if (interface.symbol()) {
515           os << interface.symbol()->name();
516         } else if (interface.type()) {
517           PutType(os, *interface.type());
518         }
519         os << ')';
520         PutPassName(os, details.passName());
521       },
522       attrs);
523   os << '\n';
524 }
525 
526 void PutPassName(
527     llvm::raw_ostream &os, const std::optional<SourceName> &passName) {
528   if (passName) {
529     os << ",pass(" << *passName << ')';
530   }
531 }
532 void PutTypeParam(llvm::raw_ostream &os, const Symbol &symbol) {
533   auto &details{symbol.get<TypeParamDetails>()};
534   PutEntity(
535       os, symbol,
536       [&]() {
537         PutType(os, DEREF(symbol.GetType()));
538         PutLower(os << ',', common::EnumToString(details.attr()));
539       },
540       symbol.attrs());
541   PutInit(os, details.init());
542   os << '\n';
543 }
544 
545 void PutInit(
546     llvm::raw_ostream &os, const Symbol &symbol, const MaybeExpr &init) {
547   if (init) {
548     if (symbol.attrs().test(Attr::PARAMETER) ||
549         symbol.owner().IsDerivedType()) {
550       os << (symbol.attrs().test(Attr::POINTER) ? "=>" : "=");
551       init->AsFortran(os);
552     }
553   }
554 }
555 
556 void PutInit(llvm::raw_ostream &os, const MaybeIntExpr &init) {
557   if (init) {
558     init->AsFortran(os << '=');
559   }
560 }
561 
562 void PutBound(llvm::raw_ostream &os, const Bound &x) {
563   if (x.isAssumed()) {
564     os << '*';
565   } else if (x.isDeferred()) {
566     os << ':';
567   } else {
568     x.GetExplicit()->AsFortran(os);
569   }
570 }
571 
572 // Write an entity (object or procedure) declaration.
573 // writeType is called to write out the type.
574 void PutEntity(llvm::raw_ostream &os, const Symbol &symbol,
575     std::function<void()> writeType, Attrs attrs) {
576   writeType();
577   MaybeExpr bindName;
578   std::visit(common::visitors{
579                  [&](const SubprogramDetails &x) { bindName = x.bindName(); },
580                  [&](const ObjectEntityDetails &x) { bindName = x.bindName(); },
581                  [&](const ProcEntityDetails &x) { bindName = x.bindName(); },
582                  [&](const auto &) {},
583              },
584       symbol.details());
585   PutAttrs(os, attrs, bindName);
586   os << "::" << symbol.name();
587 }
588 
589 // Put out each attribute to os, surrounded by `before` and `after` and
590 // mapped to lower case.
591 llvm::raw_ostream &PutAttrs(llvm::raw_ostream &os, Attrs attrs,
592     const MaybeExpr &bindName, std::string before, std::string after) {
593   attrs.set(Attr::PUBLIC, false); // no need to write PUBLIC
594   attrs.set(Attr::EXTERNAL, false); // no need to write EXTERNAL
595   if (bindName) {
596     bindName->AsFortran(os << before << "bind(c, name=") << ')' << after;
597     attrs.set(Attr::BIND_C, false);
598   }
599   for (std::size_t i{0}; i < Attr_enumSize; ++i) {
600     Attr attr{static_cast<Attr>(i)};
601     if (attrs.test(attr)) {
602       PutAttr(os << before, attr) << after;
603     }
604   }
605   return os;
606 }
607 
608 llvm::raw_ostream &PutAttr(llvm::raw_ostream &os, Attr attr) {
609   return PutLower(os, AttrToString(attr));
610 }
611 
612 llvm::raw_ostream &PutType(llvm::raw_ostream &os, const DeclTypeSpec &type) {
613   return PutLower(os, type.AsFortran());
614 }
615 
616 llvm::raw_ostream &PutLower(llvm::raw_ostream &os, const std::string &str) {
617   for (char c : str) {
618     os << parser::ToLowerCaseLetter(c);
619   }
620   return os;
621 }
622 
623 struct Temp {
624   Temp(int fd, std::string path) : fd{fd}, path{path} {}
625   Temp(Temp &&t) : fd{std::exchange(t.fd, -1)}, path{std::move(t.path)} {}
626   ~Temp() {
627     if (fd >= 0) {
628       llvm::sys::fs::file_t native{llvm::sys::fs::convertFDToNativeFile(fd)};
629       llvm::sys::fs::closeFile(native);
630       llvm::sys::fs::remove(path.c_str());
631     }
632   }
633   int fd;
634   std::string path;
635 };
636 
637 // Create a temp file in the same directory and with the same suffix as path.
638 // Return an open file descriptor and its path.
639 static llvm::ErrorOr<Temp> MkTemp(const std::string &path) {
640   auto length{path.length()};
641   auto dot{path.find_last_of("./")};
642   std::string suffix{
643       dot < length && path[dot] == '.' ? path.substr(dot + 1) : ""};
644   CHECK(length > suffix.length() &&
645       path.substr(length - suffix.length()) == suffix);
646   auto prefix{path.substr(0, length - suffix.length())};
647   int fd;
648   llvm::SmallString<16> tempPath;
649   if (std::error_code err{llvm::sys::fs::createUniqueFile(
650           prefix + "%%%%%%" + suffix, fd, tempPath)}) {
651     return err;
652   }
653   return Temp{fd, tempPath.c_str()};
654 }
655 
656 // Write the module file at path, prepending header. If an error occurs,
657 // return errno, otherwise 0.
658 static std::error_code WriteFile(
659     const std::string &path, const std::string &contents, bool debug) {
660   auto header{std::string{ModHeader::bom} + ModHeader::magic +
661       CheckSum(contents) + ModHeader::terminator};
662   if (debug) {
663     llvm::dbgs() << "Processing module " << path << ": ";
664   }
665   if (FileContentsMatch(path, header, contents)) {
666     if (debug) {
667       llvm::dbgs() << "module unchanged, not writing\n";
668     }
669     return {};
670   }
671   llvm::ErrorOr<Temp> temp{MkTemp(path)};
672   if (!temp) {
673     return temp.getError();
674   }
675   llvm::raw_fd_ostream writer(temp->fd, /*shouldClose=*/false);
676   writer << header;
677   writer << contents;
678   writer.flush();
679   if (writer.has_error()) {
680     return writer.error();
681   }
682   if (debug) {
683     llvm::dbgs() << "module written\n";
684   }
685   return llvm::sys::fs::rename(temp->path, path);
686 }
687 
688 // Return true if the stream matches what we would write for the mod file.
689 static bool FileContentsMatch(const std::string &path,
690     const std::string &header, const std::string &contents) {
691   std::size_t hsize{header.size()};
692   std::size_t csize{contents.size()};
693   auto buf_or{llvm::MemoryBuffer::getFile(path)};
694   if (!buf_or) {
695     return false;
696   }
697   auto buf = std::move(buf_or.get());
698   if (buf->getBufferSize() != hsize + csize) {
699     return false;
700   }
701   if (!std::equal(header.begin(), header.end(), buf->getBufferStart(),
702           buf->getBufferStart() + hsize)) {
703     return false;
704   }
705 
706   return std::equal(contents.begin(), contents.end(),
707       buf->getBufferStart() + hsize, buf->getBufferEnd());
708 }
709 
710 // Compute a simple hash of the contents of a module file and
711 // return it as a string of hex digits.
712 // This uses the Fowler-Noll-Vo hash function.
713 static std::string CheckSum(const std::string_view &contents) {
714   std::uint64_t hash{0xcbf29ce484222325ull};
715   for (char c : contents) {
716     hash ^= c & 0xff;
717     hash *= 0x100000001b3;
718   }
719   static const char *digits = "0123456789abcdef";
720   std::string result(ModHeader::sumLen, '0');
721   for (size_t i{ModHeader::sumLen}; hash != 0; hash >>= 4) {
722     result[--i] = digits[hash & 0xf];
723   }
724   return result;
725 }
726 
727 static bool VerifyHeader(llvm::ArrayRef<char> content) {
728   std::string_view sv{content.data(), content.size()};
729   if (sv.substr(0, ModHeader::magicLen) != ModHeader::magic) {
730     return false;
731   }
732   std::string_view expectSum{sv.substr(ModHeader::magicLen, ModHeader::sumLen)};
733   std::string actualSum{CheckSum(sv.substr(ModHeader::len))};
734   return expectSum == actualSum;
735 }
736 
737 Scope *ModFileReader::Read(const SourceName &name, Scope *ancestor) {
738   std::string ancestorName; // empty for module
739   if (ancestor) {
740     if (auto *scope{ancestor->FindSubmodule(name)}) {
741       return scope;
742     }
743     ancestorName = ancestor->GetName().value().ToString();
744   } else {
745     auto it{context_.globalScope().find(name)};
746     if (it != context_.globalScope().end()) {
747       return it->second->scope();
748     }
749   }
750   parser::Parsing parsing{context_.allSources()};
751   parser::Options options;
752   options.isModuleFile = true;
753   options.features.Enable(common::LanguageFeature::BackslashEscapes);
754   options.searchDirectories = context_.searchDirectories();
755   auto path{ModFileName(name, ancestorName, context_.moduleFileSuffix())};
756   const auto *sourceFile{parsing.Prescan(path, options)};
757   if (parsing.messages().AnyFatalError()) {
758     for (auto &msg : parsing.messages().messages()) {
759       std::string str{msg.ToString()};
760       Say(name, ancestorName, parser::MessageFixedText{str.c_str(), str.size()},
761           path);
762     }
763     return nullptr;
764   }
765   CHECK(sourceFile);
766   if (!VerifyHeader(sourceFile->content())) {
767     Say(name, ancestorName, "File has invalid checksum: %s"_en_US,
768         sourceFile->path());
769     return nullptr;
770   }
771   llvm::raw_null_ostream NullStream;
772   parsing.Parse(NullStream);
773   auto &parseTree{parsing.parseTree()};
774   if (!parsing.messages().empty() || !parsing.consumedWholeFile() ||
775       !parseTree) {
776     Say(name, ancestorName, "Module file is corrupt: %s"_err_en_US,
777         sourceFile->path());
778     return nullptr;
779   }
780   Scope *parentScope; // the scope this module/submodule goes into
781   if (!ancestor) {
782     parentScope = &context_.globalScope();
783   } else if (std::optional<SourceName> parent{GetSubmoduleParent(*parseTree)}) {
784     parentScope = Read(*parent, ancestor);
785   } else {
786     parentScope = ancestor;
787   }
788   ResolveNames(context_, *parseTree);
789   const auto &it{parentScope->find(name)};
790   if (it == parentScope->end()) {
791     return nullptr;
792   }
793   auto &modSymbol{*it->second};
794   modSymbol.set(Symbol::Flag::ModFile);
795   modSymbol.scope()->set_chars(parsing.cooked());
796   return modSymbol.scope();
797 }
798 
799 parser::Message &ModFileReader::Say(const SourceName &name,
800     const std::string &ancestor, parser::MessageFixedText &&msg,
801     const std::string &arg) {
802   return context_
803       .Say(name,
804           ancestor.empty()
805               ? "Error reading module file for module '%s'"_err_en_US
806               : "Error reading module file for submodule '%s' of module '%s'"_err_en_US,
807           name, ancestor)
808       .Attach(name, std::move(msg), arg);
809 }
810 
811 // program was read from a .mod file for a submodule; return the name of the
812 // submodule's parent submodule, nullptr if none.
813 static std::optional<SourceName> GetSubmoduleParent(
814     const parser::Program &program) {
815   CHECK(program.v.size() == 1);
816   auto &unit{program.v.front()};
817   auto &submod{std::get<common::Indirection<parser::Submodule>>(unit.u)};
818   auto &stmt{
819       std::get<parser::Statement<parser::SubmoduleStmt>>(submod.value().t)};
820   auto &parentId{std::get<parser::ParentIdentifier>(stmt.statement.t)};
821   if (auto &parent{std::get<std::optional<parser::Name>>(parentId.t)}) {
822     return parent->source;
823   } else {
824     return std::nullopt;
825   }
826 }
827 
828 void SubprogramSymbolCollector::Collect() {
829   const auto &details{symbol_.get<SubprogramDetails>()};
830   isInterface_ = details.isInterface();
831   for (const Symbol *dummyArg : details.dummyArgs()) {
832     if (dummyArg) {
833       DoSymbol(*dummyArg);
834     }
835   }
836   if (details.isFunction()) {
837     DoSymbol(details.result());
838   }
839   for (const auto &pair : scope_) {
840     const Symbol &symbol{*pair.second};
841     if (const auto *useDetails{symbol.detailsIf<UseDetails>()}) {
842       if (useSet_.count(useDetails->symbol().GetUltimate()) > 0) {
843         need_.push_back(symbol);
844       }
845     }
846   }
847 }
848 
849 void SubprogramSymbolCollector::DoSymbol(const Symbol &symbol) {
850   DoSymbol(symbol.name(), symbol);
851 }
852 
853 // Do symbols this one depends on; then add to need_
854 void SubprogramSymbolCollector::DoSymbol(
855     const SourceName &name, const Symbol &symbol) {
856   const auto &scope{symbol.owner()};
857   if (scope != scope_ && !scope.IsDerivedType()) {
858     if (scope != scope_.parent()) {
859       useSet_.insert(symbol);
860     }
861     if (NeedImport(name, symbol)) {
862       imports_.insert(name);
863     }
864     return;
865   }
866   if (!needSet_.insert(symbol).second) {
867     return; // already done
868   }
869   std::visit(common::visitors{
870                  [this](const ObjectEntityDetails &details) {
871                    for (const ShapeSpec &spec : details.shape()) {
872                      DoBound(spec.lbound());
873                      DoBound(spec.ubound());
874                    }
875                    for (const ShapeSpec &spec : details.coshape()) {
876                      DoBound(spec.lbound());
877                      DoBound(spec.ubound());
878                    }
879                    if (const Symbol * commonBlock{details.commonBlock()}) {
880                      DoSymbol(*commonBlock);
881                    }
882                  },
883                  [this](const CommonBlockDetails &details) {
884                    for (const auto &object : details.objects()) {
885                      DoSymbol(*object);
886                    }
887                  },
888                  [](const auto &) {},
889              },
890       symbol.details());
891   if (!symbol.has<UseDetails>()) {
892     DoType(symbol.GetType());
893   }
894   if (!scope.IsDerivedType()) {
895     need_.push_back(symbol);
896   }
897 }
898 
899 void SubprogramSymbolCollector::DoType(const DeclTypeSpec *type) {
900   if (!type) {
901     return;
902   }
903   switch (type->category()) {
904   case DeclTypeSpec::Numeric:
905   case DeclTypeSpec::Logical:
906     break; // nothing to do
907   case DeclTypeSpec::Character:
908     DoParamValue(type->characterTypeSpec().length());
909     break;
910   default:
911     if (const DerivedTypeSpec * derived{type->AsDerived()}) {
912       const auto &typeSymbol{derived->typeSymbol()};
913       if (const DerivedTypeSpec * extends{typeSymbol.GetParentTypeSpec()}) {
914         DoSymbol(extends->name(), extends->typeSymbol());
915       }
916       for (const auto &pair : derived->parameters()) {
917         DoParamValue(pair.second);
918       }
919       for (const auto &pair : *typeSymbol.scope()) {
920         const Symbol &comp{*pair.second};
921         DoSymbol(comp);
922       }
923       DoSymbol(derived->name(), derived->typeSymbol());
924     }
925   }
926 }
927 
928 void SubprogramSymbolCollector::DoBound(const Bound &bound) {
929   if (const MaybeSubscriptIntExpr & expr{bound.GetExplicit()}) {
930     DoExpr(*expr);
931   }
932 }
933 void SubprogramSymbolCollector::DoParamValue(const ParamValue &paramValue) {
934   if (const auto &expr{paramValue.GetExplicit()}) {
935     DoExpr(*expr);
936   }
937 }
938 
939 // Do we need a IMPORT of this symbol into an interface block?
940 bool SubprogramSymbolCollector::NeedImport(
941     const SourceName &name, const Symbol &symbol) {
942   if (!isInterface_) {
943     return false;
944   } else if (symbol.owner() != scope_.parent()) {
945     // detect import from parent of use-associated symbol
946     // can be null in the case of a use-associated derived type's parent type
947     const auto *found{scope_.FindSymbol(name)};
948     CHECK(found || symbol.has<DerivedTypeDetails>());
949     return found && found->has<UseDetails>() && found->owner() != scope_;
950   } else {
951     return true;
952   }
953 }
954 
955 } // namespace Fortran::semantics
956