1 //===-- lib/Parser/source.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "flang/Parser/source.h"
10 #include "flang/Common/idioms.h"
11 #include "flang/Parser/char-buffer.h"
12 #include "llvm/Support/Errno.h"
13 #include "llvm/Support/raw_ostream.h"
14 #include <algorithm>
15 #include <cstddef>
16 #include <cstring>
17 #include <fcntl.h>
18 #include <memory>
19 #include <sys/mman.h>
20 #include <sys/stat.h>
21 #include <sys/types.h>
22 #include <unistd.h>
23 #include <vector>
24 
25 // TODO: Port to Windows &c.
26 
27 namespace Fortran::parser {
28 
namespace {
// When true, ReadFile() first tries to mmap() regular files and only falls
// back to read() when mapping is not possible.
constexpr bool useMMap{true};
// Regular files smaller than this many bytes are never mapped.
constexpr int minMapFileBytes{1};  // i.e., no minimum requirement
// Mapping is attempted only while openFileDescriptors does not exceed this.
constexpr int maxMapOpenFileDescriptors{100};
// Number of descriptors opened by SourceFile::Open() and not yet closed.
int openFileDescriptors{0};
}  // namespace
33 
34 SourceFile::~SourceFile() { Close(); }
35 
36 static std::vector<std::size_t> FindLineStarts(
37     const char *source, std::size_t bytes) {
38   std::vector<std::size_t> result;
39   if (bytes > 0) {
40     CHECK(source[bytes - 1] == '\n' && "missing ultimate newline");
41     std::size_t at{0};
42     do {
43       result.push_back(at);
44       const void *vp{static_cast<const void *>(&source[at])};
45       const void *vnl{std::memchr(vp, '\n', bytes - at)};
46       const char *nl{static_cast<const char *>(vnl)};
47       at = nl + 1 - source;
48     } while (at < bytes);
49     result.shrink_to_fit();
50   }
51   return result;
52 }
53 
54 void SourceFile::RecordLineStarts() {
55   lineStart_ = FindLineStarts(content_, bytes_);
56 }
57 
58 // Check for a Unicode byte order mark (BOM).
59 // Module files all have one; so can source files.
60 void SourceFile::IdentifyPayload() {
61   content_ = address_;
62   bytes_ = size_;
63   if (content_) {
64     static constexpr int BOMBytes{3};
65     static const char UTF8_BOM[]{"\xef\xbb\xbf"};
66     if (bytes_ >= BOMBytes && std::memcmp(content_, UTF8_BOM, BOMBytes) == 0) {
67       content_ += BOMBytes;
68       bytes_ -= BOMBytes;
69       encoding_ = Encoding::UTF_8;
70     }
71   }
72 }
73 
// Returns "path" truncated just before its last '/'.  A path containing no
// '/' is returned unchanged.
std::string DirectoryName(std::string path) {
  const std::string::size_type slashAt{path.rfind('/')};
  if (slashAt != std::string::npos) {
    path.erase(slashAt);
  }
  return path;
}
78 
// Resolves "name" against the directories of "searchPath", in order, and
// returns the first "dir/name" that exists and is not a directory.  Empty
// names, "-", and absolute paths are returned as they are, as is any name
// for which no directory produces a match.
std::string LocateSourceFile(
    std::string name, const std::vector<std::string> &searchPath) {
  const bool searchable{!name.empty() && name != "-" && name[0] != '/'};
  if (searchable) {
    for (const std::string &dir : searchPath) {
      std::string candidate{dir};
      candidate += '/';
      candidate += name;
      struct stat info;
      const bool exists{stat(candidate.c_str(), &info) == 0};
      if (exists && !S_ISDIR(info.st_mode)) {
        return candidate;
      }
    }
  }
  return name;
}
93 
// Deletes every '\r' from the first "bytes" bytes of "buffer", compacting
// the surviving bytes toward the front, and returns how many remain.
static std::size_t RemoveCarriageReturns(char *buffer, std::size_t bytes) {
  char *dest{buffer};
  char *src{buffer};
  char *const end{buffer + bytes};
  while (src < end) {
    const auto remaining{static_cast<std::size_t>(end - src)};
    auto *cr{static_cast<char *>(std::memchr(src, '\r', remaining))};
    // Keep everything up to the next carriage return, or the whole tail
    // when there is none.
    const std::size_t keep{cr ? static_cast<std::size_t>(cr - src) : remaining};
    std::memmove(dest, src, keep);
    dest += keep;
    src += keep;
    if (cr) {
      ++src;  // step over the carriage return itself
    }
  }
  return static_cast<std::size_t>(dest - buffer);
}
114 
115 bool SourceFile::Open(std::string path, llvm::raw_ostream &error) {
116   Close();
117   path_ = path;
118   std::string errorPath{"'"s + path + "'"};
119   errno = 0;
120   fileDescriptor_ = open(path.c_str(), O_RDONLY);
121   if (fileDescriptor_ < 0) {
122     error << "Could not open " << errorPath << ": "
123           << llvm::sys::StrError(errno);
124     return false;
125   }
126   ++openFileDescriptors;
127   return ReadFile(errorPath, error);
128 }
129 
130 bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) {
131   Close();
132   path_ = "standard input";
133   fileDescriptor_ = 0;
134   return ReadFile(path_, error);
135 }
136 
137 bool SourceFile::ReadFile(std::string errorPath, llvm::raw_ostream &error) {
138   struct stat statbuf;
139   if (fstat(fileDescriptor_, &statbuf) != 0) {
140     error << "fstat failed on " << errorPath << ": "
141           << llvm::sys::StrError(errno);
142     Close();
143     return false;
144   }
145   if (S_ISDIR(statbuf.st_mode)) {
146     error << errorPath << " is a directory";
147     Close();
148     return false;
149   }
150 
151   // Try to map a large source file into the process' address space.
152   // Don't bother with small ones.  This also helps keep the number
153   // of open file descriptors from getting out of hand.
154   if (useMMap && S_ISREG(statbuf.st_mode)) {
155     size_ = static_cast<std::size_t>(statbuf.st_size);
156     if (size_ >= minMapFileBytes &&
157         openFileDescriptors <= maxMapOpenFileDescriptors) {
158       void *vp = mmap(0, size_, PROT_READ, MAP_SHARED, fileDescriptor_, 0);
159       if (vp != MAP_FAILED) {
160         address_ = static_cast<const char *>(const_cast<const void *>(vp));
161         IdentifyPayload();
162         if (bytes_ > 0 && content_[bytes_ - 1] == '\n' &&
163             std::memchr(static_cast<const void *>(content_), '\r', bytes_) ==
164                 nullptr) {
165           isMemoryMapped_ = true;
166           RecordLineStarts();
167           return true;
168         }
169         // The file needs to have its line endings normalized to simple
170         // newlines.  Remap it for a private rewrite in place.
171         vp = mmap(
172             vp, size_, PROT_READ | PROT_WRITE, MAP_PRIVATE, fileDescriptor_, 0);
173         if (vp != MAP_FAILED) {
174           address_ = static_cast<const char *>(const_cast<const void *>(vp));
175           IdentifyPayload();
176           auto mutableContent{const_cast<char *>(content_)};
177           bytes_ = RemoveCarriageReturns(mutableContent, bytes_);
178           if (bytes_ > 0) {
179             if (mutableContent[bytes_ - 1] == '\n' ||
180                 (bytes_ & 0xfff) != 0 /* don't cross into next page */) {
181               if (mutableContent[bytes_ - 1] != '\n') {
182                 // Append a final newline.
183                 mutableContent[bytes_++] = '\n';
184               }
185               bool isNowReadOnly{mprotect(vp, bytes_, PROT_READ) == 0};
186               CHECK(isNowReadOnly);
187               content_ = mutableContent;
188               isMemoryMapped_ = true;
189               RecordLineStarts();
190               return true;
191             }
192           }
193         }
194         munmap(vp, size_);
195         address_ = content_ = nullptr;
196         size_ = bytes_ = 0;
197       }
198     }
199   }
200 
201   // Read it into an expandable buffer, then marshal its content into a single
202   // contiguous block.
203   CharBuffer buffer;
204   while (true) {
205     std::size_t count;
206     char *to{buffer.FreeSpace(count)};
207     ssize_t got{read(fileDescriptor_, to, count)};
208     if (got < 0) {
209       error << "could not read " << errorPath << ": "
210             << llvm::sys::StrError(errno);
211       Close();
212       return false;
213     }
214     if (got == 0) {
215       break;
216     }
217     buffer.Claim(got);
218   }
219   if (fileDescriptor_ > 0) {
220     close(fileDescriptor_);
221     --openFileDescriptors;
222   }
223   fileDescriptor_ = -1;
224   normalized_ = buffer.MarshalNormalized();
225   address_ = normalized_.c_str();
226   size_ = normalized_.size();
227   IdentifyPayload();
228   RecordLineStarts();
229   return true;
230 }
231 
232 void SourceFile::Close() {
233   if (useMMap && isMemoryMapped_) {
234     munmap(reinterpret_cast<void *>(const_cast<char *>(address_)), size_);
235     isMemoryMapped_ = false;
236   } else if (!normalized_.empty()) {
237     normalized_.clear();
238   } else if (address_) {
239     delete[] address_;
240   }
241   address_ = content_ = nullptr;
242   size_ = bytes_ = 0;
243   if (fileDescriptor_ > 0) {
244     close(fileDescriptor_);
245     --openFileDescriptors;
246   }
247   fileDescriptor_ = -1;
248   path_.clear();
249 }
250 
251 SourcePosition SourceFile::FindOffsetLineAndColumn(std::size_t at) const {
252   CHECK(at < bytes_);
253   if (lineStart_.empty()) {
254     return {*this, 1, static_cast<int>(at + 1)};
255   }
256   std::size_t low{0}, count{lineStart_.size()};
257   while (count > 1) {
258     std::size_t mid{low + (count >> 1)};
259     if (lineStart_[mid] > at) {
260       count = mid - low;
261     } else {
262       count -= mid - low;
263       low = mid;
264     }
265   }
266   return {*this, static_cast<int>(low + 1),
267       static_cast<int>(at - lineStart_[low] + 1)};
268 }
269 }
270