1 //===-- lib/Parser/source.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "flang/Parser/source.h" 10 #include "flang/Common/idioms.h" 11 #include "flang/Parser/char-buffer.h" 12 #include "llvm/Support/Errno.h" 13 #include "llvm/Support/raw_ostream.h" 14 #include <algorithm> 15 #include <cstddef> 16 #include <cstring> 17 #include <fcntl.h> 18 #include <memory> 19 #include <sys/mman.h> 20 #include <sys/stat.h> 21 #include <sys/types.h> 22 #include <unistd.h> 23 #include <vector> 24 25 // TODO: Port to Windows &c. 26 27 namespace Fortran::parser { 28 29 static constexpr bool useMMap{true}; 30 static constexpr int minMapFileBytes{1}; // i.e., no minimum requirement 31 static constexpr int maxMapOpenFileDescriptors{100}; 32 static int openFileDescriptors{0}; 33 34 SourceFile::~SourceFile() { Close(); } 35 36 static std::vector<std::size_t> FindLineStarts( 37 const char *source, std::size_t bytes) { 38 std::vector<std::size_t> result; 39 if (bytes > 0) { 40 CHECK(source[bytes - 1] == '\n' && "missing ultimate newline"); 41 std::size_t at{0}; 42 do { 43 result.push_back(at); 44 const void *vp{static_cast<const void *>(&source[at])}; 45 const void *vnl{std::memchr(vp, '\n', bytes - at)}; 46 const char *nl{static_cast<const char *>(vnl)}; 47 at = nl + 1 - source; 48 } while (at < bytes); 49 result.shrink_to_fit(); 50 } 51 return result; 52 } 53 54 void SourceFile::RecordLineStarts() { 55 lineStart_ = FindLineStarts(content_, bytes_); 56 } 57 58 // Check for a Unicode byte order mark (BOM). 59 // Module files all have one; so can source files. 60 void SourceFile::IdentifyPayload() { 61 content_ = address_; 62 bytes_ = size_; 63 if (content_) { 64 static constexpr int BOMBytes{3}; 65 static const char UTF8_BOM[]{"\xef\xbb\xbf"}; 66 if (bytes_ >= BOMBytes && std::memcmp(content_, UTF8_BOM, BOMBytes) == 0) { 67 content_ += BOMBytes; 68 bytes_ -= BOMBytes; 69 encoding_ = Encoding::UTF_8; 70 } 71 } 72 } 73 74 std::string DirectoryName(std::string path) { 75 auto lastSlash{path.rfind("/")}; 76 return lastSlash == std::string::npos ? path : path.substr(0, lastSlash); 77 } 78 79 std::string LocateSourceFile( 80 std::string name, const std::vector<std::string> &searchPath) { 81 if (name.empty() || name == "-" || name[0] == '/') { 82 return name; 83 } 84 for (const std::string &dir : searchPath) { 85 std::string path{dir + '/' + name}; 86 struct stat statbuf; 87 if (stat(path.c_str(), &statbuf) == 0 && !S_ISDIR(statbuf.st_mode)) { 88 return path; 89 } 90 } 91 return name; 92 } 93 94 static std::size_t RemoveCarriageReturns(char *buffer, std::size_t bytes) { 95 std::size_t wrote{0}; 96 char *p{buffer}; 97 while (bytes > 0) { 98 void *vp{static_cast<void *>(p)}; 99 void *crvp{std::memchr(vp, '\r', bytes)}; 100 char *crcp{static_cast<char *>(crvp)}; 101 if (!crcp) { 102 std::memmove(buffer + wrote, p, bytes); 103 wrote += bytes; 104 break; 105 } 106 std::size_t chunk = crcp - p; 107 std::memmove(buffer + wrote, p, chunk); 108 wrote += chunk; 109 p += chunk + 1; 110 bytes -= chunk + 1; 111 } 112 return wrote; 113 } 114 115 bool SourceFile::Open(std::string path, llvm::raw_ostream &error) { 116 Close(); 117 path_ = path; 118 std::string errorPath{"'"s + path + "'"}; 119 errno = 0; 120 fileDescriptor_ = open(path.c_str(), O_RDONLY); 121 if (fileDescriptor_ < 0) { 122 error << "Could not open " << errorPath << ": " 123 << llvm::sys::StrError(errno); 124 return false; 125 } 126 ++openFileDescriptors; 127 return ReadFile(errorPath, error); 128 } 129 130 bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) { 131 Close(); 132 path_ = "standard input"; 133 fileDescriptor_ = 0; 134 return ReadFile(path_, error); 135 } 136 137 bool SourceFile::ReadFile(std::string errorPath, llvm::raw_ostream &error) { 138 struct stat statbuf; 139 if (fstat(fileDescriptor_, &statbuf) != 0) { 140 error << "fstat failed on " << errorPath << ": " 141 << llvm::sys::StrError(errno); 142 Close(); 143 return false; 144 } 145 if (S_ISDIR(statbuf.st_mode)) { 146 error << errorPath << " is a directory"; 147 Close(); 148 return false; 149 } 150 151 // Try to map a large source file into the process' address space. 152 // Don't bother with small ones. This also helps keep the number 153 // of open file descriptors from getting out of hand. 154 if (useMMap && S_ISREG(statbuf.st_mode)) { 155 size_ = static_cast<std::size_t>(statbuf.st_size); 156 if (size_ >= minMapFileBytes && 157 openFileDescriptors <= maxMapOpenFileDescriptors) { 158 void *vp = mmap(0, size_, PROT_READ, MAP_SHARED, fileDescriptor_, 0); 159 if (vp != MAP_FAILED) { 160 address_ = static_cast<const char *>(const_cast<const void *>(vp)); 161 IdentifyPayload(); 162 if (bytes_ > 0 && content_[bytes_ - 1] == '\n' && 163 std::memchr(static_cast<const void *>(content_), '\r', bytes_) == 164 nullptr) { 165 isMemoryMapped_ = true; 166 RecordLineStarts(); 167 return true; 168 } 169 // The file needs to have its line endings normalized to simple 170 // newlines. Remap it for a private rewrite in place. 171 vp = mmap( 172 vp, size_, PROT_READ | PROT_WRITE, MAP_PRIVATE, fileDescriptor_, 0); 173 if (vp != MAP_FAILED) { 174 address_ = static_cast<const char *>(const_cast<const void *>(vp)); 175 IdentifyPayload(); 176 auto mutableContent{const_cast<char *>(content_)}; 177 bytes_ = RemoveCarriageReturns(mutableContent, bytes_); 178 if (bytes_ > 0) { 179 if (mutableContent[bytes_ - 1] == '\n' || 180 (bytes_ & 0xfff) != 0 /* don't cross into next page */) { 181 if (mutableContent[bytes_ - 1] != '\n') { 182 // Append a final newline. 183 mutableContent[bytes_++] = '\n'; 184 } 185 bool isNowReadOnly{mprotect(vp, bytes_, PROT_READ) == 0}; 186 CHECK(isNowReadOnly); 187 content_ = mutableContent; 188 isMemoryMapped_ = true; 189 RecordLineStarts(); 190 return true; 191 } 192 } 193 } 194 munmap(vp, size_); 195 address_ = content_ = nullptr; 196 size_ = bytes_ = 0; 197 } 198 } 199 } 200 201 // Read it into an expandable buffer, then marshal its content into a single 202 // contiguous block. 203 CharBuffer buffer; 204 while (true) { 205 std::size_t count; 206 char *to{buffer.FreeSpace(count)}; 207 ssize_t got{read(fileDescriptor_, to, count)}; 208 if (got < 0) { 209 error << "could not read " << errorPath << ": " 210 << llvm::sys::StrError(errno); 211 Close(); 212 return false; 213 } 214 if (got == 0) { 215 break; 216 } 217 buffer.Claim(got); 218 } 219 if (fileDescriptor_ > 0) { 220 close(fileDescriptor_); 221 --openFileDescriptors; 222 } 223 fileDescriptor_ = -1; 224 normalized_ = buffer.MarshalNormalized(); 225 address_ = normalized_.c_str(); 226 size_ = normalized_.size(); 227 IdentifyPayload(); 228 RecordLineStarts(); 229 return true; 230 } 231 232 void SourceFile::Close() { 233 if (useMMap && isMemoryMapped_) { 234 munmap(reinterpret_cast<void *>(const_cast<char *>(address_)), size_); 235 isMemoryMapped_ = false; 236 } else if (!normalized_.empty()) { 237 normalized_.clear(); 238 } else if (address_) { 239 delete[] address_; 240 } 241 address_ = content_ = nullptr; 242 size_ = bytes_ = 0; 243 if (fileDescriptor_ > 0) { 244 close(fileDescriptor_); 245 --openFileDescriptors; 246 } 247 fileDescriptor_ = -1; 248 path_.clear(); 249 } 250 251 SourcePosition SourceFile::FindOffsetLineAndColumn(std::size_t at) const { 252 CHECK(at < bytes_); 253 if (lineStart_.empty()) { 254 return {*this, 1, static_cast<int>(at + 1)}; 255 } 256 std::size_t low{0}, count{lineStart_.size()}; 257 while (count > 1) { 258 std::size_t mid{low + (count >> 1)}; 259 if (lineStart_[mid] > at) { 260 count = mid - low; 261 } else { 262 count -= mid - low; 263 low = mid; 264 } 265 } 266 return {*this, static_cast<int>(low + 1), 267 static_cast<int>(at - lineStart_[low] + 1)}; 268 } 269 } 270