1 //===-- lib/Parser/source.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "flang/Parser/source.h" 10 #include "flang/Common/idioms.h" 11 #include "flang/Parser/char-buffer.h" 12 #include "llvm/Support/Errno.h" 13 #include "llvm/Support/FileSystem.h" 14 #include "llvm/Support/Path.h" 15 #include "llvm/Support/raw_ostream.h" 16 #include <algorithm> 17 #include <memory> 18 #include <vector> 19 20 namespace Fortran::parser { 21 22 SourceFile::~SourceFile() { Close(); } 23 24 static std::vector<std::size_t> FindLineStarts(llvm::StringRef source) { 25 std::vector<std::size_t> result; 26 if (source.size() > 0) { 27 CHECK(source.back() == '\n' && "missing ultimate newline"); 28 std::size_t at{0}; 29 do { 30 result.push_back(at); 31 at = source.find('\n', at) + 1; 32 } while (at < source.size()); 33 result.shrink_to_fit(); 34 } 35 return result; 36 } 37 38 void SourceFile::RecordLineStarts() { 39 lineStart_ = FindLineStarts({content().data(), bytes()}); 40 } 41 42 // Check for a Unicode byte order mark (BOM). 43 // Module files all have one; so can source files. 44 void SourceFile::IdentifyPayload() { 45 llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()}; 46 constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"}; 47 if (content.startswith(UTF8_BOM)) { 48 bom_end_ = UTF8_BOM.size(); 49 encoding_ = Encoding::UTF_8; 50 } 51 } 52 53 std::string DirectoryName(std::string path) { 54 llvm::SmallString<128> pathBuf{path}; 55 llvm::sys::path::remove_filename(pathBuf); 56 return pathBuf.str().str(); 57 } 58 59 std::string LocateSourceFile( 60 std::string name, const std::vector<std::string> &searchPath) { 61 if (name.empty() || name == "-" || llvm::sys::path::is_absolute(name)) { 62 return name; 63 } 64 for (const std::string &dir : searchPath) { 65 llvm::SmallString<128> path{dir}; 66 llvm::sys::path::append(path, name); 67 bool isDir{false}; 68 auto er = llvm::sys::fs::is_directory(path, isDir); 69 if (!er && !isDir) { 70 return path.str().str(); 71 } 72 } 73 return name; 74 } 75 76 std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) { 77 std::size_t wrote{0}; 78 char *buffer{buf.data()}; 79 char *p{buf.data()}; 80 std::size_t bytes = buf.size(); 81 while (bytes > 0) { 82 void *vp{static_cast<void *>(p)}; 83 void *crvp{std::memchr(vp, '\r', bytes)}; 84 char *crcp{static_cast<char *>(crvp)}; 85 if (!crcp) { 86 std::memmove(buffer + wrote, p, bytes); 87 wrote += bytes; 88 break; 89 } 90 std::size_t chunk = crcp - p; 91 auto advance{chunk + 1}; 92 if (chunk + 1 >= bytes || crcp[1] == '\n') { 93 // CR followed by LF or EOF: omit 94 } else if ((chunk == 0 && p == buf.data()) || crcp[-1] == '\n') { 95 // CR preceded by LF or BOF: omit 96 } else { 97 // CR in line: retain 98 ++chunk; 99 } 100 std::memmove(buffer + wrote, p, chunk); 101 wrote += chunk; 102 p += advance; 103 bytes -= advance; 104 } 105 return wrote; 106 } 107 108 bool SourceFile::Open(std::string path, llvm::raw_ostream &error) { 109 Close(); 110 path_ = path; 111 std::string errorPath{"'"s + path_ + "'"}; 112 auto bufOr{llvm::WritableMemoryBuffer::getFile(path)}; 113 if (!bufOr) { 114 auto err = bufOr.getError(); 115 error << "Could not open " << errorPath << ": " << err.message(); 116 return false; 117 } 118 buf_ = std::move(bufOr.get()); 119 ReadFile(); 120 return true; 121 } 122 123 bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) { 124 Close(); 125 path_ = "standard input"; 126 127 auto buf_or = llvm::MemoryBuffer::getSTDIN(); 128 if (!buf_or) { 129 auto err = buf_or.getError(); 130 error << err.message(); 131 return false; 132 } 133 auto inbuf = std::move(buf_or.get()); 134 buf_ = 135 llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize()); 136 llvm::copy(inbuf->getBuffer(), buf_->getBufferStart()); 137 ReadFile(); 138 return true; 139 } 140 141 void SourceFile::ReadFile() { 142 buf_end_ = RemoveCarriageReturns(buf_->getBuffer()); 143 if (content().size() == 0 || content().back() != '\n') { 144 // Don't bother to copy if we have spare memory 145 if (content().size() >= buf_->getBufferSize()) { 146 auto tmp_buf{llvm::WritableMemoryBuffer::getNewUninitMemBuffer( 147 content().size() + 1)}; 148 llvm::copy(content(), tmp_buf->getBufferStart()); 149 Close(); 150 buf_ = std::move(tmp_buf); 151 } 152 buf_end_++; 153 buf_->getBuffer()[buf_end_ - 1] = '\n'; 154 } 155 IdentifyPayload(); 156 RecordLineStarts(); 157 } 158 159 void SourceFile::Close() { 160 path_.clear(); 161 buf_.reset(); 162 } 163 164 SourcePosition SourceFile::FindOffsetLineAndColumn(std::size_t at) const { 165 CHECK(at < bytes()); 166 167 auto it = llvm::upper_bound(lineStart_, at); 168 auto low = std::distance(lineStart_.begin(), it - 1); 169 return {*this, static_cast<int>(low + 1), 170 static_cast<int>(at - lineStart_[low] + 1)}; 171 } 172 } // namespace Fortran::parser 173