1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2013 David Chisnall
5 * All rights reserved.
6 *
7 * This software was developed by SRI International and the University of
8 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
9 * ("CTSRD"), as part of the DARPA CRASH research programme.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * $FreeBSD$
33 */
34
35 #ifndef _INPUT_BUFFER_HH_
36 #define _INPUT_BUFFER_HH_
37 #include "util.hh"
38 #include <assert.h>
39 #include <stack>
40 #include <string>
41 #include <unordered_set>
42
43 namespace dtc
44 {
45
namespace {
/**
 * Forward declaration of the expression node type.  Only the name is needed
 * in this header; the definition is not visible here.
 */
struct expression;
/**
 * Owning pointer to an expression, as returned by the expression-parsing
 * methods of text_input_buffer.
 */
using expression_ptr = std::unique_ptr<expression>;
}
50
/**
 * Class encapsulating the input file.  Can be used as a const char*, but has
 * range checking.  Attempting to access anything out of range will return a 0
 * byte.  The input buffer can be cheaply copied, without copying the
 * underlying memory, however it is the user's responsibility to ensure that
 * such copies do not persist beyond the lifetime of the underlying memory.
 *
 * This also contains methods for reporting errors and for consuming the token
 * stream.
 */
class input_buffer
{
	friend class text_input_buffer;
	protected:
	/**
	 * The buffer.  This class doesn't own the buffer, but the
	 * mmap_input_buffer subclass does.
	 */
	const char* buffer;
	/**
	 * The size of the buffer.
	 */
	int size;
	private:
	/**
	 * The current place in the buffer where we are reading.  This class
	 * keeps a separate size, pointer, and cursor so that we can move
	 * forwards and backwards and still have checks that we haven't fallen
	 * off either end.
	 */
	int cursor;
	/**
	 * Private constructor.  This is used to create input buffers that
	 * refer to the same memory, but have different cursors.
	 */
	input_buffer(const char* b, int s, int c) : buffer(b), size(s),
		cursor(c) {}
	public:
	/**
	 * Returns the file name associated with this buffer.  This base
	 * implementation returns a reference to an empty string; it is virtual
	 * so that subclasses can supply a real name.
	 */
	virtual const std::string &filename() const
	{
		static const std::string empty_name;
		return empty_name;
	}
	/**
	 * Factory declared here and defined elsewhere.  Presumably opens the
	 * file at `path` and returns a buffer for its contents, with `warn`
	 * controlling diagnostics on failure -- confirm against the
	 * implementation.
	 */
	static std::unique_ptr<input_buffer> buffer_for_file(const std::string &path,
	                                                     bool warn=true);
	/**
	 * Skips all characters in the input until the specified character is
	 * encountered.
	 */
	void skip_to(char);
	/**
	 * Parses up to a specified character and returns the intervening
	 * characters as a string.
	 */
	std::string parse_to(char);
	/**
	 * Return whether all input has been consumed, i.e. whether the cursor
	 * has reached (or passed) the end of the buffer.
	 */
	bool finished() const { return cursor >= size; }
	/**
	 * Virtual destructor.  Does nothing, but exists so that subclasses
	 * that own the memory can run cleanup code for deallocating it.
	 */
	virtual ~input_buffer() = default;
	/**
	 * Constructs an empty buffer.
	 */
	input_buffer() : buffer(nullptr), size(0), cursor(0) {}
	/**
	 * Constructs a new buffer with a specified memory region and size.
	 * The cursor starts at the beginning of the region.
	 */
	input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0) {}
	/**
	 * Returns a new input buffer referring into this input, clamped to the
	 * specified size.  If the requested buffer would fall outside the
	 * range of this one, then it returns an empty buffer.
	 *
	 * The returned buffer shares the same underlying storage as the
	 * original.  This is intended to be used for splitting up the various
	 * sections of a device tree blob.  Requesting a size of 0 will give a
	 * buffer that extends to the end of the available memory.
	 */
	input_buffer buffer_from_offset(int offset, int s=0);
	/**
	 * Dereferencing operator, allows the buffer to be treated as a char*
	 * and dereferenced to give a character.  This returns a null byte if
	 * the cursor is out of range.
	 */
	inline char operator*() const
	{
		if (cursor < 0 || cursor >= size)
		{
			return '\0';
		}
		return buffer[cursor];
	}
	/**
	 * Array subscripting operator, returns a character at the specified
	 * index offset from the current cursor.  The offset may be negative,
	 * to reread characters that have already been read.  If the current
	 * cursor plus offset is outside of the range, this returns a nul
	 * byte.
	 */
	inline char operator[](int offset) const
	{
		// Do the addition in a wider type so that an extreme offset cannot
		// trigger signed integer overflow before the range check runs.
		const long long index = static_cast<long long>(cursor) + offset;
		if (index < 0 || index >= size)
		{
			return '\0';
		}
		return buffer[index];
	}
	/**
	 * Increments the cursor, iterating forward in the buffer.  No range
	 * check is performed here; the accessors return a null byte once the
	 * cursor is past the end.
	 */
	inline input_buffer &operator++()
	{
		cursor++;
		return *this;
	}
	/**
	 * Returns a pointer to the first byte of the underlying memory,
	 * irrespective of the cursor.
	 */
	const char *begin() const
	{
		return buffer;
	}
	/**
	 * Returns a pointer one past the last byte of the underlying memory.
	 */
	const char *end() const
	{
		return buffer + size;
	}
	/**
	 * Consumes a character.  Moves the cursor one character forward if the
	 * current character matches the argument, returning true.  If the
	 * current character does not match the argument, returns false and
	 * leaves the cursor where it was.
	 */
	inline bool consume(char c)
	{
		if (*(*this) != c)
		{
			return false;
		}
		++(*this);
		return true;
	}
	/**
	 * Consumes a string.  If the (null-terminated) string passed as the
	 * argument appears in the input, advances the cursor to the end and
	 * returns true.  Returns false if the string does not appear at the
	 * current point in the input.
	 */
	bool consume(const char *str);
	/**
	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
	 * the cursor to the end of the integer if the cursor points to an
	 * integer, returns false and does not move the cursor otherwise.
	 *
	 * The parsed value is returned via the argument.
	 */
	bool consume_integer(unsigned long long &outInt);
	/**
	 * Reads an arithmetic expression (containing any of the normal C
	 * operators), evaluates it, and returns the result.
	 */
	bool consume_integer_expression(unsigned long long &outInt);
	/**
	 * Consumes two hex digits and return the resulting byte via the first
	 * argument.  If the next two characters are hex digits, returns true
	 * and advances the cursor.  If not, then returns false and leaves the
	 * cursor in place.
	 */
	bool consume_hex_byte(uint8_t &outByte);
	/**
	 * Template function that consumes a binary value in big-endian format
	 * from the input stream.  Returns true and advances the cursor if
	 * there is a value of the correct size.  This function assumes that
	 * all values must be natively aligned, and so advances the cursor to
	 * the correct alignment before reading.
	 *
	 * On failure the cursor is left untouched and `out` is not modified.
	 */
	template<typename T>
	bool consume_binary(T &out)
	{
		const int type_size = sizeof(T);
		const int misalignment = cursor % type_size;
		const int align =
			(misalignment != 0) ? (type_size - misalignment) : 0;
		// Compare against the space remaining rather than summing onto the
		// cursor, so the check itself cannot overflow.
		if (size - cursor < align + type_size)
		{
			return false;
		}
		cursor += align;
		assert(cursor % type_size == 0);
		out = 0;
		// The check above guarantees type_size bytes are available, so the
		// loop needs no further bounds test.  Bytes are accumulated most
		// significant first.
		for (int i = 0; i < type_size; ++i)
		{
			out <<= 8;
			out |= (static_cast<T>(buffer[cursor++]) & 0xff);
		}
		return true;
	}
#ifndef NDEBUG
	/**
	 * Dumps the current cursor value and the unconsumed values in the
	 * input buffer to the standard error.  This method is intended solely
	 * for debugging.
	 */
	void dump();
#endif
};
261 /**
262 * Explicit specialisation for reading a single byte.
263 */
264 template<>
consume_binary(uint8_t & out)265 inline bool input_buffer::consume_binary(uint8_t &out)
266 {
267 if (size < cursor + 1)
268 {
269 return false;
270 }
271 out = buffer[cursor++];
272 return true;
273 }
274
275 /**
276 * An input buffer subclass used for parsing DTS files. This manages a stack
277 * of input buffers to handle /input/ operations.
278 */
279 class text_input_buffer
280 {
281 std::unordered_set<std::string> defines;
282 /**
283 * The cursor is the input into the input stream where we are currently reading.
284 */
285 int cursor = 0;
286 /**
287 * The current stack of includes. The current input is always from the top
288 * of the stack.
289 */
290 std::stack<std::shared_ptr<input_buffer>> input_stack;
291 /**
292 *
293 */
294 const std::vector<std::string> include_paths;
295 /**
296 * Reads forward past any spaces. The DTS format is not whitespace
297 * sensitive and so we want to scan past whitespace when reading it.
298 */
299 void skip_spaces();
300 /**
301 * Returns the character immediately after the current one.
302 *
303 * This method does not look between files.
304 */
305 char peek();
306 /**
307 * If a /include/ token is encountered, then look up the corresponding
308 * input file, push it onto the input stack, and continue.
309 */
310 void handle_include();
311 /**
312 * The base directory for this file.
313 */
314 const std::string dir;
315 /**
316 * The file where dependencies should be output.
317 */
318 FILE *depfile;
319 public:
320 /**
321 * Construct a new text input buffer with the specified buffer as the start
322 * of parsing and the specified set of input paths for handling new
323 * inclusions.
324 */
text_input_buffer(std::unique_ptr<input_buffer> && b,std::unordered_set<std::string> && d,std::vector<std::string> && i,const std::string directory,FILE * deps)325 text_input_buffer(std::unique_ptr<input_buffer> &&b,
326 std::unordered_set<std::string> &&d,
327 std::vector<std::string> &&i,
328 const std::string directory,
329 FILE *deps)
330 : defines(d), include_paths(i), dir(directory), depfile(deps)
331 {
332 input_stack.push(std::move(b));
333 }
334 /**
335 * Skips all characters in the input until the specified character is
336 * encountered.
337 */
338 void skip_to(char);
339 /**
340 * Parse an expression. If `stopAtParen` is set, then only parse a number
341 * or a parenthetical expression, otherwise assume that either is the
342 * left-hand side of a binary expression and try to parse the right-hand
343 * side.
344 */
345 expression_ptr parse_expression(bool stopAtParen=false);
346 /**
347 * Parse a binary expression, having already parsed the right-hand side.
348 */
349 expression_ptr parse_binary_expression(expression_ptr lhs);
350 /**
351 * Return whether all input has been consumed.
352 */
finished()353 bool finished()
354 {
355 return input_stack.empty() ||
356 ((input_stack.size() == 1) && input_stack.top()->finished());
357 }
358 /**
359 * Dereferencing operator. Returns the current character in the top input buffer.
360 */
operator *()361 inline char operator*()
362 {
363 if (input_stack.empty())
364 {
365 return 0;
366 }
367 return *(*input_stack.top());
368 }
369 /**
370 * Increments the cursor, iterating forward in the buffer.
371 */
operator ++()372 inline text_input_buffer &operator++()
373 {
374 if (input_stack.empty())
375 {
376 return *this;
377 }
378 cursor++;
379 auto &top = *input_stack.top();
380 ++top;
381 if (top.finished())
382 {
383 input_stack.pop();
384 }
385 return *this;
386 }
387 /**
388 * Consumes a character. Moves the cursor one character forward if the
389 * next character matches the argument, returning true. If the current
390 * character does not match the argument, returns false.
391 */
consume(char c)392 inline bool consume(char c)
393 {
394 if (*(*this) == c)
395 {
396 ++(*this);
397 return true;
398 }
399 return false;
400 }
401 /**
402 * Consumes a string. If the (null-terminated) string passed as the
403 * argument appears in the input, advances the cursor to the end and
404 * returns true. Returns false if the string does not appear at the
405 * current point in the input.
406 *
407 * This method does not scan between files.
408 */
consume(const char * str)409 bool consume(const char *str)
410 {
411 if (input_stack.empty())
412 {
413 return false;
414 }
415 return input_stack.top()->consume(str);
416 }
417 /**
418 * Reads an integer in base 8, 10, or 16. Returns true and advances
419 * the cursor to the end of the integer if the cursor points to an
420 * integer, returns false and does not move the cursor otherwise.
421 *
422 * The parsed value is returned via the argument.
423 *
424 * This method does not scan between files.
425 */
consume_integer(unsigned long long & outInt)426 bool consume_integer(unsigned long long &outInt)
427 {
428 if (input_stack.empty())
429 {
430 return false;
431 }
432 return input_stack.top()->consume_integer(outInt);
433 }
434 /**
435 * Reads an arithmetic expression (containing any of the normal C
436 * operators), evaluates it, and returns the result.
437 */
438 bool consume_integer_expression(unsigned long long &outInt);
439 /**
440 * Consumes two hex digits and return the resulting byte via the first
441 * argument. If the next two characters are hex digits, returns true
442 * and advances the cursor. If not, then returns false and leaves the
443 * cursor in place.
444 *
445 * This method does not scan between files.
446 */
consume_hex_byte(uint8_t & outByte)447 bool consume_hex_byte(uint8_t &outByte)
448 {
449 if (input_stack.empty())
450 {
451 return false;
452 }
453 return input_stack.top()->consume_hex_byte(outByte);
454 }
455 /**
456 * Returns the longest string in the input buffer starting at the
457 * current cursor and composed entirely of characters that are valid in
458 * node names.
459 */
460 std::string parse_node_name();
461 /**
462 * Returns the longest string in the input buffer starting at the
463 * current cursor and composed entirely of characters that are valid in
464 * property names.
465 */
466 std::string parse_property_name();
467 /**
468 * Parses either a node or a property name. If is_property is true on
469 * entry, then only property names are parsed. If it is false, then it
470 * will be set, on return, to indicate whether the parsed name is only
471 * valid as a property.
472 */
473 std::string parse_node_or_property_name(bool &is_property);
474 /**
475 * Parses up to a specified character and returns the intervening
476 * characters as a string.
477 */
478 std::string parse_to(char);
479 /**
480 * Advances the cursor to the start of the next token, skipping
481 * comments and whitespace. If the cursor already points to the start
482 * of a token, then this function does nothing.
483 */
484 text_input_buffer &next_token();
485 /**
486 * Location in the source file. This should never be interpreted by
487 * anything other than error reporting functions of this class. It will
488 * eventually become something more complex than an `int`.
489 */
490 class source_location
491 {
492 friend class text_input_buffer;
493 /**
494 * The text buffer object that included `b`.
495 */
496 text_input_buffer &buffer;
497 /**
498 * The underlying buffer that contains this location.
499 */
500 std::shared_ptr<input_buffer> b;
501 /**
502 * The offset within the current buffer of the source location.
503 */
504 int cursor;
source_location(text_input_buffer & buf)505 source_location(text_input_buffer &buf)
506 : buffer(buf),
507 b(buf.input_stack.empty() ? nullptr : buf.input_stack.top()),
508 cursor(b ? b->cursor : 0) {}
509 public:
510 /**
511 * Report an error at this location.
512 */
report_error(const char * msg)513 void report_error(const char *msg)
514 {
515 if (b)
516 {
517 buffer.parse_error(msg, *b, cursor);
518 }
519 else
520 {
521 buffer.parse_error(msg);
522 }
523 }
524 };
525 /**
526 * Returns the current source location.
527 */
location()528 source_location location()
529 {
530 return { *this };
531 }
532 /**
533 * Prints a message indicating the location of a parse error.
534 */
535 void parse_error(const char *msg);
536 /**
537 * Reads the contents of a binary file into `b`. The file name is assumed
538 * to be relative to one of the include paths.
539 *
540 * Returns true if the file exists and can be read, false otherwise.
541 */
542 bool read_binary_file(const std::string &filename, byte_buffer &b);
543 private:
544 /**
545 * Prints a message indicating the location of a parse error, given a
546 * specified location. This is used when input has already moved beyond
547 * the location that caused the failure.
548 */
549 void parse_error(const char *msg, input_buffer &b, int loc);
550 };
551
552 } // namespace dtc
553
554 #endif // !_INPUT_BUFFER_HH_
555