1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "internal.h"
26
/* How the spelling of a token kind is recorded in the token_spellings
   table below.  */
enum spell_type
{
  SPELL_OPERATOR = 0,   /* Category assigned by the OP table entries.  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};

/* One row of the token spelling table: the spelling category of a
   token type together with the name string stored for it.  */
struct token_spelling
{
  enum spell_type category;
  const unsigned char *name;
};
40
41 static const unsigned char *const digraph_spellings[] =
42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
43
44 #define OP(e, s) { SPELL_OPERATOR, U s },
45 #define TK(e, s) { SPELL_ ## s, U #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
49
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
59 unsigned int, enum cpp_ttype);
60 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
61 static int name_p (cpp_reader *, const cpp_string *);
62 static tokenrun *next_tokenrun (tokenrun *);
63
64 static _cpp_buff *new_buff (size_t);
65
66
67 /* Utility routine:
68
69 Compares, the token TOKEN to the NUL-terminated string STRING.
70 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
71 int
cpp_ideq(const cpp_token * token,const char * string)72 cpp_ideq (const cpp_token *token, const char *string)
73 {
74 if (token->type != CPP_NAME)
75 return 0;
76
77 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
78 }
79
80 /* Record a note TYPE at byte POS into the current cleaned logical
81 line. */
82 static void
add_line_note(cpp_buffer * buffer,const uchar * pos,unsigned int type)83 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
84 {
85 if (buffer->notes_used == buffer->notes_cap)
86 {
87 buffer->notes_cap = buffer->notes_cap * 2 + 200;
88 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89 buffer->notes_cap);
90 }
91
92 buffer->notes[buffer->notes_used].pos = pos;
93 buffer->notes[buffer->notes_used].type = type;
94 buffer->notes_used++;
95 }
96
97 /* Returns with a logical line that contains no escaped newlines or
98 trigraphs. This is a time-critical inner loop. */
99 void
_cpp_clean_line(cpp_reader * pfile)100 _cpp_clean_line (cpp_reader *pfile)
101 {
102 cpp_buffer *buffer;
103 const uchar *s;
104 uchar c, *d, *p;
105
106 buffer = pfile->buffer;
107 buffer->cur_note = buffer->notes_used = 0;
108 buffer->cur = buffer->line_base = buffer->next_line;
109 buffer->need_line = false;
110 s = buffer->next_line - 1;
111
112 if (!buffer->from_stage3)
113 {
114 const uchar *pbackslash = NULL;
115
116 /* Short circuit for the common case of an un-escaped line with
117 no trigraphs. The primary win here is by not writing any
118 data back to memory until we have to. */
119 for (;;)
120 {
121 c = *++s;
122 if (__builtin_expect (c == '\n', false)
123 || __builtin_expect (c == '\r', false))
124 {
125 d = (uchar *) s;
126
127 if (__builtin_expect (s == buffer->rlimit, false))
128 goto done;
129
130 /* DOS line ending? */
131 if (__builtin_expect (c == '\r', false)
132 && s[1] == '\n')
133 {
134 s++;
135 if (s == buffer->rlimit)
136 goto done;
137 }
138
139 if (__builtin_expect (pbackslash == NULL, true))
140 goto done;
141
142 /* Check for escaped newline. */
143 p = d;
144 while (is_nvspace (p[-1]))
145 p--;
146 if (p - 1 != pbackslash)
147 goto done;
148
149 /* Have an escaped newline; process it and proceed to
150 the slow path. */
151 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
152 d = p - 2;
153 buffer->next_line = p - 1;
154 break;
155 }
156 if (__builtin_expect (c == '\\', false))
157 pbackslash = s;
158 else if (__builtin_expect (c == '?', false)
159 && __builtin_expect (s[1] == '?', false)
160 && _cpp_trigraph_map[s[2]])
161 {
162 /* Have a trigraph. We may or may not have to convert
163 it. Add a line note regardless, for -Wtrigraphs. */
164 add_line_note (buffer, s, s[2]);
165 if (CPP_OPTION (pfile, trigraphs))
166 {
167 /* We do, and that means we have to switch to the
168 slow path. */
169 d = (uchar *) s;
170 *d = _cpp_trigraph_map[s[2]];
171 s += 2;
172 break;
173 }
174 }
175 }
176
177
178 for (;;)
179 {
180 c = *++s;
181 *++d = c;
182
183 if (c == '\n' || c == '\r')
184 {
185 /* Handle DOS line endings. */
186 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
187 s++;
188 if (s == buffer->rlimit)
189 break;
190
191 /* Escaped? */
192 p = d;
193 while (p != buffer->next_line && is_nvspace (p[-1]))
194 p--;
195 if (p == buffer->next_line || p[-1] != '\\')
196 break;
197
198 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
199 d = p - 2;
200 buffer->next_line = p - 1;
201 }
202 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
203 {
204 /* Add a note regardless, for the benefit of -Wtrigraphs. */
205 add_line_note (buffer, d, s[2]);
206 if (CPP_OPTION (pfile, trigraphs))
207 {
208 *d = _cpp_trigraph_map[s[2]];
209 s += 2;
210 }
211 }
212 }
213 }
214 else
215 {
216 do
217 s++;
218 while (*s != '\n' && *s != '\r');
219 d = (uchar *) s;
220
221 /* Handle DOS line endings. */
222 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
223 s++;
224 }
225
226 done:
227 *d = '\n';
228 /* A sentinel note that should never be processed. */
229 add_line_note (buffer, d + 1, '\n');
230 buffer->next_line = s + 1;
231 }
232
233 /* Return true if the trigraph indicated by NOTE should be warned
234 about in a comment. */
235 static bool
warn_in_comment(cpp_reader * pfile,_cpp_line_note * note)236 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
237 {
238 const uchar *p;
239
240 /* Within comments we don't warn about trigraphs, unless the
241 trigraph forms an escaped newline, as that may change
242 behavior. */
243 if (note->type != '/')
244 return false;
245
246 /* If -trigraphs, then this was an escaped newline iff the next note
247 is coincident. */
248 if (CPP_OPTION (pfile, trigraphs))
249 return note[1].pos == note->pos;
250
251 /* Otherwise, see if this forms an escaped newline. */
252 p = note->pos + 3;
253 while (is_nvspace (*p))
254 p++;
255
256 /* There might have been escaped newlines between the trigraph and the
257 newline we found. Hence the position test. */
258 return (*p == '\n' && p < note[1].pos);
259 }
260
261 /* Process the notes created by add_line_note as far as the current
262 location. */
263 void
_cpp_process_line_notes(cpp_reader * pfile,int in_comment)264 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
265 {
266 cpp_buffer *buffer = pfile->buffer;
267
268 for (;;)
269 {
270 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
271 unsigned int col;
272
273 if (note->pos > buffer->cur)
274 break;
275
276 buffer->cur_note++;
277 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
278
279 if (note->type == '\\' || note->type == ' ')
280 {
281 if (note->type == ' ' && !in_comment)
282 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
283 "backslash and newline separated by space");
284
285 if (buffer->next_line > buffer->rlimit)
286 {
287 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
288 "backslash-newline at end of file");
289 /* Prevent "no newline at end of file" warning. */
290 buffer->next_line = buffer->rlimit;
291 }
292
293 buffer->line_base = note->pos;
294 CPP_INCREMENT_LINE (pfile, 0);
295 }
296 else if (_cpp_trigraph_map[note->type])
297 {
298 if (CPP_OPTION (pfile, warn_trigraphs)
299 && (!in_comment || warn_in_comment (pfile, note)))
300 {
301 if (CPP_OPTION (pfile, trigraphs))
302 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
303 "trigraph ??%c converted to %c",
304 note->type,
305 (int) _cpp_trigraph_map[note->type]);
306 else
307 {
308 cpp_error_with_line
309 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
310 "trigraph ??%c ignored, use -trigraphs to enable",
311 note->type);
312 }
313 }
314 }
315 else
316 abort ();
317 }
318 }
319
320 /* Skip a C-style block comment. We find the end of the comment by
321 seeing if an asterisk is before every '/' we encounter. Returns
322 nonzero if comment terminated by EOF, zero otherwise.
323
324 Buffer->cur points to the initial asterisk of the comment. */
325 bool
_cpp_skip_block_comment(cpp_reader * pfile)326 _cpp_skip_block_comment (cpp_reader *pfile)
327 {
328 cpp_buffer *buffer = pfile->buffer;
329 const uchar *cur = buffer->cur;
330 uchar c;
331
332 cur++;
333 if (*cur == '/')
334 cur++;
335
336 for (;;)
337 {
338 /* People like decorating comments with '*', so check for '/'
339 instead for efficiency. */
340 c = *cur++;
341
342 if (c == '/')
343 {
344 if (cur[-2] == '*')
345 break;
346
347 /* Warn about potential nested comments, but not if the '/'
348 comes immediately before the true comment delimiter.
349 Don't bother to get it right across escaped newlines. */
350 if (CPP_OPTION (pfile, warn_comments)
351 && cur[0] == '*' && cur[1] != '/')
352 {
353 buffer->cur = cur;
354 cpp_error_with_line (pfile, CPP_DL_WARNING,
355 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
356 "\"/*\" within comment");
357 }
358 }
359 else if (c == '\n')
360 {
361 unsigned int cols;
362 buffer->cur = cur - 1;
363 _cpp_process_line_notes (pfile, true);
364 if (buffer->next_line >= buffer->rlimit)
365 return true;
366 _cpp_clean_line (pfile);
367
368 cols = buffer->next_line - buffer->line_base;
369 CPP_INCREMENT_LINE (pfile, cols);
370
371 cur = buffer->cur;
372 }
373 }
374
375 buffer->cur = cur;
376 _cpp_process_line_notes (pfile, true);
377 return false;
378 }
379
380 /* Skip a C++ line comment, leaving buffer->cur pointing to the
381 terminating newline. Handles escaped newlines. Returns nonzero
382 if a multiline comment. */
383 static int
skip_line_comment(cpp_reader * pfile)384 skip_line_comment (cpp_reader *pfile)
385 {
386 cpp_buffer *buffer = pfile->buffer;
387 unsigned int orig_line = pfile->line_table->highest_line;
388
389 while (*buffer->cur != '\n')
390 buffer->cur++;
391
392 _cpp_process_line_notes (pfile, true);
393 return orig_line != pfile->line_table->highest_line;
394 }
395
396 /* Skips whitespace, saving the next non-whitespace character. */
397 static void
skip_whitespace(cpp_reader * pfile,cppchar_t c)398 skip_whitespace (cpp_reader *pfile, cppchar_t c)
399 {
400 cpp_buffer *buffer = pfile->buffer;
401 bool saw_NUL = false;
402
403 do
404 {
405 /* Horizontal space always OK. */
406 if (c == ' ' || c == '\t')
407 ;
408 /* Just \f \v or \0 left. */
409 else if (c == '\0')
410 saw_NUL = true;
411 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
412 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
413 CPP_BUF_COL (buffer),
414 "%s in preprocessing directive",
415 c == '\f' ? "form feed" : "vertical tab");
416
417 c = *buffer->cur++;
418 }
419 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
420 while (is_nvspace (c));
421
422 if (saw_NUL)
423 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
424
425 buffer->cur--;
426 }
427
428 /* See if the characters of a number token are valid in a name (no
429 '.', '+' or '-'). */
430 static int
name_p(cpp_reader * pfile,const cpp_string * string)431 name_p (cpp_reader *pfile, const cpp_string *string)
432 {
433 unsigned int i;
434
435 for (i = 0; i < string->len; i++)
436 if (!is_idchar (string->text[i]))
437 return 0;
438
439 return 1;
440 }
441
442 /* After parsing an identifier or other sequence, produce a warning about
443 sequences not in NFC/NFKC. */
444 static void
warn_about_normalization(cpp_reader * pfile,const cpp_token * token,const struct normalize_state * s)445 warn_about_normalization (cpp_reader *pfile,
446 const cpp_token *token,
447 const struct normalize_state *s)
448 {
449 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
450 && !pfile->state.skipping)
451 {
452 /* Make sure that the token is printed using UCNs, even
453 if we'd otherwise happily print UTF-8. */
454 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
455 size_t sz;
456
457 sz = cpp_spell_token (pfile, token, buf, false) - buf;
458 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
459 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
460 "`%.*s' is not in NFKC", (int) sz, buf);
461 else
462 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
463 "`%.*s' is not in NFC", (int) sz, buf);
464 }
465 }
466
467 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
468 an identifier. FIRST is TRUE if this starts an identifier. */
469 static bool
forms_identifier_p(cpp_reader * pfile,int first,struct normalize_state * state)470 forms_identifier_p (cpp_reader *pfile, int first,
471 struct normalize_state *state)
472 {
473 cpp_buffer *buffer = pfile->buffer;
474
475 if (*buffer->cur == '$')
476 {
477 if (!CPP_OPTION (pfile, dollars_in_ident))
478 return false;
479
480 buffer->cur++;
481 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
482 {
483 CPP_OPTION (pfile, warn_dollars) = 0;
484 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
485 }
486
487 return true;
488 }
489
490 /* Is this a syntactically valid UCN? */
491 if (CPP_OPTION (pfile, extended_identifiers)
492 && *buffer->cur == '\\'
493 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
494 {
495 buffer->cur += 2;
496 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
497 state))
498 return true;
499 buffer->cur -= 2;
500 }
501
502 return false;
503 }
504
505 /* Lex an identifier starting at BUFFER->CUR - 1. */
506 static cpp_hashnode *
lex_identifier(cpp_reader * pfile,const uchar * base,bool starts_ucn,struct normalize_state * nst)507 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
508 struct normalize_state *nst)
509 {
510 cpp_hashnode *result;
511 const uchar *cur;
512 unsigned int len;
513 unsigned int hash = HT_HASHSTEP (0, *base);
514
515 cur = pfile->buffer->cur;
516 if (! starts_ucn)
517 while (ISIDNUM (*cur))
518 {
519 hash = HT_HASHSTEP (hash, *cur);
520 cur++;
521 }
522 pfile->buffer->cur = cur;
523 if (starts_ucn || forms_identifier_p (pfile, false, nst))
524 {
525 /* Slower version for identifiers containing UCNs (or $). */
526 do {
527 while (ISIDNUM (*pfile->buffer->cur))
528 {
529 pfile->buffer->cur++;
530 NORMALIZE_STATE_UPDATE_IDNUM (nst);
531 }
532 } while (forms_identifier_p (pfile, false, nst));
533 result = _cpp_interpret_identifier (pfile, base,
534 pfile->buffer->cur - base);
535 }
536 else
537 {
538 len = cur - base;
539 hash = HT_HASHFINISH (hash, len);
540
541 result = (cpp_hashnode *)
542 ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
543 }
544
545 /* Rarely, identifiers require diagnostics when lexed. */
546 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
547 && !pfile->state.skipping, 0))
548 {
549 /* It is allowed to poison the same identifier twice. */
550 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
551 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
552 NODE_NAME (result));
553
554 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
555 replacement list of a variadic macro. */
556 if (result == pfile->spec_nodes.n__VA_ARGS__
557 && !pfile->state.va_args_ok)
558 cpp_error (pfile, CPP_DL_PEDWARN,
559 "__VA_ARGS__ can only appear in the expansion"
560 " of a C99 variadic macro");
561 }
562
563 return result;
564 }
565
566 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
567 static void
lex_number(cpp_reader * pfile,cpp_string * number,struct normalize_state * nst)568 lex_number (cpp_reader *pfile, cpp_string *number,
569 struct normalize_state *nst)
570 {
571 const uchar *cur;
572 const uchar *base;
573 uchar *dest;
574
575 base = pfile->buffer->cur - 1;
576 do
577 {
578 cur = pfile->buffer->cur;
579
580 /* N.B. ISIDNUM does not include $. */
581 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
582 {
583 cur++;
584 NORMALIZE_STATE_UPDATE_IDNUM (nst);
585 }
586
587 pfile->buffer->cur = cur;
588 }
589 while (forms_identifier_p (pfile, false, nst));
590
591 number->len = cur - base;
592 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
593 memcpy (dest, base, number->len);
594 dest[number->len] = '\0';
595 number->text = dest;
596 }
597
598 /* Create a token of type TYPE with a literal spelling. */
599 static void
create_literal(cpp_reader * pfile,cpp_token * token,const uchar * base,unsigned int len,enum cpp_ttype type)600 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
601 unsigned int len, enum cpp_ttype type)
602 {
603 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
604
605 memcpy (dest, base, len);
606 dest[len] = '\0';
607 token->type = type;
608 token->val.str.len = len;
609 token->val.str.text = dest;
610 }
611
612 /* Lexes a string, character constant, or angle-bracketed header file
613 name. The stored string contains the spelling, including opening
614 quote and leading any leading 'L'. It returns the type of the
615 literal, or CPP_OTHER if it was not properly terminated.
616
617 The spelling is NUL-terminated, but it is not guaranteed that this
618 is the first NUL since embedded NULs are preserved. */
619 static void
lex_string(cpp_reader * pfile,cpp_token * token,const uchar * base)620 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
621 {
622 bool saw_NUL = false;
623 const uchar *cur;
624 cppchar_t terminator;
625 enum cpp_ttype type;
626
627 cur = base;
628 terminator = *cur++;
629 if (terminator == 'L')
630 terminator = *cur++;
631 if (terminator == '\"')
632 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
633 else if (terminator == '\'')
634 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
635 else
636 terminator = '>', type = CPP_HEADER_NAME;
637
638 for (;;)
639 {
640 cppchar_t c = *cur++;
641
642 /* In #include-style directives, terminators are not escapable. */
643 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
644 cur++;
645 else if (c == terminator)
646 break;
647 else if (c == '\n')
648 {
649 cur--;
650 type = CPP_OTHER;
651 break;
652 }
653 else if (c == '\0')
654 saw_NUL = true;
655 }
656
657 if (saw_NUL && !pfile->state.skipping)
658 cpp_error (pfile, CPP_DL_WARNING,
659 "null character(s) preserved in literal");
660
661 /* APPLE LOCAL begin #error with unmatched quotes 5607574 */
662 if (type == CPP_OTHER
663 && CPP_OPTION (pfile, lang) != CLK_ASM
664 && !pfile->state.in_diagnostic
665 && !pfile->state.skipping)
666 /* APPLE LOCAL end #error with unmatched quotes 5607574 */
667 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
668 (int) terminator);
669
670 pfile->buffer->cur = cur;
671 create_literal (pfile, token, base, cur - base, type);
672 }
673
674 /* The stored comment includes the comment start and any terminator. */
675 static void
save_comment(cpp_reader * pfile,cpp_token * token,const unsigned char * from,cppchar_t type)676 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
677 cppchar_t type)
678 {
679 unsigned char *buffer;
680 unsigned int len, clen;
681
682 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
683
684 /* C++ comments probably (not definitely) have moved past a new
685 line, which we don't want to save in the comment. */
686 if (is_vspace (pfile->buffer->cur[-1]))
687 len--;
688
689 /* If we are currently in a directive, then we need to store all
690 C++ comments as C comments internally, and so we need to
691 allocate a little extra space in that case.
692
693 Note that the only time we encounter a directive here is
694 when we are saving comments in a "#define". */
695 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
696
697 buffer = _cpp_unaligned_alloc (pfile, clen);
698
699 token->type = CPP_COMMENT;
700 token->val.str.len = clen;
701 token->val.str.text = buffer;
702
703 buffer[0] = '/';
704 memcpy (buffer + 1, from, len - 1);
705
706 /* Finish conversion to a C comment, if necessary. */
707 if (pfile->state.in_directive && type == '/')
708 {
709 buffer[1] = '*';
710 buffer[clen - 2] = '*';
711 buffer[clen - 1] = '/';
712 }
713 }
714
715 /* Allocate COUNT tokens for RUN. */
716 void
_cpp_init_tokenrun(tokenrun * run,unsigned int count)717 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
718 {
719 run->base = XNEWVEC (cpp_token, count);
720 run->limit = run->base + count;
721 run->next = NULL;
722 }
723
724 /* Returns the next tokenrun, or creates one if there is none. */
725 static tokenrun *
next_tokenrun(tokenrun * run)726 next_tokenrun (tokenrun *run)
727 {
728 if (run->next == NULL)
729 {
730 run->next = XNEW (tokenrun);
731 run->next->prev = run;
732 _cpp_init_tokenrun (run->next, 250);
733 }
734
735 return run->next;
736 }
737
738 /* Allocate a single token that is invalidated at the same time as the
739 rest of the tokens on the line. Has its line and col set to the
740 same as the last lexed token, so that diagnostics appear in the
741 right place. */
742 cpp_token *
_cpp_temp_token(cpp_reader * pfile)743 _cpp_temp_token (cpp_reader *pfile)
744 {
745 cpp_token *old, *result;
746
747 old = pfile->cur_token - 1;
748 if (pfile->cur_token == pfile->cur_run->limit)
749 {
750 pfile->cur_run = next_tokenrun (pfile->cur_run);
751 pfile->cur_token = pfile->cur_run->base;
752 }
753
754 result = pfile->cur_token++;
755 result->src_loc = old->src_loc;
756 return result;
757 }
758
759 /* Lex a token into RESULT (external interface). Takes care of issues
760 like directive handling, token lookahead, multiple include
761 optimization and skipping. */
762 const cpp_token *
_cpp_lex_token(cpp_reader * pfile)763 _cpp_lex_token (cpp_reader *pfile)
764 {
765 cpp_token *result;
766
767 for (;;)
768 {
769 if (pfile->cur_token == pfile->cur_run->limit)
770 {
771 pfile->cur_run = next_tokenrun (pfile->cur_run);
772 pfile->cur_token = pfile->cur_run->base;
773 }
774 /* We assume that the current token is somewhere in the current
775 run. */
776 if (pfile->cur_token < pfile->cur_run->base
777 || pfile->cur_token >= pfile->cur_run->limit)
778 abort ();
779
780 if (pfile->lookaheads)
781 {
782 pfile->lookaheads--;
783 result = pfile->cur_token++;
784 }
785 else
786 result = _cpp_lex_direct (pfile);
787
788 if (result->flags & BOL)
789 {
790 /* Is this a directive. If _cpp_handle_directive returns
791 false, it is an assembler #. */
792 if (result->type == CPP_HASH
793 /* 6.10.3 p 11: Directives in a list of macro arguments
794 gives undefined behavior. This implementation
795 handles the directive as normal. */
796 && pfile->state.parsing_args != 1)
797 {
798 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
799 {
800 if (pfile->directive_result.type == CPP_PADDING)
801 continue;
802 result = &pfile->directive_result;
803 }
804 }
805 else if (pfile->state.in_deferred_pragma)
806 result = &pfile->directive_result;
807
808 if (pfile->cb.line_change && !pfile->state.skipping)
809 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
810 }
811
812 /* We don't skip tokens in directives. */
813 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
814 break;
815
816 /* Outside a directive, invalidate controlling macros. At file
817 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
818 get here and MI optimization works. */
819 pfile->mi_valid = false;
820
821 if (!pfile->state.skipping || result->type == CPP_EOF)
822 break;
823 }
824
825 return result;
826 }
827
828 /* Returns true if a fresh line has been loaded. */
829 bool
_cpp_get_fresh_line(cpp_reader * pfile)830 _cpp_get_fresh_line (cpp_reader *pfile)
831 {
832 int return_at_eof;
833
834 /* We can't get a new line until we leave the current directive. */
835 if (pfile->state.in_directive)
836 return false;
837
838 for (;;)
839 {
840 cpp_buffer *buffer = pfile->buffer;
841
842 if (!buffer->need_line)
843 return true;
844
845 if (buffer->next_line < buffer->rlimit)
846 {
847 _cpp_clean_line (pfile);
848 return true;
849 }
850
851 /* First, get out of parsing arguments state. */
852 if (pfile->state.parsing_args)
853 return false;
854
855 /* End of buffer. Non-empty files should end in a newline. */
856 if (buffer->buf != buffer->rlimit
857 && buffer->next_line > buffer->rlimit
858 && !buffer->from_stage3)
859 {
860 /* Clip to buffer size. */
861 buffer->next_line = buffer->rlimit;
862 /* APPLE LOCAL begin suppress no newline warning. */
863 if ( CPP_OPTION (pfile, warn_newline_at_eof))
864 {
865 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
866 CPP_BUF_COLUMN (buffer, buffer->cur),
867 "no newline at end of file");
868 }
869 /* APPLE LOCAL end suppress no newline warning. */
870 }
871
872 return_at_eof = buffer->return_at_eof;
873 _cpp_pop_buffer (pfile);
874 if (pfile->buffer == NULL || return_at_eof)
875 return false;
876 }
877 }
878
/* Helper for two-character operators.  If the next input character is
   CHAR, consume it and give the token the type THEN_TYPE; otherwise
   give it ELSE_TYPE.  Expects `buffer' and `result' to be in scope at
   the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      if (*buffer->cur == CHAR)                 \
        {                                       \
          buffer->cur++;                        \
          result->type = THEN_TYPE;             \
        }                                       \
      else                                      \
        result->type = ELSE_TYPE;               \
    }                                           \
  while (0)
887
888 /* Lex a token into pfile->cur_token, which is also incremented, to
889 get diagnostics pointing to the correct location.
890
891 Does not handle issues such as token lookahead, multiple-include
892 optimization, directives, skipping etc. This function is only
893 suitable for use by _cpp_lex_token, and in special cases like
894 lex_expansion_token which doesn't care for any of these issues.
895
896 When meeting a newline, returns CPP_EOF if parsing a directive,
897 otherwise returns to the start of the token buffer if permissible.
898 Returns the location of the lexed token. */
899 cpp_token *
_cpp_lex_direct(cpp_reader * pfile)900 _cpp_lex_direct (cpp_reader *pfile)
901 {
902 cppchar_t c;
903 cpp_buffer *buffer;
904 const unsigned char *comment_start;
905 cpp_token *result = pfile->cur_token++;
906
907 fresh_line:
908 result->flags = 0;
909 buffer = pfile->buffer;
910 if (buffer->need_line)
911 {
912 if (pfile->state.in_deferred_pragma)
913 {
914 result->type = CPP_PRAGMA_EOL;
915 pfile->state.in_deferred_pragma = false;
916 if (!pfile->state.pragma_allow_expansion)
917 pfile->state.prevent_expansion--;
918 return result;
919 }
920 if (!_cpp_get_fresh_line (pfile))
921 {
922 result->type = CPP_EOF;
923 if (!pfile->state.in_directive)
924 {
925 /* Tell the compiler the line number of the EOF token. */
926 result->src_loc = pfile->line_table->highest_line;
927 result->flags = BOL;
928 }
929 return result;
930 }
931 if (!pfile->keep_tokens)
932 {
933 pfile->cur_run = &pfile->base_run;
934 result = pfile->base_run.base;
935 pfile->cur_token = result + 1;
936 }
937 result->flags = BOL;
938 if (pfile->state.parsing_args == 2)
939 result->flags |= PREV_WHITE;
940 }
941 buffer = pfile->buffer;
942 update_tokens_line:
943 result->src_loc = pfile->line_table->highest_line;
944
945 skipped_white:
946 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
947 && !pfile->overlaid_buffer)
948 {
949 _cpp_process_line_notes (pfile, false);
950 result->src_loc = pfile->line_table->highest_line;
951 }
952 c = *buffer->cur++;
953
954 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
955 CPP_BUF_COLUMN (buffer, buffer->cur));
956
957 switch (c)
958 {
959 case ' ': case '\t': case '\f': case '\v': case '\0':
960 result->flags |= PREV_WHITE;
961 skip_whitespace (pfile, c);
962 goto skipped_white;
963
964 case '\n':
965 if (buffer->cur < buffer->rlimit)
966 CPP_INCREMENT_LINE (pfile, 0);
967 buffer->need_line = true;
968 goto fresh_line;
969
970 case '0': case '1': case '2': case '3': case '4':
971 case '5': case '6': case '7': case '8': case '9':
972 {
973 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
974 result->type = CPP_NUMBER;
975 lex_number (pfile, &result->val.str, &nst);
976 warn_about_normalization (pfile, result, &nst);
977 break;
978 }
979
980 case 'L':
981 /* 'L' may introduce wide characters or strings. */
982 if (*buffer->cur == '\'' || *buffer->cur == '"')
983 {
984 lex_string (pfile, result, buffer->cur - 1);
985 break;
986 }
987 /* Fall through. */
988
989 case '_':
990 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
991 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
992 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
993 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
994 case 'y': case 'z':
995 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
996 case 'G': case 'H': case 'I': case 'J': case 'K':
997 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
998 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
999 case 'Y': case 'Z':
1000 result->type = CPP_NAME;
1001 {
1002 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1003 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1004 &nst);
1005 warn_about_normalization (pfile, result, &nst);
1006 }
1007
1008 /* Convert named operators to their proper types. */
1009 if (result->val.node->flags & NODE_OPERATOR)
1010 {
1011 result->flags |= NAMED_OP;
1012 result->type = (enum cpp_ttype) result->val.node->directive_index;
1013 }
1014 break;
1015
1016 case '\'':
1017 case '"':
1018 lex_string (pfile, result, buffer->cur - 1);
1019 break;
1020
1021 case '/':
1022 /* A potential block or line comment. */
1023 comment_start = buffer->cur;
1024 c = *buffer->cur;
1025
1026 if (c == '*')
1027 {
1028 if (_cpp_skip_block_comment (pfile))
1029 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1030 }
1031 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1032 || cpp_in_system_header (pfile)))
1033 {
1034 /* Warn about comments only if pedantically GNUC89, and not
1035 in system headers. */
1036 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1037 && ! buffer->warned_cplusplus_comments)
1038 {
1039 cpp_error (pfile, CPP_DL_PEDWARN,
1040 "C++ style comments are not allowed in ISO C90");
1041 cpp_error (pfile, CPP_DL_PEDWARN,
1042 "(this will be reported only once per input file)");
1043 buffer->warned_cplusplus_comments = 1;
1044 }
1045
1046 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1047 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1048 }
1049 else if (c == '=')
1050 {
1051 buffer->cur++;
1052 result->type = CPP_DIV_EQ;
1053 break;
1054 }
1055 else
1056 {
1057 result->type = CPP_DIV;
1058 break;
1059 }
1060
1061 if (!pfile->state.save_comments)
1062 {
1063 result->flags |= PREV_WHITE;
1064 goto update_tokens_line;
1065 }
1066
1067 /* Save the comment as a token in its own right. */
1068 save_comment (pfile, result, comment_start, c);
1069 break;
1070
1071 case '<':
1072 if (pfile->state.angled_headers)
1073 {
1074 lex_string (pfile, result, buffer->cur - 1);
1075 break;
1076 }
1077
1078 result->type = CPP_LESS;
1079 if (*buffer->cur == '=')
1080 buffer->cur++, result->type = CPP_LESS_EQ;
1081 else if (*buffer->cur == '<')
1082 {
1083 buffer->cur++;
1084 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1085 }
1086 else if (CPP_OPTION (pfile, digraphs))
1087 {
1088 if (*buffer->cur == ':')
1089 {
1090 buffer->cur++;
1091 result->flags |= DIGRAPH;
1092 result->type = CPP_OPEN_SQUARE;
1093 }
1094 else if (*buffer->cur == '%')
1095 {
1096 buffer->cur++;
1097 result->flags |= DIGRAPH;
1098 result->type = CPP_OPEN_BRACE;
1099 }
1100 }
1101 break;
1102
1103 case '>':
1104 result->type = CPP_GREATER;
1105 if (*buffer->cur == '=')
1106 buffer->cur++, result->type = CPP_GREATER_EQ;
1107 else if (*buffer->cur == '>')
1108 {
1109 buffer->cur++;
1110 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1111 }
1112 break;
1113
1114 case '%':
1115 result->type = CPP_MOD;
1116 if (*buffer->cur == '=')
1117 buffer->cur++, result->type = CPP_MOD_EQ;
1118 else if (CPP_OPTION (pfile, digraphs))
1119 {
1120 if (*buffer->cur == ':')
1121 {
1122 buffer->cur++;
1123 result->flags |= DIGRAPH;
1124 result->type = CPP_HASH;
1125 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1126 buffer->cur += 2, result->type = CPP_PASTE;
1127 }
1128 else if (*buffer->cur == '>')
1129 {
1130 buffer->cur++;
1131 result->flags |= DIGRAPH;
1132 result->type = CPP_CLOSE_BRACE;
1133 }
1134 }
1135 break;
1136
1137 case '.':
1138 result->type = CPP_DOT;
1139 if (ISDIGIT (*buffer->cur))
1140 {
1141 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1142 result->type = CPP_NUMBER;
1143 lex_number (pfile, &result->val.str, &nst);
1144 warn_about_normalization (pfile, result, &nst);
1145 }
1146 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1147 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1148 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1149 buffer->cur++, result->type = CPP_DOT_STAR;
1150 break;
1151
1152 case '+':
1153 result->type = CPP_PLUS;
1154 if (*buffer->cur == '+')
1155 buffer->cur++, result->type = CPP_PLUS_PLUS;
1156 else if (*buffer->cur == '=')
1157 buffer->cur++, result->type = CPP_PLUS_EQ;
1158 break;
1159
1160 case '-':
1161 result->type = CPP_MINUS;
1162 if (*buffer->cur == '>')
1163 {
1164 buffer->cur++;
1165 result->type = CPP_DEREF;
1166 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1167 buffer->cur++, result->type = CPP_DEREF_STAR;
1168 }
1169 else if (*buffer->cur == '-')
1170 buffer->cur++, result->type = CPP_MINUS_MINUS;
1171 else if (*buffer->cur == '=')
1172 buffer->cur++, result->type = CPP_MINUS_EQ;
1173 break;
1174
1175 case '&':
1176 result->type = CPP_AND;
1177 if (*buffer->cur == '&')
1178 buffer->cur++, result->type = CPP_AND_AND;
1179 else if (*buffer->cur == '=')
1180 buffer->cur++, result->type = CPP_AND_EQ;
1181 break;
1182
1183 case '|':
1184 result->type = CPP_OR;
1185 if (*buffer->cur == '|')
1186 buffer->cur++, result->type = CPP_OR_OR;
1187 else if (*buffer->cur == '=')
1188 buffer->cur++, result->type = CPP_OR_EQ;
1189 break;
1190
1191 case ':':
1192 result->type = CPP_COLON;
1193 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1194 buffer->cur++, result->type = CPP_SCOPE;
1195 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1196 {
1197 buffer->cur++;
1198 result->flags |= DIGRAPH;
1199 result->type = CPP_CLOSE_SQUARE;
1200 }
1201 break;
1202
1203 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1204 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1205 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1206 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1207 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1208
1209 case '?': result->type = CPP_QUERY; break;
1210 case '~': result->type = CPP_COMPL; break;
1211 case ',': result->type = CPP_COMMA; break;
1212 case '(': result->type = CPP_OPEN_PAREN; break;
1213 case ')': result->type = CPP_CLOSE_PAREN; break;
1214 case '[': result->type = CPP_OPEN_SQUARE; break;
1215 case ']': result->type = CPP_CLOSE_SQUARE; break;
1216 case '{': result->type = CPP_OPEN_BRACE; break;
1217 case '}': result->type = CPP_CLOSE_BRACE; break;
1218 case ';': result->type = CPP_SEMICOLON; break;
1219
1220 /* @ is a punctuator in Objective-C. */
1221 case '@': result->type = CPP_ATSIGN; break;
1222
1223 case '$':
1224 case '\\':
1225 {
1226 const uchar *base = --buffer->cur;
1227 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1228
1229 if (forms_identifier_p (pfile, true, &nst))
1230 {
1231 result->type = CPP_NAME;
1232 result->val.node = lex_identifier (pfile, base, true, &nst);
1233 warn_about_normalization (pfile, result, &nst);
1234 break;
1235 }
1236 buffer->cur++;
1237 }
1238
1239 default:
1240 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1241 break;
1242 }
1243
1244 return result;
1245 }
1246
1247 /* An upper bound on the number of bytes needed to spell TOKEN.
1248 Does not include preceding whitespace. */
1249 unsigned int
cpp_token_len(const cpp_token * token)1250 cpp_token_len (const cpp_token *token)
1251 {
1252 unsigned int len;
1253
1254 switch (TOKEN_SPELL (token))
1255 {
1256 default: len = 4; break;
1257 case SPELL_LITERAL: len = token->val.str.len; break;
1258 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
1259 }
1260
1261 return len;
1262 }
1263
/* Decode the single UTF-8 sequence that starts at NAME and store it in
   BUFFER as a universal character name of the form \UXXXXXXXX, using
   eight lowercase hexadecimal digits.

   Exactly 10 bytes are written to BUFFER; no terminating NUL is added.
   The return value is the number of bytes consumed from NAME, which is
   the count of leading one bits in the first byte.  The callers in
   this file only invoke this for a byte whose high bit is set.

   Aborts if any byte after the first is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  size_t seq_len = 0;
  size_t k;
  unsigned int lead_bits;
  unsigned long code_point;
  int digit;

  /* The number of leading one bits in the first byte gives the total
     length of the sequence.  */
  lead_bits = name[0];
  while (lead_bits & 0x80)
    {
      seq_len++;
      lead_bits <<= 1;
    }

  /* Strip the length marker from the first byte, then append six
     payload bits from each continuation byte.  */
  code_point = name[0] & (0x7F >> seq_len);
  for (k = 1; k < seq_len; k++)
    {
      unsigned char cont = name[k];

      /* Ill-formed UTF-8: not a continuation byte.  */
      if ((cont & 0xC0) != 0x80)
        abort ();

      code_point = (code_point << 6) | (cont & 0x3F);
    }

  /* Emit the escape, most significant nibble first.  */
  buffer[0] = '\\';
  buffer[1] = 'U';
  for (digit = 0; digit < 8; digit++)
    buffer[2 + digit] = hex_digits[(code_point >> (28 - 4 * digit)) & 0xF];

  return seq_len;
}
1297
1298
1299 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1300 already contain the enough space to hold the token's spelling.
1301 Returns a pointer to the character after the last character written.
1302 FORSTRING is true if this is to be the spelling after translation
1303 phase 1 (this is different for UCNs).
1304 FIXME: Would be nice if we didn't need the PFILE argument. */
1305 unsigned char *
cpp_spell_token(cpp_reader * pfile,const cpp_token * token,unsigned char * buffer,bool forstring)1306 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1307 unsigned char *buffer, bool forstring)
1308 {
1309 switch (TOKEN_SPELL (token))
1310 {
1311 case SPELL_OPERATOR:
1312 {
1313 const unsigned char *spelling;
1314 unsigned char c;
1315
1316 if (token->flags & DIGRAPH)
1317 spelling
1318 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1319 else if (token->flags & NAMED_OP)
1320 goto spell_ident;
1321 else
1322 spelling = TOKEN_NAME (token);
1323
1324 while ((c = *spelling++) != '\0')
1325 *buffer++ = c;
1326 }
1327 break;
1328
1329 spell_ident:
1330 case SPELL_IDENT:
1331 if (forstring)
1332 {
1333 memcpy (buffer, NODE_NAME (token->val.node),
1334 NODE_LEN (token->val.node));
1335 buffer += NODE_LEN (token->val.node);
1336 }
1337 else
1338 {
1339 size_t i;
1340 const unsigned char * name = NODE_NAME (token->val.node);
1341
1342 for (i = 0; i < NODE_LEN (token->val.node); i++)
1343 if (name[i] & ~0x7F)
1344 {
1345 i += utf8_to_ucn (buffer, name + i) - 1;
1346 buffer += 10;
1347 }
1348 else
1349 *buffer++ = NODE_NAME (token->val.node)[i];
1350 }
1351 break;
1352
1353 case SPELL_LITERAL:
1354 memcpy (buffer, token->val.str.text, token->val.str.len);
1355 buffer += token->val.str.len;
1356 break;
1357
1358 case SPELL_NONE:
1359 cpp_error (pfile, CPP_DL_ICE,
1360 "unspellable token %s", TOKEN_NAME (token));
1361 break;
1362 }
1363
1364 return buffer;
1365 }
1366
1367 /* Returns TOKEN spelt as a null-terminated string. The string is
1368 freed when the reader is destroyed. Useful for diagnostics. */
1369 unsigned char *
cpp_token_as_text(cpp_reader * pfile,const cpp_token * token)1370 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1371 {
1372 unsigned int len = cpp_token_len (token) + 1;
1373 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1374
1375 end = cpp_spell_token (pfile, token, start, false);
1376 end[0] = '\0';
1377
1378 return start;
1379 }
1380
1381 /* Used by C front ends, which really should move to using
1382 cpp_token_as_text. */
1383 const char *
cpp_type2name(enum cpp_ttype type)1384 cpp_type2name (enum cpp_ttype type)
1385 {
1386 return (const char *) token_spellings[type].name;
1387 }
1388
1389 /* Writes the spelling of token to FP, without any preceding space.
1390 Separated from cpp_spell_token for efficiency - to avoid stdio
1391 double-buffering. */
1392 void
cpp_output_token(const cpp_token * token,FILE * fp)1393 cpp_output_token (const cpp_token *token, FILE *fp)
1394 {
1395 switch (TOKEN_SPELL (token))
1396 {
1397 case SPELL_OPERATOR:
1398 {
1399 const unsigned char *spelling;
1400 int c;
1401
1402 if (token->flags & DIGRAPH)
1403 spelling
1404 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1405 else if (token->flags & NAMED_OP)
1406 goto spell_ident;
1407 else
1408 spelling = TOKEN_NAME (token);
1409
1410 c = *spelling;
1411 do
1412 putc (c, fp);
1413 while ((c = *++spelling) != '\0');
1414 }
1415 break;
1416
1417 spell_ident:
1418 case SPELL_IDENT:
1419 {
1420 size_t i;
1421 const unsigned char * name = NODE_NAME (token->val.node);
1422
1423 for (i = 0; i < NODE_LEN (token->val.node); i++)
1424 if (name[i] & ~0x7F)
1425 {
1426 unsigned char buffer[10];
1427 i += utf8_to_ucn (buffer, name + i) - 1;
1428 fwrite (buffer, 1, 10, fp);
1429 }
1430 else
1431 fputc (NODE_NAME (token->val.node)[i], fp);
1432 }
1433 break;
1434
1435 case SPELL_LITERAL:
1436 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1437 break;
1438
1439 case SPELL_NONE:
1440 /* An error, most probably. */
1441 break;
1442 }
1443 }
1444
1445 /* Compare two tokens. */
1446 int
_cpp_equiv_tokens(const cpp_token * a,const cpp_token * b)1447 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1448 {
1449 if (a->type == b->type && a->flags == b->flags)
1450 switch (TOKEN_SPELL (a))
1451 {
1452 default: /* Keep compiler happy. */
1453 case SPELL_OPERATOR:
1454 return 1;
1455 case SPELL_NONE:
1456 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1457 case SPELL_IDENT:
1458 return a->val.node == b->val.node;
1459 case SPELL_LITERAL:
1460 return (a->val.str.len == b->val.str.len
1461 && !memcmp (a->val.str.text, b->val.str.text,
1462 a->val.str.len));
1463 }
1464
1465 return 0;
1466 }
1467
1468 /* Returns nonzero if a space should be inserted to avoid an
1469 accidental token paste for output. For simplicity, it is
1470 conservative, and occasionally advises a space where one is not
1471 needed, e.g. "." and ".2". */
1472 int
cpp_avoid_paste(cpp_reader * pfile,const cpp_token * token1,const cpp_token * token2)1473 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1474 const cpp_token *token2)
1475 {
1476 enum cpp_ttype a = token1->type, b = token2->type;
1477 cppchar_t c;
1478
1479 if (token1->flags & NAMED_OP)
1480 a = CPP_NAME;
1481 if (token2->flags & NAMED_OP)
1482 b = CPP_NAME;
1483
1484 c = EOF;
1485 if (token2->flags & DIGRAPH)
1486 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1487 else if (token_spellings[b].category == SPELL_OPERATOR)
1488 c = token_spellings[b].name[0];
1489
1490 /* Quickly get everything that can paste with an '='. */
1491 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1492 return 1;
1493
1494 switch (a)
1495 {
1496 case CPP_GREATER: return c == '>';
1497 case CPP_LESS: return c == '<' || c == '%' || c == ':';
1498 case CPP_PLUS: return c == '+';
1499 case CPP_MINUS: return c == '-' || c == '>';
1500 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1501 case CPP_MOD: return c == ':' || c == '>';
1502 case CPP_AND: return c == '&';
1503 case CPP_OR: return c == '|';
1504 case CPP_COLON: return c == ':' || c == '>';
1505 case CPP_DEREF: return c == '*';
1506 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1507 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1508 case CPP_NAME: return ((b == CPP_NUMBER
1509 && name_p (pfile, &token2->val.str))
1510 || b == CPP_NAME
1511 || b == CPP_CHAR || b == CPP_STRING); /* L */
1512 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1513 || c == '.' || c == '+' || c == '-');
1514 /* UCNs */
1515 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1516 && b == CPP_NAME)
1517 || (CPP_OPTION (pfile, objc)
1518 && token1->val.str.text[0] == '@'
1519 && (b == CPP_NAME || b == CPP_STRING)));
1520 default: break;
1521 }
1522
1523 return 0;
1524 }
1525
1526 /* Output all the remaining tokens on the current line, and a newline
1527 character, to FP. Leading whitespace is removed. If there are
1528 macros, special token padding is not performed. */
1529 void
cpp_output_line(cpp_reader * pfile,FILE * fp)1530 cpp_output_line (cpp_reader *pfile, FILE *fp)
1531 {
1532 const cpp_token *token;
1533
1534 token = cpp_get_token (pfile);
1535 while (token->type != CPP_EOF)
1536 {
1537 cpp_output_token (token, fp);
1538 token = cpp_get_token (pfile);
1539 if (token->flags & PREV_WHITE)
1540 putc (' ', fp);
1541 }
1542
1543 putc ('\n', fp);
1544 }
1545
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).  */

/* Smallest usable size, in bytes, of any newly created buffer.  */
#define MIN_BUFF_SIZE 8000

/* Largest buffer size considered an acceptable match for a request of
   MIN_SIZE bytes.  */
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)

/* Size requested when extending BUFF: MIN_EXTRA bytes plus twice the
   room still left in BUFF.  Every macro argument is parenthesized so
   that an expression argument cannot rebind against the surrounding
   operators.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1560
1561 /* Create a new allocation buffer. Place the control block at the end
1562 of the buffer, so that buffer overflows will cause immediate chaos. */
1563 static _cpp_buff *
new_buff(size_t len)1564 new_buff (size_t len)
1565 {
1566 _cpp_buff *result;
1567 unsigned char *base;
1568
1569 if (len < MIN_BUFF_SIZE)
1570 len = MIN_BUFF_SIZE;
1571 len = CPP_ALIGN (len);
1572
1573 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1574 result = (_cpp_buff *) (base + len);
1575 result->base = base;
1576 result->cur = base;
1577 result->limit = base + len;
1578 result->next = NULL;
1579 return result;
1580 }
1581
1582 /* Place a chain of unwanted allocation buffers on the free list. */
1583 void
_cpp_release_buff(cpp_reader * pfile,_cpp_buff * buff)1584 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1585 {
1586 _cpp_buff *end = buff;
1587
1588 while (end->next)
1589 end = end->next;
1590 end->next = pfile->free_buffs;
1591 pfile->free_buffs = buff;
1592 }
1593
1594 /* Return a free buffer of size at least MIN_SIZE. */
1595 _cpp_buff *
_cpp_get_buff(cpp_reader * pfile,size_t min_size)1596 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1597 {
1598 _cpp_buff *result, **p;
1599
1600 for (p = &pfile->free_buffs;; p = &(*p)->next)
1601 {
1602 size_t size;
1603
1604 if (*p == NULL)
1605 return new_buff (min_size);
1606 result = *p;
1607 size = result->limit - result->base;
1608 /* Return a buffer that's big enough, but don't waste one that's
1609 way too big. */
1610 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1611 break;
1612 }
1613
1614 *p = result->next;
1615 result->next = NULL;
1616 result->cur = result->base;
1617 return result;
1618 }
1619
1620 /* Creates a new buffer with enough space to hold the uncommitted
1621 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1622 the excess bytes to the new buffer. Chains the new buffer after
1623 BUFF, and returns the new buffer. */
1624 _cpp_buff *
_cpp_append_extend_buff(cpp_reader * pfile,_cpp_buff * buff,size_t min_extra)1625 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1626 {
1627 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1628 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1629
1630 buff->next = new_buff;
1631 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1632 return new_buff;
1633 }
1634
1635 /* Creates a new buffer with enough space to hold the uncommitted
1636 remaining bytes of the buffer pointed to by BUFF, and at least
1637 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1638 Chains the new buffer before the buffer pointed to by BUFF, and
1639 updates the pointer to point to the new buffer. */
1640 void
_cpp_extend_buff(cpp_reader * pfile,_cpp_buff ** pbuff,size_t min_extra)1641 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1642 {
1643 _cpp_buff *new_buff, *old_buff = *pbuff;
1644 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1645
1646 new_buff = _cpp_get_buff (pfile, size);
1647 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1648 new_buff->next = old_buff;
1649 *pbuff = new_buff;
1650 }
1651
1652 /* Free a chain of buffers starting at BUFF. */
1653 void
_cpp_free_buff(_cpp_buff * buff)1654 _cpp_free_buff (_cpp_buff *buff)
1655 {
1656 _cpp_buff *next;
1657
1658 for (; buff; buff = next)
1659 {
1660 next = buff->next;
1661 free (buff->base);
1662 }
1663 }
1664
1665 /* Allocate permanent, unaligned storage of length LEN. */
1666 unsigned char *
_cpp_unaligned_alloc(cpp_reader * pfile,size_t len)1667 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1668 {
1669 _cpp_buff *buff = pfile->u_buff;
1670 unsigned char *result = buff->cur;
1671
1672 if (len > (size_t) (buff->limit - result))
1673 {
1674 buff = _cpp_get_buff (pfile, len);
1675 buff->next = pfile->u_buff;
1676 pfile->u_buff = buff;
1677 result = buff->cur;
1678 }
1679
1680 buff->cur = result + len;
1681 return result;
1682 }
1683
1684 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1685 That buffer is used for growing allocations when saving macro
1686 replacement lists in a #define, and when parsing an answer to an
1687 assertion in #assert, #unassert or #if (and therefore possibly
1688 whilst expanding macros). It therefore must not be used by any
1689 code that they might call: specifically the lexer and the guts of
1690 the macro expander.
1691
1692 All existing other uses clearly fit this restriction: storing
1693 registered pragmas during initialization. */
1694 unsigned char *
_cpp_aligned_alloc(cpp_reader * pfile,size_t len)1695 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1696 {
1697 _cpp_buff *buff = pfile->a_buff;
1698 unsigned char *result = buff->cur;
1699
1700 if (len > (size_t) (buff->limit - result))
1701 {
1702 buff = _cpp_get_buff (pfile, len);
1703 buff->next = pfile->a_buff;
1704 pfile->a_buff = buff;
1705 result = buff->cur;
1706 }
1707
1708 buff->cur = result + len;
1709 return result;
1710 }
1711
1712 /* Say which field of TOK is in use. */
1713
1714 enum cpp_token_fld_kind
cpp_token_val_index(cpp_token * tok)1715 cpp_token_val_index (cpp_token *tok)
1716 {
1717 switch (TOKEN_SPELL (tok))
1718 {
1719 case SPELL_IDENT:
1720 return CPP_TOKEN_FLD_NODE;
1721 case SPELL_LITERAL:
1722 return CPP_TOKEN_FLD_STR;
1723 case SPELL_NONE:
1724 if (tok->type == CPP_MACRO_ARG)
1725 return CPP_TOKEN_FLD_ARG_NO;
1726 else if (tok->type == CPP_PADDING)
1727 return CPP_TOKEN_FLD_SOURCE;
1728 else if (tok->type == CPP_PRAGMA)
1729 return CPP_TOKEN_FLD_PRAGMA;
1730 /* else fall through */
1731 default:
1732 return CPP_TOKEN_FLD_NONE;
1733 }
1734 }
1735