xref: /freebsd-12.1/contrib/gcclibs/libcpp/lex.c (revision e1b3bb53)
1 /* CPP Library - lexical analysis.
2    Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3    Contributed by Per Bothner, 1994-95.
4    Based on CCCP program by Paul Rubin, June 1986
5    Adapted to ANSI C, Richard Stallman, Jan 1987
6    Broken out to separate file, Zack Weinberg, Mar 2000
7 
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "internal.h"
26 
/* Spelling category recorded for each token type in the
   token_spellings table.  */
enum spell_type
{
  SPELL_OPERATOR = 0,	/* Category given to every OP() table entry.  */
  SPELL_IDENT,		/* The remaining categories are chosen by name  */
  SPELL_LITERAL,	/* in the TK() table entries, which paste their  */
  SPELL_NONE		/* second argument onto the SPELL_ prefix.  */
};
34 
/* One entry of the token spelling table: how a token type is spelled
   and the string associated with it.  */
struct token_spelling
{
  enum spell_type category;	/* Spelling category of the token type.  */
  const unsigned char *name;	/* For OP() entries, the string given in
				   the table; for TK() entries, the
				   stringified enumerator name.  */
};
40 
/* Spellings of the digraph alternatives, i.e. the alternative forms
   of #, ##, [, ], { and } respectively.
   NOTE(review): the index used to select an entry is computed outside
   this part of the file -- confirm the ordering against the users of
   this table before changing it.  */
static const unsigned char *const digraph_spellings[] =
{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
43 
/* Build the spelling table by expanding TTYPE_TABLE with OP and TK
   temporarily defined so that each table entry yields one struct
   token_spelling initializer:
     OP (e, s) -> { SPELL_OPERATOR, s }
     TK (e, s) -> { SPELL_<s>, "<e>" }
   NOTE(review): TTYPE_TABLE and N_TTYPES are defined elsewhere
   (presumably in one of the headers included above) -- the table is
   assumed to list the token types in enumeration order so that it can
   be indexed by token->type.  */
#define OP(e, s) { SPELL_OPERATOR, U s  },
#define TK(e, s) { SPELL_ ## s,    U #e },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK

/* Spelling category, respectively table name, of TOKEN's type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52 
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
59 			    unsigned int, enum cpp_ttype);
60 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
61 static int name_p (cpp_reader *, const cpp_string *);
62 static tokenrun *next_tokenrun (tokenrun *);
63 
64 static _cpp_buff *new_buff (size_t);
65 
66 
67 /* Utility routine:
68 
69    Compares, the token TOKEN to the NUL-terminated string STRING.
70    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
71 int
cpp_ideq(const cpp_token * token,const char * string)72 cpp_ideq (const cpp_token *token, const char *string)
73 {
74   if (token->type != CPP_NAME)
75     return 0;
76 
77   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
78 }
79 
80 /* Record a note TYPE at byte POS into the current cleaned logical
81    line.  */
82 static void
add_line_note(cpp_buffer * buffer,const uchar * pos,unsigned int type)83 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
84 {
85   if (buffer->notes_used == buffer->notes_cap)
86     {
87       buffer->notes_cap = buffer->notes_cap * 2 + 200;
88       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89                                   buffer->notes_cap);
90     }
91 
92   buffer->notes[buffer->notes_used].pos = pos;
93   buffer->notes[buffer->notes_used].type = type;
94   buffer->notes_used++;
95 }
96 
97 /* Returns with a logical line that contains no escaped newlines or
98    trigraphs.  This is a time-critical inner loop.  */
99 void
_cpp_clean_line(cpp_reader * pfile)100 _cpp_clean_line (cpp_reader *pfile)
101 {
102   cpp_buffer *buffer;
103   const uchar *s;
104   uchar c, *d, *p;
105 
106   buffer = pfile->buffer;
107   buffer->cur_note = buffer->notes_used = 0;
108   buffer->cur = buffer->line_base = buffer->next_line;
109   buffer->need_line = false;
110   s = buffer->next_line - 1;
111 
112   if (!buffer->from_stage3)
113     {
114       const uchar *pbackslash = NULL;
115 
116       /* Short circuit for the common case of an un-escaped line with
117 	 no trigraphs.  The primary win here is by not writing any
118 	 data back to memory until we have to.  */
119       for (;;)
120 	{
121 	  c = *++s;
122 	  if (__builtin_expect (c == '\n', false)
123 	      || __builtin_expect (c == '\r', false))
124 	    {
125 	      d = (uchar *) s;
126 
127 	      if (__builtin_expect (s == buffer->rlimit, false))
128 		goto done;
129 
130 	      /* DOS line ending? */
131 	      if (__builtin_expect (c == '\r', false)
132 		  && s[1] == '\n')
133 		{
134 		  s++;
135 		  if (s == buffer->rlimit)
136 		    goto done;
137 		}
138 
139 	      if (__builtin_expect (pbackslash == NULL, true))
140 		goto done;
141 
142 	      /* Check for escaped newline.  */
143 	      p = d;
144 	      while (is_nvspace (p[-1]))
145 		p--;
146 	      if (p - 1 != pbackslash)
147 		goto done;
148 
149 	      /* Have an escaped newline; process it and proceed to
150 		 the slow path.  */
151 	      add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
152 	      d = p - 2;
153 	      buffer->next_line = p - 1;
154 	      break;
155 	    }
156 	  if (__builtin_expect (c == '\\', false))
157 	    pbackslash = s;
158 	  else if (__builtin_expect (c == '?', false)
159 		   && __builtin_expect (s[1] == '?', false)
160 		   && _cpp_trigraph_map[s[2]])
161 	    {
162 	      /* Have a trigraph.  We may or may not have to convert
163 		 it.  Add a line note regardless, for -Wtrigraphs.  */
164 	      add_line_note (buffer, s, s[2]);
165 	      if (CPP_OPTION (pfile, trigraphs))
166 		{
167 		  /* We do, and that means we have to switch to the
168 		     slow path.  */
169 		  d = (uchar *) s;
170 		  *d = _cpp_trigraph_map[s[2]];
171 		  s += 2;
172 		  break;
173 		}
174 	    }
175 	}
176 
177 
178       for (;;)
179 	{
180 	  c = *++s;
181 	  *++d = c;
182 
183 	  if (c == '\n' || c == '\r')
184 	    {
185 		  /* Handle DOS line endings.  */
186 	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
187 		s++;
188 	      if (s == buffer->rlimit)
189 		break;
190 
191 	      /* Escaped?  */
192 	      p = d;
193 	      while (p != buffer->next_line && is_nvspace (p[-1]))
194 		p--;
195 	      if (p == buffer->next_line || p[-1] != '\\')
196 		break;
197 
198 	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
199 	      d = p - 2;
200 	      buffer->next_line = p - 1;
201 	    }
202 	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
203 	    {
204 	      /* Add a note regardless, for the benefit of -Wtrigraphs.  */
205 	      add_line_note (buffer, d, s[2]);
206 	      if (CPP_OPTION (pfile, trigraphs))
207 		{
208 		  *d = _cpp_trigraph_map[s[2]];
209 		  s += 2;
210 		}
211 	    }
212 	}
213     }
214   else
215     {
216       do
217 	s++;
218       while (*s != '\n' && *s != '\r');
219       d = (uchar *) s;
220 
221       /* Handle DOS line endings.  */
222       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
223 	s++;
224     }
225 
226  done:
227   *d = '\n';
228   /* A sentinel note that should never be processed.  */
229   add_line_note (buffer, d + 1, '\n');
230   buffer->next_line = s + 1;
231 }
232 
233 /* Return true if the trigraph indicated by NOTE should be warned
234    about in a comment.  */
235 static bool
warn_in_comment(cpp_reader * pfile,_cpp_line_note * note)236 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
237 {
238   const uchar *p;
239 
240   /* Within comments we don't warn about trigraphs, unless the
241      trigraph forms an escaped newline, as that may change
242      behavior.  */
243   if (note->type != '/')
244     return false;
245 
246   /* If -trigraphs, then this was an escaped newline iff the next note
247      is coincident.  */
248   if (CPP_OPTION (pfile, trigraphs))
249     return note[1].pos == note->pos;
250 
251   /* Otherwise, see if this forms an escaped newline.  */
252   p = note->pos + 3;
253   while (is_nvspace (*p))
254     p++;
255 
256   /* There might have been escaped newlines between the trigraph and the
257      newline we found.  Hence the position test.  */
258   return (*p == '\n' && p < note[1].pos);
259 }
260 
261 /* Process the notes created by add_line_note as far as the current
262    location.  */
263 void
_cpp_process_line_notes(cpp_reader * pfile,int in_comment)264 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
265 {
266   cpp_buffer *buffer = pfile->buffer;
267 
268   for (;;)
269     {
270       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
271       unsigned int col;
272 
273       if (note->pos > buffer->cur)
274 	break;
275 
276       buffer->cur_note++;
277       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
278 
279       if (note->type == '\\' || note->type == ' ')
280 	{
281 	  if (note->type == ' ' && !in_comment)
282 	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
283 				 "backslash and newline separated by space");
284 
285 	  if (buffer->next_line > buffer->rlimit)
286 	    {
287 	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
288 				   "backslash-newline at end of file");
289 	      /* Prevent "no newline at end of file" warning.  */
290 	      buffer->next_line = buffer->rlimit;
291 	    }
292 
293 	  buffer->line_base = note->pos;
294 	  CPP_INCREMENT_LINE (pfile, 0);
295 	}
296       else if (_cpp_trigraph_map[note->type])
297 	{
298 	  if (CPP_OPTION (pfile, warn_trigraphs)
299 	      && (!in_comment || warn_in_comment (pfile, note)))
300 	    {
301 	      if (CPP_OPTION (pfile, trigraphs))
302 		cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
303 				     "trigraph ??%c converted to %c",
304 				     note->type,
305 				     (int) _cpp_trigraph_map[note->type]);
306 	      else
307 		{
308 		  cpp_error_with_line
309 		    (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
310 		     "trigraph ??%c ignored, use -trigraphs to enable",
311 		     note->type);
312 		}
313 	    }
314 	}
315       else
316 	abort ();
317     }
318 }
319 
320 /* Skip a C-style block comment.  We find the end of the comment by
321    seeing if an asterisk is before every '/' we encounter.  Returns
322    nonzero if comment terminated by EOF, zero otherwise.
323 
324    Buffer->cur points to the initial asterisk of the comment.  */
325 bool
_cpp_skip_block_comment(cpp_reader * pfile)326 _cpp_skip_block_comment (cpp_reader *pfile)
327 {
328   cpp_buffer *buffer = pfile->buffer;
329   const uchar *cur = buffer->cur;
330   uchar c;
331 
332   cur++;
333   if (*cur == '/')
334     cur++;
335 
336   for (;;)
337     {
338       /* People like decorating comments with '*', so check for '/'
339 	 instead for efficiency.  */
340       c = *cur++;
341 
342       if (c == '/')
343 	{
344 	  if (cur[-2] == '*')
345 	    break;
346 
347 	  /* Warn about potential nested comments, but not if the '/'
348 	     comes immediately before the true comment delimiter.
349 	     Don't bother to get it right across escaped newlines.  */
350 	  if (CPP_OPTION (pfile, warn_comments)
351 	      && cur[0] == '*' && cur[1] != '/')
352 	    {
353 	      buffer->cur = cur;
354 	      cpp_error_with_line (pfile, CPP_DL_WARNING,
355 				   pfile->line_table->highest_line, CPP_BUF_COL (buffer),
356 				   "\"/*\" within comment");
357 	    }
358 	}
359       else if (c == '\n')
360 	{
361 	  unsigned int cols;
362 	  buffer->cur = cur - 1;
363 	  _cpp_process_line_notes (pfile, true);
364 	  if (buffer->next_line >= buffer->rlimit)
365 	    return true;
366 	  _cpp_clean_line (pfile);
367 
368 	  cols = buffer->next_line - buffer->line_base;
369 	  CPP_INCREMENT_LINE (pfile, cols);
370 
371 	  cur = buffer->cur;
372 	}
373     }
374 
375   buffer->cur = cur;
376   _cpp_process_line_notes (pfile, true);
377   return false;
378 }
379 
380 /* Skip a C++ line comment, leaving buffer->cur pointing to the
381    terminating newline.  Handles escaped newlines.  Returns nonzero
382    if a multiline comment.  */
383 static int
skip_line_comment(cpp_reader * pfile)384 skip_line_comment (cpp_reader *pfile)
385 {
386   cpp_buffer *buffer = pfile->buffer;
387   unsigned int orig_line = pfile->line_table->highest_line;
388 
389   while (*buffer->cur != '\n')
390     buffer->cur++;
391 
392   _cpp_process_line_notes (pfile, true);
393   return orig_line != pfile->line_table->highest_line;
394 }
395 
396 /* Skips whitespace, saving the next non-whitespace character.  */
397 static void
skip_whitespace(cpp_reader * pfile,cppchar_t c)398 skip_whitespace (cpp_reader *pfile, cppchar_t c)
399 {
400   cpp_buffer *buffer = pfile->buffer;
401   bool saw_NUL = false;
402 
403   do
404     {
405       /* Horizontal space always OK.  */
406       if (c == ' ' || c == '\t')
407 	;
408       /* Just \f \v or \0 left.  */
409       else if (c == '\0')
410 	saw_NUL = true;
411       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
412 	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
413 			     CPP_BUF_COL (buffer),
414 			     "%s in preprocessing directive",
415 			     c == '\f' ? "form feed" : "vertical tab");
416 
417       c = *buffer->cur++;
418     }
419   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
420   while (is_nvspace (c));
421 
422   if (saw_NUL)
423     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
424 
425   buffer->cur--;
426 }
427 
428 /* See if the characters of a number token are valid in a name (no
429    '.', '+' or '-').  */
430 static int
name_p(cpp_reader * pfile,const cpp_string * string)431 name_p (cpp_reader *pfile, const cpp_string *string)
432 {
433   unsigned int i;
434 
435   for (i = 0; i < string->len; i++)
436     if (!is_idchar (string->text[i]))
437       return 0;
438 
439   return 1;
440 }
441 
442 /* After parsing an identifier or other sequence, produce a warning about
443    sequences not in NFC/NFKC.  */
444 static void
warn_about_normalization(cpp_reader * pfile,const cpp_token * token,const struct normalize_state * s)445 warn_about_normalization (cpp_reader *pfile,
446 			  const cpp_token *token,
447 			  const struct normalize_state *s)
448 {
449   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
450       && !pfile->state.skipping)
451     {
452       /* Make sure that the token is printed using UCNs, even
453 	 if we'd otherwise happily print UTF-8.  */
454       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
455       size_t sz;
456 
457       sz = cpp_spell_token (pfile, token, buf, false) - buf;
458       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
459 	cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
460 			     "`%.*s' is not in NFKC", (int) sz, buf);
461       else
462 	cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
463 			     "`%.*s' is not in NFC", (int) sz, buf);
464     }
465 }
466 
467 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
468    an identifier.  FIRST is TRUE if this starts an identifier.  */
469 static bool
forms_identifier_p(cpp_reader * pfile,int first,struct normalize_state * state)470 forms_identifier_p (cpp_reader *pfile, int first,
471 		    struct normalize_state *state)
472 {
473   cpp_buffer *buffer = pfile->buffer;
474 
475   if (*buffer->cur == '$')
476     {
477       if (!CPP_OPTION (pfile, dollars_in_ident))
478 	return false;
479 
480       buffer->cur++;
481       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
482 	{
483 	  CPP_OPTION (pfile, warn_dollars) = 0;
484 	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
485 	}
486 
487       return true;
488     }
489 
490   /* Is this a syntactically valid UCN?  */
491   if (CPP_OPTION (pfile, extended_identifiers)
492       && *buffer->cur == '\\'
493       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
494     {
495       buffer->cur += 2;
496       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
497 			  state))
498 	return true;
499       buffer->cur -= 2;
500     }
501 
502   return false;
503 }
504 
505 /* Lex an identifier starting at BUFFER->CUR - 1.  */
506 static cpp_hashnode *
lex_identifier(cpp_reader * pfile,const uchar * base,bool starts_ucn,struct normalize_state * nst)507 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
508 		struct normalize_state *nst)
509 {
510   cpp_hashnode *result;
511   const uchar *cur;
512   unsigned int len;
513   unsigned int hash = HT_HASHSTEP (0, *base);
514 
515   cur = pfile->buffer->cur;
516   if (! starts_ucn)
517     while (ISIDNUM (*cur))
518       {
519 	hash = HT_HASHSTEP (hash, *cur);
520 	cur++;
521       }
522   pfile->buffer->cur = cur;
523   if (starts_ucn || forms_identifier_p (pfile, false, nst))
524     {
525       /* Slower version for identifiers containing UCNs (or $).  */
526       do {
527 	while (ISIDNUM (*pfile->buffer->cur))
528 	  {
529 	    pfile->buffer->cur++;
530 	    NORMALIZE_STATE_UPDATE_IDNUM (nst);
531 	  }
532       } while (forms_identifier_p (pfile, false, nst));
533       result = _cpp_interpret_identifier (pfile, base,
534 					  pfile->buffer->cur - base);
535     }
536   else
537     {
538       len = cur - base;
539       hash = HT_HASHFINISH (hash, len);
540 
541       result = (cpp_hashnode *)
542 	ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
543     }
544 
545   /* Rarely, identifiers require diagnostics when lexed.  */
546   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
547 			&& !pfile->state.skipping, 0))
548     {
549       /* It is allowed to poison the same identifier twice.  */
550       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
551 	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
552 		   NODE_NAME (result));
553 
554       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
555 	 replacement list of a variadic macro.  */
556       if (result == pfile->spec_nodes.n__VA_ARGS__
557 	  && !pfile->state.va_args_ok)
558 	cpp_error (pfile, CPP_DL_PEDWARN,
559 		   "__VA_ARGS__ can only appear in the expansion"
560 		   " of a C99 variadic macro");
561     }
562 
563   return result;
564 }
565 
566 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
567 static void
lex_number(cpp_reader * pfile,cpp_string * number,struct normalize_state * nst)568 lex_number (cpp_reader *pfile, cpp_string *number,
569 	    struct normalize_state *nst)
570 {
571   const uchar *cur;
572   const uchar *base;
573   uchar *dest;
574 
575   base = pfile->buffer->cur - 1;
576   do
577     {
578       cur = pfile->buffer->cur;
579 
580       /* N.B. ISIDNUM does not include $.  */
581       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
582 	{
583 	  cur++;
584 	  NORMALIZE_STATE_UPDATE_IDNUM (nst);
585 	}
586 
587       pfile->buffer->cur = cur;
588     }
589   while (forms_identifier_p (pfile, false, nst));
590 
591   number->len = cur - base;
592   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
593   memcpy (dest, base, number->len);
594   dest[number->len] = '\0';
595   number->text = dest;
596 }
597 
598 /* Create a token of type TYPE with a literal spelling.  */
599 static void
create_literal(cpp_reader * pfile,cpp_token * token,const uchar * base,unsigned int len,enum cpp_ttype type)600 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
601 		unsigned int len, enum cpp_ttype type)
602 {
603   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
604 
605   memcpy (dest, base, len);
606   dest[len] = '\0';
607   token->type = type;
608   token->val.str.len = len;
609   token->val.str.text = dest;
610 }
611 
612 /* Lexes a string, character constant, or angle-bracketed header file
613    name.  The stored string contains the spelling, including opening
614    quote and leading any leading 'L'.  It returns the type of the
615    literal, or CPP_OTHER if it was not properly terminated.
616 
617    The spelling is NUL-terminated, but it is not guaranteed that this
618    is the first NUL since embedded NULs are preserved.  */
619 static void
lex_string(cpp_reader * pfile,cpp_token * token,const uchar * base)620 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
621 {
622   bool saw_NUL = false;
623   const uchar *cur;
624   cppchar_t terminator;
625   enum cpp_ttype type;
626 
627   cur = base;
628   terminator = *cur++;
629   if (terminator == 'L')
630     terminator = *cur++;
631   if (terminator == '\"')
632     type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
633   else if (terminator == '\'')
634     type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
635   else
636     terminator = '>', type = CPP_HEADER_NAME;
637 
638   for (;;)
639     {
640       cppchar_t c = *cur++;
641 
642       /* In #include-style directives, terminators are not escapable.  */
643       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
644 	cur++;
645       else if (c == terminator)
646 	break;
647       else if (c == '\n')
648 	{
649 	  cur--;
650 	  type = CPP_OTHER;
651 	  break;
652 	}
653       else if (c == '\0')
654 	saw_NUL = true;
655     }
656 
657   if (saw_NUL && !pfile->state.skipping)
658     cpp_error (pfile, CPP_DL_WARNING,
659 	       "null character(s) preserved in literal");
660 
661   /* APPLE LOCAL begin #error with unmatched quotes 5607574 */
662   if (type == CPP_OTHER
663       && CPP_OPTION (pfile, lang) != CLK_ASM
664       && !pfile->state.in_diagnostic
665       && !pfile->state.skipping)
666   /* APPLE LOCAL end #error with unmatched quotes 5607574 */
667     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
668 	       (int) terminator);
669 
670   pfile->buffer->cur = cur;
671   create_literal (pfile, token, base, cur - base, type);
672 }
673 
674 /* The stored comment includes the comment start and any terminator.  */
675 static void
save_comment(cpp_reader * pfile,cpp_token * token,const unsigned char * from,cppchar_t type)676 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
677 	      cppchar_t type)
678 {
679   unsigned char *buffer;
680   unsigned int len, clen;
681 
682   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
683 
684   /* C++ comments probably (not definitely) have moved past a new
685      line, which we don't want to save in the comment.  */
686   if (is_vspace (pfile->buffer->cur[-1]))
687     len--;
688 
689   /* If we are currently in a directive, then we need to store all
690      C++ comments as C comments internally, and so we need to
691      allocate a little extra space in that case.
692 
693      Note that the only time we encounter a directive here is
694      when we are saving comments in a "#define".  */
695   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
696 
697   buffer = _cpp_unaligned_alloc (pfile, clen);
698 
699   token->type = CPP_COMMENT;
700   token->val.str.len = clen;
701   token->val.str.text = buffer;
702 
703   buffer[0] = '/';
704   memcpy (buffer + 1, from, len - 1);
705 
706   /* Finish conversion to a C comment, if necessary.  */
707   if (pfile->state.in_directive && type == '/')
708     {
709       buffer[1] = '*';
710       buffer[clen - 2] = '*';
711       buffer[clen - 1] = '/';
712     }
713 }
714 
715 /* Allocate COUNT tokens for RUN.  */
716 void
_cpp_init_tokenrun(tokenrun * run,unsigned int count)717 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
718 {
719   run->base = XNEWVEC (cpp_token, count);
720   run->limit = run->base + count;
721   run->next = NULL;
722 }
723 
724 /* Returns the next tokenrun, or creates one if there is none.  */
725 static tokenrun *
next_tokenrun(tokenrun * run)726 next_tokenrun (tokenrun *run)
727 {
728   if (run->next == NULL)
729     {
730       run->next = XNEW (tokenrun);
731       run->next->prev = run;
732       _cpp_init_tokenrun (run->next, 250);
733     }
734 
735   return run->next;
736 }
737 
738 /* Allocate a single token that is invalidated at the same time as the
739    rest of the tokens on the line.  Has its line and col set to the
740    same as the last lexed token, so that diagnostics appear in the
741    right place.  */
742 cpp_token *
_cpp_temp_token(cpp_reader * pfile)743 _cpp_temp_token (cpp_reader *pfile)
744 {
745   cpp_token *old, *result;
746 
747   old = pfile->cur_token - 1;
748   if (pfile->cur_token == pfile->cur_run->limit)
749     {
750       pfile->cur_run = next_tokenrun (pfile->cur_run);
751       pfile->cur_token = pfile->cur_run->base;
752     }
753 
754   result = pfile->cur_token++;
755   result->src_loc = old->src_loc;
756   return result;
757 }
758 
759 /* Lex a token into RESULT (external interface).  Takes care of issues
760    like directive handling, token lookahead, multiple include
761    optimization and skipping.  */
762 const cpp_token *
_cpp_lex_token(cpp_reader * pfile)763 _cpp_lex_token (cpp_reader *pfile)
764 {
765   cpp_token *result;
766 
767   for (;;)
768     {
769       if (pfile->cur_token == pfile->cur_run->limit)
770 	{
771 	  pfile->cur_run = next_tokenrun (pfile->cur_run);
772 	  pfile->cur_token = pfile->cur_run->base;
773 	}
774       /* We assume that the current token is somewhere in the current
775 	 run.  */
776       if (pfile->cur_token < pfile->cur_run->base
777 	  || pfile->cur_token >= pfile->cur_run->limit)
778 	abort ();
779 
780       if (pfile->lookaheads)
781 	{
782 	  pfile->lookaheads--;
783 	  result = pfile->cur_token++;
784 	}
785       else
786 	result = _cpp_lex_direct (pfile);
787 
788       if (result->flags & BOL)
789 	{
790 	  /* Is this a directive.  If _cpp_handle_directive returns
791 	     false, it is an assembler #.  */
792 	  if (result->type == CPP_HASH
793 	      /* 6.10.3 p 11: Directives in a list of macro arguments
794 		 gives undefined behavior.  This implementation
795 		 handles the directive as normal.  */
796 	      && pfile->state.parsing_args != 1)
797 	    {
798 	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
799 		{
800 		  if (pfile->directive_result.type == CPP_PADDING)
801 		    continue;
802 		  result = &pfile->directive_result;
803 		}
804 	    }
805 	  else if (pfile->state.in_deferred_pragma)
806 	    result = &pfile->directive_result;
807 
808 	  if (pfile->cb.line_change && !pfile->state.skipping)
809 	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
810 	}
811 
812       /* We don't skip tokens in directives.  */
813       if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
814 	break;
815 
816       /* Outside a directive, invalidate controlling macros.  At file
817 	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
818 	 get here and MI optimization works.  */
819       pfile->mi_valid = false;
820 
821       if (!pfile->state.skipping || result->type == CPP_EOF)
822 	break;
823     }
824 
825   return result;
826 }
827 
828 /* Returns true if a fresh line has been loaded.  */
829 bool
_cpp_get_fresh_line(cpp_reader * pfile)830 _cpp_get_fresh_line (cpp_reader *pfile)
831 {
832   int return_at_eof;
833 
834   /* We can't get a new line until we leave the current directive.  */
835   if (pfile->state.in_directive)
836     return false;
837 
838   for (;;)
839     {
840       cpp_buffer *buffer = pfile->buffer;
841 
842       if (!buffer->need_line)
843 	return true;
844 
845       if (buffer->next_line < buffer->rlimit)
846 	{
847 	  _cpp_clean_line (pfile);
848 	  return true;
849 	}
850 
851       /* First, get out of parsing arguments state.  */
852       if (pfile->state.parsing_args)
853 	return false;
854 
855       /* End of buffer.  Non-empty files should end in a newline.  */
856       if (buffer->buf != buffer->rlimit
857 	  && buffer->next_line > buffer->rlimit
858 	  && !buffer->from_stage3)
859 	{
860 	  /* Clip to buffer size.  */
861 	  buffer->next_line = buffer->rlimit;
862 	  /* APPLE LOCAL begin suppress no newline warning.  */
863 	  if ( CPP_OPTION (pfile, warn_newline_at_eof))
864 	    {
865 	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
866 				   CPP_BUF_COLUMN (buffer, buffer->cur),
867 				   "no newline at end of file");
868 	    }
869 	  /* APPLE LOCAL end suppress no newline warning.  */
870 	}
871 
872       return_at_eof = buffer->return_at_eof;
873       _cpp_pop_buffer (pfile);
874       if (pfile->buffer == NULL || return_at_eof)
875 	return false;
876     }
877 }
878 
/* Set result->type to THEN_TYPE and consume one character if the next
   character in the buffer is CHAR; otherwise set it to ELSE_TYPE and
   consume nothing.  Expects `buffer' and `result' in scope.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      result->type = ELSE_TYPE;				\
      if (*buffer->cur == CHAR)				\
        {						\
          buffer->cur++;				\
          result->type = THEN_TYPE;			\
        }						\
    }							\
  while (0)
887 
888 /* Lex a token into pfile->cur_token, which is also incremented, to
889    get diagnostics pointing to the correct location.
890 
891    Does not handle issues such as token lookahead, multiple-include
892    optimization, directives, skipping etc.  This function is only
893    suitable for use by _cpp_lex_token, and in special cases like
894    lex_expansion_token which doesn't care for any of these issues.
895 
896    When meeting a newline, returns CPP_EOF if parsing a directive,
897    otherwise returns to the start of the token buffer if permissible.
898    Returns the location of the lexed token.  */
899 cpp_token *
_cpp_lex_direct(cpp_reader * pfile)900 _cpp_lex_direct (cpp_reader *pfile)
901 {
902   cppchar_t c;
903   cpp_buffer *buffer;
904   const unsigned char *comment_start;
905   cpp_token *result = pfile->cur_token++;
906 
907  fresh_line:
908   result->flags = 0;
909   buffer = pfile->buffer;
910   if (buffer->need_line)
911     {
912       if (pfile->state.in_deferred_pragma)
913 	{
914 	  result->type = CPP_PRAGMA_EOL;
915 	  pfile->state.in_deferred_pragma = false;
916 	  if (!pfile->state.pragma_allow_expansion)
917 	    pfile->state.prevent_expansion--;
918 	  return result;
919 	}
920       if (!_cpp_get_fresh_line (pfile))
921 	{
922 	  result->type = CPP_EOF;
923 	  if (!pfile->state.in_directive)
924 	    {
925 	      /* Tell the compiler the line number of the EOF token.  */
926 	      result->src_loc = pfile->line_table->highest_line;
927 	      result->flags = BOL;
928 	    }
929 	  return result;
930 	}
931       if (!pfile->keep_tokens)
932 	{
933 	  pfile->cur_run = &pfile->base_run;
934 	  result = pfile->base_run.base;
935 	  pfile->cur_token = result + 1;
936 	}
937       result->flags = BOL;
938       if (pfile->state.parsing_args == 2)
939 	result->flags |= PREV_WHITE;
940     }
941   buffer = pfile->buffer;
942  update_tokens_line:
943   result->src_loc = pfile->line_table->highest_line;
944 
945  skipped_white:
946   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
947       && !pfile->overlaid_buffer)
948     {
949       _cpp_process_line_notes (pfile, false);
950       result->src_loc = pfile->line_table->highest_line;
951     }
952   c = *buffer->cur++;
953 
954   LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
955 			       CPP_BUF_COLUMN (buffer, buffer->cur));
956 
957   switch (c)
958     {
959     case ' ': case '\t': case '\f': case '\v': case '\0':
960       result->flags |= PREV_WHITE;
961       skip_whitespace (pfile, c);
962       goto skipped_white;
963 
964     case '\n':
965       if (buffer->cur < buffer->rlimit)
966 	CPP_INCREMENT_LINE (pfile, 0);
967       buffer->need_line = true;
968       goto fresh_line;
969 
970     case '0': case '1': case '2': case '3': case '4':
971     case '5': case '6': case '7': case '8': case '9':
972       {
973 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
974 	result->type = CPP_NUMBER;
975 	lex_number (pfile, &result->val.str, &nst);
976 	warn_about_normalization (pfile, result, &nst);
977 	break;
978       }
979 
980     case 'L':
981       /* 'L' may introduce wide characters or strings.  */
982       if (*buffer->cur == '\'' || *buffer->cur == '"')
983 	{
984 	  lex_string (pfile, result, buffer->cur - 1);
985 	  break;
986 	}
987       /* Fall through.  */
988 
989     case '_':
990     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
991     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
992     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
993     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
994     case 'y': case 'z':
995     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
996     case 'G': case 'H': case 'I': case 'J': case 'K':
997     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
998     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
999     case 'Y': case 'Z':
1000       result->type = CPP_NAME;
1001       {
1002 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1003 	result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1004 					   &nst);
1005 	warn_about_normalization (pfile, result, &nst);
1006       }
1007 
1008       /* Convert named operators to their proper types.  */
1009       if (result->val.node->flags & NODE_OPERATOR)
1010 	{
1011 	  result->flags |= NAMED_OP;
1012 	  result->type = (enum cpp_ttype) result->val.node->directive_index;
1013 	}
1014       break;
1015 
1016     case '\'':
1017     case '"':
1018       lex_string (pfile, result, buffer->cur - 1);
1019       break;
1020 
1021     case '/':
1022       /* A potential block or line comment.  */
1023       comment_start = buffer->cur;
1024       c = *buffer->cur;
1025 
1026       if (c == '*')
1027 	{
1028 	  if (_cpp_skip_block_comment (pfile))
1029 	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1030 	}
1031       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1032 			    || cpp_in_system_header (pfile)))
1033 	{
1034 	  /* Warn about comments only if pedantically GNUC89, and not
1035 	     in system headers.  */
1036 	  if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1037 	      && ! buffer->warned_cplusplus_comments)
1038 	    {
1039 	      cpp_error (pfile, CPP_DL_PEDWARN,
1040 			 "C++ style comments are not allowed in ISO C90");
1041 	      cpp_error (pfile, CPP_DL_PEDWARN,
1042 			 "(this will be reported only once per input file)");
1043 	      buffer->warned_cplusplus_comments = 1;
1044 	    }
1045 
1046 	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1047 	    cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1048 	}
1049       else if (c == '=')
1050 	{
1051 	  buffer->cur++;
1052 	  result->type = CPP_DIV_EQ;
1053 	  break;
1054 	}
1055       else
1056 	{
1057 	  result->type = CPP_DIV;
1058 	  break;
1059 	}
1060 
1061       if (!pfile->state.save_comments)
1062 	{
1063 	  result->flags |= PREV_WHITE;
1064 	  goto update_tokens_line;
1065 	}
1066 
1067       /* Save the comment as a token in its own right.  */
1068       save_comment (pfile, result, comment_start, c);
1069       break;
1070 
1071     case '<':
1072       if (pfile->state.angled_headers)
1073 	{
1074 	  lex_string (pfile, result, buffer->cur - 1);
1075 	  break;
1076 	}
1077 
1078       result->type = CPP_LESS;
1079       if (*buffer->cur == '=')
1080 	buffer->cur++, result->type = CPP_LESS_EQ;
1081       else if (*buffer->cur == '<')
1082 	{
1083 	  buffer->cur++;
1084 	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1085 	}
1086       else if (CPP_OPTION (pfile, digraphs))
1087 	{
1088 	  if (*buffer->cur == ':')
1089 	    {
1090 	      buffer->cur++;
1091 	      result->flags |= DIGRAPH;
1092 	      result->type = CPP_OPEN_SQUARE;
1093 	    }
1094 	  else if (*buffer->cur == '%')
1095 	    {
1096 	      buffer->cur++;
1097 	      result->flags |= DIGRAPH;
1098 	      result->type = CPP_OPEN_BRACE;
1099 	    }
1100 	}
1101       break;
1102 
1103     case '>':
1104       result->type = CPP_GREATER;
1105       if (*buffer->cur == '=')
1106 	buffer->cur++, result->type = CPP_GREATER_EQ;
1107       else if (*buffer->cur == '>')
1108 	{
1109 	  buffer->cur++;
1110 	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1111 	}
1112       break;
1113 
1114     case '%':
1115       result->type = CPP_MOD;
1116       if (*buffer->cur == '=')
1117 	buffer->cur++, result->type = CPP_MOD_EQ;
1118       else if (CPP_OPTION (pfile, digraphs))
1119 	{
1120 	  if (*buffer->cur == ':')
1121 	    {
1122 	      buffer->cur++;
1123 	      result->flags |= DIGRAPH;
1124 	      result->type = CPP_HASH;
1125 	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
1126 		buffer->cur += 2, result->type = CPP_PASTE;
1127 	    }
1128 	  else if (*buffer->cur == '>')
1129 	    {
1130 	      buffer->cur++;
1131 	      result->flags |= DIGRAPH;
1132 	      result->type = CPP_CLOSE_BRACE;
1133 	    }
1134 	}
1135       break;
1136 
1137     case '.':
1138       result->type = CPP_DOT;
1139       if (ISDIGIT (*buffer->cur))
1140 	{
1141 	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1142 	  result->type = CPP_NUMBER;
1143 	  lex_number (pfile, &result->val.str, &nst);
1144 	  warn_about_normalization (pfile, result, &nst);
1145 	}
1146       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1147 	buffer->cur += 2, result->type = CPP_ELLIPSIS;
1148       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1149 	buffer->cur++, result->type = CPP_DOT_STAR;
1150       break;
1151 
1152     case '+':
1153       result->type = CPP_PLUS;
1154       if (*buffer->cur == '+')
1155 	buffer->cur++, result->type = CPP_PLUS_PLUS;
1156       else if (*buffer->cur == '=')
1157 	buffer->cur++, result->type = CPP_PLUS_EQ;
1158       break;
1159 
1160     case '-':
1161       result->type = CPP_MINUS;
1162       if (*buffer->cur == '>')
1163 	{
1164 	  buffer->cur++;
1165 	  result->type = CPP_DEREF;
1166 	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1167 	    buffer->cur++, result->type = CPP_DEREF_STAR;
1168 	}
1169       else if (*buffer->cur == '-')
1170 	buffer->cur++, result->type = CPP_MINUS_MINUS;
1171       else if (*buffer->cur == '=')
1172 	buffer->cur++, result->type = CPP_MINUS_EQ;
1173       break;
1174 
1175     case '&':
1176       result->type = CPP_AND;
1177       if (*buffer->cur == '&')
1178 	buffer->cur++, result->type = CPP_AND_AND;
1179       else if (*buffer->cur == '=')
1180 	buffer->cur++, result->type = CPP_AND_EQ;
1181       break;
1182 
1183     case '|':
1184       result->type = CPP_OR;
1185       if (*buffer->cur == '|')
1186 	buffer->cur++, result->type = CPP_OR_OR;
1187       else if (*buffer->cur == '=')
1188 	buffer->cur++, result->type = CPP_OR_EQ;
1189       break;
1190 
1191     case ':':
1192       result->type = CPP_COLON;
1193       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1194 	buffer->cur++, result->type = CPP_SCOPE;
1195       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1196 	{
1197 	  buffer->cur++;
1198 	  result->flags |= DIGRAPH;
1199 	  result->type = CPP_CLOSE_SQUARE;
1200 	}
1201       break;
1202 
1203     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1204     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1205     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1206     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1207     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1208 
1209     case '?': result->type = CPP_QUERY; break;
1210     case '~': result->type = CPP_COMPL; break;
1211     case ',': result->type = CPP_COMMA; break;
1212     case '(': result->type = CPP_OPEN_PAREN; break;
1213     case ')': result->type = CPP_CLOSE_PAREN; break;
1214     case '[': result->type = CPP_OPEN_SQUARE; break;
1215     case ']': result->type = CPP_CLOSE_SQUARE; break;
1216     case '{': result->type = CPP_OPEN_BRACE; break;
1217     case '}': result->type = CPP_CLOSE_BRACE; break;
1218     case ';': result->type = CPP_SEMICOLON; break;
1219 
1220       /* @ is a punctuator in Objective-C.  */
1221     case '@': result->type = CPP_ATSIGN; break;
1222 
1223     case '$':
1224     case '\\':
1225       {
1226 	const uchar *base = --buffer->cur;
1227 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1228 
1229 	if (forms_identifier_p (pfile, true, &nst))
1230 	  {
1231 	    result->type = CPP_NAME;
1232 	    result->val.node = lex_identifier (pfile, base, true, &nst);
1233 	    warn_about_normalization (pfile, result, &nst);
1234 	    break;
1235 	  }
1236 	buffer->cur++;
1237       }
1238 
1239     default:
1240       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1241       break;
1242     }
1243 
1244   return result;
1245 }
1246 
1247 /* An upper bound on the number of bytes needed to spell TOKEN.
1248    Does not include preceding whitespace.  */
1249 unsigned int
cpp_token_len(const cpp_token * token)1250 cpp_token_len (const cpp_token *token)
1251 {
1252   unsigned int len;
1253 
1254   switch (TOKEN_SPELL (token))
1255     {
1256     default:		len = 4;				break;
1257     case SPELL_LITERAL:	len = token->val.str.len;		break;
1258     case SPELL_IDENT:	len = NODE_LEN (token->val.node) * 10;	break;
1259     }
1260 
1261   return len;
1262 }
1263 
/* Decode the UTF-8 sequence that starts at NAME and store the
   corresponding \UXXXXXXXX escape (lower-case hex digits) in BUFFER.
   Exactly 10 bytes are written and no NUL is appended.  The return
   value is the number of bytes of NAME that made up the sequence, as
   given by the count of leading one bits in the first byte.  Aborts
   if a trailing byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned int lead = *name;
  unsigned long code_point;
  int seq_len = 0;
  int k;
  int shift;

  /* Each leading one bit of the first byte stands for one byte of
     the sequence.  */
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The first byte contributes the bits below its length marker.  */
  code_point = *name & (0x7F >> seq_len);

  /* Every following byte contributes six more bits.  */
  for (k = 1; k < seq_len; k++)
    {
      name++;

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
	abort ();

      code_point = (code_point << 6) | (*name & 0x3F);
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  for (k = 0, shift = 28; k < 8; k++, shift -= 4)
    buffer[2 + k] = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1297 
1298 
1299 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1300    already contain the enough space to hold the token's spelling.
1301    Returns a pointer to the character after the last character written.
1302    FORSTRING is true if this is to be the spelling after translation
1303    phase 1 (this is different for UCNs).
1304    FIXME: Would be nice if we didn't need the PFILE argument.  */
1305 unsigned char *
cpp_spell_token(cpp_reader * pfile,const cpp_token * token,unsigned char * buffer,bool forstring)1306 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1307 		 unsigned char *buffer, bool forstring)
1308 {
1309   switch (TOKEN_SPELL (token))
1310     {
1311     case SPELL_OPERATOR:
1312       {
1313 	const unsigned char *spelling;
1314 	unsigned char c;
1315 
1316 	if (token->flags & DIGRAPH)
1317 	  spelling
1318 	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1319 	else if (token->flags & NAMED_OP)
1320 	  goto spell_ident;
1321 	else
1322 	  spelling = TOKEN_NAME (token);
1323 
1324 	while ((c = *spelling++) != '\0')
1325 	  *buffer++ = c;
1326       }
1327       break;
1328 
1329     spell_ident:
1330     case SPELL_IDENT:
1331       if (forstring)
1332 	{
1333 	  memcpy (buffer, NODE_NAME (token->val.node),
1334 		  NODE_LEN (token->val.node));
1335 	  buffer += NODE_LEN (token->val.node);
1336 	}
1337       else
1338 	{
1339 	  size_t i;
1340 	  const unsigned char * name = NODE_NAME (token->val.node);
1341 
1342 	  for (i = 0; i < NODE_LEN (token->val.node); i++)
1343 	    if (name[i] & ~0x7F)
1344 	      {
1345 		i += utf8_to_ucn (buffer, name + i) - 1;
1346 		buffer += 10;
1347 	      }
1348 	    else
1349 	      *buffer++ = NODE_NAME (token->val.node)[i];
1350 	}
1351       break;
1352 
1353     case SPELL_LITERAL:
1354       memcpy (buffer, token->val.str.text, token->val.str.len);
1355       buffer += token->val.str.len;
1356       break;
1357 
1358     case SPELL_NONE:
1359       cpp_error (pfile, CPP_DL_ICE,
1360 		 "unspellable token %s", TOKEN_NAME (token));
1361       break;
1362     }
1363 
1364   return buffer;
1365 }
1366 
1367 /* Returns TOKEN spelt as a null-terminated string.  The string is
1368    freed when the reader is destroyed.  Useful for diagnostics.  */
1369 unsigned char *
cpp_token_as_text(cpp_reader * pfile,const cpp_token * token)1370 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1371 {
1372   unsigned int len = cpp_token_len (token) + 1;
1373   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1374 
1375   end = cpp_spell_token (pfile, token, start, false);
1376   end[0] = '\0';
1377 
1378   return start;
1379 }
1380 
1381 /* Used by C front ends, which really should move to using
1382    cpp_token_as_text.  */
1383 const char *
cpp_type2name(enum cpp_ttype type)1384 cpp_type2name (enum cpp_ttype type)
1385 {
1386   return (const char *) token_spellings[type].name;
1387 }
1388 
1389 /* Writes the spelling of token to FP, without any preceding space.
1390    Separated from cpp_spell_token for efficiency - to avoid stdio
1391    double-buffering.  */
1392 void
cpp_output_token(const cpp_token * token,FILE * fp)1393 cpp_output_token (const cpp_token *token, FILE *fp)
1394 {
1395   switch (TOKEN_SPELL (token))
1396     {
1397     case SPELL_OPERATOR:
1398       {
1399 	const unsigned char *spelling;
1400 	int c;
1401 
1402 	if (token->flags & DIGRAPH)
1403 	  spelling
1404 	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1405 	else if (token->flags & NAMED_OP)
1406 	  goto spell_ident;
1407 	else
1408 	  spelling = TOKEN_NAME (token);
1409 
1410 	c = *spelling;
1411 	do
1412 	  putc (c, fp);
1413 	while ((c = *++spelling) != '\0');
1414       }
1415       break;
1416 
1417     spell_ident:
1418     case SPELL_IDENT:
1419       {
1420 	size_t i;
1421 	const unsigned char * name = NODE_NAME (token->val.node);
1422 
1423 	for (i = 0; i < NODE_LEN (token->val.node); i++)
1424 	  if (name[i] & ~0x7F)
1425 	    {
1426 	      unsigned char buffer[10];
1427 	      i += utf8_to_ucn (buffer, name + i) - 1;
1428 	      fwrite (buffer, 1, 10, fp);
1429 	    }
1430 	  else
1431 	    fputc (NODE_NAME (token->val.node)[i], fp);
1432       }
1433       break;
1434 
1435     case SPELL_LITERAL:
1436       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1437       break;
1438 
1439     case SPELL_NONE:
1440       /* An error, most probably.  */
1441       break;
1442     }
1443 }
1444 
1445 /* Compare two tokens.  */
1446 int
_cpp_equiv_tokens(const cpp_token * a,const cpp_token * b)1447 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1448 {
1449   if (a->type == b->type && a->flags == b->flags)
1450     switch (TOKEN_SPELL (a))
1451       {
1452       default:			/* Keep compiler happy.  */
1453       case SPELL_OPERATOR:
1454 	return 1;
1455       case SPELL_NONE:
1456 	return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1457       case SPELL_IDENT:
1458 	return a->val.node == b->val.node;
1459       case SPELL_LITERAL:
1460 	return (a->val.str.len == b->val.str.len
1461 		&& !memcmp (a->val.str.text, b->val.str.text,
1462 			    a->val.str.len));
1463       }
1464 
1465   return 0;
1466 }
1467 
1468 /* Returns nonzero if a space should be inserted to avoid an
1469    accidental token paste for output.  For simplicity, it is
1470    conservative, and occasionally advises a space where one is not
1471    needed, e.g. "." and ".2".  */
1472 int
cpp_avoid_paste(cpp_reader * pfile,const cpp_token * token1,const cpp_token * token2)1473 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1474 		 const cpp_token *token2)
1475 {
1476   enum cpp_ttype a = token1->type, b = token2->type;
1477   cppchar_t c;
1478 
1479   if (token1->flags & NAMED_OP)
1480     a = CPP_NAME;
1481   if (token2->flags & NAMED_OP)
1482     b = CPP_NAME;
1483 
1484   c = EOF;
1485   if (token2->flags & DIGRAPH)
1486     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1487   else if (token_spellings[b].category == SPELL_OPERATOR)
1488     c = token_spellings[b].name[0];
1489 
1490   /* Quickly get everything that can paste with an '='.  */
1491   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1492     return 1;
1493 
1494   switch (a)
1495     {
1496     case CPP_GREATER:	return c == '>';
1497     case CPP_LESS:	return c == '<' || c == '%' || c == ':';
1498     case CPP_PLUS:	return c == '+';
1499     case CPP_MINUS:	return c == '-' || c == '>';
1500     case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
1501     case CPP_MOD:	return c == ':' || c == '>';
1502     case CPP_AND:	return c == '&';
1503     case CPP_OR:	return c == '|';
1504     case CPP_COLON:	return c == ':' || c == '>';
1505     case CPP_DEREF:	return c == '*';
1506     case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
1507     case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
1508     case CPP_NAME:	return ((b == CPP_NUMBER
1509 				 && name_p (pfile, &token2->val.str))
1510 				|| b == CPP_NAME
1511 				|| b == CPP_CHAR || b == CPP_STRING); /* L */
1512     case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
1513 				|| c == '.' || c == '+' || c == '-');
1514 				      /* UCNs */
1515     case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
1516 				 && b == CPP_NAME)
1517 				|| (CPP_OPTION (pfile, objc)
1518 				    && token1->val.str.text[0] == '@'
1519 				    && (b == CPP_NAME || b == CPP_STRING)));
1520     default:		break;
1521     }
1522 
1523   return 0;
1524 }
1525 
1526 /* Output all the remaining tokens on the current line, and a newline
1527    character, to FP.  Leading whitespace is removed.  If there are
1528    macros, special token padding is not performed.  */
1529 void
cpp_output_line(cpp_reader * pfile,FILE * fp)1530 cpp_output_line (cpp_reader *pfile, FILE *fp)
1531 {
1532   const cpp_token *token;
1533 
1534   token = cpp_get_token (pfile);
1535   while (token->type != CPP_EOF)
1536     {
1537       cpp_output_token (token, fp);
1538       token = cpp_get_token (pfile);
1539       if (token->flags & PREV_WHITE)
1540 	putc (' ', fp);
1541     }
1542 
1543   putc ('\n', fp);
1544 }
1545 
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the distance from BUFF's cur
   to its limit.  Every macro argument is parenthesized so that
   expression arguments group as the caller wrote them.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1560 
1561 /* Create a new allocation buffer.  Place the control block at the end
1562    of the buffer, so that buffer overflows will cause immediate chaos.  */
1563 static _cpp_buff *
new_buff(size_t len)1564 new_buff (size_t len)
1565 {
1566   _cpp_buff *result;
1567   unsigned char *base;
1568 
1569   if (len < MIN_BUFF_SIZE)
1570     len = MIN_BUFF_SIZE;
1571   len = CPP_ALIGN (len);
1572 
1573   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1574   result = (_cpp_buff *) (base + len);
1575   result->base = base;
1576   result->cur = base;
1577   result->limit = base + len;
1578   result->next = NULL;
1579   return result;
1580 }
1581 
1582 /* Place a chain of unwanted allocation buffers on the free list.  */
1583 void
_cpp_release_buff(cpp_reader * pfile,_cpp_buff * buff)1584 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1585 {
1586   _cpp_buff *end = buff;
1587 
1588   while (end->next)
1589     end = end->next;
1590   end->next = pfile->free_buffs;
1591   pfile->free_buffs = buff;
1592 }
1593 
1594 /* Return a free buffer of size at least MIN_SIZE.  */
1595 _cpp_buff *
_cpp_get_buff(cpp_reader * pfile,size_t min_size)1596 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1597 {
1598   _cpp_buff *result, **p;
1599 
1600   for (p = &pfile->free_buffs;; p = &(*p)->next)
1601     {
1602       size_t size;
1603 
1604       if (*p == NULL)
1605 	return new_buff (min_size);
1606       result = *p;
1607       size = result->limit - result->base;
1608       /* Return a buffer that's big enough, but don't waste one that's
1609          way too big.  */
1610       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1611 	break;
1612     }
1613 
1614   *p = result->next;
1615   result->next = NULL;
1616   result->cur = result->base;
1617   return result;
1618 }
1619 
1620 /* Creates a new buffer with enough space to hold the uncommitted
1621    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1622    the excess bytes to the new buffer.  Chains the new buffer after
1623    BUFF, and returns the new buffer.  */
1624 _cpp_buff *
_cpp_append_extend_buff(cpp_reader * pfile,_cpp_buff * buff,size_t min_extra)1625 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1626 {
1627   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1628   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1629 
1630   buff->next = new_buff;
1631   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1632   return new_buff;
1633 }
1634 
1635 /* Creates a new buffer with enough space to hold the uncommitted
1636    remaining bytes of the buffer pointed to by BUFF, and at least
1637    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1638    Chains the new buffer before the buffer pointed to by BUFF, and
1639    updates the pointer to point to the new buffer.  */
1640 void
_cpp_extend_buff(cpp_reader * pfile,_cpp_buff ** pbuff,size_t min_extra)1641 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1642 {
1643   _cpp_buff *new_buff, *old_buff = *pbuff;
1644   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1645 
1646   new_buff = _cpp_get_buff (pfile, size);
1647   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1648   new_buff->next = old_buff;
1649   *pbuff = new_buff;
1650 }
1651 
1652 /* Free a chain of buffers starting at BUFF.  */
1653 void
_cpp_free_buff(_cpp_buff * buff)1654 _cpp_free_buff (_cpp_buff *buff)
1655 {
1656   _cpp_buff *next;
1657 
1658   for (; buff; buff = next)
1659     {
1660       next = buff->next;
1661       free (buff->base);
1662     }
1663 }
1664 
1665 /* Allocate permanent, unaligned storage of length LEN.  */
1666 unsigned char *
_cpp_unaligned_alloc(cpp_reader * pfile,size_t len)1667 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1668 {
1669   _cpp_buff *buff = pfile->u_buff;
1670   unsigned char *result = buff->cur;
1671 
1672   if (len > (size_t) (buff->limit - result))
1673     {
1674       buff = _cpp_get_buff (pfile, len);
1675       buff->next = pfile->u_buff;
1676       pfile->u_buff = buff;
1677       result = buff->cur;
1678     }
1679 
1680   buff->cur = result + len;
1681   return result;
1682 }
1683 
1684 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1685    That buffer is used for growing allocations when saving macro
1686    replacement lists in a #define, and when parsing an answer to an
1687    assertion in #assert, #unassert or #if (and therefore possibly
1688    whilst expanding macros).  It therefore must not be used by any
1689    code that they might call: specifically the lexer and the guts of
1690    the macro expander.
1691 
1692    All existing other uses clearly fit this restriction: storing
1693    registered pragmas during initialization.  */
1694 unsigned char *
_cpp_aligned_alloc(cpp_reader * pfile,size_t len)1695 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1696 {
1697   _cpp_buff *buff = pfile->a_buff;
1698   unsigned char *result = buff->cur;
1699 
1700   if (len > (size_t) (buff->limit - result))
1701     {
1702       buff = _cpp_get_buff (pfile, len);
1703       buff->next = pfile->a_buff;
1704       pfile->a_buff = buff;
1705       result = buff->cur;
1706     }
1707 
1708   buff->cur = result + len;
1709   return result;
1710 }
1711 
1712 /* Say which field of TOK is in use.  */
1713 
1714 enum cpp_token_fld_kind
cpp_token_val_index(cpp_token * tok)1715 cpp_token_val_index (cpp_token *tok)
1716 {
1717   switch (TOKEN_SPELL (tok))
1718     {
1719     case SPELL_IDENT:
1720       return CPP_TOKEN_FLD_NODE;
1721     case SPELL_LITERAL:
1722       return CPP_TOKEN_FLD_STR;
1723     case SPELL_NONE:
1724       if (tok->type == CPP_MACRO_ARG)
1725 	return CPP_TOKEN_FLD_ARG_NO;
1726       else if (tok->type == CPP_PADDING)
1727 	return CPP_TOKEN_FLD_SOURCE;
1728       else if (tok->type == CPP_PRAGMA)
1729 	return CPP_TOKEN_FLD_PRAGMA;
1730       /* else fall through */
1731     default:
1732       return CPP_TOKEN_FLD_NONE;
1733     }
1734 }
1735